Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openj9
Path: blob/master/runtime/compiler/z/codegen/S390PrivateLinkage.cpp
6004 views
1
/*******************************************************************************
2
* Copyright (c) 2000, 2021 IBM Corp. and others
3
*
4
* This program and the accompanying materials are made available under
5
* the terms of the Eclipse Public License 2.0 which accompanies this
6
* distribution and is available at https://www.eclipse.org/legal/epl-2.0/
7
* or the Apache License, Version 2.0 which accompanies this distribution and
8
* is available at https://www.apache.org/licenses/LICENSE-2.0.
9
*
10
* This Source Code may also be made available under the following
11
* Secondary Licenses when the conditions for such availability set
12
* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
13
* General Public License, version 2 with the GNU Classpath
14
* Exception [1] and GNU General Public License, version 2 with the
15
* OpenJDK Assembly Exception [2].
16
*
17
* [1] https://www.gnu.org/software/classpath/license.html
18
* [2] http://openjdk.java.net/legal/assembly-exception.html
19
*
20
* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
21
*******************************************************************************/
22
23
#include "codegen/S390PrivateLinkage.hpp"
24
25
#include "codegen/CodeGenerator.hpp"
26
#include "codegen/GCStackAtlas.hpp"
27
#include "codegen/Linkage_inlines.hpp"
28
#include "codegen/Snippet.hpp"
29
#include "compile/ResolvedMethod.hpp"
30
#include "compile/VirtualGuard.hpp"
31
#include "env/CHTable.hpp"
32
#include "env/CompilerEnv.hpp"
33
#include "env/J2IThunk.hpp"
34
#include "env/PersistentCHTable.hpp"
35
#include "env/StackMemoryRegion.hpp"
36
#include "env/VMJ9.h"
37
#include "env/jittypes.h"
38
#include "env/j9method.h"
39
#include "il/Node.hpp"
40
#include "il/Node_inlines.hpp"
41
#include "il/ParameterSymbol.hpp"
42
#include "il/TreeTop.hpp"
43
#include "il/TreeTop_inlines.hpp"
44
#include "infra/InterferenceGraph.hpp"
45
#include "z/codegen/OpMemToMem.hpp"
46
#include "z/codegen/S390Evaluator.hpp"
47
#include "z/codegen/S390GenerateInstructions.hpp"
48
#include "z/codegen/S390HelperCallSnippet.hpp"
49
#include "z/codegen/S390J9CallSnippet.hpp"
50
#include "z/codegen/S390StackCheckFailureSnippet.hpp"
51
#include "z/codegen/SystemLinkage.hpp"
52
#include "z/codegen/SystemLinkagezOS.hpp"
53
#include "runtime/J9Profiler.hpp"
54
#include "runtime/J9ValueProfiler.hpp"
55
56
#define MIN_PROFILED_CALL_FREQUENCY (.075f)
57
58
////////////////////////////////////////////////////////////////////////////////
59
// J9::Z::PrivateLinkage for J9
60
////////////////////////////////////////////////////////////////////////////////
61
J9::Z::PrivateLinkage::PrivateLinkage(TR::CodeGenerator * codeGen,TR_LinkageConventions lc)
   : J9::PrivateLinkage(codeGen)
   {
   // Record which calling convention this linkage instance implements.
   setLinkageType(lc);

   // Linkage properties: long parameters are split across register pairs,
   // and longs/doubles each occupy two stack slots.
   setProperty(SplitLongParm);
   setProperty(TwoStackSlotsForLongAndDouble);

   // GPR5..GPR13 are preserved (callee-saved) under this private linkage.
   static const TR::RealRegister::RegNum preservedGPRs[] =
      {
      TR::RealRegister::GPR5,  TR::RealRegister::GPR6,  TR::RealRegister::GPR7,
      TR::RealRegister::GPR8,  TR::RealRegister::GPR9,  TR::RealRegister::GPR10,
      TR::RealRegister::GPR11, TR::RealRegister::GPR12, TR::RealRegister::GPR13,
      };
   for (size_t idx = 0; idx < sizeof(preservedGPRs) / sizeof(preservedGPRs[0]); ++idx)
      setRegisterFlag(preservedGPRs[idx], Preserved);

#if defined(ENABLE_PRESERVED_FPRS)
   // FPR8..FPR15 are additionally preserved when this build flag is enabled.
   static const TR::RealRegister::RegNum preservedFPRs[] =
      {
      TR::RealRegister::FPR8,  TR::RealRegister::FPR9,  TR::RealRegister::FPR10,
      TR::RealRegister::FPR11, TR::RealRegister::FPR12, TR::RealRegister::FPR13,
      TR::RealRegister::FPR14, TR::RealRegister::FPR15,
      };
   for (size_t idx = 0; idx < sizeof(preservedFPRs) / sizeof(preservedFPRs[0]); ++idx)
      setRegisterFlag(preservedFPRs[idx], Preserved);
#endif

   // Return-value registers.  A split long returns high word in GPR2 and low
   // word in GPR3; extended results span FPR0/FPR2/FPR4/FPR6.
   setIntegerReturnRegister(TR::RealRegister::GPR2);
   setLongLowReturnRegister(TR::RealRegister::GPR3);
   setLongHighReturnRegister(TR::RealRegister::GPR2);
   setLongReturnRegister(TR::RealRegister::GPR2);
   setFloatReturnRegister(TR::RealRegister::FPR0);
   setDoubleReturnRegister(TR::RealRegister::FPR0);
   setLongDoubleReturnRegister0(TR::RealRegister::FPR0);
   setLongDoubleReturnRegister2(TR::RealRegister::FPR2);
   setLongDoubleReturnRegister4(TR::RealRegister::FPR4);
   setLongDoubleReturnRegister6(TR::RealRegister::FPR6);

   // SIMD is usable only on z13+ hardware whose vector facility is present
   // and when SIMD has not been explicitly disabled; otherwise force the
   // disable option on so downstream queries agree.
   const bool simdUsable =
      comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z13) &&
      comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_FACILITY) &&
      !comp()->getOption(TR_DisableSIMD);
   if (simdUsable)
      {
      codeGen->setSupportsVectorRegisters();
      codeGen->setSupportsAutoSIMD();
      }
   else
      {
      comp()->setOption(TR_DisableSIMD);
      }

   const bool enableVectorLinkage = codeGen->getSupportsVectorRegisters();
   if (enableVectorLinkage)
      setVectorReturnRegister(TR::RealRegister::VRF24);

   // Dedicated registers.
   setStackPointerRegister(TR::RealRegister::GPR5);
   setEntryPointRegister(comp()->target().isLinux() ? TR::RealRegister::GPR4 : TR::RealRegister::GPR15);
   setReturnAddressRegister(TR::RealRegister::GPR14);

   setVTableIndexArgumentRegister(TR::RealRegister::GPR0);
   setJ9MethodArgumentRegister(TR::RealRegister::GPR1);

   setLitPoolRegister(TR::RealRegister::GPR6);
   setMethodMetaDataRegister(TR::RealRegister::GPR13);

   // Integer arguments arrive in GPR1..GPR3; floats in FPR0/FPR2/FPR4/FPR6.
   static const TR::RealRegister::RegNum intArgRegs[] =
      { TR::RealRegister::GPR1, TR::RealRegister::GPR2, TR::RealRegister::GPR3 };
   for (int32_t idx = 0; idx < 3; ++idx)
      setIntegerArgumentRegister(idx, intArgRegs[idx]);
   setNumIntegerArgumentRegisters(3);

   static const TR::RealRegister::RegNum floatArgRegs[] =
      { TR::RealRegister::FPR0, TR::RealRegister::FPR2, TR::RealRegister::FPR4, TR::RealRegister::FPR6 };
   for (int32_t idx = 0; idx < 4; ++idx)
      setFloatArgumentRegister(idx, floatArgRegs[idx]);
   setNumFloatArgumentRegisters(4);

   if (enableVectorLinkage)
      {
      // Vector arguments use VRF25..VRF31 first, then VRF24.
      static const TR::RealRegister::RegNum vecArgRegs[] =
         {
         TR::RealRegister::VRF25, TR::RealRegister::VRF26, TR::RealRegister::VRF27,
         TR::RealRegister::VRF28, TR::RealRegister::VRF29, TR::RealRegister::VRF30,
         TR::RealRegister::VRF31, TR::RealRegister::VRF24,
         };
      int32_t vecIndex = 0;
      while (vecIndex < (int32_t)(sizeof(vecArgRegs) / sizeof(vecArgRegs[0])))
         {
         setVectorArgumentRegister(vecIndex, vecArgRegs[vecIndex]);
         ++vecIndex;
         }
      setNumVectorArgumentRegisters(vecIndex);
      }

   // Frame layout offsets.
   setOffsetToFirstLocal(comp()->target().is64Bit() ? -8 : -4);
   setOffsetToRegSaveArea(0);
   setOffsetToLongDispSlot(0);
   setOffsetToFirstParm(0);

   // Base register-dependency count; when vector registers are supported the
   // full VRF bank must also be killable at call sites.
   int32_t numDeps = 30;
   if (enableVectorLinkage)
      numDeps += 32; //need to kill VRFs

   setNumberOfDependencyGPRegisters(numDeps);

   // 0x00001fc0 has bits 6..12 set — presumably the GC-preserved GPR set; see
   // the preserved-register flags above.
   setPreservedRegisterMapForGC(0x00001fc0);
   setLargestOutgoingArgumentAreaSize(0);
   }
167
168
////////////////////////////////////////////////////////////////////////////////
169
// J9::Z::PrivateLinkage::initS390RealRegisterLinkage - initialize the state
170
// of real register for register allocator
171
////////////////////////////////////////////////////////////////////////////////
172
void
173
J9::Z::PrivateLinkage::initS390RealRegisterLinkage()
174
{
175
TR::RealRegister * sspReal = getSystemStackPointerRealRegister();
176
TR::RealRegister * spReal = getStackPointerRealRegister();
177
TR::RealRegister * mdReal = getMethodMetaDataRealRegister();
178
int32_t icount, ret_count = 0;
179
180
// Lock all the dedicated registers
181
bool freeingSSPDisabled = true;
182
183
TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
184
185
if (cg()->supportsJITFreeSystemStackPointer())
186
freeingSSPDisabled = false;
187
188
if (freeingSSPDisabled)
189
{
190
sspReal->setState(TR::RealRegister::Locked);
191
sspReal->setAssignedRegister(sspReal);
192
sspReal->setHasBeenAssignedInMethod(true);
193
}
194
195
// Java Stack pointer
196
spReal->setState(TR::RealRegister::Locked);
197
spReal->setAssignedRegister(spReal);
198
spReal->setHasBeenAssignedInMethod(true);
199
200
// meta data register
201
mdReal->setState(TR::RealRegister::Locked);
202
mdReal->setAssignedRegister(mdReal);
203
mdReal->setHasBeenAssignedInMethod(true);
204
205
// set register weight
206
for (icount = TR::RealRegister::FirstGPR; icount <= TR::RealRegister::GPR3; icount++)
207
{
208
int32_t weight;
209
if (getIntegerReturn((TR::RealRegister::RegNum) icount))
210
{
211
weight = ++ret_count;
212
}
213
else
214
{
215
weight = icount;
216
}
217
cg()->machine()->getRealRegister((TR::RealRegister::RegNum) icount)->setWeight(weight);
218
}
219
220
for (icount = TR::RealRegister::GPR4; icount >= TR::RealRegister::LastAssignableGPR; icount++)
221
{
222
cg()->machine()->getRealRegister((TR::RealRegister::RegNum) icount)->setWeight(0xf000 + icount);
223
}
224
}
225
226
void J9::Z::PrivateLinkage::alignLocalsOffset(uint32_t &stackIndex, uint32_t localObjectAlignment)
227
{
228
if (stackIndex % localObjectAlignment != 0)
229
{
230
uint32_t stackIndexBeforeAlignment = stackIndex;
231
232
// TODO: Is the negation here necessary?
233
stackIndex = -((-stackIndex + (localObjectAlignment - 1)) & ~(localObjectAlignment - 1));
234
235
TR::GCStackAtlas *atlas = cg()->getStackAtlas();
236
237
atlas->setNumberOfSlotsMapped(atlas->getNumberOfSlotsMapped() + ((stackIndexBeforeAlignment - stackIndex) / TR::Compiler->om.sizeofReferenceAddress()));
238
239
if (comp()->getOption(TR_TraceRA))
240
{
241
traceMsg(comp(),"\nAlign stack offset before alignment = %d and after alignment = %d\n", stackIndexBeforeAlignment, stackIndex);
242
}
243
}
244
}
245
246
247
////////////////////////////////////////////////////////////////////////////////
248
// J9::Z::PrivateLinkage::mapCompactedStack - maps variables onto the stack, sharing
249
// stack slots for automatic variables with non-interfering live ranges.
250
////////////////////////////////////////////////////////////////////////////////
251
// Maps automatics onto the stack while sharing slots between locals whose
// live ranges do not interfere (driven by the coloured locals interference
// graph).  Mapping proceeds in passes: collected (GC) references first, then
// sub-8-byte scalars, variable-size symbols, then 8-byte-and-larger scalars,
// and finally the long-displacement scratch slot and incoming parameters.
void
J9::Z::PrivateLinkage::mapCompactedStack(TR::ResolvedMethodSymbol * method)
   {
   ListIterator<TR::AutomaticSymbol> automaticIterator(&method->getAutomaticList());
   TR::AutomaticSymbol *localCursor = automaticIterator.getFirst();
   int32_t firstLocalOffset = getOffsetToFirstLocal();
   uint32_t stackIndex = getOffsetToFirstLocal();     // downward-growing mapping cursor
   TR::GCStackAtlas *atlas = cg()->getStackAtlas();
   int32_t i;
   uint8_t pointerSize = TR::Compiler->om.sizeofReferenceAddress();

#ifdef DEBUG
   uint32_t origSize = 0; // the size of the stack had we not compacted it
#endif

      { // scope for the stack memory region backing the colour maps
      TR::StackMemoryRegion stackMemoryRegion(*trMemory());

      // colourToOffsetMap[c]: frame offset assigned to colour c (-1 = unmapped).
      int32_t *colourToOffsetMap =
         (int32_t *) trMemory()->allocateStackMemory(cg()->getLocalsIG()->getNumberOfColoursUsedToColour() * sizeof(int32_t));

      // colourToSizeMap[c]: largest rounded size of any local with colour c.
      uint32_t *colourToSizeMap =
         (uint32_t *) trMemory()->allocateStackMemory(cg()->getLocalsIG()->getNumberOfColoursUsedToColour() * sizeof(uint32_t));

      for (i=0; i<cg()->getLocalsIG()->getNumberOfColoursUsedToColour(); i++)
         {
         colourToOffsetMap[i] = -1;
         colourToSizeMap[i] = 0;
         }

      // Find maximum allocation size for each shared local.
      //
      TR_IGNode *igNode;
      uint32_t size;
      IGNodeColour colour;

      for (localCursor = automaticIterator.getFirst(); localCursor; localCursor = automaticIterator.getNext())
         {
         igNode = cg()->getLocalsIG()->getIGNodeForEntity(localCursor);
         if(igNode != NULL) // if the local doesn't have an interference graph node, we will just map it without attempt to compact, so we can ignore it
            {
            colour = igNode->getColour();

            TR_ASSERT(colour != UNCOLOURED, "uncoloured local %p (igNode=%p) found in locals IG\n",
               localCursor, igNode);

            // Internal pointers, pinning arrays, and monitored-object holders
            // never share slots, so they don't contribute to colour sizes.
            if (!(localCursor->isInternalPointer() || localCursor->isPinningArrayPointer() || localCursor->holdsMonitoredObject()))
               {
               size = localCursor->getRoundedSize();
               if (size > colourToSizeMap[colour])
                  {
                  colourToSizeMap[colour] = size;
                  }
               }
            }
         }

      // Same maximum-size scan over the variable-size symbol list.
      ListIterator<TR::AutomaticSymbol> variableSizeSymIterator(&method->getVariableSizeSymbolList());
      TR::AutomaticSymbol * variableSizeSymCursor = variableSizeSymIterator.getFirst();
      for (localCursor = variableSizeSymIterator.getFirst(); localCursor; localCursor = variableSizeSymIterator.getNext())
         {
         TR_ASSERT(localCursor->isVariableSizeSymbol(), "Should be variable sized");
         igNode = cg()->getLocalsIG()->getIGNodeForEntity(localCursor);
         if(igNode != NULL) // if the local doesn't have an interference graph node, we will just map it without attempt to compact, so we can ignore it
            {
            colour = igNode->getColour();
            TR_ASSERT(colour != UNCOLOURED, "uncoloured local %p (igNode=%p) found in locals IG\n",
               localCursor, igNode);
            if (!(localCursor->isInternalPointer() || localCursor->isPinningArrayPointer() || localCursor->holdsMonitoredObject()))
               {
               size = localCursor->getRoundedSize();
               if (size > colourToSizeMap[colour])
                  {
                  colourToSizeMap[colour] = size;
                  }
               }
            }
         }

      // *************************************how we align local objects********************************
      // because the offset of a local object is (stackIndex + pointerSize*(localCursor->getGCMapIndex()-firstLocalGCIndex))
      // In createStackAtlas, we align pointerSize*(localCursor->getGCMapIndex()-firstLocalGCIndex) by modifying local objects' gc indices
      // Here we align the stackIndex
      // *************************************how we align local objects********************************
      //
      traceMsg(comp(), "stackIndex after compaction = %d\n", stackIndex);

      // stackIndex in mapCompactedStack is calculated using only local reference sizes and does not include the padding
      stackIndex -= pointerSize * atlas->getNumberOfPaddingSlots();

      traceMsg(comp(), "stackIndex after padding slots = %d\n", stackIndex);

      uint32_t localObjectAlignment = 1 << TR::Compiler->om.compressedReferenceShift();

      if (localObjectAlignment >= 16)
         {
         // we don't want to fail gc when it tries to uncompress the reference of a stack allocated object, so we aligned the local objects based on the shift amount
         // this is different to the alignment of heap objects, which is controlled separately and could be larger than 2<<shiftamount
         alignLocalsOffset(stackIndex, localObjectAlignment);
         }

      // Map all garbage collected references together so we can concisely represent
      // stack maps. They must be mapped so that the GC map index in each local
      // symbol is honoured.
      //
#ifdef DEBUG
      // to report diagnostic information into the trace log that is guarded by if(debug("reportCL"))
      // set the environment variable TR_DEBUG=reportCL
      // also note that all diagnostic information is only reported in a debug build
      if(debug("reportCL"))
         diagnostic("\n****Mapping compacted stack for method: %s\n",comp()->signature());
#endif

      // Here we map the garbage collected references onto the stack
      // This stage is reversed later on, since in CodeGenGC we actually set all of the GC offsets
      // so effectively the local stack compaction of collected references happens there
      // but we must perform this stage to determine the size of the stack that contains object temp slots
      int32_t lowGCOffset = stackIndex;
      int32_t firstLocalGCIndex = atlas->getNumberOfParmSlotsMapped();
      automaticIterator.reset();
      for (localCursor = automaticIterator.getFirst(); localCursor; localCursor = automaticIterator.getNext())
         {
         if (localCursor->getGCMapIndex() >= 0)
            {
            TR_IGNode *igNode;
            // NOTE: intentional assignment inside the condition.
            if (igNode = cg()->getLocalsIG()->getIGNodeForEntity(localCursor))
               {
               IGNodeColour colour = igNode->getColour();

               if (localCursor->isInternalPointer() || localCursor->isPinningArrayPointer() || localCursor->holdsMonitoredObject())
                  {
                  // Regardless of colouring on the local, map an internal
                  // pointer or a pinning array local. These kinds of locals
                  // do not participate in the compaction of locals phase and
                  // are handled specially (basically the slots are not shared for
                  // these autos).
                  //
#ifdef DEBUG
                  if(debug("reportCL"))
                     diagnostic("Mapping uncompactable ref local: %p\n",localCursor);
#endif
                  mapSingleAutomatic(localCursor, stackIndex);
                  }
               else if (colourToOffsetMap[colour] == -1)
                  {
                  // First local of this colour: claim a fresh slot.
#ifdef DEBUG
                  if(debug("reportCL"))
                     diagnostic("Mapping first ref local: %p (colour=%d)\n",localCursor, colour);
#endif
                  mapSingleAutomatic(localCursor, stackIndex);
                  colourToOffsetMap[colour] = localCursor->getOffset();
                  }
               else
                  {
                  // Colour already has a slot: share it.
                  traceMsg(comp(), "O^O COMPACT LOCALS: Sharing slot for local %p (colour = %d)\n",localCursor, colour);
                  localCursor->setOffset(colourToOffsetMap[colour]);
                  }
               }
            else
               {
#ifdef DEBUG
               if(debug("reportCL"))
                  diagnostic("No ig node exists for ref local %p, mapping regularly\n",localCursor);
#endif
               mapSingleAutomatic(localCursor, stackIndex);
               }

#ifdef DEBUG
            origSize += localCursor->getRoundedSize();
#endif
            }
         }

      // Here is where we reverse the previous stage
      // We map local references again to set the stack position correct according to
      // the GC map index, which is set in CodeGenGC
      //
      automaticIterator.reset();
      for (localCursor = automaticIterator.getFirst(); localCursor; localCursor = automaticIterator.getNext())
         if (localCursor->getGCMapIndex() >= 0)
            {
            int32_t newOffset = stackIndex + pointerSize*(localCursor->getGCMapIndex()-firstLocalGCIndex);

            if (comp()->getOption(TR_TraceRA))
               traceMsg(comp(), "\nmapCompactedStack: changing %s (GC index %d) offset from %d to %d",
                  comp()->getDebug()->getName(localCursor), localCursor->getGCMapIndex(), localCursor->getOffset(), newOffset);

            localCursor->setOffset(newOffset);

            TR_ASSERT((localCursor->getOffset() <= 0), "Local %p (GC index %d) offset cannot be positive (stackIndex = %d)\n", localCursor, localCursor->getGCMapIndex(), stackIndex);

            if (localCursor->getGCMapIndex() == atlas->getIndexOfFirstInternalPointer())
               {
               atlas->setOffsetOfFirstInternalPointer(localCursor->getOffset() - firstLocalOffset);
               }
            }

      method->setObjectTempSlots((lowGCOffset-stackIndex) / pointerSize);
      lowGCOffset = stackIndex;

      // Now map the rest of the locals (i.e. non-references)
      //
      // first map 4-byte locals, then 8-byte (and larger) locals
      //
      stackIndex -= (stackIndex & 0x4) ? 4 : 0;   // word-align the cursor
      automaticIterator.reset();
      for (localCursor = automaticIterator.getFirst(); localCursor; localCursor = automaticIterator.getNext())
         if (localCursor->getGCMapIndex() < 0)
            {
            TR_IGNode *igNode;
            // NOTE: intentional assignment inside the condition.
            if (igNode = cg()->getLocalsIG()->getIGNodeForEntity(localCursor))
               {
               IGNodeColour colour = igNode->getColour();

               // This pass handles only colours whose max size is < 8 bytes.
               if(colourToSizeMap[colour] < 8)
                  {
                  if (colourToOffsetMap[colour] == -1) // map auto to stack slot
                     {
#ifdef DEBUG
                     if(debug("reportCL"))
                        diagnostic("Mapping first local: %p (colour=%d)\n",localCursor, colour);
#endif
                     mapSingleAutomatic(localCursor, colourToSizeMap[colour], stackIndex);
                     colourToOffsetMap[colour] = localCursor->getOffset();
                     }
                  else // share local with already mapped stack slot
                     {
                     traceMsg(comp(), "O^O COMPACT LOCALS: Sharing slot for local %p (colour = %d)\n",localCursor, colour);
                     localCursor->setOffset(colourToOffsetMap[colour]);
                     }
#ifdef DEBUG
                  origSize += localCursor->getRoundedSize();
#endif
                  }
               }
            else if(localCursor->getRoundedSize() < 8)
               {
#ifdef DEBUG
               if(debug("reportCL"))
                  diagnostic("No ig node exists for local %p, mapping regularly\n",localCursor);
               origSize += localCursor->getRoundedSize();
#endif
               mapSingleAutomatic(localCursor, stackIndex);
               }
            }

      // Variable-size symbols are only mapped when actually referenced.
      variableSizeSymIterator.reset();
      variableSizeSymCursor = variableSizeSymIterator.getFirst();
      while (variableSizeSymCursor != NULL)
         {
         if (variableSizeSymCursor->isReferenced())
            {
            if (cg()->traceBCDCodeGen())
               traceMsg(comp(),"map variableSize sym %p (size %d) because isReferenced=true ",variableSizeSymCursor,variableSizeSymCursor->getSize());
            mapSingleAutomatic(variableSizeSymCursor, stackIndex); //Ivan
            if (cg()->traceBCDCodeGen())
               traceMsg(comp(),"to auto offset %d\n",variableSizeSymCursor->getOffset());
            }
         else if (cg()->traceBCDCodeGen())
            {
            traceMsg(comp(),"do not map variableSize sym %p (size %d) because isReferenced=false\n",variableSizeSymCursor,variableSizeSymCursor->getSize());
            }
         variableSizeSymCursor = variableSizeSymIterator.getNext();
         }

      // Ensure the frame is double-word aligned, since we're about to map 8-byte autos
      //
#ifdef DEBUG
      origSize += (origSize & 0x4) ? 4 : 0;
#endif
      stackIndex -= (stackIndex & 0x4) ? 4 : 0;

      TR_ASSERT((stackIndex % pointerSize) == 0,
         "size of scalar temp area not a multiple of Java pointer size");

      // now map 8-byte autos
      //
      automaticIterator.reset();
      for (localCursor = automaticIterator.getFirst(); localCursor; localCursor = automaticIterator.getNext())
         if (localCursor->getGCMapIndex() < 0)
            {
            TR_IGNode *igNode;
            // NOTE: intentional assignment inside the condition.
            if (igNode = cg()->getLocalsIG()->getIGNodeForEntity(localCursor))
               {
               IGNodeColour colour = igNode->getColour();

               // This pass handles colours whose max size is >= 8 bytes.
               if(colourToSizeMap[colour] >= 8)
                  {
                  if (colourToOffsetMap[colour] == -1) // map auto to stack slot
                     {
#ifdef DEBUG
                     if(debug("reportCL"))
                        diagnostic("Mapping first local: %p (colour=%d)\n",localCursor, colour);
#endif
                     stackIndex -= (stackIndex & 0x4) ? 4 : 0;   // keep 8-byte alignment
                     mapSingleAutomatic(localCursor, colourToSizeMap[colour], stackIndex);
                     colourToOffsetMap[colour] = localCursor->getOffset();
                     }
                  else // share local with already mapped stack slot
                     {
                     traceMsg(comp(), "O^O COMPACT LOCALS: Sharing slot for local %p (colour = %d)\n",localCursor, colour);
                     localCursor->setOffset(colourToOffsetMap[colour]);
                     }
#ifdef DEBUG
                  origSize += localCursor->getRoundedSize();
#endif
                  }
               }
            else if(localCursor->getRoundedSize() >= 8)
               {
#ifdef DEBUG
               if(debug("reportCL"))
                  diagnostic("No ig node exists for local %p, mapping regularly\n",localCursor);
               origSize += localCursor->getRoundedSize();
#endif
               stackIndex -= (stackIndex & 0x4) ? 4 : 0;   // keep 8-byte alignment
               mapSingleAutomatic(localCursor, stackIndex);
               }
            }

      // Map slot for Long Displacement
      // Pick an arbitrary large number that is less than
      // long disp (4K) to identify that we are no-where near
      // a large stack or a large lit-pool

      //stackIndex -= pointerSize;
      stackIndex -= 16; // see defect 162458, 164661
#ifdef DEBUG
      // origSize += pointerSize;
      origSize += 16;
#endif
      setOffsetToLongDispSlot((uint32_t) (-((int32_t)stackIndex)));


      // msf - aligning the start of the parm list may not always
      // be best, but if a long is passed into a virtual fn, it will
      // then be aligned (and therefore can efficiently be accessed)
      // a better approach would be to look at the signature and determine
      // the best overall way to align the stack given that the parm list
      // is contiguous in storage to make it easy on the interpreter
      // and therefore there may not be a 'best' way to align the storage.
      // This change was made upon noticing that sometimes getObject() is
      // very hot and references its data from backing storage often.
      // it is possible that the stack might not be double-word aligned, due to mapping for long displacement if the pointer size is 4
#ifdef DEBUG
      origSize += (origSize & 0x4) ? 4 : 0;
#endif
      stackIndex -= (stackIndex & 0x4) ? 4 : 0;

      method->setScalarTempSlots((lowGCOffset-stackIndex) / pointerSize);
      method->setLocalMappingCursor(stackIndex);

      mapIncomingParms(method);

      atlas->setLocalBaseOffset(lowGCOffset - firstLocalOffset);
      atlas->setParmBaseOffset(atlas->getParmBaseOffset() + getOffsetToFirstParm() - firstLocalOffset);

      } // scope of the stack memory region

#ifdef DEBUG
   automaticIterator.reset();

   // report stack mapping even if TR_DEBUG=reportCL isn't set
   diagnostic("\n****SYMBOL OFFSETS\n");
   for (localCursor = automaticIterator.getFirst(); localCursor; localCursor = automaticIterator.getNext())
      {
      diagnostic("Local %p, offset=%d\n", localCursor, localCursor->getOffset());
      }

   if (debug("reportCL"))
      {

      int mappedSize = firstLocalOffset - stackIndex;
      diagnostic("\n**** Mapped locals size: %d (orig map size=%d, shared size=%d) %s\n",
         (mappedSize),
         origSize,
         origSize - mappedSize,
         comp()->signature());
      }
#endif

   }
635
636
void
637
J9::Z::PrivateLinkage::mapStack(TR::ResolvedMethodSymbol * method)
638
{
639
640
if (cg()->getLocalsIG() && cg()->getSupportsCompactedLocals())
641
{
642
mapCompactedStack(method);
643
return;
644
}
645
646
647
ListIterator<TR::AutomaticSymbol> automaticIterator(&method->getAutomaticList());
648
TR::AutomaticSymbol * localCursor = automaticIterator.getFirst();
649
TR::RealRegister::RegNum regIndex;
650
int32_t firstLocalOffset = getOffsetToFirstLocal();
651
uint32_t stackIndex = firstLocalOffset;
652
int32_t lowGCOffset;
653
TR::GCStackAtlas * atlas = cg()->getStackAtlas();
654
655
// map all garbage collected references together so can concisely represent
656
// stack maps. They must be mapped so that the GC map index in each local
657
// symbol is honoured.
658
lowGCOffset = stackIndex;
659
int32_t firstLocalGCIndex = atlas->getNumberOfParmSlotsMapped();
660
661
stackIndex -= (atlas->getNumberOfSlotsMapped() - firstLocalGCIndex) * TR::Compiler->om.sizeofReferenceAddress();
662
663
uint32_t localObjectAlignment = 1 << TR::Compiler->om.compressedReferenceShift();
664
665
if (localObjectAlignment >= 16)
666
{
667
// we don't want to fail gc when it tries to uncompress the reference of a stack allocated object, so we aligned the local objects based on the shift amount
668
// this is different to the alignment of heap objects, which is controlled separately and could be larger than 2<<shiftamount
669
alignLocalsOffset(stackIndex, localObjectAlignment);
670
}
671
672
// Map local references again to set the stack position correct according to
673
// the GC map index.
674
//
675
for (localCursor = automaticIterator.getFirst(); localCursor; localCursor = automaticIterator.getNext())
676
{
677
if (localCursor->getGCMapIndex() >= 0)
678
{
679
localCursor->setOffset(stackIndex + TR::Compiler->om.sizeofReferenceAddress() * (localCursor->getGCMapIndex() - firstLocalGCIndex));
680
}
681
if (localCursor->getGCMapIndex() == atlas->getIndexOfFirstInternalPointer())
682
{
683
atlas->setOffsetOfFirstInternalPointer(localCursor->getOffset() - firstLocalOffset);
684
}
685
}
686
687
method->setObjectTempSlots((lowGCOffset - stackIndex) / TR::Compiler->om.sizeofReferenceAddress());
688
lowGCOffset = stackIndex;
689
690
stackIndex -= (stackIndex & 0x4) ? 4 : 0;
691
692
// Now map the rest of the locals
693
//
694
ListIterator<TR::AutomaticSymbol> variableSizeSymIterator(&method->getVariableSizeSymbolList());
695
TR::AutomaticSymbol * variableSizeSymCursor = variableSizeSymIterator.getFirst();
696
while (variableSizeSymCursor != NULL)
697
{
698
TR_ASSERT(variableSizeSymCursor->isVariableSizeSymbol(), "should be variable sized");
699
if (variableSizeSymCursor->isReferenced())
700
{
701
if (cg()->traceBCDCodeGen())
702
traceMsg(comp(),"map variableSize sym %p (size %d) because isReferenced=true ",variableSizeSymCursor,variableSizeSymCursor->getSize());
703
mapSingleAutomatic(variableSizeSymCursor, stackIndex); //Ivan
704
if (cg()->traceBCDCodeGen())
705
traceMsg(comp(),"to auto offset %d\n",variableSizeSymCursor->getOffset());
706
}
707
else if (cg()->traceBCDCodeGen())
708
{
709
traceMsg(comp(),"do not map variableSize sym %p (size %d) because isReferenced=false\n",variableSizeSymCursor,variableSizeSymCursor->getSize());
710
}
711
variableSizeSymCursor = variableSizeSymIterator.getNext();
712
}
713
714
automaticIterator.reset();
715
localCursor = automaticIterator.getFirst();
716
717
while (localCursor != NULL)
718
{
719
if (localCursor->getGCMapIndex() < 0 && !TR::Linkage::needsAlignment(localCursor->getDataType(), cg()))
720
{
721
mapSingleAutomatic(localCursor, stackIndex);
722
}
723
localCursor = automaticIterator.getNext();
724
}
725
726
automaticIterator.reset();
727
localCursor = automaticIterator.getFirst();
728
729
// align double - but there is more to do to align the stack in general as double.
730
while (localCursor != NULL)
731
{
732
if (localCursor->getGCMapIndex() < 0 && TR::Linkage::needsAlignment(localCursor->getDataType(), cg()))
733
{
734
stackIndex -= (stackIndex & 0x4) ? 4 : 0;
735
mapSingleAutomatic(localCursor, stackIndex);
736
}
737
localCursor = automaticIterator.getNext();
738
}
739
740
// Force the stack size to be increased by...
741
if (comp()->getOption(TR_Randomize) && comp()->getOptions()->get390StackBufferSize() == 0)
742
{
743
if (cg()->randomizer.randomBoolean(300) && performTransformation(comp(),"O^O Random Codegen - Added 5000 dummy slots to Java Stack frame to test large displacement.\n"))
744
{
745
stackIndex -= 5000;
746
}
747
else
748
{
749
stackIndex -= 0;
750
}
751
}
752
else
753
{
754
stackIndex -= (comp()->getOptions()->get390StackBufferSize()/4)*4;
755
}
756
757
758
stackIndex -= (stackIndex & 0x4) ? 4 : 0;
759
760
// Pick an arbitrary large number that is less than
761
// long disp (4K) to identify that we are no-where near
762
// a large stack or a large lit-pool
763
//
764
765
stackIndex -= 16; // see defect 162458, 164661
766
setOffsetToLongDispSlot((uint32_t) (-((int32_t)stackIndex)));
767
768
method->setScalarTempSlots((lowGCOffset - stackIndex) / TR::Compiler->om.sizeofReferenceAddress());
769
method->setLocalMappingCursor(stackIndex);
770
771
// msf - aligning the start of the parm list may not always
772
// be best, but if a long is passed into a virtual fn, it will
773
// then be aligned (and therefore can efficiently be accessed)
774
// a better approach would be to look at the signature and determine
775
// the best overall way to align the stack given that the parm list
776
// is contiguous in storage to make it easy on the interpreter
777
// and therefore there may not be a 'best' way to align the storage.
778
// This change was made upon noticing that sometimes getObject() is
779
// very hot and references it's data from backing storage often.
780
stackIndex -= (stackIndex & 0x4) ? 4 : 0;
781
782
mapIncomingParms(method);
783
784
atlas->setLocalBaseOffset(lowGCOffset - firstLocalOffset);
785
atlas->setParmBaseOffset(atlas->getParmBaseOffset() + getOffsetToFirstParm() - firstLocalOffset);
786
787
#ifdef DEBUG
788
automaticIterator.reset();
789
diagnostic("\n****SYMBOL OFFSETS\n");
790
for (localCursor = automaticIterator.getFirst(); localCursor; localCursor = automaticIterator.getNext())
791
{
792
diagnostic("Local %p, offset=%d\n", localCursor, localCursor->getOffset());
793
}
794
#endif
795
796
}
797
798
////////////////////////////////////////////////////////////////////////////////
799
// J9::Z::PrivateLinkage::mapSingleAutomatic - maps an automatic onto the stack
800
// with size p->getRoundedSize()
801
////////////////////////////////////////////////////////////////////////////////
802
void
803
J9::Z::PrivateLinkage::mapSingleAutomatic(TR::AutomaticSymbol * p, uint32_t & stackIndex)
804
{
805
806
mapSingleAutomatic(p, p->getRoundedSize(), stackIndex);
807
}
808
809
////////////////////////////////////////////////////////////////////////////////
810
// J9::Z::PrivateLinkage::mapSingleAutomatic - maps an automatic onto the stack
811
////////////////////////////////////////////////////////////////////////////////
812
void
813
J9::Z::PrivateLinkage::mapSingleAutomatic(TR::AutomaticSymbol * p, uint32_t size, uint32_t & stackIndex)
814
{
815
816
p->setOffset(stackIndex -= size);
817
}
818
819
// Decide whether the given parameter must also be kept in its stack slot even
// though it has been assigned a global register. The decision is cached on the
// parameter symbol itself (see the PR 96788 note below) so later queries agree
// with this one.
bool
J9::Z::PrivateLinkage::hasToBeOnStack(TR::ParameterSymbol * parm)
   {
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
   TR::ResolvedMethodSymbol * bodySymbol = comp()->getJittedMethodSymbol();
   // Assigned inside the boolean expression below when the <init>/Throwable
   // check runs; only meaningful on that path.
   TR_OpaqueClassBlock * throwableClass;

   // Need to save parameter on the stack if:
   // A global register is allocated for the parameter AND either:
   // 1. the parameter is the *this pointer of a virtual sync'd/jvmpi method
   // 2. the address of the parameter is taken (JNI calls)
   // (You can't get an address of the parameter if it is stored in a register -
   // hence, parameter needs to be saved it onto the stack).
   bool result = ( parm->getAssignedGlobalRegisterIndex() >= 0 && // is using global RA
          ( ( parm->getLinkageRegisterIndex() == 0 && // is first parameter (this pointer)
              parm->isCollectedReference() && // is object reference
              !bodySymbol->isStatic() && // is virtual
              // TODO:
              // We potentially only need to save param onto stack for sync'd methods
              // which have calls/exception traps. Currently, we conservatively save
              // param onto stack for sync'd methods, regardless of whether there are calls
              // or not.
              // see PPCLinkage for actual details.
              // ( ( bodySymbol->isSynchronised() && // is sync method
              // ( cg()->canExceptByTrap() || cg()->hasCall() ) // can trigger stack walker
              // ) ||
              ( ( bodySymbol->isSynchronised()
                ) ||
                (
                  // Constructor of java/lang/Throwable or a subclass (or a class we
                  // cannot yet resolve against Throwable — conservatively treated
                  // the same): the receiver may be needed for stack walking.
                  !strncmp(bodySymbol->getResolvedMethod()->nameChars(), "<init>", 6) &&
                  ( (throwableClass = fej9->getClassFromSignature("Ljava/lang/Throwable;", 21, bodySymbol->getResolvedMethod())) == 0 ||
                    fej9->isInstanceOf(bodySymbol->getResolvedMethod()->containingClass(), throwableClass, true) != TR_no
                  )
                )
              )
            ) ||
            parm->isParmHasToBeOnStack() // JNI direct where the address of a parm can be taken. e.g. &this.
          )
        );

   // Problem Report 96788:
   //
   // There is a potential race condition here. Because of the query to the frontend this function could
   // possibly return different results at different points in the compilation dependent on whether the
   // java/lang/Throwable class is resolved or not. This is a problem because this query is used to
   // determine whether we need to generate a GC map for this parameter and whether we need to generate
   // a store out to the stack for this parameter. Because these two queries happen at two different points
   // in the compilation we could encounter a situation where we generate a GC map for this parameter but
   // not generate a store out to the stack. This causes assertions in the VM if we hit a GC point in this
   // compilation unit. To avoid this issue we cache the result of this function and directly modify the
   // parameter symbol.

   // TODO : Where does the java/lang/Throwable code below originate and why is it here? This seems like
   // a very hacky fix to a very specific problem. Also why is this code not commoned up with P and why
   // is it missing for X?

   // Sticky cache: once true, isParmHasToBeOnStack() makes every later call
   // return true regardless of frontend resolution state.
   if (result)
      parm->setParmHasToBeOnStack();

   return result;
   }
880
881
void
882
J9::Z::PrivateLinkage::setParameterLinkageRegisterIndex(TR::ResolvedMethodSymbol * method)
883
{
884
self()->setParameterLinkageRegisterIndex(method, method->getParameterList());
885
}
886
887
// Walk the parameter list and record, on each parameter symbol, the linkage
// register index it will arrive in (or -1 if it is passed on the stack).
// Integer, float and vector registers are counted independently; the walk
// stops once every register class is exhausted, leaving the remaining
// parameters with their default (stack) assignment.
void
J9::Z::PrivateLinkage::setParameterLinkageRegisterIndex(TR::ResolvedMethodSymbol * method, List<TR::ParameterSymbol> &parmList)
   {
   ListIterator<TR::ParameterSymbol> paramIterator(&parmList);
   TR::ParameterSymbol * paramCursor=paramIterator.getFirst();
   int32_t numIntArgs = 0, numFloatArgs = 0, numVectorArgs = 0;

   int32_t paramNum = -1;
   while ((paramCursor != NULL) &&
          (numIntArgs < self()->getNumIntegerArgumentRegisters() ||
           numFloatArgs < self()->getNumFloatArgumentRegisters() ||
           numVectorArgs < self()->getNumVectorArgumentRegisters()))
      {
      // -1 means "no linkage register"; overwritten below if one is available.
      int32_t index = -1;
      paramNum++;

      TR::DataType dt = paramCursor->getDataType();

      switch (dt)
         {
         case TR::Int8:
         case TR::Int16:
         case TR::Int32:
         case TR::Address:
            if (numIntArgs < self()->getNumIntegerArgumentRegisters())
               {
               index = numIntArgs;
               }
            numIntArgs++;
            break;
         case TR::Int64:
            if(numIntArgs < self()->getNumIntegerArgumentRegisters())
               {
               index = numIntArgs;
               }
            // On 31-bit a long occupies a register pair, so it consumes two
            // integer argument registers.
            numIntArgs += (comp()->target().is64Bit() ? 1 : 2);
            break;
         case TR::Float:
         case TR::Double:
            if (numFloatArgs < self()->getNumFloatArgumentRegisters())
               {
               index = numFloatArgs;
               }
            numFloatArgs++;
            break;
         // Decimal and aggregate parameters are never passed in registers:
         // they keep index == -1 and consume no register counts.
         case TR::PackedDecimal:
         case TR::ZonedDecimal:
         case TR::ZonedDecimalSignLeadingEmbedded:
         case TR::ZonedDecimalSignLeadingSeparate:
         case TR::ZonedDecimalSignTrailingSeparate:
         case TR::UnicodeDecimal:
         case TR::UnicodeDecimalSignLeading:
         case TR::UnicodeDecimalSignTrailing:
         case TR::Aggregate:
            break;
         case TR::VectorInt8:
         case TR::VectorInt16:
         case TR::VectorInt32:
         case TR::VectorInt64:
         case TR::VectorDouble:
            if (numVectorArgs < self()->getNumVectorArgumentRegisters())
               {
               index = numVectorArgs;
               }
            numVectorArgs++;
            break;
         }
      paramCursor->setLinkageRegisterIndex(index);
      paramCursor = paramIterator.getNext();

      if (self()->isFastLinkLinkageType())
         {
         if ((numFloatArgs == 1) || (numIntArgs >= self()->getNumIntegerArgumentRegisters()))
            {
            // force fastlink ABI condition of only one float parameter for fastlink parameter and it must be within first slots
            numFloatArgs = self()->getNumFloatArgumentRegisters(); // no more float args possible now
            }
         }
      }
   }
967
968
//Clears numBytes bytes of storage from baseOffset(srcReg)
969
static TR::Instruction *
970
initStg(TR::CodeGenerator * codeGen, TR::Node * node, TR::RealRegister * tmpReg, TR::RealRegister * srcReg,TR::RealRegister * itersReg, int32_t baseOffset, int32_t numBytes,
971
TR::Instruction * cursor)
972
{
973
int32_t numIters = (numBytes / 256);
974
TR::RealRegister * baseReg = NULL;
975
TR::RealRegister * indexReg = tmpReg;
976
977
TR_ASSERT( numBytes >= 0, "number of bytes to clear must be positive");
978
TR_ASSERT( baseOffset >= 0, "starting offset must be positive");
979
980
if ((numBytes < 4096) && (numIters * 256 + baseOffset < 4096))
981
{
982
baseReg = srcReg;
983
}
984
else
985
{
986
baseReg = tmpReg;
987
988
// If we don't set the proper flag when we use GPR14 as a temp register
989
// here during prologue creation, we won't restore the return address
990
// into GPR14 in epilogue
991
tmpReg->setHasBeenAssignedInMethod(true);
992
993
if (baseOffset>=MIN_IMMEDIATE_VAL && baseOffset<=MAX_IMMEDIATE_VAL)
994
{
995
cursor = generateRRInstruction(codeGen, TR::InstOpCode::getLoadRegOpCode(), node, baseReg, srcReg, cursor);
996
cursor = generateRIInstruction(codeGen, TR::InstOpCode::getAddHalfWordImmOpCode(), node, baseReg, baseOffset, cursor);
997
}
998
else // Large frame situation
999
{
1000
cursor = generateS390ImmToRegister(codeGen, node, baseReg, (intptr_t)(baseOffset), cursor);
1001
cursor = generateRRInstruction(codeGen, TR::InstOpCode::getAddRegOpCode(), node, baseReg, srcReg, cursor);
1002
}
1003
baseOffset = 0;
1004
}
1005
1006
MemClearConstLenMacroOp op(node, node, codeGen, numBytes);
1007
return op.generate(baseReg, baseReg, indexReg, itersReg, baseOffset, cursor);
1008
}
1009
1010
int32_t
1011
J9::Z::PrivateLinkage::calculateRegisterSaveSize(TR::RealRegister::RegNum firstUsedReg,
1012
TR::RealRegister::RegNum lastUsedReg,
1013
int32_t &registerSaveDescription,
1014
int32_t &numIntSaved, int32_t &numFloatSaved)
1015
{
1016
int32_t regSaveSize = 0;
1017
// set up registerSaveDescription which looks the following
1018
//
1019
// 00000000 offsetfrombp 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1020
// gpr15 gpr0
1021
//
1022
// The bit is set to 1 if the register is saved.
1023
int32_t i;
1024
if (lastUsedReg != TR::RealRegister::NoReg)
1025
{
1026
for (i = firstUsedReg ; i <= lastUsedReg ; ++i)
1027
{
1028
registerSaveDescription |= 1 << (i - 1);
1029
numIntSaved++;
1030
}
1031
}
1032
1033
#if defined(ENABLE_PRESERVED_FPRS)
1034
for (i = TR::RealRegister::FPR8 ; i <= TR::RealRegister::FPR15 ; ++i)
1035
{
1036
if ((getRealRegister(i))->getHasBeenAssignedInMethod())
1037
{
1038
numFloatSaved++;
1039
}
1040
}
1041
#endif
1042
1043
// calculate stackFramesize
1044
regSaveSize += numIntSaved * cg()->machine()->getGPRSize() +
1045
numFloatSaved * cg()->machine()->getFPRSize();
1046
1047
1048
int32_t firstLocalOffset = getOffsetToFirstLocal();
1049
int32_t localSize = -1 * (int32_t) (comp()->getJittedMethodSymbol()->getLocalMappingCursor()); // Auto+Spill size
1050
1051
return regSaveSize;
1052
}
1053
1054
int32_t
1055
J9::Z::PrivateLinkage::setupLiteralPoolRegister(TR::Snippet *firstSnippet)
1056
{
1057
// setup literal pool register if needed
1058
// on freeway:
1059
// LARL r6, i2 <- where i2 = (addr of lit. pool-current addr)/2
1060
//
1061
// on non freeway:
1062
// BRAS r6, 4
1063
// <lit. pool addr>
1064
// L r6, 0(r6)
1065
1066
if (!cg()->isLiteralPoolOnDemandOn() && firstSnippet != NULL)
1067
{
1068
// The immediate operand will be patched when the actual address of the literal pool is known
1069
if (cg()->anyLitPoolSnippets())
1070
{
1071
return getLitPoolRealRegister()->getRegisterNumber();
1072
}
1073
}
1074
1075
return -1;
1076
}
1077
1078
////////////////////////////////////////////////////////////////////////////////
// J9::Z::PrivateLinkage::createPrologue() - create prolog for private linkage
//
// Lays out the final frame (register save area, locals, outgoing args),
// publishes the frame size and register save description, then emits the
// prologue instructions: save RA, bump the stack pointer, stack-overflow
// check, preserved-register stores, GC-slot zero-initialization, literal
// pool setup, and argument saves/moves.
////////////////////////////////////////////////////////////////////////////////
void
J9::Z::PrivateLinkage::createPrologue(TR::Instruction * cursor)
   {
   TR::RealRegister * spReg = getStackPointerRealRegister();
   TR::RealRegister * lpReg = getLitPoolRealRegister();
   TR::RealRegister * epReg = getEntryPointRealRegister();
   TR::Snippet * firstSnippet = NULL;
   TR::Node * firstNode = comp()->getStartTree()->getNode();
   int32_t size = 0, argSize = 0, regSaveSize = 0, numIntSaved = 0, numFloatSaved = 0;
   int32_t registerSaveDescription = 0;
   int32_t firstLocalOffset = getOffsetToFirstLocal();
   int32_t i;
   TR::ResolvedMethodSymbol * bodySymbol = comp()->getJittedMethodSymbol();
   int32_t localSize = -1 * (int32_t) (bodySymbol->getLocalMappingCursor()); // Auto+Spill size

   // look for registers that need to be saved
   // Look between R6-R11
   //
   TR::RealRegister::RegNum firstUsedReg = getFirstSavedRegister(TR::RealRegister::GPR6,
                                                                 TR::RealRegister::GPR12);
   TR::RealRegister::RegNum lastUsedReg = getLastSavedRegister(TR::RealRegister::GPR6,
                                                               TR::RealRegister::GPR12);

   // compute the register save area
   regSaveSize = calculateRegisterSaveSize(firstUsedReg, lastUsedReg,
                                           registerSaveDescription,
                                           numIntSaved, numFloatSaved);

   // NOTE(review): "0 &&" makes the first branch dead, so argSize always takes
   // the else path; presumably left in for experimentation — confirm before
   // removing.
   if (0 && comp()->target().is64Bit())
      {
      argSize = cg()->getLargestOutgoingArgSize() * 2 + getOffsetToFirstParm();
      }
   else
      {
      argSize = cg()->getLargestOutgoingArgSize() + getOffsetToFirstParm();
      }
   size = regSaveSize + localSize + argSize;

   // TODO: Rename this option to "disableStackAlignment" as we can align to more than doubleword now
   if (!comp()->getOption(TR_DisableDoubleWordStackAlignment))
      {
      traceMsg(comp(), "Before stack alignment Framesize = %d, localSize = %d\n", size, localSize);

      // Alignment is the larger of the compressed-reference shift granule and
      // a doubleword.
      uint32_t stackFrameAlignment = std::max(1 << TR::Compiler->om.compressedReferenceShift(), 8);

      // Represents the smallest non-negative x such that (size + x) % stackFrameAlignment == 0
      int32_t distanceToAlignment = (stackFrameAlignment - (size % stackFrameAlignment)) % stackFrameAlignment;

      // Padding is folded into the locals area.
      localSize += distanceToAlignment;

      // Recompute the size with the new (potentially) updated localSize
      size = regSaveSize + localSize + argSize;

      traceMsg(comp(), "After stack alignment Framesize = %d, localSize = %d\n", size, localSize);
      }

   // Check for large stack
   // (i.e. the frame size does not fit a signed halfword immediate)
   bool largeStack = (size<MIN_IMMEDIATE_VAL || size>MAX_IMMEDIATE_VAL);

   if (comp()->getOption(TR_TraceCG))
      {
      traceMsg(comp(), "\n regSaveSize = %d localSize = %d argSize = %d firstLocalOffset = %d \n",regSaveSize,localSize,argSize,firstLocalOffset);
      traceMsg(comp(), " Framesize = %d \n",size);
      }

   TR_ASSERT( ((int32_t) size % 4 == 0), "misaligned stack detected");

   setOffsetToRegSaveArea(argSize);

   // Upper 16 bits of the RSD carry the offset from bp to the save area; the
   // low bits (set by calculateRegisterSaveSize) flag which GPRs are saved.
   registerSaveDescription |= (localSize + firstLocalOffset + regSaveSize) << 16;

   cg()->setRegisterSaveDescription(registerSaveDescription);

   cg()->setFrameSizeInBytes(size + firstLocalOffset);

   // Re-express the long-displacement spill slot as an offset from the new
   // (post-prologue) stack pointer.
   int32_t offsetToLongDisp = size - getOffsetToLongDispSlot();
   setOffsetToLongDispSlot(offsetToLongDisp);
   if (comp()->getOption(TR_TraceCG))
      {
      traceMsg(comp(), "\n\nOffsetToLongDispSlot = %d\n", offsetToLongDisp);
      }

   // Is GPR14 ever used? If not, we can avoid
   //
   // setRaContextSaveNeeded((getRealRegister(TR::RealRegister::GPR14))->getHasBeenAssignedInMethod());

   // We assume frame size is less than 32k
   //TR_ASSERT(size<=MAX_IMMEDIATE_VAL,
   // "J9::Z::PrivateLinkage::createPrologue -- Frame size (0x%x) greater than 0x7FFF\n",size);

   TR::MemoryReference * retAddrMemRef = NULL;

   // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
   //
   // I M P O R T A N T!
   //
   // when recovering from a failed recompile, for sampling, any patching
   // must be
   // reversed. The reversal code assumes that STY R14,-[4,8](r5) is
   // generated for trex, and a nop. If this ever changes,
   // TR::Recompilation::methodCannotBeRecompiled must be updated.
   //
   // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

   TR::RealRegister * tempReg = getRealRegister(TR::RealRegister::GPR0);

   setFirstPrologueInstruction(cursor);
   static bool prologTuning = (feGetEnv("TR_PrologTuning")!=NULL);

   if (prologTuning)
      {
      // Tuning mode: RA is stored later (after the SP bump) into the top of
      // the new frame; only build the memory reference here.
      retAddrMemRef = generateS390MemoryReference(spReg, size - cg()->machine()->getGPRSize(), cg());
      }
   else
      {
      // Default: store RA immediately, one GPR-slot below the incoming SP
      // (which becomes the top slot of the new frame after the bump).
      int32_t offset = cg()->machine()->getGPRSize() * -1;
      retAddrMemRef = generateS390MemoryReference(spReg, offset, cg());
      cursor = generateRXInstruction(cg(), TR::InstOpCode::getExtendedStoreOpCode(), firstNode, getRealRegister(getReturnAddressRegister()),
                                     retAddrMemRef, cursor);
      }

   // adjust java stack frame pointer
   if (largeStack)
      {
      // Frame size exceeds a halfword immediate: materialize -size in GPR0
      // and add it to SP.
      cursor = generateS390ImmToRegister(cg(), firstNode, tempReg, (intptr_t)(size * -1), cursor);
      cursor = generateRRInstruction(cg(), TR::InstOpCode::getAddRegOpCode(), firstNode, spReg, tempReg, cursor);
      }
   else
      {
      // Adjust stack pointer with LA (reduce AGI delay)
      cursor = generateRXInstruction(cg(), TR::InstOpCode::LAY, firstNode, spReg, generateS390MemoryReference(spReg,(size) * -1, cg()),cursor);
      }

   if (!comp()->isDLT())
      {
      // Check stackoverflow /////////////////////////////////////
      //Load the stack limit in a temporary reg ( use R14, as it is killed later anyways )
      TR::RealRegister * stackLimitReg = getRealRegister(TR::RealRegister::GPR14);
      TR::RealRegister * mdReg = getMethodMetaDataRealRegister();
      TR::MemoryReference * stackLimitMR = generateS390MemoryReference(mdReg, cg()->getStackLimitOffset(), cg());

      // Compare stackLimit and currentStackPointer
      cursor = generateRXInstruction(cg(), TR::InstOpCode::getCmpLogicalOpCode(), firstNode, spReg, stackLimitMR, cursor);

      // Call stackOverflow helper, if stack limit is less than current Stack pointer. (Stack grows downwards)
      TR::LabelSymbol * stackOverflowSnippetLabel = generateLabelSymbol(cg());
      TR::LabelSymbol * reStartLabel = generateLabelSymbol(cg());

      //Call Stack overflow helper
      cursor = generateS390BranchInstruction(cg(), TR::InstOpCode::BRC, TR::InstOpCode::COND_BL, firstNode, stackOverflowSnippetLabel, cursor);

      TR::SymbolReference * stackOverflowRef = comp()->getSymRefTab()->findOrCreateStackOverflowSymbolRef(comp()->getJittedMethodSymbol());

      TR::Snippet * snippet =
         new (trHeapMemory()) TR::S390StackCheckFailureSnippet(cg(), firstNode, reStartLabel, stackOverflowSnippetLabel, stackOverflowRef, size - cg()->machine()->getGPRSize());

      cg()->addSnippet(snippet);

      // The stack overflow helper returns back here
      cursor = generateS390LabelInstruction(cg(), TR::InstOpCode::label, firstNode, reStartLabel, cursor);
      }

   // End of stack overflow checking code ////////////////////////
   static bool bppoutline = (feGetEnv("TR_BPRP_Outline")!=NULL);

   // Optional branch-prediction preload (BPP) of outlined call targets.
   if (bppoutline && cg()->_outlineCall._frequency != -1)
      {
      cursor = new (cg()->trHeapMemory()) TR::S390RILInstruction(TR::InstOpCode::LARL, firstNode, epReg, (cg()->_outlineCall._callSymRef)->getSymbol(),(cg()->_outlineCall._callSymRef), cursor, cg());

      TR::MemoryReference * tempMR = generateS390MemoryReference(epReg, 0, cg());
      cursor = generateS390BranchPredictionPreloadInstruction(cg(), TR::InstOpCode::BPP, firstNode, cg()->_outlineCall._callLabel, (int8_t) 0xD, tempMR, cursor);
      }
   if (bppoutline && cg()->_outlineArrayCall._frequency != -1)
      {
      cursor = new (cg()->trHeapMemory()) TR::S390RILInstruction(TR::InstOpCode::LARL, firstNode, epReg, (cg()->_outlineArrayCall._callSymRef)->getSymbol(),(cg()->_outlineArrayCall._callSymRef), cursor, cg());

      TR::MemoryReference * tempMR = generateS390MemoryReference(epReg, 0, cg());
      cursor = generateS390BranchPredictionPreloadInstruction(cg(), TR::InstOpCode::BPP, firstNode, cg()->_outlineArrayCall._callLabel, (int8_t) 0xD, tempMR, cursor);
      }

   if (cg()->getSupportsRuntimeInstrumentation())
      cursor = TR::TreeEvaluator::generateRuntimeInstrumentationOnOffSequence(cg(), TR::InstOpCode::RION, firstNode, cursor, true);

   // save registers that are used by this method
   int32_t disp = argSize;
   TR::MemoryReference * rsa ;

   // save GPRs
   if (lastUsedReg != TR::RealRegister::NoReg)
      {
      rsa = generateS390MemoryReference(spReg, disp, cg());

      if (firstUsedReg != lastUsedReg)
         {
         // Contiguous range: one store-multiple covers all preserved GPRs.
         cursor = generateRSInstruction(cg(), TR::InstOpCode::getStoreMultipleOpCode(), firstNode, getRealRegister(firstUsedReg), getRealRegister(lastUsedReg), rsa, cursor);
         }
      else
         {
         cursor = generateRXInstruction(cg(), TR::InstOpCode::getStoreOpCode(), firstNode, getRealRegister(firstUsedReg), rsa, cursor);
         }
      }
   disp += numIntSaved * cg()->machine()->getGPRSize();

#if defined(ENABLE_PRESERVED_FPRS)
   //save FPRs
   for (i = TR::RealRegister::FPR8 ; i <= TR::RealRegister::FPR15 ; ++i)
      {
      if ((getRealRegister(i))->getHasBeenAssignedInMethod())
         {
         cursor = generateRXInstruction(cg(), TR::InstOpCode::STD, firstNode, getRealRegister(i), generateS390MemoryReference(spReg, disp, cg()),
                                        cursor);
         disp += cg()->machine()->getFPRSize();
         }
      }
#endif

   if (prologTuning)
      {
      if ( size>=MAXLONGDISP )
         {
         // Displacement won't fit even a long-displacement form: fold the
         // offset into epReg and use it as an index register instead.
         cursor = generateS390ImmToRegister(cg(), firstNode, epReg, (intptr_t)(retAddrMemRef->getOffset()), cursor);
         retAddrMemRef->setOffset(0);
         retAddrMemRef->setDispAdjusted();
         retAddrMemRef->setIndexRegister(epReg);
         }
      // Save return address(R14) on stack
      cursor = generateRXInstruction(cg(), TR::InstOpCode::getStoreOpCode(), firstNode, getRealRegister(getReturnAddressRegister()), retAddrMemRef, cursor);
      }

   // initialize local objects
   TR::GCStackAtlas * atlas = cg()->getStackAtlas();
   if (atlas)
      {
      // for large copies, we can use the literal pool reg as a temp
      // (for >4096 clearing) when it is implemented

      // The GC stack maps are conservative in that they all say that
      // collectable locals are live. This means that these locals must be
      // cleared out in case a GC happens before they are allocated a valid
      // value.
      // The atlas contains the number of locals that need to be cleared. They
      // are all mapped together starting at GC index 0.
      //
      uint32_t numLocalsToBeInitialized = atlas->getNumberOfSlotsToBeInitialized();
      if (numLocalsToBeInitialized > 0 || atlas->getInternalPointerMap())
         {
         int32_t offsetLcls = atlas->getLocalBaseOffset() + firstLocalOffset;
         TR::RealRegister * tmpReg = getReturnAddressRealRegister();
         TR::RealRegister * itersReg = getRealRegister(TR::RealRegister::GPR0);

         int32_t initbytes = cg()->machine()->getGPRSize() * numLocalsToBeInitialized;

         //printf("\ncollected reference: init %d bytes at offset %d\n", initbytes, size+offsetLcls);

         cursor = initStg(cg(), firstNode, tmpReg, spReg, itersReg, size + offsetLcls, initbytes, cursor);
         if (atlas->getInternalPointerMap())
            {
            int32_t offsetIntPtr = atlas->getOffsetOfFirstInternalPointer() + firstLocalOffset;

            // Total number of slots to be initialized is number of pinning arrays +
            // number of derived internal pointer stack slots
            //
            int32_t initbytes = (atlas->getNumberOfDistinctPinningArrays() +
                                 atlas->getInternalPointerMap()->getNumInternalPointers()) * cg()->machine()->getGPRSize();

            //printf("\ninternal pointer: init %d bytes at offset %d\n", initbytes, size+offsetIntPtr);

            cursor = initStg(cg(), firstNode, tmpReg, spReg, itersReg, size + offsetIntPtr, initbytes, cursor);
            }
         }
      }

   firstSnippet = cg()->getFirstSnippet();
   if (setupLiteralPoolRegister(firstSnippet) > 0)
      {
      // LARL of the first snippet establishes the literal pool base register.
      cursor = new (trHeapMemory()) TR::S390RILInstruction(TR::InstOpCode::LARL, firstNode, lpReg, firstSnippet, cursor, cg());
      }

   // Rebase every stack-resident symbol's offset from the mapping cursor's
   // frame-relative value to an offset from the final (post-bump) SP.
   ListIterator<TR::AutomaticSymbol> variableSizeSymIterator(&bodySymbol->getVariableSizeSymbolList());
   TR::AutomaticSymbol * variableSizeSymCursor = variableSizeSymIterator.getFirst();

   while (variableSizeSymCursor != NULL)
      {
      TR_ASSERT(variableSizeSymCursor->isVariableSizeSymbol(), "Should be variable sized");
      variableSizeSymCursor->setOffset(variableSizeSymCursor->getOffset() + size);
      variableSizeSymCursor = variableSizeSymIterator.getNext();
      }
   ListIterator<TR::AutomaticSymbol> automaticIterator(&bodySymbol->getAutomaticList());
   TR::AutomaticSymbol * localCursor = automaticIterator.getFirst();

   while (localCursor != NULL)
      {
      localCursor->setOffset(localCursor->getOffset() + size);
      localCursor = automaticIterator.getNext();
      }

   ListIterator<TR::ParameterSymbol> parameterIterator(&bodySymbol->getParameterList());
   TR::ParameterSymbol * parmCursor = parameterIterator.getFirst();
   while (parmCursor != NULL)
      {
      parmCursor->setParameterOffset(parmCursor->getParameterOffset() + size);
      parmCursor = parameterIterator.getNext();
      }

   // Save or move arguments according to the result of register assignment.
   cursor = (TR::Instruction *) saveArguments(cursor, false);

   static const bool prefetchStack = feGetEnv("TR_PrefetchStack") != NULL;
   if (cg()->isPrefetchNextStackCacheLine() && prefetchStack)
      {
      // Prefetch-for-store the next stack cache line below the frame.
      cursor = generateRXInstruction(cg(), TR::InstOpCode::PFD, firstNode, 2, generateS390MemoryReference(spReg, -256, cg()), cursor);
      }

   // Cold Eyecatcher is used for padding of endPC so that Return Address for exception snippets will never equal the endPC.
   // -> stackwalker assumes valid RA must be < endPC (not <= endPC).
   cg()->CreateEyeCatcher(firstNode);
   setLastPrologueInstruction(cursor);
   }
1402
1403
1404
////////////////////////////////////////////////////////////////////////////////
// J9::Z::PrivateLinkage::createEpilogue() - create epilog for private linkage
//
// Emits, working forward from `cursor` (the instruction before the return):
// optional RA reload, preserved-GPR (and optionally FPR) restores, the frame
// pop, optional runtime-instrumentation RIOFF and branch-preload label, and
// the final BCR through the return-address register.
//
// Here is the sample epilog that we are currently generated
//
// 10 c0 d0 00 LM GPR6, GPR15, 40(,GPR11)
// 47 00 b0 00 BC GPR14
////////////////////////////////////////////////////////////////////////////////
void
J9::Z::PrivateLinkage::createEpilogue(TR::Instruction * cursor)
   {
   TR::RealRegister * spReg = getRealRegister(getStackPointerRegister());
   TR::Node * currentNode = cursor->getNode();
   TR::Node * nextNode = cursor->getNext()->getNode();
   TR::ResolvedMethodSymbol * bodySymbol = comp()->getJittedMethodSymbol();
   uint32_t size = bodySymbol->getLocalMappingCursor();
   int32_t frameSize = cg()->getFrameSizeInBytes();
   int32_t i, offset = 0;
   TR::MemoryReference * rsa;
   TR::RealRegister::RegNum lastUsedReg, firstUsedReg;
   TR::RegisterDependencyConditions * dep;
   TR::RealRegister * tempReg = getRealRegister(TR::RealRegister::GPR0);
   TR::RealRegister * epReg = getRealRegister(getEntryPointRegister());
   int32_t blockNumber = -1;

   bool enableBranchPreload = cg()->supportsBranchPreload();

   dep = cursor->getNext()->getDependencyConditions();
   offset = getOffsetToRegSaveArea();

   // Do Return Address restore
   uint32_t adjustSize = frameSize - getOffsetToFirstLocal();

   static const char *disableRARestoreOpt = feGetEnv("TR_DisableRAOpt");

   // Any one of these conditions will force us to restore RA
   bool restoreRA = disableRARestoreOpt ||
                    !(performTransformation(comp(), "O^O No need to restore RAREG in epilog\n")) ||
                    getRealRegister(getReturnAddressRegister())->getHasBeenAssignedInMethod() ||
                    cg()->canExceptByTrap() ||
                    cg()->getExitPointsInMethod() ||
                    bodySymbol->isEHAware() ||
                    comp()->getOption(TR_FullSpeedDebug); // CMVC 195232 - FSD can modify RA slot at a GC point.
   setRaContextRestoreNeeded(restoreRA);

   if (getRaContextRestoreNeeded())
      {
      // Reload RA from its slot at the top of the frame (SP + frameSize).
      cursor = generateRXInstruction(cg(), TR::InstOpCode::getExtendedLoadOpCode(), nextNode,
                                     getRealRegister(getReturnAddressRegister()),
                                     generateS390MemoryReference(spReg, frameSize, cg()), cursor);
      }
   else
      {
      if (comp()->getOption(TR_TraceCG))
         traceMsg(comp(), "No RAREG context restore needed in Epilog\n");
      }

   // Branch-preload (BPP) of the return target for the hottest return point,
   // emitted at most once (the flag is cleared after insertion).
   if (enableBranchPreload && (cursor->getNext() == cg()->_hottestReturn._returnInstr))
      {
      if (cg()->_hottestReturn._frequency > 6 && cg()->_hottestReturn._insertBPPInEpilogue)
         {
         cg()->_hottestReturn._returnLabel = generateLabelSymbol(cg());
         TR::MemoryReference * tempMR = generateS390MemoryReference(getRealRegister(getReturnAddressRegister()), 0, cg());
         cursor = generateS390BranchPredictionPreloadInstruction(cg(), TR::InstOpCode::BPP, nextNode, cg()->_hottestReturn._returnLabel, (int8_t) 0x6, tempMR, cursor);
         cg()->_hottestReturn._insertBPPInEpilogue = false;
         }
      }

   // Restore GPRs
   firstUsedReg = getFirstRestoredRegister(TR::RealRegister::GPR6, TR::RealRegister::GPR12);
   lastUsedReg = getLastRestoredRegister(TR::RealRegister::GPR6, TR::RealRegister::GPR12);
   rsa = generateS390MemoryReference(spReg, offset, cg());

   if (lastUsedReg != TR::RealRegister::NoReg)
      {
      if (firstUsedReg != lastUsedReg)
         {
         // Contiguous range: restore with a load-multiple (helper also handles
         // dependency bookkeeping).
         cursor = restorePreservedRegs(firstUsedReg, lastUsedReg, blockNumber, cursor, nextNode, spReg, rsa, getStackPointerRegister());
         }
      else
         {
         cursor = generateRXInstruction(cg(), TR::InstOpCode::getLoadOpCode(), nextNode, getRealRegister(firstUsedReg), rsa, cursor);
         }
      offset += cg()->machine()->getGPRSize() * (lastUsedReg - firstUsedReg + 1);
      }

#if defined(ENABLE_PRESERVED_FPRS)
   //Load FPRs
   for (i = TR::RealRegister::FPR8 ; i <= TR::RealRegister::FPR15 ; ++i)
      {
      if ((getRealRegister(i))->getHasBeenAssignedInMethod())
         {
         cursor = generateRXInstruction(cg(), TR::InstOpCode::LD, currentNode, getRealRegister(i),
                                        generateS390MemoryReference(spReg, offset, cg()), cursor);
         offset += cg()->machine()->getFPRSize();
         }
      }
#endif

   // Pop frame
   // use LA/LAY to add immediate through displacement
   if (adjustSize < MAXDISP)
      {
      cursor = generateRXInstruction(cg(), TR::InstOpCode::LA, nextNode, spReg, generateS390MemoryReference(spReg,adjustSize,cg()),cursor);
      }
   else if (adjustSize<MAXLONGDISP)
      {
      cursor = generateRXInstruction(cg(), TR::InstOpCode::LAY, nextNode, spReg, generateS390MemoryReference(spReg,adjustSize,cg()),cursor);
      }
   else
      {
      // Frame too large even for a long displacement: materialize the size
      // in GPR0 and add.
      cursor = generateS390ImmToRegister(cg(), nextNode, tempReg, (intptr_t)(adjustSize), cursor);
      cursor = generateRRInstruction(cg(), TR::InstOpCode::getAddRegOpCode(), nextNode, spReg, tempReg, cursor);
      }

   // Add RIOFF on Epilogue before we leave the JIT
   if (cg()->getSupportsRuntimeInstrumentation())
      cursor = TR::TreeEvaluator::generateRuntimeInstrumentationOnOffSequence(cg(), TR::InstOpCode::RIOFF, currentNode, cursor, true);

   // Place the label targeted by the prologue-side BPP just before the return.
   if (enableBranchPreload)
      {
      if (cursor->getNext() == cg()->_hottestReturn._returnInstr)
         {
         if (cg()->_hottestReturn._frequency > 6)
            {
            cursor = generateS390LabelInstruction(cg(), TR::InstOpCode::label, currentNode, cg()->_hottestReturn._returnLabel, cursor);
            }
         }
      }

   // Return: unconditional branch on the return-address register.
   cursor = generateS390RegInstruction(cg(), TR::InstOpCode::BCR, currentNode, getRealRegister(getReturnAddressRegister()), cursor);
   ((TR::S390RegInstruction *)cursor)->setBranchCondition(TR::InstOpCode::COND_BCR);

   }
1539
1540
////////////////////////////////////////////////////////////////////////////////
1541
// J9::Z::PrivateLinkage::buildVirtualDispatch - build virtual function call
1542
////////////////////////////////////////////////////////////////////////////////
1543
void
1544
J9::Z::PrivateLinkage::buildVirtualDispatch(TR::Node * callNode, TR::RegisterDependencyConditions * dependencies,
1545
TR::Register * vftReg, uint32_t sizeOfArguments)
1546
{
1547
TR::RegisterDependencyGroup * Dgroup = dependencies->getPreConditions();
1548
TR::SymbolReference * methodSymRef = callNode->getSymbolReference();
1549
TR::MethodSymbol * methodSymbol = methodSymRef->getSymbol()->castToMethodSymbol();
1550
TR::LabelSymbol * vcallLabel = generateLabelSymbol(cg());
1551
TR::Instruction * gcPoint = NULL;
1552
TR::Snippet *unresolvedSnippet = NULL;
1553
TR_Debug * debugObj = cg()->getDebug();
1554
1555
TR_ResolvedMethod * profiledMethod = NULL;
1556
TR_OpaqueClassBlock *profiledClass = NULL;
1557
bool useProfiledValues = false;
1558
1559
if (comp()->getOption(TR_TraceCG))
1560
traceMsg(comp(), "Build Virtual Dispatch\n");
1561
1562
if ((methodSymbol && !methodSymbol->isComputed()) &&
1563
(comp()->getPersistentInfo()->isRuntimeInstrumentationEnabled()) &&
1564
(comp()->getOption(TR_EnableRIEMIT)))
1565
{
1566
TR::Instruction *emitInstruction = generateRIInstruction(cg(), TR::InstOpCode::RIEMIT, callNode, vftReg, 0);
1567
comp()->addHWPValueProfileInstruction(emitInstruction);
1568
}
1569
1570
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp()->fe());
1571
1572
// Generate and register a thunk for a resolved virtual function
1573
void *virtualThunk;
1574
if (methodSymbol && methodSymbol->isComputed())
1575
{
1576
switch (methodSymbol->getMandatoryRecognizedMethod())
1577
{
1578
case TR::java_lang_invoke_ComputedCalls_dispatchVirtual:
1579
case TR::com_ibm_jit_JITHelpers_dispatchVirtual:
1580
{
1581
char *j2iSignature = fej9->getJ2IThunkSignatureForDispatchVirtual(methodSymbol->getMethod()->signatureChars(), methodSymbol->getMethod()->signatureLength(), comp());
1582
int32_t signatureLen = strlen(j2iSignature);
1583
virtualThunk = fej9->getJ2IThunk(j2iSignature, signatureLen, comp());
1584
if (!virtualThunk)
1585
{
1586
virtualThunk = fej9->setJ2IThunk(j2iSignature, signatureLen,
1587
TR::S390J9CallSnippet::generateVIThunk(
1588
fej9->getEquivalentVirtualCallNodeForDispatchVirtual(callNode, comp()), sizeOfArguments, cg()), comp()); // TODO:JSR292: Is this the right sizeOfArguments?
1589
}
1590
}
1591
break;
1592
default:
1593
if (fej9->needsInvokeExactJ2IThunk(callNode, comp()))
1594
{
1595
TR_J2IThunk *thunk = TR::S390J9CallSnippet::generateInvokeExactJ2IThunk(callNode, sizeOfArguments, methodSymbol->getMethod()->signatureChars(), cg());
1596
fej9->setInvokeExactJ2IThunk(thunk, comp());
1597
}
1598
break;
1599
}
1600
}
1601
else
1602
{
1603
virtualThunk = fej9->getJ2IThunk(methodSymbol->getMethod(), comp());
1604
if (!virtualThunk)
1605
virtualThunk = fej9->setJ2IThunk(methodSymbol->getMethod(), TR::S390J9CallSnippet::generateVIThunk(callNode, sizeOfArguments, cg()), comp());
1606
}
1607
1608
if (methodSymbol->isVirtual() && (!methodSymRef->isUnresolved() && !comp()->compileRelocatableCode()))
1609
{
1610
TR_ResolvedMethod * rsm = methodSymbol->castToResolvedMethodSymbol()->getResolvedMethod();
1611
1612
// Simple heuristic to determine when to prefetch the next cache line in method prologue.
1613
// We check the J9ROMMethod of the non-cold callsite to estimate how big of a stack
1614
// frame will be required for the call.
1615
if (!(cg()->getCurrentEvaluationTreeTop()->getEnclosingBlock()->isCold()) &&
1616
(rsm->numberOfParameterSlots() + rsm->numberOfTemps()) > 5)
1617
{
1618
cg()->setPrefetchNextStackCacheLine(true);
1619
}
1620
}
1621
1622
if (cg()->getSupportsRuntimeInstrumentation())
1623
TR::TreeEvaluator::generateRuntimeInstrumentationOnOffSequence(cg(), TR::InstOpCode::RIOFF, callNode);
1624
1625
if (methodSymbol->isVirtual())
1626
{
1627
TR::Instruction * cursor = NULL;
1628
bool performGuardedDevirtualization = false;
1629
TR::LabelSymbol * virtualLabel = NULL;
1630
TR::LabelSymbol * doneVirtualLabel = generateLabelSymbol(cg());
1631
int32_t offset = comp()->compileRelocatableCode() ? 0: methodSymRef->getOffset();
1632
1633
if (comp()->getOption(TR_TraceCG))
1634
traceMsg(comp(), "Virtual call with offset %d\n", offset);
1635
1636
// We split dependencies to make sure the RA doesn't insert any register motion code in the fixed
1637
// block sequence.
1638
//
1639
TR::RegisterDependencyConditions * preDeps = new (trHeapMemory())
1640
TR::RegisterDependencyConditions(dependencies->getPreConditions(), NULL,
1641
dependencies->getAddCursorForPre(), 0, cg());
1642
1643
// Add the ThisReg to the postDeps to avoid seeing a SPILL inserted between the resolution code
1644
// and the VTABLE. This sequence is assumed to be fixed length.
1645
// Added one more slot for the post dep that might be added in buildDirectCall
1646
//
1647
TR::RegisterDependencyConditions * postDepsTemp = new (trHeapMemory())
1648
TR::RegisterDependencyConditions(NULL, dependencies->getPostConditions(), 0,
1649
dependencies->getAddCursorForPost(), cg());
1650
TR::RegisterDependencyConditions * postDeps = new (trHeapMemory())
1651
TR::RegisterDependencyConditions(postDepsTemp,0,4, cg());
1652
1653
// Search ARG Deps for vregs used for RA/EP and this
1654
//
1655
TR::Register * RegZero = dependencies->searchPostConditionRegister(TR::RealRegister::GPR0);
1656
TR::Register * RegThis = dependencies->searchPreConditionRegister(TR::RealRegister::GPR1);
1657
TR::Register * RegRA = dependencies->searchPostConditionRegister(getReturnAddressRegister());
1658
1659
// Check the thisChild to see if anyone uses this object after the call (if not, we won't add it to post Deps)
1660
if (callNode->getChild(callNode->getFirstArgumentIndex())->getReferenceCount() > 0)
1661
{
1662
postDeps->addPostCondition(RegThis, TR::RealRegister::AssignAny);
1663
}
1664
1665
if (methodSymRef->isUnresolved() || comp()->compileRelocatableCode())
1666
{
1667
if (comp()->getOption(TR_TraceCG))
1668
traceMsg(comp(), "... virtual call is unresolved\n");
1669
1670
// TODO: Task 124512. Fix picbuilder register preservation before
1671
// moving this vft register dependency to BASR pre-deps.
1672
postDeps->addPostConditionIfNotAlreadyInserted(vftReg, TR::RealRegister::AssignAny);
1673
1674
// Emit the resolve snippet and BRASL to call it
1675
//
1676
TR::LabelSymbol * snippetLabel = generateLabelSymbol(cg());
1677
unresolvedSnippet = new (trHeapMemory()) TR::S390VirtualUnresolvedSnippet(cg(), callNode, snippetLabel, sizeOfArguments, virtualThunk);
1678
cg()->addSnippet(unresolvedSnippet);
1679
//generateSnippetCall extracts preDeps from dependencies and puts them on BRASL
1680
TR::Instruction * gcPoint =
1681
generateSnippetCall(cg(), callNode, unresolvedSnippet, dependencies, methodSymRef);
1682
gcPoint->setNeedsGCMap(getPreservedRegisterMapForGC());
1683
}
1684
else
1685
{
1686
if (comp()->getOption(TR_TraceCG))
1687
traceMsg(comp(), "...call resolved\n");
1688
1689
TR::ResolvedMethodSymbol * resolvedSymbol = methodSymRef->getSymbol()->getResolvedMethodSymbol();
1690
TR_ResolvedMethod * resolvedMethod = resolvedSymbol ? resolvedSymbol->getResolvedMethod() : 0;
1691
1692
if ((comp()->performVirtualGuardNOPing() && comp()->isVirtualGuardNOPingRequired()))
1693
{
1694
TR_VirtualGuard * virtualGuard;
1695
1696
if (resolvedMethod &&
1697
!resolvedMethod->isInterpreted() &&
1698
!callNode->isTheVirtualCallNodeForAGuardedInlinedCall())
1699
{
1700
if (!resolvedMethod->virtualMethodIsOverridden() && !resolvedMethod->isAbstract())
1701
{
1702
1703
performGuardedDevirtualization = true;
1704
1705
// Build guarded devirtualization dispatch.
1706
//
1707
virtualGuard = TR_VirtualGuard::createGuardedDevirtualizationGuard(TR_NonoverriddenGuard,
1708
comp(), callNode);
1709
if (comp()->getOption(TR_TraceCG))
1710
{
1711
traceMsg(comp(), "Emit new Non-Overridden guard for call %s (%x) in %s\n", resolvedMethod->signature(trMemory()), callNode,
1712
comp()->signature());
1713
}
1714
}
1715
else
1716
{
1717
TR_OpaqueClassBlock * thisClass = resolvedMethod->containingClass();
1718
TR_DevirtualizedCallInfo * devirtualizedCallInfo = comp()->findDevirtualizedCall(callNode);
1719
TR_OpaqueClassBlock * refinedThisClass = 0;
1720
1721
if (devirtualizedCallInfo)
1722
{
1723
refinedThisClass = devirtualizedCallInfo->_thisType;
1724
if (comp()->getOption(TR_TraceCG))
1725
{
1726
traceMsg(comp(), "Found refined this class info %x for call %x in %s\n", refinedThisClass, callNode,
1727
comp()->signature());
1728
}
1729
if (refinedThisClass)
1730
{
1731
thisClass = refinedThisClass;
1732
}
1733
}
1734
1735
TR_PersistentCHTable * chTable = comp()->getPersistentInfo()->getPersistentCHTable();
1736
/* Devirtualization is not currently supported for AOT compilations */
1737
if (thisClass && TR::Compiler->cls.isAbstractClass(comp(), thisClass) && !comp()->compileRelocatableCode())
1738
{
1739
TR_ResolvedMethod * method = chTable->findSingleAbstractImplementer(thisClass, methodSymRef->getOffset(),
1740
methodSymRef->getOwningMethod(comp()), comp());
1741
if (method &&
1742
(comp()->isRecursiveMethodTarget(method) || !method->isInterpreted() || method->isJITInternalNative()))
1743
{
1744
performGuardedDevirtualization = true;
1745
resolvedMethod = method;
1746
virtualGuard = TR_VirtualGuard::createGuardedDevirtualizationGuard(TR_AbstractGuard,
1747
comp(), callNode);
1748
if (comp()->getOption(TR_TraceCG))
1749
{
1750
traceMsg(comp(), "Emit new ABSTRACT guard for call %s (%x) in %s\n", resolvedMethod->signature(trMemory()), callNode,
1751
comp()->signature());
1752
}
1753
}
1754
}
1755
else if (refinedThisClass && !chTable->isOverriddenInThisHierarchy(resolvedMethod, refinedThisClass,
1756
methodSymRef->getOffset(), comp()))
1757
{
1758
if (resolvedMethod->virtualMethodIsOverridden())
1759
{
1760
TR_ResolvedMethod * calleeMethod = methodSymRef->getOwningMethod(comp())->getResolvedVirtualMethod(comp(),
1761
refinedThisClass, methodSymRef->getOffset());
1762
if (calleeMethod &&
1763
(comp()->isRecursiveMethodTarget(calleeMethod) ||
1764
!calleeMethod->isInterpreted() ||
1765
calleeMethod->isJITInternalNative()))
1766
{
1767
performGuardedDevirtualization = true;
1768
resolvedMethod = calleeMethod;
1769
virtualGuard = TR_VirtualGuard::createGuardedDevirtualizationGuard(TR_HierarchyGuard,
1770
comp(), callNode);
1771
1772
if (comp()->getOption(TR_TraceCG))
1773
{
1774
traceMsg(comp(), "Emit new HierarchyGuardguard for call %s (%x) in %s\n", resolvedMethod->signature(trMemory()), callNode,
1775
comp()->signature());
1776
}
1777
}
1778
}
1779
}
1780
}
1781
if (performGuardedDevirtualization && virtualGuard)
1782
{
1783
virtualLabel = vcallLabel;
1784
generateVirtualGuardNOPInstruction(cg(), callNode, virtualGuard->addNOPSite(), NULL, virtualLabel);
1785
if (comp()->getOption(TR_EnableHCR))
1786
{
1787
if (cg()->supportsMergingGuards())
1788
{
1789
virtualGuard->setMergedWithHCRGuard();
1790
}
1791
else
1792
{
1793
TR_VirtualGuard* HCRGuard = TR_VirtualGuard::createGuardedDevirtualizationGuard(TR_HCRGuard, comp(), callNode);
1794
generateVirtualGuardNOPInstruction(cg(), callNode, HCRGuard->addNOPSite(), NULL, virtualLabel);
1795
}
1796
}
1797
}
1798
}
1799
}
1800
1801
if (!performGuardedDevirtualization &&
1802
!comp()->getOption(TR_DisableInterpreterProfiling) &&
1803
comp()->getOption(TR_enableProfiledDevirtualization) &&
1804
TR_ValueProfileInfoManager::get(comp()) && resolvedMethod
1805
)
1806
{
1807
TR_AddressInfo *valueInfo = NULL;
1808
if (!comp()->compileRelocatableCode())
1809
valueInfo = static_cast<TR_AddressInfo*>(TR_ValueProfileInfoManager::getProfiledValueInfo(callNode, comp(), AddressInfo));
1810
1811
uintptr_t topValue = valueInfo ? valueInfo->getTopValue() : 0;
1812
1813
// Is the topValue valid?
1814
if( topValue )
1815
{
1816
if( valueInfo->getTopProbability() < MIN_PROFILED_CALL_FREQUENCY ||
1817
comp()->getPersistentInfo()->isObsoleteClass((void*)topValue, fej9) )
1818
{
1819
topValue = 0;
1820
}
1821
else
1822
{
1823
TR_OpaqueClassBlock *callSiteMethodClass = methodSymRef->getSymbol()->getResolvedMethodSymbol()->getResolvedMethod()->classOfMethod();
1824
if (!cg()->isProfiledClassAndCallSiteCompatible((TR_OpaqueClassBlock *)topValue, callSiteMethodClass))
1825
{
1826
topValue = 0;
1827
}
1828
}
1829
}
1830
1831
if ( topValue )
1832
{
1833
TR_ResolvedMethod *profiledVirtualMethod = methodSymRef->getOwningMethod(comp())->getResolvedVirtualMethod(comp(),
1834
(TR_OpaqueClassBlock *)topValue, methodSymRef->getOffset());
1835
if (profiledVirtualMethod)
1836
{
1837
if (comp()->getOption(TR_TraceCG))
1838
{
1839
traceMsg(comp(),
1840
"Profiled method {%s}\n",
1841
fej9->sampleSignature((TR_OpaqueMethodBlock *)(profiledVirtualMethod->getPersistentIdentifier()), 0, 0, comp()->trMemory()));
1842
}
1843
profiledMethod = profiledVirtualMethod;
1844
profiledClass = (TR_OpaqueClassBlock *)topValue;
1845
useProfiledValues = true;
1846
virtualLabel = vcallLabel;
1847
}
1848
}
1849
}
1850
1851
if (performGuardedDevirtualization || useProfiledValues)
1852
{
1853
if (comp()->getOption(TR_TraceCG))
1854
traceMsg(comp(), "Make direct call under devirtualization\n");
1855
1856
TR::SymbolReference * realMethodSymRef = methodSymRef;
1857
if (useProfiledValues || resolvedMethod != resolvedSymbol->getResolvedMethod())
1858
{
1859
realMethodSymRef= comp()->getSymRefTab()->findOrCreateMethodSymbol(methodSymRef->getOwningMethodIndex(),
1860
-1, (useProfiledValues)?profiledMethod:resolvedMethod, TR::MethodSymbol::Virtual);
1861
}
1862
1863
if (useProfiledValues)
1864
{
1865
TR::Instruction * unloadableConstInstr = generateRILInstruction(cg(), TR::InstOpCode::LARL, callNode, RegZero, reinterpret_cast<uintptr_t*>(profiledClass));
1866
if (fej9->isUnloadAssumptionRequired(profiledClass, comp()->getCurrentMethod()))
1867
{
1868
comp()->getStaticPICSites()->push_front(unloadableConstInstr);
1869
}
1870
generateS390CompareAndBranchInstruction(cg(), TR::InstOpCode::getCmpLogicalRegOpCode(), callNode, vftReg, RegZero, TR::InstOpCode::COND_BNE, virtualLabel);
1871
}
1872
1873
buildDirectCall(callNode, realMethodSymRef, dependencies, sizeOfArguments);
1874
1875
if (!virtualLabel)
1876
generateS390BranchInstruction(cg(), TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, callNode, doneVirtualLabel);
1877
}
1878
}
1879
1880
TR_S390OutOfLineCodeSection *outlinedSlowPath = NULL;
1881
1882
if ( virtualLabel )
1883
{
1884
traceMsg (comp(), "OOL vcall: generating Vcall dispatch sequence\n");
1885
//Using OOL but generating code manually
1886
outlinedSlowPath = new (cg()->trHeapMemory()) TR_S390OutOfLineCodeSection(vcallLabel,doneVirtualLabel,cg());
1887
cg()->getS390OutOfLineCodeSectionList().push_front(outlinedSlowPath);
1888
outlinedSlowPath->swapInstructionListsWithCompilation();
1889
1890
TR::Instruction * temp = generateS390LabelInstruction(cg(), TR::InstOpCode::label, callNode, vcallLabel);
1891
if (debugObj)
1892
{
1893
debugObj->addInstructionComment(temp, "Denotes start of OOL vcall sequence");
1894
}
1895
}
1896
1897
// load class pointer
1898
TR::Register *classReg = vftReg;
1899
1900
// It should be impossible to have a offset that can't fit in 20bit given Java method table limitations.
1901
// We assert here to insure limitation/assumption remains true. If this fires we need to fix this code
1902
// and the _virtualUnresolvedHelper() code to deal with a new worst case scenario for patching.
1903
TR_ASSERT_FATAL(offset>MINLONGDISP, "JIT VFT offset does not fit in 20bits");
1904
TR_ASSERT_FATAL(offset!=0 || unresolvedSnippet, "Offset is 0 yet unresolvedSnippet is NULL");
1905
TR_ASSERT_FATAL(offset<=MAX_IMMEDIATE_VAL, "Offset is larger then MAX_IMMEDIATE_VAL");
1906
1907
// If unresolved/AOT, this instruction will be patched by _virtualUnresolvedHelper() with the correct offset
1908
cursor = generateRXInstruction(cg(), TR::InstOpCode::getExtendedLoadOpCode(), callNode, RegRA,
1909
generateS390MemoryReference(classReg, offset, cg()));
1910
1911
if (unresolvedSnippet)
1912
{
1913
((TR::S390VirtualUnresolvedSnippet *)unresolvedSnippet)->setPatchVftInstruction(cursor);
1914
}
1915
1916
// A load immediate into R0 instruction (LHI/LGFI) MUST be generated here because the "LA" instruction used by
1917
// the VM to find VFT table entries can't handle negative displacements. For unresolved/AOT targets we must assume
1918
// the worse case (offset can't fit in 16bits). VFT offset 0 means unresolved/AOT, otherwise offset is negative.
1919
// Some special cases have positive offsets i.e. java/lang/Object.newInstancePrototype()
1920
if (!unresolvedSnippet && offset >= MIN_IMMEDIATE_VAL && offset <= MAX_IMMEDIATE_VAL) // Offset fits in 16bits
1921
{
1922
cursor = generateRIInstruction(cg(), TR::InstOpCode::getLoadHalfWordImmOpCode(), callNode, RegZero, offset);
1923
}
1924
else // if unresolved || offset can't fit in 16bits
1925
{
1926
// If unresolved/AOT, this instruction will be patched by _virtualUnresolvedHelper() with the correct offset
1927
cursor = generateRILInstruction(cg(), TR::InstOpCode::LGFI, callNode, RegZero, static_cast<int32_t>(offset));
1928
}
1929
1930
gcPoint = new (trHeapMemory()) TR::S390RRInstruction(TR::InstOpCode::BASR, callNode, RegRA, RegRA, cg());
1931
gcPoint->setDependencyConditions(preDeps);
1932
1933
if (unresolvedSnippet != NULL)
1934
(static_cast<TR::S390VirtualUnresolvedSnippet *>(unresolvedSnippet))->setIndirectCallInstruction(gcPoint);
1935
1936
if (outlinedSlowPath)
1937
{
1938
TR::Instruction * temp = generateS390BranchInstruction(cg(),TR::InstOpCode::BRC,TR::InstOpCode::COND_BRC,callNode,doneVirtualLabel);
1939
if (debugObj)
1940
{
1941
debugObj->addInstructionComment(temp, "Denotes end of OOL vcall sequence: return to mainline");
1942
}
1943
// Done using OOL with manual code generation
1944
outlinedSlowPath->swapInstructionListsWithCompilation();
1945
1946
generateS390LabelInstruction(cg(), TR::InstOpCode::label, callNode, doneVirtualLabel, postDeps);
1947
}
1948
else
1949
{
1950
gcPoint->setDependencyConditions(postDeps);
1951
}
1952
}
1953
else if (methodSymbol->isInterface())
1954
{
1955
int32_t i=0;
1956
TR::Register * thisClassRegister;
1957
TR::Register * methodRegister ;
1958
TR::RegisterPair * classMethodEPPairRegister;
1959
int32_t numInterfaceCallCacheSlots = comp()->getOptions()->getNumInterfaceCallCacheSlots();
1960
1961
if (comp()->getOption(TR_disableInterfaceCallCaching))
1962
{
1963
numInterfaceCallCacheSlots=0;
1964
}
1965
else if (comp()->getOption(TR_enableInterfaceCallCachingSingleDynamicSlot))
1966
{
1967
numInterfaceCallCacheSlots=1;
1968
}
1969
1970
TR_ValueProfileInfoManager *valueProfileInfo = TR_ValueProfileInfoManager::get(comp());
1971
TR_AddressInfo *info = NULL;
1972
uint32_t numStaticPICs = 0;
1973
if (valueProfileInfo)
1974
info = static_cast<TR_AddressInfo*>(valueProfileInfo->getValueInfo(callNode->getByteCodeInfo(), comp(), AddressInfo));
1975
1976
TR::list<TR_OpaqueClassBlock*> * profiledClassesList = NULL;
1977
1978
bool isAddressInfo = info != NULL;
1979
uint32_t totalFreq = info ? info->getTotalFrequency() : 0;
1980
bool isAOT = cg()->needClassAndMethodPointerRelocations();
1981
bool callIsSafe = methodSymRef != comp()->getSymRefTab()->findObjectNewInstanceImplSymbol();
1982
if (!isAOT && callIsSafe && isAddressInfo &&
1983
(totalFreq!=0 && info->getTopProbability() > MIN_PROFILED_CALL_FREQUENCY))
1984
{
1985
1986
TR_ScratchList<TR_ExtraAddressInfo> allValues(comp()->trMemory());
1987
info->getSortedList(comp(), &allValues);
1988
1989
TR::SymbolReference *methodSymRef = callNode->getSymbolReference();
1990
TR_ResolvedMethod *owningMethod = methodSymRef->getOwningMethod(comp());
1991
1992
ListIterator<TR_ExtraAddressInfo> valuesIt(&allValues);
1993
1994
uint32_t maxStaticPICs = comp()->getOptions()->getNumInterfaceCallStaticSlots();
1995
1996
TR_ExtraAddressInfo *profiledInfo;
1997
profiledClassesList = new (trHeapMemory()) TR::list<TR_OpaqueClassBlock*>(getTypedAllocator<TR_OpaqueClassBlock*>(comp()->allocator()));
1998
for (profiledInfo = valuesIt.getFirst(); numStaticPICs < maxStaticPICs && profiledInfo != NULL; profiledInfo = valuesIt.getNext())
1999
{
2000
2001
float freq = (float) profiledInfo->_frequency / totalFreq;
2002
if (freq < MIN_PROFILED_CALL_FREQUENCY)
2003
continue;
2004
2005
TR_OpaqueClassBlock *clazz = (TR_OpaqueClassBlock *)profiledInfo->_value;
2006
if (comp()->getPersistentInfo()->isObsoleteClass(clazz, fej9))
2007
continue;
2008
2009
TR::SymbolReference *methodSymRef = callNode->getSymbolReference();
2010
TR_ResolvedMethod * profiledMethod = methodSymRef->getOwningMethod(comp())->getResolvedInterfaceMethod(comp(),
2011
(TR_OpaqueClassBlock *)clazz, methodSymRef->getCPIndex());
2012
2013
if (profiledMethod && !profiledMethod->isInterpreted())
2014
{
2015
numInterfaceCallCacheSlots++;
2016
numStaticPICs++;
2017
profiledClassesList->push_front(clazz);
2018
}
2019
}
2020
}
2021
2022
if (comp()->getOption(TR_TraceCG))
2023
{
2024
if (numStaticPICs != 0)
2025
traceMsg(comp(), "Interface dispatch with %d cache slots, added extra %d slot(s) for profiled classes.\n", numInterfaceCallCacheSlots, numStaticPICs);
2026
else
2027
traceMsg(comp(), "Interface dispatch with %d cache slots\n", numInterfaceCallCacheSlots);
2028
}
2029
2030
TR::LabelSymbol * snippetLabel = generateLabelSymbol(cg());
2031
TR::S390InterfaceCallSnippet * ifcSnippet = new (trHeapMemory()) TR::S390InterfaceCallSnippet(cg(), callNode,
2032
snippetLabel, sizeOfArguments, numInterfaceCallCacheSlots, virtualThunk, false);
2033
cg()->addSnippet(ifcSnippet);
2034
2035
if (numStaticPICs != 0)
2036
cg()->addPICsListForInterfaceSnippet(ifcSnippet->getDataConstantSnippet(), profiledClassesList);
2037
2038
if (numInterfaceCallCacheSlots == 0 )
2039
{
2040
//Disabled interface call caching
2041
TR::LabelSymbol * hitLabel = generateLabelSymbol(cg());
2042
TR::LabelSymbol * snippetLabel = generateLabelSymbol(cg());
2043
2044
// Make a copy of input deps, but add on 3 new slots.
2045
TR::RegisterDependencyConditions * postDeps = new (trHeapMemory()) TR::RegisterDependencyConditions(dependencies, 0, 3, cg());
2046
postDeps->setAddCursorForPre(0); // Ignore all pre-deps that were copied.
2047
postDeps->setNumPreConditions(0, trMemory()); // Ignore all pre-deps that were copied.
2048
2049
gcPoint = generateSnippetCall(cg(), callNode, ifcSnippet, dependencies,methodSymRef);
2050
2051
// NOP is necessary so that the VM doesn't confuse Virtual Dispatch (expected to always use BASR
2052
// with interface dispatch (which must guarantee that RA-2 != 0x0D ie. BASR)
2053
//
2054
TR::Instruction * cursor = new (trHeapMemory()) TR::S390NOPInstruction(TR::InstOpCode::NOP, 2, callNode, cg());
2055
2056
// Fool the snippet into setting up the return address to be after the NOP
2057
//
2058
gcPoint = cursor;
2059
((TR::S390CallSnippet *) ifcSnippet)->setBranchInstruction(gcPoint);
2060
cursor->setDependencyConditions(postDeps);
2061
}
2062
else
2063
{
2064
TR::Instruction * cursor = NULL;
2065
TR::LabelSymbol * paramSetupDummyLabel = generateLabelSymbol(cg());
2066
TR::LabelSymbol * returnLocationLabel = generateLabelSymbol(cg());
2067
TR::LabelSymbol * cacheFailLabel = generateLabelSymbol(cg());
2068
2069
TR::Register * RegEP = dependencies->searchPostConditionRegister(getEntryPointRegister());
2070
TR::Register * RegRA = dependencies->searchPostConditionRegister(getReturnAddressRegister());
2071
TR::Register * RegThis = dependencies->searchPreConditionRegister(TR::RealRegister::GPR1);
2072
TR::Register * snippetReg = RegEP;
2073
2074
2075
// We split dependencies to make sure the RA doesn't insert any register motion code in the fixed
2076
// block sequence and to only enforce parameter setup on head of block.
2077
TR::RegisterDependencyConditions * preDeps = new (trHeapMemory()) TR::RegisterDependencyConditions(
2078
dependencies->getPreConditions(), NULL, dependencies->getAddCursorForPre(), 0, cg());
2079
2080
// Make a copy of input deps, but add on 3 new slots.
2081
TR::RegisterDependencyConditions * postDeps = new (trHeapMemory()) TR::RegisterDependencyConditions(dependencies, 0, 5, cg());
2082
postDeps->setAddCursorForPre(0); // Ignore all pre-deps that were copied.
2083
postDeps->setNumPreConditions(0, trMemory()); // Ignore all pre-deps that were copied.
2084
2085
// Check the thisChild to see if anyone uses this object after the call (if not, we won't add it to post Deps)
2086
if (callNode->getChild(callNode->getFirstArgumentIndex())->getReferenceCount() > 0)
2087
postDeps->addPostCondition(RegThis, TR::RealRegister::AssignAny);
2088
2089
// Add this reg to post deps to ensure no reg motion
2090
postDeps->addPostConditionIfNotAlreadyInserted(vftReg, TR::RealRegister::AssignAny);
2091
2092
bool useCLFIandBRCL = false;
2093
2094
if (comp()->getOption(TR_enableInterfaceCallCachingSingleDynamicSlot))
2095
{
2096
cursor = new (trHeapMemory()) TR::S390RILInstruction(TR::InstOpCode::LARL, callNode, snippetReg, ifcSnippet->getDataConstantSnippet(), cg());
2097
2098
// Single dynamic slot case
2099
// we cache one class-method pair and atomically load it using LM/LPQ
2100
TR::Register * classRegister = cg()->allocateRegister();
2101
TR::Register * methodRegister = cg()->allocateRegister();
2102
classMethodEPPairRegister = cg()->allocateConsecutiveRegisterPair(methodRegister, classRegister);
2103
2104
postDeps->addPostCondition(classMethodEPPairRegister, TR::RealRegister::EvenOddPair);
2105
postDeps->addPostCondition(classRegister, TR::RealRegister::LegalEvenOfPair);
2106
postDeps->addPostCondition(methodRegister, TR::RealRegister::LegalOddOfPair);
2107
2108
//Load return address in RegRA
2109
cursor = new (trHeapMemory()) TR::S390RILInstruction(TR::InstOpCode::LARL, callNode, RegRA, returnLocationLabel, cursor, cg());
2110
2111
if (comp()->target().is64Bit())
2112
cursor = generateRXInstruction(cg(), TR::InstOpCode::LPQ, callNode, classMethodEPPairRegister,
2113
generateS390MemoryReference(snippetReg, ifcSnippet->getDataConstantSnippet()->getSingleDynamicSlotOffset(), cg()), cursor);
2114
else
2115
cursor = generateRSInstruction(cg(), TR::InstOpCode::LM, callNode, classMethodEPPairRegister,
2116
generateS390MemoryReference(snippetReg, ifcSnippet->getDataConstantSnippet()->getSingleDynamicSlotOffset(), cg()), cursor);
2117
2118
// We need a dummy label to hook dependencies onto
2119
cursor = generateS390LabelInstruction(cg(), TR::InstOpCode::label, callNode, paramSetupDummyLabel, preDeps, cursor);
2120
2121
//check if cached classPtr matches the receiving object classPtr
2122
cursor = generateRXInstruction(cg(), TR::InstOpCode::getCmpLogicalOpCode(), callNode, classRegister,
2123
generateS390MemoryReference(RegThis, 0, cg()), cursor);
2124
2125
//Cache hit? then jumpto cached method entrypoint directly
2126
cursor = generateS390RegInstruction(cg(), TR::InstOpCode::BCR, callNode, methodRegister, cursor);
2127
((TR::S390RegInstruction *)cursor)->setBranchCondition(TR::InstOpCode::COND_BER);
2128
2129
cursor = new (trHeapMemory()) TR::S390RILInstruction(TR::InstOpCode::LARL, callNode, snippetReg, ifcSnippet,cursor, cg());
2130
2131
// Cache miss... Too bad.. go to the slow path through the interface call snippet
2132
cursor = generateS390RegInstruction(cg(), TR::InstOpCode::BCR, callNode, snippetReg, cursor);
2133
((TR::S390RegInstruction *)cursor)->setBranchCondition(TR::InstOpCode::COND_BCR);
2134
2135
// Added NOP so that the pattern matching code in jit2itrg icallVMprJavaSendPatchupVirtual
2136
cursor = new (trHeapMemory()) TR::S390NOPInstruction(TR::InstOpCode::NOP, 2, callNode, cg());
2137
}
2138
else
2139
{
2140
useCLFIandBRCL = false && (comp()->target().is64Bit() && // Support for 64-bit
2141
TR::Compiler->om.generateCompressedObjectHeaders() // Classes are <2GB on CompressedRefs only.
2142
);
2143
2144
// Load the interface call data snippet pointer to register is required for non-CLFI / BRCL sequence.
2145
if (!useCLFIandBRCL)
2146
{
2147
cursor = new (trHeapMemory()) TR::S390RILInstruction(TR::InstOpCode::LARL, callNode, snippetReg, ifcSnippet->getDataConstantSnippet(), cg());
2148
methodRegister = cg()->allocateRegister();
2149
}
2150
else
2151
{
2152
#if defined(TR_TARGET_64BIT)
2153
#if defined(J9ZOS390)
2154
if (comp()->getOption(TR_EnableRMODE64))
2155
#endif
2156
{
2157
// Reserve a trampoline for this interface call. Might not be used, but we only
2158
// sacrifice a little trampoline space for it (24-bytes).
2159
if (methodSymRef->getReferenceNumber() >= TR_S390numRuntimeHelpers)
2160
fej9->reserveTrampolineIfNecessary(comp(), methodSymRef, false);
2161
}
2162
#endif
2163
}
2164
2165
// 64 bit MultiSlot case
2166
2167
cursor = generateRILInstruction(cg(), TR::InstOpCode::LARL, callNode, RegRA, returnLocationLabel, cursor);
2168
2169
// We need a dummy label to hook dependencies.
2170
cursor = generateS390LabelInstruction(cg(), TR::InstOpCode::label, callNode, paramSetupDummyLabel, preDeps, cursor);
2171
2172
if (useCLFIandBRCL)
2173
{
2174
// Update the IFC Snippet to note we are using CLFI/BRCL sequence.
2175
// This changes the format of the constants in the data snippet
2176
ifcSnippet->setUseCLFIandBRCL(true);
2177
2178
// We will generate CLFI / BRCL sequence to dispatch to target branches.
2179
// First CLFI/BRCL
2180
cursor = generateRILInstruction(cg(), TR::InstOpCode::CLFI, callNode, vftReg, 0x0, cursor); //compare against 0
2181
2182
ifcSnippet->getDataConstantSnippet()->setFirstCLFI(cursor);
2183
2184
// BRCL
2185
cursor = generateRILInstruction(cg(), TR::InstOpCode::BRCL, callNode, static_cast<uint32_t>(0x0), reinterpret_cast<void*>(0x0), cursor);
2186
2187
for(i = 1; i < numInterfaceCallCacheSlots; i++)
2188
{
2189
// We will generate CLFI / BRCL sequence to dispatch to target branches.
2190
cursor = generateRILInstruction(cg(), TR::InstOpCode::CLFI, callNode, vftReg, 0x0, cursor); //compare against 0
2191
2192
// BRCL
2193
cursor = generateRILInstruction(cg(), TR::InstOpCode::BRCL, callNode, static_cast<uint32_t>(0x0), reinterpret_cast<void*>(0x0), cursor);
2194
}
2195
}
2196
else
2197
{
2198
int32_t slotOffset = ifcSnippet->getDataConstantSnippet()->getFirstSlotOffset();
2199
for(i = 0; i < numInterfaceCallCacheSlots; i++)
2200
{
2201
TR::InstOpCode::Mnemonic cmpOp = TR::InstOpCode::getCmpLogicalOpCode();
2202
if (comp()->target().is64Bit() && TR::Compiler->om.generateCompressedObjectHeaders())
2203
cmpOp = TR::InstOpCode::CL;
2204
2205
//check if cached class matches the receiving object class
2206
cursor = generateRXInstruction(cg(), cmpOp, callNode, vftReg,
2207
generateS390MemoryReference(snippetReg, slotOffset, cg()), cursor);
2208
2209
//load cached methodEP from current cache slot
2210
cursor = generateRXInstruction(cg(), TR::InstOpCode::getLoadOpCode(), callNode, methodRegister,
2211
generateS390MemoryReference(snippetReg, slotOffset+TR::Compiler->om.sizeofReferenceAddress(), cg()), cursor);
2212
2213
cursor = generateS390RegInstruction(cg(), TR::InstOpCode::BCR, callNode, methodRegister, cursor);
2214
((TR::S390RegInstruction *)cursor)->setBranchCondition(TR::InstOpCode::COND_BER);
2215
2216
slotOffset += 2*TR::Compiler->om.sizeofReferenceAddress();
2217
}
2218
}
2219
2220
cursor = new (trHeapMemory()) TR::S390RILInstruction(TR::InstOpCode::LARL, callNode, snippetReg, ifcSnippet,cursor, cg());
2221
2222
// Cache miss... Too bad.. go to the slow path through the interface call snippet
2223
cursor = generateS390RegInstruction(cg(), TR::InstOpCode::BCR, callNode, snippetReg, cursor);
2224
((TR::S390RegInstruction *)cursor)->setBranchCondition(TR::InstOpCode::COND_BCR);
2225
2226
cursor = generateS390LabelInstruction(cg(), TR::InstOpCode::dd, callNode,
2227
ifcSnippet->getDataConstantSnippet()->getSnippetLabel());
2228
2229
// Added NOP so that the pattern matching code in jit2itrg icallVMprJavaSendPatchupVirtual
2230
cursor = new (trHeapMemory()) TR::S390NOPInstruction(TR::InstOpCode::NOP, 2, callNode, cg());
2231
2232
if (!useCLFIandBRCL)
2233
postDeps->addPostCondition(methodRegister, TR::RealRegister::AssignAny);
2234
}
2235
2236
gcPoint = cursor;
2237
((TR::S390CallSnippet *) ifcSnippet)->setBranchInstruction(gcPoint);
2238
2239
cursor = generateS390LabelInstruction(cg(), TR::InstOpCode::label, callNode, returnLocationLabel, postDeps);
2240
2241
if (comp()->getOption(TR_enableInterfaceCallCachingSingleDynamicSlot))
2242
{
2243
cg()->stopUsingRegister(classMethodEPPairRegister);
2244
}
2245
else
2246
{
2247
if (!useCLFIandBRCL)
2248
cg()->stopUsingRegister(methodRegister);
2249
}
2250
}
2251
}
2252
else if (methodSymbol->isComputed())
2253
{
2254
TR::Register *targetAddress = cg()->evaluate(callNode->getFirstChild());
2255
if (targetAddress->getRegisterPair())
2256
targetAddress=targetAddress->getRegisterPair()->getLowOrder(); // on 31-bit, the top half doesn't matter, so discard it
2257
TR::Register *RegRA = dependencies->searchPostConditionRegister(getReturnAddressRegister());
2258
2259
gcPoint = generateRRInstruction(cg(), TR::InstOpCode::BASR, callNode, RegRA, targetAddress, dependencies);
2260
}
2261
else
2262
{
2263
TR_ASSERT(0, "Unknown methodSymbol kind");
2264
}
2265
2266
if (cg()->getSupportsRuntimeInstrumentation())
2267
TR::TreeEvaluator::generateRuntimeInstrumentationOnOffSequence(cg(), TR::InstOpCode::RION, callNode);
2268
2269
TR_ASSERT( gcPoint, "Expected GC point for a virtual dispatch");
2270
gcPoint->setNeedsGCMap(getPreservedRegisterMapForGC());
2271
}
2272
2273
/**
 * \brief Builds the instruction sequence for a direct (statically bound) call.
 *
 * Handles three cases:
 *   - jitInduceOSR helper calls: branch to a helper-call snippet with a
 *     trailing NOP pad so OSR metadata lookup by return address is unambiguous.
 *   - Resolved, compiled targets: a direct call instruction.
 *   - Unresolved or interpreted targets: a call through an unresolved-call or
 *     J9 call snippet.
 *
 * \param callNode     The call node being evaluated.
 * \param callSymRef   Symbol reference of the call target.
 * \param dependencies Register dependencies to attach to the call.
 * \param argSize      Size in bytes of the outgoing arguments (used by snippets).
 *
 * \return The call instruction that acts as the GC point; its GC map is set
 *         from the preserved-register map.
 */
TR::Instruction *
J9::Z::PrivateLinkage::buildDirectCall(TR::Node * callNode, TR::SymbolReference * callSymRef,
   TR::RegisterDependencyConditions * dependencies, int32_t argSize)
   {
   TR::Instruction * gcPoint = NULL;
   TR::MethodSymbol * callSymbol = callSymRef->getSymbol()->castToMethodSymbol();
   TR::ResolvedMethodSymbol * sym = callSymbol->getResolvedMethodSymbol();
   TR_ResolvedMethod * fem = (sym == NULL) ? NULL : sym->getResolvedMethod();
   bool isJitInduceOSR = callSymRef->isOSRInductionHelper();
   // True when the call target is the method currently being compiled (recursive call).
   bool myself = comp()->isRecursiveMethodTarget(fem);

   TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp()->fe());

#if defined(TR_TARGET_64BIT)
#if defined(J9ZOS390)
   // Reserve a trampoline for this direct call only if it is not a helper call. It may not be used, but we only
   // sacrifice a little trampoline space for it.
   if (comp()->getOption(TR_EnableRMODE64))
#endif
      {
      if (callSymRef->getReferenceNumber() >= TR_S390numRuntimeHelpers)
         {
         fej9->reserveTrampolineIfNecessary(comp(), callSymRef, false);
         }
      }
#endif

   if (comp()->getOption(TR_TraceCG))
      traceMsg(comp(), "Build Direct Call\n");

   // generate call
   if (isJitInduceOSR)
      {
      TR::LabelSymbol * snippetLabel = generateLabelSymbol(cg());
      TR::LabelSymbol * reStartLabel = generateLabelSymbol(cg());

      // Unconditional branch into the helper-call snippet; execution resumes at reStartLabel.
      gcPoint = generateS390BranchInstruction(cg(), TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, callNode, snippetLabel, dependencies);
      TR::Snippet * snippet = new (trHeapMemory()) TR::S390HelperCallSnippet(cg(), callNode, snippetLabel,
                                 callSymRef ? callSymRef : callNode->getSymbolReference(), reStartLabel, argSize);
      cg()->addSnippet(snippet);

      auto* reStartInstruction = generateS390LabelInstruction(cg(), TR::InstOpCode::label, callNode, reStartLabel);

      // NOP is necessary due to confusion when resolving shared slots at a transition. The OSR infrastructure needs
      // to locate the GC map metadata for this transition point by examining the return address. The algorithm used
      // attempts to find the last instruction PC that is smaller than or equal to the return address. The reason we
      // do this is because under involuntary OSR we may generate the GC map on the return instruction itself. Several
      // of our snippets do this. As such we need to handle both cases, i.e. locating the GC map if its on the yield
      // point or if its on the return address. Hence a less than or equal to comparison is used. We insert this NOP
      // to avoid confusion as the instruction following this yield could also have a GC map registered and we must
      // ensure we pick up the correct metadata.
      cg()->insertPad(callNode, reStartInstruction, 2, false);

      gcPoint->setNeedsGCMap(getPreservedRegisterMapForGC());

      return gcPoint;
      }

   // Resolved and already-compiled targets can be called directly; under AOT only
   // helper calls qualify for the direct path.
   if (!callSymRef->isUnresolved() && !callSymbol->isInterpreted() && ((comp()->compileRelocatableCode() && callSymbol->isHelper()) || !comp()->compileRelocatableCode()))
      {
      // direct call for resolved method
      gcPoint = generateDirectCall(cg(), callNode, myself, callSymRef, dependencies);
      gcPoint->setDependencyConditions(dependencies);
      }
   else
      {
      if (cg()->getSupportsRuntimeInstrumentation())
         TR::TreeEvaluator::generateRuntimeInstrumentationOnOffSequence(cg(), TR::InstOpCode::RIOFF, callNode);

      // call through snippet if the method is not resolved or not jitted yet
      TR::LabelSymbol * label = generateLabelSymbol(cg());
      TR::Snippet * snippet;

      if (callSymRef->isUnresolved() || (comp()->compileRelocatableCode() && !comp()->getOption(TR_UseSymbolValidationManager)))
         {
         snippet = new (trHeapMemory()) TR::S390UnresolvedCallSnippet(cg(), callNode, label, argSize);
         }
      else
         {
         snippet = new (trHeapMemory()) TR::S390J9CallSnippet(cg(), callNode, label, callSymRef, argSize);
         }

      cg()->addSnippet(snippet);

      gcPoint = generateSnippetCall(cg(), callNode, snippet, dependencies, callSymRef);

      if (cg()->getSupportsRuntimeInstrumentation())
         TR::TreeEvaluator::generateRuntimeInstrumentationOnOffSequence(cg(), TR::InstOpCode::RION, callNode);
      }

   if (comp()->getOption(TR_TraceCG))
      traceMsg(comp(), "\nGC Point at %p has preserved register map %x\n", gcPoint, getPreservedRegisterMapForGC());

   gcPoint->setNeedsGCMap(getPreservedRegisterMapForGC());
   return gcPoint;
   }
2373
2374
2375
void
2376
J9::Z::PrivateLinkage::callPreJNICallOffloadCheck(TR::Node * callNode)
2377
{
2378
TR::CodeGenerator * codeGen = cg();
2379
TR::LabelSymbol * offloadOffRestartLabel = generateLabelSymbol(codeGen);
2380
TR::LabelSymbol * offloadOffSnippetLabel = generateLabelSymbol(codeGen);
2381
TR::SymbolReference * offloadOffSymRef = codeGen->symRefTab()->findOrCreateRuntimeHelper(TR_S390jitPreJNICallOffloadCheck);
2382
2383
TR::Instruction *gcPoint = generateS390BranchInstruction(
2384
codeGen, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, callNode, offloadOffSnippetLabel);
2385
gcPoint->setNeedsGCMap(0);
2386
2387
codeGen->addSnippet(new (trHeapMemory()) TR::S390HelperCallSnippet(codeGen, callNode,
2388
offloadOffSnippetLabel, offloadOffSymRef, offloadOffRestartLabel));
2389
generateS390LabelInstruction(codeGen, TR::InstOpCode::label, callNode, offloadOffRestartLabel);
2390
}
2391
2392
void
2393
J9::Z::PrivateLinkage::callPostJNICallOffloadCheck(TR::Node * callNode)
2394
{
2395
TR::CodeGenerator * codeGen = cg();
2396
TR::LabelSymbol * offloadOnRestartLabel = generateLabelSymbol(codeGen);
2397
TR::LabelSymbol * offloadOnSnippetLabel = generateLabelSymbol(codeGen);
2398
2399
TR::Instruction *gcPoint = generateS390BranchInstruction(
2400
codeGen, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, callNode, offloadOnSnippetLabel);
2401
gcPoint->setNeedsGCMap(0);
2402
TR::SymbolReference * offloadOnSymRef = codeGen->symRefTab()->findOrCreateRuntimeHelper(TR_S390jitPostJNICallOffloadCheck);
2403
codeGen->addSnippet(new (trHeapMemory()) TR::S390HelperCallSnippet(codeGen, callNode,
2404
offloadOnSnippetLabel, offloadOnSymRef, offloadOnRestartLabel));
2405
generateS390LabelInstruction(codeGen, TR::InstOpCode::label, callNode, offloadOnRestartLabel);
2406
}
2407
2408
void J9::Z::PrivateLinkage::collapseJNIReferenceFrame(TR::Node * callNode,
2409
TR::RealRegister * javaStackPointerRealRegister,
2410
TR::Register * javaLitPoolVirtualRegister,
2411
TR::Register * tempReg)
2412
{
2413
// must check to see if the ref pool was used and clean them up if so--or we
2414
// leave a bunch of pinned garbage behind that screws up the gc quality forever
2415
TR::CodeGenerator * codeGen = cg();
2416
TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
2417
intptr_t flagValue = fej9->constJNIReferenceFrameAllocatedFlags();
2418
TR::LabelSymbol * refPoolRestartLabel = generateLabelSymbol(codeGen);
2419
TR::LabelSymbol * refPoolSnippetLabel = generateLabelSymbol(codeGen);
2420
2421
genLoadAddressConstant(codeGen, callNode, flagValue, tempReg, NULL, NULL, javaLitPoolVirtualRegister);
2422
2423
generateRXInstruction(codeGen, TR::InstOpCode::getAndOpCode(), callNode, tempReg,
2424
new (trHeapMemory()) TR::MemoryReference(javaStackPointerRealRegister, (int32_t)fej9->constJNICallOutFrameFlagsOffset(), codeGen));
2425
TR::Instruction *gcPoint =
2426
generateS390BranchInstruction(codeGen, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, callNode, refPoolSnippetLabel);
2427
gcPoint->setNeedsGCMap(0);
2428
2429
TR::SymbolReference * collapseSymRef = cg()->symRefTab()->findOrCreateRuntimeHelper(TR_S390collapseJNIReferenceFrame);
2430
codeGen->addSnippet(new (trHeapMemory()) TR::S390HelperCallSnippet(codeGen, callNode,
2431
refPoolSnippetLabel, collapseSymRef, refPoolRestartLabel));
2432
generateS390LabelInstruction(cg(), TR::InstOpCode::label, callNode, refPoolRestartLabel);
2433
}
2434
2435
//JNI Callout frame
2436
//
2437
// |-----|
2438
// | | <-- constJNICallOutFrameSpecialTag() (For jni thunk, constJNICallOutFrameInvisibleTag())
2439
// 16/32 |-----|
2440
// | | <-- savedPC (we don't save anything here)
2441
// 12/24 |-----|
2442
// | | <-- return address for JNI call
2443
// 8/16 |-----|
2444
// | | <-- constJNICallOutFrameFlags()
2445
// 4/8 -----
2446
// | | <-- ramMethod for the native method
2447
// ----- <-- stack pointer
2448
//
2449
2450
// release vm access - use hardware registers because of the control flow
2451
// At this point: arguments for the native routine are all in place already, i.e., if there are
2452
// more than 24 byte worth of arguments, some of them are on the stack. However,
2453
// we potentially go out to call a helper before jumping to the native.
2454
// but the helper call saves and restores all regs
2455
/**
 * \brief Builds the JNI call-out stack frame and publishes it to the VM thread.
 *
 * Allocates a five-slot frame on the Java stack holding, from the stack
 * pointer upward: the RAM method for the native, the call-out frame flags,
 * the return address label, a savedPC slot (always 0), and the tag bits.
 * The frame contents and the vmthread pc/literals/jitStackFrameFlags values
 * are materialized in the JNI call data snippet and copied into place with
 * MVC instructions.
 *
 * \param callNode                     The JNI call node.
 * \param javaStackPointerRealRegister Real register holding the Java stack pointer;
 *                                     decremented by the frame size.
 * \param methodMetaDataVirtualRegister Register holding the J9VMThread pointer.
 * \param returnFromJNICallLabel       Label of the post-call return point; stored
 *                                     into the frame's return address slot.
 * \param jniCallDataSnippet           Out-of-line data snippet that holds the
 *                                     constants copied onto the stack.
 */
void
J9::Z::PrivateLinkage::setupJNICallOutFrame(TR::Node * callNode,
   TR::RealRegister * javaStackPointerRealRegister,
   TR::Register * methodMetaDataVirtualRegister,
   TR::LabelSymbol * returnFromJNICallLabel,
   TR::S390JNICallDataSnippet *jniCallDataSnippet)
   {
   TR::CodeGenerator * codeGen = cg();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
   TR::ResolvedMethodSymbol * cs = callNode->getSymbol()->castToResolvedMethodSymbol();
   TR_ResolvedMethod * resolvedMethod = cs->getResolvedMethod();
   TR::Instruction * cursor = NULL;

   // Five pointer-sized slots; negative because the Java stack grows downward.
   int32_t stackAdjust = (-5 * (int32_t)sizeof(intptr_t));

   // Bump the Java stack pointer down to make room for the frame.
   cursor = generateRXInstruction(codeGen, TR::InstOpCode::LAY, callNode, javaStackPointerRealRegister, generateS390MemoryReference(javaStackPointerRealRegister, stackAdjust, codeGen), cursor);

   // The long-displacement spill slot moved with the stack pointer; track it.
   setOffsetToLongDispSlot( getOffsetToLongDispSlot() - stackAdjust );


   // set up Java Thread
   intptr_t constJNICallOutFrameType = fej9->constJNICallOutFrameType();
   TR_ASSERT( constJNICallOutFrameType < MAX_IMMEDIATE_VAL, "OMR::Z::Linkage::setupJNICallOutFrame constJNICallOutFrameType is too big for MVHI");

   // The single MVC below relies on pc, literals and jitStackFrameFlags being
   // three consecutive pointer-sized fields in the vmthread.
   TR_ASSERT((fej9->thisThreadGetJavaFrameFlagsOffset() == fej9->thisThreadGetJavaLiteralsOffset() + TR::Compiler->om.sizeofReferenceAddress()) &&
      fej9->thisThreadGetJavaLiteralsOffset() == fej9->thisThreadGetJavaPCOffset() + TR::Compiler->om.sizeofReferenceAddress()
      , "The vmthread field order should be pc,literals,jitStackFrameFlags\n");

   jniCallDataSnippet->setPC(constJNICallOutFrameType);
   jniCallDataSnippet->setLiterals(0);
   jniCallDataSnippet->setJitStackFrameFlags(0);

   // Copy pc/literals/jitStackFrameFlags (3 pointers; MVC length is size-1)
   // from the snippet into the vmthread.
   generateSS1Instruction(cg(), TR::InstOpCode::MVC, callNode, 3*(TR::Compiler->om.sizeofReferenceAddress()) - 1,
      new (trHeapMemory()) TR::MemoryReference(methodMetaDataVirtualRegister, fej9->thisThreadGetJavaPCOffset(), codeGen),
      new (trHeapMemory()) TR::MemoryReference(jniCallDataSnippet->getBaseRegister(), jniCallDataSnippet->getPCOffset(), codeGen));

   // store out jsp
   generateRXInstruction(codeGen, TR::InstOpCode::getStoreOpCode(), callNode, javaStackPointerRealRegister,
      new (trHeapMemory()) TR::MemoryReference(methodMetaDataVirtualRegister,
         fej9->thisThreadGetJavaSPOffset(), codeGen));

   // JNI Callout Frame setup
   // 0(sp) : RAM method for the native
   intptr_t ramMethod = (uintptr_t) resolvedMethod->resolvedMethodAddress();
   jniCallDataSnippet->setRAMMethod(ramMethod);

   // 4[8](sp) : flags
   intptr_t flags = fej9->constJNICallOutFrameFlags();
   jniCallDataSnippet->setJNICallOutFrameFlags(flags);

   // 8[16](sp) : return address (savedCP)
   jniCallDataSnippet->setReturnFromJNICall(returnFromJNICallLabel);

   // 12[24](sp) : savedPC
   jniCallDataSnippet->setSavedPC(0);

   // 16[32](sp) : tag bits (savedA0)
   intptr_t tagBits = fej9->constJNICallOutFrameSpecialTag();
   // if the current method is simply a wrapper for the JNI call, hide the call-out stack frame
   if (resolvedMethod == comp()->getCurrentMethod())
      {
      tagBits |= fej9->constJNICallOutFrameInvisibleTag();
      }

   jniCallDataSnippet->setTagBits(tagBits);

   // Copy the whole five-slot frame image from the snippet onto the Java stack
   // in one MVC (length is frame size - 1).
   generateSS1Instruction(cg(), TR::InstOpCode::MVC, callNode, -stackAdjust - 1,
      new (trHeapMemory()) TR::MemoryReference(javaStackPointerRealRegister, 0, codeGen),
      new (trHeapMemory()) TR::MemoryReference(jniCallDataSnippet->getBaseRegister(), jniCallDataSnippet->getJNICallOutFrameDataOffset(), codeGen));

   }
2526
2527
2528
/**
2529
* release vm access - use hardware registers because of the control flow
2530
* At this point: arguments for the native routine are all in place already, i.e., if there are
2531
* more than 24 byte worth of arguments, some of them are on the stack. However,
2532
* we potentially go out to call a helper before jumping to the native.
2533
* but the helper call saves and restores all regs
2534
*/
2535
void J9::Z::JNILinkage::releaseVMAccessMask(TR::Node * callNode,
   TR::Register * methodMetaDataVirtualRegister, TR::Register * methodAddressReg, TR::Register * javaLitOffsetReg,
   TR::S390JNICallDataSnippet * jniCallDataSnippet, TR::RegisterDependencyConditions * deps)
   {
   TR::LabelSymbol * loopHead = generateLabelSymbol(self()->cg());
   // NOTE(review): longReleaseLabel appears unused; only longReleaseSnippetLabel is branched to.
   TR::LabelSymbol * longReleaseLabel = generateLabelSymbol(self()->cg());
   TR::LabelSymbol * longReleaseSnippetLabel = generateLabelSymbol(self()->cg());
   TR::LabelSymbol * cFlowRegionEnd = generateLabelSymbol(self()->cg());
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(self()->fe());

   // Mask that clears the VM-access bit(s); stored in the snippet so the
   // generated code can AND against it without a literal pool reference.
   intptr_t aValue = fej9->constReleaseVMAccessMask(); //0xfffffffffffdffdf
   jniCallDataSnippet->setConstReleaseVMAccessMask(aValue);

   // Load the current publicFlags value; this is the CS "old value" operand.
   generateRXInstruction(self()->cg(), TR::InstOpCode::getLoadOpCode(), callNode, methodAddressReg,
      generateS390MemoryReference(methodMetaDataVirtualRegister,
         fej9->thisThreadGetPublicFlagsOffset(), self()->cg()));


   generateS390LabelInstruction(self()->cg(), TR::InstOpCode::label, callNode, loopHead);
   loopHead->setStartInternalControlFlow();


   // Flags that force the slow (out-of-line) release path when set.
   aValue = fej9->constReleaseVMAccessOutOfLineMask(); //0x340001
   jniCallDataSnippet->setConstReleaseVMAccessOutOfLineMask(aValue);

   // Test publicFlags against the out-of-line mask (copy first so the
   // original value in methodAddressReg is preserved for the CS below).
   generateRRInstruction(self()->cg(), TR::InstOpCode::getLoadRegOpCode(), callNode, javaLitOffsetReg, methodAddressReg);
   generateRXInstruction(self()->cg(), TR::InstOpCode::getAndOpCode(), callNode, javaLitOffsetReg,
      generateS390MemoryReference(jniCallDataSnippet->getBaseRegister(), jniCallDataSnippet->getConstReleaseVMAccessOutOfLineMaskOffset(), self()->cg()));

   // Any out-of-line bit set => call the release-VM-access helper instead.
   TR::Instruction * gcPoint = (TR::Instruction *) generateS390BranchInstruction(
      self()->cg(), TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, callNode, longReleaseSnippetLabel);
   gcPoint->setNeedsGCMap(0);

   // Fast path: compute newFlags = oldFlags & releaseMask, then attempt
   // CS(oldFlags -> newFlags) on the vmthread publicFlags word.
   generateRRInstruction(self()->cg(), TR::InstOpCode::getLoadRegOpCode(), callNode, javaLitOffsetReg, methodAddressReg);
   generateRXInstruction(self()->cg(), TR::InstOpCode::getAndOpCode(), callNode, javaLitOffsetReg,
      generateS390MemoryReference(jniCallDataSnippet->getBaseRegister(), jniCallDataSnippet->getConstReleaseVMAccessMaskOffset(), self()->cg()));
   generateRSInstruction(self()->cg(), TR::InstOpCode::getCmpAndSwapOpCode(), callNode, methodAddressReg, javaLitOffsetReg,
      generateS390MemoryReference(methodMetaDataVirtualRegister,
         fej9->thisThreadGetPublicFlagsOffset(), self()->cg()));


   //get existing post conditions on the registers parameters and create a new post cond for the internal control flow
   TR::RegisterDependencyConditions * postDeps = new (self()->trHeapMemory()) TR::RegisterDependencyConditions(0, 3, self()->cg());
   TR::RealRegister::RegNum realReg;
   // For each of the three registers live across the internal control flow,
   // reuse the real-register assignment from the caller's deps if present,
   // otherwise let the register assigner pick (AssignAny).
   int32_t regPos = deps->searchPostConditionRegisterPos(methodMetaDataVirtualRegister);
   if (regPos >= 0)
      {
      realReg = deps->getPostConditions()->getRegisterDependency(regPos)->getRealRegister();
      postDeps->addPostCondition(methodMetaDataVirtualRegister, realReg);
      }
   else
      postDeps->addPostCondition(methodMetaDataVirtualRegister, TR::RealRegister::AssignAny);

   regPos = deps->searchPostConditionRegisterPos(methodAddressReg);
   if (regPos >= 0)
      {
      realReg = deps->getPostConditions()->getRegisterDependency(regPos)->getRealRegister();
      postDeps->addPostCondition(methodAddressReg, realReg);
      }
   else
      postDeps->addPostCondition(methodAddressReg, TR::RealRegister::AssignAny);

   regPos = deps->searchPostConditionRegisterPos(javaLitOffsetReg);
   if (regPos >= 0)
      {
      realReg = deps->getPostConditions()->getRegisterDependency(regPos)->getRealRegister();
      postDeps->addPostCondition(javaLitOffsetReg, realReg);
      }
   else
      postDeps->addPostCondition(javaLitOffsetReg, TR::RealRegister::AssignAny);


   // CS failed (another thread changed publicFlags): retry the whole test/swap.
   generateS390BranchInstruction(self()->cg(), TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, callNode, loopHead);

   generateS390LabelInstruction(self()->cg(), TR::InstOpCode::label, callNode, cFlowRegionEnd, postDeps);
   cFlowRegionEnd->setEndInternalControlFlow();


   // Slow path: call the release-VM-access helper; it returns to cFlowRegionEnd.
   self()->cg()->addSnippet(new (self()->trHeapMemory()) TR::S390HelperCallSnippet(self()->cg(), callNode, longReleaseSnippetLabel,
      comp()->getSymRefTab()->findOrCreateReleaseVMAccessSymbolRef(comp()->getJittedMethodSymbol()), cFlowRegionEnd));
   // end of release vm access (spin lock)
   }
2617
2618
2619
/**
 * \brief Re-acquires VM access after a JNI call-out using a compare-and-swap
 *        fast path with a helper-call slow path.
 *
 * \param callNode                      The JNI call node.
 * \param javaLitPoolVirtualRegister    Literal pool register used to load the mask constant.
 * \param methodMetaDataVirtualRegister Register holding the J9VMThread pointer.
 * \param methodAddressReg              Scratch register; receives the acquire mask.
 * \param javaLitOffsetReg              Scratch register; used as the CS old-value operand.
 */
void J9::Z::JNILinkage::acquireVMAccessMask(TR::Node * callNode, TR::Register * javaLitPoolVirtualRegister,
   TR::Register * methodMetaDataVirtualRegister, TR::Register * methodAddressReg, TR::Register * javaLitOffsetReg)
   {
   // start of acquire vm access

   // WARNING:
   // As java stack is not yet restored , Make sure that no instruction in this function
   // should use stack.
   // If instruction uses literal pool, it must only be to do load, and such instruction's memory reference should be marked MemRefMustNotSpill
   // so that in case of long disp, we will reuse the target reg as a scratch reg

   TR_J9VMBase *fej9 = (TR_J9VMBase *)(self()->fe());
   intptr_t aValue = fej9->constAcquireVMAccessOutOfLineMask();

   TR::Instruction * loadInstr = (TR::Instruction *) genLoadAddressConstant(self()->cg(), callNode, aValue, methodAddressReg, NULL, NULL, javaLitPoolVirtualRegister);
   // If the constant was loaded through a memory reference, forbid spilling it
   // (see the WARNING above: the Java stack is not usable here).
   switch (loadInstr->getKind())
      {
      case TR::Instruction::IsRX:
      case TR::Instruction::IsRXE:
      case TR::Instruction::IsRXY:
      case TR::Instruction::IsRXYb:
         ((TR::S390RXInstruction *)loadInstr)->getMemoryReference()->setMemRefMustNotSpill();
         break;
      default:
         break;
      }

   // Zero javaLitOffsetReg: the CS below expects publicFlags == 0 on the fast path.
   generateRRInstruction(self()->cg(), TR::InstOpCode::getXORRegOpCode(), callNode, javaLitOffsetReg, javaLitOffsetReg);

   TR::LabelSymbol * longAcquireLabel = generateLabelSymbol(self()->cg());
   TR::LabelSymbol * longAcquireSnippetLabel = generateLabelSymbol(self()->cg());
   TR::LabelSymbol * acquireDoneLabel = generateLabelSymbol(self()->cg());
   // Try CS(0 -> acquireMask) on vmthread publicFlags.
   generateRSInstruction(cg(), TR::InstOpCode::getCmpAndSwapOpCode(), callNode, javaLitOffsetReg, methodAddressReg,
      generateS390MemoryReference(methodMetaDataVirtualRegister,
         (int32_t)fej9->thisThreadGetPublicFlagsOffset(), self()->cg()));
   // CS failed => contended; take the acquire-VM-access helper slow path.
   TR::Instruction *gcPoint = (TR::Instruction *) generateS390BranchInstruction(self()->cg(), TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, callNode, longAcquireSnippetLabel);
   gcPoint->setNeedsGCMap(0);

   self()->cg()->addSnippet(new (self()->trHeapMemory()) TR::S390HelperCallSnippet(self()->cg(), callNode, longAcquireSnippetLabel,
      comp()->getSymRefTab()->findOrCreateAcquireVMAccessSymbolRef(comp()->getJittedMethodSymbol()), acquireDoneLabel));
   generateS390LabelInstruction(self()->cg(), TR::InstOpCode::label, callNode, acquireDoneLabel);
   // end of acquire vm access
   }
2662
2663
#ifdef J9VM_INTERP_ATOMIC_FREE_JNI
2664
2665
/**
2666
* \brief
2667
* Build the atomic-free release VM access sequence for JNI dispatch.
2668
*
2669
* \details
2670
* This is the atomic-free JNI design and works in conjunction with VMAccess.cpp atomic-free JNI changes.
2671
*
2672
* In the JNI dispatch sequence, a release-vm-access action is performed before the branch to native code; and an acquire-vm-access
2673
* is done after the thread execution returns from the native call. Both of the actions require synchronization between the
2674
* application thread and the GC thread. This was previously implemented with the atomic compare-and-swap (CS) instruction, which is slow in nature.
2675
*
2676
* To speed up the JNI acquire and release access actions (the fast path), a store-load sequence is generated by this evaluator
2677
* to replace the CS instruction. Normally, the fast path ST-LD are not serialized and can be done out-of-order for higher performance. Synchronization
2678
* burden is offloaded to the slow path.
2679
*
2680
* The slow path is where a thread tries to acquire exclusive vm access. The slow path should be taken proportionally less often than the fast
2681
* path. Should the slow path be taken, that thread will be penalized by calling a slow flushProcessWriteBuffer() routine so that all threads
2682
* can momentarily synchronize memory writes. Having fast and slow paths makes the atomic-free JNI design asymmetric.
2683
*
2684
* Note that the z/OS currently does not support the asymmetric algorithm. Hence, a serialization instruction is required between the
2685
* store and the load.
2686
*
2687
*/
2688
void
2689
J9::Z::JNILinkage::releaseVMAccessMaskAtomicFree(TR::Node * callNode,
2690
TR::Register * methodMetaDataVirtualRegister,
2691
TR::Register * tempReg1)
2692
{
2693
TR_J9VMBase *fej9 = (TR_J9VMBase *)fe();
2694
TR::CodeGenerator* cg = self()->cg();
2695
2696
// Store a 1 into vmthread->inNative
2697
generateSILInstruction(cg, TR::InstOpCode::getMoveHalfWordImmOpCode(), callNode,
2698
generateS390MemoryReference(methodMetaDataVirtualRegister, offsetof(J9VMThread, inNative), cg),
2699
1);
2700
2701
2702
#if !defined(J9VM_INTERP_ATOMIC_FREE_JNI_USES_FLUSH)
2703
generateSerializationInstruction(cg, callNode, NULL);
2704
#endif
2705
2706
// Compare vmthread public flag with J9_PUBLIC_FLAGS_VM_ACCESS
2707
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), callNode, tempReg1,
2708
generateS390MemoryReference(methodMetaDataVirtualRegister, fej9->thisThreadGetPublicFlagsOffset(), cg));
2709
2710
TR::LabelSymbol * longReleaseSnippetLabel = generateLabelSymbol(cg);
2711
TR::LabelSymbol * longReleaseRestartLabel = generateLabelSymbol(cg);
2712
2713
TR_ASSERT_FATAL(J9_PUBLIC_FLAGS_VM_ACCESS >= MIN_IMMEDIATE_BYTE_VAL && J9_PUBLIC_FLAGS_VM_ACCESS <= MAX_IMMEDIATE_BYTE_VAL, "VM access bit must be immediate");
2714
2715
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpOpCode(), callNode, tempReg1, J9_PUBLIC_FLAGS_VM_ACCESS, TR::InstOpCode::COND_BNE, longReleaseSnippetLabel, false);
2716
2717
cg->addSnippet(new (self()->trHeapMemory()) TR::S390HelperCallSnippet(cg,
2718
callNode, longReleaseSnippetLabel,
2719
comp()->getSymRefTab()->findOrCreateReleaseVMAccessSymbolRef(comp()->getJittedMethodSymbol()),
2720
longReleaseRestartLabel));
2721
2722
generateS390LabelInstruction(cg, TR::InstOpCode::label, callNode, longReleaseRestartLabel);
2723
}
2724
2725
/**
2726
* \brief
2727
* Build the atomic-free acquire VM access sequence for JNI dispatch.
2728
*
2729
* */
2730
void
2731
J9::Z::JNILinkage::acquireVMAccessMaskAtomicFree(TR::Node * callNode,
2732
TR::Register * methodMetaDataVirtualRegister,
2733
TR::Register * tempReg1)
2734
{
2735
TR_J9VMBase *fej9 = (TR_J9VMBase *)fe();
2736
TR::CodeGenerator* cg = self()->cg();
2737
2738
// Zero vmthread->inNative, which is a UDATA field
2739
generateSS1Instruction(cg, TR::InstOpCode::XC, callNode, TR::Compiler->om.sizeofReferenceAddress() - 1,
2740
generateS390MemoryReference(methodMetaDataVirtualRegister, offsetof(J9VMThread, inNative), cg),
2741
generateS390MemoryReference(methodMetaDataVirtualRegister, offsetof(J9VMThread, inNative), cg));
2742
2743
#if !defined(J9VM_INTERP_ATOMIC_FREE_JNI_USES_FLUSH)
2744
generateSerializationInstruction(cg, callNode, NULL);
2745
#endif
2746
2747
// Compare vmthread public flag with J9_PUBLIC_FLAGS_VM_ACCESS
2748
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), callNode, tempReg1,
2749
generateS390MemoryReference(methodMetaDataVirtualRegister, fej9->thisThreadGetPublicFlagsOffset(), cg));
2750
2751
TR::LabelSymbol * longAcquireSnippetLabel = generateLabelSymbol(cg);
2752
TR::LabelSymbol * longAcquireRestartLabel = generateLabelSymbol(cg);
2753
2754
TR_ASSERT_FATAL(J9_PUBLIC_FLAGS_VM_ACCESS >= MIN_IMMEDIATE_BYTE_VAL && J9_PUBLIC_FLAGS_VM_ACCESS <= MAX_IMMEDIATE_BYTE_VAL, "VM access bit must be immediate");
2755
2756
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpOpCode(), callNode, tempReg1, J9_PUBLIC_FLAGS_VM_ACCESS, TR::InstOpCode::COND_BNE, longAcquireSnippetLabel, false);
2757
2758
cg->addSnippet(new (self()->trHeapMemory()) TR::S390HelperCallSnippet(cg,
2759
callNode, longAcquireSnippetLabel,
2760
comp()->getSymRefTab()->findOrCreateAcquireVMAccessSymbolRef(comp()->getJittedMethodSymbol()),
2761
longAcquireRestartLabel));
2762
2763
generateS390LabelInstruction(cg, TR::InstOpCode::label, callNode, longAcquireRestartLabel);
2764
}
2765
#endif
2766
2767
void J9::Z::JNILinkage::checkException(TR::Node * callNode,
2768
TR::Register * methodMetaDataVirtualRegister,
2769
TR::Register * tempReg)
2770
{
2771
TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
2772
// check exception
2773
TR::LabelSymbol * exceptionRestartLabel = generateLabelSymbol(self()->cg());
2774
TR::LabelSymbol * exceptionSnippetLabel = generateLabelSymbol(self()->cg());
2775
generateRXInstruction(self()->cg(), TR::InstOpCode::getLoadOpCode(), callNode, tempReg,
2776
new (self()->trHeapMemory()) TR::MemoryReference(methodMetaDataVirtualRegister, fej9->thisThreadGetCurrentExceptionOffset(), self()->cg()));
2777
2778
TR::Instruction *gcPoint = generateS390CompareAndBranchInstruction(self()->cg(),
2779
TR::InstOpCode::getCmpOpCode(), callNode, tempReg, 0, TR::InstOpCode::COND_BNE, exceptionSnippetLabel, false, true);
2780
gcPoint->setNeedsGCMap(0);
2781
2782
self()->cg()->addSnippet(new (self()->trHeapMemory()) TR::S390HelperCallSnippet(self()->cg(), callNode, exceptionSnippetLabel,
2783
comp()->getSymRefTab()->findOrCreateThrowCurrentExceptionSymbolRef(comp()->getJittedMethodSymbol()), exceptionRestartLabel));
2784
generateS390LabelInstruction(self()->cg(), TR::InstOpCode::label, callNode, exceptionRestartLabel);
2785
}
2786
2787
void
2788
J9::Z::JNILinkage::processJNIReturnValue(TR::Node * callNode,
2789
TR::CodeGenerator* cg,
2790
TR::Register* javaReturnRegister)
2791
{
2792
auto resolvedMethod = callNode->getSymbol()->castToResolvedMethodSymbol()->getResolvedMethod();
2793
auto returnType = resolvedMethod->returnType();
2794
const bool isUnwrapAddressReturnValue = !((TR_J9VMBase *)fe())->jniDoNotWrapObjects(resolvedMethod)
2795
&& (returnType == TR::Address);
2796
2797
TR::LabelSymbol *cFlowRegionStart = NULL, *cFlowRegionEnd = NULL;
2798
2799
if (isUnwrapAddressReturnValue)
2800
{
2801
cFlowRegionStart = generateLabelSymbol(cg);
2802
cFlowRegionEnd = generateLabelSymbol(cg);
2803
2804
generateS390LabelInstruction(cg, TR::InstOpCode::label, callNode, cFlowRegionStart);
2805
cFlowRegionStart->setStartInternalControlFlow();
2806
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpOpCode(), callNode, javaReturnRegister, 0, TR::InstOpCode::COND_BE, cFlowRegionEnd);
2807
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), callNode, javaReturnRegister,
2808
generateS390MemoryReference(javaReturnRegister, 0, cg));
2809
2810
generateS390LabelInstruction(cg, TR::InstOpCode::label, callNode, cFlowRegionEnd);
2811
cFlowRegionEnd->setEndInternalControlFlow();
2812
}
2813
else if ((returnType == TR::Int8) && comp()->getSymRefTab()->isReturnTypeBool(callNode->getSymbolReference()))
2814
{
2815
if (comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z13))
2816
{
2817
generateRIInstruction(cg, TR::InstOpCode::getCmpHalfWordImmOpCode(), callNode, javaReturnRegister, 0);
2818
generateRIEInstruction(cg, comp()->target().is64Bit() ? TR::InstOpCode::LOCGHI : TR::InstOpCode::LOCHI,
2819
callNode, javaReturnRegister, 1, TR::InstOpCode::COND_BNE);
2820
}
2821
else
2822
{
2823
cFlowRegionStart = generateLabelSymbol(cg);
2824
cFlowRegionEnd = generateLabelSymbol(cg);
2825
2826
generateS390LabelInstruction(cg, TR::InstOpCode::label, callNode, cFlowRegionStart);
2827
cFlowRegionStart->setStartInternalControlFlow();
2828
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpOpCode(), callNode, javaReturnRegister,
2829
0, TR::InstOpCode::COND_BE, cFlowRegionEnd);
2830
generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), callNode, javaReturnRegister, 1);
2831
generateS390LabelInstruction(cg, TR::InstOpCode::label, callNode, cFlowRegionEnd);
2832
cFlowRegionEnd->setEndInternalControlFlow();
2833
}
2834
}
2835
}
2836
2837
TR::Register * J9::Z::JNILinkage::buildDirectDispatch(TR::Node * callNode)
2838
{
2839
if (comp()->getOption(TR_TraceCG))
2840
traceMsg(comp(), "\nbuildDirectDispatch\n");
2841
2842
TR::CodeGenerator * codeGen = cg();
2843
TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
2844
TR::SystemLinkage * systemLinkage = (TR::SystemLinkage *) cg()->getLinkage(TR_System);
2845
TR::LabelSymbol * returnFromJNICallLabel = generateLabelSymbol(cg());
2846
TR::RegisterDependencyConditions * deps;
2847
2848
// Extra dependency for killing volatile high registers (see KillVolHighRegs)
2849
int32_t numDeps = systemLinkage->getNumberOfDependencyGPRegisters() + 1;
2850
2851
if (cg()->getSupportsVectorRegisters())
2852
numDeps += 32; //VRFs need to be spilled
2853
2854
// 70896 Remove DEPEND instruction and merge glRegDeps to call deps
2855
// *Speculatively* increase numDeps for dependencies from glRegDeps
2856
// which is added right before callNativeFunction.
2857
// GlobalRegDeps should not add any more children after here.
2858
TR::RegisterDependencyConditions *glRegDeps;
2859
TR::Node *GlobalRegDeps;
2860
2861
bool hasGlRegDeps = (callNode->getNumChildren() >= 1) &&
2862
(callNode->getChild(callNode->getNumChildren()-1)->getOpCodeValue() == TR::GlRegDeps);
2863
if(hasGlRegDeps)
2864
{
2865
GlobalRegDeps = callNode->getChild(callNode->getNumChildren()-1);
2866
numDeps += GlobalRegDeps->getNumChildren();
2867
}
2868
2869
deps = generateRegisterDependencyConditions(numDeps, numDeps, cg());
2870
int64_t killMask = -1;
2871
TR::Register *vftReg = NULL;
2872
TR::S390JNICallDataSnippet * jniCallDataSnippet = NULL;
2873
TR::RealRegister * javaStackPointerRealRegister = getStackPointerRealRegister();
2874
TR::RealRegister * methodMetaDataRealRegister = getMethodMetaDataRealRegister();
2875
TR::RealRegister * javaLitPoolRealRegister = getLitPoolRealRegister();
2876
2877
TR::Register * javaLitPoolVirtualRegister = javaLitPoolRealRegister;
2878
TR::Register * methodMetaDataVirtualRegister = methodMetaDataRealRegister;
2879
2880
TR::Register * methodAddressReg = NULL;
2881
TR::Register * javaLitOffsetReg = NULL;
2882
intptr_t targetAddress = (intptr_t) 0;
2883
TR::DataType returnType = TR::NoType;
2884
int8_t numTempRegs = -1;
2885
comp()->setHasNativeCall();
2886
2887
if (codeGen->getSupportsRuntimeInstrumentation())
2888
TR::TreeEvaluator::generateRuntimeInstrumentationOnOffSequence(codeGen, TR::InstOpCode::RIOFF, callNode);
2889
2890
TR::ResolvedMethodSymbol * cs = callNode->getSymbol()->castToResolvedMethodSymbol();
2891
TR_ResolvedMethod * resolvedMethod = cs->getResolvedMethod();
2892
bool isFastJNI = true;
2893
bool isPassJNIThread = !fej9->jniDoNotPassThread(resolvedMethod);
2894
bool isPassReceiver = !fej9->jniDoNotPassReceiver(resolvedMethod);
2895
bool isJNIGCPoint = !fej9->jniNoGCPoint(resolvedMethod);
2896
bool isJNICallOutFrame = !fej9->jniNoNativeMethodFrame(resolvedMethod);
2897
bool isReleaseVMAccess = !fej9->jniRetainVMAccess(resolvedMethod);
2898
bool isJavaOffLoadCheck = false;
2899
bool isAcquireVMAccess = isReleaseVMAccess;
2900
bool isCollapseJNIReferenceFrame = !fej9->jniNoSpecialTeardown(resolvedMethod);
2901
bool isCheckException = !fej9->jniNoExceptionsThrown(resolvedMethod);
2902
bool isKillAllUnlockedGPRs = isJNIGCPoint;
2903
2904
killMask = killAndAssignRegister(killMask, deps, &methodAddressReg, (comp()->target().isLinux()) ? TR::RealRegister::GPR1 : TR::RealRegister::GPR9 , codeGen, true);
2905
killMask = killAndAssignRegister(killMask, deps, &javaLitOffsetReg, TR::RealRegister::GPR11, codeGen, true);
2906
2907
targetAddress = (intptr_t) resolvedMethod->startAddressForJNIMethod(comp());
2908
returnType = resolvedMethod->returnType();
2909
2910
static char * disablePureFn = feGetEnv("TR_DISABLE_PURE_FUNC_RECOGNITION");
2911
if (cs->canDirectNativeCall())
2912
{
2913
isReleaseVMAccess = false;
2914
isAcquireVMAccess = false;
2915
isKillAllUnlockedGPRs = false;
2916
isJNIGCPoint = false;
2917
isCheckException = false;
2918
isJNICallOutFrame = false;
2919
}
2920
if (cs->isPureFunction() && (disablePureFn == NULL))
2921
{
2922
isReleaseVMAccess=false;
2923
isAcquireVMAccess=false;
2924
isCheckException = false;
2925
}
2926
if ((fej9->isJavaOffloadEnabled() && static_cast<TR_ResolvedJ9Method *>(resolvedMethod)->methodIsNotzAAPEligible()) || (fej9->CEEHDLREnabled() && isJNICallOutFrame))
2927
isJavaOffLoadCheck = true;
2928
2929
2930
if (comp()->getOption(TR_TraceCG))
2931
traceMsg(comp(), "isPassReceiver: %d, isPassJNIThread: %d, isJNIGCPoint: %d, isJNICallOutFrame:%d, isReleaseVMAccess: %d, isCollapseJNIReferenceFrame: %d, isJNIGCPoint: %d\n", isPassReceiver, isPassJNIThread, isJNIGCPoint, isJNICallOutFrame, isReleaseVMAccess, isCollapseJNIReferenceFrame, isJNIGCPoint);
2932
2933
if (isPassJNIThread)
2934
{
2935
//First param for JNI call in JNIEnv pointer
2936
TR::Register * jniEnvRegister = cg()->allocateRegister();
2937
deps->addPreCondition(jniEnvRegister, systemLinkage->getIntegerArgumentRegister(0));
2938
generateRRInstruction(codeGen, TR::InstOpCode::getLoadRegOpCode(), callNode,
2939
jniEnvRegister, methodMetaDataVirtualRegister);
2940
}
2941
2942
// JNI dispatch does not allow for any object references to survive in preserved registers as they are saved onto
2943
// the system stack, which the JVM stack walker has no awareness of. Hence we need to ensure that all object
2944
// references are evicted from preserved registers at the call site.
2945
TR::Register* tempReg = cg()->allocateRegister();
2946
2947
deps->addPostCondition(tempReg, TR::RealRegister::KillVolHighRegs);
2948
cg()->stopUsingRegister(tempReg);
2949
2950
setupRegisterDepForLinkage(callNode, TR_JNIDispatch, deps, killMask, systemLinkage, GlobalRegDeps, hasGlRegDeps, &methodAddressReg, javaLitOffsetReg);
2951
2952
setupBuildArgForLinkage(callNode, TR_JNIDispatch, deps, isFastJNI, isPassReceiver, killMask, GlobalRegDeps, hasGlRegDeps, systemLinkage);
2953
2954
if (isJNICallOutFrame || isReleaseVMAccess)
2955
{
2956
TR::Register * JNISnippetBaseReg = NULL;
2957
killMask = killAndAssignRegister(killMask, deps, &JNISnippetBaseReg, TR::RealRegister::GPR12, codeGen, true);
2958
jniCallDataSnippet = new (trHeapMemory()) TR::S390JNICallDataSnippet(cg(), callNode);
2959
cg()->addSnippet(jniCallDataSnippet);
2960
jniCallDataSnippet->setBaseRegister(JNISnippetBaseReg);
2961
new (trHeapMemory()) TR::S390RILInstruction(TR::InstOpCode::LARL, callNode,
2962
jniCallDataSnippet->getBaseRegister(), jniCallDataSnippet, codeGen);
2963
jniCallDataSnippet->setTargetAddress(targetAddress);
2964
}
2965
2966
if (isJNICallOutFrame)
2967
{
2968
// Sets up PC, Stack pointer and literals offset slots.
2969
setupJNICallOutFrame(callNode, javaStackPointerRealRegister, methodMetaDataVirtualRegister,
2970
returnFromJNICallLabel, jniCallDataSnippet);
2971
}
2972
else
2973
{
2974
// store java stack pointer
2975
generateRXInstruction(codeGen, TR::InstOpCode::getStoreOpCode(), callNode, javaStackPointerRealRegister,
2976
new (trHeapMemory()) TR::MemoryReference(methodMetaDataVirtualRegister, (int32_t)fej9->thisThreadGetJavaSPOffset(), codeGen));
2977
2978
2979
auto* literalOffsetMemoryReference = new (trHeapMemory()) TR::MemoryReference(methodMetaDataVirtualRegister, (int32_t)fej9->thisThreadGetJavaLiteralsOffset(), codeGen);
2980
2981
// Set up literal offset slot to zero
2982
generateSILInstruction(codeGen, TR::InstOpCode::getMoveHalfWordImmOpCode(), callNode, literalOffsetMemoryReference, 0);
2983
}
2984
2985
if (isReleaseVMAccess)
2986
{
2987
#ifdef J9VM_INTERP_ATOMIC_FREE_JNI
2988
releaseVMAccessMaskAtomicFree(callNode, methodMetaDataVirtualRegister, methodAddressReg);
2989
#else
2990
releaseVMAccessMask(callNode, methodMetaDataVirtualRegister, methodAddressReg, javaLitOffsetReg, jniCallDataSnippet, deps);
2991
#endif
2992
}
2993
2994
//Turn off Java Offload if calling user native
2995
if (isJavaOffLoadCheck)
2996
{
2997
callPreJNICallOffloadCheck(callNode);
2998
}
2999
3000
// Generate a call to the native function
3001
TR::Register * javaReturnRegister = systemLinkage->callNativeFunction(
3002
callNode, deps, targetAddress, methodAddressReg, javaLitOffsetReg, returnFromJNICallLabel,
3003
jniCallDataSnippet, isJNIGCPoint);
3004
3005
// restore java stack pointer
3006
generateRXInstruction(codeGen, TR::InstOpCode::getLoadOpCode(), callNode, javaStackPointerRealRegister,
3007
new (trHeapMemory()) TR::MemoryReference(methodMetaDataVirtualRegister, (int32_t)fej9->thisThreadGetJavaSPOffset(), codeGen));
3008
3009
//Turn on Java Offload
3010
if (isJavaOffLoadCheck)
3011
{
3012
callPostJNICallOffloadCheck(callNode);
3013
}
3014
3015
if (isAcquireVMAccess)
3016
{
3017
#ifdef J9VM_INTERP_ATOMIC_FREE_JNI
3018
acquireVMAccessMaskAtomicFree(callNode, methodMetaDataVirtualRegister, methodAddressReg);
3019
#else
3020
acquireVMAccessMask(callNode, javaLitPoolVirtualRegister, methodMetaDataVirtualRegister, methodAddressReg, javaLitOffsetReg);
3021
#endif
3022
}
3023
3024
3025
generateRXInstruction(codeGen, TR::InstOpCode::getAddOpCode(), callNode, javaStackPointerRealRegister,
3026
new (trHeapMemory()) TR::MemoryReference(methodMetaDataVirtualRegister, (int32_t)fej9->thisThreadGetJavaLiteralsOffset(), codeGen));
3027
3028
processJNIReturnValue(callNode, codeGen, javaReturnRegister);
3029
3030
if (isCollapseJNIReferenceFrame)
3031
{
3032
collapseJNIReferenceFrame(callNode, javaStackPointerRealRegister, javaLitPoolVirtualRegister, methodAddressReg);
3033
}
3034
3035
// Restore the JIT frame
3036
if (isJNICallOutFrame)
3037
{
3038
generateRXInstruction(codeGen, TR::InstOpCode::LA, callNode, javaStackPointerRealRegister,
3039
generateS390MemoryReference(javaStackPointerRealRegister, 5 * sizeof(intptr_t), codeGen));
3040
3041
setOffsetToLongDispSlot(getOffsetToLongDispSlot() - (5 * (int32_t)sizeof(intptr_t)) );
3042
}
3043
3044
if (isCheckException)
3045
{
3046
checkException(callNode, methodMetaDataVirtualRegister, methodAddressReg);
3047
}
3048
3049
OMR::Z::Linkage::generateDispatchReturnLable(callNode, codeGen, deps, javaReturnRegister, hasGlRegDeps, GlobalRegDeps);
3050
return javaReturnRegister;
3051
}
3052
3053
////////////////////////////////////////////////////////////////////////////////
3054
// J9::Z::PrivateLinkage::doNotKillSpecialRegsForBuildArgs - Do not kill
3055
// special regs (java stack ptr, system stack ptr, and method metadata reg)
3056
////////////////////////////////////////////////////////////////////////////////
3057
void
3058
J9::Z::PrivateLinkage::doNotKillSpecialRegsForBuildArgs (TR::Linkage *linkage, bool isFastJNI, int64_t &killMask)
3059
{
3060
TR::SystemLinkage * systemLinkage = (TR::SystemLinkage *) cg()->getLinkage(TR_System);
3061
3062
int32_t i;
3063
killMask &= ~(0x1L << REGINDEX(getStackPointerRegister()));
3064
3065
if (systemLinkage->getStackPointerRealRegister()->getState() == TR::RealRegister::Locked)
3066
{
3067
killMask &= ~(0x1L << REGINDEX(getSystemStackPointerRegister()));
3068
}
3069
killMask &= ~(0x1L << REGINDEX(getMethodMetaDataRegister()));
3070
3071
// Remove preserved registers from kill set
3072
if (isFastJNI)
3073
{
3074
// We kill all unlocked GPRs for JNI preserved or not,
3075
// so only need to worry about not killing preserved FPRs
3076
for (i = TR::RealRegister::FirstFPR; i <= TR::RealRegister::LastFPR; i++)
3077
{
3078
if (linkage->getPreserved(REGNUM(i)))
3079
killMask &= ~(0x1L << REGINDEX(i));
3080
}
3081
}
3082
else
3083
{
3084
for (i = TR::RealRegister::FirstGPR; i <= TR::RealRegister::LastFPR; i++)
3085
{
3086
if (linkage->getPreserved(REGNUM(i)))
3087
killMask &= ~(0x1L << REGINDEX(i));
3088
}
3089
}
3090
}
3091
3092
////////////////////////////////////////////////////////////////////////////////
3093
// J9::Z::PrivateLinkage::addSpecialRegDepsForBuildArgs - add special argument
3094
// register dependencies for buildArgs
3095
////////////////////////////////////////////////////////////////////////////////
3096
void
3097
J9::Z::PrivateLinkage::addSpecialRegDepsForBuildArgs(TR::Node * callNode, TR::RegisterDependencyConditions * dependencies, int32_t& from, int32_t step)
3098
{
3099
TR::Node * child;
3100
TR::RealRegister::RegNum specialArgReg = TR::RealRegister::NoReg;
3101
switch (callNode->getSymbol()->castToMethodSymbol()->getMandatoryRecognizedMethod())
3102
{
3103
// Note: special long args are still only passed in one GPR
3104
case TR::java_lang_invoke_ComputedCalls_dispatchJ9Method:
3105
specialArgReg = getJ9MethodArgumentRegister();
3106
break;
3107
case TR::java_lang_invoke_ComputedCalls_dispatchVirtual:
3108
case TR::com_ibm_jit_JITHelpers_dispatchVirtual:
3109
specialArgReg = getVTableIndexArgumentRegister();
3110
break;
3111
}
3112
3113
if (specialArgReg != TR::RealRegister::NoReg)
3114
{
3115
child = callNode->getChild(from);
3116
TR::Register *specialArg = copyArgRegister(callNode, child, cg()->evaluate(child)); // TODO:JSR292: We don't need a copy of the highOrder reg on 31-bit
3117
if (specialArg->getRegisterPair())
3118
specialArg = specialArg->getLowOrder(); // on 31-bit, the top half doesn't matter, so discard it
3119
dependencies->addPreCondition(specialArg, specialArgReg );
3120
cg()->decReferenceCount(child);
3121
3122
if (comp()->getOption(TR_TraceCG))
3123
{
3124
traceMsg(comp(), "Special arg %s %s reg %s in %s\n",
3125
callNode->getOpCode().getName(),
3126
comp()->getDebug()->getName(callNode->getChild(from)),
3127
comp()->getDebug()->getName(callNode->getRegister()),
3128
comp()->getDebug()->getName(cg()->machine()->getRealRegister(specialArgReg)));
3129
}
3130
3131
from += step;
3132
}
3133
}
3134
3135
////////////////////////////////////////////////////////////////////////////////
3136
// J9::Z::PrivateLinkage::storeExtraEnvRegForBuildArgs - JNI specific,
3137
// account for extra env param. Return stackOffset.
3138
////////////////////////////////////////////////////////////////////////////////
3139
int32_t
3140
J9::Z::PrivateLinkage::storeExtraEnvRegForBuildArgs(TR::Node * callNode, TR::Linkage* linkage, TR::RegisterDependencyConditions * dependencies,
3141
bool isFastJNI, int32_t stackOffset, int8_t gprSize, uint32_t &numIntegerArgs)
3142
{
3143
//In XPLINK, when the called function has variable number of args, all args are passed on stack,
3144
//Because we have no way of knowing this, we will always store the args on stack and parm regs both.
3145
if (isFastJNI) // Account for extra parameter env
3146
{
3147
TR::Register * jniEnvRegister = dependencies->searchPreConditionRegister(getIntegerArgumentRegister(0));
3148
numIntegerArgs += 1;
3149
if (linkage->isAllParmsOnStack())
3150
{
3151
TR::Register *stackRegister = linkage->getStackRegisterForOutgoingArguments(callNode, dependencies); // delay (possibly) creating this till needed
3152
storeArgumentOnStack(callNode, TR::InstOpCode::getStoreOpCode(), jniEnvRegister, &stackOffset, stackRegister);
3153
}
3154
if (linkage->isXPLinkLinkageType()) // call specific
3155
{
3156
stackOffset += gprSize;
3157
}
3158
}
3159
return stackOffset;
3160
}
3161
3162
////////////////////////////////////////////////////////////////////////////////
3163
// J9::Z::PrivateLinkage::addFECustomizedReturnRegDependency - add extra
3164
// linkage specific return register dependency
3165
////////////////////////////////////////////////////////////////////////////////
3166
int64_t
3167
J9::Z::PrivateLinkage::addFECustomizedReturnRegDependency(int64_t killMask, TR::Linkage* linkage, TR::DataType resType,
3168
TR::RegisterDependencyConditions * dependencies)
3169
{
3170
TR::Register * javaResultReg;
3171
3172
//In zOS XPLink, return register(GPR3) is not same as privateLinkage (GPR2)
3173
// hence we need to add another dependency
3174
if (linkage->getIntegerReturnRegister() != getIntegerReturnRegister())
3175
{
3176
javaResultReg = (resType.isAddress())? cg()->allocateCollectedReferenceRegister() : cg()->allocateRegister();
3177
dependencies->addPostCondition(javaResultReg, getIntegerReturnRegister(),DefinesDependentRegister);
3178
killMask &= (~(0x1L << REGINDEX(getIntegerReturnRegister())));
3179
}
3180
return killMask;
3181
}
3182
3183
////////////////////////////////////////////////////////////////////////////////
3184
// J9::Z::PrivateLinkage::buildDirectDispatch - build direct function call
3185
// eg. Static, helpers... etc.
3186
////////////////////////////////////////////////////////////////////////////////
3187
TR::Register *
3188
J9::Z::PrivateLinkage::buildDirectDispatch(TR::Node * callNode)
3189
{
3190
TR::SymbolReference * callSymRef = callNode->getSymbolReference();
3191
TR::MethodSymbol * callSymbol = callSymRef->getSymbol()->castToMethodSymbol();
3192
int32_t argSize;
3193
TR::Register * returnRegister;
3194
TR::Register *vftReg = NULL;
3195
3196
if (comp()->getOption(TR_TraceCG))
3197
traceMsg(comp(), "\nbuildDirectDispatch\n");
3198
3199
// create register dependency conditions
3200
TR::RegisterDependencyConditions * dependencies = generateRegisterDependencyConditions(getNumberOfDependencyGPRegisters(),
3201
getNumberOfDependencyGPRegisters(), cg());
3202
3203
// setup arguments
3204
argSize = buildArgs(callNode, dependencies, false, -1, vftReg);
3205
3206
buildDirectCall(callNode, callSymRef, dependencies, argSize);
3207
3208
// set dependency on return register
3209
TR::Register * lowReg = NULL, * highReg;
3210
switch (callNode->getOpCodeValue())
3211
{
3212
case TR::icall:
3213
case TR::acall:
3214
returnRegister = dependencies->searchPostConditionRegister(getIntegerReturnRegister());
3215
break;
3216
case TR::lcall:
3217
{
3218
if (comp()->target().is64Bit())
3219
{
3220
returnRegister = dependencies->searchPostConditionRegister(getLongReturnRegister());
3221
}
3222
else
3223
{
3224
TR::Instruction *cursor = NULL;
3225
lowReg = dependencies->searchPostConditionRegister(getLongLowReturnRegister());
3226
highReg = dependencies->searchPostConditionRegister(getLongHighReturnRegister());
3227
3228
generateRSInstruction(cg(), TR::InstOpCode::SLLG, callNode, highReg, highReg, 32);
3229
cursor =
3230
generateRRInstruction(cg(), TR::InstOpCode::LR, callNode, highReg, lowReg);
3231
3232
TR::RegisterDependencyConditions * deps =
3233
new (cg()->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, cg());
3234
deps->addPostCondition(lowReg, getLongLowReturnRegister(),DefinesDependentRegister);
3235
deps->addPostCondition(highReg, getLongHighReturnRegister(),DefinesDependentRegister);
3236
cursor->setDependencyConditions(deps);
3237
3238
cg()->stopUsingRegister(lowReg);
3239
returnRegister = highReg;
3240
}
3241
}
3242
break;
3243
case TR::fcall:
3244
case TR::dcall:
3245
returnRegister = dependencies->searchPostConditionRegister(getFloatReturnRegister());
3246
break;
3247
case TR::call:
3248
returnRegister = NULL;
3249
break;
3250
default:
3251
returnRegister = NULL;
3252
TR_ASSERT(0, "Unknown direct call Opcode %d.", callNode->getOpCodeValue());
3253
}
3254
3255
callNode->setRegister(returnRegister);
3256
3257
#if TODO // for live register - to do later
3258
cg()->freeAndResetTransientLongs();
3259
#endif
3260
dependencies->stopUsingDepRegs(cg(), lowReg == NULL ? returnRegister : highReg, lowReg);
3261
3262
return returnRegister;
3263
}
3264
3265
////////////////////////////////////////////////////////////////////////////////
3266
// J9::Z::PrivateLinkage::buildIndirectDispatch - build indirect function call.
3267
// This function handles the arguments setup and the return register. It will
3268
// buildVirtualDispatch() to handle the call sequence.
3269
////////////////////////////////////////////////////////////////////////////////
3270
TR::Register *
3271
J9::Z::PrivateLinkage::buildIndirectDispatch(TR::Node * callNode)
3272
{
3273
TR::RegisterDependencyConditions * dependencies = NULL;
3274
int32_t argSize = 0;
3275
TR::Register * returnRegister;
3276
TR::SymbolReference * methodSymRef = callNode->getSymbolReference();
3277
TR::MethodSymbol * methodSymbol = methodSymRef->getSymbol()->castToMethodSymbol();
3278
TR::Register *vftReg = NULL;
3279
//TR::S390SystemLinkage * systemLinkage = (TR::S390SystemLinkage *) cg()->getLinkage(TR_System);
3280
3281
3282
if (comp()->getOption(TR_TraceCG))
3283
traceMsg(comp(), "\nbuildIndirectDispatch\n");
3284
3285
// create register dependency conditions
3286
dependencies = generateRegisterDependencyConditions(getNumberOfDependencyGPRegisters(),
3287
getNumberOfDependencyGPRegisters(), cg());
3288
3289
argSize = buildArgs(callNode, dependencies, false, -1, vftReg);
3290
buildVirtualDispatch(callNode, dependencies, vftReg, argSize);
3291
3292
TR::Register * lowReg = NULL, * highReg;
3293
switch (callNode->getOpCodeValue())
3294
{
3295
case TR::icalli:
3296
case TR::acalli:
3297
returnRegister = dependencies->searchPostConditionRegister(getIntegerReturnRegister());
3298
break;
3299
case TR::lcalli:
3300
{
3301
if (comp()->target().is64Bit())
3302
{
3303
returnRegister = dependencies->searchPostConditionRegister(getLongReturnRegister());
3304
}
3305
else
3306
{
3307
TR::Instruction *cursor = NULL;
3308
lowReg = dependencies->searchPostConditionRegister(getLongLowReturnRegister());
3309
highReg = dependencies->searchPostConditionRegister(getLongHighReturnRegister());
3310
3311
generateRSInstruction(cg(), TR::InstOpCode::SLLG, callNode, highReg, highReg, 32);
3312
cursor =
3313
generateRRInstruction(cg(), TR::InstOpCode::LR, callNode, highReg, lowReg);
3314
3315
TR::RegisterDependencyConditions * deps =
3316
new (cg()->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, cg());
3317
deps->addPostCondition(lowReg, getLongLowReturnRegister(),DefinesDependentRegister);
3318
deps->addPostCondition(highReg, getLongHighReturnRegister(),DefinesDependentRegister);
3319
cursor->setDependencyConditions(deps);
3320
3321
cg()->stopUsingRegister(lowReg);
3322
returnRegister = highReg;
3323
}
3324
}
3325
break;
3326
case TR::fcalli:
3327
case TR::dcalli:
3328
returnRegister = dependencies->searchPostConditionRegister(getFloatReturnRegister());
3329
break;
3330
case TR::calli:
3331
returnRegister = NULL;
3332
break;
3333
default:
3334
returnRegister = NULL;
3335
TR_ASSERT( 0, "Unknown indirect call Opcode.");
3336
}
3337
3338
callNode->setRegister(returnRegister);
3339
#if TODO // for live register - to do later
3340
cg()->freeAndResetTransientLongs();
3341
#endif
3342
dependencies->stopUsingDepRegs(cg(), lowReg == NULL ? returnRegister : highReg, lowReg);
3343
return returnRegister;
3344
}
3345
3346
void
3347
J9::Z::PrivateLinkage::setupBuildArgForLinkage(TR::Node * callNode, TR_DispatchType dispatchType, TR::RegisterDependencyConditions * deps, bool isFastJNI,
3348
bool isPassReceiver, int64_t & killMask, TR::Node * GlobalRegDeps, bool hasGlRegDeps, TR::SystemLinkage * systemLinkage)
3349
{
3350
TR::CodeGenerator * codeGen = cg();
3351
TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
3352
// call base class
3353
OMR::Z::Linkage::setupBuildArgForLinkage(callNode, dispatchType, deps, isFastJNI, isPassReceiver, killMask, GlobalRegDeps, hasGlRegDeps, systemLinkage);
3354
3355
3356
// omr todo: this should be cleaned up once the logic of other linkage related method is cleaned up
3357
// basically JNIDispatch will perform the stuff after this statement and hence returning here
3358
// to avoid executing stuff twice...should be fixed in conjunction with JNIDispatch
3359
if (dispatchType == TR_JNIDispatch) return;
3360
3361
3362
J9::Z::PrivateLinkage * privateLinkage = (J9::Z::PrivateLinkage *) cg()->getLinkage(TR_Private);
3363
TR::RealRegister * javaStackPointerRealRegister = privateLinkage->getStackPointerRealRegister();
3364
TR::Register * methodMetaDataVirtualRegister = privateLinkage->getMethodMetaDataRealRegister();
3365
3366
// store java stack pointer
3367
generateRXInstruction(codeGen, TR::InstOpCode::getStoreOpCode(), callNode, javaStackPointerRealRegister,
3368
new (trHeapMemory()) TR::MemoryReference(methodMetaDataVirtualRegister, (int32_t)fej9->thisThreadGetJavaSPOffset(), codeGen));
3369
3370
}
3371
3372
// Set up the linkage-specific real-register dependencies for an outgoing call.
// Extends the base-class setup with: scratch registers for system dispatch,
// z/OS CAA/system-stack handling, on-demand literal pool materialization, and
// (on z/OS) reloading the system stack pointer from the vmthread.
// NOTE(review): the killAndAssignRegister calls are order-sensitive; do not
// reorder them.
void
J9::Z::PrivateLinkage::setupRegisterDepForLinkage(TR::Node * callNode, TR_DispatchType dispatchType,
   TR::RegisterDependencyConditions * &deps, int64_t & killMask, TR::SystemLinkage * systemLinkage,
   TR::Node * &GlobalRegDeps, bool &hasGlRegDeps, TR::Register ** methodAddressReg, TR::Register * &javaLitOffsetReg)
   {
   // call base class
   OMR::Z::Linkage::setupRegisterDepForLinkage(callNode, dispatchType, deps, killMask, systemLinkage, GlobalRegDeps, hasGlRegDeps, methodAddressReg, javaLitOffsetReg);

   TR::CodeGenerator * codeGen = cg();

   // System (C) dispatch uses platform-specific scratch registers for the
   // method address and literal offset: GPR14/GPR8 on Linux, swapped on z/OS.
   if (dispatchType == TR_SystemDispatch)
      {
      killMask = killAndAssignRegister(killMask, deps, methodAddressReg, (comp()->target().isLinux()) ? TR::RealRegister::GPR14 : TR::RealRegister::GPR8 , codeGen, true);
      killMask = killAndAssignRegister(killMask, deps, &javaLitOffsetReg, (comp()->target().isLinux()) ? TR::RealRegister::GPR8 : TR::RealRegister::GPR14 , codeGen, true);
      }

   /*****************/

   TR::RealRegister * systemStackRealRegister = systemLinkage->getStackPointerRealRegister();
   TR::Register * systemStackVirtualRegister = systemStackRealRegister;

   if (comp()->target().isZOS())
      {
      // z/OS: kill/assign the CAA pointer register for the call.  If the
      // system stack pointer is not permanently locked, it also needs its own
      // virtual register plus a pre-condition pinning it in place.
      TR::RealRegister::RegNum systemStackPointerRegister;
      TR::RealRegister::RegNum systemCAAPointerRegister = ((TR::S390zOSSystemLinkage *)systemLinkage)->getCAAPointerRegister();
      TR::Register * systemCAAVirtualRegister = NULL;

      killMask = killAndAssignRegister(killMask, deps, &systemCAAVirtualRegister, systemCAAPointerRegister, codeGen, true);

      if (systemStackRealRegister->getState() != TR::RealRegister::Locked)
         {
         systemStackPointerRegister = ((TR::S390zOSSystemLinkage *)systemLinkage)->getStackPointerRegister();
         systemStackVirtualRegister = NULL;
         killMask = killAndAssignRegister(killMask, deps, &systemStackVirtualRegister, systemStackPointerRegister, codeGen, true);
         deps->addPreCondition(systemStackVirtualRegister,systemStackPointerRegister);
         }
      }

   /*****************/
   J9::Z::PrivateLinkage * privateLinkage = (J9::Z::PrivateLinkage *) cg()->getLinkage(TR_Private);

   TR::RealRegister * javaLitPoolRealRegister = privateLinkage->getLitPoolRealRegister();
   TR::Register * javaLitPoolVirtualRegister = javaLitPoolRealRegister;

   // With on-demand literal pools there is no dedicated lit-pool register;
   // kill/assign one and materialize the pool address into it here.
   if (codeGen->isLiteralPoolOnDemandOn())
      {
      javaLitPoolVirtualRegister = NULL;
      killMask = killAndAssignRegister(killMask, deps, &javaLitPoolVirtualRegister, javaLitPoolRealRegister, codeGen, true);
      generateLoadLiteralPoolAddress(codeGen, callNode, javaLitPoolVirtualRegister);
      }

   /*****************/
   TR::Register * methodMetaDataVirtualRegister = privateLinkage->getMethodMetaDataRealRegister();

   // This logic was originally in OMR::Z::Linkage::buildNativeDispatch and the condition is cg()->supportsJITFreeSystemStackPointer().
   // The original condition is only true for J9 and only on zos, so replacing it with comp()->target().isZOS().
   if ( comp()->target().isZOS() )
      {
      // Reload the system stack pointer from its slot in the vmthread.
      TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
      generateRXInstruction(codeGen, TR::InstOpCode::getLoadOpCode(), callNode, systemStackVirtualRegister,
            new (trHeapMemory()) TR::MemoryReference(methodMetaDataVirtualRegister, (int32_t)fej9->thisThreadGetSystemSPOffset(), codeGen));
      }

   }
TR::RealRegister::RegNum
J9::Z::PrivateLinkage::getSystemStackPointerRegister()
   {
   // The system (C) stack pointer belongs to the system linkage, so query it
   // there instead of caching a copy in the private linkage.
   TR::Linkage * systemLinkage = cg()->getLinkage(TR_System);
   return systemLinkage->getStackPointerRegister();
   }
// The JNI linkage reuses all of the private (Java) linkage machinery; the
// constructor simply forwards to the PrivateLinkage base.
J9::Z::JNILinkage::JNILinkage(TR::CodeGenerator * cg, TR_LinkageConventions elc)
   : J9::Z::PrivateLinkage(cg, elc)
   {
   }