Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openj9
Path: blob/master/runtime/compiler/z/codegen/ReduceSynchronizedFieldLoad.cpp
6004 views
1
/*******************************************************************************
2
* Copyright (c) 2000, 2021 IBM Corp. and others
3
*
4
* This program and the accompanying materials are made available under
5
* the terms of the Eclipse Public License 2.0 which accompanies this
6
* distribution and is available at https://www.eclipse.org/legal/epl-2.0/
7
* or the Apache License, Version 2.0 which accompanies this distribution and
8
* is available at https://www.apache.org/licenses/LICENSE-2.0.
9
*
10
* This Source Code may also be made available under the following
11
* Secondary Licenses when the conditions for such availability set
12
* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
13
* General Public License, version 2 with the GNU Classpath
14
* Exception [1] and GNU General Public License, version 2 with the
15
* OpenJDK Assembly Exception [2].
16
*
17
* [1] https://www.gnu.org/software/classpath/license.html
18
* [2] http://openjdk.java.net/legal/assembly-exception.html
19
*
20
* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
21
*******************************************************************************/
22
23
#include "codegen/ReduceSynchronizedFieldLoad.hpp"
24
25
#include <stddef.h>
26
#include <stdint.h>
27
#include "j9.h"
28
#include "j9cfg.h"
29
#include "j9consts.h"
30
#include "codegen/CodeGenerator.hpp"
31
#include "codegen/Linkage_inlines.hpp"
32
#include "codegen/S390CHelperLinkage.hpp"
33
#include "codegen/TreeEvaluator.hpp"
34
#include "env/VMJ9.h"
35
#include "il/ILOps.hpp"
36
#include "il/ILOpCodes.hpp"
37
#include "il/Node_inlines.hpp"
38
#include "il/TreeTop.hpp"
39
#include "il/TreeTop_inlines.hpp"
40
#include "infra/Assert.hpp"
41
#include "z/codegen/S390Evaluator.hpp"
42
#include "z/codegen/S390GenerateInstructions.hpp"
43
#include "z/codegen/S390Instruction.hpp"
44
#include "z/codegen/S390Register.hpp"
45
46
#define OPT_DETAILS "O^O REDUCE SYNCHRONIZED FIELD LOAD: "
47
48
/**
 * \brief Generates the inlined instruction sequence for a fabricated synchronizedFieldLoadSymbol
 *        call node (created by \c performOnTreeTops).
 *
 * The fast path uses an interlocked LOAD PAIR DISJOINT (LPD / LPDG) to fetch the field value and
 * the object's lockword atomically. If the interlocked fetch did not succeed (condition code 3),
 * or the lockword is neither zero (free) nor the current thread, control branches to an
 * out-of-line slow path which performs the original monent / load / monexit sequence via C
 * helpers and merges back.
 *
 * Children of \p node (see the fabrication site in \c performOnTreeTops):
 *   0: the object being synchronized on
 *   1: the indirect load of the field
 *   2: iconst holding the lockword offset within the object
 *   3: call node carrying the monent symbol reference
 *   4: call node carrying the monexit symbol reference
 *
 * \param node The fabricated TR::call node to inline.
 * \param cg   The code generator.
 */
void
ReduceSynchronizedFieldLoad::inlineSynchronizedFieldLoad(TR::Node* node, TR::CodeGenerator* cg)
   {
   TR::Compilation *comp = cg->comp();
   TR::Node* synchronizedObjectNode = node->getChild(0);

   // Materialize the object register first because LPD can only deal with base-displacement type memory references and
   // because the object appears directly underneath the indirect load we are guaranteed not to have an index register
   TR::Register* objectRegister = cg->evaluate(synchronizedObjectNode);

   TR::Node* loadNode = node->getChild(1);

   TR::Node* monentSymbolReferenceNode = node->getChild(3);
   TR::Node* monexitSymbolReferenceNode = node->getChild(4);

   TR::LabelSymbol* mergeLabel = generateLabelSymbol(cg);
   TR::LabelSymbol* fastPathLabel = generateLabelSymbol(cg);
   TR::LabelSymbol* slowPathLabel = generateLabelSymbol(cg);

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, fastPathLabel);

   // The slow path lives in an out-of-line code section since it is expected to execute rarely.
   // Swapping instruction lists makes subsequent instructions append to the OOL section.
   TR_S390OutOfLineCodeSection* outOfLineCodeSection = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(slowPathLabel, mergeLabel, cg);
   cg->getS390OutOfLineCodeSectionList().push_front(outOfLineCodeSection);
   outOfLineCodeSection->swapInstructionListsWithCompilation();

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, slowPathLabel);

   // Generate a dynamic debug counter for the fallback path whose execution should be extremely rare
   cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "codegen/z/ReduceSynchronizedFieldLoad/success/OOL/%s", comp->signature()));

   J9::Z::CHelperLinkage* helperLink = static_cast<J9::Z::CHelperLinkage*>(cg->getLinkage(TR_CHelper));

   // Enter the monitor on the slow path by dispatching the C helper on the fabricated monent call node
   helperLink->buildDirectDispatch(monentSymbolReferenceNode);

   // The logic for evaluating a particular load is non-trivial in both evaluation sequence and setting of the various
   // register flags (collected references, etc.). As such we evaluate the load preemptively and extract the
   // materialized memory reference directly from the load itself for use in LPD.
   TR::Register* loadRegister = cg->evaluate(loadNode);

   // Search for the load memory reference from the previously evaluated load
   TR::Instruction* loadInstruction = cg->getAppendInstruction();

   TR_ASSERT_SAFE_FATAL(loadInstruction->isLoad() && (loadInstruction->getKind() == OMR::Instruction::Kind::IsRX || loadInstruction->getKind() == OMR::Instruction::Kind::IsRXY), "Expecting the append instruction to be a load of kind RX or RXY\n");

   TR::MemoryReference* loadMemoryReference = static_cast<TR::S390RXInstruction*>(loadInstruction)->getMemoryReference();

   TR_ASSERT_SAFE_FATAL(loadMemoryReference->getIndexRegister() == NULL, "Load memory reference must not have an index register\n");

   // Exit the monitor on the slow path by dispatching the C helper on the fabricated monexit call node
   helperLink->buildDirectDispatch(monexitSymbolReferenceNode);

   // Unconditionally merge back into the mainline
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, mergeLabel);

   // Switch back to emitting into the mainline (fast path) instruction stream
   outOfLineCodeSection->swapInstructionListsWithCompilation();

   // LPD(G) targets an even-odd register pair; the dependency conditions below pin loadRegister
   // as the even register and lockRegister as the odd register of the pair
   TR::Register* lockRegister = cg->allocateRegister();
   TR::RegisterPair* registerPair = cg->allocateConsecutiveRegisterPair(lockRegister, loadRegister);

   TR::RegisterDependencyConditions* conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 3, cg);

   conditions->addPostCondition(registerPair, TR::RealRegister::EvenOddPair);
   conditions->addPostCondition(loadRegister, TR::RealRegister::LegalEvenOfPair);
   conditions->addPostCondition(lockRegister, TR::RealRegister::LegalOddOfPair);

   // Recreate the memory reference since we cannot share the same one for fast and slow paths of the ICF diamond
   loadMemoryReference = generateS390MemoryReference(*loadMemoryReference, 0, cg);

   TR::Node* lockWordOffsetNode = node->getChild(2);

   int32_t lockWordOffset = lockWordOffsetNode->getConst<int32_t>();

   TR::MemoryReference* lockMemoryReference = generateS390MemoryReference(objectRegister, lockWordOffset, cg);

   const bool generateCompressedLockWord = static_cast<TR_J9VMBase*>(comp->fe())->generateCompressedLockWord();

   // The lockword is 32 bits wide on 32-bit targets or when lockwords are compressed
   const bool is32BitLock = comp->target().is32Bit() || generateCompressedLockWord;
   const bool is32BitLoad = J9::DataType::getSize(loadNode->getDataType()) == 4;

   bool lockRegisterRequiresZeroExt = false;
   bool loadRegisterRequiresZeroExt = false;
   bool loadRegisterRequiresSignExt = false;

   if (is32BitLock && is32BitLoad)
      {
      // Both operands are 32-bit, so the 32-bit LPD can be used directly
      generateSSFInstruction(cg, TR::InstOpCode::LPD, node, registerPair, loadMemoryReference, lockMemoryReference);

      loadRegisterRequiresZeroExt = loadNode->isZeroExtendedTo64BitAtSource();
      loadRegisterRequiresSignExt = loadNode->isSignExtendedTo64BitAtSource();
      }
   else
      {
      // LPDG requires memory references to be aligned to a double-word boundary
      TR::MemoryReference* alignedLockMemoryReference = lockMemoryReference;
      TR::MemoryReference* alignedLoadMemoryReference = loadMemoryReference;

      bool lockRegisterRequiresShift = false;
      bool loadRegisterRequiresShift = false;

      if (is32BitLock)
         {
         if ((lockWordOffset & 7) == 0)
            {
            // The 32-bit lockword occupies the high half of the loaded double-word; shift it down (SRLG below)
            lockRegisterRequiresShift = true;
            }
         else
            {
            // This is because we must use LPDG to load a 32-bit value using displacement -4
            TR_ASSERT_SAFE_FATAL((lockWordOffset & 3) == 0, "Lockword must be aligned on a word boundary\n");

            // The lockword lands in the low half of the double-word; zero out the high half (LLGFR below)
            lockRegisterRequiresZeroExt = true;
            alignedLockMemoryReference = generateS390MemoryReference(*lockMemoryReference, -4, cg);
            }
         }
      else
         {
         TR_ASSERT_SAFE_FATAL((lockWordOffset & 7) == 0, "Lockword must be aligned on a double-word boundary\n");
         }

      if (is32BitLoad)
         {
         if ((loadMemoryReference->getOffset() & 7) == 0)
            {
            // The 32-bit field occupies the high half of the loaded double-word; shift it down (SRLG below)
            loadRegisterRequiresShift = true;
            }
         else
            {
            // This is because we must use LPDG to load a 32-bit value using displacement -4
            TR_ASSERT_SAFE_FATAL((loadMemoryReference->getOffset() & 3) == 0, "Field must be aligned on a word boundary\n");

            loadRegisterRequiresZeroExt = loadNode->isZeroExtendedTo64BitAtSource();
            loadRegisterRequiresSignExt = loadNode->isSignExtendedTo64BitAtSource();

            alignedLoadMemoryReference = generateS390MemoryReference(*loadMemoryReference, -4, cg);
            }
         }
      else
         {
         TR_ASSERT_SAFE_FATAL((loadMemoryReference->getOffset() & 7) == 0, "Field must be aligned on a double-word boundary\n");
         }

      generateSSFInstruction(cg, TR::InstOpCode::LPDG, node, registerPair, alignedLoadMemoryReference, alignedLockMemoryReference);

      if (lockRegisterRequiresShift)
         {
         generateRSInstruction(cg, TR::InstOpCode::SRLG, node, lockRegister, lockRegister, 32);
         }

      if (loadRegisterRequiresShift)
         {
         generateRSInstruction(cg, TR::InstOpCode::SRLG, node, loadRegister, loadRegister, 32);
         }
      }

   if (lockRegisterRequiresZeroExt)
      {
      generateRREInstruction(cg, TR::InstOpCode::LLGFR, node, lockRegister, lockRegister);
      }

   if (loadRegisterRequiresZeroExt)
      {
      generateRREInstruction(cg, TR::InstOpCode::LLGFR, node, loadRegister, loadRegister);
      }

   if (loadRegisterRequiresSignExt)
      {
      generateRRInstruction(cg, TR::InstOpCode::LGFR, node, loadRegister, loadRegister);
      }

   // LPD sets condition code 3 if the pair was not loaded by means of interlocked fetch
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BO, node, slowPathLabel);

   // Mask out the recursion and lock reservation bits
   generateRIInstruction(cg, TR::InstOpCode::NILL, node, lockRegister, ~(OBJECT_HEADER_LOCK_RECURSION_MASK | OBJECT_HEADER_LOCK_RESERVED));

   TR::Register *metaDataReg = cg->getMethodMetaDataRealRegister();
   if (is32BitLock && is32BitLoad)
      {
      // Now check if we loaded the lock of the current thread
      generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::CR, node, lockRegister, metaDataReg, TR::InstOpCode::COND_BE, mergeLabel, false);

      // Lock could be free as well
      generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, lockRegister, 0, TR::InstOpCode::COND_BE, mergeLabel, false);
      }
   else
      {
      // Now check if we loaded the lock of the current thread
      generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::CGR, node, lockRegister, metaDataReg, TR::InstOpCode::COND_BE, mergeLabel, false);

      // Lock could be free as well
      generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::CG, node, lockRegister, 0, TR::InstOpCode::COND_BE, mergeLabel, false);
      }

   // Lock is held by another thread: take the slow path (unconditional branch to the OOL section)
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, slowPathLabel);

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, mergeLabel, conditions);

   cg->decReferenceCount(synchronizedObjectNode);
   cg->decReferenceCount(loadNode);
   cg->decReferenceCount(lockWordOffsetNode);
   cg->recursivelyDecReferenceCount(monentSymbolReferenceNode);
   cg->recursivelyDecReferenceCount(monexitSymbolReferenceNode);
   cg->stopUsingRegister(lockRegister);
   cg->stopUsingRegister(registerPair);
   }
252
253
bool
254
ReduceSynchronizedFieldLoad::perform()
255
{
256
bool transformed = false;
257
258
if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z196))
259
{
260
if (!cg->comp()->getOption(TR_DisableSynchronizedFieldLoad) && cg->comp()->getMethodSymbol()->mayContainMonitors())
261
{
262
traceMsg(cg->comp(), "Performing ReduceSynchronizedFieldLoad\n");
263
264
for (TR::TreeTopOrderExtendedBlockIterator iter(cg->comp()); iter.getFirst() != NULL; ++iter)
265
{
266
transformed = performOnTreeTops(iter.getFirst()->getEntry(), iter.getLast()->getExit());
267
}
268
}
269
}
270
271
return transformed;
272
}
273
274
/**
 * \brief Scans the treetops in [startTreeTop, endTreeTop) for a monent..monexit region on a
 *        single object whose body is exactly one indirect load of a field of that object, and
 *        replaces the region with a fabricated synchronizedFieldLoadSymbol call node which
 *        \c inlineSynchronizedFieldLoad later expands into an LPD/LPDG based sequence.
 *
 * \param startTreeTop The first treetop of the extended basic block (inclusive).
 * \param endTreeTop   The treetop ending the scan (exclusive).
 *
 * \return true if a transformation was performed; false otherwise.
 */
bool
ReduceSynchronizedFieldLoad::performOnTreeTops(TR::TreeTop* startTreeTop, TR::TreeTop* endTreeTop)
   {
   TR::Compilation *comp = cg->comp();
   bool transformed = false;

   for (TR::TreeTopIterator iter(startTreeTop, comp); iter != endTreeTop; ++iter)
      {
      // A monitor enter appears either directly as a treetop or as the child of a TR::treetop node
      if (iter.currentNode()->getOpCodeValue() == TR::monent ||
            iter.currentNode()->getOpCodeValue() == TR::treetop && iter.currentNode()->getFirstChild()->getOpCodeValue() == TR::monent)
         {
         TR::TreeTop* monentTreeTop = iter.currentTree();
         TR::Node* monentNode = iter.currentNode()->getOpCodeValue() == TR::monent ?
            iter.currentNode() :
            iter.currentNode()->getFirstChild();
         // Locking on value types or value based classes is prohibited by the spec.,
         // so this optimization can only be performed if we are certain (at compile time)
         // the locking object is not a value type or value based
         if (cg->isMonitorValueBasedOrValueType(monentNode) != TR_no)
            continue;
         if (comp->getOption(TR_TraceCG))
            {
            traceMsg(comp, "Found monent [%p]\n", monentNode);
            }

         // Continue scanning forward from the monent for the matching monexit
         for (++iter; iter != endTreeTop; ++iter)
            {
            if (iter.currentNode()->getOpCodeValue() == TR::monexit ||
                  iter.currentNode()->getOpCodeValue() == TR::treetop && iter.currentNode()->getFirstChild()->getOpCodeValue() == TR::monexit)
               {
               TR::TreeTop* monexitTreeTop = iter.currentTree();
               TR::Node* monexitNode = iter.currentNode()->getOpCodeValue() == TR::monexit ?
                  iter.currentNode() :
                  iter.currentNode()->getFirstChild();
               if (comp->getOption(TR_TraceCG))
                  {
                  traceMsg(comp, "Found monexit [%p]\n", monexitNode);
                  }

               TR::Node* synchronizedObjectNode = monentNode->getFirstChild();

               // Both monitor operations must act on the very same object node
               if (synchronizedObjectNode == monexitNode->getFirstChild())
                  {
                  if (comp->getOption(TR_TraceCG))
                     {
                     // NOTE(review): the trailing monexitNode argument has no matching format
                     // specifier in this message and is ignored by traceMsg
                     traceMsg(comp, "Children of monent and monexit are synchronizing on the same object\n", monexitNode);
                     }

                  TR::Node* loadNode = findLoadInSynchornizedRegion(startTreeTop, endTreeTop, monentTreeTop, monexitTreeTop, synchronizedObjectNode);

                  if (loadNode != NULL)
                     {
                     // Disallow this optimization for 64-bit loads on 31-bit JVM due to register pairs
                     if (comp->target().is32Bit() && J9::DataType::getSize(loadNode->getDataType()) == 8)
                        {
                        TR::DebugCounter::incStaticDebugCounter(comp, TR::DebugCounter::debugCounterName(comp, "codegen/z/ReduceSynchronizedFieldLoad/failure/31-bit-register-pairs/%s", comp->signature()));

                        break;
                        }

                     // When concurrent scavenge is enabled we need to load the object reference using a read barrier however
                     // there is no guarded load alternative for the LPD instruction. As such this optimization cannot be carried
                     // out for object reference loads under concurrent scavenge.
                     if (TR::Compiler->om.readBarrierType() != gc_modron_readbar_none && loadNode->getDataType().isAddress())
                        {
                        TR::DebugCounter::incStaticDebugCounter(comp, TR::DebugCounter::debugCounterName(comp, "codegen/z/ReduceSynchronizedFieldLoad/failure/read-barrier/%s", comp->signature()));

                        break;
                        }

                     int32_t lockWordOffset = static_cast<TR_J9VMBase*>(comp->fe())->getByteOffsetToLockword(static_cast<TR_OpaqueClassBlock*>(cg->getMonClass(monentNode)));

                     if (comp->getOption(TR_TraceCG))
                        {
                        traceMsg(comp, "Lock word offset = %d\n", lockWordOffset);
                        }

                     // LPD(G) is an SSF instruction with a 12-bit displacement
                     if (lockWordOffset > 0 && lockWordOffset < 4096)
                        {
                        if (performTransformation(comp, "%sReplacing monent [%p] - monexit [%p] synchronized region on load [%p] with fabricated call\n", OPT_DETAILS, monentNode, monexitNode, loadNode))
                           {
                           transformed = true;

                           // Fabricate a special codegen inlined method call symbol reference
                           TR::SymbolReference* methodSymRef = comp->getSymRefTab()->findOrCreateCodeGenInlinedHelper(TR::SymbolReferenceTable::synchronizedFieldLoadSymbol);

                           // Child layout consumed by inlineSynchronizedFieldLoad:
                           // 0 = object, 1 = load, 2 = lockword offset, 3 = monent symref, 4 = monexit symref
                           TR::Node* callNode = TR::Node::createWithSymRef(loadNode, TR::call, 5, methodSymRef);

                           callNode->setAndIncChild(0, synchronizedObjectNode);
                           callNode->setAndIncChild(1, loadNode);

                           TR::Node* lockWordOffsetNode = TR::Node::iconst(loadNode, lockWordOffset);

                           callNode->setAndIncChild(2, lockWordOffsetNode);

                           // Carrier call nodes preserving the monitor symbol references for the slow-path helpers
                           TR::Node* monentSymbolReferenceNode = TR::Node::createWithSymRef(loadNode, TR::call, 1, synchronizedObjectNode, monentNode->getSymbolReference());
                           TR::Node* monexitSymbolReferenceNode = TR::Node::createWithSymRef(loadNode, TR::call, 1, synchronizedObjectNode, monexitNode->getSymbolReference());

                           callNode->setAndIncChild(3, monentSymbolReferenceNode);
                           callNode->setAndIncChild(4, monexitSymbolReferenceNode);

                           TR::Node* treeTopNode = TR::Node::create(loadNode, TR::treetop, 1, callNode);

                           TR::TreeTop* callTreeTop = TR::TreeTop::create(comp, treeTopNode);

                           // Insert fabricated call treetop
                           monentTreeTop->insertBefore(callTreeTop);

                           // Remove the monitor region
                           monentTreeTop->unlink(true);
                           monexitTreeTop->unlink(true);

                           TR::DebugCounter::incStaticDebugCounter(comp, TR::DebugCounter::debugCounterName(comp, "codegen/z/ReduceSynchronizedFieldLoad/success/%s", comp->signature()));
                           }
                        }
                     else
                        {
                        TR::DebugCounter::incStaticDebugCounter(comp, TR::DebugCounter::debugCounterName(comp, "codegen/z/ReduceSynchronizedFieldLoad/failure/lockword-out-of-bounds/%s", comp->signature()));
                        }
                     }
                  else
                     {
                     TR::DebugCounter::incStaticDebugCounter(comp, TR::DebugCounter::debugCounterName(comp, "codegen/z/ReduceSynchronizedFieldLoad/failure/load-not-found/%s", comp->signature()));
                     }
                  }
               else
                  {
                  TR::DebugCounter::incStaticDebugCounter(comp, TR::DebugCounter::debugCounterName(comp, "codegen/z/ReduceSynchronizedFieldLoad/failure/monexit-synchronized-object-mismatch/%s", comp->signature()));
                  }

               // Only the first monexit after a monent is considered; resume the outer scan
               break;
               }
            }

         // The inner loop may have exhausted the range; avoid incrementing iter past endTreeTop
         if (iter == endTreeTop)
            {
            break;
            }
         }
      }

   return transformed;
   }
418
419
/**
 * \brief Searches the monitored region [monentTreeTop, monexitTreeTop) for the single indirect
 *        load (of reference, integer, or long type) off the synchronized object.
 *
 * NOTE(review): "Synchornized" in the name is a typo, but it is part of the declared interface
 * (the declaration lives in the header, outside this file) so it is left as-is here.
 *
 * \param startTreeTop           First treetop of the extended basic block; iteration starts here
 *                               so that nodes evaluated before the region are marked visited.
 * \param endTreeTop             Currently unused by this function.
 * \param monentTreeTop          The treetop of the monitor enter.
 * \param monexitTreeTop         The treetop of the monitor exit (exclusive scan bound).
 * \param synchronizedObjectNode The object node both monitor operations act on.
 *
 * \return The load node if the region contains exactly that one load (and no other
 *         symbol-referencing or branching nodes first seen inside it); NULL otherwise.
 */
TR::Node*
ReduceSynchronizedFieldLoad::findLoadInSynchornizedRegion(TR::TreeTop* startTreeTop, TR::TreeTop* endTreeTop, TR::TreeTop* monentTreeTop, TR::TreeTop* monexitTreeTop, TR::Node* synchronizedObjectNode)
   {
   TR::Compilation *comp = cg->comp();
   TR::PreorderNodeIterator iter(startTreeTop, comp);

   // First iterate through all the nodes from the start treetop until we reach the monitor provided so that all nodes
   // seen thus far would have already been visited, and hence we will not recurse into them in the subsequent for loop
   // since a reference was already seen. This enables us to carry out the reduce synchronized field load optimization
   // even if there are side-effect nodes within the monitored region - as long as those side-effect nodes have been
   // evaluated outside of the monitored region.
   for (; iter != monentTreeTop->getNextTreeTop(); ++iter)
      {
      TR::Node* currentNode = iter.currentNode();

      if (comp->getOption(TR_TraceCG))
         {
         traceMsg(comp, "Iterating node [%p] outside the monitored region\n", currentNode);
         }
      }

   TR::Node* loadNode = NULL;

   // Now walk the not-yet-visited nodes inside the monitored region itself
   for (; iter != monexitTreeTop; ++iter)
      {
      TR::Node* currentNode = iter.currentNode();

      if (comp->getOption(TR_TraceCG))
         {
         traceMsg(comp, "Iterating node [%p] inside the monitored region\n", currentNode);
         }

      TR::ILOpCode opcode = currentNode->getOpCode();

      // Only symbol-referencing or branching nodes can carry side effects relevant here;
      // anything else inside the region is ignored
      if (opcode.hasSymbolReference() || opcode.isBranch())
         {
         // Accept the first (and only) qualifying load: an indirect ref/int/long load whose
         // base is the synchronized object itself
         if (loadNode == NULL &&
               opcode.isLoadIndirect() && (opcode.isRef() || opcode.isInt() || opcode.isLong()) &&
               currentNode->getFirstChild() == synchronizedObjectNode)
            {
            if (comp->getOption(TR_TraceCG))
               {
               traceMsg(comp, "Found load node [%p]\n", currentNode);
               }

            loadNode = currentNode;
            }
         else
            {
            // Any other side-effect node (or a second load) disqualifies the region entirely
            if (comp->getOption(TR_TraceCG))
               {
               traceMsg(comp, "Found sideeffect node [%p] within the monitored region\n", currentNode);
               }

            loadNode = NULL;

            break;
            }
         }
      }

   return loadNode;
   }
482
483