Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openj9
Path: blob/master/runtime/compiler/optimizer/DataAccessAccelerator.cpp
6000 views
1
/*******************************************************************************
2
* Copyright (c) 2000, 2022 IBM Corp. and others
3
*
4
* This program and the accompanying materials are made available under
5
* the terms of the Eclipse Public License 2.0 which accompanies this
6
* distribution and is available at https://www.eclipse.org/legal/epl-2.0/
7
* or the Apache License, Version 2.0 which accompanies this distribution and
8
* is available at https://www.apache.org/licenses/LICENSE-2.0.
9
*
10
* This Source Code may also be made available under the following
11
* Secondary Licenses when the conditions for such availability set
12
* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
13
* General Public License, version 2 with the GNU Classpath
14
* Exception [1] and GNU General Public License, version 2 with the
15
* OpenJDK Assembly Exception [2].
16
*
17
* [1] https://www.gnu.org/software/classpath/license.html
18
* [2] http://openjdk.java.net/legal/assembly-exception.html
19
*
20
* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
21
*******************************************************************************/
22
23
#include "optimizer/DataAccessAccelerator.hpp"
24
25
#include <algorithm>
26
#include <limits.h>
27
#include <math.h>
28
#include <stddef.h>
29
#include <stdint.h>
30
#include <stdio.h>
31
#include <stdlib.h>
32
#include <string.h>
33
#include "codegen/CodeGenerator.hpp"
34
#include "env/FrontEnd.hpp"
35
#include "codegen/RecognizedMethods.hpp"
36
#include "codegen/RegisterConstants.hpp"
37
#include "compile/Compilation.hpp"
38
#include "compile/Method.hpp"
39
#include "compile/ResolvedMethod.hpp"
40
#include "compile/SymbolReferenceTable.hpp"
41
#include "control/Options.hpp"
42
#include "control/Options_inlines.hpp"
43
#include "control/Recompilation.hpp"
44
#include "control/RecompilationInfo.hpp"
45
#include "env/CompilerEnv.hpp"
46
#include "env/StackMemoryRegion.hpp"
47
#include "env/TRMemory.hpp"
48
#include "env/jittypes.h"
49
#include "env/VMJ9.h"
50
#include "il/Block.hpp"
51
#include "il/DataTypes.hpp"
52
#include "il/ILOpCodes.hpp"
53
#include "il/ILOps.hpp"
54
#include "il/MethodSymbol.hpp"
55
#include "il/Node.hpp"
56
#include "il/NodePool.hpp"
57
#include "il/Node_inlines.hpp"
58
#include "il/ParameterSymbol.hpp"
59
#include "il/ResolvedMethodSymbol.hpp"
60
#include "il/StaticSymbol.hpp"
61
#include "il/Symbol.hpp"
62
#include "il/SymbolReference.hpp"
63
#include "il/TreeTop.hpp"
64
#include "il/TreeTop_inlines.hpp"
65
#include "infra/Assert.hpp"
66
#include "infra/Cfg.hpp"
67
#include "infra/Stack.hpp"
68
#include "infra/TRCfgEdge.hpp"
69
#include "infra/TRCfgNode.hpp"
70
#include "optimizer/Optimization.hpp"
71
#include "optimizer/Optimization_inlines.hpp"
72
#include "optimizer/OptimizationManager.hpp"
73
#include "optimizer/Optimizations.hpp"
74
#include "optimizer/Optimizer.hpp"
75
#include "optimizer/OSRGuardRemoval.hpp"
76
#include "optimizer/Structure.hpp"
77
#include "optimizer/TransformUtil.hpp"
78
#include "ras/Debug.hpp"
79
80
// Evaluates to true when child 2 or child 3 of callNode is not a load-constant
// node (see isChildConst). performOnBlock uses it to defer such packed decimal
// to integer/long calls to processVariableCalls.
// NOTE(review): children 2 and 3 are presumably the precision and overflow-check
// arguments of the DAA convertPackedDecimalTo{Integer,Long} API -- confirm.
#define IS_VARIABLE_PD2I(callNode) (!isChildConst(callNode, 2) || !isChildConst(callNode, 3))
81
82
/**
 * \brief Constructs the Data Access Accelerator optimization.
 *
 * \param manager
 *    The optimization manager, forwarded unchanged to the TR::Optimization base class.
 */
TR_DataAccessAccelerator::TR_DataAccessAccelerator(TR::OptimizationManager* manager)
   : TR::Optimization(manager)
   {
   // No state of our own to initialize
   }
88
89
int32_t TR_DataAccessAccelerator::perform()
90
{
91
int32_t result = 0;
92
93
if (!comp()->getOption(TR_DisableIntrinsics) &&
94
!comp()->getOption(TR_MimicInterpreterFrameShape) &&
95
96
// We cannot handle arraylets because hardware intrinsics act on contiguous memory
97
!comp()->generateArraylets()&& !TR::Compiler->om.useHybridArraylets())
98
{
99
100
// A vector to keep track of variable packed decimal calls
101
TR::StackMemoryRegion stackMemoryRegion(*(comp()->trMemory()));
102
TreeTopContainer variableCallTreeTops(stackMemoryRegion);
103
104
for (TR::AllBlockIterator iter(optimizer()->getMethodSymbol()->getFlowGraph(), comp());
105
iter.currentBlock() != NULL;
106
++iter)
107
{
108
TR::Block* block = iter.currentBlock();
109
110
result += performOnBlock(block, &variableCallTreeTops);
111
}
112
113
result += processVariableCalls(&variableCallTreeTops);
114
}
115
116
if (result != 0)
117
{
118
optimizer()->setUseDefInfo(NULL);
119
optimizer()->setValueNumberInfo(NULL);
120
optimizer()->setAliasSetsAreValid(false);
121
}
122
123
return result;
124
}
125
126
int32_t
127
TR_DataAccessAccelerator::processVariableCalls(TreeTopContainer* variableCallTreeTops)
128
{
129
int32_t result = 0;
130
131
// Process variable precision calls after iterating through all the nodes
132
for(int i = 0; i < variableCallTreeTops->size(); ++i)
133
{
134
TR::TreeTop* treeTop = variableCallTreeTops->at(i);
135
TR::Node* callNode = treeTop->getNode()->getChild(0);
136
TR::ResolvedMethodSymbol* callSymbol = callNode->getSymbol()->getResolvedMethodSymbol();
137
if (callSymbol != NULL)
138
{
139
if (!comp()->getOption(TR_DisablePackedDecimalIntrinsics))
140
{
141
switch (callSymbol->getRecognizedMethod())
142
{
143
// DAA Packed Decimal <-> Integer
144
case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToInteger_:
145
{
146
if (generatePD2IVariableParameter(treeTop, callNode, true, false))
147
{
148
++result;
149
}
150
continue;
151
}
152
case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToInteger_ByteBuffer_:
153
{
154
if (generatePD2IVariableParameter(treeTop, callNode, true, true))
155
{
156
++result;
157
}
158
continue;
159
}
160
161
// DAA Packed Decimal <-> Long
162
case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToLong_:
163
{
164
if (generatePD2IVariableParameter(treeTop, callNode, false, false))
165
{
166
++result;
167
}
168
continue;
169
}
170
case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToLong_ByteBuffer_:
171
{
172
if (generatePD2IVariableParameter(treeTop, callNode, false, true))
173
{
174
++result;
175
}
176
continue;
177
}
178
default:
179
break;
180
}
181
}
182
}
183
}
184
185
return result;
186
}
187
188
const char *
189
TR_DataAccessAccelerator::optDetailString() const throw()
190
{
191
return "O^O DATA ACCESS ACCELERATOR: ";
192
}
193
194
/**
 * \brief Scans one block for calls to recognized com.ibm.dataaccess methods and
 *        attempts to replace each with an intrinsic.
 *
 * Every call node is run through up to three stages, stopping at the first stage
 * that recognizes the method:
 *   1. ByteArrayMarshaller / ByteArrayUnmarshaller methods (unless
 *      TR_DisableMarshallingIntrinsics is set); on success the call node is
 *      recreated in place from the node returned by the insert*Intrinsic helper.
 *   2. Packed decimal check and Unicode/External decimal conversions (Z targets
 *      only, unless TR_DisablePackedDecimalIntrinsics is set).
 *   3. Packed decimal arithmetic, shift, comparison and integer/long conversions
 *      (same conditions as stage 2, and only when the block is not cold).
 *
 * \param block
 *    The block whose tree tops are visited.
 * \param variableCallTreeTops
 *    Receives the tree tops of packed decimal to integer/long calls for which
 *    IS_VARIABLE_PD2I holds; those are not transformed here.
 *
 * \return
 *    The number of calls transformed in this block.
 */
int32_t TR_DataAccessAccelerator::performOnBlock(TR::Block* block, TreeTopContainer* variableCallTreeTops)
   {
   int32_t blockResult = 0;
   bool requestOSRGuardRemoval = false;

   for (TR::TreeTopIterator iter(block->getEntry(), comp()); iter != block->getExit(); ++iter)
      {
      // Look through a plain treetop anchor to the node underneath it
      TR::Node* currentNode = iter.currentNode();
      if (currentNode->getOpCodeValue() == TR::treetop)
         {
         currentNode = currentNode->getChild(0);
         }

      if (currentNode == NULL || !currentNode->getOpCode().isCall())
         {
         continue;
         }

      TR::TreeTop* const treeTop = iter.currentTree();
      TR::Node* const callNode = currentNode;
      TR::ResolvedMethodSymbol* const callSymbol = callNode->getSymbol()->getResolvedMethodSymbol();

      if (callSymbol == NULL)
         {
         continue;
         }

      int32_t result = 0;
      bool matched = false;

      // ---- Stage 1: marshalling / unmarshalling ----
      if (!comp()->getOption(TR_DisableMarshallingIntrinsics))
         {
         TR::Node* replacement = NULL;

         switch (callSymbol->getRecognizedMethod())
            {
            // ByteArray Marshalling methods
            case TR::com_ibm_dataaccess_ByteArrayMarshaller_writeShort_:
               replacement = insertIntegerSetIntrinsic(treeTop, callNode, 2, 2);
               break;
            case TR::com_ibm_dataaccess_ByteArrayMarshaller_writeShortLength_:
               replacement = insertIntegerSetIntrinsic(treeTop, callNode, 2, 0);
               break;
            case TR::com_ibm_dataaccess_ByteArrayMarshaller_writeInt_:
               replacement = insertIntegerSetIntrinsic(treeTop, callNode, 4, 4);
               break;
            case TR::com_ibm_dataaccess_ByteArrayMarshaller_writeIntLength_:
               replacement = insertIntegerSetIntrinsic(treeTop, callNode, 4, 0);
               break;
            case TR::com_ibm_dataaccess_ByteArrayMarshaller_writeLong_:
               replacement = insertIntegerSetIntrinsic(treeTop, callNode, 8, 8);
               break;
            case TR::com_ibm_dataaccess_ByteArrayMarshaller_writeLongLength_:
               replacement = insertIntegerSetIntrinsic(treeTop, callNode, 8, 0);
               break;

            case TR::com_ibm_dataaccess_ByteArrayMarshaller_writeFloat_:
               replacement = insertDecimalSetIntrinsic(treeTop, callNode, 4, 4);
               break;
            case TR::com_ibm_dataaccess_ByteArrayMarshaller_writeDouble_:
               replacement = insertDecimalSetIntrinsic(treeTop, callNode, 8, 8);
               break;

            // ByteArray Unmarshalling methods
            case TR::com_ibm_dataaccess_ByteArrayUnmarshaller_readShort_:
               replacement = insertIntegerGetIntrinsic(treeTop, callNode, 2, 2);
               break;
            case TR::com_ibm_dataaccess_ByteArrayUnmarshaller_readShortLength_:
               replacement = insertIntegerGetIntrinsic(treeTop, callNode, 0, 2);
               break;
            case TR::com_ibm_dataaccess_ByteArrayUnmarshaller_readInt_:
               replacement = insertIntegerGetIntrinsic(treeTop, callNode, 4, 4);
               break;
            case TR::com_ibm_dataaccess_ByteArrayUnmarshaller_readIntLength_:
               replacement = insertIntegerGetIntrinsic(treeTop, callNode, 0, 4);
               break;
            case TR::com_ibm_dataaccess_ByteArrayUnmarshaller_readLong_:
               replacement = insertIntegerGetIntrinsic(treeTop, callNode, 8, 8);
               break;
            case TR::com_ibm_dataaccess_ByteArrayUnmarshaller_readLongLength_:
               replacement = insertIntegerGetIntrinsic(treeTop, callNode, 0, 8);
               break;

            case TR::com_ibm_dataaccess_ByteArrayUnmarshaller_readFloat_:
               replacement = insertDecimalGetIntrinsic(treeTop, callNode, 4, 4);
               break;
            case TR::com_ibm_dataaccess_ByteArrayUnmarshaller_readDouble_:
               replacement = insertDecimalGetIntrinsic(treeTop, callNode, 8, 8);
               break;

            default:
               break;
            }

         if (replacement != NULL)
            {
            result = 1;
            matched = true;

            printInliningStatus(true, callNode);

            // Release the call's arguments (last to first), then turn the call
            // node itself into a copy of the replacement node so that existing
            // references to the call keep working
            for (int32_t childIndex = callNode->getNumChildren() - 1; childIndex >= 0; --childIndex)
               {
               callNode->getChild(childIndex)->recursivelyDecReferenceCount();
               }

            callNode->setNumChildren(replacement->getNumChildren());
            callNode->setSymbolReference(NULL);
            TR::Node::recreate(callNode, replacement->getOpCodeValue());

            if (callNode->getOpCode().hasSymbolReference())
               {
               callNode->setSymbolReference(replacement->getSymbolReference());
               }

            for (int32_t childIndex = callNode->getNumChildren() - 1; childIndex >= 0; --childIndex)
               {
               callNode->setChild(childIndex, replacement->getChild(childIndex));
               }
            }
         }

      // Packed decimal intrinsics are only attempted on z/OS and Linux on Z
      const bool isZLinux = comp()->target().cpu.isZ() && comp()->target().isLinux();
      const bool isZOS = comp()->target().isZOS();
      const bool packedDecimalAllowed =
         (isZOS || isZLinux) && !comp()->getOption(TR_DisablePackedDecimalIntrinsics);

      // ---- Stage 2: packed decimal check and decimal format conversions ----
      if (!matched && packedDecimalAllowed)
         {
         // Assume recognized; the default case resets the flag
         matched = true;

         switch (callSymbol->getRecognizedMethod())
            {
            // DAA Packed Decimal Check
            case TR::com_ibm_dataaccess_PackedDecimal_checkPackedDecimal_:
               result += inlineCheckPackedDecimal(treeTop, callNode) ? 1 : 0;
               break;

            // DAA Packed Decimal <-> Unicode Decimal
            case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToUnicodeDecimal_:
               result += generatePD2UD(treeTop, callNode, true) ? 1 : 0;
               break;
            case TR::com_ibm_dataaccess_DecimalData_convertUnicodeDecimalToPackedDecimal_:
               result += generateUD2PD(treeTop, callNode, true) ? 1 : 0;
               break;

            // DAA Packed Decimal <-> External Decimal
            case TR::com_ibm_dataaccess_DecimalData_convertExternalDecimalToPackedDecimal_:
               result += generateUD2PD(treeTop, callNode, false) ? 1 : 0;
               break;
            case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToExternalDecimal_:
               result += generatePD2UD(treeTop, callNode, false) ? 1 : 0;
               break;

            default:
               matched = false;
               break;
            }
         }

      // ---- Stage 3: packed decimal arithmetic, shifts, compares, int/long conversions ----
      if (!matched && packedDecimalAllowed && !block->isCold())
         {
         // Assume recognized; the default case resets the flag
         matched = true;

         comp()->cg()->setUpStackSizeForCallNode(callNode);

         switch (callSymbol->getRecognizedMethod())
            {
            // DAA Packed Decimal arithmetic methods
            case TR::com_ibm_dataaccess_PackedDecimal_addPackedDecimal_:
               result += genArithmeticIntrinsic(treeTop, callNode, TR::pdadd) ? 1 : 0;
               break;
            case TR::com_ibm_dataaccess_PackedDecimal_subtractPackedDecimal_:
               result += genArithmeticIntrinsic(treeTop, callNode, TR::pdsub) ? 1 : 0;
               break;
            case TR::com_ibm_dataaccess_PackedDecimal_multiplyPackedDecimal_:
               result += genArithmeticIntrinsic(treeTop, callNode, TR::pdmul) ? 1 : 0;
               break;
            case TR::com_ibm_dataaccess_PackedDecimal_dividePackedDecimal_:
               result += genArithmeticIntrinsic(treeTop, callNode, TR::pddiv) ? 1 : 0;
               break;
            case TR::com_ibm_dataaccess_PackedDecimal_remainderPackedDecimal_:
               result += genArithmeticIntrinsic(treeTop, callNode, TR::pdrem) ? 1 : 0;
               break;

            // DAA Packed Decimal shift methods
            case TR::com_ibm_dataaccess_PackedDecimal_shiftLeftPackedDecimal_:
               result += genShiftLeftIntrinsic(treeTop, callNode) ? 1 : 0;
               break;
            case TR::com_ibm_dataaccess_PackedDecimal_shiftRightPackedDecimal_:
               result += genShiftRightIntrinsic(treeTop, callNode) ? 1 : 0;
               break;

            // DAA Packed Decimal comparison methods
            case TR::com_ibm_dataaccess_PackedDecimal_lessThanPackedDecimal_:
               result += genComparisionIntrinsic(treeTop, callNode, TR::pdcmplt) ? 1 : 0;
               break;
            case TR::com_ibm_dataaccess_PackedDecimal_lessThanOrEqualsPackedDecimal_:
               result += genComparisionIntrinsic(treeTop, callNode, TR::pdcmple) ? 1 : 0;
               break;
            case TR::com_ibm_dataaccess_PackedDecimal_greaterThanPackedDecimal_:
               result += genComparisionIntrinsic(treeTop, callNode, TR::pdcmpgt) ? 1 : 0;
               break;
            case TR::com_ibm_dataaccess_PackedDecimal_greaterThanOrEqualsPackedDecimal_:
               result += genComparisionIntrinsic(treeTop, callNode, TR::pdcmpge) ? 1 : 0;
               break;
            case TR::com_ibm_dataaccess_PackedDecimal_equalsPackedDecimal_:
               result += genComparisionIntrinsic(treeTop, callNode, TR::pdcmpeq) ? 1 : 0;
               break;

            // DAA Packed Decimal <-> Integer
            case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToInteger_:
               if (IS_VARIABLE_PD2I(callNode))
                  variableCallTreeTops->push_back(treeTop);
               else
                  result += generatePD2I(treeTop, callNode, true, false) ? 1 : 0;
               break;
            case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToInteger_ByteBuffer_:
               if (IS_VARIABLE_PD2I(callNode))
                  variableCallTreeTops->push_back(treeTop);
               else
                  result += generatePD2I(treeTop, callNode, true, true) ? 1 : 0;
               break;
            case TR::com_ibm_dataaccess_DecimalData_convertIntegerToPackedDecimal_:
               result += generateI2PD(treeTop, callNode, true, false) ? 1 : 0;
               break;
            case TR::com_ibm_dataaccess_DecimalData_convertIntegerToPackedDecimal_ByteBuffer_:
               result += generateI2PD(treeTop, callNode, true, true) ? 1 : 0;
               break;

            // DAA Packed Decimal <-> Long
            case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToLong_:
               if (IS_VARIABLE_PD2I(callNode))
                  variableCallTreeTops->push_back(treeTop);
               else
                  result += generatePD2I(treeTop, callNode, false, false) ? 1 : 0;
               break;
            case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToLong_ByteBuffer_:
               if (IS_VARIABLE_PD2I(callNode))
                  variableCallTreeTops->push_back(treeTop);
               else
                  result += generatePD2I(treeTop, callNode, false, true) ? 1 : 0;
               break;
            case TR::com_ibm_dataaccess_DecimalData_convertLongToPackedDecimal_:
               result += generateI2PD(treeTop, callNode, false, false) ? 1 : 0;
               break;
            case TR::com_ibm_dataaccess_DecimalData_convertLongToPackedDecimal_ByteBuffer_:
               result += generateI2PD(treeTop, callNode, false, true) ? 1 : 0;
               break;

            default:
               matched = false;
               break;
            }
         }

      // Only search for an OSR guard until the first one has been found
      if (!requestOSRGuardRemoval
          && matched
          && result != 0
          && TR_OSRGuardRemoval::findMatchingOSRGuard(comp(), treeTop))
         {
         requestOSRGuardRemoval = true;
         }

      blockResult += result;
      }

   // If yields to the VM have been removed, it is possible to remove OSR guards as well
   //
   if (requestOSRGuardRemoval)
      {
      requestOpt(OMR::osrGuardRemoval);
      }

   return blockResult;
   }
550
551
/**
 * \brief Tells whether a given child of a node is a load-constant node.
 *
 * \param node
 *    The parent node.
 * \param child
 *    Index of the child to inspect.
 *
 * \return
 *    true if the child's opcode reports isLoadConst(); false otherwise.
 */
bool TR_DataAccessAccelerator::isChildConst(TR::Node* node, int32_t child)
   {
   TR::Node* const childNode = node->getChild(child);
   return childNode->getOpCode().isLoadConst();
   }
555
556
/**
 * \brief Builds the IL that reads a float or double out of a byte array, as a
 *        replacement for a ByteArrayUnmarshaller readFloat/readDouble call.
 *
 * The call's children are read as: 0 = byte array, 1 = offset, 2 = big endian flag.
 * The big endian flag must be a constant. Before the load is built, a NULLCHK and
 * two BNDCHKs (for the first and the last byte read) are requested through the
 * insertByteArray* helpers.
 *
 * \param callTreeTop
 *    The tree top that anchors the call.
 * \param callNode
 *    The call being replaced.
 * \param sourceNumBytes
 *    Number of bytes read from the array; must be 4 or 8 and not exceed targetNumBytes.
 * \param targetNumBytes
 *    Size of the produced value; must be 4 or 8.
 *
 * \return
 *    The node producing the loaded value, or NULL if the call was rejected or the
 *    transformation was not performed.
 */
TR::Node* TR_DataAccessAccelerator::insertDecimalGetIntrinsic(TR::TreeTop* callTreeTop, TR::Node* callNode, int32_t sourceNumBytes, int32_t targetNumBytes)
   {
   const bool targetSizeValid = (targetNumBytes == 4 || targetNumBytes == 8);
   if (!targetSizeValid)
      {
      printInliningStatus (false, callNode, "targetNumBytes is invalid. Valid targetNumBytes values are 4 or 8.");
      return NULL;
      }

   const bool sourceSizeValid = (sourceNumBytes == 4 || sourceNumBytes == 8);
   if (!sourceSizeValid)
      {
      printInliningStatus (false, callNode, "sourceNumBytes is invalid. Valid sourceNumBytes values are 4 or 8.");
      return NULL;
      }

   if (sourceNumBytes > targetNumBytes)
      {
      printInliningStatus (false, callNode, "sourceNumBytes is out of bounds.");
      return NULL;
      }

   TR::Node* const byteArrayNode = callNode->getChild(0);
   TR::Node* const offsetNode = callNode->getChild(1);
   TR::Node* const bigEndianNode = callNode->getChild(2);

   if (!bigEndianNode->getOpCode().isLoadConst())
      {
      printInliningStatus (false, callNode, "bigEndianNode is not constant.");
      return NULL;
      }

   // Determines whether a TR::ByteSwap needs to be inserted after the load from the byteArray
   const bool requestedBigEndian = static_cast <bool> (bigEndianNode->getInt());
   const bool requiresByteSwap = comp()->target().cpu.isBigEndian() != requestedBigEndian;

   if (requiresByteSwap && !comp()->cg()->supportsByteswap())
      {
      printInliningStatus (false, callNode, "Unmarshalling is not supported because ByteSwap IL evaluators are not implemented.");
      return NULL;
      }

   if (!(performTransformation(comp(), "O^O TR_DataAccessAccelerator: insertDecimalGetIntrinsic on callNode %p\n", callNode)))
      {
      return NULL;
      }

   insertByteArrayNULLCHK(callTreeTop, callNode, byteArrayNode);

   insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, 0);
   insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, sourceNumBytes - 1);

   // Both sizes were validated above, so each is exactly 4 (float) or 8 (double)
   const bool sourceIsFloat = (sourceNumBytes == 4);
   const bool targetIsFloat = (targetNumBytes == 4);

   TR::DataType sourceDataType = TR::NoType;
   TR::DataType targetDataType = TR::NoType;

   if (sourceIsFloat)
      sourceDataType = TR::Float;
   else
      sourceDataType = TR::Double;

   if (targetIsFloat)
      targetDataType = TR::Float;
   else
      targetDataType = TR::Double;

   // When a byte swap is needed the raw bits are loaded as an integer of the
   // same width, swapped, and only then reinterpreted as floating point
   TR::ILOpCodes loadOp = TR::BadILOp;
   if (sourceIsFloat)
      loadOp = requiresByteSwap ? TR::iloadi : TR::floadi;
   else
      loadOp = requiresByteSwap ? TR::lloadi : TR::dloadi;

   TR::Node* loadedValue = TR::Node::createWithSymRef(loadOp, 1, 1, createByteArrayElementAddress(callTreeTop, callNode, byteArrayNode, offsetNode), comp()->getSymRefTab()->findOrCreateGenericIntShadowSymbolReference(0));

   if (requiresByteSwap)
      {
      const TR::ILOpCodes swapOp = sourceIsFloat ? TR::ibyteswap : TR::lbyteswap;
      const TR::ILOpCodes reinterpretOp = sourceIsFloat ? TR::ibits2f : TR::lbits2d;

      loadedValue = TR::Node::create(reinterpretOp, 1, TR::Node::create(swapOp, 1, loadedValue));
      }

   // Convert when the size read differs from the size produced
   if (sourceNumBytes != targetNumBytes)
      {
      loadedValue = TR::Node::create(TR::ILOpCode::getProperConversion(sourceDataType, targetDataType, false), 1, loadedValue);
      }

   return loadedValue;
   }
650
651
/**
 * \brief Builds the IL that writes a float or double into a byte array, as a
 *        replacement for a ByteArrayMarshaller writeFloat/writeDouble call.
 *
 * The call's children are read as: 0 = value, 1 = byte array, 2 = offset,
 * 3 = big endian flag. The big endian flag must be a constant. Before the store
 * is built, a NULLCHK and two BNDCHKs (for the first and the last byte written)
 * are requested through the insertByteArray* helpers.
 *
 * \param callTreeTop
 *    The tree top that anchors the call.
 * \param callNode
 *    The call being replaced.
 * \param sourceNumBytes
 *    Size of the value being written; must be 4 or 8.
 * \param targetNumBytes
 *    Number of bytes written to the array; must be 4 or 8 and not exceed sourceNumBytes.
 *
 * \return
 *    The store node, or NULL if the call was rejected or the transformation was
 *    not performed.
 */
TR::Node* TR_DataAccessAccelerator::insertDecimalSetIntrinsic(TR::TreeTop* callTreeTop, TR::Node* callNode, int32_t sourceNumBytes, int32_t targetNumBytes)
   {
   if (sourceNumBytes != 4 && sourceNumBytes != 8)
      {
      printInliningStatus (false, callNode, "sourceNumBytes is invalid. Valid sourceNumBytes values are 4 or 8.");
      return NULL;
      }

   if (targetNumBytes != 4 && targetNumBytes != 8)
      {
      printInliningStatus (false, callNode, "targetNumBytes is invalid. Valid targetNumBytes values are 4 or 8.");
      return NULL;
      }

   if (targetNumBytes > sourceNumBytes)
      {
      printInliningStatus (false, callNode, "targetNumBytes is out of bounds.");
      return NULL;
      }

   TR::Node* valueNode = callNode->getChild(0);
   TR::Node* byteArrayNode = callNode->getChild(1);
   TR::Node* offsetNode = callNode->getChild(2);
   TR::Node* bigEndianNode = callNode->getChild(3);

   if (!bigEndianNode->getOpCode().isLoadConst())
      {
      printInliningStatus (false, callNode, "bigEndianNode is not constant.");
      return NULL;
      }

   // Determines whether a TR::ByteSwap needs to be inserted before the store to the byteArray
   bool requiresByteSwap = comp()->target().cpu.isBigEndian() != static_cast <bool> (bigEndianNode->getInt());

   if (requiresByteSwap && !comp()->cg()->supportsByteswap())
      {
      // This is the write (marshalling) path, so the diagnostic says so
      printInliningStatus (false, callNode, "Marshalling is not supported because ByteSwap IL evaluators are not implemented.");
      return NULL;
      }

   if (performTransformation(comp(), "O^O TR_DataAccessAccelerator: insertDecimalSetIntrinsic on callNode %p\n", callNode))
      {
      insertByteArrayNULLCHK(callTreeTop, callNode, byteArrayNode);

      insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, 0);
      insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, targetNumBytes - 1);

      // Both sizes were validated above, so each is exactly 4 (float) or 8 (double)
      const bool sourceIsFloat = (sourceNumBytes == 4);
      const bool targetIsFloat = (targetNumBytes == 4);

      TR::DataType sourceDataType = TR::NoType;
      TR::DataType targetDataType = TR::NoType;

      if (sourceIsFloat)
         sourceDataType = TR::Float;
      else
         sourceDataType = TR::Double;

      if (targetIsFloat)
         targetDataType = TR::Float;
      else
         targetDataType = TR::Double;

      // When a byte swap is needed the value is reinterpreted as an integer of
      // the same width and swapped, so it is stored with an integer store
      TR::ILOpCodes op = TR::BadILOp;
      if (targetIsFloat)
         op = requiresByteSwap ? TR::istorei : TR::fstorei;
      else
         op = requiresByteSwap ? TR::lstorei : TR::dstorei;

      // Create the proper conversion if the source and target sizes are different
      if (sourceNumBytes != targetNumBytes)
         {
         valueNode = TR::Node::create(TR::ILOpCode::getProperConversion(sourceDataType, targetDataType, false), 1, valueNode);
         }

      if (requiresByteSwap)
         {
         if (targetIsFloat)
            valueNode = TR::Node::create(TR::ibyteswap, 1, TR::Node::create(TR::fbits2i, 1, valueNode));
         else
            valueNode = TR::Node::create(TR::lbyteswap, 1, TR::Node::create(TR::dbits2l, 1, valueNode));
         }

      return TR::Node::createWithSymRef(op, 2, 2, createByteArrayElementAddress(callTreeTop, callNode, byteArrayNode, offsetNode), valueNode, comp()->getSymRefTab()->findOrCreateGenericIntShadowSymbolReference(0));
      }

   return NULL;
   }
745
746
/**
 * \brief Replaces a PackedDecimal.checkPackedDecimal call with a pdchk node.
 *
 * The call's children are read as: 0 = byte array, 1 = offset, 2 = precision,
 * 3 = ignoreHighNibbleForEvenPrecision, 4 = canOverwriteHighNibbleForEvenPrecision.
 * Children 2, 3 and 4 must be constants and the precision must lie in [1, 31];
 * otherwise the call is rejected and left untouched.
 *
 * \param callTreeTop
 *    The tree top that anchors the call.
 * \param callNode
 *    The call being replaced; on success it is recreated in place as a pdchk.
 *
 * \return
 *    true if the call was replaced; otherwise the value returned by
 *    printInliningStatus for a rejection, or false if the transformation was
 *    not performed.
 */
bool TR_DataAccessAccelerator::inlineCheckPackedDecimal(TR::TreeTop* callTreeTop, TR::Node* callNode)
   {
   TR::Node* byteArrayNode = callNode->getChild(0);
   TR::Node* offsetNode = callNode->getChild(1);
   TR::Node* precisionNode = callNode->getChild(2);
   TR::Node* ignoreHighNibbleForEvenPrecisionNode = callNode->getChild(3);
   TR::Node* canOverwriteHighNibbleForEvenPrecisionNode = callNode->getChild(4);

   // String literals are const, so the message pointer must be const as well
   const char* failMsg = NULL;
   int32_t precision = 0;

   if (!precisionNode->getOpCode().isLoadConst())
      {
      failMsg = "precisionNode is not constant.";
      }
   else
      {
      // Only read the constant value once we know the node actually is a constant
      precision = precisionNode->getInt();

      if (precision < 1 || precision > 31)
         failMsg = "precisionNode is out of bounds.";
      else if (!ignoreHighNibbleForEvenPrecisionNode->getOpCode().isLoadConst())
         failMsg = "ignoreHighNibbleForEvenPrecisionNode is not constant.";
      else if (!canOverwriteHighNibbleForEvenPrecisionNode->getOpCode().isLoadConst())
         failMsg = "canOverwriteHighNibbleForEvenPrecisionNode is not constant.";
      }

   if (failMsg)
      {
      TR::DebugCounter::incStaticDebugCounter(comp(),
                                              TR::DebugCounter::debugCounterName(comp(),
                                                                                 "DAA/rejected/chkPacked"));

      return printInliningStatus (false, callNode, failMsg);
      }

   if (performTransformation(comp(), "O^O TR_DataAccessAccelerator: inlineCheckPackedDecimal on callNode %p\n", callNode))
      {
      TR::DebugCounter::incStaticDebugCounter(comp(),
                                              TR::DebugCounter::debugCounterName(comp(),
                                                                                 "DAA/inlined/chkPacked"));

      insertByteArrayNULLCHK(callTreeTop, callNode, byteArrayNode);

      int32_t precisionSizeInNumberOfBytes = TR::DataType::getSizeFromBCDPrecision(TR::PackedDecimal, precision);

      insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, 0);
      insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, precisionSizeInNumberOfBytes - 1);

      TR::SymbolReference* packedDecimalSymbolReference = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, NULL, precisionSizeInNumberOfBytes, fe());

      TR::Node* pdchkChild0Node = TR::Node::createWithSymRef(TR::pdloadi, 1, 1, constructAddressNode(callNode, byteArrayNode, offsetNode), packedDecimalSymbolReference);

      // The size argument passed to create an array shadow symbol reference is the size in number of bytes that this PackedDecimal represents.
      // Unfortunately when a Node is constructed with this symbol reference we extract the size from the symbol reference and convert it to a
      // precision via a helper function. Because this conversion is not injective we may not get back the original precision we calculated
      // above. This is why we must explicitly set the precision on the Node after creation.

      pdchkChild0Node->setDecimalPrecision(precision);

      if (precision % 2 == 0)
         {
         const bool ignoreHighNibbleForEvenPrecision = static_cast <bool> (ignoreHighNibbleForEvenPrecisionNode->getInt());
         const bool canOverwriteHighNibbleForEvenPrecision = static_cast <bool> (canOverwriteHighNibbleForEvenPrecisionNode->getInt());

         if (ignoreHighNibbleForEvenPrecision || canOverwriteHighNibbleForEvenPrecision)
            {
            // Increase the precision of the pdload by 1 to pretend that we have an extra digit, then create a new parent on top of the pdload
            // which will truncate Packed Decimal by modifying its precision to the desired value. This has the effect of creating a new temporary
            // Packed Decimal value which properly ignores the high nibble if the precision is even, and moreover it has a value of 0 in the high nibble.

            pdchkChild0Node->setDecimalPrecision(precision + 1);

            pdchkChild0Node = TR::Node::create(TR::pdModifyPrecision, 1, pdchkChild0Node);

            pdchkChild0Node->setDecimalPrecision(precision);

            // If we are allowed to overwrite the high nibble if the precision is even then we need to store the temporary Packed Decimal we just
            // created back into the original byte array. We once again pretend that we have an extra digit when doing this store because we also want to
            // store out the extra 0 digit which is guaranteed to be present due to the above computation.

            if (canOverwriteHighNibbleForEvenPrecision)
               {
               // Distinct names: these describe the (precision + 1) store and must not shadow the load's size and symbol reference
               int32_t widenedSizeInNumberOfBytes = TR::DataType::getSizeFromBCDPrecision(TR::PackedDecimal, precision + 1);

               TR::SymbolReference* widenedSymbolReference = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, NULL, widenedSizeInNumberOfBytes, fe());

               // This node should be inserted after callNode
               TR::Node * pdstoreNode = TR::Node::createWithSymRef(TR::pdstorei, 2, 2, constructAddressNode(callNode, byteArrayNode, offsetNode), pdchkChild0Node, widenedSymbolReference);

               pdstoreNode->setDecimalPrecision(precision + 1);

               callTreeTop->insertAfter(TR::TreeTop::create(comp(), pdstoreNode));
               }
            }
         }

      // We will be recreating the callNode so decrement the reference count of all its children
      for (auto i = 0; i < callNode->getNumChildren(); ++i)
         {
         callNode->getChild(i)->decReferenceCount();
         }

      TR::Node::recreateWithoutProperties(callNode, TR::pdchk, 1, pdchkChild0Node);

      return true;
      }

   return false;
   }
848
849
/**
 * \brief Builds the IL that reads an integral value out of a byte array, as a
 *        replacement for a ByteArrayUnmarshaller readShort/readInt/readLong call.
 *
 * The call's children are read as: 0 = byte array, 1 = offset, 2 = big endian flag
 * and, only when sourceNumBytes is passed as 0, 3 = number of bytes and
 * 4 = sign extend flag. Every flag/length child that is consulted must be a constant.
 * Before the load is built, a NULLCHK and two BNDCHKs (for the first and the last
 * byte read) are requested through the insertByteArray* helpers.
 *
 * \param callTreeTop
 *    The tree top that anchors the call.
 * \param callNode
 *    The call being replaced.
 * \param sourceNumBytes
 *    Pass 0 to take the number of bytes to read from child 3 of the call (which
 *    must then be 1, 2, 4 or 8 and not exceed targetNumBytes). Any non-zero value
 *    is replaced by targetNumBytes.
 * \param targetNumBytes
 *    Size class of the produced value; must be 1, 2, 4 or 8. Sizes 1, 2 and 4
 *    produce an Int32, size 8 produces an Int64.
 *
 * \return
 *    The node producing the loaded value, or NULL if the call was rejected or the
 *    transformation was not performed.
 */
TR::Node* TR_DataAccessAccelerator::insertIntegerGetIntrinsic(TR::TreeTop* callTreeTop, TR::Node* callNode, int32_t sourceNumBytes, int32_t targetNumBytes)
   {
   if (targetNumBytes != 1 && targetNumBytes != 2 && targetNumBytes != 4 && targetNumBytes != 8)
      {
      printInliningStatus (false, callNode, "targetNumBytes is invalid. Valid targetNumBytes values are 1, 2, 4, or 8.");
      return NULL;
      }

   TR::Node* byteArrayNode = callNode->getChild(0);
   TR::Node* offsetNode = callNode->getChild(1);
   TR::Node* bigEndianNode = callNode->getChild(2);
   TR::Node* numBytesNode = NULL;
   TR::Node* signExtendNode = NULL;

   if (!bigEndianNode->getOpCode().isLoadConst())
      {
      printInliningStatus (false, callNode, "bigEndianNode is not constant.");
      return NULL;
      }

   bool needUnsignedConversion = false;

   // This check indicates that the sourceNumBytes value is specified on the callNode, so we must extract it
   if (sourceNumBytes == 0)
      {
      numBytesNode = callNode->getChild(3);

      if (!numBytesNode->getOpCode().isLoadConst())
         {
         printInliningStatus (false, callNode, "numBytesNode is not constant.");
         return NULL;
         }

      sourceNumBytes = numBytesNode->getInt();

      if (sourceNumBytes != 1 && sourceNumBytes != 2 && sourceNumBytes != 4 && sourceNumBytes != 8)
         {
         // The diagnostic names the parameter that was actually rejected
         printInliningStatus (false, callNode, "sourceNumBytes is invalid. Valid sourceNumBytes values are 1, 2, 4, or 8.");
         return NULL;
         }

      if (sourceNumBytes > targetNumBytes)
         {
         printInliningStatus (false, callNode, "sourceNumBytes is out of bounds.");
         return NULL;
         }

      signExtendNode = callNode->getChild(4);

      if (!signExtendNode->getOpCode().isLoadConst())
         {
         printInliningStatus (false, callNode, "signExtendNode is not constant.");
         return NULL;
         }

      // A widening read is unsigned (zero extending) unless the sign extend flag is exactly 1
      needUnsignedConversion = sourceNumBytes < targetNumBytes && static_cast <bool> (signExtendNode->getInt() != 1);
      }
   else
      {
      sourceNumBytes = targetNumBytes;
      }

   // Determines whether a TR::ByteSwap needs to be inserted after the load from the byteArray.
   // A single byte never needs swapping.
   bool requiresByteSwap = sourceNumBytes != 1 && comp()->target().cpu.isBigEndian() != static_cast <bool> (bigEndianNode->getInt());

   if (requiresByteSwap && !comp()->cg()->supportsByteswap())
      {
      printInliningStatus (false, callNode, "Unmarshalling is not supported because ByteSwap IL evaluators are not implemented.");
      return NULL;
      }

   if (performTransformation(comp(), "O^O TR_DataAccessAccelerator: genSimpleGetBinary call: %p inlined.\n", callNode))
      {
      insertByteArrayNULLCHK(callTreeTop, callNode, byteArrayNode);

      insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, 0);
      insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, sourceNumBytes - 1);

      TR::DataType sourceDataType = TR::NoType;
      TR::DataType targetDataType = TR::NoType;

      TR::ILOpCodes op = TR::BadILOp;
      TR::ILOpCodes byteswapOp = TR::BadILOp;

      // Default case is impossible due to previous checks
      switch (sourceNumBytes)
         {
         case 1: sourceDataType = TR::Int8;  op = TR::bloadi; break;
         case 2: sourceDataType = TR::Int16; op = TR::sloadi; byteswapOp = TR::sbyteswap; break;
         case 4: sourceDataType = TR::Int32; op = TR::iloadi; byteswapOp = TR::ibyteswap; break;
         case 8: sourceDataType = TR::Int64; op = TR::lloadi; byteswapOp = TR::lbyteswap; break;
         }

      // Default case is impossible due to previous checks.
      // Everything narrower than 8 bytes is produced as an Int32.
      switch (targetNumBytes)
         {
         case 1:
         case 2:
         case 4: targetDataType = TR::Int32; break;
         case 8: targetDataType = TR::Int64; break;
         }

      TR::Node* valueNode = TR::Node::createWithSymRef(op, 1, 1, createByteArrayElementAddress(callTreeTop, callNode, byteArrayNode, offsetNode), comp()->getSymRefTab()->findOrCreateGenericIntShadowSymbolReference(0));

      if (requiresByteSwap)
         {
         valueNode = TR::Node::create(byteswapOp, 1, valueNode);
         }

      if (sourceDataType != targetDataType)
         {
         valueNode = TR::Node::create(TR::ILOpCode::getProperConversion(sourceDataType, targetDataType, needUnsignedConversion), 1, valueNode);
         }

      return valueNode;
      }

   return NULL;
   }
977
978
/**
 * Builds the indirect store that replaces a recognized integer "put" call on a byte array.
 *
 * Children read from \p callNode: 0 = value, 1 = byte array, 2 = offset, 3 = big-endian flag and,
 * only when \p targetNumBytes is 0, 4 = number of bytes to write.
 *
 * \param callTreeTop    Tree top of the call; forwarded to the NULLCHK / BNDCHK / address helpers.
 * \param callNode       The call being replaced.
 * \param sourceNumBytes Width of the incoming value; must be 1, 2, 4 or 8.
 * \param targetNumBytes 0 means "take the width from child 4"; for any other value the width
 *                       written is sourceNumBytes.
 *
 * \return The new indirect store node, or NULL when the call is rejected (non-constant or invalid
 *         operands, missing byteswap support) or performTransformation() declines.
 */
TR::Node* TR_DataAccessAccelerator::insertIntegerSetIntrinsic(TR::TreeTop* callTreeTop, TR::Node* callNode, int32_t sourceNumBytes, int32_t targetNumBytes)
   {
   const bool sourceWidthIsValid = sourceNumBytes == 1 || sourceNumBytes == 2 || sourceNumBytes == 4 || sourceNumBytes == 8;

   if (!sourceWidthIsValid)
      {
      printInliningStatus (false, callNode, "sourceNumBytes is invalid. Valid sourceNumBytes values are 1, 2, 4, or 8.");
      return NULL;
      }

   TR::Node* storedValue = callNode->getChild(0);
   TR::Node* destArray   = callNode->getChild(1);
   TR::Node* destOffset  = callNode->getChild(2);
   TR::Node* endianFlag  = callNode->getChild(3);

   if (!endianFlag->getOpCode().isLoadConst())
      {
      printInliningStatus (false, callNode, "bigEndianNode is not constant.");
      return NULL;
      }

   if (targetNumBytes != 0)
      {
      // The width was not supplied on the call, so the full source width is written
      targetNumBytes = sourceNumBytes;
      }
   else
      {
      // The width is an explicit argument of the call and has to be a usable constant
      TR::Node* widthNode = callNode->getChild(4);

      if (!widthNode->getOpCode().isLoadConst())
         {
         printInliningStatus (false, callNode, "numBytesNode is not constant.");
         return NULL;
         }

      targetNumBytes = widthNode->getInt();

      const bool targetWidthIsValid = targetNumBytes == 1 || targetNumBytes == 2 || targetNumBytes == 4 || targetNumBytes == 8;

      if (!targetWidthIsValid)
         {
         printInliningStatus (false, callNode, "targetNumBytes is invalid. Valid targetNumBytes values are 1, 2, 4, or 8.");
         return NULL;
         }

      if (targetNumBytes > sourceNumBytes)
         {
         printInliningStatus (false, callNode, "targetNumBytes is out of bounds.");
         return NULL;
         }
      }

   // A byte swap is needed for multi-byte stores whose requested endianness differs from the target CPU's
   const bool requiresByteSwap = targetNumBytes != 1 && comp()->target().cpu.isBigEndian() != static_cast <bool> (endianFlag->getInt());

   if (requiresByteSwap && !comp()->cg()->supportsByteswap())
      {
      printInliningStatus (false, callNode, "Marshalling is not supported because ByteSwap IL evaluators are not implemented.");
      return NULL;
      }

   if (!performTransformation(comp(), "O^O TR_DataAccessAccelerator: genSimplePutBinary call: %p inlined.\n", callNode))
      {
      return NULL;
      }

   insertByteArrayNULLCHK(callTreeTop, callNode, destArray);

   // Bounds-check the first and the last byte that will be written
   insertByteArrayBNDCHK(callTreeTop, callNode, destArray, destOffset, 0);
   insertByteArrayBNDCHK(callTreeTop, callNode, destArray, destOffset, targetNumBytes - 1);

   // Widths of 1, 2 and 4 bytes arrive as Int32; only 8 arrives as Int64 (width validated above)
   TR::DataType sourceDataType = (sourceNumBytes == 8) ? TR::Int64 : TR::Int32;

   TR::DataType  targetDataType = TR::NoType;
   TR::ILOpCodes storeOp        = TR::BadILOp;
   TR::ILOpCodes byteswapOp     = TR::BadILOp;

   // No default case: targetNumBytes is known to be 1, 2, 4 or 8 at this point
   switch (targetNumBytes)
      {
      case 1:
         targetDataType = TR::Int8;
         storeOp        = TR::bstorei;
         break;
      case 2:
         targetDataType = TR::Int16;
         storeOp        = TR::sstorei;
         byteswapOp     = TR::sbyteswap;
         break;
      case 4:
         targetDataType = TR::Int32;
         storeOp        = TR::istorei;
         byteswapOp     = TR::ibyteswap;
         break;
      case 8:
         targetDataType = TR::Int64;
         storeOp        = TR::lstorei;
         byteswapOp     = TR::lbyteswap;
         break;
      }

   // Convert the value first when the stored width differs from the incoming one
   if (sourceDataType != targetDataType)
      {
      TR::ILOpCodes conversionOp = TR::ILOpCode::getProperConversion(sourceDataType, targetDataType, false);
      storedValue = TR::Node::create(conversionOp, 1, storedValue);
      }

   if (requiresByteSwap)
      {
      storedValue = TR::Node::create(byteswapOp, 1, storedValue);
      }

   TR::Node*            elementAddress = createByteArrayElementAddress(callTreeTop, callNode, destArray, destOffset);
   TR::SymbolReference* shadowSymRef   = comp()->getSymRefTab()->findOrCreateGenericIntShadowSymbolReference(0);

   return TR::Node::createWithSymRef(storeOp, 2, 2, elementAddress, storedValue, shadowSymRef);
   }
1093
1094
/**
 * Creates the address node for a packed decimal operand of a DAA call.
 *
 * For the four recognized ByteBuffer conversion methods the address is computed from the buffer
 * address and buffer position children of \p callNode (l2a(address + i2l(position + offset))).
 * For every other call it is an internal pointer into \p arrayNode at
 * (contiguous array header size + offset).
 *
 * \param callNode   The DAA call; inspected for its recognized method and ByteBuffer children.
 * \param arrayNode  The array object node; unused on the ByteBuffer path.
 * \param offsetNode The integer offset of the operand.
 *
 * \return The newly created address node.
 */
TR::Node* TR_DataAccessAccelerator::constructAddressNode(TR::Node* callNode, TR::Node* arrayNode, TR::Node* offsetNode)
   {
   TR::ResolvedMethodSymbol* resolvedCallee = callNode->getSymbol()->getResolvedMethodSymbol();

   if (resolvedCallee != NULL)
      {
      bool      isByteBuffer   = false;
      TR::Node* bufferAddress  = NULL;
      TR::Node* bufferPosition = NULL;

      // The buffer address / position children sit at different indices for the two method families
      switch (resolvedCallee->getRecognizedMethod())
         {
         case TR::com_ibm_dataaccess_DecimalData_convertIntegerToPackedDecimal_ByteBuffer_:
         case TR::com_ibm_dataaccess_DecimalData_convertLongToPackedDecimal_ByteBuffer_:
            isByteBuffer   = true;
            bufferAddress  = callNode->getChild(5);
            bufferPosition = callNode->getChild(7);
            break;

         case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToInteger_ByteBuffer_:
         case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToLong_ByteBuffer_:
            isByteBuffer   = true;
            bufferAddress  = callNode->getChild(4);
            bufferPosition = callNode->getChild(6);
            break;

         default:
            break;
         }

      if (isByteBuffer)
         {
         TR::Node* intOffset  = TR::Node::create(TR::iadd, 2, bufferPosition, offsetNode);
         TR::Node* longOffset = TR::Node::create(TR::i2l, 1, intOffset);
         TR::Node* rawAddress = TR::Node::create(TR::ladd, 2, bufferAddress, longOffset);
         return TR::Node::create(TR::l2a, 1, rawAddress);
         }
      }

   TR::Node* elementAddress = NULL;

   if (comp()->target().is64Bit())
      {
      TR::Node* headerSize = TR::Node::create(callNode, TR::lconst, 0, 0);
      headerSize->setLongInt(TR::Compiler->om.contiguousArrayHeaderSizeInBytes());

      TR::Node* longOffset  = TR::Node::create(TR::i2l, 1, offsetNode);
      TR::Node* totalOffset = TR::Node::create(TR::ladd, 2, headerSize, longOffset);
      elementAddress = TR::Node::create(TR::aladd, 2, arrayNode, totalOffset);
      }
   else
      {
      TR::Node* headerSize  = TR::Node::create(callNode, TR::iconst, 0,
                                               TR::Compiler->om.contiguousArrayHeaderSizeInBytes());
      TR::Node* totalOffset = TR::Node::create(TR::iadd, 2, headerSize, offsetNode);
      elementAddress = TR::Node::create(TR::aiadd, 2, arrayNode, totalOffset);
      }

   elementAddress->setIsInternalPointer(true);
   return elementAddress;
   }
1152
1153
/**
 * Replaces a packed decimal comparison call with the comparison opcode \p ops applied to two
 * pdloadi nodes, anchored under a BCDCHK node.
 *
 * Children of \p callNode: 0 = first array, 1 = first offset, 2 = first precision,
 * 3 = second array, 4 = second offset, 5 = second precision. Both precisions must be constants
 * in the range [1, 31].
 *
 * The call node itself is reused: it is recreated with \p ops, given the two loads as its only
 * children, and becomes the first child of the BCDCHK. The original six children are attached to
 * the BCDCHK after it.
 *
 * \param treeTop  Tree top that anchors the call; its node is replaced by the BCDCHK.
 * \param callNode The comparison call to transform.
 * \param ops      The comparison opcode the call node is recreated with.
 *
 * \return The result of printInliningStatus() for the accepted / rejected cases, or false when
 *         performTransformation() declines.
 */
bool TR_DataAccessAccelerator::genComparisionIntrinsic(TR::TreeTop* treeTop, TR::Node* callNode, TR::ILOpCodes ops)
   {
   // Verify the call shape before any child is touched; child 5 is inspected immediately below
   TR_ASSERT(callNode->getNumChildren() == 6, "Expecting BCD cmp call with 6 children.");

   if (!isChildConst(callNode, 2) || !isChildConst(callNode, 5))
      {
      return printInliningStatus(false, callNode, "Child (2|5) is not constant");
      }

   TR::Node * op1Node     = callNode->getChild(0);
   TR::Node * offset1Node = callNode->getChild(1);
   TR::Node * prec1Node   = callNode->getChild(2);
   TR::Node * op2Node     = callNode->getChild(3);
   TR::Node * offset2Node = callNode->getChild(4);
   TR::Node * prec2Node   = callNode->getChild(5);

   const int precision1 = prec1Node->getInt();
   const int precision2 = prec2Node->getInt();

   if (precision1 > 31 || precision2 > 31 || precision1 < 1 || precision2 < 1)
      {
      return printInliningStatus(false, callNode, "Invalid precisions. Valid precisions are in range [1, 31]");
      }

   if (!performTransformation(comp(), "O^O TR_DataAccessAccelerator: genComparison call: %p, Comparison type: %d inlined.\n", callNode, ops))
      {
      return false;
      }

   // Load the first packed decimal operand
   TR::Node * arrayAddressNode1 = constructAddressNode(callNode, op1Node, offset1Node);
   TR::SymbolReference * symRef1 = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, arrayAddressNode1, 8, fe());
   symRef1->setOffset(0);

   TR::Node * pdload1 = TR::Node::create(TR::pdloadi, 1, arrayAddressNode1);
   pdload1->setSymbolReference(symRef1);
   pdload1->setDecimalPrecision(precision1);

   // Load the second packed decimal operand
   TR::Node * arrayAddressNode2 = constructAddressNode(callNode, op2Node, offset2Node);
   TR::SymbolReference * symRef2 = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, arrayAddressNode2, 8, fe());
   symRef2->setOffset(0);

   TR::Node * pdload2 = TR::Node::create(TR::pdloadi, 1, arrayAddressNode2);
   pdload2->setSymbolReference(symRef2);
   pdload2->setDecimalPrecision(precision2);

   // Create the BCDCHK. The call node is its first child and is turned into the comparison below;
   // the six original call children follow it and the call's symbol reference moves to the BCDCHK.
   TR::Node * pdOpNode = callNode;
   TR::SymbolReference* bcdChkSymRef = callNode->getSymbolReference();
   TR::Node * bcdchkNode = TR::Node::createWithSymRef(TR::BCDCHK, 7, 7,
                                                      pdOpNode,
                                                      callNode->getChild(0), callNode->getChild(1),
                                                      callNode->getChild(2), callNode->getChild(3),
                                                      callNode->getChild(4), callNode->getChild(5),
                                                      bcdChkSymRef);

   // Repurpose the call node: its only children are now the two loads
   pdOpNode->setNumChildren(2);
   pdOpNode->setAndIncChild(0, pdload1);
   pdOpNode->setAndIncChild(1, pdload2);
   pdOpNode->setSymbolReference(NULL);

   // Set inlined site index to make sure AOT TR_RelocationRecordConstantPool::computeNewConstantPool() API can
   // correctly compute a new CP to relocate DAA OOL calls.
   bcdchkNode->setInlinedSiteIndex(callNode->getInlinedSiteIndex());

   // Instead of creating a separate comparison node, reuse the call node
   TR::Node::recreate(pdOpNode, ops);

   treeTop->setNode(bcdchkNode);

   // Drop one reference from the call node and from each of its six former children
   // (presumably the ones that were held through the original tree top / call - confirm)
   pdOpNode->decReferenceCount();
   op1Node->decReferenceCount();
   op2Node->decReferenceCount();
   offset1Node->decReferenceCount();
   offset2Node->decReferenceCount();
   prec1Node->decReferenceCount();
   prec2Node->decReferenceCount();

   return printInliningStatus(true, callNode);
   }
1235
1236
/**
 * Replaces an integer / long to packed decimal conversion call with an i2pd / l2pd tree stored
 * through a pdstorei, optionally guarded by a BCDCHK.
 *
 * Children of \p callNode: 0 = integer value, 1 = packed decimal destination, 2 = offset,
 * 3 = precision, 4 = error checking flag. For the ByteBuffer variants child 5 is additionally
 * used as the buffer address and children 5..7 are forwarded to the BCDCHK. Children 3 and 4
 * must be constants and the precision must lie in [1, 31].
 *
 * A BCDCHK is generated when the requested precision is smaller than the widest result of the
 * conversion (10 digits for i2pd, 19 for l2pd), i.e. when pdshlOverflow has to truncate.
 *
 * \param treeTop      Tree top anchoring the call.
 * \param callNode     The conversion call to transform.
 * \param isI2PD       true for the int variant, false for the long variant.
 * \param isByteBuffer true when the destination is a ByteBuffer; a fast / slow path diamond keyed
 *                     on the buffer address is then created instead of replacing the call in place.
 *
 * \return true when the call was transformed; otherwise the result of printInliningStatus() for a
 *         rejected call, or false when performTransformation() declines.
 */
bool TR_DataAccessAccelerator::generateI2PD(TR::TreeTop* treeTop, TR::Node* callNode, bool isI2PD, bool isByteBuffer)
   {
   const char* failMsg = NULL;
   int precision = 0;

   // Only read the precision once child 3 is known to be a constant
   if (!isChildConst(callNode, 3) || !isChildConst(callNode, 4))
      {
      failMsg = "Child (3|4) is not constant";
      }
   else
      {
      precision = callNode->getChild(3)->getInt();

      if (precision < 1 || precision > 31)
         failMsg = "Invalid precision. Valid precision is in range [1, 31]";
      }

   if (failMsg)
      {
      TR::DebugCounter::incStaticDebugCounter(comp(),
                                              TR::DebugCounter::debugCounterName(comp(),
                                                                                 "DAA/rejected/%s",
                                                                                 isI2PD ? "i2pd" : "l2pd"));
      return printInliningStatus(false, callNode, failMsg);
      }

   const bool needsBCDCHK = (isI2PD && (precision < 10)) || (!isI2PD && (precision < 19));

   // Still need to check bounds of pdNode
   if (!performTransformation(comp(), "O^O TR_DataAccessAccelerator: %s call: %p inlined.\n", (isI2PD)?"generateI2PD":"generateL2PD", callNode))
      {
      return false;
      }

   TR::DebugCounter::incStaticDebugCounter(comp(),
                                           TR::DebugCounter::debugCounterName(comp(),
                                                                              "DAA/inlined/%s",
                                                                              isI2PD ? "i2pd" : "l2pd"));

   // Backing storage address of the ByteBuffer, compared against 0 in the diamond below
   TR::Node * pdBufAddressNodeCopy = NULL;

   if (isByteBuffer)
      {
      /* We will be creating a precision diamond for the fast / slow path and eliminating the original call.
       * Because we are about to split the CFG we would have to store the original parameters of the call into
       * temp slots as we will be duplicating the call node in the precision diamond but we don't need to since
       * createConditionalBlocksBeforeTree takes care of it. createConditionalBlocksBeforeTree calls block::split
       * with true for the option fixupCommoning and so it will break the commoning and add any necessary temps for you.
       */
      pdBufAddressNodeCopy = TR::Node::copy(callNode->getChild(5));
      pdBufAddressNodeCopy->setReferenceCount(0);
      }

   TR::Node* intNode    = callNode->getChild(0);
   TR::Node* pdNode     = callNode->getChild(1);
   TR::Node* offsetNode = callNode->getChild(2);

   // Create a TR::i2pd / TR::l2pd node and a pdstore.
   // This will not cause an exception, so it is safe to remove BCDCHK
   TR::Node * i2pdNode = TR::Node::create((isI2PD)?TR::i2pd:TR::l2pd, 1, intNode);
   i2pdNode->setDecimalPrecision(precision);

   /**
    * Create separate address nodes for BCDCHK and pdstorei because BCDCHK can GC-and-Return like a call.
    *
    * Having separate address nodes also allows AddrNode2 and AddrNode3 commoning, which then makes
    * copy propagations possible.
    *
    * AddrNode1 could still be commoned with address nodes before the BCDCHK. Hence, the need for
    * UncommonBCDCHKAddressNode codegen pass. AddrNode1 is special in that it has to be rematerialized and used
    * at the end of the BCDCHK OOL path's GC point. No commoning of this node should happen.
    *
    * Example:
    *
    * BCDCHK
    *    pdshlOverflow <prec=9 (len=5) adj=0 round=0>
    *       ....
    *    aladd (internalPtr sharedMemory )                 AddrNode1
    *       ==>newarray
    *       lconst 8 (highWordZero X!=0 X>=0 )
    *    ....
    * pdstorei <array-shadow>[#490 Shadow] <prec=9 (len=5)>
    *    aladd (internalPtr sharedMemory )                 AddrNode2
    *       ==>newarray
    *       ==>lconst 8
    *    ==>pdshlOverflow <prec=9 (len=5)
    * zdsleStorei <array-shadow>[#492 Shadow] <prec=9 (len=9)>
    *    ....
    *    zd2zdsle <prec=9 (len=9)>
    *       pd2zd <prec=9 (len=9)>
    *          pdloadi <prec=9 (len=5) adj=0 round=0>
    *             aladd (internalPtr sharedMemory )        AddrNode3
    *                ==>newarray
    *                ==>lconst 8
    *
    * In the example above, AddrNode 1 to 3 have the same children.
    *
    * AddrNode1, the second child of the BCDCHK node, is meant to be rematerialized for OOL post-call data copy back.
    * See BCDCHKEvaluatorImpl() for BCDCHK tree structure and intended use of its children.
    *
    * 'outOfLineCopyBackAddr' and 'storeAddressNode' correspond to AddrNode1 and AddrNode2, respectively. They
    * are created as separate nodes so that LocalCSE is able to common up AddrNode2 and AddrNode3. If
    * AddrNode1 and AddrNode2 were the same node, the LocalCSE would not consider AddrNode1 an alternative replacement
    * of AddrNode3 because the BCDCHK's symbol canGCAndReturn().
    *
    * With AddrNode2 and AddrNode3 commoned up, the LocalCSE is able to copy propagate pdshlOverflow to the pd2zd
    * tree and replace its pdloadi.
    */
   TR::Node * outOfLineCopyBackAddr = constructAddressNode(callNode, pdNode, offsetNode);
   TR::Node * storeAddressNode      = constructAddressNode(callNode, pdNode, offsetNode);

   // Neighbours of the call tree; only used by the in-place (non ByteBuffer) replacement below
   TR::TreeTop * nextTT = treeTop->getNextTreeTop();
   TR::TreeTop * prevTT = treeTop->getPrevTreeTop();

   TR::ILOpCodes op = comp()->il.opCodeForIndirectStore(TR::PackedDecimal);

   TR::Node * pdstore    = NULL;
   TR::Node * bcdchkNode = NULL;

   if (needsBCDCHK)
      {
      // Convert at full width first, then let pdshlOverflow reduce to the requested precision
      i2pdNode->setDecimalPrecision((isI2PD)? 10:19);
      TR::Node * pdshlNode = TR::Node::create(TR::pdshlOverflow, 2, i2pdNode, TR::Node::create(callNode, TR::iconst, 0));
      pdshlNode->setDecimalPrecision(precision);

      /* Attaching all the original callNode's children as the children to BCDCHK.
       * We don't want to attach the callNode as a child to BCDCHK since it would be an aberration to the
       * definition of a BCDCHK node. BCDCHK node is already a special type of node, and all optimizations expect the
       * call (i2pd) to be inside the first child of BCDCHK. Attaching another call could cause many things to
       * break as all optimizations such as Value Propagation don't expect it to be there. Attaching the callNode's children
       * to BCDCHK would be safe. We would whip up the call with these attached children during codegen
       * for the fallback of the fastpath.
       */
      TR::SymbolReference* bcdChkSymRef = callNode->getSymbolReference();

      if (isByteBuffer)
         {
         bcdchkNode = TR::Node::createWithSymRef(TR::BCDCHK, 10, 10,
                                                 pdshlNode, outOfLineCopyBackAddr,
                                                 callNode->getChild(0), callNode->getChild(1),
                                                 callNode->getChild(2), callNode->getChild(3),
                                                 callNode->getChild(4), callNode->getChild(5),
                                                 callNode->getChild(6), callNode->getChild(7),
                                                 bcdChkSymRef);
         }
      else
         {
         bcdchkNode = TR::Node::createWithSymRef(TR::BCDCHK, 7, 7,
                                                 pdshlNode, outOfLineCopyBackAddr,
                                                 callNode->getChild(0), callNode->getChild(1),
                                                 callNode->getChild(2), callNode->getChild(3),
                                                 callNode->getChild(4),
                                                 bcdChkSymRef);
         }

      // Set inlined site index to make sure AOT TR_RelocationRecordConstantPool::computeNewConstantPool() API can
      // correctly compute a new CP to relocate DAA OOL calls.
      bcdchkNode->setInlinedSiteIndex(callNode->getInlinedSiteIndex());

      pdstore = TR::Node::create(op, 2, storeAddressNode, pdshlNode);
      }
   else
      {
      pdstore = TR::Node::create(op, 2, storeAddressNode, i2pdNode);
      }

   TR::TreeTop* pdstoreTT = TR::TreeTop::create(comp(), pdstore);

   if (isByteBuffer)
      {
      TR::CFG *cfg = comp()->getFlowGraph();
      TR::Block *callBlock = treeTop->getEnclosingBlock();

      // Generate the slow path: a duplicate of the original call tree
      TR::TreeTop *slowPathTreeTop = TR::TreeTop::create(comp(),treeTop->getNode()->duplicateTree());

      // Generate the tree to check if the ByteBuffer has a valid address or not
      TR::Node* nullNode = TR::Node::create(TR::lconst, 0, 0);
      TR::Node *isValidAddrNode = TR::Node::createif(TR::iflcmpeq, pdBufAddressNodeCopy, nullNode, treeTop);
      TR::TreeTop *isValidAddrTreeTop = TR::TreeTop::create(comp(), isValidAddrNode, NULL, NULL);

      /* Create the diamond in CFG
       * if (ByteBuffer.address != NULL)
       *    fastpath (CVD instruction executed by HW)
       * else
       *    slowpath (call to Java method: convertIntegerToPackedDecimal_)
       * */
      callBlock->createConditionalBlocksBeforeTree(treeTop, isValidAddrTreeTop, slowPathTreeTop, pdstoreTT, cfg, false, true);
      }

   TR::SymbolReference * symRef = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, outOfLineCopyBackAddr, 8, fe());
   pdstore->setSymbolReference(symRef);
   pdstore->setDecimalPrecision(precision);

   TR::TreeTop* bcdchktt = NULL;
   if (needsBCDCHK)
      bcdchktt = TR::TreeTop::create(comp(), bcdchkNode);

   if (isByteBuffer)
      {
      // The original call will be deleted by createConditionalBlocksBeforeTree, but if the refcount was > 1, we need to insert stores.
      if (needsBCDCHK)
         {
         pdstoreTT->insertBefore(bcdchktt);
         }
      }
   else
      {
      // Splice [BCDCHK,] pdstore in place of the call tree
      if (needsBCDCHK)
         {
         prevTT->join(bcdchktt);
         bcdchktt->join(pdstoreTT);
         }
      else
         {
         prevTT->join(pdstoreTT);
         }
      pdstoreTT->join(nextTT);

      // We'll be removing the callNode, update its refcount.
      callNode->recursivelyDecReferenceCount();
      }

   return true;
   }
1473
1474
void TR_DataAccessAccelerator::createPrecisionDiamond(TR::Compilation* comp,
1475
TR::TreeTop* treeTop,
1476
TR::TreeTop* fastTree,
1477
TR::TreeTop* slowTree,
1478
bool isPD2I,
1479
uint32_t numPrecisionNodes,
1480
...)
1481
{
1482
// Create precision guards
1483
const uint8_t precisionMin = 1;
1484
const uint8_t precisionMax = isPD2I ? 15 : 31;
1485
1486
uint32_t numGuards = numPrecisionNodes * 2;
1487
1488
TR::StackMemoryRegion stackMemoryRegion(*(comp->trMemory()));
1489
1490
BlockContainer testBlocks(stackMemoryRegion);
1491
TreeTopContainer testTTs(stackMemoryRegion);
1492
1493
va_list precisionNodeList;
1494
va_start(precisionNodeList, numPrecisionNodes);
1495
for(uint32_t i = 0; i < numPrecisionNodes; ++i)
1496
{
1497
TR::Node* precisionNode = va_arg(precisionNodeList, TR::Node*);
1498
TR_ASSERT(precisionNode, "Precision node should not be null");
1499
TR::Node* node1 = TR::Node::createif(TR::ificmpgt, precisionNode->duplicateTree(), TR::Node::iconst(precisionMax));
1500
TR::Node* node2 = TR::Node::createif(TR::ificmplt, precisionNode->duplicateTree(), TR::Node::iconst(precisionMin));
1501
1502
testTTs.push_back(TR::TreeTop::create(comp, node1));
1503
testTTs.push_back(TR::TreeTop::create(comp, node2));
1504
}
1505
va_end(precisionNodeList);
1506
1507
// Split blocks, 1 for each precision test block
1508
TR::CFG* cfg = comp->getFlowGraph();
1509
1510
// We will be updating the CFG so invalidate the structure
1511
cfg->setStructure(0);
1512
1513
testBlocks.push_back(treeTop->getEnclosingBlock(false));
1514
for(uint32_t i = 1; i < numGuards; ++i)
1515
{
1516
testBlocks.push_back(testBlocks[i-1]->split(treeTop, cfg, true));
1517
}
1518
1519
TR::Block* firstTestBlock = testBlocks.front();
1520
TR::Block* lastTestBlock = testBlocks.back();
1521
1522
// This block will contain everything AFTER tree
1523
TR::Block * otherBlock = lastTestBlock->split(treeTop, cfg, true);
1524
1525
// Append tree tops
1526
for(int i = 0; i < numGuards; ++i)
1527
{
1528
testBlocks[i]->append(testTTs[i]);
1529
}
1530
1531
TR::Node* node = treeTop->getNode();
1532
1533
// Remove the original tree from the other block
1534
node->removeAllChildren();
1535
1536
TR::TreeTop* prevTT = treeTop->getPrevTreeTop();
1537
TR::TreeTop* nextTT = treeTop->getNextTreeTop();
1538
1539
prevTT->join(nextTT);
1540
1541
TR::Block * fastBlock = TR::Block::createEmptyBlock(node, comp, firstTestBlock->getFrequency());
1542
TR::Block * slowBlock = TR::Block::createEmptyBlock(node, comp, UNKNOWN_COLD_BLOCK_COUNT);
1543
1544
TR::TreeTop* slowEntry = slowBlock->getEntry();
1545
TR::TreeTop* fastEntry = fastBlock->getEntry();
1546
TR::TreeTop* slowExit = slowBlock->getExit();
1547
TR::TreeTop* fastExit = fastBlock->getExit();
1548
1549
// Fast block is a fall-through of the second if test
1550
lastTestBlock->getExit()->join(fastEntry);
1551
1552
cfg->addNode(fastBlock);
1553
cfg->addNode(slowBlock);
1554
1555
TR::Block * bestBlock = otherBlock;
1556
1557
// Find the best place for the slow block
1558
while (bestBlock && bestBlock->canFallThroughToNextBlock())
1559
{
1560
bestBlock = bestBlock->getNextBlock();
1561
}
1562
1563
if (bestBlock)
1564
{
1565
TR::TreeTop* bestExit = bestBlock->getExit();
1566
TR::TreeTop* bestNext = bestBlock->getExit()->getNextTreeTop();
1567
1568
bestExit->join(slowEntry);
1569
slowExit->join(bestNext);
1570
}
1571
else
1572
{
1573
cfg->findLastTreeTop()->join(slowBlock->getEntry());
1574
}
1575
1576
fastBlock->append(fastTree);
1577
slowBlock->append(slowTree);
1578
1579
// Jump back to other block after slow path
1580
slowBlock->append(TR::TreeTop::create(comp, TR::Node::create(node, TR::Goto, 0, otherBlock->getEntry())));
1581
for(int i = 0; i < numGuards; ++i)
1582
{
1583
testTTs[i]->getNode()->setBranchDestination(slowEntry);
1584
}
1585
1586
// Other block is a fall-through of the fast block
1587
fastExit->join(otherBlock->getEntry());
1588
1589
cfg->addEdge(TR::CFGEdge::createEdge(lastTestBlock, fastBlock, trMemory()));
1590
cfg->addEdge(TR::CFGEdge::createEdge(fastBlock, otherBlock, trMemory()));
1591
cfg->addEdge(TR::CFGEdge::createEdge(slowBlock, otherBlock, trMemory()));
1592
for(int i = 0; i < numGuards; ++i)
1593
{
1594
cfg->addEdge(TR::CFGEdge::createEdge(testBlocks[i], slowBlock, trMemory()));
1595
}
1596
1597
// We introduced fastBlock in between these two, so it is not needed anymore
1598
cfg->removeEdge(lastTestBlock, otherBlock);
1599
1600
fastBlock->setIsExtensionOfPreviousBlock(false);
1601
slowBlock->setIsExtensionOfPreviousBlock(false);
1602
otherBlock->setIsExtensionOfPreviousBlock(false);
1603
1604
cfg->copyExceptionSuccessors(firstTestBlock, fastBlock);
1605
cfg->copyExceptionSuccessors(firstTestBlock, slowBlock);
1606
}
1607
1608
bool
1609
TR_DataAccessAccelerator::generatePD2IConstantParameter(TR::TreeTop* treeTop, TR::Node* callNode, bool isPD2i, bool isByteBuffer)
1610
{
1611
TR::Node* pdInputNode = callNode->getChild(0);
1612
TR::Node* offsetNode = callNode->getChild(1);
1613
TR::Node* precisionNode = callNode->getChild(2);
1614
TR::Node* overflowNode = callNode->getChild(3);
1615
1616
// Backing storage info for ByteBuffer
1617
TR::Node * pdBufAddressNodeCopy = NULL;
1618
TR::Node * pdBufCapacityNode = NULL;
1619
TR::Node * pdBufPositionNode = NULL;
1620
1621
TR::TreeTop *slowPathTreeTop = NULL;
1622
TR::TreeTop *fastPathTreeTop = NULL;
1623
TR::Node *slowPathNode = NULL;
1624
TR::Node *pd2iNode = NULL;
1625
TR::TreeTop *copiedCallNodeTreeTop = NULL;
1626
TR::Node *copiedCallNode = NULL;
1627
TR::SymbolReference *newSymbolReference = 0;
1628
TR::TreeTop *bcdchkTreeTop = NULL;
1629
int precision = precisionNode->getInt();
1630
int overflow = overflowNode->getInt();
1631
char* failMsg = NULL;
1632
1633
if (precision < 1)
1634
failMsg = "Invalid precision. Precision can not be less than 1";
1635
else if (isPD2i && precision > 10)
1636
failMsg = "Invalid precision. Precision can not be greater than 10";
1637
else if (!isPD2i && precision > 31)
1638
failMsg = "Invalid precision. Precision can not be greater than 31";
1639
1640
if (failMsg)
1641
{
1642
TR::DebugCounter::incStaticDebugCounter(comp(),
1643
TR::DebugCounter::debugCounterName(comp(),
1644
"DAA/rejected/%s",
1645
isPD2i ? "pd2i" : "pd2l"));
1646
return printInliningStatus(false, callNode, failMsg);
1647
}
1648
1649
if (performTransformation(comp(), "O^O TR_DataAccessAccelerator: %s call: %p inlined.\n", (isPD2i)?"generatePD2I":"generatePD2L", callNode))
1650
{
1651
TR::DebugCounter::incStaticDebugCounter(comp(),
1652
TR::DebugCounter::debugCounterName(comp(),
1653
"DAA/inlined/%s",
1654
isPD2i ? "pd2i" : "pd2l"));
1655
if (isByteBuffer)
1656
{
1657
/* We will be creating a precision diamond for the fast / slow path and eliminating the original call.
1658
* Because we are about to split the CFG we would have to store the original parameters of the call into
1659
* temp slots as we will be duplicating the call node in the precision diamond but we don't need to since
1660
* createConditionalBlocksBeforeTree takes care of it. createConditionalBlocksBeforeTree calls block::split
1661
* with true for the option fixupCommoning and so it will break the commoning and add any necessary temps for you.
1662
*/
1663
1664
pdBufAddressNodeCopy = TR::Node::copy(callNode->getChild(4));
1665
pdBufAddressNodeCopy->setReferenceCount(0);
1666
pdBufCapacityNode = callNode->getChild(5);
1667
pdBufPositionNode = callNode->getChild(6);
1668
}
1669
1670
//create the packed decimals
1671
1672
TR::Node * arrayAddressNode = constructAddressNode(callNode, pdInputNode, offsetNode);
1673
1674
int32_t size = TR::DataType::getSizeFromBCDPrecision(TR::PackedDecimal, precision) - 1;
1675
TR::SymbolReference * symRef = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, arrayAddressNode, 8, fe());
1676
TR::Node * pdload = TR::Node::create(TR::pdloadi, 1, arrayAddressNode);
1677
pdload->setSymbolReference(symRef);
1678
pdload->setDecimalPrecision(precision);
1679
1680
TR::TreeTop * prevTT = treeTop->getPrevTreeTop();
1681
TR::TreeTop * nextTT = treeTop->getNextTreeTop();
1682
TR::Node *bcdchk = NULL;
1683
TR::SymbolReference* bcdChkSymRef = callNode->getSymbolReference();
1684
1685
/* Attaching all the original callNode's children as the children to BCDCHK.
1686
* We don't want to attach the callNode as a child to BCDCHK since it would be an aberration to the
1687
* definition of a BCDCHK node. BCDCHK node is already a special type of node, and all optimizations expect the
1688
* call (pd2i) to be in its first child. Attaching another call could cause many things to break as all
1689
* optimizations such as Value Propagation don't expect it to be there. Attaching the callNode's children
1690
* to BCDCHK would be safe. We would whip up the call with these attached children during codegen
1691
* for the fallback of the fastpath.
1692
*/
1693
if (isByteBuffer)
1694
{
1695
// Tree with pd2i() under BCDCHK node
1696
copiedCallNodeTreeTop = TR::TreeTop::create(comp(),treeTop->getNode()->duplicateTree());
1697
copiedCallNode = copiedCallNodeTreeTop->getNode()->getFirstChild();
1698
bcdchk = TR::Node::createWithSymRef(TR::BCDCHK, 8, 8,
1699
copiedCallNode,
1700
callNode->getChild(0), callNode->getChild(1),
1701
callNode->getChild(2), callNode->getChild(3),
1702
callNode->getChild(4), callNode->getChild(5),
1703
callNode->getChild(6),
1704
bcdChkSymRef);
1705
1706
/*
1707
* BCDCHK would look something like this for ByteBuffer:
1708
*
1709
* n2975n BCDCHK [#959] ()
1710
n2958n pd2i ()
1711
n2949n pdloadi <array-shadow>[#497 Shadow] [flags 0x80000618 0x0 ] <prec=10 (len=6) adj=0> sign=hasState
1712
n2948n l2a
1713
n2947n ladd
1714
n2941n lload <temp slot 29>[#949 Auto] [flags 0x4 0x0 ] // address of ByteBuffer
1715
n2946n i2l
1716
n2945n iadd
1717
n2943n ==>iload // position
1718
n2938n ==>iload // offset
1719
n2937n ==>aload // ByteBuffer
1720
n2938n ==>iload // offset
1721
n2939n iload <temp slot 27>[#947 Auto] [flags 0x3 0x0 ] // precision
1722
n2940n iload <temp slot 28>[#948 Auto] [flags 0x3 0x0 ] // checkOverflow
1723
n2941n ==>lload // address of ByteBuffer
1724
n2942n ==>iload // capacity of ByteBuffer
1725
n2943n ==>iload // position of ByteBuffer
1726
*/
1727
}
1728
else
1729
{
1730
bcdchk = TR::Node::createWithSymRef(TR::BCDCHK, 5, 5,
1731
callNode,
1732
callNode->getChild(0), callNode->getChild(1),
1733
callNode->getChild(2), callNode->getChild(3),
1734
bcdChkSymRef);
1735
/*
1736
* BCDCHK would look something like this for byte[]:
1737
*
1738
* n2937n BCDCHK [#990] ()
1739
n990n pd2i ()
1740
n2929n pdloadi <array-shadow>[#486 Shadow] [flags 0xffffffff80000612 0x0 ] <prec=10 (len=6) adj=0> sign=hasState vn=- sti=- udi=- nc=1
1741
n2928n aladd (internalPtr sharedMemory )
1742
n986n ==>aload
1743
n2927n aladd
1744
n2925n lconst 8
1745
n2926n i2l
1746
n1001n ==>iconst 0
1747
n986n ==>aload // byte[]
1748
n1001n ==>iconst 0 // offset
1749
n1002n iconst 10 (X!=0 X>=0 ) // precision
1750
n1003n iconst 0 (X==0 X>=0 X<=0 ) // checkOverflow
1751
*/
1752
}
1753
1754
// Set inlined site index to make sure AOT TR_RelocationRecordConstantPool::computeNewConstantPool() API can
1755
// correctly compute a new CP to relocate DAA OOL calls.
1756
bcdchk->setInlinedSiteIndex(callNode->getInlinedSiteIndex());
1757
1758
TR::DataType dataType = callNode->getDataType();
1759
if (isByteBuffer)
1760
{
1761
TR::CFG *cfg = comp()->getFlowGraph();
1762
TR::Block *callBlock = treeTop->getEnclosingBlock();
1763
1764
// Generate the slow path
1765
slowPathTreeTop = TR::TreeTop::create(comp(),treeTop->getNode()->duplicateTree());
1766
slowPathNode = slowPathTreeTop->getNode()->getFirstChild();
1767
1768
// Generate the tree to check if the ByteBuffer has a valid address or not
1769
TR::Node* nullNode = TR::Node::create(TR::lconst, 0, 0);
1770
TR::Node *isValidAddrNode = TR::Node::createif(TR::iflcmpeq, pdBufAddressNodeCopy, nullNode, treeTop);
1771
TR::TreeTop *isValidAddrTreeTop = TR::TreeTop::create(comp(), isValidAddrNode, NULL, NULL);
1772
1773
bcdchkTreeTop = TR::TreeTop::create(comp(), bcdchk);
1774
fastPathTreeTop = bcdchkTreeTop;
1775
1776
if (callNode->getReferenceCount() > 1)
1777
{
1778
newSymbolReference = comp()->getSymRefTab()->createTemporary(comp()->getMethodSymbol(), dataType);
1779
TR::Node::recreate(callNode, comp()->il.opCodeForDirectLoad(dataType));
1780
callNode->setSymbolReference(newSymbolReference);
1781
callNode->removeAllChildren();
1782
}
1783
1784
callNode = copiedCallNode;
1785
1786
/* Create the diamond in CFG
1787
* if (ByteBuffer.address != NULL)
1788
* fastpath (CVB instruction executed by HW)
1789
* else
1790
* slowpath (call to Java method: convertPackedDecimalToInteger_)
1791
* */
1792
callBlock->createConditionalBlocksBeforeTree(treeTop, isValidAddrTreeTop, slowPathTreeTop, fastPathTreeTop, cfg, false, true);
1793
}
1794
1795
// we'll be removing the callNode, update its refcount before replacing its fields.
1796
// The callNode may have more than 1 reference (treetop and i/lstore), so we need to scan through its list of children.
1797
for (int32_t childCount = callNode->getNumChildren()-1; childCount >= 0; childCount--)
1798
callNode->getChild(childCount)->recursivelyDecReferenceCount();
1799
1800
// Replacing TT with BCDCHK, so losing one reference.
1801
callNode->decReferenceCount();
1802
1803
//create pd2i:
1804
pd2iNode = callNode;
1805
pd2iNode->setNumChildren(1);
1806
pd2iNode->setAndIncChild(0, pdload);
1807
if (!isByteBuffer)
1808
{
1809
pd2iNode->setSymbolReference(NULL);
1810
}
1811
1812
if (isPD2i)
1813
{
1814
if (!overflow)
1815
TR::Node::recreate(pd2iNode, TR::pd2i);
1816
else
1817
TR::Node::recreate(pd2iNode, TR::pd2iOverflow);
1818
}
1819
else
1820
{
1821
if (!overflow)
1822
TR::Node::recreate(pd2iNode, TR::pd2l);
1823
else
1824
TR::Node::recreate(pd2iNode, TR::pd2lOverflow);
1825
}
1826
1827
1828
if (isByteBuffer)
1829
{
1830
// the original call will be deleted by createConditionalBlocksBeforeTree, but if the refcount was > 1, we need to insert stores.
1831
1832
if (newSymbolReference)
1833
{
1834
/* Storing the result to a temp slot so that it can be loaded from there later
1835
* We would need to store the result to the same temp slot for both fast and slow path so that
1836
* we get the same result irrespective of the path taken.
1837
* For slowpath: storing the result of icall to temp slot
1838
* For fastpath: storing the result of pd2i() to temp slot
1839
*/
1840
1841
TR::Node *slowStoreNode = TR::Node::createWithSymRef(comp()->il.opCodeForDirectStore(dataType), 1, 1, slowPathTreeTop->getNode()->getFirstChild(), newSymbolReference);
1842
TR::TreeTop *slowStoreTree = TR::TreeTop::create(comp(), slowStoreNode);
1843
1844
slowPathTreeTop->insertAfter(slowStoreTree);
1845
1846
treeTop->setNode(bcdchk);
1847
TR::Node *fastStoreNode = TR::Node::createWithSymRef(comp()->il.opCodeForDirectStore(dataType), 1, 1, bcdchkTreeTop->getNode()->getFirstChild(), newSymbolReference);
1848
TR::TreeTop *fastStoreTree = TR::TreeTop::create(comp(), fastStoreNode);
1849
1850
bcdchkTreeTop->insertAfter(fastStoreTree);
1851
}
1852
else
1853
{
1854
treeTop->setNode(bcdchk);
1855
}
1856
}
1857
else
1858
{
1859
treeTop->setNode(bcdchk);
1860
prevTT->join(treeTop);
1861
treeTop->join(nextTT);
1862
}
1863
1864
return true;
1865
}
1866
return false;
1867
}
1868
1869
/**
 * Spills every argument of \p callNode into its own temporary and rewires the call to read
 * its arguments back from those temporaries.
 *
 * For each child of the call, a new temporary of the child's data type is created, a store of
 * the child into that temporary is inserted immediately before \p treeTop, and the child slot
 * of the call is replaced with a load of the temporary.
 *
 * \param treeTop   The tree top before which the argument stores are inserted.
 * \param callNode  The call whose children are replaced by loads of the new temporaries.
 *
 * \return \p callNode (the same node, modified in place).
 */
TR::Node*
TR_DataAccessAccelerator::restructureVariablePrecisionCallNode(TR::TreeTop* treeTop, TR::Node* callNode)
   {
   TR::ResolvedMethodSymbol* owningMethod = comp()->getMethodSymbol();
   TR::SymbolReferenceTable* symRefs = comp()->getSymRefTab();
   const uint32_t argCount = callNode->getNumChildren();

   uint32_t argIndex = 0;
   while (argIndex < argCount)
      {
      TR::Node* arg = callNode->getChild(argIndex);
      TR::SymbolReference* spillSlot = symRefs->createTemporary(owningMethod, arg->getDataType());

      // Anchor the original argument in a store placed ahead of the call's tree top
      TR::Node* spillStore = TR::Node::createStore(spillSlot, arg);
      treeTop->insertBefore(TR::TreeTop::create(comp(), spillStore));

      // The call gives up its reference to the original argument and reads the temporary instead
      arg->decReferenceCount();
      callNode->setAndIncChild(argIndex, TR::Node::createLoad(arg, spillSlot));

      ++argIndex;
      }

   return callNode;
   }
1887
1888
bool
1889
TR_DataAccessAccelerator::generatePD2IVariableParameter(TR::TreeTop* treeTop, TR::Node* callNode, bool isPD2i, bool isByteBuffer)
1890
{
1891
TR::Node* precisionNode = callNode->getChild(2);
1892
1893
if (comp()->getOption(TR_DisableVariablePrecisionDAA) ||
1894
!performTransformation(comp(), "O^O TR_DataAccessAccelerator: [DAA] Generating variable %s for node %p \n", isPD2i ? "PD2I" : "PD2L", callNode))
1895
{
1896
TR::DebugCounter::incStaticDebugCounter(comp(),
1897
TR::DebugCounter::debugCounterName(comp(),
1898
"DAA/rejected/%s",
1899
isPD2i ? "var-pd2i" : "var-pd2l"));
1900
return false;
1901
}
1902
1903
TR::DebugCounter::incStaticDebugCounter(comp(),
1904
TR::DebugCounter::debugCounterName(comp(),
1905
"DAA/inlined/%s",
1906
isPD2i ? "var-pd2i" : "var-pd2l"));
1907
// We will be creating a precision diamond for the fast / slow path and eliminating the original call.
1908
// Because we are about to split the CFG we must store the original parameters of the call into temp slots
1909
// as we will be duplicating the call node in the precision diamond. We cannot duplicate the parameters
1910
// because tree duplication breaks commoning, and thus we want to avoid a situation where a commoned reference
1911
// to a newarray node gets duplicated and uncommoned.
1912
callNode = restructureVariablePrecisionCallNode(treeTop, callNode);
1913
1914
// Duplicate two trees for the precision diamond
1915
TR::Node* slowNode = callNode->duplicateTree();
1916
TR::Node* fastNode = callNode->duplicateTree();
1917
1918
// Create the corresponding treetops
1919
TR::TreeTop* slowTT = TR::TreeTop::create(comp(), TR::Node::create(TR::treetop, 1, slowNode));
1920
TR::TreeTop* fastTT = TR::TreeTop::create(comp(), TR::Node::create(TR::treetop, 1, fastNode));
1921
1922
// We mark the slow path with a flag to prevent this optimization to recurse on the slow path
1923
slowNode->setDAAVariableSlowCall(true);
1924
1925
// Create the precision test diamond
1926
createPrecisionDiamond(comp(), treeTop, fastTT, slowTT, isPD2i, 1, precisionNode);
1927
1928
// Fix up any references to the original call
1929
if (callNode->getReferenceCount() > 0)
1930
{
1931
// Create a temp variable to store the result of the call
1932
TR::SymbolReference* temp = comp()->getSymRefTab()->createTemporary(comp()->getMethodSymbol(),
1933
callNode->getDataType());
1934
1935
TR::TreeTop* slowStore = TR::TreeTop::create(comp(), TR::Node::createStore(temp, slowNode));
1936
TR::TreeTop* fastStore = TR::TreeTop::create(comp(), TR::Node::createStore(temp, fastNode));
1937
1938
slowStore->join(slowTT->getNextTreeTop());
1939
fastStore->join(fastTT->getNextTreeTop());
1940
1941
slowTT->join(slowStore);
1942
fastTT->join(fastStore);
1943
1944
// Replacing original call with a load, so remove all children
1945
callNode->removeAllChildren();
1946
1947
// Update the op code to the correct type and make it reference the temp variable
1948
TR::Node::recreate(callNode, comp()->il.opCodeForDirectLoad(callNode->getDataType()));
1949
callNode->setSymbolReference(temp);
1950
}
1951
1952
// Create BCDCHK node
1953
TR::SymbolReference* bcdChkSymRef = fastNode->getSymbolReference();
1954
TR::Node* pdAddressNode = constructAddressNode(fastNode, fastNode->getChild(0), fastNode->getChild(1));
1955
TR::Node* bcdchkNode = TR::Node::createWithSymRef(TR::BCDCHK, 2, 2, fastNode, pdAddressNode, bcdChkSymRef);
1956
fastTT->setNode(bcdchkNode);
1957
1958
// TreeTop replaced by BCDCHK, so we lose 1 reference
1959
fastNode->decReferenceCount();
1960
1961
return true;
1962
}
1963
1964
/**
 * Entry point for inlining a packed decimal to int/long conversion call.
 *
 * Calls flagged as the slow path of an earlier variable precision transformation are left alone;
 * everything else is forwarded to generatePD2IConstantParameter().
 *
 * \param treeTop       The tree top anchoring the call.
 * \param callNode      The conversion call node.
 * \param isPD2i        Forwarded unchanged to generatePD2IConstantParameter().
 * \param isByteBuffer  Forwarded unchanged to generatePD2IConstantParameter().
 *
 * \return false for a flagged slow path call; otherwise the result of generatePD2IConstantParameter().
 */
bool TR_DataAccessAccelerator::generatePD2I(TR::TreeTop* treeTop, TR::Node* callNode, bool isPD2i, bool isByteBuffer)
   {
   const bool isSlowPathOfVariableDiamond = callNode->isDAAVariableSlowCall();

   if (!isSlowPathOfVariableDiamond)
      {
      TR_ASSERT(!IS_VARIABLE_PD2I(callNode), "Variable PD2I should not be handled here.");

      return generatePD2IConstantParameter(treeTop, callNode, isPD2i, isByteBuffer);
      }

   // Slow part of a previous variable precision DAA optimization: nothing to do
   return false;
   }
1976
1977
/**
 * Attempts to replace a DAA packed decimal arithmetic call with inline BCD IL.
 *
 * The call is expected to have 10 children, used here as: result array, result offset, result
 * precision, first input array, first input offset, first input precision, second input array,
 * second input offset, second input precision and the overflow flag. The three precisions and the
 * overflow flag (children 2, 5, 8 and 9) must be constants and every precision must lie in [1, 15].
 *
 * On success \p treeTop is re-pointed at a BCDCHK node and a pdstorei tree is inserted right after it:
 *
 *    BCDCHK
 *       pdshlOverflow
 *          operationNode
 *             pdLoadA
 *             pdLoadB
 *          iconst 0
 *       outOfLineCopyBackAddr
 *       call-param-0
 *       ...
 *       call-param-9
 *
 *    pdstorei
 *       pdStoreAddressNode
 *       => pdshlOverflow
 *
 * \param treeTop   The tree top anchoring the call.
 * \param callNode  The arithmetic call node.
 * \param opCode    The BCD operation to generate; one of pdadd, pdsub, pdmul, pddiv or pdrem.
 *
 * \return The value returned by printInliningStatus() for the accept/reject decision, or false if
 *         performTransformation() declines the transformation.
 */
bool TR_DataAccessAccelerator::genArithmeticIntrinsic(TR::TreeTop* treeTop, TR::Node* callNode, TR::ILOpCodes opCode)
   {
   // Check the shape of the call first: children 8 and 9 are accessed right below.
   TR_ASSERT(callNode->getNumChildren() == 10, "Expecting BCD arithmetics call with 10 children.");

   if (!isChildConst(callNode, 2) || !isChildConst(callNode, 5) ||
       !isChildConst(callNode, 8) || !isChildConst(callNode, 9))
      {
      return printInliningStatus(false, callNode, "Child (2|5|8|9) is not constant");
      }

   TR::Node* resultNode = callNode->getChild(0);
   TR::Node* resOffsetNode = callNode->getChild(1);
   TR::Node* resPrecNode = callNode->getChild(2);
   TR::Node* input1Node = callNode->getChild(3);
   TR::Node* offset1Node = callNode->getChild(4);
   TR::Node* prec1Node = callNode->getChild(5);
   TR::Node* input2Node = callNode->getChild(6);
   TR::Node* offset2Node = callNode->getChild(7);
   TR::Node* prec2Node = callNode->getChild(8);
   TR::Node* overflowNode = callNode->getChild(9);

   const int precision1 = prec1Node->getInt();
   const int precision2 = prec2Node->getInt();
   const int precisionResult = resPrecNode->getInt();

   if (precision1 < 1 || precision1 > 15 ||
       precision2 < 1 || precision2 > 15 ||
       precisionResult < 1 || precisionResult > 15)
      {
      return printInliningStatus(false, callNode, "Invalid precisions. Valid precisions are in range [1, 15]");
      }

   // Work out the precision of the raw arithmetic result before any node is created, so that an
   // unsupported opcode is rejected cleanly instead of producing a node without a precision.
   int operationPrecision = 0;
   switch (opCode)
      {
      case TR::pdadd:
      case TR::pdsub:
         operationPrecision = ((precision1 > precision2) ? precision1 : precision2) + 1;
         break;
      case TR::pdmul:
         operationPrecision = precision1 + precision2; //TODO: check +1. +1 because pdshlOverflow will do the overflow check
         break;
      case TR::pddiv:
         operationPrecision = precision1;
         break;
      case TR::pdrem:
         operationPrecision = (precision1 < precision2) ? precision1 : precision2;
         break;
      default:
         TR_ASSERT(0, "Unsupported DAA arithmetics opCode");
         return printInliningStatus(false, callNode, "Unsupported DAA arithmetics opCode");
      }

   if (!performTransformation(comp(), "O^O TR_DataAccessAccelerator: genArithmetics call: %p inlined, with opcode:%d \n", callNode, opCode))
      {
      return false;
      }

   // Load the first operand
   TR::Node * arrayAddressNodeA = constructAddressNode(callNode, input1Node, offset1Node);
   TR::SymbolReference * symRef1 = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, arrayAddressNodeA, 8, fe());
   symRef1->setOffset(0);

   TR::Node * pdloadA = TR::Node::create(TR::pdloadi, 1, arrayAddressNodeA);
   pdloadA->setSymbolReference(symRef1);
   pdloadA->setDecimalPrecision(precision1);

   // Load the second operand
   TR::Node * arrayAddressNodeB = constructAddressNode(callNode, input2Node, offset2Node);
   TR::SymbolReference * symRef2 = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, arrayAddressNodeB, 8, fe());
   symRef2->setOffset(0);

   TR::Node * pdloadB = TR::Node::create(TR::pdloadi, 1, arrayAddressNodeB);
   pdloadB->setSymbolReference(symRef2);
   pdloadB->setDecimalPrecision(precision2);

   // Create the actual arithmetic operation
   TR::Node * operationNode = TR::Node::create(opCode, 2, pdloadA, pdloadB);
   operationNode->setDecimalPrecision(operationPrecision);

   // Create separate address nodes for BCDCHK and pdstorei. See generateI2PD() for an explanation to this.
   TR::Node* outOfLineCopyBackAddr = constructAddressNode(callNode, resultNode, resOffsetNode);
   TR::Node* pdStoreAddressNode = constructAddressNode(callNode, resultNode, resOffsetNode);

   TR::ILOpCodes op = comp()->il.opCodeForIndirectStore(TR::PackedDecimal);
   TR::SymbolReference * symRefPdstore = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, outOfLineCopyBackAddr, 8, fe());
   TR::Symbol * symStore = TR::Symbol::createShadow(comp()->trHeapMemory(), TR::PackedDecimal, TR::DataType::getSizeFromBCDPrecision(TR::PackedDecimal, precisionResult));
   symStore->setArrayShadowSymbol();
   symRefPdstore->setSymbol(symStore);

   // A shift by 0 with the result precision brings the raw result to the requested precision
   TR::Node * pdshlNode = TR::Node::create(TR::pdshlOverflow, 2, operationNode,
                                           TR::Node::create(callNode, TR::iconst, 0, 0));
   pdshlNode->setDecimalPrecision(precisionResult);
   TR::Node * pdstore = TR::Node::create(op, 2, pdStoreAddressNode, pdshlNode);

   // The BCDCHK carries the original call's symbol reference and all 10 original call arguments
   TR::SymbolReference* bcdChkSymRef = callNode->getSymbolReference();
   TR::Node * bcdchk = TR::Node::createWithSymRef(TR::BCDCHK, 12, 12,
                                                  pdshlNode, outOfLineCopyBackAddr,
                                                  resultNode, resOffsetNode,
                                                  resPrecNode, input1Node,
                                                  offset1Node, prec1Node,
                                                  input2Node, offset2Node,
                                                  prec2Node, overflowNode,
                                                  bcdChkSymRef);

   // Set inlined site index to make sure AOT TR_RelocationRecordConstantPool::computeNewConstantPool() API can
   // correctly compute a new CP to relocate DAA OOL calls.
   bcdchk->setInlinedSiteIndex(callNode->getInlinedSiteIndex());

   pdstore->setSymbolReference(symRefPdstore);
   pdstore->setDecimalPrecision(precisionResult);

   TR::TreeTop * ttPdstore = TR::TreeTop::create(comp(), pdstore);

   // Join treetops: prevTT -> treeTop (now BCDCHK) -> pdstore -> nextTT
   TR::TreeTop * nextTT = treeTop->getNextTreeTop();
   TR::TreeTop * prevTT = treeTop->getPrevTreeTop();
   treeTop->setNode(bcdchk);
   prevTT->join(treeTop);
   treeTop->join(ttPdstore);
   ttPdstore->join(nextTT);

   // The original call is no longer anchored by the tree top
   callNode->recursivelyDecReferenceCount();

   return printInliningStatus(true, callNode);
   }
2121
2122
/**
 * Attempts to replace a DAA packed decimal shift-right call with inline BCD IL.
 *
 * The call's children are used here as: destination array, destination offset, destination
 * precision, source array, source offset, source precision, shift amount, round flag and overflow
 * flag. The precisions, the shift amount, the round flag and the overflow flag (children 2, 5, 6, 7
 * and 8) must be constants: the shift amount is read as a constant to validate the destination
 * precision, and the round flag is folded into an iconst (5 when rounding, 0 otherwise).
 *
 * On success \p treeTop is re-pointed at a BCDCHK node whose first child is a pdshr of the loaded
 * source, and a packed decimal store of that pdshr into the destination is inserted right after it.
 *
 * \param treeTop   The tree top anchoring the call.
 * \param callNode  The shift-right call node.
 *
 * \return The value returned by printInliningStatus() for the accept/reject decision.
 */
bool TR_DataAccessAccelerator::genShiftRightIntrinsic(TR::TreeTop* treeTop, TR::Node* callNode)
   {
   TR::Node * dstNode = callNode->getChild(0);
   TR::Node * dstOffsetNode = callNode->getChild(1);
   TR::Node * dstPrecNode = callNode->getChild(2);

   TR::Node * srcNode = callNode->getChild(3);
   TR::Node * srcOffsetNode = callNode->getChild(4);
   TR::Node * srcPrecNode = callNode->getChild(5);

   TR::Node * shiftNode = callNode->getChild(6);
   TR::Node * roundNode = callNode->getChild(7);
   TR::Node * overflowNode = callNode->getChild(8);

   // These values are only meaningful if the corresponding children are constants, which is the
   // first thing verified in the validation chain below.
   int srcPrec = srcPrecNode->getInt();
   int dstPrec = dstPrecNode->getInt();

   int shiftAmount = shiftNode->getInt();
   int isRound = roundNode->getInt();
   char* failMsg = NULL;

   // The shift amount (child 6) must be constant as well: it is read via getInt() above and takes
   // part in the precision validation below.
   if (!isChildConst(callNode, 2) || !isChildConst(callNode, 5) ||
       !isChildConst(callNode, 6) || !isChildConst(callNode, 7) ||
       !isChildConst(callNode, 8))
      failMsg = "Child (2|5|6|7|8) is not constant";
   else if (srcPrec < 1)
      failMsg = "Invalid precision. Source precision can not be less than 1";
   else if (dstPrec < 1)
      failMsg = "Invalid precision. Destination precision can not be less than 1";
   else if (srcPrec > 15)
      failMsg = "Invalid precision. Source precision can not be greater than 15";
   else if (dstPrec > 15)
      failMsg = "Invalid precision. Destination precision can not be greater than 15";
   else if (shiftAmount < 0)
      failMsg = "Invalid shift amount. Shift amount can not be less than 0";
   else if (dstPrec < srcPrec - shiftAmount)
      failMsg = "Invalid shift amount. Precision is too low to contain shifted Packed Decimal";

   // Only ask for permission once the call is known to be transformable, so that a rejected call
   // is never reported as inlined by performTransformation.
   if (!failMsg && !performTransformation(comp(), "O^O TR_DataAccessAccelerator: genShiftRight call: %p inlined.\n", callNode))
      failMsg = "Not allowed";

   if (failMsg)
      {
      TR::DebugCounter::incStaticDebugCounter(comp(),
                                              TR::DebugCounter::debugCounterName(comp(),
                                                                                 "DAA/rejected/shr"));
      return printInliningStatus(false, callNode, failMsg);
      }

   TR::DebugCounter::incStaticDebugCounter(comp(),
                                           TR::DebugCounter::debugCounterName(comp(),
                                                                              "DAA/inlined/shr"));

   //gen source pdload
   TR::Node * arrayAddressNode = constructAddressNode(callNode, srcNode, srcOffsetNode);

   TR::SymbolReference * symRef = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, arrayAddressNode, 8, fe());
   symRef->setOffset(0);

   //gen pdshr:
   TR::Node * roundValueNode = TR::Node::create(callNode, TR::iconst, 0, isRound ? 5 : 0);

   // Separate address nodes are built for the BCDCHK and for the store
   TR::Node * outOfLineCopyBackAddr = constructAddressNode(callNode, dstNode, dstOffsetNode);
   TR::Node * pdStoreAddressNode = constructAddressNode(callNode, dstNode, dstOffsetNode);

   TR::Node * pdload = TR::Node::create(TR::pdloadi, 1, arrayAddressNode);
   pdload->setSymbolReference(symRef);
   pdload->setDecimalPrecision(srcPrec);

   TR::Node * pdshrNode = TR::Node::create(TR::pdshr, 3, pdload, shiftNode, roundValueNode);
   pdshrNode->setDecimalPrecision(dstPrec);

   // The BCDCHK carries the original call's symbol reference and all 9 original call arguments
   TR::SymbolReference* bcdChkSymRef = callNode->getSymbolReference();
   TR::Node* bcdchkNode = TR::Node::createWithSymRef(TR::BCDCHK, 11, 11,
                                                     pdshrNode, outOfLineCopyBackAddr,
                                                     dstNode, dstOffsetNode,
                                                     dstPrecNode, srcNode,
                                                     srcOffsetNode, srcPrecNode,
                                                     shiftNode, roundNode,
                                                     overflowNode,
                                                     bcdChkSymRef);

   // Set inlined site index to make sure AOT TR_RelocationRecordConstantPool::computeNewConstantPool() API can
   // correctly compute a new CP to relocate DAA OOL calls.
   bcdchkNode->setInlinedSiteIndex(callNode->getInlinedSiteIndex());

   TR::ILOpCodes op = comp()->il.opCodeForIndirectStore(TR::PackedDecimal);
   TR::SymbolReference * symRefPdstore = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, outOfLineCopyBackAddr, 8, fe());
   TR::Symbol * symStore = TR::Symbol::createShadow(comp()->trHeapMemory(), TR::PackedDecimal, TR::DataType::getSizeFromBCDPrecision(TR::PackedDecimal, dstPrec));
   symStore->setArrayShadowSymbol();
   symRefPdstore->setSymbol(symStore);

   TR::Node * pdstore = TR::Node::create(op, 2, pdStoreAddressNode, pdshrNode);

   pdstore->setSymbolReference(symRefPdstore);
   pdstore->setDecimalPrecision(dstPrec);

   TR::TreeTop * pdstoreNodeTT = TR::TreeTop::create(comp(), pdstore);

   //link them together: prevTT -> treeTop (now BCDCHK) -> pdstore -> nextTT
   TR::TreeTop * prevTT = treeTop->getPrevTreeTop();
   TR::TreeTop * nextTT = treeTop->getNextTreeTop();

   prevTT->join(treeTop);
   treeTop->setNode(bcdchkNode);
   treeTop->join(pdstoreNodeTT);
   pdstoreNodeTT->join(nextTT);

   // The original call is no longer anchored by the tree top
   callNode->recursivelyDecReferenceCount();
   return printInliningStatus(true, callNode);
   }
2230
2231
/**
 * Attempts to replace a DAA packed decimal shift-left call with inline BCD IL.
 *
 * The call's children are used here as: destination array, destination offset, destination
 * precision, source array, source offset, source precision and shift amount; child 7 is passed
 * through to the BCDCHK untouched. Children 2, 5, 6 and 7 must be constants, both precisions must
 * lie in [1, 15] and the shift amount must not be negative.
 *
 * On success \p treeTop is re-pointed at a BCDCHK node whose first child is a pdshlOverflow of the
 * loaded source, and a packed decimal store of that node into the destination is inserted right
 * after it.
 *
 * \param treeTop   The tree top anchoring the call.
 * \param callNode  The shift-left call node.
 *
 * \return The value returned by printInliningStatus() for the accept/reject decision.
 */
bool TR_DataAccessAccelerator::genShiftLeftIntrinsic(TR::TreeTop* treeTop, TR::Node* callNode)
   {
   TR::Node * dstNode = callNode->getChild(0);
   TR::Node * dstOffsetNode = callNode->getChild(1);
   TR::Node * dstPrecNode = callNode->getChild(2);

   TR::Node * srcNode = callNode->getChild(3);
   TR::Node * srcOffsetNode = callNode->getChild(4);
   TR::Node * srcPrecNode = callNode->getChild(5);

   TR::Node * shiftNode = callNode->getChild(6);

   // These values are only meaningful if the corresponding children are constants, which is the
   // first thing verified in the validation chain below.
   int srcPrec = srcPrecNode->getInt();
   int dstPrec = dstPrecNode->getInt();
   int shiftAmount = shiftNode->getInt();
   char* failMsg = NULL;

   if (!isChildConst(callNode, 2) || !isChildConst(callNode, 5) ||
       !isChildConst(callNode, 6) || !isChildConst(callNode, 7))
      failMsg = "Child (2|5|6|7) is not constant";
   else if (srcPrec < 1)
      failMsg = "Invalid precision. Source precision can not be less than 1";
   else if (dstPrec < 1)
      failMsg = "Invalid precision. Destination precision can not be less than 1";
   else if (srcPrec > 15)
      failMsg = "Invalid precision. Source precision can not be greater than 15";
   else if (dstPrec > 15)
      failMsg = "Invalid precision. Destination precision can not be greater than 15";
   else if (shiftAmount < 0)
      failMsg = "Invalid shift amount. Shift amount can not be less than 0";

   // Only ask for permission once the call is known to be transformable, so that a rejected call
   // is never reported as inlined by performTransformation.
   if (!failMsg && !performTransformation(comp(), "O^O TR_DataAccessAccelerator: genShiftLeft call: %p inlined.\n", callNode))
      failMsg = "Not allowed";

   if (failMsg)
      {
      TR::DebugCounter::incStaticDebugCounter(comp(),
                                              TR::DebugCounter::debugCounterName(comp(),
                                                                                 "DAA/rejected/shl"));
      return printInliningStatus(false, callNode, failMsg);
      }

   TR::DebugCounter::incStaticDebugCounter(comp(),
                                           TR::DebugCounter::debugCounterName(comp(),
                                                                              "DAA/inlined/shl"));

   // Separate destination address nodes are built for the BCDCHK and for the store
   TR::Node* srcAddrNode = constructAddressNode(callNode, srcNode, srcOffsetNode);
   TR::Node* outOfLineCopyBackAddr = constructAddressNode(callNode, dstNode, dstOffsetNode);
   TR::Node* pdStoreAddrNode = constructAddressNode(callNode, dstNode, dstOffsetNode);

   //pdload:
   TR::Node * pdload = TR::Node::create(TR::pdloadi, 1, srcAddrNode);
   TR::SymbolReference * symRef = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, srcAddrNode, 8, fe());
   symRef->setOffset(0);
   pdload->setSymbolReference(symRef);
   pdload->setDecimalPrecision(srcPrec);

   // Always use BCDCHK node for exception handling (invalid digits/sign).
   TR::Node * pdshlNode = TR::Node::create(TR::pdshlOverflow, 2, pdload, shiftNode);
   pdshlNode->setDecimalPrecision(dstPrec);

   // The BCDCHK carries the original call's symbol reference and all 8 original call arguments
   TR::SymbolReference* bcdChkSymRef = callNode->getSymbolReference();
   TR::Node* bcdchkNode = TR::Node::createWithSymRef(TR::BCDCHK, 10, 10,
                                                     pdshlNode, outOfLineCopyBackAddr,
                                                     callNode->getChild(0), callNode->getChild(1),
                                                     callNode->getChild(2), callNode->getChild(3),
                                                     callNode->getChild(4), callNode->getChild(5),
                                                     callNode->getChild(6), callNode->getChild(7),
                                                     bcdChkSymRef);

   // Set inlined site index to make sure AOT TR_RelocationRecordConstantPool::computeNewConstantPool() API can
   // correctly compute a new CP to relocate DAA OOL calls.
   bcdchkNode->setInlinedSiteIndex(callNode->getInlinedSiteIndex());

   //following pdstore
   TR::ILOpCodes op = comp()->il.opCodeForIndirectStore(TR::PackedDecimal);
   TR::SymbolReference * symRefPdstore = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, outOfLineCopyBackAddr, 8, fe());
   TR::Symbol * symStore = TR::Symbol::createShadow(comp()->trHeapMemory(), TR::PackedDecimal, TR::DataType::getSizeFromBCDPrecision(TR::PackedDecimal, dstPrec));
   symStore->setArrayShadowSymbol();
   symRefPdstore->setSymbol(symStore);

   TR::Node * pdstore = TR::Node::create(op, 2, pdStoreAddrNode, pdshlNode);
   pdstore->setSymbolReference(symRefPdstore);
   pdstore->setDecimalPrecision(dstPrec);

   //gen treeTop tops: prevTT -> treeTop (now BCDCHK) -> pdstore -> nextTT
   TR::TreeTop * nextTT = treeTop->getNextTreeTop();
   TR::TreeTop * prevTT = treeTop->getPrevTreeTop();
   TR::TreeTop * pdstoreTT = TR::TreeTop::create(comp(), pdstore);

   prevTT->join(treeTop);
   treeTop->setNode(bcdchkNode);
   treeTop->join(pdstoreTT);
   pdstoreTT->join(nextTT);

   // The original call is no longer anchored by the tree top
   callNode->recursivelyDecReferenceCount();
   return printInliningStatus(true, callNode);
   }
2331
2332
bool TR_DataAccessAccelerator::generateUD2PD(TR::TreeTop* treeTop, TR::Node* callNode, bool isUD2PD)
2333
{
2334
TR::Node * decimalNode = callNode->getChild(0);
2335
TR::Node * decimalOffsetNode = callNode->getChild(1);
2336
TR::Node * pdNode = callNode->getChild(2);
2337
TR::Node * pdOffsetNode = callNode->getChild(3);
2338
TR::Node * precNode = callNode->getChild(4);
2339
TR::Node * typeNode = callNode->getChild(5);
2340
2341
//first, check decimalType
2342
int type = typeNode->getInt();
2343
int prec = precNode->getInt();
2344
char* failMsg = NULL;
2345
2346
if (!isChildConst(callNode, 4) || !isChildConst(callNode, 5))
2347
failMsg = "Child (4|5) is not constant";
2348
else if (isUD2PD && type != 5 && type != 6 && type != 7)
2349
failMsg = "Invalid decimal type. Supported types are (5|6|7)";
2350
else if (!isUD2PD && (type < 1 || type > 4))
2351
failMsg = "Invalid decimal type. Supported types are (1|2|3|4)";
2352
else if (prec < 1 || prec > 31)
2353
failMsg = "Invalid precision. Valid precision is in range [1, 31]";
2354
2355
if (failMsg)
2356
{
2357
TR::DebugCounter::incStaticDebugCounter(comp(),
2358
TR::DebugCounter::debugCounterName(comp(),
2359
"DAA/rejected/ud2pd"));
2360
2361
return printInliningStatus(false, callNode, failMsg);
2362
}
2363
2364
if (performTransformation(comp(), "O^O TR_DataAccessAccelerator: generate UD2PD/ED2PD call: %p inlined.\n", callNode))
2365
{
2366
TR::DebugCounter::incStaticDebugCounter(comp(),
2367
TR::DebugCounter::debugCounterName(comp(),
2368
"DAA/inlined/ud2pd"));
2369
2370
TR::ILOpCodes loadOp;
2371
TR::DataType dt = TR::DataTypes::NoType; //unicode data type, as it could be unsigned decimal, sign trailing or sign leading.
2372
2373
switch (type)
2374
{
2375
case 1:
2376
loadOp = TR::zdloadi;
2377
dt = TR::ZonedDecimal;
2378
break;
2379
case 2:
2380
loadOp = TR::zdsleLoadi;
2381
dt = TR::ZonedDecimalSignLeadingEmbedded;
2382
break;
2383
case 3:
2384
loadOp = TR::zdstsLoadi;
2385
dt = TR::ZonedDecimalSignTrailingSeparate;
2386
break;
2387
case 4:
2388
loadOp = TR::zdslsLoadi;
2389
dt = TR::ZonedDecimalSignLeadingSeparate;
2390
break;
2391
case 5:
2392
loadOp = TR::udLoadi;
2393
dt = TR::UnicodeDecimal;
2394
break;
2395
case 6:
2396
loadOp = TR::udslLoadi;
2397
dt = TR::UnicodeDecimalSignLeading;
2398
break;
2399
case 7:
2400
loadOp = TR::udstLoadi;
2401
dt = TR::UnicodeDecimalSignTrailing;
2402
break;
2403
default:
2404
TR_ASSERT(false, "illegal decimalType.\n");
2405
}
2406
2407
//create decimalload
2408
TR::Node * decimalAddressNode;
2409
int offset = decimalOffsetNode->getInt();
2410
TR::Node * twoConstNode;
2411
TR::Node * multipliedOffsetNode;
2412
TR::Node * totalOffsetNode;
2413
TR::Node * headerConstNode;
2414
if (comp()->target().is64Bit())
2415
{
2416
headerConstNode = TR::Node::create(callNode, TR::lconst, 0, 0);
2417
headerConstNode->setLongInt(TR::Compiler->om.contiguousArrayHeaderSizeInBytes());
2418
twoConstNode = TR::Node::create(callNode, TR::lconst, 0, 0);
2419
twoConstNode->setLongInt(isUD2PD ? 2 : 1);
2420
multipliedOffsetNode = TR::Node::create(TR::lmul, 2,
2421
TR::Node::create(TR::i2l, 1, decimalOffsetNode), twoConstNode);
2422
totalOffsetNode = TR::Node::create(TR::ladd, 2, headerConstNode, multipliedOffsetNode);
2423
decimalAddressNode = TR::Node::create(TR::aladd, 2, decimalNode, totalOffsetNode);
2424
}
2425
else
2426
{
2427
headerConstNode = TR::Node::create(callNode, TR::iconst, 0,
2428
TR::Compiler->om.contiguousArrayHeaderSizeInBytes());
2429
twoConstNode = TR::Node::create(callNode, TR::iconst, 0, isUD2PD ? 2 : 1);
2430
multipliedOffsetNode = TR::Node::create(TR::imul, 2, decimalOffsetNode, twoConstNode);
2431
totalOffsetNode = TR::Node::create(TR::iadd, 2, headerConstNode, multipliedOffsetNode);
2432
decimalAddressNode = TR::Node::create(TR::aiadd, 2, decimalNode, totalOffsetNode);
2433
}
2434
2435
decimalAddressNode->setIsInternalPointer(true);
2436
TR::SymbolReference * symRef = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(dt, decimalAddressNode, 8, fe());
2437
symRef->setOffset(0);
2438
2439
TR::Node * decimalload = TR::Node::create(loadOp, 1, decimalAddressNode);
2440
decimalload->setSymbolReference(symRef);
2441
decimalload->setDecimalPrecision(prec);
2442
2443
//create PDaddress
2444
TR::Node * pdAddressNode = constructAddressNode(callNode, pdNode, pdOffsetNode);
2445
2446
int elementSize = isUD2PD ? TR::DataType::getUnicodeElementSize() : TR::DataType::getZonedElementSize();
2447
2448
//bound values
2449
int pdPrecSize = TR::DataType::getSizeFromBCDPrecision(TR::PackedDecimal, prec) - 1;
2450
int decimalPrecSize = (TR::DataType::getSizeFromBCDPrecision(dt, prec) / elementSize) - 1;
2451
TR::Node * pdBndvalue = TR::Node::create(TR::iadd, 2,
2452
pdOffsetNode,
2453
TR::Node::create(callNode, TR::iconst, 0, pdPrecSize));
2454
TR::Node * decimalBndvalue = TR::Node::create(TR::iadd, 2,
2455
decimalOffsetNode,
2456
TR::Node::create(callNode, TR::iconst, 0, decimalPrecSize)); //size of unicode is 2 bytes
2457
2458
//create ud2pd
2459
TR::ILOpCodes op = TR::BadILOp;
2460
TR::ILOpCodes interOp = TR::BadILOp;
2461
switch (type)
2462
{
2463
case 1:
2464
op = TR::zd2pd;
2465
break;
2466
case 2:
2467
interOp = TR::zdsle2zd;
2468
op = TR::zd2pd;
2469
break;
2470
case 3:
2471
interOp = TR::zdsts2zd;
2472
op = TR::zd2pd;
2473
break;
2474
case 4:
2475
interOp = TR::zdsls2zd;
2476
op = TR::zd2pd;
2477
break;
2478
case 5:
2479
op = TR::ud2pd;
2480
break;
2481
case 6:
2482
op = TR::udsl2pd;
2483
break;
2484
case 7:
2485
op = TR::udst2pd;
2486
break;
2487
default:
2488
TR_ASSERT(false, "illegal decimalType.\n");
2489
}
2490
2491
TR::Node * decimal2pdNode = NULL;
2492
if (isUD2PD || type == 1)
2493
{
2494
//for converting zd to pd (here type == 1), dont need the additional intermediate conversion
2495
decimal2pdNode = TR::Node::create(op, 1, decimalload);
2496
}
2497
else //ED2PD, needs intermediate conversion
2498
{
2499
TR::Node * decimal2zdNode = TR::Node::create(interOp, 1, decimalload);
2500
decimal2zdNode->setDecimalPrecision(prec);
2501
decimal2pdNode = TR::Node::create(op, 1, decimal2zdNode);
2502
}
2503
decimal2pdNode->setDecimalPrecision(prec);
2504
2505
//create pdstore
2506
TR::Node * pdstoreNode = TR::Node::create(TR::pdstorei, 2, pdAddressNode, decimal2pdNode);
2507
TR::SymbolReference * symRefStore = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, pdAddressNode, 8, fe());
2508
TR::Symbol * symStore = TR::Symbol::createShadow(comp()->trHeapMemory(), TR::PackedDecimal, TR::DataType::getSizeFromBCDPrecision(TR::PackedDecimal, prec));
2509
symRefStore->setSymbol(symStore);
2510
2511
pdstoreNode->setSymbolReference(symRefStore);
2512
pdstoreNode->setDecimalPrecision(prec);
2513
2514
//set up bndchks, and null chks
2515
TR::Node * pdPassThroughNode = TR::Node::create(TR::PassThrough, 1, pdNode);
2516
TR::Node * decimalPassThroughNode = TR::Node::create(TR::PassThrough, 1, decimalNode);
2517
2518
TR::Node * pdArrayLengthNode = TR::Node::create(TR::arraylength, 1, pdNode);
2519
TR::Node * decimalArrayLengthNode = TR::Node::create(TR::arraylength, 1, decimalNode);
2520
2521
TR::Node * pdNullChkNode = TR::Node::createWithSymRef(TR::NULLCHK, 1, 1, pdPassThroughNode, getSymRefTab()->findOrCreateRuntimeHelper(TR_nullCheck, false, true, true));
2522
TR::Node * decimalNullChkNode = TR::Node::createWithSymRef(TR::NULLCHK, 1, 1, decimalPassThroughNode, getSymRefTab()->findOrCreateRuntimeHelper(TR_nullCheck, false, true, true));
2523
2524
TR::Node * pdBndChk = TR::Node::createWithSymRef(TR::BNDCHK, 2, 2, pdArrayLengthNode, pdOffsetNode, getSymRefTab()->findOrCreateRuntimeHelper(TR_arrayBoundsCheck, false, true, true));
2525
TR::Node * pdBndChk2 =TR::Node::createWithSymRef(TR::BNDCHK, 2, 2, pdArrayLengthNode, pdBndvalue, getSymRefTab()->findOrCreateRuntimeHelper(TR_arrayBoundsCheck, false, true, true));
2526
TR::Node * decimalBndChk = TR::Node::createWithSymRef(TR::BNDCHK, 2, 2, decimalArrayLengthNode, decimalOffsetNode,
2527
getSymRefTab()->findOrCreateRuntimeHelper(TR_arrayBoundsCheck, false, true, true));
2528
TR::Node * decimalBndChk2 =TR::Node::createWithSymRef(TR::BNDCHK, 2, 2, decimalArrayLengthNode, decimalBndvalue,
2529
getSymRefTab()->findOrCreateRuntimeHelper(TR_arrayBoundsCheck, false, true, true));
2530
2531
//gen tree tops
2532
TR::TreeTop * pdNullChktt = TR::TreeTop::create(comp(), pdNullChkNode);
2533
TR::TreeTop * decimalNullChktt = TR::TreeTop::create(comp(), decimalNullChkNode);
2534
2535
TR::TreeTop * pdBndChktt1 = TR::TreeTop::create(comp(), pdBndChk);
2536
TR::TreeTop * pdBndChktt2 = TR::TreeTop::create(comp(), pdBndChk2);
2537
TR::TreeTop * decimalBndChktt1 = TR::TreeTop::create(comp(), decimalBndChk );
2538
TR::TreeTop * decimalBndChktt2 = TR::TreeTop::create(comp(), decimalBndChk2);
2539
2540
TR::TreeTop * ttPdstore = TR::TreeTop::create(comp(), pdstoreNode);
2541
2542
2543
//link together
2544
TR::TreeTop * nextTT = treeTop->getNextTreeTop();
2545
TR::TreeTop * prevTT = treeTop->getPrevTreeTop();
2546
2547
prevTT->join(decimalNullChktt);
2548
decimalNullChktt->join(pdNullChktt);
2549
pdNullChktt->join(decimalBndChktt1);
2550
decimalBndChktt1->join(decimalBndChktt2);
2551
decimalBndChktt2->join(pdBndChktt1);
2552
pdBndChktt1->join(pdBndChktt2);
2553
pdBndChktt2->join(ttPdstore);
2554
ttPdstore->join(nextTT);
2555
2556
callNode->recursivelyDecReferenceCount();
2557
return true;
2558
}
2559
2560
return false;
2561
}
2562
2563
/**
 * Tries to replace a packed-decimal to unicode-decimal (PD2UD) or packed-decimal to
 * external-decimal (PD2ED) conversion call with inline IL.
 *
 * The generated sequence consists of a NULLCHK on each array, two BNDCHKs on each array
 * (first and last element touched) and an indirect BCD store of the converted value. The
 * original call tree top is unlinked from the tree list and the call node's reference
 * counts are recursively decremented.
 *
 * \param treeTop   Tree top that anchors \p callNode.
 * \param callNode  The call to transform. Children are read as:
 *                  0 = packed decimal array, 1 = packed decimal offset,
 *                  2 = destination decimal array, 3 = destination offset,
 *                  4 = precision (must be a constant in [1, 31]),
 *                  5 = decimal type (must be a constant).
 * \param isPD2UD   true for the unicode variants (decimal types 5, 6, 7); false for the
 *                  external decimal variants (decimal types 1 through 4).
 *
 * \return true if the call was replaced by inline IL; otherwise the value returned by
 *         printInliningStatus for a rejected call, or false if the transformation was
 *         not performed.
 */
bool TR_DataAccessAccelerator::generatePD2UD(TR::TreeTop* treeTop, TR::Node* callNode, bool isPD2UD)
   {
   TR::Node * pdNode = callNode->getChild(0);
   TR::Node * pdOffsetNode = callNode->getChild(1);
   TR::Node * decimalNode = callNode->getChild(2);
   TR::Node * decimalOffsetNode = callNode->getChild(3);
   TR::Node * precNode = callNode->getChild(4);
   TR::Node * typeNode = callNode->getChild(5);

   // String literals are const; keep the pointer const-correct.
   const char* failMsg = NULL;
   int type = 0;
   int prec = 0;

   // Only read the precision and decimal type once both children are known to be constants.
   if (!isChildConst(callNode, 4) || !isChildConst(callNode, 5))
      {
      failMsg = "Child (4|5) is not constant";
      }
   else
      {
      type = typeNode->getInt();
      prec = precNode->getInt();

      if (isPD2UD && type != 5 && type != 6 && type != 7)
         failMsg = "Invalid decimal type. Supported types are (5|6|7)";
      else if (!isPD2UD && (type < 1 || type > 4)) //PD2ED
         failMsg = "Invalid decimal type. Supported types are (1|2|3|4)";
      else if (prec < 1 || prec > 31)
         failMsg = "Invalid precision. Valid precision is in range [1, 31]";
      }

   if (failMsg)
      {
      TR::DebugCounter::incStaticDebugCounter(comp(),
                                              TR::DebugCounter::debugCounterName(comp(),
                                                                                 "DAA/rejected/%s",
                                                                                 isPD2UD ? "pd2ud" : "pd2ed"));

      return printInliningStatus(false, callNode, failMsg);
      }

   if (performTransformation(comp(), "O^O TR_DataAccessAccelerator: generate PD2UD/PD2ED call: %p inlined.\n", callNode))
      {
      TR::DebugCounter::incStaticDebugCounter(comp(),
                                              TR::DebugCounter::debugCounterName(comp(),
                                                                                 "DAA/inlined/%s",
                                                                                 isPD2UD ? "pd2ud" : "pd2ed"));

      //set up pdload:
      TR::Node * arrayAddressNode = constructAddressNode(callNode, pdNode, pdOffsetNode);

      TR::SymbolReference * symRef = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, arrayAddressNode, 8, fe());
      symRef->setOffset(0);

      TR::Node * pdload = TR::Node::create(TR::pdloadi, 1, arrayAddressNode);
      pdload->setSymbolReference(symRef);
      pdload->setDecimalPrecision(prec);

      //set up decimal arrayAddressNode:
      //   decimalNode + (array header size + decimalOffset * scale)
      //where scale is 2 for PD2UD and 1 for PD2ED
      TR::Node * decimalAddressNode;
         {
         TR::Node * twoConstNode;
         TR::Node * multipliedOffsetNode;
         TR::Node * totalOffsetNode;
         TR::Node * headerConstNode;
         if (comp()->target().is64Bit())
            {
            headerConstNode = TR::Node::create(callNode, TR::lconst, 0, 0);
            headerConstNode->setLongInt(TR::Compiler->om.contiguousArrayHeaderSizeInBytes());
            twoConstNode = TR::Node::create(callNode, TR::lconst, 0, 0);
            twoConstNode->setLongInt(isPD2UD ? 2 : 1);
            multipliedOffsetNode = TR::Node::create(TR::lmul, 2,
                                                    TR::Node::create(TR::i2l, 1, decimalOffsetNode), twoConstNode);
            totalOffsetNode = TR::Node::create(TR::ladd, 2, headerConstNode, multipliedOffsetNode);
            decimalAddressNode = TR::Node::create(TR::aladd, 2, decimalNode, totalOffsetNode);
            }
         else
            {
            headerConstNode = TR::Node::create(callNode, TR::iconst, 0,
                                               TR::Compiler->om.contiguousArrayHeaderSizeInBytes());
            twoConstNode = TR::Node::create(callNode, TR::iconst, 0, isPD2UD ? 2 : 1);
            multipliedOffsetNode = TR::Node::create(TR::imul, 2, decimalOffsetNode, twoConstNode);
            totalOffsetNode = TR::Node::create(TR::iadd, 2, headerConstNode, multipliedOffsetNode);
            decimalAddressNode = TR::Node::create(TR::aiadd, 2, decimalNode, totalOffsetNode);
            }
         decimalAddressNode->setIsInternalPointer(true);
         }

      //set up pd2decimal node
      //   op      : conversion producing the destination type
      //   interOp : intermediate conversion, only used on the PD2ED path for types 2, 3 and 4
      //   storeOp : indirect store opcode matching the destination type
      //   dt      : destination data type
      TR::ILOpCodes op = TR::BadILOp;
      TR::ILOpCodes storeOp = TR::BadILOp;
      TR::ILOpCodes interOp = TR::BadILOp;
      TR::DataType dt = TR::NoType;
      switch (type)
         {
         case 1:
            op = TR::pd2zd;
            storeOp = TR::zdstorei;
            dt = TR::ZonedDecimal;
            break;
         case 2:
            op = TR::zd2zdsle;
            interOp = TR::pd2zd;
            storeOp = TR::zdsleStorei;
            dt = TR::ZonedDecimalSignLeadingEmbedded;
            break;
         case 3:
            op = TR::zd2zdsts;
            interOp = TR::pd2zd;
            storeOp = TR::zdstsStorei;
            dt = TR::ZonedDecimalSignTrailingSeparate;
            break;
         case 4:
            op = TR::zd2zdsls;
            interOp = TR::pd2zd;
            storeOp = TR::zdslsStorei;
            dt = TR::ZonedDecimalSignLeadingSeparate;
            break;
         case 5:
            op = TR::pd2ud;
            interOp = TR::pd2ud;
            storeOp = TR::udStorei;
            dt = TR::UnicodeDecimal;
            break;
         case 6:
            op = TR::pd2udsl;
            interOp = TR::pd2ud;
            storeOp = TR::udslStorei;
            dt = TR::UnicodeDecimalSignLeading;
            break;
         case 7:
            op = TR::pd2udst;
            interOp = TR::pd2ud;
            storeOp = TR::udstStorei;
            dt = TR::UnicodeDecimalSignTrailing;
            break;
         default:
            // The decimal type was validated above, so this is not expected to be reached.
            TR_ASSERT(false, "unsupported decimalType.\n");
         }

      TR::Node * pd2decimalNode = NULL;
      if (isPD2UD || type == 1)
         {
         //direct conversion from the packed decimal load, no intermediate node needed
         pd2decimalNode = TR::Node::create(op, 1, pdload);
         }
      else //PD2ED types 2, 3 and 4 go through an intermediate pd2zd conversion
         {
         TR::Node * toZDNode = TR::Node::create(interOp, 1, pdload);
         toZDNode->setDecimalPrecision(prec);
         pd2decimalNode = TR::Node::create(op, 1, toZDNode);
         }
      pd2decimalNode->setDecimalPrecision(prec);

      //set up decimalStore node
      TR::SymbolReference * symRefDecimalStore = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(dt, decimalAddressNode, 8, fe());
      TR::Symbol * symStore = TR::Symbol::createShadow(comp()->trHeapMemory(), dt, TR::DataType::getSizeFromBCDPrecision(dt, prec));
      symStore->setArrayShadowSymbol();
      symRefDecimalStore->setSymbol(symStore);

      TR::Node * decimalStore = TR::Node::create(storeOp, 2, decimalAddressNode, pd2decimalNode);
      decimalStore->setSymbolReference(symRefDecimalStore);
      decimalStore->setDecimalPrecision(prec);

      //set up bndchks, and null chks
      TR::Node * pdPassThroughNode = TR::Node::create(TR::PassThrough, 1, pdNode);
      TR::Node * decimalPassThroughNode = TR::Node::create(TR::PassThrough, 1, decimalNode);

      //distance, in array elements, from the starting offset to the last element accessed
      int elementSize = isPD2UD ? TR::DataType::getUnicodeElementSize() : TR::DataType::getZonedElementSize();
      int pdPrecSize = TR::DataType::getSizeFromBCDPrecision(TR::PackedDecimal, prec) - 1;
      int decimalPrecSize = (TR::DataType::getSizeFromBCDPrecision(dt, prec) / elementSize) - 1;
      TR::Node * pdBndvalue = TR::Node::create(TR::iadd, 2, pdOffsetNode, TR::Node::create(callNode, TR::iconst, 0, pdPrecSize));
      TR::Node * decimalBndvalue = TR::Node::create(TR::iadd, 2, decimalOffsetNode, TR::Node::create(callNode, TR::iconst, 0, decimalPrecSize));

      TR::Node * pdArrayLengthNode = TR::Node::create(TR::arraylength, 1, pdNode);
      TR::Node * decimalArrayLengthNode = TR::Node::create(TR::arraylength, 1, decimalNode);

      TR::Node * pdNullChkNode = TR::Node::createWithSymRef(TR::NULLCHK, 1, 1, pdPassThroughNode, getSymRefTab()->findOrCreateRuntimeHelper(TR_nullCheck, false, true, true));
      TR::Node * decimalNullChkNode = TR::Node::createWithSymRef(TR::NULLCHK, 1, 1, decimalPassThroughNode, getSymRefTab()->findOrCreateRuntimeHelper(TR_nullCheck, false, true, true));

      TR::Node * pdBndChk = TR::Node::createWithSymRef(TR::BNDCHK, 2, 2, pdArrayLengthNode, pdOffsetNode, getSymRefTab()->findOrCreateRuntimeHelper(TR_arrayBoundsCheck, false, true, true));
      TR::Node * pdBndChk2 = TR::Node::createWithSymRef(TR::BNDCHK, 2, 2, pdArrayLengthNode, pdBndvalue, getSymRefTab()->findOrCreateRuntimeHelper(TR_arrayBoundsCheck, false, true, true));
      TR::Node * decimalBndChk = TR::Node::createWithSymRef(TR::BNDCHK, 2, 2, decimalArrayLengthNode, decimalOffsetNode, getSymRefTab()->findOrCreateRuntimeHelper(TR_arrayBoundsCheck, false, true, true));
      TR::Node * decimalBndChk2 = TR::Node::createWithSymRef(TR::BNDCHK, 2, 2, decimalArrayLengthNode, decimalBndvalue, getSymRefTab()->findOrCreateRuntimeHelper(TR_arrayBoundsCheck, false, true, true));

      //gen tree tops
      TR::TreeTop * nextTT = treeTop->getNextTreeTop();
      TR::TreeTop * prevTT = treeTop->getPrevTreeTop();

      TR::TreeTop * pdNullChktt = TR::TreeTop::create(comp(), pdNullChkNode);
      TR::TreeTop * decimalNullChktt = TR::TreeTop::create(comp(), decimalNullChkNode);

      TR::TreeTop * pdBndChktt1 = TR::TreeTop::create(comp(), pdBndChk);
      TR::TreeTop * pdBndChktt2 = TR::TreeTop::create(comp(), pdBndChk2);
      TR::TreeTop * decimalBndChktt1 = TR::TreeTop::create(comp(), decimalBndChk);
      TR::TreeTop * decimalBndChktt2 = TR::TreeTop::create(comp(), decimalBndChk2);

      TR::TreeTop * decimalStoreTT = TR::TreeTop::create(comp(), decimalStore);

      //link the new trees between the call's neighbours; the call tree top itself is bypassed
      prevTT->join(pdNullChktt);
      pdNullChktt->join(decimalNullChktt);
      decimalNullChktt->join(pdBndChktt1);
      pdBndChktt1->join(pdBndChktt2);
      pdBndChktt2->join(decimalBndChktt1);
      decimalBndChktt1->join(decimalBndChktt2);
      decimalBndChktt2->join(decimalStoreTT);
      decimalStoreTT->join(nextTT);

      callNode->recursivelyDecReferenceCount();

      return true;
      }

   return false;
   }
2769
2770
/**
 * Inserts a NULLCHK tree on \p byteArrayNode immediately before \p callTreeTop.
 *
 * \param callTreeTop    Tree top before which the check is inserted.
 * \param callNode       Call node whose resolved method symbol is used to look up the
 *                       null check symbol reference.
 * \param byteArrayNode  Node that is wrapped in a PassThrough and null checked.
 */
void TR_DataAccessAccelerator::insertByteArrayNULLCHK(TR::TreeTop* callTreeTop, TR::Node* callNode, TR::Node* byteArrayNode)
   {
   TR::Compilation* compilation = OMR::Optimization::comp();

   // NULLCHK takes the checked reference through a PassThrough child
   TR::Node* passThroughNode = TR::Node::create(TR::PassThrough, 1, byteArrayNode);

   TR::SymbolReference* nullCheckSymRef =
      compilation->getSymRefTab()->findOrCreateNullCheckSymbolRef(callNode->getSymbol()->getResolvedMethodSymbol());

   TR::Node* nullCheckNode = TR::Node::createWithSymRef(TR::NULLCHK, 1, 1, passThroughNode, nullCheckSymRef);

   callTreeTop->insertBefore(TR::TreeTop::create(compilation, nullCheckNode));
   }
2776
2777
/**
 * Inserts a BNDCHK tree immediately before \p callTreeTop that checks
 * (\p offsetNode + \p index) against the length of \p byteArrayNode.
 *
 * \param callTreeTop    Tree top before which the check is inserted.
 * \param callNode       Call node used as the originating node for the constant and to
 *                       look up the array bounds check symbol reference.
 * \param byteArrayNode  Array whose length bounds the access.
 * \param offsetNode     Base index into the array.
 * \param index          Constant added to \p offsetNode; when 0, \p offsetNode is
 *                       checked directly and no add node is created.
 */
void TR_DataAccessAccelerator::insertByteArrayBNDCHK(TR::TreeTop* callTreeTop, TR::Node* callNode, TR::Node* byteArrayNode, TR::Node* offsetNode, int32_t index)
   {
   TR::Compilation* compilation = OMR::Optimization::comp();

   // Index that is actually checked: the offset itself, or offset + index
   TR::Node* checkedIndexNode = offsetNode;

   if (index != 0)
      {
      TR::Node* indexConstNode = TR::Node::create(callNode, TR::iconst, 0, index);
      checkedIndexNode = TR::Node::create(TR::iadd, 2, offsetNode, indexConstNode);
      }

   TR::Node* lengthNode = TR::Node::create(TR::arraylength, 1, byteArrayNode);

   // byte[] is always of type TR::Int8 so set the appropriate stride
   lengthNode->setArrayStride(TR::Symbol::convertTypeToSize(TR::Int8));

   TR::SymbolReference* boundsCheckSymRef =
      compilation->getSymRefTab()->findOrCreateArrayBoundsCheckSymbolRef(callNode->getSymbol()->getResolvedMethodSymbol());

   TR::Node* boundsCheckNode = TR::Node::createWithSymRef(TR::BNDCHK, 2, 2, lengthNode, checkedIndexNode, boundsCheckSymRef);

   callTreeTop->insertBefore(TR::TreeTop::create(compilation, boundsCheckNode));
   }
2793
2794
/**
 * Builds the address node for an element of a byte array:
 * \p byteArrayNode + (contiguous array header size + \p offsetNode).
 *
 * On 64-bit targets the offset is widened with i2l and the address is formed with
 * ladd / aladd; otherwise iadd / aiadd are used.
 *
 * \param callTreeTop    Not used by this function.
 * \param callNode       Originating node for the header size constant.
 * \param byteArrayNode  Base array reference.
 * \param offsetNode     Integer offset into the array.
 *
 * \return The address node, flagged as an internal pointer.
 */
TR::Node* TR_DataAccessAccelerator::createByteArrayElementAddress(TR::TreeTop* callTreeTop, TR::Node* callNode, TR::Node* byteArrayNode, TR::Node* offsetNode)
   {
   TR::Node* byteArrayElementAddressNode;

   if (comp()->target().is64Bit())
      {
      byteArrayElementAddressNode = TR::Node::create(TR::aladd, 2, byteArrayNode, TR::Node::create(TR::ladd, 2, TR::Node::create(callNode, TR::lconst, 0, TR::Compiler->om.contiguousArrayHeaderSizeInBytes()), TR::Node::create(TR::i2l, 1, offsetNode)));
      }
   else
      {
      byteArrayElementAddressNode = TR::Node::create(TR::aiadd, 2, byteArrayNode, TR::Node::create(TR::iadd, 2, TR::Node::create(callNode, TR::iconst, 0, TR::Compiler->om.contiguousArrayHeaderSizeInBytes()), offsetNode));
      }

   // This node is pointing to an array element so we must mark it as such
   byteArrayElementAddressNode->setIsInternalPointer(true);

   return byteArrayElementAddressNode;
   }
2814
2815