Path: blob/master/runtime/compiler/optimizer/DataAccessAccelerator.hpp
6000 views
/*******************************************************************************1* Copyright (c) 2000, 2020 IBM Corp. and others2*3* This program and the accompanying materials are made available under4* the terms of the Eclipse Public License 2.0 which accompanies this5* distribution and is available at https://www.eclipse.org/legal/epl-2.0/6* or the Apache License, Version 2.0 which accompanies this distribution and7* is available at https://www.apache.org/licenses/LICENSE-2.0.8*9* This Source Code may also be made available under the following10* Secondary Licenses when the conditions for such availability set11* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU12* General Public License, version 2 with the GNU Classpath13* Exception [1] and GNU General Public License, version 2 with the14* OpenJDK Assembly Exception [2].15*16* [1] https://www.gnu.org/software/classpath/license.html17* [2] http://openjdk.java.net/legal/assembly-exception.html18*19* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception20*******************************************************************************/2122#ifndef DATAACCESSACCELERATOR_INCL23#define DATAACCESSACCELERATOR_INCL2425#include <stddef.h>26#include <stdint.h>27#include <vector>28#include "compile/Compilation.hpp"29#include "env/TRMemory.hpp"30#include "il/Block.hpp"31#include "il/ILOpCodes.hpp"32#include "il/Node.hpp"33#include "infra/Array.hpp"34#include "infra/Assert.hpp"35#include "infra/BitVector.hpp"36#include "infra/ILWalk.hpp"37#include "infra/List.hpp"38#include "optimizer/Optimization.hpp"39#include "optimizer/OptimizationManager.hpp"4041namespace TR { class TreeTop; }4243/** \brief44*45* Transforms calls to recognized Data Access Accelerator (DAA) library methods into hardware semantically46* equivalent hardware intrinsics for the underlying platform.47*48* \details49*50* The Data Access Accelerator (DAA) library (com/ibm/dataaccess) found in IBM J9 Virtual Machine is a utility51* library for performing hardware accelerated operations on Java data types. All library methods have a Java52* implementation, however if hardware support exists for a particular operation the JIT compiler will attempt53* to replace calls to such library methods with semantically equivalent hardware intrinsics.54*55* The DAA library method calls that are actually replaced with hardware intrinsics are not the publically visible56* API methods. Instead we replace the private "underscore" counterparts to the public API so as to guard us for57* possible future modifications of such private methods. For example the58* com/ibm/dataaccess/ByteArrayMarshaller.writeInt(I[BIZ)V method calls a private59* com/ibm/dataaccess/ByteArrayMarshaller.writeInt_(I[BIZ)V method which carries out the write operation. The60* latter method is the so called "underscore" method which we recognize and hardware accelerate.6162* Note that if hardware acceleration support is detected we prevent the inlining of such "underscore" methods63* in the inliner on the assumption that this optimization will reduce such calls to simple semantically64* equivalent trees which will outperform the inlined call.65*66* The DAA library is broken up into four main classes and hardware support is defined per method as follows:67*68* \section com.ibm.data.ByteArrayMarshaller69*70* The following methods have hardware acceleration support on x86 (Linux and Windows), PPC (Linux and AIX),71* and System z (Linux and z/OS):72*73* - com/ibm/dataaccess/ByteArrayMarshaller.writeShort_(S[BIZ)V74* - com/ibm/dataaccess/ByteArrayMarshaller.writeShort_(S[BIZI)V75* - com/ibm/dataaccess/ByteArrayMarshaller.writeInt_(I[BIZ)V76* - com/ibm/dataaccess/ByteArrayMarshaller.writeInt_(I[BIZI)V77* - com/ibm/dataaccess/ByteArrayMarshaller.writeLong_(J[BIZ)V78* - com/ibm/dataaccess/ByteArrayMarshaller.writeLong_(J[BIZI)V79* - com/ibm/dataaccess/ByteArrayMarshaller.writeFloat_(F[BIZ)V80* - com/ibm/dataaccess/ByteArrayMarshaller.writeDouble_(D[BIZ)V81*82* \section com.ibm.data.ByteArrayUnMarshaller83*84* The following methods have hardware acceleration support on x86 (Linux and Windows), PPC (Linux and AIX),85* and System z (Linux and z/OS):86*87* - com/ibm/dataaccess/ByteArrayUnmarshaller.readShort_([BIZ)S88* - com/ibm/dataaccess/ByteArrayUnmarshaller.readShort_([BIZIZ)S89* - com/ibm/dataaccess/ByteArrayUnmarshaller.readInt_([BIZ)I90* - com/ibm/dataaccess/ByteArrayUnmarshaller.readInt_([BIZIZ)I91* - com/ibm/dataaccess/ByteArrayUnmarshaller.readLong_([BIZ)J92* - com/ibm/dataaccess/ByteArrayUnmarshaller.readLong_([BIZIZ)J93* - com/ibm/dataaccess/ByteArrayUnmarshaller.readFloat_([BIZ)F94* - com/ibm/dataaccess/ByteArrayUnmarshaller.readDouble_([BIZ)D95*96* \section com.ibm.data.DecimalData97*98* The following methods have hardware acceleration support on System z (Linux and z/OS):99*100* - com/ibm/dataaccess/DecimalData.convertPackedDecimalToUnicodeDecimal_([BI[CIII)V101* - com/ibm/dataaccess/DecimalData.convertUnicodeDecimalToPackedDecimal_([CI[BIII)V102* - com/ibm/dataaccess/DecimalData.convertPackedDecimalToExternalDecimal_([BI[BIII)V103* - com/ibm/dataaccess/DecimalData.convertExternalDecimalToPackedDecimal_([BI[BIII)V104*105* The following methods have hardware acceleration support on System z (z/OS):106*107* - com/ibm/dataaccess/DecimalData.convertPackedDecimalToInteger_([BIIZ)I108* - com/ibm/dataaccess/DecimalData.convertPackedDecimalToInteger_(Ljava/nio/ByteBuffer;IIZJII)I109* - com/ibm/dataaccess/DecimalData.convertIntegerToPackedDecimal_(I[BIIZ)V110* - com/ibm/dataaccess/DecimalData.convertIntegerToPackedDecimal_(ILjava/nio/ByteBuffer;IIZJII)V111* - com/ibm/dataaccess/DecimalData.convertPackedDecimalToLong_([BIIZ)J112* - com/ibm/dataaccess/DecimalData.convertPackedDecimalToLong_(Ljava/nio/ByteBuffer;IIZJII)J113* - com/ibm/dataaccess/DecimalData.convertLongToPackedDecimal_(J[BIIZ)V114* - com/ibm/dataaccess/DecimalData.convertLongToPackedDecimal_(JLjava/nio/ByteBuffer;IIZJII)V115*116* \section com.ibm.data.PackedDecimal117*118* The following methods have hardware acceleration support on System z (Linux and z/OS):119*120* - com/ibm/dataaccess/PackedDecimal.checkPackedDecimal_([BIIZZ)I121*122* The following methods have hardware acceleration support on System z (z/OS):123*124* - com/ibm/dataaccess/PackedDecimal.addPackedDecimal_([BII[BII[BIIZ)V125* - com/ibm/dataaccess/PackedDecimal.subtractPackedDecimal_([BII[BII[BIIZ)V126* - com/ibm/dataaccess/PackedDecimal.multiplyPackedDecimal_([BII[BII[BIIZ)V127* - com/ibm/dataaccess/PackedDecimal.dividePackedDecimal_([BII[BII[BIIZ)V128* - com/ibm/dataaccess/PackedDecimal.remainderPackedDecimal_([BII[BII[BIIZ)V129* - com/ibm/dataaccess/PackedDecimal.shiftLeftPackedDecimal_([BII[BIIIZ)V130* - com/ibm/dataaccess/PackedDecimal.shiftRightPackedDecimal_([BII[BIIIZ)V131* - com/ibm/dataaccess/PackedDecimal.lessThanPackedDecimal_([BII[BII)Z132* - com/ibm/dataaccess/PackedDecimal.lessThanOrEqualsPackedDecimal_([BII[BII)Z133* - com/ibm/dataaccess/PackedDecimal.greaterThanPackedDecimal_([BII[BII)Z134* - com/ibm/dataaccess/PackedDecimal.greaterThanOrEqualsPackedDecimal_([BII[BII)Z135* - com/ibm/dataaccess/PackedDecimal.equalsPackedDecimal_([BII[BII)Z136*137*/138class TR_DataAccessAccelerator : public TR::Optimization139{140public:141typedef TR::typed_allocator< TR::TreeTop *, TR::Region & > TreeTopContainerAllocator;142typedef std::vector< TR::TreeTop*, TreeTopContainerAllocator > TreeTopContainer;143typedef TR::typed_allocator< TR::Block *, TR::Region & > BlockContainerAllocator;144typedef std::vector< TR::Block*, BlockContainerAllocator > BlockContainer;145146TR_DataAccessAccelerator(TR::OptimizationManager* manager);147148/** \brief149* Helper function to create an instance of the StringBuilderTransformer optimization using the150* OptimizationManager's default allocator.151*152* \param manager153* The optimization manager.154*/155static TR::Optimization* create(TR::OptimizationManager* manager)156{157return new (manager->allocator()) TR_DataAccessAccelerator(manager);158}159160/** \brief161* Performs the optimization on this compilation unit.162*163* \return164* 1 if any transformation was performed; 0 otherwise.165*/166virtual int32_t perform();167168/** \brief169* Performs the optimization on a specific block within this compilation unit.170*171* \param block172* The block on which to perform this optimization.173*174* \param variableCallTreeTops175* A vector of TR::TreeTop*. Used to build a list of variable precision calls to be used176* later177*178* \return179* 1 if any transformation was performed; 0 otherwise.180*/181virtual int32_t performOnBlock(TR::Block* block, TreeTopContainer* variableCallTreeTops);182183/** \brief184* Performs inlining of variable precision API calls after iterating through the entire tree185*186* \detail187* Unlike constant precision DAA call inlining which can be done in-place without introducing extra blocks,188* each variable precision call node has to be bloated into multiple189* basic blocks to form a precision diamond. This disrupts the CFG and invalidates block iterator and190* TreeTop iterator that might be in use. As a result of this, it's difficult to inline variable precision191* calls while iterating the entire tree. The solution to this problem is to build a list of variable192* precision TreeTops during the tree traversal phase. And after that, go through this list and inline each one of them.193*194* \param variableCallTreeTops195* A vector of TR::TreeTop*. All variable precision calls listed here will be inlined.196*197* \return198* 1 if any transformation was performed; 0 otherwise.199*/200virtual int32_t processVariableCalls(TreeTopContainer* variableCallTreeTops);201202virtual const char * optDetailString() const throw();203204bool isChildConst (TR::Node* node, int32_t child);205206TR::Node* insertIntegerGetIntrinsic(TR::TreeTop* callTreeTop, TR::Node* callNode, int32_t sourceNumBytes, int32_t targetNumBytes);207TR::Node* insertIntegerSetIntrinsic(TR::TreeTop* callTreeTop, TR::Node* callNode, int32_t sourceNumBytes, int32_t targetNumBytes);208209TR::Node* insertDecimalGetIntrinsic(TR::TreeTop* callTreeTop, TR::Node* callNode, int32_t sourceNumBytes, int32_t targetNumBytes);210TR::Node* insertDecimalSetIntrinsic(TR::TreeTop* callTreeTop, TR::Node* callNode, int32_t sourceNumBytes, int32_t targetNumBytes);211212bool inlineCheckPackedDecimal(TR::TreeTop* callTreeTop, TR::Node* callNode);213214private:215216TR::Node* constructAddressNode(TR::Node* callNode, TR::Node* arrayNode, TR::Node* offsetNode);217218void createPrecisionDiamond(TR::Compilation* comp,219TR::TreeTop* treeTop,220TR::TreeTop* fastTree, TR::TreeTop* slowTree,221bool isPD2I,222uint32_t numPrecisionNodes,223...);224225TR::Node* restructureVariablePrecisionCallNode(TR::TreeTop* treeTop, TR::Node* callNode);226227bool generatePD2I(TR::TreeTop* treeTop, TR::Node* callNode, bool isPD2i, bool isByteBuffer);228bool generatePD2IVariableParameter(TR::TreeTop* treeTop, TR::Node* callNode, bool isPD2i, bool isByteBuffer);229bool generatePD2IConstantParameter(TR::TreeTop* treeTop, TR::Node* callNode, bool isPD2i, bool isByteBuffer);230bool generateI2PD(TR::TreeTop* treeTop, TR::Node* callNode, bool isI2PD, bool isByteBuffer);231bool genArithmeticIntrinsic(TR::TreeTop* treeTop, TR::Node* callNode, TR::ILOpCodes opCode);232bool genComparisionIntrinsic(TR::TreeTop* treeTop, TR::Node* callNode, TR::ILOpCodes opCode);233bool genShiftLeftIntrinsic(TR::TreeTop* treeTop, TR::Node* callNode);234bool genShiftRightIntrinsic(TR::TreeTop* treeTop, TR::Node* callNode);235bool generateUD2PD(TR::TreeTop* treeTop, TR::Node* callNode, bool isUD2PD);236bool generatePD2UD(TR::TreeTop* treeTop, TR::Node* callNode, bool isPD2UD);237238void insertByteArrayNULLCHK(TR::TreeTop* callTreeTop, TR::Node* callNode, TR::Node* byteArrayNode);239void insertByteArrayBNDCHK(TR::TreeTop* callTreeTop, TR::Node* callNode, TR::Node* byteArrayNode, TR::Node* offsetNode, int32_t index);240241TR::Node* createByteArrayElementAddress(TR::TreeTop* callTreeTop, TR::Node* callNode, TR::Node* byteArrayNode, TR::Node* offsetNode);242243bool printInliningStatus(bool status, TR::Node* node, const char* reason = "")244{245if (trace())246{247if (status)248traceMsg(comp(), "DataAccessAccelerator: Intrinsics on node %p : SUCCESS\n", node);249else250{251traceMsg(comp(), "DataAccessAccelerator: Intrinsics on node %p : FAILED\n", node);252traceMsg(comp(), "DataAccessAccelerator: Reason : %s\n", reason);253}254}255return status;256}257};258259#endif260261262