Path: blob/master/runtime/compiler/z/codegen/J9BCDTreeEvaluator.cpp
6004 views
/*******************************************************************************1* Copyright (c) 2018, 2022 IBM Corp. and others2*3* This program and the accompanying materials are made available under4* the terms of the Eclipse Public License 2.0 which accompanies this5* distribution and is available at https://www.eclipse.org/legal/epl-2.0/6* or the Apache License, Version 2.0 which accompanies this distribution and7* is available at https://www.apache.org/licenses/LICENSE-2.0.8*9* This Source Code may also be made available under the following10* Secondary Licenses when the conditions for such availability set11* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU12* General Public License, version 2 with the GNU Classpath13* Exception [1] and GNU General Public License, version 2 with the14* OpenJDK Assembly Exception [2].15*16* [1] https://www.gnu.org/software/classpath/license.html17* [2] http://openjdk.java.net/legal/assembly-exception.html18*19* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception20*******************************************************************************/2122//On zOS XLC linker can't handle files with same name at link time23//This workaround with pragma is needed. What this does is essentially24//give a different name to the codesection (csect) for this file. 
So it25//doesn't conflict with another file with same name.26#pragma csect(CODE,"TRJ9ZBCDTreeEvalBase#C")27#pragma csect(STATIC,"TRJ9ZBCDTreeEvalBase#S")28#pragma csect(TEST,"TRJ9ZBCDTreeEvalBase#T")2930#include <algorithm>31#include <limits.h>32#include <math.h>33#include <stdint.h>34#include "j9.h"35#include "j9cfg.h"36#include "j9consts.h"37#include "j9modron.h"38#include "thrdsup.h"39#include "thrtypes.h"40#include "codegen/CodeGenerator.hpp"41#include "codegen/CodeGenerator_inlines.hpp"42#include "codegen/Machine.hpp"43#include "compile/ResolvedMethod.hpp"44#include "env/CompilerEnv.hpp"45#include "env/jittypes.h"46#include "env/VMJ9.h"47#include "il/DataTypes.hpp"48#include "il/LabelSymbol.hpp"49#include "il/MethodSymbol.hpp"50#include "il/Node.hpp"51#include "il/Node_inlines.hpp"52#include "il/RegisterMappedSymbol.hpp"53#include "il/ResolvedMethodSymbol.hpp"54#include "il/Symbol.hpp"55#include "il/TreeTop.hpp"56#include "il/TreeTop_inlines.hpp"57#include "ras/DebugCounter.hpp"58#include "env/VMJ9.h"59#include "z/codegen/J9S390Snippet.hpp"60#include "z/codegen/S390J9CallSnippet.hpp"61#include "z/codegen/S390Evaluator.hpp"62#include "z/codegen/S390GenerateInstructions.hpp"63#include "z/codegen/S390HelperCallSnippet.hpp"64#include "z/codegen/S390Instruction.hpp"65#include "z/codegen/S390Register.hpp"66#include "z/codegen/SystemLinkage.hpp"6768TR::MemoryReference *69J9::Z::TreeEvaluator::asciiAndUnicodeToPackedHelper(TR::Node *node,70TR_PseudoRegister *targetReg,71TR::MemoryReference *sourceMR,72TR_PseudoRegister *childReg,73TR::CodeGenerator * cg)74{75TR::Node *child = node->getFirstChild();76bool isUnicode = child->getType().isAnyUnicode();77bool isZoned = child->getType().isAnyZoned();7879TR::DataType sourceType = TR::NoType;80TR::Compilation *comp = cg->comp();81if (isUnicode)82sourceType = TR::UnicodeDecimal;83else if (isZoned)84sourceType = TR::ZonedDecimal;85else86TR_ASSERT(false,"unexpected type on node %s 
(%p)\n",child->getOpCode().getName(),child);8788TR_StorageReference *hint = node->getStorageReferenceHint();89TR_StorageReference *targetStorageReference = NULL;90int32_t destSize = isUnicode ? cg->getUnicodeToPackedFixedResultSize() : cg->getAsciiToPackedFixedResultSize();91TR_ASSERT(TR::DataType::getBCDPrecisionFromSize(node->getDataType(), destSize) >= childReg->getDecimalPrecision(),92"%s source precision of %d should not exceed the fixed precision of %d\n",93node->getOpCode().getName(), childReg->getDecimalPrecision(), TR::DataType::getBCDPrecisionFromSize(node->getDataType(), destSize));9495if (hint)96{97if (childReg->isInitialized() && hint == childReg->getStorageReference())98{99TR_ASSERT( false,"ad2pd/ud2pd operands will overlap because child storageReference of ud2pd is initialized hint\n");100}101else102{103TR_ASSERT(hint->getSymbolSize() >= destSize, "ad2pd/ud2pd hint size of %d should be >= the fixed size of %d\n",hint->getSymbolSize(),destSize);104targetStorageReference = hint;105}106}107108if (targetStorageReference == NULL)109targetStorageReference = TR_StorageReference::createTemporaryBasedStorageReference(destSize, comp);110111targetReg->setStorageReference(targetStorageReference, node);112113int32_t sourcePrecision = childReg->getDecimalPrecision();114bool isTruncation = sourcePrecision > node->getDecimalPrecision();115int32_t pkxSourcePrecision = isTruncation ? 
node->getDecimalPrecision() : sourcePrecision;116int32_t pkxSourceSize = TR::DataType::getSizeFromBCDPrecision(sourceType, pkxSourcePrecision);117int32_t targetPrecision = pkxSourcePrecision;118int32_t sourceEndByte = TR::DataType::getLeftMostByte(child->getDataType(), pkxSourceSize);119120if (cg->traceBCDCodeGen())121traceMsg(comp,"\tasciiAndUnicodeToPackedHelper %p : op %s, isTruncation=%s, fixedDestSize %d, targetRegPrec %d, sourcePrecision %d, sourceEndByte %d, sourceSize %d, pkuSourceSize %d\n",122node,node->getOpCode().getName(),isTruncation?"yes":"no",destSize,targetPrecision,sourcePrecision,sourceEndByte,childReg->getSize(),pkxSourceSize);123124// For PKA/PKU the 1st operand (target) size is fixed at 16 bytes and the 2nd operand (source) is variable.125// For this reason use left, instead of right, aligned memory references so the correct alignment is done for both operands126// (using right aligned references with SS1 would apply the same bump to both operands)127TR::MemoryReference *destMR = generateS390LeftAlignedMemoryReference(node, targetReg->getStorageReference(), cg, destSize);128sourceMR = reuseS390LeftAlignedMemoryReference(sourceMR, child, childReg->getStorageReference(), cg, sourceEndByte);129130if (cg->traceBCDCodeGen())131traceMsg(comp,"\tgen %s with fixed dest size of %d and source size %d. Set targetRegPrec to sourcePrec (%d)\n",isUnicode?"PKU":"PKA",destSize,pkxSourceSize,sourcePrecision);132133generateSS1Instruction(cg, isUnicode ? 
TR::InstOpCode::PKU : TR::InstOpCode::PKA, node, pkxSourceSize-1, destMR, sourceMR);134135int32_t destSizeAsCeilingPrecision = TR::DataType::byteLengthToPackedDecimalPrecisionCeiling(destSize);136if (destSizeAsCeilingPrecision > pkxSourcePrecision)137targetReg->addRangeOfZeroDigits(pkxSourcePrecision, destSizeAsCeilingPrecision);138139if (node->getOpCode().isSetSign())140{141TR::Node *setSignNode = node->getSetSignValueNode();142TR_ASSERT(setSignNode->getOpCode().isLoadConst() && setSignNode->getOpCode().getSize() <= 4,"expecting a <= 4 size integral constant set sign amount on node %p\n",setSignNode);143int32_t sign = setSignNode->get32bitIntegralValue();144if (sign == TR::DataType::getPreferredPlusCode())145targetReg->setKnownSignCode(TR::DataType::getPreferredPlusCode());146else147cg->genSignCodeSetting(node, targetReg, targetReg->getSize(), destMR, sign, targetReg, 0, false); // numericNibbleIsZero=false148cg->decReferenceCount(setSignNode);149}150else151{152// PKA/PKU always sets the preferred positive code and therefore a known clean sign is generated.153targetReg->setKnownSignCode(TR::DataType::getPreferredPlusCode());154}155156targetReg->setDecimalPrecision(targetPrecision);157targetReg->transferDataState(childReg);158targetReg->setIsInitialized();159node->setRegister(targetReg);160return destMR;161}162163TR::Register *164J9::Z::TreeEvaluator::ud2pdVectorEvaluatorHelper(TR::Node * node, TR::CodeGenerator * cg)165{166// 1. use ud2pd helper to put ud->pd in some storage reference167TR_PseudoRegister *packedPseudoReg = cg->allocatePseudoRegister(node->getDataType());168TR::Node *child = node->getFirstChild();169TR_PseudoRegister *childReg = cg->evaluateBCDNode(child);170childReg = cg->privatizeBCDRegisterIfNeeded(node, child, childReg);171TR::MemoryReference *sourceMR = generateS390RightAlignedMemoryReference(child, childReg->getStorageReference(), cg);172asciiAndUnicodeToPackedHelper(node, packedPseudoReg, sourceMR, childReg, cg);173174// 2. 
load packed decimal from storage reference to register.175TR::Register * targetReg = cg->allocateRegister(TR_VRF);176TR::MemoryReference * pdSourceMR = generateS390RightAlignedMemoryReference(node,177packedPseudoReg->getStorageReference(),178cg);179180// PKU always puts the result into 16 bytes space181uint8_t lengthToLoad = TR_VECTOR_REGISTER_SIZE - 1;182generateVSIInstruction(cg, TR::InstOpCode::VLRL, node, targetReg, pdSourceMR, lengthToLoad);183184cg->decReferenceCount(child);185node->setRegister(targetReg);186return targetReg;187}188189/**190* Handles TR::ud2pd191*/192TR::Register *193J9::Z::TreeEvaluator::ud2pdEvaluator(TR::Node * node, TR::CodeGenerator * cg)194{195TR::Compilation *comp = cg->comp();196cg->traceBCDEntry("ud2pd",node);197cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "PD-Op/%s", node->getOpCode().getName()),1981, TR::DebugCounter::Cheap);199TR::Register* targetReg = NULL;200201static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");202if(comp->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) &&203!comp->getOption(TR_DisableVectorBCD) ||204isVectorBCDEnv)205{206targetReg = ud2pdVectorEvaluatorHelper(node, cg);207}208else209{210targetReg = cg->allocatePseudoRegister(node->getDataType());211TR::Node *child = node->getFirstChild();212TR_PseudoRegister *childReg = cg->evaluateBCDNode(child);213childReg = cg->privatizeBCDRegisterIfNeeded(node, child, childReg);214TR::MemoryReference *sourceMR = generateS390RightAlignedMemoryReference(child, childReg->getStorageReference(), cg);215asciiAndUnicodeToPackedHelper(node, static_cast<TR_PseudoRegister*>(targetReg), sourceMR, childReg, cg);216cg->decReferenceCount(child);217node->setRegister(targetReg);218}219220cg->traceBCDExit("ud2pd",node);221return targetReg;222}223224/**225* Handles TR::udsl2pd, TR::udst2pd226*/227TR::Register *228J9::Z::TreeEvaluator::udsl2pdEvaluator(TR::Node *node, TR::CodeGenerator *cg)229{230TR::Compilation *comp = 
cg->comp();231cg->traceBCDEntry("udsl2pd",node);232cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "PD-Op/%s", node->getOpCode().getName()),2331, TR::DebugCounter::Cheap);234TR_PseudoRegister *targetReg = cg->allocatePseudoRegister(node->getDataType());235TR::Node *child = node->getFirstChild();236TR_PseudoRegister *childReg = cg->evaluateBCDNode(child);237childReg = cg->privatizeBCDRegisterIfNeeded(node, child, childReg);238239bool isSrcTrailingSign = (child->getDataType() == TR::UnicodeDecimalSignTrailing);240int32_t sourceSignEndByte = isSrcTrailingSign ? TR::DataType::getUnicodeSignSize() : childReg->getSize();241TR::MemoryReference *sourceMR = generateS390LeftAlignedMemoryReference(child, childReg->getStorageReference(), cg, sourceSignEndByte);242TR::MemoryReference *destMR = asciiAndUnicodeToPackedHelper(node, targetReg, sourceMR, childReg, cg);243244if (!node->getOpCode().isSetSign())245{246TR::LabelSymbol * cFlowRegionStart = generateLabelSymbol(cg);247TR::LabelSymbol * cFlowRegionEnd = generateLabelSymbol(cg);248249bool isImplicitValue = node->getNumChildren() < 2;250251TR::RegisterDependencyConditions * deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, isImplicitValue ? 
4 : 2, cg);252253if (destMR->getIndexRegister())254deps->addPostConditionIfNotAlreadyInserted(destMR->getIndexRegister(), TR::RealRegister::AssignAny);255if (destMR->getBaseRegister())256deps->addPostConditionIfNotAlreadyInserted(destMR->getBaseRegister(), TR::RealRegister::AssignAny);257258bool isTruncation = childReg->getDecimalPrecision() > node->getDecimalPrecision();259260if (cg->traceBCDCodeGen())261traceMsg(comp,"\tudsl2pdEvaluator %p : op %s, isTruncation=%s, targetReg->isInit=%s, targetRegSize=%d, targetRegPrec=%d, srcRegSize=%d, srcRegPrec=%d, sourceSignEndByte=%d\n",262node,node->getOpCode().getName(),isTruncation?"yes":"no",targetReg->isInitialized()?"yes":"no",targetReg->getSize(),targetReg->getDecimalPrecision(),childReg->getSize(),childReg->getDecimalPrecision(),sourceSignEndByte);263264if (isImplicitValue)265{266if (sourceMR->getIndexRegister())267deps->addPostConditionIfNotAlreadyInserted(sourceMR->getIndexRegister(), TR::RealRegister::AssignAny);268if (sourceMR->getBaseRegister())269deps->addPostConditionIfNotAlreadyInserted(sourceMR->getBaseRegister(), TR::RealRegister::AssignAny);270271generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart, deps);272cFlowRegionStart->setStartInternalControlFlow();273274// The primary (and currently the only) consumer of BCD evaluators in Java is the DAA intrinsics275// library. The DAA library assumes all BCD types are positive, unless an explicit negative sign276// code is present. 
Because of this deviation from the COBOL treatment of sign codes we must277// take a specialized control path when generating instructions for Java.278279generateSILInstruction(cg, TR::InstOpCode::CLHHSI, node, generateS390LeftAlignedMemoryReference(*sourceMR, node, 0, cg, sourceSignEndByte), 0x002D);280generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, cFlowRegionEnd);281}282else283{284TR::Node *minusSign = node->getSecondChild();285286TR::MemoryReference *minusSignMR = generateS390ConstantAreaMemoryReference(cg, minusSign, true); // forSS=true287288generateSS1Instruction(cg, TR::InstOpCode::CLC, node,289TR::DataType::getUnicodeSignSize()-1,290generateS390LeftAlignedMemoryReference(*sourceMR, node, 0, cg, sourceSignEndByte),291minusSignMR);292293generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart, deps);294cFlowRegionStart->setStartInternalControlFlow();295296generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK7, node, cFlowRegionEnd);297}298299cg->genSignCodeSetting(node, NULL, targetReg->getSize(),300generateS390RightAlignedMemoryReference(*destMR, node, 0, cg),301TR::DataType::getPreferredMinusCode(), targetReg, 0, false); // numericNibbleIsZero=false302303generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, deps);304cFlowRegionEnd->setEndInternalControlFlow();305306targetReg->resetSignState();307targetReg->setHasKnownPreferredSign();308309if (!isTruncation)310targetReg->transferCleanSign(childReg);311else312traceMsg(comp,"\tudsx2p is a truncation (srcRegPrec %d > nodePrec %d) so do not transfer any clean sign flags\n",childReg->getDecimalPrecision(),node->getDecimalPrecision());313}314315//at this point targetReg is PseudoRegister that has converted Packed decimal value.316static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");317if (comp->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) 
&&318!comp->getOption(TR_DisableVectorBCD) ||319isVectorBCDEnv)320{321TR::Register * pdVectorTargetReg = cg->allocateRegister(TR_VRF);322TR::MemoryReference * pdSourceMR = generateS390RightAlignedMemoryReference(node,323targetReg->getStorageReference(),324cg);325//PKU always puts the result into 16 bytes space326uint8_t lengthToLoad = TR_VECTOR_REGISTER_SIZE - 1;327generateVSIInstruction(cg, TR::InstOpCode::VLRL, node, pdVectorTargetReg, pdSourceMR, lengthToLoad);328329cg->decReferenceCount(child);330node->setRegister(pdVectorTargetReg);331cg->traceBCDExit("udsl2pd",node);332return pdVectorTargetReg;333}334else335{336cg->decReferenceCount(child);337node->setRegister(targetReg);338cg->traceBCDExit("udsl2pd",node);339return targetReg;340}341}342343/**344* Handles pd2udsl,pd2udst, where the Unicode decimal signs are separate.345*/346TR::Register *347J9::Z::TreeEvaluator::pd2udslEvaluator(TR::Node *node, TR::CodeGenerator *cg)348{349cg->traceBCDEntry("pd2udsl",node);350cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "PD-Op/%s", node->getOpCode().getName()),3511, TR::DebugCounter::Cheap);352353TR::Node* childNode = node->getFirstChild();354TR::Compilation *comp = cg->comp();355TR_PseudoRegister *childReg = NULL;356TR::MemoryReference *sourceMR = NULL;357TR_StorageReference* pdStorageRef = NULL;358359static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");360if(comp->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !comp->getOption(TR_DisableVectorBCD) || isVectorBCDEnv)361{362// Perform an intermediate vector store. 
See pd2udVectorEvaluateHelper().363TR::Register* pdValueReg = cg->evaluate(childNode);364pdStorageRef = TR_StorageReference::createTemporaryBasedStorageReference(TR_VECTOR_REGISTER_SIZE, comp);365pdStorageRef->setIsSingleUseTemporary();366367TR::MemoryReference* pdMR = generateS390RightAlignedMemoryReference(node, pdStorageRef, cg);368sourceMR = pdMR;369370childReg = cg->allocatePseudoRegister(childNode->getDataType());371childReg->setIsInitialized();372childReg->setSize(childNode->getSize());373childReg->setHasKnownValidData();374childReg->setDecimalPrecision(childNode->getDecimalPrecision());375376generateVSIInstruction(cg, TR::InstOpCode::VSTRL, node, pdValueReg, pdMR, TR_VECTOR_REGISTER_SIZE - 1);377378}379else380{381int32_t byteLength = TR::DataType::packedDecimalPrecisionToByteLength(node->getDecimalPrecision());382childReg = cg->evaluateBCDNode(childNode);383childReg = cg->privatizeBCDRegisterIfNeeded(node, childNode, childReg);384sourceMR = cg->materializeFullBCDValue(childNode, childReg,385cg->getPackedToUnicodeFixedSourceSize(),386byteLength);387}388389// One of two sequences generated by the reset of this evaluator:390// for non-setSign ops when the knownSign=negative (known positive signs are more common so '+' is the initial/default setting)391//392// MVC [destSign],[minusSign] // [sign] <- 002B '+'393// UNPKU [destData],[src]394// MVI [destSign+1],0x2D // '-'395//396// for non-setSign ops (pd2udsl/pd2udst)397//398// MVC [destSign],[minusSign] // [sign] <- 002B '+'399// UNPKU [destData],[src]400// BRC 0x8,done // if src sign is + (cc=0) we are done, otherwise in '-' (cc=1) and invalid (cc=3) case fall through and set '-' sign401// MVI [destSign+1],0x2D // '-'402// done:403//404// The MVC/UNPKU are generated by the shared routine packedToUnicodeHelper and the BRC/MVI by this routine405406407TR_PseudoRegister *targetReg = cg->allocatePseudoRegister(node->getDataType());408TR::MemoryReference *destMR = packedToUnicodeHelper(node, targetReg, sourceMR, 
childReg, true, cg, pdStorageRef); // isSeparateSign=true409410int32_t destSignEndByte = (node->getDataType() == TR::UnicodeDecimalSignTrailing) ? TR::DataType::getUnicodeSignSize() : targetReg->getSize();411412if (childReg->hasKnownSignCode())413{414int32_t convertedSign = TR::DataType::convertSignEncoding(childNode->getDataType(), node->getDataType(), childReg->getKnownSignCode());415if (convertedSign == TR::DataType::getNationalSeparateMinus())416{417if (cg->traceBCDCodeGen())418traceMsg(comp,"\tchildReg has negative knownSignCode 0x%x so generate an MVI of the converted sign 0x%x\n",childReg->getKnownSignCode(),convertedSign);419generateSIInstruction(cg, TR::InstOpCode::MVI, node, generateS390LeftAlignedMemoryReference(*destMR, node, 1, cg, destSignEndByte), convertedSign);420}421else422{423if (cg->traceBCDCodeGen())424traceMsg(comp,"\tchildReg has positive knownSignCode 0x%x so no more codegen is needed (an MVC of 002B was already done)\n", childReg->getKnownSignCode());425TR_ASSERT(convertedSign == TR::DataType::getNationalSeparatePlus(), "converted sign should be nationalSeparatePlusSign of 0x%x and not 0x%x\n", TR::DataType::getNationalSeparatePlus(), convertedSign);426}427targetReg->setKnownSignCode(convertedSign);428}429else430{431TR_ASSERT(cg->getAppendInstruction()->getOpCodeValue() == TR::InstOpCode::UNPKU,432"the previous instruction should be an UNPKU\n");433434TR::LabelSymbol * cFlowRegionStart = generateLabelSymbol(cg);435TR::LabelSymbol * cFlowRegionEnd = generateLabelSymbol(cg);436437TR::RegisterDependencyConditions * targetMRDeps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, cg);438439if (destMR->getIndexRegister())440targetMRDeps->addPostConditionIfNotAlreadyInserted(destMR->getIndexRegister(), TR::RealRegister::AssignAny);441if (destMR->getBaseRegister())442targetMRDeps->addPostConditionIfNotAlreadyInserted(destMR->getBaseRegister(), TR::RealRegister::AssignAny);443444generateS390LabelInstruction(cg, TR::InstOpCode::label, 
node, cFlowRegionStart, targetMRDeps);445cFlowRegionStart->setStartInternalControlFlow();446447// DAA library assumes all BCD types are positive, unless an explicit negative sign code is present448generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK9, node, cFlowRegionEnd);449450TR_ASSERT(TR::DataType::getNationalSeparateMinus() <= 0xFF, "expecting nationalSeparateMinusSign to be <= 0xFF and not 0x%x\n", TR::DataType::getNationalSeparateMinus());451generateSIInstruction(cg, TR::InstOpCode::MVI, node, generateS390LeftAlignedMemoryReference(*destMR, node, 1, cg, destSignEndByte), TR::DataType::getNationalSeparateMinus());452453generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, targetMRDeps);454cFlowRegionEnd->setEndInternalControlFlow();455456targetReg->setHasKnownPreferredSign();457}458459cg->decReferenceCount(childNode);460node->setRegister(targetReg);461cg->traceBCDExit("pd2udsl",node);462return targetReg;463}464465TR::Register *466J9::Z::TreeEvaluator::pd2udEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg)467{468TR::Node *child = node->getFirstChild();469cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "PD-Op/%s", node->getOpCode().getName()),4701, TR::DebugCounter::Cheap);471TR_PseudoRegister *childReg = cg->evaluateBCDNode(child);472TR_PseudoRegister* targetReg = cg->allocatePseudoRegister(node->getDataType());473childReg = cg->privatizeBCDRegisterIfNeeded(node, child, childReg);474int32_t byteLength = TR::DataType::packedDecimalPrecisionToByteLength(node->getDecimalPrecision());475TR::MemoryReference *sourceMR = cg->materializeFullBCDValue(child,476childReg,477cg->getPackedToUnicodeFixedSourceSize(),478byteLength);479480packedToUnicodeHelper(node, targetReg, sourceMR, childReg, false, cg, NULL); // isSeparateSign=false481482cg->decReferenceCount(child);483return targetReg;484}485486TR::Register *487J9::Z::TreeEvaluator::pd2udVectorEvaluatorHelper(TR::Node *node, 
TR::CodeGenerator *cg)488{489// 1. Evaluate child node and get a packed decimal in vector register490TR::Node* childNode = node->getFirstChild();491TR::Register* pdValueReg = cg->evaluate(childNode);492493// 2. Create a temp storage reference of size 16 bytes and dump all vector register contents there, to be picked up by UNPKU later494// This intermediate vector store is needed because vectorized pdloadi puts packed decimal in registers;495// but UNPKU is an SS instruction that takes inputs from memory.496TR_StorageReference* pdStorageRef = TR_StorageReference::createTemporaryBasedStorageReference(TR_VECTOR_REGISTER_SIZE, cg->comp());497pdStorageRef->setIsSingleUseTemporary();498499TR::MemoryReference* pdMR = generateS390RightAlignedMemoryReference(node, pdStorageRef, cg, true, true);500generateVSIInstruction(cg, TR::InstOpCode::VSTRL, node, pdValueReg, pdMR, TR_VECTOR_REGISTER_SIZE - 1);501502// 3. Allocate and setup childReg PseudoRegister503TR_PseudoRegister* childReg = cg->allocatePseudoRegister(childNode->getDataType());504childReg->setIsInitialized();505childReg->setSize(childNode->getSize());506childReg->setDecimalPrecision(childNode->getDecimalPrecision());507childReg->setHasKnownValidData();508509// 4. 
Generate UNPKU to unpack pdMR content to targetReg PseudoRegister510TR_PseudoRegister* targetReg = cg->allocatePseudoRegister(node->getDataType());511packedToUnicodeHelper(node, targetReg, pdMR, childReg, false, cg, pdStorageRef); // isSeparateSign=false512513cg->decReferenceCount(childNode);514return targetReg;515}516517TR::Register *518J9::Z::TreeEvaluator::pd2udEvaluator(TR::Node *node, TR::CodeGenerator *cg)519{520cg->traceBCDEntry("pd2ud",node);521TR::Register* targetReg = NULL;522static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");523if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD) || isVectorBCDEnv)524{525targetReg = pd2udVectorEvaluatorHelper(node, cg);526}527else528{529targetReg = pd2udEvaluatorHelper(node, cg);530}531532node->setRegister(targetReg);533cg->traceBCDExit("pd2ud",node);534return targetReg;535}536537/**538* \brief This evaluator helper is invoked by pd2ud Evaluator and pd2udsl Evaluator to generate unpack unicode539* instruction (UNPKU).540*541* \param node Parent node object.542* \param targetReg PseudoRegister object for the parent node (the node)543* \param sourceMR MemoryRefernece object pointer544* \param childReg PseudoRegister object for the child node (e.g. pdloadi node)545* \param isSeparateSign True if the operation is pd2udsl or pd2udst, which all have separate sign code. 
False546* if it's pd2ud.547* \param cg The codegen object548* \param srcStorageReference If not null, this replaces the childReg's StorageReference for unpack to unicode549*/550TR::MemoryReference *551J9::Z::TreeEvaluator::packedToUnicodeHelper(TR::Node *node,552TR_PseudoRegister *targetReg,553TR::MemoryReference *sourceMR,554TR_PseudoRegister *childReg,555bool isSeparateSign,556TR::CodeGenerator * cg,557TR_StorageReference* srcStorageReference)558{559TR::Node *child = node->getFirstChild();560TR_StorageReference *hint = node->getStorageReferenceHint();561TR_StorageReference *targetStorageReference = NULL;562TR::Compilation *comp = cg->comp();563564int32_t destSize = node->getStorageReferenceSize();565566if (hint)567{568if (childReg->isInitialized() && hint == childReg->getStorageReference())569{570TR_ASSERT( false,"pd2ud operands will overlap because child storageReference of pd2ud is initialized hint\n");571}572else573{574if (destSize <= hint->getSymbolSize())575targetStorageReference = hint;576else577TR_ASSERT(false,"pd2ud destSize (%d) should be <= hint size (%d)\n",destSize,hint->getSymbolSize());578}579}580581if (targetStorageReference == NULL)582targetStorageReference = TR_StorageReference::createTemporaryBasedStorageReference(destSize, comp);583584targetReg->setStorageReference(targetStorageReference, node);585586int32_t unpkuDestPrecision = node->getDecimalPrecision();587targetReg->setDecimalPrecision(unpkuDestPrecision);588int32_t unpkuDestSize = TR::DataType::getSizeFromBCDPrecision(TR::UnicodeDecimal, unpkuDestPrecision);589int32_t unpkuDestEndByte = TR::DataType::getLeftMostByte(node->getDataType(), unpkuDestSize);590591if (cg->traceBCDCodeGen())592traceMsg(comp,"\tpackedToUnicodeHelper %p : op %s, targetRegSize %d, targetRegPrec %d, srcRegSize %d, srcRegPrec %d\n",593node,node->getOpCode().getName(),targetReg->getSize(),targetReg->getDecimalPrecision(),childReg->getSize(),childReg->getDecimalPrecision());594595// For UNPKU the 1st operand 
(target-unicode) size is variable and the 2nd operand (source-packed) is fixed at 16 bytes.596// For this reason use left, instead of right, aligned memory references so the correct alignment is done for both operands597// (using right aligned references with SS1 would apply the same bump to both operands)598TR::MemoryReference *destMR = generateS390LeftAlignedMemoryReference(node, targetReg->getStorageReference(), cg, unpkuDestEndByte);599// The sourceMR should have been created by calling materializeFullBCDValue to ensure it is large enough to be used in the UNPKU600int32_t fixedSourceSize = cg->getPackedToUnicodeFixedSourceSize();601602TR_ASSERT(sourceMR->getStorageReference()->getSymbolSize() >= fixedSourceSize,603"source memRef %d is not large enough to be used in the UNPKU (%d)\n",sourceMR->getStorageReference()->getSymbolSize(),fixedSourceSize);604605sourceMR = reuseS390LeftAlignedMemoryReference(sourceMR, child,606(srcStorageReference == NULL) ? childReg->getStorageReference() : srcStorageReference,607cg, fixedSourceSize);608609if (isSeparateSign)610{611//TR_ASSERT((node->getOpCode().isSetSign() && node->getNumChildren() == 3) || (node->getNumChildren() == 2),612// "expected two (or three if setSign) children on %s and not %d child(ren)\n",node->getOpCode().getName(),node->getNumChildren());613int32_t destSignEndByte = (node->getDataType() == TR::UnicodeDecimalSignTrailing) ? 
TR::DataType::getUnicodeSignSize() : unpkuDestEndByte + TR::DataType::getUnicodeSignSize();614615bool isImplicitValue = node->getNumChildren() < 2;616617if (isImplicitValue)618{619if (cg->traceBCDCodeGen())620traceMsg(comp, "\tgen 2 MVIs of unicode sign with size of %d and destSignEndByte of %d\n", TR::DataType::getUnicodeSignSize(),destSignEndByte);621generateSIInstruction(cg, TR::InstOpCode::MVI, node,622generateS390LeftAlignedMemoryReference(*destMR, node, 0, cg, destSignEndByte), 0x00);623generateSIInstruction(cg, TR::InstOpCode::MVI, node,624generateS390LeftAlignedMemoryReference(*destMR, node, 1, cg, destSignEndByte), 0x2B);625}626else627{628TR::Node *signNode = node->getSecondChild();629TR::MemoryReference *signMR = generateS390ConstantAreaMemoryReference(cg, signNode, true); // forSS=true630if (cg->traceBCDCodeGen())631traceMsg(comp, "\tgen MVC of unicode sign with size of %d and destSignEndByte of %d\n", TR::DataType::getUnicodeSignSize(),destSignEndByte);632generateSS1Instruction(cg, TR::InstOpCode::MVC, node,633TR::DataType::getUnicodeSignSize()-1,634generateS390LeftAlignedMemoryReference(*destMR, node, 0, cg, destSignEndByte),635signMR);636}637if (node->getOpCode().isSetSign())638{639TR::Node *setSignValue = node->getSetSignValueNode();640if (setSignValue->getOpCode().isLoadConst() && setSignValue->getOpCode().getSize() <= 4)641{642targetReg->setKnownSignCode(setSignValue->get32bitIntegralValue());643}644}645}646647if (cg->traceBCDCodeGen())648traceMsg(comp,"\tgen UNPKU: unpkuDestSize %d, destEndByte %d and fixed source size %d\n",unpkuDestSize,unpkuDestEndByte,fixedSourceSize);649650generateSS1Instruction(cg, TR::InstOpCode::UNPKU, node,651unpkuDestSize-1,652destMR,653sourceMR);654655targetReg->transferDataState(childReg);656targetReg->setIsInitialized();657node->setRegister(targetReg);658return destMR;659}660661void662J9::Z::TreeEvaluator::zonedToZonedSeparateSignHelper(TR::Node *node, TR_PseudoRegister *srcReg, TR_PseudoRegister *targetReg, 
TR::MemoryReference *sourceMR, TR::MemoryReference *destMR, TR::CodeGenerator * cg)663{664TR_ASSERT(targetReg->isInitialized(),"targetRegister must be initialized before calling zonedToZonedSeparateSignHelper\n");665targetReg->resetSignState(); // reset any incoming sign state now as sign is being moved from embedded to separate by this routine (so embedded setting is no longer valid)666bool isSetSign = node->getOpCode().isSetSign();667int32_t sign = 0;668TR::Node *signCodeNode = NULL;669TR::Compilation *comp = cg->comp();670671if (isSetSign)672{673signCodeNode = node->getSecondChild();674TR_ASSERT(signCodeNode->getOpCode().isLoadConst(),"excepting zdsle2zdSetSign sign code to be a const\n");675sign = signCodeNode->get32bitIntegralValue();676}677bool isDestTrailingSign = (node->getDataType() == TR::ZonedDecimalSignTrailingSeparate);678bool isTruncation = false;679int32_t digitsToClear = 0;680if (node->getDecimalPrecision() < targetReg->getDecimalPrecision())681isTruncation = true;682else if (node->getDecimalPrecision() > targetReg->getDecimalPrecision())683digitsToClear = node->getDecimalPrecision()-targetReg->getDecimalPrecision();684685if (cg->traceBCDCodeGen())686traceMsg(comp,"\tzonedToZonedSeparateSignHelper %p : op %s, isTruncation=%s, targetReg->knownSign=0x%x, trgSignIsZone=%s, targetReg->size=%d, targetRegPrec=%d, , digitsToClear=%d, (isSetSign=%s, sign 0x%x)\n",687node,node->getOpCode().getName(),isTruncation?"yes":"no",targetReg->hasKnownOrAssumedSignCode() ? 
targetReg->getKnownOrAssumedSignCode() : 0,targetReg->knownOrAssumedSignIsZone()?"yes":"no",688targetReg->getSize(),targetReg->getDecimalPrecision(),digitsToClear,isSetSign?"yes":"no",sign);689690TR_ASSERT(!isTruncation,"a zd2zdsxs operation should not truncate\n");691if (digitsToClear > 0)692{693if (cg->traceBCDCodeGen())694traceMsg(comp,"\tdigitsToClear > 0 (%d) so set upper bytes to 0x%x and set targetRegPrec to nodePrec %d\n",digitsToClear,TR::DataType::getZonedZeroCode(),node->getDecimalPrecision());695int32_t endByte = isDestTrailingSign ? node->getSize() : node->getSize() - TR::DataType::getZonedSignSize();696cg->genZeroLeftMostZonedBytes(node, targetReg, endByte, digitsToClear, generateS390LeftAlignedMemoryReference(*destMR, node, 0, cg, endByte));697targetReg->setDecimalPrecision(node->getDecimalPrecision());698}699700int32_t endByteForDestSign = isDestTrailingSign ? TR::DataType::getZonedSignSize() : targetReg->getSize();701TR::MemoryReference *destSignCodeMR = generateS390LeftAlignedMemoryReference(*destMR, node, 0, cg, endByteForDestSign);702703int32_t endByteForSourceSign = isDestTrailingSign ? (TR::DataType::getZonedSignSize() + TR::DataType::getZonedSignSize()) : TR::DataType::getZonedSignSize();704TR::MemoryReference *srcSignCodeMR = generateS390LeftAlignedMemoryReference(*destMR, node, 0, cg, endByteForSourceSign);705706// no 'invalid sign' message is ever required for a setSign operation or when a known (but *not* assumed) sign is 0xc,0xd or 0xf707intptr_t litPoolOffset = 0;708if (isSetSign || (srcReg->hasKnownSignCode() && srcReg->knownSignIsEmbeddedPreferredOrUnsigned()))709{710int32_t signToSet = isSetSign ? 
sign :711TR::DataType::convertSignEncoding(TR::ZonedDecimal, node->getDataType(), srcReg->getKnownSignCode());712bool srcSignAlreadyZone = srcReg->knownOrAssumedSignIsZone(); // || targetReg->temporaryKnownSignCodeIs(TR::DataType::getZonedValue());713if (cg->traceBCDCodeGen())714traceMsg(comp,"\t%s case so gen MVI to set target sign to 0x%x (from source sign 0x%x) and do %sgen OI because srcReg->knownOrAssumedSignIsZone() = %s\n",715isSetSign?"isSetSign=true":"srcReg->hasKnownSignCode",716signToSet,717isSetSign?sign:srcReg->getKnownSignCode(),718srcSignAlreadyZone?"not ":"",719srcSignAlreadyZone?"true":"false");720721TR_ASSERT(signToSet == TR::DataType::getZonedSeparatePlus() || signToSet == TR::DataType::getZonedSeparateMinus(),722"signToSet value should be 0x%x ('+') or 0x%x ('-') and not 0x%x\n", TR::DataType::getZonedSeparatePlus(), TR::DataType::getZonedSeparateMinus(), sign);723if (!srcSignAlreadyZone)724{725generateSIInstruction(cg, TR::InstOpCode::OI, node, srcSignCodeMR, TR::DataType::getZonedCode());726}727generateSIInstruction(cg, TR::InstOpCode::MVI, node, destSignCodeMR, (signToSet & 0xFF));728targetReg->setKnownSignCode(signToSet);729}730else if (srcReg->hasKnownCleanSign())731{732TR_ASSERT(TR::DataType::getZonedSeparatePlus() == 0x4E && TR::DataType::getZonedSeparateMinus() == 0x60, "zd2zdsxs sequence only works when plus sign is 0x4E and minus sign is 0x60\n");733TR::Register *tempReg1 = cg->allocateRegister(TR_GPR);734TR::Register *tempReg2 = cg->allocateRegister(TR_GPR);735736generateRXInstruction(cg, TR::InstOpCode::IC, node, tempReg1, generateS390LeftAlignedMemoryReference(*srcSignCodeMR, node, 0, cg, srcSignCodeMR->getLeftMostByte()));737738generateRIInstruction(cg, TR::InstOpCode::NILL, node, tempReg1, 0x10);739generateRSInstruction(cg, TR::InstOpCode::RLL, node, tempReg2, tempReg1, 29); // rotate right by 3 (32-3=29)740if (!targetReg->knownSignIsZone())741{742generateSIInstruction(cg, TR::InstOpCode::OI, node, 
generateS390LeftAlignedMemoryReference(*srcSignCodeMR, node, 0, cg, srcSignCodeMR->getLeftMostByte()), TR::DataType::getZonedCode());743}744generateRRInstruction(cg, TR::InstOpCode::OR, node, tempReg2, tempReg1);745generateRIInstruction(cg, TR::InstOpCode::AHI, node, tempReg2, 0x4E);746generateRXInstruction(cg, TR::InstOpCode::STC, node, tempReg2, destSignCodeMR);747cg->stopUsingRegister(tempReg1);748cg->stopUsingRegister(tempReg2);749targetReg->setHasKnownPreferredSign();750if (!isTruncation)751targetReg->setHasKnownCleanSign();752}753else754{755// DAA library assumes all BCD types are positive, unless an explicit negative sign code is present756TR::LabelSymbol * processSign = generateLabelSymbol(cg);757TR::LabelSymbol * processPositive = generateLabelSymbol(cg);758TR::LabelSymbol * processNegative = generateLabelSymbol(cg);759TR::LabelSymbol * cFlowRegionEnd = generateLabelSymbol(cg);760761generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processSign);762processSign->setStartInternalControlFlow();763764// A negative sign code is represented by 0xB and 0xD (1011 and 1101 in binary). 
Due to the765// symmetry in the binary encoding of the negative sign codes we can get away with two bit766// mask tests to check if a sign code is negative:767//768// Step 1 : Test if bit 0 and bit 3 are set769// Step 2 : Test if there is exactly one bit set from bit 1 and bit 2770771// Step 1772generateSIInstruction(cg, TR::InstOpCode::TM, node, generateS390LeftAlignedMemoryReference(*srcSignCodeMR, node, 0, cg, srcSignCodeMR->getLeftMostByte()), 0x90);773774generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK12, node, processPositive);775776// Step 2777generateSIInstruction(cg, TR::InstOpCode::TM, node, generateS390LeftAlignedMemoryReference(*srcSignCodeMR, node, 0, cg, srcSignCodeMR->getLeftMostByte()), 0x60);778779generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK9, node, processPositive);780781// ----------------- Incoming branch -----------------782783generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processNegative);784785// Patch in the preferred negative sign code786generateSIInstruction(cg, TR::InstOpCode::MVI, node, generateS390LeftAlignedMemoryReference(*destSignCodeMR, node, 0, cg, destSignCodeMR->getLeftMostByte()), TR::DataType::getZonedSeparateMinus());787788generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK15, node, cFlowRegionEnd);789790// ----------------- Incoming branch -----------------791792generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processPositive);793794// Patch in the preferred positive sign code795generateSIInstruction(cg, TR::InstOpCode::MVI, node, generateS390LeftAlignedMemoryReference(*destSignCodeMR, node, 0, cg, destSignCodeMR->getLeftMostByte()), TR::DataType::getZonedSeparatePlus());796797// ----------------- Incoming branch -----------------798799generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd);800cFlowRegionEnd->setEndInternalControlFlow();801802// Clear the embedded sign code of the 
source803TR::Instruction* cursor = generateSIInstruction(cg, TR::InstOpCode::OI, node, generateS390LeftAlignedMemoryReference(*srcSignCodeMR, node, 0, cg, srcSignCodeMR->getLeftMostByte()), TR::DataType::getZonedCode());804805// Set up the proper register dependencies806TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, cg);807808if (srcSignCodeMR->getIndexRegister())809dependencies->addPostCondition(srcSignCodeMR->getIndexRegister(), TR::RealRegister::AssignAny);810811if (srcSignCodeMR->getBaseRegister())812dependencies->addPostCondition(srcSignCodeMR->getBaseRegister(), TR::RealRegister::AssignAny);813814if (destSignCodeMR->getIndexRegister())815dependencies->addPostConditionIfNotAlreadyInserted(destSignCodeMR->getIndexRegister(), TR::RealRegister::AssignAny);816817if (destSignCodeMR->getBaseRegister())818dependencies->addPostConditionIfNotAlreadyInserted(destSignCodeMR->getBaseRegister(), TR::RealRegister::AssignAny);819820cursor->setDependencyConditions(dependencies);821822targetReg->setHasKnownPreferredSign();823}824}825826/**827* Handles pd2zdsls,pd2zdsts,pd2zdslsSetSign,pd2zdstsSetSign828*/829TR::Register *830J9::Z::TreeEvaluator::pd2zdslsEvaluator(TR::Node * node, TR::CodeGenerator * cg)831{832cg->traceBCDEntry("pd2zdsls",node);833cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "PD-Op/%s", node->getOpCode().getName()),8341, TR::DebugCounter::Cheap);835TR_PseudoRegister *targetReg = cg->allocatePseudoRegister(node->getDataType());836TR::Node *child = node->getFirstChild();837TR_PseudoRegister *childReg = cg->evaluateBCDNode(child);838childReg = cg->privatizeBCDRegisterIfNeeded(node, child, childReg);839TR::MemoryReference *sourceMR = generateS390RightAlignedMemoryReference(child, childReg->getStorageReference(), cg);840TR::MemoryReference *destMR = packedToZonedHelper(node, targetReg, sourceMR, childReg, cg);841zonedToZonedSeparateSignHelper(node, childReg, targetReg, 
sourceMR, destMR, cg);842cg->decReferenceCount(child);843if (node->getOpCode().isSetSign())844cg->decReferenceCount(node->getSecondChild());845node->setRegister(targetReg);846cg->traceBCDExit("pd2zdsls",node);847return targetReg;848}849850void851J9::Z::TreeEvaluator::zonedSeparateSignToPackedOrZonedHelper(TR::Node *node, TR_PseudoRegister *targetReg, TR::MemoryReference *sourceMR, TR::MemoryReference *destMR, TR::CodeGenerator * cg)852{853TR_ASSERT( targetReg->isInitialized(),"targetRegister must be initialized before calling zonedSeparateSignToPackedOrZonedHelper\n");854TR::Node *srcNode = node->getFirstChild();855TR_PseudoRegister *srcReg = cg->evaluateBCDNode(srcNode);856bool isTruncation = srcReg->getDecimalPrecision() > node->getDecimalPrecision();857bool isSrcTrailingSign = (srcNode->getDataType() == TR::ZonedDecimalSignTrailingSeparate);858int32_t sourceSignEndByte = isSrcTrailingSign ? TR::DataType::getZonedSignSize() : srcReg->getSize();859TR::Compilation *comp = cg->comp();860if (node->getOpCode().isSetSign())861{862TR::Node *signCodeNode = node->getSetSignValueNode();863TR_ASSERT( signCodeNode->getOpCode().isLoadConst(),"excepting zonedSeparateSignToPackedOrZonedHelper sign code to be a const\n");864int32_t sign = signCodeNode->get32bitIntegralValue();865if (sign == TR::DataType::getIgnoredSignCode())866{867// just check for an invalid sign but do not set anything in this case868if (cg->traceBCDCodeGen())869traceMsg(comp,"\tzonedSeparateSignToPackedOrZonedHelper %p : op %s, ignoredSetSign=true case, sign 0x%x\n",node,node->getOpCode().getName(),sign);870871TR::LabelSymbol * returnLabel = generateLabelSymbol(cg);872TR::LabelSymbol * callLabel = generateLabelSymbol(cg);873874TR::LabelSymbol * cFlowRegionStart = generateLabelSymbol(cg);875TR::LabelSymbol * cflowRegionEnd = generateLabelSymbol(cg);876877TR::RegisterDependencyConditions * deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, cg);878879if 
(sourceMR->getIndexRegister())880deps->addPostConditionIfNotAlreadyInserted(sourceMR->getIndexRegister(), TR::RealRegister::AssignAny);881if (sourceMR->getBaseRegister())882deps->addPostConditionIfNotAlreadyInserted(sourceMR->getBaseRegister(), TR::RealRegister::AssignAny);883884generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart, deps);885cFlowRegionStart->setStartInternalControlFlow();886887if (cg->traceBCDCodeGen())888traceMsg(comp,"\t\ttargetReg->isInit=%s, targetRegSize=%d, targetRegPrec=%d, srcRegSize=%d, srcRegPrec=%d, sourceSignEndByte=%d\n",889targetReg->isInitialized()?"yes":"no",targetReg->getSize(),targetReg->getDecimalPrecision(),srcReg->getSize(),srcReg->getDecimalPrecision(),sourceSignEndByte);890891generateSIInstruction(cg, TR::InstOpCode::CLI, node, generateS390LeftAlignedMemoryReference(*sourceMR, node, 0, cg, sourceSignEndByte), TR::DataType::getZonedSeparatePlus());892generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK8, node, cflowRegionEnd);893894895generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cflowRegionEnd, deps);896cflowRegionEnd->setEndInternalControlFlow();897898targetReg->transferSignState(srcReg, isTruncation);899}900else901{902if (cg->traceBCDCodeGen())903traceMsg(comp,"\tzonedSeparateSignToPackedOrZonedHelper %p : op %s, setSign=true case, sign 0x%x\n",node,node->getOpCode().getName(),sign);904cg->genSignCodeSetting(node, targetReg, targetReg->getSize(), generateS390RightAlignedMemoryReference(*destMR, node, 0, cg), sign, targetReg, 0, false /* !numericNibbleIsZero */);905}906}907else908{909TR::LabelSymbol * checkMinusLabel = generateLabelSymbol(cg);910TR::LabelSymbol * returnLabel = generateLabelSymbol(cg);911TR::LabelSymbol * callLabel = generateLabelSymbol(cg);912913TR::LabelSymbol * cFlowRegionStart = generateLabelSymbol(cg);914TR::LabelSymbol * cflowRegionEnd = generateLabelSymbol(cg);915916TR::RegisterDependencyConditions * deps = new (cg->trHeapMemory()) 
TR::RegisterDependencyConditions(0, 4, cg);917918if (sourceMR->getIndexRegister())919deps->addPostConditionIfNotAlreadyInserted(sourceMR->getIndexRegister(), TR::RealRegister::AssignAny);920if (sourceMR->getBaseRegister())921deps->addPostConditionIfNotAlreadyInserted(sourceMR->getBaseRegister(), TR::RealRegister::AssignAny);922923if (destMR->getIndexRegister())924deps->addPostConditionIfNotAlreadyInserted(destMR->getIndexRegister(), TR::RealRegister::AssignAny);925if (destMR->getBaseRegister())926deps->addPostConditionIfNotAlreadyInserted(destMR->getBaseRegister(), TR::RealRegister::AssignAny);927928929generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart, deps);930cFlowRegionStart->setStartInternalControlFlow();931932if (cg->traceBCDCodeGen())933traceMsg(comp,"\tzonedSeparateSignToPackedOrZonedHelper %p : op %s, targetReg->isInit=%s, targetRegSize=%d, targetRegPrec=%d, srcRegSize=%d, srcRegPrec=%d, sourceSignEndByte=%d\n",934node,node->getOpCode().getName(),targetReg->isInitialized()?"yes":"no",targetReg->getSize(),targetReg->getDecimalPrecision(),srcReg->getSize(),srcReg->getDecimalPrecision(),sourceSignEndByte);935936// DAA library assumes all BCD types are positive, unless an explicit negative sign code is present937generateSIInstruction(cg, TR::InstOpCode::CLI, node, generateS390LeftAlignedMemoryReference(*sourceMR, node, 0, cg, sourceSignEndByte), TR::DataType::getZonedSeparateMinus());938generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC0, node, checkMinusLabel);939940cg->genSignCodeSetting(node, NULL, targetReg->getSize(), generateS390RightAlignedMemoryReference(*destMR, node, 0, cg), TR::DataType::getPreferredPlusCode(), targetReg, 0, false /* !numericNibbleIsZero */);941generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cflowRegionEnd);942943generateS390LabelInstruction(cg, TR::InstOpCode::label, node, checkMinusLabel);944945946cg->genSignCodeSetting(node, NULL, 
targetReg->getSize(), generateS390RightAlignedMemoryReference(*destMR, node, 0, cg), TR::DataType::getPreferredMinusCode(), targetReg, 0, false /* !numericNibbleIsZero */);947948949950generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cflowRegionEnd, deps);951cflowRegionEnd->setEndInternalControlFlow();952953targetReg->setHasKnownPreferredSign();954}955}956957/**958* Handles zdsls2pd,zdsts2pd959*/960TR::Register *961J9::Z::TreeEvaluator::zdsls2pdEvaluator(TR::Node * node, TR::CodeGenerator * cg)962{963cg->traceBCDEntry("zdsls2pd",node);964cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "PD-Op/%s", node->getOpCode().getName()),9651, TR::DebugCounter::Cheap);966TR_PseudoRegister *targetReg = cg->allocatePseudoRegister(node->getDataType());967TR::Node *child = node->getFirstChild();968TR_PseudoRegister *childReg = cg->evaluateBCDNode(child);969childReg = cg->privatizeBCDRegisterIfNeeded(node, child, childReg);970TR::MemoryReference *sourceMR = generateS390RightAlignedMemoryReference(child, childReg->getStorageReference(), cg);971TR::MemoryReference *destMR = zonedToPackedHelper(node, targetReg, sourceMR, childReg, cg);972targetReg->resetSignState(); // the conversion operation is not complete yet so reset any sign state transferred in the zonedToPackedHelper973// zonedToPackedHelper with a separate sign source will pack a zone code into the packed sign code position so set the zone value on the974// targetReg to improve the zonedSeparateSignToPackedOrZonedHelper code generation975targetReg->setTemporaryKnownSignCode(TR::DataType::getZonedValue());976zonedSeparateSignToPackedOrZonedHelper(node, targetReg, sourceMR, destMR, cg);977cg->decReferenceCount(child);978if (node->getOpCode().isSetSign())979cg->decReferenceCount(node->getSecondChild());980node->setRegister(targetReg);981cg->traceBCDExit("zdsls2pd",node);982return targetReg;983}984985/**986* Handles zdsls2zd,zdsts2zd987*/988TR::Register 
*989J9::Z::TreeEvaluator::zdsls2zdEvaluator(TR::Node * node, TR::CodeGenerator * cg)990{991cg->traceBCDEntry("zdsls2zd",node);992TR::Node *srcNode = node->getFirstChild();993TR_PseudoRegister *srcReg = cg->evaluateBCDNode(srcNode);994995bool isSetSign = node->getOpCode().isSetSign();996int32_t sign = 0;997TR::Node *signCodeNode = NULL;998TR::Compilation *comp = cg->comp();999if (isSetSign)1000{1001signCodeNode = node->getSecondChild();1002TR_ASSERT( signCodeNode->getOpCode().isLoadConst(),"excepting zdsle2zdSetSign sign code to be a const\n");1003sign = signCodeNode->get32bitIntegralValue();1004}10051006bool isSrcTrailingSign = (srcNode->getDataType() == TR::ZonedDecimalSignTrailingSeparate);1007int32_t sourceOffset = 0;1008bool isTruncation = false;1009int32_t targetPrecision = srcReg->getDecimalPrecision();1010if (srcReg->getDecimalPrecision() > node->getDecimalPrecision()) // a truncation1011{1012isTruncation = true;1013sourceOffset = srcReg->getDecimalPrecision() - node->getDecimalPrecision(); // reach into the source by sourceOffset bytes to get the correct digits1014targetPrecision = node->getDecimalPrecision();1015}10161017bool isEffectiveNop = isZonedOperationAnEffectiveNop(node, 0, isTruncation, srcReg, isSetSign, sign, cg);1018TR_PseudoRegister *targetReg = NULL;1019TR::MemoryReference *sourceMR = NULL;1020TR::MemoryReference *destMR = NULL;1021if (isEffectiveNop)1022{1023targetReg = evaluateBCDSignModifyingOperand(node, isEffectiveNop, false, false, sourceMR, cg); // isNondestructiveNop=false,initTarget=false1024}1025else1026{1027targetReg = evaluateBCDValueModifyingOperand(node, false, sourceMR, cg); // initTarget=false1028sourceMR = generateS390RightAlignedMemoryReference(srcNode, srcReg->getStorageReference(), cg);1029destMR = generateS390RightAlignedMemoryReference(node, targetReg->getStorageReference(), cg);1030}10311032targetReg->setDecimalPrecision(targetPrecision);1033bool isInitialized = targetReg->isInitialized();1034if 
(cg->traceBCDCodeGen())1035traceMsg(comp,"\tzdsls2zdEvaluator %p : op %s, isInitialized=%s, targetRegSize=%d, targetRegPrec=%d, srcRegSize=%d, srcRegPrec=%d, isEffectiveNop=%s (isSetSign %s, sign 0x%x)\n",1036node,node->getOpCode().getName(),isInitialized?"yes":"no",1037targetReg->getSize(),targetReg->getDecimalPrecision(),srcReg->getSize(),srcReg->getDecimalPrecision(),isEffectiveNop?"yes":"no",isSetSign?"yes":"no",sign);10381039if (!isEffectiveNop)1040{1041if (!isInitialized)1042{1043int32_t mvcSize = targetReg->getDecimalPrecision();1044int32_t srcEndByte = isSrcTrailingSign ? srcReg->getSize() : srcReg->getSize() - TR::DataType::getZonedSignSize();1045if (cg->traceBCDCodeGen())1046traceMsg(comp,"\tisInit=false so gen MVC to init with size=%d and sourceOffset=%d, srcEndByte=%d\n",mvcSize,sourceOffset,srcEndByte);1047generateSS1Instruction(cg, TR::InstOpCode::MVC, node,1048mvcSize-1,1049generateS390RightAlignedMemoryReference(*destMR, node, 0, cg),1050generateS390LeftAlignedMemoryReference(*sourceMR, node, sourceOffset, cg, srcEndByte));1051targetReg->transferDataState(srcReg);1052targetReg->setIsInitialized();1053}1054targetReg->setTemporaryKnownSignCode(TR::DataType::getZonedValue());1055if (isInitialized && isSrcTrailingSign)1056{1057destMR->addToTemporaryNegativeOffset(node, -TR::DataType::getZonedSignSize(), cg);1058}1059zonedSeparateSignToPackedOrZonedHelper(node, targetReg, sourceMR, destMR, cg);1060}10611062if (isSrcTrailingSign)1063{1064if (isEffectiveNop)1065{1066targetReg->addToRightAlignedIgnoredBytes(TR::DataType::getZonedSignSize());1067if (cg->traceBCDCodeGen())1068traceMsg(comp,"\tisSrcTrailingSign=true and isEffectiveNop=true (zdsls2zd) : increment targetReg %s ignoredBytes %d -> %d (by the TR::DataType::getZonedSignSize())\n",1069cg->getDebug()->getName(targetReg),targetReg->getRightAlignedIgnoredBytes() - TR::DataType::getZonedSignSize(),targetReg->getRightAlignedIgnoredBytes());1070}1071else if 
(isInitialized)1072{1073targetReg->addToRightAlignedDeadBytes(TR::DataType::getZonedSignSize());1074if (cg->traceBCDCodeGen())1075traceMsg(comp,"\tisSrcTrailingSign=true and isInitialized=true (zdsls2zd) : increment targetReg %s deadBytes %d -> %d (by the TR::DataType::getZonedSignSize())\n",1076cg->getDebug()->getName(targetReg),targetReg->getRightAlignedDeadBytes() - TR::DataType::getZonedSignSize(),targetReg->getRightAlignedDeadBytes());1077}1078}10791080cg->decReferenceCount(srcNode);1081if (node->getOpCode().isSetSign())1082cg->decReferenceCount(node->getSecondChild());1083node->setRegister(targetReg);1084cg->traceBCDExit("zdsls2zd",node);1085return targetReg;1086}10871088/**1089* Handles zd2zdsls,zd2zdsts1090*/1091TR::Register *1092J9::Z::TreeEvaluator::zd2zdslsEvaluator(TR::Node * node, TR::CodeGenerator * cg)1093{1094cg->traceBCDEntry("zd2zdsls",node);1095TR::Compilation *comp = cg->comp();1096TR::Node *srcNode = node->getFirstChild();1097TR_PseudoRegister *srcReg = cg->evaluateBCDNode(srcNode);10981099TR_StorageReference *srcStorageReference = srcReg->getStorageReference();1100TR::MemoryReference *sourceMR = generateS390RightAlignedMemoryReference(srcNode, srcStorageReference, cg);11011102TR_PseudoRegister *targetReg = evaluateBCDValueModifyingOperand(node, false, sourceMR, cg); // initTarget=false1103TR::MemoryReference *destMR = generateS390RightAlignedMemoryReference(node, targetReg->getStorageReference(), cg);11041105bool isTrailingSign = (node->getDataType() == TR::ZonedDecimalSignTrailingSeparate);11061107if (cg->traceBCDCodeGen())1108traceMsg(comp,"\tzd2zdslsEvaluator %p : op %s, targetReg->isInit=%s, targetRegSize=%d, targetRegPrec=%d\n",1109node,node->getOpCode().getName(),targetReg->isInitialized()?"yes":"no",targetReg->getSize(),targetReg->getDecimalPrecision());11101111bool isTruncation = node->getDecimalPrecision() < srcReg->getDecimalPrecision();1112TR_ASSERT( !isTruncation,"a zd2zdsxs operation should not truncate\n");11131114if 
(cg->traceBCDCodeGen())1115traceMsg(comp,"\tset targetReg->prec to srcReg->prec %d\n",srcReg->getDecimalPrecision());1116targetReg->setDecimalPrecision(srcReg->getDecimalPrecision());11171118// the (targetReg->isInitialized() && isTrailingSign) case below is needed to move the initialized data left by 1 byte to make room for the trailing separate sign code1119if (!targetReg->isInitialized() || (targetReg->isInitialized() && isTrailingSign))1120{1121int32_t mvcSize = srcReg->getSize();1122if (cg->traceBCDCodeGen())1123traceMsg(comp,"\t%s so gen MVC to init with size %d\n",!targetReg->isInitialized()?"isInit=false":"isInit=true and isTrailingSign=true", mvcSize);1124generateSS1Instruction(cg, TR::InstOpCode::MVC, node,1125mvcSize-1,1126generateS390LeftAlignedMemoryReference(*destMR, node, 0, cg, isTrailingSign ? srcReg->getSize() + TR::DataType::getZonedSignSize() : srcReg->getSize()),1127generateS390RightAlignedMemoryReference(*sourceMR, node, 0, cg));1128targetReg->setIsInitialized();1129}11301131zonedToZonedSeparateSignHelper(node, srcReg, targetReg, sourceMR, destMR, cg);11321133cg->decReferenceCount(srcNode);1134if (node->getOpCode().isSetSign())1135cg->decReferenceCount(node->getSecondChild());1136node->setRegister(targetReg);1137cg->traceBCDExit("zd2zdsls",node);1138return targetReg;1139}11401141/**1142* Handles zdsle2zd,zd2zdsle1143*/1144TR::Register *1145J9::Z::TreeEvaluator::zdsle2zdEvaluator(TR::Node * node, TR::CodeGenerator * cg)1146{1147cg->traceBCDEntry("zdsle2zd",node);1148TR::Node *srcNode = node->getFirstChild();1149TR_PseudoRegister *srcReg = cg->evaluateBCDNode(srcNode);11501151bool isSetSign = node->getOpCode().isSetSign();1152int32_t sign = 0;1153TR::Node *signCodeNode = NULL;1154TR::Compilation *comp = cg->comp();1155if (isSetSign)1156{1157signCodeNode = node->getSecondChild();1158TR_ASSERT(signCodeNode->getOpCode().isLoadConst(),"excepting zdsle2zdSetSign sign code to be a const\n");1159sign = 
/**
 * Handles zdsle2zd,zd2zdsle
 *
 * Converts between zoned decimal with the sign embedded in the rightmost byte (TR::ZonedDecimal, the
 * "trailing" form in this routine) and zoned decimal with the sign embedded in the leftmost byte (the
 * "leading" form). The direction is derived from the data types of the node and its first child.
 *
 * When the operation is not an effective nop the routine:
 *   - optionally widens the result (leading destination only),
 *   - either sets a constant sign (setSign variants) or moves the source zone nibble to the new sign
 *     position with an MVZ,
 *   - forces the zone nibble at the old sign position to the zone code with an OI unless that is known
 *     to be redundant or the result is only one byte.
 *
 * @param node the conversion node (second child is the sign value for setSign variants)
 * @param cg   the code generator
 * @return the pseudo register holding the converted value (also set on the node)
 */
TR::Register *
J9::Z::TreeEvaluator::zdsle2zdEvaluator(TR::Node * node, TR::CodeGenerator * cg)
   {
   cg->traceBCDEntry("zdsle2zd",node);
   TR::Node *srcNode = node->getFirstChild();
   TR_PseudoRegister *srcReg = cg->evaluateBCDNode(srcNode);

   // For setSign variants the sign to set is a constant held in the second child
   bool isSetSign = node->getOpCode().isSetSign();
   int32_t sign = 0;
   TR::Node *signCodeNode = NULL;
   TR::Compilation *comp = cg->comp();
   if (isSetSign)
      {
      signCodeNode = node->getSecondChild();
      TR_ASSERT(signCodeNode->getOpCode().isLoadConst(),"excepting zdsle2zdSetSign sign code to be a const\n");
      sign = signCodeNode->get32bitIntegralValue();
      }
   // "trailing" == sign embedded in the rightmost byte (plain ZonedDecimal), "leading" == any other type
   bool isTrailingDst = node->getDataType() == TR::ZonedDecimal;
   bool isLeadingDst = !isTrailingDst;
   bool isTrailingSrc = srcNode->getDataType() == TR::ZonedDecimal;
   bool isLeadingSrc = !isTrailingSrc;

   // Classify the precision change: a truncation, a widening (digitsToClear > 0) or neither
   bool isTruncation = false;
   int32_t digitsToClear = 0;
   if (node->getDecimalPrecision() < srcReg->getDecimalPrecision())
      isTruncation = true;
   else if (node->getDecimalPrecision() > srcReg->getDecimalPrecision())
      digitsToClear = node->getDecimalPrecision()-srcReg->getDecimalPrecision();

   bool isEffectiveNop = isZonedOperationAnEffectiveNop(node, 0, isTruncation, srcReg, isSetSign, sign, cg);
   bool isNondestructiveNop = isEffectiveNop && !isTruncation;
   bool doWidening = true; // always true here; only consulted by the widening condition and trace below

   if (cg->traceBCDCodeGen())
      traceMsg(comp,"\tzdsle2zdEvaluator %p : op %s, isEffectiveNop=%s, isTruncation=%s, srcSignIsZone=%s, srcReg->getSize()=%d, (isSetSign=%s, sign 0x%x)\n",
         node,node->getOpCode().getName(),isEffectiveNop?"yes":"no",isTruncation?"yes":"no",srcReg->knownOrAssumedSignIsZone()?"yes":"no",srcReg->getSize(),isSetSign?"yes":"no",sign);

   TR::MemoryReference *sourceMR = NULL;
   TR_PseudoRegister *targetReg = NULL;
   if (!isEffectiveNop &&
       isLeadingDst &&   // only do for leading sign so the sign code doesn't have to be moved again later
       doWidening &&
       digitsToClear > 0)
      {
      // Explicit widening: the result takes the full node precision and digitsToClear stays non-zero
      // so the extra leftmost bytes are cleared further down
      sourceMR = generateS390RightAlignedMemoryReference(srcNode, srcReg->getStorageReference(), cg);
      targetReg = evaluateBCDValueModifyingOperand(node, true, sourceMR, cg); // initTarget=true
      if (cg->traceBCDCodeGen())
         traceMsg(comp,"\tperform an explicit widening (digitsToClear=%d, doWidening=yes, isEffectiveNop=no) set targetReg->prec to node->prec %d\n",digitsToClear,node->getDecimalPrecision());
      targetReg->setDecimalPrecision(node->getDecimalPrecision());
      }
   else
      {
      // No explicit widening: the result keeps the source precision (or the node precision on a truncation)
      // and digitsToClear is reset so no bytes are cleared below. sourceMR stays NULL for an effective nop.
      if (!isEffectiveNop)
         sourceMR = generateS390RightAlignedMemoryReference(srcNode, srcReg->getStorageReference(), cg);
      targetReg = evaluateBCDSignModifyingOperand(node, isEffectiveNop, isNondestructiveNop, true /*initTarget*/, sourceMR, cg);
      int32_t targetPrecision = isTruncation ? node->getDecimalPrecision() : srcReg->getDecimalPrecision();
      if (cg->traceBCDCodeGen())
         traceMsg(comp,"\tdo not perform an explicit widening (set digitsToClear=%d->0, doWidening=%s, isEffectiveNop=%s) set targetReg->prec to %d\n",
            digitsToClear,doWidening?"yes":"no",isEffectiveNop ?"yes":"no",targetPrecision);
      digitsToClear = 0;
      targetReg->setDecimalPrecision(targetPrecision);
      }

   if (!isEffectiveNop)
      {
      // destMR addresses the byte that holds the destination sign: right aligned for a trailing
      // destination, left aligned at the leftmost target byte for a leading destination
      TR::MemoryReference *destMR = isTrailingDst ? generateS390RightAlignedMemoryReference(node, targetReg->getStorageReference(), cg) :
                                                      generateS390LeftAlignedMemoryReference(node, targetReg->getStorageReference(), cg, targetReg->getSize());
      int32_t clearLeftMostByte = targetReg->getSize();
      if (isSetSign)
         {
         if (sign == TR::DataType::getIgnoredSignCode())
            {
            // Ignored sign: no sign setting instruction is generated, only the tracked sign state is updated
            if (cg->traceBCDCodeGen()) traceMsg(comp,"\tisSetSign=true with ignored sign=0x%x\n",sign);
            if (isTrailingDst) // zdsle2zd
               {
               if (srcReg->getSize() == 1)
                  targetReg->transferSignState(srcReg, isTruncation);
               else
                  targetReg->setKnownSignCode(TR::DataType::getZonedValue());
               }
            else // zd2zdsle
               {
               if (targetReg->getSize() == 1)
                  targetReg->transferSignState(srcReg, isTruncation);
               else if (targetReg->getSize() > srcReg->getSize()) // a widening in the leadingDst and ignored case leaves a bad sign code
                  targetReg->setHasKnownBadSignCode();
               else
                  targetReg->setKnownSignCode(TR::DataType::getZonedValue());
               }
            }
         else
            {
            if (cg->traceBCDCodeGen()) traceMsg(comp,"\tisSetSign=true : call genSignCodeSetting with sign=0x%x\n",sign);
            bool numericNibbleIsZero = false;
            if (isTrailingDst) // zdsle2zd
               {
               // bytes above the leftmost one have a top nibble of 0xf so use this knowledge to improve the sign code setting
               if (srcReg->getSize() == 1)
                  targetReg->transferSignState(srcReg, isTruncation);
               else
                  targetReg->setTemporaryKnownSignCode(TR::DataType::getZonedValue());
               }
            else // zd2zdsle
               {
               // when not performing an explicit widening then the bytes above the first one have a top nibble of 0xf so use this knowledge to improve the sign code setting
               if (targetReg->getSize() == 1)
                  targetReg->transferSignState(srcReg, isTruncation);
               else if (targetReg->getSize() <= srcReg->getSize())
                  targetReg->setTemporaryKnownSignCode(TR::DataType::getZonedValue());

               if (digitsToClear > 0)
                  {
                  // the sign setting below is expected to clear the leftmost digit too (asserted after the
                  // call), so one fewer byte remains for genZeroLeftMostZonedBytes
                  numericNibbleIsZero = true;
                  digitsToClear--;
                  clearLeftMostByte--;
                  }
               }
            int32_t digitsCleared = cg->genSignCodeSetting(node, targetReg, targetReg->getSize(), destMR, sign, targetReg, 0, numericNibbleIsZero);
            TR_ASSERT(!numericNibbleIsZero || digitsCleared == 1,"the sign code setting should have also cleared 1 digit (digitsCleared = %d)\n",digitsCleared);
            }
         }

      // Clear whatever widening bytes remain (non-zero only on the explicit widening path)
      if (digitsToClear > 0)
         {
         cg->genZeroLeftMostZonedBytes(node, targetReg, clearLeftMostByte, digitsToClear, destMR);
         }

      if (!isSetSign)
         {
         if (cg->traceBCDCodeGen()) traceMsg(comp,"\tisSetSign=false : generate MVZ of size 1 to transfer left aligned zdsle sign to right aligned zd sign position\n");

         // Re-point both memory references at their respective sign bytes before the one byte MVZ
         sourceMR = isTrailingSrc ? reuseS390RightAlignedMemoryReference(sourceMR, srcNode, srcReg->getStorageReference(), cg) :
                                    reuseS390LeftAlignedMemoryReference(sourceMR, srcNode, srcReg->getStorageReference(), cg, srcReg->getSize());
         destMR = isTrailingDst ? reuseS390RightAlignedMemoryReference(destMR, node, targetReg->getStorageReference(), cg) :
                                  reuseS390LeftAlignedMemoryReference(destMR, node, targetReg->getStorageReference(), cg, targetReg->getSize());
         int32_t mvzSize = 1;
         generateSS1Instruction(cg, TR::InstOpCode::MVZ, node,
                                mvzSize-1,
                                destMR,
                                sourceMR);
         targetReg->transferSignState(srcReg, isTruncation);
         }

      // Decide whether the old sign position (in the result) still needs its zone nibble forced to the zone code
      bool srcSignWillBeIgnored = false; // always false here
      bool srcSignResetRedundant = srcReg->knownOrAssumedSignIsZone() || (isLeadingSrc && isTruncation);
      bool srcSignResetIllegal = targetReg->getSize() == 1; // with a one byte result the old and new sign positions are the same byte

      if (cg->traceBCDCodeGen())
         traceMsg(comp,"\tcheck before resetting srcSignCode: srcSignWillBeIgnored %s, srcSignResetRedundant %s, srcSignResetIllegal %s\n",
            srcSignWillBeIgnored?"yes":"no",srcSignResetRedundant?"yes":"no",srcSignResetIllegal?"yes":"no");
      if (!(srcSignWillBeIgnored || srcSignResetRedundant || srcSignResetIllegal))
         {
            {
            if (cg->traceBCDCodeGen()) traceMsg(comp,"\tgenerate OI 0xF0 to force %s-aligned high nibble to 0xF\n",isTrailingSrc?"right":"left");
            // the OI targets the result storage at the byte where the source kept its sign
            generateSIInstruction(cg, TR::InstOpCode::OI, node, generateS390LeftAlignedMemoryReference(*destMR, node, 0, cg, isTrailingSrc ? 1 : targetReg->getSize()), TR::DataType::getZonedCode());
            }
         }
      targetReg->setIsInitialized();
      }

   cg->decReferenceCount(srcNode);
   if (isSetSign)
      cg->decReferenceCount(signCodeNode);
   node->setRegister(targetReg);
   cg->traceBCDExit("zdsle2zd",node);
   return targetReg;
   }
1 : targetReg->getSize()), TR::DataType::getZonedCode());1299}1300}1301targetReg->setIsInitialized();1302}13031304cg->decReferenceCount(srcNode);1305if (isSetSign)1306cg->decReferenceCount(signCodeNode);1307node->setRegister(targetReg);1308cg->traceBCDExit("zdsle2zd",node);1309return targetReg;1310}13111312TR::MemoryReference *1313J9::Z::TreeEvaluator::zonedToPackedHelper(TR::Node *node, TR_PseudoRegister *targetReg, TR::MemoryReference *sourceMR, TR_PseudoRegister *childReg, TR::CodeGenerator * cg)1314{1315TR::Node *child = node->getFirstChild();1316TR_StorageReference *hint = node->getStorageReferenceHint();1317TR_StorageReference *targetStorageReference = NULL;1318int32_t destPrecision = 0;1319int32_t destSize = 0;1320TR::Compilation *comp = cg->comp();1321if (hint)1322{1323TR_ASSERT( !childReg->isInitialized() || hint != childReg->getStorageReference(),"bcd conversion operands will overlap\n");1324destSize = hint->getSymbolSize(); // may be larger than the node->getSize() so take this opportunity to widen as part of the PACK1325destPrecision = TR::DataType::getBCDPrecisionFromSize(node->getDataType(), destSize); // may be larger than the node->getSize() so take this opportunity to widen as part of the PACK1326targetStorageReference = hint;1327}1328else1329{1330destSize = node->getSize();1331destPrecision = node->getDecimalPrecision();1332targetStorageReference = TR_StorageReference::createTemporaryBasedStorageReference(destSize, comp);1333}13341335targetReg->setStorageReference(targetStorageReference, node);13361337int32_t sourcePrecision = childReg->getDecimalPrecision();1338bool isTruncation = false;1339int32_t sourceOffsetForLeftAlignment = 0;13401341if (cg->traceBCDCodeGen())1342traceMsg(comp,"\tzonedToPackedHelper %p : op %s, destPrecision %d, destSize %d, sourcePrecision %d, sourceSize %d\n",1343node,node->getOpCode().getName(),destPrecision,destSize,sourcePrecision,childReg->getSize());13441345if (node->getDecimalPrecision() < sourcePrecision)1346{1347if 
(cg->traceBCDCodeGen())1348traceMsg(comp,"\tnodePrec <= sourcePrecision (%d <= %d) so set sourcePrecision=nodePrec=%d,isTruncation=true,sourceOffsetForLeftAlignment=%d\n",1349node->getDecimalPrecision(),sourcePrecision,node->getDecimalPrecision(),sourcePrecision - node->getDecimalPrecision());1350sourceOffsetForLeftAlignment = sourcePrecision - node->getDecimalPrecision();1351sourcePrecision = node->getDecimalPrecision();1352isTruncation = true;1353}13541355TR::MemoryReference *destMR = NULL;1356if (destSize > 16)1357{1358if (cg->traceBCDCodeGen())1359traceMsg(comp,"\tdestSize %d > 16 so reduce destSize to 16 and destPrecision to 31 for PACK encoding and clear top %d byte(s)\n",destSize,(destSize-16));1360destMR = generateS390RightAlignedMemoryReference(node, targetStorageReference, cg);1361cg->genZeroLeftMostPackedDigits(node, targetReg, destSize, (destSize-16)*2, destMR);1362destSize = 16;1363destPrecision = 31;1364}13651366if (cg->traceBCDCodeGen())1367traceMsg(comp,"\tsetting targetReg->prec to sourcePrecision %d\n",sourcePrecision);1368targetReg->setDecimalPrecision(sourcePrecision);13691370// skip over trailing sign for the unpack1371bool isSrcTrailingSign = (child->getDataType() == TR::ZonedDecimalSignTrailingSeparate);1372int32_t sourceEndByte = isSrcTrailingSign ? 
sourcePrecision + TR::DataType::getZonedSignSize() :1373sourcePrecision;13741375if (sourcePrecision <= 16)1376{1377if (cg->traceBCDCodeGen())1378traceMsg(comp,"\tsourcePrecision %d <= 16 so generate a single PACK destSize %d, sourcePrecision %d, sourceEndByte %d\n",sourcePrecision,destSize,sourcePrecision,sourceEndByte);1379destMR = reuseS390RightAlignedMemoryReference(destMR, node, targetStorageReference, cg);1380generateSS2Instruction(cg, TR::InstOpCode::PACK, node,1381destSize-1,1382destMR,1383sourcePrecision-1,1384generateS390LeftAlignedMemoryReference(*sourceMR, node, 0, cg, sourceEndByte));1385int32_t destSizeAsCeilingPrecision = TR::DataType::byteLengthToPackedDecimalPrecisionCeiling(destSize);1386if (destSizeAsCeilingPrecision > sourcePrecision)1387targetReg->addRangeOfZeroDigits(sourcePrecision, destSizeAsCeilingPrecision);1388}1389else if (sourcePrecision >= 17 && sourcePrecision <= 31)1390{1391if (cg->traceBCDCodeGen())1392{1393if (sourcePrecision >= 17 && sourcePrecision <= 30)1394traceMsg(comp,"\tsourcePrecision 17 <= %d <= 30 so generate two PACKs with sourceEndByte %d\n",sourcePrecision,sourceEndByte);1395else1396traceMsg(comp,"\tsourcePrecision == 31 so generate three PACKs with sourceEndByte %d\n",sourceEndByte);1397}1398bool needsThirdPack = false;1399if (sourcePrecision == 31)1400{1401sourcePrecision = 29; // The first two PACKs for the sourcePrecision=31 case are the same as for the sourcePrecision=29 case1402destPrecision = 29;1403needsThirdPack = true;1404if (cg->traceBCDCodeGen())1405traceMsg(comp,"\tsourcePrecision == 31 so reduce sourcePrecision and destPrecision to 29 and update sourceEndByte to %d\n",sourceEndByte);1406}14071408if (cg->traceBCDCodeGen())1409traceMsg(comp,"x^x : found large packed/zoned conv -- node %s (%p) prec %d, child %s (%p) prec %d (three=%s)\n",1410node->getOpCode().getName(),node,destPrecision,1411child->getOpCode().getName(),child,sourcePrecision,needsThirdPack?"yes":"no");14121413destMR = 
reuseS390LeftAlignedMemoryReference(destMR, node, targetStorageReference, cg, destSize);1414sourceMR = generateS390LeftAlignedMemoryReference(*sourceMR, node, 0, cg, sourceEndByte);1415int32_t pack1SourceSize = sourcePrecision-14;1416int32_t pack1DestSize = TR::DataType::getSizeFromBCDPrecision(node->getDataType(), destPrecision-14);1417if (cg->traceBCDCodeGen())1418traceMsg(comp,"\t\t1st PACK destSize=%d,srcSize=%d\n",pack1DestSize,pack1SourceSize);1419generateSS2Instruction(cg, TR::InstOpCode::PACK, node,1420pack1DestSize-1,1421destMR,1422pack1SourceSize-1,1423sourceMR);1424int32_t pack1DestSizeAsPrecision = TR::DataType::byteLengthToPackedDecimalPrecisionCeiling(pack1DestSize);1425if (pack1DestSizeAsPrecision > pack1SourceSize)1426{1427int32_t rightMostDigits = (destSize-pack1DestSize)*2;1428targetReg->addRangeOfZeroDigits(pack1SourceSize+rightMostDigits, pack1DestSizeAsPrecision+rightMostDigits);1429}1430int32_t pack2SourceSize = 15;1431int32_t pack2SourceOffset = pack1SourceSize-1;1432int32_t pack2DestSize = TR::DataType::getSizeFromBCDPrecision(node->getDataType(), pack2SourceSize);1433int32_t pack2DestOffset = pack1DestSize-1;1434if (cg->traceBCDCodeGen())1435traceMsg(comp,"\t\t2nd PACK destSize=%d,destOffset=%d, srcSize=%d,srcOffset=%d\n",pack2DestSize,pack2DestOffset,pack2SourceSize,pack2SourceOffset);1436generateSS2Instruction(cg, TR::InstOpCode::PACK, node,1437pack2DestSize-1,1438generateS390LeftAlignedMemoryReference(*destMR, node, pack2DestOffset, cg, destMR->getLeftMostByte()),1439pack2SourceSize-1,1440generateS390LeftAlignedMemoryReference(*sourceMR, node, pack2SourceOffset, cg, sourceMR->getLeftMostByte()));1441if (needsThirdPack)1442{1443int32_t pack3SourceSize = 3;1444int32_t pack3SourceOffset = pack2SourceOffset+(pack2SourceSize-1);1445int32_t pack3DestSize = TR::DataType::getSizeFromBCDPrecision(node->getDataType(), pack3SourceSize);1446int32_t pack3DestOffset = pack2DestOffset+(pack2DestSize-1);1447if 
(cg->traceBCDCodeGen())1448traceMsg(comp,"\t\t3rd PACK destSize=%d,destOffset=%d, srcSize=%d,srcOffset=%d\n",pack3DestSize,pack3DestOffset,pack3SourceSize,pack3SourceOffset);1449generateSS2Instruction(cg, TR::InstOpCode::PACK, node,1450pack3DestSize-1,1451generateS390LeftAlignedMemoryReference(*destMR, node, pack3DestOffset, cg, destMR->getLeftMostByte()),1452pack3SourceSize-1,1453generateS390LeftAlignedMemoryReference(*sourceMR, node, pack3SourceOffset, cg, sourceMR->getLeftMostByte()));1454}1455}1456else1457{1458TR_ASSERT(false,"zd2pd unexpected sourcePrecision %d\n",sourcePrecision);1459}14601461TR::Register* signCode = cg->allocateRegister();1462TR::Register* signCode4Bit = cg->allocateRegister();14631464TR::LabelSymbol * processSign = generateLabelSymbol(cg);1465TR::LabelSymbol * processSignEnd = generateLabelSymbol(cg);1466TR::LabelSymbol * processNegative = generateLabelSymbol(cg);1467TR::LabelSymbol * cFlowRegionEnd = generateLabelSymbol(cg);14681469generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processSign);1470processSign->setStartInternalControlFlow();14711472// Load the sign byte of the Packed Decimal from memory1473generateRXInstruction(cg, TR::InstOpCode::LLC, node, signCode, generateS390LeftAlignedMemoryReference(*destMR, node, 0, cg, 1));14741475generateRRInstruction(cg, TR::InstOpCode::LR, node, signCode4Bit, signCode);14761477// Clear most significant 4 bits1478generateRIInstruction(cg, TR::InstOpCode::NILL, node, signCode4Bit, 0x000F);14791480// Compare the sign byte against the preferred negative sign code1481generateRIInstruction(cg, TR::InstOpCode::CHI, node, signCode4Bit, TR::DataType::getPreferredMinusCode());14821483// Branch if equal1484generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK8, node, cFlowRegionEnd);14851486// Clear least significant 4 bits1487generateRIInstruction(cg, TR::InstOpCode::NILL, node, signCode, 0x00F0);14881489// Compare the sign byte against the alternative negative sign 
code1490generateRIInstruction(cg, TR::InstOpCode::CHI, node, signCode4Bit, TR::DataType::getAlternateMinusCode());14911492// Branch if equal1493generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK8, node, processNegative);14941495// Patch in the preferred positive sign code1496generateRIInstruction(cg, TR::InstOpCode::OILL, node, signCode, TR::DataType::getPreferredPlusCode());14971498generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK15, node, processSignEnd);14991500// ----------------- Incoming branch -----------------15011502generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processNegative);15031504// Patch in the preferred negative sign code1505generateRIInstruction(cg, TR::InstOpCode::OILL, node, signCode, TR::DataType::getPreferredMinusCode());15061507// ----------------- Incoming branch -----------------15081509generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processSignEnd);15101511generateRXInstruction(cg, TR::InstOpCode::STC, node, signCode, generateS390LeftAlignedMemoryReference(*destMR, node, 0, cg, 1));15121513// Set up the proper register dependencies1514TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, cg);15151516dependencies->addPostCondition(signCode, TR::RealRegister::AssignAny);1517dependencies->addPostCondition(signCode4Bit, TR::RealRegister::AssignAny);15181519if (destMR->getIndexRegister())1520dependencies->addPostCondition(destMR->getIndexRegister(), TR::RealRegister::AssignAny);15211522if (destMR->getBaseRegister())1523dependencies->addPostCondition(destMR->getBaseRegister(), TR::RealRegister::AssignAny);15241525generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies);1526cFlowRegionEnd->setEndInternalControlFlow();15271528// Cleanup registers before 
returning1529cg->stopUsingRegister(signCode);1530cg->stopUsingRegister(signCode4Bit);15311532targetReg->transferSignState(childReg, isTruncation);1533targetReg->transferDataState(childReg);1534targetReg->setIsInitialized();1535node->setRegister(targetReg);1536return destMR;1537}15381539TR::Register *1540J9::Z::TreeEvaluator::zd2pdEvaluator(TR::Node * node, TR::CodeGenerator * cg)1541{1542cg->traceBCDEntry("zd2pd",node);1543TR::Register* targetReg = NULL;15441545static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");1546if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD) || isVectorBCDEnv)1547{1548targetReg = zd2pdVectorEvaluatorHelper(node, cg);1549}1550else1551{1552targetReg = cg->allocatePseudoRegister(node->getDataType());1553TR::Node *child = node->getFirstChild();1554TR_PseudoRegister *childReg = cg->evaluateBCDNode(child);1555childReg = cg->privatizeBCDRegisterIfNeeded(node, child, childReg);1556TR::MemoryReference *sourceMR = generateS390RightAlignedMemoryReference(child, childReg->getStorageReference(), cg);1557zonedToPackedHelper(node, static_cast<TR_PseudoRegister*>(targetReg), sourceMR, childReg, cg);1558cg->decReferenceCount(child);1559node->setRegister(targetReg);1560}15611562cg->traceBCDExit("zd2pd",node);1563return targetReg;1564}15651566/**1567* 1. Get zd value by evaluating child node. It's in zdNode's PseudoRegister1568* 2. Get the memory reference from the pseudo register.1569* 3. Allocate Vector register to return1570* 4. get size of the node( node->getsize)1571* 5. generateVSI instruction using the information above.1572* 6. attach Vector register to the node.1573* 7. decReference BCD node for the child/1574* 8. 
return targetRegister.1575*/1576TR::Register *1577J9::Z::TreeEvaluator::zd2pdVectorEvaluatorHelper(TR::Node * node, TR::CodeGenerator * cg)1578{1579TR::Register *targetReg = NULL;15801581TR::Node *child = node->getFirstChild();1582TR_PseudoRegister *sourceReg = cg->evaluateBCDNode(child);1583sourceReg = cg->privatizeBCDRegisterIfNeeded(node, child, sourceReg);1584TR::MemoryReference *sourceMR = generateS390LeftAlignedMemoryReference(child, sourceReg->getStorageReference(), cg, child->getDecimalPrecision());1585targetReg = cg->allocateRegister(TR_VRF);1586int32_t destPrecision = std::min(node->getDecimalPrecision(), child->getDecimalPrecision());1587generateVSIInstruction(cg, TR::InstOpCode::VPKZ, node, targetReg, sourceMR, destPrecision - 1);15881589node->setRegister(targetReg);1590cg->decReferenceCount(child);1591return targetReg;1592}15931594/**1595* \brief Check the sign of zd after pd2zd conversion.1596*1597* The UNPK instruction does not validate the digits nor the sign of the packed decimal.1598* We need to check the sign of PD and set ZD signs properly: use 0xc for positive, and 0xd for negative numbers.1599*1600*/1601void1602J9::Z::TreeEvaluator::pd2zdSignFixup(TR::Node *node,1603TR::MemoryReference *destMR,1604TR::CodeGenerator * cg,1605bool useLeftAlignedMR)1606{1607TR::Register* signCode = cg->allocateRegister();1608TR::Register* signCode4Bit = cg->allocateRegister();16091610TR::LabelSymbol * processSign = generateLabelSymbol(cg);1611TR::LabelSymbol * processSignEnd = generateLabelSymbol(cg);1612TR::LabelSymbol * processNegative = generateLabelSymbol(cg);1613TR::LabelSymbol * cFlowRegionEnd = generateLabelSymbol(cg);16141615generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processSign);1616processSign->setStartInternalControlFlow();16171618TR::MemoryReference* signByteMR = NULL;1619if (useLeftAlignedMR)1620signByteMR = generateS390LeftAlignedMemoryReference(*destMR, node, 0, cg, 1);1621else1622signByteMR = generateS390MemoryReference(*destMR, 
(node->getSecondChild())->getDecimalPrecision() - 1, cg);16231624// Load the sign byte of the Zoned Decimal from memory1625generateRXInstruction(cg, TR::InstOpCode::LLC, node, signCode, signByteMR);16261627generateRRInstruction(cg, TR::InstOpCode::LR, node, signCode4Bit, signCode);16281629// Clear least significant 4 bits1630generateRIInstruction(cg, TR::InstOpCode::NILL, node, signCode4Bit, 0x00F0);16311632// Compare the sign byte against the preferred negative sign code1633generateRIInstruction(cg, TR::InstOpCode::CHI, node, signCode4Bit, TR::DataType::getPreferredMinusCode() << 4);16341635// Branch if equal1636generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK8, node, cFlowRegionEnd);16371638// Clear most significant 4 bits1639generateRIInstruction(cg, TR::InstOpCode::NILL, node, signCode, 0x000F);16401641// Compare the sign byte against the alternative negative sign code1642generateRIInstruction(cg, TR::InstOpCode::CHI, node, signCode4Bit, TR::DataType::getAlternateMinusCode() << 4);16431644// Branch if equal1645generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK8, node, processNegative);16461647// Patch in the preferred positive sign code1648generateRIInstruction(cg, TR::InstOpCode::OILL, node, signCode, TR::DataType::getPreferredPlusCode() << 4);16491650generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK15, node, processSignEnd);16511652// ----------------- Incoming branch -----------------16531654generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processNegative);16551656// Patch in the preferred negative sign code1657generateRIInstruction(cg, TR::InstOpCode::OILL, node, signCode, TR::DataType::getPreferredMinusCode() << 4);16581659// ----------------- Incoming branch -----------------16601661generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processSignEnd);16621663generateRXInstruction(cg, TR::InstOpCode::STC, node, signCode, 
generateS390MemoryReference(*signByteMR, 0, cg));16641665// Set up the proper register dependencies1666TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, cg);16671668dependencies->addPostCondition(signCode, TR::RealRegister::AssignAny);1669dependencies->addPostCondition(signCode4Bit, TR::RealRegister::AssignAny);16701671if (destMR->getIndexRegister())1672dependencies->addPostCondition(destMR->getIndexRegister(), TR::RealRegister::AssignAny);16731674if (destMR->getBaseRegister())1675dependencies->addPostCondition(destMR->getBaseRegister(), TR::RealRegister::AssignAny);16761677generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies);1678cFlowRegionEnd->setEndInternalControlFlow();16791680// Cleanup registers before returning1681cg->stopUsingRegister(signCode);1682cg->stopUsingRegister(signCode4Bit);1683}16841685TR::MemoryReference *1686J9::Z::TreeEvaluator::packedToZonedHelper(TR::Node *node, TR_PseudoRegister *targetReg, TR::MemoryReference *sourceMR, TR_PseudoRegister *childReg, TR::CodeGenerator * cg)1687{1688TR::Node *child = node->getFirstChild();1689TR::Compilation *comp = cg->comp();16901691TR_StorageReference *hint = node->getStorageReferenceHint();1692TR_StorageReference *targetStorageReference = NULL;1693int32_t destSize = 0;1694if (hint)1695{1696TR_ASSERT( !childReg->isInitialized() || hint != childReg->getStorageReference(),"bcd conversion operands will overlap\n");1697destSize = hint->getSymbolSize(); // may be larger than the node->getSize() so take this opportunity to widen as part of the UNPK1698targetStorageReference = hint;1699}1700else1701{1702destSize = node->getSize();1703targetStorageReference = TR_StorageReference::createTemporaryBasedStorageReference(destSize, comp);1704}17051706targetReg->setStorageReference(targetStorageReference, node);17071708int32_t destPrecision = TR::DataType::getBCDPrecisionFromSize(node->getDataType(), destSize);1709// 
int32_t destPrecision = destSize;1710targetReg->setDecimalPrecision(destPrecision);1711int32_t sourcePrecision = childReg->getDecimalPrecision();1712int32_t sourceSize = childReg->getSize();17131714// skip over trailing sign for the unpack1715bool isDestTrailingSign = (node->getDataType() == TR::ZonedDecimalSignTrailingSeparate);1716int32_t destEndByte = isDestTrailingSign ? destPrecision + TR::DataType::getZonedSignSize() :1717destPrecision;17181719if (cg->traceBCDCodeGen())1720traceMsg(comp,"\tpackedToZonedHelper %p : op %s, destPrecision %d, destSize %d, destEndByte %d, sourcePrecision %d, sourceSize %d\n",1721node,node->getOpCode().getName(),destPrecision,destSize,destEndByte,sourcePrecision,childReg->getSize());17221723bool isTruncation = false;1724if (destPrecision < childReg->getDecimalPrecision())1725{1726isTruncation = true;1727sourcePrecision = destPrecision;1728sourceSize = TR::DataType::getSizeFromBCDPrecision(child->getDataType(), sourcePrecision);17291730if (cg->traceBCDCodeGen())1731traceMsg(comp,"\tisTruncation=true (dstPrec %d < srcPrec %d) reduce srcPrec %d->%d, srcSize %d->%d\n",1732destPrecision,childReg->getDecimalPrecision(),childReg->getDecimalPrecision(),sourcePrecision,childReg->getSize(),sourceSize);1733}17341735TR::Node *paddingAnchor = NULL;1736bool evaluatedPaddingAnchor = false;1737TR::MemoryReference *destMR = NULL;1738if (destPrecision <= 16 || sourcePrecision <= 16)1739{1740int32_t unpkDestOffset = 0;1741int32_t unpkDestSize = destPrecision;1742int32_t unpkSourceSize = sourceSize;1743destMR = generateS390LeftAlignedMemoryReference(node, targetStorageReference, cg, destEndByte);17441745if (destPrecision > 16)1746{1747int32_t bytesToSet = destPrecision-sourcePrecision;1748if (cg->traceBCDCodeGen())1749traceMsg(comp,"\tdestPrecision %d > 16, sourcePrecision %d <= 16 gen %d leftmost bytes of 0xF0\n",destPrecision,sourcePrecision,bytesToSet);1750TR_ASSERT(bytesToSet > 0,"destPrecision (%d) should be > sourcePrecision 
(%d)\n",destPrecision,sourcePrecision);1751cg->genZeroLeftMostZonedBytes(node, targetReg, destEndByte, bytesToSet, destMR);1752evaluatedPaddingAnchor = true;1753if (cg->traceBCDCodeGen())1754traceMsg(comp,"\treduce unpkDestOffset %d->%d and unpkDestSize %d->%d\n",unpkDestOffset,bytesToSet,unpkDestSize,sourcePrecision);1755unpkDestOffset = bytesToSet;1756unpkDestSize = sourcePrecision;1757}17581759if (cg->traceBCDCodeGen())1760traceMsg(comp,"\tdestPrecision %d <= 16 or sourcePrecision %d <= 16 so generate a single UNPK destPrecision %d, destOffset %d, unpkSourceSize %d\n",1761destPrecision,sourcePrecision,unpkDestSize,unpkDestOffset,unpkSourceSize);1762generateSS2Instruction(cg, TR::InstOpCode::UNPK, node,1763unpkDestSize-1,1764generateS390LeftAlignedMemoryReference(*destMR, node, unpkDestOffset, cg, destMR->getLeftMostByte()),1765unpkSourceSize-1,1766generateS390RightAlignedMemoryReference(*sourceMR, node, 0, cg));1767if (unpkDestSize > sourcePrecision)1768{1769if (cg->traceBCDCodeGen())1770traceMsg(comp,"\tunpkDestSize %d > sourcePrecision %d adding range of zero digits for pd2zd op\n",unpkDestSize,sourcePrecision);1771targetReg->addRangeOfZeroDigits(sourcePrecision, unpkDestSize);1772}1773}1774else1775{1776TR_ASSERT(destPrecision <= 31,"pd2zd destPrecision should be <= 31 and not %d\n",destPrecision);1777TR_ASSERT(sourcePrecision <= 31,"pd2zd sourcePrecision should be <= 31 and not %d\n",sourcePrecision);1778if (cg->traceBCDCodeGen())1779{1780if (sourcePrecision >= 17 && sourcePrecision <= 30)1781traceMsg(comp,"\tsourcePrecision 17 <= %d <= 30 so generate two UNPKs\n",sourcePrecision);1782else1783traceMsg(comp,"\tsourcePrecision == 31 so generate three UNPKs\n");1784}1785bool needsThirdUnpk = false;1786int32_t precisionAdjustment = 14;1787if (sourcePrecision == 31)1788{1789precisionAdjustment=16;1790needsThirdUnpk = true;1791}1792else1793{1794// in this case can do the conversion in 2 UNPKs instead of 3. 
Keep the target precision up to 30 bytes to widen extra bytes.1795if (cg->traceBCDCodeGen())1796traceMsg(comp,"\tsourcePrecision < 31 (%d) so reduce destPrecision to min(destPrecision,30) = min(%d,30) = %d ",1797sourcePrecision,destPrecision,std::min(destPrecision,30));1798destPrecision = std::min(destPrecision, 30);1799destEndByte = isDestTrailingSign ? destPrecision + TR::DataType::getZonedSignSize() :1800destPrecision;1801targetReg->setDecimalPrecision(destPrecision);1802if (cg->traceBCDCodeGen())1803traceMsg(comp,"and update targetReg->prec to new destPrecision %d and update destEndByte to %d\n",destPrecision,destEndByte);1804}18051806if (cg->traceBCDCodeGen())1807traceMsg(comp,"x^x : found large packed/zoned conv -- node %s (%p) prec %d, child %s (%p) prec %d (three=%s)\n",1808node->getOpCode().getName(),node,destPrecision,1809child->getOpCode().getName(),child,sourcePrecision,needsThirdUnpk?"yes":"no");18101811destMR = generateS390LeftAlignedMemoryReference(node, targetStorageReference, cg, destEndByte);1812sourceMR = generateS390LeftAlignedMemoryReference(*sourceMR, node, 0, cg, sourceSize);1813int32_t unpk1DestSize = destPrecision-precisionAdjustment;1814int32_t unpk1SourceSize = TR::DataType::getSizeFromBCDPrecision(child->getDataType(), sourcePrecision-precisionAdjustment);1815if (cg->traceBCDCodeGen())1816traceMsg(comp,"\t\t1st UNPK destSize=%d,srcSize=%d\n",unpk1DestSize,unpk1SourceSize);1817generateSS2Instruction(cg, TR::InstOpCode::UNPK, node,1818unpk1DestSize-1,1819destMR,1820unpk1SourceSize-1,1821sourceMR);1822int32_t unpk2DestSize = 15;1823int32_t unpk2DestOffset = unpk1DestSize-1;1824int32_t unpk2SourceSize = TR::DataType::getSizeFromBCDPrecision(child->getDataType(), 15);1825int32_t unpk2SourceOffset = unpk1SourceSize-1;1826if (cg->traceBCDCodeGen())1827traceMsg(comp,"\t\t2nd UNPK destSize=%d,destOffset=%d, srcSize=%d,srcOffset=%d\n",unpk2DestSize,unpk2DestOffset,unpk2SourceSize,unpk2SourceOffset);1828generateSS2Instruction(cg, 
TR::InstOpCode::UNPK, node,1829unpk2DestSize-1,1830generateS390LeftAlignedMemoryReference(*destMR, node, unpk2DestOffset, cg, destMR->getLeftMostByte()),1831unpk2SourceSize-1,1832generateS390LeftAlignedMemoryReference(*sourceMR, node, unpk2SourceOffset, cg, sourceMR->getLeftMostByte()));1833if (needsThirdUnpk)1834{1835int32_t unpk3DestSize = 3;1836int32_t unpk3DestOffset = unpk2DestOffset+(unpk2DestSize-1);1837int32_t unpk3SourceSize = TR::DataType::getSizeFromBCDPrecision(child->getDataType(), 3);1838int32_t unpk3SourceOffset = unpk2SourceOffset+(unpk2SourceSize-1);1839if (cg->traceBCDCodeGen())1840traceMsg(comp,"\t\t3rd UNPK destSize=%d,destOffset=%d, srcSize=%d,srcOffset=%d\n",unpk3DestSize,unpk3DestOffset,unpk3SourceSize,unpk3SourceOffset);1841generateSS2Instruction(cg, TR::InstOpCode::UNPK, node,1842unpk3DestSize-1,1843generateS390LeftAlignedMemoryReference(*destMR, node, unpk3DestOffset, cg, destMR->getLeftMostByte()),1844unpk3SourceSize-1,1845generateS390LeftAlignedMemoryReference(*sourceMR, node, unpk3SourceOffset, cg, sourceMR->getLeftMostByte()));1846}1847}18481849if (!evaluatedPaddingAnchor)1850cg->processUnusedNodeDuringEvaluation(paddingAnchor);18511852pd2zdSignFixup(node, destMR, cg, true);18531854targetReg->transferSignState(childReg, isTruncation);1855targetReg->transferDataState(childReg);1856targetReg->setIsInitialized();1857node->setRegister(targetReg);1858return destMR;1859}18601861TR::Register *1862J9::Z::TreeEvaluator::pd2zdVectorEvaluatorHelper(TR::Node * node, TR::CodeGenerator * cg)1863{1864TR::Compilation* comp = cg->comp();1865traceMsg(comp, "DAA: Enter pd2zdVectorEvaluatorHelper\n");1866TR_PseudoRegister *targetReg = cg->allocatePseudoRegister(node->getDataType());18671868// pd2zd we need to create storagerefence and save this value to the memoryreference1869// associated to that storagereference.1870// To do this, we need to1871//1872// 1. create NodeBasedStorageReference,1873// 2. 
creatememoryreference from the StorageREference,1874// 3. Use the memory reference to create VUPKZ instruction1875//1876// return the allocate PseudoRegister associate the storage reference to the Pseudo register1877// return this pseudoregister/1878//1879TR_StorageReference *hint = node->getStorageReferenceHint();1880int32_t sizeOfZonedValue = node->getSize(); //for zoned node, precision and the size must be the same.1881int32_t precision = node->getDecimalPrecision();1882TR_StorageReference* targetStorageReference = hint ? hint : TR_StorageReference::createTemporaryBasedStorageReference(sizeOfZonedValue, comp);18831884targetReg->setStorageReference(targetStorageReference, node);1885TR::Node *child = node->getFirstChild(); //This child will evaluate to Vector Register1886TR::Register *valueRegister = cg->evaluate(child);1887TR_ASSERT((valueRegister->getKind() == TR_VRF || valueRegister->getKind() == TR_FPR),1888"valueChild should evaluate to Vector register.");18891890TR::MemoryReference *targetMR = generateS390LeftAlignedMemoryReference(node, targetStorageReference, cg, sizeOfZonedValue, false);18911892if (!targetStorageReference->isTemporaryBased())1893{1894TR::SymbolReference *memSymRef = targetStorageReference->getNode()->getSymbolReference();1895if (memSymRef)1896{1897targetMR->setListingSymbolReference(memSymRef);1898}1899}19001901if(cg->traceBCDCodeGen())1902{1903traceMsg(comp, "gen VUKPZ, sizeOfZonedValue=%d, precision=%d\n", sizeOfZonedValue, precision);1904}19051906generateVSIInstruction(cg, TR::InstOpCode::VUPKZ, node, valueRegister, targetMR, sizeOfZonedValue - 1);19071908// Fix pd2zd signs. 
VUPKZ and its non-vector counterpart don't validate digits nor signs.1909pd2zdSignFixup(node, targetMR, cg, true);19101911node->setRegister(targetReg);1912cg->decReferenceCount(child);1913targetReg->setIsInitialized();1914traceMsg(comp, "DAA: Leave pd2zdVectorEvaluatorHelper\n");1915return targetReg;1916}19171918TR::Register *1919J9::Z::TreeEvaluator::pd2zdEvaluator(TR::Node * node, TR::CodeGenerator * cg)1920{1921cg->traceBCDEntry("pd2zd",node);1922TR::Register* targetReg = NULL;1923cg->generateDebugCounter("PD-Op/pd2zd", 1, TR::DebugCounter::Cheap);19241925static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");1926if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) &&1927!cg->comp()->getOption(TR_DisableVectorBCD) ||1928isVectorBCDEnv)1929{1930targetReg = pd2zdVectorEvaluatorHelper(node, cg);1931}1932else1933{1934targetReg = cg->allocatePseudoRegister(node->getDataType());1935TR::Node *child = node->getFirstChild();1936TR_PseudoRegister *childReg = cg->evaluateBCDNode(child);1937childReg = cg->privatizeBCDRegisterIfNeeded(node, child, childReg);1938TR::MemoryReference *sourceMR = generateS390RightAlignedMemoryReference(child, childReg->getStorageReference(), cg);1939packedToZonedHelper(node, static_cast<TR_PseudoRegister*>(targetReg), sourceMR, childReg, cg);1940cg->decReferenceCount(child);1941node->setRegister(targetReg);1942}19431944cg->traceBCDExit("pd2zd",node);1945return targetReg;1946}19471948bool1949J9::Z::TreeEvaluator::isZonedOperationAnEffectiveNop(TR::Node * node, int32_t shiftAmount, bool isTruncation, TR_PseudoRegister *srcReg, bool isSetSign, int32_t signToSet, TR::CodeGenerator * cg)1950{1951bool isEffectiveNop = false;1952int32_t zone = TR::DataType::getZonedValue();1953// For skipLeadingSignReset to be correct the node refCount must be 1 otherwise a commoned reference may be exposed to an incorrect1954// zone nibble (it will be the source's sign code and not the correct zone value)1955bool 
skipLeadingSignReset = false;1956bool srcSignIsZone = srcReg->knownOrAssumedSignIsZone();1957bool signIsAlreadySet = srcReg->hasKnownOrAssumedSignCode() && (srcReg->getKnownOrAssumedSignCode()==signToSet);1958bool signToSetIsZone = signToSet == zone;1959bool signToSetIsIgnored = signToSet == TR::DataType::getIgnoredSignCode();1960bool signToSetIsZoneOrIgnored = signToSetIsZone || signToSetIsIgnored;19611962TR_ASSERT(!node->getOpCode().isRightShift() || shiftAmount > 0,"shiftAmount should be > 0 for zoned right shifts and not a %d\n",shiftAmount);1963switch (node->getOpCodeValue())1964{1965case TR::zd2zdsle:1966isEffectiveNop = srcSignIsZone || (node->getDecimalPrecision() == 1);1967break;1968case TR::zdsle2zd:1969isEffectiveNop = srcSignIsZone || (srcReg->getDecimalPrecision() == 1);1970break;1971case TR::zdsts2zd:1972case TR::zdsls2zd:1973break;1974default:1975TR_ASSERT(false,"unexpected zoned opcode %d\n",node->getOpCodeValue());1976break;1977}1978return isEffectiveNop;1979}19801981/**1982* \brief This evaluator helper function evaluates BCDCHK nodes by emitting mainline and out-of-line instructions for1983* the underlying packed decimal operations. The mainline instructions perform the actual operations, and the OOL1984* instructions are for hardware exception handling.1985*1986* The canonical BCDCHK IL structure is the following:1987*1988* BCDCHK1989* pdOpNode // the operation node1990* aladd // optional address node. Exists only if the result of the operation is packed decimal1991* callParam1 // call parameter nodes of the original DAA API call1992* callParam21993* .1994* .1995* callParamN1996*1997* With the new DAA BCDCHK node tree structure, the first child of a BCDCHK node is1998* always the PD opNode. 
The first child and its sub-tree could throw packed decimal related hardware exceptions, which is1999* to be handled by the designated OOL instruction sequence.2000*2001* As for the second child of BCDCHK, it will be an address node if the result of the PD operation is a packed decimal. This address2002* node is to be used by the OOL for result copy back.2003*2004* The steps to evaluate the new BCDCHK node is the following:2005*2006* -# Create a callNode and attached BCDCHK's call parameter children to it. This callNode is to be evaluated2007* later in the OOL section2008*2009* -# If applicable, evaluate address node's children (e.g. this is applicable to i2pd but not to PD comparisons)2010*2011* -# Create a handlerLabel that points to the start of the OOL section2012*2013* -# Evaluate the pdopNode (first child) and decrement its refCount.2014*2015* -# Emit a NOP BRC bearing the handlerLabel right after evaluating the pdopNode. This is for SignalHandler.c2016*2017* -# Switch to OOL code generation and evaluate the callNode2018*2019* -# Evaluate the addressNode (second child of BCDCHK node) to yield a correct address into the byte[]2020*2021* -# Copy the results produced by the call from byte[] back to mainline storage reference2022*2023* -# Finish up by decRefCount on callNode and addressNode2024*2025* \param node the BCDCHK node2026* \param cg codegen object2027* \param numCallParam number of callNode children2028* \param callChildStartIndex the index of the first callChild under the BCDCHK node2029* \param isResultPD True if the result of the pdOpNode a PD; false if the result is a binary integer/long2030* This also implies that the second node of the BCDCHK node is an address node.2031* \param isUseVector If true, emit vector packed decimal instructions2032* \param isVariableParam true if the PD operation's precision is not a constant.2033*/2034TR::Register *2035J9::Z::TreeEvaluator::BCDCHKEvaluatorImpl(TR::Node * node,2036TR::CodeGenerator * cg,2037uint32_t 
numCallParam,2038uint32_t callChildStartIndex,2039bool isResultPD,2040bool isUseVector,2041bool isVariableParam)2042{2043TR::Compilation *comp = cg->comp();2044TR_Debug* debugObj = cg->getDebug();2045TR::Node* pdopNode = node->getFirstChild();2046TR::Node* secondChild = node->getSecondChild();20472048bool isResultLong = pdopNode->getOpCodeValue() == TR::pd2l ||2049pdopNode->getOpCodeValue() == TR::pd2lOverflow ||2050pdopNode->getOpCodeValue() == TR::lcall;20512052TR::LabelSymbol* handlerLabel = generateLabelSymbol(cg);2053TR::LabelSymbol* passThroughLabel = generateLabelSymbol(cg);2054cg->setCurrentBCDCHKHandlerLabel(handlerLabel);20552056// This is where the call children node come from and the node that has the call symRef2057TR::Node* childRootNode = isVariableParam ? pdopNode : node;20582059// Create a call2060TR::ILOpCodes callType = isResultPD ? TR::call : (isResultLong ? TR::lcall : TR::icall);20612062TR::Node * callNode = TR::Node::createWithSymRef(node, callType, numCallParam,2063childRootNode->getSymbolReference());2064cg->incReferenceCount(callNode);2065callNode->setNumChildren(numCallParam);20662067// Setup callNode children2068for (uint32_t i = 0; i < numCallParam; ++i)2069callNode->setAndIncChild(i, childRootNode->getChild(i + callChildStartIndex));20702071// Evaluate secondChild's children, if the secondChild is an address node into a byte[]2072if(isResultPD && secondChild->getNumChildren() == 2)2073{2074cg->evaluate(secondChild->getFirstChild());2075cg->evaluate(secondChild->getSecondChild());2076}20772078// Evaluate intrinsics node2079TR::Register* bcdOpResultReg = NULL;2080if(isVariableParam)2081{2082bcdOpResultReg = pd2lVariableEvaluator(node, cg, isUseVector);2083}2084else if(isResultPD && !isUseVector)2085{2086bcdOpResultReg = cg->evaluateBCDNode(pdopNode);2087}2088else2089{2090bcdOpResultReg = cg->evaluate(pdopNode);2091}20922093// start of OOL section2094traceMsg(comp, "starting OOL section generation.\n");2095TR_S390OutOfLineCodeSection* 
outlinedHelperCall = new (INSN_HEAP) TR_S390OutOfLineCodeSection(handlerLabel, passThroughLabel, cg);2096cg->getS390OutOfLineCodeSectionList().push_front(outlinedHelperCall);2097outlinedHelperCall->swapInstructionListsWithCompilation();2098// snippetLabel : OOL Start label2099TR::Instruction* cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, handlerLabel);21002101if(debugObj)2102{2103debugObj->addInstructionComment(cursor, "Start of BCDCHK OOL sequence");2104}21052106// Debug counter for tracking how often we fall back to the OOL path of the DAA intrinsic2107cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "DAA/OOL/(%s)/%p", comp->signature(), node), 1, TR::DebugCounter::Undetermined);21082109// Evaluate the callNode, duplicate and evaluate the address node, and then copy the2110// correct results back to the mainline storage ref or register2111TR::Register* callResultReg = cg->evaluate(callNode);21122113if(isResultPD)2114{2115TR::Register* srcBaseReg = cg->evaluate(secondChild);2116TR::MemoryReference* srcMR = generateS390MemoryReference(srcBaseReg, 0, cg);2117int32_t resultSize = TR::DataType::packedDecimalPrecisionToByteLength(pdopNode->getDecimalPrecision());21182119if(isUseVector)2120{2121TR_ASSERT(bcdOpResultReg && (bcdOpResultReg->getKind() == TR_VRF || bcdOpResultReg->getKind() == TR_FPR),2122"Vector register expected\n");21232124generateVSIInstruction(cg, TR::InstOpCode::VLRL, node, bcdOpResultReg, srcMR, resultSize - 1);2125}2126else2127{2128TR::MemoryReference* targetMR = generateS390RightAlignedMemoryReference(pdopNode, static_cast<TR_PseudoRegister*>(bcdOpResultReg)->getStorageReference(), cg);2129generateSS1Instruction(cg, TR::InstOpCode::MVC, node, resultSize - 1, targetMR, srcMR);2130}21312132cg->decReferenceCount(secondChild);2133cg->stopUsingRegister(callResultReg);2134}2135else2136{2137if(isResultLong)2138{2139generateRREInstruction(cg, TR::InstOpCode::LGR, node, bcdOpResultReg, 
callResultReg);2140}2141else2142{2143generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, bcdOpResultReg, callResultReg);2144}2145}21462147cg->stopUsingRegister(callResultReg);2148cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, passThroughLabel);21492150// Decrement reference counts2151cg->recursivelyDecReferenceCount(callNode);2152if(isVariableParam)2153{2154// variable parameter l2pd is a call node2155cg->recursivelyDecReferenceCount(pdopNode);2156}2157else2158{2159cg->decReferenceCount(pdopNode);2160}21612162if(debugObj)2163{2164debugObj->addInstructionComment(cursor, "End of BCDCHK OOL sequence: return to mainline");2165}21662167traceMsg(comp, "Finished OOL section generation.\n");21682169// ***Done using OOL with manual code generation *** //2170outlinedHelperCall->swapInstructionListsWithCompilation();2171cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, passThroughLabel, cg->getCurrentCheckNodeRegDeps());21722173cg->setCurrentBCDCHKHandlerLabel(NULL);2174return bcdOpResultReg;2175}21762177/**2178* BCDCHKEvaluator -2179*/2180TR::Register *2181J9::Z::TreeEvaluator::BCDCHKEvaluator(TR::Node * node, TR::CodeGenerator * cg)2182{2183TR::Compilation *comp = cg->comp();2184TR::Node* pdopNode = node->getFirstChild();2185TR::Register* resultReg = pdopNode->getRegister();2186bool isResultPD = pdopNode->getDataType() == TR::PackedDecimal;2187bool isVariableParam = false;2188uint32_t firstCallParamIndex = 0;21892190static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");2191bool isEnableVectorBCD = comp->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL)2192&& !comp->getOption(TR_DisableVectorBCD)2193|| isVectorBCDEnv;21942195// Validate PD operations under BCDCHK node2196switch (pdopNode->getOpCodeValue())2197{2198case TR::pdcmpgt:2199case TR::pdcmplt:2200case TR::pdcmpge:2201case TR::pdcmple:2202case TR::pdcmpeq:2203case TR::pdcmpne:2204break;2205case 
TR::i2pd:2206case TR::l2pd:2207case TR::pd2l:2208case TR::pd2i:2209case TR::pd2iOverflow:2210case TR::pd2lOverflow:2211case TR::pdadd:2212case TR::pdsub:2213case TR::pdmul:2214case TR::pddiv:2215case TR::pdrem:2216case TR::pdshlOverflow:2217case TR::pdshr:2218{2219cg->setIgnoreDecimalOverflowException(node->getLastChild()->getInt() == 0);2220break;2221}2222case TR::lcall:2223case TR::icall:2224{2225switch (pdopNode->getSymbol()->getMethodSymbol()->getMethod()->getRecognizedMethod())2226{2227case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToInteger_:2228case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToInteger_ByteBuffer_:2229case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToLong_:2230case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToLong_ByteBuffer_:2231{2232isVariableParam = true;22332234// Need a parameter check because variable PD2L and PD2I could have non-constant 'checkOverflow' (see IS_VARIABLE_PD2I macro).2235TR::Node* checkOverflowNode = pdopNode->getLastChild();2236cg->setIgnoreDecimalOverflowException(checkOverflowNode->getOpCode().isLoadConst() && (checkOverflowNode->getInt() == 0));2237break;2238}22392240default:2241{2242/**2243* BCDCHK can have a call node if the PD operation can be simplified to a No-Op.2244* For example, one can get an integer via a call2245* perform a i2pd followed by a pd2i. The pd2i (under BCDCHK) can be simplified to the icall.2246* If this is the case, the lcall/icall must have been evaluated.2247* We can skip the BCDCHK evaluation and return the call result.2248*/2249TR_ASSERT_FATAL(resultReg != NULL,2250"BCDCHKEvaluator: variable precision path encounters an unrecognized and unevaluated long/int call\n");2251}2252}2253break;2254}22552256default:2257{2258/**2259* Unrecognized opCodes under BCDCHK should come from optimizations such as local CSE and tree simplifications.2260* They should be commoned nodes that's evaluated previously. 
Skip these nodes.2261*/2262TR_ASSERT_FATAL(resultReg != NULL, "BCDCHKEvaluator: BCDCHK has an unevaluated non-PD node %p (non-PD op code %s) \n",2263pdopNode,2264pdopNode->getOpCode().getName());22652266traceMsg(comp, "BCDCHK node n%dn has non-PD operation %s\n",2267node->getGlobalIndex(), pdopNode->getOpCode().getName());2268}2269}22702271if (!isVariableParam)2272{2273firstCallParamIndex = isResultPD ? 2 : 1;2274}22752276// Evaluate call parameters2277TR::Node* callParamRoot = isVariableParam ? pdopNode : node;2278for (uint32_t i = firstCallParamIndex; i < callParamRoot->getNumChildren(); ++i)2279{2280TR::Node* callArg = callParamRoot->getChild(i);2281if (callArg->getReferenceCount() != 1 || callArg->getRegister() != NULL)2282cg->evaluate(callArg);2283}22842285/*2286* Avoid evaluating an evaluated pdOpNode (first child of BCDCHK) under a BCDCHK node if2287* it is already evaluated.2288*2289* This is to avoid generating OOL paths without mainline sequences. OOL without mainline can2290* cause RA to produce incorrect register use counts, and eventually produce incorrect GC maps that2291* make GC fail during runtime.2292*/2293if (resultReg != NULL)2294{2295if (isVariableParam)2296cg->recursivelyDecReferenceCount(pdopNode); // variable parameter l2pd is a call node2297else2298{2299// first child2300cg->decReferenceCount(pdopNode);23012302// second child2303if (isResultPD)2304cg->recursivelyDecReferenceCount(node->getSecondChild());23052306// call parameters: 2nd/3rd and above2307for (uint32_t i = firstCallParamIndex; i < node->getNumChildren(); ++i)2308cg->decReferenceCount(node->getChild(i));2309}23102311traceMsg(comp, "Skipped BCDCHK node n%dn\n", node->getGlobalIndex());2312}2313else2314{2315uint32_t numCallChildren = isVariableParam ? 
pdopNode->getNumChildren() : (node->getNumChildren() - firstCallParamIndex);23162317TR::RegisterDependencyConditions * daaDeps = new (INSN_HEAP) TR::RegisterDependencyConditions(0, 13, cg);23182319cg->setCurrentCheckNodeRegDeps(daaDeps);2320cg->setCurrentCheckNodeBeingEvaluated(node);23212322resultReg = BCDCHKEvaluatorImpl(node, cg, numCallChildren, firstCallParamIndex,2323isResultPD, isEnableVectorBCD, isVariableParam);23242325cg->setCurrentCheckNodeRegDeps(NULL);2326cg->setCurrentCheckNodeBeingEvaluated(NULL);2327}23282329cg->setIgnoreDecimalOverflowException(false);2330return resultReg;2331}23322333TR::Register*2334J9::Z::TreeEvaluator::pdcmpVectorEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg)2335{2336TR::Register* resultReg = cg->allocateRegister(TR_GPR);2337generateRRInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, resultReg, resultReg);2338generateLoad32BitConstant(cg, node, 1, resultReg, true);23392340TR::RegisterDependencyConditions* deps = new(cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, cg);2341deps->addPostConditionIfNotAlreadyInserted(resultReg, TR::RealRegister::AssignAny);23422343TR::Node* pd1Node = node->getFirstChild();2344TR::Node* pd2Node = node->getSecondChild();23452346TR::Register* pd1Value = cg->evaluate(pd1Node);2347TR::Register* pd2Value = cg->evaluate(pd2Node);23482349// TODO: should we correct bad sign before comparing them2350TR::Instruction* cursor = generateVRRhInstruction(cg, TR::InstOpCode::VCP, node, pd1Value, pd2Value, 0);23512352TR::LabelSymbol* cFlowRegionStart = generateLabelSymbol(cg);2353cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);2354cFlowRegionStart->setStartInternalControlFlow();23552356TR::LabelSymbol* cFlowRegionEnd = generateLabelSymbol(cg);23572358// Generate Branch Instructions2359switch(node->getOpCodeValue())2360{2361case TR::pdcmpeq:2362cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC0, node, 
cFlowRegionEnd);2363break;2364case TR::pdcmpne:2365cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, cFlowRegionEnd);2366cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC2, node, cFlowRegionEnd);2367break;2368case TR::pdcmplt:2369cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, cFlowRegionEnd);2370break;2371case TR::pdcmple:2372cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC0, node, cFlowRegionEnd);2373cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, cFlowRegionEnd);2374break;2375case TR::pdcmpgt:2376cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC2, node, cFlowRegionEnd);2377break;2378case TR::pdcmpge:2379cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC0, node, cFlowRegionEnd);2380cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC2, node, cFlowRegionEnd);2381break;2382default:2383TR_ASSERT(0, "Unrecognized op code in pd cmp vector evaluator helper.");2384}23852386// TODO: The only reason we keep track of the cursor here is because `deps` has to be passed in after `cursor`. 
We2387// don't really need this restriction however if we rearrange the parameters.2388cursor = generateLoad32BitConstant(cg, node, 0, resultReg, true, cursor, deps);23892390cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, deps);2391cFlowRegionEnd->setEndInternalControlFlow();23922393node->setRegister(resultReg);23942395cg->decReferenceCount(pd1Node);2396cg->decReferenceCount(pd2Node);23972398return resultReg;2399}24002401TR::Register*2402J9::Z::TreeEvaluator::pdcmpeqEvaluator(TR::Node *node, TR::CodeGenerator *cg)2403{2404cg->traceBCDEntry("pdcmpeq",node);2405cg->generateDebugCounter("PD-Op/pdcmpeq", 1, TR::DebugCounter::Cheap);24062407// to support castedToBCD have to ensure generateS390CompareBool generates logical comparison only and not CP2408TR_ASSERT(node->castedToBCD() == false,"castedToBCD=true not supported for %s (%p)\n",node->getOpCode().getName(),node);2409TR::Register *targetReg = NULL;24102411static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");2412if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD) || isVectorBCDEnv)2413{2414targetReg = pdcmpVectorEvaluatorHelper(node, cg);2415}2416else2417{2418targetReg = generateS390CompareBool(node, cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, TR::InstOpCode::COND_BE, false);2419}24202421cg->traceBCDExit("pdcmpeq",node);2422return targetReg;2423}24242425TR::Register *2426J9::Z::TreeEvaluator::pdcmpneEvaluator(TR::Node *node, TR::CodeGenerator *cg)2427{2428cg->traceBCDEntry("pdcmpne",node);2429cg->generateDebugCounter("PD-Op/pdcmpne", 1, TR::DebugCounter::Cheap);24302431TR_ASSERT(node->castedToBCD() == false,"castedToBCD=true not supported for %s (%p)\n",node->getOpCode().getName(),node);2432TR::Register *targetReg = NULL;24332434static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");2435if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && 
!cg->comp()->getOption(TR_DisableVectorBCD) || isVectorBCDEnv)2436{2437targetReg = pdcmpVectorEvaluatorHelper(node, cg);2438}2439else2440{2441targetReg = generateS390CompareBool(node, cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, TR::InstOpCode::COND_BNE, false);2442}24432444cg->traceBCDExit("pdcmpne",node);2445return targetReg;2446}24472448TR::Register *2449J9::Z::TreeEvaluator::pdcmpltEvaluator(TR::Node *node, TR::CodeGenerator *cg)2450{2451cg->traceBCDEntry("pdcmplt",node);2452cg->generateDebugCounter("PD-Op/pdcmplt", 1, TR::DebugCounter::Cheap);24532454TR_ASSERT(node->castedToBCD() == false,"castedToBCD=true not supported for %s (%p)\n",node->getOpCode().getName(),node);2455TR::Register *targetReg = NULL;24562457static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");2458if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD) || isVectorBCDEnv)2459{2460targetReg = pdcmpVectorEvaluatorHelper(node, cg);2461}2462else2463{2464targetReg = generateS390CompareBool(node, cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BL, TR::InstOpCode::COND_BH, false);2465}24662467cg->traceBCDExit("pdcmplt",node);2468return targetReg;2469}24702471TR::Register *J9::Z::TreeEvaluator::pdcmpgeEvaluator(TR::Node *node, TR::CodeGenerator *cg)2472{2473cg->traceBCDEntry("pdcmpge",node);2474cg->generateDebugCounter("PD-Op/pdcmpge", 1, TR::DebugCounter::Cheap);24752476TR_ASSERT(node->castedToBCD() == false,"castedToBCD=true not supported for %s (%p)\n",node->getOpCode().getName(),node);2477TR::Register *targetReg = NULL;24782479static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");2480if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD) || isVectorBCDEnv)2481{2482targetReg = pdcmpVectorEvaluatorHelper(node, cg);2483}2484else2485{2486targetReg = generateS390CompareBool(node, cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNL, 
TR::InstOpCode::COND_BNH, false);2487}24882489cg->traceBCDExit("pdcmpge",node);2490return targetReg;2491}24922493TR::Register *J9::Z::TreeEvaluator::pdcmpgtEvaluator(TR::Node *node, TR::CodeGenerator *cg)2494{2495cg->traceBCDEntry("pdcmpgt",node);2496cg->generateDebugCounter("PD-Op/pdcmpgt", 1, TR::DebugCounter::Cheap);24972498TR_ASSERT(node->castedToBCD() == false,"castedToBCD=true not supported for %s (%p)\n",node->getOpCode().getName(),node);2499TR::Register *targetReg = NULL;25002501static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");2502if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD) || isVectorBCDEnv)2503{2504targetReg = pdcmpVectorEvaluatorHelper(node, cg);2505}2506else2507{2508targetReg = generateS390CompareBool(node, cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BH, TR::InstOpCode::COND_BL, false);2509}25102511cg->traceBCDExit("pdcmpgt",node);2512return targetReg;2513}25142515TR::Register *J9::Z::TreeEvaluator::pdcmpleEvaluator(TR::Node *node, TR::CodeGenerator *cg)2516{2517cg->traceBCDEntry("pdcmple",node);2518cg->generateDebugCounter("PD-Op/pdcmple", 1, TR::DebugCounter::Cheap);25192520TR_ASSERT(node->castedToBCD() == false,"castedToBCD=true not supported for %s (%p)\n",node->getOpCode().getName(),node);2521TR::Register *targetReg = NULL;25222523static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");2524if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD) || isVectorBCDEnv)2525{2526targetReg = pdcmpVectorEvaluatorHelper(node, cg);2527}2528else2529{25302531targetReg = generateS390CompareBool(node, cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNH, TR::InstOpCode::COND_BNL, false);2532}25332534cg->traceBCDExit("pdcmple",node);2535return targetReg;2536}25372538TR::Register *2539J9::Z::TreeEvaluator::pd2iEvaluator(TR::Node * node, TR::CodeGenerator * 
cg)2540{2541cg->traceBCDEntry("pd2i",node);2542cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "PD-Op/%s", node->getOpCode().getName()),25431, TR::DebugCounter::Cheap);2544TR::Register * reg = NULL;25452546static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");2547if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD) || isVectorBCDEnv)2548{2549reg = generateVectorPackedToBinaryConversion(node, TR::InstOpCode::VCVB, cg);2550}2551else2552{2553reg = generatePackedToBinaryConversion(node, TR::InstOpCode::CVB, cg);2554}25552556cg->traceBCDExit("pd2i",node);2557return reg;2558}25592560TR::Register *2561J9::Z::TreeEvaluator::pd2lEvaluator(TR::Node * node, TR::CodeGenerator * cg)2562{2563cg->traceBCDEntry("pd2l",node);2564cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "PD-Op/%s", node->getOpCode().getName()),25651, TR::DebugCounter::Cheap);2566TR::Register * reg = NULL;25672568static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");2569if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD) || isVectorBCDEnv)2570{2571reg = generateVectorPackedToBinaryConversion(node, TR::InstOpCode::VCVBG, cg);2572}2573else2574{2575reg = generatePackedToBinaryConversion(node, TR::InstOpCode::CVBG, cg);2576}25772578cg->traceBCDExit("pd2l",node);2579return reg;2580}25812582TR::Register*2583J9::Z::TreeEvaluator::pd2lVariableEvaluator(TR::Node* node, TR::CodeGenerator* cg, bool isUseVectorBCD)2584{2585cg->traceBCDEntry("pd2lVariableEvaluator",node);2586cg->generateDebugCounter("PD-Op/pd2l-var", 1, TR::DebugCounter::Cheap);25872588TR::Node* pdOpNode = node->getChild(0);2589TR::Node* pdAddressNode = node->getChild(1);25902591TR::Compilation *comp = cg->comp();25922593// This function handles PD2I and PD2L2594bool PD2I = pdOpNode->getOpCode().getOpCodeValue() == 
TR::icall;25952596TR::Register* returnReg = cg->allocateRegister();25972598TR::InstOpCode::Mnemonic conversionOp = PD2I ? TR::InstOpCode::VCVB : TR::InstOpCode::VCVBG;25992600TR::Register* callAddrReg = cg->evaluate(pdAddressNode);2601TR::Register* precisionReg = cg->evaluate(pdOpNode->getChild(2));2602TR::Register* lengthReg = cg->allocateRegister();2603TR_ASSERT(precisionReg && (precisionReg->getKind() == TR_GPR), "precision should be a 32bit GPR");26042605// byteLength = precision/2 + 1. Note that the length codes of all instructions are (byteLength-1).2606// Thus, lengthCode = precision/22607if (comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z196))2608{2609generateRSInstruction(cg, TR::InstOpCode::SRAK, pdOpNode, lengthReg, precisionReg, 0x1, NULL);2610}2611else2612{2613generateRRInstruction(cg, TR::InstOpCode::LR, pdOpNode, lengthReg, precisionReg);2614generateRSInstruction(cg, TR::InstOpCode::SRA, pdOpNode, lengthReg, 0x1);2615}26162617TR::MemoryReference* sourceMR = generateS390MemoryReference(callAddrReg, 0, cg);2618static bool disableTPBeforePD2I = feGetEnv("TR_DisableTPBeforePD2I") != NULL;26192620if (isUseVectorBCD)2621{2622// variable length load + vector convert to binary2623TR::Register* vPDReg = cg->allocateRegister(TR_VRF);2624generateVRSdInstruction(cg, TR::InstOpCode::VLRLR, node, lengthReg, vPDReg, sourceMR);26252626if (!disableTPBeforePD2I)2627{2628generateVRRgInstruction(cg, TR::InstOpCode::VTP, node, vPDReg);2629generateS390BranchInstruction(cg, TR::InstOpCode::BRC,2630TR::InstOpCode::COND_MASK7,2631node, cg->getCurrentBCDCHKHandlerLabel());2632}26332634uint8_t ignoreOverflowMask = 0;26352636if (comp->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY) && cg->getIgnoreDecimalOverflowException())2637{2638ignoreOverflowMask = 0x8;2639}26402641generateVRRiInstruction(cg, conversionOp, node, returnReg, vPDReg, 1, ignoreOverflowMask);2642cg->stopUsingRegister(vPDReg);2643}2644else2645{2646const uint32_t 
tempSRSize = PD2I ? cg->getPackedToIntegerFixedSize()2647: cg->getPackedToLongFixedSize();26482649// Allocate space on the stack for the PD to be copied to2650TR_StorageReference* tempSR = TR_StorageReference::createTemporaryBasedStorageReference(tempSRSize, comp);26512652tempSR->setTemporaryReferenceCount(1);26532654TR::MemoryReference* ZAPtargetMR = generateS390MemRefFromStorageRef(node, tempSR, cg, false, true);2655TR::Register* zapTargetBaseReg = cg->allocateRegister();2656/*2657* Insert an intermediate LA instruction before the ZAP+EX sequence to hold the ZAP target base address2658* value. Intermediate LA instructions are needed for all instructions targeted by EX (or EXRL) and have2659* memory references with unmaterialized base/index registers. This is done so that we are immune to2660* large displacement instruction adjustments.2661*2662* In this particular case, the instruction selection phase emits ZAP+EX. The peephole optimization later2663* replaces the EX with an EXRL and expands to three instructions:2664*2665* BRC [to EXRl]2666* ZAP2667* EXRL [of ZAP]2668*2669* These three instructions work fine if they are all together. 
If the ZAP is targeting a memory location that's2670* far away down the stack, large displacement instructions will be added in the memory reference binary encoding phase2671* to create the following functionally incorrect instruction sequence:2672*2673* BRC [to EXRL]2674* STG2675* LGHI2676* LA2677* ZAP2678* LG2679* EXRL2680*2681*2682* Having an intermediate LA instruction here prevents the large displacement adjustments on the ZAP instruction and holds2683* the BRC+ZAP+EXRL instructions together.2684*/2685generateRXInstruction(cg, TR::InstOpCode::LA, node, zapTargetBaseReg, ZAPtargetMR);26862687if (!disableTPBeforePD2I)2688{2689TR::Register* tempLengthForTP = cg->allocateRegister();26902691if (comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z196))2692{2693generateRSInstruction(cg, TR::InstOpCode::SLAK, node, tempLengthForTP, lengthReg, 4);2694}2695else2696{2697generateRRInstruction(cg, TR::InstOpCode::LR, node, tempLengthForTP, lengthReg);2698generateRSInstruction(cg, TR::InstOpCode::SLA, node, tempLengthForTP, 4);2699}27002701auto* testPackedInstruction = generateRSLInstruction(cg, TR::InstOpCode::TP, node, 0, generateS390MemoryReference(*sourceMR, 0, cg));27022703generateEXDispatch(node, cg, tempLengthForTP, testPackedInstruction);27042705// Fallback to the OOL path if anything is wrong with the input packed decimal2706generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK7, node, cg->getCurrentBCDCHKHandlerLabel());27072708cg->stopUsingRegister(tempLengthForTP);2709}27102711TR::Instruction* instrZAP = generateSS2Instruction(cg, TR::InstOpCode::ZAP, node,2712tempSRSize - 1,2713generateS390MemoryReference(zapTargetBaseReg, 0, cg),27140, sourceMR);27152716generateEXDispatch(node, cg, lengthReg, instrZAP);27172718if (PD2I)2719{2720generateRXInstruction (cg, TR::InstOpCode::CVB, node, returnReg, generateS390MemoryReference(*ZAPtargetMR, 0, cg));2721}2722else2723{2724generateRXInstruction(cg, TR::InstOpCode::CVBG, node, returnReg, 
generateS390MemoryReference(*ZAPtargetMR, 0, cg));2725}27262727tempSR->setTemporaryReferenceCount(0);2728cg->stopUsingRegister(zapTargetBaseReg);2729}27302731cg->decReferenceCount(pdAddressNode);2732cg->stopUsingRegister(lengthReg);2733pdOpNode->setRegister(returnReg);27342735// Create a debug counter to track how often we execute the inline path for variable operations2736cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp,2737"DAA/variable/inline/(%s)/%p",2738comp->signature(), node),27391, TR::DebugCounter::Undetermined);27402741cg->traceBCDExit("pd2lVariableEvaluator",node);27422743return returnReg;2744}27452746TR::Register *2747J9::Z::TreeEvaluator::generateVectorPackedToBinaryConversion(TR::Node * node, TR::InstOpCode::Mnemonic op, TR::CodeGenerator * cg)2748{2749TR_ASSERT( op == TR::InstOpCode::VCVB || op == TR::InstOpCode::VCVBG,"unexpected opcode in gen vector pd2i\n");2750bool isPDToLong = (op == TR::InstOpCode::VCVBG);27512752TR::Register *rResultReg = (isPDToLong) ? 
cg->allocateRegister() : cg->allocateRegister();27532754// evaluate pdload2755TR::Node *pdValueNode = node->getFirstChild();2756TR::Register *vPdValueReg = cg->evaluate(pdValueNode);2757TR_ASSERT(vPdValueReg->getKind() == TR_VRF || vPdValueReg->getKind() == TR_FPR, "Vector register expected.");27582759static bool disableTPBeforePD2I = feGetEnv("TR_DisableTPBeforePD2I") != NULL;2760if (!disableTPBeforePD2I)2761{2762generateVRRgInstruction(cg, TR::InstOpCode::VTP, node, vPdValueReg);2763generateS390BranchInstruction(cg, TR::InstOpCode::BRC,2764TR::InstOpCode::COND_MASK7, node,2765cg->getCurrentBCDCHKHandlerLabel());2766}27672768uint8_t ignoreOverflowMask = 0;27692770if (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY) && cg->getIgnoreDecimalOverflowException())2771{2772ignoreOverflowMask = 0x8;2773}27742775// Convert to signed binary of either 32-bit or 64-bit long2776generateVRRiInstruction(cg, op, node, rResultReg, vPdValueReg, 0x1, ignoreOverflowMask);27772778cg->decReferenceCount(pdValueNode);2779node->setRegister(rResultReg);2780return rResultReg;2781}27822783TR::Register *2784J9::Z::TreeEvaluator::generatePackedToBinaryConversion(TR::Node * node, TR::InstOpCode::Mnemonic op, TR::CodeGenerator * cg)2785{2786TR_ASSERT( op == TR::InstOpCode::CVB || op == TR::InstOpCode::CVBG,"unexpected opcode in generatePackedToBinaryFixedConversion\n");2787TR::Register *targetReg = cg->allocateRegister();27882789TR::Node *firstChild = node->getFirstChild();2790TR_PseudoRegister *firstReg = cg->evaluateBCDNode(firstChild);2791int32_t requiredSourceSize = op == TR::InstOpCode::CVB ? 
cg->getPackedToIntegerFixedSize() : cg->getPackedToLongFixedSize();2792TR::MemoryReference *sourceMR = cg->materializeFullBCDValue(firstChild,2793firstReg,2794requiredSourceSize,2795requiredSourceSize,2796false, // updateStorageReference2797false); // alwaysEnforceSSLimits -- to be used in CVB27982799TR_StorageReference *firstStorageReference = firstReg->getStorageReference();2800sourceMR = reuseS390LeftAlignedMemoryReference(sourceMR, firstChild, firstStorageReference, cg, requiredSourceSize, false); // enforceSSLimits=false for CVB28012802static bool disableTPBeforePD2I = feGetEnv("TR_DisableTPBeforePD2I") != NULL;28032804if (!disableTPBeforePD2I)2805{2806generateRSLInstruction(cg, TR::InstOpCode::TP, node, firstReg->getSize() - 1, generateS390RightAlignedMemoryReference(*sourceMR, firstChild, 0, cg, false));2807generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK7, node, cg->getCurrentBCDCHKHandlerLabel());2808}28092810TR::Instruction *inst = NULL;2811if (op == TR::InstOpCode::CVB)2812inst = generateRXInstruction(cg, op, node, targetReg, sourceMR);2813else2814inst = generateRXInstruction(cg, op, node, targetReg, sourceMR);28152816if (sourceMR->getStorageReference() == firstStorageReference)2817firstReg->setHasKnownValidSignAndData();28182819// Create a debug counter to track how often we execute the inline path2820cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "DAA/inline/(%s)/%p", cg->comp()->signature(), node), 1, TR::DebugCounter::Undetermined);28212822cg->decReferenceCount(firstChild);2823node->setRegister(targetReg);2824return targetReg;2825}28262827TR::Register *2828J9::Z::TreeEvaluator::i2pdEvaluator(TR::Node * node, TR::CodeGenerator * cg)2829{2830cg->traceBCDEntry("i2pd",node);2831cg->generateDebugCounter("PD-Op/i2pd", 1, TR::DebugCounter::Cheap);2832TR::Register * reg = NULL;28332834static char* isVectorBCDEnv = 
feGetEnv("TR_enableVectorBCD");2835if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD) || isVectorBCDEnv)2836{2837reg = generateVectorBinaryToPackedConversion(node, TR::InstOpCode::VCVD, cg);2838}2839else2840{2841reg = generateBinaryToPackedConversion(node, TR::InstOpCode::CVD, cg);2842}28432844cg->traceBCDExit("i2pd",node);2845return reg;2846}28472848TR::Register *2849J9::Z::TreeEvaluator::l2pdEvaluator(TR::Node * node, TR::CodeGenerator * cg)2850{2851cg->traceBCDEntry("l2pd",node);2852cg->generateDebugCounter("PD-Op/l2pd", 1, TR::DebugCounter::Cheap);2853TR::Register * reg = NULL;28542855static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");2856if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD) || isVectorBCDEnv)2857{2858reg = generateVectorBinaryToPackedConversion(node, TR::InstOpCode::VCVDG, cg);2859}2860else2861{2862reg = generateBinaryToPackedConversion(node, TR::InstOpCode::CVDG, cg);2863}28642865cg->traceBCDExit("l2pd",node);2866return reg;2867}28682869/**2870* \brief This evaluator helper function evaluates i2pd and l2pd conversion nodes2871* using CVD or CVDG instructions.2872*2873*/2874TR::Register *2875J9::Z::TreeEvaluator::generateBinaryToPackedConversion(TR::Node * node,2876TR::InstOpCode::Mnemonic op,2877TR::CodeGenerator * cg)2878{2879TR_ASSERT( op == TR::InstOpCode::CVD || op == TR::InstOpCode::CVDG,2880"unexpected opcode in generateBinaryToPackedConversion\n");28812882TR_PseudoRegister *targetReg = cg->allocatePseudoRegister(node->getDataType());2883TR::Compilation *comp = cg->comp();2884bool isI2PD = op == TR::InstOpCode::CVD;2885TR_StorageReference *hint = node->getStorageReferenceHint();2886int32_t cvdSize = isI2PD ? cg->getIntegerToPackedFixedSize() : cg->getLongToPackedFixedSize();2887TR_StorageReference *targetStorageReference = hint ? 
hint : TR_StorageReference::createTemporaryBasedStorageReference(cvdSize, comp);2888targetReg->setStorageReference(targetStorageReference, node);28892890TR::Node *firstChild = node->getFirstChild();2891TR::Register *firstReg = cg->evaluate(firstChild);2892TR::MemoryReference *targetMR = generateS390LeftAlignedMemoryReference(node,2893targetStorageReference,2894cg,2895cvdSize,2896false); // enforceSSLimits=false for CVD28972898generateRXInstruction(cg, op, node, firstReg, targetMR);28992900targetReg->setIsInitialized();29012902cg->stopUsingRegister(firstReg);2903cg->decReferenceCount(firstChild);2904node->setRegister(targetReg);2905return targetReg;2906}290729082909TR::Register *2910J9::Z::TreeEvaluator::pdnegEvaluator(TR::Node * node, TR::CodeGenerator * cg)2911{2912cg->traceBCDEntry("pdneg",node);2913cg->generateDebugCounter("PD-Op/pdneg", 1, TR::DebugCounter::Cheap);29142915TR_ASSERT(node->getNumChildren() == 1, "pdneg should only have 1 child");29162917TR::Node *srcNode = node->getFirstChild();2918TR_PseudoRegister *srcReg = cg->evaluateBCDNode(srcNode);2919TR::Compilation *comp = cg->comp();29202921TR::MemoryReference *sourceMR = generateS390RightAlignedMemoryReference(srcNode, srcReg->getStorageReference(), cg);29222923// also do for assumed (PFD) preferred and clean signs?2924int32_t srcSign = srcReg->hasKnownOrAssumedSignCode() ? 
srcReg->getKnownOrAssumedSignCode() : TR::DataType::getInvalidSignCode();2925bool useRegBasedSequence = srcReg->hasKnownValidSign();2926bool isSrcSign0xF = srcSign == 0xf;2927bool isSimpleSignFlip = srcSign == TR::DataType::getPreferredPlusCode() ||2928srcSign == TR::DataType::getPreferredMinusCode() ||2929srcReg->hasKnownOrAssumedPreferredSign() ||2930srcReg->hasKnownOrAssumedCleanSign();2931bool isSimpleSignSet = isSrcSign0xF || isSimpleSignFlip;2932bool needsFullInitialization = !useRegBasedSequence || isSimpleSignSet;2933bool isTruncation = node->getDecimalPrecision() < srcReg->getDecimalPrecision();2934bool isWiden = node->getDecimalPrecision() > srcReg->getDecimalPrecision();29352936if (cg->traceBCDCodeGen())2937traceMsg(comp,"\tpdnegEvaluator: isTruncation=%s, isWiden=%s, srcSign = 0x%x, srcSignIsValid=%s, isSimpleSignSet=%s, useRegBasedSequence=%s, needsFullInitialization=%s (== !useRegBasedSequence || isSimpleSignSet)\n",2938isTruncation ? "yes":"no",2939isWiden ? "yes":"no",2940srcSign,2941srcReg->hasKnownValidSign() ? "yes":"no",2942isSimpleSignSet ? "yes":"no",2943useRegBasedSequence?"yes":"no",2944needsFullInitialization? 
"yes":"no");294529462947TR_PseudoRegister *targetReg = evaluateBCDSignModifyingOperand(node,2948false, // isEffectiveNop=false2949false, // isNondestructiveNop=false2950needsFullInitialization,2951sourceMR,2952cg);2953targetReg->setDecimalPrecision(std::min<int32_t>(node->getDecimalPrecision(), srcReg->getDecimalPrecision()));29542955TR::MemoryReference *destMR = generateS390LeftAlignedMemoryReference(node, targetReg->getStorageReference(), cg, targetReg->getSize());29562957if (srcReg->hasKnownValidData())2958targetReg->setHasKnownValidData();29592960if (!needsFullInitialization && !targetReg->isInitialized() && targetReg->getSize() > 1)2961{2962int32_t mvcSize = targetReg->getSize() - 1; // do not include the least significant byte as this is done as part of the sign setting below2963if (cg->traceBCDCodeGen())2964traceMsg(comp,"\ttargetReg is not init and size %d > 1 so gen MVC with size targetRegSize-1 = %d and leftMostByte %d\n",2965targetReg->getSize(),mvcSize,targetReg->getSize());2966generateSS1Instruction(cg, TR::InstOpCode::MVC, node,2967mvcSize-1,2968reuseS390LeftAlignedMemoryReference(destMR, node, targetReg->getStorageReference(), cg, targetReg->getSize()),2969reuseS390LeftAlignedMemoryReference(sourceMR, srcNode, srcReg->getStorageReference(), cg, targetReg->getSize()));2970}29712972bool isSignManipulation = false;2973if (isSrcSign0xF)2974{2975cg->genSignCodeSetting(node, targetReg, targetReg->getSize(), destMR, TR::DataType::getPreferredMinusCode(), srcReg, 0, false); // digitsToClear=0, numericNibbleIsZero=false2976if (targetReg->getDataType() == TR::PackedDecimal && targetReg->isEvenPrecision())2977cg->genZeroLeftMostDigitsIfNeeded(node, targetReg, targetReg->getSize(), 1, destMR);2978}2979else if (isSimpleSignFlip)2980{2981isSignManipulation = true;2982if (cg->traceBCDCodeGen())2983traceMsg(comp,"\tsrcReg has known preferred (%s) or known clean (%s) sign so gen XI 0x1 of sign byte to flip 
it\n",2984srcReg->hasKnownPreferredSign()?"yes":"no",srcReg->hasKnownCleanSign()?"yes":"no");2985generateSIInstruction(cg, TR::InstOpCode::XI, node, reuseS390LeftAlignedMemoryReference(destMR, node, targetReg->getStorageReference(), cg, 1), 0x01);2986if (targetReg->getDataType() == TR::PackedDecimal && targetReg->isEvenPrecision())2987cg->genZeroLeftMostDigitsIfNeeded(node, targetReg, targetReg->getSize(), 1, destMR);29882989}2990else if (useRegBasedSequence)2991{2992isSignManipulation = true;29932994if (cg->traceBCDCodeGen())2995traceMsg(comp,"\ttargetReg has unknown but valid sign so generate register based decode sequence\n");29962997TR::Register *tempSign = cg->allocateRegister();2998TR::Register *targetSign = cg->allocateRegister();2999TR::Register *targetData = cg->allocateRegister();30003001generateRXInstruction(cg, TR::InstOpCode::LB, node, tempSign, reuseS390LeftAlignedMemoryReference(sourceMR, srcNode, srcReg->getStorageReference(), cg, 1));30023003generateRRInstruction(cg, TR::InstOpCode::LR, node, targetSign, tempSign);3004generateRRInstruction(cg, TR::InstOpCode::LR, node, targetData, tempSign);30053006generateRIInstruction(cg, TR::InstOpCode::AHI, node, tempSign, 1);3007generateRIInstruction(cg, TR::InstOpCode::NILL, node, targetData, 0xF0);30083009if (targetReg->getDataType() == TR::PackedDecimal && targetReg->isEvenPrecision())3010cg->genZeroLeftMostDigitsIfNeeded(node, targetReg, targetReg->getSize(), 1, destMR);30113012if (comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_ZEC12))3013generateRIEInstruction(cg, TR::InstOpCode::RISBGN, node, targetData, tempSign, 63, 63, 64-3);3014else3015generateRIEInstruction(cg, TR::InstOpCode::RISBG, node, targetData, tempSign, 63, 63, 64-3);30163017generateRRInstruction(cg, comp->target().is64Bit() ? 
TR::InstOpCode::NGR : TR::InstOpCode::NR, node, targetSign, targetData);3018generateRILInstruction(cg, TR::InstOpCode::XILF, node, targetSign, 13);30193020generateRXInstruction(cg, TR::InstOpCode::STC, node, targetSign, reuseS390LeftAlignedMemoryReference(destMR, node, targetReg->getStorageReference(), cg, 1));30213022cg->stopUsingRegister(tempSign);3023cg->stopUsingRegister(targetSign);3024cg->stopUsingRegister(targetData);3025}3026else3027{3028// This path used to contain a call to an API which would have returned a garbage result. Rather than 100% of the3029// time generating an invalid sequence here which is guaranteed to crash if executed, we fail the compilation.3030cg->comp()->failCompilation<TR::CompilationException>("Existing code relied on an unimplemented API and is thus not safe. See eclipse/omr#5937.");3031}30323033if (isSignManipulation)3034{3035if (srcReg->hasKnownPreferredSign())3036targetReg->setHasKnownPreferredSign();3037else if (srcReg->hasAssumedPreferredSign())3038targetReg->setHasAssumedPreferredSign();3039else3040targetReg->setSignStateInitialized();30413042if (srcReg->hasKnownValidSign())3043targetReg->setHasKnownValidSign();3044}30453046targetReg->transferDataState(srcReg);3047targetReg->setIsInitialized();30483049node->setRegister(targetReg);3050cg->decReferenceCount(srcNode);3051cg->traceBCDExit("pdneg",node);3052return targetReg;3053}30543055TR_PseudoRegister *3056J9::Z::TreeEvaluator::evaluateBCDValueModifyingOperand(TR::Node * node,3057bool initTarget,3058TR::MemoryReference *sourceMR,3059TR::CodeGenerator * cg,3060bool trackSignState,3061int32_t sourceSize,3062bool alwaysLegalToCleanSign) // alwaysLegalToCleanSign=true then a ZAP can be used to init/widen if another signMod inst is coming (e.g. 
AP)3063{3064TR_ASSERT(node->getType().isBCD(),"node %p type %s must be BCD\n",node,node->getDataType().toString());3065TR_OpaquePseudoRegister *reg = evaluateValueModifyingOperand(node, initTarget, sourceMR, cg, trackSignState, sourceSize, alwaysLegalToCleanSign);3066TR_PseudoRegister *pseudoReg = reg->getPseudoRegister();3067TR_ASSERT(pseudoReg,"pseudoReg should be non-NULL for node %p\n",node);3068return pseudoReg;3069}307030713072TR_OpaquePseudoRegister *3073J9::Z::TreeEvaluator::evaluateValueModifyingOperand(TR::Node * node,3074bool initTarget,3075TR::MemoryReference *sourceMR,3076TR::CodeGenerator * cg,3077bool trackSignState,3078int32_t sourceSize,3079bool alwaysLegalToCleanSign) // alwaysLegalToCleanSign=true then a ZAP can be used to init/widen if another signMod inst is coming (e.g. AP)3080{3081bool isBCD = node->getType().isBCD();3082bool isAggr = node->getType().isAggregate();3083TR_ASSERT(isBCD || isAggr,"node %p type %s must be BCD or aggregate\n",node,node->getDataType().toString());30843085TR_OpaquePseudoRegister *targetReg = isBCD ? cg->allocatePseudoRegister(node->getDataType()) : cg->allocateOpaquePseudoRegister(node->getDataType());3086TR_PseudoRegister *targetBCDReg = targetReg->getPseudoRegister();30873088TR::Node *firstChild = node->getFirstChild();3089TR_OpaquePseudoRegister *firstReg = cg->evaluateOPRNode(firstChild);3090TR_PseudoRegister *firstBCDReg = firstReg->getPseudoRegister();3091TR_StorageReference *firstStorageReference = firstReg->getStorageReference();3092TR::Compilation *comp = cg->comp();30933094bool isInitialized = firstReg->isInitialized();3095if (cg->traceBCDCodeGen())3096traceMsg(comp,"\tevaluateValueModifyingOperand for %s (%p) with targetReg %s and firstReg %s (#%d isInit %s), sourceSize=%d : initTarget=%s, alwaysLegalToCleanSign=%s\n",3097node->getOpCode().getName(),node,cg->getDebug()->getName(targetReg),cg->getDebug()->getName(firstReg),3098firstStorageReference->getReferenceNumber(),isInitialized ? 
"yes":"no",sourceSize,initTarget ? "yes":"no",alwaysLegalToCleanSign ? "yes":"no");30993100if (sourceSize == 0)3101sourceSize = firstReg->getSize();31023103bool useZAP = false;31043105// to avoid a clobber evaluate in the isInitialized case favour initializing to an available store hint and leave the isInitialized child untouched3106// also force to a new hint even if refCount==1 if there is ZAP widening to be done (and save a later clear)3107bool useNewStoreHint = !comp->getOption(TR_DisableNewStoreHint) &&3108node->getOpCode().canHaveStorageReferenceHint() &&3109initTarget && // have to also be initializing here otherwise in caller3110node->getStorageReferenceHint() &&3111node->getStorageReferenceHint()->isNodeBasedHint() &&3112(firstChild->getReferenceCount() > 1 || node->getStorageReferenceSize() > sourceSize) &&3113node->getStorageReferenceHint() != firstStorageReference;31143115if (useNewStoreHint && node->getStorageReferenceHint()->getSymbolSize() < node->getStorageReferenceSize())3116{3117useNewStoreHint = false;3118TR_ASSERT(false,"a storageRef hint should be big enough for the node result (%d is not >= %d)\n",3119node->getStorageReferenceHint()->getSymbolSize(),node->getStorageReferenceSize());3120}31213122if (isInitialized && !useNewStoreHint)3123{3124// Save the storage reference dependent state leftAlignedZeroDigits, rightAlignedDeadBytes and the derived liveSymbolSize before3125// the possible call to ssrClobberEvaluate below.3126// If a clobber evaluate is done then the above mentioned state will be reset on firstReg (so subsequent commoned uses of firstReg that now3127// use the newly created temporary storage reference are correct). 
Cache the values here as this state *will* persist up this tree on the targetReg.3128int32_t savedLiveSymbolSize = firstReg->getLiveSymbolSize();3129int32_t savedLeftAlignedZeroDigits = firstReg->getLeftAlignedZeroDigits();3130int32_t savedRightAlignedDeadBytes = firstReg->getRightAlignedDeadBytes();3131int32_t savedRightAlignedIgnoredBytes = firstReg->getRightAlignedIgnoredBytes();3132bool skipClobberEvaluate = false;3133if (node->getOpCode().isBasicOrSpecialPackedArithmetic())3134{3135// The special case of mul/add/sub/div = op1*op1 does not need a clobber evaluate as there are no uses beyond the current node's operation3136if (node->getNumChildren() > 1 &&3137node->getFirstChild() == node->getSecondChild() &&3138node->getFirstChild()->getReferenceCount() == 2 &&3139firstStorageReference->getOwningRegisterCount() == 1)3140{3141skipClobberEvaluate = true;3142}3143}3144if (!skipClobberEvaluate)3145cg->ssrClobberEvaluate(firstChild, sourceMR);3146int32_t resultSize = node->getStorageReferenceSize();3147if (cg->traceBCDCodeGen())3148traceMsg(comp,"\tisInitialized==true: liveSymSize %d (symSize %d - firstReg->deadAndIgnoredBytes %d), resultSize = %d (nodeSize %d)\n",3149savedLiveSymbolSize,firstStorageReference->getSymbolSize(),firstReg->getRightAlignedDeadAndIgnoredBytes(),resultSize,node->getSize());3150if (savedLiveSymbolSize < resultSize)3151{3152// In this case the source memory slot has been initialized but it is no longer larger enough to contain the result for the current node.3153// Therefore either the size of the symbol must be increased (for autos) or a new larger, memory slot must be created and initialized (for non-autos)3154if (firstStorageReference->isTemporaryBased())3155{3156if (cg->traceBCDCodeGen())3157{3158traceMsg(comp,"\treg->getLiveSymbolSize() < resultSize (%d < %d) so call increaseTemporarySymbolSize\n",savedLiveSymbolSize,resultSize);3159traceMsg(comp,"\t\t * setting rightAlignedDeadBytes %d from firstReg %s to targetReg %s (valueMod 
incSize)\n",3160savedRightAlignedDeadBytes,cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));3161traceMsg(comp,"\t\t * setting rightAlignedIgnoredBytes %d from firstReg %s to targetReg %s (valueMod incSize)\n",3162savedRightAlignedIgnoredBytes,cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));3163}3164targetReg->setStorageReference(firstStorageReference, node);3165targetReg->increaseTemporarySymbolSize(resultSize - savedLiveSymbolSize);3166targetReg->setRightAlignedDeadBytes(savedRightAlignedDeadBytes);3167targetReg->setRightAlignedIgnoredBytes(savedRightAlignedIgnoredBytes);3168}3169else3170{3171if (cg->traceBCDCodeGen())3172traceMsg(comp,"\t\tfirstStorageReference is not temporary based and liveSymSize < resultSize (%d < %d) so alloc and init a new temp slot and clear left most bytes\n",3173savedLiveSymbolSize,resultSize);3174int32_t destLength = resultSize;3175int32_t srcLength = sourceSize;3176// If the firstStorageReference is not a temp or a hint then the recursive dec in setStorageReference() will be wrong.3177// This should always be true because this is the initialized case and it is not legal to initialize a non-temp or non-hint.3178TR_ASSERT( firstStorageReference->isNodeBasedHint(), "expecting the srcStorargeReference to be a node based hint\n");3179bool performExplicitWidening = false;3180cg->initializeNewTemporaryStorageReference(node, targetReg, destLength, firstChild, firstReg, srcLength, sourceMR, performExplicitWidening, alwaysLegalToCleanSign, trackSignState);3181if (targetBCDReg)3182{3183TR_ASSERT(firstBCDReg,"firstBCDReg should be non-NULL when targetBCDReg is non-NULL for node %p\n",firstChild);3184if (performExplicitWidening)3185targetBCDReg->setDecimalPrecision(node->getDecimalPrecision());3186else3187targetBCDReg->setDecimalPrecision(firstBCDReg->getDecimalPrecision());3188}3189else3190{3191if 
(performExplicitWidening)3192targetReg->setSize(node->getSize());3193else3194targetReg->setSize(firstReg->getSize());3195}3196}3197}3198else3199{3200if (cg->traceBCDCodeGen())3201{3202traceMsg(comp,"\tliveSymSize >= resultSize (%d >= %d) so can reuse the firstStorageReference #%d for the targetStorageReference\n",3203savedLiveSymbolSize,resultSize,firstStorageReference->getReferenceNumber());3204traceMsg(comp,"\t\t * setting rightAlignedDeadBytes %d from firstReg %s to targetReg %s (valueMod reuse)\n",3205savedRightAlignedDeadBytes,cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));3206traceMsg(comp,"\t\t * setting rightAlignedIgnoredBytes %d from firstReg %s to targetReg %s (valueMod reuse)\n",3207savedRightAlignedIgnoredBytes,cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));3208traceMsg(comp,"\t\t * setting savedLeftAlignedZeroDigits %d from firstReg %s to targetReg %s (valueMod reuse)\n",3209savedLeftAlignedZeroDigits,cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));3210}3211targetReg->setStorageReference(firstStorageReference, node);3212targetReg->setLeftAlignedZeroDigits(savedLeftAlignedZeroDigits);3213targetReg->setRightAlignedDeadBytes(savedRightAlignedDeadBytes);3214targetReg->setRightAlignedIgnoredBytes(savedRightAlignedIgnoredBytes);3215}3216targetReg->setIsInitialized();3217cg->freeUnusedTemporaryBasedHint(node);3218}3219else3220{3221// when initializing the hint storage reference use the symbol size and not the current node size so the same storage reference may be used3222// without further zero initialization for larger node sizes3223TR_StorageReference *targetStorageReference = NULL;3224int32_t destLength = 0;3225if (node->getOpCode().canHaveStorageReferenceHint() && node->getStorageReferenceHint())3226{3227int32_t resultSize = node->getStorageReferenceSize();3228targetStorageReference = node->getStorageReferenceHint();3229if (cg->traceBCDCodeGen())3230traceMsg(comp,"\tusing storageRefHint 
#%d on node %p (useNewStoreHintOnInit=%d)\n",targetStorageReference->getReferenceNumber(),node,useNewStoreHint && isInitialized);3231if (targetStorageReference->isTemporaryBased())3232{3233// Consider this scenario (common when a sub-expression is rooted in a load of a large value returned from a runtime routine)3234//3235// store3236// x <- size < 103237// y <- current node size=103238// z <- size > 10 and a passThrough operation3239// load <- size > 103240//3241// The temporary hint is the size of z but if performExplicitWidening is also set to true below then code will be generated to initialize up3242// to the size of z even though this extra initialized space will be unused for the rest of the operation.3243// Nodes (x,y,z) that share the same hint are tracked and removed when the node is evaluated. At the current node's (y) initialization point3244// only x,y will be in this list and only up to size=10 will be initialized.3245destLength = targetStorageReference->getMaxSharedNodeSize();3246}3247}3248else3249{3250targetStorageReference = TR_StorageReference::createTemporaryBasedStorageReference(node->getStorageReferenceSize(), comp);3251if (cg->traceBCDCodeGen())3252traceMsg(comp,"\tcreated new targetStorageReference #%d on node %p\n",targetStorageReference->getReferenceNumber(),node);3253}32543255if (destLength > 0)3256{3257// update the symSize so in the initTarget=false case a consumer will not do a needlessly large initialization3258targetStorageReference->getTemporarySymbol()->setActiveSize(destLength);3259if (cg->traceBCDCodeGen())3260traceMsg(comp,"\tsetting destLength and activeSize for initialization based on the smallest remaining node left on the temp based hint #%d : %d\n",3261targetStorageReference->getReferenceNumber(),destLength);3262}3263else if (destLength == 0)3264{3265destLength = targetStorageReference->getSymbolSize();3266if (cg->traceBCDCodeGen())3267traceMsg(comp,"\tsetting destLength for initialization based on the current storageRef #%d 
size : %d\n",targetStorageReference->getReferenceNumber(),destLength);3268}3269else3270{3271TR_ASSERT(false,"unexpected negative destLength of %d for node %p\n",destLength,node);3272}32733274targetReg->setStorageReference(targetStorageReference, node);3275if (initTarget)3276{3277int32_t srcLength = sourceSize;3278TR::MemoryReference *destMR = isBCD ?3279generateS390RightAlignedMemoryReference(node, targetStorageReference, cg) :3280generateS390MemRefFromStorageRef(node, targetStorageReference, cg);3281// for packed to packed operations this is likely the start of some (possibly large) computation so *do* perform the explicit widening all at once at3282// the start so later operations do not have to clear.3283bool performExplicitWidening = targetReg->getDataType() == TR::PackedDecimal && firstReg->getDataType() == TR::PackedDecimal;32843285int32_t zeroDigits = firstReg->getLeftAlignedZeroDigits();3286if (isBCD &&3287zeroDigits > 0 &&3288zeroDigits > targetReg->getLeftAlignedZeroDigits() &&3289firstReg->getLiveSymbolSize() == targetReg->getLiveSymbolSize() &&3290cg->storageReferencesMatch(targetStorageReference, firstStorageReference))3291{3292if (cg->traceBCDCodeGen())3293traceMsg(comp,"\ty^y : transfer leftAlignedZeroDigits %d from firstReg %s to targetReg %s (node %s %p)\n",3294zeroDigits,cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg),node->getOpCode().getName(),node);3295targetReg->setLeftAlignedZeroDigits(zeroDigits);3296}32973298cg->initializeStorageReference(node, targetReg, destMR, destLength, firstChild, firstReg, sourceMR, srcLength, performExplicitWidening, alwaysLegalToCleanSign, trackSignState);3299if (targetBCDReg)3300{3301TR_ASSERT(firstBCDReg,"firstBCDReg should be non-NULL when targetBCDReg is non-NULL for node %p\n",firstChild);3302if 
(performExplicitWidening)3303targetBCDReg->setDecimalPrecision(node->getDecimalPrecision());3304else3305targetBCDReg->setDecimalPrecision(firstBCDReg->getDecimalPrecision());3306targetBCDReg->transferDataState(firstBCDReg);3307}3308else3309{3310if (performExplicitWidening)3311targetReg->setSize(node->getSize());3312else3313targetReg->setSize(firstReg->getSize());3314}3315targetReg->setIsInitialized();3316}3317}3318if (cg->traceBCDCodeGen() && targetReg->getStorageReference()->isReadOnlyTemporary())3319traceMsg(comp,"reset readOnlyTemp flag on storageRef #%d (%s) (valueMod case)\n",3320targetReg->getStorageReference()->getReferenceNumber(),cg->getDebug()->getName(targetReg->getStorageReference()->getSymbol()));3321targetReg->getStorageReference()->setIsReadOnlyTemporary(false, NULL);3322node->setRegister(targetReg);3323return targetReg;3324}33253326/**3327* Handles all BCD and aggregate load and const types direct and indirect3328*3329* pdload3330* pdloadi3331*3332* zdload3333* zdloadi3334*3335* zdsleLoad3336* zdsleLoadi3337*3338* zdslsLoad3339* zdslsLoadi3340*3341* zdstsLoad3342* zdstsLoadi3343*3344* udLoad3345* udLoadi3346*3347* udstLoad3348* udstLoadi3349*3350* udslLoad3351* udslLoadi3352*/3353TR::Register *J9::Z::TreeEvaluator::pdloadEvaluator(TR::Node *node, TR::CodeGenerator *cg)3354{3355cg->traceBCDEntry("pdload",node);3356TR::Register* reg = NULL;33573358cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "PD-Op/%s", node->getOpCode().getName()),33591, TR::DebugCounter::Cheap);3360static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");3361if((cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD) || isVectorBCDEnv) &&3362(node->getOpCodeValue() == TR::pdload || node->getOpCodeValue() == TR::pdloadi))3363{3364reg = pdloadVectorEvaluatorHelper(node, cg);3365}3366else3367{3368reg = pdloadEvaluatorHelper(node, 
cg);3369}33703371cg->traceBCDExit("pdload",node);3372return reg;3373}337433753376TR::Register *J9::Z::TreeEvaluator::pdloadEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg)3377{3378TR::Compilation *comp = cg->comp();33793380bool isBCD = node->getType().isBCD();33813382TR_ASSERT(node->getOpCode().isLoadConst() ||3383(node->getOpCode().hasSymbolReference() && node->getSymbolReference() && !node->getSymbolReference()->isTempVariableSizeSymRef()),3384"load node %p must not be of a tempVariableSizeSymRef\n",node);33853386TR_StorageReference *storageRef = TR_StorageReference::createNodeBasedStorageReference(node, node->getReferenceCount(), comp);33873388TR_ASSERT(!node->getOpCode().isLoadConst() || node->getNumChildren() == 1,"BCD constant type (%s) should have 1 child and not %d children\n",3389node->getDataType().toString(),node->getNumChildren());3390bool isConstant = node->getOpCode().isLoadConst();3391bool isReadOnlyConstant = false;33923393TR_OpaquePseudoRegister *targetReg = NULL;3394if (isBCD)3395{3396targetReg = cg->allocatePseudoRegister(node->getDataType());3397TR_PseudoRegister *targetPseudoReg = targetReg->getPseudoRegister();3398TR_ASSERT(targetPseudoReg,"targetPseudoReg should be non-NULL for node %p\n",node);3399targetPseudoReg->setStorageReference(storageRef, node);3400if (isConstant)3401{3402if (cg->traceBCDCodeGen())3403traceMsg(comp,"\t%s (%p) is a constant load so set hasKnownValidSignAndData = true%s\n",3404node->getOpCode().getName(),node,isReadOnlyConstant?" and skip privatizeStorageReference":"");3405targetPseudoReg->setHasKnownValidSignAndData();3406}34073408if (node->hasKnownOrAssumedSignCode())3409{3410switch (node->getKnownOrAssumedSignCode())3411{3412case raw_bcd_sign_0xc:3413node->hasKnownSignCode() ? targetPseudoReg->setKnownSignCode(0xc) : targetPseudoReg->setAssumedSignCode(0xc);3414break;3415case raw_bcd_sign_0xd:3416node->hasKnownSignCode() ? 
targetPseudoReg->setKnownSignCode(0xd) : targetPseudoReg->setAssumedSignCode(0xd);3417break;3418case raw_bcd_sign_0xf:3419if (node->hasKnownOrAssumedCleanSign())3420{3421// Something has gone wrong and we've ended up with conflicting sign code properties on the node3422// This is a bug and should be fixed but in a prod build conservatively reset the clean sign flag and3423// do transfer the sign to the targetPseudoReg3424TR_ASSERT(false,"conflicting sign code: sign code 0xf is not clean\n");3425node->setHasKnownAndAssumedCleanSign(false);3426}3427else3428{3429node->hasKnownSignCode() ? targetPseudoReg->setKnownSignCode(0xf) : targetPseudoReg->setAssumedSignCode(0xf);3430}3431break;3432case raw_bcd_sign_unknown:3433break;3434default: TR_ASSERT(false,"unexpected node->getKnownOrAssumedSignCode() of %d\n",node->getKnownOrAssumedSignCode());3435}3436}34373438if (!node->getOpCode().isSignlessBCDType() && node->hasKnownOrAssumedCleanSign())3439{3440uint32_t preferredPlusSign = TR::DataType::getPreferredPlusSignCode(node->getDataType());3441uint32_t preferredMinusSign = TR::DataType::getPreferredMinusSignCode(node->getDataType());3442if (node->isNonNegative()) // >= 03443node->hasKnownCleanSign() ? targetPseudoReg->setKnownSignCode(preferredPlusSign) : targetPseudoReg->setAssumedSignCode(preferredPlusSign);3444else if (node->isNonZero() && node->isNonPositive()) // < 03445node->hasKnownCleanSign() ? 
targetPseudoReg->setKnownSignCode(preferredMinusSign) : targetPseudoReg->setAssumedSignCode(preferredMinusSign);3446if (cg->traceBCDCodeGen() && targetPseudoReg->hasKnownOrAssumedSignCode())3447traceMsg(comp,"\ttargetPseudoReg has%sSignCode = true and it is 0x%x\n",targetPseudoReg->hasAssumedSignCode()?"Assumed":"Known",targetPseudoReg->getKnownOrAssumedSignCode());3448// call setHasCleanSign() after the set*SignCode() calls so the TR::DataType::getPreferredMinusCode() does not unset3449// the clean flag (as it must conservatively do to account for the unclean case of -0)3450if (cg->traceBCDCodeGen())3451traceMsg(comp,"\tsetting Has%sCleanSign (due to node flag) on targetPseudoReg %s on %s (%p)\n",3452node->hasKnownCleanSign()?"Known":"Assumed",cg->getDebug()->getName(targetPseudoReg),node->getOpCode().getName(),node);3453node->hasKnownCleanSign() ? targetPseudoReg->setHasKnownCleanSign() : targetPseudoReg->setHasAssumedCleanSign();3454}34553456// set decimal precision here so any copy made in privatizeStorageReference is marked with the correct precision3457targetPseudoReg->setDecimalPrecision(node->getDecimalPrecision());34583459if (comp->fej9()->assumeLeftMostNibbleIsZero() && targetPseudoReg->isEvenPrecision() && TR::DataType::getDigitSize(node->getDataType()) == HalfByteDigit)3460targetPseudoReg->setLeftMostNibbleClear();34613462if (storageRef->isTemporaryBased())3463{3464TR_ASSERT(false,"storageRef for load node %p should not be temp based\n");3465if (cg->traceBCDCodeGen())3466traceMsg(comp,"\tstorageRef is tempBased so set targetReg %s to isInitialized=true\n",cg->getDebug()->getName(targetPseudoReg));3467targetPseudoReg->setIsInitialized();3468}34693470if (cg->traceBCDCodeGen())3471{3472traceMsg(comp,"\tsignState on targetReg %s for %s (%p) :\n",cg->getDebug()->getName(targetPseudoReg),node->getOpCode().getName(),node);3473traceMsg(comp,"\t\tknownCleanSign=%d, knownPrefSign=%d, knownSign=0x%x, assumedCleanSign=%d, assumedPrefSign=%d, assumedSign=0x%x 
(signStateKnown %d, signStateAssumed %d)\n",3474targetPseudoReg->hasKnownCleanSign(),targetPseudoReg->hasKnownPreferredSign(),targetPseudoReg->hasKnownSignCode()?targetPseudoReg->getKnownSignCode():0,3475targetPseudoReg->hasAssumedCleanSign(),targetPseudoReg->hasAssumedPreferredSign(),targetPseudoReg->hasAssumedSignCode()?targetPseudoReg->getAssumedSignCode():0,3476targetPseudoReg->signStateKnown(),3477targetPseudoReg->signStateAssumed());3478traceMsg(comp,"\t%s (%p) has hasSignStateOnLoad=%d\n",node->getOpCode().getName(),node,node->hasSignStateOnLoad());3479}34803481if (!node->hasSignStateOnLoad())3482{3483// even if a particular sign state is not known (i.e. clean,preferred, a particular value) knowing that a load does not have3484// any incoming sign state can help in generating better code (e.g. a ZAP can be used for widening as the side effect of cleaning3485// the sign will not matter vs using a ZAP to widen and illegally modifying a loaded value with an unsigned sign code 0xf->0xc)3486targetPseudoReg->setSignStateInitialized();3487if (cg->traceBCDCodeGen())3488traceMsg(comp,"\tsetting SignStateInitialized due to hasSignStateOnLoad=false flag on %s (%p)\n",node->getOpCode().getName(),node);3489}3490}3491else3492{3493targetReg = cg->allocateOpaquePseudoRegister(node->getDataType());3494targetReg->setStorageReference(storageRef, node);3495}3496node->setRegister(targetReg);3497if (comp->getOption(TR_ForceBCDInit) || !isReadOnlyConstant)3498cg->privatizeStorageReference(node, targetReg, NULL);3499return targetReg;3500}35013502/**3503* \brief This helper uses vector instructions to evaluate pdload and pdloadi.3504*3505* Other types of load (zd, ud, etc) can't use vector registers/instructions.3506*/3507TR::Register*3508J9::Z::TreeEvaluator::pdloadVectorEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg)3509{3510TR_ASSERT(node->getOpCodeValue() == TR::pdload || node->getOpCodeValue() == TR::pdloadi, "vector instructions only support PD 
load.");3511traceMsg(cg->comp(), "pdload Vector Evaluator, node=%p %d\n", node, __LINE__);35123513TR::Register* vTargetReg = vTargetReg = cg->allocateRegister(TR_VRF);3514TR::Node* addressNode = node->getFirstChild();35153516// No need to evaluate the address node of the pdloadi.3517// generateVSIInstruction() API will call separateIndexRegister() to separate the index3518// register by emitting an LA instruction. If there's a need for large displacement adjustment,3519// LAY will be emitted instead.3520TR::MemoryReference* sourceMR = TR::MemoryReference::create(cg, node);35213522// Index of the first byte to load, counting from the right ranging from 0-15.3523uint8_t indexFromTheRight = TR_VECTOR_REGISTER_SIZE - 1;3524if (node->getDecimalPrecision() > TR_MAX_INPUT_PACKED_DECIMAL_PRECISION)3525{3526// we are loading as many digits as we can starting from the right most digit of the PD in memory3527// Need to calculate offset in order to load this way3528sourceMR->addToOffset(node->getSize() - TR_VECTOR_REGISTER_SIZE);3529}3530else3531{3532indexFromTheRight = node->getSize() - 1;3533}35343535TR_ASSERT(indexFromTheRight >= 0 && indexFromTheRight <= 15, "Load length too large for VLRL instruction");3536if(cg->traceBCDCodeGen())3537{3538traceMsg(cg->comp(),"\tGen VLRL for %s node->size=%d\n",3539node->getOpCode().getName(),3540node->getSize());3541}3542generateVSIInstruction(cg, TR::InstOpCode::VLRL, node, vTargetReg, sourceMR, indexFromTheRight);35433544node->setRegister(vTargetReg);3545cg->decReferenceCount(addressNode);3546return vTargetReg;3547}35483549/**3550* A ZAP with an overlapping dest (1st operand) and source (2nd operand) are allowed if the rightmost byte3551* of the 1st operand is coincident with or to the right of the rightmost byte of the second operand3552* Check for this special case here to allow it.3553*3554* pdstorei <mustClean> s=8 bytes3555* aiadd3556* aload3557* iconst 3863558* pdloadi s=5 bytes3559* aiadd3560* aload3561* iconst 3883562*3563* In 
this example the store is from 386->394 and the load from 388->393 so the rightmost byte (393->394) of the 1st operand (store) of the ZAP3564* is to the right of the rightmost byte of the 2nd operand (load) at 392->3933565*/3566bool3567isLegalOverlappingZAP(TR::Node *store, TR::CodeGenerator *cg)3568{3569TR::Compilation *comp = cg->comp();35703571if (cg->traceBCDCodeGen())3572traceMsg(comp,"\tisLegalOverlappingZAP check : store %s (%p), valueChild %s (%p)\n",3573store->getOpCode().getName(),store,store->getValueChild()->getOpCode().getName(),store->getValueChild());35743575if (!store->getOpCode().isStoreIndirect())3576return false;35773578TR::Node *load = store->getValueChild();3579if (!load->getOpCode().isLoadIndirect())3580return false;35813582if (load->getRegister())3583return false;35843585if (load->hasKnownOrAssumedCleanSign()) // won't need a ZAP anyway so don't bother going further3586return false;35873588TR::Node *storeAddr = store->getFirstChild();3589TR::Node *loadVarAddr = load->getFirstChild();35903591if (!cg->isSupportedAdd(storeAddr))3592return false;35933594if (!cg->isSupportedAdd(loadVarAddr))3595return false;35963597if (!cg->nodeMatches(storeAddr->getFirstChild(), loadVarAddr->getFirstChild()))3598return false;35993600if (!storeAddr->getSecondChild()->getOpCode().isIntegralConst())3601return false;36023603if (!loadVarAddr->getSecondChild()->getOpCode().isIntegralConst())3604return false;36053606int64_t storeSize = store->getSize();3607int64_t loadSize = load->getSize();36083609int64_t storeAddrOffset = storeAddr->getSecondChild()->get64bitIntegralValue() + store->getSymbolReference()->getOffset();3610int64_t loadAddrOffset = loadVarAddr->getSecondChild()->get64bitIntegralValue() + load->getSymbolReference()->getOffset();36113612int64_t storeStart = storeAddrOffset;3613int64_t storeEnd = storeStart + storeSize;36143615int64_t loadStart = loadAddrOffset;3616int64_t loadEnd = loadStart + loadSize;36173618if (cg->traceBCDCodeGen())3619{3620int64_t 
overlapStart = std::max(storeStart, loadStart);3621int64_t overlapEnd = std::min(storeEnd, loadEnd);3622traceMsg(comp,"\tstoreRange %lld->%lld vs loadRange %lld->%lld --> overlap range %lld -> %lld\n",3623storeStart,storeEnd,loadStart,loadEnd,overlapStart,overlapEnd);3624}36253626if (storeEnd >= loadEnd)3627{3628if (cg->traceBCDCodeGen())3629traceMsg(comp,"\t\tstoreEnd %lld >= loadEnd %lld : overlap ZAP is legal\n",storeEnd, loadEnd);3630return true;3631}3632else3633{3634if (cg->traceBCDCodeGen())3635traceMsg(comp,"\t\tstoreEnd %lld < loadEnd %lld : overlap ZAP is NOT legal\n",storeEnd, loadEnd);3636return false;3637}3638}36393640/**3641* This evaluator handles the following packed (pd) and unpacked (zd, ud)3642* direct/indirect store operations3643*3644* pdstore3645* pdstorei3646*3647* zdstore3648* zdstorei3649*3650* zdsleStore3651* zdsleStorei3652*3653* zdslsStore3654* zdslsStorei3655*3656* zdstsStore3657* zdstsStorei3658*3659* udStore3660* udStorei3661*3662* udstStore3663* udstStorei3664*3665* udslStore3666* udslStorei3667*/3668TR::Register*3669J9::Z::TreeEvaluator::pdstoreEvaluator(TR::Node *node, TR::CodeGenerator *cg)3670{3671cg->traceBCDEntry("pdstore",node);3672cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "PD-Op/%s", node->getOpCode().getName()),36731, TR::DebugCounter::Cheap);36743675static bool disablePdstoreVectorEvaluator = (feGetEnv("TR_DisablePdstoreVectorEvaluator") != NULL);3676static bool disableZdstoreVectorEvaluator = (feGetEnv("TR_DisableZdstoreVectorEvaluator") != NULL);36773678if (!cg->comp()->getOption(TR_DisableVectorBCD) && !disablePdstoreVectorEvaluator3679&& cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL)3680&& (node->getOpCodeValue() == TR::pdstore || node->getOpCodeValue() == TR::pdstorei))3681{3682pdstoreVectorEvaluatorHelper(node, cg);3683}3684else if (!cg->comp()->getOption(TR_DisableVectorBCD) && !disableZdstoreVectorEvaluator3685&& 
cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY_2)3686&& node->getOpCodeValue() == TR::zdstorei3687&& node->getSecondChild()->getReferenceCount() == 13688&& node->getSecondChild()->getRegister() == NULL3689&& (node->getSecondChild())->getOpCodeValue() == TR::pd2zd3690&& ((node->getSecondChild())->getFirstChild())->getOpCodeValue() == TR::pdloadi)3691{3692zdstoreiVectorEvaluatorHelper(node, cg);3693}3694else3695{3696pdstoreEvaluatorHelper(node, cg);3697}36983699cg->traceBCDExit("pdstore",node);3700return NULL;3701}37023703TR::Register* J9::Z::TreeEvaluator::pdstoreEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg)3704{3705bool isBCD = node->getType().isBCD();3706bool isAggr = node->getType().isAggregate();37073708TR::Node * valueChild = node->getValueChild();3709bool isPacked = node->getType().isAnyPacked();3710bool isIndirect = node->getOpCode().isIndirect();3711TR::Compilation *comp = cg->comp();37123713bool evaluatedPaddingAnchor = false; // store nodes may contain an extra node giving an address of padding bytes (e.g. 
0xF0F0..F0 for zoned)3714bool useZAP = isPacked && node->mustCleanSignInPDStoreEvaluator();37153716TR_ASSERT(isBCD || (node->getSize() == valueChild->getSize()),"nodeSize %d != srcSize %d for node %p\n",node->getSize(),valueChild->getSize(),node);37173718// If a temp copy may be needed for a child load or passthrough operations (such as a redundant pdclean) but the pdstore location3719// will live on (skipCopyOnStore=true) then force the use of the pdstore result location for the child value (and do not generate a temp copy)3720// Note: that size check below isn't quite the same as the isByteTruncation one below (when setting isLegalToChangeCommonedChildAddress)3721// as this first one uses valueChild nodeSize instead of the valueChild regSize.3722// However in cases where the flag will be checked then the valueReg will be uninitialized so the valueChild->getSize() will equal the valueReg->getSize().3723//3724// useStoreAsAnAccumulator check is needed below as it indicates no overlap between the store and any ancestor. 
If there is possible overlap then setting the skipCopyOnLoad3725// flag is incorrect as commoned references will use the updated value (updated by this store) instead of the correct value from the first reference point3726// pdstore "a1" // a1 and a2 overlap in some way3727// pdload "a2"3728//...3729// =>pdload "a2" // this commoned node needs the value at first reference and not the updated value after the pdstore to "a1"3730// // if skipCopyOnLoad is set then "a2" will be loaded again at the commoned point and get the wrong value.3731bool uninitializedSourceLocationMayBeKilled = false;3732bool mustUseZAP = false;3733bool overlapZAPIsAllowed = false;3734if (valueChild->getSize() <= node->getSize() &&3735!valueChild->skipCopyOnLoad() &&3736valueChild->getReferenceCount() > 1 &&3737node->skipCopyOnStore())3738{3739bool canForceSkipCopyOnLoad = false;3740if (node->useStoreAsAnAccumulator()) // see comment above3741{3742canForceSkipCopyOnLoad = true;3743if (cg->traceBCDCodeGen())3744traceMsg(comp,"\tsetting valueChild (%s) %p setSkipCopyOnLoad=true due to store with skipCopyOnStore=true (storeAccumCase)\n",valueChild->getOpCode().getName(),valueChild);3745}3746else if (useZAP && isLegalOverlappingZAP(node, cg))3747{3748canForceSkipCopyOnLoad = true;3749mustUseZAP = true; // the overlap check and forcing of skipCopyOnLoad is only valid if we do actually end up generating a ZAP (vs an MVC for example) so make sure this happens3750overlapZAPIsAllowed = true;3751if (cg->traceBCDCodeGen())3752traceMsg(comp,"\tsetting valueChild %s (%p) setSkipCopyOnLoad=true due to store with skipCopyOnStore=true (legalOverlappingZAPCase)\n",valueChild->getOpCode().getName(),valueChild);3753}3754if (canForceSkipCopyOnLoad)3755{3756valueChild->setSkipCopyOnLoad(true);3757uninitializedSourceLocationMayBeKilled = true;3758}3759}37603761if (useZAP && valueChild->getOpCode().isPackedLeftShift())3762{3763if (cg->traceBCDCodeGen())3764traceMsg(comp,"\tsetting valueChild %p 
cleanSignDuringPackedLeftShift=true due to store that needs a ZAP\n",valueChild);3765valueChild->setCleanSignDuringPackedLeftShift(true);3766}37673768TR_OpaquePseudoRegister *valueReg = cg->evaluateOPRNode(valueChild);37693770if (cg->traceBCDCodeGen())3771traceMsg(comp,"\t%s (%p) : isInMemoryCopyProp=%s\n",node->getOpCode().getName(),node,node->isInMemoryCopyProp()?"yes":"no");3772// NOTE: if a temp copy is generated below then valueStorageReference and valueReg are reset to point to the temp copies3773TR_StorageReference *valueStorageReference = valueReg->getStorageReference();3774TR::MemoryReference *sourceMR = NULL;3775TR_StorageReference *tempStorageReference = NULL;3776bool nodeAndValueRegSizeMatch = node->getSize() == valueReg->getSize();3777bool allSizesMatch = false;3778if (valueStorageReference->isNonConstantNodeBased())3779{3780allSizesMatch = nodeAndValueRegSizeMatch &&3781valueReg->getSize() == valueStorageReference->getNode()->getSize();3782}3783else3784{3785allSizesMatch = nodeAndValueRegSizeMatch;3786}37873788if (valueStorageReference->isNonConstantNodeBased() &&3789comp->getOption(TR_PrivatizeOverlaps) &&3790!overlapZAPIsAllowed &&3791!(node->useStoreAsAnAccumulator() || valueReg->isInitialized()))3792{3793// In addition to when the isInMemoryCopyProp flag is set on the store there are two other cases when an temp copy is needed for overlap3794// 1) isUsingStorageRefFromAnotherStore : even with CSE commoning (so not subject to isInMemoryCopyProp flag as the IL itself is safe)3795// can result in an overlap if 'b' is updated to point to 'c' storageRef and 'd' overlap3796// This is a lazy fixup -- could also pro-actively not set skipCopyOnStore for 'c' in the first place if the stores for any of the commoned 'b' nodes3797// are in memory types (BCD/Aggr) that also overlap with 'c' (e.g. 
'd' in this case)3798//3799// c3800// b3801//3802// d3803// =>b (was just 'b' before CSE) but could be 'c' after 'c' is evaluated3804//3805TR::Node *storageRefNode = valueStorageReference->getNode();3806bool isUsingStorageRefFromAnotherStore = storageRefNode->getOpCode().isStore() && storageRefNode != node;38073808// 2) The valueRegHasDeadOrIgnoredBytes check is for when a ZAP could be generated for an overlapping copy where the rightmost3809// bytes are not coincident (due to the deadOrIgnoredBytes) so go through a temp in this case too3810//3811// This also handles the case like the below (so do not bother checking useZAP along with valueRegHasDeadOrIgnoredBytes)3812// The copy is not redundant when the valueReg has some dead or ignored bytes as the right most bytes of the source3813// and target will not be coincident in this case even if the addresses exactly match3814// izdstore p = 6 "A"3815// addr13816// zdshrSetSign p = 1 --> valueReg has 5 ignored bytes3817// izdload "A" p = 63818// =>addr13819// iconst 5 // shift3820/// iconst 15 // setSign3821//3822// In this case have to move from offset +0 to offset +5 and then clear the top 5 bytes (starting at offset +0)3823// If copyIsRedundant is incorrectly set to true then only the clear of the top 5 bytes happens and the one surviving3824// digit from the zdshrSetSign is clobbered3825// MVC +0(base,L=1),+5(base) move surviving digit first3826// MVC +0(base,L-5),(constant) complete widening by setting top 5 bytes to 0xF03827bool valueRegHasDeadOrIgnoredBytes = valueReg->getRightAlignedIgnoredBytes() > 0;38283829// 3) if there is any size mismatch between the sizes of node, valueReg and storageRefNode3830//3831// if nodeSize != storageRefNodeSize then this could be a truncating copy where the data needs to be moved back a number of bytes3832// "a" and "a_alias" start at the same address (so loadOrStoreAddressesMatch will return true) but "a" is 10 bytes and "a_alias" is 13 bytes3833// The meaning of the IL below 
is to move the low (addr+3) 10 bytes of "a_alias" back (to the left) 3 bytes.3834// This is actual needed data movement so a copy must be done (TODO : going through a temp here but this particular size mismatch case could3835// be done with an MVC as this direction of copy is non-destructive.3836// ipdstore "a" s=103837// addr3838// ipdload "a_alias" s=13 // valueChild may not be a simple load but some commoned pdX operation that has the ipdload as its storageRefNode3839// =>addr38403841if (cg->traceBCDCodeGen())3842traceMsg(comp,"\tisInMemoryCopyProp=%s, isUsingStorageRefFromAnotherStore=%s, valueRegHasDeadOrIgnoredBytes=%s : node %s (%p), valueReg %s, storageRefNode %s (%p)\n",3843node->isInMemoryCopyProp() ? "yes":"no",3844isUsingStorageRefFromAnotherStore ? "yes":"no",3845valueRegHasDeadOrIgnoredBytes ? "yes":"no",3846node->getOpCode().getName(),node,3847cg->getDebug()->getName(valueReg),3848storageRefNode->getOpCode().getName(),storageRefNode);38493850if (cg->traceBCDCodeGen())3851traceMsg(comp,"\tallSizesMatch=%s (nodeSize=%d, valueRegSize=%d, storageRefNodeSize=%d)\n",3852allSizesMatch ? "yes":"no",node->getSize(),valueReg->getSize(),storageRefNode->getSize());38533854if (node->isInMemoryCopyProp() || isUsingStorageRefFromAnotherStore || valueRegHasDeadOrIgnoredBytes || !allSizesMatch)3855{3856// a redundant copy is an MVC with exact matching target and source. This is a nop but a very expensive nop as the hardware treats it3857// as any other overlap copy (i.e. 
very slowly)3858if (cg->traceBCDCodeGen())3859traceMsg(comp,"\tnode %s (%p) and source %s (%p) may overlap but first check if copy would be redundant\n",3860node->getOpCode().getName(),node,valueChild->getOpCode().getName(),valueChild);38613862bool copyIsRedundant = !valueRegHasDeadOrIgnoredBytes && allSizesMatch && cg->loadOrStoreAddressesMatch(node, valueStorageReference->getNode());38633864if (cg->traceBCDCodeGen())3865traceMsg(comp,"\tgot copyIsRedundant=%s from first test\n",copyIsRedundant?"yes":"no");38663867//Further check if there is potential destructive overlap based on storage info3868if (isAggr && !copyIsRedundant && !valueRegHasDeadOrIgnoredBytes && allSizesMatch)3869{3870if (cg->traceBCDCodeGen())3871traceMsg(comp,"\tperform test for definitelyNoDestructive overlap\n");38723873if (cg->getStorageDestructiveOverlapInfo(valueStorageReference->getNode(), valueReg->getSize(), node, node->getSize()) == TR_DefinitelyNoDestructiveOverlap)3874{3875copyIsRedundant = true;3876if (cg->traceBCDCodeGen())3877traceMsg(comp,"\t\tset copyIsRedundant=true : overlap check between node %s (%p) size=%d and valueStorageRefNode %s (%p) valueRegSize %d returns TR_DefinitelyNoDestructiveOverlap\n",3878node->getOpCode().getName(),node,node->getSize(),3879valueStorageReference->getNode()->getOpCode().getName(),valueStorageReference->getNode(),valueReg->getSize());3880}3881}38823883if (cg->traceBCDCodeGen())3884traceMsg(comp,"\t\tcopyIsRedundant=%s\n",copyIsRedundant?"yes":"no");38853886if (!copyIsRedundant)3887{3888// i.e. 
a simple load/store BUT load and store memory may overlap so must use a temp so MVC doesn't destructively overlap and lose some source bytes3889if (cg->traceBCDCodeGen())3890traceMsg(comp,"\tnode %s (%p) and source %s (%p) (uninitialized valueReg %s) may overlap -- must privatize valueReg\n",3891node->getOpCode().getName(),node,valueChild->getOpCode().getName(),valueChild,cg->getDebug()->getName(valueReg));38923893int32_t privatizedSize = valueReg->getSize();3894int32_t storageRefNodeSize = storageRefNode->getSize();3895if (!valueReg->isInitialized() &&3896storageRefNodeSize != privatizedSize)3897{3898// may need to increase the size of the memcpy so it captures all of the source value -- this is important for the example above of moving 10 bytes starting at addr_1+33899// back 3 bytes to addr_13900// This 13 byte copy will copy the entire original field and then the store generated by the usual pdstoreEvaluator will be MVC addr_1(10,br),addr_1+3(10,br)3901privatizedSize = storageRefNodeSize;3902if (cg->traceBCDCodeGen())3903traceMsg(comp,"\tset privatizedSize to storageRefNodeSize %d for uninit valueReg %s with mismatched storageRefNodeSize %d and valueRegSize %d\n",3904privatizedSize,cg->getDebug()->getName(valueReg),storageRefNodeSize,valueReg->getSize());39053906if (valueRegHasDeadOrIgnoredBytes)3907{3908// below IL comes from statements like : DIVIDE powerOfTenLit into var where var is an unsigned zoned type3909// zdstore s=153910// addr3911// zdshrSetSign s=12 <- passThrough with 3 rightAligned deadBytes3912// izdload s=153913// =>addr3914// iconst 3 // shift3915// iconst 0xf // sign3916//3917// in this case using an overridden size of 15 from the zdload is incorrect as there are only 12 valid bytes after the passThru zdshrSetSign3918// If the offset on the addr is less then the shift then the final offset will be < 0 and the binary encoding time assume will be hit3919// For larger offsets no compile time problem is hit but the temp copy reaches back to read 
bytes from before it's field (but the these bytes3920// are not actually examined so everything ends up 'working' (delta any access exceptions if this were the first field in storage)3921if (cg->traceBCDCodeGen())3922traceMsg(comp,"\t\tgetRightAlignedIgnoredBytes %d > 0 so reduce privatizedSize %d -> %d\n",3923valueReg->getRightAlignedIgnoredBytes(), privatizedSize, privatizedSize - valueReg->getRightAlignedIgnoredBytes());3924privatizedSize = privatizedSize - valueReg->getRightAlignedIgnoredBytes();3925}3926}3927TR_OpaquePseudoRegister *tempRegister = cg->privatizePseudoRegister(valueChild, valueReg, valueStorageReference, privatizedSize);3928tempStorageReference = tempRegister->getStorageReference();39293930if (cg->traceBCDCodeGen())3931{3932if (node->isInMemoryCopyProp())3933traceMsg(comp,"\ta^a : privatize needed due to isInMemoryCopyProp node %s (%p) on line_no=%d (storeCase)\n",3934node->getOpCode().getName(),node,comp->getLineNumber(node));3935if (isUsingStorageRefFromAnotherStore)3936traceMsg(comp,"\ta^a : privatize needed due to isUsingStorageRefFromAnotherStore storageRefNode %s (%p) on line_no=%d (storeCase)\n",3937storageRefNode->getOpCode().getName(),storageRefNode,comp->getLineNumber(node));3938if (valueRegHasDeadOrIgnoredBytes)3939traceMsg(comp,"\ta^a : privatize needed due to valueRegHasDeadOrIgnoredBytes valueReg %s valueChild %s (%p) on line_no=%d (storeCase)\n",3940cg->getDebug()->getName(valueReg),valueChild->getOpCode().getName(),valueChild,comp->getLineNumber(node));3941}39423943TR_ASSERT(!comp->getOption(TR_EnablePerfAsserts),"gen overlap copy on node %s (%p) on line_no=%d (storeCase)\n",3944node->getOpCode().getName(),node,comp->getLineNumber(node));39453946if (isBCD)3947sourceMR = generateS390RightAlignedMemoryReference(valueChild, tempStorageReference, cg);3948else3949sourceMR = generateS390MemRefFromStorageRef(valueChild, tempStorageReference, cg);39503951valueReg = tempRegister;3952valueStorageReference = 
tempStorageReference;39533954TR_ASSERT(!isBCD || valueReg->getPseudoRegister(),"valueReg must be a pseudoRegister on node %s (%p)\n",valueChild->getOpCode().getName(),valueChild);3955}3956}3957else3958{3959if (cg->traceBCDCodeGen())3960traceMsg(comp,"y^y : temp copy saved isInMemoryCopyProp = false on %s (%p) (storeCase)\n",node->getOpCode().getName(),node);3961}3962}39633964TR_PseudoRegister *bcdValueReg = NULL;3965if (valueReg->getPseudoRegister())3966{3967bcdValueReg = valueReg->getPseudoRegister();3968}39693970int32_t destSize = node->getSize();3971int32_t sourceSize = valueReg->getSize();39723973TR_ASSERT(isBCD || (destSize == sourceSize),"destSize %d != sourceSize %d for node %p\n",destSize,sourceSize,node);39743975bool isByteTruncation = sourceSize > destSize;3976bool isByteWidening = destSize > sourceSize;39773978bool isLeadingSignByteWidening = isByteWidening && node->getType().isLeadingSign();39793980useZAP = useZAP && bcdValueReg && (!bcdValueReg->hasKnownOrAssumedCleanSign() || mustUseZAP);3981//useZAP = useZAP || (isPacked && isByteTruncation); // truncating packed stores that need overflow exception should be using pdshlOverflow39823983bool preserveSrcSign = bcdValueReg && !bcdValueReg->isLegalToCleanSign();39843985bool savePreZappedValue = false;3986if (useZAP &&3987valueChild->getReferenceCount() > 1 &&3988preserveSrcSign)3989{3990savePreZappedValue = true;3991if (cg->traceBCDCodeGen())3992{3993traceMsg(comp,"\tsetting savePreZappedValue=true because valueReg (from valueChild %p with refCount %d > 1) ",valueChild,valueChild->getReferenceCount());3994if (!bcdValueReg->signStateInitialized())3995traceMsg(comp,"has an uninitialized sign state and a ZAP is to be used for the store\n");3996else3997traceMsg(comp,"has signCode 0x%x and a ZAP is to be used for the store\n", bcdValueReg->getKnownOrAssumedSignCode());3998}3999}40004001bool childContainsAccumulatedResult = valueStorageReference->isNodeBased() &&4002valueStorageReference->isNodeBasedHint() 
&&4003(valueStorageReference->getNode() == node);40044005if (cg->traceBCDCodeGen())4006traceMsg(comp,"\tisPacked=%s, useZAP=%s, valueReg->signStateInit()=%s, valueReg->hasKnownOrAssumedCleanSign()=%s, isByteTruncation=%s, isByteWidening=%s, destSize=%d, sourceSize=%d\n",4007isPacked?"true":"false",4008useZAP?"true":"false",4009bcdValueReg && bcdValueReg->signStateInitialized()?"true":"false",4010bcdValueReg && bcdValueReg->hasKnownOrAssumedCleanSign()?"true":"false",4011isByteTruncation?"true":"false",4012isByteWidening?"true":"false",4013destSize,4014sourceSize);40154016TR::Node *sourceNode = NULL;4017bool changeCommonedChildAddress = false;4018bool isLegalToChangeCommonedChildAddress = false;40194020TR_ASSERT( !childContainsAccumulatedResult || valueReg->isInitialized(),"an accumulated result should also be initialized\n");40214022if (!isByteTruncation &&4023!isLeadingSignByteWidening &&4024!savePreZappedValue &&4025tempStorageReference == NULL && // valueReg->setStorageReference() will not work in this case as the valueReg is pointing to the copy (tempRef count underflow)4026valueChild->getReferenceCount() > 1 &&4027node->skipCopyOnStore())4028{4029isLegalToChangeCommonedChildAddress = true;4030if (cg->traceBCDCodeGen())4031traceMsg(comp,"\tsetting isLegalToChangeCommonedChildAddress=true for valueChild %s (%p) because isByteTruncation=false, isLeadingSignByteWidening=false, refCount %d > 1, skipCopyOnStore=true and savePreZappedValue=false\n",4032valueChild->getOpCode().getName(),4033valueChild,4034valueChild->getReferenceCount());4035}40364037if (!valueStorageReference->isTemporaryBased() &&4038valueStorageReference->getNode() != node)4039{4040TR_ASSERT(!valueReg->isInitialized(),"expecting valueReg to not be initialized for valueChild %p\n",valueChild);4041TR_ASSERT(valueReg->getStorageReference()->isNodeBased(),"expecting valueReg storageRef to be nodeBased on valueChild %p\n",valueChild);4042if 
(valueStorageReference->getNode()->getOpCode().isStore())4043{4044if (cg->traceBCDCodeGen())4045traceMsg(comp,"found uninit storageRef node based STORE case valueChild %s (%p) and storageRefNode %s (%p)\n",4046valueChild->getOpCode().getName(),4047valueChild,4048valueStorageReference->getNode()->getOpCode().getName(),4049valueStorageReference->getNode());4050}4051else if (valueStorageReference->getNode()->getOpCode().isLoad())4052{4053if (cg->traceBCDCodeGen())4054traceMsg(comp,"found uninit storageRef node based LOAD case valueChild %s (%p) and storageRefNode %s (%p), skipCopyOnLoad storageRefNode is %s\n",4055valueChild->getOpCode().getName(),4056valueChild,4057valueStorageReference->getNode()->getOpCode().getName(),4058valueStorageReference->getNode(),4059valueStorageReference->getNode()->skipCopyOnLoad()?"yes":"no");4060}4061else4062{4063TR_ASSERT(false,"storageRefNode %p should be a load or a store node %p (%s)\n",valueStorageReference->getNode(),cg->getDebug()->getName(valueStorageReference->getNode()));4064}4065}40664067if (valueStorageReference->isTemporaryBased() || (valueStorageReference->getNode() != node))4068{4069if (cg->traceBCDCodeGen() && valueStorageReference->isTemporaryBased())4070traceMsg(comp,"\tvalueStorageReference->isTemporaryBased() case so see if changeCommonedChildAddress should be set to true\n");4071else if (cg->traceBCDCodeGen())4072traceMsg(comp,"\tvalueStorageReference->getNode() != node (%p != %p) case so see if changeCommonedChildAddress should be set to true\n",4073valueStorageReference->getNode(),node);40744075sourceNode = valueChild;4076if (isLegalToChangeCommonedChildAddress)4077{4078if (useZAP)4079{4080changeCommonedChildAddress = true;4081if (cg->traceBCDCodeGen()) traceMsg(comp,"\t\tset changeCommonedChildAddress = true due to ZAP\n");4082}4083else if (isByteWidening)4084{4085changeCommonedChildAddress = true;4086if (cg->traceBCDCodeGen()) traceMsg(comp,"\t\tset changeCommonedChildAddress = true due to 
byteWidening\n");4087}4088/* // disable this case, not a good enough reason for potential operand store compare4089else if (!isIndirect && valueChild->getOpCode().isIndirect()) // addressability is cheaper4090{4091changeCommonedChildAddress = true;4092if (cg->traceBCDCodeGen()) traceMsg(comp,"\t\tset changeCommonedChildAddress = true due to cheaper addressability\n");4093}4094*/4095else if (uninitializedSourceLocationMayBeKilled &&4096!valueStorageReference->isTemporaryBased() && // last two conditions are true when source location is uninitialized (passThrough operations or just a load child)4097(valueStorageReference->getNode()->getOpCode().isLoadVar() || valueStorageReference->getNode()->getOpCode().isStore()))4098{4099changeCommonedChildAddress = true;4100if (cg->traceBCDCodeGen())4101traceMsg(comp,"\t\tset changeCommonedChildAddress = true due to uninitialized storageRefNode %p with skipCopyOnLoad that was forced to true\n",valueStorageReference->getNode());4102}4103else4104{4105if (cg->traceBCDCodeGen()) traceMsg(comp,"\t\tleave changeCommonedChildAddress = false\n");4106}4107}4108else4109{4110if (cg->traceBCDCodeGen())4111traceMsg(comp,"\t\tisLegalToChangeCommonedChildAddress = false so do not attempt to look for cases to set changeCommonedChildAddress to true\n");4112}4113}4114else4115{4116TR_ASSERT( childContainsAccumulatedResult, "expecting the pdstore child node to contain the accumulated result\n");4117// If there is any byte truncation and we are in the accumulator case then this means some leftmost child of the store4118// may have written data outside the bounds of the current store and this would be (horribly) incorrect.4119// This case should never occur as hints should only be assigned when the pdstore memory location is large enough4120// to contain any leftmost result value.4121TR_ASSERT( !isByteTruncation,"byte truncation should not occur when using the pdstore as an accumulator\n");4122changeCommonedChildAddress = true;4123if 
(cg->traceBCDCodeGen()) traceMsg(comp,"\taccumulated hint case so unconditionally set changeCommonedChildAddress = true\n");4124}41254126if (cg->traceBCDCodeGen())4127traceMsg(comp,"\tbef legality check: changeCommonedChildAddress = %s and isLegalToChangeCommonedChildAddress=%s so final changeCommonedChildAddress=%s\n",4128changeCommonedChildAddress?"true":"false",4129isLegalToChangeCommonedChildAddress?"true":"false",4130(changeCommonedChildAddress && isLegalToChangeCommonedChildAddress)?"true":"false");41314132changeCommonedChildAddress = changeCommonedChildAddress && isLegalToChangeCommonedChildAddress;41334134// well this is unfortunate -- the valueChild has skipCopyOnLoad set on it but for some reason (likely some corner case savePreZappedValue)4135// isLegalToChangeCommonedChildAddress is false.4136// This means that it is not safe to keep using the storageRef on the valueChild past this store point so must force it to a temp4137bool mustPrivatizeValueChild = tempStorageReference == NULL && !valueReg->isInitialized() && uninitializedSourceLocationMayBeKilled && !changeCommonedChildAddress;4138if (cg->traceBCDCodeGen())4139traceMsg(comp,"\tmustPrivatizeValueChild=%s\n",mustPrivatizeValueChild?"yes":"no");41404141TR_StorageReference *targetStorageReference =4142TR_StorageReference::createNodeBasedStorageReference(node,4143changeCommonedChildAddress ? 
valueChild->getReferenceCount() : 1,4144comp);41454146rcount_t origValueChildRefCount = valueChild->getReferenceCount();41474148if (cg->traceBCDCodeGen())4149traceMsg(comp,"\tcreate node based targetStorageReference #%d from %s (%p) and nodeRefCount %d (%s)\n",4150targetStorageReference->getReferenceNumber(),4151node->getOpCode().getName(),4152node,4153targetStorageReference->getNodeReferenceCount(),4154changeCommonedChildAddress?"from valueChild":"fixed at 1");41554156TR::MemoryReference *targetMR = NULL;4157if (useZAP)4158{4159if (cg->traceBCDCodeGen())4160traceMsg(comp,"\tuseZAP=true so gen ZAP but first determine the zapDestSize, initial size is destSize=%d\n",destSize);4161int32_t zapDestSize = destSize;4162targetMR = generateS390RightAlignedMemoryReference(node, targetStorageReference, cg);4163TR::Node *sourceNodeForZAP = sourceNode;4164if (sourceNode)4165{4166if (sourceMR == NULL)4167sourceMR = generateS390RightAlignedMemoryReference(sourceNode, valueStorageReference, cg);4168cg->correctBadSign(sourceNode, bcdValueReg, sourceSize, sourceMR);4169}4170else4171{4172// when zapping a field against itself then we may be able to reduce the destSize if some of the upper bytes are already clear4173if (isByteWidening)4174{4175if (cg->traceBCDCodeGen())4176traceMsg(comp,"\t\tdestSize > sourceSize (%d > %d) so check valueReg->getLiveSymbolSize() %d against destSize %d before checking if the upper bytes are clear\n",4177destSize,sourceSize,valueReg->getLiveSymbolSize(),destSize);4178if (valueReg->getBytesToClear(sourceSize, destSize) == 0)4179{4180zapDestSize=sourceSize;4181if (cg->traceBCDCodeGen())4182traceMsg(comp,"\t\tvalueReg bytes sourceSize->destSize (%d->%d) are already clear so set zapDestSize=sourceSize=%d\n",sourceSize,destSize,sourceSize);4183}4184}4185cg->correctBadSign(node, bcdValueReg, zapDestSize, targetMR);4186// save the dead/ignored bytes here as it will be reset to 0 if savePreZappedValue is true as part of the setStorageReference call 
below4187int32_t savedRightAlignedDeadAndIgnoredBytes = valueReg->getRightAlignedDeadAndIgnoredBytes();4188if (savePreZappedValue)4189{4190TR_StorageReference *valueStorageReferenceCopy = TR_StorageReference::createTemporaryBasedStorageReference(sourceSize, comp);4191// when tempStorageReference != NULL then the valueReg->setStorageReference call below will not work as the temp ref count will underflow4192// valueReg in this case is actually pointing to the tempRegister created when copyMR was initialized4193// shouldn't reach here in this case as tempStorageReference is only used for the uninit and non-hint cases and this is an init path4194TR_ASSERT(tempStorageReference == NULL,"tempStorageReference == NULL should be null for node %p\n",node);4195valueReg->setStorageReference(valueStorageReferenceCopy, valueChild);4196valueReg->setIsInitialized();4197valueStorageReference = valueStorageReferenceCopy;4198if (cg->traceBCDCodeGen())4199traceMsg(comp,"\tsavePreZappedValue=true so gen MVC with sourceSize %d to copy #%d on pdstore for valueChild %p with refCnt %d\n",4200sourceSize,valueStorageReferenceCopy->getReferenceNumber(),valueChild,valueChild->getReferenceCount());4201TR::MemoryReference *targetCopyMR = generateS390RightAlignedMemoryReference(*targetMR, node, 0, cg);4202if (savedRightAlignedDeadAndIgnoredBytes > 0)4203{4204if (cg->traceBCDCodeGen())4205traceMsg(comp,"\tadd -savedRightAlignedDeadAndIgnoredBytes = -%d to sourceMR for savePreZappedValue copy\n",savedRightAlignedDeadAndIgnoredBytes);4206targetCopyMR->addToTemporaryNegativeOffset(node, -savedRightAlignedDeadAndIgnoredBytes, cg);4207}4208generateSS1Instruction(cg, TR::InstOpCode::MVC, node,4209sourceSize-1,4210generateS390RightAlignedMemoryReference(valueChild, valueStorageReferenceCopy, cg),4211targetCopyMR);42124213}4214sourceMR = generateS390RightAlignedMemoryReference(*targetMR, node, 0, cg); // ensure sourceMR and targetMR are the same when used for the ZAP below42154216if 
(savedRightAlignedDeadAndIgnoredBytes > 0)4217{4218if (cg->traceBCDCodeGen())4219traceMsg(comp,"\tadd -savedRightAlignedDeadAndIgnoredBytes = -%d to sourceMR for final ZAP\n",savedRightAlignedDeadAndIgnoredBytes);4220sourceMR->addToTemporaryNegativeOffset(node, -savedRightAlignedDeadAndIgnoredBytes, cg);4221}42224223sourceNodeForZAP = node; // so a NULL sourceNode is not passed in for the ZAP sourceMR reuse below4224}42254226if (isByteTruncation)4227{4228if (cg->traceBCDCodeGen())4229traceMsg(comp,"\tisByteTruncating ZAP so reduce sourceSize %d->%d\n",sourceSize,zapDestSize);4230sourceSize = zapDestSize;4231}42324233if (cg->traceBCDCodeGen())4234traceMsg(comp,"\tgen ZAP with zapDestSize=%d,sourceSize=%d\n",zapDestSize,sourceSize);4235generateSS2Instruction(cg, TR::InstOpCode::ZAP, node,4236zapDestSize-1,4237reuseS390RightAlignedMemoryReference(targetMR, node, targetStorageReference, cg),4238sourceSize-1,4239reuseS390RightAlignedMemoryReference(sourceMR, sourceNodeForZAP, valueStorageReference, cg));4240}4241else4242{4243if (sourceNode)4244{4245if (cg->traceBCDCodeGen())4246traceMsg(comp,"\tuseZAP=false and sourceNode %s (%p) is non-NULL so gen MVC but first determine the mvcSize\n",4247sourceNode->getOpCode().getName(),sourceNode);4248int32_t mvcSize = sourceSize;4249if (isByteTruncation)4250{4251mvcSize = destSize;4252}4253bool needsClear = false;4254if (isByteWidening)4255{4256needsClear = true;4257if (cg->traceBCDCodeGen())4258traceMsg(comp,"\t\tdestSize > sourceSize (%d > %d) so try to reduce mvcSize by checking if the upper bytes are clear\n",4259destSize,sourceSize,valueReg->getLiveSymbolSize(),destSize);4260if (valueReg->getBytesToClear(sourceSize, destSize) == 0)4261{4262needsClear=false;4263mvcSize=destSize;4264if (cg->traceBCDCodeGen())4265traceMsg(comp,"\t\tvalueReg bytes sourceSize->destSize (%d->%d) are already clear so set mvcSize=destSize=%d\n",sourceSize,destSize,mvcSize);4266}4267}42684269if (cg->traceBCDCodeGen())4270traceMsg(comp,"\tsourceNode %s 
(%p) is non-NULL so gen MVC/memcpy with size %d to store (isByteTruncation=%s)\n",4271sourceNode->getOpCode().getName(),sourceNode,mvcSize,isByteTruncation?"yes":"no");42724273if (isBCD)4274{4275targetMR = generateS390RightAlignedMemoryReference(node, targetStorageReference, cg);4276if (sourceMR == NULL)4277sourceMR = generateS390RightAlignedMemoryReference(sourceNode, valueStorageReference, cg);4278}4279else4280{4281targetMR = generateS390MemRefFromStorageRef(node, targetStorageReference, cg);4282if (sourceMR == NULL)4283sourceMR = generateS390MemRefFromStorageRef(sourceNode, valueStorageReference, cg);4284}42854286// if getRightAlignedIgnoredBytes > - then the rightmost bytes will not be coincident so the addressesMatch check is not sufficient4287// to detect if the copyIsRedundant4288//4289// Similarly if the node and storageRefNode sizes do not match (!allSizesMatch) then different offset bumps will be applied even if their starting addresses4290// are coincident (i.e. loadOrStoreAddressesMatch would return true)4291bool copyIsRedundant = valueReg->getRightAlignedIgnoredBytes() == 0 &&4292allSizesMatch &&4293valueStorageReference->isNonConstantNodeBased() &&4294cg->loadOrStoreAddressesMatch(node, valueStorageReference->getNode());4295if (cg->traceBCDCodeGen() && copyIsRedundant)4296traceMsg(comp,"\t\tcopyIsRedundant=yes so skip memcpy\n");4297if (!copyIsRedundant)4298cg->genMemCpy(targetMR, node, sourceMR, sourceNode, mvcSize);42994300if (needsClear)4301{4302cg->widenBCDValue(node, NULL, valueReg->getSize(), node->getSize(), targetMR);4303evaluatedPaddingAnchor = true;4304}4305}4306else if (isByteWidening)4307{4308if (cg->traceBCDCodeGen())4309traceMsg(comp,"\tuseZAP=false and sourceNode is NULL so just check if upper bytes need to be cleared\n");4310targetMR = generateS390RightAlignedMemoryReference(node, targetStorageReference, cg);4311cg->widenBCDValueIfNeeded(node, bcdValueReg, sourceSize, node->getSize(), targetMR);4312evaluatedPaddingAnchor = 
true;4313}4314}43154316if (valueChild->getReferenceCount() > 1)4317{4318if (changeCommonedChildAddress)4319{4320int32_t savedLeftAlignedZeroDigits = valueReg->getLeftAlignedZeroDigits();4321if (cg->traceBCDCodeGen())4322traceMsg(comp,"\tchangeCommonedChildAddress=true so update storage reference on valueReg %s (leftAlignedZeroDigits=%d) and reset isInit to false\n",4323cg->getDebug()->getName(valueReg),savedLeftAlignedZeroDigits);43244325valueReg->setStorageReference(targetStorageReference, valueChild); // also resets leftAlignedZeroDigits43264327// Reset isInit to false for correctness so the commoned reference does not clobber a user variable location4328// This reset is also done during addStorageReferenceHints but there is no guarantee this pass will be done for every4329// IL pattern4330if (!targetStorageReference->isTemporaryBased())4331valueReg->setIsInitialized(false);43324333if (isByteWidening)4334{4335bcdValueReg->addRangeOfZeroBytes(sourceSize, destSize);4336}4337else if (savedLeftAlignedZeroDigits > 0)4338{4339// TODO: is the size check below needed? 
-- isByteWidening is checked in the if above and isByteTruncation would never happen for an accum case4340if (childContainsAccumulatedResult &&4341valueReg->getSize() == node->getSize())4342{4343if (cg->traceBCDCodeGen())4344traceMsg(comp,"\tset leftAlignedZeroDigits to %d on %s after setStorageReference\n",savedLeftAlignedZeroDigits,cg->getDebug()->getName(valueReg));4345valueReg->setLeftAlignedZeroDigits(savedLeftAlignedZeroDigits);4346}4347else4348{4349// could also probably transfer savedLeftAlignedZeroDigits in some non-accum cases too but need to see a motivating case first4350if (cg->traceBCDCodeGen())4351traceMsg(comp,"z^z : missed transferring zeroDigits %d to valueChild %s (%p) (accum=%s, valueRegSize %d, nodeSize %d\n",4352savedLeftAlignedZeroDigits,valueChild->getOpCode().getName(),valueChild,childContainsAccumulatedResult?"yes":"no",valueReg->getSize(),node->getSize());4353}4354}43554356if (useZAP)4357{4358bcdValueReg->setHasKnownValidSignAndData();4359bcdValueReg->setHasKnownCleanSign();4360TR_ASSERT(!bcdValueReg->hasKnownOrAssumedSignCode() || bcdValueReg->getKnownOrAssumedSignCode() != 0xf,"inconsistent sign code of 0xf found for node %p\n",valueChild);4361if (cg->traceBCDCodeGen()) traceMsg(comp,"\t\tsetting HasKnownCleanSign (due to ZAP) on valueReg %s on valueChild %p\n",cg->getDebug()->getName(bcdValueReg),valueChild);4362}4363}4364else if (mustPrivatizeValueChild ||4365(!valueStorageReference->isTemporaryBased() && // comment1 below4366childContainsAccumulatedResult && // comment2 below4367(!node->skipCopyOnStore() || isLeadingSignByteWidening))) // comments 2 and 3 below4368{4369// comment1 (explains the first case where a temp copy is *not* needed)4370// do not generate another temp copy if storing a temp that is already attached to a commoned load or pass thru node4371// pdstore4372// =>ipdload (in temp1), skipSSCopy=false <- temp1 will have the correct ref count for all its commoned uses4373//4374// comment2 (explains the second case where 
a temp copy is *not* needed)4375// pdstore4376// =>pdshr4377// here the pdshr storageReference is store based as the result of the initial (an earlier) store of the same pdshr node being marked with skipCopyOnStore.4378// In this case all commoned references of pdshr can use the store based storageReference as this flag guarantees the store symbol is4379// not killed before the last reference to the pdshr is seen.4380// comment34381// skipCopyOnStore does not consider kills of the value that happen during the store itself. When storing a value4382// with a leading sign, if we have to widen that value, we move the sign code. This causes later uses of the value4383// child to see the wrong result unless we make a copy, so we ignore skipCopyOnStore if isLeadingSignByteWidening.43844385TR_StorageReference *valueStorageReferenceCopy = TR_StorageReference::createTemporaryBasedStorageReference(sourceSize, comp);4386// when tempStorageReference != NULL then the valueReg->setStorageReference call below will not work as the temp ref count will underflow4387// valueReg in this case is actually pointing to the tempRegister created when copyMR was initialized4388// shouldn't reach here in this case as tempStorageReference is only used for the uninit and non-hint cases and this is hint path4389TR_ASSERT(tempStorageReference == NULL,"tempStorageReference == NULL should be null for node %p\n",node);4390valueReg->setIsInitialized();43914392// do not clean sign for the BCD copy as the commoned use may not be a final use (so the sign cleaning may be premature)4393if (cg->traceBCDCodeGen())4394traceMsg(comp,"\tlate pdstore privatization of valueChild : so gen MVC/memcpy with sourceSize %d to copy #%d (%s) on %s for child %s (%p) with refCnt %d (mustPrivatizeValueChild 
%s)\n",4395sourceSize,valueStorageReferenceCopy->getReferenceNumber(),cg->getDebug()->getName(valueStorageReferenceCopy->getSymbol()),4396node->getOpCode().getName(),valueChild->getOpCode().getName(),valueChild,valueChild->getReferenceCount(),4397mustPrivatizeValueChild?"yes":"no");43984399bool useSourceMR = sourceMR && !overlapZAPIsAllowed;44004401TR::Node *copySourceNode = useSourceMR ? valueChild : node;4402TR::MemoryReference *copySourceMR = useSourceMR ? sourceMR : targetMR;4403TR_StorageReference *copySourceStorageRef = useSourceMR ? valueStorageReference : targetStorageReference;44044405TR::MemoryReference *copyTargetMR = NULL;4406if (isBCD)4407{4408copySourceMR = reuseS390RightAlignedMemoryReference(copySourceMR, copySourceNode, copySourceStorageRef, cg);4409valueReg->setStorageReference(valueStorageReferenceCopy, valueChild);4410copyTargetMR = generateS390RightAlignedMemoryReference(valueChild, valueStorageReferenceCopy, cg);4411}4412else4413{4414copySourceMR = reuseS390MemRefFromStorageRef(copySourceMR, 0, copySourceNode, copySourceStorageRef, cg);4415valueReg->setStorageReference(valueStorageReferenceCopy, valueChild);4416copyTargetMR = generateS390MemRefFromStorageRef(valueChild, valueStorageReferenceCopy, cg);4417}44184419cg->genMemCpy(copyTargetMR, node, copySourceMR, copySourceNode, sourceSize);44204421if (useSourceMR)4422sourceMR = copySourceMR;4423else4424targetMR = copySourceMR;44254426// If we are accumulating a leading sign type, then the above copy will include the4427// byte widening that we did before storing. 
The long-term fix is to rewrite this evaluator4428// to make the copy before we do any modification of the stored value.4429// The short term fix is to copy the widened sign back into this copy.4430if (childContainsAccumulatedResult && isLeadingSignByteWidening)4431{4432uint16_t signSize = 0;4433TR::InstOpCode::Mnemonic signCopyOp = TR::InstOpCode::bad;44344435switch (node->getType().getDataType())4436{4437case TR::ZonedDecimalSignLeadingEmbedded:4438signSize = 1;4439signCopyOp = TR::InstOpCode::MVZ;4440break;4441case TR::ZonedDecimalSignLeadingSeparate:4442signSize = 1;4443signCopyOp = TR::InstOpCode::MVC;4444break;4445case TR::UnicodeDecimalSignLeading:4446signSize = 2;4447signCopyOp = TR::InstOpCode::MVC;4448break;4449default:4450TR_ASSERT(0, "unknown leading sign type in pdStoreEvaluator");4451}44524453TR::MemoryReference *originalSignCodeMR =4454reuseS390LeftAlignedMemoryReference(targetMR, node, targetStorageReference, cg, node->getSize());44554456TR::MemoryReference *copyMR =4457reuseS390LeftAlignedMemoryReference(copyTargetMR, valueChild, valueStorageReferenceCopy, cg, sourceSize);44584459if (cg->traceBCDCodeGen())4460traceMsg(comp,"\tAccumulating a leading sign type: have to restore the sign code for the copy: signSize %d\n",4461signSize);446244634464generateSS1Instruction(cg, signCopyOp, node,4465signSize-1,4466copyMR,4467originalSignCodeMR);44684469}4470}4471}44724473rcount_t finalValueChildRefCount = valueChild->getReferenceCount();4474if (changeCommonedChildAddress &&4475finalValueChildRefCount != origValueChildRefCount)4476{4477// In this case the addressChild and the valueChild share a commoned node.4478// This will cause the addressChild evaluation (done as part of getting targetMR) to be an impliedMemoryReference and4479// the aiadd will be incremented by one (in anticipation of the valueChild using the targetStorageRef going forward)4480// In the trivial case where this future use is only under the current store ( == 1 check below) then have to 
take care to do the final4481// recDec of the addressChild to remove the extra increment done when forming the targetMR.4482//4483// izdstore4484// aiadd4485// ...4486// zdload4487// =>zdload4488//4489TR_ASSERT(finalValueChildRefCount > 0 && finalValueChildRefCount < origValueChildRefCount,4490"finalValueChildRefCount %d must be > 0 and less than origValueChildRefCount %d on store %p\n",finalValueChildRefCount,origValueChildRefCount,node);4491// the only way the refCounts can be not equal is if we evaluated a targetMR4492TR_ASSERT(targetMR,"finalValueChildRefCount %d must be equal to origValueChildRefCount %d if targetMR is non-NULL on store %p\n",finalValueChildRefCount,origValueChildRefCount,node);4493if (isIndirect && finalValueChildRefCount == 1)4494{4495// only remaining use is as the valueChild of this very store so must do the final recDec of the addressChild4496// a recDec is safe here as the targetMR would have already privatized any loads in the address child to registers4497if (cg->traceBCDCodeGen())4498traceMsg(comp,"\tfinalValueChildRefCount < origValueChildRefCount (%d < %d) and is 1 so recursively dec addrChild %s (%p) %d->%d\n",4499finalValueChildRefCount,origValueChildRefCount,4500node->getFirstChild()->getOpCode().getName(),4501node->getFirstChild(),4502node->getFirstChild()->getReferenceCount(),node->getFirstChild()->getReferenceCount()-1);4503cg->recursivelyDecReferenceCount(node->getFirstChild());4504}4505if (cg->traceBCDCodeGen())4506traceMsg(comp,"\tfinalValueChildRefCount < origValueChildRefCount (%d < %d) decrement the targetStorageReference nodeRefCount by the difference %d->%d\n",4507finalValueChildRefCount,origValueChildRefCount,4508targetStorageReference->getNodeReferenceCount(),targetStorageReference->getNodeReferenceCount()-(origValueChildRefCount-finalValueChildRefCount));4509// the valueChild may be commoned more than once under the addressChild of the store so dec by the difference of the before and after 
// refCounts (end of the comment sentence begun on the preceding line)
      targetStorageReference->decrementNodeReferenceCount(origValueChildRefCount-finalValueChildRefCount);
      }

   if (targetMR == NULL)
      {
      if (isIndirect)
         {
         // if changeCommonedChildAddress=true then we must not decrement the addressChild as it will be needed for future commoned references
         // to the valueChild
         // a recDec is safe here as the only way no store can be done (targetMR==NULL case) is when valueChildren have already privatized
         // any loads in the address child to registers when accumulating to the final store location
         if (!changeCommonedChildAddress)
            {
            if (cg->traceBCDCodeGen())
               traceMsg(comp,"\tno explicit store inst and changeCommonedChildAddress=false so recursively dec addrChild %p %d->%d\n",
                  node->getFirstChild(),node->getFirstChild()->getReferenceCount(),node->getFirstChild()->getReferenceCount()-1);
            cg->recursivelyDecReferenceCount(node->getFirstChild());
            }
         else
            {
            if (cg->traceBCDCodeGen())
               traceMsg(comp,"\tno explicit store inst and changeCommonedChildAddress=true so do NOT recursively dec addrChild %p (refCount stays at %d)\n",
                  node->getFirstChild(),node->getFirstChild()->getReferenceCount());
            }
         }
      if (cg->traceBCDCodeGen())
         traceMsg(comp,"\tno explicit store inst so decrement the targetStorageReference nodeRefCount %d->%d\n",
            targetStorageReference->getNodeReferenceCount(),targetStorageReference->getNodeReferenceCount()-1);
      targetStorageReference->decrementNodeReferenceCount();
      }

   if (!evaluatedPaddingAnchor)
      cg->processUnusedNodeDuringEvaluation(NULL);

   cg->decReferenceCount(valueChild);
   return NULL;
   }

/**
 * \brief Emits a single VSTRL to store a packed decimal value held in a vector register.
 *
 * This only handles pdstore and pdstorei.
 * Other types of stores (zd, ud) can't use vector instructions.
 *
 * The value child is evaluated to a register (asserted to be of kind TR_FPR or TR_VRF),
 * a memory reference is created from the store node itself and one VSTRL is generated.
 * The reference counts of the value child and of child 0 are decremented before returning.
 *
 * \param node the store node
 * \param cg   the code generator
 * \return NULL (no result register is produced)
 */
TR::Register*
J9::Z::TreeEvaluator::pdstoreVectorEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg)
   {
   // NOTE(review): unlike the trace further down, the entry/exit traces are not guarded by cg->traceBCDCodeGen()
   traceMsg(cg->comp(), "DAA: Entering pdstoreVectorEvaluator %d\n", __LINE__);
   TR::Compilation *comp = cg->comp();
   TR::Node * valueChild = node->getValueChild();
   TR::Node* addressNode = node->getChild(0);
   // evaluate valueChild (which is assumed by the OMR layer to be the second child) to Vector register.
   // for this "pdStore" we assume if we evaluate value node we get Vector Register
   TR::Register* pdValueReg = cg->evaluate(valueChild);

   TR_ASSERT((pdValueReg->getKind() == TR_FPR || pdValueReg->getKind() == TR_VRF),
             "vectorized pdstore is expecting its value in a vector register.");

   if (cg->traceBCDCodeGen())
      {
      traceMsg(comp,"generating VSTRL for pdstore node->size = %d.\n", node->getSize());
      }

   // No need to evaluate the address node of the pdstorei.
   // generateVSIInstruction() API will call separateIndexRegister() to separate the index
   // register by emitting an LA instruction. If there's a need for large displacement adjustment,
   // LAY will be emitted instead.
   // NOTE(review): the second ';' below is an empty statement (harmless)
   TR::MemoryReference * targetMR = TR::MemoryReference::create(cg, node);;

   // 0 we store 1 byte, 15 we store 16 bytes
   uint8_t lengthToStore = TR_VECTOR_REGISTER_SIZE - 1;
   if (node->getDecimalPrecision() > TR_MAX_INPUT_PACKED_DECIMAL_PRECISION )
      {
      // precision above the limit: keep the full register length and bump the target offset
      // by the difference between the node size and the vector register size
      targetMR->addToOffset(node->getSize() - TR_VECTOR_REGISTER_SIZE);
      }
   else
      {
      lengthToStore = node->getSize() - 1;
      }

   generateVSIInstruction(cg, TR::InstOpCode::VSTRL, node, pdValueReg, targetMR, lengthToStore);
   cg->decReferenceCount(valueChild);
   // NOTE(review): addressNode is child 0; for a direct pdstore child 0 is presumably the value child as well,
   // which would make this a second decrement of the same node -- confirm against the callers
   cg->decReferenceCount(addressNode);

   traceMsg(comp, "DAA: Exiting pdstoreVectorEvaluator %d\n", __LINE__);
   return NULL;
   }

/**
 * \brief BCD-only wrapper around evaluateSignModifyingOperand.
 *
 * Asserts that \p node has a BCD type, forwards every argument unchanged to
 * evaluateSignModifyingOperand and returns the pseudo register view of the result
 * (asserted to be non-NULL).
 */
TR_PseudoRegister * J9::Z::TreeEvaluator::evaluateBCDSignModifyingOperand(TR::Node *node,
                                                                          bool isEffectiveNop,
                                                                          bool isNondestructiveNop,
                                                                          bool initTarget,
                                                                          TR::MemoryReference *sourceMR,
                                                                          TR::CodeGenerator *cg)
   {
   TR_ASSERT(node->getType().isBCD(),"node %p type %s must be BCD\n",node,node->getDataType().toString());
   TR_OpaquePseudoRegister *reg = evaluateSignModifyingOperand(node, isEffectiveNop, isNondestructiveNop, initTarget, sourceMR, cg);
   TR_PseudoRegister *pseudoReg = reg->getPseudoRegister();
   TR_ASSERT(pseudoReg,"pseudoReg should be non-NULL for node %p\n",node);
   return pseudoReg;
   }


/**
 * \brief Evaluates the first child of \p node and sets up the target register for \p node.
 *
 * One of three paths is taken:
 *   - isEffectiveNop: the target register is allocated from the child register and shares the child's
 *     storage reference. An initialized child is either left in place with its temporary marked read-only
 *     (when commoned) or clobber evaluated.
 *   - child register initialized (BCD only): a new pseudo register is allocated for the node's data type,
 *     the child's state is transferred and the child's storage reference is shared, again either via the
 *     read-only temporary mechanism or a clobber evaluate.
 *   - child register uninitialized (BCD only): the target gets the node's storage reference hint or a new
 *     temporary and, when \p initTarget is set, an MVC copies the value from \p sourceMR.
 *
 * The target register is set on \p node before returning. The child's reference count is not decremented
 * in this function (pdSetSignHelper does so after its call).
 *
 * \param node                the sign modifying node being evaluated
 * \param isEffectiveNop      selects the first path described above
 * \param isNondestructiveNop on the nop path, one of the conditions for skipping the clobber evaluate
 * \param initTarget          on the uninitialized path, whether to generate the initializing MVC
 * \param sourceMR            source memory reference; passed to ssrClobberEvaluate and asserted non-NULL
 *                            before the initializing MVC
 * \param cg                  the code generator
 * \return the target register that was set on \p node
 */
TR_OpaquePseudoRegister * J9::Z::TreeEvaluator::evaluateSignModifyingOperand(TR::Node *node,
                                                                             bool isEffectiveNop,
                                                                             bool isNondestructiveNop,
                                                                             bool initTarget,
                                                                             TR::MemoryReference *sourceMR,
                                                                             TR::CodeGenerator *cg)
   {
   bool isBCD = node->getType().isBCD();
   TR::Node *child = node->getFirstChild();
   TR_OpaquePseudoRegister *firstReg = cg->evaluateOPRNode(child);
   TR::Compilation *comp = cg->comp();

   if (isBCD)
      TR_ASSERT(firstReg->getPseudoRegister(),"firstReg->getPseudoRegister() is null in evaluateSignModifyingOperand for BCD node %p\n",child);

   if (cg->traceBCDCodeGen())
      {
      if (isBCD)
         traceMsg(comp,"\tevaluateSignModOperand %s (%p) : firstReg %s firstReg->getPseudoRegister()->prec %d (isInit %s, isLegalToCleanSign %s, isEffectiveNop %s, initTarget %s)\n",
            node->getOpCode().getName(),node,cg->getDebug()->getName(firstReg),firstReg->getPseudoRegister()->getDecimalPrecision(),
            firstReg->isInitialized() ? "yes":"no",firstReg->getPseudoRegister()->isLegalToCleanSign()? "yes":"no",isEffectiveNop ? "yes":"no",initTarget ? "yes":"no");
      else
         traceMsg(comp,"\tevaluateSignModOperand for aggr type %s (%p) : firstReg %s (isInit %s, isEffectiveNop %s, initTarget %s)\n",
            node->getOpCode().getName(),node,cg->getDebug()->getName(firstReg),
            firstReg->isInitialized() ? "yes":"no",isEffectiveNop ? "yes":"no",initTarget ? "yes":"no");
      }

   TR_OpaquePseudoRegister *targetReg = NULL;

   // Note that a clobber evaluate must be done for any initialized firstReg -- even in the effectiveNop case:
   //    2 pdclean        <- (isEffectiveNop=true) (temp1)
   //    1    pdremSelect <- node (isEffectiveNop=true) (temp1)
   //    2       pddivrem <- child (temp1)
   //    ...
   //    pdshr (clobbers temp1)
   //       =>pddivrem (temp1)
   //    ...
   //    =>pdclean (uses invalid clobbered temp1 - wrong)
   // if a clobber evaluate is *not* done and temp1 is used for the pdremSelect and the pdclean then the parent of the second reference to the pddivrem node
   // will clobber temp1 and subsequent references to pdclean (and pdremSelect if any) will use the incorrectly clobbered temp1.
   // The clobber evaluate will copy the pddivrem result in temp1 to temp2 and the commoned pdclean will use the (now unclobbered) temp1
   // TODO: an alternative fix would be to *not* clobber evaluate for the isEffectiveNop=true case but to instead allocate and mark a new register as read-only
   //       for the commoned pddivrem but clobberable for the pdremSelect and pdclean (basically do a clobber evaluate but don't generate an MVC to copy the value).
   //       Doing the MVC copy lazily by any later consumer (the pdshr) would likely be better in some cases.
   // UPDATE: the above TODO is complete as part of ReadOnlyTemporary sets done below
   //
   // resetReadOnly stays true only on the paths that do not go through the read-only temporary mechanism;
   // it controls the setIsReadOnlyTemporary(false, NULL) call at the end of this function
   bool resetReadOnly = true;
   if (isEffectiveNop)
      {
      resetReadOnly = false;
      targetReg = isBCD? cg->allocatePseudoRegister(firstReg->getPseudoRegister()) : cg->allocateOpaquePseudoRegister(firstReg);

      if (isBCD && (node->getDecimalPrecision() < firstReg->getPseudoRegister()->getDecimalPrecision()) &&
          (!firstReg->getPseudoRegister()->hasKnownOrAssumedSignCode() || (firstReg->getPseudoRegister()->getKnownOrAssumedSignCode() != TR::DataType::getPreferredPlusCode())))
         {
         // on a truncation of a value with an unknown or negative sign code then conservatively set clean to false as negative zero (unclean) may be produced
         targetReg->getPseudoRegister()->resetCleanSign();
         }
      TR_StorageReference *firstStorageReference = firstReg->getStorageReference();
      // transfer the zeroDigits/deadBytes and cache the firstReg->getStorageReference() *before* calling ssrClobberEvaluate in case
      // a new storage reference set on firstReg causes these values to be reset
      targetReg->setLeftAlignedZeroDigits(firstReg->getLeftAlignedZeroDigits());
      targetReg->setRightAlignedDeadBytes(firstReg->getRightAlignedDeadBytes());
      targetReg->setRightAlignedIgnoredBytes(firstReg->getRightAlignedIgnoredBytes());
      if (cg->traceBCDCodeGen())
         {
         traceMsg(comp,"\t * setting rightAlignedDeadBytes %d from firstReg %s to targetReg %s (signMod nop)\n",
            firstReg->getRightAlignedDeadBytes(),cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));
         traceMsg(comp,"\t * setting rightAlignedIgnoredBytes %d from firstReg %s to targetReg %s (signMod nop)\n",
            firstReg->getRightAlignedIgnoredBytes(),cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));
         if (isBCD)
            traceMsg(comp,"\t * setting savedLeftAlignedZeroDigits %d from firstReg %s to targetReg %s (signMod nop)\n",
               firstReg->getLeftAlignedZeroDigits(),cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));
         }

      if (firstReg->isInitialized())
         {
         // The extra work to allow this for non-temp based is to expand the skipCopyOnStore check to all nodes (i.e. do not restrict this flag to those directly under a store node).
         // This skipCopyOnStore analysis will then guarantee that the underlying non-temp variable is not killed before its next use(s).
         if (!comp->getOption(TR_DisableRefinedBCDClobberEval) && firstStorageReference->isTemporaryBased() && isNondestructiveNop)
            {
            if (cg->traceBCDCodeGen())
               traceMsg(comp,"%sskipping ssrClobberEvaluate for %s (%p) with child %s (%p) refCount %d %s 1 owningRegisterCount %d %s 1-- %s mark #%d (%s) as readOnlyTemp (nondestructive nop case)\n",
                  child->getReferenceCount() > 1 ? "y^y : ":"",
                  node->getOpCode().getName(),node,child->getOpCode().getName(),child,
                  child->getReferenceCount(),child->getReferenceCount() > 1 ? ">":"<=",
                  firstStorageReference->getOwningRegisterCount(), firstStorageReference->getOwningRegisterCount() > 1 ? ">" : "<=",
                  child->getReferenceCount() > 1 ? "do":"do not",firstStorageReference->getReferenceNumber(),
                  cg->getDebug()->getName(firstStorageReference->getSymbol()));

            if (child->getReferenceCount() > 1 || firstStorageReference->getOwningRegisterCount() > 1)
               {
               firstStorageReference->setIsReadOnlyTemporary(true, child);
               }
            resetReadOnly = false;
            }
         else
            {
            cg->ssrClobberEvaluate(child, sourceMR);
            }
         }

      // transfer the storageRef *after* calling ssrClobberEvaluate so the referenceCounts of the temporaries are set correctly
      TR_StorageReference *targetStorageReference = firstStorageReference;
      targetReg->setStorageReference(targetStorageReference, node);
      if (!firstReg->isInitialized() && targetStorageReference->isNodeBased())
         {
         // NodeReferenceCounts are not used for node based hints and this path should never be reached for these hints
         // as this type of storage reference is only used when it has been initialized
         TR_ASSERT( !targetStorageReference->isNodeBasedHint(),"a node based hint should have been initialized\n");
         // This is the case where the firstChild is likely an ipdload (or a pdclean of ipdload etc)
         if (cg->traceBCDCodeGen())
            traceMsg(comp,"\tisEffectiveNop=yes and firstReg->isInit=false case so increment the targetStorageReference nodeRefCount by (node->refCount() - 1) = %d : %d->%d\n",
               node->getReferenceCount()-1,
               targetStorageReference->getNodeReferenceCount(),
               targetStorageReference->getNodeReferenceCount()+(node->getReferenceCount()-1));
         targetStorageReference->incrementNodeReferenceCount(node->getReferenceCount()-1);
         cg->privatizeStorageReference(node, targetReg, NULL);
         }
      }
   else if (firstReg->isInitialized())
      {
      TR_ASSERT( isBCD, "this path should only be taken for BCD nodes (unless we extend support for aggr types)\n");
      TR_StorageReference *firstStorageReference = firstReg->getStorageReference();
      // An initialized reg cannot have a non-hint node based storage reference as these would come from an ipdload node and pdload's never initialize a register
      TR_ASSERT( firstStorageReference->isTemporaryBased() || firstStorageReference->isNodeBasedHint(),"expecting the initalized firstReg to be either a temp or a node based hint\n");
      targetReg = cg->allocatePseudoRegister(node->getDataType());
      // transfer the zeroDigits/deadBytes and cache the firstReg->getStorageReference() *before* calling ssrClobberEvaluate in case
      // a new storage reference set on firstReg causes these values to be reset
      targetReg->setLeftAlignedZeroDigits(firstReg->getLeftAlignedZeroDigits());
      targetReg->setRightAlignedDeadBytes(firstReg->getRightAlignedDeadBytes());
      targetReg->setRightAlignedIgnoredBytes(firstReg->getRightAlignedIgnoredBytes());
      targetReg->getPseudoRegister()->transferDataState(firstReg->getPseudoRegister());
      if (cg->traceBCDCodeGen())
         {
         traceMsg(comp,"\t * setting rightAlignedDeadBytes %d from firstReg %s to targetReg %s (signMod isInit)\n",
            firstReg->getRightAlignedDeadBytes(),cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));
         traceMsg(comp,"\t * setting rightAlignedIgnoredBytes %d from firstReg %s to targetReg %s (signMod isInit)\n",
            firstReg->getRightAlignedIgnoredBytes(),cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));
         traceMsg(comp,"\t * setting savedLeftAlignedZeroDigits %d from firstReg %s to targetReg %s (signMod isInit)\n",
            firstReg->getLeftAlignedZeroDigits(),cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));

         }

      if (!comp->getOption(TR_DisableRefinedBCDClobberEval) && firstReg->canBeConservativelyClobberedBy(node))
         {
         // pdclean
         //   3 pdadd
         //
         // AP    t1,t2
         // ZAP   t1,t1   // this ZAP is a conservative clobber as it will not modify the value in pdadd and there are no special sign codes to be preserved
         //
         // the t1 storageReference will be marked as readOnly and pdadd added to nodeToUpdateOnClobber list so if/when t1 is actually clobbered the commoned
         // register/node can have its storageRef updated to point to the saved value.
         //
         if (cg->traceBCDCodeGen())
            traceMsg(comp,"%sskipping ssrClobberEvaluate for %s (%p) with child %s (%p) refCount %d %s 1 owningRegisterCount %d %s 1-- %s mark #%d (%s) as readOnlyTemp (isInit case)\n",
               child->getReferenceCount() > 1 ? "y^y : ":"",
               node->getOpCode().getName(),node,child->getOpCode().getName(),child,
               child->getReferenceCount(),child->getReferenceCount() > 1 ? ">":"<=",
               firstStorageReference->getOwningRegisterCount(), firstStorageReference->getOwningRegisterCount() > 1 ? ">" : "<=",
               child->getReferenceCount() > 1 ? "do":"do not",firstStorageReference->getReferenceNumber(),
               cg->getDebug()->getName(firstStorageReference->getSymbol()));

         if (child->getReferenceCount() > 1 || firstStorageReference->getOwningRegisterCount() > 1)
            {
            firstStorageReference->setIsReadOnlyTemporary(true, child);
            }
         resetReadOnly = false;
         }
      else
         {
         cg->ssrClobberEvaluate(child, sourceMR);
         }

      // transfer the storageRef *after* calling ssrClobberEvaluate so the referenceCounts of the temporaries are set correctly
      targetReg->setStorageReference(firstStorageReference, node);
      targetReg->setIsInitialized();
      }
   else
      {
      TR_ASSERT( isBCD, "this path should only be taken for BCD nodes (unless we extend support for aggr types)\n");
      targetReg = cg->allocatePseudoRegister(node->getDataType());
      // prefer the node's storage reference hint when it has one, otherwise create a new temporary
      TR_StorageReference *targetStorageReference = NULL;
      if (node->getOpCode().canHaveStorageReferenceHint() && node->getStorageReferenceHint())
         targetStorageReference = node->getStorageReferenceHint();
      else
         targetStorageReference = TR_StorageReference::createTemporaryBasedStorageReference(node->getStorageReferenceSize(), comp);
      targetReg->setStorageReference(targetStorageReference, node);
      if (initTarget)
         {
         int32_t srcLiveSymbolSize = firstReg->getLiveSymbolSize();
         int32_t targetLiveSymbolSize = targetReg->getLiveSymbolSize();
         int32_t mvcSize = node->getSize();
         bool isTruncation = node->getSize() < firstReg->getSize();
         // if there are some left aligned zero digits in the source then increase the mvcSize to capture these in the initializing MVC
         if (firstReg->trackZeroDigits() &&
             (targetLiveSymbolSize == srcLiveSymbolSize) &&
             (srcLiveSymbolSize > mvcSize) &&
             (firstReg->getBytesToClear(mvcSize, srcLiveSymbolSize) == 0))
            {
            // increasing the mvcSize to include already zero'd bytes is illegal if targetLiveSymbolSize < srcLiveSymbolSize and
            // legal if targetLiveSymbolSize>=srcLiveSymbolSize but pointless if targetLiveSymbolSize > srcLiveSymbolSize as the extra
            // zero bytes will not be tracked on the targetReg so only do this when targetLiveSymbolSize == srcLiveSymbolSize
            //
            // In this case the source register has some zero bytes above its register size so increase the MVC size to include these zero bytes
            // e.g. if targetReg->getSize()=6 but the childLiveSymbolSize=9 then increase the mvcSize by 3 to 9
            if (cg->traceBCDCodeGen())
               traceMsg(comp,"\tupper %d bytes on srcReg %s are already clear so set mvcSize=%d\n", srcLiveSymbolSize-mvcSize,cg->getDebug()->getName(firstReg),srcLiveSymbolSize);
            targetReg->addRangeOfZeroBytes(mvcSize,srcLiveSymbolSize);
            mvcSize = srcLiveSymbolSize;
            }
         else if (!isTruncation) // on a widening only initialize up to the source size
            {
            if (cg->traceBCDCodeGen())
               traceMsg(comp,"\tfirstReg->getSize() <= node->getSize() (%d <= %d) so reduce mvcSize\n",firstReg->getSize(),node->getSize());
            mvcSize = firstReg->getSize();
            }

         if (isTruncation && node->getType().isSeparateSign())
            {
            mvcSize -= node->getDataType().separateSignSize();
            if (cg->traceBCDCodeGen())
               traceMsg(comp,"\tnode %s is a truncating separateSign type so reduce mvcSize by sign size (%d->%d)\n",
                  node->getOpCode().getName(),mvcSize+node->getDataType().separateSignSize(),mvcSize);
            }

         if (cg->traceBCDCodeGen())
            traceMsg(comp,"\tfirstReg->isInitialized()==false so gen MVC to init with mvcSize %d\n", mvcSize);
         TR_ASSERT( sourceMR,"source memory reference should have been created by caller\n");
         // the SS1 length operand is passed as (number of bytes - 1)
         generateSS1Instruction(cg, TR::InstOpCode::MVC, node,
                                mvcSize-1,
                                generateS390RightAlignedMemoryReference(node, targetStorageReference, cg),
                                generateS390RightAlignedMemoryReference(*sourceMR, node, 0, cg));
         targetReg->getPseudoRegister()->transferDataState(firstReg->getPseudoRegister());
         targetReg->setIsInitialized();
         }
      }

   if (isEffectiveNop || firstReg->isInitialized())
      cg->freeUnusedTemporaryBasedHint(node);

   if (firstReg->getSize() < node->getSize())
      {
      TR_ASSERT( isBCD, "this path should only be taken for BCD nodes (unless we extend support for aggr types)\n");
      if (cg->traceBCDCodeGen())
         traceMsg(comp,"\twidening: firstRegSize < nodeSize (%d < %d) so set targetReg->getPseudoRegister()->prec to firstReg->prec (%d)\n",firstReg->getSize(), node->getSize(),firstReg->getPseudoRegister()->getDecimalPrecision());
      targetReg->getPseudoRegister()->setDecimalPrecision(firstReg->getPseudoRegister()->getDecimalPrecision());
      }

   if (cg->traceBCDCodeGen() && targetReg->getStorageReference()->isReadOnlyTemporary())
      traceMsg(comp,"%sreset readOnlyTemp flag on storageRef #%d (%s) (signMod case)\n",
         resetReadOnly?"":"do not ",targetReg->getStorageReference()->getReferenceNumber(),cg->getDebug()->getName(targetReg->getStorageReference()->getSymbol()));

   if (resetReadOnly)
      targetReg->getStorageReference()->setIsReadOnlyTemporary(false, NULL);

   node->setRegister(targetReg);
   return targetReg;
   }

/**
 * \brief Sets the sign code \p sign on the value produced by the first child of \p node.
 *
 * Packed types are delegated to simpleWideningOrTruncation with setSign=true. For TR::ZonedDecimal the
 * target register is prepared by evaluateBCDSignModifyingOperand, its precision is set to the smaller of
 * the source and node precisions, and the sign code is set via cg->genSignCodeSetting unless the operation
 * is an effective nop (ignored sign code, or the source already has the requested known/assumed sign).
 * Any other data type asserts.
 *
 * The resulting register is set on \p node and the first child's reference count is decremented.
 *
 * \param node the set sign node
 * \param sign the sign code to set
 * \param cg   the code generator
 * \return the target register (NULL if the unexpected datatype path is reached with asserts disabled)
 */
TR::Register *J9::Z::TreeEvaluator::pdSetSignHelper(TR::Node *node, int32_t sign, TR::CodeGenerator *cg)
   {
   TR::Node *srcNode = node->getFirstChild();
   TR_PseudoRegister *srcReg = cg->evaluateBCDNode(srcNode);
   TR_PseudoRegister *targetReg = NULL;

   if (node->getType().isAnyPacked())
      {
      targetReg = simpleWideningOrTruncation(node, srcReg, true, sign, cg); // setSign=true
      }
   else if (node->getDataType() == TR::ZonedDecimal)
      {
      bool isEffectiveNop = (sign == TR::DataType::getIgnoredSignCode()) || srcReg->knownOrAssumedSignCodeIs(sign);
      TR::MemoryReference *sourceMR = NULL;
      // a source memory reference is only created when the target will be initialized by a copy from the source
      if (!srcReg->isInitialized() && !isEffectiveNop)
         sourceMR = generateS390RightAlignedMemoryReference(srcNode, srcReg->getStorageReference(), cg);
      // isEffectiveNop is passed for both the isEffectiveNop and the isNondestructiveNop parameters
      targetReg = evaluateBCDSignModifyingOperand(node, isEffectiveNop, isEffectiveNop, true, sourceMR, cg); // initTarget=true
      bool isTruncation = srcReg->getDecimalPrecision() > node->getDecimalPrecision();
      if (isTruncation)
         targetReg->setDecimalPrecision(node->getDecimalPrecision());
      else
         targetReg->setDecimalPrecision(srcReg->getDecimalPrecision());
      if (!isEffectiveNop)
         {
         TR_StorageReference *targetStorageReference = targetReg->getStorageReference();
         // NOTE(review): firstStorageReference is not used after this declaration
         TR_StorageReference *firstStorageReference = srcReg->getStorageReference();
         TR::MemoryReference *destMR = generateS390RightAlignedMemoryReference(node, targetStorageReference, cg);
         int32_t destLength = targetReg->getSize();
         cg->genSignCodeSetting(node, targetReg, destLength, destMR, sign, srcReg, 0, false); // digitsToClear=0, numericNibbleIsZero=false
         }
      }
   else
      {
      TR_ASSERT(false,"unexpected datatype %s in pdSetSignHelper\n",node->getDataType().toString());
      }

   node->setRegister(targetReg);
   cg->decReferenceCount(srcNode);
   return targetReg;
   }

/**
 * \brief Evaluator function to evaluate pdSetSign opCode
 */
TR::Register*
J9::Z::TreeEvaluator::pdSetSignEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   cg->traceBCDEntry("pdSetSign",node);
   cg->generateDebugCounter("PD-Op/pdsetsign", 1, TR::DebugCounter::Cheap);

   TR::Register *targetReg = NULL;
   TR::Node *signNode = node->getSecondChild();

   TR_ASSERT(signNode->getOpCode().isLoadConst() && signNode->getOpCode().getSize() <= 4,
             "expecting a <= 4 size integral constant set sign amount\n");
   TR_ASSERT(node->getFirstChild()->getType().isAnyPacked(), "expecting setSign's first child of PD data type");

   int32_t sign = (int32_t)signNode->get64bitIntegralValue();
   cg->decReferenceCount(signNode);

   static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
   // NOTE(review): && binds tighter than ||, so this is (supportsFeature && !TR_DisableVectorBCD) || isVectorBCDEnv;
   // a non-NULL isVectorBCDEnv selects this path regardless of the other two checks -- confirm that is intended
   if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD) || isVectorBCDEnv)
      {
      targetReg =
vectorPerformSignOperationHelper(node, cg, false, 0, node->hasKnownOrAssumedCleanSign(), SignOperationType::setSign, false, true, sign);4935}4936else4937{4938targetReg = pdSetSignHelper(node, sign, cg);4939}49404941cg->traceBCDExit("pdSetSign",node);4942return targetReg;4943}49444945/**4946* TR::pdclear4947* TR::pdclearSetSign4948* current limitation for this is that leftMostDigit must equal digitsToClear (i.e. clearing right most digits)4949*/4950TR::Register *4951J9::Z::TreeEvaluator::pdclearEvaluator(TR::Node *node, TR::CodeGenerator *cg)4952{4953cg->traceBCDEntry("pdclear",node);4954cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "PD-Op/%s", node->getOpCode().getName()),49551, TR::DebugCounter::Cheap);4956TR_ASSERT(!node->getOpCode().isSetSign(),"isSetSign on child not supported for node %s (%p)\n",node->getOpCode().getName(),node);4957bool isSetSign = node->getOpCode().isSetSignOnNode();4958TR_RawBCDSignCode setSignValue = isSetSign ? node->getSetSign() : raw_bcd_sign_unknown;4959int32_t sign = TR::DataType::getValue(setSignValue);4960TR::Compilation *comp = cg->comp();49614962TR_ASSERT(!isSetSign || setSignValue != raw_bcd_sign_unknown,"setSignValue must be on the node for %p\n",node);49634964TR::Node *srcNode = node->getChild(0);4965TR::Node *leftMostDigitNode = node->getChild(1);4966TR::Node *digitsToClearNode = node->getChild(2);4967TR::Node *literalAddrNode = (isSetSign && node->getNumChildren() > 3) ? 
node->getChild(3) : NULL;49684969TR_ASSERT(leftMostDigitNode->getOpCode().isLoadConst() && leftMostDigitNode->getSize() <= 4,4970"leftMostDigitNode %p must be a <= 4 size const\n",leftMostDigitNode);4971TR_ASSERT(digitsToClearNode->getOpCode().isLoadConst() && digitsToClearNode->getSize() <= 4,4972"digitsToClearNode %p must be a <= 4 size const\n",digitsToClearNode);49734974int32_t leftMostDigit = leftMostDigitNode->get32bitIntegralValue();4975int32_t leftMostByte = TR::DataType::packedDecimalPrecisionToByteLength(leftMostDigit);4976int32_t digitsToClear = digitsToClearNode->get32bitIntegralValue();4977int32_t rightMostDigit = leftMostDigit - digitsToClear;49784979TR_ASSERT(leftMostDigit == digitsToClear,"leftMostDigit %d must equal digitsToClear for node %p\n",leftMostDigit,digitsToClear,node);49804981TR_PseudoRegister *srcReg = cg->evaluateBCDNode(srcNode);4982bool isInitialized = srcReg->isInitialized();4983if (cg->traceBCDCodeGen())4984traceMsg(comp,"\t%s (%p) : srcNode %s (%p) isInit=%s, digitClearRange %d->%d (leftMostByte=%d), digitsToClear = %d (isSetSign %s, sign 0x%x)\n",4985node->getOpCode().getName(),node,4986srcNode->getOpCode().getName(),srcNode,4987isInitialized ? 
"yes":"no",4988leftMostDigit,rightMostDigit,leftMostByte,digitsToClear,isSetSign?"yes":"no",sign);4989TR_StorageReference *srcStorageReference = srcReg->getStorageReference();4990TR::MemoryReference *sourceMR = generateS390RightAlignedMemoryReference(srcNode, srcStorageReference, cg);49914992TR_PseudoRegister *targetReg = evaluateBCDValueModifyingOperand(node, true, sourceMR, cg); // initTarget=true4993TR::MemoryReference *destMR = generateS390RightAlignedMemoryReference(node, targetReg->getStorageReference(), cg);49944995bool isTruncation = srcReg->getDecimalPrecision() > node->getDecimalPrecision();4996if (isTruncation)4997targetReg->setDecimalPrecision(node->getDecimalPrecision());4998else4999targetReg->setDecimalPrecision(srcReg->getDecimalPrecision());50005001int32_t targetRegPrec = targetReg->getDecimalPrecision();50025003if (cg->traceBCDCodeGen())5004traceMsg(comp,"\tset targetReg prec to %d (isTrucation %s)\n",targetRegPrec,isTruncation?"yes":"no");50055006bool truncatedIntoClearedDigits = false;5007if (targetRegPrec < leftMostDigit)5008{5009truncatedIntoClearedDigits = true;5010int32_t precDelta = leftMostDigit - targetRegPrec;5011leftMostDigit -= precDelta;5012leftMostByte = TR::DataType::packedDecimalPrecisionToByteLength(leftMostDigit);5013digitsToClear -= precDelta;5014rightMostDigit = leftMostDigit - digitsToClear;5015if (cg->traceBCDCodeGen())5016traceMsg(comp,"\ttargetRegPrec %d < leftMostDigit %d : update leftMostDigit %d->%d, leftMostByte = %d, digitsToClear %d->%d, rightMostDigit = %d\n",5017targetRegPrec,leftMostDigit+precDelta,leftMostDigit+precDelta,leftMostDigit,leftMostByte,digitsToClear+precDelta,digitsToClear,rightMostDigit);5018}50195020// do not bother checking !node->canSkipPadByteClearing() below because being able to clear the full byte generally results in better codegen5021// coincidentEvenDigitCorrection is true when leftMostNibble == targetRegPrec so instead of generating separate NI 0xF0 and then NI 0x0F on the same byte5022// 
just inc digitsToClear below so this full byte clearing can be done in one instruction5023// e.g. p4v0 = (p15v0 / 10000) * 100005024int32_t leftMostByteForClear = leftMostByte;5025bool needsEvenDigitCorrection = !truncatedIntoClearedDigits && isTruncation && targetReg->isEvenPrecision();5026bool coincidentEvenDigitCorrection = needsEvenDigitCorrection && (leftMostByteForClear == targetReg->getSize());5027if (isEven(leftMostDigit))5028{5029if (cg->traceBCDCodeGen())5030traceMsg(comp,"\tleftMostDigit %d isEven : isInit=%s, truncatedIntoClearedDigits=%s, coincidentEvenDigitCorrection=%s -- adjust the leftMostNibble to preserve or clear the leftMostByte\n",5031leftMostDigit,isInitialized?"yes":"no",truncatedIntoClearedDigits?"yes":"no",needsEvenDigitCorrection?"yes":"no");50325033if (isInitialized && !truncatedIntoClearedDigits && !coincidentEvenDigitCorrection) // full byte will be cleared if truncatedIntoClearedDigits or coincidentEvenDigitCorrection are true5034{5035if (cg->traceBCDCodeGen())5036traceMsg(comp,"\t\tisInit=yes,truncatedIntoClearedDigits=no,coincidentEvenDigitCorrection=no so dec %d->%d to preserve initialized leftMostNibble\n",digitsToClear,digitsToClear-1);5037digitsToClear--; // must preserve the top byte and then clear just the top digit after the clearAndSetSign5038leftMostByteForClear--;5039}5040else5041{5042if (cg->traceBCDCodeGen())5043traceMsg(comp,"\t\tisInit=no or truncatedIntoClearedDigits=yes or coincidentEvenDigitCorrection=yes so inc %d->%d to clear initialized leftMostNibble\n",digitsToClear,digitsToClear+1);5044digitsToClear++; // clear a larger even # of digits and put back5045}5046}50475048if (!isTruncation && srcReg->isEvenPrecision() && srcReg->isLeftMostNibbleClear())5049{5050if (cg->traceBCDCodeGen())5051traceMsg(comp,"\twidening with even srcRegPrec %d update targetReg with zero range for leftMostNibble 
%d->%d\n",5052srcReg->getDecimalPrecision(),srcReg->getDecimalPrecision(),srcReg->getDecimalPrecision()+1);5053targetReg->addRangeOfZeroDigits(srcReg->getDecimalPrecision(),srcReg->getDecimalPrecision()+1);5054}50555056// clearAndSetSign will be clearing full bytes so half byte values or signs will be put back afterwards5057clearAndSetSign(node, targetReg, leftMostByteForClear, digitsToClear, destMR, srcReg, sourceMR, isSetSign, sign, isInitialized, cg); // isSignInitialized=isInitialized50585059if (!(truncatedIntoClearedDigits || coincidentEvenDigitCorrection))5060{5061if (isEven(leftMostDigit))5062{5063if (isInitialized)5064{5065{5066if (cg->traceBCDCodeGen())5067traceMsg(comp,"\tisInit=yes : gen NI to clear right most nibble at byte %d\n",leftMostByte);5068generateSIInstruction(cg, TR::InstOpCode::NI, node,5069reuseS390LeftAlignedMemoryReference(destMR, node, targetReg->getStorageReference(), cg, leftMostByte),50700xF0);5071}5072}5073else5074{5075if (cg->traceBCDCodeGen())5076traceMsg(comp,"\tisInit=no : gen MVZ to restore left most nibble at byte %d\n",leftMostByte);5077int32_t mvzSize = 1;5078generateSS1Instruction(cg, TR::InstOpCode::MVZ, node,5079mvzSize-1,5080reuseS390LeftAlignedMemoryReference(destMR, node, targetReg->getStorageReference(), cg, leftMostByte),5081reuseS390LeftAlignedMemoryReference(sourceMR, srcNode, srcStorageReference, cg, leftMostByte));5082}5083}50845085if (needsEvenDigitCorrection && !node->canSkipPadByteClearing())5086cg->genZeroLeftMostPackedDigits(node, targetReg, targetReg->getSize(), 1, destMR);5087}50885089cg->decReferenceCount(srcNode);5090cg->decReferenceCount(leftMostDigitNode);5091cg->decReferenceCount(digitsToClearNode);5092cg->processUnusedNodeDuringEvaluation(literalAddrNode);5093cg->traceBCDExit("pdclear",node);5094return targetReg;5095}50965097TR::Register *5098J9::Z::TreeEvaluator::pdchkEvaluator(TR::Node *node, TR::CodeGenerator *cg)5099{5100TR::Compilation *comp = cg->comp();5101TR::Register *chkResultReg = 
cg->allocateRegister(TR_GPR);5102generateRRInstruction(cg, comp->target().is64Bit() ? TR::InstOpCode::XGR : TR::InstOpCode::XR, node, chkResultReg, chkResultReg);51035104TR::Node * pdloadNode = node->getFirstChild();5105TR::Register* pdReg = NULL;51065107static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");5108if(comp->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) &&5109!comp->getOption(TR_DisableVectorBCD) ||5110isVectorBCDEnv)5111{5112pdReg = cg->evaluate(pdloadNode);5113generateVRRgInstruction(cg, TR::InstOpCode::VTP, node, pdReg);5114}5115else5116{5117pdReg = cg->evaluateBCDNode(pdloadNode);5118TR_StorageReference *pdStorageReference = static_cast<TR_PseudoRegister*>(pdReg)->getStorageReference();5119TR::MemoryReference *tempMR = generateS390RightAlignedMemoryReference(pdloadNode, pdStorageReference, cg);5120generateRSLInstruction(cg, TR::InstOpCode::TP, pdloadNode, static_cast<TR_PseudoRegister*>(pdReg)->getSize()-1, tempMR);5121}51225123generateRRInstruction(cg, TR::InstOpCode::IPM, node, chkResultReg, chkResultReg);51245125if(comp->target().is64Bit())5126{5127generateRRInstruction(cg, TR::InstOpCode::LLGTR, node, chkResultReg, chkResultReg);5128generateRSInstruction(cg, TR::InstOpCode::SRLG, node, chkResultReg, chkResultReg, 28);5129}5130else5131{5132generateRSInstruction(cg, TR::InstOpCode::SRL, node, chkResultReg, 28);5133}51345135node->setRegister(chkResultReg);5136cg->decReferenceCount(pdloadNode);5137return chkResultReg;5138}51395140/**5141* pd<op>Evaluator - various binary packed decimal evaluators5142*/5143void5144J9::Z::TreeEvaluator::correctPackedArithmeticPrecision(TR::Node *node, int32_t op1EncodingSize, TR_PseudoRegister *targetReg, int32_t computedResultPrecision, TR::CodeGenerator * cg)5145{5146int32_t computedResultSize = TR::DataType::packedDecimalPrecisionToByteLength(computedResultPrecision);5147if (op1EncodingSize >= computedResultSize)5148targetReg->removeRangeOfZeroDigits(0, 
computedResultPrecision);5149else5150targetReg->removeRangeOfZeroBytes(0, op1EncodingSize);51515152int32_t resultPrecision = std::min<int32_t>(computedResultPrecision, node->getDecimalPrecision());5153targetReg->setDecimalPrecision(resultPrecision);5154if (cg->traceBCDCodeGen())5155traceMsg(cg->comp(),"\tset targetRegPrec to min(computedResultPrecision, nodePrec) = min(%d, %d) = %d (targetRegSize = %d)\n",5156computedResultPrecision,node->getDecimalPrecision(),resultPrecision,targetReg->getSize());5157}51585159TR::Register *5160J9::Z::TreeEvaluator::pdaddEvaluator(TR::Node * node, TR::CodeGenerator * cg)5161{5162cg->traceBCDEntry("pdadd",node);5163cg->generateDebugCounter("PD-Op/pdadd", 1, TR::DebugCounter::Cheap);51645165TR::Register * reg = NULL;51665167static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");5168if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD) || isVectorBCDEnv)5169{5170reg = pdArithmeticVectorEvaluatorHelper(node, TR::InstOpCode::VAP, cg);5171}5172else5173{5174reg = pdaddsubEvaluatorHelper(node, TR::InstOpCode::AP, cg);5175}51765177cg->traceBCDExit("pdadd",node);5178return reg;5179}51805181TR::Register *5182J9::Z::TreeEvaluator::pdsubEvaluator(TR::Node * node, TR::CodeGenerator * cg)5183{5184cg->traceBCDEntry("pdsub",node);5185cg->generateDebugCounter("PD-Op/pdsub", 1, TR::DebugCounter::Cheap);51865187TR::Register * reg = NULL;51885189static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");5190if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD) || isVectorBCDEnv)5191{5192reg = pdArithmeticVectorEvaluatorHelper(node, TR::InstOpCode::VSP, cg);5193}5194else5195{5196reg = pdaddsubEvaluatorHelper(node, TR::InstOpCode::SP, cg);5197}51985199cg->traceBCDExit("pdsub",node);5200return reg;5201}52025203int32_t getAddSubComputedResultPrecision(TR::Node *node, TR::CodeGenerator * 
cg)5204{5205TR::Node *firstChild = node->getFirstChild();5206TR::Node *secondChild = node->getSecondChild();52075208TR_PseudoRegister *firstReg = firstChild->getPseudoRegister();5209if (firstReg == NULL)5210firstReg = cg->evaluateBCDNode(firstChild);52115212TR_PseudoRegister *secondReg = secondChild->getPseudoRegister();5213if (secondReg == NULL)5214secondReg = cg->evaluateBCDNode(secondChild);52155216int32_t precBump = (firstChild->isZero() || secondChild->isZero()) ? 0 : 1;5217int32_t computedResultPrecision = std::max(firstReg->getDecimalPrecision(), secondReg->getDecimalPrecision())+precBump;52185219return computedResultPrecision;5220}52215222/**5223* This evaluator helper function uses BCD vector instructions for PD arithmetic operations:5224*5225* -- pdadd5226* -- pdsub5227* -- pdmul5228* -- pddiv5229*5230* whose corresponding BCD vector instructions are of VRI-f format.5231*/5232TR::Register *5233J9::Z::TreeEvaluator::pdArithmeticVectorEvaluatorHelper(TR::Node * node, TR::InstOpCode::Mnemonic op, TR::CodeGenerator * cg)5234{5235int32_t immediateValue = node->getDecimalPrecision();5236TR_ASSERT_FATAL((immediateValue >> 8) == 0, "Decimal precision (%d) exceeds 1 byte", immediateValue);52375238if (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY) && cg->getIgnoreDecimalOverflowException())5239{5240immediateValue |= 0x80;5241}5242TR::Node* firstChild = node->getFirstChild();5243TR::Node* secondChild = node->getSecondChild();52445245TR::Register* firstChildReg = cg->evaluate(firstChild);5246TR::Register* secondChildReg = cg->evaluate(secondChild);52475248// For simple PD Decimal Operations, let's set the mask to 0: no force positive nor set CC5249TR::Register* targetReg = cg->allocateRegister(TR_VRF);5250generateVRIfInstruction(cg, op, node, targetReg, firstChildReg, secondChildReg, immediateValue, 
0x1);5251node->setRegister(targetReg);52525253cg->decReferenceCount(firstChild);5254cg->decReferenceCount(secondChild);52555256return targetReg;5257}52585259/**5260* Handles pdadd,pdsub5261*/5262TR::Register *5263J9::Z::TreeEvaluator::pdaddsubEvaluatorHelper(TR::Node * node, TR::InstOpCode::Mnemonic op, TR::CodeGenerator * cg)5264{5265bool produceOverflowMessage = node->getOpCode().isPackedArithmeticOverflowMessage();5266bool isAdd = (op == TR::InstOpCode::AP);5267TR::Node *firstChild = node->getFirstChild();5268TR::Node *secondChild = node->getSecondChild();5269TR::Compilation *comp = cg->comp();52705271TR_PseudoRegister *firstReg = cg->evaluateBCDNode(firstChild);5272bool trackSignState=false;5273bool alwaysLegalToCleanSign=true; // ok to use ZAP (and clobber srcSign) to init as there is an AP/SP coming5274TR_PseudoRegister *targetReg = evaluateBCDValueModifyingOperand(node, true, NULL, cg, trackSignState, 0, alwaysLegalToCleanSign); // initTarget=true, sourceMR=NULL, srcSize=05275cg->decReferenceCount(firstChild); // dec bef evaluating the second child to avoid an unneeded clobber evaluate5276TR_PseudoRegister *secondReg = cg->evaluateBCDNode(secondChild);5277TR_StorageReference *targetStorageReference = targetReg->getStorageReference();5278TR::MemoryReference *destMR = generateS390RightAlignedMemoryReference(node, targetStorageReference, cg);5279TR::MemoryReference *secondMR = generateS390RightAlignedMemoryReference(secondChild, secondReg->getStorageReference(), cg);528052815282int32_t op1EncodingPrecision = cg->getPDAddSubEncodedPrecision(node, firstReg);5283int32_t op1EncodingSize = cg->getPDAddSubEncodedSize(node, firstReg);5284// The preparatory clearing operations need a length set so base it on the op1EncodingSize but the final returned precision will be set after the AP/SP instruction has been generated5285targetReg->setDecimalPrecision(op1EncodingPrecision);52865287if (cg->traceBCDCodeGen())5288traceMsg(comp,"\t%s: produceOverflowMessage=%s, 
node->getSize()=%d, firstReg->getSize()=%d, secondReg->getSize()=%d, op1EncodingPrec=%d, op1EncodingSize=%d\n",5289node->getOpCode().getName(),produceOverflowMessage?"yes":"no", node->getSize(), firstReg->getSize(), secondReg->getSize(),op1EncodingPrecision, targetReg->getSize());52905291if (op1EncodingSize > firstReg->getSize())5292cg->clearByteRangeIfNeeded(node, targetReg, generateS390RightAlignedMemoryReference(*destMR, node, 0, cg), firstReg->getSize(), op1EncodingSize, true); // widenOnLeft=true52935294// endByte=firstReg->getSize but for types like packed where the sign is right aligned this endByte setting does not matter5295// as the leftMostByte for the sign is always known (== 1)5296cg->correctBadSign(firstChild, firstReg, firstReg->getSize(), destMR);5297cg->correctBadSign(secondChild, secondReg, secondReg->getSize(), secondMR);52985299int32_t computedResultPrecision = getAddSubComputedResultPrecision(node, cg);5300bool mayOverflow = computedResultPrecision > node->getDecimalPrecision();5301correctPackedArithmeticPrecision(node, op1EncodingSize, targetReg, computedResultPrecision, cg);53025303if (cg->traceBCDCodeGen())5304traceMsg(comp,"\tcomputedResultPrecision %s nodePrec (%d %s %d) -- mayOverflow = %s\n",5305mayOverflow?">":"<=",computedResultPrecision,mayOverflow?">":"<=",node->getDecimalPrecision(),mayOverflow?"yes":"no");53065307TR::LabelSymbol * cFlowRegionStart = NULL;5308TR::LabelSymbol * cflowRegionEnd = NULL;5309TR::RegisterDependencyConditions * deps = NULL;5310if (mayOverflow && produceOverflowMessage)5311{5312cFlowRegionStart = generateLabelSymbol(cg);5313cflowRegionEnd = generateLabelSymbol(cg);5314deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 4, cg);53155316if (destMR->getIndexRegister())5317deps->addPostConditionIfNotAlreadyInserted(destMR->getIndexRegister(), TR::RealRegister::AssignAny);5318if (destMR->getBaseRegister())5319deps->addPostConditionIfNotAlreadyInserted(destMR->getBaseRegister(), 
TR::RealRegister::AssignAny);5320if (secondMR->getIndexRegister())5321deps->addPostConditionIfNotAlreadyInserted(secondMR->getIndexRegister(), TR::RealRegister::AssignAny);5322if (secondMR->getBaseRegister())5323deps->addPostConditionIfNotAlreadyInserted(secondMR->getBaseRegister(), TR::RealRegister::AssignAny);53245325generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart, deps);5326cFlowRegionStart->setStartInternalControlFlow();5327}53285329generateSS2Instruction(cg, op, node,5330op1EncodingSize-1,5331generateS390RightAlignedMemoryReference(*destMR, node, 0, cg),5332secondReg->getSize()-1,5333generateS390RightAlignedMemoryReference(*secondMR, node, 0, cg));53345335targetReg->setHasKnownValidSignAndData();53365337if (mayOverflow)5338{5339if (targetReg->isEvenPrecision() && !node->canSkipPadByteClearing())5340{5341cg->genZeroLeftMostPackedDigits(node, targetReg, targetReg->getSize(), 1, generateS390RightAlignedMemoryReference(*destMR, node, 0, cg));5342}5343targetReg->setHasKnownPreferredSign();5344if (cg->traceBCDCodeGen())5345traceMsg(comp,"\toverflow may occur so set HasKnownPreferredSign = true on reg %s\n",cg->getDebug()->getName(targetReg));5346if (produceOverflowMessage)5347{5348// The only overflow message handled is overflow into the next byte (i.e. 
not 'even' to 'odd' precision 'overflow').5349// This is also an important restriction as no NI for the top nibble is done here and if it were to be done then this5350// would also overwrite the condition code in the isFoldedIf=true case5351TR_ASSERT(targetReg->isOddPrecision(),"expecting targetPrecision to be odd and not %d for addsubOverflowMessage\n",targetReg->getDecimalPrecision());53525353TR::LabelSymbol *oolEntryPoint = generateLabelSymbol(cg);5354TR::LabelSymbol *oolReturnPoint = generateLabelSymbol(cg);53555356generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BO, node, oolEntryPoint);53575358generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cflowRegionEnd, deps);5359cflowRegionEnd->setEndInternalControlFlow();5360}5361}5362else5363{5364targetReg->setHasKnownCleanSign();5365if (cg->traceBCDCodeGen())5366{5367if (firstChild->isZero() || secondChild->isZero())5368traceMsg(comp,"\t%s firstChild %p isZero=%s or secondChild %p isZero=%s so nibble clearing is NOT required and set HasKnownCleanSign = true on reg %s\n",5369isAdd?"add":"sub",firstChild,firstChild->isZero()?"yes":"no",secondChild,secondChild->isZero()?"yes":"no",cg->getDebug()->getName(targetReg));5370else5371traceMsg(comp,"\t%s result prec %d is > both reg1 prec %d and reg2 prec %d so nibble clearing is NOT required and set HasKnownCleanSign = true on reg %s\n",5372isAdd?"add":"sub",node->getDecimalPrecision(),firstReg->getDecimalPrecision(),secondReg->getDecimalPrecision(),cg->getDebug()->getName(targetReg));5373}5374// An NI to clear the top nibble is never required in this case:5375// If the largest source is even (eg prec 4) then biggest the result can be is odd (i.e. +1 largest source -- prec 5)5376// and on an odd result no clearing is needed5377// If the largest source is odd (eg prec 5) then the biggest the result can be is even (i.e. 
+1 largest source -- prec 6)5378// and the top nibble must already be clear as the whole byte must be clear before the operation5379}538053815382if (isAdd &&5383firstReg->hasKnownOrAssumedPositiveSignCode() &&5384secondReg->hasKnownOrAssumedPositiveSignCode())5385{5386if (cg->traceBCDCodeGen())5387traceMsg(comp, "\tfirstReg and secondReg have positive sign codes so set targetReg sign code to the preferred positive sign 0x%x\n", TR::DataType::getPreferredPlusCode());5388// positive+positive=positive and then AP will clean the positive sign to 0xc5389targetReg->setKnownSignCode(TR::DataType::getPreferredPlusCode());5390}53915392node->setRegister(targetReg);5393cg->decReferenceCount(secondChild);5394return targetReg;5395}53965397TR::Register *5398J9::Z::TreeEvaluator::pdmulEvaluator(TR::Node * node, TR::CodeGenerator * cg)5399{5400cg->traceBCDEntry("pdmul",node);5401cg->generateDebugCounter("PD-Op/pdmul", 1, TR::DebugCounter::Cheap);54025403TR::Register * reg = NULL;54045405static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");5406if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) &&5407!cg->comp()->getOption(TR_DisableVectorBCD) ||5408isVectorBCDEnv)5409{5410reg = pdArithmeticVectorEvaluatorHelper(node, TR::InstOpCode::VMP, cg);5411}5412else5413{5414reg = pdmulEvaluatorHelper(node, cg);5415}54165417cg->traceBCDExit("pdmul",node);5418return reg;5419}54205421TR::Register *5422J9::Z::TreeEvaluator::pdmulEvaluatorHelper(TR::Node * node, TR::CodeGenerator * cg)5423{5424TR::Node *firstChild = node->getFirstChild();5425TR::Node *secondChild = node->getSecondChild();5426TR::Compilation *comp = cg->comp();54275428TR_PseudoRegister *firstReg = cg->evaluateBCDNode(firstChild);5429bool trackSignState=false;5430bool alwaysLegalToCleanSign=true; // ok to use ZAP (and clobber srcSign) to init as there is an MP coming5431TR_PseudoRegister *targetReg = evaluateBCDValueModifyingOperand(node, true, NULL, cg, trackSignState, 0, 
alwaysLegalToCleanSign); // initTarget=true, sourceMR=NULL, srcSize=05432cg->decReferenceCount(firstChild);5433TR_PseudoRegister *secondReg = cg->evaluateBCDNode(secondChild);5434TR_StorageReference *targetStorageReference = targetReg->getStorageReference();5435TR::MemoryReference *destMR = generateS390RightAlignedMemoryReference(node, targetStorageReference, cg);5436TR::MemoryReference *secondMR = generateS390RightAlignedMemoryReference(secondChild, secondReg->getStorageReference(), cg);54375438int32_t op1EncodingPrecision = cg->getPDMulEncodedPrecision(node, firstReg, secondReg);5439int32_t op1EncodingSize = cg->getPDMulEncodedSize(node, firstReg, secondReg);5440// The preparatory clearing operations need a length set so base it on the op1EncodingSize but the final precision will be set after the MP instruction has been generated5441targetReg->setDecimalPrecision(op1EncodingPrecision);54425443TR_ASSERT( targetReg->getSize() >= firstReg->getSize() + secondReg->getSize(),"MP may result in a data exception\n");5444TR_ASSERT( secondReg->getSize() <= 8, "MP will result in a spec exception\n");54455446cg->clearByteRangeIfNeeded(node, targetReg, generateS390RightAlignedMemoryReference(*destMR, node, 0, cg), firstReg->getSize(), op1EncodingSize, true); // widenOnLeft=true54475448cg->correctBadSign(firstChild, firstReg, firstReg->getSize(), destMR);5449cg->correctBadSign(secondChild, secondReg, secondReg->getSize(), secondMR);54505451TR::Instruction * cursor =5452generateSS2Instruction(cg, TR::InstOpCode::MP, node,5453op1EncodingSize-1,5454generateS390RightAlignedMemoryReference(*destMR, node, 0, cg),5455secondReg->getSize()-1,5456generateS390RightAlignedMemoryReference(*secondMR, node, 0, cg));54575458targetReg->setHasKnownValidSignAndData();54595460int32_t computedResultPrecision = firstReg->getDecimalPrecision() + secondReg->getDecimalPrecision();5461correctPackedArithmeticPrecision(node, op1EncodingSize, targetReg, computedResultPrecision, cg);54625463if 
(targetReg->getDecimalPrecision() < computedResultPrecision)5464{5465if (!node->canSkipPadByteClearing() && targetReg->isEvenPrecision())5466cg->genZeroLeftMostPackedDigits(node, targetReg, targetReg->getSize(), 1, generateS390RightAlignedMemoryReference(*destMR, node, 0, cg));5467}5468else if (cg->traceBCDCodeGen())5469{5470traceMsg(comp,"TR::InstOpCode::MP node %p targetRegPrec %d >= computedResultPrecision %d (firstRegPrec %d + secondRegPrec %d) so skip nibble clearing\n",5471node,targetReg->getDecimalPrecision(),computedResultPrecision,firstReg->getDecimalPrecision(),secondReg->getDecimalPrecision());5472}54735474// Even with no overflow MP can produce a negative zero as the sign of the result is determined from the rules5475// of algebra *even when one or both of the operands are zero*. So 0 * -1 = -0 (0x0c * 0x1d = 0x0d -- not clean result)5476// MP will always produce a result with a preferred sign however.5477if (firstReg->hasKnownOrAssumedPositiveSignCode() &&5478secondReg->hasKnownOrAssumedPositiveSignCode())5479{5480if (cg->traceBCDCodeGen())5481traceMsg(comp, "\tfirstReg and secondReg have positive sign codes so set targetReg sign code to the preferred positive sign 0x%x\n", TR::DataType::getPreferredPlusCode());5482// positive*positive=positive and then MP will clean the positive sign to 0xc5483targetReg->setKnownSignCode(TR::DataType::getPreferredPlusCode());5484}5485else5486{5487targetReg->setHasKnownPreferredSign();5488}54895490cg->decReferenceCount(secondChild);5491return targetReg;5492}54935494/**5495* Handles pddiv, and pdrem.5496*/5497TR::Register *5498J9::Z::TreeEvaluator::pddivremEvaluator(TR::Node * node, TR::CodeGenerator * cg)5499{5500cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "PD-Op/%s", node->getOpCode().getName()),55011, TR::DebugCounter::Cheap);5502TR::Register * reg = NULL;55035504static char* isVectorBCDEnv = 
feGetEnv("TR_enableVectorBCD");5505if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD) || isVectorBCDEnv)5506{5507reg = pddivremVectorEvaluatorHelper(node, cg);5508}5509else5510{5511reg = pddivremEvaluatorHelper(node, cg);5512}55135514return reg;5515}55165517/**5518* Handles pddiv, and pdrem. This is the vector evaluator helper function.5519*/5520TR::Register *5521J9::Z::TreeEvaluator::pddivremVectorEvaluatorHelper(TR::Node * node, TR::CodeGenerator * cg)5522{5523TR::Register* vTargetReg = NULL;5524TR::InstOpCode::Mnemonic opCode;5525switch(node->getOpCodeValue())5526{5527case TR::pddiv:5528opCode = TR::InstOpCode::VDP;5529break;5530case TR::pdrem:5531opCode = TR::InstOpCode::VRP;5532break;5533default:5534TR_ASSERT(0, "Unexpected opcode in pddiv/remVectorEvaluatorHelper");5535break;5536}55375538vTargetReg = pdArithmeticVectorEvaluatorHelper(node, opCode, cg);5539return vTargetReg;5540}55415542/**5543* Handles pddiv, and pdrem. 
This is the non-vector evaluator helper function.5544*/5545TR::Register *5546J9::Z::TreeEvaluator::pddivremEvaluatorHelper(TR::Node * node, TR::CodeGenerator * cg)5547{5548TR_ASSERT( node->getOpCodeValue() == TR::pddiv || node->getOpCodeValue() == TR::pdrem,5549"pddivEvaluator only valid for pddiv/pdrem\n");55505551TR::Node *firstChild = node->getFirstChild();5552TR::Node *secondChild = node->getSecondChild();5553TR::Compilation *comp = cg->comp();55545555TR_PseudoRegister *firstReg = cg->evaluateBCDNode(firstChild);5556bool trackSignState=false;5557bool alwaysLegalToCleanSign=true; // ok to use ZAP (and clobber srcSign) to init as there is a DP coming5558TR_PseudoRegister *targetReg = evaluateBCDValueModifyingOperand(node, true, NULL, cg, trackSignState, 0, alwaysLegalToCleanSign); // initTarget=true, sourceMR=NULL, srcSize=05559cg->decReferenceCount(firstChild);5560TR_PseudoRegister *secondReg = cg->evaluateBCDNode(secondChild);5561TR_StorageReference *targetStorageReference = targetReg->getStorageReference();5562TR::MemoryReference *destMR = generateS390RightAlignedMemoryReference(node, targetStorageReference, cg);55635564if (secondReg->getDecimalPrecision() > secondChild->getDecimalPrecision())5565{5566TR_ASSERT( false,"the secondRegPrec has grown so using an inline DP may not be legal\n"); // TODO: for now disallow this completely but the below fix is also correct.5567TR_ASSERT(secondReg->getSize() == secondChild->getSize(),5568"the secondRegSize (regSize %d != nodeSize %d) has grown so using an inline DP may not be legal\n",secondReg->getSize(),secondChild->getSize());5569// The register precision may have been conservatively adjusted from an even precision to the next odd precision so in these5570// cases set it back to the even precision so the inline divide will still be legal. 
This extra nibble of precision will be zero so this is safe.5571secondReg->setDecimalPrecision(secondReg->getDecimalPrecision()-1);5572}55735574int32_t dividendPrecision = 0;5575int32_t divisorSize = 0;5576int32_t dividendSizeBumpForClear = 0;5577TR::MemoryReference *divisorMR = NULL;55785579divisorMR = generateS390RightAlignedMemoryReference(secondChild, secondReg->getStorageReference(), cg);5580dividendPrecision = cg->getPDDivEncodedPrecision(node, firstReg, secondReg);5581divisorSize = secondReg->getSize();55825583targetReg->setDecimalPrecision(dividendPrecision);5584int32_t dividendSize = targetReg->getSize();5585TR_ASSERT( dividendSize <= node->getStorageReferenceSize(),"allocated symbol for pddiv/pdrem is too small\n");5586if (cg->traceBCDCodeGen())5587traceMsg(comp,"\t%s: gen DP dividendSize = %d, secondOpSize = secondRegSize = %d, targetRegSize = %d (firstRegPrec %d, secondRegPrec %d)\n",5588node->getOpCode().getName(),dividendSize,secondReg->getSize(),targetReg->getSize(),firstReg->getDecimalPrecision(),secondReg->getDecimalPrecision());55895590cg->clearByteRangeIfNeeded(node, targetReg, generateS390RightAlignedMemoryReference(*destMR, node, 0, cg), dividendSize-divisorSize-dividendSizeBumpForClear, dividendSize, true); // widenOnLeft=true55915592cg->correctBadSign(firstChild, firstReg, targetReg->getSize(), destMR);5593cg->correctBadSign(secondChild, secondReg, secondReg->getSize(), divisorMR);55945595generateSS2Instruction(cg, TR::InstOpCode::DP, node,5596dividendSize-1,5597generateS390RightAlignedMemoryReference(*destMR, node, 0, cg),5598divisorSize-1,5599generateS390RightAlignedMemoryReference(*divisorMR, node, 0, cg));56005601targetReg->setHasKnownValidSignAndData();56025603bool isRem = node->getOpCodeValue() == TR::pdrem;5604int32_t deadBytes = 0;5605bool isTruncation = false;5606if (isRem)5607{5608targetReg->setDecimalPrecision(secondReg->getDecimalPrecision());5609isTruncation = node->getDecimalPrecision() < targetReg->getDecimalPrecision();5610if 
(cg->traceBCDCodeGen())5611traceMsg(comp,"\tpdrem: setting targetReg prec to divisor prec %d (node prec is %d), isTruncation=%s\n",5612secondReg->getDecimalPrecision(),node->getDecimalPrecision(),isTruncation?"yes":"no");5613targetReg->removeRangeOfZeroDigits(0, TR::DataType::byteLengthToPackedDecimalPrecisionCeiling(dividendSize));5614}5615else5616{5617deadBytes = divisorSize;5618// computedQuotientPrecision is the size of the quotient as computed by the DP instruction.5619// The actual returned node precision may be less.5620int32_t computedQuotientPrecision = TR::DataType::byteLengthToPackedDecimalPrecisionCeiling(dividendSize - deadBytes);5621if (firstReg->isEvenPrecision())5622{5623if (cg->traceBCDCodeGen())5624traceMsg(comp,"\tfirstRegPrec (%d) isEven=true so reduce computedQuotientPrecision %d->%d\n",firstReg->getDecimalPrecision(),computedQuotientPrecision,computedQuotientPrecision-1);5625computedQuotientPrecision--;5626}5627isTruncation = node->getDecimalPrecision() < computedQuotientPrecision;5628int32_t resultQuotientPrecision = std::min<int32_t>(computedQuotientPrecision, node->getDecimalPrecision());5629targetReg->setDecimalPrecision(resultQuotientPrecision);5630targetReg->addToRightAlignedDeadBytes(deadBytes);5631if (cg->traceBCDCodeGen())5632{5633traceMsg(comp,"\tisDiv=true (pddivrem) : increment targetReg %s deadBytes %d -> %d (by the divisorSize)\n",5634cg->getDebug()->getName(targetReg),targetReg->getRightAlignedDeadBytes()-deadBytes,targetReg->getRightAlignedDeadBytes());5635traceMsg(comp,"\tsetting targetReg prec to min(computedQuotPrec, nodePrec) = min(%d, %d) = %d (size %d), isTruncation=%s\n",5636computedQuotientPrecision,node->getDecimalPrecision(),resultQuotientPrecision,targetReg->getSize(),isTruncation?"yes":"no");5637}5638targetReg->removeRangeOfZeroDigits(0, computedQuotientPrecision);5639}56405641if (!node->canSkipPadByteClearing() && targetReg->isEvenPrecision() && isTruncation)5642{5643TR_ASSERT( node->getStorageReferenceSize() >= 
dividendSize,"operand size should only shrink from original size\n");5644int32_t leftMostByte = targetReg->getSize();5645if (cg->traceBCDCodeGen())5646traceMsg(comp,"\t%s: generating NI to clear top nibble with leftMostByte = targetReg->getSize() = %d\n",isRem ? "pdrem":"pddiv",targetReg->getSize());5647cg->genZeroLeftMostPackedDigits(node, targetReg, leftMostByte, 1, generateS390RightAlignedMemoryReference(*destMR, node, -deadBytes, cg));5648}56495650targetReg->setHasKnownPreferredSign();5651if (isRem)5652{5653// sign of the remainder is the same as the sign of dividend (and then set to the preferred sign by the DP instruction)5654if (firstReg->hasKnownOrAssumedSignCode())5655{5656targetReg->setKnownSignCode(firstReg->hasKnownOrAssumedPositiveSignCode() ? TR::DataType::getPreferredPlusCode() : TR::DataType::getPreferredMinusCode());5657if (cg->traceBCDCodeGen())5658traceMsg(comp,"\tpdrem: firstReg has the knownSignCode 0x%x so set targetReg sign code to the preferred sign 0x%x\n",5659firstReg->getKnownOrAssumedSignCode(),targetReg->getKnownOrAssumedSignCode());5660}5661}5662else5663{5664// when the sign of the divisor and divident are different then the quotient sign is negative otherwise if the signs are the same then the5665// quotient sign is positive5666if (firstReg->hasKnownOrAssumedSignCode() && secondReg->hasKnownOrAssumedSignCode())5667{5668bool dividendSignIsPositive = firstReg->hasKnownOrAssumedPositiveSignCode();5669bool dividendSignIsNegative = !dividendSignIsPositive;5670bool divisorSignIsPositive = secondReg->hasKnownOrAssumedPositiveSignCode();5671bool divisorSignIsNegative = !divisorSignIsPositive;56725673if ((dividendSignIsPositive && divisorSignIsPositive) ||5674(dividendSignIsNegative && divisorSignIsNegative))5675{5676targetReg->setKnownSignCode(TR::DataType::getPreferredPlusCode());5677if (cg->traceBCDCodeGen())5678traceMsg(comp,"\tpddiv: dividendSign matches the divisorSign so set targetReg sign code to the preferred sign 0x%x\n", 
TR::DataType::getPreferredPlusCode());5679}5680else5681{5682targetReg->setKnownSignCode(TR::DataType::getPreferredMinusCode());5683if (cg->traceBCDCodeGen())5684traceMsg(comp,"\tpddiv: dividendSign does not match the divisorSign so set targetReg sign code to the preferred sign 0x%x\n", TR::DataType::getPreferredMinusCode());5685}5686}5687}56885689cg->decReferenceCount(secondChild);5690return targetReg;5691}56925693/**5694* Handles pdshr5695*/5696TR::Register *5697J9::Z::TreeEvaluator::pdshrEvaluator(TR::Node * node, TR::CodeGenerator * cg)5698{5699cg->traceBCDEntry("pdshr",node);5700cg->generateDebugCounter("PD-Op/pdshr", 1, TR::DebugCounter::Cheap);57015702TR::Register* targetReg = NULL;57035704static char* isEnableVectorBCD = feGetEnv("TR_enableVectorBCD");5705if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) &&5706!cg->comp()->getOption(TR_DisableVectorBCD) ||5707isEnableVectorBCD)5708{5709targetReg = pdshrVectorEvaluatorHelper(node, cg);5710}5711else5712{5713targetReg = pdshiftEvaluatorHelper(node, cg, true);5714}57155716cg->traceBCDExit("pdshr",node);5717return targetReg;5718}57195720void5721J9::Z::TreeEvaluator::clearAndSetSign(TR::Node *node,5722TR_PseudoRegister *targetReg,5723int32_t leftMostByteForClear,5724int32_t digitsToClear,5725TR::MemoryReference *destMR,5726TR_PseudoRegister *srcReg,5727TR::MemoryReference *sourceMR,5728bool isSetSign,5729int32_t sign,5730bool signCodeIsInitialized,5731TR::CodeGenerator *cg)5732{5733TR::Compilation *comp = cg->comp();57345735if (cg->traceBCDCodeGen())5736traceMsg(comp,"\tclearAndSetSign: digitsToClear %d, leftMostByte %d (isSetSign=%s, sign 0x%x)\n",digitsToClear,leftMostByteForClear,isSetSign?"yes":"no",sign);5737bool clearingNeeded = digitsToClear > 0;5738if (isSetSign)5739{5740// a better sign code setting maybe possible if a current setting is known5741TR_PseudoRegister *signReg = signCodeIsInitialized ? 
targetReg : NULL;5742int32_t digitsCleared = cg->genSignCodeSetting(node, targetReg, node->getSize(), generateS390RightAlignedMemoryReference(*destMR, node, 0, cg), sign, signReg, digitsToClear, !clearingNeeded);5743if (clearingNeeded)5744{5745digitsToClear-=digitsCleared;5746if (digitsToClear > 0 && (digitsToClear&0x1) && sign == TR::DataType::getIgnoredSignCode())5747{5748digitsToClear++; // when digitsToClear is odd for the ignore sign code case then bump up to the next even amount (and clear the sign too) as this is easier to clear5749targetReg->setHasKnownBadSignCode();5750if (cg->traceBCDCodeGen())5751traceMsg(comp,"\tignored setSign case so inc digitsToClear %d->%d and setHasKnownBadSignCode=true on targetReg %s\n",5752digitsToClear-1,digitsToClear,cg->getDebug()->getName(targetReg));5753}5754}5755signCodeIsInitialized = true;5756if (cg->traceBCDCodeGen())5757{5758if (clearingNeeded)5759traceMsg(comp,"\t\tisSetSign case (clearingNeeded==true): sign setting cleared %d digits so adjust digitsToClear %d->%d\n",5760digitsCleared,digitsToClear+digitsCleared,digitsToClear);5761traceMsg(comp,"\t\tisSetSign case: set signCode of 0x%x on targetReg %s\n",sign,cg->getDebug()->getName(targetReg));5762}5763}5764else if (!signCodeIsInitialized)5765{5766/* if (digitsToClear == 1) // MVN done later is better then MVC/NI as the latter suffers from an OSC5767{5768int32_t mvcSize = 1;5769generateSS1Instruction(cg, TR::InstOpCode::MVC, node,5770mvcSize-1,5771generateS390RightAlignedMemoryReference(*destMR, node, 0, cg),5772generateS390RightAlignedMemoryReference(*sourceMR, node, 0, cg));5773targetReg->transferSignState(srcReg, true); // digitsLost=true -- a clear always loses digits5774signCodeIsInitialized = true; // no longer clear the sign code in the code below for if (needLateClear)5775if (cg->traceBCDCodeGen()) traceMsg(comp,"\t\tdigitsToClear==1 case: gen MVC to initialize sign code\n");5776}5777else */5778if (clearingNeeded)5779{5780digitsToClear++; // clear the sign 
code too and then MVN in the new sign code5781if (cg->traceBCDCodeGen()) traceMsg(comp,"\t\t init=false && isSetSign=false case : bump digitsToClear %d->%d to clear entire field\n",digitsToClear,digitsToClear+1);5782}5783}5784TR_ASSERT(digitsToClear >= 0,"digitsToClear %d should be >= 0\n",digitsToClear);5785if (digitsToClear > 0)5786{5787if (cg->traceBCDCodeGen()) traceMsg(comp,"\t\tdigitsToClear %d > 0 so call genClearLeftMostDigitsIfNeeded\n",digitsToClear);5788cg->genZeroLeftMostDigitsIfNeeded(node, targetReg, leftMostByteForClear, digitsToClear, generateS390RightAlignedMemoryReference(*destMR, node, 0, cg));5789}57905791if (!signCodeIsInitialized)5792{5793if (cg->traceBCDCodeGen())5794traceMsg(comp,"\t\tsignCodeIsInitialized=false after clearing of %d digits : init the sign now with an MVN of size 1\n",digitsToClear,isSetSign?"yes":"no");5795// Move the sign code over from the source location. The top nibble has already been cleared above.5796int32_t mvnSize = 1;5797generateSS1Instruction(cg, TR::InstOpCode::MVN, node,5798mvnSize-1,5799generateS390RightAlignedMemoryReference(*destMR, node, 0, cg),5800generateS390RightAlignedMemoryReference(*sourceMR, node, 0, cg));5801targetReg->transferSignState(srcReg, true); // digitsLost=true -- a clear always loses digits5802}5803}58045805TR_PseudoRegister *5806J9::Z::TreeEvaluator::simpleWideningOrTruncation(TR::Node *node,5807TR_PseudoRegister *srcReg,5808bool isSetSign,5809int32_t sign,5810TR::CodeGenerator *cg)5811{5812TR::Compilation *comp = cg->comp();5813if (cg->traceBCDCodeGen())5814traceMsg(comp,"\tsimple widening or truncating shift: srcRegPrecision %d, isSetSign=%s, sign 0x%x\n",srcReg->getDecimalPrecision(),isSetSign?"yes":"no",sign);5815bool isDigitTruncation = false;5816bool needsTopNibbleClearing = false;5817int32_t srcPrecision = srcReg->getDecimalPrecision();5818if (srcReg->getDecimalPrecision() > node->getDecimalPrecision())5819{5820srcPrecision = node->getDecimalPrecision();5821isDigitTruncation = 
true;5822if (!node->canSkipPadByteClearing() && node->isEvenPrecision() && srcReg->getDigitsToClear(srcPrecision,srcPrecision+1) != 0)5823needsTopNibbleClearing = true;5824}58255826int32_t targetPrecision = node->getDecimalPrecision();58275828if (!isDigitTruncation && srcReg->isEvenPrecision() && !srcReg->isLeftMostNibbleClear())5829{5830if (targetPrecision != srcPrecision) // in case this routine starts doing explicit widenings at some point then !canSkipPadByteClearing alone is not valid5831{5832needsTopNibbleClearing = true;5833}5834else if (!node->canSkipPadByteClearing())5835{5836needsTopNibbleClearing = true;5837if (cg->traceBCDCodeGen()) traceMsg(comp,"z^z : new clear : simpleWide %p\n",node);5838}5839}58405841bool isPassThrough = false;5842bool initTargetAndSign = (isSetSign && !isPassThrough); // try to get a ZAP generated here for a widening as this can simplify the coming setSign operation5843bool isNondestructiveNop = isPassThrough && !isDigitTruncation;5844TR_PseudoRegister *targetReg = NULL;5845TR::MemoryReference *sourceMR = NULL;5846if (cg->traceBCDCodeGen())5847traceMsg(comp,"\tisDigitTruncation=%s, srcPrecision=%d, isPassThrough=%s, needsTopNibbleClearing=%s, initTargetAndSign=%s\n",5848isDigitTruncation?"true":"false",srcPrecision,isPassThrough?"true":"false",needsTopNibbleClearing?"true":"false",initTargetAndSign?"yes":"no");5849if (!isPassThrough)5850sourceMR = generateS390RightAlignedMemoryReference(node->getFirstChild(), srcReg->getStorageReference(), cg);5851if (initTargetAndSign || needsTopNibbleClearing)5852targetReg = evaluateBCDValueModifyingOperand(node, initTargetAndSign, sourceMR, cg, initTargetAndSign);5853else5854targetReg = evaluateBCDSignModifyingOperand(node, isPassThrough, isNondestructiveNop, false, sourceMR, cg); // initTarget=false58555856bool isInitialized = targetReg->isInitialized();5857TR::MemoryReference *destMR = NULL;5858if (!isPassThrough)5859destMR = generateS390RightAlignedMemoryReference(node, 
targetReg->getStorageReference(), cg);5860if (!isInitialized && !isPassThrough)5861{5862int32_t srcSize = TR::DataType::packedDecimalPrecisionToByteLength(srcPrecision);5863if (cg->traceBCDCodeGen())5864traceMsg(comp,"\tisInit=false and isPassThru=false so gen initializing MVC with size %d. Do not clear after MVC just set targetReg->prec to srcPrecision %d\n",srcSize,srcPrecision);5865generateSS1Instruction(cg, TR::InstOpCode::MVC, node,5866srcSize-1,5867generateS390RightAlignedMemoryReference(*destMR, node, 0, cg),5868generateS390RightAlignedMemoryReference(*sourceMR, node, 0, cg));5869}5870else if (cg->traceBCDCodeGen())5871{5872traceMsg(comp,"\tisInit=true (%s) or isPassThru=true (%s): no move needed just set targetReg->prec to srcPrecision %d\n",isInitialized?"yes":"no",isPassThrough?"yes":"no",srcPrecision);5873}58745875// a ZAP may have been generated when initializing targetReg so in this case do not transfer the srcReg sign5876if (!targetReg->signStateInitialized() || !initTargetAndSign)5877targetReg->transferSignState(srcReg, isDigitTruncation);58785879targetReg->setDecimalPrecision(targetPrecision);58805881if (isSetSign && !isPassThrough)5882cg->genSignCodeSetting(node, targetReg, targetReg->getSize(), generateS390RightAlignedMemoryReference(*destMR, node, 0, cg), sign, targetReg, 0, false /* !topNibbleIsZero */);5883else5884targetReg->transferSignState(srcReg, isDigitTruncation);58855886targetReg->transferDataState(srcReg);58875888if (needsTopNibbleClearing)5889{5890if (cg->traceBCDCodeGen()) traceMsg(comp,"\tisDigitTruncation=true and targetReg->isEvenPrecision() (%d) so clear top nibble\n",targetReg->isEvenPrecision());5891int32_t leftMostByteForClear = TR::DataType::packedDecimalPrecisionToByteLength(srcPrecision);5892cg->genZeroLeftMostPackedDigits(node, targetReg, leftMostByteForClear, 1, generateS390RightAlignedMemoryReference(*destMR, node, 0, cg));5893}58945895if (!isPassThrough)5896targetReg->setIsInitialized();58975898return 
targetReg;5899}59005901/*5902* \brief5903* Generate non-exception throwing instructions for pdModifyPrecision node to narrow or widen packed decimals.5904* The generated instruction sequence does not validate the source packed decimals. Any invalid packed5905* decimals will be loaded as is and modified as if their digits and signs were valid.5906*/5907TR::Register *5908J9::Z::TreeEvaluator::pdModifyPrecisionEvaluator(TR::Node * node, TR::CodeGenerator * cg)5909{5910cg->traceBCDEntry("pdModifyPrecision",node);5911cg->generateDebugCounter("PD-Op/pdmodifyPrec", 1, TR::DebugCounter::Cheap);59125913TR::Register* targetReg = NULL;59145915static char* isEnableVectorBCD = feGetEnv("TR_enableVectorBCD");5916if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) &&5917!cg->comp()->getOption(TR_DisableVectorBCD)5918|| isEnableVectorBCD)5919{5920int32_t targetPrec = node->getDecimalPrecision();5921targetReg = cg->allocateRegister(TR_VRF);59225923if (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY))5924{5925// Overflow exceptions can be ignored for z15 vector packed decimal VRI-i,f,g and VRR-i instructions. Given5926// this, VPSOP now becomes suitable for data truncations without incurring exceptions which eventually lead to5927// performance degradations. 
This is usually used to truncate high nibble of an even precision PD.5928targetReg = vectorPerformSignOperationHelper(node, cg, true, targetPrec, true, SignOperationType::maintain, false, false, 0, false, true);5929}5930else5931{5932int32_t imm = 0x0FFFF >> (TR_VECTOR_REGISTER_SIZE - TR::DataType::packedDecimalPrecisionToByteLength(targetPrec));5933TR::Register* pdReg = cg->evaluate(node->getFirstChild());5934TR::Register* maskReg = cg->allocateRegister(TR_VRF);5935generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, maskReg, imm, 0);59365937if (targetPrec % 2 == 0)5938{5939TR::Register* shiftAmountReg = cg->allocateRegister(TR_VRF);5940generateVRIaInstruction(cg, TR::InstOpCode::VREPI, node, shiftAmountReg, 4, 0);5941generateVRRcInstruction(cg, TR::InstOpCode::VSRL, node, maskReg, maskReg, shiftAmountReg, 0, 0, 0);5942cg->stopUsingRegister(shiftAmountReg);5943}59445945generateVRRcInstruction(cg, TR::InstOpCode::VN, node, targetReg, pdReg, maskReg, 0, 0, 0);59465947cg->stopUsingRegister(maskReg);5948cg->decReferenceCount(node->getFirstChild());5949}5950}5951else5952{5953TR::Node *srcNode = node->getChild(0);5954TR_PseudoRegister *srcReg = cg->evaluateBCDNode(srcNode);5955targetReg = simpleWideningOrTruncation(node, srcReg, false, 0, cg);5956cg->decReferenceCount(srcNode);5957node->setRegister(targetReg);5958}59595960cg->traceBCDExit("pdModifyPrecision",node);5961return targetReg;5962}59635964TR::Register *5965J9::Z::TreeEvaluator::pdshlEvaluator(TR::Node * node, TR::CodeGenerator * cg)5966{5967cg->traceBCDEntry("pdshl",node);5968cg->generateDebugCounter("PD-Op/pdshl", 1, TR::DebugCounter::Cheap);59695970TR::Register* targetReg = NULL;59715972static char* isEnableVectorBCD = feGetEnv("TR_enableVectorBCD");5973if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) &&5974!cg->comp()->getOption(TR_DisableVectorBCD) ||5975isEnableVectorBCD)5976{5977targetReg = pdshlVectorEvaluatorHelper(node, cg);5978}5979else5980{5981targetReg = 
pdshiftEvaluatorHelper(node, cg, false);5982}59835984cg->traceBCDExit("pdshl",node);5985return targetReg;5986}59875988/**5989* \brief This is a helper function that handles pdshl, pdshr, and pdshlOverflow nodes.5990*5991* pdshl is currently not used and replaced by pdshlOverflow.5992*/5993TR::Register *5994J9::Z::TreeEvaluator::pdshiftEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg, bool isRightShift)5995{5996TR::Node* srcNode = node->getChild(0);5997TR::Node* shiftAmountNode = node->getChild(1);5998TR::Compilation *comp = cg->comp();5999int32_t roundAmount = 0;6000int32_t shiftAmount = 0;60016002TR_ASSERT(shiftAmountNode, "expecting a shiftAmountNode\n");6003TR_ASSERT(shiftAmountNode->getOpCode().isLoadConst() &&6004shiftAmountNode->getOpCode().getSize() <= 4,6005"expecting a <= 4 size integral constant PD shift amount\n");6006shiftAmount = (int32_t)shiftAmountNode->get64bitIntegralValue();6007TR_ASSERT(shiftAmount >= 0, "unexpected PD shift amount of %d\n", shiftAmount);60086009if(isRightShift)6010{6011shiftAmount *= -1;6012TR::Node* roundAmountNode = node->getChild(2);6013TR_ASSERT(roundAmountNode, "round amount node should not be null\n");6014roundAmount = roundAmountNode->get32bitIntegralValue();6015TR_ASSERT(roundAmount == 0 || roundAmount == 5, "unexpected round amount of %d\n", roundAmount);6016cg->decReferenceCount(roundAmountNode);6017}60186019TR_PseudoRegister *srcReg = cg->evaluateBCDNode(srcNode);60206021uint32_t srcPrecision = srcNode->getDecimalPrecision();6022uint32_t resultPrecision = node->getDecimalPrecision();6023uint32_t resultSize = TR::DataType::packedDecimalPrecisionToByteLength(resultPrecision);6024uint32_t sourceSize = TR::DataType::packedDecimalPrecisionToByteLength(srcPrecision);60256026TR_StorageReference* targetStorageRef = TR_StorageReference::createTemporaryBasedStorageReference(resultSize, comp);6027TR_PseudoRegister* targetReg = 
cg->allocatePseudoRegister(node->getDataType());6028targetReg->setIsInitialized(true);6029targetReg->setSize(resultSize);6030targetReg->setStorageReference(targetStorageRef, node);60316032TR::MemoryReference* targetMR = generateS390RightAlignedMemoryReference(node, targetReg->getStorageReference(), cg);6033TR::MemoryReference* sourceMR = generateS390RightAlignedMemoryReference(srcNode, srcReg->getStorageReference(), cg);6034TR_StorageReference* tmpStorageRef = NULL;6035TR::MemoryReference* tmpMR = NULL;60366037if (cg->traceBCDCodeGen())6038{6039traceMsg(comp,"\tGen packed decimal shift: %s %p : shift by %d, roundAmount=%d, result Size=%d, precision %d, sourceSize=%d, precision %d\n",6040node->getOpCode().getName(),6041node,6042shiftAmount,6043roundAmount,6044resultSize,6045resultPrecision,6046sourceSize,6047srcNode->getDecimalPrecision());6048}60496050if(shiftAmount == 0)6051{6052if (srcPrecision > resultPrecision)6053{6054/* Packed decimal narrowing with exception handling:6055*6056* If the narrowing operation truncates non-zero digits (e.g. 
shift "123C" by 0 digts and keep 2 digits yields "23C")6057* and the 'checkOverflow' parameter is true, the JIT'ed sequence should trigger HW exception and6058* yield control to the Java code (via OOL call) so that overflow exceptions can be thrown.6059* This is why PD arithmetic operations use 'pdshlOverflow' to perform data truncations6060* instead of 'modifyPrecision'.6061*/60626063tmpStorageRef = TR_StorageReference::createTemporaryBasedStorageReference(sourceSize, comp);6064tmpStorageRef->setTemporaryReferenceCount(1);6065tmpMR = generateS390RightAlignedMemoryReference(node, tmpStorageRef, cg);60666067generateSS2Instruction(cg, TR::InstOpCode::ZAP, node,6068sourceSize - 1,6069generateS390RightAlignedMemoryReference(*tmpMR, node, 0, cg),6070sourceSize - 1,6071generateS390RightAlignedMemoryReference(*sourceMR, node, 0, cg));60726073shiftAmount = srcPrecision - resultPrecision;6074if ((srcPrecision % 2) == 0)6075{6076// Source being even precision means we need an extra left shift to get right of the source's highest nibble.6077shiftAmount++;6078}60796080generateSS3Instruction(cg, TR::InstOpCode::SRP, node,6081sourceSize - 1,6082generateS390RightAlignedMemoryReference(*tmpMR, node, 0, cg),6083shiftAmount, roundAmount);60846085generateSS3Instruction(cg, TR::InstOpCode::SRP, node,6086sourceSize - 1,6087generateS390RightAlignedMemoryReference(*tmpMR, node, 0, cg),6088-1*shiftAmount, roundAmount);60896090generateSS2Instruction(cg, TR::InstOpCode::ZAP, node,6091resultSize - 1,6092generateS390RightAlignedMemoryReference(*targetMR, node, 0, cg),6093resultSize - 1,6094generateS390RightAlignedMemoryReference(*tmpMR, node, 0, cg));6095}6096else // zero shift, copy or widen result6097{6098generateSS2Instruction(cg, TR::InstOpCode::ZAP, node,6099resultSize - 1,6100generateS390RightAlignedMemoryReference(*targetMR, node, 0, cg),6101sourceSize - 1,6102generateS390RightAlignedMemoryReference(*sourceMR, node, 0, cg));61036104// Top nibble cleaning if the PD widening or copying 
source precision is even6105if ((srcPrecision % 2) == 0)6106{6107cg->genZeroLeftMostPackedDigits(node, targetReg, sourceSize, 1,6108generateS390RightAlignedMemoryReference(*targetMR, node, 0, cg));6109}6110}6111}6112else // shiftAmount != 06113{6114int32_t tmpResultByteSize = sourceSize;6115bool needExtraShift = false;61166117if (!isRightShift)6118{6119if ((resultPrecision % 2) == 0)6120{6121/* An extra shift is needed when the left shift result's precision is even.6122* For example, let the input be 00 12 3C (precision=5), shiftAmount=2 and let the result precision be 4.6123* Shift this left by 2 should produce and expected result of 02 30 0C.6124*6125* To produce this expected result with HW exception, we need to6126*6127* 1. shift 00 12 3C by 3 (instead of 2) digits to produce an intermediate result 01 23 00 0C6128* 2. use ZAP to truncate this to 23 00 0C. The purpose of this ZAP is to truncate the leading digits,6129* which may or may not be zero, and trigger HW exception in case they are non-zero so that the6130* DAA Java implementation gets a chance to thrown Java exceptions. In our example, the leading6131* '1' should not be silently discarded (using the NI instruction) because the API 'checkOverflow' parameter6132* may be true.6133* 3. 
perform a right shift of 1 on the intermediate result to produce the expected result 02 30 0C.6134*6135*/6136shiftAmount++;6137needExtraShift = true;6138}61396140// Allocate enough temporary space to accommodate the amount of left shifts.6141tmpResultByteSize += (shiftAmount + 1)/2;6142}61436144tmpStorageRef = TR_StorageReference::createTemporaryBasedStorageReference(tmpResultByteSize, comp);6145tmpStorageRef->setTemporaryReferenceCount(1);6146tmpMR = generateS390RightAlignedMemoryReference(node, tmpStorageRef, cg);61476148// For this large tmp storage, we need to use XC+MVC to clear and move input into it.6149if (!isRightShift)6150{6151generateSS1Instruction(cg, TR::InstOpCode::XC, node,6152tmpResultByteSize - 1,6153generateS390RightAlignedMemoryReference(*tmpMR, node, 0, cg),6154generateS390RightAlignedMemoryReference(*tmpMR, node, 0, cg));6155}61566157generateSS1Instruction(cg, TR::InstOpCode::MVC, node,6158sourceSize - 1,6159generateS390RightAlignedMemoryReference(*tmpMR, node, 0, cg),6160sourceMR);61616162generateSS3Instruction(cg, TR::InstOpCode::SRP, node,6163tmpResultByteSize - 1,6164generateS390RightAlignedMemoryReference(*tmpMR, node, 0, cg),6165shiftAmount, roundAmount);61666167generateSS2Instruction(cg, TR::InstOpCode::ZAP, node,6168resultSize - 1,6169generateS390RightAlignedMemoryReference(*targetMR, node, 0, cg),6170tmpResultByteSize - 1,6171generateS390RightAlignedMemoryReference(*tmpMR, node, 0, cg));61726173if (needExtraShift)6174{6175generateSS3Instruction(cg, TR::InstOpCode::SRP, node,6176resultSize - 1,6177generateS390RightAlignedMemoryReference(*targetMR, node, 0, cg),6178-1, 0);6179}6180}61816182cg->decReferenceCount(srcNode);6183cg->decReferenceCount(shiftAmountNode);6184node->setRegister(targetReg);6185return targetReg;6186}61876188TR::Register*6189J9::Z::TreeEvaluator::vectorPerformSignOperationHelper(TR::Node *node,6190TR::CodeGenerator *cg,6191bool setPrecision,6192uint32_t precision,6193bool signedStatus,6194SignOperationType 
signOpType,6195bool signValidityCheck,6196bool digitValidityCheck,6197int32_t sign,6198bool setConditionCode,6199bool ignoreDecimalOverflow)6200{6201TR::Register *targetReg = cg->allocateRegister(TR_VRF);6202TR::Node *pdNode = node->getFirstChild();62036204TR::Register *childReg = cg->evaluate(pdNode);62056206int32_t numPrecisionDigits = setPrecision ? precision : TR_MAX_INPUT_PACKED_DECIMAL_PRECISION;6207if (numPrecisionDigits > TR_MAX_INPUT_PACKED_DECIMAL_PRECISION)6208{6209numPrecisionDigits = TR_MAX_INPUT_PACKED_DECIMAL_PRECISION;6210}62116212uint8_t constImm3 = numPrecisionDigits;62136214if (ignoreDecimalOverflow)6215{6216constImm3 |= 0x80;6217}62186219// Bit 4-5 Sign Operation, 6 Positive Sign code, 7 Sign validation on V26220uint8_t constImm4 = signOpType << 2;62216222if (signOpType == SignOperationType::setSign)6223{6224switch (sign)6225{6226case TR_PREFERRED_PLUS_CODE:6227case TR_ALTERNATE_PLUS_CODE:6228case TR_ZONED_PLUS:6229constImm4 |= 0x1;6230break;6231case TR_PREFERRED_MINUS_CODE:6232case TR_ALTERNATE_MINUS_CODE:6233break;6234default:6235TR_ASSERT_FATAL(false, "Packed Decimal sign code 0x%x is invalid", sign);6236break;6237}6238}62396240// If signedStatus is true it means signed so use 0xC instead of 0xF6241constImm4 |= (signedStatus ? 0x0 : 0x2 );6242constImm4 |= (signValidityCheck ? 0x1 : 0x0);6243constImm4 |= (digitValidityCheck ? 
0x0 : 0x80);62446245// Current use of TR::pdclean does not want to modifyprecision or set condition code.6246// TODO: We can probably come up with more complex optimization that will collapse modify precision and TR::setsign6247// or TR::pdclean to one instruction.6248generateVRIgInstruction(cg, TR::InstOpCode::VPSOP, node, targetReg, childReg, constImm3, constImm4, setConditionCode);62496250node->setRegister(targetReg);6251cg->decReferenceCount(pdNode);6252return targetReg;6253}62546255TR::Register *6256J9::Z::TreeEvaluator::generateVectorBinaryToPackedConversion(TR::Node * node, TR::InstOpCode::Mnemonic op, TR::CodeGenerator * cg)6257{6258TR_ASSERT(op == TR::InstOpCode::VCVD || op == TR::InstOpCode::VCVDG,6259"unexpected opcode in gen vector i2pd\n");62606261TR::Register *vTargetReg = cg->allocateRegister(TR_VRF);6262TR::Node * firstChild = node->getFirstChild();6263TR::Register *sourceReg = cg->evaluate(firstChild);6264bool isUseRegPair = (op == TR::InstOpCode::VCVDG && sourceReg->getRegisterPair());62656266if (isUseRegPair)6267{6268TR::Register *tempReg = cg->allocateRegister();6269generateRSInstruction(cg, TR::InstOpCode::SLLG, node, tempReg, sourceReg->getRegisterPair()->getHighOrder(), 32);6270generateRRInstruction(cg, TR::InstOpCode::LR, node, tempReg, sourceReg->getRegisterPair()->getLowOrder());6271sourceReg = tempReg;6272}62736274uint8_t decimalPrecision = node->getDecimalPrecision();62756276if (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY) && cg->getIgnoreDecimalOverflowException())6277{6278decimalPrecision |= 0x80;6279}62806281generateVRIiInstruction(cg, op, node, vTargetReg, sourceReg, decimalPrecision, 0x1);62826283if (isUseRegPair)6284{6285cg->stopUsingRegister(sourceReg);6286}62876288cg->decReferenceCount(firstChild);6289node->setRegister(vTargetReg);6290return vTargetReg;6291}62926293TR::Register *6294J9::Z::TreeEvaluator::pdshlVectorEvaluatorHelper(TR::Node *node, TR::CodeGenerator * 
cg)6295{6296TR::Register * targetReg = NULL;6297TR::Node *firstChild = node->getChild(0);6298TR::Node *shiftAmountNode = node->getNumChildren() > 1 ? node->getSecondChild() : NULL;6299TR_ASSERT(shiftAmountNode, "shift amount node should not be null");6300TR_ASSERT(shiftAmountNode->getOpCode().isLoadConst() && shiftAmountNode->getOpCode().getSize() <= 4,6301"expecting a <= 4 size integral constant PD shift amount\n");63026303// If this is a pdshlOverflow with i2pd and other pd-arithmetic operations under it, these vector instructions will6304// truncate the resulting PD by the amount specified by 'decimalPrecision'. Therefore, we can6305// skip the shift and just return i2pd results.6306bool isSkipShift = node->getOpCodeValue() == TR::pdshlOverflow &&6307(firstChild->getOpCodeValue() == TR::i2pd ||6308firstChild->getOpCodeValue() == TR::l2pd ||6309firstChild->getOpCodeValue() == TR::pdadd ||6310firstChild->getOpCodeValue() == TR::pdsub ||6311firstChild->getOpCodeValue() == TR::pdmul ||6312firstChild->getOpCodeValue() == TR::pddiv ||6313firstChild->getOpCodeValue() == TR::pdrem) &&6314firstChild->getReferenceCount() == 1 &&6315firstChild->getRegister() == NULL;63166317int32_t shiftAmount = (int32_t)shiftAmountNode->get64bitIntegralValue();6318uint8_t decimalPrecision = node->getDecimalPrecision();63196320if (isSkipShift)6321{6322firstChild->setDecimalPrecision(decimalPrecision);6323}63246325TR::Register * sourceReg = cg->evaluate(firstChild);63266327if (isSkipShift)6328{6329// Passthrough. 
Assign register to node before decrementing refCount of the firstChild6330// to avoid killing this live register6331targetReg = sourceReg;6332}6333else6334{6335TR_ASSERT_FATAL((shiftAmount >= -32 && shiftAmount <= 31), "TR::pdshl/r shift amount (%d )not in range [-32, 31]", shiftAmount);63366337if (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY) && cg->getIgnoreDecimalOverflowException())6338{6339decimalPrecision |= 0x80;6340}63416342targetReg = cg->allocateRegister(TR_VRF);6343generateVRIgInstruction(cg, TR::InstOpCode::VSRP, node, targetReg, sourceReg, decimalPrecision, shiftAmount, 0x01);6344}63456346node->setRegister(targetReg);6347cg->decReferenceCount(firstChild);6348cg->decReferenceCount(shiftAmountNode);6349return targetReg;6350}63516352TR::Register *6353J9::Z::TreeEvaluator::pdshrVectorEvaluatorHelper(TR::Node *node, TR::CodeGenerator * cg)6354{6355TR::Node *srcNode = node->getChild(0);6356TR::Node *shiftAmountNode = node->getNumChildren() > 1 ? 
node->getChild(1) : NULL;6357TR_ASSERT(shiftAmountNode != NULL, "pdshrVectorEvaluatorHelper is expecting a shiftAmountNode as child-1\n");6358TR_ASSERT(shiftAmountNode->getOpCode().isLoadConst() && shiftAmountNode->getOpCode().getSize() <= 4,6359"expecting a <= 4 size integral constant PD shift amount\n");63606361int32_t shiftAmount = (int32_t)shiftAmountNode->get32bitIntegralValue();6362TR_ASSERT((shiftAmount >=0 || shiftAmount <= 31),"unexpected TR::pdshr shift amount of %d\n",shiftAmount);63636364//set shift amount and round amount6365shiftAmount *= -1; // right shift is negative6366shiftAmount &= 0x0000007F; // clear off top bits63676368TR::Node *roundAmountNode = node->getChild(2);6369TR_ASSERT( roundAmountNode->getOpCode().isLoadConst(),"excepting pdshr round amount to be a const\n");6370int32_t roundAmount = roundAmountNode->get32bitIntegralValue();6371TR_ASSERT(roundAmount == 0 || roundAmount == 5, "round amount should be 0 or 5 and not %d\n",roundAmount);6372if (roundAmount)6373{6374shiftAmount |= 0x80; //set the round bit in the shift amount. 
(immediate3 field in VRIg)6375}63766377// Get PD value6378TR::Register * pdValueReg = cg->evaluate(srcNode);6379TR::Register* targetReg = cg->allocateRegister(TR_VRF);6380uint8_t decimalPrecision = node->getDecimalPrecision();63816382if (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY) && cg->getIgnoreDecimalOverflowException())6383{6384decimalPrecision |= 0x80;6385}63866387// Perform shift and set condition code on overflows6388generateVRIgInstruction(cg, TR::InstOpCode::VSRP, node, targetReg, pdValueReg, decimalPrecision, shiftAmount, 0x1);63896390node->setRegister(targetReg);63916392cg->decReferenceCount(srcNode);6393cg->decReferenceCount(shiftAmountNode);6394cg->decReferenceCount(roundAmountNode);63956396return targetReg;6397}63986399TR::Register*6400J9::Z::TreeEvaluator::zdstoreiVectorEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg)6401{6402if (cg->comp()->getOption(TR_TraceCG))6403traceMsg(cg->comp(), "DAA: Entering zdstoreiVectorEvaluator %d\n", __LINE__);64046405TR::Node* pd2zdNode = node->getSecondChild();6406TR::Node* pdloadiNode = pd2zdNode->getFirstChild();6407TR::Register* pdValueReg = cg->evaluate(pdloadiNode);6408TR_ASSERT_FATAL_WITH_NODE(pdloadiNode, (pdValueReg->getKind() == TR_FPR || pdValueReg->getKind() == TR_VRF),6409"vectorized zdstore is expecting the packed decimal to be in a vector register.");64106411// No need to evaluate the address node of the zdstorei.6412// generateVSIInstruction() API will call separateIndexRegister() to separate the index6413// register by emitting an LA instruction. 
If there's a need for large displacement adjustment,6414// LAY will be emitted instead.6415TR::MemoryReference * targetMR = TR::MemoryReference::create(cg, node);64166417TR::Register *zonedDecimalHigh = cg->allocateRegister(TR_VRF);6418TR::Register *zonedDecimalLow = cg->allocateRegister(TR_VRF);64196420// 0 we store 1 byte, 15 we store 16 bytes.6421// 15 - lengthToStore = index from which to start.6422uint8_t lengthToStore = pd2zdNode->getDecimalPrecision() - 1;6423uint8_t M3 = 0x8; // Disable sign validation.6424TR::MemoryReference * zonedDecimalMR = targetMR;6425generateVRRkInstruction(cg, TR::InstOpCode::VUPKZL, node, zonedDecimalLow, pdValueReg, M3); // Also copies the sign bit.64266427if (pd2zdNode->getDecimalPrecision() > TR_VECTOR_REGISTER_SIZE)6428{6429generateVRRkInstruction(cg, TR::InstOpCode::VUPKZH, node, zonedDecimalHigh, pdValueReg, M3);6430lengthToStore = pd2zdNode->getDecimalPrecision() - TR_VECTOR_REGISTER_SIZE;6431generateVSIInstruction(cg, TR::InstOpCode::VSTRL, node, zonedDecimalHigh, zonedDecimalMR, lengthToStore - 1);6432zonedDecimalMR = generateS390MemoryReference(*targetMR, lengthToStore, cg);6433lengthToStore = TR_VECTOR_REGISTER_SIZE - 1;6434}64356436generateVSIInstruction(cg, TR::InstOpCode::VSTRL, node, zonedDecimalLow, zonedDecimalMR, lengthToStore);64376438pd2zdSignFixup(node, targetMR, cg, false);64396440// This would have been decremented in pd2zdVectorEvaluatorHelper6441// but since we skip that evaluator we decrement it here.6442cg->decReferenceCount(pdloadiNode);64436444for (int32_t i = 0; i < node->getNumChildren(); ++i)6445{6446cg->decReferenceCount(node->getChild(i));6447}64486449cg->stopUsingRegister(zonedDecimalHigh);6450cg->stopUsingRegister(zonedDecimalLow);64516452return NULL;6453}645464556456