Path: blob/master/runtime/compiler/p/codegen/J9CodeGenerator.cpp
6004 views
/*******************************************************************************1* Copyright (c) 2000, 2021 IBM Corp. and others2*3* This program and the accompanying materials are made available under4* the terms of the Eclipse Public License 2.0 which accompanies this5* distribution and is available at https://www.eclipse.org/legal/epl-2.0/6* or the Apache License, Version 2.0 which accompanies this distribution and7* is available at https://www.apache.org/licenses/LICENSE-2.0.8*9* This Source Code may also be made available under the following10* Secondary Licenses when the conditions for such availability set11* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU12* General Public License, version 2 with the GNU Classpath13* Exception [1] and GNU General Public License, version 2 with the14* OpenJDK Assembly Exception [2].15*16* [1] https://www.gnu.org/software/classpath/license.html17* [2] http://openjdk.java.net/legal/assembly-exception.html18*19* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception20*******************************************************************************/2122#include "j9cfg.h"23#include "codegen/AheadOfTimeCompile.hpp"24#include "codegen/CodeGenerator.hpp"25#include "codegen/CodeGeneratorUtils.hpp"26#include "codegen/CodeGenerator_inlines.hpp"27#include "codegen/GenerateInstructions.hpp"28#include "codegen/Linkage.hpp"29#include "codegen/Linkage_inlines.hpp"30#include "codegen/PPCJNILinkage.hpp"31#include "codegen/PPCPrivateLinkage.hpp"32#include "env/CompilerEnv.hpp"33#include "env/OMRMemory.hpp"34#include "env/VMJ9.h"35#include "env/jittypes.h"36#include "il/Node.hpp"37#include "il/Node_inlines.hpp"38#include "il/TreeTop.hpp"39#include "il/TreeTop_inlines.hpp"40#include "p/codegen/PPCInstruction.hpp"41#include "p/codegen/PPCRecompilation.hpp"42#include "p/codegen/PPCSystemLinkage.hpp"4344extern void TEMPORARY_initJ9PPCTreeEvaluatorTable(TR::CodeGenerator *cg);4546J9::Power::CodeGenerator::CodeGenerator(TR::Compilation *comp) :47J9::CodeGenerator(comp)48{49/**50* Do not add CodeGenerator initialization logic here.51* Use the \c initialize() method instead.52*/53}5455void56J9::Power::CodeGenerator::initialize()57{58self()->J9::CodeGenerator::initialize();5960TR::CodeGenerator *cg = self();61TR::Compilation *comp = cg->comp();62TR_J9VMBase *fej9 = (TR_J9VMBase *) (comp->fe());6364cg->setAheadOfTimeCompile(new (cg->trHeapMemory()) TR::AheadOfTimeCompile(cg));6566if (!comp->getOption(TR_FullSpeedDebug))67cg->setSupportsDirectJNICalls();6869if (!comp->getOption(TR_DisableBDLLVersioning))70{71cg->setSupportsBigDecimalLongLookasideVersioning();72}7374if (cg->getSupportsTM())75{76cg->setSupportsInlineConcurrentLinkedQueue();77}7879cg->setSupportsNewInstanceImplOpt();8081static char *disableMonitorCacheLookup = feGetEnv("TR_disableMonitorCacheLookup");82if (!disableMonitorCacheLookup)83comp->setOption(TR_EnableMonitorCacheLookup);8485cg->setSupportsPartialInlineOfMethodHooks();86cg->setSupportsInliningOfTypeCoersionMethods();8788if (comp->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P8) && comp->target().cpu.supportsFeature(OMR_FEATURE_PPC_HAS_VSX) &&89comp->target().is64Bit() && !comp->getOption(TR_DisableFastStringIndexOf) &&90!TR::Compiler->om.canGenerateArraylets())91cg->setSupportsInlineStringIndexOf();9293if (!comp->getOption(TR_DisableReadMonitors))94cg->setSupportsReadOnlyLocks();9596static bool disableTLHPrefetch = (feGetEnv("TR_DisableTLHPrefetch") != NULL);9798// Enable software prefetch of the TLH and configure the TLH prefetching99// geometry.100//101if (!disableTLHPrefetch && comp->getOption(TR_TLHPrefetch) && !comp->compileRelocatableCode())102{103cg->setEnableTLHPrefetching();104}105106//This env-var does 3 things:107// 1. Prevents batch clear in frontend/j9/rossa.cpp108// 2. Prevents all allocations to nonZeroTLH109// 3. Maintains the old semantics zero-init and prefetch.110// The use of this env-var is more complete than the JIT Option then.111static bool disableDualTLH = (feGetEnv("TR_DisableDualTLH") != NULL);112// Enable use of non-zero initialized TLH for object allocations where113// zero-initialization is not required as detected by the optimizer.114//115if (!disableDualTLH && !comp->getOption(TR_DisableDualTLH) && !comp->compileRelocatableCode() && !comp->getOptions()->realTimeGC())116{117cg->setIsDualTLH();118}119120/*121* "Statically" initialize the FE-specific tree evaluator functions.122* This code only needs to execute once per JIT lifetime.123*/124static bool initTreeEvaluatorTable = false;125if (!initTreeEvaluatorTable)126{127TEMPORARY_initJ9PPCTreeEvaluatorTable(cg);128initTreeEvaluatorTable = true;129}130131if (comp->fej9()->hasFixedFrameC_CallingConvention())132cg->setHasFixedFrameC_CallingConvention();133134}135136bool137J9::Power::CodeGenerator::canEmitDataForExternallyRelocatableInstructions()138{139// On Power, data cannot be emitted inside instructions that will be associated with an140// external relocation record (ex. AOT or Remote compiles in OpenJ9). This is because when the141// relocation is applied when a method is loaded, the new data in the instruction is OR'ed in (The reason142// for OR'ing is that sometimes usefule information such as flags and hints can be stored during compilation in these data fields).143// Hence, for the relocation to be applied correctly, we must ensure that the data fields inside the instruction144// initially are zero.145#ifdef J9VM_OPT_JITSERVER146return !self()->comp()->compileRelocatableCode() && !self()->comp()->isOutOfProcessCompilation();147#endif148return !self()->comp()->compileRelocatableCode();149}150151// Get or create the TR::Linkage object that corresponds to the given linkage152// convention.153// Even though this method is common, its implementation is machine-specific.154//155TR::Linkage *156J9::Power::CodeGenerator::createLinkage(TR_LinkageConventions lc)157{158TR::Linkage *linkage;159switch (lc)160{161case TR_Private:162linkage = new (self()->trHeapMemory()) J9::Power::PrivateLinkage(self());163break;164case TR_System:165linkage = new (self()->trHeapMemory()) TR::PPCSystemLinkage(self());166break;167case TR_CHelper:168case TR_Helper:169linkage = new (self()->trHeapMemory()) J9::Power::HelperLinkage(self(), lc);170break;171case TR_J9JNILinkage:172linkage = new (self()->trHeapMemory()) J9::Power::JNILinkage(self());173break;174default :175linkage = new (self()->trHeapMemory()) TR::PPCSystemLinkage(self());176TR_ASSERT(false, "Unexpected linkage convention");177}178179self()->setLinkage(lc, linkage);180return linkage;181}182183TR::Recompilation *184J9::Power::CodeGenerator::allocateRecompilationInfo()185{186return TR_PPCRecompilation::allocate(self()->comp());187}188189void190J9::Power::CodeGenerator::generateBinaryEncodingPrologue(191TR_PPCBinaryEncodingData *data)192{193TR::Compilation *comp = self()->comp();194TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());195TR::Instruction *tempInstruction;196197data->recomp = comp->getRecompilationInfo();198data->cursorInstruction = self()->getFirstInstruction();199data->preProcInstruction = data->cursorInstruction;200data->jitTojitStart = data->cursorInstruction->getNext();201202self()->getLinkage()->loadUpArguments(data->cursorInstruction);203204if (data->recomp != NULL)205{206data->recomp->generatePrePrologue();207}208else209{210if (comp->getOption(TR_FullSpeedDebug) || comp->getOption(TR_SupportSwitchToInterpreter))211{212self()->generateSwitchToInterpreterPrePrologue(NULL, comp->getStartTree()->getNode());213}214else215{216TR::ResolvedMethodSymbol *methodSymbol = comp->getMethodSymbol();217/* save the original JNI native address if a JNI thunk is generated */218/* thunk is not recompilable, nor does it support FSD */219if (methodSymbol->isJNI())220{221uintptr_t JNIMethodAddress = (uintptr_t)methodSymbol->getResolvedMethod()->startAddressForJNIMethod(comp);222TR::Node *firstNode = comp->getStartTree()->getNode();223if (comp->target().is64Bit())224{225int32_t highBits = (int32_t)(JNIMethodAddress>>32), lowBits = (int32_t)JNIMethodAddress;226TR::Instruction *cursor = new (self()->trHeapMemory()) TR::PPCImmInstruction(TR::InstOpCode::dd, firstNode,227comp->target().cpu.isBigEndian()?highBits:lowBits, NULL, self());228generateImmInstruction(self(), TR::InstOpCode::dd, firstNode,229comp->target().cpu.isBigEndian()?lowBits:highBits, cursor);230}231else232{233new (self()->trHeapMemory()) TR::PPCImmInstruction(TR::InstOpCode::dd, firstNode, (int32_t)JNIMethodAddress, NULL, self());234}235}236}237}238239data->cursorInstruction = self()->getFirstInstruction();240241while (data->cursorInstruction && data->cursorInstruction->getOpCodeValue() != TR::InstOpCode::proc)242{243data->estimate = data->cursorInstruction->estimateBinaryLength(data->estimate);244data->cursorInstruction = data->cursorInstruction->getNext();245}246247if (self()->supportsJitMethodEntryAlignment())248{249self()->setPreJitMethodEntrySize(data->estimate);250data->estimate += (self()->getJitMethodEntryAlignmentBoundary() - 1);251}252253tempInstruction = data->cursorInstruction;254if ((data->recomp!=NULL) && (!data->recomp->useSampling()))255{256tempInstruction = data->recomp->generatePrologue(tempInstruction);257}258259self()->getLinkage()->createPrologue(tempInstruction);260261}262263264void265J9::Power::CodeGenerator::lowerTreeIfNeeded(266TR::Node *node,267int32_t childNumberOfNode,268TR::Node *parent,269TR::TreeTop *tt)270{271J9::CodeGenerator::lowerTreeIfNeeded(node, childNumberOfNode, parent, tt);272273if ((node->getOpCode().isLeftShift() ||274node->getOpCode().isRightShift() || node->getOpCode().isRotate()) &&275self()->needsNormalizationBeforeShifts() &&276!node->isNormalizedShift())277{278TR::Node *second = node->getSecondChild();279int32_t normalizationAmount;280if (node->getType().isInt64())281normalizationAmount = 63;282else283normalizationAmount = 31;284285// Some platforms like IA32 obey Java semantics for shifts even if the286// shift amount is greater than 31 or 63. However other platforms like PPC need287// to normalize the shift amount to range (0, 31) or (0, 63) before shifting in order288// to obey Java semantics. This can be captured in the IL and commoning/hoisting289// can be done (look at Compressor.compress).290//291if ( (second->getOpCodeValue() != TR::iconst) &&292((second->getOpCodeValue() != TR::iand) ||293(second->getSecondChild()->getOpCodeValue() != TR::iconst) ||294(second->getSecondChild()->getInt() != normalizationAmount)))295{296// Not normalized yet297//298TR::Node * normalizedNode = TR::Node::create(TR::iand, 2, second, TR::Node::create(second, TR::iconst, 0, normalizationAmount));299second->recursivelyDecReferenceCount();300node->setAndIncChild(1, normalizedNode);301node->setNormalizedShift(true);302}303}304305}306307308bool J9::Power::CodeGenerator::suppressInliningOfRecognizedMethod(TR::RecognizedMethod method)309{310TR::Compilation *comp = self()->comp();311312if (self()->isMethodInAtomicLongGroup(method))313{314return true;315}316317#ifdef J9VM_OPT_JAVA_CRYPTO_ACCELERATION318if (self()->suppressInliningOfCryptoMethod(method))319{320return true;321}322#endif323324if ((method==TR::java_util_concurrent_atomic_AtomicBoolean_getAndSet) ||325(method==TR::java_util_concurrent_atomic_AtomicInteger_getAndAdd) ||326(method==TR::java_util_concurrent_atomic_AtomicInteger_getAndIncrement) ||327(method==TR::java_util_concurrent_atomic_AtomicInteger_getAndDecrement) ||328(method==TR::java_util_concurrent_atomic_AtomicInteger_getAndSet) ||329(method==TR::java_util_concurrent_atomic_AtomicInteger_addAndGet) ||330(method==TR::java_util_concurrent_atomic_AtomicInteger_decrementAndGet) ||331(method==TR::java_util_concurrent_atomic_AtomicInteger_incrementAndGet))332{333return true;334}335336if (method == TR::java_lang_Math_fma_D ||337method == TR::java_lang_Math_fma_F ||338method == TR::java_lang_StrictMath_fma_D ||339method == TR::java_lang_StrictMath_fma_F)340{341return true;342}343344// Transactional Memory345if (self()->getSupportsInlineConcurrentLinkedQueue())346{347if (method == TR::java_util_concurrent_ConcurrentLinkedQueue_tmOffer ||348method == TR::java_util_concurrent_ConcurrentLinkedQueue_tmPoll ||349method == TR::java_util_concurrent_ConcurrentLinkedQueue_tmEnabled)350{351return true;352}353}354355return false;356}357358359bool360J9::Power::CodeGenerator::enableAESInHardwareTransformations()361{362TR::Compilation *comp = self()->comp();363if ( (comp->target().cpu.getPPCSupportsAES() || (comp->target().cpu.supportsFeature(OMR_FEATURE_PPC_HAS_ALTIVEC) && comp->target().cpu.supportsFeature(OMR_FEATURE_PPC_HAS_VSX))) &&364!comp->getOption(TR_DisableAESInHardware))365return true;366else367return false;368}369370void371J9::Power::CodeGenerator::insertPrefetchIfNecessary(TR::Node *node, TR::Register *targetRegister)372{373TR::Compilation *comp = self()->comp();374static bool disableHotFieldPrefetch = (feGetEnv("TR_DisableHotFieldPrefetch") != NULL);375static bool disableHotFieldNextElementPrefetch = (feGetEnv("TR_DisableHotFieldNextElementPrefetch") != NULL);376static bool disableIteratorPrefetch = (feGetEnv("TR_DisableIteratorPrefetch") != NULL);377static bool disableStringObjPrefetch = (feGetEnv("TR_DisableStringObjPrefetch") != NULL);378bool optDisabled = false;379380if ((node->getOpCodeValue() == TR::aloadi && !comp->target().is64Bit()) ||381(comp->target().is64Bit() &&382comp->useCompressedPointers() &&383node->getOpCodeValue() == TR::l2a &&384comp->getMethodHotness() >= scorching &&385TR::Compiler->om.compressedReferenceShiftOffset() == 0 &&386comp->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P6))387)388{389int32_t prefetchOffset = comp->findPrefetchInfo(node);390TR::Node *firstChild = node->getFirstChild();391392if (!disableHotFieldPrefetch && prefetchOffset >= 0) // Prefetch based on hot field information393{394bool canSkipNullChk = false;395static bool disableDelayPrefetch = (feGetEnv("TR_DisableDelayPrefetch") != NULL);396TR::LabelSymbol *endCtrlFlowLabel = generateLabelSymbol(self());397TR::Node *topNode = self()->getCurrentEvaluationTreeTop()->getNode();398// Search the current block for a check for NULL399TR::TreeTop *tt = self()->getCurrentEvaluationTreeTop()->getNextTreeTop();400TR::Node *checkNode = NULL;401402while (!disableDelayPrefetch && tt && (tt->getNode()->getOpCodeValue() != TR::BBEnd))403{404checkNode = tt->getNode();405if (checkNode->getOpCodeValue() == TR::ifacmpeq &&406checkNode->getFirstChild() == node &&407checkNode->getSecondChild()->getDataType() == TR::Address &&408checkNode->getSecondChild()->isZero())409{410canSkipNullChk = true;411}412tt = tt->getNextTreeTop();413}414415if (!canSkipNullChk)416{417TR::Register *condReg = self()->allocateRegister(TR_CCR);418TR::LabelSymbol *startCtrlFlowLabel = generateLabelSymbol(self());419startCtrlFlowLabel->setStartInternalControlFlow();420endCtrlFlowLabel->setEndInternalControlFlow();421generateLabelInstruction(self(), TR::InstOpCode::label, node, startCtrlFlowLabel);422423// check for null424generateTrg1Src1ImmInstruction(self(), TR::InstOpCode::cmpli4, node, condReg, targetRegister, NULLVALUE);425generateConditionalBranchInstruction(self(), TR::InstOpCode::beql, node, endCtrlFlowLabel, condReg);426427TR::Register *tempReg = self()->allocateRegister();428TR::RegisterDependencyConditions *deps = new (self()->trHeapMemory()) TR::RegisterDependencyConditions(1, 2, self()->trMemory());429deps->addPostCondition(tempReg, TR::RealRegister::NoReg);430TR::addDependency(deps, condReg, TR::RealRegister::NoReg, TR_CCR, self());431432if (comp->target().is64Bit() && !comp->useCompressedPointers())433{434TR::MemoryReference *tempMR = TR::MemoryReference::createWithDisplacement(self(), targetRegister, prefetchOffset, 8);435generateTrg1MemInstruction(self(), TR::InstOpCode::ld, node, tempReg, tempMR);436}437else438{439TR::MemoryReference *tempMR = TR::MemoryReference::createWithDisplacement(self(), targetRegister, prefetchOffset, 4);440generateTrg1MemInstruction(self(), TR::InstOpCode::lwz, node, tempReg, tempMR);441}442443TR::MemoryReference *targetMR = TR::MemoryReference::createWithDisplacement(self(), tempReg, (int32_t)0, 4);444targetMR->forceIndexedForm(node, self());445generateMemInstruction(self(), TR::InstOpCode::dcbt, node, targetMR);446447self()->stopUsingRegister(tempReg);448self()->stopUsingRegister(condReg);449}450else451{452// Delay the dcbt to after the null check and fall through to the next block's treetop.453TR::TreeTop *useTree = tt->getNextTreeTop();454TR_ASSERT(useTree->getNode()->getOpCodeValue() == TR::BBStart, "Expecting a BBStart on the fall through\n");455TR::Node *useNode = useTree->getNode();456TR::Block *bb = useNode->getBlock();457if (bb->isExtensionOfPreviousBlock()) // Survived the null check458{459TR_PrefetchInfo *pf = new (self()->trHeapMemory())TR_PrefetchInfo(self()->getCurrentEvaluationTreeTop(), useTree, node, useNode, prefetchOffset, false);460comp->removeExtraPrefetchInfo(useNode);461comp->getExtraPrefetchInfo().push_front(pf);462}463}464465// Do a prefetch on the next element of the array466// if the pointer came from an array. Seems to give no gain at all, disabled until later467TR::Register *pointerReg = NULL;468bool fromRegLoad = false;469if (!disableHotFieldNextElementPrefetch)470{471// 32bit472if (comp->target().is32Bit())473{474if (!(firstChild->getOpCodeValue() == TR::aiadd &&475firstChild->getFirstChild() &&476firstChild->isInternalPointer()) &&477!(firstChild->getOpCodeValue() == TR::aRegLoad &&478firstChild->getSymbolReference()->getSymbol()->isInternalPointer()))479{480optDisabled = true;481}482else483{484fromRegLoad = (firstChild->getOpCodeValue() == TR::aRegLoad);485pointerReg = fromRegLoad ? firstChild->getRegister() : self()->allocateRegister();486if (!fromRegLoad)487{488// Case for aiadd, there should be 2 children489TR::Node *baseObject = firstChild->getFirstChild();490TR::Register *baseReg = (baseObject) ? baseObject->getRegister() : NULL;491TR::Node *indexObject = firstChild->getSecondChild();492TR::Register *indexReg = (indexObject) ? indexObject->getRegister() : NULL;493// If the index is constant we just grab it494if (indexObject->getOpCode().isLoadConst())495{496int32_t len = indexObject->getInt();497if (len >= LOWER_IMMED && len <= UPPER_IMMED)498generateTrg1Src1ImmInstruction(self(), TR::InstOpCode::addi, node, pointerReg, baseReg, len);499else500{501indexReg = self()->allocateRegister();502loadConstant(self(), node, len, indexReg);503generateTrg1Src2Instruction(self(), TR::InstOpCode::add, node, pointerReg, baseReg, indexReg);504self()->stopUsingRegister(indexReg);505}506}507else508generateTrg1Src2Instruction(self(), TR::InstOpCode::add, node, pointerReg, baseReg, indexReg);509}510}511}512// 64bit CR513else if (comp->target().is64Bit() && comp->useCompressedPointers())514{515if (!(firstChild->getOpCodeValue() == TR::iu2l &&516firstChild->getFirstChild() &&517firstChild->getFirstChild()->getOpCodeValue() == TR::iloadi &&518firstChild->getFirstChild()->getNumChildren() == 1))519{520optDisabled = true;521}522else523{524fromRegLoad = true;525pointerReg = firstChild->getFirstChild()->getFirstChild()->getRegister();526}527}528// 64bit - TODO529else530optDisabled = true;531}532533if (!optDisabled)534{535TR::Register *condReg = self()->allocateRegister(TR_CCR);536TR::Register *tempReg = self()->allocateRegister();537538// 32 bit only.... For -Xgc:noconcurrentmark, heapBase will be 0 and heapTop will be ok539// Otherwise, for a 2.25Gb or bigger heap, heapTop will be 0. Relying on correct JIT initialization540uintptr_t heapTop = comp->getOptions()->getHeapTop() ? comp->getOptions()->getHeapTop() : 0xFFFFFFFF;541542if (pointerReg && (heapTop > comp->getOptions()->getHeapBase())) // Check for gencon543{544TR::Register *temp3Reg = self()->allocateRegister();545static bool prefetch2Ahead = (feGetEnv("TR_Prefetch2Ahead") != NULL);546if (comp->target().is64Bit() && !comp->useCompressedPointers())547{548if (!prefetch2Ahead)549generateTrg1MemInstruction(self(), TR::InstOpCode::ld, node, temp3Reg, TR::MemoryReference::createWithDisplacement(self(), pointerReg, (int32_t)TR::Compiler->om.sizeofReferenceField(), 8));550else551generateTrg1MemInstruction(self(), TR::InstOpCode::ld, node, temp3Reg, TR::MemoryReference::createWithDisplacement(self(), pointerReg, (int32_t)(TR::Compiler->om.sizeofReferenceField()*2), 8));552}553else554{555if (!prefetch2Ahead)556generateTrg1MemInstruction(self(), TR::InstOpCode::lwz, node, temp3Reg, TR::MemoryReference::createWithDisplacement(self(), pointerReg, (int32_t)TR::Compiler->om.sizeofReferenceField(), 4));557else558generateTrg1MemInstruction(self(), TR::InstOpCode::lwz, node, temp3Reg, TR::MemoryReference::createWithDisplacement(self(), pointerReg, (int32_t)(TR::Compiler->om.sizeofReferenceField()*2), 4));559}560561if (comp->getOptions()->getHeapBase() != NULL)562{563loadAddressConstant(self(), comp->compileRelocatableCode(), node, (intptr_t)(comp->getOptions()->getHeapBase()), tempReg);564generateTrg1Src2Instruction(self(), TR::InstOpCode::cmpl4, node, condReg, temp3Reg, tempReg);565generateConditionalBranchInstruction(self(), TR::InstOpCode::blt, node, endCtrlFlowLabel, condReg);566}567if (heapTop != 0xFFFFFFFF)568{569loadAddressConstant(self(), comp->compileRelocatableCode(), node, (intptr_t)(heapTop-prefetchOffset), tempReg);570generateTrg1Src2Instruction(self(), TR::InstOpCode::cmpl4, node, condReg, temp3Reg, tempReg);571generateConditionalBranchInstruction(self(), TR::InstOpCode::bgt, node, endCtrlFlowLabel, condReg);572}573TR::MemoryReference *targetMR = TR::MemoryReference::createWithDisplacement(self(), temp3Reg, (int32_t)0, 4);574targetMR->forceIndexedForm(node, self());575generateMemInstruction(self(), TR::InstOpCode::dcbt, node, targetMR); // use dcbt for prefetch next element576577self()->stopUsingRegister(temp3Reg);578}579580if (!fromRegLoad)581self()->stopUsingRegister(pointerReg);582self()->stopUsingRegister(tempReg);583self()->stopUsingRegister(condReg);584}585generateLabelInstruction(self(), TR::InstOpCode::label, node, endCtrlFlowLabel);586}587588// Try prefetch all string objects, no apparent gain. Disabled for now.589if (!disableStringObjPrefetch && 0 &&590node->getSymbolReference() &&591!node->getSymbolReference()->isUnresolved() &&592(node->getSymbolReference()->getSymbol()->getKind() == TR::Symbol::IsShadow) &&593(node->getSymbolReference()->getCPIndex() >= 0))594{595int32_t len;596const char *fieldName = node->getSymbolReference()->getOwningMethod(comp)->fieldSignatureChars(597node->getSymbolReference()->getCPIndex(), len);598599if (fieldName && strstr(fieldName, "Ljava/lang/String;"))600{601TR::MemoryReference *targetMR = TR::MemoryReference::createWithDisplacement(self(), targetRegister, (int32_t)0, 4);602targetMR->forceIndexedForm(node, self());603generateMemInstruction(self(), TR::InstOpCode::dcbt, node, targetMR);604}605}606}607608if ((node->getOpCodeValue() == TR::aloadi && !comp->target().is64Bit()) ||609(comp->target().is64Bit() &&610comp->useCompressedPointers() &&611(node->getOpCodeValue() == TR::iloadi || node->getOpCodeValue() == TR::irdbari) &&612comp->getMethodHotness() >= hot))613{614TR::Node *firstChild = node->getFirstChild();615optDisabled = disableIteratorPrefetch;616if (!disableIteratorPrefetch)617{618// 32bit619if (comp->target().is32Bit())620{621if (!(firstChild &&622firstChild->getOpCodeValue() == TR::aiadd &&623firstChild->isInternalPointer() &&624(strstr(comp->fe()->sampleSignature(node->getOwningMethod(), 0, 0, self()->trMemory()),"java/util/TreeMap$UnboundedValueIterator.next()")625|| strstr(comp->fe()->sampleSignature(node->getOwningMethod(), 0, 0, self()->trMemory()),"java/util/ArrayList$Itr.next()"))626))627{628optDisabled = true;629}630}631// 64bit cr632else if (comp->target().is64Bit() && comp->useCompressedPointers())633{634if (!(firstChild &&635firstChild->getOpCodeValue() == TR::aladd &&636firstChild->isInternalPointer() &&637(strstr(comp->fe()->sampleSignature(node->getOwningMethod(), 0, 0, self()->trMemory()),"java/util/TreeMap$UnboundedValueIterator.next()")638|| strstr(comp->fe()->sampleSignature(node->getOwningMethod(), 0, 0, self()->trMemory()),"java/util/ArrayList$Itr.next()"))639))640{641optDisabled = true;642}643}644}645646// The use of this prefetching can cause a SEGV when the object array is allocated at the every end of the heap.647// The GC will protected against the SEGV by adding a "tail-padding" page, but only when -XAggressive is enabled!648if (!optDisabled && comp->getOption(TR_AggressiveOpts))649{650int32_t loopSize = 0;651int32_t prefetchElementStride = 1;652TR::Block *b = self()->getCurrentEvaluationBlock();653TR_BlockStructure *blockStructure = b->getStructureOf();654if (blockStructure)655{656TR_Structure *containingLoop = blockStructure->getContainingLoop();657if (containingLoop)658{659TR_ScratchList<TR::Block> blocksInLoop(comp->trMemory());660661containingLoop->getBlocks(&blocksInLoop);662ListIterator<TR::Block> blocksIt(&blocksInLoop);663TR::Block *nextBlock;664for (nextBlock = blocksIt.getCurrent(); nextBlock; nextBlock=blocksIt.getNext())665{666loopSize += nextBlock->getNumberOfRealTreeTops();667}668}669}670671if (comp->useCompressedPointers())672{673prefetchElementStride = 2;674}675else676{677if (loopSize < 200) //comp->useCompressedPointers() is false && loopSize < 200.678{679prefetchElementStride = 4;680}681else if (loopSize < 300) //comp->useCompressedPointers() is false && loopsize >=200 && loopsize < 300.682{683prefetchElementStride = 2;684}685//If comp->useCompressedPointers() is false and loopsize >= 300, prefetchElementStride does not get changed.686}687688// Look at the aiadd's children689TR::Node *baseObject = firstChild->getFirstChild();690TR::Register *baseReg = (baseObject) ? baseObject->getRegister() : NULL;691TR::Node *indexObject = firstChild->getSecondChild();692TR::Register *indexReg = (indexObject) ? indexObject->getRegister() : NULL;693if (baseReg && indexReg && loopSize > 0)694{695TR::Register *tempReg = self()->allocateRegister();696generateTrg1Src1ImmInstruction(self(), TR::InstOpCode::addi, node, tempReg, indexReg, prefetchElementStride * TR::Compiler->om.sizeofReferenceField());697if (comp->target().is64Bit() && !comp->useCompressedPointers())698{699TR::MemoryReference *targetMR = TR::MemoryReference::createWithIndexReg(self(), baseReg, tempReg, 8);700generateTrg1MemInstruction(self(), TR::InstOpCode::ld, node, tempReg, targetMR);701}702else703{704TR::MemoryReference *targetMR = TR::MemoryReference::createWithIndexReg(self(), baseReg, tempReg, 4);705generateTrg1MemInstruction(self(), TR::InstOpCode::lwz, node, tempReg, targetMR);706}707708if (comp->useCompressedPointers())709{710generateShiftLeftImmediateLong(self(), node, tempReg, tempReg, TR::Compiler->om.compressedReferenceShiftOffset());711}712TR::MemoryReference *target2MR = TR::MemoryReference::createWithDisplacement(self(), tempReg, 0, 4);713target2MR->forceIndexedForm(node, self());714generateMemInstruction(self(), TR::InstOpCode::dcbt, node, target2MR);715self()->stopUsingRegister(tempReg);716}717}718}719else if (node->getOpCodeValue() == TR::awrtbari &&720comp->getMethodHotness() >= scorching &&721comp->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P6) &&722(comp->target().is32Bit() ||723(comp->target().is64Bit() &&724comp->useCompressedPointers() &&725TR::Compiler->om.compressedReferenceShiftOffset() == 0726)727)728)729{730// Take the source register of the store and apply on the prefetchOffset right away731int32_t prefetchOffset = comp->findPrefetchInfo(node);732if (prefetchOffset >= 0)733{734TR::MemoryReference *targetMR = TR::MemoryReference::createWithDisplacement(self(), targetRegister, prefetchOffset, TR::Compiler->om.sizeofReferenceAddress());735targetMR->forceIndexedForm(node, self());736generateMemInstruction(self(), TR::InstOpCode::dcbt, node, targetMR);737}738}739}740741TR::Linkage *742J9::Power::CodeGenerator::deriveCallingLinkage(TR::Node *node, bool isIndirect)743{744TR::SymbolReference *symRef = node->getSymbolReference();745TR::MethodSymbol *callee = symRef->getSymbol()->castToMethodSymbol();746TR_J9VMBase *fej9 = (TR_J9VMBase *)(self()->fe());747748static char * disableDirectNativeCall = feGetEnv("TR_DisableDirectNativeCall");749750// Clean-up: the fej9 checking seemed unnecessary751if (!isIndirect && callee->isJNI() && fej9->canRelocateDirectNativeCalls() &&752(node->isPreparedForDirectJNI() ||753(disableDirectNativeCall == NULL && callee->getResolvedMethodSymbol()->canDirectNativeCall())))754return self()->getLinkage(TR_J9JNILinkage);755756return self()->getLinkage(callee->getLinkageConvention());757}758759760