Path: blob/master/runtime/compiler/x/codegen/J9CodeGenerator.cpp
6004 views
/*******************************************************************************1* Copyright (c) 2000, 2021 IBM Corp. and others2*3* This program and the accompanying materials are made available under4* the terms of the Eclipse Public License 2.0 which accompanies this5* distribution and is available at https://www.eclipse.org/legal/epl-2.0/6* or the Apache License, Version 2.0 which accompanies this distribution and7* is available at https://www.apache.org/licenses/LICENSE-2.0.8*9* This Source Code may also be made available under the following10* Secondary Licenses when the conditions for such availability set11* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU12* General Public License, version 2 with the GNU Classpath13* Exception [1] and GNU General Public License, version 2 with the14* OpenJDK Assembly Exception [2].15*16* [1] https://www.gnu.org/software/classpath/license.html17* [2] http://openjdk.java.net/legal/assembly-exception.html18*19* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception20*******************************************************************************/2122#include "codegen/AheadOfTimeCompile.hpp"23#include "codegen/CodeGenerator.hpp"24#include "codegen/CodeGenerator_inlines.hpp"25#include "codegen/Machine.hpp"26#include "codegen/Linkage.hpp"27#include "codegen/Linkage_inlines.hpp"28#include "compile/Compilation.hpp"29#include "control/Recompilation.hpp"30#include "control/RecompilationInfo.hpp"31#include "env/CompilerEnv.hpp"32#include "env/VMJ9.h"33#include "env/jittypes.h"34#include "il/Node.hpp"35#include "il/Node_inlines.hpp"36#include "il/TreeTop.hpp"37#include "il/TreeTop_inlines.hpp"38#include "runtime/CodeCache.hpp"39#include "runtime/CodeCacheConfig.hpp"40#include "runtime/CodeCacheExceptions.hpp"41#include "runtime/CodeCacheManager.hpp"42#include "runtime/CodeCacheTypes.hpp"43#include "runtime/J9Runtime.hpp"44#include "x/codegen/CallSnippet.hpp"45#include "x/codegen/X86Recompilation.hpp"46#include "x/codegen/FPTreeEvaluator.hpp"47#include "x/codegen/X86Instruction.hpp"4849extern void TEMPORARY_initJ9X86TreeEvaluatorTable(TR::CodeGenerator *cg);5051J9::X86::CodeGenerator::CodeGenerator(TR::Compilation *comp) :52J9::CodeGenerator(comp),53_stackFramePaddingSizeInBytes(0)54{55/**56* Do not add CodeGenerator initialization logic here.57* Use the \c initialize() method instead.58*/59}6061void62J9::X86::CodeGenerator::initialize()63{64self()->J9::CodeGenerator::initialize();6566TR::CodeGenerator *cg = self();67TR::Compilation *comp = cg->comp();68TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());69TR::ResolvedMethodSymbol *methodSymbol = comp->getJittedMethodSymbol();70TR_ResolvedMethod * jittedMethod = methodSymbol->getResolvedMethod();7172cg->setAheadOfTimeCompile(new (cg->trHeapMemory()) TR::AheadOfTimeCompile(cg));7374if (!TR::Compiler->om.canGenerateArraylets())75{76cg->setSupportsReferenceArrayCopy();77cg->setSupportsInlineStringLatin1Inflate();78}7980if (comp->requiresSpineChecks())81{82// Spine check code doesn't officially support codegen register rematerialization83// yet. Better spill placement interferes with tracking live spills.84//85cg->setUseNonLinearRegisterAssigner();86cg->resetEnableRematerialisation();87cg->resetEnableBetterSpillPlacements();88}8990static char *disableMonitorCacheLookup = feGetEnv("TR_disableMonitorCacheLookup");91if (!disableMonitorCacheLookup)92{93comp->setOption(TR_EnableMonitorCacheLookup);94}9596cg->setSupportsPartialInlineOfMethodHooks();97cg->setSupportsInliningOfTypeCoersionMethods();98cg->setSupportsNewInstanceImplOpt();99100TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1) == cg->getX86ProcessorInfo().supportsSSE4_1(), "supportsSSE4_1() failed\n");101TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSSE3) == cg->getX86ProcessorInfo().supportsSSSE3(), "supportsSSSE3() failed\n");102103if (comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1) &&104!comp->getOption(TR_DisableSIMDStringCaseConv) &&105!TR::Compiler->om.canGenerateArraylets())106cg->setSupportsInlineStringCaseConversion();107108if (comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSSE3) &&109!comp->getOption(TR_DisableFastStringIndexOf) &&110!TR::Compiler->om.canGenerateArraylets())111{112cg->setSupportsInlineStringIndexOf();113}114115if (comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1) &&116!comp->getOption(TR_DisableSIMDStringHashCode) &&117!TR::Compiler->om.canGenerateArraylets())118{119cg->setSupportsInlineStringHashCode();120}121122if (comp->generateArraylets() && !comp->getOptions()->realTimeGC())123{124cg->setSupportsStackAllocationOfArraylets();125}126127if (!comp->getOption(TR_FullSpeedDebug))128cg->setSupportsDirectJNICalls();129130if (!comp->getOption(TR_DisableBDLLVersioning))131{132cg->setSupportsBigDecimalLongLookasideVersioning();133cg->setSupportsBDLLHardwareOverflowCheck();134}135136// Disable fast gencon barriers for AOT compiles because relocations on137// the inlined heap addresses are not available (yet).138//139if (!fej9->supportsEmbeddedHeapBounds())140{141comp->setOption(TR_DisableWriteBarriersRangeCheck);142}143144// Enable copy propagation of floats.145//146cg->setSupportsJavaFloatSemantics();147148/*149* "Statically" initialize the FE-specific tree evaluator functions.150* This code only needs to execute once per JIT lifetime.151*/152static bool initTreeEvaluatorTable = false;153if (!initTreeEvaluatorTable)154{155TEMPORARY_initJ9X86TreeEvaluatorTable(cg);156initTreeEvaluatorTable = true;157}158159// Set return type info here so that we always set it in case the return is optimized out160TR_ReturnInfo returnInfo;161switch (jittedMethod->returnType())162{163case TR::NoType:164returnInfo = TR_VoidReturn;165break;166case TR::Int8:167case TR::Int16:168case TR::Int32:169returnInfo = TR_IntReturn;170break;171case TR::Int64:172returnInfo = TR_LongReturn;173break;174case TR::Address:175returnInfo = comp->target().is64Bit() ? TR_ObjectReturn : TR_IntReturn;176break;177case TR::Float:178returnInfo = TR_FloatXMMReturn;179break;180case TR::Double:181returnInfo = TR_DoubleXMMReturn;182break;183}184185comp->setReturnInfo(returnInfo);186}187188TR::Recompilation *189J9::X86::CodeGenerator::allocateRecompilationInfo()190{191return TR_X86Recompilation::allocate(self()->comp());192}193194void195J9::X86::CodeGenerator::beginInstructionSelection()196{197TR::Compilation *comp = self()->comp();198_returnTypeInfoInstruction = NULL;199TR::ResolvedMethodSymbol *methodSymbol = comp->getJittedMethodSymbol();200TR::Recompilation *recompilation = comp->getRecompilationInfo();201TR::Node *startNode = comp->getStartTree()->getNode();202203if (recompilation && recompilation->generatePrePrologue() != NULL)204{205// Return type info will have been generated by recompilation info206//207if (methodSymbol->getLinkageConvention() == TR_Private)208_returnTypeInfoInstruction = (TR::X86ImmInstruction*)self()->getAppendInstruction();209210if (methodSymbol->getLinkageConvention() == TR_System)211_returnTypeInfoInstruction = (TR::X86ImmInstruction*)self()->getAppendInstruction();212}213else if (comp->getOption(TR_FullSpeedDebug) || comp->getOption(TR_SupportSwitchToInterpreter))214{215int32_t alignmentMargin = comp->target().is64Bit()? 2 : 0; // # bytes between the alignment instruction and the address that needs to be aligned216if (methodSymbol->getLinkageConvention() == TR_Private)217alignmentMargin += 4; // The linkageInfo word218219// Make sure the startPC is at least 8-byte aligned. The VM needs to be220// able to low-tag the pointer, and also for code patching on IA32, this221// is how we make sure the first instruction doesn't cross a patching boundary (see 175746).222//223int32_t alignmentBoundary = 8;224225TR::Instruction *cursor = self()->generateSwitchToInterpreterPrePrologue(NULL, alignmentBoundary, alignmentMargin);226if (comp->target().is64Bit())227{228// A copy of the first two bytes of the method, in case we need to un-patch them229//230new (self()->trHeapMemory()) TR::X86ImmInstruction(cursor, TR::InstOpCode::DWImm2, 0xcccc, self());231}232}233else if (methodSymbol->isJNI())234{235236intptr_t methodAddress = (intptr_t)methodSymbol->getResolvedMethod()->startAddressForJNIMethod(comp);237238if (comp->target().is64Bit())239new (self()->trHeapMemory()) TR::AMD64Imm64Instruction ((TR::Instruction *)NULL, TR::InstOpCode::DQImm64, methodAddress, self());240else241new (self()->trHeapMemory()) TR::X86ImmInstruction ((TR::Instruction *)NULL, TR::InstOpCode::DDImm4, methodAddress, self());242}243244if (methodSymbol->getLinkageConvention() == TR_Private && !_returnTypeInfoInstruction)245{246// linkageInfo word247if (self()->getAppendInstruction())248_returnTypeInfoInstruction = generateImmInstruction(TR::InstOpCode::DDImm4, startNode, 0, self());249else250_returnTypeInfoInstruction = new (self()->trHeapMemory()) TR::X86ImmInstruction((TR::Instruction *)NULL, TR::InstOpCode::DDImm4, 0, self());251}252253if (methodSymbol->getLinkageConvention() == TR_System && !_returnTypeInfoInstruction)254{255// linkageInfo word256if (self()->getAppendInstruction())257_returnTypeInfoInstruction = generateImmInstruction(TR::InstOpCode::DDImm4, startNode, 0, self());258else259_returnTypeInfoInstruction = new (self()->trHeapMemory()) TR::X86ImmInstruction((TR::Instruction *)NULL, TR::InstOpCode::DDImm4, 0, self());260}261262TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)1, self());263if (_linkageProperties->getMethodMetaDataRegister() != TR::RealRegister::NoReg)264{265deps->addPostCondition(self()->getVMThreadRegister(),266(TR::RealRegister::RegNum)self()->getVMThreadRegister()->getAssociation(), self());267}268deps->stopAddingPostConditions();269270if (self()->getAppendInstruction())271generateInstruction(TR::InstOpCode::proc, startNode, deps, self());272else273new (self()->trHeapMemory()) TR::Instruction(deps, TR::InstOpCode::proc, (TR::Instruction *)NULL, self());274275// Set the default FPCW to single precision mode if we are allowed to.276//277if (self()->enableSinglePrecisionMethods() && comp->getJittedMethodSymbol()->usesSinglePrecisionMode())278{279auto cds = self()->findOrCreate2ByteConstant(startNode, SINGLE_PRECISION_ROUND_TO_NEAREST);280generateMemInstruction(TR::InstOpCode::LDCWMem, startNode, generateX86MemoryReference(cds, self()), self());281}282}283284void285J9::X86::CodeGenerator::endInstructionSelection()286{287TR::Compilation *comp = self()->comp();288if (_returnTypeInfoInstruction != NULL)289{290TR_ReturnInfo returnInfo = comp->getReturnInfo();291292// Note: this will get clobbered again in code generation on AMD64293_returnTypeInfoInstruction->setSourceImmediate(returnInfo);294}295296// Reset the FPCW in the dummy finally block.297//298if (self()->enableSinglePrecisionMethods() &&299comp->getJittedMethodSymbol()->usesSinglePrecisionMode())300{301TR_ASSERT(self()->getLastCatchAppendInstruction(),302"endInstructionSelection() ==> Could not find the dummy finally block!\n");303304auto cds = self()->findOrCreate2ByteConstant(self()->getLastCatchAppendInstruction()->getNode(), DOUBLE_PRECISION_ROUND_TO_NEAREST);305generateMemInstruction(self()->getLastCatchAppendInstruction(), TR::InstOpCode::LDCWMem, generateX86MemoryReference(cds, self()), self());306}307}308309TR::Instruction *310J9::X86::CodeGenerator::generateSwitchToInterpreterPrePrologue(311TR::Instruction *prev,312uint8_t alignment,313uint8_t alignmentMargin)314{315TR::Compilation *comp = self()->comp();316TR::Register *ediRegister = self()->allocateRegister();317TR::ResolvedMethodSymbol *methodSymbol = comp->getJittedMethodSymbol();318intptr_t feMethod = (intptr_t)methodSymbol->getResolvedMethod()->resolvedMethodAddress();319TR::LabelSymbol *startLabel = NULL;320321if (comp->target().is32Bit())322{323// Put the alignment before the interpreter jump so the jump's offset is fixed324//325alignmentMargin += 6; // TR::InstOpCode::MOV4RegImm4 below326prev = generateAlignmentInstruction(prev, alignment, alignmentMargin, self());327}328329startLabel = generateLabelSymbol(self());330prev = generateLabelInstruction(prev, TR::InstOpCode::label, startLabel, self());331self()->setSwitchToInterpreterLabel(startLabel);332333TR::RegisterDependencyConditions *deps =334generateRegisterDependencyConditions((uint8_t)1, (uint8_t)0, self());335deps->addPreCondition(ediRegister, TR::RealRegister::edi, self());336337TR::SymbolReference *helperSymRef =338self()->symRefTab()->findOrCreateRuntimeHelper(TR_j2iTransition);339340if (comp->target().is64Bit())341{342prev = generateRegImm64Instruction(prev, TR::InstOpCode::MOV8RegImm64, ediRegister, feMethod, self(), TR_RamMethod);343if (comp->getOption(TR_EnableHCR))344comp->getStaticHCRPICSites()->push_front(prev);345prev = self()->getLinkage(methodSymbol->getLinkageConvention())->storeArguments(prev, methodSymbol);346}347else348{349prev = generateRegImmInstruction(prev, TR::InstOpCode::MOV4RegImm4, ediRegister, feMethod, self(), TR_RamMethod);350if (comp->getOption(TR_EnableHCR))351comp->getStaticHCRPICSites()->push_front(prev);352}353354prev = new (self()->trHeapMemory()) TR::X86ImmSymInstruction(prev, TR::InstOpCode::JMP4, (uintptr_t)helperSymRef->getMethodAddress(), helperSymRef, deps, self());355self()->stopUsingRegister(ediRegister);356357if (comp->target().is64Bit())358{359// Generate a mini-trampoline jump to the start of the360// SwitchToInterpreterPrePrologue. This comes after the alignment361// instruction so we know where it will be relative to startPC. Note362// that it ought to be a TR::InstOpCode::JMP1 despite the fact that we're using a TR::InstOpCode::JMP4363// opCode; otherwise, this instruction is not 2 bytes long, so it will364// mess up alignment.365//366alignmentMargin += 2; // Size of the mini-trampoline367prev = generateAlignmentInstruction(prev, alignment, alignmentMargin, self());368prev = new (self()->trHeapMemory()) TR::X86LabelInstruction(prev, TR::InstOpCode::JMP4, startLabel, self());369}370371return prev;372}373374bool375J9::X86::CodeGenerator::nopsAlsoProcessedByRelocations()376{377return self()->fej9()->nopsAlsoProcessedByRelocations();378}379380381bool382J9::X86::CodeGenerator::enableAESInHardwareTransformations()383{384if (self()->comp()->target().cpu.supportsFeature(OMR_FEATURE_X86_AESNI) && !self()->comp()->getOption(TR_DisableAESInHardware) && !self()->comp()->getCurrentMethod()->isJNINative())385return true;386else387return false;388}389390bool391J9::X86::CodeGenerator::suppressInliningOfRecognizedMethod(TR::RecognizedMethod method)392{393switch (method)394{395case TR::java_lang_Object_clone:396return true;397default:398return false;399}400}401402bool403J9::X86::CodeGenerator::supportsInliningOfIsAssignableFrom()404{405static const bool disableInliningOfIsAssignableFrom = feGetEnv("TR_disableInlineIsAssignableFrom") != NULL;406return !disableInliningOfIsAssignableFrom;407}408409bool410J9::X86::CodeGenerator::canEmitBreakOnDFSet()411{412static const bool enableBreakOnDFSet = feGetEnv("TR_enableBreakOnDFSet") != NULL;413return enableBreakOnDFSet;414}415416void417J9::X86::CodeGenerator::reserveNTrampolines(int32_t numTrampolines)418{419TR_J9VMBase *fej9 = (TR_J9VMBase *)(self()->fe());420TR::Compilation *comp = self()->comp();421422if (!TR::CodeCacheManager::instance()->codeCacheConfig().needsMethodTrampolines())423return;424425bool hadClassUnloadMonitor;426bool hadVMAccess = fej9->releaseClassUnloadMonitorAndAcquireVMaccessIfNeeded(comp, &hadClassUnloadMonitor);427428TR::CodeCache *curCache = self()->getCodeCache();429TR::CodeCache *newCache = curCache;430OMR::CodeCacheErrorCode::ErrorCode status = OMR::CodeCacheErrorCode::ERRORCODE_SUCCESS;431432TR_ASSERT(curCache->isReserved(), "Current CodeCache is not reserved");433434if (!fej9->isAOT_DEPRECATED_DO_NOT_USE())435{436status = curCache->reserveSpaceForTrampoline_bridge(numTrampolines);437if (status != OMR::CodeCacheErrorCode::ERRORCODE_SUCCESS)438{439// Current code cache is no good. Must unreserve440curCache->unreserve();441newCache = 0;442if (self()->getCodeGeneratorPhase() != TR::CodeGenPhase::BinaryEncodingPhase)443{444newCache = TR::CodeCacheManager::instance()->getNewCodeCache(comp->getCompThreadID());445if (newCache)446{447status = newCache->reserveSpaceForTrampoline_bridge(numTrampolines);448449if (status != OMR::CodeCacheErrorCode::ERRORCODE_SUCCESS)450{451TR_ASSERT(0, "Failed to reserve trampolines in fresh code cache.");452newCache->unreserve();453}454}455}456}457}458459fej9->acquireClassUnloadMonitorAndReleaseVMAccessIfNeeded(comp, hadVMAccess, hadClassUnloadMonitor);460461if (!newCache)462{463comp->failCompilation<TR::TrampolineError>("Failed to allocate code cache in reserveNTrampolines");464}465466if (newCache != curCache)467{468// We keep track of number of IPIC trampolines that are present in the current code cache469// If the code caches have been switched we have to reset this number, the setCodeCacheSwitched helper called470// in switchCodeCacheTo resets the count471// If we are in binaryEncoding we will kill this compilation anyway472//473self()->switchCodeCacheTo(newCache);474}475else476{477self()->setNumReservedIPICTrampolines(self()->getNumReservedIPICTrampolines() + numTrampolines);478}479480TR_ASSERT(newCache->isReserved(), "New CodeCache is not reserved");481}482483484