Path: blob/master/runtime/compiler/x/codegen/J9TreeEvaluator.cpp
6004 views
/*******************************************************************************1* Copyright (c) 2000, 2022 IBM Corp. and others2*3* This program and the accompanying materials are made available under4* the terms of the Eclipse Public License 2.0 which accompanies this5* distribution and is available at https://www.eclipse.org/legal/epl-2.0/6* or the Apache License, Version 2.0 which accompanies this distribution and7* is available at https://www.apache.org/licenses/LICENSE-2.0.8*9* This Source Code may also be made available under the following10* Secondary Licenses when the conditions for such availability set11* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU12* General Public License, version 2 with the GNU Classpath13* Exception [1] and GNU General Public License, version 2 with the14* OpenJDK Assembly Exception [2].15*16* [1] https://www.gnu.org/software/classpath/license.html17* [2] http://openjdk.java.net/legal/assembly-exception.html18*19* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception20*******************************************************************************/2122#include <assert.h>23#include <limits.h>24#include <math.h>25#include <stdint.h>26#include "j9.h"27#include "j9cfg.h"28#include "j9consts.h"29#include "j9port.h"30#include "locknursery.h"31#include "thrdsup.h"32#include "thrtypes.h"33#include "codegen/AheadOfTimeCompile.hpp"34#include "codegen/CodeGenerator.hpp"35#include "codegen/Instruction.hpp"36#include "codegen/Machine.hpp"37#include "codegen/Linkage.hpp"38#include "codegen/Linkage_inlines.hpp"39#include "codegen/LiveRegister.hpp"40#include "codegen/Relocation.hpp"41#include "codegen/Register.hpp"42#include "codegen/RegisterPair.hpp"43#include "codegen/ScratchRegisterManager.hpp"44#include "codegen/Snippet.hpp"45#include "codegen/TreeEvaluator.hpp"46#include "codegen/UnresolvedDataSnippet.hpp"47#include 
"compile/CompilationTypes.hpp"48#include "compile/ResolvedMethod.hpp"49#include "compile/VirtualGuard.hpp"50#include "control/Recompilation.hpp"51#include "control/RecompilationInfo.hpp"52#include "env/CompilerEnv.hpp"53#include "env/CHTable.hpp"54#include "env/IO.hpp"55#include "env/j9method.h"56#include "env/jittypes.h"57#include "env/PersistentCHTable.hpp"58#include "env/VMJ9.h"59#include "il/Block.hpp"60#include "il/DataTypes.hpp"61#include "il/Node.hpp"62#include "il/Node_inlines.hpp"63#include "il/TreeTop.hpp"64#include "il/TreeTop_inlines.hpp"65#include "infra/SimpleRegex.hpp"66#include "OMR/Bytes.hpp"67#include "x/codegen/AllocPrefetchSnippet.hpp"68#include "x/codegen/CheckFailureSnippet.hpp"69#include "x/codegen/CompareAnalyser.hpp"70#include "x/codegen/ForceRecompilationSnippet.hpp"71#include "x/codegen/FPTreeEvaluator.hpp"72#include "x/codegen/J9X86Instruction.hpp"73#include "x/codegen/MonitorSnippet.hpp"74#include "x/codegen/OutlinedInstructions.hpp"75#include "x/codegen/HelperCallSnippet.hpp"76#include "x/codegen/X86Evaluator.hpp"77#include "env/CompilerEnv.hpp"78#include "runtime/J9Runtime.hpp"79#include "codegen/J9WatchedStaticFieldSnippet.hpp"80#include "codegen/X86FPConversionSnippet.hpp"8182#ifdef TR_TARGET_64BIT83#include "codegen/AMD64PrivateLinkage.hpp"84#endif8586#ifdef TR_TARGET_32BIT87#include "codegen/IA32PrivateLinkage.hpp"88#endif8990#ifdef LINUX91#include <time.h>9293#endif9495#define NUM_PICS 39697// Minimum number of words for zero-initialization via REP TR::InstOpCode::STOSD98//99#define MIN_REPSTOSD_WORDS 64100static int32_t minRepstosdWords = 0;101102// Maximum number of words per loop iteration for loop zero-initialization.103//104#define MAX_ZERO_INIT_WORDS_PER_ITERATION 4105static int32_t maxZeroInitWordsPerIteration = 0;106107static bool getNodeIs64Bit(TR::Node *node, TR::CodeGenerator *cg);108static TR::Register *intOrLongClobberEvaluate(TR::Node *node, bool nodeIs64Bit, TR::CodeGenerator *cg);109110static uint32_t 
logBase2(uintptr_t n)111{112// Could use leadingZeroes, except we can't call it from here113//114uint32_t result = 8*sizeof(n)-1;115uintptr_t mask = ((uintptr_t)1) << result;116while (mask && !(mask & n))117{118mask >>= 1;119result--;120}121return result;122}123124// ----------------------------------------------------------------------------125inline void generateLoadJ9Class(TR::Node* node, TR::Register* j9class, TR::Register* object, TR::CodeGenerator* cg)126{127bool needsNULLCHK = false;128TR::ILOpCodes opValue = node->getOpCodeValue();129130if (node->getOpCode().isReadBar() || node->getOpCode().isWrtBar())131needsNULLCHK = true;132else133{134switch (opValue)135{136case TR::monent:137case TR::monexit:138TR_ASSERT_FATAL(TR::Compiler->om.areValueTypesEnabled() || TR::Compiler->om.areValueBasedMonitorChecksEnabled(),139"monent and monexit are expected for generateLoadJ9Class only when value type or when value based monitor check is enabled");140case TR::checkcastAndNULLCHK:141needsNULLCHK = true;142break;143case TR::icall: // TR_checkAssignable144return; // j9class register already holds j9class145case TR::checkcast:146case TR::instanceof:147break;148default:149TR_ASSERT_FATAL(false, "Unexpected opCode for generateLoadJ9Class %s.", node->getOpCode().getName());150break;151}152}153154auto use64BitClasses = cg->comp()->target().is64Bit() && !TR::Compiler->om.generateCompressedObjectHeaders();155auto instr = generateRegMemInstruction(TR::InstOpCode::LRegMem(use64BitClasses), node, j9class, generateX86MemoryReference(object, TR::Compiler->om.offsetOfObjectVftField(), cg), cg);156if (needsNULLCHK)157{158cg->setImplicitExceptionPoint(instr);159instr->setNeedsGCMap(0xFF00FFFF);160if (opValue == TR::checkcastAndNULLCHK)161instr->setNode(cg->comp()->findNullChkInfo(node));162}163164165auto mask = TR::Compiler->om.maskOfObjectVftField();166if (~mask != 0)167{168generateRegImmInstruction(~mask <= 127 ? 
TR::InstOpCode::ANDRegImms(use64BitClasses) : TR::InstOpCode::ANDRegImm4(use64BitClasses), node, j9class, mask, cg);169}170}171172static TR_OutlinedInstructions *generateArrayletReference(173TR::Node *node,174TR::Node *loadOrStoreOrArrayElementNode,175TR::Instruction *checkInstruction,176TR::LabelSymbol *arrayletRefLabel,177TR::LabelSymbol *restartLabel,178TR::Register *baseArrayReg,179TR::Register *loadOrStoreReg,180TR::Register *indexReg,181int32_t indexValue,182TR::Register *valueReg,183bool needsBoundCheck,184TR::CodeGenerator *cg)185{186TR::Compilation *comp = cg->comp();187TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());188189TR::Register *scratchReg = cg->allocateRegister();190191TR_OutlinedInstructions *arrayletRef = new (cg->trHeapMemory()) TR_OutlinedInstructions(arrayletRefLabel, cg);192arrayletRef->setRestartLabel(restartLabel);193194if (needsBoundCheck)195{196// The current block is required for exception handling and anchoring197// the GC map.198//199arrayletRef->setBlock(cg->getCurrentEvaluationBlock());200arrayletRef->setCallNode(node);201}202203cg->getOutlinedInstructionsList().push_front(arrayletRef);204205arrayletRef->swapInstructionListsWithCompilation();206207generateLabelInstruction(NULL, TR::InstOpCode::label, arrayletRefLabel, cg)->setNode(node);208209// TODO: REMOVE THIS!210//211// This merely indicates that this OOL sequence should be assigned with the non-linear212// assigner, and should go away when the non-linear assigner handles all OOL sequences.213//214arrayletRefLabel->setNonLinear();215216static char *forceArrayletInt = feGetEnv("TR_forceArrayletInt");217if (forceArrayletInt)218{219generateInstruction(TR::InstOpCode::INT3, node, cg);220}221222// -----------------------------------------------------------------------------------223// Track all virtual register use within the arraylet path. 
This info will be used224// to adjust the virtual register use counts within the mainline path for more precise225// register assignment.226// -----------------------------------------------------------------------------------227228cg->startRecordingRegisterUsage();229230if (needsBoundCheck)231{232// -------------------------------------------------------------------------233// Check if the base array has a spine. If not, this is a real AIOB.234// -------------------------------------------------------------------------235236TR::MemoryReference *arraySizeMR =237generateX86MemoryReference(baseArrayReg, fej9->getOffsetOfContiguousArraySizeField(), cg);238239generateMemImmInstruction(TR::InstOpCode::CMP4MemImms, node, arraySizeMR, 0, cg);240241TR::LabelSymbol *boundCheckFailureLabel = generateLabelSymbol(cg);242243checkInstruction = generateLabelInstruction(TR::InstOpCode::JNE4, node, boundCheckFailureLabel, cg);244245cg->addSnippet(246new (cg->trHeapMemory()) TR::X86CheckFailureSnippet(247cg, node->getSymbolReference(),248boundCheckFailureLabel,249checkInstruction,250false251));252253// -------------------------------------------------------------------------254// The array has a spine. Do a bound check on its true length.255// -------------------------------------------------------------------------256257arraySizeMR = generateX86MemoryReference(baseArrayReg, fej9->getOffsetOfDiscontiguousArraySizeField(), cg);258259if (!indexReg)260{261TR::InstOpCode::Mnemonic op = (indexValue >= -128 && indexValue <= 127) ? 
TR::InstOpCode::CMP4MemImms : TR::InstOpCode::CMP4MemImm4;262generateMemImmInstruction(op, node, arraySizeMR, indexValue, cg);263}264else265{266generateMemRegInstruction(TR::InstOpCode::CMP4MemReg, node, arraySizeMR, indexReg, cg);267}268269boundCheckFailureLabel = generateLabelSymbol(cg);270checkInstruction = generateLabelInstruction(TR::InstOpCode::JBE4, node, boundCheckFailureLabel, cg);271272cg->addSnippet(273new (cg->trHeapMemory()) TR::X86CheckFailureSnippet(274cg, node->getSymbolReference(),275boundCheckFailureLabel,276checkInstruction,277false278));279}280281// -------------------------------------------------------------------------282// Determine if a load needs to be decompressed.283// -------------------------------------------------------------------------284285bool seenCompressionSequence = false;286bool loadNeedsDecompression = false;287288if (loadOrStoreOrArrayElementNode->getOpCodeValue() == TR::l2a ||289(((loadOrStoreOrArrayElementNode->getOpCodeValue() == TR::aload ||290loadOrStoreOrArrayElementNode->getOpCodeValue() == TR::aRegLoad) &&291node->isSpineCheckWithArrayElementChild()) &&292comp->target().is64Bit() && comp->useCompressedPointers()))293loadNeedsDecompression = true;294295TR::Node *actualLoadOrStoreOrArrayElementNode = loadOrStoreOrArrayElementNode;296while ((loadNeedsDecompression && actualLoadOrStoreOrArrayElementNode->getOpCode().isConversion()) ||297actualLoadOrStoreOrArrayElementNode->containsCompressionSequence())298{299if (actualLoadOrStoreOrArrayElementNode->containsCompressionSequence())300seenCompressionSequence = true;301302actualLoadOrStoreOrArrayElementNode = actualLoadOrStoreOrArrayElementNode->getFirstChild();303}304305// -------------------------------------------------------------------------306// Do the load, store, or array address calculation307// -------------------------------------------------------------------------308309TR::DataType dt = actualLoadOrStoreOrArrayElementNode->getDataType();310int32_t 
elementSize;311312if (dt == TR::Address)313{314elementSize = TR::Compiler->om.sizeofReferenceField();315}316else317{318elementSize = TR::Symbol::convertTypeToSize(dt);319}320321int32_t spinePointerSize = (comp->target().is64Bit() && !comp->useCompressedPointers()) ? 8 : 4;322int32_t arrayHeaderSize = TR::Compiler->om.discontiguousArrayHeaderSizeInBytes();323int32_t arrayletMask = fej9->getArrayletMask(elementSize);324325TR::MemoryReference *spineMR;326327// Load the arraylet from the spine.328//329if (indexReg)330{331TR::InstOpCode::Mnemonic op = comp->target().is64Bit() ? TR::InstOpCode::MOVSXReg8Reg4 : TR::InstOpCode::MOVRegReg();332generateRegRegInstruction(op, node, scratchReg, indexReg, cg);333334int32_t spineShift = fej9->getArraySpineShift(elementSize);335generateRegImmInstruction(TR::InstOpCode::SARRegImm1(), node, scratchReg, spineShift, cg);336337spineMR =338generateX86MemoryReference(339baseArrayReg,340scratchReg,341TR::MemoryReference::convertMultiplierToStride(spinePointerSize),342arrayHeaderSize,343cg);344}345else346{347int32_t spineIndex = fej9->getArrayletLeafIndex(indexValue, elementSize);348int32_t spineDisp32 = (spineIndex * spinePointerSize) + arrayHeaderSize;349350spineMR = generateX86MemoryReference(baseArrayReg, spineDisp32, cg);351}352353TR::InstOpCode::Mnemonic op = (spinePointerSize == 8) ? 
TR::InstOpCode::L8RegMem : TR::InstOpCode::L4RegMem;354generateRegMemInstruction(op, node, scratchReg, spineMR, cg);355356// Decompress the arraylet pointer from the spine.357int32_t shiftOffset = 0;358359if (comp->target().is64Bit() && comp->useCompressedPointers())360{361shiftOffset = TR::Compiler->om.compressedReferenceShiftOffset();362if (shiftOffset > 0)363{364generateRegImmInstruction(TR::InstOpCode::SHL8RegImm1, node, scratchReg, shiftOffset, cg);365}366}367368TR::MemoryReference *arrayletMR;369370// Calculate the offset with the arraylet for the index.371//372if (indexReg)373{374TR::Register *scratchReg2 = cg->allocateRegister();375376generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, scratchReg2, indexReg, cg);377generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(), node, scratchReg2, arrayletMask, cg);378arrayletMR = generateX86MemoryReference(379scratchReg,380scratchReg2,381TR::MemoryReference::convertMultiplierToStride(elementSize),382cg);383384cg->stopUsingRegister(scratchReg2);385}386else387{388int32_t arrayletIndex = ((TR_J9VMBase *)fej9)->getLeafElementIndex(indexValue, elementSize);389arrayletMR = generateX86MemoryReference(scratchReg, arrayletIndex*elementSize, cg);390}391392cg->stopUsingRegister(scratchReg);393394if (!actualLoadOrStoreOrArrayElementNode->getOpCode().isStore())395{396TR::InstOpCode::Mnemonic op;397398TR::MemoryReference *highArrayletMR = NULL;399TR::Register *highRegister = NULL;400401// If we're not loading an array shadow then this must be an effective402// address computation on the array element (for a write barrier).403//404if ((!actualLoadOrStoreOrArrayElementNode->getOpCode().hasSymbolReference() ||405!actualLoadOrStoreOrArrayElementNode->getSymbolReference()->getSymbol()->isArrayShadowSymbol()) &&406!node->isSpineCheckWithArrayElementChild())407{408op = TR::InstOpCode::LEARegMem();409}410else411{412switch (dt)413{414case TR::Int8: op = TR::InstOpCode::L1RegMem; break;415case TR::Int16: op = 
TR::InstOpCode::L2RegMem; break;416case TR::Int32: op = TR::InstOpCode::L4RegMem; break;417case TR::Int64:418if (comp->target().is64Bit())419op = TR::InstOpCode::L8RegMem;420else421{422TR_ASSERT(loadOrStoreReg->getRegisterPair(), "expecting a register pair");423424op = TR::InstOpCode::L4RegMem;425highArrayletMR = generateX86MemoryReference(*arrayletMR, 4, cg);426highRegister = loadOrStoreReg->getHighOrder();427loadOrStoreReg = loadOrStoreReg->getLowOrder();428}429break;430431case TR::Float: op = TR::InstOpCode::MOVSSRegMem; break;432case TR::Double: op = TR::InstOpCode::MOVSDRegMem; break;433434case TR::Address:435if (comp->target().is32Bit() || comp->useCompressedPointers())436op = TR::InstOpCode::L4RegMem;437else438op = TR::InstOpCode::L8RegMem;439break;440441default:442TR_ASSERT(0, "unsupported array element load type");443op = TR::InstOpCode::bad;444}445}446447generateRegMemInstruction(op, node, loadOrStoreReg, arrayletMR, cg);448449if (highArrayletMR)450{451generateRegMemInstruction(op, node, highRegister, highArrayletMR, cg);452}453454// Decompress the loaded address if necessary.455//456if (loadNeedsDecompression)457{458if (comp->target().is64Bit() && comp->useCompressedPointers())459{460if (shiftOffset > 0)461{462generateRegImmInstruction(TR::InstOpCode::SHL8RegImm1, node, loadOrStoreReg, shiftOffset, cg);463}464}465}466}467else468{469if (dt != TR::Address)470{471// movE [S + S2], value472//473TR::InstOpCode::Mnemonic op;474bool needStore = true;475476switch (dt)477{478case TR::Int8: op = valueReg ? TR::InstOpCode::S1MemReg : TR::InstOpCode::S1MemImm1; break;479case TR::Int16: op = valueReg ? TR::InstOpCode::S2MemReg : TR::InstOpCode::S2MemImm2; break;480case TR::Int32: op = valueReg ? TR::InstOpCode::S4MemReg : TR::InstOpCode::S4MemImm4; break;481case TR::Int64:482if (comp->target().is64Bit())483{484// The range of the immediate must be verified before this function to485// fall within a signed 32-bit integer.486//487op = valueReg ? 
TR::InstOpCode::S8MemReg : TR::InstOpCode::S8MemImm4;488}489else490{491if (valueReg)492{493TR_ASSERT(valueReg->getRegisterPair(), "value must be a register pair");494generateMemRegInstruction(TR::InstOpCode::S4MemReg, node, arrayletMR, valueReg->getLowOrder(), cg);495generateMemRegInstruction(TR::InstOpCode::S4MemReg, node,496generateX86MemoryReference(*arrayletMR, 4, cg),497valueReg->getHighOrder(), cg);498}499else500{501TR::Node *valueChild = actualLoadOrStoreOrArrayElementNode->getSecondChild();502TR_ASSERT(valueChild->getOpCode().isLoadConst(), "expecting a long constant child");503504generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node, arrayletMR, valueChild->getLongIntLow(), cg);505generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node,506generateX86MemoryReference(*arrayletMR, 4, cg),507valueChild->getLongIntHigh(), cg);508}509510needStore = false;511}512break;513514case TR::Float: op = TR::InstOpCode::MOVSSMemReg; break;515case TR::Double: op = TR::InstOpCode::MOVSDMemReg; break;516517default:518TR_ASSERT(0, "unsupported array element store type");519op = TR::InstOpCode::bad;520}521522if (needStore)523{524if (valueReg)525generateMemRegInstruction(op, node, arrayletMR, valueReg, cg);526else527{528int32_t value = actualLoadOrStoreOrArrayElementNode->getSecondChild()->getInt();529generateMemImmInstruction(op, node, arrayletMR, value, cg);530}531}532}533else534{535// lea S, [S+S2]536TR_ASSERT(0, "OOL reference stores not supported yet");537}538}539540generateLabelInstruction(TR::InstOpCode::JMP4, node, restartLabel, cg);541542// -----------------------------------------------------------------------------------543// Stop tracking virtual register usage.544// -----------------------------------------------------------------------------------545546arrayletRef->setOutlinedPathRegisterUsageList(cg->stopRecordingRegisterUsage());547548arrayletRef->swapInstructionListsWithCompilation();549550return arrayletRef;551}552553static TR::Instruction 
*generatePrefetchAfterHeaderAccess(TR::Node *node,554TR::Register *objectReg,555TR::CodeGenerator *cg)556{557TR::Compilation *comp = cg->comp();558TR::Instruction *instr = NULL;559560static const char *prefetch = feGetEnv("TR_EnableSoftwarePrefetch");561TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.is(OMR_PROCESSOR_X86_INTELCORE2) == cg->getX86ProcessorInfo().isIntelCore2(), "isIntelCore2() failed\n");562if (prefetch && comp->getMethodHotness()>=scorching && comp->target().cpu.is(OMR_PROCESSOR_X86_INTELCORE2))563{564int32_t fieldOffset = 0;565if (TR::TreeEvaluator::loadLookaheadAfterHeaderAccess(node, fieldOffset, cg))566{567if (fieldOffset > 32)568instr = generateMemInstruction(TR::InstOpCode::PREFETCHT0, node, generateX86MemoryReference(objectReg, fieldOffset, cg), cg);569570//printf("found a field load after monitor field at field offset %d\n", fieldOffset);571}572}573574return instr;575}576577// 32-bit float/double convert to long578//579TR::Register *J9::X86::TreeEvaluator::fpConvertToLong(TR::Node *node, TR::SymbolReference *helperSymRef, TR::CodeGenerator *cg)580{581TR::Compilation *comp = cg->comp();582TR_ASSERT_FATAL(comp->target().is32Bit(), "AMD64 doesn't use this logic");583584TR::Node *child = node->getFirstChild();585586if (child->getOpCode().isDouble())587{588TR::RegisterDependencyConditions *deps;589590TR::Register *doubleReg = cg->evaluate(child);591TR::Register *lowReg = cg->allocateRegister(TR_GPR);592TR::Register *highReg = cg->allocateRegister(TR_GPR);593TR::RealRegister *espReal = cg->machine()->getRealRegister(TR::RealRegister::esp);594595deps = generateRegisterDependencyConditions((uint8_t) 0, 3, cg);596deps->addPostCondition(lowReg, TR::RealRegister::NoReg, cg);597deps->addPostCondition(highReg, TR::RealRegister::NoReg, cg);598deps->addPostCondition(doubleReg, TR::RealRegister::NoReg, cg);599deps->stopAddingConditions();600601TR::LabelSymbol 
*reStartLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg); // exit routine label602TR::LabelSymbol *CallLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg); // label where long (64-bit) conversion will start603TR::LabelSymbol *StartLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);604605StartLabel->setStartInternalControlFlow();606reStartLabel->setEndInternalControlFlow();607608// Attempt to convert a double in an XMM register to an integer using CVTTSD2SI.609// If the conversion succeeds, put the integer in lowReg and sign-extend it to highReg.610// If the conversion fails (the double is too large), call the helper.611generateRegRegInstruction(TR::InstOpCode::CVTTSD2SIReg4Reg, node, lowReg, doubleReg, cg);612generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, lowReg, 0x80000000, cg);613614generateLabelInstruction(TR::InstOpCode::label, node, StartLabel, cg);615generateLabelInstruction(TR::InstOpCode::JE4, node, CallLabel, cg);616617generateRegRegInstruction(TR::InstOpCode::MOV4RegReg, node, highReg ,lowReg, cg);618generateRegImmInstruction(TR::InstOpCode::SAR4RegImm1, node, highReg , 31, cg);619620generateLabelInstruction(TR::InstOpCode::label, node, reStartLabel, deps, cg);621622TR::Register *targetRegister = cg->allocateRegisterPair(lowReg, highReg);623TR::SymbolReference *d2l = comp->getSymRefTab()->findOrCreateRuntimeHelper(TR_IA32double2LongSSE);624d2l->getSymbol()->getMethodSymbol()->setLinkage(TR_Helper);625TR::Node::recreate(node, TR::lcall);626node->setSymbolReference(d2l);627TR_OutlinedInstructions *outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(node, TR::lcall, targetRegister, CallLabel, reStartLabel, cg);628cg->getOutlinedInstructionsList().push_front(outlinedHelperCall);629630cg->decReferenceCount(child);631node->setRegister(targetRegister);632633return targetRegister;634}635else636{637TR::Register *accReg = NULL;638TR::Register *lowReg = cg->allocateRegister(TR_GPR);639TR::Register *highReg = 
cg->allocateRegister(TR_GPR);640TR::Register *floatReg = cg->evaluate(child);641642TR::LabelSymbol *snippetLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);643TR::LabelSymbol *startLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);644TR::LabelSymbol *reStartLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);645646startLabel->setStartInternalControlFlow();647reStartLabel->setEndInternalControlFlow();648649generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);650651// These instructions must be set appropriately prior to the creation652// of the snippet near the end of this method. Also see warnings below.653//654TR::X86RegMemInstruction *loadHighInstr; // loads the high dword of the converted long655TR::X86RegMemInstruction *loadLowInstr; // loads the low dword of the converted long656657TR::MemoryReference *tempMR = cg->machine()->getDummyLocalMR(TR::Float);658generateMemRegInstruction(TR::InstOpCode::MOVSSMemReg, node, tempMR, floatReg, cg);659generateMemInstruction(TR::InstOpCode::FLDMem, node, generateX86MemoryReference(*tempMR, 0, cg), cg);660661generateInstruction(TR::InstOpCode::FLDDUP, node, cg);662663// For slow conversion only, change the rounding mode on the FPU via its control word register.664//665TR::MemoryReference *convertedLongMR = (cg->machine())->getDummyLocalMR(TR::Int64);666667if (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE3))668{669generateMemInstruction(TR::InstOpCode::FLSTTPMem, node, convertedLongMR, cg);670}671else672{673int16_t fpcw = comp->getJittedMethodSymbol()->usesSinglePrecisionMode() ?674SINGLE_PRECISION_ROUND_TO_ZERO : DOUBLE_PRECISION_ROUND_TO_ZERO;675generateMemInstruction(TR::InstOpCode::LDCWMem, node, generateX86MemoryReference(cg->findOrCreate2ByteConstant(node, fpcw), cg), cg);676generateMemInstruction(TR::InstOpCode::FLSTPMem, node, convertedLongMR, cg);677678fpcw = comp->getJittedMethodSymbol()->usesSinglePrecisionMode() ?679SINGLE_PRECISION_ROUND_TO_NEAREST : 
DOUBLE_PRECISION_ROUND_TO_NEAREST;680681generateMemInstruction(TR::InstOpCode::LDCWMem, node, generateX86MemoryReference(cg->findOrCreate2ByteConstant(node, fpcw), cg), cg);682}683684// WARNING:685//686// The following load instructions are dissected in the snippet to determine the target registers.687// If they or their format is changed, you may need to change the snippet also.688//689loadHighInstr = generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, highReg,690generateX86MemoryReference(*convertedLongMR, 4, cg), cg);691692loadLowInstr = generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, lowReg,693generateX86MemoryReference(*convertedLongMR, 0, cg), cg);694695// Jump to the snippet if the converted value is an indefinite integer; otherwise continue.696//697generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, highReg, INT_MIN, cg);698generateLabelInstruction(TR::InstOpCode::JNE4, node, reStartLabel, cg);699generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, lowReg, lowReg, cg);700generateLabelInstruction(TR::InstOpCode::JE4, node, snippetLabel, cg);701702// Create the conversion snippet.703//704cg->addSnippet( new (cg->trHeapMemory()) TR::X86FPConvertToLongSnippet(reStartLabel,705snippetLabel,706helperSymRef,707node,708loadHighInstr,709loadLowInstr,710cg) );711712TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, accReg ? 
3 : 2, cg);713714// Make sure the high and low long registers are assigned to something.715//716if (accReg)717{718deps->addPostCondition(accReg, TR::RealRegister::eax, cg);719}720721deps->addPostCondition(lowReg, TR::RealRegister::NoReg, cg);722deps->addPostCondition(highReg, TR::RealRegister::NoReg, cg);723724generateLabelInstruction(TR::InstOpCode::label, node, reStartLabel, deps, cg);725726cg->decReferenceCount(child);727generateInstruction(TR::InstOpCode::FSTPST0, node, cg);728729TR::Register *targetRegister = cg->allocateRegisterPair(lowReg, highReg);730node->setRegister(targetRegister);731return targetRegister;732}733}734735// On AMD64, all four [fd]2[il] conversions are handled here736// On IA32, both [fd]2i conversions are handled here737TR::Register *J9::X86::TreeEvaluator::f2iEvaluator(TR::Node *node, TR::CodeGenerator *cg)738{739bool doubleSource;740bool longTarget;741TR::InstOpCode::Mnemonic cvttOpCode;742743switch (node->getOpCodeValue())744{745case TR::f2i:746cvttOpCode = TR::InstOpCode::CVTTSS2SIReg4Reg;747doubleSource = false;748longTarget = false;749break;750case TR::f2l:751cvttOpCode = TR::InstOpCode::CVTTSS2SIReg8Reg;752doubleSource = false;753longTarget = true;754break;755case TR::d2i:756cvttOpCode = TR::InstOpCode::CVTTSD2SIReg4Reg;757doubleSource = true;758longTarget = false;759break;760case TR::d2l:761cvttOpCode = TR::InstOpCode::CVTTSD2SIReg8Reg;762doubleSource = true;763longTarget = true;764break;765default:766TR_ASSERT_FATAL(0, "Unknown opcode value in f2iEvaluator");767break;768}769TR_ASSERT_FATAL(cg->comp()->target().is64Bit() || !longTarget, "Incorrect opcode value in f2iEvaluator");770771TR::Node *child = node->getFirstChild();772TR::Register *sourceRegister = NULL;773TR::Register *targetRegister = cg->allocateRegister(TR_GPR);774TR::LabelSymbol *startLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);775TR::LabelSymbol *endLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);776TR::LabelSymbol *exceptionLabel = 
TR::LabelSymbol::create(cg->trHeapMemory(),cg);777778sourceRegister = cg->evaluate(child);779generateRegRegInstruction(cvttOpCode, node, targetRegister, sourceRegister, cg);780781startLabel->setStartInternalControlFlow();782endLabel->setEndInternalControlFlow();783784generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);785786if (longTarget)787{788TR_ASSERT_FATAL(cg->comp()->target().is64Bit(), "We should only get here on AMD64");789// We can't compare with 0x8000000000000000.790// Instead, rotate left 1 bit and compare with 0x0000000000000001.791generateRegInstruction(TR::InstOpCode::ROL8Reg1, node, targetRegister, cg);792generateRegImmInstruction(TR::InstOpCode::CMP8RegImms, node, targetRegister, 1, cg);793generateLabelInstruction(TR::InstOpCode::JE4, node, exceptionLabel, cg);794}795else796{797generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, targetRegister, INT_MIN, cg);798generateLabelInstruction(TR::InstOpCode::JE4, node, exceptionLabel, cg);799}800801//TODO: (omr issue #4969): Remove once support for spills in OOL paths is added802TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)2, cg);803deps->addPostCondition(targetRegister, TR::RealRegister::NoReg, cg);804deps->addPostCondition(sourceRegister, TR::RealRegister::NoReg, cg);805806{807TR_OutlinedInstructionsGenerator og(exceptionLabel, node, cg);808// at this point, target is set to -INF and there can only be THREE possible results: -INF, +INF, NaN809// compare source with ZERO810generateRegMemInstruction(doubleSource ? TR::InstOpCode::UCOMISDRegMem : TR::InstOpCode::UCOMISSRegMem,811node,812sourceRegister,813generateX86MemoryReference(doubleSource ? 
cg->findOrCreate8ByteConstant(node, 0) : cg->findOrCreate4ByteConstant(node, 0), cg),814cg);815// load max int if source is positive, note that for long case, LLONG_MAX << 1 is loaded as it will be shifted right816generateRegMemInstruction(TR::InstOpCode::CMOVARegMem(longTarget),817node,818targetRegister,819generateX86MemoryReference(longTarget ? cg->findOrCreate8ByteConstant(node, LLONG_MAX << 1) : cg->findOrCreate4ByteConstant(node, INT_MAX), cg),820cg);821// load zero if source is NaN822generateRegMemInstruction(TR::InstOpCode::CMOVPRegMem(longTarget),823node,824targetRegister,825generateX86MemoryReference(longTarget ? cg->findOrCreate8ByteConstant(node, 0) : cg->findOrCreate4ByteConstant(node, 0), cg),826cg);827828generateLabelInstruction(TR::InstOpCode::JMP4, node, endLabel, cg);829og.endOutlinedInstructionSequence();830}831832generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);833if (longTarget)834{835generateRegInstruction(TR::InstOpCode::ROR8Reg1, node, targetRegister, cg);836}837838node->setRegister(targetRegister);839cg->decReferenceCount(child);840return targetRegister;841}842843TR::Register *J9::X86::TreeEvaluator::f2lEvaluator(TR::Node *node, TR::CodeGenerator *cg)844{845TR_ASSERT_FATAL(cg->comp()->target().is32Bit(), "AMD64 uses f2iEvaluator for this");846return TR::TreeEvaluator::fpConvertToLong(node, cg->symRefTab()->findOrCreateRuntimeHelper(TR_IA32floatToLong), cg);847}848849TR::Register *J9::X86::TreeEvaluator::d2lEvaluator(TR::Node *node, TR::CodeGenerator *cg)850{851TR_ASSERT_FATAL(cg->comp()->target().is32Bit(), "AMD64 uses f2iEvaluator for this");852853return TR::TreeEvaluator::fpConvertToLong(node, cg->symRefTab()->findOrCreateRuntimeHelper(TR_IA32doubleToLong), cg);854}855856/*857* J9 X86 specific tree evaluator table overrides858*/859extern void TEMPORARY_initJ9X86TreeEvaluatorTable(TR::CodeGenerator *cg)860{861TR_TreeEvaluatorFunctionPointer *tet = cg->getTreeEvaluatorTable();862tet[TR::f2i] = 
TR::TreeEvaluator::f2iEvaluator;863tet[TR::f2iu] = TR::TreeEvaluator::f2iEvaluator;864tet[TR::f2l] = TR::TreeEvaluator::f2iEvaluator;865tet[TR::f2lu] = TR::TreeEvaluator::f2iEvaluator;866tet[TR::d2i] = TR::TreeEvaluator::f2iEvaluator;867tet[TR::d2iu] = TR::TreeEvaluator::f2iEvaluator;868tet[TR::d2l] = TR::TreeEvaluator::f2iEvaluator;869tet[TR::d2lu] = TR::TreeEvaluator::f2iEvaluator;870tet[TR::monent] = TR::TreeEvaluator::monentEvaluator;871tet[TR::monexit] = TR::TreeEvaluator::monexitEvaluator;872tet[TR::monexitfence] = TR::TreeEvaluator::monexitfenceEvaluator;873tet[TR::asynccheck] = TR::TreeEvaluator::asynccheckEvaluator;874tet[TR::instanceof] = TR::TreeEvaluator::checkcastinstanceofEvaluator;875tet[TR::checkcast] = TR::TreeEvaluator::checkcastinstanceofEvaluator;876tet[TR::checkcastAndNULLCHK] = TR::TreeEvaluator::checkcastinstanceofEvaluator;877tet[TR::New] = TR::TreeEvaluator::newEvaluator;878tet[TR::newarray] = TR::TreeEvaluator::newEvaluator;879tet[TR::anewarray] = TR::TreeEvaluator::newEvaluator;880tet[TR::variableNew] = TR::TreeEvaluator::newEvaluator;881tet[TR::variableNewArray] = TR::TreeEvaluator::newEvaluator;882tet[TR::multianewarray] = TR::TreeEvaluator::multianewArrayEvaluator;883tet[TR::arraylength] = TR::TreeEvaluator::arraylengthEvaluator;884tet[TR::lookup] = TR::TreeEvaluator::lookupEvaluator;885tet[TR::exceptionRangeFence] = TR::TreeEvaluator::exceptionRangeFenceEvaluator;886tet[TR::NULLCHK] = TR::TreeEvaluator::NULLCHKEvaluator;887tet[TR::ZEROCHK] = TR::TreeEvaluator::ZEROCHKEvaluator;888tet[TR::ResolveCHK] = TR::TreeEvaluator::resolveCHKEvaluator;889tet[TR::ResolveAndNULLCHK] = TR::TreeEvaluator::resolveAndNULLCHKEvaluator;890tet[TR::DIVCHK] = TR::TreeEvaluator::DIVCHKEvaluator;891tet[TR::BNDCHK] = TR::TreeEvaluator::BNDCHKEvaluator;892tet[TR::ArrayCopyBNDCHK] = TR::TreeEvaluator::ArrayCopyBNDCHKEvaluator;893tet[TR::BNDCHKwithSpineCHK] = TR::TreeEvaluator::BNDCHKwithSpineCHKEvaluator;894tet[TR::SpineCHK] = 
TR::TreeEvaluator::BNDCHKwithSpineCHKEvaluator;895tet[TR::ArrayStoreCHK] = TR::TreeEvaluator::ArrayStoreCHKEvaluator;896tet[TR::ArrayCHK] = TR::TreeEvaluator::ArrayCHKEvaluator;897tet[TR::MethodEnterHook] = TR::TreeEvaluator::conditionalHelperEvaluator;898tet[TR::MethodExitHook] = TR::TreeEvaluator::conditionalHelperEvaluator;899tet[TR::allocationFence] = TR::TreeEvaluator::NOPEvaluator;900tet[TR::loadFence] = TR::TreeEvaluator::barrierFenceEvaluator;901tet[TR::storeFence] = TR::TreeEvaluator::barrierFenceEvaluator;902tet[TR::fullFence] = TR::TreeEvaluator::barrierFenceEvaluator;903tet[TR::ihbit] = TR::TreeEvaluator::integerHighestOneBit;904tet[TR::ilbit] = TR::TreeEvaluator::integerLowestOneBit;905tet[TR::inolz] = TR::TreeEvaluator::integerNumberOfLeadingZeros;906tet[TR::inotz] = TR::TreeEvaluator::integerNumberOfTrailingZeros;907tet[TR::ipopcnt] = TR::TreeEvaluator::integerBitCount;908tet[TR::lhbit] = TR::TreeEvaluator::longHighestOneBit;909tet[TR::llbit] = TR::TreeEvaluator::longLowestOneBit;910tet[TR::lnolz] = TR::TreeEvaluator::longNumberOfLeadingZeros;911tet[TR::lnotz] = TR::TreeEvaluator::longNumberOfTrailingZeros;912tet[TR::lpopcnt] = TR::TreeEvaluator::longBitCount;913tet[TR::tstart] = TR::TreeEvaluator::tstartEvaluator;914tet[TR::tfinish] = TR::TreeEvaluator::tfinishEvaluator;915tet[TR::tabort] = TR::TreeEvaluator::tabortEvaluator;916917#if defined(TR_TARGET_32BIT)918// 32-bit overrides919tet[TR::f2l] = TR::TreeEvaluator::f2lEvaluator;920tet[TR::f2lu] = TR::TreeEvaluator::f2lEvaluator;921tet[TR::d2l] = TR::TreeEvaluator::d2lEvaluator;922tet[TR::d2lu] = TR::TreeEvaluator::d2lEvaluator;923tet[TR::ldiv] = TR::TreeEvaluator::integerPairDivEvaluator;924tet[TR::lrem] = TR::TreeEvaluator::integerPairRemEvaluator;925#endif926}927928929static void generateCommonLockNurseryCodes(TR::Node *node,930TR::CodeGenerator *cg,931bool monent, //true for VMmonentEvaluator, false for VMmonexitEvaluator932TR::LabelSymbol *monitorLookupCacheLabel,933TR::LabelSymbol 
*fallThruFromMonitorLookupCacheLabel,934TR::LabelSymbol *snippetLabel,935uint32_t &numDeps,936int &lwOffset,937TR::Register *objectClassReg,938TR::Register *&lookupOffsetReg,939TR::Register *vmThreadReg,940TR::Register *objectReg941)942{943TR::Compilation *comp = cg->comp();944TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());945if (comp->getOption(TR_EnableMonitorCacheLookup))946{947if (monent) lwOffset = 0;948generateLabelInstruction(TR::InstOpCode::JLE4, node, monitorLookupCacheLabel, cg);949generateLabelInstruction(TR::InstOpCode::JMP4, node, fallThruFromMonitorLookupCacheLabel, cg);950951generateLabelInstruction(TR::InstOpCode::label, node, monitorLookupCacheLabel, cg);952953lookupOffsetReg = cg->allocateRegister();954numDeps++;955956int32_t offsetOfMonitorLookupCache = offsetof(J9VMThread, objectMonitorLookupCache);957958//generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, objectClassReg, generateX86MemoryReference(vmThreadReg, offsetOfMonitorLookupCache, cg), cg);959generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, lookupOffsetReg, objectReg, cg);960961generateRegImmInstruction(TR::InstOpCode::SARRegImm1(comp->target().is64Bit()), node, lookupOffsetReg, trailingZeroes(TR::Compiler->om.getObjectAlignmentInBytes()), cg);962963J9JavaVM * jvm = fej9->getJ9JITConfig()->javaVM;964generateRegImmInstruction(TR::InstOpCode::ANDRegImms(), node, lookupOffsetReg, J9VMTHREAD_OBJECT_MONITOR_CACHE_SIZE - 1, cg);965generateRegImmInstruction(TR::InstOpCode::SHLRegImm1(), node, lookupOffsetReg, trailingZeroes(TR::Compiler->om.sizeofReferenceField()), cg);966generateRegMemInstruction((comp->target().is64Bit() && fej9->generateCompressedLockWord()) ? 
#ifdef TR_TARGET_32BIT
/*
 * IA32 evaluator used by asynccheck, MethodEnterHook and MethodExitHook.
 *
 * Emits the inline 32-bit compare described by the node's first child, then a
 * conditional branch to a helper-call snippet. The snippet returns to a
 * restart label placed right after the branch. Always returns NULL.
 */
TR::Register *J9::X86::I386::TreeEvaluator::conditionalHelperEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // The constant placeholder parameter of the MethodEnterHook call is not
   // used here, so its reference count is dropped without evaluating it.
   //
   if (node->getOpCodeValue() == TR::MethodEnterHook)
      {
      TR::Node *hookCall = node->getSecondChild();
      if (hookCall->getOpCode().isCall() && hookCall->getNumChildren() > 1)
         cg->decReferenceCount(hookCall->getFirstChild());
      }

   // The first child contains the inline test.
   //
   TR::Node *testNode = node->getFirstChild();
   TR::Node *constantNode = testNode->getSecondChild();
   bool compareWithImmediate = constantNode->getOpCode().isLoadConst() &&
                               constantNode->getRegister() == NULL;

   if (!compareWithImmediate)
      {
      TR_X86CompareAnalyser analyser(cg);
      analyser.integerCompareAnalyser(testNode, TR::InstOpCode::CMP4RegReg, TR::InstOpCode::CMP4RegMem, TR::InstOpCode::CMP4MemReg);
      }
   else
      {
      // Compare memory directly against the constant, using the short
      // sign-extended byte form whenever the constant fits in 8 bits.
      int32_t immediate = constantNode->getInt();
      bool fitsInByte = (immediate >= -128) && (immediate <= 127);
      TR::InstOpCode::Mnemonic compareOp = fitsInByte ? TR::InstOpCode::CMP4MemImms : TR::InstOpCode::CMP4MemImm4;

      TR::MemoryReference *operandMR = generateX86MemoryReference(testNode->getFirstChild(), cg);
      generateMemImmInstruction(compareOp, node, operandMR, immediate, cg);
      operandMR->decNodeReferenceCounts(cg);
      cg->decReferenceCount(constantNode);
      }

   // The branch to the snippet and the return from it form an internal
   // control flow region bounded by the start and restart labels.
   TR::LabelSymbol *regionStart = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
   TR::LabelSymbol *regionEnd   = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
   TR::LabelSymbol *helperEntry = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
   regionStart->setStartInternalControlFlow();
   regionEnd->setEndInternalControlFlow();

   generateLabelInstruction(TR::InstOpCode::label, node, regionStart, cg);

   TR::InstOpCode::Mnemonic branchOp = TR::InstOpCode::JNE4;
   if (testNode->getOpCodeValue() == TR::icmpeq)
      branchOp = TR::InstOpCode::JE4;
   generateLabelInstruction(branchOp, node, helperEntry, cg);

   // With two children the helper call is described by the second child;
   // otherwise it is described by the node's own symbol reference.
   TR::Snippet *helperSnippet = NULL;
   if (node->getNumChildren() != 2)
      helperSnippet = new (cg->trHeapMemory()) TR::X86HelperCallSnippet(cg, node, regionEnd, helperEntry, node->getSymbolReference());
   else
      helperSnippet = new (cg->trHeapMemory()) TR::X86HelperCallSnippet(cg, regionEnd, helperEntry, node->getSecondChild());
   cg->addSnippet(helperSnippet);

   generateLabelInstruction(TR::InstOpCode::label, node, regionEnd, cg);
   cg->decReferenceCount(testNode);
   return NULL;
   }
#endif
*ramMethodReg = NULL;11051106// The receiver and RAM method parameters must be evaluated outside of the internal control flow region if it is commoned,1107// and their registers added to the post dependency condition on the merge label.1108//1109// The reference counts will be decremented when the call node is evaluated.1110//1111if (node->getOpCodeValue() == TR::MethodEnterHook || node->getOpCodeValue() == TR::MethodExitHook)1112{1113TR::Node *callNode = node->getSecondChild();11141115if (callNode->getNumChildren() > 1)1116{1117if (callNode->getFirstChild()->getReferenceCount() > 1)1118thisReg = cg->evaluate(callNode->getFirstChild());11191120if (callNode->getSecondChild()->getReferenceCount() > 1)1121ramMethodReg = cg->evaluate(callNode->getSecondChild());1122}1123else1124{1125if (callNode->getFirstChild()->getReferenceCount() > 1)1126ramMethodReg = cg->evaluate(callNode->getFirstChild());1127}1128}11291130if (secondChild->getOpCode().isLoadConst() &&1131secondChild->getRegister() == NULL &&1132(!testIs64Bit || IS_32BIT_SIGNED(secondChild->getLongInt())))1133{1134// Try to compare memory directly with immediate1135//1136TR::MemoryReference * memRef = generateX86MemoryReference(testNode->getFirstChild(), cg);1137TR::InstOpCode::Mnemonic op;11381139if (testIs64Bit)1140{1141int64_t value = secondChild->getLongInt();1142op = IS_8BIT_SIGNED(value) ? TR::InstOpCode::CMP8MemImms : TR::InstOpCode::CMP8MemImm4;1143generateMemImmInstruction(op, node, memRef, value, cg);1144}1145else1146{1147int32_t value = secondChild->getInt();1148op = IS_8BIT_SIGNED(value) ? 
TR::InstOpCode::CMP4MemImms : TR::InstOpCode::CMP4MemImm4;1149generateMemImmInstruction(op, node, memRef, value, cg);1150}11511152memRef->decNodeReferenceCounts(cg);1153cg->decReferenceCount(secondChild);1154}1155else1156{1157TR_X86CompareAnalyser temp(cg);1158temp.integerCompareAnalyser(testNode, TR::InstOpCode::CMPRegReg(testIs64Bit), TR::InstOpCode::CMPRegMem(testIs64Bit), TR::InstOpCode::CMPMemReg(testIs64Bit));1159}11601161TR::LabelSymbol *startLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);1162TR::LabelSymbol *reStartLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);1163TR::LabelSymbol *snippetLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);1164startLabel->setStartInternalControlFlow();1165reStartLabel->setEndInternalControlFlow();11661167TR::Instruction *startInstruction = generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);11681169if (node->getOpCodeValue() == TR::MethodEnterHook || node->getOpCodeValue() == TR::MethodExitHook)1170{1171TR::Node *callNode = node->getSecondChild();11721173// Generate an inverted jump around the call. This is necessary because we want to do the call inline rather1174// than through the snippet.1175//1176generateLabelInstruction(testIsEQ ? 
TR::InstOpCode::JNE4 : TR::InstOpCode::JE4, node, reStartLabel, cg);1177TR::TreeEvaluator::performCall(callNode, false, false, cg);11781179// Collect postconditions from the internal control flow region and put1180// them on the restart label to prevent spills in the internal control1181// flow region.1182// TODO:AMD64: This would be a useful general facility to have.1183//1184TR::Machine *machine = cg->machine();1185TR::RegisterDependencyConditions *postConditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions((uint8_t)0, TR::RealRegister::NumRegisters, cg->trMemory());1186if (thisReg)1187postConditions->addPostCondition(thisReg, TR::RealRegister::NoReg, cg);11881189if (ramMethodReg)1190postConditions->addPostCondition(ramMethodReg, TR::RealRegister::NoReg, cg);11911192for (TR::Instruction *cursor = cg->getAppendInstruction(); cursor != startInstruction; cursor = cursor->getPrev())1193{1194TR::RegisterDependencyConditions *cursorDeps = cursor->getDependencyConditions();1195if (cursorDeps && cursor->getOpCodeValue() != TR::InstOpCode::assocreg)1196{1197if (debug("traceConditionalHelperEvaluator"))1198{1199diagnostic("conditionalHelperEvaluator: Adding deps from " POINTER_PRINTF_FORMAT "\n", cursor);1200}1201for (int32_t i = 0; i < cursorDeps->getNumPostConditions(); i++)1202{1203TR::RegisterDependency *cursorPostCondition = cursorDeps->getPostConditions()->getRegisterDependency(i);1204postConditions->unionPostCondition(cursorPostCondition->getRegister(), cursorPostCondition->getRealRegister(), cg);1205if (debug("traceConditionalHelperEvaluator"))1206{1207TR_Debug *debug = cg->getDebug();1208diagnostic("conditionalHelperEvaluator: [%s : %s]\n", debug->getName(cursorPostCondition->getRegister()), debug->getName(machine->getRealRegister(cursorPostCondition->getRealRegister())));1209}1210}1211}1212}1213postConditions->stopAddingPostConditions();12141215generateLabelInstruction(TR::InstOpCode::label, node, reStartLabel, postConditions, 
cg);1216}1217else1218{1219generateLabelInstruction(testIsEQ? TR::InstOpCode::JE4 : TR::InstOpCode::JNE4, node, snippetLabel, cg);12201221TR::Snippet *snippet;1222if (node->getNumChildren() == 2)1223snippet = new (cg->trHeapMemory()) TR::X86HelperCallSnippet(cg, reStartLabel, snippetLabel, node->getSecondChild());1224else1225snippet = new (cg->trHeapMemory()) TR::X86HelperCallSnippet(cg, node, reStartLabel, snippetLabel, node->getSymbolReference());12261227cg->addSnippet(snippet);1228generateLabelInstruction(TR::InstOpCode::label, node, reStartLabel, cg);1229}12301231cg->decReferenceCount(testNode);1232return NULL;1233}1234#endif12351236TR::Register* J9::X86::TreeEvaluator::performHeapLoadWithReadBarrier(TR::Node* node, TR::CodeGenerator* cg)1237{1238#ifndef OMR_GC_CONCURRENT_SCAVENGER1239TR_ASSERT_FATAL(0, "Concurrent Scavenger not supported.");1240return NULL;1241#else1242TR::Compilation *comp = cg->comp();1243bool use64BitClasses = comp->target().is64Bit() && !comp->useCompressedPointers();12441245TR::MemoryReference* sourceMR = generateX86MemoryReference(node, cg);1246TR::Register* address = TR::TreeEvaluator::loadMemory(node, sourceMR, TR_RematerializableLoadEffectiveAddress, false, cg);1247address->setMemRef(sourceMR);1248sourceMR->decNodeReferenceCounts(cg);12491250TR::Register* object = cg->allocateRegister();1251TR::Instruction* load = generateRegMemInstruction(TR::InstOpCode::LRegMem(use64BitClasses), node, object, generateX86MemoryReference(address, 0, cg), cg);1252cg->setImplicitExceptionPoint(load);12531254switch (TR::Compiler->om.readBarrierType())1255{1256case gc_modron_readbar_none:1257TR_ASSERT(false, "This path should only be reached when a read barrier is required.");1258break;1259case gc_modron_readbar_always:1260generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp1), cg), address, cg);1261generateHelperCallInstruction(node, TR_softwareReadBarrier, NULL, 
cg);1262generateRegMemInstruction(TR::InstOpCode::LRegMem(use64BitClasses), node, object, generateX86MemoryReference(address, 0, cg), cg);1263break;1264case gc_modron_readbar_range_check:1265{1266TR::LabelSymbol* begLabel = generateLabelSymbol(cg);1267TR::LabelSymbol* endLabel = generateLabelSymbol(cg);1268TR::LabelSymbol* rdbarLabel = generateLabelSymbol(cg);1269begLabel->setStartInternalControlFlow();1270endLabel->setEndInternalControlFlow();12711272TR::RegisterDependencyConditions* deps = generateRegisterDependencyConditions((uint8_t)2, 2, cg);1273deps->addPreCondition(object, TR::RealRegister::NoReg, cg);1274deps->addPreCondition(address, TR::RealRegister::NoReg, cg);1275deps->addPostCondition(object, TR::RealRegister::NoReg, cg);1276deps->addPostCondition(address, TR::RealRegister::NoReg, cg);12771278generateLabelInstruction(TR::InstOpCode::label, node, begLabel, cg);1279generateRegMemInstruction(TR::InstOpCode::CMPRegMem(use64BitClasses), node, object, generateX86MemoryReference(cg->getVMThreadRegister(), comp->fej9()->thisThreadGetEvacuateBaseAddressOffset(), cg), cg);1280generateLabelInstruction(TR::InstOpCode::JAE4, node, rdbarLabel, cg);1281{1282TR_OutlinedInstructionsGenerator og(rdbarLabel, node, cg);1283generateRegMemInstruction(TR::InstOpCode::CMPRegMem(use64BitClasses), node, object, generateX86MemoryReference(cg->getVMThreadRegister(), comp->fej9()->thisThreadGetEvacuateTopAddressOffset(), cg), cg);1284generateLabelInstruction(TR::InstOpCode::JA4, node, endLabel, cg);1285generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp1), cg), address, cg);1286generateHelperCallInstruction(node, TR_softwareReadBarrier, NULL, cg);1287generateRegMemInstruction(TR::InstOpCode::LRegMem(use64BitClasses), node, object, generateX86MemoryReference(address, 0, cg), cg);1288generateLabelInstruction(TR::InstOpCode::JMP4, node, endLabel, 
cg);1289og.endOutlinedInstructionSequence();1290}1291generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);1292}1293break;1294default:1295TR_ASSERT(false, "Unsupported Read Barrier Type.");1296break;1297}1298cg->stopUsingRegister(address);1299return object;1300#endif1301}13021303// Should only be called for pure TR::awrtbar and TR::awrtbari nodes.1304//1305TR::Register *J9::X86::TreeEvaluator::writeBarrierEvaluator(TR::Node *node, TR::CodeGenerator *cg)1306{1307TR::MemoryReference *storeMR = generateX86MemoryReference(node, cg);1308TR::Node *destOwningObject;1309TR::Node *sourceObject;1310TR::Compilation *comp = cg->comp();1311bool usingCompressedPointers = false;1312bool useShiftedOffsets = (TR::Compiler->om.compressedReferenceShiftOffset() != 0);13131314if (node->getOpCodeValue() == TR::awrtbari)1315{1316destOwningObject = node->getChild(2);1317sourceObject = node->getSecondChild();1318if (comp->useCompressedPointers() &&1319(node->getSymbolReference()->getSymbol()->getDataType() == TR::Address) &&1320(node->getSecondChild()->getDataType() != TR::Address))1321{1322usingCompressedPointers = true;13231324if (useShiftedOffsets)1325{1326while ((sourceObject->getNumChildren() > 0) && (sourceObject->getOpCodeValue() != TR::a2l))1327sourceObject = sourceObject->getFirstChild();1328if (sourceObject->getOpCodeValue() == TR::a2l)1329sourceObject = sourceObject->getFirstChild();1330// this is required so that different registers are1331// allocated for the actual store and translated values1332sourceObject->incReferenceCount();1333}1334}1335}1336else1337{1338TR_ASSERT((node->getOpCodeValue() == TR::awrtbar), "expecting a TR::wrtbar");1339destOwningObject = node->getSecondChild();1340sourceObject = node->getFirstChild();1341}13421343TR_X86ScratchRegisterManager *scratchRegisterManager =1344cg->generateScratchRegisterManager(comp->target().is64Bit() ? 
15 : 7);13451346TR::TreeEvaluator::VMwrtbarWithStoreEvaluator(1347node,1348storeMR,1349scratchRegisterManager,1350destOwningObject,1351sourceObject,1352(node->getOpCodeValue() == TR::awrtbari) ? true : false,1353cg,1354false);13551356if (comp->useAnchors() && (node->getOpCodeValue() == TR::awrtbari))1357node->setStoreAlreadyEvaluated(true);13581359if (usingCompressedPointers)1360cg->decReferenceCount(node->getSecondChild());13611362return NULL;1363}136413651366TR::Register *J9::X86::TreeEvaluator::monentEvaluator(TR::Node *node, TR::CodeGenerator *cg)1367{1368if (cg->enableRematerialisation() &&1369cg->supportsStaticMemoryRematerialization())1370TR::TreeEvaluator::removeLiveDiscardableStatics(cg);13711372return TR::TreeEvaluator::VMmonentEvaluator(node, cg);1373}13741375TR::Register *J9::X86::TreeEvaluator::monexitEvaluator(TR::Node *node, TR::CodeGenerator *cg)1376{1377if (cg->enableRematerialisation() &&1378cg->supportsStaticMemoryRematerialization())1379TR::TreeEvaluator::removeLiveDiscardableStatics(cg);13801381return TR::TreeEvaluator::VMmonexitEvaluator(node, cg);1382}13831384TR::Register *J9::X86::TreeEvaluator::asynccheckEvaluator(TR::Node *node, TR::CodeGenerator *cg)1385{1386// Generate the test and branch for async message processing.1387//1388TR::Node *compareNode = node->getFirstChild();1389TR::Node *secondChild = compareNode->getSecondChild();1390TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);1391TR::Compilation *comp = cg->comp();13921393if (comp->getOption(TR_RTGCMapCheck))1394{1395TR::TreeEvaluator::asyncGCMapCheckPatching(node, cg, snippetLabel);1396}1397else1398{1399TR_ASSERT_FATAL(secondChild->getOpCode().isLoadConst(), "unrecognized asynccheck test: special async check value is not a constant");14001401TR::MemoryReference *mr = generateX86MemoryReference(compareNode->getFirstChild(), cg);1402if ((secondChild->getRegister() != NULL) ||1403(comp->target().is64Bit() && !IS_32BIT_SIGNED(secondChild->getLongInt())))1404{1405TR::Register 
*valueReg = cg->evaluate(secondChild);1406TR::X86CheckAsyncMessagesMemRegInstruction *ins =1407generateCheckAsyncMessagesInstruction(node, TR::InstOpCode::CMPMemReg(), mr, valueReg, cg);1408}1409else1410{1411int32_t value = secondChild->getInt();1412TR::InstOpCode::Mnemonic op = (value < 127 && value >= -128) ? TR::InstOpCode::CMPMemImms() : TR::InstOpCode::CMPMemImm4();1413TR::X86CheckAsyncMessagesMemImmInstruction *ins =1414generateCheckAsyncMessagesInstruction(node, op, mr, value, cg);1415}14161417mr->decNodeReferenceCounts(cg);1418cg->decReferenceCount(secondChild);1419}14201421TR::LabelSymbol *startControlFlowLabel = generateLabelSymbol(cg);1422TR::LabelSymbol *endControlFlowLabel = generateLabelSymbol(cg);14231424bool testIsEqual = compareNode->getOpCodeValue() == TR::icmpeq || compareNode->getOpCodeValue() == TR::lcmpeq;14251426TR_ASSERT(testIsEqual, "unrecognized asynccheck test: test is not equal");14271428startControlFlowLabel->setStartInternalControlFlow();1429generateLabelInstruction(TR::InstOpCode::label, node, startControlFlowLabel, cg);14301431generateLabelInstruction(testIsEqual ? 
TR::InstOpCode::JE4 : TR::InstOpCode::JNE4, node, snippetLabel, cg);14321433{1434TR_OutlinedInstructionsGenerator og(snippetLabel, node, cg);1435generateImmSymInstruction(TR::InstOpCode::CALLImm4, node, (uintptr_t)node->getSymbolReference()->getMethodAddress(), node->getSymbolReference(), cg)->setNeedsGCMap(0xFF00FFFF);1436generateLabelInstruction(TR::InstOpCode::JMP4, node, endControlFlowLabel, cg);1437og.endOutlinedInstructionSequence();1438}14391440endControlFlowLabel->setEndInternalControlFlow();1441generateLabelInstruction(TR::InstOpCode::label, node, endControlFlowLabel, cg);14421443cg->decReferenceCount(compareNode);14441445return NULL;1446}14471448// Handles newObject, newArray, anewArray1449//1450TR::Register *J9::X86::TreeEvaluator::newEvaluator(TR::Node *node, TR::CodeGenerator *cg)1451{1452TR::Compilation *comp = cg->comp();1453TR::Register *targetRegister = NULL;14541455if (TR::TreeEvaluator::requireHelperCallValueTypeAllocation(node, cg))1456{1457TR_OpaqueClassBlock *classInfo;1458bool spillFPRegs = comp->canAllocateInlineOnStack(node, classInfo) <= 0;1459return TR::TreeEvaluator::performHelperCall(node, NULL, TR::acall, spillFPRegs, cg);1460}14611462targetRegister = TR::TreeEvaluator::VMnewEvaluator(node, cg);1463if (!targetRegister)1464{1465// Inline object allocation wasn't generated, just generate a call to the helper.1466// If we know that the class is fully initialized, we don't have to spill1467// the FP registers.1468//1469TR_OpaqueClassBlock *classInfo;1470bool spillFPRegs = (comp->canAllocateInlineOnStack(node, classInfo) <= 0);1471targetRegister = TR::TreeEvaluator::performHelperCall(node, NULL, TR::acall, spillFPRegs, cg);1472}1473else if (cg->canEmitBreakOnDFSet())1474{1475// Check DF flag after inline new1476generateBreakOnDFSet(cg);1477}14781479return targetRegister;1480}14811482TR::Register *J9::X86::TreeEvaluator::multianewArrayEvaluator(TR::Node *node, TR::CodeGenerator *cg)1483{1484TR::Node *firstChild = 
node->getFirstChild();1485TR::Node *secondChild = node->getSecondChild();1486TR::Node *thirdChild = node->getThirdChild();14871488// 2-dimensional MultiANewArray1489TR::Compilation *comp = cg->comp();1490TR_ASSERT_FATAL(comp->target().is64Bit(), "multianewArrayEvaluator is only supported on 64-bit JVMs!");1491TR_J9VMBase *fej9 = static_cast<TR_J9VMBase *>(comp->fe());14921493TR::Register *dimsPtrReg = NULL;1494TR::Register *dimReg = NULL;1495TR::Register *classReg = NULL;1496TR::Register *firstDimLenReg = NULL;1497TR::Register *secondDimLenReg = NULL;1498TR::Register *targetReg = NULL;1499TR::Register *temp1Reg = NULL;1500TR::Register *temp2Reg = NULL;1501TR::Register *temp3Reg = NULL;1502TR::Register *componentClassReg = NULL;15031504TR::Register *vmThreadReg = cg->getVMThreadRegister();1505targetReg = cg->allocateRegister();1506firstDimLenReg = cg->allocateRegister();1507secondDimLenReg = cg->allocateRegister();1508temp1Reg = cg->allocateRegister();1509temp2Reg = cg->allocateRegister();1510temp3Reg = cg->allocateRegister();1511componentClassReg = cg->allocateRegister();15121513TR::LabelSymbol *startLabel = generateLabelSymbol(cg);1514TR::LabelSymbol *fallThru = generateLabelSymbol(cg);1515TR::LabelSymbol *loopLabel = generateLabelSymbol(cg);1516TR::LabelSymbol *nonZeroFirstDimLabel = generateLabelSymbol(cg);1517startLabel->setStartInternalControlFlow();1518fallThru->setEndInternalControlFlow();15191520TR::LabelSymbol *oolFailLabel = generateLabelSymbol(cg);1521TR::LabelSymbol *oolJumpPoint = generateLabelSymbol(cg);15221523generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);15241525// Generate the heap allocation, and the snippet that will handle heap overflow.1526TR_OutlinedInstructions *outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(node, TR::acall, targetReg, oolFailLabel, fallThru, cg);1527cg->getOutlinedInstructionsList().push_front(outlinedHelperCall);15281529dimReg = cg->evaluate(secondChild);15301531dimsPtrReg = 
cg->evaluate(firstChild);15321533classReg = cg->evaluate(thirdChild);15341535generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, secondDimLenReg,1536generateX86MemoryReference(dimsPtrReg, 0, cg), cg);1537// Load the 32-bit length value as a 64-bit value so that the top half of the register1538// can be zeroed out. This will allow us to treat the value as 64-bit when performing1539// calculations later on.1540generateRegMemInstruction(TR::InstOpCode::MOVSXReg8Mem4, node, firstDimLenReg,1541generateX86MemoryReference(dimsPtrReg, 4, cg), cg);15421543generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, secondDimLenReg, 0, cg);15441545generateLabelInstruction(TR::InstOpCode::JNE4, node, oolJumpPoint, cg);1546// Second Dim length is 015471548generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, firstDimLenReg, 0, cg);1549generateLabelInstruction(TR::InstOpCode::JNE4, node, nonZeroFirstDimLabel, cg);15501551// First Dim zero, only allocate 1 zero-length object array1552generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, targetReg, generateX86MemoryReference(vmThreadReg, offsetof(J9VMThread, heapAlloc), cg), cg);15531554// Take into account alignment requirements for the size of the zero-length array header1555int32_t zeroArraySizeAligned = OMR::align(TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), TR::Compiler->om.getObjectAlignmentInBytes());1556generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, temp1Reg, generateX86MemoryReference(targetReg, zeroArraySizeAligned, cg), cg);15571558generateRegMemInstruction(TR::InstOpCode::CMPRegMem(), node, temp1Reg, generateX86MemoryReference(vmThreadReg, offsetof(J9VMThread, heapTop), cg), cg);1559generateLabelInstruction(TR::InstOpCode::JA4, node, oolJumpPoint, cg);1560generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(vmThreadReg, offsetof(J9VMThread, heapAlloc), cg), temp1Reg, cg);15611562// Init class1563bool use64BitClasses = comp->target().is64Bit() 
&& !TR::Compiler->om.generateCompressedObjectHeaders();1564generateMemRegInstruction(TR::InstOpCode::SMemReg(use64BitClasses), node, generateX86MemoryReference(targetReg, TR::Compiler->om.offsetOfObjectVftField(), cg), classReg, cg);15651566// Init size and '0' fields to 01567generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node, generateX86MemoryReference(targetReg, fej9->getOffsetOfContiguousArraySizeField(), cg), 0, cg);1568generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node, generateX86MemoryReference(targetReg, fej9->getOffsetOfDiscontiguousArraySizeField(), cg), 0, cg);15691570generateLabelInstruction(TR::InstOpCode::JMP4, node, fallThru, cg);15711572//First dim length not 01573generateLabelInstruction(TR::InstOpCode::label, node, nonZeroFirstDimLabel, cg);15741575generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, componentClassReg,1576generateX86MemoryReference(classReg, offsetof(J9ArrayClass, componentType), cg), cg);15771578int32_t elementSize = TR::Compiler->om.sizeofReferenceField();15791580uintptr_t maxObjectSize = cg->getMaxObjectSizeGuaranteedNotToOverflow();1581uintptr_t maxObjectSizeInElements = maxObjectSize / elementSize;1582generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, firstDimLenReg, static_cast<int32_t>(maxObjectSizeInElements), cg);15831584// Must be an unsigned comparison on sizes.1585generateLabelInstruction(TR::InstOpCode::JAE4, node, oolJumpPoint, cg);15861587generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, temp1Reg, firstDimLenReg, cg);15881589int32_t elementSizeAligned = OMR::align(elementSize, TR::Compiler->om.getObjectAlignmentInBytes());1590int32_t alignmentCompensation = (elementSize == elementSizeAligned) ? 
0 : elementSizeAligned - 1;15911592TR_ASSERT_FATAL(elementSize <= 8, "multianewArrayEvaluator - elementSize cannot be greater than 8!");1593generateRegImmInstruction(TR::InstOpCode::SHLRegImm1(), node, temp1Reg, TR::MemoryReference::convertMultiplierToStride(elementSize), cg);1594generateRegImmInstruction(TR::InstOpCode::ADDRegImm4(), node, temp1Reg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+alignmentCompensation, cg);15951596if (alignmentCompensation != 0)1597{1598generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(), node, temp1Reg, -elementSizeAligned, cg);1599}16001601TR_ASSERT_FATAL(zeroArraySizeAligned >= 0 && zeroArraySizeAligned <= 127, "discontiguousArrayHeaderSizeInBytes cannot be > 127 for IMulRegRegImms instruction");1602generateRegRegImmInstruction(TR::InstOpCode::IMULRegRegImm4(), node, temp2Reg, firstDimLenReg, zeroArraySizeAligned, cg);16031604// temp2Reg = temp2Reg + temp1Reg1605generateRegRegInstruction(TR::InstOpCode::ADDRegReg(), node, temp2Reg, temp1Reg, cg);16061607generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, targetReg, generateX86MemoryReference(vmThreadReg, offsetof(J9VMThread, heapAlloc), cg), cg);1608// temp2Reg = temp2Reg + J9VMThread->heapAlloc1609generateRegRegInstruction(TR::InstOpCode::ADDRegReg(), node, temp2Reg, targetReg, cg);16101611generateRegMemInstruction(TR::InstOpCode::CMPRegMem(), node, temp2Reg, generateX86MemoryReference(vmThreadReg, offsetof(J9VMThread, heapTop), cg), cg);1612generateLabelInstruction(TR::InstOpCode::JA4, node, oolJumpPoint, cg);1613generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(vmThreadReg, offsetof(J9VMThread, heapAlloc), cg), temp2Reg, cg);16141615//init 1st dim array class field1616generateMemRegInstruction(TR::InstOpCode::SMemReg(use64BitClasses), node, generateX86MemoryReference(targetReg, TR::Compiler->om.offsetOfObjectVftField(), cg), classReg, cg);1617// Init 1st dim array size 
field1618generateMemRegInstruction(TR::InstOpCode::S4MemReg, node, generateX86MemoryReference(targetReg, fej9->getOffsetOfContiguousArraySizeField(), cg), firstDimLenReg, cg);16191620// temp2 point to end of 1st dim array i.e. start of 2nd dim1621generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, temp2Reg, targetReg, cg);1622generateRegRegInstruction(TR::InstOpCode::ADDRegReg(), node, temp2Reg, temp1Reg, cg);1623// temp1 points to 1st dim array past header1624generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, temp1Reg, generateX86MemoryReference(targetReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg), cg);16251626//loop start1627generateLabelInstruction(TR::InstOpCode::label, node, loopLabel, cg);1628// Init 2nd dim element's class1629generateMemRegInstruction(TR::InstOpCode::SMemReg(use64BitClasses), node, generateX86MemoryReference(temp2Reg, TR::Compiler->om.offsetOfObjectVftField(), cg), componentClassReg, cg);1630// Init 2nd dim element's size and '0' fields to 01631generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node, generateX86MemoryReference(temp2Reg, fej9->getOffsetOfContiguousArraySizeField(), cg), 0, cg);1632generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node, generateX86MemoryReference(temp2Reg, fej9->getOffsetOfDiscontiguousArraySizeField(), cg), 0, cg);1633// Store 2nd dim element into 1st dim array slot, compress temp2 if needed1634if (comp->target().is64Bit() && comp->useCompressedPointers())1635{1636int32_t shiftAmount = TR::Compiler->om.compressedReferenceShift();1637generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, temp3Reg, temp2Reg, cg);1638if (shiftAmount != 0)1639{1640generateRegImmInstruction(TR::InstOpCode::SHRRegImm1(), node, temp3Reg, shiftAmount, cg);1641}1642generateMemRegInstruction(TR::InstOpCode::S4MemReg, node, generateX86MemoryReference(temp1Reg, 0, cg), temp3Reg, cg);1643}1644else1645{1646generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, 
generateX86MemoryReference(temp1Reg, 0, cg), temp2Reg, cg);1647}16481649// Advance cursors temp1 and temp21650generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, temp2Reg, zeroArraySizeAligned, cg);1651generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, temp1Reg, elementSize, cg);16521653generateRegInstruction(TR::InstOpCode::DEC4Reg, node, firstDimLenReg, cg);1654generateLabelInstruction(TR::InstOpCode::JA4, node, loopLabel, cg);1655generateLabelInstruction(TR::InstOpCode::JMP4, node, fallThru, cg);16561657TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, 13, cg);16581659deps->addPostCondition(dimsPtrReg, TR::RealRegister::NoReg, cg);1660deps->addPostCondition(dimReg, TR::RealRegister::NoReg, cg);1661deps->addPostCondition(classReg, TR::RealRegister::NoReg, cg);16621663deps->addPostCondition(firstDimLenReg, TR::RealRegister::NoReg, cg);1664deps->addPostCondition(secondDimLenReg, TR::RealRegister::NoReg, cg);1665deps->addPostCondition(temp1Reg, TR::RealRegister::NoReg, cg);1666deps->addPostCondition(temp2Reg, TR::RealRegister::NoReg, cg);1667deps->addPostCondition(temp3Reg, TR::RealRegister::NoReg, cg);1668deps->addPostCondition(componentClassReg, TR::RealRegister::NoReg, cg);16691670deps->addPostCondition(targetReg, TR::RealRegister::eax, cg);1671deps->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);16721673TR::Node *callNode = outlinedHelperCall->getCallNode();1674TR::Register *reg;16751676if (callNode->getFirstChild() == node->getFirstChild())1677{1678reg = callNode->getFirstChild()->getRegister();1679if (reg)1680deps->unionPostCondition(reg, TR::RealRegister::NoReg, cg);1681}16821683if (callNode->getSecondChild() == node->getSecondChild())1684{1685reg = callNode->getSecondChild()->getRegister();1686if (reg)1687deps->unionPostCondition(reg, TR::RealRegister::NoReg, cg);1688}16891690if (callNode->getThirdChild() == node->getThirdChild())1691{1692reg = 
callNode->getThirdChild()->getRegister();1693if (reg)1694deps->unionPostCondition(reg, TR::RealRegister::NoReg, cg);1695}16961697deps->stopAddingConditions();16981699generateLabelInstruction(TR::InstOpCode::label, node, oolJumpPoint, cg);1700generateLabelInstruction(TR::InstOpCode::JMP4, node, oolFailLabel, cg);17011702generateLabelInstruction(TR::InstOpCode::label, node, fallThru, deps, cg);17031704// Copy the newly allocated object into a collected reference register now that it is a valid object.1705//1706TR::Register *targetReg2 = cg->allocateCollectedReferenceRegister();1707TR::RegisterDependencyConditions *deps2 = generateRegisterDependencyConditions(0, 1, cg);1708deps2->addPostCondition(targetReg2, TR::RealRegister::eax, cg);1709generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, targetReg2, targetReg, deps2, cg);1710cg->stopUsingRegister(targetReg);1711targetReg = targetReg2;17121713cg->stopUsingRegister(firstDimLenReg);1714cg->stopUsingRegister(secondDimLenReg);1715cg->stopUsingRegister(temp1Reg);1716cg->stopUsingRegister(temp2Reg);1717cg->stopUsingRegister(temp3Reg);1718cg->stopUsingRegister(componentClassReg);17191720// Decrement use counts on the children1721//1722cg->decReferenceCount(node->getFirstChild());1723cg->decReferenceCount(node->getSecondChild());1724cg->decReferenceCount(node->getThirdChild());17251726node->setRegister(targetReg);1727return targetReg;1728}17291730TR::Register *J9::X86::TreeEvaluator::arraycopyEvaluator(TR::Node *node, TR::CodeGenerator *cg)1731{1732if (cg->canEmitBreakOnDFSet())1733generateBreakOnDFSet(cg);17341735TR::Compilation *comp = cg->comp();17361737if (!node->isReferenceArrayCopy())1738{1739return OMR::TreeEvaluatorConnector::arraycopyEvaluator(node, cg);1740}17411742auto srcObjReg = cg->evaluate(node->getChild(0));1743auto dstObjReg = cg->evaluate(node->getChild(1));1744auto srcReg = cg->evaluate(node->getChild(2));1745auto dstReg = cg->evaluate(node->getChild(3));1746auto sizeReg = 
cg->evaluate(node->getChild(4));17471748if (comp->target().is64Bit() && !TR::TreeEvaluator::getNodeIs64Bit(node->getChild(4), cg))1749{1750generateRegRegInstruction(TR::InstOpCode::MOVZXReg8Reg4, node, sizeReg, sizeReg, cg);1751}17521753if (!node->isNoArrayStoreCheckArrayCopy())1754{1755// Nothing to optimize, simply call jitReferenceArrayCopy helper1756auto deps = generateRegisterDependencyConditions((uint8_t)3, 3, cg);1757deps->addPreCondition(srcReg, TR::RealRegister::esi, cg);1758deps->addPreCondition(dstReg, TR::RealRegister::edi, cg);1759deps->addPreCondition(sizeReg, TR::RealRegister::ecx, cg);1760deps->addPostCondition(srcReg, TR::RealRegister::esi, cg);1761deps->addPostCondition(dstReg, TR::RealRegister::edi, cg);1762deps->addPostCondition(sizeReg, TR::RealRegister::ecx, cg);17631764generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp1), cg), srcObjReg, cg);1765generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp2), cg), dstObjReg, cg);1766generateHelperCallInstruction(node, TR_referenceArrayCopy, deps, cg)->setNeedsGCMap(0xFF00FFFF);17671768auto snippetLabel = generateLabelSymbol(cg);1769auto instr = generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg); // ReferenceArrayCopy set ZF when succeed.1770auto snippet = new (cg->trHeapMemory()) TR::X86CheckFailureSnippet(cg, cg->symRefTab()->findOrCreateRuntimeHelper(TR_arrayStoreException),1771snippetLabel, instr, false);1772cg->addSnippet(snippet);1773}1774else1775{1776bool use64BitClasses = comp->target().is64Bit() && !TR::Compiler->om.generateCompressedObjectHeaders();17771778auto RSI = cg->allocateRegister();1779auto RDI = cg->allocateRegister();1780auto RCX = cg->allocateRegister();17811782generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, RSI, srcReg, 
cg);1783generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, RDI, dstReg, cg);1784generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, RCX, sizeReg, cg);17851786auto deps = generateRegisterDependencyConditions((uint8_t)5, 5, cg);1787deps->addPreCondition(RSI, TR::RealRegister::esi, cg);1788deps->addPreCondition(RDI, TR::RealRegister::edi, cg);1789deps->addPreCondition(RCX, TR::RealRegister::ecx, cg);1790deps->addPreCondition(srcObjReg, TR::RealRegister::NoReg, cg);1791deps->addPreCondition(dstObjReg, TR::RealRegister::NoReg, cg);1792deps->addPostCondition(RSI, TR::RealRegister::esi, cg);1793deps->addPostCondition(RDI, TR::RealRegister::edi, cg);1794deps->addPostCondition(RCX, TR::RealRegister::ecx, cg);1795deps->addPostCondition(srcObjReg, TR::RealRegister::NoReg, cg);1796deps->addPostCondition(dstObjReg, TR::RealRegister::NoReg, cg);17971798auto begLabel = generateLabelSymbol(cg);1799auto endLabel = generateLabelSymbol(cg);1800begLabel->setStartInternalControlFlow();1801endLabel->setEndInternalControlFlow();18021803generateLabelInstruction(TR::InstOpCode::label, node, begLabel, cg);18041805if (TR::Compiler->om.readBarrierType() != gc_modron_readbar_none)1806{1807bool use64BitClasses = comp->target().is64Bit() && !comp->useCompressedPointers();18081809TR::LabelSymbol* rdbarLabel = generateLabelSymbol(cg);1810// EvacuateTopAddress == 0 means Concurrent Scavenge is inactive1811generateMemImmInstruction(TR::InstOpCode::CMPMemImms(use64BitClasses), node, generateX86MemoryReference(cg->getVMThreadRegister(), comp->fej9()->thisThreadGetEvacuateTopAddressOffset(), cg), 0, cg);1812generateLabelInstruction(TR::InstOpCode::JNE4, node, rdbarLabel, cg);18131814TR_OutlinedInstructionsGenerator og(rdbarLabel, node, cg);1815generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp1), cg), srcObjReg, cg);1816generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, 
generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp2), cg), dstObjReg, cg);1817generateHelperCallInstruction(node, TR_referenceArrayCopy, NULL, cg)->setNeedsGCMap(0xFF00FFFF);1818generateLabelInstruction(TR::InstOpCode::JMP4, node, endLabel, cg);1819og.endOutlinedInstructionSequence();1820}1821if (!node->isForwardArrayCopy())1822{1823TR::LabelSymbol* backwardLabel = generateLabelSymbol(cg);18241825generateRegRegInstruction(TR::InstOpCode::SUBRegReg(), node, RDI, RSI, cg); // dst = dst - src1826generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, RDI, RCX, cg); // cmp dst, size1827generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, RDI, generateX86MemoryReference(RDI, RSI, 0, cg), cg); // dst = dst + src1828generateLabelInstruction(TR::InstOpCode::JB4, node, backwardLabel, cg); // jb, skip backward copy setup18291830TR_OutlinedInstructionsGenerator og(backwardLabel, node, cg);1831generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, RSI, generateX86MemoryReference(RSI, RCX, 0, -TR::Compiler->om.sizeofReferenceField(), cg), cg);1832generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, RDI, generateX86MemoryReference(RDI, RCX, 0, -TR::Compiler->om.sizeofReferenceField(), cg), cg);1833generateRegImmInstruction(TR::InstOpCode::SHRRegImm1(), node, RCX, use64BitClasses ? 3 : 2, cg);1834generateInstruction(TR::InstOpCode::STD, node, cg);1835generateInstruction(use64BitClasses ? TR::InstOpCode::REPMOVSQ : TR::InstOpCode::REPMOVSD, node, cg);1836generateInstruction(TR::InstOpCode::CLD, node, cg);1837generateLabelInstruction(TR::InstOpCode::JMP4, node, endLabel, cg);1838og.endOutlinedInstructionSequence();1839}1840generateRegImmInstruction(TR::InstOpCode::SHRRegImm1(), node, RCX, use64BitClasses ? 3 : 2, cg);1841generateInstruction(use64BitClasses ? 
TR::InstOpCode::REPMOVSQ : TR::InstOpCode::REPMOVSD, node, cg);1842generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);18431844cg->stopUsingRegister(RSI);1845cg->stopUsingRegister(RDI);1846cg->stopUsingRegister(RCX);18471848TR::TreeEvaluator::VMwrtbarWithoutStoreEvaluator(node, node->getChild(1), NULL, NULL, cg->generateScratchRegisterManager(), cg);1849}18501851for (int32_t i = 0; i < node->getNumChildren(); i++)1852{1853cg->decReferenceCount(node->getChild(i));1854}1855return NULL;1856}18571858TR::Register *J9::X86::TreeEvaluator::arraylengthEvaluator(TR::Node *node, TR::CodeGenerator *cg)1859{1860TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());1861// MOV R, [B + contiguousSize]1862// TEST R, R1863// CMOVE R, [B + discontiguousSize]1864//1865TR::Register *objectReg = cg->evaluate(node->getFirstChild());1866TR::Register *lengthReg = cg->allocateRegister();18671868TR::MemoryReference *contiguousArraySizeMR =1869generateX86MemoryReference(objectReg, fej9->getOffsetOfContiguousArraySizeField(), cg);18701871TR::MemoryReference *discontiguousArraySizeMR =1872generateX86MemoryReference(objectReg, fej9->getOffsetOfDiscontiguousArraySizeField(), cg);18731874generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, lengthReg, contiguousArraySizeMR, cg);1875generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, lengthReg, lengthReg, cg);1876generateRegMemInstruction(TR::InstOpCode::CMOVE4RegMem, node, lengthReg, discontiguousArraySizeMR, cg);18771878cg->decReferenceCount(node->getFirstChild());1879node->setRegister(lengthReg);1880return lengthReg;1881}18821883TR::Register *J9::X86::TreeEvaluator::exceptionRangeFenceEvaluator(TR::Node *node, TR::CodeGenerator *cg)1884{1885generateFenceInstruction(TR::InstOpCode::fence, node, node, cg);1886return NULL;1887}188818891890TR::Register *J9::X86::TreeEvaluator::evaluateNULLCHKWithPossibleResolve(1891TR::Node *node,1892bool needResolution,1893TR::CodeGenerator *cg)1894{1895TR_J9VMBase *fej9 = (TR_J9VMBase 
*)(cg->fe());1896static bool disableBranchlessPassThroughNULLCHK = feGetEnv("TR_disableBranchlessPassThroughNULLCHK") != NULL;1897// NOTE:1898//1899// If no code is generated for the null check, just evaluate the1900// child and decrement its use count UNLESS the child is a pass-through node1901// in which case some kind of explicit test or indirect load must be generated1902// to force the null check at this point.1903//1904TR::Node *firstChild = node->getFirstChild();1905TR::Node *reference = NULL;1906TR::Compilation *comp = cg->comp();19071908bool usingCompressedPointers = false;19091910if (comp->useCompressedPointers() &&1911firstChild->getOpCodeValue() == TR::l2a)1912{1913// pattern match the sequence under the l2a1914// NULLCHK NULLCHK <- node1915// aloadi f l2a1916// aload O ladd1917// lshl1918// i2l1919// iloadi/irdbari f <- firstChild1920// aload O <- reference1921// iconst shftKonst1922// lconst HB1923//1924usingCompressedPointers = true;19251926TR::ILOpCodes loadOp = comp->il.opCodeForIndirectLoad(TR::Int32);1927TR::ILOpCodes rdbarOp = comp->il.opCodeForIndirectReadBarrier(TR::Int32);1928while (firstChild->getOpCodeValue() != loadOp && firstChild->getOpCodeValue() != rdbarOp)1929firstChild = firstChild->getFirstChild();1930reference = firstChild->getFirstChild();1931}1932else1933reference = node->getNullCheckReference();19341935TR::ILOpCode &opCode = firstChild->getOpCode();19361937// Skip the NULLCHK for TR::loadaddr nodes.1938//1939if (reference->getOpCodeValue() == TR::loadaddr)1940{1941if (usingCompressedPointers)1942firstChild = node->getFirstChild();1943cg->evaluate(firstChild);1944cg->decReferenceCount(firstChild);1945return NULL;1946}19471948bool needExplicitCheck = true;1949bool needLateEvaluation = true;19501951// Add the explicit check after this instruction1952//1953TR::Instruction *appendTo = 0;19541955if (opCode.isLoadVar() || (comp->target().is64Bit() && opCode.getOpCodeValue()==TR::l2i))1956{1957TR::SymbolReference *symRef = 
NULL;19581959if (opCode.getOpCodeValue()==TR::l2i)1960{1961symRef = firstChild->getFirstChild()->getSymbolReference();1962}1963else1964symRef = firstChild->getSymbolReference();19651966if (symRef &&1967(symRef->getSymbol()->getOffset() + symRef->getOffset() < cg->getNumberBytesReadInaccessible()))1968{1969needExplicitCheck = false;19701971// If the child is an arraylength which has been reduced to an iiload,1972// and is only going to be used immediately in a bound check then combine the checks.1973//1974TR::TreeTop *nextTreeTop = cg->getCurrentEvaluationTreeTop()->getNextTreeTop();1975if (firstChild->getReferenceCount() == 2 && nextTreeTop)1976{1977TR::Node *nextTopNode = nextTreeTop->getNode();19781979if (nextTopNode)1980{1981if (nextTopNode->getOpCode().isBndCheck() || nextTopNode->getOpCode().isSpineCheck())1982{1983bool doIt = false;19841985if (nextTopNode->getOpCodeValue() == TR::SpineCHK)1986{1987// Implicit NULLCHKs and SpineCHKs can be merged if the base array1988// is the same.1989//1990if (firstChild->getOpCode().isIndirect() && firstChild->getOpCode().isLoadVar())1991{1992if (nextTopNode->getChild(1) == firstChild->getFirstChild())1993doIt = true;1994}1995}1996else1997{1998int32_t arrayLengthChildNum = (nextTopNode->getOpCodeValue() == TR::BNDCHKwithSpineCHK) ? 
2 : 0;19992000if (nextTopNode->getChild(arrayLengthChildNum) == firstChild)2001doIt = true;2002}20032004if (doIt &&2005performTransformation(comp,2006"\nMerging NULLCHK [" POINTER_PRINTF_FORMAT "] and BNDCHK/SpineCHK [" POINTER_PRINTF_FORMAT "] of load child [" POINTER_PRINTF_FORMAT "]\n",2007node, nextTopNode, firstChild))2008{2009needLateEvaluation = false;2010nextTopNode->setHasFoldedImplicitNULLCHK(true);2011}2012}2013else if (nextTopNode->getOpCode().isIf() &&2014nextTopNode->isNonoverriddenGuard() &&2015nextTopNode->getFirstChild() == firstChild)2016{2017needLateEvaluation = false;2018needExplicitCheck = true;2019reference->incReferenceCount(); // will be decremented again later2020}2021}2022}2023}2024else if (firstChild->getReferenceCount() == 1 && !firstChild->getSymbolReference()->isUnresolved())2025{2026// If the child is only used here, we don't need to evaluate it2027// since all we need is the grandchild which will be evaluated by2028// the generation of the explicit check below.2029//2030needLateEvaluation = false;20312032// at this point, firstChild is the raw iiload (created by lowerTrees) and2033// reference is the aload of the object. 
node->getFirstChild is the2034// l2a sequence; as a result, firstChild's refCount will always be 12035// and node->getFirstChild's refCount will be at least 2 (one under the nullchk2036// and the other under the translate treetop)2037//2038if (usingCompressedPointers && node->getFirstChild()->getReferenceCount() >= 2)2039needLateEvaluation = true;2040}2041}2042else if (opCode.isStore())2043{2044TR::SymbolReference *symRef = firstChild->getSymbolReference();2045if (symRef &&2046symRef->getSymbol()->getOffset() + symRef->getOffset() < cg->getNumberBytesWriteInaccessible())2047{2048needExplicitCheck = false;2049}2050}2051else if (opCode.isCall() &&2052opCode.isIndirect() &&2053cg->getNumberBytesReadInaccessible() > TR::Compiler->om.offsetOfObjectVftField())2054{2055needExplicitCheck = false;2056}2057else if (opCode.getOpCodeValue() == TR::monent ||2058opCode.getOpCodeValue() == TR::monexit)2059{2060// The child may generate inline code that provides an implicit null check2061// but we won't know until the child is evaluated.2062//2063reference->incReferenceCount(); // will be decremented again later2064needLateEvaluation = false;2065cg->evaluate(reference);2066appendTo = cg->getAppendInstruction();2067cg->evaluate(firstChild);20682069// TODO: this shouldn't be getOffsetOfContiguousArraySizeField2070//2071if (cg->getImplicitExceptionPoint() &&2072cg->getNumberBytesReadInaccessible() > fej9->getOffsetOfContiguousArraySizeField())2073{2074needExplicitCheck = false;2075cg->decReferenceCount(reference);2076}2077}2078else if (!disableBranchlessPassThroughNULLCHK && opCode.getOpCodeValue () == TR::PassThrough2079&& !needResolution && cg->getHasResumableTrapHandler())2080{2081TR::Register *refRegister = cg->evaluate(firstChild);2082needLateEvaluation = false;20832084if (refRegister)2085{2086if (!appendTo)2087appendTo = cg->getAppendInstruction();2088if (cg->getNumberBytesReadInaccessible() > 0)2089{2090needExplicitCheck = false;2091TR::MemoryReference *memRef = NULL;2092if 
(TR::Compiler->om.compressedReferenceShift() > 02093&& firstChild->getType() == TR::Address2094&& firstChild->getOpCode().hasSymbolReference()2095&& firstChild->getSymbol()->isCollectedReference())2096{2097memRef = generateX86MemoryReference(NULL, refRegister, TR::Compiler->om.compressedReferenceShift(), 0, cg);2098}2099else2100{2101memRef = generateX86MemoryReference(refRegister, 0, cg);2102}2103appendTo = generateMemImmInstruction(appendTo, TR::InstOpCode::TEST1MemImm1, memRef, 0, cg);2104cg->setImplicitExceptionPoint(appendTo);2105}2106}2107}21082109// Generate the code for the null check.2110//2111if (needExplicitCheck)2112{2113// TODO - If a resolve check is needed as well, the resolve must be done2114// before the null check, so that exceptions are handled in the correct2115// order.2116//2117///// if (needResolution)2118///// {2119///// ...2120///// }21212122// Avoid loading the grandchild into a register if it is not going to be used again.2123//2124if (opCode.getOpCodeValue() == TR::PassThrough &&2125reference->getOpCode().isLoadVar() &&2126reference->getRegister() == NULL &&2127reference->getReferenceCount() == 1)2128{2129TR::MemoryReference *tempMR = generateX86MemoryReference(reference, cg);21302131if (!appendTo)2132appendTo = cg->getAppendInstruction();21332134TR::InstOpCode::Mnemonic op = TR::InstOpCode::CMPMemImms();2135appendTo = generateMemImmInstruction(appendTo, op, tempMR, NULLVALUE, cg);2136tempMR->decNodeReferenceCounts(cg);2137needLateEvaluation = false;2138}2139else2140{2141TR::Register *targetRegister = cg->evaluate(reference);21422143if (!appendTo)2144appendTo = cg->getAppendInstruction();21452146appendTo = generateRegRegInstruction(appendTo, TR::InstOpCode::TESTRegReg(), targetRegister, targetRegister, cg);2147}21482149TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);2150appendTo = generateLabelInstruction(appendTo, TR::InstOpCode::JE4, snippetLabel, cg);2151//the _node field should point to the current 
node2152appendTo->setNode(node);2153appendTo->setLiveLocals(cg->getLiveLocals());21542155TR::Snippet *snippet;2156if (opCode.isCall() || !needResolution || comp->target().is64Bit()) //TODO:AMD64: Implement the "withresolve" version2157{2158snippet = new (cg->trHeapMemory()) TR::X86CheckFailureSnippet(cg, node->getSymbolReference(),2159snippetLabel, appendTo);2160}2161else2162{2163TR_RuntimeHelper resolverCall;2164TR::Machine *machine = cg->machine();2165TR::Symbol * firstChildSym = firstChild->getSymbolReference()->getSymbol();21662167if (firstChildSym->isShadow())2168{2169resolverCall = opCode.isStore() ?2170TR_X86interpreterUnresolvedFieldSetterGlue : TR_X86interpreterUnresolvedFieldGlue;2171}2172else if (firstChildSym->isClassObject())2173{2174resolverCall = firstChildSym->addressIsCPIndexOfStatic() ?2175TR_X86interpreterUnresolvedClassFromStaticFieldGlue : TR_X86interpreterUnresolvedClassGlue;2176}2177else if (firstChildSym->isConstString())2178{2179resolverCall = TR_X86interpreterUnresolvedStringGlue;2180}2181else if (firstChildSym->isConstMethodType())2182{2183resolverCall = TR_interpreterUnresolvedMethodTypeGlue;2184}2185else if (firstChildSym->isConstMethodHandle())2186{2187resolverCall = TR_interpreterUnresolvedMethodHandleGlue;2188}2189else if (firstChildSym->isCallSiteTableEntry())2190{2191resolverCall = TR_interpreterUnresolvedCallSiteTableEntryGlue;2192}2193else if (firstChildSym->isMethodTypeTableEntry())2194{2195resolverCall = TR_interpreterUnresolvedMethodTypeTableEntryGlue;2196}2197else2198{2199resolverCall = opCode.isStore() ?2200TR_X86interpreterUnresolvedStaticFieldSetterGlue : TR_X86interpreterUnresolvedStaticFieldGlue;2201}22022203snippet = new (cg->trHeapMemory()) TR::X86CheckFailureSnippetWithResolve(cg, node->getSymbolReference(),2204firstChild->getSymbolReference(),2205resolverCall,2206snippetLabel,2207appendTo);22082209((TR::X86CheckFailureSnippetWithResolve 
*)(snippet))->setNumLiveX87Registers(machine->fpGetNumberOfLiveFPRs());2210((TR::X86CheckFailureSnippetWithResolve *)(snippet))->setHasLiveXMMRs();2211}22122213cg->addSnippet(snippet);2214}22152216// If we need to evaluate the child, do so. Otherwise, if we have2217// evaluated the reference node, then decrement its use count.2218// The use count of the child is decremented when we are done2219// evaluating the NULLCHK.2220//2221if (needLateEvaluation)2222{2223cg->evaluate(node->getFirstChild());2224}2225else if (needExplicitCheck)2226{2227cg->decReferenceCount(reference);2228}22292230if (comp->useCompressedPointers())2231cg->decReferenceCount(node->getFirstChild());2232else2233cg->decReferenceCount(firstChild);22342235// If an explicit check has not been generated for the null check, there is2236// an instruction that will cause a hardware trap if the exception is to be2237// taken. If this method may catch the exception, a GC stack map must be2238// created for this instruction. All registers are valid at this GC point2239// TODO - if the method may not catch the exception we still need to note2240// that the GC point exists, since maps before this point and after it cannot2241// be merged.2242//2243if (!needExplicitCheck)2244{2245TR::Instruction *faultingInstruction = cg->getImplicitExceptionPoint();2246if (faultingInstruction)2247{2248faultingInstruction->setNeedsGCMap(0xFF00FFFF);2249faultingInstruction->setNode(node);2250}2251}22522253TR::Node *n = NULL;2254if (comp->useCompressedPointers() &&2255reference->getOpCodeValue() == TR::l2a)2256{2257reference->setIsNonNull(true);2258n = reference->getFirstChild();2259TR::ILOpCodes loadOp = comp->il.opCodeForIndirectLoad(TR::Int32);2260TR::ILOpCodes rdbarOp = comp->il.opCodeForIndirectReadBarrier(TR::Int32);2261while (n->getOpCodeValue() != loadOp && n->getOpCodeValue() != rdbarOp)2262{2263n->setIsNonZero(true);2264n = 
n->getFirstChild();2265}2266n->setIsNonZero(true);2267}22682269reference->setIsNonNull(true);22702271return NULL;2272}22732274TR::Register *J9::X86::TreeEvaluator::NULLCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)2275{2276return TR::TreeEvaluator::evaluateNULLCHKWithPossibleResolve(node, false, cg);2277}22782279TR::Register *J9::X86::TreeEvaluator::resolveAndNULLCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)2280{2281return TR::TreeEvaluator::evaluateNULLCHKWithPossibleResolve(node, true, cg);2282}228322842285// Generate explicit checks for division by zero and division2286// overflow (i.e. 0x80000000 / 0xFFFFFFFF), if necessary.2287//2288TR::Register *J9::X86::TreeEvaluator::DIVCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)2289{2290bool hasConversion;2291TR::Node *divisionNode = node->getFirstChild();2292TR::Compilation *comp = cg->comp();22932294TR::ILOpCodes op = divisionNode->getOpCodeValue();22952296if (op == TR::iu2l ||2297op == TR::bu2i ||2298op == TR::bu2l ||2299op == TR::bu2s ||2300op == TR::su2i ||2301op == TR::su2l)2302{2303divisionNode = divisionNode->getFirstChild();2304hasConversion = true;2305}2306else2307hasConversion = false;23082309bool use64BitRegisters = comp->target().is64Bit() && divisionNode->getOpCode().isLong();2310bool useRegisterPairs = comp->target().is32Bit() && divisionNode->getOpCode().isLong();23112312// Not all targets support implicit division checks, so we generate explicit2313// tests and snippets to jump to.2314//2315bool platformNeedsExplicitCheck = !cg->enableImplicitDivideCheck();23162317// Only do this for TR::ldiv/TR::lrem and TR::idiv/TR::irem by non-constant2318// divisors, or by a constant of zero.2319// Other constant divisors are optimized in signedIntegerDivOrRemAnalyser,2320// and do not cause hardware exceptions.2321//2322bool operationNeedsCheck = (divisionNode->getOpCode().isInt() &&2323(!divisionNode->getSecondChild()->getOpCode().isLoadConst() || divisionNode->getSecondChild()->getInt() == 
0));2324if (use64BitRegisters)2325{2326operationNeedsCheck = operationNeedsCheck |2327((!divisionNode->getSecondChild()->getOpCode().isLoadConst() || divisionNode->getSecondChild()->getLongInt() == 0));2328}2329else2330{2331operationNeedsCheck = operationNeedsCheck | useRegisterPairs;2332}23332334if (platformNeedsExplicitCheck && operationNeedsCheck)2335{2336TR::Register *dividendReg = cg->evaluate(divisionNode->getFirstChild());2337TR::Register *divisorReg = cg->evaluate(divisionNode->getSecondChild());23382339TR::LabelSymbol *startLabel = generateLabelSymbol(cg);2340TR::LabelSymbol *divisionLabel = generateLabelSymbol(cg);2341TR::LabelSymbol *divideByZeroSnippetLabel = generateLabelSymbol(cg);2342TR::LabelSymbol *restartLabel = generateLabelSymbol(cg);23432344// These instructions are dissected in the divide check snippet to determine2345// the source registers. If they or their format are changed, you may need to2346// change the snippet(s) also.2347//2348TR::X86RegRegInstruction *lowDivisorTestInstr;2349TR::X86RegRegInstruction *highDivisorTestInstr;23502351startLabel->setStartInternalControlFlow();2352restartLabel->setEndInternalControlFlow();23532354generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);23552356if (useRegisterPairs)2357{2358TR::Register *tempReg = cg->allocateRegister(TR_GPR);2359lowDivisorTestInstr = generateRegRegInstruction(TR::InstOpCode::MOV4RegReg, node, tempReg, divisorReg->getLowOrder(), cg);2360highDivisorTestInstr = generateRegRegInstruction(TR::InstOpCode::OR4RegReg, node, tempReg, divisorReg->getHighOrder(), cg);2361generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, tempReg, tempReg, cg);2362cg->stopUsingRegister(tempReg);2363}2364else2365lowDivisorTestInstr = generateRegRegInstruction(TR::InstOpCode::TESTRegReg(use64BitRegisters), node, divisorReg, divisorReg, cg);23662367generateLabelInstruction(TR::InstOpCode::JE4, node, divideByZeroSnippetLabel, cg);23682369cg->addSnippet(new (cg->trHeapMemory()) 
TR::X86CheckFailureSnippet(cg, node->getSymbolReference(),2370divideByZeroSnippetLabel,2371cg->getAppendInstruction()));23722373generateLabelInstruction(TR::InstOpCode::label, node, divisionLabel, cg);23742375TR::Register *resultRegister = cg->evaluate(divisionNode);23762377if (!hasConversion)2378cg->decReferenceCount(divisionNode);23792380// We need to make sure that any spilling occurs only after restartLabel,2381// otherwise the divide check snippet may store into the wrong register.2382//2383TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t) 0, 2, cg);2384TR::Register *scratchRegister;23852386if (useRegisterPairs)2387{2388deps->addPostCondition(resultRegister->getLowOrder(), TR::RealRegister::eax, cg);2389deps->addPostCondition(resultRegister->getHighOrder(), TR::RealRegister::edx, cg);2390}2391else switch(divisionNode->getOpCodeValue())2392{2393case TR::idiv:2394case TR::ldiv:2395deps->addPostCondition(resultRegister, TR::RealRegister::eax, cg);2396scratchRegister = cg->allocateRegister(TR_GPR);2397deps->addPostCondition(scratchRegister, TR::RealRegister::edx, cg);2398cg->stopUsingRegister(scratchRegister);2399break;24002401case TR::irem:2402case TR::lrem:2403deps->addPostCondition(resultRegister, TR::RealRegister::edx, cg);2404scratchRegister = cg->allocateRegister(TR_GPR);2405deps->addPostCondition(scratchRegister, TR::RealRegister::eax, cg);2406cg->stopUsingRegister(scratchRegister);2407break;24082409default:2410TR_ASSERT(0, "bad division opcode for DIVCHK\n");2411}24122413generateLabelInstruction(TR::InstOpCode::label, node, restartLabel, deps, cg);24142415if (hasConversion)2416{2417cg->evaluate(node->getFirstChild());2418cg->decReferenceCount(node->getFirstChild());2419}2420}2421else2422{2423cg->evaluate(node->getFirstChild());2424cg->decReferenceCount(node->getFirstChild());24252426// There may be an instruction that will cause a hardware trap if an exception2427// is to be taken.2428// If this method may catch the 
exception, a GC stack map must be created for2429// this instruction. All registers are valid at this GC point2430//2431// TODO: if the method may not catch the exception we still need to note2432// that the GC point exists, since maps before this point and after it cannot2433// be merged.2434//2435TR::Instruction *faultingInstruction = cg->getImplicitExceptionPoint();2436if (faultingInstruction)2437faultingInstruction->setNeedsGCMap(0xFF00FFFF);2438}24392440return NULL;2441}244224432444static bool isInteger(TR::ILOpCode &op, TR::CodeGenerator *cg)2445{2446if (cg->comp()->target().is64Bit())2447return op.isIntegerOrAddress();2448else2449return op.isIntegerOrAddress() && (op.getSize() <= 4);2450}245124522453static TR::InstOpCode::Mnemonic branchOpCodeForCompare(TR::ILOpCode &op, bool opposite=false)2454{2455int32_t index = 0;2456if (op.isCompareTrueIfLess())2457index += 1;2458if (op.isCompareTrueIfGreater())2459index += 2;2460if (op.isCompareTrueIfEqual())2461index += 4;2462if (op.isUnsignedCompare())2463index += 8;24642465if (opposite)2466index ^= 7;24672468static const TR::InstOpCode::Mnemonic opTable[] =2469{2470TR::InstOpCode::bad, TR::InstOpCode::JL4, TR::InstOpCode::JG4, TR::InstOpCode::JNE4,2471TR::InstOpCode::JE4, TR::InstOpCode::JLE4, TR::InstOpCode::JGE4, TR::InstOpCode::bad,2472TR::InstOpCode::bad, TR::InstOpCode::JB4, TR::InstOpCode::JA4, TR::InstOpCode::JNE4,2473TR::InstOpCode::JE4, TR::InstOpCode::JBE4, TR::InstOpCode::JAE4, TR::InstOpCode::bad,2474};2475return opTable[index];2476}247724782479TR::Register *J9::X86::TreeEvaluator::ZEROCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)2480{2481// NOTE: ZEROCHK is intended to be general and straightforward. If you're2482// thinking of adding special code for specific situations in here, consider2483// whether you want to add your own CHK opcode instead. 
// Evaluator for ZEROCHK nodes.
//
// The first child is the value being checked; the remaining children are only
// used by the out-of-line helper call.  The mainline code branches to the
// outlined helper call when the first child is zero (or, when the first child
// is itself an integer comparison, when that comparison is false).
//
// Always returns NULL.
//
TR::Register *J9::X86::TreeEvaluator::ZEROCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // NOTE: ZEROCHK is intended to be general and straightforward.  If you're
   // thinking of adding special code for specific situations in here, consider
   // whether you want to add your own CHK opcode instead.  If you feel the
   // need for special handling here, you may also want special handling in the
   // optimizer, in which case a separate opcode may be more suitable.
   //
   // On the other hand, if the improvements you're adding could benefit other
   // users of ZEROCHK, please go ahead and add them!
   //
   // If in doubt, discuss your design with your team lead.

   TR::LabelSymbol *slowPathLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
   TR::LabelSymbol *restartLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
   slowPathLabel->setStartInternalControlFlow();
   restartLabel->setEndInternalControlFlow();
   TR::Compilation *comp = cg->comp();

   // Temporarily hide the first child so it doesn't appear in the outlined call
   // (presumably the rotation moves it into the last slot, which the reduced
   // child count then excludes -- confirm against Node::rotateChildren).
   //
   node->rotateChildren(node->getNumChildren()-1, 0);
   node->setNumChildren(node->getNumChildren()-1);

   // Outlined instructions for check failure
   // Note: we don't pass the restartLabel in here because we don't want a
   // restart branch.
   //
   TR_OutlinedInstructions *outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(node, TR::call, NULL, slowPathLabel, NULL, cg);
   cg->getOutlinedInstructionsList().push_front(outlinedHelperCall);
   cg->generateDebugCounter(
      outlinedHelperCall->getFirstInstruction(),
      TR::DebugCounter::debugCounterName(comp, "helperCalls/%s/(%s)/%d/%d", node->getOpCode().getName(), comp->signature(), node->getByteCodeInfo().getCallerIndex(), node->getByteCodeInfo().getByteCodeIndex()),
      1, TR::DebugCounter::Cheap);

   // Restore the first child (exact inverse of the two calls above)
   //
   node->setNumChildren(node->getNumChildren()+1);
   node->rotateChildren(0, node->getNumChildren()-1);

   // Children other than the first are only for the outlined path; we don't need them here
   //
   for (int32_t i = 1; i < node->getNumChildren(); i++)
      cg->recursivelyDecReferenceCount(node->getChild(i));

   // In-line instructions for the check
   //
   TR::Node *valueToCheck = node->getFirstChild();
   if ( valueToCheck->getOpCode().isBooleanCompare()
      && isInteger(valueToCheck->getChild(0)->getOpCode(), cg)
      && isInteger(valueToCheck->getChild(1)->getOpCode(), cg)
      && performTransformation(comp, "O^O CODEGEN Optimizing ZEROCHK+%s %s\n", valueToCheck->getOpCode().getName(), valueToCheck->getName(cg->getDebug())))
      {
      // The value is an integer comparison: emit the comparison itself and
      // branch on its flags rather than materializing a 0/1 result.
      //
      if (valueToCheck->getOpCode().isCompareForOrder())
         {
         TR::TreeEvaluator::compareIntegersForOrder(valueToCheck, cg);
         }
      else
         {
         TR_ASSERT(valueToCheck->getOpCode().isCompareForEquality(), "Compare opcode must either be compare for order or for equality");
         TR::TreeEvaluator::compareIntegersForEquality(valueToCheck, cg);
         }
      // opposite=true: take the slow path when the comparison does not hold.
      //
      generateLabelInstruction(branchOpCodeForCompare(valueToCheck->getOpCode(), true), node, slowPathLabel, cg);
      }
   else
      {
      // General case: TEST value,value and take the slow path when it is zero.
      // Note that the 4-byte form of TEST is used.
      //
      TR::Register *value = cg->evaluate(node->getFirstChild());
      generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, value, value, cg);
      cg->decReferenceCount(node->getFirstChild());
      generateLabelInstruction(TR::InstOpCode::JE4, node, slowPathLabel, cg);
      }
   generateLabelInstruction(TR::InstOpCode::label, node, restartLabel, cg);

   return NULL;
   }
Need to re-think how we handle overflow cases.2560//2561static char *disableNoCompareEFlags = feGetEnv("TR_disableNoCompareEFlags");2562if (disableNoCompareEFlags)2563return false;25642565// See if there is a previous instruction that has set the condition flags2566// properly for this node's register2567//2568TR::Register *firstChildReg = node->getFirstChild()->getRegister();2569TR::Register *secondChildReg = node->getSecondChild()->getRegister();25702571if (!firstChildReg || !secondChildReg)2572return false;25732574// Find the last instruction that either2575// 1) sets the appropriate condition flags, or2576// 2) modifies the register to be tested2577// (and that hopefully does both)2578//2579TR::Instruction *prevInstr;2580for (prevInstr = comp->cg()->getAppendInstruction();2581prevInstr;2582prevInstr = prevInstr->getPrev())2583{2584if (prevInstr->getOpCodeValue() == TR::InstOpCode::CMP4RegReg)2585{2586TR::Register *prevInstrTargetRegister = prevInstr->getTargetRegister();2587TR::Register *prevInstrSourceRegister = prevInstr->getSourceRegister();25882589if (prevInstrTargetRegister && prevInstrSourceRegister &&2590(((prevInstrSourceRegister == firstChildReg) && (prevInstrTargetRegister == secondChildReg)) ||2591((prevInstrSourceRegister == secondChildReg) && (prevInstrTargetRegister == firstChildReg))))2592{2593if (performTransformation(comp, "O^O SKIP BOUND CHECK COMPARISON at node %p\n", node))2594{2595if (prevInstrTargetRegister == secondChildReg)2596*jumpOnOppositeCondition = true;2597return true;2598}2599else2600return false;2601}2602}26032604if (prevInstr->getOpCodeValue() == TR::InstOpCode::label)2605{2606// This instruction is a possible branch target.2607return false;2608}26092610if (prevInstr->getOpCode().modifiesSomeArithmeticFlags())2611{2612// This instruction overwrites the condition flags.2613return false;2614}2615}26162617return false;2618}261926202621TR::Register *J9::X86::TreeEvaluator::BNDCHKEvaluator(TR::Node *node, TR::CodeGenerator 
*cg)2622{2623TR::Node *firstChild = node->getFirstChild();2624TR::Node *secondChild = node->getSecondChild();26252626// Perform a bound check.2627//2628// Value propagation or profile-directed optimization may have determined2629// that the array bound is a constant, and lowered TR::arraylength into an2630// iconst. In this case, make sure that the constant is the second child.2631//2632TR::LabelSymbol *boundCheckFailureLabel = generateLabelSymbol(cg);2633TR::Instruction *instr;2634TR::Compilation *comp = cg->comp();26352636bool skippedComparison = false;2637bool jumpOnOppositeCondition = false;2638if (firstChild->getOpCode().isLoadConst())2639{2640if (secondChild->getOpCode().isLoadConst() && firstChild->getInt() <= secondChild->getInt())2641{2642instr = generateLabelInstruction(TR::InstOpCode::JMP4, node, boundCheckFailureLabel, cg);2643cg->decReferenceCount(firstChild);2644cg->decReferenceCount(secondChild);2645}2646else2647{2648if (!isConditionCodeSetForCompare(node, &jumpOnOppositeCondition))2649{2650node->swapChildren();2651TR::TreeEvaluator::compareIntegersForOrder(node, cg);2652node->swapChildren();2653instr = generateLabelInstruction(TR::InstOpCode::JAE4, node, boundCheckFailureLabel, cg);2654}2655else2656skippedComparison = true;2657}2658}2659else2660{2661if (!isConditionCodeSetForCompare(node, &jumpOnOppositeCondition))2662{2663TR::TreeEvaluator::compareIntegersForOrder(node, cg);2664instr = generateLabelInstruction(TR::InstOpCode::JBE4, node, boundCheckFailureLabel, cg);2665}2666else2667skippedComparison = true;2668}26692670if (skippedComparison)2671{2672if (jumpOnOppositeCondition)2673instr = generateLabelInstruction(TR::InstOpCode::JAE4, node, boundCheckFailureLabel, cg);2674else2675instr = generateLabelInstruction(TR::InstOpCode::JBE4, node, boundCheckFailureLabel, cg);26762677cg->decReferenceCount(firstChild);2678cg->decReferenceCount(secondChild);2679}26802681cg->addSnippet(new (cg->trHeapMemory()) TR::X86CheckFailureSnippet(cg, 
node->getSymbolReference(),2682boundCheckFailureLabel,2683instr,2684false2685));26862687if (node->hasFoldedImplicitNULLCHK())2688{2689TR::Instruction *faultingInstruction = cg->getImplicitExceptionPoint();2690if (comp->getOption(TR_TraceCG))2691{2692traceMsg(comp,"Node %p has foldedimplicitNULLCHK, and a faulting instruction of %p\n",node,faultingInstruction);2693}26942695if (faultingInstruction)2696{2697faultingInstruction->setNeedsGCMap(0xFF00FFFF);2698faultingInstruction->setNode(node);2699}2700}27012702firstChild->setIsNonNegative(true);2703secondChild->setIsNonNegative(true);27042705return NULL;2706}270727082709TR::Register *J9::X86::TreeEvaluator::ArrayCopyBNDCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)2710{2711// Check that first child >= second child2712//2713// If the first child is a constant and the second isn't, swap the children.2714//2715TR::Node *firstChild = node->getFirstChild();2716TR::Node *secondChild = node->getSecondChild();2717TR::LabelSymbol *boundCheckFailureLabel = generateLabelSymbol(cg);2718TR::Instruction *instr;27192720if (firstChild->getOpCode().isLoadConst())2721{2722if (secondChild->getOpCode().isLoadConst())2723{2724if (firstChild->getInt() < secondChild->getInt())2725{2726// Check will always fail, just jump to failure snippet2727//2728instr = generateLabelInstruction(TR::InstOpCode::JMP4, node, boundCheckFailureLabel, cg);2729}2730else2731{2732// Check will always succeed, no need for an instruction2733//2734instr = NULL;2735}2736cg->decReferenceCount(firstChild);2737cg->decReferenceCount(secondChild);2738}2739else2740{2741node->swapChildren();2742TR::TreeEvaluator::compareIntegersForOrder(node, cg);2743node->swapChildren();2744instr = generateLabelInstruction(TR::InstOpCode::JG4, node, boundCheckFailureLabel, cg);2745}2746}2747else2748{2749TR::TreeEvaluator::compareIntegersForOrder(node, cg);2750instr = generateLabelInstruction(TR::InstOpCode::JL4, node, boundCheckFailureLabel, cg);2751}27522753if 
// Evaluator for ArrayStoreCHK nodes.
//
// The first child is the reference store being guarded; its second child is
// the value being stored and its third child (index 2) is the destination
// object.  The generated sequence is, in order:
//
//   1. evaluation of every child up front (before any internal control flow);
//   2. a NULL test on the source, which bypasses the store check;
//   3. the array store check itself -- either a NOP-able virtual guard backed
//      by an out-of-line helper call, or VMarrayStoreCHKEvaluator;
//   4. the write barrier together with the store when a barrier is required,
//      otherwise the store emitted inline;
//   5. a final label carrying the register dependencies for the whole region.
//
// Always returns NULL.
//
TR::Register *J9::X86::TreeEvaluator::ArrayStoreCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
   TR::Instruction *prevInstr = cg->getAppendInstruction();
   TR::LabelSymbol *startLabel,
                   *startOfWrtbarLabel,
                   *doNullStoreLabel,
                   *doneLabel;

   // skipStoreNullCheck
   // skipJLOCheck
   // skipSuperClassCheck
   // cannotSkipWriteBarrier

   // NOTE(review): 'actions' is not referenced again in this function.
   flags16_t actions;

   TR::Node *firstChild = node->getFirstChild();
   TR::Node *sourceChild = firstChild->getSecondChild();

   // NOTE(review): function-local static, so the value is taken from the
   // compilation that first reaches this line.
   static bool isRealTimeGC = comp->getOptions()->realTimeGC();
   auto gcMode = TR::Compiler->om.writeBarrierType();

   bool isNonRTWriteBarrierRequired = (gcMode != gc_modron_wrtbar_none && !firstChild->skipWrtBar()) ? true : false;
   bool generateWriteBarrier = isRealTimeGC || isNonRTWriteBarrierRequired;
   bool nopASC = (node->getArrayStoreClassInNode() &&
                  comp->performVirtualGuardNOPing() &&
                  !fej9->classHasBeenExtended(node->getArrayStoreClassInNode())
                 ) ? true : false;

   doneLabel = generateLabelSymbol(cg);
   doneLabel->setEndInternalControlFlow();

   // Without a write barrier both of these collapse onto doneLabel.
   doNullStoreLabel = generateWriteBarrier ? generateLabelSymbol(cg) : doneLabel;
   startOfWrtbarLabel = generateWriteBarrier ? generateLabelSymbol(cg) : doNullStoreLabel;

   bool usingCompressedPointers = false;
   bool usingLowMemHeap = false;
   bool useShiftedOffsets = (TR::Compiler->om.compressedReferenceShiftOffset() != 0);

   if (comp->useCompressedPointers() && firstChild->getOpCode().isIndirect())
      {
      usingLowMemHeap = true;
      usingCompressedPointers = true;

      if (useShiftedOffsets)
         {
         // Walk down first children to the a2l node (if any) and take its
         // child as the source.
         while ((sourceChild->getNumChildren() > 0) && (sourceChild->getOpCodeValue() != TR::a2l))
            sourceChild = sourceChild->getFirstChild();
         if (sourceChild->getOpCodeValue() == TR::a2l)
            sourceChild = sourceChild->getFirstChild();
         // this is required so that different registers are
         // allocated for the actual store and translated values
         sourceChild->incReferenceCount();
         }
      }

   // -------------------------------------------------------------------------
   //
   // Evaluate all of the children here to avoid issues with internal control
   // flow and outlined instructions.
   //
   // -------------------------------------------------------------------------

   TR::MemoryReference *tempMR = NULL;

   if (generateWriteBarrier)
      {
      tempMR = generateX86MemoryReference(firstChild, cg);
      }

   TR::Node *destinationChild = firstChild->getChild(2);
   TR::Register *destinationRegister = cg->evaluate(destinationChild);
   TR::Register *sourceRegister = cg->evaluate(sourceChild);

   TR_X86ScratchRegisterManager *scratchRegisterManager =
      cg->generateScratchRegisterManager(comp->target().is64Bit() ? 15 : 7);

   TR::Register *compressedRegister = NULL;
   if (usingCompressedPointers)
      {
      if (usingLowMemHeap && !useShiftedOffsets)
         compressedRegister = sourceRegister;
      else
         {
         // valid for useShiftedOffsets
         compressedRegister = cg->evaluate(firstChild->getSecondChild());
         if (!usingLowMemHeap)
            {
            generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), firstChild, sourceRegister, sourceRegister, cg);
            generateRegRegInstruction(TR::InstOpCode::CMOVERegReg(), firstChild, compressedRegister, sourceRegister, cg);
            }
         }
      }

   // -------------------------------------------------------------------------
   //
   // If the source reference is NULL, the array store checks and the write
   // barrier can be bypassed.  Generate the NULL store in an outlined sequence.
   // For realtime GC we must still do the barrier.  If we are not generating
   // a write barrier then the store will happen inline.
   //
   // -------------------------------------------------------------------------

   startLabel = generateLabelSymbol(cg);
   startLabel->setStartInternalControlFlow();
   generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);

   generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, sourceRegister, sourceRegister, cg);

   TR::LabelSymbol *nullTargetLabel =
      isRealTimeGC ? startOfWrtbarLabel : doNullStoreLabel;

   generateLabelInstruction(TR::InstOpCode::JE4, node, nullTargetLabel, cg);

   // -------------------------------------------------------------------------
   //
   // Generate up-front array store checks to avoid calling out to the helper.
   //
   // -------------------------------------------------------------------------

   TR::LabelSymbol *postASCLabel = NULL;
   if (nopASC)
      {
      // Speculatively NOP the array store check if VP is able to prove that the ASC
      // would always succeed given the current state of the class hierarchy.
      //
      TR::Node *helperCallNode = TR::Node::createWithSymRef(TR::call, 2, 2, sourceChild, destinationChild, node->getSymbolReference());
      helperCallNode->copyByteCodeInfo(node);

      TR::LabelSymbol *oolASCLabel = generateLabelSymbol(cg);
      TR::LabelSymbol *restartLabel;

      if (generateWriteBarrier)
         {
         restartLabel = startOfWrtbarLabel;
         }
      else
         {
         restartLabel = postASCLabel = generateLabelSymbol(cg);
         }

      TR_OutlinedInstructions *outlinedASCHelperCall =
         new (cg->trHeapMemory()) TR_OutlinedInstructions(helperCallNode, TR::call, NULL, oolASCLabel, restartLabel, cg);
      cg->getOutlinedInstructionsList().push_front(outlinedASCHelperCall);

      // TR_doOOLASC forces an unconditional jump to the out-of-line check
      // instead of the NOP-able guard.
      static char *alwaysDoOOLASCc = feGetEnv("TR_doOOLASC");
      static bool alwaysDoOOLASC = alwaysDoOOLASCc ? true : false;

      if (!alwaysDoOOLASC)
         {
         TR_VirtualGuard *virtualGuard = TR_VirtualGuard::createArrayStoreCheckGuard(comp, node, node->getArrayStoreClassInNode());
         // NOTE(review): 'pachable' is never read; only the side effect of
         // generating the guard instruction is needed.
         TR::Instruction *pachable = generateVirtualGuardNOPInstruction(node, virtualGuard->addNOPSite(), NULL, oolASCLabel, cg);
         }
      else
         {
         generateLabelInstruction(TR::InstOpCode::JMP4, node, oolASCLabel, cg);
         }

      // Restore the reference counts of the children created for the temporary call node above.
      //
      sourceChild->decReferenceCount();
      destinationChild->decReferenceCount();
      }
   else
      {
      TR::TreeEvaluator::VMarrayStoreCHKEvaluator(
         node,
         sourceChild,
         destinationChild,
         scratchRegisterManager,
         startOfWrtbarLabel,
         prevInstr,
         cg);
      }

   // -------------------------------------------------------------------------
   //
   // Generate write barrier.
   //
   // -------------------------------------------------------------------------

   // Remembered so the HACK below can be undone afterwards.
   bool isSourceNonNull = sourceChild->isNonNull();

   if (generateWriteBarrier)
      {
      generateLabelInstruction(TR::InstOpCode::label, node, startOfWrtbarLabel, cg);

      if (!isRealTimeGC)
         {
         // HACK: set the nullness property on the source so that the write barrier
         //       doesn't do the same test.
         //
         sourceChild->setIsNonNull(true);
         }

      TR::TreeEvaluator::VMwrtbarWithStoreEvaluator(
         node,
         tempMR,
         scratchRegisterManager,
         destinationChild,
         sourceChild,
         true,
         cg,
         true);
      }
   else if (postASCLabel)
      {
      // Lay down a restart label for OOL ASC if the write barrier was skipped
      //
      generateLabelInstruction(TR::InstOpCode::label, node, postASCLabel, cg);
      }

   // -------------------------------------------------------------------------
   //
   // Either do the bypassed NULL store out of line or the reference store
   // inline if the write barrier was omitted.
   //
   // -------------------------------------------------------------------------

   TR::MemoryReference *tempMR2 = NULL;

   TR::Instruction *dependencyAnchorInstruction = NULL;

   if (!isRealTimeGC)
      {
      if (generateWriteBarrier)
         {
         assert(isNonRTWriteBarrierRequired);
         assert(tempMR);

         // HACK: reset the nullness property on the source.
         //
         sourceChild->setIsNonNull(isSourceNonNull);

         // Perform the NULL store that was bypassed earlier by the write barrier.
         //
         TR_OutlinedInstructionsGenerator og(nullTargetLabel, node, cg);

         tempMR2 = generateX86MemoryReference(*tempMR, 0, cg);

         if (usingCompressedPointers)
            generateMemRegInstruction(TR::InstOpCode::S4MemReg, node, tempMR2, compressedRegister, cg);
         else
            generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, tempMR2, sourceRegister, cg);

         generateLabelInstruction(TR::InstOpCode::JMP4, node, doneLabel, cg);
         og.endOutlinedInstructionSequence();
         }
      else
         {
         // No write barrier emitted.  Evaluate the store here.
         //
         assert(!isNonRTWriteBarrierRequired);
         assert(doneLabel == nullTargetLabel);

         // This is where the dependency condition will eventually go.
         //
         dependencyAnchorInstruction = cg->getAppendInstruction();

         tempMR = generateX86MemoryReference(firstChild, cg);

         TR::X86MemRegInstruction *storeInstr;

         if (usingCompressedPointers)
            storeInstr = generateMemRegInstruction(TR::InstOpCode::S4MemReg, node, tempMR, compressedRegister, cg);
         else
            storeInstr = generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, tempMR, sourceRegister, cg);

         cg->setImplicitExceptionPoint(storeInstr);

         if (!usingLowMemHeap || useShiftedOffsets)
            cg->decReferenceCount(sourceChild);
         cg->decReferenceCount(destinationChild);
         tempMR->decNodeReferenceCounts(cg);
         }
      }

   // -------------------------------------------------------------------------
   //
   // Generate outermost register dependencies
   //
   // -------------------------------------------------------------------------

   TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions(12, 12, cg);
   deps->unionPostCondition(destinationRegister, TR::RealRegister::NoReg, cg);
   deps->unionPostCondition(sourceRegister, TR::RealRegister::NoReg, cg);

   scratchRegisterManager->addScratchRegistersToDependencyList(deps);

   if (usingCompressedPointers && (!usingLowMemHeap || useShiftedOffsets))
      {
      deps->unionPostCondition(compressedRegister, TR::RealRegister::NoReg, cg);
      }

   if (generateWriteBarrier)
      {
      // Memory reference is not live in an internal control flow region.
      //
      if (tempMR->getBaseRegister() && tempMR->getBaseRegister() != destinationRegister)
         {
         deps->unionPostCondition(tempMR->getBaseRegister(), TR::RealRegister::NoReg, cg);
         }

      if (tempMR->getIndexRegister() && tempMR->getIndexRegister() != destinationRegister)
         {
         deps->unionPostCondition(tempMR->getIndexRegister(), TR::RealRegister::NoReg, cg);
         }

      if (comp->target().is64Bit())
         {
         TR::Register *addressRegister =tempMR->getAddressRegister();
         if (addressRegister && addressRegister != destinationRegister)
            {
            deps->unionPostCondition(addressRegister, TR::RealRegister::NoReg, cg);
            }
         }
      }

   if (tempMR2 && comp->target().is64Bit())
      {
      TR::Register *addressRegister = tempMR2->getAddressRegister();
      if (addressRegister && addressRegister != destinationRegister)
         deps->unionPostCondition(addressRegister, TR::RealRegister::NoReg, cg);
      }

   deps->unionPostCondition(
      cg->getVMThreadRegister(),
      (TR::RealRegister::RegNum)cg->getVMThreadRegister()->getAssociation(), cg);

   deps->stopAddingConditions();

   scratchRegisterManager->stopUsingRegisters();

   // In the inline-store case the done label is inserted after the remembered
   // anchor instruction; otherwise it is appended at the current position.
   if (dependencyAnchorInstruction)
      {
      generateLabelInstruction(dependencyAnchorInstruction, TR::InstOpCode::label, doneLabel, deps, cg);
      }
   else
      {
      generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, deps, cg);
      }

   if (usingCompressedPointers)
      {
      cg->decReferenceCount(firstChild->getSecondChild());
      cg->decReferenceCount(firstChild);
      }

   if (comp->useAnchors() && firstChild->getOpCode().isIndirect())
      firstChild->setStoreAlreadyEvaluated(true);

   return NULL;
   }
// Evaluator for ArrayCHK nodes.  All of the work is delegated to
// TR::TreeEvaluator::VMarrayCheckEvaluator, whose result is returned unchanged.
//
TR::Register *J9::X86::TreeEvaluator::ArrayCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::VMarrayCheckEvaluator(node, cg);
   }
// Handles both BNDCHKwithSpineCHK and SpineCHK nodes.
//
// Child layout read by this evaluator:
//    BNDCHKwithSpineCHK: (load/store, base array, array length, index)
//    SpineCHK:           (load/store, base array, index)
//
// The mainline path performs the check and then the load/store as usual; the
// failing path is generated out of line by generateArrayletReference() and
// rejoins the mainline at mergeLabel.
//
// Always returns NULL.
//
TR::Register *J9::X86::TreeEvaluator::BNDCHKwithSpineCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
   bool needsBoundCheck = (node->getOpCodeValue() == TR::BNDCHKwithSpineCHK) ? true : false;

   TR::Node *loadOrStoreChild = node->getFirstChild();
   TR::Node *baseArrayChild = node->getSecondChild();
   TR::Node *arrayLengthChild;
   TR::Node *indexChild;

   if (needsBoundCheck)
      {
      arrayLengthChild = node->getChild(2);
      indexChild = node->getChild(3);
      }
   else
      {
      arrayLengthChild = NULL;
      indexChild = node->getChild(2);
      }

   // Perform a bound check.
   //
   // Value propagation or profile-directed optimization may have determined
   // that the array bound is a constant, and lowered TR::arraylength into an
   // iconst.  In this case, make sure that the constant is the second child.
   //
   TR::InstOpCode::Mnemonic branchOpCode;

   // For primitive stores anchored under the check node, we must evaluate the source node
   // before the bound check branch so that it's available to the snippet.  We can make
   // an exception for constant values that could be folded directly into an immediate
   // store instruction.
   //
   if (loadOrStoreChild->getOpCode().isStore() && loadOrStoreChild->getReferenceCount() <= 1)
      {
      TR::Node *valueChild = loadOrStoreChild->getSecondChild();

      // Evaluate unless the value is a constant that fits an immediate:
      // float/double constants and, on 64-bit, constants outside the signed
      // 32-bit range are still evaluated.
      if (!valueChild->getOpCode().isLoadConst() ||
          (valueChild->getOpCode().isLoadConst() &&
           ((valueChild->getDataType() == TR::Float) || (valueChild->getDataType() == TR::Double) ||
            (comp->target().is64Bit() && !IS_32BIT_SIGNED(valueChild->getLongInt())))))
         {
         cg->evaluate(valueChild);
         }
      }

   TR::Register *baseArrayReg = cg->evaluate(baseArrayChild);

   TR::TreeEvaluator::preEvaluateEscapingNodesForSpineCheck(node, cg);

   TR::Instruction *faultingInstruction = NULL;

   TR::LabelSymbol *boundCheckFailureLabel = generateLabelSymbol(cg);
   TR::X86LabelInstruction *checkInstr = NULL;

   if (needsBoundCheck)
      {
      if (arrayLengthChild->getOpCode().isLoadConst())
         {
         if (indexChild->getOpCode().isLoadConst() && arrayLengthChild->getInt() <= indexChild->getInt())
            {
            // Create real check failure snippet if we can prove the
            // bound check will always fail.
            //
            branchOpCode = TR::InstOpCode::JMP4;
            cg->decReferenceCount(arrayLengthChild);
            cg->decReferenceCount(indexChild);
            }
         else
            {
            TR::DataType dt = loadOrStoreChild->getDataType();
            int32_t elementSize = (dt == TR::Address) ? TR::Compiler->om.sizeofReferenceField()
                                                      : TR::Symbol::convertTypeToSize(dt);

            if (TR::Compiler->om.isDiscontiguousArray(arrayLengthChild->getInt(), elementSize))
               {
               // Create real check failure snippet if we can prove the spine check
               // will always fail
               //
               branchOpCode = TR::InstOpCode::JMP4;
               cg->decReferenceCount(arrayLengthChild);
               if (!indexChild->getOpCode().isLoadConst())
                  {
                  cg->evaluate(indexChild);
                  }
               else
                  {
                  cg->decReferenceCount(indexChild);
                  }
               faultingInstruction = cg->getImplicitExceptionPoint();
               }
            else
               {
               // Check the bounds.  The operands are (index, length) here, hence JAE.
               //
               TR::TreeEvaluator::compareIntegersForOrder(node, indexChild, arrayLengthChild, cg);
               branchOpCode = TR::InstOpCode::JAE4;
               faultingInstruction = cg->getImplicitExceptionPoint();
               }
            }
         }
      else
         {
         // Check the bounds.  The operands are (length, index) here, hence JBE.
         //
         TR::TreeEvaluator::compareIntegersForOrder(node, arrayLengthChild, indexChild, cg);
         branchOpCode = TR::InstOpCode::JBE4;
         faultingInstruction = cg->getImplicitExceptionPoint();
         }

      // TR_forceArraylet makes the branch to the out-of-line path unconditional.
      static char *forceArraylet = feGetEnv("TR_forceArraylet");
      if (forceArraylet)
         {
         branchOpCode = TR::InstOpCode::JMP4;
         }

      checkInstr = generateLabelInstruction(branchOpCode, node, boundCheckFailureLabel, cg);
      }
   else
      {
      // -------------------------------------------------------------------------
      // Check if the base array has a spine.  If so, process out of line.
      // -------------------------------------------------------------------------

      if (!indexChild->getOpCode().isLoadConst())
         {
         cg->evaluate(indexChild);
         }

      TR::MemoryReference *arraySizeMR =
         generateX86MemoryReference(baseArrayReg, fej9->getOffsetOfContiguousArraySizeField(), cg);

      // A zero contiguous-size field sends execution to the out-of-line path.
      // NOTE(review): checkInstr stays NULL on this path.
      generateMemImmInstruction(TR::InstOpCode::CMP4MemImms, node, arraySizeMR, 0, cg);
      generateLabelInstruction(TR::InstOpCode::JE4, node, boundCheckFailureLabel, cg);
      }

   // -----------------------------------------------------------------------------------
   // Track all virtual register use within the mainline path.  This info will be used
   // to adjust the virtual register use counts within the outlined path for more precise
   // register assignment.
   // -----------------------------------------------------------------------------------

   cg->startRecordingRegisterUsage();

   TR::Register *loadOrStoreReg = NULL;
   TR::Register *valueReg = NULL;

   int32_t indexValue;

   // For reference stores, only evaluate the array element address because the store cannot
   // happen here (it must be done via the array store check).
   //
   // For primitive stores, evaluate them now.
   //
   // For loads, evaluate them now.
   //
   if (loadOrStoreChild->getOpCode().isStore())
      {
      if (loadOrStoreChild->getReferenceCount() > 1)
         {
         TR_ASSERT(loadOrStoreChild->getOpCode().isWrtBar(), "Opcode must be wrtbar");
         loadOrStoreReg = cg->evaluate(loadOrStoreChild->getFirstChild());
         cg->decReferenceCount(loadOrStoreChild->getFirstChild());
         }
      else
         {
         // If the store is not commoned then it must be a primitive store.
         //
         loadOrStoreReg = cg->evaluate(loadOrStoreChild);
         valueReg = loadOrStoreChild->getSecondChild()->getRegister();

         if (!valueReg)
            {
            // If the immediate value was not evaluated then it must have been folded
            // into the instruction.
            //
            TR_ASSERT(loadOrStoreChild->getSecondChild()->getOpCode().isLoadConst(), "unevaluated, non-constant value child");
            TR_ASSERT(IS_32BIT_SIGNED(loadOrStoreChild->getSecondChild()->getInt()), "immediate value too wide for instruction");
            }
         }
      }
   else
      {
      loadOrStoreReg = cg->evaluate(loadOrStoreChild);
      }

   // -----------------------------------------------------------------------------------
   // Stop tracking virtual register usage.
   // -----------------------------------------------------------------------------------

   TR::list<OMR::RegisterUsage*> *mainlineRUL = cg->stopRecordingRegisterUsage();

   TR::Register *indexReg = indexChild->getRegister();

   // Index must be in a register or a constant.
   //
   TR_ASSERT((indexReg || indexChild->getOpCode().isLoadConst()),
      "index child is not evaluated or constant: indexReg=%p, indexChild=%p", indexReg, indexChild);

   // indexValue is only meaningful when there is no index register; -1 is
   // passed alongside a register.
   if (indexReg)
      {
      indexValue = -1;
      }
   else
      {
      indexValue = indexChild->getInt();
      }

   // TODO: don't always require the VM thread
   //
   TR::RegisterDependencyConditions *deps =
      generateRegisterDependencyConditions((uint8_t) 0, 1, cg);

   deps->addPostCondition(
      cg->getVMThreadRegister(),
      (TR::RealRegister::RegNum)cg->getVMThreadRegister()->getAssociation(), cg);

   deps->stopAddingConditions();

   TR::LabelSymbol *mergeLabel = generateLabelSymbol(cg);
   mergeLabel->setInternalControlFlowMerge();
   // NOTE(review): 'restartInstr' is not read afterwards; the call is kept
   // for the label instruction it emits.
   TR::X86LabelInstruction *restartInstr = generateLabelInstruction(TR::InstOpCode::label, node, mergeLabel, deps, cg);

   TR_OutlinedInstructions *arrayletOI =
      generateArrayletReference(
         node,
         loadOrStoreChild,
         checkInstr,
         boundCheckFailureLabel,
         mergeLabel,
         baseArrayReg,
         loadOrStoreReg,
         indexReg,
         indexValue,
         valueReg,
         needsBoundCheck,
         cg);

   arrayletOI->setMainlinePathRegisterUsageList(mainlineRUL);

   if (node->hasFoldedImplicitNULLCHK())
      {
      if (faultingInstruction)
         {
         faultingInstruction->setNeedsGCMap(0xFF00FFFF);
         faultingInstruction->setNode(node);
         }
      }

   if (arrayLengthChild)
      arrayLengthChild->setIsNonNegative(true);

   indexChild->setIsNonNegative(true);

   cg->decReferenceCount(loadOrStoreChild);
   cg->decReferenceCount(baseArrayChild);

   if (!needsBoundCheck)
      {
      // Spine checks must decrement the reference count on the index explicitly.
      //
      cg->decReferenceCount(indexChild);
      }

   return NULL;

   }
(node->hasFoldedImplicitNULLCHK())3370{3371if (faultingInstruction)3372{3373faultingInstruction->setNeedsGCMap(0xFF00FFFF);3374faultingInstruction->setNode(node);3375}3376}33773378if (arrayLengthChild)3379arrayLengthChild->setIsNonNegative(true);33803381indexChild->setIsNonNegative(true);33823383cg->decReferenceCount(loadOrStoreChild);3384cg->decReferenceCount(baseArrayChild);33853386if (!needsBoundCheck)3387{3388// Spine checks must decrement the reference count on the index explicitly.3389//3390cg->decReferenceCount(indexChild);3391}33923393return NULL;33943395}33963397/*3398* this evaluator is used specifically for evaluate the following three nodes3399*3400* storFence3401* loadFence3402* storeFence3403*3404* Since Java specification for loadfence and storefenc is stronger3405* than the intel specification, a full mfence instruction have to3406* be used for all three of them3407*3408* Due to performance penalty of mfence, a faster lockor on RSP is used3409* it achieve the same functionality but runs faster.3410*/3411TR::Register *J9::X86::TreeEvaluator::barrierFenceEvaluator(TR::Node *node, TR::CodeGenerator *cg)3412{3413TR::ILOpCodes opCode = node->getOpCodeValue();3414if (opCode == TR::fullFence && node->canOmitSync())3415{3416generateLabelInstruction(TR::InstOpCode::label, node, generateLabelSymbol(cg), cg);3417}3418else if(cg->comp()->getOption(TR_X86UseMFENCE))3419{3420generateInstruction(TR::InstOpCode::MFENCE, node, cg);3421}3422else3423{3424TR::RealRegister *stackReg = cg->machine()->getRealRegister(TR::RealRegister::esp);3425TR::MemoryReference *mr = generateX86MemoryReference(stackReg, intptr_t(0), cg);34263427mr->setRequiresLockPrefix();3428generateMemImmInstruction(TR::InstOpCode::OR4MemImms, node, mr, 0, cg);3429cg->stopUsingRegister(stackReg);3430}3431return NULL;3432}343334343435TR::Register *J9::X86::TreeEvaluator::readbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)3436{3437TR_ASSERT(node->getNumChildren() == 1, "readbar node should have one 
child");3438TR::Node *handleNode = node->getChild(0);34393440TR::Compilation *comp = cg->comp();34413442bool needBranchAroundForNULL = !node->hasFoldedImplicitNULLCHK() && !node->isNonNull();3443traceMsg(comp, "\nnode %p has folded implicit nullchk: %d\n", node, node->hasFoldedImplicitNULLCHK());3444traceMsg(comp, "node %p is nonnull: %d\n", node, node->isNonNull());3445traceMsg(comp, "node %p needs branchAround: %d\n", node, needBranchAroundForNULL);34463447TR::LabelSymbol *startLabel=NULL;3448TR::LabelSymbol *doneLabel=NULL;3449if (needBranchAroundForNULL)3450{3451startLabel = generateLabelSymbol(cg);3452doneLabel = generateLabelSymbol(cg);34533454generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);3455startLabel->setStartInternalControlFlow();3456}34573458TR::Register *handleRegister = cg->intClobberEvaluate(handleNode);34593460if (needBranchAroundForNULL)3461{3462// if handle is NULL, then just branch around the redirection3463generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, handleRegister, handleRegister, cg);3464generateLabelInstruction(TR::InstOpCode::JE4, handleNode, doneLabel, cg);3465}34663467// handle is not NULL or we're an implicit nullcheck, so go through forwarding pointer to get object3468TR::MemoryReference *handleMR = generateX86MemoryReference(handleRegister, node->getSymbolReference()->getOffset(), cg);3469TR::Instruction *forwardingInstr=generateRegMemInstruction(TR::InstOpCode::L4RegMem, handleNode, handleRegister, handleMR, cg);3470cg->setImplicitExceptionPoint(forwardingInstr);34713472if (needBranchAroundForNULL)3473{3474TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t) 0, 1, cg);3475deps->addPostCondition(handleRegister, TR::RealRegister::NoReg, cg);34763477// and we're done3478generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, deps, 
cg);34793480doneLabel->setEndInternalControlFlow();3481}34823483node->setRegister(handleRegister);3484cg->decReferenceCount(handleNode);34853486return handleRegister;3487}34883489static3490TR::Register * highestOneBit(TR::Node *node, TR::CodeGenerator *cg, TR::Register *reg, bool is64Bit)3491{3492// xor r1, r13493// bsr r2, reg3494// setne r13495// shl r1, r23496TR::Register *scratchReg = cg->allocateRegister();3497TR::Register *bsrReg = cg->allocateRegister();3498generateRegRegInstruction(TR::InstOpCode::XORRegReg(is64Bit), node, scratchReg, scratchReg, cg);3499generateRegRegInstruction(TR::InstOpCode::BSRRegReg(is64Bit), node, bsrReg, reg, cg);3500generateRegInstruction(TR::InstOpCode::SETNE1Reg, node, scratchReg, cg);3501TR::RegisterDependencyConditions *shiftDependencies = generateRegisterDependencyConditions((uint8_t)1, 1, cg);3502shiftDependencies->addPreCondition(bsrReg, TR::RealRegister::ecx, cg);3503shiftDependencies->addPostCondition(bsrReg, TR::RealRegister::ecx, cg);3504shiftDependencies->stopAddingConditions();3505generateRegRegInstruction(TR::InstOpCode::SHLRegCL(is64Bit), node, scratchReg, bsrReg, shiftDependencies, cg);3506cg->stopUsingRegister(bsrReg);3507return scratchReg;3508}35093510TR::Register *J9::X86::TreeEvaluator::integerHighestOneBit(TR::Node *node, TR::CodeGenerator *cg)3511{3512TR_ASSERT(node->getNumChildren() == 1, "Node has a wrong number of children (i.e. !=1 )! ");3513TR::Node *child = node->getFirstChild();3514TR::Register *inputReg = cg->evaluate(child);3515TR::Register *resultReg = highestOneBit(node, cg, inputReg, cg->comp()->target().is64Bit());3516cg->decReferenceCount(child);3517node->setRegister(resultReg);3518return resultReg;3519}35203521TR::Register *J9::X86::TreeEvaluator::longHighestOneBit(TR::Node *node, TR::CodeGenerator *cg)3522{3523TR_ASSERT(node->getNumChildren() == 1, "Node has a wrong number of children (i.e. !=1 )! 
");3524TR::Node *child = node->getFirstChild();3525TR::Register *inputReg = cg->evaluate(child);3526TR::Register *resultReg = NULL;3527if (cg->comp()->target().is64Bit())3528{3529resultReg = highestOneBit(node, cg, inputReg, true);3530}3531else3532{3533//mask out low part result if high part is not 03534//xor r1 r13535//cmp inputHigh, 03536//setne r13537//dec r13538//and resultLow, r13539//ret resultHigh:resultLow3540TR::Register *inputLow = inputReg->getLowOrder();3541TR::Register *inputHigh = inputReg->getHighOrder();3542TR::Register *maskReg = cg->allocateRegister();3543TR::Register *resultHigh = highestOneBit(node, cg, inputHigh, false);3544TR::Register *resultLow = highestOneBit(node, cg, inputLow, false);3545generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, maskReg, maskReg, cg);3546generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, inputHigh, 0, cg);3547generateRegInstruction(TR::InstOpCode::SETNE1Reg, node, maskReg, cg);3548generateRegInstruction(TR::InstOpCode::DEC4Reg, node, maskReg, cg);3549generateRegRegInstruction(TR::InstOpCode::AND4RegReg, node, resultLow, maskReg, cg);3550resultReg = cg->allocateRegisterPair(resultLow, resultHigh);3551cg->stopUsingRegister(maskReg);3552}3553cg->decReferenceCount(child);3554node->setRegister(resultReg);3555return resultReg;3556}35573558static3559TR::Register *lowestOneBit(TR::Node *node, TR::CodeGenerator *cg, TR::Register *reg, bool is64Bit)3560{3561TR::Register *resultReg = cg->allocateRegister();3562generateRegRegInstruction(TR::InstOpCode::MOVRegReg(is64Bit), node, resultReg, reg, cg);3563generateRegInstruction(TR::InstOpCode::NEGReg(is64Bit), node, resultReg, cg);3564generateRegRegInstruction(TR::InstOpCode::ANDRegReg(is64Bit), node, resultReg, reg, cg);3565return resultReg;3566}35673568TR::Register *J9::X86::TreeEvaluator::integerLowestOneBit(TR::Node *node, TR::CodeGenerator *cg)3569{3570TR_ASSERT(node->getNumChildren() == 1, "Node has a wrong number of children (i.e. !=1 )! 
");3571TR::Node *child = node->getFirstChild();3572TR::Register *inputReg = cg->evaluate(child);3573TR::Register *reg = lowestOneBit(node, cg, inputReg, cg->comp()->target().is64Bit());3574node->setRegister(reg);3575cg->decReferenceCount(child);3576return reg;3577}35783579TR::Register *J9::X86::TreeEvaluator::longLowestOneBit(TR::Node *node, TR::CodeGenerator *cg)3580{3581TR_ASSERT(node->getNumChildren() == 1, "Node has a wrong number of children (i.e. !=1 )! ");3582TR::Node *child = node->getFirstChild();3583TR::Register *inputReg = cg->evaluate(child);3584TR::Register *resultReg = NULL;3585if (cg->comp()->target().is64Bit())3586{3587resultReg = lowestOneBit(node, cg, inputReg, true);3588}3589else3590{3591// mask out high part if low part is not 03592// xor r1, r13593// get low result3594// setne r13595// dec r13596// and r1, inputHigh3597// get high result3598// return resultHigh:resultLow3599TR::Register *inputHigh = inputReg->getHighOrder();3600TR::Register *inputLow = inputReg->getLowOrder();3601TR::Register *scratchReg = cg->allocateRegister();3602generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, scratchReg, scratchReg, cg);3603TR::Register *resultLow = lowestOneBit(node, cg, inputLow, false);3604generateRegInstruction(TR::InstOpCode::SETNE1Reg, node, scratchReg, cg);3605generateRegInstruction(TR::InstOpCode::DEC4Reg, node, scratchReg, cg);3606generateRegRegInstruction(TR::InstOpCode::AND4RegReg, node, scratchReg, inputHigh, cg);3607TR::Register *resultHigh = lowestOneBit(node, cg, scratchReg, false);3608cg->stopUsingRegister(scratchReg);3609resultReg = cg->allocateRegisterPair(resultLow, resultHigh);3610}3611node->setRegister(resultReg);3612cg->decReferenceCount(child);3613return resultReg;3614}361536163617static3618TR::Register *numberOfLeadingZeros(TR::Node *node, TR::CodeGenerator *cg, TR::Register *reg, bool is64Bit, bool isLong)3619{3620// xor r1, r13621// bsr r2, reg3622// sete r13623// dec r13624// inc r23625// and r2, r13626// mov r1, 
is64Bit? 64: 323627// sub r1, r23628// ret r13629TR::Register *maskReg = cg->allocateRegister();3630TR::Register *bsrReg = cg->allocateRegister();3631generateRegRegInstruction(TR::InstOpCode::XORRegReg(is64Bit), node, maskReg, maskReg, cg);3632generateRegRegInstruction(TR::InstOpCode::BSRRegReg(is64Bit), node, bsrReg, reg, cg);3633generateRegInstruction(TR::InstOpCode::SETE1Reg, node, maskReg, cg);3634generateRegInstruction(TR::InstOpCode::DECReg(is64Bit), node, maskReg, cg);3635generateRegInstruction(TR::InstOpCode::INCReg(is64Bit), node, bsrReg, cg);3636generateRegRegInstruction(TR::InstOpCode::ANDRegReg(is64Bit), node, bsrReg, maskReg, cg);3637generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(is64Bit), node, maskReg, isLong ? 64 : 32, cg);3638generateRegRegInstruction(TR::InstOpCode::SUBRegReg(is64Bit), node, maskReg, bsrReg, cg);3639cg->stopUsingRegister(bsrReg);3640return maskReg;3641}36423643TR::Register *J9::X86::TreeEvaluator::integerNumberOfLeadingZeros(TR::Node *node, TR::CodeGenerator *cg)3644{3645TR_ASSERT(node->getNumChildren() == 1, "Node has a wrong number of children (i.e. !=1 )! ");3646TR::Node* child = node->getFirstChild();3647TR::Register* inputReg = cg->evaluate(child);3648TR::Register* resultReg = numberOfLeadingZeros(node, cg, inputReg, false, false);3649node->setRegister(resultReg);3650cg->decReferenceCount(child);3651return resultReg;3652}36533654TR::Register *J9::X86::TreeEvaluator::longNumberOfLeadingZeros(TR::Node *node, TR::CodeGenerator *cg)3655{3656TR_ASSERT(node->getNumChildren() == 1, "Node has a wrong number of children (i.e. !=1 )! 
");3657TR::Node* child = node->getFirstChild();3658TR::Register* inputReg = cg->evaluate(child);3659TR::Register *resultReg = NULL;3660if (cg->comp()->target().is64Bit())3661{3662resultReg = numberOfLeadingZeros(node, cg, inputReg, true, true);3663}3664else3665{3666// keep low part if high part is 03667// xor r1, r13668// cmp inputHigh, 03669// setne r13670// dec r13671// and resultLow, r13672// add resultHigh, resultLow3673// return resultHigh3674TR::Register *inputHigh = inputReg->getHighOrder();3675TR::Register *inputLow = inputReg->getLowOrder();3676TR::Register *resultHigh = numberOfLeadingZeros(node, cg, inputHigh, false, false);3677TR::Register *resultLow = numberOfLeadingZeros(node, cg, inputLow, false, false);3678TR::Register *maskReg = cg->allocateRegister();3679generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, maskReg, maskReg, cg);3680generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, inputHigh, 0, cg);3681generateRegInstruction(TR::InstOpCode::SETNE1Reg, node, maskReg, cg);3682generateRegInstruction(TR::InstOpCode::DEC4Reg, node, maskReg, cg);3683generateRegRegInstruction(TR::InstOpCode::AND4RegReg, node, resultLow, maskReg, cg);3684generateRegRegInstruction(TR::InstOpCode::ADD4RegReg, node, resultHigh, resultLow, cg);3685cg->stopUsingRegister(resultLow);3686cg->stopUsingRegister(maskReg);3687resultReg = resultHigh;3688}3689node->setRegister(resultReg);3690cg->decReferenceCount(child);3691return resultReg;3692}36933694static3695TR::Register * numberOfTrailingZeros(TR::Node *node, TR::CodeGenerator *cg, TR::Register *reg, bool is64Bit, bool isLong)3696{3697// r1 is shift amount, r3 is the mask3698// xor r1, r13699// bsf r2, reg3700// sete r13701// mov r3, r13702// dec r33703// shl r1, is64Bit ? 
6 : 53704// and r2, r33705// add r2, r13706// return r23707TR::Register *bsfReg = cg->allocateRegister();3708TR::Register *tempReg = cg->allocateRegister();3709TR::Register *maskReg = cg->allocateRegister();3710generateRegRegInstruction(TR::InstOpCode::XORRegReg(is64Bit), node, tempReg, tempReg, cg);3711generateRegRegInstruction(TR::InstOpCode::BSFRegReg(is64Bit), node, bsfReg, reg, cg);3712generateRegInstruction(TR::InstOpCode::SETE1Reg, node, tempReg, cg);3713generateRegRegInstruction(TR::InstOpCode::MOVRegReg(is64Bit), node, maskReg, tempReg, cg);3714generateRegInstruction(TR::InstOpCode::DECReg(is64Bit), node, maskReg, cg);3715generateRegImmInstruction(TR::InstOpCode::SHLRegImm1(is64Bit), node, tempReg, isLong ? 6 : 5, cg);3716generateRegRegInstruction(TR::InstOpCode::ANDRegReg(is64Bit), node, bsfReg, maskReg, cg);3717generateRegRegInstruction(TR::InstOpCode::ADDRegReg(is64Bit), node, bsfReg, tempReg, cg);3718cg->stopUsingRegister(tempReg);3719cg->stopUsingRegister(maskReg);3720return bsfReg;3721}37223723TR::Register *J9::X86::TreeEvaluator::integerNumberOfTrailingZeros(TR::Node *node, TR::CodeGenerator *cg)3724{3725TR_ASSERT(node->getNumChildren() == 1, "Node has a wrong number of children (i.e. !=1 )! ");3726TR::Node* child = node->getFirstChild();3727TR::Register* inputReg = cg->evaluate(child);3728TR::Register *resultReg = numberOfTrailingZeros(node, cg, inputReg, cg->comp()->target().is64Bit(), false);3729node->setRegister(resultReg);3730cg->decReferenceCount(child);3731return resultReg;3732}37333734TR::Register *J9::X86::TreeEvaluator::longNumberOfTrailingZeros(TR::Node *node, TR::CodeGenerator *cg)3735{3736TR_ASSERT(node->getNumChildren() == 1, "Node has a wrong number of children (i.e. !=1 )! 
");3737TR::Node * child = node->getFirstChild();3738TR::Register * inputReg = cg->evaluate(child);3739TR::Register * resultReg = NULL;3740if (cg->comp()->target().is64Bit())3741{3742resultReg = numberOfTrailingZeros(node, cg, inputReg, true, true);3743}3744else3745{3746// mask out result of high part if low part is not 323747// xor r1, r13748// cmp resultLow, 323749// setne r13750// dec r13751// and r1, resultHigh3752// and resultLow, r13753// return resultLow3754TR::Register *inputLow = inputReg->getLowOrder();3755TR::Register *inputHigh = inputReg->getHighOrder();3756TR::Register *maskReg = cg->allocateRegister();3757TR::Register *resultLow = numberOfTrailingZeros(node, cg, inputLow, false, false);3758TR::Register *resultHigh = numberOfTrailingZeros(node, cg, inputHigh, false, false);3759generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, maskReg, maskReg, cg);3760generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, resultLow, 32, cg);3761generateRegInstruction(TR::InstOpCode::SETNE1Reg, node, maskReg, cg);3762generateRegInstruction(TR::InstOpCode::DEC4Reg, node, maskReg, cg);3763generateRegRegInstruction(TR::InstOpCode::AND4RegReg, node, maskReg, resultHigh, cg);3764generateRegRegInstruction(TR::InstOpCode::ADD4RegReg, node, resultLow, maskReg, cg);3765cg->stopUsingRegister(resultHigh);3766cg->stopUsingRegister(maskReg);3767resultReg = resultLow;3768}3769node->setRegister(resultReg);3770cg->decReferenceCount(child);3771return resultReg;3772}37733774static3775TR::Register *bitCount(TR::Node *node, TR::CodeGenerator *cg, TR::Register *reg, bool is64Bit)3776{3777TR::Register *bsfReg = cg->allocateRegister();3778generateRegRegInstruction(TR::InstOpCode::POPCNTRegReg(is64Bit), node, bsfReg, reg, cg);3779return bsfReg;3780}37813782TR::Register *J9::X86::TreeEvaluator::integerBitCount(TR::Node *node, TR::CodeGenerator *cg)3783{3784TR_ASSERT(node->getNumChildren() == 1, "Node has a wrong number of children (i.e. !=1 )! 
");3785TR::Node* child = node->getFirstChild();3786TR::Register* inputReg = cg->evaluate(child);3787TR::Register* resultReg = bitCount(node, cg, inputReg, cg->comp()->target().is64Bit());3788node->setRegister(resultReg);3789cg->decReferenceCount(child);3790return resultReg;3791}37923793TR::Register *J9::X86::TreeEvaluator::longBitCount(TR::Node *node, TR::CodeGenerator *cg)3794{3795TR_ASSERT(node->getNumChildren() == 1, "Node has a wrong number of children (i.e. !=1 )! ");3796TR::Node * child = node->getFirstChild();3797TR::Register * inputReg = cg->evaluate(child);3798TR::Register * resultReg = NULL;3799if (cg->comp()->target().is64Bit())3800{3801resultReg = bitCount(node, cg, inputReg, true);3802}3803else3804{3805//add low result and high result together3806TR::Register * inputHigh = inputReg->getHighOrder();3807TR::Register * inputLow = inputReg->getLowOrder();3808TR::Register * resultLow = bitCount(node, cg, inputLow, false);3809TR::Register * resultHigh = bitCount(node, cg, inputHigh, false);3810generateRegRegInstruction(TR::InstOpCode::ADD4RegReg, node, resultLow, resultHigh, cg);3811cg->stopUsingRegister(resultHigh);3812resultReg = resultLow;3813}3814node->setRegister(resultReg);3815cg->decReferenceCount(child);3816return resultReg;3817}38183819inline void generateInlinedCheckCastForDynamicCastClass(TR::Node* node, TR::CodeGenerator* cg)3820{3821TR::Compilation *comp = cg->comp();3822auto use64BitClasses = comp->target().is64Bit() &&3823(!TR::Compiler->om.generateCompressedObjectHeaders() ||3824(comp->compileRelocatableCode() && comp->getOption(TR_UseSymbolValidationManager)));3825TR::Register *ObjReg = cg->evaluate(node->getFirstChild());3826TR::Register *castClassReg = cg->evaluate(node->getSecondChild());3827TR::Register *temp1Reg = cg->allocateRegister();3828TR::Register *temp2Reg = cg->allocateRegister();3829TR::Register *objClassReg = cg->allocateRegister();38303831bool isCheckCastAndNullCheck = (node->getOpCodeValue() == 
TR::checkcastAndNULLCHK);38323833TR::LabelSymbol *startLabel = generateLabelSymbol(cg);3834TR::LabelSymbol *fallThruLabel = generateLabelSymbol(cg);3835TR::LabelSymbol *outlinedCallLabel = generateLabelSymbol(cg);3836TR::LabelSymbol *throwLabel = generateLabelSymbol(cg);3837TR::LabelSymbol *isClassLabel = generateLabelSymbol(cg);3838TR::LabelSymbol *iTableLoopLabel = generateLabelSymbol(cg);3839startLabel->setStartInternalControlFlow();3840fallThruLabel->setEndInternalControlFlow();38413842generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);38433844TR_OutlinedInstructions *outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(node, TR::call, NULL, outlinedCallLabel, fallThruLabel, cg);3845cg->getOutlinedInstructionsList().push_front(outlinedHelperCall);38463847// objClassReg holds object class also serves as null check3848if (isCheckCastAndNullCheck)3849generateLoadJ9Class(node, objClassReg, ObjReg, cg);38503851// temp2Reg holds romClass of cast class, for testing array, interface class type3852generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, temp2Reg, generateX86MemoryReference(castClassReg, offsetof(J9Class, romClass), cg), cg);38533854// If cast class is array, call out of line helper3855generateMemImmInstruction(TR::InstOpCode::TEST4MemImm4, node,3856generateX86MemoryReference(temp2Reg, offsetof(J9ROMClass, modifiers), cg), J9AccClassArray, cg);3857generateLabelInstruction(TR::InstOpCode::JNE4, node, outlinedCallLabel, cg);38583859// objClassReg holds object class3860if (!isCheckCastAndNullCheck)3861{3862generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, ObjReg, ObjReg, cg);3863generateLabelInstruction(TR::InstOpCode::JE4, node, fallThruLabel, cg);3864generateLoadJ9Class(node, objClassReg, ObjReg, cg);3865}38663867// Object not array, inline checks3868// Check cast class is interface3869generateMemImmInstruction(TR::InstOpCode::TEST4MemImm4, node,3870generateX86MemoryReference(temp2Reg, offsetof(J9ROMClass, 
modifiers), cg), J9AccInterface, cg);3871generateLabelInstruction(TR::InstOpCode::JE4, node, isClassLabel, cg);38723873// Obtain I-Table3874// temp1Reg holds head of J9Class->iTable of obj class3875generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, temp1Reg, generateX86MemoryReference(objClassReg, offsetof(J9Class, iTable), cg), cg);3876// Loop through I-Table3877// temp1Reg holds iTable list element through the loop3878generateLabelInstruction(TR::InstOpCode::label, node, iTableLoopLabel, cg);3879generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, temp1Reg, temp1Reg, cg);3880generateLabelInstruction(TR::InstOpCode::JE4, node, throwLabel, cg);3881auto interfaceMR = generateX86MemoryReference(temp1Reg, offsetof(J9ITable, interfaceClass), cg);3882generateMemRegInstruction(TR::InstOpCode::CMPMemReg(), node, interfaceMR, castClassReg, cg);3883generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, temp1Reg, generateX86MemoryReference(temp1Reg, offsetof(J9ITable, next), cg), cg);3884generateLabelInstruction(TR::InstOpCode::JNE4, node, iTableLoopLabel, cg);38853886// Found from I-Table3887generateLabelInstruction(TR::InstOpCode::JMP4, node, fallThruLabel, cg);38883889// cast class is non-interface class3890generateLabelInstruction(TR::InstOpCode::label, node, isClassLabel, cg);3891// equality test3892generateRegRegInstruction(TR::InstOpCode::CMPRegReg(use64BitClasses), node, objClassReg, castClassReg, cg);3893generateLabelInstruction(TR::InstOpCode::JE4, node, fallThruLabel, cg);38943895// class not equal3896// temp2 holds cast class depth3897// class depth mask must be low 16 bits to safely load without the mask.3898static_assert(J9AccClassDepthMask == 0xffff, "J9_JAVA_CLASS_DEPTH_MASK must be 0xffff");3899generateRegMemInstruction(comp->target().is64Bit()? 
TR::InstOpCode::MOVZXReg8Mem2 : TR::InstOpCode::MOVZXReg4Mem2, node,3900temp2Reg, generateX86MemoryReference(castClassReg, offsetof(J9Class, classDepthAndFlags), cg), cg);39013902// cast class depth >= obj class depth, throw3903generateRegMemInstruction(TR::InstOpCode::CMP2RegMem, node, temp2Reg, generateX86MemoryReference(objClassReg, offsetof(J9Class, classDepthAndFlags), cg), cg);3904generateLabelInstruction(TR::InstOpCode::JAE4, node, throwLabel, cg);39053906// check obj class's super class array entry3907// temp1Reg holds superClasses array of obj class3908// An alternative sequences requiring one less register may be:3909// SHL temp2Reg, 3 for 64-bit or 2 for 32-bit3910// ADD temp2Reg, [temp3Reg, superclasses offset]3911// CMP classClassReg, [temp2Reg]3912// On 64 bit, the extra reg isn't likely to cause significant register pressure.3913// On 32 bit, it could put more register pressure due to limited number of regs.3914// Since 64-bit is more prevalent, we opt to optimize for 64bit in this case3915generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, temp1Reg, generateX86MemoryReference(objClassReg, offsetof(J9Class, superclasses), cg), cg);3916generateRegMemInstruction(TR::InstOpCode::CMPRegMem(use64BitClasses), node, castClassReg,3917generateX86MemoryReference(temp1Reg, temp2Reg, comp->target().is64Bit()?3:2, cg), cg);3918generateLabelInstruction(TR::InstOpCode::JNE4, node, throwLabel, cg);39193920// throw classCastException3921{3922TR_OutlinedInstructionsGenerator og(throwLabel, node, cg);3923generateRegInstruction(TR::InstOpCode::PUSHReg, node, objClassReg, cg);3924generateRegInstruction(TR::InstOpCode::PUSHReg, node, castClassReg, cg);3925auto call = generateHelperCallInstruction(node, TR_throwClassCastException, NULL, cg);3926call->setNeedsGCMap(0xFF00FFFF);3927call->setAdjustsFramePointerBy(-2*(int32_t)sizeof(J9Class*));3928og.endOutlinedInstructionSequence();3929}39303931TR::RegisterDependencyConditions *deps = 
generateRegisterDependencyConditions((uint8_t)0, 8, cg);39323933deps->addPostCondition(ObjReg, TR::RealRegister::NoReg, cg);3934deps->addPostCondition(castClassReg, TR::RealRegister::NoReg, cg);3935deps->addPostCondition(temp1Reg, TR::RealRegister::NoReg, cg);3936deps->addPostCondition(temp2Reg, TR::RealRegister::NoReg, cg);3937deps->addPostCondition(objClassReg, TR::RealRegister::NoReg, cg);39383939TR::Node *callNode = outlinedHelperCall->getCallNode();3940TR::Register *reg;39413942if (callNode->getFirstChild() == node->getFirstChild())3943{3944reg = callNode->getFirstChild()->getRegister();3945if (reg)3946deps->unionPostCondition(reg, TR::RealRegister::NoReg, cg);3947}39483949if (callNode->getSecondChild() == node->getSecondChild())3950{3951reg = callNode->getSecondChild()->getRegister();3952if (reg)3953deps->unionPostCondition(reg, TR::RealRegister::NoReg, cg);3954}39553956deps->stopAddingConditions();39573958generateLabelInstruction(TR::InstOpCode::label, node, fallThruLabel, deps, cg);39593960cg->stopUsingRegister(temp1Reg);3961cg->stopUsingRegister(temp2Reg);3962cg->stopUsingRegister(objClassReg);39633964// Decrement use counts on the children3965//3966cg->decReferenceCount(node->getFirstChild());3967cg->decReferenceCount(node->getSecondChild());3968}39693970inline void generateInlinedCheckCastOrInstanceOfForInterface(TR::Node* node, TR_OpaqueClassBlock* clazz, TR::CodeGenerator* cg, bool isCheckCast)3971{3972TR::Compilation *comp = cg->comp();3973TR_ASSERT(clazz && TR::Compiler->cls.isInterfaceClass(comp, clazz), "Not a compile-time known Interface.");39743975auto use64BitClasses = comp->target().is64Bit() &&3976(!TR::Compiler->om.generateCompressedObjectHeaders() ||3977(comp->compileRelocatableCode() && comp->getOption(TR_UseSymbolValidationManager)));39783979// When running 64 bit compressed refs, if clazz is an address above the 2G boundary then we can't use3980// a push 32bit immediate instruction to pass it on the stack to the jitThrowClassCastException 
helper3981// as the address gets sign extended. It needs to be stored in a temp register and then push the3982// register to the stack.3983auto highClass = (comp->target().is64Bit() && ((uintptr_t)clazz) > INT_MAX) ? true : false;39843985auto j9class = cg->allocateRegister();3986auto tmp = (use64BitClasses || highClass) ? cg->allocateRegister() : NULL;39873988auto deps = generateRegisterDependencyConditions((uint8_t)2, (uint8_t)2, cg);3989deps->addPreCondition(j9class, TR::RealRegister::NoReg, cg);3990deps->addPostCondition(j9class, TR::RealRegister::NoReg, cg);3991if (tmp)3992{3993deps->addPreCondition(tmp, TR::RealRegister::NoReg, cg);3994deps->addPostCondition(tmp, TR::RealRegister::NoReg, cg);3995}3996deps->stopAddingConditions();39973998auto begLabel = generateLabelSymbol(cg);3999auto endLabel = generateLabelSymbol(cg);4000begLabel->setStartInternalControlFlow();4001endLabel->setEndInternalControlFlow();40024003auto iTableLookUpPathLabel = generateLabelSymbol(cg);4004auto iTableLookUpFailLabel = generateLabelSymbol(cg);4005auto iTableLoopLabel = generateLabelSymbol(cg);40064007generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, j9class, node->getChild(0)->getRegister(), cg);4008generateLabelInstruction(TR::InstOpCode::label, node, begLabel, cg);40094010// Null test4011if (!node->getChild(0)->isNonNull() && node->getOpCodeValue() != TR::checkcastAndNULLCHK)4012{4013// j9class contains the object at this point, reusing the register as object is no longer used after this point.4014generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, j9class, j9class, cg);4015generateLabelInstruction(TR::InstOpCode::JE4, node, endLabel, cg);4016}40174018// Load J9Class4019generateLoadJ9Class(node, j9class, j9class, cg);40204021// Profiled call site cache4022uintptr_t guessClass = 0;4023if (!comp->compileRelocatableCode())4024{4025TR_OpaqueClassBlock* guessClassArray[NUM_PICS];4026auto num_PICs = TR::TreeEvaluator::interpreterProfilingInstanceOfOrCheckCastInfo(cg, 
node, guessClassArray);4027auto fej9 = static_cast<TR_J9VMBase *>(comp->fe());4028for (uint8_t i = 0; i < num_PICs; i++)4029{4030if (fej9->instanceOfOrCheckCastNoCacheUpdate((J9Class*)guessClassArray[i], (J9Class*)clazz))4031{4032guessClass = reinterpret_cast<uintptr_t>(guessClassArray[i]);4033}4034}4035}40364037// Call site cache4038auto cache = sizeof(J9Class*) == 4 ? cg->create4ByteData(node, (uint32_t)guessClass) : cg->create8ByteData(node, (uint64_t)guessClass);4039cache->setClassAddress(true);4040generateRegMemInstruction(TR::InstOpCode::CMPRegMem(use64BitClasses), node, j9class, generateX86MemoryReference(cache, cg), cg);4041generateLabelInstruction(TR::InstOpCode::JNE4, node, iTableLookUpPathLabel, cg);40424043// I-Table lookup4044{4045TR_OutlinedInstructionsGenerator og(iTableLookUpPathLabel, node, cg);4046auto itable = j9class; // re-use the j9class register to perform itable lookup40474048generateRegInstruction(TR::InstOpCode::PUSHReg, node, j9class, cg);40494050// Save VFP4051auto vfp = generateVFPSaveInstruction(node, cg);40524053// Obtain I-Table4054generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, itable, generateX86MemoryReference(j9class, offsetof(J9Class, iTable), cg), cg);4055if (tmp)4056{4057generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, tmp, (uintptr_t)clazz, cg, TR_ClassAddress);4058}40594060// Loop through I-Table4061generateLabelInstruction(TR::InstOpCode::label, node, iTableLoopLabel, cg);4062generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, itable, itable, cg);4063generateLabelInstruction(TR::InstOpCode::JE4, node, iTableLookUpFailLabel, cg);4064auto interfaceMR = generateX86MemoryReference(itable, offsetof(J9ITable, interfaceClass), cg);4065if (tmp)4066{4067generateMemRegInstruction(TR::InstOpCode::CMP8MemReg, node, interfaceMR, tmp, cg);4068}4069else4070{4071generateMemImmSymInstruction(TR::InstOpCode::CMP4MemImm4, node, interfaceMR, (uintptr_t)clazz, node->getChild(1)->getSymbolReference(), 
cg);4072}4073generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, itable, generateX86MemoryReference(itable, offsetof(J9ITable, next), cg), cg);4074generateLabelInstruction(TR::InstOpCode::JNE4, node, iTableLoopLabel, cg);40754076// Found from I-Table4077generateMemInstruction(TR::InstOpCode::POPMem, node, generateX86MemoryReference(cache, cg), cg); // j9class4078if (!isCheckCast)4079{4080generateInstruction(TR::InstOpCode::STC, node, cg);4081}4082generateLabelInstruction(TR::InstOpCode::JMP4, node, endLabel, cg);40834084// Not found4085generateVFPRestoreInstruction(vfp, node, cg);4086generateLabelInstruction(TR::InstOpCode::label, node, iTableLookUpFailLabel, cg);4087if (isCheckCast)4088{4089if (tmp)4090{4091generateRegInstruction(TR::InstOpCode::PUSHReg, node, tmp, cg);4092}4093else4094{4095generateImmInstruction(TR::InstOpCode::PUSHImm4, node, (int32_t)(uintptr_t)clazz, cg);4096}4097auto call = generateHelperCallInstruction(node, TR_throwClassCastException, NULL, cg);4098call->setNeedsGCMap(0xFF00FFFF);4099call->setAdjustsFramePointerBy(-2*(int32_t)sizeof(J9Class*));4100}4101else4102{4103generateRegInstruction(TR::InstOpCode::POPReg, node, j9class, cg);4104generateLabelInstruction(TR::InstOpCode::JMP4, node, endLabel, cg);4105}41064107og.endOutlinedInstructionSequence();4108}41094110// Succeed4111if (!isCheckCast)4112{4113generateInstruction(TR::InstOpCode::STC, node, cg);4114}4115generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);41164117cg->stopUsingRegister(j9class);4118if (tmp)4119{4120cg->stopUsingRegister(tmp);4121}4122}41234124inline void generateInlinedCheckCastOrInstanceOfForClass(TR::Node* node, TR_OpaqueClassBlock* clazz, TR::CodeGenerator* cg, bool isCheckCast)4125{4126TR::Compilation *comp = cg->comp();4127auto fej9 = (TR_J9VMBase*)(cg->fe());41284129bool use64BitClasses = false;4130if (comp->target().is64Bit())4131{4132// When running 64 bit compressed refs, if clazz is an address above the 2G4133// boundary then we 
can't use a push 32bit immediate instruction to pass it4134// to the helper as the address gets sign extended. So we need to test for4135// this case and switch to the 64bit memory to memory encoding4136// that is used when running 64 bit non-compressed.4137auto highClass = ((uintptr_t)clazz) > INT_MAX;41384139use64BitClasses = !TR::Compiler->om.generateCompressedObjectHeaders() ||4140highClass ||4141(comp->compileRelocatableCode() && comp->getOption(TR_UseSymbolValidationManager));4142}41434144auto clazzData = use64BitClasses ? cg->create8ByteData(node, (uint64_t)(uintptr_t)clazz) : NULL;4145if (clazzData)4146{4147clazzData->setClassAddress(true);4148}41494150auto j9class = cg->allocateRegister();4151auto tmp = cg->allocateRegister();41524153auto deps = generateRegisterDependencyConditions((uint8_t)2, (uint8_t)2, cg);4154deps->addPreCondition(tmp, TR::RealRegister::NoReg, cg);4155deps->addPreCondition(j9class, TR::RealRegister::NoReg, cg);4156deps->addPostCondition(tmp, TR::RealRegister::NoReg, cg);4157deps->addPostCondition(j9class, TR::RealRegister::NoReg, cg);41584159auto begLabel = generateLabelSymbol(cg);4160auto endLabel = generateLabelSymbol(cg);4161begLabel->setStartInternalControlFlow();4162endLabel->setEndInternalControlFlow();41634164auto successLabel = isCheckCast ? endLabel : generateLabelSymbol(cg);4165auto failLabel = isCheckCast ? 
generateLabelSymbol(cg) : endLabel;41664167generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, j9class, node->getChild(0)->getRegister(), cg);4168generateLabelInstruction(TR::InstOpCode::label, node, begLabel, cg);41694170// Null test4171if (!node->getChild(0)->isNonNull() && node->getOpCodeValue() != TR::checkcastAndNULLCHK)4172{4173// j9class contains the object at this point, reusing the register as object is no longer used after this point.4174generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, j9class, j9class, cg);4175generateLabelInstruction(TR::InstOpCode::JE4, node, endLabel, cg);4176}41774178// Load J9Class4179generateLoadJ9Class(node, j9class, j9class, cg);41804181// Equality test4182if (!fej9->isAbstractClass(clazz) || node->getOpCodeValue() == TR::icall/*TR_checkAssignable*/)4183{4184// For instanceof and checkcast, LHS is obtained from an instance, which cannot be abstract or interface;4185// therefore, equality test can be safely skipped for instanceof and checkcast when RHS is abstract.4186// However, LHS for TR_checkAssignable may be abstract or interface as it may be an arbitrary class, and4187// hence equality test is always needed.4188if (use64BitClasses)4189{4190generateRegMemInstruction(TR::InstOpCode::CMP8RegMem, node, j9class, generateX86MemoryReference(clazzData, cg), cg);4191}4192else4193{4194generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, j9class, (uintptr_t)clazz, cg);4195}4196if (!fej9->isClassFinal(clazz))4197{4198generateLabelInstruction(TR::InstOpCode::JE4, node, successLabel, cg);4199}4200}4201// at this point, ZF == 1 indicates success42024203// Superclass test4204if (!fej9->isClassFinal(clazz))4205{4206auto depth = TR::Compiler->cls.classDepthOf(clazz);4207if (depth >= comp->getOptions()->_minimumSuperclassArraySize)4208{4209static_assert(J9AccClassDepthMask == 0xffff, "J9AccClassDepthMask must be 0xffff");4210auto depthMR = generateX86MemoryReference(j9class, offsetof(J9Class, classDepthAndFlags), 
cg);4211generateMemImmInstruction(TR::InstOpCode::CMP2MemImm2, node, depthMR, depth, cg);4212if (!isCheckCast)4213{4214// Need ensure CF is cleared before reaching to fail label4215auto outlineLabel = generateLabelSymbol(cg);4216generateLabelInstruction(TR::InstOpCode::JBE4, node, outlineLabel, cg);42174218TR_OutlinedInstructionsGenerator og(outlineLabel, node, cg);4219generateInstruction(TR::InstOpCode::CLC, node, cg);4220generateLabelInstruction(TR::InstOpCode::JMP4, node, failLabel, cg);4221og.endOutlinedInstructionSequence();4222}4223else4224{4225generateLabelInstruction(TR::InstOpCode::JBE4, node, failLabel, cg);4226}4227}42284229generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, tmp, generateX86MemoryReference(j9class, offsetof(J9Class, superclasses), cg), cg);4230auto offset = depth * sizeof(J9Class*);4231TR_ASSERT(IS_32BIT_SIGNED(offset), "The offset to superclass is unreasonably large.");4232auto superclass = generateX86MemoryReference(tmp, offset, cg);4233if (use64BitClasses)4234{4235generateRegMemInstruction(TR::InstOpCode::L8RegMem, node, tmp, superclass, cg);4236generateRegMemInstruction(TR::InstOpCode::CMP8RegMem, node, tmp, generateX86MemoryReference(clazzData, cg), cg);4237}4238else4239{4240generateMemImmInstruction(TR::InstOpCode::CMP4MemImm4, node, superclass, (int32_t)(uintptr_t)clazz, cg);4241}4242}4243// at this point, ZF == 1 indicates success42444245// Branch to success/fail path4246if (!isCheckCast)4247{4248generateInstruction(TR::InstOpCode::CLC, node, cg);4249}4250generateLabelInstruction(TR::InstOpCode::JNE4, node, failLabel, cg);42514252// Set CF to report success4253if (!isCheckCast)4254{4255generateLabelInstruction(TR::InstOpCode::label, node, successLabel, cg);4256generateInstruction(TR::InstOpCode::STC, node, cg);4257}42584259// Throw exception for CheckCast4260if (isCheckCast)4261{4262TR_OutlinedInstructionsGenerator og(failLabel, node, cg);42634264generateRegInstruction(TR::InstOpCode::PUSHReg, node, j9class, cg);4265if 
(use64BitClasses)4266{4267generateMemInstruction(TR::InstOpCode::PUSHMem, node, generateX86MemoryReference(clazzData, cg), cg);4268}4269else4270{4271generateImmInstruction(TR::InstOpCode::PUSHImm4, node, (int32_t)(uintptr_t)clazz, cg);4272}4273auto call = generateHelperCallInstruction(node, TR_throwClassCastException, NULL, cg);4274call->setNeedsGCMap(0xFF00FFFF);4275call->setAdjustsFramePointerBy(-2*(int32_t)sizeof(J9Class*));42764277og.endOutlinedInstructionSequence();4278}42794280// Succeed4281generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);42824283cg->stopUsingRegister(j9class);4284cg->stopUsingRegister(tmp);4285}42864287TR::Register *J9::X86::TreeEvaluator::checkcastinstanceofEvaluator(TR::Node *node, TR::CodeGenerator *cg)4288{4289TR::Compilation *comp = cg->comp();42904291bool isCheckCast = false;4292switch (node->getOpCodeValue())4293{4294case TR::checkcast:4295case TR::checkcastAndNULLCHK:4296isCheckCast = true;4297break;4298case TR::instanceof:4299case TR::icall: // TR_checkAssignable4300break;4301default:4302TR_ASSERT(false, "Incorrect Op Code %d.", node->getOpCodeValue());4303break;4304}4305auto clazz = TR::TreeEvaluator::getCastClassAddress(node->getChild(1));4306if (isCheckCast && !clazz && !comp->getOption(TR_DisableInlineCheckCast) && (!comp->compileRelocatableCode() || comp->getOption(TR_UseSymbolValidationManager)))4307{4308generateInlinedCheckCastForDynamicCastClass(node, cg);4309}4310else if (clazz &&4311!TR::Compiler->cls.isClassArray(comp, clazz) && // not yet optimized4312(!comp->compileRelocatableCode() || comp->getOption(TR_UseSymbolValidationManager)) &&4313!comp->getOption(TR_DisableInlineCheckCast) &&4314!comp->getOption(TR_DisableInlineInstanceOf))4315{4316cg->evaluate(node->getChild(0));4317if (TR::Compiler->cls.isInterfaceClass(comp, clazz))4318{4319generateInlinedCheckCastOrInstanceOfForInterface(node, clazz, cg, isCheckCast);4320}4321else4322{4323generateInlinedCheckCastOrInstanceOfForClass(node, clazz, cg, 
isCheckCast);4324}4325if (!isCheckCast)4326{4327auto result = cg->allocateRegister();4328generateRegInstruction(TR::InstOpCode::SETB1Reg, node, result, cg);4329generateRegRegInstruction(TR::InstOpCode::MOVZXReg4Reg1, node, result, result, cg);4330node->setRegister(result);4331}4332cg->decReferenceCount(node->getChild(0));4333cg->recursivelyDecReferenceCount(node->getChild(1));4334}4335else4336{4337if (node->getOpCodeValue() == TR::checkcastAndNULLCHK)4338{4339auto object = cg->evaluate(node->getChild(0));4340// Just touch the memory in case this is a NULL pointer and we need to throw4341// the exception after the checkcast. If the checkcast was combined with nullpointer4342// there's nobody after the checkcast to throw the exception.4343auto instr = generateMemImmInstruction(TR::InstOpCode::TEST1MemImm1, node, generateX86MemoryReference(object, TR::Compiler->om.offsetOfObjectVftField(), cg), 0, cg);4344cg->setImplicitExceptionPoint(instr);4345instr->setNeedsGCMap(0xFF00FFFF);4346instr->setNode(comp->findNullChkInfo(node));4347}4348TR::TreeEvaluator::performHelperCall(node, NULL, isCheckCast ? 
TR::call : TR::icall, false, cg);4349}4350return node->getRegister();4351}43524353static bool comesFromClassLib(TR::Node *node, TR::Compilation *comp)4354{4355TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());4356TR_OpaqueMethodBlock *mb = node->getOwningMethod();4357char buf[512];4358const char *methodSig = fej9->sampleSignature(mb, buf, 512, comp->trMemory());4359if (methodSig &&4360(strncmp(methodSig, "java", 4)==0 ||4361strncmp(methodSig, "sun", 3) ==0))4362return true;4363return false;4364}43654366static TR::MemoryReference *getMemoryReference(TR::Register *objectClassReg, TR::Register *objectReg, int32_t lwOffset, TR::CodeGenerator *cg)4367{4368if (objectClassReg)4369return generateX86MemoryReference(objectReg, objectClassReg, 0, cg);4370else4371return generateX86MemoryReference(objectReg, lwOffset, cg);4372}43734374void J9::X86::TreeEvaluator::asyncGCMapCheckPatching(TR::Node *node, TR::CodeGenerator *cg, TR::LabelSymbol *snippetLabel)4375{4376TR::MemoryReference *SOMmr = generateX86MemoryReference(node->getFirstChild()->getFirstChild(), cg);4377TR::Compilation *comp = cg->comp();43784379if (cg->comp()->target().is64Bit())4380{4381//64 bit sequence4382//4383//Generate a call to the out-of-line patching sequence.4384//This sequence will convert the call back into an asynch message check cmp4385//4386TR::LabelSymbol *gcMapPatchingLabel = generateLabelSymbol(cg);4387TR::LabelSymbol *outlinedStartLabel = generateLabelSymbol(cg);4388TR::LabelSymbol *outlinedEndLabel = generateLabelSymbol(cg);4389TR::LabelSymbol *asyncWithoutPatch = generateLabelSymbol(cg);43904391//Start inline patching sequence4392//4393TR::Register *patchableAddrReg = cg->allocateRegister();4394TR::Register *patchValReg = cg->allocateRegister();4395TR::Register *tempReg = cg->allocateRegister();439643974398outlinedStartLabel->setStartInternalControlFlow();4399outlinedEndLabel->setEndInternalControlFlow();44004401//generateLabelInstruction(TR::InstOpCode::CALLImm4, node, gcMapPatchingLabel, 
cg);4402generatePatchableCodeAlignmentInstruction(TR::X86PatchableCodeAlignmentInstruction::CALLImm4AtomicRegions, generateLabelInstruction(TR::InstOpCode::CALLImm4, node, gcMapPatchingLabel, cg), cg);44034404TR_OutlinedInstructionsGenerator og(gcMapPatchingLabel, node, cg);44054406generateLabelInstruction(TR::InstOpCode::label, node, outlinedStartLabel, cg);4407//Load the address that we are going to patch and clean up the stack4408//4409generateRegInstruction(TR::InstOpCode::POPReg, node, patchableAddrReg, cg);44104411//check if there is already an async even pending4412//4413generateMemImmInstruction(TR::InstOpCode::CMP8MemImm4, node, SOMmr, -1, cg);4414generateLabelInstruction(TR::InstOpCode::JE4, node, asyncWithoutPatch, cg);44154416//Signal the async event4417//4418static char *d = feGetEnv("TR_GCOnAsyncBREAK");4419if (d)4420generateInstruction(TR::InstOpCode::INT3, node, cg);44214422generateMemImmInstruction(TR::InstOpCode::S8MemImm4, node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, stackOverflowMark), cg), -1, cg);4423generateRegImmInstruction(TR::InstOpCode::MOV8RegImm4, node, tempReg, 1 << comp->getPersistentInfo()->getGCMapCheckEventHandle(), cg);4424generateMemRegInstruction(TR::InstOpCode::LOR8MemReg, node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, asyncEventFlags),cg), tempReg, cg);44254426//Populate the code we are going to patch in4427//4428//existing4429//000007ff`7d340578 e8f4170000 call 000007ff`7d341d71 <------4430//000007ff`7d34057d 0f84ee1e0000 je 000007ff`7d3424714431//*********4432//patching in4433//000007ff'7d34056f 48837d50ff cmp qword ptr [rbp+0x50], 0xffffffffffffffff <-----4434//000007ff`7d34057d 0f84ee1e0000 je 000007ff`7d34247144354436//Load the original value4437//44384439generateRegMemInstruction(TR::InstOpCode::L8RegMem, node, patchValReg, generateX86MemoryReference(patchableAddrReg, -5, cg), cg);4440generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, 
tempReg, (uint64_t) 0x0, cg);4441generateRegRegInstruction(TR::InstOpCode::OR8RegReg, node, patchValReg, tempReg, cg);4442generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, tempReg, (uint64_t) 0x0, cg);4443generateRegRegInstruction(TR::InstOpCode::AND8RegReg, node, patchValReg, tempReg , cg);44444445TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, 4, cg);4446deps->addPostCondition(patchableAddrReg, TR::RealRegister::NoReg, cg);4447deps->addPostCondition(patchValReg, TR::RealRegister::NoReg, cg);4448deps->addPostCondition(tempReg, TR::RealRegister::NoReg, cg);4449deps->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);4450deps->stopAddingConditions();44514452generateMemRegInstruction(TR::InstOpCode::S8MemReg, node, generateX86MemoryReference(patchableAddrReg, -5, cg), patchValReg, deps, cg);4453generateLabelInstruction(TR::InstOpCode::label, node, asyncWithoutPatch, cg);4454generateLabelInstruction(TR::InstOpCode::JMP4, node, snippetLabel, cg);44554456cg->stopUsingRegister(patchableAddrReg);4457cg->stopUsingRegister(patchValReg);4458cg->stopUsingRegister(tempReg);4459generateLabelInstruction(TR::InstOpCode::label, node, outlinedEndLabel, cg);44604461og.endOutlinedInstructionSequence();4462}4463else4464{4465//32 bit sequence4466//44674468//Generate a call to the out-of-line patching sequence.4469//This sequence will convert the call back into an asynch message check cmp4470//4471TR::LabelSymbol *gcMapPatchingLabel = generateLabelSymbol(cg);4472TR::LabelSymbol *outlinedStartLabel = generateLabelSymbol(cg);4473TR::LabelSymbol *outlinedEndLabel = generateLabelSymbol(cg);4474TR::LabelSymbol *asyncWithoutPatch = generateLabelSymbol(cg);44754476//Start inline patching sequence4477//4478TR::Register *patchableAddrReg = cg->allocateRegister();4479TR::Register *lowPatchValReg = cg->allocateRegister();4480TR::Register *highPatchValReg = cg->allocateRegister();4481TR::Register *lowExistingValReg = 
cg->allocateRegister();4482TR::Register *highExistingValReg = cg->allocateRegister();44834484outlinedStartLabel->setStartInternalControlFlow();4485outlinedEndLabel->setEndInternalControlFlow();44864487//generateBoundaryAvoidanceInstruction(TR::X86BoundaryAvoidanceInstruction::CALLImm4AtomicRegions, 8, 8,generateLabelInstruction(TR::InstOpCode::CALLImm4, node, gcMapPatchingLabel, cg), cg);4488TR::Instruction *callInst = generatePatchableCodeAlignmentInstruction(TR::X86PatchableCodeAlignmentInstruction::CALLImm4AtomicRegions, generateLabelInstruction(TR::InstOpCode::CALLImm4, node, gcMapPatchingLabel, cg), cg);4489TR::X86VFPSaveInstruction *vfpSaveInst = generateVFPSaveInstruction(callInst->getPrev(), cg);44904491TR_OutlinedInstructionsGenerator og(gcMapPatchingLabel, node, cg);44924493generateLabelInstruction(TR::InstOpCode::label, node, outlinedStartLabel, cg);4494//Load the address that we are going to patch and clean up the stack4495//4496generateRegInstruction(TR::InstOpCode::POPReg, node, patchableAddrReg, cg);449744984499//check if there is already an async even pending4500//4501generateMemImmInstruction(TR::InstOpCode::CMP4MemImm4, node, SOMmr, -1, cg);4502generateLabelInstruction(TR::InstOpCode::JE4, node, asyncWithoutPatch, cg);45034504//Signal the async event4505//4506generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, stackOverflowMark), cg), -1, cg);4507generateRegImmInstruction(TR::InstOpCode::MOV4RegImm4, node, lowPatchValReg, 1 << comp->getPersistentInfo()->getGCMapCheckEventHandle(), cg);4508generateMemRegInstruction(TR::InstOpCode::LOR4MemReg, node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, asyncEventFlags),cg), lowPatchValReg, cg);45094510//Populate the registers we are going to use in the lock cmp xchg4511//45124513static char *d = feGetEnv("TR_GCOnAsyncBREAK");4514if (d)4515generateInstruction(TR::InstOpCode::INT3, node, 
cg);45164517//Populate the existing inline code4518//4519generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, lowExistingValReg, generateX86MemoryReference(patchableAddrReg, -5, cg), cg);4520generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, highExistingValReg, generateX86MemoryReference(patchableAddrReg, -1, cg), cg);45214522//Populate the code we are going to patch in4523//837d28ff cmp dword ptr [ebp+28h],0FFFFFFFFh <--- patching in4524//90 nop4525//*******************4526// call imm4 <---- patching over4527//4528generateRegImmInstruction(TR::InstOpCode::MOV4RegImm4, node, lowPatchValReg, (uint32_t) 0x287d8390, cg);4529generateRegRegInstruction(TR::InstOpCode::MOV4RegReg, node, highPatchValReg, highExistingValReg, cg);4530generateRegImmInstruction(TR::InstOpCode::OR4RegImm4, node, highPatchValReg, (uint32_t) 0x000000ff, cg);45314532TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, 6, cg);45334534deps->addPostCondition(patchableAddrReg, TR::RealRegister::edi, cg);4535deps->addPostCondition(lowPatchValReg, TR::RealRegister::ebx, cg);4536deps->addPostCondition(highPatchValReg, TR::RealRegister::ecx, cg);4537deps->addPostCondition(lowExistingValReg, TR::RealRegister::eax, cg);4538deps->addPostCondition(highExistingValReg, TR::RealRegister::edx, cg);4539deps->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);4540deps->stopAddingConditions();4541generateMemInstruction(TR::InstOpCode::LCMPXCHG8BMem, node, generateX86MemoryReference(patchableAddrReg, -5, cg), deps, cg);4542generateLabelInstruction(TR::InstOpCode::label, node, asyncWithoutPatch, cg);4543generateVFPRestoreInstruction(generateLabelInstruction(TR::InstOpCode::JMP4, node, snippetLabel, 
cg),vfpSaveInst,cg);45444545cg->stopUsingRegister(patchableAddrReg);4546cg->stopUsingRegister(lowPatchValReg);4547cg->stopUsingRegister(highPatchValReg);4548cg->stopUsingRegister(lowExistingValReg);4549cg->stopUsingRegister(highExistingValReg);4550generateLabelInstruction(TR::InstOpCode::label, node, outlinedEndLabel, cg);45514552og.endOutlinedInstructionSequence();4553}4554}45554556void J9::X86::TreeEvaluator::inlineRecursiveMonitor(TR::Node *node,4557TR::CodeGenerator *cg,4558TR::LabelSymbol *fallThruLabel,4559TR::LabelSymbol *jitMonitorEnterOrExitSnippetLabel,4560TR::LabelSymbol *inlineRecursiveSnippetLabel,4561TR::Register *objectReg,4562int lwOffset,4563TR::LabelSymbol *snippetRestartLabel,4564bool reservingLock)4565{4566//Code generated:4567// mov lockWordReg, [obj+lwOffset]4568// add lockWordReg, INC_DEC_VALUE/-INC_DEC_VALUE ---> lock word with increased recursive count4569// mov lockWordMaskedReg, NON_INC_DEC_MASK4570// and lockWordMaskedReg, lockWordReg ---> lock word masked out counter bits4571// cmp lockWordMaskedReg, ebp4572// jne jitMonitorEnterOrExitSnippetLabel4573// mov [obj+lwOffset], lockWordReg4574// jmp fallThruLabel45754576TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());4577TR::LabelSymbol *outlinedStartLabel = generateLabelSymbol(cg);4578TR::LabelSymbol *outlinedEndLabel = generateLabelSymbol(cg);45794580outlinedStartLabel->setStartInternalControlFlow();4581outlinedEndLabel->setEndInternalControlFlow();45824583TR_OutlinedInstructionsGenerator og(inlineRecursiveSnippetLabel, node, cg);45844585generateLabelInstruction(TR::InstOpCode::label, node, outlinedStartLabel, cg);4586TR::Register *lockWordReg = cg->allocateRegister();4587TR::Register *lockWordMaskedReg = cg->allocateRegister();4588TR::Register *vmThreadReg = cg->getVMThreadRegister();4589bool use64bitOp = cg->comp()->target().is64Bit() && !fej9->generateCompressedLockWord();4590bool isMonitorEnter = node->getSymbolReference() == 
cg->comp()->getSymRefTab()->findOrCreateMethodMonitorEntrySymbolRef(NULL)4591|| node->getSymbolReference() == cg->comp()->getSymRefTab()->findOrCreateMonitorEntrySymbolRef(NULL);45924593generateRegMemInstruction(TR::InstOpCode::LRegMem(use64bitOp), node, lockWordReg, generateX86MemoryReference(objectReg, lwOffset, cg), cg);4594generateRegImmInstruction(TR::InstOpCode::ADDRegImm4(use64bitOp), node, lockWordReg, isMonitorEnter? INC_DEC_VALUE: -INC_DEC_VALUE, cg);4595generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(use64bitOp), node, lockWordMaskedReg, NON_INC_DEC_MASK - RES_BIT, cg);4596generateRegRegInstruction(TR::InstOpCode::ANDRegReg(use64bitOp), node, lockWordMaskedReg, lockWordReg, cg);4597generateRegRegInstruction(TR::InstOpCode::CMPRegReg(use64bitOp), node, lockWordMaskedReg, vmThreadReg, cg);45984599generateLabelInstruction(TR::InstOpCode::JNE4, node, jitMonitorEnterOrExitSnippetLabel, cg);4600generateMemRegInstruction(TR::InstOpCode::SMemReg(use64bitOp), node, generateX86MemoryReference(objectReg, lwOffset, cg), lockWordReg, cg);46014602TR::RegisterDependencyConditions *restartDeps = generateRegisterDependencyConditions((uint8_t)0, 4, cg);4603restartDeps->addPostCondition(objectReg, TR::RealRegister::NoReg, cg);4604restartDeps->addPostCondition(vmThreadReg, TR::RealRegister::ebp, cg);4605restartDeps->addPostCondition(lockWordMaskedReg, TR::RealRegister::NoReg, cg);4606restartDeps->addPostCondition(lockWordReg, TR::RealRegister::NoReg, cg);4607restartDeps->stopAddingConditions();4608generateLabelInstruction(TR::InstOpCode::label, node, snippetRestartLabel, restartDeps, cg);46094610generateLabelInstruction(TR::InstOpCode::JMP4, node, fallThruLabel, cg);46114612cg->stopUsingRegister(lockWordReg);4613cg->stopUsingRegister(lockWordMaskedReg);46144615TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, 1, cg);4616deps->addPostCondition(vmThreadReg, TR::RealRegister::ebp, 
cg);4617deps->stopAddingConditions();4618generateLabelInstruction(TR::InstOpCode::label, node, outlinedEndLabel, deps, cg);46194620og.endOutlinedInstructionSequence();4621}46224623void J9::X86::TreeEvaluator::transactionalMemoryJITMonitorEntry(TR::Node *node,4624TR::CodeGenerator *cg,4625TR::LabelSymbol *startLabel,4626TR::LabelSymbol *snippetLabel,4627TR::LabelSymbol *JITMonitorEnterSnippetLabel,4628TR::Register *objectReg,4629int lwOffset)46304631{4632TR::LabelSymbol *txJITMonitorEntryLabel = snippetLabel;4633TR::LabelSymbol *outlinedStartLabel = generateLabelSymbol(cg);4634TR::LabelSymbol *outlinedEndLabel = generateLabelSymbol(cg);46354636outlinedStartLabel->setStartInternalControlFlow();4637outlinedEndLabel->setEndInternalControlFlow();46384639TR_OutlinedInstructionsGenerator og(txJITMonitorEntryLabel, node, cg);46404641TR::Register *counterReg = cg->allocateRegister();4642generateRegImmInstruction(TR::InstOpCode::MOV4RegImm4, node, counterReg, 1024, cg);4643TR::LabelSymbol *spinLabel = outlinedStartLabel;4644generateLabelInstruction(TR::InstOpCode::label, node, spinLabel, cg);46454646generateInstruction(TR::InstOpCode::PAUSE, node, cg);4647generateRegInstruction(TR::InstOpCode::DEC4Reg, node, counterReg, cg); // might need to consider 32bits later4648generateLabelInstruction(TR::InstOpCode::JE4, node, JITMonitorEnterSnippetLabel, cg);4649TR::MemoryReference *objLockRef = generateX86MemoryReference(objectReg, lwOffset, cg);4650generateMemImmInstruction(TR::InstOpCode::CMP4MemImm4, node, objLockRef, 0, cg);4651generateLabelInstruction(TR::InstOpCode::JNE4, node, spinLabel, cg);4652generateLabelInstruction(TR::InstOpCode::JMP4, node, startLabel, cg);46534654TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, 1, cg);4655deps->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);4656deps->stopAddingConditions();4657generateLabelInstruction(TR::InstOpCode::label, node, outlinedEndLabel, 
cg);46584659cg->stopUsingRegister(counterReg);46604661og.endOutlinedInstructionSequence();4662}46634664void4665J9::X86::TreeEvaluator::generateCheckForValueMonitorEnterOrExit(4666TR::Node *node,4667int32_t classFlag,4668TR::LabelSymbol *snippetLabel,4669TR::CodeGenerator *cg)4670{4671TR::Register *objectReg = cg->evaluate(node->getFirstChild());4672TR::Register *j9classReg = cg->allocateRegister();4673generateLoadJ9Class(node, j9classReg, objectReg, cg);4674auto fej9 = (TR_J9VMBase *)(cg->fe());4675TR::MemoryReference *classFlagsMR = generateX86MemoryReference(j9classReg, (uintptr_t)(fej9->getOffsetOfClassFlags()), cg);46764677TR::InstOpCode::Mnemonic testOpCode;4678if ((uint32_t)classFlag <= USHRT_MAX)4679testOpCode = TR::InstOpCode::TEST2MemImm2;4680else4681testOpCode = TR::InstOpCode::TEST4MemImm4;46824683generateMemImmInstruction(testOpCode, node, classFlagsMR, classFlag, cg);4684generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);4685}46864687TR::Register *4688J9::X86::TreeEvaluator::VMmonentEvaluator(4689TR::Node *node,4690TR::CodeGenerator *cg)4691{4692// If there is a NULLCHK above this node it will be expecting us to set4693// up the excepting instruction. 
If we are not going to inline an4694// appropriate excepting instruction we must make sure to reset the4695// excepting instruction since our children may have set it.4696//4697TR::Compilation *comp = cg->comp();4698TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());4699static const char *noInline = feGetEnv("TR_NoInlineMonitor");4700static const char *firstMonEnt = feGetEnv("TR_FirstMonEnt");4701static int32_t monEntCount = 0;4702bool reservingLock = false;4703bool normalLockPreservingReservation = false;4704bool dummyMethodMonitor = false;4705TR_YesNoMaybe isMonitorValueBasedOrValueType = cg->isMonitorValueBasedOrValueType(node);4706static const char *doCmpFirst = feGetEnv("TR_AddCMPBeforeCMPXCHG");47074708int lwOffset = fej9->getByteOffsetToLockword((TR_OpaqueClassBlock *) cg->getMonClass(node));4709if (comp->getOption(TR_MimicInterpreterFrameShape) ||4710(comp->getOption(TR_FullSpeedDebug) && node->isSyncMethodMonitor()) ||4711noInline ||4712(isMonitorValueBasedOrValueType == TR_yes) ||4713comp->getOption(TR_DisableInlineMonEnt) ||4714(firstMonEnt && (*firstMonEnt-'0') > monEntCount++))4715{4716// Don't inline4717//4718TR::ILOpCodes opCode = node->getOpCodeValue();4719TR::Node::recreate(node, TR::call);4720TR::TreeEvaluator::directCallEvaluator(node, cg);4721TR::Node::recreate(node, opCode);4722cg->setImplicitExceptionPoint(NULL);4723return NULL;4724}47254726if (lwOffset > 0 && comp->getOption(TR_ReservingLocks))4727{4728bool dummy=false;4729TR::TreeEvaluator::evaluateLockForReservation (node, &reservingLock, &normalLockPreservingReservation, cg);4730TR::TreeEvaluator::isPrimitiveMonitor (node, cg);47314732if (node->isPrimitiveLockedRegion() && reservingLock)4733dummyMethodMonitor = TR::TreeEvaluator::isDummyMonitorEnter(node, cg);47344735if (reservingLock && !node->isPrimitiveLockedRegion())4736dummyMethodMonitor = false;4737}47384739TR::Node *objectRef = node->getFirstChild();47404741static const char *disableInlineRecursiveEnv = 
feGetEnv("TR_DisableInlineRecursiveMonitor");4742bool inlineRecursive = disableInlineRecursiveEnv ? false : true;4743if (comp->getOption(TR_X86HLE) || lwOffset <= 0)4744inlineRecursive = false;47454746// Evaluate the object reference4747//4748TR::Register *objectReg = cg->evaluate(objectRef);4749TR::Register *eaxReal = cg->allocateRegister();4750TR::Register *scratchReg = NULL;4751uint32_t numDeps = 3; // objectReg, eax, ebp47524753generatePrefetchAfterHeaderAccess (node, objectReg, cg);47544755cg->setImplicitExceptionPoint(NULL);47564757TR::LabelSymbol *startLabel = generateLabelSymbol(cg);4758TR::LabelSymbol *fallThru = generateLabelSymbol(cg);4759TR::LabelSymbol *snippetFallThru = inlineRecursive ? generateLabelSymbol(cg) : fallThru;47604761startLabel->setStartInternalControlFlow();4762fallThru->setEndInternalControlFlow();4763generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);47644765TR::Register *vmThreadReg = cg->getVMThreadRegister();47664767TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);4768TR::LabelSymbol *monitorLookupCacheLabel = generateLabelSymbol(cg);4769TR::LabelSymbol *fallThruFromMonitorLookupCacheLabel = generateLabelSymbol(cg);4770TR::LabelSymbol *exitLabel = NULL;47714772TR_OutlinedInstructions *outlinedHelperCall;4773// In the reserving lock case below, we change the symref on the node... Here, we are going to store the original symref, so that we can restore our change.4774TR::SymbolReference *originalNodeSymRef = NULL;47754776TR::Node *helperCallNode = node;47774778if (isMonitorValueBasedOrValueType == TR_maybe)4779TR::TreeEvaluator::generateCheckForValueMonitorEnterOrExit(node, J9_CLASS_DISALLOWS_LOCKING_FLAGS, snippetLabel, cg);47804781if (comp->getOption(TR_ReservingLocks))4782{4783// About to change the node's symref... 
store the original.4784originalNodeSymRef = node->getSymbolReference();47854786if (reservingLock && node->isPrimitiveLockedRegion() && dummyMethodMonitor)4787{4788if (node->getSymbolReference() == cg->getSymRef(TR_methodMonitorEntry))4789node->setSymbolReference(comp->getSymRefTab()->findOrCreateRuntimeHelper(TR_AMD64JitMethodMonitorExitReservedPrimitive, true, true, true));4790else4791node->setSymbolReference(comp->getSymRefTab()->findOrCreateRuntimeHelper(TR_AMD64JitMonitorExitReservedPrimitive, true, true, true));47924793exitLabel = generateLabelSymbol(cg);4794TR_OutlinedInstructions *outlinedExitHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(node, TR::call, NULL, exitLabel, fallThru, cg);4795cg->getOutlinedInstructionsList().push_front(outlinedExitHelperCall);4796}47974798TR_RuntimeHelper helper;4799bool success = TR::TreeEvaluator::monEntryExitHelper(true, node, reservingLock, normalLockPreservingReservation, helper, cg);4800if (success)4801node->setSymbolReference(comp->getSymRefTab()->findOrCreateRuntimeHelper(helper, true, true, true));48024803if (reservingLock)4804{4805uint32_t reservableLwValue = RES_BIT;4806if (TR::Options::_aggressiveLockReservation)4807reservableLwValue = 0;48084809// Make this integer the same size as the lock word. 
If we always4810// passed a 32-bit value, then on 64-bit with an uncompressed lock4811// word, the helper would have to either zero-extend the value, or4812// rely on the caller having done so even though the calling4813// convention doesn't appear to require it.4814TR::Node *reservableLwNode = NULL;4815if (cg->comp()->target().is32Bit() || fej9->generateCompressedLockWord())4816reservableLwNode = TR::Node::iconst(node, reservableLwValue);4817else4818reservableLwNode = TR::Node::lconst(node, reservableLwValue);48194820helperCallNode = TR::Node::create(4821node,4822TR::call,48232,4824objectRef,4825reservableLwNode);48264827helperCallNode->setSymbolReference(node->getSymbolReference());4828helperCallNode->incReferenceCount();4829}4830}48314832if (cg->comp()->target().is64Bit() && cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_X86_HLE) && comp->getOption(TR_X86HLE))4833{4834TR::LabelSymbol *JITMonitorEntrySnippetLabel = generateLabelSymbol(cg);4835TR::TreeEvaluator::transactionalMemoryJITMonitorEntry(node, cg, startLabel, snippetLabel, JITMonitorEntrySnippetLabel, objectReg, lwOffset);4836outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(helperCallNode, TR::call, NULL,4837JITMonitorEntrySnippetLabel, (exitLabel) ? exitLabel : fallThru, cg);4838}4839else4840outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(helperCallNode, TR::call, NULL,4841snippetLabel, (exitLabel) ? 
exitLabel : snippetFallThru, cg);48424843if (helperCallNode != node)4844helperCallNode->recursivelyDecReferenceCount();48454846cg->getOutlinedInstructionsList().push_front(outlinedHelperCall);4847cg->generateDebugCounter(4848outlinedHelperCall->getFirstInstruction(),4849TR::DebugCounter::debugCounterName(comp, "helperCalls/%s/(%s)/%d/%d", node->getOpCode().getName(), comp->signature(), node->getByteCodeInfo().getCallerIndex(), node->getByteCodeInfo().getByteCodeIndex()),48501, TR::DebugCounter::Cheap);48514852// Okay, and we've made it down here and we've successfully generated all outlined snippets, let's restore the node's symref.4853if (comp->getOption(TR_ReservingLocks))4854{4855node->setSymbolReference(originalNodeSymRef);4856}48574858if (inlineRecursive)4859{4860TR::LabelSymbol *inlineRecursiveSnippetLabel = generateLabelSymbol(cg);4861TR::LabelSymbol *jitMonitorEnterSnippetLabel = snippetLabel;4862snippetLabel = inlineRecursiveSnippetLabel;4863TR::TreeEvaluator::inlineRecursiveMonitor(node, cg, fallThru, jitMonitorEnterSnippetLabel, inlineRecursiveSnippetLabel, objectReg, lwOffset, snippetFallThru, reservingLock);4864}48654866// Compare the monitor slot in the object against zero. If it succeeds4867// we are done. Else call the helper.4868// Code generated:4869// xor eax, eax4870// cmpxchg monitor(objectReg), ebp4871// jne snippet4872// label restartLabel4873//4874// Code generated for read monitor enter:4875// xor eax, eax4876// mov lockedReg, INC_DEC_VALUE (0x04)4877// cmpxchg monitor(objectReg), lockedReg4878// jne snippet4879// label restartLabel4880//4881TR::Register *lockedReg = NULL;4882TR::InstOpCode::Mnemonic op = TR::InstOpCode::bad;48834884if (cg->comp()->target().is64Bit() && !fej9->generateCompressedLockWord())4885{4886op = cg->comp()->target().isSMP() ? 
TR::InstOpCode::LCMPXCHG8MemReg : TR::InstOpCode::CMPXCHG8MemReg;4887if (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_X86_HLE) && comp->getOption(TR_X86HLE))4888op = cg->comp()->target().isSMP() ? TR::InstOpCode::XALCMPXCHG8MemReg : TR::InstOpCode::XACMPXCHG8MemReg;4889}4890else4891{4892op = cg->comp()->target().isSMP() ? TR::InstOpCode::LCMPXCHG4MemReg : TR::InstOpCode::CMPXCHG4MemReg;4893if (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_X86_HLE) && comp->getOption(TR_X86HLE))4894op = cg->comp()->target().isSMP() ? TR::InstOpCode::XALCMPXCHG4MemReg : TR::InstOpCode::XACMPXCHG4MemReg;4895}48964897TR::Register *objectClassReg = NULL;4898TR::Register *lookupOffsetReg = NULL;48994900if (lwOffset <= 0)4901{4902TR::MemoryReference *objectClassMR = generateX86MemoryReference(objectReg, TMP_OFFSETOF_J9OBJECT_CLAZZ, cg);4903objectClassReg = cg->allocateRegister();4904numDeps++;4905TR::X86RegMemInstruction *instr;4906if (TR::Compiler->om.compressObjectReferences())4907instr = generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, objectClassReg, objectClassMR, cg);4908else4909instr = generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, objectClassReg, objectClassMR, cg);4910// This instruction may try to dereference a null memory address4911// add an implicit exception point for it.4912//4913cg->setImplicitExceptionPoint(instr);4914instr->setNeedsGCMap(0xFF00FFFF);49154916TR::TreeEvaluator::generateVFTMaskInstruction(node, objectClassReg, cg);4917int32_t offsetOfLockOffset = offsetof(J9Class, lockOffset);4918generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, objectClassReg, generateX86MemoryReference(objectClassReg, offsetOfLockOffset, cg), cg);4919generateRegImmInstruction(TR::InstOpCode::CMPRegImms(), node, objectClassReg, 0, cg);49204921generateCommonLockNurseryCodes(4922node,4923cg,4924true, //true for VMmonentEvaluator, false for 
VMmonexitEvaluator4925monitorLookupCacheLabel,4926fallThruFromMonitorLookupCacheLabel,4927snippetLabel,4928numDeps,4929lwOffset,4930objectClassReg,4931lookupOffsetReg,4932vmThreadReg,4933objectReg);4934}49354936if (comp->getOption(TR_ReservingLocks) && reservingLock)4937{4938TR::LabelSymbol *mismatchLabel = NULL;4939if (TR::Options::_aggressiveLockReservation)4940mismatchLabel = snippetLabel;4941else4942mismatchLabel = generateLabelSymbol(cg);49434944#if defined(TRACE_LOCK_RESERVATION)4945{4946auto cds = cg->findOrCreate4ByteConstant(node, (int)node);4947TR::MemoryReference *tempMR = generateX86MemoryReference(cds, cg);49484949TR::X86MemImmInstruction * instr;4950if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())4951{4952generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, eaxReal, eaxReal, cg); // Zero out eaxReal4953instr = generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, eaxReal, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), cg);4954}4955else4956instr = generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, eaxReal, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), cg);49574958cg->setImplicitExceptionPoint(instr);4959instr->setNeedsGCMap(0xFF00FFFF);49604961TR::SymbolReference *tempRef = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR_UInt32);4962TR::MemoryReference *tempMR1 = generateX86MemoryReference(tempRef, cg);49634964generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR, eaxReal, cg);4965generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR1, eaxReal, cg);49664967auto cds1 = cg->findOrCreate4ByteConstant(node, (int)node+2);4968TR::MemoryReference *tempMR3 = generateX86MemoryReference(cds1, cg);4969TR::SymbolReference *tempRef2 = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR_UInt32);4970TR::MemoryReference *tempMR2 = generateX86MemoryReference(tempRef2, cg);49714972generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, 
tempMR3, objectReg, cg);4973generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR2, objectReg, cg);49744975scratchReg = cg->allocateRegister();4976numDeps++;4977TR::TreeEvaluator::generateValueTracingCode (node, vmThreadReg, scratchReg, objectReg, eaxReal, cg);4978}4979#endif49804981generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, eaxReal, generateX86MemoryReference(vmThreadReg, RES_BIT, cg), cg);49824983TR::X86MemRegInstruction * instr;4984if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())4985{4986// Use TR::InstOpCode::CMP4RegMem instead of TR::InstOpCode::CMPRegMem(...).4987instr = generateMemRegInstruction(TR::InstOpCode::CMP4MemReg, node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), eaxReal, cg);4988}4989else4990instr = generateMemRegInstruction(TR::InstOpCode::CMPMemReg(cg->comp()->target().is64Bit()), node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), eaxReal, cg);49914992cg->setImplicitExceptionPoint(instr);4993instr->setNeedsGCMap(0xFF00FFFF);49944995generateLabelInstruction(TR::InstOpCode::JNE4, node, mismatchLabel, cg);49964997if (!node->isPrimitiveLockedRegion())4998{4999if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())5000{5001// Use ADD4memImms instead of TR::InstOpCode::ADDMemImms5002generateMemImmInstruction(TR::InstOpCode::ADD4MemImms, node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), REC_BIT, cg);5003}5004else5005generateMemImmInstruction(TR::InstOpCode::ADDMemImms(), node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), REC_BIT, cg);5006}50075008if (!TR::Options::_aggressiveLockReservation)5009{5010// Jump over the non-reservable path5011generateLabelInstruction(TR::InstOpCode::JMP4, node, fallThru, cg);50125013// It's possible that the lock may be available, but not reservable. 
In5014// that case we should try the usual cmpxchg for non-reserving enter.5015// Otherwise we'll necessarily call the helper.5016generateLabelInstruction(TR::InstOpCode::label, node, mismatchLabel, cg);50175018TR::InstOpCode::Mnemonic cmpOp = TR::InstOpCode::CMPMemImms();5019if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())5020cmpOp = TR::InstOpCode::CMP4MemImms;50215022auto lwMR = getMemoryReference(objectClassReg, objectReg, lwOffset, cg);5023generateMemImmInstruction(cmpOp, node, lwMR, 0, cg);5024generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);5025generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, eaxReal, eaxReal, cg);5026lwMR = getMemoryReference(objectClassReg, objectReg, lwOffset, cg);5027generateMemRegInstruction(op, node, lwMR, vmThreadReg, cg);5028generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);5029}5030}5031else5032{5033if (TR::Options::_aggressiveLockReservation)5034{5035if (comp->getOption(TR_ReservingLocks) && normalLockPreservingReservation)5036{5037TR::X86MemImmInstruction * instr;5038if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())5039instr = generateMemImmInstruction(TR::InstOpCode::CMP4MemImms, node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), 0, cg);5040else5041instr = generateMemImmInstruction(TR::InstOpCode::CMPMemImms(), node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), 0, cg);5042cg->setImplicitExceptionPoint(instr);5043instr->setNeedsGCMap(0xFF00FFFF);50445045generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);5046}50475048generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, eaxReal, eaxReal, cg);5049}5050else if (!comp->getOption(TR_ReservingLocks))5051{5052generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, eaxReal, eaxReal, cg);5053}5054else5055{5056TR::InstOpCode::Mnemonic loadOp = TR::InstOpCode::LRegMem();5057TR::InstOpCode::Mnemonic testOp = 
TR::InstOpCode::TESTRegImm4();5058if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())5059{5060loadOp = TR::InstOpCode::L4RegMem;5061testOp = TR::InstOpCode::TEST4RegImm4;5062}50635064auto lwMR = getMemoryReference(objectClassReg, objectReg, lwOffset, cg);5065auto instr = generateRegMemInstruction(loadOp, node, eaxReal, lwMR, cg);5066cg->setImplicitExceptionPoint(instr);5067instr->setNeedsGCMap(0xFF00FFFF);50685069generateRegImmInstruction(testOp, node, eaxReal, (int32_t)~RES_BIT, cg);5070generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);5071}50725073if (doCmpFirst &&5074!comesFromClassLib(node, comp))5075{5076TR::X86MemImmInstruction * instr;5077if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())5078instr = generateMemImmInstruction(TR::InstOpCode::CMP4MemImms, node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), 0, cg);5079else5080instr = generateMemImmInstruction(TR::InstOpCode::CMPMemImms(), node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), 0, cg);50815082cg->setImplicitExceptionPoint(instr);5083instr->setNeedsGCMap(0xFF00FFFF);50845085generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);5086}50875088if (node->isReadMonitor())5089{5090lockedReg = cg->allocateRegister();5091if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())5092generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, lockedReg, lockedReg, cg); //After lockedReg is allocated zero it out.5093generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(), node, lockedReg, INC_DEC_VALUE, cg);5094++numDeps;5095}5096else5097{5098#if defined(J9VM_OPT_REAL_TIME_LOCKING_SUPPORT)5099// need to get monitor from cache, if we can5100lockedReg = cg->allocateRegister();5101numDeps++;5102generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, lockedReg,5103generateX86MemoryReference(vmThreadReg, fej9->thisThreadMonitorCacheOffset(), cg), 
cg);5104generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, lockedReg, lockedReg, cg);5105generateLabelInstruction(TR::InstOpCode::JE4, node, snippetLabel, cg);51065107#else5108bool conditionallyReserve = false;5109bool shouldConditionallyReserveForReservableClasses =5110comp->getOption(TR_ReservingLocks)5111&& !TR::Options::_aggressiveLockReservation5112&& lwOffset > 05113&& cg->getMonClass(node) != NULL;51145115if (shouldConditionallyReserveForReservableClasses)5116{5117TR_PersistentClassInfo *monClassInfo = comp5118->getPersistentInfo()5119->getPersistentCHTable()5120->findClassInfoAfterLocking(cg->getMonClass(node), comp);51215122if (monClassInfo != NULL && monClassInfo->isReservable())5123conditionallyReserve = true;5124}51255126if (!conditionallyReserve)5127{5128// we want to write thread reg into lock word5129lockedReg = vmThreadReg;5130}5131else5132{5133lockedReg = cg->allocateRegister();5134numDeps++;51355136// Compute the value to put into the lock word based on the5137// current value, which is either 0 or RES_BIT ("reservable").5138//5139// 0 ==> vmThreadReg5140// RES_BIT ==> vmThreadReg | RES_BIT | INC_DEC_VALUE5141//5142// For reservable locks, failure to reserve at this point would5143// prevent any future reservation of the same lock.51445145bool b64 = cg->comp()->target().is64Bit() && !fej9->generateCompressedLockWord();5146generateRegRegInstruction(TR::InstOpCode::MOVRegReg(b64), node, lockedReg, eaxReal, cg);5147generateRegImmInstruction(TR::InstOpCode::SHRRegImm1(b64), node, lockedReg, RES_BIT_POSITION, cg);5148generateRegInstruction(TR::InstOpCode::NEGReg(b64), node, lockedReg, cg);5149generateRegImmInstruction(TR::InstOpCode::ANDRegImms(b64), node, lockedReg, RES_BIT | INC_DEC_VALUE, cg);5150generateRegRegInstruction(TR::InstOpCode::ADDRegReg(b64), node, lockedReg, vmThreadReg, cg);5151}5152#endif5153}51545155// try to swap into lock word5156TR::X86MemRegInstruction *instr = generateMemRegInstruction(op, node, 
getMemoryReference(objectClassReg, objectReg, lwOffset, cg), lockedReg, cg);5157cg->setImplicitExceptionPoint(instr);5158instr->setNeedsGCMap(0xFF00FFFF);51595160generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);5161}51625163// Create dependencies for the registers used.5164// The dependencies must be in the order:5165// objectReg, eaxReal, vmThreadReg5166// since the snippet needs to find them to grab the real registers from them.5167//5168TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)numDeps, cg);5169deps->addPostCondition(objectReg, TR::RealRegister::NoReg, cg);5170deps->addPostCondition(eaxReal, TR::RealRegister::eax, cg);5171deps->addPostCondition(vmThreadReg, TR::RealRegister::ebp, cg);51725173if (scratchReg)5174deps->addPostCondition(scratchReg, TR::RealRegister::NoReg, cg);51755176if (lockedReg != NULL && lockedReg != vmThreadReg)5177{5178deps->addPostCondition(lockedReg, TR::RealRegister::NoReg, cg);5179}51805181if (objectClassReg)5182deps->addPostCondition(objectClassReg, TR::RealRegister::NoReg, cg);51835184if (lookupOffsetReg)5185deps->addPostCondition(lookupOffsetReg, TR::RealRegister::NoReg, cg);51865187deps->stopAddingConditions();51885189#if defined(J9VM_OPT_REAL_TIME_LOCKING_SUPPORT)5190// our lock is in the object, now need to advance to next monitor in cache5191generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, lockedReg,5192generateX86MemoryReference(lockedReg, fej9->getMonitorNextOffset(), cg), cg);5193generateMemRegInstruction(TR::InstOpCode::SMemReg(), node,5194generateX86MemoryReference(vmThreadReg, fej9->thisThreadMonitorCacheOffset(), cg),5195lockedReg, cg);5196#endif51975198generateLabelInstruction(TR::InstOpCode::label, node, fallThru, deps, cg);51995200#if defined(TRACE_LOCK_RESERVATION)5201{5202auto cds = cg->findOrCreate4ByteConstant(node, (int)node+1);5203TR::MemoryReference *tempMR = generateX86MemoryReference(cds, cg);52045205TR::X86RegMemInstruction 
*instr;5206if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())5207instr = generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, eaxReal, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), cg)5208else5209instr = generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, eaxReal, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), cg);52105211cg->setImplicitExceptionPoint(instr);5212instr->setNeedsGCMap(0xFF00FFFF);52135214TR::SymbolReference *tempRef = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR_UInt32);5215TR::MemoryReference *tempMR1 = generateX86MemoryReference(tempRef, cg);52165217generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR, eaxReal, cg);5218generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR1, eaxReal, cg);52195220auto cds1 = cg->findOrCreate4ByteConstant(node, (int)node+2);5221TR::MemoryReference *tempMR3 = generateX86MemoryReference(cds1, cg);5222TR::SymbolReference *tempRef2 = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR_UInt32);5223TR::MemoryReference *tempMR2 = generateX86MemoryReference(tempRef2, cg);52245225generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR3, objectReg, cg);5226generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR2, objectReg, cg);5227}5228#endif52295230cg->decReferenceCount(objectRef);5231cg->stopUsingRegister(eaxReal);5232if (scratchReg)5233cg->stopUsingRegister(scratchReg);5234if (objectClassReg)5235cg->stopUsingRegister(objectClassReg);5236if (lookupOffsetReg)5237cg->stopUsingRegister(lookupOffsetReg);52385239if (lockedReg != NULL && lockedReg != vmThreadReg)5240{5241cg->stopUsingRegister(lockedReg);5242}52435244return NULL;5245}524652475248void J9::X86::TreeEvaluator::generateValueTracingCode(5249TR::Node *node,5250TR::Register *vmThreadReg,5251TR::Register *scratchReg,5252TR::Register *valueReg,5253TR::CodeGenerator *cg)5254{5255if 
(!cg->comp()->getOption(TR_EnableValueTracing))5256return;5257// the code requires that the caller has vmThread in EBP as well as5258// that the caller has already setup internal control flow5259uint32_t vmThreadBase = offsetof(J9VMThread, debugEventData6);5260uint32_t vmThreadTop = offsetof(J9VMThread, debugEventData4);5261uint32_t vmThreadCursor = offsetof(J9VMThread, debugEventData5);5262TR::LabelSymbol *endLabel = generateLabelSymbol(cg);52635264generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, scratchReg, generateX86MemoryReference(vmThreadReg, vmThreadCursor, cg), cg);5265generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, scratchReg, 8, cg);52665267generateMemRegInstruction(TR::InstOpCode::CMPMemReg(), node, generateX86MemoryReference(vmThreadReg, vmThreadTop, cg), scratchReg, cg);5268generateLabelInstruction(TR::InstOpCode::JG4, node, endLabel, cg);5269generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, scratchReg, generateX86MemoryReference(vmThreadReg, vmThreadBase, cg), cg);5270generateLabelInstruction(TR::InstOpCode::label, node, endLabel, cg);5271generateMemImmInstruction(TR::InstOpCode::SMemImm4(), node, generateX86MemoryReference(scratchReg, 0, cg), node->getOpCodeValue(), cg);5272generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(scratchReg, 0, cg), valueReg, cg);5273generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(vmThreadReg, vmThreadCursor, cg), scratchReg, cg);5274}52755276void J9::X86::TreeEvaluator::generateValueTracingCode(5277TR::Node *node,5278TR::Register *vmThreadReg,5279TR::Register *scratchReg,5280TR::Register *valueRegHigh,5281TR::Register *valueRegLow,5282TR::CodeGenerator *cg)5283{5284if (!cg->comp()->getOption(TR_EnableValueTracing))5285return;52865287// the code requires that the caller has vmThread in EBP as well as5288// that the caller has already setup internal control flow5289uint32_t vmThreadBase = offsetof(J9VMThread, 
debugEventData6);5290uint32_t vmThreadTop = offsetof(J9VMThread, debugEventData4);5291uint32_t vmThreadCursor = offsetof(J9VMThread, debugEventData5);5292TR::LabelSymbol *endLabel = generateLabelSymbol(cg);52935294generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, scratchReg, generateX86MemoryReference(vmThreadReg, vmThreadCursor, cg), cg);5295generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, scratchReg, 0x10, cg);52965297generateMemRegInstruction(TR::InstOpCode::CMPMemReg(), node, generateX86MemoryReference(vmThreadReg, vmThreadTop, cg), scratchReg, cg);5298generateLabelInstruction(TR::InstOpCode::JG4, node, endLabel, cg);5299generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, scratchReg, generateX86MemoryReference(vmThreadReg, vmThreadBase, cg), cg);5300generateLabelInstruction(TR::InstOpCode::label, node, endLabel, cg);5301generateMemImmInstruction(TR::InstOpCode::SMemImm4(), node, generateX86MemoryReference(scratchReg, 0, cg), node->getOpCodeValue(), cg);5302generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(scratchReg, 4, cg), valueRegHigh, cg);5303generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(scratchReg, 8, cg), valueRegLow, cg);5304generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, valueRegLow, generateX86MemoryReference(valueRegHigh, 0, cg), cg);5305generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(scratchReg, 0xc, cg), valueRegLow, cg);5306generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(vmThreadReg, vmThreadCursor, cg), scratchReg, cg);5307}53085309TR::Register5310*J9::X86::TreeEvaluator::VMmonexitEvaluator(5311TR::Node *node,5312TR::CodeGenerator *cg)5313{5314// If there is a NULLCHK above this node it will be expecting us to set5315// up the excepting instruction. 
If we are not going to inline an5316// appropriate excepting instruction we must make sure to reset the5317// excepting instruction since our children may have set it.5318//5319TR::Compilation *comp = cg->comp();5320TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());5321static const char *noInline = feGetEnv("TR_NoInlineMonitor");5322static const char *firstMonExit = feGetEnv("TR_FirstMonExit");5323static int32_t monExitCount = 0;5324bool reservingLock = false;5325bool normalLockPreservingReservation = false;5326bool dummyMethodMonitor = false;5327bool gen64BitInstr = cg->comp()->target().is64Bit() && !fej9->generateCompressedLockWord();5328int lwOffset = fej9->getByteOffsetToLockword((TR_OpaqueClassBlock *) cg->getMonClass(node));5329TR_YesNoMaybe isMonitorValueBasedOrValueType = cg->isMonitorValueBasedOrValueType(node);53305331if ((comp->getOption(TR_MimicInterpreterFrameShape) /*&& !comp->getOption(TR_EnableLiveMonitorMetadata)*/) ||5332noInline ||5333(isMonitorValueBasedOrValueType == TR_yes) ||5334comp->getOption(TR_DisableInlineMonExit) ||5335(firstMonExit && (*firstMonExit-'0') > monExitCount++))5336{5337// Don't inline5338//5339TR::ILOpCodes opCode = node->getOpCodeValue();5340TR::Node::recreate(node, TR::call);5341TR::TreeEvaluator::directCallEvaluator(node, cg);5342TR::Node::recreate(node, opCode);5343cg->setImplicitExceptionPoint(NULL);5344return NULL;5345}53465347if (lwOffset > 0 && comp->getOption(TR_ReservingLocks))5348{5349bool dummy=false;5350TR::TreeEvaluator::evaluateLockForReservation (node, &reservingLock, &normalLockPreservingReservation, cg);5351if (node->isPrimitiveLockedRegion() && reservingLock)5352dummyMethodMonitor = TR::TreeEvaluator::isDummyMonitorExit(node, cg);53535354if (!node->isPrimitiveLockedRegion() && reservingLock)5355dummyMethodMonitor = false;5356}53575358if (dummyMethodMonitor)5359{5360cg->decReferenceCount(node->getFirstChild());5361return NULL;5362}53635364static const char *disableInlineRecursiveEnv = 
feGetEnv("TR_DisableInlineRecursiveMonitor");5365bool inlineRecursive = disableInlineRecursiveEnv ? false : true;5366if (comp->getOption(TR_X86HLE) || lwOffset <= 0)5367inlineRecursive = false;53685369// Evaluate the object reference5370//5371TR::Node *objectRef = node->getFirstChild();5372TR::Register *objectReg = cg->evaluate(objectRef);5373TR::Register *tempReg = NULL;5374uint32_t numDeps = 2; // objectReg, ebp53755376cg->setImplicitExceptionPoint(NULL);5377TR::Register *vmThreadReg = cg->getVMThreadRegister();53785379TR::LabelSymbol *startLabel = generateLabelSymbol(cg);5380TR::LabelSymbol *fallThru = generateLabelSymbol(cg);5381// Create the monitor exit snippet5382TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);53835384if (isMonitorValueBasedOrValueType == TR_maybe)5385TR::TreeEvaluator::generateCheckForValueMonitorEnterOrExit(node, J9_CLASS_DISALLOWS_LOCKING_FLAGS, snippetLabel, cg);53865387#if !defined(J9VM_OPT_REAL_TIME_LOCKING_SUPPORT)5388// Now that the object reference has been generated, see if this is the end5389// of a small synchronized block.5390// The definition of "small" depends on the method hotness and is measured5391// in instructions.5392// The following method makes use of the fact that the body of the sync5393// block has been generated but the monitor exit hasn't yet.5394//5395int32_t maxInstructions;5396TR_Hotness hotness = comp->getMethodHotness();5397if (hotness == scorching) maxInstructions = 30;5398else if (hotness == hot) maxInstructions = 20;5399else maxInstructions = 10;5400#endif54015402startLabel->setStartInternalControlFlow();5403TR::LabelSymbol *snippetFallThru = inlineRecursive ? 
generateLabelSymbol(cg): fallThru;5404fallThru->setEndInternalControlFlow();5405generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);54065407TR::Register *eaxReal = 0;5408TR::Register *unlockedReg = 0;5409TR::Register *scratchReg = 0;54105411TR::Register *objectClassReg = NULL;5412TR::Register *lookupOffsetReg = NULL;54135414if (lwOffset <= 0)5415{5416TR::MemoryReference *objectClassMR = generateX86MemoryReference(objectReg, TMP_OFFSETOF_J9OBJECT_CLAZZ, cg);5417objectClassReg = cg->allocateRegister();5418TR::Instruction *instr = NULL;5419if (TR::Compiler->om.compressObjectReferences())5420instr = generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, objectClassReg, objectClassMR, cg);5421else5422instr = generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, objectClassReg, objectClassMR, cg);5423//this instruction may try to dereference a null memory address5424//add an implicit exception point for it.5425cg->setImplicitExceptionPoint(instr);5426instr->setNeedsGCMap(0xFF00FFFF);54275428TR::TreeEvaluator::generateVFTMaskInstruction(node, objectClassReg, cg);5429int32_t offsetOfLockOffset = offsetof(J9Class, lockOffset);5430generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, objectClassReg, generateX86MemoryReference(objectClassReg, offsetOfLockOffset, cg), cg);54315432numDeps++;5433}54345435TR::LabelSymbol *monitorLookupCacheLabel = generateLabelSymbol(cg);5436TR::LabelSymbol *fallThruFromMonitorLookupCacheLabel = generateLabelSymbol(cg);54375438#if defined(J9VM_OPT_REAL_TIME_LOCKING_SUPPORT)5439TR::LabelSymbol *decCountLabel = generateLabelSymbol(cg);54405441unlockedReg = cg->allocateRegister();5442tempReg = cg->allocateRegister();5443eaxReal = cg->allocateRegister();54445445numDeps += 3;54465447if (lwOffset <= 0)5448{5449generateRegImmInstruction(TR::InstOpCode::CMPRegImms(), node, objectClassReg, 0, cg);54505451generateCommonLockNurseryCodes(node,5452cg,5453false, //true for VMmonentEvaluator, false for 
VMmonexitEvaluator5454monitorLookupCacheLabel,5455fallThruFromMonitorLookupCacheLabel,5456snippetLabel,5457numDeps,5458lwOffset,5459objectClassReg,5460lookupOffsetReg,5461vmThreadReg,5462objectReg);5463}546454655466// load lock word5467generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, tempReg, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), cg);54685469// extract monitor from lock word5470generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, unlockedReg, tempReg, cg);54715472#define LOCK_PINNED_BIT (0x1)5473generateRegImmInstruction(TR::InstOpCode::ANDRegImms(), node, unlockedReg, ~((UDATA) LOCK_PINNED_BIT), cg);54745475// need a NULL test to snippet: about to dereference lock word5476generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, unlockedReg, unlockedReg, cg);5477generateLabelInstruction(TR::InstOpCode::JE4, node, snippetLabel, cg);54785479// if OS monitors don't match, let snippet handle it5480generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, eaxReal,5481generateX86MemoryReference(unlockedReg, fej9->getMonitorOwnerOffset(), cg), cg);5482generateRegMemInstruction(TR::InstOpCode::CMPRegMem(), node, eaxReal,5483generateX86MemoryReference(vmThreadReg, fej9->thisThreadOSThreadOffset(), cg), cg);5484generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);54855486// monitors match so we can unlock it5487// decrement count, maybe unlock object5488generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, eaxReal,5489generateX86MemoryReference(unlockedReg, fej9->getMonitorEntryCountOffset(), cg), cg);5490generateRegImmInstruction(TR::InstOpCode::CMPRegImms(), node, eaxReal, 1, cg);5491generateLabelInstruction(TR::InstOpCode::JA4, node, decCountLabel, cg);549254935494// leaving main-line code path5495// create the outlined path that decrements the count5496{5497TR_OutlinedInstructionsGenerator og(decCountLabel, node, cg);5498generateMemInstruction( TR::InstOpCode::DECMem(cg), node, 
generateX86MemoryReference(unlockedReg, fej9->getMonitorEntryCountOffset(), cg), cg);5499generateLabelInstruction(TR::InstOpCode::JMP4, node, fallThru, cg);55005501og.endOutlinedInstructionSequence();5502}55035504// back to main-line code path55055506// unlock object...but only if lock pinned bit is clear5507generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, eaxReal, unlockedReg, cg);5508generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);550955105511TR::InstOpCode::Mnemonic op = cg->comp()->target().isSMP() ? TR::InstOpCode::LCMPXCHGMemReg(gen64BitInstr) : TR::InstOpCode::CMPXCHGMemReg(gen64BitInstr);55125513// compare-and-swap to unlock:5514generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, eaxReal, eaxReal, cg);5515cg->setImplicitExceptionPoint(generateMemRegInstruction(op, node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), eaxReal, cg));55165517generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, eaxReal, unlockedReg, cg);5518generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);55195520// unlocked the object, just need to put monitor back in thread cache5521generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, eaxReal,5522generateX86MemoryReference(vmThreadReg, fej9->thisThreadMonitorCacheOffset(), cg), cg);5523generateMemRegInstruction(TR::InstOpCode::SMemReg(), node,5524generateX86MemoryReference(unlockedReg, fej9->getMonitorNextOffset(), cg), eaxReal, cg);5525generateMemRegInstruction(TR::InstOpCode::SMemReg(), node,5526generateX86MemoryReference(vmThreadReg, fej9->thisThreadMonitorCacheOffset(), cg),5527unlockedReg, cg);55285529TR_OutlinedInstructions *outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(node, TR::call, NULL, snippetLabel, fallThru, cg);5530cg->getOutlinedInstructionsList().push_front(outlinedHelperCall);5531cg->generateDebugCounter(5532outlinedHelperCall->getFirstInstruction(),5533TR::DebugCounter::debugCounterName(comp, 
"helperCalls/%s/(%s)/%d/%d", node->getOpCode().getName(), comp->signature(), node->getByteCodeInfo().getCallerIndex(), node->getByteCodeInfo().getByteCodeIndex()),55341, TR::DebugCounter::Cheap);55355536#else55375538if (lwOffset <= 0)5539{5540generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, objectClassReg, 0, cg);55415542generateCommonLockNurseryCodes(node,5543cg,5544false, //true for VMmonentEvaluator, false for VMmonexitEvaluator5545monitorLookupCacheLabel,5546fallThruFromMonitorLookupCacheLabel,5547snippetLabel,5548numDeps,5549lwOffset,5550objectClassReg,5551lookupOffsetReg,5552vmThreadReg,5553objectReg);5554}55555556// This is a normal inlined monitor exit5557//5558// Compare the monitor slot in the object against the thread register.5559// If it succeeds we are done. Else call the helper.5560//5561// Code generated:5562// cmp ebp, monitor(objectReg)5563// jne snippet5564// test flags(objectReg), FLC-bit ; Only if FLC in separate word5565// jne snippet5566// mov monitor(objectReg), 05567// label restartLabel5568//5569// Code generated for read monitor:5570// xor unlockedReg, unlockedReg5571// mov eax, INC_DEC_VALUE5572// (lock)cmpxchg monitor(objectReg), unlockedReg5573// jne snippet5574// label restartLabel5575//5576if (comp->getOption(TR_ReservingLocks))5577{5578if (reservingLock)5579{5580tempReg = cg->allocateRegister();5581numDeps++;5582}5583}55845585if (comp->getOption(TR_ReservingLocks))5586{5587if (reservingLock || normalLockPreservingReservation)5588{5589TR_RuntimeHelper helper;5590bool success = TR::TreeEvaluator::monEntryExitHelper(false, node, reservingLock, normalLockPreservingReservation, helper, cg);55915592TR_ASSERT(success == true, "monEntryExitHelper: could not find runtime helper");55935594node->setSymbolReference(comp->getSymRefTab()->findOrCreateRuntimeHelper(helper, true, true, true));5595}5596}5597TR_OutlinedInstructions *outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(node, TR::call, NULL, snippetLabel, 
snippetFallThru, cg);5598cg->getOutlinedInstructionsList().push_front(outlinedHelperCall);5599cg->generateDebugCounter(5600outlinedHelperCall->getFirstInstruction(),5601TR::DebugCounter::debugCounterName(comp, "helperCalls/%s/(%s)/%d/%d", node->getOpCode().getName(), comp->signature(), node->getByteCodeInfo().getCallerIndex(), node->getByteCodeInfo().getByteCodeIndex()),56021, TR::DebugCounter::Cheap);56035604if (inlineRecursive)5605{5606TR::LabelSymbol *inlineRecursiveSnippetLabel = generateLabelSymbol(cg);5607TR::LabelSymbol *jitMonitorExitSnippetLabel = snippetLabel;5608snippetLabel = inlineRecursiveSnippetLabel;5609TR::TreeEvaluator::inlineRecursiveMonitor(node, cg, fallThru, jitMonitorExitSnippetLabel, inlineRecursiveSnippetLabel, objectReg, lwOffset, snippetFallThru, reservingLock);5610}56115612bool reservingDecrementNeeded = false;56135614if (node->isReadMonitor())5615{5616unlockedReg = cg->allocateRegister();5617eaxReal = cg->allocateRegister();5618generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, unlockedReg, unlockedReg, cg);5619generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(), node, eaxReal, INC_DEC_VALUE, cg);56205621TR::InstOpCode::Mnemonic op = cg->comp()->target().isSMP() ? 
TR::InstOpCode::LCMPXCHGMemReg(gen64BitInstr) : TR::InstOpCode::CMPXCHGMemReg(gen64BitInstr);5622cg->setImplicitExceptionPoint(generateMemRegInstruction(op, node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), unlockedReg, cg));5623numDeps += 2;5624}5625else5626{5627if (reservingLock)5628{5629#if defined(TRACE_LOCK_RESERVATION)5630auto cds = cg->findOrCreate4ByteConstant(node, (int)node);5631TR::MemoryReference *tempMR = generateX86MemoryReference(cds, cg);56325633if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())5634{5635generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, tempReg, tempReg, cg); // Zero out tempReg before TR::InstOpCode::LRegMem op.5636}5637cg->setImplicitExceptionPoint(generateRegMemInstruction(5638TR::InstOpCode::LRegMem(gen64BitInstr), node, tempReg,5639getMemoryReference(objectClassReg, objectReg, lwOffset, cg), cg));56405641TR::SymbolReference *tempRef = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR_UInt32);5642TR::MemoryReference *tempMR1 = generateX86MemoryReference(tempRef, cg);56435644generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR, tempReg, cg);5645generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR1, tempReg, cg);56465647auto cds1 = cg->findOrCreate4ByteConstant(node, (int)node+2);5648TR::MemoryReference *tempMR3 = generateX86MemoryReference(cds1, cg);5649TR::SymbolReference *tempRef2 = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR_UInt32);5650TR::MemoryReference *tempMR2 = generateX86MemoryReference(tempRef2, cg);56515652generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR3, objectReg, cg);5653generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR2, objectReg, cg);56545655scratchReg = cg->allocateRegister();5656numDeps++;56575658TR::LabelSymbol *doneTestLabel = generateLabelSymbol(cg);56595660//generateLabelInstruction(TR::InstOpCode::label, node, doneTestLabel, 
cg);5661//generateImmSymInstruction(TR::InstOpCode::PUSHImm4, node, (uintptr_t)doneTestLabel->getStaticSymbol()->getStaticAddress(), node->getSymbolReference(), cg);5662//generateRegInstruction(TR::InstOpCode::POPReg, node, scratchReg, cg);56635664TR::TreeEvaluator::generateValueTracingCode (node, vmThreadReg, scratchReg, objectReg, tempReg, cg);56655666// cause crash in some cases5667if (0)5668{5669generateRegImmInstruction(TR::InstOpCode::TEST1RegImm1, node, tempReg, 0xA, cg);5670generateLabelInstruction(TR::InstOpCode::JNE4, node, doneTestLabel, cg);5671generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, scratchReg, scratchReg, cg);5672generateRegMemInstruction(TR::InstOpCode::LRegMem(), node,5673scratchReg,5674generateX86MemoryReference(scratchReg, 0, cg), cg);5675generateLabelInstruction(TR::InstOpCode::label, node, doneTestLabel, cg);5676}5677#endif5678if (node->isPrimitiveLockedRegion())5679{5680cg->setImplicitExceptionPoint(generateRegMemInstruction(5681TR::InstOpCode::LRegMem(gen64BitInstr), node, tempReg,5682getMemoryReference(objectClassReg, objectReg, lwOffset, cg), cg));5683// Mask out the thread ID and reservation count5684generateRegImmInstruction(TR::InstOpCode::ANDRegImms(), node, tempReg, FLAGS_MASK, cg);5685// If only the RES flag is set and no other we can continue5686generateRegImmInstruction(TR::InstOpCode::XORRegImms(), node, tempReg, RES_BIT, cg);5687}5688else5689{5690reservingDecrementNeeded = true;5691generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, tempReg, generateX86MemoryReference(vmThreadReg, (REC_BIT | RES_BIT), cg), cg);5692cg->setImplicitExceptionPoint(generateMemRegInstruction(5693TR::InstOpCode::CMPMemReg(gen64BitInstr), node,5694getMemoryReference(objectClassReg, objectReg, lwOffset, cg), tempReg, cg));5695}5696}5697else5698{5699cg->setImplicitExceptionPoint(generateRegMemInstruction(5700TR::InstOpCode::CMPRegMem(gen64BitInstr), node, vmThreadReg,5701getMemoryReference(objectClassReg, objectReg, lwOffset, 
cg), cg));5702}5703}57045705TR::LabelSymbol *mismatchLabel = NULL;5706if (reservingLock && !TR::Options::_aggressiveLockReservation)5707mismatchLabel = generateLabelSymbol(cg);5708else5709mismatchLabel = snippetLabel;57105711generateLabelInstruction(TR::InstOpCode::JNE4, node, mismatchLabel, cg);57125713if (reservingDecrementNeeded)5714{5715// Subtract the reservation count5716generateMemImmInstruction(TR::InstOpCode::SUBMemImms(gen64BitInstr), node,5717getMemoryReference(objectClassReg, objectReg, lwOffset, cg), REC_BIT, cg); // I'm not sure TR::InstOpCode::SUB4MemImms will work.5718}57195720if (!node->isReadMonitor() && !reservingLock)5721{5722if (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_X86_HLE) && comp->getOption(TR_X86HLE))5723generateMemImmInstruction(TR::InstOpCode::XRSMemImm4(gen64BitInstr),5724node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), 0, cg);5725else5726generateMemImmInstruction(TR::InstOpCode::SMemImm4(gen64BitInstr), node,5727getMemoryReference(objectClassReg, objectReg, lwOffset, cg), 0, cg);5728}57295730if (reservingLock && !TR::Options::_aggressiveLockReservation)5731{5732generateLabelInstruction(TR::InstOpCode::JMP4, node, fallThru, cg);57335734// Avoid the helper for non-recursive exit in case it isn't reserved5735generateLabelInstruction(TR::InstOpCode::label, node, mismatchLabel, cg);5736auto lwMR = getMemoryReference(objectClassReg, objectReg, lwOffset, cg);5737generateMemRegInstruction(TR::InstOpCode::CMPMemReg(gen64BitInstr), node, lwMR, vmThreadReg, cg);5738generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);5739lwMR = getMemoryReference(objectClassReg, objectReg, lwOffset, cg);5740generateMemImmInstruction(TR::InstOpCode::SMemImm4(gen64BitInstr), node, lwMR, 0, cg);5741}57425743#endif // J9VM_OPT_REAL_TIME_LOCKING_SUPPORT574457455746// Create dependencies for the registers used.5747// The first dependencies must be objectReg, vmThreadReg, tempReg5748// Or, for readmonitors they must be 
objectReg, vmThreadReg, unlockedReg, eaxReal5749// snippet needs to find them to grab the real registers from them.5750//5751TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)numDeps, cg);5752deps->addPostCondition(objectReg, TR::RealRegister::NoReg, cg);5753deps->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);57545755#if !defined(J9VM_OPT_REAL_TIME_LOCKING_SUPPORT)5756if (node->isReadMonitor())5757#endif5758{5759deps->addPostCondition(unlockedReg, TR::RealRegister::NoReg, cg);5760deps->addPostCondition(eaxReal, TR::RealRegister::eax, cg);5761}57625763if (lookupOffsetReg)5764deps->addPostCondition(lookupOffsetReg, TR::RealRegister::NoReg, cg);57655766if (tempReg && !node->isReadMonitor())5767deps->addPostCondition(tempReg, TR::RealRegister::NoReg, cg);5768if (scratchReg)5769deps->addPostCondition(scratchReg, TR::RealRegister::NoReg, cg);5770if (objectClassReg)5771deps->addPostCondition(objectClassReg, TR::RealRegister::NoReg, cg);57725773deps->stopAddingConditions();5774generateLabelInstruction(TR::InstOpCode::label, node, fallThru, deps, cg);57755776#if defined(TRACE_LOCK_RESERVATION)5777if (reservingLock)5778{5779auto cds = cg->findOrCreate4ByteConstant(node, (int)node+1);5780TR::MemoryReference *tempMR = generateX86MemoryReference(cds, cg);57815782cg->setImplicitExceptionPoint(generateRegMemInstruction(5783TR::InstOpCode::LRegMem(gen64BitInstr), node, tempReg,5784getMemoryReference(objectClassReg, objectReg, lwOffset, cg), cg));57855786TR::SymbolReference *tempRef = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR_UInt32);5787TR::MemoryReference *tempMR1 = generateX86MemoryReference(tempRef, cg);57885789generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR, tempReg, cg);5790generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR1, tempReg, cg);57915792auto cds1 = cg->findOrCreate4ByteConstant(node, (int)node+2);5793TR::MemoryReference *tempMR3 = 
generateX86MemoryReference(cds1, cg);5794TR::SymbolReference *tempRef2 = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR_UInt32);5795TR::MemoryReference *tempMR2 = generateX86MemoryReference(tempRef2, cg);57965797generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR3, objectReg, cg);5798generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR2, objectReg, cg);5799}5800#endif58015802if (eaxReal)5803cg->stopUsingRegister(eaxReal);5804if (unlockedReg)5805cg->stopUsingRegister(unlockedReg);58065807cg->decReferenceCount(objectRef);5808if (tempReg)5809cg->stopUsingRegister(tempReg);58105811if (scratchReg)5812cg->stopUsingRegister(scratchReg);58135814if (objectClassReg)5815cg->stopUsingRegister(objectClassReg);58165817if (lookupOffsetReg)5818cg->stopUsingRegister(lookupOffsetReg);58195820return NULL;5821}582258235824bool J9::X86::TreeEvaluator::monEntryExitHelper(5825bool entry,5826TR::Node* node,5827bool reservingLock,5828bool normalLockPreservingReservation,5829TR_RuntimeHelper &helper,5830TR::CodeGenerator* cg)5831{5832bool methodMonitor = entry5833? 
(node->getSymbolReference() == cg->getSymRef(TR_methodMonitorEntry))5834: (node->getSymbolReference() == cg->getSymRef(TR_methodMonitorExit));58355836if (reservingLock)5837{5838if (node->isPrimitiveLockedRegion())5839{5840static TR_RuntimeHelper helpersCase1[2][2][2] =5841{5842{5843{TR_IA32JitMonitorExitReservedPrimitive, TR_IA32JitMethodMonitorExitReservedPrimitive},5844{TR_AMD64JitMonitorExitReservedPrimitive, TR_AMD64JitMethodMonitorExitReservedPrimitive}5845},5846{5847{TR_IA32JitMonitorEnterReservedPrimitive, TR_IA32JitMethodMonitorEnterReservedPrimitive},5848{TR_AMD64JitMonitorEnterReservedPrimitive, TR_AMD64JitMethodMonitorEnterReservedPrimitive}5849}5850};58515852helper = helpersCase1[entry?1:0][cg->comp()->target().is64Bit()?1:0][methodMonitor?1:0];5853return true;5854}5855else5856{5857static TR_RuntimeHelper helpersCase2[2][2][2] =5858{5859{5860{TR_IA32JitMonitorExitReserved, TR_IA32JitMethodMonitorExitReserved},5861{TR_AMD64JitMonitorExitReserved, TR_AMD64JitMethodMonitorExitReserved}5862},5863{5864{TR_IA32JitMonitorEnterReserved, TR_IA32JitMethodMonitorEnterReserved},5865{TR_AMD64JitMonitorEnterReserved, TR_AMD64JitMethodMonitorEnterReserved}5866}5867};58685869helper = helpersCase2[entry?1:0][cg->comp()->target().is64Bit()?1:0][methodMonitor?1:0];5870return true;5871}5872}5873else if (normalLockPreservingReservation)5874{5875static TR_RuntimeHelper helpersCase2[2][2][2] =5876{5877{5878{TR_IA32JitMonitorExitPreservingReservation, TR_IA32JitMethodMonitorExitPreservingReservation},5879{TR_AMD64JitMonitorExitPreservingReservation, TR_AMD64JitMethodMonitorExitPreservingReservation}5880},5881{5882{TR_IA32JitMonitorEnterPreservingReservation, TR_IA32JitMethodMonitorEnterPreservingReservation},5883{TR_AMD64JitMonitorEnterPreservingReservation, TR_AMD64JitMethodMonitorEnterPreservingReservation}5884}5885};58865887helper = helpersCase2[entry?1:0][cg->comp()->target().is64Bit()?1:0][methodMonitor?1:0];5888return true;5889}58905891return 
false;5892}5893589458955896// Generate code to allocate from the object heap. Returns the register5897// containing the address of the allocation.5898//5899// If the sizeReg is non-null, the allocation is variable length. In this case5900// the elementSize is meaningful and "size" is the extra size to be added.5901// Otherwise "size" contains the total size of the allocation.5902//5903// Also, on return the "segmentReg" register is set to the address of the5904// memory segment.5905//5906static void genHeapAlloc(5907TR::Node *node,5908TR_OpaqueClassBlock *clazz,5909int32_t allocationSizeOrDataOffset,5910int32_t elementSize,5911TR::Register *sizeReg,5912TR::Register *eaxReal,5913TR::Register *segmentReg,5914TR::Register *tempReg,5915TR::LabelSymbol *failLabel,5916TR::CodeGenerator *cg)5917{59185919// Load the current heap segment and see if there is room in it. Loop if5920// we can't get the lock on the segment.5921//5922TR::Compilation *comp = cg->comp();5923TR::Register *vmThreadReg = cg->getVMThreadRegister();5924bool generateArraylets = comp->generateArraylets();59255926TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());59275928if (comp->getOptions()->realTimeGC())5929{5930#if defined(J9VM_GC_REALTIME)5931// this will be bogus for variable length allocations because it only includes the header size (+ arraylet ptr for arrays)5932UDATA sizeClass = fej9->getObjectSizeClass(allocationSizeOrDataOffset);59335934if (comp->getOption(TR_BreakOnNew))5935generateInstruction(TR::InstOpCode::INT3, node, cg);59365937// heap allocation, so proceed5938if (sizeReg)5939{5940generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, eaxReal, eaxReal, cg);59415942// make sure size isn't too big5943// convert max object size to num elements because computing an object size from num elements may overflow5944TR_ASSERT(fej9->getMaxObjectSizeForSizeClass() <= UINT_MAX, "assertion failure");5945generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, sizeReg, 
(fej9->getMaxObjectSizeForSizeClass()-allocationSizeOrDataOffset)/elementSize, cg);5946generateLabelInstruction(TR::InstOpCode::JA4, node, failLabel, cg);59475948// Hybrid arraylets need a zero length test if the size is unknown.5949//5950if (!generateArraylets)5951{5952generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, sizeReg, sizeReg, cg);5953generateLabelInstruction(TR::InstOpCode::JE4, node, failLabel, cg);5954}59555956// need to round up to sizeof(UDATA) so we can use it to index into size class index array5957// conservatively just add sizeof(UDATA) bytes and round5958int32_t round = 0;5959if (elementSize < sizeof(UDATA))5960round = sizeof(UDATA) - 1;59615962// now compute size of object in bytes5963generateRegMemInstruction(TR::InstOpCode::LEARegMem(),5964node,5965segmentReg,5966generateX86MemoryReference(eaxReal,5967sizeReg,5968TR::MemoryReference::convertMultiplierToStride(elementSize),5969allocationSizeOrDataOffset + round, cg), cg);597059715972if (elementSize < sizeof(UDATA))5973generateRegImmInstruction(TR::InstOpCode::ANDRegImms(), node, segmentReg, -(int32_t)sizeof(UDATA), cg);59745975#ifdef J9VM_INTERP_FLAGS_IN_CLASS_SLOT5976generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, segmentReg, J9_GC_MINIMUM_OBJECT_SIZE, cg);5977TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);5978generateLabelInstruction(TR::InstOpCode::JAE4, node, doneLabel, cg);5979generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(), node, segmentReg, J9_GC_MINIMUM_OBJECT_SIZE, cg);5980generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, cg);5981#endif59825983// get size class5984generateRegMemInstruction(TR::InstOpCode::LRegMem(),5985node,5986tempReg,5987generateX86MemoryReference(vmThreadReg, fej9->thisThreadJavaVMOffset(), cg), cg);5988generateRegMemInstruction(TR::InstOpCode::LRegMem(),5989node,5990tempReg,5991generateX86MemoryReference(tempReg, fej9->getRealtimeSizeClassesOffset(), cg), 
cg);5992generateRegMemInstruction(TR::InstOpCode::LRegMem(),5993node,5994tempReg,5995generateX86MemoryReference(tempReg,5996segmentReg, TR::MemoryReference::convertMultiplierToStride(1),5997fej9->getSizeClassesIndexOffset(),5998cg),5999cg);60006001// tempReg now holds size class6002TR::MemoryReference *currentMemRef, *topMemRef, *currentMemRefBump;6003if (cg->comp()->target().is64Bit())6004{6005TR_ASSERT(sizeof(J9VMGCSegregatedAllocationCacheEntry) == 16, "unexpected J9VMGCSegregatedAllocationCacheEntry size");6006// going to play some games here6007// need to use tempReg to index into two arrays:6008// 1) allocation caches6009// 2) cell size array6010// The first one has stride 16, second one stride sizeof(UDATA)6011// We need a shift instruction to be able to do stride 166012// To avoid two shifts, only do one for stride sizeof(UDATA) and use a multiplier in memory ref for 166013// 64-bit, so shift 3 times for sizeof(UDATA) and use multiplier stride 2 in memory references6014generateRegImmInstruction(TR::InstOpCode::SHLRegImm1(), node, tempReg, 3, cg);6015currentMemRef = generateX86MemoryReference(vmThreadReg, tempReg, TR::MemoryReference::convertMultiplierToStride(2), fej9->thisThreadAllocationCacheCurrentOffset(0), cg);6016topMemRef = generateX86MemoryReference(vmThreadReg, tempReg, TR::MemoryReference::convertMultiplierToStride(2), fej9->thisThreadAllocationCacheTopOffset(0), cg);6017currentMemRefBump = generateX86MemoryReference(vmThreadReg, tempReg, TR::MemoryReference::convertMultiplierToStride(2), fej9->thisThreadAllocationCacheCurrentOffset(0), cg);6018}6019else6020{6021// size needs to be 8 or less or it there's no multiplier stride available (would need to use other branch of else)6022TR_ASSERT(sizeof(J9VMGCSegregatedAllocationCacheEntry) <= 8, "unexpected J9VMGCSegregatedAllocationCacheEntry size");60236024currentMemRef = generateX86MemoryReference(vmThreadReg, 
tempReg,6025TR::MemoryReference::convertMultiplierToStride(sizeof(J9VMGCSegregatedAllocationCacheEntry)),6026fej9->thisThreadAllocationCacheCurrentOffset(0), cg);6027topMemRef = generateX86MemoryReference(vmThreadReg, tempReg,6028TR::MemoryReference::convertMultiplierToStride(sizeof(J9VMGCSegregatedAllocationCacheEntry)),6029fej9->thisThreadAllocationCacheTopOffset(0), cg);6030currentMemRefBump = generateX86MemoryReference(vmThreadReg, tempReg,6031TR::MemoryReference::convertMultiplierToStride(sizeof(J9VMGCSegregatedAllocationCacheEntry)),6032fej9->thisThreadAllocationCacheCurrentOffset(0), cg);6033}6034// tempReg now contains size class (32-bit) or size class * sizeof(J9VMGCSegregatedAllocationCacheEntry) (64-bit)60356036// get next cell for this size class6037generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, eaxReal, currentMemRef, cg);60386039// if null, then no cell available, use slow path6040generateRegMemInstruction(TR::InstOpCode::CMPRegMem(), node, eaxReal, topMemRef, cg);6041generateLabelInstruction(TR::InstOpCode::JAE4, node, failLabel, cg);60426043// have a valid cell, need to update current cell pointer6044generateRegMemInstruction(TR::InstOpCode::LRegMem(),6045node,6046segmentReg,6047generateX86MemoryReference(vmThreadReg, fej9->thisThreadJavaVMOffset(), cg), cg);6048generateRegMemInstruction(TR::InstOpCode::LRegMem(),6049node,6050segmentReg,6051generateX86MemoryReference(segmentReg, fej9->getRealtimeSizeClassesOffset(), cg), cg);6052if (cg->comp()->target().is64Bit())6053{6054// tempReg already has already been shifted for sizeof(UDATA)6055generateRegMemInstruction(TR::InstOpCode::LRegMem(),6056node,6057segmentReg,6058generateX86MemoryReference(segmentReg,6059tempReg,6060TR::MemoryReference::convertMultiplierToStride(1),6061fej9->getSmallCellSizesOffset(),6062cg),6063cg);6064}6065else6066{6067// tempReg needs to be shifted for 
sizeof(UDATA)6068generateRegMemInstruction(TR::InstOpCode::LRegMem(),6069node,6070segmentReg,6071generateX86MemoryReference(segmentReg,6072tempReg,6073TR::MemoryReference::convertMultiplierToStride(sizeof(UDATA)),6074fej9->getSmallCellSizesOffset(),6075cg),6076cg);6077}6078// segmentReg now holds cell size60796080// update current cell by cell size6081generateMemRegInstruction(TR::InstOpCode::ADDMemReg(), node, currentMemRefBump, segmentReg, cg);6082}6083else6084{6085generateRegMemInstruction(TR::InstOpCode::LRegMem(),6086node,6087eaxReal,6088generateX86MemoryReference(vmThreadReg,6089fej9->thisThreadAllocationCacheCurrentOffset(sizeClass),6090cg),6091cg);60926093generateRegMemInstruction(TR::InstOpCode::CMPRegMem(),6094node,6095eaxReal,6096generateX86MemoryReference(vmThreadReg,6097fej9->thisThreadAllocationCacheTopOffset(sizeClass),6098cg),6099cg);61006101generateLabelInstruction(TR::InstOpCode::JAE4, node, failLabel, cg);61026103// we have an object in eaxReal, now bump the current updatepointer6104TR::InstOpCode::Mnemonic opcode;6105uint32_t cellSize = fej9->getCellSizeForSizeClass(sizeClass);6106if (cellSize <= 127)6107opcode = TR::InstOpCode::ADDMemImms();6108else if (cellSize == 128)6109{6110opcode = TR::InstOpCode::SUBMemImms();6111cellSize = (uint32_t)-128;6112}6113else6114opcode = TR::InstOpCode::ADDMemImm4();61156116generateMemImmInstruction(opcode, node,6117generateX86MemoryReference(vmThreadReg,6118fej9->thisThreadAllocationCacheCurrentOffset(sizeClass),6119cg),6120cellSize, cg);6121}61226123// we're done6124return;6125#endif6126}6127else6128{6129bool shouldAlignToCacheBoundary = false;6130bool isSmallAllocation = false;61316132size_t heapAlloc_offset=offsetof(J9VMThread, heapAlloc);6133size_t heapTop_offset=offsetof(J9VMThread, heapTop);6134size_t tlhPrefetchFTA_offset= offsetof(J9VMThread, tlhPrefetchFTA);6135#ifdef J9VM_GC_NON_ZERO_TLH6136if (!comp->getOption(TR_DisableDualTLH) && 
node->canSkipZeroInitialization())6137{6138heapAlloc_offset=offsetof(J9VMThread, nonZeroHeapAlloc);6139heapTop_offset=offsetof(J9VMThread, nonZeroHeapTop);6140tlhPrefetchFTA_offset= offsetof(J9VMThread, nonZeroTlhPrefetchFTA);6141}6142#endif6143// Load the base of the next available heap storage. This load is done speculatively on the assumption that the6144// allocation will be inlined. If the assumption turns out to be false then the performance impact should be minimal6145// because the helper will be called in that case. It is necessary to insert this load here so that it dominates all6146// control paths through this internal control flow region.6147//6148generateRegMemInstruction(TR::InstOpCode::LRegMem(),6149node,6150eaxReal,6151generateX86MemoryReference(vmThreadReg,heapAlloc_offset, cg), cg);61526153if (comp->getOption(TR_EnableNewAllocationProfiling))6154{6155TR::LabelSymbol *doneProfilingLabel = generateLabelSymbol(cg);61566157uint32_t *globalAllocationDataPointer = fej9->getGlobalAllocationDataPointer();6158if (globalAllocationDataPointer)6159{6160TR::MemoryReference *gmr = generateX86MemoryReference((uintptr_t)globalAllocationDataPointer, cg);61616162generateMemImmInstruction(TR::InstOpCode::CMP4MemImm4,6163node,6164generateX86MemoryReference((uint32_t)(uintptr_t)globalAllocationDataPointer, cg),61650x07ffffff,6166cg);6167generateLabelInstruction(TR::InstOpCode::JAE4, node, doneProfilingLabel, cg);61686169generateMemInstruction(TR::InstOpCode::INC4Mem, node, gmr, cg);6170uint32_t *dataPointer = fej9->getAllocationProfilingDataPointer(node->getByteCodeInfo(), clazz, node->getOwningMethod(), comp);6171if (dataPointer)6172{6173TR::MemoryReference *mr = generateX86MemoryReference((uint32_t)(uintptr_t)dataPointer, cg);6174generateMemInstruction(TR::InstOpCode::INC4Mem, node, mr, cg);6175}61766177generateLabelInstruction(TR::InstOpCode::label, node, doneProfilingLabel, cg);6178}6179}61806181bool canSkipOverflowCheck = false;61826183// If the array length is 
constant, check to see if the size of the array will fit in a single arraylet leaf.6184// If the allocation size is too large, call the snippet.6185//6186if (generateArraylets && (node->getOpCodeValue() == TR::anewarray || node->getOpCodeValue() == TR::newarray))6187{6188if (comp->getOption(TR_DisableTarokInlineArrayletAllocation))6189generateLabelInstruction(TR::InstOpCode::JMP4, node, failLabel, cg);61906191if (sizeReg)6192{6193uint32_t maxContiguousArrayletLeafSizeInBytes =6194(uint32_t)(TR::Compiler->om.arrayletLeafSize() - TR::Compiler->om.sizeofReferenceAddress());61956196int32_t maxArrayletSizeInElements = maxContiguousArrayletLeafSizeInBytes/elementSize;61976198// Hybrid arraylets need a zero length test if the size is unknown.6199//6200generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, sizeReg, sizeReg, cg);6201generateLabelInstruction(TR::InstOpCode::JE4, node, failLabel, cg);62026203generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, sizeReg, maxArrayletSizeInElements, cg);6204generateLabelInstruction(TR::InstOpCode::JAE4, node, failLabel, cg);62056206// If the max arraylet leaf size is less than the amount of free space available on6207// the stack, there is no need to check for an overflow scenario.6208//6209if (maxContiguousArrayletLeafSizeInBytes <= cg->getMaxObjectSizeGuaranteedNotToOverflow() )6210canSkipOverflowCheck = true;6211}6212else if (TR::Compiler->om.isDiscontiguousArray(allocationSizeOrDataOffset))6213{6214// TODO: just call the helper directly and don't generate any6215// further instructions.6216//6217// Actually, we should never get here because we've already checked6218// constant lengths for discontiguity...6219//6220generateLabelInstruction(TR::InstOpCode::JMP4, node, failLabel, cg);6221}6222}62236224if (sizeReg && !canSkipOverflowCheck)6225{6226// Hybrid arraylets need a zero length test if the size is unknown.6227// The length could be zero.6228//6229if 
(!generateArraylets)6230{6231generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, sizeReg, sizeReg, cg);6232generateLabelInstruction(TR::InstOpCode::JE4, node, failLabel, cg);6233}62346235// The GC will guarantee that at least 'maxObjectSizeGuaranteedNotToOverflow' bytes6236// of slush will exist between the top of the heap and the end of the address space.6237//6238uintptr_t maxObjectSize = cg->getMaxObjectSizeGuaranteedNotToOverflow();6239uintptr_t maxObjectSizeInElements = maxObjectSize / elementSize;62406241if (cg->comp()->target().is64Bit() && !(maxObjectSizeInElements > 0 && maxObjectSizeInElements <= (uintptr_t)INT_MAX))6242{6243generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, tempReg, maxObjectSizeInElements, cg);6244generateRegRegInstruction(TR::InstOpCode::CMP8RegReg, node, sizeReg, tempReg, cg);6245}6246else6247{6248generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, sizeReg, (int32_t)maxObjectSizeInElements, cg);6249}62506251// Must be an unsigned comparison on sizes.6252//6253generateLabelInstruction(TR::InstOpCode::JAE4, node, failLabel, cg);6254}62556256#if !defined(J9VM_GC_THREAD_LOCAL_HEAP)6257// Establish a loop label in case the new heap pointer cannot be committed.6258//6259TR::LabelSymbol *loopLabel = generateLabelSymbol(cg);6260generateLabelInstruction(TR::InstOpCode::label, node, loopLabel, cg);6261#endif62626263if (sizeReg)6264{6265// calculate variable size, rounding up if necessary to a intptr_t multiple boundary6266//6267int32_t round; // zero indicates no rounding is necessary62686269if (!generateArraylets)6270{6271// TR_ASSERT(allocationSizeOrDataOffset % fej9->getObjectAlignmentInBytes() == 0, "Array header size of %d is not a multiple of %d", allocationSizeOrDataOffset, fej9->getObjectAlignmentInBytes());6272}627362746275round = (elementSize < TR::Compiler->om.getObjectAlignmentInBytes()) ? TR::Compiler->om.getObjectAlignmentInBytes() : 0;62766277int32_t disp32 = round ? 
(round-1) : 0;6278#ifdef J9VM_INTERP_FLAGS_IN_CLASS_SLOT6279if ( (node->getOpCodeValue() == TR::anewarray || node->getOpCodeValue() == TR::newarray))6280{6281// All arrays in combo builds will always be at least 20 bytes in size in all specs:6282//6283// 1) class pointer + contig length + dataAddr + one or more elements6284// 2) class pointer + 0 + 0 (for zero length arrays) + dataAddr6285//6286TR_ASSERT(J9_GC_MINIMUM_INDEXABLE_OBJECT_SIZE >= 8, "Expecting a minimum indexable object size >= 8 (actual minimum is %d)\n", J9_GC_MINIMUM_INDEXABLE_OBJECT_SIZE);62876288generateRegMemInstruction(6289TR::InstOpCode::LEARegMem(),6290node,6291tempReg,6292generateX86MemoryReference(6293eaxReal,6294sizeReg,6295TR::MemoryReference::convertMultiplierToStride(elementSize),6296allocationSizeOrDataOffset+disp32, cg), cg);62976298if (round)6299{6300generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(), node, tempReg, -round, cg);6301}6302}6303else6304#endif6305{6306#ifdef J9VM_INTERP_FLAGS_IN_CLASS_SLOT6307generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, tempReg, tempReg, cg);6308#endif630963106311generateRegMemInstruction(6312TR::InstOpCode::LEARegMem(),6313node,6314tempReg,6315generateX86MemoryReference(6316#ifdef J9VM_INTERP_FLAGS_IN_CLASS_SLOT6317tempReg,6318#else6319eaxReal,6320#endif6321sizeReg,6322TR::MemoryReference::convertMultiplierToStride(elementSize),6323allocationSizeOrDataOffset+disp32, cg), cg);63246325if (round)6326{6327generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(), node, tempReg, -round, cg);6328}63296330#ifdef J9VM_INTERP_FLAGS_IN_CLASS_SLOT6331generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, tempReg, J9_GC_MINIMUM_OBJECT_SIZE, cg);6332TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);6333generateLabelInstruction(TR::InstOpCode::JAE4, node, doneLabel, cg);6334generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(), node, tempReg, J9_GC_MINIMUM_OBJECT_SIZE, cg);6335generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, 
cg);6336generateRegRegInstruction(TR::InstOpCode::ADDRegReg(), node, tempReg, eaxReal, cg);6337#endif6338}6339}6340else6341{6342isSmallAllocation = allocationSizeOrDataOffset <= 0x40 ? true : false;6343allocationSizeOrDataOffset = (allocationSizeOrDataOffset+TR::Compiler->om.getObjectAlignmentInBytes()-1) & (-TR::Compiler->om.getObjectAlignmentInBytes());63446345#if defined(J9VM_GC_THREAD_LOCAL_HEAP)6346if ((node->getOpCodeValue() == TR::New) &&6347(comp->getMethodHotness() >= hot || node->shouldAlignTLHAlloc()))6348{6349TR_OpaqueMethodBlock *ownMethod = node->getOwningMethod();6350TR::Node *classChild = node->getFirstChild();6351char * className = NULL;6352TR_OpaqueClassBlock *clazz = NULL;63536354if (classChild &&6355classChild->getSymbolReference() &&6356!classChild->getSymbolReference()->isUnresolved())6357{6358TR::SymbolReference *symRef = classChild->getSymbolReference();6359TR::Symbol *sym = symRef->getSymbol();63606361if (sym &&6362sym->getKind() == TR::Symbol::IsStatic &&6363sym->isClassObject())6364{6365TR::StaticSymbol * staticSym = symRef->getSymbol()->castToStaticSymbol();6366void * staticAddress = staticSym->getStaticAddress();6367if (symRef->getCPIndex() >= 0)6368{6369if (!staticSym->addressIsCPIndexOfStatic() && staticAddress)6370{6371int32_t len;6372className = TR::Compiler->cls.classNameChars(comp,symRef, len);6373clazz = (TR_OpaqueClassBlock *)staticAddress;6374}6375}6376}6377}63786379uint32_t instanceSizeForAlignment = 30;6380static char *p= feGetEnv("TR_AlignInstanceSize");6381if (p)6382instanceSizeForAlignment = atoi(p);63836384if ((comp->getMethodHotness() >= hot) && clazz &&6385!cg->getCurrentEvaluationBlock()->isCold() &&6386TR::Compiler->cls.classInstanceSize(clazz)>=instanceSizeForAlignment)6387{6388shouldAlignToCacheBoundary = true;63896390generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, eaxReal,6391generateX86MemoryReference(eaxReal, 63, cg), cg);6392generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(), node, eaxReal, 
0xFFFFFFC0, cg);6393}6394}6395#endif // J9VM_GC_THREAD_LOCAL_HEAP63966397if ((uint32_t)allocationSizeOrDataOffset > cg->getMaxObjectSizeGuaranteedNotToOverflow())6398{6399generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, tempReg, eaxReal, cg);6400if (allocationSizeOrDataOffset <= 127)6401generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, tempReg, allocationSizeOrDataOffset, cg);6402else if (allocationSizeOrDataOffset == 128)6403generateRegImmInstruction(TR::InstOpCode::SUBRegImms(), node, tempReg, (unsigned)-128, cg);6404else6405generateRegImmInstruction(TR::InstOpCode::ADDRegImm4(), node, tempReg, allocationSizeOrDataOffset, cg);64066407// Check for overflow6408generateLabelInstruction(TR::InstOpCode::JB4, node, failLabel, cg);6409}6410else6411{6412generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, tempReg,6413generateX86MemoryReference(eaxReal, allocationSizeOrDataOffset, cg), cg);6414}6415}64166417generateRegMemInstruction(TR::InstOpCode::CMPRegMem(),6418node,6419tempReg,6420generateX86MemoryReference(vmThreadReg, heapTop_offset, cg), cg);64216422generateLabelInstruction(TR::InstOpCode::JA4, node, failLabel, cg);64236424#if defined(J9VM_GC_THREAD_LOCAL_HEAP)64256426if (shouldAlignToCacheBoundary)6427{6428// Alignment to a cache line boundary may require inserting more padding than is normally6429// necessary to achieve the alignment. 
In those cases, insert GC dark matter to describe6430// the space inserted.6431//64326433generateRegInstruction(TR::InstOpCode::PUSHReg, node, tempReg, cg);6434generateRegMemInstruction(TR::InstOpCode::LRegMem(),6435node,6436tempReg,6437generateX86MemoryReference(vmThreadReg,heapAlloc_offset, cg), cg);64386439generateRegRegInstruction(TR::InstOpCode::SUBRegReg(), node, eaxReal, tempReg, cg);64406441TR::LabelSymbol *doneAlignLabel = generateLabelSymbol(cg);6442TR::LabelSymbol *multiSlotGapLabel = generateLabelSymbol(cg);64436444generateRegImmInstruction(TR::InstOpCode::CMPRegImms(), node, eaxReal, sizeof(uintptr_t), cg);6445generateLabelInstruction(TR::InstOpCode::JB4, node, doneAlignLabel, cg);6446generateLabelInstruction(TR::InstOpCode::JA4, node, multiSlotGapLabel, cg);64476448int32_t singleSlotHole;64496450singleSlotHole = J9_GC_SINGLE_SLOT_HOLE;64516452if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())6453{6454generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node,6455generateX86MemoryReference(tempReg, 0, cg), singleSlotHole, cg);6456generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node,6457generateX86MemoryReference(tempReg, 4, cg), singleSlotHole, cg);6458}6459else6460{6461generateMemImmInstruction(6462TR::InstOpCode::SMemImm4(), node,6463generateX86MemoryReference(tempReg, 0, cg), singleSlotHole, cg);6464}64656466generateLabelInstruction(TR::InstOpCode::JMP4, node, doneAlignLabel, cg);6467generateLabelInstruction(TR::InstOpCode::label, node, multiSlotGapLabel, cg);64686469int32_t multiSlotHole;64706471multiSlotHole = J9_GC_MULTI_SLOT_HOLE;64726473generateMemImmInstruction(6474TR::InstOpCode::SMemImm4(), node,6475generateX86MemoryReference(tempReg, 0, cg),6476multiSlotHole, cg);64776478generateMemRegInstruction(6479TR::InstOpCode::SMemReg(), node,6480generateX86MemoryReference(tempReg, sizeof(uintptr_t), cg),6481eaxReal, cg);64826483generateLabelInstruction(TR::InstOpCode::label, node, doneAlignLabel, 
cg);6484generateRegRegInstruction(TR::InstOpCode::ADDRegReg(), node, eaxReal, tempReg, cg);6485generateRegInstruction(TR::InstOpCode::POPReg, node, tempReg, cg);6486}64876488// Make sure that the arraylet is aligned properly.6489//6490if (generateArraylets && (node->getOpCodeValue() == TR::anewarray || node->getOpCodeValue() == TR::newarray) )6491{6492generateRegMemInstruction(TR::InstOpCode::LEARegMem(),node,tempReg, generateX86MemoryReference(tempReg,TR::Compiler->om.getObjectAlignmentInBytes()-1,cg),cg);6493if (cg->comp()->target().is64Bit())6494generateRegImmInstruction(TR::InstOpCode::AND8RegImm4,node,tempReg,-TR::Compiler->om.getObjectAlignmentInBytes(),cg);6495else6496generateRegImmInstruction(TR::InstOpCode::AND4RegImm4,node,tempReg,-TR::Compiler->om.getObjectAlignmentInBytes(),cg);6497}64986499generateMemRegInstruction(TR::InstOpCode::SMemReg(),6500node,6501generateX86MemoryReference(vmThreadReg, heapAlloc_offset, cg),6502tempReg, cg);65036504if (!isSmallAllocation && cg->enableTLHPrefetching())6505{6506TR::LabelSymbol *prefetchSnippetLabel = generateLabelSymbol(cg);6507TR::LabelSymbol *restartLabel = generateLabelSymbol(cg);6508cg->addSnippet(new (cg->trHeapMemory()) TR::X86AllocPrefetchSnippet(cg, node, TR::Options::_TLHPrefetchSize,6509restartLabel, prefetchSnippetLabel,6510(!comp->getOption(TR_DisableDualTLH) && node->canSkipZeroInitialization())));651165126513bool useDirectPrefetchCall = false;6514bool useSharedCodeCacheSnippet = fej9->supportsCodeCacheSnippets();65156516// Generate the prefetch thunk in code cache. 
Only generate this once.6517//6518bool prefetchThunkGenerated = (fej9->getAllocationPrefetchCodeSnippetAddress(comp) != 0);6519#ifdef J9VM_GC_NON_ZERO_TLH6520if (!comp->getOption(TR_DisableDualTLH) && node->canSkipZeroInitialization())6521{6522prefetchThunkGenerated = (fej9->getAllocationNoZeroPrefetchCodeSnippetAddress(comp) !=0);6523}6524#endif6525if (useSharedCodeCacheSnippet && prefetchThunkGenerated)6526{6527useDirectPrefetchCall = true;6528}65296530if (!comp->getOption(TR_EnableNewX86PrefetchTLH))6531{6532generateRegRegInstruction(TR::InstOpCode::SUB4RegReg, node, tempReg, eaxReal, cg);65336534generateMemRegInstruction(TR::InstOpCode::SUB4MemReg,6535node,6536generateX86MemoryReference(vmThreadReg, tlhPrefetchFTA_offset, cg),6537tempReg, cg);6538if (!useDirectPrefetchCall)6539generateLabelInstruction(TR::InstOpCode::JLE4, node, prefetchSnippetLabel, cg);6540else6541{6542generateLabelInstruction(TR::InstOpCode::JG4, node, restartLabel, cg);6543TR::SymbolReference * helperSymRef = cg->getSymRefTab()->findOrCreateRuntimeHelper(TR_X86CodeCachePrefetchHelper);6544TR::MethodSymbol *helperSymbol = helperSymRef->getSymbol()->castToMethodSymbol();6545#ifdef J9VM_GC_NON_ZERO_TLH6546if (!comp->getOption(TR_DisableDualTLH) && node->canSkipZeroInitialization())6547{6548helperSymbol->setMethodAddress(fej9->getAllocationNoZeroPrefetchCodeSnippetAddress(comp));6549}6550else6551{6552helperSymbol->setMethodAddress(fej9->getAllocationPrefetchCodeSnippetAddress(comp));6553}6554#else6555helperSymbol->setMethodAddress(fej9->getAllocationPrefetchCodeSnippetAddress(comp));6556#endif6557generateImmSymInstruction(TR::InstOpCode::CALLImm4, node, (uintptr_t)helperSymbol->getMethodAddress(), helperSymRef, cg);6558}6559}6560else6561{6562// This currently only works when 'tlhPrefetchFTA' field is 4 bytes (on 32-bit or a6563// compressed references build). 
True 64-bit support requires this field be widened6564// to 64-bits.6565//6566generateRegMemInstruction(TR::InstOpCode::CMP4RegMem, node,6567tempReg,6568generateX86MemoryReference(vmThreadReg,tlhPrefetchFTA_offset, cg),6569cg);6570generateLabelInstruction(TR::InstOpCode::JAE4, node, prefetchSnippetLabel, cg);6571}65726573generateLabelInstruction(TR::InstOpCode::label, node, restartLabel, cg);6574}65756576#else // J9VM_GC_THREAD_LOCAL_HEAP6577generateMemRegInstruction(TR::InstOpCode::CMPXCHGMemReg(), node, generateX86MemoryReference(vmThreadReg, heapAlloc_offset, cg), tempReg, cg);6578generateLabelInstruction(TR::InstOpCode::JNE4, node, loopLabel, cg);6579#endif // !J9VM_GC_THREAD_LOCAL_HEAP6580}6581}65826583// ------------------------------------------------------------------------------6584// genHeapAlloc26585//6586// Will eventually become the de facto genHeapAlloc. Needs packed array and 2TLH6587// support.6588// ------------------------------------------------------------------------------65896590static void genHeapAlloc2(6591TR::Node *node,6592TR_OpaqueClassBlock *clazz,6593int32_t allocationSizeOrDataOffset,6594int32_t elementSize,6595TR::Register *sizeReg,6596TR::Register *eaxReal,6597TR::Register *segmentReg,6598TR::Register *tempReg,6599TR::LabelSymbol *failLabel,6600TR::CodeGenerator *cg)6601{6602// Load the current heap segment and see if there is room in it. Loop if6603// we can't get the lock on the segment.6604//6605TR::Compilation *comp = cg->comp();6606TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());6607TR::Register *vmThreadReg = cg->getVMThreadRegister();6608bool generateArraylets = comp->generateArraylets();6609bool isTooSmallToPrefetch = false;66106611{6612bool shouldAlignToCacheBoundary = false;66136614// Load the base of the next available heap storage. This load is done speculatively on the assumption that the6615// allocation will be inlined. 
If the assumption turns out to be false then the performance impact should be minimal6616// because the helper will be called in that case. It is necessary to insert this load here so that it dominates all6617// control paths through this internal control flow region.6618//66196620if (sizeReg)6621{66226623// -------------6624//6625// VARIABLE SIZE6626//6627// -------------66286629// The GC will guarantee that at least 'maxObjectSizeGuaranteedNotToOverflow' bytes6630// of slush will exist between the top of the heap and the end of the address space.6631//6632uintptr_t maxObjectSize = cg->getMaxObjectSizeGuaranteedNotToOverflow();6633uintptr_t maxObjectSizeInElements = maxObjectSize / elementSize;66346635if (cg->comp()->target().is64Bit() && !(maxObjectSizeInElements > 0 && maxObjectSizeInElements <= (uintptr_t)INT_MAX))6636{6637generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, segmentReg, maxObjectSizeInElements, cg);6638generateRegRegInstruction(TR::InstOpCode::CMP8RegReg, node, sizeReg, segmentReg, cg);6639}6640else6641{6642generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, sizeReg, (int32_t)maxObjectSizeInElements, cg);6643}66446645// Must be an unsigned comparison on sizes.6646//6647generateLabelInstruction(TR::InstOpCode::JAE4, node, failLabel, cg);664866496650generateRegMemInstruction(TR::InstOpCode::LRegMem(),6651node,6652eaxReal,6653generateX86MemoryReference(vmThreadReg,6654offsetof(J9VMThread, heapAlloc), cg), cg);665566566657// calculate variable size, rounding up if necessary to a intptr_t multiple boundary6658//6659int32_t round; // zero indicates no rounding is necessary66606661if (!generateArraylets)6662{6663// TR_ASSERT(allocationSizeOrDataOffset % fej9->getObjectAlignmentInBytes() == 0, "Array header size of %d is not a multiple of %d", allocationSizeOrDataOffset, fej9->getObjectAlignmentInBytes());6664}66656666round = (elementSize >= TR::Compiler->om.getObjectAlignmentInBytes())? 
0 : TR::Compiler->om.getObjectAlignmentInBytes();6667int32_t disp32 = round ? (round-1) : 0;66686669/*6670mov rcx, rdx ; # of array elements (1)6671cmp rcx, 1 (1)6672adc rcx, 0 ; adjust for zero length (1)66736674shl rcx, 2 (1)6675add rcx, 0xf ; rcx + header (8) + 7 (1)66766677and rcx,0xfffffffffffffff8 ; round down (1)6678*/66796680generateRegRegInstruction(TR::InstOpCode::MOV4RegReg, node, segmentReg, sizeReg, cg);66816682// Artificially adjust the number of elements by 1 if the array is zero length. This works6683// because either the array is zero length and needs a discontiguous array length field6684// (occupying a slot) or it has at least 1 element which will take up a slot anyway.6685//6686// Native 64-bit array headers do not need this adjustment because the6687// contiguous and discontiguous array headers are the same size.6688//6689if (cg->comp()->target().is32Bit() || (cg->comp()->target().is64Bit() && comp->useCompressedPointers()))6690{6691generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, segmentReg, 1, cg);6692generateRegImmInstruction(TR::InstOpCode::ADC4RegImm4, node, segmentReg, 0, cg);6693}66946695uint8_t shiftVal = TR::MemoryReference::convertMultiplierToStride(elementSize);6696if (shiftVal > 0)6697{6698generateRegImmInstruction(TR::InstOpCode::SHLRegImm1(), node, segmentReg, shiftVal, cg);6699}67006701generateRegImmInstruction(TR::InstOpCode::ADDRegImm4(), node, segmentReg, allocationSizeOrDataOffset+disp32, cg);67026703if (round)6704{6705generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(), node, segmentReg, -round, cg);6706}67076708// Copy full object size in bytes to RCX for zero init via REP TR::InstOpCode::STOSQ6709//6710generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, tempReg, segmentReg, cg);67116712generateRegRegInstruction(TR::InstOpCode::ADDRegReg(), node, segmentReg, eaxReal, cg);6713}6714else6715{6716// ----------6717//6718// FIXED SIZE6719//6720// 
----------67216722generateRegMemInstruction(TR::InstOpCode::LRegMem(),6723node,6724eaxReal,6725generateX86MemoryReference(vmThreadReg,6726offsetof(J9VMThread, heapAlloc), cg), cg);67276728if (comp->getOptLevel() < hot)6729isTooSmallToPrefetch = allocationSizeOrDataOffset <= 0x40 ? true : false;67306731allocationSizeOrDataOffset = (allocationSizeOrDataOffset+TR::Compiler->om.getObjectAlignmentInBytes()-1) & (-TR::Compiler->om.getObjectAlignmentInBytes());67326733#if defined(J9VM_GC_THREAD_LOCAL_HEAP)6734if ((node->getOpCodeValue() == TR::New) &&6735(comp->getMethodHotness() >= hot || node->shouldAlignTLHAlloc()))6736{6737TR_OpaqueMethodBlock *ownMethod = node->getOwningMethod();67386739TR::Node *classChild = node->getFirstChild();6740char * className = NULL;6741TR_OpaqueClassBlock *clazz = NULL;67426743if (classChild &&6744classChild->getSymbolReference() &&6745!classChild->getSymbolReference()->isUnresolved())6746{6747TR::SymbolReference *symRef = classChild->getSymbolReference();6748TR::Symbol *sym = symRef->getSymbol();67496750if (sym &&6751sym->getKind() == TR::Symbol::IsStatic &&6752sym->isClassObject())6753{6754TR::StaticSymbol * staticSym = symRef->getSymbol()->castToStaticSymbol();6755void * staticAddress = staticSym->getStaticAddress();6756if (symRef->getCPIndex() >= 0)6757{6758if (!staticSym->addressIsCPIndexOfStatic() && staticAddress)6759{6760int32_t len;6761className = TR::Compiler->cls.classNameChars(comp, symRef, len);6762clazz = (TR_OpaqueClassBlock *)staticAddress;6763}6764}6765}6766}67676768uint32_t instanceSizeForAlignment = 30;6769static char *p= feGetEnv("TR_AlignInstanceSize");6770if (p)6771instanceSizeForAlignment = atoi(p);67726773if ((comp->getMethodHotness() >= hot) && clazz &&6774!cg->getCurrentEvaluationBlock()->isCold() &&6775TR::Compiler->cls.classInstanceSize(clazz)>=instanceSizeForAlignment)6776{6777shouldAlignToCacheBoundary = true;67786779generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, 
eaxReal,6780generateX86MemoryReference(eaxReal, 63, cg), cg);6781generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(), node, eaxReal, 0xFFFFFFC0, cg);6782}6783}6784#endif // J9VM_GC_THREAD_LOCAL_HEAP67856786if ((uint32_t)allocationSizeOrDataOffset > cg->getMaxObjectSizeGuaranteedNotToOverflow())6787{6788generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, segmentReg, eaxReal, cg);6789if (allocationSizeOrDataOffset <= 127)6790generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, segmentReg, allocationSizeOrDataOffset, cg);6791else if (allocationSizeOrDataOffset == 128)6792generateRegImmInstruction(TR::InstOpCode::SUBRegImms(), node, segmentReg, (unsigned)-128, cg);6793else6794generateRegImmInstruction(TR::InstOpCode::ADDRegImm4(), node, segmentReg, allocationSizeOrDataOffset, cg);67956796// Check for overflow6797generateLabelInstruction(TR::InstOpCode::JB4, node, failLabel, cg);6798}6799else6800{6801generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, segmentReg,6802generateX86MemoryReference(eaxReal, allocationSizeOrDataOffset, cg), cg);6803}6804}680568066807// -----------6808// MERGED PATH6809// -----------68106811generateRegMemInstruction(TR::InstOpCode::CMPRegMem(),6812node,6813segmentReg,6814generateX86MemoryReference(vmThreadReg, offsetof(J9VMThread, heapTop), cg), cg);68156816generateLabelInstruction(TR::InstOpCode::JA4, node, failLabel, cg);68176818// ------------6819// 1st PREFETCH6820// ------------68216822if (!isTooSmallToPrefetch)6823generateMemInstruction(TR::InstOpCode::PREFETCHNTA, node, generateX86MemoryReference(segmentReg, 0xc0, cg), cg);68246825if (shouldAlignToCacheBoundary)6826{6827// Alignment to a cache line boundary may require inserting more padding than is normally6828// necessary to achieve the alignment. 
In those cases, insert GC dark matter to describe6829// the space inserted.6830//68316832generateRegInstruction(TR::InstOpCode::PUSHReg, node, segmentReg, cg);6833generateRegMemInstruction(TR::InstOpCode::LRegMem(),6834node,6835segmentReg,6836generateX86MemoryReference(vmThreadReg,6837offsetof(J9VMThread, heapAlloc), cg), cg);68386839generateRegRegInstruction(TR::InstOpCode::SUBRegReg(), node, eaxReal, segmentReg, cg);68406841TR::LabelSymbol *doneAlignLabel = generateLabelSymbol(cg);6842TR::LabelSymbol *multiSlotGapLabel = generateLabelSymbol(cg);68436844generateRegImmInstruction(TR::InstOpCode::CMPRegImms(), node, eaxReal, sizeof(uintptr_t), cg);6845generateLabelInstruction(TR::InstOpCode::JB4, node, doneAlignLabel, cg);6846generateLabelInstruction(TR::InstOpCode::JA4, node, multiSlotGapLabel, cg);68476848int32_t singleSlotHole;68496850singleSlotHole = J9_GC_SINGLE_SLOT_HOLE;68516852if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())6853{6854generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node,6855generateX86MemoryReference(segmentReg, 0, cg), singleSlotHole, cg);6856generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node,6857generateX86MemoryReference(segmentReg, 4, cg), singleSlotHole, cg);6858}6859else6860{6861generateMemImmInstruction(6862TR::InstOpCode::SMemImm4(), node,6863generateX86MemoryReference(segmentReg, 0, cg), singleSlotHole, cg);6864}68656866generateLabelInstruction(TR::InstOpCode::JMP4, node, doneAlignLabel, cg);6867generateLabelInstruction(TR::InstOpCode::label, node, multiSlotGapLabel, cg);68686869int32_t multiSlotHole;68706871multiSlotHole = J9_GC_MULTI_SLOT_HOLE;68726873generateMemImmInstruction(6874TR::InstOpCode::SMemImm4(), node,6875generateX86MemoryReference(segmentReg, 0, cg),6876multiSlotHole, cg);68776878generateMemRegInstruction(6879TR::InstOpCode::SMemReg(), node,6880generateX86MemoryReference(segmentReg, sizeof(uintptr_t), cg),6881eaxReal, cg);68826883generateLabelInstruction(TR::InstOpCode::label, node, 
doneAlignLabel, cg);6884generateRegRegInstruction(TR::InstOpCode::ADDRegReg(), node, eaxReal, segmentReg, cg);6885generateRegInstruction(TR::InstOpCode::POPReg, node, segmentReg, cg);6886}68876888// Make sure that the arraylet is aligned properly.6889//6890if (generateArraylets && (node->getOpCodeValue() == TR::anewarray || node->getOpCodeValue() == TR::newarray) )6891{6892generateRegMemInstruction(TR::InstOpCode::LEARegMem(),node,segmentReg, generateX86MemoryReference(tempReg,TR::Compiler->om.getObjectAlignmentInBytes()-1,cg),cg);6893if (cg->comp()->target().is64Bit())6894generateRegImmInstruction(TR::InstOpCode::AND8RegImm4,node,segmentReg,-TR::Compiler->om.getObjectAlignmentInBytes(),cg);6895else6896generateRegImmInstruction(TR::InstOpCode::AND4RegImm4,node,segmentReg,-TR::Compiler->om.getObjectAlignmentInBytes(),cg);6897}68986899generateMemRegInstruction(TR::InstOpCode::SMemReg(),6900node,6901generateX86MemoryReference(vmThreadReg, offsetof(J9VMThread, heapAlloc), cg),6902segmentReg, cg);69036904if (!isTooSmallToPrefetch && node->getOpCodeValue() != TR::New)6905{6906// ------------6907// 2nd PREFETCH6908// ------------6909generateMemInstruction(TR::InstOpCode::PREFETCHNTA, node, generateX86MemoryReference(segmentReg, 0x100, cg), cg);69106911// ------------6912// 3rd PREFETCH6913// ------------6914generateMemInstruction(TR::InstOpCode::PREFETCHNTA, node, generateX86MemoryReference(segmentReg, 0x140, cg), cg);69156916// ------------6917// 4th PREFETCH6918// ------------6919generateMemInstruction(TR::InstOpCode::PREFETCHNTA, node, generateX86MemoryReference(segmentReg, 0x180, cg), cg);6920}6921}6922}69236924// Generate the code to initialize an object header - used for both new and6925// array new6926//6927static void genInitObjectHeader(TR::Node *node,6928TR_OpaqueClassBlock *clazz,6929TR::Register *classReg,6930TR::Register *objectReg,6931TR::Register *tempReg,6932bool isZeroInitialized,6933bool isDynamicAllocation,6934TR::CodeGenerator 
*cg)6935{6936TR::Compilation *comp = cg->comp();6937TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());69386939bool use64BitClasses = comp->target().is64Bit() &&6940(!TR::Compiler->om.generateCompressedObjectHeaders() ||6941(comp->compileRelocatableCode() && comp->getOption(TR_UseSymbolValidationManager)));69426943TR_ASSERT((isDynamicAllocation || clazz), "Cannot have a null clazz while not doing dynamic array allocation\n");69446945// --------------------------------------------------------------------------------6946//6947// Initialize CLASS field6948//6949// --------------------------------------------------------------------------------6950//6951TR::InstOpCode::Mnemonic opSMemReg = TR::InstOpCode::SMemReg(use64BitClasses);69526953TR::Register * clzReg = classReg;69546955// For dynamic array allocation, load the array class from the component class and store into clzReg6956if (isDynamicAllocation)6957{6958TR_ASSERT((node->getOpCodeValue() == TR::anewarray), "Dynamic allocation currently only supports reference arrays");6959TR_ASSERT(classReg, "must have a classReg for dynamic allocation");6960clzReg = tempReg;6961generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, clzReg, generateX86MemoryReference(classReg, offsetof(J9Class, arrayClass), cg), cg);6962}6963// TODO: should be able to use a TR_ClassPointer relocation without this stuff (along with class validation)6964else if (cg->needClassAndMethodPointerRelocations() && !comp->getOption(TR_UseSymbolValidationManager))6965{6966TR::Register *vmThreadReg = cg->getVMThreadRegister();6967if (node->getOpCodeValue() == TR::newarray)6968{6969generateRegMemInstruction(TR::InstOpCode::LRegMem(), node,tempReg,6970generateX86MemoryReference(vmThreadReg,offsetof(J9VMThread, javaVM), cg), cg);6971generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, tempReg,6972generateX86MemoryReference(tempReg,6973offsetof(J9JavaVM, booleanArrayClass)+(node->getSecondChild()->getInt()-4)*sizeof(J9Class*), cg), cg);6974// tempReg 
should contain a 32 bit pointer.6975generateMemRegInstruction(opSMemReg, node,6976generateX86MemoryReference(objectReg, TR::Compiler->om.offsetOfObjectVftField(), cg),6977tempReg, cg);6978clzReg = tempReg;6979}6980else6981{6982TR_ASSERT((node->getOpCodeValue() == TR::New)6983&& classReg, "must have a classReg for TR::New in AOT mode");6984clzReg = classReg;6985}6986}698769886989// For RealTime Code Only.6990int32_t orFlags = 0;6991int32_t orFlagsClass = 0;69926993if (!clzReg)6994{6995TR::Instruction *instr = NULL;6996if (use64BitClasses)6997{6998if (cg->needClassAndMethodPointerRelocations() && comp->getOption(TR_UseSymbolValidationManager))6999instr = generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, tempReg, ((intptr_t)clazz|orFlagsClass), cg, TR_ClassPointer);7000else7001instr = generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, tempReg, ((intptr_t)clazz|orFlagsClass), cg);7002generateMemRegInstruction(TR::InstOpCode::S8MemReg, node, generateX86MemoryReference(objectReg, TR::Compiler->om.offsetOfObjectVftField(), cg), tempReg, cg);7003}7004else7005{7006instr = generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node, generateX86MemoryReference(objectReg, TR::Compiler->om.offsetOfObjectVftField(), cg), (int32_t)((uintptr_t)clazz|orFlagsClass), cg);7007}70087009// HCR in genInitObjectHeader7010if (instr && cg->wantToPatchClassPointer(clazz, node))7011comp->getStaticHCRPICSites()->push_front(instr);7012}7013else7014{7015if (orFlagsClass != 0)7016generateRegImmInstruction(use64BitClasses ? 
TR::InstOpCode::OR8RegImm4 : TR::InstOpCode::OR4RegImm4, node, clzReg, orFlagsClass, cg);7017generateMemRegInstruction(opSMemReg, node,7018generateX86MemoryReference(objectReg, TR::Compiler->om.offsetOfObjectVftField(), cg), clzReg, cg);7019}70207021// --------------------------------------------------------------------------------7022//7023// Initialize FLAGS field7024//7025// --------------------------------------------------------------------------------7026//70277028// Collect the flags to be OR'd in that are known at compile time.7029//70307031#ifndef J9VM_INTERP_FLAGS_IN_CLASS_SLOT7032// Enable macro once GC-Helper is fixed7033J9ROMClass *romClass = TR::Compiler->cls.romClassOf(clazz);7034if (romClass)7035{7036orFlags |= romClass->instanceShape;7037orFlags |= fej9->getStaticObjectFlags();70387039#if defined(J9VM_OPT_NEW_OBJECT_HASH)7040// put orFlags or 0 into header if needed7041generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node,7042generateX86MemoryReference(objectReg, TMP_OFFSETOF_J9OBJECT_FLAGS, cg),7043orFlags, cg);70447045#endif /* !J9VM_OPT_NEW_OBJECT_HASH */7046}7047#endif /* FLAGS_IN_CLASS_SLOT */70487049// --------------------------------------------------------------------------------7050//7051// Initialize MONITOR field7052//7053// --------------------------------------------------------------------------------7054//7055// For dynamic array allocation, in case (very unlikely) the object array has a lock word, we just initialized it to 0 conservatively.7056// In this case, if the original array is reserved, initializing the cloned object's lock word to 0 will force the7057// locking to go to the slow locking path.7058if (isDynamicAllocation)7059{7060TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);7061generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, tempReg, generateX86MemoryReference(clzReg, offsetof(J9ArrayClass, lockOffset), cg), cg);7062generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, tempReg, (int32_t)-1, 
cg);7063generateLabelInstruction (TR::InstOpCode::JE4, node, doneLabel, cg);7064generateMemImmInstruction(TR::InstOpCode::SMemImm4(comp->target().is64Bit() && !fej9->generateCompressedLockWord()),7065node, generateX86MemoryReference(objectReg, tempReg, 0, cg), 0, cg);7066generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, cg);7067}7068else7069{7070bool initReservable = TR::Compiler->cls.classFlagReservableWordInitValue(clazz);7071if (!isZeroInitialized || initReservable)7072{7073bool initLw = (node->getOpCodeValue() != TR::New) || initReservable;7074int lwOffset = fej9->getByteOffsetToLockword(clazz);7075if (lwOffset == -1)7076initLw = false;70777078if (initLw)7079{7080int32_t initialLwValue = 0;7081if (initReservable)7082initialLwValue = OBJECT_HEADER_LOCK_RESERVED;70837084generateMemImmInstruction(TR::InstOpCode::SMemImm4(comp->target().is64Bit() && !fej9->generateCompressedLockWord()),7085node, generateX86MemoryReference(objectReg, lwOffset, cg), initialLwValue, cg);7086}7087}7088}7089}709070917092// Generate the code to initialize an array object header7093//7094static void genInitArrayHeader(7095TR::Node *node,7096TR_OpaqueClassBlock *clazz,7097TR::Register *classReg,7098TR::Register *objectReg,7099TR::Register *sizeReg,7100int32_t elementSize,7101int32_t arrayletDataOffset,7102TR::Register *tempReg,7103bool isZeroInitialized,7104bool isDynamicAllocation,7105bool shouldInitZeroSizedArrayHeader,7106TR::CodeGenerator *cg)7107{7108TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());71097110// Initialize the object header7111//7112genInitObjectHeader(node, clazz, classReg, objectReg, tempReg, isZeroInitialized, isDynamicAllocation, cg);71137114int32_t arraySizeOffset = fej9->getOffsetOfContiguousArraySizeField();71157116TR::MemoryReference *arraySizeMR = generateX86MemoryReference(objectReg, arraySizeOffset, cg);7117// Special handling of zero sized arrays.7118// Zero length arrays are discontiguous (i.e. 
they also need the discontiguous length field to be 0) because7119// they are indistinguishable from non-zero length discontiguous arrays. But instead of explicitly checking7120// for zero sized arrays we unconditionally store 0 in the third dword of the array object header. That is7121// safe because the 3rd dword is either array size of a zero sized array or will contain the first elements7122// of an array:7123// - Zero sized arrays have the following layout:7124// - The smallest array possible is a byte array with 1 element which would have a layout:7125// #bits per section (compressed refs): | 32 bits | 32 bits | 32 bits | 32 bits | 32 bits | 32 bits |7126// zero sized arrays: | class | mustBeZero | size | padding | dataAddr |7127// smallest contiguous array: | class | size | dataAddr | 1 byte + padding | other |7128// This also reflects the minimum object size which is 16 bytes.7129int32_t arrayDiscontiguousSizeOffset = fej9->getOffsetOfDiscontiguousArraySizeField();7130TR::MemoryReference *arrayDiscontiguousSizeMR = generateX86MemoryReference(objectReg, arrayDiscontiguousSizeOffset, cg);71317132TR::Compilation *comp = cg->comp();71337134bool canUseFastInlineAllocation =7135(!comp->getOptions()->realTimeGC() &&7136!comp->generateArraylets()) ? true : false;71377138// Initialize the array size7139//7140if (sizeReg)7141{7142// Variable size7143//7144if (canUseFastInlineAllocation)7145{7146// Native 64-bit needs to cover the discontiguous size field7147//7148TR::InstOpCode::Mnemonic storeOp = (comp->target().is64Bit() && !comp->useCompressedPointers()) ? 
TR::InstOpCode::S8MemReg : TR::InstOpCode::S4MemReg;7149generateMemRegInstruction(storeOp, node, arraySizeMR, sizeReg, cg);7150}7151else7152{7153generateMemRegInstruction(TR::InstOpCode::S4MemReg, node, arraySizeMR, sizeReg, cg);7154}7155// Take care of zero sized arrays as they are discontiguous and not contiguous7156if (shouldInitZeroSizedArrayHeader)7157{7158generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node, arrayDiscontiguousSizeMR, 0, cg);7159}7160}7161else7162{7163// Fixed size7164//7165int32_t instanceSize = 0;7166if (canUseFastInlineAllocation)7167{7168// Native 64-bit needs to cover the discontiguous size field7169//7170TR::InstOpCode::Mnemonic storeOp = (comp->target().is64Bit() && !comp->useCompressedPointers()) ? TR::InstOpCode::S8MemImm4 : TR::InstOpCode::S4MemImm4;7171instanceSize = node->getFirstChild()->getInt();7172generateMemImmInstruction(storeOp, node, arraySizeMR, instanceSize, cg);7173}7174else7175{7176instanceSize = node->getFirstChild()->getInt();7177generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node, arraySizeMR, instanceSize, cg);7178}7179// Take care of zero sized arrays as they are discontiguous and not contiguous7180if (shouldInitZeroSizedArrayHeader && (instanceSize == 0))7181{7182generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node, arrayDiscontiguousSizeMR, 0, cg);7183}7184}71857186bool generateArraylets = comp->generateArraylets();71877188if (generateArraylets)7189{7190// write arraylet pointer7191TR::InstOpCode::Mnemonic storeOp;71927193generateRegMemInstruction(7194TR::InstOpCode::LEARegMem(), node,7195tempReg,7196generateX86MemoryReference(objectReg, arrayletDataOffset, cg), cg);71977198if (comp->useCompressedPointers())7199{7200storeOp = TR::InstOpCode::S4MemReg;72017202// Compress the arraylet pointer.7203//7204if (TR::Compiler->om.compressedReferenceShiftOffset() > 0)7205generateRegImmInstruction(TR::InstOpCode::SHR8RegImm1, node, tempReg, TR::Compiler->om.compressedReferenceShiftOffset(), 
cg);7206}7207else7208{7209storeOp = TR::InstOpCode::SMemReg();7210}72117212TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());7213generateMemRegInstruction(storeOp, node, generateX86MemoryReference(objectReg, fej9->getFirstArrayletPointerOffset(comp), cg), tempReg, cg);7214}72157216}721772187219// ------------------------------------------------------------------------------7220// genZeroInitObject27221// ------------------------------------------------------------------------------72227223static bool genZeroInitObject2(7224TR::Node *node,7225int32_t objectSize,7226int32_t elementSize,7227TR::Register *sizeReg,7228TR::Register *targetReg,7229TR::Register *tempReg,7230TR::Register *segmentReg,7231TR::Register *&scratchReg,7232TR::CodeGenerator *cg)7233{7234TR::Compilation *comp = cg->comp();72357236// set up clazz value here7237TR_OpaqueClassBlock *clazz = NULL;7238bool isArrayNew = (node->getOpCodeValue() != TR::New);7239comp->canAllocateInline(node, clazz);7240auto headerSize = isArrayNew ? TR::Compiler->om.contiguousArrayHeaderSizeInBytes() : TR::Compiler->om.objectHeaderSizeInBytes();7241// If we are using full refs both contiguous and discontiguous array header have the same size, in which case we must adjust header size7242// slightly so that rep stosb can initialize the size field of zero sized arrays appropriately7243// #bits per section (compressed refs): | 32 bits | 32 bits | 32 bits | 32 bits | 32 bits | 32 bits |7244// zero sized arrays: | class | mustBeZero | size | padding | dataAddr |7245// smallest contiguous array: | class | size | dataAddr | 1 byte + padding | other |7246// In order for us to successfully initialize the size field of a zero sized array in compressed refs7247// we must subtract 8 bytes (sizeof(dataAddr)) from header size. And in case of full refs we must7248// subtract 16 bytes from the header in order to properly initialize the zero sized field. 
We can7249// accomplish that by simply subtracting the offset of dataAddr field, which is 8 for compressed refs7250// and 16 for full refs.7251#if defined(TR_TARGET_64BIT)7252if (!cg->comp()->target().is32Bit() && isArrayNew)7253{7254headerSize -= static_cast<TR_J9VMBase *>(cg->fe())->getOffsetOfContiguousDataAddrField();7255}7256#endif /* TR_TARGET_64BIT */7257TR_ASSERT(headerSize >= 4, "Object/Array header must be >= 4.");7258objectSize -= headerSize;72597260if (!minRepstosdWords)7261{7262static char *p= feGetEnv("TR_MinRepstosdWords");7263if (p)7264minRepstosdWords = atoi(p);7265else7266minRepstosdWords = MIN_REPSTOSD_WORDS; // Use default value7267}72687269if (sizeReg || objectSize >= minRepstosdWords)7270{7271// Zero-initialize by using REP TR::InstOpCode::STOSB.7272//7273if (sizeReg)7274{7275// -------------7276//7277// VARIABLE SIZE7278//7279// -------------7280// Subtract off the header size and initialize the remaining slots.7281//7282generateRegImmInstruction(TR::InstOpCode::SUBRegImms(), node, tempReg, headerSize, cg);7283}7284else7285{7286// ----------7287// FIXED SIZE7288// ----------7289if (comp->target().is64Bit() && !IS_32BIT_SIGNED(objectSize))7290{7291generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, tempReg, objectSize, cg);7292}7293else7294{7295generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(), node, tempReg, objectSize, cg);7296}7297}72987299// -----------7300// Destination7301// -----------7302generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, segmentReg, generateX86MemoryReference(targetReg, headerSize, cg), cg);7303if (comp->target().is64Bit())7304{7305scratchReg = cg->allocateRegister();7306generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, scratchReg, targetReg, cg);7307}7308else7309{7310generateRegInstruction(TR::InstOpCode::PUSHReg, node, targetReg, cg);7311}7312generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, targetReg, targetReg, cg);7313generateInstruction(TR::InstOpCode::REPSTOSB, 
node, cg);7314if (comp->target().is64Bit())7315{7316generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, targetReg, scratchReg, cg);7317}7318else7319{7320generateRegInstruction(TR::InstOpCode::POPReg, node, targetReg, cg);7321}7322return true;7323}7324else if (objectSize > 0)7325{7326if (objectSize % 16 == 12)7327{7328// Zero-out header to avoid a 12-byte residue7329objectSize += 4;7330headerSize -= 4;7331}7332scratchReg = cg->allocateRegister(TR_FPR);7333generateRegRegInstruction(TR::InstOpCode::PXORRegReg, node, scratchReg, scratchReg, cg);7334int32_t offset = 0;7335while (objectSize >= 16)7336{7337generateMemRegInstruction(TR::InstOpCode::MOVDQUMemReg, node, generateX86MemoryReference(targetReg, headerSize + offset, cg), scratchReg, cg);7338objectSize -= 16;7339offset += 16;7340}7341switch (objectSize)7342{7343case 8:7344generateMemRegInstruction(TR::InstOpCode::MOVQMemReg, node, generateX86MemoryReference(targetReg, headerSize + offset, cg), scratchReg, cg);7345break;7346case 4:7347generateMemRegInstruction(TR::InstOpCode::MOVDMemReg, node, generateX86MemoryReference(targetReg, headerSize + offset, cg), scratchReg, cg);7348break;7349case 0:7350break;7351default:7352TR_ASSERT(false, "residue size should only be 0, 4 or 8.");7353}7354return false;7355}7356else7357{7358return false;7359}7360}736173627363// Generate the code to initialize the data portion of an allocated object.7364// Zero-initialize the monitor slot in the header at the same time.7365// If "sizeReg" is non-null it contains the number of array elements and7366// "elementSize" contains the size of each element.7367// Otherwise the object size is in "objectSize".7368//7369static bool genZeroInitObject(7370TR::Node *node,7371int32_t objectSize,7372int32_t elementSize,7373TR::Register *sizeReg,7374TR::Register *targetReg,7375TR::Register *tempReg,7376TR::Register *segmentReg,7377TR::Register *&scratchReg,7378TR::CodeGenerator *cg)7379{7380// object header flags now occupy 4bytes on 
64-bit7381TR::ILOpCodes opcode = node->getOpCodeValue();7382TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());7383TR::Compilation *comp = cg->comp();73847385bool isArrayNew = (opcode != TR::New) ;7386TR_OpaqueClassBlock *clazz = NULL;73877388// set up clazz value here7389comp->canAllocateInline(node, clazz);73907391int32_t numSlots = 0;7392int32_t startOfZeroInits = isArrayNew ? TR::Compiler->om.contiguousArrayHeaderSizeInBytes() : TR::Compiler->om.objectHeaderSizeInBytes();73937394if (comp->target().is64Bit())7395{7396// round down to the nearest word size7397TR_ASSERT(startOfZeroInits < 0xF8, "expecting start of zero inits to be the size of the header");7398startOfZeroInits &= 0xF8;7399}74007401numSlots = (int32_t)((objectSize - startOfZeroInits)/TR::Compiler->om.sizeofReferenceAddress());74027403bool generateArraylets = comp->generateArraylets();74047405int32_t i;740674077408// *** old object header ***7409// since i'm always confused,7410// here is the layout of an object7411//7412// #if defined(J9VM_THR_LOCK_NURSERY)7413//7414// on 32-bit7415// for an indexable object [header = 4 or 3 slots]7416// #if defined(J9VM_THR_LOCK_NURSERY_FAT_ARRAYS)7417// --clazz-- --flags-- --monitor-- --size-- <--data-->7418// #else7419// --clazz-- --flags-- --size-- <--data-->7420// #endif7421//7422// for a non-indexable object (if the object has sync methods, monitor7423// slot is part of the data slots) [header = 2 slots]7424// --clazz-- --flags-- <--data-->7425//7426// on 64-bit7427// for an indexable object [header = 3 or 2 slots]7428// #if defined(J9VM_THR_LOCK_NURSERY_FAT_ARRAYS)7429// --clazz-- --flags+size-- --monitor-- <--data-->7430// #else7431// --clazz-- --flags+size-- <--data-->7432// #endif7433//7434// for a non-indexable object [header = 2 slots]7435// --clazz-- --flags-- <--data-->7436//7437// #else7438//7439// on 32-bit7440// for an indexable object [header = 4 slots]7441// --clazz-- --flags-- --monitor-- --size-- <--data-->7442//7443// for a non-indexable object 
[header = 3 slots]7444// --clazz-- --flags-- --monitor-- <--data-->7445//7446// on 64-bit7447// for an indexable object [header = 3 slots]7448// --clazz-- --flags+size-- --monitor-- <--data-->7449//7450// for a non-indexable object [header = 3 slots]7451// --clazz-- --flags-- --monitor-- <--data-->7452//7453// #endif7454//7455// Packed Objects adds two more fields,7456//74577458if (!minRepstosdWords)7459{7460static char *p= feGetEnv("TR_MinRepstosdWords");7461if (p)7462minRepstosdWords = atoi(p);7463else7464minRepstosdWords = MIN_REPSTOSD_WORDS; // Use default value7465}74667467int32_t alignmentDelta = 0; // for aligning properly to get best performance from REP TR::InstOpCode::STOSD/TR::InstOpCode::STOSQ74687469if (sizeReg || (numSlots + alignmentDelta) >= minRepstosdWords)7470{7471// Zero-initialize by using REP TR::InstOpCode::STOSD/TR::InstOpCode::STOSQ.7472//7473// startOffset will be monitorSlot only for arrays74747475generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, segmentReg, generateX86MemoryReference(targetReg, startOfZeroInits, cg), cg);74767477if (sizeReg)7478{7479int32_t additionalSlots = 0;74807481if (generateArraylets)7482{7483additionalSlots++;7484if (elementSize > sizeof(UDATA))7485additionalSlots++;7486}74877488switch (elementSize)7489{7490// Calculate the number of slots by rounding up to number of words,7491// adding in partialHeaderSize.adding in partialHeaderSize.7492//7493case 1:7494if (comp->target().is64Bit())7495{7496generateRegMemInstruction(TR::InstOpCode::LEA8RegMem, node, tempReg, generateX86MemoryReference(sizeReg, (additionalSlots*8)+7, cg), cg);7497generateRegImmInstruction(TR::InstOpCode::SHR8RegImm1, node, tempReg, 3, cg);7498}7499else7500{7501generateRegMemInstruction(TR::InstOpCode::LEA4RegMem, node, tempReg, generateX86MemoryReference(sizeReg, (additionalSlots*4)+3, cg), cg);7502generateRegImmInstruction(TR::InstOpCode::SHR4RegImm1, node, tempReg, 2, cg);7503}7504break;7505case 2:7506if 
(comp->target().is64Bit())7507{7508generateRegMemInstruction(TR::InstOpCode::LEA8RegMem, node, tempReg, generateX86MemoryReference(sizeReg, (additionalSlots*4)+3, cg), cg);7509generateRegImmInstruction(TR::InstOpCode::SHR8RegImm1, node, tempReg, 2, cg);7510}7511else7512{7513generateRegMemInstruction(TR::InstOpCode::LEA4RegMem, node, tempReg, generateX86MemoryReference(sizeReg, (additionalSlots*2)+1, cg), cg);7514generateRegImmInstruction(TR::InstOpCode::SHR4RegImm1, node, tempReg, 1, cg);7515}7516break;7517case 4:7518if (comp->target().is64Bit())7519{7520generateRegMemInstruction(TR::InstOpCode::LEA8RegMem, node, tempReg, generateX86MemoryReference(sizeReg, (additionalSlots*2)+1, cg), cg);7521generateRegImmInstruction(TR::InstOpCode::SHR8RegImm1, node, tempReg, 1, cg);7522}7523else7524{7525generateRegMemInstruction(TR::InstOpCode::LEA4RegMem, node, tempReg,7526generateX86MemoryReference(sizeReg, additionalSlots, cg), cg);7527}7528break;7529case 8:7530if (comp->target().is64Bit())7531{7532generateRegMemInstruction(TR::InstOpCode::LEA8RegMem, node, tempReg,7533generateX86MemoryReference(sizeReg, additionalSlots, cg), cg);7534}7535else7536{7537generateRegMemInstruction(TR::InstOpCode::LEA4RegMem, node, tempReg,7538generateX86MemoryReference(NULL, sizeReg,7539TR::MemoryReference::convertMultiplierToStride(2),7540additionalSlots, cg), cg);7541}7542break;7543}7544}7545else7546{7547// Fixed size7548//7549generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(), node, tempReg, numSlots + alignmentDelta, cg);7550if (comp->target().is64Bit())7551{7552// TODO AMD64: replace both instructions with a LEA tempReg, [disp32]7553//7554generateRegRegInstruction(TR::InstOpCode::MOVSXReg8Reg4, node, tempReg, tempReg, cg);7555}7556}75577558if (comp->target().is64Bit())7559{7560scratchReg = cg->allocateRegister();7561generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, scratchReg, targetReg, cg);7562}7563else7564{7565generateRegInstruction(TR::InstOpCode::PUSHReg, node, targetReg, 
cg);7566}75677568generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, targetReg, targetReg, cg);75697570// We just pushed targetReg on the stack and zeroed it out. targetReg contained the address of the7571// beginning of the header. We want to use the 0-reg to initialize the monitor slot, so we use7572// segmentReg, which points to targetReg+startOfZeroInits and subtract the extra offset.75737574bool initLw = (node->getOpCodeValue() != TR::New);7575int lwOffset = fej9->getByteOffsetToLockword(clazz);7576initLw = false;75777578if (initLw)7579{7580TR::InstOpCode::Mnemonic op = (comp->target().is64Bit() && fej9->generateCompressedLockWord()) ? TR::InstOpCode::S4MemReg : TR::InstOpCode::SMemReg();7581generateMemRegInstruction(op, node, generateX86MemoryReference(segmentReg, lwOffset-startOfZeroInits, cg), targetReg, cg);7582}75837584TR::InstOpCode::Mnemonic op = comp->target().is64Bit() ? TR::InstOpCode::REPSTOSQ : TR::InstOpCode::REPSTOSD;7585generateInstruction(op, node, cg);75867587if (comp->target().is64Bit())7588{7589generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, targetReg, scratchReg, cg);7590}7591else7592{7593generateRegInstruction(TR::InstOpCode::POPReg, node, targetReg, cg);7594}75957596return true;7597}75987599if (numSlots > 0)7600{7601generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, tempReg, tempReg, cg);76027603bool initLw = (node->getOpCodeValue() != TR::New);7604int lwOffset = fej9->getByteOffsetToLockword(clazz);7605initLw = false;76067607if (initLw)7608{7609TR::InstOpCode::Mnemonic op = (comp->target().is64Bit() && fej9->generateCompressedLockWord()) ? 
TR::InstOpCode::S4MemReg : TR::InstOpCode::SMemReg();7610generateMemRegInstruction(op, node, generateX86MemoryReference(targetReg, lwOffset, cg), tempReg, cg);7611}7612}7613else7614{7615bool initLw = (node->getOpCodeValue() != TR::New);7616int lwOffset = fej9->getByteOffsetToLockword(clazz);7617initLw = false;76187619if (initLw)7620{7621TR::InstOpCode::Mnemonic op = (comp->target().is64Bit() && fej9->generateCompressedLockWord()) ? TR::InstOpCode::S4MemImm4 : TR::InstOpCode::SMemImm4();7622generateMemImmInstruction(op, node, generateX86MemoryReference(targetReg, lwOffset, cg), 0, cg);7623}7624return false;7625}76267627int32_t numIterations = numSlots/maxZeroInitWordsPerIteration;7628if (numIterations > 1)7629{7630// Generate the initializations in a loop7631//7632int32_t numLoopSlots = numIterations*maxZeroInitWordsPerIteration;7633int32_t endOffset;76347635endOffset = (int32_t)(numLoopSlots*TR::Compiler->om.sizeofReferenceAddress() + startOfZeroInits);76367637generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(), node, segmentReg, -((numIterations-1)*maxZeroInitWordsPerIteration), cg);76387639if (comp->target().is64Bit())7640generateRegRegInstruction(TR::InstOpCode::MOVSXReg8Reg4, node, segmentReg, segmentReg, cg);76417642TR::LabelSymbol *loopLabel = generateLabelSymbol(cg);7643generateLabelInstruction(TR::InstOpCode::label, node, loopLabel, cg);7644for (i = maxZeroInitWordsPerIteration; i > 0; i--)7645{7646generateMemRegInstruction(TR::InstOpCode::SMemReg(), node,7647generateX86MemoryReference(targetReg,7648segmentReg,7649TR::MemoryReference::convertMultiplierToStride((int32_t)TR::Compiler->om.sizeofReferenceAddress()),7650endOffset - TR::Compiler->om.sizeofReferenceAddress()*i, cg),7651tempReg, cg);7652}7653generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, segmentReg, maxZeroInitWordsPerIteration, cg);7654generateLabelInstruction(TR::InstOpCode::JLE4, node, loopLabel, cg);76557656// Generate the left-over initializations7657//7658for (i = 0; i < 
numSlots % maxZeroInitWordsPerIteration; i++)7659{7660generateMemRegInstruction(TR::InstOpCode::SMemReg(), node,7661generateX86MemoryReference(targetReg,7662endOffset+TR::Compiler->om.sizeofReferenceAddress()*i, cg),7663tempReg, cg);7664}7665}7666else7667{7668// Generate the initializations inline7669//7670for (i = 0; i < numSlots; i++)7671{7672// Don't bother initializing the array-size slot7673//7674generateMemRegInstruction(TR::InstOpCode::SMemReg(), node,7675generateX86MemoryReference(targetReg,7676i*TR::Compiler->om.sizeofReferenceAddress() + startOfZeroInits, cg),7677tempReg, cg);7678}7679}76807681return false;7682}76837684TR::Register *7685objectCloneEvaluator(7686TR::Node *node,7687TR::CodeGenerator *cg)7688{7689/*7690* Commented out Object.clone() code has been removed for code cleanliness.7691* If it needs to be resurrected it can be found in RTC or CMVC.7692*/7693return NULL;7694}769576967697TR::Register *7698J9::X86::TreeEvaluator::VMnewEvaluator(7699TR::Node *node,7700TR::CodeGenerator *cg)7701{7702// See if inline allocation is appropriate.7703//7704// Don't do the inline allocation if we are generating JVMPI hooks, since7705// JVMPI needs to know about the allocation.7706//7707TR::Compilation *comp = cg->comp();7708TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());77097710if (comp->suppressAllocationInlining())7711return NULL;77127713// If the helper does not preserve all the registers there will not be7714// enough registers to do the inline allocation.7715// Also, don't do the inline allocation if optimizing for space7716//7717TR::MethodSymbol *helperSym = node->getSymbol()->castToMethodSymbol();7718if (!helperSym->preservesAllRegisters())7719return NULL;77207721TR_OpaqueClassBlock *clazz = NULL;7722TR::Register *classReg = NULL;7723bool isArrayNew = false;7724int32_t allocationSize = 0;7725int32_t objectSize = 0;7726int32_t elementSize = 0;7727int32_t dataOffset = 0;77287729bool realTimeGC = comp->getOptions()->realTimeGC();7730bool 
generateArraylets = comp->generateArraylets();77317732TR::Register *segmentReg = NULL;7733TR::Register *tempReg = NULL;7734TR::Register *targetReg = NULL;7735TR::Register *sizeReg = NULL;773677377738/**7739* Study of registers used in inline allocation.7740*7741* Result goes to targetReg. Unless outlinedHelperCall is used, which requires an extra register move to targetReg2.7742* targetReg2 is needed because the result needs to be CollectedReferenceRegister, but only after object is ready.7743*7744* classReg contains the J9Class for the object to be allocated. Not always used; instead, when loadaddr is not evaluated, it7745* is rematerialized like a constant (in which case, clazz contains the known value). When it is rematerialized, there are7746* 'interesting' AOT/HCR patching routines.7747*7748* sizeReg is used for array allocations to hold the number of elements. However...7749* for packed variable (objectSize==0) arrays, sizeReg behaves like segmentReg should (i.e. contains size in _bytes_): elementSize7750* is set to 1 and sizeReg is result of multiplication of real elementSize vs element count.7751*7752* segmentReg contains the size, _in bytes!_, of the object/array to be allocated. When outlining is used, it will be bound to edi.7753* This must contain the rounding (i.e. 8-aligned, so address will always end in 0x0 or 0x8). When size cannot be known (i.e.7754* dynamic array size) explicit assembly is generated to do rounding (allocationSize is reused to contain the header offset).7755* After tlh-top comparison, this register is reused as a temporary register (i.e. genHeapAlloc in non-outlined path, and7756* inside the outlined codert asm sequences). 
This size is not available at non-outlined zero-initialization routine and needs7757* to be re-materialized.7758*7759*/77607761TR::RegisterDependencyConditions *deps;77627763// --------------------------------------------------------------------------------7764//7765// Find the class info and allocation size depending on the node type.7766//7767// Returns:7768// size of object includes the size of the array header7769// -1 cannot allocate inline7770// 0 variable sized allocation7771//7772// --------------------------------------------------------------------------------77737774objectSize = comp->canAllocateInline(node, clazz);7775if (objectSize < 0)7776return NULL;7777// Currently dynamic allocation is only supported on reference array.7778// We are performing dynamic array allocation if both object size and7779// class block cannot be statically determined.7780bool dynamicArrayAllocation = (node->getOpCodeValue() == TR::anewarray)7781&& (objectSize == 0) && (clazz == NULL);7782allocationSize = objectSize;77837784static long count = 0;7785if (!performTransformation(comp, "O^O <%3d> Inlining Allocation of %s [0x%p].\n", count++, node->getOpCode().getName(), node))7786return NULL;77877788if (node->getOpCodeValue() == TR::New)7789{7790if (comp->getOption(TR_DisableAllocationInlining))7791return 0;77927793// realtimeGC: cannot inline if object size is too big to get a size class7794if (comp->getOptions()->realTimeGC())7795{7796if ((uint32_t) objectSize > fej9->getMaxObjectSizeForSizeClass())7797return NULL;7798}77997800dataOffset = TR::Compiler->om.objectHeaderSizeInBytes(); //Not used...7801classReg = node->getFirstChild()->getRegister();7802TR_ASSERT(objectSize > 0, "assertion failure");7803}7804else7805{7806if (node->getOpCodeValue() == TR::newarray)7807{7808if (comp->getOption(TR_DisableAllocationInlining))7809return 0;78107811elementSize = TR::Compiler->om.getSizeOfArrayElement(node);7812}7813else7814{7815// Must be TR::anewarray7816//7817if 
(comp->getOption(TR_DisableAllocationInlining))7818return 0;78197820if (comp->useCompressedPointers())7821elementSize = TR::Compiler->om.sizeofReferenceField();7822else7823elementSize = (int32_t)TR::Compiler->om.sizeofReferenceAddress();78247825classReg = node->getSecondChild()->getRegister();7826// For dynamic array allocation, need to evaluate second child7827if (!classReg && dynamicArrayAllocation)7828classReg = cg->evaluate(node->getSecondChild());7829}78307831isArrayNew = true;78327833if (generateArraylets)7834{7835dataOffset = fej9->getArrayletFirstElementOffset(elementSize, comp);7836}7837else7838{7839dataOffset = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();7840}7841}78427843TR::LabelSymbol *startLabel = generateLabelSymbol(cg);7844TR::LabelSymbol *fallThru = generateLabelSymbol(cg);7845startLabel->setStartInternalControlFlow();7846fallThru->setEndInternalControlFlow();78477848#ifdef J9VM_GC_NON_ZERO_TLH7849// If we can skip zero init, and it is not outlined new, we use the new TLH7850// same logic also appears later, but we need to do this before generate the helper call7851//7852if (node->canSkipZeroInitialization() && !comp->getOption(TR_DisableDualTLH) && !comp->getOptions()->realTimeGC())7853{7854// For value types, it should use jitNewValue helper call which is set up before code gen7855if ((node->getOpCodeValue() == TR::New)7856&& (!TR::Compiler->om.areValueTypesEnabled() || (node->getSymbolReference() != comp->getSymRefTab()->findOrCreateNewValueSymbolRef(comp->getMethodSymbol()))))7857node->setSymbolReference(comp->getSymRefTab()->findOrCreateNewObjectNoZeroInitSymbolRef(comp->getMethodSymbol()));7858else if (node->getOpCodeValue() == TR::newarray)7859node->setSymbolReference(comp->getSymRefTab()->findOrCreateNewArrayNoZeroInitSymbolRef(comp->getMethodSymbol()));7860if (comp->getOption(TR_TraceCG))7861traceMsg(comp, "SKIPZEROINIT: for %p, change the symbol to %p ", node, node->getSymbolReference());7862}7863else7864{7865if 
(comp->getOption(TR_TraceCG))7866traceMsg(comp, "NOSKIPZEROINIT: for %p, keep symbol as %p ", node, node->getSymbolReference());7867}7868#endif7869TR::LabelSymbol *failLabel = generateLabelSymbol(cg);78707871segmentReg = cg->allocateRegister();78727873tempReg = cg->allocateRegister();78747875// If the size is variable, evaluate it into a register7876//7877if (objectSize == 0)7878{7879sizeReg = cg->evaluate(node->getFirstChild());7880allocationSize += dataOffset;7881if (comp->getOption(TR_TraceCG))7882traceMsg(comp, "allocationSize %d dataOffset %d\n", allocationSize, dataOffset);7883}7884else7885{7886sizeReg = NULL;7887}78887889generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);78907891// Generate the heap allocation, and the snippet that will handle heap overflow.7892//7893TR_OutlinedInstructions *outlinedHelperCall = NULL;7894targetReg = cg->allocateRegister();7895outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(node, TR::acall, targetReg, failLabel, fallThru, cg);7896cg->getOutlinedInstructionsList().push_front(outlinedHelperCall);78977898TR::Instruction * startInstr = cg->getAppendInstruction();78997900// --------------------------------------------------------------------------------7901//7902// Do the allocation from the TLH and bump pointers.7903//7904// The address of the start of the allocated heap space will be in targetReg.7905//7906// --------------------------------------------------------------------------------79077908bool canUseFastInlineAllocation =7909(!comp->getOptions()->realTimeGC() &&7910!comp->generateArraylets()) ? 
true : false;79117912bool useRepInstruction;7913bool monitorSlotIsInitialized;7914bool skipOutlineZeroInit = false;7915TR_ExtraInfoForNew *initInfo = node->getSymbolReference()->getExtraInfo();7916if (node->canSkipZeroInitialization())7917{7918skipOutlineZeroInit = true;7919}7920else if (initInfo)7921{7922if (node->canSkipZeroInitialization())7923{7924initInfo->zeroInitSlots = NULL;7925initInfo->numZeroInitSlots = 0;7926skipOutlineZeroInit = true;7927}7928else if (initInfo->numZeroInitSlots <= 0)7929{7930skipOutlineZeroInit = true;7931}7932}79337934if (skipOutlineZeroInit && !performTransformation(comp, "O^O OUTLINED NEW: skip outlined zero init on %s %p\n", cg->getDebug()->getName(node), node))7935skipOutlineZeroInit = false;79367937// Faster inlined sequence. It does not understand arraylet shapes yet.7938//7939if (canUseFastInlineAllocation)7940{7941genHeapAlloc2(node, clazz, allocationSize, elementSize, sizeReg, targetReg, segmentReg, tempReg, failLabel, cg);7942}7943else7944{7945genHeapAlloc(node, clazz, allocationSize, elementSize, sizeReg, targetReg, segmentReg, tempReg, failLabel, cg);7946}79477948// --------------------------------------------------------------------------------7949//7950// Perform zero-initialization on data slots.7951//7952// There may be information about which slots are to be zero-initialized.7953// If there is no information, all slots must be zero-initialized.7954//7955// --------------------------------------------------------------------------------79567957TR::Register *scratchReg = NULL;7958bool shouldInitZeroSizedArrayHeader = true;79597960#ifdef J9VM_GC_NON_ZERO_TLH7961if (comp->getOption(TR_DisableDualTLH) || comp->getOptions()->realTimeGC())7962{7963#endif7964if (!maxZeroInitWordsPerIteration)7965{7966static char *p = feGetEnv("TR_MaxZeroInitWordsPerIteration");7967if (p)7968maxZeroInitWordsPerIteration = atoi(p);7969else7970maxZeroInitWordsPerIteration = MAX_ZERO_INIT_WORDS_PER_ITERATION; // Use default value7971}79727973if 
(initInfo && initInfo->zeroInitSlots)7974{7975// If there are too many words to be individually initialized, initialize7976// them all7977//7978if (initInfo->numZeroInitSlots >= maxZeroInitWordsPerIteration*2-1)7979initInfo->zeroInitSlots = NULL;7980}79817982if (initInfo && initInfo->zeroInitSlots)7983{7984// Zero-initialize by explicit zero stores.7985// Use the supplied bit vector to identify which slots to initialize7986//7987// Zero-initialize the monitor slot in the header at the same time.7988//7989TR_BitVectorIterator bvi(*initInfo->zeroInitSlots);7990static bool UseOldBVI = feGetEnv("TR_UseOldBVI");7991if (UseOldBVI)7992{7993generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, tempReg, tempReg, cg);7994while (bvi.hasMoreElements())7995{7996generateMemRegInstruction(TR::InstOpCode::S4MemReg, node,7997generateX86MemoryReference(targetReg, bvi.getNextElement()*4 +dataOffset, cg),7998tempReg, cg);7999}8000}8001else8002{8003int32_t lastElementIndex = -1;8004int32_t nextE = -2;8005int32_t span = 0;8006int32_t lastSpan = -1;8007scratchReg = cg->allocateRegister(TR_FPR);8008generateRegRegInstruction(TR::InstOpCode::PXORRegReg, node, scratchReg, scratchReg, cg);8009while (bvi.hasMoreElements())8010{8011nextE = bvi.getNextElement();8012if (-1 == lastElementIndex) lastElementIndex = nextE;8013span = nextE - lastElementIndex;8014TR_ASSERT(span>=0, "SPAN < 0");8015if (span < 3)8016{8017lastSpan = span;8018continue;8019}8020else if (span == 3)8021{8022generateMemRegInstruction(TR::InstOpCode::MOVDQUMemReg, node, generateX86MemoryReference(targetReg, lastElementIndex*4 +dataOffset, cg), scratchReg, cg);8023lastSpan = -1;8024lastElementIndex = -1;8025}8026else if (span > 3)8027{8028if (lastSpan == 0)8029{8030generateMemRegInstruction(TR::InstOpCode::MOVDMemReg, node, generateX86MemoryReference(targetReg, lastElementIndex*4 +dataOffset, cg), scratchReg, cg);8031}8032else if (lastSpan == 1)8033{8034generateMemRegInstruction(TR::InstOpCode::MOVQMemReg, 
node,generateX86MemoryReference(targetReg, lastElementIndex*4 +dataOffset, cg), scratchReg, cg);8035}8036else8037{8038generateMemRegInstruction(TR::InstOpCode::MOVDQUMemReg, node, generateX86MemoryReference(targetReg, lastElementIndex*4 +dataOffset, cg), scratchReg, cg);8039}8040lastElementIndex = nextE;8041lastSpan = 0;8042}8043}8044if (lastSpan == 0)8045{8046generateMemRegInstruction(TR::InstOpCode::MOVDMemReg, node, generateX86MemoryReference(targetReg, lastElementIndex*4 +dataOffset, cg), scratchReg, cg);8047}8048else if (lastSpan == 1)8049{8050generateMemRegInstruction(TR::InstOpCode::MOVQMemReg, node,generateX86MemoryReference(targetReg, lastElementIndex*4 +dataOffset, cg), scratchReg, cg);8051}8052else if (lastSpan == 2)8053{8054TR_ASSERT(dataOffset >= 4, "dataOffset must be >= 4.");8055generateMemRegInstruction(TR::InstOpCode::MOVDQUMemReg, node, generateX86MemoryReference(targetReg, lastElementIndex*4 +dataOffset - 4, cg), scratchReg, cg);8056}8057}80588059useRepInstruction = false;80608061J9JavaVM * jvm = fej9->getJ9JITConfig()->javaVM;8062if (jvm->lockwordMode == LOCKNURSERY_ALGORITHM_ALL_INHERIT)8063monitorSlotIsInitialized = false;8064else8065monitorSlotIsInitialized = true;8066}8067else if ((!initInfo || initInfo->numZeroInitSlots > 0) &&8068!node->canSkipZeroInitialization())8069{8070// Initialize all slots8071//8072if (canUseFastInlineAllocation)8073{8074useRepInstruction = genZeroInitObject2(node, objectSize, elementSize, sizeReg, targetReg, tempReg, segmentReg, scratchReg, cg);8075shouldInitZeroSizedArrayHeader = false;8076}8077else8078{8079useRepInstruction = genZeroInitObject(node, objectSize, elementSize, sizeReg, targetReg, tempReg, segmentReg, scratchReg, cg);8080}80818082J9JavaVM * jvm = fej9->getJ9JITConfig()->javaVM;8083if (jvm->lockwordMode == LOCKNURSERY_ALGORITHM_ALL_INHERIT)8084monitorSlotIsInitialized = false;8085else8086monitorSlotIsInitialized = true;8087}8088else8089{8090// Skip data initialization8091//8092if 
(canUseFastInlineAllocation)8093{8094// Even though we can skip the data initialization, for arrays of unknown size we still have8095// to initialize at least one slot to cover the discontiguous length field in case the array8096// is zero sized. This is because the length is not checked at runtime and is only needed8097// for non-native 64-bit targets where the discontiguous length slot is already initialized8098// via the contiguous length slot.8099//8100if (node->getOpCodeValue() != TR::New &&8101(comp->target().is32Bit() || comp->useCompressedPointers()))8102{8103generateMemImmInstruction(TR::InstOpCode::SMemImm4(), node,8104generateX86MemoryReference(targetReg, fej9->getOffsetOfDiscontiguousArraySizeField(), cg),81050, cg);8106shouldInitZeroSizedArrayHeader = false;8107}8108}81098110monitorSlotIsInitialized = false;8111useRepInstruction = false;8112}8113#ifdef J9VM_GC_NON_ZERO_TLH8114}8115else8116{8117monitorSlotIsInitialized = false;8118useRepInstruction = false;8119}8120#endif81218122// --------------------------------------------------------------------------------8123// Initialize the header8124// --------------------------------------------------------------------------------8125// If dynamic array allocation, must pass in classReg to initialize the array header8126if ((fej9->inlinedAllocationsMustBeVerified() && !comp->getOption(TR_UseSymbolValidationManager) && node->getOpCodeValue() == TR::anewarray) || dynamicArrayAllocation)8127{8128genInitArrayHeader(8129node,8130clazz,8131classReg,8132targetReg,8133sizeReg,8134elementSize,8135dataOffset,8136tempReg,8137monitorSlotIsInitialized,8138true,8139shouldInitZeroSizedArrayHeader,8140cg);8141}8142else if (isArrayNew)8143{8144genInitArrayHeader(8145node,8146clazz,8147NULL,8148targetReg,8149sizeReg,8150elementSize,8151dataOffset,8152tempReg,8153monitorSlotIsInitialized,8154false,8155shouldInitZeroSizedArrayHeader,8156cg);8157}8158else8159{8160genInitObjectHeader(node, clazz, classReg, targetReg, tempReg, 
monitorSlotIsInitialized, false, cg);8161}8162TR::Register *discontiguousDataAddrOffsetReg = NULL;8163#ifdef TR_TARGET_64BIT8164if (isArrayNew)8165{8166/* Here we'll update dataAddr slot for both fixed and variable length arrays. Fixed length arrays are8167* simple as we just need to check first child of the node for array size. For variable length arrays8168* runtime size checks are needed to determine whether to use contiguous or discontiguous header layout.8169*8170* In both scenarios, arrays of non-zero size use contiguous header layout while zero size arrays use8171* discontiguous header layout.8172*/8173TR::MemoryReference *dataAddrSlotMR = NULL;8174TR::MemoryReference *dataAddrMR = NULL;8175if (TR::Compiler->om.compressObjectReferences() && NULL != sizeReg)8176{8177/* We need to check sizeReg at runtime to determine correct offset of dataAddr field.8178* Here we deal only with compressed refs because dataAddr field offset for discontiguous8179* and contiguous arrays is the same in full refs.8180*/8181if (comp->getOption(TR_TraceCG))8182traceMsg(comp, "Node (%p): Dealing with compressed refs variable length array.\n", node);81838184TR_ASSERT_FATAL_WITH_NODE(node,8185(fej9->getOffsetOfDiscontiguousDataAddrField() - fej9->getOffsetOfContiguousDataAddrField()) == 8,8186"Offset of dataAddr field in discontiguous array is expected to be 8 bytes more than contiguous array. 
"8187"But was %d bytes for discontigous and %d bytes for contiguous array.\n",8188fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());81898190discontiguousDataAddrOffsetReg = cg->allocateRegister();8191generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, discontiguousDataAddrOffsetReg, discontiguousDataAddrOffsetReg, cg);8192generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, sizeReg, 1, cg);8193generateRegImmInstruction(TR::InstOpCode::ADCRegImm4(), node, discontiguousDataAddrOffsetReg, 0, cg);8194dataAddrMR = generateX86MemoryReference(targetReg, discontiguousDataAddrOffsetReg, 3, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);8195dataAddrSlotMR = generateX86MemoryReference(targetReg, discontiguousDataAddrOffsetReg, 3, fej9->getOffsetOfContiguousDataAddrField(), cg);8196}8197else if (NULL == sizeReg && node->getFirstChild()->getOpCode().isLoadConst() && node->getFirstChild()->getInt() == 0)8198{8199if (comp->getOption(TR_TraceCG))8200traceMsg(comp, "Node (%p): Dealing with full/compressed refs fixed length zero size array.\n", node);82018202dataAddrMR = generateX86MemoryReference(targetReg, TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), cg);8203dataAddrSlotMR = generateX86MemoryReference(targetReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg);8204}8205else8206{8207if (comp->getOption(TR_TraceCG))8208{8209traceMsg(comp,8210"Node (%p): Dealing with either full/compressed refs fixed length non-zero size array "8211"or full refs variable length array.\n",8212node);8213}82148215if (!TR::Compiler->om.compressObjectReferences())8216{8217TR_ASSERT_FATAL_WITH_NODE(node,8218fej9->getOffsetOfDiscontiguousDataAddrField() == fej9->getOffsetOfContiguousDataAddrField(),8219"dataAddr field offset is expected to be same for both contiguous and discontiguous arrays in full refs. 
"8220"But was %d bytes for discontiguous and %d bytes for contiguous array.\n",8221fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());8222}82238224dataAddrMR = generateX86MemoryReference(targetReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);8225dataAddrSlotMR = generateX86MemoryReference(targetReg, fej9->getOffsetOfContiguousDataAddrField(), cg);8226}82278228// write first data element address to dataAddr slot8229generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, tempReg, dataAddrMR, cg);8230generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, dataAddrSlotMR, tempReg, cg);8231}8232#endif /* TR_TARGET_64BIT */82338234if (fej9->inlinedAllocationsMustBeVerified() && (node->getOpCodeValue() == TR::New ||8235node->getOpCodeValue() == TR::anewarray) )8236{8237startInstr = startInstr->getNext();8238TR_OpaqueClassBlock *classToValidate = clazz;82398240TR_RelocationRecordInformation *recordInfo =8241(TR_RelocationRecordInformation *) comp->trMemory()->allocateMemory(sizeof(TR_RelocationRecordInformation), heapAlloc);8242recordInfo->data1 = allocationSize;8243recordInfo->data2 = node->getInlinedSiteIndex();8244recordInfo->data3 = (uintptr_t) failLabel;8245recordInfo->data4 = (uintptr_t) startInstr;82468247TR::SymbolReference * classSymRef;8248TR_ExternalRelocationTargetKind reloKind;82498250if (node->getOpCodeValue() == TR::New)8251{8252classSymRef = node->getFirstChild()->getSymbolReference();8253reloKind = TR_VerifyClassObjectForAlloc;8254}8255else8256{8257classSymRef = node->getSecondChild()->getSymbolReference();8258reloKind = TR_VerifyRefArrayForAlloc;82598260if (comp->getOption(TR_UseSymbolValidationManager))8261classToValidate = comp->fej9()->getComponentClassFromArrayClass(classToValidate);8262}82638264if (comp->getOption(TR_UseSymbolValidationManager))8265{8266TR_ASSERT(classToValidate, "classToValidate should not be NULL, clazz=%p\n", clazz);8267recordInfo->data5 = 
(uintptr_t)classToValidate;8268}82698270cg->addExternalRelocation(new (cg->trHeapMemory()) TR::BeforeBinaryEncodingExternalRelocation(startInstr,8271(uint8_t *) classSymRef,8272(uint8_t *) recordInfo,8273reloKind, cg),8274__FILE__, __LINE__, node);8275}82768277int32_t numDeps = 4;8278if (classReg)8279numDeps += 2;8280if (sizeReg)8281numDeps += 2;82828283if (scratchReg)8284numDeps++;82858286if (outlinedHelperCall)8287{8288if (node->getOpCodeValue() == TR::New)8289numDeps++;8290else8291numDeps += 2;8292}82938294// Create dependencies for the allocation registers here.8295// The size and class registers, if they exist, must be the first8296// dependencies since the heap allocation snippet needs to find them to grab8297// the real registers from them.8298//8299deps = generateRegisterDependencyConditions((uint8_t)0, numDeps, cg);83008301if (sizeReg)8302deps->addPostCondition(sizeReg, TR::RealRegister::NoReg, cg);8303if (classReg)8304deps->addPostCondition(classReg, TR::RealRegister::NoReg, cg);83058306deps->addPostCondition(targetReg, TR::RealRegister::eax, cg);8307deps->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);83088309if (useRepInstruction)8310{8311deps->addPostCondition(tempReg, TR::RealRegister::ecx, cg);8312deps->addPostCondition(segmentReg, TR::RealRegister::edi, cg);8313}8314else8315{8316deps->addPostCondition(tempReg, TR::RealRegister::NoReg, cg);8317if (segmentReg)8318deps->addPostCondition(segmentReg, TR::RealRegister::NoReg, cg);8319}83208321if (NULL != discontiguousDataAddrOffsetReg)8322{8323deps->addPostCondition(discontiguousDataAddrOffsetReg, TR::RealRegister::NoReg, cg);8324cg->stopUsingRegister(discontiguousDataAddrOffsetReg);8325}83268327if (scratchReg)8328{8329deps->addPostCondition(scratchReg, TR::RealRegister::NoReg, cg);8330cg->stopUsingRegister(scratchReg);8331}83328333if (outlinedHelperCall)8334{8335TR::Node *callNode = outlinedHelperCall->getCallNode();8336TR::Register *reg;83378338if (callNode->getFirstChild() == 
node->getFirstChild())8339{8340reg = callNode->getFirstChild()->getRegister();8341if (reg)8342deps->unionPostCondition(reg, TR::RealRegister::NoReg, cg);8343}83448345if (node->getOpCodeValue() != TR::New)8346if (callNode->getSecondChild() == node->getSecondChild())8347{8348reg = callNode->getSecondChild()->getRegister();8349if (reg)8350deps->unionPostCondition(reg, TR::RealRegister::NoReg, cg);8351}8352}83538354deps->stopAddingConditions();83558356generateLabelInstruction(TR::InstOpCode::label, node, fallThru, deps, cg);83578358if (outlinedHelperCall) // 64bit or TR_newstructRef||TR_anewarraystructRef8359{8360// Copy the newly allocated object into a collected reference register now that it is a valid object.8361//8362TR::Register *targetReg2 = cg->allocateCollectedReferenceRegister();8363TR::RegisterDependencyConditions *deps2 = generateRegisterDependencyConditions(0, 1, cg);8364deps2->addPostCondition(targetReg2, TR::RealRegister::eax, cg);8365generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, targetReg2, targetReg, deps2, cg);8366cg->stopUsingRegister(targetReg);8367targetReg = targetReg2;8368}83698370cg->stopUsingRegister(segmentReg);8371cg->stopUsingRegister(tempReg);83728373// Decrement use counts on the children8374//8375cg->decReferenceCount(node->getFirstChild());8376if (isArrayNew)8377cg->decReferenceCount(node->getSecondChild());83788379node->setRegister(targetReg);8380return targetReg;8381}838283838384// Generate instructions to type-check a store into a reference-type array.8385// The code sequence determines if the destination is an array of "java/lang/Object" instances,8386// or if the source object has the correct type (i.e. 
// Generate instructions to type-check a store into a reference-type array.
// The code sequence determines if the destination is an array of "java/lang/Object" instances,
// or if the source object has the correct type (i.e. equal to the component type of the array).
//
// Fast paths emitted inline (each one branches to wrtbarLabel when it proves the store legal):
//   1. compressed refs only: compare against the java/lang/Object class
//   2. destination component class == source object's class
//   3. destination component class == source class's castClassCache
//   4. destination component class == the class recorded on the node by
//      getArrayComponentClassInNode(), when present and not disabled by option
//   5. superclass test: the source class's superclasses[] entry at the destination
//      component class's depth == destination component class
// If none of them succeeds, control reaches an outlined call built from the node's
// symbol reference.
//
// Parameters:
//   node                   - node the generated instructions are attributed to; also supplies
//                            the optional array component class and the helper symbol reference
//   sourceChild            - node of the object being stored; only getRegister() is read here,
//                            so it is presumably evaluated by the caller (confirm)
//   destinationChild       - node of the destination array; same remark as sourceChild
//   scratchRegisterManager - source of all temporaries used by the inline sequence
//   wrtbarLabel            - branch target taken when a fast path succeeds
//   prevInstr              - not referenced anywhere in this body
//   cg                     - code generator
//
// Side effects visible in this body: one TR_OutlinedInstructions is pushed on the code
// generator's outlined-instruction list, PIC/HCR sites may be registered on the compilation,
// and the reference counts of sourceChild and destinationChild are decremented once each.
//
void
J9::X86::TreeEvaluator::VMarrayStoreCHKEvaluator(
      TR::Node *node,
      TR::Node *sourceChild,
      TR::Node *destinationChild,
      TR_X86ScratchRegisterManager *scratchRegisterManager,
      TR::LabelSymbol *wrtbarLabel,
      TR::Instruction *prevInstr,
      TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
   TR::Register *sourceReg = sourceChild->getRegister();
   TR::Register *destReg = destinationChild->getRegister();
   TR::LabelSymbol *helperCallLabel = generateLabelSymbol(cg);

   // NOTE(review): with '||' the inline checks are skipped only when BOTH the environment
   // variable is set AND the debug option is set; confirm whether '&&' was intended.
   static char *disableArrayStoreCheckOpts = feGetEnv("TR_disableArrayStoreCheckOpts");
   if (!disableArrayStoreCheckOpts || !debug("noInlinedArrayStoreCHKs"))
      {
      // If the component type of the array is equal to the type of the source reference,
      // then the store always succeeds. The component type of the array is stored in a
      // field of the J9ArrayClass that represents the type of the array.
      //

      TR::Register *sourceClassReg = scratchRegisterManager->findOrCreateScratchRegister();
      TR::Register *destComponentClassReg = scratchRegisterManager->findOrCreateScratchRegister();

      TR::Instruction* instr;

      if (TR::Compiler->om.compressObjectReferences())
         {

         // FIXME: Add check for hint when doing the arraystore check as below when class pointer compression
         // is enabled.

         // Load and mask the destination array's class word (32-bit load).
         TR::MemoryReference *destTypeMR = generateX86MemoryReference(destReg, TR::Compiler->om.offsetOfObjectVftField(), cg);

         generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, destComponentClassReg, destTypeMR, cg); // class pointer is 32 bits
         TR::TreeEvaluator::generateVFTMaskInstruction(node, destComponentClassReg, cg);

         // -------------------------------------------------------------------------
         //
         // If the component type is java.lang.Object then the store always succeeds.
         //
         // -------------------------------------------------------------------------
         //
         // NOTE(review): at this point destComponentClassReg holds the class loaded from the
         // destination array's header; the componentType field is only loaded further below.
         // The compare is therefore made against the array's own class, not its component
         // class. Confirm this is intended.

         TR_OpaqueClassBlock *objectClass = fej9->getSystemClassFromClassName("java/lang/Object", 16);

         TR_ASSERT((((uintptr_t)objectClass) >> 32) == 0, "TR_OpaqueClassBlock must fit on 32 bits when using class pointer compression");
         instr = generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, destComponentClassReg, (uint32_t) ((uint64_t) objectClass), cg);

         generateLabelInstruction(TR::InstOpCode::JE4, node, wrtbarLabel, cg);

         // HCR in VMarrayStoreCHKEvaluator
         // The compare instruction embeds the class pointer as an immediate, so it is recorded
         // as a patch site when the code generator asks for it.
         if (cg->wantToPatchClassPointer(objectClass, node))
            comp->getStaticHCRPICSites()->push_front(instr);

         // here we may have to convert the TR_OpaqueClassBlock into a J9Class pointer
         // and store it in destComponentClassReg
         // ..

         // Replace the array class with its componentType field.
         TR::MemoryReference *destCompTypeMR =
            generateX86MemoryReference(destComponentClassReg, offsetof(J9ArrayClass, componentType), cg);
         generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, destComponentClassReg, destCompTypeMR, cg);

         // here we may have to convert the J9Class pointer from destComponentClassReg into
         // a TR_OpaqueClassBlock and store it back into destComponentClassReg
         // ..

         // Load and mask the source object's class word (32-bit load).
         TR::MemoryReference *sourceRegClassMR = generateX86MemoryReference(sourceReg, TR::Compiler->om.offsetOfObjectVftField(), cg);
         generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, sourceClassReg, sourceRegClassMR, cg);
         TR::TreeEvaluator::generateVFTMaskInstruction(node, sourceClassReg, cg);

         generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, destComponentClassReg, sourceClassReg, cg); // compare only 32 bits
         generateLabelInstruction(TR::InstOpCode::JE4, node, wrtbarLabel, cg);

         // -------------------------------------------------------------------------
         //
         // Check the source class cast cache
         //
         // -------------------------------------------------------------------------

         // Only the compare is emitted here; the conditional branch that consumes it is
         // shared with the uncompressed path and emitted after this if/else.
         generateMemRegInstruction(
            TR::InstOpCode::CMP4MemReg,
            node,
            generateX86MemoryReference(sourceClassReg, offsetof(J9Class, castClassCache), cg), destComponentClassReg, cg);
         }
      else // no class pointer compression
         {
         // Load and mask the source object's class.
         TR::MemoryReference *sourceClassMR = generateX86MemoryReference(sourceReg, TR::Compiler->om.offsetOfObjectVftField(), cg);
         generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, sourceClassReg, sourceClassMR, cg);
         TR::TreeEvaluator::generateVFTMaskInstruction(node, sourceClassReg, cg);

         // Load and mask the destination array's class, then replace it with componentType.
         TR::MemoryReference *destClassMR = generateX86MemoryReference(destReg, TR::Compiler->om.offsetOfObjectVftField(), cg);
         generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, destComponentClassReg, destClassMR, cg);
         TR::TreeEvaluator::generateVFTMaskInstruction(node, destComponentClassReg, cg);
         TR::MemoryReference *destCompTypeMR =
            generateX86MemoryReference(destComponentClassReg, offsetof(J9ArrayClass, componentType), cg);
         generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, destComponentClassReg, destCompTypeMR, cg);

         generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, destComponentClassReg, sourceClassReg, cg);
         generateLabelInstruction(TR::InstOpCode::JE4, node, wrtbarLabel, cg);

         // -------------------------------------------------------------------------
         //
         // Check the source class cast cache
         //
         // -------------------------------------------------------------------------

         generateMemRegInstruction(
            TR::InstOpCode::CMPMemReg(),
            node,
            generateX86MemoryReference(sourceClassReg, offsetof(J9Class, castClassCache), cg), destComponentClassReg, cg);
         }
      // Branch for the cast-cache compare emitted by whichever path ran above.
      generateLabelInstruction(TR::InstOpCode::JE4, node, wrtbarLabel, cg);

      instr = NULL;
      /*
      TR::Instruction *instr;


      // -------------------------------------------------------------------------
      //
      // If the component type is java.lang.Object then the store always succeeds.
      //
      // -------------------------------------------------------------------------

      TR_OpaqueClassBlock *objectClass = fej9->getSystemClassFromClassName("java/lang/Object", 16);

      if (comp->target().is64Bit())
         {
         if (TR::Compiler->om.compressObjectReferences())
            {
            TR_ASSERT((((uintptr_t)objectClass) >> 32) == 0, "TR_OpaqueClassBlock must fit on 32 bits when using class pointer compression");
            instr = generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, destComponentClassReg, (uint32_t) ((uint64_t) objectClass), cg);
            }
         else // 64 bit but no class pointer compression
            {
            if ((uintptr_t)objectClass <= (uintptr_t)0x7fffffff)
               {
               instr = generateRegImmInstruction(TR::InstOpCode::CMP8RegImm4, node, destComponentClassReg, (uintptr_t) objectClass, cg);
               }
            else
               {
               TR::Register *objectClassReg = scratchRegisterManager->findOrCreateScratchRegister();
               instr = generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, objectClassReg, (uintptr_t) objectClass, cg);
               generateRegRegInstruction(TR::InstOpCode::CMP8RegReg, node, destComponentClassReg, objectClassReg, cg);
               scratchRegisterManager->reclaimScratchRegister(objectClassReg);
               }
            }
         }
      else
         {
         instr = generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, destComponentClassReg, (int32_t)(uintptr_t) objectClass, cg);
         }

      generateLabelInstruction(TR::InstOpCode::JE4, node, wrtbarLabel, cg);

      // HCR in VMarrayStoreCHKEvaluator
      if (cg->wantToPatchClassPointer(objectClass, node))
         comp->getStaticHCRPICSites()->push_front(instr);
      */


      // ---------------------------------------------
      //
      // If isInstanceOf (objectClass,ArrayComponentClass,true,true) was successful and stored during VP,
      // we need to test again against the real arrayComponentClass.
      // Need to relocate the address of arrayComponentClass under AOT / shared cache.
      // Need to account for the possibility of class unloading.
      // --------------------------------------------


      if (!(comp->getOption(TR_DisableArrayStoreCheckOpts)) && node->getArrayComponentClassInNode() )
         {
         TR_OpaqueClassBlock *arrayComponentClass = (TR_OpaqueClassBlock *) node->getArrayComponentClassInNode();
         if (comp->target().is64Bit())
            {
            if (TR::Compiler->om.compressObjectReferences())
               {
               TR_ASSERT((((uintptr_t)arrayComponentClass) >> 32) == 0, "TR_OpaqueClassBlock must fit on 32 bits when using class pointer compression");
               instr = generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, destComponentClassReg, (uint32_t) ((uint64_t) arrayComponentClass), cg);

               if (fej9->isUnloadAssumptionRequired(arrayComponentClass, comp->getCurrentMethod()))
                  comp->getStaticPICSites()->push_front(instr);

               }
            else // 64 bit but no class pointer compression
               {
               // A sign-extended 32-bit immediate can only encode addresses up to 0x7fffffff;
               // anything larger is materialized in a scratch register first.
               if ((uintptr_t)arrayComponentClass <= (uintptr_t)0x7fffffff)
                  {
                  instr = generateRegImmInstruction(TR::InstOpCode::CMP8RegImm4, node, destComponentClassReg, (uintptr_t) arrayComponentClass, cg);
                  if (fej9->isUnloadAssumptionRequired(arrayComponentClass, comp->getCurrentMethod()))
                     comp->getStaticPICSites()->push_front(instr);

                  }
               else
                  {
                  // NOTE(review): unlike the sibling branches, no unload PIC site is registered
                  // on this path; 'instr' is the MOV that carries the immediate. Confirm.
                  TR::Register *arrayComponentClassReg = scratchRegisterManager->findOrCreateScratchRegister();
                  instr = generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, arrayComponentClassReg, (uintptr_t) arrayComponentClass, cg);
                  generateRegRegInstruction(TR::InstOpCode::CMP8RegReg, node, destComponentClassReg, arrayComponentClassReg, cg);
                  scratchRegisterManager->reclaimScratchRegister(arrayComponentClassReg);
                  }
               }
            }
         else
            {
            instr = generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, destComponentClassReg, (int32_t)(uintptr_t) arrayComponentClass, cg);
            if (fej9->isUnloadAssumptionRequired(arrayComponentClass, comp->getCurrentMethod()))
               comp->getStaticPICSites()->push_front(instr);

            }

         generateLabelInstruction(TR::InstOpCode::JE4, node, wrtbarLabel, cg);

         // HCR in VMarrayStoreCHKEvaluator
         if (cg->wantToPatchClassPointer(arrayComponentClass, node))
            comp->getStaticHCRPICSites()->push_front(instr);

         }




      // For compressed references:
      // destComponentClassReg contains the class offset so we may need to generate code
      // to convert from class offset to real J9Class pointer

      // -------------------------------------------------------------------------
      //
      // Compare source and dest class depths
      //
      // -------------------------------------------------------------------------

      // Get the depth of array component type in testerReg
      //
      // When the depth mask is exactly 0xffff the depth can be read as a zero-extended
      // 16-bit load (and compared with a 16-bit compare), so no AND is needed.
      bool eliminateDepthMask = (J9AccClassDepthMask == 0xffff);
      TR::MemoryReference *destComponentClassDepthMR =
         generateX86MemoryReference(destComponentClassReg, offsetof(J9Class,classDepthAndFlags), cg);

      // DMDM 32-bit only???
      // On 32-bit the component class register is handed back before the next scratch
      // register is requested; the component class is rematerialized further below.
      if (comp->target().is32Bit())
         {
         scratchRegisterManager->reclaimScratchRegister(destComponentClassReg);
         }

      TR::Register *destComponentClassDepthReg = scratchRegisterManager->findOrCreateScratchRegister();

      if (eliminateDepthMask)
         {
         if (comp->target().is64Bit())
            generateRegMemInstruction(TR::InstOpCode::MOVZXReg8Mem2, node, destComponentClassDepthReg, destComponentClassDepthMR, cg);
         else
            generateRegMemInstruction(TR::InstOpCode::MOVZXReg4Mem2, node, destComponentClassDepthReg, destComponentClassDepthMR, cg);
         }
      else
         {
         generateRegMemInstruction(
            TR::InstOpCode::LRegMem(),
            node,
            destComponentClassDepthReg,
            destComponentClassDepthMR, cg);
         }

      if (!eliminateDepthMask)
         {
         if (comp->target().is64Bit())
            {
            // AND8RegImm4 takes a 32-bit immediate, hence the requirement that bit 31 of
            // the mask is clear.
            TR_ASSERT(!(J9AccClassDepthMask & 0x80000000), "AMD64: need to use a second register for AND mask");
            if (!(J9AccClassDepthMask & 0x80000000))
               generateRegImmInstruction(TR::InstOpCode::AND8RegImm4, node, destComponentClassDepthReg, J9AccClassDepthMask, cg);
            }
         else
            {
            generateRegImmInstruction(TR::InstOpCode::AND4RegImm4, node, destComponentClassDepthReg, J9AccClassDepthMask, cg);
            }
         }

      // For compressed references:
      // temp2 contains the class offset so we may need to generate code
      // to convert from class offset to real J9Class pointer

      // Get the depth of type of object being stored into the array in testerReg2
      //

      TR::MemoryReference *mr = generateX86MemoryReference(sourceClassReg, offsetof(J9Class,classDepthAndFlags), cg);

      // There aren't enough registers available on 32-bit across this internal
      // control flow region. Give one back and force the source class to be
      // rematerialized later.
      //
      if (comp->target().is32Bit())
         {
         scratchRegisterManager->reclaimScratchRegister(sourceClassReg);
         }

      TR::Register *sourceClassDepthReg = NULL;
      if (eliminateDepthMask)
         {
         // 16-bit compare of the source depth in memory against the destination depth.
         generateMemRegInstruction(TR::InstOpCode::CMP2MemReg, node, mr, destComponentClassDepthReg, cg);
         }
      else
         {
         sourceClassDepthReg = scratchRegisterManager->findOrCreateScratchRegister();
         generateRegMemInstruction(
            TR::InstOpCode::LRegMem(),
            node,
            sourceClassDepthReg,
            mr, cg);

         if (comp->target().is64Bit())
            {
            TR_ASSERT(!(J9AccClassDepthMask & 0x80000000), "AMD64: need to use a second register for AND mask");
            if (!(J9AccClassDepthMask & 0x80000000))
               generateRegImmInstruction(TR::InstOpCode::AND8RegImm4, node, sourceClassDepthReg, J9AccClassDepthMask, cg);
            }
         else
            {
            generateRegImmInstruction(TR::InstOpCode::AND4RegImm4, node, sourceClassDepthReg, J9AccClassDepthMask, cg);
            }
         generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, sourceClassDepthReg, destComponentClassDepthReg, cg);
         }

      /*TR::Register *sourceClassDepthReg = scratchRegisterManager->findOrCreateScratchRegister();
      generateRegMemInstruction(
         TR::InstOpCode::LRegMem(),
         node,
         sourceClassDepthReg,
         mr, cg);

      if (comp->target().is64Bit())
         {
         TR_ASSERT(!(J9AccClassDepthMask & 0x80000000), "AMD64: need to use a second register for AND mask");
         if (!(J9AccClassDepthMask & 0x80000000))
            generateRegImmInstruction(TR::InstOpCode::AND8RegImm4, node, sourceClassDepthReg, J9AccClassDepthMask, cg);
         }
      else
         {
         generateRegImmInstruction(TR::InstOpCode::AND4RegImm4, node, sourceClassDepthReg, J9AccClassDepthMask, cg);
         }

      generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, sourceClassDepthReg, destComponentClassDepthReg, cg);*/

      // Source depth <= destination depth (unsigned): the superclass lookup below cannot be
      // used, so go straight to the helper call.
      generateLabelInstruction(TR::InstOpCode::JBE4, node, helperCallLabel, cg);
      if (sourceClassDepthReg != NULL)
         scratchRegisterManager->reclaimScratchRegister(sourceClassDepthReg);


      // For compressed references:
      // destComponentClassReg contains the class offset so we may need to generate code
      // to convert from class offset to real J9Class pointer

      if (comp->target().is32Bit())
         {
         // Rematerialize the source class.
         //
         sourceClassReg = scratchRegisterManager->findOrCreateScratchRegister();
         TR::MemoryReference *sourceClassMR = generateX86MemoryReference(sourceReg, TR::Compiler->om.offsetOfObjectVftField(), cg);
         generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, sourceClassReg, sourceClassMR, cg);
         TR::TreeEvaluator::generateVFTMaskInstruction(node, sourceClassReg, cg);
         }

      TR::MemoryReference *tempMR = generateX86MemoryReference(sourceClassReg, offsetof(J9Class,superclasses), cg);

      // On 32-bit the source class register is given back again before the next scratch
      // register is requested.
      if (comp->target().is32Bit())
         {
         scratchRegisterManager->reclaimScratchRegister(sourceClassReg);
         }

      TR::Register *sourceSuperClassReg = scratchRegisterManager->findOrCreateScratchRegister();

      // Load the source class's 'superclasses' field.
      generateRegMemInstruction(
         TR::InstOpCode::LRegMem(),
         node,
         sourceSuperClassReg,
         tempMR,
         cg);

      // Address of superclasses[destination depth], scaled by the pointer size.
      TR::MemoryReference *leaMR =
         generateX86MemoryReference(sourceSuperClassReg, destComponentClassDepthReg, logBase2(sizeof(uintptr_t)), 0, cg);

      // For compressed references:
      // leaMR is a memory reference to a J9Class
      // destComponentClassReg contains a TR_OpaqueClassBlock
      // We may need to convert superClass to a class offset before doing the comparison

      if (comp->target().is32Bit())
         {

         generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, sourceSuperClassReg, leaMR, cg);

         // Rematerialize destination component class
         //
         TR::MemoryReference *destClassMR = generateX86MemoryReference(destReg, TR::Compiler->om.offsetOfObjectVftField(), cg);

         generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, destComponentClassReg, destClassMR, cg);
         TR::TreeEvaluator::generateVFTMaskInstruction(node, destComponentClassReg, cg);
         TR::MemoryReference *destCompTypeMR =
            generateX86MemoryReference(destComponentClassReg, offsetof(J9ArrayClass, componentType), cg);

         // Compare the componentType field in memory against the superclass entry.
         generateMemRegInstruction(TR::InstOpCode::CMPMemReg(), node, destCompTypeMR, sourceSuperClassReg, cg);
         }
      else
         {
         // NOTE(review): CMP4RegMem is a 32-bit compare and is used on 64-bit for both
         // compressed and full references; confirm this is sufficient for full references.
         generateRegMemInstruction(TR::InstOpCode::CMP4RegMem, node, destComponentClassReg, leaMR, cg);
         }

      scratchRegisterManager->reclaimScratchRegister(destComponentClassReg);
      scratchRegisterManager->reclaimScratchRegister(destComponentClassDepthReg);
      scratchRegisterManager->reclaimScratchRegister(sourceClassReg);
      scratchRegisterManager->reclaimScratchRegister(sourceSuperClassReg);

      generateLabelInstruction(TR::InstOpCode::JE4, node, wrtbarLabel, cg);
      }

   // The fast paths failed; execute the type-check helper call.
   //
   // The call is built as an outlined instruction sequence entered at helperCallLabel
   // and rejoining the mainline at helperReturnLabel.
   TR::LabelSymbol* helperReturnLabel = generateLabelSymbol(cg);
   TR::Node *helperCallNode = TR::Node::createWithSymRef(TR::call, 2, 2, sourceChild, destinationChild, node->getSymbolReference());
   helperCallNode->copyByteCodeInfo(node);
   generateLabelInstruction(TR::InstOpCode::JMP4, helperCallNode, helperCallLabel, cg);
   TR_OutlinedInstructions* outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(helperCallNode, TR::call, NULL, helperCallLabel, helperReturnLabel, cg);
   cg->getOutlinedInstructionsList().push_front(outlinedHelperCall);
   generateLabelInstruction(TR::InstOpCode::label, helperCallNode, helperReturnLabel, cg);
   cg->decReferenceCount(sourceChild);
   cg->decReferenceCount(destinationChild);
   }
// Check that two objects are compatible for use in an arraycopy operation.
// If not, an ArrayStoreException is thrown.
//
// The generated sequence is an internal-control-flow region. Every failing test branches to
// a single X86CheckFailureSnippet built from the node's symbol reference; the snippet is
// created lazily by the first failing branch that is emitted. Which tests are emitted
// depends on what the node already knows about each operand (primitive array / reference
// array). Always returns NULL.
//
TR::Register *J9::X86::TreeEvaluator::VMarrayCheckEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
   bool use64BitClasses = cg->comp()->target().is64Bit() && !TR::Compiler->om.generateCompressedObjectHeaders();

   TR::Node *firstObject = node->getFirstChild();
   TR::Node *secondObject = node->getSecondChild();
   TR::Register *firstObjectReg = cg->evaluate(firstObject);
   TR::Register *secondObjectReg = cg->evaluate(secondObject);

   TR::LabelSymbol *regionStartLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *failureLabel = NULL;   // created together with the snippet on first use
   TR::Register *classReg = cg->allocateRegister();

   regionStartLabel->setStartInternalControlFlow();
   doneLabel->setEndInternalControlFlow();
   generateLabelInstruction(TR::InstOpCode::label, node, regionStartLabel, cg);

   // What the node already knows about its two operands.
   const bool firstIsPrimitiveArray = node->isArrayChkPrimitiveArray1();
   const bool firstIsReferenceArray = node->isArrayChkReferenceArray1();
   const bool secondIsPrimitiveArray = node->isArrayChkPrimitiveArray2();
   const bool secondIsReferenceArray = node->isArrayChkReferenceArray2();

   // Use the one-byte form of TEST when the array flag fits in a signed byte immediate.
   const TR::InstOpCode::Mnemonic arrayFlagTestOp =
      ((J9AccClassRAMArray >= CHAR_MIN) && (J9AccClassRAMArray <= CHAR_MAX))
         ? TR::InstOpCode::TEST1MemImm1
         : TR::InstOpCode::TEST4MemImm4;

   if (firstIsPrimitiveArray || firstIsReferenceArray || secondIsPrimitiveArray || secondIsReferenceArray)
      {
      // At least one operand is known to be an array: identical objects are compatible.
      //
      generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, firstObjectReg, secondObjectReg, cg);
      generateLabelInstruction(TR::InstOpCode::JE4, node, doneLabel, cg);
      }
   else
      {
      // Nothing is known about either operand: object 1 must be an array, otherwise fail.
      //
      const TR::InstOpCode::Mnemonic loadClassOp =
         TR::Compiler->om.compressObjectReferences() ? TR::InstOpCode::L4RegMem : TR::InstOpCode::LRegMem();
      TR::MemoryReference *firstClassMR =
         generateX86MemoryReference(firstObjectReg, TR::Compiler->om.offsetOfObjectVftField(), cg);
      generateRegMemInstruction(loadClassOp, node, classReg, firstClassMR, cg);
      TR::TreeEvaluator::generateVFTMaskInstruction(node, classReg, cg);

      TR::MemoryReference *firstFlagsMR =
         generateX86MemoryReference(classReg, offsetof(J9Class, classDepthAndFlags), cg);
      generateMemImmInstruction(arrayFlagTestOp, node, firstFlagsMR, J9AccClassRAMArray, cg);

      // No failing branch can have been emitted before this point, so the snippet is
      // always created here.
      failureLabel = generateLabelSymbol(cg);
      TR::Instruction *failBranch = generateLabelInstruction(TR::InstOpCode::JE4, node, failureLabel, cg);
      TR::Snippet *failureSnippet =
         new (cg->trHeapMemory()) TR::X86CheckFailureSnippet(cg, node->getSymbolReference(), failureLabel, failBranch);
      cg->addSnippet(failureSnippet);
      }

   // Test equality of the object classes: XOR the two class words, then apply the VFT mask;
   // the branches below consume the resulting flags.
   //
   TR::MemoryReference *firstVftMR =
      generateX86MemoryReference(firstObjectReg, TR::Compiler->om.offsetOfObjectVftField(), cg);
   generateRegMemInstruction(TR::InstOpCode::LRegMem(use64BitClasses), node, classReg, firstVftMR, cg);
   TR::MemoryReference *secondVftMR =
      generateX86MemoryReference(secondObjectReg, TR::Compiler->om.offsetOfObjectVftField(), cg);
   generateRegMemInstruction(TR::InstOpCode::XORRegMem(use64BitClasses), node, classReg, secondVftMR, cg);
   TR::TreeEvaluator::generateVFTMaskInstruction(node, classReg, cg);

   if (firstIsPrimitiveArray || secondIsPrimitiveArray)
      {
      // A primitive array is only compatible with an array of the very same class, so the
      // equality test decides. This path implies the known-array branch was taken above,
      // hence the snippet does not exist yet and is always created here.
      //
      failureLabel = generateLabelSymbol(cg);
      TR::Instruction *failBranch = generateLabelInstruction(TR::InstOpCode::JNE4, node, failureLabel, cg);
      TR::Snippet *failureSnippet =
         new (cg->trHeapMemory()) TR::X86CheckFailureSnippet(cg, node->getSymbolReference(), failureLabel, failBranch);
      cg->addSnippet(failureSnippet);
      }
   else
      {
      // Equal classes are compatible; otherwise both operands must be reference arrays.
      //
      generateLabelInstruction(TR::InstOpCode::JE4, node, doneLabel, cg);

      if (!firstIsReferenceArray)
         {
         // Object 1: its class shape bits must say "array of pointers".
         //
         const TR::InstOpCode::Mnemonic loadClassOp =
            TR::Compiler->om.compressObjectReferences() ? TR::InstOpCode::L4RegMem : TR::InstOpCode::LRegMem();
         TR::MemoryReference *firstClassMR =
            generateX86MemoryReference(firstObjectReg, TR::Compiler->om.offsetOfObjectVftField(), cg);
         generateRegMemInstruction(loadClassOp, node, classReg, firstClassMR, cg);
         TR::TreeEvaluator::generateVFTMaskInstruction(node, classReg, cg);

         TR::MemoryReference *firstFlagsMR =
            generateX86MemoryReference(classReg, offsetof(J9Class, classDepthAndFlags), cg);
         generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, classReg, firstFlagsMR, cg);

         // Isolate the shape field in place and compare with the (shifted) pointer-array shape.
         generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(), node, classReg, (OBJECT_HEADER_SHAPE_MASK << J9AccClassRAMShapeShift), cg);
         generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, classReg, (OBJECT_HEADER_SHAPE_POINTERS << J9AccClassRAMShapeShift), cg);

         if (failureLabel == NULL)
            {
            failureLabel = generateLabelSymbol(cg);
            TR::Instruction *failBranch = generateLabelInstruction(TR::InstOpCode::JNE4, node, failureLabel, cg);
            TR::Snippet *failureSnippet =
               new (cg->trHeapMemory()) TR::X86CheckFailureSnippet(cg, node->getSymbolReference(), failureLabel, failBranch);
            cg->addSnippet(failureSnippet);
            }
         else
            {
            generateLabelInstruction(TR::InstOpCode::JNE4, node, failureLabel, cg);
            }
         }

      if (!secondIsReferenceArray)
         {
         // Object 2: it must be an array at all ...
         //
         const TR::InstOpCode::Mnemonic loadClassOp =
            TR::Compiler->om.compressObjectReferences() ? TR::InstOpCode::L4RegMem : TR::InstOpCode::LRegMem();
         TR::MemoryReference *secondClassMR =
            generateX86MemoryReference(secondObjectReg, TR::Compiler->om.offsetOfObjectVftField(), cg);
         generateRegMemInstruction(loadClassOp, node, classReg, secondClassMR, cg);
         TR::TreeEvaluator::generateVFTMaskInstruction(node, classReg, cg);

         TR::MemoryReference *secondArrayFlagMR =
            generateX86MemoryReference(classReg, offsetof(J9Class, classDepthAndFlags), cg);
         generateMemImmInstruction(arrayFlagTestOp, node, secondArrayFlagMR, J9AccClassRAMArray, cg);

         if (failureLabel == NULL)
            {
            failureLabel = generateLabelSymbol(cg);
            TR::Instruction *failBranch = generateLabelInstruction(TR::InstOpCode::JE4, node, failureLabel, cg);
            TR::Snippet *failureSnippet =
               new (cg->trHeapMemory()) TR::X86CheckFailureSnippet(cg, node->getSymbolReference(), failureLabel, failBranch);
            cg->addSnippet(failureSnippet);
            }
         else
            {
            generateLabelInstruction(TR::InstOpCode::JE4, node, failureLabel, cg);
            }

         // ... and its shape bits must say "array of pointers" as well.
         //
         TR::MemoryReference *secondShapeMR =
            generateX86MemoryReference(classReg, offsetof(J9Class, classDepthAndFlags), cg);
         generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, classReg, secondShapeMR, cg);
         generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(), node, classReg, (OBJECT_HEADER_SHAPE_MASK << J9AccClassRAMShapeShift), cg);
         generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, classReg, (OBJECT_HEADER_SHAPE_POINTERS << J9AccClassRAMShapeShift), cg);

         generateLabelInstruction(TR::InstOpCode::JNE4, node, failureLabel, cg);
         }

      // Both objects are now known to be reference arrays, so they are compatible
      // for arraycopy.
      }

   // Close the internal-control-flow region: everything live across it is named in the
   // post-conditions attached to the merge label.
   //
   TR::RegisterDependencyConditions *regionDeps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)4, cg);
   regionDeps->addPostCondition(firstObjectReg, TR::RealRegister::NoReg, cg);
   regionDeps->addPostCondition(secondObjectReg, TR::RealRegister::NoReg, cg);
   regionDeps->addPostCondition(classReg, TR::RealRegister::NoReg, cg);
   regionDeps->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);

   generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, regionDeps, cg);

   cg->stopUsingRegister(classReg);
   cg->decReferenceCount(firstObject);
   cg->decReferenceCount(secondObject);

   return NULL;
   }
cg);89678968generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, tempReg, generateX86MemoryReference(tempReg, offsetof(J9Class, classDepthAndFlags), cg), cg);8969generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(), node, tempReg, (OBJECT_HEADER_SHAPE_MASK << J9AccClassRAMShapeShift), cg);8970generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, tempReg, (OBJECT_HEADER_SHAPE_POINTERS << J9AccClassRAMShapeShift), cg);89718972generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);8973}89748975// Now both objects are known to be reference arrays, so they are8976// compatible for arraycopy.8977}89788979// Now generate the fall-through label8980//8981TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)4, cg);8982deps->addPostCondition(object1Reg, TR::RealRegister::NoReg, cg);8983deps->addPostCondition(object2Reg, TR::RealRegister::NoReg, cg);8984deps->addPostCondition(tempReg, TR::RealRegister::NoReg, cg);8985deps->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);89868987generateLabelInstruction(TR::InstOpCode::label, node, fallThrough, deps, cg);89888989cg->stopUsingRegister(tempReg);8990cg->decReferenceCount(object1);8991cg->decReferenceCount(object2);89928993return NULL;8994}899589968997#ifdef LINUX8998#if defined(TR_TARGET_32BIT)8999static void9000addFPXMMDependencies(9001TR::CodeGenerator *cg,9002TR::RegisterDependencyConditions *dependencies)9003{9004TR_LiveRegisters *lr = cg->getLiveRegisters(TR_FPR);9005if (!lr || lr->getNumberOfLiveRegisters() > 0)9006{9007for (int regIndex = TR::RealRegister::FirstXMMR; regIndex <= TR::RealRegister::LastXMMR; regIndex++)9008{9009TR::Register *dummy = cg->allocateRegister(TR_FPR);9010dummy->setPlaceholderReg();9011dependencies->addPostCondition(dummy, (TR::RealRegister::RegNum)regIndex, cg);9012cg->stopUsingRegister(dummy);9013}9014}9015}9016#endif90179018#define J9TIME_NANOSECONDS_PER_SECOND ((I_64) 1000000000)9019#if 
defined(TR_TARGET_64BIT)9020static bool9021inlineNanoTime(9022TR::Node *node,9023TR::CodeGenerator *cg)9024{9025TR::Compilation *comp = cg->comp();9026TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());90279028if (debug("traceInlInlining"))9029diagnostic("nanoTime called by %s\n", comp->signature());90309031if (fej9->supportsFastNanoTime())9032{ // Fully Inlined Version90339034// First, evaluate resultAddress if provided. There's no telling how9035// many regs that address computation needs, so let's get it out of the9036// way before we start using registers for other things.9037//90389039TR::Register *resultAddress;9040if (node->getNumChildren() == 1)9041{9042resultAddress = cg->evaluate(node->getFirstChild());9043}9044else9045{9046TR_ASSERT(node->getNumChildren() == 0, "nanoTime must have zero or one children");9047resultAddress = NULL;9048}90499050TR::SymbolReference *gtod = comp->getSymRefTab()->findOrCreateRuntimeHelper(TR_AMD64clockGetTime);9051TR::Node *timevalNode = TR::Node::createWithSymRef(node, TR::loadaddr, 0, cg->getNanoTimeTemp());9052TR::Node *clockSourceNode = TR::Node::create(node, TR::iconst, 0, CLOCK_MONOTONIC);9053TR::Node *callNode = TR::Node::createWithSymRef(TR::call, 2, 2, clockSourceNode, timevalNode, gtod);9054// TODO: Use performCall9055TR::Linkage *linkage = cg->getLinkage(gtod->getSymbol()->getMethodSymbol()->getLinkageConvention());9056linkage->buildDirectDispatch(callNode, false);90579058TR::Register *result = cg->allocateRegister();9059TR::Register *reg = cg->allocateRegister();90609061TR::MemoryReference *tv_sec;90629063// result = tv_sec * 1,000,000,000 (converts seconds to nanoseconds)90649065tv_sec = generateX86MemoryReference(timevalNode, cg, false);9066generateRegMemInstruction(TR::InstOpCode::L8RegMem, node, result, tv_sec, cg);9067generateRegRegImmInstruction(TR::InstOpCode::IMUL8RegRegImm4, node, result, result, J9TIME_NANOSECONDS_PER_SECOND, cg);90689069// reg = tv_usec9070generateRegMemInstruction(TR::InstOpCode::L8RegMem, 
node, reg, generateX86MemoryReference(*tv_sec, offsetof(struct timespec, tv_nsec), cg), cg);90719072// result = reg + result9073generateRegMemInstruction(TR::InstOpCode::LEA8RegMem, node, result, generateX86MemoryReference(reg, result, 0, cg), cg);90749075cg->stopUsingRegister(reg);90769077// Store the result to memory if necessary9078if (resultAddress)9079{9080generateMemRegInstruction(TR::InstOpCode::S8MemReg, node, generateX86MemoryReference(resultAddress, 0, cg), result, cg);90819082cg->decReferenceCount(node->getFirstChild());9083if (node->getReferenceCount() == 1 && cg->getCurrentEvaluationTreeTop()->getNode()->getOpCodeValue() == TR::treetop)9084{9085// Result is not needed in a register, so free it up9086//9087cg->stopUsingRegister(result);9088result = NULL;9089}9090}90919092node->setRegister(result);90939094return true;9095}9096else9097{ // Inlined call to Port Library9098return false;9099}9100}9101#else // !64bit9102static bool9103inlineNanoTime(9104TR::Node *node,9105TR::CodeGenerator *cg)9106{9107TR::Compilation *comp = cg->comp();9108TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());91099110if (debug("traceInlInlining"))9111diagnostic("nanoTime called by %s\n", comp->signature());91129113TR::RealRegister *espReal = cg->machine()->getRealRegister(TR::RealRegister::esp);9114TR::Register *vmThreadReg = cg->getVMThreadRegister();9115TR::Register *temp2 = 0;91169117if (fej9->supportsFastNanoTime())9118{9119TR::Register *resultAddress;9120if (node->getNumChildren() == 1)9121{9122resultAddress = cg->evaluate(node->getFirstChild());9123generateRegInstruction(TR::InstOpCode::PUSHReg, node, resultAddress, cg);9124generateImmInstruction(TR::InstOpCode::PUSHImm4, node, CLOCK_MONOTONIC, cg);9125}9126else9127{9128// Leave space on the stack for the 64-bit result9129//91309131generateRegImmInstruction(TR::InstOpCode::SUB4RegImms, node, espReal, 8, cg);91329133resultAddress = cg->allocateRegister();9134generateRegRegInstruction(TR::InstOpCode::MOV4RegReg, node, 
resultAddress, espReal, cg); // save away esp before the push9135generateRegInstruction(TR::InstOpCode::PUSHReg, node, resultAddress, cg);9136generateImmInstruction(TR::InstOpCode::PUSHImm4, node, CLOCK_MONOTONIC, cg);9137cg->stopUsingRegister(resultAddress);9138resultAddress = espReal;9139}91409141// 64-bit issues on the call instructions below91429143// Build register dependencies and call the method in the system library9144// directly. Since this is a "C"-style call, ebx, esi and edi are preserved9145//9146int32_t extraFPDeps = (uint8_t)(TR::RealRegister::LastXMMR - TR::RealRegister::FirstXMMR+1);9147TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)4 + extraFPDeps, cg);9148TR::Register *temp1 = cg->allocateRegister();9149deps->addPostCondition(temp1, TR::RealRegister::eax, cg);9150cg->stopUsingRegister(temp1);9151temp1 = cg->allocateRegister();9152deps->addPostCondition(temp1, TR::RealRegister::ecx, cg);9153cg->stopUsingRegister(temp1);9154temp1 = cg->allocateRegister();9155deps->addPostCondition(temp1, TR::RealRegister::edx, cg);9156cg->stopUsingRegister(temp1);9157deps->addPostCondition(cg->getMethodMetaDataRegister(), TR::RealRegister::ebp, cg);91589159// add the XMM dependencies9160addFPXMMDependencies(cg, deps);9161deps->stopAddingConditions();91629163TR::X86ImmInstruction *callInstr = generateImmInstruction(TR::InstOpCode::CALLImm4, node, (int32_t)&clock_gettime, deps, cg);91649165generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, espReal, 8, cg);91669167TR::Register *eaxReal = cg->allocateRegister();9168TR::Register *edxReal = cg->allocateRegister();91699170// load usec to a register9171TR::Register *reglow = cg->allocateRegister();9172generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, reglow, generateX86MemoryReference(resultAddress, 4, cg), cg);917391749175TR::RegisterDependencyConditions *dep1 = generateRegisterDependencyConditions((uint8_t)2, 2, cg);9176dep1->addPreCondition(eaxReal, 
TR::RealRegister::eax, cg);9177dep1->addPreCondition(edxReal, TR::RealRegister::edx, cg);9178dep1->addPostCondition(eaxReal, TR::RealRegister::eax, cg);9179dep1->addPostCondition(edxReal, TR::RealRegister::edx, cg);918091819182// load second to eax then multiply by 1,000,000,00091839184generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, edxReal, generateX86MemoryReference(resultAddress, 0, cg), cg);9185generateRegImmInstruction(TR::InstOpCode::MOV4RegImm4, node, eaxReal, J9TIME_NANOSECONDS_PER_SECOND, cg);9186generateRegRegInstruction(TR::InstOpCode::IMUL4AccReg, node, eaxReal, edxReal, dep1, cg);918791889189// add the two parts then store it back9190generateRegRegInstruction(TR::InstOpCode::ADD4RegReg, node, eaxReal, reglow, cg);9191generateRegImmInstruction(TR::InstOpCode::ADC4RegImm4, node, edxReal, 0x0, cg);9192generateMemRegInstruction(TR::InstOpCode::S4MemReg, node, generateX86MemoryReference(resultAddress, 0, cg), eaxReal, cg);9193generateMemRegInstruction(TR::InstOpCode::S4MemReg, node, generateX86MemoryReference(resultAddress, 4, cg), edxReal, cg);91949195cg->stopUsingRegister(eaxReal);9196cg->stopUsingRegister(edxReal);9197cg->stopUsingRegister(reglow);91989199TR::Register *lowReg = cg->allocateRegister();9200TR::Register *highReg = cg->allocateRegister();92019202if (node->getNumChildren() == 1)9203{9204if (node->getReferenceCount() > 1 ||9205cg->getCurrentEvaluationTreeTop()->getNode()->getOpCodeValue() != TR::treetop)9206{9207generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, lowReg, generateX86MemoryReference(resultAddress, 0, cg), cg);9208generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, highReg, generateX86MemoryReference(resultAddress, 4, cg), cg);92099210TR::RegisterPair *result = cg->allocateRegisterPair(lowReg, highReg);9211node->setRegister(result);9212}9213cg->decReferenceCount(node->getFirstChild());9214}9215else9216{9217// The result of the call is now on the stack. 
Get it into registers.9218//9219generateRegInstruction(TR::InstOpCode::POPReg, node, lowReg, cg);9220generateRegInstruction(TR::InstOpCode::POPReg, node, highReg, cg);9221TR::RegisterPair *result = cg->allocateRegisterPair(lowReg, highReg);9222node->setRegister(result);9223}9224}9225else9226{9227// This code is busted. The hires clock is measured in microseconds, not9228// nanoseconds, and this code doesn't correct for that. The above code9229// will be faster anyway, and it should be upgraded to support AOT, so9230// then we'll never need the hires clock version again.9231static char *useHiResClock = feGetEnv("TR_useHiResClock");9232if (!useHiResClock)9233return false;9234// Leave space on the stack for the 64-bit result9235//9236temp2 = cg->allocateRegister();9237generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, temp2, generateX86MemoryReference(vmThreadReg, offsetof(J9VMThread, javaVM), cg), cg);9238generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, temp2, generateX86MemoryReference(temp2, offsetof(J9JavaVM, portLibrary), cg), cg);9239generateRegInstruction(TR::InstOpCode::PUSHReg, node, espReal, cg);9240generateRegInstruction(TR::InstOpCode::PUSHReg, node, temp2, cg);92419242int32_t extraFPDeps = (uint8_t)(TR::RealRegister::LastXMMR - TR::RealRegister::FirstXMMR+1);92439244// Build register dependencies and call the method in the port library9245// directly. 
Since this is a "C"-style call, ebx, esi and edi are preserved9246//9247TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)4 + extraFPDeps, cg);9248TR::Register *temp1 = cg->allocateRegister();9249deps->addPostCondition(temp1, TR::RealRegister::ecx, cg);9250cg->stopUsingRegister(temp1);92519252TR::Register *lowReg = cg->allocateRegister();9253deps->addPostCondition(lowReg, TR::RealRegister::eax, cg);92549255TR::Register *highReg = cg->allocateRegister();9256deps->addPostCondition(highReg, TR::RealRegister::edx, cg);92579258deps->addPostCondition(cg->getMethodMetaDataRegister(), TR::RealRegister::ebp, cg);92599260// add the XMM dependencies9261addFPXMMDependencies(cg, deps);9262deps->stopAddingConditions();92639264generateCallMemInstruction(TR::InstOpCode::CALLMem, node, generateX86MemoryReference(temp2, offsetof(OMRPortLibrary, time_hires_clock), cg), deps, cg);9265cg->stopUsingRegister(temp2);92669267generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, espReal, 8, cg);92689269TR::RegisterPair *result = cg->allocateRegisterPair(lowReg, highReg);9270node->setRegister(result);9271}92729273return true;9274}9275#endif9276#endif // LINUX92779278// Convert serial String.hashCode computation into vectorization copy and implement with SSE instruction9279//9280// Conversion process example:9281//9282// str[8] = example string representing 8 characters (compressed or decompressed)9283//9284// The serial method for creating the hash:9285// hash = 0, offset = 0, count = 89286// for (int i = offset; i < offset+count; ++i) {9287// hash = (hash << 5) - hash + str[i];9288// }9289//9290// Note that ((hash << 5) - hash) is equivalent to hash * 319291//9292// Expanding out the for loop:9293// hash = ((((((((0*31+str[0])*31+str[1])*31+str[2])*31+str[3])*31+str[4])*31+str[5])*31+str[6])*31+str[7])9294//9295// Simplified:9296// hash = (31^7)*str[0] + (31^6)*str[1] + (31^5)*str[2] + (31^4)*str[3]9297// + (31^3)*str[4] + 
(31^2)*str[5] + (31^1)*str[6] + (31^0)*str[7]9298//9299// Rearranged:9300// hash = (31^7)*str[0] + (31^3)*str[4]9301// + (31^6)*str[1] + (31^2)*str[5]9302// + (31^5)*str[2] + (31^1)*str[6]9303// + (31^4)*str[3] + (31^0)*str[7]9304//9305// Factor out [31^3, 31^2, 31^1, 31^0]:9306// hash = 31^3*((31^4)*str[0] + str[4]) Vector[0]9307// + 31^2*((31^4)*str[1] + str[5]) Vector[1]9308// + 31^1*((31^4)*str[2] + str[6]) Vector[2]9309// + 31^0*((31^4)*str[3] + str[7]) Vector[3]9310//9311// Keep factoring out any 31^4 if possible (this example has no such case). If the string was 12 characters long then:9312// 31^3*((31^8)*str[0] + (31^4)*str[4] + (31^0)*str[8]) would become 31^3*(31^4((31^4)*str[0] + str[4]) + (31^0)*str[8])9313//9314// Vectorization is done by simultaneously calculating the four sums that hash is made of (each -> is a successive step):9315// Vector[0] = str[0] -> multiply 31^4 -> add str[4] -> multiply 31^39316// Vector[1] = str[1] -> multiply 31^4 -> add str[5] -> multiply 31^29317// Vector[2] = str[2] -> multiply 31^4 -> add str[6] -> multiply 31^19318// Vector[3] = str[3] -> multiply 31^4 -> add str[7] -> multiply 19319//9320// Adding these four vectorized values together produces the required hash.9321// If the number of characters in the string is not a multiple of 4, then the remainder of the hash is calculated serially.9322//9323// Implementation overview:9324//9325// start_label9326// if size < threshold, goto serial_label, current threshold is 49327// xmm0 = load 16 bytes align constant [923521, 923521, 923521, 923521]9328// xmm1 = 09329// SSEloop9330// xmm2 = decompressed: load 8 byte value in lower 8 bytes.9331// compressed: load 4 byte value in lower 4 bytes9332// xmm1 = xmm1 * xmm09333// if(isCompressed)9334// movzxbd xmm2, xmm29335// else9336// movzxwd xmm2, xmm29337// xmm1 = xmm1 + xmm29338// i = i + 4;9339// cmp i, end -39340// jge SSEloop9341// xmm0 = load 16 bytes align [31^3, 31^2, 31, 1]9342// xmm1 = xmm1 * xmm0 value contains [a0, a1, 
a2, a3]9343// xmm0 = xmm19344// xmm0 = xmm0 >> 64 bits9345// xmm1 = xmm1 + xmm0 reduce add [a0+a2, a1+a3, .., ...]9346// xmm0 = xmm19347// xmm0 = xmm0 >> 32 bits9348// xmm1 = xmm1 + xmm0 reduce add [a0+a2 + a1+a3, .., .., ..]9349// movd xmm1, GPR19350//9351// serial_label9352//9353// cmp i end9354// jle end9355// serial_loop9356// GPR2 = GPR19357// GPR1 = GPR1 << 59358// GPR1 = GPR1 - GPR29359// GPR2 = load c[i]9360// add GPR1, GPR29361// dec i9362// cmp i, end9363// jl serial_loop9364//9365// end_label9366static TR::Register* inlineStringHashCode(TR::Node* node, bool isCompressed, TR::CodeGenerator* cg)9367{9368if (!cg->getSupportsInlineStringHashCode())9369{9370return NULL;9371}9372else9373{9374TR_ASSERT(node->getChild(1)->getOpCodeValue() == TR::iconst && node->getChild(1)->getInt() == 0, "String hashcode offset can only be const zero.");93759376const int size = 4;9377auto shift = isCompressed ? 0 : 1;93789379auto address = cg->evaluate(node->getChild(0));9380auto length = cg->evaluate(node->getChild(2));9381auto index = cg->allocateRegister();9382auto hash = cg->allocateRegister();9383auto tmp = cg->allocateRegister();9384auto hashXMM = cg->allocateRegister(TR_VRF);9385auto tmpXMM = cg->allocateRegister(TR_VRF);9386auto multiplierXMM = cg->allocateRegister(TR_VRF);93879388auto begLabel = generateLabelSymbol(cg);9389auto endLabel = generateLabelSymbol(cg);9390auto loopLabel = generateLabelSymbol(cg);9391begLabel->setStartInternalControlFlow();9392endLabel->setEndInternalControlFlow();9393auto deps = generateRegisterDependencyConditions((uint8_t)6, (uint8_t)6, cg);9394deps->addPreCondition(address, TR::RealRegister::NoReg, cg);9395deps->addPreCondition(index, TR::RealRegister::NoReg, cg);9396deps->addPreCondition(length, TR::RealRegister::NoReg, cg);9397deps->addPreCondition(multiplierXMM, TR::RealRegister::NoReg, cg);9398deps->addPreCondition(tmpXMM, TR::RealRegister::NoReg, cg);9399deps->addPreCondition(hashXMM, TR::RealRegister::NoReg, 
cg);9400deps->addPostCondition(address, TR::RealRegister::NoReg, cg);9401deps->addPostCondition(index, TR::RealRegister::NoReg, cg);9402deps->addPostCondition(length, TR::RealRegister::NoReg, cg);9403deps->addPostCondition(multiplierXMM, TR::RealRegister::NoReg, cg);9404deps->addPostCondition(tmpXMM, TR::RealRegister::NoReg, cg);9405deps->addPostCondition(hashXMM, TR::RealRegister::NoReg, cg);94069407generateRegRegInstruction(TR::InstOpCode::MOV4RegReg, node, index, length, cg);9408generateRegImmInstruction(TR::InstOpCode::AND4RegImms, node, index, size-1, cg); // mod size9409generateRegMemInstruction(TR::InstOpCode::CMOVE4RegMem, node, index, generateX86MemoryReference(cg->findOrCreate4ByteConstant(node, size), cg), cg);94109411// Prepend zeros9412{9413TR::Compilation *comp = cg->comp();94149415static uint64_t MASKDECOMPRESSED[] = { 0x0000000000000000ULL, 0xffffffffffffffffULL };9416static uint64_t MASKCOMPRESSED[] = { 0xffffffff00000000ULL, 0x0000000000000000ULL };9417generateRegMemInstruction(isCompressed ? TR::InstOpCode::MOVDRegMem : TR::InstOpCode::MOVQRegMem, node, hashXMM, generateX86MemoryReference(address, index, shift, -(size << shift) + TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg), cg);9418generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, tmp, generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, isCompressed ? MASKCOMPRESSED : MASKDECOMPRESSED), cg), cg);94199420auto mr = generateX86MemoryReference(tmp, index, shift, 0, cg);9421if (comp->target().cpu.supportsAVX())9422{9423generateRegMemInstruction(TR::InstOpCode::PANDRegMem, node, hashXMM, mr, cg);9424}9425else9426{9427generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, tmpXMM, mr, cg);9428generateRegRegInstruction(TR::InstOpCode::PANDRegReg, node, hashXMM, tmpXMM, cg);9429}9430generateRegRegInstruction(isCompressed ? 
TR::InstOpCode::PMOVZXBDRegReg : TR::InstOpCode::PMOVZXWDRegReg, node, hashXMM, hashXMM, cg);9431}94329433// Reduction Loop9434{9435static uint32_t multiplier[] = { 31*31*31*31, 31*31*31*31, 31*31*31*31, 31*31*31*31 };9436generateLabelInstruction(TR::InstOpCode::label, node, begLabel, cg);9437generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, index, length, cg);9438generateLabelInstruction(TR::InstOpCode::JGE4, node, endLabel, cg);9439generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, multiplierXMM, generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, multiplier), cg), cg);9440generateLabelInstruction(TR::InstOpCode::label, node, loopLabel, cg);9441generateRegRegInstruction(TR::InstOpCode::PMULLDRegReg, node, hashXMM, multiplierXMM, cg);9442generateRegMemInstruction(isCompressed ? TR::InstOpCode::PMOVZXBDRegMem : TR::InstOpCode::PMOVZXWDRegMem, node, tmpXMM, generateX86MemoryReference(address, index, shift, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg), cg);9443generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, index, 4, cg);9444generateRegRegInstruction(TR::InstOpCode::PADDDRegReg, node, hashXMM, tmpXMM, cg);9445generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, index, length, cg);9446generateLabelInstruction(TR::InstOpCode::JL4, node, loopLabel, cg);9447generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);9448}94499450// Finalization9451{9452static uint32_t multiplier[] = { 31*31*31, 31*31, 31, 1 };9453generateRegMemInstruction(TR::InstOpCode::PMULLDRegMem, node, hashXMM, generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, multiplier), cg), cg);9454generateRegRegImmInstruction(TR::InstOpCode::PSHUFDRegRegImm1, node, tmpXMM, hashXMM, 0x0e, cg);9455generateRegRegInstruction(TR::InstOpCode::PADDDRegReg, node, hashXMM, tmpXMM, cg);9456generateRegRegImmInstruction(TR::InstOpCode::PSHUFDRegRegImm1, node, tmpXMM, hashXMM, 0x01, 
cg);9457generateRegRegInstruction(TR::InstOpCode::PADDDRegReg, node, hashXMM, tmpXMM, cg);9458}94599460generateRegRegInstruction(TR::InstOpCode::MOVDReg4Reg, node, hash, hashXMM, cg);94619462cg->stopUsingRegister(index);9463cg->stopUsingRegister(tmp);9464cg->stopUsingRegister(hashXMM);9465cg->stopUsingRegister(tmpXMM);9466cg->stopUsingRegister(multiplierXMM);94679468node->setRegister(hash);9469cg->decReferenceCount(node->getChild(0));9470cg->recursivelyDecReferenceCount(node->getChild(1));9471cg->decReferenceCount(node->getChild(2));9472return hash;9473}9474}94759476static bool9477getNodeIs64Bit(9478TR::Node *node,9479TR::CodeGenerator *cg)9480{9481/* This function is intended to allow existing 32-bit instruction selection code9482* to be reused, almost unchanged, to do the corresponding 64-bit logic on AMD64.9483* It compiles away to nothing on IA32, thus preserving performance and code size9484* on IA32, while allowing the logic to be generalized to suit AMD64.9485*9486* Don't use this function for 64-bit logic on IA32; instead, either (1) use9487* separate logic, or (2) use a different test for 64-bitness. 
Usually this is9488* not a hindrance, because 64-bit code on IA32 uses register pairs and other9489* things that are totally different from their 32-bit counterparts.9490*/94919492TR_ASSERT(cg->comp()->target().is64Bit() || node->getSize() <= 4, "64-bit nodes on 32-bit platforms shouldn't use getNodeIs64Bit");9493return cg->comp()->target().is64Bit() && node->getSize() > 4;9494}94959496static9497TR::Register *intOrLongClobberEvaluate(9498TR::Node *node,9499bool nodeIs64Bit,9500TR::CodeGenerator *cg)9501{9502if (nodeIs64Bit)9503{9504TR_ASSERT(getNodeIs64Bit(node, cg), "nodeIs64Bit must be consistent with node size");9505return cg->longClobberEvaluate(node);9506}9507else9508{9509TR_ASSERT(!getNodeIs64Bit(node, cg), "nodeIs64Bit must be consistent with node size");9510return cg->intClobberEvaluate(node);9511}9512}95139514/**9515* \brief9516* Generate inlined instructions equivalent to com/ibm/jit/JITHelpers.intrinsicIndexOfLatin1 or com/ibm/jit/JITHelpers.intrinsicIndexOfUTF169517*9518* \param node9519* The tree node9520*9521* \param cg9522* The Code Generator9523*9524* \param isLatin19525* True when the string is Latin1, False when the string is UTF169526*9527* Note that this version does not support discontiguous arrays9528*/9529static TR::Register* inlineIntrinsicIndexOf(TR::Node* node, TR::CodeGenerator* cg, bool isLatin1)9530{9531static uint8_t MASKOFSIZEONE[] =9532{95330x00, 0x00, 0x00, 0x00,95340x00, 0x00, 0x00, 0x00,95350x00, 0x00, 0x00, 0x00,95360x00, 0x00, 0x00, 0x00,9537};9538static uint8_t MASKOFSIZETWO[] =9539{95400x00, 0x01, 0x00, 0x01,95410x00, 0x01, 0x00, 0x01,95420x00, 0x01, 0x00, 0x01,95430x00, 0x01, 0x00, 0x01,9544};95459546uint8_t width = 16;9547uint8_t shift = 0;9548uint8_t* shuffleMask = NULL;9549auto compareOp = TR::InstOpCode::bad;9550if(isLatin1)9551{9552shuffleMask = MASKOFSIZEONE;9553compareOp = TR::InstOpCode::PCMPEQBRegReg;9554shift = 0;9555}9556else9557{9558shuffleMask = MASKOFSIZETWO;9559compareOp = 
TR::InstOpCode::PCMPEQWRegReg;9560shift = 1;9561}95629563auto array = cg->evaluate(node->getChild(1));9564auto ch = cg->evaluate(node->getChild(2));9565auto offset = cg->evaluate(node->getChild(3));9566auto length = cg->evaluate(node->getChild(4));95679568auto ECX = cg->allocateRegister();9569auto result = cg->allocateRegister();9570auto scratch = cg->allocateRegister();9571auto scratchXMM = cg->allocateRegister(TR_VRF);9572auto valueXMM = cg->allocateRegister(TR_VRF);95739574auto dependencies = generateRegisterDependencyConditions((uint8_t)7, (uint8_t)7, cg);9575dependencies->addPreCondition(ECX, TR::RealRegister::ecx, cg);9576dependencies->addPreCondition(array, TR::RealRegister::NoReg, cg);9577dependencies->addPreCondition(length, TR::RealRegister::NoReg, cg);9578dependencies->addPreCondition(result, TR::RealRegister::NoReg, cg);9579dependencies->addPreCondition(scratch, TR::RealRegister::NoReg, cg);9580dependencies->addPreCondition(scratchXMM, TR::RealRegister::NoReg, cg);9581dependencies->addPreCondition(valueXMM, TR::RealRegister::NoReg, cg);9582dependencies->addPostCondition(ECX, TR::RealRegister::ecx, cg);9583dependencies->addPostCondition(array, TR::RealRegister::NoReg, cg);9584dependencies->addPostCondition(length, TR::RealRegister::NoReg, cg);9585dependencies->addPostCondition(result, TR::RealRegister::NoReg, cg);9586dependencies->addPostCondition(scratch, TR::RealRegister::NoReg, cg);9587dependencies->addPostCondition(scratchXMM, TR::RealRegister::NoReg, cg);9588dependencies->addPostCondition(valueXMM, TR::RealRegister::NoReg, cg);95899590auto begLabel = generateLabelSymbol(cg);9591auto endLabel = generateLabelSymbol(cg);9592auto loopLabel = generateLabelSymbol(cg);9593begLabel->setStartInternalControlFlow();9594endLabel->setEndInternalControlFlow();95959596generateRegRegInstruction(TR::InstOpCode::MOVDRegReg4, node, valueXMM, ch, cg);9597generateRegMemInstruction(TR::InstOpCode::PSHUFBRegMem, node, valueXMM, 
generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, shuffleMask), cg), cg);95989599generateRegRegInstruction(TR::InstOpCode::MOV4RegReg, node, result, offset, cg);96009601generateLabelInstruction(TR::InstOpCode::label, node, begLabel, cg);9602generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, scratch, generateX86MemoryReference(array, result, shift, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg), cg);9603generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, ECX, scratch, cg);9604generateRegImmInstruction(TR::InstOpCode::ANDRegImms(), node, scratch, ~(width - 1), cg);9605generateRegImmInstruction(TR::InstOpCode::ANDRegImms(), node, ECX, width - 1, cg);9606generateLabelInstruction(TR::InstOpCode::JE1, node, loopLabel, cg);96079608generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, scratchXMM, generateX86MemoryReference(scratch, 0, cg), cg);9609generateRegRegInstruction(compareOp, node, scratchXMM, valueXMM, cg);9610generateRegRegInstruction(TR::InstOpCode::PMOVMSKB4RegReg, node, scratch, scratchXMM, cg);9611generateRegInstruction(TR::InstOpCode::SHR4RegCL, node, scratch, cg);9612generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, scratch, scratch, cg);9613generateLabelInstruction(TR::InstOpCode::JNE1, node, endLabel, cg);9614if (shift)9615{9616generateRegImmInstruction(TR::InstOpCode::SHR4RegImm1, node, ECX, shift, cg);9617}9618generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, result, width >> shift, cg);9619generateRegRegInstruction(TR::InstOpCode::SUB4RegReg, node, result, ECX, cg);9620generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, result, length, cg);9621generateLabelInstruction(TR::InstOpCode::JGE1, node, endLabel, cg);96229623generateLabelInstruction(TR::InstOpCode::label, node, loopLabel, cg);9624generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, scratchXMM, generateX86MemoryReference(array, result, shift, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg), 
cg);9625generateRegRegInstruction(compareOp, node, scratchXMM, valueXMM, cg);9626generateRegRegInstruction(TR::InstOpCode::PMOVMSKB4RegReg, node, scratch, scratchXMM, cg);9627generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, scratch, scratch, cg);9628generateLabelInstruction(TR::InstOpCode::JNE1, node, endLabel, cg);9629generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, result, width >> shift, cg);9630generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, result, length, cg);9631generateLabelInstruction(TR::InstOpCode::JL1, node, loopLabel, cg);9632generateLabelInstruction(TR::InstOpCode::label, node, endLabel, dependencies, cg);96339634generateRegRegInstruction(TR::InstOpCode::BSF4RegReg, node, scratch, scratch, cg);9635if (shift)9636{9637generateRegImmInstruction(TR::InstOpCode::SHR4RegImm1, node, scratch, shift, cg);9638}9639generateRegRegInstruction(TR::InstOpCode::ADDRegReg(), node, result, scratch, cg);9640generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, result, length, cg);9641generateRegMemInstruction(TR::InstOpCode::CMOVGERegMem(), node, result, generateX86MemoryReference(cg->comp()->target().is32Bit() ? 
cg->findOrCreate4ByteConstant(node, -1) : cg->findOrCreate8ByteConstant(node, -1), cg), cg);96429643cg->stopUsingRegister(ECX);9644cg->stopUsingRegister(scratch);9645cg->stopUsingRegister(scratchXMM);9646cg->stopUsingRegister(valueXMM);964796489649node->setRegister(result);9650cg->recursivelyDecReferenceCount(node->getChild(0));9651cg->decReferenceCount(node->getChild(1));9652cg->decReferenceCount(node->getChild(2));9653cg->decReferenceCount(node->getChild(3));9654cg->decReferenceCount(node->getChild(4));9655return result;9656}96579658/**9659* \brief9660* Generate inlined instructions equivalent to sun/misc/Unsafe.compareAndSwapObject or jdk/internal/misc/Unsafe.compareAndSwapObject9661*9662* \param node9663* The tree node9664*9665* \param cg9666* The Code Generator9667*9668*/9669static TR::Register* inlineCompareAndSwapObjectNative(TR::Node* node, TR::CodeGenerator* cg)9670{9671TR::Compilation *comp = cg->comp();96729673TR_ASSERT(!TR::Compiler->om.canGenerateArraylets() || node->isUnsafeGetPutCASCallOnNonArray(), "This evaluator does not support arraylets.");96749675cg->recursivelyDecReferenceCount(node->getChild(0)); // The Unsafe9676TR::Node* objectNode = node->getChild(1);9677TR::Node* offsetNode = node->getChild(2);9678TR::Node* oldValueNode = node->getChild(3);9679TR::Node* newValueNode = node->getChild(4);96809681TR::Register* object = cg->evaluate(objectNode);9682TR::Register* offset = cg->evaluate(offsetNode);9683TR::Register* oldValue = cg->evaluate(oldValueNode);9684TR::Register* newValue = cg->evaluate(newValueNode);9685TR::Register* result = cg->allocateRegister();9686TR::Register* EAX = cg->allocateRegister();9687TR::Register* tmp = cg->allocateRegister();96889689bool use64BitClasses = comp->target().is64Bit() && !comp->useCompressedPointers();96909691if (comp->target().is32Bit())9692{9693// Assume that the offset is positive and not pathologically large (i.e., > 2^31).9694offset = offset->getLowOrder();9695}96969697#if 
defined(OMR_GC_CONCURRENT_SCAVENGER)9698switch (TR::Compiler->om.readBarrierType())9699{9700case gc_modron_readbar_none:9701break;9702case gc_modron_readbar_always:9703generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, tmp, generateX86MemoryReference(object, offset, 0, cg), cg);9704generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp1), cg), tmp, cg);9705generateHelperCallInstruction(node, TR_softwareReadBarrier, NULL, cg);9706break;9707case gc_modron_readbar_range_check:9708{9709generateRegMemInstruction(TR::InstOpCode::LRegMem(use64BitClasses), node, tmp, generateX86MemoryReference(object, offset, 0, cg), cg);97109711TR::LabelSymbol* begLabel = generateLabelSymbol(cg);9712TR::LabelSymbol* endLabel = generateLabelSymbol(cg);9713TR::LabelSymbol* rdbarLabel = generateLabelSymbol(cg);9714begLabel->setStartInternalControlFlow();9715endLabel->setEndInternalControlFlow();97169717TR::RegisterDependencyConditions* deps = generateRegisterDependencyConditions((uint8_t)1, 1, cg);9718deps->addPreCondition(tmp, TR::RealRegister::NoReg, cg);9719deps->addPostCondition(tmp, TR::RealRegister::NoReg, cg);97209721generateLabelInstruction(TR::InstOpCode::label, node, begLabel, cg);97229723generateRegMemInstruction(TR::InstOpCode::CMPRegMem(use64BitClasses), node, tmp, generateX86MemoryReference(cg->getVMThreadRegister(), comp->fej9()->thisThreadGetEvacuateBaseAddressOffset(), cg), cg);9724generateLabelInstruction(TR::InstOpCode::JAE4, node, rdbarLabel, cg);97259726{9727TR_OutlinedInstructionsGenerator og(rdbarLabel, node, cg);9728generateRegMemInstruction(TR::InstOpCode::CMPRegMem(use64BitClasses), node, tmp, generateX86MemoryReference(cg->getVMThreadRegister(), comp->fej9()->thisThreadGetEvacuateTopAddressOffset(), cg), cg);9729generateLabelInstruction(TR::InstOpCode::JA4, node, endLabel, cg);9730generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, tmp, 
generateX86MemoryReference(object, offset, 0, cg), cg);9731generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp1), cg), tmp, cg);9732generateHelperCallInstruction(node, TR_softwareReadBarrier, NULL, cg);9733generateLabelInstruction(TR::InstOpCode::JMP4, node, endLabel, cg);97349735og.endOutlinedInstructionSequence();9736}97379738generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);9739}9740break;9741default:9742TR_ASSERT(false, "Unsupported Read Barrier Type.");9743break;9744}9745#endif97469747generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, EAX, oldValue, cg);9748generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, tmp, newValue, cg);9749if (TR::Compiler->om.compressedReferenceShiftOffset() != 0)9750{9751if (!oldValueNode->isNull())9752{9753generateRegImmInstruction(TR::InstOpCode::SHRRegImm1(), node, EAX, TR::Compiler->om.compressedReferenceShiftOffset(), cg);9754}9755if (!newValueNode->isNull())9756{9757generateRegImmInstruction(TR::InstOpCode::SHRRegImm1(), node, tmp, TR::Compiler->om.compressedReferenceShiftOffset(), cg);9758}9759}97609761TR::RegisterDependencyConditions* deps = generateRegisterDependencyConditions((uint8_t)1, 1, cg);9762deps->addPreCondition(EAX, TR::RealRegister::eax, cg);9763deps->addPostCondition(EAX, TR::RealRegister::eax, cg);9764generateMemRegInstruction(use64BitClasses ? 
TR::InstOpCode::LCMPXCHG8MemReg : TR::InstOpCode::LCMPXCHG4MemReg, node, generateX86MemoryReference(object, offset, 0, cg), tmp, deps, cg);9765generateRegInstruction(TR::InstOpCode::SETE1Reg, node, result, cg);9766generateRegRegInstruction(TR::InstOpCode::MOVZXReg4Reg1, node, result, result, cg);97679768// We could insert a runtime test for whether the write actually succeeded or not.9769// However, since in practice it will almost always succeed we do not want to9770// penalize general runtime performance especially if it is still correct to do9771// a write barrier even if the store never actually happened.9772TR::TreeEvaluator::VMwrtbarWithoutStoreEvaluator(node, objectNode, newValueNode, NULL, cg->generateScratchRegisterManager(), cg);97739774cg->stopUsingRegister(tmp);9775cg->stopUsingRegister(EAX);9776node->setRegister(result);9777for (int32_t i = 1; i < node->getNumChildren(); i++)9778{9779cg->decReferenceCount(node->getChild(i));9780}9781return result;9782}97839784/** Replaces a call to an Unsafe CAS method with inline instructions.9785@return true if the call was replaced, false if it was not.97869787Note that this function must have behaviour consistent with the OMR function9788willNotInlineCompareAndSwapNative in omr/compiler/x/codegen/OMRCodeGenerator.cpp9789*/9790static bool9791inlineCompareAndSwapNative(9792TR::Node *node,9793int8_t size,9794bool isObject,9795TR::CodeGenerator *cg)9796{9797TR::Node *firstChild = node->getFirstChild();9798TR::Node *objectChild = node->getSecondChild();9799TR::Node *offsetChild = node->getChild(2);9800TR::Node *oldValueChild = node->getChild(3);9801TR::Node *newValueChild = node->getChild(4);9802TR::Compilation *comp = cg->comp();9803TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());98049805TR::InstOpCode::Mnemonic op;98069807if (TR::Compiler->om.canGenerateArraylets() && !node->isUnsafeGetPutCASCallOnNonArray())9808return false;98099810static char *disableCASInlining = feGetEnv("TR_DisableCASInlining");98119812if 
(disableCASInlining /* || comp->useCompressedPointers() */)9813return false;98149815// size = 4 --> CMPXCHG49816// size = 8 --> if 64-bit -> CMPXCHG89817// else if proc supports CMPXCHG8B -> CMPXCHG8B9818// else return false9819//9820// Do this early so we can return early without additional evaluations.9821//9822if (size == 4)9823{9824op = TR::InstOpCode::LCMPXCHG4MemReg;9825}9826else if (size == 8 && comp->target().is64Bit())9827{9828op = TR::InstOpCode::LCMPXCHG8MemReg;9829}9830else9831{9832if (!comp->target().cpu.supportsFeature(OMR_FEATURE_X86_CX8))9833return false;98349835op = TR::InstOpCode::LCMPXCHG8BMem;9836}98379838// In Java9 the sun.misc.Unsafe JNI methods have been moved to jdk.internal,9839// with a set of wrappers remaining in sun.misc to delegate to the new package.9840// We can be called in this function for the wrappers (which we will9841// not be converting to assembly), the new jdk.internal JNI methods or the9842// Java8 sun.misc JNI methods (both of which we will convert). 
We can9843// differentiate between these cases by testing with isNative() on the method.9844{9845TR::MethodSymbol *methodSymbol = node->getSymbol()->getMethodSymbol();9846if (methodSymbol && !methodSymbol->isNative())9847return false;9848}98499850cg->recursivelyDecReferenceCount(firstChild);98519852TR::Register *objectReg = cg->evaluate(objectChild);98539854TR::Register *offsetReg = NULL;9855int32_t offset = 0;98569857if (offsetChild->getOpCode().isLoadConst() && !offsetChild->getRegister() && IS_32BIT_SIGNED(offsetChild->getLongInt()))9858{9859offset = (int32_t)(offsetChild->getLongInt());9860}9861else9862{9863offsetReg = cg->evaluate(offsetChild);98649865// Assume that the offset is positive and not pathologically large (i.e., > 2^31).9866//9867if (comp->target().is32Bit())9868offsetReg = offsetReg->getLowOrder();9869}9870cg->decReferenceCount(offsetChild);98719872TR::MemoryReference *mr;98739874if (offsetReg)9875mr = generateX86MemoryReference(objectReg, offsetReg, 0, cg);9876else9877mr = generateX86MemoryReference(objectReg, offset, cg);98789879bool bumpedRefCount = false;9880TR::Node *translatedNode = newValueChild;9881if (comp->useCompressedPointers() &&9882isObject &&9883(newValueChild->getDataType() != TR::Address))9884{9885bool useShiftedOffsets = (TR::Compiler->om.compressedReferenceShiftOffset() != 0);98869887translatedNode = newValueChild;9888if (translatedNode->getOpCode().isConversion())9889translatedNode = translatedNode->getFirstChild();9890if (translatedNode->getOpCode().isRightShift()) // optional9891translatedNode = translatedNode->getFirstChild();98929893translatedNode = newValueChild;9894if (useShiftedOffsets)9895{9896while ((translatedNode->getNumChildren() > 0) &&9897(translatedNode->getOpCodeValue() != TR::a2l))9898translatedNode = translatedNode->getFirstChild();98999900if (translatedNode->getOpCodeValue() == TR::a2l)9901translatedNode = translatedNode->getFirstChild();99029903// this is required so that different registers are9904// 
allocated for the actual store and translated values9905bumpedRefCount = true;9906translatedNode->incReferenceCount();9907}9908}99099910TR::Register *newValueRegister = cg->evaluate(newValueChild);99119912TR::Register *oldValueRegister = (size == 8) ?9913cg->longClobberEvaluate(oldValueChild) : cg->intClobberEvaluate(oldValueChild);9914bool killOldValueRegister = (oldValueChild->getReferenceCount() > 1) ? true : false;9915cg->decReferenceCount(oldValueChild);99169917TR::RegisterDependencyConditions *deps;9918TR_X86ScratchRegisterManager *scratchRegisterManagerForRealTime = NULL;9919TR::Register *storeAddressRegForRealTime = NULL;99209921if (comp->getOptions()->realTimeGC() && isObject)9922{9923scratchRegisterManagerForRealTime = cg->generateScratchRegisterManager();99249925// If reference is unresolved, need to resolve it right here before the barrier starts9926// Otherwise, we could get stopped during the resolution and that could invalidate any tests we would have performend9927// beforehand9928// For simplicity, just evaluate the store address into storeAddressRegForRealTime right now9929storeAddressRegForRealTime = scratchRegisterManagerForRealTime->findOrCreateScratchRegister();9930generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, storeAddressRegForRealTime, mr, cg);9931if (node->getSymbolReference()->isUnresolved())9932{9933TR::TreeEvaluator::padUnresolvedDataReferences(node, *node->getSymbolReference(), cg);99349935// storeMR was created against a (i)wrtbar node which is a store. The unresolved data snippet that9936// was created set the checkVolatility bit based on that node being a store. 
Since the resolution9937// is now going to occur on a LEA instruction, which does not require any memory fence and hence9938// no volatility check, we need to clear that "store" ness of the unresolved data snippet9939TR::UnresolvedDataSnippet *snippet = mr->getUnresolvedDataSnippet();9940if (snippet)9941snippet->resetUnresolvedStore();9942}99439944TR::TreeEvaluator::VMwrtbarRealTimeWithoutStoreEvaluator(9945node,9946mr,9947storeAddressRegForRealTime,9948objectChild,9949translatedNode,9950NULL,9951scratchRegisterManagerForRealTime,9952cg);9953}99549955TR::MemoryReference *cmpxchgMR = mr;99569957if (op == TR::InstOpCode::LCMPXCHG8BMem)9958{9959int numDeps = 4;9960if (storeAddressRegForRealTime != NULL)9961{9962numDeps++;9963cmpxchgMR = generateX86MemoryReference(storeAddressRegForRealTime, 0, cg);9964}99659966if (scratchRegisterManagerForRealTime)9967numDeps += scratchRegisterManagerForRealTime->numAvailableRegisters();99689969deps = generateRegisterDependencyConditions(numDeps, numDeps, cg);9970deps->addPreCondition(oldValueRegister->getLowOrder(), TR::RealRegister::eax, cg);9971deps->addPreCondition(oldValueRegister->getHighOrder(), TR::RealRegister::edx, cg);9972deps->addPreCondition(newValueRegister->getLowOrder(), TR::RealRegister::ebx, cg);9973deps->addPreCondition(newValueRegister->getHighOrder(), TR::RealRegister::ecx, cg);9974deps->addPostCondition(oldValueRegister->getLowOrder(), TR::RealRegister::eax, cg);9975deps->addPostCondition(oldValueRegister->getHighOrder(), TR::RealRegister::edx, cg);9976deps->addPostCondition(newValueRegister->getLowOrder(), TR::RealRegister::ebx, cg);9977deps->addPostCondition(newValueRegister->getHighOrder(), TR::RealRegister::ecx, cg);99789979if (scratchRegisterManagerForRealTime)9980scratchRegisterManagerForRealTime->addScratchRegistersToDependencyList(deps);99819982deps->stopAddingConditions();99839984generateMemInstruction(op, node, cmpxchgMR, deps, cg);9985}9986else9987{9988int numDeps = 1;9989if (storeAddressRegForRealTime 
!= NULL)9990{9991numDeps++;9992cmpxchgMR = generateX86MemoryReference(storeAddressRegForRealTime, 0, cg);9993}99949995if (scratchRegisterManagerForRealTime)9996numDeps += scratchRegisterManagerForRealTime->numAvailableRegisters();99979998deps = generateRegisterDependencyConditions(numDeps, numDeps, cg);9999deps->addPreCondition(oldValueRegister, TR::RealRegister::eax, cg);10000deps->addPostCondition(oldValueRegister, TR::RealRegister::eax, cg);1000110002if (scratchRegisterManagerForRealTime)10003scratchRegisterManagerForRealTime->addScratchRegistersToDependencyList(deps);1000410005deps->stopAddingConditions();1000610007generateMemRegInstruction(op, node, cmpxchgMR, newValueRegister, deps, cg);10008}1000910010if (killOldValueRegister)10011cg->stopUsingRegister(oldValueRegister);1001210013if (storeAddressRegForRealTime)10014scratchRegisterManagerForRealTime->reclaimScratchRegister(storeAddressRegForRealTime);1001510016TR::Register *resultReg = cg->allocateRegister();10017generateRegInstruction(TR::InstOpCode::SETE1Reg, node, resultReg, cg);10018generateRegRegInstruction(TR::InstOpCode::MOVZXReg4Reg1, node, resultReg, resultReg, cg);1001910020// Non-realtime: Generate a write barrier for this kind of object.10021//10022if (!comp->getOptions()->realTimeGC() && isObject)10023{10024// We could insert a runtime test for whether the write actually succeeded or not.10025// However, since in practice it will almost always succeed we do not want to10026// penalize general runtime performance especially if it is still correct to do10027// a write barrier even if the store never actually happened.10028//10029// A branch10030//10031TR_X86ScratchRegisterManager *scratchRegisterManager = 
cg->generateScratchRegisterManager();1003210033TR::TreeEvaluator::VMwrtbarWithoutStoreEvaluator(10034node,10035objectChild,10036translatedNode,10037NULL,10038scratchRegisterManager,10039cg);10040}1004110042node->setRegister(resultReg);1004310044cg->decReferenceCount(newValueChild);10045cg->decReferenceCount(objectChild);10046if (bumpedRefCount)10047cg->decReferenceCount(translatedNode);1004810049return true;10050}100511005210053// Generate inline code if possible for a call to an inline method. The call10054// may be direct or indirect; if it is indirect a guard will be generated around10055// the inline code and a fall-back to the indirect call.10056// Returns true if the call was inlined, otherwise a regular call sequence must10057// be issued by the caller of this method.10058//10059bool J9::X86::TreeEvaluator::VMinlineCallEvaluator(10060TR::Node *node,10061bool isIndirect,10062TR::CodeGenerator *cg)10063{10064TR::MethodSymbol *methodSymbol = node->getSymbol()->castToMethodSymbol();10065TR::ResolvedMethodSymbol *resolvedMethodSymbol = node->getSymbol()->getResolvedMethodSymbol();1006610067TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());1006810069bool callWasInlined = false;10070TR::Compilation *comp = cg->comp();1007110072if (methodSymbol)10073{10074switch (methodSymbol->getRecognizedMethod())10075{10076case TR::sun_nio_ch_NativeThread_current:10077// The spec says that on systems that do not require signaling10078// that this method should return -1. 
I'm not sure what do realtime10079// systems do here10080if (!comp->getOptions()->realTimeGC() && node->getNumChildren()>0)10081{10082TR::Register *nativeThreadReg = cg->allocateRegister();10083TR::Register *nativeThreadRegHigh = NULL;10084TR::Register *vmThreadReg = cg->getVMThreadRegister();10085int32_t numDeps = 2;1008610087if (comp->target().is32Bit())10088{10089nativeThreadRegHigh = cg->allocateRegister();10090numDeps ++;10091}1009210093TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)numDeps, cg);10094deps->addPostCondition(nativeThreadReg, TR::RealRegister::NoReg, cg);10095if (comp->target().is32Bit())10096{10097deps->addPostCondition(nativeThreadRegHigh, TR::RealRegister::NoReg, cg);10098}10099deps->addPostCondition(vmThreadReg, TR::RealRegister::ebp, cg);1010010101if (comp->target().is64Bit())10102{10103TR::LabelSymbol *startLabel = generateLabelSymbol(cg);10104TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);10105startLabel->setStartInternalControlFlow();10106generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);1010710108generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, nativeThreadReg,10109generateX86MemoryReference(vmThreadReg, fej9->thisThreadOSThreadOffset(), cg), cg);10110generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, nativeThreadReg,10111generateX86MemoryReference(nativeThreadReg, offsetof(J9Thread, handle), cg), cg);10112doneLabel->setEndInternalControlFlow();10113generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, deps, cg);10114}10115else10116{10117TR::MemoryReference *lowMR = generateX86MemoryReference(vmThreadReg, fej9->thisThreadOSThreadOffset(), cg);10118TR::MemoryReference *highMR = generateX86MemoryReference(*lowMR, 4, cg);1011910120TR::LabelSymbol *startLabel = generateLabelSymbol(cg);10121TR::LabelSymbol *doneLabel = 
generateLabelSymbol(cg);10122startLabel->setStartInternalControlFlow();10123generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);1012410125generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, nativeThreadReg, lowMR, cg);10126generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, nativeThreadRegHigh, highMR, cg);1012710128TR::MemoryReference *lowHandleMR = generateX86MemoryReference(nativeThreadReg, offsetof(J9Thread, handle), cg);10129TR::MemoryReference *highHandleMR = generateX86MemoryReference(*lowMR, 4, cg);1013010131generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, nativeThreadReg, lowHandleMR, cg);10132generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, nativeThreadRegHigh, highHandleMR, cg);1013310134doneLabel->setEndInternalControlFlow();10135generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, deps, cg);10136}1013710138if (comp->target().is32Bit())10139{10140TR::RegisterPair *longRegister = cg->allocateRegisterPair(nativeThreadReg, nativeThreadRegHigh);10141node->setRegister(longRegister);10142}10143else10144{10145node->setRegister(nativeThreadReg);10146}10147cg->recursivelyDecReferenceCount(node->getFirstChild());10148return true;10149}10150return false; // Call the native version of NativeThread.current()10151case TR::jdk_internal_misc_Unsafe_copyMemory0:10152case TR::sun_misc_Unsafe_copyMemory:10153{10154if (comp->canTransformUnsafeCopyToArrayCopy()10155&& methodSymbol->isNative()10156&& performTransformation(comp, "O^O Call arraycopy instead of Unsafe.copyMemory: %s\n", cg->getDebug()->getName(node)))10157{10158TR::Node *src = node->getChild(1);10159TR::Node *srcOffset = node->getChild(2);10160TR::Node *dest = node->getChild(3);10161TR::Node *destOffset = node->getChild(4);10162TR::Node *len = node->getChild(5);1016310164if (comp->target().is32Bit())10165{10166srcOffset = TR::Node::create(TR::l2i, 1, srcOffset);10167destOffset = TR::Node::create(TR::l2i, 1, destOffset);10168len = 
TR::Node::create(TR::l2i, 1, len);10169src = TR::Node::create(TR::aiadd, 2, src, srcOffset);10170dest = TR::Node::create(TR::aiadd, 2, dest, destOffset);10171}10172else10173{10174src = TR::Node::create(TR::aladd, 2, src, srcOffset);10175dest = TR::Node::create(TR::aladd, 2, dest, destOffset);10176}1017710178TR::Node *arraycopyNode = TR::Node::createArraycopy(src, dest, len);10179TR::TreeEvaluator::arraycopyEvaluator(arraycopyNode,cg);1018010181if (node->getChild(0)->getRegister())10182cg->decReferenceCount(node->getChild(0));10183else10184node->getChild(0)->recursivelyDecReferenceCount();1018510186cg->decReferenceCount(node->getChild(1));10187cg->decReferenceCount(node->getChild(2));10188cg->decReferenceCount(node->getChild(3));10189cg->decReferenceCount(node->getChild(4));10190cg->decReferenceCount(node->getChild(5));1019110192return true;10193}10194return false; // Perform the original Unsafe.copyMemory call10195}10196case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z:10197{10198if(node->isSafeForCGToFastPathUnsafeCall())10199return inlineCompareAndSwapNative(node, 4, false, cg);10200}10201break;10202case TR::sun_misc_Unsafe_compareAndSwapLong_jlObjectJJJ_Z:10203{10204if(node->isSafeForCGToFastPathUnsafeCall())10205return inlineCompareAndSwapNative(node, 8, false, cg);10206}10207break;10208case TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z:10209{10210static bool UseOldCompareAndSwapObject = (bool)feGetEnv("TR_UseOldCompareAndSwapObject");10211if(node->isSafeForCGToFastPathUnsafeCall())10212{10213if (UseOldCompareAndSwapObject)10214return inlineCompareAndSwapNative(node, (comp->target().is64Bit() && !comp->useCompressedPointers()) ? 
8 : 4, true, cg);10215else10216{10217inlineCompareAndSwapObjectNative(node, cg);10218return true;10219}10220}10221}10222break;1022310224case TR::java_util_concurrent_atomic_Fences_reachabilityFence:10225{10226cg->decReferenceCount(node->getChild(0));10227break;10228}1022910230case TR::java_util_concurrent_atomic_Fences_orderAccesses:10231{10232if (comp->target().cpu.supportsMFence())10233{10234TR::InstOpCode fenceOp;10235fenceOp.setOpCodeValue(TR::InstOpCode::MFENCE);10236generateInstruction(fenceOp.getOpCodeValue(), node, cg);10237}1023810239cg->decReferenceCount(node->getChild(0));10240break;10241}1024210243case TR::java_util_concurrent_atomic_Fences_orderReads:10244{10245if (comp->target().cpu.requiresLFence() &&10246comp->target().cpu.supportsLFence())10247{10248TR::InstOpCode fenceOp;10249fenceOp.setOpCodeValue(TR::InstOpCode::LFENCE);10250generateInstruction(fenceOp.getOpCodeValue(), node, cg);10251}1025210253cg->decReferenceCount(node->getChild(0));10254break;10255}1025610257case TR::java_util_concurrent_atomic_Fences_orderWrites:10258{10259if (comp->target().cpu.supportsSFence())10260{10261TR::InstOpCode fenceOp;10262fenceOp.setOpCodeValue(TR::InstOpCode::SFENCE);10263generateInstruction(fenceOp.getOpCodeValue(), node, cg);10264}1026510266cg->decReferenceCount(node->getChild(0));10267break;10268}1026910270case TR::java_lang_Object_clone:10271{10272return (objectCloneEvaluator(node, cg) != NULL);10273break;10274}10275default:10276break;10277}10278}1027910280if (!resolvedMethodSymbol)10281return false;1028210283if (resolvedMethodSymbol)10284{10285switch (resolvedMethodSymbol->getRecognizedMethod())10286{10287#ifdef LINUX10288case TR::java_lang_System_nanoTime:10289{10290TR_ASSERT(!isIndirect, "Indirect call to nanoTime");10291callWasInlined = inlineNanoTime(node, cg);10292break;10293}10294#endif10295default:10296break;10297}10298}1029910300return callWasInlined;10301}103021030310304/**10305* \brief10306* Generate instructions to conditionally branch to a 
write barrier helper call10307*10308* \oaram branchOp10309* The branch instruction to jump to the write barrier helper call10310*10311* \param node10312* The write barrier node10313*10314* \param gcMode10315* The GC Mode10316*10317* \param owningObjectReg10318* The register holding the owning object10319*10320* \param sourceReg10321* The register holding the source object10322*10323* \param doneLabel10324* The label to jump to when returning from the write barrier helper10325*10326* \param cg10327* The Code Generator10328*10329* Note that RealTimeGC is handled separately in a different method.10330*/10331static void generateWriteBarrierCall(10332TR::InstOpCode::Mnemonic branchOp,10333TR::Node* node,10334MM_GCWriteBarrierType gcMode,10335TR::Register* owningObjectReg,10336TR::Register* sourceReg,10337TR::LabelSymbol* doneLabel,10338TR::CodeGenerator* cg)10339{10340TR::Compilation *comp = cg->comp();10341TR_ASSERT(gcMode != gc_modron_wrtbar_satb && !comp->getOptions()->realTimeGC(), "This helper is not for RealTimeGC.");1034210343uint8_t helperArgCount = 0; // Number of arguments passed on the runtime helper.10344TR::SymbolReference *wrtBarSymRef = NULL;1034510346if (node->getOpCodeValue() == TR::arraycopy)10347{10348wrtBarSymRef = comp->getSymRefTab()->findOrCreateWriteBarrierBatchStoreSymbolRef();10349helperArgCount = 1;10350}10351else if (gcMode == gc_modron_wrtbar_cardmark_and_oldcheck)10352{10353wrtBarSymRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreGenerationalAndConcurrentMarkSymbolRef();10354helperArgCount = 2;10355}10356else if (gcMode == gc_modron_wrtbar_always)10357{10358wrtBarSymRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreSymbolRef();10359helperArgCount = 2;10360}10361else if (comp->generateArraylets())10362{10363wrtBarSymRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreSymbolRef();10364helperArgCount = 2;10365}10366else10367{10368// Default case is a generational barrier (non-concurrent).10369//10370wrtBarSymRef = 
comp->getSymRefTab()->findOrCreateWriteBarrierStoreGenerationalSymbolRef();10371helperArgCount = 2;10372}1037310374TR::LabelSymbol* wrtBarLabel = generateLabelSymbol(cg);1037510376generateLabelInstruction(branchOp, node, wrtBarLabel, cg);1037710378TR_OutlinedInstructionsGenerator og(wrtBarLabel, node, cg);1037910380generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp1), cg), owningObjectReg, cg);10381if (helperArgCount > 1)10382{10383generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp2), cg), sourceReg, cg);10384}10385generateImmSymInstruction(TR::InstOpCode::CALLImm4, node, (uintptr_t)wrtBarSymRef->getMethodAddress(), wrtBarSymRef, cg);10386generateLabelInstruction(TR::InstOpCode::JMP4, node, doneLabel, cg);1038710388og.endOutlinedInstructionSequence();10389}1039010391static void reportFlag(bool value, char *name, TR::CodeGenerator *cg)10392{10393if (value)10394traceMsg(cg->comp(), " %s", name);10395}1039610397static int32_t byteOffsetForMask(int32_t mask, TR::CodeGenerator *cg)10398{10399int32_t result;10400for (result = 3; result >= 0; --result)10401{10402int32_t shift = 8*result;10403if ( ((mask>>shift)<<shift) == mask )10404break;10405}1040610407if (result != -110408&& performTransformation(cg->comp(), "O^O TREE EVALUATION: Use 1-byte TEST with offset %d for mask %08x\n", result, mask))10409return result;1041010411return -1;10412}104131041410415#define REPORT_FLAG(name) reportFlag((name), #name, cg)1041610417void J9::X86::TreeEvaluator::VMwrtbarRealTimeWithoutStoreEvaluator(10418TR::Node *node,10419TR::MemoryReference *storeMRForRealTime, // RTJ only10420TR::Register *storeAddressRegForRealTime, // RTJ only10421TR::Node *destOwningObject, // only NULL for ME, always evaluated except for AC (evaluated below)10422TR::Node *sourceObject, // NULL for ME and AC(Array 
Copy?)10423TR::Register *srcReg, // should only be provided when sourceObject == NULL (ME Multimidlet)10424TR_X86ScratchRegisterManager *srm,10425TR::CodeGenerator *cg)10426{10427TR::Compilation *comp = cg->comp();10428TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());10429TR_ASSERT(comp->getOptions()->realTimeGC(),"Call the non real-time barrier");10430auto gcMode = TR::Compiler->om.writeBarrierType();1043110432if (node->getOpCode().isWrtBar() && node->skipWrtBar())10433gcMode = gc_modron_wrtbar_none;10434else if ((node->getOpCodeValue() == TR::ArrayStoreCHK) &&10435node->getFirstChild()->getOpCode().isWrtBar() &&10436node->getFirstChild()->skipWrtBar())10437gcMode = gc_modron_wrtbar_none;1043810439// PR98283: it is not acceptable to emit a label symbol twice so always generate a new label here10440// we can clean up the API later in a less risky manner10441TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);1044210443// srcReg could only be NULL at this point for arraycopy10444if (sourceObject)10445{10446TR_ASSERT(!srcReg, "assertion failure");10447srcReg = sourceObject->getRegister();10448TR_ASSERT(srcReg, "assertion failure");10449} //1045010451TR::Node *wrtbarNode;10452switch (node->getOpCodeValue())10453{10454case TR::ArrayStoreCHK:10455wrtbarNode = node->getFirstChild();10456break;10457case TR::arraycopy:10458wrtbarNode = NULL;10459break;10460case TR::awrtbari:10461case TR::awrtbar:10462wrtbarNode = node;10463break;10464default:10465wrtbarNode = NULL;10466break;10467}1046810469bool doInternalControlFlow;1047010471if (node->getOpCodeValue() == TR::ArrayStoreCHK)10472{10473// TR::ArrayStoreCHK will create its own internal control flow.10474//10475doInternalControlFlow = false;10476}10477else10478{10479doInternalControlFlow = true;10480}1048110482if (comp->getOption(TR_TraceCG) /*&& comp->getOption(TR_TraceOptDetails)*/)10483{10484traceMsg(comp, " | Real Time Write barrier info:\n");10485traceMsg(comp, " | GC mode = %d:%s\n", gcMode, 
cg->getDebug()->getWriteBarrierKindName(gcMode));10486traceMsg(comp, " | Node = %s %s sourceObject = %s\n",10487cg->getDebug()->getName(node->getOpCodeValue()),10488cg->getDebug()->getName(node),10489sourceObject? cg->getDebug()->getName(sourceObject) : "(none)");10490traceMsg(comp, " | Action flags:");10491REPORT_FLAG(doInternalControlFlow);10492traceMsg(comp, "\n");10493}1049410495//10496// Phase 2: Generate the appropriate code.10497//10498TR::Register *owningObjectReg;10499TR::Register *tempReg = NULL;1050010501owningObjectReg = cg->evaluate(destOwningObject);1050210503if (doInternalControlFlow)10504{10505TR::LabelSymbol *startLabel = generateLabelSymbol(cg);10506startLabel->setStartInternalControlFlow();10507generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);10508doneLabel->setEndInternalControlFlow();10509}1051010511if (comp->getOption(TR_BreakOnWriteBarrier))10512{10513generateInstruction(TR::InstOpCode::INT3, node, cg);10514}1051510516TR::SymbolReference *wrtBarSymRef = NULL;10517if (wrtbarNode && (wrtbarNode->getOpCodeValue()==TR::awrtbar || wrtbarNode->isUnsafeStaticWrtBar()))10518wrtBarSymRef = comp->getSymRefTab()->findOrCreateWriteBarrierClassStoreRealTimeGCSymbolRef();10519else10520wrtBarSymRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreRealTimeGCSymbolRef();1052110522TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);1052310524// TR IL doesn't have a way to express the address of a field in an object, so we need some sneakiness here:10525// 1) create a dummy node for this argument to the call10526// 2) explicitly set that node's register to storeAddressRegForRealTime, preventing it from being evaluated10527// (will just push storeAddressRegForRealTime for the call)10528//10529TR::Node *dummyDestAddressNode = TR::Node::create(node, TR::aconst, 0, 0);10530dummyDestAddressNode->setRegister(storeAddressRegForRealTime);10531TR::Node *callNode = TR::Node::createWithSymRef(TR::call, 3, 3, sourceObject, dummyDestAddressNode, 
destOwningObject, wrtBarSymRef);1053210533if (comp->getOption(TR_DisableInlineWriteBarriersRT))10534{10535cg->evaluate(callNode);10536}10537else10538{10539TR_OutlinedInstructions *outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(callNode, TR::call, NULL, snippetLabel, doneLabel, cg);1054010541// have to disassemble the call node we just created, first have to give it a ref count 110542callNode->setReferenceCount(1);10543cg->recursivelyDecReferenceCount(callNode);1054410545cg->getOutlinedInstructionsList().push_front(outlinedHelperCall);10546cg->generateDebugCounter(10547outlinedHelperCall->getFirstInstruction(),10548TR::DebugCounter::debugCounterName(comp, "helperCalls/%s/(%s)/%d/%d", node->getOpCode().getName(), comp->signature(), node->getByteCodeInfo().getCallerIndex(), node->getByteCodeInfo().getByteCodeIndex()),105491, TR::DebugCounter::Cheap);1055010551if (comp->getOption(TR_CountWriteBarriersRT))10552{10553TR::MemoryReference *barrierCountMR = generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, debugEventData6), cg);10554generateMemInstruction(TR::InstOpCode::INCMem(comp->target().is64Bit()), node, barrierCountMR, cg);10555}1055610557tempReg = srm->findOrCreateScratchRegister();1055810559// if barrier not enabled, nothing to do10560TR::MemoryReference *fragmentParentMR = generateX86MemoryReference(cg->getVMThreadRegister(), fej9->thisThreadRememberedSetFragmentOffset() + fej9->getFragmentParentOffset(), cg);10561generateRegMemInstruction(TR::InstOpCode::LRegMem(comp->target().is64Bit()), node, tempReg, fragmentParentMR, cg);10562TR::MemoryReference *globalFragmentIDMR = generateX86MemoryReference(tempReg, fej9->getRememberedSetGlobalFragmentOffset(), cg);10563generateMemImmInstruction(TR::InstOpCode::CMPMemImms(), node, globalFragmentIDMR, 0, cg);10564generateLabelInstruction(TR::InstOpCode::JE4, node, doneLabel, cg);1056510566// now check if double barrier is enabled and definitely execute the barrier if it 
is10567// if (vmThread->localFragmentIndex == 0) goto snippetLabel10568TR::MemoryReference *localFragmentIndexMR = generateX86MemoryReference(cg->getVMThreadRegister(), fej9->thisThreadRememberedSetFragmentOffset() + fej9->getLocalFragmentOffset(), cg);10569generateMemImmInstruction(TR::InstOpCode::CMPMemImms(), node, localFragmentIndexMR, 0, cg);10570generateLabelInstruction(TR::InstOpCode::JE4, node, snippetLabel, cg);1057110572// null test on the reference we're about to store over: if it is null goto doneLabel10573// if (destObject->field == null) goto doneLabel10574TR::MemoryReference *nullTestMR = generateX86MemoryReference(storeAddressRegForRealTime, 0, cg);10575if (comp->target().is64Bit() && comp->useCompressedPointers())10576generateMemImmInstruction(TR::InstOpCode::CMP4MemImms, node, nullTestMR, 0, cg);10577else10578generateMemImmInstruction(TR::InstOpCode::CMPMemImms(), node, nullTestMR, 0, cg);10579generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);1058010581// fall-through means write barrier not needed, just do the store10582}1058310584if (doInternalControlFlow)10585{10586int32_t numPostConditions = 2 + srm->numAvailableRegisters();1058710588numPostConditions += 4;1058910590if (srcReg)10591{10592numPostConditions++;10593}1059410595TR::RegisterDependencyConditions *conditions =10596generateRegisterDependencyConditions((uint8_t) 0, numPostConditions, cg);1059710598conditions->addPostCondition(owningObjectReg, TR::RealRegister::NoReg, cg);10599if (srcReg)10600{10601conditions->addPostCondition(srcReg, TR::RealRegister::NoReg, cg);10602}1060310604conditions->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);1060510606if (!comp->getOption(TR_DisableInlineWriteBarriersRT))10607{10608TR_ASSERT(storeAddressRegForRealTime != NULL, "assertion failure");10609conditions->addPostCondition(storeAddressRegForRealTime, TR::RealRegister::NoReg, cg);1061010611TR_ASSERT(tempReg != NULL, "assertion 
failure");
         conditions->addPostCondition(tempReg, TR::RealRegister::NoReg, cg);
         }

      if (destOwningObject->getOpCode().hasSymbolReference()
          && destOwningObject->getSymbol()
          && !destOwningObject->getSymbol()->isLocalObject())
         {
         if (storeMRForRealTime->getBaseRegister())
            {
            conditions->unionPostCondition(storeMRForRealTime->getBaseRegister(), TR::RealRegister::NoReg, cg);
            }
         if (storeMRForRealTime->getIndexRegister())
            {
            conditions->unionPostCondition(storeMRForRealTime->getIndexRegister(), TR::RealRegister::NoReg, cg);
            }
         }

      srm->addScratchRegistersToDependencyList(conditions);
      conditions->stopAddingConditions();

      generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, conditions, cg);

      srm->stopUsingRegisters();
      }
   else
      {
      TR_ASSERT(node->getOpCodeValue() == TR::ArrayStoreCHK, "assertion failure");
      generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, cg);
      }
   }




// Emits the x86 write-barrier sequence for a reference store whose store instruction is
// generated by the caller. Only valid for non-real-time GC (asserted on entry).
//
// The function works in two phases:
//   Phase 1 derives a set of boolean "action flags" from the GC write barrier type and the
//           opcode/flags of the node.
//   Phase 2 emits code according to those flags. All paths converge on a freshly created
//           doneLabel, which is emitted last and carries the register dependencies for the
//           owning object, the source (if any), the VM thread register and every scratch
//           register handed out by srm.
//
// node             - node being evaluated; ArrayStoreCHK, arraycopy, awrtbar/awrtbari and icall
//                    are distinguished by opcode below
// destOwningObject - node for the object being stored into; evaluated here
// sourceObject     - node for the value being stored, or NULL; when supplied it must already be
//                    in a register (asserted)
// srcReg           - source register supplied directly; must be NULL when sourceObject is given
// srm              - scratch register manager used for all temporaries; stopUsingRegisters() is
//                    called on it at the very end
// cg               - code generator
//
void J9::X86::TreeEvaluator::VMwrtbarWithoutStoreEvaluator(
   TR::Node *node,
   TR::Node *destOwningObject, // only NULL for ME, always evaluated except for AC (evaluated below)
   TR::Node *sourceObject, // NULL for ME and AC(Array Copy?)
   TR::Register *srcReg, // should only be provided when sourceObject == NULL (ME Multimidlet)
   TR_X86ScratchRegisterManager *srm,
   TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
   TR_ASSERT(!(comp->getOptions()->realTimeGC()),"Call the real-time barrier");
   auto gcMode = TR::Compiler->om.writeBarrierType();

   // A node (or the write barrier child of an ArrayStoreCHK) flagged skipWrtBar is treated
   // as if the GC required no barrier at all.
   if (node->getOpCode().isWrtBar() && node->skipWrtBar())
      gcMode = gc_modron_wrtbar_none;
   else if ((node->getOpCodeValue() == TR::ArrayStoreCHK) &&
            node->getFirstChild()->getOpCode().isWrtBar() &&
            node->getFirstChild()->skipWrtBar())
      gcMode = gc_modron_wrtbar_none;

   // PR98283: it is not acceptable to emit a label symbol twice so always generate a new label here
   // we can clean up the API later in a less risky manner
   TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);

   TR::LabelSymbol *cardMarkDoneLabel = NULL;

   bool isSourceNonNull;

   // If a source node is provided, derive the source object register from it.
   // The source node must be evaluated before this function is called so it must
   // always be in a register.
   //
   if (sourceObject)
      {
      TR_ASSERT(!srcReg, "assertion failure");
      srcReg = sourceObject->getRegister();
      TR_ASSERT(srcReg, "assertion failure");
      isSourceNonNull = sourceObject->isNonNull();
      }
   else
      {
      isSourceNonNull = false;
      }


   // srcReg could only be NULL at this point for arraycopy

   //
   // Phase 1: Decide what parts of this logic we need to do
   //

   // wrtbarNode is the actual write barrier node when there is one (the node itself, or the
   // first child of an ArrayStoreCHK); NULL for arraycopy and every other opcode.
   TR::Node *wrtbarNode;
   switch (node->getOpCodeValue())
      {
      case TR::ArrayStoreCHK:
         wrtbarNode = node->getFirstChild();
         break;
      case TR::arraycopy:
         wrtbarNode = NULL;
         break;
      case TR::awrtbari:
      case TR::awrtbar:
         wrtbarNode = node;
         break;
      default:
         wrtbarNode = NULL;
         break;
      }

   bool doInlineCardMarkingWithoutOldSpaceCheck, doIsDestAHeapObjectCheck;

   if (wrtbarNode)
      {
      TR_ASSERT(wrtbarNode->getOpCode().isWrtBar(), "Expected node " POINTER_PRINTF_FORMAT " to be a WrtBar", wrtbarNode);
      // Note: for gc_modron_wrtbar_cardmark_and_oldcheck we let the helper do the card mark (ie. we don't inline it)
      doInlineCardMarkingWithoutOldSpaceCheck =
         (gcMode == gc_modron_wrtbar_cardmark || gcMode == gc_modron_wrtbar_cardmark_incremental)
         && !wrtbarNode->getSymbol()->isLocalObject()
         && !wrtbarNode->isNonHeapObjectWrtBar();

      // The heap-range test is only needed when the node is not already known to target a heap object.
      doIsDestAHeapObjectCheck = doInlineCardMarkingWithoutOldSpaceCheck && !wrtbarNode->isHeapObjectWrtBar();
      }
   else
      {
      // TR::arraycopy or TR::ArrayStoreCHK
      //
      // Old space checks will be done out-of-line, and if a card mark policy requires an old space check
      // as well then both will be done out-of-line.
      //
      doInlineCardMarkingWithoutOldSpaceCheck = doIsDestAHeapObjectCheck = (gcMode == gc_modron_wrtbar_cardmark || gcMode == gc_modron_wrtbar_cardmark_incremental);
      }


   // for Tarok gc_modron_wrtbar_cardmark
   //
   // doIsDestAHeapObjectCheck = true (if req) OK
   // doIsDestInOldSpaceCheck = false OK
   // doInlineCardMarkingWithoutOldSpaceCheck = maybe OK
   // doCheckConcurrentMarkActive = false OK
   // dirtyCardTableOutOfLine = false OK


   bool doIsDestInOldSpaceCheck =
      gcMode == gc_modron_wrtbar_oldcheck
      || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck
      || gcMode == gc_modron_wrtbar_always
      ;

   // Given the definition of doIsDestInOldSpaceCheck above, the only gcMode that can satisfy
   // both halves of this condition is gc_modron_wrtbar_cardmark_and_oldcheck.
   bool unsafeCallBarrier = false;
   if (doIsDestInOldSpaceCheck &&
       (gcMode == gc_modron_wrtbar_cardmark
        || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck
        || gcMode == gc_modron_wrtbar_cardmark_incremental) &&
       (node->getOpCodeValue()==TR::icall)) {
      TR::MethodSymbol *symbol = node->getSymbol()->castToMethodSymbol();
      if (symbol != NULL && symbol->getRecognizedMethod())
         unsafeCallBarrier = true;
   }

   bool doCheckConcurrentMarkActive =
      (gcMode == gc_modron_wrtbar_cardmark
       || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck
       || gcMode == gc_modron_wrtbar_cardmark_incremental
      ) && (doInlineCardMarkingWithoutOldSpaceCheck || (doIsDestInOldSpaceCheck && wrtbarNode) || unsafeCallBarrier);

   // Use out-of-line instructions to dirty the card table.
   //
   bool dirtyCardTableOutOfLine = true;

   if (gcMode == gc_modron_wrtbar_cardmark_incremental)
      {
      // Override these settings for policies that don't support concurrent mark.
      //
      doCheckConcurrentMarkActive = false;
      dirtyCardTableOutOfLine = false;
      }

   // For practical applications, adding an explicit test for NULL is not worth the pathlength cost
   // especially since storing null values is not the dominant case.
   //
   // The null test is therefore opt-in through the TR_doNullCheckOnWrtBar environment variable.
   static char *doNullCheckOnWrtBar = feGetEnv("TR_doNullCheckOnWrtBar");
   bool doSrcIsNullCheck = (doNullCheckOnWrtBar && doIsDestInOldSpaceCheck && srcReg && !isSourceNonNull);

   bool doInternalControlFlow;

   if (node->getOpCodeValue() == TR::ArrayStoreCHK)
      {
      // TR::ArrayStoreCHK will create its own internal control flow.
      //
      doInternalControlFlow = false;
      }
   else
      {
      // Any of these flags causes a branch to be emitted below, so the sequence must be
      // bracketed as internal control flow.
      doInternalControlFlow =
         (doIsDestInOldSpaceCheck
          || doIsDestAHeapObjectCheck
          || doCheckConcurrentMarkActive
          || doSrcIsNullCheck);
      }

   if (comp->getOption(TR_TraceCG) /*&& comp->getOption(TR_TraceOptDetails)*/)
      {
      traceMsg(comp, " | Write barrier info:\n");
      traceMsg(comp, " | GC mode = %d:%s\n", gcMode, cg->getDebug()->getWriteBarrierKindName(gcMode));
      traceMsg(comp, " | Node = %s %s sourceObject = %s\n",
         cg->getDebug()->getName(node->getOpCodeValue()),
         cg->getDebug()->getName(node),
         sourceObject? cg->getDebug()->getName(sourceObject) : "(none)");
      traceMsg(comp, " | Action flags:");
      REPORT_FLAG(doInternalControlFlow);
      REPORT_FLAG(doCheckConcurrentMarkActive);
      REPORT_FLAG(doInlineCardMarkingWithoutOldSpaceCheck);
      REPORT_FLAG(dirtyCardTableOutOfLine);
      REPORT_FLAG(doIsDestAHeapObjectCheck);
      REPORT_FLAG(doIsDestInOldSpaceCheck);
      REPORT_FLAG(isSourceNonNull);
      REPORT_FLAG(doSrcIsNullCheck);
      traceMsg(comp, "\n");
      }

   //
   // Phase 2: Generate the appropriate code.
   //
   TR::Register *owningObjectReg;
   // NOTE(review): this outer tempReg is never assigned; each use below declares its own
   // tempReg in an inner scope that shadows it.
   TR::Register *tempReg = NULL;

   owningObjectReg = cg->evaluate(destOwningObject);

   if (doInternalControlFlow)
      {
      // Open the internal control flow region here; it is closed by doneLabel.
      TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
      startLabel->setStartInternalControlFlow();
      generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);
      doneLabel->setEndInternalControlFlow();
      }

   if (comp->getOption(TR_BreakOnWriteBarrier))
      {
      generateInstruction(TR::InstOpCode::INT3, node, cg);
      }

   // NOTE(review): fragmentParentMR, localFragmentIndexMR and inlineCardMarkPath are not
   // referenced again anywhere in this function.
   TR::MemoryReference *fragmentParentMR = generateX86MemoryReference(cg->getVMThreadRegister(), fej9->thisThreadRememberedSetFragmentOffset() + fej9->getFragmentParentOffset(), cg);
   TR::MemoryReference *localFragmentIndexMR = generateX86MemoryReference(cg->getVMThreadRegister(), fej9->thisThreadRememberedSetFragmentOffset() + fej9->getLocalFragmentOffset(), cg);
   TR_OutlinedInstructions *inlineCardMarkPath = NULL;
   if (doInlineCardMarkingWithoutOldSpaceCheck && doCheckConcurrentMarkActive)
      {
      // Mainline: test the thread's concurrent-mark-active flag and branch to the outlined
      // card-dirtying sequence only when it is set.
      TR::MemoryReference *vmThreadPrivateFlagsMR = generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, privateFlags), cg);
      generateMemImmInstruction(TR::InstOpCode::TEST4MemImm4, node, vmThreadPrivateFlagsMR, J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE, cg);

      // Branch to outlined instructions to inline card dirtying.
      //
      TR::LabelSymbol *inlineCardMarkLabel = generateLabelSymbol(cg);

      generateLabelInstruction(TR::InstOpCode::JNE4, node, inlineCardMarkLabel, cg);

      // Dirty the card table.
      //
      TR_OutlinedInstructionsGenerator og(inlineCardMarkLabel, node, cg);
      TR::Register *tempReg = srm->findOrCreateScratchRegister();

      // tempReg = owning object address - heap base
      generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, tempReg, owningObjectReg, cg);

      if (comp->getOptions()->isVariableHeapBaseForBarrierRange0())
         {
         TR::MemoryReference *vhbMR =
            generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, heapBaseForBarrierRange0), cg);
         generateRegMemInstruction(TR::InstOpCode::SUBRegMem(), node, tempReg, vhbMR, cg);
         }
      else
         {
         uintptr_t chb = comp->getOptions()->getHeapBaseForBarrierRange0();

         // A 64-bit immediate must go through a register; a 32-bit-signed one can be encoded directly.
         if (comp->target().is64Bit() && (!IS_32BIT_SIGNED(chb) || TR::Compiler->om.nativeAddressesCanChangeSize()))
            {
            TR::Register *chbReg = srm->findOrCreateScratchRegister();
            generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, chbReg, chb, cg, TR_HEAP_BASE_FOR_BARRIER_RANGE);
            generateRegRegInstruction(TR::InstOpCode::SUBRegReg(), node, tempReg, chbReg, cg);
            srm->reclaimScratchRegister(chbReg);
            }
         else
            {
            generateRegImmInstruction(TR::InstOpCode::SUBRegImm4(), node, tempReg, (int32_t)chb, cg, TR_HEAP_BASE_FOR_BARRIER_RANGE);
            }
         }

      if (doIsDestAHeapObjectCheck)
         {
         // Skip the card mark when the (unsigned) offset from the heap base is >= the heap size.
         cardMarkDoneLabel = doIsDestInOldSpaceCheck ? generateLabelSymbol(cg) : doneLabel;

         if (comp->getOptions()->isVariableHeapSizeForBarrierRange0())
            {
            TR::MemoryReference *vhsMR =
               generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, heapSizeForBarrierRange0), cg);
            generateRegMemInstruction(TR::InstOpCode::CMPRegMem(), node, tempReg, vhsMR, cg);
            }
         else
            {
            uintptr_t chs = comp->getOptions()->getHeapSizeForBarrierRange0();

            if (comp->target().is64Bit() && (!IS_32BIT_SIGNED(chs) || TR::Compiler->om.nativeAddressesCanChangeSize()))
               {
               TR::Register *chsReg = srm->findOrCreateScratchRegister();
               generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, chsReg, chs, cg, TR_HEAP_SIZE_FOR_BARRIER_RANGE);
               generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, tempReg, chsReg, cg);
               srm->reclaimScratchRegister(chsReg);
               }
            else
               {
               generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, tempReg, (int32_t)chs, cg, TR_HEAP_SIZE_FOR_BARRIER_RANGE);
               }
            }

         generateLabelInstruction(TR::InstOpCode::JAE4, node, cardMarkDoneLabel, cg);
         }

      // Convert the heap offset into a card index.
      generateRegImmInstruction(TR::InstOpCode::SHRRegImm1(), node, tempReg, comp->getOptions()->getHeapAddressToCardAddressShift(), cg);

      // Mark the card
      //
      const uint8_t dirtyCard = 1;

      TR::MemoryReference *cardTableMR;

      if (comp->getOptions()->isVariableActiveCardTableBase())
         {
         TR::MemoryReference *actbMR =
            generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, activeCardTableBase), cg);
         generateRegMemInstruction(TR::InstOpCode::ADDRegMem(), node, tempReg, actbMR, cg);
         cardTableMR = generateX86MemoryReference(tempReg, 0, cg);
         }
      else
         {
         uintptr_t actb = comp->getOptions()->getActiveCardTableBase();

         if (comp->target().is64Bit() && (!IS_32BIT_SIGNED(actb) || TR::Compiler->om.nativeAddressesCanChangeSize()))
            {
            TR::Register *tempReg3 = srm->findOrCreateScratchRegister();
            generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, tempReg3, actb, cg, TR_ACTIVE_CARD_TABLE_BASE);
            cardTableMR = generateX86MemoryReference(tempReg3, tempReg, 0, cg);
            srm->reclaimScratchRegister(tempReg3);
            }
         else
            {
            cardTableMR = generateX86MemoryReference(NULL, tempReg, 0, (int32_t)actb, cg);
            cardTableMR->setReloKind(TR_ACTIVE_CARD_TABLE_BASE);
            }
         }

      generateMemImmInstruction(TR::InstOpCode::S1MemImm1, node, cardTableMR, dirtyCard, cg);
      srm->reclaimScratchRegister(tempReg);
      // The outlined sequence rejoins the mainline at doneLabel.
      generateLabelInstruction(TR::InstOpCode::JMP4, node, doneLabel, cg);

      og.endOutlinedInstructionSequence();
      }
   else if (doInlineCardMarkingWithoutOldSpaceCheck && !dirtyCardTableOutOfLine)
      {
      // Same card-dirtying sequence as above, but emitted in the mainline with no
      // concurrent-mark-active test and no trailing jump.
      //
      // Dirty the card table.
      //
      TR::Register *tempReg = srm->findOrCreateScratchRegister();

      generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, tempReg, owningObjectReg, cg);

      if (comp->getOptions()->isVariableHeapBaseForBarrierRange0())
         {
         TR::MemoryReference *vhbMR =
            generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, heapBaseForBarrierRange0), cg);
         generateRegMemInstruction(TR::InstOpCode::SUBRegMem(), node, tempReg, vhbMR, cg);
         }
      else
         {
         uintptr_t chb = comp->getOptions()->getHeapBaseForBarrierRange0();

         if (comp->target().is64Bit() && (!IS_32BIT_SIGNED(chb) || TR::Compiler->om.nativeAddressesCanChangeSize()))
            {
            TR::Register *chbReg = srm->findOrCreateScratchRegister();
            generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, chbReg, chb, cg, TR_HEAP_BASE_FOR_BARRIER_RANGE);
            generateRegRegInstruction(TR::InstOpCode::SUBRegReg(), node, tempReg, chbReg, cg);
            srm->reclaimScratchRegister(chbReg);
            }
         else
            {
            generateRegImmInstruction(TR::InstOpCode::SUBRegImm4(), node, tempReg, (int32_t)chb, cg, TR_HEAP_BASE_FOR_BARRIER_RANGE);
            }
         }

      if (doIsDestAHeapObjectCheck)
         {
         cardMarkDoneLabel = doIsDestInOldSpaceCheck ? generateLabelSymbol(cg) : doneLabel;

         if (comp->getOptions()->isVariableHeapSizeForBarrierRange0())
            {
            TR::MemoryReference *vhsMR =
               generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, heapSizeForBarrierRange0), cg);
            generateRegMemInstruction(TR::InstOpCode::CMPRegMem(), node, tempReg, vhsMR, cg);
            }
         else
            {
            uintptr_t chs = comp->getOptions()->getHeapSizeForBarrierRange0();

            if (comp->target().is64Bit() && (!IS_32BIT_SIGNED(chs) || TR::Compiler->om.nativeAddressesCanChangeSize()))
               {
               TR::Register *chsReg = srm->findOrCreateScratchRegister();
               generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, chsReg, chs, cg, TR_HEAP_SIZE_FOR_BARRIER_RANGE);
               generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, tempReg, chsReg, cg);
               srm->reclaimScratchRegister(chsReg);
               }
            else
               {
               generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, tempReg, (int32_t)chs, cg, TR_HEAP_SIZE_FOR_BARRIER_RANGE);
               }
            }

         generateLabelInstruction(TR::InstOpCode::JAE4, node, cardMarkDoneLabel, cg);
         }

      generateRegImmInstruction(TR::InstOpCode::SHRRegImm1(), node, tempReg, comp->getOptions()->getHeapAddressToCardAddressShift(), cg);

      // Mark the card
      //
      const uint8_t dirtyCard = 1;

      TR::MemoryReference *cardTableMR;

      if (comp->getOptions()->isVariableActiveCardTableBase())
         {
         TR::MemoryReference *actbMR =
            generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, activeCardTableBase), cg);
         generateRegMemInstruction(TR::InstOpCode::ADDRegMem(), node, tempReg, actbMR, cg);
         cardTableMR = generateX86MemoryReference(tempReg, 0, cg);
         }
      else
         {
         uintptr_t actb = comp->getOptions()->getActiveCardTableBase();

         if (comp->target().is64Bit() && (!IS_32BIT_SIGNED(actb) || TR::Compiler->om.nativeAddressesCanChangeSize()))
            {
            TR::Register *tempReg3 = srm->findOrCreateScratchRegister();
            generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, tempReg3, actb, cg, TR_ACTIVE_CARD_TABLE_BASE);
            cardTableMR = generateX86MemoryReference(tempReg3, tempReg, 0, cg);
            srm->reclaimScratchRegister(tempReg3);
            }
         else
            {
            cardTableMR = generateX86MemoryReference(NULL, tempReg, 0, (int32_t)actb, cg);
            cardTableMR->setReloKind(TR_ACTIVE_CARD_TABLE_BASE);
            }
         }

      generateMemImmInstruction(TR::InstOpCode::S1MemImm1, node, cardTableMR, dirtyCard, cg);

      srm->reclaimScratchRegister(tempReg);
      }

   // cardMarkDoneLabel is a distinct label (rather than doneLabel) only when both flags are set,
   // so it has to be emitted here before the old-space check code.
   if (doIsDestAHeapObjectCheck && doIsDestInOldSpaceCheck)
      {
      generateLabelInstruction(TR::InstOpCode::label, node, cardMarkDoneLabel, cg);
      }

   if (doSrcIsNullCheck)
      {
      // Storing null: skip the rest of the barrier.
      generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, srcReg, srcReg, cg);
      generateLabelInstruction(TR::InstOpCode::JE4, node, doneLabel, cg);
      }

   if (doIsDestInOldSpaceCheck)
      {
      static char *disableWrtbarOpt = feGetEnv("TR_DisableWrtbarOpt");

      // branchOp is the branch used by the final generateWriteBarrierCall() below;
      // gcModeForSnippet selects which barrier kind that call is generated for.
      TR::InstOpCode::Mnemonic branchOp;
      auto gcModeForSnippet = gcMode;

      bool skipSnippetIfSrcNotOld = false;
      bool skipSnippetIfDestOld = false;
      bool skipSnippetIfDestRemembered = false;

      TR::LabelSymbol *labelAfterBranchToSnippet = NULL;

      if (gcMode == gc_modron_wrtbar_always)
         {
         // Always call the write barrier helper.
         //
         // TODO: this should be an inline call.
         //
         branchOp = TR::InstOpCode::JMP4;
         }
      else if (doCheckConcurrentMarkActive)
         {
         //TR_ASSERT(wrtbarNode, "Must not be an arraycopy");

         // If the concurrent mark thread IS active then call the gencon write barrier in the helper
         // to perform card marking and any necessary remembered set updates.
         //
         // This is expected to be true for only a very small percentage of the time and hence
         // handling it out of line is justified.
         //
         if (!comp->getOption(TR_DisableWriteBarriersRangeCheck)
             && (node->getOpCodeValue() == TR::awrtbari)
             && doInternalControlFlow)
            {
            // Range-check the destination first: when the compare below yields above-or-equal
            // the whole barrier is skipped by branching straight to doneLabel.
            // NOTE(review): is64Bit is not read in this scope.
            bool is64Bit = comp->target().is64Bit(); // On compressed refs, owningObjectReg is already uncompressed, and the vmthread fields are 64 bits
            labelAfterBranchToSnippet = generateLabelSymbol(cg);
            // AOT support to be implemented in another PR
            if (!comp->getOptions()->isVariableHeapSizeForBarrierRange0() && !comp->compileRelocatableCode() && !disableWrtbarOpt)
               {
               // Constant heap bounds: compare the address directly against base + size.
               uintptr_t che = comp->getOptions()->getHeapBaseForBarrierRange0() + comp->getOptions()->getHeapSizeForBarrierRange0();
               if (comp->target().is64Bit() && !IS_32BIT_SIGNED(che))
                  {
                  generateRegMemInstruction(TR::InstOpCode::CMP8RegMem, node, owningObjectReg, generateX86MemoryReference(cg->findOrCreate8ByteConstant(node, che), cg), cg);
                  }
               else
                  {
                  generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, owningObjectReg, (int32_t)che, cg);
                  }
               }
            else
               {
               // Otherwise compare (address - base) against the heap size stored in the VM thread.
               uintptr_t chb = comp->getOptions()->getHeapBaseForBarrierRange0();
               TR::Register *tempOwningObjReg = srm->findOrCreateScratchRegister();
               generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, tempOwningObjReg, owningObjectReg, cg);
               if (comp->target().is64Bit() && (!IS_32BIT_SIGNED(chb) || TR::Compiler->om.nativeAddressesCanChangeSize()))
                  {
                  TR::Register *chbReg = srm->findOrCreateScratchRegister();
                  generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, chbReg, chb, cg, TR_HEAP_BASE_FOR_BARRIER_RANGE);
                  generateRegRegInstruction(TR::InstOpCode::SUBRegReg(), node, tempOwningObjReg, chbReg, cg);
                  srm->reclaimScratchRegister(chbReg);
                  }
               else
                  {
                  generateRegImmInstruction(TR::InstOpCode::SUBRegImm4(), node, tempOwningObjReg, (int32_t)chb, cg, TR_HEAP_BASE_FOR_BARRIER_RANGE);
                  }
               TR::MemoryReference *vhsMR1 =
                  generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, heapSizeForBarrierRange0), cg);
               generateRegMemInstruction(TR::InstOpCode::CMPRegMem(), node, tempOwningObjReg, vhsMR1, cg);
               srm->reclaimScratchRegister(tempOwningObjReg);
               }

            generateLabelInstruction(TR::InstOpCode::JAE1, node, doneLabel, cg);

            skipSnippetIfSrcNotOld = true;
            }
         else
            {
            skipSnippetIfDestOld = true;
            }

         // See if we can do a TR::InstOpCode::TEST1MemImm1
         //
         int32_t byteOffset = byteOffsetForMask(J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE, cg);
         if (byteOffset != -1)
            {
            TR::MemoryReference *vmThreadPrivateFlagsMR = generateX86MemoryReference(cg->getVMThreadRegister(), byteOffset + offsetof(J9VMThread, privateFlags), cg);
            generateMemImmInstruction(TR::InstOpCode::TEST1MemImm1, node, vmThreadPrivateFlagsMR, J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE >> (8*byteOffset), cg);
            }
         else
            {
            TR::MemoryReference *vmThreadPrivateFlagsMR = generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, privateFlags), cg);
            generateMemImmInstruction(TR::InstOpCode::TEST4MemImm4, node, vmThreadPrivateFlagsMR, J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE, cg);
            }

         // Concurrent mark active: hand off to the combined card-mark-and-old-check helper.
         generateWriteBarrierCall(TR::InstOpCode::JNE4, node, gc_modron_wrtbar_cardmark_and_oldcheck, owningObjectReg, srcReg, doneLabel, cg);

         // If the destination object is old and not remembered then process the remembered
         // set update out-of-line with the generational helper.
         //
         skipSnippetIfDestRemembered = true;
         gcModeForSnippet = gc_modron_wrtbar_oldcheck;
         }
      else if (gcMode == gc_modron_wrtbar_oldcheck)
         {
         // For pure generational barriers if the object is old and remembered then the helper
         // can be skipped.
         //
         skipSnippetIfDestOld = true;
         skipSnippetIfDestRemembered = true;
         }
      else
         {
         skipSnippetIfDestOld = true;
         skipSnippetIfDestRemembered = false;
         }

      if (skipSnippetIfSrcNotOld || skipSnippetIfDestOld)
         {
         TR_ASSERT((!skipSnippetIfSrcNotOld || !skipSnippetIfDestOld), "At most one of skipSnippetIfSrcNotOld and skipSnippetIfDestOld can be true");
         TR_ASSERT(skipSnippetIfDestOld || (srcReg != NULL), "Expected to have a source register for wrtbari");

         // NOTE(review): is64Bit is not read in this scope.
         bool is64Bit = comp->target().is64Bit(); // On compressed refs, owningObjectReg is already uncompressed, and the vmthread fields are 64 bits
         bool checkDest = skipSnippetIfDestOld; // Otherwise, check the src value
         bool skipSnippetIfOld = skipSnippetIfDestOld; // Otherwise, skip if the checked value (source or destination) is not old
         labelAfterBranchToSnippet = generateLabelSymbol(cg);
         // AOT support to be implemented in another PR
         if (!comp->getOptions()->isVariableHeapSizeForBarrierRange0() && !comp->compileRelocatableCode() && !disableWrtbarOpt)
            {
            uintptr_t che = comp->getOptions()->getHeapBaseForBarrierRange0() + comp->getOptions()->getHeapSizeForBarrierRange0();
            if (comp->target().is64Bit() && !IS_32BIT_SIGNED(che))
               {
               generateRegMemInstruction(TR::InstOpCode::CMP8RegMem, node, checkDest ? owningObjectReg : srcReg, generateX86MemoryReference(cg->findOrCreate8ByteConstant(node, che), cg), cg);
               }
            else
               {
               generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, checkDest ? owningObjectReg : srcReg, (int32_t)che, cg);
               }
            }
         else
            {
            // NOTE(review): unlike tempOwningObjReg in the range check above, this scratch
            // register is not explicitly reclaimed here.
            uintptr_t chb = comp->getOptions()->getHeapBaseForBarrierRange0();
            TR::Register *tempReg = srm->findOrCreateScratchRegister();
            generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, tempReg, checkDest ? owningObjectReg : srcReg, cg);
            if (comp->target().is64Bit() && (!IS_32BIT_SIGNED(chb) || TR::Compiler->om.nativeAddressesCanChangeSize()))
               {
               TR::Register *chbReg = srm->findOrCreateScratchRegister();
               generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, chbReg, chb, cg, TR_HEAP_BASE_FOR_BARRIER_RANGE);
               generateRegRegInstruction(TR::InstOpCode::SUBRegReg(), node, tempReg, chbReg, cg);
               srm->reclaimScratchRegister(chbReg);
               }
            else
               {
               generateRegImmInstruction(TR::InstOpCode::SUBRegImm4(), node, tempReg, (int32_t)chb, cg, TR_HEAP_BASE_FOR_BARRIER_RANGE);
               }
            TR::MemoryReference *vhsMR1 =
               generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, heapSizeForBarrierRange0), cg);
            generateRegMemInstruction(TR::InstOpCode::CMPRegMem(), node, tempReg, vhsMR1, cg);
            }

         branchOp = skipSnippetIfOld ? TR::InstOpCode::JB4 : TR::InstOpCode::JAE4; // For branch to snippet
         TR::InstOpCode::Mnemonic reverseBranchOp = skipSnippetIfOld ? TR::InstOpCode::JAE4 : TR::InstOpCode::JB4; // For branch past snippet

         // Now performing check for remembered
         if (skipSnippetIfDestRemembered)
            {
            // Set up for branch *past* snippet call for previous comparison
            generateLabelInstruction(reverseBranchOp, node, labelAfterBranchToSnippet, cg);

            // Test the remembered bits in the owning object's header flags, using a one-byte
            // test when byteOffsetForMask() finds the mask within a single byte.
            int32_t byteOffset = byteOffsetForMask(J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST, cg);
            if (byteOffset != -1)
               {
               TR::MemoryReference *MR = generateX86MemoryReference(owningObjectReg, byteOffset + TR::Compiler->om.offsetOfHeaderFlags(), cg);
               generateMemImmInstruction(TR::InstOpCode::TEST1MemImm1, node, MR, J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST >> (8*byteOffset), cg);
               }
            else
               {
               TR::MemoryReference *MR = generateX86MemoryReference(owningObjectReg, TR::Compiler->om.offsetOfHeaderFlags(), cg);
               generateMemImmInstruction(TR::InstOpCode::TEST4MemImm4, node, MR, J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST, cg);
               }
            // Call the helper only when none of the tested remembered bits are set.
            branchOp=TR::InstOpCode::JE4;
            }
         }

      generateWriteBarrierCall(branchOp, node, gcModeForSnippet, owningObjectReg, srcReg, doneLabel, cg);

      if (labelAfterBranchToSnippet)
         generateLabelInstruction(TR::InstOpCode::label, node, labelAfterBranchToSnippet, cg);
      }

   // Post conditions: owning object + VM thread register (2), the source register when there
   // is one, and every scratch register the manager reports as available.
   int32_t numPostConditions = 2 + srm->numAvailableRegisters();

   if (srcReg)
      {
      numPostConditions++;
      }

   TR::RegisterDependencyConditions *conditions =
      generateRegisterDependencyConditions((uint8_t) 0, numPostConditions, cg);

   conditions->addPostCondition(owningObjectReg, TR::RealRegister::NoReg, cg);
   if (srcReg)
      {
      conditions->addPostCondition(srcReg, TR::RealRegister::NoReg, cg);
      }

   conditions->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);

   srm->addScratchRegistersToDependencyList(conditions);
   conditions->stopAddingConditions();

   generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, conditions,
cg);1128211283srm->stopUsingRegisters();11284}112851128611287static TR::Instruction *11288doReferenceStore(11289TR::Node *node,11290TR::MemoryReference *storeMR,11291TR::Register *sourceReg,11292bool usingCompressedPointers,11293TR::CodeGenerator *cg)11294{11295TR::Compilation *comp = cg->comp();11296TR::InstOpCode::Mnemonic storeOp = usingCompressedPointers ? TR::InstOpCode::S4MemReg : TR::InstOpCode::SMemReg();11297TR::Instruction *instr = generateMemRegInstruction(storeOp, node, storeMR, sourceReg, cg);1129811299// for real-time GC, the data reference has already been resolved into an earlier LEA instruction so this padding isn't needed11300// even if the node symbol is marked as unresolved (the store instruction above is storing through a register11301// that contains the resolved address)11302if (!comp->getOptions()->realTimeGC() && node->getSymbolReference()->isUnresolved())11303{11304TR::TreeEvaluator::padUnresolvedDataReferences(node, *node->getSymbolReference(), cg);11305}1130611307return instr;11308}113091131011311void J9::X86::TreeEvaluator::VMwrtbarWithStoreEvaluator(11312TR::Node *node,11313TR::MemoryReference *storeMR,11314TR_X86ScratchRegisterManager *scratchRegisterManager,11315TR::Node *destOwningObject,11316TR::Node *sourceObject,11317bool isImplicitExceptionPoint,11318TR::CodeGenerator *cg,11319bool nullAdjusted)11320{11321TR_ASSERT(storeMR, "assertion failure");1132211323TR::Compilation *comp = cg->comp();1132411325TR::Register *owningObjectRegister = cg->evaluate(destOwningObject);11326TR::Register *sourceRegister = cg->evaluate(sourceObject);1132711328auto gcMode = TR::Compiler->om.writeBarrierType();11329bool isRealTimeGC = (comp->getOptions()->realTimeGC())? 
true:false;1133011331bool usingCompressedPointers = false;11332bool usingLowMemHeap = false;11333bool useShiftedOffsets = (TR::Compiler->om.compressedReferenceShiftOffset() != 0);11334TR::Node *translatedStore = NULL;1133511336// NOTE:11337//11338// If you change this code you also need to change writeBarrierEvaluator() in TreeEvaluator.cpp11339//11340if (comp->useCompressedPointers() &&11341((node->getOpCode().isCheck() && node->getFirstChild()->getOpCode().isIndirect() &&11342(node->getFirstChild()->getSecondChild()->getDataType() != TR::Address)) ||11343(node->getOpCode().isIndirect() && (node->getSecondChild()->getDataType() != TR::Address))))11344{11345if (node->getOpCode().isCheck())11346translatedStore = node->getFirstChild();11347else11348translatedStore = node;1134911350usingLowMemHeap = true;11351usingCompressedPointers = true;11352}1135311354TR::Register *translatedSourceReg = sourceRegister;11355if (usingCompressedPointers && (!usingLowMemHeap || useShiftedOffsets))11356{11357// handle stores of null values here1135811359if (nullAdjusted)11360translatedSourceReg = translatedStore->getSecondChild()->getRegister();11361else11362{11363translatedSourceReg = cg->evaluate(translatedStore->getSecondChild());11364if (!usingLowMemHeap)11365{11366generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), translatedStore, sourceRegister, sourceRegister, cg);11367generateRegRegInstruction(TR::InstOpCode::CMOVERegReg(), translatedStore, translatedSourceReg, sourceRegister, cg);11368}11369}11370}1137111372TR::Instruction *storeInstr = NULL;11373TR::Register *storeAddressRegForRealTime = NULL;1137411375if (isRealTimeGC)11376{11377// Realtime GC evaluates storeMR into a register here and then uses it to do the store after the write barrier1137811379// If reference is unresolved, need to resolve it right here before the barrier starts11380// Otherwise, we could get stopped during the resolution and that could invalidate any tests we would have performend11381// 
beforehand11382// For simplicity, just evaluate the store address into storeAddressRegForRealTime right now11383storeAddressRegForRealTime = scratchRegisterManager->findOrCreateScratchRegister();11384generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, storeAddressRegForRealTime, storeMR, cg);11385if (node->getSymbolReference()->isUnresolved())11386{11387TR::TreeEvaluator::padUnresolvedDataReferences(node, *node->getSymbolReference(), cg);1138811389// storeMR was created against a (i)wrtbar node which is a store. The unresolved data snippet that11390// was created set the checkVolatility bit based on that node being a store. Since the resolution11391// is now going to occur on a LEA instruction, which does not require any memory fence and hence11392// no volatility check, we need to clear that "store" ness of the unresolved data snippet11393TR::UnresolvedDataSnippet *snippet = storeMR->getUnresolvedDataSnippet();11394if (snippet)11395snippet->resetUnresolvedStore();11396}11397}11398else11399{11400// Non-realtime does the store first, then the write barrier.11401//11402storeInstr = doReferenceStore(node, storeMR, translatedSourceReg, usingCompressedPointers, cg);11403}1140411405if (TR::Compiler->om.writeBarrierType() == gc_modron_wrtbar_always && !isRealTimeGC)11406{11407TR::RegisterDependencyConditions *deps = NULL;11408TR::LabelSymbol *doneWrtBarLabel = generateLabelSymbol(cg);1140911410if (comp->target().is32Bit() && sourceObject->isNonNull() == false)11411{11412TR::LabelSymbol *startLabel = generateLabelSymbol(cg);11413startLabel->setStartInternalControlFlow();11414doneWrtBarLabel->setEndInternalControlFlow();1141511416generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);11417generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, sourceRegister, sourceRegister, cg);11418generateLabelInstruction(TR::InstOpCode::JE4, node, doneWrtBarLabel, cg);1141911420deps = generateRegisterDependencyConditions(0, 3, 
cg);11421deps->addPostCondition(sourceRegister, TR::RealRegister::NoReg, cg);11422deps->addPostCondition(owningObjectRegister, TR::RealRegister::NoReg, cg);11423deps->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);11424deps->stopAddingConditions();11425}1142611427generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp1), cg), owningObjectRegister, cg);11428generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp2), cg), sourceRegister, cg);1142911430TR::SymbolReference* wrtBarSymRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreSymbolRef();11431generateImmSymInstruction(TR::InstOpCode::CALLImm4, node, (uintptr_t)wrtBarSymRef->getMethodAddress(), wrtBarSymRef, cg);1143211433generateLabelInstruction(TR::InstOpCode::label, node, doneWrtBarLabel, deps, cg);11434}11435else11436{11437if (isRealTimeGC)11438{11439TR::TreeEvaluator::VMwrtbarRealTimeWithoutStoreEvaluator(11440node,11441storeMR,11442storeAddressRegForRealTime,11443destOwningObject,11444sourceObject,11445NULL,11446scratchRegisterManager,11447cg);11448}11449else11450{11451TR::TreeEvaluator::VMwrtbarWithoutStoreEvaluator(11452node,11453destOwningObject,11454sourceObject,11455NULL,11456scratchRegisterManager,11457cg);11458}11459}1146011461// Realtime GCs must do the write barrier first and then the store.11462//11463if (isRealTimeGC)11464{11465TR_ASSERT(storeAddressRegForRealTime, "assertion failure");11466TR::MemoryReference *myStoreMR = generateX86MemoryReference(storeAddressRegForRealTime, 0, cg);11467storeInstr = doReferenceStore(node, myStoreMR, translatedSourceReg, usingCompressedPointers, cg);11468scratchRegisterManager->reclaimScratchRegister(storeAddressRegForRealTime);11469}1147011471if (!usingLowMemHeap || 
useShiftedOffsets)11472cg->decReferenceCount(sourceObject);1147311474cg->decReferenceCount(destOwningObject);11475storeMR->decNodeReferenceCounts(cg);1147611477if (isImplicitExceptionPoint)11478cg->setImplicitExceptionPoint(storeInstr);11479}114801148111482void J9::X86::TreeEvaluator::generateVFTMaskInstruction(TR::Node *node, TR::Register *reg, TR::CodeGenerator *cg)11483{11484TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());11485uintptr_t mask = TR::Compiler->om.maskOfObjectVftField();11486bool is64Bit = cg->comp()->target().is64Bit(); // even with compressed object headers, a 64-bit mask operation is safe, though it may waste 1 byte because of the rex prefix11487if (~mask == 0)11488{11489// no mask instruction required11490}11491else if (~mask <= 127)11492{11493generateRegImmInstruction(TR::InstOpCode::ANDRegImms(is64Bit), node, reg, TR::Compiler->om.maskOfObjectVftField(), cg);11494}11495else11496{11497generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(is64Bit), node, reg, TR::Compiler->om.maskOfObjectVftField(), cg);11498}11499}115001150111502void11503VMgenerateCatchBlockBBStartPrologue(11504TR::Node *node,11505TR::Instruction *fenceInstruction,11506TR::CodeGenerator *cg)11507{11508TR::Compilation *comp = cg->comp();11509TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());1151011511if (comp->getJittedMethodSymbol()->usesSinglePrecisionMode() &&11512cg->enableSinglePrecisionMethods())11513{11514cg->setLastCatchAppendInstruction(fenceInstruction);11515}1151611517TR::Block *block = node->getBlock();11518if (fej9->shouldPerformEDO(block, comp))11519{11520TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);11521TR::LabelSymbol *restartLabel = generateLabelSymbol(cg);1152211523generateMemInstruction(TR::InstOpCode::DEC4Mem, node, generateX86MemoryReference((intptr_t)comp->getRecompilationInfo()->getCounterAddress(), cg), cg);11524generateLabelInstruction(TR::InstOpCode::JE4, node, snippetLabel, cg);11525generateLabelInstruction(TR::InstOpCode::label, node, 
restartLabel, cg);11526cg->addSnippet(new (cg->trHeapMemory()) TR::X86ForceRecompilationSnippet(cg, node, restartLabel, snippetLabel));11527}1152811529}115301153111532TR::Register *11533J9::X86::TreeEvaluator::tstartEvaluator(TR::Node *node, TR::CodeGenerator *cg)11534{11535/*11536xbegin fall_back_path11537mov monReg, [obj+Lw_offset]11538cmp monReg, 0;11539je fallThroughLabel11540cmp monReg, rbp11541je fallThroughLabel11542xabort11543fall_back_path:11544test eax, 0x211545jne gotoTransientFailureNodeLabel11546test eax, 0x0000000111547je persistentFailureLabel11548test eax, 0x0100000011549jne gotoTransientFailureNodeLabel11550jmp persistentFailLabel11551gotoTransientFailureNodeLabel:11552mov counterReg,10011553spinLabel:11554dec counterReg11555jne spinLabel11556jmp TransientFailureNodeLabel11557*/11558TR::Compilation *comp = cg->comp();11559TR::Node *persistentFailureNode = node->getFirstChild();11560TR::Node *transientFailureNode = node->getSecondChild();11561TR::Node *fallThroughNode = node->getThirdChild();11562TR::Node *objNode = node->getChild(3);11563TR::Node *GRANode = NULL;1156411565TR::LabelSymbol *startLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);11566startLabel->setStartInternalControlFlow();11567TR::LabelSymbol *endLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);11568endLabel->setEndInternalControlFlow();1156911570TR::LabelSymbol *gotoTransientFailure = TR::LabelSymbol::create(cg->trHeapMemory(),cg);11571TR::LabelSymbol *gotoPersistentFailure = TR::LabelSymbol::create(cg->trHeapMemory(),cg);11572TR::LabelSymbol *gotoFallThrough = TR::LabelSymbol::create(cg->trHeapMemory(),cg);11573TR::LabelSymbol *transientFailureLabel = transientFailureNode->getBranchDestination()->getNode()->getLabel();11574TR::LabelSymbol *persistentFailureLabel = persistentFailureNode->getBranchDestination()->getNode()->getLabel();11575TR::LabelSymbol *fallBackPathLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);11576TR::LabelSymbol *fallThroughLabel = 
fallThroughNode->getBranchDestination()->getNode()->getLabel();1157711578TR::Register *objReg = cg->evaluate(objNode);11579TR::Register *accReg = cg->allocateRegister();11580TR::Register *monReg = cg->allocateRegister();11581TR::RegisterDependencyConditions *fallBackConditions = generateRegisterDependencyConditions((uint8_t)0, 2, cg);11582TR::RegisterDependencyConditions *endLabelConditions;11583TR::RegisterDependencyConditions *fallThroughConditions = NULL;11584TR::RegisterDependencyConditions *persistentConditions = NULL;11585TR::RegisterDependencyConditions *transientConditions = NULL;1158611587if (fallThroughNode->getNumChildren() != 0)11588{11589GRANode = fallThroughNode->getFirstChild();11590cg->evaluate(GRANode);11591List<TR::Register> popRegisters(cg->trMemory());11592fallThroughConditions = generateRegisterDependencyConditions(GRANode, cg, 0, &popRegisters);11593cg->decReferenceCount(GRANode);11594}1159511596if (persistentFailureNode->getNumChildren() != 0)11597{11598GRANode = persistentFailureNode->getFirstChild();11599cg->evaluate(GRANode);11600List<TR::Register> popRegisters(cg->trMemory());11601persistentConditions = generateRegisterDependencyConditions(GRANode, cg, 0, &popRegisters);11602cg->decReferenceCount(GRANode);11603}1160411605if (transientFailureNode->getNumChildren() != 0)11606{11607GRANode = transientFailureNode->getFirstChild();11608cg->evaluate(GRANode);11609List<TR::Register> popRegisters(cg->trMemory());11610transientConditions = generateRegisterDependencyConditions(GRANode, cg, 0, &popRegisters);11611cg->decReferenceCount(GRANode);11612}1161311614//startLabel11615//add place holder register so that eax would not contain any useful value before xbegin11616TR::Register *dummyReg = cg->allocateRegister();11617dummyReg->setPlaceholderReg();11618TR::RegisterDependencyConditions *startLabelConditions = generateRegisterDependencyConditions((uint8_t)0, 1, cg);11619startLabelConditions->addPostCondition(dummyReg, TR::RealRegister::eax, 
cg);11620startLabelConditions->stopAddingConditions();11621cg->stopUsingRegister(dummyReg);11622generateLabelInstruction(TR::InstOpCode::label, node, startLabel, startLabelConditions, cg);1162311624//xbegin fall_back_path11625generateLongLabelInstruction(TR::InstOpCode::XBEGIN4, node, fallBackPathLabel, cg);11626//mov monReg, obj+offset11627int32_t lwOffset = cg->fej9()->getByteOffsetToLockword((TR_OpaqueClassBlock *) cg->getMonClass(node));11628TR::MemoryReference *objLockRef = generateX86MemoryReference(objReg, lwOffset, cg);11629if (comp->target().is64Bit() && cg->fej9()->generateCompressedLockWord())11630{11631generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, monReg, objLockRef, cg);11632}11633else11634{11635generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, monReg, objLockRef, cg);11636}1163711638if (comp->target().is64Bit() && cg->fej9()->generateCompressedLockWord())11639{11640generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, monReg, 0, cg);11641}11642else11643{11644generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, monReg, 0, cg);11645}1164611647if (fallThroughConditions)11648generateLabelInstruction(TR::InstOpCode::JE4, node, fallThroughLabel, fallThroughConditions, cg);11649else11650generateLabelInstruction(TR::InstOpCode::JE4, node, fallThroughLabel, cg);1165111652TR::Register *vmThreadReg = cg->getVMThreadRegister();11653if (comp->target().is64Bit() && cg->fej9()->generateCompressedLockWord())11654{11655generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, monReg, vmThreadReg, cg);11656}11657else11658{11659generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, monReg, vmThreadReg, cg);11660}1166111662if (fallThroughConditions)11663generateLabelInstruction(TR::InstOpCode::JE4, node, fallThroughLabel, fallThroughConditions, cg);11664else11665generateLabelInstruction(TR::InstOpCode::JE4, node, fallThroughLabel, cg);1166611667//xabort11668generateImmInstruction(TR::InstOpCode::XABORT, node, 0x01, 
cg);1166911670cg->stopUsingRegister(monReg);11671//fall_back_path:11672generateLabelInstruction(TR::InstOpCode::label, node, fallBackPathLabel, cg);1167311674endLabelConditions = generateRegisterDependencyConditions((uint8_t)0, 1, cg);11675endLabelConditions->addPostCondition(accReg, TR::RealRegister::eax, cg);11676endLabelConditions->stopAddingConditions();1167711678// test eax, 0x211679generateRegImmInstruction(TR::InstOpCode::TEST1AccImm1, node, accReg, 0x2, cg);11680generateLabelInstruction(TR::InstOpCode::JNE4, node, gotoTransientFailure, cg);1168111682// abort because of nonzero lockword is also transient failure11683generateRegImmInstruction(TR::InstOpCode::TEST4AccImm4, node, accReg, 0x00000001, cg);11684if (persistentConditions)11685generateLabelInstruction(TR::InstOpCode::JE4, node, persistentFailureLabel, persistentConditions, cg);11686else11687generateLabelInstruction(TR::InstOpCode::JE4, node, persistentFailureLabel, cg);1168811689generateRegImmInstruction(TR::InstOpCode::TEST4AccImm4, node, accReg, 0x01000000, cg);11690// je gotransientFailureNodeLabel11691generateLabelInstruction(TR::InstOpCode::JNE4, node, gotoTransientFailure, cg);1169211693if (persistentConditions)11694generateLabelInstruction(TR::InstOpCode::JMP4, node, persistentFailureLabel, persistentConditions, cg);11695else11696generateLabelInstruction(TR::InstOpCode::JMP4, node, persistentFailureLabel, cg);11697cg->stopUsingRegister(accReg);1169811699// gotoTransientFailureLabel:11700if (transientConditions)11701generateLabelInstruction(TR::InstOpCode::label, node, gotoTransientFailure, transientConditions, cg);11702else11703generateLabelInstruction(TR::InstOpCode::label, node, gotoTransientFailure, cg);1170411705//delay11706TR::Register *counterReg = cg->allocateRegister();11707generateRegImmInstruction(TR::InstOpCode::MOV4RegImm4, node, counterReg, 100, cg);11708TR::LabelSymbol *spinLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);11709generateLabelInstruction(TR::InstOpCode::label, 
node, spinLabel, cg);11710generateInstruction(TR::InstOpCode::PAUSE, node, cg);11711generateInstruction(TR::InstOpCode::PAUSE, node, cg);11712generateInstruction(TR::InstOpCode::PAUSE, node, cg);11713generateInstruction(TR::InstOpCode::PAUSE, node, cg);11714generateInstruction(TR::InstOpCode::PAUSE, node, cg);11715generateRegInstruction(TR::InstOpCode::DEC4Reg, node, counterReg, cg);11716TR::RegisterDependencyConditions *loopConditions = generateRegisterDependencyConditions((uint8_t)0, 1, cg);11717loopConditions->addPostCondition(counterReg, TR::RealRegister::NoReg, cg);11718loopConditions->stopAddingConditions();11719generateLabelInstruction(TR::InstOpCode::JNE4, node, spinLabel, loopConditions, cg);11720cg->stopUsingRegister(counterReg);1172111722if(transientConditions)11723generateLabelInstruction(TR::InstOpCode::JMP4, node, transientFailureLabel, transientConditions, cg);11724else11725generateLabelInstruction(TR::InstOpCode::JMP4, node, transientFailureLabel, cg);1172611727generateLabelInstruction(TR::InstOpCode::label, node, endLabel, endLabelConditions, cg);11728cg->decReferenceCount(objNode);11729cg->decReferenceCount(persistentFailureNode);11730cg->decReferenceCount(transientFailureNode);11731return NULL;11732}1173311734TR::Register *11735J9::X86::TreeEvaluator::tfinishEvaluator(TR::Node *node, TR::CodeGenerator *cg)11736{11737generateInstruction(TR::InstOpCode::XEND, node, cg);11738return NULL;11739}1174011741TR::Register *11742J9::X86::TreeEvaluator::tabortEvaluator(TR::Node *node, TR::CodeGenerator *cg)11743{11744generateImmInstruction(TR::InstOpCode::XABORT, node, 0x04, cg);11745return NULL;11746}1174711748TR::Register *11749J9::X86::TreeEvaluator::directCallEvaluator(TR::Node *node, TR::CodeGenerator *cg)11750{11751static bool useJapaneseCompression = (feGetEnv("TR_JapaneseComp") != NULL);11752TR::Compilation *comp = cg->comp();11753TR::SymbolReference *symRef = node->getSymbolReference();1175411755bool callInlined = false;11756TR::Register 
*returnRegister = NULL;11757TR::MethodSymbol *symbol = node->getSymbol()->castToMethodSymbol();1175811759#ifdef J9VM_OPT_JAVA_CRYPTO_ACCELERATION11760if (cg->inlineCryptoMethod(node, returnRegister))11761{11762return returnRegister;11763}11764#endif1176511766if (symbol->isHelper())11767{11768switch (symRef->getReferenceNumber())11769{11770case TR_checkAssignable:11771return TR::TreeEvaluator::checkcastinstanceofEvaluator(node, cg);11772default:11773break;11774}11775}1177611777switch (symbol->getMandatoryRecognizedMethod())11778{11779case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfLatin1:11780if (!cg->getSupportsInlineStringIndexOf())11781break;11782else11783return inlineIntrinsicIndexOf(node, cg, true);11784case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfUTF16:11785if (!cg->getSupportsInlineStringIndexOf())11786break;11787else11788return inlineIntrinsicIndexOf(node, cg, false);11789case TR::com_ibm_jit_JITHelpers_transformedEncodeUTF16Big:11790case TR::com_ibm_jit_JITHelpers_transformedEncodeUTF16Little:11791return TR::TreeEvaluator::encodeUTF16Evaluator(node, cg);1179211793case TR::java_lang_String_hashCodeImplDecompressed:11794returnRegister = inlineStringHashCode(node, false, cg);11795callInlined = (returnRegister != NULL);11796break;11797case TR::java_lang_String_hashCodeImplCompressed:11798returnRegister = inlineStringHashCode(node, true, cg);11799callInlined = (returnRegister != NULL);11800break;11801default:11802break;11803}1180411805if (cg->getSupportsInlineStringCaseConversion())11806{11807switch (symbol->getRecognizedMethod())11808{11809case TR::com_ibm_jit_JITHelpers_toUpperIntrinsicUTF16:11810return TR::TreeEvaluator::toUpperIntrinsicUTF16Evaluator(node, cg);11811case TR::com_ibm_jit_JITHelpers_toUpperIntrinsicLatin1:11812return TR::TreeEvaluator::toUpperIntrinsicLatin1Evaluator(node, cg);11813case TR::com_ibm_jit_JITHelpers_toLowerIntrinsicUTF16:11814return TR::TreeEvaluator::toLowerIntrinsicUTF16Evaluator(node, cg);11815case 
TR::com_ibm_jit_JITHelpers_toLowerIntrinsicLatin1:11816return TR::TreeEvaluator::toLowerIntrinsicLatin1Evaluator(node, cg);11817default:11818break;11819}11820}1182111822switch (symbol->getRecognizedMethod())11823{11824case TR::java_nio_Bits_keepAlive:11825case TR::java_lang_ref_Reference_reachabilityFence:11826{11827TR_ASSERT(node->getNumChildren() == 1, "keepAlive is assumed to have just one argument");1182811829// The only purpose of keepAlive is to prevent an otherwise11830// unreachable object from being garbage collected, because we don't11831// want its finalizer to be called too early. There's no need to11832// generate a full-blown call site just for this purpose.1183311834TR::Register *valueToKeepAlive = cg->evaluate(node->getFirstChild());1183511836// In theory, a value could be kept alive on the stack, rather than in11837// a register. It is unfortunate that the following deps will force11838// the value into a register for no reason. However, in many common11839// cases, this label will have no effect on the generated code, and11840// will only affect GC maps.11841//11842TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)1, (uint8_t)1, cg);11843deps->addPreCondition (valueToKeepAlive, TR::RealRegister::NoReg, cg);11844deps->addPostCondition (valueToKeepAlive, TR::RealRegister::NoReg, cg);11845new (cg->trHeapMemory()) TR::X86LabelInstruction(TR::InstOpCode::label, node, generateLabelSymbol(cg), deps, cg);11846cg->decReferenceCount(node->getFirstChild());1184711848return NULL; // keepAlive has no return value11849}1185011851case TR::java_math_BigDecimal_noLLOverflowAdd:11852case TR::java_math_BigDecimal_noLLOverflowMul:11853if (cg->getSupportsBDLLHardwareOverflowCheck())11854{11855// Eat this call as its only here to anchor where a long lookaside overflow check11856// needs to be done. 
There should be a TR::icmpeq node following11857// this one where the real overflow check will be inserted.11858//11859cg->recursivelyDecReferenceCount(node->getFirstChild());11860cg->recursivelyDecReferenceCount(node->getSecondChild());11861cg->evaluate(node->getChild(2));11862cg->decReferenceCount(node->getChild(2));11863returnRegister = cg->allocateRegister();11864node->setRegister(returnRegister);11865return returnRegister;11866}1186711868break;11869case TR::java_lang_StringLatin1_inflate:11870if (cg->getSupportsInlineStringLatin1Inflate())11871{11872return TR::TreeEvaluator::inlineStringLatin1Inflate(node, cg);11873}11874break;11875case TR::java_lang_Math_sqrt:11876case TR::java_lang_StrictMath_sqrt:11877case TR::java_lang_System_nanoTime:11878case TR::java_util_concurrent_atomic_Fences_orderAccesses:11879case TR::java_util_concurrent_atomic_Fences_orderReads:11880case TR::java_util_concurrent_atomic_Fences_orderWrites:11881case TR::java_util_concurrent_atomic_Fences_reachabilityFence:11882case TR::sun_nio_ch_NativeThread_current:11883case TR::sun_misc_Unsafe_copyMemory:11884if (TR::TreeEvaluator::VMinlineCallEvaluator(node, false, cg))11885{11886returnRegister = node->getRegister();11887}11888else11889{11890returnRegister = TR::TreeEvaluator::performCall(node, false, true, cg);11891}1189211893callInlined = true;11894break;1189511896case TR::java_lang_String_compress:11897return TR::TreeEvaluator::compressStringEvaluator(node, cg, useJapaneseCompression);1189811899case TR::java_lang_String_compressNoCheck:11900return TR::TreeEvaluator::compressStringNoCheckEvaluator(node, cg, useJapaneseCompression);1190111902case TR::java_lang_String_andOR:11903return TR::TreeEvaluator::andORStringEvaluator(node, cg);1190411905default:11906break;11907}119081190911910// If the method to be called is marked as an inline method, see if it can11911// actually be generated inline.11912//11913if (!callInlined && (symbol->isVMInternalNative() || 
symbol->isJITInternalNative()))11914{11915if (TR::TreeEvaluator::VMinlineCallEvaluator(node, false, cg))11916return node->getRegister();11917else11918return TR::TreeEvaluator::performCall(node, false, true, cg);11919}11920else if (callInlined)11921{11922return returnRegister;11923}1192411925// Call was not inlined. Delegate to the parent directCallEvaluator.11926//11927return J9::TreeEvaluator::directCallEvaluator(node, cg);11928}1192911930TR::Register *11931J9::X86::TreeEvaluator::inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenerator *cg)11932{11933TR_ASSERT_FATAL(cg->comp()->target().is64Bit(), "StringLatin1.inflate only supported on 64-bit targets");11934TR_ASSERT_FATAL(cg->getSupportsInlineStringLatin1Inflate(), "Inlining of StringLatin1.inflate not supported");11935TR_ASSERT_FATAL(!TR::Compiler->om.canGenerateArraylets(), "StringLatin1.inflate intrinsic is not supported with arraylets");11936TR_ASSERT_FATAL_WITH_NODE(node, node->getNumChildren() == 5, "Wrong number of children in inlineStringLatin1Inflate");1193711938intptr_t headerOffsetConst = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();11939uint8_t vectorLengthConst = 16;1194011941TR::Register *srcBufferReg = cg->evaluate(node->getChild(0));11942TR::Register *srcOffsetReg = cg->gprClobberEvaluate(node->getChild(1), TR::InstOpCode::MOV4RegReg);11943TR::Register *destBufferReg = cg->evaluate(node->getChild(2));11944TR::Register *destOffsetReg = cg->gprClobberEvaluate(node->getChild(3), TR::InstOpCode::MOV4RegReg);11945TR::Register *lengthReg = cg->gprClobberEvaluate(node->getChild(4), TR::InstOpCode::MOV4RegReg);1194611947TR::Register *xmmHighReg = cg->allocateRegister(TR_FPR);11948TR::Register *xmmLowReg = cg->allocateRegister(TR_FPR);11949TR::Register *zeroReg = cg->allocateRegister(TR_FPR);11950TR::Register *scratchReg = cg->allocateRegister(TR_GPR);1195111952int depCount = 9;11953TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, depCount, 
cg);11954deps->addPostCondition(xmmHighReg, TR::RealRegister::NoReg, cg);11955deps->addPostCondition(xmmLowReg, TR::RealRegister::NoReg, cg);11956deps->addPostCondition(zeroReg, TR::RealRegister::NoReg, cg);11957deps->addPostCondition(lengthReg, TR::RealRegister::NoReg, cg);11958deps->addPostCondition(srcBufferReg, TR::RealRegister::NoReg, cg);11959deps->addPostCondition(destBufferReg, TR::RealRegister::NoReg, cg);11960deps->addPostCondition(scratchReg, TR::RealRegister::eax, cg);11961deps->addPostCondition(srcOffsetReg, TR::RealRegister::ecx, cg);11962deps->addPostCondition(destOffsetReg, TR::RealRegister::edx, cg);11963deps->stopAddingConditions();1196411965TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);11966TR::LabelSymbol *copyResidueLabel = generateLabelSymbol(cg);11967TR::LabelSymbol *afterCopy8Label = generateLabelSymbol(cg);1196811969TR::LabelSymbol *startLabel = generateLabelSymbol(cg);11970startLabel->setStartInternalControlFlow();11971generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);1197211973TR::Node *destOffsetNode = node->getChild(3);1197411975if (!destOffsetNode->isConstZeroValue())11976{11977// dest offset measured in characters, convert it to bytes11978generateRegRegInstruction(TR::InstOpCode::ADD4RegReg, node, destOffsetReg, destOffsetReg, cg);11979}1198011981generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, lengthReg, lengthReg, cg);11982generateLabelInstruction(TR::InstOpCode::JE4, node, doneLabel, cg);1198311984generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, lengthReg, 8, cg);11985generateLabelInstruction(TR::InstOpCode::JL4, node, afterCopy8Label, cg);1198611987// make sure the register is zero before interleaving11988generateRegRegInstruction(TR::InstOpCode::PXORRegReg, node, zeroReg, zeroReg, cg);1198911990TR::LabelSymbol *startLoop = generateLabelSymbol(cg);11991TR::LabelSymbol *endLoop = generateLabelSymbol(cg);1199211993// vectorized add in loop, 16 bytes per iteration11994// use 
srcOffsetReg for loop counter, add starting offset to lengthReg, subtract 16 (xmm register size)11995// to prevent reading/writing beyond the end of the array11996generateRegMemInstruction(TR::InstOpCode::LEA4RegMem, node, scratchReg, generateX86MemoryReference(lengthReg, srcOffsetReg, 0, -vectorLengthConst, cg), cg);1199711998generateLabelInstruction(TR::InstOpCode::label, node, startLoop, cg);11999generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, srcOffsetReg, scratchReg, cg);12000generateLabelInstruction(TR::InstOpCode::JG4, node, endLoop, cg);1200112002generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmHighReg, generateX86MemoryReference(srcBufferReg, srcOffsetReg, 0, headerOffsetConst, cg), cg);1200312004generateRegRegInstruction(TR::InstOpCode::MOVDQURegReg, node, xmmLowReg, xmmHighReg, cg);12005generateRegRegInstruction(TR::InstOpCode::PUNPCKHBWRegReg, node, xmmLowReg, zeroReg, cg);12006generateMemRegInstruction(TR::InstOpCode::MOVDQUMemReg, node, generateX86MemoryReference(destBufferReg, destOffsetReg, 0, headerOffsetConst + vectorLengthConst, cg), xmmLowReg, cg);1200712008generateRegRegInstruction(TR::InstOpCode::PUNPCKLBWRegReg, node, xmmHighReg, zeroReg, cg);12009generateMemRegInstruction(TR::InstOpCode::MOVDQUMemReg, node, generateX86MemoryReference(destBufferReg, destOffsetReg, 0, headerOffsetConst, cg), xmmHighReg, cg);1201012011// increase src offset by size of imm register12012// increase dest offset by double, to account for the byte->char inflation12013generateRegImmInstruction(TR::InstOpCode::ADD4RegImm4, node, srcOffsetReg, vectorLengthConst, cg);12014generateRegImmInstruction(TR::InstOpCode::ADD4RegImm4, node, destOffsetReg, 2 * vectorLengthConst, cg);1201512016// LOOP BACK12017generateLabelInstruction(TR::InstOpCode::JMP4, node, startLoop, cg);12018generateLabelInstruction(TR::InstOpCode::label, node, endLoop, cg);1201912020// AND length with 15 to compute residual remainder12021// then copy and interleave 8 bytes from 
src buffer with 0s into dest buffer if possible12022generateRegImmInstruction(TR::InstOpCode::AND4RegImm4, node, lengthReg, vectorLengthConst - 1, cg);1202312024generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, lengthReg, 8, cg);12025generateLabelInstruction(TR::InstOpCode::JL1, node, afterCopy8Label, cg);1202612027generateRegMemInstruction(TR::InstOpCode::MOVQRegMem, node, xmmLowReg, generateX86MemoryReference(srcBufferReg, srcOffsetReg, 0, headerOffsetConst, cg), cg);12028generateRegRegInstruction(TR::InstOpCode::PUNPCKLBWRegReg, node, xmmLowReg, zeroReg, cg);12029generateMemRegInstruction(TR::InstOpCode::MOVDQUMemReg, node, generateX86MemoryReference(destBufferReg, destOffsetReg, 0, headerOffsetConst, cg), xmmLowReg, cg);12030generateRegImmInstruction(TR::InstOpCode::SUB4RegImm4, node, lengthReg, 8, cg);1203112032generateRegImmInstruction(TR::InstOpCode::ADD4RegImm4, node, srcOffsetReg, 8, cg);12033generateRegImmInstruction(TR::InstOpCode::ADD4RegImm4, node, destOffsetReg, 16, cg);1203412035generateLabelInstruction(TR::InstOpCode::label, node, afterCopy8Label, cg);1203612037// handle residual (< 8 bytes left) & jump to copy instructions based on the number of bytes left12038// calculate how many bytes to skip based on length;1203912040const int copy_instruction_size = 5 // size of MOVZXReg2Mem112041+4; // size of S2MemReg1204212043// since copy_instruction_size could change depending on which registers are allocated to scratchReg, srcOffsetReg and destOffsetReg12044// we reserve them to be eax, ecx, edx, respectively1204512046generateRegRegImmInstruction(TR::InstOpCode::IMUL4RegRegImm4, node, lengthReg, lengthReg, -copy_instruction_size, cg);12047generateRegImmInstruction(TR::InstOpCode::ADD4RegImm4, node, lengthReg, copy_instruction_size * 7, cg);1204812049bool is64bit = cg->comp()->target().is64Bit();12050// calculate address to jump too12051generateRegMemInstruction(TR::InstOpCode::LEARegMem(is64bit), node, scratchReg, 
generateX86MemoryReference(copyResidueLabel, cg), cg);12052generateRegRegInstruction(TR::InstOpCode::ADDRegReg(is64bit), node, lengthReg, scratchReg, cg);1205312054generateRegMemInstruction(TR::InstOpCode::LEARegMem(is64bit), node, srcOffsetReg, generateX86MemoryReference(srcBufferReg, srcOffsetReg, 0, 0, cg), cg);12055generateRegMemInstruction(TR::InstOpCode::LEARegMem(is64bit), node, destOffsetReg, generateX86MemoryReference(destBufferReg, destOffsetReg, 0, 0, cg), cg);1205612057generateRegInstruction(TR::InstOpCode::JMPReg, node, lengthReg, cg);1205812059generateLabelInstruction(TR::InstOpCode::label, node, copyResidueLabel, cg);1206012061for (int i = 0; i < 7; i++)12062{12063generateRegMemInstruction(TR::InstOpCode::MOVZXReg2Mem1, node, scratchReg, generateX86MemoryReference(srcOffsetReg, headerOffsetConst + 6 - i, cg), cg);12064generateMemRegInstruction(TR::InstOpCode::S2MemReg, node, generateX86MemoryReference(destOffsetReg, headerOffsetConst + 2 * (6 - i), cg), scratchReg, cg);12065}1206612067generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, deps, cg);12068doneLabel->setEndInternalControlFlow();1206912070cg->stopUsingRegister(srcOffsetReg);12071cg->stopUsingRegister(destOffsetReg);12072cg->stopUsingRegister(lengthReg);1207312074cg->stopUsingRegister(xmmHighReg);12075cg->stopUsingRegister(xmmLowReg);12076cg->stopUsingRegister(zeroReg);12077cg->stopUsingRegister(scratchReg);1207812079for (int i = 0; i < 5; i++)12080{12081cg->decReferenceCount(node->getChild(i));12082}1208312084return NULL;12085}1208612087TR::Register *12088J9::X86::TreeEvaluator::encodeUTF16Evaluator(TR::Node *node, TR::CodeGenerator *cg)12089{12090// tree looks like:12091// icall com.ibm.jit.JITHelpers.encodeUTF16{Big,Little}()12092// input ptr12093// output ptr12094// input length (in elements)12095// Number of elements translated is returned1209612097TR::MethodSymbol *symbol = node->getSymbol()->castToMethodSymbol();12098bool bigEndian = symbol->getRecognizedMethod() == 
TR::com_ibm_jit_JITHelpers_transformedEncodeUTF16Big;1209912100// Set up register dependencies12101const int gprClobberCount = 2;12102const int maxFprClobberCount = 5;12103const int fprClobberCount = bigEndian ? 5 : 4; // xmm4 only needed for big-endian12104TR::Register *srcPtrReg, *dstPtrReg, *lengthReg, *resultReg;12105TR::Register *gprClobbers[gprClobberCount], *fprClobbers[maxFprClobberCount];12106bool killSrc = TR::TreeEvaluator::stopUsingCopyRegAddr(node->getChild(0), srcPtrReg, cg);12107bool killDst = TR::TreeEvaluator::stopUsingCopyRegAddr(node->getChild(1), dstPtrReg, cg);12108bool killLen = TR::TreeEvaluator::stopUsingCopyRegInteger(node->getChild(2), lengthReg, cg);12109resultReg = cg->allocateRegister();12110for (int i = 0; i < gprClobberCount; i++)12111gprClobbers[i] = cg->allocateRegister();12112for (int i = 0; i < fprClobberCount; i++)12113fprClobbers[i] = cg->allocateRegister(TR_FPR);1211412115int depCount = 11;12116TR::RegisterDependencyConditions *deps =12117generateRegisterDependencyConditions((uint8_t)0, depCount, cg);1211812119deps->addPostCondition(srcPtrReg, TR::RealRegister::esi, cg);12120deps->addPostCondition(dstPtrReg, TR::RealRegister::edi, cg);12121deps->addPostCondition(lengthReg, TR::RealRegister::edx, cg);12122deps->addPostCondition(resultReg, TR::RealRegister::eax, cg);1212312124deps->addPostCondition(gprClobbers[0], TR::RealRegister::ecx, cg);12125deps->addPostCondition(gprClobbers[1], TR::RealRegister::ebx, cg);1212612127deps->addPostCondition(fprClobbers[0], TR::RealRegister::xmm0, cg);12128deps->addPostCondition(fprClobbers[1], TR::RealRegister::xmm1, cg);12129deps->addPostCondition(fprClobbers[2], TR::RealRegister::xmm2, cg);12130deps->addPostCondition(fprClobbers[3], TR::RealRegister::xmm3, cg);12131if (bigEndian)12132deps->addPostCondition(fprClobbers[4], TR::RealRegister::xmm4, cg);1213312134deps->stopAddingConditions();1213512136// Generate helper call12137TR_RuntimeHelper helper;12138if 
(cg->comp()->target().is64Bit())12139helper = bigEndian ? TR_AMD64encodeUTF16Big : TR_AMD64encodeUTF16Little;12140else12141helper = bigEndian ? TR_IA32encodeUTF16Big : TR_IA32encodeUTF16Little;1214212143generateHelperCallInstruction(node, helper, deps, cg);1214412145// Free up registers12146for (int i = 0; i < gprClobberCount; i++)12147cg->stopUsingRegister(gprClobbers[i]);12148for (int i = 0; i < fprClobberCount; i++)12149cg->stopUsingRegister(fprClobbers[i]);1215012151for (uint16_t i = 0; i < node->getNumChildren(); i++)12152cg->decReferenceCount(node->getChild(i));1215312154TR_LiveRegisters *liveRegs = cg->getLiveRegisters(TR_GPR);12155if (killSrc)12156liveRegs->registerIsDead(srcPtrReg);12157if (killDst)12158liveRegs->registerIsDead(dstPtrReg);12159if (killLen)12160liveRegs->registerIsDead(lengthReg);1216112162node->setRegister(resultReg);12163return resultReg;12164}121651216612167TR::Register *12168J9::X86::TreeEvaluator::compressStringEvaluator(12169TR::Node *node,12170TR::CodeGenerator *cg,12171bool japaneseMethod)12172{12173TR::Node *srcObjNode, *dstObjNode, *startNode, *lengthNode;12174TR::Register *srcObjReg, *dstObjReg, *lengthReg, *startReg;12175bool stopUsingCopyReg1, stopUsingCopyReg2, stopUsingCopyReg3, stopUsingCopyReg4;1217612177srcObjNode = node->getChild(0);12178dstObjNode = node->getChild(1);12179startNode = node->getChild(2);12180lengthNode = node->getChild(3);1218112182stopUsingCopyReg1 = TR::TreeEvaluator::stopUsingCopyRegAddr(srcObjNode, srcObjReg, cg);12183stopUsingCopyReg2 = TR::TreeEvaluator::stopUsingCopyRegAddr(dstObjNode, dstObjReg, cg);12184stopUsingCopyReg3 = TR::TreeEvaluator::stopUsingCopyRegInteger(startNode, startReg, cg);12185stopUsingCopyReg4 = TR::TreeEvaluator::stopUsingCopyRegInteger(lengthNode, lengthReg, cg);1218612187uintptr_t hdrSize = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();12188generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, srcObjReg, hdrSize, 
cg);12189generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, dstObjReg, hdrSize, cg);121901219112192// Now that we have all the registers, set up the dependencies12193TR::RegisterDependencyConditions *dependencies =12194generateRegisterDependencyConditions((uint8_t)0, 6, cg);12195TR::Register *resultReg = cg->allocateRegister();12196TR::Register *dummy = cg->allocateRegister();12197dependencies->addPostCondition(srcObjReg, TR::RealRegister::esi, cg);12198dependencies->addPostCondition(dstObjReg, TR::RealRegister::edi, cg);12199dependencies->addPostCondition(lengthReg, TR::RealRegister::ecx, cg);12200dependencies->addPostCondition(startReg, TR::RealRegister::eax, cg);12201dependencies->addPostCondition(resultReg, TR::RealRegister::edx, cg);12202dependencies->addPostCondition(dummy, TR::RealRegister::ebx, cg);12203dependencies->stopAddingConditions();1220412205TR_RuntimeHelper helper;12206if (cg->comp()->target().is64Bit())12207helper = japaneseMethod ? TR_AMD64compressStringJ : TR_AMD64compressString;12208else12209helper = japaneseMethod ? TR_IA32compressStringJ : TR_IA32compressString;12210generateHelperCallInstruction(node, helper, dependencies, cg);12211cg->stopUsingRegister(dummy);1221212213for (uint16_t i = 0; i < node->getNumChildren(); i++)12214cg->decReferenceCount(node->getChild(i));1221512216if (stopUsingCopyReg1)12217cg->getLiveRegisters(TR_GPR)->registerIsDead(srcObjReg);12218if (stopUsingCopyReg2)12219cg->getLiveRegisters(TR_GPR)->registerIsDead(dstObjReg);12220if (stopUsingCopyReg3)12221cg->getLiveRegisters(TR_GPR)->registerIsDead(startReg);12222if (stopUsingCopyReg4)12223cg->getLiveRegisters(TR_GPR)->registerIsDead(lengthReg);12224node->setRegister(resultReg);12225return resultReg;12226}1222712228/*12229* The CaseConversionManager is used to store info about the conversion. It defines the lower bound and upper bound value depending on12230* whether it's a toLower or toUpper case conversion. 
It also chooses byte or word data type depending on whether it's compressed string or not.12231* The stringCaseConversionHelper queries the manager for those info when generating the actual instructions.12232*/12233class J9::X86::TreeEvaluator::CaseConversionManager {12234public:12235CaseConversionManager(bool isCompressedString, bool toLowerCase):_isCompressedString(isCompressedString), _toLowerCase(toLowerCase)12236{12237if (isCompressedString)12238{12239static uint8_t UPPERCASE_A_ASCII_MINUS1_bytes[] =12240{12241'A'-1, 'A'-1, 'A'-1, 'A'-1,12242'A'-1, 'A'-1, 'A'-1, 'A'-1,12243'A'-1, 'A'-1, 'A'-1, 'A'-1,12244'A'-1, 'A'-1, 'A'-1, 'A'-112245};1224612247static uint8_t UPPERCASE_Z_ASCII_bytes[] =12248{12249'Z', 'Z', 'Z', 'Z',12250'Z', 'Z', 'Z', 'Z',12251'Z', 'Z', 'Z', 'Z',12252'Z', 'Z', 'Z', 'Z'12253};1225412255static uint8_t LOWERCASE_A_ASCII_MINUS1_bytes[] =12256{12257'a'-1, 'a'-1, 'a'-1, 'a'-1,12258'a'-1, 'a'-1, 'a'-1, 'a'-1,12259'a'-1, 'a'-1, 'a'-1, 'a'-1,12260'a'-1, 'a'-1, 'a'-1, 'a'-112261};1226212263static uint8_t LOWERCASE_Z_ASCII_bytes[] =12264{12265'z', 'z', 'z', 'z',12266'z', 'z', 'z', 'z',12267'z', 'z', 'z', 'z',12268'z', 'z', 'z', 'z',12269};1227012271static uint8_t CONVERSION_DIFF_bytes[] =12272{122730x20, 0x20, 0x20, 0x20,122740x20, 0x20, 0x20, 0x20,122750x20, 0x20, 0x20, 0x20,122760x20, 0x20, 0x20, 0x20,12277};1227812279static uint16_t ASCII_UPPERBND_bytes[] =12280{122810x7f, 0x7f, 0x7f, 0x7f,122820x7f, 0x7f, 0x7f, 0x7f,122830x7f, 0x7f, 0x7f, 0x7f,122840x7f, 0x7f, 0x7f, 0x7f,12285};1228612287if (toLowerCase)12288{12289_lowerBndMinus1 = UPPERCASE_A_ASCII_MINUS1_bytes;12290_upperBnd = UPPERCASE_Z_ASCII_bytes;12291}12292else12293{12294_lowerBndMinus1 = LOWERCASE_A_ASCII_MINUS1_bytes;12295_upperBnd = LOWERCASE_Z_ASCII_bytes;12296}12297_conversionDiff = CONVERSION_DIFF_bytes;12298_asciiMax = ASCII_UPPERBND_bytes;12299}12300else12301{12302static uint16_t UPPERCASE_A_ASCII_MINUS1_words[] =12303{12304'A'-1, 'A'-1, 'A'-1, 'A'-1,12305'A'-1, 'A'-1, 'A'-1, 
'A'-112306};1230712308static uint16_t LOWERCASE_A_ASCII_MINUS1_words[] =12309{12310'a'-1, 'a'-1, 'a'-1, 'a'-1,12311'a'-1, 'a'-1, 'a'-1, 'a'-112312};1231312314static uint16_t UPPERCASE_Z_ASCII_words[] =12315{12316'Z', 'Z', 'Z', 'Z',12317'Z', 'Z', 'Z', 'Z'12318};1231912320static uint16_t LOWERCASE_Z_ASCII_words[] =12321{12322'z', 'z', 'z', 'z',12323'z', 'z', 'z', 'z'12324};1232512326static uint16_t CONVERSION_DIFF_words[] =12327{123280x20, 0x20, 0x20, 0x20,123290x20, 0x20, 0x20, 0x2012330};12331static uint16_t ASCII_UPPERBND_words[] =12332{123330x7f, 0x7f, 0x7f, 0x7f,123340x7f, 0x7f, 0x7f, 0x7f12335};1233612337if (toLowerCase)12338{12339_lowerBndMinus1 = UPPERCASE_A_ASCII_MINUS1_words;12340_upperBnd = UPPERCASE_Z_ASCII_words;12341}12342else12343{12344_lowerBndMinus1 = LOWERCASE_A_ASCII_MINUS1_words;12345_upperBnd = LOWERCASE_Z_ASCII_words;12346}12347_conversionDiff = CONVERSION_DIFF_words;12348_asciiMax = ASCII_UPPERBND_words;12349}12350};1235112352inline bool isCompressedString(){return _isCompressedString;};12353inline bool toLowerCase(){return _toLowerCase;};12354inline void * getLowerBndMinus1(){ return _lowerBndMinus1; };12355inline void * getUpperBnd(){ return _upperBnd; };12356inline void * getConversionDiff(){ return _conversionDiff; };12357inline void * getAsciiMax(){ return _asciiMax; };1235812359private:12360void * _lowerBndMinus1;12361void * _upperBnd;12362void * _asciiMax;12363void * _conversionDiff;12364bool _isCompressedString;12365bool _toLowerCase;12366};1236712368TR::Register *12369J9::X86::TreeEvaluator::toUpperIntrinsicLatin1Evaluator(TR::Node *node, TR::CodeGenerator *cg)12370{12371CaseConversionManager manager(true /* isCompressedString */, false /* toLowerCase */);12372return TR::TreeEvaluator::stringCaseConversionHelper(node, cg, manager);12373}123741237512376TR::Register *12377J9::X86::TreeEvaluator::toLowerIntrinsicLatin1Evaluator(TR::Node *node, TR::CodeGenerator *cg)12378{12379CaseConversionManager manager(true/* isCompressedString */, 
true /* toLowerCase */);12380return TR::TreeEvaluator::stringCaseConversionHelper(node, cg, manager);12381}1238212383TR::Register *12384J9::X86::TreeEvaluator::toUpperIntrinsicUTF16Evaluator(TR::Node *node, TR::CodeGenerator *cg)12385{12386CaseConversionManager manager(false /* isCompressedString */, false /* toLowerCase */);12387return TR::TreeEvaluator::stringCaseConversionHelper(node, cg, manager);12388}1238912390TR::Register *12391J9::X86::TreeEvaluator::toLowerIntrinsicUTF16Evaluator(TR::Node *node, TR::CodeGenerator *cg)12392{12393CaseConversionManager manager(false /* isCompressedString */, true /* toLowerCase */);12394return TR::TreeEvaluator::stringCaseConversionHelper(node, cg, manager);12395}1239612397static TR::Register* allocateRegAndAddCondition(TR::CodeGenerator *cg, TR::RegisterDependencyConditions * deps, TR_RegisterKinds rk=TR_GPR)12398{12399TR::Register* reg = cg->allocateRegister(rk);12400deps->addPostCondition(reg, TR::RealRegister::NoReg, cg);12401deps->addPreCondition(reg, TR::RealRegister::NoReg, cg);12402return reg;12403}124041240512406/**12407* \brief This evaluator is used to perform string toUpper and toLower conversion.12408*12409* This JIT HW optimized conversion helper is designed to convert strings that contains only ascii characters.12410* If a string contains non ascii characters, HW optimized routine will return NULL and fall back to the software implementation, which is able to convert a broader range of characters.12411*12412* There are the following steps in the generated assembly code:12413* 1. preparation (load value into register, calculate length etc)12414* 2. vectorized case conversion loop12415* 3. handle residue with non vectorized case conversion loop12416* 4. 
handle invalid case
*
* \param node    Node being evaluated. Child 1 is evaluated as the source array, child 2 as the destination array and
*                child 3 (clobber-evaluated) as the length. Child 0 is not evaluated here; only its reference count is decremented.
*                The length is compared against a counter that advances by 16 per vector iteration and by 1 (compressed)
*                or 2 (decompressed) per residue iteration, and that counter is used directly as the byte index into both arrays.
* \param cg      Code generator used to emit the instructions
* \param manager Contains info about the conversion: whether it's toUpper or toLower conversion, the valid range of characters, etc
* \return        The result register, which is also set on the node: loaded with 1 before the loops and cleared to 0
*                on the invalid-character path.
*
* This version does not support discontiguous arrays
*/
TR::Register *
J9::X86::TreeEvaluator::stringCaseConversionHelper(TR::Node *node, TR::CodeGenerator *cg, CaseConversionManager &manager)
   {
   // Attaches a debug comment to the instruction currently held in 'cursor' (no-op when there is no debug object).
   // The macro relies on the locals 'debug' and 'cursor' declared below and is not #undef'd at the end of the function.
   #define iComment(str) if (debug) debug->addInstructionComment(cursor, (const_cast<char*>(str)));
   // 14 dependencies in total: 6 GPRs (srcArray, dstArray, length, counter, residueStartLength, result) + 8 FPR-kind vector registers
   TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)14, (uint8_t)14, cg);
   TR::Register *srcArray = cg->evaluate(node->getChild(1));
   deps->addPostCondition(srcArray, TR::RealRegister::NoReg, cg);
   deps->addPreCondition(srcArray, TR::RealRegister::NoReg, cg);

   TR::Register *dstArray = cg->evaluate(node->getChild(2));
   deps->addPostCondition(dstArray, TR::RealRegister::NoReg, cg);
   deps->addPreCondition(dstArray, TR::RealRegister::NoReg, cg);

   TR::Register *length = cg->intClobberEvaluate(node->getChild(3));
   deps->addPostCondition(length, TR::RealRegister::NoReg, cg);
   deps->addPreCondition(length, TR::RealRegister::NoReg, cg);

   TR::Register* counter = allocateRegAndAddCondition(cg, deps);
   TR::Register* residueStartLength = allocateRegAndAddCondition(cg, deps);
   TR::Register *singleChar = residueStartLength; // residueStartLength and singleChar do not overlap and can share the same register
   TR::Register *result = allocateRegAndAddCondition(cg, deps);

   TR::Register* xmmRegLowerBndMinus1 = allocateRegAndAddCondition(cg, deps, TR_FPR); // 'A'-1 for toLowerCase, 'a'-1 for toUpperCase
   TR::Register* xmmRegUpperBnd = allocateRegAndAddCondition(cg, deps, TR_FPR); // 'Z' for toLowerCase, 'z' for toUpperCase
   TR::Register* xmmRegConversionDiff = allocateRegAndAddCondition(cg, deps, TR_FPR);
   TR::Register* xmmRegMinus1 = allocateRegAndAddCondition(cg, deps, TR_FPR);
   TR::Register* xmmRegAsciiUpperBnd = allocateRegAndAddCondition(cg, deps, TR_FPR);
   TR::Register* xmmRegArrayContentCopy0 = allocateRegAndAddCondition(cg, deps, TR_FPR);
   TR::Register* xmmRegArrayContentCopy1 = allocateRegAndAddCondition(cg, deps, TR_FPR);
   TR::Register* xmmRegArrayContentCopy2 = allocateRegAndAddCondition(cg, deps, TR_FPR);
   TR_Debug *debug = cg->getDebug();
   TR::Instruction * cursor = NULL;

   // Number of bytes processed per vector iteration
   uint32_t strideSize = 16;
   uintptr_t headerSize = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();

   // All-ones 16-byte constant: used as the "-1" compare operand and as the XOR mask that inverts every bit
   static uint16_t MINUS1[] =
      {
      0xffff, 0xffff, 0xffff, 0xffff,
      0xffff, 0xffff, 0xffff, 0xffff,
      };

   TR::LabelSymbol *failLabel = generateLabelSymbol(cg);
   // Under decompressed string case for 32bits platforms, bail out if string is larger than INT_MAX32/2 since # character to # byte
   // conversion will cause overflow.
   // NOTE(review): the immediate actually compared against is 0x8000 (32768), far below INT_MAX/2 - confirm which bound is intended.
   if (!cg->comp()->target().is64Bit() && !manager.isCompressedString())
      {
      generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, length, (uint16_t) 0x8000, cg);
      generateLabelInstruction(TR::InstOpCode::JGE4, node, failLabel, cg);
      }

   // 1. preparation (load value into registers, calculate length etc)
   auto lowerBndMinus1 = generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, manager.getLowerBndMinus1()), cg);
   cursor = generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmRegLowerBndMinus1, lowerBndMinus1, cg); iComment("lower bound ascii value minus one");

   auto upperBnd = generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, manager.getUpperBnd()), cg);
   cursor = generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmRegUpperBnd, upperBnd, cg); iComment("upper bound ascii value");

   auto conversionDiff = generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, manager.getConversionDiff()), cg);
   cursor = generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmRegConversionDiff, conversionDiff, cg); iComment("case conversion diff value");

   auto minus1 = generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, MINUS1), cg);
   cursor = generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmRegMinus1, minus1, cg); iComment("-1");

   auto asciiUpperBnd = generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, manager.getAsciiMax()), cg);
   cursor = generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmRegAsciiUpperBnd, asciiUpperBnd, cg); iComment("maximum ascii value ");

   // Assume success; only the fail path below clears the result
   generateRegImmInstruction(TR::InstOpCode::MOV4RegImm4, node, result, 1, cg);

   // initialize the loop counter
   cursor = generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, counter, counter, cg); iComment("initialize loop counter");

   //calculate the residueStartLength. Later instructions compare the counter with this length and decide when to jump to the residue handling sequence
   // residueStartLength = length - (strideSize-1), so counter < residueStartLength means a full 16-byte stride is still available
   generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, residueStartLength, length, cg);
   generateRegImmInstruction(TR::InstOpCode::SUBRegImms(), node, residueStartLength, strideSize-1, cg);

   // 2. vectorized case conversion loop
   TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *endLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *residueStartLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *storeToArrayLabel = generateLabelSymbol(cg);

   startLabel->setStartInternalControlFlow();
   endLabel->setEndInternalControlFlow();
   generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);

   TR::LabelSymbol *caseConversionMainLoopLabel = generateLabelSymbol(cg);
   generateLabelInstruction(TR::InstOpCode::label, node, caseConversionMainLoopLabel, cg);
   generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, counter, residueStartLength, cg);
   generateLabelInstruction(TR::InstOpCode::JGE4, node, residueStartLabel, cg);

   auto srcArrayMemRef = generateX86MemoryReference(srcArray, counter, 0, headerSize, cg);
   generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmRegArrayContentCopy0, srcArrayMemRef, cg);

   //detect invalid characters
   // Copy1 and Copy2 are scratch copies of the loaded stride; Copy0 keeps the original content for the conversion below
   generateRegRegInstruction(TR::InstOpCode::MOVDQURegReg, node, xmmRegArrayContentCopy1, xmmRegArrayContentCopy0, cg);
   generateRegRegInstruction(TR::InstOpCode::MOVDQURegReg, node, xmmRegArrayContentCopy2, xmmRegArrayContentCopy0, cg);
   cursor = generateRegRegInstruction(manager.isCompressedString()? TR::InstOpCode::PCMPGTBRegReg: TR::InstOpCode::PCMPGTWRegReg, node,
                                      xmmRegArrayContentCopy1, xmmRegMinus1, cg); iComment(" > -1");
   cursor = generateRegRegInstruction(manager.isCompressedString()? TR::InstOpCode::PCMPGTBRegReg: TR::InstOpCode::PCMPGTWRegReg, node,
                                      xmmRegArrayContentCopy2, xmmRegAsciiUpperBnd, cg); iComment(" > maximum ascii value");
   cursor = generateRegRegInstruction(TR::InstOpCode::PANDNRegReg, node, xmmRegArrayContentCopy2, xmmRegArrayContentCopy1, cg); iComment(" >-1 && !(> maximum ascii value) valid when all bits are set");
   cursor = generateRegRegInstruction(TR::InstOpCode::PXORRegReg, node, xmmRegArrayContentCopy2, xmmRegMinus1, cg); iComment("reverse all bits");
   generateRegRegInstruction(TR::InstOpCode::PTESTRegReg, node, xmmRegArrayContentCopy2, xmmRegArrayContentCopy2, cg);
   // NOTE(review): 'cursor' was last assigned by the PXOR above, so the comment below is attached to that
   // instruction rather than to this jump - confirm whether that is intended.
   generateLabelInstruction(TR::InstOpCode::JNE4, node, failLabel, cg); iComment("jump out if invalid chars are detected");

   //calculate case conversion with vector registers
   generateRegRegInstruction(TR::InstOpCode::MOVDQURegReg, node, xmmRegArrayContentCopy1, xmmRegArrayContentCopy0, cg);
   generateRegRegInstruction(TR::InstOpCode::MOVDQURegReg, node, xmmRegArrayContentCopy2, xmmRegArrayContentCopy0, cg);
   cursor = generateRegRegInstruction(manager.isCompressedString()? TR::InstOpCode::PCMPGTBRegReg: TR::InstOpCode::PCMPGTWRegReg, node,
                                      xmmRegArrayContentCopy0, xmmRegLowerBndMinus1, cg); iComment(manager.toLowerCase() ? " > 'A-1'" : "> 'a-1'");
   cursor = generateRegRegInstruction(manager.isCompressedString()? TR::InstOpCode::PCMPGTBRegReg: TR::InstOpCode::PCMPGTWRegReg, node,
                                      xmmRegArrayContentCopy1, xmmRegUpperBnd, cg); iComment(manager.toLowerCase()? " > 'Z'" : " > 'z'");
   cursor = generateRegRegInstruction(TR::InstOpCode::PANDNRegReg, node, xmmRegArrayContentCopy1, xmmRegArrayContentCopy0, cg); iComment(const_cast<char*> (manager.toLowerCase()? " >='A' && !( >'Z')": " >='a' && !( >'z')"));
   // Keep the conversion diff only in the lanes selected by the in-range mask
   generateRegRegInstruction(TR::InstOpCode::PANDRegReg, node, xmmRegArrayContentCopy1, xmmRegConversionDiff, cg);

   // Add the masked diff for toLowerCase, subtract it for toUpperCase; Copy2 still holds the original content
   if (manager.toLowerCase())
      generateRegRegInstruction(manager.isCompressedString()? TR::InstOpCode::PADDBRegReg: TR::InstOpCode::PADDWRegReg, node,
                                xmmRegArrayContentCopy2, xmmRegArrayContentCopy1, cg);
   else
      generateRegRegInstruction(manager.isCompressedString()? TR::InstOpCode::PSUBBRegReg: TR::InstOpCode::PSUBWRegReg, node,
                                xmmRegArrayContentCopy2, xmmRegArrayContentCopy1, cg);

   auto dstArrayMemRef = generateX86MemoryReference(dstArray, counter, 0, headerSize, cg);
   generateMemRegInstruction(TR::InstOpCode::MOVDQUMemReg, node, dstArrayMemRef, xmmRegArrayContentCopy2, cg);
   generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, counter, strideSize, cg);
   generateLabelInstruction(TR::InstOpCode::JMP4, node, caseConversionMainLoopLabel, cg);

   // 3. handle residue with non vectorized case conversion loop
   generateLabelInstruction(TR::InstOpCode::label, node, residueStartLabel, cg);
   generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, counter, length, cg);
   generateLabelInstruction(TR::InstOpCode::JGE4, node, endLabel, cg);
   srcArrayMemRef = generateX86MemoryReference(srcArray, counter, 0, headerSize, cg);
   generateRegMemInstruction( manager.isCompressedString()? TR::InstOpCode::MOVZXReg4Mem1: TR::InstOpCode::MOVZXReg4Mem2, node, singleChar, srcArrayMemRef, cg);

   // use unsigned compare to detect invalid range
   generateRegImmInstruction(TR::InstOpCode::CMP4RegImms, node, singleChar, 0x7F, cg);
   generateLabelInstruction(TR::InstOpCode::JA4, node, failLabel, cg);

   // Characters outside the convertible range are stored unchanged
   generateRegImmInstruction(TR::InstOpCode::CMP4RegImms, node, singleChar, manager.toLowerCase()? 'A': 'a', cg);
   generateLabelInstruction(TR::InstOpCode::JB4, node, storeToArrayLabel, cg);

   generateRegImmInstruction(TR::InstOpCode::CMP4RegImms, node, singleChar, manager.toLowerCase()? 'Z': 'z', cg);
   generateLabelInstruction(TR::InstOpCode::JA4, node, storeToArrayLabel, cg);

   // singleChar += 0x20 (via LEA) for toLowerCase, singleChar -= 0x20 for toUpperCase
   if (manager.toLowerCase())
      generateRegMemInstruction(TR::InstOpCode::LEARegMem(),
                                node,
                                singleChar,
                                generateX86MemoryReference(singleChar, 0x20, cg),
                                cg);

   else generateRegImmInstruction(TR::InstOpCode::SUB4RegImms, node, singleChar, 0x20, cg);

   generateLabelInstruction(TR::InstOpCode::label, node, storeToArrayLabel, cg);

   dstArrayMemRef = generateX86MemoryReference(dstArray, counter, 0, headerSize, cg);
   generateMemRegInstruction(manager.isCompressedString()? TR::InstOpCode::S1MemReg: TR::InstOpCode::S2MemReg, node, dstArrayMemRef, singleChar, cg);
   generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, counter, manager.isCompressedString()? 1: 2, cg);
   generateLabelInstruction(TR::InstOpCode::JMP4, node, residueStartLabel, cg);

   // 4. handle invalid case
   // Only reached through the jumps to failLabel; both loops above end in an unconditional jump
   generateLabelInstruction(TR::InstOpCode::label, node, failLabel, cg);
   generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, result, result, cg);

   // The dependency conditions are attached to the label that ends the internal control flow region
   generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);
   node->setRegister(result);

   cg->stopUsingRegister(length);
   cg->stopUsingRegister(counter);
   cg->stopUsingRegister(residueStartLength);

   cg->stopUsingRegister(xmmRegLowerBndMinus1);
   cg->stopUsingRegister(xmmRegUpperBnd);
   cg->stopUsingRegister(xmmRegConversionDiff);
   cg->stopUsingRegister(xmmRegMinus1);
   cg->stopUsingRegister(xmmRegAsciiUpperBnd);
   cg->stopUsingRegister(xmmRegArrayContentCopy0);
   cg->stopUsingRegister(xmmRegArrayContentCopy1);
   cg->stopUsingRegister(xmmRegArrayContentCopy2);


   cg->decReferenceCount(node->getChild(0));
   cg->decReferenceCount(node->getChild(1));
   cg->decReferenceCount(node->getChild(2));
   cg->decReferenceCount(node->getChild(3));
   return result;
   }

TR::Register
*12614J9::X86::TreeEvaluator::compressStringNoCheckEvaluator(12615TR::Node *node,12616TR::CodeGenerator *cg,12617bool japaneseMethod)12618{12619TR::Node *srcObjNode, *dstObjNode, *startNode, *lengthNode;12620TR::Register *srcObjReg, *dstObjReg, *lengthReg, *startReg;12621bool stopUsingCopyReg1, stopUsingCopyReg2, stopUsingCopyReg3, stopUsingCopyReg4;1262212623srcObjNode = node->getChild(0);12624dstObjNode = node->getChild(1);12625startNode = node->getChild(2);12626lengthNode = node->getChild(3);1262712628stopUsingCopyReg1 = TR::TreeEvaluator::stopUsingCopyRegAddr(srcObjNode, srcObjReg, cg);12629stopUsingCopyReg2 = TR::TreeEvaluator::stopUsingCopyRegAddr(dstObjNode, dstObjReg, cg);12630stopUsingCopyReg3 = TR::TreeEvaluator::stopUsingCopyRegInteger(startNode, startReg, cg);12631stopUsingCopyReg4 = TR::TreeEvaluator::stopUsingCopyRegInteger(lengthNode, lengthReg, cg);1263212633uintptr_t hdrSize = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();12634generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, srcObjReg, hdrSize, cg);12635generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, dstObjReg, hdrSize, cg);126361263712638// Now that we have all the registers, set up the dependencies12639TR::RegisterDependencyConditions *dependencies =12640generateRegisterDependencyConditions((uint8_t)0, 5, cg);12641dependencies->addPostCondition(srcObjReg, TR::RealRegister::esi, cg);12642dependencies->addPostCondition(dstObjReg, TR::RealRegister::edi, cg);12643dependencies->addPostCondition(lengthReg, TR::RealRegister::ecx, cg);12644dependencies->addPostCondition(startReg, TR::RealRegister::eax, cg);12645TR::Register *dummy = cg->allocateRegister();12646dependencies->addPostCondition(dummy, TR::RealRegister::ebx, cg);12647dependencies->stopAddingConditions();1264812649TR_RuntimeHelper helper;12650if (cg->comp()->target().is64Bit())12651helper = japaneseMethod ? TR_AMD64compressStringNoCheckJ : TR_AMD64compressStringNoCheck;12652else12653helper = japaneseMethod ? 
TR_IA32compressStringNoCheckJ : TR_IA32compressStringNoCheck;1265412655generateHelperCallInstruction(node, helper, dependencies, cg);12656cg->stopUsingRegister(dummy);1265712658for (uint16_t i = 0; i < node->getNumChildren(); i++)12659cg->decReferenceCount(node->getChild(i));1266012661if (stopUsingCopyReg1)12662cg->getLiveRegisters(TR_GPR)->registerIsDead(srcObjReg);12663if (stopUsingCopyReg2)12664cg->getLiveRegisters(TR_GPR)->registerIsDead(dstObjReg);12665if (stopUsingCopyReg3)12666cg->getLiveRegisters(TR_GPR)->registerIsDead(startReg);12667if (stopUsingCopyReg4)12668cg->getLiveRegisters(TR_GPR)->registerIsDead(lengthReg);12669return NULL;12670}126711267212673TR::Register *12674J9::X86::TreeEvaluator::andORStringEvaluator(TR::Node *node, TR::CodeGenerator *cg)12675{12676TR::Node *srcObjNode, *startNode, *lengthNode;12677TR::Register *srcObjReg, *lengthReg, *startReg;12678bool stopUsingCopyReg1, stopUsingCopyReg2, stopUsingCopyReg3;1267912680srcObjNode = node->getChild(0);12681startNode = node->getChild(1);12682lengthNode = node->getChild(2);1268312684stopUsingCopyReg1 = TR::TreeEvaluator::stopUsingCopyRegAddr(srcObjNode, srcObjReg, cg);12685stopUsingCopyReg2 = TR::TreeEvaluator::stopUsingCopyRegInteger(startNode, startReg, cg);12686stopUsingCopyReg3 = TR::TreeEvaluator::stopUsingCopyRegInteger(lengthNode, lengthReg, cg);1268712688uintptr_t hdrSize = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();12689generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, srcObjReg, hdrSize, cg);1269012691// Now that we have all the registers, set up the dependencies12692TR::RegisterDependencyConditions *dependencies =12693generateRegisterDependencyConditions((uint8_t)0, 5, cg);12694TR::Register *resultReg = cg->allocateRegister();12695dependencies->addPostCondition(srcObjReg, TR::RealRegister::esi, cg);12696dependencies->addPostCondition(lengthReg, TR::RealRegister::ecx, cg);12697dependencies->addPostCondition(startReg, TR::RealRegister::eax, 
cg);12698dependencies->addPostCondition(resultReg, TR::RealRegister::edx, cg);12699TR::Register *dummy = cg->allocateRegister();12700dependencies->addPostCondition(dummy, TR::RealRegister::ebx, cg);12701dependencies->stopAddingConditions();1270212703TR_RuntimeHelper helper =12704cg->comp()->target().is64Bit() ? TR_AMD64andORString : TR_IA32andORString;12705generateHelperCallInstruction(node, helper, dependencies, cg);12706cg->stopUsingRegister(dummy);1270712708for (uint16_t i = 0; i < node->getNumChildren(); i++)12709cg->decReferenceCount(node->getChild(i));1271012711if (stopUsingCopyReg1)12712cg->getLiveRegisters(TR_GPR)->registerIsDead(srcObjReg);12713if (stopUsingCopyReg2)12714cg->getLiveRegisters(TR_GPR)->registerIsDead(startReg);12715if (stopUsingCopyReg3)12716cg->getLiveRegisters(TR_GPR)->registerIsDead(lengthReg);12717node->setRegister(resultReg);12718return resultReg;12719}1272012721/*12722*12723* Generates instructions to fill in the J9JITWatchedStaticFieldData.fieldAddress, J9JITWatchedStaticFieldData.fieldClass for static fields,12724* and J9JITWatchedInstanceFieldData.offset for instance fields at runtime. 
Used for fieldwatch support.
* Fill in the J9JITWatchedStaticFieldData.fieldAddress, J9JITWatchedStaticFieldData.fieldClass for static field
* and J9JITWatchedInstanceFieldData.offset for instance field
*
* cmp J9JITWatchedStaticFieldData.fieldAddress / J9JITWatchedInstanceFieldData.offset, -1
* je unresolvedLabel
* restartLabel:
* ....
*
* unresolvedLabel:
* mov J9JITWatchedStaticFieldData.fieldClass J9Class (static field only)
* call helper
* mov J9JITWatchedStaticFieldData.fieldAddress / J9JITWatchedInstanceFieldData.offset resultReg
* jmp restartLabel
*
* Parameters:
* cg                   code generator used to emit the instructions
* node                 node whose symbol reference identifies the field (static vs instance, CP index)
* dataSnippet          snippet whose label address is loaded as the data block pointer
* isWrite              selects the setter resolve helpers instead of the getter ones; for static fields it also
*                      makes the field class get loaded through sideEffectRegister instead of using it directly
* sideEffectRegister   only used for static fields (see isWrite)
* dataSnippetRegister  not referenced anywhere in this function body
*/
void
J9::X86::TreeEvaluator::generateFillInDataBlockSequenceForUnresolvedField (TR::CodeGenerator *cg, TR::Node *node, TR::Snippet *dataSnippet, bool isWrite, TR::Register *sideEffectRegister, TR::Register *dataSnippetRegister)
   {
   TR::Compilation *comp = cg->comp();
   TR::SymbolReference *symRef = node->getSymbolReference();
   bool is64Bit = comp->target().is64Bit();
   bool isStatic = symRef->getSymbol()->getKind() == TR::Symbol::IsStatic;
   // Resolve helper is picked from the (isWrite, isStatic) combination
   TR_RuntimeHelper helperIndex = isWrite? (isStatic ? TR_jitResolveStaticFieldSetterDirect: TR_jitResolveFieldSetterDirect):
                                           (isStatic ? TR_jitResolveStaticFieldDirect: TR_jitResolveFieldDirect);
   TR::Linkage *linkage = cg->getLinkage(runtimeHelperLinkage(helperIndex));
   auto linkageProperties = linkage->getProperties();
   // Offset of the slot that is both tested against -1 and later filled in with the helper result
   intptr_t offsetInDataBlock = isStatic ? offsetof(J9JITWatchedStaticFieldData, fieldAddress): offsetof(J9JITWatchedInstanceFieldData, offset);

   TR::LabelSymbol* startLabel = generateLabelSymbol(cg);
   TR::LabelSymbol* endLabel = generateLabelSymbol(cg);
   TR::LabelSymbol* unresolveLabel = generateLabelSymbol(cg);
   startLabel->setStartInternalControlFlow();
   endLabel->setEndInternalControlFlow();

   // 64bit needs 2 argument registers (return register and first argument are the same),
   // 32bit only one return register
   // both 64/32bits need dataBlockReg
   uint8_t numOfConditions = is64Bit ? 3: 2;
   if (isStatic) // needs fieldClassReg
      {
      numOfConditions++;
      }
   TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions(numOfConditions, numOfConditions, cg);
   TR::Register *resultReg = NULL;
   TR::Register *dataBlockReg = cg->allocateRegister();
   deps->addPreCondition(dataBlockReg, TR::RealRegister::NoReg, cg);
   deps->addPostCondition(dataBlockReg, TR::RealRegister::NoReg, cg);

   // Mainline: load the data block address and branch to the outlined code when the slot still holds -1
   generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);
   generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, dataBlockReg, generateX86MemoryReference(dataSnippet->getSnippetLabel(), cg), cg);
   generateMemImmInstruction(TR::InstOpCode::CMPMemImms(), node, generateX86MemoryReference(dataBlockReg, offsetInDataBlock, cg), -1, cg);
   generateLabelInstruction(TR::InstOpCode::JE4, node, unresolveLabel, cg);

      {
      // Everything emitted until endOutlinedInstructionSequence() goes into the outlined sequence that starts at unresolveLabel
      TR_OutlinedInstructionsGenerator og(unresolveLabel, node ,cg);
      if (isStatic)
         {
         // Fills in J9JITWatchedStaticFieldData.fieldClass
         TR::Register *fieldClassReg;
         if (isWrite)
            {
            // Load the class through sideEffectRegister at the offset reported by getOffsetOfClassFromJavaLangClassField()
            fieldClassReg = cg->allocateRegister();
            generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, fieldClassReg, generateX86MemoryReference(sideEffectRegister, comp->fej9()->getOffsetOfClassFromJavaLangClassField(), cg), cg);
            }
         else
            {
            // sideEffectRegister is stored as the field class directly
            fieldClassReg = sideEffectRegister;
            }
         generateMemRegInstruction(TR::InstOpCode::SMemReg(is64Bit), node, generateX86MemoryReference(dataBlockReg, (intptr_t)(offsetof(J9JITWatchedStaticFieldData, fieldClass)), cg), fieldClassReg, cg);
         deps->addPreCondition(fieldClassReg, TR::RealRegister::NoReg, cg);
         deps->addPostCondition(fieldClassReg, TR::RealRegister::NoReg, cg);
         if (isWrite)
            {
            // Only the register allocated above is released; sideEffectRegister is left alone
            cg->stopUsingRegister(fieldClassReg);
            }
         }

      // Caller index -1 selects the compilation's own method symbol; otherwise the inlined method's symbol is used
      TR::ResolvedMethodSymbol *methodSymbol = node->getByteCodeInfo().getCallerIndex() == -1 ? comp->getMethodSymbol(): comp->getInlinedResolvedMethodSymbol(node->getByteCodeInfo().getCallerIndex());
      if (is64Bit)
         {
         // 64bit: constant pool address and CP index are passed in the first two argument registers
         TR::Register *cpAddressReg = cg->allocateRegister();
         TR::Register *cpIndexReg = cg->allocateRegister();
         generateRegImm64SymInstruction(TR::InstOpCode::MOV8RegImm64, node, cpAddressReg, (uintptr_t) methodSymbol->getResolvedMethod()->constantPool(), comp->getSymRefTab()->findOrCreateConstantPoolAddressSymbolRef(methodSymbol), cg);
         generateRegImmInstruction(TR::InstOpCode::MOV8RegImm4, node, cpIndexReg, symRef->getCPIndex(), cg);
         deps->addPreCondition(cpAddressReg, linkageProperties.getArgumentRegister(0, false /* isFloat */), cg);
         deps->addPostCondition(cpAddressReg, linkageProperties.getArgumentRegister(0, false /* isFloat */), cg);
         deps->addPreCondition(cpIndexReg, linkageProperties.getArgumentRegister(1, false /* isFloat */), cg);
         deps->addPostCondition(cpIndexReg, linkageProperties.getArgumentRegister(1, false /* isFloat */), cg);
         cg->stopUsingRegister(cpIndexReg);
         resultReg = cpAddressReg; // for 64bit private linkage both the first argument reg and the return reg are rax
         }
      else
         {
         // 32bit: CP index is pushed first, then the constant pool address; the result comes back in the integer return register
         generateImmInstruction(TR::InstOpCode::PUSHImm4, node, symRef->getCPIndex(), cg);
         generateImmSymInstruction(TR::InstOpCode::PUSHImm4, node, (uintptr_t) methodSymbol->getResolvedMethod()->constantPool(), comp->getSymRefTab()->findOrCreateConstantPoolAddressSymbolRef(methodSymbol), cg);
         resultReg = cg->allocateRegister();
         deps->addPreCondition(resultReg, linkageProperties.getIntegerReturnRegister(), cg);
         deps->addPostCondition(resultReg, linkageProperties.getIntegerReturnRegister(), cg);
         }
      // The call itself carries no dependency conditions; they are attached to endLabel below
      TR::Instruction *call = generateHelperCallInstruction(node, helperIndex, NULL, cg);
      call->setNeedsGCMap(0xFF00FFFF);

      /*
      For instance field offset, the result returned by the vmhelper includes header size.
      subtract the header size to get the offset needed by field watch helpers
      */
      if (!isStatic)
         {
         generateRegImmInstruction(TR::InstOpCode::SubRegImm4(is64Bit, false /*isWithBorrow*/), node, resultReg, TR::Compiler->om.objectHeaderSizeInBytes(), cg);
         }

      //store result into J9JITWatchedStaticFieldData.fieldAddress / J9JITWatchedInstanceFieldData.offset
      generateMemRegInstruction(TR::InstOpCode::SMemReg(is64Bit), node, generateX86MemoryReference(dataBlockReg, offsetInDataBlock, cg), resultReg, cg);
      generateLabelInstruction(TR::InstOpCode::JMP4, node, endLabel, cg);

      og.endOutlinedInstructionSequence();
      }

   // All conditions (including those added inside the outlined section) are attached to the merge label
   deps->stopAddingConditions();
   generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);
   cg->stopUsingRegister(dataBlockReg);
   cg->stopUsingRegister(resultReg);
   }

/*
* Generate the reporting field access helper call with required arguments
*
* jitReportInstanceFieldRead
* arg1 pointer to static data block
* arg2 object being read
*
* jitReportInstanceFieldWrite
* arg1 pointer to static data block
* arg2 object being written to (represented by sideEffectRegister)
* arg3 pointer to value being written
*
* jitReportStaticFieldRead
* arg1 pointer to static data block
*
* jitReportStaticFieldWrite
* arg1 pointer to static data block
* arg2 pointer to value being 
written
*
* Parameters:
* node                node whose symbol reference decides instance vs static field, and whose type sizes the stack temp
* endLabel            label jumped to after the helper call
* dataSnippet         snippet whose label address is passed as the data block pointer
* isWrite             selects the write helpers and enables the value spill described in the body
* deps                dependency conditions owned by the caller; this function only adds to them
* cg                  code generator used to emit the instructions
* sideEffectRegister  passed as the object argument for instance fields
* valueReg            register (or 32bit register pair) holding the value; only dereferenced when isWrite is true
*/
void generateReportFieldAccessOutlinedInstructions(TR::Node *node, TR::LabelSymbol *endLabel, TR::Snippet *dataSnippet, bool isWrite, TR::RegisterDependencyConditions *deps, TR::CodeGenerator *cg, TR::Register *sideEffectRegister, TR::Register *valueReg)
   {
   bool is64Bit = cg->comp()->target().is64Bit();
   bool isInstanceField = node->getSymbolReference()->getSymbol()->getKind() != TR::Symbol::IsStatic;
   // NOTE(review): owningMethod is not referenced again in this function - confirm it can be removed
   J9Method *owningMethod = (J9Method *)node->getOwningMethod();

   // Report helper is picked from the (isWrite, isInstanceField) combination
   TR_RuntimeHelper helperIndex = isWrite ? (isInstanceField ? TR_jitReportInstanceFieldWrite: TR_jitReportStaticFieldWrite):
                                            (isInstanceField ? TR_jitReportInstanceFieldRead: TR_jitReportStaticFieldRead);

   TR::Linkage *linkage = cg->getLinkage(runtimeHelperLinkage(helperIndex));
   auto linkageProperties = linkage->getProperties();

   TR::Register *valueReferenceReg = NULL;
   TR::MemoryReference *valueMR = NULL;
   TR::Register *dataBlockReg = cg->allocateRegister();
   // Set when valueReferenceReg aliases the low half of valueReg and therefore must not be released here
   bool reuseValueReg = false;

   /*
    * For reporting field write, reference to the valueNode (valueNode is evaluated in valueReg) is needed so we need to store
    * the value on to a stack location first and pass the stack location address as an argument
    * to the VM helper
    */
   if (isWrite)
      {
      valueMR = cg->machine()->getDummyLocalMR(node->getType());
      if (!valueReg->getRegisterPair())
         {
         if (valueReg->getKind() == TR_GPR)
            {
            // Store width follows the rounded size of the stack temp (8 bytes vs the narrower default)
            TR::AutomaticSymbol *autoSymbol = valueMR->getSymbolReference().getSymbol()->getAutoSymbol();
            generateMemRegInstruction(TR::InstOpCode::SMemReg(autoSymbol->getRoundedSize() == 8), node, valueMR, valueReg, cg);
            }
         else if (valueReg->isSinglePrecision())
            generateMemRegInstruction(TR::InstOpCode::MOVSSMemReg, node, valueMR, valueReg, cg);
         else
            generateMemRegInstruction(TR::InstOpCode::MOVSDMemReg, node, valueMR, valueReg, cg);
         // valueReg and valueReferenceReg are different. Add conditions for valueReg here
         deps->addPreCondition(valueReg, TR::RealRegister::NoReg, cg);
         deps->addPostCondition(valueReg, TR::RealRegister::NoReg, cg);
         valueReferenceReg = cg->allocateRegister();
         }
      else
         { // 32bit long
         // Low half at offset 0 of the temp, high half at offset 4
         generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, valueMR, valueReg->getLowOrder(), cg);
         generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(*valueMR, 4, cg), valueReg->getHighOrder(), cg);

         // Add the dependency for higher half register here
         deps->addPostCondition(valueReg->getHighOrder(), TR::RealRegister::NoReg, cg);
         deps->addPreCondition(valueReg->getHighOrder(), TR::RealRegister::NoReg, cg);

         // on 32bit reuse lower half register to save one register
         // lower half register dependency will be added later when using as valueReferenceReg and a call argument
         // to keep consistency with the other call arguments
         valueReferenceReg = valueReg->getLowOrder();
         reuseValueReg = true;
         }

      //store the stack location into a register
      generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, valueReferenceReg, valueMR, cg);
      }

   generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, dataBlockReg, generateX86MemoryReference(dataSnippet->getSnippetLabel(), cg), cg);
   int numArgs = 0;
   if (is64Bit)
      {
      // 64bit: arguments are bound to consecutive linkage argument registers, in the order
      // data block, object (instance fields only), value reference (writes only)
      deps->addPreCondition(dataBlockReg, linkageProperties.getArgumentRegister(numArgs, false /* isFloat */), cg);
      deps->addPostCondition(dataBlockReg, linkageProperties.getArgumentRegister(numArgs, false /* isFloat */), cg);
      numArgs++;

      if (isInstanceField)
         {
         deps->addPreCondition(sideEffectRegister, linkageProperties.getArgumentRegister(numArgs, false /* isFloat */), cg);
         deps->addPostCondition(sideEffectRegister, linkageProperties.getArgumentRegister(numArgs, false /* isFloat */), cg);
         numArgs++;
         }

      if (isWrite)
         {
         deps->addPreCondition(valueReferenceReg, linkageProperties.getArgumentRegister(numArgs, false /* isFloat */), cg);
         deps->addPostCondition(valueReferenceReg, linkageProperties.getArgumentRegister(numArgs, false /* isFloat */), cg);
         }
      }
   else
      {
      // 32bit: arguments are pushed in the reverse order (value reference, object, data block)
      // and each pushed register only gets a NoReg dependency
      if (isWrite)
         {
         generateRegInstruction(TR::InstOpCode::PUSHReg, node, valueReferenceReg, cg);
         deps->addPostCondition(valueReferenceReg, TR::RealRegister::NoReg, cg);
         deps->addPreCondition(valueReferenceReg, TR::RealRegister::NoReg, cg);
         }

      if (isInstanceField)
         {
         generateRegInstruction(TR::InstOpCode::PUSHReg, node, sideEffectRegister, cg);
         deps->addPreCondition(sideEffectRegister, TR::RealRegister::NoReg, cg);
         deps->addPostCondition(sideEffectRegister, TR::RealRegister::NoReg, cg);
         }
      generateRegInstruction(TR::InstOpCode::PUSHReg, node, dataBlockReg, cg);
      deps->addPreCondition(dataBlockReg, TR::RealRegister::NoReg, cg);
      deps->addPostCondition(dataBlockReg, TR::RealRegister::NoReg, cg);
      }

   // The call itself carries no dependency conditions; they live in the caller-owned deps
   TR::Instruction *call = generateHelperCallInstruction(node, helperIndex, NULL, cg);
   call->setNeedsGCMap(0xFF00FFFF);
   // Restore the value of lower part register
   // (it was overwritten with the stack temp address above when the register pair's low half was reused)
   if (isWrite && valueReg->getRegisterPair() && valueReg->getKind() == TR_GPR)
      generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, valueReg->getLowOrder(), valueMR, cg);
   // NOTE(review): for reads valueReferenceReg is still NULL here - presumably stopUsingRegister tolerates NULL; confirm
   if (!reuseValueReg)
      cg->stopUsingRegister(valueReferenceReg);
   generateLabelInstruction(TR::InstOpCode::JMP4, node, endLabel, cg);
   cg->stopUsingRegister(dataBlockReg);
   }

/*
* Get the number of register dependencies needed to generate the out-of-line sequence reporting field accesses
*/
static uint8_t getNumOfConditionsForReportFieldAccess(TR::Node *node, bool isResolved, bool isWrite, bool isInstanceField, TR::CodeGenerator *cg)
   {
   uint8_t numOfConditions = 1; // 1st arg is always the data block
   if (!isResolved || isInstanceField || cg->needClassAndMethodPointerRelocations())
      numOfConditions 
= numOfConditions+1; // classReg is needed in both cases.12989if (isWrite)12990{12991/* Field write report needs12992* a) value being written12993* b) the reference to the value being written12994*12995* The following cases are considered12996* 1. For 32bits using register pair(long), the valueReg is actually 2 registers,12997* and valueReferenceReg reuses one reg in valueReg to avoid running out of registers on 32bits12998* 2. For 32bits and 64bits no register pair, valueReferenceReg and valueReg are 2 different registers12999*/13000numOfConditions = numOfConditions + 2 ;13001}13002if (isInstanceField)13003numOfConditions = numOfConditions+1; // Instance field report needs the base object13004return numOfConditions;13005}1300613007void13008J9::X86::TreeEvaluator::generateTestAndReportFieldWatchInstructions(TR::CodeGenerator *cg, TR::Node *node, TR::Snippet *dataSnippet, bool isWrite, TR::Register *sideEffectRegister, TR::Register *valueReg, TR::Register *dataSnippetRegister)13009{13010bool isResolved = !node->getSymbolReference()->isUnresolved();13011TR::LabelSymbol* startLabel = generateLabelSymbol(cg);13012TR::LabelSymbol* endLabel = generateLabelSymbol(cg);13013TR::LabelSymbol* fieldReportLabel = generateLabelSymbol(cg);13014startLabel->setStartInternalControlFlow();13015endLabel->setEndInternalControlFlow();1301613017generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);1301813019TR::Register *fieldClassReg = NULL;13020TR::MemoryReference *classFlagsMemRef = NULL;13021TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());13022bool isInstanceField = node->getOpCode().isIndirect();13023bool fieldClassNeedsRelocation = cg->needClassAndMethodPointerRelocations();1302413025if (isInstanceField)13026{13027TR::Register *objReg = sideEffectRegister;13028fieldClassReg = cg->allocateRegister();13029generateLoadJ9Class(node, fieldClassReg, objReg, cg);13030classFlagsMemRef = generateX86MemoryReference(fieldClassReg, (uintptr_t)(fej9->getOffsetOfClassFlags()), 
cg);13031}13032else13033{13034if (isResolved)13035{13036if (!fieldClassNeedsRelocation)13037{13038// For non-AOT (JIT and JITServer) compiles we don't need to use sideEffectRegister here as the class information is available to us at compile time.13039J9Class *fieldClass = static_cast<TR::J9WatchedStaticFieldSnippet *>(dataSnippet)->getFieldClass();13040classFlagsMemRef = generateX86MemoryReference((uintptr_t)fieldClass + fej9->getOffsetOfClassFlags(), cg);13041}13042else13043{13044// If this is an AOT compile, we generate instructions to load the fieldClass directly from the snippet because the fieldClass in an AOT body will be invalid13045// if we load using the dataSnippet's helper query at compile time.13046fieldClassReg = cg->allocateRegister();13047generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, fieldClassReg, generateX86MemoryReference(dataSnippet->getSnippetLabel(), cg), cg);13048generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, fieldClassReg, generateX86MemoryReference(fieldClassReg, offsetof(J9JITWatchedStaticFieldData, fieldClass), cg), cg);13049classFlagsMemRef = generateX86MemoryReference(fieldClassReg, fej9->getOffsetOfClassFlags(), cg);13050}13051}13052else13053{13054if (isWrite)13055{13056fieldClassReg = cg->allocateRegister();13057generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, fieldClassReg, generateX86MemoryReference(sideEffectRegister, fej9->getOffsetOfClassFromJavaLangClassField(), cg), cg);13058}13059else13060{13061fieldClassReg = sideEffectRegister;13062}13063classFlagsMemRef = generateX86MemoryReference(fieldClassReg, fej9->getOffsetOfClassFlags(), cg);13064}13065}1306613067generateMemImmInstruction(TR::InstOpCode::TEST2MemImm2, node, classFlagsMemRef, J9ClassHasWatchedFields, cg);13068generateLabelInstruction(TR::InstOpCode::JNE4, node, fieldReportLabel, cg);1306913070uint8_t numOfConditions = getNumOfConditionsForReportFieldAccess(node, !node->getSymbolReference()->isUnresolved(), isWrite, isInstanceField, 
cg);13071TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions(numOfConditions, numOfConditions, cg);13072if (isInstanceField || !isResolved || fieldClassNeedsRelocation)13073{13074deps->addPreCondition(fieldClassReg, TR::RealRegister::NoReg, cg);13075deps->addPostCondition(fieldClassReg, TR::RealRegister::NoReg, cg);13076}1307713078{13079TR_OutlinedInstructionsGenerator og(fieldReportLabel, node ,cg);13080generateReportFieldAccessOutlinedInstructions(node, endLabel, dataSnippet, isWrite, deps, cg, sideEffectRegister, valueReg);13081og.endOutlinedInstructionSequence();13082}13083deps->stopAddingConditions();13084generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);1308513086if (isInstanceField || (!isResolved && isWrite) || fieldClassNeedsRelocation)13087{13088cg->stopUsingRegister(fieldClassReg);13089}13090}1309113092TR::Register *13093J9::X86::TreeEvaluator::generateConcurrentScavengeSequence(TR::Node *node, TR::CodeGenerator *cg)13094{13095TR::Register* object = TR::TreeEvaluator::performHeapLoadWithReadBarrier(node, cg);1309613097if (!node->getSymbolReference()->isUnresolved() &&13098(node->getSymbolReference()->getSymbol()->getKind() == TR::Symbol::IsShadow) &&13099(node->getSymbolReference()->getCPIndex() >= 0) &&13100(cg->comp()->getMethodHotness()>=scorching))13101{13102int32_t len;13103const char *fieldName = node->getSymbolReference()->getOwningMethod(cg->comp())->fieldSignatureChars(13104node->getSymbolReference()->getCPIndex(), len);1310513106if (fieldName && strstr(fieldName, "Ljava/lang/String;"))13107{13108generateMemInstruction(TR::InstOpCode::PREFETCHT0, node, generateX86MemoryReference(object, 0, cg), cg);13109}13110}13111return object;13112}1311313114TR::Register *13115J9::X86::TreeEvaluator::irdbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)13116{13117// For rdbar and wrtbar nodes we first evaluate the children we need to13118// handle the side effects. 
Then we delegate the evaluation of the remaining13119// children and the load/store operation to the appropriate load/store evaluator.13120TR::Node *sideEffectNode = node->getFirstChild();13121TR::Register *sideEffectRegister = cg->evaluate(sideEffectNode);1312213123if (cg->comp()->getOption(TR_EnableFieldWatch))13124{13125TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, NULL);13126}1312713128TR::Register *resultReg = NULL;13129// Perform regular load if no rdbar required.13130if (TR::Compiler->om.readBarrierType() == gc_modron_readbar_none)13131{13132resultReg = TR::TreeEvaluator::iloadEvaluator(node, cg);13133}13134else13135{13136if (cg->comp()->useCompressedPointers() &&13137(node->getOpCode().hasSymbolReference() &&13138node->getSymbolReference()->getSymbol()->getDataType() == TR::Address))13139{13140resultReg = TR::TreeEvaluator::generateConcurrentScavengeSequence(node, cg);13141node->setRegister(resultReg);13142}13143}1314413145// Note: For indirect rdbar nodes, the first child (sideEffectNode) is also used by the13146// load evaluator. The load evaluator will also evaluate+decrement it. In order to avoid double13147// decrementing the node we skip doing it here and let the load evaluator do it.13148return resultReg;13149}1315013151TR::Register *13152J9::X86::TreeEvaluator::irdbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)13153{13154// For rdbar and wrtbar nodes we first evaluate the children we need to13155// handle the side effects. 
Then we delegate the evaluation of the remaining13156// children and the load/store operation to the appropriate load/store evaluator.13157TR::Node *sideEffectNode = node->getFirstChild();13158TR::Register * sideEffectRegister = cg->evaluate(sideEffectNode);1315913160if (cg->comp()->getOption(TR_EnableFieldWatch))13161{13162TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, NULL);13163}1316413165cg->decReferenceCount(sideEffectNode);13166return TR::TreeEvaluator::iloadEvaluator(node, cg);13167}1316813169TR::Register *13170J9::X86::TreeEvaluator::ardbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)13171{13172// For rdbar and wrtbar nodes we first evaluate the children we need to13173// handle the side effects. Then we delegate the evaluation of the remaining13174// children and the load/store operation to the appropriate load/store evaluator.13175TR::Node *sideEffectNode = node->getFirstChild();13176TR::Register *sideEffectRegister = cg->evaluate(sideEffectNode);1317713178if (cg->comp()->getOption(TR_EnableFieldWatch))13179{13180TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, NULL);13181}1318213183cg->decReferenceCount(sideEffectNode);13184return TR::TreeEvaluator::aloadEvaluator(node, cg);13185}1318613187TR::Register *13188J9::X86::TreeEvaluator::ardbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)13189{13190// For rdbar and wrtbar nodes we first evaluate the children we need to13191// handle the side effects. 
Then we delegate the evaluation of the remaining13192// children and the load/store operation to the appropriate load/store evaluator.13193TR::Register *sideEffectRegister = cg->evaluate(node->getFirstChild());1319413195if (cg->comp()->getOption(TR_EnableFieldWatch))13196{13197TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, NULL);13198}1319913200TR::Register *resultReg = NULL;13201// Perform regular load if no rdbar required.13202if (TR::Compiler->om.readBarrierType() == gc_modron_readbar_none)13203{13204resultReg = TR::TreeEvaluator::aloadEvaluator(node, cg);13205}13206else13207{13208resultReg = TR::TreeEvaluator::generateConcurrentScavengeSequence(node, cg);13209resultReg->setContainsCollectedReference();13210node->setRegister(resultReg);13211}1321213213// Note: For indirect rdbar nodes, the first child (sideEffectNode) is also used by the13214// load evaluator. The load evaluator will also evaluate+decrement it. In order to avoid double13215// decrementing the node we skip doing it here and let the load evaluator do it.13216return resultReg;13217}1321813219TR::Register *J9::X86::TreeEvaluator::fwrtbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)13220{13221// For rdbar and wrtbar nodes we first evaluate the children we need to13222// handle the side effects. Then we delegate the evaluation of the remaining13223// children and the load/store operation to the appropriate load/store evaluator.13224TR::Register *valueReg = cg->evaluate(node->getFirstChild());13225TR::Node *sideEffectNode = node->getSecondChild();13226TR::Register *sideEffectRegister = cg->evaluate(sideEffectNode);1322713228if (cg->comp()->getOption(TR_EnableFieldWatch))13229{13230TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, valueReg);13231}1323213233// Note: The reference count for valueReg's node is not decremented here because the13234// store evaluator also uses it and so it will evaluate+decrement it. 
Thus we must skip decrementing here13235// to avoid double decrementing.13236cg->decReferenceCount(sideEffectNode);13237return TR::TreeEvaluator::floatingPointStoreEvaluator(node, cg);13238}1323913240TR::Register *J9::X86::TreeEvaluator::fwrtbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)13241{13242// For rdbar and wrtbar nodes we first evaluate the children we need to13243// handle the side effects. Then we delegate the evaluation of the remaining13244// children and the load/store operation to the appropriate load/store evaluator.13245TR::Register *valueReg = cg->evaluate(node->getSecondChild());13246TR::Node *sideEffectNode = node->getThirdChild();13247TR::Register *sideEffectRegister = cg->evaluate(sideEffectNode);1324813249if (cg->comp()->getOption(TR_EnableFieldWatch))13250{13251TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, valueReg);13252}1325313254// Note: The reference count for valueReg's node is not decremented here because the13255// store evaluator also uses it and so it will evaluate+decrement it. Thus we must skip decrementing here13256// to avoid double decrementing.13257cg->decReferenceCount(sideEffectNode);13258return TR::TreeEvaluator::floatingPointStoreEvaluator(node, cg);13259}1326013261#ifdef TR_TARGET_32BIT13262TR::Register *J9::X86::I386::TreeEvaluator::dwrtbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)13263{13264// For rdbar and wrtbar nodes we first evaluate the children we need to13265// handle the side effects. 
Then we delegate the evaluation of the remaining13266// children and the load/store operation to the appropriate load/store evaluator.13267TR::Register *valueReg = cg->evaluate(node->getFirstChild());13268TR::Node *sideEffectNode = node->getSecondChild();13269TR::Register *sideEffectRegister = cg->evaluate(sideEffectNode);1327013271if (cg->comp()->getOption(TR_EnableFieldWatch))13272{13273TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, valueReg);13274}1327513276// Note: The reference count for valueReg's node is not decremented here because the13277// store evaluator also uses it and so it will evaluate+decrement it. Thus we must skip decrementing here13278// to avoid double decrementing.13279cg->decReferenceCount(sideEffectNode);13280return TR::TreeEvaluator::dstoreEvaluator(node, cg);13281}1328213283TR::Register *J9::X86::I386::TreeEvaluator::dwrtbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)13284{13285// For rdbar and wrtbar nodes we first evaluate the children we need to13286// handle the side effects. Then we delegate the evaluation of the remaining13287// children and the load/store operation to the appropriate load/store evaluator.13288TR::Register *valueReg = cg->evaluate(node->getSecondChild());13289TR::Node *sideEffectNode = node->getThirdChild();13290TR::Register *sideEffectRegister = cg->evaluate(sideEffectNode);1329113292if (cg->comp()->getOption(TR_EnableFieldWatch))13293{13294TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, valueReg);13295}1329613297// Note: The reference count for valueReg's node is not decremented here because the13298// store evaluator also uses it and so it will evaluate+decrement it. 
Thus we must skip decrementing here13299// to avoid double decrementing.13300cg->decReferenceCount(sideEffectNode);13301return TR::TreeEvaluator::dstoreEvaluator(node, cg);13302}13303#endif1330413305#ifdef TR_TARGET_64BIT13306TR::Register *J9::X86::AMD64::TreeEvaluator::dwrtbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)13307{13308// For rdbar and wrtbar nodes we first evaluate the children we need to13309// handle the side effects. Then we delegate the evaluation of the remaining13310// children and the load/store operation to the appropriate load/store evaluator.13311TR::Register *valueReg = cg->evaluate(node->getFirstChild());13312TR::Node *sideEffectNode = node->getSecondChild();13313TR::Register *sideEffectRegister = cg->evaluate(sideEffectNode);1331413315if (cg->comp()->getOption(TR_EnableFieldWatch))13316{13317TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, valueReg);13318}1331913320// Note: The reference count for valueReg's node is not decremented here because the13321// store evaluator also uses it and so it will evaluate+decrement it. Thus we must skip decrementing here13322// to avoid double decrementing.13323cg->decReferenceCount(sideEffectNode);13324return TR::TreeEvaluator::floatingPointStoreEvaluator(node, cg);13325}1332613327TR::Register *J9::X86::AMD64::TreeEvaluator::dwrtbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)13328{13329// For rdbar and wrtbar nodes we first evaluate the children we need to13330// handle the side effects. 
Then we delegate the evaluation of the remaining13331// children and the load/store operation to the appropriate load/store evaluator.13332TR::Register *valueReg = cg->evaluate(node->getSecondChild());13333TR::Node *sideEffectNode = node->getThirdChild();13334TR::Register *sideEffectRegister = cg->evaluate(sideEffectNode);1333513336if (cg->comp()->getOption(TR_EnableFieldWatch))13337{13338TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, valueReg);13339}1334013341// Note: The reference count for valueReg's node is not decremented here because the13342// store evaluator also uses it and so it will evaluate+decrement it. Thus we must skip decrementing here13343// to avoid double decrementing.13344cg->decReferenceCount(sideEffectNode);13345return TR::TreeEvaluator::floatingPointStoreEvaluator(node, cg);13346}13347#endif1334813349TR::Register *J9::X86::TreeEvaluator::awrtbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)13350{13351return TR::TreeEvaluator::awrtbarEvaluator(node, cg);13352}1335313354TR::Register *J9::X86::TreeEvaluator::awrtbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)13355{13356// For rdbar and wrtbar nodes we first evaluate the children we need to13357// handle the side effects. 
Then we delegate the evaluation of the remaining13358// children and the load/store operation to the appropriate load/store evaluator.13359TR::Register *valueReg;13360TR::Register *sideEffectRegister;13361TR::Node *firstChildNode = node->getFirstChild();1336213363// Evaluate the children we need to handle the side effect, then go to writeBarrierEvaluator to handle the write barrier13364if (node->getOpCode().isIndirect())13365{13366TR::Node *valueNode = NULL;13367// Pass in valueNode so it can be set to the correct node.13368TR::TreeEvaluator::getIndirectWrtbarValueNode(cg, node, valueNode, false);13369valueReg = cg->evaluate(valueNode);13370sideEffectRegister = cg->evaluate(node->getThirdChild());13371}13372else13373{13374valueReg = cg->evaluate(firstChildNode);13375sideEffectRegister = cg->evaluate(node->getSecondChild());13376}1337713378if (cg->comp()->getOption(TR_EnableFieldWatch) && !node->getSymbolReference()->getSymbol()->isArrayShadowSymbol())13379{13380TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, valueReg);13381}1338213383// This evaluator handles the actual awrtbar operation. We also avoid decrementing the reference13384// counts of the children evaluated here and let this helper handle it.13385return TR::TreeEvaluator::writeBarrierEvaluator(node, cg);13386}133871338813389