Path: blob/master/runtime/compiler/x/codegen/AllocPrefetchSnippet.cpp
6004 views
/*******************************************************************************1* Copyright (c) 2000, 2021 IBM Corp. and others2*3* This program and the accompanying materials are made available under4* the terms of the Eclipse Public License 2.0 which accompanies this5* distribution and is available at https://www.eclipse.org/legal/epl-2.0/6* or the Apache License, Version 2.0 which accompanies this distribution and7* is available at https://www.apache.org/licenses/LICENSE-2.0.8*9* This Source Code may also be made available under the following10* Secondary Licenses when the conditions for such availability set11* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU12* General Public License, version 2 with the GNU Classpath13* Exception [1] and GNU General Public License, version 2 with the14* OpenJDK Assembly Exception [2].15*16* [1] https://www.gnu.org/software/classpath/license.html17* [2] http://openjdk.java.net/legal/assembly-exception.html18*19* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception20*******************************************************************************/2122#include "x/codegen/AllocPrefetchSnippet.hpp"2324#include "codegen/Relocation.hpp"25#include "env/CompilerEnv.hpp"26#include "env/jittypes.h"27#include "il/Node.hpp"28#include "il/Node_inlines.hpp"29#include "runtime/CodeRuntime.hpp"30#include "runtime/J9CodeCache.hpp"31#include "env/VMJ9.h"3233uint8_t *TR::X86AllocPrefetchSnippet::emitSnippetBody()34{35TR::Compilation *comp = cg()->comp();36if (comp->getOptions()->realTimeGC())37return 0;3839TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());4041uint8_t *buffer = cg()->getBinaryBufferCursor();42getSnippetLabel()->setCodeLocation(buffer);4344TR::SymbolReference *helperSymRef = NULL;4546bool useSharedCodeCacheSnippet = fej9->supportsCodeCacheSnippets();4748bool prefetchThunkGenerated = (fej9->getAllocationPrefetchCodeSnippetAddress(comp) != 0);49#ifdef J9VM_GC_NON_ZERO_TLH50if (isNonZeroTLH())51{52prefetchThunkGenerated = (fej9->getAllocationNoZeroPrefetchCodeSnippetAddress(comp) != 0);53}54#endif5556TR_ASSERT(prefetchThunkGenerated, "Invalid prefetch snippet.");5758// CALL [32-bit relative]59//60*buffer++ = 0xe8;6162int32_t disp32;63uintptr_t helperAddress = 0;6465if (useSharedCodeCacheSnippet)66{67#ifdef J9VM_GC_NON_ZERO_TLH68if(!isNonZeroTLH())69{70helperAddress = (uintptr_t)(fej9->getAllocationPrefetchCodeSnippetAddress(comp));71}72else73{74helperAddress = (uintptr_t)(fej9->getAllocationNoZeroPrefetchCodeSnippetAddress(comp));75}76#else77helperAddress = (uintptr_t)(fej9->getAllocationPrefetchCodeSnippetAddress(comp));78#endif79}8081if (helperAddress && IS_32BIT_RIP(helperAddress, (buffer + 4) ) )82{83disp32 = (int32_t)(helperAddress - (uintptr_t)(buffer+4));84}85else86{87TR_RuntimeHelper helper = (comp->getOption(TR_EnableNewX86PrefetchTLH)) ? TR_X86newPrefetchTLH : TR_X86prefetchTLH;88helperSymRef = cg()->symRefTab()->findOrCreateRuntimeHelper(helper);89disp32 = cg()->branchDisplacementToHelperOrTrampoline(buffer+4, helperSymRef);90if (fej9->needRelocationsForHelpers())91{92cg()->addExternalRelocation(new (cg()->trHeapMemory()) TR::ExternalRelocation(buffer,93(uint8_t *)helperSymRef,94TR_HelperAddress,95cg()),96__FILE__, __LINE__, getNode());97}98}99100*(int32_t *)buffer = disp32;101buffer += 4;102103return genRestartJump(buffer);104}105106uint32_t TR::X86AllocPrefetchSnippet::getLength(int32_t estimatedSnippetStart)107{108return 10 + estimateRestartJumpLength(estimatedSnippetStart + 2);109}110111TR_X86AllocPrefetchGeometry112TR::X86AllocPrefetchSnippet::generatePrefetchGeometry()113{114115if (TR::Options::_TLHPrefetchSize <= 0)116TR::Options::_TLHPrefetchSize = 384;117118// These parameters were experimentally determined to be optimal for119// Woodcrest hardware for small applications.120121if (TR::Options::_TLHPrefetchLineSize <= 0)122TR::Options::_TLHPrefetchLineSize = 64;123124if (TR::Options::_TLHPrefetchLineCount <= 0)125TR::Options::_TLHPrefetchLineCount = 8;126127if (TR::Options::_TLHPrefetchStaggeredLineCount <= 0)128TR::Options::_TLHPrefetchStaggeredLineCount = 4;129130if (TR::Options::_TLHPrefetchBoundaryLineCount <= 0)131TR::Options::_TLHPrefetchBoundaryLineCount = 6;132133if (TR::Options::_TLHPrefetchTLHEndLineCount <= 0)134TR::Options::_TLHPrefetchTLHEndLineCount = 6;135136return TR_X86AllocPrefetchGeometry(137TR::Options::_TLHPrefetchLineSize,138TR::Options::_TLHPrefetchLineCount,139TR::Options::_TLHPrefetchStaggeredLineCount,140TR::Options::_TLHPrefetchBoundaryLineCount,141TR::Options::_TLHPrefetchTLHEndLineCount142);143}144145template <TR::HeapTypes::Type> struct vmThreadHeapOffsets;146147template <> struct vmThreadHeapOffsets<TR::HeapTypes::ZeroedHeap>148{149static const int32_t offsetOfHeapAlloc = offsetof(J9VMThread, heapAlloc);150static const int32_t offsetOfHeapTop = offsetof(J9VMThread, heapTop);151static const int32_t offsetOfTLHPrefetchCount = offsetof(J9VMThread, tlhPrefetchFTA);152};153154template <> struct vmThreadHeapOffsets<TR::HeapTypes::NonZeroedHeap>155{156static const int32_t offsetOfHeapAlloc = offsetof(J9VMThread, nonZeroHeapAlloc);157static const int32_t offsetOfHeapTop = offsetof(J9VMThread, nonZeroHeapTop);158static const int32_t offsetOfTLHPrefetchCount = offsetof(J9VMThread, nonZeroTlhPrefetchFTA);159};160161template <TR::HeapTypes::Type HEAP_TYPE>162class HeapProperties163{164private:165166typedef vmThreadHeapOffsets<HEAP_TYPE> HeapOffsets;167168public:169170static int32_t offsetOfHeapAlloc() { return HeapOffsets::offsetOfHeapAlloc; }171static int32_t offsetOfHeapTop() { return HeapOffsets::offsetOfHeapTop; }172static int32_t offsetOfTLHPrefetchCount() { return HeapOffsets::offsetOfTLHPrefetchCount; }173static bool needWideDisplacementForHeapAlloc() { return (offsetOfHeapAlloc() > 127 || offsetOfHeapAlloc() < -128); }174static bool needWideDisplacementForHeapTop() { return (offsetOfHeapTop() > 127 || offsetOfHeapTop() < -128); }175static bool needWideDisplacementForTLHPrefetchCount() { return (offsetOfTLHPrefetchCount() > 127 || offsetOfTLHPrefetchCount() < -128); }176};177178template <TR::HeapTypes::Type HEAP_TYPE, bool is64Bit>179uint8_t* TR::X86AllocPrefetchSnippet::emitSharedBody(uint8_t* prefetchSnippetBuffer, TR::Compilation* comp)180{181182typedef HeapProperties<HEAP_TYPE> HeapTraits;183184static char * printCodeCacheSnippetAddress = feGetEnv("TR_printCodeCacheSnippetAddress");185if (printCodeCacheSnippetAddress)186{187fprintf(stdout, "%s Allocation snippet is at address %p, size=%d\n", TR::HeapTypes::getPrefix(HEAP_TYPE), prefetchSnippetBuffer, sizeOfSharedBody<HEAP_TYPE, is64Bit>());188fflush(stdout);189}190191const TR_X86AllocPrefetchGeometry &prefetchGeometry = generatePrefetchGeometry();192193int32_t lineSize = prefetchGeometry.getPrefetchLineSize();194int32_t numLines = prefetchGeometry.getPrefetchLineCount();195int32_t staggerLines = prefetchGeometry.getPrefetchStaggeredLineCount();196int32_t boundaryLines = prefetchGeometry.getPrefetchBoundaryLineCount();197198// PUSH rcx199//200*prefetchSnippetBuffer++ = 0x51;201202// MOV rcx, qword ptr [rbp + heapAlloc]203//204if (is64Bit)205{206// REX207//208*prefetchSnippetBuffer++ = 0x48;209}210211prefetchSnippetBuffer[0] = 0x8B;212213if (HeapTraits::needWideDisplacementForHeapAlloc())214{215prefetchSnippetBuffer[1] = 0x8d;216prefetchSnippetBuffer += 2;217*((int32_t *)prefetchSnippetBuffer) = HeapTraits::offsetOfHeapAlloc();218prefetchSnippetBuffer += 4;219}220else221{222prefetchSnippetBuffer[1] = 0x4d;223prefetchSnippetBuffer[2] = (uint8_t) HeapTraits::offsetOfHeapAlloc();224prefetchSnippetBuffer += 3;225}226227// TR::InstOpCode::PREFETCHNTA [rcx + distance]228// TR::InstOpCode::PREFETCHNTA [rcx + distance + lineSize]229// ...230// TR::InstOpCode::PREFETCHNTA [rcx + distance + n*lineSize]231//232for (int32_t lineOffset = 0; lineOffset < numLines; ++lineOffset)233{234prefetchSnippetBuffer[0] = 0x0F;235if (comp->target().cpu.is(OMR_PROCESSOR_X86_AMDFAMILY15H))236prefetchSnippetBuffer[1] = 0x0D;237else238prefetchSnippetBuffer[1] = 0x18;239prefetchSnippetBuffer[2] = 0x81;240prefetchSnippetBuffer += 3;241*(int32_t *)prefetchSnippetBuffer = (staggerLines + lineOffset) * lineSize;242prefetchSnippetBuffer += 4;243}244245// MOV dword ptr [rbp + TLH_PREFETCH_COUNT], "size"246//247*prefetchSnippetBuffer++ = 0xC7;248249if (HeapTraits::needWideDisplacementForTLHPrefetchCount())250{251*prefetchSnippetBuffer++ = 0x85;252*(int32_t *)prefetchSnippetBuffer = HeapTraits::offsetOfTLHPrefetchCount();253prefetchSnippetBuffer += 4;254}255else256{257*prefetchSnippetBuffer++ = 0x45;258*prefetchSnippetBuffer++ = (uint8_t)HeapTraits::offsetOfTLHPrefetchCount();259}260261*(uint32_t *)prefetchSnippetBuffer = (uint32_t)(boundaryLines*lineSize);262prefetchSnippetBuffer += 4;263264// POP rcx265//266*prefetchSnippetBuffer++ = 0x59;267268// TR::InstOpCode::RET269//270*prefetchSnippetBuffer++ = 0xC3;271272return prefetchSnippetBuffer;273}274275template <TR::HeapTypes::Type HEAP_TYPE, bool is64Bit>276int32_t TR::X86AllocPrefetchSnippet::sizeOfSharedBody()277{278typedef HeapProperties<HEAP_TYPE> HeapTraits;279280const TR_X86AllocPrefetchGeometry &prefetchGeometry = generatePrefetchGeometry();281282int32_t prefetchSnippetSize = (is64Bit ? 14 : 13) + prefetchGeometry.getPrefetchLineCount() * 7;283284if (HeapTraits::needWideDisplacementForHeapAlloc())285{286prefetchSnippetSize += 3;287}288289290if (HeapTraits::needWideDisplacementForTLHPrefetchCount())291{292prefetchSnippetSize += 3;293}294295/*296* TODO: Refactor the alignment value to use a common definition either from the code cache or from some form of target query.297*/298int32_t alignedSize = TR::alignAllocationSize<32>(prefetchSnippetSize);299300return alignedSize;301}302303uint32_t TR::getCCPreLoadedCodeSize()304{305#if defined(TR_TARGET_64BIT)306uint32_t sizeOfZeroedPrefetchBody = TR::X86AllocPrefetchSnippet::sizeOfSharedBody<TR::HeapTypes::ZeroedHeap, true>();307uint32_t sizeOfNonZeroedPrefetchBody = TR::X86AllocPrefetchSnippet::sizeOfSharedBody<TR::HeapTypes::NonZeroedHeap, true>();308#else309uint32_t sizeOfZeroedPrefetchBody = TR::X86AllocPrefetchSnippet::sizeOfSharedBody<TR::HeapTypes::ZeroedHeap, false>();310uint32_t sizeOfNonZeroedPrefetchBody = TR::X86AllocPrefetchSnippet::sizeOfSharedBody<TR::HeapTypes::NonZeroedHeap, false>();311#endif312return sizeOfZeroedPrefetchBody + sizeOfNonZeroedPrefetchBody;313}314315void TR::createCCPreLoadedCode(uint8_t *CCPreLoadedCodeBase, uint8_t *CCPreLoadedCodeTop, void ** CCPreLoadedCodeTable, TR::CodeGenerator *cg)316{317TR::Compilation *comp = cg->comp();318uint8_t *cursor = CCPreLoadedCodeBase;319320CCPreLoadedCodeTable[TR_CCPreLoadedCode::TR_AllocPrefetch] = static_cast<void *>(cursor);321if (comp->target().is64Bit())322cursor = TR::X86AllocPrefetchSnippet::emitSharedBody<TR::HeapTypes::ZeroedHeap, true>(cursor, comp);323else324cursor = TR::X86AllocPrefetchSnippet::emitSharedBody<TR::HeapTypes::ZeroedHeap, false>(cursor, comp);325326cursor = static_cast<uint8_t *>( TR::alignAllocation<32>(cursor) );327328CCPreLoadedCodeTable[TR_CCPreLoadedCode::TR_NonZeroAllocPrefetch] = static_cast<void *>(cursor);329if (comp->target().is64Bit())330cursor = TR::X86AllocPrefetchSnippet::emitSharedBody<TR::HeapTypes::NonZeroedHeap, true>(cursor, comp);331else332cursor = TR::X86AllocPrefetchSnippet::emitSharedBody<TR::HeapTypes::NonZeroedHeap, false>(cursor, comp);333334cursor = static_cast<uint8_t *>( TR::alignAllocation<32>(cursor) );335336TR_ASSERT(cursor == CCPreLoadedCodeTop, "The expected and actual sizes of the emitted code differ. cursor = %p, CCPreLoadedCodeTop = %p", cursor, CCPreLoadedCodeTop);337}338339340