Path: blob/main/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
35271 views
//===-- NVPTXCtorDtorLowering.cpp - Handle global ctors and dtors --------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7///8/// \file9/// This pass creates a unified init and fini kernel with the required metadata10//===----------------------------------------------------------------------===//1112#include "NVPTXCtorDtorLowering.h"13#include "MCTargetDesc/NVPTXBaseInfo.h"14#include "NVPTX.h"15#include "llvm/ADT/StringExtras.h"16#include "llvm/IR/Constants.h"17#include "llvm/IR/Function.h"18#include "llvm/IR/GlobalVariable.h"19#include "llvm/IR/IRBuilder.h"20#include "llvm/IR/Module.h"21#include "llvm/IR/Value.h"22#include "llvm/Pass.h"23#include "llvm/Support/CommandLine.h"24#include "llvm/Support/MD5.h"25#include "llvm/Transforms/Utils/ModuleUtils.h"2627using namespace llvm;2829#define DEBUG_TYPE "nvptx-lower-ctor-dtor"3031static cl::opt<std::string>32GlobalStr("nvptx-lower-global-ctor-dtor-id",33cl::desc("Override unique ID of ctor/dtor globals."),34cl::init(""), cl::Hidden);3536static cl::opt<bool>37CreateKernels("nvptx-emit-init-fini-kernel",38cl::desc("Emit kernels to call ctor/dtor globals."),39cl::init(true), cl::Hidden);4041namespace {4243static std::string getHash(StringRef Str) {44llvm::MD5 Hasher;45llvm::MD5::MD5Result Hash;46Hasher.update(Str);47Hasher.final(Hash);48return llvm::utohexstr(Hash.low(), /*LowerCase=*/true);49}5051static void addKernelMetadata(Module &M, GlobalValue *GV) {52llvm::LLVMContext &Ctx = M.getContext();5354// Get "nvvm.annotations" metadata node.55llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");5657llvm::Metadata *KernelMDVals[] = {58llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "kernel"),59llvm::ConstantAsMetadata::get(60llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};6162// This kernel is only to be called single-threaded.63llvm::Metadata *ThreadXMDVals[] = {64llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidx"),65llvm::ConstantAsMetadata::get(66llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};67llvm::Metadata *ThreadYMDVals[] = {68llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidy"),69llvm::ConstantAsMetadata::get(70llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};71llvm::Metadata *ThreadZMDVals[] = {72llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidz"),73llvm::ConstantAsMetadata::get(74llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};7576llvm::Metadata *BlockMDVals[] = {77llvm::ConstantAsMetadata::get(GV),78llvm::MDString::get(Ctx, "maxclusterrank"),79llvm::ConstantAsMetadata::get(80llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};8182// Append metadata to nvvm.annotations.83MD->addOperand(llvm::MDNode::get(Ctx, KernelMDVals));84MD->addOperand(llvm::MDNode::get(Ctx, ThreadXMDVals));85MD->addOperand(llvm::MDNode::get(Ctx, ThreadYMDVals));86MD->addOperand(llvm::MDNode::get(Ctx, ThreadZMDVals));87MD->addOperand(llvm::MDNode::get(Ctx, BlockMDVals));88}8990static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {91StringRef InitOrFiniKernelName =92IsCtor ? "nvptx$device$init" : "nvptx$device$fini";93if (M.getFunction(InitOrFiniKernelName))94return nullptr;9596Function *InitOrFiniKernel = Function::createWithDefaultAttr(97FunctionType::get(Type::getVoidTy(M.getContext()), false),98GlobalValue::WeakODRLinkage, 0, InitOrFiniKernelName, &M);99addKernelMetadata(M, InitOrFiniKernel);100101return InitOrFiniKernel;102}103104// We create the IR required to call each callback in this section. This is105// equivalent to the following code. Normally, the linker would provide us with106// the definitions of the init and fini array sections. The 'nvlink' linker does107// not do this so initializing these values is done by the runtime.108//109// extern "C" void **__init_array_start = nullptr;110// extern "C" void **__init_array_end = nullptr;111// extern "C" void **__fini_array_start = nullptr;112// extern "C" void **__fini_array_end = nullptr;113//114// using InitCallback = void();115// using FiniCallback = void();116//117// void call_init_array_callbacks() {118// for (auto start = __init_array_start; start != __init_array_end; ++start)119// reinterpret_cast<InitCallback *>(*start)();120// }121//122// void call_init_array_callbacks() {123// size_t fini_array_size = __fini_array_end - __fini_array_start;124// for (size_t i = fini_array_size; i > 0; --i)125// reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])();126// }127static void createInitOrFiniCalls(Function &F, bool IsCtor) {128Module &M = *F.getParent();129LLVMContext &C = M.getContext();130131IRBuilder<> IRB(BasicBlock::Create(C, "entry", &F));132auto *LoopBB = BasicBlock::Create(C, "while.entry", &F);133auto *ExitBB = BasicBlock::Create(C, "while.end", &F);134Type *PtrTy = IRB.getPtrTy(llvm::ADDRESS_SPACE_GLOBAL);135136auto *Begin = M.getOrInsertGlobal(137IsCtor ? "__init_array_start" : "__fini_array_start",138PointerType::get(C, 0), [&]() {139auto *GV = new GlobalVariable(140M, PointerType::get(C, 0),141/*isConstant=*/false, GlobalValue::WeakAnyLinkage,142Constant::getNullValue(PointerType::get(C, 0)),143IsCtor ? "__init_array_start" : "__fini_array_start",144/*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal,145/*AddressSpace=*/llvm::ADDRESS_SPACE_GLOBAL);146GV->setVisibility(GlobalVariable::ProtectedVisibility);147return GV;148});149auto *End = M.getOrInsertGlobal(150IsCtor ? "__init_array_end" : "__fini_array_end", PointerType::get(C, 0),151[&]() {152auto *GV = new GlobalVariable(153M, PointerType::get(C, 0),154/*isConstant=*/false, GlobalValue::WeakAnyLinkage,155Constant::getNullValue(PointerType::get(C, 0)),156IsCtor ? "__init_array_end" : "__fini_array_end",157/*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal,158/*AddressSpace=*/llvm::ADDRESS_SPACE_GLOBAL);159GV->setVisibility(GlobalVariable::ProtectedVisibility);160return GV;161});162163// The constructor type is suppoed to allow using the argument vectors, but164// for now we just call them with no arguments.165auto *CallBackTy = FunctionType::get(IRB.getVoidTy(), {});166167// The destructor array must be called in reverse order. Get an expression to168// the end of the array and iterate backwards in that case.169Value *BeginVal = IRB.CreateLoad(Begin->getType(), Begin, "begin");170Value *EndVal = IRB.CreateLoad(Begin->getType(), End, "stop");171if (!IsCtor) {172auto *BeginInt = IRB.CreatePtrToInt(BeginVal, IntegerType::getInt64Ty(C));173auto *EndInt = IRB.CreatePtrToInt(EndVal, IntegerType::getInt64Ty(C));174auto *SubInst = IRB.CreateSub(EndInt, BeginInt);175auto *Offset = IRB.CreateAShr(176SubInst, ConstantInt::get(IntegerType::getInt64Ty(C), 3), "offset",177/*IsExact=*/true);178auto *ValuePtr = IRB.CreateGEP(PointerType::get(C, 0), BeginVal,179ArrayRef<Value *>({Offset}));180EndVal = BeginVal;181BeginVal = IRB.CreateInBoundsGEP(182PointerType::get(C, 0), ValuePtr,183ArrayRef<Value *>(ConstantInt::get(IntegerType::getInt64Ty(C), -1)),184"start");185}186IRB.CreateCondBr(187IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_NE : ICmpInst::ICMP_UGT, BeginVal,188EndVal),189LoopBB, ExitBB);190IRB.SetInsertPoint(LoopBB);191auto *CallBackPHI = IRB.CreatePHI(PtrTy, 2, "ptr");192auto *CallBack = IRB.CreateLoad(IRB.getPtrTy(F.getAddressSpace()),193CallBackPHI, "callback");194IRB.CreateCall(CallBackTy, CallBack);195auto *NewCallBack =196IRB.CreateConstGEP1_64(PtrTy, CallBackPHI, IsCtor ? 1 : -1, "next");197auto *EndCmp = IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_ULT,198NewCallBack, EndVal, "end");199CallBackPHI->addIncoming(BeginVal, &F.getEntryBlock());200CallBackPHI->addIncoming(NewCallBack, LoopBB);201IRB.CreateCondBr(EndCmp, ExitBB, LoopBB);202IRB.SetInsertPoint(ExitBB);203IRB.CreateRetVoid();204}205206static bool createInitOrFiniGlobals(Module &M, GlobalVariable *GV,207bool IsCtor) {208ConstantArray *GA = dyn_cast<ConstantArray>(GV->getInitializer());209if (!GA || GA->getNumOperands() == 0)210return false;211212// NVPTX has no way to emit variables at specific sections or support for213// the traditional constructor sections. Instead, we emit mangled global214// names so the runtime can build the list manually.215for (Value *V : GA->operands()) {216auto *CS = cast<ConstantStruct>(V);217auto *F = cast<Constant>(CS->getOperand(1));218uint64_t Priority = cast<ConstantInt>(CS->getOperand(0))->getSExtValue();219std::string PriorityStr = "." + std::to_string(Priority);220// We append a semi-unique hash and the priority to the global name.221std::string GlobalID =222!GlobalStr.empty() ? GlobalStr : getHash(M.getSourceFileName());223std::string NameStr =224((IsCtor ? "__init_array_object_" : "__fini_array_object_") +225F->getName() + "_" + GlobalID + "_" + std::to_string(Priority))226.str();227// PTX does not support exported names with '.' in them.228llvm::transform(NameStr, NameStr.begin(),229[](char c) { return c == '.' ? '_' : c; });230231auto *GV = new GlobalVariable(M, F->getType(), /*IsConstant=*/true,232GlobalValue::ExternalLinkage, F, NameStr,233nullptr, GlobalValue::NotThreadLocal,234/*AddressSpace=*/4);235// This isn't respected by Nvidia, simply put here for clarity.236GV->setSection(IsCtor ? ".init_array" + PriorityStr237: ".fini_array" + PriorityStr);238GV->setVisibility(GlobalVariable::ProtectedVisibility);239appendToUsed(M, {GV});240}241242return true;243}244245static bool createInitOrFiniKernel(Module &M, StringRef GlobalName,246bool IsCtor) {247GlobalVariable *GV = M.getGlobalVariable(GlobalName);248if (!GV || !GV->hasInitializer())249return false;250251if (!createInitOrFiniGlobals(M, GV, IsCtor))252return false;253254if (!CreateKernels)255return true;256257Function *InitOrFiniKernel = createInitOrFiniKernelFunction(M, IsCtor);258if (!InitOrFiniKernel)259return false;260261createInitOrFiniCalls(*InitOrFiniKernel, IsCtor);262263GV->eraseFromParent();264return true;265}266267static bool lowerCtorsAndDtors(Module &M) {268bool Modified = false;269Modified |= createInitOrFiniKernel(M, "llvm.global_ctors", /*IsCtor =*/true);270Modified |= createInitOrFiniKernel(M, "llvm.global_dtors", /*IsCtor =*/false);271return Modified;272}273274class NVPTXCtorDtorLoweringLegacy final : public ModulePass {275public:276static char ID;277NVPTXCtorDtorLoweringLegacy() : ModulePass(ID) {}278bool runOnModule(Module &M) override { return lowerCtorsAndDtors(M); }279};280281} // End anonymous namespace282283PreservedAnalyses NVPTXCtorDtorLoweringPass::run(Module &M,284ModuleAnalysisManager &AM) {285return lowerCtorsAndDtors(M) ? PreservedAnalyses::none()286: PreservedAnalyses::all();287}288289char NVPTXCtorDtorLoweringLegacy::ID = 0;290char &llvm::NVPTXCtorDtorLoweringLegacyPassID = NVPTXCtorDtorLoweringLegacy::ID;291INITIALIZE_PASS(NVPTXCtorDtorLoweringLegacy, DEBUG_TYPE,292"Lower ctors and dtors for NVPTX", false, false)293294ModulePass *llvm::createNVPTXCtorDtorLoweringLegacyPass() {295return new NVPTXCtorDtorLoweringLegacy();296}297298299