Path: blob/main/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
35266 views
//===-- AMDGPUCtorDtorLowering.cpp - Handle global ctors and dtors --------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7///8/// \file9/// This pass creates a unified init and fini kernel with the required metadata10//===----------------------------------------------------------------------===//1112#include "AMDGPUCtorDtorLowering.h"13#include "AMDGPU.h"14#include "llvm/IR/Constants.h"15#include "llvm/IR/Function.h"16#include "llvm/IR/GlobalVariable.h"17#include "llvm/IR/IRBuilder.h"18#include "llvm/IR/Module.h"19#include "llvm/IR/Value.h"20#include "llvm/Pass.h"21#include "llvm/Transforms/Utils/ModuleUtils.h"2223using namespace llvm;2425#define DEBUG_TYPE "amdgpu-lower-ctor-dtor"2627namespace {2829static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {30StringRef InitOrFiniKernelName = "amdgcn.device.init";31if (!IsCtor)32InitOrFiniKernelName = "amdgcn.device.fini";33if (M.getFunction(InitOrFiniKernelName))34return nullptr;3536Function *InitOrFiniKernel = Function::createWithDefaultAttr(37FunctionType::get(Type::getVoidTy(M.getContext()), false),38GlobalValue::WeakODRLinkage, 0, InitOrFiniKernelName, &M);39InitOrFiniKernel->setCallingConv(CallingConv::AMDGPU_KERNEL);40InitOrFiniKernel->addFnAttr("amdgpu-flat-work-group-size", "1,1");41if (IsCtor)42InitOrFiniKernel->addFnAttr("device-init");43else44InitOrFiniKernel->addFnAttr("device-fini");45return InitOrFiniKernel;46}4748// The linker will provide the associated symbols to allow us to traverse the49// global constructors / destructors in priority order. We create the IR50// required to call each callback in this section. This is equivalent to the51// following code.52//53// extern "C" void * __init_array_start[];54// extern "C" void * __init_array_end[];55// extern "C" void * __fini_array_start[];56// extern "C" void * __fini_array_end[];57//58// using InitCallback = void();59// using FiniCallback = void(void);60//61// void call_init_array_callbacks() {62// for (auto start = __init_array_start; start != __init_array_end; ++start)63// reinterpret_cast<InitCallback *>(*start)();64// }65//66// void call_fini_array_callbacks() {67// size_t fini_array_size = __fini_array_end - __fini_array_start;68// for (size_t i = fini_array_size; i > 0; --i)69// reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])();70// }71static void createInitOrFiniCalls(Function &F, bool IsCtor) {72Module &M = *F.getParent();73LLVMContext &C = M.getContext();7475IRBuilder<> IRB(BasicBlock::Create(C, "entry", &F));76auto *LoopBB = BasicBlock::Create(C, "while.entry", &F);77auto *ExitBB = BasicBlock::Create(C, "while.end", &F);78Type *PtrTy = IRB.getPtrTy(AMDGPUAS::GLOBAL_ADDRESS);7980auto *Begin = M.getOrInsertGlobal(81IsCtor ? "__init_array_start" : "__fini_array_start",82ArrayType::get(PtrTy, 0), [&]() {83return new GlobalVariable(84M, ArrayType::get(PtrTy, 0),85/*isConstant=*/true, GlobalValue::ExternalLinkage,86/*Initializer=*/nullptr,87IsCtor ? "__init_array_start" : "__fini_array_start",88/*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal,89/*AddressSpace=*/1);90});91auto *End = M.getOrInsertGlobal(92IsCtor ? "__init_array_end" : "__fini_array_end",93ArrayType::get(PtrTy, 0), [&]() {94return new GlobalVariable(95M, ArrayType::get(PtrTy, 0),96/*isConstant=*/true, GlobalValue::ExternalLinkage,97/*Initializer=*/nullptr,98IsCtor ? "__init_array_end" : "__fini_array_end",99/*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal,100/*AddressSpace=*/1);101});102103// The constructor type is suppoed to allow using the argument vectors, but104// for now we just call them with no arguments.105auto *CallBackTy = FunctionType::get(IRB.getVoidTy(), {});106107Value *Start = Begin;108Value *Stop = End;109// The destructor array must be called in reverse order. Get a constant110// expression to the end of the array and iterate backwards instead.111if (!IsCtor) {112Type *Int64Ty = IntegerType::getInt64Ty(C);113auto *EndPtr = IRB.CreatePtrToInt(End, Int64Ty);114auto *BeginPtr = IRB.CreatePtrToInt(Begin, Int64Ty);115auto *ByteSize = IRB.CreateSub(EndPtr, BeginPtr);116auto *Size = IRB.CreateAShr(ByteSize, ConstantInt::get(Int64Ty, 3));117auto *Offset = IRB.CreateSub(Size, ConstantInt::get(Int64Ty, 1));118Start = IRB.CreateInBoundsGEP(119ArrayType::get(IRB.getPtrTy(), 0), Begin,120ArrayRef<Value *>({ConstantInt::get(Int64Ty, 0), Offset}));121Stop = Begin;122}123124IRB.CreateCondBr(125IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_NE : ICmpInst::ICMP_UGE, Start,126Stop),127LoopBB, ExitBB);128IRB.SetInsertPoint(LoopBB);129auto *CallBackPHI = IRB.CreatePHI(PtrTy, 2, "ptr");130auto *CallBack = IRB.CreateLoad(IRB.getPtrTy(F.getAddressSpace()),131CallBackPHI, "callback");132IRB.CreateCall(CallBackTy, CallBack);133auto *NewCallBack =134IRB.CreateConstGEP1_64(PtrTy, CallBackPHI, IsCtor ? 1 : -1, "next");135auto *EndCmp = IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_ULT,136NewCallBack, Stop, "end");137CallBackPHI->addIncoming(Start, &F.getEntryBlock());138CallBackPHI->addIncoming(NewCallBack, LoopBB);139IRB.CreateCondBr(EndCmp, ExitBB, LoopBB);140IRB.SetInsertPoint(ExitBB);141IRB.CreateRetVoid();142}143144static bool createInitOrFiniKernel(Module &M, StringRef GlobalName,145bool IsCtor) {146GlobalVariable *GV = M.getGlobalVariable(GlobalName);147if (!GV || !GV->hasInitializer())148return false;149ConstantArray *GA = dyn_cast<ConstantArray>(GV->getInitializer());150if (!GA || GA->getNumOperands() == 0)151return false;152153Function *InitOrFiniKernel = createInitOrFiniKernelFunction(M, IsCtor);154if (!InitOrFiniKernel)155return false;156157createInitOrFiniCalls(*InitOrFiniKernel, IsCtor);158159appendToUsed(M, {InitOrFiniKernel});160return true;161}162163static bool lowerCtorsAndDtors(Module &M) {164bool Modified = false;165Modified |= createInitOrFiniKernel(M, "llvm.global_ctors", /*IsCtor =*/true);166Modified |= createInitOrFiniKernel(M, "llvm.global_dtors", /*IsCtor =*/false);167return Modified;168}169170class AMDGPUCtorDtorLoweringLegacy final : public ModulePass {171public:172static char ID;173AMDGPUCtorDtorLoweringLegacy() : ModulePass(ID) {}174bool runOnModule(Module &M) override { return lowerCtorsAndDtors(M); }175};176177} // End anonymous namespace178179PreservedAnalyses AMDGPUCtorDtorLoweringPass::run(Module &M,180ModuleAnalysisManager &AM) {181return lowerCtorsAndDtors(M) ? PreservedAnalyses::none()182: PreservedAnalyses::all();183}184185char AMDGPUCtorDtorLoweringLegacy::ID = 0;186char &llvm::AMDGPUCtorDtorLoweringLegacyPassID =187AMDGPUCtorDtorLoweringLegacy::ID;188INITIALIZE_PASS(AMDGPUCtorDtorLoweringLegacy, DEBUG_TYPE,189"Lower ctors and dtors for AMDGPU", false, false)190191ModulePass *llvm::createAMDGPUCtorDtorLoweringLegacyPass() {192return new AMDGPUCtorDtorLoweringLegacy();193}194195196