Path: blob/main/contrib/llvm-project/clang/lib/Interpreter/DeviceOffload.cpp
35234 views
//===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file implements offloading to CUDA devices.9//10//===----------------------------------------------------------------------===//1112#include "DeviceOffload.h"1314#include "clang/Basic/TargetOptions.h"15#include "clang/CodeGen/ModuleBuilder.h"16#include "clang/Frontend/CompilerInstance.h"1718#include "llvm/IR/LegacyPassManager.h"19#include "llvm/IR/Module.h"20#include "llvm/MC/TargetRegistry.h"21#include "llvm/Target/TargetMachine.h"2223namespace clang {2425IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(26Interpreter &Interp, std::unique_ptr<CompilerInstance> Instance,27IncrementalParser &HostParser, llvm::LLVMContext &LLVMCtx,28llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS,29llvm::Error &Err)30: IncrementalParser(Interp, std::move(Instance), LLVMCtx, Err),31HostParser(HostParser), VFS(FS) {32if (Err)33return;34StringRef Arch = CI->getTargetOpts().CPU;35if (!Arch.starts_with("sm_") || Arch.substr(3).getAsInteger(10, SMVersion)) {36Err = llvm::joinErrors(std::move(Err), llvm::make_error<llvm::StringError>(37"Invalid CUDA architecture",38llvm::inconvertibleErrorCode()));39return;40}41}4243llvm::Expected<PartialTranslationUnit &>44IncrementalCUDADeviceParser::Parse(llvm::StringRef Input) {45auto PTU = IncrementalParser::Parse(Input);46if (!PTU)47return PTU.takeError();4849auto PTX = GeneratePTX();50if (!PTX)51return PTX.takeError();5253auto Err = GenerateFatbinary();54if (Err)55return std::move(Err);5657std::string FatbinFileName =58"/incr_module_" + std::to_string(PTUs.size()) + ".fatbin";59VFS->addFile(FatbinFileName, 0,60llvm::MemoryBuffer::getMemBuffer(61llvm::StringRef(FatbinContent.data(), FatbinContent.size()),62"", false));6364HostParser.getCI()->getCodeGenOpts().CudaGpuBinaryFileName = FatbinFileName;6566FatbinContent.clear();6768return PTU;69}7071llvm::Expected<llvm::StringRef> IncrementalCUDADeviceParser::GeneratePTX() {72auto &PTU = PTUs.back();73std::string Error;7475const llvm::Target *Target = llvm::TargetRegistry::lookupTarget(76PTU.TheModule->getTargetTriple(), Error);77if (!Target)78return llvm::make_error<llvm::StringError>(std::move(Error),79std::error_code());80llvm::TargetOptions TO = llvm::TargetOptions();81llvm::TargetMachine *TargetMachine = Target->createTargetMachine(82PTU.TheModule->getTargetTriple(), getCI()->getTargetOpts().CPU, "", TO,83llvm::Reloc::Model::PIC_);84PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());8586PTXCode.clear();87llvm::raw_svector_ostream dest(PTXCode);8889llvm::legacy::PassManager PM;90if (TargetMachine->addPassesToEmitFile(PM, dest, nullptr,91llvm::CodeGenFileType::AssemblyFile)) {92return llvm::make_error<llvm::StringError>(93"NVPTX backend cannot produce PTX code.",94llvm::inconvertibleErrorCode());95}9697if (!PM.run(*PTU.TheModule))98return llvm::make_error<llvm::StringError>("Failed to emit PTX code.",99llvm::inconvertibleErrorCode());100101PTXCode += '\0';102while (PTXCode.size() % 8)103PTXCode += '\0';104return PTXCode.str();105}106107llvm::Error IncrementalCUDADeviceParser::GenerateFatbinary() {108enum FatBinFlags {109AddressSize64 = 0x01,110HasDebugInfo = 0x02,111ProducerCuda = 0x04,112HostLinux = 0x10,113HostMac = 0x20,114HostWindows = 0x40115};116117struct FatBinInnerHeader {118uint16_t Kind; // 0x00119uint16_t unknown02; // 0x02120uint32_t HeaderSize; // 0x04121uint32_t DataSize; // 0x08122uint32_t unknown0c; // 0x0c123uint32_t CompressedSize; // 0x10124uint32_t SubHeaderSize; // 0x14125uint16_t VersionMinor; // 0x18126uint16_t VersionMajor; // 0x1a127uint32_t CudaArch; // 0x1c128uint32_t unknown20; // 0x20129uint32_t unknown24; // 0x24130uint32_t Flags; // 0x28131uint32_t unknown2c; // 0x2c132uint32_t unknown30; // 0x30133uint32_t unknown34; // 0x34134uint32_t UncompressedSize; // 0x38135uint32_t unknown3c; // 0x3c136uint32_t unknown40; // 0x40137uint32_t unknown44; // 0x44138FatBinInnerHeader(uint32_t DataSize, uint32_t CudaArch, uint32_t Flags)139: Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)),140DataSize(DataSize), unknown0c(0), CompressedSize(0),141SubHeaderSize(HeaderSize - 8), VersionMinor(2), VersionMajor(4),142CudaArch(CudaArch), unknown20(0), unknown24(0), Flags(Flags),143unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0),144unknown3c(0), unknown40(0), unknown44(0) {}145};146147struct FatBinHeader {148uint32_t Magic; // 0x00149uint16_t Version; // 0x04150uint16_t HeaderSize; // 0x06151uint32_t DataSize; // 0x08152uint32_t unknown0c; // 0x0c153public:154FatBinHeader(uint32_t DataSize)155: Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)),156DataSize(DataSize), unknown0c(0) {}157};158159FatBinHeader OuterHeader(sizeof(FatBinInnerHeader) + PTXCode.size());160FatbinContent.append((char *)&OuterHeader,161((char *)&OuterHeader) + OuterHeader.HeaderSize);162163FatBinInnerHeader InnerHeader(PTXCode.size(), SMVersion,164FatBinFlags::AddressSize64 |165FatBinFlags::HostLinux);166FatbinContent.append((char *)&InnerHeader,167((char *)&InnerHeader) + InnerHeader.HeaderSize);168169FatbinContent.append(PTXCode.begin(), PTXCode.end());170171return llvm::Error::success();172}173174IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {}175176} // namespace clang177178179