Path: blob/21.2-virgl/src/amd/llvm/ac_llvm_helper.cpp
7099 views
/*1* Copyright 2014 Advanced Micro Devices, Inc.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the5* "Software"), to deal in the Software without restriction, including6* without limitation the rights to use, copy, modify, merge, publish,7* distribute, sub license, and/or sell copies of the Software, and to8* permit persons to whom the Software is furnished to do so, subject to9* the following conditions:10*11* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR12* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,13* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL14* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,15* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR16* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE17* USE OR OTHER DEALINGS IN THE SOFTWARE.18*19* The above copyright notice and this permission notice (including the20* next paragraph) shall be included in all copies or substantial portions21* of the Software.22*23*/2425#include <llvm-c/Core.h>26#include <llvm/Analysis/TargetLibraryInfo.h>27#include <llvm/IR/IRBuilder.h>28#include <llvm/IR/LegacyPassManager.h>29#include <llvm/Target/TargetMachine.h>30#include <llvm/Transforms/IPO.h>3132#include <cstring>3334/* DO NOT REORDER THE HEADERS35* The LLVM headers need to all be included before any Mesa header,36* as they use the `restrict` keyword in ways that are incompatible37* with our #define in include/c99_compat.h38*/3940#include "ac_binary.h"41#include "ac_llvm_util.h"42#include "ac_llvm_build.h"43#include "util/macros.h"4445void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)46{47llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);48A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes));49}5051void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)52{53llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);54A->addAttr(llvm::Attribute::getWithAlignment(A->getContext(), llvm::Align(bytes)));55}5657bool ac_is_sgpr_param(LLVMValueRef arg)58{59llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);60llvm::AttributeList AS = A->getParent()->getAttributes();61unsigned ArgNo = A->getArgNo();62return AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);63}6465LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)66{67return LLVMGetCalledValue(call);68}6970bool ac_llvm_is_function(LLVMValueRef v)71{72return LLVMGetValueKind(v) == LLVMFunctionValueKind;73}7475LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)76{77llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);78LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);7980llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());81llvm::unwrap(module)->setDataLayout(TM->createDataLayout());82return module;83}8485LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode)86{87LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);8889llvm::FastMathFlags flags;9091switch (float_mode) {92case AC_FLOAT_MODE_DEFAULT:93case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:94break;9596case AC_FLOAT_MODE_DEFAULT_OPENGL:97/* Allow optimizations to treat the sign of a zero argument or98* result as insignificant.99*/100flags.setNoSignedZeros(); /* nsz */101102/* Allow optimizations to use the reciprocal of an argument103* rather than perform division.104*/105flags.setAllowReciprocal(); /* arcp */106107llvm::unwrap(builder)->setFastMathFlags(flags);108break;109}110111return builder;112}113114void ac_enable_signed_zeros(struct ac_llvm_context *ctx)115{116if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {117auto *b = llvm::unwrap(ctx->builder);118llvm::FastMathFlags flags = b->getFastMathFlags();119120/* This disables the optimization of (x + 0), which is used121* to convert negative zero to positive zero.122*/123flags.setNoSignedZeros(false);124b->setFastMathFlags(flags);125}126}127128void ac_disable_signed_zeros(struct ac_llvm_context *ctx)129{130if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {131auto *b = llvm::unwrap(ctx->builder);132llvm::FastMathFlags flags = b->getFastMathFlags();133134flags.setNoSignedZeros();135b->setFastMathFlags(flags);136}137}138139LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple)140{141return reinterpret_cast<LLVMTargetLibraryInfoRef>(142new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));143}144145void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)146{147delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);148}149150/* Implementation of raw_pwrite_stream that works on malloc()ed memory for151* better compatibility with C code. */152struct raw_memory_ostream : public llvm::raw_pwrite_stream {153char *buffer;154size_t written;155size_t bufsize;156157raw_memory_ostream()158{159buffer = NULL;160written = 0;161bufsize = 0;162SetUnbuffered();163}164165~raw_memory_ostream()166{167free(buffer);168}169170void clear()171{172written = 0;173}174175void take(char *&out_buffer, size_t &out_size)176{177out_buffer = buffer;178out_size = written;179buffer = NULL;180written = 0;181bufsize = 0;182}183184void flush() = delete;185186void write_impl(const char *ptr, size_t size) override187{188if (unlikely(written + size < written))189abort();190if (written + size > bufsize) {191bufsize = MAX3(1024, written + size, bufsize / 3 * 4);192buffer = (char *)realloc(buffer, bufsize);193if (!buffer) {194fprintf(stderr, "amd: out of memory allocating ELF buffer\n");195abort();196}197}198memcpy(buffer + written, ptr, size);199written += size;200}201202void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override203{204assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written);205memcpy(buffer + offset, ptr, size);206}207208uint64_t current_pos() const override209{210return written;211}212};213214/* The LLVM compiler is represented as a pass manager containing passes for215* optimizations, instruction selection, and code generation.216*/217struct ac_compiler_passes {218raw_memory_ostream ostream; /* ELF shader binary stream */219llvm::legacy::PassManager passmgr; /* list of passes */220};221222struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)223{224struct ac_compiler_passes *p = new ac_compiler_passes();225if (!p)226return NULL;227228llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);229230if (TM->addPassesToEmitFile(p->passmgr, p->ostream, nullptr,231llvm::CGFT_ObjectFile)) {232fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");233delete p;234return NULL;235}236return p;237}238239void ac_destroy_llvm_passes(struct ac_compiler_passes *p)240{241delete p;242}243244/* This returns false on failure. */245bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,246char **pelf_buffer, size_t *pelf_size)247{248p->passmgr.run(*llvm::unwrap(module));249p->ostream.take(*pelf_buffer, *pelf_size);250return true;251}252253void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)254{255llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());256}257258void ac_enable_global_isel(LLVMTargetMachineRef tm)259{260reinterpret_cast<llvm::TargetMachine *>(tm)->setGlobalISel(true);261}262263LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,264LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope)265{266llvm::AtomicRMWInst::BinOp binop;267switch (op) {268case LLVMAtomicRMWBinOpXchg:269binop = llvm::AtomicRMWInst::Xchg;270break;271case LLVMAtomicRMWBinOpAdd:272binop = llvm::AtomicRMWInst::Add;273break;274case LLVMAtomicRMWBinOpSub:275binop = llvm::AtomicRMWInst::Sub;276break;277case LLVMAtomicRMWBinOpAnd:278binop = llvm::AtomicRMWInst::And;279break;280case LLVMAtomicRMWBinOpNand:281binop = llvm::AtomicRMWInst::Nand;282break;283case LLVMAtomicRMWBinOpOr:284binop = llvm::AtomicRMWInst::Or;285break;286case LLVMAtomicRMWBinOpXor:287binop = llvm::AtomicRMWInst::Xor;288break;289case LLVMAtomicRMWBinOpMax:290binop = llvm::AtomicRMWInst::Max;291break;292case LLVMAtomicRMWBinOpMin:293binop = llvm::AtomicRMWInst::Min;294break;295case LLVMAtomicRMWBinOpUMax:296binop = llvm::AtomicRMWInst::UMax;297break;298case LLVMAtomicRMWBinOpUMin:299binop = llvm::AtomicRMWInst::UMin;300break;301case LLVMAtomicRMWBinOpFAdd:302binop = llvm::AtomicRMWInst::FAdd;303break;304default:305unreachable("invalid LLVMAtomicRMWBinOp");306break;307}308unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);309return llvm::wrap(llvm::unwrap(ctx->builder)310->CreateAtomicRMW(binop, llvm::unwrap(ptr), llvm::unwrap(val),311#if LLVM_VERSION_MAJOR >= 13312llvm::MaybeAlign(0),313#endif314llvm::AtomicOrdering::SequentiallyConsistent, SSID));315}316317LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,318LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope)319{320unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);321return llvm::wrap(llvm::unwrap(ctx->builder)322->CreateAtomicCmpXchg(llvm::unwrap(ptr), llvm::unwrap(cmp),323llvm::unwrap(val),324#if LLVM_VERSION_MAJOR >= 13325llvm::MaybeAlign(0),326#endif327llvm::AtomicOrdering::SequentiallyConsistent,328llvm::AtomicOrdering::SequentiallyConsistent, SSID));329}330331332