Path: blob/21.2-virgl/src/amd/llvm/ac_llvm_util.c
7206 views
/*1* Copyright 2014 Advanced Micro Devices, Inc.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the5* "Software"), to deal in the Software without restriction, including6* without limitation the rights to use, copy, modify, merge, publish,7* distribute, sub license, and/or sell copies of the Software, and to8* permit persons to whom the Software is furnished to do so, subject to9* the following conditions:10*11* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR12* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,13* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL14* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,15* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR16* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE17* USE OR OTHER DEALINGS IN THE SOFTWARE.18*19* The above copyright notice and this permission notice (including the20* next paragraph) shall be included in all copies or substantial portions21* of the Software.22*23*/24/* based on pieces from si_pipe.c and radeon_llvm_emit.c */25#include "ac_llvm_util.h"2627#include "ac_llvm_build.h"28#include "c11/threads.h"29#include "gallivm/lp_bld_misc.h"30#include "util/bitscan.h"31#include "util/u_math.h"32#include <llvm-c/Core.h>33#include <llvm-c/Support.h>34#include <llvm-c/Transforms/IPO.h>35#include <llvm-c/Transforms/Scalar.h>36#include <llvm-c/Transforms/Utils.h>3738#include <assert.h>39#include <stdio.h>40#include <string.h>4142static void ac_init_llvm_target(void)43{44LLVMInitializeAMDGPUTargetInfo();45LLVMInitializeAMDGPUTarget();46LLVMInitializeAMDGPUTargetMC();47LLVMInitializeAMDGPUAsmPrinter();4849/* For inline assembly. */50LLVMInitializeAMDGPUAsmParser();5152/* For ACO disassembly. */53LLVMInitializeAMDGPUDisassembler();5455/* Workaround for bug in llvm 4.0 that causes image intrinsics56* to disappear.57* https://reviews.llvm.org/D2634858*59* "mesa" is the prefix for error messages.60*61* -global-isel-abort=2 is a no-op unless global isel has been enabled.62* This option tells the backend to fall-back to SelectionDAG and print63* a diagnostic message if global isel fails.64*/65const char *argv[] = {66"mesa",67"-simplifycfg-sink-common=false",68"-global-isel-abort=2",69"-amdgpu-atomic-optimizations=true",70#if LLVM_VERSION_MAJOR == 1171/* This fixes variable indexing on LLVM 11. It also breaks atomic.cmpswap on LLVM >= 12. */72"-structurizecfg-skip-uniform-regions",73#endif74};75LLVMParseCommandLineOptions(ARRAY_SIZE(argv), argv, NULL);76}7778PUBLIC void ac_init_shared_llvm_once(void)79{80static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT;81call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);82}8384#if !LLVM_IS_SHARED85static once_flag ac_init_static_llvm_target_once_flag = ONCE_FLAG_INIT;86static void ac_init_static_llvm_once(void)87{88call_once(&ac_init_static_llvm_target_once_flag, ac_init_llvm_target);89}90#endif9192void ac_init_llvm_once(void)93{94#if LLVM_IS_SHARED95ac_init_shared_llvm_once();96#else97ac_init_static_llvm_once();98#endif99}100101static LLVMTargetRef ac_get_llvm_target(const char *triple)102{103LLVMTargetRef target = NULL;104char *err_message = NULL;105106if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {107fprintf(stderr, "Cannot find target for triple %s ", triple);108if (err_message) {109fprintf(stderr, "%s\n", err_message);110}111LLVMDisposeMessage(err_message);112return NULL;113}114return target;115}116117const char *ac_get_llvm_processor_name(enum radeon_family family)118{119switch (family) {120case CHIP_TAHITI:121return "tahiti";122case CHIP_PITCAIRN:123return "pitcairn";124case CHIP_VERDE:125return "verde";126case CHIP_OLAND:127return "oland";128case CHIP_HAINAN:129return "hainan";130case CHIP_BONAIRE:131return "bonaire";132case CHIP_KABINI:133return "kabini";134case CHIP_KAVERI:135return "kaveri";136case CHIP_HAWAII:137return "hawaii";138case CHIP_TONGA:139return "tonga";140case CHIP_ICELAND:141return "iceland";142case CHIP_CARRIZO:143return "carrizo";144case CHIP_FIJI:145return "fiji";146case CHIP_STONEY:147return "stoney";148case CHIP_POLARIS10:149return "polaris10";150case CHIP_POLARIS11:151case CHIP_POLARIS12:152case CHIP_VEGAM:153return "polaris11";154case CHIP_VEGA10:155return "gfx900";156case CHIP_RAVEN:157return "gfx902";158case CHIP_VEGA12:159return "gfx904";160case CHIP_VEGA20:161return "gfx906";162case CHIP_RAVEN2:163case CHIP_RENOIR:164return "gfx909";165case CHIP_ARCTURUS:166return "gfx908";167case CHIP_ALDEBARAN:168return "gfx90a";169case CHIP_NAVI10:170return "gfx1010";171case CHIP_NAVI12:172return "gfx1011";173case CHIP_NAVI14:174return "gfx1012";175case CHIP_SIENNA_CICHLID:176case CHIP_NAVY_FLOUNDER:177case CHIP_DIMGREY_CAVEFISH:178case CHIP_BEIGE_GOBY:179case CHIP_VANGOGH:180case CHIP_YELLOW_CARP:181return "gfx1030";182default:183return "";184}185}186187static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,188enum ac_target_machine_options tm_options,189LLVMCodeGenOptLevel level,190const char **out_triple)191{192assert(family >= CHIP_TAHITI);193const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";194LLVMTargetRef target = ac_get_llvm_target(triple);195196LLVMTargetMachineRef tm =197LLVMCreateTargetMachine(target, triple, ac_get_llvm_processor_name(family), "", level,198LLVMRelocDefault, LLVMCodeModelDefault);199200if (out_triple)201*out_triple = triple;202if (tm_options & AC_TM_ENABLE_GLOBAL_ISEL)203ac_enable_global_isel(tm);204return tm;205}206207static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,208bool check_ir)209{210LLVMPassManagerRef passmgr = LLVMCreatePassManager();211if (!passmgr)212return NULL;213214if (target_library_info)215LLVMAddTargetLibraryInfo(target_library_info, passmgr);216217if (check_ir)218LLVMAddVerifierPass(passmgr);219LLVMAddAlwaysInlinerPass(passmgr);220/* Normally, the pass manager runs all passes on one function before221* moving onto another. Adding a barrier no-op pass forces the pass222* manager to run the inliner on all functions first, which makes sure223* that the following passes are only run on the remaining non-inline224* function, so it removes useless work done on dead inline functions.225*/226ac_llvm_add_barrier_noop_pass(passmgr);227/* This pass should eliminate all the load and store instructions. */228LLVMAddPromoteMemoryToRegisterPass(passmgr);229LLVMAddScalarReplAggregatesPass(passmgr);230LLVMAddLICMPass(passmgr);231LLVMAddAggressiveDCEPass(passmgr);232LLVMAddCFGSimplificationPass(passmgr);233/* This is recommended by the instruction combining pass. */234LLVMAddEarlyCSEMemSSAPass(passmgr);235LLVMAddInstructionCombiningPass(passmgr);236return passmgr;237}238239static const char *attr_to_str(enum ac_func_attr attr)240{241switch (attr) {242case AC_FUNC_ATTR_ALWAYSINLINE:243return "alwaysinline";244case AC_FUNC_ATTR_INREG:245return "inreg";246case AC_FUNC_ATTR_NOALIAS:247return "noalias";248case AC_FUNC_ATTR_NOUNWIND:249return "nounwind";250case AC_FUNC_ATTR_READNONE:251return "readnone";252case AC_FUNC_ATTR_READONLY:253return "readonly";254case AC_FUNC_ATTR_WRITEONLY:255return "writeonly";256case AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY:257return "inaccessiblememonly";258case AC_FUNC_ATTR_CONVERGENT:259return "convergent";260default:261fprintf(stderr, "Unhandled function attribute: %x\n", attr);262return 0;263}264}265266void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, int attr_idx,267enum ac_func_attr attr)268{269const char *attr_name = attr_to_str(attr);270unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, strlen(attr_name));271LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);272273if (LLVMIsAFunction(function))274LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);275else276LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);277}278279void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, unsigned attrib_mask)280{281attrib_mask |= AC_FUNC_ATTR_NOUNWIND;282attrib_mask &= ~AC_FUNC_ATTR_LEGACY;283284while (attrib_mask) {285enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);286ac_add_function_attr(ctx, function, -1, attr);287}288}289290void ac_dump_module(LLVMModuleRef module)291{292char *str = LLVMPrintModuleToString(module);293fprintf(stderr, "%s", str);294LLVMDisposeMessage(str);295}296297void ac_llvm_add_target_dep_function_attr(LLVMValueRef F, const char *name, unsigned value)298{299char str[16];300301snprintf(str, sizeof(str), "0x%x", value);302LLVMAddTargetDependentFunctionAttr(F, name, str);303}304305void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size)306{307if (!size)308return;309310char str[32];311snprintf(str, sizeof(str), "%u,%u", size, size);312LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str);313}314315void ac_llvm_set_target_features(LLVMValueRef F, struct ac_llvm_context *ctx)316{317char features[2048];318319snprintf(features, sizeof(features), "+DumpCode%s%s",320/* GFX9 has broken VGPR indexing, so always promote alloca to scratch. */321ctx->chip_class == GFX9 ? ",-promote-alloca" : "",322/* Wave32 is the default. */323ctx->chip_class >= GFX10 && ctx->wave_size == 64 ?324",+wavefrontsize64,-wavefrontsize32" : "");325326LLVMAddTargetDependentFunctionAttr(F, "target-features", features);327}328329unsigned ac_count_scratch_private_memory(LLVMValueRef function)330{331unsigned private_mem_vgprs = 0;332333/* Process all LLVM instructions. */334LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function);335while (bb) {336LLVMValueRef next = LLVMGetFirstInstruction(bb);337338while (next) {339LLVMValueRef inst = next;340next = LLVMGetNextInstruction(next);341342if (LLVMGetInstructionOpcode(inst) != LLVMAlloca)343continue;344345LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst));346/* No idea why LLVM aligns allocas to 4 elements. */347unsigned alignment = LLVMGetAlignment(inst);348unsigned dw_size = align(ac_get_type_size(type) / 4, alignment);349private_mem_vgprs += dw_size;350}351bb = LLVMGetNextBasicBlock(bb);352}353354return private_mem_vgprs;355}356357bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family family,358enum ac_target_machine_options tm_options)359{360const char *triple;361memset(compiler, 0, sizeof(*compiler));362363compiler->tm = ac_create_target_machine(family, tm_options, LLVMCodeGenLevelDefault, &triple);364if (!compiler->tm)365return false;366367if (tm_options & AC_TM_CREATE_LOW_OPT) {368compiler->low_opt_tm =369ac_create_target_machine(family, tm_options, LLVMCodeGenLevelLess, NULL);370if (!compiler->low_opt_tm)371goto fail;372}373374compiler->target_library_info = ac_create_target_library_info(triple);375if (!compiler->target_library_info)376goto fail;377378compiler->passmgr =379ac_create_passmgr(compiler->target_library_info, tm_options & AC_TM_CHECK_IR);380if (!compiler->passmgr)381goto fail;382383return true;384fail:385ac_destroy_llvm_compiler(compiler);386return false;387}388389void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)390{391ac_destroy_llvm_passes(compiler->passes);392ac_destroy_llvm_passes(compiler->low_opt_passes);393394if (compiler->passmgr)395LLVMDisposePassManager(compiler->passmgr);396if (compiler->target_library_info)397ac_dispose_target_library_info(compiler->target_library_info);398if (compiler->low_opt_tm)399LLVMDisposeTargetMachine(compiler->low_opt_tm);400if (compiler->tm)401LLVMDisposeTargetMachine(compiler->tm);402}403404405