Path: blob/21.2-virgl/src/gallium/frontends/clover/nir/invocation.cpp
4573 views
//1// Copyright 2019 Karol Herbst2//3// Permission is hereby granted, free of charge, to any person obtaining a4// copy of this software and associated documentation files (the "Software"),5// to deal in the Software without restriction, including without limitation6// the rights to use, copy, modify, merge, publish, distribute, sublicense,7// and/or sell copies of the Software, and to permit persons to whom the8// Software is furnished to do so, subject to the following conditions:9//10// The above copyright notice and this permission notice shall be included in11// all copies or substantial portions of the Software.12//13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19// OTHER DEALINGS IN THE SOFTWARE.20//2122#include "invocation.hpp"2324#include <tuple>2526#include "core/device.hpp"27#include "core/error.hpp"28#include "core/module.hpp"29#include "pipe/p_state.h"30#include "util/algorithm.hpp"31#include "util/functional.hpp"3233#include <compiler/glsl_types.h>34#include <compiler/nir/nir_builder.h>35#include <compiler/nir/nir_serialize.h>36#include <compiler/spirv/nir_spirv.h>37#include <util/u_math.h>3839using namespace clover;4041#ifdef HAVE_CLOVER_SPIRV4243// Refs and unrefs the glsl_type_singleton.44static class glsl_type_ref {45public:46glsl_type_ref() {47glsl_type_singleton_init_or_ref();48}4950~glsl_type_ref() {51glsl_type_singleton_decref();52}53} glsl_type_ref;5455static const nir_shader_compiler_options *56dev_get_nir_compiler_options(const device &dev)57{58const void *co = dev.get_compiler_options(PIPE_SHADER_IR_NIR);59return static_cast<const nir_shader_compiler_options*>(co);60}6162static void debug_function(void *private_data,63enum nir_spirv_debug_level level, size_t spirv_offset,64const char *message)65{66assert(private_data);67auto r_log = reinterpret_cast<std::string *>(private_data);68*r_log += message;69}7071static void72clover_arg_size_align(const glsl_type *type, unsigned *size, unsigned *align)73{74if (type == glsl_type::sampler_type) {75*size = 0;76*align = 1;77} else if (type->is_image()) {78*size = *align = sizeof(cl_mem);79} else {80*size = type->cl_size();81*align = type->cl_alignment();82}83}8485static bool86clover_nir_lower_images(nir_shader *shader)87{88nir_function_impl *impl = nir_shader_get_entrypoint(shader);8990ASSERTED int last_loc = -1;91int num_rd_images = 0, num_wr_images = 0, num_samplers = 0;92nir_foreach_uniform_variable(var, shader) {93if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {94/* Assume they come in order */95assert(var->data.location > last_loc);96last_loc = var->data.location;97}9899/* TODO: Constant samplers */100if (var->type == glsl_bare_sampler_type()) {101var->data.driver_location = num_samplers++;102} else if (glsl_type_is_image(var->type)) {103if (var->data.access & ACCESS_NON_WRITEABLE)104var->data.driver_location = num_rd_images++;105else106var->data.driver_location = num_wr_images++;107} else {108/* CL shouldn't have any sampled images */109assert(!glsl_type_is_sampler(var->type));110}111}112shader->info.num_textures = num_rd_images;113BITSET_ZERO(shader->info.textures_used);114if (num_rd_images)115BITSET_SET_RANGE(shader->info.textures_used, 0, num_rd_images - 1);116shader->info.num_images = num_wr_images;117118nir_builder b;119nir_builder_init(&b, impl);120121bool progress = false;122nir_foreach_block_reverse(block, impl) {123nir_foreach_instr_reverse_safe(instr, block) {124switch (instr->type) {125case nir_instr_type_deref: {126nir_deref_instr *deref = nir_instr_as_deref(instr);127if (deref->deref_type != nir_deref_type_var)128break;129130if (!glsl_type_is_image(deref->type) &&131!glsl_type_is_sampler(deref->type))132break;133134b.cursor = nir_instr_remove(&deref->instr);135nir_ssa_def *loc =136nir_imm_intN_t(&b, deref->var->data.driver_location,137deref->dest.ssa.bit_size);138nir_ssa_def_rewrite_uses(&deref->dest.ssa, loc);139progress = true;140break;141}142143case nir_instr_type_tex: {144nir_tex_instr *tex = nir_instr_as_tex(instr);145unsigned count = 0;146for (unsigned i = 0; i < tex->num_srcs; i++) {147if (tex->src[i].src_type == nir_tex_src_texture_deref ||148tex->src[i].src_type == nir_tex_src_sampler_deref) {149nir_deref_instr *deref = nir_src_as_deref(tex->src[i].src);150if (deref->deref_type == nir_deref_type_var) {151/* In this case, we know the actual variable */152if (tex->src[i].src_type == nir_tex_src_texture_deref)153tex->texture_index = deref->var->data.driver_location;154else155tex->sampler_index = deref->var->data.driver_location;156/* This source gets discarded */157nir_instr_rewrite_src(&tex->instr, &tex->src[i].src,158NIR_SRC_INIT);159continue;160} else {161assert(tex->src[i].src.is_ssa);162b.cursor = nir_before_instr(&tex->instr);163/* Back-ends expect a 32-bit thing, not 64-bit */164nir_ssa_def *offset = nir_u2u32(&b, tex->src[i].src.ssa);165if (tex->src[i].src_type == nir_tex_src_texture_deref)166tex->src[count].src_type = nir_tex_src_texture_offset;167else168tex->src[count].src_type = nir_tex_src_sampler_offset;169nir_instr_rewrite_src(&tex->instr, &tex->src[count].src,170nir_src_for_ssa(offset));171}172} else {173/* If we've removed a source, move this one down */174if (count != i) {175assert(count < i);176tex->src[count].src_type = tex->src[i].src_type;177nir_instr_move_src(&tex->instr, &tex->src[count].src,178&tex->src[i].src);179}180}181count++;182}183tex->num_srcs = count;184progress = true;185break;186}187188case nir_instr_type_intrinsic: {189nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);190switch (intrin->intrinsic) {191case nir_intrinsic_image_deref_load:192case nir_intrinsic_image_deref_store:193case nir_intrinsic_image_deref_atomic_add:194case nir_intrinsic_image_deref_atomic_imin:195case nir_intrinsic_image_deref_atomic_umin:196case nir_intrinsic_image_deref_atomic_imax:197case nir_intrinsic_image_deref_atomic_umax:198case nir_intrinsic_image_deref_atomic_and:199case nir_intrinsic_image_deref_atomic_or:200case nir_intrinsic_image_deref_atomic_xor:201case nir_intrinsic_image_deref_atomic_exchange:202case nir_intrinsic_image_deref_atomic_comp_swap:203case nir_intrinsic_image_deref_atomic_fadd:204case nir_intrinsic_image_deref_atomic_inc_wrap:205case nir_intrinsic_image_deref_atomic_dec_wrap:206case nir_intrinsic_image_deref_size:207case nir_intrinsic_image_deref_samples: {208assert(intrin->src[0].is_ssa);209b.cursor = nir_before_instr(&intrin->instr);210/* Back-ends expect a 32-bit thing, not 64-bit */211nir_ssa_def *offset = nir_u2u32(&b, intrin->src[0].ssa);212nir_rewrite_image_intrinsic(intrin, offset, false);213progress = true;214break;215}216217default:218break;219}220break;221}222223default:224break;225}226}227}228229if (progress) {230nir_metadata_preserve(impl, nir_metadata_block_index |231nir_metadata_dominance);232} else {233nir_metadata_preserve(impl, nir_metadata_all);234}235236return progress;237}238239struct clover_lower_nir_state {240std::vector<module::argument> &args;241uint32_t global_dims;242nir_variable *constant_var;243nir_variable *printf_buffer;244nir_variable *offset_vars[3];245};246247static bool248clover_lower_nir_filter(const nir_instr *instr, const void *)249{250return instr->type == nir_instr_type_intrinsic;251}252253static nir_ssa_def *254clover_lower_nir_instr(nir_builder *b, nir_instr *instr, void *_state)255{256clover_lower_nir_state *state = reinterpret_cast<clover_lower_nir_state*>(_state);257nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);258259switch (intrinsic->intrinsic) {260case nir_intrinsic_load_printf_buffer_address: {261if (!state->printf_buffer) {262unsigned location = state->args.size();263state->args.emplace_back(module::argument::global, sizeof(size_t),2648, 8, module::argument::zero_ext,265module::argument::printf_buffer);266267const glsl_type *type = glsl_uint64_t_type();268state->printf_buffer = nir_variable_create(b->shader, nir_var_uniform,269type, "global_printf_buffer");270state->printf_buffer->data.location = location;271}272return nir_load_var(b, state->printf_buffer);273}274case nir_intrinsic_load_base_global_invocation_id: {275nir_ssa_def *loads[3];276277/* create variables if we didn't do so alrady */278if (!state->offset_vars[0]) {279/* TODO: fix for 64 bit */280/* Even though we only place one scalar argument, clover will bind up to281* three 32 bit values282*/283unsigned location = state->args.size();284state->args.emplace_back(module::argument::scalar, 4, 4, 4,285module::argument::zero_ext,286module::argument::grid_offset);287288const glsl_type *type = glsl_uint_type();289for (uint32_t i = 0; i < 3; i++) {290state->offset_vars[i] =291nir_variable_create(b->shader, nir_var_uniform, type,292"global_invocation_id_offsets");293state->offset_vars[i]->data.location = location + i;294}295}296297for (int i = 0; i < 3; i++) {298nir_variable *var = state->offset_vars[i];299loads[i] = var ? nir_load_var(b, var) : nir_imm_int(b, 0);300}301302return nir_u2u(b, nir_vec(b, loads, state->global_dims),303nir_dest_bit_size(intrinsic->dest));304}305case nir_intrinsic_load_constant_base_ptr: {306return nir_load_var(b, state->constant_var);307}308309default:310return NULL;311}312}313314static bool315clover_lower_nir(nir_shader *nir, std::vector<module::argument> &args,316uint32_t dims, uint32_t pointer_bit_size)317{318nir_variable *constant_var = NULL;319if (nir->constant_data_size) {320const glsl_type *type = pointer_bit_size == 64 ? glsl_uint64_t_type() : glsl_uint_type();321322constant_var = nir_variable_create(nir, nir_var_uniform, type,323"constant_buffer_addr");324constant_var->data.location = args.size();325326args.emplace_back(module::argument::global, sizeof(cl_mem),327pointer_bit_size / 8, pointer_bit_size / 8,328module::argument::zero_ext,329module::argument::constant_buffer);330}331332clover_lower_nir_state state = { args, dims, constant_var };333return nir_shader_lower_instructions(nir,334clover_lower_nir_filter, clover_lower_nir_instr, &state);335}336337static spirv_to_nir_options338create_spirv_options(const device &dev, std::string &r_log)339{340struct spirv_to_nir_options spirv_options = {};341spirv_options.environment = NIR_SPIRV_OPENCL;342if (dev.address_bits() == 32u) {343spirv_options.shared_addr_format = nir_address_format_32bit_offset;344spirv_options.global_addr_format = nir_address_format_32bit_global;345spirv_options.temp_addr_format = nir_address_format_32bit_offset;346spirv_options.constant_addr_format = nir_address_format_32bit_global;347} else {348spirv_options.shared_addr_format = nir_address_format_32bit_offset_as_64bit;349spirv_options.global_addr_format = nir_address_format_64bit_global;350spirv_options.temp_addr_format = nir_address_format_32bit_offset_as_64bit;351spirv_options.constant_addr_format = nir_address_format_64bit_global;352}353spirv_options.caps.address = true;354spirv_options.caps.float64 = true;355spirv_options.caps.int8 = true;356spirv_options.caps.int16 = true;357spirv_options.caps.int64 = true;358spirv_options.caps.kernel = true;359spirv_options.caps.kernel_image = dev.image_support();360spirv_options.caps.int64_atomics = dev.has_int64_atomics();361spirv_options.debug.func = &debug_function;362spirv_options.debug.private_data = &r_log;363spirv_options.caps.printf = true;364return spirv_options;365}366367struct disk_cache *clover::nir::create_clc_disk_cache(void)368{369struct mesa_sha1 ctx;370unsigned char sha1[20];371char cache_id[20 * 2 + 1];372_mesa_sha1_init(&ctx);373374if (!disk_cache_get_function_identifier((void *)clover::nir::create_clc_disk_cache, &ctx))375return NULL;376377_mesa_sha1_final(&ctx, sha1);378379disk_cache_format_hex_id(cache_id, sha1, 20 * 2);380return disk_cache_create("clover-clc", cache_id, 0);381}382383void clover::nir::check_for_libclc(const device &dev)384{385if (!nir_can_find_libclc(dev.address_bits()))386throw error(CL_COMPILER_NOT_AVAILABLE);387}388389nir_shader *clover::nir::load_libclc_nir(const device &dev, std::string &r_log)390{391spirv_to_nir_options spirv_options = create_spirv_options(dev, r_log);392auto *compiler_options = dev_get_nir_compiler_options(dev);393394return nir_load_libclc_shader(dev.address_bits(), dev.clc_cache,395&spirv_options, compiler_options);396}397398module clover::nir::spirv_to_nir(const module &mod, const device &dev,399std::string &r_log)400{401spirv_to_nir_options spirv_options = create_spirv_options(dev, r_log);402std::shared_ptr<nir_shader> nir = dev.clc_nir;403spirv_options.clc_shader = nir.get();404405module m;406// We only insert one section.407assert(mod.secs.size() == 1);408auto §ion = mod.secs[0];409410module::resource_id section_id = 0;411for (const auto &sym : mod.syms) {412assert(sym.section == 0);413414const auto *binary =415reinterpret_cast<const pipe_binary_program_header *>(section.data.data());416const uint32_t *data = reinterpret_cast<const uint32_t *>(binary->blob);417const size_t num_words = binary->num_bytes / 4;418const char *name = sym.name.c_str();419auto *compiler_options = dev_get_nir_compiler_options(dev);420421nir_shader *nir = spirv_to_nir(data, num_words, nullptr, 0,422MESA_SHADER_KERNEL, name,423&spirv_options, compiler_options);424if (!nir) {425r_log += "Translation from SPIR-V to NIR for kernel \"" + sym.name +426"\" failed.\n";427throw build_error();428}429430nir->info.workgroup_size_variable = sym.reqd_work_group_size[0] == 0;431nir->info.workgroup_size[0] = sym.reqd_work_group_size[0];432nir->info.workgroup_size[1] = sym.reqd_work_group_size[1];433nir->info.workgroup_size[2] = sym.reqd_work_group_size[2];434nir_validate_shader(nir, "clover");435436// Inline all functions first.437// according to the comment on nir_inline_functions438NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);439NIR_PASS_V(nir, nir_lower_returns);440NIR_PASS_V(nir, nir_lower_libclc, spirv_options.clc_shader);441442NIR_PASS_V(nir, nir_inline_functions);443NIR_PASS_V(nir, nir_copy_prop);444NIR_PASS_V(nir, nir_opt_deref);445446// Pick off the single entrypoint that we want.447foreach_list_typed_safe(nir_function, func, node, &nir->functions) {448if (!func->is_entrypoint)449exec_node_remove(&func->node);450}451assert(exec_list_length(&nir->functions) == 1);452453nir_validate_shader(nir, "clover after function inlining");454455NIR_PASS_V(nir, nir_lower_variable_initializers, ~nir_var_function_temp);456457struct nir_lower_printf_options printf_options;458printf_options.treat_doubles_as_floats = false;459printf_options.max_buffer_size = dev.max_printf_buffer_size();460461NIR_PASS_V(nir, nir_lower_printf, &printf_options);462463NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);464465// copy propagate to prepare for lower_explicit_io466NIR_PASS_V(nir, nir_split_var_copies);467NIR_PASS_V(nir, nir_opt_copy_prop_vars);468NIR_PASS_V(nir, nir_lower_var_copies);469NIR_PASS_V(nir, nir_lower_vars_to_ssa);470NIR_PASS_V(nir, nir_opt_dce);471NIR_PASS_V(nir, nir_lower_convert_alu_types, NULL);472473NIR_PASS_V(nir, nir_lower_system_values);474nir_lower_compute_system_values_options sysval_options = { 0 };475sysval_options.has_base_global_invocation_id = true;476NIR_PASS_V(nir, nir_lower_compute_system_values, &sysval_options);477478// constant fold before lowering mem constants479NIR_PASS_V(nir, nir_opt_constant_folding);480481NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_constant, NULL);482NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_constant,483glsl_get_cl_type_size_align);484if (nir->constant_data_size > 0) {485assert(nir->constant_data == NULL);486nir->constant_data = rzalloc_size(nir, nir->constant_data_size);487nir_gather_explicit_io_initializers(nir, nir->constant_data,488nir->constant_data_size,489nir_var_mem_constant);490}491NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,492spirv_options.constant_addr_format);493494auto args = sym.args;495NIR_PASS_V(nir, clover_lower_nir, args, dev.max_block_size().size(),496dev.address_bits());497498NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,499nir_var_uniform, clover_arg_size_align);500NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,501nir_var_mem_shared | nir_var_mem_global |502nir_var_function_temp,503glsl_get_cl_type_size_align);504505NIR_PASS_V(nir, nir_opt_deref);506NIR_PASS_V(nir, nir_lower_readonly_images_to_tex, false);507NIR_PASS_V(nir, clover_nir_lower_images);508NIR_PASS_V(nir, nir_lower_memcpy);509510/* use offsets for kernel inputs (uniform) */511NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform,512nir->info.cs.ptr_size == 64 ?513nir_address_format_32bit_offset_as_64bit :514nir_address_format_32bit_offset);515516NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant,517spirv_options.constant_addr_format);518NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared,519spirv_options.shared_addr_format);520521NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_function_temp,522spirv_options.temp_addr_format);523524NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global,525spirv_options.global_addr_format);526527NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_all, NULL);528529if (compiler_options->lower_int64_options)530NIR_PASS_V(nir, nir_lower_int64);531532NIR_PASS_V(nir, nir_opt_dce);533534if (nir->constant_data_size) {535const char *ptr = reinterpret_cast<const char *>(nir->constant_data);536const module::section constants {537section_id,538module::section::data_constant,539nir->constant_data_size,540{ ptr, ptr + nir->constant_data_size }541};542nir->constant_data = NULL;543nir->constant_data_size = 0;544m.secs.push_back(constants);545}546547void *mem_ctx = ralloc_context(NULL);548unsigned printf_info_count = nir->printf_info_count;549nir_printf_info *printf_infos = nir->printf_info;550551ralloc_steal(mem_ctx, printf_infos);552553struct blob blob;554blob_init(&blob);555nir_serialize(&blob, nir, false);556557ralloc_free(nir);558559const pipe_binary_program_header header { uint32_t(blob.size) };560module::section text { section_id, module::section::text_executable, header.num_bytes, {} };561text.data.insert(text.data.end(), reinterpret_cast<const char *>(&header),562reinterpret_cast<const char *>(&header) + sizeof(header));563text.data.insert(text.data.end(), blob.data, blob.data + blob.size);564565free(blob.data);566567m.printf_strings_in_buffer = false;568m.printf_infos.reserve(printf_info_count);569for (unsigned i = 0; i < printf_info_count; i++) {570module::printf_info info;571572info.arg_sizes.reserve(printf_infos[i].num_args);573for (unsigned j = 0; j < printf_infos[i].num_args; j++)574info.arg_sizes.push_back(printf_infos[i].arg_sizes[j]);575576info.strings.resize(printf_infos[i].string_size);577memcpy(info.strings.data(), printf_infos[i].strings, printf_infos[i].string_size);578m.printf_infos.push_back(info);579}580581ralloc_free(mem_ctx);582583m.syms.emplace_back(sym.name, std::string(),584sym.reqd_work_group_size, section_id, 0, args);585m.secs.push_back(text);586section_id++;587}588return m;589}590#else591module clover::nir::spirv_to_nir(const module &mod, const device &dev, std::string &r_log)592{593r_log += "SPIR-V support in clover is not enabled.\n";594throw error(CL_LINKER_NOT_AVAILABLE);595}596#endif597598599