Path: blob/21.2-virgl/src/panfrost/lib/pan_indirect_dispatch.c
4560 views
/*1* Copyright (C) 2021 Collabora, Ltd.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22*/2324#include <stdio.h>25#include "pan_bo.h"26#include "pan_shader.h"27#include "pan_scoreboard.h"28#include "pan_encoder.h"29#include "pan_indirect_dispatch.h"30#include "pan_pool.h"31#include "pan_util.h"32#include "panfrost-quirks.h"33#include "compiler/nir/nir_builder.h"34#include "util/u_memory.h"35#include "util/macros.h"3637struct indirect_dispatch_inputs {38mali_ptr job;39mali_ptr indirect_dim;40mali_ptr num_wg_sysval[3];41};4243static nir_ssa_def *44get_input_data(nir_builder *b, unsigned offset, unsigned size)45{46assert(!(offset & 0x3));47assert(size && !(size & 0x3));4849return nir_load_ubo(b, 1, size,50nir_imm_int(b, 0),51nir_imm_int(b, offset),52.align_mul = 4,53.align_offset = 0,54.range_base = 0,55.range = ~0);56}5758#define get_input_field(b, name) \59get_input_data(b, offsetof(struct indirect_dispatch_inputs, name), \60sizeof(((struct indirect_dispatch_inputs *)0)->name) * 8)6162static mali_ptr63get_rsd(const struct panfrost_device *dev)64{65return dev->indirect_dispatch.descs->ptr.gpu;66}6768static mali_ptr69get_tls(const struct panfrost_device *dev)70{71return dev->indirect_dispatch.descs->ptr.gpu +72MALI_RENDERER_STATE_LENGTH;73}7475static mali_ptr76get_ubos(struct pan_pool *pool,77const struct indirect_dispatch_inputs *inputs)78{79struct panfrost_ptr inputs_buf =80pan_pool_alloc_aligned(pool, ALIGN_POT(sizeof(*inputs), 16), 16);8182memcpy(inputs_buf.cpu, inputs, sizeof(*inputs));8384struct panfrost_ptr ubos_buf =85pan_pool_alloc_desc(pool, UNIFORM_BUFFER);8687pan_pack(ubos_buf.cpu, UNIFORM_BUFFER, cfg) {88cfg.entries = DIV_ROUND_UP(sizeof(*inputs), 16);89cfg.pointer = inputs_buf.gpu;90}9192return ubos_buf.gpu;93}9495static mali_ptr96get_push_uniforms(struct pan_pool *pool,97const struct indirect_dispatch_inputs *inputs)98{99const struct panfrost_device *dev = pool->dev;100struct panfrost_ptr push_consts_buf =101pan_pool_alloc_aligned(pool,102ALIGN(dev->indirect_dispatch.push.count * 4, 16),10316);104uint32_t *out = push_consts_buf.cpu;105uint8_t *in = (uint8_t *)inputs;106107for (unsigned i = 0; i < dev->indirect_dispatch.push.count; ++i)108memcpy(out + i, in + dev->indirect_dispatch.push.words[i].offset, 4);109110return push_consts_buf.gpu;111}112113unsigned114pan_indirect_dispatch_emit(struct pan_pool *pool,115struct pan_scoreboard *scoreboard,116const struct pan_indirect_dispatch_info *dispatch_info)117{118struct panfrost_device *dev = pool->dev;119struct panfrost_ptr job =120pan_pool_alloc_desc(pool, COMPUTE_JOB);121void *invocation =122pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION);123struct indirect_dispatch_inputs inputs = {124.job = dispatch_info->job,125.indirect_dim = dispatch_info->indirect_dim,126.num_wg_sysval = {127dispatch_info->num_wg_sysval[0],128dispatch_info->num_wg_sysval[1],129dispatch_info->num_wg_sysval[2],130},131};132133panfrost_pack_work_groups_compute(invocation,1341, 1, 1, 1, 1, 1,135false, false);136137pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {138cfg.job_task_split = 2;139}140141pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) {142cfg.draw_descriptor_is_64b = true;143cfg.texture_descriptor_is_64b = !pan_is_bifrost(dev);144cfg.state = get_rsd(dev);145cfg.thread_storage = get_tls(pool->dev);146cfg.uniform_buffers = get_ubos(pool, &inputs);147cfg.push_uniforms = get_push_uniforms(pool, &inputs);148}149150pan_section_pack(job.cpu, COMPUTE_JOB, DRAW_PADDING, cfg);151152return panfrost_add_job(pool, scoreboard, MALI_JOB_TYPE_COMPUTE,153false, true, 0, 0, &job, false);154}155156void157pan_indirect_dispatch_init(struct panfrost_device *dev)158{159nir_builder b =160nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,161pan_shader_get_compiler_options(dev),162"%s", "indirect_dispatch");163b.shader->info.internal = true;164nir_variable_create(b.shader, nir_var_mem_ubo,165glsl_uint_type(), "inputs");166b.shader->info.num_ubos++;167168nir_ssa_def *zero = nir_imm_int(&b, 0);169nir_ssa_def *one = nir_imm_int(&b, 1);170nir_ssa_def *num_wg = nir_load_global(&b, get_input_field(&b, indirect_dim), 4, 3, 32);171nir_ssa_def *num_wg_x = nir_channel(&b, num_wg, 0);172nir_ssa_def *num_wg_y = nir_channel(&b, num_wg, 1);173nir_ssa_def *num_wg_z = nir_channel(&b, num_wg, 2);174175nir_ssa_def *job_hdr_ptr = get_input_field(&b, job);176nir_ssa_def *num_wg_flat = nir_imul(&b, num_wg_x, nir_imul(&b, num_wg_y, num_wg_z));177178nir_push_if(&b, nir_ieq(&b, num_wg_flat, zero));179{180nir_ssa_def *type_ptr = nir_iadd(&b, job_hdr_ptr, nir_imm_int64(&b, 4 * 4));181nir_ssa_def *ntype = nir_imm_intN_t(&b, (MALI_JOB_TYPE_NULL << 1) | 1, 8);182nir_store_global(&b, type_ptr, 1, ntype, 1);183}184nir_push_else(&b, NULL);185{186nir_ssa_def *job_dim_ptr = nir_iadd(&b, job_hdr_ptr,187nir_imm_int64(&b, pan_section_offset(COMPUTE_JOB, INVOCATION)));188nir_ssa_def *num_wg_x_m1 = nir_isub(&b, num_wg_x, one);189nir_ssa_def *num_wg_y_m1 = nir_isub(&b, num_wg_y, one);190nir_ssa_def *num_wg_z_m1 = nir_isub(&b, num_wg_z, one);191nir_ssa_def *job_dim = nir_load_global(&b, job_dim_ptr, 8, 2, 32);192nir_ssa_def *dims = nir_channel(&b, job_dim, 0);193nir_ssa_def *split = nir_channel(&b, job_dim, 1);194nir_ssa_def *num_wg_x_split = nir_iand_imm(&b, nir_ushr_imm(&b, split, 10), 0x3f);195nir_ssa_def *num_wg_y_split = nir_iadd(&b, num_wg_x_split,196nir_isub_imm(&b, 32, nir_uclz(&b, num_wg_x_m1)));197nir_ssa_def *num_wg_z_split = nir_iadd(&b, num_wg_y_split,198nir_isub_imm(&b, 32, nir_uclz(&b, num_wg_y_m1)));199split = nir_ior(&b, split,200nir_ior(&b,201nir_ishl(&b, num_wg_y_split, nir_imm_int(&b, 16)),202nir_ishl(&b, num_wg_z_split, nir_imm_int(&b, 22))));203dims = nir_ior(&b, dims,204nir_ior(&b, nir_ishl(&b, num_wg_x_m1, num_wg_x_split),205nir_ior(&b, nir_ishl(&b, num_wg_y_m1, num_wg_y_split),206nir_ishl(&b, num_wg_z_m1, num_wg_z_split))));207208nir_store_global(&b, job_dim_ptr, 8, nir_vec2(&b, dims, split), 3);209210nir_ssa_def *num_wg_x_ptr = get_input_field(&b, num_wg_sysval[0]);211212nir_push_if(&b, nir_ine(&b, num_wg_x_ptr, nir_imm_int64(&b, 0)));213{214nir_store_global(&b, num_wg_x_ptr, 8, num_wg_x, 1);215nir_store_global(&b, get_input_field(&b, num_wg_sysval[1]), 8, num_wg_y, 1);216nir_store_global(&b, get_input_field(&b, num_wg_sysval[2]), 8, num_wg_z, 1);217}218nir_pop_if(&b, NULL);219}220221nir_pop_if(&b, NULL);222223struct panfrost_compile_inputs inputs = { .gpu_id = dev->gpu_id };224struct pan_shader_info shader_info;225struct util_dynarray binary;226227util_dynarray_init(&binary, NULL);228pan_shader_compile(dev, b.shader, &inputs, &binary, &shader_info);229230ralloc_free(b.shader);231232assert(!shader_info.tls_size);233assert(!shader_info.wls_size);234assert(!shader_info.sysvals.sysval_count);235236dev->indirect_dispatch.bin =237panfrost_bo_create(dev, binary.size, PAN_BO_EXECUTE,238"Indirect dispatch shader");239240memcpy(dev->indirect_dispatch.bin->ptr.cpu, binary.data, binary.size);241util_dynarray_fini(&binary);242243dev->indirect_dispatch.push = shader_info.push;244dev->indirect_dispatch.descs =245panfrost_bo_create(dev,246MALI_RENDERER_STATE_LENGTH +247MALI_LOCAL_STORAGE_LENGTH,2480, "Indirect dispatch descriptors");249250mali_ptr address = dev->indirect_dispatch.bin->ptr.gpu;251if (!pan_is_bifrost(dev))252address |= shader_info.midgard.first_tag;253254void *rsd = dev->indirect_dispatch.descs->ptr.cpu;255pan_pack(rsd, RENDERER_STATE, cfg) {256pan_shader_prepare_rsd(dev, &shader_info, address, &cfg);257}258259void *tsd = dev->indirect_dispatch.descs->ptr.cpu +260MALI_RENDERER_STATE_LENGTH;261pan_pack(tsd, LOCAL_STORAGE, ls) {262ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;263};264}265266void267pan_indirect_dispatch_cleanup(struct panfrost_device *dev)268{269panfrost_bo_unreference(dev->indirect_dispatch.bin);270panfrost_bo_unreference(dev->indirect_dispatch.descs);271}272273274