Path: blob/21.2-virgl/src/gallium/drivers/softpipe/sp_compute.c
4570 views
/*1* Copyright 2016 Red Hat.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* on the rights to use, copy, modify, merge, publish, distribute, sub7* license, and/or sell copies of the Software, and to permit persons to whom8* the Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL17* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,18* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR19* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE20* USE OR OTHER DEALINGS IN THE SOFTWARE.21*/22#include "util/u_inlines.h"23#include "util/u_math.h"24#include "util/u_memory.h"25#include "util/u_pstipple.h"26#include "pipe/p_shader_tokens.h"27#include "draw/draw_context.h"28#include "draw/draw_vertex.h"29#include "sp_context.h"30#include "sp_screen.h"31#include "sp_state.h"32#include "sp_texture.h"33#include "sp_tex_sample.h"34#include "sp_tex_tile_cache.h"35#include "tgsi/tgsi_parse.h"3637static void38cs_prepare(const struct sp_compute_shader *cs,39struct tgsi_exec_machine *machine,40int w, int h, int d,41int g_w, int g_h, int g_d,42int b_w, int b_h, int b_d,43struct tgsi_sampler *sampler,44struct tgsi_image *image,45struct tgsi_buffer *buffer )46{47int j;48/*49* Bind tokens/shader to the interpreter's machine state.50*/51tgsi_exec_machine_bind_shader(machine,52cs->tokens,53sampler, image, buffer);5455if (machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID] != -1) {56unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID];57for (j = 0; j < TGSI_QUAD_SIZE; j++) {58machine->SystemValue[i].xyzw[0].i[j] = w;59machine->SystemValue[i].xyzw[1].i[j] = h;60machine->SystemValue[i].xyzw[2].i[j] = d;61}62}6364if (machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE] != -1) {65unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE];66for (j = 0; j < TGSI_QUAD_SIZE; j++) {67machine->SystemValue[i].xyzw[0].i[j] = g_w;68machine->SystemValue[i].xyzw[1].i[j] = g_h;69machine->SystemValue[i].xyzw[2].i[j] = g_d;70}71}7273if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE] != -1) {74unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE];75for (j = 0; j < TGSI_QUAD_SIZE; j++) {76machine->SystemValue[i].xyzw[0].i[j] = b_w;77machine->SystemValue[i].xyzw[1].i[j] = b_h;78machine->SystemValue[i].xyzw[2].i[j] = b_d;79}80}81}8283static bool84cs_run(const struct sp_compute_shader *cs,85int g_w, int g_h, int g_d,86struct tgsi_exec_machine *machine, bool restart)87{88if (!restart) {89if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID] != -1) {90unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID];91int j;92for (j = 0; j < TGSI_QUAD_SIZE; j++) {93machine->SystemValue[i].xyzw[0].i[j] = g_w;94machine->SystemValue[i].xyzw[1].i[j] = g_h;95machine->SystemValue[i].xyzw[2].i[j] = g_d;96}97}98machine->NonHelperMask = (1 << 1) - 1;99}100101tgsi_exec_machine_run(machine, restart ? machine->pc : 0);102103if (machine->pc != -1)104return true;105return false;106}107108static void109run_workgroup(const struct sp_compute_shader *cs,110int g_w, int g_h, int g_d, int num_threads,111struct tgsi_exec_machine **machines)112{113int i;114bool grp_hit_barrier, restart_threads = false;115116do {117grp_hit_barrier = false;118for (i = 0; i < num_threads; i++) {119grp_hit_barrier |= cs_run(cs, g_w, g_h, g_d, machines[i], restart_threads);120}121restart_threads = false;122if (grp_hit_barrier) {123grp_hit_barrier = false;124restart_threads = true;125}126} while (restart_threads);127}128129static void130cs_delete(const struct sp_compute_shader *cs,131struct tgsi_exec_machine *machine)132{133if (machine->Tokens == cs->tokens) {134tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL, NULL);135}136}137138static void139fill_grid_size(struct pipe_context *context,140const struct pipe_grid_info *info,141uint32_t grid_size[3])142{143struct pipe_transfer *transfer;144uint32_t *params;145if (!info->indirect) {146grid_size[0] = info->grid[0];147grid_size[1] = info->grid[1];148grid_size[2] = info->grid[2];149return;150}151params = pipe_buffer_map_range(context, info->indirect,152info->indirect_offset,1533 * sizeof(uint32_t),154PIPE_MAP_READ,155&transfer);156157if (!transfer)158return;159160grid_size[0] = params[0];161grid_size[1] = params[1];162grid_size[2] = params[2];163pipe_buffer_unmap(context, transfer);164}165166void167softpipe_launch_grid(struct pipe_context *context,168const struct pipe_grid_info *info)169{170struct softpipe_context *softpipe = softpipe_context(context);171struct sp_compute_shader *cs = softpipe->cs;172int num_threads_in_group;173struct tgsi_exec_machine **machines;174int bwidth, bheight, bdepth;175int w, h, d, i;176int g_w, g_h, g_d;177uint32_t grid_size[3] = {0};178void *local_mem = NULL;179180softpipe_update_compute_samplers(softpipe);181bwidth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH];182bheight = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT];183bdepth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];184num_threads_in_group = bwidth * bheight * bdepth;185186fill_grid_size(context, info, grid_size);187188if (cs->shader.req_local_mem) {189local_mem = CALLOC(1, cs->shader.req_local_mem);190}191192machines = CALLOC(sizeof(struct tgsi_exec_machine *), num_threads_in_group);193if (!machines) {194FREE(local_mem);195return;196}197198/* initialise machines + GRID_SIZE + THREAD_ID + BLOCK_SIZE */199for (d = 0; d < bdepth; d++) {200for (h = 0; h < bheight; h++) {201for (w = 0; w < bwidth; w++) {202int idx = w + (h * bwidth) + (d * bheight * bwidth);203machines[idx] = tgsi_exec_machine_create(PIPE_SHADER_COMPUTE);204205machines[idx]->LocalMem = local_mem;206machines[idx]->LocalMemSize = cs->shader.req_local_mem;207cs_prepare(cs, machines[idx],208w, h, d,209grid_size[0], grid_size[1], grid_size[2],210bwidth, bheight, bdepth,211(struct tgsi_sampler *)softpipe->tgsi.sampler[PIPE_SHADER_COMPUTE],212(struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_COMPUTE],213(struct tgsi_buffer *)softpipe->tgsi.buffer[PIPE_SHADER_COMPUTE]);214tgsi_exec_set_constant_buffers(machines[idx], PIPE_MAX_CONSTANT_BUFFERS,215softpipe->mapped_constants[PIPE_SHADER_COMPUTE],216softpipe->const_buffer_size[PIPE_SHADER_COMPUTE]);217}218}219}220221for (g_d = 0; g_d < grid_size[2]; g_d++) {222for (g_h = 0; g_h < grid_size[1]; g_h++) {223for (g_w = 0; g_w < grid_size[0]; g_w++) {224run_workgroup(cs, g_w, g_h, g_d, num_threads_in_group, machines);225}226}227}228229if (softpipe->active_statistics_queries) {230softpipe->pipeline_statistics.cs_invocations +=231grid_size[0] * grid_size[1] * grid_size[2];232}233234for (i = 0; i < num_threads_in_group; i++) {235cs_delete(cs, machines[i]);236tgsi_exec_machine_destroy(machines[i]);237}238239FREE(local_mem);240FREE(machines);241}242243244