CoCalc -- sp_compute.c

GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/softpipe/sp_compute.c
7303 views
1
/*
2
 * Copyright 2016 Red Hat.
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * on the rights to use, copy, modify, merge, publish, distribute, sub
8
 * license, and/or sell copies of the Software, and to permit persons to whom
9
 * the Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22
 */
23
#include "util/u_inlines.h"
24
#include "util/u_math.h"
25
#include "util/u_memory.h"
26
#include "util/u_pstipple.h"
27
#include "pipe/p_shader_tokens.h"
28
#include "draw/draw_context.h"
29
#include "draw/draw_vertex.h"
30
#include "sp_context.h"
31
#include "sp_screen.h"
32
#include "sp_state.h"
33
#include "sp_texture.h"
34
#include "sp_tex_sample.h"
35
#include "sp_tex_tile_cache.h"
36
#include "tgsi/tgsi_parse.h"
37

38
static void
39
cs_prepare(const struct sp_compute_shader *cs,
40
           struct tgsi_exec_machine *machine,
41
           int w, int h, int d,
42
           int g_w, int g_h, int g_d,
43
           int b_w, int b_h, int b_d,
44
           struct tgsi_sampler *sampler,
45
           struct tgsi_image *image,
46
           struct tgsi_buffer *buffer )
47
{
48
   int j;
49
   /*
50
    * Bind tokens/shader to the interpreter's machine state.
51
    */
52
   tgsi_exec_machine_bind_shader(machine,
53
                                 cs->tokens,
54
                                 sampler, image, buffer);
55

56
   if (machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID] != -1) {
57
      unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID];
58
      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
59
         machine->SystemValue[i].xyzw[0].i[j] = w;
60
         machine->SystemValue[i].xyzw[1].i[j] = h;
61
         machine->SystemValue[i].xyzw[2].i[j] = d;
62
      }
63
   }
64

65
   if (machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE] != -1) {
66
      unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE];
67
      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
68
         machine->SystemValue[i].xyzw[0].i[j] = g_w;
69
         machine->SystemValue[i].xyzw[1].i[j] = g_h;
70
         machine->SystemValue[i].xyzw[2].i[j] = g_d;
71
      }
72
   }
73

74
   if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE] != -1) {
75
      unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE];
76
      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
77
         machine->SystemValue[i].xyzw[0].i[j] = b_w;
78
         machine->SystemValue[i].xyzw[1].i[j] = b_h;
79
         machine->SystemValue[i].xyzw[2].i[j] = b_d;
80
      }
81
   }
82
}
83

84
static bool
85
cs_run(const struct sp_compute_shader *cs,
86
       int g_w, int g_h, int g_d,
87
       struct tgsi_exec_machine *machine, bool restart)
88
{
89
   if (!restart) {
90
      if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID] != -1) {
91
         unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID];
92
         int j;
93
         for (j = 0; j < TGSI_QUAD_SIZE; j++) {
94
            machine->SystemValue[i].xyzw[0].i[j] = g_w;
95
            machine->SystemValue[i].xyzw[1].i[j] = g_h;
96
            machine->SystemValue[i].xyzw[2].i[j] = g_d;
97
         }
98
      }
99
      machine->NonHelperMask = (1 << 1) - 1;
100
   }
101

102
   tgsi_exec_machine_run(machine, restart ? machine->pc : 0);
103

104
   if (machine->pc != -1)
105
      return true;
106
   return false;
107
}
108

109
/**
 * Execute all threads of one work-group at grid position (g_w, g_h, g_d).
 *
 * Every machine in \p machines is run once per pass.  The first pass starts
 * each thread from scratch; if any cs_run() call in a pass returns true, a
 * further pass is made in which every thread is resumed (restart = true).
 * The loop ends after the first pass in which no call returns true.
 */
static void
run_workgroup(const struct sp_compute_shader *cs,
              int g_w, int g_h, int g_d, int num_threads,
              struct tgsi_exec_machine **machines)
{
   bool resume = false;

   for (;;) {
      bool any_pending = false;
      int t;

      /* cs_run() must be invoked for every thread, so no short-circuiting. */
      for (t = 0; t < num_threads; t++) {
         if (cs_run(cs, g_w, g_h, g_d, machines[t], resume))
            any_pending = true;
      }

      if (!any_pending)
         break;

      resume = true;
   }
}
129

130
static void
131
cs_delete(const struct sp_compute_shader *cs,
132
          struct tgsi_exec_machine *machine)
133
{
134
   if (machine->Tokens == cs->tokens) {
135
      tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL, NULL);
136
   }
137
}
138

139
static void
140
fill_grid_size(struct pipe_context *context,
141
               const struct pipe_grid_info *info,
142
               uint32_t grid_size[3])
143
{
144
   struct pipe_transfer *transfer;
145
   uint32_t *params;
146
   if (!info->indirect) {
147
      grid_size[0] = info->grid[0];
148
      grid_size[1] = info->grid[1];
149
      grid_size[2] = info->grid[2];
150
      return;
151
   }
152
   params = pipe_buffer_map_range(context, info->indirect,
153
                                  info->indirect_offset,
154
                                  3 * sizeof(uint32_t),
155
                                  PIPE_MAP_READ,
156
                                  &transfer);
157

158
   if (!transfer)
159
      return;
160

161
   grid_size[0] = params[0];
162
   grid_size[1] = params[1];
163
   grid_size[2] = params[2];
164
   pipe_buffer_unmap(context, transfer);
165
}
166

167
void
168
softpipe_launch_grid(struct pipe_context *context,
169
                     const struct pipe_grid_info *info)
170
{
171
   struct softpipe_context *softpipe = softpipe_context(context);
172
   struct sp_compute_shader *cs = softpipe->cs;
173
   int num_threads_in_group;
174
   struct tgsi_exec_machine **machines;
175
   int bwidth, bheight, bdepth;
176
   int w, h, d, i;
177
   int g_w, g_h, g_d;
178
   uint32_t grid_size[3] = {0};
179
   void *local_mem = NULL;
180

181
   softpipe_update_compute_samplers(softpipe);
182
   bwidth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH];
183
   bheight = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT];
184
   bdepth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
185
   num_threads_in_group = bwidth * bheight * bdepth;
186

187
   fill_grid_size(context, info, grid_size);
188

189
   if (cs->shader.req_local_mem) {
190
      local_mem = CALLOC(1, cs->shader.req_local_mem);
191
   }
192

193
   machines = CALLOC(sizeof(struct tgsi_exec_machine *), num_threads_in_group);
194
   if (!machines) {
195
      FREE(local_mem);
196
      return;
197
   }
198

199
   /* initialise machines + GRID_SIZE + THREAD_ID  + BLOCK_SIZE */
200
   for (d = 0; d < bdepth; d++) {
201
      for (h = 0; h < bheight; h++) {
202
         for (w = 0; w < bwidth; w++) {
203
            int idx = w + (h * bwidth) + (d * bheight * bwidth);
204
            machines[idx] = tgsi_exec_machine_create(PIPE_SHADER_COMPUTE);
205

206
            machines[idx]->LocalMem = local_mem;
207
            machines[idx]->LocalMemSize = cs->shader.req_local_mem;
208
            cs_prepare(cs, machines[idx],
209
                       w, h, d,
210
                       grid_size[0], grid_size[1], grid_size[2],
211
                       bwidth, bheight, bdepth,
212
                       (struct tgsi_sampler *)softpipe->tgsi.sampler[PIPE_SHADER_COMPUTE],
213
                       (struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_COMPUTE],
214
                       (struct tgsi_buffer *)softpipe->tgsi.buffer[PIPE_SHADER_COMPUTE]);
215
            tgsi_exec_set_constant_buffers(machines[idx], PIPE_MAX_CONSTANT_BUFFERS,
216
                                           softpipe->mapped_constants[PIPE_SHADER_COMPUTE],
217
                                           softpipe->const_buffer_size[PIPE_SHADER_COMPUTE]);
218
         }
219
      }
220
   }
221

222
   for (g_d = 0; g_d < grid_size[2]; g_d++) {
223
      for (g_h = 0; g_h < grid_size[1]; g_h++) {
224
         for (g_w = 0; g_w < grid_size[0]; g_w++) {
225
            run_workgroup(cs, g_w, g_h, g_d, num_threads_in_group, machines);
226
         }
227
      }
228
   }
229

230
   if (softpipe->active_statistics_queries) {
231
      softpipe->pipeline_statistics.cs_invocations +=
232
          grid_size[0] * grid_size[1] * grid_size[2];
233
   }
234

235
   for (i = 0; i < num_threads_in_group; i++) {
236
      cs_delete(cs, machines[i]);
237
      tgsi_exec_machine_destroy(machines[i]);
238
   }
239

240
   FREE(local_mem);
241
   FREE(machines);
242
}
243

244
Product

Resources

Company