Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/panfrost/lib/pan_indirect_dispatch.c
4560 views
1
/*
 * Copyright (C) 2021 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
24
25
#include <stdio.h>
26
#include "pan_bo.h"
27
#include "pan_shader.h"
28
#include "pan_scoreboard.h"
29
#include "pan_encoder.h"
30
#include "pan_indirect_dispatch.h"
31
#include "pan_pool.h"
32
#include "pan_util.h"
33
#include "panfrost-quirks.h"
34
#include "compiler/nir/nir_builder.h"
35
#include "util/u_memory.h"
36
#include "util/macros.h"
37
38
struct indirect_dispatch_inputs {
39
mali_ptr job;
40
mali_ptr indirect_dim;
41
mali_ptr num_wg_sysval[3];
42
};
43
44
static nir_ssa_def *
45
get_input_data(nir_builder *b, unsigned offset, unsigned size)
46
{
47
assert(!(offset & 0x3));
48
assert(size && !(size & 0x3));
49
50
return nir_load_ubo(b, 1, size,
51
nir_imm_int(b, 0),
52
nir_imm_int(b, offset),
53
.align_mul = 4,
54
.align_offset = 0,
55
.range_base = 0,
56
.range = ~0);
57
}
58
59
/* Load the member `name` of struct indirect_dispatch_inputs inside the shader
 * being built by `b`. The byte offset comes from offsetof() and the load
 * width (in bits) from the member's sizeof(), so `name` may be any member
 * designator offsetof() accepts. */
#define get_input_field(b, name) \
        get_input_data(b, \
                       offsetof(struct indirect_dispatch_inputs, name), \
                       8 * sizeof(((struct indirect_dispatch_inputs *)0)->name))
62
63
static mali_ptr
64
get_rsd(const struct panfrost_device *dev)
65
{
66
return dev->indirect_dispatch.descs->ptr.gpu;
67
}
68
69
static mali_ptr
70
get_tls(const struct panfrost_device *dev)
71
{
72
return dev->indirect_dispatch.descs->ptr.gpu +
73
MALI_RENDERER_STATE_LENGTH;
74
}
75
76
static mali_ptr
77
get_ubos(struct pan_pool *pool,
78
const struct indirect_dispatch_inputs *inputs)
79
{
80
struct panfrost_ptr inputs_buf =
81
pan_pool_alloc_aligned(pool, ALIGN_POT(sizeof(*inputs), 16), 16);
82
83
memcpy(inputs_buf.cpu, inputs, sizeof(*inputs));
84
85
struct panfrost_ptr ubos_buf =
86
pan_pool_alloc_desc(pool, UNIFORM_BUFFER);
87
88
pan_pack(ubos_buf.cpu, UNIFORM_BUFFER, cfg) {
89
cfg.entries = DIV_ROUND_UP(sizeof(*inputs), 16);
90
cfg.pointer = inputs_buf.gpu;
91
}
92
93
return ubos_buf.gpu;
94
}
95
96
static mali_ptr
97
get_push_uniforms(struct pan_pool *pool,
98
const struct indirect_dispatch_inputs *inputs)
99
{
100
const struct panfrost_device *dev = pool->dev;
101
struct panfrost_ptr push_consts_buf =
102
pan_pool_alloc_aligned(pool,
103
ALIGN(dev->indirect_dispatch.push.count * 4, 16),
104
16);
105
uint32_t *out = push_consts_buf.cpu;
106
uint8_t *in = (uint8_t *)inputs;
107
108
for (unsigned i = 0; i < dev->indirect_dispatch.push.count; ++i)
109
memcpy(out + i, in + dev->indirect_dispatch.push.words[i].offset, 4);
110
111
return push_consts_buf.gpu;
112
}
113
114
unsigned
115
pan_indirect_dispatch_emit(struct pan_pool *pool,
116
struct pan_scoreboard *scoreboard,
117
const struct pan_indirect_dispatch_info *dispatch_info)
118
{
119
struct panfrost_device *dev = pool->dev;
120
struct panfrost_ptr job =
121
pan_pool_alloc_desc(pool, COMPUTE_JOB);
122
void *invocation =
123
pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION);
124
struct indirect_dispatch_inputs inputs = {
125
.job = dispatch_info->job,
126
.indirect_dim = dispatch_info->indirect_dim,
127
.num_wg_sysval = {
128
dispatch_info->num_wg_sysval[0],
129
dispatch_info->num_wg_sysval[1],
130
dispatch_info->num_wg_sysval[2],
131
},
132
};
133
134
panfrost_pack_work_groups_compute(invocation,
135
1, 1, 1, 1, 1, 1,
136
false, false);
137
138
pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
139
cfg.job_task_split = 2;
140
}
141
142
pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) {
143
cfg.draw_descriptor_is_64b = true;
144
cfg.texture_descriptor_is_64b = !pan_is_bifrost(dev);
145
cfg.state = get_rsd(dev);
146
cfg.thread_storage = get_tls(pool->dev);
147
cfg.uniform_buffers = get_ubos(pool, &inputs);
148
cfg.push_uniforms = get_push_uniforms(pool, &inputs);
149
}
150
151
pan_section_pack(job.cpu, COMPUTE_JOB, DRAW_PADDING, cfg);
152
153
return panfrost_add_job(pool, scoreboard, MALI_JOB_TYPE_COMPUTE,
154
false, true, 0, 0, &job, false);
155
}
156
157
void
158
pan_indirect_dispatch_init(struct panfrost_device *dev)
159
{
160
nir_builder b =
161
nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
162
pan_shader_get_compiler_options(dev),
163
"%s", "indirect_dispatch");
164
b.shader->info.internal = true;
165
nir_variable_create(b.shader, nir_var_mem_ubo,
166
glsl_uint_type(), "inputs");
167
b.shader->info.num_ubos++;
168
169
nir_ssa_def *zero = nir_imm_int(&b, 0);
170
nir_ssa_def *one = nir_imm_int(&b, 1);
171
nir_ssa_def *num_wg = nir_load_global(&b, get_input_field(&b, indirect_dim), 4, 3, 32);
172
nir_ssa_def *num_wg_x = nir_channel(&b, num_wg, 0);
173
nir_ssa_def *num_wg_y = nir_channel(&b, num_wg, 1);
174
nir_ssa_def *num_wg_z = nir_channel(&b, num_wg, 2);
175
176
nir_ssa_def *job_hdr_ptr = get_input_field(&b, job);
177
nir_ssa_def *num_wg_flat = nir_imul(&b, num_wg_x, nir_imul(&b, num_wg_y, num_wg_z));
178
179
nir_push_if(&b, nir_ieq(&b, num_wg_flat, zero));
180
{
181
nir_ssa_def *type_ptr = nir_iadd(&b, job_hdr_ptr, nir_imm_int64(&b, 4 * 4));
182
nir_ssa_def *ntype = nir_imm_intN_t(&b, (MALI_JOB_TYPE_NULL << 1) | 1, 8);
183
nir_store_global(&b, type_ptr, 1, ntype, 1);
184
}
185
nir_push_else(&b, NULL);
186
{
187
nir_ssa_def *job_dim_ptr = nir_iadd(&b, job_hdr_ptr,
188
nir_imm_int64(&b, pan_section_offset(COMPUTE_JOB, INVOCATION)));
189
nir_ssa_def *num_wg_x_m1 = nir_isub(&b, num_wg_x, one);
190
nir_ssa_def *num_wg_y_m1 = nir_isub(&b, num_wg_y, one);
191
nir_ssa_def *num_wg_z_m1 = nir_isub(&b, num_wg_z, one);
192
nir_ssa_def *job_dim = nir_load_global(&b, job_dim_ptr, 8, 2, 32);
193
nir_ssa_def *dims = nir_channel(&b, job_dim, 0);
194
nir_ssa_def *split = nir_channel(&b, job_dim, 1);
195
nir_ssa_def *num_wg_x_split = nir_iand_imm(&b, nir_ushr_imm(&b, split, 10), 0x3f);
196
nir_ssa_def *num_wg_y_split = nir_iadd(&b, num_wg_x_split,
197
nir_isub_imm(&b, 32, nir_uclz(&b, num_wg_x_m1)));
198
nir_ssa_def *num_wg_z_split = nir_iadd(&b, num_wg_y_split,
199
nir_isub_imm(&b, 32, nir_uclz(&b, num_wg_y_m1)));
200
split = nir_ior(&b, split,
201
nir_ior(&b,
202
nir_ishl(&b, num_wg_y_split, nir_imm_int(&b, 16)),
203
nir_ishl(&b, num_wg_z_split, nir_imm_int(&b, 22))));
204
dims = nir_ior(&b, dims,
205
nir_ior(&b, nir_ishl(&b, num_wg_x_m1, num_wg_x_split),
206
nir_ior(&b, nir_ishl(&b, num_wg_y_m1, num_wg_y_split),
207
nir_ishl(&b, num_wg_z_m1, num_wg_z_split))));
208
209
nir_store_global(&b, job_dim_ptr, 8, nir_vec2(&b, dims, split), 3);
210
211
nir_ssa_def *num_wg_x_ptr = get_input_field(&b, num_wg_sysval[0]);
212
213
nir_push_if(&b, nir_ine(&b, num_wg_x_ptr, nir_imm_int64(&b, 0)));
214
{
215
nir_store_global(&b, num_wg_x_ptr, 8, num_wg_x, 1);
216
nir_store_global(&b, get_input_field(&b, num_wg_sysval[1]), 8, num_wg_y, 1);
217
nir_store_global(&b, get_input_field(&b, num_wg_sysval[2]), 8, num_wg_z, 1);
218
}
219
nir_pop_if(&b, NULL);
220
}
221
222
nir_pop_if(&b, NULL);
223
224
struct panfrost_compile_inputs inputs = { .gpu_id = dev->gpu_id };
225
struct pan_shader_info shader_info;
226
struct util_dynarray binary;
227
228
util_dynarray_init(&binary, NULL);
229
pan_shader_compile(dev, b.shader, &inputs, &binary, &shader_info);
230
231
ralloc_free(b.shader);
232
233
assert(!shader_info.tls_size);
234
assert(!shader_info.wls_size);
235
assert(!shader_info.sysvals.sysval_count);
236
237
dev->indirect_dispatch.bin =
238
panfrost_bo_create(dev, binary.size, PAN_BO_EXECUTE,
239
"Indirect dispatch shader");
240
241
memcpy(dev->indirect_dispatch.bin->ptr.cpu, binary.data, binary.size);
242
util_dynarray_fini(&binary);
243
244
dev->indirect_dispatch.push = shader_info.push;
245
dev->indirect_dispatch.descs =
246
panfrost_bo_create(dev,
247
MALI_RENDERER_STATE_LENGTH +
248
MALI_LOCAL_STORAGE_LENGTH,
249
0, "Indirect dispatch descriptors");
250
251
mali_ptr address = dev->indirect_dispatch.bin->ptr.gpu;
252
if (!pan_is_bifrost(dev))
253
address |= shader_info.midgard.first_tag;
254
255
void *rsd = dev->indirect_dispatch.descs->ptr.cpu;
256
pan_pack(rsd, RENDERER_STATE, cfg) {
257
pan_shader_prepare_rsd(dev, &shader_info, address, &cfg);
258
}
259
260
void *tsd = dev->indirect_dispatch.descs->ptr.cpu +
261
MALI_RENDERER_STATE_LENGTH;
262
pan_pack(tsd, LOCAL_STORAGE, ls) {
263
ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
264
};
265
}
266
267
void
268
pan_indirect_dispatch_cleanup(struct panfrost_device *dev)
269
{
270
panfrost_bo_unreference(dev->indirect_dispatch.bin);
271
panfrost_bo_unreference(dev->indirect_dispatch.descs);
272
}
273
274