GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/radeonsi/gfx10_query.c
/*
 * Copyright 2018 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "si_pipe.h"
#include "si_query.h"
#include "sid.h"
#include "util/u_memory.h"
#include "util/u_suballoc.h"

#include <stddef.h>

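/* Shader-based queries (gfx10+):
 *
 * With NGG, streamout-related statistics are accumulated by shaders rather
 * than by fixed-function hardware, so these queries are built on a list of
 * buffers (sctx->shader_query_buffers) that shaders append result records
 * to. A query records the byte range [first_begin, last_end) that it covers
 * across that list; buffers are reference-counted and recycled once no
 * active query points into them.
 *
 * emit_shader_query below is the atom emit callback: it advances the write
 * head by one record whenever the query-buffer binding is re-emitted for a
 * draw.
 */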
static void emit_shader_query(struct si_context *sctx)
{
   assert(!list_is_empty(&sctx->shader_query_buffers));

   struct gfx10_sh_query_buffer *qbuf =
      list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
   qbuf->head += sizeof(struct gfx10_sh_query_buffer_mem);
}

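/* Drop one query reference from every buffer in [first, last] and free the
 * buffers that are no longer referenced. The newest buffer (still being
 * written) and the oldest buffer (kept for recycling) always stay on the
 * list, as the continues below explain.
 */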
static void gfx10_release_query_buffers(struct si_context *sctx,
                                        struct gfx10_sh_query_buffer *first,
                                        struct gfx10_sh_query_buffer *last)
{
   while (first) {
      struct gfx10_sh_query_buffer *qbuf = first;
      if (first != last)
         first = LIST_ENTRY(struct gfx10_sh_query_buffer, qbuf->list.next, list);
      else
         first = NULL;

      qbuf->refcount--;
      if (qbuf->refcount)
         continue;

      if (qbuf->list.next == &sctx->shader_query_buffers)
         continue; /* keep the most recent buffer; it may not be full yet */
      if (qbuf->list.prev == &sctx->shader_query_buffers)
         continue; /* keep the oldest buffer for recycling */

      list_del(&qbuf->list);
      si_resource_reference(&qbuf->buf, NULL);
      FREE(qbuf);
   }
}

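/* Ensure there is room for one more result record and bind it as the
 * GFX10_GS_QUERY_BUF shader buffer. Preference order: keep appending to the
 * newest buffer while it has space, recycle the oldest buffer if the GPU is
 * completely done with it, otherwise allocate a new one.
 */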
static bool gfx10_alloc_query_buffer(struct si_context *sctx)
{
   if (si_is_atom_dirty(sctx, &sctx->atoms.s.shader_query))
      return true;

   struct gfx10_sh_query_buffer *qbuf = NULL;

   if (!list_is_empty(&sctx->shader_query_buffers)) {
      qbuf = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
      if (qbuf->head + sizeof(struct gfx10_sh_query_buffer_mem) <= qbuf->buf->b.b.width0)
         goto success;

      qbuf = list_first_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
      if (!qbuf->refcount &&
          !si_cs_is_buffer_referenced(sctx, qbuf->buf->buf, RADEON_USAGE_READWRITE) &&
          sctx->ws->buffer_wait(sctx->ws, qbuf->buf->buf, 0, RADEON_USAGE_READWRITE)) {
         /* Can immediately re-use the oldest buffer */
         list_del(&qbuf->list);
      } else {
         qbuf = NULL;
      }
   }

   if (!qbuf) {
      qbuf = CALLOC_STRUCT(gfx10_sh_query_buffer);
      if (unlikely(!qbuf))
         return false;

      struct si_screen *screen = sctx->screen;
      unsigned buf_size =
         MAX2(sizeof(struct gfx10_sh_query_buffer_mem), screen->info.min_alloc_size);
      qbuf->buf = si_resource(pipe_buffer_create(&screen->b, 0, PIPE_USAGE_STAGING, buf_size));
      if (unlikely(!qbuf->buf)) {
         FREE(qbuf);
         return false;
      }
   }

   /* The buffer is currently unused by the GPU. Initialize it.
    *
    * We need to set the high bit of all the primitive counters for
    * compatibility with the SET_PREDICATION packet.
    */
   uint64_t *results = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL,
                                            PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED);
   assert(results);

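   /* The initialization below assumes the record layout declared in
    * si_query.h, which is roughly (a sketch inferred from this file, not
    * the authoritative definition):
    *
    *    struct gfx10_sh_query_buffer_mem {
    *       struct {
    *          uint64_t dummy[2];              // presumably begin snapshots
    *          uint64_t generated_primitives;  // read by add_result below
    *          uint64_t emitted_primitives;
    *       } stream[4];
    *       uint32_t fence;                    // 0 until the GPU finishes
    *       uint32_t pad[31];                  // pads the record to 256 bytes
    *    };
    *
    * All 16 counters get bit 63 pre-set for SET_PREDICATION; the fence slot
    * is cleared.
    */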
   for (unsigned i = 0, e = qbuf->buf->b.b.width0 / sizeof(struct gfx10_sh_query_buffer_mem); i < e;
        ++i) {
      for (unsigned j = 0; j < 16; ++j)
         results[32 * i + j] = (uint64_t)1 << 63;
      results[32 * i + 16] = 0;
   }

   list_addtail(&qbuf->list, &sctx->shader_query_buffers);
   qbuf->head = 0;
   qbuf->refcount = sctx->num_active_shader_queries;

success:;
   struct pipe_shader_buffer sbuf;
   sbuf.buffer = &qbuf->buf->b.b;
   sbuf.buffer_offset = qbuf->head;
   sbuf.buffer_size = sizeof(struct gfx10_sh_query_buffer_mem);
   si_set_internal_shader_buffer(sctx, GFX10_GS_QUERY_BUF, &sbuf);
   sctx->current_vs_state |= S_VS_STATE_STREAMOUT_QUERY_ENABLED(1);

   si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_query);
   return true;
}

static void gfx10_sh_query_destroy(struct si_context *sctx, struct si_query *rquery)
{
   struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
   gfx10_release_query_buffers(sctx, query->first, query->last);
   FREE(query);
}

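/* Starting a query snapshots the current write position: results for this
 * query begin at (first, first_begin). The buffer gains one reference per
 * active query so it is kept alive until gfx10_release_query_buffers.
 */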
static bool gfx10_sh_query_begin(struct si_context *sctx, struct si_query *rquery)
{
   struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;

   gfx10_release_query_buffers(sctx, query->first, query->last);
   query->first = query->last = NULL;

   if (unlikely(!gfx10_alloc_query_buffer(sctx)))
      return false;

   query->first = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
   query->first_begin = query->first->head;

   sctx->num_active_shader_queries++;
   query->first->refcount++;

   return true;
}

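/* Ending a query snapshots (last, last_end) and asks the CP to write a
 * bottom-of-pipe fence into the most recently completed record, so that
 * readers can tell when the GPU has finished writing the results.
 */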
static bool gfx10_sh_query_end(struct si_context *sctx, struct si_query *rquery)
{
   struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;

   if (unlikely(!query->first))
      return false; /* earlier out of memory error */

   query->last = list_last_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
   query->last_end = query->last->head;

   /* Signal the fence of the previous chunk */
   if (query->last_end != 0) {
      uint64_t fence_va = query->last->buf->gpu_address;
      fence_va += query->last_end - sizeof(struct gfx10_sh_query_buffer_mem);
      fence_va += offsetof(struct gfx10_sh_query_buffer_mem, fence);
      si_cp_release_mem(sctx, &sctx->gfx_cs, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
                        EOP_INT_SEL_NONE, EOP_DATA_SEL_VALUE_32BIT, query->last->buf, fence_va,
                        0xffffffff, PIPE_QUERY_GPU_FINISHED);
   }

   sctx->num_active_shader_queries--;

   if (sctx->num_active_shader_queries <= 0) {
      si_set_internal_shader_buffer(sctx, GFX10_GS_QUERY_BUF, NULL);
      sctx->current_vs_state &= C_VS_STATE_STREAMOUT_QUERY_ENABLED;

      /* If a query_begin is followed by a query_end without a draw
       * in-between, we need to clear the atom to ensure that the
       * next query_begin will re-initialize the shader buffer. */
      si_set_atom_dirty(sctx, &sctx->atoms.s.shader_query, false);
   }

   return true;
}

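/* Accumulate a single record into the query result. Bit 63 of every counter
 * is the availability bit that gfx10_alloc_query_buffer pre-set for
 * SET_PREDICATION; mask it off before summing.
 */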
static void gfx10_sh_query_add_result(struct gfx10_sh_query *query,
                                      struct gfx10_sh_query_buffer_mem *qmem,
                                      union pipe_query_result *result)
{
   static const uint64_t mask = ((uint64_t)1 << 63) - 1;

   switch (query->b.type) {
   case PIPE_QUERY_PRIMITIVES_EMITTED:
      result->u64 += qmem->stream[query->stream].emitted_primitives & mask;
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
      result->u64 += qmem->stream[query->stream].generated_primitives & mask;
      break;
   case PIPE_QUERY_SO_STATISTICS:
      result->so_statistics.num_primitives_written +=
         qmem->stream[query->stream].emitted_primitives & mask;
      result->so_statistics.primitives_storage_needed +=
         qmem->stream[query->stream].generated_primitives & mask;
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result->b |= qmem->stream[query->stream].emitted_primitives !=
                   qmem->stream[query->stream].generated_primitives;
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream) {
         result->b |= qmem->stream[stream].emitted_primitives !=
                      qmem->stream[stream].generated_primitives;
      }
      break;
   default:
      assert(0);
   }
}

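/* CPU readback path: walk the buffer list backwards from the last buffer of
 * the query's range, map each buffer (without blocking unless wait is set)
 * and accumulate every record in [results_begin, results_end).
 */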
static bool gfx10_sh_query_get_result(struct si_context *sctx, struct si_query *rquery, bool wait,
                                      union pipe_query_result *result)
{
   struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;

   util_query_clear_result(result, query->b.type);

   if (unlikely(!query->first))
      return false; /* earlier out of memory error */
   assert(query->last);

   for (struct gfx10_sh_query_buffer *qbuf = query->last;;
        qbuf = LIST_ENTRY(struct gfx10_sh_query_buffer, qbuf->list.prev, list)) {
      unsigned usage = PIPE_MAP_READ | (wait ? 0 : PIPE_MAP_DONTBLOCK);
      void *map;

      if (rquery->b.flushed)
         map = sctx->ws->buffer_map(sctx->ws, qbuf->buf->buf, NULL, usage);
      else
         map = si_buffer_map(sctx, qbuf->buf, usage);

      if (!map)
         return false;

      unsigned results_begin = 0;
      unsigned results_end = qbuf->head;
      if (qbuf == query->first)
         results_begin = query->first_begin;
      if (qbuf == query->last)
         results_end = query->last_end;

      while (results_begin != results_end) {
         struct gfx10_sh_query_buffer_mem *qmem = map + results_begin;
         results_begin += sizeof(*qmem);

         gfx10_sh_query_add_result(query, qmem, result);
      }

      if (qbuf == query->first)
         break;
   }

   return true;
}

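/* GPU resolve path: launch a small compute shader over each buffer in the
 * query's range. Partial sums are chained through a 16-byte scratch
 * suballocation (tmp_buffer); the invocation for the last buffer writes the
 * final value to the caller's resource at the given offset.
 */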
static void gfx10_sh_query_get_result_resource(struct si_context *sctx, struct si_query *rquery,
                                               bool wait, enum pipe_query_value_type result_type,
                                               int index, struct pipe_resource *resource,
                                               unsigned offset)
{
   struct gfx10_sh_query *query = (struct gfx10_sh_query *)rquery;
   struct si_qbo_state saved_state = {};
   struct pipe_resource *tmp_buffer = NULL;
   unsigned tmp_buffer_offset = 0;

   if (!sctx->sh_query_result_shader) {
      sctx->sh_query_result_shader = gfx10_create_sh_query_result_cs(sctx);
      if (!sctx->sh_query_result_shader)
         return;
   }

   if (query->first != query->last) {
      u_suballocator_alloc(&sctx->allocator_zeroed_memory, 16, 16, &tmp_buffer_offset, &tmp_buffer);
      if (!tmp_buffer)
         return;
   }

   si_save_qbo_state(sctx, &saved_state);

   /* Pre-fill the constants configuring the shader behavior. */
   struct {
      uint32_t config;
      uint32_t offset;
      uint32_t chain;
      uint32_t result_count;
   } consts;
   struct pipe_constant_buffer constant_buffer = {};

   if (index >= 0) {
      switch (query->b.type) {
      case PIPE_QUERY_PRIMITIVES_GENERATED:
         consts.offset = 4 * sizeof(uint64_t) * query->stream + 2 * sizeof(uint64_t);
         consts.config = 0;
         break;
      case PIPE_QUERY_PRIMITIVES_EMITTED:
         consts.offset = 4 * sizeof(uint64_t) * query->stream + 3 * sizeof(uint64_t);
         consts.config = 0;
         break;
      case PIPE_QUERY_SO_STATISTICS:
         consts.offset = sizeof(uint32_t) * (4 * index + query->stream);
         consts.config = 0;
         break;
      case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
         consts.offset = 4 * sizeof(uint64_t) * query->stream;
         consts.config = 2;
         break;
      case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
         consts.offset = 0;
         consts.config = 3;
         break;
      default:
         unreachable("bad query type");
      }
   } else {
      /* Check result availability. */
      consts.offset = 0;
      consts.config = 1;
   }

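   /* As used here, the low bits of consts.config appear to select the
    * resolve mode (0 = sum counters, 1 = availability check, 2 = overflow
    * on one stream, 3 = overflow on any stream) and bit 3 requests a 64-bit
    * result; consts.offset locates the counter within a record. The
    * authoritative contract is the shader built by
    * gfx10_create_sh_query_result_cs.
    */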
   if (result_type == PIPE_QUERY_TYPE_I64 || result_type == PIPE_QUERY_TYPE_U64)
      consts.config |= 8;

   constant_buffer.buffer_size = sizeof(consts);
   constant_buffer.user_buffer = &consts;

   /* Pre-fill the SSBOs and grid. */
   struct pipe_shader_buffer ssbo[3];
   struct pipe_grid_info grid = {};

   ssbo[1].buffer = tmp_buffer;
   ssbo[1].buffer_offset = tmp_buffer_offset;
   ssbo[1].buffer_size = 16;

   ssbo[2] = ssbo[1];

   grid.block[0] = 1;
   grid.block[1] = 1;
   grid.block[2] = 1;
   grid.grid[0] = 1;
   grid.grid[1] = 1;
   grid.grid[2] = 1;

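   /* One single-thread grid is launched per buffer. consts.chain tells the
    * shader whether a previous partial sum exists (bit 0) and whether it
    * should emit one for the next iteration (bit 1) instead of the final
    * result.
    */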
   struct gfx10_sh_query_buffer *qbuf = query->first;
   for (;;) {
      unsigned begin = qbuf == query->first ? query->first_begin : 0;
      unsigned end = qbuf == query->last ? query->last_end : qbuf->buf->b.b.width0;
      if (!end)
         continue;

      ssbo[0].buffer = &qbuf->buf->b.b;
      ssbo[0].buffer_offset = begin;
      ssbo[0].buffer_size = end - begin;

      consts.result_count = (end - begin) / sizeof(struct gfx10_sh_query_buffer_mem);
      consts.chain = 0;
      if (qbuf != query->first)
         consts.chain |= 1;
      if (qbuf != query->last)
         consts.chain |= 2;

      if (qbuf == query->last) {
         ssbo[2].buffer = resource;
         ssbo[2].buffer_offset = offset;
         ssbo[2].buffer_size = 8;
      }

      sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, false, &constant_buffer);

      if (wait) {
         uint64_t va;

         /* Wait for result availability. Wait only for readiness
          * of the last entry, since the fence writes should be
          * serialized in the CP.
          */
         va = qbuf->buf->gpu_address;
         va += end - sizeof(struct gfx10_sh_query_buffer_mem);
         va += offsetof(struct gfx10_sh_query_buffer_mem, fence);

         si_cp_wait_mem(sctx, &sctx->gfx_cs, va, 0x00000001, 0x00000001, 0);
      }

      si_launch_grid_internal_ssbos(sctx, &grid, sctx->sh_query_result_shader,
                                    SI_OP_SYNC_PS_BEFORE | SI_OP_SYNC_AFTER, SI_COHERENCY_SHADER,
                                    3, ssbo, 0x6);

      if (qbuf == query->last)
         break;
      qbuf = LIST_ENTRY(struct gfx10_sh_query_buffer, qbuf->list.next, list);
   }

   si_restore_qbo_state(sctx, &saved_state);
   pipe_resource_reference(&tmp_buffer, NULL);
}

static const struct si_query_ops gfx10_sh_query_ops = {
   .destroy = gfx10_sh_query_destroy,
   .begin = gfx10_sh_query_begin,
   .end = gfx10_sh_query_end,
   .get_result = gfx10_sh_query_get_result,
   .get_result_resource = gfx10_sh_query_get_result_resource,
};

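/* Create a shader-based query; for per-stream query types, index selects
 * the streamout stream.
 */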
struct pipe_query *gfx10_sh_query_create(struct si_screen *screen, enum pipe_query_type query_type,
                                         unsigned index)
{
   struct gfx10_sh_query *query = CALLOC_STRUCT(gfx10_sh_query);
   if (unlikely(!query))
      return NULL;

   query->b.ops = &gfx10_sh_query_ops;
   query->b.type = query_type;
   query->stream = index;

   return (struct pipe_query *)query;
}

void gfx10_init_query(struct si_context *sctx)
{
   list_inithead(&sctx->shader_query_buffers);
   sctx->atoms.s.shader_query.emit = emit_shader_query;
}

void gfx10_destroy_query(struct si_context *sctx)
{
   while (!list_is_empty(&sctx->shader_query_buffers)) {
      struct gfx10_sh_query_buffer *qbuf =
         list_first_entry(&sctx->shader_query_buffers, struct gfx10_sh_query_buffer, list);
      list_del(&qbuf->list);

      assert(!qbuf->refcount);
      si_resource_reference(&qbuf->buf, NULL);
      FREE(qbuf);
   }
}