Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/crocus/crocus_query.c
4570 views
1
/*
2
* Copyright © 2017 Intel Corporation
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice shall be included
12
* in all copies or substantial portions of the Software.
13
*
14
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20
* DEALINGS IN THE SOFTWARE.
21
*/
22
23
/**
24
* @file crocus_query.c
25
*
26
* ============================= GENXML CODE =============================
27
* [This file is compiled once per generation.]
28
* =======================================================================
29
*
30
* Query object support. This allows measuring various simple statistics
31
* via counters on the GPU. We use GenX code for MI_MATH calculations.
32
*/
33
34
#include <stdio.h>
35
#include <errno.h>
36
#include "perf/intel_perf.h"
37
#include "pipe/p_defines.h"
38
#include "pipe/p_state.h"
39
#include "pipe/p_context.h"
40
#include "pipe/p_screen.h"
41
#include "util/u_inlines.h"
42
#include "util/u_upload_mgr.h"
43
#include "crocus_context.h"
44
#include "crocus_defines.h"
45
#include "crocus_fence.h"
46
#include "crocus_monitor.h"
47
#include "crocus_resource.h"
48
#include "crocus_screen.h"
49
50
#include "crocus_genx_macros.h"
51
52
#if GFX_VER == 6
53
// TOOD: Add these to genxml?
54
#define SO_PRIM_STORAGE_NEEDED(n) (0x2280)
55
#define SO_NUM_PRIMS_WRITTEN(n) (0x2288)
56
57
// TODO: remove HS/DS/CS
58
#define GFX6_IA_VERTICES_COUNT_num 0x2310
59
#define GFX6_IA_PRIMITIVES_COUNT_num 0x2318
60
#define GFX6_VS_INVOCATION_COUNT_num 0x2320
61
#define GFX6_HS_INVOCATION_COUNT_num 0x2300
62
#define GFX6_DS_INVOCATION_COUNT_num 0x2308
63
#define GFX6_GS_INVOCATION_COUNT_num 0x2328
64
#define GFX6_GS_PRIMITIVES_COUNT_num 0x2330
65
#define GFX6_CL_INVOCATION_COUNT_num 0x2338
66
#define GFX6_CL_PRIMITIVES_COUNT_num 0x2340
67
#define GFX6_PS_INVOCATION_COUNT_num 0x2348
68
#define GFX6_CS_INVOCATION_COUNT_num 0x2290
69
#define GFX6_PS_DEPTH_COUNT_num 0x2350
70
71
#elif GFX_VER >= 7
72
#define SO_PRIM_STORAGE_NEEDED(n) (GENX(SO_PRIM_STORAGE_NEEDED0_num) + (n) * 8)
73
#define SO_NUM_PRIMS_WRITTEN(n) (GENX(SO_NUM_PRIMS_WRITTEN0_num) + (n) * 8)
74
#endif
75
76
struct crocus_query {
   struct threaded_query b;

   /* Query type (PIPE_QUERY_*) and, for grouped types such as pipeline
    * statistics or SO overflow, the index/stream within that group.
    */
   enum pipe_query_type type;
   int index;

   /* Result has been computed and is available in `result`. */
   bool ready;

   /* A stalling flush was emitted while recording this query's snapshots. */
   bool stalled;

   /* Accumulated result; valid only once `ready` is set. */
   uint64_t result;

   /* GPU buffer holding the snapshot area (struct crocus_query_snapshots
    * or crocus_query_so_overflow), plus its CPU mapping.
    */
   struct crocus_state_ref query_state_ref;
   struct crocus_query_snapshots *map;
   struct crocus_syncobj *syncobj;

   /* Which batch (CROCUS_BATCH_RENDER / CROCUS_BATCH_COMPUTE) this runs on. */
   int batch_idx;

   /* Performance-monitor backing for PIPE_QUERY_DRIVER_SPECIFIC queries. */
   struct crocus_monitor_object *monitor;

   /* Fence for PIPE_QUERY_GPU_FINISHED. */
   struct pipe_fence_handle *fence;
};
99
100
/**
 * CPU/GPU-shared layout of a query's snapshot buffer (mapped at q->map).
 */
struct crocus_query_snapshots {
   /** crocus_render_condition's saved MI_PREDICATE_RESULT value. */
   uint64_t predicate_result;

   /** Have the start/end snapshots landed? */
   uint64_t snapshots_landed;

   /** Starting and ending counter snapshots */
   uint64_t start;
   uint64_t end;
};
111
112
/**
 * Snapshot layout for SO overflow queries: per-stream counter pairs,
 * each captured at query begin ([0]) and end ([1]).
 *
 * The two leading fields mirror struct crocus_query_snapshots, so the
 * shared predicate/availability handling works for both layouts.
 */
struct crocus_query_so_overflow {
   uint64_t predicate_result;
   uint64_t snapshots_landed;

   struct {
      uint64_t prim_storage_needed[2];
      uint64_t num_prims[2];
   } stream[4];
};
121
122
#if GFX_VERx10 >= 75
123
static struct mi_value
124
query_mem64(struct crocus_query *q, uint32_t offset)
125
{
126
return mi_mem64(rw_bo(crocus_resource_bo(q->query_state_ref.res),
127
q->query_state_ref.offset + offset));
128
}
129
#endif
130
131
/**
132
* Is this type of query written by PIPE_CONTROL?
133
*/
134
static bool
135
crocus_is_query_pipelined(struct crocus_query *q)
136
{
137
switch (q->type) {
138
case PIPE_QUERY_OCCLUSION_COUNTER:
139
case PIPE_QUERY_OCCLUSION_PREDICATE:
140
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
141
case PIPE_QUERY_TIMESTAMP:
142
case PIPE_QUERY_TIMESTAMP_DISJOINT:
143
case PIPE_QUERY_TIME_ELAPSED:
144
return true;
145
146
default:
147
return false;
148
}
149
}
150
151
/**
 * Write the "snapshots landed" flag into the query's snapshot buffer.
 *
 * For pipelined queries the write is ordered *after* the query results
 * via PIPE_CONTROL; non-pipelined queries can use a plain immediate write.
 * Only implemented for GFX_VERx10 >= 75 (no availability tracking before).
 */
static void
mark_available(struct crocus_context *ice, struct crocus_query *q)
{
#if GFX_VERx10 >= 75
   struct crocus_batch *batch = &ice->batches[q->batch_idx];
   struct crocus_screen *screen = batch->screen;
   unsigned flags = PIPE_CONTROL_WRITE_IMMEDIATE;
   unsigned offset = offsetof(struct crocus_query_snapshots, snapshots_landed);
   struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res);
   offset += q->query_state_ref.offset;

   if (!crocus_is_query_pipelined(q)) {
      /* Snapshots were written with a stall; a simple immediate suffices. */
      screen->vtbl.store_data_imm64(batch, bo, offset, true);
   } else {
      /* Order available *after* the query results. */
      flags |= PIPE_CONTROL_FLUSH_ENABLE;
      crocus_emit_pipe_control_write(batch, "query: mark available",
                                     flags, bo, offset, true);
   }
#endif
}
172
173
/**
174
* Write PS_DEPTH_COUNT to q->(dest) via a PIPE_CONTROL.
175
*/
176
static void
177
crocus_pipelined_write(struct crocus_batch *batch,
178
struct crocus_query *q,
179
enum pipe_control_flags flags,
180
unsigned offset)
181
{
182
struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res);
183
184
crocus_emit_pipe_control_write(batch, "query: pipelined snapshot write",
185
flags,
186
bo, offset, 0ull);
187
}
188
189
/**
 * Record one snapshot (start or end) for query \p q into its snapshot
 * buffer at byte offset \p offset, using whichever mechanism the query
 * type requires: a PIPE_CONTROL write for pipelined queries, or an
 * MI_STORE_REGISTER_MEM of the relevant statistics counter otherwise.
 */
static void
write_value(struct crocus_context *ice, struct crocus_query *q, unsigned offset)
{
   struct crocus_batch *batch = &ice->batches[q->batch_idx];
#if GFX_VER >= 6
   struct crocus_screen *screen = batch->screen;
   struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res);
#endif

   if (!crocus_is_query_pipelined(q)) {
      /* Stall first so the counter registers reflect all work submitted
       * so far before we read them with MI_STORE_REGISTER_MEM.
       */
      crocus_emit_pipe_control_flush(batch,
                                     "query: non-pipelined snapshot write",
                                     PIPE_CONTROL_CS_STALL |
                                     PIPE_CONTROL_STALL_AT_SCOREBOARD);
      q->stalled = true;
   }

   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      /* Occlusion queries snapshot the depth count via PIPE_CONTROL. */
      crocus_pipelined_write(&ice->batches[CROCUS_BATCH_RENDER], q,
                             PIPE_CONTROL_WRITE_DEPTH_COUNT |
                             PIPE_CONTROL_DEPTH_STALL,
                             offset);
      break;
   case PIPE_QUERY_TIME_ELAPSED:
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      crocus_pipelined_write(&ice->batches[CROCUS_BATCH_RENDER], q,
                             PIPE_CONTROL_WRITE_TIMESTAMP,
                             offset);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
#if GFX_VER >= 6
      /* Stream 0 uses the clipper invocation counter; other streams use
       * the per-stream SO primitive-storage-needed counter.
       */
      screen->vtbl.store_register_mem64(batch,
                                        q->index == 0 ?
                                        GENX(CL_INVOCATION_COUNT_num) :
                                        SO_PRIM_STORAGE_NEEDED(q->index),
                                        bo, offset, false);
#endif
      break;
   case PIPE_QUERY_PRIMITIVES_EMITTED:
#if GFX_VER >= 6
      screen->vtbl.store_register_mem64(batch,
                                        SO_NUM_PRIMS_WRITTEN(q->index),
                                        bo, offset, false);
#endif
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE: {
#if GFX_VER >= 6
      /* Maps the query index (PIPE_STAT_QUERY_*) to a counter register. */
      static const uint32_t index_to_reg[] = {
         GENX(IA_VERTICES_COUNT_num),
         GENX(IA_PRIMITIVES_COUNT_num),
         GENX(VS_INVOCATION_COUNT_num),
         GENX(GS_INVOCATION_COUNT_num),
         GENX(GS_PRIMITIVES_COUNT_num),
         GENX(CL_INVOCATION_COUNT_num),
         GENX(CL_PRIMITIVES_COUNT_num),
         GENX(PS_INVOCATION_COUNT_num),
         GENX(HS_INVOCATION_COUNT_num),
         GENX(DS_INVOCATION_COUNT_num),
         GENX(CS_INVOCATION_COUNT_num),
      };
      uint32_t reg = index_to_reg[q->index];

#if GFX_VER == 6
      /* Gfx6 GS code counts full primitives, that is, it won't count individual
       * triangles in a triangle strip. Use CL_INVOCATION_COUNT for that.
       */
      if (q->index == PIPE_STAT_QUERY_GS_PRIMITIVES)
         reg = GENX(CL_INVOCATION_COUNT_num);
#endif

      screen->vtbl.store_register_mem64(batch, reg, bo, offset, false);
#endif
      break;
   }
   default:
      assert(false);
   }
}
271
272
#if GFX_VER >= 6
273
static void
274
write_overflow_values(struct crocus_context *ice, struct crocus_query *q, bool end)
275
{
276
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
277
struct crocus_screen *screen = batch->screen;
278
uint32_t count = q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ? 1 : 4;
279
struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res);
280
uint32_t offset = q->query_state_ref.offset;
281
crocus_emit_pipe_control_flush(batch,
282
"query: write SO overflow snapshots",
283
PIPE_CONTROL_CS_STALL |
284
PIPE_CONTROL_STALL_AT_SCOREBOARD);
285
for (uint32_t i = 0; i < count; i++) {
286
int s = q->index + i;
287
int g_idx = offset + offsetof(struct crocus_query_so_overflow,
288
stream[s].num_prims[end]);
289
int w_idx = offset + offsetof(struct crocus_query_so_overflow,
290
stream[s].prim_storage_needed[end]);
291
screen->vtbl.store_register_mem64(batch, SO_NUM_PRIMS_WRITTEN(s),
292
bo, g_idx, false);
293
screen->vtbl.store_register_mem64(batch, SO_PRIM_STORAGE_NEEDED(s),
294
bo, w_idx, false);
295
}
296
}
297
#endif
298
static uint64_t
299
crocus_raw_timestamp_delta(uint64_t time0, uint64_t time1)
300
{
301
if (time0 > time1) {
302
return (1ULL << TIMESTAMP_BITS) + time1 - time0;
303
} else {
304
return time1 - time0;
305
}
306
}
307
308
static bool
309
stream_overflowed(struct crocus_query_so_overflow *so, int s)
310
{
311
return (so->stream[s].prim_storage_needed[1] -
312
so->stream[s].prim_storage_needed[0]) !=
313
(so->stream[s].num_prims[1] - so->stream[s].num_prims[0]);
314
}
315
316
/**
 * Compute a query's final result from its CPU-mapped snapshots and mark
 * it ready.  Callers check that the snapshots have landed first.
 */
static void
calculate_result_on_cpu(const struct intel_device_info *devinfo,
                        struct crocus_query *q)
{
   switch (q->type) {
   case PIPE_QUERY_OCCLUSION_PREDICATE:
   case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
      /* Boolean result: did the depth count change at all? */
      q->result = q->map->end != q->map->start;
      break;
   case PIPE_QUERY_TIMESTAMP:
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
      /* The timestamp is the single starting snapshot. */
      q->result = intel_device_info_timebase_scale(devinfo, q->map->start);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_TIME_ELAPSED:
      /* Raw delta (handling counter wrap), then scale to nanoseconds. */
      q->result = crocus_raw_timestamp_delta(q->map->start, q->map->end);
      q->result = intel_device_info_timebase_scale(devinfo, q->result);
      q->result &= (1ull << TIMESTAMP_BITS) - 1;
      break;
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      q->result = stream_overflowed((void *) q->map, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      q->result = false;
      for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
         q->result |= stream_overflowed((void *) q->map, i);
      break;
   case PIPE_QUERY_PIPELINE_STATISTICS_SINGLE:
      q->result = q->map->end - q->map->start;

      /* WaDividePSInvocationCountBy4:HSW,BDW */
      if (GFX_VERx10 >= 75 && q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
         q->result /= 4;
      break;
   case PIPE_QUERY_OCCLUSION_COUNTER:
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   default:
      /* Plain end - start counter difference. */
      q->result = q->map->end - q->map->start;
      break;
   }

   q->ready = true;
}
361
362
#if GFX_VERx10 >= 75
/**
 * Calculate the streamout overflow for stream \p idx:
 *
 * (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
 */
static struct mi_value
calc_overflow_for_stream(struct mi_builder *b,
                         struct crocus_query *q,
                         int idx)
{
   /* Shorthand: 64-bit read of one counter slot in the snapshot buffer. */
#define C(counter, i) query_mem64(q, \
   offsetof(struct crocus_query_so_overflow, stream[idx].counter[i]))

   return mi_isub(b, mi_isub(b, C(num_prims, 1), C(num_prims, 0)),
                  mi_isub(b, C(prim_storage_needed, 1),
                          C(prim_storage_needed, 0)));
#undef C
}
381
382
/**
 * Calculate whether any stream has overflowed.
 *
 * Emits all per-stream overflow calculations first, then ORs the values
 * together; a nonzero result means at least one stream overflowed.
 */
static struct mi_value
calc_overflow_any_stream(struct mi_builder *b, struct crocus_query *q)
{
   struct mi_value stream_result[MAX_VERTEX_STREAMS];
   for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
      stream_result[i] = calc_overflow_for_stream(b, q, i);

   struct mi_value result = stream_result[0];
   for (int i = 1; i < MAX_VERTEX_STREAMS; i++)
      result = mi_ior(b, result, stream_result[i]);

   return result;
}
398
399
400
static bool
401
query_is_boolean(enum pipe_query_type type)
402
{
403
switch (type) {
404
case PIPE_QUERY_OCCLUSION_PREDICATE:
405
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
406
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
407
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
408
return true;
409
default:
410
return false;
411
}
412
}
413
414
/**
 * Calculate the result using MI_MATH.
 *
 * Builds an mi_value expression computing the query result entirely on
 * the command streamer, for crocus_get_query_result_resource().
 */
static struct mi_value
calculate_result_on_gpu(const struct intel_device_info *devinfo,
                        struct mi_builder *b,
                        struct crocus_query *q)
{
   struct mi_value result;
   struct mi_value start_val =
      query_mem64(q, offsetof(struct crocus_query_snapshots, start));
   struct mi_value end_val =
      query_mem64(q, offsetof(struct crocus_query_snapshots, end));

   switch (q->type) {
   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
      result = calc_overflow_for_stream(b, q, q->index);
      break;
   case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
      result = calc_overflow_any_stream(b, q);
      break;
   case PIPE_QUERY_TIMESTAMP: {
      /* TODO: This discards any fractional bits of the timebase scale.
       * We would need to do a bit of fixed point math on the CS ALU, or
       * launch an actual shader to calculate this with full precision.
       */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = mi_iand(b, mi_imm((1ull << 36) - 1),
                       mi_imul_imm(b, start_val, scale));
      break;
   }
   case PIPE_QUERY_TIME_ELAPSED: {
      /* TODO: This discards fractional bits (see above). */
      uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
      result = mi_imul_imm(b, mi_isub(b, end_val, start_val), scale);
      break;
   }
   default:
      result = mi_isub(b, end_val, start_val);
      break;
   }
   /* WaDividePSInvocationCountBy4:HSW,BDW */
   if (GFX_VERx10 >= 75 &&
       q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
       q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
      result = mi_ushr32_imm(b, result, 2);

   /* Predicates report 0/1 rather than a raw counter value. */
   if (query_is_boolean(q->type))
      result = mi_iand(b, mi_nz(b, result), mi_imm(1));

   return result;
}
#endif
467
468
static struct pipe_query *
469
crocus_create_query(struct pipe_context *ctx,
470
unsigned query_type,
471
unsigned index)
472
{
473
struct crocus_query *q = calloc(1, sizeof(struct crocus_query));
474
475
q->type = query_type;
476
q->index = index;
477
q->monitor = NULL;
478
479
if (q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
480
q->index == PIPE_STAT_QUERY_CS_INVOCATIONS)
481
q->batch_idx = CROCUS_BATCH_COMPUTE;
482
else
483
q->batch_idx = CROCUS_BATCH_RENDER;
484
return (struct pipe_query *) q;
485
}
486
487
/**
 * Create a "batch query" wrapping a group of driver-specific performance
 * monitor counters.  Returns NULL on allocation or monitor setup failure.
 */
static struct pipe_query *
crocus_create_batch_query(struct pipe_context *ctx,
                          unsigned num_queries,
                          unsigned *query_types)
{
   struct crocus_context *ice = (void *) ctx;
   struct crocus_query *q = calloc(1, sizeof(struct crocus_query));
   if (unlikely(!q))
      return NULL;
   q->type = PIPE_QUERY_DRIVER_SPECIFIC;
   q->index = -1;
   q->monitor = crocus_create_monitor_object(ice, num_queries, query_types);
   if (unlikely(!q->monitor)) {
      free(q);
      return NULL;
   }

   return (struct pipe_query *) q;
}
506
507
static void
508
crocus_destroy_query(struct pipe_context *ctx, struct pipe_query *p_query)
509
{
510
struct crocus_query *query = (void *) p_query;
511
struct crocus_screen *screen = (void *) ctx->screen;
512
if (query->monitor) {
513
crocus_destroy_monitor_object(ctx, query->monitor);
514
query->monitor = NULL;
515
} else {
516
crocus_syncobj_reference(screen, &query->syncobj, NULL);
517
screen->base.fence_reference(ctx->screen, &query->fence, NULL);
518
}
519
free(query);
520
}
521
522
523
/**
 * Begin a query: allocate the snapshot buffer, reset CPU-side state, and
 * record the "start" snapshot.  Returns false if the snapshot buffer
 * could not be allocated or mapped.
 */
static bool
crocus_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct crocus_context *ice = (void *) ctx;
   struct crocus_query *q = (void *) query;

   /* Driver-specific batch queries are handled by the monitor code. */
   if (q->monitor)
      return crocus_begin_monitor(ctx, q->monitor);

   void *ptr = NULL;
   uint32_t size;

   /* SO overflow queries need room for per-stream counter pairs;
    * everything else uses the common snapshot layout.
    */
   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      size = sizeof(struct crocus_query_so_overflow);
   else
      size = sizeof(struct crocus_query_snapshots);

   u_upload_alloc(ice->query_buffer_uploader, 0,
                  size, size, &q->query_state_ref.offset,
                  &q->query_state_ref.res, &ptr);

   if (!crocus_resource_bo(q->query_state_ref.res))
      return false;

   q->map = ptr;
   if (!q->map)
      return false;

   q->result = 0ull;
   q->ready = false;
   WRITE_ONCE(q->map->snapshots_landed, false);

   /* An active stream-0 primitives-generated query affects streamout
    * and clip state emission.
    */
   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = true;
      ice->state.dirty |= CROCUS_DIRTY_STREAMOUT | CROCUS_DIRTY_CLIP;
   }

#if GFX_VER <= 5
   /* Gfx4/5 track WM statistics enabling per active occlusion query. */
   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER ||
       q->type == PIPE_QUERY_OCCLUSION_PREDICATE) {
      ice->state.stats_wm++;
      ice->state.dirty |= CROCUS_DIRTY_WM | CROCUS_DIRTY_COLOR_CALC_STATE;
   }
#endif
#if GFX_VER >= 6
   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, false);
   else
#endif
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct crocus_query_snapshots, start));

   return true;
}
580
581
/**
 * End a query: record the "end" snapshots (or flush for GPU_FINISHED)
 * and mark the result available once the batch's work completes.
 */
static bool
crocus_end_query(struct pipe_context *ctx, struct pipe_query *query)
{
   struct crocus_context *ice = (void *) ctx;
   struct crocus_query *q = (void *) query;

   if (q->monitor)
      return crocus_end_monitor(ctx, q->monitor);

   if (q->type == PIPE_QUERY_GPU_FINISHED) {
      /* GPU_FINISHED is just a deferred fence covering prior work. */
      ctx->flush(ctx, &q->fence, PIPE_FLUSH_DEFERRED);
      return true;
   }

   struct crocus_batch *batch = &ice->batches[q->batch_idx];

   if (q->type == PIPE_QUERY_TIMESTAMP) {
      /* Timestamps have no "begin"; reuse the begin path to record the
       * single snapshot now, then mark it available.
       */
      crocus_begin_query(ctx, query);
      crocus_batch_reference_signal_syncobj(batch, &q->syncobj);
      mark_available(ice, q);
      return true;
   }

#if GFX_VER <= 5
   /* Undo the WM statistics enable taken in crocus_begin_query(). */
   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER ||
       q->type == PIPE_QUERY_OCCLUSION_PREDICATE) {
      ice->state.stats_wm--;
      ice->state.dirty |= CROCUS_DIRTY_WM | CROCUS_DIRTY_COLOR_CALC_STATE;
   }
#endif
   if (q->type == PIPE_QUERY_PRIMITIVES_GENERATED && q->index == 0) {
      ice->state.prims_generated_query_active = false;
      ice->state.dirty |= CROCUS_DIRTY_STREAMOUT | CROCUS_DIRTY_CLIP;
   }

#if GFX_VER >= 6
   if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
       q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
      write_overflow_values(ice, q, true);
   else
#endif
      write_value(ice, q,
                  q->query_state_ref.offset +
                  offsetof(struct crocus_query_snapshots, end));

   crocus_batch_reference_signal_syncobj(batch, &q->syncobj);
   mark_available(ice, q);

   return true;
}
631
632
/**
633
* See if the snapshots have landed for a query, and if so, compute the
634
* result and mark it ready. Does not flush (unlike crocus_get_query_result).
635
*/
636
static void
637
crocus_check_query_no_flush(struct crocus_context *ice, struct crocus_query *q)
638
{
639
struct crocus_screen *screen = (void *) ice->ctx.screen;
640
const struct intel_device_info *devinfo = &screen->devinfo;
641
642
if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
643
calculate_result_on_cpu(devinfo, q);
644
}
645
}
646
647
/**
 * Fetch a query result, optionally blocking until it is available.
 * Returns false only when \p wait is false and the result isn't ready.
 */
static bool
crocus_get_query_result(struct pipe_context *ctx,
                        struct pipe_query *query,
                        bool wait,
                        union pipe_query_result *result)
{
   struct crocus_context *ice = (void *) ctx;
   struct crocus_query *q = (void *) query;

   if (q->monitor)
      return crocus_get_monitor_result(ctx, q->monitor, wait, result->batch);

   struct crocus_screen *screen = (void *) ctx->screen;
   const struct intel_device_info *devinfo = &screen->devinfo;

   /* Without hardware, report zero so callers can still make progress. */
   if (unlikely(screen->no_hw)) {
      result->u64 = 0;
      return true;
   }

   if (!q->ready) {
      struct crocus_batch *batch = &ice->batches[q->batch_idx];
      /* If the commands producing the result are still queued in the
       * current batch, submit them so the GPU can make progress.
       */
      if (q->syncobj == crocus_batch_get_signal_syncobj(batch))
         crocus_batch_flush(batch);

#if GFX_VERx10 >= 75
      /* Poll the availability flag, optionally blocking on the syncobj. */
      while (!READ_ONCE(q->map->snapshots_landed)) {
         if (wait)
            crocus_wait_syncobj(ctx->screen, q->syncobj, INT64_MAX);
         else
            return false;
      }
      assert(READ_ONCE(q->map->snapshots_landed));
#else
      /* No availability tracking pre-Haswell; rely on the syncobj alone. */
      if (crocus_wait_syncobj(ctx->screen, q->syncobj, wait ? INT64_MAX : 0))
         return false;
#endif
      calculate_result_on_cpu(devinfo, q);
   }

   assert(q->ready);

   result->u64 = q->result;

   return true;
}
693
694
#if GFX_VER >= 7
695
static void
696
crocus_get_query_result_resource(struct pipe_context *ctx,
697
struct pipe_query *query,
698
bool wait,
699
enum pipe_query_value_type result_type,
700
int index,
701
struct pipe_resource *p_res,
702
unsigned offset)
703
{
704
struct crocus_context *ice = (void *) ctx;
705
struct crocus_query *q = (void *) query;
706
struct crocus_batch *batch = &ice->batches[q->batch_idx];
707
struct crocus_screen *screen = batch->screen;
708
const struct intel_device_info *devinfo = &batch->screen->devinfo;
709
struct crocus_resource *res = (void *) p_res;
710
struct crocus_bo *query_bo = crocus_resource_bo(q->query_state_ref.res);
711
struct crocus_bo *dst_bo = crocus_resource_bo(p_res);
712
unsigned snapshots_landed_offset =
713
offsetof(struct crocus_query_snapshots, snapshots_landed);
714
715
res->bind_history |= PIPE_BIND_QUERY_BUFFER;
716
717
if (index == -1) {
718
/* They're asking for the availability of the result. If we still
719
* have commands queued up which produce the result, submit them
720
* now so that progress happens. Either way, copy the snapshots
721
* landed field to the destination resource.
722
*/
723
if (q->syncobj == crocus_batch_get_signal_syncobj(batch))
724
crocus_batch_flush(batch);
725
726
screen->vtbl.copy_mem_mem(batch, dst_bo, offset,
727
query_bo, snapshots_landed_offset,
728
result_type <= PIPE_QUERY_TYPE_U32 ? 4 : 8);
729
return;
730
}
731
732
if (!q->ready && READ_ONCE(q->map->snapshots_landed)) {
733
/* The final snapshots happen to have landed, so let's just compute
734
* the result on the CPU now...
735
*/
736
calculate_result_on_cpu(devinfo, q);
737
}
738
739
if (q->ready) {
740
/* We happen to have the result on the CPU, so just copy it. */
741
if (result_type <= PIPE_QUERY_TYPE_U32) {
742
screen->vtbl.store_data_imm32(batch, dst_bo, offset, q->result);
743
} else {
744
screen->vtbl.store_data_imm64(batch, dst_bo, offset, q->result);
745
}
746
747
/* Make sure the result lands before they use bind the QBO elsewhere
748
* and use the result.
749
*/
750
// XXX: Why? i965 doesn't do this.
751
crocus_emit_pipe_control_flush(batch,
752
"query: unknown QBO flushing hack",
753
PIPE_CONTROL_CS_STALL);
754
return;
755
}
756
757
#if GFX_VERx10 >= 75
758
bool predicated = !wait && !q->stalled;
759
760
struct mi_builder b;
761
mi_builder_init(&b, &batch->screen->devinfo, batch);
762
763
struct mi_value result = calculate_result_on_gpu(devinfo, &b, q);
764
struct mi_value dst =
765
result_type <= PIPE_QUERY_TYPE_U32 ? mi_mem32(rw_bo(dst_bo, offset))
766
: mi_mem64(rw_bo(dst_bo, offset));
767
768
if (predicated) {
769
mi_store(&b, mi_reg32(MI_PREDICATE_RESULT),
770
mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));
771
mi_store_if(&b, dst, result);
772
} else {
773
mi_store(&b, dst, result);
774
}
775
#endif
776
}
777
#endif
778
779
static void
780
crocus_set_active_query_state(struct pipe_context *ctx, bool enable)
781
{
782
struct crocus_context *ice = (void *) ctx;
783
784
if (ice->state.statistics_counters_enabled == enable)
785
return;
786
787
// XXX: most packets aren't paying attention to this yet, because it'd
788
// have to be done dynamically at draw time, which is a pain
789
ice->state.statistics_counters_enabled = enable;
790
ice->state.dirty |= CROCUS_DIRTY_CLIP |
791
CROCUS_DIRTY_RASTER |
792
CROCUS_DIRTY_STREAMOUT |
793
CROCUS_DIRTY_WM;
794
ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_GS |
795
CROCUS_STAGE_DIRTY_TCS |
796
CROCUS_STAGE_DIRTY_TES |
797
CROCUS_STAGE_DIRTY_VS;
798
}
799
800
static void
801
set_predicate_enable(struct crocus_context *ice, bool value)
802
{
803
if (value)
804
ice->state.predicate = CROCUS_PREDICATE_STATE_RENDER;
805
else
806
ice->state.predicate = CROCUS_PREDICATE_STATE_DONT_RENDER;
807
}
808
809
#if GFX_VER >= 7
810
static void
811
set_predicate_for_result(struct crocus_context *ice,
812
struct crocus_query *q,
813
bool inverted)
814
{
815
struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
816
struct crocus_bo *bo = crocus_resource_bo(q->query_state_ref.res);
817
818
#if GFX_VERx10 < 75
819
/* IVB doesn't have enough MI for this */
820
if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
821
q->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
822
ice->state.predicate = CROCUS_PREDICATE_STATE_STALL_FOR_QUERY;
823
return;
824
}
825
#endif
826
827
/* The CPU doesn't have the query result yet; use hardware predication */
828
ice->state.predicate = CROCUS_PREDICATE_STATE_USE_BIT;
829
830
/* Ensure the memory is coherent for MI_LOAD_REGISTER_* commands. */
831
crocus_emit_pipe_control_flush(batch,
832
"conditional rendering: set predicate",
833
PIPE_CONTROL_FLUSH_ENABLE);
834
q->stalled = true;
835
836
#if GFX_VERx10 < 75
837
struct crocus_screen *screen = batch->screen;
838
screen->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC0, bo,
839
q->query_state_ref.offset + offsetof(struct crocus_query_snapshots, start));
840
screen->vtbl.load_register_mem64(batch, MI_PREDICATE_SRC1, bo,
841
q->query_state_ref.offset + offsetof(struct crocus_query_snapshots, end));
842
843
uint32_t mi_predicate = MI_PREDICATE | MI_PREDICATE_COMBINEOP_SET |
844
MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
845
if (inverted)
846
mi_predicate |= MI_PREDICATE_LOADOP_LOAD;
847
else
848
mi_predicate |= MI_PREDICATE_LOADOP_LOADINV;
849
crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
850
#else
851
struct mi_builder b;
852
mi_builder_init(&b, &batch->screen->devinfo, batch);
853
854
struct mi_value result;
855
856
switch (q->type) {
857
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
858
result = calc_overflow_for_stream(&b, q, q->index);
859
break;
860
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
861
result = calc_overflow_any_stream(&b, q);
862
break;
863
default: {
864
/* PIPE_QUERY_OCCLUSION_* */
865
struct mi_value start =
866
query_mem64(q, offsetof(struct crocus_query_snapshots, start));
867
struct mi_value end =
868
query_mem64(q, offsetof(struct crocus_query_snapshots, end));
869
result = mi_isub(&b, end, start);
870
break;
871
}
872
}
873
874
result = inverted ? mi_z(&b, result) : mi_nz(&b, result);
875
result = mi_iand(&b, result, mi_imm(1));
876
877
/* We immediately set the predicate on the render batch, as all the
878
* counters come from 3D operations. However, we may need to predicate
879
* a compute dispatch, which executes in a different GEM context and has
880
* a different MI_PREDICATE_RESULT register. So, we save the result to
881
* memory and reload it in crocus_launch_grid.
882
*/
883
mi_value_ref(&b, result);
884
885
mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), result);
886
mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(0));
887
888
unsigned mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
889
MI_PREDICATE_COMBINEOP_SET |
890
MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
891
892
crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
893
mi_store(&b, query_mem64(q, offsetof(struct crocus_query_snapshots,
894
predicate_result)), result);
895
#endif
896
ice->state.compute_predicate = bo;
897
}
898
#endif
899
900
/**
 * Gallium render_condition hook: make subsequent rendering conditional
 * on a query's result (or unconditional when \p query is NULL).
 */
static void
crocus_render_condition(struct pipe_context *ctx,
                        struct pipe_query *query,
                        bool condition,
                        enum pipe_render_cond_flag mode)
{
   struct crocus_context *ice = (void *) ctx;
   struct crocus_query *q = (void *) query;

   /* The old condition isn't relevant; we'll update it if necessary */
   ice->state.compute_predicate = NULL;
   ice->condition.query = q;
   ice->condition.condition = condition;
   ice->condition.mode = mode;

   if (!q) {
      ice->state.predicate = CROCUS_PREDICATE_STATE_RENDER;
      return;
   }

   /* Compute the result now if the snapshots have already landed. */
   crocus_check_query_no_flush(ice, q);

   if (q->result || q->ready) {
      /* Result known on the CPU; decide the condition right here. */
      set_predicate_enable(ice, (q->result != 0) ^ condition);
   } else {
      if (mode == PIPE_RENDER_COND_NO_WAIT ||
          mode == PIPE_RENDER_COND_BY_REGION_NO_WAIT) {
         perf_debug(&ice->dbg, "Conditional rendering demoted from "
                    "\"no wait\" to \"wait\".");
      }
#if GFX_VER >= 7
      set_predicate_for_result(ice, q, condition);
#else
      /* No MI predication available; stall for the query result later. */
      ice->state.predicate = CROCUS_PREDICATE_STATE_STALL_FOR_QUERY;
#endif
   }
}
937
938
static void
939
crocus_resolve_conditional_render(struct crocus_context *ice)
940
{
941
struct pipe_context *ctx = (void *) ice;
942
struct crocus_query *q = ice->condition.query;
943
struct pipe_query *query = (void *) q;
944
union pipe_query_result result;
945
946
if (ice->state.predicate != CROCUS_PREDICATE_STATE_USE_BIT)
947
return;
948
949
assert(q);
950
951
crocus_get_query_result(ctx, query, true, &result);
952
set_predicate_enable(ice, (q->result != 0) ^ ice->condition.condition);
953
}
954
955
#if GFX_VER >= 7
956
static void
957
crocus_emit_compute_predicate(struct crocus_batch *batch)
958
{
959
struct crocus_context *ice = batch->ice;
960
struct crocus_screen *screen = batch->screen;
961
screen->vtbl.load_register_mem32(batch, MI_PREDICATE_SRC0,
962
ice->state.compute_predicate, 0);
963
screen->vtbl.load_register_imm32(batch, MI_PREDICATE_SRC1, 0);
964
unsigned mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
965
MI_PREDICATE_COMBINEOP_SET |
966
MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
967
968
crocus_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
969
}
970
#endif
971
972
/**
 * Fill in the screen vtable entries implemented by this file.
 */
void
genX(crocus_init_screen_query)(struct crocus_screen *screen)
{
   screen->vtbl.resolve_conditional_render = crocus_resolve_conditional_render;
#if GFX_VER >= 7
   screen->vtbl.emit_compute_predicate = crocus_emit_compute_predicate;
#endif
}
980
981
/**
 * Install the query-related pipe_context hooks.
 */
void
genX(crocus_init_query)(struct crocus_context *ice)
{
   struct pipe_context *ctx = &ice->ctx;

   ctx->create_query = crocus_create_query;
   ctx->create_batch_query = crocus_create_batch_query;
   ctx->destroy_query = crocus_destroy_query;
   ctx->begin_query = crocus_begin_query;
   ctx->end_query = crocus_end_query;
   ctx->get_query_result = crocus_get_query_result;
#if GFX_VER >= 7
   /* QBO result writes need MI_MATH / predication, Gfx7+ only. */
   ctx->get_query_result_resource = crocus_get_query_result_resource;
#endif
   ctx->set_active_query_state = crocus_set_active_query_state;
   ctx->render_condition = crocus_render_condition;

}
999
1000