GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/r600/r600_perfcounter.c
/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *  Nicolai Hähnle <[email protected]>
 *
 */

#include "util/u_memory.h"
#include "r600_query.h"
#include "r600_pipe_common.h"
#include "r600d_common.h"

/* Max counters per HW block */
#define R600_QUERY_MAX_COUNTERS 16

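/* Map a flat, screen-wide counter index to its hardware block. Returns the
 * block, writes the first group id of that block to *base_gid and the index
 * of the counter within the block to *sub_index.
 */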
static struct r600_perfcounter_block *
lookup_counter(struct r600_perfcounters *pc, unsigned index,
	       unsigned *base_gid, unsigned *sub_index)
{
	struct r600_perfcounter_block *block = pc->blocks;
	unsigned bid;

	*base_gid = 0;
	for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
		unsigned total = block->num_groups * block->num_selectors;

		if (index < total) {
			*sub_index = index;
			return block;
		}

		index -= total;
		*base_gid += block->num_groups;
	}

	return NULL;
}

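/* Map a flat group index to its hardware block; *index is rewritten to the
 * group index within the returned block.
 */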
static struct r600_perfcounter_block *
lookup_group(struct r600_perfcounters *pc, unsigned *index)
{
	unsigned bid;
	struct r600_perfcounter_block *block = pc->blocks;

	for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
		if (*index < block->num_groups)
			return block;
		*index -= block->num_groups;
	}

	return NULL;
}

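/* Counters selected from a single block for one shader-engine/instance
 * combination; all selectors of a group are programmed together.
 */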
struct r600_pc_group {
	struct r600_pc_group *next;
	struct r600_perfcounter_block *block;
	unsigned sub_gid; /* only used during init */
	unsigned result_base; /* only used during init */
	int se;
	int instance;
	unsigned num_counters;
	unsigned selectors[R600_QUERY_MAX_COUNTERS];
};

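/* Where a single user-visible counter lives in the result buffer: the first
 * qword, the number of qwords to accumulate and the stride between them.
 */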
struct r600_pc_counter {
	unsigned base;
	unsigned qwords;
	unsigned stride; /* in uint64s */
};

#define R600_PC_SHADERS_WINDOWING (1 << 31)

struct r600_query_pc {
	struct r600_query_hw b;

	unsigned shaders;
	unsigned num_counters;
	struct r600_pc_counter *counters;
	struct r600_pc_group *groups;
};

static void r600_pc_query_destroy(struct r600_common_screen *rscreen,
				  struct r600_query *rquery)
{
	struct r600_query_pc *query = (struct r600_query_pc *)rquery;

	while (query->groups) {
		struct r600_pc_group *group = query->groups;
		query->groups = group->next;
		FREE(group);
	}

	FREE(query->counters);

	r600_query_hw_destroy(rscreen, rquery);
}

static bool r600_pc_query_prepare_buffer(struct r600_common_screen *screen,
					 struct r600_query_hw *hwquery,
					 struct r600_resource *buffer)
{
	/* no-op */
	return true;
}

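/* Program the selectors of every group, switching the SE/instance broadcast
 * as needed, then start the performance counters.
 */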
static void r600_pc_query_emit_start(struct r600_common_context *ctx,
				     struct r600_query_hw *hwquery,
				     struct r600_resource *buffer, uint64_t va)
{
	struct r600_perfcounters *pc = ctx->screen->perfcounters;
	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
	struct r600_pc_group *group;
	int current_se = -1;
	int current_instance = -1;

	if (query->shaders)
		pc->emit_shaders(ctx, query->shaders);

	for (group = query->groups; group; group = group->next) {
		struct r600_perfcounter_block *block = group->block;

		if (group->se != current_se || group->instance != current_instance) {
			current_se = group->se;
			current_instance = group->instance;
			pc->emit_instance(ctx, group->se, group->instance);
		}

		pc->emit_select(ctx, block, group->num_counters, group->selectors);
	}

	if (current_se != -1 || current_instance != -1)
		pc->emit_instance(ctx, -1, -1);

	pc->emit_start(ctx, buffer, va);
}

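/* Stop the performance counters and read back each group's results, once per
 * shader engine and per instance covered by the group.
 */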
static void r600_pc_query_emit_stop(struct r600_common_context *ctx,
				    struct r600_query_hw *hwquery,
				    struct r600_resource *buffer, uint64_t va)
{
	struct r600_perfcounters *pc = ctx->screen->perfcounters;
	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
	struct r600_pc_group *group;

	pc->emit_stop(ctx, buffer, va);

	for (group = query->groups; group; group = group->next) {
		struct r600_perfcounter_block *block = group->block;
		unsigned se = group->se >= 0 ? group->se : 0;
		unsigned se_end = se + 1;

		if ((block->flags & R600_PC_BLOCK_SE) && (group->se < 0))
			se_end = ctx->screen->info.max_se;

		do {
			unsigned instance = group->instance >= 0 ? group->instance : 0;

			do {
				pc->emit_instance(ctx, se, instance);
				pc->emit_read(ctx, block,
					      group->num_counters, group->selectors,
					      buffer, va);
				va += sizeof(uint64_t) * group->num_counters;
			} while (group->instance < 0 && ++instance < block->num_instances);
		} while (++se < se_end);
	}

	pc->emit_instance(ctx, -1, -1);
}

static void r600_pc_query_clear_result(struct r600_query_hw *hwquery,
				       union pipe_query_result *result)
{
	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;

	memset(result, 0, sizeof(result->batch[0]) * query->num_counters);
}

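/* Accumulate the per-SE/per-instance values of every counter from the query
 * buffer into the user-visible batch result.
 */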
static void r600_pc_query_add_result(struct r600_common_screen *rscreen,
				     struct r600_query_hw *hwquery,
				     void *buffer,
				     union pipe_query_result *result)
{
	struct r600_query_pc *query = (struct r600_query_pc *)hwquery;
	uint64_t *results = buffer;
	unsigned i, j;

	for (i = 0; i < query->num_counters; ++i) {
		struct r600_pc_counter *counter = &query->counters[i];

		for (j = 0; j < counter->qwords; ++j) {
			uint32_t value = results[counter->base + j * counter->stride];
			result->batch[i].u64 += value;
		}
	}
}

static struct r600_query_ops batch_query_ops = {
	.destroy = r600_pc_query_destroy,
	.begin = r600_query_hw_begin,
	.end = r600_query_hw_end,
	.get_result = r600_query_hw_get_result
};

static struct r600_query_hw_ops batch_query_hw_ops = {
	.prepare_buffer = r600_pc_query_prepare_buffer,
	.emit_start = r600_pc_query_emit_start,
	.emit_stop = r600_pc_query_emit_stop,
	.clear_result = r600_pc_query_clear_result,
	.add_result = r600_pc_query_add_result,
};

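/* Find the group for (block, sub_gid) in the query, or create it, decoding
 * the shader type, shader engine and instance that sub_gid encodes.
 */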
static struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
					     struct r600_query_pc *query,
					     struct r600_perfcounter_block *block,
					     unsigned sub_gid)
{
	struct r600_pc_group *group = query->groups;

	while (group) {
		if (group->block == block && group->sub_gid == sub_gid)
			return group;
		group = group->next;
	}

	group = CALLOC_STRUCT(r600_pc_group);
	if (!group)
		return NULL;

	group->block = block;
	group->sub_gid = sub_gid;

	if (block->flags & R600_PC_BLOCK_SHADER) {
		unsigned sub_gids = block->num_instances;
		unsigned shader_id;
		unsigned shaders;
		unsigned query_shaders;

		if (block->flags & R600_PC_BLOCK_SE_GROUPS)
			sub_gids = sub_gids * screen->info.max_se;
		shader_id = sub_gid / sub_gids;
		sub_gid = sub_gid % sub_gids;

		shaders = screen->perfcounters->shader_type_bits[shader_id];

		query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING;
		if (query_shaders && query_shaders != shaders) {
			fprintf(stderr, "r600_perfcounter: incompatible shader groups\n");
			FREE(group);
			return NULL;
		}
		query->shaders = shaders;
	}

	if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
		// A non-zero value in query->shaders ensures that the shader
		// masking is reset unless the user explicitly requests one.
		query->shaders = R600_PC_SHADERS_WINDOWING;
	}

	if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
		group->se = sub_gid / block->num_instances;
		sub_gid = sub_gid % block->num_instances;
	} else {
		group->se = -1;
	}

	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
		group->instance = sub_gid;
	} else {
		group->instance = -1;
	}

	group->next = query->groups;
	query->groups = group;

	return group;
}

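/* Create a batch query from a list of performance counter query types:
 * collect the requested selectors per (block, SE, instance) group, size the
 * command stream and result buffer, and map each input query to the result
 * slot it will be read from.
 */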
struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
					   unsigned num_queries,
					   unsigned *query_types)
{
	struct r600_common_screen *screen =
		(struct r600_common_screen *)ctx->screen;
	struct r600_perfcounters *pc = screen->perfcounters;
	struct r600_perfcounter_block *block;
	struct r600_pc_group *group;
	struct r600_query_pc *query;
	unsigned base_gid, sub_gid, sub_index;
	unsigned i, j;

	if (!pc)
		return NULL;

	query = CALLOC_STRUCT(r600_query_pc);
	if (!query)
		return NULL;

	query->b.b.ops = &batch_query_ops;
	query->b.ops = &batch_query_hw_ops;

	query->num_counters = num_queries;

	/* Collect selectors per group */
	for (i = 0; i < num_queries; ++i) {
		unsigned sub_gid;

		if (query_types[i] < R600_QUERY_FIRST_PERFCOUNTER)
			goto error;

		block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
				       &base_gid, &sub_index);
		if (!block)
			goto error;

		sub_gid = sub_index / block->num_selectors;
		sub_index = sub_index % block->num_selectors;

		group = get_group_state(screen, query, block, sub_gid);
		if (!group)
			goto error;

		if (group->num_counters >= block->num_counters) {
			fprintf(stderr,
				"perfcounter group %s: too many selected\n",
				block->basename);
			goto error;
		}
		group->selectors[group->num_counters] = sub_index;
		++group->num_counters;
	}

	/* Compute result bases and CS size per group */
	query->b.num_cs_dw_begin = pc->num_start_cs_dwords;
	query->b.num_cs_dw_end = pc->num_stop_cs_dwords;

	query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
	query->b.num_cs_dw_end += pc->num_instance_cs_dwords;

	i = 0;
	for (group = query->groups; group; group = group->next) {
		struct r600_perfcounter_block *block = group->block;
		unsigned select_dw, read_dw;
		unsigned instances = 1;

		if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
			instances = screen->info.max_se;
		if (group->instance < 0)
			instances *= block->num_instances;

		group->result_base = i;
		query->b.result_size += sizeof(uint64_t) * instances * group->num_counters;
		i += instances * group->num_counters;

		pc->get_size(block, group->num_counters, group->selectors,
			     &select_dw, &read_dw);
		query->b.num_cs_dw_begin += select_dw;
		query->b.num_cs_dw_end += instances * read_dw;
		query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
		query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords;
	}

	if (query->shaders) {
		if (query->shaders == R600_PC_SHADERS_WINDOWING)
			query->shaders = 0xffffffff;
		query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords;
	}

	/* Map user-supplied query array to result indices */
	query->counters = CALLOC(num_queries, sizeof(*query->counters));
	for (i = 0; i < num_queries; ++i) {
		struct r600_pc_counter *counter = &query->counters[i];
		struct r600_perfcounter_block *block;

		block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
				       &base_gid, &sub_index);

		sub_gid = sub_index / block->num_selectors;
		sub_index = sub_index % block->num_selectors;

		group = get_group_state(screen, query, block, sub_gid);
		assert(group != NULL);

		for (j = 0; j < group->num_counters; ++j) {
			if (group->selectors[j] == sub_index)
				break;
		}

		counter->base = group->result_base + j;
		counter->stride = group->num_counters;

		counter->qwords = 1;
		if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
			counter->qwords = screen->info.max_se;
		if (group->instance < 0)
			counter->qwords *= block->num_instances;
	}

	if (!r600_query_hw_init(screen, &query->b))
		goto error;

	return (struct pipe_query *)query;

error:
	r600_pc_query_destroy(screen, &query->b.b);
	return NULL;
}

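/* Lazily build the group and selector name strings of a block: the basename
 * plus optional shader suffix, SE and instance numbers for groups, and a
 * "_NNN" selector suffix on top of the group name for individual counters.
 */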
static bool r600_init_block_names(struct r600_common_screen *screen,
				  struct r600_perfcounter_block *block)
{
	unsigned i, j, k;
	unsigned groups_shader = 1, groups_se = 1, groups_instance = 1;
	unsigned namelen;
	char *groupname;
	char *p;

	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
		groups_instance = block->num_instances;
	if (block->flags & R600_PC_BLOCK_SE_GROUPS)
		groups_se = screen->info.max_se;
	if (block->flags & R600_PC_BLOCK_SHADER)
		groups_shader = screen->perfcounters->num_shader_types;

	namelen = strlen(block->basename);
	block->group_name_stride = namelen + 1;
	if (block->flags & R600_PC_BLOCK_SHADER)
		block->group_name_stride += 3;
	if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
		assert(groups_se <= 10);
		block->group_name_stride += 1;

		if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
			block->group_name_stride += 1;
	}
	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
		assert(groups_instance <= 100);
		block->group_name_stride += 2;
	}

	block->group_names = MALLOC(block->num_groups * block->group_name_stride);
	if (!block->group_names)
		return false;

	groupname = block->group_names;
	for (i = 0; i < groups_shader; ++i) {
		const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i];
		unsigned shaderlen = strlen(shader_suffix);
		for (j = 0; j < groups_se; ++j) {
			for (k = 0; k < groups_instance; ++k) {
				strcpy(groupname, block->basename);
				p = groupname + namelen;

				if (block->flags & R600_PC_BLOCK_SHADER) {
					strcpy(p, shader_suffix);
					p += shaderlen;
				}

				if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
					p += sprintf(p, "%d", j);
					if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
						*p++ = '_';
				}

				if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
					p += sprintf(p, "%d", k);

				groupname += block->group_name_stride;
			}
		}
	}

	assert(block->num_selectors <= 1000);
	block->selector_name_stride = block->group_name_stride + 4;
	block->selector_names = MALLOC(block->num_groups * block->num_selectors *
				       block->selector_name_stride);
	if (!block->selector_names)
		return false;

	groupname = block->group_names;
	p = block->selector_names;
	for (i = 0; i < block->num_groups; ++i) {
		for (j = 0; j < block->num_selectors; ++j) {
			sprintf(p, "%s_%03d", groupname, j);
			p += block->selector_name_stride;
		}
		groupname += block->group_name_stride;
	}

	return true;
}

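/* Driver query interface: with info == NULL, return the total number of
 * perfcounter queries exposed by the screen; otherwise fill *info for the
 * counter at the given index.
 */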
int r600_get_perfcounter_info(struct r600_common_screen *screen,
			      unsigned index,
			      struct pipe_driver_query_info *info)
{
	struct r600_perfcounters *pc = screen->perfcounters;
	struct r600_perfcounter_block *block;
	unsigned base_gid, sub;

	if (!pc)
		return 0;

	if (!info) {
		unsigned bid, num_queries = 0;

		for (bid = 0; bid < pc->num_blocks; ++bid) {
			num_queries += pc->blocks[bid].num_selectors *
				       pc->blocks[bid].num_groups;
		}

		return num_queries;
	}

	block = lookup_counter(pc, index, &base_gid, &sub);
	if (!block)
		return 0;

	if (!block->selector_names) {
		if (!r600_init_block_names(screen, block))
			return 0;
	}
	info->name = block->selector_names + sub * block->selector_name_stride;
	info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index;
	info->max_value.u64 = 0;
	info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
	info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;
	info->group_id = base_gid + sub / block->num_selectors;
	info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
	if (sub > 0 && sub + 1 < block->num_selectors * block->num_groups)
		info->flags |= PIPE_DRIVER_QUERY_FLAG_DONT_LIST;
	return 1;
}

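/* Driver query group interface: with info == NULL, return the number of
 * perfcounter groups; otherwise fill *info for the group at the given index.
 */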
int r600_get_perfcounter_group_info(struct r600_common_screen *screen,
				    unsigned index,
				    struct pipe_driver_query_group_info *info)
{
	struct r600_perfcounters *pc = screen->perfcounters;
	struct r600_perfcounter_block *block;

	if (!pc)
		return 0;

	if (!info)
		return pc->num_groups;

	block = lookup_group(pc, &index);
	if (!block)
		return 0;

	if (!block->group_names) {
		if (!r600_init_block_names(screen, block))
			return 0;
	}
	info->name = block->group_names + index * block->group_name_stride;
	info->num_queries = block->num_selectors;
	info->max_active_queries = block->num_counters;
	return 1;
}

void r600_perfcounters_destroy(struct r600_common_screen *rscreen)
{
	if (rscreen->perfcounters)
		rscreen->perfcounters->cleanup(rscreen);
}

bool r600_perfcounters_init(struct r600_perfcounters *pc,
			    unsigned num_blocks)
{
	pc->blocks = CALLOC(num_blocks, sizeof(struct r600_perfcounter_block));
	if (!pc->blocks)
		return false;

	pc->separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", false);
	pc->separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", false);

	return true;
}

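/* Register a hardware block with the perfcounter layer and compute how many
 * query groups it exposes, based on the SE/instance/shader grouping flags.
 */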
void r600_perfcounters_add_block(struct r600_common_screen *rscreen,
				 struct r600_perfcounters *pc,
				 const char *name, unsigned flags,
				 unsigned counters, unsigned selectors,
				 unsigned instances, void *data)
{
	struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks];

	assert(counters <= R600_QUERY_MAX_COUNTERS);

	block->basename = name;
	block->flags = flags;
	block->num_counters = counters;
	block->num_selectors = selectors;
	block->num_instances = MAX2(instances, 1);
	block->data = data;

	if (pc->separate_se && (block->flags & R600_PC_BLOCK_SE))
		block->flags |= R600_PC_BLOCK_SE_GROUPS;
	if (pc->separate_instance && block->num_instances > 1)
		block->flags |= R600_PC_BLOCK_INSTANCE_GROUPS;

	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
		block->num_groups = block->num_instances;
	} else {
		block->num_groups = 1;
	}

	if (block->flags & R600_PC_BLOCK_SE_GROUPS)
		block->num_groups *= rscreen->info.max_se;
	if (block->flags & R600_PC_BLOCK_SHADER)
		block->num_groups *= pc->num_shader_types;

	++pc->num_blocks;
	pc->num_groups += block->num_groups;
}

void r600_perfcounters_do_destroy(struct r600_perfcounters *pc)
{
	unsigned i;

	for (i = 0; i < pc->num_blocks; ++i) {
		FREE(pc->blocks[i].group_names);
		FREE(pc->blocks[i].selector_names);
	}
	FREE(pc->blocks);
	FREE(pc);
}