Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
4574 views
1
/*
2
* Copyright 2015 Samuel Pitoiset
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice shall be included in
12
* all copies or substantial portions of the Software.
13
*
14
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
* OTHER DEALINGS IN THE SOFTWARE.
21
*/
22
23
#include "nvc0/nvc0_context.h"
24
#include "nvc0/nvc0_query_hw_metric.h"
25
#include "nvc0/nvc0_query_hw_sm.h"
26
27
#define _Q(i,n,t,d) { NVC0_HW_METRIC_QUERY_##i, n, PIPE_DRIVER_QUERY_TYPE_##t, d }
28
static const struct nvc0_hw_metric_cfg {
29
unsigned id;
30
const char *name;
31
enum pipe_driver_query_type type;
32
const char *desc;
33
} nvc0_hw_metric_queries[] = {
34
_Q(ACHIEVED_OCCUPANCY,
35
"metric-achieved_occupancy",
36
PERCENTAGE,
37
"Ratio of the average active warps per active cycle to the maximum "
38
"number of warps supported on a multiprocessor"),
39
40
_Q(BRANCH_EFFICIENCY,
41
"metric-branch_efficiency",
42
PERCENTAGE,
43
"Ratio of non-divergent branches to total branches"),
44
45
_Q(INST_ISSUED,
46
"metric-inst_issued",
47
UINT64,
48
"The number of instructions issued"),
49
50
_Q(INST_PER_WRAP,
51
"metric-inst_per_wrap",
52
UINT64,
53
"Average number of instructions executed by each warp"),
54
55
_Q(INST_REPLAY_OVERHEAD,
56
"metric-inst_replay_overhead",
57
UINT64,
58
"Average number of replays for each instruction executed"),
59
60
_Q(ISSUED_IPC,
61
"metric-issued_ipc",
62
UINT64,
63
"Instructions issued per cycle"),
64
65
_Q(ISSUE_SLOTS,
66
"metric-issue_slots",
67
UINT64,
68
"The number of issue slots used"),
69
70
_Q(ISSUE_SLOT_UTILIZATION,
71
"metric-issue_slot_utilization",
72
PERCENTAGE,
73
"Percentage of issue slots that issued at least one instruction, "
74
"averaged across all cycles"),
75
76
_Q(IPC,
77
"metric-ipc",
78
UINT64,
79
"Instructions executed per cycle"),
80
81
_Q(SHARED_REPLAY_OVERHEAD,
82
"metric-shared_replay_overhead",
83
UINT64,
84
"Average number of replays due to shared memory conflicts for each "
85
"instruction executed"),
86
87
_Q(WARP_EXECUTION_EFFICIENCY,
88
"metric-warp_execution_efficiency",
89
PERCENTAGE,
90
"Ratio of the average active threads per warp to the maximum number of "
91
"threads per warp supported on a multiprocessor"),
92
93
_Q(WARP_NONPRED_EXECUTION_EFFICIENCY,
94
"metric-warp_nonpred_execution_efficiency",
95
PERCENTAGE,
96
"Ratio of the average active threads per warp executing non-predicated "
97
"instructions to the maximum number of threads per warp supported on a "
98
"multiprocessor"),
99
};
100
101
#undef _Q
102
103
static inline const struct nvc0_hw_metric_cfg *
104
nvc0_hw_metric_get_cfg(unsigned metric_id)
105
{
106
unsigned i;
107
108
for (i = 0; i < ARRAY_SIZE(nvc0_hw_metric_queries); i++) {
109
if (nvc0_hw_metric_queries[i].id == metric_id)
110
return &nvc0_hw_metric_queries[i];
111
}
112
assert(0);
113
return NULL;
114
}
115
116
/*
 * Recipe for one derived metric: which metric it is and which underlying
 * queries have to be collected to compute it.
 */
struct nvc0_hw_metric_query_cfg {
   /* NVC0_HW_METRIC_QUERY_* identifier of the metric. */
   unsigned type;

   /* Query types handed to nvc0_hw_sm_create_query(), one per counter. */
   uint32_t queries[8];

   /* Number of valid entries at the start of queries[]. */
   uint32_t num_queries;
};
121
122
#define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_ ##n)
123
124
/* ==== Compute capability 2.0 (GF100/GF110) ==== */
125
static const struct nvc0_hw_metric_query_cfg
126
sm20_achieved_occupancy =
127
{
128
.type = NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY,
129
.queries[0] = _SM(ACTIVE_WARPS),
130
.queries[1] = _SM(ACTIVE_CYCLES),
131
.num_queries = 2,
132
};
133
134
static const struct nvc0_hw_metric_query_cfg
135
sm20_branch_efficiency =
136
{
137
.type = NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY,
138
.queries[0] = _SM(BRANCH),
139
.queries[1] = _SM(DIVERGENT_BRANCH),
140
.num_queries = 2,
141
};
142
143
static const struct nvc0_hw_metric_query_cfg
144
sm20_inst_per_wrap =
145
{
146
.type = NVC0_HW_METRIC_QUERY_INST_PER_WRAP,
147
.queries[0] = _SM(INST_EXECUTED),
148
.queries[1] = _SM(WARPS_LAUNCHED),
149
.num_queries = 2,
150
};
151
152
static const struct nvc0_hw_metric_query_cfg
153
sm20_inst_replay_overhead =
154
{
155
.type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
156
.queries[0] = _SM(INST_ISSUED),
157
.queries[1] = _SM(INST_EXECUTED),
158
.num_queries = 2,
159
};
160
161
static const struct nvc0_hw_metric_query_cfg
162
sm20_issued_ipc =
163
{
164
.type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
165
.queries[0] = _SM(INST_ISSUED),
166
.queries[1] = _SM(ACTIVE_CYCLES),
167
.num_queries = 2,
168
};
169
170
static const struct nvc0_hw_metric_query_cfg
171
sm20_issue_slot_utilization =
172
{
173
.type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
174
.queries[0] = _SM(INST_ISSUED),
175
.queries[1] = _SM(ACTIVE_CYCLES),
176
.num_queries = 2,
177
};
178
179
static const struct nvc0_hw_metric_query_cfg
180
sm20_ipc =
181
{
182
.type = NVC0_HW_METRIC_QUERY_IPC,
183
.queries[0] = _SM(INST_EXECUTED),
184
.queries[1] = _SM(ACTIVE_CYCLES),
185
.num_queries = 2,
186
};
187
188
static const struct nvc0_hw_metric_query_cfg *sm20_hw_metric_queries[] =
189
{
190
&sm20_achieved_occupancy,
191
&sm20_branch_efficiency,
192
&sm20_inst_per_wrap,
193
&sm20_inst_replay_overhead,
194
&sm20_ipc,
195
&sm20_issued_ipc,
196
&sm20_issue_slot_utilization,
197
};
198
199
/* ==== Compute capability 2.1 (GF108+ except GF110) ==== */
200
static const struct nvc0_hw_metric_query_cfg
201
sm21_inst_issued =
202
{
203
.type = NVC0_HW_METRIC_QUERY_INST_ISSUED,
204
.queries[0] = _SM(INST_ISSUED1_0),
205
.queries[1] = _SM(INST_ISSUED1_1),
206
.queries[2] = _SM(INST_ISSUED2_0),
207
.queries[3] = _SM(INST_ISSUED2_1),
208
.num_queries = 4,
209
};
210
211
static const struct nvc0_hw_metric_query_cfg
212
sm21_inst_replay_overhead =
213
{
214
.type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
215
.queries[0] = _SM(INST_ISSUED1_0),
216
.queries[1] = _SM(INST_ISSUED1_1),
217
.queries[2] = _SM(INST_ISSUED2_0),
218
.queries[3] = _SM(INST_ISSUED2_1),
219
.queries[4] = _SM(INST_EXECUTED),
220
.num_queries = 5,
221
};
222
223
static const struct nvc0_hw_metric_query_cfg
224
sm21_issued_ipc =
225
{
226
.type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
227
.queries[0] = _SM(INST_ISSUED1_0),
228
.queries[1] = _SM(INST_ISSUED1_1),
229
.queries[2] = _SM(INST_ISSUED2_0),
230
.queries[3] = _SM(INST_ISSUED2_1),
231
.queries[4] = _SM(ACTIVE_CYCLES),
232
.num_queries = 5,
233
};
234
235
static const struct nvc0_hw_metric_query_cfg
236
sm21_issue_slots =
237
{
238
.type = NVC0_HW_METRIC_QUERY_ISSUE_SLOTS,
239
.queries[0] = _SM(INST_ISSUED1_0),
240
.queries[1] = _SM(INST_ISSUED1_1),
241
.queries[2] = _SM(INST_ISSUED2_0),
242
.queries[3] = _SM(INST_ISSUED2_1),
243
.num_queries = 4,
244
};
245
246
static const struct nvc0_hw_metric_query_cfg
247
sm21_issue_slot_utilization =
248
{
249
.type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
250
.queries[0] = _SM(INST_ISSUED1_0),
251
.queries[1] = _SM(INST_ISSUED1_1),
252
.queries[2] = _SM(INST_ISSUED2_0),
253
.queries[3] = _SM(INST_ISSUED2_1),
254
.queries[4] = _SM(ACTIVE_CYCLES),
255
.num_queries = 5,
256
};
257
258
static const struct nvc0_hw_metric_query_cfg *sm21_hw_metric_queries[] =
259
{
260
&sm20_achieved_occupancy,
261
&sm20_branch_efficiency,
262
&sm21_inst_issued,
263
&sm20_inst_per_wrap,
264
&sm21_inst_replay_overhead,
265
&sm20_ipc,
266
&sm21_issued_ipc,
267
&sm21_issue_slots,
268
&sm21_issue_slot_utilization,
269
};
270
271
/* ==== Compute capability 3.0 (GK104/GK106/GK107) ==== */
272
static const struct nvc0_hw_metric_query_cfg
273
sm30_inst_issued =
274
{
275
.type = NVC0_HW_METRIC_QUERY_INST_ISSUED,
276
.queries[0] = _SM(INST_ISSUED1),
277
.queries[1] = _SM(INST_ISSUED2),
278
.num_queries = 2,
279
};
280
281
static const struct nvc0_hw_metric_query_cfg
282
sm30_inst_replay_overhead =
283
{
284
.type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
285
.queries[0] = _SM(INST_ISSUED1),
286
.queries[1] = _SM(INST_ISSUED2),
287
.queries[2] = _SM(INST_EXECUTED),
288
.num_queries = 3,
289
};
290
291
static const struct nvc0_hw_metric_query_cfg
292
sm30_issued_ipc =
293
{
294
.type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,
295
.queries[0] = _SM(INST_ISSUED1),
296
.queries[1] = _SM(INST_ISSUED2),
297
.queries[2] = _SM(ACTIVE_CYCLES),
298
.num_queries = 3,
299
};
300
301
static const struct nvc0_hw_metric_query_cfg
302
sm30_issue_slots =
303
{
304
.type = NVC0_HW_METRIC_QUERY_ISSUE_SLOTS,
305
.queries[0] = _SM(INST_ISSUED1),
306
.queries[1] = _SM(INST_ISSUED2),
307
.num_queries = 2,
308
};
309
310
static const struct nvc0_hw_metric_query_cfg
311
sm30_issue_slot_utilization =
312
{
313
.type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,
314
.queries[0] = _SM(INST_ISSUED1),
315
.queries[1] = _SM(INST_ISSUED2),
316
.queries[2] = _SM(ACTIVE_CYCLES),
317
.num_queries = 3,
318
};
319
320
static const struct nvc0_hw_metric_query_cfg
321
sm30_shared_replay_overhead =
322
{
323
.type = NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD,
324
.queries[0] = _SM(SHARED_LD_REPLAY),
325
.queries[1] = _SM(SHARED_ST_REPLAY),
326
.queries[2] = _SM(INST_EXECUTED),
327
.num_queries = 3,
328
};
329
330
static const struct nvc0_hw_metric_query_cfg
331
sm30_warp_execution_efficiency =
332
{
333
.type = NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY,
334
.queries[0] = _SM(INST_EXECUTED),
335
.queries[1] = _SM(TH_INST_EXECUTED),
336
.num_queries = 2,
337
};
338
339
static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] =
340
{
341
&sm20_achieved_occupancy,
342
&sm20_branch_efficiency,
343
&sm30_inst_issued,
344
&sm20_inst_per_wrap,
345
&sm30_inst_replay_overhead,
346
&sm20_ipc,
347
&sm30_issued_ipc,
348
&sm30_issue_slots,
349
&sm30_issue_slot_utilization,
350
&sm30_shared_replay_overhead,
351
&sm30_warp_execution_efficiency,
352
};
353
354
/* ==== Compute capability 3.5 (GK110/GK208) ==== */
355
static const struct nvc0_hw_metric_query_cfg
356
sm35_warp_nonpred_execution_efficiency =
357
{
358
.type = NVC0_HW_METRIC_QUERY_WARP_NONPRED_EXECUTION_EFFICIENCY,
359
.queries[0] = _SM(INST_EXECUTED),
360
.queries[1] = _SM(NOT_PRED_OFF_INST_EXECUTED),
361
.num_queries = 2,
362
};
363
364
static const struct nvc0_hw_metric_query_cfg *sm35_hw_metric_queries[] =
365
{
366
&sm20_achieved_occupancy,
367
&sm30_inst_issued,
368
&sm20_inst_per_wrap,
369
&sm30_inst_replay_overhead,
370
&sm20_ipc,
371
&sm30_issued_ipc,
372
&sm30_issue_slots,
373
&sm30_issue_slot_utilization,
374
&sm30_shared_replay_overhead,
375
&sm30_warp_execution_efficiency,
376
&sm35_warp_nonpred_execution_efficiency,
377
};
378
379
/* ==== Compute capability 5.0 (GM107/GM108) ==== */
380
static const struct nvc0_hw_metric_query_cfg *sm50_hw_metric_queries[] =
381
{
382
&sm20_achieved_occupancy,
383
&sm20_branch_efficiency,
384
&sm30_inst_issued,
385
&sm20_inst_per_wrap,
386
&sm30_inst_replay_overhead,
387
&sm20_ipc,
388
&sm30_issued_ipc,
389
&sm30_issue_slots,
390
&sm30_issue_slot_utilization,
391
&sm30_warp_execution_efficiency,
392
&sm35_warp_nonpred_execution_efficiency,
393
};
394
395
#undef _SM
396
397
static inline const struct nvc0_hw_metric_query_cfg **
398
nvc0_hw_metric_get_queries(struct nvc0_screen *screen)
399
{
400
struct nouveau_device *dev = screen->base.device;
401
402
switch (screen->base.class_3d) {
403
case GM200_3D_CLASS:
404
case GM107_3D_CLASS:
405
return sm50_hw_metric_queries;
406
case NVF0_3D_CLASS:
407
return sm35_hw_metric_queries;
408
case NVE4_3D_CLASS:
409
return sm30_hw_metric_queries;
410
case NVC0_3D_CLASS:
411
case NVC1_3D_CLASS:
412
case NVC8_3D_CLASS:
413
if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
414
return sm20_hw_metric_queries;
415
return sm21_hw_metric_queries;
416
}
417
assert(0);
418
return NULL;
419
}
420
421
unsigned
422
nvc0_hw_metric_get_num_queries(struct nvc0_screen *screen)
423
{
424
struct nouveau_device *dev = screen->base.device;
425
426
switch (screen->base.class_3d) {
427
case GM200_3D_CLASS:
428
case GM107_3D_CLASS:
429
return ARRAY_SIZE(sm50_hw_metric_queries);
430
case NVF0_3D_CLASS:
431
return ARRAY_SIZE(sm35_hw_metric_queries);
432
case NVE4_3D_CLASS:
433
return ARRAY_SIZE(sm30_hw_metric_queries);
434
case NVC0_3D_CLASS:
435
case NVC1_3D_CLASS:
436
case NVC8_3D_CLASS:
437
if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
438
return ARRAY_SIZE(sm20_hw_metric_queries);
439
return ARRAY_SIZE(sm21_hw_metric_queries);
440
}
441
return 0;
442
}
443
444
static const struct nvc0_hw_metric_query_cfg *
445
nvc0_hw_metric_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
446
{
447
const struct nvc0_hw_metric_query_cfg **queries;
448
struct nvc0_screen *screen = nvc0->screen;
449
struct nvc0_query *q = &hq->base;
450
unsigned num_queries;
451
unsigned i;
452
453
num_queries = nvc0_hw_metric_get_num_queries(screen);
454
queries = nvc0_hw_metric_get_queries(screen);
455
456
for (i = 0; i < num_queries; i++) {
457
if (NVC0_HW_METRIC_QUERY(queries[i]->type) == q->type)
458
return queries[i];
459
}
460
assert(0);
461
return NULL;
462
}
463
464
static void
465
nvc0_hw_metric_destroy_query(struct nvc0_context *nvc0,
466
struct nvc0_hw_query *hq)
467
{
468
struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
469
unsigned i;
470
471
for (i = 0; i < hmq->num_queries; i++)
472
if (hmq->queries[i]->funcs->destroy_query)
473
hmq->queries[i]->funcs->destroy_query(nvc0, hmq->queries[i]);
474
FREE(hmq);
475
}
476
477
static bool
478
nvc0_hw_metric_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
479
{
480
struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
481
bool ret = false;
482
unsigned i;
483
484
for (i = 0; i < hmq->num_queries; i++) {
485
ret = hmq->queries[i]->funcs->begin_query(nvc0, hmq->queries[i]);
486
if (!ret)
487
return ret;
488
}
489
return ret;
490
}
491
492
static void
493
nvc0_hw_metric_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
494
{
495
struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
496
unsigned i;
497
498
for (i = 0; i < hmq->num_queries; i++)
499
hmq->queries[i]->funcs->end_query(nvc0, hmq->queries[i]);
500
}
501
502
static uint64_t
503
sm20_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
504
{
505
switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
506
case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
507
/* ((active_warps / active_cycles) / max. number of warps on a MP) * 100 */
508
if (res64[1])
509
return ((res64[0] / (double)res64[1]) / 48) * 100;
510
break;
511
case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
512
/* (branch / (branch + divergent_branch)) * 100 */
513
if (res64[0] + res64[1])
514
return (res64[0] / (double)(res64[0] + res64[1])) * 100;
515
break;
516
case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
517
/* inst_executed / warps_launched */
518
if (res64[1])
519
return res64[0] / (double)res64[1];
520
break;
521
case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
522
/* (inst_issued - inst_executed) / inst_executed */
523
if (res64[1])
524
return (res64[0] - res64[1]) / (double)res64[1];
525
break;
526
case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
527
/* inst_issued / active_cycles */
528
if (res64[1])
529
return res64[0] / (double)res64[1];
530
break;
531
case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
532
/* ((inst_issued / 2) / active_cycles) * 100 */
533
if (res64[1])
534
return ((res64[0] / 2) / (double)res64[1]) * 100;
535
break;
536
case NVC0_HW_METRIC_QUERY_IPC:
537
/* inst_executed / active_cycles */
538
if (res64[1])
539
return res64[0] / (double)res64[1];
540
break;
541
default:
542
debug_printf("invalid metric type: %d\n",
543
hq->base.type - NVC0_HW_METRIC_QUERY(0));
544
break;
545
}
546
return 0;
547
}
548
549
static uint64_t
550
sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
551
{
552
switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
553
case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
554
return sm20_hw_metric_calc_result(hq, res64);
555
case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
556
return sm20_hw_metric_calc_result(hq, res64);
557
case NVC0_HW_METRIC_QUERY_INST_ISSUED:
558
/* issued1_0 + issued1_1 + (issued2_0 + issued2_1) * 2 */
559
return res64[0] + res64[1] + (res64[2] + res64[3]) * 2;
560
break;
561
case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
562
return sm20_hw_metric_calc_result(hq, res64);
563
case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
564
/* (metric-inst_issued - inst_executed) / inst_executed */
565
if (res64[4])
566
return (((res64[0] + res64[1] + (res64[2] + res64[3]) * 2) -
567
res64[4]) / (double)res64[4]);
568
break;
569
case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
570
/* metric-inst_issued / active_cycles */
571
if (res64[4])
572
return (res64[0] + res64[1] + (res64[2] + res64[3]) * 2) /
573
(double)res64[4];
574
break;
575
case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
576
/* issued1_0 + issued1_1 + issued2_0 + issued2_1 */
577
return res64[0] + res64[1] + res64[2] + res64[3];
578
break;
579
case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
580
/* ((metric-issue_slots / 2) / active_cycles) * 100 */
581
if (res64[4])
582
return (((res64[0] + res64[1] + res64[2] + res64[3]) / 2) /
583
(double)res64[4]) * 100;
584
break;
585
case NVC0_HW_METRIC_QUERY_IPC:
586
return sm20_hw_metric_calc_result(hq, res64);
587
default:
588
debug_printf("invalid metric type: %d\n",
589
hq->base.type - NVC0_HW_METRIC_QUERY(0));
590
break;
591
}
592
return 0;
593
}
594
595
static uint64_t
596
sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
597
{
598
switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
599
case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
600
/* ((active_warps / active_cycles) / max. number of warps on a MP) * 100 */
601
if (res64[1])
602
return ((res64[0] / (double)res64[1]) / 64) * 100;
603
break;
604
case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
605
return sm20_hw_metric_calc_result(hq, res64);
606
case NVC0_HW_METRIC_QUERY_INST_ISSUED:
607
/* inst_issued1 + inst_issued2 * 2 */
608
return res64[0] + res64[1] * 2;
609
case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:
610
return sm20_hw_metric_calc_result(hq, res64);
611
case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:
612
/* (metric-inst_issued - inst_executed) / inst_executed */
613
if (res64[2])
614
return (((res64[0] + res64[1] * 2) - res64[2]) / (double)res64[2]);
615
break;
616
case NVC0_HW_METRIC_QUERY_ISSUED_IPC:
617
/* metric-inst_issued / active_cycles */
618
if (res64[2])
619
return (res64[0] + res64[1] * 2) / (double)res64[2];
620
break;
621
case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:
622
/* inst_issued1 + inst_issued2 */
623
return res64[0] + res64[1];
624
case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:
625
/* ((metric-issue_slots / 2) / active_cycles) * 100 */
626
if (res64[2])
627
return (((res64[0] + res64[1]) / 2) / (double)res64[2]) * 100;
628
break;
629
case NVC0_HW_METRIC_QUERY_IPC:
630
return sm20_hw_metric_calc_result(hq, res64);
631
case NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD:
632
/* (shared_load_replay + shared_store_replay) / inst_executed */
633
if (res64[2])
634
return (res64[0] + res64[1]) / (double)res64[2];
635
break;
636
case NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY:
637
/* thread_inst_executed / (inst_executed * max. number of threads per
638
* wrap) * 100 */
639
if (res64[0])
640
return (res64[1] / ((double)res64[0] * 32)) * 100;
641
break;
642
default:
643
debug_printf("invalid metric type: %d\n",
644
hq->base.type - NVC0_HW_METRIC_QUERY(0));
645
break;
646
}
647
return 0;
648
}
649
650
static uint64_t
651
sm35_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
652
{
653
switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
654
case NVC0_HW_METRIC_QUERY_WARP_NONPRED_EXECUTION_EFFICIENCY:
655
/* not_predicated_off_thread_inst_executed / (inst_executed * max. number
656
* of threads per wrap) * 100 */
657
if (res64[0])
658
return (res64[1] / ((double)res64[0] * 32)) * 100;
659
break;
660
default:
661
return sm30_hw_metric_calc_result(hq, res64);
662
}
663
return 0;
664
}
665
666
static bool
667
nvc0_hw_metric_get_query_result(struct nvc0_context *nvc0,
668
struct nvc0_hw_query *hq, bool wait,
669
union pipe_query_result *result)
670
{
671
struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
672
struct nvc0_screen *screen = nvc0->screen;
673
struct nouveau_device *dev = screen->base.device;
674
union pipe_query_result results[8] = {};
675
uint64_t res64[8] = {};
676
uint64_t value = 0;
677
bool ret = false;
678
unsigned i;
679
680
for (i = 0; i < hmq->num_queries; i++) {
681
ret = hmq->queries[i]->funcs->get_query_result(nvc0, hmq->queries[i],
682
wait, &results[i]);
683
if (!ret)
684
return ret;
685
res64[i] = *(uint64_t *)&results[i];
686
}
687
688
switch (screen->base.class_3d) {
689
case GM200_3D_CLASS:
690
case GM107_3D_CLASS:
691
case NVF0_3D_CLASS:
692
value = sm35_hw_metric_calc_result(hq, res64);
693
break;
694
case NVE4_3D_CLASS:
695
value = sm30_hw_metric_calc_result(hq, res64);
696
break;
697
default:
698
if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
699
value = sm20_hw_metric_calc_result(hq, res64);
700
else
701
value = sm21_hw_metric_calc_result(hq, res64);
702
break;
703
}
704
705
*(uint64_t *)result = value;
706
return ret;
707
}
708
709
static const struct nvc0_hw_query_funcs hw_metric_query_funcs = {
710
.destroy_query = nvc0_hw_metric_destroy_query,
711
.begin_query = nvc0_hw_metric_begin_query,
712
.end_query = nvc0_hw_metric_end_query,
713
.get_query_result = nvc0_hw_metric_get_query_result,
714
};
715
716
struct nvc0_hw_query *
717
nvc0_hw_metric_create_query(struct nvc0_context *nvc0, unsigned type)
718
{
719
const struct nvc0_hw_metric_query_cfg *cfg;
720
struct nvc0_hw_metric_query *hmq;
721
struct nvc0_hw_query *hq;
722
unsigned i;
723
724
if (type < NVC0_HW_METRIC_QUERY(0) || type > NVC0_HW_METRIC_QUERY_LAST)
725
return NULL;
726
727
hmq = CALLOC_STRUCT(nvc0_hw_metric_query);
728
if (!hmq)
729
return NULL;
730
731
hq = &hmq->base;
732
hq->funcs = &hw_metric_query_funcs;
733
hq->base.type = type;
734
735
cfg = nvc0_hw_metric_query_get_cfg(nvc0, hq);
736
737
for (i = 0; i < cfg->num_queries; i++) {
738
hmq->queries[i] = nvc0_hw_sm_create_query(nvc0, cfg->queries[i]);
739
if (!hmq->queries[i]) {
740
nvc0_hw_metric_destroy_query(nvc0, hq);
741
return NULL;
742
}
743
hmq->num_queries++;
744
}
745
746
return hq;
747
}
748
749
int
750
nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
751
struct pipe_driver_query_info *info)
752
{
753
int count = 0;
754
755
if (screen->base.drm->version >= 0x01000101) {
756
if (screen->compute)
757
count = nvc0_hw_metric_get_num_queries(screen);
758
}
759
760
if (!info)
761
return count;
762
763
if (id < count) {
764
if (screen->compute) {
765
if (screen->base.class_3d <= GM200_3D_CLASS) {
766
const struct nvc0_hw_metric_query_cfg **queries =
767
nvc0_hw_metric_get_queries(screen);
768
const struct nvc0_hw_metric_cfg *cfg =
769
nvc0_hw_metric_get_cfg(queries[id]->type);
770
771
info->name = cfg->name;
772
info->query_type = NVC0_HW_METRIC_QUERY(queries[id]->type);
773
info->type = cfg->type;
774
info->group_id = NVC0_HW_METRIC_QUERY_GROUP;
775
return 1;
776
}
777
}
778
}
779
return 0;
780
}
781
782