Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/intel/perf/intel_perf_mdapi.c
4547 views
/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23
24
#include "intel_perf.h"
25
#include "intel_perf_mdapi.h"
26
#include "intel_perf_private.h"
27
#include "intel_perf_regs.h"
28
29
#include "dev/intel_device_info.h"
30
31
#include <drm-uapi/i915_drm.h>
32
33
34
int
35
intel_perf_query_result_write_mdapi(void *data, uint32_t data_size,
36
const struct intel_device_info *devinfo,
37
const struct intel_perf_query_info *query,
38
const struct intel_perf_query_result *result)
39
{
40
switch (devinfo->ver) {
41
case 7: {
42
struct gfx7_mdapi_metrics *mdapi_data = (struct gfx7_mdapi_metrics *) data;
43
44
if (data_size < sizeof(*mdapi_data))
45
return 0;
46
47
assert(devinfo->is_haswell);
48
49
for (int i = 0; i < ARRAY_SIZE(mdapi_data->ACounters); i++)
50
mdapi_data->ACounters[i] = result->accumulator[1 + i];
51
52
for (int i = 0; i < ARRAY_SIZE(mdapi_data->NOACounters); i++) {
53
mdapi_data->NOACounters[i] =
54
result->accumulator[1 + ARRAY_SIZE(mdapi_data->ACounters) + i];
55
}
56
57
mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0];
58
mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1];
59
60
mdapi_data->ReportsCount = result->reports_accumulated;
61
mdapi_data->TotalTime =
62
intel_device_info_timebase_scale(devinfo, result->accumulator[0]);
63
mdapi_data->CoreFrequency = result->gt_frequency[1];
64
mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
65
mdapi_data->SplitOccured = result->query_disjoint;
66
return sizeof(*mdapi_data);
67
}
68
case 8: {
69
struct gfx8_mdapi_metrics *mdapi_data = (struct gfx8_mdapi_metrics *) data;
70
71
if (data_size < sizeof(*mdapi_data))
72
return 0;
73
74
for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
75
mdapi_data->OaCntr[i] = result->accumulator[2 + i];
76
for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
77
mdapi_data->NoaCntr[i] =
78
result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
79
}
80
81
mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0];
82
mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1];
83
84
mdapi_data->ReportId = result->hw_id;
85
mdapi_data->ReportsCount = result->reports_accumulated;
86
mdapi_data->TotalTime =
87
intel_device_info_timebase_scale(devinfo, result->accumulator[0]);
88
mdapi_data->BeginTimestamp =
89
intel_device_info_timebase_scale(devinfo, result->begin_timestamp);
90
mdapi_data->GPUTicks = result->accumulator[1];
91
mdapi_data->CoreFrequency = result->gt_frequency[1];
92
mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
93
mdapi_data->SliceFrequency =
94
(result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
95
mdapi_data->UnsliceFrequency =
96
(result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL;
97
mdapi_data->SplitOccured = result->query_disjoint;
98
return sizeof(*mdapi_data);
99
}
100
case 9:
101
case 11:
102
case 12:{
103
struct gfx9_mdapi_metrics *mdapi_data = (struct gfx9_mdapi_metrics *) data;
104
105
if (data_size < sizeof(*mdapi_data))
106
return 0;
107
108
for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)
109
mdapi_data->OaCntr[i] = result->accumulator[2 + i];
110
for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {
111
mdapi_data->NoaCntr[i] =
112
result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];
113
}
114
115
mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0];
116
mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1];
117
118
mdapi_data->ReportId = result->hw_id;
119
mdapi_data->ReportsCount = result->reports_accumulated;
120
mdapi_data->TotalTime =
121
intel_device_info_timebase_scale(devinfo, result->accumulator[0]);
122
mdapi_data->BeginTimestamp =
123
intel_device_info_timebase_scale(devinfo, result->begin_timestamp);
124
mdapi_data->GPUTicks = result->accumulator[1];
125
mdapi_data->CoreFrequency = result->gt_frequency[1];
126
mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];
127
mdapi_data->SliceFrequency =
128
(result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;
129
mdapi_data->UnsliceFrequency =
130
(result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL;
131
mdapi_data->SplitOccured = result->query_disjoint;
132
return sizeof(*mdapi_data);
133
}
134
default:
135
unreachable("unexpected gen");
136
}
137
}
138
139
void
140
intel_perf_register_mdapi_statistic_query(struct intel_perf_config *perf_cfg,
141
const struct intel_device_info *devinfo)
142
{
143
if (!(devinfo->ver >= 7 && devinfo->ver <= 12))
144
return;
145
146
struct intel_perf_query_info *query =
147
intel_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);
148
149
query->kind = INTEL_PERF_QUERY_TYPE_PIPELINE;
150
query->name = "Intel_Raw_Pipeline_Statistics_Query";
151
152
/* The order has to match mdapi_pipeline_metrics. */
153
intel_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,
154
"N vertices submitted");
155
intel_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
156
"N primitives submitted");
157
intel_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
158
"N vertex shader invocations");
159
intel_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
160
"N geometry shader invocations");
161
intel_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
162
"N geometry shader primitives emitted");
163
intel_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
164
"N primitives entering clipping");
165
intel_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
166
"N primitives leaving clipping");
167
if (devinfo->is_haswell || devinfo->ver == 8) {
168
intel_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
169
"N fragment shader invocations",
170
"N fragment shader invocations");
171
} else {
172
intel_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
173
"N fragment shader invocations");
174
}
175
intel_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
176
"N TCS shader invocations");
177
intel_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
178
"N TES shader invocations");
179
if (devinfo->ver >= 7) {
180
intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
181
"N compute shader invocations");
182
}
183
184
if (devinfo->ver >= 10) {
185
/* Reuse existing CS invocation register until we can expose this new
186
* one.
187
*/
188
intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
189
"Reserved1");
190
}
191
192
query->data_size = sizeof(uint64_t) * query->n_counters;
193
}
194
195
static void
196
fill_mdapi_perf_query_counter(struct intel_perf_query_info *query,
197
const char *name,
198
uint32_t data_offset,
199
uint32_t data_size,
200
enum intel_perf_counter_data_type data_type)
201
{
202
struct intel_perf_query_counter *counter = &query->counters[query->n_counters];
203
204
assert(query->n_counters <= query->max_counters);
205
206
counter->name = name;
207
counter->desc = "Raw counter value";
208
counter->type = INTEL_PERF_COUNTER_TYPE_RAW;
209
counter->data_type = data_type;
210
counter->offset = data_offset;
211
212
query->n_counters++;
213
214
assert(counter->offset + intel_perf_query_counter_get_size(counter) <= query->data_size);
215
}
216
217
/* Register the scalar member `field_name` of the object `struct_name` as a
 * raw counter of `query`. The counter is named after the member, its offset
 * is the member's byte distance from the start of `struct_name`, and
 * `type_name` selects the INTEL_PERF_COUNTER_DATA_TYPE_* value.
 * `struct_name` is only used for address and size computations.
 */
#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name)  \
   fill_mdapi_perf_query_counter((query), #field_name,                      \
                                 (uint8_t *) &(struct_name).field_name -    \
                                 (uint8_t *) &(struct_name),                \
                                 sizeof((struct_name).field_name),          \
                                 INTEL_PERF_COUNTER_DATA_TYPE_##type_name)
223
/* Register element `idx` of the array member `field_name` of the object
 * `struct_name` as a raw counter of `query`. The counter name is the member
 * name followed by the index, formatted with ralloc_asprintf() using `ctx`
 * as its first argument. `idx` is printed with "%i", so it must be an int.
 */
#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \
   fill_mdapi_perf_query_counter((query),                                                  \
                                 ralloc_asprintf((ctx), "%s%i", #field_name, (idx)),       \
                                 (uint8_t *) &(struct_name).field_name[(idx)] -            \
                                 (uint8_t *) &(struct_name),                               \
                                 sizeof((struct_name).field_name[0]),                      \
                                 INTEL_PERF_COUNTER_DATA_TYPE_##type_name)
230
231
void
232
intel_perf_register_mdapi_oa_query(struct intel_perf_config *perf,
233
const struct intel_device_info *devinfo)
234
{
235
struct intel_perf_query_info *query = NULL;
236
237
/* MDAPI requires different structures for pretty much every generation
238
* (right now we have definitions for gen 7 to 12).
239
*/
240
if (!(devinfo->ver >= 7 && devinfo->ver <= 12))
241
return;
242
243
switch (devinfo->ver) {
244
case 7: {
245
query = intel_perf_append_query_info(perf, 1 + 45 + 16 + 7);
246
query->oa_format = I915_OA_FORMAT_A45_B8_C8;
247
248
struct gfx7_mdapi_metrics metric_data;
249
query->data_size = sizeof(metric_data);
250
251
MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
252
for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) {
253
MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
254
metric_data, ACounters, i, UINT64);
255
}
256
for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) {
257
MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
258
metric_data, NOACounters, i, UINT64);
259
}
260
MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
261
MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
262
MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
263
MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
264
MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
265
MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
266
MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
267
break;
268
}
269
case 8: {
270
query = intel_perf_append_query_info(perf, 2 + 36 + 16 + 16);
271
query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
272
273
struct gfx8_mdapi_metrics metric_data;
274
query->data_size = sizeof(metric_data);
275
276
MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
277
MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
278
for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
279
MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
280
metric_data, OaCntr, i, UINT64);
281
}
282
for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
283
MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
284
metric_data, NoaCntr, i, UINT64);
285
}
286
MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
287
MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
288
MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
289
MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
290
MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
291
MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
292
MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
293
MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
294
MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
295
MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
296
MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
297
MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
298
MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
299
MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
300
MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
301
MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
302
break;
303
}
304
case 9:
305
case 11:
306
case 12: {
307
query = intel_perf_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
308
query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
309
310
struct gfx9_mdapi_metrics metric_data;
311
query->data_size = sizeof(metric_data);
312
313
MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);
314
MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);
315
for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {
316
MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
317
metric_data, OaCntr, i, UINT64);
318
}
319
for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {
320
MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
321
metric_data, NoaCntr, i, UINT64);
322
}
323
MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);
324
MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);
325
MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);
326
MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);
327
MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);
328
MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);
329
MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);
330
MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);
331
MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);
332
MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);
333
MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);
334
MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);
335
MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);
336
MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);
337
MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);
338
MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);
339
for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) {
340
MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,
341
metric_data, UserCntr, i, UINT64);
342
}
343
MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32);
344
MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32);
345
break;
346
}
347
default:
348
unreachable("Unsupported gen");
349
break;
350
}
351
352
query->kind = INTEL_PERF_QUERY_TYPE_RAW;
353
query->name = "Intel_Raw_Hardware_Counters_Set_0_Query";
354
query->guid = INTEL_PERF_QUERY_GUID_MDAPI;
355
356
{
357
/* Accumulation buffer offsets copied from an actual query... */
358
const struct intel_perf_query_info *copy_query =
359
&perf->queries[0];
360
361
query->gpu_time_offset = copy_query->gpu_time_offset;
362
query->gpu_clock_offset = copy_query->gpu_clock_offset;
363
query->a_offset = copy_query->a_offset;
364
query->b_offset = copy_query->b_offset;
365
query->c_offset = copy_query->c_offset;
366
query->perfcnt_offset = copy_query->perfcnt_offset;
367
}
368
}
369
370