Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/intel/vulkan/anv_perf.c
4547 views
1
/*
2
* Copyright © 2018 Intel Corporation
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
* DEALINGS IN THE SOFTWARE.
22
*/
23
24
#include <assert.h>
25
#include <stdbool.h>
26
#include <stdint.h>
27
28
#include "anv_private.h"
29
#include "vk_util.h"
30
31
#include "perf/intel_perf.h"
32
#include "perf/intel_perf_mdapi.h"
33
34
#include "util/mesa-sha1.h"
35
36
void
37
anv_physical_device_init_perf(struct anv_physical_device *device, int fd)
38
{
39
const struct intel_device_info *devinfo = &device->info;
40
41
device->perf = NULL;
42
43
/* We need self modifying batches. The i915 parser prevents it on
44
* Gfx7.5 :( maybe one day.
45
*/
46
if (devinfo->ver < 8)
47
return;
48
49
struct intel_perf_config *perf = intel_perf_new(NULL);
50
51
intel_perf_init_metrics(perf, &device->info, fd,
52
false /* pipeline statistics */,
53
true /* register snapshots */);
54
55
if (!perf->n_queries) {
56
if (perf->platform_supported) {
57
static bool warned_once = false;
58
59
if (!warned_once) {
60
mesa_logw("Performance support disabled, "
61
"consider sysctl dev.i915.perf_stream_paranoid=0\n");
62
warned_once = true;
63
}
64
}
65
goto err;
66
}
67
68
/* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in
69
* perf revision 2.
70
*/
71
if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {
72
if (!intel_perf_has_hold_preemption(perf))
73
goto err;
74
}
75
76
device->perf = perf;
77
78
/* Compute the number of commands we need to implement a performance
79
* query.
80
*/
81
const struct intel_perf_query_field_layout *layout = &perf->query_layout;
82
device->n_perf_query_commands = 0;
83
for (uint32_t f = 0; f < layout->n_fields; f++) {
84
struct intel_perf_query_field *field = &layout->fields[f];
85
86
switch (field->type) {
87
case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC:
88
device->n_perf_query_commands++;
89
break;
90
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
91
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT:
92
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
93
case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
94
device->n_perf_query_commands += field->size / 4;
95
break;
96
}
97
}
98
device->n_perf_query_commands *= 2; /* Begin & End */
99
device->n_perf_query_commands += 1; /* availability */
100
101
return;
102
103
err:
104
ralloc_free(perf);
105
}
106
107
void
108
anv_device_perf_init(struct anv_device *device)
109
{
110
device->perf_fd = -1;
111
}
112
113
static int
114
anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
115
{
116
uint64_t properties[DRM_I915_PERF_PROP_MAX * 2];
117
struct drm_i915_perf_open_param param;
118
int p = 0, stream_fd;
119
120
properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA;
121
properties[p++] = true;
122
123
properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET;
124
properties[p++] = metric_id;
125
126
properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;
127
properties[p++] = device->info.ver >= 8 ?
128
I915_OA_FORMAT_A32u40_A4u32_B8_C8 :
129
I915_OA_FORMAT_A45_B8_C8;
130
131
properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
132
properties[p++] = 31; /* slowest sampling period */
133
134
properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE;
135
properties[p++] = device->context_id;
136
137
properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION;
138
properties[p++] = true;
139
140
/* If global SSEU is available, pin it to the default. This will ensure on
141
* Gfx11 for instance we use the full EU array. Initially when perf was
142
* enabled we would use only half on Gfx11 because of functional
143
* requirements.
144
*/
145
if (intel_perf_has_global_sseu(device->physical->perf)) {
146
properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;
147
properties[p++] = (uintptr_t) &device->physical->perf->sseu;
148
}
149
150
memset(&param, 0, sizeof(param));
151
param.flags = 0;
152
param.flags |= I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK;
153
param.properties_ptr = (uintptr_t)properties;
154
param.num_properties = p / 2;
155
156
stream_fd = intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_OPEN, &param);
157
return stream_fd;
158
}
159
160
/* VK_INTEL_performance_query */
161
VkResult anv_InitializePerformanceApiINTEL(
162
VkDevice _device,
163
const VkInitializePerformanceApiInfoINTEL* pInitializeInfo)
164
{
165
ANV_FROM_HANDLE(anv_device, device, _device);
166
167
if (!device->physical->perf)
168
return VK_ERROR_EXTENSION_NOT_PRESENT;
169
170
/* Not much to do here */
171
return VK_SUCCESS;
172
}
173
174
VkResult anv_GetPerformanceParameterINTEL(
175
VkDevice _device,
176
VkPerformanceParameterTypeINTEL parameter,
177
VkPerformanceValueINTEL* pValue)
178
{
179
ANV_FROM_HANDLE(anv_device, device, _device);
180
181
if (!device->physical->perf)
182
return VK_ERROR_EXTENSION_NOT_PRESENT;
183
184
VkResult result = VK_SUCCESS;
185
switch (parameter) {
186
case VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL:
187
pValue->type = VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL;
188
pValue->data.valueBool = VK_TRUE;
189
break;
190
191
case VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL:
192
pValue->type = VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL;
193
pValue->data.value32 = 25;
194
break;
195
196
default:
197
result = VK_ERROR_FEATURE_NOT_PRESENT;
198
break;
199
}
200
201
return result;
202
}
203
204
VkResult anv_CmdSetPerformanceMarkerINTEL(
205
VkCommandBuffer commandBuffer,
206
const VkPerformanceMarkerInfoINTEL* pMarkerInfo)
207
{
208
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
209
210
cmd_buffer->intel_perf_marker = pMarkerInfo->marker;
211
212
return VK_SUCCESS;
213
}
214
215
VkResult anv_AcquirePerformanceConfigurationINTEL(
216
VkDevice _device,
217
const VkPerformanceConfigurationAcquireInfoINTEL* pAcquireInfo,
218
VkPerformanceConfigurationINTEL* pConfiguration)
219
{
220
ANV_FROM_HANDLE(anv_device, device, _device);
221
struct anv_performance_configuration_intel *config;
222
223
config = vk_object_alloc(&device->vk, NULL, sizeof(*config),
224
VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL);
225
if (!config)
226
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
227
228
if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {
229
config->register_config =
230
intel_perf_load_configuration(device->physical->perf, device->fd,
231
INTEL_PERF_QUERY_GUID_MDAPI);
232
if (!config->register_config) {
233
vk_object_free(&device->vk, NULL, config);
234
return VK_INCOMPLETE;
235
}
236
237
int ret =
238
intel_perf_store_configuration(device->physical->perf, device->fd,
239
config->register_config, NULL /* guid */);
240
if (ret < 0) {
241
ralloc_free(config->register_config);
242
vk_object_free(&device->vk, NULL, config);
243
return VK_INCOMPLETE;
244
}
245
246
config->config_id = ret;
247
}
248
249
*pConfiguration = anv_performance_configuration_intel_to_handle(config);
250
251
return VK_SUCCESS;
252
}
253
254
VkResult anv_ReleasePerformanceConfigurationINTEL(
255
VkDevice _device,
256
VkPerformanceConfigurationINTEL _configuration)
257
{
258
ANV_FROM_HANDLE(anv_device, device, _device);
259
ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
260
261
if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG))
262
intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config->config_id);
263
264
ralloc_free(config->register_config);
265
266
vk_object_free(&device->vk, NULL, config);
267
268
return VK_SUCCESS;
269
}
270
271
VkResult anv_QueueSetPerformanceConfigurationINTEL(
272
VkQueue _queue,
273
VkPerformanceConfigurationINTEL _configuration)
274
{
275
ANV_FROM_HANDLE(anv_queue, queue, _queue);
276
ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);
277
struct anv_device *device = queue->device;
278
279
if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {
280
if (device->perf_fd < 0) {
281
device->perf_fd = anv_device_perf_open(device, config->config_id);
282
if (device->perf_fd < 0)
283
return VK_ERROR_INITIALIZATION_FAILED;
284
} else {
285
int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,
286
(void *)(uintptr_t) config->config_id);
287
if (ret < 0)
288
return anv_device_set_lost(device, "i915-perf config failed: %m");
289
}
290
}
291
292
return VK_SUCCESS;
293
}
294
295
void anv_UninitializePerformanceApiINTEL(
296
VkDevice _device)
297
{
298
ANV_FROM_HANDLE(anv_device, device, _device);
299
300
if (device->perf_fd >= 0) {
301
close(device->perf_fd);
302
device->perf_fd = -1;
303
}
304
}
305
306
/* VK_KHR_performance_query */
307
static const VkPerformanceCounterUnitKHR
308
intel_perf_counter_unit_to_vk_unit[] = {
309
[INTEL_PERF_COUNTER_UNITS_BYTES] = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,
310
[INTEL_PERF_COUNTER_UNITS_HZ] = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,
311
[INTEL_PERF_COUNTER_UNITS_NS] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,
312
[INTEL_PERF_COUNTER_UNITS_US] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR, /* todo */
313
[INTEL_PERF_COUNTER_UNITS_PIXELS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
314
[INTEL_PERF_COUNTER_UNITS_TEXELS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
315
[INTEL_PERF_COUNTER_UNITS_THREADS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
316
[INTEL_PERF_COUNTER_UNITS_PERCENT] = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR,
317
[INTEL_PERF_COUNTER_UNITS_MESSAGES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
318
[INTEL_PERF_COUNTER_UNITS_NUMBER] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
319
[INTEL_PERF_COUNTER_UNITS_CYCLES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
320
[INTEL_PERF_COUNTER_UNITS_EVENTS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
321
[INTEL_PERF_COUNTER_UNITS_UTILIZATION] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
322
[INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
323
[INTEL_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
324
[INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
325
[INTEL_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,
326
};
327
328
static const VkPerformanceCounterStorageKHR
329
intel_perf_counter_data_type_to_vk_storage[] = {
330
[INTEL_PERF_COUNTER_DATA_TYPE_BOOL32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
331
[INTEL_PERF_COUNTER_DATA_TYPE_UINT32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,
332
[INTEL_PERF_COUNTER_DATA_TYPE_UINT64] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,
333
[INTEL_PERF_COUNTER_DATA_TYPE_FLOAT] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR,
334
[INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR,
335
};
336
337
VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
338
VkPhysicalDevice physicalDevice,
339
uint32_t queueFamilyIndex,
340
uint32_t* pCounterCount,
341
VkPerformanceCounterKHR* pCounters,
342
VkPerformanceCounterDescriptionKHR* pCounterDescriptions)
343
{
344
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
345
struct intel_perf_config *perf = pdevice->perf;
346
347
uint32_t desc_count = *pCounterCount;
348
349
VK_OUTARRAY_MAKE(out, pCounters, pCounterCount);
350
VK_OUTARRAY_MAKE(out_desc, pCounterDescriptions, &desc_count);
351
352
for (int c = 0; c < (perf ? perf->n_counters : 0); c++) {
353
const struct intel_perf_query_counter *intel_counter = perf->counter_infos[c].counter;
354
355
vk_outarray_append(&out, counter) {
356
counter->unit = intel_perf_counter_unit_to_vk_unit[intel_counter->units];
357
counter->scope = VK_QUERY_SCOPE_COMMAND_KHR;
358
counter->storage = intel_perf_counter_data_type_to_vk_storage[intel_counter->data_type];
359
360
unsigned char sha1_result[20];
361
_mesa_sha1_compute(intel_counter->symbol_name,
362
strlen(intel_counter->symbol_name),
363
sha1_result);
364
memcpy(counter->uuid, sha1_result, sizeof(counter->uuid));
365
}
366
367
vk_outarray_append(&out_desc, desc) {
368
desc->flags = 0; /* None so far. */
369
snprintf(desc->name, sizeof(desc->name), "%s", intel_counter->name);
370
snprintf(desc->category, sizeof(desc->category), "%s", intel_counter->category);
371
snprintf(desc->description, sizeof(desc->description), "%s", intel_counter->desc);
372
}
373
}
374
375
return vk_outarray_status(&out);
376
}
377
378
void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
379
VkPhysicalDevice physicalDevice,
380
const VkQueryPoolPerformanceCreateInfoKHR* pPerformanceQueryCreateInfo,
381
uint32_t* pNumPasses)
382
{
383
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
384
struct intel_perf_config *perf = pdevice->perf;
385
386
if (!perf) {
387
*pNumPasses = 0;
388
return;
389
}
390
391
*pNumPasses = intel_perf_get_n_passes(perf,
392
pPerformanceQueryCreateInfo->pCounterIndices,
393
pPerformanceQueryCreateInfo->counterIndexCount,
394
NULL);
395
}
396
397
VkResult anv_AcquireProfilingLockKHR(
398
VkDevice _device,
399
const VkAcquireProfilingLockInfoKHR* pInfo)
400
{
401
ANV_FROM_HANDLE(anv_device, device, _device);
402
struct intel_perf_config *perf = device->physical->perf;
403
struct intel_perf_query_info *first_metric_set = &perf->queries[0];
404
int fd = -1;
405
406
assert(device->perf_fd == -1);
407
408
if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {
409
fd = anv_device_perf_open(device, first_metric_set->oa_metrics_set_id);
410
if (fd < 0)
411
return VK_TIMEOUT;
412
}
413
414
device->perf_fd = fd;
415
return VK_SUCCESS;
416
}
417
418
void anv_ReleaseProfilingLockKHR(
419
VkDevice _device)
420
{
421
ANV_FROM_HANDLE(anv_device, device, _device);
422
423
if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {
424
assert(device->perf_fd >= 0);
425
close(device->perf_fd);
426
}
427
device->perf_fd = -1;
428
}
429
430
void
431
anv_perf_write_pass_results(struct intel_perf_config *perf,
432
struct anv_query_pool *pool, uint32_t pass,
433
const struct intel_perf_query_result *accumulated_results,
434
union VkPerformanceCounterResultKHR *results)
435
{
436
for (uint32_t c = 0; c < pool->n_counters; c++) {
437
const struct intel_perf_counter_pass *counter_pass = &pool->counter_pass[c];
438
439
if (counter_pass->pass != pass)
440
continue;
441
442
switch (pool->pass_query[pass]->kind) {
443
case INTEL_PERF_QUERY_TYPE_PIPELINE: {
444
assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
445
uint32_t accu_offset = counter_pass->counter->offset / sizeof(uint64_t);
446
results[c].uint64 = accumulated_results->accumulator[accu_offset];
447
break;
448
}
449
450
case INTEL_PERF_QUERY_TYPE_OA:
451
case INTEL_PERF_QUERY_TYPE_RAW:
452
switch (counter_pass->counter->data_type) {
453
case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
454
results[c].uint64 =
455
counter_pass->counter->oa_counter_read_uint64(perf,
456
counter_pass->query,
457
accumulated_results);
458
break;
459
case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
460
results[c].float32 =
461
counter_pass->counter->oa_counter_read_float(perf,
462
counter_pass->query,
463
accumulated_results);
464
break;
465
default:
466
/* So far we aren't using uint32, double or bool32... */
467
unreachable("unexpected counter data type");
468
}
469
break;
470
471
default:
472
unreachable("invalid query type");
473
}
474
475
/* The Vulkan extension only has nanoseconds as a unit */
476
if (counter_pass->counter->units == INTEL_PERF_COUNTER_UNITS_US) {
477
assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);
478
results[c].uint64 *= 1000;
479
}
480
}
481
}
482
483