Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/intel/ds/intel_pps_driver.cc
4547 views
1
/*
2
* Copyright © 2020-2021 Collabora, Ltd.
3
* Author: Antonio Caggiano <[email protected]>
4
* Author: Corentin Noël <[email protected]>
5
*
6
* SPDX-License-Identifier: MIT
7
*/
8
9
#include "intel_pps_driver.h"
10
11
#include <dirent.h>
12
#include <fcntl.h>
13
#include <math.h>
14
#include <poll.h>
15
#include <strings.h>
16
#include <sys/ioctl.h>
17
#include <unistd.h>
18
19
#include <i915_drm.h>
20
#include <intel/perf/intel_perf_query.h>
21
22
#include <pps/pps.h>
23
#include <pps/pps_algorithm.h>
24
25
#include "intel_pps_perf.h"
26
27
namespace pps
28
{
29
/// @brief Reports the minimum sampling period of this driver
/// @return The period in nanoseconds (500000 ns, i.e. 0.5 ms)
uint64_t IntelDriver::get_min_sampling_period_ns()
{
   constexpr uint64_t min_period_ns = 500000;
   return min_period_ns;
}
33
34
void IntelDriver::enable_counter(uint32_t counter_id)
35
{
36
auto &counter = counters[counter_id];
37
auto &group = groups[counter.group];
38
if (perf->query) {
39
if (perf->query->symbol_name != group.name) {
40
PPS_LOG_ERROR(
41
"Unable to enable metrics from different sets: %u "
42
"belongs to %s but %s is currently in use.",
43
counter_id,
44
perf->query->symbol_name,
45
group.name.c_str());
46
return;
47
}
48
}
49
50
enabled_counters.emplace_back(counter);
51
if (!perf->query) {
52
perf->query = perf->find_query_by_name(group.name);
53
}
54
}
55
56
void IntelDriver::enable_all_counters()
57
{
58
// We can only enable one metric set at a time so at least enable one.
59
for (auto &group : groups) {
60
if (group.name == "RenderBasic") {
61
for (uint32_t counter_id : group.counters) {
62
auto &counter = counters[counter_id];
63
enabled_counters.emplace_back(counter);
64
}
65
66
perf->query = perf->find_query_by_name(group.name);
67
break;
68
}
69
}
70
}
71
72
/// @brief Computes the distance between two points in time
/// @param begin Earlier point in time
/// @param end Later point in time
/// @return Nanoseconds elapsed from `begin` to `end`
static uint64_t timespec_diff(timespec *begin, timespec *end)
{
   // The whole-second part is scaled first; the nanosecond remainders are then
   // applied in unsigned arithmetic, so a smaller end->tv_nsec simply borrows
   // from the seconds.
   const uint64_t whole_seconds_ns = 1000000000ull * (end->tv_sec - begin->tv_sec);
   return whole_seconds_ns + end->tv_nsec - begin->tv_nsec;
}
76
77
/// @brief This function tries to correlate CPU time with GPU time
78
std::optional<TimestampCorrelation> IntelDriver::query_correlation_timestamps() const
79
{
80
TimestampCorrelation corr = {};
81
82
clock_t correlation_clock_id = CLOCK_BOOTTIME;
83
84
drm_i915_reg_read reg_read = {};
85
const uint64_t render_ring_timestamp = 0x2358;
86
reg_read.offset = render_ring_timestamp | I915_REG_READ_8B_WA;
87
88
constexpr size_t attempt_count = 3;
89
struct {
90
timespec cpu_ts_begin;
91
timespec cpu_ts_end;
92
uint64_t gpu_ts;
93
} attempts[attempt_count] = {};
94
95
uint32_t best = 0;
96
97
// Gather 3 correlations
98
for (uint32_t i = 0; i < attempt_count; i++) {
99
clock_gettime(correlation_clock_id, &attempts[i].cpu_ts_begin);
100
if (perf_ioctl(drm_device.fd, DRM_IOCTL_I915_REG_READ, &reg_read) < 0) {
101
return std::nullopt;
102
}
103
clock_gettime(correlation_clock_id, &attempts[i].cpu_ts_end);
104
105
attempts[i].gpu_ts = reg_read.val;
106
}
107
108
// Now select the best
109
for (uint32_t i = 1; i < attempt_count; i++) {
110
if (timespec_diff(&attempts[i].cpu_ts_begin, &attempts[i].cpu_ts_end) <
111
timespec_diff(&attempts[best].cpu_ts_begin, &attempts[best].cpu_ts_end)) {
112
best = i;
113
}
114
}
115
116
corr.cpu_timestamp =
117
(attempts[best].cpu_ts_begin.tv_sec * 1000000000ull + attempts[best].cpu_ts_begin.tv_nsec) +
118
timespec_diff(&attempts[best].cpu_ts_begin, &attempts[best].cpu_ts_end) / 2;
119
corr.gpu_timestamp = attempts[best].gpu_ts;
120
121
return corr;
122
}
123
124
void IntelDriver::get_new_correlation()
125
{
126
// Rotate left correlations by one position so to make space at the end
127
std::rotate(correlations.begin(), correlations.begin() + 1, correlations.end());
128
129
// Then we overwrite the last correlation with a new one
130
if (auto corr = query_correlation_timestamps()) {
131
correlations.back() = *corr;
132
} else {
133
PPS_LOG_FATAL("Failed to get correlation timestamps");
134
}
135
}
136
137
bool IntelDriver::init_perfcnt()
138
{
139
assert(!perf && "Intel perf should not be initialized at this point");
140
141
perf = std::make_unique<IntelPerf>(drm_device.fd);
142
143
for (auto &query : perf->get_queries()) {
144
// Create group
145
CounterGroup group = {};
146
group.id = groups.size();
147
group.name = query->symbol_name;
148
149
for (int i = 0; i < query->n_counters; ++i) {
150
intel_perf_query_counter &counter = query->counters[i];
151
152
// Create counter
153
Counter counter_desc = {};
154
counter_desc.id = counters.size();
155
counter_desc.name = counter.symbol_name;
156
counter_desc.group = group.id;
157
counter_desc.getter = [counter, query, this](
158
const Counter &c, const Driver &dri) -> Counter::Value {
159
switch (counter.data_type) {
160
case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:
161
case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:
162
case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:
163
return (int64_t)counter.oa_counter_read_uint64(perf->cfg, query, &result);
164
break;
165
case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE:
166
case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:
167
return counter.oa_counter_read_float(perf->cfg, query, &result);
168
break;
169
}
170
171
return {};
172
};
173
174
// Add counter id to the group
175
group.counters.emplace_back(counter_desc.id);
176
177
// Store counter
178
counters.emplace_back(std::move(counter_desc));
179
}
180
181
// Store group
182
groups.emplace_back(std::move(group));
183
}
184
185
assert(groups.size() && "Failed to query groups");
186
assert(counters.size() && "Failed to query counters");
187
188
// Clear accumulations
189
intel_perf_query_result_clear(&result);
190
191
return true;
192
}
193
194
void IntelDriver::enable_perfcnt(uint64_t sampling_period_ns)
195
{
196
this->sampling_period_ns = sampling_period_ns;
197
198
// Fill correlations with an initial one
199
if (auto corr = query_correlation_timestamps()) {
200
correlations.fill(*corr);
201
} else {
202
PPS_LOG_FATAL("Failed to get correlation timestamps");
203
}
204
205
if (!perf->open(sampling_period_ns)) {
206
PPS_LOG_FATAL("Failed to open intel perf");
207
}
208
}
209
210
/// @brief Transforms the GPU timestop into a CPU timestamp equivalent
211
uint64_t IntelDriver::correlate_gpu_timestamp(const uint32_t gpu_ts)
212
{
213
auto &corr_a = correlations[0];
214
auto &corr_b = correlations[correlations.size() - 1];
215
216
// A correlation timestamp has 36 bits, so get the first 32 to make it work with gpu_ts
217
uint64_t mask = 0xffffffff;
218
uint32_t corr_a_gpu_ts = corr_a.gpu_timestamp & mask;
219
uint32_t corr_b_gpu_ts = corr_b.gpu_timestamp & mask;
220
221
// Make sure it is within the interval [a,b)
222
assert(gpu_ts >= corr_a_gpu_ts && "GPU TS < Corr a");
223
assert(gpu_ts < corr_b_gpu_ts && "GPU TS >= Corr b");
224
225
uint32_t gpu_delta = gpu_ts - corr_a_gpu_ts;
226
// Factor to convert gpu time to cpu time
227
double gpu_to_cpu = (corr_b.cpu_timestamp - corr_a.cpu_timestamp) /
228
double(corr_b.gpu_timestamp - corr_a.gpu_timestamp);
229
uint64_t cpu_delta = gpu_delta * gpu_to_cpu;
230
return corr_a.cpu_timestamp + cpu_delta;
231
}
232
233
void IntelDriver::disable_perfcnt()
234
{
235
perf = nullptr;
236
groups.clear();
237
counters.clear();
238
enabled_counters.clear();
239
}
240
241
/// @brief Leading fields of the report that follows a perf record header
///
/// Only `timestamp` is read in this file, where it is treated as a 32-bit GPU
/// timestamp.
struct Report {
   uint32_t version;
   uint32_t timestamp; ///< GPU timestamp of the report
   uint32_t id;
};
246
247
/// @brief Some perf record durations can be really short
/// @param duration Time elapsed since the previous accepted record, in ns
/// @param sampling_period Configured sampling period, in ns
/// @return True if the duration is at least close to the sampling period, i.e.
/// longer than the sampling period minus a 100000 ns tolerance
static bool close_enough(uint64_t duration, uint64_t sampling_period)
{
   constexpr uint64_t tolerance_ns = 100000;

   // Saturate at zero: a plain unsigned subtraction wraps around for sampling
   // periods below the tolerance, which would produce a huge threshold and
   // make the function reject almost every duration.
   const uint64_t threshold =
      sampling_period > tolerance_ns ? sampling_period - tolerance_ns : 0;

   return duration > threshold;
}
253
254
/// @brief Transforms the raw data received in from the driver into records
255
std::vector<PerfRecord> IntelDriver::parse_perf_records(const std::vector<uint8_t> &data,
256
const size_t byte_count)
257
{
258
std::vector<PerfRecord> records;
259
records.reserve(128);
260
261
PerfRecord record;
262
record.reserve(512);
263
264
const uint8_t *iter = data.data();
265
const uint8_t *end = iter + byte_count;
266
267
uint64_t prev_cpu_timestamp = last_cpu_timestamp;
268
269
while (iter < end) {
270
// Iterate a record at a time
271
auto header = reinterpret_cast<const drm_i915_perf_record_header *>(iter);
272
273
if (header->type == DRM_I915_PERF_RECORD_SAMPLE) {
274
// Report is next to the header
275
auto report = reinterpret_cast<const Report *>(header + 1);
276
auto cpu_timestamp = correlate_gpu_timestamp(report->timestamp);
277
auto duration = cpu_timestamp - prev_cpu_timestamp;
278
279
// Skip perf-records that are too short by checking
280
// the distance between last report and this one
281
if (close_enough(duration, sampling_period_ns)) {
282
prev_cpu_timestamp = cpu_timestamp;
283
284
// Add the new record to the list
285
record.resize(header->size); // Possibly 264?
286
memcpy(record.data(), iter, header->size);
287
records.emplace_back(record);
288
}
289
}
290
291
// Go to the next record
292
iter += header->size;
293
}
294
295
return records;
296
}
297
298
/// @brief Read all the available data from the metric set currently in use
299
void IntelDriver::read_data_from_metric_set()
300
{
301
assert(metric_buffer.size() >= 1024 && "Metric buffer should have space for reading");
302
303
ssize_t bytes_read = 0;
304
while ((bytes_read = perf->read_oa_stream(metric_buffer.data() + total_bytes_read,
305
metric_buffer.size() - total_bytes_read)) > 0 ||
306
errno == EINTR) {
307
total_bytes_read += std::max(ssize_t(0), bytes_read);
308
309
// Increase size of the buffer for the next read
310
if (metric_buffer.size() / 2 < total_bytes_read) {
311
metric_buffer.resize(metric_buffer.size() * 2);
312
}
313
}
314
315
assert(total_bytes_read < metric_buffer.size() && "Buffer not big enough");
316
}
317
318
bool IntelDriver::dump_perfcnt()
319
{
320
if (!perf->oa_stream_ready()) {
321
return false;
322
}
323
324
read_data_from_metric_set();
325
326
get_new_correlation();
327
328
auto new_records = parse_perf_records(metric_buffer, total_bytes_read);
329
if (new_records.empty()) {
330
PPS_LOG("No new records");
331
// No new records from the GPU yet
332
return false;
333
} else {
334
PPS_LOG("Records parsed bytes: %lu", total_bytes_read);
335
// Records are parsed correctly, so we can reset the
336
// number of bytes read so far from the metric set
337
total_bytes_read = 0;
338
}
339
340
APPEND(records, new_records);
341
342
if (records.size() < 2) {
343
// Not enough records to accumulate
344
return false;
345
}
346
347
return true;
348
}
349
350
uint32_t IntelDriver::gpu_next()
351
{
352
if (records.size() < 2) {
353
// Not enough records to accumulate
354
return 0;
355
}
356
357
// Get first and second
358
auto record_a = reinterpret_cast<const drm_i915_perf_record_header *>(records[0].data());
359
auto record_b = reinterpret_cast<const drm_i915_perf_record_header *>(records[1].data());
360
361
intel_perf_query_result_accumulate_fields(&result,
362
&perf->query.value(),
363
&perf->devinfo,
364
record_a + 1,
365
record_b + 1,
366
false /* no_oa_accumulate */);
367
368
// Get last timestamp
369
auto report_b = reinterpret_cast<const Report *>(record_b + 1);
370
auto gpu_timestamp = report_b->timestamp;
371
372
// Consume first record
373
records.erase(std::begin(records), std::begin(records) + 1);
374
375
return gpu_timestamp;
376
}
377
378
uint64_t IntelDriver::cpu_next()
379
{
380
if (auto gpu_timestamp = gpu_next()) {
381
auto cpu_timestamp = correlate_gpu_timestamp(gpu_timestamp);
382
383
last_cpu_timestamp = cpu_timestamp;
384
return cpu_timestamp;
385
}
386
387
return 0;
388
}
389
390
uint64_t IntelDriver::next()
391
{
392
// Reset accumulation
393
intel_perf_query_result_clear(&result);
394
return cpu_next();
395
}
396
397
} // namespace pps
398
399