Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/intel/tools/aub_write.c
4547 views
1
/*
2
* Copyright © 2015 Intel Corporation
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*/
23
24
#include "aub_write.h"
25
26
#include <inttypes.h>
27
#include <signal.h>
28
#include <stdarg.h>
29
#include <stdlib.h>
30
#include <string.h>
31
32
#include "drm-uapi/i915_drm.h"
33
#include "intel_aub.h"
34
#include "intel_context.h"
35
36
#ifndef ALIGN
37
#define ALIGN(x, y) (((x) + (y)-1) & ~((y)-1))
38
#endif
39
40
#define MI_BATCH_NON_SECURE_I965 (1 << 8)
41
42
#define min(a, b) ({ \
43
__typeof(a) _a = (a); \
44
__typeof(b) _b = (b); \
45
_a < _b ? _a : _b; \
46
})
47
48
#define max(a, b) ({ \
49
__typeof(a) _a = (a); \
50
__typeof(b) _b = (b); \
51
_a > _b ? _a : _b; \
52
})
53
54
static struct aub_context *aub_context_new(struct aub_file *aub, uint32_t new_id);
55
static void mem_trace_memory_write_header_out(struct aub_file *aub, uint64_t addr,
56
uint32_t len, uint32_t addr_space,
57
const char *desc);
58
59
#define fail_if(cond, ...) _fail_if(cond, NULL, __VA_ARGS__)
60
61
/* Round v up to the next multiple of a (a must be a power of two). */
static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
   uint32_t mask = a - 1;
   return (v + mask) & ~mask;
}
66
67
static void
68
aub_ppgtt_table_finish(struct aub_ppgtt_table *table, int level)
69
{
70
if (level == 1)
71
return;
72
73
for (unsigned i = 0; i < ARRAY_SIZE(table->subtables); i++) {
74
if (table->subtables[i]) {
75
aub_ppgtt_table_finish(table->subtables[i], level - 1);
76
free(table->subtables[i]);
77
}
78
}
79
}
80
81
static void
82
data_out(struct aub_file *aub, const void *data, size_t size)
83
{
84
if (size == 0)
85
return;
86
87
fail_if(fwrite(data, 1, size, aub->file) == 0,
88
"Writing to output failed\n");
89
}
90
91
/* Emit a single little-endian dword to the AUB stream. */
static void
dword_out(struct aub_file *aub, uint32_t data)
{
   uint32_t dw = data;

   data_out(aub, &dw, sizeof(dw));
}
96
97
static void
98
write_execlists_header(struct aub_file *aub, const char *name)
99
{
100
char app_name[8 * 4];
101
int app_name_len, dwords;
102
103
app_name_len =
104
snprintf(app_name, sizeof(app_name), "PCI-ID=0x%X %s",
105
aub->pci_id, name);
106
app_name_len = ALIGN(app_name_len, sizeof(uint32_t));
107
108
dwords = 5 + app_name_len / sizeof(uint32_t);
109
dword_out(aub, CMD_MEM_TRACE_VERSION | (dwords - 1));
110
dword_out(aub, AUB_MEM_TRACE_VERSION_FILE_VERSION);
111
dword_out(aub, aub->devinfo.simulator_id << AUB_MEM_TRACE_VERSION_DEVICE_SHIFT);
112
dword_out(aub, 0); /* version */
113
dword_out(aub, 0); /* version */
114
data_out(aub, app_name, app_name_len);
115
}
116
117
/* Emit the legacy (pre-execlist) AUB file header: version packet, a fixed
 * 32-byte application name, timestamps and a "PCI-ID=..." comment string.
 */
static void
write_legacy_header(struct aub_file *aub, const char *name)
{
   char app_name[8 * 4];
   char comment[16];
   int comment_len, comment_dwords, dwords;

   /* comment[16] always fits "PCI-ID=0x%x" for a 16-bit pci_id. */
   comment_len = snprintf(comment, sizeof(comment), "PCI-ID=0x%x", aub->pci_id);
   comment_dwords = ((comment_len + 3) / 4); /* round up to whole dwords */

   /* Start with a (required) version packet. */
   dwords = 13 + comment_dwords;
   dword_out(aub, CMD_AUB_HEADER | (dwords - 2));
   dword_out(aub, (4 << AUB_HEADER_MAJOR_SHIFT) |
                  (0 << AUB_HEADER_MINOR_SHIFT));

   /* Next comes a 32-byte application name. */
   strncpy(app_name, name, sizeof(app_name));
   app_name[sizeof(app_name) - 1] = 0; /* strncpy may not NUL-terminate */
   data_out(aub, app_name, sizeof(app_name));

   dword_out(aub, 0); /* timestamp */
   dword_out(aub, 0); /* timestamp */
   dword_out(aub, comment_len);
   /* May emit up to 3 bytes past comment_len, still within comment[]. */
   data_out(aub, comment, comment_dwords * 4);
}
143
144
145
static void
146
aub_write_header(struct aub_file *aub, const char *app_name)
147
{
148
if (aub_use_execlists(aub))
149
write_execlists_header(aub, app_name);
150
else
151
write_legacy_header(aub, app_name);
152
}
153
154
/* Initialize an aub_file for the given PCI id and write the file header
 * plus the minimal initial state (GGTT PTE 0 and the default context).
 * Takes ownership of neither FILE*; caller closes `debug`, while `file`
 * is closed by aub_file_finish().
 */
void
aub_file_init(struct aub_file *aub, FILE *file, FILE *debug, uint16_t pci_id, const char *app_name)
{
   memset(aub, 0, sizeof(*aub));

   aub->verbose_log_file = debug;
   aub->file = file;
   aub->pci_id = pci_id;
   fail_if(!intel_get_device_info_from_pci_id(pci_id, &aub->devinfo),
           "failed to identify chipset=0x%x\n", pci_id);
   /* Gfx8+ uses 48-bit PPGTT addressing, older parts 32-bit. */
   aub->addr_bits = aub->devinfo.ver >= 8 ? 48 : 32;

   aub_write_header(aub, app_name);

   aub->phys_addrs_allocator = 0;
   aub->ggtt_addrs_allocator = 0;
   /* Physical page 0 is reserved for the PML4 root table. */
   aub->pml4.phys_addr = aub->phys_addrs_allocator++ << 12;

   /* Write GGTT PTE 0 (value 1 = present, pointing at physical page 0). */
   mem_trace_memory_write_header_out(aub, aub->ggtt_addrs_allocator++,
                                     GFX8_PTE_SIZE,
                                     AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY,
                                     "GGTT PT");
   dword_out(aub, 1);
   dword_out(aub, 0);

   aub->next_context_handle = 1;
   aub_context_new(aub, 0); /* Default context */
}
182
183
/* Release the 4-level PPGTT translation tables and close the output file
 * (which also flushes buffered data).
 */
void
aub_file_finish(struct aub_file *aub)
{
   aub_ppgtt_table_finish(&aub->pml4, 4);
   fclose(aub->file);
}
189
190
uint32_t
191
aub_gtt_size(struct aub_file *aub)
192
{
193
return NUM_PT_ENTRIES * (aub->addr_bits > 32 ? GFX8_PTE_SIZE : PTE_SIZE);
194
}
195
196
/* Emit the header of a CMD_MEM_TRACE_MEMORY_WRITE packet. The caller must
 * follow up with exactly ALIGN(len, 4) bytes of payload (via data_out or
 * dword_out). `desc` is only used for the verbose log.
 */
static void
mem_trace_memory_write_header_out(struct aub_file *aub, uint64_t addr,
                                  uint32_t len, uint32_t addr_space,
                                  const char *desc)
{
   /* Payload length in dwords, rounded up. */
   uint32_t dwords = ALIGN(len, sizeof(uint32_t)) / sizeof(uint32_t);

   if (aub->verbose_log_file) {
      fprintf(aub->verbose_log_file,
              " MEM WRITE (0x%016" PRIx64 "-0x%016" PRIx64 ") %s\n",
              addr, addr + len, desc);
   }

   /* 4 header dwords after the command dword + the payload dwords. */
   dword_out(aub, CMD_MEM_TRACE_MEMORY_WRITE | (5 + dwords - 1));
   dword_out(aub, addr & 0xFFFFFFFF); /* addr lo */
   dword_out(aub, addr >> 32); /* addr hi */
   dword_out(aub, addr_space); /* gtt */
   dword_out(aub, len);
}
215
216
/* Emit a CMD_MEM_TRACE_REGISTER_WRITE packet writing `value` to the MMIO
 * register at `addr` with a full 32-bit write mask.
 */
static void
register_write_out(struct aub_file *aub, uint32_t addr, uint32_t value)
{
   /* One payload dword (the value). */
   uint32_t dwords = 1;

   if (aub->verbose_log_file) {
      fprintf(aub->verbose_log_file,
              " MMIO WRITE (0x%08x = 0x%08x)\n", addr, value);
   }

   dword_out(aub, CMD_MEM_TRACE_REGISTER_WRITE | (5 + dwords - 1));
   dword_out(aub, addr);
   dword_out(aub, AUB_MEM_TRACE_REGISTER_SIZE_DWORD |
                  AUB_MEM_TRACE_REGISTER_SPACE_MMIO);
   dword_out(aub, 0xFFFFFFFF); /* mask lo */
   dword_out(aub, 0x00000000); /* mask hi */
   dword_out(aub, value);
}
234
235
/* Ensure entries [start, end] of a PPGTT table exist, allocating missing
 * subtables (or physical pages at level 1), and emit a single physical
 * memory write covering the dirty entry range. At level 1 the subtable
 * pointer slot stores the page's physical address cast to a pointer, not
 * a heap allocation — aub_ppgtt_table_finish() relies on this.
 */
static void
populate_ppgtt_table(struct aub_file *aub, struct aub_ppgtt_table *table,
                     int start, int end, int level)
{
   uint64_t entries[512] = {0};
   /* Inverted range: stays empty unless a new entry is allocated. */
   int dirty_start = 512, dirty_end = 0;

   if (aub->verbose_log_file) {
      fprintf(aub->verbose_log_file,
              " PPGTT (0x%016" PRIx64 "), lvl %d, start: %x, end: %x\n",
              table->phys_addr, level, start, end);
   }

   for (int i = start; i <= end; i++) {
      if (!table->subtables[i]) {
         dirty_start = min(dirty_start, i);
         dirty_end = max(dirty_end, i);
         if (level == 1) {
            /* Leaf level: just allocate a physical page and stash its
             * address in the pointer slot.
             */
            table->subtables[i] =
               (void *)(uintptr_t)(aub->phys_addrs_allocator++ << 12);
            if (aub->verbose_log_file) {
               fprintf(aub->verbose_log_file,
                       " Adding entry: %x, phys_addr: 0x%016" PRIx64 "\n",
                       i, (uint64_t)(uintptr_t)table->subtables[i]);
            }
         } else {
            /* NOTE(review): calloc result is dereferenced unchecked —
             * OOM here would crash; consider fail_if().
             */
            table->subtables[i] =
               calloc(1, sizeof(struct aub_ppgtt_table));
            table->subtables[i]->phys_addr =
               aub->phys_addrs_allocator++ << 12;
            if (aub->verbose_log_file) {
               fprintf(aub->verbose_log_file,
                       " Adding entry: %x, phys_addr: 0x%016" PRIx64 "\n",
                       i, table->subtables[i]->phys_addr);
            }
         }
      }
      /* PTE value: physical address | read/write | present. */
      entries[i] = 3 /* read/write | present */ |
         (level == 1 ? (uint64_t)(uintptr_t)table->subtables[i] :
          table->subtables[i]->phys_addr);
   }

   /* Only emit a write when at least one entry was newly allocated. */
   if (dirty_start <= dirty_end) {
      uint64_t write_addr = table->phys_addr + dirty_start *
                            sizeof(uint64_t);
      uint64_t write_size = (dirty_end - dirty_start + 1) *
                            sizeof(uint64_t);
      mem_trace_memory_write_header_out(aub, write_addr, write_size,
                                        AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL,
                                        "PPGTT update");
      data_out(aub, entries + dirty_start, write_size);
   }
}
288
289
/* Map [start, start+size) in the 4-level PPGTT, walking PML4 -> PDP -> PD
 * -> PT and populating each level's entries that the range touches.
 * The index/table macros below are also used by ppgtt_lookup().
 */
void
aub_map_ppgtt(struct aub_file *aub, uint64_t start, uint64_t size)
{
   /* Range rounded out to the 512GB regions (48-bit, 9 bits per level). */
   uint64_t l4_start = start & 0xff8000000000;
   uint64_t l4_end = ((start + size - 1) | 0x007fffffffff) & 0xffffffffffff;

/* 9-bit table index of `addr` at each level of the page walk. */
#define L4_index(addr) (((addr) >> 39) & 0x1ff)
#define L3_index(addr) (((addr) >> 30) & 0x1ff)
#define L2_index(addr) (((addr) >> 21) & 0x1ff)
#define L1_index(addr) (((addr) >> 12) & 0x1ff)

/* Child table containing the entry for `addr` at the given level. */
#define L3_table(addr) (aub->pml4.subtables[L4_index(addr)])
#define L2_table(addr) (L3_table(addr)->subtables[L3_index(addr)])
#define L1_table(addr) (L2_table(addr)->subtables[L2_index(addr)])

   if (aub->verbose_log_file) {
      fprintf(aub->verbose_log_file,
              " Mapping PPGTT address: 0x%" PRIx64 ", size: %" PRIu64"\n",
              start, size);
   }

   populate_ppgtt_table(aub, &aub->pml4, L4_index(l4_start), L4_index(l4_end), 4);

   for (uint64_t l4 = l4_start; l4 < l4_end; l4 += (1ULL << 39)) {
      /* Clip the mapped range to this 512GB region. */
      uint64_t l3_start = max(l4, start & 0xffffc0000000);
      uint64_t l3_end = min(l4 + (1ULL << 39) - 1,
                            ((start + size - 1) | 0x00003fffffff) & 0xffffffffffff);
      uint64_t l3_start_idx = L3_index(l3_start);
      uint64_t l3_end_idx = L3_index(l3_end);

      populate_ppgtt_table(aub, L3_table(l4), l3_start_idx, l3_end_idx, 3);

      for (uint64_t l3 = l3_start; l3 < l3_end; l3 += (1ULL << 30)) {
         /* Clip to this 1GB region. */
         uint64_t l2_start = max(l3, start & 0xffffffe00000);
         uint64_t l2_end = min(l3 + (1ULL << 30) - 1,
                               ((start + size - 1) | 0x0000001fffff) & 0xffffffffffff);
         uint64_t l2_start_idx = L2_index(l2_start);
         uint64_t l2_end_idx = L2_index(l2_end);

         populate_ppgtt_table(aub, L2_table(l3), l2_start_idx, l2_end_idx, 2);

         for (uint64_t l2 = l2_start; l2 < l2_end; l2 += (1ULL << 21)) {
            /* Clip to this 2MB region. */
            uint64_t l1_start = max(l2, start & 0xfffffffff000);
            uint64_t l1_end = min(l2 + (1ULL << 21) - 1,
                                  ((start + size - 1) | 0x000000000fff) & 0xffffffffffff);
            uint64_t l1_start_idx = L1_index(l1_start);
            uint64_t l1_end_idx = L1_index(l1_end);

            populate_ppgtt_table(aub, L1_table(l2), l1_start_idx, l1_end_idx, 1);
         }
      }
   }
}
342
343
/* Translate a PPGTT virtual address to the physical page address stored
 * in the level-1 table (see populate_ppgtt_table). The address must have
 * been mapped with aub_map_ppgtt() first; the walk does not check for
 * missing tables. Uses the L1_table/L1_index macros from aub_map_ppgtt().
 */
static uint64_t
ppgtt_lookup(struct aub_file *aub, uint64_t ppgtt_addr)
{
   return (uint64_t)(uintptr_t)L1_table(ppgtt_addr)->subtables[L1_index(ppgtt_addr)];
}
348
349
/* Per-engine-class description: display name, descriptor class bits and
 * the MMIO registers used for execlist submission and status polling.
 * Indexed directly by enum drm_i915_gem_engine_class.
 */
static const struct engine {
   const char *name;                          /* for packet descriptions */
   enum drm_i915_gem_engine_class engine_class;
   uint32_t hw_class;                         /* class field in context descriptor */
   uint32_t elsp_reg;                         /* ExecList Submit Port (gfx < 11) */
   uint32_t elsq_reg;                         /* ExecList Submit Queue (gfx >= 11) */
   uint32_t status_reg;                       /* execlist status, polled after submit */
   uint32_t control_reg;                      /* load-submit-queue control (gfx >= 11) */
} engines[] = {
   [I915_ENGINE_CLASS_RENDER] = {
      .name = "RENDER",
      .engine_class = I915_ENGINE_CLASS_RENDER,
      .hw_class = 1,
      .elsp_reg = EXECLIST_SUBMITPORT_RCSUNIT,
      .elsq_reg = EXECLIST_SQ_CONTENTS0_RCSUNIT,
      .status_reg = EXECLIST_STATUS_RCSUNIT,
      .control_reg = EXECLIST_CONTROL_RCSUNIT,
   },
   [I915_ENGINE_CLASS_VIDEO] = {
      .name = "VIDEO",
      .engine_class = I915_ENGINE_CLASS_VIDEO,
      .hw_class = 3,
      .elsp_reg = EXECLIST_SUBMITPORT_VCSUNIT0,
      .elsq_reg = EXECLIST_SQ_CONTENTS0_VCSUNIT0,
      .status_reg = EXECLIST_STATUS_VCSUNIT0,
      .control_reg = EXECLIST_CONTROL_VCSUNIT0,
   },
   [I915_ENGINE_CLASS_COPY] = {
      .name = "BLITTER",
      .engine_class = I915_ENGINE_CLASS_COPY,
      .hw_class = 2,
      .elsp_reg = EXECLIST_SUBMITPORT_BCSUNIT,
      .elsq_reg = EXECLIST_SQ_CONTENTS0_BCSUNIT,
      .status_reg = EXECLIST_STATUS_BCSUNIT,
      .control_reg = EXECLIST_CONTROL_BCSUNIT,
   },
};
386
387
/* Map `size` bytes at GGTT virtual address `virt_addr`, allocating fresh
 * physical pages and writing the corresponding GGTT PTEs.
 */
static void
aub_map_ggtt(struct aub_file *aub, uint64_t virt_addr, uint64_t size)
{
   /* Makes the code below a bit simpler. In practice all of the write we
    * receive from error2aub are page aligned.
    */
   assert(virt_addr % 4096 == 0);
   /* NOTE(review): this mixes a page-count allocator with a byte size —
    * presumably guarding that physical addresses stay below 4GB (only the
    * low PTE dword is written below); confirm the intended units.
    */
   assert((aub->phys_addrs_allocator + size) < (1ULL << 32));

   /* GGTT PT */
   uint32_t ggtt_ptes = DIV_ROUND_UP(size, 4096);
   uint64_t phys_addr = aub->phys_addrs_allocator << 12;
   aub->phys_addrs_allocator += ggtt_ptes;

   if (aub->verbose_log_file) {
      fprintf(aub->verbose_log_file,
              " Mapping GGTT address: 0x%" PRIx64 ", size: %" PRIu64" phys_addr=0x%" PRIx64 " entries=%u\n",
              virt_addr, size, phys_addr, ggtt_ptes);
   }

   mem_trace_memory_write_header_out(aub,
                                     (virt_addr >> 12) * GFX8_PTE_SIZE,
                                     ggtt_ptes * GFX8_PTE_SIZE,
                                     AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY,
                                     "GGTT PT");
   for (uint32_t i = 0; i < ggtt_ptes; i++) {
      /* PTE low dword: physical page address | present (bit 0). */
      dword_out(aub, 1 + phys_addr + i * 4096);
      dword_out(aub, 0);
   }
}
417
418
/* Map `size` bytes at GGTT address `virt_addr` and write `data` there,
 * one page-sized GGTT memory-write packet at a time.
 */
void
aub_write_ggtt(struct aub_file *aub, uint64_t virt_addr, uint64_t size, const void *data)
{
   /* Default setup assumes a 1 to 1 mapping between physical and virtual GGTT
    * addresses. This is somewhat incompatible with the aub_write_ggtt()
    * function. In practice it doesn't matter as the GGTT writes are used to
    * replace the default setup and we've taken care to setup the PML4 as the
    * top of the GGTT.
    */
   assert(!aub->has_default_setup);

   aub_map_ggtt(aub, virt_addr, size);

   /* We write the GGTT buffer through the GGTT aub command rather than the
    * PHYSICAL aub command. This is because the Gfx9 simulator seems to have 2
    * different set of memory pools for GGTT and physical (probably someone
    * didn't really understand the concept?).
    */
   static const char null_block[8 * 4096];
   for (uint64_t offset = 0; offset < size; offset += 4096) {
      uint32_t block_size = min(4096, size - offset);

      mem_trace_memory_write_header_out(aub, virt_addr + offset, block_size,
                                        AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
                                        "GGTT buffer");
      data_out(aub, (char *) data + offset, block_size);

      /* Pad to a multiple of 4 bytes. */
      data_out(aub, null_block, -block_size & 3);
   }
}
449
450
static const struct engine *
451
engine_from_engine_class(enum drm_i915_gem_engine_class engine_class)
452
{
453
switch (engine_class) {
454
case I915_ENGINE_CLASS_RENDER:
455
case I915_ENGINE_CLASS_COPY:
456
case I915_ENGINE_CLASS_VIDEO:
457
return &engines[engine_class];
458
default:
459
unreachable("unknown ring");
460
}
461
}
462
463
/* Fill `data` with the initial logical ring context image for the given
 * engine and report its size in `*size`. Passing data == NULL is used to
 * query the size only (see write_engine_execlist_setup). Gfx8-10 parts
 * use the gfx8 tables, newer parts the gfx10 tables — the init functions
 * themselves are defined in intel_context.h.
 */
static void
get_context_init(const struct intel_device_info *devinfo,
                 const struct intel_context_parameters *params,
                 enum drm_i915_gem_engine_class engine_class,
                 uint32_t *data,
                 uint32_t *size)
{
   static const intel_context_init_t gfx8_contexts[] = {
      [I915_ENGINE_CLASS_RENDER] = gfx8_render_context_init,
      [I915_ENGINE_CLASS_COPY] = gfx8_blitter_context_init,
      [I915_ENGINE_CLASS_VIDEO] = gfx8_video_context_init,
   };
   static const intel_context_init_t gfx10_contexts[] = {
      [I915_ENGINE_CLASS_RENDER] = gfx10_render_context_init,
      [I915_ENGINE_CLASS_COPY] = gfx10_blitter_context_init,
      [I915_ENGINE_CLASS_VIDEO] = gfx10_video_context_init,
   };

   /* Only execlist-capable (gfx8+) devices have context images here. */
   assert(devinfo->ver >= 8);

   if (devinfo->ver <= 10)
      gfx8_contexts[engine_class](params, data, size);
   else
      gfx10_contexts[engine_class](params, data, size);
}
488
489
static uint64_t
490
alloc_ggtt_address(struct aub_file *aub, uint64_t size)
491
{
492
uint32_t ggtt_ptes = DIV_ROUND_UP(size, 4096);
493
uint64_t addr = aub->ggtt_addrs_allocator << 12;
494
495
aub->ggtt_addrs_allocator += ggtt_ptes;
496
aub_map_ggtt(aub, addr, size);
497
498
return addr;
499
}
500
501
/* Point the engine's HWS_PGA register at its previously-allocated
 * hardware status page (see aub_write_ensure_hwsp).
 */
static void
write_hwsp(struct aub_file *aub,
           enum drm_i915_gem_engine_class engine_class)
{
   uint32_t reg = 0;
   switch (engine_class) {
   case I915_ENGINE_CLASS_RENDER: reg = HWS_PGA_RCSUNIT; break;
   case I915_ENGINE_CLASS_COPY: reg = HWS_PGA_BCSUNIT; break;
   case I915_ENGINE_CLASS_VIDEO: reg = HWS_PGA_VCSUNIT0; break;
   default:
      unreachable("unknown ring");
   }

   register_write_out(aub, reg, aub->engine_setup[engine_class].hwsp_addr);
}
516
517
/* Allocate and emit the GGTT-resident state for one (context, engine)
 * pair: the ring buffer, the per-process HW status page (PPHWSP) and the
 * logical ring context image, laid out contiguously in that order.
 * Records the ring/PPHWSP addresses in hw_ctx and marks it initialized.
 * Returns the total GGTT footprint in bytes.
 */
static uint32_t
write_engine_execlist_setup(struct aub_file *aub,
                            uint32_t ctx_id,
                            struct aub_hw_context *hw_ctx,
                            enum drm_i915_gem_engine_class engine_class)
{
   const struct engine *cs = engine_from_engine_class(engine_class);
   uint32_t context_size;

   /* data == NULL: only query the context image size. */
   get_context_init(&aub->devinfo, NULL, engine_class, NULL, &context_size);

   /* GGTT PT */
   uint32_t total_size = RING_SIZE + PPHWSP_SIZE + context_size;
   char name[80];
   uint64_t ggtt_addr = alloc_ggtt_address(aub, total_size);

   /* NOTE(review): this formatted name is overwritten before it is ever
    * used — dead store, possibly a leftover.
    */
   snprintf(name, sizeof(name), "%s (ctx id: %d) GGTT PT", cs->name, ctx_id);

   /* RING */
   hw_ctx->ring_addr = ggtt_addr;
   snprintf(name, sizeof(name), "%s RING", cs->name);
   mem_trace_memory_write_header_out(aub, ggtt_addr, RING_SIZE,
                                     AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
                                     name);
   for (uint32_t i = 0; i < RING_SIZE; i += sizeof(uint32_t))
      dword_out(aub, 0);
   ggtt_addr += RING_SIZE;

   /* PPHWSP */
   hw_ctx->pphwsp_addr = ggtt_addr;
   snprintf(name, sizeof(name), "%s PPHWSP", cs->name);
   /* One packet covers both the PPHWSP and the context image payloads. */
   mem_trace_memory_write_header_out(aub, ggtt_addr,
                                     PPHWSP_SIZE + context_size,
                                     AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
                                     name);
   for (uint32_t i = 0; i < PPHWSP_SIZE; i += sizeof(uint32_t))
      dword_out(aub, 0);

   /* CONTEXT */
   struct intel_context_parameters params = {
      .ring_addr = hw_ctx->ring_addr,
      .ring_size = RING_SIZE,
      .pml4_addr = aub->pml4.phys_addr,
   };
   /* NOTE(review): calloc result used unchecked — OOM would crash. */
   uint32_t *context_data = calloc(1, context_size);
   get_context_init(&aub->devinfo, &params, engine_class, context_data, &context_size);
   data_out(aub, context_data, context_size);
   free(context_data);

   hw_ctx->initialized = true;

   return total_size;
}
570
571
static void
572
write_execlists_default_setup(struct aub_file *aub)
573
{
574
register_write_out(aub, GFX_MODE_RCSUNIT, 0x80008000 /* execlist enable */);
575
register_write_out(aub, GFX_MODE_VCSUNIT0, 0x80008000 /* execlist enable */);
576
register_write_out(aub, GFX_MODE_BCSUNIT, 0x80008000 /* execlist enable */);
577
}
578
579
/* Write the legacy default GTT: a single trace block filling every PTE
 * with consecutive pages starting at the `entry` base value.
 */
static void write_legacy_default_setup(struct aub_file *aub)
{
   /* Base PTE value; low bits are PTE flags, upper bits the page address. */
   uint32_t entry = 0x200003;

   /* Set up the GTT. The max we can handle is 64M */
   dword_out(aub, CMD_AUB_TRACE_HEADER_BLOCK |
                  ((aub->addr_bits > 32 ? 6 : 5) - 2));
   dword_out(aub, AUB_TRACE_MEMTYPE_GTT_ENTRY |
                  AUB_TRACE_TYPE_NOTYPE | AUB_TRACE_OP_DATA_WRITE);
   dword_out(aub, 0); /* subtype */
   dword_out(aub, 0); /* offset */
   dword_out(aub, aub_gtt_size(aub)); /* size */
   if (aub->addr_bits > 32)
      dword_out(aub, 0); /* offset hi, 48-bit addressing only */
   for (uint32_t i = 0; i < NUM_PT_ENTRIES; i++) {
      dword_out(aub, entry + 0x1000 * i); /* one 4K page per PTE */
      if (aub->addr_bits > 32)
         dword_out(aub, 0); /* PTE hi dword */
   }
}
599
600
/**
601
* Sets up a default GGTT/PPGTT address space and execlists context (when
602
* supported).
603
*/
604
void
605
aub_write_default_setup(struct aub_file *aub)
606
{
607
if (aub_use_execlists(aub))
608
write_execlists_default_setup(aub);
609
else
610
write_legacy_default_setup(aub);
611
612
aub->has_default_setup = true;
613
}
614
615
static struct aub_context *
616
aub_context_new(struct aub_file *aub, uint32_t new_id)
617
{
618
assert(aub->num_contexts < MAX_CONTEXT_COUNT);
619
620
struct aub_context *ctx = &aub->contexts[aub->num_contexts++];
621
memset(ctx, 0, sizeof(*ctx));
622
ctx->id = new_id;
623
624
return ctx;
625
}
626
627
uint32_t
628
aub_write_context_create(struct aub_file *aub, uint32_t *ctx_id)
629
{
630
uint32_t new_id = ctx_id ? *ctx_id : aub->next_context_handle;
631
632
aub_context_new(aub, new_id);
633
634
if (!ctx_id)
635
aub->next_context_handle++;
636
637
return new_id;
638
}
639
640
static struct aub_context *
641
aub_context_find(struct aub_file *aub, uint32_t id)
642
{
643
for (int i = 0; i < aub->num_contexts; i++) {
644
if (aub->contexts[i].id == id)
645
return &aub->contexts[i];
646
}
647
648
return NULL;
649
}
650
651
static struct aub_hw_context *
652
aub_write_ensure_context(struct aub_file *aub, uint32_t ctx_id,
653
enum drm_i915_gem_engine_class engine_class)
654
{
655
struct aub_context *ctx = aub_context_find(aub, ctx_id);
656
assert(ctx != NULL);
657
658
struct aub_hw_context *hw_ctx = &ctx->hw_contexts[engine_class];
659
if (!hw_ctx->initialized)
660
write_engine_execlist_setup(aub, ctx->id, hw_ctx, engine_class);
661
662
return hw_ctx;
663
}
664
665
/* Build the execlist context descriptor: engine class bits ORed with the
 * page-aligned PPHWSP GGTT address and CONTEXT_FLAGS (common descriptor
 * flag bits defined elsewhere — presumably valid/addressing-mode bits).
 * `aub` is unused but kept for call-site symmetry.
 */
static uint64_t
get_context_descriptor(struct aub_file *aub,
                       const struct engine *cs,
                       struct aub_hw_context *hw_ctx)
{
   return cs->hw_class | hw_ctx->pphwsp_addr | CONTEXT_FLAGS;
}
672
673
/**
 * Break up large objects into multiple writes. Otherwise a 128kb VBO
 * would overflow the 16 bits of size field in the packet header and
 * everything goes badly after that.
 *
 * `virtual` may be NULL, in which case zero pages are written. In
 * execlist mode gtt_offset is a PPGTT address which must already be
 * mapped (see aub_map_ppgtt); writes go to the backing physical pages.
 */
void
aub_write_trace_block(struct aub_file *aub,
                      uint32_t type, void *virtual,
                      uint32_t size, uint64_t gtt_offset)
{
   uint32_t block_size;
   uint32_t subtype = 0;
   static const char null_block[8 * 4096];

   for (uint32_t offset = 0; offset < size; offset += block_size) {
      block_size = min(8 * 4096, size - offset);

      if (aub_use_execlists(aub)) {
         /* Physical pages are not contiguous, so cap at one 4K page per
          * packet and translate each page separately.
          */
         block_size = min(4096, block_size);
         mem_trace_memory_write_header_out(aub,
                                           ppgtt_lookup(aub, gtt_offset + offset),
                                           block_size,
                                           AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL,
                                           "Trace Block");
      } else {
         dword_out(aub, CMD_AUB_TRACE_HEADER_BLOCK |
                        ((aub->addr_bits > 32 ? 6 : 5) - 2));
         dword_out(aub, AUB_TRACE_MEMTYPE_GTT |
                        type | AUB_TRACE_OP_DATA_WRITE);
         dword_out(aub, subtype);
         dword_out(aub, gtt_offset + offset);
         dword_out(aub, align_u32(block_size, 4));
         if (aub->addr_bits > 32)
            dword_out(aub, (gtt_offset + offset) >> 32);
      }

      if (virtual)
         data_out(aub, ((char *) virtual) + offset, block_size);
      else
         data_out(aub, null_block, block_size);

      /* Pad to a multiple of 4 bytes. */
      data_out(aub, null_block, -block_size & 3);
   }
}
718
719
/* Write an MI_BATCH_BUFFER_START to the context's ring and update the
 * ring head/tail fields in the context image so the engine executes it.
 * The 8192 + 20/28 offsets index into the state following the ring —
 * presumably the RING_BUFFER_HEAD/TAIL registers of the logical ring
 * context (the descriptions say so); confirm against the context layout.
 */
static void
aub_dump_ring_buffer_execlist(struct aub_file *aub,
                              struct aub_hw_context *hw_ctx,
                              const struct engine *cs,
                              uint64_t batch_offset)
{
   mem_trace_memory_write_header_out(aub, hw_ctx->ring_addr, 16,
                                     AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
                                     "RING MI_BATCH_BUFFER_START user");
   dword_out(aub, AUB_MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965 | (3 - 2));
   dword_out(aub, batch_offset & 0xFFFFFFFF);
   dword_out(aub, batch_offset >> 32);
   dword_out(aub, 0 /* MI_NOOP */);

   mem_trace_memory_write_header_out(aub, hw_ctx->ring_addr + 8192 + 20, 4,
                                     AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
                                     "RING BUFFER HEAD");
   dword_out(aub, 0); /* RING_BUFFER_HEAD */
   mem_trace_memory_write_header_out(aub, hw_ctx->ring_addr + 8192 + 28, 4,
                                     AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT,
                                     "RING BUFFER TAIL");
   /* Tail = 16: just past the 4 dwords written above. */
   dword_out(aub, 16); /* RING_BUFFER_TAIL */
}
742
743
/* Submit a context descriptor to an engine and poll its execlist status
 * register until the submission is observed. Gfx11+ uses the submit
 * queue + control register; older parts write 2 descriptors (one null)
 * to the ELSP port, high dword first.
 */
static void
aub_dump_execlist(struct aub_file *aub, const struct engine *cs, uint64_t descriptor)
{
   if (aub->devinfo.ver >= 11) {
      register_write_out(aub, cs->elsq_reg, descriptor & 0xFFFFFFFF);
      register_write_out(aub, cs->elsq_reg + sizeof(uint32_t), descriptor >> 32);
      register_write_out(aub, cs->control_reg, 1); /* load submit queue */
   } else {
      /* Null descriptor for the second ELSP port slot. */
      register_write_out(aub, cs->elsp_reg, 0);
      register_write_out(aub, cs->elsp_reg, 0);
      register_write_out(aub, cs->elsp_reg, descriptor >> 32);
      register_write_out(aub, cs->elsp_reg, descriptor & 0xFFFFFFFF);
   }

   /* CMD_MEM_TRACE_REGISTER_POLL: wait for (status & mask) == value. */
   dword_out(aub, CMD_MEM_TRACE_REGISTER_POLL | (5 + 1 - 1));
   dword_out(aub, cs->status_reg);
   dword_out(aub, AUB_MEM_TRACE_REGISTER_SIZE_DWORD |
                  AUB_MEM_TRACE_REGISTER_SPACE_MMIO);
   if (aub->devinfo.ver >= 11) {
      dword_out(aub, 0x00000001); /* mask lo */
      dword_out(aub, 0x00000000); /* mask hi */
      dword_out(aub, 0x00000001);
   } else {
      dword_out(aub, 0x00000010); /* mask lo */
      dword_out(aub, 0x00000000); /* mask hi */
      dword_out(aub, 0x00000000);
   }
}
771
772
/* Legacy (pre-execlist) submission: build a tiny ring buffer containing a
 * single MI_BATCH_BUFFER_START at `batch_offset` and write it to the
 * engine's ring at GTT address `offset`, which triggers execution in the
 * simulator.
 */
static void
aub_dump_ring_buffer_legacy(struct aub_file *aub,
                            uint64_t batch_offset,
                            uint64_t offset,
                            enum drm_i915_gem_engine_class engine_class)
{
   uint32_t ringbuffer[4096];
   unsigned aub_mi_bbs_len;
   int ring_count = 0;
   /* Map engine class to the legacy primary ring buffer trace type. */
   static const int engine_class_to_ring[] = {
      [I915_ENGINE_CLASS_RENDER] = AUB_TRACE_TYPE_RING_PRB0,
      [I915_ENGINE_CLASS_VIDEO] = AUB_TRACE_TYPE_RING_PRB1,
      [I915_ENGINE_CLASS_COPY] = AUB_TRACE_TYPE_RING_PRB2,
   };
   int ring = engine_class_to_ring[engine_class];

   /* Make a ring buffer to execute our batchbuffer. */
   memset(ringbuffer, 0, sizeof(ringbuffer));

   /* 3 dwords with 48-bit addressing (command + lo + hi), else 2. */
   aub_mi_bbs_len = aub->addr_bits > 32 ? 3 : 2;
   ringbuffer[ring_count] = AUB_MI_BATCH_BUFFER_START | (aub_mi_bbs_len - 2);
   aub_write_reloc(&aub->devinfo, &ringbuffer[ring_count + 1], batch_offset);
   ring_count += aub_mi_bbs_len;

   /* Write out the ring. This appears to trigger execution of
    * the ring in the simulator.
    */
   dword_out(aub, CMD_AUB_TRACE_HEADER_BLOCK |
                  ((aub->addr_bits > 32 ? 6 : 5) - 2));
   dword_out(aub, AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE);
   dword_out(aub, 0); /* general/surface subtype */
   dword_out(aub, offset);
   dword_out(aub, ring_count * 4);
   if (aub->addr_bits > 32)
      dword_out(aub, offset >> 32);

   data_out(aub, ringbuffer, ring_count * 4);
}
810
811
static void
812
aub_write_ensure_hwsp(struct aub_file *aub,
813
enum drm_i915_gem_engine_class engine_class)
814
{
815
uint64_t *hwsp_addr = &aub->engine_setup[engine_class].hwsp_addr;
816
817
if (*hwsp_addr != 0)
818
return;
819
820
*hwsp_addr = alloc_ggtt_address(aub, 4096);
821
write_hwsp(aub, engine_class);
822
}
823
824
/* Record the execution of the batch buffer at PPGTT/GTT address
 * `batch_addr` for context `ctx_id` on the given engine. In execlist
 * mode this lazily sets up the HW context and HWSP, then submits via
 * the execlist registers; `offset` (the ring's GTT address) is only
 * used by the legacy path. Flushes the output so the trace is complete
 * on disk after each exec.
 */
void
aub_write_exec(struct aub_file *aub, uint32_t ctx_id, uint64_t batch_addr,
               uint64_t offset, enum drm_i915_gem_engine_class engine_class)
{
   const struct engine *cs = engine_from_engine_class(engine_class);

   if (aub_use_execlists(aub)) {
      struct aub_hw_context *hw_ctx =
         aub_write_ensure_context(aub, ctx_id, engine_class);
      uint64_t descriptor = get_context_descriptor(aub, cs, hw_ctx);
      aub_write_ensure_hwsp(aub, engine_class);
      aub_dump_ring_buffer_execlist(aub, hw_ctx, cs, batch_addr);
      aub_dump_execlist(aub, cs, descriptor);
   } else {
      /* Dump ring buffer */
      aub_dump_ring_buffer_legacy(aub, batch_addr, offset, engine_class);
   }
   fflush(aub->file);
}
843
844
/* Submit a pre-built context image at GGTT address `context_addr` on the
 * given engine, building the descriptor directly (bit 62 set — meaning
 * not established by this file; confirm against the descriptor layout)
 * instead of going through the per-context bookkeeping.
 */
void
aub_write_context_execlists(struct aub_file *aub, uint64_t context_addr,
                            enum drm_i915_gem_engine_class engine_class)
{
   const struct engine *cs = engine_from_engine_class(engine_class);
   uint64_t descriptor = ((uint64_t)1 << 62 | context_addr | CONTEXT_FLAGS);
   aub_dump_execlist(aub, cs, descriptor);
}
852
853