Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/freedreno/decode/cffdec.c
4565 views
1
/*
2
* Copyright (c) 2012 Rob Clark <[email protected]>
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
* SOFTWARE.
22
*/
23
24
#include <assert.h>
25
#include <ctype.h>
26
#include <err.h>
27
#include <errno.h>
28
#include <fcntl.h>
29
#include <inttypes.h>
30
#include <signal.h>
31
#include <stdarg.h>
32
#include <stdbool.h>
33
#include <stdint.h>
34
#include <stdio.h>
35
#include <stdlib.h>
36
#include <string.h>
37
#include <unistd.h>
38
#include <sys/stat.h>
39
#include <sys/types.h>
40
#include <sys/wait.h>
41
42
#include "freedreno_pm4.h"
43
44
#include "buffers.h"
45
#include "cffdec.h"
46
#include "disasm.h"
47
#include "redump.h"
48
#include "rnnutil.h"
49
#include "script.h"
50
51
/* ************************************************************************* */
52
/* originally based on kernel recovery dump code: */
53
54
/* Decode options for the current run; installed by cffdec_init(). */
static const struct cffdec_options *options;

/* Set elsewhere; when true, dump_registers() flags writes to non-banked
 * registers as needing a WFI.
 */
static bool needs_wfi;
/* Copied from options->summary by cffdec_init(); consulted by quiet(). */
static bool summary;
/* Gates the TEX_SAMP/TEX_CONST dump callbacks. */
static bool in_summary;
static int vertices;
60
61
static inline unsigned
62
regcnt(void)
63
{
64
if (options->gpu_id >= 500)
65
return 0xffff;
66
else
67
return 0x7fff;
68
}
69
70
static int
71
is_64b(void)
72
{
73
return options->gpu_id >= 500;
74
}
75
76
static int draws[4];

/* Per-IB-level tracking state, indexed by the current IB level ('ib'). */
static struct {
   uint64_t base;
   uint32_t size; /* in dwords */
   /* Generally the cmdstream consists of multiple IB calls to different
    * buffers, which are themselves often re-used for each tile.  The
    * triggered flag serves two purposes, to help make it clearer which
    * part of the cmdstream is before vs after the GPU hang:
    *
    * 1) if, in IB2, we are past the point within the IB2 buffer where
    *    the GPU hung, but IB1 is not yet past the point within its
    *    buffer where the GPU hung, then we know the GPU hang happens
    *    on a future use of that IB2 buffer.
    *
    * 2) if we are in an IB1 or IB2 buffer that is not the one where
    *    the GPU hung, but we have already passed the trigger point at
    *    the same IB level, then we know we are past the point where
    *    the GPU hung.
    *
    * So this is a one-way switch, false->true.  And a higher-numbered
    * IB level is not considered triggered unless the lower-numbered
    * IB level is.
    */
   bool triggered;
} ibs[4];

/* Current IB level; index into ibs[] and options->ibs[]. */
static int ib;
102
103
/* Reset to zero by cffdec_init(); printed as the draw index by the query
 * output.
 */
static int draw_count;
/* Compared against options->draw_filter in quiet(). */
static int current_draw_count;

/* Query mode: to handle symbolic register-name queries, parsing of the
 * query strings has to be deferred until the gpu_id is known and the rnn
 * database has been loaded.  The resolved register offsets end up here
 * (one entry per options->querystrs[] element).
 */
static int *queryvals;
111
112
static bool
113
quiet(int lvl)
114
{
115
if ((options->draw_filter != -1) &&
116
(options->draw_filter != current_draw_count))
117
return true;
118
if ((lvl >= 3) && (summary || options->querystrs || options->script))
119
return true;
120
if ((lvl >= 2) && (options->querystrs || options->script))
121
return true;
122
return false;
123
}
124
125
/* printf() to stdout, unless quiet(lvl) says output at this verbosity
 * level is suppressed.
 */
void
printl(int lvl, const char *fmt, ...)
{
   if (!quiet(lvl)) {
      va_list ap;

      va_start(ap, fmt);
      vprintf(fmt, ap);
      va_end(ap);
   }
}
135
136
/* Indentation prefix per nesting level: levels[n] is n+1 tabs for
 * n = 0..8.  The remaining entries are "x" placeholders that make an
 * unexpectedly deep nesting level visible in the output.
 */
static const char *levels[] = {
   /* 0 */ "\t",
   /* 1 */ "\t\t",
   /* 2 */ "\t\t\t",
   /* 3 */ "\t\t\t\t",
   /* 4 */ "\t\t\t\t\t",
   /* 5 */ "\t\t\t\t\t\t",
   /* 6 */ "\t\t\t\t\t\t\t",
   /* 7 */ "\t\t\t\t\t\t\t\t",
   /* 8 */ "\t\t\t\t\t\t\t\t\t",
   /* 9..14: deeper than expected */
   "x", "x", "x", "x", "x", "x",
};
153
154
/* Where a piece of state comes from; passed to dump_tex_samp(). */
enum state_src_t {
   STATE_SRC_DIRECT = 0,
   STATE_SRC_INDIRECT = 1,
   STATE_SRC_BINDLESS = 2,
};
159
160
/* Forward declarations for helpers defined later in the file. */

/* SDS (CP_SET_DRAW_STATE) helpers: */
static void load_all_groups(int level);
static void disable_all_groups(void);

/* Texture sampler / texture constant state dumpers: */
static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src,
                          int num_unit, int level);
static void dump_tex_const(uint32_t *texsamp, int num_unit, int level);
167
168
static bool
169
highlight_gpuaddr(uint64_t gpuaddr)
170
{
171
if (!options->color)
172
return false;
173
174
if (!options->ibs[ib].base)
175
return false;
176
177
if ((ib > 0) && options->ibs[ib - 1].base && !ibs[ib - 1].triggered)
178
return false;
179
180
if (ibs[ib].triggered)
181
return true;
182
183
if (options->ibs[ib].base != ibs[ib].base)
184
return false;
185
186
uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem);
187
uint64_t end = ibs[ib].base + 4 * ibs[ib].size;
188
189
bool triggered = (start <= gpuaddr) && (gpuaddr <= end);
190
191
ibs[ib].triggered |= triggered;
192
193
if (triggered)
194
printf("ESTIMATED CRASH LOCATION!\n");
195
196
return triggered;
197
}
198
199
static void
200
dump_hex(uint32_t *dwords, uint32_t sizedwords, int level)
201
{
202
int i, j;
203
int lastzero = 1;
204
205
if (quiet(2))
206
return;
207
208
for (i = 0; i < sizedwords; i += 8) {
209
int zero = 1;
210
211
/* always show first row: */
212
if (i == 0)
213
zero = 0;
214
215
for (j = 0; (j < 8) && (i + j < sizedwords) && zero; j++)
216
if (dwords[i + j])
217
zero = 0;
218
219
if (zero && !lastzero)
220
printf("*\n");
221
222
lastzero = zero;
223
224
if (zero)
225
continue;
226
227
uint64_t addr = gpuaddr(&dwords[i]);
228
bool highlight = highlight_gpuaddr(addr);
229
230
if (highlight)
231
printf("\x1b[0;1;31m");
232
233
if (is_64b()) {
234
printf("%016" PRIx64 ":%s", addr, levels[level]);
235
} else {
236
printf("%08x:%s", (uint32_t)addr, levels[level]);
237
}
238
239
if (highlight)
240
printf("\x1b[0m");
241
242
printf("%04x:", i * 4);
243
244
for (j = 0; (j < 8) && (i + j < sizedwords); j++) {
245
printf(" %08x", dwords[i + j]);
246
}
247
248
printf("\n");
249
}
250
}
251
252
static void
253
dump_float(float *dwords, uint32_t sizedwords, int level)
254
{
255
int i;
256
for (i = 0; i < sizedwords; i++) {
257
if ((i % 8) == 0) {
258
if (is_64b()) {
259
printf("%016" PRIx64 ":%s", gpuaddr(dwords), levels[level]);
260
} else {
261
printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]);
262
}
263
} else {
264
printf(" ");
265
}
266
printf("%8f", *(dwords++));
267
if ((i % 8) == 7)
268
printf("\n");
269
}
270
if (i % 8)
271
printf("\n");
272
}
273
274
/* I believe the surface format is in the low bits:
 *
 *    #define RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000fL
 *
 * Comments in sys2gmem_tex_const indicate that the address is [31:12],
 * but it looks like at least some of the bits above the format have a
 * different meaning..
 *
 * Split 'dword' into an address part (the bits outside 'mask', stored in
 * *gpuaddr) and a flags part (the bits inside 'mask', stored in *flags).
 */
static void
parse_dword_addr(uint32_t dword, uint32_t *gpuaddr, uint32_t *flags,
                 uint32_t mask)
{
   const uint32_t addr_bits = dword & ~mask;
   const uint32_t flag_bits = dword & mask;

   assert(!is_64b()); /* this is only used on a2xx */

   *gpuaddr = addr_bits;
   *flags = flag_bits;
}
287
288
static uint32_t type0_reg_vals[0xffff + 1];
289
static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals) /
290
8]; /* written since last draw */
291
static uint8_t type0_reg_written[sizeof(type0_reg_vals) / 8];
292
static uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)];
293
294
static bool
295
reg_rewritten(uint32_t regbase)
296
{
297
return !!(type0_reg_rewritten[regbase / 8] & (1 << (regbase % 8)));
298
}
299
300
bool
301
reg_written(uint32_t regbase)
302
{
303
return !!(type0_reg_written[regbase / 8] & (1 << (regbase % 8)));
304
}
305
306
static void
307
clear_rewritten(void)
308
{
309
memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten));
310
}
311
312
static void
313
clear_written(void)
314
{
315
memset(type0_reg_written, 0, sizeof(type0_reg_written));
316
clear_rewritten();
317
}
318
319
uint32_t
320
reg_lastval(uint32_t regbase)
321
{
322
return lastvals[regbase];
323
}
324
325
static void
326
clear_lastvals(void)
327
{
328
memset(lastvals, 0, sizeof(lastvals));
329
}
330
331
uint32_t
332
reg_val(uint32_t regbase)
333
{
334
return type0_reg_vals[regbase];
335
}
336
337
void
338
reg_set(uint32_t regbase, uint32_t val)
339
{
340
assert(regbase < regcnt());
341
type0_reg_vals[regbase] = val;
342
type0_reg_written[regbase / 8] |= (1 << (regbase % 8));
343
type0_reg_rewritten[regbase / 8] |= (1 << (regbase % 8));
344
}
345
346
static void
347
reg_dump_scratch(const char *name, uint32_t dword, int level)
348
{
349
unsigned r;
350
351
if (quiet(3))
352
return;
353
354
r = regbase("CP_SCRATCH[0].REG");
355
356
// if not, try old a2xx/a3xx version:
357
if (!r)
358
r = regbase("CP_SCRATCH_REG0");
359
360
if (!r)
361
return;
362
363
printf("%s:%u,%u,%u,%u\n", levels[level], reg_val(r + 4), reg_val(r + 5),
364
reg_val(r + 6), reg_val(r + 7));
365
}
366
367
/* Hexdump 'sizedwords' dwords of the buffer at 'gpuaddr', one level
 * deeper than 'level'.  Does nothing when output at 'quietlvl' is
 * suppressed or the address does not map to a known buffer.
 */
static void
dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl)
{
   if (quiet(quietlvl))
      return;

   void *mem = hostptr(gpuaddr);

   if (!mem)
      return;

   dump_hex(mem, sizedwords, level + 1);
}
380
381
/* Hexdump the first 64 dwords at 'gpuaddr' (verbosity level 3). */
static void
dump_gpuaddr(uint64_t gpuaddr, int level)
{
   const int ndwords = 64;
   const int quietlvl = 3;

   dump_gpuaddr_size(gpuaddr, level, ndwords, quietlvl);
}
386
387
/* Register callback: treat the 32-bit register value as a GPU address
 * and hexdump what it points to.  'name' is unused.
 */
static void
reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
{
   const uint64_t addr = dword;

   dump_gpuaddr(addr, level);
}
392
393
/* Low 32 bits of a 64-bit address split across a _LO/_HI register pair;
 * latched by reg_gpuaddr_lo() and consumed by the matching _HI callback.
 */
uint32_t gpuaddr_lo;

/* Register callback for the _LO half of an address pair: just remember
 * the value.  'name' and 'level' are unused.
 */
static void
reg_gpuaddr_lo(const char *name, uint32_t dword, int level)
{
   gpuaddr_lo = dword;
}
399
400
static void
401
reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level)
402
{
403
dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level);
404
}
405
406
/* 64-bit register callback: hexdump the buffer at address 'qword'.
 * 'name' is unused.
 */
static void
reg_dump_gpuaddr64(const char *name, uint64_t qword, int level)
{
   const uint64_t addr = qword;

   dump_gpuaddr(addr, level);
}
411
412
static void
413
dump_shader(const char *ext, void *buf, int bufsz)
414
{
415
if (options->dump_shaders) {
416
static int n = 0;
417
char filename[16];
418
int fd;
419
sprintf(filename, "%04d.%s", n++, ext);
420
fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, 0644);
421
if (fd != -1) {
422
write(fd, buf, bufsz);
423
close(fd);
424
}
425
}
426
}
427
428
static void
429
disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level)
430
{
431
void *buf;
432
433
gpuaddr &= 0xfffffffffffffff0;
434
435
if (quiet(3))
436
return;
437
438
buf = hostptr(gpuaddr);
439
if (buf) {
440
uint32_t sizedwords = hostlen(gpuaddr) / 4;
441
const char *ext;
442
443
dump_hex(buf, min(64, sizedwords), level + 1);
444
try_disasm_a3xx(buf, sizedwords, level + 2, stdout, options->gpu_id);
445
446
/* this is a bit ugly way, but oh well.. */
447
if (strstr(name, "SP_VS_OBJ")) {
448
ext = "vo3";
449
} else if (strstr(name, "SP_FS_OBJ")) {
450
ext = "fo3";
451
} else if (strstr(name, "SP_GS_OBJ")) {
452
ext = "go3";
453
} else if (strstr(name, "SP_CS_OBJ")) {
454
ext = "co3";
455
} else {
456
ext = NULL;
457
}
458
459
if (ext)
460
dump_shader(ext, buf, sizedwords * 4);
461
}
462
}
463
464
/* Register callback: the 32-bit register value is a shader address;
 * dump and disassemble it.
 */
static void
reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
{
   const uint64_t addr = dword;

   disasm_gpuaddr(name, addr, level);
}
469
470
static void
471
reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level)
472
{
473
disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level);
474
}
475
476
/* 64-bit register callback: dump and disassemble the shader at address
 * 'qword'.
 */
static void
reg_disasm_gpuaddr64(const char *name, uint64_t qword, int level)
{
   const uint64_t addr = qword;

   disasm_gpuaddr(name, addr, level);
}
481
482
/* Find the value of the TEX_COUNT register that corresponds to the named
 * TEX_SAMP/TEX_CONST register.
 *
 * The lookup name is built by cutting 'name' at the first "CONST" (or,
 * failing that, "SAMP") and appending "COUNT".  Returns 0 when 'name'
 * contains neither.
 *
 * Note, this kinda assumes an equal # of samplers and textures, but not
 * really sure if there is a much better option.  I suppose on a6xx we
 * could instead decode the bitfields in SP_xS_CONFIG.
 */
static int
get_tex_count(const char *name)
{
   static const char suffix[] = "COUNT";
   /* the part of 'name' that is kept is at least 4 chars shorter than
    * 'name', so this always has room for prefix + "COUNT" + NUL
    */
   char count_reg[strlen(name) + 5];
   const char *marker;

   marker = strstr(name, "CONST");
   if (!marker)
      marker = strstr(name, "SAMP");
   if (!marker)
      return 0;

   const size_t prefix_len = marker - name;

   memcpy(count_reg, name, prefix_len);
   /* sizeof includes the terminating NUL */
   memcpy(count_reg + prefix_len, suffix, sizeof(suffix));

   return reg_val(regbase(count_reg));
}
507
508
static void
509
reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level)
510
{
511
if (!in_summary)
512
return;
513
514
int num_unit = get_tex_count(name);
515
uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
516
void *buf = hostptr(gpuaddr);
517
518
if (!buf)
519
return;
520
521
dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level + 1);
522
}
523
524
static void
525
reg_dump_tex_const_hi(const char *name, uint32_t dword, int level)
526
{
527
if (!in_summary)
528
return;
529
530
int num_unit = get_tex_count(name);
531
uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
532
void *buf = hostptr(gpuaddr);
533
534
if (!buf)
535
return;
536
537
dump_tex_const(buf, num_unit, level + 1);
538
}
539
540
/*
541
* Registers with special handling (rnndec_decode() handles rest):
542
*/
543
#define REG(x, fxn) { #x, fxn }
544
#define REG64(x, fxn) { #x, .fxn64 = fxn, .is_reg64 = true }
545
static struct {
546
const char *regname;
547
void (*fxn)(const char *name, uint32_t dword, int level);
548
void (*fxn64)(const char *name, uint64_t qword, int level);
549
uint32_t regbase;
550
bool is_reg64;
551
} reg_a2xx[] = {
552
REG(CP_SCRATCH_REG0, reg_dump_scratch),
553
REG(CP_SCRATCH_REG1, reg_dump_scratch),
554
REG(CP_SCRATCH_REG2, reg_dump_scratch),
555
REG(CP_SCRATCH_REG3, reg_dump_scratch),
556
REG(CP_SCRATCH_REG4, reg_dump_scratch),
557
REG(CP_SCRATCH_REG5, reg_dump_scratch),
558
REG(CP_SCRATCH_REG6, reg_dump_scratch),
559
REG(CP_SCRATCH_REG7, reg_dump_scratch),
560
{NULL},
561
}, reg_a3xx[] = {
562
REG(CP_SCRATCH_REG0, reg_dump_scratch),
563
REG(CP_SCRATCH_REG1, reg_dump_scratch),
564
REG(CP_SCRATCH_REG2, reg_dump_scratch),
565
REG(CP_SCRATCH_REG3, reg_dump_scratch),
566
REG(CP_SCRATCH_REG4, reg_dump_scratch),
567
REG(CP_SCRATCH_REG5, reg_dump_scratch),
568
REG(CP_SCRATCH_REG6, reg_dump_scratch),
569
REG(CP_SCRATCH_REG7, reg_dump_scratch),
570
REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr),
571
REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
572
REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
573
REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr),
574
REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr),
575
REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
576
{NULL},
577
}, reg_a4xx[] = {
578
REG(CP_SCRATCH[0].REG, reg_dump_scratch),
579
REG(CP_SCRATCH[0x1].REG, reg_dump_scratch),
580
REG(CP_SCRATCH[0x2].REG, reg_dump_scratch),
581
REG(CP_SCRATCH[0x3].REG, reg_dump_scratch),
582
REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
583
REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
584
REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
585
REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
586
REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr),
587
REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr),
588
REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr),
589
REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr),
590
REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr),
591
REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr),
592
REG(SP_VS_OBJ_START, reg_disasm_gpuaddr),
593
REG(SP_FS_OBJ_START, reg_disasm_gpuaddr),
594
REG(SP_GS_OBJ_START, reg_disasm_gpuaddr),
595
REG(SP_HS_OBJ_START, reg_disasm_gpuaddr),
596
REG(SP_DS_OBJ_START, reg_disasm_gpuaddr),
597
REG(SP_CS_OBJ_START, reg_disasm_gpuaddr),
598
REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
599
REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
600
REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
601
REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
602
REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
603
{NULL},
604
}, reg_a5xx[] = {
605
REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
606
REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
607
REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
608
REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
609
REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
610
REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
611
REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
612
REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
613
REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
614
REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
615
REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
616
REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
617
REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
618
REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
619
REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
620
REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
621
REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo),
622
REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
623
REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
624
REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
625
REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo),
626
REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
627
REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
628
REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
629
REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo),
630
REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
631
REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
632
REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
633
REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo),
634
REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
635
REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
636
REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
637
REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo),
638
REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
639
REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
640
REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
641
REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo),
642
REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
643
REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
644
REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
645
REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo),
646
REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi),
647
// REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
648
// REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
649
// REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
650
// REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
651
// REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
652
// REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
653
// REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
654
// REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
655
// REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
656
// REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
657
// REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
658
// REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
659
// REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
660
// REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
661
// REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
662
// REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
663
// REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
664
// REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
665
// REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
666
// REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
667
// REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
668
// REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
669
// REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
670
// REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
671
// REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
672
// REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),
673
674
// REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
675
// REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
676
// REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
677
// REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
678
// REG(RB_2D_DST_LO, reg_gpuaddr_lo),
679
// REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
680
// REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
681
// REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),
682
683
{NULL},
684
}, reg_a6xx[] = {
685
REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
686
REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
687
REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
688
REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
689
690
REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64),
691
REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64),
692
REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64),
693
REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64),
694
REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64),
695
REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64),
696
697
REG64(SP_VS_TEX_CONST, reg_dump_gpuaddr64),
698
REG64(SP_VS_TEX_SAMP, reg_dump_gpuaddr64),
699
REG64(SP_HS_TEX_CONST, reg_dump_gpuaddr64),
700
REG64(SP_HS_TEX_SAMP, reg_dump_gpuaddr64),
701
REG64(SP_DS_TEX_CONST, reg_dump_gpuaddr64),
702
REG64(SP_DS_TEX_SAMP, reg_dump_gpuaddr64),
703
REG64(SP_GS_TEX_CONST, reg_dump_gpuaddr64),
704
REG64(SP_GS_TEX_SAMP, reg_dump_gpuaddr64),
705
REG64(SP_FS_TEX_CONST, reg_dump_gpuaddr64),
706
REG64(SP_FS_TEX_SAMP, reg_dump_gpuaddr64),
707
REG64(SP_CS_TEX_CONST, reg_dump_gpuaddr64),
708
REG64(SP_CS_TEX_SAMP, reg_dump_gpuaddr64),
709
710
{NULL},
711
}, *type0_reg;
712
713
static struct rnn *rnn;
714
715
static void
716
init_rnn(const char *gpuname)
717
{
718
rnn = rnn_new(!options->color);
719
720
rnn_load(rnn, gpuname);
721
722
if (options->querystrs) {
723
int i;
724
queryvals = calloc(options->nquery, sizeof(queryvals[0]));
725
726
for (i = 0; i < options->nquery; i++) {
727
int val = strtol(options->querystrs[i], NULL, 0);
728
729
if (val == 0)
730
val = regbase(options->querystrs[i]);
731
732
queryvals[i] = val;
733
printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]);
734
}
735
}
736
737
for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
738
type0_reg[idx].regbase = regbase(type0_reg[idx].regname);
739
if (!type0_reg[idx].regbase) {
740
printf("invalid register name: %s\n", type0_reg[idx].regname);
741
exit(1);
742
}
743
}
744
}
745
746
void
747
reset_regs(void)
748
{
749
clear_written();
750
clear_lastvals();
751
memset(&ibs, 0, sizeof(ibs));
752
}
753
754
void
755
cffdec_init(const struct cffdec_options *_options)
756
{
757
options = _options;
758
summary = options->summary;
759
760
/* in case we're decoding multiple files: */
761
free(queryvals);
762
reset_regs();
763
draw_count = 0;
764
765
/* TODO we need an API to free/cleanup any previous rnn */
766
767
switch (options->gpu_id) {
768
case 200 ... 299:
769
type0_reg = reg_a2xx;
770
init_rnn("a2xx");
771
break;
772
case 300 ... 399:
773
type0_reg = reg_a3xx;
774
init_rnn("a3xx");
775
break;
776
case 400 ... 499:
777
type0_reg = reg_a4xx;
778
init_rnn("a4xx");
779
break;
780
case 500 ... 599:
781
type0_reg = reg_a5xx;
782
init_rnn("a5xx");
783
break;
784
case 600 ... 699:
785
type0_reg = reg_a6xx;
786
init_rnn("a6xx");
787
break;
788
default:
789
errx(-1, "unsupported gpu");
790
}
791
}
792
793
const char *
794
pktname(unsigned opc)
795
{
796
return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
797
}
798
799
const char *
800
regname(uint32_t regbase, int color)
801
{
802
return rnn_regname(rnn, regbase, color);
803
}
804
805
uint32_t
806
regbase(const char *name)
807
{
808
return rnn_regbase(rnn, name);
809
}
810
811
/* Returns non-zero if the name of the register at 'regbase' ends with
 * 'suffix', zero otherwise (including when the register has no name).
 */
static int
endswith(uint32_t regbase, const char *suffix)
{
   const char *name = regname(regbase, 0);

   /* NOTE(review): regname() presumably yields NULL for offsets that are
    * not in the database (callers probe regbase +/- 1) -- confirm.
    */
   if (!name)
      return 0;

   const size_t namelen = strlen(name);
   const size_t suffixlen = strlen(suffix);

   if (suffixlen > namelen)
      return 0;

   /* compare the tail directly; searching with strstr() would stop at an
    * earlier occurrence of the suffix inside the name
    */
   return strcmp(name + (namelen - suffixlen), suffix) == 0;
}
820
821
void
822
dump_register_val(uint32_t regbase, uint32_t dword, int level)
823
{
824
struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase);
825
826
if (info && info->typeinfo) {
827
uint64_t gpuaddr = 0;
828
char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, dword);
829
printf("%s%s: %s", levels[level], info->name, decoded);
830
831
/* Try and figure out if we are looking at a gpuaddr.. this
832
* might be useful for other gen's too, but at least a5xx has
833
* the _HI/_LO suffix we can look for. Maybe a better approach
834
* would be some special annotation in the xml..
835
* for a6xx use "address" and "waddress" types
836
*/
837
if (options->gpu_id >= 600) {
838
if (!strcmp(info->typeinfo->name, "address") ||
839
!strcmp(info->typeinfo->name, "waddress")) {
840
gpuaddr = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;
841
}
842
} else if (options->gpu_id >= 500) {
843
if (endswith(regbase, "_HI") && endswith(regbase - 1, "_LO")) {
844
gpuaddr = (((uint64_t)dword) << 32) | reg_val(regbase - 1);
845
} else if (endswith(regbase, "_LO") && endswith(regbase + 1, "_HI")) {
846
gpuaddr = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;
847
}
848
}
849
850
if (gpuaddr && hostptr(gpuaddr)) {
851
printf("\t\tbase=%" PRIx64 ", offset=%" PRIu64 ", size=%u",
852
gpubaseaddr(gpuaddr), gpuaddr - gpubaseaddr(gpuaddr),
853
hostlen(gpubaseaddr(gpuaddr)));
854
}
855
856
printf("\n");
857
858
free(decoded);
859
} else if (info) {
860
printf("%s%s: %08x\n", levels[level], info->name, dword);
861
} else {
862
printf("%s<%04x>: %08x\n", levels[level], regbase, dword);
863
}
864
865
if (info) {
866
free(info->name);
867
free(info);
868
}
869
}
870
871
static void
872
dump_register(uint32_t regbase, uint32_t dword, int level)
873
{
874
if (!quiet(3)) {
875
dump_register_val(regbase, dword, level);
876
}
877
878
for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
879
if (type0_reg[idx].regbase == regbase) {
880
if (type0_reg[idx].is_reg64) {
881
uint64_t qword = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;
882
type0_reg[idx].fxn64(type0_reg[idx].regname, qword, level);
883
} else {
884
type0_reg[idx].fxn(type0_reg[idx].regname, dword, level);
885
}
886
break;
887
}
888
}
889
}
890
891
/* True for register offsets in the range [0x2000, 0x2400), which this
 * decoder treats as banked (no WFI needed before writing them).
 */
static bool
is_banked_reg(uint32_t regbase)
{
   if (regbase < 0x2000)
      return false;
   if (regbase >= 0x2400)
      return false;
   return true;
}
896
897
static void
898
dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords,
899
int level)
900
{
901
while (sizedwords--) {
902
int last_summary = summary;
903
904
/* access to non-banked registers needs a WFI:
905
* TODO banked register range for a2xx??
906
*/
907
if (needs_wfi && !is_banked_reg(regbase))
908
printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase);
909
910
reg_set(regbase, *dwords);
911
dump_register(regbase, *dwords, level);
912
regbase++;
913
dwords++;
914
summary = last_summary;
915
}
916
}
917
918
static void
919
dump_domain(uint32_t *dwords, uint32_t sizedwords, int level, const char *name)
920
{
921
struct rnndomain *dom;
922
int i;
923
924
dom = rnn_finddomain(rnn->db, name);
925
926
if (!dom)
927
return;
928
929
if (script_packet)
930
script_packet(dwords, sizedwords, rnn, dom);
931
932
if (quiet(2))
933
return;
934
935
for (i = 0; i < sizedwords; i++) {
936
struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0);
937
char *decoded;
938
if (!(info && info->typeinfo))
939
break;
940
uint64_t value = dwords[i];
941
if (info->typeinfo->high >= 32 && i < sizedwords - 1) {
942
value |= (uint64_t)dwords[i + 1] << 32;
943
i++; /* skip the next dword since we're printing it now */
944
}
945
decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
946
/* Unlike the register printing path, we don't print the name
947
* of the register, so if it doesn't contain other named
948
* things (i.e. it isn't a bitset) then print the register
949
* name as if it's a bitset with a single entry. This avoids
950
* having to create a dummy register with a single entry to
951
* get a name in the decoding.
952
*/
953
if (info->typeinfo->type == RNN_TTYPE_BITSET ||
954
info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) {
955
printf("%s%s\n", levels[level], decoded);
956
} else {
957
printf("%s{ %s%s%s = %s }\n", levels[level], rnn->vc->colors->rname,
958
info->name, rnn->vc->colors->reset, decoded);
959
}
960
free(decoded);
961
free(info->name);
962
free(info);
963
}
964
}
965
966
/* Current bin bounds; on a5xx/a6xx these are taken from the window
 * scissor registers by the query code.
 */
static uint32_t bin_x1;
static uint32_t bin_x2;
static uint32_t bin_y1;
static uint32_t bin_y2;

static unsigned mode;
/* Human readable name of the current render mode, printed in query and
 * mode output.
 */
static const char *render_mode;

/* Bitmask of render modes that are currently enabled. */
static enum {
   MODE_BINNING = 1 << 0,
   MODE_GMEM = 1 << 1,
   MODE_BYPASS = 1 << 2,
   MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS,
} enable_mask = MODE_ALL;

static bool skip_ib2_enable_global;
static bool skip_ib2_enable_local;
977
978
static void
979
print_mode(int level)
980
{
981
if ((options->gpu_id >= 500) && !quiet(2)) {
982
printf("%smode: %s\n", levels[level], render_mode);
983
printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global,
984
skip_ib2_enable_local);
985
}
986
}
987
988
static bool
989
skip_query(void)
990
{
991
switch (options->query_mode) {
992
case QUERY_ALL:
993
/* never skip: */
994
return false;
995
case QUERY_WRITTEN:
996
for (int i = 0; i < options->nquery; i++) {
997
uint32_t regbase = queryvals[i];
998
if (!reg_written(regbase)) {
999
continue;
1000
}
1001
if (reg_rewritten(regbase)) {
1002
return false;
1003
}
1004
}
1005
return true;
1006
case QUERY_DELTA:
1007
for (int i = 0; i < options->nquery; i++) {
1008
uint32_t regbase = queryvals[i];
1009
if (!reg_written(regbase)) {
1010
continue;
1011
}
1012
uint32_t lastval = reg_val(regbase);
1013
if (lastval != lastvals[regbase]) {
1014
return false;
1015
}
1016
}
1017
return true;
1018
}
1019
return true;
1020
}
1021
1022
static void
1023
__do_query(const char *primtype, uint32_t num_indices)
1024
{
1025
int n = 0;
1026
1027
if ((500 <= options->gpu_id) && (options->gpu_id < 700)) {
1028
uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
1029
uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));
1030
1031
bin_x1 = scissor_tl & 0xffff;
1032
bin_y1 = scissor_tl >> 16;
1033
bin_x2 = scissor_br & 0xffff;
1034
bin_y2 = scissor_br >> 16;
1035
}
1036
1037
for (int i = 0; i < options->nquery; i++) {
1038
uint32_t regbase = queryvals[i];
1039
if (reg_written(regbase)) {
1040
uint32_t lastval = reg_val(regbase);
1041
printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype, bin_x1,
1042
bin_y1, bin_x2, bin_y2, num_indices);
1043
if (options->gpu_id >= 500)
1044
printf("%s:", render_mode);
1045
printf("\t%08x", lastval);
1046
if (lastval != lastvals[regbase]) {
1047
printf("!");
1048
} else {
1049
printf(" ");
1050
}
1051
if (reg_rewritten(regbase)) {
1052
printf("+");
1053
} else {
1054
printf(" ");
1055
}
1056
dump_register_val(regbase, lastval, 0);
1057
n++;
1058
}
1059
}
1060
1061
if (n > 1)
1062
printf("\n");
1063
}
1064
1065
static void
1066
do_query_compare(const char *primtype, uint32_t num_indices)
1067
{
1068
unsigned saved_enable_mask = enable_mask;
1069
const char *saved_render_mode = render_mode;
1070
1071
/* in 'query-compare' mode, we want to see if the register is writtten
1072
* or changed in any mode:
1073
*
1074
* (NOTE: this could cause false-positive for 'query-delta' if the reg
1075
* is written with different values in binning vs sysmem/gmem mode, as
1076
* we don't track previous values per-mode, but I think we can live with
1077
* that)
1078
*/
1079
enable_mask = MODE_ALL;
1080
1081
clear_rewritten();
1082
load_all_groups(0);
1083
1084
if (!skip_query()) {
1085
/* dump binning pass values: */
1086
enable_mask = MODE_BINNING;
1087
render_mode = "BINNING";
1088
clear_rewritten();
1089
load_all_groups(0);
1090
__do_query(primtype, num_indices);
1091
1092
/* dump draw pass values: */
1093
enable_mask = MODE_GMEM | MODE_BYPASS;
1094
render_mode = "DRAW";
1095
clear_rewritten();
1096
load_all_groups(0);
1097
__do_query(primtype, num_indices);
1098
1099
printf("\n");
1100
}
1101
1102
enable_mask = saved_enable_mask;
1103
render_mode = saved_render_mode;
1104
1105
disable_all_groups();
1106
}
1107
1108
/* well, actually query and script..
1109
* NOTE: call this before dump_register_summary()
1110
*/
1111
static void
1112
do_query(const char *primtype, uint32_t num_indices)
1113
{
1114
if (script_draw)
1115
script_draw(primtype, num_indices);
1116
1117
if (options->query_compare) {
1118
do_query_compare(primtype, num_indices);
1119
return;
1120
}
1121
1122
if (skip_query())
1123
return;
1124
1125
__do_query(primtype, num_indices);
1126
}
1127
1128
static void
1129
cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level)
1130
{
1131
uint32_t start = dwords[1] >> 16;
1132
uint32_t size = dwords[1] & 0xffff;
1133
const char *type = NULL, *ext = NULL;
1134
gl_shader_stage disasm_type;
1135
1136
switch (dwords[0]) {
1137
case 0:
1138
type = "vertex";
1139
ext = "vo";
1140
disasm_type = MESA_SHADER_VERTEX;
1141
break;
1142
case 1:
1143
type = "fragment";
1144
ext = "fo";
1145
disasm_type = MESA_SHADER_FRAGMENT;
1146
break;
1147
default:
1148
type = "<unknown>";
1149
disasm_type = 0;
1150
break;
1151
}
1152
1153
printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start,
1154
size);
1155
disasm_a2xx(dwords + 2, sizedwords - 2, level + 2, disasm_type);
1156
1157
/* dump raw shader: */
1158
if (ext)
1159
dump_shader(ext, dwords + 2, (sizedwords - 2) * 4);
1160
}
1161
1162
/*
 * Write a run of consecutive registers: the low 16 bits of dwords[0] give
 * the first register offset and each following dword is the value for the
 * next register.  Every value is both printed and recorded with reg_set().
 */
static void
cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
{
   uint32_t reg = dwords[0] & 0xffff;

   for (uint32_t n = 1; n < sizedwords; n++, reg++) {
      const uint32_t value = dwords[n];

      dump_register(reg, value, level + 1);
      reg_set(reg, value);
   }
}
1173
1174
/*
 * Kind of state carried by a CP_LOAD_STATE packet, as classified by the
 * *_get_state_type() helpers.  Zero is deliberately left unused so that a
 * zero-initialized lookup-table entry does not match any kind.
 */
enum state_t {
   TEX_SAMP = 1,
   TEX_CONST = 2,
   TEX_MIPADDR = 3, /* a3xx only */
   SHADER_PROG = 4,
   SHADER_CONST = 5,

   /* image/ssbo state: */
   SSBO_0 = 6,
   SSBO_1 = 7,
   SSBO_2 = 8,

   UBO = 9,

   /* unknown things, just to hexdumps: */
   UNKNOWN_DWORDS = 10,
   UNKNOWN_2DWORDS = 11,
   UNKNOWN_4DWORDS = 12,
};
1193
1194
/*
 * State block ids used to index the a3xx CP_LOAD_STATE lookup table
 * (see a3xx_get_state_type()).  The values fit the 3-bit block field.
 */
enum adreno_state_block {
   SB_VERT_TEX       = 0,
   SB_VERT_MIPADDR   = 1,
   SB_FRAG_TEX       = 2,
   SB_FRAG_MIPADDR   = 3,
   SB_VERT_SHADER    = 4,
   SB_GEOM_SHADER    = 5,
   SB_FRAG_SHADER    = 6,
   SB_COMPUTE_SHADER = 7,
};
1204
1205
/* TODO there is probably a clever way to let rnndec parse things so
1206
* we don't have to care about packet format differences across gens
1207
*/
1208
1209
static void
1210
a3xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1211
enum state_t *state, enum state_src_t *src)
1212
{
1213
unsigned state_block_id = (dwords[0] >> 19) & 0x7;
1214
unsigned state_type = dwords[1] & 0x3;
1215
static const struct {
1216
gl_shader_stage stage;
1217
enum state_t state;
1218
} lookup[0xf][0x3] = {
1219
[SB_VERT_TEX][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
1220
[SB_VERT_TEX][1] = {MESA_SHADER_VERTEX, TEX_CONST},
1221
[SB_FRAG_TEX][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
1222
[SB_FRAG_TEX][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
1223
[SB_VERT_SHADER][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
1224
[SB_VERT_SHADER][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
1225
[SB_FRAG_SHADER][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
1226
[SB_FRAG_SHADER][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
1227
};
1228
1229
*stage = lookup[state_block_id][state_type].stage;
1230
*state = lookup[state_block_id][state_type].state;
1231
unsigned state_src = (dwords[0] >> 16) & 0x7;
1232
if (state_src == 0 /* SS_DIRECT */)
1233
*src = STATE_SRC_DIRECT;
1234
else
1235
*src = STATE_SRC_INDIRECT;
1236
}
1237
1238
static enum state_src_t
1239
_get_state_src(unsigned dword0)
1240
{
1241
switch ((dword0 >> 16) & 0x3) {
1242
case 0: /* SS4_DIRECT / SS6_DIRECT */
1243
return STATE_SRC_DIRECT;
1244
case 2: /* SS4_INDIRECT / SS6_INDIRECT */
1245
return STATE_SRC_INDIRECT;
1246
case 1: /* SS6_BINDLESS */
1247
return STATE_SRC_BINDLESS;
1248
default:
1249
return STATE_SRC_DIRECT;
1250
}
1251
}
1252
1253
static void
1254
_get_state_type(unsigned state_block_id, unsigned state_type,
1255
gl_shader_stage *stage, enum state_t *state)
1256
{
1257
static const struct {
1258
gl_shader_stage stage;
1259
enum state_t state;
1260
} lookup[0x10][0x4] = {
1261
// SB4_VS_TEX:
1262
[0x0][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
1263
[0x0][1] = {MESA_SHADER_VERTEX, TEX_CONST},
1264
[0x0][2] = {MESA_SHADER_VERTEX, UBO},
1265
// SB4_HS_TEX:
1266
[0x1][0] = {MESA_SHADER_TESS_CTRL, TEX_SAMP},
1267
[0x1][1] = {MESA_SHADER_TESS_CTRL, TEX_CONST},
1268
[0x1][2] = {MESA_SHADER_TESS_CTRL, UBO},
1269
// SB4_DS_TEX:
1270
[0x2][0] = {MESA_SHADER_TESS_EVAL, TEX_SAMP},
1271
[0x2][1] = {MESA_SHADER_TESS_EVAL, TEX_CONST},
1272
[0x2][2] = {MESA_SHADER_TESS_EVAL, UBO},
1273
// SB4_GS_TEX:
1274
[0x3][0] = {MESA_SHADER_GEOMETRY, TEX_SAMP},
1275
[0x3][1] = {MESA_SHADER_GEOMETRY, TEX_CONST},
1276
[0x3][2] = {MESA_SHADER_GEOMETRY, UBO},
1277
// SB4_FS_TEX:
1278
[0x4][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
1279
[0x4][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
1280
[0x4][2] = {MESA_SHADER_FRAGMENT, UBO},
1281
// SB4_CS_TEX:
1282
[0x5][0] = {MESA_SHADER_COMPUTE, TEX_SAMP},
1283
[0x5][1] = {MESA_SHADER_COMPUTE, TEX_CONST},
1284
[0x5][2] = {MESA_SHADER_COMPUTE, UBO},
1285
// SB4_VS_SHADER:
1286
[0x8][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
1287
[0x8][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
1288
[0x8][2] = {MESA_SHADER_VERTEX, UBO},
1289
// SB4_HS_SHADER
1290
[0x9][0] = {MESA_SHADER_TESS_CTRL, SHADER_PROG},
1291
[0x9][1] = {MESA_SHADER_TESS_CTRL, SHADER_CONST},
1292
[0x9][2] = {MESA_SHADER_TESS_CTRL, UBO},
1293
// SB4_DS_SHADER
1294
[0xa][0] = {MESA_SHADER_TESS_EVAL, SHADER_PROG},
1295
[0xa][1] = {MESA_SHADER_TESS_EVAL, SHADER_CONST},
1296
[0xa][2] = {MESA_SHADER_TESS_EVAL, UBO},
1297
// SB4_GS_SHADER
1298
[0xb][0] = {MESA_SHADER_GEOMETRY, SHADER_PROG},
1299
[0xb][1] = {MESA_SHADER_GEOMETRY, SHADER_CONST},
1300
[0xb][2] = {MESA_SHADER_GEOMETRY, UBO},
1301
// SB4_FS_SHADER:
1302
[0xc][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
1303
[0xc][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
1304
[0xc][2] = {MESA_SHADER_FRAGMENT, UBO},
1305
// SB4_CS_SHADER:
1306
[0xd][0] = {MESA_SHADER_COMPUTE, SHADER_PROG},
1307
[0xd][1] = {MESA_SHADER_COMPUTE, SHADER_CONST},
1308
[0xd][2] = {MESA_SHADER_COMPUTE, UBO},
1309
[0xd][3] = {MESA_SHADER_COMPUTE, SSBO_0}, /* a6xx location */
1310
// SB4_SSBO (shared across all stages)
1311
[0xe][0] = {0, SSBO_0}, /* a5xx (and a4xx?) location */
1312
[0xe][1] = {0, SSBO_1},
1313
[0xe][2] = {0, SSBO_2},
1314
// SB4_CS_SSBO
1315
[0xf][0] = {MESA_SHADER_COMPUTE, SSBO_0},
1316
[0xf][1] = {MESA_SHADER_COMPUTE, SSBO_1},
1317
[0xf][2] = {MESA_SHADER_COMPUTE, SSBO_2},
1318
// unknown things
1319
/* This looks like combined UBO state for 3d stages (a5xx and
1320
* before?? I think a6xx has UBO state per shader stage:
1321
*/
1322
[0x6][2] = {0, UBO},
1323
[0x7][1] = {0, UNKNOWN_2DWORDS},
1324
};
1325
1326
*stage = lookup[state_block_id][state_type].stage;
1327
*state = lookup[state_block_id][state_type].state;
1328
}
1329
1330
static void
1331
a4xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1332
enum state_t *state, enum state_src_t *src)
1333
{
1334
unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1335
unsigned state_type = dwords[1] & 0x3;
1336
_get_state_type(state_block_id, state_type, stage, state);
1337
*src = _get_state_src(dwords[0]);
1338
}
1339
1340
static void
1341
a6xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1342
enum state_t *state, enum state_src_t *src)
1343
{
1344
unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1345
unsigned state_type = (dwords[0] >> 14) & 0x3;
1346
_get_state_type(state_block_id, state_type, stage, state);
1347
*src = _get_state_src(dwords[0]);
1348
}
1349
1350
static void
1351
dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)
1352
{
1353
for (int i = 0; i < num_unit; i++) {
1354
/* work-around to reduce noise for opencl blob which always
1355
* writes the max # regardless of # of textures used
1356
*/
1357
if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))
1358
break;
1359
1360
if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
1361
dump_domain(texsamp, 2, level + 2, "A3XX_TEX_SAMP");
1362
dump_hex(texsamp, 2, level + 1);
1363
texsamp += 2;
1364
} else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
1365
dump_domain(texsamp, 2, level + 2, "A4XX_TEX_SAMP");
1366
dump_hex(texsamp, 2, level + 1);
1367
texsamp += 2;
1368
} else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1369
dump_domain(texsamp, 4, level + 2, "A5XX_TEX_SAMP");
1370
dump_hex(texsamp, 4, level + 1);
1371
texsamp += 4;
1372
} else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
1373
dump_domain(texsamp, 4, level + 2, "A6XX_TEX_SAMP");
1374
dump_hex(texsamp, 4, level + 1);
1375
texsamp += src == STATE_SRC_BINDLESS ? 16 : 4;
1376
}
1377
}
1378
}
1379
1380
static void
1381
dump_tex_const(uint32_t *texconst, int num_unit, int level)
1382
{
1383
for (int i = 0; i < num_unit; i++) {
1384
/* work-around to reduce noise for opencl blob which always
1385
* writes the max # regardless of # of textures used
1386
*/
1387
if ((num_unit == 16) && (texconst[0] == 0) && (texconst[1] == 0) &&
1388
(texconst[2] == 0) && (texconst[3] == 0))
1389
break;
1390
1391
if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
1392
dump_domain(texconst, 4, level + 2, "A3XX_TEX_CONST");
1393
dump_hex(texconst, 4, level + 1);
1394
texconst += 4;
1395
} else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
1396
dump_domain(texconst, 8, level + 2, "A4XX_TEX_CONST");
1397
if (options->dump_textures) {
1398
uint32_t addr = texconst[4] & ~0x1f;
1399
dump_gpuaddr(addr, level - 2);
1400
}
1401
dump_hex(texconst, 8, level + 1);
1402
texconst += 8;
1403
} else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1404
dump_domain(texconst, 12, level + 2, "A5XX_TEX_CONST");
1405
if (options->dump_textures) {
1406
uint64_t addr =
1407
(((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1408
dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1409
}
1410
dump_hex(texconst, 12, level + 1);
1411
texconst += 12;
1412
} else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
1413
dump_domain(texconst, 16, level + 2, "A6XX_TEX_CONST");
1414
if (options->dump_textures) {
1415
uint64_t addr =
1416
(((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1417
dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1418
}
1419
dump_hex(texconst, 16, level + 1);
1420
texconst += 16;
1421
}
1422
}
1423
}
1424
1425
static void
1426
cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
1427
{
1428
gl_shader_stage stage;
1429
enum state_t state;
1430
enum state_src_t src;
1431
uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
1432
uint64_t ext_src_addr;
1433
void *contents;
1434
int i;
1435
1436
if (quiet(2) && !options->script)
1437
return;
1438
1439
if (options->gpu_id >= 600)
1440
a6xx_get_state_type(dwords, &stage, &state, &src);
1441
else if (options->gpu_id >= 400)
1442
a4xx_get_state_type(dwords, &stage, &state, &src);
1443
else
1444
a3xx_get_state_type(dwords, &stage, &state, &src);
1445
1446
switch (src) {
1447
case STATE_SRC_DIRECT:
1448
ext_src_addr = 0;
1449
break;
1450
case STATE_SRC_INDIRECT:
1451
if (is_64b()) {
1452
ext_src_addr = dwords[1] & 0xfffffffc;
1453
ext_src_addr |= ((uint64_t)dwords[2]) << 32;
1454
} else {
1455
ext_src_addr = dwords[1] & 0xfffffffc;
1456
}
1457
1458
break;
1459
case STATE_SRC_BINDLESS: {
1460
const unsigned base_reg = stage == MESA_SHADER_COMPUTE
1461
? regbase("HLSQ_CS_BINDLESS_BASE[0].ADDR")
1462
: regbase("HLSQ_BINDLESS_BASE[0].ADDR");
1463
1464
if (is_64b()) {
1465
const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
1466
ext_src_addr = reg_val(reg) & 0xfffffffc;
1467
ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
1468
} else {
1469
const unsigned reg = base_reg + (dwords[1] >> 28);
1470
ext_src_addr = reg_val(reg) & 0xfffffffc;
1471
}
1472
1473
ext_src_addr += 4 * (dwords[1] & 0xffffff);
1474
break;
1475
}
1476
}
1477
1478
if (ext_src_addr)
1479
contents = hostptr(ext_src_addr);
1480
else
1481
contents = is_64b() ? dwords + 3 : dwords + 2;
1482
1483
if (!contents)
1484
return;
1485
1486
switch (state) {
1487
case SHADER_PROG: {
1488
const char *ext = NULL;
1489
1490
if (quiet(2))
1491
return;
1492
1493
if (options->gpu_id >= 400)
1494
num_unit *= 16;
1495
else if (options->gpu_id >= 300)
1496
num_unit *= 4;
1497
1498
/* shaders:
1499
*
1500
* note: num_unit seems to be # of instruction groups, where
1501
* an instruction group has 4 64bit instructions.
1502
*/
1503
if (stage == MESA_SHADER_VERTEX) {
1504
ext = "vo3";
1505
} else if (stage == MESA_SHADER_GEOMETRY) {
1506
ext = "go3";
1507
} else if (stage == MESA_SHADER_COMPUTE) {
1508
ext = "co3";
1509
} else if (stage == MESA_SHADER_FRAGMENT) {
1510
ext = "fo3";
1511
}
1512
1513
if (contents)
1514
try_disasm_a3xx(contents, num_unit * 2, level + 2, stdout,
1515
options->gpu_id);
1516
1517
/* dump raw shader: */
1518
if (ext)
1519
dump_shader(ext, contents, num_unit * 2 * 4);
1520
1521
break;
1522
}
1523
case SHADER_CONST: {
1524
if (quiet(2))
1525
return;
1526
1527
/* uniforms/consts:
1528
*
1529
* note: num_unit seems to be # of pairs of dwords??
1530
*/
1531
1532
if (options->gpu_id >= 400)
1533
num_unit *= 2;
1534
1535
dump_float(contents, num_unit * 2, level + 1);
1536
dump_hex(contents, num_unit * 2, level + 1);
1537
1538
break;
1539
}
1540
case TEX_MIPADDR: {
1541
uint32_t *addrs = contents;
1542
1543
if (quiet(2))
1544
return;
1545
1546
/* mipmap consts block just appears to be array of num_unit gpu addr's: */
1547
for (i = 0; i < num_unit; i++) {
1548
void *ptr = hostptr(addrs[i]);
1549
printf("%s%2d: %08x\n", levels[level + 1], i, addrs[i]);
1550
if (options->dump_textures) {
1551
printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
1552
dump_hex(ptr, hostlen(addrs[i]) / 4, level + 1);
1553
}
1554
}
1555
break;
1556
}
1557
case TEX_SAMP: {
1558
dump_tex_samp(contents, src, num_unit, level);
1559
break;
1560
}
1561
case TEX_CONST: {
1562
dump_tex_const(contents, num_unit, level);
1563
break;
1564
}
1565
case SSBO_0: {
1566
uint32_t *ssboconst = (uint32_t *)contents;
1567
1568
for (i = 0; i < num_unit; i++) {
1569
int sz = 4;
1570
if (400 <= options->gpu_id && options->gpu_id < 500) {
1571
dump_domain(ssboconst, 4, level + 2, "A4XX_SSBO_0");
1572
} else if (500 <= options->gpu_id && options->gpu_id < 600) {
1573
dump_domain(ssboconst, 4, level + 2, "A5XX_SSBO_0");
1574
} else if (600 <= options->gpu_id && options->gpu_id < 700) {
1575
sz = 16;
1576
dump_domain(ssboconst, 16, level + 2, "A6XX_IBO");
1577
}
1578
dump_hex(ssboconst, sz, level + 1);
1579
ssboconst += sz;
1580
}
1581
break;
1582
}
1583
case SSBO_1: {
1584
uint32_t *ssboconst = (uint32_t *)contents;
1585
1586
for (i = 0; i < num_unit; i++) {
1587
if (400 <= options->gpu_id && options->gpu_id < 500)
1588
dump_domain(ssboconst, 2, level + 2, "A4XX_SSBO_1");
1589
else if (500 <= options->gpu_id && options->gpu_id < 600)
1590
dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_1");
1591
dump_hex(ssboconst, 2, level + 1);
1592
ssboconst += 2;
1593
}
1594
break;
1595
}
1596
case SSBO_2: {
1597
uint32_t *ssboconst = (uint32_t *)contents;
1598
1599
for (i = 0; i < num_unit; i++) {
1600
/* TODO a4xx and a5xx might be same: */
1601
if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
1602
dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_2");
1603
dump_hex(ssboconst, 2, level + 1);
1604
}
1605
if (options->dump_textures) {
1606
uint64_t addr =
1607
(((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
1608
dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1609
}
1610
ssboconst += 2;
1611
}
1612
break;
1613
}
1614
case UBO: {
1615
uint32_t *uboconst = (uint32_t *)contents;
1616
1617
for (i = 0; i < num_unit; i++) {
1618
// TODO probably similar on a4xx..
1619
if (500 <= options->gpu_id && options->gpu_id < 600)
1620
dump_domain(uboconst, 2, level + 2, "A5XX_UBO");
1621
else if (600 <= options->gpu_id && options->gpu_id < 700)
1622
dump_domain(uboconst, 2, level + 2, "A6XX_UBO");
1623
dump_hex(uboconst, 2, level + 1);
1624
uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
1625
}
1626
break;
1627
}
1628
case UNKNOWN_DWORDS: {
1629
if (quiet(2))
1630
return;
1631
dump_hex(contents, num_unit, level + 1);
1632
break;
1633
}
1634
case UNKNOWN_2DWORDS: {
1635
if (quiet(2))
1636
return;
1637
dump_hex(contents, num_unit * 2, level + 1);
1638
break;
1639
}
1640
case UNKNOWN_4DWORDS: {
1641
if (quiet(2))
1642
return;
1643
dump_hex(contents, num_unit * 4, level + 1);
1644
break;
1645
}
1646
default:
1647
if (quiet(2))
1648
return;
1649
/* hmm.. */
1650
dump_hex(contents, num_unit, level + 1);
1651
break;
1652
}
1653
}
1654
1655
static void
1656
cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level)
1657
{
1658
bin_x1 = dwords[1] & 0xffff;
1659
bin_y1 = dwords[1] >> 16;
1660
bin_x2 = dwords[2] & 0xffff;
1661
bin_y2 = dwords[2] >> 16;
1662
}
1663
1664
static void
1665
dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1666
int level)
1667
{
1668
uint32_t w, h, p;
1669
uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags;
1670
uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z;
1671
static const char *filter[] = {
1672
"point",
1673
"bilinear",
1674
"bicubic",
1675
};
1676
static const char *clamp[] = {
1677
"wrap",
1678
"mirror",
1679
"clamp-last-texel",
1680
};
1681
static const char swiznames[] = "xyzw01??";
1682
1683
/* see sys2gmem_tex_const[] in adreno_a2xxx.c */
1684
1685
/* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
1686
* RFMode=ZeroClamp-1, Dim=1:2d, pitch
1687
*/
1688
p = (dwords[0] >> 22) << 5;
1689
clamp_x = (dwords[0] >> 10) & 0x3;
1690
clamp_y = (dwords[0] >> 13) & 0x3;
1691
clamp_z = (dwords[0] >> 16) & 0x3;
1692
1693
/* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
1694
* NearestClamp=1:OGL Mode
1695
*/
1696
parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff);
1697
1698
/* Width, Height, EndianSwap=0:None */
1699
w = (dwords[2] & 0x1fff) + 1;
1700
h = ((dwords[2] >> 13) & 0x1fff) + 1;
1701
1702
/* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
1703
* Mip=2:BaseMap
1704
*/
1705
mag = (dwords[3] >> 19) & 0x3;
1706
min = (dwords[3] >> 21) & 0x3;
1707
swiz = (dwords[3] >> 1) & 0xfff;
1708
1709
/* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
1710
* Dim3d=0
1711
*/
1712
// XXX
1713
1714
/* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0,
1715
* Dim=1:2d, MipPacking=0
1716
*/
1717
parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff);
1718
1719
printf("%sset texture const %04x\n", levels[level], val);
1720
printf("%sclamp x/y/z: %s/%s/%s\n", levels[level + 1], clamp[clamp_x],
1721
clamp[clamp_y], clamp[clamp_z]);
1722
printf("%sfilter min/mag: %s/%s\n", levels[level + 1], filter[min],
1723
filter[mag]);
1724
printf("%sswizzle: %c%c%c%c\n", levels[level + 1],
1725
swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7],
1726
swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]);
1727
printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",
1728
levels[level + 1], gpuaddr, flags, w, h, p,
1729
rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf));
1730
printf("%smipaddr=%08x (flags=%03x)\n", levels[level + 1], mip_gpuaddr,
1731
mip_flags);
1732
}
1733
1734
static void
1735
dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1736
int level)
1737
{
1738
int i;
1739
printf("%sset shader const %04x\n", levels[level], val);
1740
for (i = 0; i < sizedwords;) {
1741
uint32_t gpuaddr, flags;
1742
parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf);
1743
void *addr = hostptr(gpuaddr);
1744
if (addr) {
1745
const char *fmt =
1746
rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf);
1747
uint32_t size = dwords[i++];
1748
printf("%saddr=%08x, size=%d, format=%s\n", levels[level + 1], gpuaddr,
1749
size, fmt);
1750
// TODO maybe dump these as bytes instead of dwords?
1751
size = (size + 3) / 4; // for now convert to dwords
1752
dump_hex(addr, min(size, 64), level + 1);
1753
if (size > min(size, 64))
1754
printf("%s\t\t...\n", levels[level + 1]);
1755
dump_float(addr, min(size, 64), level + 1);
1756
if (size > min(size, 64))
1757
printf("%s\t\t...\n", levels[level + 1]);
1758
}
1759
}
1760
}
1761
1762
static void
1763
cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)
1764
{
1765
uint32_t val = dwords[0] & 0xffff;
1766
switch ((dwords[0] >> 16) & 0xf) {
1767
case 0x0:
1768
dump_float((float *)(dwords + 1), sizedwords - 1, level + 1);
1769
break;
1770
case 0x1:
1771
/* need to figure out how const space is partitioned between
1772
* attributes, textures, etc..
1773
*/
1774
if (val < 0x78) {
1775
dump_a2xx_tex_const(dwords + 1, sizedwords - 1, val, level);
1776
} else {
1777
dump_a2xx_shader_const(dwords + 1, sizedwords - 1, val, level);
1778
}
1779
break;
1780
case 0x2:
1781
printf("%sset bool const %04x\n", levels[level], val);
1782
break;
1783
case 0x3:
1784
printf("%sset loop const %04x\n", levels[level], val);
1785
break;
1786
case 0x4:
1787
val += 0x2000;
1788
if (dwords[0] & 0x80000000) {
1789
uint32_t srcreg = dwords[1];
1790
uint32_t dstval = dwords[2];
1791
1792
/* TODO: not sure what happens w/ payload != 2.. */
1793
assert(sizedwords == 3);
1794
assert(srcreg < ARRAY_SIZE(type0_reg_vals));
1795
1796
/* note: rnn_regname uses a static buf so we can't do
1797
* two regname() calls for one printf..
1798
*/
1799
printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval);
1800
printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);
1801
1802
dstval += type0_reg_vals[srcreg];
1803
1804
dump_registers(val, &dstval, 1, level + 1);
1805
} else {
1806
dump_registers(val, dwords + 1, sizedwords - 1, level + 1);
1807
}
1808
break;
1809
}
1810
}
1811
1812
static void dump_register_summary(int level);
1813
1814
static void
1815
cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level)
1816
{
1817
const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0]);
1818
printl(2, "%sevent %s\n", levels[level], name);
1819
1820
if (name && (options->gpu_id > 500)) {
1821
char eventname[64];
1822
snprintf(eventname, sizeof(eventname), "EVENT:%s", name);
1823
if (!strcmp(name, "BLIT")) {
1824
do_query(eventname, 0);
1825
print_mode(level);
1826
dump_register_summary(level);
1827
}
1828
}
1829
}
1830
1831
static void
1832
dump_register_summary(int level)
1833
{
1834
uint32_t i;
1835
bool saved_summary = summary;
1836
summary = false;
1837
1838
in_summary = true;
1839
1840
/* dump current state of registers: */
1841
printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);
1842
for (i = 0; i < regcnt(); i++) {
1843
uint32_t regbase = i;
1844
uint32_t lastval = reg_val(regbase);
1845
/* skip registers that haven't been updated since last draw/blit: */
1846
if (!(options->allregs || reg_rewritten(regbase)))
1847
continue;
1848
if (!reg_written(regbase))
1849
continue;
1850
if (lastval != lastvals[regbase]) {
1851
printl(2, "!");
1852
lastvals[regbase] = lastval;
1853
} else {
1854
printl(2, " ");
1855
}
1856
if (reg_rewritten(regbase)) {
1857
printl(2, "+");
1858
} else {
1859
printl(2, " ");
1860
}
1861
printl(2, "\t%08x", lastval);
1862
if (!quiet(2)) {
1863
dump_register(regbase, lastval, level);
1864
}
1865
}
1866
1867
clear_rewritten();
1868
1869
in_summary = false;
1870
1871
draw_count++;
1872
summary = saved_summary;
1873
}
1874
1875
static uint32_t
1876
draw_indx_common(uint32_t *dwords, int level)
1877
{
1878
uint32_t prim_type = dwords[1] & 0x1f;
1879
uint32_t source_select = (dwords[1] >> 6) & 0x3;
1880
uint32_t num_indices = dwords[2];
1881
const char *primtype;
1882
1883
primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);
1884
1885
do_query(primtype, num_indices);
1886
1887
printl(2, "%sdraw: %d\n", levels[level], draws[ib]);
1888
printl(2, "%sprim_type: %s (%d)\n", levels[level], primtype, prim_type);
1889
printl(2, "%ssource_select: %s (%d)\n", levels[level],
1890
rnn_enumname(rnn, "pc_di_src_sel", source_select), source_select);
1891
printl(2, "%snum_indices: %d\n", levels[level], num_indices);
1892
1893
vertices += num_indices;
1894
1895
draws[ib]++;
1896
1897
return num_indices;
1898
}
1899
1900
/*
 * Index element size as decoded by the draw packet handlers.  Note that
 * IGN, 16_BIT and INVALID intentionally share the value 0.
 */
enum pc_di_index_size {
   INDEX_SIZE_IGN     = 0,
   INDEX_SIZE_16_BIT  = 0,
   INDEX_SIZE_32_BIT  = 1,
   INDEX_SIZE_8_BIT   = 2,
   INDEX_SIZE_INVALID = 0,
};
1907
1908
static void
1909
cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)
1910
{
1911
uint32_t num_indices = draw_indx_common(dwords, level);
1912
1913
assert(!is_64b());
1914
1915
/* if we have an index buffer, dump that: */
1916
if (sizedwords == 5) {
1917
void *ptr = hostptr(dwords[3]);
1918
printl(2, "%sgpuaddr: %08x\n", levels[level], dwords[3]);
1919
printl(2, "%sidx_size: %d\n", levels[level], dwords[4]);
1920
if (ptr) {
1921
enum pc_di_index_size size =
1922
((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
1923
if (!quiet(2)) {
1924
int i;
1925
printf("%sidxs: ", levels[level]);
1926
if (size == INDEX_SIZE_8_BIT) {
1927
uint8_t *idx = ptr;
1928
for (i = 0; i < dwords[4]; i++)
1929
printf(" %u", idx[i]);
1930
} else if (size == INDEX_SIZE_16_BIT) {
1931
uint16_t *idx = ptr;
1932
for (i = 0; i < dwords[4] / 2; i++)
1933
printf(" %u", idx[i]);
1934
} else if (size == INDEX_SIZE_32_BIT) {
1935
uint32_t *idx = ptr;
1936
for (i = 0; i < dwords[4] / 4; i++)
1937
printf(" %u", idx[i]);
1938
}
1939
printf("\n");
1940
dump_hex(ptr, dwords[4] / 4, level + 1);
1941
}
1942
}
1943
}
1944
1945
/* don't bother dumping registers for the dummy draw_indx's.. */
1946
if (num_indices > 0)
1947
dump_register_summary(level);
1948
1949
needs_wfi = true;
1950
}
1951
1952
static void
1953
cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)
1954
{
1955
uint32_t num_indices = draw_indx_common(dwords, level);
1956
enum pc_di_index_size size =
1957
((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
1958
void *ptr = &dwords[3];
1959
int sz = 0;
1960
1961
assert(!is_64b());
1962
1963
/* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
1964
if (!quiet(2)) {
1965
int i;
1966
printf("%sidxs: ", levels[level]);
1967
if (size == INDEX_SIZE_8_BIT) {
1968
uint8_t *idx = ptr;
1969
for (i = 0; i < num_indices; i++)
1970
printf(" %u", idx[i]);
1971
sz = num_indices;
1972
} else if (size == INDEX_SIZE_16_BIT) {
1973
uint16_t *idx = ptr;
1974
for (i = 0; i < num_indices; i++)
1975
printf(" %u", idx[i]);
1976
sz = num_indices * 2;
1977
} else if (size == INDEX_SIZE_32_BIT) {
1978
uint32_t *idx = ptr;
1979
for (i = 0; i < num_indices; i++)
1980
printf(" %u", idx[i]);
1981
sz = num_indices * 4;
1982
}
1983
printf("\n");
1984
dump_hex(ptr, sz / 4, level + 1);
1985
}
1986
1987
/* don't bother dumping registers for the dummy draw_indx's.. */
1988
if (num_indices > 0)
1989
dump_register_summary(level);
1990
}
1991
1992
static void
1993
cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level)
1994
{
1995
uint32_t num_indices = dwords[2];
1996
uint32_t prim_type = dwords[0] & 0x1f;
1997
1998
do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices);
1999
print_mode(level);
2000
2001
/* don't bother dumping registers for the dummy draw_indx's.. */
2002
if (num_indices > 0)
2003
dump_register_summary(level);
2004
}
2005
2006
static void
2007
cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2008
{
2009
uint32_t prim_type = dwords[0] & 0x1f;
2010
uint64_t addr;
2011
2012
do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2013
print_mode(level);
2014
2015
if (is_64b())
2016
addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2017
else
2018
addr = dwords[1];
2019
dump_gpuaddr_size(addr, level, 0x10, 2);
2020
2021
if (is_64b())
2022
addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];
2023
else
2024
addr = dwords[3];
2025
dump_gpuaddr_size(addr, level, 0x10, 2);
2026
2027
dump_register_summary(level);
2028
}
2029
2030
static void
2031
cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2032
{
2033
uint32_t prim_type = dwords[0] & 0x1f;
2034
uint64_t addr;
2035
2036
do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2037
print_mode(level);
2038
2039
addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2040
dump_gpuaddr_size(addr, level, 0x10, 2);
2041
2042
dump_register_summary(level);
2043
}
2044
2045
static void
2046
cp_draw_indirect_multi(uint32_t *dwords, uint32_t sizedwords, int level)
2047
{
2048
uint32_t prim_type = dwords[0] & 0x1f;
2049
uint32_t count = dwords[2];
2050
2051
do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2052
print_mode(level);
2053
2054
struct rnndomain *domain = rnn_finddomain(rnn->db, "CP_DRAW_INDIRECT_MULTI");
2055
uint32_t count_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT_COUNT");
2056
uint32_t addr_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT");
2057
uint64_t stride_dword = rnndec_decodereg(rnn->vc, domain, "STRIDE");
2058
2059
if (count_dword) {
2060
uint64_t count_addr =
2061
((uint64_t)dwords[count_dword + 1] << 32) | dwords[count_dword];
2062
uint32_t *buf = hostptr(count_addr);
2063
2064
/* Don't print more draws than this if we don't know the indirect
2065
* count. It's possible the user will give ~0 or some other large
2066
* value, expecting the GPU to fill in the draw count, and we don't
2067
* want to print a gazillion draws in that case:
2068
*/
2069
const uint32_t max_draw_count = 0x100;
2070
2071
/* Assume the indirect count is garbage if it's larger than this
2072
* (quite large) value or 0. Hopefully this catches most cases.
2073
*/
2074
const uint32_t max_indirect_draw_count = 0x10000;
2075
2076
if (buf) {
2077
printf("%sindirect count: %u\n", levels[level], *buf);
2078
if (*buf == 0 || *buf > max_indirect_draw_count) {
2079
/* garbage value */
2080
count = min(count, max_draw_count);
2081
} else {
2082
/* not garbage */
2083
count = min(count, *buf);
2084
}
2085
} else {
2086
count = min(count, max_draw_count);
2087
}
2088
}
2089
2090
if (addr_dword && stride_dword) {
2091
uint64_t addr =
2092
((uint64_t)dwords[addr_dword + 1] << 32) | dwords[addr_dword];
2093
uint32_t stride = dwords[stride_dword];
2094
2095
for (unsigned i = 0; i < count; i++, addr += stride) {
2096
printf("%sdraw %d:\n", levels[level], i);
2097
dump_gpuaddr_size(addr, level, 0x10, 2);
2098
}
2099
}
2100
2101
dump_register_summary(level);
2102
}
2103
2104
/*
 * Decode CP_RUN_OPENCL: handled like a draw named "COMPUTE" with one
 * index, followed by the register summary.  The payload is not inspected.
 */
static void
cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)
{
   static const char kernel_name[] = "COMPUTE";

   do_query(kernel_name, 1);
   dump_register_summary(level);
}
2110
2111
static void
2112
cp_nop(uint32_t *dwords, uint32_t sizedwords, int level)
2113
{
2114
const char *buf = (void *)dwords;
2115
int i;
2116
2117
if (quiet(3))
2118
return;
2119
2120
// blob doesn't use CP_NOP for string_marker but it does
2121
// use it for things that end up looking like, but aren't
2122
// ascii chars:
2123
if (!options->decode_markers)
2124
return;
2125
2126
for (i = 0; i < 4 * sizedwords; i++) {
2127
if (buf[i] == '\0')
2128
break;
2129
if (isascii(buf[i]))
2130
printf("%c", buf[i]);
2131
}
2132
printf("\n");
2133
}
2134
2135
static void
2136
cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2137
{
2138
/* traverse indirect buffers */
2139
uint64_t ibaddr;
2140
uint32_t ibsize;
2141
uint32_t *ptr = NULL;
2142
2143
if (is_64b()) {
2144
/* a5xx+.. high 32b of gpu addr, then size: */
2145
ibaddr = dwords[0];
2146
ibaddr |= ((uint64_t)dwords[1]) << 32;
2147
ibsize = dwords[2];
2148
} else {
2149
ibaddr = dwords[0];
2150
ibsize = dwords[1];
2151
}
2152
2153
if (!quiet(3)) {
2154
if (is_64b()) {
2155
printf("%sibaddr:%016" PRIx64 "\n", levels[level], ibaddr);
2156
} else {
2157
printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);
2158
}
2159
printf("%sibsize:%08x\n", levels[level], ibsize);
2160
}
2161
2162
if (options->once && has_dumped(ibaddr, enable_mask))
2163
return;
2164
2165
/* 'query-compare' mode implies 'once' mode, although we need only to
2166
* process the cmdstream for *any* enable_mask mode, since we are
2167
* comparing binning vs draw reg values at the same time, ie. it is
2168
* not useful to process the same draw in both binning and draw pass.
2169
*/
2170
if (options->query_compare && has_dumped(ibaddr, MODE_ALL))
2171
return;
2172
2173
/* map gpuaddr back to hostptr: */
2174
ptr = hostptr(ibaddr);
2175
2176
if (ptr) {
2177
/* If the GPU hung within the target IB, the trigger point will be
2178
* just after the current CP_INDIRECT_BUFFER. Because the IB is
2179
* executed but never returns. Account for this by checking if
2180
* the IB returned:
2181
*/
2182
highlight_gpuaddr(gpuaddr(&dwords[is_64b() ? 3 : 2]));
2183
2184
ib++;
2185
ibs[ib].base = ibaddr;
2186
ibs[ib].size = ibsize;
2187
2188
dump_commands(ptr, ibsize, level);
2189
ib--;
2190
} else {
2191
fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2192
}
2193
}
2194
2195
static void
2196
cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level)
2197
{
2198
needs_wfi = false;
2199
}
2200
2201
static void
2202
cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)
2203
{
2204
if (quiet(2))
2205
return;
2206
2207
if (is_64b()) {
2208
uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);
2209
printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2210
dump_hex(&dwords[2], sizedwords - 2, level + 1);
2211
2212
if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))
2213
dump_commands(&dwords[2], sizedwords - 2, level + 1);
2214
} else {
2215
uint32_t gpuaddr = dwords[0];
2216
printf("%sgpuaddr:%08x\n", levels[level], gpuaddr);
2217
dump_float((float *)&dwords[1], sizedwords - 1, level + 1);
2218
}
2219
}
2220
2221
static void
2222
cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)
2223
{
2224
uint32_t val = dwords[0] & 0xffff;
2225
uint32_t and = dwords[1];
2226
uint32_t or = dwords[2];
2227
printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1),
2228
and, or);
2229
if (needs_wfi)
2230
printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(val, 1),
2231
and, or);
2232
reg_set(val, (reg_val(val) & and) | or);
2233
}
2234
2235
static void
2236
cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)
2237
{
2238
uint32_t val = dwords[0] & 0xffff;
2239
printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));
2240
2241
if (quiet(2))
2242
return;
2243
2244
uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32);
2245
printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2246
void *ptr = hostptr(gpuaddr);
2247
if (ptr) {
2248
uint32_t cnt = (dwords[0] >> 19) & 0x3ff;
2249
dump_hex(ptr, cnt, level + 1);
2250
}
2251
}
2252
2253
/*
 * Bookkeeping for one CP_SET_DRAW_STATE group.  cp_set_draw_state() fills
 * a slot from the group's header dword and address; load_group() later
 * decodes the cmdstream the slot points at.
 */
struct draw_state {
   uint16_t enable_mask; /* header bits 20..23 */
   uint16_t flags;       /* header bits 16..19, see FLAG_x below */
   uint32_t count;       /* header bits 0..15, size of the group in dwords */
   uint64_t addr;        /* gpu address of the group's cmdstream */
};

/* one slot per group_id (group_id is a 5 bit field): */
struct draw_state state[32];

/* bits of draw_state::flags: */
#define FLAG_DIRTY              (1 << 0)
#define FLAG_DISABLE            (1 << 1)
#define FLAG_DISABLE_ALL_GROUPS (1 << 2)
#define FLAG_LOAD_IMMED         (1 << 3)

/* first payload dword of the most recent CP_SET_MODE packet: */
static int draw_mode;
2268
2269
static void
2270
disable_group(unsigned group_id)
2271
{
2272
struct draw_state *ds = &state[group_id];
2273
memset(ds, 0, sizeof(*ds));
2274
}
2275
2276
static void
2277
disable_all_groups(void)
2278
{
2279
for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2280
disable_group(i);
2281
}
2282
2283
static void
2284
load_group(unsigned group_id, int level)
2285
{
2286
struct draw_state *ds = &state[group_id];
2287
2288
if (!ds->count)
2289
return;
2290
2291
printl(2, "%sgroup_id: %u\n", levels[level], group_id);
2292
printl(2, "%scount: %d\n", levels[level], ds->count);
2293
printl(2, "%saddr: %016llx\n", levels[level], ds->addr);
2294
printl(2, "%sflags: %x\n", levels[level], ds->flags);
2295
2296
if (options->gpu_id >= 600) {
2297
printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);
2298
2299
if (!(ds->enable_mask & enable_mask)) {
2300
printl(2, "%s\tskipped!\n\n", levels[level]);
2301
return;
2302
}
2303
}
2304
2305
void *ptr = hostptr(ds->addr);
2306
if (ptr) {
2307
if (!quiet(2))
2308
dump_hex(ptr, ds->count, level + 1);
2309
2310
ib++;
2311
dump_commands(ptr, ds->count, level + 1);
2312
ib--;
2313
}
2314
}
2315
2316
static void
2317
load_all_groups(int level)
2318
{
2319
/* sanity check, we should never recursively hit recursion here, and if
2320
* we do bad things happen:
2321
*/
2322
static bool loading_groups = false;
2323
if (loading_groups) {
2324
printf("ERROR: nothing in draw state should trigger recursively loading "
2325
"groups!\n");
2326
return;
2327
}
2328
loading_groups = true;
2329
for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2330
load_group(i, level);
2331
loading_groups = false;
2332
2333
/* in 'query-compare' mode, defer disabling all groups until we have a
2334
* chance to process the query:
2335
*/
2336
if (!options->query_compare)
2337
disable_all_groups();
2338
}
2339
2340
static void
2341
cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
2342
{
2343
uint32_t i;
2344
2345
for (i = 0; i < sizedwords;) {
2346
struct draw_state *ds;
2347
uint32_t count = dwords[i] & 0xffff;
2348
uint32_t group_id = (dwords[i] >> 24) & 0x1f;
2349
uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
2350
uint32_t flags = (dwords[i] >> 16) & 0xf;
2351
uint64_t addr;
2352
2353
if (is_64b()) {
2354
addr = dwords[i + 1];
2355
addr |= ((uint64_t)dwords[i + 2]) << 32;
2356
i += 3;
2357
} else {
2358
addr = dwords[i + 1];
2359
i += 2;
2360
}
2361
2362
if (flags & FLAG_DISABLE_ALL_GROUPS) {
2363
disable_all_groups();
2364
continue;
2365
}
2366
2367
if (flags & FLAG_DISABLE) {
2368
disable_group(group_id);
2369
continue;
2370
}
2371
2372
assert(group_id < ARRAY_SIZE(state));
2373
disable_group(group_id);
2374
2375
ds = &state[group_id];
2376
2377
ds->enable_mask = enable_mask;
2378
ds->flags = flags;
2379
ds->count = count;
2380
ds->addr = addr;
2381
2382
if (flags & FLAG_LOAD_IMMED) {
2383
load_group(group_id, level);
2384
disable_group(group_id);
2385
}
2386
}
2387
}
2388
2389
static void
2390
cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2391
{
2392
draw_mode = dwords[0];
2393
}
2394
2395
/*
 * Handler for CP_EXEC_CS (execute compute shader): run the "compute"
 * query and print the register summary.
 */
static void
cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;

   do_query("compute", 0);
   dump_register_summary(level);
}
2402
2403
static void
2404
cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2405
{
2406
uint64_t addr;
2407
2408
if (is_64b()) {
2409
addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2410
} else {
2411
addr = dwords[1];
2412
}
2413
2414
printl(3, "%saddr: %016llx\n", levels[level], addr);
2415
dump_gpuaddr_size(addr, level, 0x10, 2);
2416
2417
do_query("compute", 0);
2418
dump_register_summary(level);
2419
}
2420
2421
static void
2422
cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)
2423
{
2424
render_mode = rnn_enumname(rnn, "a6xx_render_mode", dwords[0] & 0xf);
2425
2426
if (!strcmp(render_mode, "RM6_BINNING")) {
2427
enable_mask = MODE_BINNING;
2428
} else if (!strcmp(render_mode, "RM6_GMEM")) {
2429
enable_mask = MODE_GMEM;
2430
} else if (!strcmp(render_mode, "RM6_BYPASS")) {
2431
enable_mask = MODE_BYPASS;
2432
}
2433
}
2434
2435
static void
2436
cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2437
{
2438
uint64_t addr;
2439
uint32_t *ptr, len;
2440
2441
assert(is_64b());
2442
2443
/* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
2444
* not sure if this can come in different sizes.
2445
*
2446
* First ptr doesn't seem to be cmdstream, second one does.
2447
*
2448
* Comment from downstream kernel:
2449
*
2450
* SRM -- set render mode (ex binning, direct render etc)
2451
* SRM is set by UMD usually at start of IB to tell CP the type of
2452
* preemption.
2453
* KMD needs to set SRM to NULL to indicate CP that rendering is
2454
* done by IB.
2455
* ------------------------------------------------------------------
2456
*
2457
* Seems to always be one of these two:
2458
* 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000
2459
* 00000000 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d
2460
* 001c2000 00000000
2461
*
2462
*/
2463
2464
assert(options->gpu_id >= 500);
2465
2466
render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);
2467
2468
if (sizedwords == 1)
2469
return;
2470
2471
addr = dwords[1];
2472
addr |= ((uint64_t)dwords[2]) << 32;
2473
2474
mode = dwords[3];
2475
2476
dump_gpuaddr(addr, level + 1);
2477
2478
if (sizedwords == 5)
2479
return;
2480
2481
assert(sizedwords == 8);
2482
2483
len = dwords[5];
2484
addr = dwords[6];
2485
addr |= ((uint64_t)dwords[7]) << 32;
2486
2487
printl(3, "%saddr: 0x%016lx\n", levels[level], addr);
2488
printl(3, "%slen: 0x%x\n", levels[level], len);
2489
2490
ptr = hostptr(addr);
2491
2492
if (ptr) {
2493
if (!quiet(2)) {
2494
ib++;
2495
dump_commands(ptr, len, level + 1);
2496
ib--;
2497
dump_hex(ptr, len, level + 1);
2498
}
2499
}
2500
}
2501
2502
static void
2503
cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
2504
{
2505
uint64_t addr;
2506
uint32_t *ptr, len;
2507
2508
assert(is_64b());
2509
assert(options->gpu_id >= 500);
2510
2511
assert(sizedwords == 8);
2512
2513
addr = dwords[5];
2514
addr |= ((uint64_t)dwords[6]) << 32;
2515
len = dwords[7];
2516
2517
printl(3, "%saddr: 0x%016" PRIx64 "\n", levels[level], addr);
2518
printl(3, "%slen: 0x%x\n", levels[level], len);
2519
2520
ptr = hostptr(addr);
2521
2522
if (ptr) {
2523
if (!quiet(2)) {
2524
ib++;
2525
dump_commands(ptr, len, level + 1);
2526
ib--;
2527
dump_hex(ptr, len, level + 1);
2528
}
2529
}
2530
}
2531
2532
static void
2533
cp_blit(uint32_t *dwords, uint32_t sizedwords, int level)
2534
{
2535
do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0);
2536
print_mode(level);
2537
dump_register_summary(level);
2538
}
2539
2540
static void
2541
cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)
2542
{
2543
int i;
2544
2545
/* NOTE: seems to write same reg multiple times.. not sure if different parts
2546
* of these are triggered by the FLUSH_SO_n events?? (if that is what they
2547
* actually are?)
2548
*/
2549
bool saved_summary = summary;
2550
summary = false;
2551
2552
for (i = 0; i < sizedwords; i += 2) {
2553
dump_register(dwords[i + 0], dwords[i + 1], level + 1);
2554
reg_set(dwords[i + 0], dwords[i + 1]);
2555
}
2556
2557
summary = saved_summary;
2558
}
2559
2560
/*
 * Handler for CP_REG_WRITE: print and record a single register write.  The
 * register is the low 16 bits of dword 1 and the value is dword 2.
 */
static void
cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
{
   const uint32_t value = dwords[2];
   const uint32_t reg = dwords[1] & 0xffff;

   dump_register(reg, value, level + 1);
   reg_set(reg, value);
}
2568
2569
/*
 * Handler for CP_SET_CTXSWITCH_IB: print the 64b address from dwords 0..1
 * and decode the buffer it points at, using the low 16 bits of dword 2 as
 * the size in dwords.
 */
static void
cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
{
   const uint32_t ndwords = dwords[2] & 0xffff;
   const uint64_t iova = ((uint64_t)dwords[1] << 32) | dwords[0];

   printf("addr=%" PRIx64 "\n", iova);

   void *cmds = hostptr(iova);
   if (!cmds)
      return;

   dump_commands(cmds, ndwords, level + 1);
}
2584
2585
static void
2586
cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)
2587
{
2588
skip_ib2_enable_global = dwords[0];
2589
}
2590
2591
static void
2592
cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)
2593
{
2594
skip_ib2_enable_local = dwords[0];
2595
}
2596
2597
#define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }
2598
static const struct type3_op {
2599
const char *name;
2600
void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);
2601
struct {
2602
bool load_all_groups;
2603
} options;
2604
} type3_op[] = {
2605
CP(NOP, cp_nop),
2606
CP(INDIRECT_BUFFER, cp_indirect),
2607
CP(INDIRECT_BUFFER_PFD, cp_indirect),
2608
CP(WAIT_FOR_IDLE, cp_wfi),
2609
CP(REG_RMW, cp_rmw),
2610
CP(REG_TO_MEM, cp_reg_mem),
2611
CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */
2612
CP(MEM_WRITE, cp_mem_write),
2613
CP(EVENT_WRITE, cp_event_write),
2614
CP(RUN_OPENCL, cp_run_cl),
2615
CP(DRAW_INDX, cp_draw_indx, {.load_all_groups = true}),
2616
CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups = true}),
2617
CP(SET_CONSTANT, cp_set_const),
2618
CP(IM_LOAD_IMMEDIATE, cp_im_loadi),
2619
CP(WIDE_REG_WRITE, cp_wide_reg_write),
2620
2621
/* for a3xx */
2622
CP(LOAD_STATE, cp_load_state),
2623
CP(SET_BIN, cp_set_bin),
2624
2625
/* for a4xx */
2626
CP(LOAD_STATE4, cp_load_state),
2627
CP(SET_DRAW_STATE, cp_set_draw_state),
2628
CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups = true}),
2629
CP(EXEC_CS, cp_exec_cs, {.load_all_groups = true}),
2630
CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups = true}),
2631
2632
/* for a5xx */
2633
CP(SET_RENDER_MODE, cp_set_render_mode),
2634
CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),
2635
CP(BLIT, cp_blit),
2636
CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),
2637
CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups = true}),
2638
CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups = true}),
2639
CP(DRAW_INDIRECT_MULTI, cp_draw_indirect_multi, {.load_all_groups = true}),
2640
CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),
2641
CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),
2642
2643
/* for a6xx */
2644
CP(LOAD_STATE6_GEOM, cp_load_state),
2645
CP(LOAD_STATE6_FRAG, cp_load_state),
2646
CP(LOAD_STATE6, cp_load_state),
2647
CP(SET_MODE, cp_set_mode),
2648
CP(SET_MARKER, cp_set_marker),
2649
CP(REG_WRITE, cp_reg_write),
2650
2651
CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),
2652
};
2653
2654
/* Handler used for packets which have no dedicated decoder: does nothing. */
static void
noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;
   (void)level;
}
2658
2659
static const struct type3_op *
2660
get_type3_op(unsigned opc)
2661
{
2662
static const struct type3_op dummy_op = {
2663
.fxn = noop_fxn,
2664
};
2665
const char *name = pktname(opc);
2666
2667
if (!name)
2668
return &dummy_op;
2669
2670
for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++)
2671
if (!strcmp(name, type3_op[i].name))
2672
return &type3_op[i];
2673
2674
return &dummy_op;
2675
}
2676
2677
void
2678
dump_commands(uint32_t *dwords, uint32_t sizedwords, int level)
2679
{
2680
int dwords_left = sizedwords;
2681
uint32_t count = 0; /* dword count including packet header */
2682
uint32_t val;
2683
2684
// assert(dwords);
2685
if (!dwords) {
2686
printf("NULL cmd buffer!\n");
2687
return;
2688
}
2689
2690
assert(ib < ARRAY_SIZE(draws));
2691
draws[ib] = 0;
2692
2693
while (dwords_left > 0) {
2694
2695
current_draw_count = draw_count;
2696
2697
/* hack, this looks like a -1 underflow, in some versions
2698
* when it tries to write zero registers via pkt0
2699
*/
2700
// if ((dwords[0] >> 16) == 0xffff)
2701
// goto skip;
2702
2703
if (pkt_is_type0(dwords[0])) {
2704
printl(3, "t0");
2705
count = type0_pkt_size(dwords[0]) + 1;
2706
val = type0_pkt_offset(dwords[0]);
2707
assert(val < regcnt());
2708
printl(3, "%swrite %s%s (%04x)\n", levels[level + 1], regname(val, 1),
2709
(dwords[0] & 0x8000) ? " (same register)" : "", val);
2710
dump_registers(val, dwords + 1, count - 1, level + 2);
2711
if (!quiet(3))
2712
dump_hex(dwords, count, level + 1);
2713
} else if (pkt_is_type4(dwords[0])) {
2714
/* basically the same(ish) as type0 prior to a5xx */
2715
printl(3, "t4");
2716
count = type4_pkt_size(dwords[0]) + 1;
2717
val = type4_pkt_offset(dwords[0]);
2718
assert(val < regcnt());
2719
printl(3, "%swrite %s (%04x)\n", levels[level + 1], regname(val, 1),
2720
val);
2721
dump_registers(val, dwords + 1, count - 1, level + 2);
2722
if (!quiet(3))
2723
dump_hex(dwords, count, level + 1);
2724
#if 0
2725
} else if (pkt_is_type1(dwords[0])) {
2726
printl(3, "t1");
2727
count = 3;
2728
val = dwords[0] & 0xfff;
2729
printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
2730
dump_registers(val, dwords+1, 1, level+2);
2731
val = (dwords[0] >> 12) & 0xfff;
2732
printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
2733
dump_registers(val, dwords+2, 1, level+2);
2734
if (!quiet(3))
2735
dump_hex(dwords, count, level+1);
2736
} else if (pkt_is_type2(dwords[0])) {
2737
printl(3, "t2");
2738
printf("%sNOP\n", levels[level+1]);
2739
count = 1;
2740
if (!quiet(3))
2741
dump_hex(dwords, count, level+1);
2742
#endif
2743
} else if (pkt_is_type3(dwords[0])) {
2744
count = type3_pkt_size(dwords[0]) + 1;
2745
val = cp_type3_opcode(dwords[0]);
2746
const struct type3_op *op = get_type3_op(val);
2747
if (op->options.load_all_groups)
2748
load_all_groups(level + 1);
2749
printl(3, "t3");
2750
const char *name = pktname(val);
2751
if (!quiet(2)) {
2752
printf("\t%sopcode: %s%s%s (%02x) (%d dwords)%s\n", levels[level],
2753
rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val,
2754
count, (dwords[0] & 0x1) ? " (predicated)" : "");
2755
}
2756
if (name)
2757
dump_domain(dwords + 1, count - 1, level + 2, name);
2758
op->fxn(dwords + 1, count - 1, level + 1);
2759
if (!quiet(2))
2760
dump_hex(dwords, count, level + 1);
2761
} else if (pkt_is_type7(dwords[0])) {
2762
count = type7_pkt_size(dwords[0]) + 1;
2763
val = cp_type7_opcode(dwords[0]);
2764
const struct type3_op *op = get_type3_op(val);
2765
if (op->options.load_all_groups)
2766
load_all_groups(level + 1);
2767
printl(3, "t7");
2768
const char *name = pktname(val);
2769
if (!quiet(2)) {
2770
printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],
2771
rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val,
2772
count);
2773
}
2774
if (name) {
2775
/* special hack for two packets that decode the same way
2776
* on a6xx:
2777
*/
2778
if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||
2779
!strcmp(name, "CP_LOAD_STATE6_GEOM"))
2780
name = "CP_LOAD_STATE6";
2781
dump_domain(dwords + 1, count - 1, level + 2, name);
2782
}
2783
op->fxn(dwords + 1, count - 1, level + 1);
2784
if (!quiet(2))
2785
dump_hex(dwords, count, level + 1);
2786
} else if (pkt_is_type2(dwords[0])) {
2787
printl(3, "t2");
2788
printl(3, "%snop\n", levels[level + 1]);
2789
} else {
2790
/* for 5xx+ we can do a passable job of looking for start of next valid
2791
* packet: */
2792
if (options->gpu_id >= 500) {
2793
while (dwords_left > 0) {
2794
if (pkt_is_type7(dwords[0]) || pkt_is_type4(dwords[0]))
2795
break;
2796
printf("bad type! %08x\n", dwords[0]);
2797
dwords++;
2798
dwords_left--;
2799
}
2800
} else {
2801
printf("bad type! %08x\n", dwords[0]);
2802
return;
2803
}
2804
}
2805
2806
dwords += count;
2807
dwords_left -= count;
2808
}
2809
2810
if (dwords_left < 0)
2811
printf("**** this ain't right!! dwords_left=%d\n", dwords_left);
2812
}
2813
2814