Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/freedreno/decode/crashdec.c
4565 views
1
/*
2
* Copyright © 2020 Google, Inc.
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
* SOFTWARE.
22
*/
23
24
/*
25
* Decoder for devcoredump traces from drm/msm. In case of a gpu crash/hang,
26
* the coredump should be found in:
27
*
28
* /sys/class/devcoredump/devcd<n>/data
29
*
30
* The crashdump will hang around for 5min, it can be cleared by writing to
31
* the file, ie:
32
*
33
* echo 1 > /sys/class/devcoredump/devcd<n>/data
34
*
35
* (the driver won't log any new crashdumps until the previous one is cleared
36
* or times out after 5min)
37
*/
38
39
#include <assert.h>
40
#include <getopt.h>
41
#include <inttypes.h>
42
#include <stdarg.h>
43
#include <stdbool.h>
44
#include <stdint.h>
45
#include <stdio.h>
46
#include <stdlib.h>
47
#include <string.h>
48
#include <unistd.h>
49
50
#include "freedreno_pm4.h"
51
52
#include "ir3/instr-a3xx.h"
53
#include "buffers.h"
54
#include "cffdec.h"
55
#include "disasm.h"
56
#include "pager.h"
57
#include "rnnutil.h"
58
#include "util.h"
59
60
/* Stream the crashdump is read from; main() points it at stdin or at the
 * file given with -f, and popline() consumes it line by line.
 */
static FILE *in;
/* Set by -v; enables dumping of sections that are skipped by default. */
static bool verbose;

/* Register databases, loaded by decode() once the "revision:" line has
 * been seen (which ones get loaded depends on the GPU generation):
 */
static struct rnn *rnn_gmu;
static struct rnn *rnn_control;
static struct rnn *rnn_pipe;
66
67
/* Decoder options handed to cffdec_init().  main() fills in the command
 * line driven fields and decode() sets gpu_id from the "revision:" line.
 */
static struct cffdec_options options = {
   .draw_filter = -1,
};
70
71
static inline bool
72
is_a6xx(void)
73
{
74
return (600 <= options.gpu_id) && (options.gpu_id < 700);
75
}
76
static inline bool
77
is_a5xx(void)
78
{
79
return (500 <= options.gpu_id) && (options.gpu_id < 600);
80
}
81
static inline bool
82
is_64b(void)
83
{
84
return options.gpu_id >= 500;
85
}
86
87
/*
88
* Helpers to read register values:
89
*/
90
91
/* Read a register that is 64b wide on 64b GPUs (ie. a5xx+).  The register
 * is looked up by name (which must resolve to a non-zero offset); on 64b
 * GPUs the following register supplies the upper 32 bits, otherwise only
 * the single 32b value is returned.
 */
static uint64_t
regval64(const char *name)
{
   const unsigned base = regbase(name);

   assert(base);

   const uint64_t lo = reg_val(base);
   if (!is_64b())
      return lo;

   const uint64_t hi = reg_val(base + 1);
   return lo | (hi << 32);
}
102
103
/* Look up a register by name (which must resolve to a non-zero offset) and
 * return what reg_val() reports for it.
 */
static uint32_t
regval(const char *name)
{
   const unsigned base = regbase(name);

   assert(base);

   const uint32_t value = reg_val(base);
   return value;
}
110
111
/*
112
* Line reading and string helpers:
113
*/
114
115
/* Replace the first occurrence of 'find' in 'line' with 'replace'.
 *
 * 'line' must be heap allocated.  If 'find' does not occur, 'line' itself
 * is returned untouched.  Otherwise a newly allocated string is returned
 * and 'line' is freed, so the caller must only use the returned pointer
 * from then on (and eventually free() it).
 *
 * Exits with an error message if the new string cannot be allocated.
 */
static char *
replacestr(char *line, const char *find, const char *replace)
{
   char *s = strstr(line, find);
   if (!s)
      return line;

   const char *tail = s + strlen(find);
   const size_t headlen = (size_t)(s - line);
   const size_t replen = strlen(replace);
   const size_t taillen = strlen(tail);

   char *newline = malloc(headlen + replen + taillen + 1);
   if (!newline) {
      fprintf(stderr, "out of memory\n");
      exit(1);
   }

   memcpy(newline, line, headlen);
   memcpy(newline + headlen, replace, replen);
   /* taillen + 1 also copies the terminating NUL: */
   memcpy(newline + headlen + replen, tail, taillen + 1);

   free(line);

   return newline;
}
131
132
/* Line most recently read by popline(); it is freed there right before the
 * next line is read.
 */
static char *lastline;
/* Line handed back by pushline(); the next popline() returns it again
 * instead of reading new input.
 */
static char *pushedline;
134
135
static const char *
136
popline(void)
137
{
138
char *r = pushedline;
139
140
if (r) {
141
pushedline = NULL;
142
return r;
143
}
144
145
free(lastline);
146
147
size_t n = 0;
148
if (getline(&r, &n, in) < 0)
149
exit(0);
150
151
/* Handle section name typo's from earlier kernels: */
152
r = replacestr(r, "CP_MEMPOOOL", "CP_MEMPOOL");
153
r = replacestr(r, "CP_SEQ_STAT", "CP_SQE_STAT");
154
155
lastline = r;
156
return r;
157
}
158
159
static void
160
pushline(void)
161
{
162
assert(!pushedline);
163
pushedline = lastline;
164
}
165
166
/* Read the next line and decode it as ascii85 data.
 *
 * The line is expected to be indented by at least one space and to hold
 * nothing but the encoded data up to the end of the line.  Each group of
 * (up to) five characters decodes to one dword, and a single 'z' stands for
 * an all-zero dword.
 *
 * Returns a zero-initialized, heap allocated buffer with room for
 * 'sizedwords' dwords (and at least one); the caller owns it.  Encoded data
 * beyond 'sizedwords' dwords is dropped with a warning instead of being
 * written past the end of the buffer.
 */
static uint32_t *
popline_ascii85(uint32_t sizedwords)
{
   const char *line = popline();

   /* At this point we expect the ascii85 data to be indented *some*
    * amount, and to terminate at the end of the line.  So just eat
    * up the leading whitespace.
    */
   assert(*line == ' ');
   while (*line == ' ')
      line++;

   /* calloc(n, size) checks the multiplication for overflow, and asking
    * for at least one element guarantees a usable pointer even when the
    * dump claims a size of zero:
    */
   uint32_t *buf = calloc(sizedwords ? sizedwords : 1, sizeof(*buf));
   if (!buf) {
      fprintf(stderr, "out of memory\n");
      exit(1);
   }

   uint32_t idx = 0;

   /* Also stop at the NUL, the last line of the input may lack a '\n': */
   while ((*line != '\n') && (*line != '\0')) {
      if (idx >= sizedwords) {
         fprintf(stderr, "ascii85 data exceeds %u dwords, truncating\n",
                 sizedwords);
         break;
      }

      if (*line == 'z') {
         buf[idx++] = 0;
         line++;
         continue;
      }

      uint32_t accum = 0;
      for (int i = 0; (i < 5) && (*line != '\n') && (*line != '\0'); i++) {
         accum *= 85;
         accum += *line - '!';
         line++;
      }

      buf[idx++] = accum;
   }

   return buf;
}
201
202
/* Return true if 'line' begins with the prefix 'start'.  An empty prefix
 * matches every line.
 */
static bool
startswith(const char *line, const char *start)
{
   /* strncmp() stops at the end of the prefix (or at the first mismatch),
    * whereas strstr() would search the whole line for a match that is
    * only accepted at offset zero anyway.
    */
   return strncmp(line, start, strlen(start)) == 0;
}
207
208
/* sscanf() 'line' against 'fmt', storing the results through the variadic
 * pointer arguments.  Exits with status 1 (after printing the format to
 * stderr) unless every assigning conversion in 'fmt' was matched.
 *
 * NOTE: "%n" is not special-cased; a format using it would be miscounted.
 */
static void
parseline(const char *line, const char *fmt, ...)
{
   int n = 0;

   /* Count the conversions that vsscanf() includes in its return value.
    * A "%%" matches a literal '%' and "%*..." suppresses assignment, so
    * neither of those counts:
    */
   for (const char *p = fmt; *p; p++) {
      if (*p != '%')
         continue;

      if (p[1] == '%') {
         p++; /* skip the second '%' so it isn't taken as a new directive */
         continue;
      }

      if (p[1] != '*')
         n++;
   }

   va_list ap;
   va_start(ap, fmt);
   const int matched = vsscanf(line, fmt, ap);
   va_end(ap);

   if (matched != n) {
      fprintf(stderr, "parse error scanning: '%s'\n", fmt);
      exit(1);
   }
}
236
237
/* Iterate over the lines of the current section, binding each one to
 * '_line' for the statement that follows the macro.  A line that does not
 * start with a space ends the section: it is pushed back, so that the next
 * popline() returns it again, and the loop is left.
 *
 * The expansion ends in a dangling 'else', so the statement supplied by the
 * user becomes that else branch; 'continue' and 'break' inside it act on
 * the for loop of the macro.
 */
#define foreach_line_in_section(_line)                                         \
   for (const char *_line = popline(); _line; _line = popline())               \
      /* check for start of next section */                                    \
      if (_line[0] != ' ') {                                                   \
         pushline();                                                           \
         break;                                                                \
      } else
244
245
/*
246
* Decode ringbuffer section:
247
*/
248
249
/* State collected from the "ringbuffer:" section, indexed by the ring id
 * found in the dump.  decode_ringbuffer() fills it in and dump_cmdstream()
 * consumes it.
 */
static struct {
   uint64_t iova; /* parsed from the "iova:" line */
   uint32_t rptr; /* parsed from the "rptr:" line */
   uint32_t wptr; /* parsed from the "wptr:" line */
   uint32_t size; /* parsed from the "size:" line; used as a byte count */
   uint32_t *buf; /* decoded ascii85 "data:" contents, size / 4 dwords */
} ringbuffers[5];
256
257
static void
258
decode_ringbuffer(void)
259
{
260
int id = 0;
261
262
foreach_line_in_section (line) {
263
if (startswith(line, " - id:")) {
264
parseline(line, " - id: %d", &id);
265
assert(id < ARRAY_SIZE(ringbuffers));
266
} else if (startswith(line, " iova:")) {
267
parseline(line, " iova: %" PRIx64, &ringbuffers[id].iova);
268
} else if (startswith(line, " rptr:")) {
269
parseline(line, " rptr: %d", &ringbuffers[id].rptr);
270
} else if (startswith(line, " wptr:")) {
271
parseline(line, " wptr: %d", &ringbuffers[id].wptr);
272
} else if (startswith(line, " size:")) {
273
parseline(line, " size: %d", &ringbuffers[id].size);
274
} else if (startswith(line, " data: !!ascii85 |")) {
275
ringbuffers[id].buf = popline_ascii85(ringbuffers[id].size / 4);
276
add_buffer(ringbuffers[id].iova, ringbuffers[id].size,
277
ringbuffers[id].buf);
278
continue;
279
}
280
281
printf("%s", line);
282
}
283
}
284
285
static bool
286
valid_header(uint32_t pkt)
287
{
288
if (options.gpu_id >= 500) {
289
return pkt_is_type4(pkt) || pkt_is_type7(pkt);
290
} else {
291
/* TODO maybe we can check validish looking pkt3 opc or pkt0
292
* register offset.. the cmds sent by kernel are usually
293
* fairly limited (other than initialization) which confines
294
* the search space a bit..
295
*/
296
return true;
297
}
298
}
299
300
static void
301
dump_cmdstream(void)
302
{
303
uint64_t rb_base = regval64("CP_RB_BASE");
304
305
printf("got rb_base=%" PRIx64 "\n", rb_base);
306
307
options.ibs[1].base = regval64("CP_IB1_BASE");
308
options.ibs[1].rem = regval("CP_IB1_REM_SIZE");
309
options.ibs[2].base = regval64("CP_IB2_BASE");
310
options.ibs[2].rem = regval("CP_IB2_REM_SIZE");
311
312
/* Adjust remaining size to account for cmdstream slurped into ROQ
313
* but not yet consumed by SQE
314
*
315
* TODO add support for earlier GPUs once we tease out the needed
316
* registers.. see crashit.c in msmtest for hints.
317
*
318
* TODO it would be nice to be able to extract out register bitfields
319
* by name rather than hard-coding this.
320
*/
321
if (is_a6xx()) {
322
options.ibs[1].rem += regval("CP_CSQ_IB1_STAT") >> 16;
323
options.ibs[2].rem += regval("CP_CSQ_IB2_STAT") >> 16;
324
}
325
326
printf("IB1: %" PRIx64 ", %u\n", options.ibs[1].base, options.ibs[1].rem);
327
printf("IB2: %" PRIx64 ", %u\n", options.ibs[2].base, options.ibs[2].rem);
328
329
/* now that we've got the regvals we want, reset register state
330
* so we aren't seeing values from decode_registers();
331
*/
332
reset_regs();
333
334
for (int id = 0; id < ARRAY_SIZE(ringbuffers); id++) {
335
if (ringbuffers[id].iova != rb_base)
336
continue;
337
if (!ringbuffers[id].size)
338
continue;
339
340
printf("found ring!\n");
341
342
/* The kernel level ringbuffer (RB) wraps around, which
343
* cffdec doesn't really deal with.. so figure out how
344
* many dwords are unread
345
*/
346
unsigned ringszdw = ringbuffers[id].size >> 2; /* in dwords */
347
348
/* helper macro to deal with modulo size math: */
349
#define mod_add(b, v) ((ringszdw + (int)(b) + (int)(v)) % ringszdw)
350
351
/* The rptr will (most likely) have moved past the IB to
352
* userspace cmdstream, so back up a bit, and then advance
353
* until we find a valid start of a packet.. this is going
354
* to be less reliable on a4xx and before (pkt0/pkt3),
355
* compared to pkt4/pkt7 with parity bits
356
*/
357
const int lookback = 12;
358
unsigned rptr = mod_add(ringbuffers[id].rptr, -lookback);
359
360
for (int idx = 0; idx < lookback; idx++) {
361
if (valid_header(ringbuffers[id].buf[rptr]))
362
break;
363
rptr = mod_add(rptr, 1);
364
}
365
366
unsigned cmdszdw = mod_add(ringbuffers[id].wptr, -rptr);
367
368
printf("got cmdszdw=%d\n", cmdszdw);
369
uint32_t *buf = malloc(cmdszdw * 4);
370
371
for (int idx = 0; idx < cmdszdw; idx++) {
372
int p = mod_add(rptr, idx);
373
buf[idx] = ringbuffers[id].buf[p];
374
}
375
376
dump_commands(buf, cmdszdw, 0);
377
free(buf);
378
}
379
}
380
381
/*
382
* Decode 'bos' (buffers) section:
383
*/
384
385
static void
386
decode_bos(void)
387
{
388
uint32_t size = 0;
389
uint64_t iova = 0;
390
391
foreach_line_in_section (line) {
392
if (startswith(line, " - iova:")) {
393
parseline(line, " - iova: %" PRIx64, &iova);
394
} else if (startswith(line, " size:")) {
395
parseline(line, " size: %u", &size);
396
} else if (startswith(line, " data: !!ascii85 |")) {
397
uint32_t *buf = popline_ascii85(size / 4);
398
399
if (verbose)
400
dump_hex_ascii(buf, size, 1);
401
402
add_buffer(iova, size, buf);
403
404
continue;
405
}
406
407
printf("%s", line);
408
}
409
}
410
411
/*
412
* Decode registers section:
413
*/
414
415
static void
416
dump_register(struct rnn *rnn, uint32_t offset, uint32_t value)
417
{
418
struct rnndecaddrinfo *info = rnn_reginfo(rnn, offset);
419
if (info && info->typeinfo) {
420
char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
421
printf("%s: %s\n", info->name, decoded);
422
} else if (info) {
423
printf("%s: %08x\n", info->name, value);
424
} else {
425
printf("<%04x>: %08x\n", offset, value);
426
}
427
}
428
429
static void
430
decode_gmu_registers(void)
431
{
432
foreach_line_in_section (line) {
433
uint32_t offset, value;
434
parseline(line, " - { offset: %x, value: %x }", &offset, &value);
435
436
printf("\t%08x\t", value);
437
dump_register(rnn_gmu, offset / 4, value);
438
}
439
}
440
441
static void
442
decode_registers(void)
443
{
444
foreach_line_in_section (line) {
445
uint32_t offset, value;
446
parseline(line, " - { offset: %x, value: %x }", &offset, &value);
447
448
reg_set(offset / 4, value);
449
printf("\t%08x", value);
450
dump_register_val(offset / 4, value, 0);
451
}
452
}
453
454
/* similar to registers section, but for banked context regs: */
455
static void
456
decode_clusters(void)
457
{
458
foreach_line_in_section (line) {
459
if (startswith(line, " - cluster-name:") ||
460
startswith(line, " - context:")) {
461
printf("%s", line);
462
continue;
463
}
464
465
uint32_t offset, value;
466
parseline(line, " - { offset: %x, value: %x }", &offset, &value);
467
468
printf("\t%08x", value);
469
dump_register_val(offset / 4, value, 0);
470
}
471
}
472
473
/*
474
* Decode indexed-registers.. these aren't like normal registers, but a
475
* sort of FIFO where successive reads pop out associated debug state.
476
*/
477
478
/* Print the CP_SQE_STAT dump: the first dword as PC, then (on a6xx, when
 * the next dword is a type7 packet header with a known name) the packet
 * name, then 32 further dwords as $01..$20 in two columns.
 *
 * 'stat' must hold at least 33 dwords.
 */
static void
dump_cp_sqe_stat(uint32_t *stat)
{
   const uint32_t *gpr = stat + 1; /* dwords following the PC */
   const uint32_t hdr = gpr[0];

   printf("\t PC: %04x\n", stat[0]);

   /* Not sure if a valid header that isn't type7 can happen; such a
    * value is silently ignored.
    */
   if (is_a6xx() && valid_header(hdr) && pkt_is_type7(hdr)) {
      const char *pkt = pktname(cp_type7_opcode(hdr));
      if (pkt)
         printf("\tPKT: %s\n", pkt);
   }

   for (int row = 0; row < 16; row++) {
      const int left = row + 1;
      const int right = row + 16 + 1;

      printf("\t$%02x: %08x\t\t$%02x: %08x\n", left, gpr[row], right,
             gpr[row + 16]);
   }
}
500
501
static void
502
dump_control_regs(uint32_t *regs)
503
{
504
if (!rnn_control)
505
return;
506
507
/* Control regs 0x100-0x17f are a scratch space to be used by the
508
* firmware however it wants, unlike lower regs which involve some
509
* fixed-function units. Therefore only these registers get dumped
510
* directly.
511
*/
512
for (uint32_t i = 0; i < 0x80; i++) {
513
printf("\t%08x\t", regs[i]);
514
dump_register(rnn_control, i + 0x100, regs[i]);
515
}
516
}
517
518
static void
519
dump_cp_ucode_dbg(uint32_t *dbg)
520
{
521
/* Notes on the data:
522
* There seems to be a section every 4096 DWORD's. The sections aren't
523
* all the same size, so the rest of the 4096 DWORD's are filled with
524
* mirrors of the actual data.
525
*/
526
527
for (int section = 0; section < 6; section++, dbg += 0x1000) {
528
switch (section) {
529
case 0:
530
/* Contains scattered data from a630_sqe.fw: */
531
printf("\tSQE instruction cache:\n");
532
dump_hex_ascii(dbg, 4 * 0x400, 1);
533
break;
534
case 1:
535
printf("\tUnknown 1:\n");
536
dump_hex_ascii(dbg, 4 * 0x80, 1);
537
break;
538
case 2:
539
printf("\tUnknown 2:\n");
540
dump_hex_ascii(dbg, 4 * 0x200, 1);
541
break;
542
case 3:
543
printf("\tUnknown 3:\n");
544
dump_hex_ascii(dbg, 4 * 0x80, 1);
545
break;
546
case 4:
547
/* Don't bother printing this normally */
548
if (verbose) {
549
printf("\tSQE packet jumptable contents:\n");
550
dump_hex_ascii(dbg, 4 * 0x80, 1);
551
}
552
break;
553
case 5:
554
printf("\tSQE scratch control regs:\n");
555
dump_control_regs(dbg);
556
break;
557
}
558
}
559
}
560
561
static void
562
dump_mem_pool_reg_write(unsigned reg, uint32_t data, unsigned context,
563
bool pipe)
564
{
565
if (pipe) {
566
struct rnndecaddrinfo *info = rnn_reginfo(rnn_pipe, reg);
567
printf("\t\twrite %s (%02x) pipe\n", info->name, reg);
568
569
if (!strcmp(info->typeinfo->name, "void")) {
570
/* registers that ignore their payload */
571
} else {
572
printf("\t\t\t");
573
dump_register(rnn_pipe, reg, data);
574
}
575
} else {
576
printf("\t\twrite %s (%05x) context %d\n", regname(reg, 1), reg, context);
577
dump_register_val(reg, data, 2);
578
}
579
}
580
581
/* Decode one 128-bit mem pool chunk, which holds up to two register writes,
 * and print each write that is marked as enabled.
 *
 * The chunk is reinterpreted through a packed bitfield struct, so the
 * decoding depends on how the compiler lays out packed bitfields.
 */
static void
dump_mem_pool_chunk(const uint32_t *chunk)
{
   struct __attribute__((packed)) {
      bool reg0_enabled : 1;
      bool reg1_enabled : 1;
      uint32_t data0 : 32;
      uint32_t data1 : 32;
      uint32_t reg0 : 18;
      uint32_t reg1 : 18;
      bool reg0_pipe : 1;
      bool reg1_pipe : 1;
      uint32_t reg0_context : 1;
      uint32_t reg1_context : 1;
      uint32_t padding : 22;
   } desc;

   /* one chunk is four dwords: */
   memcpy(&desc, chunk, sizeof(uint32_t[4]));

   if (desc.reg0_enabled)
      dump_mem_pool_reg_write(desc.reg0, desc.data0, desc.reg0_context,
                              desc.reg0_pipe);

   if (desc.reg1_enabled)
      dump_mem_pool_reg_write(desc.reg1, desc.data1, desc.reg1_context,
                              desc.reg1_pipe);
}
610
611
static void
612
dump_cp_mem_pool(uint32_t *mempool)
613
{
614
/* The mem pool is a shared pool of memory used for storing in-flight
615
* register writes. There are 6 different queues, one for each
616
* cluster. Writing to $data (or for some special registers, $addr)
617
* pushes data onto the appropriate queue, and each queue is pulled
618
* from by the appropriate cluster. The queues are thus written to
619
* in-order, but may be read out-of-order.
620
*
621
* The queues are conceptually divided into 128-bit "chunks", and the
622
* read and write pointers are in units of chunks. These chunks are
623
* organized internally into 8-chunk "blocks", and memory is allocated
624
* dynamically in terms of blocks. Each queue is represented as a
625
* singly-linked list of blocks, as well as 3-bit start/end chunk
626
* pointers that point within the first/last block. The next pointers
627
* are located in a separate array, rather than inline.
628
*/
629
630
/* TODO: The firmware CP_MEM_POOL save/restore routines do something
631
* like:
632
*
633
* cread $02, [ $00 + 0 ]
634
* and $02, $02, 0x118
635
* ...
636
* brne $02, 0, #label
637
* mov $03, 0x2000
638
* mov $03, 0x1000
639
* label:
640
* ...
641
*
642
* I think that control register 0 is the GPU version, and some
643
* versions have a smaller mem pool. It seems some models have a mem
644
* pool that's half the size, and a bunch of offsets are shifted
645
* accordingly. Unfortunately the kernel driver's dumping code doesn't
646
* seem to take this into account, even the downstream android driver,
647
* and we don't know which versions 0x8, 0x10, or 0x100 correspond
648
* to. Or maybe we can use CP_DBG_MEM_POOL_SIZE to figure this out?
649
*/
650
bool small_mem_pool = false;
651
652
/* The array of next pointers for each block. */
653
const uint32_t *next_pointers =
654
small_mem_pool ? &mempool[0x800] : &mempool[0x1000];
655
656
/* Maximum number of blocks in the pool, also the size of the pointers
657
* array.
658
*/
659
const int num_blocks = small_mem_pool ? 0x30 : 0x80;
660
661
/* Number of queues */
662
const unsigned num_queues = 6;
663
664
/* Unfortunately the per-queue state is a little more complicated than
665
* a simple pair of begin/end pointers. Instead of a single beginning
666
* block, there are *two*, with the property that either the two are
667
* equal or the second is the "next" of the first. Similarly there are
668
* two end blocks. Thus the queue either looks like this:
669
*
670
* A -> B -> ... -> C -> D
671
*
672
* Or like this, or some combination:
673
*
674
* A/B -> ... -> C/D
675
*
676
* However, there's only one beginning/end chunk offset. Now the
677
* question is, which of A or B is the actual start? I.e. is the chunk
678
* offset an offset inside A or B? It depends. I'll show a typical read
679
* cycle, starting here (read pointer marked with a *) with a chunk
680
* offset of 0:
681
*
682
* A B
683
* _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
684
* |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| -> |_|_|_|_|_|_|_|_|
685
*
686
* Once the pointer advances far enough, the hardware decides to free
687
* A, after which the read-side state looks like:
688
*
689
* (free) A/B
690
* _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
691
* |_|_|_|_|_|_|_|_| |_|_|_|*|_|_|_|_| -> |_|_|_|_|_|_|_|_|
692
*
693
* Then after advancing the pointer a bit more, the hardware fetches
694
* the "next" pointer for A and stores it in B:
695
*
696
* (free) A B
697
* _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
698
* |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|*| -> |_|_|_|_|_|_|_|_|
699
*
700
* Then the read pointer advances into B, at which point we've come
701
* back to the first state having advanced a whole block:
702
*
703
* (free) A B
704
* _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
705
* |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_|
706
*
707
*
708
* There is a similar cycle for the write pointer. Now, the question
709
* is, how do we know which state we're in? We need to know this to
710
* know whether the pointer (*) is in A or B if they're different. It
711
* seems like there should be some bit somewhere describing this, but
712
* after lots of experimentation I've come up empty-handed. For now we
713
* assume that if the pointer is in the first half, then we're in
714
* either the first or second state and use B, and otherwise we're in
715
* the second or third state and use A. So far I haven't seen anything
716
* that violates this assumption.
717
*/
718
719
struct {
720
uint32_t unk0;
721
uint32_t padding0[7]; /* Mirrors of unk0 */
722
723
struct {
724
uint32_t chunk : 3;
725
uint32_t first_block : 32 - 3;
726
} writer[6];
727
uint32_t padding1[2]; /* Mirrors of writer[4], writer[5] */
728
729
uint32_t unk1;
730
uint32_t padding2[7]; /* Mirrors of unk1 */
731
732
uint32_t writer_second_block[6];
733
uint32_t padding3[2];
734
735
uint32_t unk2[6];
736
uint32_t padding4[2];
737
738
struct {
739
uint32_t chunk : 3;
740
uint32_t first_block : 32 - 3;
741
} reader[6];
742
uint32_t padding5[2]; /* Mirrors of reader[4], reader[5] */
743
744
uint32_t unk3;
745
uint32_t padding6[7]; /* Mirrors of unk3 */
746
747
uint32_t reader_second_block[6];
748
uint32_t padding7[2];
749
750
uint32_t block_count[6];
751
uint32_t padding[2];
752
753
uint32_t unk4;
754
uint32_t padding9[7]; /* Mirrors of unk4 */
755
} data1;
756
757
const uint32_t *data1_ptr =
758
small_mem_pool ? &mempool[0xc00] : &mempool[0x1800];
759
memcpy(&data1, data1_ptr, sizeof(data1));
760
761
/* Based on the kernel, the first dword is the mem pool size (in
762
* blocks?) and mirrors CP_MEM_POOL_DBG_SIZE.
763
*/
764
const uint32_t *data2_ptr =
765
small_mem_pool ? &mempool[0x1000] : &mempool[0x2000];
766
const int data2_size = 0x60;
767
768
/* This seems to be the size of each queue in chunks. */
769
const uint32_t *queue_sizes = &data2_ptr[0x18];
770
771
printf("\tdata2:\n");
772
dump_hex_ascii(data2_ptr, 4 * data2_size, 1);
773
774
/* These seem to be some kind of counter of allocated/deallocated blocks */
775
if (verbose) {
776
printf("\tunk0: %x\n", data1.unk0);
777
printf("\tunk1: %x\n", data1.unk1);
778
printf("\tunk3: %x\n", data1.unk3);
779
printf("\tunk4: %x\n\n", data1.unk4);
780
}
781
782
for (int queue = 0; queue < num_queues; queue++) {
783
const char *cluster_names[6] = {"FE", "SP_VS", "PC_VS",
784
"GRAS", "SP_PS", "PS"};
785
printf("\tCLUSTER_%s:\n\n", cluster_names[queue]);
786
787
if (verbose) {
788
printf("\t\twriter_first_block: 0x%x\n",
789
data1.writer[queue].first_block);
790
printf("\t\twriter_second_block: 0x%x\n",
791
data1.writer_second_block[queue]);
792
printf("\t\twriter_chunk: %d\n", data1.writer[queue].chunk);
793
printf("\t\treader_first_block: 0x%x\n",
794
data1.reader[queue].first_block);
795
printf("\t\treader_second_block: 0x%x\n",
796
data1.reader_second_block[queue]);
797
printf("\t\treader_chunk: %d\n", data1.reader[queue].chunk);
798
printf("\t\tblock_count: %d\n", data1.block_count[queue]);
799
printf("\t\tunk2: 0x%x\n", data1.unk2[queue]);
800
printf("\t\tqueue_size: %d\n\n", queue_sizes[queue]);
801
}
802
803
uint32_t cur_chunk = data1.reader[queue].chunk;
804
uint32_t cur_block = cur_chunk > 3 ? data1.reader[queue].first_block
805
: data1.reader_second_block[queue];
806
uint32_t last_chunk = data1.writer[queue].chunk;
807
uint32_t last_block = last_chunk > 3 ? data1.writer[queue].first_block
808
: data1.writer_second_block[queue];
809
810
if (verbose)
811
printf("\tblock %x\n", cur_block);
812
if (cur_block >= num_blocks) {
813
fprintf(stderr, "block %x too large\n", cur_block);
814
exit(1);
815
}
816
unsigned calculated_queue_size = 0;
817
while (cur_block != last_block || cur_chunk != last_chunk) {
818
calculated_queue_size++;
819
uint32_t *chunk_ptr = &mempool[cur_block * 0x20 + cur_chunk * 4];
820
821
dump_mem_pool_chunk(chunk_ptr);
822
823
printf("\t%05x: %08x %08x %08x %08x\n",
824
4 * (cur_block * 0x20 + cur_chunk + 4), chunk_ptr[0],
825
chunk_ptr[1], chunk_ptr[2], chunk_ptr[3]);
826
827
cur_chunk++;
828
if (cur_chunk == 8) {
829
cur_block = next_pointers[cur_block];
830
if (verbose)
831
printf("\tblock %x\n", cur_block);
832
if (cur_block >= num_blocks) {
833
fprintf(stderr, "block %x too large\n", cur_block);
834
exit(1);
835
}
836
cur_chunk = 0;
837
}
838
}
839
if (calculated_queue_size != queue_sizes[queue]) {
840
printf("\t\tCALCULATED SIZE %d DOES NOT MATCH!\n",
841
calculated_queue_size);
842
}
843
printf("\n");
844
}
845
}
846
847
static void
848
decode_indexed_registers(void)
849
{
850
char *name = NULL;
851
uint32_t sizedwords = 0;
852
853
foreach_line_in_section (line) {
854
if (startswith(line, " - regs-name:")) {
855
free(name);
856
parseline(line, " - regs-name: %ms", &name);
857
} else if (startswith(line, " dwords:")) {
858
parseline(line, " dwords: %u", &sizedwords);
859
} else if (startswith(line, " data: !!ascii85 |")) {
860
uint32_t *buf = popline_ascii85(sizedwords);
861
862
/* some of the sections are pretty large, and are (at least
863
* so far) not useful, so skip them if not in verbose mode:
864
*/
865
bool dump = verbose || !strcmp(name, "CP_SQE_STAT") ||
866
!strcmp(name, "CP_DRAW_STATE") ||
867
!strcmp(name, "CP_ROQ") || 0;
868
869
if (!strcmp(name, "CP_SQE_STAT"))
870
dump_cp_sqe_stat(buf);
871
872
if (!strcmp(name, "CP_UCODE_DBG_DATA"))
873
dump_cp_ucode_dbg(buf);
874
875
if (!strcmp(name, "CP_MEMPOOL"))
876
dump_cp_mem_pool(buf);
877
878
if (dump)
879
dump_hex_ascii(buf, 4 * sizedwords, 1);
880
881
free(buf);
882
883
continue;
884
}
885
886
printf("%s", line);
887
}
888
}
889
890
/*
891
* Decode shader-blocks:
892
*/
893
894
static void
895
decode_shader_blocks(void)
896
{
897
char *type = NULL;
898
uint32_t sizedwords = 0;
899
900
foreach_line_in_section (line) {
901
if (startswith(line, " - type:")) {
902
free(type);
903
parseline(line, " - type: %ms", &type);
904
} else if (startswith(line, " size:")) {
905
parseline(line, " size: %u", &sizedwords);
906
} else if (startswith(line, " data: !!ascii85 |")) {
907
uint32_t *buf = popline_ascii85(sizedwords);
908
909
/* some of the sections are pretty large, and are (at least
910
* so far) not useful, so skip them if not in verbose mode:
911
*/
912
bool dump = verbose || !strcmp(type, "A6XX_SP_INST_DATA") ||
913
!strcmp(type, "A6XX_HLSQ_INST_RAM") || 0;
914
915
if (!strcmp(type, "A6XX_SP_INST_DATA") ||
916
!strcmp(type, "A6XX_HLSQ_INST_RAM")) {
917
/* TODO this section actually contains multiple shaders
918
* (or parts of shaders?), so perhaps we should search
919
* for ends of shaders and decode each?
920
*/
921
try_disasm_a3xx(buf, sizedwords, 1, stdout, options.gpu_id);
922
}
923
924
if (dump)
925
dump_hex_ascii(buf, 4 * sizedwords, 1);
926
927
free(buf);
928
929
continue;
930
}
931
932
printf("%s", line);
933
}
934
935
free(type);
936
}
937
938
/*
939
* Decode debugbus section:
940
*/
941
942
static void
943
decode_debugbus(void)
944
{
945
char *block = NULL;
946
uint32_t sizedwords = 0;
947
948
foreach_line_in_section (line) {
949
if (startswith(line, " - debugbus-block:")) {
950
free(block);
951
parseline(line, " - debugbus-block: %ms", &block);
952
} else if (startswith(line, " count:")) {
953
parseline(line, " count: %u", &sizedwords);
954
} else if (startswith(line, " data: !!ascii85 |")) {
955
uint32_t *buf = popline_ascii85(sizedwords);
956
957
/* some of the sections are pretty large, and are (at least
958
* so far) not useful, so skip them if not in verbose mode:
959
*/
960
bool dump = verbose || 0;
961
962
if (dump)
963
dump_hex_ascii(buf, 4 * sizedwords, 1);
964
965
free(buf);
966
967
continue;
968
}
969
970
printf("%s", line);
971
}
972
}
973
974
/*
975
* Main crashdump decode loop:
976
*/
977
978
static void
979
decode(void)
980
{
981
const char *line;
982
983
while ((line = popline())) {
984
printf("%s", line);
985
if (startswith(line, "revision:")) {
986
parseline(line, "revision: %u", &options.gpu_id);
987
printf("Got gpu_id=%u\n", options.gpu_id);
988
989
cffdec_init(&options);
990
991
if (is_a6xx()) {
992
rnn_gmu = rnn_new(!options.color);
993
rnn_load_file(rnn_gmu, "adreno/a6xx_gmu.xml", "A6XX");
994
rnn_control = rnn_new(!options.color);
995
rnn_load_file(rnn_control, "adreno/adreno_control_regs.xml",
996
"A6XX_CONTROL_REG");
997
rnn_pipe = rnn_new(!options.color);
998
rnn_load_file(rnn_pipe, "adreno/adreno_pipe_regs.xml",
999
"A6XX_PIPE_REG");
1000
} else if (is_a5xx()) {
1001
rnn_control = rnn_new(!options.color);
1002
rnn_load_file(rnn_control, "adreno/adreno_control_regs.xml",
1003
"A5XX_CONTROL_REG");
1004
} else {
1005
rnn_control = NULL;
1006
}
1007
} else if (startswith(line, "bos:")) {
1008
decode_bos();
1009
} else if (startswith(line, "ringbuffer:")) {
1010
decode_ringbuffer();
1011
} else if (startswith(line, "registers:")) {
1012
decode_registers();
1013
1014
/* after we've recorded buffer contents, and CP register values,
1015
* we can take a stab at decoding the cmdstream:
1016
*/
1017
dump_cmdstream();
1018
} else if (startswith(line, "registers-gmu:")) {
1019
decode_gmu_registers();
1020
} else if (startswith(line, "indexed-registers:")) {
1021
decode_indexed_registers();
1022
} else if (startswith(line, "shader-blocks:")) {
1023
decode_shader_blocks();
1024
} else if (startswith(line, "clusters:")) {
1025
decode_clusters();
1026
} else if (startswith(line, "debugbus:")) {
1027
decode_debugbus();
1028
}
1029
}
1030
}
1031
1032
/*
1033
* Usage and argument parsing:
1034
*/
1035
1036
/* Print the command line help to stderr and exit with status 2. */
static void
usage(void)
{
   /* clang-format off */
   static const char help[] =
      "Usage:\n\n"
      "\tcrashdec [-achmsv] [-f FILE]\n\n"
      "Options:\n"
      "\t-a, --allregs - show all registers (including ones not written since\n"
      "\t previous draw) at each draw\n"
      "\t-c, --color - use colors\n"
      "\t-f, --file=FILE - read input from specified file (rather than stdin)\n"
      "\t-h, --help - this usage message\n"
      "\t-m, --markers - try to decode CP_NOP string markers\n"
      "\t-s, --summary - don't show individual register writes, but just show\n"
      "\t register values on draws\n"
      "\t-v, --verbose - dump more verbose output, including contents of\n"
      "\t less interesting buffers\n"
      "\n";
   /* clang-format on */

   fputs(help, stderr);
   exit(2);
}
1058
1059
/* clang-format off */
1060
/* Long options accepted by main(); each one maps onto the short option
 * character given as its last member, so both forms share one switch case.
 * The empty entry terminates the table.
 */
static const struct option opts[] = {
      { .name = "allregs", .has_arg = 0, NULL, 'a' },
      { .name = "color",   .has_arg = 0, NULL, 'c' },
      { .name = "file",    .has_arg = 1, NULL, 'f' },
      { .name = "help",    .has_arg = 0, NULL, 'h' },
      { .name = "markers", .has_arg = 0, NULL, 'm' },
      { .name = "summary", .has_arg = 0, NULL, 's' },
      { .name = "verbose", .has_arg = 0, NULL, 'v' },
      {}
};
1070
/* clang-format on */
1071
1072
/* Set by main() when stdout is a terminal, in which case the output goes
 * through the pager.
 */
static bool interactive;

/* Exit handler: flush pending output and, if the pager was in use, close
 * it.
 */
static void
cleanup(void)
{
   fflush(stdout);

   if (!interactive)
      return;

   pager_close();
}
1083
1084
int
1085
main(int argc, char **argv)
1086
{
1087
int c;
1088
1089
interactive = isatty(STDOUT_FILENO);
1090
options.color = interactive;
1091
1092
/* default to read from stdin: */
1093
in = stdin;
1094
1095
while ((c = getopt_long(argc, argv, "acf:hmsv", opts, NULL)) != -1) {
1096
switch (c) {
1097
case 'a':
1098
options.allregs = true;
1099
break;
1100
case 'c':
1101
options.color = true;
1102
break;
1103
case 'f':
1104
in = fopen(optarg, "r");
1105
break;
1106
case 'm':
1107
options.decode_markers = true;
1108
break;
1109
case 's':
1110
options.summary = true;
1111
break;
1112
case 'v':
1113
verbose = true;
1114
break;
1115
case 'h':
1116
default:
1117
usage();
1118
}
1119
}
1120
1121
disasm_a3xx_set_debug(PRINT_RAW);
1122
1123
if (interactive) {
1124
pager_open();
1125
}
1126
1127
atexit(cleanup);
1128
1129
decode();
1130
cleanup();
1131
}
1132
1133