Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/kern/kern_dump.c
103791 views
1
/*-
2
* Copyright (c) 2002 Marcel Moolenaar
3
* All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
7
* are met:
8
*
9
* 1. Redistributions of source code must retain the above copyright
10
* notice, this list of conditions and the following disclaimer.
11
* 2. Redistributions in binary form must reproduce the above copyright
12
* notice, this list of conditions and the following disclaimer in the
13
* documentation and/or other materials provided with the distribution.
14
*
15
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
*/
26
27
#include <sys/param.h>
28
#include <sys/systm.h>
29
#include <sys/conf.h>
30
#include <sys/cons.h>
31
#include <sys/kdb.h>
32
#include <sys/kernel.h>
33
#include <sys/kerneldump.h>
34
#include <sys/malloc.h>
35
#include <sys/msgbuf.h>
36
#include <sys/proc.h>
37
#include <sys/watchdog.h>
38
39
#include <vm/vm.h>
40
#include <vm/vm_param.h>
41
#include <vm/vm_page.h>
42
#include <vm/vm_phys.h>
43
#include <vm/vm_dumpset.h>
44
#include <vm/pmap.h>
45
46
#include <machine/dump.h>
47
#include <machine/elf.h>
48
#include <machine/md_var.h>
49
#include <machine/pcb.h>
50
51
CTASSERT(sizeof(struct kerneldumpheader) == 512);
52
53
#define MD_ALIGN(x) roundup2((off_t)(x), PAGE_SIZE)
54
55
/* Handle buffered writes. */
56
static size_t fragsz;
57
58
struct dump_pa dump_map[DUMPSYS_MD_PA_NPAIRS];
59
60
#if !defined(__powerpc__)
61
void
62
dumpsys_gen_pa_init(void)
63
{
64
int n, idx;
65
66
bzero(dump_map, sizeof(dump_map));
67
for (n = 0; n < nitems(dump_map); n++) {
68
idx = n * 2;
69
if (dump_avail[idx] == 0 && dump_avail[idx + 1] == 0)
70
break;
71
dump_map[n].pa_start = dump_avail[idx];
72
dump_map[n].pa_size = dump_avail[idx + 1] - dump_avail[idx];
73
}
74
}
75
#endif
76
77
struct dump_pa *
78
dumpsys_gen_pa_next(struct dump_pa *mdp)
79
{
80
81
if (mdp == NULL)
82
return (&dump_map[0]);
83
84
mdp++;
85
if (mdp - dump_map >= nitems(dump_map) ||
86
(mdp->pa_start == 0 && mdp->pa_size == 0))
87
mdp = NULL;
88
return (mdp);
89
}
90
91
/* Generic implementation of the wbinv_all hook: performs no work. */
void
dumpsys_gen_wbinv_all(void)
{
	/* Intentionally empty. */
}
96
97
void
98
dumpsys_gen_unmap_chunk(vm_paddr_t pa __unused, size_t chunk __unused,
99
void *va __unused)
100
{
101
102
}
103
104
/*
 * Generic implementation of the write_aux_headers hook: writes nothing
 * and always returns 0.
 */
int
dumpsys_gen_write_aux_headers(struct dumperinfo *di)
{
	(void)di;	/* not used by the generic implementation */
	return (0);
}
110
111
int
112
dumpsys_buf_seek(struct dumperinfo *di, size_t sz)
113
{
114
static uint8_t buf[DEV_BSIZE];
115
size_t nbytes;
116
int error;
117
118
bzero(buf, sizeof(buf));
119
120
while (sz > 0) {
121
nbytes = MIN(sz, sizeof(buf));
122
123
error = dump_append(di, buf, nbytes);
124
if (error)
125
return (error);
126
sz -= nbytes;
127
}
128
129
return (0);
130
}
131
132
int
133
dumpsys_buf_write(struct dumperinfo *di, char *ptr, size_t sz)
134
{
135
size_t len;
136
int error;
137
138
while (sz) {
139
len = di->blocksize - fragsz;
140
if (len > sz)
141
len = sz;
142
memcpy((char *)di->blockbuf + fragsz, ptr, len);
143
fragsz += len;
144
ptr += len;
145
sz -= len;
146
if (fragsz == di->blocksize) {
147
error = dump_append(di, di->blockbuf, di->blocksize);
148
if (error)
149
return (error);
150
fragsz = 0;
151
}
152
}
153
return (0);
154
}
155
156
int
157
dumpsys_buf_flush(struct dumperinfo *di)
158
{
159
int error;
160
161
if (fragsz == 0)
162
return (0);
163
164
error = dump_append(di, di->blockbuf, di->blocksize);
165
fragsz = 0;
166
return (error);
167
}
168
169
CTASSERT(PAGE_SHIFT < 20);
170
/*
 * Convert a page count to megabytes, rounding up.  The argument is
 * parenthesized so that any expression may be passed safely.
 */
#define	PG2MB(pgs)							\
	(((pgs) + (1 << (20 - PAGE_SHIFT)) - 1) >> (20 - PAGE_SHIFT))
171
172
int
173
dumpsys_cb_dumpdata(struct dump_pa *mdp, int seqnr, void *arg)
174
{
175
struct dumperinfo *di = (struct dumperinfo*)arg;
176
vm_paddr_t pa;
177
void *va;
178
uint64_t pgs;
179
size_t counter, sz, chunk;
180
int c, error;
181
u_int maxdumppgs;
182
183
error = 0; /* catch case in which chunk size is 0 */
184
counter = 0; /* Update twiddle every 16MB */
185
va = NULL;
186
pgs = mdp->pa_size / PAGE_SIZE;
187
pa = mdp->pa_start;
188
maxdumppgs = min(di->maxiosize / PAGE_SIZE, MAXDUMPPGS);
189
if (maxdumppgs == 0) /* seatbelt */
190
maxdumppgs = 1;
191
192
printf(" chunk %d: %juMB (%ju pages)", seqnr, (uintmax_t)PG2MB(pgs),
193
(uintmax_t)pgs);
194
195
dumpsys_wbinv_all();
196
while (pgs) {
197
chunk = pgs;
198
if (chunk > maxdumppgs)
199
chunk = maxdumppgs;
200
sz = chunk << PAGE_SHIFT;
201
counter += sz;
202
if (counter >> 24) {
203
printf(" %ju", (uintmax_t)PG2MB(pgs));
204
counter &= (1 << 24) - 1;
205
}
206
207
dumpsys_map_chunk(pa, chunk, &va);
208
wdog_kern_pat(WD_LASTVAL);
209
210
error = dump_append(di, va, sz);
211
dumpsys_unmap_chunk(pa, chunk, va);
212
if (error)
213
break;
214
pgs -= chunk;
215
pa += sz;
216
217
/* Check for user abort. */
218
c = cncheckc();
219
if (c == 0x03)
220
return (ECANCELED);
221
if (c != -1)
222
printf(" (CTRL-C to abort) ");
223
}
224
printf(" ... %s\n", (error) ? "fail" : "ok");
225
return (error);
226
}
227
228
int
229
dumpsys_foreach_chunk(dumpsys_callback_t cb, void *arg)
230
{
231
struct dump_pa *mdp;
232
int error, seqnr;
233
234
seqnr = 0;
235
mdp = dumpsys_pa_next(NULL);
236
while (mdp != NULL) {
237
error = (*cb)(mdp, seqnr++, arg);
238
if (error)
239
return (-error);
240
mdp = dumpsys_pa_next(mdp);
241
}
242
return (seqnr);
243
}
244
245
static off_t fileofs;
246
247
static int
248
cb_dumphdr(struct dump_pa *mdp, int seqnr, void *arg)
249
{
250
struct dumperinfo *di = (struct dumperinfo*)arg;
251
Elf_Phdr phdr;
252
uint64_t size;
253
int error;
254
255
size = mdp->pa_size;
256
bzero(&phdr, sizeof(phdr));
257
phdr.p_type = PT_LOAD;
258
phdr.p_flags = PF_R; /* XXX */
259
phdr.p_offset = fileofs;
260
#ifdef __powerpc__
261
phdr.p_vaddr = (do_minidump? mdp->pa_start : ~0L);
262
phdr.p_paddr = (do_minidump? ~0L : mdp->pa_start);
263
#else
264
phdr.p_vaddr = mdp->pa_start;
265
phdr.p_paddr = mdp->pa_start;
266
#endif
267
phdr.p_filesz = size;
268
phdr.p_memsz = size;
269
phdr.p_align = PAGE_SIZE;
270
271
error = dumpsys_buf_write(di, (char*)&phdr, sizeof(phdr));
272
fileofs += phdr.p_filesz;
273
return (error);
274
}
275
276
static int
277
cb_size(struct dump_pa *mdp, int seqnr, void *arg)
278
{
279
uint64_t *sz;
280
281
sz = (uint64_t *)arg;
282
*sz += (uint64_t)mdp->pa_size;
283
return (0);
284
}
285
286
int
287
dumpsys_generic(struct dumperinfo *di)
288
{
289
static struct kerneldumpheader kdh;
290
Elf_Ehdr ehdr;
291
uint64_t dumpsize;
292
off_t hdrgap;
293
size_t hdrsz;
294
int error;
295
296
#if MINIDUMP_PAGE_TRACKING == 1
297
if (do_minidump)
298
return (minidumpsys(di, false));
299
#endif
300
301
bzero(&ehdr, sizeof(ehdr));
302
ehdr.e_ident[EI_MAG0] = ELFMAG0;
303
ehdr.e_ident[EI_MAG1] = ELFMAG1;
304
ehdr.e_ident[EI_MAG2] = ELFMAG2;
305
ehdr.e_ident[EI_MAG3] = ELFMAG3;
306
ehdr.e_ident[EI_CLASS] = ELF_CLASS;
307
#if BYTE_ORDER == LITTLE_ENDIAN
308
ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
309
#else
310
ehdr.e_ident[EI_DATA] = ELFDATA2MSB;
311
#endif
312
ehdr.e_ident[EI_VERSION] = EV_CURRENT;
313
ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE; /* XXX big picture? */
314
ehdr.e_type = ET_CORE;
315
ehdr.e_machine = EM_VALUE;
316
ehdr.e_phoff = sizeof(ehdr);
317
ehdr.e_flags = 0;
318
ehdr.e_ehsize = sizeof(ehdr);
319
ehdr.e_phentsize = sizeof(Elf_Phdr);
320
ehdr.e_shentsize = sizeof(Elf_Shdr);
321
322
dumpsys_pa_init();
323
324
/* Calculate dump size. */
325
dumpsize = 0L;
326
ehdr.e_phnum = dumpsys_foreach_chunk(cb_size, &dumpsize) +
327
DUMPSYS_NUM_AUX_HDRS;
328
hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize;
329
fileofs = MD_ALIGN(hdrsz);
330
dumpsize += fileofs;
331
hdrgap = fileofs - roundup2((off_t)hdrsz, di->blocksize);
332
333
dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_ARCH_VERSION,
334
dumpsize);
335
336
error = dump_start(di, &kdh);
337
if (error != 0)
338
goto fail;
339
340
printf("Dumping %ju MB (%d chunks)\n", (uintmax_t)dumpsize >> 20,
341
ehdr.e_phnum - DUMPSYS_NUM_AUX_HDRS);
342
343
/* Dump ELF header */
344
error = dumpsys_buf_write(di, (char*)&ehdr, sizeof(ehdr));
345
if (error)
346
goto fail;
347
348
/* Dump program headers */
349
error = dumpsys_foreach_chunk(cb_dumphdr, di);
350
if (error < 0)
351
goto fail;
352
error = dumpsys_write_aux_headers(di);
353
if (error < 0)
354
goto fail;
355
dumpsys_buf_flush(di);
356
357
/*
358
* All headers are written using blocked I/O, so we know the
359
* current offset is (still) block aligned. Skip the alignement
360
* in the file to have the segment contents aligned at page
361
* boundary.
362
*/
363
error = dumpsys_buf_seek(di, (size_t)hdrgap);
364
if (error)
365
goto fail;
366
367
/* Dump memory chunks. */
368
error = dumpsys_foreach_chunk(dumpsys_cb_dumpdata, di);
369
if (error < 0)
370
goto fail;
371
372
error = dump_finish(di, &kdh);
373
if (error != 0)
374
goto fail;
375
376
printf("\nDump complete\n");
377
return (0);
378
379
fail:
380
if (error < 0)
381
error = -error;
382
383
if (error == ECANCELED)
384
printf("\nDump aborted\n");
385
else if (error == E2BIG || error == ENOSPC)
386
printf("\nDump failed. Partition too small.\n");
387
else
388
printf("\n** DUMP FAILED (ERROR %d) **\n", error);
389
return (error);
390
}
391
392
#if MINIDUMP_PAGE_TRACKING == 1
393
394
/*
 * Minidump progress bar.
 *
 * Each entry covers an inclusive percentage range; "visited" records
 * whether a progress message has already been printed for that range.
 */
static struct {
	const int min_per;
	const int max_per;
	bool visited;
} progress_track[10] = {
	[0] = { .min_per =  0, .max_per =  10, .visited = false },
	[1] = { .min_per = 10, .max_per =  20, .visited = false },
	[2] = { .min_per = 20, .max_per =  30, .visited = false },
	[3] = { .min_per = 30, .max_per =  40, .visited = false },
	[4] = { .min_per = 40, .max_per =  50, .visited = false },
	[5] = { .min_per = 50, .max_per =  60, .visited = false },
	[6] = { .min_per = 60, .max_per =  70, .visited = false },
	[7] = { .min_per = 70, .max_per =  80, .visited = false },
	[8] = { .min_per = 80, .max_per =  90, .visited = false },
	[9] = { .min_per = 90, .max_per = 100, .visited = false },
};

/* Total size of the dump being tracked, in bytes. */
static uint64_t dumpsys_pb_size;
/* Bytes still to be written. */
static uint64_t dumpsys_pb_remaining;
/* Bytes written since the progress table was last consulted. */
static uint64_t dumpsys_pb_check;
415
416
/* Reset the progress bar for a dump of dumpsize. */
417
void
418
dumpsys_pb_init(uint64_t dumpsize)
419
{
420
int i;
421
422
dumpsys_pb_size = dumpsys_pb_remaining = dumpsize;
423
dumpsys_pb_check = 0;
424
425
for (i = 0; i < nitems(progress_track); i++)
426
progress_track[i].visited = false;
427
}
428
429
/*
430
* Update the progress according to the delta bytes that were written out.
431
* Check and print the progress percentage.
432
*/
433
void
434
dumpsys_pb_progress(size_t delta)
435
{
436
int sofar, i;
437
438
dumpsys_pb_remaining -= delta;
439
dumpsys_pb_check += delta;
440
441
/*
442
* To save time while dumping, only loop through progress_track
443
* occasionally.
444
*/
445
if ((dumpsys_pb_check >> DUMPSYS_PB_CHECK_BITS) == 0)
446
return;
447
else
448
dumpsys_pb_check &= (1 << DUMPSYS_PB_CHECK_BITS) - 1;
449
450
sofar = 100 - ((dumpsys_pb_remaining * 100) / dumpsys_pb_size);
451
for (i = 0; i < nitems(progress_track); i++) {
452
if (sofar < progress_track[i].min_per ||
453
sofar > progress_track[i].max_per)
454
continue;
455
if (!progress_track[i].visited) {
456
progress_track[i].visited = true;
457
printf("..%d%%", sofar);
458
}
459
break;
460
}
461
}
462
463
int
464
minidumpsys(struct dumperinfo *di, bool livedump)
465
{
466
struct minidumpstate state;
467
struct msgbuf mb_copy;
468
char *msg_ptr;
469
int error;
470
471
if (livedump) {
472
KASSERT(!dumping, ("live dump invoked from incorrect context"));
473
474
/*
475
* Before invoking cpu_minidumpsys() on the live system, we
476
* must snapshot some required global state: the message
477
* buffer, and the page dump bitset. They may be modified at
478
* any moment, so for the sake of the live dump it is best to
479
* have an unchanging snapshot to work with. Both are included
480
* as part of the dump and consumed by userspace tools.
481
*
482
* Other global state important to the minidump code is the
483
* dump_avail array and the kernel's page tables, but snapshots
484
* are not taken of these. For one, dump_avail[] is expected
485
* not to change after boot. Snapshotting the kernel page
486
* tables would involve an additional walk, so this is avoided
487
* too.
488
*
489
* This means live dumps are best effort, and the result may or
490
* may not be usable; there are no guarantees about the
491
* consistency of the dump's contents. Any of the following
492
* (and likely more) may affect the live dump:
493
*
494
* - Data may be modified, freed, or remapped during the
495
* course of the dump, such that the contents written out
496
* are partially or entirely unrecognizable. This means
497
* valid references may point to destroyed/mangled objects,
498
* and vice versa.
499
*
500
* - The dumped context of any threads that ran during the
501
* dump process may be unreliable.
502
*
503
* - The set of kernel page tables included in the dump likely
504
* won't correspond exactly to the copy of the dump bitset.
505
* This means some pages will be dumped without any way to
506
* locate them, and some pages may not have been dumped
507
* despite appearing as if they should.
508
*/
509
msg_ptr = malloc(msgbufsize, M_TEMP, M_WAITOK);
510
msgbuf_duplicate(msgbufp, &mb_copy, msg_ptr);
511
state.msgbufp = &mb_copy;
512
513
state.dump_bitset = BITSET_ALLOC(vm_page_dump_pages, M_TEMP,
514
M_WAITOK);
515
BIT_COPY_STORE_REL(vm_page_dump_pages, vm_page_dump,
516
state.dump_bitset);
517
} else {
518
KASSERT(dumping, ("minidump invoked outside of doadump()"));
519
520
/* Use the globals. */
521
state.msgbufp = msgbufp;
522
state.dump_bitset = vm_page_dump;
523
}
524
525
error = cpu_minidumpsys(di, &state);
526
if (livedump) {
527
free(msg_ptr, M_TEMP);
528
BITSET_FREE(state.dump_bitset, M_TEMP);
529
}
530
531
return (error);
532
}
533
#endif /* MINIDUMP_PAGE_TRACKING == 1 */
534
535