Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/powerpc/kexec/ranges.c
26424 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
* powerpc code to implement the kexec_file_load syscall
4
*
5
* Copyright (C) 2004 Adam Litke ([email protected])
6
* Copyright (C) 2004 IBM Corp.
7
* Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation
8
* Copyright (C) 2005 R Sharada ([email protected])
9
* Copyright (C) 2006 Mohan Kumar M ([email protected])
10
* Copyright (C) 2020 IBM Corporation
11
*
12
* Based on kexec-tools' kexec-ppc64.c, fs2dt.c.
13
* Heavily modified for the kernel by
14
* Hari Bathini, IBM Corporation.
15
*/
16
17
#define pr_fmt(fmt) "kexec ranges: " fmt
18
19
#include <linux/sort.h>
20
#include <linux/kexec.h>
21
#include <linux/of.h>
22
#include <linux/slab.h>
23
#include <linux/memblock.h>
24
#include <linux/crash_core.h>
25
#include <asm/sections.h>
26
#include <asm/kexec_ranges.h>
27
#include <asm/crashdump-ppc64.h>
28
29
#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
30
/**
31
* get_max_nr_ranges - Get the max no. of ranges crash_mem structure
32
* could hold, given the size allocated for it.
33
* @size: Allocation size of crash_mem structure.
34
*
35
* Returns the maximum no. of ranges.
36
*/
37
static inline unsigned int get_max_nr_ranges(size_t size)
38
{
39
return ((size - sizeof(struct crash_mem)) /
40
sizeof(struct range));
41
}
42
43
/**
44
* get_mem_rngs_size - Get the allocated size of mem_rngs based on
45
* max_nr_ranges and chunk size.
46
* @mem_rngs: Memory ranges.
47
*
48
* Returns the maximum size of @mem_rngs.
49
*/
50
static inline size_t get_mem_rngs_size(struct crash_mem *mem_rngs)
51
{
52
size_t size;
53
54
if (!mem_rngs)
55
return 0;
56
57
size = (sizeof(struct crash_mem) +
58
(mem_rngs->max_nr_ranges * sizeof(struct range)));
59
60
/*
61
* Memory is allocated in size multiple of MEM_RANGE_CHUNK_SZ.
62
* So, align to get the actual length.
63
*/
64
return ALIGN(size, MEM_RANGE_CHUNK_SZ);
65
}
66
67
/**
68
* __add_mem_range - add a memory range to memory ranges list.
69
* @mem_ranges: Range list to add the memory range to.
70
* @base: Base address of the range to add.
71
* @size: Size of the memory range to add.
72
*
73
* (Re)allocates memory, if needed.
74
*
75
* Returns 0 on success, negative errno on error.
76
*/
77
static int __add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
78
{
79
struct crash_mem *mem_rngs = *mem_ranges;
80
81
if (!mem_rngs || (mem_rngs->nr_ranges == mem_rngs->max_nr_ranges)) {
82
mem_rngs = realloc_mem_ranges(mem_ranges);
83
if (!mem_rngs)
84
return -ENOMEM;
85
}
86
87
mem_rngs->ranges[mem_rngs->nr_ranges].start = base;
88
mem_rngs->ranges[mem_rngs->nr_ranges].end = base + size - 1;
89
pr_debug("Added memory range [%#016llx - %#016llx] at index %d\n",
90
base, base + size - 1, mem_rngs->nr_ranges);
91
mem_rngs->nr_ranges++;
92
return 0;
93
}
94
95
/**
96
* __merge_memory_ranges - Merges the given memory ranges list.
97
* @mem_rngs: Range list to merge.
98
*
99
* Assumes a sorted range list.
100
*
101
* Returns nothing.
102
*/
103
static void __merge_memory_ranges(struct crash_mem *mem_rngs)
104
{
105
struct range *ranges;
106
int i, idx;
107
108
if (!mem_rngs)
109
return;
110
111
idx = 0;
112
ranges = &(mem_rngs->ranges[0]);
113
for (i = 1; i < mem_rngs->nr_ranges; i++) {
114
if (ranges[i].start <= (ranges[i-1].end + 1))
115
ranges[idx].end = ranges[i].end;
116
else {
117
idx++;
118
if (i == idx)
119
continue;
120
121
ranges[idx] = ranges[i];
122
}
123
}
124
mem_rngs->nr_ranges = idx + 1;
125
}
126
127
/* cmp_func_t callback to sort ranges with sort() */
128
static int rngcmp(const void *_x, const void *_y)
129
{
130
const struct range *x = _x, *y = _y;
131
132
if (x->start > y->start)
133
return 1;
134
if (x->start < y->start)
135
return -1;
136
return 0;
137
}
138
139
/**
140
* sort_memory_ranges - Sorts the given memory ranges list.
141
* @mem_rngs: Range list to sort.
142
* @merge: If true, merge the list after sorting.
143
*
144
* Returns nothing.
145
*/
146
void sort_memory_ranges(struct crash_mem *mem_rngs, bool merge)
147
{
148
int i;
149
150
if (!mem_rngs)
151
return;
152
153
/* Sort the ranges in-place */
154
sort(&(mem_rngs->ranges[0]), mem_rngs->nr_ranges,
155
sizeof(mem_rngs->ranges[0]), rngcmp, NULL);
156
157
if (merge)
158
__merge_memory_ranges(mem_rngs);
159
160
/* For debugging purpose */
161
pr_debug("Memory ranges:\n");
162
for (i = 0; i < mem_rngs->nr_ranges; i++) {
163
pr_debug("\t[%03d][%#016llx - %#016llx]\n", i,
164
mem_rngs->ranges[i].start,
165
mem_rngs->ranges[i].end);
166
}
167
}
168
169
/**
170
* realloc_mem_ranges - reallocate mem_ranges with size incremented
171
* by MEM_RANGE_CHUNK_SZ. Frees up the old memory,
172
* if memory allocation fails.
173
* @mem_ranges: Memory ranges to reallocate.
174
*
175
* Returns pointer to reallocated memory on success, NULL otherwise.
176
*/
177
struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges)
178
{
179
struct crash_mem *mem_rngs = *mem_ranges;
180
unsigned int nr_ranges;
181
size_t size;
182
183
size = get_mem_rngs_size(mem_rngs);
184
nr_ranges = mem_rngs ? mem_rngs->nr_ranges : 0;
185
186
size += MEM_RANGE_CHUNK_SZ;
187
mem_rngs = krealloc(*mem_ranges, size, GFP_KERNEL);
188
if (!mem_rngs) {
189
kfree(*mem_ranges);
190
*mem_ranges = NULL;
191
return NULL;
192
}
193
194
mem_rngs->nr_ranges = nr_ranges;
195
mem_rngs->max_nr_ranges = get_max_nr_ranges(size);
196
*mem_ranges = mem_rngs;
197
198
return mem_rngs;
199
}
200
201
/**
202
* add_mem_range - Updates existing memory range, if there is an overlap.
203
* Else, adds a new memory range.
204
* @mem_ranges: Range list to add the memory range to.
205
* @base: Base address of the range to add.
206
* @size: Size of the memory range to add.
207
*
208
* (Re)allocates memory, if needed.
209
*
210
* Returns 0 on success, negative errno on error.
211
*/
212
int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
213
{
214
struct crash_mem *mem_rngs = *mem_ranges;
215
u64 mstart, mend, end;
216
unsigned int i;
217
218
if (!size)
219
return 0;
220
221
end = base + size - 1;
222
223
if (!mem_rngs || !(mem_rngs->nr_ranges))
224
return __add_mem_range(mem_ranges, base, size);
225
226
for (i = 0; i < mem_rngs->nr_ranges; i++) {
227
mstart = mem_rngs->ranges[i].start;
228
mend = mem_rngs->ranges[i].end;
229
if (base < mend && end > mstart) {
230
if (base < mstart)
231
mem_rngs->ranges[i].start = base;
232
if (end > mend)
233
mem_rngs->ranges[i].end = end;
234
return 0;
235
}
236
}
237
238
return __add_mem_range(mem_ranges, base, size);
239
}
240
241
#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */
242
243
#ifdef CONFIG_KEXEC_FILE
244
/**
245
* add_tce_mem_ranges - Adds tce-table range to the given memory ranges list.
246
* @mem_ranges: Range list to add the memory range(s) to.
247
*
248
* Returns 0 on success, negative errno on error.
249
*/
250
static int add_tce_mem_ranges(struct crash_mem **mem_ranges)
251
{
252
struct device_node *dn = NULL;
253
int ret = 0;
254
255
for_each_node_by_type(dn, "pci") {
256
u64 base;
257
u32 size;
258
259
ret = of_property_read_u64(dn, "linux,tce-base", &base);
260
ret |= of_property_read_u32(dn, "linux,tce-size", &size);
261
if (ret) {
262
/*
263
* It is ok to have pci nodes without tce. So, ignore
264
* property does not exist error.
265
*/
266
if (ret == -EINVAL) {
267
ret = 0;
268
continue;
269
}
270
break;
271
}
272
273
ret = add_mem_range(mem_ranges, base, size);
274
if (ret)
275
break;
276
}
277
278
of_node_put(dn);
279
return ret;
280
}
281
282
/**
283
* add_initrd_mem_range - Adds initrd range to the given memory ranges list,
284
* if the initrd was retained.
285
* @mem_ranges: Range list to add the memory range to.
286
*
287
* Returns 0 on success, negative errno on error.
288
*/
289
static int add_initrd_mem_range(struct crash_mem **mem_ranges)
290
{
291
u64 base, end;
292
int ret;
293
294
/* This range means something, only if initrd was retained */
295
if (!strstr(saved_command_line, "retain_initrd"))
296
return 0;
297
298
ret = of_property_read_u64(of_chosen, "linux,initrd-start", &base);
299
ret |= of_property_read_u64(of_chosen, "linux,initrd-end", &end);
300
if (!ret)
301
ret = add_mem_range(mem_ranges, base, end - base + 1);
302
303
return ret;
304
}
305
306
/**
 * add_htab_mem_range - Adds htab range to the given memory ranges list,
 *                      if it exists
 * @mem_ranges:         Range list to add the memory range to.
 *
 * Only does work when CONFIG_PPC_64S_HASH_MMU is set and htab_address is
 * non-zero; otherwise it is a no-op.
 *
 * Returns 0 on success, negative errno on error.
 */
static int add_htab_mem_range(struct crash_mem **mem_ranges)
{
	int ret = 0;

#ifdef CONFIG_PPC_64S_HASH_MMU
	if (htab_address)
		ret = add_mem_range(mem_ranges, __pa(htab_address),
				    htab_size_bytes);
#endif
	return ret;
}
325
326
/**
327
* add_kernel_mem_range - Adds kernel text region to the given
328
* memory ranges list.
329
* @mem_ranges: Range list to add the memory range to.
330
*
331
* Returns 0 on success, negative errno on error.
332
*/
333
static int add_kernel_mem_range(struct crash_mem **mem_ranges)
334
{
335
return add_mem_range(mem_ranges, 0, __pa(_end));
336
}
337
#endif /* CONFIG_KEXEC_FILE */
338
339
#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
340
/**
341
* add_rtas_mem_range - Adds RTAS region to the given memory ranges list.
342
* @mem_ranges: Range list to add the memory range to.
343
*
344
* Returns 0 on success, negative errno on error.
345
*/
346
static int add_rtas_mem_range(struct crash_mem **mem_ranges)
347
{
348
struct device_node *dn;
349
u32 base, size;
350
int ret = 0;
351
352
dn = of_find_node_by_path("/rtas");
353
if (!dn)
354
return 0;
355
356
ret = of_property_read_u32(dn, "linux,rtas-base", &base);
357
ret |= of_property_read_u32(dn, "rtas-size", &size);
358
if (!ret)
359
ret = add_mem_range(mem_ranges, base, size);
360
361
of_node_put(dn);
362
return ret;
363
}
364
365
/**
366
* add_opal_mem_range - Adds OPAL region to the given memory ranges list.
367
* @mem_ranges: Range list to add the memory range to.
368
*
369
* Returns 0 on success, negative errno on error.
370
*/
371
static int add_opal_mem_range(struct crash_mem **mem_ranges)
372
{
373
struct device_node *dn;
374
u64 base, size;
375
int ret;
376
377
dn = of_find_node_by_path("/ibm,opal");
378
if (!dn)
379
return 0;
380
381
ret = of_property_read_u64(dn, "opal-base-address", &base);
382
ret |= of_property_read_u64(dn, "opal-runtime-size", &size);
383
if (!ret)
384
ret = add_mem_range(mem_ranges, base, size);
385
386
of_node_put(dn);
387
return ret;
388
}
389
#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */
390
391
#ifdef CONFIG_KEXEC_FILE
392
/**
393
* add_reserved_mem_ranges - Adds "/reserved-ranges" regions exported by f/w
394
* to the given memory ranges list.
395
* @mem_ranges: Range list to add the memory ranges to.
396
*
397
* Returns 0 on success, negative errno on error.
398
*/
399
static int add_reserved_mem_ranges(struct crash_mem **mem_ranges)
400
{
401
int n_mem_addr_cells, n_mem_size_cells, i, len, cells, ret = 0;
402
struct device_node *root = of_find_node_by_path("/");
403
const __be32 *prop;
404
405
prop = of_get_property(root, "reserved-ranges", &len);
406
n_mem_addr_cells = of_n_addr_cells(root);
407
n_mem_size_cells = of_n_size_cells(root);
408
of_node_put(root);
409
if (!prop)
410
return 0;
411
412
cells = n_mem_addr_cells + n_mem_size_cells;
413
414
/* Each reserved range is an (address,size) pair */
415
for (i = 0; i < (len / (sizeof(u32) * cells)); i++) {
416
u64 base, size;
417
418
base = of_read_number(prop + (i * cells), n_mem_addr_cells);
419
size = of_read_number(prop + (i * cells) + n_mem_addr_cells,
420
n_mem_size_cells);
421
422
ret = add_mem_range(mem_ranges, base, size);
423
if (ret)
424
break;
425
}
426
427
return ret;
428
}
429
430
/**
 * get_reserved_memory_ranges - Get reserve memory ranges. This list includes
 *                              memory regions that should be added to the
 *                              memory reserve map to ensure the region is
 *                              protected from any mischief.
 * @mem_ranges:                 Range list to add the memory ranges to.
 *
 * Collects, in order, the RTAS region, the TCE tables and the firmware
 * "reserved-ranges"; stops at the first failure and logs an error.
 *
 * Returns 0 on success, negative errno on error.
 */
int get_reserved_memory_ranges(struct crash_mem **mem_ranges)
{
	int ret = add_rtas_mem_range(mem_ranges);

	if (!ret)
		ret = add_tce_mem_ranges(mem_ranges);
	if (!ret)
		ret = add_reserved_mem_ranges(mem_ranges);

	if (ret)
		pr_err("Failed to setup reserved memory ranges\n");
	return ret;
}
457
458
/**
 * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
 *                             regions like opal/rtas, tce-table, initrd,
 *                             kernel, htab which should be avoided while
 *                             setting up kexec load segments.
 * @mem_ranges:                Range list to add the memory ranges to.
 *
 * Stops at the first collector that fails and logs an error. On success
 * the resulting list is sorted and merged.
 *
 * Returns 0 on success, negative errno on error.
 */
int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
{
	int ret = add_tce_mem_ranges(mem_ranges);

	if (!ret)
		ret = add_initrd_mem_range(mem_ranges);
	if (!ret)
		ret = add_htab_mem_range(mem_ranges);
	if (!ret)
		ret = add_kernel_mem_range(mem_ranges);
	if (!ret)
		ret = add_rtas_mem_range(mem_ranges);
	if (!ret)
		ret = add_opal_mem_range(mem_ranges);
	if (!ret)
		ret = add_reserved_mem_ranges(mem_ranges);

	if (ret) {
		pr_err("Failed to setup exclude memory ranges\n");
		return ret;
	}

	/* exclude memory ranges should be sorted for easy lookup */
	sort_memory_ranges(*mem_ranges, true);
	return 0;
}
506
507
#ifdef CONFIG_CRASH_DUMP
508
/**
509
* get_usable_memory_ranges - Get usable memory ranges. This list includes
510
* regions like crashkernel, opal/rtas & tce-table,
511
* that kdump kernel could use.
512
* @mem_ranges: Range list to add the memory ranges to.
513
*
514
* Returns 0 on success, negative errno on error.
515
*/
516
int get_usable_memory_ranges(struct crash_mem **mem_ranges)
517
{
518
int ret;
519
520
/*
521
* Early boot failure observed on guests when low memory (first memory
522
* block?) is not added to usable memory. So, add [0, crashk_res.end]
523
* instead of [crashk_res.start, crashk_res.end] to workaround it.
524
* Also, crashed kernel's memory must be added to reserve map to
525
* avoid kdump kernel from using it.
526
*/
527
ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
528
if (ret)
529
goto out;
530
531
ret = add_rtas_mem_range(mem_ranges);
532
if (ret)
533
goto out;
534
535
ret = add_opal_mem_range(mem_ranges);
536
if (ret)
537
goto out;
538
539
ret = add_tce_mem_ranges(mem_ranges);
540
out:
541
if (ret)
542
pr_err("Failed to setup usable memory ranges\n");
543
return ret;
544
}
545
#endif /* CONFIG_CRASH_DUMP */
546
#endif /* CONFIG_KEXEC_FILE */
547
548
#ifdef CONFIG_CRASH_DUMP
549
/**
550
* get_crash_memory_ranges - Get crash memory ranges. This list includes
551
* first/crashing kernel's memory regions that
552
* would be exported via an elfcore.
553
* @mem_ranges: Range list to add the memory ranges to.
554
*
555
* Returns 0 on success, negative errno on error.
556
*/
557
int get_crash_memory_ranges(struct crash_mem **mem_ranges)
558
{
559
phys_addr_t base, end;
560
struct crash_mem *tmem;
561
u64 i;
562
int ret;
563
564
for_each_mem_range(i, &base, &end) {
565
u64 size = end - base;
566
567
/* Skip backup memory region, which needs a separate entry */
568
if (base == BACKUP_SRC_START) {
569
if (size > BACKUP_SRC_SIZE) {
570
base = BACKUP_SRC_END + 1;
571
size -= BACKUP_SRC_SIZE;
572
} else
573
continue;
574
}
575
576
ret = add_mem_range(mem_ranges, base, size);
577
if (ret)
578
goto out;
579
580
/* Try merging adjacent ranges before reallocation attempt */
581
if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)
582
sort_memory_ranges(*mem_ranges, true);
583
}
584
585
/* Reallocate memory ranges if there is no space to split ranges */
586
tmem = *mem_ranges;
587
if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
588
tmem = realloc_mem_ranges(mem_ranges);
589
if (!tmem)
590
goto out;
591
}
592
593
/* Exclude crashkernel region */
594
ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end);
595
if (ret)
596
goto out;
597
598
/*
599
* FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL
600
* regions are exported to save their context at the time of
601
* crash, they should actually be backed up just like the
602
* first 64K bytes of memory.
603
*/
604
ret = add_rtas_mem_range(mem_ranges);
605
if (ret)
606
goto out;
607
608
ret = add_opal_mem_range(mem_ranges);
609
if (ret)
610
goto out;
611
612
/* create a separate program header for the backup region */
613
ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE);
614
if (ret)
615
goto out;
616
617
sort_memory_ranges(*mem_ranges, false);
618
out:
619
if (ret)
620
pr_err("Failed to setup crash memory ranges\n");
621
return ret;
622
}
623
624
/**
625
* remove_mem_range - Removes the given memory range from the range list.
626
* @mem_ranges: Range list to remove the memory range to.
627
* @base: Base address of the range to remove.
628
* @size: Size of the memory range to remove.
629
*
630
* (Re)allocates memory, if needed.
631
*
632
* Returns 0 on success, negative errno on error.
633
*/
634
int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
635
{
636
u64 end;
637
int ret = 0;
638
unsigned int i;
639
u64 mstart, mend;
640
struct crash_mem *mem_rngs = *mem_ranges;
641
642
if (!size)
643
return 0;
644
645
/*
646
* Memory range are stored as start and end address, use
647
* the same format to do remove operation.
648
*/
649
end = base + size - 1;
650
651
for (i = 0; i < mem_rngs->nr_ranges; i++) {
652
mstart = mem_rngs->ranges[i].start;
653
mend = mem_rngs->ranges[i].end;
654
655
/*
656
* Memory range to remove is not part of this range entry
657
* in the memory range list
658
*/
659
if (!(base >= mstart && end <= mend))
660
continue;
661
662
/*
663
* Memory range to remove is equivalent to this entry in the
664
* memory range list. Remove the range entry from the list.
665
*/
666
if (base == mstart && end == mend) {
667
for (; i < mem_rngs->nr_ranges - 1; i++) {
668
mem_rngs->ranges[i].start = mem_rngs->ranges[i+1].start;
669
mem_rngs->ranges[i].end = mem_rngs->ranges[i+1].end;
670
}
671
mem_rngs->nr_ranges--;
672
goto out;
673
}
674
/*
675
* Start address of the memory range to remove and the
676
* current memory range entry in the list is same. Just
677
* move the start address of the current memory range
678
* entry in the list to end + 1.
679
*/
680
else if (base == mstart) {
681
mem_rngs->ranges[i].start = end + 1;
682
goto out;
683
}
684
/*
685
* End address of the memory range to remove and the
686
* current memory range entry in the list is same.
687
* Just move the end address of the current memory
688
* range entry in the list to base - 1.
689
*/
690
else if (end == mend) {
691
mem_rngs->ranges[i].end = base - 1;
692
goto out;
693
}
694
/*
695
* Memory range to remove is not at the edge of current
696
* memory range entry. Split the current memory entry into
697
* two half.
698
*/
699
else {
700
mem_rngs->ranges[i].end = base - 1;
701
size = mem_rngs->ranges[i].end - end;
702
ret = add_mem_range(mem_ranges, end + 1, size);
703
}
704
}
705
out:
706
return ret;
707
}
708
#endif /* CONFIG_CRASH_DUMP */
709
710