Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/kernel/e820.c
10817 views
1
/*
2
* Handle the memory map.
3
* The functions here do the job until bootmem takes over.
4
*
5
* Getting sanitize_e820_map() in sync with i386 version by applying change:
6
* - Provisions for empty E820 memory regions (reported by certain BIOSes).
7
* Alex Achenbach <[email protected]>, December 2002.
8
* Venkatesh Pallipadi <[email protected]>
9
*
10
*/
11
#include <linux/kernel.h>
12
#include <linux/types.h>
13
#include <linux/init.h>
14
#include <linux/crash_dump.h>
15
#include <linux/bootmem.h>
16
#include <linux/pfn.h>
17
#include <linux/suspend.h>
18
#include <linux/acpi.h>
19
#include <linux/firmware-map.h>
20
#include <linux/memblock.h>
21
22
#include <asm/e820.h>
23
#include <asm/proto.h>
24
#include <asm/setup.h>
25
26
/*
27
* The e820 map is the map that gets modified e.g. with command line parameters
28
* and that is also registered with modifications in the kernel resource tree
29
* with the iomem_resource as parent.
30
*
31
* The e820_saved is directly saved after the BIOS-provided memory map is
32
* copied. It doesn't get modified afterwards. It's registered for the
33
* /sys/firmware/memmap interface.
34
*
35
* That memory map is not modified and is used as base for kexec. The kexec'd
36
* kernel should get the same memory map as the firmware provides. Then the
37
* user can e.g. boot the original kernel with mem=1G while still booting the
38
* next kernel with full memory.
39
*/
40
struct e820map e820;
41
struct e820map e820_saved;
42
43
/* For PCI or other memory-mapped resources */
44
unsigned long pci_mem_start = 0xaeedbabe;
45
#ifdef CONFIG_PCI
46
EXPORT_SYMBOL(pci_mem_start);
47
#endif
48
49
/*
50
* This function checks if any part of the range <start,end> is mapped
51
* with type.
52
*/
53
int
54
e820_any_mapped(u64 start, u64 end, unsigned type)
55
{
56
int i;
57
58
for (i = 0; i < e820.nr_map; i++) {
59
struct e820entry *ei = &e820.map[i];
60
61
if (type && ei->type != type)
62
continue;
63
if (ei->addr >= end || ei->addr + ei->size <= start)
64
continue;
65
return 1;
66
}
67
return 0;
68
}
69
EXPORT_SYMBOL_GPL(e820_any_mapped);
70
71
/*
72
* This function checks if the entire range <start,end> is mapped with type.
73
*
74
* Note: this function only works correct if the e820 table is sorted and
75
* not-overlapping, which is the case
76
*/
77
int __init e820_all_mapped(u64 start, u64 end, unsigned type)
78
{
79
int i;
80
81
for (i = 0; i < e820.nr_map; i++) {
82
struct e820entry *ei = &e820.map[i];
83
84
if (type && ei->type != type)
85
continue;
86
/* is the region (part) in overlap with the current region ?*/
87
if (ei->addr >= end || ei->addr + ei->size <= start)
88
continue;
89
90
/* if the region is at the beginning of <start,end> we move
91
* start to the end of the region since it's ok until there
92
*/
93
if (ei->addr <= start)
94
start = ei->addr + ei->size;
95
/*
96
* if start is now at or beyond end, we're done, full
97
* coverage
98
*/
99
if (start >= end)
100
return 1;
101
}
102
return 0;
103
}
104
105
/*
106
* Add a memory region to the kernel e820 map.
107
*/
108
static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
109
int type)
110
{
111
int x = e820x->nr_map;
112
113
if (x >= ARRAY_SIZE(e820x->map)) {
114
printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
115
return;
116
}
117
118
e820x->map[x].addr = start;
119
e820x->map[x].size = size;
120
e820x->map[x].type = type;
121
e820x->nr_map++;
122
}
123
124
/* Append a region to the global working map 'e820'. */
void __init e820_add_region(u64 start, u64 size, int type)
{
	struct e820map *map = &e820;

	__e820_add_region(map, start, size, type);
}
128
129
/*
 * Print a short label for an e820 type as a continuation of the current
 * log line.  Types without a dedicated label are printed numerically.
 */
static void __init e820_print_type(u32 type)
{
	if (type == E820_RAM || type == E820_RESERVED_KERN)
		printk(KERN_CONT "(usable)");
	else if (type == E820_RESERVED)
		printk(KERN_CONT "(reserved)");
	else if (type == E820_ACPI)
		printk(KERN_CONT "(ACPI data)");
	else if (type == E820_NVS)
		printk(KERN_CONT "(ACPI NVS)");
	else if (type == E820_UNUSABLE)
		printk(KERN_CONT "(unusable)");
	else
		printk(KERN_CONT "type %u", type);
}
153
154
/*
 * Log every entry of the global e820 map, one line per entry, in the form
 * " <who>: <start> - <end> <type label>".
 */
void __init e820_print_map(char *who)
{
	int idx;

	for (idx = 0; idx < e820.nr_map; idx++) {
		struct e820entry *entry = &e820.map[idx];
		unsigned long long first = entry->addr;
		unsigned long long limit = entry->addr + entry->size;

		printk(KERN_INFO " %s: %016Lx - %016Lx ", who, first, limit);
		e820_print_type(entry->type);
		printk(KERN_CONT "\n");
	}
}
167
168
/*
169
* Sanitize the BIOS e820 map.
170
*
171
* Some e820 responses include overlapping entries. The following
172
* replaces the original e820 map with a new one, removing overlaps,
173
* and resolving conflicting memory types in favor of highest
174
* numbered type.
175
*
176
* The input parameter biosmap points to an array of 'struct
177
* e820entry' which on entry has elements in the range [0, *pnr_map)
178
* valid, and which has space for up to max_nr_map entries.
179
* On return, the resulting sanitized e820 map entries will be
180
* overwritten in the same location, starting at biosmap.
181
*
182
* The integer pointed to by pnr_map must be valid on entry (the
183
* current number of valid entries located at biosmap) and will
184
* be updated on return, with the new number of valid entries
185
* (something no more than max_nr_map.)
186
*
187
* The return value from sanitize_e820_map() is zero if it
188
* successfully 'sanitized' the map entries passed in, and is -1
189
* if it did nothing, which can happen if either of (1) it was
190
* only passed one map entry, or (2) any of the input map entries
191
* were invalid (start + size < start, meaning that the size was
192
* so big the described memory range wrapped around through zero.)
193
*
194
* Visually we're performing the following
195
* (1,2,3,4 = memory types)...
196
*
197
* Sample memory map (w/overlaps):
198
* ____22__________________
199
* ______________________4_
200
* ____1111________________
201
* _44_____________________
202
* 11111111________________
203
* ____________________33__
204
* ___________44___________
205
* __________33333_________
206
* ______________22________
207
* ___________________2222_
208
* _________111111111______
209
* _____________________11_
210
* _________________4______
211
*
212
* Sanitized equivalent (no overlap):
213
* 1_______________________
214
* _44_____________________
215
* ___1____________________
216
* ____22__________________
217
* ______11________________
218
* _________1______________
219
* __________3_____________
220
* ___________44___________
221
* _____________33_________
222
* _______________2________
223
* ________________1_______
224
* _________________4______
225
* ___________________2____
226
* ____________________33__
227
* ______________________4_
228
*/
229
230
int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
			     u32 *pnr_map)
{
	struct change_member {
		struct e820entry *pbios; /* bios entry this point belongs to */
		unsigned long long addr; /* address of this change point */
	};
	static struct change_member change_point_list[2*E820_X_MAX] __initdata;
	static struct change_member *change_point[2*E820_X_MAX] __initdata;
	static struct e820entry *overlap_list[E820_X_MAX] __initdata;
	static struct e820entry new_bios[E820_X_MAX] __initdata;
	unsigned long cur_type, prev_type;
	unsigned long long prev_addr;
	int nr_overlap;		/* live entries in overlap_list */
	int out_idx;		/* next slot to fill in new_bios */
	int nr_in, nr_points;
	int swapped;
	int i, k;

	/* a map with fewer than two regions needs no work */
	if (*pnr_map < 2)
		return -1;

	nr_in = *pnr_map;
	BUG_ON(nr_in > max_nr_map);

	/* give up if any region wraps around the end of the address space */
	for (i = 0; i < nr_in; i++) {
		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
			return -1;
	}

	/* the sort below shuffles pointers; aim each one at its storage */
	for (i = 0; i < 2 * nr_in; i++)
		change_point[i] = &change_point_list[i];

	/*
	 * Record a start and an end change point for every region,
	 * leaving out regions of size zero.
	 */
	nr_points = 0;
	for (i = 0; i < nr_in; i++) {
		struct e820entry *src = &biosmap[i];

		if (src->size == 0)
			continue;

		change_point[nr_points]->addr = src->addr;
		change_point[nr_points]->pbios = src;
		nr_points++;

		change_point[nr_points]->addr = src->addr + src->size;
		change_point[nr_points]->pbios = src;
		nr_points++;
	}

	/*
	 * Bubble-sort the change points by address (low -> high).
	 * Neighbours are swapped when the later one has
	 *   - a lower address, or
	 *   - the same address, and it is a region start while the
	 *     earlier one is a region end (starts sort before ends).
	 */
	do {
		swapped = 0;
		for (i = 1; i < nr_points; i++) {
			struct change_member *cur = change_point[i];
			struct change_member *prev = change_point[i - 1];
			int cur_is_start = cur->addr == cur->pbios->addr;
			int prev_is_start = prev->addr == prev->pbios->addr;

			if (cur->addr < prev->addr ||
			    (cur->addr == prev->addr &&
			     cur_is_start && !prev_is_start)) {
				change_point[i] = prev;
				change_point[i - 1] = cur;
				swapped = 1;
			}
		}
	} while (swapped);

	/* build the replacement map, free of overlaps */
	nr_overlap = 0;
	out_idx = 0;
	prev_type = 0;		/* 0 = no region currently open */
	prev_addr = 0;

	for (k = 0; k < nr_points; k++) {
		struct change_member *cp = change_point[k];

		if (cp->addr == cp->pbios->addr) {
			/* region start: it now takes part in the overlap set */
			overlap_list[nr_overlap++] = cp->pbios;
		} else {
			/*
			 * region end: drop it from the set; order does not
			 * matter, so the last element fills the hole
			 */
			for (i = 0; i < nr_overlap; i++) {
				if (overlap_list[i] == cp->pbios)
					overlap_list[i] =
						overlap_list[nr_overlap - 1];
			}
			nr_overlap--;
		}

		/* the numerically largest type among active regions wins */
		cur_type = 0;
		for (i = 0; i < nr_overlap; i++)
			if (overlap_list[i]->type > cur_type)
				cur_type = overlap_list[i]->type;

		/* only a change of effective type affects the output */
		if (cur_type == prev_type)
			continue;

		if (prev_type != 0) {
			/* close the open output entry at this address */
			new_bios[out_idx].size = cp->addr - prev_addr;
			/*
			 * keep it only if non-empty, and stop once the
			 * caller's array would be full
			 */
			if (new_bios[out_idx].size != 0)
				if (++out_idx >= max_nr_map)
					break;
		}
		if (cur_type != 0) {
			/* open a new output entry starting here */
			new_bios[out_idx].addr = cp->addr;
			new_bios[out_idx].type = cur_type;
			prev_addr = cp->addr;
		}
		prev_type = cur_type;
	}

	/* hand the result back in the caller's array */
	memcpy(biosmap, new_bios, out_idx * sizeof(struct e820entry));
	*pnr_map = out_idx;

	return 0;
}
388
389
/*
 * Append nr_map entries from 'biosmap' to the global e820 map.
 * Returns -1 as soon as an entry whose end wraps past 64 bits is met
 * (entries before it have already been added), 0 otherwise.
 */
static int __init __append_e820_map(struct e820entry *biosmap, int nr_map)
{
	for (; nr_map; nr_map--, biosmap++) {
		u64 start = biosmap->addr;
		u64 size = biosmap->size;
		u64 end = start + size;

		/* 64-bit wrap-around: reject the map */
		if (start > end)
			return -1;

		e820_add_region(start, size, biosmap->type);
	}
	return 0;
}
408
409
/*
410
* Copy the BIOS e820 map into a safe place.
411
*
412
* Sanity-check it while we're at it..
413
*
414
* If we're lucky and live on a modern system, the setup code
415
* will have given us a memory map that we can use to properly
416
* set up memory. If we aren't, we'll fake a memory map.
417
*/
418
static int __init append_e820_map(struct e820entry *biosmap, int nr_map)
419
{
420
/* Only one memory region (or negative)? Ignore it */
421
if (nr_map < 2)
422
return -1;
423
424
return __append_e820_map(biosmap, nr_map);
425
}
426
427
/*
 * Within 'e820x', change the parts of entries of type old_type that fall
 * inside [start, start + size) to new_type.  Entries are retyped in place,
 * trimmed, or split, with new entries appended as needed.
 *
 * old_type must differ from new_type.  The size is clamped so that
 * start + size cannot overflow.
 *
 * Returns the number of bytes whose type was changed.
 */
static u64 __init __e820_update_range(struct e820map *e820x, u64 start,
					u64 size, unsigned old_type,
					unsigned new_type)
{
	u64 end;
	unsigned int i;
	u64 updated = 0;

	BUG_ON(old_type == new_type);

	if (size > (ULLONG_MAX - start))
		size = ULLONG_MAX - start;

	end = start + size;
	printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ",
	       (unsigned long long) start,
	       (unsigned long long) end);
	e820_print_type(old_type);
	printk(KERN_CONT " ==> ");
	e820_print_type(new_type);
	printk(KERN_CONT "\n");

	/* nr_map is re-read each pass: entries appended below are visited too */
	for (i = 0; i < e820x->nr_map; i++) {
		struct e820entry *ei = &e820x->map[i];
		u64 ei_end, lo, hi, overlap;

		if (ei->type != old_type)
			continue;

		ei_end = ei->addr + ei->size;

		if (ei->addr >= start && ei_end <= end) {
			/* entry lies completely inside the range: retype it */
			ei->type = new_type;
			updated += ei->size;
		} else if (ei->addr < start && ei_end > end) {
			/*
			 * range lies strictly inside the entry: split into
			 * head (kept in place), middle (new type), tail
			 */
			__e820_add_region(e820x, start, size, new_type);
			__e820_add_region(e820x, end, ei_end - end, ei->type);
			ei->size = start - ei->addr;
			updated += size;
		} else {
			/* partial overlap at one end, or none at all */
			lo = max(start, ei->addr);
			hi = min(end, ei_end);
			if (lo >= hi)
				continue;

			overlap = hi - lo;
			__e820_add_region(e820x, lo, overlap, new_type);
			updated += overlap;

			/*
			 * Shrink the entry first; the start address only
			 * moves when the overlap was at the entry's head.
			 */
			ei->size -= overlap;
			if (ei->addr >= lo)
				ei->addr = hi;
		}
	}
	return updated;
}
496
497
/* Retype part of the global working map 'e820'; returns bytes changed. */
u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
			     unsigned new_type)
{
	struct e820map *map = &e820;

	return __e820_update_range(map, start, size, old_type, new_type);
}
502
503
/* Retype part of the saved map 'e820_saved'; returns bytes changed. */
static u64 __init e820_update_range_saved(u64 start, u64 size,
					  unsigned old_type, unsigned new_type)
{
	struct e820map *map = &e820_saved;

	return __e820_update_range(map, start, size, old_type, new_type);
}
509
510
/* make e820 not cover the range */
511
u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
512
int checktype)
513
{
514
int i;
515
u64 end;
516
u64 real_removed_size = 0;
517
518
if (size > (ULLONG_MAX - start))
519
size = ULLONG_MAX - start;
520
521
end = start + size;
522
printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
523
(unsigned long long) start,
524
(unsigned long long) end);
525
if (checktype)
526
e820_print_type(old_type);
527
printk(KERN_CONT "\n");
528
529
for (i = 0; i < e820.nr_map; i++) {
530
struct e820entry *ei = &e820.map[i];
531
u64 final_start, final_end;
532
u64 ei_end;
533
534
if (checktype && ei->type != old_type)
535
continue;
536
537
ei_end = ei->addr + ei->size;
538
/* totally covered? */
539
if (ei->addr >= start && ei_end <= end) {
540
real_removed_size += ei->size;
541
memset(ei, 0, sizeof(struct e820entry));
542
continue;
543
}
544
545
/* new range is totally covered? */
546
if (ei->addr < start && ei_end > end) {
547
e820_add_region(end, ei_end - end, ei->type);
548
ei->size = start - ei->addr;
549
real_removed_size += size;
550
continue;
551
}
552
553
/* partially covered */
554
final_start = max(start, ei->addr);
555
final_end = min(end, ei_end);
556
if (final_start >= final_end)
557
continue;
558
real_removed_size += final_end - final_start;
559
560
/*
561
* left range could be head or tail, so need to update
562
* size at first.
563
*/
564
ei->size -= final_end - final_start;
565
if (ei->addr < final_start)
566
continue;
567
ei->addr = final_end;
568
}
569
return real_removed_size;
570
}
571
572
/*
 * Re-sanitize the global e820 map and, when sanitizing succeeded, store
 * the new entry count and log the resulting map.
 */
void __init update_e820(void)
{
	u32 count = e820.nr_map;
	int rc;

	rc = sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &count);
	if (rc != 0)
		return;

	e820.nr_map = count;
	printk(KERN_INFO "modified physical RAM map:\n");
	e820_print_map("modified");
}
583
/*
 * Re-sanitize the saved e820 map and, when sanitizing succeeded, store
 * the new entry count.
 */
static void __init update_e820_saved(void)
{
	u32 count = e820_saved.nr_map;
	int rc;

	rc = sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map),
			       &count);
	if (rc != 0)
		return;

	e820_saved.nr_map = count;
}
592
#define MAX_GAP_END 0x100000000ull
593
/*
594
* Search for a gap in the e820 memory space from start_addr to end_addr.
595
*/
596
__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
597
unsigned long start_addr, unsigned long long end_addr)
598
{
599
unsigned long long last;
600
int i = e820.nr_map;
601
int found = 0;
602
603
last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END;
604
605
while (--i >= 0) {
606
unsigned long long start = e820.map[i].addr;
607
unsigned long long end = start + e820.map[i].size;
608
609
if (end < start_addr)
610
continue;
611
612
/*
613
* Since "last" is at most 4GB, we know we'll
614
* fit in 32 bits if this condition is true
615
*/
616
if (last > end) {
617
unsigned long gap = last - end;
618
619
if (gap >= *gapsize) {
620
*gapsize = gap;
621
*gapstart = end;
622
found = 1;
623
}
624
}
625
if (start < last)
626
last = start;
627
}
628
return found;
629
}
630
631
/*
632
* Search for the biggest gap in the low 32 bits of the e820
633
* memory space. We pass this space to PCI to assign MMIO resources
634
* for hotplug or unconfigured devices in.
635
* Hopefully the BIOS let enough space left.
636
*/
637
__init void e820_setup_gap(void)
638
{
639
unsigned long gapstart, gapsize;
640
int found;
641
642
gapstart = 0x10000000;
643
gapsize = 0x400000;
644
found = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END);
645
646
#ifdef CONFIG_X86_64
647
if (!found) {
648
gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
649
printk(KERN_ERR
650
"PCI: Warning: Cannot find a gap in the 32bit address range\n"
651
"PCI: Unassigned devices with 32bit resource registers may break!\n");
652
}
653
#endif
654
655
/*
656
* e820_reserve_resources_late protect stolen RAM already
657
*/
658
pci_mem_start = gapstart;
659
660
printk(KERN_INFO
661
"Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
662
pci_mem_start, gapstart, gapsize);
663
}
664
665
/**
666
* Because of the size limitation of struct boot_params, only first
667
* 128 E820 memory entries are passed to kernel via
668
* boot_params.e820_map, others are passed via SETUP_E820_EXT node of
669
* linked list of struct setup_data, which is parsed here.
670
*/
671
void __init parse_e820_ext(struct setup_data *sdata)
672
{
673
int entries;
674
struct e820entry *extmap;
675
676
entries = sdata->len / sizeof(struct e820entry);
677
extmap = (struct e820entry *)(sdata->data);
678
__append_e820_map(extmap, entries);
679
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
680
printk(KERN_INFO "extended physical RAM map:\n");
681
e820_print_map("extended");
682
}
683
684
#if defined(CONFIG_X86_64) || \
685
(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
686
/**
687
* Find the ranges of physical addresses that do not correspond to
688
* e820 RAM areas and mark the corresponding pages as nosave for
689
* hibernation (32 bit) or software suspend and suspend to RAM (64 bit).
690
*
691
* This function requires the e820 map to be sorted and without any
692
* overlapping entries and assumes the first e820 area to be RAM.
693
*/
694
void __init e820_mark_nosave_regions(unsigned long limit_pfn)
695
{
696
int i;
697
unsigned long pfn;
698
699
pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
700
for (i = 1; i < e820.nr_map; i++) {
701
struct e820entry *ei = &e820.map[i];
702
703
if (pfn < PFN_UP(ei->addr))
704
register_nosave_region(pfn, PFN_UP(ei->addr));
705
706
pfn = PFN_DOWN(ei->addr + ei->size);
707
if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
708
register_nosave_region(PFN_UP(ei->addr), pfn);
709
710
if (pfn >= limit_pfn)
711
break;
712
}
713
}
714
#endif
715
716
#ifdef CONFIG_HIBERNATION
717
/**
718
* Mark ACPI NVS memory region, so that we can save/restore it during
719
* hibernation and the subsequent resume.
720
*/
721
static int __init e820_mark_nvs_memory(void)
722
{
723
int i;
724
725
for (i = 0; i < e820.nr_map; i++) {
726
struct e820entry *ei = &e820.map[i];
727
728
if (ei->type == E820_NVS)
729
suspend_nvs_register(ei->addr, ei->size);
730
}
731
732
return 0;
733
}
734
core_initcall(e820_mark_nvs_memory);
735
#endif
736
737
/*
738
* pre allocated 4k and reserved it in memblock and e820_saved
739
*/
740
u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
741
{
742
u64 size = 0;
743
u64 addr;
744
u64 start;
745
746
for (start = startt; ; start += size) {
747
start = memblock_x86_find_in_range_size(start, &size, align);
748
if (start == MEMBLOCK_ERROR)
749
return 0;
750
if (size >= sizet)
751
break;
752
}
753
754
#ifdef CONFIG_X86_32
755
if (start >= MAXMEM)
756
return 0;
757
if (start + size > MAXMEM)
758
size = MAXMEM - start;
759
#endif
760
761
addr = round_down(start + size - sizet, align);
762
if (addr < start)
763
return 0;
764
memblock_x86_reserve_range(addr, addr + sizet, "new next");
765
e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
766
printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
767
update_e820_saved();
768
769
return addr;
770
}
771
772
/*
 * MAX_ARCH_PFN: upper bound on page frame numbers for the configured
 * architecture variant.  Every expansion is fully parenthesized so the
 * macro can be used safely inside larger expressions.
 */
#ifdef CONFIG_X86_32
# ifdef CONFIG_X86_PAE
#  define MAX_ARCH_PFN		(1ULL<<(36-PAGE_SHIFT))
# else
#  define MAX_ARCH_PFN		(1ULL<<(32-PAGE_SHIFT))
# endif
#else /* CONFIG_X86_32 */
# define MAX_ARCH_PFN		(MAXMEM >> PAGE_SHIFT)
#endif
781
782
/*
783
* Find the highest page frame number we have available
784
*/
785
static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
786
{
787
int i;
788
unsigned long last_pfn = 0;
789
unsigned long max_arch_pfn = MAX_ARCH_PFN;
790
791
for (i = 0; i < e820.nr_map; i++) {
792
struct e820entry *ei = &e820.map[i];
793
unsigned long start_pfn;
794
unsigned long end_pfn;
795
796
if (ei->type != type)
797
continue;
798
799
start_pfn = ei->addr >> PAGE_SHIFT;
800
end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
801
802
if (start_pfn >= limit_pfn)
803
continue;
804
if (end_pfn > limit_pfn) {
805
last_pfn = limit_pfn;
806
break;
807
}
808
if (end_pfn > last_pfn)
809
last_pfn = end_pfn;
810
}
811
812
if (last_pfn > max_arch_pfn)
813
last_pfn = max_arch_pfn;
814
815
printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
816
last_pfn, max_arch_pfn);
817
return last_pfn;
818
}
819
/* Highest E820_RAM page frame number, bounded only by MAX_ARCH_PFN. */
unsigned long __init e820_end_of_ram_pfn(void)
{
	unsigned long end_pfn = e820_end_pfn(MAX_ARCH_PFN, E820_RAM);

	return end_pfn;
}
823
824
/* Highest E820_RAM page frame number below the 4GB boundary. */
unsigned long __init e820_end_of_low_ram_pfn(void)
{
	unsigned long end_pfn = e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);

	return end_pfn;
}
828
829
/*
 * Report a fatal early-boot error: print 'msg' through early_printk()
 * and then panic() with the same text.
 *
 * The message is passed as a "%s" argument rather than as the format
 * string itself, so any '%' it contains is printed literally instead of
 * being interpreted as a conversion specifier.
 */
static void early_panic(char *msg)
{
	early_printk("%s", msg);
	panic("%s", msg);
}
834
835
static int userdef __initdata;
836
837
/* "mem=nopentium" disables the 4MB page tables. */
838
static int __init parse_memopt(char *p)
839
{
840
u64 mem_size;
841
842
if (!p)
843
return -EINVAL;
844
845
if (!strcmp(p, "nopentium")) {
846
#ifdef CONFIG_X86_32
847
setup_clear_cpu_cap(X86_FEATURE_PSE);
848
return 0;
849
#else
850
printk(KERN_WARNING "mem=nopentium ignored! (only supported on x86_32)\n");
851
return -EINVAL;
852
#endif
853
}
854
855
userdef = 1;
856
mem_size = memparse(p, &p);
857
/* don't remove all of memory when handling "mem={invalid}" param */
858
if (mem_size == 0)
859
return -EINVAL;
860
e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
861
862
return 0;
863
}
864
early_param("mem", parse_memopt);
865
866
static int __init parse_memmap_opt(char *p)
867
{
868
char *oldp;
869
u64 start_at, mem_size;
870
871
if (!p)
872
return -EINVAL;
873
874
if (!strncmp(p, "exactmap", 8)) {
875
#ifdef CONFIG_CRASH_DUMP
876
/*
877
* If we are doing a crash dump, we still need to know
878
* the real mem size before original memory map is
879
* reset.
880
*/
881
saved_max_pfn = e820_end_of_ram_pfn();
882
#endif
883
e820.nr_map = 0;
884
userdef = 1;
885
return 0;
886
}
887
888
oldp = p;
889
mem_size = memparse(p, &p);
890
if (p == oldp)
891
return -EINVAL;
892
893
userdef = 1;
894
if (*p == '@') {
895
start_at = memparse(p+1, &p);
896
e820_add_region(start_at, mem_size, E820_RAM);
897
} else if (*p == '#') {
898
start_at = memparse(p+1, &p);
899
e820_add_region(start_at, mem_size, E820_ACPI);
900
} else if (*p == '$') {
901
start_at = memparse(p+1, &p);
902
e820_add_region(start_at, mem_size, E820_RESERVED);
903
} else
904
e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
905
906
return *p == '\0' ? 0 : -EINVAL;
907
}
908
early_param("memmap", parse_memmap_opt);
909
910
/*
 * When a command-line option modified the memory map, sanitize it again
 * and log the result.  A map that cannot be sanitized is fatal.
 */
void __init finish_e820_parsing(void)
{
	u32 count;

	if (!userdef)
		return;

	count = e820.nr_map;
	if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &count) < 0)
		early_panic("Invalid user supplied memory map");
	e820.nr_map = count;

	printk(KERN_INFO "user-defined physical RAM map:\n");
	e820_print_map("user");
}
923
924
static inline const char *e820_type_to_string(int e820_type)
925
{
926
switch (e820_type) {
927
case E820_RESERVED_KERN:
928
case E820_RAM: return "System RAM";
929
case E820_ACPI: return "ACPI Tables";
930
case E820_NVS: return "ACPI Non-volatile Storage";
931
case E820_UNUSABLE: return "Unusable memory";
932
default: return "reserved";
933
}
934
}
935
936
/*
937
* Mark e820 reserved areas as busy for the resource manager.
938
*/
939
static struct resource __initdata *e820_res;
940
void __init e820_reserve_resources(void)
941
{
942
int i;
943
struct resource *res;
944
u64 end;
945
946
res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);
947
e820_res = res;
948
for (i = 0; i < e820.nr_map; i++) {
949
end = e820.map[i].addr + e820.map[i].size - 1;
950
if (end != (resource_size_t)end) {
951
res++;
952
continue;
953
}
954
res->name = e820_type_to_string(e820.map[i].type);
955
res->start = e820.map[i].addr;
956
res->end = end;
957
958
res->flags = IORESOURCE_MEM;
959
960
/*
961
* don't register the region that could be conflicted with
962
* pci device BAR resource and insert them later in
963
* pcibios_resource_survey()
964
*/
965
if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) {
966
res->flags |= IORESOURCE_BUSY;
967
insert_resource(&iomem_resource, res);
968
}
969
res++;
970
}
971
972
for (i = 0; i < e820_saved.nr_map; i++) {
973
struct e820entry *entry = &e820_saved.map[i];
974
firmware_map_add_early(entry->addr,
975
entry->addr + entry->size - 1,
976
e820_type_to_string(entry->type));
977
}
978
}
979
980
/* How much should we pad RAM ending depending on where it is? */
981
static unsigned long ram_alignment(resource_size_t pos)
982
{
983
unsigned long mb = pos >> 20;
984
985
/* To 64kB in the first megabyte */
986
if (!mb)
987
return 64*1024;
988
989
/* To 1MB in the first 16MB */
990
if (mb < 16)
991
return 1024*1024;
992
993
/* To 64MB for anything above that */
994
return 64*1024*1024;
995
}
996
997
#define MAX_RESOURCE_SIZE ((resource_size_t)-1)
998
999
void __init e820_reserve_resources_late(void)
1000
{
1001
int i;
1002
struct resource *res;
1003
1004
res = e820_res;
1005
for (i = 0; i < e820.nr_map; i++) {
1006
if (!res->parent && res->end)
1007
insert_resource_expand_to_fit(&iomem_resource, res);
1008
res++;
1009
}
1010
1011
/*
1012
* Try to bump up RAM regions to reasonable boundaries to
1013
* avoid stolen RAM:
1014
*/
1015
for (i = 0; i < e820.nr_map; i++) {
1016
struct e820entry *entry = &e820.map[i];
1017
u64 start, end;
1018
1019
if (entry->type != E820_RAM)
1020
continue;
1021
start = entry->addr + entry->size;
1022
end = round_up(start, ram_alignment(start)) - 1;
1023
if (end > MAX_RESOURCE_SIZE)
1024
end = MAX_RESOURCE_SIZE;
1025
if (start >= end)
1026
continue;
1027
printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ",
1028
start, end);
1029
reserve_region_with_split(&iomem_resource, start, end,
1030
"RAM buffer");
1031
}
1032
}
1033
1034
/*
 * Default memory-map setup: try to copy the BIOS-supplied E820 map.
 *
 * When that map is unusable, fake one with two RAM sections: 0 up to
 * LOWMEMSIZE(), and HIGH_MEMORY upwards for the larger of the sizes
 * reported in boot_params.alt_mem_k and
 * boot_params.screen_info.ext_mem_k.
 *
 * Returns a string naming the source of the map.
 */
char *__init default_machine_specific_memory_setup(void)
{
	char *who = "BIOS-e820";
	u32 new_nr = boot_params.e820_entries;
	u64 mem_size;

	sanitize_e820_map(boot_params.e820_map,
			  ARRAY_SIZE(boot_params.e820_map),
			  &new_nr);
	boot_params.e820_entries = new_nr;

	if (append_e820_map(boot_params.e820_map, boot_params.e820_entries)
	    >= 0)
		return who;

	/* compare results from other methods and take the greater */
	if (boot_params.alt_mem_k < boot_params.screen_info.ext_mem_k) {
		mem_size = boot_params.screen_info.ext_mem_k;
		who = "BIOS-88";
	} else {
		mem_size = boot_params.alt_mem_k;
		who = "BIOS-e801";
	}

	/* start over from an empty map; sizes above are in kB */
	e820.nr_map = 0;
	e820_add_region(0, LOWMEMSIZE(), E820_RAM);
	e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);

	/* In case someone cares... */
	return who;
}
1071
1072
/*
 * Build the global e820 map through the platform memory_setup hook,
 * snapshot it into e820_saved, and log it.
 */
void __init setup_memory_map(void)
{
	char *source = x86_init.resources.memory_setup();

	memcpy(&e820_saved, &e820, sizeof(struct e820map));
	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
	e820_print_map(source);
}
1081
1082
/*
 * Feed every E820_RAM and E820_RESERVED_KERN entry of the e820 map to
 * memblock, skipping entries whose end does not fit in resource_size_t,
 * then analyze and dump the memblock state.
 */
void __init memblock_x86_fill(void)
{
	int idx;

	/*
	 * EFI may have more than 128 entries.
	 * We are safe to enable resizing, because memblock_x86_fill()
	 * is rather later for x86
	 */
	memblock_can_resize = 1;

	for (idx = 0; idx < e820.nr_map; idx++) {
		struct e820entry *entry = &e820.map[idx];
		u64 end = entry->addr + entry->size;
		int is_ram = entry->type == E820_RAM ||
			     entry->type == E820_RESERVED_KERN;

		if (end != (resource_size_t)end || !is_ram)
			continue;

		memblock_add(entry->addr, entry->size);
	}

	memblock_analyze();
	memblock_dump_all();
}
1110
1111
/*
 * x86_64 only: tell the page allocator how many pages below MAX_DMA_PFN
 * are already in use, computed as memblock's total memory minus its free
 * memory in that range.  Does nothing on other configurations.
 */
void __init memblock_find_dma_reserve(void)
{
#ifdef CONFIG_X86_64
	u64 total_pfn, free_pfn, used_pfn;

	/*
	 * need to find out used area below MAX_DMA_PFN
	 * need to use memblock to get free size in [0, MAX_DMA_PFN]
	 * at first, and assume boot_mem will not take below MAX_DMA_PFN
	 */
	total_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
	free_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;

	used_pfn = total_pfn - free_pfn;
	set_dma_reserve(used_pfn);
#endif
}
1126
1127