GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/mm/init_32.c
/*
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 */

#include <linux/module.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/bootmem.h>
#include <linux/memblock.h>
#include <linux/proc_fs.h>
#include <linux/memory_hotplug.h>
#include <linux/initrd.h>
#include <linux/cpumask.h>
#include <linux/gfp.h>

#include <asm/asm.h>
#include <asm/bios_ebda.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/bugs.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/olpc_ofw.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/paravirt.h>
#include <asm/setup.h>
#include <asm/cacheflush.h>
#include <asm/page_types.h>
#include <asm/init.h>

unsigned long highstart_pfn, highend_pfn;

static noinline int do_test_wp_bit(void);

bool __read_mostly __vmalloc_start_set = false;

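/*
 * Early page-table allocator: hand out the next page from the buffer
 * reserved for boot-time page tables (bounded by pgt_buf_top) and
 * return its zeroed direct-mapped address.
 */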
static __init void *alloc_low_page(void)
{
	unsigned long pfn = pgt_buf_end++;
	void *adr;

	if (pfn >= pgt_buf_top)
		panic("alloc_low_page: ran out of memory");

	adr = __va(pfn * PAGE_SIZE);
	clear_page(adr);
	return adr;
}

/*
 * Creates a middle page table and puts a pointer to it in the
 * given global directory entry. This only returns the gd entry
 * in non-PAE compilation mode, since the middle layer is folded.
 */
static pmd_t * __init one_md_table_init(pgd_t *pgd)
{
	pud_t *pud;
	pmd_t *pmd_table;

#ifdef CONFIG_X86_PAE
	if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
		if (after_bootmem)
			pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE);
		else
			pmd_table = (pmd_t *)alloc_low_page();
		paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
		set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
		pud = pud_offset(pgd, 0);
		BUG_ON(pmd_table != pmd_offset(pud, 0));

		return pmd_table;
	}
#endif
	pud = pud_offset(pgd, 0);
	pmd_table = pmd_offset(pud, 0);

	return pmd_table;
}

/*
 * Create a page table and place a pointer to it in a middle page
 * directory entry:
 */
static pte_t * __init one_page_table_init(pmd_t *pmd)
{
	if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
		pte_t *page_table = NULL;

		if (after_bootmem) {
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
			page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
#endif
			if (!page_table)
				page_table =
				(pte_t *)alloc_bootmem_pages(PAGE_SIZE);
		} else
			page_table = (pte_t *)alloc_low_page();

		paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
		set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
		BUG_ON(page_table != pte_offset_kernel(pmd, 0));
	}

	return pte_offset_kernel(pmd, 0);
}

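/*
 * The two helpers below make sure the kernel page tables contain a pmd
 * page and a pte page covering vaddr, and return a pointer to the entry
 * within them that maps vaddr.
 */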
pmd_t * __init populate_extra_pmd(unsigned long vaddr)
{
	int pgd_idx = pgd_index(vaddr);
	int pmd_idx = pmd_index(vaddr);

	return one_md_table_init(swapper_pg_dir + pgd_idx) + pmd_idx;
}

pte_t * __init populate_extra_pte(unsigned long vaddr)
{
	int pte_idx = pte_index(vaddr);
	pmd_t *pmd;

	pmd = populate_extra_pmd(vaddr);
	return one_page_table_init(pmd) + pte_idx;
}

static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
					   unsigned long vaddr, pte_t *lastpte)
{
#ifdef CONFIG_HIGHMEM
	/*
	 * Something (early fixmap) may already have put a pte
	 * page here, which causes the page table allocation
	 * to become nonlinear. Attempt to fix it, and if it
	 * is still nonlinear then we have to bug.
	 */
	int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
	int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;

	if (pmd_idx_kmap_begin != pmd_idx_kmap_end
	    && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
	    && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
	    && ((__pa(pte) >> PAGE_SHIFT) < pgt_buf_start
		|| (__pa(pte) >> PAGE_SHIFT) >= pgt_buf_end)) {
		pte_t *newpte;
		int i;

		BUG_ON(after_bootmem);
		newpte = alloc_low_page();
		for (i = 0; i < PTRS_PER_PTE; i++)
			set_pte(newpte + i, pte[i]);

		paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT);
		set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE));
		BUG_ON(newpte != pte_offset_kernel(pmd, 0));
		__flush_tlb_all();

		paravirt_release_pte(__pa(pte) >> PAGE_SHIFT);
		pte = newpte;
	}
	BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1)
	       && vaddr > fix_to_virt(FIX_KMAP_END)
	       && lastpte && lastpte + PTRS_PER_PTE != pte);
#endif
	return pte;
}

/*
 * This function initializes a certain range of kernel virtual memory
 * with new bootmem page tables, everywhere page tables are missing in
 * the given range.
 *
 * NOTE: The pagetables are allocated contiguous on the physical space
 * so we can cache the place of the first one and move around without
 * checking the pgd every time.
 */
static void __init
page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
{
	int pgd_idx, pmd_idx;
	unsigned long vaddr;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte = NULL;

	vaddr = start;
	pgd_idx = pgd_index(vaddr);
	pmd_idx = pmd_index(vaddr);
	pgd = pgd_base + pgd_idx;

	for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
		pmd = one_md_table_init(pgd);
		pmd = pmd + pmd_index(vaddr);
		for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
		     pmd++, pmd_idx++) {
			pte = page_table_kmap_check(one_page_table_init(pmd),
						    pmd, vaddr, pte);

			vaddr += PMD_SIZE;
		}
		pmd_idx = 0;
	}
}

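/*
 * Does addr fall inside the kernel image, from _text up to the end of
 * the init sections? Such pages must keep executable protections when
 * the direct mapping is built below.
 */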
static inline int is_kernel_text(unsigned long addr)
{
	if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end)
		return 1;
	return 0;
}

/*
 * This maps the physical memory to kernel virtual address space, a total
 * of max_low_pfn pages, by creating page tables starting from address
 * PAGE_OFFSET:
 */
unsigned long __init
kernel_physical_mapping_init(unsigned long start,
			     unsigned long end,
			     unsigned long page_size_mask)
{
	int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
	unsigned long last_map_addr = end;
	unsigned long start_pfn, end_pfn;
	pgd_t *pgd_base = swapper_pg_dir;
	int pgd_idx, pmd_idx, pte_ofs;
	unsigned long pfn;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	unsigned pages_2m, pages_4k;
	int mapping_iter;

	start_pfn = start >> PAGE_SHIFT;
	end_pfn = end >> PAGE_SHIFT;

	/*
	 * First iteration will setup identity mapping using large/small pages
	 * based on use_pse, with other attributes same as set by
	 * the early code in head_32.S
	 *
	 * Second iteration will setup the appropriate attributes (NX, GLOBAL..)
	 * as desired for the kernel identity mapping.
	 *
	 * This two pass mechanism conforms to the TLB app note which says:
	 *
	 *     "Software should not write to a paging-structure entry in a way
	 *      that would change, for any linear address, both the page size
	 *      and either the page frame or attributes."
	 */
	mapping_iter = 1;

	if (!cpu_has_pse)
		use_pse = 0;

repeat:
	pages_2m = pages_4k = 0;
	pfn = start_pfn;
	pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
	pgd = pgd_base + pgd_idx;
	for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
		pmd = one_md_table_init(pgd);

		if (pfn >= end_pfn)
			continue;
#ifdef CONFIG_X86_PAE
		pmd_idx = pmd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
		pmd += pmd_idx;
#else
		pmd_idx = 0;
#endif
		for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn;
		     pmd++, pmd_idx++) {
			unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;

			/*
			 * Map with big pages if possible, otherwise
			 * create normal page tables:
			 */
			if (use_pse) {
				unsigned int addr2;
				pgprot_t prot = PAGE_KERNEL_LARGE;
				/*
				 * first pass will use the same initial
				 * identity mapping attribute + _PAGE_PSE.
				 */
				pgprot_t init_prot =
					__pgprot(PTE_IDENT_ATTR |
						 _PAGE_PSE);

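				/*
				 * addr2 is the last byte the candidate 2M
				 * mapping would cover; if any part of the
				 * large page contains kernel text, map the
				 * whole page executable.
				 */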
				addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
					PAGE_OFFSET + PAGE_SIZE-1;

				if (is_kernel_text(addr) ||
				    is_kernel_text(addr2))
					prot = PAGE_KERNEL_LARGE_EXEC;

				pages_2m++;
				if (mapping_iter == 1)
					set_pmd(pmd, pfn_pmd(pfn, init_prot));
				else
					set_pmd(pmd, pfn_pmd(pfn, prot));

				pfn += PTRS_PER_PTE;
				continue;
			}
			pte = one_page_table_init(pmd);

			pte_ofs = pte_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
			pte += pte_ofs;
			for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn;
			     pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
				pgprot_t prot = PAGE_KERNEL;
				/*
				 * first pass will use the same initial
				 * identity mapping attribute.
				 */
				pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR);

				if (is_kernel_text(addr))
					prot = PAGE_KERNEL_EXEC;

				pages_4k++;
				if (mapping_iter == 1) {
					set_pte(pte, pfn_pte(pfn, init_prot));
					last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE;
				} else
					set_pte(pte, pfn_pte(pfn, prot));
			}
		}
	}
	if (mapping_iter == 1) {
		/*
		 * update direct mapping page count only in the first
		 * iteration.
		 */
		update_page_count(PG_LEVEL_2M, pages_2m);
		update_page_count(PG_LEVEL_4K, pages_4k);

		/*
		 * local global flush tlb, which will flush the previous
		 * mappings present in both small and large page TLB's.
		 */
		__flush_tlb_all();

		/*
		 * Second iteration will set the actual desired PTE attributes.
		 */
		mapping_iter = 2;
		goto repeat;
	}
	return last_map_addr;
}

pte_t *kmap_pte;
pgprot_t kmap_prot;

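/*
 * Walk pgd -> pud -> pmd down to the kernel pte that maps a fixmap
 * virtual address.
 */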
static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
{
	return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr),
			vaddr), vaddr), vaddr);
}

static void __init kmap_init(void)
{
	unsigned long kmap_vstart;

	/*
	 * Cache the first kmap pte:
	 */
	kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
	kmap_pte = kmap_get_fixmap_pte(kmap_vstart);

	kmap_prot = PAGE_KERNEL;
}

#ifdef CONFIG_HIGHMEM
static void __init permanent_kmaps_init(pgd_t *pgd_base)
{
	unsigned long vaddr;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	vaddr = PKMAP_BASE;
	page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);

	pgd = swapper_pg_dir + pgd_index(vaddr);
	pud = pud_offset(pgd, vaddr);
	pmd = pmd_offset(pud, vaddr);
	pte = pte_offset_kernel(pmd, vaddr);
	pkmap_page_table = pte;
}

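/*
 * Hand one highmem page over to the buddy allocator and account for it.
 */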
static void __init add_one_highpage_init(struct page *page)
{
	ClearPageReserved(page);
	init_page_count(page);
	__free_page(page);
	totalhigh_pages++;
}

void __init add_highpages_with_active_regions(int nid,
			 unsigned long start_pfn, unsigned long end_pfn)
{
	struct range *range;
	int nr_range;
	int i;

	nr_range = __get_free_all_memory_range(&range, nid, start_pfn, end_pfn);

	for (i = 0; i < nr_range; i++) {
		struct page *page;
		int node_pfn;

		for (node_pfn = range[i].start; node_pfn < range[i].end;
		     node_pfn++) {
			if (!pfn_valid(node_pfn))
				continue;
			page = pfn_to_page(node_pfn);
			add_one_highpage_init(page);
		}
	}
}
#else
static inline void permanent_kmaps_init(pgd_t *pgd_base)
{
}
#endif /* CONFIG_HIGHMEM */

void __init native_pagetable_setup_start(pgd_t *base)
{
	unsigned long pfn, va;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	/*
	 * Remove any mappings which extend past the end of physical
	 * memory from the boot time page table:
	 */
	for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) {
		va = PAGE_OFFSET + (pfn<<PAGE_SHIFT);
		pgd = base + pgd_index(va);
		if (!pgd_present(*pgd))
			break;

		pud = pud_offset(pgd, va);
		pmd = pmd_offset(pud, va);
		if (!pmd_present(*pmd))
			break;

		pte = pte_offset_kernel(pmd, va);
		if (!pte_present(*pte))
			break;

		pte_clear(NULL, va, pte);
	}
	paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
}

void __init native_pagetable_setup_done(pgd_t *base)
{
}

/*
 * Build a proper pagetable for the kernel mappings. Up until this
 * point, we've been running on some set of pagetables constructed by
 * the boot process.
 *
 * If we're booting on native hardware, this will be a pagetable
 * constructed in arch/x86/kernel/head_32.S. The root of the
 * pagetable will be swapper_pg_dir.
 *
 * If we're booting paravirtualized under a hypervisor, then there are
 * more options: we may already be running PAE, and the pagetable may
 * or may not be based in swapper_pg_dir. In any case,
 * paravirt_pagetable_setup_start() will set up swapper_pg_dir
 * appropriately for the rest of the initialization to work.
 *
 * In general, pagetable_init() assumes that the pagetable may already
 * be partially populated, and so it avoids stomping on any existing
 * mappings.
 */
void __init early_ioremap_page_table_range_init(void)
{
	pgd_t *pgd_base = swapper_pg_dir;
	unsigned long vaddr, end;

	/*
	 * Fixed mappings, only the page table structure has to be
	 * created - mappings will be set by set_fixmap():
	 */
	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
	end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
	page_table_range_init(vaddr, end, pgd_base);
	early_ioremap_reset();
}

static void __init pagetable_init(void)
{
	pgd_t *pgd_base = swapper_pg_dir;

	permanent_kmaps_init(pgd_base);
}

pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP);
EXPORT_SYMBOL_GPL(__supported_pte_mask);

/* user-defined highmem size */
static unsigned int highmem_pages = -1;

/*
 * highmem=size forces highmem to be exactly 'size' bytes.
 * This works even on boxes that have no highmem otherwise.
 * This also works to reduce highmem size on bigger boxes.
 */
static int __init parse_highmem(char *arg)
{
	if (!arg)
		return -EINVAL;

	highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
	return 0;
}
early_param("highmem", parse_highmem);

#define MSG_HIGHMEM_TOO_BIG \
	"highmem size (%luMB) is bigger than pages available (%luMB)!\n"

#define MSG_LOWMEM_TOO_SMALL \
	"highmem size (%luMB) results in <64MB lowmem, ignoring it!\n"
/*
 * All of RAM fits into lowmem - but if user wants highmem
 * artificially via the highmem=x boot parameter then create
 * it:
 */
void __init lowmem_pfn_init(void)
{
	/* max_low_pfn is 0, we already have early_res support */
	max_low_pfn = max_pfn;

	if (highmem_pages == -1)
		highmem_pages = 0;
#ifdef CONFIG_HIGHMEM
	if (highmem_pages >= max_pfn) {
		printk(KERN_ERR MSG_HIGHMEM_TOO_BIG,
			pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
		highmem_pages = 0;
	}
	if (highmem_pages) {
		if (max_low_pfn - highmem_pages < 64*1024*1024/PAGE_SIZE) {
			printk(KERN_ERR MSG_LOWMEM_TOO_SMALL,
				pages_to_mb(highmem_pages));
			highmem_pages = 0;
		}
		max_low_pfn -= highmem_pages;
	}
#else
	if (highmem_pages)
		printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
#endif
}

#define MSG_HIGHMEM_TOO_SMALL \
	"only %luMB highmem pages available, ignoring highmem size of %luMB!\n"

#define MSG_HIGHMEM_TRIMMED \
	"Warning: only 4GB will be used. Use a HIGHMEM64G enabled kernel!\n"
/*
 * We have more RAM than fits into lowmem - we try to put it into
 * highmem, also taking the highmem=x boot parameter into account:
 */
void __init highmem_pfn_init(void)
{
	max_low_pfn = MAXMEM_PFN;

	if (highmem_pages == -1)
		highmem_pages = max_pfn - MAXMEM_PFN;

	if (highmem_pages + MAXMEM_PFN < max_pfn)
		max_pfn = MAXMEM_PFN + highmem_pages;

	if (highmem_pages + MAXMEM_PFN > max_pfn) {
		printk(KERN_WARNING MSG_HIGHMEM_TOO_SMALL,
			pages_to_mb(max_pfn - MAXMEM_PFN),
			pages_to_mb(highmem_pages));
		highmem_pages = 0;
	}
#ifndef CONFIG_HIGHMEM
	/* Maximum memory usable is what is directly addressable */
	printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20);
	if (max_pfn > MAX_NONPAE_PFN)
		printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
	else
		printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
	max_pfn = MAXMEM_PFN;
#else /* !CONFIG_HIGHMEM */
#ifndef CONFIG_HIGHMEM64G
	if (max_pfn > MAX_NONPAE_PFN) {
		max_pfn = MAX_NONPAE_PFN;
		printk(KERN_WARNING MSG_HIGHMEM_TRIMMED);
	}
#endif /* !CONFIG_HIGHMEM64G */
#endif /* !CONFIG_HIGHMEM */
}

/*
 * Determine low and high memory ranges:
 */
void __init find_low_pfn_range(void)
{
	/* it could update max_pfn */

	if (max_pfn <= MAXMEM_PFN)
		lowmem_pfn_init();
	else
		highmem_pfn_init();
}

#ifndef CONFIG_NEED_MULTIPLE_NODES
void __init initmem_init(void)
{
#ifdef CONFIG_HIGHMEM
	highstart_pfn = highend_pfn = max_pfn;
	if (max_pfn > max_low_pfn)
		highstart_pfn = max_low_pfn;
	memblock_x86_register_active_regions(0, 0, highend_pfn);
	sparse_memory_present_with_active_regions(0);
	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
		pages_to_mb(highend_pfn - highstart_pfn));
	num_physpages = highend_pfn;
	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
	memblock_x86_register_active_regions(0, 0, max_low_pfn);
	sparse_memory_present_with_active_regions(0);
	num_physpages = max_low_pfn;
	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
#endif
#ifdef CONFIG_FLATMEM
	max_mapnr = num_physpages;
#endif
	__vmalloc_start_set = true;

	printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
			pages_to_mb(max_low_pfn));

	setup_bootmem_allocator();
}
#endif /* !CONFIG_NEED_MULTIPLE_NODES */

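/*
 * Tell the page allocator how far each zone (DMA, NORMAL, HIGHMEM)
 * extends, in page frame numbers.
 */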
static void __init zone_sizes_init(void)
{
	unsigned long max_zone_pfns[MAX_NR_ZONES];
	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
#ifdef CONFIG_ZONE_DMA
	max_zone_pfns[ZONE_DMA] =
		virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
#endif
	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_HIGHMEM
	max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
#endif

	free_area_init_nodes(max_zone_pfns);
}

void __init setup_bootmem_allocator(void)
{
	printk(KERN_INFO "  mapped low ram: 0 - %08lx\n",
		 max_pfn_mapped<<PAGE_SHIFT);
	printk(KERN_INFO "  low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);

	after_bootmem = 1;
}

/*
 * paging_init() sets up the page tables - note that the first 8MB are
 * already mapped by head.S.
 *
 * This routine also unmaps the page at virtual kernel address 0, so
 * that we can trap those pesky NULL-reference errors in the kernel.
 */
void __init paging_init(void)
{
	pagetable_init();

	__flush_tlb_all();

	kmap_init();

	/*
	 * NOTE: at this point the bootmem allocator is fully available.
	 */
	olpc_dt_build_devicetree();
	sparse_memory_present_with_active_regions(MAX_NUMNODES);
	sparse_init();
	zone_sizes_init();
}

/*
 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
 * and also on some strange 486's. All 586+'s are OK. This used to involve
 * black magic jumps to work around some nasty CPU bugs, but fortunately the
 * switch to using exceptions got rid of all that.
 */
static void __init test_wp_bit(void)
{
	printk(KERN_INFO
  "Checking if this processor honours the WP bit even in supervisor mode...");

	/* Any page-aligned address will do, the test is non-destructive */
	__set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY);
	boot_cpu_data.wp_works_ok = do_test_wp_bit();
	clear_fixmap(FIX_WP_TEST);

	if (!boot_cpu_data.wp_works_ok) {
		printk(KERN_CONT "No.\n");
#ifdef CONFIG_X86_WP_WORKS_OK
		panic(
  "This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
#endif
	} else {
		printk(KERN_CONT "Ok.\n");
	}
}

void __init mem_init(void)
{
	int codesize, reservedpages, datasize, initsize;
	int tmp;

	pci_iommu_alloc();

#ifdef CONFIG_FLATMEM
	BUG_ON(!mem_map);
#endif
	/* this will put all low memory onto the freelists */
	totalram_pages += free_all_bootmem();

	reservedpages = 0;
	for (tmp = 0; tmp < max_low_pfn; tmp++)
		/*
		 * Only count reserved RAM pages:
		 */
		if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
			reservedpages++;

	set_highmem_pages_init();

	codesize = (unsigned long) &_etext - (unsigned long) &_text;
	datasize = (unsigned long) &_edata - (unsigned long) &_etext;
	initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;

	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
			"%dk reserved, %dk data, %dk init, %ldk highmem)\n",
		nr_free_pages() << (PAGE_SHIFT-10),
		num_physpages << (PAGE_SHIFT-10),
		codesize >> 10,
		reservedpages << (PAGE_SHIFT-10),
		datasize >> 10,
		initsize >> 10,
		totalhigh_pages << (PAGE_SHIFT-10));

	printk(KERN_INFO "virtual kernel memory layout:\n"
		"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
#ifdef CONFIG_HIGHMEM
		"    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
#endif
		"    vmalloc : 0x%08lx - 0x%08lx   (%4ld MB)\n"
		"    lowmem  : 0x%08lx - 0x%08lx   (%4ld MB)\n"
		"      .init : 0x%08lx - 0x%08lx   (%4ld kB)\n"
		"      .data : 0x%08lx - 0x%08lx   (%4ld kB)\n"
		"      .text : 0x%08lx - 0x%08lx   (%4ld kB)\n",
		FIXADDR_START, FIXADDR_TOP,
		(FIXADDR_TOP - FIXADDR_START) >> 10,

#ifdef CONFIG_HIGHMEM
		PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
		(LAST_PKMAP*PAGE_SIZE) >> 10,
#endif

		VMALLOC_START, VMALLOC_END,
		(VMALLOC_END - VMALLOC_START) >> 20,

		(unsigned long)__va(0), (unsigned long)high_memory,
		((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,

		(unsigned long)&__init_begin, (unsigned long)&__init_end,
		((unsigned long)&__init_end -
		 (unsigned long)&__init_begin) >> 10,

		(unsigned long)&_etext, (unsigned long)&_edata,
		((unsigned long)&_edata - (unsigned long)&_etext) >> 10,

		(unsigned long)&_text, (unsigned long)&_etext,
		((unsigned long)&_etext - (unsigned long)&_text) >> 10);

	/*
	 * Check boundaries twice: Some fundamental inconsistencies can
	 * be detected at build time already.
	 */
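/*
 * The temporary #defines below pin __FIXADDR_TOP and high_memory to
 * compile-time constants so that BUILD_BUG_ON() can verify the layout
 * at build time; the same checks are repeated below with BUG_ON()
 * against the real runtime values.
 */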
#define __FIXADDR_TOP (-PAGE_SIZE)
#ifdef CONFIG_HIGHMEM
	BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
	BUILD_BUG_ON(VMALLOC_END > PKMAP_BASE);
#endif
#define high_memory (-128UL << 20)
	BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END);
#undef high_memory
#undef __FIXADDR_TOP

#ifdef CONFIG_HIGHMEM
	BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
	BUG_ON(VMALLOC_END > PKMAP_BASE);
#endif
	BUG_ON(VMALLOC_START >= VMALLOC_END);
	BUG_ON((unsigned long)high_memory > VMALLOC_START);

	if (boot_cpu_data.wp_works_ok < 0)
		test_wp_bit();
}

#ifdef CONFIG_MEMORY_HOTPLUG
int arch_add_memory(int nid, u64 start, u64 size)
{
	struct pglist_data *pgdata = NODE_DATA(nid);
	struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;

	return __add_pages(nid, zone, start_pfn, nr_pages);
}
#endif

/*
 * This function cannot be __init, since exceptions don't work in that
 * section. Put this after the callers, so that it cannot be inlined.
 */
static noinline int do_test_wp_bit(void)
{
	char tmp_reg;
	int flag;

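	/*
	 * flag starts out as 1. Try to write back to the read-only
	 * FIX_WP_TEST page: if the CPU honours WP in supervisor mode the
	 * write faults and the exception fixup (1b -> 2b) skips the xorl,
	 * leaving flag at 1; otherwise the write succeeds and flag is
	 * cleared to 0.
	 */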
	__asm__ __volatile__(
		"	movb %0, %1	\n"
		"1:	movb %1, %0	\n"
		"	xorl %2, %2	\n"
		"2:			\n"
		_ASM_EXTABLE(1b,2b)
		:"=m" (*(char *)fix_to_virt(FIX_WP_TEST)),
		 "=q" (tmp_reg),
		 "=r" (flag)
		:"2" (1)
		:"memory");

	return flag;
}

#ifdef CONFIG_DEBUG_RODATA
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);

int kernel_set_to_readonly __read_mostly;

void set_kernel_text_rw(void)
{
	unsigned long start = PFN_ALIGN(_text);
	unsigned long size = PFN_ALIGN(_etext) - start;

	if (!kernel_set_to_readonly)
		return;

	pr_debug("Set kernel text: %lx - %lx for read write\n",
		 start, start+size);

	set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
}

void set_kernel_text_ro(void)
{
	unsigned long start = PFN_ALIGN(_text);
	unsigned long size = PFN_ALIGN(_etext) - start;

	if (!kernel_set_to_readonly)
		return;

	pr_debug("Set kernel text: %lx - %lx for read only\n",
		 start, start+size);

	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
}

static void mark_nxdata_nx(void)
{
	/*
	 * When this is called, init has already been executed and released,
	 * so everything past _etext should be NX.
	 */
	unsigned long start = PFN_ALIGN(_etext);
	/*
	 * This comes from is_kernel_text upper limit. Also HPAGE where used:
	 */
	unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start;

	if (__supported_pte_mask & _PAGE_NX)
		printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10);
	set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT);
}

void mark_rodata_ro(void)
{
	unsigned long start = PFN_ALIGN(_text);
	unsigned long size = PFN_ALIGN(_etext) - start;

	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
	printk(KERN_INFO "Write protecting the kernel text: %luk\n",
		size >> 10);

	kernel_set_to_readonly = 1;

#ifdef CONFIG_CPA_DEBUG
	printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
		start, start+size);
	set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);

	printk(KERN_INFO "Testing CPA: write protecting again\n");
	set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
#endif

	start += size;
	size = (unsigned long)__end_rodata - start;
	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
		size >> 10);
	rodata_test();

#ifdef CONFIG_CPA_DEBUG
	printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, start + size);
	set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);

	printk(KERN_INFO "Testing CPA: write protecting again\n");
	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
#endif
	mark_nxdata_nx();
}
#endif