GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/kernel/head_64.S
/*
 * linux/arch/x86_64/kernel/head.S -- start in 32bit and switch to 64bit
 *
 * Copyright (C) 2000 Andrea Arcangeli <[email protected]> SuSE
 * Copyright (C) 2000 Pavel Machek <[email protected]>
 * Copyright (C) 2000 Karsten Keil <[email protected]>
 * Copyright (C) 2001,2002 Andi Kleen <[email protected]>
 * Copyright (C) 2005 Eric Biederman <[email protected]>
 */


#include <linux/linkage.h>
#include <linux/threads.h>
#include <linux/init.h>
#include <asm/segment.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/msr.h>
#include <asm/cache.h>
#include <asm/processor-flags.h>
#include <asm/percpu.h>

#ifdef CONFIG_PARAVIRT
#include <asm/asm-offsets.h>
#include <asm/paravirt.h>
#else
#define GET_CR2_INTO_RCX movq %cr2, %rcx
#endif
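
/*
 * Note: when CONFIG_PARAVIRT is set, GET_CR2_INTO_RCX is supplied by
 * <asm/paravirt.h> as a patchable pv-ops read of %cr2 instead of the
 * direct movq above; it is used by early_idt_handler below.
 */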

/* we are not able to switch in one step to the final KERNEL ADDRESS SPACE
 * because we need identity-mapped pages.
 *
 */

#define pud_index(x)   (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))

L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET)
L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET)
L4_START_KERNEL = pgd_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)
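
/*
 * These assembler constants are the pgd/pud slot numbers of the direct
 * mapping (__PAGE_OFFSET) and of the kernel text mapping
 * (__START_KERNEL_map); they are the indices used below when statically
 * initializing and relocating init_level4_pgt and the level3 page tables.
 */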

        .text
        __HEAD
        .code64
        .globl startup_64
startup_64:

        /*
         * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
         * and someone has loaded an identity mapped page table
         * for us.  These identity mapped page tables map all of the
         * kernel pages and possibly all of memory.
         *
         * %esi holds a physical pointer to real_mode_data.
         *
         * We come here either directly from a 64bit bootloader, or from
         * arch/x86_64/boot/compressed/head.S.
         *
         * We only come here initially at boot, nothing else comes here.
         *
         * Since we may be loaded at an address different from what we were
         * compiled to run at, we first fix up the physical addresses in our
         * page tables and then reload them.
         */

        /* Compute the delta between the address I am compiled to run at and
         * the address I am actually running at.
         */
        leaq _text(%rip), %rbp
        subq $_text - __START_KERNEL_map, %rbp

        /* Is the address not 2M aligned? */
        movq %rbp, %rax
        andl $~PMD_PAGE_MASK, %eax
        testl %eax, %eax
        jnz bad_address

        /* Is the address too large? */
        leaq _text(%rip), %rdx
        movq $PGDIR_SIZE, %rax
        cmpq %rax, %rdx
        jae bad_address
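
        /*
         * Both checks exist because all early mappings use 2MB pages and
         * the identity fixup below only touches the first pgd slot, so a
         * load address that is not 2MB aligned, or that lies at or above
         * PGDIR_SIZE, cannot be handled here.
         */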

        /* Fixup the physical addresses in the page table
         */
        addq %rbp, init_level4_pgt + 0(%rip)
        addq %rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip)
        addq %rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip)

        addq %rbp, level3_ident_pgt + 0(%rip)

        addq %rbp, level3_kernel_pgt + (510*8)(%rip)
        addq %rbp, level3_kernel_pgt + (511*8)(%rip)

        addq %rbp, level2_fixmap_pgt + (506*8)(%rip)
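
        /*
         * The slots patched above are exactly the ones statically
         * initialized with physical addresses in the .data section below:
         * entries 510 and 511 of level3_kernel_pgt hold level2_kernel_pgt
         * and level2_fixmap_pgt, and entry 506 of level2_fixmap_pgt holds
         * level1_fixmap_pgt.
         */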

        /* Add an Identity mapping if I am above 1G */
        leaq _text(%rip), %rdi
        andq $PMD_PAGE_MASK, %rdi

        movq %rdi, %rax
        shrq $PUD_SHIFT, %rax
        andq $(PTRS_PER_PUD - 1), %rax
        jz ident_complete
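
        /*
         * A zero pud index means _text lies within the first 1GB, which
         * level2_ident_pgt already identity-maps.  Otherwise hook
         * level2_spare_pgt into the matching level3_ident_pgt slot and map
         * the 2MB page containing _text through it.
         */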

        leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
        leaq level3_ident_pgt(%rip), %rbx
        movq %rdx, 0(%rbx, %rax, 8)

        movq %rdi, %rax
        shrq $PMD_SHIFT, %rax
        andq $(PTRS_PER_PMD - 1), %rax
        leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx
        leaq level2_spare_pgt(%rip), %rbx
        movq %rdx, 0(%rbx, %rax, 8)
ident_complete:

        /*
         * Fixup the kernel text+data virtual addresses. Note that
         * we might write invalid pmds when the kernel is relocated;
         * cleanup_highmap() fixes this up along with the mappings
         * beyond _end.
         */

        leaq level2_kernel_pgt(%rip), %rdi
        leaq 4096(%rdi), %r8
        /* See if it is a valid page table entry */
1:      testq $1, 0(%rdi)
        jz 2f
        addq %rbp, 0(%rdi)
        /* Go to the next page */
2:      addq $8, %rdi
        cmp %r8, %rdi
        jne 1b

        /* Fixup phys_base */
        addq %rbp, phys_base(%rip)
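        /*
         * phys_base starts out as 0 (see ENTRY(phys_base) below); after
         * this it holds the offset between where the kernel was loaded and
         * where it was compiled to run, which the %cr3 setup in
         * secondary_startup_64 and __pa() rely on.
         */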

        /* Fixup trampoline */
        addq %rbp, trampoline_level4_pgt + 0(%rip)
        addq %rbp, trampoline_level4_pgt + (511*8)(%rip)
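        /*
         * trampoline_level4_pgt is the page table the secondary CPUs start
         * on (see trampoline.S, mentioned above); its identity slot (0) and
         * kernel slot (511) hold physical addresses and need the same
         * relocation.
         */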

        /* Due to ENTRY(), sometimes the empty space gets filled with
         * zeros. Better take a jmp than relying on empty space being
         * filled with 0x90 (nop)
         */
        jmp secondary_startup_64
ENTRY(secondary_startup_64)
        /*
         * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1,
         * and someone has loaded a mapped page table.
         *
         * %esi holds a physical pointer to real_mode_data.
         *
         * We come here either from startup_64 (using physical addresses)
         * or from trampoline.S (using virtual addresses).
         *
         * Using virtual addresses from trampoline.S removes the need
         * to have any identity mapped pages in the kernel page table
         * after the boot processor executes this code.
         */

        /* Enable PAE mode and PGE */
        movl $(X86_CR4_PAE | X86_CR4_PGE), %eax
        movq %rax, %cr4

        /* Setup early boot stage 4 level pagetables. */
        movq $(init_level4_pgt - __START_KERNEL_map), %rax
        addq phys_base(%rip), %rax
        movq %rax, %cr3
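        /*
         * init_level4_pgt is a link-time (virtual) address; subtracting
         * __START_KERNEL_map and adding phys_base converts it into the
         * physical address that %cr3 requires, wherever the kernel was
         * actually loaded.
         */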

        /* Ensure I am executing from virtual addresses */
        movq $1f, %rax
        jmp *%rax
1:
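        /*
         * The movq/jmp pair above loads the full 64bit link-time address
         * of the 1: label, so from here on %rip refers to the kernel text
         * mapping at __START_KERNEL_map rather than an identity-mapped
         * address.
         */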

        /* Check if nx is implemented */
        movl $0x80000001, %eax
        cpuid
        movl %edx,%edi
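        /* CPUID.80000001H:EDX bit 20 is the NX (no-execute) feature flag,
         * tested below before enabling EFER.NX. */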

        /* Setup EFER (Extended Feature Enable Register) */
        movl $MSR_EFER, %ecx
        rdmsr
        btsl $_EFER_SCE, %eax           /* Enable System Call */
        btl $20,%edi                    /* No Execute supported? */
        jnc 1f
        btsl $_EFER_NX, %eax
1:      wrmsr                           /* Make changes effective */

        /* Setup cr0 */
#define CR0_STATE       (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
                         X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
                         X86_CR0_PG)
        movl $CR0_STATE, %eax
        /* Make changes effective */
        movq %rax, %cr0

        /* Setup a boot time stack */
        movq stack_start(%rip),%rsp

        /* zero EFLAGS after setting rsp */
        pushq $0
        popfq

        /*
         * We must switch to a new descriptor in kernel space for the GDT
         * because soon the kernel won't have access anymore to the userspace
         * addresses we are currently running on. We have to do that here
         * because in 32bit we couldn't load a 64bit linear address.
         */
        lgdt early_gdt_descr(%rip)

        /* set up data segments */
        xorl %eax,%eax
        movl %eax,%ds
        movl %eax,%ss
        movl %eax,%es

        /*
         * We don't really need to load %fs or %gs, but load them anyway
         * to kill any stale realmode selectors.  This allows execution
         * under VT hardware.
         */
        movl %eax,%fs
        movl %eax,%gs

        /* Set up %gs.
         *
         * The base of %gs always points to the bottom of the irqstack
         * union.  If the stack protector canary is enabled, it is
         * located at %gs:40.  Note that, on SMP, the boot cpu uses
         * init data section till per cpu areas are set up.
         */
        movl $MSR_GS_BASE,%ecx
        movl initial_gs(%rip),%eax
        movl initial_gs+4(%rip),%edx
        wrmsr

        /* esi is pointer to real mode structure with interesting info.
           pass it to C */
        movl %esi, %edi

        /* Finally jump to run C code and to be on real kernel address
         * Since we are running on identity-mapped space we have to jump
         * to the full 64bit address, this is only possible as indirect
         * jump.  In addition we need to ensure %cs is set so we make this
         * a far return.
         */
        movq initial_code(%rip),%rax
        pushq $0                # fake return address to stop unwinder
        pushq $__KERNEL_CS      # set correct cs
        pushq %rax              # target address in negative space
        lretq

        /* SMP bootup changes these two */
        __REFDATA
        .align 8
        ENTRY(initial_code)
        .quad x86_64_start_kernel
        ENTRY(initial_gs)
        .quad INIT_PER_CPU_VAR(irq_stack_union)

        ENTRY(stack_start)
        .quad init_thread_union+THREAD_SIZE-8
        .word 0
        __FINITDATA

bad_address:
        jmp bad_address

        .section ".init.text","ax"
#ifdef CONFIG_EARLY_PRINTK
        .globl early_idt_handlers
early_idt_handlers:
        i = 0
        .rept NUM_EXCEPTION_VECTORS
        movl $i, %esi
        jmp early_idt_handler
        i = i + 1
        .endr
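        /*
         * The .rept above emits NUM_EXCEPTION_VECTORS small stubs; each
         * one loads its vector number into %esi and jumps to the common
         * early_idt_handler, giving every early IDT entry a distinct
         * target.
         */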
#endif

ENTRY(early_idt_handler)
#ifdef CONFIG_EARLY_PRINTK
        cmpl $2,early_recursion_flag(%rip)
        jz 1f
        incl early_recursion_flag(%rip)
        GET_CR2_INTO_RCX
        movq %rcx,%r9
        xorl %r8d,%r8d          # zero for error code
        movl %esi,%ecx          # get vector number
        # Test %ecx against mask of vectors that push error code.
        cmpl $31,%ecx
        ja 0f
        movl $1,%eax
        salq %cl,%rax
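        # 0x27d00 sets bits 8, 10-14 and 17: the vectors (#DF, #TS, #NP,
        # #SS, #GP, #PF, #AC) that push an error code onto the stack.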
        testl $0x27d00,%eax
        je 0f
        popq %r8                # get error code
0:      movq 0(%rsp),%rcx       # get ip
        movq 8(%rsp),%rdx       # get cs
        xorl %eax,%eax
        leaq early_idt_msg(%rip),%rdi
        call early_printk
        cmpl $2,early_recursion_flag(%rip)
        jz 1f
        call dump_stack
#ifdef CONFIG_KALLSYMS
        leaq early_idt_ripmsg(%rip),%rdi
        movq 0(%rsp),%rsi       # get rip again
        call __print_symbol
#endif
#endif /* EARLY_PRINTK */
1:      hlt
        jmp 1b

#ifdef CONFIG_EARLY_PRINTK
early_recursion_flag:
        .long 0

early_idt_msg:
        .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n"
early_idt_ripmsg:
        .asciz "RIP %s\n"
#endif /* CONFIG_EARLY_PRINTK */
        .previous

#define NEXT_PAGE(name) \
        .balign PAGE_SIZE; \
ENTRY(name)

/* Automate the creation of 1 to 1 mapping pmd entries */
#define PMDS(START, PERM, COUNT)                        \
        i = 0 ;                                         \
        .rept (COUNT) ;                                 \
        .quad (START) + (i << PMD_SHIFT) + (PERM) ;     \
        i = i + 1 ;                                     \
        .endr
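
/*
 * For example, PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
 * expands to 512 quadwords 0x0, 0x200000, 0x400000, ... (PMD_SHIFT is 21,
 * so each step is 2MB), each OR'd with the large-page permission bits,
 * i.e. one 2MB identity mapping per pmd slot.
 */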

        .data
        /*
         * This default setting generates an ident mapping at address 0x100000
         * and a mapping for the kernel that precisely maps virtual address
         * 0xffffffff80000000 to physical address 0x000000. (always using
         * 2Mbyte large pages provided by PAE mode)
         */
NEXT_PAGE(init_level4_pgt)
        .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
        .org init_level4_pgt + L4_PAGE_OFFSET*8, 0
        .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
        .org init_level4_pgt + L4_START_KERNEL*8, 0
        /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
        .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
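        /*
         * Slot 0 provides the boot-time identity mapping, slot
         * L4_PAGE_OFFSET the direct mapping of physical memory, and slot
         * L4_START_KERNEL (511) the kernel text mapping; the .org
         * directives zero-fill the entries in between.
         */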

NEXT_PAGE(level3_ident_pgt)
        .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
        .fill 511,8,0

NEXT_PAGE(level3_kernel_pgt)
        .fill L3_START_KERNEL,8,0
        /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
        .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
        .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE

NEXT_PAGE(level2_fixmap_pgt)
        .fill 506,8,0
        .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
        /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
        .fill 5,8,0
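        /* 506 zero entries + 1 level1_fixmap_pgt entry + 5 reserved
         * entries = 512 slots in total. */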

NEXT_PAGE(level1_fixmap_pgt)
        .fill 512,8,0

NEXT_PAGE(level2_ident_pgt)
        /* Since I easily can, map the first 1G.
         * Don't set NX because code runs from these pages.
         */
        PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)

NEXT_PAGE(level2_kernel_pgt)
        /*
         * 512 MB kernel mapping. We spend a full page on this pagetable
         * anyway.
         *
         * The kernel code+data+bss must not be bigger than that.
         *
         * (NOTE: at +512MB starts the module area, see MODULES_VADDR.
         *  If you want to increase this then increase MODULES_VADDR
         *  too.)
         */
        PMDS(0, __PAGE_KERNEL_LARGE_EXEC,
                KERNEL_IMAGE_SIZE/PMD_SIZE)

NEXT_PAGE(level2_spare_pgt)
        .fill 512, 8, 0

#undef PMDS
#undef NEXT_PAGE

        .data
        .align 16
        .globl early_gdt_descr
early_gdt_descr:
        .word GDT_ENTRIES*8-1
early_gdt_descr_base:
        .quad INIT_PER_CPU_VAR(gdt_page)

ENTRY(phys_base)
        /* This must match the first entry in level2_kernel_pgt */
        .quad 0x0000000000000000

#include "../../x86/xen/xen-head.S"

        .section .bss, "aw", @nobits
        .align L1_CACHE_BYTES
ENTRY(idt_table)
        .skip IDT_ENTRIES * 16

        __PAGE_ALIGNED_BSS
        .align PAGE_SIZE
ENTRY(empty_zero_page)
        .skip PAGE_SIZE