/* SPDX-License-Identifier: GPL-2.0 */
/*
 * linux/boot/head.S
 *
 * Copyright (C) 1991, 1992, 1993 Linus Torvalds
 */

/*
 * head.S contains the 32-bit startup code.
 *
 * NOTE!!! Startup happens at absolute address 0x00001000, which is also where
 * the page directory will exist. The startup code will be overwritten by
 * the page directory. [According to comments etc elsewhere on a compressed
 * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
 *
 * Page 0 is deliberately kept safe, since System Management Mode code in
 * laptops may need to access the BIOS data stored there. This is also
 * useful for future device drivers that access the BIOS via VM86 mode.
 */

/*
 * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
 */
	.code32
	.text

#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/boot.h>
#include <asm/msr.h>
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
#include <asm/desc_defs.h>
#include <asm/trapnr.h>

/*
 * Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT will result
 * in assembly errors from trying to move .org backward due to the excessive
 * alignment.
 */
#undef __ALIGN
#define __ALIGN .balign 16, 0x90

/*
 * Locally defined symbols should be marked hidden:
 */
	.hidden _bss
	.hidden _ebss
	.hidden _end

	__HEAD

/*
 * This macro gives the relative virtual address of X, i.e. the offset of X
 * from startup_32. This is the same as the link-time virtual address of X,
 * since startup_32 is at 0, but defining it this way tells the
 * assembler/linker that we do not want the actual run-time address of X. This
 * prevents the linker from trying to create unwanted run-time relocation
 * entries for the reference when the compressed kernel is linked as PIE.
 *
 * A reference X(%reg) will result in the link-time VA of X being stored with
 * the instruction, and a run-time R_X86_64_RELATIVE relocation entry that
 * adds the 64-bit base address where the kernel is loaded.
 *
 * Replacing it with (X-startup_32)(%reg) results in the offset being stored,
 * and no run-time relocation.
 *
 * The macro should be used as a displacement with a base register containing
 * the run-time address of startup_32 [i.e. rva(X)(%reg)], or as an immediate
 * [$ rva(X)].
 *
 * This macro can only be used from within the .head.text section, since the
 * expression requires startup_32 to be in the same section as the code being
 * assembled.
 */
#define rva(X) ((X) - startup_32)

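/*
 * A minimal usage sketch (foo is a hypothetical symbol in this section,
 * and %ebp is assumed to hold the run-time address of startup_32):
 *
 *	movl foo(%ebp), %eax		-> stores the link-time VA of foo and
 *					   emits an R_X86_64_RELATIVE fixup
 *	movl rva(foo)(%ebp), %eax	-> stores only foo - startup_32, so no
 *					   run-time relocation is needed
 */
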
	.code32
SYM_FUNC_START(startup_32)
/*
 * 32bit entry is 0 and it is ABI so immutable!
 * If we come here directly from a bootloader,
 * kernel(text+data+bss+brk), ramdisk, zero_page, command line
 * all need to be under the 4G limit.
 */
	cld
	cli

/*
 * Calculate the delta between where we were compiled to run
 * at and where we were actually loaded at. This can only be done
 * with a short local call on x86. Nothing else will tell us what
 * address we are running at. The reserved chunk of the real-mode
 * data at 0x1e4 (defined as a scratch field) is used as the stack
 * for this calculation. Only 4 bytes are needed.
 */
	leal (BP_scratch+4)(%esi), %esp
	call 1f
1:	popl %ebp
	subl $ rva(1b), %ebp

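/*
 * Worked example with hypothetical numbers: if the image was linked with
 * startup_32 at 0 and loaded at 0x1000000, and label 1 above is at offset
 * rva(1b) = 0x20, then the call pushes 0x1000020 (the run-time address of
 * the label), popl leaves it in %ebp, and the subl yields
 * %ebp = 0x1000020 - 0x20 = 0x1000000, the run-time address of startup_32.
 */
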
/* Load new GDT with the 64bit segments using 32bit descriptor */
	leal rva(gdt)(%ebp), %eax
	movl %eax, 2(%eax)
	lgdt (%eax)

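/*
 * Note on the three instructions above: the gdt descriptor defined in
 * .data below starts with a 2-byte limit followed by a base field at
 * offset 2 that is initialized to 0. Writing %eax (the run-time address
 * of gdt) to 2(%eax) fills in that base, so the lgdt loads a GDTR whose
 * base is correct wherever the image was actually loaded.
 */
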
/* Load segment registers with our descriptors */
	movl $__BOOT_DS, %eax
	movl %eax, %ds
	movl %eax, %es
	movl %eax, %fs
	movl %eax, %gs
	movl %eax, %ss

/* Setup a stack and load CS from current GDT */
	leal rva(boot_stack_end)(%ebp), %esp

	pushl $__KERNEL32_CS
	leal rva(1f)(%ebp), %eax
	pushl %eax
	lretl
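/*
 * The lretl above consumes the two values just pushed: it pops the new
 * %eip (the run-time address of label 1 below) and then the new %cs
 * (__KERNEL32_CS), reloading %cs from the GDT installed above. Right
 * before the lretl the stack looks like this:
 *
 *	(%esp)  -> run-time address of 1f	(new EIP)
 *	4(%esp) -> __KERNEL32_CS		(new CS selector)
 */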
1:

/* Setup Exception handling for SEV-ES */
#ifdef CONFIG_AMD_MEM_ENCRYPT
	call startup32_load_idt
#endif

/* Make sure cpu supports long mode. */
	call verify_cpu
	testl %eax, %eax
	jnz .Lno_longmode

/*
 * Compute the delta between where we were compiled to run at
 * and where the code will actually run at.
 *
 * %ebp contains the address we are loaded at by the boot loader and %ebx
 * contains the address where we should move the kernel image temporarily
 * for safe in-place decompression.
 */

#ifdef CONFIG_RELOCATABLE
	movl %ebp, %ebx
	movl BP_kernel_alignment(%esi), %eax
	decl %eax
	addl %eax, %ebx
	notl %eax
	andl %eax, %ebx
	cmpl $LOAD_PHYSICAL_ADDR, %ebx
	jae 1f
#endif
	movl $LOAD_PHYSICAL_ADDR, %ebx
1:

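/*
 * The decl/addl/notl/andl sequence above rounds the load address up to
 * the kernel alignment. With hypothetical numbers, a load address of
 * 0x234567 and BP_kernel_alignment = 0x200000 give
 * (0x234567 + 0x1fffff) & ~0x1fffff = 0x400000 in %ebx.
 */
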
/* Target address to relocate to for decompression */
	addl BP_init_size(%esi), %ebx
	subl $ rva(_end), %ebx

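/*
 * Worked example with hypothetical numbers: if the aligned address in
 * %ebx was 0x1000000, BP_init_size is 0x3000000 and rva(_end) is
 * 0x400000, the two instructions above leave
 * %ebx = 0x1000000 + 0x3000000 - 0x400000 = 0x3c00000, i.e. the image
 * will be copied so that it ends at the aligned address plus init_size.
 */
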
/*
 * Prepare for entering 64 bit mode
 */

/* Enable PAE mode */
	movl %cr4, %eax
	orl $X86_CR4_PAE, %eax
	movl %eax, %cr4

/*
 * Build early 4G boot pagetable
 */
/*
 * If SEV is active then set the encryption mask in the page tables.
 * This will ensure that when the kernel is copied and decompressed
 * it will be done so encrypted.
 */
	xorl %edx, %edx
#ifdef CONFIG_AMD_MEM_ENCRYPT
	call get_sev_encryption_bit
	xorl %edx, %edx
	testl %eax, %eax
	jz 1f
	subl $32, %eax	/* Encryption bit is always above bit 31 */
	bts %eax, %edx	/* Set encryption mask for page tables */
/*
 * Set MSR_AMD64_SEV_ENABLED_BIT in sev_status so that
 * startup32_check_sev_cbit() will do a check. sev_enable() will
 * initialize sev_status with all the bits reported by
 * MSR_AMD_SEV_STATUS later, but only MSR_AMD64_SEV_ENABLED_BIT
 * needs to be set for now.
 */
	movl $1, rva(sev_status)(%ebp)
1:
#endif

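/*
 * Hypothetical example for the C-bit handling above: if
 * get_sev_encryption_bit returns 51, the subl leaves 19 in %eax and the
 * bts sets bit 19 of %edx, i.e. %edx = 0x80000. Since the page table
 * entries below are written as two 32-bit halves, adding %edx to the
 * high dword of each entry sets physical-address bit 51, the SEV
 * encryption mask.
 */
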
/* Initialize Page tables to 0 */
	leal rva(pgtable)(%ebx), %edi
	xorl %eax, %eax
	movl $(BOOT_INIT_PGT_SIZE/4), %ecx
	rep stosl

/* Build Level 4 */
	leal rva(pgtable + 0)(%ebx), %edi
	leal 0x1007(%edi), %eax
	movl %eax, 0(%edi)
	addl %edx, 4(%edi)

/* Build Level 3 */
	leal rva(pgtable + 0x1000)(%ebx), %edi
	leal 0x1007(%edi), %eax
	movl $4, %ecx
1:	movl %eax, 0x00(%edi)
	addl %edx, 0x04(%edi)
	addl $0x00001000, %eax
	addl $8, %edi
	decl %ecx
	jnz 1b

/* Build Level 2 */
	leal rva(pgtable + 0x2000)(%ebx), %edi
	movl $0x00000183, %eax
	movl $2048, %ecx
1:	movl %eax, 0(%edi)
	addl %edx, 4(%edi)
	addl $0x00200000, %eax
	addl $8, %edi
	decl %ecx
	jnz 1b

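/*
 * A rough summary of what the loops above build: one top-level entry
 * pointing at a single level-3 table, whose 4 entries point at 4 level-2
 * tables; the 4 * 512 = 2048 level-2 entries each map a 2MB page
 * (0x183 = PRESENT + RW + PS (2MB page) + GLOBAL), giving an identity
 * mapping of the first 4GB. The low bits 0x7 in the 0x1007 constants mark
 * the table-pointer entries PRESENT + RW + USER.
 */
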
/* Enable the boot page tables */
	leal rva(pgtable)(%ebx), %eax
	movl %eax, %cr3

/* Enable Long mode in EFER (Extended Feature Enable Register) */
	movl $MSR_EFER, %ecx
	rdmsr
	btsl $_EFER_LME, %eax
	wrmsr

/* After gdt is loaded */
	xorl %eax, %eax
	lldt %ax
	movl $__BOOT_TSS, %eax
	ltr %ax

#ifdef CONFIG_AMD_MEM_ENCRYPT
/* Check if the C-bit position is correct when SEV is active */
	call startup32_check_sev_cbit
#endif

/*
 * Setup for the jump to 64bit mode
 *
 * When the jump is performed we will be in long mode but
 * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
 * (and in turn EFER.LMA = 1). To jump into 64bit mode we use
 * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
 * We place all of the values on our mini stack so lret can
 * be used to perform that far jump.
 */
	leal rva(startup_64)(%ebp), %eax
	pushl $__KERNEL_CS
	pushl %eax

/* Enter paged protected Mode, activating Long Mode */
	movl $CR0_STATE, %eax
	movl %eax, %cr0

/* Jump from 32bit compatibility mode into 64bit mode. */
	lret
SYM_FUNC_END(startup_32)

	.code64
	.org 0x200
SYM_CODE_START(startup_64)
/*
 * 64bit entry is 0x200 and it is ABI so immutable!
 * We come here either from startup_32 or directly from a
 * 64bit bootloader.
 * If we come here from a bootloader, kernel(text+data+bss+brk),
 * ramdisk, zero_page, command line could be above 4G.
 * We depend on an identity mapped page table being provided
 * that maps our entire kernel(text+data+bss+brk), zero page
 * and command line.
 */

	cld
	cli

/* Setup data segments. */
	xorl %eax, %eax
	movl %eax, %ds
	movl %eax, %es
	movl %eax, %ss
	movl %eax, %fs
	movl %eax, %gs

/*
 * Compute the decompressed kernel start address. It is the address we
 * were loaded at, aligned up to a 2M boundary; %rbp will hold the
 * decompressed kernel start address.
 *
 * If this is a relocatable kernel, decompress and run the kernel from
 * the load address aligned to a 2MB boundary; otherwise decompress and
 * run the kernel from LOAD_PHYSICAL_ADDR.
 *
 * We cannot rely on the calculation done in 32-bit mode, since we
 * may have been invoked via the 64-bit entry point.
 */

/* Start with the delta to where the kernel will run at. */
#ifdef CONFIG_RELOCATABLE
	leaq startup_32(%rip) /* - $startup_32 */, %rbp
	movl BP_kernel_alignment(%rsi), %eax
	decl %eax
	addq %rax, %rbp
	notq %rax
	andq %rax, %rbp
	cmpq $LOAD_PHYSICAL_ADDR, %rbp
	jae 1f
#endif
	movq $LOAD_PHYSICAL_ADDR, %rbp
1:

/* Target address to relocate to for decompression */
	movl BP_init_size(%rsi), %ebx
	subl $ rva(_end), %ebx
	addq %rbp, %rbx

/* Set up the stack */
	leaq rva(boot_stack_end)(%rbx), %rsp

/*
 * At this point we are in long mode with 4-level paging enabled,
 * but we might want to enable 5-level paging or vice versa.
 *
 * The problem is that we cannot do it directly. Setting or clearing
 * CR4.LA57 in long mode would trigger #GP. So we need to switch off
 * long mode and paging first.
 *
 * We also need a trampoline in lower memory to switch over from
 * 4- to 5-level paging for cases when the bootloader puts the kernel
 * above 4G, but didn't enable 5-level paging for us.
 *
 * The same trampoline can be used to switch from 5- to 4-level paging
 * mode, like when starting a 4-level paging kernel via kexec() when the
 * original kernel worked in 5-level paging mode.
 *
 * For the trampoline, we need the top page table to reside in lower
 * memory as we don't have a way to load 64-bit values into CR3 in
 * 32-bit mode.
 */

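/*
 * A rough sketch of the mode switch described above (the idea only; the
 * actual trampoline code lives elsewhere in the boot code):
 *
 *	- far-jump to a 32-bit code segment placed in low memory
 *	- clear CR0.PG to drop out of long mode (EFER.LMA becomes 0)
 *	- toggle CR4.LA57 and load a CR3 value that fits in 32 bits
 *	- set CR0.PG again and far-return into 64-bit code
 */
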
/* Make sure we have GDT with 32-bit code segment */
	leaq gdt64(%rip), %rax
	addq %rax, 2(%rax)
	lgdt (%rax)

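/*
 * Note on the addq above: gdt64 (in .data below) stores the offset
 * gdt - gdt64 in its base field, so adding the run-time address of gdt64
 * turns that field into the absolute run-time address of gdt before the
 * lgdt loads it.
 */
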
/* Reload CS so IRET returns to a CS actually in the GDT */
	pushq $__KERNEL_CS
	leaq .Lon_kernel_cs(%rip), %rax
	pushq %rax
	lretq

.Lon_kernel_cs:
/*
 * RSI holds a pointer to a boot_params structure provided by the
 * loader, and this needs to be preserved across C function calls. So
 * move it into a callee saved register.
 */
	movq %rsi, %r15

	call load_stage1_idt

#ifdef CONFIG_AMD_MEM_ENCRYPT
/*
 * Now that the stage1 interrupt handlers are set up, #VC exceptions from
 * CPUID instructions can be properly handled for SEV-ES guests.
 *
 * For SEV-SNP, the CPUID table also needs to be set up in advance of any
 * CPUID instructions being issued, so go ahead and do that now via
 * sev_enable(), which will also handle the rest of the SEV-related
 * detection/setup to ensure that has been done in advance of any dependent
 * code. Pass the boot_params pointer as the first argument.
 */
	movq %r15, %rdi
	call sev_enable
#endif

/* Preserve only the CR4 bits that must be preserved, and clear the rest */
	movq %cr4, %rax
	andl $(X86_CR4_PAE | X86_CR4_MCE | X86_CR4_LA57), %eax
	movq %rax, %cr4

/*
 * configure_5level_paging() updates the number of paging levels using
 * a trampoline in 32-bit addressable memory if the current number does
 * not match the desired number.
 *
 * Pass the boot_params pointer as the first argument. The second
 * argument is the relocated address of the page table to use instead
 * of the page table in trampoline memory (if required).
 */
	movq %r15, %rdi
	leaq rva(top_pgtable)(%rbx), %rsi
	call configure_5level_paging

/* Zero EFLAGS */
	pushq $0
	popfq

/*
 * Copy the compressed kernel to the end of our buffer
 * where decompression in place becomes safe.
 */
	leaq (_bss-8)(%rip), %rsi
	leaq rva(_bss-8)(%rbx), %rdi
	movl $(_bss - startup_32), %ecx
	shrl $3, %ecx
	std
	rep movsq
	cld

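/*
 * A short note on the copy above: the source range (where we are running)
 * and the destination range at %rbx may overlap, with the destination at
 * the higher address, so the copy runs backwards: %rsi and %rdi point at
 * the last 8-byte word of source and destination, %ecx holds the size up
 * to _bss in qwords, and with the direction flag set (std) rep movsq
 * walks both pointers downwards. cld restores the normal direction.
 */
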
/*
 * The GDT may get overwritten either during the copy we just did or
 * during extract_kernel below. To avoid any issues, repoint the GDTR
 * to the new copy of the GDT.
 */
	leaq rva(gdt64)(%rbx), %rax
	leaq rva(gdt)(%rbx), %rdx
	movq %rdx, 2(%rax)
	lgdt (%rax)

/*
 * Jump to the relocated address.
 */
	leaq rva(.Lrelocated)(%rbx), %rax
	jmp *%rax
SYM_CODE_END(startup_64)

	.text
SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)

/*
 * Clear BSS (stack is currently empty)
 */
	xorl %eax, %eax
	leaq _bss(%rip), %rdi
	leaq _ebss(%rip), %rcx
	subq %rdi, %rcx
	shrq $3, %rcx
	rep stosq

	call load_stage2_idt

/* Pass boot_params to initialize_identity_maps() */
	movq %r15, %rdi
	call initialize_identity_maps

/*
 * Do the extraction, and jump to the new kernel.
 */
/* pass struct boot_params pointer and output target address */
	movq %r15, %rdi
	movq %rbp, %rsi
	call extract_kernel	/* returns kernel entry point in %rax */

/*
 * Jump to the decompressed kernel.
 */
	movq %r15, %rsi
	jmp *%rax
SYM_FUNC_END(.Lrelocated)

	.code32
SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
/* This isn't an x86-64 CPU, so hang intentionally; we cannot continue */
1:
	hlt
	jmp 1b
SYM_FUNC_END(.Lno_longmode)

	.globl verify_cpu
#include "../../kernel/verify_cpu.S"

	.data
SYM_DATA_START_LOCAL(gdt64)
	.word gdt_end - gdt - 1
	.quad gdt - gdt64
SYM_DATA_END(gdt64)
	.balign 8
SYM_DATA_START_LOCAL(gdt)
	.word gdt_end - gdt - 1
	.long 0
	.word 0
	.quad 0x00cf9a000000ffff	/* __KERNEL32_CS */
	.quad 0x00af9a000000ffff	/* __KERNEL_CS */
	.quad 0x00cf92000000ffff	/* __KERNEL_DS */
	.quad 0x0080890000000000	/* TS descriptor */
	.quad 0x0000000000000000	/* TS continued */
SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)

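/*
 * For reference, decoding the two code descriptors above by the standard
 * x86 descriptor layout: both have base 0, a 4GB limit (0xfffff with the
 * granularity bit set) and access byte 0x9a (present, ring 0, execute/read
 * code). The difference is the flags nibble: 0xc (D=1) makes __KERNEL32_CS
 * a 32-bit segment, while 0xa (L=1) makes __KERNEL_CS a 64-bit segment.
 */
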
SYM_DATA_START(boot_idt_desc)
	.word boot_idt_end - boot_idt - 1
	.quad 0
SYM_DATA_END(boot_idt_desc)
	.balign 8
SYM_DATA_START(boot_idt)
	.rept BOOT_IDT_ENTRIES
	.quad 0
	.quad 0
	.endr
SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)

/*
 * Stack and heap for uncompression
 */
	.bss
	.balign 4
SYM_DATA_START_LOCAL(boot_stack)
	.fill BOOT_STACK_SIZE, 1, 0
	.balign 16
SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end)

/*
 * Space for page tables (not in .bss so not zeroed)
 */
	.section ".pgtable","aw",@nobits
	.balign 4096
SYM_DATA_LOCAL(pgtable, .fill BOOT_PGT_SIZE, 1, 0)

/*
 * This page table is going to be used instead of the page table in the
 * trampoline memory.
 */
SYM_DATA_LOCAL(top_pgtable, .fill PAGE_SIZE, 1, 0)