GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/vdso/vdso32-setup.c
/*
 * (C) Copyright 2002 Linus Torvalds
 * Portions based on the vdso-randomization code from exec-shield:
 * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar
 *
 * This file contains the needed initializations to support sysenter.
 */

#include <linux/init.h>
#include <linux/smp.h>
#include <linux/thread_info.h>
#include <linux/sched.h>
#include <linux/gfp.h>
#include <linux/string.h>
#include <linux/elf.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/module.h>

#include <asm/cpufeature.h>
#include <asm/msr.h>
#include <asm/pgtable.h>
#include <asm/unistd.h>
#include <asm/elf.h>
#include <asm/tlbflush.h>
#include <asm/vdso.h>
#include <asm/proto.h>

enum {
	VDSO_DISABLED = 0,
	VDSO_ENABLED = 1,
	VDSO_COMPAT = 2,
};

#ifdef CONFIG_COMPAT_VDSO
#define VDSO_DEFAULT	VDSO_COMPAT
#else
#define VDSO_DEFAULT	VDSO_ENABLED
#endif

#ifdef CONFIG_X86_64
#define vdso_enabled			sysctl_vsyscall32
#define arch_setup_additional_pages	syscall32_setup_pages
#endif

/*
 * This is the difference between the prelinked addresses in the vDSO images
 * and the VDSO_HIGH_BASE address where CONFIG_COMPAT_VDSO places the vDSO
 * in the user address space.
 */
#define VDSO_ADDR_ADJUST	(VDSO_HIGH_BASE - (unsigned long)VDSO32_PRELINK)
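
/*
 * Illustrative example (hypothetical addresses, not the real values):
 * if the image were prelinked at 0x0 and VDSO_HIGH_BASE were 0xffffe000,
 * every address in the image would be shifted up by 0xffffe000 during
 * the relocation pass below.
 */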

/*
 * Should the kernel map a VDSO page into processes and pass its
 * address down to glibc upon exec()?
 */
unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT;

static int __init vdso_setup(char *s)
{
	vdso_enabled = simple_strtoul(s, NULL, 0);

	return 1;
}

/*
 * For consistency, the argument vdso32=[012] affects the 32-bit vDSO
 * behavior on both 64-bit and 32-bit kernels.
 * On 32-bit kernels, vdso=[012] means the same thing.
 */
__setup("vdso32=", vdso_setup);

#ifdef CONFIG_X86_32
__setup_param("vdso=", vdso32_setup, vdso_setup, 0);

EXPORT_SYMBOL_GPL(vdso_enabled);
#endif
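
/*
 * Usage note: booting with "vdso32=0" disables the 32-bit vDSO,
 * "vdso32=1" maps it at a normally allocated address, and "vdso32=2"
 * pins it at the fixed VDSO_HIGH_BASE compat address (see the enum
 * above and arch_setup_additional_pages() below).
 */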

static __init void reloc_symtab(Elf32_Ehdr *ehdr,
				unsigned offset, unsigned size)
{
	Elf32_Sym *sym = (void *)ehdr + offset;
	unsigned nsym = size / sizeof(*sym);
	unsigned i;

	for(i = 0; i < nsym; i++, sym++) {
		if (sym->st_shndx == SHN_UNDEF ||
		    sym->st_shndx == SHN_ABS)
			continue;  /* skip */

		if (sym->st_shndx > SHN_LORESERVE) {
			printk(KERN_INFO "VDSO: unexpected st_shndx %x\n",
			       sym->st_shndx);
			continue;
		}

		switch(ELF_ST_TYPE(sym->st_info)) {
		case STT_OBJECT:
		case STT_FUNC:
		case STT_SECTION:
		case STT_FILE:
			sym->st_value += VDSO_ADDR_ADJUST;
		}
	}
}
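
/*
 * Per the ELF gABI, dynamic tags at or above DT_ENCODING follow an
 * even/odd convention: even d_tag values use d_un.d_ptr (an address
 * that must be relocated), odd ones use d_un.d_val (a plain value).
 * reloc_dyn() relies on that rule for the OS-specific ranges.
 */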
static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset)
{
	Elf32_Dyn *dyn = (void *)ehdr + offset;

	for(; dyn->d_tag != DT_NULL; dyn++)
		switch(dyn->d_tag) {
		case DT_PLTGOT:
		case DT_HASH:
		case DT_STRTAB:
		case DT_SYMTAB:
		case DT_RELA:
		case DT_INIT:
		case DT_FINI:
		case DT_REL:
		case DT_DEBUG:
		case DT_JMPREL:
		case DT_VERSYM:
		case DT_VERDEF:
		case DT_VERNEED:
		case DT_ADDRRNGLO ... DT_ADDRRNGHI:
			/* definitely pointers needing relocation */
			dyn->d_un.d_ptr += VDSO_ADDR_ADJUST;
			break;

		case DT_ENCODING ... OLD_DT_LOOS-1:
		case DT_LOOS ... DT_HIOS-1:
			/* Tags above DT_ENCODING are pointers if
			   they're even */
			if (dyn->d_tag >= DT_ENCODING &&
			    (dyn->d_tag & 1) == 0)
				dyn->d_un.d_ptr += VDSO_ADDR_ADJUST;
			break;

		case DT_VERDEFNUM:
		case DT_VERNEEDNUM:
		case DT_FLAGS_1:
		case DT_RELACOUNT:
		case DT_RELCOUNT:
		case DT_VALRNGLO ... DT_VALRNGHI:
			/* definitely not pointers */
			break;

		case OLD_DT_LOOS ... DT_LOOS-1:
		case DT_HIOS ... DT_VALRNGLO-1:
		default:
			if (dyn->d_tag > DT_ENCODING)
				printk(KERN_INFO "VDSO: unexpected DT_tag %x\n",
				       dyn->d_tag);
			break;
		}
}
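
/*
 * relocate_vdso() rebases the whole image by VDSO_ADDR_ADJUST: the ELF
 * entry point, every program header, every SHF_ALLOC section, the
 * dynamic section found via PT_DYNAMIC, and any symbol tables.
 */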
static __init void relocate_vdso(Elf32_Ehdr *ehdr)
{
	Elf32_Phdr *phdr;
	Elf32_Shdr *shdr;
	int i;

	BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 ||
	       !elf_check_arch_ia32(ehdr) ||
	       ehdr->e_type != ET_DYN);

	ehdr->e_entry += VDSO_ADDR_ADJUST;

	/* rebase phdrs */
	phdr = (void *)ehdr + ehdr->e_phoff;
	for (i = 0; i < ehdr->e_phnum; i++) {
		phdr[i].p_vaddr += VDSO_ADDR_ADJUST;

		/* relocate dynamic stuff */
		if (phdr[i].p_type == PT_DYNAMIC)
			reloc_dyn(ehdr, phdr[i].p_offset);
	}

	/* rebase sections */
	shdr = (void *)ehdr + ehdr->e_shoff;
	for(i = 0; i < ehdr->e_shnum; i++) {
		if (!(shdr[i].sh_flags & SHF_ALLOC))
			continue;

		shdr[i].sh_addr += VDSO_ADDR_ADJUST;

		if (shdr[i].sh_type == SHT_SYMTAB ||
		    shdr[i].sh_type == SHT_DYNSYM)
			reloc_symtab(ehdr, shdr[i].sh_offset,
				     shdr[i].sh_size);
	}
}

static struct page *vdso32_pages[1];

#ifdef CONFIG_X86_64

#define vdso32_sysenter()	(boot_cpu_has(X86_FEATURE_SYSENTER32))
#define vdso32_syscall()	(boot_cpu_has(X86_FEATURE_SYSCALL32))

/* May not be __init: called during resume */
void syscall32_cpu_init(void)
{
	/* Load these always in case some future AMD CPU supports
	   SYSENTER from compat mode too. */
	checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
	checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL);
	checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);

	wrmsrl(MSR_CSTAR, ia32_cstar_target);
}

#define compat_uses_vma 1

static inline void map_compat_vdso(int map)
{
}

#else  /* CONFIG_X86_32 */

#define vdso32_sysenter()	(boot_cpu_has(X86_FEATURE_SEP))
#define vdso32_syscall()	(0)
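
/*
 * Program the SYSENTER MSRs so that a SYSENTER from user space enters
 * the kernel at ia32_sysenter_target on this CPU's init_tss stack;
 * this is a no-op on CPUs without X86_FEATURE_SEP.
 */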
void enable_sep_cpu(void)
{
	int cpu = get_cpu();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	if (!boot_cpu_has(X86_FEATURE_SEP)) {
		put_cpu();
		return;
	}

	tss->x86_tss.ss1 = __KERNEL_CS;
	tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss;
	wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
	wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0);
	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0);
	put_cpu();
}

static struct vm_area_struct gate_vma;

static int __init gate_vma_init(void)
{
	gate_vma.vm_mm = NULL;
	gate_vma.vm_start = FIXADDR_USER_START;
	gate_vma.vm_end = FIXADDR_USER_END;
	gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
	gate_vma.vm_page_prot = __P101;
	/*
	 * Make sure the vDSO gets into every core dump.
	 * Dumping its contents makes post-mortem fully interpretable later
	 * without matching up the same kernel and hardware config to see
	 * what PC values meant.
	 */
	gate_vma.vm_flags |= VM_ALWAYSDUMP;
	return 0;
}

#define compat_uses_vma 0

static void map_compat_vdso(int map)
{
	static int vdso_mapped;

	if (map == vdso_mapped)
		return;

	vdso_mapped = map;

	__set_fixmap(FIX_VDSO, page_to_pfn(vdso32_pages[0]) << PAGE_SHIFT,
		     map ? PAGE_READONLY_EXEC : PAGE_NONE);

	/* flush stray tlbs */
	flush_tlb_all();
}

#endif	/* CONFIG_X86_64 */
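
/*
 * Runs once at boot: copy the vDSO image variant that matches the CPU
 * (syscall-, sysenter-, or int80-based) into a freshly zeroed page,
 * then rebase it by VDSO_ADDR_ADJUST via relocate_vdso().
 */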
int __init sysenter_setup(void)
{
	void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
	const void *vsyscall;
	size_t vsyscall_len;

	vdso32_pages[0] = virt_to_page(syscall_page);

#ifdef CONFIG_X86_32
	gate_vma_init();
#endif

	if (vdso32_syscall()) {
		vsyscall = &vdso32_syscall_start;
		vsyscall_len = &vdso32_syscall_end - &vdso32_syscall_start;
	} else if (vdso32_sysenter()) {
		vsyscall = &vdso32_sysenter_start;
		vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start;
	} else {
		vsyscall = &vdso32_int80_start;
		vsyscall_len = &vdso32_int80_end - &vdso32_int80_start;
	}

	memcpy(syscall_page, vsyscall, vsyscall_len);
	relocate_vdso(syscall_page);

	return 0;
}

/* Setup a VMA at program startup for the vsyscall page */
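/*
 * Called by the ELF loader at exec time (on 64-bit kernels this is
 * aliased to syscall32_setup_pages, see above).  sysenter_return,
 * recorded at the end, is the vDSO address that the kernel's SYSEXIT
 * path returns to after a SYSENTER system call.
 */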
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr;
	int ret = 0;
	bool compat;

	if (vdso_enabled == VDSO_DISABLED)
		return 0;

	down_write(&mm->mmap_sem);

	/* Test compat mode once here, in case someone
	   changes it via sysctl */
	compat = (vdso_enabled == VDSO_COMPAT);

	map_compat_vdso(compat);

	if (compat)
		addr = VDSO_HIGH_BASE;
	else {
		addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
		if (IS_ERR_VALUE(addr)) {
			ret = addr;
			goto up_fail;
		}
	}

	current->mm->context.vdso = (void *)addr;

	if (compat_uses_vma || !compat) {
		/*
		 * MAYWRITE to allow gdb to COW and set breakpoints
		 *
		 * Make sure the vDSO gets into every core dump.
		 * Dumping its contents makes post-mortem fully
		 * interpretable later without matching up the same
		 * kernel and hardware config to see what PC values
		 * meant.
		 */
		ret = install_special_mapping(mm, addr, PAGE_SIZE,
					      VM_READ|VM_EXEC|
					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
					      VM_ALWAYSDUMP,
					      vdso32_pages);

		if (ret)
			goto up_fail;
	}

	current_thread_info()->sysenter_return =
		VDSO32_SYMBOL(addr, SYSENTER_RETURN);

  up_fail:
	if (ret)
		current->mm->context.vdso = NULL;

	up_write(&mm->mmap_sem);

	return ret;
}

#ifdef CONFIG_X86_64

subsys_initcall(sysenter_setup);

#ifdef CONFIG_SYSCTL
/* Register vsyscall32 into the ABI table */
#include <linux/sysctl.h>

static ctl_table abi_table2[] = {
	{
		.procname	= "vsyscall32",
		.data		= &sysctl_vsyscall32,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{}
};

static ctl_table abi_root_table2[] = {
	{
		.procname = "abi",
		.mode = 0555,
		.child = abi_table2
	},
	{}
};
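
/*
 * Once registered, the flag is runtime-tunable from user space, e.g.
 * "echo 0 > /proc/sys/abi/vsyscall32" disables the 32-bit vDSO for
 * subsequently exec'd processes.
 */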
static __init int ia32_binfmt_init(void)
{
	register_sysctl_table(abi_root_table2);
	return 0;
}
__initcall(ia32_binfmt_init);
#endif

#else  /* CONFIG_X86_32 */

const char *arch_vma_name(struct vm_area_struct *vma)
{
	if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
		return "[vdso]";
	return NULL;
}

struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
	/*
	 * Check to see if the corresponding task was created in compat vdso
	 * mode.
	 */
	if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE)
		return &gate_vma;
	return NULL;
}

int in_gate_area(struct mm_struct *mm, unsigned long addr)
{
	const struct vm_area_struct *vma = get_gate_vma(mm);

	return vma && addr >= vma->vm_start && addr < vma->vm_end;
}

int in_gate_area_no_mm(unsigned long addr)
{
	return 0;
}

#endif	/* CONFIG_X86_64 */