Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/entry/syscall_64.c
26424 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/* 64-bit system call dispatch */
3
4
#include <linux/linkage.h>
5
#include <linux/sys.h>
6
#include <linux/cache.h>
7
#include <linux/syscalls.h>
8
#include <linux/entry-common.h>
9
#include <linux/nospec.h>
10
#include <asm/syscall.h>
11
12
/*
 * Expand the syscall list headers with __SYSCALL() defined as an extern
 * prototype, so that every __x64_<sym> handler is declared before use.
 * Entries given through __SYSCALL_NORETURN() additionally carry __noreturn.
 */
#define __SYSCALL(nr, sym) \
	extern long __x64_##sym(const struct pt_regs *);
#define __SYSCALL_NORETURN(nr, sym) \
	extern long __noreturn __x64_##sym(const struct pt_regs *);

#include <asm/syscalls_64.h>
#ifdef CONFIG_X86_X32_ABI
#include <asm/syscalls_x32.h>
#endif

#undef __SYSCALL
#undef __SYSCALL_NORETURN

/* From here on, NORETURN entries expand exactly like ordinary ones. */
#define __SYSCALL_NORETURN __SYSCALL
22
23
/*
24
* The sys_call_table[] is no longer used for system calls, but
25
* kernel/trace/trace_syscalls.c still wants to know the system
26
* call address.
27
*/
28
#define __SYSCALL(nr, sym) __x64_##sym,
29
const sys_call_ptr_t sys_call_table[] = {
30
#include <asm/syscalls_64.h>
31
};
32
#undef __SYSCALL
33
34
/*
 * Each list entry becomes one switch arm that calls the matching handler
 * and returns its result. The macro stays defined after this function so
 * that later expansions of the syscall lists can reuse it.
 */
#define __SYSCALL(nr, sym) \
	case nr: return __x64_##sym(regs);

/*
 * Invoke the handler for 64-bit system call number @nr with @regs and
 * return its result. Numbers without an entry in the list are routed to
 * __x64_sys_ni_syscall().
 */
long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
{
	switch (nr) {
#include <asm/syscalls_64.h>
	default:
		break;
	}

	return __x64_sys_ni_syscall(regs);
}
42
43
#ifdef CONFIG_X86_X32_ABI
/*
 * x32 counterpart of x64_sys_call(): invoke the handler listed for @nr in
 * the x32 syscall list and return its result. Numbers without an entry
 * are routed to __x64_sys_ni_syscall().
 */
long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
{
	switch (nr) {
#include <asm/syscalls_x32.h>
	default:
		break;
	}

	return __x64_sys_ni_syscall(regs);
}
#endif
52
53
/*
 * Try to run @nr as a native 64-bit system call.
 *
 * On success the handler's result is stored in regs->ax and true is
 * returned. Returns false, leaving @regs untouched, when @nr is outside
 * the range [0, NR_syscalls).
 */
static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
{
	/*
	 * Reinterpreting the number as unsigned turns negative values into
	 * very large ones, so a single comparison rejects them as well.
	 */
	unsigned int idx = nr;

	if (unlikely(idx >= NR_syscalls))
		return false;

	/* Keep the index in bounds even under speculative execution. */
	idx = array_index_nospec(idx, NR_syscalls);
	regs->ax = x64_sys_call(regs, idx);

	return true;
}
68
69
/*
 * Try to run @nr as an x32 system call.
 *
 * On success the handler's result is stored in regs->ax and true is
 * returned. Returns false, leaving @regs untouched, when x32 support is
 * not configured or @nr does not map into the x32 range.
 */
static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
{
	/*
	 * Rebase the number to the start of the x32 range. Anything below
	 * __X32_SYSCALL_BIT wraps around to a very large unsigned value and
	 * is therefore rejected by the range check below.
	 */
	unsigned int idx = nr - __X32_SYSCALL_BIT;

	if (!IS_ENABLED(CONFIG_X86_X32_ABI))
		return false;

	if (unlikely(idx >= X32_NR_syscalls))
		return false;

	/* Keep the index in bounds even under speculative execution. */
	idx = array_index_nospec(idx, X32_NR_syscalls);
	regs->ax = x32_sys_call(regs, idx);

	return true;
}
85
86
/* Returns true to return using SYSRET, or false to use IRET */
87
__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
88
{
89
add_random_kstack_offset();
90
nr = syscall_enter_from_user_mode(regs, nr);
91
92
instrumentation_begin();
93
94
if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) {
95
/* Invalid system call, but still a system call. */
96
regs->ax = __x64_sys_ni_syscall(regs);
97
}
98
99
instrumentation_end();
100
syscall_exit_to_user_mode(regs);
101
102
/*
103
* Check that the register state is valid for using SYSRET to exit
104
* to userspace. Otherwise use the slower but fully capable IRET
105
* exit path.
106
*/
107
108
/* XEN PV guests always use the IRET path */
109
if (cpu_feature_enabled(X86_FEATURE_XENPV))
110
return false;
111
112
/* SYSRET requires RCX == RIP and R11 == EFLAGS */
113
if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
114
return false;
115
116
/* CS and SS must match the values set in MSR_STAR */
117
if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
118
return false;
119
120
/*
121
* On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
122
* in kernel space. This essentially lets the user take over
123
* the kernel, since userspace controls RSP.
124
*
125
* TASK_SIZE_MAX covers all user-accessible addresses other than
126
* the deprecated vsyscall page.
127
*/
128
if (unlikely(regs->ip >= TASK_SIZE_MAX))
129
return false;
130
131
/*
132
* SYSRET cannot restore RF. It can restore TF, but unlike IRET,
133
* restoring TF results in a trap from userspace immediately after
134
* SYSRET.
135
*/
136
if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
137
return false;
138
139
/* Use SYSRET to exit to userspace */
140
return true;
141
}
142
143