Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/um/kernel/skas/stub.c
26481 views
1
// SPDX-License-Identifier: GPL-2.0
2
/*
3
* Copyright (C) 2021 Benjamin Berg <[email protected]>
4
*/
5
6
#include <sysdep/stub.h>
7
8
#include <linux/futex.h>
9
#include <sys/socket.h>
10
#include <errno.h>
11
12
/*
 * Known security issues
 *
 * Userspace can jump to this address to execute *any* syscall that is
 * permitted by the stub. As we will return afterwards, it can do
 * whatever it likes, including:
 * - Tricking the kernel into handing out the memory FD
 * - Using this memory FD to read/write all physical memory
 * - Running in parallel to the kernel processing a syscall
 *   (possibly creating data races?)
 * - Blocking e.g. SIGALRM to avoid time based scheduling
 *
 * To avoid this, the permitted location for each syscall needs to be
 * checked for in the SECCOMP filter (which is reasonably simple). Also,
 * more care will need to go into considering how the code might be
 * tricked by using a prepared stack (or even by modifying the stack from
 * another thread in case SMP support is added).
 *
 * As for the SIGALRM, the best countermeasure will be to check in the
 * kernel that the process is reporting back the SIGALRM in a timely
 * fashion.
 */
34
static __always_inline int syscall_handler(int fd_map[STUB_MAX_FDS])
35
{
36
struct stub_data *d = get_stub_data();
37
int i;
38
unsigned long res;
39
int fd;
40
41
for (i = 0; i < d->syscall_data_len; i++) {
42
struct stub_syscall *sc = &d->syscall_data[i];
43
44
switch (sc->syscall) {
45
case STUB_SYSCALL_MMAP:
46
if (fd_map)
47
fd = fd_map[sc->mem.fd];
48
else
49
fd = sc->mem.fd;
50
51
res = stub_syscall6(STUB_MMAP_NR,
52
sc->mem.addr, sc->mem.length,
53
sc->mem.prot,
54
MAP_SHARED | MAP_FIXED,
55
fd, sc->mem.offset);
56
if (res != sc->mem.addr) {
57
d->err = res;
58
d->syscall_data_len = i;
59
return -1;
60
}
61
break;
62
case STUB_SYSCALL_MUNMAP:
63
res = stub_syscall2(__NR_munmap,
64
sc->mem.addr, sc->mem.length);
65
if (res) {
66
d->err = res;
67
d->syscall_data_len = i;
68
return -1;
69
}
70
break;
71
default:
72
d->err = -95; /* EOPNOTSUPP */
73
d->syscall_data_len = i;
74
return -1;
75
}
76
}
77
78
d->err = 0;
79
d->syscall_data_len = 0;
80
81
return 0;
82
}
83
84
/*
 * Entry point placed in the .__syscall_stub section: runs the queued
 * requests without an FD translation table, then calls trap_myself().
 */
void __section(".__syscall_stub")
stub_syscall_handler(void)
{
	/*
	 * NULL means the fds in the requests are used as-is. The return
	 * value is not needed here; syscall_handler() records the outcome
	 * in the stub data.
	 */
	(void)syscall_handler(NULL);

	trap_myself();
}
91
92
/*
 * Signal handler placed in the .__syscall_stub section.
 *
 * @sig:  signal number; stored in d->signal.
 * @info: siginfo pointer; its offset from d->sigstack is stored in
 *        d->si_offset.
 * @p:    ucontext pointer; the offset of its uc_mcontext from d->sigstack
 *        is stored in d->mctx_offset.
 *
 * After recording the above, the handler sets d->futex to FUTEX_IN_KERN,
 * wakes one waiter on that futex and then waits until d->futex no longer
 * holds FUTEX_IN_KERN (presumably changed by the UML kernel side -- the
 * writer is not visible in this file). If requests are queued in the stub
 * data, it first tries to receive passed file descriptors from fd 0, runs
 * the requests through syscall_handler() and closes the received
 * descriptors. If that fails, or if d->restart_wait is set, d->signal is
 * replaced by SIGSYS and the whole wake/wait cycle is repeated. Otherwise
 * the arch state is restored and the handler returns.
 */
void __section(".__syscall_stub")
stub_signal_interrupt(int sig, siginfo_t *info, void *p)
{
	struct stub_data *d = get_stub_data();
	char rcv_data;
	/* Control buffer for up to STUB_MAX_FDS descriptors, aligned for cmsghdr */
	union {
		char data[CMSG_SPACE(sizeof(int) * STUB_MAX_FDS)];
		struct cmsghdr align;
	} ctrl = {};
	struct iovec iov = {
		.iov_base = &rcv_data,
		.iov_len = 1,
	};
	struct msghdr msghdr = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = &ctrl,
		.msg_controllen = sizeof(ctrl),
	};
	ucontext_t *uc = p;
	struct cmsghdr *fd_msg;
	int *fd_map;
	int num_fds;
	long res;

	d->signal = sig;
	d->si_offset = (unsigned long)info - (unsigned long)&d->sigstack[0];
	d->mctx_offset = (unsigned long)&uc->uc_mcontext - (unsigned long)&d->sigstack[0];

restart_wait:
	d->futex = FUTEX_IN_KERN;
	do {
		res = stub_syscall3(__NR_futex, (unsigned long)&d->futex,
				    FUTEX_WAKE, 1);
	} while (res == -EINTR);

	/* Keep waiting until the futex word has actually been changed */
	do {
		res = stub_syscall4(__NR_futex, (unsigned long)&d->futex,
				    FUTEX_WAIT, FUTEX_IN_KERN, 0);
	} while (res == -EINTR || d->futex == FUTEX_IN_KERN);

	if (res < 0 && res != -EAGAIN)
		stub_syscall1(__NR_exit_group, 1);

	if (d->syscall_data_len) {
		/*
		 * recvmsg() overwrites msg_controllen with the number of
		 * control bytes it delivered. Offer the full buffer again on
		 * every pass, otherwise a later trip through restart_wait
		 * would receive into a shrunken control buffer.
		 */
		msghdr.msg_controllen = sizeof(ctrl);

		/* Read passed FDs (if any) */
		do {
			res = stub_syscall3(__NR_recvmsg, 0, (unsigned long)&msghdr, 0);
		} while (res == -EINTR);

		/* We should never have a receive error (other than -EAGAIN) */
		if (res < 0 && res != -EAGAIN)
			stub_syscall1(__NR_exit_group, 1);

		/* Receive the FDs */
		num_fds = 0;
		fd_msg = msghdr.msg_control;
		/* CMSG_DATA() already yields a pointer to the payload */
		fd_map = (void *)CMSG_DATA(fd_msg);
		if (res == iov.iov_len && msghdr.msg_controllen > sizeof(struct cmsghdr))
			num_fds = (fd_msg->cmsg_len - CMSG_LEN(0)) / sizeof(int);

		/* Try running queued syscalls. */
		res = syscall_handler(fd_map);

		/* The received descriptors are only needed for the calls above */
		while (num_fds)
			stub_syscall2(__NR_close, fd_map[--num_fds], 0);
	} else {
		res = 0;
	}

	if (res < 0 || d->restart_wait) {
		/* Report SIGSYS if we restart. */
		d->signal = SIGSYS;
		d->restart_wait = 0;

		goto restart_wait;
	}

	/* Restore arch dependent state that is not part of the mcontext */
	stub_seccomp_restore_state(&d->arch_data);

	/* Return so that the host modified mcontext is restored. */
}
175
176
/*
 * Placed in the .__syscall_stub section; issues rt_sigreturn and nothing
 * else. Presumably installed as the restorer for stub_signal_interrupt()
 * -- the registration is not visible in this file.
 */
void __section(".__syscall_stub")
stub_signal_restorer(void)
{
	/*
	 * rt_sigreturn must be issued with nothing of ours on the stack, so
	 * this function must not gain locals or further calls.
	 */
	stub_syscall0(__NR_rt_sigreturn);
}
182
183