/*
 * GitHub repository: torvalds/linux
 * Path: arch/um/kernel/skas/stub_exe.c
 */
#include <sys/ptrace.h>
#include <sys/prctl.h>
#include <sys/fcntl.h>
#include <asm/unistd.h>
#include <sysdep/stub.h>
#include <stub-data.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <generated/asm-offsets.h>

void _start(void);
noinline static void real_init(void)
14
{
15
struct stub_init_data init_data;
16
unsigned long res;
17
struct {
18
void *ss_sp;
19
int ss_flags;
20
size_t ss_size;
21
} stack = {
22
.ss_size = STUB_DATA_PAGES * UM_KERN_PAGE_SIZE,
23
};
24
struct {
25
void *sa_handler_;
26
unsigned long sa_flags;
27
void *sa_restorer;
28
unsigned long long sa_mask;
29
} sa = {
30
/* Need to set SA_RESTORER (but the handler never returns) */
31
.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO | 0x04000000,
32
};
33
34
/* set a nice name */
35
stub_syscall2(__NR_prctl, PR_SET_NAME, (unsigned long)"uml-userspace");
36
37
/* Make sure this process dies if the kernel dies */
38
stub_syscall2(__NR_prctl, PR_SET_PDEATHSIG, SIGKILL);
39
40
/* Needed in SECCOMP mode (and safe to do anyway) */
41
stub_syscall5(__NR_prctl, PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
42
43
/* read information from STDIN and close it */
44
res = stub_syscall3(__NR_read, 0,
45
(unsigned long)&init_data, sizeof(init_data));
46
if (res != sizeof(init_data))
47
stub_syscall1(__NR_exit, 10);
48
49
/* In SECCOMP mode, FD 0 is a socket and is later used for FD passing */
50
if (!init_data.seccomp)
51
stub_syscall1(__NR_close, 0);
52
else
53
stub_syscall3(__NR_fcntl, 0, F_SETFL, O_NONBLOCK);
54
55
/* map stub code + data */
56
res = stub_syscall6(STUB_MMAP_NR,
57
init_data.stub_start, UM_KERN_PAGE_SIZE,
58
PROT_READ | PROT_EXEC, MAP_FIXED | MAP_SHARED,
59
init_data.stub_code_fd, init_data.stub_code_offset);
60
if (res != init_data.stub_start)
61
stub_syscall1(__NR_exit, 11);
62
63
res = stub_syscall6(STUB_MMAP_NR,
64
init_data.stub_start + UM_KERN_PAGE_SIZE,
65
STUB_DATA_PAGES * UM_KERN_PAGE_SIZE,
66
PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED,
67
init_data.stub_data_fd, init_data.stub_data_offset);
68
if (res != init_data.stub_start + UM_KERN_PAGE_SIZE)
69
stub_syscall1(__NR_exit, 12);
70
71
/* In SECCOMP mode, we only need the signalling FD from now on */
72
if (init_data.seccomp) {
73
res = stub_syscall3(__NR_close_range, 1, ~0U, 0);
74
if (res != 0)
75
stub_syscall1(__NR_exit, 13);
76
}
77
78
/* setup signal stack inside stub data */
79
stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE;
80
stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0);
81
82
/* register signal handlers */
83
sa.sa_handler_ = (void *) init_data.signal_handler;
84
sa.sa_restorer = (void *) init_data.signal_restorer;
85
if (!init_data.seccomp) {
86
/* In ptrace mode, the SIGSEGV handler never returns */
87
sa.sa_mask = 0;
88
89
res = stub_syscall4(__NR_rt_sigaction, SIGSEGV,
90
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
91
if (res != 0)
92
stub_syscall1(__NR_exit, 14);
93
} else {
94
/* SECCOMP mode uses rt_sigreturn, need to mask all signals */
95
sa.sa_mask = ~0ULL;
96
97
res = stub_syscall4(__NR_rt_sigaction, SIGSEGV,
98
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
99
if (res != 0)
100
stub_syscall1(__NR_exit, 15);
101
102
res = stub_syscall4(__NR_rt_sigaction, SIGSYS,
103
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
104
if (res != 0)
105
stub_syscall1(__NR_exit, 16);
106
107
res = stub_syscall4(__NR_rt_sigaction, SIGALRM,
108
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
109
if (res != 0)
110
stub_syscall1(__NR_exit, 17);
111
112
res = stub_syscall4(__NR_rt_sigaction, SIGTRAP,
113
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
114
if (res != 0)
115
stub_syscall1(__NR_exit, 18);
116
117
res = stub_syscall4(__NR_rt_sigaction, SIGILL,
118
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
119
if (res != 0)
120
stub_syscall1(__NR_exit, 19);
121
122
res = stub_syscall4(__NR_rt_sigaction, SIGFPE,
123
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
124
if (res != 0)
125
stub_syscall1(__NR_exit, 20);
126
}
127
128
/*
129
* If in seccomp mode, install the SECCOMP filter and trigger a syscall.
130
* Otherwise set PTRACE_TRACEME and do a SIGSTOP.
131
*/
132
if (init_data.seccomp) {
133
struct sock_filter filter[] = {
134
#if __BITS_PER_LONG > 32
135
/* [0] Load upper 32bit of instruction pointer from seccomp_data */
136
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
137
(offsetof(struct seccomp_data, instruction_pointer) + 4)),
138
139
/* [1] Jump forward 3 instructions if the upper address is not identical */
140
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) >> 32, 0, 3),
141
#endif
142
/* [2] Load lower 32bit of instruction pointer from seccomp_data */
143
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
144
(offsetof(struct seccomp_data, instruction_pointer))),
145
146
/* [3] Mask out lower bits */
147
BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xfffff000),
148
149
/* [4] Jump to [6] if the lower bits are not on the expected page */
150
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) & 0xfffff000, 1, 0),
151
152
/* [5] Trap call, allow */
153
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
154
155
/* [6,7] Check architecture */
156
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
157
offsetof(struct seccomp_data, arch)),
158
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,
159
UM_SECCOMP_ARCH_NATIVE, 1, 0),
160
161
/* [8] Kill (for architecture check) */
162
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
163
164
/* [9] Load syscall number */
165
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
166
offsetof(struct seccomp_data, nr)),
167
168
/* [10-16] Check against permitted syscalls */
169
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex,
170
7, 0),
171
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,__NR_recvmsg,
172
6, 0),
173
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,__NR_close,
174
5, 0),
175
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, STUB_MMAP_NR,
176
4, 0),
177
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_munmap,
178
3, 0),
179
#ifdef __i386__
180
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_set_thread_area,
181
2, 0),
182
#else
183
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_arch_prctl,
184
2, 0),
185
#endif
186
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigreturn,
187
1, 0),
188
189
/* [17] Not one of the permitted syscalls */
190
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
191
192
/* [18] Permitted call for the stub */
193
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
194
};
195
struct sock_fprog prog = {
196
.len = sizeof(filter) / sizeof(filter[0]),
197
.filter = filter,
198
};
199
200
if (stub_syscall3(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
201
SECCOMP_FILTER_FLAG_TSYNC,
202
(unsigned long)&prog) != 0)
203
stub_syscall1(__NR_exit, 21);
204
205
/* Fall through, the exit syscall will cause SIGSYS */
206
} else {
207
stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
208
209
stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP);
210
}
211
212
stub_syscall1(__NR_exit, 30);
213
214
__builtin_unreachable();
215
}
__attribute__((naked)) void _start(void)
218
{
219
/*
220
* Since the stack after exec() starts at the top-most address,
221
* but that's exactly where we also want to map the stub data
222
* and code, this must:
223
* - push the stack by 1 code and STUB_DATA_PAGES data pages
224
* - call real_init()
225
* This way, real_init() can use the stack normally, while the
226
* original stack further down (higher address) will become
227
* inaccessible after the mmap() calls above.
228
*/
229
stub_start(real_init);
230
}