Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/capsicum-test/linux.cc
39475 views
1
// Tests of Linux-specific functionality
2
#ifdef __linux__
3
4
#include <sys/types.h>
5
#include <sys/stat.h>
6
#include <sys/socket.h>
7
#include <sys/timerfd.h>
8
#include <sys/signalfd.h>
9
#include <sys/eventfd.h>
10
#include <sys/epoll.h>
11
#include <sys/inotify.h>
12
#include <sys/fanotify.h>
13
#include <sys/mman.h>
14
#include <sys/capability.h> // Requires e.g. libcap-dev package for POSIX.1e capabilities headers
15
#include <linux/aio_abi.h>
16
#include <linux/filter.h>
17
#include <linux/seccomp.h>
18
#include <linux/version.h>
19
#include <poll.h>
20
#include <sched.h>
21
#include <signal.h>
22
#include <fcntl.h>
23
#include <unistd.h>
24
25
#include <string>
26
27
#include "capsicum.h"
28
#include "syscalls.h"
29
#include "capsicum-test.h"
30
31
// Check the rights needed to use a timerfd through a capability:
//  - timerfd_settime(2) needs CAP_WRITE, plus CAP_READ when the old value is requested;
//  - timerfd_gettime(2) needs CAP_READ;
//  - poll(2) for the timer expiry needs CAP_EVENT.
TEST(Linux, TimerFD) {
  int fd = timerfd_create(CLOCK_MONOTONIC, 0);
  EXPECT_OK(fd);  // Report creation failure here rather than via the dup() calls below.

  cap_rights_t r_ro;
  cap_rights_init(&r_ro, CAP_READ);
  cap_rights_t r_wo;
  cap_rights_init(&r_wo, CAP_WRITE);
  cap_rights_t r_rw;
  cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);
  cap_rights_t r_rwpoll;
  cap_rights_init(&r_rwpoll, CAP_READ, CAP_WRITE, CAP_EVENT);

  int cap_fd_ro = dup(fd);
  EXPECT_OK(cap_fd_ro);
  EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_ro));
  int cap_fd_wo = dup(fd);
  EXPECT_OK(cap_fd_wo);
  EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_wo));
  int cap_fd_rw = dup(fd);
  EXPECT_OK(cap_fd_rw);
  EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rw));
  int cap_fd_all = dup(fd);
  EXPECT_OK(cap_fd_all);
  EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rwpoll));

  // One-shot timer that fires after 100ms.
  struct itimerspec old_ispec;
  struct itimerspec ispec;
  ispec.it_interval.tv_sec = 0;
  ispec.it_interval.tv_nsec = 0;
  ispec.it_value.tv_sec = 0;
  ispec.it_value.tv_nsec = 100000000;  // 100ms
  EXPECT_NOTCAPABLE(timerfd_settime(cap_fd_ro, 0, &ispec, NULL));
  // Asking for the previous setting is a read, so CAP_WRITE alone is not enough.
  EXPECT_NOTCAPABLE(timerfd_settime(cap_fd_wo, 0, &ispec, &old_ispec));
  EXPECT_OK(timerfd_settime(cap_fd_wo, 0, &ispec, NULL));
  EXPECT_OK(timerfd_settime(cap_fd_rw, 0, &ispec, NULL));
  EXPECT_OK(timerfd_settime(cap_fd_all, 0, &ispec, NULL));

  EXPECT_NOTCAPABLE(timerfd_gettime(cap_fd_wo, &old_ispec));
  EXPECT_OK(timerfd_gettime(cap_fd_ro, &old_ispec));
  EXPECT_OK(timerfd_gettime(cap_fd_rw, &old_ispec));
  EXPECT_OK(timerfd_gettime(cap_fd_all, &old_ispec));

  // To be able to poll() for the timer pop, still need CAP_EVENT.
  const int no_event_fds[] = {cap_fd_ro, cap_fd_wo, cap_fd_rw};
  struct pollfd poll_fd;
  for (int ii = 0; ii < 3; ii++) {
    poll_fd.fd = no_event_fds[ii];
    poll_fd.events = POLLIN;
    poll_fd.revents = 0;
    // Poll immediately returns with POLLNVAL
    EXPECT_OK(poll(&poll_fd, 1, 400));
    EXPECT_EQ(0, (poll_fd.revents & POLLIN));
    EXPECT_NE(0, (poll_fd.revents & POLLNVAL));
  }

  poll_fd.fd = cap_fd_all;
  EXPECT_OK(poll(&poll_fd, 1, 400));
  EXPECT_NE(0, (poll_fd.revents & POLLIN));
  EXPECT_EQ(0, (poll_fd.revents & POLLNVAL));

  // The one-shot timer has fired, so nothing should remain armed.
  EXPECT_OK(timerfd_gettime(cap_fd_all, &old_ispec));
  EXPECT_EQ(0, old_ispec.it_value.tv_sec);
  EXPECT_EQ(0, old_ispec.it_value.tv_nsec);
  EXPECT_EQ(0, old_ispec.it_interval.tv_sec);
  EXPECT_EQ(0, old_ispec.it_interval.tv_nsec);

  close(cap_fd_all);
  close(cap_fd_rw);
  close(cap_fd_wo);
  close(cap_fd_ro);
  close(fd);
}
106
107
// Exercise signalfd(2) through capabilities: reading signal information needs
// CAP_READ, changing the signal mask needs CAP_FSIGNAL, and being notified via
// poll(2) needs CAP_EVENT.
FORK_TEST(Linux, SignalFDIfSingleThreaded) {
  if (force_mt) {
    GTEST_SKIP() << "multi-threaded run clashes with signals";
  }
  const pid_t self_pid = getpid();

  // Block signals before registering against a new signal FD.
  sigset_t sigs;
  sigemptyset(&sigs);
  sigaddset(&sigs, SIGUSR1);
  EXPECT_OK(sigprocmask(SIG_BLOCK, &sigs, NULL));
  int fd = signalfd(-1, &sigs, 0);
  EXPECT_OK(fd);

  // Rights sets used for the restricted duplicates below.
  cap_rights_t r_rs;
  cap_rights_t r_ws;
  cap_rights_t r_sig;
  cap_rights_t r_rssig;
  cap_rights_t r_rssig_poll;
  cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
  cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
  cap_rights_init(&r_sig, CAP_FSIGNAL);
  cap_rights_init(&r_rssig, CAP_FSIGNAL, CAP_READ, CAP_SEEK);
  cap_rights_init(&r_rssig_poll, CAP_FSIGNAL, CAP_READ, CAP_SEEK, CAP_EVENT);

  // Various capability variants.
  int cap_fd_none = dup(fd);
  EXPECT_OK(cap_fd_none);
  EXPECT_OK(cap_rights_limit(cap_fd_none, &r_ws));

  int cap_fd_read = dup(fd);
  EXPECT_OK(cap_fd_read);
  EXPECT_OK(cap_rights_limit(cap_fd_read, &r_rs));

  int cap_fd_sig = dup(fd);
  EXPECT_OK(cap_fd_sig);
  EXPECT_OK(cap_rights_limit(cap_fd_sig, &r_sig));

  int cap_fd_sig_read = dup(fd);
  EXPECT_OK(cap_fd_sig_read);
  EXPECT_OK(cap_rights_limit(cap_fd_sig_read, &r_rssig));

  int cap_fd_all = dup(fd);
  EXPECT_OK(cap_fd_all);
  EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rssig_poll));

  // Need CAP_READ to read the signal information
  struct signalfd_siginfo info;
  kill(self_pid, SIGUSR1);
  EXPECT_NOTCAPABLE(read(cap_fd_none, &info, sizeof(struct signalfd_siginfo)));
  EXPECT_NOTCAPABLE(read(cap_fd_sig, &info, sizeof(struct signalfd_siginfo)));
  int nread = read(cap_fd_read, &info, sizeof(struct signalfd_siginfo));
  EXPECT_OK(nread);
  EXPECT_EQ(sizeof(struct signalfd_siginfo), (size_t)nread);
  EXPECT_EQ(SIGUSR1, (int)info.ssi_signo);

  // Need CAP_FSIGNAL to modify the signal mask.
  sigemptyset(&sigs);
  sigaddset(&sigs, SIGUSR1);
  sigaddset(&sigs, SIGUSR2);
  EXPECT_OK(sigprocmask(SIG_BLOCK, &sigs, NULL));
  EXPECT_NOTCAPABLE(signalfd(cap_fd_none, &sigs, 0));
  EXPECT_NOTCAPABLE(signalfd(cap_fd_read, &sigs, 0));
  EXPECT_EQ(cap_fd_sig, signalfd(cap_fd_sig, &sigs, 0));

  // Need CAP_EVENT to get notification of a signal in poll(2).
  kill(self_pid, SIGUSR2);

  struct pollfd pfd;
  pfd.fd = cap_fd_sig_read;
  pfd.events = POLLIN;
  pfd.revents = 0;
  EXPECT_OK(poll(&pfd, 1, 400));
  EXPECT_EQ(0, (pfd.revents & POLLIN));
  EXPECT_NE(0, (pfd.revents & POLLNVAL));

  pfd.fd = cap_fd_all;
  EXPECT_OK(poll(&pfd, 1, 400));
  EXPECT_NE(0, (pfd.revents & POLLIN));
  EXPECT_EQ(0, (pfd.revents & POLLNVAL));
}
185
186
// Check the rights needed on an eventfd: write(2) needs CAP_WRITE, read(2)
// needs CAP_READ and poll(2) needs CAP_EVENT.  A forked child writes the
// counter and the parent polls and reads it back.
TEST(Linux, EventFD) {
  int fd = eventfd(0, 0);
  EXPECT_OK(fd);

  cap_rights_t r_rs;
  cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
  cap_rights_t r_ws;
  cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
  cap_rights_t r_rws;
  cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
  cap_rights_t r_rwspoll;
  cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT);

  int cap_ro = dup(fd);
  EXPECT_OK(cap_ro);
  EXPECT_OK(cap_rights_limit(cap_ro, &r_rs));
  int cap_wo = dup(fd);
  EXPECT_OK(cap_wo);
  EXPECT_OK(cap_rights_limit(cap_wo, &r_ws));
  int cap_rw = dup(fd);
  EXPECT_OK(cap_rw);
  EXPECT_OK(cap_rights_limit(cap_rw, &r_rws));
  int cap_all = dup(fd);
  EXPECT_OK(cap_all);
  EXPECT_OK(cap_rights_limit(cap_all, &r_rwspoll));

  pid_t child = fork();
  EXPECT_OK(child);  // A failed fork() would otherwise surface only as odd waitpid() results.
  if (child == 0) {
    // Child: write counter to eventfd
    uint64_t u = 42;
    EXPECT_NOTCAPABLE(write(cap_ro, &u, sizeof(u)));
    EXPECT_OK(write(cap_wo, &u, sizeof(u)));
    exit(HasFailure());
  }

  sleep(1);  // Allow child to write

  struct pollfd poll_fd;
  poll_fd.revents = 0;
  poll_fd.events = POLLIN;
  poll_fd.fd = cap_rw;
  EXPECT_OK(poll(&poll_fd, 1, 400));
  EXPECT_EQ(0, (poll_fd.revents & POLLIN));
  EXPECT_NE(0, (poll_fd.revents & POLLNVAL));

  poll_fd.fd = cap_all;
  EXPECT_OK(poll(&poll_fd, 1, 400));
  EXPECT_NE(0, (poll_fd.revents & POLLIN));
  EXPECT_EQ(0, (poll_fd.revents & POLLNVAL));

  // Start from a known value so a failed read cannot compare garbage against 42.
  uint64_t u = 0;
  EXPECT_NOTCAPABLE(read(cap_wo, &u, sizeof(u)));
  EXPECT_OK(read(cap_ro, &u, sizeof(u)));
  EXPECT_EQ(42, (int)u);

  // Wait for the child.
  int status;
  EXPECT_EQ(child, waitpid(child, &status, 0));
  int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
  EXPECT_EQ(0, rc);

  close(cap_all);
  close(cap_rw);
  close(cap_wo);
  close(cap_ro);
  close(fd);
}
253
254
// Check the rights needed on an epoll descriptor while in capability mode:
// epoll_ctl(2) needs CAP_EPOLL_CTL and epoll_pwait(2) needs CAP_EVENT.
FORK_TEST(Linux, epoll) {
  int sock_fds[2];
  EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds));
  // Queue some data.
  char payload[4] = {1, 2, 3, 4};
  EXPECT_OK(write(sock_fds[1], payload, sizeof(payload)));

  EXPECT_OK(cap_enter());  // Enter capability mode.

  int epoll_fd = epoll_create(1);
  EXPECT_OK(epoll_fd);

  cap_rights_t r_rs;
  cap_rights_t r_ws;
  cap_rights_t r_rws;
  cap_rights_t r_rwspoll;
  cap_rights_t r_epoll;
  cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
  cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
  cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
  cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT);
  cap_rights_init(&r_epoll, CAP_EPOLL_CTL);

  int cap_epoll_wo = dup(epoll_fd);
  EXPECT_OK(cap_epoll_wo);
  EXPECT_OK(cap_rights_limit(cap_epoll_wo, &r_ws));

  int cap_epoll_ro = dup(epoll_fd);
  EXPECT_OK(cap_epoll_ro);
  EXPECT_OK(cap_rights_limit(cap_epoll_ro, &r_rs));

  int cap_epoll_rw = dup(epoll_fd);
  EXPECT_OK(cap_epoll_rw);
  EXPECT_OK(cap_rights_limit(cap_epoll_rw, &r_rws));

  int cap_epoll_poll = dup(epoll_fd);
  EXPECT_OK(cap_epoll_poll);
  EXPECT_OK(cap_rights_limit(cap_epoll_poll, &r_rwspoll));

  int cap_epoll_ctl = dup(epoll_fd);
  EXPECT_OK(cap_epoll_ctl);
  EXPECT_OK(cap_rights_limit(cap_epoll_ctl, &r_epoll));

  // Descriptors that have neither CAP_EPOLL_CTL nor CAP_EVENT, in the order
  // they are probed for each operation below.
  const int lacking[] = {cap_epoll_ro, cap_epoll_wo, cap_epoll_rw};
  const int num_lacking = 3;

  // Can only modify the FDs being monitored if the CAP_EPOLL_CTL right is present.
  struct epoll_event ev;
  memset(&ev, 0, sizeof(ev));
  ev.events = EPOLLIN|EPOLLOUT|EPOLLPRI;
  for (int ii = 0; ii < num_lacking; ii++) {
    EXPECT_NOTCAPABLE(epoll_ctl(lacking[ii], EPOLL_CTL_ADD, sock_fds[0], &ev));
  }
  EXPECT_OK(epoll_ctl(cap_epoll_ctl, EPOLL_CTL_ADD, sock_fds[0], &ev));

  ev.events = EPOLLIN|EPOLLOUT;
  for (int ii = 0; ii < num_lacking; ii++) {
    EXPECT_NOTCAPABLE(epoll_ctl(lacking[ii], EPOLL_CTL_MOD, sock_fds[0], &ev));
  }
  EXPECT_OK(epoll_ctl(cap_epoll_ctl, EPOLL_CTL_MOD, sock_fds[0], &ev));

  // Running epoll_pwait(2) requires CAP_EVENT.
  ev.events = 0;
  for (int ii = 0; ii < num_lacking; ii++) {
    EXPECT_NOTCAPABLE(epoll_pwait(lacking[ii], &ev, 1, 100, NULL));
  }
  EXPECT_OK(epoll_pwait(cap_epoll_poll, &ev, 1, 100, NULL));
  EXPECT_EQ(EPOLLIN, ev.events & EPOLLIN);

  for (int ii = 0; ii < num_lacking; ii++) {
    EXPECT_NOTCAPABLE(epoll_ctl(lacking[ii], EPOLL_CTL_DEL, sock_fds[0], &ev));
  }
  // The final removal goes through the original, unrestricted descriptor.
  EXPECT_OK(epoll_ctl(epoll_fd, EPOLL_CTL_DEL, sock_fds[0], &ev));

  close(cap_epoll_ctl);
  close(cap_epoll_poll);
  close(cap_epoll_rw);
  close(cap_epoll_ro);
  close(cap_epoll_wo);
  close(epoll_fd);
  close(sock_fds[1]);
  close(sock_fds[0]);
}
329
330
// Check that fstatat(2) requires CAP_FSTAT on the descriptor it is given, both
// for a regular file descriptor and for a directory descriptor.
TEST(Linux, fstatat) {
  int fd = open(TmpFile("cap_fstatat"), O_CREAT|O_RDWR, 0644);
  EXPECT_OK(fd);
  unsigned char buffer[] = {1, 2, 3, 4};
  EXPECT_OK(write(fd, buffer, sizeof(buffer)));
  cap_rights_t rights;
  int cap_rf = dup(fd);
  EXPECT_OK(cap_rf);
  EXPECT_OK(cap_rights_limit(cap_rf, cap_rights_init(&rights, CAP_READ, CAP_FSTAT)));
  int cap_ro = dup(fd);
  EXPECT_OK(cap_ro);
  EXPECT_OK(cap_rights_limit(cap_ro, cap_rights_init(&rights, CAP_READ)));

  struct stat info;
  EXPECT_OK(fstatat(fd, "", &info, AT_EMPTY_PATH));
  EXPECT_NOTCAPABLE(fstatat(cap_ro, "", &info, AT_EMPTY_PATH));
  EXPECT_OK(fstatat(cap_rf, "", &info, AT_EMPTY_PATH));

  close(cap_ro);
  close(cap_rf);
  close(fd);

  int dir = open(tmpdir.c_str(), O_RDONLY);
  EXPECT_OK(dir);
  int dir_rf = dup(dir);
  EXPECT_OK(dir_rf);
  EXPECT_OK(cap_rights_limit(dir_rf, cap_rights_init(&rights, CAP_READ, CAP_FSTAT)));
  // Duplicate the directory descriptor: 'fd' was closed above, so dup(fd) would
  // only work if the kernel happened to hand its number back out for 'dir'.
  int dir_ro = dup(dir);
  EXPECT_OK(dir_ro);
  EXPECT_OK(cap_rights_limit(dir_ro, cap_rights_init(&rights, CAP_READ)));

  EXPECT_OK(fstatat(dir, "cap_fstatat", &info, AT_EMPTY_PATH));
  EXPECT_NOTCAPABLE(fstatat(dir_ro, "cap_fstatat", &info, AT_EMPTY_PATH));
  EXPECT_OK(fstatat(dir_rf, "cap_fstatat", &info, AT_EMPTY_PATH));

  close(dir_ro);
  close(dir_rf);
  close(dir);

  unlink(TmpFile("cap_fstatat"));
}
371
372
// fanotify support may not be available at compile-time
373
#ifdef __NR_fanotify_init
374
// Check the rights needed to drive fanotify(7) through capabilities:
// CAP_NOTIFY on the fanotify descriptor to change marks, CAP_FSTAT (plus
// CAP_LOOKUP for a relative path) on the monitored descriptor, CAP_EVENT to
// poll and CAP_READ to read events.
TEST(Linux, FanotifyIfRoot) {
  GTEST_SKIP_IF_NOT_ROOT();
  int fa_fd = fanotify_init(FAN_CLASS_NOTIF, O_RDWR);
  EXPECT_OK(fa_fd);
  if (fa_fd < 0) return;  // May not be enabled

  cap_rights_t r_rs;
  cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
  cap_rights_t r_ws;
  cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
  cap_rights_t r_rws;
  cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
  cap_rights_t r_rwspoll;
  cap_rights_init(&r_rwspoll, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_EVENT);
  cap_rights_t r_rwsnotify;
  cap_rights_init(&r_rwsnotify, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_NOTIFY);
  cap_rights_t r_rsl;
  cap_rights_init(&r_rsl, CAP_READ, CAP_SEEK, CAP_LOOKUP);
  cap_rights_t r_rslstat;
  cap_rights_init(&r_rslstat, CAP_READ, CAP_SEEK, CAP_LOOKUP, CAP_FSTAT);
  cap_rights_t r_rsstat;
  cap_rights_init(&r_rsstat, CAP_READ, CAP_SEEK, CAP_FSTAT);

  // Restricted duplicates of the fanotify descriptor.
  int cap_fd_ro = dup(fa_fd);
  EXPECT_OK(cap_fd_ro);
  EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_rs));
  int cap_fd_wo = dup(fa_fd);
  EXPECT_OK(cap_fd_wo);
  EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_ws));
  int cap_fd_rw = dup(fa_fd);
  EXPECT_OK(cap_fd_rw);
  EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rws));
  int cap_fd_poll = dup(fa_fd);
  EXPECT_OK(cap_fd_poll);
  EXPECT_OK(cap_rights_limit(cap_fd_poll, &r_rwspoll));
  int cap_fd_not = dup(fa_fd);
  EXPECT_OK(cap_fd_not);
  EXPECT_OK(cap_rights_limit(cap_fd_not, &r_rwsnotify));

  // Directory (and a file inside it) to be monitored.
  int rc = mkdir(TmpFile("cap_notify"), 0755);
  EXPECT_TRUE(rc == 0 || errno == EEXIST);
  int dfd = open(TmpFile("cap_notify"), O_RDONLY);
  EXPECT_OK(dfd);
  int fd = open(TmpFile("cap_notify/file"), O_CREAT|O_RDWR, 0644);
  EXPECT_OK(fd);
  close(fd);

  // Restricted duplicates of the directory descriptor.
  int cap_dfd = dup(dfd);
  EXPECT_OK(cap_dfd);
  EXPECT_OK(cap_rights_limit(cap_dfd, &r_rslstat));
  int cap_dfd_rs = dup(dfd);
  EXPECT_OK(cap_dfd_rs);
  EXPECT_OK(cap_rights_limit(cap_dfd_rs, &r_rs));
  int cap_dfd_rsstat = dup(dfd);
  EXPECT_OK(cap_dfd_rsstat);
  EXPECT_OK(cap_rights_limit(cap_dfd_rsstat, &r_rsstat));
  int cap_dfd_rsl = dup(dfd);
  EXPECT_OK(cap_dfd_rsl);
  EXPECT_OK(cap_rights_limit(cap_dfd_rsl, &r_rsl));

  // Need CAP_NOTIFY to change what's monitored.
  EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_ro, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL));
  EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_wo, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL));
  EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_rw, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL));
  EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd, NULL));

  // Need CAP_FSTAT on the thing monitored.
  EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd_rs, NULL));
  EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY|FAN_EVENT_ON_CHILD, cap_dfd_rsstat, NULL));

  // To add monitoring of a file under a dfd, need CAP_LOOKUP|CAP_FSTAT on the dfd.
  EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd_rsstat, "file"));
  EXPECT_NOTCAPABLE(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd_rsl, "file"));
  EXPECT_OK(fanotify_mark(cap_fd_not, FAN_MARK_ADD, FAN_OPEN|FAN_MODIFY, cap_dfd, "file"));

  pid_t child = fork();
  EXPECT_OK(child);
  if (child == 0) {
    // Child: Perform activity in the directory under notify.
    sleep(1);
    unlink(TmpFile("cap_notify/temp"));
    int temp_fd = open(TmpFile("cap_notify/temp"), O_CREAT|O_RDWR, 0644);
    close(temp_fd);
    exit(0);
  }

  // Need CAP_EVENT to poll.
  struct pollfd poll_fd;
  poll_fd.revents = 0;
  poll_fd.events = POLLIN;
  poll_fd.fd = cap_fd_rw;
  EXPECT_OK(poll(&poll_fd, 1, 1400));
  EXPECT_EQ(0, (poll_fd.revents & POLLIN));
  EXPECT_NE(0, (poll_fd.revents & POLLNVAL));

  poll_fd.fd = cap_fd_not;
  EXPECT_OK(poll(&poll_fd, 1, 1400));
  EXPECT_EQ(0, (poll_fd.revents & POLLIN));
  EXPECT_NE(0, (poll_fd.revents & POLLNVAL));

  poll_fd.fd = cap_fd_poll;
  EXPECT_OK(poll(&poll_fd, 1, 1400));
  EXPECT_NE(0, (poll_fd.revents & POLLIN));
  EXPECT_EQ(0, (poll_fd.revents & POLLNVAL));

  // Need CAP_READ to read.
  struct fanotify_event_metadata ev;
  memset(&ev, 0, sizeof(ev));
  EXPECT_NOTCAPABLE(read(cap_fd_wo, &ev, sizeof(ev)));
  rc = read(fa_fd, &ev, sizeof(ev));
  EXPECT_OK(rc);
  EXPECT_EQ((int)sizeof(struct fanotify_event_metadata), rc);
  EXPECT_EQ(child, ev.pid);
  EXPECT_NE(0, ev.fd);

  // TODO(drysdale): reinstate if/when capsicum-linux propagates rights
  // to fanotify-generated FDs.
#ifdef OMIT
  // fanotify(7) gives us a FD for the changed file.  This should
  // only have rights that are a subset of those for the original
  // monitored directory file descriptor.
  cap_rights_t rights;
  CAP_SET_ALL(&rights);
  EXPECT_OK(cap_rights_get(ev.fd, &rights));
  EXPECT_RIGHTS_IN(&rights, &r_rslstat);
#endif

  // The event carries an open descriptor for the changed file; release it so
  // the test does not leak it.  Only trust ev.fd when a full event was read
  // ('ev' was zeroed above, so a failed read would leave ev.fd == 0).
  if (rc == (int)sizeof(ev) && ev.fd > 0) {
    close(ev.fd);
  }

  // Wait for the child.
  int status;
  EXPECT_EQ(child, waitpid(child, &status, 0));
  rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
  EXPECT_EQ(0, rc);

  close(cap_dfd_rsstat);
  close(cap_dfd_rsl);
  close(cap_dfd_rs);
  close(cap_dfd);
  close(dfd);
  unlink(TmpFile("cap_notify/file"));
  unlink(TmpFile("cap_notify/temp"));
  rmdir(TmpFile("cap_notify"));
  close(cap_fd_not);
  close(cap_fd_poll);
  close(cap_fd_rw);
  close(cap_fd_wo);
  close(cap_fd_ro);
  close(fa_fd);
}
523
#endif
524
525
// Check the rights needed on an inotify descriptor: changing watches needs
// CAP_NOTIFY and reading events needs CAP_READ.
TEST(Linux, inotify) {
  int i_fd = inotify_init();
  EXPECT_OK(i_fd);

  cap_rights_t r_rs;
  cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
  cap_rights_t r_ws;
  cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
  cap_rights_t r_rws;
  cap_rights_init(&r_rws, CAP_READ, CAP_WRITE, CAP_SEEK);
  cap_rights_t r_rwsnotify;
  cap_rights_init(&r_rwsnotify, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_NOTIFY);

  int cap_fd_ro = dup(i_fd);
  EXPECT_OK(cap_fd_ro);
  EXPECT_OK(cap_rights_limit(cap_fd_ro, &r_rs));
  int cap_fd_wo = dup(i_fd);
  EXPECT_OK(cap_fd_wo);
  EXPECT_OK(cap_rights_limit(cap_fd_wo, &r_ws));
  int cap_fd_rw = dup(i_fd);
  EXPECT_OK(cap_fd_rw);
  EXPECT_OK(cap_rights_limit(cap_fd_rw, &r_rws));
  int cap_fd_all = dup(i_fd);
  EXPECT_OK(cap_fd_all);
  EXPECT_OK(cap_rights_limit(cap_fd_all, &r_rwsnotify));

  // File to be watched; everything below depends on it existing.
  int fd = open(TmpFile("cap_inotify"), O_CREAT|O_RDWR, 0644);
  EXPECT_OK(fd);
  EXPECT_NOTCAPABLE(inotify_add_watch(cap_fd_rw, TmpFile("cap_inotify"), IN_ACCESS|IN_MODIFY));
  int wd = inotify_add_watch(i_fd, TmpFile("cap_inotify"), IN_ACCESS|IN_MODIFY);
  EXPECT_OK(wd);

  // Modify the file to generate an event.
  unsigned char buffer[] = {1, 2, 3, 4};
  EXPECT_OK(write(fd, buffer, sizeof(buffer)));

  struct inotify_event iev;
  memset(&iev, 0, sizeof(iev));
  EXPECT_NOTCAPABLE(read(cap_fd_wo, &iev, sizeof(iev)));
  int rc = read(cap_fd_ro, &iev, sizeof(iev));
  EXPECT_OK(rc);
  EXPECT_EQ((int)sizeof(iev), rc);
  EXPECT_EQ(wd, iev.wd);

  EXPECT_NOTCAPABLE(inotify_rm_watch(cap_fd_wo, wd));
  EXPECT_OK(inotify_rm_watch(cap_fd_all, wd));

  close(fd);
  close(cap_fd_all);
  close(cap_fd_rw);
  close(cap_fd_wo);
  close(cap_fd_ro);
  close(i_fd);
  unlink(TmpFile("cap_inotify"));
}
578
579
// For each helper binary that can be opened, fork a child that enters
// capability mode and fexecve()s the binary; the child is expected to exit
// with status 0.  The test is skipped when no helper binary is present.
TEST(Linux, ArchChangeIfAvailable) {
  const char* prog_candidates[] = {"./mini-me.32", "./mini-me.x32", "./mini-me.64"};
  const char* progs[] = {NULL, NULL, NULL};
  char* argv_pass[] = {(char*)"to-come", (char*)"--capmode", NULL};
  char* null_envp[] = {NULL};
  int fds[3];
  int count = 0;

  // Collect the candidates that exist, keeping name and descriptor side by side.
  for (int ii = 0; ii < 3; ii++) {
    int candidate_fd = open(prog_candidates[ii], O_RDONLY);
    if (candidate_fd < 0) {
      continue;
    }
    fds[count] = candidate_fd;
    progs[count] = prog_candidates[ii];
    count++;
  }
  if (count == 0) {
    GTEST_SKIP() << "no different-architecture programs available";
  }

  for (int ii = 0; ii < count; ii++) {
    // Fork-and-exec a binary of this architecture.
    pid_t child = fork();
    if (child == 0) {
      EXPECT_OK(cap_enter());  // Enter capability mode
      if (verbose) {
        fprintf(stderr, "[%d] call fexecve(%s, %s)\n",
                getpid_(), progs[ii], argv_pass[1]);
      }
      argv_pass[0] = (char *)progs[ii];
      int exec_rc = fexecve_(fds[ii], argv_pass, null_envp);
      // fexecve only returns on failure.
      fprintf(stderr, "fexecve(%s) returned %d errno %d\n", progs[ii], exec_rc, errno);
      exit(99);  // Should not reach here.
    }

    // Parent: collect the child's exit status.
    int status;
    EXPECT_EQ(child, waitpid(child, &status, 0));
    int exit_code = -1;
    if (WIFEXITED(status)) {
      exit_code = WEXITSTATUS(status);
    }
    EXPECT_EQ(0, exit_code);
    close(fds[ii]);
  }
}
617
618
// Check that setns(2) on a namespace descriptor needs CAP_SETNS, and that in
// capability mode setns(2) is refused while unshare(2) is still allowed.
FORK_TEST(Linux, NamespaceIfRoot) {
  GTEST_SKIP_IF_NOT_ROOT();
  pid_t me = getpid_();

  // Create a new UTS namespace.
  EXPECT_OK(unshare(CLONE_NEWUTS));
  // Open an FD to its symlink.
  char buffer[256];
  snprintf(buffer, sizeof(buffer), "/proc/%d/ns/uts", me);
  int ns_fd = open(buffer, O_RDONLY);
  EXPECT_OK(ns_fd);  // Everything below depends on this descriptor.

  cap_rights_t r_rwlstat;
  cap_rights_init(&r_rwlstat, CAP_READ, CAP_WRITE, CAP_LOOKUP, CAP_FSTAT);
  cap_rights_t r_rwlstatns;
  cap_rights_init(&r_rwlstatns, CAP_READ, CAP_WRITE, CAP_LOOKUP, CAP_FSTAT, CAP_SETNS);

  int cap_fd = dup(ns_fd);
  EXPECT_OK(cap_fd);
  EXPECT_OK(cap_rights_limit(cap_fd, &r_rwlstat));
  int cap_fd_setns = dup(ns_fd);
  EXPECT_OK(cap_fd_setns);
  EXPECT_OK(cap_rights_limit(cap_fd_setns, &r_rwlstatns));
  EXPECT_NOTCAPABLE(setns(cap_fd, CLONE_NEWUTS));
  EXPECT_OK(setns(cap_fd_setns, CLONE_NEWUTS));

  EXPECT_OK(cap_enter());  // Enter capability mode.

  // No setns(2) but unshare(2) is allowed.
  EXPECT_CAPMODE(setns(ns_fd, CLONE_NEWUTS));
  EXPECT_OK(unshare(CLONE_NEWUTS));
}
649
650
static void SendFD(int fd, int over) {
651
struct msghdr mh;
652
mh.msg_name = NULL; // No address needed
653
mh.msg_namelen = 0;
654
char buffer1[1024];
655
struct iovec iov[1];
656
iov[0].iov_base = buffer1;
657
iov[0].iov_len = sizeof(buffer1);
658
mh.msg_iov = iov;
659
mh.msg_iovlen = 1;
660
char buffer2[1024];
661
mh.msg_control = buffer2;
662
mh.msg_controllen = CMSG_LEN(sizeof(int));
663
struct cmsghdr *cmptr = CMSG_FIRSTHDR(&mh);
664
cmptr->cmsg_level = SOL_SOCKET;
665
cmptr->cmsg_type = SCM_RIGHTS;
666
cmptr->cmsg_len = CMSG_LEN(sizeof(int));
667
*(int *)CMSG_DATA(cmptr) = fd;
668
buffer1[0] = 0;
669
iov[0].iov_len = 1;
670
int rc = sendmsg(over, &mh, 0);
671
EXPECT_OK(rc);
672
}
673
674
static int ReceiveFD(int over) {
675
struct msghdr mh;
676
mh.msg_name = NULL; // No address needed
677
mh.msg_namelen = 0;
678
char buffer1[1024];
679
struct iovec iov[1];
680
iov[0].iov_base = buffer1;
681
iov[0].iov_len = sizeof(buffer1);
682
mh.msg_iov = iov;
683
mh.msg_iovlen = 1;
684
char buffer2[1024];
685
mh.msg_control = buffer2;
686
mh.msg_controllen = sizeof(buffer2);
687
int rc = recvmsg(over, &mh, 0);
688
EXPECT_OK(rc);
689
EXPECT_LE(CMSG_LEN(sizeof(int)), mh.msg_controllen);
690
struct cmsghdr *cmptr = CMSG_FIRSTHDR(&mh);
691
int fd = *(int*)CMSG_DATA(cmptr);
692
EXPECT_EQ(CMSG_LEN(sizeof(int)), cmptr->cmsg_len);
693
cmptr = CMSG_NXTHDR(&mh, cmptr);
694
EXPECT_TRUE(cmptr == NULL);
695
return fd;
696
}
697
698
// State shared between the namespace tests and the functions they clone().

// Process descriptor created by pdfork() in the parent test and used by the
// cloned child; -1 until it has been set.
static int shared_pd = -1;

// Socket pair created with socketpair() by the parent test: the parent uses
// element 0 and the cloned child uses element 1.
static int shared_sock_fds[2];
700
701
static int ChildFunc(void *arg) {
702
// This function is running in a new PID namespace, and so is pid 1.
703
if (verbose) fprintf(stderr, " ChildFunc: pid=%d, ppid=%d\n", getpid_(), getppid());
704
EXPECT_EQ(1, getpid_());
705
EXPECT_EQ(0, getppid());
706
707
// The shared process descriptor is outside our namespace, so we cannot
708
// get its pid.
709
if (verbose) fprintf(stderr, " ChildFunc: shared_pd=%d\n", shared_pd);
710
pid_t shared_child = -1;
711
EXPECT_OK(pdgetpid(shared_pd, &shared_child));
712
if (verbose) fprintf(stderr, " ChildFunc: corresponding pid=%d\n", shared_child);
713
EXPECT_EQ(0, shared_child);
714
715
// But we can pdkill() it even so.
716
if (verbose) fprintf(stderr, " ChildFunc: call pdkill(pd=%d)\n", shared_pd);
717
EXPECT_OK(pdkill(shared_pd, SIGINT));
718
719
int pd;
720
pid_t child = pdfork(&pd, 0);
721
EXPECT_OK(child);
722
if (child == 0) {
723
// Child: expect pid 2.
724
if (verbose) fprintf(stderr, " child of ChildFunc: pid=%d, ppid=%d\n", getpid_(), getppid());
725
EXPECT_EQ(2, getpid_());
726
EXPECT_EQ(1, getppid());
727
while (true) {
728
if (verbose) fprintf(stderr, " child of ChildFunc: \"I aten't dead\"\n");
729
sleep(1);
730
}
731
exit(0);
732
}
733
EXPECT_EQ(2, child);
734
EXPECT_PID_ALIVE(child);
735
if (verbose) fprintf(stderr, " ChildFunc: pdfork() -> pd=%d, corresponding pid=%d state='%c'\n",
736
pd, child, ProcessState(child));
737
738
pid_t pid;
739
EXPECT_OK(pdgetpid(pd, &pid));
740
EXPECT_EQ(child, pid);
741
742
sleep(2);
743
744
// Send the process descriptor over UNIX domain socket back to parent.
745
SendFD(pd, shared_sock_fds[1]);
746
747
// Wait for death of (grand)child, killed by our parent.
748
if (verbose) fprintf(stderr, " ChildFunc: wait on pid=%d\n", child);
749
int status;
750
EXPECT_EQ(child, wait4(child, &status, __WALL, NULL));
751
752
if (verbose) fprintf(stderr, " ChildFunc: return 0\n");
753
return 0;
754
}
755
756
// Size in bytes (1 MiB) of the stack handed to clone()d children.
#define STACK_SIZE (1024 * 1024)

// Statically allocated stack for clone()d children; callers pass
// child_stack + STACK_SIZE as the stack pointer.
static char child_stack[STACK_SIZE];
758
759
// TODO(drysdale): fork into a user namespace first so GTEST_SKIP_IF_NOT_ROOT can be removed.
760
// Pass process descriptors in both directions across a PID namespace boundary:
// the cloned child kills our firstborn via shared_pd, and we kill the child's
// own pdfork()ed process via a descriptor received over a socket.
TEST(Linux, PidNamespacePdForkIfRoot) {
  GTEST_SKIP_IF_NOT_ROOT();

  // pdfork() off a child before we start, holding its process descriptor in a
  // global variable that's accessible to children.
  pid_t firstborn = pdfork(&shared_pd, 0);
  EXPECT_OK(firstborn);
  if (firstborn == 0) {
    // Firstborn: idle until killed.
    for (;;) {
      if (verbose) {
        fprintf(stderr, " Firstborn: \"I aten't dead\"\n");
      }
      sleep(1);
    }
    exit(0);
  }
  EXPECT_PID_ALIVE(firstborn);
  if (verbose) {
    fprintf(stderr, "Parent: pre-pdfork()ed pd=%d, pid=%d state='%c'\n",
            shared_pd, firstborn, ProcessState(firstborn));
  }
  sleep(2);

  // Prepare sockets to communicate with child process.
  EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds));

  // Clone into a child process with a new pid namespace.
  pid_t child = clone(ChildFunc, child_stack + STACK_SIZE,
                      CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL);
  EXPECT_OK(child);
  EXPECT_PID_ALIVE(child);
  if (verbose) {
    fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child));
  }

  // Ensure the child runs.  First thing it does is to kill our firstborn, using shared_pd.
  sleep(1);
  EXPECT_PID_DEAD(firstborn);

  // But we can still retrieve firstborn's PID, as it's not been reaped yet.
  pid_t firstborn_via_pd;
  EXPECT_OK(pdgetpid(shared_pd, &firstborn_via_pd));
  EXPECT_EQ(firstborn, firstborn_via_pd);
  if (verbose) {
    fprintf(stderr, "Parent: check on firstborn: pdgetpid(pd=%d) -> child=%d state='%c'\n",
            shared_pd, firstborn_via_pd, ProcessState(firstborn_via_pd));
  }

  // Now reap it.
  int status;
  EXPECT_EQ(firstborn, waitpid(firstborn, &status, __WALL));

  // Get the process descriptor of the child-of-child via socket transfer.
  int grandchild_pd = ReceiveFD(shared_sock_fds[0]);

  // Our notion of the pid associated with the grandchild is in the main PID namespace.
  pid_t grandchild;
  EXPECT_OK(pdgetpid(grandchild_pd, &grandchild));
  EXPECT_NE(2, grandchild);
  if (verbose) {
    fprintf(stderr, "Parent: pre-pdkill:  pdgetpid(grandchild_pd=%d) -> grandchild=%d state='%c'\n",
            grandchild_pd, grandchild, ProcessState(grandchild));
  }
  EXPECT_PID_ALIVE(grandchild);

  // Kill the grandchild via the process descriptor.
  EXPECT_OK(pdkill(grandchild_pd, SIGINT));
  usleep(10000);
  if (verbose) {
    fprintf(stderr, "Parent: post-pdkill: pdgetpid(grandchild_pd=%d) -> grandchild=%d state='%c'\n",
            grandchild_pd, grandchild, ProcessState(grandchild));
  }
  EXPECT_PID_DEAD(grandchild);

  sleep(2);

  // Wait for the child.
  EXPECT_EQ(child, waitpid(child, &status, WNOHANG));
  int exit_code = -1;
  if (WIFEXITED(status)) {
    exit_code = WEXITSTATUS(status);
  }
  EXPECT_EQ(0, exit_code);

  close(shared_sock_fds[0]);
  close(shared_sock_fds[1]);
  close(shared_pd);
  close(grandchild_pd);
}
834
835
int NSInit(void *data) {
836
// This function is running in a new PID namespace, and so is pid 1.
837
if (verbose) fprintf(stderr, " NSInit: pid=%d, ppid=%d\n", getpid_(), getppid());
838
EXPECT_EQ(1, getpid_());
839
EXPECT_EQ(0, getppid());
840
841
int pd;
842
pid_t child = pdfork(&pd, 0);
843
EXPECT_OK(child);
844
if (child == 0) {
845
// Child: loop forever until terminated.
846
if (verbose) fprintf(stderr, " child of NSInit: pid=%d, ppid=%d\n", getpid_(), getppid());
847
while (true) {
848
if (verbose) fprintf(stderr, " child of NSInit: \"I aten't dead\"\n");
849
usleep(100000);
850
}
851
exit(0);
852
}
853
EXPECT_EQ(2, child);
854
EXPECT_PID_ALIVE(child);
855
if (verbose) fprintf(stderr, " NSInit: pdfork() -> pd=%d, corresponding pid=%d state='%c'\n",
856
pd, child, ProcessState(child));
857
sleep(1);
858
859
// Send the process descriptor over UNIX domain socket back to parent.
860
SendFD(pd, shared_sock_fds[1]);
861
close(pd);
862
863
// Wait for a byte back in the other direction.
864
int value;
865
if (verbose) fprintf(stderr, " NSInit: block waiting for value\n");
866
read(shared_sock_fds[1], &value, sizeof(value));
867
868
if (verbose) fprintf(stderr, " NSInit: return 0\n");
869
return 0;
870
}
871
872
// Check that when the init process of a PID namespace exits, a process it
// pdfork()ed inside that namespace goes away too, even though we still hold
// a process descriptor for it.
TEST(Linux, DeadNSInitIfRoot) {
  GTEST_SKIP_IF_NOT_ROOT();

  // Prepare sockets to communicate with child process.
  EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds));

  // Clone into a child process with a new pid namespace.
  pid_t child = clone(NSInit, child_stack + STACK_SIZE,
                      CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL);
  usleep(10000);
  EXPECT_OK(child);
  EXPECT_PID_ALIVE(child);
  if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child));

  // Get the process descriptor of the child-of-child via socket transfer.
  int grandchild_pd = ReceiveFD(shared_sock_fds[0]);
  pid_t grandchild;
  EXPECT_OK(pdgetpid(grandchild_pd, &grandchild));
  if (verbose) fprintf(stderr, "Parent: grandchild is %d state='%c'\n", grandchild, ProcessState(grandchild));

  // Send an int to the child to trigger its termination.  Grandchild should also
  // go, as its init process is gone.
  int zero = 0;
  if (verbose) fprintf(stderr, "Parent: write 0 to pipe\n");
  // If this write fails the child never wakes up, so report it explicitly.
  EXPECT_OK(write(shared_sock_fds[0], &zero, sizeof(zero)));
  EXPECT_PID_ZOMBIE(child);
  EXPECT_PID_GONE(grandchild);

  // Wait for the child.
  int status;
  EXPECT_EQ(child, waitpid(child, &status, WNOHANG));
  int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
  EXPECT_EQ(0, rc);
  EXPECT_PID_GONE(child);

  close(shared_sock_fds[0]);
  close(shared_sock_fds[1]);
  close(grandchild_pd);

  if (verbose) {
    fprintf(stderr, "Parent: child %d in state='%c'\n", child, ProcessState(child));
    fprintf(stderr, "Parent: grandchild %d in state='%c'\n", grandchild, ProcessState(grandchild));
  }
}
916
917
// Variant of DeadNSInitIfRoot: the grandchild is killed through its process
// descriptor and the descriptor is closed *before* the child running NSInit is
// told to exit.  Afterwards the grandchild is expected to be gone.
TEST(Linux, DeadNSInit2IfRoot) {
  GTEST_SKIP_IF_NOT_ROOT();

  // Prepare sockets to communicate with child process.
  EXPECT_OK(socketpair(AF_UNIX, SOCK_STREAM, 0, shared_sock_fds));

  // Clone into a child process with a new pid namespace.
  pid_t child = clone(NSInit, child_stack + STACK_SIZE,
                      CLONE_FILES|CLONE_NEWPID|SIGCHLD, NULL);
  usleep(10000);
  EXPECT_OK(child);
  EXPECT_PID_ALIVE(child);
  if (verbose) fprintf(stderr, "Parent: child is %d state='%c'\n", child, ProcessState(child));

  // Get the process descriptor of the child-of-child via socket transfer.
  int grandchild_pd = ReceiveFD(shared_sock_fds[0]);
  pid_t grandchild;
  EXPECT_OK(pdgetpid(grandchild_pd, &grandchild));
  if (verbose) fprintf(stderr, "Parent: grandchild is %d state='%c'\n", grandchild, ProcessState(grandchild));

  // Kill the grandchild
  EXPECT_OK(pdkill(grandchild_pd, SIGINT));
  usleep(10000);
  EXPECT_PID_ZOMBIE(grandchild);
  // Close the process descriptor, so there are now no procdesc references to grandchild.
  close(grandchild_pd);

  // Send an int to the child to trigger its termination.  Grandchild should also
  // go, as its init process is gone.
  int zero = 0;
  if (verbose) fprintf(stderr, "Parent: write 0 to pipe\n");
  // The child blocks in read() until this arrives, so a failed or short write
  // must be reported rather than silently ignored.
  EXPECT_EQ((ssize_t)sizeof(zero), write(shared_sock_fds[0], &zero, sizeof(zero)));
  EXPECT_PID_ZOMBIE(child);
  EXPECT_PID_GONE(grandchild);

  // Wait for the child.  status starts at -1 (which WIFEXITED rejects) so that
  // a WNOHANG waitpid() that reaps nothing cannot leave it uninitialized.
  int status = -1;
  EXPECT_EQ(child, waitpid(child, &status, WNOHANG));
  int rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
  EXPECT_EQ(0, rc);

  close(shared_sock_fds[0]);
  close(shared_sock_fds[1]);

  if (verbose) {
    fprintf(stderr, "Parent: child %d in state='%c'\n", child, ProcessState(child));
    fprintf(stderr, "Parent: grandchild %d in state='%c'\n", grandchild, ProcessState(grandchild));
  }
}
966
967
#ifdef __x86_64__
968
FORK_TEST(Linux, CheckHighWord) {
969
EXPECT_OK(cap_enter()); // Enter capability mode.
970
971
int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
972
EXPECT_OK(rc);
973
EXPECT_EQ(1, rc); // no_new_privs = 1
974
975
// Set some of the high 32-bits of argument zero.
976
uint64_t big_cmd = PR_GET_NO_NEW_PRIVS | 0x100000000LL;
977
EXPECT_CAPMODE(syscall(__NR_prctl, big_cmd, 0, 0, 0, 0));
978
}
979
#endif
980
981
// Exercise the PR_SET_OPENAT_BENEATH / PR_GET_OPENAT_BENEATH prctl(2) pair:
// the flag can be toggled freely beforehand, but after cap_enter() it reads
// back as 1 and attempts to clear it are rejected.
FORK_TEST(Linux, PrctlOpenatBeneath) {
  // Set no_new_privs = 1
  EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
  int no_new_privs = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
  EXPECT_OK(no_new_privs);
  EXPECT_EQ(1, no_new_privs);  // no_new_privs = 1

  // Set openat-beneath mode
  EXPECT_OK(prctl(PR_SET_OPENAT_BENEATH, 1, 0, 0, 0));
  int beneath = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
  EXPECT_OK(beneath);
  EXPECT_EQ(1, beneath);  // openat_beneath = 1

  // Clear openat-beneath mode
  EXPECT_OK(prctl(PR_SET_OPENAT_BENEATH, 0, 0, 0, 0));
  beneath = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
  EXPECT_OK(beneath);
  EXPECT_EQ(0, beneath);  // openat_beneath = 0

  EXPECT_OK(cap_enter());  // Enter capability mode

  // Expect to be in openat_beneath mode
  beneath = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
  EXPECT_OK(beneath);
  EXPECT_EQ(1, beneath);  // openat_beneath = 1

  // Expect this to be immutable.
  EXPECT_CAPMODE(prctl(PR_SET_OPENAT_BENEATH, 0, 0, 0, 0));
  beneath = prctl(PR_GET_OPENAT_BENEATH, 0, 0, 0, 0);
  EXPECT_OK(beneath);
  EXPECT_EQ(1, beneath);  // openat_beneath = 1
}
1014
1015
// Installing a seccomp-bpf filter is expected to fail with EACCES while
// no_new_privs is 0 (and CAP_SYS_ADMIN is not held), and to succeed once
// no_new_privs has been set to 1.
FORK_TEST(Linux, NoNewPrivs) {
  if (getuid() == 0) {
    // If root, drop CAP_SYS_ADMIN POSIX.1e capability.
    struct __user_cap_header_struct hdr;
    hdr.version = _LINUX_CAPABILITY_VERSION_3;
    hdr.pid = getpid_();
    struct __user_cap_data_struct data[3];
    EXPECT_OK(capget(&hdr, &data[0]));
    const int without_sys_admin = ~(1 << CAP_SYS_ADMIN);
    data[0].effective &= without_sys_admin;
    data[0].permitted &= without_sys_admin;
    data[0].inheritable &= without_sys_admin;
    EXPECT_OK(capset(&hdr, &data[0]));
  }
  int rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
  EXPECT_OK(rc);
  EXPECT_EQ(0, rc);  // no_new_privs == 0

  // A one-instruction filter that allows every syscall.
  struct sock_filter filter[] = {
    BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
  };
  struct sock_fprog bpf;
  bpf.filter = filter;
  bpf.len = (sizeof(filter) / sizeof(filter[0]));

  // Can't enter seccomp-bpf mode with no_new_privs == 0
  rc = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0);
  EXPECT_EQ(-1, rc);
  EXPECT_EQ(EACCES, errno);

  // Set no_new_privs = 1
  EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
  rc = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
  EXPECT_OK(rc);
  EXPECT_EQ(1, rc);  // no_new_privs = 1

  // Can now turn on seccomp mode
  EXPECT_OK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0));
}
1052
1053
/*
 * Macros for BPF generation.
 *
 * Each macro expands to one or more `struct sock_filter` initializers, meant
 * to be listed inside a filter array that begins with EXAMINE_SYSCALL (which
 * loads the syscall number into the accumulator).
 */

/* Return filter: fail the syscall with errno `err` (low 16 bits only). */
#define BPF_RETURN_ERRNO(err) \
  BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO | ((err) & 0xFFFF))
/* Return filter: SECCOMP_RET_KILL. */
#define BPF_KILL_PROCESS \
  BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
/* Return filter: let the syscall proceed. */
#define BPF_ALLOW \
  BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
/* Load the syscall number from struct seccomp_data. */
#define EXAMINE_SYSCALL \
  BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr))
/*
 * The three *_SYSCALL macros compare the loaded number with __NR_<name>; on a
 * match execution falls through to the return instruction, otherwise that one
 * instruction is skipped.
 */
#define ALLOW_SYSCALL(name) \
  BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
  BPF_ALLOW
#define KILL_SYSCALL(name) \
  BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
  BPF_KILL_PROCESS
#define FAIL_SYSCALL(name, err) \
  BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_##name, 0, 1), \
  BPF_RETURN_ERRNO(err)
1071
1072
// Combine a seccomp-bpf filter with Capsicum capability mode in a forked child
// and check which of the two decides the outcome of each syscall.  The child
// is expected to end up killed by SIGSYS.
TEST(Linux, CapModeWithBPF) {
  pid_t child = fork();
  EXPECT_OK(child);
  if (child == 0) {
    int fd = open(TmpFile("cap_bpf_capmode"), O_CREAT|O_RDWR, 0644);
    cap_rights_t rights;
    cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSYNC);
    EXPECT_OK(cap_rights_limit(fd, &rights));

    // Filter program: fchmod -> ENOMEM, fstat -> ENOEXEC, close -> allowed,
    // fsync -> kill, anything else -> allowed.
    struct sock_filter filter[] = { EXAMINE_SYSCALL,
                                    FAIL_SYSCALL(fchmod, ENOMEM),
                                    FAIL_SYSCALL(fstat, ENOEXEC),
                                    ALLOW_SYSCALL(close),
                                    KILL_SYSCALL(fsync),
                                    BPF_ALLOW };
    struct sock_fprog bpf = {.len = (sizeof(filter) / sizeof(filter[0])),
                             .filter = filter};
    // Set up seccomp-bpf first.
    EXPECT_OK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
    EXPECT_OK(prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bpf, 0, 0));

    EXPECT_OK(cap_enter());  // Enter capability mode.

    // fchmod is allowed by Capsicum, but failed by BPF.
    EXPECT_SYSCALL_FAIL(ENOMEM, fchmod(fd, 0644));
    // open is allowed by BPF, but failed by Capsicum
    EXPECT_SYSCALL_FAIL(ECAPMODE, open(TmpFile("cap_bpf_capmode"), O_RDONLY));
    // fstat is failed by both BPF and Capsicum; tie-break is on errno
    struct stat buf;
    EXPECT_SYSCALL_FAIL(ENOEXEC, fstat(fd, &buf));
    // fsync is allowed by Capsicum, but BPF's SIGSYS generation take precedence
    fsync(fd);  // terminate with unhandled SIGSYS
    exit(0);
  }

  // Parent: the child must have died from the signal, not exited normally.
  int status;
  EXPECT_EQ(child, waitpid(child, &status, 0));
  EXPECT_TRUE(WIFSIGNALED(status));
  EXPECT_EQ(SIGSYS, WTERMSIG(status));
  unlink(TmpFile("cap_bpf_capmode"));
}
1112
1113
// Submit Linux native AIO requests (io_setup/io_submit/io_destroy via raw
// syscalls) against capabilities with differing rights, and check that
// io_submit honours the rights of the descriptor named in the request.
TEST(Linux, AIO) {
  int fd = open(TmpFile("cap_aio"), O_CREAT|O_RDWR, 0644);
  EXPECT_OK(fd);

  cap_rights_t r_rs;
  cap_rights_init(&r_rs, CAP_READ, CAP_SEEK);
  cap_rights_t r_ws;
  cap_rights_init(&r_ws, CAP_WRITE, CAP_SEEK);
  cap_rights_t r_rwssync;
  cap_rights_init(&r_rwssync, CAP_READ, CAP_WRITE, CAP_SEEK, CAP_FSYNC);

  // One duplicate of fd per rights set.
  int cap_ro = dup(fd);
  EXPECT_OK(cap_ro);
  EXPECT_OK(cap_rights_limit(cap_ro, &r_rs));
  int cap_wo = dup(fd);
  EXPECT_OK(cap_wo);
  EXPECT_OK(cap_rights_limit(cap_wo, &r_ws));
  int cap_all = dup(fd);
  EXPECT_OK(cap_all);
  EXPECT_OK(cap_rights_limit(cap_all, &r_rwssync));

  // Linux: io_setup, io_submit, io_getevents, io_cancel, io_destroy
  aio_context_t ctx = 0;
  EXPECT_OK(syscall(__NR_io_setup, 10, &ctx));

  // A single request block, re-targeted and re-submitted for every case below.
  unsigned char buffer[32] = {1, 2, 3, 4};
  struct iocb req;
  memset(&req, 0, sizeof(req));
  req.aio_reqprio = 0;
  req.aio_fildes = fd;
  uintptr_t bufaddr = (uintptr_t)buffer;
  req.aio_buf = (__u64)bufaddr;
  req.aio_nbytes = 4;
  req.aio_offset = 0;
  struct iocb* reqs[1] = {&req};

  // Write operation
  req.aio_lio_opcode = IOCB_CMD_PWRITE;
  req.aio_fildes = cap_ro;
  EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
  req.aio_fildes = cap_wo;
  EXPECT_OK(syscall(__NR_io_submit, ctx, 1, reqs));

  // Sync operation (still targeting cap_wo, which lacks CAP_FSYNC)
  req.aio_lio_opcode = IOCB_CMD_FSYNC;
  EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
  req.aio_lio_opcode = IOCB_CMD_FDSYNC;
  EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
  // Even with CAP_FSYNC, turns out fsync/fdsync aren't implemented
  req.aio_fildes = cap_all;
  EXPECT_FAIL_NOT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
  req.aio_lio_opcode = IOCB_CMD_FSYNC;
  EXPECT_FAIL_NOT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));

  // Read operation
  req.aio_lio_opcode = IOCB_CMD_PREAD;
  req.aio_fildes = cap_wo;
  EXPECT_NOTCAPABLE(syscall(__NR_io_submit, ctx, 1, reqs));
  req.aio_fildes = cap_ro;
  EXPECT_OK(syscall(__NR_io_submit, ctx, 1, reqs));

  EXPECT_OK(syscall(__NR_io_destroy, ctx));

  close(cap_all);
  close(cap_wo);
  close(cap_ro);
  close(fd);
  unlink(TmpFile("cap_aio"));
}
1185
1186
#ifndef KCMP_FILE
1187
#define KCMP_FILE 0
1188
#endif
1189
TEST(Linux, KcmpIfAvailable) {
1190
// This requires CONFIG_CHECKPOINT_RESTORE in kernel config.
1191
int fd = open("/etc/passwd", O_RDONLY);
1192
EXPECT_OK(fd);
1193
pid_t parent = getpid_();
1194
1195
errno = 0;
1196
int rc = syscall(__NR_kcmp, parent, parent, KCMP_FILE, fd, fd);
1197
if (rc == -1 && errno == ENOSYS) {
1198
GTEST_SKIP() << "kcmp(2) gives -ENOSYS";
1199
}
1200
1201
pid_t child = fork();
1202
if (child == 0) {
1203
// Child: limit rights on FD.
1204
child = getpid_();
1205
EXPECT_OK(syscall(__NR_kcmp, parent, child, KCMP_FILE, fd, fd));
1206
cap_rights_t rights;
1207
cap_rights_init(&rights, CAP_READ, CAP_WRITE);
1208
EXPECT_OK(cap_rights_limit(fd, &rights));
1209
// A capability wrapping a normal FD is different (from a kcmp(2) perspective)
1210
// than the original file.
1211
EXPECT_NE(0, syscall(__NR_kcmp, parent, child, KCMP_FILE, fd, fd));
1212
exit(HasFailure());
1213
}
1214
// Wait for the child.
1215
int status;
1216
EXPECT_EQ(child, waitpid(child, &status, 0));
1217
rc = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
1218
EXPECT_EQ(0, rc);
1219
1220
close(fd);
1221
}
1222
1223
// Read /proc/<pid>/fdinfo/<fd> for a rights-limited descriptor and check that
// it reports both the file position of the underlying file and a "rights:"
// line for the capability.
TEST(Linux, ProcFS) {
  cap_rights_t rights;
  cap_rights_init(&rights, CAP_READ, CAP_SEEK);
  int fd = open("/etc/passwd", O_RDONLY);
  EXPECT_OK(fd);
  lseek(fd, 4, SEEK_SET);
  int cap = dup(fd);
  EXPECT_OK(cap);
  EXPECT_OK(cap_rights_limit(cap, &rights));
  pid_t me = getpid_();

  char buffer[1024];
  snprintf(buffer, sizeof(buffer), "/proc/%d/fdinfo/%d", me, cap);
  int procfd = open(buffer, O_RDONLY);
  EXPECT_OK(procfd) << " failed to open " << buffer;
  if (procfd < 0) {
    // Nothing more can be checked; release what was opened so far.
    close(cap);
    close(fd);
    return;
  }
  int proccap = dup(procfd);
  EXPECT_OK(proccap);
  EXPECT_OK(cap_rights_limit(proccap, &rights));

  // read() does not NUL-terminate, so keep one byte spare and terminate the
  // data explicitly before handing the buffer to strstr().
  ssize_t len = read(proccap, buffer, sizeof(buffer) - 1);
  EXPECT_OK(len);
  buffer[len > 0 ? len : 0] = '\0';
  // The fdinfo should include the file pos of the underlying file
  EXPECT_NE((char*)NULL, strstr(buffer, "pos:\t4"));
  // ...and the rights of the Capsicum capability.
  EXPECT_NE((char*)NULL, strstr(buffer, "rights:\t0x"));

  close(procfd);
  close(proccap);
  close(cap);
  close(fd);
}
1254
1255
// From inside capability mode, query the CPU-time clocks of other processes
// (a forked child, then pid 1) by hand-building their clock IDs.  Both queries
// are currently expected to succeed; see the TODO below.
FORK_TEST(Linux, ProcessClocks) {
  pid_t self = getpid_();
  pid_t child = fork();
  EXPECT_OK(child);
  if (child == 0) {
    // Child: just stay alive briefly so the parent can query its clock.
    child = getpid_();
    usleep(100000);
    exit(0);
  }

  EXPECT_OK(cap_enter());  // Enter capability mode.

  // Nefariously build a clock ID for the child's CPU time.
  // This relies on knowledge of the internal layout of clock IDs.
  clockid_t child_clock;
  child_clock = ((~child) << 3) | 0x0;
  struct timespec child_ts;
  memset(&child_ts, 0, sizeof(child_ts));

  // TODO(drysdale): Should not be possible to retrieve info about a
  // different process, as the PID global namespace should be locked
  // down.
  EXPECT_OK(clock_gettime(child_clock, &child_ts));
  if (verbose) fprintf(stderr, "[parent: %d] clock_gettime(child=%d->0x%08x) is %ld.%09ld \n",
                       self, child, child_clock, (long)child_ts.tv_sec, (long)child_ts.tv_nsec);

  // Same again, this time for pid 1.
  child_clock = ((~1) << 3) | 0x0;
  struct timespec init_ts;
  memset(&init_ts, 0, sizeof(init_ts));
  EXPECT_OK(clock_gettime(child_clock, &init_ts));
  if (verbose) fprintf(stderr, "[parent: %d] clock_gettime(init=1->0x%08x) is %ld.%09ld \n",
                       self, child_clock, (long)init_ts.tv_sec, (long)init_ts.tv_nsec);

  // Orphan the child.
}
1289
1290
// fcntl(2) lease operations (F_SETLEASE / F_GETLEASE) are refused on a
// capability that only has CAP_READ|CAP_WRITE, and work on one that also has
// CAP_FLOCK and CAP_FSIGNAL.
TEST(Linux, SetLease) {
  // Rights sets: one with the two extra rights, one without.
  cap_rights_t r_all;
  cap_rights_init(&r_all, CAP_READ, CAP_WRITE, CAP_FLOCK, CAP_FSIGNAL);
  cap_rights_t r_rw;
  cap_rights_init(&r_rw, CAP_READ, CAP_WRITE);

  int fd_all = open(TmpFile("cap_lease"), O_CREAT|O_RDWR, 0644);
  EXPECT_OK(fd_all);
  int fd_rw = dup(fd_all);
  EXPECT_OK(fd_rw);

  EXPECT_OK(cap_rights_limit(fd_all, &r_all));
  EXPECT_OK(cap_rights_limit(fd_rw, &r_rw));

  // Without the extra rights neither setting nor querying a lease is allowed.
  EXPECT_NOTCAPABLE(fcntl(fd_rw, F_SETLEASE, F_WRLCK));
  EXPECT_NOTCAPABLE(fcntl(fd_rw, F_GETLEASE));

  if (!tmpdir_on_tmpfs) {  // tmpfs doesn't support leases
    // Take a write lease and read it back...
    EXPECT_OK(fcntl(fd_all, F_SETLEASE, F_WRLCK));
    EXPECT_EQ(F_WRLCK, fcntl(fd_all, F_GETLEASE));

    // ...then drop it again.
    EXPECT_OK(fcntl(fd_all, F_SETLEASE, F_UNLCK, 0));
    EXPECT_EQ(F_UNLCK, fcntl(fd_all, F_GETLEASE));
  }
  close(fd_all);
  close(fd_rw);
  unlink(TmpFile("cap_lease"));
}
1318
1319
// Call the cap_rights_limit syscall directly (bypassing the library wrapper)
// with a series of malformed argument combinations, checking the errno that
// each one produces.
TEST(Linux, InvalidRightsSyscall) {
  int fd = open(TmpFile("cap_invalid_rights"), O_RDONLY|O_CREAT, 0644);
  EXPECT_OK(fd);

  cap_rights_t rights;
  cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FCHMOD, CAP_FSTAT);

  // Use the raw syscall throughout.  A well-formed call succeeds.
  EXPECT_EQ(0, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0));

  // Directly access the syscall, and find all unseemly manner of use for it.

  // - Invalid flags
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 1));
  EXPECT_EQ(EINVAL, errno);

  // - Specify an fcntl subright, but no CAP_FCNTL set
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, CAP_FCNTL_GETFL, 0, NULL, 0));
  EXPECT_EQ(EINVAL, errno);

  // - Specify an ioctl subright, but no CAP_IOCTL set
  unsigned int single_ioctl = 1;
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 1, &single_ioctl, 0));
  EXPECT_EQ(EINVAL, errno);

  // - N ioctls, but null pointer passed
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 1, NULL, 0));
  EXPECT_EQ(EINVAL, errno);

  // - Invalid nioctls
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, -2, NULL, 0));
  EXPECT_EQ(EINVAL, errno);

  // - Null primary rights
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, NULL, 0, 0, NULL, 0));
  EXPECT_EQ(EFAULT, errno);

  // From here on the rights structure itself is corrupted in place.

  // - Invalid index bitmask
  rights.cr_rights[0] |= 3ULL << 57;
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0));
  EXPECT_EQ(EINVAL, errno);

  // - Invalid version
  rights.cr_rights[0] |= 2ULL << 62;
  EXPECT_EQ(-1, syscall(__NR_cap_rights_limit, fd, &rights, 0, 0, NULL, 0));
  EXPECT_EQ(EINVAL, errno);

  close(fd);
  unlink(TmpFile("cap_invalid_rights"));
}
1361
1362
// Create a file, obtain a handle for it with name_to_handle_at(2) and reopen
// it with open_by_handle_at(2); then check that open_by_handle_at(2) is
// refused once the process is in capability mode.  Requires root.
FORK_TEST_ON(Linux, OpenByHandleAtIfRoot, TmpFile("cap_openbyhandle_testfile")) {
  GTEST_SKIP_IF_NOT_ROOT();
  int dir = open(tmpdir.c_str(), O_RDONLY);
  EXPECT_OK(dir);
  int fd = openat(dir, "cap_openbyhandle_testfile", O_RDWR|O_CREAT, 0644);
  EXPECT_OK(fd);
  const char* message = "Saved text";
  EXPECT_OK(write(fd, message, strlen(message)));
  close(fd);

  struct file_handle* fhandle = (struct file_handle*)malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
  EXPECT_NE((struct file_handle*)NULL, fhandle);
  if (fhandle == NULL) {
    // Cannot continue without storage for the handle.
    close(dir);
    return;
  }
  fhandle->handle_bytes = MAX_HANDLE_SZ;
  int mount_id;
  EXPECT_OK(name_to_handle_at(dir, "cap_openbyhandle_testfile", fhandle, &mount_id, 0));

  // Reopen via the handle and confirm the contents written above.
  fd = open_by_handle_at(dir, fhandle, O_RDONLY);
  EXPECT_OK(fd);
  char buffer[200];
  ssize_t len = read(fd, buffer, 199);
  EXPECT_OK(len);
  if (len >= 0) {
    // Only build the string from a valid length; a negative ssize_t would be
    // converted to an enormous size_t.
    EXPECT_EQ(std::string(message), std::string(buffer, len));
  }
  close(fd);

  // Cannot issue open_by_handle_at after entering capability mode.
  EXPECT_OK(cap_enter());
  EXPECT_CAPMODE(open_by_handle_at(dir, fhandle, O_RDONLY));

  free(fhandle);
  close(dir);
}
1391
1392
// Wrapper for getrandom(2) that issues the raw syscall.  When the build
// headers do not define __NR_getrandom it fails with errno set to ENOSYS.
int getrandom_(void *buf, size_t buflen, unsigned int flags) {
#ifndef __NR_getrandom
  errno = ENOSYS;
  return -1;
#else
  return syscall(__NR_getrandom, buf, buflen, flags);
#endif
}
1400
1401
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
1402
#include <linux/random.h> // Requires 3.17 kernel
1403
FORK_TEST(Linux, GetRandom) {
1404
EXPECT_OK(cap_enter());
1405
unsigned char buffer[1024];
1406
unsigned char buffer2[1024];
1407
EXPECT_OK(getrandom_(buffer, sizeof(buffer), GRND_NONBLOCK));
1408
EXPECT_OK(getrandom_(buffer2, sizeof(buffer2), GRND_NONBLOCK));
1409
EXPECT_NE(0, memcmp(buffer, buffer2, sizeof(buffer)));
1410
}
1411
#endif
1412
1413
// Wrapper for memfd_create(2) that issues the raw syscall.  When the build
// headers do not define __NR_memfd_create it fails with errno set to ENOSYS.
int memfd_create_(const char *name, unsigned int flags) {
#ifndef __NR_memfd_create
  errno = ENOSYS;
  return -1;
#else
  return syscall(__NR_memfd_create, name, flags);
#endif
}
1421
1422
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
1423
#include <linux/memfd.h> // Requires 3.17 kernel
1424
TEST(Linux, MemFDDeathTestIfAvailable) {
1425
int memfd = memfd_create_("capsicum-test", MFD_ALLOW_SEALING);
1426
if (memfd == -1 && errno == ENOSYS) {
1427
GTEST_SKIP() << "memfd_create(2) gives -ENOSYS";
1428
}
1429
const int LEN = 16;
1430
EXPECT_OK(ftruncate(memfd, LEN));
1431
int memfd_ro = dup(memfd);
1432
int memfd_rw = dup(memfd);
1433
EXPECT_OK(memfd_ro);
1434
EXPECT_OK(memfd_rw);
1435
cap_rights_t rights;
1436
EXPECT_OK(cap_rights_limit(memfd_ro, cap_rights_init(&rights, CAP_MMAP_R, CAP_FSTAT)));
1437
EXPECT_OK(cap_rights_limit(memfd_rw, cap_rights_init(&rights, CAP_MMAP_RW, CAP_FCHMOD)));
1438
1439
unsigned char *p_ro = (unsigned char *)mmap(NULL, LEN, PROT_READ, MAP_SHARED, memfd_ro, 0);
1440
EXPECT_NE((unsigned char *)MAP_FAILED, p_ro);
1441
unsigned char *p_rw = (unsigned char *)mmap(NULL, LEN, PROT_READ|PROT_WRITE, MAP_SHARED, memfd_rw, 0);
1442
EXPECT_NE((unsigned char *)MAP_FAILED, p_rw);
1443
EXPECT_EQ(MAP_FAILED,
1444
mmap(NULL, LEN, PROT_READ|PROT_WRITE, MAP_SHARED, memfd_ro, 0));
1445
1446
*p_rw = 42;
1447
EXPECT_EQ(42, *p_ro);
1448
EXPECT_DEATH(*p_ro = 42, "");
1449
1450
#ifndef F_ADD_SEALS
1451
// Hack for when libc6 does not yet include the updated linux/fcntl.h from kernel 3.17
1452
#define _F_LINUX_SPECIFIC_BASE F_SETLEASE
1453
#define F_ADD_SEALS (_F_LINUX_SPECIFIC_BASE + 9)
1454
#define F_GET_SEALS (_F_LINUX_SPECIFIC_BASE + 10)
1455
#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
1456
#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
1457
#define F_SEAL_GROW 0x0004 /* prevent file from growing */
1458
#define F_SEAL_WRITE 0x0008 /* prevent writes */
1459
#endif
1460
1461
// Reading the seal information requires CAP_FSTAT.
1462
int seals = fcntl(memfd, F_GET_SEALS);
1463
EXPECT_OK(seals);
1464
if (verbose) fprintf(stderr, "seals are %08x on base fd\n", seals);
1465
int seals_ro = fcntl(memfd_ro, F_GET_SEALS);
1466
EXPECT_EQ(seals, seals_ro);
1467
if (verbose) fprintf(stderr, "seals are %08x on read-only fd\n", seals_ro);
1468
int seals_rw = fcntl(memfd_rw, F_GET_SEALS);
1469
EXPECT_NOTCAPABLE(seals_rw);
1470
1471
// Fail to seal as a writable mapping exists.
1472
EXPECT_EQ(-1, fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE));
1473
EXPECT_EQ(EBUSY, errno);
1474
*p_rw = 42;
1475
1476
// Seal the rw version; need to unmap first.
1477
munmap(p_rw, LEN);
1478
munmap(p_ro, LEN);
1479
EXPECT_OK(fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE));
1480
1481
seals = fcntl(memfd, F_GET_SEALS);
1482
EXPECT_OK(seals);
1483
if (verbose) fprintf(stderr, "seals are %08x on base fd\n", seals);
1484
seals_ro = fcntl(memfd_ro, F_GET_SEALS);
1485
EXPECT_EQ(seals, seals_ro);
1486
if (verbose) fprintf(stderr, "seals are %08x on read-only fd\n", seals_ro);
1487
1488
// Remove the CAP_FCHMOD right, can no longer add seals.
1489
EXPECT_OK(cap_rights_limit(memfd_rw, cap_rights_init(&rights, CAP_MMAP_RW)));
1490
EXPECT_NOTCAPABLE(fcntl(memfd_rw, F_ADD_SEALS, F_SEAL_WRITE));
1491
1492
close(memfd);
1493
close(memfd_ro);
1494
close(memfd_rw);
1495
}
1496
#endif
1497
1498
#else
1499
// Placeholder definition for builds where __linux__ is not defined.
void noop() {
}
1500
#endif
1501
1502