Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/kern/kern_jaildesc.c
105152 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2025 James Gritton.
5
* All rights reserved.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26
* SUCH DAMAGE.
27
*/
28
29
#include <sys/param.h>
30
#include <sys/fcntl.h>
31
#include <sys/file.h>
32
#include <sys/filedesc.h>
33
#include <sys/kernel.h>
34
#include <sys/jail.h>
35
#include <sys/jaildesc.h>
36
#include <sys/lock.h>
37
#include <sys/malloc.h>
38
#include <sys/mutex.h>
39
#include <sys/poll.h>
40
#include <sys/priv.h>
41
#include <sys/stat.h>
42
#include <sys/sysproto.h>
43
#include <sys/systm.h>
44
#include <sys/ucred.h>
45
#include <sys/user.h>
46
#include <sys/vnode.h>
47
48
MALLOC_DEFINE(M_JAILDESC, "jaildesc", "jail descriptors");
49
50
static fo_poll_t jaildesc_poll;
51
static fo_kqfilter_t jaildesc_kqfilter;
52
static fo_stat_t jaildesc_stat;
53
static fo_close_t jaildesc_close;
54
static fo_fill_kinfo_t jaildesc_fill_kinfo;
55
static fo_cmp_t jaildesc_cmp;
56
57
static const struct fileops jaildesc_ops = {
58
.fo_read = invfo_rdwr,
59
.fo_write = invfo_rdwr,
60
.fo_truncate = invfo_truncate,
61
.fo_ioctl = invfo_ioctl,
62
.fo_poll = jaildesc_poll,
63
.fo_kqfilter = jaildesc_kqfilter,
64
.fo_stat = jaildesc_stat,
65
.fo_close = jaildesc_close,
66
.fo_chmod = invfo_chmod,
67
.fo_chown = invfo_chown,
68
.fo_sendfile = invfo_sendfile,
69
.fo_fill_kinfo = jaildesc_fill_kinfo,
70
.fo_cmp = jaildesc_cmp,
71
.fo_flags = DFLAG_PASSABLE,
72
};
73
74
/*
75
* Retrieve a prison from a jail descriptor. If prp is not NULL, then the
76
* prison will be held and subsequently returned, and must be released by the
77
* caller. This differs from jaildesc_get_prison in that it doesn't actually
78
* require the caller to take the struct prison, which we use internally when
79
* the caller doesn't necessarily need it- it might just want to check validity.
80
*/
81
static int
82
jaildesc_get_prison_impl(struct file *fp, struct prison **prp)
83
{
84
struct prison *pr;
85
struct jaildesc *jd;
86
87
if (fp->f_type != DTYPE_JAILDESC)
88
return (EINVAL);
89
90
jd = fp->f_data;
91
JAILDESC_LOCK(jd);
92
pr = jd->jd_prison;
93
if (pr == NULL || !prison_isvalid(pr)) {
94
JAILDESC_UNLOCK(jd);
95
return (ENOENT);
96
}
97
98
if (prp != NULL) {
99
prison_hold(pr);
100
*prp = pr;
101
}
102
103
JAILDESC_UNLOCK(jd);
104
105
return (0);
106
}
107
108
/*
109
* Given a jail descriptor number, return its prison and/or its
110
* credential. They are returned held, and will need to be released
111
* by the caller.
112
*/
113
int
114
jaildesc_find(struct thread *td, int fd, struct prison **prp,
115
struct ucred **ucredp)
116
{
117
struct file *fp;
118
int error;
119
120
error = fget(td, fd, &cap_no_rights, &fp);
121
if (error != 0)
122
return (error);
123
124
error = jaildesc_get_prison_impl(fp, prp);
125
if (error == 0) {
126
/*
127
* jaildesc_get_prison validated the file and held the prison
128
* for us if the caller wants it, so we just need to grab the
129
* ucred on the way out.
130
*/
131
if (ucredp != NULL)
132
*ucredp = crhold(fp->f_cred);
133
}
134
135
fdrop(fp, td);
136
return (error);
137
}
138
139
/*
140
* Allocate a new jail decriptor, not yet associated with a prison.
141
* Return the file pointer (with a reference held) and the descriptor
142
* number.
143
*/
144
int
145
jaildesc_alloc(struct thread *td, struct file **fpp, int *fdp, int owning)
146
{
147
struct file *fp;
148
struct jaildesc *jd;
149
int error;
150
151
if (owning) {
152
error = priv_check(td, PRIV_JAIL_REMOVE);
153
if (error != 0)
154
return (error);
155
}
156
jd = malloc(sizeof(*jd), M_JAILDESC, M_WAITOK | M_ZERO);
157
error = falloc_caps(td, &fp, fdp, 0, NULL);
158
if (error != 0) {
159
free(jd, M_JAILDESC);
160
return (error);
161
}
162
finit(fp, priv_check_cred(fp->f_cred, PRIV_JAIL_SET) == 0 ?
163
FREAD | FWRITE : FREAD, DTYPE_JAILDESC, jd, &jaildesc_ops);
164
JAILDESC_LOCK_INIT(jd);
165
knlist_init_mtx(&jd->jd_selinfo.si_note, &jd->jd_lock);
166
if (owning)
167
jd->jd_flags |= JDF_OWNING;
168
*fpp = fp;
169
return (0);
170
}
171
172
/*
173
* Retrieve a prison from a jail descriptor. It will be returned held, and must
174
* be released by the caller.
175
*/
176
int
177
jaildesc_get_prison(struct file *fp, struct prison **prp)
178
{
179
MPASS(prp != NULL);
180
return (jaildesc_get_prison_impl(fp, prp));
181
}
182
183
/*
184
* Assocate a jail descriptor with its prison.
185
*/
186
void
187
jaildesc_set_prison(struct file *fp, struct prison *pr)
188
{
189
struct jaildesc *jd;
190
191
mtx_assert(&pr->pr_mtx, MA_OWNED);
192
jd = fp->f_data;
193
JAILDESC_LOCK(jd);
194
jd->jd_prison = pr;
195
LIST_INSERT_HEAD(&pr->pr_descs, jd, jd_list);
196
prison_hold(pr);
197
JAILDESC_UNLOCK(jd);
198
}
199
200
/*
201
* Detach all the jail descriptors from a prison.
202
*/
203
void
204
jaildesc_prison_cleanup(struct prison *pr)
205
{
206
struct jaildesc *jd;
207
208
mtx_assert(&pr->pr_mtx, MA_OWNED);
209
while ((jd = LIST_FIRST(&pr->pr_descs))) {
210
JAILDESC_LOCK(jd);
211
LIST_REMOVE(jd, jd_list);
212
jd->jd_prison = NULL;
213
JAILDESC_UNLOCK(jd);
214
prison_free(pr);
215
}
216
}
217
218
/*
219
* Pass a note to all listening kqueues.
220
*/
221
void
222
jaildesc_knote(struct prison *pr, long hint)
223
{
224
struct jaildesc *jd;
225
int prison_locked;
226
227
if (!LIST_EMPTY(&pr->pr_descs)) {
228
prison_locked = mtx_owned(&pr->pr_mtx);
229
if (!prison_locked)
230
prison_lock(pr);
231
LIST_FOREACH(jd, &pr->pr_descs, jd_list) {
232
JAILDESC_LOCK(jd);
233
if (hint == NOTE_JAIL_REMOVE) {
234
jd->jd_flags |= JDF_REMOVED;
235
if (jd->jd_flags & JDF_SELECTED) {
236
jd->jd_flags &= ~JDF_SELECTED;
237
selwakeup(&jd->jd_selinfo);
238
}
239
}
240
KNOTE_LOCKED(&jd->jd_selinfo.si_note, hint);
241
JAILDESC_UNLOCK(jd);
242
}
243
if (!prison_locked)
244
prison_unlock(pr);
245
}
246
}
247
248
static int
249
jaildesc_close(struct file *fp, struct thread *td)
250
{
251
struct jaildesc *jd;
252
struct prison *pr;
253
254
jd = fp->f_data;
255
fp->f_data = NULL;
256
if (jd != NULL) {
257
JAILDESC_LOCK(jd);
258
pr = jd->jd_prison;
259
if (pr != NULL) {
260
/*
261
* Free or remove the associated prison.
262
* This requires a second check after re-
263
* ordering locks. This jaildesc can remain
264
* unlocked once we have a prison reference,
265
* because that prison is the only place that
266
* still points back to it.
267
*/
268
prison_hold(pr);
269
JAILDESC_UNLOCK(jd);
270
if (jd->jd_flags & JDF_OWNING) {
271
sx_xlock(&allprison_lock);
272
prison_lock(pr);
273
if (jd->jd_prison != NULL) {
274
/*
275
* Unlink the prison, but don't free
276
* it; that will be done as part of
277
* of prison_remove.
278
*/
279
LIST_REMOVE(jd, jd_list);
280
prison_remove(pr);
281
} else {
282
prison_unlock(pr);
283
sx_xunlock(&allprison_lock);
284
}
285
} else {
286
prison_lock(pr);
287
if (jd->jd_prison != NULL) {
288
LIST_REMOVE(jd, jd_list);
289
prison_free(pr);
290
}
291
prison_unlock(pr);
292
}
293
prison_free(pr);
294
}
295
knlist_destroy(&jd->jd_selinfo.si_note);
296
JAILDESC_LOCK_DESTROY(jd);
297
free(jd, M_JAILDESC);
298
}
299
return (0);
300
}
301
302
static int
303
jaildesc_poll(struct file *fp, int events, struct ucred *active_cred,
304
struct thread *td)
305
{
306
struct jaildesc *jd;
307
int revents;
308
309
revents = 0;
310
jd = fp->f_data;
311
JAILDESC_LOCK(jd);
312
if (jd->jd_flags & JDF_REMOVED)
313
revents |= POLLHUP;
314
if (revents == 0) {
315
selrecord(td, &jd->jd_selinfo);
316
jd->jd_flags |= JDF_SELECTED;
317
}
318
JAILDESC_UNLOCK(jd);
319
return (revents);
320
}
321
322
static void
323
jaildesc_kqops_detach(struct knote *kn)
324
{
325
struct jaildesc *jd;
326
327
jd = kn->kn_fp->f_data;
328
knlist_remove(&jd->jd_selinfo.si_note, kn, 0);
329
}
330
331
static int
332
jaildesc_kqops_event(struct knote *kn, long hint)
333
{
334
struct jaildesc *jd;
335
u_int event;
336
337
jd = kn->kn_fp->f_data;
338
if (hint == 0) {
339
/*
340
* Initial test after registration. Generate a
341
* NOTE_JAIL_REMOVE in case the prison already died
342
* before registration.
343
*/
344
event = jd->jd_flags & JDF_REMOVED ? NOTE_JAIL_REMOVE : 0;
345
} else {
346
/*
347
* Mask off extra data. In the NOTE_JAIL_CHILD case,
348
* that's everything except the NOTE_JAIL_CHILD bit
349
* itself, since a JID is any positive integer.
350
*/
351
event = ((u_int)hint & NOTE_JAIL_CHILD) ? NOTE_JAIL_CHILD :
352
(u_int)hint & NOTE_JAIL_CTRLMASK;
353
}
354
355
/* If the user is interested in this event, record it. */
356
if (kn->kn_sfflags & event) {
357
kn->kn_fflags |= event;
358
/* Report the created jail id or attached process id. */
359
if (event == NOTE_JAIL_CHILD || event == NOTE_JAIL_ATTACH) {
360
if (kn->kn_data != 0)
361
kn->kn_fflags |= NOTE_JAIL_MULTI;
362
kn->kn_data = (kn->kn_fflags & NOTE_JAIL_MULTI) ? 0U :
363
(u_int)hint & ~event;
364
}
365
}
366
367
/* Prison is gone, so flag the event as finished. */
368
if (event == NOTE_JAIL_REMOVE) {
369
kn->kn_flags |= EV_EOF | EV_ONESHOT;
370
if (kn->kn_fflags == 0)
371
kn->kn_flags |= EV_DROP;
372
return (1);
373
}
374
375
return (kn->kn_fflags != 0);
376
}
377
378
static const struct filterops jaildesc_kqops = {
379
.f_isfd = 1,
380
.f_detach = jaildesc_kqops_detach,
381
.f_event = jaildesc_kqops_event,
382
.f_copy = knote_triv_copy,
383
};
384
385
static int
386
jaildesc_kqfilter(struct file *fp, struct knote *kn)
387
{
388
struct jaildesc *jd;
389
390
jd = fp->f_data;
391
switch (kn->kn_filter) {
392
case EVFILT_JAILDESC:
393
kn->kn_fop = &jaildesc_kqops;
394
kn->kn_flags |= EV_CLEAR;
395
knlist_add(&jd->jd_selinfo.si_note, kn, 0);
396
return (0);
397
default:
398
return (EINVAL);
399
}
400
}
401
402
static int
403
jaildesc_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
404
{
405
struct jaildesc *jd;
406
407
bzero(sb, sizeof(struct stat));
408
jd = fp->f_data;
409
JAILDESC_LOCK(jd);
410
if (jd->jd_prison != NULL) {
411
sb->st_ino = jd->jd_prison->pr_id;
412
sb->st_mode = S_IFREG | S_IRWXU;
413
} else
414
sb->st_mode = S_IFREG;
415
JAILDESC_UNLOCK(jd);
416
return (0);
417
}
418
419
static int
420
jaildesc_fill_kinfo(struct file *fp, struct kinfo_file *kif,
421
struct filedesc *fdp)
422
{
423
struct jaildesc *jd;
424
425
jd = fp->f_data;
426
kif->kf_type = KF_TYPE_JAILDESC;
427
kif->kf_un.kf_jail.kf_jid = jd->jd_prison ? jd->jd_prison->pr_id : 0;
428
return (0);
429
}
430
431
static int
432
jaildesc_cmp(struct file *fp1, struct file *fp2, struct thread *td)
433
{
434
struct jaildesc *jd1, *jd2;
435
int jid1, jid2;
436
437
if (fp2->f_type != DTYPE_JAILDESC)
438
return (3);
439
jd1 = fp1->f_data;
440
JAILDESC_LOCK(jd1);
441
jid1 = jd1->jd_prison ? (uintptr_t)jd1->jd_prison->pr_id : 0;
442
JAILDESC_UNLOCK(jd1);
443
jd2 = fp2->f_data;
444
JAILDESC_LOCK(jd2);
445
jid2 = jd2->jd_prison ? (uintptr_t)jd2->jd_prison->pr_id : 0;
446
JAILDESC_UNLOCK(jd2);
447
return (kcmp_cmp(jid1, jid2));
448
}
449
450