Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/kern/kern_jailmeta.c
39475 views
1
/*-
2
* SPDX-License-Identifier: BSD-2-Clause
3
*
4
* Copyright (c) 2024 SkunkWerks GmbH
5
*
6
* This software was developed by Igor Ostapenko <[email protected]>
7
* under sponsorship from SkunkWerks GmbH.
8
*/
9
10
#include <sys/param.h>
11
#include <sys/_bitset.h>
12
#include <sys/bitset.h>
13
#include <sys/lock.h>
14
#include <sys/sx.h>
15
#include <sys/kernel.h>
16
#include <sys/mount.h>
17
#include <sys/malloc.h>
18
#include <sys/jail.h>
19
#include <sys/osd.h>
20
#include <sys/proc.h>
21
22
/*
 * Buffer limits.
 *
 * Two limits are maintained. The hard limit is the value actually enforced
 * whenever metadata is set or modified. The soft limit is consumed solely by
 * the security.jail.param.meta and .env sysctls: if the hard limit is
 * lowered, the soft limit may stay above it so that meta strings stored
 * earlier can still be interpreted correctly by end-user interfaces such as
 * jls(8).
 */
#define	JM_MAXBUFSIZE_DEFAULT	4096

static uint32_t jm_maxbufsize_hard = JM_MAXBUFSIZE_DEFAULT;
static uint32_t jm_maxbufsize_soft = JM_MAXBUFSIZE_DEFAULT;
34
35
static int
36
jm_sysctl_meta_maxbufsize(SYSCTL_HANDLER_ARGS)
37
{
38
int error;
39
uint32_t newmax = 0;
40
41
/* Reading only. */
42
43
if (req->newptr == NULL) {
44
sx_slock(&allprison_lock);
45
error = SYSCTL_OUT(req, &jm_maxbufsize_hard,
46
sizeof(jm_maxbufsize_hard));
47
sx_sunlock(&allprison_lock);
48
49
return (error);
50
}
51
52
/* Reading and writing. */
53
54
sx_xlock(&allprison_lock);
55
56
error = SYSCTL_OUT(req, &jm_maxbufsize_hard,
57
sizeof(jm_maxbufsize_hard));
58
if (error != 0)
59
goto end;
60
61
error = SYSCTL_IN(req, &newmax, sizeof(newmax));
62
if (error != 0)
63
goto end;
64
65
jm_maxbufsize_hard = newmax;
66
if (jm_maxbufsize_hard >= jm_maxbufsize_soft) {
67
jm_maxbufsize_soft = jm_maxbufsize_hard;
68
} else if (TAILQ_EMPTY(&allprison)) {
69
/*
70
* For now, this is the simplest way to
71
* avoid O(n) iteration over all prisons in
72
* case of a large n.
73
*/
74
jm_maxbufsize_soft = jm_maxbufsize_hard;
75
}
76
77
end:
78
sx_xunlock(&allprison_lock);
79
return (error);
80
}
81
SYSCTL_PROC(_security_jail, OID_AUTO, meta_maxbufsize,
82
CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
83
jm_sysctl_meta_maxbufsize, "IU",
84
"Maximum buffer size of each meta and env");
85
86
87
/* Jail parameter announcement. */
88
89
static int
90
jm_sysctl_param_meta(SYSCTL_HANDLER_ARGS)
91
{
92
uint32_t soft;
93
94
sx_slock(&allprison_lock);
95
soft = jm_maxbufsize_soft;
96
sx_sunlock(&allprison_lock);
97
98
return (sysctl_jail_param(oidp, arg1, soft, req));
99
}
100
SYSCTL_PROC(_security_jail_param, OID_AUTO, meta,
101
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
102
jm_sysctl_param_meta, "A,keyvalue",
103
"Jail meta information hidden from the jail");
104
SYSCTL_PROC(_security_jail_param, OID_AUTO, env,
105
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
106
jm_sysctl_param_meta, "A,keyvalue",
107
"Jail meta information readable by the jail");
108
109
110
/* Generic OSD-based logic for any metadata buffer. */
111
112
struct meta {
113
char *name;
114
u_int osd_slot;
115
osd_method_t methods[PR_MAXMETHOD];
116
};
117
118
/*
 * The buffer that results from all manipulations is described by a singly
 * linked chain of hunks, each one referring to a run of bytes.
 */
struct hunk {
	char		*p;	/* first byte this hunk refers to */
	size_t		 len;	/* number of bytes referred to */
	char		*owned;	/* allocation to be freed with the hunk */
	struct hunk	*next;	/* following hunk, NULL for the last one */
};
125
126
static inline struct hunk *
127
jm_h_alloc(void)
128
{
129
/* All fields are zeroed. */
130
return (malloc(sizeof(struct hunk), M_PRISON, M_WAITOK | M_ZERO));
131
}
132
133
static inline struct hunk *
134
jm_h_prepend(struct hunk *h, char *p, size_t len)
135
{
136
struct hunk *n;
137
138
n = jm_h_alloc();
139
n->p = p;
140
n->len = len;
141
n->next = h;
142
return (n);
143
}
144
145
static inline void
146
jm_h_cut_line(struct hunk *h, char *begin)
147
{
148
struct hunk *rem;
149
char *end;
150
151
/* Find the end of key=value. */
152
for (end = begin; (end + 1) < (h->p + h->len); end++)
153
if (*end == '\0' || *end == '\n')
154
break;
155
156
/* Pick up a non-empty remainder. */
157
if ((end + 1) < (h->p + h->len) && *(end + 1) != '\0') {
158
rem = jm_h_alloc();
159
rem->p = end + 1;
160
rem->len = h->p + h->len - rem->p;
161
162
/* insert */
163
rem->next = h->next;
164
h->next = rem;
165
}
166
167
/* Shorten this hunk. */
168
h->len = begin - h->p;
169
}
170
171
static inline void
172
jm_h_cut_occurrences(struct hunk *h, const char *key, size_t keylen)
173
{
174
char *p = h->p;
175
176
#define nexthunk() \
177
do { \
178
h = h->next; \
179
p = (h == NULL) ? NULL : h->p; \
180
} while (0)
181
182
while (p != NULL) {
183
p = strnstr(p, key, h->len - (p - h->p));
184
if (p == NULL) {
185
nexthunk();
186
continue;
187
}
188
if ((p == h->p || *(p - 1) == '\n') && p[keylen] == '=') {
189
jm_h_cut_line(h, p);
190
nexthunk();
191
continue;
192
}
193
/* Continue with this hunk. */
194
p += keylen;
195
/* Empty? The next hunk then. */
196
if ((p - h->p) >= h->len)
197
nexthunk();
198
}
199
}
200
201
static inline size_t
202
jm_h_len(struct hunk *h)
203
{
204
size_t len = 0;
205
while (h != NULL) {
206
len += h->len;
207
h = h->next;
208
}
209
return (len);
210
}
211
212
static inline void
213
jm_h_assemble(char *dst, struct hunk *h)
214
{
215
while (h != NULL) {
216
if (h->len > 0) {
217
memcpy(dst, h->p, h->len);
218
dst += h->len;
219
/* If not the last hunk then concatenate with \n. */
220
if (h->next != NULL && *(dst - 1) == '\0')
221
*(dst - 1) = '\n';
222
}
223
h = h->next;
224
}
225
}
226
227
static inline struct hunk *
228
jm_h_freechain(struct hunk *h)
229
{
230
struct hunk *n = h;
231
while (n != NULL) {
232
h = n;
233
n = h->next;
234
free(h->owned, M_PRISON);
235
free(h, M_PRISON);
236
}
237
238
return (NULL);
239
}
240
241
static int
242
jm_osd_method_set(void *obj, void *data, const struct meta *meta)
243
{
244
struct prison *pr = obj;
245
struct vfsoptlist *opts = data;
246
struct vfsopt *opt;
247
248
char *origosd;
249
char *origosd_copy;
250
char *oldosd;
251
char *osd;
252
size_t osdlen;
253
struct hunk *h;
254
char *key;
255
size_t keylen;
256
int error;
257
int repeats = 0;
258
bool repeat;
259
260
sx_assert(&allprison_lock, SA_XLOCKED);
261
262
again:
263
origosd = NULL;
264
origosd_copy = NULL;
265
osd = NULL;
266
h = NULL;
267
error = 0;
268
repeat = false;
269
TAILQ_FOREACH(opt, opts, link) {
270
/* Look for options with <metaname> prefix. */
271
if (strstr(opt->name, meta->name) != opt->name)
272
continue;
273
/* Consider only full <metaname> or <metaname>.* ones. */
274
if (opt->name[strlen(meta->name)] != '.' &&
275
opt->name[strlen(meta->name)] != '\0')
276
continue;
277
opt->seen = 1;
278
279
/* The very first preconditions. */
280
if (opt->len < 0)
281
continue;
282
if (opt->len > jm_maxbufsize_hard) {
283
error = EFBIG;
284
break;
285
}
286
/* NULL-terminated strings are expected from vfsopt. */
287
if (opt->value != NULL &&
288
((char *)opt->value)[opt->len - 1] != '\0') {
289
error = EINVAL;
290
break;
291
}
292
293
/* Work with our own copy of existing metadata. */
294
if (h == NULL) {
295
h = jm_h_alloc(); /* zeroed */
296
mtx_lock(&pr->pr_mtx);
297
origosd = osd_jail_get(pr, meta->osd_slot);
298
if (origosd != NULL) {
299
origosd_copy = malloc(strlen(origosd) + 1,
300
M_PRISON, M_NOWAIT);
301
if (origosd_copy == NULL)
302
error = ENOMEM;
303
else {
304
h->p = origosd_copy;
305
h->len = strlen(origosd) + 1;
306
memcpy(h->p, origosd, h->len);
307
}
308
}
309
mtx_unlock(&pr->pr_mtx);
310
if (error != 0)
311
break;
312
}
313
314
/* 1) Change the whole metadata. */
315
if (strcmp(opt->name, meta->name) == 0) {
316
if (opt->len > jm_maxbufsize_hard) {
317
error = EFBIG;
318
break;
319
}
320
h = jm_h_freechain(h);
321
h = jm_h_prepend(h,
322
(opt->value != NULL) ? opt->value : "",
323
/* avoid empty NULL-terminated string */
324
(opt->len > 1) ? opt->len : 0);
325
continue;
326
}
327
328
/* 2) Or add/replace/remove a specific key=value. */
329
key = opt->name + strlen(meta->name) + 1;
330
keylen = strlen(key);
331
if (keylen < 1) {
332
error = EINVAL;
333
break;
334
}
335
jm_h_cut_occurrences(h, key, keylen);
336
if (opt->value == NULL)
337
continue; /* key removal */
338
h = jm_h_prepend(h, NULL, 0);
339
h->len = keylen + 1 + opt->len; /* key=value\0 */
340
h->owned = malloc(h->len, M_PRISON, M_WAITOK | M_ZERO);
341
h->p = h->owned;
342
memcpy(h->p, key, keylen);
343
h->p[keylen] = '=';
344
memcpy(h->p + keylen + 1, opt->value, opt->len);
345
}
346
347
if (h == NULL || error != 0)
348
goto end;
349
350
/* Assemble the final contiguous buffer. */
351
osdlen = jm_h_len(h);
352
if (osdlen > jm_maxbufsize_hard) {
353
error = EFBIG;
354
goto end;
355
}
356
if (osdlen > 1) {
357
osd = malloc(osdlen, M_PRISON, M_WAITOK);
358
jm_h_assemble(osd, h);
359
osd[osdlen - 1] = '\0'; /* sealed */
360
}
361
362
/* Compare and swap the buffers. */
363
mtx_lock(&pr->pr_mtx);
364
oldosd = osd_jail_get(pr, meta->osd_slot);
365
if (oldosd == origosd) {
366
error = osd_jail_set(pr, meta->osd_slot, osd);
367
} else {
368
/*
369
* The osd(9) framework requires protection only for pr_osd,
370
* which is covered by pr_mtx. Therefore, other code might
371
* legally alter jail metadata without allprison_lock. It
372
* means that here we could override data just added by other
373
* thread. This extra caution with retry mechanism aims to
374
* prevent user data loss in such potential cases.
375
*/
376
error = EAGAIN;
377
repeat = true;
378
}
379
mtx_unlock(&pr->pr_mtx);
380
if (error == 0)
381
osd = oldosd;
382
383
end:
384
jm_h_freechain(h);
385
free(osd, M_PRISON);
386
free(origosd_copy, M_PRISON);
387
388
if (repeat && ++repeats < 3)
389
goto again;
390
391
return (error);
392
}
393
394
static int
395
jm_osd_method_get(void *obj, void *data, const struct meta *meta)
396
{
397
struct prison *pr = obj;
398
struct vfsoptlist *opts = data;
399
struct vfsopt *opt;
400
char *osd = NULL;
401
char empty = '\0';
402
int error = 0;
403
bool locked = false;
404
const char *key;
405
size_t keylen;
406
const char *p;
407
408
sx_assert(&allprison_lock, SA_SLOCKED);
409
410
TAILQ_FOREACH(opt, opts, link) {
411
if (strstr(opt->name, meta->name) != opt->name)
412
continue;
413
if (opt->name[strlen(meta->name)] != '.' &&
414
opt->name[strlen(meta->name)] != '\0')
415
continue;
416
417
if (!locked) {
418
mtx_lock(&pr->pr_mtx);
419
locked = true;
420
osd = osd_jail_get(pr, meta->osd_slot);
421
if (osd == NULL)
422
osd = &empty;
423
}
424
425
/* Provide full metadata. */
426
if (strcmp(opt->name, meta->name) == 0) {
427
if (strlcpy(opt->value, osd, opt->len) >= opt->len) {
428
error = EINVAL;
429
break;
430
}
431
opt->seen = 1;
432
continue;
433
}
434
435
/* Extract a specific key=value. */
436
p = osd;
437
key = opt->name + strlen(meta->name) + 1;
438
keylen = strlen(key);
439
while ((p = strstr(p, key)) != NULL) {
440
if ((p == osd || *(p - 1) == '\n')
441
&& p[keylen] == '=') {
442
if (strlcpy(opt->value, p + keylen + 1,
443
MIN(opt->len, strchr(p + keylen + 1, '\n') -
444
(p + keylen + 1) + 1)) >= opt->len) {
445
error = EINVAL;
446
break;
447
}
448
opt->seen = 1;
449
}
450
p += keylen;
451
}
452
if (error != 0)
453
break;
454
}
455
456
if (locked)
457
mtx_unlock(&pr->pr_mtx);
458
459
return (error);
460
}
461
462
static int
463
jm_osd_method_check(void *obj __unused, void *data, const struct meta *meta)
464
{
465
struct vfsoptlist *opts = data;
466
struct vfsopt *opt;
467
468
TAILQ_FOREACH(opt, opts, link) {
469
if (strstr(opt->name, meta->name) != opt->name)
470
continue;
471
if (opt->name[strlen(meta->name)] != '.' &&
472
opt->name[strlen(meta->name)] != '\0')
473
continue;
474
opt->seen = 1;
475
}
476
477
return (0);
478
}
479
480
static void
481
jm_osd_destructor(void *osd)
482
{
483
free(osd, M_PRISON);
484
}
485
486
487
/* OSD for "meta" param */
488
489
static struct meta meta;
490
491
static inline int
492
jm_osd_method_set_meta(void *obj, void *data)
493
{
494
return (jm_osd_method_set(obj, data, &meta));
495
}
496
497
static inline int
498
jm_osd_method_get_meta(void *obj, void *data)
499
{
500
return (jm_osd_method_get(obj, data, &meta));
501
}
502
503
static inline int
504
jm_osd_method_check_meta(void *obj, void *data)
505
{
506
return (jm_osd_method_check(obj, data, &meta));
507
}
508
509
static struct meta meta = {
510
.name = JAIL_META_PRIVATE,
511
.osd_slot = 0,
512
.methods = {
513
[PR_METHOD_SET] = jm_osd_method_set_meta,
514
[PR_METHOD_GET] = jm_osd_method_get_meta,
515
[PR_METHOD_CHECK] = jm_osd_method_check_meta,
516
}
517
};
518
519
520
/* OSD for "env" param */
521
522
static struct meta env;
523
524
static inline int
525
jm_osd_method_set_env(void *obj, void *data)
526
{
527
return (jm_osd_method_set(obj, data, &env));
528
}
529
530
static inline int
531
jm_osd_method_get_env(void *obj, void *data)
532
{
533
return (jm_osd_method_get(obj, data, &env));
534
}
535
536
static inline int
537
jm_osd_method_check_env(void *obj, void *data)
538
{
539
return (jm_osd_method_check(obj, data, &env));
540
}
541
542
static struct meta env = {
543
.name = JAIL_META_SHARED,
544
.osd_slot = 0,
545
.methods = {
546
[PR_METHOD_SET] = jm_osd_method_set_env,
547
[PR_METHOD_GET] = jm_osd_method_get_env,
548
[PR_METHOD_CHECK] = jm_osd_method_check_env,
549
}
550
};
551
552
553
/* A jail can read its "env". */
554
555
static int
556
jm_sysctl_env(SYSCTL_HANDLER_ARGS)
557
{
558
struct prison *pr;
559
char empty = '\0';
560
char *tmpbuf;
561
size_t outlen;
562
int error = 0;
563
564
pr = req->td->td_ucred->cr_prison;
565
566
mtx_lock(&pr->pr_mtx);
567
arg1 = osd_jail_get(pr, env.osd_slot);
568
if (arg1 == NULL) {
569
tmpbuf = &empty;
570
outlen = 1;
571
} else {
572
outlen = strlen(arg1) + 1;
573
if (req->oldptr != NULL) {
574
tmpbuf = malloc(outlen, M_PRISON, M_NOWAIT);
575
error = (tmpbuf == NULL) ? ENOMEM : 0;
576
if (error == 0)
577
memcpy(tmpbuf, arg1, outlen);
578
}
579
}
580
mtx_unlock(&pr->pr_mtx);
581
582
if (error != 0)
583
return (error);
584
585
if (req->oldptr == NULL)
586
SYSCTL_OUT(req, NULL, outlen);
587
else {
588
SYSCTL_OUT(req, tmpbuf, outlen);
589
if (tmpbuf != &empty)
590
free(tmpbuf, M_PRISON);
591
}
592
593
return (error);
594
}
595
SYSCTL_PROC(_security_jail, OID_AUTO, env,
596
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
597
0, 0, jm_sysctl_env, "A", "Meta information provided by parent jail");
598
599
600
/* Setup and tear down. */
601
602
static int
603
jm_sysinit(void *arg __unused)
604
{
605
meta.osd_slot = osd_jail_register(jm_osd_destructor, meta.methods);
606
env.osd_slot = osd_jail_register(jm_osd_destructor, env.methods);
607
608
return (0);
609
}
610
611
static int
612
jm_sysuninit(void *arg __unused)
613
{
614
osd_jail_deregister(meta.osd_slot);
615
osd_jail_deregister(env.osd_slot);
616
617
return (0);
618
}
619
620
SYSINIT(jailmeta, SI_SUB_DRIVERS, SI_ORDER_ANY, jm_sysinit, NULL);
621
SYSUNINIT(jailmeta, SI_SUB_DRIVERS, SI_ORDER_ANY, jm_sysuninit, NULL);
622
623