Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/kern/kern_descrip.c
39475 views
1
/*-
2
* SPDX-License-Identifier: BSD-3-Clause
3
*
4
* Copyright (c) 1982, 1986, 1989, 1991, 1993
5
* The Regents of the University of California. All rights reserved.
6
* (c) UNIX System Laboratories, Inc.
7
* All or some portions of this file are derived from material licensed
8
* to the University of California by American Telephone and Telegraph
9
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
10
* the permission of UNIX System Laboratories, Inc.
11
*
12
* Redistribution and use in source and binary forms, with or without
13
* modification, are permitted provided that the following conditions
14
* are met:
15
* 1. Redistributions of source code must retain the above copyright
16
* notice, this list of conditions and the following disclaimer.
17
* 2. Redistributions in binary form must reproduce the above copyright
18
* notice, this list of conditions and the following disclaimer in the
19
* documentation and/or other materials provided with the distribution.
20
* 3. Neither the name of the University nor the names of its contributors
21
* may be used to endorse or promote products derived from this software
22
* without specific prior written permission.
23
*
24
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34
* SUCH DAMAGE.
35
*/
36
37
#include "opt_capsicum.h"
38
#include "opt_ddb.h"
39
#include "opt_ktrace.h"
40
41
#define EXTERR_CATEGORY EXTERR_CAT_FILEDESC
42
#include <sys/systm.h>
43
#include <sys/capsicum.h>
44
#include <sys/conf.h>
45
#include <sys/exterrvar.h>
46
#include <sys/fcntl.h>
47
#include <sys/file.h>
48
#include <sys/filedesc.h>
49
#include <sys/filio.h>
50
#include <sys/jail.h>
51
#include <sys/kernel.h>
52
#include <sys/limits.h>
53
#include <sys/lock.h>
54
#include <sys/malloc.h>
55
#include <sys/mount.h>
56
#include <sys/mutex.h>
57
#include <sys/namei.h>
58
#include <sys/selinfo.h>
59
#include <sys/poll.h>
60
#include <sys/priv.h>
61
#include <sys/proc.h>
62
#include <sys/protosw.h>
63
#include <sys/racct.h>
64
#include <sys/resourcevar.h>
65
#include <sys/sbuf.h>
66
#include <sys/signalvar.h>
67
#include <sys/kdb.h>
68
#include <sys/smr.h>
69
#include <sys/stat.h>
70
#include <sys/sx.h>
71
#include <sys/syscallsubr.h>
72
#include <sys/sysctl.h>
73
#include <sys/sysproto.h>
74
#include <sys/unistd.h>
75
#include <sys/user.h>
76
#include <sys/vnode.h>
77
#include <sys/ktrace.h>
78
79
#include <net/vnet.h>
80
81
#include <security/audit/audit.h>
82
83
#include <vm/uma.h>
84
#include <vm/vm.h>
85
86
#include <ddb/ddb.h>
87
88
/* Malloc types with internal linkage, used only by the descriptor code. */
static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table");
static MALLOC_DEFINE(M_PWD, "pwd", "Descriptor table vnodes");
static MALLOC_DEFINE(M_PWDDESC, "pwddesc", "Pwd descriptors");
static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader",
    "file desc to leader structures");
static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");

/* Defined here without "static", so visible outside this file. */
MALLOC_DEFINE(M_FILECAPS, "filecaps", "descriptor capabilities");

/* Only declared here. */
MALLOC_DECLARE(M_FADVISE);
97
98
static __read_mostly uma_zone_t file_zone;
99
static __read_mostly uma_zone_t filedesc0_zone;
100
__read_mostly uma_zone_t pwd_zone;
101
VFS_SMR_DECLARE;
102
103
static int closefp(struct filedesc *fdp, int fd, struct file *fp,
104
struct thread *td, bool holdleaders, bool audit);
105
static void export_file_to_kinfo(struct file *fp, int fd,
106
cap_rights_t *rightsp, struct kinfo_file *kif,
107
struct filedesc *fdp, int flags);
108
static int fd_first_free(struct filedesc *fdp, int low, int size);
109
static void fdgrowtable(struct filedesc *fdp, int nfd);
110
static void fdgrowtable_exp(struct filedesc *fdp, int nfd);
111
static void fdunused(struct filedesc *fdp, int fd);
112
static void fdused(struct filedesc *fdp, int fd);
113
static int fget_unlocked_seq(struct thread *td, int fd,
114
const cap_rights_t *needrightsp, uint8_t *flagsp,
115
struct file **fpp, seqc_t *seqp);
116
static int getmaxfd(struct thread *td);
117
static u_long *filecaps_copy_prep(const struct filecaps *src);
118
static void filecaps_copy_finish(const struct filecaps *src,
119
struct filecaps *dst, u_long *ioctls);
120
static u_long *filecaps_free_prep(struct filecaps *fcaps);
121
static void filecaps_free_finish(u_long *ioctls);
122
123
static struct pwd *pwd_alloc(void);
124
125
/*
126
* Each process has:
127
*
128
* - An array of open file descriptors (fd_ofiles)
129
* - An array of file flags (fd_ofileflags)
130
* - A bitmap recording which descriptors are in use (fd_map)
131
*
132
* A process starts out with NDFILE descriptors. The value of NDFILE has
133
* been selected based on the historical limit of 20 open files, and an
134
* assumption that the majority of processes, especially short-lived
135
* processes like shells, will never need more.
136
*
137
* If this initial allocation is exhausted, a larger descriptor table and
138
* map are allocated dynamically, and the pointers in the process's struct
139
* filedesc are updated to point to those. This is repeated every time
140
* the process runs out of file descriptors (provided it hasn't hit its
141
* resource limit).
142
*
143
* Since threads may hold references to individual descriptor table
144
* entries, the tables are never freed. Instead, they are placed on a
145
* linked list and freed only when the struct filedesc is released.
146
*/
147
/* Number of descriptor slots in the initial, embedded table. */
#define	NDFILE		20
/* Size in bytes of one word of the in-use bitmap. */
#define	NDSLOTSIZE	sizeof(NDSLOTTYPE)
/* Number of descriptors tracked by one bitmap word. */
#define	NDENTRIES	(NDSLOTSIZE * __CHAR_BIT)
/* Index of the bitmap word holding descriptor x. */
#define	NDSLOT(x)	((x) / NDENTRIES)
/* Mask selecting descriptor x's bit within its bitmap word. */
#define	NDBIT(x)	((NDSLOTTYPE)1 << ((x) % NDENTRIES))
/* Number of bitmap words needed to cover x descriptors (rounded up). */
#define	NDSLOTS(x)	(((x) + NDENTRIES - 1) / NDENTRIES)
153
154
/*
 * Iterate over every descriptor table entry of fdp whose fde_file is
 * non-NULL, assigning the index to _iterator and the entry to _fde.
 *
 * The expansion declares _fdp and _lastfile in the enclosing scope, and
 * uses fdlastfile_single() to find the upper bound.
 */
#define	FILEDESC_FOREACH_FDE(fdp, _iterator, _fde)			\
	struct filedesc *_fdp = (fdp);					\
	int _lastfile = fdlastfile_single(_fdp);			\
	for (_iterator = 0; _iterator <= _lastfile; _iterator++)	\
		if ((_fde = &_fdp->fd_ofiles[_iterator])->fde_file != NULL)
159
160
/*
 * Iterate over every non-NULL struct file pointer in fdp's descriptor
 * table, assigning the index to _iterator and the pointer to _fp.
 *
 * The expansion declares _fdp and _lastfile in the enclosing scope, and
 * uses fdlastfile_single() to find the upper bound.
 */
#define	FILEDESC_FOREACH_FP(fdp, _iterator, _fp)			\
	struct filedesc *_fdp = (fdp);					\
	int _lastfile = fdlastfile_single(_fdp);			\
	for (_iterator = 0; _iterator <= _lastfile; _iterator++)	\
		if ((_fp = _fdp->fd_ofiles[_iterator].fde_file) != NULL)
165
166
/*
167
* SLIST entry used to keep track of ofiles which must be reclaimed when
168
* the process exits.
169
*/
170
struct freetable {
171
struct fdescenttbl *ft_table;
172
SLIST_ENTRY(freetable) ft_next;
173
};
174
175
/*
176
* Initial allocation: a filedesc structure + the head of SLIST used to
177
* keep track of old ofiles + enough space for NDFILE descriptors.
178
*/
179
180
struct fdescenttbl0 {
181
int fdt_nfiles;
182
struct filedescent fdt_ofiles[NDFILE];
183
};
184
185
struct filedesc0 {
186
struct filedesc fd_fd;
187
SLIST_HEAD(, freetable) fd_free;
188
struct fdescenttbl0 fd_dfiles;
189
NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];
190
};
191
192
/*
193
* Descriptor management.
194
*/
195
static int __exclusive_cache_line openfiles; /* actual number of open files */
196
struct mtx sigio_lock; /* mtx to protect pointers to sigio */
197
void __read_mostly (*mq_fdclose)(struct thread *td, int fd, struct file *fp);
198
199
/*
200
* If low >= size, just return low. Otherwise find the first zero bit in the
201
* given bitmap, starting at low and not exceeding size - 1. Return size if
202
* not found.
203
*/
204
static int
205
fd_first_free(struct filedesc *fdp, int low, int size)
206
{
207
NDSLOTTYPE *map = fdp->fd_map;
208
NDSLOTTYPE mask;
209
int off, maxoff;
210
211
if (low >= size)
212
return (low);
213
214
off = NDSLOT(low);
215
if (low % NDENTRIES) {
216
mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES)));
217
if ((mask &= ~map[off]) != 0UL)
218
return (off * NDENTRIES + ffsl(mask) - 1);
219
++off;
220
}
221
for (maxoff = NDSLOTS(size); off < maxoff; ++off)
222
if (map[off] != ~0UL)
223
return (off * NDENTRIES + ffsl(~map[off]) - 1);
224
return (size);
225
}
226
227
/*
228
* Find the last used fd.
229
*
230
* Call this variant if fdp can't be modified by anyone else (e.g, during exec).
231
* Otherwise use fdlastfile.
232
*/
233
int
234
fdlastfile_single(struct filedesc *fdp)
235
{
236
NDSLOTTYPE *map = fdp->fd_map;
237
int off, minoff;
238
239
off = NDSLOT(fdp->fd_nfiles - 1);
240
for (minoff = NDSLOT(0); off >= minoff; --off)
241
if (map[off] != 0)
242
return (off * NDENTRIES + flsl(map[off]) - 1);
243
return (-1);
244
}
245
246
/*
 * Variant of fdlastfile_single() that first asserts the filedesc lock
 * is held.
 */
int
fdlastfile(struct filedesc *fdp)
{
	int last;

	FILEDESC_LOCK_ASSERT(fdp);
	last = fdlastfile_single(fdp);
	return (last);
}
253
254
static int
255
fdisused(struct filedesc *fdp, int fd)
256
{
257
258
KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
259
("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles));
260
261
return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0);
262
}
263
264
/*
265
* Mark a file descriptor as used.
266
*/
267
static void
268
fdused_init(struct filedesc *fdp, int fd)
269
{
270
271
KASSERT(!fdisused(fdp, fd), ("fd=%d is already used", fd));
272
273
fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd);
274
}
275
276
static void
277
fdused(struct filedesc *fdp, int fd)
278
{
279
280
FILEDESC_XLOCK_ASSERT(fdp);
281
282
fdused_init(fdp, fd);
283
if (fd == fdp->fd_freefile)
284
fdp->fd_freefile++;
285
}
286
287
/*
288
* Mark a file descriptor as unused.
289
*/
290
static void
291
fdunused(struct filedesc *fdp, int fd)
292
{
293
294
FILEDESC_XLOCK_ASSERT(fdp);
295
296
KASSERT(fdisused(fdp, fd), ("fd=%d is already unused", fd));
297
KASSERT(fdp->fd_ofiles[fd].fde_file == NULL,
298
("fd=%d is still in use", fd));
299
300
fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd);
301
if (fd < fdp->fd_freefile)
302
fdp->fd_freefile = fd;
303
}
304
305
/*
306
* Free a file descriptor.
307
*
308
* Avoid some work if fdp is about to be destroyed.
309
*/
310
static inline void
311
fdefree_last(struct filedescent *fde)
312
{
313
314
filecaps_free(&fde->fde_caps);
315
}
316
317
static inline void
318
fdfree(struct filedesc *fdp, int fd)
319
{
320
struct filedescent *fde;
321
322
FILEDESC_XLOCK_ASSERT(fdp);
323
fde = &fdp->fd_ofiles[fd];
324
#ifdef CAPABILITIES
325
seqc_write_begin(&fde->fde_seqc);
326
#endif
327
fde->fde_file = NULL;
328
#ifdef CAPABILITIES
329
seqc_write_end(&fde->fde_seqc);
330
#endif
331
fdefree_last(fde);
332
fdunused(fdp, fd);
333
}
334
335
/*
336
* System calls on descriptors.
337
*/
338
#ifndef _SYS_SYSPROTO_H_
339
struct getdtablesize_args {
340
int dummy;
341
};
342
#endif
343
/* ARGSUSED */
344
int
345
sys_getdtablesize(struct thread *td, struct getdtablesize_args *uap)
346
{
347
#ifdef RACCT
348
uint64_t lim;
349
#endif
350
351
td->td_retval[0] = getmaxfd(td);
352
#ifdef RACCT
353
PROC_LOCK(td->td_proc);
354
lim = racct_get_limit(td->td_proc, RACCT_NOFILE);
355
PROC_UNLOCK(td->td_proc);
356
if (lim < td->td_retval[0])
357
td->td_retval[0] = lim;
358
#endif
359
return (0);
360
}
361
362
/*
363
* Duplicate a file descriptor to a particular value.
364
*
365
* Note: keep in mind that a potential race condition exists when closing
366
* descriptors from a shared descriptor table (via rfork).
367
*/
368
#ifndef _SYS_SYSPROTO_H_
369
struct dup2_args {
370
u_int from;
371
u_int to;
372
};
373
#endif
374
/* ARGSUSED */
375
int
376
sys_dup2(struct thread *td, struct dup2_args *uap)
377
{
378
379
return (kern_dup(td, FDDUP_FIXED, 0, (int)uap->from, (int)uap->to));
380
}
381
382
/*
383
* Duplicate a file descriptor.
384
*/
385
#ifndef _SYS_SYSPROTO_H_
386
struct dup_args {
387
u_int fd;
388
};
389
#endif
390
/* ARGSUSED */
391
int
392
sys_dup(struct thread *td, struct dup_args *uap)
393
{
394
395
return (kern_dup(td, FDDUP_NORMAL, 0, (int)uap->fd, 0));
396
}
397
398
/*
 * The file control system call.  All work is delegated to
 * kern_fcntl_freebsd().
 */
#ifndef _SYS_SYSPROTO_H_
struct fcntl_args {
	int	fd;
	int	cmd;
	long	arg;
};
#endif
/* ARGSUSED */
int
sys_fcntl(struct thread *td, struct fcntl_args *uap)
{
	int error;

	error = kern_fcntl_freebsd(td, uap->fd, uap->cmd, uap->arg);
	return (error);
}
415
416
int
417
kern_fcntl_freebsd(struct thread *td, int fd, int cmd, intptr_t arg)
418
{
419
struct flock fl;
420
struct __oflock ofl;
421
intptr_t arg1;
422
int error, newcmd;
423
424
error = 0;
425
newcmd = cmd;
426
switch (cmd) {
427
case F_OGETLK:
428
case F_OSETLK:
429
case F_OSETLKW:
430
/*
431
* Convert old flock structure to new.
432
*/
433
error = copyin((void *)arg, &ofl, sizeof(ofl));
434
fl.l_start = ofl.l_start;
435
fl.l_len = ofl.l_len;
436
fl.l_pid = ofl.l_pid;
437
fl.l_type = ofl.l_type;
438
fl.l_whence = ofl.l_whence;
439
fl.l_sysid = 0;
440
441
switch (cmd) {
442
case F_OGETLK:
443
newcmd = F_GETLK;
444
break;
445
case F_OSETLK:
446
newcmd = F_SETLK;
447
break;
448
case F_OSETLKW:
449
newcmd = F_SETLKW;
450
break;
451
}
452
arg1 = (intptr_t)&fl;
453
break;
454
case F_GETLK:
455
case F_SETLK:
456
case F_SETLKW:
457
case F_SETLK_REMOTE:
458
error = copyin((void *)arg, &fl, sizeof(fl));
459
arg1 = (intptr_t)&fl;
460
break;
461
default:
462
arg1 = arg;
463
break;
464
}
465
if (error)
466
return (error);
467
error = kern_fcntl(td, fd, newcmd, arg1);
468
if (error)
469
return (error);
470
if (cmd == F_OGETLK) {
471
ofl.l_start = fl.l_start;
472
ofl.l_len = fl.l_len;
473
ofl.l_pid = fl.l_pid;
474
ofl.l_type = fl.l_type;
475
ofl.l_whence = fl.l_whence;
476
error = copyout(&ofl, (void *)arg, sizeof(ofl));
477
} else if (cmd == F_GETLK) {
478
error = copyout(&fl, (void *)arg, sizeof(fl));
479
}
480
return (error);
481
}
482
483
struct flags_trans_elem {
484
u_int f;
485
u_int t;
486
};
487
488
static u_int
489
flags_trans(const struct flags_trans_elem *ftes, int nitems, u_int from_flags)
490
{
491
u_int res;
492
int i;
493
494
res = 0;
495
for (i = 0; i < nitems; i++) {
496
if ((from_flags & ftes[i].f) != 0)
497
res |= ftes[i].t;
498
}
499
return (res);
500
}
501
502
static uint8_t
503
fd_to_fde_flags(int fd_flags)
504
{
505
static const struct flags_trans_elem fd_to_fde_flags_s[] = {
506
{ .f = FD_CLOEXEC, .t = UF_EXCLOSE },
507
{ .f = FD_CLOFORK, .t = UF_FOCLOSE },
508
{ .f = FD_RESOLVE_BENEATH, .t = UF_RESOLVE_BENEATH },
509
};
510
511
return (flags_trans(fd_to_fde_flags_s, nitems(fd_to_fde_flags_s),
512
fd_flags));
513
}
514
515
static int
516
fde_to_fd_flags(uint8_t fde_flags)
517
{
518
static const struct flags_trans_elem fde_to_fd_flags_s[] = {
519
{ .f = UF_EXCLOSE, .t = FD_CLOEXEC },
520
{ .f = UF_FOCLOSE, .t = FD_CLOFORK },
521
{ .f = UF_RESOLVE_BENEATH, .t = FD_RESOLVE_BENEATH },
522
};
523
524
return (flags_trans(fde_to_fd_flags_s, nitems(fde_to_fd_flags_s),
525
fde_flags));
526
}
527
528
static uint8_t
529
fddup_to_fde_flags(int fddup_flags)
530
{
531
static const struct flags_trans_elem fddup_to_fde_flags_s[] = {
532
{ .f = FDDUP_FLAG_CLOEXEC, .t = UF_EXCLOSE },
533
{ .f = FDDUP_FLAG_CLOFORK, .t = UF_FOCLOSE },
534
};
535
536
return (flags_trans(fddup_to_fde_flags_s, nitems(fddup_to_fde_flags_s),
537
fddup_flags));
538
}
539
540
static uint8_t
541
close_range_to_fde_flags(int close_range_flags)
542
{
543
static const struct flags_trans_elem close_range_to_fde_flags_s[] = {
544
{ .f = CLOSE_RANGE_CLOEXEC, .t = UF_EXCLOSE },
545
{ .f = CLOSE_RANGE_CLOFORK, .t = UF_FOCLOSE },
546
};
547
548
return (flags_trans(close_range_to_fde_flags_s,
549
nitems(close_range_to_fde_flags_s), close_range_flags));
550
}
551
552
static uint8_t
553
open_to_fde_flags(int open_flags, bool sticky_orb)
554
{
555
static const struct flags_trans_elem open_to_fde_flags_s[] = {
556
{ .f = O_CLOEXEC, .t = UF_EXCLOSE },
557
{ .f = O_CLOFORK, .t = UF_FOCLOSE },
558
{ .f = O_RESOLVE_BENEATH, .t = UF_RESOLVE_BENEATH },
559
};
560
#if defined(__clang__) && __clang_major__ >= 19
561
_Static_assert(open_to_fde_flags_s[nitems(open_to_fde_flags_s) - 1].f ==
562
O_RESOLVE_BENEATH, "O_RESOLVE_BENEATH must be last, for sticky_orb");
563
#endif
564
565
return (flags_trans(open_to_fde_flags_s, nitems(open_to_fde_flags_s) -
566
(sticky_orb ? 0 : 1), open_flags));
567
}
568
569
int
570
kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
571
{
572
struct filedesc *fdp;
573
struct flock *flp;
574
struct file *fp, *fp2;
575
struct filedescent *fde;
576
struct proc *p;
577
struct vnode *vp;
578
struct mount *mp;
579
struct kinfo_file *kif;
580
int error, flg, kif_sz, seals, tmp, got_set, got_cleared;
581
uint64_t bsize;
582
off_t foffset;
583
int flags;
584
585
error = 0;
586
flg = F_POSIX;
587
p = td->td_proc;
588
fdp = p->p_fd;
589
590
AUDIT_ARG_FD(cmd);
591
AUDIT_ARG_CMD(cmd);
592
switch (cmd) {
593
case F_DUPFD:
594
tmp = arg;
595
error = kern_dup(td, FDDUP_FCNTL, 0, fd, tmp);
596
break;
597
598
case F_DUPFD_CLOEXEC:
599
tmp = arg;
600
error = kern_dup(td, FDDUP_FCNTL, FDDUP_FLAG_CLOEXEC, fd, tmp);
601
break;
602
603
case F_DUPFD_CLOFORK:
604
tmp = arg;
605
error = kern_dup(td, FDDUP_FCNTL, FDDUP_FLAG_CLOFORK, fd, tmp);
606
break;
607
608
case F_DUP2FD:
609
tmp = arg;
610
error = kern_dup(td, FDDUP_FIXED, 0, fd, tmp);
611
break;
612
613
case F_DUP2FD_CLOEXEC:
614
tmp = arg;
615
error = kern_dup(td, FDDUP_FIXED, FDDUP_FLAG_CLOEXEC, fd, tmp);
616
break;
617
618
case F_GETFD:
619
error = EBADF;
620
FILEDESC_SLOCK(fdp);
621
fde = fdeget_noref(fdp, fd);
622
if (fde != NULL) {
623
td->td_retval[0] = fde_to_fd_flags(fde->fde_flags);
624
error = 0;
625
}
626
FILEDESC_SUNLOCK(fdp);
627
break;
628
629
case F_SETFD:
630
error = EBADF;
631
FILEDESC_XLOCK(fdp);
632
fde = fdeget_noref(fdp, fd);
633
if (fde != NULL) {
634
/*
635
* UF_RESOLVE_BENEATH is sticky and cannot be cleared.
636
*/
637
fde->fde_flags = (fde->fde_flags &
638
~(UF_EXCLOSE | UF_FOCLOSE)) | fd_to_fde_flags(arg);
639
error = 0;
640
}
641
FILEDESC_XUNLOCK(fdp);
642
break;
643
644
case F_GETFL:
645
error = fget_fcntl(td, fd, &cap_fcntl_rights, F_GETFL, &fp);
646
if (error != 0)
647
break;
648
td->td_retval[0] = OFLAGS(fp->f_flag);
649
fdrop(fp, td);
650
break;
651
652
case F_SETFL:
653
error = fget_fcntl(td, fd, &cap_fcntl_rights, F_SETFL, &fp);
654
if (error != 0)
655
break;
656
if (fp->f_ops == &path_fileops) {
657
fdrop(fp, td);
658
error = EBADF;
659
break;
660
}
661
fsetfl_lock(fp);
662
do {
663
tmp = flg = fp->f_flag;
664
tmp &= ~FCNTLFLAGS;
665
tmp |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
666
} while (atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0);
667
got_set = tmp & ~flg;
668
got_cleared = flg & ~tmp;
669
if (((got_set | got_cleared) & FNONBLOCK) != 0) {
670
tmp = fp->f_flag & FNONBLOCK;
671
error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
672
if (error != 0)
673
goto revert_flags;
674
}
675
if (((got_set | got_cleared) & FASYNC) != 0) {
676
tmp = fp->f_flag & FASYNC;
677
error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
678
if (error != 0)
679
goto revert_nonblock;
680
}
681
fsetfl_unlock(fp);
682
fdrop(fp, td);
683
break;
684
revert_nonblock:
685
if (((got_set | got_cleared) & FNONBLOCK) != 0) {
686
tmp = ~fp->f_flag & FNONBLOCK;
687
(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
688
}
689
revert_flags:
690
do {
691
tmp = flg = fp->f_flag;
692
tmp &= ~FCNTLFLAGS;
693
tmp |= got_cleared;
694
tmp &= ~got_set;
695
} while (atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0);
696
fsetfl_unlock(fp);
697
fdrop(fp, td);
698
break;
699
700
case F_GETOWN:
701
error = fget_fcntl(td, fd, &cap_fcntl_rights, F_GETOWN, &fp);
702
if (error != 0)
703
break;
704
error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
705
if (error == 0)
706
td->td_retval[0] = tmp;
707
fdrop(fp, td);
708
break;
709
710
case F_SETOWN:
711
error = fget_fcntl(td, fd, &cap_fcntl_rights, F_SETOWN, &fp);
712
if (error != 0)
713
break;
714
tmp = arg;
715
error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
716
fdrop(fp, td);
717
break;
718
719
case F_SETLK_REMOTE:
720
error = priv_check(td, PRIV_NFS_LOCKD);
721
if (error != 0)
722
return (error);
723
flg = F_REMOTE;
724
goto do_setlk;
725
726
case F_SETLKW:
727
flg |= F_WAIT;
728
/* FALLTHROUGH F_SETLK */
729
730
case F_SETLK:
731
do_setlk:
732
flp = (struct flock *)arg;
733
if ((flg & F_REMOTE) != 0 && flp->l_sysid == 0) {
734
error = EINVAL;
735
break;
736
}
737
738
error = fget_unlocked(td, fd, &cap_flock_rights, &fp);
739
if (error != 0)
740
break;
741
if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
742
error = EBADF;
743
fdrop(fp, td);
744
break;
745
}
746
747
if (flp->l_whence == SEEK_CUR) {
748
foffset = foffset_get(fp);
749
if (foffset < 0 ||
750
(flp->l_start > 0 &&
751
foffset > OFF_MAX - flp->l_start)) {
752
error = EOVERFLOW;
753
fdrop(fp, td);
754
break;
755
}
756
flp->l_start += foffset;
757
}
758
759
vp = fp->f_vnode;
760
switch (flp->l_type) {
761
case F_RDLCK:
762
if ((fp->f_flag & FREAD) == 0) {
763
error = EBADF;
764
break;
765
}
766
if ((p->p_leader->p_flag & P_ADVLOCK) == 0) {
767
PROC_LOCK(p->p_leader);
768
p->p_leader->p_flag |= P_ADVLOCK;
769
PROC_UNLOCK(p->p_leader);
770
}
771
error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
772
flp, flg);
773
break;
774
case F_WRLCK:
775
if ((fp->f_flag & FWRITE) == 0) {
776
error = EBADF;
777
break;
778
}
779
if ((p->p_leader->p_flag & P_ADVLOCK) == 0) {
780
PROC_LOCK(p->p_leader);
781
p->p_leader->p_flag |= P_ADVLOCK;
782
PROC_UNLOCK(p->p_leader);
783
}
784
error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
785
flp, flg);
786
break;
787
case F_UNLCK:
788
error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
789
flp, flg);
790
break;
791
case F_UNLCKSYS:
792
if (flg != F_REMOTE) {
793
error = EINVAL;
794
break;
795
}
796
error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
797
F_UNLCKSYS, flp, flg);
798
break;
799
default:
800
error = EINVAL;
801
break;
802
}
803
if (error != 0 || flp->l_type == F_UNLCK ||
804
flp->l_type == F_UNLCKSYS) {
805
fdrop(fp, td);
806
break;
807
}
808
809
/*
810
* Check for a race with close.
811
*
812
* The vnode is now advisory locked (or unlocked, but this case
813
* is not really important) as the caller requested.
814
* We had to drop the filedesc lock, so we need to recheck if
815
* the descriptor is still valid, because if it was closed
816
* in the meantime we need to remove advisory lock from the
817
* vnode - close on any descriptor leading to an advisory
818
* locked vnode, removes that lock.
819
* We will return 0 on purpose in that case, as the result of
820
* successful advisory lock might have been externally visible
821
* already. This is fine - effectively we pretend to the caller
822
* that the closing thread was a bit slower and that the
823
* advisory lock succeeded before the close.
824
*/
825
error = fget_unlocked(td, fd, &cap_no_rights, &fp2);
826
if (error != 0) {
827
fdrop(fp, td);
828
break;
829
}
830
if (fp != fp2) {
831
flp->l_whence = SEEK_SET;
832
flp->l_start = 0;
833
flp->l_len = 0;
834
flp->l_type = F_UNLCK;
835
(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
836
F_UNLCK, flp, F_POSIX);
837
}
838
fdrop(fp, td);
839
fdrop(fp2, td);
840
break;
841
842
case F_GETLK:
843
error = fget_unlocked(td, fd, &cap_flock_rights, &fp);
844
if (error != 0)
845
break;
846
if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
847
error = EBADF;
848
fdrop(fp, td);
849
break;
850
}
851
flp = (struct flock *)arg;
852
if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
853
flp->l_type != F_UNLCK) {
854
error = EINVAL;
855
fdrop(fp, td);
856
break;
857
}
858
if (flp->l_whence == SEEK_CUR) {
859
foffset = foffset_get(fp);
860
if ((flp->l_start > 0 &&
861
foffset > OFF_MAX - flp->l_start) ||
862
(flp->l_start < 0 &&
863
foffset < OFF_MIN - flp->l_start)) {
864
error = EOVERFLOW;
865
fdrop(fp, td);
866
break;
867
}
868
flp->l_start += foffset;
869
}
870
vp = fp->f_vnode;
871
error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
872
F_POSIX);
873
fdrop(fp, td);
874
break;
875
876
case F_ADD_SEALS:
877
error = fget_unlocked(td, fd, &cap_no_rights, &fp);
878
if (error != 0)
879
break;
880
error = fo_add_seals(fp, arg);
881
fdrop(fp, td);
882
break;
883
884
case F_GET_SEALS:
885
error = fget_unlocked(td, fd, &cap_no_rights, &fp);
886
if (error != 0)
887
break;
888
if (fo_get_seals(fp, &seals) == 0)
889
td->td_retval[0] = seals;
890
else
891
error = EINVAL;
892
fdrop(fp, td);
893
break;
894
895
case F_RDAHEAD:
896
arg = arg ? 128 * 1024: 0;
897
/* FALLTHROUGH */
898
case F_READAHEAD:
899
error = fget_unlocked(td, fd, &cap_no_rights, &fp);
900
if (error != 0)
901
break;
902
if (fp->f_type != DTYPE_VNODE || fp->f_ops == &path_fileops) {
903
fdrop(fp, td);
904
error = EBADF;
905
break;
906
}
907
vp = fp->f_vnode;
908
if (vp->v_type != VREG) {
909
fdrop(fp, td);
910
error = ENOTTY;
911
break;
912
}
913
914
/*
915
* Exclusive lock synchronizes against f_seqcount reads and
916
* writes in sequential_heuristic().
917
*/
918
error = vn_lock(vp, LK_EXCLUSIVE);
919
if (error != 0) {
920
fdrop(fp, td);
921
break;
922
}
923
if (arg >= 0) {
924
bsize = fp->f_vnode->v_mount->mnt_stat.f_iosize;
925
arg = MIN(arg, INT_MAX - bsize + 1);
926
fp->f_seqcount[UIO_READ] = MIN(IO_SEQMAX,
927
(arg + bsize - 1) / bsize);
928
atomic_set_int(&fp->f_flag, FRDAHEAD);
929
} else {
930
atomic_clear_int(&fp->f_flag, FRDAHEAD);
931
}
932
VOP_UNLOCK(vp);
933
fdrop(fp, td);
934
break;
935
936
case F_ISUNIONSTACK:
937
/*
938
* Check if the vnode is part of a union stack (either the
939
* "union" flag from mount(2) or unionfs).
940
*
941
* Prior to introduction of this op libc's readdir would call
942
* fstatfs(2), in effect unnecessarily copying kilobytes of
943
* data just to check fs name and a mount flag.
944
*
945
* Fixing the code to handle everything in the kernel instead
946
* is a non-trivial endeavor and has low priority, thus this
947
* horrible kludge facilitates the current behavior in a much
948
* cheaper manner until someone(tm) sorts this out.
949
*/
950
error = fget_unlocked(td, fd, &cap_no_rights, &fp);
951
if (error != 0)
952
break;
953
if (fp->f_type != DTYPE_VNODE) {
954
fdrop(fp, td);
955
error = EBADF;
956
break;
957
}
958
vp = fp->f_vnode;
959
/*
960
* Since we don't prevent dooming the vnode even non-null mp
961
* found can become immediately stale. This is tolerable since
962
* mount points are type-stable (providing safe memory access)
963
* and any vfs op on this vnode going forward will return an
964
* error (meaning return value in this case is meaningless).
965
*/
966
mp = atomic_load_ptr(&vp->v_mount);
967
if (__predict_false(mp == NULL)) {
968
fdrop(fp, td);
969
error = EBADF;
970
break;
971
}
972
td->td_retval[0] = 0;
973
if (mp->mnt_kern_flag & MNTK_UNIONFS ||
974
mp->mnt_flag & MNT_UNION)
975
td->td_retval[0] = 1;
976
fdrop(fp, td);
977
break;
978
979
case F_KINFO:
980
#ifdef CAPABILITY_MODE
981
if (CAP_TRACING(td))
982
ktrcapfail(CAPFAIL_SYSCALL, &cmd);
983
if (IN_CAPABILITY_MODE(td)) {
984
error = ECAPMODE;
985
break;
986
}
987
#endif
988
error = copyin((void *)arg, &kif_sz, sizeof(kif_sz));
989
if (error != 0)
990
break;
991
if (kif_sz != sizeof(*kif)) {
992
error = EINVAL;
993
break;
994
}
995
kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK | M_ZERO);
996
FILEDESC_SLOCK(fdp);
997
error = fget_cap_noref(fdp, fd, &cap_fcntl_rights, &fp, NULL);
998
if (error == 0 && fhold(fp)) {
999
export_file_to_kinfo(fp, fd, NULL, kif, fdp, 0);
1000
FILEDESC_SUNLOCK(fdp);
1001
fdrop(fp, td);
1002
if ((kif->kf_status & KF_ATTR_VALID) != 0) {
1003
kif->kf_structsize = sizeof(*kif);
1004
error = copyout(kif, (void *)arg, sizeof(*kif));
1005
} else {
1006
error = EBADF;
1007
}
1008
} else {
1009
FILEDESC_SUNLOCK(fdp);
1010
if (error == 0)
1011
error = EBADF;
1012
}
1013
free(kif, M_TEMP);
1014
break;
1015
1016
default:
1017
if ((cmd & ((1u << F_DUP3FD_SHIFT) - 1)) != F_DUP3FD)
1018
return (EXTERROR(EINVAL, "invalid fcntl cmd"));
1019
/* Handle F_DUP3FD */
1020
flags = (cmd >> F_DUP3FD_SHIFT);
1021
if ((flags & ~(FD_CLOEXEC | FD_CLOFORK)) != 0)
1022
return (EXTERROR(EINVAL, "invalid flags for F_DUP3FD"));
1023
tmp = arg;
1024
error = kern_dup(td, FDDUP_FIXED,
1025
((flags & FD_CLOEXEC) != 0 ? FDDUP_FLAG_CLOEXEC : 0) |
1026
((flags & FD_CLOFORK) != 0 ? FDDUP_FLAG_CLOFORK : 0),
1027
fd, tmp);
1028
break;
1029
}
1030
return (error);
1031
}
1032
1033
static int
1034
getmaxfd(struct thread *td)
1035
{
1036
1037
return (min((int)lim_cur(td, RLIMIT_NOFILE), maxfilesperproc));
1038
}
1039
1040
/*
1041
* Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD).
1042
*/
1043
int
1044
kern_dup(struct thread *td, u_int mode, int flags, int old, int new)
1045
{
1046
struct filedesc *fdp;
1047
struct filedescent *oldfde, *newfde;
1048
struct proc *p;
1049
struct file *delfp, *oldfp;
1050
u_long *oioctls, *nioctls;
1051
int error, maxfd;
1052
1053
p = td->td_proc;
1054
fdp = p->p_fd;
1055
oioctls = NULL;
1056
1057
MPASS((flags & ~(FDDUP_FLAG_CLOEXEC | FDDUP_FLAG_CLOFORK)) == 0);
1058
MPASS(mode < FDDUP_LASTMODE);
1059
1060
AUDIT_ARG_FD(old);
1061
/* XXXRW: if (flags & FDDUP_FIXED) AUDIT_ARG_FD2(new); */
1062
1063
/*
1064
* Verify we have a valid descriptor to dup from and possibly to
1065
* dup to. Unlike dup() and dup2(), fcntl()'s F_DUPFD should
1066
* return EINVAL when the new descriptor is out of bounds.
1067
*/
1068
if (old < 0)
1069
return (EBADF);
1070
if (new < 0)
1071
return (mode == FDDUP_FCNTL ? EINVAL : EBADF);
1072
maxfd = getmaxfd(td);
1073
if (new >= maxfd)
1074
return (mode == FDDUP_FCNTL ? EINVAL : EBADF);
1075
1076
error = EBADF;
1077
FILEDESC_XLOCK(fdp);
1078
if (fget_noref(fdp, old) == NULL)
1079
goto unlock;
1080
if (mode == FDDUP_FIXED && old == new) {
1081
td->td_retval[0] = new;
1082
fdp->fd_ofiles[new].fde_flags |= fddup_to_fde_flags(flags);
1083
error = 0;
1084
goto unlock;
1085
}
1086
1087
oldfde = &fdp->fd_ofiles[old];
1088
oldfp = oldfde->fde_file;
1089
if (!fhold(oldfp))
1090
goto unlock;
1091
1092
/*
1093
* If the caller specified a file descriptor, make sure the file
1094
* table is large enough to hold it, and grab it. Otherwise, just
1095
* allocate a new descriptor the usual way.
1096
*/
1097
switch (mode) {
1098
case FDDUP_NORMAL:
1099
case FDDUP_FCNTL:
1100
if ((error = fdalloc(td, new, &new)) != 0) {
1101
fdrop(oldfp, td);
1102
goto unlock;
1103
}
1104
break;
1105
case FDDUP_FIXED:
1106
if (new >= fdp->fd_nfiles) {
1107
/*
1108
* The resource limits are here instead of e.g.
1109
* fdalloc(), because the file descriptor table may be
1110
* shared between processes, so we can't really use
1111
* racct_add()/racct_sub(). Instead of counting the
1112
* number of actually allocated descriptors, just put
1113
* the limit on the size of the file descriptor table.
1114
*/
1115
#ifdef RACCT
1116
if (RACCT_ENABLED()) {
1117
error = racct_set_unlocked(p, RACCT_NOFILE, new + 1);
1118
if (error != 0) {
1119
error = EMFILE;
1120
fdrop(oldfp, td);
1121
goto unlock;
1122
}
1123
}
1124
#endif
1125
fdgrowtable_exp(fdp, new + 1);
1126
}
1127
if (!fdisused(fdp, new))
1128
fdused(fdp, new);
1129
break;
1130
default:
1131
KASSERT(0, ("%s unsupported mode %d", __func__, mode));
1132
}
1133
1134
KASSERT(old != new, ("new fd is same as old"));
1135
1136
/* Refetch oldfde because the table may have grown and old one freed. */
1137
oldfde = &fdp->fd_ofiles[old];
1138
KASSERT(oldfp == oldfde->fde_file,
1139
("fdt_ofiles shift from growth observed at fd %d",
1140
old));
1141
1142
newfde = &fdp->fd_ofiles[new];
1143
delfp = newfde->fde_file;
1144
1145
nioctls = filecaps_copy_prep(&oldfde->fde_caps);
1146
1147
/*
1148
* Duplicate the source descriptor.
1149
*/
1150
#ifdef CAPABILITIES
1151
seqc_write_begin(&newfde->fde_seqc);
1152
#endif
1153
oioctls = filecaps_free_prep(&newfde->fde_caps);
1154
fde_copy(oldfde, newfde);
1155
filecaps_copy_finish(&oldfde->fde_caps, &newfde->fde_caps,
1156
nioctls);
1157
newfde->fde_flags = (oldfde->fde_flags & ~(UF_EXCLOSE | UF_FOCLOSE)) |
1158
fddup_to_fde_flags(flags);
1159
#ifdef CAPABILITIES
1160
seqc_write_end(&newfde->fde_seqc);
1161
#endif
1162
td->td_retval[0] = new;
1163
1164
error = 0;
1165
1166
if (delfp != NULL) {
1167
(void) closefp(fdp, new, delfp, td, true, false);
1168
FILEDESC_UNLOCK_ASSERT(fdp);
1169
} else {
1170
unlock:
1171
FILEDESC_XUNLOCK(fdp);
1172
}
1173
1174
filecaps_free_finish(oioctls);
1175
return (error);
1176
}
1177
1178
static void
1179
sigiofree(struct sigio *sigio)
1180
{
1181
crfree(sigio->sio_ucred);
1182
free(sigio, M_SIGIO);
1183
}
1184
1185
static struct sigio *
1186
funsetown_locked(struct sigio *sigio)
1187
{
1188
struct proc *p;
1189
struct pgrp *pg;
1190
1191
SIGIO_ASSERT_LOCKED();
1192
1193
if (sigio == NULL)
1194
return (NULL);
1195
*sigio->sio_myref = NULL;
1196
if (sigio->sio_pgid < 0) {
1197
pg = sigio->sio_pgrp;
1198
PGRP_LOCK(pg);
1199
SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio, sio_pgsigio);
1200
PGRP_UNLOCK(pg);
1201
} else {
1202
p = sigio->sio_proc;
1203
PROC_LOCK(p);
1204
SLIST_REMOVE(&p->p_sigiolst, sigio, sigio, sio_pgsigio);
1205
PROC_UNLOCK(p);
1206
}
1207
return (sigio);
1208
}
1209
1210
/*
1211
* If sigio is on the list associated with a process or process group,
1212
* disable signalling from the device, remove sigio from the list and
1213
* free sigio.
1214
*/
1215
void
1216
funsetown(struct sigio **sigiop)
1217
{
1218
struct sigio *sigio;
1219
1220
/* Racy check, consumers must provide synchronization. */
1221
if (*sigiop == NULL)
1222
return;
1223
1224
SIGIO_LOCK();
1225
sigio = funsetown_locked(*sigiop);
1226
SIGIO_UNLOCK();
1227
if (sigio != NULL)
1228
sigiofree(sigio);
1229
}
1230
1231
/*
1232
* Free a list of sigio structures. The caller must ensure that new sigio
1233
* structures cannot be added after this point. For process groups this is
1234
* guaranteed using the proctree lock; for processes, the P_WEXIT flag serves
1235
* as an interlock.
1236
*/
1237
void
1238
funsetownlst(struct sigiolst *sigiolst)
1239
{
1240
struct proc *p;
1241
struct pgrp *pg;
1242
struct sigio *sigio, *tmp;
1243
1244
/* Racy check. */
1245
sigio = SLIST_FIRST(sigiolst);
1246
if (sigio == NULL)
1247
return;
1248
1249
p = NULL;
1250
pg = NULL;
1251
1252
SIGIO_LOCK();
1253
sigio = SLIST_FIRST(sigiolst);
1254
if (sigio == NULL) {
1255
SIGIO_UNLOCK();
1256
return;
1257
}
1258
1259
/*
1260
* Every entry of the list should belong to a single proc or pgrp.
1261
*/
1262
if (sigio->sio_pgid < 0) {
1263
pg = sigio->sio_pgrp;
1264
sx_assert(&proctree_lock, SX_XLOCKED);
1265
PGRP_LOCK(pg);
1266
} else /* if (sigio->sio_pgid > 0) */ {
1267
p = sigio->sio_proc;
1268
PROC_LOCK(p);
1269
KASSERT((p->p_flag & P_WEXIT) != 0,
1270
("%s: process %p is not exiting", __func__, p));
1271
}
1272
1273
SLIST_FOREACH(sigio, sigiolst, sio_pgsigio) {
1274
*sigio->sio_myref = NULL;
1275
if (pg != NULL) {
1276
KASSERT(sigio->sio_pgid < 0,
1277
("Proc sigio in pgrp sigio list"));
1278
KASSERT(sigio->sio_pgrp == pg,
1279
("Bogus pgrp in sigio list"));
1280
} else /* if (p != NULL) */ {
1281
KASSERT(sigio->sio_pgid > 0,
1282
("Pgrp sigio in proc sigio list"));
1283
KASSERT(sigio->sio_proc == p,
1284
("Bogus proc in sigio list"));
1285
}
1286
}
1287
1288
if (pg != NULL)
1289
PGRP_UNLOCK(pg);
1290
else
1291
PROC_UNLOCK(p);
1292
SIGIO_UNLOCK();
1293
1294
SLIST_FOREACH_SAFE(sigio, sigiolst, sio_pgsigio, tmp)
1295
sigiofree(sigio);
1296
}
1297
1298
/*
1299
* This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
1300
*
1301
* After permission checking, add a sigio structure to the sigio list for
1302
* the process or process group.
1303
*/
1304
int
1305
fsetown(pid_t pgid, struct sigio **sigiop)
1306
{
1307
struct proc *proc;
1308
struct pgrp *pgrp;
1309
struct sigio *osigio, *sigio;
1310
int ret;
1311
1312
if (pgid == 0) {
1313
funsetown(sigiop);
1314
return (0);
1315
}
1316
1317
sigio = malloc(sizeof(struct sigio), M_SIGIO, M_WAITOK);
1318
sigio->sio_pgid = pgid;
1319
sigio->sio_ucred = crhold(curthread->td_ucred);
1320
sigio->sio_myref = sigiop;
1321
1322
ret = 0;
1323
if (pgid > 0) {
1324
ret = pget(pgid, PGET_NOTWEXIT | PGET_NOTID | PGET_HOLD, &proc);
1325
SIGIO_LOCK();
1326
osigio = funsetown_locked(*sigiop);
1327
if (ret == 0) {
1328
PROC_LOCK(proc);
1329
_PRELE(proc);
1330
if ((proc->p_flag & P_WEXIT) != 0) {
1331
ret = ESRCH;
1332
} else if (proc->p_session !=
1333
curthread->td_proc->p_session) {
1334
/*
1335
* Policy - Don't allow a process to FSETOWN a
1336
* process in another session.
1337
*
1338
* Remove this test to allow maximum flexibility
1339
* or restrict FSETOWN to the current process or
1340
* process group for maximum safety.
1341
*/
1342
ret = EPERM;
1343
} else {
1344
sigio->sio_proc = proc;
1345
SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio,
1346
sio_pgsigio);
1347
}
1348
PROC_UNLOCK(proc);
1349
}
1350
} else /* if (pgid < 0) */ {
1351
sx_slock(&proctree_lock);
1352
SIGIO_LOCK();
1353
osigio = funsetown_locked(*sigiop);
1354
pgrp = pgfind(-pgid);
1355
if (pgrp == NULL) {
1356
ret = ESRCH;
1357
} else {
1358
if (pgrp->pg_session != curthread->td_proc->p_session) {
1359
/*
1360
* Policy - Don't allow a process to FSETOWN a
1361
* process in another session.
1362
*
1363
* Remove this test to allow maximum flexibility
1364
* or restrict FSETOWN to the current process or
1365
* process group for maximum safety.
1366
*/
1367
ret = EPERM;
1368
} else {
1369
sigio->sio_pgrp = pgrp;
1370
SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio,
1371
sio_pgsigio);
1372
}
1373
PGRP_UNLOCK(pgrp);
1374
}
1375
sx_sunlock(&proctree_lock);
1376
}
1377
if (ret == 0)
1378
*sigiop = sigio;
1379
SIGIO_UNLOCK();
1380
if (osigio != NULL)
1381
sigiofree(osigio);
1382
return (ret);
1383
}
1384
1385
/*
1386
* This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
1387
*/
1388
pid_t
1389
fgetown(struct sigio **sigiop)
1390
{
1391
pid_t pgid;
1392
1393
SIGIO_LOCK();
1394
pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0;
1395
SIGIO_UNLOCK();
1396
return (pgid);
1397
}
1398
1399
static int
1400
closefp_impl(struct filedesc *fdp, int fd, struct file *fp, struct thread *td,
1401
bool audit)
1402
{
1403
int error;
1404
1405
FILEDESC_XLOCK_ASSERT(fdp);
1406
1407
/*
1408
* We now hold the fp reference that used to be owned by the
1409
* descriptor array. We have to unlock the FILEDESC *AFTER*
1410
* knote_fdclose to prevent a race of the fd getting opened, a knote
1411
* added, and deleteing a knote for the new fd.
1412
*/
1413
if (__predict_false(!TAILQ_EMPTY(&fdp->fd_kqlist)))
1414
knote_fdclose(td, fd);
1415
1416
/*
1417
* We need to notify mqueue if the object is of type mqueue.
1418
*/
1419
if (__predict_false(fp->f_type == DTYPE_MQUEUE))
1420
mq_fdclose(td, fd, fp);
1421
FILEDESC_XUNLOCK(fdp);
1422
1423
#ifdef AUDIT
1424
if (AUDITING_TD(td) && audit)
1425
audit_sysclose(td, fd, fp);
1426
#endif
1427
error = closef(fp, td);
1428
1429
/*
1430
* All paths leading up to closefp() will have already removed or
1431
* replaced the fd in the filedesc table, so a restart would not
1432
* operate on the same file.
1433
*/
1434
if (error == ERESTART)
1435
error = EINTR;
1436
1437
return (error);
1438
}
1439
1440
static int
1441
closefp_hl(struct filedesc *fdp, int fd, struct file *fp, struct thread *td,
1442
bool holdleaders, bool audit)
1443
{
1444
int error;
1445
1446
FILEDESC_XLOCK_ASSERT(fdp);
1447
1448
if (holdleaders) {
1449
if (td->td_proc->p_fdtol != NULL) {
1450
/*
1451
* Ask fdfree() to sleep to ensure that all relevant
1452
* process leaders can be traversed in closef().
1453
*/
1454
fdp->fd_holdleaderscount++;
1455
} else {
1456
holdleaders = false;
1457
}
1458
}
1459
1460
error = closefp_impl(fdp, fd, fp, td, audit);
1461
if (holdleaders) {
1462
FILEDESC_XLOCK(fdp);
1463
fdp->fd_holdleaderscount--;
1464
if (fdp->fd_holdleaderscount == 0 &&
1465
fdp->fd_holdleaderswakeup != 0) {
1466
fdp->fd_holdleaderswakeup = 0;
1467
wakeup(&fdp->fd_holdleaderscount);
1468
}
1469
FILEDESC_XUNLOCK(fdp);
1470
}
1471
return (error);
1472
}
1473
1474
static int
1475
closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td,
1476
bool holdleaders, bool audit)
1477
{
1478
1479
FILEDESC_XLOCK_ASSERT(fdp);
1480
1481
if (__predict_false(td->td_proc->p_fdtol != NULL)) {
1482
return (closefp_hl(fdp, fd, fp, td, holdleaders, audit));
1483
} else {
1484
return (closefp_impl(fdp, fd, fp, td, audit));
1485
}
1486
}
1487
1488
/*
 * close(2) system call entry point: close a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct close_args {
	int	fd;
};
#endif
/* ARGSUSED */
int
sys_close(struct thread *td, struct close_args *uap)
{
	int fd;

	fd = uap->fd;
	return (kern_close(td, fd));
}
1503
1504
int
1505
kern_close(struct thread *td, int fd)
1506
{
1507
struct filedesc *fdp;
1508
struct file *fp;
1509
1510
fdp = td->td_proc->p_fd;
1511
1512
FILEDESC_XLOCK(fdp);
1513
if ((fp = fget_noref(fdp, fd)) == NULL) {
1514
FILEDESC_XUNLOCK(fdp);
1515
return (EBADF);
1516
}
1517
fdfree(fdp, fd);
1518
1519
/* closefp() drops the FILEDESC lock for us. */
1520
return (closefp(fdp, fd, fp, td, true, true));
1521
}
1522
1523
static int
1524
close_range_flags(struct thread *td, u_int lowfd, u_int highfd, int flags)
1525
{
1526
struct filedesc *fdp;
1527
struct fdescenttbl *fdt;
1528
struct filedescent *fde;
1529
int fd, fde_flags;
1530
1531
fde_flags = close_range_to_fde_flags(flags);
1532
fdp = td->td_proc->p_fd;
1533
FILEDESC_XLOCK(fdp);
1534
fdt = atomic_load_ptr(&fdp->fd_files);
1535
highfd = MIN(highfd, fdt->fdt_nfiles - 1);
1536
fd = lowfd;
1537
if (__predict_false(fd > highfd)) {
1538
goto out_locked;
1539
}
1540
for (; fd <= highfd; fd++) {
1541
fde = &fdt->fdt_ofiles[fd];
1542
if (fde->fde_file != NULL)
1543
fde->fde_flags |= fde_flags;
1544
}
1545
out_locked:
1546
FILEDESC_XUNLOCK(fdp);
1547
return (0);
1548
}
1549
1550
static int
1551
close_range_impl(struct thread *td, u_int lowfd, u_int highfd)
1552
{
1553
struct filedesc *fdp;
1554
const struct fdescenttbl *fdt;
1555
struct file *fp;
1556
int fd;
1557
1558
fdp = td->td_proc->p_fd;
1559
FILEDESC_XLOCK(fdp);
1560
fdt = atomic_load_ptr(&fdp->fd_files);
1561
highfd = MIN(highfd, fdt->fdt_nfiles - 1);
1562
fd = lowfd;
1563
if (__predict_false(fd > highfd)) {
1564
goto out_locked;
1565
}
1566
for (;;) {
1567
fp = fdt->fdt_ofiles[fd].fde_file;
1568
if (fp == NULL) {
1569
if (fd == highfd)
1570
goto out_locked;
1571
} else {
1572
fdfree(fdp, fd);
1573
(void) closefp(fdp, fd, fp, td, true, true);
1574
if (fd == highfd)
1575
goto out_unlocked;
1576
FILEDESC_XLOCK(fdp);
1577
fdt = atomic_load_ptr(&fdp->fd_files);
1578
}
1579
fd++;
1580
}
1581
out_locked:
1582
FILEDESC_XUNLOCK(fdp);
1583
out_unlocked:
1584
return (0);
1585
}
1586
1587
int
1588
kern_close_range(struct thread *td, int flags, u_int lowfd, u_int highfd)
1589
{
1590
1591
/*
1592
* Check this prior to clamping; closefrom(3) with only fd 0, 1, and 2
1593
* open should not be a usage error. From a close_range() perspective,
1594
* close_range(3, ~0U, 0) in the same scenario should also likely not
1595
* be a usage error as all fd above 3 are in-fact already closed.
1596
*/
1597
if (highfd < lowfd) {
1598
return (EINVAL);
1599
}
1600
1601
if ((flags & (CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_CLOFORK)) != 0)
1602
return (close_range_flags(td, lowfd, highfd, flags));
1603
1604
return (close_range_impl(td, lowfd, highfd));
1605
}
1606
1607
#ifndef _SYS_SYSPROTO_H_
1608
struct close_range_args {
1609
u_int lowfd;
1610
u_int highfd;
1611
int flags;
1612
};
1613
#endif
1614
int
1615
sys_close_range(struct thread *td, struct close_range_args *uap)
1616
{
1617
1618
AUDIT_ARG_FD(uap->lowfd);
1619
AUDIT_ARG_CMD(uap->highfd);
1620
AUDIT_ARG_FFLAGS(uap->flags);
1621
1622
if ((uap->flags & ~(CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_CLOFORK)) != 0)
1623
return (EINVAL);
1624
return (kern_close_range(td, uap->flags, uap->lowfd, uap->highfd));
1625
}
1626
1627
#ifdef COMPAT_FREEBSD12
1628
/*
1629
* Close open file descriptors.
1630
*/
1631
#ifndef _SYS_SYSPROTO_H_
1632
struct freebsd12_closefrom_args {
1633
int lowfd;
1634
};
1635
#endif
1636
/* ARGSUSED */
1637
int
1638
freebsd12_closefrom(struct thread *td, struct freebsd12_closefrom_args *uap)
1639
{
1640
u_int lowfd;
1641
1642
AUDIT_ARG_FD(uap->lowfd);
1643
1644
/*
1645
* Treat negative starting file descriptor values identical to
1646
* closefrom(0) which closes all files.
1647
*/
1648
lowfd = MAX(0, uap->lowfd);
1649
return (kern_close_range(td, 0, lowfd, ~0U));
1650
}
1651
#endif /* COMPAT_FREEBSD12 */
1652
1653
#if defined(COMPAT_43)
1654
/*
1655
* Return status information about a file descriptor.
1656
*/
1657
#ifndef _SYS_SYSPROTO_H_
1658
struct ofstat_args {
1659
int fd;
1660
struct ostat *sb;
1661
};
1662
#endif
1663
/* ARGSUSED */
1664
int
1665
ofstat(struct thread *td, struct ofstat_args *uap)
1666
{
1667
struct ostat oub;
1668
struct stat ub;
1669
int error;
1670
1671
error = kern_fstat(td, uap->fd, &ub);
1672
if (error == 0) {
1673
cvtstat(&ub, &oub);
1674
error = copyout(&oub, uap->sb, sizeof(oub));
1675
}
1676
return (error);
1677
}
1678
#endif /* COMPAT_43 */
1679
1680
#if defined(COMPAT_FREEBSD11)
1681
int
1682
freebsd11_fstat(struct thread *td, struct freebsd11_fstat_args *uap)
1683
{
1684
struct stat sb;
1685
struct freebsd11_stat osb;
1686
int error;
1687
1688
error = kern_fstat(td, uap->fd, &sb);
1689
if (error != 0)
1690
return (error);
1691
error = freebsd11_cvtstat(&sb, &osb);
1692
if (error == 0)
1693
error = copyout(&osb, uap->sb, sizeof(osb));
1694
return (error);
1695
}
1696
#endif /* COMPAT_FREEBSD11 */
1697
1698
/*
1699
* Return status information about a file descriptor.
1700
*/
1701
#ifndef _SYS_SYSPROTO_H_
1702
struct fstat_args {
1703
int fd;
1704
struct stat *sb;
1705
};
1706
#endif
1707
/* ARGSUSED */
1708
int
1709
sys_fstat(struct thread *td, struct fstat_args *uap)
1710
{
1711
struct stat ub;
1712
int error;
1713
1714
error = kern_fstat(td, uap->fd, &ub);
1715
if (error == 0)
1716
error = copyout(&ub, uap->sb, sizeof(ub));
1717
return (error);
1718
}
1719
1720
int
1721
kern_fstat(struct thread *td, int fd, struct stat *sbp)
1722
{
1723
struct file *fp;
1724
int error;
1725
1726
AUDIT_ARG_FD(fd);
1727
1728
error = fget(td, fd, &cap_fstat_rights, &fp);
1729
if (__predict_false(error != 0))
1730
return (error);
1731
1732
AUDIT_ARG_FILE(td->td_proc, fp);
1733
1734
sbp->st_filerev = 0;
1735
sbp->st_bsdflags = 0;
1736
error = fo_stat(fp, sbp, td->td_ucred);
1737
fdrop(fp, td);
1738
#ifdef __STAT_TIME_T_EXT
1739
sbp->st_atim_ext = 0;
1740
sbp->st_mtim_ext = 0;
1741
sbp->st_ctim_ext = 0;
1742
sbp->st_btim_ext = 0;
1743
#endif
1744
#ifdef KTRACE
1745
if (KTRPOINT(td, KTR_STRUCT))
1746
ktrstat_error(sbp, error);
1747
#endif
1748
return (error);
1749
}
1750
1751
#if defined(COMPAT_FREEBSD11)
1752
/*
1753
* Return status information about a file descriptor.
1754
*/
1755
#ifndef _SYS_SYSPROTO_H_
1756
struct freebsd11_nfstat_args {
1757
int fd;
1758
struct nstat *sb;
1759
};
1760
#endif
1761
/* ARGSUSED */
1762
int
1763
freebsd11_nfstat(struct thread *td, struct freebsd11_nfstat_args *uap)
1764
{
1765
struct nstat nub;
1766
struct stat ub;
1767
int error;
1768
1769
error = kern_fstat(td, uap->fd, &ub);
1770
if (error != 0)
1771
return (error);
1772
error = freebsd11_cvtnstat(&ub, &nub);
1773
if (error != 0)
1774
error = copyout(&nub, uap->sb, sizeof(nub));
1775
return (error);
1776
}
1777
#endif /* COMPAT_FREEBSD11 */
1778
1779
/*
1780
* Return pathconf information about a file descriptor.
1781
*/
1782
#ifndef _SYS_SYSPROTO_H_
1783
struct fpathconf_args {
1784
int fd;
1785
int name;
1786
};
1787
#endif
1788
/* ARGSUSED */
1789
int
1790
sys_fpathconf(struct thread *td, struct fpathconf_args *uap)
1791
{
1792
long value;
1793
int error;
1794
1795
error = kern_fpathconf(td, uap->fd, uap->name, &value);
1796
if (error == 0)
1797
td->td_retval[0] = value;
1798
return (error);
1799
}
1800
1801
int
1802
kern_fpathconf(struct thread *td, int fd, int name, long *valuep)
1803
{
1804
struct file *fp;
1805
struct vnode *vp;
1806
int error;
1807
1808
error = fget(td, fd, &cap_fpathconf_rights, &fp);
1809
if (error != 0)
1810
return (error);
1811
1812
if (name == _PC_ASYNC_IO) {
1813
*valuep = _POSIX_ASYNCHRONOUS_IO;
1814
goto out;
1815
}
1816
vp = fp->f_vnode;
1817
if (vp != NULL) {
1818
vn_lock(vp, LK_SHARED | LK_RETRY);
1819
error = VOP_PATHCONF(vp, name, valuep);
1820
VOP_UNLOCK(vp);
1821
} else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
1822
if (name != _PC_PIPE_BUF) {
1823
error = EINVAL;
1824
} else {
1825
*valuep = PIPE_BUF;
1826
error = 0;
1827
}
1828
} else {
1829
error = EOPNOTSUPP;
1830
}
1831
out:
1832
fdrop(fp, td);
1833
return (error);
1834
}
1835
1836
/*
1837
* Copy filecaps structure allocating memory for ioctls array if needed.
1838
*
1839
* The last parameter indicates whether the fdtable is locked. If it is not and
1840
* ioctls are encountered, copying fails and the caller must lock the table.
1841
*
1842
* Note that if the table was not locked, the caller has to check the relevant
1843
* sequence counter to determine whether the operation was successful.
1844
*/
1845
bool
1846
filecaps_copy(const struct filecaps *src, struct filecaps *dst, bool locked)
1847
{
1848
size_t size;
1849
1850
if (src->fc_ioctls != NULL && !locked)
1851
return (false);
1852
memcpy(dst, src, sizeof(*src));
1853
if (src->fc_ioctls == NULL)
1854
return (true);
1855
1856
KASSERT(src->fc_nioctls > 0,
1857
("fc_ioctls != NULL, but fc_nioctls=%hd", src->fc_nioctls));
1858
1859
size = sizeof(src->fc_ioctls[0]) * src->fc_nioctls;
1860
dst->fc_ioctls = malloc(size, M_FILECAPS, M_WAITOK);
1861
memcpy(dst->fc_ioctls, src->fc_ioctls, size);
1862
return (true);
1863
}
1864
1865
static u_long *
1866
filecaps_copy_prep(const struct filecaps *src)
1867
{
1868
u_long *ioctls;
1869
size_t size;
1870
1871
if (__predict_true(src->fc_ioctls == NULL))
1872
return (NULL);
1873
1874
KASSERT(src->fc_nioctls > 0,
1875
("fc_ioctls != NULL, but fc_nioctls=%hd", src->fc_nioctls));
1876
1877
size = sizeof(src->fc_ioctls[0]) * src->fc_nioctls;
1878
ioctls = malloc(size, M_FILECAPS, M_WAITOK);
1879
return (ioctls);
1880
}
1881
1882
static void
1883
filecaps_copy_finish(const struct filecaps *src, struct filecaps *dst,
1884
u_long *ioctls)
1885
{
1886
size_t size;
1887
1888
*dst = *src;
1889
if (__predict_true(src->fc_ioctls == NULL)) {
1890
MPASS(ioctls == NULL);
1891
return;
1892
}
1893
1894
size = sizeof(src->fc_ioctls[0]) * src->fc_nioctls;
1895
dst->fc_ioctls = ioctls;
1896
bcopy(src->fc_ioctls, dst->fc_ioctls, size);
1897
}
1898
1899
/*
1900
* Move filecaps structure to the new place and clear the old place.
1901
*/
1902
void
1903
filecaps_move(struct filecaps *src, struct filecaps *dst)
1904
{
1905
1906
*dst = *src;
1907
bzero(src, sizeof(*src));
1908
}
1909
1910
/*
1911
* Fill the given filecaps structure with full rights.
1912
*/
1913
static void
1914
filecaps_fill(struct filecaps *fcaps)
1915
{
1916
1917
CAP_ALL(&fcaps->fc_rights);
1918
fcaps->fc_ioctls = NULL;
1919
fcaps->fc_nioctls = -1;
1920
fcaps->fc_fcntls = CAP_FCNTL_ALL;
1921
}
1922
1923
/*
1924
* Free memory allocated within filecaps structure.
1925
*/
1926
static void
1927
filecaps_free_ioctl(struct filecaps *fcaps)
1928
{
1929
1930
free(fcaps->fc_ioctls, M_FILECAPS);
1931
fcaps->fc_ioctls = NULL;
1932
}
1933
1934
void
1935
filecaps_free(struct filecaps *fcaps)
1936
{
1937
1938
filecaps_free_ioctl(fcaps);
1939
bzero(fcaps, sizeof(*fcaps));
1940
}
1941
1942
static u_long *
1943
filecaps_free_prep(struct filecaps *fcaps)
1944
{
1945
u_long *ioctls;
1946
1947
ioctls = fcaps->fc_ioctls;
1948
bzero(fcaps, sizeof(*fcaps));
1949
return (ioctls);
1950
}
1951
1952
static void
1953
filecaps_free_finish(u_long *ioctls)
1954
{
1955
1956
free(ioctls, M_FILECAPS);
1957
}
1958
1959
/*
1960
* Validate the given filecaps structure.
1961
*/
1962
static void
1963
filecaps_validate(const struct filecaps *fcaps, const char *func)
1964
{
1965
1966
KASSERT(cap_rights_is_valid(&fcaps->fc_rights),
1967
("%s: invalid rights", func));
1968
KASSERT((fcaps->fc_fcntls & ~CAP_FCNTL_ALL) == 0,
1969
("%s: invalid fcntls", func));
1970
KASSERT(fcaps->fc_fcntls == 0 ||
1971
cap_rights_is_set(&fcaps->fc_rights, CAP_FCNTL),
1972
("%s: fcntls without CAP_FCNTL", func));
1973
/*
1974
* open calls without WANTIOCTLCAPS free caps but leave the counter
1975
*/
1976
#if 0
1977
KASSERT(fcaps->fc_ioctls != NULL ? fcaps->fc_nioctls > 0 :
1978
(fcaps->fc_nioctls == -1 || fcaps->fc_nioctls == 0),
1979
("%s: invalid ioctls", func));
1980
#endif
1981
KASSERT(fcaps->fc_nioctls == 0 ||
1982
cap_rights_is_set(&fcaps->fc_rights, CAP_IOCTL),
1983
("%s: ioctls without CAP_IOCTL", func));
1984
}
1985
1986
static void
1987
fdgrowtable_exp(struct filedesc *fdp, int nfd)
1988
{
1989
int nfd1;
1990
1991
FILEDESC_XLOCK_ASSERT(fdp);
1992
1993
nfd1 = fdp->fd_nfiles * 2;
1994
if (nfd1 < nfd)
1995
nfd1 = nfd;
1996
fdgrowtable(fdp, nfd1);
1997
}
1998
1999
/*
2000
* Grow the file table to accommodate (at least) nfd descriptors.
2001
*/
2002
static void
2003
fdgrowtable(struct filedesc *fdp, int nfd)
2004
{
2005
struct filedesc0 *fdp0;
2006
struct freetable *ft;
2007
struct fdescenttbl *ntable;
2008
struct fdescenttbl *otable;
2009
int nnfiles, onfiles;
2010
NDSLOTTYPE *nmap, *omap;
2011
2012
KASSERT(fdp->fd_nfiles > 0, ("zero-length file table"));
2013
2014
/* save old values */
2015
onfiles = fdp->fd_nfiles;
2016
otable = fdp->fd_files;
2017
omap = fdp->fd_map;
2018
2019
/* compute the size of the new table */
2020
nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */
2021
if (nnfiles <= onfiles)
2022
/* the table is already large enough */
2023
return;
2024
2025
/*
2026
* Allocate a new table. We need enough space for the number of
2027
* entries, file entries themselves and the struct freetable we will use
2028
* when we decommission the table and place it on the freelist.
2029
* We place the struct freetable in the middle so we don't have
2030
* to worry about padding.
2031
*/
2032
ntable = malloc(offsetof(struct fdescenttbl, fdt_ofiles) +
2033
nnfiles * sizeof(ntable->fdt_ofiles[0]) +
2034
sizeof(struct freetable),
2035
M_FILEDESC, M_ZERO | M_WAITOK);
2036
/* copy the old data */
2037
ntable->fdt_nfiles = nnfiles;
2038
memcpy(ntable->fdt_ofiles, otable->fdt_ofiles,
2039
onfiles * sizeof(ntable->fdt_ofiles[0]));
2040
2041
/*
2042
* Allocate a new map only if the old is not large enough. It will
2043
* grow at a slower rate than the table as it can map more
2044
* entries than the table can hold.
2045
*/
2046
if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
2047
nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE, M_FILEDESC,
2048
M_ZERO | M_WAITOK);
2049
/* copy over the old data and update the pointer */
2050
memcpy(nmap, omap, NDSLOTS(onfiles) * sizeof(*omap));
2051
fdp->fd_map = nmap;
2052
}
2053
2054
/*
2055
* Make sure that ntable is correctly initialized before we replace
2056
* fd_files poiner. Otherwise fget_unlocked() may see inconsistent
2057
* data.
2058
*/
2059
atomic_store_rel_ptr((volatile void *)&fdp->fd_files, (uintptr_t)ntable);
2060
2061
/*
2062
* Free the old file table when not shared by other threads or processes.
2063
* The old file table is considered to be shared when either are true:
2064
* - The process has more than one thread.
2065
* - The file descriptor table has been shared via fdshare().
2066
*
2067
* When shared, the old file table will be placed on a freelist
2068
* which will be processed when the struct filedesc is released.
2069
*
2070
* Note that if onfiles == NDFILE, we're dealing with the original
2071
* static allocation contained within (struct filedesc0 *)fdp,
2072
* which must not be freed.
2073
*/
2074
if (onfiles > NDFILE) {
2075
/*
2076
* Note we may be called here from fdinit while allocating a
2077
* table for a new process in which case ->p_fd points
2078
* elsewhere.
2079
*/
2080
if (curproc->p_fd != fdp || FILEDESC_IS_ONLY_USER(fdp)) {
2081
free(otable, M_FILEDESC);
2082
} else {
2083
ft = (struct freetable *)&otable->fdt_ofiles[onfiles];
2084
fdp0 = (struct filedesc0 *)fdp;
2085
ft->ft_table = otable;
2086
SLIST_INSERT_HEAD(&fdp0->fd_free, ft, ft_next);
2087
}
2088
}
2089
/*
2090
* The map does not have the same possibility of threads still
2091
* holding references to it. So always free it as long as it
2092
* does not reference the original static allocation.
2093
*/
2094
if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
2095
free(omap, M_FILEDESC);
2096
}
2097
2098
/*
2099
* Allocate a file descriptor for the process.
2100
*/
2101
int
2102
fdalloc(struct thread *td, int minfd, int *result)
2103
{
2104
struct proc *p = td->td_proc;
2105
struct filedesc *fdp = p->p_fd;
2106
int fd, maxfd, allocfd;
2107
#ifdef RACCT
2108
int error;
2109
#endif
2110
2111
FILEDESC_XLOCK_ASSERT(fdp);
2112
2113
if (fdp->fd_freefile > minfd)
2114
minfd = fdp->fd_freefile;
2115
2116
maxfd = getmaxfd(td);
2117
2118
/*
2119
* Search the bitmap for a free descriptor starting at minfd.
2120
* If none is found, grow the file table.
2121
*/
2122
fd = fd_first_free(fdp, minfd, fdp->fd_nfiles);
2123
if (__predict_false(fd >= maxfd))
2124
return (EMFILE);
2125
if (__predict_false(fd >= fdp->fd_nfiles)) {
2126
allocfd = min(fd * 2, maxfd);
2127
#ifdef RACCT
2128
if (RACCT_ENABLED()) {
2129
error = racct_set_unlocked(p, RACCT_NOFILE, allocfd);
2130
if (error != 0)
2131
return (EMFILE);
2132
}
2133
#endif
2134
/*
2135
* fd is already equal to first free descriptor >= minfd, so
2136
* we only need to grow the table and we are done.
2137
*/
2138
fdgrowtable_exp(fdp, allocfd);
2139
}
2140
2141
/*
2142
* Perform some sanity checks, then mark the file descriptor as
2143
* used and return it to the caller.
2144
*/
2145
KASSERT(fd >= 0 && fd < min(maxfd, fdp->fd_nfiles),
2146
("invalid descriptor %d", fd));
2147
KASSERT(!fdisused(fdp, fd),
2148
("fd_first_free() returned non-free descriptor"));
2149
KASSERT(fdp->fd_ofiles[fd].fde_file == NULL,
2150
("file descriptor isn't free"));
2151
fdused(fdp, fd);
2152
*result = fd;
2153
return (0);
2154
}
2155
2156
/*
2157
* Allocate n file descriptors for the process.
2158
*/
2159
int
2160
fdallocn(struct thread *td, int minfd, int *fds, int n)
2161
{
2162
struct proc *p = td->td_proc;
2163
struct filedesc *fdp = p->p_fd;
2164
int i;
2165
2166
FILEDESC_XLOCK_ASSERT(fdp);
2167
2168
for (i = 0; i < n; i++)
2169
if (fdalloc(td, 0, &fds[i]) != 0)
2170
break;
2171
2172
if (i < n) {
2173
for (i--; i >= 0; i--)
2174
fdunused(fdp, fds[i]);
2175
return (EMFILE);
2176
}
2177
2178
return (0);
2179
}
2180
2181
/*
2182
* Create a new open file structure and allocate a file descriptor for the
2183
* process that refers to it. We add one reference to the file for the
2184
* descriptor table and one reference for resultfp. This is to prevent us
2185
* being preempted and the entry in the descriptor table closed after we
2186
* release the FILEDESC lock.
2187
*/
2188
int
2189
falloc_caps(struct thread *td, struct file **resultfp, int *resultfd, int flags,
2190
struct filecaps *fcaps)
2191
{
2192
struct file *fp;
2193
int error, fd;
2194
2195
MPASS(resultfp != NULL);
2196
MPASS(resultfd != NULL);
2197
2198
error = _falloc_noinstall(td, &fp, 2);
2199
if (__predict_false(error != 0)) {
2200
return (error);
2201
}
2202
2203
error = finstall_refed(td, fp, &fd, flags, fcaps);
2204
if (__predict_false(error != 0)) {
2205
falloc_abort(td, fp);
2206
return (error);
2207
}
2208
2209
*resultfp = fp;
2210
*resultfd = fd;
2211
2212
return (0);
2213
}
2214
2215
/*
2216
* Create a new open file structure without allocating a file descriptor.
2217
*/
2218
int
2219
_falloc_noinstall(struct thread *td, struct file **resultfp, u_int n)
2220
{
2221
struct file *fp;
2222
int maxuserfiles = maxfiles - (maxfiles / 20);
2223
int openfiles_new;
2224
static struct timeval lastfail;
2225
static int curfail;
2226
2227
KASSERT(resultfp != NULL, ("%s: resultfp == NULL", __func__));
2228
MPASS(n > 0);
2229
2230
openfiles_new = atomic_fetchadd_int(&openfiles, 1) + 1;
2231
if ((openfiles_new >= maxuserfiles &&
2232
priv_check(td, PRIV_MAXFILES) != 0) ||
2233
openfiles_new >= maxfiles) {
2234
atomic_subtract_int(&openfiles, 1);
2235
if (ppsratecheck(&lastfail, &curfail, 1)) {
2236
printf("kern.maxfiles limit exceeded by uid %i, (%s) "
2237
"please see tuning(7).\n", td->td_ucred->cr_ruid, td->td_proc->p_comm);
2238
}
2239
return (ENFILE);
2240
}
2241
fp = uma_zalloc(file_zone, M_WAITOK);
2242
bzero(fp, sizeof(*fp));
2243
refcount_init(&fp->f_count, n);
2244
fp->f_cred = crhold(td->td_ucred);
2245
fp->f_ops = &badfileops;
2246
*resultfp = fp;
2247
return (0);
2248
}
2249
2250
void
2251
falloc_abort(struct thread *td, struct file *fp)
2252
{
2253
2254
/*
2255
* For assertion purposes.
2256
*/
2257
refcount_init(&fp->f_count, 0);
2258
_fdrop(fp, td);
2259
}
2260
2261
/*
2262
* Install a file in a file descriptor table.
2263
*/
2264
void
2265
_finstall(struct filedesc *fdp, struct file *fp, int fd, int flags,
2266
struct filecaps *fcaps)
2267
{
2268
struct filedescent *fde;
2269
2270
MPASS(fp != NULL);
2271
if (fcaps != NULL)
2272
filecaps_validate(fcaps, __func__);
2273
FILEDESC_XLOCK_ASSERT(fdp);
2274
2275
fde = &fdp->fd_ofiles[fd];
2276
#ifdef CAPABILITIES
2277
seqc_write_begin(&fde->fde_seqc);
2278
#endif
2279
fde->fde_file = fp;
2280
fde->fde_flags = open_to_fde_flags(flags, true);
2281
if (fcaps != NULL)
2282
filecaps_move(fcaps, &fde->fde_caps);
2283
else
2284
filecaps_fill(&fde->fde_caps);
2285
#ifdef CAPABILITIES
2286
seqc_write_end(&fde->fde_seqc);
2287
#endif
2288
}
2289
2290
int
2291
finstall_refed(struct thread *td, struct file *fp, int *fd, int flags,
2292
struct filecaps *fcaps)
2293
{
2294
struct filedesc *fdp = td->td_proc->p_fd;
2295
int error;
2296
2297
MPASS(fd != NULL);
2298
2299
FILEDESC_XLOCK(fdp);
2300
error = fdalloc(td, 0, fd);
2301
if (__predict_true(error == 0)) {
2302
_finstall(fdp, fp, *fd, flags, fcaps);
2303
}
2304
FILEDESC_XUNLOCK(fdp);
2305
return (error);
2306
}
2307
2308
int
2309
finstall(struct thread *td, struct file *fp, int *fd, int flags,
2310
struct filecaps *fcaps)
2311
{
2312
int error;
2313
2314
MPASS(fd != NULL);
2315
2316
if (!fhold(fp))
2317
return (EBADF);
2318
error = finstall_refed(td, fp, fd, flags, fcaps);
2319
if (__predict_false(error != 0)) {
2320
fdrop(fp, td);
2321
}
2322
return (error);
2323
}
2324
2325
/*
2326
* Build a new filedesc structure from another.
2327
*
2328
* If fdp is not NULL, return with it shared locked.
2329
*/
2330
struct filedesc *
2331
fdinit(void)
2332
{
2333
struct filedesc0 *newfdp0;
2334
struct filedesc *newfdp;
2335
2336
newfdp0 = uma_zalloc(filedesc0_zone, M_WAITOK | M_ZERO);
2337
newfdp = &newfdp0->fd_fd;
2338
2339
/* Create the file descriptor table. */
2340
FILEDESC_LOCK_INIT(newfdp);
2341
refcount_init(&newfdp->fd_refcnt, 1);
2342
refcount_init(&newfdp->fd_holdcnt, 1);
2343
newfdp->fd_map = newfdp0->fd_dmap;
2344
newfdp->fd_files = (struct fdescenttbl *)&newfdp0->fd_dfiles;
2345
newfdp->fd_files->fdt_nfiles = NDFILE;
2346
2347
return (newfdp);
2348
}
2349
2350
/*
2351
* Build a pwddesc structure from another.
2352
* Copy the current, root, and jail root vnode references.
2353
*
2354
* If pdp is not NULL and keeplock is true, return with it (exclusively) locked.
2355
*/
2356
struct pwddesc *
2357
pdinit(struct pwddesc *pdp, bool keeplock)
2358
{
2359
struct pwddesc *newpdp;
2360
struct pwd *newpwd;
2361
2362
newpdp = malloc(sizeof(*newpdp), M_PWDDESC, M_WAITOK | M_ZERO);
2363
2364
PWDDESC_LOCK_INIT(newpdp);
2365
refcount_init(&newpdp->pd_refcount, 1);
2366
newpdp->pd_cmask = CMASK;
2367
2368
if (pdp == NULL) {
2369
newpwd = pwd_alloc();
2370
smr_serialized_store(&newpdp->pd_pwd, newpwd, true);
2371
return (newpdp);
2372
}
2373
2374
PWDDESC_XLOCK(pdp);
2375
newpwd = pwd_hold_pwddesc(pdp);
2376
smr_serialized_store(&newpdp->pd_pwd, newpwd, true);
2377
if (!keeplock)
2378
PWDDESC_XUNLOCK(pdp);
2379
return (newpdp);
2380
}
2381
2382
/*
2383
* Hold either filedesc or pwddesc of the passed process.
2384
*
2385
* The process lock is used to synchronize against the target exiting and
2386
* freeing the data.
2387
*
2388
* Clearing can be ilustrated in 3 steps:
2389
* 1. set the pointer to NULL. Either routine can race against it, hence
2390
* atomic_load_ptr.
2391
* 2. observe the process lock as not taken. Until then fdhold/pdhold can
2392
* race to either still see the pointer or find NULL. It is still safe to
2393
* grab a reference as clearing is stalled.
2394
* 3. after the lock is observed as not taken, any fdhold/pdhold calls are
2395
* guaranteed to see NULL, making it safe to finish clearing
2396
*/
2397
static struct filedesc *
2398
fdhold(struct proc *p)
2399
{
2400
struct filedesc *fdp;
2401
2402
PROC_LOCK_ASSERT(p, MA_OWNED);
2403
fdp = atomic_load_ptr(&p->p_fd);
2404
if (fdp != NULL)
2405
refcount_acquire(&fdp->fd_holdcnt);
2406
return (fdp);
2407
}
2408
2409
static struct pwddesc *
2410
pdhold(struct proc *p)
2411
{
2412
struct pwddesc *pdp;
2413
2414
PROC_LOCK_ASSERT(p, MA_OWNED);
2415
pdp = atomic_load_ptr(&p->p_pd);
2416
if (pdp != NULL)
2417
refcount_acquire(&pdp->pd_refcount);
2418
return (pdp);
2419
}
2420
2421
static void
2422
fddrop(struct filedesc *fdp)
2423
{
2424
2425
if (refcount_load(&fdp->fd_holdcnt) > 1) {
2426
if (refcount_release(&fdp->fd_holdcnt) == 0)
2427
return;
2428
}
2429
2430
FILEDESC_LOCK_DESTROY(fdp);
2431
uma_zfree(filedesc0_zone, fdp);
2432
}
2433
2434
static void
2435
pddrop(struct pwddesc *pdp)
2436
{
2437
struct pwd *pwd;
2438
2439
if (refcount_release_if_not_last(&pdp->pd_refcount))
2440
return;
2441
2442
PWDDESC_XLOCK(pdp);
2443
if (refcount_release(&pdp->pd_refcount) == 0) {
2444
PWDDESC_XUNLOCK(pdp);
2445
return;
2446
}
2447
pwd = PWDDESC_XLOCKED_LOAD_PWD(pdp);
2448
pwd_set(pdp, NULL);
2449
PWDDESC_XUNLOCK(pdp);
2450
pwd_drop(pwd);
2451
2452
PWDDESC_LOCK_DESTROY(pdp);
2453
free(pdp, M_PWDDESC);
2454
}
2455
2456
/*
2457
* Share a filedesc structure.
2458
*/
2459
struct filedesc *
2460
fdshare(struct filedesc *fdp)
2461
{
2462
2463
refcount_acquire(&fdp->fd_refcnt);
2464
return (fdp);
2465
}
2466
2467
/*
2468
* Share a pwddesc structure.
2469
*/
2470
struct pwddesc *
2471
pdshare(struct pwddesc *pdp)
2472
{
2473
refcount_acquire(&pdp->pd_refcount);
2474
return (pdp);
2475
}
2476
2477
/*
2478
* Unshare a filedesc structure, if necessary by making a copy
2479
*/
2480
void
2481
fdunshare(struct thread *td)
2482
{
2483
struct filedesc *tmp;
2484
struct proc *p = td->td_proc;
2485
2486
if (refcount_load(&p->p_fd->fd_refcnt) == 1)
2487
return;
2488
2489
tmp = fdcopy(p->p_fd);
2490
fdescfree(td);
2491
p->p_fd = tmp;
2492
}
2493
2494
/*
2495
* Unshare a pwddesc structure.
2496
*/
2497
void
2498
pdunshare(struct thread *td)
2499
{
2500
struct pwddesc *pdp;
2501
struct proc *p;
2502
2503
p = td->td_proc;
2504
/* Not shared. */
2505
if (refcount_load(&p->p_pd->pd_refcount) == 1)
2506
return;
2507
2508
pdp = pdcopy(p->p_pd);
2509
pdescfree(td);
2510
p->p_pd = pdp;
2511
}
2512
2513
/*
2514
* Copy a filedesc structure. A NULL pointer in returns a NULL reference,
2515
* this is to ease callers, not catch errors.
2516
*/
2517
struct filedesc *
2518
fdcopy(struct filedesc *fdp)
2519
{
2520
struct filedesc *newfdp;
2521
struct filedescent *nfde, *ofde;
2522
int i, lastfile;
2523
2524
MPASS(fdp != NULL);
2525
2526
newfdp = fdinit();
2527
FILEDESC_SLOCK(fdp);
2528
for (;;) {
2529
lastfile = fdlastfile(fdp);
2530
if (lastfile < newfdp->fd_nfiles)
2531
break;
2532
FILEDESC_SUNLOCK(fdp);
2533
fdgrowtable(newfdp, lastfile + 1);
2534
FILEDESC_SLOCK(fdp);
2535
}
2536
/* copy all passable descriptors (i.e. not kqueue) */
2537
newfdp->fd_freefile = fdp->fd_freefile;
2538
FILEDESC_FOREACH_FDE(fdp, i, ofde) {
2539
if ((ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) == 0 ||
2540
(ofde->fde_flags & UF_FOCLOSE) != 0 ||
2541
!fhold(ofde->fde_file)) {
2542
if (newfdp->fd_freefile == fdp->fd_freefile)
2543
newfdp->fd_freefile = i;
2544
continue;
2545
}
2546
nfde = &newfdp->fd_ofiles[i];
2547
*nfde = *ofde;
2548
filecaps_copy(&ofde->fde_caps, &nfde->fde_caps, true);
2549
fdused_init(newfdp, i);
2550
}
2551
MPASS(newfdp->fd_freefile != -1);
2552
FILEDESC_SUNLOCK(fdp);
2553
return (newfdp);
2554
}
2555
2556
/*
2557
* Copy a pwddesc structure.
2558
*/
2559
struct pwddesc *
2560
pdcopy(struct pwddesc *pdp)
2561
{
2562
struct pwddesc *newpdp;
2563
2564
MPASS(pdp != NULL);
2565
2566
newpdp = pdinit(pdp, true);
2567
newpdp->pd_cmask = pdp->pd_cmask;
2568
PWDDESC_XUNLOCK(pdp);
2569
return (newpdp);
2570
}
2571
2572
/*
2573
* Clear POSIX style locks. This is only used when fdp looses a reference (i.e.
2574
* one of processes using it exits) and the table used to be shared.
2575
*/
2576
static void
2577
fdclearlocks(struct thread *td)
2578
{
2579
struct filedesc *fdp;
2580
struct filedesc_to_leader *fdtol;
2581
struct flock lf;
2582
struct file *fp;
2583
struct proc *p;
2584
struct vnode *vp;
2585
int i;
2586
2587
p = td->td_proc;
2588
fdp = p->p_fd;
2589
fdtol = p->p_fdtol;
2590
MPASS(fdtol != NULL);
2591
2592
FILEDESC_XLOCK(fdp);
2593
KASSERT(fdtol->fdl_refcount > 0,
2594
("filedesc_to_refcount botch: fdl_refcount=%d",
2595
fdtol->fdl_refcount));
2596
if (fdtol->fdl_refcount == 1 &&
2597
(p->p_leader->p_flag & P_ADVLOCK) != 0) {
2598
FILEDESC_FOREACH_FP(fdp, i, fp) {
2599
if (fp->f_type != DTYPE_VNODE ||
2600
!fhold(fp))
2601
continue;
2602
FILEDESC_XUNLOCK(fdp);
2603
lf.l_whence = SEEK_SET;
2604
lf.l_start = 0;
2605
lf.l_len = 0;
2606
lf.l_type = F_UNLCK;
2607
vp = fp->f_vnode;
2608
(void) VOP_ADVLOCK(vp,
2609
(caddr_t)p->p_leader, F_UNLCK,
2610
&lf, F_POSIX);
2611
FILEDESC_XLOCK(fdp);
2612
fdrop(fp, td);
2613
}
2614
}
2615
retry:
2616
if (fdtol->fdl_refcount == 1) {
2617
if (fdp->fd_holdleaderscount > 0 &&
2618
(p->p_leader->p_flag & P_ADVLOCK) != 0) {
2619
/*
2620
* close() or kern_dup() has cleared a reference
2621
* in a shared file descriptor table.
2622
*/
2623
fdp->fd_holdleaderswakeup = 1;
2624
sx_sleep(&fdp->fd_holdleaderscount,
2625
FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0);
2626
goto retry;
2627
}
2628
if (fdtol->fdl_holdcount > 0) {
2629
/*
2630
* Ensure that fdtol->fdl_leader remains
2631
* valid in closef().
2632
*/
2633
fdtol->fdl_wakeup = 1;
2634
sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK,
2635
"fdlhold", 0);
2636
goto retry;
2637
}
2638
}
2639
fdtol->fdl_refcount--;
2640
if (fdtol->fdl_refcount == 0 &&
2641
fdtol->fdl_holdcount == 0) {
2642
fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
2643
fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
2644
} else
2645
fdtol = NULL;
2646
p->p_fdtol = NULL;
2647
FILEDESC_XUNLOCK(fdp);
2648
if (fdtol != NULL)
2649
free(fdtol, M_FILEDESC_TO_LEADER);
2650
}
2651
2652
/*
2653
* Release a filedesc structure.
2654
*/
2655
static void
2656
fdescfree_fds(struct thread *td, struct filedesc *fdp)
2657
{
2658
struct filedesc0 *fdp0;
2659
struct freetable *ft, *tft;
2660
struct filedescent *fde;
2661
struct file *fp;
2662
int i;
2663
2664
KASSERT(refcount_load(&fdp->fd_refcnt) == 0,
2665
("%s: fd table %p carries references", __func__, fdp));
2666
2667
/*
2668
* Serialize with threads iterating over the table, if any.
2669
*/
2670
if (refcount_load(&fdp->fd_holdcnt) > 1) {
2671
FILEDESC_XLOCK(fdp);
2672
FILEDESC_XUNLOCK(fdp);
2673
}
2674
2675
FILEDESC_FOREACH_FDE(fdp, i, fde) {
2676
fp = fde->fde_file;
2677
fdefree_last(fde);
2678
(void) closef(fp, td);
2679
}
2680
2681
if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE))
2682
free(fdp->fd_map, M_FILEDESC);
2683
if (fdp->fd_nfiles > NDFILE)
2684
free(fdp->fd_files, M_FILEDESC);
2685
2686
fdp0 = (struct filedesc0 *)fdp;
2687
SLIST_FOREACH_SAFE(ft, &fdp0->fd_free, ft_next, tft)
2688
free(ft->ft_table, M_FILEDESC);
2689
2690
fddrop(fdp);
2691
}
2692
2693
void
2694
fdescfree(struct thread *td)
2695
{
2696
struct proc *p;
2697
struct filedesc *fdp;
2698
2699
p = td->td_proc;
2700
fdp = p->p_fd;
2701
MPASS(fdp != NULL);
2702
2703
#ifdef RACCT
2704
if (RACCT_ENABLED())
2705
racct_set_unlocked(p, RACCT_NOFILE, 0);
2706
#endif
2707
2708
if (p->p_fdtol != NULL)
2709
fdclearlocks(td);
2710
2711
/*
2712
* Check fdhold for an explanation.
2713
*/
2714
atomic_store_ptr(&p->p_fd, NULL);
2715
atomic_thread_fence_seq_cst();
2716
PROC_WAIT_UNLOCKED(p);
2717
2718
if (refcount_release(&fdp->fd_refcnt) == 0)
2719
return;
2720
2721
fdescfree_fds(td, fdp);
2722
}
2723
2724
void
2725
pdescfree(struct thread *td)
2726
{
2727
struct proc *p;
2728
struct pwddesc *pdp;
2729
2730
p = td->td_proc;
2731
pdp = p->p_pd;
2732
MPASS(pdp != NULL);
2733
2734
/*
2735
* Check pdhold for an explanation.
2736
*/
2737
atomic_store_ptr(&p->p_pd, NULL);
2738
atomic_thread_fence_seq_cst();
2739
PROC_WAIT_UNLOCKED(p);
2740
2741
pddrop(pdp);
2742
}
2743
2744
/*
2745
* For setugid programs, we don't want to people to use that setugidness
2746
* to generate error messages which write to a file which otherwise would
2747
* otherwise be off-limits to the process. We check for filesystems where
2748
* the vnode can change out from under us after execve (like [lin]procfs).
2749
*
2750
* Since fdsetugidsafety calls this only for fd 0, 1 and 2, this check is
2751
* sufficient. We also don't check for setugidness since we know we are.
2752
*/
2753
static bool
2754
is_unsafe(struct file *fp)
2755
{
2756
struct vnode *vp;
2757
2758
if (fp->f_type != DTYPE_VNODE)
2759
return (false);
2760
2761
vp = fp->f_vnode;
2762
return ((vp->v_vflag & VV_PROCDEP) != 0);
2763
}
2764
2765
/*
2766
* Make this setguid thing safe, if at all possible.
2767
*/
2768
void
2769
fdsetugidsafety(struct thread *td)
2770
{
2771
struct filedesc *fdp;
2772
struct file *fp;
2773
int i;
2774
2775
fdp = td->td_proc->p_fd;
2776
KASSERT(refcount_load(&fdp->fd_refcnt) == 1,
2777
("the fdtable should not be shared"));
2778
MPASS(fdp->fd_nfiles >= 3);
2779
for (i = 0; i <= 2; i++) {
2780
fp = fdp->fd_ofiles[i].fde_file;
2781
if (fp != NULL && is_unsafe(fp)) {
2782
FILEDESC_XLOCK(fdp);
2783
knote_fdclose(td, i);
2784
/*
2785
* NULL-out descriptor prior to close to avoid
2786
* a race while close blocks.
2787
*/
2788
fdfree(fdp, i);
2789
FILEDESC_XUNLOCK(fdp);
2790
(void) closef(fp, td);
2791
}
2792
}
2793
}
2794
2795
/*
2796
* If a specific file object occupies a specific file descriptor, close the
2797
* file descriptor entry and drop a reference on the file object. This is a
2798
* convenience function to handle a subsequent error in a function that calls
2799
* falloc() that handles the race that another thread might have closed the
2800
* file descriptor out from under the thread creating the file object.
2801
*/
2802
void
2803
fdclose(struct thread *td, struct file *fp, int idx)
2804
{
2805
struct filedesc *fdp = td->td_proc->p_fd;
2806
2807
FILEDESC_XLOCK(fdp);
2808
if (fdp->fd_ofiles[idx].fde_file == fp) {
2809
fdfree(fdp, idx);
2810
FILEDESC_XUNLOCK(fdp);
2811
fdrop(fp, td);
2812
} else
2813
FILEDESC_XUNLOCK(fdp);
2814
}
2815
2816
/*
2817
* Close any files on exec?
2818
*/
2819
void
2820
fdcloseexec(struct thread *td)
2821
{
2822
struct filedesc *fdp;
2823
struct filedescent *fde;
2824
struct file *fp;
2825
int i;
2826
2827
fdp = td->td_proc->p_fd;
2828
KASSERT(refcount_load(&fdp->fd_refcnt) == 1,
2829
("the fdtable should not be shared"));
2830
FILEDESC_FOREACH_FDE(fdp, i, fde) {
2831
fp = fde->fde_file;
2832
if (fp->f_type == DTYPE_MQUEUE ||
2833
(fde->fde_flags & UF_EXCLOSE)) {
2834
FILEDESC_XLOCK(fdp);
2835
fdfree(fdp, i);
2836
(void) closefp(fdp, i, fp, td, false, false);
2837
FILEDESC_UNLOCK_ASSERT(fdp);
2838
} else if (fde->fde_flags & UF_FOCLOSE) {
2839
/*
2840
* https://austingroupbugs.net/view.php?id=1851
2841
* FD_CLOFORK should not be preserved across exec
2842
*/
2843
fde->fde_flags &= ~UF_FOCLOSE;
2844
}
2845
}
2846
}
2847
2848
/*
2849
* It is unsafe for set[ug]id processes to be started with file
2850
* descriptors 0..2 closed, as these descriptors are given implicit
2851
* significance in the Standard C library. fdcheckstd() will create a
2852
* descriptor referencing /dev/null for each of stdin, stdout, and
2853
* stderr that is not already open.
2854
*/
2855
int
2856
fdcheckstd(struct thread *td)
2857
{
2858
struct filedesc *fdp;
2859
register_t save;
2860
int i, error, devnull;
2861
2862
fdp = td->td_proc->p_fd;
2863
KASSERT(refcount_load(&fdp->fd_refcnt) == 1,
2864
("the fdtable should not be shared"));
2865
MPASS(fdp->fd_nfiles >= 3);
2866
devnull = -1;
2867
for (i = 0; i <= 2; i++) {
2868
if (fdp->fd_ofiles[i].fde_file != NULL)
2869
continue;
2870
2871
save = td->td_retval[0];
2872
if (devnull != -1) {
2873
error = kern_dup(td, FDDUP_FIXED, 0, devnull, i);
2874
} else {
2875
error = kern_openat(td, AT_FDCWD, "/dev/null",
2876
UIO_SYSSPACE, O_RDWR, 0);
2877
if (error == 0) {
2878
devnull = td->td_retval[0];
2879
KASSERT(devnull == i, ("we didn't get our fd"));
2880
}
2881
}
2882
td->td_retval[0] = save;
2883
if (error != 0)
2884
return (error);
2885
}
2886
return (0);
2887
}
2888
2889
/*
2890
* Internal form of close. Decrement reference count on file structure.
2891
* Note: td may be NULL when closing a file that was being passed in a
2892
* message.
2893
*/
2894
int
2895
closef(struct file *fp, struct thread *td)
2896
{
2897
struct vnode *vp;
2898
struct flock lf;
2899
struct filedesc_to_leader *fdtol;
2900
struct filedesc *fdp;
2901
2902
MPASS(td != NULL);
2903
2904
/*
2905
* POSIX record locking dictates that any close releases ALL
2906
* locks owned by this process. This is handled by setting
2907
* a flag in the unlock to free ONLY locks obeying POSIX
2908
* semantics, and not to free BSD-style file locks.
2909
* If the descriptor was in a message, POSIX-style locks
2910
* aren't passed with the descriptor, and the thread pointer
2911
* will be NULL. Callers should be careful only to pass a
2912
* NULL thread pointer when there really is no owning
2913
* context that might have locks, or the locks will be
2914
* leaked.
2915
*/
2916
if (fp->f_type == DTYPE_VNODE) {
2917
vp = fp->f_vnode;
2918
if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
2919
lf.l_whence = SEEK_SET;
2920
lf.l_start = 0;
2921
lf.l_len = 0;
2922
lf.l_type = F_UNLCK;
2923
(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
2924
F_UNLCK, &lf, F_POSIX);
2925
}
2926
fdtol = td->td_proc->p_fdtol;
2927
if (fdtol != NULL) {
2928
/*
2929
* Handle special case where file descriptor table is
2930
* shared between multiple process leaders.
2931
*/
2932
fdp = td->td_proc->p_fd;
2933
FILEDESC_XLOCK(fdp);
2934
for (fdtol = fdtol->fdl_next;
2935
fdtol != td->td_proc->p_fdtol;
2936
fdtol = fdtol->fdl_next) {
2937
if ((fdtol->fdl_leader->p_flag &
2938
P_ADVLOCK) == 0)
2939
continue;
2940
fdtol->fdl_holdcount++;
2941
FILEDESC_XUNLOCK(fdp);
2942
lf.l_whence = SEEK_SET;
2943
lf.l_start = 0;
2944
lf.l_len = 0;
2945
lf.l_type = F_UNLCK;
2946
vp = fp->f_vnode;
2947
(void) VOP_ADVLOCK(vp,
2948
(caddr_t)fdtol->fdl_leader, F_UNLCK, &lf,
2949
F_POSIX);
2950
FILEDESC_XLOCK(fdp);
2951
fdtol->fdl_holdcount--;
2952
if (fdtol->fdl_holdcount == 0 &&
2953
fdtol->fdl_wakeup != 0) {
2954
fdtol->fdl_wakeup = 0;
2955
wakeup(fdtol);
2956
}
2957
}
2958
FILEDESC_XUNLOCK(fdp);
2959
}
2960
}
2961
return (fdrop_close(fp, td));
2962
}
2963
2964
/*
2965
* Hack for file descriptor passing code.
2966
*/
2967
void
2968
closef_nothread(struct file *fp)
2969
{
2970
2971
fdrop(fp, NULL);
2972
}
2973
2974
/*
2975
* Initialize the file pointer with the specified properties.
2976
*
2977
* The ops are set with release semantics to be certain that the flags, type,
2978
* and data are visible when ops is. This is to prevent ops methods from being
2979
* called with bad data.
2980
*/
2981
void
2982
finit(struct file *fp, u_int flag, short type, void *data,
2983
const struct fileops *ops)
2984
{
2985
fp->f_data = data;
2986
fp->f_flag = flag;
2987
fp->f_type = type;
2988
atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops);
2989
}
2990
2991
void
2992
finit_vnode(struct file *fp, u_int flag, void *data, const struct fileops *ops)
2993
{
2994
fp->f_seqcount[UIO_READ] = 1;
2995
fp->f_seqcount[UIO_WRITE] = 1;
2996
finit(fp, (flag & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE,
2997
data, ops);
2998
}
2999
3000
int
3001
fget_cap_noref(struct filedesc *fdp, int fd, const cap_rights_t *needrightsp,
3002
struct file **fpp, struct filecaps *havecapsp)
3003
{
3004
struct filedescent *fde;
3005
int error;
3006
3007
FILEDESC_LOCK_ASSERT(fdp);
3008
3009
*fpp = NULL;
3010
fde = fdeget_noref(fdp, fd);
3011
if (fde == NULL) {
3012
error = EBADF;
3013
goto out;
3014
}
3015
3016
#ifdef CAPABILITIES
3017
error = cap_check(cap_rights_fde_inline(fde), needrightsp);
3018
if (error != 0)
3019
goto out;
3020
#endif
3021
3022
if (havecapsp != NULL)
3023
filecaps_copy(&fde->fde_caps, havecapsp, true);
3024
3025
*fpp = fde->fde_file;
3026
3027
error = 0;
3028
out:
3029
return (error);
3030
}
3031
3032
#ifdef CAPABILITIES
3033
int
3034
fget_cap(struct thread *td, int fd, const cap_rights_t *needrightsp,
3035
uint8_t *flagsp, struct file **fpp, struct filecaps *havecapsp)
3036
{
3037
struct filedesc *fdp = td->td_proc->p_fd;
3038
int error;
3039
struct file *fp;
3040
seqc_t seq;
3041
3042
*fpp = NULL;
3043
for (;;) {
3044
error = fget_unlocked_seq(td, fd, needrightsp, flagsp, &fp,
3045
&seq);
3046
if (error != 0)
3047
return (error);
3048
3049
if (havecapsp != NULL) {
3050
if (!filecaps_copy(&fdp->fd_ofiles[fd].fde_caps,
3051
havecapsp, false)) {
3052
fdrop(fp, td);
3053
goto get_locked;
3054
}
3055
}
3056
3057
if (!fd_modified(fdp, fd, seq))
3058
break;
3059
fdrop(fp, td);
3060
}
3061
3062
*fpp = fp;
3063
return (0);
3064
3065
get_locked:
3066
FILEDESC_SLOCK(fdp);
3067
error = fget_cap_noref(fdp, fd, needrightsp, fpp, havecapsp);
3068
if (error == 0 && !fhold(*fpp))
3069
error = EBADF;
3070
FILEDESC_SUNLOCK(fdp);
3071
return (error);
3072
}
3073
#else
3074
int
3075
fget_cap(struct thread *td, int fd, const cap_rights_t *needrightsp,
3076
uint8_t *flagsp, struct file **fpp, struct filecaps *havecapsp)
3077
{
3078
int error;
3079
error = fget_unlocked_flags(td, fd, needrightsp, flagsp, fpp);
3080
if (havecapsp != NULL && error == 0)
3081
filecaps_fill(havecapsp);
3082
3083
return (error);
3084
}
3085
#endif
3086
3087
int
3088
fget_remote(struct thread *td, struct proc *p, int fd, struct file **fpp)
3089
{
3090
struct filedesc *fdp;
3091
struct file *fp;
3092
int error;
3093
3094
if (p == td->td_proc) /* curproc */
3095
return (fget_unlocked(td, fd, &cap_no_rights, fpp));
3096
3097
PROC_LOCK(p);
3098
fdp = fdhold(p);
3099
PROC_UNLOCK(p);
3100
if (fdp == NULL)
3101
return (ENOENT);
3102
FILEDESC_SLOCK(fdp);
3103
if (refcount_load(&fdp->fd_refcnt) != 0) {
3104
fp = fget_noref(fdp, fd);
3105
if (fp != NULL && fhold(fp)) {
3106
*fpp = fp;
3107
error = 0;
3108
} else {
3109
error = EBADF;
3110
}
3111
} else {
3112
error = ENOENT;
3113
}
3114
FILEDESC_SUNLOCK(fdp);
3115
fddrop(fdp);
3116
return (error);
3117
}
3118
3119
int
3120
fget_remote_foreach(struct thread *td, struct proc *p,
3121
int (*fn)(struct proc *, int, struct file *, void *), void *arg)
3122
{
3123
struct filedesc *fdp;
3124
struct fdescenttbl *fdt;
3125
struct file *fp;
3126
int error, error1, fd, highfd;
3127
3128
error = 0;
3129
PROC_LOCK(p);
3130
fdp = fdhold(p);
3131
PROC_UNLOCK(p);
3132
if (fdp == NULL)
3133
return (ENOENT);
3134
3135
FILEDESC_SLOCK(fdp);
3136
if (refcount_load(&fdp->fd_refcnt) != 0) {
3137
fdt = atomic_load_ptr(&fdp->fd_files);
3138
highfd = fdt->fdt_nfiles - 1;
3139
FILEDESC_SUNLOCK(fdp);
3140
} else {
3141
error = ENOENT;
3142
FILEDESC_SUNLOCK(fdp);
3143
goto out;
3144
}
3145
3146
for (fd = 0; fd <= highfd; fd++) {
3147
error1 = fget_remote(td, p, fd, &fp);
3148
if (error1 != 0)
3149
continue;
3150
error = fn(p, fd, fp, arg);
3151
fdrop(fp, td);
3152
if (error != 0)
3153
break;
3154
}
3155
out:
3156
fddrop(fdp);
3157
return (error);
3158
}
3159
3160
#ifdef CAPABILITIES
3161
int
3162
fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, int *flagsp)
3163
{
3164
const struct filedescent *fde;
3165
const struct fdescenttbl *fdt;
3166
struct filedesc *fdp;
3167
struct file *fp;
3168
struct vnode *vp;
3169
const cap_rights_t *haverights;
3170
cap_rights_t rights;
3171
seqc_t seq;
3172
int fd, flags;
3173
3174
VFS_SMR_ASSERT_ENTERED();
3175
3176
fd = ndp->ni_dirfd;
3177
rights = *ndp->ni_rightsneeded;
3178
cap_rights_set_one(&rights, CAP_LOOKUP);
3179
3180
fdp = curproc->p_fd;
3181
fdt = fdp->fd_files;
3182
if (__predict_false((u_int)fd >= fdt->fdt_nfiles))
3183
return (EBADF);
3184
seq = seqc_read_notmodify(fd_seqc(fdt, fd));
3185
fde = &fdt->fdt_ofiles[fd];
3186
haverights = cap_rights_fde_inline(fde);
3187
fp = fde->fde_file;
3188
if (__predict_false(fp == NULL))
3189
return (EAGAIN);
3190
if (__predict_false(cap_check_inline_transient(haverights, &rights)))
3191
return (EAGAIN);
3192
flags = fp->f_flag & FSEARCH;
3193
flags |= (fde->fde_flags & UF_RESOLVE_BENEATH) != 0 ?
3194
O_RESOLVE_BENEATH : 0;
3195
vp = fp->f_vnode;
3196
if (__predict_false(vp == NULL)) {
3197
return (EAGAIN);
3198
}
3199
if (!filecaps_copy(&fde->fde_caps, &ndp->ni_filecaps, false)) {
3200
return (EAGAIN);
3201
}
3202
/*
3203
* Use an acquire barrier to force re-reading of fdt so it is
3204
* refreshed for verification.
3205
*/
3206
atomic_thread_fence_acq();
3207
fdt = fdp->fd_files;
3208
if (__predict_false(!seqc_consistent_no_fence(fd_seqc(fdt, fd), seq)))
3209
return (EAGAIN);
3210
/*
3211
* If file descriptor doesn't have all rights,
3212
* all lookups relative to it must also be
3213
* strictly relative.
3214
*
3215
* Not yet supported by fast path.
3216
*/
3217
CAP_ALL(&rights);
3218
if (!cap_rights_contains(&ndp->ni_filecaps.fc_rights, &rights) ||
3219
ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL ||
3220
ndp->ni_filecaps.fc_nioctls != -1) {
3221
#ifdef notyet
3222
ndp->ni_lcf |= NI_LCF_STRICTREL;
3223
#else
3224
return (EAGAIN);
3225
#endif
3226
}
3227
*vpp = vp;
3228
*flagsp = flags;
3229
return (0);
3230
}
3231
#else
3232
int
3233
fgetvp_lookup_smr(struct nameidata *ndp, struct vnode **vpp, int *flagsp)
3234
{
3235
const struct filedescent *fde;
3236
const struct fdescenttbl *fdt;
3237
struct filedesc *fdp;
3238
struct file *fp;
3239
struct vnode *vp;
3240
int fd, flags;
3241
3242
VFS_SMR_ASSERT_ENTERED();
3243
3244
fd = ndp->ni_dirfd;
3245
fdp = curproc->p_fd;
3246
fdt = fdp->fd_files;
3247
if (__predict_false((u_int)fd >= fdt->fdt_nfiles))
3248
return (EBADF);
3249
fde = &fdt->fdt_ofiles[fd];
3250
fp = fde->fde_file;
3251
if (__predict_false(fp == NULL))
3252
return (EAGAIN);
3253
flags = fp->f_flag & FSEARCH;
3254
flags |= (fde->fde_flags & UF_RESOLVE_BENEATH) != 0 ?
3255
O_RESOLVE_BENEATH : 0;
3256
vp = fp->f_vnode;
3257
if (__predict_false(vp == NULL || vp->v_type != VDIR)) {
3258
return (EAGAIN);
3259
}
3260
/*
3261
* Use an acquire barrier to force re-reading of fdt so it is
3262
* refreshed for verification.
3263
*/
3264
atomic_thread_fence_acq();
3265
fdt = fdp->fd_files;
3266
if (__predict_false(fp != fdt->fdt_ofiles[fd].fde_file))
3267
return (EAGAIN);
3268
filecaps_fill(&ndp->ni_filecaps);
3269
*vpp = vp;
3270
*flagsp = flags;
3271
return (0);
3272
}
3273
#endif
3274
3275
int
3276
fgetvp_lookup(struct nameidata *ndp, struct vnode **vpp)
3277
{
3278
struct thread *td;
3279
struct file *fp;
3280
struct vnode *vp;
3281
struct componentname *cnp;
3282
cap_rights_t rights;
3283
int error;
3284
uint8_t flags;
3285
3286
td = curthread;
3287
rights = *ndp->ni_rightsneeded;
3288
cap_rights_set_one(&rights, CAP_LOOKUP);
3289
cnp = &ndp->ni_cnd;
3290
3291
error = fget_cap(td, ndp->ni_dirfd, &rights, &flags, &fp,
3292
&ndp->ni_filecaps);
3293
if (__predict_false(error != 0))
3294
return (error);
3295
if (__predict_false(fp->f_ops == &badfileops)) {
3296
error = EBADF;
3297
goto out_free;
3298
}
3299
vp = fp->f_vnode;
3300
if (__predict_false(vp == NULL)) {
3301
error = ENOTDIR;
3302
goto out_free;
3303
}
3304
vrefact(vp);
3305
/*
3306
* XXX does not check for VDIR, handled by namei_setup
3307
*/
3308
if ((fp->f_flag & FSEARCH) != 0)
3309
cnp->cn_flags |= NOEXECCHECK;
3310
if ((flags & UF_RESOLVE_BENEATH) != 0) {
3311
cnp->cn_flags |= RBENEATH;
3312
ndp->ni_resflags |= NIRES_BENEATH;
3313
}
3314
fdrop(fp, td);
3315
3316
#ifdef CAPABILITIES
3317
/*
3318
* If file descriptor doesn't have all rights,
3319
* all lookups relative to it must also be
3320
* strictly relative.
3321
*/
3322
CAP_ALL(&rights);
3323
if (!cap_rights_contains(&ndp->ni_filecaps.fc_rights, &rights) ||
3324
ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL ||
3325
ndp->ni_filecaps.fc_nioctls != -1) {
3326
ndp->ni_lcf |= NI_LCF_STRICTREL;
3327
ndp->ni_resflags |= NIRES_STRICTREL;
3328
}
3329
#endif
3330
3331
/*
3332
* TODO: avoid copying ioctl caps if it can be helped to begin with
3333
*/
3334
if ((cnp->cn_flags & WANTIOCTLCAPS) == 0)
3335
filecaps_free_ioctl(&ndp->ni_filecaps);
3336
3337
*vpp = vp;
3338
return (0);
3339
3340
out_free:
3341
filecaps_free(&ndp->ni_filecaps);
3342
fdrop(fp, td);
3343
return (error);
3344
}
3345
3346
/*
3347
* Fetch the descriptor locklessly.
3348
*
3349
* We avoid fdrop() races by never raising a refcount above 0. To accomplish
3350
* this we have to use a cmpset loop rather than an atomic_add. The descriptor
3351
* must be re-verified once we acquire a reference to be certain that the
3352
* identity is still correct and we did not lose a race due to preemption.
3353
*
3354
* Force a reload of fdt when looping. Another thread could reallocate
3355
* the table before this fd was closed, so it is possible that there is
3356
* a stale fp pointer in cached version.
3357
*/
3358
#ifdef CAPABILITIES
3359
static int
3360
fget_unlocked_seq(struct thread *td, int fd, const cap_rights_t *needrightsp,
3361
uint8_t *flagsp, struct file **fpp, seqc_t *seqp)
3362
{
3363
struct filedesc *fdp;
3364
const struct filedescent *fde;
3365
const struct fdescenttbl *fdt;
3366
struct file *fp;
3367
seqc_t seq;
3368
cap_rights_t haverights;
3369
int error;
3370
uint8_t flags;
3371
3372
fdp = td->td_proc->p_fd;
3373
fdt = fdp->fd_files;
3374
if (__predict_false((u_int)fd >= fdt->fdt_nfiles))
3375
return (EBADF);
3376
3377
for (;;) {
3378
seq = seqc_read_notmodify(fd_seqc(fdt, fd));
3379
fde = &fdt->fdt_ofiles[fd];
3380
haverights = *cap_rights_fde_inline(fde);
3381
fp = fde->fde_file;
3382
flags = fde->fde_flags;
3383
if (__predict_false(fp == NULL)) {
3384
if (seqc_consistent(fd_seqc(fdt, fd), seq))
3385
return (EBADF);
3386
fdt = atomic_load_ptr(&fdp->fd_files);
3387
continue;
3388
}
3389
error = cap_check_inline(&haverights, needrightsp);
3390
if (__predict_false(error != 0)) {
3391
if (seqc_consistent(fd_seqc(fdt, fd), seq))
3392
return (error);
3393
fdt = atomic_load_ptr(&fdp->fd_files);
3394
continue;
3395
}
3396
if (__predict_false(!refcount_acquire_if_not_zero(&fp->f_count))) {
3397
fdt = atomic_load_ptr(&fdp->fd_files);
3398
continue;
3399
}
3400
/*
3401
* Use an acquire barrier to force re-reading of fdt so it is
3402
* refreshed for verification.
3403
*/
3404
atomic_thread_fence_acq();
3405
fdt = fdp->fd_files;
3406
if (seqc_consistent_no_fence(fd_seqc(fdt, fd), seq))
3407
break;
3408
fdrop(fp, td);
3409
}
3410
*fpp = fp;
3411
if (flagsp != NULL)
3412
*flagsp = flags;
3413
if (seqp != NULL)
3414
*seqp = seq;
3415
return (0);
3416
}
3417
#else
3418
static int
3419
fget_unlocked_seq(struct thread *td, int fd, const cap_rights_t *needrightsp,
3420
uint8_t *flagsp, struct file **fpp, seqc_t *seqp __unused)
3421
{
3422
struct filedesc *fdp;
3423
const struct fdescenttbl *fdt;
3424
struct file *fp;
3425
uint8_t flags;
3426
3427
fdp = td->td_proc->p_fd;
3428
fdt = fdp->fd_files;
3429
if (__predict_false((u_int)fd >= fdt->fdt_nfiles))
3430
return (EBADF);
3431
3432
for (;;) {
3433
fp = fdt->fdt_ofiles[fd].fde_file;
3434
flags = fdt->fdt_ofiles[fd].fde_flags;
3435
if (__predict_false(fp == NULL))
3436
return (EBADF);
3437
if (__predict_false(!refcount_acquire_if_not_zero(&fp->f_count))) {
3438
fdt = atomic_load_ptr(&fdp->fd_files);
3439
continue;
3440
}
3441
/*
3442
* Use an acquire barrier to force re-reading of fdt so it is
3443
* refreshed for verification.
3444
*/
3445
atomic_thread_fence_acq();
3446
fdt = fdp->fd_files;
3447
if (__predict_true(fp == fdt->fdt_ofiles[fd].fde_file))
3448
break;
3449
fdrop(fp, td);
3450
}
3451
if (flagsp != NULL)
3452
*flagsp = flags;
3453
*fpp = fp;
3454
return (0);
3455
}
3456
#endif
3457
3458
/*
3459
* See the comments in fget_unlocked_seq for an explanation of how this works.
3460
*
3461
* This is a simplified variant which bails out to the aforementioned routine
3462
* if anything goes wrong. In practice this only happens when userspace is
3463
* racing with itself.
3464
*/
3465
int
3466
fget_unlocked_flags(struct thread *td, int fd, const cap_rights_t *needrightsp,
3467
uint8_t *flagsp, struct file **fpp)
3468
{
3469
struct filedesc *fdp;
3470
#ifdef CAPABILITIES
3471
const struct filedescent *fde;
3472
#endif
3473
const struct fdescenttbl *fdt;
3474
struct file *fp;
3475
#ifdef CAPABILITIES
3476
seqc_t seq;
3477
const cap_rights_t *haverights;
3478
#endif
3479
uint8_t flags;
3480
3481
fdp = td->td_proc->p_fd;
3482
fdt = fdp->fd_files;
3483
if (__predict_false((u_int)fd >= fdt->fdt_nfiles)) {
3484
*fpp = NULL;
3485
return (EBADF);
3486
}
3487
#ifdef CAPABILITIES
3488
seq = seqc_read_notmodify(fd_seqc(fdt, fd));
3489
fde = &fdt->fdt_ofiles[fd];
3490
haverights = cap_rights_fde_inline(fde);
3491
fp = fde->fde_file;
3492
flags = fde->fde_flags;
3493
#else
3494
fp = fdt->fdt_ofiles[fd].fde_file;
3495
flags = fdt->fdt_ofiles[fd].fde_flags;
3496
#endif
3497
if (__predict_false(fp == NULL))
3498
goto out_fallback;
3499
#ifdef CAPABILITIES
3500
if (__predict_false(cap_check_inline_transient(haverights, needrightsp)))
3501
goto out_fallback;
3502
#endif
3503
if (__predict_false(!refcount_acquire_if_not_zero(&fp->f_count)))
3504
goto out_fallback;
3505
3506
/*
3507
* Use an acquire barrier to force re-reading of fdt so it is
3508
* refreshed for verification.
3509
*/
3510
atomic_thread_fence_acq();
3511
fdt = fdp->fd_files;
3512
#ifdef CAPABILITIES
3513
if (__predict_false(!seqc_consistent_no_fence(fd_seqc(fdt, fd), seq)))
3514
#else
3515
if (__predict_false(fp != fdt->fdt_ofiles[fd].fde_file))
3516
#endif
3517
goto out_fdrop;
3518
*fpp = fp;
3519
if (flagsp != NULL)
3520
*flagsp = flags;
3521
return (0);
3522
out_fdrop:
3523
fdrop(fp, td);
3524
out_fallback:
3525
*fpp = NULL;
3526
return (fget_unlocked_seq(td, fd, needrightsp, flagsp, fpp, NULL));
3527
}
3528
3529
int
3530
fget_unlocked(struct thread *td, int fd, const cap_rights_t *needrightsp,
3531
struct file **fpp)
3532
{
3533
return (fget_unlocked_flags(td, fd, needrightsp, NULL, fpp));
3534
}
3535
3536
/*
3537
* Translate fd -> file when the caller guarantees the file descriptor table
3538
* can't be changed by others.
3539
*
3540
* Note this does not mean the file object itself is only visible to the caller,
3541
* merely that it wont disappear without having to be referenced.
3542
*
3543
* Must be paired with fput_only_user.
3544
*/
3545
#ifdef CAPABILITIES
3546
int
3547
fget_only_user(struct filedesc *fdp, int fd, const cap_rights_t *needrightsp,
3548
struct file **fpp)
3549
{
3550
const struct filedescent *fde;
3551
const struct fdescenttbl *fdt;
3552
const cap_rights_t *haverights;
3553
struct file *fp;
3554
int error;
3555
3556
MPASS(FILEDESC_IS_ONLY_USER(fdp));
3557
3558
*fpp = NULL;
3559
if (__predict_false(fd >= fdp->fd_nfiles))
3560
return (EBADF);
3561
3562
fdt = fdp->fd_files;
3563
fde = &fdt->fdt_ofiles[fd];
3564
fp = fde->fde_file;
3565
if (__predict_false(fp == NULL))
3566
return (EBADF);
3567
MPASS(refcount_load(&fp->f_count) > 0);
3568
haverights = cap_rights_fde_inline(fde);
3569
error = cap_check_inline(haverights, needrightsp);
3570
if (__predict_false(error != 0))
3571
return (error);
3572
*fpp = fp;
3573
return (0);
3574
}
3575
#else
3576
int
3577
fget_only_user(struct filedesc *fdp, int fd, const cap_rights_t *needrightsp,
3578
struct file **fpp)
3579
{
3580
struct file *fp;
3581
3582
MPASS(FILEDESC_IS_ONLY_USER(fdp));
3583
3584
*fpp = NULL;
3585
if (__predict_false(fd >= fdp->fd_nfiles))
3586
return (EBADF);
3587
3588
fp = fdp->fd_ofiles[fd].fde_file;
3589
if (__predict_false(fp == NULL))
3590
return (EBADF);
3591
3592
MPASS(refcount_load(&fp->f_count) > 0);
3593
*fpp = fp;
3594
return (0);
3595
}
3596
#endif
3597
3598
/*
3599
* Extract the file pointer associated with the specified descriptor for the
3600
* current user process.
3601
*
3602
* If the descriptor doesn't exist or doesn't match 'flags', EBADF is
3603
* returned.
3604
*
3605
* File's rights will be checked against the capability rights mask.
3606
*
3607
* If an error occurred the non-zero error is returned and *fpp is set to
3608
* NULL. Otherwise *fpp is held and set and zero is returned. Caller is
3609
* responsible for fdrop().
3610
*/
3611
static __inline int
3612
_fget(struct thread *td, int fd, struct file **fpp, int flags,
3613
const cap_rights_t *needrightsp)
3614
{
3615
struct file *fp;
3616
int error;
3617
3618
*fpp = NULL;
3619
error = fget_unlocked(td, fd, needrightsp, &fp);
3620
if (__predict_false(error != 0))
3621
return (error);
3622
if (__predict_false(fp->f_ops == &badfileops)) {
3623
fdrop(fp, td);
3624
return (EBADF);
3625
}
3626
3627
/*
3628
* FREAD and FWRITE failure return EBADF as per POSIX.
3629
*/
3630
error = 0;
3631
switch (flags) {
3632
case FREAD:
3633
case FWRITE:
3634
if ((fp->f_flag & flags) == 0)
3635
error = EBADF;
3636
break;
3637
case FEXEC:
3638
if (fp->f_ops != &path_fileops &&
3639
((fp->f_flag & (FREAD | FEXEC)) == 0 ||
3640
(fp->f_flag & FWRITE) != 0))
3641
error = EBADF;
3642
break;
3643
case 0:
3644
break;
3645
default:
3646
KASSERT(0, ("wrong flags"));
3647
}
3648
3649
if (error != 0) {
3650
fdrop(fp, td);
3651
return (error);
3652
}
3653
3654
*fpp = fp;
3655
return (0);
3656
}
3657
3658
int
3659
fget(struct thread *td, int fd, const cap_rights_t *rightsp, struct file **fpp)
3660
{
3661
3662
return (_fget(td, fd, fpp, 0, rightsp));
3663
}
3664
3665
int
3666
fget_mmap(struct thread *td, int fd, const cap_rights_t *rightsp,
3667
vm_prot_t *maxprotp, struct file **fpp)
3668
{
3669
int error;
3670
#ifndef CAPABILITIES
3671
error = _fget(td, fd, fpp, 0, rightsp);
3672
if (maxprotp != NULL)
3673
*maxprotp = VM_PROT_ALL;
3674
return (error);
3675
#else
3676
cap_rights_t fdrights;
3677
struct filedesc *fdp;
3678
struct file *fp;
3679
seqc_t seq;
3680
3681
*fpp = NULL;
3682
fdp = td->td_proc->p_fd;
3683
MPASS(cap_rights_is_set(rightsp, CAP_MMAP));
3684
for (;;) {
3685
error = fget_unlocked_seq(td, fd, rightsp, NULL, &fp, &seq);
3686
if (__predict_false(error != 0))
3687
return (error);
3688
if (__predict_false(fp->f_ops == &badfileops)) {
3689
fdrop(fp, td);
3690
return (EBADF);
3691
}
3692
if (maxprotp != NULL)
3693
fdrights = *cap_rights(fdp, fd);
3694
if (!fd_modified(fdp, fd, seq))
3695
break;
3696
fdrop(fp, td);
3697
}
3698
3699
/*
3700
* If requested, convert capability rights to access flags.
3701
*/
3702
if (maxprotp != NULL)
3703
*maxprotp = cap_rights_to_vmprot(&fdrights);
3704
*fpp = fp;
3705
return (0);
3706
#endif
3707
}
3708
3709
int
3710
fget_read(struct thread *td, int fd, const cap_rights_t *rightsp,
3711
struct file **fpp)
3712
{
3713
3714
return (_fget(td, fd, fpp, FREAD, rightsp));
3715
}
3716
3717
int
3718
fget_write(struct thread *td, int fd, const cap_rights_t *rightsp,
3719
struct file **fpp)
3720
{
3721
3722
return (_fget(td, fd, fpp, FWRITE, rightsp));
3723
}
3724
3725
int
3726
fget_fcntl(struct thread *td, int fd, const cap_rights_t *rightsp,
3727
int needfcntl, struct file **fpp)
3728
{
3729
#ifndef CAPABILITIES
3730
return (fget_unlocked(td, fd, rightsp, fpp));
3731
#else
3732
struct filedesc *fdp = td->td_proc->p_fd;
3733
struct file *fp;
3734
int error;
3735
seqc_t seq;
3736
3737
*fpp = NULL;
3738
MPASS(cap_rights_is_set(rightsp, CAP_FCNTL));
3739
for (;;) {
3740
error = fget_unlocked_seq(td, fd, rightsp, NULL, &fp, &seq);
3741
if (error != 0)
3742
return (error);
3743
error = cap_fcntl_check(fdp, fd, needfcntl);
3744
if (!fd_modified(fdp, fd, seq))
3745
break;
3746
fdrop(fp, td);
3747
}
3748
if (error != 0) {
3749
fdrop(fp, td);
3750
return (error);
3751
}
3752
*fpp = fp;
3753
return (0);
3754
#endif
3755
}
3756
3757
/*
3758
* Like fget() but loads the underlying vnode, or returns an error if the
3759
* descriptor does not represent a vnode. Note that pipes use vnodes but
3760
* never have VM objects. The returned vnode will be vref()'d.
3761
*
3762
* XXX: what about the unused flags ?
3763
*/
3764
static __inline int
3765
_fgetvp(struct thread *td, int fd, int flags, const cap_rights_t *needrightsp,
3766
struct vnode **vpp)
3767
{
3768
struct file *fp;
3769
int error;
3770
3771
*vpp = NULL;
3772
error = _fget(td, fd, &fp, flags, needrightsp);
3773
if (error != 0)
3774
return (error);
3775
if (fp->f_vnode == NULL) {
3776
error = EINVAL;
3777
} else {
3778
*vpp = fp->f_vnode;
3779
vrefact(*vpp);
3780
}
3781
fdrop(fp, td);
3782
3783
return (error);
3784
}
3785
3786
int
3787
fgetvp(struct thread *td, int fd, const cap_rights_t *rightsp,
3788
struct vnode **vpp)
3789
{
3790
3791
return (_fgetvp(td, fd, 0, rightsp, vpp));
3792
}
3793
3794
int
3795
fgetvp_rights(struct thread *td, int fd, const cap_rights_t *needrightsp,
3796
struct filecaps *havecaps, struct vnode **vpp)
3797
{
3798
struct filecaps caps;
3799
struct file *fp;
3800
int error;
3801
3802
error = fget_cap(td, fd, needrightsp, NULL, &fp, &caps);
3803
if (error != 0)
3804
return (error);
3805
if (fp->f_ops == &badfileops) {
3806
error = EBADF;
3807
goto out;
3808
}
3809
if (fp->f_vnode == NULL) {
3810
error = EINVAL;
3811
goto out;
3812
}
3813
3814
*havecaps = caps;
3815
*vpp = fp->f_vnode;
3816
vrefact(*vpp);
3817
fdrop(fp, td);
3818
3819
return (0);
3820
out:
3821
filecaps_free(&caps);
3822
fdrop(fp, td);
3823
return (error);
3824
}
3825
3826
int
3827
fgetvp_read(struct thread *td, int fd, const cap_rights_t *rightsp,
3828
struct vnode **vpp)
3829
{
3830
3831
return (_fgetvp(td, fd, FREAD, rightsp, vpp));
3832
}
3833
3834
int
3835
fgetvp_exec(struct thread *td, int fd, const cap_rights_t *rightsp,
3836
struct vnode **vpp)
3837
{
3838
3839
return (_fgetvp(td, fd, FEXEC, rightsp, vpp));
3840
}
3841
3842
#ifdef notyet
/*
 * _fgetvp() requiring the file to be open for writing (FWRITE).
 * Only compiled when "notyet" is defined.
 */
int
fgetvp_write(struct thread *td, int fd, const cap_rights_t *rightsp,
    struct vnode **vpp)
{
	int error;

	error = _fgetvp(td, fd, FWRITE, rightsp, vpp);
	return (error);
}
#endif
3851
3852
/*
3853
* Handle the last reference to a file being closed.
3854
*
3855
* Without the noinline attribute clang keeps inlining the func throughout this
3856
* file when fdrop is used.
3857
*/
3858
int __noinline
3859
_fdrop(struct file *fp, struct thread *td)
3860
{
3861
int error;
3862
3863
KASSERT(refcount_load(&fp->f_count) == 0,
3864
("fdrop: fp %p count %d", fp, refcount_load(&fp->f_count)));
3865
3866
error = fo_close(fp, td);
3867
atomic_subtract_int(&openfiles, 1);
3868
crfree(fp->f_cred);
3869
free(fp->f_advice, M_FADVISE);
3870
uma_zfree(file_zone, fp);
3871
3872
return (error);
3873
}
3874
3875
/*
3876
* Apply an advisory lock on a file descriptor.
3877
*
3878
* Just attempt to get a record lock of the requested type on the entire file
3879
* (l_whence = SEEK_SET, l_start = 0, l_len = 0).
3880
*/
3881
#ifndef _SYS_SYSPROTO_H_
3882
/* Argument block for sys_flock(); only defined when sysproto.h is absent. */
struct flock_args {
3883
/* Descriptor to operate on; sys_flock() passes it to fget(). */
int fd;
3884
/* Operation bits; sys_flock() tests LOCK_UN, LOCK_EX, LOCK_SH and LOCK_NB. */
int how;
3885
};
3886
#endif
3887
/* ARGSUSED */
3888
int
3889
sys_flock(struct thread *td, struct flock_args *uap)
3890
{
3891
struct file *fp;
3892
struct vnode *vp;
3893
struct flock lf;
3894
int error;
3895
3896
error = fget(td, uap->fd, &cap_flock_rights, &fp);
3897
if (error != 0)
3898
return (error);
3899
error = EOPNOTSUPP;
3900
if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
3901
goto done;
3902
}
3903
if (fp->f_ops == &path_fileops) {
3904
goto done;
3905
}
3906
3907
error = 0;
3908
vp = fp->f_vnode;
3909
lf.l_whence = SEEK_SET;
3910
lf.l_start = 0;
3911
lf.l_len = 0;
3912
if (uap->how & LOCK_UN) {
3913
lf.l_type = F_UNLCK;
3914
atomic_clear_int(&fp->f_flag, FHASLOCK);
3915
error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
3916
goto done;
3917
}
3918
if (uap->how & LOCK_EX)
3919
lf.l_type = F_WRLCK;
3920
else if (uap->how & LOCK_SH)
3921
lf.l_type = F_RDLCK;
3922
else {
3923
error = EBADF;
3924
goto done;
3925
}
3926
atomic_set_int(&fp->f_flag, FHASLOCK);
3927
error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
3928
(uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
3929
done:
3930
fdrop(fp, td);
3931
return (error);
3932
}
3933
/*
3934
* Duplicate the specified descriptor to a free descriptor.
3935
*/
3936
int
3937
dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode,
3938
int openerror, int *indxp)
3939
{
3940
struct filedescent *newfde, *oldfde;
3941
struct file *fp;
3942
u_long *ioctls;
3943
int error, indx;
3944
3945
KASSERT(openerror == ENODEV || openerror == ENXIO,
3946
("unexpected error %d in %s", openerror, __func__));
3947
3948
/*
3949
* If the to-be-dup'd fd number is greater than the allowed number
3950
* of file descriptors, or the fd to be dup'd has already been
3951
* closed, then reject.
3952
*/
3953
FILEDESC_XLOCK(fdp);
3954
if ((fp = fget_noref(fdp, dfd)) == NULL) {
3955
FILEDESC_XUNLOCK(fdp);
3956
return (EBADF);
3957
}
3958
3959
error = fdalloc(td, 0, &indx);
3960
if (error != 0) {
3961
FILEDESC_XUNLOCK(fdp);
3962
return (error);
3963
}
3964
3965
/*
3966
* There are two cases of interest here.
3967
*
3968
* For ENODEV simply dup (dfd) to file descriptor (indx) and return.
3969
*
3970
* For ENXIO steal away the file structure from (dfd) and store it in
3971
* (indx). (dfd) is effectively closed by this operation.
3972
*/
3973
switch (openerror) {
3974
case ENODEV:
3975
/*
3976
* Check that the mode the file is being opened for is a
3977
* subset of the mode of the existing descriptor.
3978
*/
3979
if (((mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) {
3980
fdunused(fdp, indx);
3981
FILEDESC_XUNLOCK(fdp);
3982
return (EACCES);
3983
}
3984
if (!fhold(fp)) {
3985
fdunused(fdp, indx);
3986
FILEDESC_XUNLOCK(fdp);
3987
return (EBADF);
3988
}
3989
newfde = &fdp->fd_ofiles[indx];
3990
oldfde = &fdp->fd_ofiles[dfd];
3991
ioctls = filecaps_copy_prep(&oldfde->fde_caps);
3992
#ifdef CAPABILITIES
3993
seqc_write_begin(&newfde->fde_seqc);
3994
#endif
3995
fde_copy(oldfde, newfde);
3996
filecaps_copy_finish(&oldfde->fde_caps, &newfde->fde_caps,
3997
ioctls);
3998
#ifdef CAPABILITIES
3999
seqc_write_end(&newfde->fde_seqc);
4000
#endif
4001
break;
4002
case ENXIO:
4003
/*
4004
* Steal away the file pointer from dfd and stuff it into indx.
4005
*/
4006
newfde = &fdp->fd_ofiles[indx];
4007
oldfde = &fdp->fd_ofiles[dfd];
4008
#ifdef CAPABILITIES
4009
seqc_write_begin(&oldfde->fde_seqc);
4010
seqc_write_begin(&newfde->fde_seqc);
4011
#endif
4012
fde_copy(oldfde, newfde);
4013
oldfde->fde_file = NULL;
4014
fdunused(fdp, dfd);
4015
#ifdef CAPABILITIES
4016
seqc_write_end(&newfde->fde_seqc);
4017
seqc_write_end(&oldfde->fde_seqc);
4018
#endif
4019
break;
4020
}
4021
FILEDESC_XUNLOCK(fdp);
4022
*indxp = indx;
4023
return (0);
4024
}
4025
4026
/*
4027
* This sysctl determines if we will allow a process to chroot(2) if it
4028
* has a directory open:
4029
* 0: disallowed for all processes.
4030
* 1: allowed for processes that were not already chroot(2)'ed.
4031
* 2: allowed for all processes.
4032
*/
4033
4034
/* Policy knob, default 1; exported read-write under the _kern sysctl node. */
static int chroot_allow_open_directories = 1;
4035
4036
SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
4037
&chroot_allow_open_directories, 0,
4038
"Allow a process to chroot(2) if it has a directory open");
4039
4040
/*
4041
* Helper function for raised chroot(2) security function: Refuse if
4042
* any filedescriptors are open directories.
4043
*/
4044
static int
4045
chroot_refuse_vdir_fds(struct filedesc *fdp)
4046
{
4047
struct vnode *vp;
4048
struct file *fp;
4049
int i;
4050
4051
FILEDESC_LOCK_ASSERT(fdp);
4052
4053
FILEDESC_FOREACH_FP(fdp, i, fp) {
4054
if (fp->f_type == DTYPE_VNODE) {
4055
vp = fp->f_vnode;
4056
if (vp->v_type == VDIR)
4057
return (EPERM);
4058
}
4059
}
4060
return (0);
4061
}
4062
4063
static void
4064
pwd_fill(struct pwd *oldpwd, struct pwd *newpwd)
4065
{
4066
4067
if (newpwd->pwd_cdir == NULL && oldpwd->pwd_cdir != NULL) {
4068
vrefact(oldpwd->pwd_cdir);
4069
newpwd->pwd_cdir = oldpwd->pwd_cdir;
4070
}
4071
4072
if (newpwd->pwd_rdir == NULL && oldpwd->pwd_rdir != NULL) {
4073
vrefact(oldpwd->pwd_rdir);
4074
newpwd->pwd_rdir = oldpwd->pwd_rdir;
4075
}
4076
4077
if (newpwd->pwd_jdir == NULL && oldpwd->pwd_jdir != NULL) {
4078
vrefact(oldpwd->pwd_jdir);
4079
newpwd->pwd_jdir = oldpwd->pwd_jdir;
4080
}
4081
4082
if (newpwd->pwd_adir == NULL && oldpwd->pwd_adir != NULL) {
4083
vrefact(oldpwd->pwd_adir);
4084
newpwd->pwd_adir = oldpwd->pwd_adir;
4085
}
4086
}
4087
4088
struct pwd *
4089
pwd_hold_pwddesc(struct pwddesc *pdp)
4090
{
4091
struct pwd *pwd;
4092
4093
PWDDESC_ASSERT_XLOCKED(pdp);
4094
pwd = PWDDESC_XLOCKED_LOAD_PWD(pdp);
4095
if (pwd != NULL)
4096
refcount_acquire(&pwd->pwd_refcount);
4097
return (pwd);
4098
}
4099
4100
bool
4101
pwd_hold_smr(struct pwd *pwd)
4102
{
4103
4104
MPASS(pwd != NULL);
4105
if (__predict_true(refcount_acquire_if_not_zero(&pwd->pwd_refcount))) {
4106
return (true);
4107
}
4108
return (false);
4109
}
4110
4111
struct pwd *
4112
pwd_hold(struct thread *td)
4113
{
4114
struct pwddesc *pdp;
4115
struct pwd *pwd;
4116
4117
pdp = td->td_proc->p_pd;
4118
4119
vfs_smr_enter();
4120
pwd = vfs_smr_entered_load(&pdp->pd_pwd);
4121
if (pwd_hold_smr(pwd)) {
4122
vfs_smr_exit();
4123
return (pwd);
4124
}
4125
vfs_smr_exit();
4126
PWDDESC_XLOCK(pdp);
4127
pwd = pwd_hold_pwddesc(pdp);
4128
MPASS(pwd != NULL);
4129
PWDDESC_XUNLOCK(pdp);
4130
return (pwd);
4131
}
4132
4133
struct pwd *
4134
pwd_hold_proc(struct proc *p)
4135
{
4136
struct pwddesc *pdp;
4137
struct pwd *pwd;
4138
4139
PROC_ASSERT_HELD(p);
4140
PROC_LOCK(p);
4141
pdp = pdhold(p);
4142
MPASS(pdp != NULL);
4143
PROC_UNLOCK(p);
4144
4145
PWDDESC_XLOCK(pdp);
4146
pwd = pwd_hold_pwddesc(pdp);
4147
MPASS(pwd != NULL);
4148
PWDDESC_XUNLOCK(pdp);
4149
pddrop(pdp);
4150
return (pwd);
4151
}
4152
4153
static struct pwd *
4154
pwd_alloc(void)
4155
{
4156
struct pwd *pwd;
4157
4158
pwd = uma_zalloc_smr(pwd_zone, M_WAITOK);
4159
bzero(pwd, sizeof(*pwd));
4160
refcount_init(&pwd->pwd_refcount, 1);
4161
return (pwd);
4162
}
4163
4164
void
4165
pwd_drop(struct pwd *pwd)
4166
{
4167
4168
if (!refcount_release(&pwd->pwd_refcount))
4169
return;
4170
4171
if (pwd->pwd_cdir != NULL)
4172
vrele(pwd->pwd_cdir);
4173
if (pwd->pwd_rdir != NULL)
4174
vrele(pwd->pwd_rdir);
4175
if (pwd->pwd_jdir != NULL)
4176
vrele(pwd->pwd_jdir);
4177
if (pwd->pwd_adir != NULL)
4178
vrele(pwd->pwd_adir);
4179
uma_zfree_smr(pwd_zone, pwd);
4180
}
4181
4182
/*
4183
* The caller is responsible for invoking priv_check() and
4184
* mac_vnode_check_chroot() to authorize this operation.
4185
*/
4186
int
4187
pwd_chroot(struct thread *td, struct vnode *vp)
4188
{
4189
struct pwddesc *pdp;
4190
struct filedesc *fdp;
4191
struct pwd *newpwd, *oldpwd;
4192
int error;
4193
4194
fdp = td->td_proc->p_fd;
4195
pdp = td->td_proc->p_pd;
4196
newpwd = pwd_alloc();
4197
FILEDESC_SLOCK(fdp);
4198
PWDDESC_XLOCK(pdp);
4199
oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp);
4200
if (chroot_allow_open_directories == 0 ||
4201
(chroot_allow_open_directories == 1 &&
4202
oldpwd->pwd_rdir != rootvnode)) {
4203
error = chroot_refuse_vdir_fds(fdp);
4204
FILEDESC_SUNLOCK(fdp);
4205
if (error != 0) {
4206
PWDDESC_XUNLOCK(pdp);
4207
pwd_drop(newpwd);
4208
return (error);
4209
}
4210
} else {
4211
FILEDESC_SUNLOCK(fdp);
4212
}
4213
4214
vrefact(vp);
4215
newpwd->pwd_rdir = vp;
4216
vrefact(vp);
4217
newpwd->pwd_adir = vp;
4218
if (oldpwd->pwd_jdir == NULL) {
4219
vrefact(vp);
4220
newpwd->pwd_jdir = vp;
4221
}
4222
pwd_fill(oldpwd, newpwd);
4223
pwd_set(pdp, newpwd);
4224
PWDDESC_XUNLOCK(pdp);
4225
pwd_drop(oldpwd);
4226
return (0);
4227
}
4228
4229
void
4230
pwd_chdir(struct thread *td, struct vnode *vp)
4231
{
4232
struct pwddesc *pdp;
4233
struct pwd *newpwd, *oldpwd;
4234
4235
VNPASS(vp->v_usecount > 0, vp);
4236
4237
newpwd = pwd_alloc();
4238
pdp = td->td_proc->p_pd;
4239
PWDDESC_XLOCK(pdp);
4240
oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp);
4241
newpwd->pwd_cdir = vp;
4242
pwd_fill(oldpwd, newpwd);
4243
pwd_set(pdp, newpwd);
4244
PWDDESC_XUNLOCK(pdp);
4245
pwd_drop(oldpwd);
4246
}
4247
4248
/*
4249
* Process is transitioning to/from a non-native ABI.
4250
*/
4251
void
4252
pwd_altroot(struct thread *td, struct vnode *altroot_vp)
4253
{
4254
struct pwddesc *pdp;
4255
struct pwd *newpwd, *oldpwd;
4256
4257
newpwd = pwd_alloc();
4258
pdp = td->td_proc->p_pd;
4259
PWDDESC_XLOCK(pdp);
4260
oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp);
4261
if (altroot_vp != NULL) {
4262
/*
4263
* Native process to a non-native ABI.
4264
*/
4265
4266
vrefact(altroot_vp);
4267
newpwd->pwd_adir = altroot_vp;
4268
} else {
4269
/*
4270
* Non-native process to the native ABI.
4271
*/
4272
4273
vrefact(oldpwd->pwd_rdir);
4274
newpwd->pwd_adir = oldpwd->pwd_rdir;
4275
}
4276
pwd_fill(oldpwd, newpwd);
4277
pwd_set(pdp, newpwd);
4278
PWDDESC_XUNLOCK(pdp);
4279
pwd_drop(oldpwd);
4280
}
4281
4282
/*
4283
* jail_attach(2) changes both root and working directories.
4284
*/
4285
int
4286
pwd_chroot_chdir(struct thread *td, struct vnode *vp)
4287
{
4288
struct pwddesc *pdp;
4289
struct filedesc *fdp;
4290
struct pwd *newpwd, *oldpwd;
4291
int error;
4292
4293
fdp = td->td_proc->p_fd;
4294
pdp = td->td_proc->p_pd;
4295
newpwd = pwd_alloc();
4296
FILEDESC_SLOCK(fdp);
4297
PWDDESC_XLOCK(pdp);
4298
oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp);
4299
error = chroot_refuse_vdir_fds(fdp);
4300
FILEDESC_SUNLOCK(fdp);
4301
if (error != 0) {
4302
PWDDESC_XUNLOCK(pdp);
4303
pwd_drop(newpwd);
4304
return (error);
4305
}
4306
4307
vrefact(vp);
4308
newpwd->pwd_rdir = vp;
4309
vrefact(vp);
4310
newpwd->pwd_cdir = vp;
4311
if (oldpwd->pwd_jdir == NULL) {
4312
vrefact(vp);
4313
newpwd->pwd_jdir = vp;
4314
}
4315
vrefact(vp);
4316
newpwd->pwd_adir = vp;
4317
pwd_fill(oldpwd, newpwd);
4318
pwd_set(pdp, newpwd);
4319
PWDDESC_XUNLOCK(pdp);
4320
pwd_drop(oldpwd);
4321
return (0);
4322
}
4323
4324
void
4325
pwd_ensure_dirs(void)
4326
{
4327
struct pwddesc *pdp;
4328
struct pwd *oldpwd, *newpwd;
4329
4330
pdp = curproc->p_pd;
4331
PWDDESC_XLOCK(pdp);
4332
oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp);
4333
if (oldpwd->pwd_cdir != NULL && oldpwd->pwd_rdir != NULL &&
4334
oldpwd->pwd_adir != NULL) {
4335
PWDDESC_XUNLOCK(pdp);
4336
return;
4337
}
4338
PWDDESC_XUNLOCK(pdp);
4339
4340
newpwd = pwd_alloc();
4341
PWDDESC_XLOCK(pdp);
4342
oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp);
4343
pwd_fill(oldpwd, newpwd);
4344
if (newpwd->pwd_cdir == NULL) {
4345
vrefact(rootvnode);
4346
newpwd->pwd_cdir = rootvnode;
4347
}
4348
if (newpwd->pwd_rdir == NULL) {
4349
vrefact(rootvnode);
4350
newpwd->pwd_rdir = rootvnode;
4351
}
4352
if (newpwd->pwd_adir == NULL) {
4353
vrefact(rootvnode);
4354
newpwd->pwd_adir = rootvnode;
4355
}
4356
pwd_set(pdp, newpwd);
4357
PWDDESC_XUNLOCK(pdp);
4358
pwd_drop(oldpwd);
4359
}
4360
4361
void
4362
pwd_set_rootvnode(void)
4363
{
4364
struct pwddesc *pdp;
4365
struct pwd *oldpwd, *newpwd;
4366
4367
pdp = curproc->p_pd;
4368
4369
newpwd = pwd_alloc();
4370
PWDDESC_XLOCK(pdp);
4371
oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp);
4372
vrefact(rootvnode);
4373
newpwd->pwd_cdir = rootvnode;
4374
vrefact(rootvnode);
4375
newpwd->pwd_rdir = rootvnode;
4376
vrefact(rootvnode);
4377
newpwd->pwd_adir = rootvnode;
4378
pwd_fill(oldpwd, newpwd);
4379
pwd_set(pdp, newpwd);
4380
PWDDESC_XUNLOCK(pdp);
4381
pwd_drop(oldpwd);
4382
}
4383
4384
/*
4385
* Scan all active processes and prisons to see if any of them have a current
4386
* or root directory of `olddp'. If so, replace them with the new mount point.
4387
*/
4388
void
4389
mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
4390
{
4391
struct pwddesc *pdp;
4392
struct pwd *newpwd, *oldpwd;
4393
struct prison *pr;
4394
struct proc *p;
4395
int nrele;
4396
4397
if (vrefcnt(olddp) == 1)
4398
return;
4399
nrele = 0;
4400
newpwd = pwd_alloc();
4401
sx_slock(&allproc_lock);
4402
FOREACH_PROC_IN_SYSTEM(p) {
4403
PROC_LOCK(p);
4404
pdp = pdhold(p);
4405
PROC_UNLOCK(p);
4406
if (pdp == NULL)
4407
continue;
4408
PWDDESC_XLOCK(pdp);
4409
oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp);
4410
if (oldpwd == NULL ||
4411
(oldpwd->pwd_cdir != olddp &&
4412
oldpwd->pwd_rdir != olddp &&
4413
oldpwd->pwd_jdir != olddp &&
4414
oldpwd->pwd_adir != olddp)) {
4415
PWDDESC_XUNLOCK(pdp);
4416
pddrop(pdp);
4417
continue;
4418
}
4419
if (oldpwd->pwd_cdir == olddp) {
4420
vrefact(newdp);
4421
newpwd->pwd_cdir = newdp;
4422
}
4423
if (oldpwd->pwd_rdir == olddp) {
4424
vrefact(newdp);
4425
newpwd->pwd_rdir = newdp;
4426
}
4427
if (oldpwd->pwd_jdir == olddp) {
4428
vrefact(newdp);
4429
newpwd->pwd_jdir = newdp;
4430
}
4431
if (oldpwd->pwd_adir == olddp) {
4432
vrefact(newdp);
4433
newpwd->pwd_adir = newdp;
4434
}
4435
pwd_fill(oldpwd, newpwd);
4436
pwd_set(pdp, newpwd);
4437
PWDDESC_XUNLOCK(pdp);
4438
pwd_drop(oldpwd);
4439
pddrop(pdp);
4440
newpwd = pwd_alloc();
4441
}
4442
sx_sunlock(&allproc_lock);
4443
pwd_drop(newpwd);
4444
if (rootvnode == olddp) {
4445
vrefact(newdp);
4446
rootvnode = newdp;
4447
nrele++;
4448
}
4449
mtx_lock(&prison0.pr_mtx);
4450
if (prison0.pr_root == olddp) {
4451
vrefact(newdp);
4452
prison0.pr_root = newdp;
4453
nrele++;
4454
}
4455
mtx_unlock(&prison0.pr_mtx);
4456
sx_slock(&allprison_lock);
4457
TAILQ_FOREACH(pr, &allprison, pr_list) {
4458
mtx_lock(&pr->pr_mtx);
4459
if (pr->pr_root == olddp) {
4460
vrefact(newdp);
4461
pr->pr_root = newdp;
4462
nrele++;
4463
}
4464
mtx_unlock(&pr->pr_mtx);
4465
}
4466
sx_sunlock(&allprison_lock);
4467
while (nrele--)
4468
vrele(olddp);
4469
}
4470
4471
int
4472
descrip_check_write_mp(struct filedesc *fdp, struct mount *mp)
4473
{
4474
struct file *fp;
4475
struct vnode *vp;
4476
int error, i;
4477
4478
error = 0;
4479
FILEDESC_SLOCK(fdp);
4480
FILEDESC_FOREACH_FP(fdp, i, fp) {
4481
if (fp->f_type != DTYPE_VNODE ||
4482
(atomic_load_int(&fp->f_flag) & FWRITE) == 0)
4483
continue;
4484
vp = fp->f_vnode;
4485
if (vp->v_mount == mp) {
4486
error = EDEADLK;
4487
break;
4488
}
4489
}
4490
FILEDESC_SUNLOCK(fdp);
4491
return (error);
4492
}
4493
4494
struct filedesc_to_leader *
4495
filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp,
4496
struct proc *leader)
4497
{
4498
struct filedesc_to_leader *fdtol;
4499
4500
fdtol = malloc(sizeof(struct filedesc_to_leader),
4501
M_FILEDESC_TO_LEADER, M_WAITOK);
4502
fdtol->fdl_refcount = 1;
4503
fdtol->fdl_holdcount = 0;
4504
fdtol->fdl_wakeup = 0;
4505
fdtol->fdl_leader = leader;
4506
if (old != NULL) {
4507
FILEDESC_XLOCK(fdp);
4508
fdtol->fdl_next = old->fdl_next;
4509
fdtol->fdl_prev = old;
4510
old->fdl_next = fdtol;
4511
fdtol->fdl_next->fdl_prev = fdtol;
4512
FILEDESC_XUNLOCK(fdp);
4513
} else {
4514
fdtol->fdl_next = fdtol;
4515
fdtol->fdl_prev = fdtol;
4516
}
4517
return (fdtol);
4518
}
4519
4520
struct filedesc_to_leader *
4521
filedesc_to_leader_share(struct filedesc_to_leader *fdtol, struct filedesc *fdp)
4522
{
4523
FILEDESC_XLOCK(fdp);
4524
fdtol->fdl_refcount++;
4525
FILEDESC_XUNLOCK(fdp);
4526
return (fdtol);
4527
}
4528
4529
static int
4530
filedesc_nfiles(struct filedesc *fdp)
4531
{
4532
NDSLOTTYPE *map;
4533
int count, off, minoff;
4534
4535
if (fdp == NULL)
4536
return (0);
4537
count = 0;
4538
FILEDESC_SLOCK(fdp);
4539
map = fdp->fd_map;
4540
off = NDSLOT(fdp->fd_nfiles - 1);
4541
for (minoff = NDSLOT(0); off >= minoff; --off)
4542
count += bitcountl(map[off]);
4543
FILEDESC_SUNLOCK(fdp);
4544
return (count);
4545
}
4546
4547
int
4548
proc_nfiles(struct proc *p)
4549
{
4550
struct filedesc *fdp;
4551
int res;
4552
4553
PROC_LOCK(p);
4554
fdp = fdhold(p);
4555
PROC_UNLOCK(p);
4556
res = filedesc_nfiles(fdp);
4557
fddrop(fdp);
4558
return (res);
4559
}
4560
4561
static int
4562
sysctl_kern_proc_nfds(SYSCTL_HANDLER_ARGS)
4563
{
4564
u_int namelen;
4565
int count;
4566
4567
namelen = arg2;
4568
if (namelen != 1)
4569
return (EINVAL);
4570
4571
if (*(int *)arg1 != 0)
4572
return (EINVAL);
4573
4574
count = filedesc_nfiles(curproc->p_fd);
4575
return (SYSCTL_OUT(req, &count, sizeof(count)));
4576
}
4577
4578
/* Read-only node under _kern_proc, served by sysctl_kern_proc_nfds(). */
static SYSCTL_NODE(_kern_proc, KERN_PROC_NFDS, nfds,
4579
CTLFLAG_RD|CTLFLAG_CAPRD|CTLFLAG_MPSAFE, sysctl_kern_proc_nfds,
4580
"Number of open file descriptors");
4581
4582
/*
4583
* Get file structures globally.
4584
*/
4585
static int
4586
sysctl_kern_file(SYSCTL_HANDLER_ARGS)
4587
{
4588
struct xfile xf;
4589
struct filedesc *fdp;
4590
struct file *fp;
4591
struct proc *p;
4592
int error, n;
4593
4594
error = sysctl_wire_old_buffer(req, 0);
4595
if (error != 0)
4596
return (error);
4597
if (req->oldptr == NULL) {
4598
n = 0;
4599
sx_slock(&allproc_lock);
4600
FOREACH_PROC_IN_SYSTEM(p) {
4601
PROC_LOCK(p);
4602
if (p->p_state == PRS_NEW) {
4603
PROC_UNLOCK(p);
4604
continue;
4605
}
4606
fdp = fdhold(p);
4607
PROC_UNLOCK(p);
4608
if (fdp == NULL)
4609
continue;
4610
/* overestimates sparse tables. */
4611
n += fdp->fd_nfiles;
4612
fddrop(fdp);
4613
}
4614
sx_sunlock(&allproc_lock);
4615
return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
4616
}
4617
error = 0;
4618
bzero(&xf, sizeof(xf));
4619
xf.xf_size = sizeof(xf);
4620
sx_slock(&allproc_lock);
4621
FOREACH_PROC_IN_SYSTEM(p) {
4622
PROC_LOCK(p);
4623
if (p->p_state == PRS_NEW) {
4624
PROC_UNLOCK(p);
4625
continue;
4626
}
4627
if (p_cansee(req->td, p) != 0) {
4628
PROC_UNLOCK(p);
4629
continue;
4630
}
4631
xf.xf_pid = p->p_pid;
4632
xf.xf_uid = p->p_ucred->cr_uid;
4633
fdp = fdhold(p);
4634
PROC_UNLOCK(p);
4635
if (fdp == NULL)
4636
continue;
4637
FILEDESC_SLOCK(fdp);
4638
if (refcount_load(&fdp->fd_refcnt) == 0)
4639
goto nextproc;
4640
FILEDESC_FOREACH_FP(fdp, n, fp) {
4641
xf.xf_fd = n;
4642
xf.xf_file = (uintptr_t)fp;
4643
xf.xf_data = (uintptr_t)fp->f_data;
4644
xf.xf_vnode = (uintptr_t)fp->f_vnode;
4645
xf.xf_type = (uintptr_t)fp->f_type;
4646
xf.xf_count = refcount_load(&fp->f_count);
4647
xf.xf_msgcount = 0;
4648
xf.xf_offset = foffset_get(fp);
4649
xf.xf_flag = fp->f_flag;
4650
error = SYSCTL_OUT(req, &xf, sizeof(xf));
4651
4652
/*
4653
* There is no need to re-check the fdtable refcount
4654
* here since the filedesc lock is not dropped in the
4655
* loop body.
4656
*/
4657
if (error != 0)
4658
break;
4659
}
4660
nextproc:
4661
FILEDESC_SUNLOCK(fdp);
4662
fddrop(fdp);
4663
if (error)
4664
break;
4665
}
4666
sx_sunlock(&allproc_lock);
4667
return (error);
4668
}
4669
4670
/* Read-only opaque sysctl under _kern, served by sysctl_kern_file(). */
SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD|CTLFLAG_MPSAFE,
4671
0, 0, sysctl_kern_file, "S,xfile", "Entire file table");
4672
4673
/* When KINFO_FILE_SIZE is defined, struct kinfo_file must match it exactly. */
#ifdef KINFO_FILE_SIZE
4674
CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE);
4675
#endif
4676
4677
static int
4678
xlate_fflags(int fflags)
4679
{
4680
static const struct {
4681
int fflag;
4682
int kf_fflag;
4683
} fflags_table[] = {
4684
{ FAPPEND, KF_FLAG_APPEND },
4685
{ FASYNC, KF_FLAG_ASYNC },
4686
{ FFSYNC, KF_FLAG_FSYNC },
4687
{ FHASLOCK, KF_FLAG_HASLOCK },
4688
{ FNONBLOCK, KF_FLAG_NONBLOCK },
4689
{ FREAD, KF_FLAG_READ },
4690
{ FWRITE, KF_FLAG_WRITE },
4691
{ O_CREAT, KF_FLAG_CREAT },
4692
{ O_DIRECT, KF_FLAG_DIRECT },
4693
{ O_EXCL, KF_FLAG_EXCL },
4694
{ O_EXEC, KF_FLAG_EXEC },
4695
{ O_EXLOCK, KF_FLAG_EXLOCK },
4696
{ O_NOFOLLOW, KF_FLAG_NOFOLLOW },
4697
{ O_SHLOCK, KF_FLAG_SHLOCK },
4698
{ O_TRUNC, KF_FLAG_TRUNC }
4699
};
4700
unsigned int i;
4701
int kflags;
4702
4703
kflags = 0;
4704
for (i = 0; i < nitems(fflags_table); i++)
4705
if (fflags & fflags_table[i].fflag)
4706
kflags |= fflags_table[i].kf_fflag;
4707
return (kflags);
4708
}
4709
4710
/* Trim unused data from kf_path by truncating the structure size. */
4711
void
4712
pack_kinfo(struct kinfo_file *kif)
4713
{
4714
4715
kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
4716
strlen(kif->kf_path) + 1;
4717
kif->kf_structsize = roundup(kif->kf_structsize, sizeof(uint64_t));
4718
}
4719
4720
static void
4721
export_file_to_kinfo(struct file *fp, int fd, cap_rights_t *rightsp,
4722
struct kinfo_file *kif, struct filedesc *fdp, int flags)
4723
{
4724
int error;
4725
4726
bzero(kif, sizeof(*kif));
4727
4728
/* Set a default type to allow for empty fill_kinfo() methods. */
4729
kif->kf_type = KF_TYPE_UNKNOWN;
4730
kif->kf_flags = xlate_fflags(fp->f_flag);
4731
if (rightsp != NULL)
4732
kif->kf_cap_rights = *rightsp;
4733
else
4734
cap_rights_init_zero(&kif->kf_cap_rights);
4735
kif->kf_fd = fd;
4736
kif->kf_ref_count = refcount_load(&fp->f_count);
4737
kif->kf_offset = foffset_get(fp);
4738
4739
/*
4740
* This may drop the filedesc lock, so the 'fp' cannot be
4741
* accessed after this call.
4742
*/
4743
error = fo_fill_kinfo(fp, kif, fdp);
4744
if (error == 0)
4745
kif->kf_status |= KF_ATTR_VALID;
4746
if ((flags & KERN_FILEDESC_PACK_KINFO) != 0)
4747
pack_kinfo(kif);
4748
else
4749
kif->kf_structsize = roundup2(sizeof(*kif), sizeof(uint64_t));
4750
}
4751
4752
static void
4753
export_vnode_to_kinfo(struct vnode *vp, int fd, int fflags,
4754
struct kinfo_file *kif, int flags)
4755
{
4756
int error;
4757
4758
bzero(kif, sizeof(*kif));
4759
4760
kif->kf_type = KF_TYPE_VNODE;
4761
error = vn_fill_kinfo_vnode(vp, kif);
4762
if (error == 0)
4763
kif->kf_status |= KF_ATTR_VALID;
4764
kif->kf_flags = xlate_fflags(fflags);
4765
cap_rights_init_zero(&kif->kf_cap_rights);
4766
kif->kf_fd = fd;
4767
kif->kf_ref_count = -1;
4768
kif->kf_offset = -1;
4769
if ((flags & KERN_FILEDESC_PACK_KINFO) != 0)
4770
pack_kinfo(kif);
4771
else
4772
kif->kf_structsize = roundup2(sizeof(*kif), sizeof(uint64_t));
4773
vrele(vp);
4774
}
4775
4776
/* State shared by the export_*_to_sb() helpers while emitting records. */
struct export_fd_buf {
4777
/* Table whose shared lock export_file_to_sb() drops around the sbuf write. */
struct filedesc *fdp;
4778
/* If not NULL, export_vnode_to_sb() unlocks it around the export. */
struct pwddesc *pdp;
4779
/* Destination buffer for the records. */
struct sbuf *sb;
4780
/* Bytes still allowed in the output; -1 disables the limit check. */
ssize_t remainder;
4781
/* Scratch record filled in before being appended to sb. */
struct kinfo_file kif;
4782
/* Tested for KERN_FILEDESC_PACK_KINFO by the kinfo fillers. */
int flags;
4783
};
4784
4785
static int
4786
export_kinfo_to_sb(struct export_fd_buf *efbuf)
4787
{
4788
struct kinfo_file *kif;
4789
4790
kif = &efbuf->kif;
4791
if (efbuf->remainder != -1) {
4792
if (efbuf->remainder < kif->kf_structsize)
4793
return (ENOMEM);
4794
efbuf->remainder -= kif->kf_structsize;
4795
}
4796
if (sbuf_bcat(efbuf->sb, kif, kif->kf_structsize) != 0)
4797
return (sbuf_error(efbuf->sb));
4798
return (0);
4799
}
4800
4801
static int
4802
export_file_to_sb(struct file *fp, int fd, cap_rights_t *rightsp,
4803
struct export_fd_buf *efbuf)
4804
{
4805
int error;
4806
4807
if (efbuf->remainder == 0)
4808
return (ENOMEM);
4809
export_file_to_kinfo(fp, fd, rightsp, &efbuf->kif, efbuf->fdp,
4810
efbuf->flags);
4811
FILEDESC_SUNLOCK(efbuf->fdp);
4812
error = export_kinfo_to_sb(efbuf);
4813
FILEDESC_SLOCK(efbuf->fdp);
4814
return (error);
4815
}
4816
4817
static int
4818
export_vnode_to_sb(struct vnode *vp, int fd, int fflags,
4819
struct export_fd_buf *efbuf)
4820
{
4821
int error;
4822
4823
if (efbuf->remainder == 0)
4824
return (ENOMEM);
4825
if (efbuf->pdp != NULL)
4826
PWDDESC_XUNLOCK(efbuf->pdp);
4827
export_vnode_to_kinfo(vp, fd, fflags, &efbuf->kif, efbuf->flags);
4828
error = export_kinfo_to_sb(efbuf);
4829
if (efbuf->pdp != NULL)
4830
PWDDESC_XLOCK(efbuf->pdp);
4831
return (error);
4832
}
4833
4834
/*
4835
* Store a process's file descriptor information in an sbuf.
4836
*
4837
* Takes a locked proc as argument, and returns with the proc unlocked.
4838
*/
4839
int
4840
kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
4841
int flags)
4842
{
4843
struct file *fp;
4844
struct filedesc *fdp;
4845
struct pwddesc *pdp;
4846
struct export_fd_buf *efbuf;
4847
struct vnode *cttyvp, *textvp, *tracevp;
4848
struct pwd *pwd;
4849
int error, i;
4850
cap_rights_t rights;
4851
4852
PROC_LOCK_ASSERT(p, MA_OWNED);
4853
4854
/* ktrace vnode */
4855
tracevp = ktr_get_tracevp(p, true);
4856
/* text vnode */
4857
textvp = p->p_textvp;
4858
if (textvp != NULL)
4859
vrefact(textvp);
4860
/* Controlling tty. */
4861
cttyvp = NULL;
4862
if (p->p_pgrp != NULL && p->p_pgrp->pg_session != NULL) {
4863
cttyvp = p->p_pgrp->pg_session->s_ttyvp;
4864
if (cttyvp != NULL)
4865
vrefact(cttyvp);
4866
}
4867
fdp = fdhold(p);
4868
pdp = pdhold(p);
4869
PROC_UNLOCK(p);
4870
4871
efbuf = malloc(sizeof(*efbuf), M_TEMP, M_WAITOK);
4872
efbuf->fdp = NULL;
4873
efbuf->pdp = NULL;
4874
efbuf->sb = sb;
4875
efbuf->remainder = maxlen;
4876
efbuf->flags = flags;
4877
4878
error = 0;
4879
if (tracevp != NULL)
4880
error = export_vnode_to_sb(tracevp, KF_FD_TYPE_TRACE,
4881
FREAD | FWRITE, efbuf);
4882
if (error == 0 && textvp != NULL)
4883
error = export_vnode_to_sb(textvp, KF_FD_TYPE_TEXT, FREAD,
4884
efbuf);
4885
if (error == 0 && cttyvp != NULL)
4886
error = export_vnode_to_sb(cttyvp, KF_FD_TYPE_CTTY,
4887
FREAD | FWRITE, efbuf);
4888
if (error != 0 || pdp == NULL || fdp == NULL)
4889
goto fail;
4890
efbuf->fdp = fdp;
4891
efbuf->pdp = pdp;
4892
PWDDESC_XLOCK(pdp);
4893
pwd = pwd_hold_pwddesc(pdp);
4894
if (pwd != NULL) {
4895
/* working directory */
4896
if (pwd->pwd_cdir != NULL) {
4897
vrefact(pwd->pwd_cdir);
4898
error = export_vnode_to_sb(pwd->pwd_cdir,
4899
KF_FD_TYPE_CWD, FREAD, efbuf);
4900
}
4901
/* root directory */
4902
if (error == 0 && pwd->pwd_rdir != NULL) {
4903
vrefact(pwd->pwd_rdir);
4904
error = export_vnode_to_sb(pwd->pwd_rdir,
4905
KF_FD_TYPE_ROOT, FREAD, efbuf);
4906
}
4907
/* jail directory */
4908
if (error == 0 && pwd->pwd_jdir != NULL) {
4909
vrefact(pwd->pwd_jdir);
4910
error = export_vnode_to_sb(pwd->pwd_jdir,
4911
KF_FD_TYPE_JAIL, FREAD, efbuf);
4912
}
4913
}
4914
PWDDESC_XUNLOCK(pdp);
4915
if (error != 0)
4916
goto fail;
4917
if (pwd != NULL)
4918
pwd_drop(pwd);
4919
FILEDESC_SLOCK(fdp);
4920
if (refcount_load(&fdp->fd_refcnt) == 0)
4921
goto skip;
4922
FILEDESC_FOREACH_FP(fdp, i, fp) {
4923
#ifdef CAPABILITIES
4924
rights = *cap_rights(fdp, i);
4925
#else /* !CAPABILITIES */
4926
rights = cap_no_rights;
4927
#endif
4928
/*
4929
* Create sysctl entry. It is OK to drop the filedesc
4930
* lock inside of export_file_to_sb() as we will
4931
* re-validate and re-evaluate its properties when the
4932
* loop continues.
4933
*/
4934
error = export_file_to_sb(fp, i, &rights, efbuf);
4935
if (error != 0 || refcount_load(&fdp->fd_refcnt) == 0)
4936
break;
4937
}
4938
skip:
4939
FILEDESC_SUNLOCK(fdp);
4940
fail:
4941
if (fdp != NULL)
4942
fddrop(fdp);
4943
if (pdp != NULL)
4944
pddrop(pdp);
4945
free(efbuf, M_TEMP);
4946
return (error);
4947
}
4948
4949
/* sbuf length used by sysctl_kern_proc_filedesc(): five kinfo_file records. */
#define	FILEDESC_SBUF_SIZE	(sizeof(struct kinfo_file) * 5)
4950
4951
/*
4952
* Get per-process file descriptors for use by procstat(1), et al.
4953
*/
4954
static int
4955
sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
4956
{
4957
struct sbuf sb;
4958
struct proc *p;
4959
ssize_t maxlen;
4960
u_int namelen;
4961
int error, error2, *name;
4962
4963
namelen = arg2;
4964
if (namelen != 1)
4965
return (EINVAL);
4966
4967
name = (int *)arg1;
4968
4969
sbuf_new_for_sysctl(&sb, NULL, FILEDESC_SBUF_SIZE, req);
4970
sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
4971
error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
4972
if (error != 0) {
4973
sbuf_delete(&sb);
4974
return (error);
4975
}
4976
maxlen = req->oldptr != NULL ? req->oldlen : -1;
4977
error = kern_proc_filedesc_out(p, &sb, maxlen,
4978
KERN_FILEDESC_PACK_KINFO);
4979
error2 = sbuf_finish(&sb);
4980
sbuf_delete(&sb);
4981
return (error != 0 ? error : error2);
4982
}
4983
4984
#ifdef COMPAT_FREEBSD7
4985
#ifdef KINFO_OFILE_SIZE
/* Compile-time check that struct kinfo_ofile has the expected size. */
CTASSERT(sizeof(struct kinfo_ofile) == KINFO_OFILE_SIZE);
#endif
4988
4989
static void
4990
kinfo_to_okinfo(struct kinfo_file *kif, struct kinfo_ofile *okif)
4991
{
4992
4993
okif->kf_structsize = sizeof(*okif);
4994
okif->kf_type = kif->kf_type;
4995
okif->kf_fd = kif->kf_fd;
4996
okif->kf_ref_count = kif->kf_ref_count;
4997
okif->kf_flags = kif->kf_flags & (KF_FLAG_READ | KF_FLAG_WRITE |
4998
KF_FLAG_APPEND | KF_FLAG_ASYNC | KF_FLAG_FSYNC | KF_FLAG_NONBLOCK |
4999
KF_FLAG_DIRECT | KF_FLAG_HASLOCK);
5000
okif->kf_offset = kif->kf_offset;
5001
if (kif->kf_type == KF_TYPE_VNODE)
5002
okif->kf_vnode_type = kif->kf_un.kf_file.kf_file_type;
5003
else
5004
okif->kf_vnode_type = KF_VTYPE_VNON;
5005
strlcpy(okif->kf_path, kif->kf_path, sizeof(okif->kf_path));
5006
if (kif->kf_type == KF_TYPE_SOCKET) {
5007
okif->kf_sock_domain = kif->kf_un.kf_sock.kf_sock_domain0;
5008
okif->kf_sock_type = kif->kf_un.kf_sock.kf_sock_type0;
5009
okif->kf_sock_protocol = kif->kf_un.kf_sock.kf_sock_protocol0;
5010
okif->kf_sa_local = kif->kf_un.kf_sock.kf_sa_local;
5011
okif->kf_sa_peer = kif->kf_un.kf_sock.kf_sa_peer;
5012
} else {
5013
okif->kf_sa_local.ss_family = AF_UNSPEC;
5014
okif->kf_sa_peer.ss_family = AF_UNSPEC;
5015
}
5016
}
5017
5018
static int
5019
export_vnode_for_osysctl(struct vnode *vp, int type, struct kinfo_file *kif,
5020
struct kinfo_ofile *okif, struct pwddesc *pdp, struct sysctl_req *req)
5021
{
5022
int error;
5023
5024
vrefact(vp);
5025
PWDDESC_XUNLOCK(pdp);
5026
export_vnode_to_kinfo(vp, type, 0, kif, KERN_FILEDESC_PACK_KINFO);
5027
kinfo_to_okinfo(kif, okif);
5028
error = SYSCTL_OUT(req, okif, sizeof(*okif));
5029
PWDDESC_XLOCK(pdp);
5030
return (error);
5031
}
5032
5033
/*
5034
* Get per-process file descriptors for use by procstat(1), et al.
5035
*/
5036
static int
5037
sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS)
5038
{
5039
struct kinfo_ofile *okif;
5040
struct kinfo_file *kif;
5041
struct filedesc *fdp;
5042
struct pwddesc *pdp;
5043
struct pwd *pwd;
5044
u_int namelen;
5045
int error, i, *name;
5046
struct file *fp;
5047
struct proc *p;
5048
5049
namelen = arg2;
5050
if (namelen != 1)
5051
return (EINVAL);
5052
5053
name = (int *)arg1;
5054
error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
5055
if (error != 0)
5056
return (error);
5057
fdp = fdhold(p);
5058
if (fdp != NULL)
5059
pdp = pdhold(p);
5060
PROC_UNLOCK(p);
5061
if (fdp == NULL || pdp == NULL) {
5062
if (fdp != NULL)
5063
fddrop(fdp);
5064
return (ENOENT);
5065
}
5066
kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK);
5067
okif = malloc(sizeof(*okif), M_TEMP, M_WAITOK);
5068
PWDDESC_XLOCK(pdp);
5069
pwd = pwd_hold_pwddesc(pdp);
5070
if (pwd != NULL) {
5071
if (pwd->pwd_cdir != NULL)
5072
export_vnode_for_osysctl(pwd->pwd_cdir, KF_FD_TYPE_CWD, kif,
5073
okif, pdp, req);
5074
if (pwd->pwd_rdir != NULL)
5075
export_vnode_for_osysctl(pwd->pwd_rdir, KF_FD_TYPE_ROOT, kif,
5076
okif, pdp, req);
5077
if (pwd->pwd_jdir != NULL)
5078
export_vnode_for_osysctl(pwd->pwd_jdir, KF_FD_TYPE_JAIL, kif,
5079
okif, pdp, req);
5080
}
5081
PWDDESC_XUNLOCK(pdp);
5082
if (pwd != NULL)
5083
pwd_drop(pwd);
5084
FILEDESC_SLOCK(fdp);
5085
if (refcount_load(&fdp->fd_refcnt) == 0)
5086
goto skip;
5087
FILEDESC_FOREACH_FP(fdp, i, fp) {
5088
export_file_to_kinfo(fp, i, NULL, kif, fdp,
5089
KERN_FILEDESC_PACK_KINFO);
5090
FILEDESC_SUNLOCK(fdp);
5091
kinfo_to_okinfo(kif, okif);
5092
error = SYSCTL_OUT(req, okif, sizeof(*okif));
5093
FILEDESC_SLOCK(fdp);
5094
if (error != 0 || refcount_load(&fdp->fd_refcnt) == 0)
5095
break;
5096
}
5097
skip:
5098
FILEDESC_SUNLOCK(fdp);
5099
fddrop(fdp);
5100
pddrop(pdp);
5101
free(kif, M_TEMP);
5102
free(okif, M_TEMP);
5103
return (0);
5104
}
5105
5106
static SYSCTL_NODE(_kern_proc, KERN_PROC_OFILEDESC, ofiledesc,
5107
CTLFLAG_RD|CTLFLAG_MPSAFE, sysctl_kern_proc_ofiledesc,
5108
"Process ofiledesc entries");
5109
#endif /* COMPAT_FREEBSD7 */
5110
5111
int
5112
vntype_to_kinfo(int vtype)
5113
{
5114
struct {
5115
int vtype;
5116
int kf_vtype;
5117
} vtypes_table[] = {
5118
{ VBAD, KF_VTYPE_VBAD },
5119
{ VBLK, KF_VTYPE_VBLK },
5120
{ VCHR, KF_VTYPE_VCHR },
5121
{ VDIR, KF_VTYPE_VDIR },
5122
{ VFIFO, KF_VTYPE_VFIFO },
5123
{ VLNK, KF_VTYPE_VLNK },
5124
{ VNON, KF_VTYPE_VNON },
5125
{ VREG, KF_VTYPE_VREG },
5126
{ VSOCK, KF_VTYPE_VSOCK }
5127
};
5128
unsigned int i;
5129
5130
/*
5131
* Perform vtype translation.
5132
*/
5133
for (i = 0; i < nitems(vtypes_table); i++)
5134
if (vtypes_table[i].vtype == vtype)
5135
return (vtypes_table[i].kf_vtype);
5136
5137
return (KF_VTYPE_UNKNOWN);
5138
}
5139
5140
static SYSCTL_NODE(_kern_proc, KERN_PROC_FILEDESC, filedesc,
5141
CTLFLAG_RD|CTLFLAG_MPSAFE, sysctl_kern_proc_filedesc,
5142
"Process filedesc entries");
5143
5144
/*
5145
* Store a process current working directory information to sbuf.
5146
*
5147
* Takes a locked proc as argument, and returns with the proc unlocked.
5148
*/
5149
int
5150
kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen)
5151
{
5152
struct pwddesc *pdp;
5153
struct pwd *pwd;
5154
struct export_fd_buf *efbuf;
5155
struct vnode *cdir;
5156
int error;
5157
5158
PROC_LOCK_ASSERT(p, MA_OWNED);
5159
5160
pdp = pdhold(p);
5161
PROC_UNLOCK(p);
5162
if (pdp == NULL)
5163
return (EINVAL);
5164
5165
efbuf = malloc(sizeof(*efbuf), M_TEMP, M_WAITOK);
5166
efbuf->fdp = NULL;
5167
efbuf->pdp = pdp;
5168
efbuf->sb = sb;
5169
efbuf->remainder = maxlen;
5170
efbuf->flags = 0;
5171
5172
PWDDESC_XLOCK(pdp);
5173
pwd = PWDDESC_XLOCKED_LOAD_PWD(pdp);
5174
cdir = pwd->pwd_cdir;
5175
if (cdir == NULL) {
5176
error = EINVAL;
5177
} else {
5178
vrefact(cdir);
5179
error = export_vnode_to_sb(cdir, KF_FD_TYPE_CWD, FREAD, efbuf);
5180
}
5181
PWDDESC_XUNLOCK(pdp);
5182
pddrop(pdp);
5183
free(efbuf, M_TEMP);
5184
return (error);
5185
}
5186
5187
/*
5188
* Get per-process current working directory.
5189
*/
5190
static int
5191
sysctl_kern_proc_cwd(SYSCTL_HANDLER_ARGS)
5192
{
5193
struct sbuf sb;
5194
struct proc *p;
5195
ssize_t maxlen;
5196
u_int namelen;
5197
int error, error2, *name;
5198
5199
namelen = arg2;
5200
if (namelen != 1)
5201
return (EINVAL);
5202
5203
name = (int *)arg1;
5204
5205
sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_file), req);
5206
sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
5207
error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
5208
if (error != 0) {
5209
sbuf_delete(&sb);
5210
return (error);
5211
}
5212
maxlen = req->oldptr != NULL ? req->oldlen : -1;
5213
error = kern_proc_cwd_out(p, &sb, maxlen);
5214
error2 = sbuf_finish(&sb);
5215
sbuf_delete(&sb);
5216
return (error != 0 ? error : error2);
5217
}
5218
5219
/* kern.proc.cwd: read-only node served by sysctl_kern_proc_cwd(). */
static SYSCTL_NODE(_kern_proc, KERN_PROC_CWD, cwd,
    CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc_cwd,
    "Process current working directory");
5221
5222
#ifdef DDB
5223
/*
5224
* For the purposes of debugging, generate a human-readable string for the
5225
* file type.
5226
*/
5227
static const char *
5228
file_type_to_name(short type)
5229
{
5230
5231
switch (type) {
5232
case 0:
5233
return ("zero");
5234
case DTYPE_VNODE:
5235
return ("vnode");
5236
case DTYPE_SOCKET:
5237
return ("socket");
5238
case DTYPE_PIPE:
5239
return ("pipe");
5240
case DTYPE_FIFO:
5241
return ("fifo");
5242
case DTYPE_KQUEUE:
5243
return ("kqueue");
5244
case DTYPE_CRYPTO:
5245
return ("crypto");
5246
case DTYPE_MQUEUE:
5247
return ("mqueue");
5248
case DTYPE_SHM:
5249
return ("shm");
5250
case DTYPE_SEM:
5251
return ("ksem");
5252
case DTYPE_PTS:
5253
return ("pts");
5254
case DTYPE_DEV:
5255
return ("dev");
5256
case DTYPE_PROCDESC:
5257
return ("proc");
5258
case DTYPE_EVENTFD:
5259
return ("eventfd");
5260
case DTYPE_TIMERFD:
5261
return ("timerfd");
5262
case DTYPE_JAILDESC:
5263
return ("jail");
5264
default:
5265
return ("unkn");
5266
}
5267
}
5268
5269
/*
5270
* For the purposes of debugging, identify a process (if any, perhaps one of
5271
* many) that references the passed file in its file descriptor array. Return
5272
* NULL if none.
5273
*/
5274
static struct proc *
5275
file_to_first_proc(struct file *fp)
5276
{
5277
struct filedesc *fdp;
5278
struct proc *p;
5279
int n;
5280
5281
FOREACH_PROC_IN_SYSTEM(p) {
5282
if (p->p_state == PRS_NEW)
5283
continue;
5284
fdp = p->p_fd;
5285
if (fdp == NULL)
5286
continue;
5287
for (n = 0; n < fdp->fd_nfiles; n++) {
5288
if (fp == fdp->fd_ofiles[n].fde_file)
5289
return (p);
5290
}
5291
}
5292
return (NULL);
5293
}
5294
5295
static void
5296
db_print_file(struct file *fp, int header)
5297
{
5298
#define XPTRWIDTH ((int)howmany(sizeof(void *) * NBBY, 4))
5299
struct proc *p;
5300
5301
if (header)
5302
db_printf("%*s %6s %*s %8s %4s %5s %6s %*s %5s %s\n",
5303
XPTRWIDTH, "File", "Type", XPTRWIDTH, "Data", "Flag",
5304
"GCFl", "Count", "MCount", XPTRWIDTH, "Vnode", "FPID",
5305
"FCmd");
5306
p = file_to_first_proc(fp);
5307
db_printf("%*p %6s %*p %08x %04x %5d %6d %*p %5d %s\n", XPTRWIDTH,
5308
fp, file_type_to_name(fp->f_type), XPTRWIDTH, fp->f_data,
5309
fp->f_flag, 0, refcount_load(&fp->f_count), 0, XPTRWIDTH, fp->f_vnode,
5310
p != NULL ? p->p_pid : -1, p != NULL ? p->p_comm : "-");
5311
5312
#undef XPTRWIDTH
5313
}
5314
5315
DB_SHOW_COMMAND(file, db_show_file)
5316
{
5317
struct file *fp;
5318
5319
if (!have_addr) {
5320
db_printf("usage: show file <addr>\n");
5321
return;
5322
}
5323
fp = (struct file *)addr;
5324
db_print_file(fp, 1);
5325
}
5326
5327
DB_SHOW_COMMAND_FLAGS(files, db_show_files, DB_CMD_MEMSAFE)
5328
{
5329
struct filedesc *fdp;
5330
struct file *fp;
5331
struct proc *p;
5332
int header;
5333
int n;
5334
5335
header = 1;
5336
FOREACH_PROC_IN_SYSTEM(p) {
5337
if (p->p_state == PRS_NEW)
5338
continue;
5339
if ((fdp = p->p_fd) == NULL)
5340
continue;
5341
for (n = 0; n < fdp->fd_nfiles; ++n) {
5342
if ((fp = fdp->fd_ofiles[n].fde_file) == NULL)
5343
continue;
5344
db_print_file(fp, header);
5345
header = 0;
5346
}
5347
}
5348
}
5349
#endif
5350
5351
SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc,
5352
CTLFLAG_RWTUN | CTLFLAG_NOFETCH,
5353
&maxfilesperproc, 0, "Maximum files allowed open per process");
5354
5355
SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RWTUN | CTLFLAG_NOFETCH,
5356
&maxfiles, 0, "Maximum number of files");
5357
5358
SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
5359
&openfiles, 0, "System-wide number of open files");
5360
5361
/* ARGSUSED*/
5362
static void
5363
filelistinit(void *dummy)
5364
{
5365
5366
file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
5367
NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
5368
filedesc0_zone = uma_zcreate("filedesc0", sizeof(struct filedesc0),
5369
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
5370
pwd_zone = uma_zcreate("PWD", sizeof(struct pwd), NULL, NULL,
5371
NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR);
5372
/*
5373
* XXXMJG this is a temporary hack due to boot ordering issues against
5374
* the vnode zone.
5375
*/
5376
vfs_smr = uma_zone_get_smr(pwd_zone);
5377
mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
5378
}
5379
SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL);
5380
5381
/*-------------------------------------------------------------------*/
5382
5383
/* badfileops read/write method: ignores its arguments and returns EBADF. */
static int
badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	return (EBADF);
}
5390
5391
/* badfileops truncate method: ignores its arguments and returns EINVAL. */
static int
badfo_truncate(struct file *fp, off_t length, struct ucred *active_cred,
    struct thread *td)
{
	return (EINVAL);
}
5398
5399
static int
5400
badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred,
5401
struct thread *td)
5402
{
5403
5404
return (EBADF);
5405
}
5406
5407
/* badfileops poll method: reports no events (returns 0). */
static int
badfo_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{
	return (0);
}
5414
5415
/* badfileops kqfilter method: ignores its arguments and returns EBADF. */
static int
badfo_kqfilter(struct file *fp, struct knote *kn)
{
	return (EBADF);
}
5421
5422
/* badfileops stat method: ignores its arguments and returns EBADF. */
static int
badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
{
	return (EBADF);
}
5428
5429
/* badfileops close method: does nothing and returns 0. */
static int
badfo_close(struct file *fp, struct thread *td)
{
	return (0);
}
5435
5436
/* badfileops chmod method: ignores its arguments and returns EBADF. */
static int
badfo_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
    struct thread *td)
{
	return (EBADF);
}
5443
5444
/* badfileops chown method: ignores its arguments and returns EBADF. */
static int
badfo_chown(struct file *fp, uid_t uid, gid_t gid,
    struct ucred *active_cred, struct thread *td)
{
	return (EBADF);
}
5451
5452
/* badfileops sendfile method: ignores its arguments and returns EBADF. */
static int
badfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
    struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent,
    int flags, struct thread *td)
{
	return (EBADF);
}
5460
5461
/* badfileops fill_kinfo method: leaves the record untouched and returns 0. */
static int
badfo_fill_kinfo(struct file *fp, struct kinfo_file *kif,
    struct filedesc *fdp)
{
	return (0);
}
5467
5468
const struct fileops badfileops = {
5469
.fo_read = badfo_readwrite,
5470
.fo_write = badfo_readwrite,
5471
.fo_truncate = badfo_truncate,
5472
.fo_ioctl = badfo_ioctl,
5473
.fo_poll = badfo_poll,
5474
.fo_kqfilter = badfo_kqfilter,
5475
.fo_stat = badfo_stat,
5476
.fo_close = badfo_close,
5477
.fo_chmod = badfo_chmod,
5478
.fo_chown = badfo_chown,
5479
.fo_sendfile = badfo_sendfile,
5480
.fo_fill_kinfo = badfo_fill_kinfo,
5481
};
5482
5483
/* path_fileops poll method: always reports POLLNVAL. */
static int
path_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{

	return (POLLNVAL);
}
5489
5490
static int
5491
path_close(struct file *fp, struct thread *td)
5492
{
5493
MPASS(fp->f_type == DTYPE_VNODE);
5494
fp->f_ops = &badfileops;
5495
vrele(fp->f_vnode);
5496
return (0);
5497
}
5498
5499
const struct fileops path_fileops = {
5500
.fo_read = badfo_readwrite,
5501
.fo_write = badfo_readwrite,
5502
.fo_truncate = badfo_truncate,
5503
.fo_ioctl = badfo_ioctl,
5504
.fo_poll = path_poll,
5505
.fo_kqfilter = vn_kqfilter_opath,
5506
.fo_stat = vn_statfile,
5507
.fo_close = path_close,
5508
.fo_chmod = badfo_chmod,
5509
.fo_chown = badfo_chown,
5510
.fo_sendfile = badfo_sendfile,
5511
.fo_fill_kinfo = vn_fill_kinfo,
5512
.fo_cmp = vn_cmp,
5513
.fo_flags = DFLAG_PASSABLE,
5514
};
5515
5516
/* Generic read/write method that always returns EOPNOTSUPP. */
int
invfo_rdwr(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	return (EOPNOTSUPP);
}
5523
5524
/* Generic truncate method that always returns EINVAL. */
int
invfo_truncate(struct file *fp, off_t length, struct ucred *active_cred,
    struct thread *td)
{
	return (EINVAL);
}
5531
5532
int
5533
invfo_ioctl(struct file *fp, u_long com, void *data,
5534
struct ucred *active_cred, struct thread *td)
5535
{
5536
5537
return (ENOTTY);
5538
}
5539
5540
/* Generic poll method: returns whatever poll_no_poll() reports for 'events'. */
int
invfo_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{
	int revents;

	revents = poll_no_poll(events);
	return (revents);
}
5547
5548
/* Generic kqfilter method that always returns EINVAL. */
int
invfo_kqfilter(struct file *fp, struct knote *kn)
{
	return (EINVAL);
}
5554
5555
/* Generic chmod method that always returns EINVAL. */
int
invfo_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
    struct thread *td)
{
	return (EINVAL);
}
5562
5563
/* Generic chown method that always returns EINVAL. */
int
invfo_chown(struct file *fp, uid_t uid, gid_t gid,
    struct ucred *active_cred, struct thread *td)
{
	return (EINVAL);
}
5570
5571
/* Generic sendfile method that always returns EINVAL. */
int
invfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
    struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent,
    int flags, struct thread *td)
{
	return (EINVAL);
}
5579
5580
/*-------------------------------------------------------------------*/
5581
5582
/*
5583
* File Descriptor pseudo-device driver (/dev/fd/).
5584
*
5585
* Opening minor device N dup()s the file (if any) connected to file
5586
* descriptor N belonging to the calling process. Note that this driver
5587
* consists of only the ``open()'' routine, because all subsequent
5588
* references to this file will be direct to the other driver.
5589
*
5590
* XXX: we could give this one a cloning event handler if necessary.
5591
*/
5592
5593
/* ARGSUSED */
5594
static int
5595
fdopen(struct cdev *dev, int mode, int type, struct thread *td)
5596
{
5597
5598
/*
5599
* XXX Kludge: set curthread->td_dupfd to contain the value of the
5600
* the file descriptor being sought for duplication. The error
5601
* return ensures that the vnode for this device will be released
5602
* by vn_open. Open will detect this special error and take the
5603
* actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
5604
* will simply report the error.
5605
*/
5606
td->td_dupfd = dev2unit(dev);
5607
return (ENODEV);
5608
}
5609
5610
static struct cdevsw fildesc_cdevsw = {
5611
.d_version = D_VERSION,
5612
.d_open = fdopen,
5613
.d_name = "FD",
5614
};
5615
5616
static void
5617
fildesc_drvinit(void *unused)
5618
{
5619
struct cdev *dev;
5620
5621
dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 0, NULL,
5622
UID_ROOT, GID_WHEEL, 0666, "fd/0");
5623
make_dev_alias(dev, "stdin");
5624
dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 1, NULL,
5625
UID_ROOT, GID_WHEEL, 0666, "fd/1");
5626
make_dev_alias(dev, "stdout");
5627
dev = make_dev_credf(MAKEDEV_ETERNAL, &fildesc_cdevsw, 2, NULL,
5628
UID_ROOT, GID_WHEEL, 0666, "fd/2");
5629
make_dev_alias(dev, "stderr");
5630
}
5631
5632
SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL);
5633
5634