Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/fs/fuse/fuse_device.c
103381 views
1
/*-
2
* SPDX-License-Identifier: BSD-3-Clause
3
*
4
* Copyright (c) 2007-2009 Google Inc.
5
* All rights reserved.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions are
9
* met:
10
*
11
* * Redistributions of source code must retain the above copyright
12
* notice, this list of conditions and the following disclaimer.
13
* * Redistributions in binary form must reproduce the above
14
* copyright notice, this list of conditions and the following disclaimer
15
* in the documentation and/or other materials provided with the
16
* distribution.
17
* * Neither the name of Google Inc. nor the names of its
18
* contributors may be used to endorse or promote products derived from
19
* this software without specific prior written permission.
20
*
21
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
*
33
* Copyright (C) 2005 Csaba Henk.
34
* All rights reserved.
35
*
36
* Copyright (c) 2019 The FreeBSD Foundation
37
*
38
* Portions of this software were developed by BFF Storage Systems, LLC under
39
* sponsorship from the FreeBSD Foundation.
40
*
41
* Redistribution and use in source and binary forms, with or without
42
* modification, are permitted provided that the following conditions
43
* are met:
44
* 1. Redistributions of source code must retain the above copyright
45
* notice, this list of conditions and the following disclaimer.
46
* 2. Redistributions in binary form must reproduce the above copyright
47
* notice, this list of conditions and the following disclaimer in the
48
* documentation and/or other materials provided with the distribution.
49
*
50
* THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
51
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
54
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60
* SUCH DAMAGE.
61
*/
62
63
#include <sys/types.h>
64
#include <sys/param.h>
65
#include <sys/module.h>
66
#include <sys/systm.h>
67
#include <sys/errno.h>
68
#include <sys/kernel.h>
69
#include <sys/conf.h>
70
#include <sys/uio.h>
71
#include <sys/malloc.h>
72
#include <sys/queue.h>
73
#include <sys/lock.h>
74
#include <sys/sx.h>
75
#include <sys/mutex.h>
76
#include <sys/proc.h>
77
#include <sys/mount.h>
78
#include <sys/sdt.h>
79
#include <sys/stat.h>
80
#include <sys/fcntl.h>
81
#include <sys/sysctl.h>
82
#include <sys/poll.h>
83
#include <sys/selinfo.h>
84
#define EXTERR_CATEGORY EXTERR_CAT_FUSE_DEVICE
85
#include <sys/exterrvar.h>
86
87
#include "fuse.h"
88
#include "fuse_internal.h"
89
#include "fuse_ipc.h"
90
91
#include <compat/linux/linux_errno.h>
92
#include <compat/linux/linux_errno.inc>
93
94
SDT_PROVIDER_DECLARE(fusefs);
95
/*
96
* Fuse trace probe:
97
* arg0: verbosity. Higher numbers give more verbose messages
98
* arg1: Textual message
99
*/
100
SDT_PROBE_DEFINE2(fusefs, , device, trace, "int", "char*");
101
102
static struct cdev *fuse_dev;
103
104
static d_kqfilter_t fuse_device_filter;
105
static d_open_t fuse_device_open;
106
static d_poll_t fuse_device_poll;
107
static d_read_t fuse_device_read;
108
static d_write_t fuse_device_write;
109
110
static struct cdevsw fuse_device_cdevsw = {
111
.d_kqfilter = fuse_device_filter,
112
.d_open = fuse_device_open,
113
.d_name = "fuse",
114
.d_poll = fuse_device_poll,
115
.d_read = fuse_device_read,
116
.d_write = fuse_device_write,
117
.d_version = D_VERSION,
118
};
119
120
static int fuse_device_filt_read(struct knote *kn, long hint);
121
static int fuse_device_filt_write(struct knote *kn, long hint);
122
static void fuse_device_filt_detach(struct knote *kn);
123
124
static const struct filterops fuse_device_rfiltops = {
125
.f_isfd = 1,
126
.f_detach = fuse_device_filt_detach,
127
.f_event = fuse_device_filt_read,
128
.f_copy = knote_triv_copy,
129
};
130
131
static const struct filterops fuse_device_wfiltops = {
132
.f_isfd = 1,
133
.f_event = fuse_device_filt_write,
134
.f_copy = knote_triv_copy,
135
};
136
137
/****************************
138
*
139
* >>> Fuse device op defs
140
*
141
****************************/
142
143
static void
144
fdata_dtor(void *arg)
145
{
146
struct fuse_data *fdata;
147
struct fuse_ticket *tick;
148
149
fdata = arg;
150
if (fdata == NULL)
151
return;
152
153
fdata_set_dead(fdata);
154
155
FUSE_LOCK();
156
fuse_lck_mtx_lock(fdata->aw_mtx);
157
/* wakup poll()ers */
158
selwakeuppri(&fdata->ks_rsel, PZERO);
159
/* Don't let syscall handlers wait in vain */
160
while ((tick = fuse_aw_pop(fdata))) {
161
fuse_lck_mtx_lock(tick->tk_aw_mtx);
162
fticket_set_answered(tick);
163
tick->tk_aw_errno = ENOTCONN;
164
wakeup(tick);
165
fuse_lck_mtx_unlock(tick->tk_aw_mtx);
166
FUSE_ASSERT_AW_DONE(tick);
167
fuse_ticket_drop(tick);
168
}
169
fuse_lck_mtx_unlock(fdata->aw_mtx);
170
171
/* Cleanup unsent operations */
172
fuse_lck_mtx_lock(fdata->ms_mtx);
173
while ((tick = fuse_ms_pop(fdata))) {
174
fuse_ticket_drop(tick);
175
}
176
fuse_lck_mtx_unlock(fdata->ms_mtx);
177
FUSE_UNLOCK();
178
179
if (fdata->mp && fdata->dataflags & FSESS_AUTO_UNMOUNT) {
180
vfs_ref(fdata->mp);
181
dounmount(fdata->mp, MNT_FORCE, curthread);
182
}
183
184
fdata_trydestroy(fdata);
185
}
186
187
static int
188
fuse_device_filter(struct cdev *dev, struct knote *kn)
189
{
190
struct fuse_data *data;
191
int error;
192
193
error = devfs_get_cdevpriv((void **)&data);
194
195
if (error == 0 && kn->kn_filter == EVFILT_READ) {
196
kn->kn_fop = &fuse_device_rfiltops;
197
kn->kn_hook = data;
198
knlist_add(&data->ks_rsel.si_note, kn, 0);
199
error = 0;
200
} else if (error == 0 && kn->kn_filter == EVFILT_WRITE) {
201
kn->kn_fop = &fuse_device_wfiltops;
202
error = 0;
203
} else if (error == 0) {
204
error = EXTERROR(EINVAL, "Unsupported kevent filter");
205
kn->kn_data = error;
206
}
207
208
return (error);
209
}
210
211
static void
212
fuse_device_filt_detach(struct knote *kn)
213
{
214
struct fuse_data *data;
215
216
data = (struct fuse_data*)kn->kn_hook;
217
MPASS(data != NULL);
218
knlist_remove(&data->ks_rsel.si_note, kn, 0);
219
kn->kn_hook = NULL;
220
}
221
222
static int
223
fuse_device_filt_read(struct knote *kn, long hint)
224
{
225
struct fuse_data *data;
226
int ready;
227
228
data = (struct fuse_data*)kn->kn_hook;
229
MPASS(data != NULL);
230
231
mtx_assert(&data->ms_mtx, MA_OWNED);
232
if (fdata_get_dead(data)) {
233
kn->kn_flags |= EV_EOF;
234
kn->kn_fflags = ENODEV;
235
kn->kn_data = 1;
236
ready = 1;
237
} else if (STAILQ_FIRST(&data->ms_head)) {
238
MPASS(data->ms_count >= 1);
239
kn->kn_data = data->ms_count;
240
ready = 1;
241
} else {
242
ready = 0;
243
}
244
245
return (ready);
246
}
247
248
static int
249
fuse_device_filt_write(struct knote *kn, long hint)
250
{
251
252
kn->kn_data = 0;
253
254
/* The device is always ready to write, so we return 1*/
255
return (1);
256
}
257
258
/*
259
* Resources are set up on a per-open basis
260
*/
261
static int
262
fuse_device_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
263
{
264
struct fuse_data *fdata;
265
int error;
266
267
SDT_PROBE2(fusefs, , device, trace, 1, "device open");
268
269
fdata = fdata_alloc(dev, td->td_ucred);
270
error = devfs_set_cdevpriv(fdata, fdata_dtor);
271
if (error != 0)
272
fdata_trydestroy(fdata);
273
else
274
SDT_PROBE2(fusefs, , device, trace, 1, "device open success");
275
return (error);
276
}
277
278
int
279
fuse_device_poll(struct cdev *dev, int events, struct thread *td)
280
{
281
struct fuse_data *data;
282
int error, revents = 0;
283
284
error = devfs_get_cdevpriv((void **)&data);
285
if (error != 0)
286
return (events &
287
(POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));
288
289
if (events & (POLLIN | POLLRDNORM)) {
290
fuse_lck_mtx_lock(data->ms_mtx);
291
if (fdata_get_dead(data) || STAILQ_FIRST(&data->ms_head))
292
revents |= events & (POLLIN | POLLRDNORM);
293
else
294
selrecord(td, &data->ks_rsel);
295
fuse_lck_mtx_unlock(data->ms_mtx);
296
}
297
if (events & (POLLOUT | POLLWRNORM)) {
298
revents |= events & (POLLOUT | POLLWRNORM);
299
}
300
return (revents);
301
}
302
303
/*
304
* fuse_device_read hangs on the queue of VFS messages.
305
* When it's notified that there is a new one, it picks that and
306
* passes up to the daemon
307
*/
308
int
309
fuse_device_read(struct cdev *dev, struct uio *uio, int ioflag)
310
{
311
int err;
312
struct fuse_data *data;
313
struct fuse_ticket *tick;
314
void *buf;
315
int buflen;
316
317
SDT_PROBE2(fusefs, , device, trace, 1, "fuse device read");
318
319
err = devfs_get_cdevpriv((void **)&data);
320
if (err != 0)
321
return (err);
322
323
fuse_lck_mtx_lock(data->ms_mtx);
324
again:
325
if (fdata_get_dead(data)) {
326
SDT_PROBE2(fusefs, , device, trace, 2,
327
"we know early on that reader should be kicked so we "
328
"don't wait for news");
329
fuse_lck_mtx_unlock(data->ms_mtx);
330
return (EXTERROR(ENODEV, "This FUSE session is about to be closed"));
331
}
332
if (!(tick = fuse_ms_pop(data))) {
333
/* check if we may block */
334
if (ioflag & O_NONBLOCK) {
335
/* get outa here soon */
336
fuse_lck_mtx_unlock(data->ms_mtx);
337
return (EAGAIN);
338
} else {
339
err = msleep(data, &data->ms_mtx, PCATCH, "fu_msg", 0);
340
if (err != 0) {
341
fuse_lck_mtx_unlock(data->ms_mtx);
342
if (fdata_get_dead(data))
343
err = EXTERROR(ENODEV,
344
"This FUSE session is about to be closed");
345
return (err);
346
}
347
tick = fuse_ms_pop(data);
348
}
349
}
350
if (!tick) {
351
/*
352
* We can get here if fuse daemon suddenly terminates,
353
* eg, by being hit by a SIGKILL
354
* -- and some other cases, too, tho not totally clear, when
355
* (cv_signal/wakeup_one signals the whole process ?)
356
*/
357
SDT_PROBE2(fusefs, , device, trace, 1, "no message on thread");
358
goto again;
359
}
360
fuse_lck_mtx_unlock(data->ms_mtx);
361
362
if (fdata_get_dead(data)) {
363
/*
364
* somebody somewhere -- eg., umount routine --
365
* wants this liaison finished off
366
*/
367
SDT_PROBE2(fusefs, , device, trace, 2,
368
"reader is to be sacked");
369
if (tick) {
370
SDT_PROBE2(fusefs, , device, trace, 2, "weird -- "
371
"\"kick\" is set tho there is message");
372
FUSE_ASSERT_MS_DONE(tick);
373
fuse_ticket_drop(tick);
374
}
375
/* This should make the daemon get off of us */
376
return (EXTERROR(ENODEV, "This FUSE session is about to be closed"));
377
}
378
SDT_PROBE2(fusefs, , device, trace, 1,
379
"fuse device read message successfully");
380
381
buf = tick->tk_ms_fiov.base;
382
buflen = tick->tk_ms_fiov.len;
383
384
/*
385
* Why not ban mercilessly stupid daemons who can't keep up
386
* with us? (There is no much use of a partial read here...)
387
*/
388
/*
389
* XXX note that in such cases Linux FUSE throws EIO at the
390
* syscall invoker and stands back to the message queue. The
391
* rationale should be made clear (and possibly adopt that
392
* behaviour). Keeping the current scheme at least makes
393
* fallacy as loud as possible...
394
*/
395
if (uio->uio_resid < buflen) {
396
fdata_set_dead(data);
397
SDT_PROBE2(fusefs, , device, trace, 2,
398
"daemon is stupid, kick it off...");
399
err = EXTERROR(ENODEV, "Partial read attempted");
400
} else {
401
err = uiomove(buf, buflen, uio);
402
}
403
404
FUSE_ASSERT_MS_DONE(tick);
405
fuse_ticket_drop(tick);
406
407
return (err);
408
}
409
410
static inline int
411
fuse_ohead_audit(struct fuse_out_header *ohead, struct uio *uio)
412
{
413
if (uio->uio_resid + sizeof(struct fuse_out_header) != ohead->len) {
414
SDT_PROBE2(fusefs, , device, trace, 1,
415
"Format error: body size "
416
"differs from size claimed by header");
417
return (EXTERROR(EINVAL, "Format error: body size "
418
"differs from size claimed by header"));
419
}
420
if (uio->uio_resid && ohead->unique != 0 && ohead->error) {
421
SDT_PROBE2(fusefs, , device, trace, 1,
422
"Format error: non zero error but message had a body");
423
return (EXTERROR(EINVAL, "Format error: non zero error, "
424
"but message had a body"));
425
}
426
427
return (0);
428
}
429
430
SDT_PROBE_DEFINE1(fusefs, , device, fuse_device_write_notify,
431
"struct fuse_out_header*");
432
SDT_PROBE_DEFINE1(fusefs, , device, fuse_device_write_missing_ticket,
433
"uint64_t");
434
SDT_PROBE_DEFINE1(fusefs, , device, fuse_device_write_found,
435
"struct fuse_ticket*");
436
/*
437
* fuse_device_write first reads the header sent by the daemon.
438
* If that's OK, looks up ticket/callback node by the unique id seen in header.
439
* If the callback node contains a handler function, the uio is passed over
440
* that.
441
*/
442
static int
443
fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag)
444
{
445
struct fuse_out_header ohead;
446
int err = 0;
447
struct fuse_data *data;
448
struct mount *mp;
449
struct fuse_ticket *tick, *itick, *x_tick;
450
int found = 0;
451
452
err = devfs_get_cdevpriv((void **)&data);
453
if (err != 0)
454
return (err);
455
456
if (uio->uio_resid < sizeof(struct fuse_out_header)) {
457
SDT_PROBE2(fusefs, , device, trace, 1,
458
"fuse_device_write got less than a header!");
459
fdata_set_dead(data);
460
return (EXTERROR(EINVAL, "fuse_device_write got less than a header!"));
461
}
462
if ((err = uiomove(&ohead, sizeof(struct fuse_out_header), uio)) != 0)
463
return (err);
464
465
if (data->linux_errnos != 0 && ohead.error != 0) {
466
err = -ohead.error;
467
if (err < 0 || err >= nitems(linux_to_bsd_errtbl))
468
return (EXTERROR(EINVAL, "Unknown Linux errno", err));
469
470
/* '-', because it will get flipped again below */
471
ohead.error = -linux_to_bsd_errtbl[err];
472
}
473
474
/*
475
* We check header information (which is redundant) and compare it
476
* with what we see. If we see some inconsistency we discard the
477
* whole answer and proceed on as if it had never existed. In
478
* particular, no pretender will be woken up, regardless the
479
* "unique" value in the header.
480
*/
481
if ((err = fuse_ohead_audit(&ohead, uio))) {
482
fdata_set_dead(data);
483
return (err);
484
}
485
/* Pass stuff over to callback if there is one installed */
486
487
/* Looking for ticket with the unique id of header */
488
fuse_lck_mtx_lock(data->aw_mtx);
489
TAILQ_FOREACH_SAFE(tick, &data->aw_head, tk_aw_link,
490
x_tick) {
491
if (tick->tk_unique == ohead.unique) {
492
SDT_PROBE1(fusefs, , device, fuse_device_write_found,
493
tick);
494
found = 1;
495
fuse_aw_remove(tick);
496
break;
497
}
498
}
499
if (found && tick->irq_unique > 0) {
500
/*
501
* Discard the FUSE_INTERRUPT ticket that tried to interrupt
502
* this operation
503
*/
504
TAILQ_FOREACH_SAFE(itick, &data->aw_head, tk_aw_link,
505
x_tick) {
506
if (itick->tk_unique == tick->irq_unique) {
507
fuse_aw_remove(itick);
508
fuse_ticket_drop(itick);
509
break;
510
}
511
}
512
tick->irq_unique = 0;
513
}
514
fuse_lck_mtx_unlock(data->aw_mtx);
515
516
if (found) {
517
if (tick->tk_aw_handler) {
518
/*
519
* We found a callback with proper handler. In this
520
* case the out header will be 0wnd by the callback,
521
* so the fun of freeing that is left for her.
522
* (Then, by all chance, she'll just get that's done
523
* via ticket_drop(), so no manual mucking
524
* around...)
525
*/
526
SDT_PROBE2(fusefs, , device, trace, 1,
527
"pass ticket to a callback");
528
/* Sanitize the linuxism of negative errnos */
529
ohead.error *= -1;
530
if (ohead.error < 0 || ohead.error > ELAST) {
531
/* Illegal error code */
532
ohead.error = EIO;
533
memcpy(&tick->tk_aw_ohead, &ohead,
534
sizeof(ohead));
535
tick->tk_aw_handler(tick, uio);
536
err = EXTERROR(EINVAL, "Unknown errno", ohead.error);
537
} else {
538
memcpy(&tick->tk_aw_ohead, &ohead,
539
sizeof(ohead));
540
err = tick->tk_aw_handler(tick, uio);
541
}
542
} else {
543
/* pretender doesn't wanna do anything with answer */
544
SDT_PROBE2(fusefs, , device, trace, 1,
545
"stuff devalidated, so we drop it");
546
}
547
548
/*
549
* As aw_mtx was not held during the callback execution the
550
* ticket may have been inserted again. However, this is safe
551
* because fuse_ticket_drop() will deal with refcount anyway.
552
*/
553
fuse_ticket_drop(tick);
554
} else if (ohead.unique == 0){
555
/* unique == 0 means asynchronous notification */
556
SDT_PROBE1(fusefs, , device, fuse_device_write_notify, &ohead);
557
if (data->mp == NULL) {
558
SDT_PROBE2(fusefs, , device, trace, 1,
559
"asynchronous notification before mount"
560
" or after unmount");
561
return (EXTERROR(ENODEV,
562
"This FUSE session is not mounted"));
563
}
564
mp = data->mp;
565
vfs_ref(mp);
566
err = vfs_busy(mp, 0);
567
vfs_rel(mp);
568
if (err)
569
return (err);
570
571
switch (ohead.error) {
572
case FUSE_NOTIFY_INVAL_ENTRY:
573
err = fuse_internal_invalidate_entry(mp, uio);
574
break;
575
case FUSE_NOTIFY_INVAL_INODE:
576
err = fuse_internal_invalidate_inode(mp, uio);
577
break;
578
case FUSE_NOTIFY_RETRIEVE:
579
case FUSE_NOTIFY_STORE:
580
/*
581
* Unimplemented. I don't know of any file systems
582
* that use them, and the protocol isn't sound anyway,
583
* since the notification messages don't include the
584
* inode's generation number. Without that, it's
585
* possible to manipulate the cache of the wrong vnode.
586
* Finally, it's not defined what this message should
587
* do for a file with dirty cache.
588
*/
589
case FUSE_NOTIFY_POLL:
590
/* Unimplemented. See comments in fuse_vnops */
591
default:
592
/* Not implemented */
593
err = EXTERROR(ENOSYS, "Unimplemented FUSE notification code",
594
ohead.error);
595
}
596
vfs_unbusy(mp);
597
} else {
598
/* no callback at all! */
599
SDT_PROBE1(fusefs, , device, fuse_device_write_missing_ticket,
600
ohead.unique);
601
if (ohead.error == -EAGAIN) {
602
/*
603
* This was probably a response to a FUSE_INTERRUPT
604
* operation whose original operation is already
605
* complete. We can't store FUSE_INTERRUPT tickets
606
* indefinitely because their responses are optional.
607
* So we delete them when the original operation
608
* completes. And sadly the fuse_header_out doesn't
609
* identify the opcode, so we have to guess.
610
*/
611
err = 0;
612
} else {
613
err = EXTERROR(EINVAL, "FUSE ticket is missing");
614
}
615
}
616
617
return (err);
618
}
619
620
int
621
fuse_device_init(void)
622
{
623
624
fuse_dev = make_dev(&fuse_device_cdevsw, 0, UID_ROOT, GID_OPERATOR,
625
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH, "fuse");
626
if (fuse_dev == NULL)
627
return (ENOMEM);
628
return (0);
629
}
630
631
void
632
fuse_device_destroy(void)
633
{
634
635
MPASS(fuse_dev != NULL);
636
destroy_dev(fuse_dev);
637
}
638
639