Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/fs/fuse/fuse_device.c
39586 views
1
/*-
2
* SPDX-License-Identifier: BSD-3-Clause
3
*
4
* Copyright (c) 2007-2009 Google Inc.
5
* All rights reserved.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions are
9
* met:
10
*
11
* * Redistributions of source code must retain the above copyright
12
* notice, this list of conditions and the following disclaimer.
13
* * Redistributions in binary form must reproduce the above
14
* copyright notice, this list of conditions and the following disclaimer
15
* in the documentation and/or other materials provided with the
16
* distribution.
17
* * Neither the name of Google Inc. nor the names of its
18
* contributors may be used to endorse or promote products derived from
19
* this software without specific prior written permission.
20
*
21
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
*
33
* Copyright (C) 2005 Csaba Henk.
34
* All rights reserved.
35
*
36
* Copyright (c) 2019 The FreeBSD Foundation
37
*
38
* Portions of this software were developed by BFF Storage Systems, LLC under
39
* sponsorship from the FreeBSD Foundation.
40
*
41
* Redistribution and use in source and binary forms, with or without
42
* modification, are permitted provided that the following conditions
43
* are met:
44
* 1. Redistributions of source code must retain the above copyright
45
* notice, this list of conditions and the following disclaimer.
46
* 2. Redistributions in binary form must reproduce the above copyright
47
* notice, this list of conditions and the following disclaimer in the
48
* documentation and/or other materials provided with the distribution.
49
*
50
* THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
51
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
54
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60
* SUCH DAMAGE.
61
*/
62
63
#include <sys/types.h>
64
#include <sys/param.h>
65
#include <sys/module.h>
66
#include <sys/systm.h>
67
#include <sys/errno.h>
68
#include <sys/param.h>
69
#include <sys/kernel.h>
70
#include <sys/conf.h>
71
#include <sys/uio.h>
72
#include <sys/malloc.h>
73
#include <sys/queue.h>
74
#include <sys/lock.h>
75
#include <sys/sx.h>
76
#include <sys/mutex.h>
77
#include <sys/proc.h>
78
#include <sys/mount.h>
79
#include <sys/sdt.h>
80
#include <sys/stat.h>
81
#include <sys/fcntl.h>
82
#include <sys/sysctl.h>
83
#include <sys/poll.h>
84
#include <sys/selinfo.h>
85
#define EXTERR_CATEGORY EXTERR_CAT_FUSE
86
#include <sys/exterrvar.h>
87
88
#include "fuse.h"
89
#include "fuse_internal.h"
90
#include "fuse_ipc.h"
91
92
#include <compat/linux/linux_errno.h>
93
#include <compat/linux/linux_errno.inc>
94
95
SDT_PROVIDER_DECLARE(fusefs);
96
/*
97
* Fuse trace probe:
98
* arg0: verbosity. Higher numbers give more verbose messages
99
* arg1: Textual message
100
*/
101
SDT_PROBE_DEFINE2(fusefs, , device, trace, "int", "char*");
102
103
static struct cdev *fuse_dev;
104
105
static d_kqfilter_t fuse_device_filter;
106
static d_open_t fuse_device_open;
107
static d_poll_t fuse_device_poll;
108
static d_read_t fuse_device_read;
109
static d_write_t fuse_device_write;
110
111
static struct cdevsw fuse_device_cdevsw = {
112
.d_kqfilter = fuse_device_filter,
113
.d_open = fuse_device_open,
114
.d_name = "fuse",
115
.d_poll = fuse_device_poll,
116
.d_read = fuse_device_read,
117
.d_write = fuse_device_write,
118
.d_version = D_VERSION,
119
};
120
121
static int fuse_device_filt_read(struct knote *kn, long hint);
122
static int fuse_device_filt_write(struct knote *kn, long hint);
123
static void fuse_device_filt_detach(struct knote *kn);
124
125
static const struct filterops fuse_device_rfiltops = {
126
.f_isfd = 1,
127
.f_detach = fuse_device_filt_detach,
128
.f_event = fuse_device_filt_read,
129
};
130
131
static const struct filterops fuse_device_wfiltops = {
132
.f_isfd = 1,
133
.f_event = fuse_device_filt_write,
134
};
135
136
/****************************
 *
 * >>> Fuse device op defs
 *
 ****************************/
141
142
static void
143
fdata_dtor(void *arg)
144
{
145
struct fuse_data *fdata;
146
struct fuse_ticket *tick;
147
148
fdata = arg;
149
if (fdata == NULL)
150
return;
151
152
fdata_set_dead(fdata);
153
154
FUSE_LOCK();
155
fuse_lck_mtx_lock(fdata->aw_mtx);
156
/* wakup poll()ers */
157
selwakeuppri(&fdata->ks_rsel, PZERO);
158
/* Don't let syscall handlers wait in vain */
159
while ((tick = fuse_aw_pop(fdata))) {
160
fuse_lck_mtx_lock(tick->tk_aw_mtx);
161
fticket_set_answered(tick);
162
tick->tk_aw_errno = ENOTCONN;
163
wakeup(tick);
164
fuse_lck_mtx_unlock(tick->tk_aw_mtx);
165
FUSE_ASSERT_AW_DONE(tick);
166
fuse_ticket_drop(tick);
167
}
168
fuse_lck_mtx_unlock(fdata->aw_mtx);
169
170
/* Cleanup unsent operations */
171
fuse_lck_mtx_lock(fdata->ms_mtx);
172
while ((tick = fuse_ms_pop(fdata))) {
173
fuse_ticket_drop(tick);
174
}
175
fuse_lck_mtx_unlock(fdata->ms_mtx);
176
FUSE_UNLOCK();
177
178
fdata_trydestroy(fdata);
179
}
180
181
static int
182
fuse_device_filter(struct cdev *dev, struct knote *kn)
183
{
184
struct fuse_data *data;
185
int error;
186
187
error = devfs_get_cdevpriv((void **)&data);
188
189
if (error == 0 && kn->kn_filter == EVFILT_READ) {
190
kn->kn_fop = &fuse_device_rfiltops;
191
kn->kn_hook = data;
192
knlist_add(&data->ks_rsel.si_note, kn, 0);
193
error = 0;
194
} else if (error == 0 && kn->kn_filter == EVFILT_WRITE) {
195
kn->kn_fop = &fuse_device_wfiltops;
196
error = 0;
197
} else if (error == 0) {
198
error = EXTERROR(EINVAL, "Unsupported kevent filter");
199
kn->kn_data = error;
200
}
201
202
return (error);
203
}
204
205
static void
206
fuse_device_filt_detach(struct knote *kn)
207
{
208
struct fuse_data *data;
209
210
data = (struct fuse_data*)kn->kn_hook;
211
MPASS(data != NULL);
212
knlist_remove(&data->ks_rsel.si_note, kn, 0);
213
kn->kn_hook = NULL;
214
}
215
216
static int
217
fuse_device_filt_read(struct knote *kn, long hint)
218
{
219
struct fuse_data *data;
220
int ready;
221
222
data = (struct fuse_data*)kn->kn_hook;
223
MPASS(data != NULL);
224
225
mtx_assert(&data->ms_mtx, MA_OWNED);
226
if (fdata_get_dead(data)) {
227
kn->kn_flags |= EV_EOF;
228
kn->kn_fflags = ENODEV;
229
kn->kn_data = 1;
230
ready = 1;
231
} else if (STAILQ_FIRST(&data->ms_head)) {
232
MPASS(data->ms_count >= 1);
233
kn->kn_data = data->ms_count;
234
ready = 1;
235
} else {
236
ready = 0;
237
}
238
239
return (ready);
240
}
241
242
static int
243
fuse_device_filt_write(struct knote *kn, long hint)
244
{
245
246
kn->kn_data = 0;
247
248
/* The device is always ready to write, so we return 1*/
249
return (1);
250
}
251
252
/*
253
* Resources are set up on a per-open basis
254
*/
255
static int
256
fuse_device_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
257
{
258
struct fuse_data *fdata;
259
int error;
260
261
SDT_PROBE2(fusefs, , device, trace, 1, "device open");
262
263
fdata = fdata_alloc(dev, td->td_ucred);
264
error = devfs_set_cdevpriv(fdata, fdata_dtor);
265
if (error != 0)
266
fdata_trydestroy(fdata);
267
else
268
SDT_PROBE2(fusefs, , device, trace, 1, "device open success");
269
return (error);
270
}
271
272
int
273
fuse_device_poll(struct cdev *dev, int events, struct thread *td)
274
{
275
struct fuse_data *data;
276
int error, revents = 0;
277
278
error = devfs_get_cdevpriv((void **)&data);
279
if (error != 0)
280
return (events &
281
(POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));
282
283
if (events & (POLLIN | POLLRDNORM)) {
284
fuse_lck_mtx_lock(data->ms_mtx);
285
if (fdata_get_dead(data) || STAILQ_FIRST(&data->ms_head))
286
revents |= events & (POLLIN | POLLRDNORM);
287
else
288
selrecord(td, &data->ks_rsel);
289
fuse_lck_mtx_unlock(data->ms_mtx);
290
}
291
if (events & (POLLOUT | POLLWRNORM)) {
292
revents |= events & (POLLOUT | POLLWRNORM);
293
}
294
return (revents);
295
}
296
297
/*
298
* fuse_device_read hangs on the queue of VFS messages.
299
* When it's notified that there is a new one, it picks that and
300
* passes up to the daemon
301
*/
302
int
303
fuse_device_read(struct cdev *dev, struct uio *uio, int ioflag)
304
{
305
int err;
306
struct fuse_data *data;
307
struct fuse_ticket *tick;
308
void *buf;
309
int buflen;
310
311
SDT_PROBE2(fusefs, , device, trace, 1, "fuse device read");
312
313
err = devfs_get_cdevpriv((void **)&data);
314
if (err != 0)
315
return (err);
316
317
fuse_lck_mtx_lock(data->ms_mtx);
318
again:
319
if (fdata_get_dead(data)) {
320
SDT_PROBE2(fusefs, , device, trace, 2,
321
"we know early on that reader should be kicked so we "
322
"don't wait for news");
323
fuse_lck_mtx_unlock(data->ms_mtx);
324
return (EXTERROR(ENODEV, "This FUSE session is about to be closed"));
325
}
326
if (!(tick = fuse_ms_pop(data))) {
327
/* check if we may block */
328
if (ioflag & O_NONBLOCK) {
329
/* get outa here soon */
330
fuse_lck_mtx_unlock(data->ms_mtx);
331
return (EAGAIN);
332
} else {
333
err = msleep(data, &data->ms_mtx, PCATCH, "fu_msg", 0);
334
if (err != 0) {
335
fuse_lck_mtx_unlock(data->ms_mtx);
336
if (fdata_get_dead(data))
337
err = EXTERROR(ENODEV,
338
"This FUSE session is about to be closed");
339
return (err);
340
}
341
tick = fuse_ms_pop(data);
342
}
343
}
344
if (!tick) {
345
/*
346
* We can get here if fuse daemon suddenly terminates,
347
* eg, by being hit by a SIGKILL
348
* -- and some other cases, too, tho not totally clear, when
349
* (cv_signal/wakeup_one signals the whole process ?)
350
*/
351
SDT_PROBE2(fusefs, , device, trace, 1, "no message on thread");
352
goto again;
353
}
354
fuse_lck_mtx_unlock(data->ms_mtx);
355
356
if (fdata_get_dead(data)) {
357
/*
358
* somebody somewhere -- eg., umount routine --
359
* wants this liaison finished off
360
*/
361
SDT_PROBE2(fusefs, , device, trace, 2,
362
"reader is to be sacked");
363
if (tick) {
364
SDT_PROBE2(fusefs, , device, trace, 2, "weird -- "
365
"\"kick\" is set tho there is message");
366
FUSE_ASSERT_MS_DONE(tick);
367
fuse_ticket_drop(tick);
368
}
369
/* This should make the daemon get off of us */
370
return (EXTERROR(ENODEV, "This FUSE session is about to be closed"));
371
}
372
SDT_PROBE2(fusefs, , device, trace, 1,
373
"fuse device read message successfully");
374
375
buf = tick->tk_ms_fiov.base;
376
buflen = tick->tk_ms_fiov.len;
377
378
/*
379
* Why not ban mercilessly stupid daemons who can't keep up
380
* with us? (There is no much use of a partial read here...)
381
*/
382
/*
383
* XXX note that in such cases Linux FUSE throws EIO at the
384
* syscall invoker and stands back to the message queue. The
385
* rationale should be made clear (and possibly adopt that
386
* behaviour). Keeping the current scheme at least makes
387
* fallacy as loud as possible...
388
*/
389
if (uio->uio_resid < buflen) {
390
fdata_set_dead(data);
391
SDT_PROBE2(fusefs, , device, trace, 2,
392
"daemon is stupid, kick it off...");
393
err = EXTERROR(ENODEV, "Partial read attempted");
394
} else {
395
err = uiomove(buf, buflen, uio);
396
}
397
398
FUSE_ASSERT_MS_DONE(tick);
399
fuse_ticket_drop(tick);
400
401
return (err);
402
}
403
404
static inline int
405
fuse_ohead_audit(struct fuse_out_header *ohead, struct uio *uio)
406
{
407
if (uio->uio_resid + sizeof(struct fuse_out_header) != ohead->len) {
408
SDT_PROBE2(fusefs, , device, trace, 1,
409
"Format error: body size "
410
"differs from size claimed by header");
411
return (EXTERROR(EINVAL, "Format error: body size "
412
"differs from size claimed by header"));
413
}
414
if (uio->uio_resid && ohead->unique != 0 && ohead->error) {
415
SDT_PROBE2(fusefs, , device, trace, 1,
416
"Format error: non zero error but message had a body");
417
return (EXTERROR(EINVAL, "Format error: non zero error, "
418
"but message had a body"));
419
}
420
421
return (0);
422
}
423
424
SDT_PROBE_DEFINE1(fusefs, , device, fuse_device_write_notify,
425
"struct fuse_out_header*");
426
SDT_PROBE_DEFINE1(fusefs, , device, fuse_device_write_missing_ticket,
427
"uint64_t");
428
SDT_PROBE_DEFINE1(fusefs, , device, fuse_device_write_found,
429
"struct fuse_ticket*");
430
/*
431
* fuse_device_write first reads the header sent by the daemon.
432
* If that's OK, looks up ticket/callback node by the unique id seen in header.
433
* If the callback node contains a handler function, the uio is passed over
434
* that.
435
*/
436
static int
437
fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag)
438
{
439
struct fuse_out_header ohead;
440
int err = 0;
441
struct fuse_data *data;
442
struct mount *mp;
443
struct fuse_ticket *tick, *itick, *x_tick;
444
int found = 0;
445
446
err = devfs_get_cdevpriv((void **)&data);
447
if (err != 0)
448
return (err);
449
450
if (uio->uio_resid < sizeof(struct fuse_out_header)) {
451
SDT_PROBE2(fusefs, , device, trace, 1,
452
"fuse_device_write got less than a header!");
453
fdata_set_dead(data);
454
return (EXTERROR(EINVAL, "fuse_device_write got less than a header!"));
455
}
456
if ((err = uiomove(&ohead, sizeof(struct fuse_out_header), uio)) != 0)
457
return (err);
458
459
if (data->linux_errnos != 0 && ohead.error != 0) {
460
err = -ohead.error;
461
if (err < 0 || err >= nitems(linux_to_bsd_errtbl))
462
return (EXTERROR(EINVAL, "Unknown Linux errno", err));
463
464
/* '-', because it will get flipped again below */
465
ohead.error = -linux_to_bsd_errtbl[err];
466
}
467
468
/*
469
* We check header information (which is redundant) and compare it
470
* with what we see. If we see some inconsistency we discard the
471
* whole answer and proceed on as if it had never existed. In
472
* particular, no pretender will be woken up, regardless the
473
* "unique" value in the header.
474
*/
475
if ((err = fuse_ohead_audit(&ohead, uio))) {
476
fdata_set_dead(data);
477
return (err);
478
}
479
/* Pass stuff over to callback if there is one installed */
480
481
/* Looking for ticket with the unique id of header */
482
fuse_lck_mtx_lock(data->aw_mtx);
483
TAILQ_FOREACH_SAFE(tick, &data->aw_head, tk_aw_link,
484
x_tick) {
485
if (tick->tk_unique == ohead.unique) {
486
SDT_PROBE1(fusefs, , device, fuse_device_write_found,
487
tick);
488
found = 1;
489
fuse_aw_remove(tick);
490
break;
491
}
492
}
493
if (found && tick->irq_unique > 0) {
494
/*
495
* Discard the FUSE_INTERRUPT ticket that tried to interrupt
496
* this operation
497
*/
498
TAILQ_FOREACH_SAFE(itick, &data->aw_head, tk_aw_link,
499
x_tick) {
500
if (itick->tk_unique == tick->irq_unique) {
501
fuse_aw_remove(itick);
502
fuse_ticket_drop(itick);
503
break;
504
}
505
}
506
tick->irq_unique = 0;
507
}
508
fuse_lck_mtx_unlock(data->aw_mtx);
509
510
if (found) {
511
if (tick->tk_aw_handler) {
512
/*
513
* We found a callback with proper handler. In this
514
* case the out header will be 0wnd by the callback,
515
* so the fun of freeing that is left for her.
516
* (Then, by all chance, she'll just get that's done
517
* via ticket_drop(), so no manual mucking
518
* around...)
519
*/
520
SDT_PROBE2(fusefs, , device, trace, 1,
521
"pass ticket to a callback");
522
/* Sanitize the linuxism of negative errnos */
523
ohead.error *= -1;
524
if (ohead.error < 0 || ohead.error > ELAST) {
525
/* Illegal error code */
526
ohead.error = EIO;
527
memcpy(&tick->tk_aw_ohead, &ohead,
528
sizeof(ohead));
529
tick->tk_aw_handler(tick, uio);
530
err = EXTERROR(EINVAL, "Unknown errno", ohead.error);
531
} else {
532
memcpy(&tick->tk_aw_ohead, &ohead,
533
sizeof(ohead));
534
err = tick->tk_aw_handler(tick, uio);
535
}
536
} else {
537
/* pretender doesn't wanna do anything with answer */
538
SDT_PROBE2(fusefs, , device, trace, 1,
539
"stuff devalidated, so we drop it");
540
}
541
542
/*
543
* As aw_mtx was not held during the callback execution the
544
* ticket may have been inserted again. However, this is safe
545
* because fuse_ticket_drop() will deal with refcount anyway.
546
*/
547
fuse_ticket_drop(tick);
548
} else if (ohead.unique == 0){
549
/* unique == 0 means asynchronous notification */
550
SDT_PROBE1(fusefs, , device, fuse_device_write_notify, &ohead);
551
mp = data->mp;
552
vfs_ref(mp);
553
err = vfs_busy(mp, 0);
554
vfs_rel(mp);
555
if (err)
556
return (err);
557
558
switch (ohead.error) {
559
case FUSE_NOTIFY_INVAL_ENTRY:
560
err = fuse_internal_invalidate_entry(mp, uio);
561
break;
562
case FUSE_NOTIFY_INVAL_INODE:
563
err = fuse_internal_invalidate_inode(mp, uio);
564
break;
565
case FUSE_NOTIFY_RETRIEVE:
566
case FUSE_NOTIFY_STORE:
567
/*
568
* Unimplemented. I don't know of any file systems
569
* that use them, and the protocol isn't sound anyway,
570
* since the notification messages don't include the
571
* inode's generation number. Without that, it's
572
* possible to manipulate the cache of the wrong vnode.
573
* Finally, it's not defined what this message should
574
* do for a file with dirty cache.
575
*/
576
case FUSE_NOTIFY_POLL:
577
/* Unimplemented. See comments in fuse_vnops */
578
default:
579
/* Not implemented */
580
err = EXTERROR(ENOSYS, "Unimplemented FUSE notification code",
581
ohead.error);
582
}
583
vfs_unbusy(mp);
584
} else {
585
/* no callback at all! */
586
SDT_PROBE1(fusefs, , device, fuse_device_write_missing_ticket,
587
ohead.unique);
588
if (ohead.error == -EAGAIN) {
589
/*
590
* This was probably a response to a FUSE_INTERRUPT
591
* operation whose original operation is already
592
* complete. We can't store FUSE_INTERRUPT tickets
593
* indefinitely because their responses are optional.
594
* So we delete them when the original operation
595
* completes. And sadly the fuse_header_out doesn't
596
* identify the opcode, so we have to guess.
597
*/
598
err = 0;
599
} else {
600
err = EXTERROR(EINVAL, "FUSE ticket is missing");
601
}
602
}
603
604
return (err);
605
}
606
607
int
608
fuse_device_init(void)
609
{
610
611
fuse_dev = make_dev(&fuse_device_cdevsw, 0, UID_ROOT, GID_OPERATOR,
612
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH, "fuse");
613
if (fuse_dev == NULL)
614
return (ENOMEM);
615
return (0);
616
}
617
618
void
619
fuse_device_destroy(void)
620
{
621
622
MPASS(fuse_dev != NULL);
623
destroy_dev(fuse_dev);
624
}
625
626