Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/fs/ceph/export.c
26282 views
1
// SPDX-License-Identifier: GPL-2.0
2
#include <linux/ceph/ceph_debug.h>
3
4
#include <linux/exportfs.h>
5
#include <linux/slab.h>
6
#include <linux/unaligned.h>
7
8
#include "super.h"
9
#include "mds_client.h"
10
#include "crypto.h"
11
12
/*
13
* Basic fh
14
*/
15
struct ceph_nfs_fh {
16
u64 ino;
17
} __attribute__ ((packed));
18
19
/*
20
* Larger fh that includes parent ino.
21
*/
22
struct ceph_nfs_confh {
23
u64 ino, parent_ino;
24
} __attribute__ ((packed));
25
26
/*
27
* fh for snapped inode
28
*/
29
struct ceph_nfs_snapfh {
30
u64 ino;
31
u64 snapid;
32
u64 parent_ino;
33
u32 hash;
34
} __attribute__ ((packed));
35
36
/*
 * Handle sizes, counted in u32 units rather than bytes; these are the
 * values compared against *max_len and fh_len in this file.
 */
#define BYTES_PER_U32			(sizeof(u32))
#define CEPH_FH_BASIC_SIZE		(sizeof(struct ceph_nfs_fh) / BYTES_PER_U32)
#define CEPH_FH_WITH_PARENT_SIZE	(sizeof(struct ceph_nfs_confh) / BYTES_PER_U32)
#define CEPH_FH_SNAPPED_INODE_SIZE	(sizeof(struct ceph_nfs_snapfh) / BYTES_PER_U32)
43
44
/*
 * Encode a file handle for an inode whose snap id is not CEPH_NOSNAP.
 *
 * @inode:        inode to encode
 * @rawfh:        output buffer, filled in as a struct ceph_nfs_snapfh
 * @max_len:      in: buffer capacity in u32 units; out: length required/used
 * @parent_inode: unused here; the parent is derived from an alias dentry
 *
 * Returns FILEID_BTRFS_WITH_PARENT on success, FILEID_INVALID when the
 * buffer is too small (*max_len is then set to the required length), or
 * -EINVAL when no alias dentry exists or a non-directory has no usable
 * parent.
 */
static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len,
			      struct inode *parent_inode)
{
	struct ceph_client *cl = ceph_inode_to_client(inode);
	static const int snap_handle_length = CEPH_FH_SNAPPED_INODE_SIZE;
	struct ceph_nfs_snapfh *sfh = (void *)rawfh;
	u64 snapid = ceph_snap(inode);
	int ret;
	bool no_parent = true;

	if (*max_len < snap_handle_length) {
		*max_len = snap_handle_length;
		ret = FILEID_INVALID;
		goto out;
	}

	ret = -EINVAL;
	if (snapid != CEPH_SNAPDIR) {
		struct inode *dir;
		struct dentry *dentry = d_find_alias(inode);

		if (!dentry)
			goto out;

		rcu_read_lock();
		dir = d_inode_rcu(dentry->d_parent);
		/* a parent that is itself a snapdir is not recorded */
		if (ceph_snap(dir) != CEPH_SNAPDIR) {
			sfh->parent_ino = ceph_ino(dir);
			sfh->hash = ceph_dentry_hash(dir, dentry);
			no_parent = false;
		}
		rcu_read_unlock();
		dput(dentry);
	}

	if (no_parent) {
		if (!S_ISDIR(inode->i_mode))
			goto out;
		/*
		 * Mark "no real parent" by storing the inode's own number as
		 * parent_ino; __snapfh_to_dentry() tests ino == parent_ino.
		 * Take the value from the inode: sfh->ino has not been
		 * written yet at this point, so reading it back would encode
		 * whatever the caller's buffer happened to contain.
		 */
		sfh->parent_ino = ceph_ino(inode);
		sfh->hash = 0;
	}
	sfh->ino = ceph_ino(inode);
	sfh->snapid = snapid;

	*max_len = snap_handle_length;
	ret = FILEID_BTRFS_WITH_PARENT;
out:
	doutc(cl, "%p %llx.%llx ret=%d\n", inode, ceph_vinop(inode), ret);
	return ret;
}
93
94
/*
 * Encode a file handle for @inode into @rawfh.
 *
 * Inodes whose snap id is not CEPH_NOSNAP are delegated to
 * ceph_encode_snapfh().  Otherwise the handle is a struct ceph_nfs_confh
 * when @parent_inode is given and a struct ceph_nfs_fh when it is not.
 *
 * @max_len is the buffer capacity in u32 units on entry and the handle
 * length on return.  Returns FILEID_INO32_GEN_PARENT or FILEID_INO32_GEN,
 * or FILEID_INVALID (with *max_len set to the required length) when the
 * buffer is too small.
 */
static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
			  struct inode *parent_inode)
{
	struct ceph_client *cl = ceph_inode_to_client(inode);
	const int needed = parent_inode ? CEPH_FH_WITH_PARENT_SIZE
					: CEPH_FH_BASIC_SIZE;

	if (ceph_snap(inode) != CEPH_NOSNAP)
		return ceph_encode_snapfh(inode, rawfh, max_len, parent_inode);

	if (*max_len < needed) {
		*max_len = needed;
		return FILEID_INVALID;
	}

	if (!parent_inode) {
		struct ceph_nfs_fh *fh = (void *)rawfh;

		doutc(cl, "%p %llx.%llx\n", inode, ceph_vinop(inode));
		fh->ino = ceph_ino(inode);
		*max_len = needed;
		return FILEID_INO32_GEN;
	} else {
		struct ceph_nfs_confh *cfh = (void *)rawfh;

		doutc(cl, "%p %llx.%llx with parent %p %llx.%llx\n", inode,
		      ceph_vinop(inode), parent_inode, ceph_vinop(parent_inode));
		cfh->ino = ceph_ino(inode);
		cfh->parent_ino = ceph_ino(parent_inode);
		*max_len = needed;
		return FILEID_INO32_GEN_PARENT;
	}
}
130
131
/*
 * Return a referenced head (CEPH_NOSNAP) inode for @ino.
 *
 * An inode found via ceph_find_inode() is returned directly unless it is
 * shut down.  Otherwise a CEPH_MDS_OP_LOOKUPINO request is issued.
 * Returns an ERR_PTR on failure; -ESTALE covers reserved inode numbers,
 * shut-down inodes and a request that produced no target inode without
 * reporting a negative error.
 */
static struct inode *__lookup_inode(struct super_block *sb, u64 ino)
{
	struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc;
	struct ceph_vino vino = {
		.ino = ino,
		.snap = CEPH_NOSNAP,
	};
	struct ceph_mds_request *req;
	struct inode *inode;
	int mask;
	int err;

	if (ceph_vino_is_reserved(vino))
		return ERR_PTR(-ESTALE);

	inode = ceph_find_inode(sb, vino);
	if (inode) {
		if (!ceph_inode_is_shutdown(inode))
			return inode;
		iput(inode);
		return ERR_PTR(-ESTALE);
	}

	/* not found locally: ask an MDS to look the inode up by number */
	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
				       USE_ANY_MDS);
	if (IS_ERR(req))
		return ERR_CAST(req);

	mask = CEPH_STAT_CAP_INODE;
	if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
		mask |= CEPH_CAP_XATTR_SHARED;
	req->r_args.lookupino.mask = cpu_to_le32(mask);

	req->r_ino1 = vino;
	req->r_num_caps = 1;
	err = ceph_mdsc_do_request(mdsc, NULL, req);

	/* take our own reference before the request is released */
	inode = req->r_target_inode;
	if (inode)
		ihold(inode);
	ceph_mdsc_put_request(req);

	if (inode)
		return inode;
	if (err < 0)
		return ERR_PTR(err);
	return ERR_PTR(-ESTALE);
}
176
177
/*
 * Look up the head inode for @ino via __lookup_inode() and additionally
 * reject inodes whose link count is zero with -ESTALE.
 *
 * Returns a referenced inode or an ERR_PTR.
 */
struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino)
{
	struct inode *inode = __lookup_inode(sb, ino);

	if (!IS_ERR(inode) && inode->i_nlink == 0) {
		iput(inode);
		inode = ERR_PTR(-ESTALE);
	}
	return inode;
}
188
189
/*
 * Turn a head inode number into a dentry.
 *
 * Looks the inode up with __lookup_inode(), refreshes its attributes
 * with CEPH_CAP_LINK_SHARED, and returns -ESTALE when the inode is
 * unlinked and not open.  On success the inode reference is handed to
 * d_obtain_alias().  Returns a dentry or an ERR_PTR.
 */
static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
{
	struct inode *inode = __lookup_inode(sb, ino);
	struct ceph_inode_info *ci;
	int err;

	if (IS_ERR(inode))
		return ERR_CAST(inode);

	/* only derive the ceph inode once we know inode is a real pointer */
	ci = ceph_inode(inode);

	/* We need LINK caps to reliably check i_nlink */
	err = ceph_do_getattr(inode, CEPH_CAP_LINK_SHARED, false);
	if (err) {
		iput(inode);
		return ERR_PTR(err);
	}
	/* -ESTALE if inode has been unlinked and no file is open */
	if ((inode->i_nlink == 0) && !__ceph_is_file_opened(ci)) {
		iput(inode);
		return ERR_PTR(-ESTALE);
	}
	return d_obtain_alias(inode);
}
210
211
static struct dentry *__snapfh_to_dentry(struct super_block *sb,
212
struct ceph_nfs_snapfh *sfh,
213
bool want_parent)
214
{
215
struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc;
216
struct ceph_client *cl = mdsc->fsc->client;
217
struct ceph_mds_request *req;
218
struct inode *inode;
219
struct ceph_vino vino;
220
int mask;
221
int err;
222
bool unlinked = false;
223
224
if (want_parent) {
225
vino.ino = sfh->parent_ino;
226
if (sfh->snapid == CEPH_SNAPDIR)
227
vino.snap = CEPH_NOSNAP;
228
else if (sfh->ino == sfh->parent_ino)
229
vino.snap = CEPH_SNAPDIR;
230
else
231
vino.snap = sfh->snapid;
232
} else {
233
vino.ino = sfh->ino;
234
vino.snap = sfh->snapid;
235
}
236
237
if (ceph_vino_is_reserved(vino))
238
return ERR_PTR(-ESTALE);
239
240
inode = ceph_find_inode(sb, vino);
241
if (inode) {
242
if (ceph_inode_is_shutdown(inode)) {
243
iput(inode);
244
return ERR_PTR(-ESTALE);
245
}
246
return d_obtain_alias(inode);
247
}
248
249
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
250
USE_ANY_MDS);
251
if (IS_ERR(req))
252
return ERR_CAST(req);
253
254
mask = CEPH_STAT_CAP_INODE;
255
if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
256
mask |= CEPH_CAP_XATTR_SHARED;
257
req->r_args.lookupino.mask = cpu_to_le32(mask);
258
if (vino.snap < CEPH_NOSNAP) {
259
req->r_args.lookupino.snapid = cpu_to_le64(vino.snap);
260
if (!want_parent && sfh->ino != sfh->parent_ino) {
261
req->r_args.lookupino.parent =
262
cpu_to_le64(sfh->parent_ino);
263
req->r_args.lookupino.hash =
264
cpu_to_le32(sfh->hash);
265
}
266
}
267
268
req->r_ino1 = vino;
269
req->r_num_caps = 1;
270
err = ceph_mdsc_do_request(mdsc, NULL, req);
271
inode = req->r_target_inode;
272
if (inode) {
273
if (vino.snap == CEPH_SNAPDIR) {
274
if (inode->i_nlink == 0)
275
unlinked = true;
276
inode = ceph_get_snapdir(inode);
277
} else if (ceph_snap(inode) == vino.snap) {
278
ihold(inode);
279
} else {
280
/* mds does not support lookup snapped inode */
281
inode = ERR_PTR(-EOPNOTSUPP);
282
}
283
} else {
284
inode = ERR_PTR(-ESTALE);
285
}
286
ceph_mdsc_put_request(req);
287
288
if (want_parent) {
289
doutc(cl, "%llx.%llx\n err=%d\n", vino.ino, vino.snap, err);
290
} else {
291
doutc(cl, "%llx.%llx parent %llx hash %x err=%d", vino.ino,
292
vino.snap, sfh->parent_ino, sfh->hash, err);
293
}
294
/* see comments in ceph_get_parent() */
295
return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode);
296
}
297
298
/*
299
* convert regular fh to dentry
300
*/
301
static struct dentry *ceph_fh_to_dentry(struct super_block *sb,
302
struct fid *fid,
303
int fh_len, int fh_type)
304
{
305
struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
306
struct ceph_nfs_fh *fh = (void *)fid->raw;
307
308
if (fh_type == FILEID_BTRFS_WITH_PARENT) {
309
struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
310
return __snapfh_to_dentry(sb, sfh, false);
311
}
312
313
if (fh_type != FILEID_INO32_GEN &&
314
fh_type != FILEID_INO32_GEN_PARENT)
315
return NULL;
316
if (fh_len < sizeof(*fh) / BYTES_PER_U32)
317
return NULL;
318
319
doutc(fsc->client, "%llx\n", fh->ino);
320
return __fh_to_dentry(sb, fh->ino);
321
}
322
323
/*
 * Issue a CEPH_MDS_OP_LOOKUPPARENT request and return a dentry for the
 * resulting inode.
 *
 * The request targets @child's inode when @child is non-NULL, otherwise
 * the head inode numbered @ino.  Returns an ERR_PTR on request failure,
 * or -ENOENT when the reply carried no target inode.
 */
static struct dentry *__get_parent(struct super_block *sb,
				   struct dentry *child, u64 ino)
{
	struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(sb)->mdsc;
	struct ceph_mds_request *req;
	struct inode *parent = NULL;
	int mask = CEPH_STAT_CAP_INODE;
	int err;

	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT,
				       USE_ANY_MDS);
	if (IS_ERR(req))
		return ERR_CAST(req);

	if (child) {
		struct inode *child_inode = d_inode(child);

		ihold(child_inode);
		req->r_inode = child_inode;
	} else {
		req->r_ino1 = (struct ceph_vino) {
			.ino = ino,
			.snap = CEPH_NOSNAP,
		};
	}

	if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
		mask |= CEPH_CAP_XATTR_SHARED;
	req->r_args.getattr.mask = cpu_to_le32(mask);

	req->r_num_caps = 1;
	err = ceph_mdsc_do_request(mdsc, NULL, req);
	if (!err) {
		/* grab our own reference before dropping the request */
		parent = req->r_target_inode;
		if (parent)
			ihold(parent);
	}
	ceph_mdsc_put_request(req);

	if (err)
		return ERR_PTR(err);
	if (!parent)
		return ERR_PTR(-ENOENT);

	return d_obtain_alias(parent);
}
368
369
static struct dentry *ceph_get_parent(struct dentry *child)
370
{
371
struct inode *inode = d_inode(child);
372
struct ceph_client *cl = ceph_inode_to_client(inode);
373
struct dentry *dn;
374
375
if (ceph_snap(inode) != CEPH_NOSNAP) {
376
struct inode* dir;
377
bool unlinked = false;
378
/* do not support non-directory */
379
if (!d_is_dir(child)) {
380
dn = ERR_PTR(-EINVAL);
381
goto out;
382
}
383
dir = __lookup_inode(inode->i_sb, ceph_ino(inode));
384
if (IS_ERR(dir)) {
385
dn = ERR_CAST(dir);
386
goto out;
387
}
388
/* There can be multiple paths to access snapped inode.
389
* For simplicity, treat snapdir of head inode as parent */
390
if (ceph_snap(inode) != CEPH_SNAPDIR) {
391
struct inode *snapdir = ceph_get_snapdir(dir);
392
if (dir->i_nlink == 0)
393
unlinked = true;
394
iput(dir);
395
if (IS_ERR(snapdir)) {
396
dn = ERR_CAST(snapdir);
397
goto out;
398
}
399
dir = snapdir;
400
}
401
/* If directory has already been deleted, further get_parent
402
* will fail. Do not mark snapdir dentry as disconnected,
403
* this prevents exportfs from doing further get_parent. */
404
if (unlinked)
405
dn = d_obtain_root(dir);
406
else
407
dn = d_obtain_alias(dir);
408
} else {
409
dn = __get_parent(child->d_sb, child, 0);
410
}
411
out:
412
doutc(cl, "child %p %p %llx.%llx err=%ld\n", child, inode,
413
ceph_vinop(inode), (long)PTR_ERR_OR_ZERO(dn));
414
return dn;
415
}
416
417
/*
418
* convert regular fh to parent
419
*/
420
static struct dentry *ceph_fh_to_parent(struct super_block *sb,
421
struct fid *fid,
422
int fh_len, int fh_type)
423
{
424
struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
425
struct ceph_nfs_confh *cfh = (void *)fid->raw;
426
struct dentry *dentry;
427
428
if (fh_type == FILEID_BTRFS_WITH_PARENT) {
429
struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
430
return __snapfh_to_dentry(sb, sfh, true);
431
}
432
433
if (fh_type != FILEID_INO32_GEN_PARENT)
434
return NULL;
435
if (fh_len < sizeof(*cfh) / BYTES_PER_U32)
436
return NULL;
437
438
doutc(fsc->client, "%llx\n", cfh->parent_ino);
439
dentry = __get_parent(sb, NULL, cfh->ino);
440
if (unlikely(dentry == ERR_PTR(-ENOENT)))
441
dentry = __fh_to_dentry(sb, cfh->parent_ino);
442
return dentry;
443
}
444
445
static int __get_snap_name(struct dentry *parent, char *name,
446
struct dentry *child)
447
{
448
struct inode *inode = d_inode(child);
449
struct inode *dir = d_inode(parent);
450
struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
451
struct ceph_mds_request *req = NULL;
452
char *last_name = NULL;
453
unsigned next_offset = 2;
454
int err = -EINVAL;
455
456
if (ceph_ino(inode) != ceph_ino(dir))
457
goto out;
458
if (ceph_snap(inode) == CEPH_SNAPDIR) {
459
if (ceph_snap(dir) == CEPH_NOSNAP) {
460
/*
461
* .get_name() from struct export_operations
462
* assumes that its 'name' parameter is pointing
463
* to a NAME_MAX+1 sized buffer
464
*/
465
strscpy(name, fsc->mount_options->snapdir_name,
466
NAME_MAX + 1);
467
err = 0;
468
}
469
goto out;
470
}
471
if (ceph_snap(dir) != CEPH_SNAPDIR)
472
goto out;
473
474
while (1) {
475
struct ceph_mds_reply_info_parsed *rinfo;
476
struct ceph_mds_reply_dir_entry *rde;
477
int i;
478
479
req = ceph_mdsc_create_request(fsc->mdsc, CEPH_MDS_OP_LSSNAP,
480
USE_AUTH_MDS);
481
if (IS_ERR(req)) {
482
err = PTR_ERR(req);
483
req = NULL;
484
goto out;
485
}
486
err = ceph_alloc_readdir_reply_buffer(req, inode);
487
if (err)
488
goto out;
489
490
req->r_direct_mode = USE_AUTH_MDS;
491
req->r_readdir_offset = next_offset;
492
req->r_args.readdir.flags =
493
cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
494
if (last_name) {
495
req->r_path2 = last_name;
496
last_name = NULL;
497
}
498
499
req->r_inode = dir;
500
ihold(dir);
501
req->r_dentry = dget(parent);
502
503
inode_lock(dir);
504
err = ceph_mdsc_do_request(fsc->mdsc, NULL, req);
505
inode_unlock(dir);
506
507
if (err < 0)
508
goto out;
509
510
rinfo = &req->r_reply_info;
511
for (i = 0; i < rinfo->dir_nr; i++) {
512
rde = rinfo->dir_entries + i;
513
BUG_ON(!rde->inode.in);
514
if (ceph_snap(inode) ==
515
le64_to_cpu(rde->inode.in->snapid)) {
516
memcpy(name, rde->name, rde->name_len);
517
name[rde->name_len] = '\0';
518
err = 0;
519
goto out;
520
}
521
}
522
523
if (rinfo->dir_end)
524
break;
525
526
BUG_ON(rinfo->dir_nr <= 0);
527
rde = rinfo->dir_entries + (rinfo->dir_nr - 1);
528
next_offset += rinfo->dir_nr;
529
last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL);
530
if (!last_name) {
531
err = -ENOMEM;
532
goto out;
533
}
534
535
ceph_mdsc_put_request(req);
536
req = NULL;
537
}
538
err = -ENOENT;
539
out:
540
if (req)
541
ceph_mdsc_put_request(req);
542
kfree(last_name);
543
doutc(fsc->client, "child dentry %p %p %llx.%llx err=%d\n", child,
544
inode, ceph_vinop(inode), err);
545
return err;
546
}
547
548
static int ceph_get_name(struct dentry *parent, char *name,
549
struct dentry *child)
550
{
551
struct ceph_mds_client *mdsc;
552
struct ceph_mds_request *req;
553
struct inode *dir = d_inode(parent);
554
struct inode *inode = d_inode(child);
555
struct ceph_mds_reply_info_parsed *rinfo;
556
int err;
557
558
if (ceph_snap(inode) != CEPH_NOSNAP)
559
return __get_snap_name(parent, name, child);
560
561
mdsc = ceph_inode_to_fs_client(inode)->mdsc;
562
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
563
USE_ANY_MDS);
564
if (IS_ERR(req))
565
return PTR_ERR(req);
566
567
inode_lock(dir);
568
req->r_inode = inode;
569
ihold(inode);
570
req->r_ino2 = ceph_vino(d_inode(parent));
571
req->r_parent = dir;
572
ihold(dir);
573
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
574
req->r_num_caps = 2;
575
err = ceph_mdsc_do_request(mdsc, NULL, req);
576
inode_unlock(dir);
577
578
if (err)
579
goto out;
580
581
rinfo = &req->r_reply_info;
582
if (!IS_ENCRYPTED(dir)) {
583
memcpy(name, rinfo->dname, rinfo->dname_len);
584
name[rinfo->dname_len] = 0;
585
} else {
586
struct fscrypt_str oname = FSTR_INIT(NULL, 0);
587
struct ceph_fname fname = { .dir = dir,
588
.name = rinfo->dname,
589
.ctext = rinfo->altname,
590
.name_len = rinfo->dname_len,
591
.ctext_len = rinfo->altname_len };
592
593
err = ceph_fname_alloc_buffer(dir, &oname);
594
if (err < 0)
595
goto out;
596
597
err = ceph_fname_to_usr(&fname, NULL, &oname, NULL);
598
if (!err) {
599
memcpy(name, oname.name, oname.len);
600
name[oname.len] = 0;
601
}
602
ceph_fname_free_buffer(dir, &oname);
603
}
604
out:
605
doutc(mdsc->fsc->client, "child dentry %p %p %llx.%llx err %d %s%s\n",
606
child, inode, ceph_vinop(inode), err, err ? "" : "name ",
607
err ? "" : name);
608
ceph_mdsc_put_request(req);
609
return err;
610
}
611
612
const struct export_operations ceph_export_ops = {
613
.encode_fh = ceph_encode_fh,
614
.fh_to_dentry = ceph_fh_to_dentry,
615
.fh_to_parent = ceph_fh_to_parent,
616
.get_parent = ceph_get_parent,
617
.get_name = ceph_get_name,
618
};
619
620