Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/fs/ceph/super.c
26285 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
3
#include <linux/ceph/ceph_debug.h>
4
5
#include <linux/backing-dev.h>
6
#include <linux/ctype.h>
7
#include <linux/fs.h>
8
#include <linux/inet.h>
9
#include <linux/in6.h>
10
#include <linux/module.h>
11
#include <linux/mount.h>
12
#include <linux/fs_context.h>
13
#include <linux/fs_parser.h>
14
#include <linux/sched.h>
15
#include <linux/seq_file.h>
16
#include <linux/slab.h>
17
#include <linux/statfs.h>
18
#include <linux/string.h>
19
20
#include "super.h"
21
#include "mds_client.h"
22
#include "cache.h"
23
#include "crypto.h"
24
25
#include <linux/ceph/ceph_features.h>
26
#include <linux/ceph/decode.h>
27
#include <linux/ceph/mon_client.h>
28
#include <linux/ceph/auth.h>
29
#include <linux/ceph/debugfs.h>
30
31
#include <uapi/linux/magic.h>
32
33
/*
 * List of fs clients (linked through ->metric_wakeup) and the spinlock
 * that is held whenever the list is modified in this file.
 */
static LIST_HEAD(ceph_fsc_list);
static DEFINE_SPINLOCK(ceph_fsc_lock);
35
36
/*
37
* Ceph superblock operations
38
*
39
* Handle the basics of mounting, unmounting.
40
*/
41
42
/*
43
* super ops
44
*/
45
static void ceph_put_super(struct super_block *s)
46
{
47
struct ceph_fs_client *fsc = ceph_sb_to_fs_client(s);
48
49
doutc(fsc->client, "begin\n");
50
ceph_fscrypt_free_dummy_policy(fsc);
51
ceph_mdsc_close_sessions(fsc->mdsc);
52
doutc(fsc->client, "done\n");
53
}
54
55
static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
56
{
57
struct ceph_fs_client *fsc = ceph_inode_to_fs_client(d_inode(dentry));
58
struct ceph_mon_client *monc = &fsc->client->monc;
59
struct ceph_statfs st;
60
int i, err;
61
u64 data_pool;
62
63
doutc(fsc->client, "begin\n");
64
if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) {
65
data_pool = fsc->mdsc->mdsmap->m_data_pg_pools[0];
66
} else {
67
data_pool = CEPH_NOPOOL;
68
}
69
70
err = ceph_monc_do_statfs(monc, data_pool, &st);
71
if (err < 0)
72
return err;
73
74
/* fill in kstatfs */
75
buf->f_type = CEPH_SUPER_MAGIC; /* ?? */
76
77
/*
78
* Express utilization in terms of large blocks to avoid
79
* overflow on 32-bit machines.
80
*/
81
buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
82
83
/*
84
* By default use root quota for stats; fallback to overall filesystem
85
* usage if using 'noquotadf' mount option or if the root dir doesn't
86
* have max_bytes quota set.
87
*/
88
if (ceph_test_mount_opt(fsc, NOQUOTADF) ||
89
!ceph_quota_update_statfs(fsc, buf)) {
90
buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
91
buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
92
buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
93
}
94
95
/*
96
* NOTE: for the time being, we make bsize == frsize to humor
97
* not-yet-ancient versions of glibc that are broken.
98
* Someday, we will probably want to report a real block
99
* size... whatever that may mean for a network file system!
100
*/
101
buf->f_bsize = buf->f_frsize;
102
103
buf->f_files = le64_to_cpu(st.num_objects);
104
buf->f_ffree = -1;
105
buf->f_namelen = NAME_MAX;
106
107
/* Must convert the fsid, for consistent values across arches */
108
buf->f_fsid.val[0] = 0;
109
mutex_lock(&monc->mutex);
110
for (i = 0 ; i < sizeof(monc->monmap->fsid) / sizeof(__le32) ; ++i)
111
buf->f_fsid.val[0] ^= le32_to_cpu(((__le32 *)&monc->monmap->fsid)[i]);
112
mutex_unlock(&monc->mutex);
113
114
/* fold the fs_cluster_id into the upper bits */
115
buf->f_fsid.val[1] = monc->fs_cluster_id;
116
117
doutc(fsc->client, "done\n");
118
return 0;
119
}
120
121
static int ceph_sync_fs(struct super_block *sb, int wait)
122
{
123
struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
124
struct ceph_client *cl = fsc->client;
125
126
if (!wait) {
127
doutc(cl, "(non-blocking)\n");
128
ceph_flush_dirty_caps(fsc->mdsc);
129
ceph_flush_cap_releases(fsc->mdsc);
130
doutc(cl, "(non-blocking) done\n");
131
return 0;
132
}
133
134
doutc(cl, "(blocking)\n");
135
ceph_osdc_sync(&fsc->client->osdc);
136
ceph_mdsc_sync(fsc->mdsc);
137
doutc(cl, "(blocking) done\n");
138
return 0;
139
}
140
141
/*
142
* mount options
143
*/
144
/* Tokens returned by fs_parse() for the entries of ceph_mount_parameters. */
enum {
	/* options taking an integer argument */
	Opt_wsize,
	Opt_rsize,
	Opt_rasize,
	Opt_caps_wanted_delay_min,
	Opt_caps_wanted_delay_max,
	Opt_caps_max,
	Opt_readdir_max_entries,
	Opt_readdir_max_bytes,
	Opt_congestion_kb,

	/* options taking a string argument */
	Opt_snapdirname,
	Opt_mds_namespace,
	Opt_recover_session,
	Opt_source,
	Opt_mon_addr,
	Opt_test_dummy_encryption,

	/* remaining options */
	Opt_dirstat,
	Opt_rbytes,
	Opt_asyncreaddir,
	Opt_dcache,
	Opt_ino32,
	Opt_fscache,
	Opt_poolperm,
	Opt_require_active_mds,
	Opt_acl,
	Opt_quotadf,
	Opt_copyfrom,
	Opt_wsync,
	Opt_pagecache,
	Opt_sparseread,
};
177
178
/* Values accepted by the "recover_session" mount option. */
enum ceph_recover_session_mode {
	ceph_recover_session_no,	/* "no" */
	ceph_recover_session_clean	/* "clean" */
};
182
183
static const struct constant_table ceph_param_recover[] = {
184
{ "no", ceph_recover_session_no },
185
{ "clean", ceph_recover_session_clean },
186
{}
187
};
188
189
static const struct fs_parameter_spec ceph_mount_parameters[] = {
190
fsparam_flag_no ("acl", Opt_acl),
191
fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir),
192
fsparam_s32 ("caps_max", Opt_caps_max),
193
fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max),
194
fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min),
195
fsparam_u32 ("write_congestion_kb", Opt_congestion_kb),
196
fsparam_flag_no ("copyfrom", Opt_copyfrom),
197
fsparam_flag_no ("dcache", Opt_dcache),
198
fsparam_flag_no ("dirstat", Opt_dirstat),
199
fsparam_flag_no ("fsc", Opt_fscache), // fsc|nofsc
200
fsparam_string ("fsc", Opt_fscache), // fsc=...
201
fsparam_flag_no ("ino32", Opt_ino32),
202
fsparam_string ("mds_namespace", Opt_mds_namespace),
203
fsparam_string ("mon_addr", Opt_mon_addr),
204
fsparam_flag_no ("poolperm", Opt_poolperm),
205
fsparam_flag_no ("quotadf", Opt_quotadf),
206
fsparam_u32 ("rasize", Opt_rasize),
207
fsparam_flag_no ("rbytes", Opt_rbytes),
208
fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes),
209
fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries),
210
fsparam_enum ("recover_session", Opt_recover_session, ceph_param_recover),
211
fsparam_flag_no ("require_active_mds", Opt_require_active_mds),
212
fsparam_u32 ("rsize", Opt_rsize),
213
fsparam_string ("snapdirname", Opt_snapdirname),
214
fsparam_string ("source", Opt_source),
215
fsparam_flag ("test_dummy_encryption", Opt_test_dummy_encryption),
216
fsparam_string ("test_dummy_encryption", Opt_test_dummy_encryption),
217
fsparam_u32 ("wsize", Opt_wsize),
218
fsparam_flag_no ("wsync", Opt_wsync),
219
fsparam_flag_no ("pagecache", Opt_pagecache),
220
fsparam_flag_no ("sparseread", Opt_sparseread),
221
{}
222
};
223
224
/*
 * Parsing state stored in fs_context->fs_private while mount
 * parameters are being processed.
 */
struct ceph_parse_opts_ctx {
	struct ceph_options		*copts;	/* passed to ceph_parse_param() / ceph_parse_mon_ips() */
	struct ceph_mount_options	*opts;	/* filesystem-level mount options */
};
228
229
/*
230
* Remove adjacent slashes and then the trailing slash, unless it is
231
* the only remaining character.
232
*
233
* E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/".
234
*/
235
/*
 * Rewrite @path in place: collapse every run of '/' into a single '/'
 * and drop a trailing '/' unless it is the only character left.
 */
static void canonicalize_path(char *path)
{
	const char *src;
	char *dst = path;

	for (src = path; *src != '\0'; src++) {
		/* skip a slash that directly follows an already kept slash */
		if (*src == '/' && dst != path && dst[-1] == '/')
			continue;
		*dst++ = *src;
	}

	/* strip the trailing slash, but keep a lone "/" */
	if (dst - path > 1 && dst[-1] == '/')
		dst--;
	*dst = '\0';
}
248
249
/*
250
* Check if the mds namespace in ceph_mount_options matches
251
* the passed in namespace string. First time match (when
252
* ->mds_namespace is NULL) is treated specially, since
253
* ->mds_namespace needs to be initialized by the caller.
254
*/
255
static int namespace_equals(struct ceph_mount_options *fsopt,
256
const char *namespace, size_t len)
257
{
258
return !(fsopt->mds_namespace &&
259
(strlen(fsopt->mds_namespace) != len ||
260
strncmp(fsopt->mds_namespace, namespace, len)));
261
}
262
263
static int ceph_parse_old_source(const char *dev_name, const char *dev_name_end,
264
struct fs_context *fc)
265
{
266
int r;
267
struct ceph_parse_opts_ctx *pctx = fc->fs_private;
268
struct ceph_mount_options *fsopt = pctx->opts;
269
270
if (*dev_name_end != ':')
271
return invalfc(fc, "separator ':' missing in source");
272
273
r = ceph_parse_mon_ips(dev_name, dev_name_end - dev_name,
274
pctx->copts, fc->log.log, ',');
275
if (r)
276
return r;
277
278
fsopt->new_dev_syntax = false;
279
return 0;
280
}
281
282
static int ceph_parse_new_source(const char *dev_name, const char *dev_name_end,
283
struct fs_context *fc)
284
{
285
size_t len;
286
struct ceph_fsid fsid;
287
struct ceph_parse_opts_ctx *pctx = fc->fs_private;
288
struct ceph_options *opts = pctx->copts;
289
struct ceph_mount_options *fsopt = pctx->opts;
290
const char *name_start = dev_name;
291
const char *fsid_start, *fs_name_start;
292
293
if (*dev_name_end != '=') {
294
dout("separator '=' missing in source");
295
return -EINVAL;
296
}
297
298
fsid_start = strchr(dev_name, '@');
299
if (!fsid_start)
300
return invalfc(fc, "missing cluster fsid");
301
len = fsid_start - name_start;
302
kfree(opts->name);
303
opts->name = kstrndup(name_start, len, GFP_KERNEL);
304
if (!opts->name)
305
return -ENOMEM;
306
dout("using %s entity name", opts->name);
307
308
++fsid_start; /* start of cluster fsid */
309
fs_name_start = strchr(fsid_start, '.');
310
if (!fs_name_start)
311
return invalfc(fc, "missing file system name");
312
313
if (ceph_parse_fsid(fsid_start, &fsid))
314
return invalfc(fc, "Invalid FSID");
315
316
++fs_name_start; /* start of file system name */
317
len = dev_name_end - fs_name_start;
318
319
if (!namespace_equals(fsopt, fs_name_start, len))
320
return invalfc(fc, "Mismatching mds_namespace");
321
kfree(fsopt->mds_namespace);
322
fsopt->mds_namespace = kstrndup(fs_name_start, len, GFP_KERNEL);
323
if (!fsopt->mds_namespace)
324
return -ENOMEM;
325
dout("file system (mds namespace) '%s'\n", fsopt->mds_namespace);
326
327
fsopt->new_dev_syntax = true;
328
return 0;
329
}
330
331
/*
332
* Parse the source parameter for new device format. Distinguish the device
333
* spec from the path. Try parsing new device format and fallback to old
334
* format if needed.
335
*
336
* New device syntax will look like:
337
* <device_spec>=/<path>
338
* where
339
* <device_spec> is name@fsid.fsname
340
* <path> is optional, but if present must begin with '/'
341
* (monitor addresses are passed via mount option)
342
*
343
* Old device syntax is:
344
* <server_spec>[,<server_spec>...]:[<path>]
345
* where
346
* <server_spec> is <ip>[:<port>]
347
* <path> is optional, but if present must begin with '/'
348
*/
349
static int ceph_parse_source(struct fs_parameter *param, struct fs_context *fc)
350
{
351
struct ceph_parse_opts_ctx *pctx = fc->fs_private;
352
struct ceph_mount_options *fsopt = pctx->opts;
353
char *dev_name = param->string, *dev_name_end;
354
int ret;
355
356
dout("'%s'\n", dev_name);
357
if (!dev_name || !*dev_name)
358
return invalfc(fc, "Empty source");
359
360
dev_name_end = strchr(dev_name, '/');
361
if (dev_name_end) {
362
/*
363
* The server_path will include the whole chars from userland
364
* including the leading '/'.
365
*/
366
kfree(fsopt->server_path);
367
fsopt->server_path = kstrdup(dev_name_end, GFP_KERNEL);
368
if (!fsopt->server_path)
369
return -ENOMEM;
370
371
canonicalize_path(fsopt->server_path);
372
} else {
373
dev_name_end = dev_name + strlen(dev_name);
374
}
375
376
dev_name_end--; /* back up to separator */
377
if (dev_name_end < dev_name)
378
return invalfc(fc, "Path missing in source");
379
380
dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
381
if (fsopt->server_path)
382
dout("server path '%s'\n", fsopt->server_path);
383
384
dout("trying new device syntax");
385
ret = ceph_parse_new_source(dev_name, dev_name_end, fc);
386
if (ret) {
387
if (ret != -EINVAL)
388
return ret;
389
dout("trying old device syntax");
390
ret = ceph_parse_old_source(dev_name, dev_name_end, fc);
391
if (ret)
392
return ret;
393
}
394
395
fc->source = param->string;
396
param->string = NULL;
397
return 0;
398
}
399
400
static int ceph_parse_mon_addr(struct fs_parameter *param,
401
struct fs_context *fc)
402
{
403
struct ceph_parse_opts_ctx *pctx = fc->fs_private;
404
struct ceph_mount_options *fsopt = pctx->opts;
405
406
kfree(fsopt->mon_addr);
407
fsopt->mon_addr = param->string;
408
param->string = NULL;
409
410
return ceph_parse_mon_ips(fsopt->mon_addr, strlen(fsopt->mon_addr),
411
pctx->copts, fc->log.log, '/');
412
}
413
414
static int ceph_parse_mount_param(struct fs_context *fc,
415
struct fs_parameter *param)
416
{
417
struct ceph_parse_opts_ctx *pctx = fc->fs_private;
418
struct ceph_mount_options *fsopt = pctx->opts;
419
struct fs_parse_result result;
420
unsigned int mode;
421
int token, ret;
422
423
ret = ceph_parse_param(param, pctx->copts, fc->log.log);
424
if (ret != -ENOPARAM)
425
return ret;
426
427
token = fs_parse(fc, ceph_mount_parameters, param, &result);
428
dout("%s: fs_parse '%s' token %d\n",__func__, param->key, token);
429
if (token < 0)
430
return token;
431
432
switch (token) {
433
case Opt_snapdirname:
434
if (strlen(param->string) > NAME_MAX)
435
return invalfc(fc, "snapdirname too long");
436
kfree(fsopt->snapdir_name);
437
fsopt->snapdir_name = param->string;
438
param->string = NULL;
439
break;
440
case Opt_mds_namespace:
441
if (!namespace_equals(fsopt, param->string, strlen(param->string)))
442
return invalfc(fc, "Mismatching mds_namespace");
443
kfree(fsopt->mds_namespace);
444
fsopt->mds_namespace = param->string;
445
param->string = NULL;
446
break;
447
case Opt_recover_session:
448
mode = result.uint_32;
449
if (mode == ceph_recover_session_no)
450
fsopt->flags &= ~CEPH_MOUNT_OPT_CLEANRECOVER;
451
else if (mode == ceph_recover_session_clean)
452
fsopt->flags |= CEPH_MOUNT_OPT_CLEANRECOVER;
453
else
454
BUG();
455
break;
456
case Opt_source:
457
if (fc->source)
458
return invalfc(fc, "Multiple sources specified");
459
return ceph_parse_source(param, fc);
460
case Opt_mon_addr:
461
return ceph_parse_mon_addr(param, fc);
462
case Opt_wsize:
463
if (result.uint_32 < PAGE_SIZE ||
464
result.uint_32 > CEPH_MAX_WRITE_SIZE)
465
goto out_of_range;
466
fsopt->wsize = ALIGN(result.uint_32, PAGE_SIZE);
467
break;
468
case Opt_rsize:
469
if (result.uint_32 < PAGE_SIZE ||
470
result.uint_32 > CEPH_MAX_READ_SIZE)
471
goto out_of_range;
472
fsopt->rsize = ALIGN(result.uint_32, PAGE_SIZE);
473
break;
474
case Opt_rasize:
475
fsopt->rasize = ALIGN(result.uint_32, PAGE_SIZE);
476
break;
477
case Opt_caps_wanted_delay_min:
478
if (result.uint_32 < 1)
479
goto out_of_range;
480
fsopt->caps_wanted_delay_min = result.uint_32;
481
break;
482
case Opt_caps_wanted_delay_max:
483
if (result.uint_32 < 1)
484
goto out_of_range;
485
fsopt->caps_wanted_delay_max = result.uint_32;
486
break;
487
case Opt_caps_max:
488
if (result.int_32 < 0)
489
goto out_of_range;
490
fsopt->caps_max = result.int_32;
491
break;
492
case Opt_readdir_max_entries:
493
if (result.uint_32 < 1)
494
goto out_of_range;
495
fsopt->max_readdir = result.uint_32;
496
break;
497
case Opt_readdir_max_bytes:
498
if (result.uint_32 < PAGE_SIZE && result.uint_32 != 0)
499
goto out_of_range;
500
fsopt->max_readdir_bytes = result.uint_32;
501
break;
502
case Opt_congestion_kb:
503
if (result.uint_32 < 1024) /* at least 1M */
504
goto out_of_range;
505
fsopt->congestion_kb = result.uint_32;
506
break;
507
case Opt_dirstat:
508
if (!result.negated)
509
fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT;
510
else
511
fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
512
break;
513
case Opt_rbytes:
514
if (!result.negated)
515
fsopt->flags |= CEPH_MOUNT_OPT_RBYTES;
516
else
517
fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
518
break;
519
case Opt_asyncreaddir:
520
if (!result.negated)
521
fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR;
522
else
523
fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
524
break;
525
case Opt_dcache:
526
if (!result.negated)
527
fsopt->flags |= CEPH_MOUNT_OPT_DCACHE;
528
else
529
fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE;
530
break;
531
case Opt_ino32:
532
if (!result.negated)
533
fsopt->flags |= CEPH_MOUNT_OPT_INO32;
534
else
535
fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
536
break;
537
538
case Opt_fscache:
539
#ifdef CONFIG_CEPH_FSCACHE
540
kfree(fsopt->fscache_uniq);
541
fsopt->fscache_uniq = NULL;
542
if (result.negated) {
543
fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
544
} else {
545
fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
546
fsopt->fscache_uniq = param->string;
547
param->string = NULL;
548
}
549
break;
550
#else
551
return invalfc(fc, "fscache support is disabled");
552
#endif
553
case Opt_poolperm:
554
if (!result.negated)
555
fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM;
556
else
557
fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM;
558
break;
559
case Opt_require_active_mds:
560
if (!result.negated)
561
fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT;
562
else
563
fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT;
564
break;
565
case Opt_quotadf:
566
if (!result.negated)
567
fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF;
568
else
569
fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF;
570
break;
571
case Opt_copyfrom:
572
if (!result.negated)
573
fsopt->flags &= ~CEPH_MOUNT_OPT_NOCOPYFROM;
574
else
575
fsopt->flags |= CEPH_MOUNT_OPT_NOCOPYFROM;
576
break;
577
case Opt_acl:
578
if (!result.negated) {
579
#ifdef CONFIG_CEPH_FS_POSIX_ACL
580
fc->sb_flags |= SB_POSIXACL;
581
#else
582
return invalfc(fc, "POSIX ACL support is disabled");
583
#endif
584
} else {
585
fc->sb_flags &= ~SB_POSIXACL;
586
}
587
break;
588
case Opt_wsync:
589
if (!result.negated)
590
fsopt->flags &= ~CEPH_MOUNT_OPT_ASYNC_DIROPS;
591
else
592
fsopt->flags |= CEPH_MOUNT_OPT_ASYNC_DIROPS;
593
break;
594
case Opt_pagecache:
595
if (result.negated)
596
fsopt->flags |= CEPH_MOUNT_OPT_NOPAGECACHE;
597
else
598
fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE;
599
break;
600
case Opt_sparseread:
601
if (result.negated)
602
fsopt->flags &= ~CEPH_MOUNT_OPT_SPARSEREAD;
603
else
604
fsopt->flags |= CEPH_MOUNT_OPT_SPARSEREAD;
605
break;
606
case Opt_test_dummy_encryption:
607
#ifdef CONFIG_FS_ENCRYPTION
608
fscrypt_free_dummy_policy(&fsopt->dummy_enc_policy);
609
ret = fscrypt_parse_test_dummy_encryption(param,
610
&fsopt->dummy_enc_policy);
611
if (ret == -EINVAL) {
612
warnfc(fc, "Value of option \"%s\" is unrecognized",
613
param->key);
614
} else if (ret == -EEXIST) {
615
warnfc(fc, "Conflicting test_dummy_encryption options");
616
ret = -EINVAL;
617
}
618
#else
619
warnfc(fc,
620
"FS encryption not supported: test_dummy_encryption mount option ignored");
621
#endif
622
break;
623
default:
624
BUG();
625
}
626
return 0;
627
628
out_of_range:
629
return invalfc(fc, "%s out of range", param->key);
630
}
631
632
static void destroy_mount_options(struct ceph_mount_options *args)
633
{
634
dout("destroy_mount_options %p\n", args);
635
if (!args)
636
return;
637
638
kfree(args->snapdir_name);
639
kfree(args->mds_namespace);
640
kfree(args->server_path);
641
kfree(args->fscache_uniq);
642
kfree(args->mon_addr);
643
fscrypt_free_dummy_policy(&args->dummy_enc_policy);
644
kfree(args);
645
}
646
647
/*
 * strcmp() that tolerates NULL arguments: two NULLs compare equal, a
 * non-NULL @s1 against a NULL @s2 yields -1, and the reverse yields 1.
 */
static int strcmp_null(const char *s1, const char *s2)
{
	if (s1 && s2)
		return strcmp(s1, s2);
	if (s1)
		return -1;
	return s2 ? 1 : 0;
}
657
658
static int compare_mount_options(struct ceph_mount_options *new_fsopt,
659
struct ceph_options *new_opt,
660
struct ceph_fs_client *fsc)
661
{
662
struct ceph_mount_options *fsopt1 = new_fsopt;
663
struct ceph_mount_options *fsopt2 = fsc->mount_options;
664
int ofs = offsetof(struct ceph_mount_options, snapdir_name);
665
int ret;
666
667
ret = memcmp(fsopt1, fsopt2, ofs);
668
if (ret)
669
return ret;
670
671
ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
672
if (ret)
673
return ret;
674
675
ret = strcmp_null(fsopt1->mds_namespace, fsopt2->mds_namespace);
676
if (ret)
677
return ret;
678
679
ret = strcmp_null(fsopt1->server_path, fsopt2->server_path);
680
if (ret)
681
return ret;
682
683
ret = strcmp_null(fsopt1->fscache_uniq, fsopt2->fscache_uniq);
684
if (ret)
685
return ret;
686
687
ret = strcmp_null(fsopt1->mon_addr, fsopt2->mon_addr);
688
if (ret)
689
return ret;
690
691
return ceph_compare_options(new_opt, fsc->client);
692
}
693
694
/**
695
* ceph_show_options - Show mount options in /proc/mounts
696
* @m: seq_file to write to
697
* @root: root of that (sub)tree
698
*/
699
static int ceph_show_options(struct seq_file *m, struct dentry *root)
700
{
701
struct ceph_fs_client *fsc = ceph_sb_to_fs_client(root->d_sb);
702
struct ceph_mount_options *fsopt = fsc->mount_options;
703
size_t pos;
704
int ret;
705
706
/* a comma between MNT/MS and client options */
707
seq_putc(m, ',');
708
pos = m->count;
709
710
ret = ceph_print_client_options(m, fsc->client, false);
711
if (ret)
712
return ret;
713
714
/* retract our comma if no client options */
715
if (m->count == pos)
716
m->count--;
717
718
if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
719
seq_puts(m, ",dirstat");
720
if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES))
721
seq_puts(m, ",rbytes");
722
if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
723
seq_puts(m, ",noasyncreaddir");
724
if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
725
seq_puts(m, ",nodcache");
726
if (fsopt->flags & CEPH_MOUNT_OPT_INO32)
727
seq_puts(m, ",ino32");
728
if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
729
seq_show_option(m, "fsc", fsopt->fscache_uniq);
730
}
731
if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
732
seq_puts(m, ",nopoolperm");
733
if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF)
734
seq_puts(m, ",noquotadf");
735
736
#ifdef CONFIG_CEPH_FS_POSIX_ACL
737
if (root->d_sb->s_flags & SB_POSIXACL)
738
seq_puts(m, ",acl");
739
else
740
seq_puts(m, ",noacl");
741
#endif
742
743
if ((fsopt->flags & CEPH_MOUNT_OPT_NOCOPYFROM) == 0)
744
seq_puts(m, ",copyfrom");
745
746
/* dump mds_namespace when old device syntax is in use */
747
if (fsopt->mds_namespace && !fsopt->new_dev_syntax)
748
seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
749
750
if (fsopt->mon_addr)
751
seq_printf(m, ",mon_addr=%s", fsopt->mon_addr);
752
753
if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER)
754
seq_show_option(m, "recover_session", "clean");
755
756
if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS))
757
seq_puts(m, ",wsync");
758
if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
759
seq_puts(m, ",nopagecache");
760
if (fsopt->flags & CEPH_MOUNT_OPT_SPARSEREAD)
761
seq_puts(m, ",sparseread");
762
763
fscrypt_show_test_dummy_encryption(m, ',', root->d_sb);
764
765
if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
766
seq_printf(m, ",wsize=%u", fsopt->wsize);
767
if (fsopt->rsize != CEPH_MAX_READ_SIZE)
768
seq_printf(m, ",rsize=%u", fsopt->rsize);
769
if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
770
seq_printf(m, ",rasize=%u", fsopt->rasize);
771
if (fsopt->congestion_kb != default_congestion_kb())
772
seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb);
773
if (fsopt->caps_max)
774
seq_printf(m, ",caps_max=%d", fsopt->caps_max);
775
if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
776
seq_printf(m, ",caps_wanted_delay_min=%u",
777
fsopt->caps_wanted_delay_min);
778
if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
779
seq_printf(m, ",caps_wanted_delay_max=%u",
780
fsopt->caps_wanted_delay_max);
781
if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
782
seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir);
783
if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
784
seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes);
785
if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
786
seq_show_option(m, "snapdirname", fsopt->snapdir_name);
787
788
return 0;
789
}
790
791
/*
792
* handle any mon messages the standard library doesn't understand.
793
* return error if we don't either.
794
*/
795
static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
796
{
797
struct ceph_fs_client *fsc = client->private;
798
int type = le16_to_cpu(msg->hdr.type);
799
800
switch (type) {
801
case CEPH_MSG_MDS_MAP:
802
ceph_mdsc_handle_mdsmap(fsc->mdsc, msg);
803
return 0;
804
case CEPH_MSG_FS_MAP_USER:
805
ceph_mdsc_handle_fsmap(fsc->mdsc, msg);
806
return 0;
807
default:
808
return -1;
809
}
810
}
811
812
/*
813
* create a new fs client
814
*
815
* Success or not, this function consumes @fsopt and @opt.
816
*/
817
static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
818
struct ceph_options *opt)
819
{
820
struct ceph_fs_client *fsc;
821
int err;
822
823
fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
824
if (!fsc) {
825
err = -ENOMEM;
826
goto fail;
827
}
828
829
fsc->client = ceph_create_client(opt, fsc);
830
if (IS_ERR(fsc->client)) {
831
err = PTR_ERR(fsc->client);
832
goto fail;
833
}
834
opt = NULL; /* fsc->client now owns this */
835
836
fsc->client->extra_mon_dispatch = extra_mon_dispatch;
837
ceph_set_opt(fsc->client, ABORT_ON_FULL);
838
839
if (!fsopt->mds_namespace) {
840
ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
841
0, true);
842
} else {
843
ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_FSMAP,
844
0, false);
845
}
846
847
fsc->mount_options = fsopt;
848
849
fsc->sb = NULL;
850
fsc->mount_state = CEPH_MOUNT_MOUNTING;
851
fsc->filp_gen = 1;
852
fsc->have_copy_from2 = true;
853
854
atomic_long_set(&fsc->writeback_count, 0);
855
fsc->write_congested = false;
856
857
err = -ENOMEM;
858
/*
859
* The number of concurrent works can be high but they don't need
860
* to be processed in parallel, limit concurrency.
861
*/
862
fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0);
863
if (!fsc->inode_wq)
864
goto fail_client;
865
fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1);
866
if (!fsc->cap_wq)
867
goto fail_inode_wq;
868
869
hash_init(fsc->async_unlink_conflict);
870
spin_lock_init(&fsc->async_unlink_conflict_lock);
871
872
spin_lock(&ceph_fsc_lock);
873
list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list);
874
spin_unlock(&ceph_fsc_lock);
875
876
return fsc;
877
878
fail_inode_wq:
879
destroy_workqueue(fsc->inode_wq);
880
fail_client:
881
ceph_destroy_client(fsc->client);
882
fail:
883
kfree(fsc);
884
if (opt)
885
ceph_destroy_options(opt);
886
destroy_mount_options(fsopt);
887
return ERR_PTR(err);
888
}
889
890
static void flush_fs_workqueues(struct ceph_fs_client *fsc)
891
{
892
flush_workqueue(fsc->inode_wq);
893
flush_workqueue(fsc->cap_wq);
894
}
895
896
static void destroy_fs_client(struct ceph_fs_client *fsc)
897
{
898
doutc(fsc->client, "%p\n", fsc);
899
900
spin_lock(&ceph_fsc_lock);
901
list_del(&fsc->metric_wakeup);
902
spin_unlock(&ceph_fsc_lock);
903
904
ceph_mdsc_destroy(fsc);
905
destroy_workqueue(fsc->inode_wq);
906
destroy_workqueue(fsc->cap_wq);
907
908
destroy_mount_options(fsc->mount_options);
909
910
ceph_destroy_client(fsc->client);
911
912
kfree(fsc);
913
dout("%s: %p done\n", __func__, fsc);
914
}
915
916
/*
917
* caches
918
*/
919
struct kmem_cache *ceph_inode_cachep;
920
struct kmem_cache *ceph_cap_cachep;
921
struct kmem_cache *ceph_cap_snap_cachep;
922
struct kmem_cache *ceph_cap_flush_cachep;
923
struct kmem_cache *ceph_dentry_cachep;
924
struct kmem_cache *ceph_file_cachep;
925
struct kmem_cache *ceph_dir_file_cachep;
926
struct kmem_cache *ceph_mds_request_cachep;
927
mempool_t *ceph_wb_pagevec_pool;
928
929
static void ceph_inode_init_once(void *foo)
930
{
931
struct ceph_inode_info *ci = foo;
932
inode_init_once(&ci->netfs.inode);
933
}
934
935
/*
 * Create all slab caches and the writeback pagevec mempool.
 *
 * Returns 0 on success.  On failure everything created so far is
 * destroyed again and -ENOMEM is returned.
 */
static int __init init_caches(void)
{
	const int error = -ENOMEM;

	ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
				sizeof(struct ceph_inode_info),
				__alignof__(struct ceph_inode_info),
				SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
				ceph_inode_init_once);
	if (ceph_inode_cachep == NULL)
		return -ENOMEM;

	ceph_cap_cachep = KMEM_CACHE(ceph_cap, 0);
	if (ceph_cap_cachep == NULL)
		goto bad_cap;

	ceph_cap_snap_cachep = KMEM_CACHE(ceph_cap_snap, 0);
	if (ceph_cap_snap_cachep == NULL)
		goto bad_cap_snap;

	ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
					   SLAB_RECLAIM_ACCOUNT);
	if (ceph_cap_flush_cachep == NULL)
		goto bad_cap_flush;

	ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
					SLAB_RECLAIM_ACCOUNT);
	if (ceph_dentry_cachep == NULL)
		goto bad_dentry;

	ceph_file_cachep = KMEM_CACHE(ceph_file_info, 0);
	if (ceph_file_cachep == NULL)
		goto bad_file;

	ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, 0);
	if (ceph_dir_file_cachep == NULL)
		goto bad_dir_file;

	ceph_mds_request_cachep = KMEM_CACHE(ceph_mds_request, 0);
	if (ceph_mds_request_cachep == NULL)
		goto bad_mds_req;

	/* 10 reserved elements, each one page pointer per page of a maximal write */
	ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10,
		(CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT) * sizeof(struct page *));
	if (ceph_wb_pagevec_pool == NULL)
		goto bad_pagevec_pool;

	return 0;

	/* unwind in reverse order of creation */
bad_pagevec_pool:
	kmem_cache_destroy(ceph_mds_request_cachep);
bad_mds_req:
	kmem_cache_destroy(ceph_dir_file_cachep);
bad_dir_file:
	kmem_cache_destroy(ceph_file_cachep);
bad_file:
	kmem_cache_destroy(ceph_dentry_cachep);
bad_dentry:
	kmem_cache_destroy(ceph_cap_flush_cachep);
bad_cap_flush:
	kmem_cache_destroy(ceph_cap_snap_cachep);
bad_cap_snap:
	kmem_cache_destroy(ceph_cap_cachep);
bad_cap:
	kmem_cache_destroy(ceph_inode_cachep);
	return error;
}
1000
1001
static void destroy_caches(void)
1002
{
1003
/*
1004
* Make sure all delayed rcu free inodes are flushed before we
1005
* destroy cache.
1006
*/
1007
rcu_barrier();
1008
1009
kmem_cache_destroy(ceph_inode_cachep);
1010
kmem_cache_destroy(ceph_cap_cachep);
1011
kmem_cache_destroy(ceph_cap_snap_cachep);
1012
kmem_cache_destroy(ceph_cap_flush_cachep);
1013
kmem_cache_destroy(ceph_dentry_cachep);
1014
kmem_cache_destroy(ceph_file_cachep);
1015
kmem_cache_destroy(ceph_dir_file_cachep);
1016
kmem_cache_destroy(ceph_mds_request_cachep);
1017
mempool_destroy(ceph_wb_pagevec_pool);
1018
}
1019
1020
static void __ceph_umount_begin(struct ceph_fs_client *fsc)
1021
{
1022
ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
1023
ceph_mdsc_force_umount(fsc->mdsc);
1024
fsc->filp_gen++; // invalidate open files
1025
}
1026
1027
/*
1028
* ceph_umount_begin - initiate forced umount. Tear down the
1029
* mount, skipping steps that may hang while waiting for server(s).
1030
*/
1031
void ceph_umount_begin(struct super_block *sb)
1032
{
1033
struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
1034
1035
doutc(fsc->client, "starting forced umount\n");
1036
1037
fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
1038
__ceph_umount_begin(fsc);
1039
}
1040
1041
static const struct super_operations ceph_super_ops = {
1042
.alloc_inode = ceph_alloc_inode,
1043
.free_inode = ceph_free_inode,
1044
.write_inode = ceph_write_inode,
1045
.drop_inode = generic_delete_inode,
1046
.evict_inode = ceph_evict_inode,
1047
.sync_fs = ceph_sync_fs,
1048
.put_super = ceph_put_super,
1049
.show_options = ceph_show_options,
1050
.statfs = ceph_statfs,
1051
.umount_begin = ceph_umount_begin,
1052
};
1053
1054
/*
1055
* Bootstrap mount by opening the root directory. Note the mount
1056
* @started time from caller, and time out if this takes too long.
1057
*/
1058
static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
1059
const char *path,
1060
unsigned long started)
1061
{
1062
struct ceph_client *cl = fsc->client;
1063
struct ceph_mds_client *mdsc = fsc->mdsc;
1064
struct ceph_mds_request *req = NULL;
1065
int err;
1066
struct dentry *root;
1067
1068
/* open dir */
1069
doutc(cl, "opening '%s'\n", path);
1070
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
1071
if (IS_ERR(req))
1072
return ERR_CAST(req);
1073
req->r_path1 = kstrdup(path, GFP_NOFS);
1074
if (!req->r_path1) {
1075
root = ERR_PTR(-ENOMEM);
1076
goto out;
1077
}
1078
1079
req->r_ino1.ino = CEPH_INO_ROOT;
1080
req->r_ino1.snap = CEPH_NOSNAP;
1081
req->r_started = started;
1082
req->r_timeout = fsc->client->options->mount_timeout;
1083
req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
1084
req->r_num_caps = 2;
1085
err = ceph_mdsc_do_request(mdsc, NULL, req);
1086
if (err == 0) {
1087
struct inode *inode = req->r_target_inode;
1088
req->r_target_inode = NULL;
1089
doutc(cl, "success\n");
1090
root = d_make_root(inode);
1091
if (!root) {
1092
root = ERR_PTR(-ENOMEM);
1093
goto out;
1094
}
1095
doutc(cl, "success, root dentry is %p\n", root);
1096
} else {
1097
root = ERR_PTR(err);
1098
}
1099
out:
1100
ceph_mdsc_put_request(req);
1101
return root;
1102
}
1103
1104
#ifdef CONFIG_FS_ENCRYPTION
/*
 * Apply a test_dummy_encryption policy found in @fsopt to the fs client
 * attached to @sb.
 *
 * Nothing happens when @fsopt carries no dummy policy.  Otherwise:
 *  - if the client already has a policy, @fsopt must carry an equal one;
 *  - if the client has none and this is a reconfigure (remount), the
 *    request is rejected;
 *  - otherwise the policy is moved from @fsopt into the client and the
 *    copy in @fsopt is zeroed.
 *
 * Returns 0 on success or -EINVAL (with a message logged on @fc).
 */
static int ceph_apply_test_dummy_encryption(struct super_block *sb,
					    struct fs_context *fc,
					    struct ceph_mount_options *fsopt)
{
	struct ceph_fs_client *fsc = sb->s_fs_info;

	if (!fscrypt_is_dummy_policy_set(&fsopt->dummy_enc_policy))
		return 0;

	/* Make sure fsopt doesn't contain a conflicting value. */
	if (fscrypt_is_dummy_policy_set(&fsc->fsc_dummy_enc_policy)) {
		if (fscrypt_dummy_policies_equal(&fsopt->dummy_enc_policy,
						 &fsc->fsc_dummy_enc_policy))
			return 0;
		errorfc(fc, "Conflicting test_dummy_encryption options");
		return -EINVAL;
	}

	/* No changing encryption context on remount. */
	if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
		if (fscrypt_dummy_policies_equal(&fsopt->dummy_enc_policy,
						 &fsc->fsc_dummy_enc_policy))
			return 0;
		errorfc(fc, "Can't set test_dummy_encryption on remount");
		return -EINVAL;
	}

	/* Hand the policy over to the client and wipe the parsed copy. */
	fsc->fsc_dummy_enc_policy = fsopt->dummy_enc_policy;
	memset(&fsopt->dummy_enc_policy, 0, sizeof(fsopt->dummy_enc_policy));

	warnfc(fc, "test_dummy_encryption mode enabled");
	return 0;
}
#else
/* Without CONFIG_FS_ENCRYPTION there is nothing to apply. */
static int ceph_apply_test_dummy_encryption(struct super_block *sb,
					    struct fs_context *fc,
					    struct ceph_mount_options *fsopt)
{
	return 0;
}
#endif
1147
1148
/*
1149
* mount: join the ceph cluster, and open root directory.
1150
*/
1151
static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
1152
struct fs_context *fc)
1153
{
1154
struct ceph_client *cl = fsc->client;
1155
int err;
1156
unsigned long started = jiffies; /* note the start time */
1157
struct dentry *root;
1158
1159
doutc(cl, "mount start %p\n", fsc);
1160
mutex_lock(&fsc->client->mount_mutex);
1161
1162
if (!fsc->sb->s_root) {
1163
const char *path = fsc->mount_options->server_path ?
1164
fsc->mount_options->server_path + 1 : "";
1165
1166
err = __ceph_open_session(fsc->client, started);
1167
if (err < 0)
1168
goto out;
1169
1170
/* setup fscache */
1171
if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
1172
err = ceph_fscache_register_fs(fsc, fc);
1173
if (err < 0)
1174
goto out;
1175
}
1176
1177
err = ceph_apply_test_dummy_encryption(fsc->sb, fc,
1178
fsc->mount_options);
1179
if (err)
1180
goto out;
1181
1182
doutc(cl, "mount opening path '%s'\n", path);
1183
1184
ceph_fs_debugfs_init(fsc);
1185
1186
root = open_root_dentry(fsc, path, started);
1187
if (IS_ERR(root)) {
1188
err = PTR_ERR(root);
1189
goto out;
1190
}
1191
fsc->sb->s_root = dget(root);
1192
} else {
1193
root = dget(fsc->sb->s_root);
1194
}
1195
1196
fsc->mount_state = CEPH_MOUNT_MOUNTED;
1197
doutc(cl, "mount success\n");
1198
mutex_unlock(&fsc->client->mount_mutex);
1199
return root;
1200
1201
out:
1202
mutex_unlock(&fsc->client->mount_mutex);
1203
ceph_fscrypt_free_dummy_policy(fsc);
1204
return ERR_PTR(err);
1205
}
1206
1207
static int ceph_set_super(struct super_block *s, struct fs_context *fc)
1208
{
1209
struct ceph_fs_client *fsc = s->s_fs_info;
1210
struct ceph_client *cl = fsc->client;
1211
int ret;
1212
1213
doutc(cl, "%p\n", s);
1214
1215
s->s_maxbytes = MAX_LFS_FILESIZE;
1216
1217
s->s_xattr = ceph_xattr_handlers;
1218
fsc->sb = s;
1219
fsc->max_file_size = 1ULL << 40; /* temp value until we get mdsmap */
1220
1221
s->s_op = &ceph_super_ops;
1222
set_default_d_op(s, &ceph_dentry_ops);
1223
s->s_export_op = &ceph_export_ops;
1224
1225
s->s_time_gran = 1;
1226
s->s_time_min = 0;
1227
s->s_time_max = U32_MAX;
1228
s->s_flags |= SB_NODIRATIME | SB_NOATIME;
1229
s->s_magic = CEPH_SUPER_MAGIC;
1230
1231
ceph_fscrypt_set_ops(s);
1232
1233
ret = set_anon_super_fc(s, fc);
1234
if (ret != 0)
1235
fsc->sb = NULL;
1236
return ret;
1237
}
1238
1239
/*
1240
* share superblock if same fs AND options
1241
*/
1242
static int ceph_compare_super(struct super_block *sb, struct fs_context *fc)
1243
{
1244
struct ceph_fs_client *new = fc->s_fs_info;
1245
struct ceph_mount_options *fsopt = new->mount_options;
1246
struct ceph_options *opt = new->client->options;
1247
struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
1248
struct ceph_client *cl = fsc->client;
1249
1250
doutc(cl, "%p\n", sb);
1251
1252
if (compare_mount_options(fsopt, opt, fsc)) {
1253
doutc(cl, "monitor(s)/mount options don't match\n");
1254
return 0;
1255
}
1256
if ((opt->flags & CEPH_OPT_FSID) &&
1257
ceph_fsid_compare(&opt->fsid, &fsc->client->fsid)) {
1258
doutc(cl, "fsid doesn't match\n");
1259
return 0;
1260
}
1261
if (fc->sb_flags != (sb->s_flags & ~SB_BORN)) {
1262
doutc(cl, "flags differ\n");
1263
return 0;
1264
}
1265
1266
if (fsc->blocklisted && !ceph_test_mount_opt(fsc, CLEANRECOVER)) {
1267
doutc(cl, "client is blocklisted (and CLEANRECOVER is not set)\n");
1268
return 0;
1269
}
1270
1271
if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) {
1272
doutc(cl, "client has been forcibly unmounted\n");
1273
return 0;
1274
}
1275
1276
return 1;
1277
}
1278
1279
/*
1280
* construct our own bdi so we can control readahead, etc.
1281
*/
1282
static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
1283
1284
static int ceph_setup_bdi(struct super_block *sb, struct ceph_fs_client *fsc)
1285
{
1286
int err;
1287
1288
err = super_setup_bdi_name(sb, "ceph-%ld",
1289
atomic_long_inc_return(&bdi_seq));
1290
if (err)
1291
return err;
1292
1293
/* set ra_pages based on rasize mount option? */
1294
sb->s_bdi->ra_pages = fsc->mount_options->rasize >> PAGE_SHIFT;
1295
1296
/* set io_pages based on max osd read size */
1297
sb->s_bdi->io_pages = fsc->mount_options->rsize >> PAGE_SHIFT;
1298
1299
return 0;
1300
}
1301
1302
static int ceph_get_tree(struct fs_context *fc)
1303
{
1304
struct ceph_parse_opts_ctx *pctx = fc->fs_private;
1305
struct ceph_mount_options *fsopt = pctx->opts;
1306
struct super_block *sb;
1307
struct ceph_fs_client *fsc;
1308
struct dentry *res;
1309
int (*compare_super)(struct super_block *, struct fs_context *) =
1310
ceph_compare_super;
1311
int err;
1312
1313
dout("ceph_get_tree\n");
1314
1315
if (!fc->source)
1316
return invalfc(fc, "No source");
1317
if (fsopt->new_dev_syntax && !fsopt->mon_addr)
1318
return invalfc(fc, "No monitor address");
1319
1320
/* create client (which we may/may not use) */
1321
fsc = create_fs_client(pctx->opts, pctx->copts);
1322
pctx->opts = NULL;
1323
pctx->copts = NULL;
1324
if (IS_ERR(fsc)) {
1325
err = PTR_ERR(fsc);
1326
goto out_final;
1327
}
1328
1329
err = ceph_mdsc_init(fsc);
1330
if (err < 0)
1331
goto out;
1332
1333
if (ceph_test_opt(fsc->client, NOSHARE))
1334
compare_super = NULL;
1335
1336
fc->s_fs_info = fsc;
1337
sb = sget_fc(fc, compare_super, ceph_set_super);
1338
fc->s_fs_info = NULL;
1339
if (IS_ERR(sb)) {
1340
err = PTR_ERR(sb);
1341
goto out;
1342
}
1343
1344
if (ceph_sb_to_fs_client(sb) != fsc) {
1345
destroy_fs_client(fsc);
1346
fsc = ceph_sb_to_fs_client(sb);
1347
dout("get_sb got existing client %p\n", fsc);
1348
} else {
1349
dout("get_sb using new client %p\n", fsc);
1350
err = ceph_setup_bdi(sb, fsc);
1351
if (err < 0)
1352
goto out_splat;
1353
}
1354
1355
res = ceph_real_mount(fsc, fc);
1356
if (IS_ERR(res)) {
1357
err = PTR_ERR(res);
1358
goto out_splat;
1359
}
1360
1361
doutc(fsc->client, "root %p inode %p ino %llx.%llx\n", res,
1362
d_inode(res), ceph_vinop(d_inode(res)));
1363
fc->root = fsc->sb->s_root;
1364
return 0;
1365
1366
out_splat:
1367
if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
1368
pr_info("No mds server is up or the cluster is laggy\n");
1369
err = -EHOSTUNREACH;
1370
}
1371
1372
ceph_mdsc_close_sessions(fsc->mdsc);
1373
deactivate_locked_super(sb);
1374
goto out_final;
1375
1376
out:
1377
destroy_fs_client(fsc);
1378
out_final:
1379
dout("ceph_get_tree fail %d\n", err);
1380
return err;
1381
}
1382
1383
static void ceph_free_fc(struct fs_context *fc)
1384
{
1385
struct ceph_parse_opts_ctx *pctx = fc->fs_private;
1386
1387
if (pctx) {
1388
destroy_mount_options(pctx->opts);
1389
ceph_destroy_options(pctx->copts);
1390
kfree(pctx);
1391
}
1392
}
1393
1394
static int ceph_reconfigure_fc(struct fs_context *fc)
1395
{
1396
int err;
1397
struct ceph_parse_opts_ctx *pctx = fc->fs_private;
1398
struct ceph_mount_options *fsopt = pctx->opts;
1399
struct super_block *sb = fc->root->d_sb;
1400
struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
1401
1402
err = ceph_apply_test_dummy_encryption(sb, fc, fsopt);
1403
if (err)
1404
return err;
1405
1406
if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)
1407
ceph_set_mount_opt(fsc, ASYNC_DIROPS);
1408
else
1409
ceph_clear_mount_opt(fsc, ASYNC_DIROPS);
1410
1411
if (fsopt->flags & CEPH_MOUNT_OPT_SPARSEREAD)
1412
ceph_set_mount_opt(fsc, SPARSEREAD);
1413
else
1414
ceph_clear_mount_opt(fsc, SPARSEREAD);
1415
1416
if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) {
1417
kfree(fsc->mount_options->mon_addr);
1418
fsc->mount_options->mon_addr = fsopt->mon_addr;
1419
fsopt->mon_addr = NULL;
1420
pr_notice_client(fsc->client,
1421
"monitor addresses recorded, but not used for reconnection");
1422
}
1423
1424
sync_filesystem(sb);
1425
return 0;
1426
}
1427
1428
static const struct fs_context_operations ceph_context_ops = {
1429
.free = ceph_free_fc,
1430
.parse_param = ceph_parse_mount_param,
1431
.get_tree = ceph_get_tree,
1432
.reconfigure = ceph_reconfigure_fc,
1433
};
1434
1435
/*
1436
* Set up the filesystem mount context.
1437
*/
1438
static int ceph_init_fs_context(struct fs_context *fc)
1439
{
1440
struct ceph_parse_opts_ctx *pctx;
1441
struct ceph_mount_options *fsopt;
1442
1443
pctx = kzalloc(sizeof(*pctx), GFP_KERNEL);
1444
if (!pctx)
1445
return -ENOMEM;
1446
1447
pctx->copts = ceph_alloc_options();
1448
if (!pctx->copts)
1449
goto nomem;
1450
1451
pctx->opts = kzalloc(sizeof(*pctx->opts), GFP_KERNEL);
1452
if (!pctx->opts)
1453
goto nomem;
1454
1455
fsopt = pctx->opts;
1456
fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
1457
1458
fsopt->wsize = CEPH_MAX_WRITE_SIZE;
1459
fsopt->rsize = CEPH_MAX_READ_SIZE;
1460
fsopt->rasize = CEPH_RASIZE_DEFAULT;
1461
fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
1462
if (!fsopt->snapdir_name)
1463
goto nomem;
1464
1465
fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
1466
fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
1467
fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
1468
fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
1469
fsopt->congestion_kb = default_congestion_kb();
1470
1471
#ifdef CONFIG_CEPH_FS_POSIX_ACL
1472
fc->sb_flags |= SB_POSIXACL;
1473
#endif
1474
1475
fc->fs_private = pctx;
1476
fc->ops = &ceph_context_ops;
1477
return 0;
1478
1479
nomem:
1480
destroy_mount_options(pctx->opts);
1481
ceph_destroy_options(pctx->copts);
1482
kfree(pctx);
1483
return -ENOMEM;
1484
}
1485
1486
/*
1487
* Return true if it successfully increases the blocker counter,
1488
* or false if the mdsc is in stopping and flushed state.
1489
*/
1490
static bool __inc_stopping_blocker(struct ceph_mds_client *mdsc)
1491
{
1492
spin_lock(&mdsc->stopping_lock);
1493
if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING) {
1494
spin_unlock(&mdsc->stopping_lock);
1495
return false;
1496
}
1497
atomic_inc(&mdsc->stopping_blockers);
1498
spin_unlock(&mdsc->stopping_lock);
1499
return true;
1500
}
1501
1502
static void __dec_stopping_blocker(struct ceph_mds_client *mdsc)
1503
{
1504
spin_lock(&mdsc->stopping_lock);
1505
if (!atomic_dec_return(&mdsc->stopping_blockers) &&
1506
mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING)
1507
complete_all(&mdsc->stopping_waiter);
1508
spin_unlock(&mdsc->stopping_lock);
1509
}
1510
1511
/* For metadata IO requests */
1512
bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client *mdsc,
1513
struct ceph_mds_session *session)
1514
{
1515
mutex_lock(&session->s_mutex);
1516
inc_session_sequence(session);
1517
mutex_unlock(&session->s_mutex);
1518
1519
return __inc_stopping_blocker(mdsc);
1520
}
1521
1522
/* Metadata IO counterpart of ceph_inc_mds_stopping_blocker(): drop the blocker. */
void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc)
{
	__dec_stopping_blocker(mdsc);
}
1526
1527
/* For data IO requests */
1528
bool ceph_inc_osd_stopping_blocker(struct ceph_mds_client *mdsc)
1529
{
1530
return __inc_stopping_blocker(mdsc);
1531
}
1532
1533
/* Data IO counterpart of ceph_inc_osd_stopping_blocker(): drop the blocker. */
void ceph_dec_osd_stopping_blocker(struct ceph_mds_client *mdsc)
{
	__dec_stopping_blocker(mdsc);
}
1537
1538
static void ceph_kill_sb(struct super_block *s)
1539
{
1540
struct ceph_fs_client *fsc = ceph_sb_to_fs_client(s);
1541
struct ceph_client *cl = fsc->client;
1542
struct ceph_mds_client *mdsc = fsc->mdsc;
1543
bool wait;
1544
1545
doutc(cl, "%p\n", s);
1546
1547
ceph_mdsc_pre_umount(mdsc);
1548
flush_fs_workqueues(fsc);
1549
1550
/*
1551
* Though the kill_anon_super() will finally trigger the
1552
* sync_filesystem() anyway, we still need to do it here and
1553
* then bump the stage of shutdown. This will allow us to
1554
* drop any further message, which will increase the inodes'
1555
* i_count reference counters but makes no sense any more,
1556
* from MDSs.
1557
*
1558
* Without this when evicting the inodes it may fail in the
1559
* kill_anon_super(), which will trigger a warning when
1560
* destroying the fscrypt keyring and then possibly trigger
1561
* a further crash in ceph module when the iput() tries to
1562
* evict the inodes later.
1563
*/
1564
sync_filesystem(s);
1565
1566
if (atomic64_read(&mdsc->dirty_folios) > 0) {
1567
wait_queue_head_t *wq = &mdsc->flush_end_wq;
1568
long timeleft = wait_event_killable_timeout(*wq,
1569
atomic64_read(&mdsc->dirty_folios) <= 0,
1570
fsc->client->options->mount_timeout);
1571
if (!timeleft) /* timed out */
1572
pr_warn_client(cl, "umount timed out, %ld\n", timeleft);
1573
else if (timeleft < 0) /* killed */
1574
pr_warn_client(cl, "umount was killed, %ld\n", timeleft);
1575
}
1576
1577
spin_lock(&mdsc->stopping_lock);
1578
mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHING;
1579
wait = !!atomic_read(&mdsc->stopping_blockers);
1580
spin_unlock(&mdsc->stopping_lock);
1581
1582
if (wait && atomic_read(&mdsc->stopping_blockers)) {
1583
long timeleft = wait_for_completion_killable_timeout(
1584
&mdsc->stopping_waiter,
1585
fsc->client->options->mount_timeout);
1586
if (!timeleft) /* timed out */
1587
pr_warn_client(cl, "umount timed out, %ld\n", timeleft);
1588
else if (timeleft < 0) /* killed */
1589
pr_warn_client(cl, "umount was killed, %ld\n", timeleft);
1590
}
1591
1592
mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
1593
kill_anon_super(s);
1594
1595
fsc->client->extra_mon_dispatch = NULL;
1596
ceph_fs_debugfs_cleanup(fsc);
1597
1598
ceph_fscache_unregister_fs(fsc);
1599
1600
destroy_fs_client(fsc);
1601
}
1602
1603
static struct file_system_type ceph_fs_type = {
1604
.owner = THIS_MODULE,
1605
.name = "ceph",
1606
.init_fs_context = ceph_init_fs_context,
1607
.kill_sb = ceph_kill_sb,
1608
.fs_flags = FS_RENAME_DOES_D_MOVE | FS_ALLOW_IDMAP,
1609
};
1610
MODULE_ALIAS_FS("ceph");
1611
1612
int ceph_force_reconnect(struct super_block *sb)
1613
{
1614
struct ceph_fs_client *fsc = ceph_sb_to_fs_client(sb);
1615
int err = 0;
1616
1617
fsc->mount_state = CEPH_MOUNT_RECOVER;
1618
__ceph_umount_begin(fsc);
1619
1620
/* Make sure all page caches get invalidated.
1621
* see remove_session_caps_cb() */
1622
flush_workqueue(fsc->inode_wq);
1623
1624
/* In case that we were blocklisted. This also reset
1625
* all mon/osd connections */
1626
ceph_reset_client_addr(fsc->client);
1627
1628
ceph_osdc_clear_abort_err(&fsc->client->osdc);
1629
1630
fsc->blocklisted = false;
1631
fsc->mount_state = CEPH_MOUNT_MOUNTED;
1632
1633
if (sb->s_root) {
1634
err = __ceph_do_getattr(d_inode(sb->s_root), NULL,
1635
CEPH_STAT_CAP_INODE, true);
1636
}
1637
return err;
1638
}
1639
1640
/*
 * Module init: set up the caches and flock support, then register the
 * "ceph" filesystem type.  The caches are destroyed again if the
 * registration fails.
 *
 * Returns 0 on success or the error from init_caches() /
 * register_filesystem().
 */
static int __init init_ceph(void)
{
	int ret;

	ret = init_caches();
	if (ret)
		return ret;

	ceph_flock_init();

	ret = register_filesystem(&ceph_fs_type);
	if (ret) {
		destroy_caches();
		return ret;
	}

	pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);

	return 0;
}
1660
1661
/* Module exit: unregister the "ceph" filesystem type, then destroy the caches. */
static void __exit exit_ceph(void)
{
	dout("exit_ceph\n");

	unregister_filesystem(&ceph_fs_type);
	destroy_caches();
}
1667
1668
static int param_set_metrics(const char *val, const struct kernel_param *kp)
1669
{
1670
struct ceph_fs_client *fsc;
1671
int ret;
1672
1673
ret = param_set_bool(val, kp);
1674
if (ret) {
1675
pr_err("Failed to parse sending metrics switch value '%s'\n",
1676
val);
1677
return ret;
1678
} else if (!disable_send_metrics) {
1679
// wake up all the mds clients
1680
spin_lock(&ceph_fsc_lock);
1681
list_for_each_entry(fsc, &ceph_fsc_list, metric_wakeup) {
1682
metric_schedule_delayed(&fsc->mdsc->metric);
1683
}
1684
spin_unlock(&ceph_fsc_lock);
1685
}
1686
1687
return 0;
1688
}
1689
1690
static const struct kernel_param_ops param_ops_metrics = {
1691
.set = param_set_metrics,
1692
.get = param_get_bool,
1693
};
1694
1695
bool disable_send_metrics = false;
1696
module_param_cb(disable_send_metrics, &param_ops_metrics, &disable_send_metrics, 0644);
1697
MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)");
1698
1699
/* for both v1 and v2 syntax */
1700
static bool mount_support = true;
1701
static const struct kernel_param_ops param_ops_mount_syntax = {
1702
.get = param_get_bool,
1703
};
1704
module_param_cb(mount_syntax_v1, &param_ops_mount_syntax, &mount_support, 0444);
1705
module_param_cb(mount_syntax_v2, &param_ops_mount_syntax, &mount_support, 0444);
1706
1707
bool enable_unsafe_idmap = false;
1708
module_param(enable_unsafe_idmap, bool, 0644);
1709
MODULE_PARM_DESC(enable_unsafe_idmap,
1710
"Allow to use idmapped mounts with MDS without CEPHFS_FEATURE_HAS_OWNER_UIDGID");
1711
1712
module_init(init_ceph);
1713
module_exit(exit_ceph);
1714
1715
MODULE_AUTHOR("Sage Weil <[email protected]>");
1716
MODULE_AUTHOR("Yehuda Sadeh <[email protected]>");
1717
MODULE_AUTHOR("Patience Warnick <[email protected]>");
1718
MODULE_DESCRIPTION("Ceph filesystem for Linux");
1719
MODULE_LICENSE("GPL");
1720
1721