CoCalc -- super.c

GitHub Repository: awilliam/linux-vfio
Path: blob/master/fs/ceph/super.c
17363 views
1

2
#include <linux/ceph/ceph_debug.h>
3

4
#include <linux/backing-dev.h>
5
#include <linux/ctype.h>
6
#include <linux/fs.h>
7
#include <linux/inet.h>
8
#include <linux/in6.h>
9
#include <linux/module.h>
10
#include <linux/mount.h>
11
#include <linux/parser.h>
12
#include <linux/sched.h>
13
#include <linux/seq_file.h>
14
#include <linux/slab.h>
15
#include <linux/statfs.h>
16
#include <linux/string.h>
17

18
#include "super.h"
19
#include "mds_client.h"
20

21
#include <linux/ceph/decode.h>
22
#include <linux/ceph/mon_client.h>
23
#include <linux/ceph/auth.h>
24
#include <linux/ceph/debugfs.h>
25

26
/*
 * Ceph superblock operations
 *
 * Handle the basics of mounting and unmounting.
 */
31

32
/*
 * super ops
 */
35
static void ceph_put_super(struct super_block *s)
36
{
37
	struct ceph_fs_client *fsc = ceph_sb_to_client(s);
38

39
	dout("put_super\n");
40
	ceph_mdsc_close_sessions(fsc->mdsc);
41

42
	/*
43
	 * ensure we release the bdi before put_anon_super releases
44
	 * the device name.
45
	 */
46
	if (s->s_bdi == &fsc->backing_dev_info) {
47
		bdi_unregister(&fsc->backing_dev_info);
48
		s->s_bdi = NULL;
49
	}
50

51
	return;
52
}
53

54
static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
55
{
56
	struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode);
57
	struct ceph_monmap *monmap = fsc->client->monc.monmap;
58
	struct ceph_statfs st;
59
	u64 fsid;
60
	int err;
61

62
	dout("statfs\n");
63
	err = ceph_monc_do_statfs(&fsc->client->monc, &st);
64
	if (err < 0)
65
		return err;
66

67
	/* fill in kstatfs */
68
	buf->f_type = CEPH_SUPER_MAGIC;  /* ?? */
69

70
	/*
71
	 * express utilization in terms of large blocks to avoid
72
	 * overflow on 32-bit machines.
73
	 */
74
	buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
75
	buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
76
	buf->f_bfree = (le64_to_cpu(st.kb) - le64_to_cpu(st.kb_used)) >>
77
		(CEPH_BLOCK_SHIFT-10);
78
	buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
79

80
	buf->f_files = le64_to_cpu(st.num_objects);
81
	buf->f_ffree = -1;
82
	buf->f_namelen = NAME_MAX;
83
	buf->f_frsize = PAGE_CACHE_SIZE;
84

85
	/* leave fsid little-endian, regardless of host endianness */
86
	fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1);
87
	buf->f_fsid.val[0] = fsid & 0xffffffff;
88
	buf->f_fsid.val[1] = fsid >> 32;
89

90
	return 0;
91
}
92

93

94
static int ceph_sync_fs(struct super_block *sb, int wait)
95
{
96
	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
97

98
	if (!wait) {
99
		dout("sync_fs (non-blocking)\n");
100
		ceph_flush_dirty_caps(fsc->mdsc);
101
		dout("sync_fs (non-blocking) done\n");
102
		return 0;
103
	}
104

105
	dout("sync_fs (blocking)\n");
106
	ceph_osdc_sync(&fsc->client->osdc);
107
	ceph_mdsc_sync(fsc->mdsc);
108
	dout("sync_fs (blocking) done\n");
109
	return 0;
110
}
111

112
/*
 * mount options
 */
115
/*
 * Mount option tokens.
 *
 * The order matters: parse_fsopt_token() classifies a token by
 * comparing it against the Opt_last_int and Opt_last_string markers,
 * so integer options must come first, then string options, then flags.
 */
enum {
	Opt_wsize,
	Opt_rsize,
	Opt_caps_wanted_delay_min,
	Opt_caps_wanted_delay_max,
	Opt_cap_release_safety,
	Opt_readdir_max_entries,
	Opt_readdir_max_bytes,
	Opt_congestion_kb,
	Opt_last_int,
	/* int args above */
	Opt_snapdirname,
	Opt_last_string,
	/* string args above */
	Opt_dirstat,
	Opt_nodirstat,
	Opt_rbytes,
	Opt_norbytes,
	Opt_noasyncreaddir,
	Opt_ino32,
};
136

137
static match_table_t fsopt_tokens = {
138
	{Opt_wsize, "wsize=%d"},
139
	{Opt_rsize, "rsize=%d"},
140
	{Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
141
	{Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
142
	{Opt_cap_release_safety, "cap_release_safety=%d"},
143
	{Opt_readdir_max_entries, "readdir_max_entries=%d"},
144
	{Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
145
	{Opt_congestion_kb, "write_congestion_kb=%d"},
146
	/* int args above */
147
	{Opt_snapdirname, "snapdirname=%s"},
148
	/* string args above */
149
	{Opt_dirstat, "dirstat"},
150
	{Opt_nodirstat, "nodirstat"},
151
	{Opt_rbytes, "rbytes"},
152
	{Opt_norbytes, "norbytes"},
153
	{Opt_noasyncreaddir, "noasyncreaddir"},
154
	{Opt_ino32, "ino32"},
155
	{-1, NULL}
156
};
157

158
static int parse_fsopt_token(char *c, void *private)
159
{
160
	struct ceph_mount_options *fsopt = private;
161
	substring_t argstr[MAX_OPT_ARGS];
162
	int token, intval, ret;
163

164
	token = match_token((char *)c, fsopt_tokens, argstr);
165
	if (token < 0)
166
		return -EINVAL;
167

168
	if (token < Opt_last_int) {
169
		ret = match_int(&argstr[0], &intval);
170
		if (ret < 0) {
171
			pr_err("bad mount option arg (not int) "
172
			       "at '%s'\n", c);
173
			return ret;
174
		}
175
		dout("got int token %d val %d\n", token, intval);
176
	} else if (token > Opt_last_int && token < Opt_last_string) {
177
		dout("got string token %d val %s\n", token,
178
		     argstr[0].from);
179
	} else {
180
		dout("got token %d\n", token);
181
	}
182

183
	switch (token) {
184
	case Opt_snapdirname:
185
		kfree(fsopt->snapdir_name);
186
		fsopt->snapdir_name = kstrndup(argstr[0].from,
187
					       argstr[0].to-argstr[0].from,
188
					       GFP_KERNEL);
189
		if (!fsopt->snapdir_name)
190
			return -ENOMEM;
191
		break;
192

193
		/* misc */
194
	case Opt_wsize:
195
		fsopt->wsize = intval;
196
		break;
197
	case Opt_rsize:
198
		fsopt->rsize = intval;
199
		break;
200
	case Opt_caps_wanted_delay_min:
201
		fsopt->caps_wanted_delay_min = intval;
202
		break;
203
	case Opt_caps_wanted_delay_max:
204
		fsopt->caps_wanted_delay_max = intval;
205
		break;
206
	case Opt_readdir_max_entries:
207
		fsopt->max_readdir = intval;
208
		break;
209
	case Opt_readdir_max_bytes:
210
		fsopt->max_readdir_bytes = intval;
211
		break;
212
	case Opt_congestion_kb:
213
		fsopt->congestion_kb = intval;
214
		break;
215
	case Opt_dirstat:
216
		fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT;
217
		break;
218
	case Opt_nodirstat:
219
		fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
220
		break;
221
	case Opt_rbytes:
222
		fsopt->flags |= CEPH_MOUNT_OPT_RBYTES;
223
		break;
224
	case Opt_norbytes:
225
		fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
226
		break;
227
	case Opt_noasyncreaddir:
228
		fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
229
		break;
230
	case Opt_ino32:
231
		fsopt->flags |= CEPH_MOUNT_OPT_INO32;
232
		break;
233
	default:
234
		BUG_ON(token);
235
	}
236
	return 0;
237
}
238

239
static void destroy_mount_options(struct ceph_mount_options *args)
240
{
241
	dout("destroy_mount_options %p\n", args);
242
	kfree(args->snapdir_name);
243
	kfree(args);
244
}
245

246
/*
 * strcmp_null - strcmp() that tolerates NULL arguments.
 *
 * Two NULLs compare equal.  If only @s2 is NULL the result is -1; if
 * only @s1 is NULL the result is 1.  Otherwise the result is that of
 * strcmp(@s1, @s2).
 */
static int strcmp_null(const char *s1, const char *s2)
{
	if (!s1 && !s2)
		return 0;
	if (s1 && !s2)
		return -1;
	if (!s1 && s2)
		return 1;
	return strcmp(s1, s2);
}
256

257
static int compare_mount_options(struct ceph_mount_options *new_fsopt,
258
				 struct ceph_options *new_opt,
259
				 struct ceph_fs_client *fsc)
260
{
261
	struct ceph_mount_options *fsopt1 = new_fsopt;
262
	struct ceph_mount_options *fsopt2 = fsc->mount_options;
263
	int ofs = offsetof(struct ceph_mount_options, snapdir_name);
264
	int ret;
265

266
	ret = memcmp(fsopt1, fsopt2, ofs);
267
	if (ret)
268
		return ret;
269

270
	ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
271
	if (ret)
272
		return ret;
273

274
	return ceph_compare_options(new_opt, fsc->client);
275
}
276

277
static int parse_mount_options(struct ceph_mount_options **pfsopt,
278
			       struct ceph_options **popt,
279
			       int flags, char *options,
280
			       const char *dev_name,
281
			       const char **path)
282
{
283
	struct ceph_mount_options *fsopt;
284
	const char *dev_name_end;
285
	int err = -ENOMEM;
286

287
	fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL);
288
	if (!fsopt)
289
		return -ENOMEM;
290

291
	dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name);
292

293
        fsopt->sb_flags = flags;
294
        fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
295

296
        fsopt->rsize = CEPH_RSIZE_DEFAULT;
297
        fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
298
	fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
299
	fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
300
        fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
301
        fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
302
        fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
303
        fsopt->congestion_kb = default_congestion_kb();
304
	
305
        /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
306
        err = -EINVAL;
307
        if (!dev_name)
308
                goto out;
309
        *path = strstr(dev_name, ":/");
310
        if (*path == NULL) {
311
                pr_err("device name is missing path (no :/ in %s)\n",
312
                       dev_name);
313
                goto out;
314
        }
315
	dev_name_end = *path;
316
	dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
317

318
	/* path on server */
319
	*path += 2;
320
	dout("server path '%s'\n", *path);
321

322
	err = ceph_parse_options(popt, options, dev_name, dev_name_end,
323
				 parse_fsopt_token, (void *)fsopt);
324
	if (err)
325
		goto out;
326

327
	/* success */
328
	*pfsopt = fsopt;
329
	return 0;
330

331
out:
332
	destroy_mount_options(fsopt);
333
	return err;
334
}
335

336
/**
337
 * ceph_show_options - Show mount options in /proc/mounts
338
 * @m: seq_file to write to
339
 * @mnt: mount descriptor
340
 */
341
static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
342
{
343
	struct ceph_fs_client *fsc = ceph_sb_to_client(mnt->mnt_sb);
344
	struct ceph_mount_options *fsopt = fsc->mount_options;
345
	struct ceph_options *opt = fsc->client->options;
346

347
	if (opt->flags & CEPH_OPT_FSID)
348
		seq_printf(m, ",fsid=%pU", &opt->fsid);
349
	if (opt->flags & CEPH_OPT_NOSHARE)
350
		seq_puts(m, ",noshare");
351
	if (opt->flags & CEPH_OPT_NOCRC)
352
		seq_puts(m, ",nocrc");
353

354
	if (opt->name)
355
		seq_printf(m, ",name=%s", opt->name);
356
	if (opt->key)
357
		seq_puts(m, ",secret=<hidden>");
358

359
	if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
360
		seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
361
	if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
362
		seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
363
	if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
364
		seq_printf(m, ",osdtimeout=%d", opt->osd_timeout);
365
	if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
366
		seq_printf(m, ",osdkeepalivetimeout=%d",
367
			   opt->osd_keepalive_timeout);
368

369
	if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
370
		seq_puts(m, ",dirstat");
371
	if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0)
372
		seq_puts(m, ",norbytes");
373
	if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
374
		seq_puts(m, ",noasyncreaddir");
375

376
	if (fsopt->wsize)
377
		seq_printf(m, ",wsize=%d", fsopt->wsize);
378
	if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
379
		seq_printf(m, ",rsize=%d", fsopt->rsize);
380
	if (fsopt->congestion_kb != default_congestion_kb())
381
		seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
382
	if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
383
		seq_printf(m, ",caps_wanted_delay_min=%d",
384
			 fsopt->caps_wanted_delay_min);
385
	if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
386
		seq_printf(m, ",caps_wanted_delay_max=%d",
387
			   fsopt->caps_wanted_delay_max);
388
	if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
389
		seq_printf(m, ",cap_release_safety=%d",
390
			   fsopt->cap_release_safety);
391
	if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
392
		seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir);
393
	if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
394
		seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
395
	if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
396
		seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
397
	return 0;
398
}
399

400
/*
401
 * handle any mon messages the standard library doesn't understand.
402
 * return error if we don't either.
403
 */
404
static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
405
{
406
	struct ceph_fs_client *fsc = client->private;
407
	int type = le16_to_cpu(msg->hdr.type);
408

409
	switch (type) {
410
	case CEPH_MSG_MDS_MAP:
411
		ceph_mdsc_handle_map(fsc->mdsc, msg);
412
		return 0;
413

414
	default:
415
		return -1;
416
	}
417
}
418

419
/*
420
 * create a new fs client
421
 */
422
struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
423
					struct ceph_options *opt)
424
{
425
	struct ceph_fs_client *fsc;
426
	int err = -ENOMEM;
427

428
	fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
429
	if (!fsc)
430
		return ERR_PTR(-ENOMEM);
431

432
	fsc->client = ceph_create_client(opt, fsc);
433
	if (IS_ERR(fsc->client)) {
434
		err = PTR_ERR(fsc->client);
435
		goto fail;
436
	}
437
	fsc->client->extra_mon_dispatch = extra_mon_dispatch;
438
	fsc->client->supported_features |= CEPH_FEATURE_FLOCK |
439
		CEPH_FEATURE_DIRLAYOUTHASH;
440
	fsc->client->monc.want_mdsmap = 1;
441

442
	fsc->mount_options = fsopt;
443

444
	fsc->sb = NULL;
445
	fsc->mount_state = CEPH_MOUNT_MOUNTING;
446

447
	atomic_long_set(&fsc->writeback_count, 0);
448

449
	err = bdi_init(&fsc->backing_dev_info);
450
	if (err < 0)
451
		goto fail_client;
452

453
	err = -ENOMEM;
454
	/*
455
	 * The number of concurrent works can be high but they don't need
456
	 * to be processed in parallel, limit concurrency.
457
	 */
458
	fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1);
459
	if (fsc->wb_wq == NULL)
460
		goto fail_bdi;
461
	fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
462
	if (fsc->pg_inv_wq == NULL)
463
		goto fail_wb_wq;
464
	fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1);
465
	if (fsc->trunc_wq == NULL)
466
		goto fail_pg_inv_wq;
467

468
	/* set up mempools */
469
	err = -ENOMEM;
470
	fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10,
471
			      fsc->mount_options->wsize >> PAGE_CACHE_SHIFT);
472
	if (!fsc->wb_pagevec_pool)
473
		goto fail_trunc_wq;
474

475
	/* caps */
476
	fsc->min_caps = fsopt->max_readdir;
477

478
	return fsc;
479

480
fail_trunc_wq:
481
	destroy_workqueue(fsc->trunc_wq);
482
fail_pg_inv_wq:
483
	destroy_workqueue(fsc->pg_inv_wq);
484
fail_wb_wq:
485
	destroy_workqueue(fsc->wb_wq);
486
fail_bdi:
487
	bdi_destroy(&fsc->backing_dev_info);
488
fail_client:
489
	ceph_destroy_client(fsc->client);
490
fail:
491
	kfree(fsc);
492
	return ERR_PTR(err);
493
}
494

495
void destroy_fs_client(struct ceph_fs_client *fsc)
496
{
497
	dout("destroy_fs_client %p\n", fsc);
498

499
	destroy_workqueue(fsc->wb_wq);
500
	destroy_workqueue(fsc->pg_inv_wq);
501
	destroy_workqueue(fsc->trunc_wq);
502

503
	bdi_destroy(&fsc->backing_dev_info);
504

505
	mempool_destroy(fsc->wb_pagevec_pool);
506

507
	destroy_mount_options(fsc->mount_options);
508

509
	ceph_fs_debugfs_cleanup(fsc);
510

511
	ceph_destroy_client(fsc->client);
512

513
	kfree(fsc);
514
	dout("destroy_fs_client %p done\n", fsc);
515
}
516

517
/*
 * caches
 */
520
/* slab caches; created by init_caches(), released by destroy_caches() */
struct kmem_cache *ceph_inode_cachep;
struct kmem_cache *ceph_cap_cachep;
struct kmem_cache *ceph_dentry_cachep;
struct kmem_cache *ceph_file_cachep;
524

525
static void ceph_inode_init_once(void *foo)
526
{
527
	struct ceph_inode_info *ci = foo;
528
	inode_init_once(&ci->vfs_inode);
529
}
530

531
/*
 * init_caches - create the four ceph slab caches.
 *
 * Returns 0 on success.  If any cache cannot be created, the caches
 * created so far are destroyed and -ENOMEM is returned.
 */
static int __init init_caches(void)
{
	ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
				      sizeof(struct ceph_inode_info),
				      __alignof__(struct ceph_inode_info),
				      (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
				      ceph_inode_init_once);
	if (ceph_inode_cachep == NULL)
		return -ENOMEM;

	ceph_cap_cachep = KMEM_CACHE(ceph_cap,
				     SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
	if (ceph_cap_cachep == NULL)
		goto bad_cap;

	ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
					SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
	if (ceph_dentry_cachep == NULL)
		goto bad_dentry;

	ceph_file_cachep = KMEM_CACHE(ceph_file_info,
				      SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
	if (ceph_file_cachep == NULL)
		goto bad_file;

	return 0;

	/* each label destroys the cache created just before the failure */
bad_file:
	kmem_cache_destroy(ceph_dentry_cachep);
bad_dentry:
	kmem_cache_destroy(ceph_cap_cachep);
bad_cap:
	kmem_cache_destroy(ceph_inode_cachep);
	return -ENOMEM;
}
566

567
static void destroy_caches(void)
568
{
569
	kmem_cache_destroy(ceph_inode_cachep);
570
	kmem_cache_destroy(ceph_cap_cachep);
571
	kmem_cache_destroy(ceph_dentry_cachep);
572
	kmem_cache_destroy(ceph_file_cachep);
573
}
574

575

576
/*
577
 * ceph_umount_begin - initiate forced umount.  Tear down down the
578
 * mount, skipping steps that may hang while waiting for server(s).
579
 */
580
static void ceph_umount_begin(struct super_block *sb)
581
{
582
	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
583

584
	dout("ceph_umount_begin - starting forced umount\n");
585
	if (!fsc)
586
		return;
587
	fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
588
	return;
589
}
590

591
static const struct super_operations ceph_super_ops = {
592
	.alloc_inode	= ceph_alloc_inode,
593
	.destroy_inode	= ceph_destroy_inode,
594
	.write_inode    = ceph_write_inode,
595
	.sync_fs        = ceph_sync_fs,
596
	.put_super	= ceph_put_super,
597
	.show_options   = ceph_show_options,
598
	.statfs		= ceph_statfs,
599
	.umount_begin   = ceph_umount_begin,
600
};
601

602
/*
603
 * Bootstrap mount by opening the root directory.  Note the mount
604
 * @started time from caller, and time out if this takes too long.
605
 */
606
static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
607
				       const char *path,
608
				       unsigned long started)
609
{
610
	struct ceph_mds_client *mdsc = fsc->mdsc;
611
	struct ceph_mds_request *req = NULL;
612
	int err;
613
	struct dentry *root;
614

615
	/* open dir */
616
	dout("open_root_inode opening '%s'\n", path);
617
	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
618
	if (IS_ERR(req))
619
		return ERR_CAST(req);
620
	req->r_path1 = kstrdup(path, GFP_NOFS);
621
	req->r_ino1.ino = CEPH_INO_ROOT;
622
	req->r_ino1.snap = CEPH_NOSNAP;
623
	req->r_started = started;
624
	req->r_timeout = fsc->client->options->mount_timeout * HZ;
625
	req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
626
	req->r_num_caps = 2;
627
	err = ceph_mdsc_do_request(mdsc, NULL, req);
628
	if (err == 0) {
629
		dout("open_root_inode success\n");
630
		if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT &&
631
		    fsc->sb->s_root == NULL)
632
			root = d_alloc_root(req->r_target_inode);
633
		else
634
			root = d_obtain_alias(req->r_target_inode);
635
		req->r_target_inode = NULL;
636
		dout("open_root_inode success, root dentry is %p\n", root);
637
	} else {
638
		root = ERR_PTR(err);
639
	}
640
	ceph_mdsc_put_request(req);
641
	return root;
642
}
643

644

645

646

647
/*
648
 * mount: join the ceph cluster, and open root directory.
649
 */
650
static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
651
		      const char *path)
652
{
653
	int err;
654
	unsigned long started = jiffies;  /* note the start time */
655
	struct dentry *root;
656
	int first = 0;   /* first vfsmount for this super_block */
657

658
	dout("mount start\n");
659
	mutex_lock(&fsc->client->mount_mutex);
660

661
	err = __ceph_open_session(fsc->client, started);
662
	if (err < 0)
663
		goto out;
664

665
	dout("mount opening root\n");
666
	root = open_root_dentry(fsc, "", started);
667
	if (IS_ERR(root)) {
668
		err = PTR_ERR(root);
669
		goto out;
670
	}
671
	if (fsc->sb->s_root) {
672
		dput(root);
673
	} else {
674
		fsc->sb->s_root = root;
675
		first = 1;
676

677
		err = ceph_fs_debugfs_init(fsc);
678
		if (err < 0)
679
			goto fail;
680
	}
681

682
	if (path[0] == 0) {
683
		dget(root);
684
	} else {
685
		dout("mount opening base mountpoint\n");
686
		root = open_root_dentry(fsc, path, started);
687
		if (IS_ERR(root)) {
688
			err = PTR_ERR(root);
689
			goto fail;
690
		}
691
	}
692

693
	fsc->mount_state = CEPH_MOUNT_MOUNTED;
694
	dout("mount success\n");
695
	mutex_unlock(&fsc->client->mount_mutex);
696
	return root;
697

698
out:
699
	mutex_unlock(&fsc->client->mount_mutex);
700
	return ERR_PTR(err);
701

702
fail:
703
	if (first) {
704
		dput(fsc->sb->s_root);
705
		fsc->sb->s_root = NULL;
706
	}
707
	goto out;
708
}
709

710
static int ceph_set_super(struct super_block *s, void *data)
711
{
712
	struct ceph_fs_client *fsc = data;
713
	int ret;
714

715
	dout("set_super %p data %p\n", s, data);
716

717
	s->s_flags = fsc->mount_options->sb_flags;
718
	s->s_maxbytes = 1ULL << 40;  /* temp value until we get mdsmap */
719

720
	s->s_fs_info = fsc;
721
	fsc->sb = s;
722

723
	s->s_op = &ceph_super_ops;
724
	s->s_export_op = &ceph_export_ops;
725

726
	s->s_time_gran = 1000;  /* 1000 ns == 1 us */
727

728
	ret = set_anon_super(s, NULL);  /* what is that second arg for? */
729
	if (ret != 0)
730
		goto fail;
731

732
	return ret;
733

734
fail:
735
	s->s_fs_info = NULL;
736
	fsc->sb = NULL;
737
	return ret;
738
}
739

740
/*
741
 * share superblock if same fs AND options
742
 */
743
static int ceph_compare_super(struct super_block *sb, void *data)
744
{
745
	struct ceph_fs_client *new = data;
746
	struct ceph_mount_options *fsopt = new->mount_options;
747
	struct ceph_options *opt = new->client->options;
748
	struct ceph_fs_client *other = ceph_sb_to_client(sb);
749

750
	dout("ceph_compare_super %p\n", sb);
751

752
	if (compare_mount_options(fsopt, opt, other)) {
753
		dout("monitor(s)/mount options don't match\n");
754
		return 0;
755
	}
756
	if ((opt->flags & CEPH_OPT_FSID) &&
757
	    ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
758
		dout("fsid doesn't match\n");
759
		return 0;
760
	}
761
	if (fsopt->sb_flags != other->mount_options->sb_flags) {
762
		dout("flags differ\n");
763
		return 0;
764
	}
765
	return 1;
766
}
767

768
/*
769
 * construct our own bdi so we can control readahead, etc.
770
 */
771
static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
772

773
static int ceph_register_bdi(struct super_block *sb,
774
			     struct ceph_fs_client *fsc)
775
{
776
	int err;
777

778
	/* set ra_pages based on rsize mount option? */
779
	if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
780
		fsc->backing_dev_info.ra_pages =
781
			(fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
782
			>> PAGE_SHIFT;
783
	err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d",
784
			   atomic_long_inc_return(&bdi_seq));
785
	if (!err)
786
		sb->s_bdi = &fsc->backing_dev_info;
787
	return err;
788
}
789

790
static struct dentry *ceph_mount(struct file_system_type *fs_type,
791
		       int flags, const char *dev_name, void *data)
792
{
793
	struct super_block *sb;
794
	struct ceph_fs_client *fsc;
795
	struct dentry *res;
796
	int err;
797
	int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
798
	const char *path = NULL;
799
	struct ceph_mount_options *fsopt = NULL;
800
	struct ceph_options *opt = NULL;
801

802
	dout("ceph_mount\n");
803
	err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
804
	if (err < 0) {
805
		res = ERR_PTR(err);
806
		goto out_final;
807
	}
808

809
	/* create client (which we may/may not use) */
810
	fsc = create_fs_client(fsopt, opt);
811
	if (IS_ERR(fsc)) {
812
		res = ERR_CAST(fsc);
813
		kfree(fsopt);
814
		kfree(opt);
815
		goto out_final;
816
	}
817

818
	err = ceph_mdsc_init(fsc);
819
	if (err < 0) {
820
		res = ERR_PTR(err);
821
		goto out;
822
	}
823

824
	if (ceph_test_opt(fsc->client, NOSHARE))
825
		compare_super = NULL;
826
	sb = sget(fs_type, compare_super, ceph_set_super, fsc);
827
	if (IS_ERR(sb)) {
828
		res = ERR_CAST(sb);
829
		goto out;
830
	}
831

832
	if (ceph_sb_to_client(sb) != fsc) {
833
		ceph_mdsc_destroy(fsc);
834
		destroy_fs_client(fsc);
835
		fsc = ceph_sb_to_client(sb);
836
		dout("get_sb got existing client %p\n", fsc);
837
	} else {
838
		dout("get_sb using new client %p\n", fsc);
839
		err = ceph_register_bdi(sb, fsc);
840
		if (err < 0) {
841
			res = ERR_PTR(err);
842
			goto out_splat;
843
		}
844
	}
845

846
	res = ceph_real_mount(fsc, path);
847
	if (IS_ERR(res))
848
		goto out_splat;
849
	dout("root %p inode %p ino %llx.%llx\n", res,
850
	     res->d_inode, ceph_vinop(res->d_inode));
851
	return res;
852

853
out_splat:
854
	ceph_mdsc_close_sessions(fsc->mdsc);
855
	deactivate_locked_super(sb);
856
	goto out_final;
857

858
out:
859
	ceph_mdsc_destroy(fsc);
860
	destroy_fs_client(fsc);
861
out_final:
862
	dout("ceph_mount fail %ld\n", PTR_ERR(res));
863
	return res;
864
}
865

866
static void ceph_kill_sb(struct super_block *s)
867
{
868
	struct ceph_fs_client *fsc = ceph_sb_to_client(s);
869
	dout("kill_sb %p\n", s);
870
	ceph_mdsc_pre_umount(fsc->mdsc);
871
	kill_anon_super(s);    /* will call put_super after sb is r/o */
872
	ceph_mdsc_destroy(fsc);
873
	destroy_fs_client(fsc);
874
}
875

876
static struct file_system_type ceph_fs_type = {
877
	.owner		= THIS_MODULE,
878
	.name		= "ceph",
879
	.mount		= ceph_mount,
880
	.kill_sb	= ceph_kill_sb,
881
	.fs_flags	= FS_RENAME_DOES_D_MOVE,
882
};
883

884
/*
 * Two-level stringification: STRINGIFY(x) macro-expands x before
 * turning it into a string literal; _STRINGIFY(x) does not.
 */
#define _STRINGIFY(x) #x
#define STRINGIFY(x) _STRINGIFY(x)
886

887
/*
 * init_ceph - module init: create the slab caches and register the
 * filesystem type.
 *
 * Returns 0 on success, or the error from init_caches() or
 * register_filesystem().  The caches are destroyed again if
 * registration fails.
 */
static int __init init_ceph(void)
{
	int ret = init_caches();
	if (ret)
		goto out;

	ret = register_filesystem(&ceph_fs_type);
	if (ret)
		goto out_icache;

	pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);

	return 0;

out_icache:
	destroy_caches();
out:
	return ret;
}
906

907
/*
 * exit_ceph - module exit: unregister the filesystem type, then
 * destroy the slab caches.
 */
static void __exit exit_ceph(void)
{
	dout("exit_ceph\n");
	unregister_filesystem(&ceph_fs_type);
	destroy_caches();
}
913

914
module_init(init_ceph);
915
module_exit(exit_ceph);
916

917
MODULE_AUTHOR("Sage Weil <[email protected]>");
918
MODULE_AUTHOR("Yehuda Sadeh <[email protected]>");
919
MODULE_AUTHOR("Patience Warnick <[email protected]>");
920
MODULE_DESCRIPTION("Ceph filesystem for Linux");
921
MODULE_LICENSE("GPL");
922

923
Product

Resources

Company