CoCalc -- locks.c

GitHub Repository: torvalds/linux
Path: blob/master/fs/ceph/locks.c
26283 views
1
// SPDX-License-Identifier: GPL-2.0
2
#include <linux/ceph/ceph_debug.h>
3

4
#include <linux/file.h>
5
#include <linux/namei.h>
6
#include <linux/random.h>
7

8
#include "super.h"
9
#include "mds_client.h"
10
#include <linux/filelock.h>
11
#include <linux/ceph/pagelist.h>
12

13
/* Filled with random bytes by ceph_flock_init(); mixed into owner ids. */
static u64 lock_secret;

/* Defined below; handed to ceph_mdsc_wait_request() for waiting requests. */
static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
					 struct ceph_mds_request *req);
16

17
static inline u64 secure_addr(void *addr)
18
{
19
	u64 v = lock_secret ^ (u64)(unsigned long)addr;
20
	/*
21
	 * Set the most significant bit, so that MDS knows the 'owner'
22
	 * is sufficient to identify the owner of lock. (old code uses
23
	 * both 'owner' and 'pid')
24
	 */
25
	v |= (1ULL << 63);
26
	return v;
27
}
28

29
/* Init-time hook: fill lock_secret with random bytes. */
void __init ceph_flock_init(void)
{
	get_random_bytes(&lock_secret, sizeof lock_secret);
}
33

34
static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
35
{
36
	struct inode *inode = file_inode(dst->c.flc_file);
37
	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
38
	dst->fl_u.ceph.inode = igrab(inode);
39
}
40

41
/*
42
 * Do not use the 'fl->fl_file' in release function, which
43
 * is possibly already released by another thread.
44
 */
45
static void ceph_fl_release_lock(struct file_lock *fl)
46
{
47
	struct inode *inode = fl->fl_u.ceph.inode;
48
	struct ceph_inode_info *ci;
49

50
	/*
51
	 * If inode is NULL it should be a request file_lock,
52
	 * nothing we can do.
53
	 */
54
	if (!inode)
55
		return;
56

57
	ci = ceph_inode(inode);
58
	if (atomic_dec_and_test(&ci->i_filelock_ref)) {
59
		/* clear error when all locks are released */
60
		spin_lock(&ci->i_ceph_lock);
61
		ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
62
		spin_unlock(&ci->i_ceph_lock);
63
	}
64
	fl->fl_u.ceph.inode = NULL;
65
	iput(inode);
66
}
67

68
static const struct file_lock_operations ceph_fl_lock_ops = {
69
	.fl_copy_lock = ceph_fl_copy_lock,
70
	.fl_release_private = ceph_fl_release_lock,
71
};
72

73
/*
74
 * Implement fcntl and flock locking functions.
75
 */
76
static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
77
			     int cmd, u8 wait, struct file_lock *fl)
78
{
79
	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
80
	struct ceph_client *cl = mdsc->fsc->client;
81
	struct ceph_mds_request *req;
82
	int err;
83
	u64 length = 0;
84
	u64 owner;
85

86
	if (operation == CEPH_MDS_OP_SETFILELOCK) {
87
		/*
88
		 * increasing i_filelock_ref closes race window between
89
		 * handling request reply and adding file_lock struct to
90
		 * inode. Otherwise, auth caps may get trimmed in the
91
		 * window. Caller function will decrease the counter.
92
		 */
93
		fl->fl_ops = &ceph_fl_lock_ops;
94
		fl->fl_ops->fl_copy_lock(fl, NULL);
95
	}
96

97
	if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
98
		wait = 0;
99

100
	req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
101
	if (IS_ERR(req))
102
		return PTR_ERR(req);
103
	req->r_inode = inode;
104
	ihold(inode);
105
	req->r_num_caps = 1;
106

107
	/* mds requires start and length rather than start and end */
108
	if (LLONG_MAX == fl->fl_end)
109
		length = 0;
110
	else
111
		length = fl->fl_end - fl->fl_start + 1;
112

113
	owner = secure_addr(fl->c.flc_owner);
114

115
	doutc(cl, "rule: %d, op: %d, owner: %llx, pid: %llu, "
116
		    "start: %llu, length: %llu, wait: %d, type: %d\n",
117
		    (int)lock_type, (int)operation, owner,
118
		    (u64) fl->c.flc_pid,
119
		    fl->fl_start, length, wait, fl->c.flc_type);
120

121
	req->r_args.filelock_change.rule = lock_type;
122
	req->r_args.filelock_change.type = cmd;
123
	req->r_args.filelock_change.owner = cpu_to_le64(owner);
124
	req->r_args.filelock_change.pid = cpu_to_le64((u64) fl->c.flc_pid);
125
	req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
126
	req->r_args.filelock_change.length = cpu_to_le64(length);
127
	req->r_args.filelock_change.wait = wait;
128

129
	err = ceph_mdsc_submit_request(mdsc, inode, req);
130
	if (!err)
131
		err = ceph_mdsc_wait_request(mdsc, req, wait ?
132
					ceph_lock_wait_for_completion : NULL);
133
	if (!err && operation == CEPH_MDS_OP_GETFILELOCK) {
134
		fl->c.flc_pid = -le64_to_cpu(req->r_reply_info.filelock_reply->pid);
135
		if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
136
			fl->c.flc_type = F_RDLCK;
137
		else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
138
			fl->c.flc_type = F_WRLCK;
139
		else
140
			fl->c.flc_type = F_UNLCK;
141

142
		fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
143
		length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
144
						 le64_to_cpu(req->r_reply_info.filelock_reply->length);
145
		if (length >= 1)
146
			fl->fl_end = length -1;
147
		else
148
			fl->fl_end = 0;
149

150
	}
151
	ceph_mdsc_put_request(req);
152
	doutc(cl, "rule: %d, op: %d, pid: %llu, start: %llu, "
153
	      "length: %llu, wait: %d, type: %d, err code %d\n",
154
	      (int)lock_type, (int)operation, (u64) fl->c.flc_pid,
155
	      fl->fl_start, length, wait, fl->c.flc_type, err);
156
	return err;
157
}
158

159
static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
160
                                         struct ceph_mds_request *req)
161
{
162
	struct ceph_client *cl = mdsc->fsc->client;
163
	struct ceph_mds_request *intr_req;
164
	struct inode *inode = req->r_inode;
165
	int err, lock_type;
166

167
	BUG_ON(req->r_op != CEPH_MDS_OP_SETFILELOCK);
168
	if (req->r_args.filelock_change.rule == CEPH_LOCK_FCNTL)
169
		lock_type = CEPH_LOCK_FCNTL_INTR;
170
	else if (req->r_args.filelock_change.rule == CEPH_LOCK_FLOCK)
171
		lock_type = CEPH_LOCK_FLOCK_INTR;
172
	else
173
		BUG_ON(1);
174
	BUG_ON(req->r_args.filelock_change.type == CEPH_LOCK_UNLOCK);
175

176
	err = wait_for_completion_interruptible(&req->r_completion);
177
	if (!err)
178
		return 0;
179

180
	doutc(cl, "request %llu was interrupted\n", req->r_tid);
181

182
	mutex_lock(&mdsc->mutex);
183
	if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
184
		err = 0;
185
	} else {
186
		/*
187
		 * ensure we aren't running concurrently with
188
		 * ceph_fill_trace or ceph_readdir_prepopulate, which
189
		 * rely on locks (dir mutex) held by our caller.
190
		 */
191
		mutex_lock(&req->r_fill_mutex);
192
		req->r_err = err;
193
		set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
194
		mutex_unlock(&req->r_fill_mutex);
195

196
		if (!req->r_session) {
197
			// haven't sent the request
198
			err = 0;
199
		}
200
	}
201
	mutex_unlock(&mdsc->mutex);
202
	if (!err)
203
		return 0;
204

205
	intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK,
206
					    USE_AUTH_MDS);
207
	if (IS_ERR(intr_req))
208
		return PTR_ERR(intr_req);
209

210
	intr_req->r_inode = inode;
211
	ihold(inode);
212
	intr_req->r_num_caps = 1;
213

214
	intr_req->r_args.filelock_change = req->r_args.filelock_change;
215
	intr_req->r_args.filelock_change.rule = lock_type;
216
	intr_req->r_args.filelock_change.type = CEPH_LOCK_UNLOCK;
217

218
	err = ceph_mdsc_do_request(mdsc, inode, intr_req);
219
	ceph_mdsc_put_request(intr_req);
220

221
	if (err && err != -ERESTARTSYS)
222
		return err;
223

224
	wait_for_completion_killable(&req->r_safe_completion);
225
	return 0;
226
}
227

228
static int try_unlock_file(struct file *file, struct file_lock *fl)
229
{
230
	int err;
231
	unsigned int orig_flags = fl->c.flc_flags;
232
	fl->c.flc_flags |= FL_EXISTS;
233
	err = locks_lock_file_wait(file, fl);
234
	fl->c.flc_flags = orig_flags;
235
	if (err == -ENOENT) {
236
		if (!(orig_flags & FL_EXISTS))
237
			err = 0;
238
		return err;
239
	}
240
	return 1;
241
}
242

243
/*
244
 * Attempt to set an fcntl lock.
245
 * For now, this just goes away to the server. Later it may be more awesome.
246
 */
247
int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
248
{
249
	struct inode *inode = file_inode(file);
250
	struct ceph_inode_info *ci = ceph_inode(inode);
251
	struct ceph_client *cl = ceph_inode_to_client(inode);
252
	int err = 0;
253
	u16 op = CEPH_MDS_OP_SETFILELOCK;
254
	u8 wait = 0;
255
	u8 lock_cmd;
256

257
	if (!(fl->c.flc_flags & FL_POSIX))
258
		return -ENOLCK;
259

260
	if (ceph_inode_is_shutdown(inode))
261
		return -ESTALE;
262

263
	doutc(cl, "fl_owner: %p\n", fl->c.flc_owner);
264

265
	/* set wait bit as appropriate, then make command as Ceph expects it*/
266
	if (IS_GETLK(cmd))
267
		op = CEPH_MDS_OP_GETFILELOCK;
268
	else if (IS_SETLKW(cmd))
269
		wait = 1;
270

271
	spin_lock(&ci->i_ceph_lock);
272
	if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
273
		err = -EIO;
274
	}
275
	spin_unlock(&ci->i_ceph_lock);
276
	if (err < 0) {
277
		if (op == CEPH_MDS_OP_SETFILELOCK && lock_is_unlock(fl))
278
			posix_lock_file(file, fl, NULL);
279
		return err;
280
	}
281

282
	if (lock_is_read(fl))
283
		lock_cmd = CEPH_LOCK_SHARED;
284
	else if (lock_is_write(fl))
285
		lock_cmd = CEPH_LOCK_EXCL;
286
	else
287
		lock_cmd = CEPH_LOCK_UNLOCK;
288

289
	if (op == CEPH_MDS_OP_SETFILELOCK && lock_is_unlock(fl)) {
290
		err = try_unlock_file(file, fl);
291
		if (err <= 0)
292
			return err;
293
	}
294

295
	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl);
296
	if (!err) {
297
		if (op == CEPH_MDS_OP_SETFILELOCK && F_UNLCK != fl->c.flc_type) {
298
			doutc(cl, "locking locally\n");
299
			err = posix_lock_file(file, fl, NULL);
300
			if (err) {
301
				/* undo! This should only happen if
302
				 * the kernel detects local
303
				 * deadlock. */
304
				ceph_lock_message(CEPH_LOCK_FCNTL, op, inode,
305
						  CEPH_LOCK_UNLOCK, 0, fl);
306
				doutc(cl, "got %d on posix_lock_file, undid lock\n",
307
				      err);
308
			}
309
		}
310
	}
311
	return err;
312
}
313

314
int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
315
{
316
	struct inode *inode = file_inode(file);
317
	struct ceph_inode_info *ci = ceph_inode(inode);
318
	struct ceph_client *cl = ceph_inode_to_client(inode);
319
	int err = 0;
320
	u8 wait = 0;
321
	u8 lock_cmd;
322

323
	if (!(fl->c.flc_flags & FL_FLOCK))
324
		return -ENOLCK;
325

326
	if (ceph_inode_is_shutdown(inode))
327
		return -ESTALE;
328

329
	doutc(cl, "fl_file: %p\n", fl->c.flc_file);
330

331
	spin_lock(&ci->i_ceph_lock);
332
	if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
333
		err = -EIO;
334
	}
335
	spin_unlock(&ci->i_ceph_lock);
336
	if (err < 0) {
337
		if (lock_is_unlock(fl))
338
			locks_lock_file_wait(file, fl);
339
		return err;
340
	}
341

342
	if (IS_SETLKW(cmd))
343
		wait = 1;
344

345
	if (lock_is_read(fl))
346
		lock_cmd = CEPH_LOCK_SHARED;
347
	else if (lock_is_write(fl))
348
		lock_cmd = CEPH_LOCK_EXCL;
349
	else
350
		lock_cmd = CEPH_LOCK_UNLOCK;
351

352
	if (lock_is_unlock(fl)) {
353
		err = try_unlock_file(file, fl);
354
		if (err <= 0)
355
			return err;
356
	}
357

358
	err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
359
				inode, lock_cmd, wait, fl);
360
	if (!err && F_UNLCK != fl->c.flc_type) {
361
		err = locks_lock_file_wait(file, fl);
362
		if (err) {
363
			ceph_lock_message(CEPH_LOCK_FLOCK,
364
					  CEPH_MDS_OP_SETFILELOCK,
365
					  inode, CEPH_LOCK_UNLOCK, 0, fl);
366
			doutc(cl, "got %d on locks_lock_file_wait, undid lock\n",
367
			      err);
368
		}
369
	}
370
	return err;
371
}
372

373
/*
374
 * Fills in the passed counter variables, so you can prepare pagelist metadata
375
 * before calling ceph_encode_locks.
376
 */
377
void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
378
{
379
	struct ceph_client *cl = ceph_inode_to_client(inode);
380
	struct file_lock *lock;
381
	struct file_lock_context *ctx;
382

383
	*fcntl_count = 0;
384
	*flock_count = 0;
385

386
	ctx = locks_inode_context(inode);
387
	if (ctx) {
388
		spin_lock(&ctx->flc_lock);
389
		for_each_file_lock(lock, &ctx->flc_posix)
390
			++(*fcntl_count);
391
		for_each_file_lock(lock, &ctx->flc_flock)
392
			++(*flock_count);
393
		spin_unlock(&ctx->flc_lock);
394
	}
395
	doutc(cl, "counted %d flock locks and %d fcntl locks\n",
396
	      *flock_count, *fcntl_count);
397
}
398

399
/*
400
 * Given a pointer to a lock, convert it to a ceph filelock
401
 */
402
static int lock_to_ceph_filelock(struct inode *inode,
403
				 struct file_lock *lock,
404
				 struct ceph_filelock *cephlock)
405
{
406
	struct ceph_client *cl = ceph_inode_to_client(inode);
407
	int err = 0;
408

409
	cephlock->start = cpu_to_le64(lock->fl_start);
410
	cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
411
	cephlock->client = cpu_to_le64(0);
412
	cephlock->pid = cpu_to_le64((u64) lock->c.flc_pid);
413
	cephlock->owner = cpu_to_le64(secure_addr(lock->c.flc_owner));
414

415
	switch (lock->c.flc_type) {
416
	case F_RDLCK:
417
		cephlock->type = CEPH_LOCK_SHARED;
418
		break;
419
	case F_WRLCK:
420
		cephlock->type = CEPH_LOCK_EXCL;
421
		break;
422
	case F_UNLCK:
423
		cephlock->type = CEPH_LOCK_UNLOCK;
424
		break;
425
	default:
426
		doutc(cl, "Have unknown lock type %d\n",
427
		      lock->c.flc_type);
428
		err = -EINVAL;
429
	}
430

431
	return err;
432
}
433

434
/*
435
 * Encode the flock and fcntl locks for the given inode into the ceph_filelock
436
 * array. Must be called with inode->i_lock already held.
437
 * If we encounter more of a specific lock type than expected, return -ENOSPC.
438
 */
439
int ceph_encode_locks_to_buffer(struct inode *inode,
440
				struct ceph_filelock *flocks,
441
				int num_fcntl_locks, int num_flock_locks)
442
{
443
	struct file_lock *lock;
444
	struct file_lock_context *ctx = locks_inode_context(inode);
445
	struct ceph_client *cl = ceph_inode_to_client(inode);
446
	int err = 0;
447
	int seen_fcntl = 0;
448
	int seen_flock = 0;
449
	int l = 0;
450

451
	doutc(cl, "encoding %d flock and %d fcntl locks\n", num_flock_locks,
452
	      num_fcntl_locks);
453

454
	if (!ctx)
455
		return 0;
456

457
	spin_lock(&ctx->flc_lock);
458
	for_each_file_lock(lock, &ctx->flc_posix) {
459
		++seen_fcntl;
460
		if (seen_fcntl > num_fcntl_locks) {
461
			err = -ENOSPC;
462
			goto fail;
463
		}
464
		err = lock_to_ceph_filelock(inode, lock, &flocks[l]);
465
		if (err)
466
			goto fail;
467
		++l;
468
	}
469
	for_each_file_lock(lock, &ctx->flc_flock) {
470
		++seen_flock;
471
		if (seen_flock > num_flock_locks) {
472
			err = -ENOSPC;
473
			goto fail;
474
		}
475
		err = lock_to_ceph_filelock(inode, lock, &flocks[l]);
476
		if (err)
477
			goto fail;
478
		++l;
479
	}
480
fail:
481
	spin_unlock(&ctx->flc_lock);
482
	return err;
483
}
484

485
/*
486
 * Copy the encoded flock and fcntl locks into the pagelist.
487
 * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
488
 * sequential flock locks.
489
 * Returns zero on success.
490
 */
491
int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
492
			   struct ceph_pagelist *pagelist,
493
			   int num_fcntl_locks, int num_flock_locks)
494
{
495
	int err = 0;
496
	__le32 nlocks;
497

498
	nlocks = cpu_to_le32(num_fcntl_locks);
499
	err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
500
	if (err)
501
		goto out_fail;
502

503
	if (num_fcntl_locks > 0) {
504
		err = ceph_pagelist_append(pagelist, flocks,
505
					   num_fcntl_locks * sizeof(*flocks));
506
		if (err)
507
			goto out_fail;
508
	}
509

510
	nlocks = cpu_to_le32(num_flock_locks);
511
	err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
512
	if (err)
513
		goto out_fail;
514

515
	if (num_flock_locks > 0) {
516
		err = ceph_pagelist_append(pagelist, &flocks[num_fcntl_locks],
517
					   num_flock_locks * sizeof(*flocks));
518
	}
519
out_fail:
520
	return err;
521
}
522

523
Product

Resources

Company