Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/fs/dlm/plock.c
26278 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
* Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
4
*/
5
6
#include <linux/fs.h>
7
#include <linux/filelock.h>
8
#include <linux/miscdevice.h>
9
#include <linux/poll.h>
10
#include <linux/dlm.h>
11
#include <linux/dlm_plock.h>
12
#include <linux/slab.h>
13
14
#include <trace/events/dlm.h>
15
16
#include "dlm_internal.h"
17
#include "lockspace.h"
18
19
static DEFINE_SPINLOCK(ops_lock);
20
static LIST_HEAD(send_list);
21
static LIST_HEAD(recv_list);
22
static DECLARE_WAIT_QUEUE_HEAD(send_wq);
23
static DECLARE_WAIT_QUEUE_HEAD(recv_wq);
24
25
/*
 * Per-request state for an asynchronously handled (lm_grant) plock
 * request; freed together with the owning plock_op.
 */
struct plock_async_data {
	void *fl;	/* caller's struct file_lock, passed back to callback */
	void *file;	/* struct file the lock was requested against */
	struct file_lock flc;	/* private copy used for local VFS bookkeeping */
	/* caller's grant callback, invoked with the operation result */
	int (*callback)(struct file_lock *fl, int result);
};
31
32
/*
 * One posix lock operation exchanged with userspace (dlm_controld)
 * through the plock misc device.
 */
struct plock_op {
	struct list_head list;	/* entry on send_list or recv_list (ops_lock) */
	int done;		/* nonzero once userspace has answered */
	struct dlm_plock_info info;	/* request/result shared with userspace */
	/* if set indicates async handling */
	struct plock_async_data *data;
};
39
40
/* stamp an outgoing request with the kernel's plock interface version */
static inline void set_version(struct dlm_plock_info *info)
{
	info->version[0] = DLM_PLOCK_VERSION_MAJOR;
	info->version[1] = DLM_PLOCK_VERSION_MINOR;
	info->version[2] = DLM_PLOCK_VERSION_PATCH;
}
46
47
static struct plock_op *plock_lookup_waiter(const struct dlm_plock_info *info)
48
{
49
struct plock_op *op = NULL, *iter;
50
51
list_for_each_entry(iter, &recv_list, list) {
52
if (iter->info.fsid == info->fsid &&
53
iter->info.number == info->number &&
54
iter->info.owner == info->owner &&
55
iter->info.pid == info->pid &&
56
iter->info.start == info->start &&
57
iter->info.end == info->end &&
58
iter->info.ex == info->ex &&
59
iter->info.wait) {
60
op = iter;
61
break;
62
}
63
}
64
65
return op;
66
}
67
68
static int check_version(struct dlm_plock_info *info)
69
{
70
if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) ||
71
(DLM_PLOCK_VERSION_MINOR < info->version[1])) {
72
log_print("plock device version mismatch: "
73
"kernel (%u.%u.%u), user (%u.%u.%u)",
74
DLM_PLOCK_VERSION_MAJOR,
75
DLM_PLOCK_VERSION_MINOR,
76
DLM_PLOCK_VERSION_PATCH,
77
info->version[0],
78
info->version[1],
79
info->version[2]);
80
return -EINVAL;
81
}
82
return 0;
83
}
84
85
/* free an op and its optional async payload (kfree(NULL) is a no-op) */
static void dlm_release_plock_op(struct plock_op *op)
{
	kfree(op->data);
	kfree(op);
}
90
91
/* queue an op for userspace and wake any reader of the misc device */
static void send_op(struct plock_op *op)
{
	set_version(&op->info);
	spin_lock(&ops_lock);
	list_add_tail(&op->list, &send_list);
	spin_unlock(&ops_lock);
	wake_up(&send_wq);
}
99
100
static int do_lock_cancel(const struct dlm_plock_info *orig_info)
101
{
102
struct plock_op *op;
103
int rv;
104
105
op = kzalloc(sizeof(*op), GFP_NOFS);
106
if (!op)
107
return -ENOMEM;
108
109
op->info = *orig_info;
110
op->info.optype = DLM_PLOCK_OP_CANCEL;
111
op->info.wait = 0;
112
113
send_op(op);
114
wait_event(recv_wq, (op->done != 0));
115
116
rv = op->info.rv;
117
118
dlm_release_plock_op(op);
119
return rv;
120
}
121
122
/*
 * Request a posix lock from userspace (dlm_controld) on behalf of
 * @file in the given lockspace. Async callers (fl->fl_lmops->lm_grant
 * set) get FILE_LOCK_DEFERRED back immediately and the result is later
 * delivered via dlm_plock_callback(); all other callers block until
 * userspace replies. On success the lock is also recorded in the local
 * VFS lock table.
 */
int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
		   int cmd, struct file_lock *fl)
{
	struct plock_async_data *op_data;
	struct dlm_ls *ls;
	struct plock_op *op;
	int rv;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	op = kzalloc(sizeof(*op), GFP_NOFS);
	if (!op) {
		rv = -ENOMEM;
		goto out;
	}

	/* describe the request for userspace */
	op->info.optype = DLM_PLOCK_OP_LOCK;
	op->info.pid = fl->c.flc_pid;
	op->info.ex = lock_is_write(fl);
	op->info.wait = !!(fl->c.flc_flags & FL_SLEEP);
	op->info.fsid = ls->ls_global_id;
	op->info.number = number;
	op->info.start = fl->fl_start;
	op->info.end = fl->fl_end;
	op->info.owner = (__u64)(long) fl->c.flc_owner;
	/* async handling */
	if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
		op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
		if (!op_data) {
			dlm_release_plock_op(op);
			rv = -ENOMEM;
			goto out;
		}

		/* keep what the callback needs; flc is a private copy */
		op_data->callback = fl->fl_lmops->lm_grant;
		locks_init_lock(&op_data->flc);
		locks_copy_lock(&op_data->flc, fl);
		op_data->fl = fl;
		op_data->file = file;

		op->data = op_data;

		send_op(op);
		rv = FILE_LOCK_DEFERRED;
		goto out;
	}

	send_op(op);

	if (op->info.wait) {
		/* blocking request: allow the wait to be interrupted */
		rv = wait_event_interruptible(recv_wq, (op->done != 0));
		if (rv == -ERESTARTSYS) {
			spin_lock(&ops_lock);
			/* recheck under ops_lock if we got a done != 0,
			 * if so this interrupt case should be ignored
			 */
			if (op->done != 0) {
				spin_unlock(&ops_lock);
				goto do_lock_wait;
			}
			spin_unlock(&ops_lock);

			rv = do_lock_cancel(&op->info);
			switch (rv) {
			case 0:
				/* waiter was deleted in user space, answer will never come
				 * remove original request. The original request must be
				 * on recv_list because the answer of do_lock_cancel()
				 * synchronized it.
				 */
				spin_lock(&ops_lock);
				list_del(&op->list);
				spin_unlock(&ops_lock);
				rv = -EINTR;
				break;
			case -ENOENT:
				/* cancellation wasn't successful but op should be done */
				fallthrough;
			default:
				/* internal error doing cancel we need to wait */
				goto wait;
			}

			log_debug(ls, "%s: wait interrupted %x %llx pid %d",
				  __func__, ls->ls_global_id,
				  (unsigned long long)number, op->info.pid);
			dlm_release_plock_op(op);
			goto out;
		}
	} else {
wait:
		wait_event(recv_wq, (op->done != 0));
	}

do_lock_wait:

	/* a completed op must have been removed from send/recv lists */
	WARN_ON(!list_empty(&op->list));

	rv = op->info.rv;

	if (!rv) {
		/* userspace granted the lock; mirror it in the local VFS */
		if (locks_lock_file_wait(file, fl) < 0)
			log_error(ls, "dlm_posix_lock: vfs lock error %llx",
				  (unsigned long long)number);
	}

	dlm_release_plock_op(op);
out:
	dlm_put_lockspace(ls);
	return rv;
}
235
EXPORT_SYMBOL_GPL(dlm_posix_lock);
236
237
/* Returns failure iff a successful lock operation should be canceled */
/*
 * Completion path for an async lock request: record the granted lock
 * in the local VFS lock table and notify the caller through the
 * lm_grant callback saved in op->data. Frees the op in all cases.
 */
static int dlm_plock_callback(struct plock_op *op)
{
	struct plock_async_data *op_data = op->data;
	struct file *file;
	struct file_lock *fl;
	struct file_lock *flc;
	int (*notify)(struct file_lock *fl, int result) = NULL;
	int rv = 0;

	WARN_ON(!list_empty(&op->list));

	/* check if the following 2 are still valid or make a copy */
	file = op_data->file;
	flc = &op_data->flc;
	fl = op_data->fl;
	notify = op_data->callback;

	if (op->info.rv) {
		/* userspace denied the lock; just report the error */
		notify(fl, op->info.rv);
		goto out;
	}

	/* got fs lock; bookkeep locally as well: */
	flc->c.flc_flags &= ~FL_SLEEP;
	if (posix_lock_file(file, flc, NULL)) {
		/*
		 * This can only happen in the case of kmalloc() failure.
		 * The filesystem's own lock is the authoritative lock,
		 * so a failure to get the lock locally is not a disaster.
		 * As long as the fs cannot reliably cancel locks (especially
		 * in a low-memory situation), we're better off ignoring
		 * this failure than trying to recover.
		 */
		log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p",
			  (unsigned long long)op->info.number, file, fl);
	}

	rv = notify(fl, 0);
	if (rv) {
		/* XXX: We need to cancel the fs lock here: */
		log_print("%s: lock granted after lock request failed; dangling lock!",
			  __func__);
		goto out;
	}

out:
	dlm_release_plock_op(op);
	return rv;
}
287
288
/*
 * Release a posix lock: remove it from the local VFS lock table first,
 * then tell userspace. For unlocks generated by a file close (FL_CLOSE)
 * no reply is awaited and dev_read() frees the op after copying it out.
 * fl->c.flc_flags is restored before returning.
 */
int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
		     struct file_lock *fl)
{
	struct dlm_ls *ls;
	struct plock_op *op;
	int rv;
	unsigned char saved_flags = fl->c.flc_flags;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	op = kzalloc(sizeof(*op), GFP_NOFS);
	if (!op) {
		rv = -ENOMEM;
		goto out;
	}

	/* cause the vfs unlock to return ENOENT if lock is not found */
	fl->c.flc_flags |= FL_EXISTS;

	rv = locks_lock_file_wait(file, fl);
	if (rv == -ENOENT) {
		/* no matching local lock; nothing to tell userspace */
		rv = 0;
		goto out_free;
	}
	if (rv < 0) {
		log_error(ls, "dlm_posix_unlock: vfs unlock error %d %llx",
			  rv, (unsigned long long)number);
	}

	op->info.optype = DLM_PLOCK_OP_UNLOCK;
	op->info.pid = fl->c.flc_pid;
	op->info.fsid = ls->ls_global_id;
	op->info.number = number;
	op->info.start = fl->fl_start;
	op->info.end = fl->fl_end;
	op->info.owner = (__u64)(long) fl->c.flc_owner;

	if (fl->c.flc_flags & FL_CLOSE) {
		op->info.flags |= DLM_PLOCK_FL_CLOSE;
		/* fire and forget: ownership of op passes to dev_read() */
		send_op(op);
		rv = 0;
		goto out;
	}

	send_op(op);
	wait_event(recv_wq, (op->done != 0));

	WARN_ON(!list_empty(&op->list));

	rv = op->info.rv;

	if (rv == -ENOENT)
		rv = 0;

out_free:
	dlm_release_plock_op(op);
out:
	dlm_put_lockspace(ls);
	fl->c.flc_flags = saved_flags;
	return rv;
}
351
EXPORT_SYMBOL_GPL(dlm_posix_unlock);
352
353
/*
 * NOTE: This implementation can only handle async lock requests the
 * way nfs issues them. It cannot handle cancellation of a pending
 * lock request sitting in wait_event(), but for now nfs is the only
 * local kernel user.
 */
359
/*
 * Cancel a pending asynchronously submitted lock request. Only valid
 * for requests with fl->fl_lmops->lm_grant set; the async caller is
 * notified with -EINTR when the cancel succeeds. If userspace reports
 * the request already completed (-ENOENT), fall back to unlocking.
 */
int dlm_posix_cancel(dlm_lockspace_t *lockspace, u64 number, struct file *file,
		     struct file_lock *fl)
{
	struct dlm_plock_info info;
	struct plock_op *op;
	struct dlm_ls *ls;
	int rv;

	/* this only works for async request for now and nfs is the only
	 * kernel user right now.
	 */
	if (WARN_ON_ONCE(!fl->fl_lmops || !fl->fl_lmops->lm_grant))
		return -EOPNOTSUPP;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	/* rebuild the identifying fields of the original request */
	memset(&info, 0, sizeof(info));
	info.pid = fl->c.flc_pid;
	info.ex = lock_is_write(fl);
	info.fsid = ls->ls_global_id;
	dlm_put_lockspace(ls);
	info.number = number;
	info.start = fl->fl_start;
	info.end = fl->fl_end;
	info.owner = (__u64)(long) fl->c.flc_owner;

	rv = do_lock_cancel(&info);
	switch (rv) {
	case 0:
		spin_lock(&ops_lock);
		/* lock request to cancel must be on recv_list because
		 * do_lock_cancel() synchronizes it.
		 */
		op = plock_lookup_waiter(&info);
		if (WARN_ON_ONCE(!op)) {
			spin_unlock(&ops_lock);
			rv = -ENOLCK;
			break;
		}

		list_del(&op->list);
		spin_unlock(&ops_lock);
		WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK);
		/* tell the async caller its request was interrupted */
		op->data->callback(op->data->fl, -EINTR);
		dlm_release_plock_op(op);
		rv = -EINTR;
		break;
	case -ENOENT:
		/* if cancel wasn't successful we probably were too late
		 * or it was a non-blocking lock request, so just unlock it.
		 */
		rv = dlm_posix_unlock(lockspace, number, file, fl);
		break;
	default:
		break;
	}

	return rv;
}
420
EXPORT_SYMBOL_GPL(dlm_posix_cancel);
421
422
/*
 * F_GETLK: ask userspace whether a lock conflicting with @fl exists
 * and, if so, describe the blocking lock in @fl. With no conflict,
 * fl->c.flc_type is set to F_UNLCK. Returns 0 on success (conflict or
 * not) or a negative error.
 */
int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
		  struct file_lock *fl)
{
	struct dlm_ls *ls;
	struct plock_op *op;
	int rv;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	op = kzalloc(sizeof(*op), GFP_NOFS);
	if (!op) {
		rv = -ENOMEM;
		goto out;
	}

	op->info.optype = DLM_PLOCK_OP_GET;
	op->info.pid = fl->c.flc_pid;
	op->info.ex = lock_is_write(fl);
	op->info.fsid = ls->ls_global_id;
	op->info.number = number;
	op->info.start = fl->fl_start;
	op->info.end = fl->fl_end;
	op->info.owner = (__u64)(long) fl->c.flc_owner;

	send_op(op);
	wait_event(recv_wq, (op->done != 0));

	WARN_ON(!list_empty(&op->list));

	/* info.rv from userspace is 1 for conflict, 0 for no-conflict,
	   -ENOENT if there are no locks on the file */

	rv = op->info.rv;

	/* default: no conflicting lock found */
	fl->c.flc_type = F_UNLCK;
	if (rv == -ENOENT)
		rv = 0;
	else if (rv > 0) {
		/* conflict: fill @fl with the blocking lock's description */
		locks_init_lock(fl);
		fl->c.flc_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
		fl->c.flc_flags = FL_POSIX;
		fl->c.flc_pid = op->info.pid;
		/* a negated pid indicates the holder is on another node */
		if (op->info.nodeid != dlm_our_nodeid())
			fl->c.flc_pid = -fl->c.flc_pid;
		fl->fl_start = op->info.start;
		fl->fl_end = op->info.end;
		rv = 0;
	}

	dlm_release_plock_op(op);
out:
	dlm_put_lockspace(ls);
	return rv;
}
478
EXPORT_SYMBOL_GPL(dlm_posix_get);
479
480
/* a read copies out one plock request from the send list */
static ssize_t dev_read(struct file *file, char __user *u, size_t count,
			loff_t *ppos)
{
	struct dlm_plock_info info;
	struct plock_op *op = NULL;

	if (count < sizeof(info))
		return -EINVAL;

	spin_lock(&ops_lock);
	if (!list_empty(&send_list)) {
		op = list_first_entry(&send_list, struct plock_op, list);
		/* FL_CLOSE ops expect no reply, so don't park them on
		 * recv_list; ops awaiting an answer move there instead.
		 */
		if (op->info.flags & DLM_PLOCK_FL_CLOSE)
			list_del(&op->list);
		else
			list_move_tail(&op->list, &recv_list);
		/* snapshot under the lock; op may be freed below */
		memcpy(&info, &op->info, sizeof(info));
	}
	spin_unlock(&ops_lock);

	if (!op)
		return -EAGAIN;

	trace_dlm_plock_read(&info);

	/* there is no need to get a reply from userspace for unlocks
	   that were generated by the vfs cleaning up for a close
	   (the process did not make an unlock call). */

	if (op->info.flags & DLM_PLOCK_FL_CLOSE)
		dlm_release_plock_op(op);

	/* only the local info snapshot is used from here on */
	if (copy_to_user(u, &info, sizeof(info)))
		return -EFAULT;
	return sizeof(info);
}
517
518
/* a write copies in one plock result that should match a plock_op
   on the recv list */
static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
			 loff_t *ppos)
{
	struct plock_op *op = NULL, *iter;
	struct dlm_plock_info info;
	int do_callback = 0;

	if (count != sizeof(info))
		return -EINVAL;

	if (copy_from_user(&info, u, sizeof(info)))
		return -EFAULT;

	trace_dlm_plock_write(&info);

	if (check_version(&info))
		return -EINVAL;

	/*
	 * The results for waiting ops (SETLKW) can be returned in any
	 * order, so match all fields to find the op. The results for
	 * non-waiting ops are returned in the order that they were sent
	 * to userspace, so match the result with the first non-waiting op.
	 */
	spin_lock(&ops_lock);
	if (info.wait) {
		op = plock_lookup_waiter(&info);
	} else {
		list_for_each_entry(iter, &recv_list, list) {
			if (!iter->info.wait &&
			    iter->info.fsid == info.fsid) {
				op = iter;
				break;
			}
		}
	}

	if (op) {
		/* Sanity check that op and info match. */
		if (info.wait)
			WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK);
		else
			WARN_ON(op->info.number != info.number ||
				op->info.owner != info.owner ||
				op->info.optype != info.optype);

		/* detach from recv_list and store the result */
		list_del_init(&op->list);
		memcpy(&op->info, &info, sizeof(info));
		/* async ops complete via callback; sync waiters poll done */
		if (op->data)
			do_callback = 1;
		else
			op->done = 1;
	}
	spin_unlock(&ops_lock);

	if (op) {
		if (do_callback)
			dlm_plock_callback(op);
		else
			wake_up(&recv_wq);
	} else
		pr_debug("%s: no op %x %llx", __func__,
			 info.fsid, (unsigned long long)info.number);
	return count;
}
585
586
/* report the device readable whenever send_list has a pending request */
static __poll_t dev_poll(struct file *file, poll_table *wait)
{
	bool have_work;

	poll_wait(file, &send_wq, wait);

	spin_lock(&ops_lock);
	have_work = !list_empty(&send_list);
	spin_unlock(&ops_lock);

	return have_work ? EPOLLIN | EPOLLRDNORM : 0;
}
599
600
static const struct file_operations dev_fops = {
601
.read = dev_read,
602
.write = dev_write,
603
.poll = dev_poll,
604
.owner = THIS_MODULE,
605
.llseek = noop_llseek,
606
};
607
608
/* misc device node userspace (dlm_controld) opens to service plocks */
static struct miscdevice plock_dev_misc = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = DLM_PLOCK_MISC_NAME,
	.fops = &dev_fops
};
613
614
int dlm_plock_init(void)
615
{
616
int rv;
617
618
rv = misc_register(&plock_dev_misc);
619
if (rv)
620
log_print("dlm_plock_init: misc_register failed %d", rv);
621
return rv;
622
}
623
624
/* unregister the misc device; both op lists should be empty by now */
void dlm_plock_exit(void)
{
	misc_deregister(&plock_dev_misc);
	WARN_ON(!list_empty(&send_list));
	WARN_ON(!list_empty(&recv_list));
}
630
631
632