GitHub Repository: awilliam/linux-vfio
Path: blob/master/fs/afs/write.c
/* handling of writes to regular files and writing back to the server
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells ([email protected])
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/backing-dev.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include "internal.h"

static int afs_write_back_from_locked_page(struct afs_writeback *wb,
					   struct page *page);

/*
 * mark a page as having been made dirty and thus needing writeback
 */
int afs_set_page_dirty(struct page *page)
{
	_enter("");
	return __set_page_dirty_nobuffers(page);
}

/*
 * unlink a writeback record because its usage has reached zero
 * - must be called with the wb->vnode->writeback_lock held
 */
static void afs_unlink_writeback(struct afs_writeback *wb)
{
	struct afs_writeback *front;
	struct afs_vnode *vnode = wb->vnode;

	list_del_init(&wb->link);
	if (!list_empty(&vnode->writebacks)) {
		/* if an fsync rises to the front of the queue then wake it
		 * up */
		front = list_entry(vnode->writebacks.next,
				   struct afs_writeback, link);
		if (front->state == AFS_WBACK_SYNCING) {
			_debug("wake up sync");
			front->state = AFS_WBACK_COMPLETE;
			wake_up(&front->waitq);
		}
	}
}

/*
 * free a writeback record
 */
static void afs_free_writeback(struct afs_writeback *wb)
{
	_enter("");
	key_put(wb->key);
	kfree(wb);
}

/*
 * dispose of a reference to a writeback record
 */
void afs_put_writeback(struct afs_writeback *wb)
{
	struct afs_vnode *vnode = wb->vnode;

	_enter("{%d}", wb->usage);

	spin_lock(&vnode->writeback_lock);
	if (--wb->usage == 0)
		afs_unlink_writeback(wb);
	else
		wb = NULL;
	spin_unlock(&vnode->writeback_lock);
	if (wb)
		afs_free_writeback(wb);
}

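/*
 * Editor's note (not part of the original file): afs_put_writeback() above
 * drops the reference and unlinks the record while holding writeback_lock,
 * but defers the actual kfree() until after the unlock, so no allocator
 * work happens inside the spinlocked section.  A minimal sketch of the same
 * pattern, with hypothetical names:
 */
#if 0
static void example_put(struct example_parent *parent, struct example_obj *obj)
{
	spin_lock(&parent->lock);
	if (--obj->usage > 0)
		obj = NULL;			/* still referenced elsewhere */
	else
		list_del_init(&obj->link);	/* unlink whilst locked */
	spin_unlock(&parent->lock);

	kfree(obj);				/* kfree(NULL) is a no-op */
}
#endif
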
/*
 * partly or wholly fill a page that's under preparation for writing
 */
static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
			 loff_t pos, struct page *page)
{
	loff_t i_size;
	int ret;
	int len;

	_enter(",,%llu", (unsigned long long)pos);

	i_size = i_size_read(&vnode->vfs_inode);
	if (pos + PAGE_CACHE_SIZE > i_size)
		len = i_size - pos;
	else
		len = PAGE_CACHE_SIZE;

	ret = afs_vnode_fetch_data(vnode, key, pos, len, page);
	if (ret < 0) {
		if (ret == -ENOENT) {
			_debug("got NOENT from server"
			       " - marking file deleted and stale");
			set_bit(AFS_VNODE_DELETED, &vnode->flags);
			ret = -ESTALE;
		}
	}

	_leave(" = %d", ret);
	return ret;
}

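/*
 * Editor's note (not part of the original file): the clamp above trims the
 * fetch to EOF.  Worked example, assuming a 4096-byte PAGE_CACHE_SIZE: with
 * i_size = 5000 and pos = 4096 the page straddles EOF, so len becomes
 * 5000 - 4096 = 904 and only those bytes are fetched from the server; a
 * page lying wholly below EOF is fetched in full (len = 4096).
 */
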
/*
 * prepare to perform part of a write to a page
 */
int afs_write_begin(struct file *file, struct address_space *mapping,
		    loff_t pos, unsigned len, unsigned flags,
		    struct page **pagep, void **fsdata)
{
	struct afs_writeback *candidate, *wb;
	struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
	struct page *page;
	struct key *key = file->private_data;
	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
	unsigned to = from + len;
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	int ret;

	_enter("{%x:%u},{%lx},%u,%u",
	       vnode->fid.vid, vnode->fid.vnode, index, from, to);

	candidate = kzalloc(sizeof(*candidate), GFP_KERNEL);
	if (!candidate)
		return -ENOMEM;
	candidate->vnode = vnode;
	candidate->first = candidate->last = index;
	candidate->offset_first = from;
	candidate->to_last = to;
	INIT_LIST_HEAD(&candidate->link);
	candidate->usage = 1;
	candidate->state = AFS_WBACK_PENDING;
	init_waitqueue_head(&candidate->waitq);

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		kfree(candidate);
		return -ENOMEM;
	}
	*pagep = page;
	/* page won't leak in error case: it eventually gets cleaned off LRU */

	if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) {
		ret = afs_fill_page(vnode, key, index << PAGE_CACHE_SHIFT, page);
		if (ret < 0) {
			kfree(candidate);
			_leave(" = %d [prep]", ret);
			return ret;
		}
		SetPageUptodate(page);
	}

try_again:
	spin_lock(&vnode->writeback_lock);

	/* see if this page is already pending a writeback under a suitable key
	 * - if so we can just join onto that one */
	wb = (struct afs_writeback *) page_private(page);
	if (wb) {
		if (wb->key == key && wb->state == AFS_WBACK_PENDING)
			goto subsume_in_current_wb;
		goto flush_conflicting_wb;
	}

	if (index > 0) {
		/* see if we can find an already pending writeback that we can
		 * append this page to */
		list_for_each_entry(wb, &vnode->writebacks, link) {
			if (wb->last == index - 1 && wb->key == key &&
			    wb->state == AFS_WBACK_PENDING)
				goto append_to_previous_wb;
		}
	}

	list_add_tail(&candidate->link, &vnode->writebacks);
	candidate->key = key_get(key);
	spin_unlock(&vnode->writeback_lock);
	SetPagePrivate(page);
	set_page_private(page, (unsigned long) candidate);
	_leave(" = 0 [new]");
	return 0;

subsume_in_current_wb:
	_debug("subsume");
	ASSERTRANGE(wb->first, <=, index, <=, wb->last);
	if (index == wb->first && from < wb->offset_first)
		wb->offset_first = from;
	if (index == wb->last && to > wb->to_last)
		wb->to_last = to;
	spin_unlock(&vnode->writeback_lock);
	kfree(candidate);
	_leave(" = 0 [sub]");
	return 0;

append_to_previous_wb:
	_debug("append into %lx-%lx", wb->first, wb->last);
	wb->usage++;
	wb->last++;
	wb->to_last = to;
	spin_unlock(&vnode->writeback_lock);
	SetPagePrivate(page);
	set_page_private(page, (unsigned long) wb);
	kfree(candidate);
	_leave(" = 0 [app]");
	return 0;

	/* the page is currently bound to another context, so if it's dirty we
	 * need to flush it before we can use the new context */
flush_conflicting_wb:
	_debug("flush conflict");
	if (wb->state == AFS_WBACK_PENDING)
		wb->state = AFS_WBACK_CONFLICTING;
	spin_unlock(&vnode->writeback_lock);
	if (PageDirty(page)) {
		ret = afs_write_back_from_locked_page(wb, page);
		if (ret < 0) {
			afs_put_writeback(candidate);
			_leave(" = %d", ret);
			return ret;
		}
	}

	/* the page holds a ref on the writeback record */
	afs_put_writeback(wb);
	set_page_private(page, 0);
	ClearPagePrivate(page);
	goto try_again;
}

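/*
 * Editor's note (not part of the original file): afs_write_begin() and
 * afs_write_end() are invoked in pairs by the generic buffered-write path
 * (generic_file_aio_write(), as used by afs_file_write() below).  Roughly,
 * ignoring error handling and bookkeeping, that driver loop looks like this
 * sketch:
 */
#if 0
	while (count) {
		/* ask the filesystem for a locked, suitably filled page */
		a_ops->write_begin(file, mapping, pos, bytes, flags,
				   &page, &fsdata);

		/* copy the caller's data into the page cache page */
		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);

		/* let the filesystem dirty the page and update i_size */
		a_ops->write_end(file, mapping, pos, bytes, copied,
				 page, fsdata);

		pos += copied;
		count -= copied;
	}
#endif
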
/*
 * finalise part of a write to a page
 */
int afs_write_end(struct file *file, struct address_space *mapping,
		  loff_t pos, unsigned len, unsigned copied,
		  struct page *page, void *fsdata)
{
	struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
	loff_t i_size, maybe_i_size;

	_enter("{%x:%u},{%lx}",
	       vnode->fid.vid, vnode->fid.vnode, page->index);

	maybe_i_size = pos + copied;

	i_size = i_size_read(&vnode->vfs_inode);
	if (maybe_i_size > i_size) {
		spin_lock(&vnode->writeback_lock);
		i_size = i_size_read(&vnode->vfs_inode);
		if (maybe_i_size > i_size)
			i_size_write(&vnode->vfs_inode, maybe_i_size);
		spin_unlock(&vnode->writeback_lock);
	}

	set_page_dirty(page);
	if (PageDirty(page))
		_debug("dirtied");
	unlock_page(page);
	page_cache_release(page);

	return copied;
}

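/*
 * Editor's note (not part of the original file): the i_size update above
 * uses the check/lock/re-check idiom.  The first, unlocked i_size_read()
 * keeps the common case (a write that does not extend the file) off the
 * lock entirely; the second read under writeback_lock is the one that
 * counts, and stops two racing extenders from ever moving i_size backwards.
 * i_size_write() requires this kind of external serialisation anyway on
 * 32-bit SMP kernels, which is why a lock must be held at all.
 */
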
/*
 * kill all the pages in the given range
 */
static void afs_kill_pages(struct afs_vnode *vnode, bool error,
			   pgoff_t first, pgoff_t last)
{
	struct pagevec pv;
	unsigned count, loop;

	_enter("{%x:%u},%lx-%lx",
	       vnode->fid.vid, vnode->fid.vnode, first, last);

	pagevec_init(&pv, 0);

	do {
		_debug("kill %lx-%lx", first, last);

		count = last - first + 1;
		if (count > PAGEVEC_SIZE)
			count = PAGEVEC_SIZE;
		pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
					      first, count, pv.pages);
		ASSERTCMP(pv.nr, ==, count);

		for (loop = 0; loop < count; loop++) {
			ClearPageUptodate(pv.pages[loop]);
			if (error)
				SetPageError(pv.pages[loop]);
			end_page_writeback(pv.pages[loop]);
		}

		first += count;	/* advance past this batch so the loop terminates */
		__pagevec_release(&pv);
	} while (first < last);

	_leave("");
}

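/*
 * Editor's note (not part of the original file): pages are processed in
 * pagevec-sized batches.  Worked example, assuming PAGEVEC_SIZE is 14 (its
 * historical value): killing pages 0-19 takes two passes of the loop above,
 * one for pages 0-13 and one for pages 14-19, with "first" stepping past
 * each batch so that the do/while can terminate.
 */
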
/*
 * synchronously write back the locked page and any subsequent non-locked dirty
 * pages also covered by the same writeback record
 */
static int afs_write_back_from_locked_page(struct afs_writeback *wb,
					   struct page *primary_page)
{
	struct page *pages[8], *page;
	unsigned long count;
	unsigned n, offset, to;
	pgoff_t start, first, last;
	int loop, ret;

	_enter(",%lx", primary_page->index);

	count = 1;
	if (!clear_page_dirty_for_io(primary_page))
		BUG();
	if (test_set_page_writeback(primary_page))
		BUG();

	/* find all consecutive lockable dirty pages, stopping when we find a
	 * page that is not immediately lockable, is not dirty or is missing,
	 * or we reach the end of the range */
	start = primary_page->index;
	if (start >= wb->last)
		goto no_more;
	start++;
	do {
		_debug("more %lx [%lx]", start, count);
		n = wb->last - start + 1;
		if (n > ARRAY_SIZE(pages))
			n = ARRAY_SIZE(pages);
		n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping,
					  start, n, pages);
		_debug("fgpc %u", n);
		if (n == 0)
			goto no_more;
		if (pages[0]->index != start) {
			do {
				put_page(pages[--n]);
			} while (n > 0);
			goto no_more;
		}

		for (loop = 0; loop < n; loop++) {
			page = pages[loop];
			if (page->index > wb->last)
				break;
			if (!trylock_page(page))
				break;
			if (!PageDirty(page) ||
			    page_private(page) != (unsigned long) wb) {
				unlock_page(page);
				break;
			}
			if (!clear_page_dirty_for_io(page))
				BUG();
			if (test_set_page_writeback(page))
				BUG();
			unlock_page(page);
			put_page(page);
		}

		count += loop;
		if (loop < n) {
			for (; loop < n; loop++)
				put_page(pages[loop]);
			goto no_more;
		}

		start += loop;
	} while (start <= wb->last && count < 65536);

no_more:
	/* we now have a contiguous set of dirty pages, each with writeback set
	 * and the dirty mark cleared; the first page is locked and must remain
	 * so, all the rest are unlocked */
	first = primary_page->index;
	last = first + count - 1;

	offset = (first == wb->first) ? wb->offset_first : 0;
	to = (last == wb->last) ? wb->to_last : PAGE_SIZE;

	_debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to);

	ret = afs_vnode_store_data(wb, first, last, offset, to);
	if (ret < 0) {
		switch (ret) {
		case -EDQUOT:
		case -ENOSPC:
			set_bit(AS_ENOSPC,
				&wb->vnode->vfs_inode.i_mapping->flags);
			break;
		case -EROFS:
		case -EIO:
		case -EREMOTEIO:
		case -EFBIG:
		case -ENOENT:
		case -ENOMEDIUM:
		case -ENXIO:
			afs_kill_pages(wb->vnode, true, first, last);
			set_bit(AS_EIO, &wb->vnode->vfs_inode.i_mapping->flags);
			break;
		case -EACCES:
		case -EPERM:
		case -ENOKEY:
		case -EKEYEXPIRED:
		case -EKEYREJECTED:
		case -EKEYREVOKED:
			afs_kill_pages(wb->vnode, false, first, last);
			break;
		default:
			break;
		}
	} else {
		ret = count;
	}

	_leave(" = %d", ret);
	return ret;
}

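/*
 * Editor's note (not part of the original file): the AS_ENOSPC/AS_EIO bits
 * set above are the standard way writeback errors reach a later fsync() or
 * msync(): the generic pagecache wait paths test-and-clear them and turn
 * them back into errno values, roughly like this sketch:
 */
#if 0
static int example_check_mapping_errors(struct address_space *mapping)
{
	int ret = 0;

	if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
		ret = -ENOSPC;
	if (test_and_clear_bit(AS_EIO, &mapping->flags))
		ret = -EIO;
	return ret;
}
#endif
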
/*
 * write a page back to the server
 * - the caller locked the page for us
 */
int afs_writepage(struct page *page, struct writeback_control *wbc)
{
	struct afs_writeback *wb;
	int ret;

	_enter("{%lx},", page->index);

	wb = (struct afs_writeback *) page_private(page);
	ASSERT(wb != NULL);

	ret = afs_write_back_from_locked_page(wb, page);
	unlock_page(page);
	if (ret < 0) {
		_leave(" = %d", ret);
		return 0;
	}

	wbc->nr_to_write -= ret;

	_leave(" = 0");
	return 0;
}

/*
 * write a region of pages back to the server
 */
static int afs_writepages_region(struct address_space *mapping,
				 struct writeback_control *wbc,
				 pgoff_t index, pgoff_t end, pgoff_t *_next)
{
	struct afs_writeback *wb;
	struct page *page;
	int ret, n;

	_enter(",,%lx,%lx,", index, end);

	do {
		n = find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY,
				       1, &page);
		if (!n)
			break;

		_debug("wback %lx", page->index);

		if (page->index > end) {
			*_next = index;
			page_cache_release(page);
			_leave(" = 0 [%lx]", *_next);
			return 0;
		}

		/* at this point we hold neither mapping->tree_lock nor lock on
		 * the page itself: the page may be truncated or invalidated
		 * (changing page->mapping to NULL), or even swizzled back from
		 * swapper_space to tmpfs file mapping
		 */
		lock_page(page);

		if (page->mapping != mapping) {
			unlock_page(page);
			page_cache_release(page);
			continue;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) || !PageDirty(page)) {
			unlock_page(page);
			continue;
		}

		wb = (struct afs_writeback *) page_private(page);
		ASSERT(wb != NULL);

		spin_lock(&wb->vnode->writeback_lock);
		wb->state = AFS_WBACK_WRITING;
		spin_unlock(&wb->vnode->writeback_lock);

		ret = afs_write_back_from_locked_page(wb, page);
		unlock_page(page);
		page_cache_release(page);
		if (ret < 0) {
			_leave(" = %d", ret);
			return ret;
		}

		wbc->nr_to_write -= ret;

		cond_resched();
	} while (index < end && wbc->nr_to_write > 0);

	*_next = index;
	_leave(" = 0 [%lx]", *_next);
	return 0;
}

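/*
 * Editor's note (not part of the original file): find_get_pages_tag()
 * advances the caller's index past the pages it returns, which is what lets
 * the do/while above make forward progress one dirty page at a time.  A
 * hypothetical minimal walk over every dirty page of a mapping:
 */
#if 0
	pgoff_t index = 0;
	struct page *page;

	while (find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY,
				  1, &page) == 1) {
		/* ... page is referenced here, but not locked ... */
		page_cache_release(page);
	}
#endif
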
/*
 * write some of the pending data back to the server
 */
int afs_writepages(struct address_space *mapping,
		   struct writeback_control *wbc)
{
	pgoff_t start, end, next;
	int ret;

	_enter("");

	if (wbc->range_cyclic) {
		start = mapping->writeback_index;
		end = -1;
		ret = afs_writepages_region(mapping, wbc, start, end, &next);
		if (start > 0 && wbc->nr_to_write > 0 && ret == 0)
			ret = afs_writepages_region(mapping, wbc, 0, start,
						    &next);
		mapping->writeback_index = next;
	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
		end = (pgoff_t)(LLONG_MAX >> PAGE_CACHE_SHIFT);
		ret = afs_writepages_region(mapping, wbc, 0, end, &next);
		if (wbc->nr_to_write > 0)
			mapping->writeback_index = next;
	} else {
		start = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		ret = afs_writepages_region(mapping, wbc, start, end, &next);
	}

	_leave(" = %d", ret);
	return ret;
}

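/*
 * Editor's note (not part of the original file): in the range_cyclic case
 * the scan resumes where the previous one stopped and wraps around once:
 *
 *	|0 ...............W...................~0|
 *	                  |------ pass 1 ------>|
 *	|---- pass 2 ---->|
 *
 * where W is mapping->writeback_index; pass 2 only runs if pass 1 succeeded
 * and some of wbc->nr_to_write remains.
 */
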
/*
 * completion of write to server
 */
void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
{
	struct afs_writeback *wb = call->wb;
	struct pagevec pv;
	unsigned count, loop;
	pgoff_t first = call->first, last = call->last;
	bool free_wb;

	_enter("{%x:%u},{%lx-%lx}",
	       vnode->fid.vid, vnode->fid.vnode, first, last);

	ASSERT(wb != NULL);

	pagevec_init(&pv, 0);

	do {
		_debug("done %lx-%lx", first, last);

		count = last - first + 1;
		if (count > PAGEVEC_SIZE)
			count = PAGEVEC_SIZE;
		pv.nr = find_get_pages_contig(call->mapping, first, count,
					      pv.pages);
		ASSERTCMP(pv.nr, ==, count);

		spin_lock(&vnode->writeback_lock);
		for (loop = 0; loop < count; loop++) {
			struct page *page = pv.pages[loop];
			end_page_writeback(page);
			if (page_private(page) == (unsigned long) wb) {
				set_page_private(page, 0);
				ClearPagePrivate(page);
				wb->usage--;
			}
		}
		free_wb = false;
		if (wb->usage == 0) {
			afs_unlink_writeback(wb);
			free_wb = true;
		}
		spin_unlock(&vnode->writeback_lock);
		first += count;
		if (free_wb) {
			afs_free_writeback(wb);
			wb = NULL;
		}

		__pagevec_release(&pv);
	} while (first <= last);

	_leave("");
}

/*
 * write to an AFS file
 */
ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
		       unsigned long nr_segs, loff_t pos)
{
	struct dentry *dentry = iocb->ki_filp->f_path.dentry;
	struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
	ssize_t result;
	size_t count = iov_length(iov, nr_segs);

	_enter("{%x.%u},{%zu},%lu,",
	       vnode->fid.vid, vnode->fid.vnode, count, nr_segs);

	if (IS_SWAPFILE(&vnode->vfs_inode)) {
		printk(KERN_INFO
		       "AFS: Attempt to write to active swap file!\n");
		return -EBUSY;
	}

	if (!count)
		return 0;

	result = generic_file_aio_write(iocb, iov, nr_segs, pos);
	if (IS_ERR_VALUE(result)) {
		_leave(" = %zd", result);
		return result;
	}

	_leave(" = %zd", result);
	return result;
}

/*
 * flush the vnode to the fileserver
 */
int afs_writeback_all(struct afs_vnode *vnode)
{
	struct address_space *mapping = vnode->vfs_inode.i_mapping;
	struct writeback_control wbc = {
		.sync_mode	= WB_SYNC_ALL,
		.nr_to_write	= LONG_MAX,
		.range_cyclic	= 1,
	};
	int ret;

	_enter("");

	ret = mapping->a_ops->writepages(mapping, &wbc);
	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

	_leave(" = %d", ret);
	return ret;
}

/*
 * flush any dirty pages for this process, and check for write errors.
 * - the return status from this call provides a reliable indication of
 *   whether any write errors occurred for this process.
 */
int afs_fsync(struct file *file, int datasync)
{
	struct dentry *dentry = file->f_path.dentry;
	struct afs_writeback *wb, *xwb;
	struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
	int ret;

	_enter("{%x:%u},{n=%s},%d",
	       vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
	       datasync);

	/* use a writeback record as a marker in the queue - when this reaches
	 * the front of the queue, all the outstanding writes are either
	 * completed or rejected */
	wb = kzalloc(sizeof(*wb), GFP_KERNEL);
	if (!wb)
		return -ENOMEM;
	wb->vnode = vnode;
	wb->first = 0;
	wb->last = -1;
	wb->offset_first = 0;
	wb->to_last = PAGE_SIZE;
	wb->usage = 1;
	wb->state = AFS_WBACK_SYNCING;
	init_waitqueue_head(&wb->waitq);

	spin_lock(&vnode->writeback_lock);
	list_for_each_entry(xwb, &vnode->writebacks, link) {
		if (xwb->state == AFS_WBACK_PENDING)
			xwb->state = AFS_WBACK_CONFLICTING;
	}
	list_add_tail(&wb->link, &vnode->writebacks);
	spin_unlock(&vnode->writeback_lock);

	/* push all the outstanding writebacks to the server */
	ret = afs_writeback_all(vnode);
	if (ret < 0) {
		afs_put_writeback(wb);
		_leave(" = %d [wb]", ret);
		return ret;
	}

	/* wait for the preceding writes to actually complete */
	ret = wait_event_interruptible(wb->waitq,
				       wb->state == AFS_WBACK_COMPLETE ||
				       vnode->writebacks.next == &wb->link);
	afs_put_writeback(wb);
	_leave(" = %d", ret);
	return ret;
}

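/*
 * Editor's note (not part of the original file): the record queued above is
 * a sentinel - it covers no pages, it just marks a position in
 * vnode->writebacks.  afs_unlink_writeback() wakes it when it reaches the
 * front of the list, i.e. once everything queued before the fsync has
 * drained.  The same trick in miniature, with hypothetical names:
 */
#if 0
	struct example_marker m = { .state = EX_SYNCING };

	init_waitqueue_head(&m.waitq);
	spin_lock(&q->lock);
	list_add_tail(&m.link, &q->work);	/* queue behind existing work */
	spin_unlock(&q->lock);

	ret = wait_event_interruptible(m.waitq,
				       m.state == EX_COMPLETE ||
				       q->work.next == &m.link);
#endif
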
/*
 * notification that a previously read-only page is about to become writable
 * - if it returns an error, the caller will deliver a bus error signal
 */
int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
{
	struct afs_vnode *vnode = AFS_FS_I(vma->vm_file->f_mapping->host);

	_enter("{{%x:%u}},{%lx}",
	       vnode->fid.vid, vnode->fid.vnode, page->index);

	/* wait for the page to be written to the cache before we allow it to
	 * be modified */
#ifdef CONFIG_AFS_FSCACHE
	fscache_wait_on_page_write(vnode->cache, page);
#endif

	_leave(" = 0");
	return 0;
}
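
/*
 * Editor's note (not part of the original file): these entry points are
 * wired up elsewhere in the tree (fs/afs/file.c and the inode setup code)
 * through the standard VFS operation tables; the fields below are the stock
 * address_space_operations members, shown here only as a sketch:
 */
#if 0
static const struct address_space_operations example_afs_aops = {
	.set_page_dirty	= afs_set_page_dirty,
	.write_begin	= afs_write_begin,
	.write_end	= afs_write_end,
	.writepage	= afs_writepage,
	.writepages	= afs_writepages,
};
#endif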