// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2016-2018 Intel Corporation. All rights reserved. */
/* Source: GitHub repository torvalds/linux, path drivers/dax/device.c (master) */
#include <linux/memremap.h>
4
#include <linux/pagemap.h>
5
#include <linux/module.h>
6
#include <linux/device.h>
7
#include <linux/cdev.h>
8
#include <linux/slab.h>
9
#include <linux/dax.h>
10
#include <linux/fs.h>
11
#include <linux/mm.h>
12
#include <linux/mman.h>
13
#include "dax-private.h"
14
#include "bus.h"
15
16
static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
17
const char *func)
18
{
19
struct device *dev = &dev_dax->dev;
20
unsigned long mask;
21
22
if (!dax_alive(dev_dax->dax_dev))
23
return -ENXIO;
24
25
/* prevent private mappings from being established */
26
if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) {
27
dev_info_ratelimited(dev,
28
"%s: %s: fail, attempted private mapping\n",
29
current->comm, func);
30
return -EINVAL;
31
}
32
33
mask = dev_dax->align - 1;
34
if (vma->vm_start & mask || vma->vm_end & mask) {
35
dev_info_ratelimited(dev,
36
"%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n",
37
current->comm, func, vma->vm_start, vma->vm_end,
38
mask);
39
return -EINVAL;
40
}
41
42
if (!vma_is_dax(vma)) {
43
dev_info_ratelimited(dev,
44
"%s: %s: fail, vma is not DAX capable\n",
45
current->comm, func);
46
return -EINVAL;
47
}
48
49
return 0;
50
}
51
52
/* see "strong" declaration in tools/testing/nvdimm/dax-dev.c */
53
/*
 * Translate a device page offset into a physical address.
 *
 * Walks the ranges of @dev_dax looking for the one whose page-offset window
 * contains @pgoff.  The translation succeeds only if @size bytes starting at
 * the resulting physical address still fit inside that range.
 *
 * Returns the physical address, or -1 if no range contains @pgoff or the
 * request would run past the end of the containing range.
 */
__weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
		unsigned long size)
{
	int idx;

	for (idx = 0; idx < dev_dax->nr_range; idx++) {
		struct dev_dax_range *dr = &dev_dax->ranges[idx];
		struct range *res = &dr->range;
		unsigned long long last_pgoff =
			dr->pgoff + PHYS_PFN(range_len(res)) - 1;
		phys_addr_t phys;

		if (pgoff >= dr->pgoff && pgoff <= last_pgoff) {
			phys = PFN_PHYS(pgoff - dr->pgoff) + res->start;
			/* the containing range was found; stop either way */
			if (phys + size - 1 > res->end)
				break;
			return phys;
		}
	}

	return -1;
}
74
75
static void dax_set_mapping(struct vm_fault *vmf, unsigned long pfn,
76
unsigned long fault_size)
77
{
78
unsigned long i, nr_pages = fault_size / PAGE_SIZE;
79
struct file *filp = vmf->vma->vm_file;
80
struct dev_dax *dev_dax = filp->private_data;
81
pgoff_t pgoff;
82
83
/* mapping is only set on the head */
84
if (dev_dax->pgmap->vmemmap_shift)
85
nr_pages = 1;
86
87
pgoff = linear_page_index(vmf->vma,
88
ALIGN_DOWN(vmf->address, fault_size));
89
90
for (i = 0; i < nr_pages; i++) {
91
struct folio *folio = pfn_folio(pfn + i);
92
93
if (folio->mapping)
94
continue;
95
96
folio->mapping = filp->f_mapping;
97
folio->index = pgoff + i;
98
}
99
}
100
101
static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
102
struct vm_fault *vmf)
103
{
104
struct device *dev = &dev_dax->dev;
105
phys_addr_t phys;
106
unsigned long pfn;
107
unsigned int fault_size = PAGE_SIZE;
108
109
if (check_vma(dev_dax, vmf->vma, __func__))
110
return VM_FAULT_SIGBUS;
111
112
if (dev_dax->align > PAGE_SIZE) {
113
dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
114
dev_dax->align, fault_size);
115
return VM_FAULT_SIGBUS;
116
}
117
118
if (fault_size != dev_dax->align)
119
return VM_FAULT_SIGBUS;
120
121
phys = dax_pgoff_to_phys(dev_dax, vmf->pgoff, PAGE_SIZE);
122
if (phys == -1) {
123
dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", vmf->pgoff);
124
return VM_FAULT_SIGBUS;
125
}
126
127
pfn = PHYS_PFN(phys);
128
129
dax_set_mapping(vmf, pfn, fault_size);
130
131
return vmf_insert_page_mkwrite(vmf, pfn_to_page(pfn),
132
vmf->flags & FAULT_FLAG_WRITE);
133
}
134
135
static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
136
struct vm_fault *vmf)
137
{
138
unsigned long pmd_addr = vmf->address & PMD_MASK;
139
struct device *dev = &dev_dax->dev;
140
phys_addr_t phys;
141
pgoff_t pgoff;
142
unsigned long pfn;
143
unsigned int fault_size = PMD_SIZE;
144
145
if (check_vma(dev_dax, vmf->vma, __func__))
146
return VM_FAULT_SIGBUS;
147
148
if (dev_dax->align > PMD_SIZE) {
149
dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
150
dev_dax->align, fault_size);
151
return VM_FAULT_SIGBUS;
152
}
153
154
if (fault_size < dev_dax->align)
155
return VM_FAULT_SIGBUS;
156
else if (fault_size > dev_dax->align)
157
return VM_FAULT_FALLBACK;
158
159
/* if we are outside of the VMA */
160
if (pmd_addr < vmf->vma->vm_start ||
161
(pmd_addr + PMD_SIZE) > vmf->vma->vm_end)
162
return VM_FAULT_SIGBUS;
163
164
pgoff = linear_page_index(vmf->vma, pmd_addr);
165
phys = dax_pgoff_to_phys(dev_dax, pgoff, PMD_SIZE);
166
if (phys == -1) {
167
dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff);
168
return VM_FAULT_SIGBUS;
169
}
170
171
pfn = PHYS_PFN(phys);
172
173
dax_set_mapping(vmf, pfn, fault_size);
174
175
return vmf_insert_folio_pmd(vmf, page_folio(pfn_to_page(pfn)),
176
vmf->flags & FAULT_FLAG_WRITE);
177
}
178
179
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
180
static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
181
struct vm_fault *vmf)
182
{
183
unsigned long pud_addr = vmf->address & PUD_MASK;
184
struct device *dev = &dev_dax->dev;
185
phys_addr_t phys;
186
pgoff_t pgoff;
187
unsigned long pfn;
188
unsigned int fault_size = PUD_SIZE;
189
190
191
if (check_vma(dev_dax, vmf->vma, __func__))
192
return VM_FAULT_SIGBUS;
193
194
if (dev_dax->align > PUD_SIZE) {
195
dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
196
dev_dax->align, fault_size);
197
return VM_FAULT_SIGBUS;
198
}
199
200
if (fault_size < dev_dax->align)
201
return VM_FAULT_SIGBUS;
202
else if (fault_size > dev_dax->align)
203
return VM_FAULT_FALLBACK;
204
205
/* if we are outside of the VMA */
206
if (pud_addr < vmf->vma->vm_start ||
207
(pud_addr + PUD_SIZE) > vmf->vma->vm_end)
208
return VM_FAULT_SIGBUS;
209
210
pgoff = linear_page_index(vmf->vma, pud_addr);
211
phys = dax_pgoff_to_phys(dev_dax, pgoff, PUD_SIZE);
212
if (phys == -1) {
213
dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff);
214
return VM_FAULT_SIGBUS;
215
}
216
217
pfn = PHYS_PFN(phys);
218
219
dax_set_mapping(vmf, pfn, fault_size);
220
221
return vmf_insert_folio_pud(vmf, page_folio(pfn_to_page(pfn)),
222
vmf->flags & FAULT_FLAG_WRITE);
223
}
224
#else
225
static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
226
struct vm_fault *vmf)
227
{
228
return VM_FAULT_FALLBACK;
229
}
230
#endif /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
231
232
static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf, unsigned int order)
233
{
234
struct file *filp = vmf->vma->vm_file;
235
vm_fault_t rc = VM_FAULT_SIGBUS;
236
int id;
237
struct dev_dax *dev_dax = filp->private_data;
238
239
dev_dbg(&dev_dax->dev, "%s: op=%s addr=%#lx order=%d\n", current->comm,
240
(vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read",
241
vmf->address & ~((1UL << (order + PAGE_SHIFT)) - 1), order);
242
243
id = dax_read_lock();
244
if (order == 0)
245
rc = __dev_dax_pte_fault(dev_dax, vmf);
246
else if (order == PMD_ORDER)
247
rc = __dev_dax_pmd_fault(dev_dax, vmf);
248
else if (order == PUD_ORDER)
249
rc = __dev_dax_pud_fault(dev_dax, vmf);
250
else
251
rc = VM_FAULT_SIGBUS;
252
253
dax_read_unlock(id);
254
255
return rc;
256
}
257
258
/* Regular (->fault) entry point: an order-0 fault through the common path. */
static vm_fault_t dev_dax_fault(struct vm_fault *vmf)
{
	const unsigned int pte_order = 0;

	return dev_dax_huge_fault(vmf, pte_order);
}
262
263
static int dev_dax_may_split(struct vm_area_struct *vma, unsigned long addr)
264
{
265
struct file *filp = vma->vm_file;
266
struct dev_dax *dev_dax = filp->private_data;
267
268
if (!IS_ALIGNED(addr, dev_dax->align))
269
return -EINVAL;
270
return 0;
271
}
272
273
static unsigned long dev_dax_pagesize(struct vm_area_struct *vma)
274
{
275
struct file *filp = vma->vm_file;
276
struct dev_dax *dev_dax = filp->private_data;
277
278
return dev_dax->align;
279
}
280
281
static const struct vm_operations_struct dax_vm_ops = {
282
.fault = dev_dax_fault,
283
.huge_fault = dev_dax_huge_fault,
284
.may_split = dev_dax_may_split,
285
.pagesize = dev_dax_pagesize,
286
};
287
288
static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
289
{
290
struct dev_dax *dev_dax = filp->private_data;
291
int rc, id;
292
293
dev_dbg(&dev_dax->dev, "trace\n");
294
295
/*
296
* We lock to check dax_dev liveness and will re-check at
297
* fault time.
298
*/
299
id = dax_read_lock();
300
rc = check_vma(dev_dax, vma, __func__);
301
dax_read_unlock(id);
302
if (rc)
303
return rc;
304
305
vma->vm_ops = &dax_vm_ops;
306
vm_flags_set(vma, VM_HUGEPAGE);
307
return 0;
308
}
309
310
/* return an unmapped area aligned to the dax region specified alignment */
311
static unsigned long dax_get_unmapped_area(struct file *filp,
312
unsigned long addr, unsigned long len, unsigned long pgoff,
313
unsigned long flags)
314
{
315
unsigned long off, off_end, off_align, len_align, addr_align, align;
316
struct dev_dax *dev_dax = filp ? filp->private_data : NULL;
317
318
if (!dev_dax || addr)
319
goto out;
320
321
align = dev_dax->align;
322
off = pgoff << PAGE_SHIFT;
323
off_end = off + len;
324
off_align = round_up(off, align);
325
326
if ((off_end <= off_align) || ((off_end - off_align) < align))
327
goto out;
328
329
len_align = len + align;
330
if ((off + len_align) < off)
331
goto out;
332
333
addr_align = mm_get_unmapped_area(current->mm, filp, addr, len_align,
334
pgoff, flags);
335
if (!IS_ERR_VALUE(addr_align)) {
336
addr_align += (off - addr_align) & (align - 1);
337
return addr_align;
338
}
339
out:
340
return mm_get_unmapped_area(current->mm, filp, addr, len, pgoff, flags);
341
}
342
343
static const struct address_space_operations dev_dax_aops = {
344
.dirty_folio = noop_dirty_folio,
345
};
346
347
static int dax_open(struct inode *inode, struct file *filp)
348
{
349
struct dax_device *dax_dev = inode_dax(inode);
350
struct inode *__dax_inode = dax_inode(dax_dev);
351
struct dev_dax *dev_dax = dax_get_private(dax_dev);
352
353
dev_dbg(&dev_dax->dev, "trace\n");
354
inode->i_mapping = __dax_inode->i_mapping;
355
inode->i_mapping->host = __dax_inode;
356
inode->i_mapping->a_ops = &dev_dax_aops;
357
filp->f_mapping = inode->i_mapping;
358
filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
359
filp->f_sb_err = file_sample_sb_err(filp);
360
filp->private_data = dev_dax;
361
inode->i_flags = S_DAX;
362
363
return 0;
364
}
365
366
static int dax_release(struct inode *inode, struct file *filp)
367
{
368
struct dev_dax *dev_dax = filp->private_data;
369
370
dev_dbg(&dev_dax->dev, "trace\n");
371
return 0;
372
}
373
374
static const struct file_operations dax_fops = {
375
.llseek = noop_llseek,
376
.owner = THIS_MODULE,
377
.open = dax_open,
378
.release = dax_release,
379
.get_unmapped_area = dax_get_unmapped_area,
380
.mmap = dax_mmap,
381
.fop_flags = FOP_MMAP_SYNC,
382
};
383
384
/* Action callback taking an untyped pointer: delete the given cdev. */
static void dev_dax_cdev_del(void *cdev)
{
	struct cdev *victim = cdev;

	cdev_del(victim);
}
388
389
/* Action callback taking an untyped pointer: kill the given dev_dax. */
static void dev_dax_kill(void *dev_dax)
{
	struct dev_dax *victim = dev_dax;

	kill_dev_dax(victim);
}
393
394
static int dev_dax_probe(struct dev_dax *dev_dax)
395
{
396
struct dax_device *dax_dev = dev_dax->dax_dev;
397
struct device *dev = &dev_dax->dev;
398
struct dev_pagemap *pgmap;
399
struct inode *inode;
400
struct cdev *cdev;
401
void *addr;
402
int rc, i;
403
404
if (static_dev_dax(dev_dax)) {
405
if (dev_dax->nr_range > 1) {
406
dev_warn(dev,
407
"static pgmap / multi-range device conflict\n");
408
return -EINVAL;
409
}
410
411
pgmap = dev_dax->pgmap;
412
} else {
413
if (dev_dax->pgmap) {
414
dev_warn(dev,
415
"dynamic-dax with pre-populated page map\n");
416
return -EINVAL;
417
}
418
419
pgmap = devm_kzalloc(dev,
420
struct_size(pgmap, ranges, dev_dax->nr_range - 1),
421
GFP_KERNEL);
422
if (!pgmap)
423
return -ENOMEM;
424
425
pgmap->nr_range = dev_dax->nr_range;
426
dev_dax->pgmap = pgmap;
427
428
for (i = 0; i < dev_dax->nr_range; i++) {
429
struct range *range = &dev_dax->ranges[i].range;
430
pgmap->ranges[i] = *range;
431
}
432
}
433
434
for (i = 0; i < dev_dax->nr_range; i++) {
435
struct range *range = &dev_dax->ranges[i].range;
436
437
if (!devm_request_mem_region(dev, range->start,
438
range_len(range), dev_name(dev))) {
439
dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve range\n",
440
i, range->start, range->end);
441
return -EBUSY;
442
}
443
}
444
445
pgmap->type = MEMORY_DEVICE_GENERIC;
446
if (dev_dax->align > PAGE_SIZE)
447
pgmap->vmemmap_shift =
448
order_base_2(dev_dax->align >> PAGE_SHIFT);
449
addr = devm_memremap_pages(dev, pgmap);
450
if (IS_ERR(addr))
451
return PTR_ERR(addr);
452
453
inode = dax_inode(dax_dev);
454
cdev = inode->i_cdev;
455
cdev_init(cdev, &dax_fops);
456
cdev->owner = dev->driver->owner;
457
cdev_set_parent(cdev, &dev->kobj);
458
rc = cdev_add(cdev, dev->devt, 1);
459
if (rc)
460
return rc;
461
462
rc = devm_add_action_or_reset(dev, dev_dax_cdev_del, cdev);
463
if (rc)
464
return rc;
465
466
run_dax(dax_dev);
467
return devm_add_action_or_reset(dev, dev_dax_kill, dev_dax);
468
}
469
470
static struct dax_device_driver device_dax_driver = {
471
.probe = dev_dax_probe,
472
.type = DAXDRV_DEVICE_TYPE,
473
};
474
475
/* Module init: register the device-dax driver; returns the register result. */
static int __init dax_init(void)
{
	int rc = dax_driver_register(&device_dax_driver);

	return rc;
}
479
480
/* Module exit: unregister the driver registered by dax_init(). */
static void __exit dax_exit(void)
{
	struct dax_device_driver *drv = &device_dax_driver;

	dax_driver_unregister(drv);
}
484
485
MODULE_AUTHOR("Intel Corporation");
486
MODULE_DESCRIPTION("Device DAX: direct access device driver");
487
MODULE_LICENSE("GPL v2");
488
module_init(dax_init);
489
module_exit(dax_exit);
490
MODULE_ALIAS_DAX_DEVICE(0);
491
492