Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/drivers/dax/kmem.c
26378 views
1
// SPDX-License-Identifier: GPL-2.0
2
/* Copyright(c) 2016-2019 Intel Corporation. All rights reserved. */
3
#include <linux/memremap.h>
4
#include <linux/pagemap.h>
5
#include <linux/memory.h>
6
#include <linux/module.h>
7
#include <linux/device.h>
8
#include <linux/slab.h>
9
#include <linux/dax.h>
10
#include <linux/fs.h>
11
#include <linux/mm.h>
12
#include <linux/mman.h>
13
#include <linux/memory-tiers.h>
14
#include <linux/memory_hotplug.h>
15
#include <linux/string_helpers.h>
16
#include "dax-private.h"
17
#include "bus.h"
18
19
/*
 * Fallback abstract distance for the NUMA node that DAX/kmem onlines.
 * It is only the starting value handed to mt_calc_adistance(); it is
 * meant to apply when the low-level platform driver did not initialize
 * an abstract distance for that node.
 */
#define MEMTIER_DEFAULT_DAX_ADISTANCE	(MEMTIER_ADISTANCE_DRAM * 5)
25
26
/* Memory resource name used for add_memory_driver_managed(). */
27
static const char *kmem_name;
28
/* Set if any memory will remain added when the driver will be unloaded. */
29
static bool any_hotremove_failed;
30
31
static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r)
32
{
33
struct dev_dax_range *dax_range = &dev_dax->ranges[i];
34
struct range *range = &dax_range->range;
35
36
/* memory-block align the hotplug range */
37
r->start = ALIGN(range->start, memory_block_size_bytes());
38
r->end = ALIGN_DOWN(range->end + 1, memory_block_size_bytes()) - 1;
39
if (r->start >= r->end) {
40
r->start = range->start;
41
r->end = range->end;
42
return -ENOSPC;
43
}
44
return 0;
45
}
46
47
/**
 * struct dax_kmem_data - per-device state stored as driver data
 * @res_name: copy of the device name, used to name the requested regions
 * @mgid: id returned by memory_group_register_static() for this device
 * @res: one slot per device range, holding the region requested for it;
 *       NULL when no region is held for that range
 */
struct dax_kmem_data {
	const char *res_name;
	int mgid;
	struct resource *res[];
};
52
53
/* Serializes every access to kmem_memory_types. */
static DEFINE_MUTEX(kmem_memory_type_lock);
/* Memory types obtained through kmem_find_alloc_memory_type(). */
static LIST_HEAD(kmem_memory_types);
55
56
static struct memory_dev_type *kmem_find_alloc_memory_type(int adist)
57
{
58
guard(mutex)(&kmem_memory_type_lock);
59
return mt_find_alloc_memory_type(adist, &kmem_memory_types);
60
}
61
62
static void kmem_put_memory_types(void)
63
{
64
guard(mutex)(&kmem_memory_type_lock);
65
mt_put_memory_types(&kmem_memory_types);
66
}
67
68
static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
69
{
70
struct device *dev = &dev_dax->dev;
71
unsigned long total_len = 0, orig_len = 0;
72
struct dax_kmem_data *data;
73
struct memory_dev_type *mtype;
74
int i, rc, mapped = 0;
75
mhp_t mhp_flags;
76
int numa_node;
77
int adist = MEMTIER_DEFAULT_DAX_ADISTANCE;
78
79
/*
80
* Ensure good NUMA information for the persistent memory.
81
* Without this check, there is a risk that slow memory
82
* could be mixed in a node with faster memory, causing
83
* unavoidable performance issues.
84
*/
85
numa_node = dev_dax->target_node;
86
if (numa_node < 0) {
87
dev_warn(dev, "rejecting DAX region with invalid node: %d\n",
88
numa_node);
89
return -EINVAL;
90
}
91
92
mt_calc_adistance(numa_node, &adist);
93
mtype = kmem_find_alloc_memory_type(adist);
94
if (IS_ERR(mtype))
95
return PTR_ERR(mtype);
96
97
for (i = 0; i < dev_dax->nr_range; i++) {
98
struct range range;
99
100
orig_len += range_len(&dev_dax->ranges[i].range);
101
rc = dax_kmem_range(dev_dax, i, &range);
102
if (rc) {
103
dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n",
104
i, range.start, range.end);
105
continue;
106
}
107
total_len += range_len(&range);
108
}
109
110
if (!total_len) {
111
dev_warn(dev, "rejecting DAX region without any memory after alignment\n");
112
return -EINVAL;
113
} else if (total_len != orig_len) {
114
char buf[16];
115
116
string_get_size(orig_len - total_len, 1, STRING_UNITS_2,
117
buf, sizeof(buf));
118
dev_warn(dev, "DAX region truncated by %s due to alignment\n", buf);
119
}
120
121
init_node_memory_type(numa_node, mtype);
122
123
rc = -ENOMEM;
124
data = kzalloc(struct_size(data, res, dev_dax->nr_range), GFP_KERNEL);
125
if (!data)
126
goto err_dax_kmem_data;
127
128
data->res_name = kstrdup(dev_name(dev), GFP_KERNEL);
129
if (!data->res_name)
130
goto err_res_name;
131
132
rc = memory_group_register_static(numa_node, PFN_UP(total_len));
133
if (rc < 0)
134
goto err_reg_mgid;
135
data->mgid = rc;
136
137
for (i = 0; i < dev_dax->nr_range; i++) {
138
struct resource *res;
139
struct range range;
140
141
rc = dax_kmem_range(dev_dax, i, &range);
142
if (rc)
143
continue;
144
145
/* Region is permanently reserved if hotremove fails. */
146
res = request_mem_region(range.start, range_len(&range), data->res_name);
147
if (!res) {
148
dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve region\n",
149
i, range.start, range.end);
150
/*
151
* Once some memory has been onlined we can't
152
* assume that it can be un-onlined safely.
153
*/
154
if (mapped)
155
continue;
156
rc = -EBUSY;
157
goto err_request_mem;
158
}
159
data->res[i] = res;
160
161
/*
162
* Set flags appropriate for System RAM. Leave ..._BUSY clear
163
* so that add_memory() can add a child resource. Do not
164
* inherit flags from the parent since it may set new flags
165
* unknown to us that will break add_memory() below.
166
*/
167
res->flags = IORESOURCE_SYSTEM_RAM;
168
169
mhp_flags = MHP_NID_IS_MGID;
170
if (dev_dax->memmap_on_memory)
171
mhp_flags |= MHP_MEMMAP_ON_MEMORY;
172
173
/*
174
* Ensure that future kexec'd kernels will not treat
175
* this as RAM automatically.
176
*/
177
rc = add_memory_driver_managed(data->mgid, range.start,
178
range_len(&range), kmem_name, mhp_flags);
179
180
if (rc) {
181
dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n",
182
i, range.start, range.end);
183
remove_resource(res);
184
kfree(res);
185
data->res[i] = NULL;
186
if (mapped)
187
continue;
188
goto err_request_mem;
189
}
190
mapped++;
191
}
192
193
dev_set_drvdata(dev, data);
194
195
return 0;
196
197
err_request_mem:
198
memory_group_unregister(data->mgid);
199
err_reg_mgid:
200
kfree(data->res_name);
201
err_res_name:
202
kfree(data);
203
err_dax_kmem_data:
204
clear_node_memory_type(numa_node, mtype);
205
return rc;
206
}
207
208
#ifdef CONFIG_MEMORY_HOTREMOVE
209
static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
210
{
211
int i, success = 0;
212
int node = dev_dax->target_node;
213
struct device *dev = &dev_dax->dev;
214
struct dax_kmem_data *data = dev_get_drvdata(dev);
215
216
/*
217
* We have one shot for removing memory, if some memory blocks were not
218
* offline prior to calling this function remove_memory() will fail, and
219
* there is no way to hotremove this memory until reboot because device
220
* unbind will succeed even if we return failure.
221
*/
222
for (i = 0; i < dev_dax->nr_range; i++) {
223
struct range range;
224
int rc;
225
226
rc = dax_kmem_range(dev_dax, i, &range);
227
if (rc)
228
continue;
229
230
rc = remove_memory(range.start, range_len(&range));
231
if (rc == 0) {
232
remove_resource(data->res[i]);
233
kfree(data->res[i]);
234
data->res[i] = NULL;
235
success++;
236
continue;
237
}
238
any_hotremove_failed = true;
239
dev_err(dev,
240
"mapping%d: %#llx-%#llx cannot be hotremoved until the next reboot\n",
241
i, range.start, range.end);
242
}
243
244
if (success >= dev_dax->nr_range) {
245
memory_group_unregister(data->mgid);
246
kfree(data->res_name);
247
kfree(data);
248
dev_set_drvdata(dev, NULL);
249
/*
250
* Clear the memtype association on successful unplug.
251
* If not, we have memory blocks left which can be
252
* offlined/onlined later. We need to keep memory_dev_type
253
* for that. This implies this reference will be around
254
* till next reboot.
255
*/
256
clear_node_memory_type(node, NULL);
257
}
258
}
259
#else
260
static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
261
{
262
/*
263
* Without hotremove purposely leak the request_mem_region() for the
264
* device-dax range and return '0' to ->remove() attempts. The removal
265
* of the device from the driver always succeeds, but the region is
266
* permanently pinned as reserved by the unreleased
267
* request_mem_region().
268
*/
269
any_hotremove_failed = true;
270
}
271
#endif /* CONFIG_MEMORY_HOTREMOVE */
272
273
static struct dax_device_driver device_dax_kmem_driver = {
274
.probe = dev_dax_kmem_probe,
275
.remove = dev_dax_kmem_remove,
276
.type = DAXDRV_KMEM_TYPE,
277
};
278
279
/*
 * Module init: allocate the shared resource name and register the driver.
 *
 * Returns 0 on success, -ENOMEM if the name cannot be allocated, or the
 * error from dax_driver_register().
 */
static int __init dax_kmem_init(void)
{
	int rc;

	/* Resource name is permanently allocated if any hotremove fails. */
	kmem_name = kstrdup_const("System RAM (kmem)", GFP_KERNEL);
	if (!kmem_name)
		return -ENOMEM;

	rc = dax_driver_register(&device_dax_kmem_driver);
	if (!rc)
		return 0;

	/* Registration failed: undo everything set up so far. */
	kmem_put_memory_types();
	kfree_const(kmem_name);
	return rc;
}
299
300
/*
 * Module exit: unregister the driver, then release the module-wide state.
 */
static void __exit dax_kmem_exit(void)
{
	dax_driver_unregister(&device_dax_kmem_driver);

	kmem_put_memory_types();

	/* The name must outlive the module if any memory stayed added. */
	if (!any_hotremove_failed)
		kfree_const(kmem_name);
}
307
308
MODULE_AUTHOR("Intel Corporation");
309
MODULE_DESCRIPTION("KMEM DAX: map dax-devices as System-RAM");
310
MODULE_LICENSE("GPL v2");
311
module_init(dax_kmem_init);
312
module_exit(dax_kmem_exit);
313
MODULE_ALIAS_DAX_DEVICE(0);
314
315