GitHub Repository: torvalds/linux
Path: blob/master/mm/list_lru.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
#include "slab.h"
#include "internal.h"

#ifdef CONFIG_MEMCG
static LIST_HEAD(memcg_list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        return lru->memcg_aware;
}

static void list_lru_register(struct list_lru *lru)
{
        if (!list_lru_memcg_aware(lru))
                return;

        mutex_lock(&list_lrus_mutex);
        list_add(&lru->list, &memcg_list_lrus);
        mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
        if (!list_lru_memcg_aware(lru))
                return;

        mutex_lock(&list_lrus_mutex);
        list_del(&lru->list);
        mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
        return lru->shrinker_id;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
        if (list_lru_memcg_aware(lru) && idx >= 0) {
                struct list_lru_memcg *mlru = xa_load(&lru->xa, idx);

                return mlru ? &mlru->node[nid] : NULL;
        }
        return &lru->node[nid].lru;
}

static inline bool lock_list_lru(struct list_lru_one *l, bool irq)
{
        if (irq)
                spin_lock_irq(&l->lock);
        else
                spin_lock(&l->lock);
        if (unlikely(READ_ONCE(l->nr_items) == LONG_MIN)) {
                if (irq)
                        spin_unlock_irq(&l->lock);
                else
                        spin_unlock(&l->lock);
                return false;
        }
        return true;
}

static inline struct list_lru_one *
lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                       bool irq, bool skip_empty)
{
        struct list_lru_one *l;

        rcu_read_lock();
again:
        l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
        if (likely(l) && lock_list_lru(l, irq)) {
                rcu_read_unlock();
                return l;
        }
        /*
         * Caller may simply bail out if raced with reparenting or
         * may iterate through the list_lru and expect empty slots.
         */
        if (skip_empty) {
                rcu_read_unlock();
                return NULL;
        }
        VM_WARN_ON(!css_is_dying(&memcg->css));
        memcg = parent_mem_cgroup(memcg);
        goto again;
}

static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
{
        if (irq_off)
                spin_unlock_irq(&l->lock);
        else
                spin_unlock(&l->lock);
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
        return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
        return &lru->node[nid].lru;
}

static inline struct list_lru_one *
lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                       bool irq, bool skip_empty)
{
        struct list_lru_one *l = &lru->node[nid].lru;

        if (irq)
                spin_lock_irq(&l->lock);
        else
                spin_lock(&l->lock);

        return l;
}

static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
{
        if (irq_off)
                spin_unlock_irq(&l->lock);
        else
                spin_unlock(&l->lock);
}
#endif /* CONFIG_MEMCG */

/* The caller must ensure the memcg lifetime. */
bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
                  struct mem_cgroup *memcg)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;

        l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
        if (!l)
                return false;
        if (list_empty(item)) {
                list_add_tail(item, &l->list);
                /* Set shrinker bit if the first element was added */
                if (!l->nr_items++)
                        set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
                unlock_list_lru(l, false);
                atomic_long_inc(&nlru->nr_items);
                return true;
        }
        unlock_list_lru(l, false);
        return false;
}

bool list_lru_add_obj(struct list_lru *lru, struct list_head *item)
{
        bool ret;
        int nid = page_to_nid(virt_to_page(item));

        if (list_lru_memcg_aware(lru)) {
                rcu_read_lock();
                ret = list_lru_add(lru, item, nid, mem_cgroup_from_slab_obj(item));
                rcu_read_unlock();
        } else {
                ret = list_lru_add(lru, item, nid, NULL);
        }

        return ret;
}
EXPORT_SYMBOL_GPL(list_lru_add_obj);
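/*
 * Illustrative usage sketch (editor's addition, not part of this file):
 * a cache whose objects are slab-allocated would typically put an object
 * on the LRU when it becomes unused. The names my_lru, my_obj and ->lru
 * below are hypothetical.
 *
 *      if (list_lru_add_obj(&my_lru, &my_obj->lru))
 *              my_obj->on_lru = true;
 *
 * list_lru_add_obj() derives the NUMA node and the memcg from the object
 * itself, so the caller passes neither; it returns false if the item is
 * already on a list (i.e. !list_empty(&my_obj->lru)).
 */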

/* The caller must ensure the memcg lifetime. */
bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
                  struct mem_cgroup *memcg)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;
        l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
        if (!l)
                return false;
        if (!list_empty(item)) {
                list_del_init(item);
                l->nr_items--;
                unlock_list_lru(l, false);
                atomic_long_dec(&nlru->nr_items);
                return true;
        }
        unlock_list_lru(l, false);
        return false;
}

bool list_lru_del_obj(struct list_lru *lru, struct list_head *item)
{
        bool ret;
        int nid = page_to_nid(virt_to_page(item));

        if (list_lru_memcg_aware(lru)) {
                rcu_read_lock();
                ret = list_lru_del(lru, item, nid, mem_cgroup_from_slab_obj(item));
                rcu_read_unlock();
        } else {
                ret = list_lru_del(lru, item, nid, NULL);
        }

        return ret;
}
EXPORT_SYMBOL_GPL(list_lru_del_obj);
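/*
 * Illustrative usage sketch (editor's addition, not part of this file):
 * before freeing an object that may still sit on the LRU, a hypothetical
 * cache removes it first; my_lru, my_obj and ->lru are assumptions.
 *
 *      static void my_obj_free(struct my_obj *obj)
 *      {
 *              list_lru_del_obj(&my_lru, &obj->lru);
 *              kfree(obj);
 *      }
 *
 * Because list_lru_del() uses list_del_init(), a later list_lru_add_obj()
 * on the same item sees it as empty and can re-add it.
 */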

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
        list_del_init(item);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
                           struct list_head *head)
{
        list_move(item, head);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);
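/*
 * Illustrative sketch (editor's addition, not part of this file):
 * list_lru_isolate() and list_lru_isolate_move() are meant to be called
 * from a walk callback while the lru lock is held. A hypothetical callback
 * that moves reclaimable items onto a caller-provided dispose list could
 * look like this; my_obj and my_obj_is_busy() are assumptions.
 *
 *      static enum lru_status my_isolate(struct list_head *item,
 *                                        struct list_lru_one *lru,
 *                                        void *cb_arg)
 *      {
 *              struct my_obj *obj = container_of(item, struct my_obj, lru);
 *
 *              if (my_obj_is_busy(obj))
 *                      return LRU_ROTATE;
 *
 *              list_lru_isolate_move(lru, item, (struct list_head *)cb_arg);
 *              return LRU_REMOVED;
 *      }
 */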

unsigned long list_lru_count_one(struct list_lru *lru,
                                 int nid, struct mem_cgroup *memcg)
{
        struct list_lru_one *l;
        long count;

        rcu_read_lock();
        l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
        count = l ? READ_ONCE(l->nr_items) : 0;
        rcu_read_unlock();

        if (unlikely(count < 0))
                count = 0;

        return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
        struct list_lru_node *nlru;

        nlru = &lru->node[nid];
        return atomic_long_read(&nlru->nr_items);
}
EXPORT_SYMBOL_GPL(list_lru_count_node);
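/*
 * Illustrative sketch (editor's addition, not part of this file): a
 * shrinker's count_objects callback typically reports the per-node,
 * per-memcg count via the list_lru_shrink_count() wrapper from
 * <linux/list_lru.h>, which resolves to
 * list_lru_count_one(lru, sc->nid, sc->memcg). my_lru is an assumption.
 *
 *      static unsigned long my_count(struct shrinker *shrink,
 *                                    struct shrink_control *sc)
 *      {
 *              return list_lru_shrink_count(&my_lru, sc);
 *      }
 */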

static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                    list_lru_walk_cb isolate, void *cb_arg,
                    unsigned long *nr_to_walk, bool irq_off)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l = NULL;
        struct list_head *item, *n;
        unsigned long isolated = 0;

restart:
        l = lock_list_lru_of_memcg(lru, nid, memcg, irq_off, true);
        if (!l)
                return isolated;
        list_for_each_safe(item, n, &l->list) {
                enum lru_status ret;

                /*
                 * decrement nr_to_walk first so that we don't livelock if we
                 * get stuck on large numbers of LRU_RETRY items
                 */
                if (!*nr_to_walk)
                        break;
                --*nr_to_walk;

                ret = isolate(item, l, cb_arg);
                switch (ret) {
                /*
                 * LRU_RETRY, LRU_REMOVED_RETRY and LRU_STOP will drop the lru
                 * lock. List traversal will have to restart from scratch.
                 */
                case LRU_RETRY:
                        goto restart;
                case LRU_REMOVED_RETRY:
                        fallthrough;
                case LRU_REMOVED:
                        isolated++;
                        atomic_long_dec(&nlru->nr_items);
                        if (ret == LRU_REMOVED_RETRY)
                                goto restart;
                        break;
                case LRU_ROTATE:
                        list_move_tail(item, &l->list);
                        break;
                case LRU_SKIP:
                        break;
                case LRU_STOP:
                        goto out;
                default:
                        BUG();
                }
        }
        unlock_list_lru(l, irq_off);
out:
        return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                  list_lru_walk_cb isolate, void *cb_arg,
                  unsigned long *nr_to_walk)
{
        return __list_lru_walk_one(lru, nid, memcg, isolate,
                                   cb_arg, nr_to_walk, false);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);
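/*
 * Illustrative sketch (editor's addition, not part of this file): a
 * shrinker's scan_objects callback usually drives this walk through the
 * list_lru_shrink_walk() wrapper from <linux/list_lru.h>, which passes
 * sc->nid, sc->memcg and &sc->nr_to_scan down to list_lru_walk_one().
 * my_lru, my_isolate() and my_dispose() are assumptions.
 *
 *      static unsigned long my_scan(struct shrinker *shrink,
 *                                   struct shrink_control *sc)
 *      {
 *              LIST_HEAD(dispose);
 *              unsigned long freed;
 *
 *              freed = list_lru_shrink_walk(&my_lru, sc, my_isolate, &dispose);
 *              my_dispose(&dispose);
 *              return freed;
 *      }
 */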

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                      list_lru_walk_cb isolate, void *cb_arg,
                      unsigned long *nr_to_walk)
{
        return __list_lru_walk_one(lru, nid, memcg, isolate,
                                   cb_arg, nr_to_walk, true);
}

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
                                 list_lru_walk_cb isolate, void *cb_arg,
                                 unsigned long *nr_to_walk)
{
        long isolated = 0;

        isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
                                      nr_to_walk);

#ifdef CONFIG_MEMCG
        if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
                struct list_lru_memcg *mlru;
                struct mem_cgroup *memcg;
                unsigned long index;

                xa_for_each(&lru->xa, index, mlru) {
                        rcu_read_lock();
                        memcg = mem_cgroup_from_id(index);
                        if (!mem_cgroup_tryget(memcg)) {
                                rcu_read_unlock();
                                continue;
                        }
                        rcu_read_unlock();
                        isolated += __list_lru_walk_one(lru, nid, memcg,
                                                        isolate, cb_arg,
                                                        nr_to_walk, false);
                        mem_cgroup_put(memcg);

                        if (*nr_to_walk <= 0)
                                break;
                }
        }
#endif

        return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);
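/*
 * Illustrative sketch (editor's addition, not part of this file): callers
 * that need to dispose of everything, e.g. before list_lru_destroy(),
 * commonly walk every node with a large budget; the list_lru_walk() helper
 * in <linux/list_lru.h> wraps a loop of this shape. my_lru and my_isolate()
 * are assumptions.
 *
 *      int nid;
 *      unsigned long nr = ULONG_MAX;
 *
 *      for_each_node(nid)
 *              list_lru_walk_node(&my_lru, nid, my_isolate, NULL, &nr);
 */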

static void init_one_lru(struct list_lru *lru, struct list_lru_one *l)
{
        INIT_LIST_HEAD(&l->list);
        spin_lock_init(&l->lock);
        l->nr_items = 0;
#ifdef CONFIG_LOCKDEP
        if (lru->key)
                lockdep_set_class(&l->lock, lru->key);
#endif
}

#ifdef CONFIG_MEMCG
static struct list_lru_memcg *memcg_init_list_lru_one(struct list_lru *lru, gfp_t gfp)
{
        int nid;
        struct list_lru_memcg *mlru;

        mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp);
        if (!mlru)
                return NULL;

        for_each_node(nid)
                init_one_lru(lru, &mlru->node[nid]);

        return mlru;
}

static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
        if (memcg_aware)
                xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ);
        lru->memcg_aware = memcg_aware;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
        XA_STATE(xas, &lru->xa, 0);
        struct list_lru_memcg *mlru;

        if (!list_lru_memcg_aware(lru))
                return;

        xas_lock_irq(&xas);
        xas_for_each(&xas, mlru, ULONG_MAX) {
                kfree(mlru);
                xas_store(&xas, NULL);
        }
        xas_unlock_irq(&xas);
}

static void memcg_reparent_list_lru_one(struct list_lru *lru, int nid,
                                        struct list_lru_one *src,
                                        struct mem_cgroup *dst_memcg)
{
        int dst_idx = dst_memcg->kmemcg_id;
        struct list_lru_one *dst;

        spin_lock_irq(&src->lock);
        dst = list_lru_from_memcg_idx(lru, nid, dst_idx);
        spin_lock_nested(&dst->lock, SINGLE_DEPTH_NESTING);

        list_splice_init(&src->list, &dst->list);
        if (src->nr_items) {
                WARN_ON(src->nr_items < 0);
                dst->nr_items += src->nr_items;
                set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
        }
        /* Mark the list_lru_one dead */
        src->nr_items = LONG_MIN;

        spin_unlock(&dst->lock);
        spin_unlock_irq(&src->lock);
}

void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent)
{
        struct list_lru *lru;
        int i;

        mutex_lock(&list_lrus_mutex);
        list_for_each_entry(lru, &memcg_list_lrus, list) {
                struct list_lru_memcg *mlru;
                XA_STATE(xas, &lru->xa, memcg->kmemcg_id);

                /*
                 * Lock the XArray to ensure there is no ongoing
                 * list_lru_memcg allocation and that any further allocation
                 * will see css_is_dying().
                 */
                xas_lock_irq(&xas);
                mlru = xas_store(&xas, NULL);
                xas_unlock_irq(&xas);
                if (!mlru)
                        continue;

                /*
                 * With the XArray value set to NULL, holding the lru lock
                 * below prevents list_lru_{add,del,isolate} from touching
                 * the lru, so it is safe to reparent.
                 */
                for_each_node(i)
                        memcg_reparent_list_lru_one(lru, i, &mlru->node[i], parent);

                /*
                 * Here all list_lrus corresponding to the cgroup are guaranteed
                 * to remain empty, so we can safely free this lru; any further
                 * memcg_list_lru_alloc() call will simply bail out.
                 */
                kvfree_rcu(mlru, rcu);
        }
        mutex_unlock(&list_lrus_mutex);
}

static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
                                            struct list_lru *lru)
{
        int idx = memcg->kmemcg_id;

        return idx < 0 || xa_load(&lru->xa, idx);
}

int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
                         gfp_t gfp)
{
        unsigned long flags;
        struct list_lru_memcg *mlru = NULL;
        struct mem_cgroup *pos, *parent;
        XA_STATE(xas, &lru->xa, 0);

        if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru))
                return 0;

        gfp &= GFP_RECLAIM_MASK;
        /*
         * Because the list_lru can be reparented to the parent cgroup's
         * list_lru, we should make sure that this cgroup and all its
         * ancestors have allocated list_lru_memcg.
         */
        do {
                /*
                 * Keep finding the farthest unpopulated ancestor and
                 * populate it first, until memcg itself has been populated.
                 */
                pos = memcg;
                parent = parent_mem_cgroup(pos);
                while (!memcg_list_lru_allocated(parent, lru)) {
                        pos = parent;
                        parent = parent_mem_cgroup(pos);
                }

                if (!mlru) {
                        mlru = memcg_init_list_lru_one(lru, gfp);
                        if (!mlru)
                                return -ENOMEM;
                }
                xas_set(&xas, pos->kmemcg_id);
                do {
                        xas_lock_irqsave(&xas, flags);
                        if (!xas_load(&xas) && !css_is_dying(&pos->css)) {
                                xas_store(&xas, mlru);
                                if (!xas_error(&xas))
                                        mlru = NULL;
                        }
                        xas_unlock_irqrestore(&xas, flags);
                } while (xas_nomem(&xas, gfp));
        } while (pos != memcg && !css_is_dying(&pos->css));

        if (unlikely(mlru))
                kfree(mlru);

        return xas_error(&xas);
}
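/*
 * Illustrative sketch (editor's addition, not part of this file): before an
 * object charged to a non-root memcg can be put on a memcg-aware lru, the
 * corresponding list_lru_memcg must exist; a hypothetical allocation path
 * could ensure that up front. my_lru and my_memcg are assumptions.
 *
 *      err = memcg_list_lru_alloc(my_memcg, &my_lru, GFP_KERNEL);
 *      if (err)
 *              return err;     (fall back or fail the allocation)
 */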
#else
static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG */

int __list_lru_init(struct list_lru *lru, bool memcg_aware, struct shrinker *shrinker)
{
        int i;

#ifdef CONFIG_MEMCG
        if (shrinker)
                lru->shrinker_id = shrinker->id;
        else
                lru->shrinker_id = -1;

        if (mem_cgroup_kmem_disabled())
                memcg_aware = false;
#endif

        lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
        if (!lru->node)
                return -ENOMEM;

        for_each_node(i)
                init_one_lru(lru, &lru->node[i].lru);

        memcg_init_list_lru(lru, memcg_aware);
        list_lru_register(lru);

        return 0;
}
EXPORT_SYMBOL_GPL(__list_lru_init);
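/*
 * Illustrative sketch (editor's addition, not part of this file): users
 * normally go through the list_lru_init()/list_lru_init_memcg() macros in
 * <linux/list_lru.h>, which call __list_lru_init() with the appropriate
 * memcg_aware flag. my_lru and my_shrinker are assumptions.
 *
 *      static struct list_lru my_lru;
 *
 *      err = list_lru_init_memcg(&my_lru, my_shrinker);
 *      if (err)
 *              goto out_free_shrinker;
 */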

void list_lru_destroy(struct list_lru *lru)
{
        /* Already destroyed or not yet initialized? */
        if (!lru->node)
                return;

        list_lru_unregister(lru);

        memcg_destroy_list_lru(lru);
        kfree(lru->node);
        lru->node = NULL;

#ifdef CONFIG_MEMCG
        lru->shrinker_id = -1;
#endif
}
EXPORT_SYMBOL_GPL(list_lru_destroy);
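/*
 * Illustrative sketch (editor's addition, not part of this file): teardown
 * mirrors initialisation; a caller usually drains the lru first (see the
 * list_lru_walk_node() sketch above) and then destroys it. my_lru is an
 * assumption.
 *
 *      list_lru_destroy(&my_lru);
 */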