GitHub Repository: torvalds/linux
Path: blob/master/kernel/cgroup/dmem.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2023-2024 Intel Corporation (Maarten Lankhorst <[email protected]>)
 * Copyright 2024 Red Hat (Maxime Ripard <[email protected]>)
 * Partially based on the rdma and misc controllers, which bear the following copyrights:
 *
 * Copyright 2020 Google LLC
 * Copyright (C) 2016 Parav Pandit <[email protected]>
 */

#include <linux/cgroup.h>
#include <linux/cgroup_dmem.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/page_counter.h>
#include <linux/parser.h>
#include <linux/refcount.h>
#include <linux/rculist.h>
#include <linux/slab.h>

struct dmem_cgroup_region {
	/**
	 * @ref: References keeping the region alive.
	 * Keeps the region reference alive after a successful RCU lookup.
	 */
	struct kref ref;

	/** @rcu: RCU head for freeing */
	struct rcu_head rcu;

	/**
	 * @region_node: Linked into &dmem_cgroup_regions list.
	 * Protected by RCU and global spinlock.
	 */
	struct list_head region_node;

	/**
	 * @pools: List of pools linked to this region.
	 * Protected by global spinlock only.
	 */
	struct list_head pools;

	/** @size: Size of region, in bytes */
	u64 size;

	/** @name: Name describing the node, set by dmem_cgroup_register_region() */
	char *name;

	/**
	 * @unregistered: Whether the region is unregistered by its caller.
	 * No new pools should be added to the region afterwards.
	 */
	bool unregistered;
};

struct dmemcg_state {
	struct cgroup_subsys_state css;

	struct list_head pools;
};

struct dmem_cgroup_pool_state {
	struct dmem_cgroup_region *region;
	struct dmemcg_state *cs;

	/* css node, RCU protected against region teardown */
	struct list_head css_node;

	/* dev node, no RCU protection required */
	struct list_head region_node;

	struct rcu_head rcu;

	struct page_counter cnt;
	struct dmem_cgroup_pool_state *parent;

	refcount_t ref;
	bool inited;
};

/*
 * 3 operations require locking protection:
 * - Registering and unregistering a region to/from the list, requires the global lock.
 * - Adding a dmem_cgroup_pool_state to a CSS, removing when the CSS is freed.
 * - Adding a dmem_cgroup_pool_state to a region list.
 *
 * Since for the most common operations RCU provides enough protection, I
 * do not think more granular locking makes sense. Most protection is offered
 * by RCU and the lockless page_counter.
 */
static DEFINE_SPINLOCK(dmemcg_lock);
static LIST_HEAD(dmem_cgroup_regions);

static void dmemcg_free_region(struct kref *ref);
static void dmemcg_pool_free_rcu(struct rcu_head *rcu);

static inline struct dmemcg_state *
css_to_dmemcs(struct cgroup_subsys_state *css)
{
	return container_of(css, struct dmemcg_state, css);
}

static inline struct dmemcg_state *get_current_dmemcs(void)
{
	return css_to_dmemcs(task_get_css(current, dmem_cgrp_id));
}

static struct dmemcg_state *parent_dmemcs(struct dmemcg_state *cg)
{
	return cg->css.parent ? css_to_dmemcs(cg->css.parent) : NULL;
}

static void dmemcg_pool_get(struct dmem_cgroup_pool_state *pool)
{
	refcount_inc(&pool->ref);
}

static bool dmemcg_pool_tryget(struct dmem_cgroup_pool_state *pool)
{
	return refcount_inc_not_zero(&pool->ref);
}

static void dmemcg_pool_put(struct dmem_cgroup_pool_state *pool)
{
	if (!refcount_dec_and_test(&pool->ref))
		return;

	call_rcu(&pool->rcu, dmemcg_pool_free_rcu);
}

static void dmemcg_pool_free_rcu(struct rcu_head *rcu)
{
	struct dmem_cgroup_pool_state *pool = container_of(rcu, typeof(*pool), rcu);

	if (pool->parent)
		dmemcg_pool_put(pool->parent);
	kref_put(&pool->region->ref, dmemcg_free_region);
	kfree(pool);
}

static void free_cg_pool(struct dmem_cgroup_pool_state *pool)
{
	list_del(&pool->region_node);
	dmemcg_pool_put(pool);
}

static void
set_resource_min(struct dmem_cgroup_pool_state *pool, u64 val)
{
	page_counter_set_min(&pool->cnt, val);
}

static void
set_resource_low(struct dmem_cgroup_pool_state *pool, u64 val)
{
	page_counter_set_low(&pool->cnt, val);
}

static void
set_resource_max(struct dmem_cgroup_pool_state *pool, u64 val)
{
	page_counter_set_max(&pool->cnt, val);
}

static u64 get_resource_low(struct dmem_cgroup_pool_state *pool)
{
	return pool ? READ_ONCE(pool->cnt.low) : 0;
}

static u64 get_resource_min(struct dmem_cgroup_pool_state *pool)
{
	return pool ? READ_ONCE(pool->cnt.min) : 0;
}

static u64 get_resource_max(struct dmem_cgroup_pool_state *pool)
{
	return pool ? READ_ONCE(pool->cnt.max) : PAGE_COUNTER_MAX;
}

static u64 get_resource_current(struct dmem_cgroup_pool_state *pool)
{
	return pool ? page_counter_read(&pool->cnt) : 0;
}

static void reset_all_resource_limits(struct dmem_cgroup_pool_state *rpool)
{
	set_resource_min(rpool, 0);
	set_resource_low(rpool, 0);
	set_resource_max(rpool, PAGE_COUNTER_MAX);
}

static void dmemcs_offline(struct cgroup_subsys_state *css)
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(css);
	struct dmem_cgroup_pool_state *pool;

	rcu_read_lock();
	list_for_each_entry_rcu(pool, &dmemcs->pools, css_node)
		reset_all_resource_limits(pool);
	rcu_read_unlock();
}

static void dmemcs_free(struct cgroup_subsys_state *css)
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(css);
	struct dmem_cgroup_pool_state *pool, *next;

	spin_lock(&dmemcg_lock);
	list_for_each_entry_safe(pool, next, &dmemcs->pools, css_node) {
		/*
		 * The pool is dead and all references are 0,
		 * no need for RCU protection with list_del_rcu or freeing.
		 */
		list_del(&pool->css_node);
		free_cg_pool(pool);
	}
	spin_unlock(&dmemcg_lock);

	kfree(dmemcs);
}

static struct cgroup_subsys_state *
dmemcs_alloc(struct cgroup_subsys_state *parent_css)
{
	struct dmemcg_state *dmemcs = kzalloc(sizeof(*dmemcs), GFP_KERNEL);

	if (!dmemcs)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&dmemcs->pools);
	return &dmemcs->css;
}

static struct dmem_cgroup_pool_state *
find_cg_pool_locked(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region)
{
	struct dmem_cgroup_pool_state *pool;

	list_for_each_entry_rcu(pool, &dmemcs->pools, css_node, spin_is_locked(&dmemcg_lock))
		if (pool->region == region)
			return pool;

	return NULL;
}

static struct dmem_cgroup_pool_state *pool_parent(struct dmem_cgroup_pool_state *pool)
{
	if (!pool->cnt.parent)
		return NULL;

	return container_of(pool->cnt.parent, typeof(*pool), cnt);
}

static void
dmem_cgroup_calculate_protection(struct dmem_cgroup_pool_state *limit_pool,
				 struct dmem_cgroup_pool_state *test_pool)
{
	struct page_counter *climit;
	struct cgroup_subsys_state *css;
	struct dmemcg_state *dmemcg_iter;
	struct dmem_cgroup_pool_state *pool, *found_pool;

	climit = &limit_pool->cnt;

	rcu_read_lock();

	css_for_each_descendant_pre(css, &limit_pool->cs->css) {
		dmemcg_iter = container_of(css, struct dmemcg_state, css);
		found_pool = NULL;

		list_for_each_entry_rcu(pool, &dmemcg_iter->pools, css_node) {
			if (pool->region == limit_pool->region) {
				found_pool = pool;
				break;
			}
		}
		if (!found_pool)
			continue;

		page_counter_calculate_protection(
			climit, &found_pool->cnt, true);

		if (found_pool == test_pool)
			break;
	}
	rcu_read_unlock();
}

/**
 * dmem_cgroup_state_evict_valuable() - Check if we should evict from test_pool
 * @limit_pool: The pool for which we hit limits
 * @test_pool: The pool for which to test
 * @ignore_low: Whether to ignore the low watermark
 * @ret_hit_low: Pointer to whether it makes sense to consider the low watermark
 *
 * This function returns true if we can evict from @test_pool, false if not.
 * When returning false and @ignore_low is false, @ret_hit_low may
 * be set to true to indicate this function can be retried with @ignore_low
 * set to true.
 *
 * Return: true if eviction from @test_pool is allowed, false otherwise.
 */
bool dmem_cgroup_state_evict_valuable(struct dmem_cgroup_pool_state *limit_pool,
				      struct dmem_cgroup_pool_state *test_pool,
				      bool ignore_low, bool *ret_hit_low)
{
	struct dmem_cgroup_pool_state *pool = test_pool;
	struct page_counter *ctest;
	u64 used, min, low;

	/* Can always evict from current pool, despite limits */
	if (limit_pool == test_pool)
		return true;

	if (limit_pool) {
		if (!parent_dmemcs(limit_pool->cs))
			return true;

		for (pool = test_pool; pool && limit_pool != pool; pool = pool_parent(pool))
			{}

		if (!pool)
			return false;
	} else {
		/*
		 * If there is no cgroup limiting memory usage, use the root
		 * cgroup instead for limit calculations.
		 */
		for (limit_pool = test_pool; pool_parent(limit_pool); limit_pool = pool_parent(limit_pool))
			{}
	}

	ctest = &test_pool->cnt;

	dmem_cgroup_calculate_protection(limit_pool, test_pool);

	used = page_counter_read(ctest);
	min = READ_ONCE(ctest->emin);

	if (used <= min)
		return false;

	if (!ignore_low) {
		low = READ_ONCE(ctest->elow);
		if (used > low)
			return true;

		*ret_hit_low = true;
		return false;
	}
	return true;
}
EXPORT_SYMBOL_GPL(dmem_cgroup_state_evict_valuable);
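
/*
 * Hypothetical usage sketch (an editor's illustration, not part of the
 * upstream file): a driver-side eviction pass built on
 * dmem_cgroup_state_evict_valuable(). The lru list, obj->pool and
 * evict_one() below are assumed driver constructs. A first pass respects
 * the low watermark; if nothing was evictable and hit_low was set, the
 * pass is retried with ignore_low = true, as described above.
 *
 *	bool ignore_low = false, hit_low = false;
 *
 * retry:
 *	list_for_each_entry(obj, &lru, node) {
 *		if (!dmem_cgroup_state_evict_valuable(limit_pool, obj->pool,
 *						      ignore_low, &hit_low))
 *			continue;
 *		evict_one(obj);
 *	}
 *	if (!ignore_low && hit_low) {
 *		ignore_low = true;
 *		goto retry;
 *	}
 */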

static struct dmem_cgroup_pool_state *
alloc_pool_single(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region,
		  struct dmem_cgroup_pool_state **allocpool)
{
	struct dmemcg_state *parent = parent_dmemcs(dmemcs);
	struct dmem_cgroup_pool_state *pool, *ppool = NULL;

	if (!*allocpool) {
		pool = kzalloc(sizeof(*pool), GFP_NOWAIT);
		if (!pool)
			return ERR_PTR(-ENOMEM);
	} else {
		pool = *allocpool;
		*allocpool = NULL;
	}

	pool->region = region;
	pool->cs = dmemcs;

	if (parent)
		ppool = find_cg_pool_locked(parent, region);

	page_counter_init(&pool->cnt,
			  ppool ? &ppool->cnt : NULL, true);
	reset_all_resource_limits(pool);
	refcount_set(&pool->ref, 1);
	kref_get(&region->ref);
	if (ppool && !pool->parent) {
		pool->parent = ppool;
		dmemcg_pool_get(ppool);
	}

	list_add_tail_rcu(&pool->css_node, &dmemcs->pools);
	list_add_tail(&pool->region_node, &region->pools);

	if (!parent)
		pool->inited = true;
	else
		pool->inited = ppool ? ppool->inited : false;
	return pool;
}

static struct dmem_cgroup_pool_state *
get_cg_pool_locked(struct dmemcg_state *dmemcs, struct dmem_cgroup_region *region,
		   struct dmem_cgroup_pool_state **allocpool)
{
	struct dmem_cgroup_pool_state *pool, *ppool, *retpool;
	struct dmemcg_state *p, *pp;

	/*
	 * Recursively create the pool; it may not be initialized yet on
	 * recursion, as initialization is done as a separate step.
	 */
	for (p = dmemcs; p; p = parent_dmemcs(p)) {
		pool = find_cg_pool_locked(p, region);
		if (!pool)
			pool = alloc_pool_single(p, region, allocpool);

		if (IS_ERR(pool))
			return pool;

		if (p == dmemcs && pool->inited)
			return pool;

		if (pool->inited)
			break;
	}

	retpool = pool = find_cg_pool_locked(dmemcs, region);
	for (p = dmemcs, pp = parent_dmemcs(dmemcs); pp; p = pp, pp = parent_dmemcs(p)) {
		if (pool->inited)
			break;

		/* ppool was created by the loop above if it didn't exist. */
		ppool = find_cg_pool_locked(pp, region);

		/* Fix up parent links, mark as inited. */
		pool->cnt.parent = &ppool->cnt;
		if (ppool && !pool->parent) {
			pool->parent = ppool;
			dmemcg_pool_get(ppool);
		}
		pool->inited = true;

		pool = ppool;
	}

	return retpool;
}
static void dmemcg_free_rcu(struct rcu_head *rcu)
{
	struct dmem_cgroup_region *region = container_of(rcu, typeof(*region), rcu);
	struct dmem_cgroup_pool_state *pool, *next;

	list_for_each_entry_safe(pool, next, &region->pools, region_node)
		free_cg_pool(pool);
	kfree(region->name);
	kfree(region);
}

static void dmemcg_free_region(struct kref *ref)
{
	struct dmem_cgroup_region *cgregion = container_of(ref, typeof(*cgregion), ref);

	call_rcu(&cgregion->rcu, dmemcg_free_rcu);
}

/**
 * dmem_cgroup_unregister_region() - Unregister a previously registered region.
 * @region: The region to unregister.
 *
 * This function undoes dmem_cgroup_register_region().
 */
void dmem_cgroup_unregister_region(struct dmem_cgroup_region *region)
{
	struct dmem_cgroup_pool_state *pool, *next;

	if (!region)
		return;

	spin_lock(&dmemcg_lock);

	/* Remove from global region list */
	list_del_rcu(&region->region_node);

	list_for_each_entry_safe(pool, next, &region->pools, region_node) {
		list_del_rcu(&pool->css_node);
		list_del(&pool->region_node);
		dmemcg_pool_put(pool);
	}

	/*
	 * Ensure any RCU based lookups fail. Additionally,
	 * no new pools should be added to the dead region
	 * by get_cg_pool_unlocked.
	 */
	region->unregistered = true;
	spin_unlock(&dmemcg_lock);

	kref_put(&region->ref, dmemcg_free_region);
}
EXPORT_SYMBOL_GPL(dmem_cgroup_unregister_region);

/**
 * dmem_cgroup_register_region() - Register a region for dmem cgroup.
 * @size: Size of the region to register, in bytes.
 * @fmt: Printf-style format for the region name.
 *
 * This function registers a node in the dmem cgroup with the
 * name given. After calling this function, the region can be
 * used for allocations.
 *
 * Return: NULL if @size is zero, a pointer to the registered region
 * on success, or an ERR_PTR on failure.
 */
struct dmem_cgroup_region *dmem_cgroup_register_region(u64 size, const char *fmt, ...)
{
	struct dmem_cgroup_region *ret;
	char *region_name;
	va_list ap;

	if (!size)
		return NULL;

	va_start(ap, fmt);
	region_name = kvasprintf(GFP_KERNEL, fmt, ap);
	va_end(ap);
	if (!region_name)
		return ERR_PTR(-ENOMEM);

	ret = kzalloc(sizeof(*ret), GFP_KERNEL);
	if (!ret) {
		kfree(region_name);
		return ERR_PTR(-ENOMEM);
	}

	INIT_LIST_HEAD(&ret->pools);
	ret->name = region_name;
	ret->size = size;
	kref_init(&ret->ref);

	spin_lock(&dmemcg_lock);
	list_add_tail_rcu(&ret->region_node, &dmem_cgroup_regions);
	spin_unlock(&dmemcg_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(dmem_cgroup_register_region);
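
/*
 * Hypothetical usage sketch (an editor's illustration, not part of the
 * upstream file): a driver registering its VRAM as a dmem region at probe
 * time and unregistering it on removal. vram_size, dev and the
 * "drm/%s/vram0" name format are assumptions for illustration only.
 *
 *	region = dmem_cgroup_register_region(vram_size, "drm/%s/vram0",
 *					     dev_name(dev));
 *	if (IS_ERR(region))
 *		return PTR_ERR(region);
 *	...
 *	dmem_cgroup_unregister_region(region);
 */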

static struct dmem_cgroup_region *dmemcg_get_region_by_name(const char *name)
{
	struct dmem_cgroup_region *region;

	list_for_each_entry_rcu(region, &dmem_cgroup_regions, region_node, spin_is_locked(&dmemcg_lock))
		if (!strcmp(name, region->name) &&
		    kref_get_unless_zero(&region->ref))
			return region;

	return NULL;
}

/**
 * dmem_cgroup_pool_state_put() - Drop a reference to a dmem_cgroup_pool_state
 * @pool: &dmem_cgroup_pool_state
 *
 * Called to drop a reference to the limiting pool returned by
 * dmem_cgroup_try_charge().
 */
void dmem_cgroup_pool_state_put(struct dmem_cgroup_pool_state *pool)
{
	if (pool) {
		css_put(&pool->cs->css);
		dmemcg_pool_put(pool);
	}
}
EXPORT_SYMBOL_GPL(dmem_cgroup_pool_state_put);

static struct dmem_cgroup_pool_state *
get_cg_pool_unlocked(struct dmemcg_state *cg, struct dmem_cgroup_region *region)
{
	struct dmem_cgroup_pool_state *pool, *allocpool = NULL;

	/* Fast path: try a plain RCU lookup first. */
	rcu_read_lock();
	pool = find_cg_pool_locked(cg, region);
	if (pool && !READ_ONCE(pool->inited))
		pool = NULL;
	if (pool && !dmemcg_pool_tryget(pool))
		pool = NULL;
	rcu_read_unlock();

	while (!pool) {
		spin_lock(&dmemcg_lock);
		if (!region->unregistered)
			pool = get_cg_pool_locked(cg, region, &allocpool);
		else
			pool = ERR_PTR(-ENODEV);
		if (!IS_ERR(pool))
			dmemcg_pool_get(pool);
		spin_unlock(&dmemcg_lock);

		if (pool == ERR_PTR(-ENOMEM)) {
			pool = NULL;
			if (WARN_ON(allocpool))
				continue;

			allocpool = kzalloc(sizeof(*allocpool), GFP_KERNEL);
			if (allocpool) {
				pool = NULL;
				continue;
			}
		}
	}

	kfree(allocpool);
	return pool;
}

/**
 * dmem_cgroup_uncharge() - Uncharge a pool.
 * @pool: Pool to uncharge.
 * @size: Size to uncharge.
 *
 * Undoes the effects of dmem_cgroup_try_charge.
 * Must be called with the pool returned by dmem_cgroup_try_charge()
 * and the same @size.
 */
void dmem_cgroup_uncharge(struct dmem_cgroup_pool_state *pool, u64 size)
{
	if (!pool)
		return;

	page_counter_uncharge(&pool->cnt, size);
	css_put(&pool->cs->css);
	dmemcg_pool_put(pool);
}
EXPORT_SYMBOL_GPL(dmem_cgroup_uncharge);

/**
 * dmem_cgroup_try_charge() - Try charging a new allocation to a region.
 * @region: dmem region to charge
 * @size: Size (in bytes) to charge.
 * @ret_pool: On successful allocation, the pool that is charged.
 * @ret_limit_pool: On a failed allocation, the limiting pool.
 *
 * This function charges the @region region for a size of @size bytes.
 *
 * If the function succeeds, @ret_pool is set, which must be passed to
 * dmem_cgroup_uncharge() when undoing the allocation.
 *
 * When this function fails with -EAGAIN and @ret_limit_pool is non-null, it
 * will be set to the pool for which the limit is hit. This can be used for
 * eviction as argument to dmem_cgroup_state_evict_valuable(). This reference
 * must be freed with dmem_cgroup_pool_state_put().
 *
 * Return: 0 on success, -EAGAIN on hitting a limit, or a negative errno on failure.
 */
int dmem_cgroup_try_charge(struct dmem_cgroup_region *region, u64 size,
			   struct dmem_cgroup_pool_state **ret_pool,
			   struct dmem_cgroup_pool_state **ret_limit_pool)
{
	struct dmemcg_state *cg;
	struct dmem_cgroup_pool_state *pool;
	struct page_counter *fail;
	int ret;

	*ret_pool = NULL;
	if (ret_limit_pool)
		*ret_limit_pool = NULL;

	/*
	 * hold on to css, as cgroup can be removed but resource
	 * accounting happens on css.
	 */
	cg = get_current_dmemcs();

	pool = get_cg_pool_unlocked(cg, region);
	if (IS_ERR(pool)) {
		ret = PTR_ERR(pool);
		goto err;
	}

	if (!page_counter_try_charge(&pool->cnt, size, &fail)) {
		if (ret_limit_pool) {
			*ret_limit_pool = container_of(fail, struct dmem_cgroup_pool_state, cnt);
			css_get(&(*ret_limit_pool)->cs->css);
			dmemcg_pool_get(*ret_limit_pool);
		}
		dmemcg_pool_put(pool);
		ret = -EAGAIN;
		goto err;
	}

	/* On success, reference from get_current_dmemcs is transferred to *ret_pool */
	*ret_pool = pool;
	return 0;

err:
	css_put(&cg->css);
	return ret;
}
EXPORT_SYMBOL_GPL(dmem_cgroup_try_charge);
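
/*
 * Hypothetical usage sketch (an editor's illustration, not part of the
 * upstream file): charging a buffer allocation against the caller's cgroup
 * and releasing it again. The bo structure is an assumed driver construct;
 * a -EAGAIN return means a limit was hit.
 *
 *	ret = dmem_cgroup_try_charge(region, bo->size, &bo->pool, NULL);
 *	if (ret)
 *		return ret;
 *	...
 *	dmem_cgroup_uncharge(bo->pool, bo->size);
 */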

static int dmem_cgroup_region_capacity_show(struct seq_file *sf, void *v)
{
	struct dmem_cgroup_region *region;

	rcu_read_lock();
	list_for_each_entry_rcu(region, &dmem_cgroup_regions, region_node) {
		seq_puts(sf, region->name);
		seq_printf(sf, " %llu\n", region->size);
	}
	rcu_read_unlock();
	return 0;
}
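
/*
 * Illustrative output sketch (an editor's note, not part of the upstream
 * file): dmem.capacity on the root cgroup prints one "<region> <size>"
 * line per registered region; the region name and size below are made up.
 *
 *	$ cat dmem.capacity
 *	drm/0000:03:00.0/vram0 8589934592
 */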

static int dmemcg_parse_limit(char *options, struct dmem_cgroup_region *region,
			      u64 *new_limit)
{
	char *end;

	if (!strcmp(options, "max")) {
		*new_limit = PAGE_COUNTER_MAX;
		return 0;
	}

	*new_limit = memparse(options, &end);
	if (*end != '\0')
		return -EINVAL;

	return 0;
}

static ssize_t dmemcg_limit_write(struct kernfs_open_file *of,
				  char *buf, size_t nbytes, loff_t off,
				  void (*apply)(struct dmem_cgroup_pool_state *, u64))
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(of_css(of));
	int err = 0;

	while (buf && !err) {
		struct dmem_cgroup_pool_state *pool = NULL;
		char *options, *region_name;
		struct dmem_cgroup_region *region;
		u64 new_limit;

		options = buf;
		buf = strchr(buf, '\n');
		if (buf)
			*buf++ = '\0';

		options = strstrip(options);

		/* eat empty lines */
		if (!options[0])
			continue;

		region_name = strsep(&options, " \t");
		if (!region_name[0])
			continue;

		if (!options || !*options)
			return -EINVAL;

		rcu_read_lock();
		region = dmemcg_get_region_by_name(region_name);
		rcu_read_unlock();

		if (!region)
			return -EINVAL;

		err = dmemcg_parse_limit(options, region, &new_limit);
		if (err < 0)
			goto out_put;

		pool = get_cg_pool_unlocked(dmemcs, region);
		if (IS_ERR(pool)) {
			err = PTR_ERR(pool);
			goto out_put;
		}

		/* And commit */
		apply(pool, new_limit);
		dmemcg_pool_put(pool);

out_put:
		kref_put(&region->ref, dmemcg_free_region);
	}

	return err ?: nbytes;
}
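
/*
 * Illustrative interface sketch (an editor's note, not part of the
 * upstream file): each line written to dmem.min, dmem.low or dmem.max is
 * "<region> <value>", where <value> is either "max" or a memparse()-style
 * size such as "8388608", "8k", "64M" or "1G". The region name below is
 * made up.
 *
 *	echo "drm/0000:03:00.0/vram0 1G" > dmem.max
 *	echo "drm/0000:03:00.0/vram0 max" > dmem.max
 */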

static int dmemcg_limit_show(struct seq_file *sf, void *v,
			     u64 (*fn)(struct dmem_cgroup_pool_state *))
{
	struct dmemcg_state *dmemcs = css_to_dmemcs(seq_css(sf));
	struct dmem_cgroup_region *region;

	rcu_read_lock();
	list_for_each_entry_rcu(region, &dmem_cgroup_regions, region_node) {
		struct dmem_cgroup_pool_state *pool = find_cg_pool_locked(dmemcs, region);
		u64 val;

		seq_puts(sf, region->name);

		val = fn(pool);
		if (val < PAGE_COUNTER_MAX)
			seq_printf(sf, " %lld\n", val);
		else
			seq_puts(sf, " max\n");
	}
	rcu_read_unlock();

	return 0;
}

static int dmem_cgroup_region_current_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_current);
}

static int dmem_cgroup_region_min_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_min);
}

static ssize_t dmem_cgroup_region_min_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes, loff_t off)
{
	return dmemcg_limit_write(of, buf, nbytes, off, set_resource_min);
}

static int dmem_cgroup_region_low_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_low);
}

static ssize_t dmem_cgroup_region_low_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes, loff_t off)
{
	return dmemcg_limit_write(of, buf, nbytes, off, set_resource_low);
}

static int dmem_cgroup_region_max_show(struct seq_file *sf, void *v)
{
	return dmemcg_limit_show(sf, v, get_resource_max);
}

static ssize_t dmem_cgroup_region_max_write(struct kernfs_open_file *of,
					    char *buf, size_t nbytes, loff_t off)
{
	return dmemcg_limit_write(of, buf, nbytes, off, set_resource_max);
}

static struct cftype files[] = {
	{
		.name = "capacity",
		.seq_show = dmem_cgroup_region_capacity_show,
		.flags = CFTYPE_ONLY_ON_ROOT,
	},
	{
		.name = "current",
		.seq_show = dmem_cgroup_region_current_show,
	},
	{
		.name = "min",
		.write = dmem_cgroup_region_min_write,
		.seq_show = dmem_cgroup_region_min_show,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "low",
		.write = dmem_cgroup_region_low_write,
		.seq_show = dmem_cgroup_region_low_show,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "max",
		.write = dmem_cgroup_region_max_write,
		.seq_show = dmem_cgroup_region_max_show,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{ } /* Zero entry terminates. */
};

struct cgroup_subsys dmem_cgrp_subsys = {
	.css_alloc	= dmemcs_alloc,
	.css_free	= dmemcs_free,
	.css_offline	= dmemcs_offline,
	.legacy_cftypes	= files,
	.dfl_cftypes	= files,
};