GitHub Repository: torvalds/linux
Path: blob/master/mm/hugetlb_cgroup.c
/*
 *
 * Copyright IBM Corporation, 2012
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *
 * Cgroup v2
 * Copyright (C) 2019 Red Hat, Inc.
 * Author: Giuseppe Scrivano <gscrivan@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 */

#include <linux/cgroup.h>
#include <linux/page_counter.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>

#define MEMFILE_PRIVATE(x, val)	(((x) << 16) | (val))
#define MEMFILE_IDX(val)	(((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val)	((val) & 0xffff)

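/*
 * Example of the packing above: MEMFILE_PRIVATE(1, RES_LIMIT) stores the
 * hstate index (1) in the upper 16 bits and the attribute (RES_LIMIT) in
 * the lower 16 bits, so MEMFILE_IDX() and MEMFILE_ATTR() can recover both
 * from the single cftype->private word.
 */
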
/* Use t->m[0] to encode the offset */
#define MEMFILE_OFFSET(t, m0)	(((offsetof(t, m0) << 16) | sizeof_field(t, m0)))
#define MEMFILE_OFFSET0(val)	(((val) >> 16) & 0xffff)
#define MEMFILE_FIELD_SIZE(val)	((val) & 0xffff)

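/*
 * Example: MEMFILE_OFFSET(struct hugetlb_cgroup, events_file[0]) packs
 * offsetof(struct hugetlb_cgroup, events_file[0]) into the upper 16 bits
 * and sizeof_field(..., events_file[0]) into the lower 16 bits.
 * hugetlb_cgroup_cfttypes_init() below unpacks them to compute each
 * hstate's file_offset: base offset + field size * hstate index.
 */
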
#define DFL_TMPL_SIZE		ARRAY_SIZE(hugetlb_dfl_tmpl)
#define LEGACY_TMPL_SIZE	ARRAY_SIZE(hugetlb_legacy_tmpl)

static struct hugetlb_cgroup *root_h_cgroup __read_mostly;
static struct cftype *dfl_files;
static struct cftype *legacy_files;

static inline struct page_counter *
__hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx,
				     bool rsvd)
{
	if (rsvd)
		return &h_cg->rsvd_hugepage[idx];
	return &h_cg->hugepage[idx];
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx)
{
	return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false);
}

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx)
{
	return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true);
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
{
	return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
{
	return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
}

static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
{
	return (h_cg == root_h_cgroup);
}

static inline struct hugetlb_cgroup *
parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
{
	return hugetlb_cgroup_from_css(h_cg->css.parent);
}

static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
{
	struct hstate *h;

	for_each_hstate(h) {
		if (page_counter_read(
		    hugetlb_cgroup_counter_from_cgroup(h_cg, hstate_index(h))))
			return true;
	}
	return false;
}

static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
				struct hugetlb_cgroup *parent_h_cgroup)
{
	int idx;

	for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
		struct page_counter *fault, *fault_parent = NULL;
		struct page_counter *rsvd, *rsvd_parent = NULL;
		unsigned long limit;

		if (parent_h_cgroup) {
			fault_parent = hugetlb_cgroup_counter_from_cgroup(
				parent_h_cgroup, idx);
			rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd(
				parent_h_cgroup, idx);
		}
		fault = hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx);
		rsvd = hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx);

		page_counter_init(fault, fault_parent, false);
		page_counter_init(rsvd, rsvd_parent, false);

		if (!cgroup_subsys_on_dfl(hugetlb_cgrp_subsys)) {
			fault->track_failcnt = true;
			rsvd->track_failcnt = true;
		}

		limit = round_down(PAGE_COUNTER_MAX,
				   pages_per_huge_page(&hstates[idx]));

		VM_BUG_ON(page_counter_set_max(fault, limit));
		VM_BUG_ON(page_counter_set_max(rsvd, limit));
	}
}

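/*
 * Note: the page counters above account in base pages, so the default
 * limit is PAGE_COUNTER_MAX rounded down to a whole number of huge pages.
 * For example, with a 4KB base page and 2MB huge pages,
 * pages_per_huge_page() is 512, and the limit is a multiple of 512.
 */
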
static void hugetlb_cgroup_free(struct hugetlb_cgroup *h_cgroup)
{
	int node;

	for_each_node(node)
		kfree(h_cgroup->nodeinfo[node]);
	kfree(h_cgroup);
}

static struct cgroup_subsys_state *
hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
	struct hugetlb_cgroup *h_cgroup;
	int node;

	h_cgroup = kzalloc(struct_size(h_cgroup, nodeinfo, nr_node_ids),
			   GFP_KERNEL);

	if (!h_cgroup)
		return ERR_PTR(-ENOMEM);

	if (!parent_h_cgroup)
		root_h_cgroup = h_cgroup;

	/*
	 * TODO: this routine can waste much memory for nodes which will
	 * never be onlined. It's better to use memory hotplug callback
	 * function.
	 */
	for_each_node(node) {
		/* Set node_to_alloc to NUMA_NO_NODE for offline nodes. */
		int node_to_alloc =
			node_state(node, N_NORMAL_MEMORY) ? node : NUMA_NO_NODE;
		h_cgroup->nodeinfo[node] =
			kzalloc_node(sizeof(struct hugetlb_cgroup_per_node),
				     GFP_KERNEL, node_to_alloc);
		if (!h_cgroup->nodeinfo[node])
			goto fail_alloc_nodeinfo;
	}

	hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
	return &h_cgroup->css;

fail_alloc_nodeinfo:
	hugetlb_cgroup_free(h_cgroup);
	return ERR_PTR(-ENOMEM);
}

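/*
 * Note: nodeinfo[] is a flexible array member sized by nr_node_ids, so
 * struct_size() in the allocation above computes the total size with
 * overflow checking rather than open-coding sizeof() arithmetic.
 */
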
static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
{
	hugetlb_cgroup_free(hugetlb_cgroup_from_css(css));
}

/*
 * Should be called with hugetlb_lock held.
 * Since we are holding hugetlb_lock, pages cannot get moved from
 * active list or uncharged from the cgroup, so no need to get
 * page reference and test for page active here. This function
 * cannot fail.
 */
static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
				       struct folio *folio)
{
	unsigned int nr_pages;
	struct page_counter *counter;
	struct hugetlb_cgroup *hcg;
	struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);

	hcg = hugetlb_cgroup_from_folio(folio);
	/*
	 * We can have pages in active list without any cgroup
	 * ie, hugepage with less than 3 pages. We can safely
	 * ignore those pages.
	 */
	if (!hcg || hcg != h_cg)
		goto out;

	nr_pages = folio_nr_pages(folio);
	if (!parent) {
		parent = root_h_cgroup;
		/* root has no limit */
		page_counter_charge(&parent->hugepage[idx], nr_pages);
	}
	counter = &h_cg->hugepage[idx];
	/* Take the pages off the local counter */
	page_counter_cancel(counter, nr_pages);

	set_hugetlb_cgroup(folio, parent);
out:
	return;
}

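/*
 * Note on the accounting above: page_counter_cancel() subtracts from this
 * cgroup's counter only, without walking up the hierarchy the way
 * page_counter_uncharge() does, so ancestor counters keep the charge that
 * now belongs to the parent.
 */
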
/*
 * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
 * the parent cgroup.
 */
static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
{
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
	struct hstate *h;
	struct folio *folio;

	do {
		for_each_hstate(h) {
			spin_lock_irq(&hugetlb_lock);
			list_for_each_entry(folio, &h->hugepage_activelist, lru)
				hugetlb_cgroup_move_parent(hstate_index(h), h_cg, folio);

			spin_unlock_irq(&hugetlb_lock);
		}
		cond_resched();
	} while (hugetlb_cgroup_have_usage(h_cg));
}

static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
				 enum hugetlb_memory_event event)
{
	atomic_long_inc(&hugetlb->events_local[idx][event]);
	cgroup_file_notify(&hugetlb->events_local_file[idx]);

	do {
		atomic_long_inc(&hugetlb->events[idx][event]);
		cgroup_file_notify(&hugetlb->events_file[idx]);
	} while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
		 !hugetlb_cgroup_is_root(hugetlb));
}

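/*
 * As with memory.events vs. memory.events.local, events_local above is
 * bumped only in the cgroup where the event occurred, while events is
 * bumped in that cgroup and every ancestor up to (but not including)
 * the root.
 */
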
static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
					  struct hugetlb_cgroup **ptr,
					  bool rsvd)
{
	int ret = 0;
	struct page_counter *counter;
	struct hugetlb_cgroup *h_cg = NULL;

	if (hugetlb_cgroup_disabled())
		goto done;
again:
	rcu_read_lock();
	h_cg = hugetlb_cgroup_from_task(current);
	if (!css_tryget(&h_cg->css)) {
		rcu_read_unlock();
		goto again;
	}
	rcu_read_unlock();

	if (!page_counter_try_charge(
		    __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
		    nr_pages, &counter)) {
		ret = -ENOMEM;
		hugetlb_event(h_cg, idx, HUGETLB_MAX);
		css_put(&h_cg->css);
		goto done;
	}
	/* Reservations take a reference to the css because they do not get
	 * reparented.
	 */
	if (!rsvd)
		css_put(&h_cg->css);
done:
	*ptr = h_cg;
	return ret;
}

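/*
 * The css_tryget() retry loop above handles racing with cgroup
 * destruction: if the tryget fails, the css looked up under RCU is on its
 * way out, so re-reading the task's css is expected to find a live cgroup.
 */
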
int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
				 struct hugetlb_cgroup **ptr)
{
	return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false);
}

int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
				      struct hugetlb_cgroup **ptr)
{
	return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true);
}

/* Should be called with hugetlb_lock held */
static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
					   struct hugetlb_cgroup *h_cg,
					   struct folio *folio, bool rsvd)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;
	lockdep_assert_held(&hugetlb_lock);
	__set_hugetlb_cgroup(folio, h_cg, rsvd);
	if (!rsvd) {
		unsigned long usage =
			h_cg->nodeinfo[folio_nid(folio)]->usage[idx];
		/*
		 * This write is not atomic due to fetching usage and writing
		 * to it, but that's fine because we call this with
		 * hugetlb_lock held anyway.
		 */
		WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx],
			   usage + nr_pages);
	}
}

void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
				  struct hugetlb_cgroup *h_cg,
				  struct folio *folio)
{
	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, false);
}

void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
				       struct hugetlb_cgroup *h_cg,
				       struct folio *folio)
{
	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, true);
}

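/*
 * Charging is two-phase: hugetlb_cgroup_charge_cgroup() reserves room in
 * the page counter up front, and once a folio has actually been obtained,
 * hugetlb_cgroup_commit_charge() binds the charge to that folio under
 * hugetlb_lock.
 */
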
/*
 * Should be called with hugetlb_lock held
 */
static void __hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
					    struct folio *folio, bool rsvd)
{
	struct hugetlb_cgroup *h_cg;

	if (hugetlb_cgroup_disabled())
		return;
	lockdep_assert_held(&hugetlb_lock);
	h_cg = __hugetlb_cgroup_from_folio(folio, rsvd);
	if (unlikely(!h_cg))
		return;
	__set_hugetlb_cgroup(folio, NULL, rsvd);

	page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
								   rsvd),
			      nr_pages);

	if (rsvd)
		css_put(&h_cg->css);
	else {
		unsigned long usage =
			h_cg->nodeinfo[folio_nid(folio)]->usage[idx];
		/*
		 * This write is not atomic due to fetching usage and writing
		 * to it, but that's fine because we call this with
		 * hugetlb_lock held anyway.
		 */
		WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx],
			   usage - nr_pages);
	}
}

void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
				   struct folio *folio)
{
	__hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, false);
}

void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages,
					struct folio *folio)
{
	__hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, true);
}

static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
					     struct hugetlb_cgroup *h_cg,
					     bool rsvd)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;

	page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
								   rsvd),
			      nr_pages);

	if (rsvd)
		css_put(&h_cg->css);
}

void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
				    struct hugetlb_cgroup *h_cg)
{
	__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false);
}

void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
					 struct hugetlb_cgroup *h_cg)
{
	__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true);
}

void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
				     unsigned long end)
{
	if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter ||
	    !resv->css)
		return;

	page_counter_uncharge(resv->reservation_counter,
			      (end - start) * resv->pages_per_hpage);
	css_put(resv->css);
}

void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
					 struct file_region *rg,
					 unsigned long nr_pages,
					 bool region_del)
{
	if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
		return;

	if (rg->reservation_counter && resv->pages_per_hpage &&
	    !resv->reservation_counter) {
		page_counter_uncharge(rg->reservation_counter,
				      nr_pages * resv->pages_per_hpage);
		/*
		 * Only do css_put(rg->css) when we delete the entire region
		 * because one file_region must hold exactly one css reference.
		 */
		if (region_del)
			css_put(rg->css);
	}
}

enum {
	RES_USAGE,
	RES_RSVD_USAGE,
	RES_LIMIT,
	RES_RSVD_LIMIT,
	RES_MAX_USAGE,
	RES_RSVD_MAX_USAGE,
	RES_FAILCNT,
	RES_RSVD_FAILCNT,
};

static int hugetlb_cgroup_read_numa_stat(struct seq_file *seq, void *dummy)
{
	int nid;
	struct cftype *cft = seq_cft(seq);
	int idx = MEMFILE_IDX(cft->private);
	bool legacy = !cgroup_subsys_on_dfl(hugetlb_cgrp_subsys);
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
	struct cgroup_subsys_state *css;
	unsigned long usage;

	if (legacy) {
		/* Add up usage across all nodes for the non-hierarchical total. */
		usage = 0;
		for_each_node_state(nid, N_MEMORY)
			usage += READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]);
		seq_printf(seq, "total=%lu", usage * PAGE_SIZE);

		/* Simply print the per-node usage for the non-hierarchical total. */
		for_each_node_state(nid, N_MEMORY)
			seq_printf(seq, " N%d=%lu", nid,
				   READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]) *
				   PAGE_SIZE);
		seq_putc(seq, '\n');
	}

	/*
	 * The hierarchical total is pretty much the value recorded by the
	 * counter, so use that.
	 */
	seq_printf(seq, "%stotal=%lu", legacy ? "hierarchical_" : "",
		   page_counter_read(&h_cg->hugepage[idx]) * PAGE_SIZE);

	/*
	 * For each node, traverse the css tree to obtain the hierarchical
	 * node usage.
	 */
	for_each_node_state(nid, N_MEMORY) {
		usage = 0;
		rcu_read_lock();
		css_for_each_descendant_pre(css, &h_cg->css) {
			usage += READ_ONCE(hugetlb_cgroup_from_css(css)
					   ->nodeinfo[nid]
					   ->usage[idx]);
		}
		rcu_read_unlock();
		seq_printf(seq, " N%d=%lu", nid, usage * PAGE_SIZE);
	}

	seq_putc(seq, '\n');

	return 0;
}

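/*
 * Example hugetlb.2MB.numa_stat output on cgroup v2 (values in bytes):
 *
 *	total=2097152 N0=2097152 N1=0
 *
 * On cgroup v1 a non-hierarchical "total=..." line is printed first and
 * the hierarchical line is prefixed with "hierarchical_".
 */
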
static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
				   struct cftype *cft)
{
	struct page_counter *counter;
	struct page_counter *rsvd_counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);

	counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
	rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)];

	switch (MEMFILE_ATTR(cft->private)) {
	case RES_USAGE:
		return (u64)page_counter_read(counter) * PAGE_SIZE;
	case RES_RSVD_USAGE:
		return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE;
	case RES_LIMIT:
		return (u64)counter->max * PAGE_SIZE;
	case RES_RSVD_LIMIT:
		return (u64)rsvd_counter->max * PAGE_SIZE;
	case RES_MAX_USAGE:
		return (u64)counter->watermark * PAGE_SIZE;
	case RES_RSVD_MAX_USAGE:
		return (u64)rsvd_counter->watermark * PAGE_SIZE;
	case RES_FAILCNT:
		return counter->failcnt;
	case RES_RSVD_FAILCNT:
		return rsvd_counter->failcnt;
	default:
		BUG();
	}
}

static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
{
	int idx;
	u64 val;
	struct cftype *cft = seq_cft(seq);
	unsigned long limit;
	struct page_counter *counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

	idx = MEMFILE_IDX(cft->private);
	counter = &h_cg->hugepage[idx];

	limit = round_down(PAGE_COUNTER_MAX,
			   pages_per_huge_page(&hstates[idx]));

	switch (MEMFILE_ATTR(cft->private)) {
	case RES_RSVD_USAGE:
		counter = &h_cg->rsvd_hugepage[idx];
		fallthrough;
	case RES_USAGE:
		val = (u64)page_counter_read(counter);
		seq_printf(seq, "%llu\n", val * PAGE_SIZE);
		break;
	case RES_RSVD_LIMIT:
		counter = &h_cg->rsvd_hugepage[idx];
		fallthrough;
	case RES_LIMIT:
		val = (u64)counter->max;
		if (val == limit)
			seq_puts(seq, "max\n");
		else
			seq_printf(seq, "%llu\n", val * PAGE_SIZE);
		break;
	default:
		BUG();
	}

	return 0;
}

static DEFINE_MUTEX(hugetlb_limit_mutex);

static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off,
				    const char *max)
{
	int ret, idx;
	unsigned long nr_pages;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
	bool rsvd = false;

	if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
		return -EINVAL;

	buf = strstrip(buf);
	ret = page_counter_memparse(buf, max, &nr_pages);
	if (ret)
		return ret;

	idx = MEMFILE_IDX(of_cft(of)->private);
	nr_pages = round_down(nr_pages, pages_per_huge_page(&hstates[idx]));

	switch (MEMFILE_ATTR(of_cft(of)->private)) {
	case RES_RSVD_LIMIT:
		rsvd = true;
		fallthrough;
	case RES_LIMIT:
		mutex_lock(&hugetlb_limit_mutex);
		ret = page_counter_set_max(
			__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
			nr_pages);
		mutex_unlock(&hugetlb_limit_mutex);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret ?: nbytes;
}

static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
					   char *buf, size_t nbytes, loff_t off)
{
	return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
}

static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
					char *buf, size_t nbytes, loff_t off)
{
	return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
}

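/*
 * Usage sketch from userspace (cgroup v2; file names assume 2MB huge
 * pages on the system):
 *
 *	echo 1G > hugetlb.2MB.max	# cap 2MB-hugepage usage at 1GB
 *	echo max > hugetlb.2MB.max	# remove the limit
 *	cat hugetlb.2MB.current		# current usage in bytes
 *
 * On cgroup v1 the equivalent files are hugetlb.2MB.limit_in_bytes and
 * hugetlb.2MB.usage_in_bytes, and "-1" rather than "max" clears the
 * limit, matching the max strings passed to hugetlb_cgroup_write() above.
 */
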
static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off)
{
	int ret = 0;
	struct page_counter *counter, *rsvd_counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));

	counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
	rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)];

	switch (MEMFILE_ATTR(of_cft(of)->private)) {
	case RES_MAX_USAGE:
		page_counter_reset_watermark(counter);
		break;
	case RES_RSVD_MAX_USAGE:
		page_counter_reset_watermark(rsvd_counter);
		break;
	case RES_FAILCNT:
		counter->failcnt = 0;
		break;
	case RES_RSVD_FAILCNT:
		rsvd_counter->failcnt = 0;
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret ?: nbytes;
}

static char *mem_fmt(char *buf, int size, unsigned long hsize)
{
	if (hsize >= SZ_1G)
		snprintf(buf, size, "%luGB", hsize / SZ_1G);
	else if (hsize >= SZ_1M)
		snprintf(buf, size, "%luMB", hsize / SZ_1M);
	else
		snprintf(buf, size, "%luKB", hsize / SZ_1K);
	return buf;
}

static int __hugetlb_events_show(struct seq_file *seq, bool local)
{
	int idx;
	long max;
	struct cftype *cft = seq_cft(seq);
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

	idx = MEMFILE_IDX(cft->private);

	if (local)
		max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
	else
		max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);

	seq_printf(seq, "max %lu\n", max);

	return 0;
}

static int hugetlb_events_show(struct seq_file *seq, void *v)
{
	return __hugetlb_events_show(seq, false);
}

static int hugetlb_events_local_show(struct seq_file *seq, void *v)
{
	return __hugetlb_events_show(seq, true);
}

static struct cftype hugetlb_dfl_tmpl[] = {
	{
		.name = "max",
		.private = RES_LIMIT,
		.seq_show = hugetlb_cgroup_read_u64_max,
		.write = hugetlb_cgroup_write_dfl,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "rsvd.max",
		.private = RES_RSVD_LIMIT,
		.seq_show = hugetlb_cgroup_read_u64_max,
		.write = hugetlb_cgroup_write_dfl,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "current",
		.private = RES_USAGE,
		.seq_show = hugetlb_cgroup_read_u64_max,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "rsvd.current",
		.private = RES_RSVD_USAGE,
		.seq_show = hugetlb_cgroup_read_u64_max,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "events",
		.seq_show = hugetlb_events_show,
		.file_offset = MEMFILE_OFFSET(struct hugetlb_cgroup, events_file[0]),
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "events.local",
		.seq_show = hugetlb_events_local_show,
		.file_offset = MEMFILE_OFFSET(struct hugetlb_cgroup, events_local_file[0]),
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		.name = "numa_stat",
		.seq_show = hugetlb_cgroup_read_numa_stat,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	/* don't need terminator here */
};

static struct cftype hugetlb_legacy_tmpl[] = {
	{
		.name = "limit_in_bytes",
		.private = RES_LIMIT,
		.read_u64 = hugetlb_cgroup_read_u64,
		.write = hugetlb_cgroup_write_legacy,
	},
	{
		.name = "rsvd.limit_in_bytes",
		.private = RES_RSVD_LIMIT,
		.read_u64 = hugetlb_cgroup_read_u64,
		.write = hugetlb_cgroup_write_legacy,
	},
	{
		.name = "usage_in_bytes",
		.private = RES_USAGE,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "rsvd.usage_in_bytes",
		.private = RES_RSVD_USAGE,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "max_usage_in_bytes",
		.private = RES_MAX_USAGE,
		.write = hugetlb_cgroup_reset,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "rsvd.max_usage_in_bytes",
		.private = RES_RSVD_MAX_USAGE,
		.write = hugetlb_cgroup_reset,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "failcnt",
		.private = RES_FAILCNT,
		.write = hugetlb_cgroup_reset,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "rsvd.failcnt",
		.private = RES_RSVD_FAILCNT,
		.write = hugetlb_cgroup_reset,
		.read_u64 = hugetlb_cgroup_read_u64,
	},
	{
		.name = "numa_stat",
		.seq_show = hugetlb_cgroup_read_numa_stat,
	},
	/* don't need terminator here */
};

static void __init
hugetlb_cgroup_cfttypes_init(struct hstate *h, struct cftype *cft,
			     struct cftype *tmpl, int tmpl_size)
{
	char buf[32];
	int i, idx = hstate_index(h);

	/* format the size */
	mem_fmt(buf, sizeof(buf), huge_page_size(h));

	for (i = 0; i < tmpl_size; cft++, tmpl++, i++) {
		*cft = *tmpl;
		/* rebuild the name */
		snprintf(cft->name, MAX_CFTYPE_NAME, "%s.%s", buf, tmpl->name);
		/* rebuild the private */
		cft->private = MEMFILE_PRIVATE(idx, tmpl->private);
		/* rebuild the file_offset */
		if (tmpl->file_offset) {
			unsigned int offset = tmpl->file_offset;

			cft->file_offset = MEMFILE_OFFSET0(offset) +
					   MEMFILE_FIELD_SIZE(offset) * idx;
		}

		lockdep_register_key(&cft->lockdep_key);
	}
}

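/*
 * Example: for the 2MB hstate, the template entry named "max" above
 * becomes a cftype named "2MB.max" whose ->private encodes the hstate
 * index alongside RES_LIMIT; cgroup core then exposes the file with the
 * subsystem-name prefix, i.e. as hugetlb.2MB.max.
 */
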
static void __init __hugetlb_cgroup_file_dfl_init(struct hstate *h)
{
	int idx = hstate_index(h);

	hugetlb_cgroup_cfttypes_init(h, dfl_files + idx * DFL_TMPL_SIZE,
				     hugetlb_dfl_tmpl, DFL_TMPL_SIZE);
}

static void __init __hugetlb_cgroup_file_legacy_init(struct hstate *h)
{
	int idx = hstate_index(h);

	hugetlb_cgroup_cfttypes_init(h, legacy_files + idx * LEGACY_TMPL_SIZE,
				     hugetlb_legacy_tmpl, LEGACY_TMPL_SIZE);
}

static void __init __hugetlb_cgroup_file_init(struct hstate *h)
{
	__hugetlb_cgroup_file_dfl_init(h);
	__hugetlb_cgroup_file_legacy_init(h);
}

static void __init __hugetlb_cgroup_file_pre_init(void)
{
	int cft_count;

	cft_count = hugetlb_max_hstate * DFL_TMPL_SIZE + 1; /* add terminator */
	dfl_files = kcalloc(cft_count, sizeof(struct cftype), GFP_KERNEL);
	BUG_ON(!dfl_files);
	cft_count = hugetlb_max_hstate * LEGACY_TMPL_SIZE + 1; /* add terminator */
	legacy_files = kcalloc(cft_count, sizeof(struct cftype), GFP_KERNEL);
	BUG_ON(!legacy_files);
}

static void __init __hugetlb_cgroup_file_post_init(void)
{
	WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
				       dfl_files));
	WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
					  legacy_files));
}

void __init hugetlb_cgroup_file_init(void)
{
	struct hstate *h;

	__hugetlb_cgroup_file_pre_init();
	for_each_hstate(h)
		__hugetlb_cgroup_file_init(h);
	__hugetlb_cgroup_file_post_init();
}

/*
 * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
 * when we migrate hugepages
 */
void hugetlb_cgroup_migrate(struct folio *old_folio, struct folio *new_folio)
{
	struct hugetlb_cgroup *h_cg;
	struct hugetlb_cgroup *h_cg_rsvd;
	struct hstate *h = folio_hstate(old_folio);

	if (hugetlb_cgroup_disabled())
		return;

	spin_lock_irq(&hugetlb_lock);
	h_cg = hugetlb_cgroup_from_folio(old_folio);
	h_cg_rsvd = hugetlb_cgroup_from_folio_rsvd(old_folio);
	set_hugetlb_cgroup(old_folio, NULL);
	set_hugetlb_cgroup_rsvd(old_folio, NULL);

	/* move the h_cg details to new cgroup */
	set_hugetlb_cgroup(new_folio, h_cg);
	set_hugetlb_cgroup_rsvd(new_folio, h_cg_rsvd);
	list_move(&new_folio->lru, &h->hugepage_activelist);
	spin_unlock_irq(&hugetlb_lock);
}

static struct cftype hugetlb_files[] = {
	{} /* terminate */
};

struct cgroup_subsys hugetlb_cgrp_subsys = {
	.css_alloc	= hugetlb_cgroup_css_alloc,
	.css_offline	= hugetlb_cgroup_css_offline,
	.css_free	= hugetlb_cgroup_css_free,
	.dfl_cftypes	= hugetlb_files,
	.legacy_cftypes	= hugetlb_files,
};