// SPDX-License-Identifier: GPL-2.0
/*
 * Data Access Monitor
 *
 * Author: SeongJae Park <sj@kernel.org>
 */
#define pr_fmt(fmt) "damon: " fmt
#include <linux/damon.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/psi.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/string_choices.h>
#define CREATE_TRACE_POINTS
#include <trace/events/damon.h>
#ifdef CONFIG_DAMON_KUNIT_TEST
#undef DAMON_MIN_REGION
#define DAMON_MIN_REGION 1
#endif
static DEFINE_MUTEX(damon_lock);
static int nr_running_ctxs;
static bool running_exclusive_ctxs;
static DEFINE_MUTEX(damon_ops_lock);
static struct damon_operations damon_registered_ops[NR_DAMON_OPS];
static struct kmem_cache *damon_region_cache __ro_after_init;
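/* Should be called under damon_ops_lock with id smaller than NR_DAMON_OPS */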
static bool __damon_is_registered_ops(enum damon_ops_id id)
{
struct damon_operations empty_ops = {};
if (!memcmp(&empty_ops, &damon_registered_ops[id], sizeof(empty_ops)))
return false;
return true;
}
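/**
 * damon_is_registered_ops() - Check if a given damon_operations is registered.
 * @id:	Id of the damon_operations to check.
 *
 * Return: true if the ops is set, false otherwise.
 */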
bool damon_is_registered_ops(enum damon_ops_id id)
{
bool registered;
if (id >= NR_DAMON_OPS)
return false;
mutex_lock(&damon_ops_lock);
registered = __damon_is_registered_ops(id);
mutex_unlock(&damon_ops_lock);
return registered;
}
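/**
 * damon_register_ops() - Register a monitoring operations set to DAMON.
 * @ops:	monitoring operations set to register.
 *
 * This function registers a monitoring operations set of valid &struct
 * damon_operations->id so that others can find and use them later.
 *
 * Return: 0 on success, negative error code otherwise.
 */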
int damon_register_ops(struct damon_operations *ops)
{
int err = 0;
if (ops->id >= NR_DAMON_OPS)
return -EINVAL;
mutex_lock(&damon_ops_lock);
if (__damon_is_registered_ops(ops->id))
err = -EINVAL;
else
damon_registered_ops[ops->id] = *ops;
mutex_unlock(&damon_ops_lock);
return err;
}
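/**
 * damon_select_ops() - Select a monitoring operations to use with the context.
 * @ctx:	monitoring context to use the operations.
 * @id:	id of the registered monitoring operations to select.
 *
 * This function finds a registered monitoring operations set of @id and makes
 * @ctx use it.
 *
 * Return: 0 on success, negative error code otherwise.
 */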
int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id)
{
int err = 0;
if (id >= NR_DAMON_OPS)
return -EINVAL;
mutex_lock(&damon_ops_lock);
if (!__damon_is_registered_ops(id))
err = -EINVAL;
else
ctx->ops = damon_registered_ops[id];
mutex_unlock(&damon_ops_lock);
return err;
}
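/*
 * Construct a damon_region struct.
 *
 * Returns the pointer to the new struct if success, or NULL otherwise.
 */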
struct damon_region *damon_new_region(unsigned long start, unsigned long end)
{
struct damon_region *region;
region = kmem_cache_alloc(damon_region_cache, GFP_KERNEL);
if (!region)
return NULL;
region->ar.start = start;
region->ar.end = end;
region->nr_accesses = 0;
region->nr_accesses_bp = 0;
INIT_LIST_HEAD(&region->list);
region->age = 0;
region->last_nr_accesses = 0;
return region;
}
void damon_add_region(struct damon_region *r, struct damon_target *t)
{
list_add_tail(&r->list, &t->regions_list);
t->nr_regions++;
}
static void damon_del_region(struct damon_region *r, struct damon_target *t)
{
list_del(&r->list);
t->nr_regions--;
}
static void damon_free_region(struct damon_region *r)
{
kmem_cache_free(damon_region_cache, r);
}
void damon_destroy_region(struct damon_region *r, struct damon_target *t)
{
damon_del_region(r, t);
damon_free_region(r);
}
static bool damon_intersect(struct damon_region *r,
struct damon_addr_range *re)
{
return !(r->ar.end <= re->start || re->end <= r->ar.start);
}
static int damon_fill_regions_holes(struct damon_region *first,
struct damon_region *last, struct damon_target *t)
{
struct damon_region *r = first;
damon_for_each_region_from(r, t) {
struct damon_region *next, *newr;
if (r == last)
break;
next = damon_next_region(r);
if (r->ar.end != next->ar.start) {
newr = damon_new_region(r->ar.end, next->ar.start);
if (!newr)
return -ENOMEM;
damon_insert_region(newr, r, next, t);
}
}
return 0;
}
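/*
 * damon_set_regions() - Set regions of a target for given address ranges.
 * @t:		the given target.
 * @ranges:	array of new monitoring target ranges.
 * @nr_ranges:	length of @ranges.
 *
 * This function adds new regions to, or modifies existing regions of, a
 * monitoring target to fit the given address ranges.
 *
 * Return: 0 if success, or negative error code otherwise.
 */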
int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges,
unsigned int nr_ranges)
{
struct damon_region *r, *next;
unsigned int i;
int err;
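/* Remove regions which are not in the new ranges */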
damon_for_each_region_safe(r, next, t) {
for (i = 0; i < nr_ranges; i++) {
if (damon_intersect(r, &ranges[i]))
break;
}
if (i == nr_ranges)
damon_destroy_region(r, t);
}
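/* Add new regions or resize existing regions to fit in the ranges */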
r = damon_first_region(t);
for (i = 0; i < nr_ranges; i++) {
struct damon_region *first = NULL, *last, *newr;
struct damon_addr_range *range;
range = &ranges[i];
damon_for_each_region_from(r, t) {
if (damon_intersect(r, range)) {
if (!first)
first = r;
last = r;
}
if (r->ar.start >= range->end)
break;
}
if (!first) {
newr = damon_new_region(
ALIGN_DOWN(range->start,
DAMON_MIN_REGION),
ALIGN(range->end, DAMON_MIN_REGION));
if (!newr)
return -ENOMEM;
damon_insert_region(newr, damon_prev_region(r), r, t);
} else {
first->ar.start = ALIGN_DOWN(range->start,
DAMON_MIN_REGION);
last->ar.end = ALIGN(range->end, DAMON_MIN_REGION);
err = damon_fill_regions_holes(first, last, t);
if (err)
return err;
}
}
return 0;
}
struct damos_filter *damos_new_filter(enum damos_filter_type type,
bool matching, bool allow)
{
struct damos_filter *filter;
filter = kmalloc(sizeof(*filter), GFP_KERNEL);
if (!filter)
return NULL;
filter->type = type;
filter->matching = matching;
filter->allow = allow;
INIT_LIST_HEAD(&filter->list);
return filter;
}
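/**
 * damos_filter_for_ops() - Return if the filter is handled by the ops layer.
 * @type:	type of the filter.
 *
 * Return: true if the filter of @type needs to be handled by the ops layer,
 * false if it is handled by the core layer.
 */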
bool damos_filter_for_ops(enum damos_filter_type type)
{
switch (type) {
case DAMOS_FILTER_TYPE_ADDR:
case DAMOS_FILTER_TYPE_TARGET:
return false;
default:
break;
}
return true;
}
void damos_add_filter(struct damos *s, struct damos_filter *f)
{
if (damos_filter_for_ops(f->type))
list_add_tail(&f->list, &s->ops_filters);
else
list_add_tail(&f->list, &s->filters);
}
static void damos_del_filter(struct damos_filter *f)
{
list_del(&f->list);
}
static void damos_free_filter(struct damos_filter *f)
{
kfree(f);
}
void damos_destroy_filter(struct damos_filter *f)
{
damos_del_filter(f);
damos_free_filter(f);
}
struct damos_quota_goal *damos_new_quota_goal(
enum damos_quota_goal_metric metric,
unsigned long target_value)
{
struct damos_quota_goal *goal;
goal = kmalloc(sizeof(*goal), GFP_KERNEL);
if (!goal)
return NULL;
goal->metric = metric;
goal->target_value = target_value;
INIT_LIST_HEAD(&goal->list);
return goal;
}
void damos_add_quota_goal(struct damos_quota *q, struct damos_quota_goal *g)
{
list_add_tail(&g->list, &q->goals);
}
static void damos_del_quota_goal(struct damos_quota_goal *g)
{
list_del(&g->list);
}
static void damos_free_quota_goal(struct damos_quota_goal *g)
{
kfree(g);
}
void damos_destroy_quota_goal(struct damos_quota_goal *g)
{
damos_del_quota_goal(g);
damos_free_quota_goal(g);
}
static struct damos_quota *damos_quota_init(struct damos_quota *quota)
{
quota->esz = 0;
quota->total_charged_sz = 0;
quota->total_charged_ns = 0;
quota->charged_sz = 0;
quota->charged_from = 0;
quota->charge_target_from = NULL;
quota->charge_addr_from = 0;
quota->esz_bp = 0;
return quota;
}
struct damos *damon_new_scheme(struct damos_access_pattern *pattern,
enum damos_action action,
unsigned long apply_interval_us,
struct damos_quota *quota,
struct damos_watermarks *wmarks,
int target_nid)
{
struct damos *scheme;
scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
if (!scheme)
return NULL;
scheme->pattern = *pattern;
scheme->action = action;
scheme->apply_interval_us = apply_interval_us;
scheme->next_apply_sis = 0;
scheme->walk_completed = false;
INIT_LIST_HEAD(&scheme->filters);
INIT_LIST_HEAD(&scheme->ops_filters);
scheme->stat = (struct damos_stat){};
INIT_LIST_HEAD(&scheme->list);
scheme->quota = *(damos_quota_init(quota));
INIT_LIST_HEAD(&scheme->quota.goals);
scheme->wmarks = *wmarks;
scheme->wmarks.activated = true;
scheme->migrate_dests = (struct damos_migrate_dests){};
scheme->target_nid = target_nid;
return scheme;
}
static void damos_set_next_apply_sis(struct damos *s, struct damon_ctx *ctx)
{
unsigned long sample_interval = ctx->attrs.sample_interval ?
ctx->attrs.sample_interval : 1;
unsigned long apply_interval = s->apply_interval_us ?
s->apply_interval_us : ctx->attrs.aggr_interval;
s->next_apply_sis = ctx->passed_sample_intervals +
apply_interval / sample_interval;
}
void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
{
list_add_tail(&s->list, &ctx->schemes);
damos_set_next_apply_sis(s, ctx);
}
static void damon_del_scheme(struct damos *s)
{
list_del(&s->list);
}
static void damon_free_scheme(struct damos *s)
{
kfree(s);
}
void damon_destroy_scheme(struct damos *s)
{
struct damos_quota_goal *g, *g_next;
struct damos_filter *f, *next;
damos_for_each_quota_goal_safe(g, g_next, &s->quota)
damos_destroy_quota_goal(g);
damos_for_each_filter_safe(f, next, s)
damos_destroy_filter(f);
kfree(s->migrate_dests.node_id_arr);
kfree(s->migrate_dests.weight_arr);
damon_del_scheme(s);
damon_free_scheme(s);
}
struct damon_target *damon_new_target(void)
{
struct damon_target *t;
t = kmalloc(sizeof(*t), GFP_KERNEL);
if (!t)
return NULL;
t->pid = NULL;
t->nr_regions = 0;
INIT_LIST_HEAD(&t->regions_list);
INIT_LIST_HEAD(&t->list);
return t;
}
void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
{
list_add_tail(&t->list, &ctx->adaptive_targets);
}
bool damon_targets_empty(struct damon_ctx *ctx)
{
return list_empty(&ctx->adaptive_targets);
}
static void damon_del_target(struct damon_target *t)
{
list_del(&t->list);
}
void damon_free_target(struct damon_target *t)
{
struct damon_region *r, *next;
damon_for_each_region_safe(r, next, t)
damon_free_region(r);
kfree(t);
}
void damon_destroy_target(struct damon_target *t, struct damon_ctx *ctx)
{
if (ctx && ctx->ops.cleanup_target)
ctx->ops.cleanup_target(t);
damon_del_target(t);
damon_free_target(t);
}
unsigned int damon_nr_regions(struct damon_target *t)
{
return t->nr_regions;
}
struct damon_ctx *damon_new_ctx(void)
{
struct damon_ctx *ctx;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return NULL;
init_completion(&ctx->kdamond_started);
ctx->attrs.sample_interval = 5 * 1000;
ctx->attrs.aggr_interval = 100 * 1000;
ctx->attrs.ops_update_interval = 60 * 1000 * 1000;
ctx->passed_sample_intervals = 0;
ctx->next_aggregation_sis = 0;
ctx->next_ops_update_sis = 0;
mutex_init(&ctx->kdamond_lock);
INIT_LIST_HEAD(&ctx->call_controls);
mutex_init(&ctx->call_controls_lock);
mutex_init(&ctx->walk_control_lock);
ctx->attrs.min_nr_regions = 10;
ctx->attrs.max_nr_regions = 1000;
INIT_LIST_HEAD(&ctx->adaptive_targets);
INIT_LIST_HEAD(&ctx->schemes);
return ctx;
}
static void damon_destroy_targets(struct damon_ctx *ctx)
{
struct damon_target *t, *next_t;
damon_for_each_target_safe(t, next_t, ctx)
damon_destroy_target(t, ctx);
}
void damon_destroy_ctx(struct damon_ctx *ctx)
{
struct damos *s, *next_s;
damon_destroy_targets(ctx);
damon_for_each_scheme_safe(s, next_s, ctx)
damon_destroy_scheme(s);
kfree(ctx);
}
static unsigned int damon_age_for_new_attrs(unsigned int age,
struct damon_attrs *old_attrs, struct damon_attrs *new_attrs)
{
return age * old_attrs->aggr_interval / new_attrs->aggr_interval;
}
static unsigned int damon_accesses_bp_to_nr_accesses(
unsigned int accesses_bp, struct damon_attrs *attrs)
{
return accesses_bp * damon_max_nr_accesses(attrs) / 10000;
}
static unsigned int damon_nr_accesses_to_accesses_bp(
unsigned int nr_accesses, struct damon_attrs *attrs)
{
return nr_accesses * 10000 / damon_max_nr_accesses(attrs);
}
static unsigned int damon_nr_accesses_for_new_attrs(unsigned int nr_accesses,
struct damon_attrs *old_attrs, struct damon_attrs *new_attrs)
{
return damon_accesses_bp_to_nr_accesses(
damon_nr_accesses_to_accesses_bp(
nr_accesses, old_attrs),
new_attrs);
}
static void damon_update_monitoring_result(struct damon_region *r,
struct damon_attrs *old_attrs, struct damon_attrs *new_attrs,
bool aggregating)
{
if (!aggregating) {
r->nr_accesses = damon_nr_accesses_for_new_attrs(
r->nr_accesses, old_attrs, new_attrs);
r->nr_accesses_bp = r->nr_accesses * 10000;
} else {
r->last_nr_accesses = damon_nr_accesses_for_new_attrs(
r->last_nr_accesses, old_attrs, new_attrs);
r->nr_accesses_bp = r->last_nr_accesses * 10000;
r->nr_accesses = 0;
}
r->age = damon_age_for_new_attrs(r->age, old_attrs, new_attrs);
}
static void damon_update_monitoring_results(struct damon_ctx *ctx,
struct damon_attrs *new_attrs, bool aggregating)
{
struct damon_attrs *old_attrs = &ctx->attrs;
struct damon_target *t;
struct damon_region *r;
if (!old_attrs->sample_interval || !old_attrs->aggr_interval ||
!new_attrs->sample_interval ||
!new_attrs->aggr_interval)
return;
damon_for_each_target(t, ctx)
damon_for_each_region(r, t)
damon_update_monitoring_result(
r, old_attrs, new_attrs, aggregating);
}
static bool damon_valid_intervals_goal(struct damon_attrs *attrs)
{
struct damon_intervals_goal *goal = &attrs->intervals_goal;
if (!goal->aggrs)
return true;
if (goal->min_sample_us > goal->max_sample_us)
return false;
if (attrs->sample_interval < goal->min_sample_us ||
goal->max_sample_us < attrs->sample_interval)
return false;
return true;
}
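/**
 * damon_set_attrs() - Set attributes for the monitoring.
 * @ctx:	monitoring context to set the attributes for.
 * @attrs:	monitoring attributes to set.
 *
 * This function should be called while the kdamond is not running, or from a
 * kdamond-safe context such as damon_call().  Every time interval is in
 * micro-seconds.
 *
 * Return: 0 on success, negative error code otherwise.
 */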
int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs)
{
unsigned long sample_interval = attrs->sample_interval ?
attrs->sample_interval : 1;
struct damos *s;
bool aggregating = ctx->passed_sample_intervals <
ctx->next_aggregation_sis;
if (!damon_valid_intervals_goal(attrs))
return -EINVAL;
if (attrs->min_nr_regions < 3)
return -EINVAL;
if (attrs->min_nr_regions > attrs->max_nr_regions)
return -EINVAL;
if (attrs->sample_interval > attrs->aggr_interval)
return -EINVAL;
if (!attrs->aggr_samples)
attrs->aggr_samples = attrs->aggr_interval / sample_interval;
ctx->next_aggregation_sis = ctx->passed_sample_intervals +
attrs->aggr_interval / sample_interval;
ctx->next_ops_update_sis = ctx->passed_sample_intervals +
attrs->ops_update_interval / sample_interval;
damon_update_monitoring_results(ctx, attrs, aggregating);
ctx->attrs = *attrs;
damon_for_each_scheme(s, ctx)
damos_set_next_apply_sis(s, ctx);
return 0;
}
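/**
 * damon_set_schemes() - Set data access monitoring based operation schemes.
 * @ctx:	monitoring context to set the schemes for.
 * @schemes:	array of the schemes.
 * @nr_schemes:	number of entries in @schemes.
 *
 * This function should not be called while the kdamond of the context is
 * running.
 */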
void damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
ssize_t nr_schemes)
{
struct damos *s, *next;
ssize_t i;
damon_for_each_scheme_safe(s, next, ctx)
damon_destroy_scheme(s);
for (i = 0; i < nr_schemes; i++)
damon_add_scheme(ctx, schemes[i]);
}
static struct damos_quota_goal *damos_nth_quota_goal(
int n, struct damos_quota *q)
{
struct damos_quota_goal *goal;
int i = 0;
damos_for_each_quota_goal(goal, q) {
if (i++ == n)
return goal;
}
return NULL;
}
static void damos_commit_quota_goal_union(
struct damos_quota_goal *dst, struct damos_quota_goal *src)
{
switch (dst->metric) {
case DAMOS_QUOTA_NODE_MEM_USED_BP:
case DAMOS_QUOTA_NODE_MEM_FREE_BP:
dst->nid = src->nid;
break;
default:
break;
}
}
static void damos_commit_quota_goal(
struct damos_quota_goal *dst, struct damos_quota_goal *src)
{
dst->metric = src->metric;
dst->target_value = src->target_value;
if (dst->metric == DAMOS_QUOTA_USER_INPUT)
dst->current_value = src->current_value;
damos_commit_quota_goal_union(dst, src);
}
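/**
 * damos_commit_quota_goals() - Commit DAMOS quota goals to another quota.
 * @dst:	The commit destination DAMOS quota.
 * @src:	The commit source DAMOS quota.
 *
 * Copies user-specified parameters for quota goals from @src to @dst.  Users
 * should use this function for quota goals-level parameters update of a
 * running DAMON context, instead of manual in-place updates.
 */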
int damos_commit_quota_goals(struct damos_quota *dst, struct damos_quota *src)
{
struct damos_quota_goal *dst_goal, *next, *src_goal, *new_goal;
int i = 0, j = 0;
damos_for_each_quota_goal_safe(dst_goal, next, dst) {
src_goal = damos_nth_quota_goal(i++, src);
if (src_goal)
damos_commit_quota_goal(dst_goal, src_goal);
else
damos_destroy_quota_goal(dst_goal);
}
damos_for_each_quota_goal_safe(src_goal, next, src) {
if (j++ < i)
continue;
new_goal = damos_new_quota_goal(
src_goal->metric, src_goal->target_value);
if (!new_goal)
return -ENOMEM;
damos_commit_quota_goal_union(new_goal, src_goal);
damos_add_quota_goal(dst, new_goal);
}
return 0;
}
static int damos_commit_quota(struct damos_quota *dst, struct damos_quota *src)
{
int err;
dst->reset_interval = src->reset_interval;
dst->ms = src->ms;
dst->sz = src->sz;
err = damos_commit_quota_goals(dst, src);
if (err)
return err;
dst->weight_sz = src->weight_sz;
dst->weight_nr_accesses = src->weight_nr_accesses;
dst->weight_age = src->weight_age;
return 0;
}
static struct damos_filter *damos_nth_filter(int n, struct damos *s)
{
struct damos_filter *filter;
int i = 0;
damos_for_each_filter(filter, s) {
if (i++ == n)
return filter;
}
return NULL;
}
static struct damos_filter *damos_nth_ops_filter(int n, struct damos *s)
{
struct damos_filter *filter;
int i = 0;
damos_for_each_ops_filter(filter, s) {
if (i++ == n)
return filter;
}
return NULL;
}
static void damos_commit_filter_arg(
struct damos_filter *dst, struct damos_filter *src)
{
switch (dst->type) {
case DAMOS_FILTER_TYPE_MEMCG:
dst->memcg_id = src->memcg_id;
break;
case DAMOS_FILTER_TYPE_ADDR:
dst->addr_range = src->addr_range;
break;
case DAMOS_FILTER_TYPE_TARGET:
dst->target_idx = src->target_idx;
break;
case DAMOS_FILTER_TYPE_HUGEPAGE_SIZE:
dst->sz_range = src->sz_range;
break;
default:
break;
}
}
static void damos_commit_filter(
struct damos_filter *dst, struct damos_filter *src)
{
dst->type = src->type;
dst->matching = src->matching;
dst->allow = src->allow;
damos_commit_filter_arg(dst, src);
}
static int damos_commit_core_filters(struct damos *dst, struct damos *src)
{
struct damos_filter *dst_filter, *next, *src_filter, *new_filter;
int i = 0, j = 0;
damos_for_each_filter_safe(dst_filter, next, dst) {
src_filter = damos_nth_filter(i++, src);
if (src_filter)
damos_commit_filter(dst_filter, src_filter);
else
damos_destroy_filter(dst_filter);
}
damos_for_each_filter_safe(src_filter, next, src) {
if (j++ < i)
continue;
new_filter = damos_new_filter(
src_filter->type, src_filter->matching,
src_filter->allow);
if (!new_filter)
return -ENOMEM;
damos_commit_filter_arg(new_filter, src_filter);
damos_add_filter(dst, new_filter);
}
return 0;
}
static int damos_commit_ops_filters(struct damos *dst, struct damos *src)
{
struct damos_filter *dst_filter, *next, *src_filter, *new_filter;
int i = 0, j = 0;
damos_for_each_ops_filter_safe(dst_filter, next, dst) {
src_filter = damos_nth_ops_filter(i++, src);
if (src_filter)
damos_commit_filter(dst_filter, src_filter);
else
damos_destroy_filter(dst_filter);
}
damos_for_each_ops_filter_safe(src_filter, next, src) {
if (j++ < i)
continue;
new_filter = damos_new_filter(
src_filter->type, src_filter->matching,
src_filter->allow);
if (!new_filter)
return -ENOMEM;
damos_commit_filter_arg(new_filter, src_filter);
damos_add_filter(dst, new_filter);
}
return 0;
}
static bool damos_filters_default_reject(struct list_head *filters)
{
struct damos_filter *last_filter;
if (list_empty(filters))
return false;
last_filter = list_last_entry(filters, struct damos_filter, list);
return last_filter->allow;
}
static void damos_set_filters_default_reject(struct damos *s)
{
if (!list_empty(&s->ops_filters))
s->core_filters_default_reject = false;
else
s->core_filters_default_reject =
damos_filters_default_reject(&s->filters);
s->ops_filters_default_reject =
damos_filters_default_reject(&s->ops_filters);
}
static int damos_commit_dests(struct damos *dst, struct damos *src)
{
struct damos_migrate_dests *dst_dests, *src_dests;
dst_dests = &dst->migrate_dests;
src_dests = &src->migrate_dests;
if (dst_dests->nr_dests != src_dests->nr_dests) {
kfree(dst_dests->node_id_arr);
kfree(dst_dests->weight_arr);
dst_dests->node_id_arr = kmalloc_array(src_dests->nr_dests,
sizeof(*dst_dests->node_id_arr), GFP_KERNEL);
if (!dst_dests->node_id_arr) {
dst_dests->weight_arr = NULL;
return -ENOMEM;
}
dst_dests->weight_arr = kmalloc_array(src_dests->nr_dests,
sizeof(*dst_dests->weight_arr), GFP_KERNEL);
if (!dst_dests->weight_arr) {
return -ENOMEM;
}
}
dst_dests->nr_dests = src_dests->nr_dests;
for (int i = 0; i < src_dests->nr_dests; i++) {
dst_dests->node_id_arr[i] = src_dests->node_id_arr[i];
dst_dests->weight_arr[i] = src_dests->weight_arr[i];
}
return 0;
}
static int damos_commit_filters(struct damos *dst, struct damos *src)
{
int err;
err = damos_commit_core_filters(dst, src);
if (err)
return err;
err = damos_commit_ops_filters(dst, src);
if (err)
return err;
damos_set_filters_default_reject(dst);
return 0;
}
static struct damos *damon_nth_scheme(int n, struct damon_ctx *ctx)
{
struct damos *s;
int i = 0;
damon_for_each_scheme(s, ctx) {
if (i++ == n)
return s;
}
return NULL;
}
static int damos_commit(struct damos *dst, struct damos *src)
{
int err;
dst->pattern = src->pattern;
dst->action = src->action;
dst->apply_interval_us = src->apply_interval_us;
err = damos_commit_quota(&dst->quota, &src->quota);
if (err)
return err;
dst->wmarks = src->wmarks;
dst->target_nid = src->target_nid;
err = damos_commit_dests(dst, src);
if (err)
return err;
err = damos_commit_filters(dst, src);
return err;
}
static int damon_commit_schemes(struct damon_ctx *dst, struct damon_ctx *src)
{
struct damos *dst_scheme, *next, *src_scheme, *new_scheme;
int i = 0, j = 0, err;
damon_for_each_scheme_safe(dst_scheme, next, dst) {
src_scheme = damon_nth_scheme(i++, src);
if (src_scheme) {
err = damos_commit(dst_scheme, src_scheme);
if (err)
return err;
} else {
damon_destroy_scheme(dst_scheme);
}
}
damon_for_each_scheme_safe(src_scheme, next, src) {
if (j++ < i)
continue;
new_scheme = damon_new_scheme(&src_scheme->pattern,
src_scheme->action,
src_scheme->apply_interval_us,
&src_scheme->quota, &src_scheme->wmarks,
NUMA_NO_NODE);
if (!new_scheme)
return -ENOMEM;
err = damos_commit(new_scheme, src_scheme);
if (err) {
damon_destroy_scheme(new_scheme);
return err;
}
damon_add_scheme(dst, new_scheme);
}
return 0;
}
static struct damon_target *damon_nth_target(int n, struct damon_ctx *ctx)
{
struct damon_target *t;
int i = 0;
damon_for_each_target(t, ctx) {
if (i++ == n)
return t;
}
return NULL;
}
static int damon_commit_target_regions(
struct damon_target *dst, struct damon_target *src)
{
struct damon_region *src_region;
struct damon_addr_range *ranges;
int i = 0, err;
damon_for_each_region(src_region, src)
i++;
if (!i)
return 0;
ranges = kmalloc_array(i, sizeof(*ranges), GFP_KERNEL | __GFP_NOWARN);
if (!ranges)
return -ENOMEM;
i = 0;
damon_for_each_region(src_region, src)
ranges[i++] = src_region->ar;
err = damon_set_regions(dst, ranges, i);
kfree(ranges);
return err;
}
static int damon_commit_target(
struct damon_target *dst, bool dst_has_pid,
struct damon_target *src, bool src_has_pid)
{
int err;
err = damon_commit_target_regions(dst, src);
if (err)
return err;
if (dst_has_pid)
put_pid(dst->pid);
if (src_has_pid)
get_pid(src->pid);
dst->pid = src->pid;
return 0;
}
static int damon_commit_targets(
struct damon_ctx *dst, struct damon_ctx *src)
{
struct damon_target *dst_target, *next, *src_target, *new_target;
int i = 0, j = 0, err;
damon_for_each_target_safe(dst_target, next, dst) {
src_target = damon_nth_target(i++, src);
if (src_target) {
err = damon_commit_target(
dst_target, damon_target_has_pid(dst),
src_target, damon_target_has_pid(src));
if (err)
return err;
} else {
struct damos *s;
damon_destroy_target(dst_target, dst);
damon_for_each_scheme(s, dst) {
if (s->quota.charge_target_from == dst_target) {
s->quota.charge_target_from = NULL;
s->quota.charge_addr_from = 0;
}
}
}
}
damon_for_each_target_safe(src_target, next, src) {
if (j++ < i)
continue;
new_target = damon_new_target();
if (!new_target)
return -ENOMEM;
err = damon_commit_target(new_target, false,
src_target, damon_target_has_pid(src));
if (err) {
damon_destroy_target(new_target, NULL);
return err;
}
damon_add_target(dst, new_target);
}
return 0;
}
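/**
 * damon_commit_ctx() - Commit parameters of a DAMON context to another.
 * @dst:	The commit destination DAMON context.
 * @src:	The commit source DAMON context.
 *
 * This function copies user-specified parameters from @src to @dst and updates
 * the internal status and results accordingly.  Users should use this function
 * for context-level parameters update of a running context, instead of manual
 * in-place updates.
 */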
int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src)
{
int err;
err = damon_commit_schemes(dst, src);
if (err)
return err;
err = damon_commit_targets(dst, src);
if (err)
return err;
err = damon_set_attrs(dst, &src->attrs);
if (err)
return err;
dst->ops = src->ops;
return 0;
}
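/**
 * damon_nr_running_ctxs() - Return number of currently running contexts.
 */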
int damon_nr_running_ctxs(void)
{
int nr_ctxs;
mutex_lock(&damon_lock);
nr_ctxs = nr_running_ctxs;
mutex_unlock(&damon_lock);
return nr_ctxs;
}
static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
{
struct damon_target *t;
struct damon_region *r;
unsigned long sz = 0;
damon_for_each_target(t, ctx) {
damon_for_each_region(r, t)
sz += damon_sz_region(r);
}
if (ctx->attrs.min_nr_regions)
sz /= ctx->attrs.min_nr_regions;
if (sz < DAMON_MIN_REGION)
sz = DAMON_MIN_REGION;
return sz;
}
static int kdamond_fn(void *data);
static int __damon_start(struct damon_ctx *ctx)
{
int err = -EBUSY;
mutex_lock(&ctx->kdamond_lock);
if (!ctx->kdamond) {
err = 0;
reinit_completion(&ctx->kdamond_started);
ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
nr_running_ctxs);
if (IS_ERR(ctx->kdamond)) {
err = PTR_ERR(ctx->kdamond);
ctx->kdamond = NULL;
} else {
wait_for_completion(&ctx->kdamond_started);
}
}
mutex_unlock(&ctx->kdamond_lock);
return err;
}
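/**
 * damon_start() - Starts the monitorings for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to start monitoring.
 * @nr_ctxs:	size of @ctxs.
 * @exclusive:	exclusiveness of this contexts group.
 *
 * This function starts a group of monitoring threads for a group of monitoring
 * contexts.  One thread per each context is created and run in parallel.  The
 * caller should handle synchronization between them by itself.  If @exclusive
 * is true and a group of contexts that created by other 'damon_start()' call
 * is currently running, this function does nothing but returns -EBUSY.
 *
 * Return: 0 on success, negative error code otherwise.
 */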
int damon_start(struct damon_ctx **ctxs, int nr_ctxs, bool exclusive)
{
int i;
int err = 0;
mutex_lock(&damon_lock);
if ((exclusive && nr_running_ctxs) ||
(!exclusive && running_exclusive_ctxs)) {
mutex_unlock(&damon_lock);
return -EBUSY;
}
for (i = 0; i < nr_ctxs; i++) {
err = __damon_start(ctxs[i]);
if (err)
break;
nr_running_ctxs++;
}
if (exclusive && nr_running_ctxs)
running_exclusive_ctxs = true;
mutex_unlock(&damon_lock);
return err;
}
static int __damon_stop(struct damon_ctx *ctx)
{
struct task_struct *tsk;
mutex_lock(&ctx->kdamond_lock);
tsk = ctx->kdamond;
if (tsk) {
get_task_struct(tsk);
mutex_unlock(&ctx->kdamond_lock);
kthread_stop_put(tsk);
return 0;
}
mutex_unlock(&ctx->kdamond_lock);
return -EPERM;
}
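/**
 * damon_stop() - Stops the monitorings for a given group of contexts.
 * @ctxs:	an array of the pointers for contexts to stop monitoring.
 * @nr_ctxs:	size of @ctxs.
 *
 * Return: 0 on success, negative error code otherwise.
 */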
int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
{
int i, err = 0;
for (i = 0; i < nr_ctxs; i++) {
err = __damon_stop(ctxs[i]);
if (err)
break;
}
return err;
}
bool damon_is_running(struct damon_ctx *ctx)
{
bool running;
mutex_lock(&ctx->kdamond_lock);
running = ctx->kdamond != NULL;
mutex_unlock(&ctx->kdamond_lock);
return running;
}
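/**
 * damon_call() - Invoke a given function on DAMON worker thread (kdamond).
 * @ctx:	DAMON context to call the function for.
 * @control:	Control variable of the call request.
 *
 * Ask DAMON worker thread (kdamond) of @ctx to call a function with an
 * argument data that respectively passed via &damon_call_control->fn and
 * &damon_call_control->data of @control.  If &damon_call_control->repeat of
 * @control is set, the kdamond repeats the invocation from its main loop.
 * Otherwise, this function waits until the kdamond finishes handling of the
 * request.
 *
 * Return: 0 on success, negative error code otherwise.
 */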
int damon_call(struct damon_ctx *ctx, struct damon_call_control *control)
{
if (!control->repeat)
init_completion(&control->completion);
control->canceled = false;
INIT_LIST_HEAD(&control->list);
mutex_lock(&ctx->call_controls_lock);
list_add_tail(&control->list, &ctx->call_controls);
mutex_unlock(&ctx->call_controls_lock);
if (!damon_is_running(ctx))
return -EINVAL;
if (control->repeat)
return 0;
wait_for_completion(&control->completion);
if (control->canceled)
return -ECANCELED;
return 0;
}
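/**
 * damos_walk() - Invoke a given function while DAMOS walks regions.
 * @ctx:	DAMON context to call the function for.
 * @control:	Control variable of the walk request.
 *
 * Ask DAMON worker thread (kdamond) of @ctx to call a function for each
 * region that the kdamond will apply DAMOS actions to, and wait until the
 * kdamond finishes handling of the request.
 *
 * Return: 0 on success, negative error code otherwise.
 */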
int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control)
{
init_completion(&control->completion);
control->canceled = false;
mutex_lock(&ctx->walk_control_lock);
if (ctx->walk_control) {
mutex_unlock(&ctx->walk_control_lock);
return -EBUSY;
}
ctx->walk_control = control;
mutex_unlock(&ctx->walk_control_lock);
if (!damon_is_running(ctx))
return -EINVAL;
wait_for_completion(&control->completion);
if (control->canceled)
return -ECANCELED;
return 0;
}
static void damon_warn_fix_nr_accesses_corruption(struct damon_region *r)
{
if (r->nr_accesses_bp == r->nr_accesses * 10000)
return;
WARN_ONCE(true, "invalid nr_accesses_bp at reset: %u %u\n",
r->nr_accesses_bp, r->nr_accesses);
r->nr_accesses_bp = r->nr_accesses * 10000;
}
static void kdamond_reset_aggregated(struct damon_ctx *c)
{
struct damon_target *t;
unsigned int ti = 0;
damon_for_each_target(t, c) {
struct damon_region *r;
damon_for_each_region(r, t) {
trace_damon_aggregated(ti, r, damon_nr_regions(t));
damon_warn_fix_nr_accesses_corruption(r);
r->last_nr_accesses = r->nr_accesses;
r->nr_accesses = 0;
}
ti++;
}
}
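/*
 * Return the monitoring intervals tuning quality score of the context in bp
 * (1/10,000).  The score is the ratio of the access events captured in the
 * last aggregation window to the amount that the intervals goal aims for;
 * 10,000 means the goal is exactly met.
 */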
static unsigned long damon_get_intervals_score(struct damon_ctx *c)
{
struct damon_target *t;
struct damon_region *r;
unsigned long sz_region, max_access_events = 0, access_events = 0;
unsigned long target_access_events;
unsigned long goal_bp = c->attrs.intervals_goal.access_bp;
damon_for_each_target(t, c) {
damon_for_each_region(r, t) {
sz_region = damon_sz_region(r);
max_access_events += sz_region * c->attrs.aggr_samples;
access_events += sz_region * r->nr_accesses;
}
}
target_access_events = max_access_events * goal_bp / 10000;
target_access_events = target_access_events ? : 1;
return access_events * 10000 / target_access_events;
}
static unsigned long damon_feed_loop_next_input(unsigned long last_input,
unsigned long score);
static unsigned long damon_get_intervals_adaptation_bp(struct damon_ctx *c)
{
unsigned long score_bp, adaptation_bp;
score_bp = damon_get_intervals_score(c);
adaptation_bp = damon_feed_loop_next_input(100000000, score_bp) /
10000;
if (adaptation_bp <= 10000)
adaptation_bp = 5000 + adaptation_bp / 2;
return adaptation_bp;
}
static void kdamond_tune_intervals(struct damon_ctx *c)
{
unsigned long adaptation_bp;
struct damon_attrs new_attrs;
struct damon_intervals_goal *goal;
adaptation_bp = damon_get_intervals_adaptation_bp(c);
if (adaptation_bp == 10000)
return;
new_attrs = c->attrs;
goal = &c->attrs.intervals_goal;
new_attrs.sample_interval = min(goal->max_sample_us,
c->attrs.sample_interval * adaptation_bp / 10000);
new_attrs.sample_interval = max(goal->min_sample_us,
new_attrs.sample_interval);
new_attrs.aggr_interval = new_attrs.sample_interval *
c->attrs.aggr_samples;
trace_damon_monitor_intervals_tune(new_attrs.sample_interval);
damon_set_attrs(c, &new_attrs);
}
static void damon_split_region_at(struct damon_target *t,
struct damon_region *r, unsigned long sz_r);
static bool __damos_valid_target(struct damon_region *r, struct damos *s)
{
unsigned long sz;
unsigned int nr_accesses = r->nr_accesses_bp / 10000;
sz = damon_sz_region(r);
return s->pattern.min_sz_region <= sz &&
sz <= s->pattern.max_sz_region &&
s->pattern.min_nr_accesses <= nr_accesses &&
nr_accesses <= s->pattern.max_nr_accesses &&
s->pattern.min_age_region <= r->age &&
r->age <= s->pattern.max_age_region;
}
static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
struct damon_region *r, struct damos *s)
{
bool ret = __damos_valid_target(r, s);
if (!ret || !s->quota.esz || !c->ops.get_scheme_score)
return ret;
return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score;
}
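/*
 * Skip regions that were already charged to the quota in the previous charge
 * window, so the scheme continues from where it left off once the quota is
 * refilled.  May split *@rp and advance it to the not-yet-charged part.
 * Returns true if the region should be skipped.
 */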
static bool damos_skip_charged_region(struct damon_target *t,
struct damon_region **rp, struct damos *s)
{
struct damon_region *r = *rp;
struct damos_quota *quota = &s->quota;
unsigned long sz_to_skip;
if (quota->charge_target_from) {
if (t != quota->charge_target_from)
return true;
if (r == damon_last_region(t)) {
quota->charge_target_from = NULL;
quota->charge_addr_from = 0;
return true;
}
if (quota->charge_addr_from &&
r->ar.end <= quota->charge_addr_from)
return true;
if (quota->charge_addr_from && r->ar.start <
quota->charge_addr_from) {
sz_to_skip = ALIGN_DOWN(quota->charge_addr_from -
r->ar.start, DAMON_MIN_REGION);
if (!sz_to_skip) {
if (damon_sz_region(r) <= DAMON_MIN_REGION)
return true;
sz_to_skip = DAMON_MIN_REGION;
}
damon_split_region_at(t, r, sz_to_skip);
r = damon_next_region(r);
*rp = r;
}
quota->charge_target_from = NULL;
quota->charge_addr_from = 0;
}
return false;
}
static void damos_update_stat(struct damos *s,
unsigned long sz_tried, unsigned long sz_applied,
unsigned long sz_ops_filter_passed)
{
s->stat.nr_tried++;
s->stat.sz_tried += sz_tried;
if (sz_applied)
s->stat.nr_applied++;
s->stat.sz_applied += sz_applied;
s->stat.sz_ops_filter_passed += sz_ops_filter_passed;
}
static bool damos_filter_match(struct damon_ctx *ctx, struct damon_target *t,
struct damon_region *r, struct damos_filter *filter)
{
bool matched = false;
struct damon_target *ti;
int target_idx = 0;
unsigned long start, end;
switch (filter->type) {
case DAMOS_FILTER_TYPE_TARGET:
damon_for_each_target(ti, ctx) {
if (ti == t)
break;
target_idx++;
}
matched = target_idx == filter->target_idx;
break;
case DAMOS_FILTER_TYPE_ADDR:
start = ALIGN_DOWN(filter->addr_range.start, DAMON_MIN_REGION);
end = ALIGN_DOWN(filter->addr_range.end, DAMON_MIN_REGION);
if (start <= r->ar.start && r->ar.end <= end) {
matched = true;
break;
}
if (r->ar.end <= start || end <= r->ar.start) {
matched = false;
break;
}
if (r->ar.start < start) {
damon_split_region_at(t, r, start - r->ar.start);
matched = false;
break;
}
damon_split_region_at(t, r, end - r->ar.start);
matched = true;
break;
default:
return false;
}
return matched == filter->matching;
}
static bool damos_filter_out(struct damon_ctx *ctx, struct damon_target *t,
struct damon_region *r, struct damos *s)
{
struct damos_filter *filter;
s->core_filters_allowed = false;
damos_for_each_filter(filter, s) {
if (damos_filter_match(ctx, t, r, filter)) {
if (filter->allow)
s->core_filters_allowed = true;
return !filter->allow;
}
}
return s->core_filters_default_reject;
}
static void damos_walk_call_walk(struct damon_ctx *ctx, struct damon_target *t,
struct damon_region *r, struct damos *s,
unsigned long sz_filter_passed)
{
struct damos_walk_control *control;
if (s->walk_completed)
return;
control = ctx->walk_control;
if (!control)
return;
control->walk_fn(control->data, ctx, t, r, s, sz_filter_passed);
}
static void damos_walk_complete(struct damon_ctx *ctx, struct damos *s)
{
struct damos *siter;
struct damos_walk_control *control;
control = ctx->walk_control;
if (!control)
return;
s->walk_completed = true;
damon_for_each_scheme(siter, ctx) {
if (!siter->walk_completed)
return;
}
damon_for_each_scheme(siter, ctx)
siter->walk_completed = false;
complete(&control->completion);
ctx->walk_control = NULL;
}
static void damos_walk_cancel(struct damon_ctx *ctx)
{
struct damos_walk_control *control;
mutex_lock(&ctx->walk_control_lock);
control = ctx->walk_control;
mutex_unlock(&ctx->walk_control_lock);
if (!control)
return;
control->canceled = true;
complete(&control->completion);
mutex_lock(&ctx->walk_control_lock);
ctx->walk_control = NULL;
mutex_unlock(&ctx->walk_control_lock);
}
static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t,
struct damon_region *r, struct damos *s)
{
struct damos_quota *quota = &s->quota;
unsigned long sz = damon_sz_region(r);
struct timespec64 begin, end;
unsigned long sz_applied = 0;
unsigned long sz_ops_filter_passed = 0;
unsigned int cidx = 0;
struct damos *siter;
unsigned int sidx = 0;
struct damon_target *titer;
unsigned int tidx = 0;
bool do_trace = false;
if (trace_damos_before_apply_enabled()) {
damon_for_each_scheme(siter, c) {
if (siter == s)
break;
sidx++;
}
damon_for_each_target(titer, c) {
if (titer == t)
break;
tidx++;
}
do_trace = true;
}
if (c->ops.apply_scheme) {
if (quota->esz && quota->charged_sz + sz > quota->esz) {
sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
DAMON_MIN_REGION);
if (!sz)
goto update_stat;
damon_split_region_at(t, r, sz);
}
if (damos_filter_out(c, t, r, s))
return;
ktime_get_coarse_ts64(&begin);
trace_damos_before_apply(cidx, sidx, tidx, r,
damon_nr_regions(t), do_trace);
sz_applied = c->ops.apply_scheme(c, t, r, s,
&sz_ops_filter_passed);
damos_walk_call_walk(c, t, r, s, sz_ops_filter_passed);
ktime_get_coarse_ts64(&end);
quota->total_charged_ns += timespec64_to_ns(&end) -
timespec64_to_ns(&begin);
quota->charged_sz += sz;
if (quota->esz && quota->charged_sz >= quota->esz) {
quota->charge_target_from = t;
quota->charge_addr_from = r->ar.end + 1;
}
}
if (s->action != DAMOS_STAT)
r->age = 0;
update_stat:
damos_update_stat(s, sz, sz_applied, sz_ops_filter_passed);
}
static void damon_do_apply_schemes(struct damon_ctx *c,
struct damon_target *t,
struct damon_region *r)
{
struct damos *s;
damon_for_each_scheme(s, c) {
struct damos_quota *quota = &s->quota;
if (c->passed_sample_intervals < s->next_apply_sis)
continue;
if (!s->wmarks.activated)
continue;
if (quota->esz && quota->charged_sz >= quota->esz)
continue;
if (damos_skip_charged_region(t, &r, s))
continue;
if (!damos_valid_target(c, t, r, s))
continue;
damos_apply_scheme(c, t, r, s);
}
}
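/*
 * damon_feed_loop_next_input() - get next input to achieve a target score.
 * @last_input	The last input.
 * @score	Current score that made with @last_input.
 *
 * Calculate next input to achieve the target score, based on the last input
 * and current score.  Assuming the input and the score are positively
 * proportional, calculate how much compensation should be added to or
 * subtracted from the last input as a proportion of the last input.  Avoid
 * next input always being zero by setting it non-zero always.  In short form
 * (assuming support of float and signed calculations), the algorithm is as
 * below.
 *
 * next_input = max(last_input * ((goal - current) / goal + 1), 0.1)
 *
 * For simple implementation, we assume the target score is always 10,000.
 * The caller should adjust @score for this.
 *
 * Returns next input that assumed to achieve the target score.
 */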
static unsigned long damon_feed_loop_next_input(unsigned long last_input,
unsigned long score)
{
const unsigned long goal = 10000;
const unsigned long min_input = 10000;
unsigned long score_goal_diff, compensation;
bool over_achieving = score > goal;
if (score == goal)
return last_input;
if (score >= goal * 2)
return min_input;
if (over_achieving)
score_goal_diff = score - goal;
else
score_goal_diff = goal - score;
if (last_input < ULONG_MAX / score_goal_diff)
compensation = last_input * score_goal_diff / goal;
else
compensation = last_input / goal * score_goal_diff;
if (over_achieving)
return max(last_input - compensation, min_input);
if (last_input < ULONG_MAX - compensation)
return last_input + compensation;
return ULONG_MAX;
}
#ifdef CONFIG_PSI
static u64 damos_get_some_mem_psi_total(void)
{
if (static_branch_likely(&psi_disabled))
return 0;
return div_u64(psi_system.total[PSI_AVGS][PSI_MEM * 2],
NSEC_PER_USEC);
}
#else
static inline u64 damos_get_some_mem_psi_total(void)
{
return 0;
};
#endif
#ifdef CONFIG_NUMA
static __kernel_ulong_t damos_get_node_mem_bp(
struct damos_quota_goal *goal)
{
struct sysinfo i;
__kernel_ulong_t numerator;
si_meminfo_node(&i, goal->nid);
if (goal->metric == DAMOS_QUOTA_NODE_MEM_USED_BP)
numerator = i.totalram - i.freeram;
else
numerator = i.freeram;
return numerator * 10000 / i.totalram;
}
#else
static __kernel_ulong_t damos_get_node_mem_bp(
struct damos_quota_goal *goal)
{
return 0;
}
#endif
static void damos_set_quota_goal_current_value(struct damos_quota_goal *goal)
{
u64 now_psi_total;
switch (goal->metric) {
case DAMOS_QUOTA_USER_INPUT:
break;
case DAMOS_QUOTA_SOME_MEM_PSI_US:
now_psi_total = damos_get_some_mem_psi_total();
goal->current_value = now_psi_total - goal->last_psi_total;
goal->last_psi_total = now_psi_total;
break;
case DAMOS_QUOTA_NODE_MEM_USED_BP:
case DAMOS_QUOTA_NODE_MEM_FREE_BP:
goal->current_value = damos_get_node_mem_bp(goal);
break;
default:
break;
}
}
static unsigned long damos_quota_score(struct damos_quota *quota)
{
struct damos_quota_goal *goal;
unsigned long highest_score = 0;
damos_for_each_quota_goal(goal, quota) {
damos_set_quota_goal_current_value(goal);
highest_score = max(highest_score,
goal->current_value * 10000 /
goal->target_value);
}
return highest_score;
}
static void damos_set_effective_quota(struct damos_quota *quota)
{
unsigned long throughput;
unsigned long esz = ULONG_MAX;
if (!quota->ms && list_empty("a->goals)) {
quota->esz = quota->sz;
return;
}
if (!list_empty("a->goals)) {
unsigned long score = damos_quota_score(quota);
quota->esz_bp = damon_feed_loop_next_input(
max(quota->esz_bp, 10000UL),
score);
esz = quota->esz_bp / 10000;
}
if (quota->ms) {
if (quota->total_charged_ns)
throughput = mult_frac(quota->total_charged_sz, 1000000,
quota->total_charged_ns);
else
throughput = PAGE_SIZE * 1024;
esz = min(throughput * quota->ms, esz);
}
if (quota->sz && quota->sz < esz)
esz = quota->sz;
quota->esz = esz;
}
static void damos_trace_esz(struct damon_ctx *c, struct damos *s,
struct damos_quota *quota)
{
unsigned int cidx = 0, sidx = 0;
struct damos *siter;
damon_for_each_scheme(siter, c) {
if (siter == s)
break;
sidx++;
}
trace_damos_esz(cidx, sidx, quota->esz);
}
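/*
 * Refill the scheme's effective quota ('esz') when a new charge window
 * starts, and adjust the score threshold ('min_score') so that only the
 * highest-scored regions, up to the effective quota in total size, get the
 * scheme applied.
 */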
static void damos_adjust_quota(struct damon_ctx *c, struct damos *s)
{
struct damos_quota *quota = &s->quota;
struct damon_target *t;
struct damon_region *r;
unsigned long cumulated_sz, cached_esz;
unsigned int score, max_score = 0;
if (!quota->ms && !quota->sz && list_empty("a->goals))
return;
if (time_after_eq(jiffies, quota->charged_from +
msecs_to_jiffies(quota->reset_interval))) {
if (quota->esz && quota->charged_sz >= quota->esz)
s->stat.qt_exceeds++;
quota->total_charged_sz += quota->charged_sz;
quota->charged_from = jiffies;
quota->charged_sz = 0;
if (trace_damos_esz_enabled())
cached_esz = quota->esz;
damos_set_effective_quota(quota);
if (trace_damos_esz_enabled() && quota->esz != cached_esz)
damos_trace_esz(c, s, quota);
}
if (!c->ops.get_scheme_score)
return;
memset(c->regions_score_histogram, 0,
sizeof(*c->regions_score_histogram) *
(DAMOS_MAX_SCORE + 1));
damon_for_each_target(t, c) {
damon_for_each_region(r, t) {
if (!__damos_valid_target(r, s))
continue;
score = c->ops.get_scheme_score(c, t, r, s);
c->regions_score_histogram[score] +=
damon_sz_region(r);
if (score > max_score)
max_score = score;
}
}
for (cumulated_sz = 0, score = max_score; ; score--) {
cumulated_sz += c->regions_score_histogram[score];
if (cumulated_sz >= quota->esz || !score)
break;
}
quota->min_score = score;
}
static void kdamond_apply_schemes(struct damon_ctx *c)
{
struct damon_target *t;
struct damon_region *r, *next_r;
struct damos *s;
unsigned long sample_interval = c->attrs.sample_interval ?
c->attrs.sample_interval : 1;
bool has_schemes_to_apply = false;
damon_for_each_scheme(s, c) {
if (c->passed_sample_intervals < s->next_apply_sis)
continue;
if (!s->wmarks.activated)
continue;
has_schemes_to_apply = true;
damos_adjust_quota(c, s);
}
if (!has_schemes_to_apply)
return;
mutex_lock(&c->walk_control_lock);
damon_for_each_target(t, c) {
damon_for_each_region_safe(r, next_r, t)
damon_do_apply_schemes(c, t, r);
}
damon_for_each_scheme(s, c) {
if (c->passed_sample_intervals < s->next_apply_sis)
continue;
damos_walk_complete(c, s);
s->next_apply_sis = c->passed_sample_intervals +
(s->apply_interval_us ? s->apply_interval_us :
c->attrs.aggr_interval) / sample_interval;
s->last_applied = NULL;
}
mutex_unlock(&c->walk_control_lock);
}
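/*
 * Merge two adjacent regions into one region.
 */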
static void damon_merge_two_regions(struct damon_target *t,
struct damon_region *l, struct damon_region *r)
{
unsigned long sz_l = damon_sz_region(l), sz_r = damon_sz_region(r);
l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
(sz_l + sz_r);
l->nr_accesses_bp = l->nr_accesses * 10000;
l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
l->ar.end = r->ar.end;
damon_destroy_region(r, t);
}
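/*
 * Merge adjacent regions having similar access frequencies.
 *
 * t		target affected by this merge operation.
 * thres	'->nr_accesses' diff threshold for the merge.
 * sz_limit	size upper limit of each region.
 */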
static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
unsigned long sz_limit)
{
struct damon_region *r, *prev = NULL, *next;
damon_for_each_region_safe(r, next, t) {
if (abs(r->nr_accesses - r->last_nr_accesses) > thres)
r->age = 0;
else
r->age++;
if (prev && prev->ar.end == r->ar.start &&
abs(prev->nr_accesses - r->nr_accesses) <= thres &&
damon_sz_region(prev) + damon_sz_region(r) <= sz_limit)
damon_merge_two_regions(t, prev, r);
else
prev = r;
}
}
static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,
unsigned long sz_limit)
{
struct damon_target *t;
unsigned int nr_regions;
unsigned int max_thres;
max_thres = c->attrs.aggr_interval /
(c->attrs.sample_interval ? c->attrs.sample_interval : 1);
do {
nr_regions = 0;
damon_for_each_target(t, c) {
damon_merge_regions_of(t, threshold, sz_limit);
nr_regions += damon_nr_regions(t);
}
threshold = max(1, threshold * 2);
} while (nr_regions > c->attrs.max_nr_regions &&
threshold / 2 < max_thres);
}
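/*
 * Split a region in two.
 *
 * r		the region to be split.
 * sz_r		size of the first sub-region that will be made.
 */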
static void damon_split_region_at(struct damon_target *t,
struct damon_region *r, unsigned long sz_r)
{
struct damon_region *new;
new = damon_new_region(r->ar.start + sz_r, r->ar.end);
if (!new)
return;
r->ar.end = new->ar.start;
new->age = r->age;
new->last_nr_accesses = r->last_nr_accesses;
new->nr_accesses_bp = r->nr_accesses_bp;
new->nr_accesses = r->nr_accesses;
damon_insert_region(new, r, damon_next_region(r), t);
}
static void damon_split_regions_of(struct damon_target *t, int nr_subs)
{
struct damon_region *r, *next;
unsigned long sz_region, sz_sub = 0;
int i;
damon_for_each_region_safe(r, next, t) {
sz_region = damon_sz_region(r);
for (i = 0; i < nr_subs - 1 &&
sz_region > 2 * DAMON_MIN_REGION; i++) {
sz_sub = ALIGN_DOWN(damon_rand(1, 10) *
sz_region / 10, DAMON_MIN_REGION);
if (sz_sub == 0 || sz_sub >= sz_region)
continue;
damon_split_region_at(t, r, sz_sub);
sz_region = sz_sub;
}
}
}
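/*
 * Split every target region into randomly-sized small regions.
 *
 * This function splits every target region into random-sized small regions if
 * current total number of the regions is equal or smaller than half of the
 * user-specified maximum number of regions.  This is for maximizing the
 * monitoring accuracy under the trade-off between accuracy and overheads.
 */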
static void kdamond_split_regions(struct damon_ctx *ctx)
{
struct damon_target *t;
unsigned int nr_regions = 0;
static unsigned int last_nr_regions;
int nr_subregions = 2;
damon_for_each_target(t, ctx)
nr_regions += damon_nr_regions(t);
if (nr_regions > ctx->attrs.max_nr_regions / 2)
return;
if (last_nr_regions == nr_regions &&
nr_regions < ctx->attrs.max_nr_regions / 3)
nr_subregions = 3;
damon_for_each_target(t, ctx)
damon_split_regions_of(t, nr_subregions);
last_nr_regions = nr_regions;
}
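/*
 * Check whether current monitoring should be stopped.
 *
 * The monitoring is stopped when either the user requested to stop, or all
 * monitoring targets are invalid.
 *
 * Returns true if need to stop current monitoring.
 */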
static bool kdamond_need_stop(struct damon_ctx *ctx)
{
struct damon_target *t;
if (kthread_should_stop())
return true;
if (!ctx->ops.target_valid)
return false;
damon_for_each_target(t, ctx) {
if (ctx->ops.target_valid(t))
return false;
}
return true;
}
static int damos_get_wmark_metric_value(enum damos_wmark_metric metric,
unsigned long *metric_value)
{
switch (metric) {
case DAMOS_WMARK_FREE_MEM_RATE:
*metric_value = global_zone_page_state(NR_FREE_PAGES) * 1000 /
totalram_pages();
return 0;
default:
break;
}
return -EINVAL;
}
static unsigned long damos_wmark_wait_us(struct damos *scheme)
{
unsigned long metric;
if (damos_get_wmark_metric_value(scheme->wmarks.metric, &metric))
return 0;
if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) {
if (scheme->wmarks.activated)
pr_debug("deactivate a scheme (%d) for %s wmark\n",
scheme->action,
str_high_low(metric > scheme->wmarks.high));
scheme->wmarks.activated = false;
return scheme->wmarks.interval;
}
if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) &&
!scheme->wmarks.activated)
return scheme->wmarks.interval;
if (!scheme->wmarks.activated)
pr_debug("activate a scheme (%d)\n", scheme->action);
scheme->wmarks.activated = true;
return 0;
}
static void kdamond_usleep(unsigned long usecs)
{
if (usecs >= USLEEP_RANGE_UPPER_BOUND)
schedule_timeout_idle(usecs_to_jiffies(usecs));
else
usleep_range_idle(usecs, usecs + 1);
}
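/*
 * Invoke the requests that were queued via damon_call(), or mark them as
 * canceled if @cancel is set, and put requests having 'repeat' set back on
 * the queue for later invocations.
 */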
static void kdamond_call(struct damon_ctx *ctx, bool cancel)
{
struct damon_call_control *control;
LIST_HEAD(repeat_controls);
int ret = 0;
while (true) {
mutex_lock(&ctx->call_controls_lock);
control = list_first_entry_or_null(&ctx->call_controls,
struct damon_call_control, list);
mutex_unlock(&ctx->call_controls_lock);
if (!control)
break;
if (cancel) {
control->canceled = true;
} else {
ret = control->fn(control->data);
control->return_code = ret;
}
mutex_lock(&ctx->call_controls_lock);
list_del(&control->list);
mutex_unlock(&ctx->call_controls_lock);
if (!control->repeat)
complete(&control->completion);
else
list_add(&control->list, &repeat_controls);
}
control = list_first_entry_or_null(&repeat_controls,
struct damon_call_control, list);
if (!control || cancel)
return;
mutex_lock(&ctx->call_controls_lock);
list_add_tail(&control->list, &ctx->call_controls);
mutex_unlock(&ctx->call_controls_lock);
}
static int kdamond_wait_activation(struct damon_ctx *ctx)
{
struct damos *s;
unsigned long wait_time;
unsigned long min_wait_time = 0;
bool init_wait_time = false;
while (!kdamond_need_stop(ctx)) {
damon_for_each_scheme(s, ctx) {
wait_time = damos_wmark_wait_us(s);
if (!init_wait_time || wait_time < min_wait_time) {
init_wait_time = true;
min_wait_time = wait_time;
}
}
if (!min_wait_time)
return 0;
kdamond_usleep(min_wait_time);
kdamond_call(ctx, false);
damos_walk_cancel(ctx);
}
return -EBUSY;
}
static void kdamond_init_ctx(struct damon_ctx *ctx)
{
unsigned long sample_interval = ctx->attrs.sample_interval ?
ctx->attrs.sample_interval : 1;
unsigned long apply_interval;
struct damos *scheme;
ctx->passed_sample_intervals = 0;
ctx->next_aggregation_sis = ctx->attrs.aggr_interval / sample_interval;
ctx->next_ops_update_sis = ctx->attrs.ops_update_interval /
sample_interval;
ctx->next_intervals_tune_sis = ctx->next_aggregation_sis *
ctx->attrs.intervals_goal.aggrs;
damon_for_each_scheme(scheme, ctx) {
apply_interval = scheme->apply_interval_us ?
scheme->apply_interval_us : ctx->attrs.aggr_interval;
scheme->next_apply_sis = apply_interval / sample_interval;
damos_set_filters_default_reject(scheme);
}
}
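/*
 * The function for the DAMON worker thread (kdamond).
 */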
static int kdamond_fn(void *data)
{
struct damon_ctx *ctx = data;
struct damon_target *t;
struct damon_region *r, *next;
unsigned int max_nr_accesses = 0;
unsigned long sz_limit = 0;
pr_debug("kdamond (%d) starts\n", current->pid);
complete(&ctx->kdamond_started);
kdamond_init_ctx(ctx);
if (ctx->ops.init)
ctx->ops.init(ctx);
ctx->regions_score_histogram = kmalloc_array(DAMOS_MAX_SCORE + 1,
sizeof(*ctx->regions_score_histogram), GFP_KERNEL);
if (!ctx->regions_score_histogram)
goto done;
sz_limit = damon_region_sz_limit(ctx);
while (!kdamond_need_stop(ctx)) {
unsigned long next_aggregation_sis = ctx->next_aggregation_sis;
unsigned long next_ops_update_sis = ctx->next_ops_update_sis;
unsigned long sample_interval = ctx->attrs.sample_interval;
if (kdamond_wait_activation(ctx))
break;
if (ctx->ops.prepare_access_checks)
ctx->ops.prepare_access_checks(ctx);
kdamond_usleep(sample_interval);
ctx->passed_sample_intervals++;
if (ctx->ops.check_accesses)
max_nr_accesses = ctx->ops.check_accesses(ctx);
if (ctx->passed_sample_intervals >= next_aggregation_sis)
kdamond_merge_regions(ctx,
max_nr_accesses / 10,
sz_limit);
kdamond_call(ctx, false);
if (!list_empty(&ctx->schemes))
kdamond_apply_schemes(ctx);
else
damos_walk_cancel(ctx);
sample_interval = ctx->attrs.sample_interval ?
ctx->attrs.sample_interval : 1;
if (ctx->passed_sample_intervals >= next_aggregation_sis) {
if (ctx->attrs.intervals_goal.aggrs &&
ctx->passed_sample_intervals >=
ctx->next_intervals_tune_sis) {
ctx->next_aggregation_sis =
next_aggregation_sis;
ctx->next_intervals_tune_sis +=
ctx->attrs.aggr_samples *
ctx->attrs.intervals_goal.aggrs;
kdamond_tune_intervals(ctx);
sample_interval = ctx->attrs.sample_interval ?
ctx->attrs.sample_interval : 1;
}
ctx->next_aggregation_sis = next_aggregation_sis +
ctx->attrs.aggr_interval / sample_interval;
kdamond_reset_aggregated(ctx);
kdamond_split_regions(ctx);
}
if (ctx->passed_sample_intervals >= next_ops_update_sis) {
ctx->next_ops_update_sis = next_ops_update_sis +
ctx->attrs.ops_update_interval /
sample_interval;
if (ctx->ops.update)
ctx->ops.update(ctx);
sz_limit = damon_region_sz_limit(ctx);
}
}
done:
damon_for_each_target(t, ctx) {
damon_for_each_region_safe(r, next, t)
damon_destroy_region(r, t);
}
if (ctx->ops.cleanup)
ctx->ops.cleanup(ctx);
kfree(ctx->regions_score_histogram);
pr_debug("kdamond (%d) finishes\n", current->pid);
mutex_lock(&ctx->kdamond_lock);
ctx->kdamond = NULL;
mutex_unlock(&ctx->kdamond_lock);
kdamond_call(ctx, true);
damos_walk_cancel(ctx);
mutex_lock(&damon_lock);
nr_running_ctxs--;
if (!nr_running_ctxs && running_exclusive_ctxs)
running_exclusive_ctxs = false;
mutex_unlock(&damon_lock);
damon_destroy_targets(ctx);
return 0;
}
struct damon_system_ram_region {
unsigned long start;
unsigned long end;
};
static int walk_system_ram(struct resource *res, void *arg)
{
struct damon_system_ram_region *a = arg;
if (a->end - a->start < resource_size(res)) {
a->start = res->start;
a->end = res->end;
}
return 0;
}
static bool damon_find_biggest_system_ram(unsigned long *start,
unsigned long *end)
{
struct damon_system_ram_region arg = {};
walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram);
if (arg.end <= arg.start)
return false;
*start = arg.start;
*end = arg.end;
return true;
}
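/**
 * damon_set_region_biggest_system_ram_default() - Set the region of the given
 * monitoring target as requested, or biggest 'System RAM'.
 * @t:		The monitoring target to set the region.
 * @start:	The pointer to the start address of the region.
 * @end:	The pointer to the end address of the region.
 *
 * This function sets the region of @t as requested by @start and @end.  If
 * the values of @start and @end are zero, however, this function finds the
 * biggest 'System RAM' resource and sets the region to cover the resource.
 * In the latter case, this function saves the start and end addresses of the
 * resource in @start and @end, respectively.
 *
 * Return: 0 on success, negative error code otherwise.
 */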
int damon_set_region_biggest_system_ram_default(struct damon_target *t,
unsigned long *start, unsigned long *end)
{
struct damon_addr_range addr_range;
if (*start > *end)
return -EINVAL;
if (!*start && !*end &&
!damon_find_biggest_system_ram(start, end))
return -EINVAL;
addr_range.start = *start;
addr_range.end = *end;
return damon_set_regions(t, &addr_range, 1);
}
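/**
 * damon_moving_sum() - Calculate an inferred moving sum value.
 * @mvsum:	The last moving sum.
 * @nomvsum:	Non-moving sum of the last discrete events.
 * @len_window:	The number of discrete events in the window.
 * @new_value:	New value of the latest discrete event.
 *
 * An inferred moving sum is calculated by subtracting the average of the last
 * window from the last moving sum and adding the latest value, which avoids
 * keeping every discrete value in the window.
 *
 * Return: Pseudo-moving sum after getting the @new_value.
 */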
static unsigned int damon_moving_sum(unsigned int mvsum, unsigned int nomvsum,
unsigned int len_window, unsigned int new_value)
{
return mvsum - nomvsum / len_window + new_value;
}
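/**
 * damon_update_region_access_rate() - Update the access rate of a region.
 * @r:		The DAMON region to update for its access check result.
 * @accessed:	Whether the region has accessed during last sampling interval.
 * @attrs:	The damon_attrs of the DAMON context.
 *
 * Update the access rate of a region with the region's last sampling interval
 * access check result.  Usually this will be called by the
 * &damon_operations->check_accesses callback.
 */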
void damon_update_region_access_rate(struct damon_region *r, bool accessed,
struct damon_attrs *attrs)
{
unsigned int len_window = 1;
if (attrs->sample_interval)
len_window = damon_max_nr_accesses(attrs);
r->nr_accesses_bp = damon_moving_sum(r->nr_accesses_bp,
r->last_nr_accesses * 10000, len_window,
accessed ? 10000 : 0);
if (accessed)
r->nr_accesses++;
}
static int __init damon_init(void)
{
damon_region_cache = KMEM_CACHE(damon_region, 0);
if (unlikely(!damon_region_cache)) {
pr_err("creating damon_region_cache fails\n");
return -ENOMEM;
}
return 0;
}
subsys_initcall(damon_init);
#include "tests/core-kunit.h"