Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/sched_ext/scx_simple.bpf.c
26278 views
1
/* SPDX-License-Identifier: GPL-2.0 */
2
/*
3
* A simple scheduler.
4
*
5
* By default, it operates as a simple global weighted vtime scheduler and can
6
* be switched to FIFO scheduling. It also demonstrates the following niceties.
7
*
8
* - Statistics tracking how many tasks are queued to local and global dsq's.
9
* - Termination notification for userspace.
10
*
11
* While very simple, this scheduler should work reasonably well on CPUs with a
12
* uniform L3 cache topology. While preemption is not implemented, the fact that
13
* the scheduling queue is shared across all CPUs means that whatever is at the
14
* front of the queue is likely to be executed fairly quickly given enough
15
* number of CPUs. The FIFO scheduling mode may be beneficial to some workloads
16
* but comes with the usual problems with FIFO scheduling where saturating
17
* threads can easily drown out interactive ones.
18
*
19
* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
20
* Copyright (c) 2022 Tejun Heo <tj@kernel.org>
21
* Copyright (c) 2022 David Vernet <dvernet@meta.com>
22
*/
23
#include <scx/common.bpf.h>
24
25
/* License string emitted into the "license" section of the BPF object. */
char _license[] SEC("license") = "GPL";
26
27
/*
 * Scheduling mode switch read by the callbacks below: when true, tasks are
 * queued FIFO on the shared DSQ and all vtime bookkeeping is skipped.
 */
const volatile bool fifo_sched;
28
29
/*
 * Global virtual time. Moved forward in simple_running() and used as the
 * reference point in simple_enqueue() and simple_enable().
 */
static u64 vtime_now;
30
/* Exit-info storage; filled by UEI_RECORD() in simple_exit(). */
UEI_DEFINE(uei);
31
32
/*
33
* Built-in DSQs such as SCX_DSQ_GLOBAL cannot be used as priority queues
34
* (meaning, cannot be dispatched to with scx_bpf_dsq_insert_vtime()). We
35
* therefore create a separate DSQ with ID 0 that we dispatch to and consume
36
* from. If scx_simple only supported global FIFO scheduling, then we could just
37
* use SCX_DSQ_GLOBAL.
38
*/
39
/* ID of the custom DSQ created in simple_init() and used by every callback. */
#define SHARED_DSQ 0
40
41
struct {
42
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
43
__uint(key_size, sizeof(u32));
44
__uint(value_size, sizeof(u64));
45
__uint(max_entries, 2); /* [local, global] */
46
} stats SEC(".maps");
47
48
/*
 * Add one to the counter stored at slot @idx of the per-CPU stats map.
 * Does nothing when the lookup yields NULL.
 */
static void stat_inc(u32 idx)
{
	u64 *slot;

	slot = bpf_map_lookup_elem(&stats, &idx);
	if (!slot)
		return;

	*slot += 1;
}
54
55
/*
 * Choose a CPU for @p with the default selection helper and return it.
 *
 * If the helper reports that the chosen CPU is idle, @p is inserted directly
 * into SCX_DSQ_LOCAL with the default slice and the "local" statistic is
 * incremented.
 */
s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags)
{
	bool found_idle = false;
	s32 target = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &found_idle);

	if (!found_idle)
		return target;

	stat_inc(0);	/* count local queueing */
	scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);

	return target;
}
68
69
/*
 * Queue @p on the shared DSQ and bump the "global" statistic.
 *
 * In FIFO mode the task is inserted as-is. Otherwise it is inserted ordered
 * by its vtime, after clamping that vtime so that it is never more than one
 * default slice behind the global vtime.
 */
void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags)
{
	u64 key;

	stat_inc(1);	/* count global queueing */

	if (fifo_sched) {
		scx_bpf_dsq_insert(p, SHARED_DSQ, SCX_SLICE_DFL, enq_flags);
		return;
	}

	key = p->scx.dsq_vtime;

	/*
	 * A task that has been idle must not bank more than one slice worth
	 * of budget, so pull its vtime up to at most one slice in the past.
	 */
	if (time_before(key, vtime_now - SCX_SLICE_DFL))
		key = vtime_now - SCX_SLICE_DFL;

	scx_bpf_dsq_insert_vtime(p, SHARED_DSQ, SCX_SLICE_DFL, key, enq_flags);
}
89
90
/*
 * Dispatch callback: request a move from the shared DSQ to the local DSQ.
 * Neither @cpu nor @prev is consulted, and the helper's result is not
 * inspected.
 */
void BPF_STRUCT_OPS(simple_dispatch, s32 cpu, struct task_struct *prev)
{
	scx_bpf_dsq_move_to_local(SHARED_DSQ);
}
94
95
/*
 * Called as @p starts running. Outside FIFO mode, advance the global vtime
 * to @p's vtime whenever the latter is ahead.
 */
void BPF_STRUCT_OPS(simple_running, struct task_struct *p)
{
	/*
	 * The global vtime only ever moves forward. Several CPUs may run this
	 * check-then-store at the same time without synchronization; the
	 * resulting error is bounded and short-lived, so the race is accepted.
	 */
	if (!fifo_sched && time_before(vtime_now, p->scx.dsq_vtime))
		vtime_now = p->scx.dsq_vtime;
}
109
110
/*
 * Called as @p stops running. Outside FIFO mode, charge the consumed part of
 * the slice to @p's vtime, scaled inversely by the task weight. @runnable is
 * not used.
 */
void BPF_STRUCT_OPS(simple_stopping, struct task_struct *p, bool runnable)
{
	u64 consumed;

	if (fifo_sched)
		return;

	/*
	 * The default yield implementation zeroes @p->scx.slice, so a yielding
	 * task is billed as though it had used its entire slice. Should that
	 * be too harsh on yielders, measure the run time with explicit
	 * timestamps rather than deriving it from @p->scx.slice.
	 */
	consumed = SCX_SLICE_DFL - p->scx.slice;
	p->scx.dsq_vtime += consumed * 100 / p->scx.weight;
}
126
127
/*
 * Enable callback for @p: start the task's vtime at the current global
 * vtime.
 */
void BPF_STRUCT_OPS(simple_enable, struct task_struct *p)
{
	p->scx.dsq_vtime = vtime_now;
}
131
132
s32 BPF_STRUCT_OPS_SLEEPABLE(simple_init)
133
{
134
return scx_bpf_create_dsq(SHARED_DSQ, -1);
135
}
136
137
/*
 * Scheduler exit callback: record the exit information @ei into uei via
 * UEI_RECORD().
 */
void BPF_STRUCT_OPS(simple_exit, struct scx_exit_info *ei)
{
	UEI_RECORD(uei, ei);
}
141
142
SCX_OPS_DEFINE(simple_ops,
143
.select_cpu = (void *)simple_select_cpu,
144
.enqueue = (void *)simple_enqueue,
145
.dispatch = (void *)simple_dispatch,
146
.running = (void *)simple_running,
147
.stopping = (void *)simple_stopping,
148
.enable = (void *)simple_enable,
149
.init = (void *)simple_init,
150
.exit = (void *)simple_exit,
151
.name = "simple");
152
153