/* SPDX-License-Identifier: GPL-2.0 */
/*
 * A simple scheduler.
 *
 * By default, it operates as a simple global weighted vtime scheduler and can
 * be switched to FIFO scheduling. It also demonstrates the following niceties.
 *
 * - Statistics tracking how many tasks are queued to the local and global
 *   DSQs.
 * - Termination notification for userspace.
 *
 * While very simple, this scheduler should work reasonably well on CPUs with a
 * uniform L3 cache topology. While preemption is not implemented, the fact
 * that the scheduling queue is shared across all CPUs means that whatever is
 * at the front of the queue is likely to be executed fairly quickly given
 * enough CPUs. The FIFO scheduling mode may benefit some workloads but comes
 * with the usual problems of FIFO scheduling, where saturating threads can
 * easily drown out interactive ones.
 *
 * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
 * Copyright (c) 2022 Tejun Heo <[email protected]>
 * Copyright (c) 2022 David Vernet <[email protected]>
 */
#include <scx/common.bpf.h>

char _license[] SEC("license") = "GPL";

const volatile bool fifo_sched;

static u64 vtime_now;
UEI_DEFINE(uei);

/*
 * Built-in DSQs such as SCX_DSQ_GLOBAL cannot be used as priority queues
 * (meaning, cannot be dispatched to with scx_bpf_dsq_insert_vtime()). We
 * therefore create a separate DSQ with ID 0 that we dispatch to and consume
 * from. If scx_simple only supported global FIFO scheduling, then we could
 * just use SCX_DSQ_GLOBAL.
 */
#define SHARED_DSQ 0

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(u64));
	__uint(max_entries, 2);			/* [local, global] */
} stats SEC(".maps");

static void stat_inc(u32 idx)
{
	u64 *cnt_p = bpf_map_lookup_elem(&stats, &idx);

	if (cnt_p)
		(*cnt_p)++;
}

s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags)
{
	bool is_idle = false;
	s32 cpu;

	cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle);
	if (is_idle) {
		stat_inc(0);	/* count local queueing */
		scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
	}

	return cpu;
}

void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags)
{
	stat_inc(1);	/* count global queueing */

	if (fifo_sched) {
		scx_bpf_dsq_insert(p, SHARED_DSQ, SCX_SLICE_DFL, enq_flags);
	} else {
		u64 vtime = p->scx.dsq_vtime;

		/*
		 * Limit the amount of budget that an idling task can
		 * accumulate to one slice.
		 */
		if (time_before(vtime, vtime_now - SCX_SLICE_DFL))
			vtime = vtime_now - SCX_SLICE_DFL;

		scx_bpf_dsq_insert_vtime(p, SHARED_DSQ, SCX_SLICE_DFL, vtime,
					 enq_flags);
	}
}

void BPF_STRUCT_OPS(simple_dispatch, s32 cpu, struct task_struct *prev)
{
	scx_bpf_dsq_move_to_local(SHARED_DSQ);
}

void BPF_STRUCT_OPS(simple_running, struct task_struct *p)
{
	if (fifo_sched)
		return;

	/*
	 * Global vtime always progresses forward as tasks start executing.
	 * The test and update can be performed concurrently from multiple
	 * CPUs and are thus racy. Any error should be contained and
	 * temporary. Let's just live with it.
	 */
	if (time_before(vtime_now, p->scx.dsq_vtime))
		vtime_now = p->scx.dsq_vtime;
}

void BPF_STRUCT_OPS(simple_stopping, struct task_struct *p, bool runnable)
{
	if (fifo_sched)
		return;

	/*
	 * Scale the execution time by the inverse of the weight and charge it.
	 *
	 * Note that the default yield implementation yields by setting
	 * @p->scx.slice to zero and the following would treat the yielding
	 * task as if it has consumed all its slice. If this penalizes
	 * yielding tasks too much, determine the execution time by taking
	 * explicit timestamps instead of depending on @p->scx.slice.
	 */
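	/*
	 * Worked example (illustrative; assumes SCX_SLICE_DFL of 20ms and
	 * 100 as the default scx weight): a task that ran half of its slice
	 * has SCX_SLICE_DFL - @p->scx.slice = 10ms of execution to charge.
	 * At weight 100 its vtime advances by 10ms; at weight 200 by only
	 * 5ms. Higher-weight tasks therefore accumulate vtime more slowly
	 * and are picked from SHARED_DSQ proportionally more often.
	 */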
	p->scx.dsq_vtime += (SCX_SLICE_DFL - p->scx.slice) * 100 / p->scx.weight;
}

void BPF_STRUCT_OPS(simple_enable, struct task_struct *p)
{
	p->scx.dsq_vtime = vtime_now;
}

s32 BPF_STRUCT_OPS_SLEEPABLE(simple_init)
{
	return scx_bpf_create_dsq(SHARED_DSQ, -1);
}

void BPF_STRUCT_OPS(simple_exit, struct scx_exit_info *ei)
{
	UEI_RECORD(uei, ei);
}

SCX_OPS_DEFINE(simple_ops,
	       .select_cpu		= (void *)simple_select_cpu,
	       .enqueue			= (void *)simple_enqueue,
	       .dispatch		= (void *)simple_dispatch,
	       .running			= (void *)simple_running,
	       .stopping		= (void *)simple_stopping,
	       .enable			= (void *)simple_enable,
	       .init			= (void *)simple_init,
	       .exit			= (void *)simple_exit,
	       .name			= "simple");
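
/*
 * For reference, a minimal userspace sketch of how the per-CPU stats could be
 * summed for reporting. This is an illustrative assumption, not part of the
 * kernel-side scheduler: it presumes a libbpf skeleton generated from this
 * file, with the map reachable as skel->maps.stats. For a per-CPU array,
 * userspace bpf_map_lookup_elem() fills one value per possible CPU.
 *
 *	int nr_cpus = libbpf_num_possible_cpus();
 *	u64 vals[nr_cpus], cnts[2] = {0};
 *
 *	for (u32 idx = 0; idx < 2; idx++) {
 *		if (bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats),
 *					&idx, vals))
 *			continue;	// lookup failed, skip this index
 *		for (int cpu = 0; cpu < nr_cpus; cpu++)
 *			cnts[idx] += vals[cpu];	// [0] local, [1] global
 *	}
 */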