// SPDX-License-Identifier: GPL-2.012#include <linux/version.h>3#include <linux/ptrace.h>4#include <uapi/linux/bpf.h>5#include <bpf/bpf_helpers.h>67/*8* The CPU number, cstate number and pstate number are based9* on 96boards Hikey with octa CA53 CPUs.10*11* Every CPU have three idle states for cstate:12* WFI, CPU_OFF, CLUSTER_OFF13*14* Every CPU have 5 operating points:15* 208MHz, 432MHz, 729MHz, 960MHz, 1200MHz16*17* This code is based on these assumption and other platforms18* need to adjust these definitions.19*/20#define MAX_CPU 821#define MAX_PSTATE_ENTRIES 522#define MAX_CSTATE_ENTRIES 32324static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };2526/*27* my_map structure is used to record cstate and pstate index and28* timestamp (Idx, Ts), when new event incoming we need to update29* combination for new state index and timestamp (Idx`, Ts`).30*31* Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time32* interval for the previous state: Duration(Idx) = Ts` - Ts.33*34* Every CPU has one below array for recording state index and35* timestamp, and record for cstate and pstate saperately:36*37* +--------------------------+38* | cstate timestamp |39* +--------------------------+40* | cstate index |41* +--------------------------+42* | pstate timestamp |43* +--------------------------+44* | pstate index |45* +--------------------------+46*/47#define MAP_OFF_CSTATE_TIME 048#define MAP_OFF_CSTATE_IDX 149#define MAP_OFF_PSTATE_TIME 250#define MAP_OFF_PSTATE_IDX 351#define MAP_OFF_NUM 45253struct {54__uint(type, BPF_MAP_TYPE_ARRAY);55__type(key, u32);56__type(value, u64);57__uint(max_entries, MAX_CPU * MAP_OFF_NUM);58} my_map SEC(".maps");5960/* cstate_duration records duration time for every idle state per CPU */61struct {62__uint(type, BPF_MAP_TYPE_ARRAY);63__type(key, u32);64__type(value, u64);65__uint(max_entries, MAX_CPU * MAX_CSTATE_ENTRIES);66} cstate_duration SEC(".maps");6768/* pstate_duration records duration time for every operating point per CPU */69struct {70__uint(type, BPF_MAP_TYPE_ARRAY);71__type(key, u32);72__type(value, u64);73__uint(max_entries, MAX_CPU * MAX_PSTATE_ENTRIES);74} pstate_duration SEC(".maps");7576/*77* The trace events for cpu_idle and cpu_frequency are taken from:78* /sys/kernel/tracing/events/power/cpu_idle/format79* /sys/kernel/tracing/events/power/cpu_frequency/format80*81* These two events have same format, so define one common structure.82*/83struct cpu_args {84u64 pad;85u32 state;86u32 cpu_id;87};8889/* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */90static u32 find_cpu_pstate_idx(u32 frequency)91{92u32 i;9394for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {95if (frequency == cpu_opps[i])96return i;97}9899return i;100}101102SEC("tracepoint/power/cpu_idle")103int bpf_prog1(struct cpu_args *ctx)104{105u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;106u32 key, cpu, pstate_idx;107u64 *val;108109if (ctx->cpu_id > MAX_CPU)110return 0;111112cpu = ctx->cpu_id;113114key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;115cts = bpf_map_lookup_elem(&my_map, &key);116if (!cts)117return 0;118119key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;120cstate = bpf_map_lookup_elem(&my_map, &key);121if (!cstate)122return 0;123124key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;125pts = bpf_map_lookup_elem(&my_map, &key);126if (!pts)127return 0;128129key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;130pstate = bpf_map_lookup_elem(&my_map, &key);131if (!pstate)132return 0;133134prev_state = *cstate;135*cstate = ctx->state;136137if (!*cts) {138*cts = bpf_ktime_get_ns();139return 0;140}141142cur_ts = bpf_ktime_get_ns();143delta = cur_ts - *cts;144*cts = cur_ts;145146/*147* When state doesn't equal to (u32)-1, the cpu will enter148* one idle state; for this case we need to record interval149* for the pstate.150*151* OPP2152* +---------------------+153* OPP1 | |154* ---------+ |155* | Idle state156* +---------------157*158* |<- pstate duration ->|159* ^ ^160* pts cur_ts161*/162if (ctx->state != (u32)-1) {163164/* record pstate after have first cpu_frequency event */165if (!*pts)166return 0;167168delta = cur_ts - *pts;169170pstate_idx = find_cpu_pstate_idx(*pstate);171if (pstate_idx >= MAX_PSTATE_ENTRIES)172return 0;173174key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;175val = bpf_map_lookup_elem(&pstate_duration, &key);176if (val)177__sync_fetch_and_add((long *)val, delta);178179/*180* When state equal to (u32)-1, the cpu just exits from one181* specific idle state; for this case we need to record182* interval for the pstate.183*184* OPP2185* -----------+186* | OPP1187* | +-----------188* | Idle state |189* +---------------------+190*191* |<- cstate duration ->|192* ^ ^193* cts cur_ts194*/195} else {196197key = cpu * MAX_CSTATE_ENTRIES + prev_state;198val = bpf_map_lookup_elem(&cstate_duration, &key);199if (val)200__sync_fetch_and_add((long *)val, delta);201}202203/* Update timestamp for pstate as new start time */204if (*pts)205*pts = cur_ts;206207return 0;208}209210SEC("tracepoint/power/cpu_frequency")211int bpf_prog2(struct cpu_args *ctx)212{213u64 *pts, *cstate, *pstate, cur_ts, delta;214u32 key, cpu, pstate_idx;215u64 *val;216217cpu = ctx->cpu_id;218219key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;220pts = bpf_map_lookup_elem(&my_map, &key);221if (!pts)222return 0;223224key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;225pstate = bpf_map_lookup_elem(&my_map, &key);226if (!pstate)227return 0;228229key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;230cstate = bpf_map_lookup_elem(&my_map, &key);231if (!cstate)232return 0;233234*pstate = ctx->state;235236if (!*pts) {237*pts = bpf_ktime_get_ns();238return 0;239}240241cur_ts = bpf_ktime_get_ns();242delta = cur_ts - *pts;243*pts = cur_ts;244245/* When CPU is in idle, bail out to skip pstate statistics */246if (*cstate != (u32)(-1))247return 0;248249/*250* The cpu changes to another different OPP (in below diagram251* change frequency from OPP3 to OPP1), need recording interval252* for previous frequency OPP3 and update timestamp as start253* time for new frequency OPP1.254*255* OPP3256* +---------------------+257* OPP2 | |258* ---------+ |259* | OPP1260* +---------------261*262* |<- pstate duration ->|263* ^ ^264* pts cur_ts265*/266pstate_idx = find_cpu_pstate_idx(*pstate);267if (pstate_idx >= MAX_PSTATE_ENTRIES)268return 0;269270key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;271val = bpf_map_lookup_elem(&pstate_duration, &key);272if (val)273__sync_fetch_and_add((long *)val, delta);274275return 0;276}277278char _license[] SEC("license") = "GPL";279u32 _version SEC("version") = LINUX_VERSION_CODE;280281282