Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/samples/bpf/cpustat_kern.c
25924 views
1
// SPDX-License-Identifier: GPL-2.0
2
3
#include <linux/version.h>
4
#include <linux/ptrace.h>
5
#include <uapi/linux/bpf.h>
6
#include <bpf/bpf_helpers.h>
7
8
/*
9
* The CPU number, cstate number and pstate number are based
10
* on 96boards Hikey with octa CA53 CPUs.
11
*
12
* Every CPU has three idle states for cstate:
13
* WFI, CPU_OFF, CLUSTER_OFF
14
*
15
* Every CPU has 5 operating points:
16
* 208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
17
*
18
* This code is based on these assumptions and other platforms
19
* need to adjust these definitions.
20
*/
21
/* Number of CPUs tracked; per-CPU map slots are sized from this. */
#define MAX_CPU			8
/* Number of operating points (pstates) accounted per CPU. */
#define MAX_PSTATE_ENTRIES	5
/* Number of idle states (cstates) accounted per CPU. */
#define MAX_CSTATE_ENTRIES	3

/*
 * Table of known operating points, in ascending order. The values are
 * presumably in kHz (they line up with the MHz list in the header
 * comment) -- confirm against the cpu_frequency event on the target.
 */
static int cpu_opps[] = {
	208000,
	432000,
	729000,
	960000,
	1200000,
};
26
27
/*
28
* my_map structure is used to record cstate and pstate index and
29
* timestamp (Idx, Ts); when a new event arrives we need to update the
30
* combination for new state index and timestamp (Idx`, Ts`).
31
*
32
* Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time
33
* interval for the previous state: Duration(Idx) = Ts` - Ts.
34
*
35
* Every CPU has one below array for recording state index and
36
* timestamp, and record for cstate and pstate separately:
37
*
38
* +--------------------------+
39
* | cstate timestamp |
40
* +--------------------------+
41
* | cstate index |
42
* +--------------------------+
43
* | pstate timestamp |
44
* +--------------------------+
45
* | pstate index |
46
* +--------------------------+
47
*/
48
#define MAP_OFF_CSTATE_TIME 0
49
#define MAP_OFF_CSTATE_IDX 1
50
#define MAP_OFF_PSTATE_TIME 2
51
#define MAP_OFF_PSTATE_IDX 3
52
#define MAP_OFF_NUM 4
53
54
struct {
55
__uint(type, BPF_MAP_TYPE_ARRAY);
56
__type(key, u32);
57
__type(value, u64);
58
__uint(max_entries, MAX_CPU * MAP_OFF_NUM);
59
} my_map SEC(".maps");
60
61
/* cstate_duration records duration time for every idle state per CPU */
62
struct {
63
__uint(type, BPF_MAP_TYPE_ARRAY);
64
__type(key, u32);
65
__type(value, u64);
66
__uint(max_entries, MAX_CPU * MAX_CSTATE_ENTRIES);
67
} cstate_duration SEC(".maps");
68
69
/* pstate_duration records duration time for every operating point per CPU */
70
struct {
71
__uint(type, BPF_MAP_TYPE_ARRAY);
72
__type(key, u32);
73
__type(value, u64);
74
__uint(max_entries, MAX_CPU * MAX_PSTATE_ENTRIES);
75
} pstate_duration SEC(".maps");
76
77
/*
78
* The trace events for cpu_idle and cpu_frequency are taken from:
79
* /sys/kernel/tracing/events/power/cpu_idle/format
80
* /sys/kernel/tracing/events/power/cpu_frequency/format
81
*
82
* These two events have the same format, so define one common structure.
83
*/
84
struct cpu_args {
85
u64 pad;
86
u32 state;
87
u32 cpu_id;
88
};
89
90
/* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
91
static u32 find_cpu_pstate_idx(u32 frequency)
92
{
93
u32 i;
94
95
for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
96
if (frequency == cpu_opps[i])
97
return i;
98
}
99
100
return i;
101
}
102
103
/*
 * Handler for the power/cpu_idle tracepoint.
 *
 * Stores the new idle state for ctx->cpu_id in my_map and, once a cstate
 * timestamp has been recorded for that CPU:
 *
 *  - on idle entry (ctx->state != (u32)-1), adds the time elapsed since
 *    the pstate timestamp to pstate_duration for the frequency currently
 *    stored in my_map;
 *  - on idle exit (ctx->state == (u32)-1), adds the time elapsed since
 *    the cstate timestamp to cstate_duration for the idle state that is
 *    being left.
 *
 * Events for CPUs outside 0 .. MAX_CPU - 1 are ignored.
 *
 * Always returns 0.
 */
SEC("tracepoint/power/cpu_idle")
int bpf_prog1(struct cpu_args *ctx)
{
	u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
	u32 key, cpu, pstate_idx;
	u64 *val;

	/* Valid CPU numbers are 0 .. MAX_CPU - 1 */
	if (ctx->cpu_id >= MAX_CPU)
		return 0;

	cpu = ctx->cpu_id;

	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
	cts = bpf_map_lookup_elem(&my_map, &key);
	if (!cts)
		return 0;

	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
	cstate = bpf_map_lookup_elem(&my_map, &key);
	if (!cstate)
		return 0;

	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
	pts = bpf_map_lookup_elem(&my_map, &key);
	if (!pts)
		return 0;

	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
	pstate = bpf_map_lookup_elem(&my_map, &key);
	if (!pstate)
		return 0;

	/* Remember the state being left before overwriting it */
	prev_state = *cstate;
	*cstate = ctx->state;

	/* First event for this CPU: only record the start timestamp */
	if (!*cts) {
		*cts = bpf_ktime_get_ns();
		return 0;
	}

	cur_ts = bpf_ktime_get_ns();
	delta = cur_ts - *cts;
	*cts = cur_ts;

	/*
	 * When state doesn't equal to (u32)-1, the cpu will enter
	 * one idle state; for this case we need to record interval
	 * for the pstate.
	 *
	 *                 OPP2
	 *            +---------------------+
	 *     OPP1   |                     |
	 *   ---------+                     |
	 *                                  |  Idle state
	 *                                  +---------------
	 *
	 *            |<- pstate duration ->|
	 *            ^                     ^
	 *           pts                  cur_ts
	 */
	if (ctx->state != (u32)-1) {

		/* record pstate after have first cpu_frequency event */
		if (!*pts)
			return 0;

		delta = cur_ts - *pts;

		pstate_idx = find_cpu_pstate_idx(*pstate);
		if (pstate_idx >= MAX_PSTATE_ENTRIES)
			return 0;

		key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
		val = bpf_map_lookup_elem(&pstate_duration, &key);
		if (val)
			__sync_fetch_and_add((long *)val, delta);

	/*
	 * When state equal to (u32)-1, the cpu just exits from one
	 * specific idle state; for this case we need to record
	 * interval for the cstate.
	 *
	 *       OPP2
	 *   -----------+
	 *              |                          OPP1
	 *              |                     +-----------
	 *              |     Idle state      |
	 *              +---------------------+
	 *
	 *              |<- cstate duration ->|
	 *              ^                     ^
	 *             cts                  cur_ts
	 */
	} else {

		/*
		 * Only account a state that fits this CPU's slice of
		 * cstate_duration. An out-of-range prev_state (for example
		 * (u32)-1 after two consecutive exit events) would otherwise
		 * produce a key that belongs to a different CPU.
		 */
		if (prev_state < MAX_CSTATE_ENTRIES) {
			key = cpu * MAX_CSTATE_ENTRIES + prev_state;
			val = bpf_map_lookup_elem(&cstate_duration, &key);
			if (val)
				__sync_fetch_and_add((long *)val, delta);
		}
	}

	/* Update timestamp for pstate as new start time */
	if (*pts)
		*pts = cur_ts;

	return 0;
}
210
211
/*
 * Handler for the power/cpu_frequency tracepoint.
 *
 * Stores the new frequency for ctx->cpu_id in my_map and, once a pstate
 * timestamp exists and the CPU is not recorded as idle, adds the time
 * elapsed since that timestamp to pstate_duration for the frequency the
 * CPU was running at before this event.
 *
 * Events for CPUs outside 0 .. MAX_CPU - 1 are ignored.
 *
 * Always returns 0.
 */
SEC("tracepoint/power/cpu_frequency")
int bpf_prog2(struct cpu_args *ctx)
{
	u64 *pts, *cstate, *pstate, prev_pstate, cur_ts, delta;
	u32 key, cpu, pstate_idx;
	u64 *val;

	/*
	 * Reject CPUs we have no slots for. Without this check a large
	 * cpu_id could make the u32 key arithmetic below wrap around and
	 * land on another CPU's entries.
	 */
	if (ctx->cpu_id >= MAX_CPU)
		return 0;

	cpu = ctx->cpu_id;

	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
	pts = bpf_map_lookup_elem(&my_map, &key);
	if (!pts)
		return 0;

	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
	pstate = bpf_map_lookup_elem(&my_map, &key);
	if (!pstate)
		return 0;

	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
	cstate = bpf_map_lookup_elem(&my_map, &key);
	if (!cstate)
		return 0;

	/*
	 * Keep the frequency that was in effect until now; the elapsed
	 * interval belongs to it, not to the frequency being switched to.
	 */
	prev_pstate = *pstate;
	*pstate = ctx->state;

	/* First event for this CPU: only record the start timestamp */
	if (!*pts) {
		*pts = bpf_ktime_get_ns();
		return 0;
	}

	cur_ts = bpf_ktime_get_ns();
	delta = cur_ts - *pts;
	*pts = cur_ts;

	/* When CPU is in idle, bail out to skip pstate statistics */
	if (*cstate != (u32)(-1))
		return 0;

	/*
	 * The cpu changes to another different OPP (in below diagram
	 * change frequency from OPP3 to OPP1), need recording interval
	 * for previous frequency OPP3 and update timestamp as start
	 * time for new frequency OPP1.
	 *
	 *                 OPP3
	 *            +---------------------+
	 *     OPP2   |                     |
	 *   ---------+                     |
	 *                                  |    OPP1
	 *                                  +---------------
	 *
	 *            |<- pstate duration ->|
	 *            ^                     ^
	 *           pts                  cur_ts
	 */
	pstate_idx = find_cpu_pstate_idx(prev_pstate);
	if (pstate_idx >= MAX_PSTATE_ENTRIES)
		return 0;

	key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
	val = bpf_map_lookup_elem(&pstate_duration, &key);
	if (val)
		__sync_fetch_and_add((long *)val, delta);

	return 0;
}
278
279
char _license[] SEC("license") = "GPL";
280
u32 _version SEC("version") = LINUX_VERSION_CODE;
281
282