GitHub Repository: torvalds/linux
Path: blob/master/arch/arm64/kvm/hyp_trace.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2025 Google LLC
 * Author: Vincent Donnefort <[email protected]>
 */

#include <linux/cpumask.h>
#include <linux/trace_remote.h>
#include <linux/tracefs.h>
#include <linux/simple_ring_buffer.h>

#include <asm/arch_timer.h>
#include <asm/kvm_host.h>
#include <asm/kvm_hyptrace.h>
#include <asm/kvm_mmu.h>

#include "hyp_trace.h"

/* Same 10min used by clocksource when width is more than 32-bits */
#define CLOCK_MAX_CONVERSION_S 600
/*
 * Time to give for the clock init. Long enough to get a good mult/shift
 * estimation. Short enough to not delay the tracing start too much.
 */
#define CLOCK_INIT_MS 100
/*
 * Time between clock checks. Must be small enough to catch clock deviation when
 * it is still tiny.
 */
#define CLOCK_UPDATE_MS 500

static struct hyp_trace_clock {
	u64 cycles;
	u64 cyc_overflow64;
	u64 boot;
	u32 mult;
	u32 shift;
	struct delayed_work work;
	struct completion ready;
	struct mutex lock;
	bool running;
} hyp_clock;

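/*
 * Delayed work periodically comparing the hyp clock (cycle counter converted
 * with mult/shift) against the kernel boot clock. It refines mult/shift from
 * the measured rate, fast-forwards the epoch before the 64-bit conversion
 * could overflow, and pushes the new parameters to the hypervisor.
 */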
static void __hyp_clock_work(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct hyp_trace_clock *hyp_clock;
	struct system_time_snapshot snap;
	u64 rate, delta_cycles;
	u64 boot, delta_boot;

	hyp_clock = container_of(dwork, struct hyp_trace_clock, work);

	ktime_get_snapshot(&snap);
	boot = ktime_to_ns(snap.boot);

	delta_boot = boot - hyp_clock->boot;
	delta_cycles = snap.cycles - hyp_clock->cycles;

	/* Compare hyp clock with the kernel boot clock */
	if (hyp_clock->mult) {
		u64 err, cur = delta_cycles;

		if (WARN_ON_ONCE(cur >= hyp_clock->cyc_overflow64)) {
			__uint128_t tmp = (__uint128_t)cur * hyp_clock->mult;

			cur = tmp >> hyp_clock->shift;
		} else {
			cur *= hyp_clock->mult;
			cur >>= hyp_clock->shift;
		}
		cur += hyp_clock->boot;

		err = abs_diff(cur, boot);
		/* No deviation, only update epoch if necessary */
		if (!err) {
			if (delta_cycles >= (hyp_clock->cyc_overflow64 >> 1))
				goto fast_forward;

			goto resched;
		}

		/* Warn if the error is above tracing precision (1us) */
		if (err > NSEC_PER_USEC)
			pr_warn_ratelimited("hyp trace clock off by %lluus\n",
					    err / NSEC_PER_USEC);
	}

	rate = div64_u64(delta_cycles * NSEC_PER_SEC, delta_boot);

	clocks_calc_mult_shift(&hyp_clock->mult, &hyp_clock->shift,
			       rate, NSEC_PER_SEC, CLOCK_MAX_CONVERSION_S);

	/* Add a comfortable 50% margin */
	hyp_clock->cyc_overflow64 = (U64_MAX / hyp_clock->mult) >> 1;

fast_forward:
	hyp_clock->cycles = snap.cycles;
	hyp_clock->boot = boot;
	kvm_call_hyp_nvhe(__tracing_update_clock, hyp_clock->mult,
			  hyp_clock->shift, hyp_clock->boot, hyp_clock->cycles);
	complete(&hyp_clock->ready);

resched:
	schedule_delayed_work(&hyp_clock->work,
			      msecs_to_jiffies(CLOCK_UPDATE_MS));
}

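/*
 * Start or stop the clock work. Enabling resets the epoch, schedules the
 * update work and waits for a first mult/shift estimation before tracing
 * starts; disabling cancels the work.
 */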
static void hyp_trace_clock_enable(struct hyp_trace_clock *hyp_clock, bool enable)
{
	struct system_time_snapshot snap;

	if (hyp_clock->running == enable)
		return;

	if (!enable) {
		cancel_delayed_work_sync(&hyp_clock->work);
		hyp_clock->running = false;
		return;
	}

	ktime_get_snapshot(&snap);

	hyp_clock->boot = ktime_to_ns(snap.boot);
	hyp_clock->cycles = snap.cycles;
	hyp_clock->mult = 0;

	init_completion(&hyp_clock->ready);
	INIT_DELAYED_WORK(&hyp_clock->work, __hyp_clock_work);
	schedule_delayed_work(&hyp_clock->work, msecs_to_jiffies(CLOCK_INIT_MS));
	wait_for_completion(&hyp_clock->ready);
	hyp_clock->running = true;
}

/* Access to this struct within the trace_remote_callbacks is protected by the trace_remote lock */
static struct hyp_trace_buffer {
	struct hyp_trace_desc *desc;
	size_t desc_size;
} trace_buffer;

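/*
 * __map_hyp() creates a hyp mapping when pKVM is disabled (no-op otherwise);
 * __share_page()/__unshare_page() hand a single page to/from the hypervisor
 * via kvm_share_hyp()/kvm_unshare_hyp().
 */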
static int __map_hyp(void *start, size_t size)
{
	if (is_protected_kvm_enabled())
		return 0;

	return create_hyp_mappings(start, start + size, PAGE_HYP);
}

static int __share_page(unsigned long va)
{
	return kvm_share_hyp((void *)va, (void *)va + 1);
}

static void __unshare_page(unsigned long va)
{
	kvm_unshare_hyp((void *)va, (void *)va + 1);
}

static int hyp_trace_buffer_alloc_bpages_backing(struct hyp_trace_buffer *trace_buffer, size_t size)
{
	int nr_bpages = (PAGE_ALIGN(size) / PAGE_SIZE) + 1;
	size_t backing_size;
	void *start;

	backing_size = PAGE_ALIGN(sizeof(struct simple_buffer_page) * nr_bpages *
				  num_possible_cpus());

	start = alloc_pages_exact(backing_size, GFP_KERNEL_ACCOUNT);
	if (!start)
		return -ENOMEM;

	trace_buffer->desc->bpages_backing_start = (unsigned long)start;
	trace_buffer->desc->bpages_backing_size = backing_size;

	return __map_hyp(start, backing_size);
}

static void hyp_trace_buffer_free_bpages_backing(struct hyp_trace_buffer *trace_buffer)
{
	free_pages_exact((void *)trace_buffer->desc->bpages_backing_start,
			 trace_buffer->desc->bpages_backing_size);
}

static void hyp_trace_buffer_unshare_hyp(struct hyp_trace_buffer *trace_buffer, int last_cpu)
{
	struct ring_buffer_desc *rb_desc;
	int cpu, p;

	for_each_ring_buffer_desc(rb_desc, cpu, &trace_buffer->desc->trace_buffer_desc) {
		if (cpu > last_cpu)
			break;

		__unshare_page(rb_desc->meta_va);
		for (p = 0; p < rb_desc->nr_page_va; p++)
			__unshare_page(rb_desc->page_va[p]);
	}
}

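/*
 * Share the meta and data pages of every per-CPU ring buffer with the
 * hypervisor. On failure, pages already shared are unshared again.
 */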
static int hyp_trace_buffer_share_hyp(struct hyp_trace_buffer *trace_buffer)
{
	struct ring_buffer_desc *rb_desc;
	int cpu, p, ret = 0;

	for_each_ring_buffer_desc(rb_desc, cpu, &trace_buffer->desc->trace_buffer_desc) {
		ret = __share_page(rb_desc->meta_va);
		if (ret)
			break;

		for (p = 0; p < rb_desc->nr_page_va; p++) {
			ret = __share_page(rb_desc->page_va[p]);
			if (ret)
				break;
		}

		if (ret) {
			for (p--; p >= 0; p--)
				__unshare_page(rb_desc->page_va[p]);
			break;
		}
	}

	if (ret)
		hyp_trace_buffer_unshare_hyp(trace_buffer, cpu - 1);

	return ret;
}

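/*
 * Allocate the trace buffer descriptor, the simple_buffer_page backing and the
 * per-CPU ring buffers, share everything with the hypervisor and issue the
 * load hypercall. Errors unwind in reverse order.
 */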
static struct trace_buffer_desc *hyp_trace_load(unsigned long size, void *priv)
{
	struct hyp_trace_buffer *trace_buffer = priv;
	struct hyp_trace_desc *desc;
	size_t desc_size;
	int ret;

	if (WARN_ON(trace_buffer->desc))
		return ERR_PTR(-EINVAL);

	desc_size = trace_buffer_desc_size(size, num_possible_cpus());
	if (desc_size == SIZE_MAX)
		return ERR_PTR(-E2BIG);

	desc_size = PAGE_ALIGN(desc_size);
	desc = (struct hyp_trace_desc *)alloc_pages_exact(desc_size, GFP_KERNEL);
	if (!desc)
		return ERR_PTR(-ENOMEM);

	ret = __map_hyp(desc, desc_size);
	if (ret)
		goto err_free_desc;

	trace_buffer->desc = desc;
	trace_buffer->desc_size = desc_size;

	ret = hyp_trace_buffer_alloc_bpages_backing(trace_buffer, size);
	if (ret)
		goto err_free_desc;

	ret = trace_remote_alloc_buffer(&desc->trace_buffer_desc, desc_size, size,
					cpu_possible_mask);
	if (ret)
		goto err_free_backing;

	ret = hyp_trace_buffer_share_hyp(trace_buffer);
	if (ret)
		goto err_free_buffer;

	ret = kvm_call_hyp_nvhe(__tracing_load, (unsigned long)desc, desc_size);
	if (ret)
		goto err_unload_pages;

	return &desc->trace_buffer_desc;

err_unload_pages:
	hyp_trace_buffer_unshare_hyp(trace_buffer, INT_MAX);

err_free_buffer:
	trace_remote_free_buffer(&desc->trace_buffer_desc);

err_free_backing:
	hyp_trace_buffer_free_bpages_backing(trace_buffer);

err_free_desc:
	free_pages_exact(desc, desc_size);
	trace_buffer->desc = NULL;

	return ERR_PTR(ret);
}

static void hyp_trace_unload(struct trace_buffer_desc *desc, void *priv)
{
	struct hyp_trace_buffer *trace_buffer = priv;

	if (WARN_ON(desc != &trace_buffer->desc->trace_buffer_desc))
		return;

	kvm_call_hyp_nvhe(__tracing_unload);
	hyp_trace_buffer_unshare_hyp(trace_buffer, INT_MAX);
	trace_remote_free_buffer(desc);
	hyp_trace_buffer_free_bpages_backing(trace_buffer);
	free_pages_exact(trace_buffer->desc, trace_buffer->desc_size);
	trace_buffer->desc = NULL;
}

static int hyp_trace_enable_tracing(bool enable, void *priv)
{
	hyp_trace_clock_enable(&hyp_clock, enable);

	return kvm_call_hyp_nvhe(__tracing_enable, enable);
}

static int hyp_trace_swap_reader_page(unsigned int cpu, void *priv)
{
	return kvm_call_hyp_nvhe(__tracing_swap_reader, cpu);
}

static int hyp_trace_reset(unsigned int cpu, void *priv)
{
	return kvm_call_hyp_nvhe(__tracing_reset, cpu);
}

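/*
 * With pKVM the event enable flag is owned by the hypervisor and is toggled
 * through a hypercall. Otherwise it is written directly, through a temporary
 * vmap() alias of the page holding it.
 */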
static int hyp_trace_enable_event(unsigned short id, bool enable, void *priv)
{
	struct hyp_event_id *event_id = lm_alias(&__hyp_event_ids_start[id]);
	struct page *page;
	atomic_t *enabled;
	void *map;

	if (is_protected_kvm_enabled())
		return kvm_call_hyp_nvhe(__tracing_enable_event, id, enable);

	enabled = &event_id->enabled;
	page = virt_to_page(enabled);
	map = vmap(&page, 1, VM_MAP, PAGE_KERNEL);
	if (!map)
		return -ENOMEM;

	enabled = map + offset_in_page(enabled);
	atomic_set(enabled, enable);

	vunmap(map);

	return 0;
}

static int hyp_trace_clock_show(struct seq_file *m, void *v)
{
	seq_puts(m, "[boot]\n");

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(hyp_trace_clock);

static ssize_t hyp_trace_write_event_write(struct file *f, const char __user *ubuf,
					   size_t cnt, loff_t *pos)
{
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	kvm_call_hyp_nvhe(__tracing_write_event, val);

	return cnt;
}

static const struct file_operations hyp_trace_write_event_fops = {
	.write = hyp_trace_write_event_write,
};

static int hyp_trace_init_tracefs(struct dentry *d, void *priv)
{
	if (!tracefs_create_file("write_event", 0200, d, NULL, &hyp_trace_write_event_fops))
		return -ENOMEM;

	return tracefs_create_file("trace_clock", 0440, d, NULL, &hyp_trace_clock_fops) ?
	       0 : -ENOMEM;
}

static struct trace_remote_callbacks trace_remote_callbacks = {
	.init = hyp_trace_init_tracefs,
	.load_trace_buffer = hyp_trace_load,
	.unload_trace_buffer = hyp_trace_unload,
	.enable_tracing = hyp_trace_enable_tracing,
	.swap_reader_page = hyp_trace_swap_reader_page,
	.reset = hyp_trace_reset,
	.enable_event = hyp_trace_enable_event,
};

static const char *__hyp_enter_exit_reason_str(u8 reason);

#include <asm/kvm_define_hypevents.h>

static const char *__hyp_enter_exit_reason_str(u8 reason)
{
	static const char strs[][12] = {
		"smc",
		"hvc",
		"psci",
		"host_abort",
		"guest_exit",
		"eret_host",
		"eret_guest",
		"unknown",
	};

	return strs[min(reason, HYP_REASON_UNKNOWN)];
}

static void __init hyp_trace_init_events(void)
{
	struct hyp_event_id *hyp_event_id = __hyp_event_ids_start;
	struct remote_event *event = __hyp_events_start;
	int id = 0;

	/* Events on both the host and hypervisor sides are sorted identically */
	for (; event < __hyp_events_end; event++, hyp_event_id++, id++)
		event->id = hyp_event_id->id = id;
}

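/*
 * Hyp tracing is only relevant in nVHE mode and relies on a reliable CNTVCT:
 * bail out if any CPU needs a counter read workaround.
 */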
int __init kvm_hyp_trace_init(void)
{
	int cpu;

	if (is_kernel_in_hyp_mode())
		return 0;

	for_each_possible_cpu(cpu) {
		const struct arch_timer_erratum_workaround *wa =
			per_cpu(timer_unstable_counter_workaround, cpu);

		if (IS_ENABLED(CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND) &&
		    wa && wa->read_cntvct_el0) {
			pr_warn("hyp trace can't handle CNTVCT workaround '%s'\n", wa->desc);
			return -EOPNOTSUPP;
		}
	}

	hyp_trace_init_events();

	return trace_remote_register("hypervisor", &trace_remote_callbacks, &trace_buffer,
				     __hyp_events_start, __hyp_events_end - __hyp_events_start);
}