Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/tools/tracing/rtla/src/timerlat_aa.c
26285 views
1
// SPDX-License-Identifier: GPL-2.0
2
/*
3
* Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <[email protected]>
4
*/
5
6
#include <stdlib.h>
7
#include <errno.h>
8
#include "timerlat.h"
9
#include <unistd.h>
10
11
/*
 * Per-CPU parser state: after the timerlat IRQ event the parser waits for
 * the thread event, and after the thread event it waits for the next IRQ.
 * Noise handlers use this to classify noise as "before the timerlat IRQ"
 * (a candidate previous-IRQ disturbance) or "thread interference".
 */
enum timelat_state {
	TIMERLAT_INIT = 0,		/* no timerlat event processed yet */
	TIMERLAT_WAITING_IRQ,		/* next expected event: timerlat IRQ */
	TIMERLAT_WAITING_THREAD,	/* next expected event: timerlat thread */
};
16
17
/*
 * Used to fill spaces in the output, via printf("%.*s", width, spaces).
 * Must be at least as long as the largest width used in this file (40).
 */
static const char *spaces = "                                        ";

/* Size of the buffers used to store task comm names */
#define MAX_COMM 24
21
22
/*
 * Per-cpu data statistics and data.
 *
 * One instance per CPU, reset at each timerlat IRQ event. Note: some field
 * names keep the original "timstamp" spelling; they are part of the layout
 * used throughout this file.
 */
struct timerlat_aa_data {
	/* Current CPU state (enum timelat_state) */
	int curr_state;

	/* timerlat IRQ latency */
	unsigned long long tlat_irq_seqnum;
	unsigned long long tlat_irq_latency;
	unsigned long long tlat_irq_timstamp;

	/* timerlat Thread latency */
	unsigned long long tlat_thread_seqnum;
	unsigned long long tlat_thread_latency;
	unsigned long long tlat_thread_timstamp;

	/*
	 * Information about the thread running when the IRQ
	 * arrived.
	 *
	 * This can be blocking or interference, depending on the
	 * priority of the thread. Assuming timerlat is the highest
	 * prio, it is blocking. If timerlat has a lower prio, it is
	 * interference.
	 * note: "unsigned long long" because they are fetched using tep_get_field_val();
	 */
	unsigned long long run_thread_pid;
	char run_thread_comm[MAX_COMM];
	unsigned long long thread_blocking_duration;
	unsigned long long max_exit_idle_latency;

	/* Information about the timerlat timer irq */
	unsigned long long timer_irq_start_time;
	unsigned long long timer_irq_start_delay;
	unsigned long long timer_irq_duration;
	unsigned long long timer_exit_from_idle;

	/*
	 * Information about the last IRQ before the timerlat irq
	 * arrived.
	 *
	 * If now - timestamp is <= latency, it might have influenced
	 * in the timerlat irq latency. Otherwise, ignore it.
	 */
	unsigned long long prev_irq_duration;
	unsigned long long prev_irq_timstamp;

	/*
	 * Interference sum, in ns, accumulated between the timerlat IRQ
	 * and the timerlat thread events.
	 */
	unsigned long long thread_nmi_sum;
	unsigned long long thread_irq_sum;
	unsigned long long thread_softirq_sum;
	unsigned long long thread_thread_sum;

	/*
	 * Interference task information: pre-formatted report lines,
	 * printed by timerlat_thread_analysis().
	 */
	struct trace_seq *prev_irqs_seq;
	struct trace_seq *nmi_seq;
	struct trace_seq *irqs_seq;
	struct trace_seq *softirqs_seq;
	struct trace_seq *threads_seq;
	struct trace_seq *stack_seq;

	/*
	 * Current thread, tracked via sched:sched_switch for the
	 * end-of-run task dump.
	 */
	char current_comm[MAX_COMM];
	unsigned long long current_pid;

	/*
	 * Is the system running a kworker? (work struct pointer and
	 * function address, from workqueue_execute_start.)
	 */
	unsigned long long kworker;
	unsigned long long kworker_func;
};
100
101
/*
 * The analysis context and system wide view: the per-CPU data array plus
 * the tool handle used to translate function names and register events.
 */
struct timerlat_aa_context {
	int nr_cpus;		/* number of entries in taa_data */
	int dump_tasks;		/* print per-CPU tasks at the end of the analysis */

	/* per CPU data */
	struct timerlat_aa_data *taa_data;

	/*
	 * required to translate function names and register
	 * events.
	 */
	struct osnoise_tool *tool;
};
117
118
/*
 * The data is stored as a local variable, but accessed via a helper function.
 *
 * It could be stored inside the trace context. But every access would
 * require container_of() + a series of pointers. Do we need it? Not sure.
 *
 * For now keep it simple. If needed, store it in the tool, add the *context
 * as a parameter in timerlat_aa_get_ctx() and do the magic there.
 */
static struct timerlat_aa_context *__timerlat_aa_ctx;

/*
 * timerlat_aa_get_ctx - Return the global auto-analysis context
 *
 * Set by timerlat_aa_init(); NULL before initialization (static storage
 * is zero-initialized).
 */
static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
{
	return __timerlat_aa_ctx;
}
133
134
/*
135
* timerlat_aa_get_data - Get the per-cpu data from the timerlat context
136
*/
137
static struct timerlat_aa_data
138
*timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
139
{
140
return &taa_ctx->taa_data[cpu];
141
}
142
143
/*
 * timerlat_aa_irq_latency - Handles timerlat IRQ event
 *
 * Starts a new analysis cycle: resets the per-cycle accounting, records the
 * IRQ latency values, and tracks the maximum IRQ latency observed when the
 * IRQ fired while the CPU was idle (run_thread_pid == 0). Always returns 0.
 */
static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
				struct trace_seq *s, struct tep_record *record,
				struct tep_event *event)
{
	/*
	 * For interference, we start now looking for things that can delay
	 * the thread.
	 */
	taa_data->curr_state = TIMERLAT_WAITING_THREAD;
	taa_data->tlat_irq_timstamp = record->ts;

	/*
	 * Zero values.
	 */
	taa_data->thread_nmi_sum = 0;
	taa_data->thread_irq_sum = 0;
	taa_data->thread_softirq_sum = 0;
	taa_data->thread_thread_sum = 0;
	taa_data->thread_blocking_duration = 0;
	/* zeroed start time tells the irq_noise handler "next IRQ is the timer" */
	taa_data->timer_irq_start_time = 0;
	taa_data->timer_irq_duration = 0;
	taa_data->timer_exit_from_idle = 0;

	/*
	 * Zero interference tasks. Note: prev_irqs_seq is NOT reset here;
	 * it is reset by the noise handlers while waiting for the IRQ.
	 */
	trace_seq_reset(taa_data->nmi_seq);
	trace_seq_reset(taa_data->irqs_seq);
	trace_seq_reset(taa_data->softirqs_seq);
	trace_seq_reset(taa_data->threads_seq);

	/* IRQ latency values */
	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);

	/* The thread that can cause blocking */
	tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);

	/*
	 * Get exit from idle case.
	 *
	 * If it is not idle thread:
	 */
	if (taa_data->run_thread_pid)
		return 0;

	/*
	 * if the latency is shorter than the known exit from idle:
	 */
	if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
		return 0;

	/*
	 * To be safe, ignore the cases in which an IRQ/NMI could have
	 * interfered with the timerlat IRQ.
	 */
	if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
	    < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
		return 0;

	taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;

	return 0;
}
210
211
/*
212
* timerlat_aa_thread_latency - Handles timerlat thread event
213
*/
214
static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
215
struct trace_seq *s, struct tep_record *record,
216
struct tep_event *event)
217
{
218
/*
219
* For interference, we start now looking for things that can delay
220
* the IRQ of the next cycle.
221
*/
222
taa_data->curr_state = TIMERLAT_WAITING_IRQ;
223
taa_data->tlat_thread_timstamp = record->ts;
224
225
/* Thread latency values */
226
tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
227
tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);
228
229
return 0;
230
}
231
232
/*
233
* timerlat_aa_handler - Handle timerlat events
234
*
235
* This function is called to handle timerlat events recording statistics.
236
*
237
* Returns 0 on success, -1 otherwise.
238
*/
239
static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
240
struct tep_event *event, void *context)
241
{
242
struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
243
struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
244
unsigned long long thread;
245
246
if (!taa_data)
247
return -1;
248
249
tep_get_field_val(s, event, "context", record, &thread, 1);
250
if (!thread)
251
return timerlat_aa_irq_latency(taa_data, s, record, event);
252
else
253
return timerlat_aa_thread_latency(taa_data, s, record, event);
254
}
255
256
/*
257
* timerlat_aa_nmi_handler - Handles NMI noise
258
*
259
* It is used to collect information about interferences from NMI. It is
260
* hooked to the osnoise:nmi_noise event.
261
*/
262
static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
263
struct tep_event *event, void *context)
264
{
265
struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
266
struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
267
unsigned long long duration;
268
unsigned long long start;
269
270
tep_get_field_val(s, event, "duration", record, &duration, 1);
271
tep_get_field_val(s, event, "start", record, &start, 1);
272
273
if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
274
taa_data->prev_irq_duration = duration;
275
taa_data->prev_irq_timstamp = start;
276
277
trace_seq_reset(taa_data->prev_irqs_seq);
278
trace_seq_printf(taa_data->prev_irqs_seq, " %24s %.*s %9.2f us\n",
279
"nmi",
280
24, spaces,
281
ns_to_usf(duration));
282
return 0;
283
}
284
285
taa_data->thread_nmi_sum += duration;
286
trace_seq_printf(taa_data->nmi_seq, " %24s %.*s %9.2f us\n",
287
"nmi",
288
24, spaces, ns_to_usf(duration));
289
290
return 0;
291
}
292
293
/*
 * timerlat_aa_irq_handler - Handles IRQ noise
 *
 * It is used to collect information about interferences from IRQ. It is
 * hooked to the osnoise:irq_noise event.
 *
 * It is a little bit more complex than the other because it measures:
 *	- The IRQs that can delay the timer IRQ before it happened.
 *	- The Timerlat IRQ handler
 *	- The IRQs that happened between the timerlat IRQ and the timerlat thread
 *	  (IRQ interference).
 */
static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long expected_start;
	unsigned long long duration;
	unsigned long long vector;
	unsigned long long start;
	char *desc;
	int val;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);
	tep_get_field_val(s, event, "vector", record, &vector, 1);
	desc = tep_get_field_raw(s, event, "desc", record, &val, 1);

	/*
	 * Before the timerlat IRQ: candidate previous-IRQ disturbance.
	 */
	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, " %24s:%-3llu %.*s %9.2f us\n",
				 desc, vector,
				 15, spaces,
				 ns_to_usf(duration));
		return 0;
	}

	/*
	 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at
	 * the timerlat irq handler, so the first irq_noise after it is the
	 * timer IRQ itself.
	 */
	if (!taa_data->timer_irq_start_time) {
		expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;

		taa_data->timer_irq_start_time = start;
		taa_data->timer_irq_duration = duration;

		/*
		 * We are dealing with two different clock sources: the
		 * external clock source that timerlat uses as a reference
		 * and the clock used by the tracer. There are also two
		 * moments: the time reading the clock and the timer in
		 * which the event is placed in the buffer (the trace
		 * event timestamp). If the processor is slow or there
		 * is some hardware noise, the difference between the
		 * timestamp and the external clock read can be longer
		 * than the IRQ handler delay, resulting in a negative
		 * time. If so, set IRQ start delay as 0. In the end,
		 * it is less relevant than the noise.
		 */
		if (expected_start < taa_data->timer_irq_start_time)
			taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;
		else
			taa_data->timer_irq_start_delay = 0;

		/*
		 * not exit from idle.
		 */
		if (taa_data->run_thread_pid)
			return 0;

		/* Idle exit counts only if no other IRQ overlapped the window. */
		if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
			taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;

		return 0;
	}

	/*
	 * IRQ interference (between timerlat IRQ and thread).
	 */
	taa_data->thread_irq_sum += duration;
	trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu %.*s %9.2f us\n",
			 desc, vector,
			 24, spaces,
			 ns_to_usf(duration));

	return 0;
}
388
389
/*
 * Softirq names indexed by the "vector" field of osnoise:softirq_noise;
 * presumably mirrors the kernel's softirq_to_name[] order — keep in sync.
 */
static char *softirq_name[] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK",
				"IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };
391
392
393
/*
394
* timerlat_aa_softirq_handler - Handles Softirq noise
395
*
396
* It is used to collect information about interferences from Softirq. It is
397
* hooked to the osnoise:softirq_noise event.
398
*
399
* It is only printed in the non-rt kernel, as softirqs become thread on RT.
400
*/
401
static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
402
struct tep_event *event, void *context)
403
{
404
struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
405
struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
406
unsigned long long duration;
407
unsigned long long vector;
408
unsigned long long start;
409
410
if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
411
return 0;
412
413
tep_get_field_val(s, event, "duration", record, &duration, 1);
414
tep_get_field_val(s, event, "start", record, &start, 1);
415
tep_get_field_val(s, event, "vector", record, &vector, 1);
416
417
taa_data->thread_softirq_sum += duration;
418
419
trace_seq_printf(taa_data->softirqs_seq, " %24s:%-3llu %.*s %9.2f us\n",
420
softirq_name[vector], vector,
421
24, spaces,
422
ns_to_usf(duration));
423
return 0;
424
}
425
426
/*
427
* timerlat_aa_softirq_handler - Handles thread noise
428
*
429
* It is used to collect information about interferences from threads. It is
430
* hooked to the osnoise:thread_noise event.
431
*
432
* Note: if you see thread noise, your timerlat thread was not the highest prio one.
433
*/
434
static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
435
struct tep_event *event, void *context)
436
{
437
struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
438
struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
439
unsigned long long duration;
440
unsigned long long start;
441
unsigned long long pid;
442
const char *comm;
443
int val;
444
445
if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
446
return 0;
447
448
tep_get_field_val(s, event, "duration", record, &duration, 1);
449
tep_get_field_val(s, event, "start", record, &start, 1);
450
451
tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
452
comm = tep_get_field_raw(s, event, "comm", record, &val, 1);
453
454
if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
455
taa_data->thread_blocking_duration = duration;
456
457
if (comm)
458
strncpy(taa_data->run_thread_comm, comm, MAX_COMM);
459
else
460
sprintf(taa_data->run_thread_comm, "<...>");
461
462
} else {
463
taa_data->thread_thread_sum += duration;
464
465
trace_seq_printf(taa_data->threads_seq, " %24s:%-12llu %.*s %9.2f us\n",
466
comm, pid,
467
15, spaces,
468
ns_to_usf(duration));
469
}
470
471
return 0;
472
}
473
474
/*
475
* timerlat_aa_stack_handler - Handles timerlat IRQ stack trace
476
*
477
* Saves and parse the stack trace generated by the timerlat IRQ.
478
*/
479
static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
480
struct tep_event *event, void *context)
481
{
482
struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
483
struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
484
unsigned long *caller;
485
const char *function;
486
int val, i;
487
488
trace_seq_reset(taa_data->stack_seq);
489
490
trace_seq_printf(taa_data->stack_seq, " Blocking thread stack trace\n");
491
caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
492
if (caller) {
493
for (i = 0; ; i++) {
494
function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
495
if (!function)
496
break;
497
trace_seq_printf(taa_data->stack_seq, " %.*s -> %s\n",
498
14, spaces, function);
499
}
500
}
501
return 0;
502
}
503
504
/*
505
* timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
506
*
507
* Handles the sched:sched_switch event to trace the current thread running on the
508
* CPU. It is used to display the threads running on the other CPUs when the trace
509
* stops.
510
*/
511
static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
512
struct tep_event *event, void *context)
513
{
514
struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
515
struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
516
const char *comm;
517
int val;
518
519
tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
520
comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);
521
522
strncpy(taa_data->current_comm, comm, MAX_COMM);
523
524
/*
525
* If this was a kworker, clean the last kworkers that ran.
526
*/
527
taa_data->kworker = 0;
528
taa_data->kworker_func = 0;
529
530
return 0;
531
}
532
533
/*
534
* timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
535
*
536
* Handles workqueue:workqueue_execute_start event, keeping track of
537
* the job that a kworker could be doing in the CPU.
538
*
539
* We already catch problems of hardware related latencies caused by work queues
540
* running driver code that causes hardware stall. For example, with DRM drivers.
541
*/
542
static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
543
struct tep_event *event, void *context)
544
{
545
struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
546
struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
547
548
tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
549
tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
550
return 0;
551
}
552
553
/*
 * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing
 *
 * This is the core of the analysis: prints the breakdown of the latency
 * (IRQ delay, IRQ duration, blocking thread, NMI/IRQ/softirq/thread
 * interference) collected by the event handlers above.
 *
 * NOTE(review): irq_thresh and thread_thresh are currently unused here;
 * kept for interface stability with the caller.
 */
static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
				     int irq_thresh, int thread_thresh)
{
	long long exp_irq_ts;
	int total;
	int irq;

	/*
	 * IRQ latency or Thread latency? The higher seqnum tells which event
	 * was the last one seen, i.e., which one triggered the stop.
	 */
	if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
		irq = 1;
		total = taa_data->tlat_irq_latency;
	} else {
		irq = 0;
		total = taa_data->tlat_thread_latency;
	}

	/*
	 * Expected IRQ arrival time using the trace clock as the base.
	 *
	 * TODO: Add a list of previous IRQ, and then run the list backwards.
	 */
	exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
	if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) {
		if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time)
			printf(" Previous IRQ interference: %.*s up to %9.2f us\n",
			       16, spaces,
			       ns_to_usf(taa_data->prev_irq_duration));
	}

	/*
	 * The delay that the IRQ suffered before starting.
	 */
	printf(" IRQ handler delay: %.*s %16s %9.2f us (%.2f %%)\n", 16, spaces,
	       (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
	       ns_to_usf(taa_data->timer_irq_start_delay),
	       ns_to_per(total, taa_data->timer_irq_start_delay));

	/*
	 * Timerlat IRQ.
	 */
	printf(" IRQ latency: %.*s %9.2f us\n", 40, spaces,
	       ns_to_usf(taa_data->tlat_irq_latency));

	if (irq) {
		/*
		 * If the trace stopped due to IRQ, the other events will not happen
		 * because... the trace stopped :-).
		 *
		 * That is all folks, the stack trace was printed before the stop,
		 * so it will be displayed, it is the key.
		 */
		printf(" Blocking thread:\n");
		printf(" %.*s %24s:%-9llu\n", 6, spaces, taa_data->run_thread_comm,
		       taa_data->run_thread_pid);
	} else {
		/*
		 * The duration of the IRQ handler that handled the timerlat IRQ.
		 */
		printf(" Timerlat IRQ duration: %.*s %9.2f us (%.2f %%)\n",
		       30, spaces,
		       ns_to_usf(taa_data->timer_irq_duration),
		       ns_to_per(total, taa_data->timer_irq_duration));

		/*
		 * The amount of time that the current thread postponed the scheduler.
		 *
		 * Recalling that it is net from NMI/IRQ/Softirq interference, so there
		 * is no need to compute values here.
		 */
		printf(" Blocking thread: %.*s %9.2f us (%.2f %%)\n", 36, spaces,
		       ns_to_usf(taa_data->thread_blocking_duration),
		       ns_to_per(total, taa_data->thread_blocking_duration));

		printf(" %.*s %24s:%-9llu %.*s %9.2f us\n", 6, spaces,
		       taa_data->run_thread_comm, taa_data->run_thread_pid,
		       12, spaces, ns_to_usf(taa_data->thread_blocking_duration));
	}

	/*
	 * Print the stack trace!
	 */
	trace_seq_do_printf(taa_data->stack_seq);

	/*
	 * NMIs can happen during the IRQ, so they are always possible.
	 */
	if (taa_data->thread_nmi_sum)
		printf(" NMI interference %.*s %9.2f us (%.2f %%)\n", 36, spaces,
		       ns_to_usf(taa_data->thread_nmi_sum),
		       ns_to_per(total, taa_data->thread_nmi_sum));

	/*
	 * If it is an IRQ latency, the other factors can be skipped.
	 */
	if (irq)
		goto print_total;

	/*
	 * Prints the interference caused by IRQs to the thread latency.
	 */
	if (taa_data->thread_irq_sum) {
		printf(" IRQ interference %.*s %9.2f us (%.2f %%)\n", 36, spaces,
		       ns_to_usf(taa_data->thread_irq_sum),
		       ns_to_per(total, taa_data->thread_irq_sum));

		trace_seq_do_printf(taa_data->irqs_seq);
	}

	/*
	 * Prints the interference caused by Softirqs to the thread latency.
	 */
	if (taa_data->thread_softirq_sum) {
		printf(" Softirq interference %.*s %9.2f us (%.2f %%)\n", 32, spaces,
		       ns_to_usf(taa_data->thread_softirq_sum),
		       ns_to_per(total, taa_data->thread_softirq_sum));

		trace_seq_do_printf(taa_data->softirqs_seq);
	}

	/*
	 * Prints the interference caused by other threads to the thread latency.
	 *
	 * If this happens, your timerlat is not the highest prio. OK, migration
	 * thread can happen. But otherwise, you are not measuring the "scheduling
	 * latency" only, and here is the difference from scheduling latency and
	 * timer handling latency.
	 */
	if (taa_data->thread_thread_sum) {
		printf(" Thread interference %.*s %9.2f us (%.2f %%)\n", 33, spaces,
		       ns_to_usf(taa_data->thread_thread_sum),
		       ns_to_per(total, taa_data->thread_thread_sum));

		trace_seq_do_printf(taa_data->threads_seq);
	}

	/*
	 * Done.
	 */
print_total:
	printf("------------------------------------------------------------------------\n");
	printf(" %s latency: %.*s %9.2f us (100%%)\n", irq ? " IRQ" : "Thread",
	       37, spaces, ns_to_usf(total));
}
703
704
static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx)
705
{
706
struct trace_instance *trace = &taa_ctx->tool->trace;
707
int retval;
708
709
retval = tracefs_iterate_raw_events(trace->tep,
710
trace->inst,
711
NULL,
712
0,
713
collect_registered_events,
714
trace);
715
if (retval < 0) {
716
err_msg("Error iterating on events\n");
717
return 0;
718
}
719
720
return 1;
721
}
722
723
/**
724
* timerlat_auto_analysis - Analyze the collected data
725
*/
726
void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
727
{
728
struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
729
unsigned long long max_exit_from_idle = 0;
730
struct timerlat_aa_data *taa_data;
731
int max_exit_from_idle_cpu;
732
struct tep_handle *tep;
733
int cpu;
734
735
timerlat_auto_analysis_collect_trace(taa_ctx);
736
737
/* bring stop tracing to the ns scale */
738
irq_thresh = irq_thresh * 1000;
739
thread_thresh = thread_thresh * 1000;
740
741
for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
742
taa_data = timerlat_aa_get_data(taa_ctx, cpu);
743
744
if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
745
printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
746
timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
747
} else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) {
748
printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
749
timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
750
}
751
752
if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
753
max_exit_from_idle = taa_data->max_exit_idle_latency;
754
max_exit_from_idle_cpu = cpu;
755
}
756
757
}
758
759
if (max_exit_from_idle) {
760
printf("\n");
761
printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
762
ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
763
}
764
if (!taa_ctx->dump_tasks)
765
return;
766
767
printf("\n");
768
printf("Printing CPU tasks:\n");
769
for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
770
taa_data = timerlat_aa_get_data(taa_ctx, cpu);
771
tep = taa_ctx->tool->trace.tep;
772
773
printf(" [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);
774
775
if (taa_data->kworker_func)
776
printf(" kworker:%s:%s",
777
tep_find_function(tep, taa_data->kworker) ? : "<...>",
778
tep_find_function(tep, taa_data->kworker_func));
779
printf("\n");
780
}
781
782
}
783
784
/*
785
* timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
786
*/
787
static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
788
{
789
struct timerlat_aa_data *taa_data;
790
int i;
791
792
if (!taa_ctx->taa_data)
793
return;
794
795
for (i = 0; i < taa_ctx->nr_cpus; i++) {
796
taa_data = timerlat_aa_get_data(taa_ctx, i);
797
798
if (taa_data->prev_irqs_seq) {
799
trace_seq_destroy(taa_data->prev_irqs_seq);
800
free(taa_data->prev_irqs_seq);
801
}
802
803
if (taa_data->nmi_seq) {
804
trace_seq_destroy(taa_data->nmi_seq);
805
free(taa_data->nmi_seq);
806
}
807
808
if (taa_data->irqs_seq) {
809
trace_seq_destroy(taa_data->irqs_seq);
810
free(taa_data->irqs_seq);
811
}
812
813
if (taa_data->softirqs_seq) {
814
trace_seq_destroy(taa_data->softirqs_seq);
815
free(taa_data->softirqs_seq);
816
}
817
818
if (taa_data->threads_seq) {
819
trace_seq_destroy(taa_data->threads_seq);
820
free(taa_data->threads_seq);
821
}
822
823
if (taa_data->stack_seq) {
824
trace_seq_destroy(taa_data->stack_seq);
825
free(taa_data->stack_seq);
826
}
827
}
828
}
829
830
/*
831
* timerlat_aa_init_seqs - Init seq files used to store parsed information
832
*
833
* Instead of keeping data structures to store raw data, use seq files to
834
* store parsed data.
835
*
836
* Allocates and initialize seq files.
837
*
838
* Returns 0 on success, -1 otherwise.
839
*/
840
static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
841
{
842
struct timerlat_aa_data *taa_data;
843
int i;
844
845
for (i = 0; i < taa_ctx->nr_cpus; i++) {
846
847
taa_data = timerlat_aa_get_data(taa_ctx, i);
848
849
taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq));
850
if (!taa_data->prev_irqs_seq)
851
goto out_err;
852
853
trace_seq_init(taa_data->prev_irqs_seq);
854
855
taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq));
856
if (!taa_data->nmi_seq)
857
goto out_err;
858
859
trace_seq_init(taa_data->nmi_seq);
860
861
taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq));
862
if (!taa_data->irqs_seq)
863
goto out_err;
864
865
trace_seq_init(taa_data->irqs_seq);
866
867
taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq));
868
if (!taa_data->softirqs_seq)
869
goto out_err;
870
871
trace_seq_init(taa_data->softirqs_seq);
872
873
taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq));
874
if (!taa_data->threads_seq)
875
goto out_err;
876
877
trace_seq_init(taa_data->threads_seq);
878
879
taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq));
880
if (!taa_data->stack_seq)
881
goto out_err;
882
883
trace_seq_init(taa_data->stack_seq);
884
}
885
886
return 0;
887
888
out_err:
889
timerlat_aa_destroy_seqs(taa_ctx);
890
return -1;
891
}
892
893
/*
 * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
 *
 * Mirror of timerlat_aa_register_events(): unhooks every handler and
 * disables the events that were enabled there.
 */
static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
{

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				     timerlat_aa_handler, tool);

	/* all osnoise events were enabled as a group; disable them the same way */
	tracefs_event_disable(tool->trace.inst, "osnoise", NULL);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				     timerlat_aa_nmi_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				     timerlat_aa_irq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				     timerlat_aa_softirq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				     timerlat_aa_thread_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				     timerlat_aa_stack_handler, tool);
	/* the task-dump events are only registered when dump_tasks is set */
	if (!dump_tasks)
		return;

	tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
	tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				     timerlat_aa_sched_switch_handler, tool);

	tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				     timerlat_aa_kworker_start_handler, tool);
}
929
930
/*
 * timerlat_aa_register_events - Register events used in the auto-analysis
 *
 * Enables the osnoise (and, if dump_tasks, sched/workqueue) events and hooks
 * the handlers above to them.
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
{
	int retval;

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				   timerlat_aa_handler, tool);


	/*
	 * register auto-analysis handlers.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
	/*
	 * NOTE(review): the "< 0 && !errno" pattern appears to treat only
	 * "event not found" (errno unset) as fatal, tolerating other enable
	 * errors — confirm against tracefs_event_enable() semantics.
	 */
	if (retval < 0 && !errno) {
		err_msg("Could not find osnoise events\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				   timerlat_aa_nmi_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				   timerlat_aa_irq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				   timerlat_aa_softirq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				   timerlat_aa_thread_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				   timerlat_aa_stack_handler, tool);

	if (!dump_tasks)
		return 0;

	/*
	 * Dump task events.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
	if (retval < 0 && !errno) {
		err_msg("Could not find sched_switch\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				   timerlat_aa_sched_switch_handler, tool);

	retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	if (retval < 0 && !errno) {
		err_msg("Could not find workqueue_execute_start\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				   timerlat_aa_kworker_start_handler, tool);

	return 0;

out_err:
	/* undo any registration done before the failure */
	timerlat_aa_unregister_events(tool, dump_tasks);
	return -1;
}
997
998
/**
999
* timerlat_aa_destroy - Destroy timerlat auto-analysis
1000
*/
1001
void timerlat_aa_destroy(void)
1002
{
1003
struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
1004
1005
if (!taa_ctx)
1006
return;
1007
1008
if (!taa_ctx->taa_data)
1009
goto out_ctx;
1010
1011
timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
1012
timerlat_aa_destroy_seqs(taa_ctx);
1013
free(taa_ctx->taa_data);
1014
out_ctx:
1015
free(taa_ctx);
1016
}
1017
1018
/**
1019
* timerlat_aa_init - Initialize timerlat auto-analysis
1020
*
1021
* Returns 0 on success, -1 otherwise.
1022
*/
1023
int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks)
1024
{
1025
int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
1026
struct timerlat_aa_context *taa_ctx;
1027
int retval;
1028
1029
taa_ctx = calloc(1, sizeof(*taa_ctx));
1030
if (!taa_ctx)
1031
return -1;
1032
1033
__timerlat_aa_ctx = taa_ctx;
1034
1035
taa_ctx->nr_cpus = nr_cpus;
1036
taa_ctx->tool = tool;
1037
taa_ctx->dump_tasks = dump_tasks;
1038
1039
taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
1040
if (!taa_ctx->taa_data)
1041
goto out_err;
1042
1043
retval = timerlat_aa_init_seqs(taa_ctx);
1044
if (retval)
1045
goto out_err;
1046
1047
retval = timerlat_aa_register_events(tool, dump_tasks);
1048
if (retval)
1049
goto out_err;
1050
1051
return 0;
1052
1053
out_err:
1054
timerlat_aa_destroy();
1055
return -1;
1056
}
1057
1058