GitHub Repository: torvalds/linux
Path: blob/master/kernel/context_tracking.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Context tracking: Probe on high level context boundaries such as kernel,
 * userspace, guest or idle.
 *
 * This is used by RCU to remove its dependency on the timer tick while a CPU
 * runs in idle, userspace or guest mode.
 *
 * User/guest tracking started by Frederic Weisbecker:
 *
 * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker
 *
 * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
 * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
 *
 * RCU extended quiescent state bits imported from kernel/rcu/tree.c
 * where the relevant authorship may be found.
 */

#include <linux/context_tracking.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/hardirq.h>
#include <linux/export.h>
#include <linux/kprobes.h>
#include <trace/events/rcu.h>

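/*
 * Per-CPU context-tracking state. Boot-time values: RCU starts out
 * watching and the CPU is treated as non-idle kernel context until the
 * idle/user tracking machinery takes over.
 */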
DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
#ifdef CONFIG_CONTEXT_TRACKING_IDLE
	.nesting = 1,
	.nmi_nesting = CT_NESTING_IRQ_NONIDLE,
#endif
	.state = ATOMIC_INIT(CT_RCU_WATCHING),
};
EXPORT_SYMBOL_GPL(context_tracking);

#ifdef CONFIG_CONTEXT_TRACKING_IDLE
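/* Shorthand for strings referenced from the rcu_watching tracepoints below. */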
#define TPS(x) tracepoint_string(x)

/* Record the current task on exiting RCU-tasks (dyntick-idle entry). */
static __always_inline void rcu_task_exit(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
	WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id());
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
}

/* Record no current task on entering RCU-tasks (dyntick-idle exit). */
static __always_inline void rcu_task_enter(void)
{
#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
	WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
}

/* Turn on heavyweight RCU tasks trace readers on kernel exit. */
static __always_inline void rcu_task_trace_heavyweight_enter(void)
{
#ifdef CONFIG_TASKS_TRACE_RCU
	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
		current->trc_reader_special.b.need_mb = true;
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}

/* Turn off heavyweight RCU tasks trace readers on kernel entry. */
static __always_inline void rcu_task_trace_heavyweight_exit(void)
{
#ifdef CONFIG_TASKS_TRACE_RCU
	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
		current->trc_reader_special.b.need_mb = false;
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
}

/*
 * Record entry into an extended quiescent state. This is only to be
 * called when not already in an extended quiescent state, that is,
 * RCU is watching prior to the call to this function and is no longer
 * watching upon return.
 */
static noinstr void ct_kernel_exit_state(int offset)
{
	/*
	 * CPUs seeing atomic_add_return() must see prior RCU read-side
	 * critical sections, and we also must force ordering with the
	 * next idle sojourn.
	 */
	rcu_task_trace_heavyweight_enter();	// Before CT state update!
	// RCU is still watching. Better not be in extended quiescent state!
	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !rcu_is_watching_curr_cpu());
	(void)ct_state_inc(offset);
	// RCU is no longer watching.
}

/*
 * Record exit from an extended quiescent state. This is only to be
 * called from an extended quiescent state, that is, RCU is not watching
 * prior to the call to this function and is watching upon return.
 */
static noinstr void ct_kernel_enter_state(int offset)
{
	int seq;

	/*
	 * CPUs seeing atomic_add_return() must see prior idle sojourns,
	 * and we also must force ordering with the next RCU read-side
	 * critical section.
	 */
	seq = ct_state_inc(offset);
	// RCU is now watching. Better not be in an extended quiescent state!
	rcu_task_trace_heavyweight_exit();	// After CT state update!
	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & CT_RCU_WATCHING));
}

/*
 * Enter an RCU extended quiescent state, which can be either the
 * idle loop or adaptive-tickless usermode execution.
 *
 * We crowbar the ->nmi_nesting field to zero to allow for
 * the possibility of usermode upcalls having messed up our count
 * of interrupt nesting level during the prior busy period.
 */
static void noinstr ct_kernel_exit(bool user, int offset)
{
	struct context_tracking *ct = this_cpu_ptr(&context_tracking);

	WARN_ON_ONCE(ct_nmi_nesting() != CT_NESTING_IRQ_NONIDLE);
	WRITE_ONCE(ct->nmi_nesting, 0);
	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
		     ct_nesting() == 0);
	if (ct_nesting() != 1) {
		// RCU will still be watching, so just do accounting and leave.
		ct->nesting--;
		return;
	}

	instrumentation_begin();
	lockdep_assert_irqs_disabled();
	trace_rcu_watching(TPS("End"), ct_nesting(), 0, ct_rcu_watching());
	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
	rcu_preempt_deferred_qs(current);

	// instrumentation for the noinstr ct_kernel_exit_state()
	instrument_atomic_write(&ct->state, sizeof(ct->state));

	instrumentation_end();
	WRITE_ONCE(ct->nesting, 0); /* Avoid irq-access tearing. */
	// RCU is watching here ...
	ct_kernel_exit_state(offset);
	// ... but is no longer watching here.
	rcu_task_exit();
}

/*
 * Exit an RCU extended quiescent state, which can be either the
 * idle loop or adaptive-tickless usermode execution.
 *
 * We crowbar the ->nmi_nesting field to CT_NESTING_IRQ_NONIDLE to
 * allow for the possibility of usermode upcalls messing up our count of
 * interrupt nesting level during the busy period that is just now starting.
 */
static void noinstr ct_kernel_enter(bool user, int offset)
{
	struct context_tracking *ct = this_cpu_ptr(&context_tracking);
	long oldval;

	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
	oldval = ct_nesting();
	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
	if (oldval) {
		// RCU was already watching, so just do accounting and leave.
		ct->nesting++;
		return;
	}
	rcu_task_enter();
	// RCU is not watching here ...
	ct_kernel_enter_state(offset);
	// ... but is watching here.
	instrumentation_begin();

	// instrumentation for the noinstr ct_kernel_enter_state()
	instrument_atomic_write(&ct->state, sizeof(ct->state));

	trace_rcu_watching(TPS("Start"), ct_nesting(), 1, ct_rcu_watching());
	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
	WRITE_ONCE(ct->nesting, 1);
	WARN_ON_ONCE(ct_nmi_nesting());
	WRITE_ONCE(ct->nmi_nesting, CT_NESTING_IRQ_NONIDLE);
	instrumentation_end();
}

/**
 * ct_nmi_exit - inform RCU of exit from NMI context
 *
 * If we are returning from the outermost NMI handler that interrupted an
 * RCU-idle period, update ct->state and ct->nmi_nesting
 * to let the RCU grace-period handling know that the CPU is back to
 * being RCU-idle.
 *
 * If you add or remove a call to ct_nmi_exit(), be sure to test
 * with CONFIG_RCU_EQS_DEBUG=y.
 */
void noinstr ct_nmi_exit(void)
{
	struct context_tracking *ct = this_cpu_ptr(&context_tracking);

	instrumentation_begin();
	/*
	 * Check for ->nmi_nesting underflow and bad CT state.
	 * (We are exiting an NMI handler, so RCU better be paying attention
	 * to us!)
	 */
	WARN_ON_ONCE(ct_nmi_nesting() <= 0);
	WARN_ON_ONCE(!rcu_is_watching_curr_cpu());

	/*
	 * If the nesting level is not 1, the CPU wasn't RCU-idle, so
	 * leave it in non-RCU-idle state.
	 */
	if (ct_nmi_nesting() != 1) {
		trace_rcu_watching(TPS("--="), ct_nmi_nesting(), ct_nmi_nesting() - 2,
				   ct_rcu_watching());
		WRITE_ONCE(ct->nmi_nesting, /* No store tearing. */
			   ct_nmi_nesting() - 2);
		instrumentation_end();
		return;
	}

	/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
	trace_rcu_watching(TPS("Endirq"), ct_nmi_nesting(), 0, ct_rcu_watching());
	WRITE_ONCE(ct->nmi_nesting, 0); /* Avoid store tearing. */

	// instrumentation for the noinstr ct_kernel_exit_state()
	instrument_atomic_write(&ct->state, sizeof(ct->state));
	instrumentation_end();

	// RCU is watching here ...
	ct_kernel_exit_state(CT_RCU_WATCHING);
	// ... but is no longer watching here.

	if (!in_nmi())
		rcu_task_exit();
}

/**
 * ct_nmi_enter - inform RCU of entry to NMI context
 *
 * If the CPU was idle from RCU's viewpoint, update ct->state and
 * ct->nmi_nesting to let the RCU grace-period handling know
 * that the CPU is active. This implementation permits nested NMIs, as
 * long as the nesting level does not overflow an int. (You will probably
 * run out of stack space first.)
 *
 * If you add or remove a call to ct_nmi_enter(), be sure to test
 * with CONFIG_RCU_EQS_DEBUG=y.
 */
void noinstr ct_nmi_enter(void)
{
	long incby = 2;
	struct context_tracking *ct = this_cpu_ptr(&context_tracking);

	/* Complain about underflow. */
	WARN_ON_ONCE(ct_nmi_nesting() < 0);

	/*
	 * If idle from RCU viewpoint, atomically increment CT state
	 * to mark non-idle and increment ->nmi_nesting by one.
	 * Otherwise, increment ->nmi_nesting by two. This means
	 * if ->nmi_nesting is equal to one, we are guaranteed
	 * to be in the outermost NMI handler that interrupted an RCU-idle
	 * period (observation due to Andy Lutomirski).
	 */
	if (!rcu_is_watching_curr_cpu()) {

		if (!in_nmi())
			rcu_task_enter();

		// RCU is not watching here ...
		ct_kernel_enter_state(CT_RCU_WATCHING);
		// ... but is watching here.

		instrumentation_begin();
		// instrumentation for the noinstr rcu_is_watching_curr_cpu()
		instrument_atomic_read(&ct->state, sizeof(ct->state));
		// instrumentation for the noinstr ct_kernel_enter_state()
		instrument_atomic_write(&ct->state, sizeof(ct->state));

		incby = 1;
	} else if (!in_nmi()) {
		instrumentation_begin();
		rcu_irq_enter_check_tick();
	} else {
		instrumentation_begin();
	}

	trace_rcu_watching(incby == 1 ? TPS("Startirq") : TPS("++="),
			   ct_nmi_nesting(),
			   ct_nmi_nesting() + incby, ct_rcu_watching());
	instrumentation_end();
	WRITE_ONCE(ct->nmi_nesting, /* Prevent store tearing. */
		   ct_nmi_nesting() + incby);
	barrier();
}

/**
 * ct_idle_enter - inform RCU that current CPU is entering idle
 *
 * Enter idle mode, in other words, -leave- the mode in which RCU
 * read-side critical sections can occur. (Though RCU read-side
 * critical sections can occur in irq handlers in idle, a possibility
 * handled by irq_enter() and irq_exit().)
 *
 * If you add or remove a call to ct_idle_enter(), be sure to test with
 * CONFIG_RCU_EQS_DEBUG=y.
 */
void noinstr ct_idle_enter(void)
{
	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
	ct_kernel_exit(false, CT_RCU_WATCHING + CT_STATE_IDLE);
}
EXPORT_SYMBOL_GPL(ct_idle_enter);

/**
 * ct_idle_exit - inform RCU that current CPU is leaving idle
 *
 * Exit idle mode, in other words, -enter- the mode in which RCU
 * read-side critical sections can occur.
 *
 * If you add or remove a call to ct_idle_exit(), be sure to test with
 * CONFIG_RCU_EQS_DEBUG=y.
 */
void noinstr ct_idle_exit(void)
{
	unsigned long flags;

	raw_local_irq_save(flags);
	ct_kernel_enter(false, CT_RCU_WATCHING - CT_STATE_IDLE);
	raw_local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ct_idle_exit);

/**
 * ct_irq_enter - inform RCU that current CPU is entering irq away from idle
 *
 * Enter an interrupt handler, which might possibly result in exiting
 * idle mode, in other words, entering the mode in which read-side critical
 * sections can occur. The caller must have disabled interrupts.
 *
 * Note that the Linux kernel is fully capable of entering an interrupt
 * handler that it never exits, for example when doing upcalls to user mode!
 * This code assumes that the idle loop never does upcalls to user mode.
 * If your architecture's idle loop does do upcalls to user mode (or does
 * anything else that results in unbalanced calls to the irq_enter() and
 * irq_exit() functions), RCU will give you what you deserve, good and hard.
 * But very infrequently and irreproducibly.
 *
 * Use things like work queues to work around this limitation.
 *
 * You have been warned.
 *
 * If you add or remove a call to ct_irq_enter(), be sure to test with
 * CONFIG_RCU_EQS_DEBUG=y.
 */
noinstr void ct_irq_enter(void)
{
	lockdep_assert_irqs_disabled();
	ct_nmi_enter();
}

/**
 * ct_irq_exit - inform RCU that current CPU is exiting irq towards idle
 *
 * Exit from an interrupt handler, which might possibly result in entering
 * idle mode, in other words, leaving the mode in which read-side critical
 * sections can occur. The caller must have disabled interrupts.
 *
 * This code assumes that the idle loop never does anything that might
 * result in unbalanced calls to irq_enter() and irq_exit(). If your
 * architecture's idle loop violates this assumption, RCU will give you what
 * you deserve, good and hard. But very infrequently and irreproducibly.
 *
 * Use things like work queues to work around this limitation.
 *
 * You have been warned.
 *
 * If you add or remove a call to ct_irq_exit(), be sure to test with
 * CONFIG_RCU_EQS_DEBUG=y.
 */
noinstr void ct_irq_exit(void)
{
	lockdep_assert_irqs_disabled();
	ct_nmi_exit();
}

/*
 * Wrapper for ct_irq_enter() where interrupts are enabled.
 *
 * If you add or remove a call to ct_irq_enter_irqson(), be sure to test
 * with CONFIG_RCU_EQS_DEBUG=y.
 */
void ct_irq_enter_irqson(void)
{
	unsigned long flags;

	local_irq_save(flags);
	ct_irq_enter();
	local_irq_restore(flags);
}

/*
 * Wrapper for ct_irq_exit() where interrupts are enabled.
 *
 * If you add or remove a call to ct_irq_exit_irqson(), be sure to test
 * with CONFIG_RCU_EQS_DEBUG=y.
 */
void ct_irq_exit_irqson(void)
{
	unsigned long flags;

	local_irq_save(flags);
	ct_irq_exit();
	local_irq_restore(flags);
}
#else
static __always_inline void ct_kernel_exit(bool user, int offset) { }
static __always_inline void ct_kernel_enter(bool user, int offset) { }
#endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */

#ifdef CONFIG_CONTEXT_TRACKING_USER

#define CREATE_TRACE_POINTS
#include <trace/events/context_tracking.h>

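/*
 * Flipped on (via static_branch_inc()) when user/guest context tracking
 * is activated on at least one CPU; see ct_cpu_track_user().
 */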
DEFINE_STATIC_KEY_FALSE_RO(context_tracking_key);
EXPORT_SYMBOL_GPL(context_tracking_key);

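/*
 * Guard against recursive entry into the context-tracking code on this
 * CPU (e.g. via tracing from within these functions). Returns true only
 * for the outermost caller, which must then pair with
 * context_tracking_recursion_exit().
 */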
static noinstr bool context_tracking_recursion_enter(void)
{
	int recursion;

	recursion = __this_cpu_inc_return(context_tracking.recursion);
	if (recursion == 1)
		return true;

	WARN_ONCE((recursion < 1), "Invalid context tracking recursion value %d\n", recursion);
	__this_cpu_dec(context_tracking.recursion);

	return false;
}

static __always_inline void context_tracking_recursion_exit(void)
{
	__this_cpu_dec(context_tracking.recursion);
}

/**
 * __ct_user_enter - Inform the context tracking that the CPU is going
 *                   to enter user or guest space mode.
 *
 * @state: userspace context-tracking state to enter.
 *
 * This function must be called right before we switch from the kernel
 * to user or guest space, when it's guaranteed the remaining kernel
 * instructions to execute won't use any RCU read side critical section
 * because this function sets RCU in extended quiescent state.
 */
void noinstr __ct_user_enter(enum ctx_state state)
{
	struct context_tracking *ct = this_cpu_ptr(&context_tracking);
	lockdep_assert_irqs_disabled();

	/* Kernel threads aren't supposed to go to userspace */
	WARN_ON_ONCE(!current->mm);

	if (!context_tracking_recursion_enter())
		return;

	if (__ct_state() != state) {
		if (ct->active) {
			/*
			 * At this stage, only low level arch entry code remains and
			 * then we'll run in userspace. We can assume there won't be
			 * any RCU read-side critical section until the next call to
			 * user_exit() or ct_irq_enter(). Let's remove RCU's dependency
			 * on the tick.
			 */
			if (state == CT_STATE_USER) {
				instrumentation_begin();
				trace_user_enter(0);
				vtime_user_enter(current);
				instrumentation_end();
			}
			/*
			 * Other than generic entry implementation, we may be past the last
			 * rescheduling opportunity in the entry code. Trigger a self IPI
			 * that will fire and reschedule once we resume in user/guest mode.
			 */
			rcu_irq_work_resched();

			/*
			 * Enter RCU idle mode right before resuming userspace. No use of RCU
			 * is permitted between this call and rcu_eqs_exit(). This way the
			 * CPU doesn't need to maintain the tick for RCU maintenance purposes
			 * when the CPU runs in userspace.
			 */
			ct_kernel_exit(true, CT_RCU_WATCHING + state);

			/*
			 * Special case if we only track user <-> kernel transitions for tickless
			 * cputime accounting but we don't support RCU extended quiescent state.
			 * In this case we don't care about any concurrency/ordering.
			 */
			if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE))
				raw_atomic_set(&ct->state, state);
		} else {
			/*
			 * Even if context tracking is disabled on this CPU, because it's outside
			 * the full dynticks mask for example, we still have to keep track of the
			 * context transitions and states to prevent inconsistency on those of
			 * other CPUs.
			 * If a task triggers an exception in userspace, sleeps in the exception
			 * handler and then migrates to another CPU, that new CPU must know where
			 * the exception returns by the time we call exception_exit().
			 * This information can only be provided by the previous CPU when it called
			 * exception_enter().
			 * OTOH we can spare the calls to vtime and RCU when context_tracking.active
			 * is false because we know that CPU is not tickless.
			 */
			if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) {
				/* Tracking for vtime only, no concurrent RCU EQS accounting */
				raw_atomic_set(&ct->state, state);
			} else {
				/*
				 * Tracking for vtime and RCU EQS. Make sure we don't race
				 * with NMIs. OTOH we don't care about ordering here since
				 * RCU only requires CT_RCU_WATCHING increments to be fully
				 * ordered.
				 */
				raw_atomic_add(state, &ct->state);
			}
		}
	}
	context_tracking_recursion_exit();
}
EXPORT_SYMBOL_GPL(__ct_user_enter);

/*
 * OBSOLETE:
 * This function should be noinstr but the below local_irq_restore() is
 * unsafe because it involves illegal RCU uses through tracing and lockdep.
 * This is unlikely to be fixed as this function is obsolete. The preferred
 * way is to call __context_tracking_enter() through user_enter_irqoff()
 * or context_tracking_guest_enter(). It should be the arch entry code
 * responsibility to call into context tracking with IRQs disabled.
 */
void ct_user_enter(enum ctx_state state)
{
	unsigned long flags;

	/*
	 * Some contexts may involve an exception occurring in an irq,
	 * leading to that nesting:
	 * ct_irq_enter() rcu_eqs_exit(true) rcu_eqs_enter(true) ct_irq_exit()
	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
	 * helpers are enough to protect RCU uses inside the exception. So
	 * just return immediately if we detect we are in an IRQ.
	 */
	if (in_interrupt())
		return;

	local_irq_save(flags);
	__ct_user_enter(state);
	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(ct_user_enter);
EXPORT_SYMBOL_GPL(ct_user_enter);

/**
 * user_enter_callable() - Unfortunate ASM callable version of user_enter() for
 *                         archs that didn't manage to check the context tracking
 *                         static key from low level code.
 *
 * This OBSOLETE function should be noinstr but it unsafely calls
 * local_irq_restore(), involving illegal RCU uses through tracing and lockdep.
 * This is unlikely to be fixed as this function is obsolete. The preferred
 * way is to call user_enter_irqoff(). It should be the arch entry code
 * responsibility to call into context tracking with IRQs disabled.
 */
void user_enter_callable(void)
{
	user_enter();
}
NOKPROBE_SYMBOL(user_enter_callable);

/**
 * __ct_user_exit - Inform the context tracking that the CPU is
 *                  exiting user or guest mode and entering the kernel.
 *
 * @state: userspace context-tracking state being exited from.
 *
 * This function must be called after we entered the kernel from user or
 * guest space, before any use of RCU read side critical sections. This
 * potentially includes any high level kernel code like syscalls, exceptions,
 * signal handling, etc...
 *
 * This call supports re-entrancy. This way it can be called from any exception
 * handler without needing to know if we came from userspace or not.
 */
void noinstr __ct_user_exit(enum ctx_state state)
{
	struct context_tracking *ct = this_cpu_ptr(&context_tracking);

	if (!context_tracking_recursion_enter())
		return;

	if (__ct_state() == state) {
		if (ct->active) {
			/*
			 * Exit RCU idle mode while entering the kernel because it can
			 * run an RCU read side critical section anytime.
			 */
			ct_kernel_enter(true, CT_RCU_WATCHING - state);
			if (state == CT_STATE_USER) {
				instrumentation_begin();
				vtime_user_exit(current);
				trace_user_exit(0);
				instrumentation_end();
			}

			/*
			 * Special case if we only track user <-> kernel transitions for tickless
			 * cputime accounting but we don't support RCU extended quiescent state.
			 * In this case we don't care about any concurrency/ordering.
			 */
			if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE))
				raw_atomic_set(&ct->state, CT_STATE_KERNEL);

		} else {
			if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) {
				/* Tracking for vtime only, no concurrent RCU EQS accounting */
				raw_atomic_set(&ct->state, CT_STATE_KERNEL);
			} else {
				/*
				 * Tracking for vtime and RCU EQS. Make sure we don't race
				 * with NMIs. OTOH we don't care about ordering here since
				 * RCU only requires CT_RCU_WATCHING increments to be fully
				 * ordered.
				 */
				raw_atomic_sub(state, &ct->state);
			}
		}
	}
	context_tracking_recursion_exit();
}
EXPORT_SYMBOL_GPL(__ct_user_exit);

/*
 * OBSOLETE:
 * This function should be noinstr but the below local_irq_save() is
 * unsafe because it involves illegal RCU uses through tracing and lockdep.
 * This is unlikely to be fixed as this function is obsolete. The preferred
 * way is to call __context_tracking_exit() through user_exit_irqoff()
 * or context_tracking_guest_exit(). It should be the arch entry code
 * responsibility to call into context tracking with IRQs disabled.
 */
void ct_user_exit(enum ctx_state state)
{
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);
	__ct_user_exit(state);
	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(ct_user_exit);
EXPORT_SYMBOL_GPL(ct_user_exit);

/**
 * user_exit_callable() - Unfortunate ASM callable version of user_exit() for
 *                        archs that didn't manage to check the context tracking
 *                        static key from low level code.
 *
 * This OBSOLETE function should be noinstr but it unsafely calls local_irq_save(),
 * involving illegal RCU uses through tracing and lockdep. This is unlikely
 * to be fixed as this function is obsolete. The preferred way is to call
 * user_exit_irqoff(). It should be the arch entry code responsibility to
 * call into context tracking with IRQs disabled.
 */
void user_exit_callable(void)
{
	user_exit();
}
NOKPROBE_SYMBOL(user_exit_callable);

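/*
 * Activate user/guest context tracking on @cpu: mark its per-CPU state
 * active and enable the context_tracking_key static branch. The one-time
 * boot-time setup below runs only on the first invocation.
 */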
void __init ct_cpu_track_user(int cpu)
{
	static __initdata bool initialized = false;

	if (!per_cpu(context_tracking.active, cpu)) {
		per_cpu(context_tracking.active, cpu) = true;
		static_branch_inc(&context_tracking_key);
	}

	if (initialized)
		return;

#ifdef CONFIG_HAVE_TIF_NOHZ
	/*
	 * Set TIF_NOHZ to init/0 and let it propagate to all tasks through fork
	 * This assumes that init is the only task at this early boot stage.
	 */
	set_tsk_thread_flag(&init_task, TIF_NOHZ);
#endif
	WARN_ON_ONCE(!tasklist_empty());

	initialized = true;
}

#ifdef CONFIG_CONTEXT_TRACKING_USER_FORCE
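/* Boot-time forcing: enable user context tracking on every possible CPU. */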
void __init context_tracking_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		ct_cpu_track_user(cpu);
}
#endif

#endif /* #ifdef CONFIG_CONTEXT_TRACKING_USER */