Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kernel/cpu/mce/threshold.c
26516 views
1
// SPDX-License-Identifier: GPL-2.0
2
/*
 * Common corrected MCE threshold handler code.
 */
5
#include <linux/interrupt.h>
6
#include <linux/kernel.h>
7
8
#include <asm/irq_vectors.h>
9
#include <asm/traps.h>
10
#include <asm/apic.h>
11
#include <asm/mce.h>
12
#include <asm/trace/irq_vectors.h>
13
14
#include "internal.h"
15
16
static void default_threshold_interrupt(void)
17
{
18
pr_err("Unexpected threshold interrupt at vector %x\n",
19
THRESHOLD_APIC_VECTOR);
20
}
21
22
/*
 * Handler invoked by sysvec_threshold() for every threshold interrupt.
 * Starts out pointing at default_threshold_interrupt(); presumably
 * vendor-specific MCE setup code replaces it (not visible in this file).
 */
void (*mce_threshold_vector)(void) = default_threshold_interrupt;
23
24
DEFINE_IDTENTRY_SYSVEC(sysvec_threshold)
25
{
26
trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
27
inc_irq_stat(irq_threshold_count);
28
mce_threshold_vector();
29
trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
30
apic_eoi();
31
}
32
33
/*
 * Per-CPU storm tracking state: per-bank history/timestamp/mode flags
 * (banks[]), the number of banks currently in storm mode
 * (stormy_bank_count) and the poll_mode flag, as used by the functions
 * in this file.
 */
DEFINE_PER_CPU(struct mca_storm_desc, storm_desc);
34
35
void mce_inherit_storm(unsigned int bank)
36
{
37
struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);
38
39
/*
40
* Previous CPU owning this bank had put it into storm mode,
41
* but the precise history of that storm is unknown. Assume
42
* the worst (all recent polls of the bank found a valid error
43
* logged). This will avoid the new owner prematurely declaring
44
* the storm has ended.
45
*/
46
storm->banks[bank].history = ~0ull;
47
storm->banks[bank].timestamp = jiffies;
48
}
49
50
bool mce_get_storm_mode(void)
51
{
52
return __this_cpu_read(storm_desc.poll_mode);
53
}
54
55
/*
 * mce_set_storm_mode - update this CPU's storm poll_mode flag
 * @storm: new value stored into storm_desc.poll_mode on the executing CPU
 */
void mce_set_storm_mode(bool storm)
{
	__this_cpu_write(storm_desc.poll_mode, storm);
}
59
60
/*
 * mce_handle_storm - forward a storm state change to the vendor code
 * @bank: bank whose storm state changed
 * @on:   true when the storm starts, false when it ends
 *
 * Only Intel has a storm handler wired up here; on every other vendor
 * this is a no-op.
 */
static void mce_handle_storm(unsigned int bank, bool on)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return;

	mce_intel_handle_storm(bank, on);
}
68
69
void cmci_storm_begin(unsigned int bank)
70
{
71
struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);
72
73
__set_bit(bank, this_cpu_ptr(mce_poll_banks));
74
storm->banks[bank].in_storm_mode = true;
75
76
/*
77
* If this is the first bank on this CPU to enter storm mode
78
* start polling.
79
*/
80
if (++storm->stormy_bank_count == 1)
81
mce_timer_kick(true);
82
}
83
84
void cmci_storm_end(unsigned int bank)
85
{
86
struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);
87
88
__clear_bit(bank, this_cpu_ptr(mce_poll_banks));
89
storm->banks[bank].history = 0;
90
storm->banks[bank].in_storm_mode = false;
91
92
/* If no banks left in storm mode, stop polling. */
93
if (!--storm->stormy_bank_count)
94
mce_timer_kick(false);
95
}
96
97
void mce_track_storm(struct mce *mce)
98
{
99
struct mca_storm_desc *storm = this_cpu_ptr(&storm_desc);
100
unsigned long now = jiffies, delta;
101
unsigned int shift = 1;
102
u64 history = 0;
103
104
/* No tracking needed for banks that do not support CMCI */
105
if (storm->banks[mce->bank].poll_only)
106
return;
107
108
/*
109
* When a bank is in storm mode it is polled once per second and
110
* the history mask will record about the last minute of poll results.
111
* If it is not in storm mode, then the bank is only checked when
112
* there is a CMCI interrupt. Check how long it has been since
113
* this bank was last checked, and adjust the amount of "shift"
114
* to apply to history.
115
*/
116
if (!storm->banks[mce->bank].in_storm_mode) {
117
delta = now - storm->banks[mce->bank].timestamp;
118
shift = (delta + HZ) / HZ;
119
}
120
121
/* If it has been a long time since the last poll, clear history. */
122
if (shift < NUM_HISTORY_BITS)
123
history = storm->banks[mce->bank].history << shift;
124
125
storm->banks[mce->bank].timestamp = now;
126
127
/* History keeps track of corrected errors. VAL=1 && UC=0 */
128
if ((mce->status & MCI_STATUS_VAL) && mce_is_correctable(mce))
129
history |= 1;
130
131
storm->banks[mce->bank].history = history;
132
133
if (storm->banks[mce->bank].in_storm_mode) {
134
if (history & GENMASK_ULL(STORM_END_POLL_THRESHOLD, 0))
135
return;
136
printk_deferred(KERN_NOTICE "CPU%d BANK%d CMCI storm subsided\n", smp_processor_id(), mce->bank);
137
mce_handle_storm(mce->bank, false);
138
cmci_storm_end(mce->bank);
139
} else {
140
if (hweight64(history) < STORM_BEGIN_THRESHOLD)
141
return;
142
printk_deferred(KERN_NOTICE "CPU%d BANK%d CMCI storm detected\n", smp_processor_id(), mce->bank);
143
mce_handle_storm(mce->bank, true);
144
cmci_storm_begin(mce->bank);
145
}
146
}
147
148