Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/s390/kernel/nmi.c
10817 views
1
/*
2
* Machine check handler
3
*
4
* Copyright IBM Corp. 2000,2009
5
* Author(s): Ingo Adlung <[email protected]>,
6
* Martin Schwidefsky <[email protected]>,
7
* Cornelia Huck <[email protected]>,
8
* Heiko Carstens <[email protected]>,
9
*/
10
11
#include <linux/kernel_stat.h>
12
#include <linux/init.h>
13
#include <linux/errno.h>
14
#include <linux/hardirq.h>
15
#include <linux/time.h>
16
#include <linux/module.h>
17
#include <asm/lowcore.h>
18
#include <asm/smp.h>
19
#include <asm/etr.h>
20
#include <asm/cputime.h>
21
#include <asm/nmi.h>
22
#include <asm/crw.h>
23
24
/*
 * Per-cpu record of machine check work that s390_do_machine_check() leaves
 * behind for s390_handle_mcck() to process.
 */
struct mcck_struct {
	int kill_task;			/* set when the current task must be terminated */
	int channel_report;		/* set when crw_handle_channel_report() must run */
	int warning;			/* set when a warning machine check was received */
	unsigned long long mcck_code;	/* interruption code reported when killing the task */
};

/* One record per cpu; s390_handle_mcck() copies it and then zeroes it. */
static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
32
33
/*
 * Unrecoverable machine check: call smp_send_stop(), then enter a disabled
 * wait that is handed the return address of our caller. Never returns.
 *
 * @msg: reason text supplied by the caller; it is not used by this function.
 */
static NORET_TYPE void s390_handle_damage(char *msg)
{
	unsigned long caller;

	smp_send_stop();
	caller = (unsigned long) __builtin_return_address(0);
	disabled_wait(caller);
	/* Spin forever so that control can never fall off the end. */
	for (;;)
		;
}
39
40
/*
41
* Main machine check handler function. Will be called with interrupts enabled
42
* or disabled and machine checks enabled or disabled.
43
*/
44
void s390_handle_mcck(void)
45
{
46
unsigned long flags;
47
struct mcck_struct mcck;
48
49
/*
50
* Disable machine checks and get the current state of accumulated
51
* machine checks. Afterwards delete the old state and enable machine
52
* checks again.
53
*/
54
local_irq_save(flags);
55
local_mcck_disable();
56
mcck = __get_cpu_var(cpu_mcck);
57
memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct));
58
clear_thread_flag(TIF_MCCK_PENDING);
59
local_mcck_enable();
60
local_irq_restore(flags);
61
62
if (mcck.channel_report)
63
crw_handle_channel_report();
64
/*
65
* A warning may remain for a prolonged period on the bare iron.
66
* (actually until the machine is powered off, or the problem is gone)
67
* So we just stop listening for the WARNING MCH and avoid continuously
68
* being interrupted. One caveat is however, that we must do this per
69
* processor and cannot use the smp version of ctl_clear_bit().
70
* On VM we only get one interrupt per virtally presented machinecheck.
71
* Though one suffices, we may get one interrupt per (virtual) cpu.
72
*/
73
if (mcck.warning) { /* WARNING pending ? */
74
static int mchchk_wng_posted = 0;
75
76
/* Use single cpu clear, as we cannot handle smp here. */
77
__ctl_clear_bit(14, 24); /* Disable WARNING MCH */
78
if (xchg(&mchchk_wng_posted, 1) == 0)
79
kill_cad_pid(SIGPWR, 1);
80
}
81
if (mcck.kill_task) {
82
local_irq_enable();
83
printk(KERN_EMERG "mcck: Terminating task because of machine "
84
"malfunction (code 0x%016llx).\n", mcck.mcck_code);
85
printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
86
current->comm, current->pid);
87
do_exit(SIGSEGV);
88
}
89
}
90
EXPORT_SYMBOL_GPL(s390_handle_mcck);
91
92
/*
 * Reload the cpu registers from the lowcore save areas after a machine
 * check and evaluate the validity bits of the interruption code.
 *
 * @mci: machine check interruption code whose validity bits are tested.
 *
 * returns 0 if all registers could be validated
 * returns 1 otherwise
 *
 * Calls s390_handle_damage(), which does not return, when the control
 * registers or the old PSW are flagged as invalid.
 */
static int notrace s390_revalidate_registers(struct mci *mci)
{
	int kill_task;
	u64 zero;
	void *fpt_save_area, *fpt_creg_save_area;

	kill_task = 0;
	zero = 0;	/* zeroed storage operand for the lfpc fallback below */

	if (!mci->gr) {
		/*
		 * General purpose registers couldn't be restored and have
		 * unknown contents. Process needs to be terminated.
		 */
		kill_task = 1;
	}
	if (!mci->fp) {
		/*
		 * Floating point registers can't be restored and
		 * therefore the process needs to be terminated.
		 */
		kill_task = 1;
	}
#ifndef CONFIG_64BIT
	/*
	 * Non-64-bit build: reload floating point registers 0, 2, 4 and 6
	 * from the lowcore floating point save area.
	 */
	asm volatile(
		" ld 0,0(%0)\n"
		" ld 2,8(%0)\n"
		" ld 4,16(%0)\n"
		" ld 6,24(%0)"
		: : "a" (&S390_lowcore.floating_pt_save_area));
#endif

	if (MACHINE_HAS_IEEE) {
		/*
		 * Locate the save areas: on 64 bit they are lowcore fields,
		 * otherwise they live in the extended save area, with the
		 * floating point control register 128 bytes past its start.
		 */
#ifdef CONFIG_64BIT
		fpt_save_area = &S390_lowcore.floating_pt_save_area;
		fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
#else
		fpt_save_area = (void *) S390_lowcore.extended_save_area_addr;
		fpt_creg_save_area = fpt_save_area + 128;
#endif
		if (!mci->fc) {
			/*
			 * Floating point control register can't be restored.
			 * Task will be terminated.
			 * The register is loaded from the zeroed local
			 * instead of from the save area.
			 */
			asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero));
			kill_task = 1;

		} else
			asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));

		/* Reload all sixteen floating point registers. */
		asm volatile(
			" ld 0,0(%0)\n"
			" ld 1,8(%0)\n"
			" ld 2,16(%0)\n"
			" ld 3,24(%0)\n"
			" ld 4,32(%0)\n"
			" ld 5,40(%0)\n"
			" ld 6,48(%0)\n"
			" ld 7,56(%0)\n"
			" ld 8,64(%0)\n"
			" ld 9,72(%0)\n"
			" ld 10,80(%0)\n"
			" ld 11,88(%0)\n"
			" ld 12,96(%0)\n"
			" ld 13,104(%0)\n"
			" ld 14,112(%0)\n"
			" ld 15,120(%0)\n"
			: : "a" (fpt_save_area));
	}
	/* Revalidate access registers */
	asm volatile(
		" lam 0,15,0(%0)"
		: : "a" (&S390_lowcore.access_regs_save_area));
	if (!mci->ar) {
		/*
		 * Access registers have unknown contents.
		 * Terminating task.
		 */
		kill_task = 1;
	}
	/* Revalidate control registers */
	if (!mci->cr) {
		/*
		 * Control registers have unknown contents.
		 * Can't recover and therefore stopping machine.
		 */
		s390_handle_damage("invalid control registers.");
	} else {
		/* Reload control registers 0-15 from the lowcore save area. */
#ifdef CONFIG_64BIT
		asm volatile(
			" lctlg 0,15,0(%0)"
			: : "a" (&S390_lowcore.cregs_save_area));
#else
		asm volatile(
			" lctl 0,15,0(%0)"
			: : "a" (&S390_lowcore.cregs_save_area));
#endif
	}
	/*
	 * We don't even try to revalidate the TOD register, since we simply
	 * can't write something sensible into that register.
	 */
#ifdef CONFIG_64BIT
	/*
	 * See if we can revalidate the TOD programmable register with its
	 * old contents (should be zero) otherwise set it to zero.
	 * Both variants place the value in general register 0 before
	 * issuing sckpf, hence the "0" clobber.
	 */
	if (!mci->pr)
		asm volatile(
			" sr 0,0\n"
			" sckpf"
			: : : "0", "cc");
	else
		asm volatile(
			" l 0,0(%0)\n"
			" sckpf"
			: : "a" (&S390_lowcore.tod_progreg_save_area)
			: "0", "cc");
#endif
	/*
	 * Revalidate clock comparator register: a saved value of -1 is
	 * replaced by the machine check timestamp from the lowcore.
	 */
	if (S390_lowcore.clock_comparator == -1)
		set_clock_comparator(S390_lowcore.mcck_clock);
	else
		set_clock_comparator(S390_lowcore.clock_comparator);
	/* Check if old PSW is valid */
	if (!mci->wp)
		/*
		 * Can't tell if we come from user or kernel mode
		 * -> stopping machine.
		 */
		s390_handle_damage("old psw invalid.");

	/* Any of the ms, pm or ia validity bits missing also costs the task. */
	if (!mci->ms || !mci->pm || !mci->ia)
		kill_task = 1;

	return kill_task;
}
234
235
/*
 * Limits for instruction processing damage retries in
 * s390_do_machine_check(): the machine is stopped once the retry counter
 * reaches MAX_IPD_COUNT. The counter restarts at 1 whenever the clock delta
 * to the previous event, shifted right by 12, is not below MAX_IPD_TIME.
 */
#define MAX_IPD_COUNT 29
#define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */

/* Bit numbers tested in S390_lowcore.external_damage_code. */
#define ED_STP_ISLAND 6 /* External damage STP island check */
#define ED_STP_SYNC 7 /* External damage STP sync check */
#define ED_ETR_SYNC 12 /* External damage ETR sync check */
#define ED_ETR_SWITCH 13 /* External damage ETR switch to local */
242
243
/*
 * machine check handler.
 *
 * Inspects the machine check interruption code in the lowcore. Conditions
 * that cannot be survived end in s390_handle_damage(); work that can be
 * deferred is recorded in this cpu's cpu_mcck and flagged through
 * TIF_MCCK_PENDING so that s390_handle_mcck() picks it up.
 *
 * @regs: register set of the interrupted context.
 */
void notrace s390_do_machine_check(struct pt_regs *regs)
{
	/* State of the retry limiter, shared by all cpus under ipd_lock. */
	static int ipd_count;
	static DEFINE_SPINLOCK(ipd_lock);
	static unsigned long long last_ipd;
	struct mcck_struct *mcck;
	unsigned long long tmp;
	struct mci *mci;
	int umode;

	nmi_enter();
	s390_idle_check(regs, S390_lowcore.mcck_clock,
			S390_lowcore.mcck_enter_timer);
	/* Count this machine check in the per-cpu interrupt statistics. */
	kstat_cpu(smp_processor_id()).irqs[NMI_NMI]++;
	mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
	mcck = &__get_cpu_var(cpu_mcck);
	umode = user_mode(regs);

	if (mci->sd) {
		/* System damage -> stopping machine */
		s390_handle_damage("received system damage machine check.");
	}
	if (mci->pd) {
		if (mci->b) {
			/* Processing backup -> verify if we can survive this */
			u64 z_mcic, o_mcic, t_mcic;
			/*
			 * z_mcic: bits of the interruption code that must all
			 * be zero; o_mcic: bits that must all be one.
			 */
#ifdef CONFIG_64BIT
			z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
			o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
				  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
				  1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
				  1ULL<<16);
#else
			z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<57 | 1ULL<<50 |
				  1ULL<<29);
			o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
				  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
				  1ULL<<30 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16);
#endif
			/* Read the interruption code as one 64-bit value. */
			t_mcic = *(u64 *)mci;

			if (((t_mcic & z_mcic) != 0) ||
			    ((t_mcic & o_mcic) != o_mcic)) {
				s390_handle_damage("processing backup machine "
						   "check with damage.");
			}

			/*
			 * Nullifying exigent condition, therefore we might
			 * retry this instruction.
			 * Limit the retries: an event closer than
			 * MAX_IPD_TIME to the previous one bumps the counter,
			 * otherwise counting restarts at 1. Reaching
			 * MAX_IPD_COUNT stops the machine (with ipd_lock
			 * still held, since s390_handle_damage() never
			 * returns).
			 */
			spin_lock(&ipd_lock);
			tmp = get_clock();
			if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME)
				ipd_count++;
			else
				ipd_count = 1;
			last_ipd = tmp;
			if (ipd_count == MAX_IPD_COUNT)
				s390_handle_damage("too many ipd retries.");
			spin_unlock(&ipd_lock);
		} else {
			/* Processing damage -> stopping machine */
			s390_handle_damage("received instruction processing "
					   "damage machine check.");
		}
	}
	if (s390_revalidate_registers(mci)) {
		if (umode) {
			/*
			 * Couldn't restore all register contents while in
			 * user mode -> mark task for termination.
			 */
			mcck->kill_task = 1;
			mcck->mcck_code = *(unsigned long long *) mci;
			set_thread_flag(TIF_MCCK_PENDING);
		} else {
			/*
			 * Couldn't restore all register contents while in
			 * kernel mode -> stopping machine.
			 */
			s390_handle_damage("unable to revalidate registers.");
		}
	}
	if (mci->cd) {
		/* Timing facility damage */
		s390_handle_damage("TOD clock damaged");
	}
	if (mci->ed && mci->ec) {
		/* External damage: dispatch on the external damage code bits */
		if (S390_lowcore.external_damage_code & (1U << ED_ETR_SYNC))
			etr_sync_check();
		if (S390_lowcore.external_damage_code & (1U << ED_ETR_SWITCH))
			etr_switch_to_local();
		if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
			stp_sync_check();
		if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
			stp_island_check();
	}
	if (mci->se)
		/* Storage error uncorrected */
		s390_handle_damage("received storage error uncorrected "
				   "machine check.");
	if (mci->ke)
		/* Storage key-error uncorrected */
		s390_handle_damage("received storage key-error uncorrected "
				   "machine check.");
	if (mci->ds && mci->fa)
		/* Storage degradation */
		s390_handle_damage("received storage degradation machine "
				   "check.");
	if (mci->cp) {
		/* Channel report word pending: defer to s390_handle_mcck() */
		mcck->channel_report = 1;
		set_thread_flag(TIF_MCCK_PENDING);
	}
	if (mci->w) {
		/* Warning pending: defer to s390_handle_mcck() */
		mcck->warning = 1;
		set_thread_flag(TIF_MCCK_PENDING);
	}
	nmi_exit();
}
369
370
/*
 * Initcall: set the control register 14 mask bits for the machine check
 * subclasses listed below. Always returns 0.
 */
static int __init machine_check_init(void)
{
	/* Bits of control register 14, set in exactly this order. */
	const int subclass_bits[] = {
		25,	/* enable external damage MCH */
		27,	/* enable system recovery MCH */
		24,	/* enable warning MCH */
	};
	unsigned int idx;

	for (idx = 0; idx < sizeof(subclass_bits) / sizeof(subclass_bits[0]); idx++)
		ctl_set_bit(14, subclass_bits[idx]);

	return 0;
}
377
arch_initcall(machine_check_init);
378
379