Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/powerpc/platforms/powernv/opal-hmi.c
26481 views
1
// SPDX-License-Identifier: GPL-2.0-or-later
2
/*
3
* OPAL hypervisor Maintenance interrupt handling support in PowerNV.
4
*
5
* Copyright 2014 IBM Corporation
6
* Author: Mahesh Salgaonkar <[email protected]>
7
*/
8
9
#undef DEBUG
10
11
#include <linux/kernel.h>
12
#include <linux/init.h>
13
#include <linux/of.h>
14
#include <linux/mm.h>
15
#include <linux/slab.h>
16
17
#include <asm/opal.h>
18
#include <asm/cputable.h>
19
#include <asm/machdep.h>
20
21
#include "powernv.h"
22
23
/* Non-zero once opal_hmi_handler_init() has registered the HMI notifier. */
static int opal_hmi_handler_nb_init;
24
struct OpalHmiEvtNode {
25
struct list_head list;
26
struct OpalHMIEvent hmi_evt;
27
};
28
29
/*
 * Table entry pairing a checkstop reason mask with the text printed
 * when that mask matches the reason word of an HMI event.
 */
struct xstop_reason {
	uint32_t	xstop_reason;	/* mask ANDed with the event's reason word */
	const char	*unit_failed;	/* name printed in the "Unit:" field */
	const char	*description;	/* human readable explanation */
};
34
35
/* HMI events queued by opal_handle_hmi_event() and drained by hmi_event_handler(). */
static LIST_HEAD(opal_hmi_evt_list);
36
/* Held (with interrupts saved) around every access to opal_hmi_evt_list. */
static DEFINE_SPINLOCK(opal_hmi_evt_lock);
37
38
static void print_core_checkstop_reason(const char *level,
39
struct OpalHMIEvent *hmi_evt)
40
{
41
int i;
42
static const struct xstop_reason xstop_reason[] = {
43
{ CORE_CHECKSTOP_IFU_REGFILE, "IFU",
44
"RegFile core check stop" },
45
{ CORE_CHECKSTOP_IFU_LOGIC, "IFU", "Logic core check stop" },
46
{ CORE_CHECKSTOP_PC_DURING_RECOV, "PC",
47
"Core checkstop during recovery" },
48
{ CORE_CHECKSTOP_ISU_REGFILE, "ISU",
49
"RegFile core check stop (mapper error)" },
50
{ CORE_CHECKSTOP_ISU_LOGIC, "ISU", "Logic core check stop" },
51
{ CORE_CHECKSTOP_FXU_LOGIC, "FXU", "Logic core check stop" },
52
{ CORE_CHECKSTOP_VSU_LOGIC, "VSU", "Logic core check stop" },
53
{ CORE_CHECKSTOP_PC_RECOV_IN_MAINT_MODE, "PC",
54
"Recovery in maintenance mode" },
55
{ CORE_CHECKSTOP_LSU_REGFILE, "LSU",
56
"RegFile core check stop" },
57
{ CORE_CHECKSTOP_PC_FWD_PROGRESS, "PC",
58
"Forward Progress Error" },
59
{ CORE_CHECKSTOP_LSU_LOGIC, "LSU", "Logic core check stop" },
60
{ CORE_CHECKSTOP_PC_LOGIC, "PC", "Logic core check stop" },
61
{ CORE_CHECKSTOP_PC_HYP_RESOURCE, "PC",
62
"Hypervisor Resource error - core check stop" },
63
{ CORE_CHECKSTOP_PC_HANG_RECOV_FAILED, "PC",
64
"Hang Recovery Failed (core check stop)" },
65
{ CORE_CHECKSTOP_PC_AMBI_HANG_DETECTED, "PC",
66
"Ambiguous Hang Detected (unknown source)" },
67
{ CORE_CHECKSTOP_PC_DEBUG_TRIG_ERR_INJ, "PC",
68
"Debug Trigger Error inject" },
69
{ CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ, "PC",
70
"Hypervisor check stop via SPRC/SPRD" },
71
};
72
73
/* Validity check */
74
if (!hmi_evt->u.xstop_error.xstop_reason) {
75
printk("%s Unknown Core check stop.\n", level);
76
return;
77
}
78
79
printk("%s CPU PIR: %08x\n", level,
80
be32_to_cpu(hmi_evt->u.xstop_error.u.pir));
81
for (i = 0; i < ARRAY_SIZE(xstop_reason); i++)
82
if (be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason) &
83
xstop_reason[i].xstop_reason)
84
printk("%s [Unit: %-3s] %s\n", level,
85
xstop_reason[i].unit_failed,
86
xstop_reason[i].description);
87
}
88
89
static void print_nx_checkstop_reason(const char *level,
90
struct OpalHMIEvent *hmi_evt)
91
{
92
int i;
93
static const struct xstop_reason xstop_reason[] = {
94
{ NX_CHECKSTOP_SHM_INVAL_STATE_ERR, "DMA & Engine",
95
"SHM invalid state error" },
96
{ NX_CHECKSTOP_DMA_INVAL_STATE_ERR_1, "DMA & Engine",
97
"DMA invalid state error bit 15" },
98
{ NX_CHECKSTOP_DMA_INVAL_STATE_ERR_2, "DMA & Engine",
99
"DMA invalid state error bit 16" },
100
{ NX_CHECKSTOP_DMA_CH0_INVAL_STATE_ERR, "DMA & Engine",
101
"Channel 0 invalid state error" },
102
{ NX_CHECKSTOP_DMA_CH1_INVAL_STATE_ERR, "DMA & Engine",
103
"Channel 1 invalid state error" },
104
{ NX_CHECKSTOP_DMA_CH2_INVAL_STATE_ERR, "DMA & Engine",
105
"Channel 2 invalid state error" },
106
{ NX_CHECKSTOP_DMA_CH3_INVAL_STATE_ERR, "DMA & Engine",
107
"Channel 3 invalid state error" },
108
{ NX_CHECKSTOP_DMA_CH4_INVAL_STATE_ERR, "DMA & Engine",
109
"Channel 4 invalid state error" },
110
{ NX_CHECKSTOP_DMA_CH5_INVAL_STATE_ERR, "DMA & Engine",
111
"Channel 5 invalid state error" },
112
{ NX_CHECKSTOP_DMA_CH6_INVAL_STATE_ERR, "DMA & Engine",
113
"Channel 6 invalid state error" },
114
{ NX_CHECKSTOP_DMA_CH7_INVAL_STATE_ERR, "DMA & Engine",
115
"Channel 7 invalid state error" },
116
{ NX_CHECKSTOP_DMA_CRB_UE, "DMA & Engine",
117
"UE error on CRB(CSB address, CCB)" },
118
{ NX_CHECKSTOP_DMA_CRB_SUE, "DMA & Engine",
119
"SUE error on CRB(CSB address, CCB)" },
120
{ NX_CHECKSTOP_PBI_ISN_UE, "PowerBus Interface",
121
"CRB Kill ISN received while holding ISN with UE error" },
122
};
123
124
/* Validity check */
125
if (!hmi_evt->u.xstop_error.xstop_reason) {
126
printk("%s Unknown NX check stop.\n", level);
127
return;
128
}
129
130
printk("%s NX checkstop on CHIP ID: %x\n", level,
131
be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id));
132
for (i = 0; i < ARRAY_SIZE(xstop_reason); i++)
133
if (be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason) &
134
xstop_reason[i].xstop_reason)
135
printk("%s [Unit: %-3s] %s\n", level,
136
xstop_reason[i].unit_failed,
137
xstop_reason[i].description);
138
}
139
140
static void print_npu_checkstop_reason(const char *level,
141
struct OpalHMIEvent *hmi_evt)
142
{
143
uint8_t reason, reason_count, i;
144
145
/*
146
* We may not have a checkstop reason on some combination of
147
* hardware and/or skiboot version
148
*/
149
if (!hmi_evt->u.xstop_error.xstop_reason) {
150
printk("%s NPU checkstop on chip %x\n", level,
151
be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id));
152
return;
153
}
154
155
/*
156
* NPU2 has 3 FIRs. Reason encoded on a byte as:
157
* 2 bits for the FIR number
158
* 6 bits for the bit number
159
* It may be possible to find several reasons.
160
*
161
* We don't display a specific message per FIR bit as there
162
* are too many and most are meaningless without the workbook
163
* and/or hw team help anyway.
164
*/
165
reason_count = sizeof(hmi_evt->u.xstop_error.xstop_reason) /
166
sizeof(reason);
167
for (i = 0; i < reason_count; i++) {
168
reason = (hmi_evt->u.xstop_error.xstop_reason >> (8 * i)) & 0xFF;
169
if (reason)
170
printk("%s NPU checkstop on chip %x: FIR%d bit %d is set\n",
171
level,
172
be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id),
173
reason >> 6, reason & 0x3F);
174
}
175
}
176
177
static void print_checkstop_reason(const char *level,
178
struct OpalHMIEvent *hmi_evt)
179
{
180
uint8_t type = hmi_evt->u.xstop_error.xstop_type;
181
switch (type) {
182
case CHECKSTOP_TYPE_CORE:
183
print_core_checkstop_reason(level, hmi_evt);
184
break;
185
case CHECKSTOP_TYPE_NX:
186
print_nx_checkstop_reason(level, hmi_evt);
187
break;
188
case CHECKSTOP_TYPE_NPU:
189
print_npu_checkstop_reason(level, hmi_evt);
190
break;
191
default:
192
printk("%s Unknown Malfunction Alert of type %d\n",
193
level, type);
194
break;
195
}
196
}
197
198
static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
199
{
200
const char *level, *sevstr, *error_info;
201
static const char *hmi_error_types[] = {
202
"Malfunction Alert",
203
"Processor Recovery done",
204
"Processor recovery occurred again",
205
"Processor recovery occurred for masked error",
206
"Timer facility experienced an error",
207
"TFMR SPR is corrupted",
208
"UPS (Uninterrupted Power System) Overflow indication",
209
"An XSCOM operation failure",
210
"An XSCOM operation completed",
211
"SCOM has set a reserved FIR bit to cause recovery",
212
"Debug trigger has set a reserved FIR bit to cause recovery",
213
"A hypervisor resource error occurred",
214
"CAPP recovery process is in progress",
215
};
216
static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
217
DEFAULT_RATELIMIT_BURST);
218
219
/* Print things out */
220
if (hmi_evt->version < OpalHMIEvt_V1) {
221
pr_err("HMI Interrupt, Unknown event version %d !\n",
222
hmi_evt->version);
223
return;
224
}
225
switch (hmi_evt->severity) {
226
case OpalHMI_SEV_NO_ERROR:
227
level = KERN_INFO;
228
sevstr = "Harmless";
229
break;
230
case OpalHMI_SEV_WARNING:
231
level = KERN_WARNING;
232
sevstr = "";
233
break;
234
case OpalHMI_SEV_ERROR_SYNC:
235
level = KERN_ERR;
236
sevstr = "Severe";
237
break;
238
case OpalHMI_SEV_FATAL:
239
default:
240
level = KERN_ERR;
241
sevstr = "Fatal";
242
break;
243
}
244
245
if (hmi_evt->severity != OpalHMI_SEV_NO_ERROR || __ratelimit(&rs)) {
246
printk("%s%s Hypervisor Maintenance interrupt [%s]\n",
247
level, sevstr,
248
hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ?
249
"Recovered" : "Not recovered");
250
error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ?
251
hmi_error_types[hmi_evt->type]
252
: "Unknown";
253
printk("%s Error detail: %s\n", level, error_info);
254
printk("%s HMER: %016llx\n", level,
255
be64_to_cpu(hmi_evt->hmer));
256
if ((hmi_evt->type == OpalHMI_ERROR_TFAC) ||
257
(hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY))
258
printk("%s TFMR: %016llx\n", level,
259
be64_to_cpu(hmi_evt->tfmr));
260
}
261
262
if (hmi_evt->version < OpalHMIEvt_V2)
263
return;
264
265
/* OpalHMIEvt_V2 and above provides reason for malfunction alert. */
266
if (hmi_evt->type == OpalHMI_ERROR_MALFUNC_ALERT)
267
print_checkstop_reason(level, hmi_evt);
268
}
269
270
static void hmi_event_handler(struct work_struct *work)
271
{
272
unsigned long flags;
273
struct OpalHMIEvent *hmi_evt;
274
struct OpalHmiEvtNode *msg_node;
275
uint8_t disposition;
276
struct opal_msg msg;
277
int unrecoverable = 0;
278
279
spin_lock_irqsave(&opal_hmi_evt_lock, flags);
280
while (!list_empty(&opal_hmi_evt_list)) {
281
msg_node = list_entry(opal_hmi_evt_list.next,
282
struct OpalHmiEvtNode, list);
283
list_del(&msg_node->list);
284
spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
285
286
hmi_evt = (struct OpalHMIEvent *) &msg_node->hmi_evt;
287
print_hmi_event_info(hmi_evt);
288
disposition = hmi_evt->disposition;
289
kfree(msg_node);
290
291
/*
292
* Check if HMI event has been recovered or not. If not
293
* then kernel can't continue, we need to panic.
294
* But before we do that, display all the HMI event
295
* available on the list and set unrecoverable flag to 1.
296
*/
297
if (disposition != OpalHMI_DISPOSITION_RECOVERED)
298
unrecoverable = 1;
299
300
spin_lock_irqsave(&opal_hmi_evt_lock, flags);
301
}
302
spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
303
304
if (unrecoverable) {
305
/* Pull all HMI events from OPAL before we panic. */
306
while (opal_get_msg(__pa(&msg), sizeof(msg)) == OPAL_SUCCESS) {
307
u32 type;
308
309
type = be32_to_cpu(msg.msg_type);
310
311
/* skip if not HMI event */
312
if (type != OPAL_MSG_HMI_EVT)
313
continue;
314
315
/* HMI event info starts from param[0] */
316
hmi_evt = (struct OpalHMIEvent *)&msg.params[0];
317
print_hmi_event_info(hmi_evt);
318
}
319
320
pnv_platform_error_reboot(NULL, "Unrecoverable HMI exception");
321
}
322
}
323
324
/* Work item running hmi_event_handler(); scheduled by opal_handle_hmi_event(). */
static DECLARE_WORK(hmi_event_work, hmi_event_handler);
325
/*
326
* opal_handle_hmi_event - notifier handler that queues up HMI events
327
* to be processed later.
328
*/
329
static int opal_handle_hmi_event(struct notifier_block *nb,
330
unsigned long msg_type, void *msg)
331
{
332
unsigned long flags;
333
struct OpalHMIEvent *hmi_evt;
334
struct opal_msg *hmi_msg = msg;
335
struct OpalHmiEvtNode *msg_node;
336
337
/* Sanity Checks */
338
if (msg_type != OPAL_MSG_HMI_EVT)
339
return 0;
340
341
/* HMI event info starts from param[0] */
342
hmi_evt = (struct OpalHMIEvent *)&hmi_msg->params[0];
343
344
/* Delay the logging of HMI events to workqueue. */
345
msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC);
346
if (!msg_node) {
347
pr_err("HMI: out of memory, Opal message event not handled\n");
348
return -ENOMEM;
349
}
350
memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(*hmi_evt));
351
352
spin_lock_irqsave(&opal_hmi_evt_lock, flags);
353
list_add(&msg_node->list, &opal_hmi_evt_list);
354
spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
355
356
schedule_work(&hmi_event_work);
357
return 0;
358
}
359
360
static struct notifier_block opal_hmi_handler_nb = {
361
.notifier_call = opal_handle_hmi_event,
362
.next = NULL,
363
.priority = 0,
364
};
365
366
/*
 * Register the HMI event notifier with the OPAL message layer.
 *
 * Registration happens at most once: opal_hmi_handler_nb_init records a
 * successful registration and later calls return immediately.
 *
 * Returns 0 on success (or if already registered), otherwise the error
 * returned by opal_message_notifier_register().
 */
int __init opal_hmi_handler_init(void)
{
	int ret;

	/* Already registered: nothing to do. */
	if (opal_hmi_handler_nb_init)
		return 0;

	ret = opal_message_notifier_register(OPAL_MSG_HMI_EVT,
					     &opal_hmi_handler_nb);
	if (ret) {
		pr_err("%s: Can't register OPAL event notifier (%d)\n",
		       __func__, ret);
		return ret;
	}

	opal_hmi_handler_nb_init = 1;
	return 0;
}
382
383