GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/kernel/cpu/mce/dev-mcelog.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * /dev/mcelog driver
 *
 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
 * Rest from unknown author(s).
 * 2004 Andi Kleen. Rewrote most of it.
 * Copyright 2008 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/poll.h>

#include "internal.h"
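
/*
 * Notifier chain through which error-injection modules (e.g. mce-inject)
 * receive records written to /dev/mcelog.
 */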
static BLOCKING_NOTIFIER_HEAD(mce_injector_chain);

static DEFINE_MUTEX(mce_chrdev_read_mutex);
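
/*
 * Path of the user-space program run whenever a new record is logged; set
 * via the "trigger" device attribute below. An empty string disables it.
 */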
static char mce_helper[128];
static char *mce_helper_argv[2] = { mce_helper, NULL };

/*
 * Lockless MCE logging infrastructure.
 * This avoids deadlocks on printk locks without having to break locks. Also
 * separate MCEs from kernel messages to avoid bogus bug reports.
 */

static struct mce_log_buffer *mcelog;

static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
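
/*
 * Decode-chain notifier: append one record to the mcelog buffer, skipping
 * records the CEC has already handled. On overflow the new record is
 * dropped and the MCE_OVERFLOW flag is set for MCE_GETCLEAR_FLAGS.
 */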
static int dev_mce_log(struct notifier_block *nb, unsigned long val,
                       void *data)
{
        struct mce *mce = (struct mce *)data;
        unsigned int entry;

        if (mce->kflags & MCE_HANDLED_CEC)
                return NOTIFY_DONE;

        mutex_lock(&mce_chrdev_read_mutex);

        entry = mcelog->next;

        /*
         * When the buffer fills up discard new entries. Assume that the
         * earlier errors are the more interesting ones:
         */
        if (entry >= mcelog->len) {
                set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog->flags);
                goto unlock;
        }

        mcelog->next = entry + 1;

        memcpy(mcelog->entry + entry, mce, sizeof(struct mce));
        mcelog->entry[entry].finished = 1;
        mcelog->entry[entry].kflags = 0;

        /* wake processes polling /dev/mcelog */
        wake_up_interruptible(&mce_chrdev_wait);

unlock:
        mutex_unlock(&mce_chrdev_read_mutex);

        if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
                mce->kflags |= MCE_HANDLED_MCELOG;

        return NOTIFY_OK;
}

static struct notifier_block dev_mcelog_nb = {
        .notifier_call = dev_mce_log,
        .priority      = MCE_PRIO_MCELOG,
};
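
/*
 * The trigger program is launched from a workqueue: mce_work_trigger() can
 * be called from machine-check handling paths that are not a safe context
 * for call_usermodehelper().
 */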
static void mce_do_trigger(struct work_struct *work)
{
        call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
}

static DECLARE_WORK(mce_trigger_work, mce_do_trigger);

void mce_work_trigger(void)
{
        if (mce_helper[0])
                schedule_work(&mce_trigger_work);
}
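
/*
 * sysfs show/store callbacks for the "trigger" attribute: read back or
 * update mce_helper, stripping a trailing newline on write.
 */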
static ssize_t
show_trigger(struct device *s, struct device_attribute *attr, char *buf)
{
        strcpy(buf, mce_helper);
        strcat(buf, "\n");
        return strlen(mce_helper) + 1;
}

static ssize_t set_trigger(struct device *s, struct device_attribute *attr,
                           const char *buf, size_t siz)
{
        char *p;

        strscpy(mce_helper, buf, sizeof(mce_helper));
        p = strchr(mce_helper, '\n');

        if (p)
                *p = 0;

        return strlen(mce_helper) + !!p;
}

DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger);

/*
 * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log.
 */

static DEFINE_SPINLOCK(mce_chrdev_state_lock);
static int mce_chrdev_open_count;       /* #times opened */
static int mce_chrdev_open_exclu;       /* already open exclusive? */
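
/*
 * Opening with O_EXCL claims the device exclusively: it fails with -EBUSY
 * if the device is already open, and blocks further opens until release.
 */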
static int mce_chrdev_open(struct inode *inode, struct file *file)
{
        spin_lock(&mce_chrdev_state_lock);

        if (mce_chrdev_open_exclu ||
            (mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
                spin_unlock(&mce_chrdev_state_lock);

                return -EBUSY;
        }

        if (file->f_flags & O_EXCL)
                mce_chrdev_open_exclu = 1;
        mce_chrdev_open_count++;

        spin_unlock(&mce_chrdev_state_lock);

        return nonseekable_open(inode, file);
}

static int mce_chrdev_release(struct inode *inode, struct file *file)
{
        spin_lock(&mce_chrdev_state_lock);

        mce_chrdev_open_count--;
        mce_chrdev_open_exclu = 0;

        spin_unlock(&mce_chrdev_state_lock);

        return 0;
}

static int mce_apei_read_done;

/* Read one MCE record of the previous boot from persistent storage via APEI ERST. */
static int __mce_read_apei(char __user **ubuf, size_t usize)
{
        int rc;
        u64 record_id;
        struct mce m;

        if (usize < sizeof(struct mce))
                return -EINVAL;

        rc = apei_read_mce(&m, &record_id);
        /* Error or no more MCE record */
        if (rc <= 0) {
                mce_apei_read_done = 1;
                /*
                 * When ERST is disabled, mce_chrdev_read() should return
                 * "no record" instead of "no device."
                 */
                if (rc == -ENODEV)
                        return 0;
                return rc;
        }
        rc = -EFAULT;
        if (copy_to_user(*ubuf, &m, sizeof(struct mce)))
                return rc;
        /*
         * Ideally the record would be cleared only after /sbin/mcelog
         * has flushed it to disk or sent it over the network, but there
         * is no interface for that yet, so clear it now to avoid
         * duplicates.
         */
        rc = apei_clear_mce(record_id);
        if (rc) {
                mce_apei_read_done = 1;
                return rc;
        }
        *ubuf += sizeof(struct mce);

        return 0;
}
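
/*
 * Reads drain everything in one call: *off must be 0 and the buffer must
 * have room for mcelog->len records. Records preserved across reboot by
 * APEI ERST are returned first, then the in-memory log is copied out and
 * cleared.
 */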
static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
                               size_t usize, loff_t *off)
{
        char __user *buf = ubuf;
        unsigned next;
        int i, err;

        mutex_lock(&mce_chrdev_read_mutex);

        if (!mce_apei_read_done) {
                err = __mce_read_apei(&buf, usize);
                if (err || buf != ubuf)
                        goto out;
        }

        /* Only supports full reads right now */
        err = -EINVAL;
        if (*off != 0 || usize < mcelog->len * sizeof(struct mce))
                goto out;

        next = mcelog->next;
        err = 0;

        for (i = 0; i < next; i++) {
                struct mce *m = &mcelog->entry[i];

                err |= copy_to_user(buf, m, sizeof(*m));
                buf += sizeof(*m);
        }

        memset(mcelog->entry, 0, next * sizeof(struct mce));
        mcelog->next = 0;

        if (err)
                err = -EFAULT;

out:
        mutex_unlock(&mce_chrdev_read_mutex);

        return err ? err : buf - ubuf;
}
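
/*
 * Readable when the in-memory log holds records, or when APEI ERST still
 * has records from a previous boot.
 */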
static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait)
{
        poll_wait(file, &mce_chrdev_wait, wait);
        if (READ_ONCE(mcelog->next))
                return EPOLLIN | EPOLLRDNORM;
        if (!mce_apei_read_done && apei_check_mce())
                return EPOLLIN | EPOLLRDNORM;
        return 0;
}
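
/*
 * ioctls used by the mcelog(8) daemon to size its read buffer and to
 * fetch-and-clear the status flags. A minimal user-space sketch (fd setup
 * and error handling assumed, not part of this file):
 *
 *      int fd = open("/dev/mcelog", O_RDONLY);
 *      int recordlen, loglen;
 *      ioctl(fd, MCE_GET_RECORD_LEN, &recordlen);
 *      ioctl(fd, MCE_GET_LOG_LEN, &loglen);
 */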
static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
                             unsigned long arg)
{
        int __user *p = (int __user *)arg;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        switch (cmd) {
        case MCE_GET_RECORD_LEN:
                return put_user(sizeof(struct mce), p);
        case MCE_GET_LOG_LEN:
                return put_user(mcelog->len, p);
        case MCE_GETCLEAR_FLAGS:
                return put_user(xchg(&mcelog->flags, 0), p);
        default:
                return -ENOTTY;
        }
}
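
/*
 * Registration hooks for injector modules that want to receive the records
 * written to /dev/mcelog.
 */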
void mce_register_injector_chain(struct notifier_block *nb)
{
        blocking_notifier_chain_register(&mce_injector_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_register_injector_chain);

void mce_unregister_injector_chain(struct notifier_block *nb)
{
        blocking_notifier_chain_unregister(&mce_injector_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_unregister_injector_chain);
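
/*
 * Error injection entry point: a struct mce written to /dev/mcelog is
 * validated and handed to the injector chain for processing.
 */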
static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
                                size_t usize, loff_t *off)
{
        struct mce m;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        /*
         * There are some cases where real MSR reads could slip
         * through.
         */
        if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
                return -EIO;

        if ((unsigned long)usize > sizeof(struct mce))
                usize = sizeof(struct mce);
        if (copy_from_user(&m, ubuf, usize))
                return -EFAULT;

        if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
                return -EINVAL;

        /*
         * Need to give user space some time to set everything up,
         * so do it a jiffy or two later everywhere.
         */
        schedule_timeout(2);

        blocking_notifier_call_chain(&mce_injector_chain, 0, &m);

        return usize;
}

static const struct file_operations mce_chrdev_ops = {
        .open           = mce_chrdev_open,
        .release        = mce_chrdev_release,
        .read           = mce_chrdev_read,
        .write          = mce_chrdev_write,
        .poll           = mce_chrdev_poll,
        .unlocked_ioctl = mce_chrdev_ioctl,
        .compat_ioctl   = compat_ptr_ioctl,
};

static struct miscdevice mce_chrdev_device = {
        MISC_MCELOG_MINOR,
        "mcelog",
        &mce_chrdev_ops,
};
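
/*
 * Allocate the log buffer (at least MCE_LOG_MIN_LEN entries, one per online
 * CPU if that is larger), register /dev/mcelog and hook into the MCE decode
 * chain.
 */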
static __init int dev_mcelog_init_device(void)
{
        int mce_log_len;
        int err;

        mce_log_len = max(MCE_LOG_MIN_LEN, num_online_cpus());
        mcelog = kzalloc(struct_size(mcelog, entry, mce_log_len), GFP_KERNEL);
        if (!mcelog)
                return -ENOMEM;

        memcpy(mcelog->signature, MCE_LOG_SIGNATURE, sizeof(mcelog->signature));
        mcelog->len = mce_log_len;
        mcelog->recordlen = sizeof(struct mce);

        /* register character device /dev/mcelog */
        err = misc_register(&mce_chrdev_device);
        if (err) {
                if (err == -EBUSY)
                        /* Xen dom0 might have registered the device already. */
                        pr_info("Unable to init device /dev/mcelog, already registered\n");
                else
                        pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err);

                kfree(mcelog);
                return err;
        }

        mce_register_decode_chain(&dev_mcelog_nb);
        return 0;
}
device_initcall_sync(dev_mcelog_init_device);