Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/drivers/char/ipmi/ipmi_watchdog.c
26282 views
1
// SPDX-License-Identifier: GPL-2.0+
2
/*
3
* ipmi_watchdog.c
4
*
5
* A watchdog timer based upon the IPMI interface.
6
*
7
* Author: MontaVista Software, Inc.
8
* Corey Minyard <[email protected]>
9
* [email protected]
10
*
11
* Copyright 2002 MontaVista Software Inc.
12
*/
13
14
#define pr_fmt(fmt) "IPMI Watchdog: " fmt
15
16
#include <linux/module.h>
17
#include <linux/moduleparam.h>
18
#include <linux/ipmi.h>
19
#include <linux/ipmi_smi.h>
20
#include <linux/mutex.h>
21
#include <linux/watchdog.h>
22
#include <linux/miscdevice.h>
23
#include <linux/init.h>
24
#include <linux/completion.h>
25
#include <linux/kdebug.h>
26
#include <linux/kstrtox.h>
27
#include <linux/rwsem.h>
28
#include <linux/errno.h>
29
#include <linux/uaccess.h>
30
#include <linux/notifier.h>
31
#include <linux/nmi.h>
32
#include <linux/reboot.h>
33
#include <linux/wait.h>
34
#include <linux/poll.h>
35
#include <linux/string.h>
36
#include <linux/ctype.h>
37
#include <linux/delay.h>
38
#include <linux/atomic.h>
39
#include <linux/sched/signal.h>
40
41
#ifdef CONFIG_X86
42
/*
43
* This is ugly, but I've determined that x86 is the only architecture
44
* that can reasonably support the IPMI NMI watchdog timeout at this
45
* time. If another architecture adds this capability somehow, it
46
* will have to be a somewhat different mechanism and I have no idea
47
* how it will work. So in the unlikely event that another
48
* architecture supports this, we can figure out a good generic
49
* mechanism for it at that time.
50
*/
51
#include <asm/kdebug.h>
52
#include <asm/nmi.h>
53
#define HAVE_DIE_NMI
54
#endif
55
56
/*
57
* The IPMI command/response information for the watchdog timer.
58
*/
59
60
/*
 * Values for byte 1 of the set command, byte 2 of the get response.
 *
 * The WDOG_SET_* helpers below modify their first argument in place.  Each
 * expansion is fully parenthesized so that it behaves as a single expression
 * wherever it is used.
 */
#define WDOG_DONT_LOG		(1 << 7)
#define WDOG_DONT_STOP_ON_SET	(1 << 6)
#define WDOG_SET_TIMER_USE(byte, use) \
	((byte) = ((byte) & 0xf8) | ((use) & 0x7))
#define WDOG_GET_TIMER_USE(byte) ((byte) & 0x7)
#define WDOG_TIMER_USE_BIOS_FRB2	1
#define WDOG_TIMER_USE_BIOS_POST	2
#define WDOG_TIMER_USE_OS_LOAD		3
#define WDOG_TIMER_USE_SMS_OS		4
#define WDOG_TIMER_USE_OEM		5

/* Values for byte 2 of the set command, byte 3 of the get response. */
#define WDOG_SET_PRETIMEOUT_ACT(byte, use) \
	((byte) = ((byte) & 0x8f) | (((use) & 0x7) << 4))
#define WDOG_GET_PRETIMEOUT_ACT(byte) (((byte) >> 4) & 0x7)
#define WDOG_PRETIMEOUT_NONE		0
#define WDOG_PRETIMEOUT_SMI		1
#define WDOG_PRETIMEOUT_NMI		2
#define WDOG_PRETIMEOUT_MSG_INT		3

/* Operations that can be performed on a pretimeout. */
#define WDOG_PREOP_NONE		0
#define WDOG_PREOP_PANIC	1
/* Cause data to be available to read.  Doesn't work in NMI mode. */
#define WDOG_PREOP_GIVE_DATA	2

/* Actions to perform on a full timeout (low three bits of the same byte). */
#define WDOG_SET_TIMEOUT_ACT(byte, use) \
	((byte) = ((byte) & 0xf8) | ((use) & 0x7))
#define WDOG_GET_TIMEOUT_ACT(byte) ((byte) & 0x7)
#define WDOG_TIMEOUT_NONE		0
#define WDOG_TIMEOUT_RESET		1
#define WDOG_TIMEOUT_POWER_DOWN		2
#define WDOG_TIMEOUT_POWER_CYCLE	3

/*
 * Byte 3 of the set command, byte 4 of the get response is the
 * pre-timeout in seconds.
 */

/* Bits for setting byte 4 of the set command, byte 5 of the get response. */
#define WDOG_EXPIRE_CLEAR_BIOS_FRB2	(1 << 1)
#define WDOG_EXPIRE_CLEAR_BIOS_POST	(1 << 2)
#define WDOG_EXPIRE_CLEAR_OS_LOAD	(1 << 3)
#define WDOG_EXPIRE_CLEAR_SMS_OS	(1 << 4)
#define WDOG_EXPIRE_CLEAR_OEM		(1 << 5)

/*
 * Setting/getting the watchdog timer value.  This is for bytes 5 and
 * 6 (the timeout time) of the set command, and bytes 6 and 7 (the
 * timeout time) and 8 and 9 (the current countdown value) of the
 * response.  The timeout value is given in seconds (in the command it
 * is 100ms intervals), low byte first.
 */
#define WDOG_SET_TIMEOUT(byte1, byte2, val) \
	((byte1) = (((val) * 10) & 0xff), (byte2) = (((val) * 10) >> 8))
#define WDOG_GET_TIMEOUT(byte1, byte2) \
	(((byte1) | ((byte2) << 8)) / 10)

/* Watchdog command numbers used with netfn 0x06 in this file. */
#define IPMI_WDOG_RESET_TIMER		0x22
#define IPMI_WDOG_SET_TIMER		0x24
#define IPMI_WDOG_GET_TIMER		0x25

/* Completion code that __ipmi_heartbeat() treats as "timer not initialized". */
#define IPMI_WDOG_TIMER_NOT_INIT_RESP	0x80
125
126
static DEFINE_MUTEX(ipmi_watchdog_mutex);
127
static bool nowayout = WATCHDOG_NOWAYOUT;
128
129
static struct ipmi_user *watchdog_user;
130
static int watchdog_ifnum;
131
132
/* Default the timeout to 10 seconds. */
133
static int timeout = 10;
134
135
/* The pre-timeout is disabled by default. */
136
static int pretimeout;
137
138
/* Default timeout to set on panic */
139
static int panic_wdt_timeout = 255;
140
141
/* Default action is to reset the board on a timeout. */
142
static unsigned char action_val = WDOG_TIMEOUT_RESET;
143
144
static char action[16] = "reset";
145
146
static unsigned char preaction_val = WDOG_PRETIMEOUT_NONE;
147
148
static char preaction[16] = "pre_none";
149
150
static unsigned char preop_val = WDOG_PREOP_NONE;
151
152
static char preop[16] = "preop_none";
153
static DEFINE_MUTEX(ipmi_read_mutex);
154
static char data_to_read;
155
static DECLARE_WAIT_QUEUE_HEAD(read_q);
156
static struct fasync_struct *fasync_q;
157
static atomic_t pretimeout_since_last_heartbeat;
158
static char expect_close;
159
160
static int ifnum_to_use = -1;
161
162
/* Parameters to ipmi_set_timeout */
163
#define IPMI_SET_TIMEOUT_NO_HB 0
164
#define IPMI_SET_TIMEOUT_HB_IF_NECESSARY 1
165
#define IPMI_SET_TIMEOUT_FORCE_HB 2
166
167
static int ipmi_set_timeout(int do_heartbeat);
168
static void ipmi_register_watchdog(int ipmi_intf);
169
static void ipmi_unregister_watchdog(int ipmi_intf);
170
171
/*
172
* If true, the driver will start running as soon as it is configured
173
* and ready.
174
*/
175
static int start_now;
176
177
static int set_param_timeout(const char *val, const struct kernel_param *kp)
178
{
179
char *endp;
180
int l;
181
int rv = 0;
182
183
if (!val)
184
return -EINVAL;
185
l = simple_strtoul(val, &endp, 0);
186
if (endp == val)
187
return -EINVAL;
188
189
*((int *)kp->arg) = l;
190
if (watchdog_user)
191
rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
192
193
return rv;
194
}
195
196
static const struct kernel_param_ops param_ops_timeout = {
197
.set = set_param_timeout,
198
.get = param_get_int,
199
};
200
#define param_check_timeout param_check_int
201
202
/*
 * Accessor used by the string parameters: a non-NULL @outval receives the
 * current string, a non-NULL @inval is parsed and stored.
 */
typedef int (*action_fn)(const char *inval, char *outval);

static int action_op(const char *inval, char *outval);
static int preaction_op(const char *inval, char *outval);
static int preop_op(const char *inval, char *outval);
static void check_parms(void);
208
209
static int set_param_str(const char *val, const struct kernel_param *kp)
210
{
211
action_fn fn = (action_fn) kp->arg;
212
int rv = 0;
213
char valcp[16];
214
char *s;
215
216
strscpy(valcp, val, 16);
217
218
s = strstrip(valcp);
219
220
rv = fn(s, NULL);
221
if (rv)
222
goto out;
223
224
check_parms();
225
if (watchdog_user)
226
rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
227
228
out:
229
return rv;
230
}
231
232
static int get_param_str(char *buffer, const struct kernel_param *kp)
233
{
234
action_fn fn = (action_fn) kp->arg;
235
int rv, len;
236
237
rv = fn(NULL, buffer);
238
if (rv)
239
return rv;
240
241
len = strlen(buffer);
242
buffer[len++] = '\n';
243
buffer[len] = 0;
244
245
return len;
246
}
247
248
249
static int set_param_wdog_ifnum(const char *val, const struct kernel_param *kp)
250
{
251
int rv = param_set_int(val, kp);
252
if (rv)
253
return rv;
254
if ((ifnum_to_use < 0) || (ifnum_to_use == watchdog_ifnum))
255
return 0;
256
257
ipmi_unregister_watchdog(watchdog_ifnum);
258
ipmi_register_watchdog(ifnum_to_use);
259
return 0;
260
}
261
262
static const struct kernel_param_ops param_ops_wdog_ifnum = {
263
.set = set_param_wdog_ifnum,
264
.get = param_get_int,
265
};
266
267
#define param_check_wdog_ifnum param_check_int
268
269
static const struct kernel_param_ops param_ops_str = {
270
.set = set_param_str,
271
.get = get_param_str,
272
};
273
274
module_param(ifnum_to_use, wdog_ifnum, 0644);
275
MODULE_PARM_DESC(ifnum_to_use, "The interface number to use for the watchdog "
276
"timer. Setting to -1 defaults to the first registered "
277
"interface");
278
279
module_param(timeout, timeout, 0644);
280
MODULE_PARM_DESC(timeout, "Timeout value in seconds.");
281
282
module_param(pretimeout, timeout, 0644);
283
MODULE_PARM_DESC(pretimeout, "Pretimeout value in seconds.");
284
285
module_param(panic_wdt_timeout, timeout, 0644);
286
MODULE_PARM_DESC(panic_wdt_timeout, "Timeout value on kernel panic in seconds.");
287
288
module_param_cb(action, &param_ops_str, action_op, 0644);
289
MODULE_PARM_DESC(action, "Timeout action. One of: "
290
"reset, none, power_cycle, power_off.");
291
292
module_param_cb(preaction, &param_ops_str, preaction_op, 0644);
293
MODULE_PARM_DESC(preaction, "Pretimeout action. One of: "
294
"pre_none, pre_smi, pre_nmi, pre_int.");
295
296
module_param_cb(preop, &param_ops_str, preop_op, 0644);
297
MODULE_PARM_DESC(preop, "Pretimeout driver operation. One of: "
298
"preop_none, preop_panic, preop_give_data.");
299
300
module_param(start_now, int, 0444);
301
MODULE_PARM_DESC(start_now, "Set to 1 to start the watchdog as"
302
"soon as the driver is loaded.");
303
304
module_param(nowayout, bool, 0644);
305
MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started "
306
"(default=CONFIG_WATCHDOG_NOWAYOUT)");
307
308
/* Default state of the timer. */
309
static unsigned char ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
310
311
/* Is someone using the watchdog? Only one user is allowed. */
312
static unsigned long ipmi_wdog_open;
313
314
/*
315
* If set to 1, the heartbeat command will set the state to reset and
316
* start the timer. The timer doesn't normally run when the driver is
317
* first opened until the heartbeat is set the first time, this
318
* variable is used to accomplish this.
319
*/
320
static int ipmi_start_timer_on_heartbeat;
321
322
/* IPMI version of the BMC. */
323
static unsigned char ipmi_version_major;
324
static unsigned char ipmi_version_minor;
325
326
/* If a pretimeout occurs, this is used to allow only one panic to happen. */
327
static atomic_t preop_panic_excl = ATOMIC_INIT(-1);
328
329
#ifdef HAVE_DIE_NMI
330
static int testing_nmi;
331
static int nmi_handler_registered;
332
#endif
333
334
static int __ipmi_heartbeat(void);
335
336
/*
337
* We use a mutex to make sure that only one thing can send a set a
338
* message at one time. The mutex is claimed when a message is sent
339
* and freed when both the send and receive messages are free.
340
*/
341
static atomic_t msg_tofree = ATOMIC_INIT(0);
342
static DECLARE_COMPLETION(msg_wait);
343
static void msg_free_smi(struct ipmi_smi_msg *msg)
344
{
345
if (atomic_dec_and_test(&msg_tofree)) {
346
if (!oops_in_progress)
347
complete(&msg_wait);
348
}
349
}
350
static void msg_free_recv(struct ipmi_recv_msg *msg)
351
{
352
if (atomic_dec_and_test(&msg_tofree)) {
353
if (!oops_in_progress)
354
complete(&msg_wait);
355
}
356
}
357
static struct ipmi_smi_msg smi_msg = INIT_IPMI_SMI_MSG(msg_free_smi);
358
static struct ipmi_recv_msg recv_msg = INIT_IPMI_RECV_MSG(msg_free_recv);
359
360
static int __ipmi_set_timeout(struct ipmi_smi_msg *smi_msg,
361
struct ipmi_recv_msg *recv_msg,
362
int *send_heartbeat_now)
363
{
364
struct kernel_ipmi_msg msg;
365
unsigned char data[6];
366
int rv = 0;
367
struct ipmi_system_interface_addr addr;
368
int hbnow = 0;
369
370
371
data[0] = 0;
372
WDOG_SET_TIMER_USE(data[0], WDOG_TIMER_USE_SMS_OS);
373
374
if (ipmi_watchdog_state != WDOG_TIMEOUT_NONE) {
375
if ((ipmi_version_major > 1) ||
376
((ipmi_version_major == 1) && (ipmi_version_minor >= 5))) {
377
/* This is an IPMI 1.5-only feature. */
378
data[0] |= WDOG_DONT_STOP_ON_SET;
379
} else {
380
/*
381
* In ipmi 1.0, setting the timer stops the watchdog, we
382
* need to start it back up again.
383
*/
384
hbnow = 1;
385
}
386
}
387
388
data[1] = 0;
389
WDOG_SET_TIMEOUT_ACT(data[1], ipmi_watchdog_state);
390
if ((pretimeout > 0) && (ipmi_watchdog_state != WDOG_TIMEOUT_NONE)) {
391
WDOG_SET_PRETIMEOUT_ACT(data[1], preaction_val);
392
data[2] = pretimeout;
393
} else {
394
WDOG_SET_PRETIMEOUT_ACT(data[1], WDOG_PRETIMEOUT_NONE);
395
data[2] = 0; /* No pretimeout. */
396
}
397
data[3] = 0;
398
WDOG_SET_TIMEOUT(data[4], data[5], timeout);
399
400
addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE;
401
addr.channel = IPMI_BMC_CHANNEL;
402
addr.lun = 0;
403
404
msg.netfn = 0x06;
405
msg.cmd = IPMI_WDOG_SET_TIMER;
406
msg.data = data;
407
msg.data_len = sizeof(data);
408
if (smi_msg)
409
rv = ipmi_request_supply_msgs(watchdog_user,
410
(struct ipmi_addr *) &addr,
411
0,
412
&msg,
413
NULL,
414
smi_msg,
415
recv_msg,
416
1);
417
else
418
ipmi_panic_request_and_wait(watchdog_user,
419
(struct ipmi_addr *) &addr, &msg);
420
if (rv)
421
pr_warn("set timeout error: %d\n", rv);
422
else if (send_heartbeat_now)
423
*send_heartbeat_now = hbnow;
424
425
return rv;
426
}
427
428
static int _ipmi_set_timeout(int do_heartbeat)
429
{
430
int send_heartbeat_now;
431
int rv;
432
433
if (!watchdog_user)
434
return -ENODEV;
435
436
atomic_set(&msg_tofree, 2);
437
438
rv = __ipmi_set_timeout(&smi_msg, &recv_msg, &send_heartbeat_now);
439
if (rv) {
440
atomic_set(&msg_tofree, 0);
441
return rv;
442
}
443
444
wait_for_completion(&msg_wait);
445
446
if ((do_heartbeat == IPMI_SET_TIMEOUT_FORCE_HB)
447
|| ((send_heartbeat_now)
448
&& (do_heartbeat == IPMI_SET_TIMEOUT_HB_IF_NECESSARY)))
449
rv = __ipmi_heartbeat();
450
451
return rv;
452
}
453
454
static int ipmi_set_timeout(int do_heartbeat)
455
{
456
int rv;
457
458
mutex_lock(&ipmi_watchdog_mutex);
459
rv = _ipmi_set_timeout(do_heartbeat);
460
mutex_unlock(&ipmi_watchdog_mutex);
461
462
return rv;
463
}
464
465
static void panic_halt_ipmi_heartbeat(void)
466
{
467
struct kernel_ipmi_msg msg;
468
struct ipmi_system_interface_addr addr;
469
470
/*
471
* Don't reset the timer if we have the timer turned off, that
472
* re-enables the watchdog.
473
*/
474
if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE)
475
return;
476
477
addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE;
478
addr.channel = IPMI_BMC_CHANNEL;
479
addr.lun = 0;
480
481
msg.netfn = 0x06;
482
msg.cmd = IPMI_WDOG_RESET_TIMER;
483
msg.data = NULL;
484
msg.data_len = 0;
485
ipmi_panic_request_and_wait(watchdog_user, (struct ipmi_addr *) &addr,
486
&msg);
487
}
488
489
/*
490
* Special call, doesn't claim any locks. This is only to be called
491
* at panic or halt time, in run-to-completion mode, when the caller
492
* is the only CPU and the only thing that will be going is these IPMI
493
* calls.
494
*/
495
static void panic_halt_ipmi_set_timeout(void)
496
{
497
int send_heartbeat_now;
498
int rv;
499
500
rv = __ipmi_set_timeout(NULL, NULL, &send_heartbeat_now);
501
if (rv) {
502
pr_warn("Unable to extend the watchdog timeout\n");
503
} else {
504
if (send_heartbeat_now)
505
panic_halt_ipmi_heartbeat();
506
}
507
}
508
509
static int __ipmi_heartbeat(void)
510
{
511
struct kernel_ipmi_msg msg;
512
int rv;
513
struct ipmi_system_interface_addr addr;
514
int timeout_retries = 0;
515
516
restart:
517
/*
518
* Don't reset the timer if we have the timer turned off, that
519
* re-enables the watchdog.
520
*/
521
if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE)
522
return 0;
523
524
atomic_set(&msg_tofree, 2);
525
526
addr.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE;
527
addr.channel = IPMI_BMC_CHANNEL;
528
addr.lun = 0;
529
530
msg.netfn = 0x06;
531
msg.cmd = IPMI_WDOG_RESET_TIMER;
532
msg.data = NULL;
533
msg.data_len = 0;
534
rv = ipmi_request_supply_msgs(watchdog_user,
535
(struct ipmi_addr *) &addr,
536
0,
537
&msg,
538
NULL,
539
&smi_msg,
540
&recv_msg,
541
1);
542
if (rv) {
543
atomic_set(&msg_tofree, 0);
544
pr_warn("heartbeat send failure: %d\n", rv);
545
return rv;
546
}
547
548
/* Wait for the heartbeat to be sent. */
549
wait_for_completion(&msg_wait);
550
551
if (recv_msg.msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP) {
552
timeout_retries++;
553
if (timeout_retries > 3) {
554
pr_err("Unable to restore the IPMI watchdog's settings, giving up\n");
555
rv = -EIO;
556
goto out;
557
}
558
559
/*
560
* The timer was not initialized, that means the BMC was
561
* probably reset and lost the watchdog information. Attempt
562
* to restore the timer's info. Note that we still hold
563
* the heartbeat lock, to keep a heartbeat from happening
564
* in this process, so must say no heartbeat to avoid a
565
* deadlock on this mutex
566
*/
567
rv = _ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
568
if (rv) {
569
pr_err("Unable to send the command to set the watchdog's settings, giving up\n");
570
goto out;
571
}
572
573
/* Might need a heartbeat send, go ahead and do it. */
574
goto restart;
575
} else if (recv_msg.msg.data[0] != 0) {
576
/*
577
* Got an error in the heartbeat response. It was already
578
* reported in ipmi_wdog_msg_handler, but we should return
579
* an error here.
580
*/
581
rv = -EINVAL;
582
}
583
584
out:
585
return rv;
586
}
587
588
static int _ipmi_heartbeat(void)
589
{
590
int rv;
591
592
if (!watchdog_user)
593
return -ENODEV;
594
595
if (ipmi_start_timer_on_heartbeat) {
596
ipmi_start_timer_on_heartbeat = 0;
597
ipmi_watchdog_state = action_val;
598
rv = _ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
599
} else if (atomic_cmpxchg(&pretimeout_since_last_heartbeat, 1, 0)) {
600
/*
601
* A pretimeout occurred, make sure we set the timeout.
602
* We don't want to set the action, though, we want to
603
* leave that alone (thus it can't be combined with the
604
* above operation.
605
*/
606
rv = _ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
607
} else {
608
rv = __ipmi_heartbeat();
609
}
610
611
return rv;
612
}
613
614
static int ipmi_heartbeat(void)
615
{
616
int rv;
617
618
mutex_lock(&ipmi_watchdog_mutex);
619
rv = _ipmi_heartbeat();
620
mutex_unlock(&ipmi_watchdog_mutex);
621
622
return rv;
623
}
624
625
static const struct watchdog_info ident = {
626
.options = 0, /* WDIOF_SETTIMEOUT, */
627
.firmware_version = 1,
628
.identity = "IPMI"
629
};
630
631
static int ipmi_ioctl(struct file *file,
632
unsigned int cmd, unsigned long arg)
633
{
634
void __user *argp = (void __user *)arg;
635
int i;
636
int val;
637
638
switch (cmd) {
639
case WDIOC_GETSUPPORT:
640
i = copy_to_user(argp, &ident, sizeof(ident));
641
return i ? -EFAULT : 0;
642
643
case WDIOC_SETTIMEOUT:
644
i = copy_from_user(&val, argp, sizeof(int));
645
if (i)
646
return -EFAULT;
647
timeout = val;
648
return _ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
649
650
case WDIOC_GETTIMEOUT:
651
i = copy_to_user(argp, &timeout, sizeof(timeout));
652
if (i)
653
return -EFAULT;
654
return 0;
655
656
case WDIOC_SETPRETIMEOUT:
657
i = copy_from_user(&val, argp, sizeof(int));
658
if (i)
659
return -EFAULT;
660
pretimeout = val;
661
return _ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
662
663
case WDIOC_GETPRETIMEOUT:
664
i = copy_to_user(argp, &pretimeout, sizeof(pretimeout));
665
if (i)
666
return -EFAULT;
667
return 0;
668
669
case WDIOC_KEEPALIVE:
670
return _ipmi_heartbeat();
671
672
case WDIOC_SETOPTIONS:
673
i = copy_from_user(&val, argp, sizeof(int));
674
if (i)
675
return -EFAULT;
676
if (val & WDIOS_DISABLECARD) {
677
ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
678
_ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
679
ipmi_start_timer_on_heartbeat = 0;
680
}
681
682
if (val & WDIOS_ENABLECARD) {
683
ipmi_watchdog_state = action_val;
684
_ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
685
}
686
return 0;
687
688
case WDIOC_GETSTATUS:
689
val = 0;
690
i = copy_to_user(argp, &val, sizeof(val));
691
if (i)
692
return -EFAULT;
693
return 0;
694
695
default:
696
return -ENOIOCTLCMD;
697
}
698
}
699
700
static long ipmi_unlocked_ioctl(struct file *file,
701
unsigned int cmd,
702
unsigned long arg)
703
{
704
int ret;
705
706
mutex_lock(&ipmi_watchdog_mutex);
707
ret = ipmi_ioctl(file, cmd, arg);
708
mutex_unlock(&ipmi_watchdog_mutex);
709
710
return ret;
711
}
712
713
static ssize_t ipmi_write(struct file *file,
714
const char __user *buf,
715
size_t len,
716
loff_t *ppos)
717
{
718
int rv;
719
720
if (len) {
721
if (!nowayout) {
722
size_t i;
723
724
/* In case it was set long ago */
725
expect_close = 0;
726
727
for (i = 0; i != len; i++) {
728
char c;
729
730
if (get_user(c, buf + i))
731
return -EFAULT;
732
if (c == 'V')
733
expect_close = 42;
734
}
735
}
736
rv = ipmi_heartbeat();
737
if (rv)
738
return rv;
739
}
740
return len;
741
}
742
743
static ssize_t ipmi_read(struct file *file,
744
char __user *buf,
745
size_t count,
746
loff_t *ppos)
747
{
748
int rv = 0;
749
wait_queue_entry_t wait;
750
751
if (count <= 0)
752
return 0;
753
754
/*
755
* Reading returns if the pretimeout has gone off, and it only does
756
* it once per pretimeout.
757
*/
758
mutex_lock(&ipmi_read_mutex);
759
if (!data_to_read) {
760
if (file->f_flags & O_NONBLOCK) {
761
rv = -EAGAIN;
762
goto out;
763
}
764
765
init_waitqueue_entry(&wait, current);
766
add_wait_queue(&read_q, &wait);
767
while (!data_to_read && !signal_pending(current)) {
768
set_current_state(TASK_INTERRUPTIBLE);
769
mutex_unlock(&ipmi_read_mutex);
770
schedule();
771
mutex_lock(&ipmi_read_mutex);
772
}
773
remove_wait_queue(&read_q, &wait);
774
775
if (signal_pending(current)) {
776
rv = -ERESTARTSYS;
777
goto out;
778
}
779
}
780
data_to_read = 0;
781
782
out:
783
mutex_unlock(&ipmi_read_mutex);
784
785
if (rv == 0) {
786
if (copy_to_user(buf, &data_to_read, 1))
787
rv = -EFAULT;
788
else
789
rv = 1;
790
}
791
792
return rv;
793
}
794
795
static int ipmi_open(struct inode *ino, struct file *filep)
796
{
797
switch (iminor(ino)) {
798
case WATCHDOG_MINOR:
799
if (test_and_set_bit(0, &ipmi_wdog_open))
800
return -EBUSY;
801
802
803
/*
804
* Don't start the timer now, let it start on the
805
* first heartbeat.
806
*/
807
ipmi_start_timer_on_heartbeat = 1;
808
return stream_open(ino, filep);
809
810
default:
811
return (-ENODEV);
812
}
813
}
814
815
/*
 * poll() handler: registers on read_q and reports the file readable while
 * data_to_read is set.
 */
static __poll_t ipmi_poll(struct file *file, poll_table *wait)
{
	__poll_t events = 0;

	poll_wait(file, &read_q, wait);

	mutex_lock(&ipmi_read_mutex);
	if (data_to_read)
		events |= (EPOLLIN | EPOLLRDNORM);
	mutex_unlock(&ipmi_read_mutex);

	return events;
}
828
829
static int ipmi_fasync(int fd, struct file *file, int on)
830
{
831
int result;
832
833
result = fasync_helper(fd, file, on, &fasync_q);
834
835
return (result);
836
}
837
838
static int ipmi_close(struct inode *ino, struct file *filep)
839
{
840
if (iminor(ino) == WATCHDOG_MINOR) {
841
if (expect_close == 42) {
842
mutex_lock(&ipmi_watchdog_mutex);
843
ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
844
_ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
845
mutex_unlock(&ipmi_watchdog_mutex);
846
} else {
847
pr_crit("Unexpected close, not stopping watchdog!\n");
848
ipmi_heartbeat();
849
}
850
clear_bit(0, &ipmi_wdog_open);
851
}
852
853
expect_close = 0;
854
855
return 0;
856
}
857
858
static const struct file_operations ipmi_wdog_fops = {
859
.owner = THIS_MODULE,
860
.read = ipmi_read,
861
.poll = ipmi_poll,
862
.write = ipmi_write,
863
.unlocked_ioctl = ipmi_unlocked_ioctl,
864
.compat_ioctl = compat_ptr_ioctl,
865
.open = ipmi_open,
866
.release = ipmi_close,
867
.fasync = ipmi_fasync,
868
};
869
870
static struct miscdevice ipmi_wdog_miscdev = {
871
.minor = WATCHDOG_MINOR,
872
.name = "watchdog",
873
.fops = &ipmi_wdog_fops
874
};
875
876
static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg,
877
void *handler_data)
878
{
879
if (msg->msg.cmd == IPMI_WDOG_RESET_TIMER &&
880
msg->msg.data[0] == IPMI_WDOG_TIMER_NOT_INIT_RESP)
881
pr_info("response: The IPMI controller appears to have been reset, will attempt to reinitialize the watchdog timer\n");
882
else if (msg->msg.data[0] != 0)
883
pr_err("response: Error %x on cmd %x\n",
884
msg->msg.data[0],
885
msg->msg.cmd);
886
887
ipmi_free_recv_msg(msg);
888
}
889
890
static void ipmi_wdog_pretimeout_handler(void *handler_data)
891
{
892
if (preaction_val != WDOG_PRETIMEOUT_NONE) {
893
if (preop_val == WDOG_PREOP_PANIC) {
894
if (atomic_inc_and_test(&preop_panic_excl))
895
panic("Watchdog pre-timeout");
896
} else if (preop_val == WDOG_PREOP_GIVE_DATA) {
897
mutex_lock(&ipmi_read_mutex);
898
data_to_read = 1;
899
wake_up_interruptible(&read_q);
900
kill_fasync(&fasync_q, SIGIO, POLL_IN);
901
mutex_unlock(&ipmi_read_mutex);
902
}
903
}
904
905
/*
906
* On some machines, the heartbeat will give an error and not
907
* work unless we re-enable the timer. So do so.
908
*/
909
atomic_set(&pretimeout_since_last_heartbeat, 1);
910
}
911
912
static void ipmi_wdog_panic_handler(void *user_data)
913
{
914
static int panic_event_handled;
915
916
/*
917
* On a panic, if we have a panic timeout, make sure to extend
918
* the watchdog timer to a reasonable value to complete the
919
* panic, if the watchdog timer is running. Plus the
920
* pretimeout is meaningless at panic time.
921
*/
922
if (watchdog_user && !panic_event_handled &&
923
ipmi_watchdog_state != WDOG_TIMEOUT_NONE) {
924
/* Make sure we do this only once. */
925
panic_event_handled = 1;
926
927
timeout = panic_wdt_timeout;
928
pretimeout = 0;
929
panic_halt_ipmi_set_timeout();
930
}
931
}
932
933
static const struct ipmi_user_hndl ipmi_hndlrs = {
934
.ipmi_recv_hndl = ipmi_wdog_msg_handler,
935
.ipmi_watchdog_pretimeout = ipmi_wdog_pretimeout_handler,
936
.ipmi_panic_handler = ipmi_wdog_panic_handler
937
};
938
939
static void ipmi_register_watchdog(int ipmi_intf)
940
{
941
int rv = -EBUSY;
942
943
if (watchdog_user)
944
goto out;
945
946
if ((ifnum_to_use >= 0) && (ifnum_to_use != ipmi_intf))
947
goto out;
948
949
watchdog_ifnum = ipmi_intf;
950
951
rv = ipmi_create_user(ipmi_intf, &ipmi_hndlrs, NULL, &watchdog_user);
952
if (rv < 0) {
953
pr_crit("Unable to register with ipmi\n");
954
goto out;
955
}
956
957
rv = ipmi_get_version(watchdog_user,
958
&ipmi_version_major,
959
&ipmi_version_minor);
960
if (rv) {
961
pr_warn("Unable to get IPMI version, assuming 1.0\n");
962
ipmi_version_major = 1;
963
ipmi_version_minor = 0;
964
}
965
966
rv = misc_register(&ipmi_wdog_miscdev);
967
if (rv < 0) {
968
ipmi_destroy_user(watchdog_user);
969
watchdog_user = NULL;
970
pr_crit("Unable to register misc device\n");
971
}
972
973
#ifdef HAVE_DIE_NMI
974
if (nmi_handler_registered) {
975
int old_pretimeout = pretimeout;
976
int old_timeout = timeout;
977
int old_preop_val = preop_val;
978
979
/*
980
* Set the pretimeout to go off in a second and give
981
* ourselves plenty of time to stop the timer.
982
*/
983
ipmi_watchdog_state = WDOG_TIMEOUT_RESET;
984
preop_val = WDOG_PREOP_NONE; /* Make sure nothing happens */
985
pretimeout = 99;
986
timeout = 100;
987
988
testing_nmi = 1;
989
990
rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
991
if (rv) {
992
pr_warn("Error starting timer to test NMI: 0x%x. The NMI pretimeout will likely not work\n",
993
rv);
994
rv = 0;
995
goto out_restore;
996
}
997
998
msleep(1500);
999
1000
if (testing_nmi != 2) {
1001
pr_warn("IPMI NMI didn't seem to occur. The NMI pretimeout will likely not work\n");
1002
}
1003
out_restore:
1004
testing_nmi = 0;
1005
preop_val = old_preop_val;
1006
pretimeout = old_pretimeout;
1007
timeout = old_timeout;
1008
}
1009
#endif
1010
1011
out:
1012
if ((start_now) && (rv == 0)) {
1013
/* Run from startup, so start the timer now. */
1014
start_now = 0; /* Disable this function after first startup. */
1015
ipmi_watchdog_state = action_val;
1016
ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
1017
pr_info("Starting now!\n");
1018
} else {
1019
/* Stop the timer now. */
1020
ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
1021
ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
1022
}
1023
}
1024
1025
static void ipmi_unregister_watchdog(int ipmi_intf)
1026
{
1027
struct ipmi_user *loc_user = watchdog_user;
1028
1029
if (!loc_user)
1030
return;
1031
1032
if (watchdog_ifnum != ipmi_intf)
1033
return;
1034
1035
/* Make sure no one can call us any more. */
1036
misc_deregister(&ipmi_wdog_miscdev);
1037
1038
watchdog_user = NULL;
1039
1040
/*
1041
* Wait to make sure the message makes it out. The lower layer has
1042
* pointers to our buffers, we want to make sure they are done before
1043
* we release our memory.
1044
*/
1045
while (atomic_read(&msg_tofree))
1046
msg_free_smi(NULL);
1047
1048
mutex_lock(&ipmi_watchdog_mutex);
1049
1050
/* Disconnect from IPMI. */
1051
ipmi_destroy_user(loc_user);
1052
1053
/* If it comes back, restart it properly. */
1054
ipmi_start_timer_on_heartbeat = 1;
1055
1056
mutex_unlock(&ipmi_watchdog_mutex);
1057
}
1058
1059
#ifdef HAVE_DIE_NMI
1060
static int
1061
ipmi_nmi(unsigned int val, struct pt_regs *regs)
1062
{
1063
/*
1064
* If we get here, it's an NMI that's not a memory or I/O
1065
* error. We can't truly tell if it's from IPMI or not
1066
* without sending a message, and sending a message is almost
1067
* impossible because of locking.
1068
*/
1069
1070
if (testing_nmi) {
1071
testing_nmi = 2;
1072
return NMI_HANDLED;
1073
}
1074
1075
/* If we are not expecting a timeout, ignore it. */
1076
if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE)
1077
return NMI_DONE;
1078
1079
if (preaction_val != WDOG_PRETIMEOUT_NMI)
1080
return NMI_DONE;
1081
1082
/*
1083
* If no one else handled the NMI, we assume it was the IPMI
1084
* watchdog.
1085
*/
1086
if (preop_val == WDOG_PREOP_PANIC) {
1087
/* On some machines, the heartbeat will give
1088
an error and not work unless we re-enable
1089
the timer. So do so. */
1090
atomic_set(&pretimeout_since_last_heartbeat, 1);
1091
if (atomic_inc_and_test(&preop_panic_excl))
1092
nmi_panic(regs, "pre-timeout");
1093
}
1094
1095
return NMI_HANDLED;
1096
}
1097
#endif
1098
1099
static int wdog_reboot_handler(struct notifier_block *this,
1100
unsigned long code,
1101
void *unused)
1102
{
1103
static int reboot_event_handled;
1104
1105
if ((watchdog_user) && (!reboot_event_handled)) {
1106
/* Make sure we only do this once. */
1107
reboot_event_handled = 1;
1108
1109
if (code == SYS_POWER_OFF || code == SYS_HALT) {
1110
/* Disable the WDT if we are shutting down. */
1111
ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
1112
ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
1113
} else if (ipmi_watchdog_state != WDOG_TIMEOUT_NONE) {
1114
/* Set a long timer to let the reboot happen or
1115
reset if it hangs, but only if the watchdog
1116
timer was already running. */
1117
if (timeout < 120)
1118
timeout = 120;
1119
pretimeout = 0;
1120
ipmi_watchdog_state = WDOG_TIMEOUT_RESET;
1121
ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
1122
}
1123
}
1124
return NOTIFY_OK;
1125
}
1126
1127
static struct notifier_block wdog_reboot_notifier = {
1128
.notifier_call = wdog_reboot_handler,
1129
.next = NULL,
1130
.priority = 0
1131
};
1132
1133
/* SMI watcher callback for a new interface; @device is not used. */
static void ipmi_new_smi(int if_num, struct device *device)
{
	ipmi_register_watchdog(if_num);
}
1137
1138
/* SMI watcher callback for a removed interface. */
static void ipmi_smi_gone(int if_num)
{
	ipmi_unregister_watchdog(if_num);
}
1142
1143
static struct ipmi_smi_watcher smi_watcher = {
1144
.owner = THIS_MODULE,
1145
.new_smi = ipmi_new_smi,
1146
.smi_gone = ipmi_smi_gone
1147
};
1148
1149
static int action_op_set_val(const char *inval)
1150
{
1151
if (strcmp(inval, "reset") == 0)
1152
action_val = WDOG_TIMEOUT_RESET;
1153
else if (strcmp(inval, "none") == 0)
1154
action_val = WDOG_TIMEOUT_NONE;
1155
else if (strcmp(inval, "power_cycle") == 0)
1156
action_val = WDOG_TIMEOUT_POWER_CYCLE;
1157
else if (strcmp(inval, "power_off") == 0)
1158
action_val = WDOG_TIMEOUT_POWER_DOWN;
1159
else
1160
return -EINVAL;
1161
return 0;
1162
}
1163
1164
static int action_op(const char *inval, char *outval)
1165
{
1166
int rv;
1167
1168
if (outval)
1169
strcpy(outval, action);
1170
1171
if (!inval)
1172
return 0;
1173
rv = action_op_set_val(inval);
1174
if (!rv)
1175
strcpy(action, inval);
1176
return rv;
1177
}
1178
1179
static int preaction_op_set_val(const char *inval)
1180
{
1181
if (strcmp(inval, "pre_none") == 0)
1182
preaction_val = WDOG_PRETIMEOUT_NONE;
1183
else if (strcmp(inval, "pre_smi") == 0)
1184
preaction_val = WDOG_PRETIMEOUT_SMI;
1185
#ifdef HAVE_DIE_NMI
1186
else if (strcmp(inval, "pre_nmi") == 0)
1187
preaction_val = WDOG_PRETIMEOUT_NMI;
1188
#endif
1189
else if (strcmp(inval, "pre_int") == 0)
1190
preaction_val = WDOG_PRETIMEOUT_MSG_INT;
1191
else
1192
return -EINVAL;
1193
return 0;
1194
}
1195
1196
static int preaction_op(const char *inval, char *outval)
1197
{
1198
int rv;
1199
1200
if (outval)
1201
strcpy(outval, preaction);
1202
1203
if (!inval)
1204
return 0;
1205
rv = preaction_op_set_val(inval);
1206
if (!rv)
1207
strcpy(preaction, inval);
1208
return 0;
1209
}
1210
1211
static int preop_op_set_val(const char *inval)
1212
{
1213
if (strcmp(inval, "preop_none") == 0)
1214
preop_val = WDOG_PREOP_NONE;
1215
else if (strcmp(inval, "preop_panic") == 0)
1216
preop_val = WDOG_PREOP_PANIC;
1217
else if (strcmp(inval, "preop_give_data") == 0)
1218
preop_val = WDOG_PREOP_GIVE_DATA;
1219
else
1220
return -EINVAL;
1221
return 0;
1222
}
1223
1224
static int preop_op(const char *inval, char *outval)
1225
{
1226
int rv;
1227
1228
if (outval)
1229
strcpy(outval, preop);
1230
1231
if (!inval)
1232
return 0;
1233
1234
rv = preop_op_set_val(inval);
1235
if (!rv)
1236
strcpy(preop, inval);
1237
return 0;
1238
}
1239
1240
/*
 * Reconcile the NMI handler registration with the current parameters.
 * Compiles to nothing without HAVE_DIE_NMI.
 *
 * The handler is wanted when the pre-timeout action is NMI.  The
 * "give data" operation is not kept together with an NMI pre-timeout: the
 * operation is reset to "preop_none" and the handler is then treated as
 * not wanted.  The handler is registered or unregistered to match.
 */
static void check_parms(void)
{
#ifdef HAVE_DIE_NMI
	int want_nmi = (preaction_val == WDOG_PRETIMEOUT_NMI);
	int rv;

	if (want_nmi && preop_val == WDOG_PREOP_GIVE_DATA) {
		pr_warn("Pretimeout op is to give data but NMI pretimeout is enabled, setting pretimeout op to none\n");
		preop_op("preop_none", NULL);
		want_nmi = 0;
	}

	if (want_nmi && !nmi_handler_registered) {
		rv = register_nmi_handler(NMI_UNKNOWN, ipmi_nmi, 0,
					  "ipmi");
		if (rv) {
			pr_warn("Can't register nmi handler\n");
			return;
		}
		nmi_handler_registered = 1;
	} else if (!want_nmi && nmi_handler_registered) {
		unregister_nmi_handler(NMI_UNKNOWN, "ipmi");
		nmi_handler_registered = 0;
	}
#endif
}
1268
1269
/*
 * Module init.  Re-validates the action/preaction/preop strings (falling
 * back to the defaults when one is not recognized), reconciles the NMI
 * handler, then registers the reboot notifier and the SMI watcher.
 *
 * Returns 0 on success or the ipmi_smi_watcher_register() error, in which
 * case the NMI handler and reboot notifier are unregistered again.
 */
static int __init ipmi_wdog_init(void)
{
	int rv;

	/*
	 * Log the rejected string before the *_op() call overwrites it with
	 * the default; otherwise the message would only ever show the default.
	 */
	if (action_op_set_val(action)) {
		pr_info("Unknown action '%s', defaulting to reset\n", action);
		action_op("reset", NULL);
	}

	if (preaction_op_set_val(preaction)) {
		pr_info("Unknown preaction '%s', defaulting to none\n",
			preaction);
		preaction_op("pre_none", NULL);
	}

	if (preop_op_set_val(preop)) {
		pr_info("Unknown preop '%s', defaulting to none\n", preop);
		preop_op("preop_none", NULL);
	}

	check_parms();

	register_reboot_notifier(&wdog_reboot_notifier);

	rv = ipmi_smi_watcher_register(&smi_watcher);
	if (rv) {
#ifdef HAVE_DIE_NMI
		if (nmi_handler_registered)
			unregister_nmi_handler(NMI_UNKNOWN, "ipmi");
#endif
		unregister_reboot_notifier(&wdog_reboot_notifier);
		pr_warn("can't register smi watcher\n");
		return rv;
	}

	pr_info("driver initialized\n");

	return 0;
}
1308
1309
/*
 * Module exit.  Unregisters the SMI watcher, detaches from the current
 * interface, then removes the NMI handler (if one was registered) and the
 * reboot notifier.
 */
static void __exit ipmi_wdog_exit(void)
{
	/* Stop watching for interfaces, then detach from the one in use. */
	ipmi_smi_watcher_unregister(&smi_watcher);
	ipmi_unregister_watchdog(watchdog_ifnum);

#ifdef HAVE_DIE_NMI
	if (nmi_handler_registered)
		unregister_nmi_handler(NMI_UNKNOWN, "ipmi");
#endif

	unregister_reboot_notifier(&wdog_reboot_notifier);
}
1321
module_exit(ipmi_wdog_exit);
1322
module_init(ipmi_wdog_init);
1323
MODULE_LICENSE("GPL");
1324
MODULE_AUTHOR("Corey Minyard <[email protected]>");
1325
MODULE_DESCRIPTION("watchdog timer based upon the IPMI interface.");
1326
1327