GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/kern/kern_fail.c
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2009 Isilon Inc http://www.isilon.com/
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/**
 * @file
 *
 * fail(9) Facility.
 *
 * @ingroup failpoint_private
 */
/**
 * @defgroup failpoint fail(9) Facility
 *
 * Failpoints allow for injecting fake errors into running code on the fly,
 * without modifying code or recompiling with flags. Failpoints are always
 * present, and are very efficient when disabled. Failpoints are described
 * in man fail(9).
 */
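
/*
 * Illustrative sketch (editorial, not part of the original file): a
 * subsystem normally declares and evaluates a failpoint through the
 * KFAIL_POINT_*() macros from sys/fail.h, then drives it at runtime
 * through its sysctl, e.g. sysctl debug.fail_point.<name>='5%return(5)'.
 * The hypothetical function below shows the common return-injection
 * pattern; the block is compiled out with #if 0.
 */
#if 0
#include <sys/fail.h>

static int
example_io_start(void)
{

        /* Fires according to the sysctl setting; returns its argument. */
        KFAIL_POINT_RETURN(DEBUG_FP, example_io_start);
        return (0);
}
#endif
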
/**
 * @defgroup failpoint_private Private fail(9) Implementation functions
 *
 * Private implementations for the actual failpoint code.
 *
 * @ingroup failpoint
 */
/**
 * @addtogroup failpoint_private
 * @{
 */

#include <sys/cdefs.h>
#include "opt_stack.h"

#include <sys/ctype.h>
#include <sys/errno.h>
#include <sys/fail.h>
#include <sys/kernel.h>
#include <sys/libkern.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sleepqueue.h>
#include <sys/stdarg.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/types.h>

#include <machine/atomic.h>

#ifdef ILOG_DEFINE_FOR_FILE
ILOG_DEFINE_FOR_FILE(L_ISI_FAIL_POINT, L_ILOG, fail_point);
#endif

static MALLOC_DEFINE(M_FAIL_POINT, "Fail Points", "fail points system");
#define fp_free(ptr) free(ptr, M_FAIL_POINT)
#define fp_malloc(size, flags) malloc((size), M_FAIL_POINT, (flags))
#define fs_free(ptr) fp_free(ptr)
#define fs_malloc() fp_malloc(sizeof(struct fail_point_setting), \
            M_WAITOK | M_ZERO)

/**
 * These define the wchans that are used for sleeping and pausing,
 * respectively. They are chosen arbitrarily, but must be distinct per
 * failpoint and between the sleep and pause cases.
 */
#define FP_SLEEP_CHANNEL(fp) (void*)(fp)
#define FP_PAUSE_CHANNEL(fp) __DEVOLATILE(void*, &fp->fp_setting)

/**
 * Don't allow more than this many entries in a fail point set by sysctl.
 * The 99.99...% case is to have 1 entry. I can't imagine having this many
 * entries, so it should not limit us. Saves on re-mallocs while holding
 * a non-sleepable lock.
 */
#define FP_MAX_ENTRY_COUNT 20

/* Used to drain sbufs to the sysctl output */
int fail_sysctl_drain_func(void *, const char *, int);

/* Head of tailq of struct fail_point_entry */
TAILQ_HEAD(fail_point_entry_queue, fail_point_entry);

/**
 * fp entries garbage list; outstanding entries are cleaned up in the
 * garbage collector
 */
STAILQ_HEAD(fail_point_setting_garbage, fail_point_setting);
static struct fail_point_setting_garbage fp_setting_garbage =
        STAILQ_HEAD_INITIALIZER(fp_setting_garbage);
static struct mtx mtx_garbage_list;
MTX_SYSINIT(mtx_garbage_list, &mtx_garbage_list, "fail point garbage mtx",
        MTX_SPIN);

static struct sx sx_fp_set;
SX_SYSINIT(sx_fp_set, &sx_fp_set, "fail point set sx");

/**
 * Failpoint types.
 * Don't change these without changing fail_type_strings in fail.c.
 * @ingroup failpoint_private
 */
enum fail_point_t {
        FAIL_POINT_OFF,         /**< don't fail */
        FAIL_POINT_PANIC,       /**< panic */
        FAIL_POINT_RETURN,      /**< return an error code */
        FAIL_POINT_BREAK,       /**< break into the debugger */
        FAIL_POINT_PRINT,       /**< print a message */
        FAIL_POINT_SLEEP,       /**< sleep for some msecs */
        FAIL_POINT_PAUSE,       /**< sleep until failpoint is set to off */
        FAIL_POINT_YIELD,       /**< yield the cpu */
        FAIL_POINT_DELAY,       /**< busy wait the cpu */
        FAIL_POINT_NUMTYPES,
        FAIL_POINT_INVALID = -1
};

static struct {
        const char *name;
        int nmlen;
} fail_type_strings[] = {
#define FP_TYPE_NM_LEN(s) { s, sizeof(s) - 1 }
        [FAIL_POINT_OFF] =      FP_TYPE_NM_LEN("off"),
        [FAIL_POINT_PANIC] =    FP_TYPE_NM_LEN("panic"),
        [FAIL_POINT_RETURN] =   FP_TYPE_NM_LEN("return"),
        [FAIL_POINT_BREAK] =    FP_TYPE_NM_LEN("break"),
        [FAIL_POINT_PRINT] =    FP_TYPE_NM_LEN("print"),
        [FAIL_POINT_SLEEP] =    FP_TYPE_NM_LEN("sleep"),
        [FAIL_POINT_PAUSE] =    FP_TYPE_NM_LEN("pause"),
        [FAIL_POINT_YIELD] =    FP_TYPE_NM_LEN("yield"),
        [FAIL_POINT_DELAY] =    FP_TYPE_NM_LEN("delay"),
};
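
/*
 * Editorial note (not in the original source): a sysctl setting is a
 * "->"-chained list of terms built from the type names above, e.g.
 * "2.5%return(5)->sleep(100)" or "1*return(5)[pid 1234]"; see the
 * parse_*() helpers at the end of this file and fail(9) for the exact
 * grammar.
 */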

#define FE_COUNT_UNTRACKED (INT_MIN)

/**
 * Internal structure tracking a single term of a complete failpoint.
 * @ingroup failpoint_private
 */
struct fail_point_entry {
        volatile bool fe_stale;
        enum fail_point_t fe_type;      /**< type of entry */
        int fe_arg;                     /**< argument to type (e.g. return value) */
        int fe_prob;                    /**< likelihood of firing in millionths */
        int32_t fe_count;               /**< number of times to fire, -1 means infinite */
        pid_t fe_pid;                   /**< only fail for this process */
        struct fail_point *fe_parent;   /**< backpointer to fp */
        TAILQ_ENTRY(fail_point_entry) fe_entries; /**< next entry ptr */
};

struct fail_point_setting {
        STAILQ_ENTRY(fail_point_setting) fs_garbage_link;
        struct fail_point_entry_queue fp_entry_queue;
        struct fail_point * fs_parent;
        struct mtx feq_mtx; /* Gives fail_point_pause something to do. */
};

/**
 * Defines stating the equivalent of probability one (100%)
 */
enum {
        PROB_MAX = 1000000,     /* probability between zero and this number */
        PROB_DIGITS = 6         /* number of zero's in above number */
};

/* Get a ref on an fp's fp_setting */
static inline struct fail_point_setting *fail_point_setting_get_ref(
        struct fail_point *fp);
/* Release a ref on an fp_setting */
static inline void fail_point_setting_release_ref(struct fail_point *fp);
/* Allocate and initialize a struct fail_point_setting */
static struct fail_point_setting *fail_point_setting_new(struct
        fail_point *);
/* Free a struct fail_point_setting */
static void fail_point_setting_destroy(struct fail_point_setting *fp_setting);
/* Allocate and initialize a struct fail_point_entry */
static struct fail_point_entry *fail_point_entry_new(struct
        fail_point_setting *);
/* Free a struct fail_point_entry */
static void fail_point_entry_destroy(struct fail_point_entry *fp_entry);
/* Append fp setting to garbage list */
static inline void fail_point_setting_garbage_append(
        struct fail_point_setting *fp_setting);
/* Swap fp's setting with fp_setting_new */
static inline struct fail_point_setting *
        fail_point_swap_settings(struct fail_point *fp,
        struct fail_point_setting *fp_setting_new);
/* Free up any zero-ref setting in the garbage queue */
static void fail_point_garbage_collect(void);
/* If this fail point's settings are empty, then swap it out to NULL. */
static inline void fail_point_eval_swap_out(struct fail_point *fp,
        struct fail_point_setting *fp_setting);

bool
fail_point_is_off(struct fail_point *fp)
{
        bool return_val;
        struct fail_point_setting *fp_setting;
        struct fail_point_entry *ent;

        return_val = true;

        fp_setting = fail_point_setting_get_ref(fp);
        if (fp_setting != NULL) {
                TAILQ_FOREACH(ent, &fp_setting->fp_entry_queue,
                    fe_entries) {
                        if (!ent->fe_stale) {
                                return_val = false;
                                break;
                        }
                }
        }
        fail_point_setting_release_ref(fp);

        return (return_val);
}

/* Allocate and initialize a struct fail_point_setting */
static struct fail_point_setting *
fail_point_setting_new(struct fail_point *fp)
{
        struct fail_point_setting *fs_new;

        fs_new = fs_malloc();
        fs_new->fs_parent = fp;
        TAILQ_INIT(&fs_new->fp_entry_queue);
        mtx_init(&fs_new->feq_mtx, "fail point entries", NULL, MTX_SPIN);

        fail_point_setting_garbage_append(fs_new);

        return (fs_new);
}

/* Free a struct fail_point_setting */
static void
fail_point_setting_destroy(struct fail_point_setting *fp_setting)
{
        struct fail_point_entry *ent;

        while (!TAILQ_EMPTY(&fp_setting->fp_entry_queue)) {
                ent = TAILQ_FIRST(&fp_setting->fp_entry_queue);
                TAILQ_REMOVE(&fp_setting->fp_entry_queue, ent, fe_entries);
                fail_point_entry_destroy(ent);
        }

        fs_free(fp_setting);
}

/* Allocate and initialize a struct fail_point_entry */
static struct fail_point_entry *
fail_point_entry_new(struct fail_point_setting *fp_setting)
{
        struct fail_point_entry *fp_entry;

        fp_entry = fp_malloc(sizeof(struct fail_point_entry),
            M_WAITOK | M_ZERO);
        fp_entry->fe_parent = fp_setting->fs_parent;
        fp_entry->fe_prob = PROB_MAX;
        fp_entry->fe_pid = NO_PID;
        fp_entry->fe_count = FE_COUNT_UNTRACKED;
        TAILQ_INSERT_TAIL(&fp_setting->fp_entry_queue, fp_entry,
            fe_entries);

        return (fp_entry);
}

/* Free a struct fail_point_entry */
static void
fail_point_entry_destroy(struct fail_point_entry *fp_entry)
{

        fp_free(fp_entry);
}

/* Get a ref on an fp's fp_setting */
static inline struct fail_point_setting *
fail_point_setting_get_ref(struct fail_point *fp)
{
        struct fail_point_setting *fp_setting;

        /* Invariant: if we have a ref, our pointer to fp_setting is safe */
        atomic_add_acq_32(&fp->fp_ref_cnt, 1);
        fp_setting = fp->fp_setting;

        return (fp_setting);
}

/* Release a ref on an fp_setting */
static inline void
fail_point_setting_release_ref(struct fail_point *fp)
{

        KASSERT(fp->fp_ref_cnt > 0, ("Attempting to deref w/no refs"));
        atomic_subtract_rel_32(&fp->fp_ref_cnt, 1);
}

/* Append fp entries to fp garbage list */
static inline void
fail_point_setting_garbage_append(struct fail_point_setting *fp_setting)
{

        mtx_lock_spin(&mtx_garbage_list);
        STAILQ_INSERT_TAIL(&fp_setting_garbage, fp_setting,
            fs_garbage_link);
        mtx_unlock_spin(&mtx_garbage_list);
}

/* Swap fp's entries with fp_setting_new */
static struct fail_point_setting *
fail_point_swap_settings(struct fail_point *fp,
    struct fail_point_setting *fp_setting_new)
{
        struct fail_point_setting *fp_setting_old;

        fp_setting_old = fp->fp_setting;
        fp->fp_setting = fp_setting_new;

        return (fp_setting_old);
}

static inline void
fail_point_eval_swap_out(struct fail_point *fp,
    struct fail_point_setting *fp_setting)
{

        /* We may have already been swapped out and replaced; ignore. */
        if (fp->fp_setting == fp_setting)
                fail_point_swap_settings(fp, NULL);
}

/* Free up any zero-ref entries in the garbage queue */
static void
fail_point_garbage_collect(void)
{
        struct fail_point_setting *fs_current, *fs_next;
        struct fail_point_setting_garbage fp_ents_free_list;

        /**
         * We will transfer the entries to free to fp_ents_free_list while
         * holding the spin mutex, then free it after we drop the lock. This
         * avoids triggering witness due to sleepable mutexes in the memory
         * allocator.
         */
        STAILQ_INIT(&fp_ents_free_list);

        mtx_lock_spin(&mtx_garbage_list);
        STAILQ_FOREACH_SAFE(fs_current, &fp_setting_garbage, fs_garbage_link,
            fs_next) {
                if (fs_current->fs_parent->fp_setting != fs_current &&
                    fs_current->fs_parent->fp_ref_cnt == 0) {
                        STAILQ_REMOVE(&fp_setting_garbage, fs_current,
                            fail_point_setting, fs_garbage_link);
                        STAILQ_INSERT_HEAD(&fp_ents_free_list, fs_current,
                            fs_garbage_link);
                }
        }
        mtx_unlock_spin(&mtx_garbage_list);

        STAILQ_FOREACH_SAFE(fs_current, &fp_ents_free_list, fs_garbage_link,
            fs_next)
                fail_point_setting_destroy(fs_current);
}

/* Drain out all refs from this fail point */
static inline void
fail_point_drain(struct fail_point *fp, int expected_ref)
{
        struct fail_point_setting *entries;

        entries = fail_point_swap_settings(fp, NULL);
        /**
         * We have unpaused all threads; so we will wait no longer
         * than the time taken for the longest remaining sleep, or
         * the length of time of a long-running code block.
         */
        while (fp->fp_ref_cnt > expected_ref) {
                wakeup(FP_PAUSE_CHANNEL(fp));
                tsleep(&fp, PWAIT, "fail_point_drain", hz / 100);
        }
        if (fp->fp_callout)
                callout_drain(fp->fp_callout);
        fail_point_swap_settings(fp, entries);
}

static inline void
fail_point_pause(struct fail_point *fp, enum fail_point_return_code *pret,
    struct mtx *mtx_sleep)
{

        if (fp->fp_pre_sleep_fn)
                fp->fp_pre_sleep_fn(fp->fp_pre_sleep_arg);

        msleep_spin(FP_PAUSE_CHANNEL(fp), mtx_sleep, "failpt", 0);

        if (fp->fp_post_sleep_fn)
                fp->fp_post_sleep_fn(fp->fp_post_sleep_arg);
}

static inline void
fail_point_sleep(struct fail_point *fp, int msecs,
    enum fail_point_return_code *pret)
{
        int timo;

        /* Convert from millisecs to ticks, rounding up */
        timo = howmany((int64_t)msecs * hz, 1000L);

        if (timo > 0) {
                if (!(fp->fp_flags & FAIL_POINT_USE_TIMEOUT_PATH)) {
                        if (fp->fp_pre_sleep_fn)
                                fp->fp_pre_sleep_fn(fp->fp_pre_sleep_arg);

                        tsleep(FP_SLEEP_CHANNEL(fp), PWAIT, "failpt", timo);

                        if (fp->fp_post_sleep_fn)
                                fp->fp_post_sleep_fn(fp->fp_post_sleep_arg);
                } else {
                        if (fp->fp_pre_sleep_fn)
                                fp->fp_pre_sleep_fn(fp->fp_pre_sleep_arg);

                        callout_reset(fp->fp_callout, timo,
                            fp->fp_post_sleep_fn, fp->fp_post_sleep_arg);
                        *pret = FAIL_POINT_RC_QUEUED;
                }
        }
}

static char *parse_fail_point(struct fail_point_setting *, char *);
static char *parse_term(struct fail_point_setting *, char *);
static char *parse_number(int *out_units, int *out_decimal, char *);
static char *parse_type(struct fail_point_entry *, char *);

/**
 * Initialize a fail_point. The name is formed in a printf-like fashion
 * from "fmt" and subsequent arguments. This function is generally used
 * for custom failpoints located at odd places in the sysctl tree, and is
 * not explicitly needed for standard in-line-declared failpoints.
 *
 * @ingroup failpoint
 */
void
fail_point_init(struct fail_point *fp, const char *fmt, ...)
{
        va_list ap;
        char *name;
        int n;

        fp->fp_setting = NULL;
        fp->fp_flags = 0;

        /* Figure out the size of the name. */
        va_start(ap, fmt);
        n = vsnprintf(NULL, 0, fmt, ap);
        va_end(ap);

        /* Allocate the name and fill it in. */
        name = fp_malloc(n + 1, M_WAITOK);
        va_start(ap, fmt);
        vsnprintf(name, n + 1, fmt, ap);
        va_end(ap);

        fp->fp_name = name;
        fp->fp_location = "";
        fp->fp_flags |= FAIL_POINT_DYNAMIC_NAME;
        fp->fp_pre_sleep_fn = NULL;
        fp->fp_pre_sleep_arg = NULL;
        fp->fp_post_sleep_fn = NULL;
        fp->fp_post_sleep_arg = NULL;
}
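
/*
 * Illustrative sketch (editorial, with hypothetical names): a dynamically
 * created failpoint pairs fail_point_init() with an explicit sysctl node
 * backed by fail_point_sysctl(); statically declared failpoints get this
 * wiring from the KFAIL_POINT_*() machinery in sys/fail.h instead. The
 * block is compiled out with #if 0.
 */
#if 0
static struct fail_point example_fp;

static void
example_fp_attach(struct sysctl_ctx_list *ctx, struct sysctl_oid *parent,
    int unit)
{

        fail_point_init(&example_fp, "example_%d", unit);
        SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(parent), OID_AUTO,
            "example_fail_point",
            CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
            &example_fp, 0, fail_point_sysctl, "A", "example fail point");
}
#endif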

void
fail_point_alloc_callout(struct fail_point *fp)
{

        /**
         * This assumes that calls to fail_point_use_timeout_path()
         * will not race.
         */
        if (fp->fp_callout != NULL)
                return;
        fp->fp_callout = fp_malloc(sizeof(*fp->fp_callout), M_WAITOK);
        callout_init(fp->fp_callout, CALLOUT_MPSAFE);
}

/**
 * Free the resources held by a fail_point, and wake any paused threads.
 * Thou shalt not allow threads to hit this fail point after you enter this
 * function, nor shall you call this multiple times for a given fp.
 * @ingroup failpoint
 */
void
fail_point_destroy(struct fail_point *fp)
{

        fail_point_drain(fp, 0);

        if ((fp->fp_flags & FAIL_POINT_DYNAMIC_NAME) != 0) {
                fp_free(__DECONST(void *, fp->fp_name));
                fp->fp_name = NULL;
        }
        fp->fp_flags = 0;
        if (fp->fp_callout) {
                fp_free(fp->fp_callout);
                fp->fp_callout = NULL;
        }

        sx_xlock(&sx_fp_set);
        fail_point_garbage_collect();
        sx_xunlock(&sx_fp_set);
}

/**
 * This does the real work of evaluating a fail point. If the fail point tells
 * us to return a value, this function returns FAIL_POINT_RC_RETURN and fills
 * in 'return_value' (return_value is allowed to be null). If the fail point
 * tells us to panic, we never return. Otherwise we just return
 * FAIL_POINT_RC_CONTINUE after doing some work, which means "keep going".
 */
enum fail_point_return_code
fail_point_eval_nontrivial(struct fail_point *fp, int *return_value)
{
        bool execute = false;
        struct fail_point_entry *ent;
        struct fail_point_setting *fp_setting;
        enum fail_point_return_code ret;
        int cont;
        int count;
        int msecs;
        int usecs;

        ret = FAIL_POINT_RC_CONTINUE;
        cont = 0; /* don't continue by default */

        fp_setting = fail_point_setting_get_ref(fp);
        if (fp_setting == NULL)
                goto abort;

        TAILQ_FOREACH(ent, &fp_setting->fp_entry_queue, fe_entries) {
                if (ent->fe_stale)
                        continue;

                if (ent->fe_prob < PROB_MAX &&
                    ent->fe_prob < random() % PROB_MAX)
                        continue;

                if (ent->fe_pid != NO_PID && ent->fe_pid != curproc->p_pid)
                        continue;

                if (ent->fe_count != FE_COUNT_UNTRACKED) {
                        count = ent->fe_count;
                        while (count > 0) {
                                if (atomic_cmpset_32(&ent->fe_count,
                                    count, count - 1)) {
                                        count--;
                                        execute = true;
                                        break;
                                }
                                count = ent->fe_count;
                        }
                        if (execute == false)
                                /* We lost the race; consider the entry stale
                                 * and bail now */
                                continue;
                        if (count == 0)
                                ent->fe_stale = true;
                }

                switch (ent->fe_type) {
                case FAIL_POINT_PANIC:
                        panic("fail point %s panicking", fp->fp_name);
                        /* NOTREACHED */

                case FAIL_POINT_RETURN:
                        if (return_value != NULL)
                                *return_value = ent->fe_arg;
                        ret = FAIL_POINT_RC_RETURN;
                        break;

                case FAIL_POINT_BREAK:
                        printf("fail point %s breaking to debugger\n",
                            fp->fp_name);
                        breakpoint();
                        break;

                case FAIL_POINT_PRINT:
                        printf("fail point %s executing\n", fp->fp_name);
                        cont = ent->fe_arg;
                        break;

                case FAIL_POINT_SLEEP:
                        msecs = ent->fe_arg;
                        if (msecs)
                                fail_point_sleep(fp, msecs, &ret);
                        break;

                case FAIL_POINT_PAUSE:
                        /**
                         * Pausing is inherently strange with multiple
                         * entries given our design. That is because some
                         * entries could be unreachable, for instance in
                         * cases like: pause->return. We can never reach
                         * the return entry. The sysctl layer actually
                         * truncates all entries after a pause for this
                         * reason.
                         */
                        mtx_lock_spin(&fp_setting->feq_mtx);
                        fail_point_pause(fp, &ret, &fp_setting->feq_mtx);
                        mtx_unlock_spin(&fp_setting->feq_mtx);
                        break;

                case FAIL_POINT_YIELD:
                        kern_yield(PRI_UNCHANGED);
                        break;

                case FAIL_POINT_DELAY:
                        usecs = ent->fe_arg;
                        DELAY(usecs);
                        break;

                default:
                        break;
                }

                if (cont == 0)
                        break;
        }

        if (fail_point_is_off(fp))
                fail_point_eval_swap_out(fp, fp_setting);

abort:
        fail_point_setting_release_ref(fp);

        return (ret);
}
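
/*
 * Editorial note: the hot-path entry point is the fail_point_eval()
 * inline in sys/fail.h, which only calls fail_point_eval_nontrivial()
 * once fp_setting is non-NULL, so a disabled failpoint costs roughly
 * one load and one branch. A minimal sketch of that fast path, under
 * the assumption that the wrapper mirrors the fields used here
 * (compiled out with #if 0):
 */
#if 0
static inline enum fail_point_return_code
example_fail_point_eval(struct fail_point *fp, int *return_value)
{

        if (fp->fp_setting == NULL)
                return (FAIL_POINT_RC_CONTINUE);
        return (fail_point_eval_nontrivial(fp, return_value));
}
#endif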

/**
 * Translate internal fail_point structure into human-readable text.
 */
static void
fail_point_get(struct fail_point *fp, struct sbuf *sb,
    bool verbose)
{
        struct fail_point_entry *ent;
        struct fail_point_setting *fp_setting;
        struct fail_point_entry *fp_entry_cpy;
        int cnt_sleeping;
        int idx;
        int printed_entry_count;

        cnt_sleeping = 0;
        idx = 0;
        printed_entry_count = 0;

        fp_entry_cpy = fp_malloc(sizeof(struct fail_point_entry) *
            (FP_MAX_ENTRY_COUNT + 1), M_WAITOK);

        fp_setting = fail_point_setting_get_ref(fp);

        if (fp_setting != NULL) {
                TAILQ_FOREACH(ent, &fp_setting->fp_entry_queue, fe_entries) {
                        if (ent->fe_stale)
                                continue;

                        KASSERT(printed_entry_count < FP_MAX_ENTRY_COUNT,
                            ("FP entry list larger than allowed"));

                        fp_entry_cpy[printed_entry_count] = *ent;
                        ++printed_entry_count;
                }
        }
        fail_point_setting_release_ref(fp);

        /* This is our equivalent of a NULL terminator */
        fp_entry_cpy[printed_entry_count].fe_type = FAIL_POINT_INVALID;

        while (idx < printed_entry_count) {
                ent = &fp_entry_cpy[idx];
                ++idx;
                if (ent->fe_prob < PROB_MAX) {
                        int decimal = ent->fe_prob % (PROB_MAX / 100);
                        int units = ent->fe_prob / (PROB_MAX / 100);
                        sbuf_printf(sb, "%d", units);
                        if (decimal) {
                                int digits = PROB_DIGITS - 2;
                                while (!(decimal % 10)) {
                                        digits--;
                                        decimal /= 10;
                                }
                                sbuf_printf(sb, ".%0*d", digits, decimal);
                        }
                        sbuf_printf(sb, "%%");
                }
                if (ent->fe_count >= 0)
                        sbuf_printf(sb, "%d*", ent->fe_count);
                sbuf_printf(sb, "%s", fail_type_strings[ent->fe_type].name);
                if (ent->fe_arg)
                        sbuf_printf(sb, "(%d)", ent->fe_arg);
                if (ent->fe_pid != NO_PID)
                        sbuf_printf(sb, "[pid %d]", ent->fe_pid);
                if (TAILQ_NEXT(ent, fe_entries))
                        sbuf_cat(sb, "->");
        }
        if (!printed_entry_count)
                sbuf_cat(sb, "off");

        fp_free(fp_entry_cpy);
        if (verbose) {
#ifdef STACK
                /* Print number of sleeping threads. queue=0 is the argument
                 * used by msleep when sending our threads to sleep. */
                sbuf_cat(sb, "\nsleeping_thread_stacks = {\n");
                sleepq_sbuf_print_stacks(sb, FP_SLEEP_CHANNEL(fp), 0,
                    &cnt_sleeping);

                sbuf_cat(sb, "},\n");
#endif
                sbuf_printf(sb, "sleeping_thread_count = %d,\n",
                    cnt_sleeping);

#ifdef STACK
                sbuf_cat(sb, "paused_thread_stacks = {\n");
                sleepq_sbuf_print_stacks(sb, FP_PAUSE_CHANNEL(fp), 0,
                    &cnt_sleeping);

                sbuf_cat(sb, "},\n");
#endif
                sbuf_printf(sb, "paused_thread_count = %d\n",
                    cnt_sleeping);
        }
}

/**
 * Set an internal fail_point structure from a human-readable failpoint string
 * in a lock-safe manner.
 */
static int
fail_point_set(struct fail_point *fp, char *buf)
{
        struct fail_point_entry *ent, *ent_next;
        struct fail_point_setting *entries;
        bool should_wake_paused;
        bool should_truncate;
        int error;

        error = 0;
        should_wake_paused = false;
        should_truncate = false;

        /* Parse new entries. */
        /**
         * ref protects our new malloc'd stuff from being garbage collected
         * before we link it.
         */
        fail_point_setting_get_ref(fp);
        entries = fail_point_setting_new(fp);
        if (parse_fail_point(entries, buf) == NULL) {
                STAILQ_REMOVE(&fp_setting_garbage, entries,
                    fail_point_setting, fs_garbage_link);
                fail_point_setting_destroy(entries);
                error = EINVAL;
                goto end;
        }

        /**
         * Transfer the entries we are going to keep to a new list.
         * Get rid of useless zero probability entries, and entries with hit
         * count 0.
         * If 'off' is present, and it has no hit count set, then all entries
         * after it are discarded since they are unreachable.
         */
        TAILQ_FOREACH_SAFE(ent, &entries->fp_entry_queue, fe_entries,
            ent_next) {
                if (ent->fe_prob == 0 || ent->fe_count == 0) {
                        printf("Discarding entry which cannot execute %s\n",
                            fail_type_strings[ent->fe_type].name);
                        TAILQ_REMOVE(&entries->fp_entry_queue, ent,
                            fe_entries);
                        fp_free(ent);
                        continue;
                } else if (should_truncate) {
                        printf("Discarding unreachable entry %s\n",
                            fail_type_strings[ent->fe_type].name);
                        TAILQ_REMOVE(&entries->fp_entry_queue, ent,
                            fe_entries);
                        fp_free(ent);
                        continue;
                }

                if (ent->fe_type == FAIL_POINT_OFF) {
                        should_wake_paused = true;
                        if (ent->fe_count == FE_COUNT_UNTRACKED) {
                                should_truncate = true;
                                TAILQ_REMOVE(&entries->fp_entry_queue, ent,
                                    fe_entries);
                                fp_free(ent);
                        }
                } else if (ent->fe_type == FAIL_POINT_PAUSE) {
                        should_truncate = true;
                } else if (ent->fe_type == FAIL_POINT_SLEEP && (fp->fp_flags &
                    FAIL_POINT_NONSLEEPABLE)) {
                        /**
                         * If this fail point is annotated as being in a
                         * non-sleepable ctx, convert sleep to delay and
                         * convert the msec argument to usecs.
                         */
                        printf("Sleep call request on fail point in "
                            "non-sleepable context; using delay instead "
                            "of sleep\n");
                        ent->fe_type = FAIL_POINT_DELAY;
                        ent->fe_arg *= 1000;
                }
        }

        if (TAILQ_EMPTY(&entries->fp_entry_queue)) {
                entries = fail_point_swap_settings(fp, NULL);
                if (entries != NULL)
                        wakeup(FP_PAUSE_CHANNEL(fp));
        } else {
                if (should_wake_paused)
                        wakeup(FP_PAUSE_CHANNEL(fp));
                fail_point_swap_settings(fp, entries);
        }

end:
#ifdef IWARNING
        if (error)
                IWARNING("Failed to set %s %s to %s",
                    fp->fp_name, fp->fp_location, buf);
        else
                INOTICE("Set %s %s to %s",
                    fp->fp_name, fp->fp_location, buf);
#endif /* IWARNING */

        fail_point_setting_release_ref(fp);
        return (error);
}

#define MAX_FAIL_POINT_BUF 1023

/**
 * Handle kernel failpoint set/get.
 */
int
fail_point_sysctl(SYSCTL_HANDLER_ARGS)
{
        struct fail_point *fp;
        char *buf;
        struct sbuf sb, *sb_check;
        int error;

        buf = NULL;
        error = 0;
        fp = arg1;

        sb_check = sbuf_new(&sb, NULL, 1024, SBUF_AUTOEXTEND);
        if (sb_check != &sb)
                return (ENOMEM);

        sbuf_set_drain(&sb, (sbuf_drain_func *)fail_sysctl_drain_func, req);

        /* Setting */
        /**
         * Lock protects any new entries from being garbage collected before
         * we can link them to the fail point.
         */
        sx_xlock(&sx_fp_set);
        if (req->newptr) {
                if (req->newlen > MAX_FAIL_POINT_BUF) {
                        error = EINVAL;
                        /* Don't leak the set lock on the early-exit path. */
                        sx_xunlock(&sx_fp_set);
                        goto out;
                }

                buf = fp_malloc(req->newlen + 1, M_WAITOK);

                error = SYSCTL_IN(req, buf, req->newlen);
                if (error) {
                        sx_xunlock(&sx_fp_set);
                        goto out;
                }
                buf[req->newlen] = '\0';

                error = fail_point_set(fp, buf);
        }

        fail_point_garbage_collect();
        sx_xunlock(&sx_fp_set);

        /* Retrieving. */
        fail_point_get(fp, &sb, false);

out:
        sbuf_finish(&sb);
        sbuf_delete(&sb);

        if (buf)
                fp_free(buf);

        return (error);
}

int
fail_point_sysctl_status(SYSCTL_HANDLER_ARGS)
{
        struct fail_point *fp;
        struct sbuf sb, *sb_check;

        fp = arg1;

        sb_check = sbuf_new(&sb, NULL, 1024, SBUF_AUTOEXTEND);
        if (sb_check != &sb)
                return (ENOMEM);

        sbuf_set_drain(&sb, (sbuf_drain_func *)fail_sysctl_drain_func, req);

        /* Retrieving. */
        fail_point_get(fp, &sb, true);

        sbuf_finish(&sb);
        sbuf_delete(&sb);

        /**
         * Lock protects any new entries from being garbage collected before
         * we can link them to the fail point.
         */
        sx_xlock(&sx_fp_set);
        fail_point_garbage_collect();
        sx_xunlock(&sx_fp_set);

        return (0);
}

int
fail_sysctl_drain_func(void *sysctl_args, const char *buf, int len)
{
        struct sysctl_req *sa;
        int error;

        sa = sysctl_args;

        error = SYSCTL_OUT(sa, buf, len);

        if (error == ENOMEM)
                return (-1);
        else
                return (len);
}

/**
 * Internal helper function to translate a human-readable failpoint string
 * into an internally-parsable fail_point structure.
 */
static char *
parse_fail_point(struct fail_point_setting *ents, char *p)
{
        /* <fail_point> ::
         *      <term> ( "->" <term> )*
         */
        uint8_t term_count;

        term_count = 1;

        p = parse_term(ents, p);
        if (p == NULL)
                return (NULL);

        while (*p != '\0') {
                term_count++;
                if (p[0] != '-' || p[1] != '>' ||
                    (p = parse_term(ents, p+2)) == NULL ||
                    term_count > FP_MAX_ENTRY_COUNT)
                        return (NULL);
        }
        return (p);
}
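
/*
 * Editorial example (checked against the grammar above and the
 * evaluation loop in fail_point_eval_nontrivial()):
 * "2.5%return(5)->sleep(100)" returns error 5 with probability 2.5%
 * and otherwise falls through to a 100 ms sleep, while
 * "1*return(5)[pid 1234]" fires exactly once, and only for pid 1234.
 */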

/**
 * Internal helper function to parse an individual term from a failpoint.
 */
static char *
parse_term(struct fail_point_setting *ents, char *p)
{
        struct fail_point_entry *ent;

        ent = fail_point_entry_new(ents);

        /*
         * <term> ::
         *      ( (<float> "%") | (<integer> "*" ) )*
         *      <type>
         *      [ "(" <integer> ")" ]
         *      [ "[pid " <integer> "]" ]
         */

        /* ( (<float> "%") | (<integer> "*" ) )* */
        while (isdigit(*p) || *p == '.') {
                int units, decimal;

                p = parse_number(&units, &decimal, p);
                if (p == NULL)
                        return (NULL);

                if (*p == '%') {
                        if (units > 100) /* prevent overflow early */
                                units = 100;
                        ent->fe_prob = units * (PROB_MAX / 100) + decimal;
                        if (ent->fe_prob > PROB_MAX)
                                ent->fe_prob = PROB_MAX;
                } else if (*p == '*') {
                        if (!units || units < 0 || decimal)
                                return (NULL);
                        ent->fe_count = units;
                } else
                        return (NULL);
                p++;
        }

        /* <type> */
        p = parse_type(ent, p);
        if (p == NULL)
                return (NULL);
        if (*p == '\0')
                return (p);

        /* [ "(" <integer> ")" ] */
        if (*p != '(')
                return (p);
        p++;
        if (!isdigit(*p) && *p != '-')
                return (NULL);
        ent->fe_arg = strtol(p, &p, 0);
        if (*p++ != ')')
                return (NULL);

        /* [ "[pid " <integer> "]" ] */
#define PID_STRING "[pid "
        if (strncmp(p, PID_STRING, sizeof(PID_STRING) - 1) != 0)
                return (p);
        p += sizeof(PID_STRING) - 1;
        if (!isdigit(*p))
                return (NULL);
        ent->fe_pid = strtol(p, &p, 0);
        if (*p++ != ']')
                return (NULL);

        return (p);
}

/**
 * Internal helper function to parse a numeric for a failpoint term.
 */
static char *
parse_number(int *out_units, int *out_decimal, char *p)
{
        char *old_p;

        /**
         * <number> ::
         *      <integer> [ "." <integer> ] |
         *      "." <integer>
         */

        /* whole part */
        old_p = p;
        *out_units = strtol(p, &p, 10);
        if (p == old_p && *p != '.')
                return (NULL);

        /* fractional part */
        *out_decimal = 0;
        if (*p == '.') {
                int digits = 0;
                p++;
                while (isdigit(*p)) {
                        int digit = *p - '0';
                        if (digits < PROB_DIGITS - 2)
                                *out_decimal = *out_decimal * 10 + digit;
                        else if (digits == PROB_DIGITS - 2 && digit >= 5)
                                (*out_decimal)++;
                        digits++;
                        p++;
                }
                if (!digits) /* need at least one digit after '.' */
                        return (NULL);
                while (digits++ < PROB_DIGITS - 2) /* add implicit zeros */
                        *out_decimal *= 10;
        }

        return (p); /* success */
}
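
/*
 * Worked example (editorial): parsing "0.25" yields units = 0 and,
 * after padding the fraction to PROB_DIGITS - 2 = 4 digits,
 * decimal = 2500; parse_term() then stores
 * fe_prob = 0 * (PROB_MAX / 100) + 2500 = 2500 millionths, i.e. a
 * 0.25% firing probability.
 */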

/**
 * Internal helper function to parse an individual type for a failpoint term.
 */
static char *
parse_type(struct fail_point_entry *ent, char *beg)
{
        enum fail_point_t type;
        int len;

        for (type = FAIL_POINT_OFF; type < FAIL_POINT_NUMTYPES; type++) {
                len = fail_type_strings[type].nmlen;
                if (strncmp(fail_type_strings[type].name, beg, len) == 0) {
                        ent->fe_type = type;
                        return (beg + len);
                }
        }
        return (NULL);
}

/* The fail point sysctl tree. */
SYSCTL_NODE(_debug, OID_AUTO, fail_point, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "fail points");

/* Debugging/testing stuff for fail point */
static int
sysctl_test_fail_point(SYSCTL_HANDLER_ARGS)
{

        KFAIL_POINT_RETURN(DEBUG_FP, test_fail_point);
        return (0);
}
SYSCTL_OID(_debug_fail_point, OID_AUTO, test_trigger_fail_point,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, NULL, 0,
    sysctl_test_fail_point, "A",
    "Trigger test fail points");
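
/*
 * Editorial usage sketch: the test failpoint above can be exercised
 * from userland roughly as follows (sysctl names as generated by
 * KFAIL_POINT_RETURN() under debug.fail_point; see fail(9)):
 *
 *   # sysctl debug.fail_point.test_fail_point='2%return(5)'
 *   # sysctl debug.fail_point.test_trigger_fail_point
 *
 * The first command installs the setting; each read of the trigger OID
 * then evaluates the failpoint, which makes the handler return error 5
 * about 2% of the time.
 */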