Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/cddl/usr.sbin/zfsd/case_file.cc
105952 views
1
/*-
2
* Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
3
* All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
7
* are met:
8
* 1. Redistributions of source code must retain the above copyright
9
* notice, this list of conditions, and the following disclaimer,
10
* without modification.
11
* 2. Redistributions in binary form must reproduce at minimum a disclaimer
12
* substantially similar to the "NO WARRANTY" disclaimer below
13
* ("Disclaimer") and any redistribution must be conditioned upon
14
* including a substantially similar Disclaimer requirement for further
15
* binary redistribution.
16
*
17
* NO WARRANTY
18
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
21
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
27
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28
* POSSIBILITY OF SUCH DAMAGES.
29
*
30
* Authors: Justin T. Gibbs (Spectra Logic Corporation)
31
*/
32
33
/**
34
* \file case_file.cc
35
*
36
* We keep case files for any leaf vdev that is not in the optimal state.
37
* However, we only serialize to disk those events that need to be preserved
38
* across reboots. For now, this is just a log of soft errors which we
39
* accumulate in order to mark a device as degraded.
40
*/
41
#include <sys/cdefs.h>
42
#include <sys/byteorder.h>
43
#include <sys/time.h>
44
45
#include <sys/fs/zfs.h>
46
47
#include <dirent.h>
48
#include <fcntl.h>
49
#include <iomanip>
50
#include <fstream>
51
#include <functional>
52
#include <sstream>
53
#include <syslog.h>
54
#include <unistd.h>
55
56
#include <libzutil.h>
57
#include <libzfs.h>
58
59
#include <list>
60
#include <map>
61
#include <string>
62
63
#include <devdctl/guid.h>
64
#include <devdctl/event.h>
65
#include <devdctl/event_factory.h>
66
#include <devdctl/exception.h>
67
#include <devdctl/consumer.h>
68
69
#include "callout.h"
70
#include "vdev_iterator.h"
71
#include "zfsd_event.h"
72
#include "case_file.h"
73
#include "vdev.h"
74
#include "zfsd.h"
75
#include "zfsd_exception.h"
76
#include "zpool_list.h"
77
/*============================ Namespace Control =============================*/
78
using std::hex;
79
using std::ifstream;
80
using std::stringstream;
81
using std::setfill;
82
using std::setw;
83
84
using DevdCtl::Event;
85
using DevdCtl::EventFactory;
86
using DevdCtl::EventList;
87
using DevdCtl::Guid;
88
using DevdCtl::ParseException;
89
90
/*--------------------------------- CaseFile ---------------------------------*/
91
//- CaseFile Static Data -------------------------------------------------------
92
93
CaseFileList CaseFile::s_activeCases;
94
const string CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
95
96
//- CaseFile Static Public Methods ---------------------------------------------
97
/**
 * Look up the active case that tracks a given vdev.
 *
 * A case matches when its vdev GUID equals vdevGUID and its pool GUID
 * equals poolGUID.  Passing Guid::InvalidGuid() as poolGUID disables the
 * pool comparison.
 *
 * \param poolGUID  Pool to match, or Guid::InvalidGuid() for any pool.
 * \param vdevGUID  Vdev to match.
 *
 * \return  The first matching case in s_activeCases, or NULL if none.
 */
CaseFile *
CaseFile::Find(Guid poolGUID, Guid vdevGUID)
{
	CaseFileList::iterator it(s_activeCases.begin());

	while (it != s_activeCases.end()) {
		CaseFile *candidate(*it);
		const bool poolMatches(candidate->PoolGUID() == poolGUID
		    || Guid::InvalidGuid() == poolGUID);

		/* We only carry one active case per-vdev. */
		if (poolMatches && candidate->VdevGUID() == vdevGUID)
			return (candidate);
		++it;
	}
	return (NULL);
}
115
116
void
117
CaseFile::Find(Guid poolGUID, Guid vdevGUID, CaseFileList &cases)
118
{
119
for (CaseFileList::iterator curCase = s_activeCases.begin();
120
curCase != s_activeCases.end(); curCase++) {
121
if (((*curCase)->PoolGUID() != poolGUID &&
122
Guid::InvalidGuid() != poolGUID) ||
123
(*curCase)->VdevGUID() != vdevGUID)
124
continue;
125
126
/*
127
* We can have multiple cases for spare vdevs
128
*/
129
cases.push_back(*curCase);
130
if (!(*curCase)->IsSpare()) {
131
return;
132
}
133
}
134
}
135
136
/**
 * Look up the active case whose vdev has the given physical path.
 *
 * The whole list is always scanned.  If more than one case matches, a
 * warning is logged for each extra match and the last match wins.
 *
 * \param physPath  Physical path to compare against each case.
 *
 * \return  The last matching case, or NULL if none matches.
 */
CaseFile *
CaseFile::Find(const string &physPath)
{
	CaseFile *match = NULL;
	CaseFileList::iterator it(s_activeCases.begin());

	for (; it != s_activeCases.end(); ++it) {
		if ((*it)->PhysicalPath() == physPath) {
			if (match != NULL) {
				syslog(LOG_WARNING,
				    "Multiple casefiles found for "
				    "physical path %s.  "
				    "This is most likely a bug in zfsd",
				    physPath.c_str());
			}
			match = *it;
		}
	}
	return (match);
}
157
158
159
void
160
CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event)
161
{
162
CaseFileList::iterator casefile;
163
for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){
164
CaseFileList::iterator next = casefile;
165
next++;
166
if (poolGUID == (*casefile)->PoolGUID())
167
(*casefile)->ReEvaluate(event);
168
casefile = next;
169
}
170
}
171
172
/**
 * Return the case for a vdev, creating one if none is active.
 *
 * \param vdev  Vdev whose pool and vdev GUIDs identify the case.
 *
 * \return  Reference to the existing or newly allocated case.
 */
CaseFile &
CaseFile::Create(Vdev &vdev)
{
	CaseFile *existing(Find(vdev.PoolGUID(), vdev.GUID()));

	if (existing != NULL)
		return (*existing);

	/* The constructor adds the new case to s_activeCases. */
	return (*new CaseFile(vdev));
}
183
184
void
185
CaseFile::DeSerialize()
186
{
187
struct dirent **caseFiles;
188
189
int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles,
190
DeSerializeSelector, /*compar*/NULL));
191
192
if (numCaseFiles == -1)
193
return;
194
if (numCaseFiles == 0) {
195
free(caseFiles);
196
return;
197
}
198
199
for (int i = 0; i < numCaseFiles; i++) {
200
201
DeSerializeFile(caseFiles[i]->d_name);
202
free(caseFiles[i]);
203
}
204
free(caseFiles);
205
}
206
207
bool
208
CaseFile::Empty()
209
{
210
return (s_activeCases.empty());
211
}
212
213
void
214
CaseFile::LogAll()
215
{
216
for (CaseFileList::iterator curCase = s_activeCases.begin();
217
curCase != s_activeCases.end(); curCase++)
218
(*curCase)->Log();
219
}
220
221
void
222
CaseFile::PurgeAll()
223
{
224
/*
225
* Serialize casefiles before deleting them so that they can be reread
226
* and revalidated during BuildCaseFiles.
227
* CaseFiles remove themselves from this list on destruction.
228
*/
229
while (s_activeCases.size() != 0) {
230
CaseFile *casefile = s_activeCases.front();
231
casefile->Serialize();
232
delete casefile;
233
}
234
235
}
236
237
int
238
CaseFile::IsSpare()
239
{
240
return (m_is_spare);
241
}
242
243
//- CaseFile Public Methods ----------------------------------------------------
244
bool
245
CaseFile::RefreshVdevState()
246
{
247
ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
248
zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front());
249
if (casePool == NULL)
250
return (false);
251
252
Vdev vd(casePool, CaseVdev(casePool));
253
if (vd.DoesNotExist())
254
return (false);
255
256
m_vdevState = vd.State();
257
m_vdevPhysPath = vd.PhysicalPath();
258
m_vdevName = vd.Name(casePool, false);
259
return (true);
260
}
261
262
bool
263
CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
264
{
265
ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
266
zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());
267
int flags = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE;
268
269
if (pool == NULL || !RefreshVdevState()) {
270
/*
271
* The pool or vdev for this case file is no longer
272
* part of the configuration. This can happen
273
* if we process a device arrival notification
274
* before seeing the ZFS configuration change
275
* event.
276
*/
277
syslog(LOG_INFO,
278
"CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. "
279
"Closing\n",
280
PoolGUIDString().c_str(),
281
VdevGUIDString().c_str());
282
Close();
283
284
/*
285
* Since this event was not used to close this
286
* case, do not report it as consumed.
287
*/
288
return (/*consumed*/false);
289
}
290
291
if (VdevState() > VDEV_STATE_FAULTED) {
292
/*
293
* For now, newly discovered devices only help for
294
* devices that are missing. In the future, we might
295
* use a newly inserted spare to replace a degraded
296
* or faulted device.
297
*/
298
syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored",
299
PoolGUIDString().c_str(), VdevGUIDString().c_str());
300
return (/*consumed*/false);
301
}
302
if (VdevState() == VDEV_STATE_OFFLINE) {
303
/*
304
* OFFLINE is an administrative decision. No need for zfsd to
305
* do anything.
306
*/
307
syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored",
308
PoolGUIDString().c_str(), VdevGUIDString().c_str());
309
return (/*consumed*/false);
310
}
311
312
if (vdev != NULL
313
&& ( vdev->PoolGUID() == m_poolGUID
314
|| vdev->PoolGUID() == Guid::InvalidGuid())
315
&& vdev->GUID() == m_vdevGUID) {
316
317
if (IsSpare())
318
flags |= ZFS_ONLINE_SPARE;
319
if (zpool_vdev_online(pool, vdev->GUIDString().c_str(),
320
flags, &m_vdevState) != 0) {
321
syslog(LOG_ERR,
322
"Failed to online vdev(%s/%s:%s): %s: %s\n",
323
zpool_get_name(pool), vdev->GUIDString().c_str(),
324
devPath.c_str(), libzfs_error_action(g_zfsHandle),
325
libzfs_error_description(g_zfsHandle));
326
return (/*consumed*/false);
327
}
328
329
syslog(LOG_INFO, "Onlined vdev(%s/%s:%s). State now %s.\n",
330
zpool_get_name(pool), vdev->GUIDString().c_str(),
331
devPath.c_str(),
332
zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
333
334
/*
335
* Check the vdev state post the online action to see
336
* if we can retire this case.
337
*/
338
CloseIfSolved();
339
340
return (/*consumed*/true);
341
}
342
343
/*
344
* If the auto-replace policy is enabled, and we have physical
345
* path information, try a physical path replacement.
346
*/
347
if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) {
348
syslog(LOG_INFO,
349
"CaseFile(%s:%s:%s): AutoReplace not set. "
350
"Ignoring device insertion.\n",
351
PoolGUIDString().c_str(),
352
VdevGUIDString().c_str(),
353
zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
354
return (/*consumed*/false);
355
}
356
357
if (PhysicalPath().empty()) {
358
syslog(LOG_INFO,
359
"CaseFile(%s:%s:%s): No physical path information. "
360
"Ignoring device insertion.\n",
361
PoolGUIDString().c_str(),
362
VdevGUIDString().c_str(),
363
zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
364
return (/*consumed*/false);
365
}
366
367
if (physPath != PhysicalPath()) {
368
syslog(LOG_INFO,
369
"CaseFile(%s:%s:%s): Physical path mismatch. "
370
"Ignoring device insertion.\n",
371
PoolGUIDString().c_str(),
372
VdevGUIDString().c_str(),
373
zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
374
return (/*consumed*/false);
375
}
376
377
/* Write a label on the newly inserted disk. */
378
if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) {
379
syslog(LOG_ERR,
380
"Replace vdev(%s/%s) by physical path (label): %s: %s\n",
381
zpool_get_name(pool), VdevGUIDString().c_str(),
382
libzfs_error_action(g_zfsHandle),
383
libzfs_error_description(g_zfsHandle));
384
return (/*consumed*/false);
385
}
386
387
syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s",
388
PoolGUIDString().c_str(), VdevGUIDString().c_str(),
389
devPath.c_str());
390
return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false));
391
}
392
393
bool
394
CaseFile::ReEvaluate(const ZfsEvent &event)
395
{
396
bool consumed(false);
397
398
if (event.Value("type") == "sysevent.fs.zfs.vdev_remove") {
399
/*
400
* The Vdev we represent has been removed from the
401
* configuration. This case is no longer of value.
402
*/
403
Close();
404
405
return (/*consumed*/true);
406
} else if (event.Value("type") == "sysevent.fs.zfs.pool_destroy") {
407
/* This Pool has been destroyed. Discard the case */
408
Close();
409
410
return (/*consumed*/true);
411
} else if (event.Value("type") == "sysevent.fs.zfs.config_sync") {
412
RefreshVdevState();
413
if (VdevState() < VDEV_STATE_HEALTHY &&
414
VdevState() != VDEV_STATE_OFFLINE)
415
consumed = ActivateSpare();
416
}
417
418
419
if (event.Value("class") == "resource.fs.zfs.removed") {
420
bool spare_activated;
421
422
if (!RefreshVdevState()) {
423
/*
424
* The pool or vdev for this case file is no longer
425
* part of the configuration. This can happen
426
* if we process a device arrival notification
427
* before seeing the ZFS configuration change
428
* event.
429
*/
430
syslog(LOG_INFO,
431
"CaseFile::ReEvaluate(%s,%s) Pool/Vdev "
432
"unconfigured. Closing\n",
433
PoolGUIDString().c_str(),
434
VdevGUIDString().c_str());
435
/*
436
* Close the case now so we won't waste cycles in the
437
* system rescan
438
*/
439
Close();
440
441
/*
442
* Since this event was not used to close this
443
* case, do not report it as consumed.
444
*/
445
return (/*consumed*/false);
446
}
447
448
/*
449
* Discard any tentative I/O error events for
450
* this case. They were most likely caused by the
451
* hot-unplug of this device.
452
*/
453
PurgeTentativeEvents();
454
455
/* Try to activate spares if they are available */
456
spare_activated = ActivateSpare();
457
458
/*
459
* Rescan the drives in the system to see if a recent
460
* drive arrival can be used to solve this case.
461
*/
462
ZfsDaemon::RequestSystemRescan();
463
464
/*
465
* Consume the event if we successfully activated a spare.
466
* Otherwise, leave it in the unconsumed events list so that the
467
* future addition of a spare to this pool might be able to
468
* close the case
469
*/
470
consumed = spare_activated;
471
} else if (event.Value("class") == "resource.fs.zfs.statechange") {
472
RefreshVdevState();
473
/*
474
* If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to
475
* activate a hotspare. Otherwise, ignore the event
476
*/
477
if (VdevState() == VDEV_STATE_FAULTED ||
478
VdevState() == VDEV_STATE_DEGRADED ||
479
VdevState() == VDEV_STATE_CANT_OPEN)
480
(void) ActivateSpare();
481
consumed = true;
482
}
483
else if (event.Value("class") == "ereport.fs.zfs.io" ||
484
event.Value("class") == "ereport.fs.zfs.checksum" ||
485
event.Value("class") == "ereport.fs.zfs.delay") {
486
487
m_tentativeEvents.push_front(event.DeepCopy());
488
RegisterCallout(event);
489
consumed = true;
490
}
491
492
bool closed(CloseIfSolved());
493
494
return (consumed || closed);
495
}
496
497
/* Find a Vdev containing the vdev with the given GUID */
498
static nvlist_t*
499
find_parent(nvlist_t *pool_config, nvlist_t *config, DevdCtl::Guid child_guid)
500
{
501
nvlist_t **vdevChildren;
502
int error;
503
unsigned ch, numChildren;
504
505
error = nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
506
&vdevChildren, &numChildren);
507
508
if (error != 0 || numChildren == 0)
509
return (NULL);
510
511
for (ch = 0; ch < numChildren; ch++) {
512
nvlist *result;
513
Vdev vdev(pool_config, vdevChildren[ch]);
514
515
if (vdev.GUID() == child_guid)
516
return (config);
517
518
result = find_parent(pool_config, vdevChildren[ch], child_guid);
519
if (result != NULL)
520
return (result);
521
}
522
523
return (NULL);
524
}
525
526
bool
527
CaseFile::ActivateSpare() {
528
nvlist_t *config, *nvroot, *parent_config;
529
nvlist_t **spares;
530
const char *devPath, *poolname, *vdev_type;
531
u_int nspares, i;
532
int error;
533
534
ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
535
zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
536
if (zhp == NULL) {
537
syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
538
"for pool_guid %" PRIu64".", (uint64_t)m_poolGUID);
539
return (false);
540
}
541
poolname = zpool_get_name(zhp);
542
config = zpool_get_config(zhp, NULL);
543
if (config == NULL) {
544
syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
545
"config for pool %s", poolname);
546
return (false);
547
}
548
error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot);
549
if (error != 0){
550
syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev "
551
"tree for pool %s", poolname);
552
return (false);
553
}
554
555
parent_config = find_parent(config, nvroot, m_vdevGUID);
556
if (parent_config != NULL) {
557
const char *parent_type;
558
559
/*
560
* Don't activate spares for members of a "replacing" vdev.
561
* They're already dealt with. Sparing them will just drag out
562
* the resilver process.
563
*/
564
error = nvlist_lookup_string(parent_config,
565
ZPOOL_CONFIG_TYPE, &parent_type);
566
if (error == 0 && strcmp(parent_type, VDEV_TYPE_REPLACING) == 0)
567
return (false);
568
}
569
570
nspares = 0;
571
nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
572
&nspares);
573
if (nspares == 0) {
574
/* The pool has no spares configured */
575
syslog(LOG_INFO, "CaseFile::ActivateSpare: "
576
"No spares available for pool %s", poolname);
577
return (false);
578
}
579
for (i = 0; i < nspares; i++) {
580
uint64_t *nvlist_array;
581
vdev_stat_t *vs;
582
uint_t nstats;
583
584
if (nvlist_lookup_uint64_array(spares[i],
585
ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) {
586
syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not "
587
"find vdev stats for pool %s, spare %d",
588
poolname, i);
589
return (false);
590
}
591
vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);
592
593
if ((vs->vs_aux != VDEV_AUX_SPARED)
594
&& (vs->vs_state == VDEV_STATE_HEALTHY)) {
595
/* We found a usable spare */
596
break;
597
}
598
}
599
600
if (i == nspares) {
601
/* No available spares were found */
602
return (false);
603
}
604
605
error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath);
606
if (error != 0) {
607
syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
608
"the path of pool %s, spare %d. Error %d",
609
poolname, i, error);
610
return (false);
611
}
612
613
error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type);
614
if (error != 0) {
615
syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
616
"the vdev type of pool %s, spare %d. Error %d",
617
poolname, i, error);
618
return (false);
619
}
620
621
return (Replace(vdev_type, devPath, /*isspare*/true));
622
}
623
624
/* Does the argument event refer to a checksum error? */
625
static bool
626
IsChecksumEvent(const Event* const event)
627
{
628
return ("ereport.fs.zfs.checksum" == event->Value("type"));
629
}
630
631
/* Does the argument event refer to an IO error? */
632
static bool
633
IsIOEvent(const Event* const event)
634
{
635
return ("ereport.fs.zfs.io" == event->Value("type"));
636
}
637
638
/* Does the argument event refer to an IO delay? */
639
static bool
640
IsDelayEvent(const Event* const event)
641
{
642
return ("ereport.fs.zfs.delay" == event->Value("type"));
643
}
644
645
void
646
CaseFile::RegisterCallout(const Event &event)
647
{
648
timeval now, countdown, elapsed, timestamp, zero, remaining;
649
/**
650
* The time ZFSD waits before promoting a tentative event
651
* into a permanent event.
652
*/
653
int sec = -1;
654
if (IsChecksumEvent(&event))
655
sec = CaseFile::GetVdevProp(VDEV_PROP_CHECKSUM_T);
656
else if (IsIOEvent(&event))
657
sec = CaseFile::GetVdevProp(VDEV_PROP_IO_T);
658
else if (IsDelayEvent(&event))
659
sec = CaseFile::GetVdevProp(VDEV_PROP_SLOW_IO_T);
660
661
if (sec == -1)
662
sec = 60; /* default */
663
664
timeval removeGracePeriod = {
665
sec, /*sec*/
666
0 /*usec*/
667
};
668
669
gettimeofday(&now, 0);
670
timestamp = event.GetTimestamp();
671
timersub(&now, &timestamp, &elapsed);
672
timersub(&removeGracePeriod, &elapsed, &countdown);
673
/*
674
* If countdown is <= zero, Reset the timer to the
675
* smallest positive time value instead
676
*/
677
timerclear(&zero);
678
if (timercmp(&countdown, &zero, <=)) {
679
timerclear(&countdown);
680
countdown.tv_usec = 1;
681
}
682
683
remaining = m_tentativeTimer.TimeRemaining();
684
685
if (!m_tentativeTimer.IsPending()
686
|| timercmp(&countdown, &remaining, <))
687
m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this);
688
}
689
690
691
bool
692
CaseFile::CloseIfSolved()
693
{
694
if (m_events.empty()
695
&& m_tentativeEvents.empty()) {
696
697
/*
698
* We currently do not track or take actions on
699
* devices in the degraded or faulted state.
700
* Once we have support for spare pools, we'll
701
* retain these cases so that any spares added in
702
* the future can be applied to them.
703
*/
704
switch (VdevState()) {
705
case VDEV_STATE_HEALTHY:
706
/* No need to keep cases for healthy vdevs */
707
case VDEV_STATE_OFFLINE:
708
/*
709
* Offline is a deliberate administrative action. zfsd
710
* doesn't need to do anything for this state.
711
*/
712
Close();
713
return (true);
714
case VDEV_STATE_REMOVED:
715
case VDEV_STATE_CANT_OPEN:
716
/*
717
* Keep open. We may solve it with a newly inserted
718
* device.
719
*/
720
case VDEV_STATE_FAULTED:
721
case VDEV_STATE_DEGRADED:
722
/*
723
* Keep open. We may solve it with the future
724
* addition of a spare to the pool
725
*/
726
case VDEV_STATE_UNKNOWN:
727
case VDEV_STATE_CLOSED:
728
/*
729
* Keep open? This may not be the correct behavior,
730
* but it's what we've always done
731
*/
732
;
733
}
734
735
/*
736
* Re-serialize the case in order to remove any
737
* previous event data.
738
*/
739
Serialize();
740
}
741
742
return (false);
743
}
744
745
void
746
CaseFile::Log()
747
{
748
syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(),
749
VdevGUIDString().c_str(), PhysicalPath().c_str());
750
syslog(LOG_INFO, "\tVdev State = %s\n",
751
zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
752
if (m_tentativeEvents.size() != 0) {
753
syslog(LOG_INFO, "\t=== Tentative Events ===\n");
754
for (EventList::iterator event(m_tentativeEvents.begin());
755
event != m_tentativeEvents.end(); event++)
756
(*event)->Log(LOG_INFO);
757
}
758
if (m_events.size() != 0) {
759
syslog(LOG_INFO, "\t=== Events ===\n");
760
for (EventList::iterator event(m_events.begin());
761
event != m_events.end(); event++)
762
(*event)->Log(LOG_INFO);
763
}
764
}
765
766
//- CaseFile Static Protected Methods ------------------------------------------
767
void
768
CaseFile::OnGracePeriodEnded(void *arg)
769
{
770
CaseFile &casefile(*static_cast<CaseFile *>(arg));
771
772
casefile.OnGracePeriodEnded();
773
}
774
775
int
776
CaseFile::DeSerializeSelector(const struct dirent *dirEntry)
777
{
778
uint64_t poolGUID;
779
uint64_t vdevGUID;
780
781
if (dirEntry->d_type == DT_REG
782
&& sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
783
&poolGUID, &vdevGUID) == 2)
784
return (1);
785
return (0);
786
}
787
788
void
789
CaseFile::DeSerializeFile(const char *fileName)
790
{
791
string fullName(s_caseFilePath + '/' + fileName);
792
CaseFile *existingCaseFile(NULL);
793
CaseFile *caseFile(NULL);
794
795
try {
796
uint64_t poolGUID;
797
uint64_t vdevGUID;
798
nvlist_t *vdevConf;
799
800
if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
801
&poolGUID, &vdevGUID) != 2) {
802
throw ZfsdException("CaseFile::DeSerialize: "
803
"Unintelligible CaseFile filename %s.\n", fileName);
804
}
805
existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID));
806
if (existingCaseFile != NULL) {
807
/*
808
* If the vdev is already degraded or faulted,
809
* there's no point in keeping the state around
810
* that we use to put a drive into the degraded
811
* state. However, if the vdev is simply missing,
812
* preserve the case data in the hopes that it will
813
* return.
814
*/
815
caseFile = existingCaseFile;
816
vdev_state curState(caseFile->VdevState());
817
if (curState > VDEV_STATE_CANT_OPEN
818
&& curState < VDEV_STATE_HEALTHY) {
819
unlink(fileName);
820
return;
821
}
822
} else {
823
ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
824
if (zpl.empty()
825
|| (vdevConf = VdevIterator(zpl.front())
826
.Find(vdevGUID)) == NULL) {
827
/*
828
* Either the pool no longer exists
829
* or this vdev is no longer a member of
830
* the pool.
831
*/
832
unlink(fullName.c_str());
833
return;
834
}
835
836
/*
837
* Any vdev we find that does not have a case file
838
* must be in the healthy state and thus worthy of
839
* continued SERD data tracking.
840
*/
841
caseFile = new CaseFile(Vdev(zpl.front(), vdevConf));
842
}
843
844
ifstream caseStream(fullName.c_str());
845
if (!caseStream)
846
throw ZfsdException("CaseFile::DeSerialize: Unable to "
847
"read %s.\n", fileName);
848
849
caseFile->DeSerialize(caseStream);
850
} catch (const ParseException &exp) {
851
852
exp.Log();
853
if (caseFile != existingCaseFile)
854
delete caseFile;
855
856
/*
857
* Since we can't parse the file, unlink it so we don't
858
* trip over it again.
859
*/
860
unlink(fileName);
861
} catch (const ZfsdException &zfsException) {
862
863
zfsException.Log();
864
if (caseFile != existingCaseFile)
865
delete caseFile;
866
}
867
}
868
869
//- CaseFile Protected Methods -------------------------------------------------
870
/**
 * Construct a case for the given vdev and register it as active.
 *
 * GUIDs, state, physical path and the spare flag are copied from the
 * vdev.  The GUIDs are also rendered to strings once, for use in logs
 * and file names.
 *
 * \param vdev  The vdev this case will track.
 */
CaseFile::CaseFile(const Vdev &vdev)
 : m_poolGUID(vdev.PoolGUID()),
   m_vdevGUID(vdev.GUID()),
   m_vdevState(vdev.State()),
   m_vdevPhysPath(vdev.PhysicalPath()),
   m_is_spare(vdev.IsSpare())
{
	stringstream vdevGuidText;
	vdevGuidText << m_vdevGUID;
	m_vdevGUIDString = vdevGuidText.str();

	stringstream poolGuidText;
	poolGuidText << m_poolGUID;
	m_poolGUIDString = poolGuidText.str();

	/* The vdev name is resolved against the pool, if it can be found. */
	ZpoolList pools(ZpoolList::ZpoolByGUID, &m_poolGUID);
	zpool_handle_t *zhp(pools.empty() ? NULL : pools.front());
	m_vdevName = vdev.Name(zhp, false);

	s_activeCases.push_back(this);

	syslog(LOG_INFO, "Creating new CaseFile:\n");
	Log();
}
894
895
/**
 * Release all queued events, cancel the tentative-event timer and
 * remove this case from the active list.
 */
CaseFile::~CaseFile()
{
	/* Make sure the grace-period callback cannot fire on a dead case. */
	m_tentativeTimer.Stop();

	PurgeEvents();
	PurgeTentativeEvents();

	s_activeCases.remove(this);
}
902
903
void
904
CaseFile::PurgeEvents()
905
{
906
for (EventList::iterator event(m_events.begin());
907
event != m_events.end(); event++)
908
delete *event;
909
910
m_events.clear();
911
}
912
913
void
914
CaseFile::PurgeTentativeEvents()
915
{
916
for (EventList::iterator event(m_tentativeEvents.begin());
917
event != m_tentativeEvents.end(); event++)
918
delete *event;
919
920
m_tentativeEvents.clear();
921
}
922
923
void
924
CaseFile::SerializeEvList(const EventList events, int fd,
925
const char* prefix) const
926
{
927
if (events.empty())
928
return;
929
for (EventList::const_iterator curEvent = events.begin();
930
curEvent != events.end(); curEvent++) {
931
const string &eventString((*curEvent)->GetEventString());
932
933
// TODO: replace many write(2) calls with a single writev(2)
934
if (prefix)
935
write(fd, prefix, strlen(prefix));
936
write(fd, eventString.c_str(), eventString.length());
937
}
938
}
939
940
void
941
CaseFile::Serialize()
942
{
943
stringstream saveFile;
944
945
saveFile << setfill('0')
946
<< s_caseFilePath << "/"
947
<< "pool_" << PoolGUIDString()
948
<< "_vdev_" << VdevGUIDString()
949
<< ".case";
950
951
if (m_events.empty() && m_tentativeEvents.empty()) {
952
unlink(saveFile.str().c_str());
953
return;
954
}
955
956
int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644));
957
if (fd == -1) {
958
syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n",
959
saveFile.str().c_str());
960
return;
961
}
962
SerializeEvList(m_events, fd);
963
SerializeEvList(m_tentativeEvents, fd, "tentative ");
964
close(fd);
965
}
966
967
/*
968
* XXX: This method assumes that events may not contain embedded newlines. If
969
* ever events can contain embedded newlines, then CaseFile must switch
970
* serialization formats
971
*/
972
void
973
CaseFile::DeSerialize(ifstream &caseStream)
974
{
975
string evString;
976
const EventFactory &factory(ZfsDaemon::Get().GetFactory());
977
978
caseStream >> std::noskipws >> std::ws;
979
while (caseStream.good()) {
980
/*
981
* Outline:
982
* read the beginning of a line and check it for
983
* "tentative". If found, discard "tentative".
984
* Create a new event
985
* continue
986
*/
987
EventList* destEvents;
988
const string tentFlag("tentative ");
989
string line;
990
std::stringbuf lineBuf;
991
992
caseStream.get(lineBuf);
993
caseStream.ignore(); /*discard the newline character*/
994
line = lineBuf.str();
995
if (line.compare(0, tentFlag.size(), tentFlag) == 0) {
996
/* Discard "tentative" */
997
line.erase(0, tentFlag.size());
998
destEvents = &m_tentativeEvents;
999
} else {
1000
destEvents = &m_events;
1001
}
1002
Event *event(Event::CreateEvent(factory, line));
1003
if (event != NULL) {
1004
destEvents->push_back(event);
1005
RegisterCallout(*event);
1006
}
1007
}
1008
}
1009
1010
void
1011
CaseFile::Close()
1012
{
1013
/*
1014
* This case is no longer relevant. Clean up our
1015
* serialization file, and delete the case.
1016
*/
1017
syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n",
1018
PoolGUIDString().c_str(), VdevGUIDString().c_str(),
1019
zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
1020
1021
/*
1022
* Serialization of a Case with no event data, clears the
1023
* Serialization data for that event.
1024
*/
1025
PurgeEvents();
1026
Serialize();
1027
1028
delete this;
1029
}
1030
1031
void
1032
CaseFile::OnGracePeriodEnded()
1033
{
1034
bool should_fault, should_degrade;
1035
ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
1036
zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
1037
1038
m_events.splice(m_events.begin(), m_tentativeEvents);
1039
should_fault = ShouldFault();
1040
should_degrade = ShouldDegrade();
1041
1042
if (should_fault || should_degrade) {
1043
if (zhp == NULL
1044
|| (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) {
1045
/*
1046
* Either the pool no longer exists
1047
* or this vdev is no longer a member of
1048
* the pool.
1049
*/
1050
Close();
1051
return;
1052
}
1053
1054
}
1055
1056
/* A fault condition has priority over a degrade condition */
1057
if (ShouldFault()) {
1058
/* Fault the vdev and close the case. */
1059
if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID,
1060
VDEV_AUX_ERR_EXCEEDED) == 0) {
1061
syslog(LOG_INFO, "Faulting vdev(%s/%s)",
1062
PoolGUIDString().c_str(),
1063
VdevGUIDString().c_str());
1064
Close();
1065
return;
1066
}
1067
else {
1068
syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n",
1069
PoolGUIDString().c_str(),
1070
VdevGUIDString().c_str(),
1071
libzfs_error_action(g_zfsHandle),
1072
libzfs_error_description(g_zfsHandle));
1073
}
1074
}
1075
else if (ShouldDegrade()) {
1076
/* Degrade the vdev and close the case. */
1077
if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID,
1078
VDEV_AUX_ERR_EXCEEDED) == 0) {
1079
syslog(LOG_INFO, "Degrading vdev(%s/%s)",
1080
PoolGUIDString().c_str(),
1081
VdevGUIDString().c_str());
1082
Close();
1083
return;
1084
}
1085
else {
1086
syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n",
1087
PoolGUIDString().c_str(),
1088
VdevGUIDString().c_str(),
1089
libzfs_error_action(g_zfsHandle),
1090
libzfs_error_description(g_zfsHandle));
1091
}
1092
}
1093
Serialize();
1094
}
1095
1096
/**
 * Find the sibling that is standing in for this case's vdev under a
 * "spare" parent.
 *
 * \param zhp  Handle of the pool containing the vdev.
 *
 * \return  NonexistentVdev if the vdev's parent does not exist or is
 *          not named "spare"; otherwise the chosen sibling (see below),
 *          which is also NonexistentVdev when there are no siblings.
 */
Vdev
CaseFile::BeingReplacedBy(zpool_handle_t *zhp)
{
	Vdev caseVdev(zhp, CaseVdev(zhp));
	Vdev parent(caseVdev.Parent());
	Vdev replacing(NonexistentVdev);

	/*
	 * To determine whether we are being replaced by another spare that
	 * is still working, then make sure that it is currently spared and
	 * that the spare is either resilvering or healthy.  If any of these
	 * conditions fail, then we are not being replaced by a spare.
	 *
	 * If the spare is healthy, then the case file should be closed very
	 * soon after this check.
	 */
	if (parent.DoesNotExist()
	 || parent.Name(zhp, /*verbose*/false) != "spare")
		return (NonexistentVdev);

	std::list<Vdev> siblings;
	siblings = parent.Children();

	std::list<Vdev>::iterator it(siblings.begin());
	while (it != siblings.end()) {
		Vdev sibling = *it;
		++it;

		/* Skip our vdev. */
		if (sibling.GUID() == VdevGUID())
			continue;

		/*
		 * Accept the first child that doesn't match our GUID, or
		 * any resilvering/healthy device if one exists.
		 */
		if (replacing.DoesNotExist() || sibling.IsResilvering()
		 || sibling.State() == VDEV_STATE_HEALTHY)
			replacing = sibling;
	}

	return (replacing);
}
1137
1138
bool
1139
CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) {
1140
nvlist_t *nvroot, *newvd;
1141
const char *poolname;
1142
string oldstr(VdevGUIDString());
1143
bool retval = true;
1144
1145
/* Figure out what pool we're working on */
1146
ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
1147
zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
1148
if (zhp == NULL) {
1149
syslog(LOG_ERR, "CaseFile::Replace: could not find pool for "
1150
"pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID);
1151
return (false);
1152
}
1153
poolname = zpool_get_name(zhp);
1154
Vdev vd(zhp, CaseVdev(zhp));
1155
Vdev replaced(BeingReplacedBy(zhp));
1156
1157
if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) {
1158
/* If we are already being replaced by a working spare, pass. */
1159
if (replaced.IsResilvering()
1160
|| replaced.State() == VDEV_STATE_HEALTHY) {
1161
syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already "
1162
"replaced", VdevGUIDString().c_str(), path);
1163
return (/*consumed*/false);
1164
}
1165
/*
1166
* If we have already been replaced by a spare, but that spare
1167
* is broken, we must spare the spare, not the original device.
1168
*/
1169
oldstr = replaced.GUIDString();
1170
syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing "
1171
"broken spare %s instead", VdevGUIDString().c_str(),
1172
path, oldstr.c_str());
1173
}
1174
1175
/*
1176
* Build a root vdev/leaf vdev configuration suitable for
1177
* zpool_vdev_attach. Only enough data for the kernel to find
1178
* the device (i.e. type and disk device node path) are needed.
1179
*/
1180
nvroot = NULL;
1181
newvd = NULL;
1182
1183
if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0
1184
|| nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
1185
syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate "
1186
"configuration data.", poolname, oldstr.c_str());
1187
if (nvroot != NULL)
1188
nvlist_free(nvroot);
1189
return (false);
1190
}
1191
if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0
1192
|| nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0
1193
|| nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0
1194
|| nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1195
&newvd, 1) != 0) {
1196
syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize "
1197
"configuration data.", poolname, oldstr.c_str());
1198
nvlist_free(newvd);
1199
nvlist_free(nvroot);
1200
return (true);
1201
}
1202
1203
/* Data was copied when added to the root vdev. */
1204
nvlist_free(newvd);
1205
1206
retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot,
1207
/*replace*/B_TRUE, /*rebuild*/ B_FALSE) == 0);
1208
if (retval)
1209
syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n",
1210
poolname, oldstr.c_str(), path);
1211
else
1212
syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n",
1213
poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle),
1214
libzfs_error_description(g_zfsHandle));
1215
nvlist_free(nvroot);
1216
1217
return (retval);
1218
}
1219
1220
/* Lookup the vdev prop. Used for checksum, IO, or slow IO props */
1221
int
1222
CaseFile::GetVdevProp(vdev_prop_t vdev_prop) const
1223
{
1224
char val[ZFS_MAXPROPLEN];
1225
zprop_source_t srctype;
1226
DevdCtl::Guid poolGUID = PoolGUID();
1227
ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
1228
zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
1229
1230
char *prop_str = (char *) vdev_prop_to_name(vdev_prop);
1231
if (zhp == NULL || zpool_get_vdev_prop(zhp, m_vdevName.c_str(),
1232
vdev_prop, prop_str, val, sizeof (val), &srctype, B_FALSE) != 0)
1233
return (-1);
1234
1235
/* we'll get "-" from libzfs for a prop that is not set */
1236
if (zfs_isnumber(val) == B_FALSE)
1237
return (-1);
1238
1239
return (atoi(val));
1240
}
1241
1242
bool
1243
CaseFile::ShouldDegrade() const
1244
{
1245
int checksum_n = GetVdevProp(VDEV_PROP_CHECKSUM_N);
1246
if (checksum_n == -1)
1247
checksum_n = DEFAULT_ZFS_DEGRADE_IO_COUNT;
1248
return (std::count_if(m_events.begin(), m_events.end(),
1249
IsChecksumEvent) > checksum_n);
1250
}
1251
1252
bool
1253
CaseFile::ShouldFault() const
1254
{
1255
bool should_fault_for_io, should_fault_for_delay;
1256
int io_n = GetVdevProp(VDEV_PROP_IO_N);
1257
int slow_io_n = GetVdevProp(VDEV_PROP_SLOW_IO_N);
1258
1259
if (io_n == -1)
1260
io_n = DEFAULT_ZFS_DEGRADE_IO_COUNT;
1261
if (slow_io_n == -1)
1262
slow_io_n = DEFAULT_ZFS_FAULT_SLOW_IO_COUNT;
1263
1264
should_fault_for_io = std::count_if(m_events.begin(), m_events.end(),
1265
IsIOEvent) > io_n;
1266
should_fault_for_delay = std::count_if(m_events.begin(), m_events.end(),
1267
IsDelayEvent) > slow_io_n;
1268
1269
return (should_fault_for_io || should_fault_for_delay);
1270
}
1271
1272
/*
 * Look up this case's vdev, by GUID, among the vdevs of the given pool
 * and return whatever VdevIterator::Find() yields for it.
 */
nvlist_t *
CaseFile::CaseVdev(zpool_handle_t *zhp) const
{
	VdevIterator poolVdevs(zhp);

	return (poolVdevs.Find(VdevGUID()));
}
1277
1278