GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c

// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License Version 1.0 (CDDL-1.0).
 * You can obtain a copy of the license from the top-level file
 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
 * You may not use this file except in compliance with the license.
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2016, Intel Corporation.
 * Copyright (c) 2018, loli10K <[email protected]>
 * Copyright (c) 2021 Hewlett Packard Enterprise Development LP
 */

#include <libnvpair.h>
#include <libzfs.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/list.h>
#include <sys/time.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>
#include <sys/fm/protocol.h>
#include <sys/fm/fs/zfs.h>
#include <pthread.h>
#include <unistd.h>

#include "zfs_agents.h"
#include "fmd_api.h"
#include "../zed_log.h"

/*
 * agent dispatch code
 */

static pthread_mutex_t agent_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t agent_cond = PTHREAD_COND_INITIALIZER;
static list_t agent_events;	/* list of pending events */
static int agent_exiting;

typedef struct agent_event {
	char ae_class[64];
	char ae_subclass[32];
	nvlist_t *ae_nvl;
	list_node_t ae_node;
} agent_event_t;
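
/*
 * Events are queued here by zfs_agent_post_event() and drained one at
 * a time by zfs_agent_consumer_thread(); agent_lock protects the list
 * and agent_cond signals both new events and shutdown (agent_exiting).
 */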

pthread_t g_agents_tid;

libzfs_handle_t *g_zfs_hdl;

/* guid search data */
typedef enum device_type {
	DEVICE_TYPE_L2ARC,	/* l2arc device */
	DEVICE_TYPE_SPARE,	/* spare device */
	DEVICE_TYPE_PRIMARY	/* any primary pool storage device */
} device_type_t;

typedef struct guid_search {
	uint64_t gs_pool_guid;
	uint64_t gs_vdev_guid;
	const char *gs_devid;
	device_type_t gs_vdev_type;
	uint64_t gs_vdev_expandtime;	/* vdev expansion time */
} guid_search_t;

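/*
 * The gs_devid, gs_vdev_guid, and gs_pool_guid fields double as search
 * keys (when supplied by the caller) and as results (filled in when a
 * matching vdev is found).
 */
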
/*
 * Walks the vdev tree recursively looking for a matching devid.
 * Returns B_TRUE as soon as a matching device is found, B_FALSE otherwise.
 */
static boolean_t
zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
{
	guid_search_t *gsp = arg;
	const char *path = NULL;
	uint_t c, children;
	nvlist_t **child;
	uint64_t vdev_guid;

	/*
	 * First iterate over any children.
	 */
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_PRIMARY;
				return (B_TRUE);
			}
		}
	}
	/*
	 * Iterate over any spares and cache devices.
	 */
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_SPARE;
				return (B_TRUE);
			}
		}
	}
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;
				return (B_TRUE);
			}
		}
	}
	/*
	 * On a devid match, grab the vdev guid and expansion time, if any.
	 */
	if (gsp->gs_devid != NULL &&
	    (nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) &&
	    (strcmp(gsp->gs_devid, path) == 0)) {
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
		    &gsp->gs_vdev_guid);
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
		    &gsp->gs_vdev_expandtime);
		return (B_TRUE);
	}
	/*
	 * Otherwise, on a vdev guid match, grab the devid and expansion
	 * time.  The devid might be missing on removal since it's not part
	 * of the blkid cache, and an L2ARC vdev does not contain the pool
	 * guid in its blkid, so this is a special case for L2ARC vdevs.
	 */
	else if (gsp->gs_vdev_guid != 0 &&
	    nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &vdev_guid) == 0 &&
	    gsp->gs_vdev_guid == vdev_guid) {
		if (gsp->gs_devid == NULL) {
			(void) nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID,
			    &gsp->gs_devid);
		}
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
		    &gsp->gs_vdev_expandtime);
		return (B_TRUE);
	}

	return (B_FALSE);
}

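/*
 * zpool_iter() callback.  Returning nonzero stops the pool iteration,
 * so the search ends at the first pool with a matching device.
 */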
static int
zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
{
	guid_search_t *gsp = arg;
	nvlist_t *config, *nvl;

	/*
	 * For each vdev in this pool, look for a match by devid
	 */
	boolean_t found = B_FALSE;
	uint64_t pool_guid;

	/* Get pool configuration and extract pool GUID */
	if ((config = zpool_get_config(zhp, NULL)) == NULL ||
	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    &pool_guid) != 0)
		goto out;

	/* Skip this pool if we're looking for a specific pool */
	if (gsp->gs_pool_guid != 0 && pool_guid != gsp->gs_pool_guid)
		goto out;

	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) == 0)
		found = zfs_agent_iter_vdev(zhp, nvl, gsp);

	if (found && gsp->gs_pool_guid == 0)
		gsp->gs_pool_guid = pool_guid;

out:
	zpool_close(zhp);
	return (found);
}

void
zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
{
	agent_event_t *event;

	if (subclass == NULL)
		subclass = "";

	event = malloc(sizeof (agent_event_t));
	if (event == NULL || nvlist_dup(nvl, &event->ae_nvl, 0) != 0) {
		if (event)
			free(event);
		return;
	}

	if (strcmp(class, "sysevent.fs.zfs.vdev_check") == 0) {
		class = EC_ZFS;
		subclass = ESC_ZFS_VDEV_CHECK;
	}

	/*
	 * On Linux, we don't get the expected FM_RESOURCE_REMOVED ereport
	 * from the vdev_disk layer after a hot unplug.  Fortunately we do
	 * get an EC_DEV_REMOVE from our disk monitor and it is a suitable
	 * proxy so we remap it here for the benefit of the diagnosis engine.
	 * Starting in OpenZFS 2.0, we do get FM_RESOURCE_REMOVED from the spa
	 * layer.  Processing multiple FM_RESOURCE_REMOVED events is not
	 * harmful.
	 */
	if ((strcmp(class, EC_DEV_REMOVE) == 0) &&
	    (strcmp(subclass, ESC_DISK) == 0) &&
	    (nvlist_exists(nvl, ZFS_EV_VDEV_GUID) ||
	    nvlist_exists(nvl, DEV_IDENTIFIER))) {
		nvlist_t *payload = event->ae_nvl;
		struct timeval tv;
		int64_t tod[2];
		uint64_t pool_guid = 0, vdev_guid = 0;
		guid_search_t search = { 0 };
		device_type_t devtype = DEVICE_TYPE_PRIMARY;
		const char *devid = NULL;

		class = "resource.fs.zfs.removed";
		subclass = "";

		(void) nvlist_add_string(payload, FM_CLASS, class);
		(void) nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid);
		(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
		(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);

		(void) gettimeofday(&tv, NULL);
		tod[0] = tv.tv_sec;
		tod[1] = tv.tv_usec;
		(void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);

		/*
		 * If devid is missing but vdev_guid is available, find devid
		 * and pool_guid from vdev_guid.
		 * For multipath, spare and l2arc devices ZFS_EV_VDEV_GUID or
		 * ZFS_EV_POOL_GUID may be missing so find them.
		 */
		search.gs_devid = devid;
		search.gs_vdev_guid = vdev_guid;
		search.gs_pool_guid = pool_guid;
		zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);
		if (devid == NULL)
			devid = search.gs_devid;
		if (pool_guid == 0)
			pool_guid = search.gs_pool_guid;
		if (vdev_guid == 0)
			vdev_guid = search.gs_vdev_guid;
		devtype = search.gs_vdev_type;

		/*
		 * We want to avoid reporting "remove" events coming from
		 * libudev for VDEVs which were expanded recently (10s) and
		 * avoid activating spares in response to partitions being
		 * deleted and created in rapid succession.
		 */
		if (search.gs_vdev_expandtime != 0 &&
		    search.gs_vdev_expandtime + 10 > tv.tv_sec) {
			zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' "
			    "for recently expanded device '%s'", EC_DEV_REMOVE,
			    devid);
			fnvlist_free(payload);
			free(event);
			goto out;
		}

		(void) nvlist_add_uint64(payload,
		    FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid);
		(void) nvlist_add_uint64(payload,
		    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid);
		switch (devtype) {
		case DEVICE_TYPE_L2ARC:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
			    VDEV_TYPE_L2CACHE);
			break;
		case DEVICE_TYPE_SPARE:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE);
			break;
		case DEVICE_TYPE_PRIMARY:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_DISK);
			break;
		}

		zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'",
		    EC_DEV_REMOVE, class);
	}

	(void) strlcpy(event->ae_class, class, sizeof (event->ae_class));
	(void) strlcpy(event->ae_subclass, subclass,
	    sizeof (event->ae_subclass));

	(void) pthread_mutex_lock(&agent_lock);
	list_insert_tail(&agent_events, event);
	(void) pthread_mutex_unlock(&agent_lock);

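	/*
	 * The condition is signaled even on the early-out path above where
	 * no event was queued; the consumer re-checks the list under
	 * agent_lock, so a spurious wakeup is harmless.
	 */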
out:
	(void) pthread_cond_signal(&agent_cond);
}

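/*
 * Deliver an event to each subscribing agent.  The tests below are
 * independent, so a single class (e.g. sysevent.fs.zfs.vdev_remove)
 * may be delivered to more than one module.
 */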
static void
zfs_agent_dispatch(const char *class, const char *subclass, nvlist_t *nvl)
{
	/*
	 * The diagnosis engine subscribes to the following events.
	 * On illumos these subscriptions reside in:
	 *	/usr/lib/fm/fmd/plugins/zfs-diagnosis.conf
	 */
	if (strstr(class, "ereport.fs.zfs.") != NULL ||
	    strstr(class, "resource.fs.zfs.") != NULL ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove_dev") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.pool_destroy") == 0) {
		fmd_module_recv(fmd_module_hdl("zfs-diagnosis"), nvl, class);
	}

	/*
	 * The retire agent subscribes to the following events.
	 * On illumos these subscriptions reside in:
	 *	/usr/lib/fm/fmd/plugins/zfs-retire.conf
	 *
	 * NOTE: fault events come directly from our diagnosis engine
	 * and will not pass through the zfs kernel module.
	 */
	if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
	    strcmp(class, "resource.fs.zfs.removed") == 0 ||
	    strcmp(class, "resource.fs.zfs.statechange") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0) {
		fmd_module_recv(fmd_module_hdl("zfs-retire"), nvl, class);
	}

	/*
	 * The SLM module only consumes disk events and vdev check events.
	 *
	 * NOTE: disk events come directly from the disk monitor and will
	 * not pass through the zfs kernel module.
	 */
	if (strstr(class, "EC_dev_") != NULL ||
	    strcmp(class, EC_ZFS) == 0) {
		(void) zfs_slm_event(class, subclass, nvl);
	}
}

/*
 * Events are consumed and dispatched from this thread.  An agent can
 * also post an event, so the event list lock is not held when calling
 * an agent.  One event is consumed at a time.
 */
static void *
zfs_agent_consumer_thread(void *arg)
{
	(void) arg;

	for (;;) {
		agent_event_t *event;

		(void) pthread_mutex_lock(&agent_lock);

		/* wait for an event to show up */
		while (!agent_exiting && list_is_empty(&agent_events))
			(void) pthread_cond_wait(&agent_cond, &agent_lock);

		if (agent_exiting) {
			(void) pthread_mutex_unlock(&agent_lock);
			zed_log_msg(LOG_INFO, "zfs_agent_consumer_thread: "
			    "exiting");
			return (NULL);
		}

		if ((event = list_remove_head(&agent_events)) != NULL) {
			(void) pthread_mutex_unlock(&agent_lock);

			/* dispatch to all event subscribers */
			zfs_agent_dispatch(event->ae_class, event->ae_subclass,
			    event->ae_nvl);

			nvlist_free(event->ae_nvl);
			free(event);
			continue;
		}

		(void) pthread_mutex_unlock(&agent_lock);
	}

	return (NULL);
}

void
zfs_agent_init(libzfs_handle_t *zfs_hdl)
{
	fmd_hdl_t *hdl;

	g_zfs_hdl = zfs_hdl;

	if (zfs_slm_init() != 0)
		zed_log_die("Failed to initialize zfs slm");
	zed_log_msg(LOG_INFO, "Add Agent: init");

	hdl = fmd_module_hdl("zfs-diagnosis");
	_zfs_diagnosis_init(hdl);
	if (!fmd_module_initialized(hdl))
		zed_log_die("Failed to initialize zfs diagnosis");

	hdl = fmd_module_hdl("zfs-retire");
	_zfs_retire_init(hdl);
	if (!fmd_module_initialized(hdl))
		zed_log_die("Failed to initialize zfs retire");

	list_create(&agent_events, sizeof (agent_event_t),
	    offsetof(struct agent_event, ae_node));

	if (pthread_create(&g_agents_tid, NULL, zfs_agent_consumer_thread,
	    NULL) != 0) {
		list_destroy(&agent_events);
		zed_log_die("Failed to initialize agents");
	}
	pthread_setname_np(g_agents_tid, "agents");
}
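
/*
 * Typical usage from the zed daemon (a sketch; the actual call sites
 * live in the zed sources, e.g. zed_event.c):
 *
 *	zfs_agent_init(g_zfs_hdl);
 *	...
 *	zfs_agent_post_event(class, subclass, nvl);
 *	...
 *	zfs_agent_fini();
 */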

void
zfs_agent_fini(void)
{
	fmd_hdl_t *hdl;
	agent_event_t *event;

	agent_exiting = 1;
	(void) pthread_cond_signal(&agent_cond);

	/* wait for the agent consumer thread to complete */
	(void) pthread_join(g_agents_tid, NULL);

	/* drain any pending events */
	while ((event = list_remove_head(&agent_events)) != NULL) {
		nvlist_free(event->ae_nvl);
		free(event);
	}

	list_destroy(&agent_events);

	if ((hdl = fmd_module_hdl("zfs-retire")) != NULL) {
		_zfs_retire_fini(hdl);
		fmd_hdl_unregister(hdl);
	}
	if ((hdl = fmd_module_hdl("zfs-diagnosis")) != NULL) {
		_zfs_diagnosis_fini(hdl);
		fmd_hdl_unregister(hdl);
	}

	zed_log_msg(LOG_INFO, "Add Agent: fini");
	zfs_slm_fini();

	g_zfs_hdl = NULL;
}