Path: blob/main/sys/contrib/openzfs/cmd/zed/agents/zfs_agents.c
48529 views
// SPDX-License-Identifier: CDDL-1.01/*2* CDDL HEADER START3*4* The contents of this file are subject to the terms of the5* Common Development and Distribution License Version 1.0 (CDDL-1.0).6* You can obtain a copy of the license from the top-level file7* "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.8* You may not use this file except in compliance with the license.9*10* CDDL HEADER END11*/1213/*14* Copyright (c) 2016, Intel Corporation.15* Copyright (c) 2018, loli10K <[email protected]>16* Copyright (c) 2021 Hewlett Packard Enterprise Development LP17*/1819#include <libnvpair.h>20#include <libzfs.h>21#include <stddef.h>22#include <stdlib.h>23#include <string.h>24#include <sys/list.h>25#include <sys/time.h>26#include <sys/sysevent/eventdefs.h>27#include <sys/sysevent/dev.h>28#include <sys/fm/protocol.h>29#include <sys/fm/fs/zfs.h>30#include <pthread.h>31#include <unistd.h>3233#include "zfs_agents.h"34#include "fmd_api.h"35#include "../zed_log.h"3637/*38* agent dispatch code39*/4041static pthread_mutex_t agent_lock = PTHREAD_MUTEX_INITIALIZER;42static pthread_cond_t agent_cond = PTHREAD_COND_INITIALIZER;43static list_t agent_events; /* list of pending events */44static int agent_exiting;4546typedef struct agent_event {47char ae_class[64];48char ae_subclass[32];49nvlist_t *ae_nvl;50list_node_t ae_node;51} agent_event_t;5253pthread_t g_agents_tid;5455libzfs_handle_t *g_zfs_hdl;5657/* guid search data */58typedef enum device_type {59DEVICE_TYPE_L2ARC, /* l2arc device */60DEVICE_TYPE_SPARE, /* spare device */61DEVICE_TYPE_PRIMARY /* any primary pool storage device */62} device_type_t;6364typedef struct guid_search {65uint64_t gs_pool_guid;66uint64_t gs_vdev_guid;67const char *gs_devid;68device_type_t gs_vdev_type;69uint64_t gs_vdev_expandtime; /* vdev expansion time */70} guid_search_t;7172/*73* Walks the vdev tree recursively looking for a matching devid.74* Returns B_TRUE as soon as a matching device is found, B_FALSE otherwise.75*/76static boolean_t77zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)78{79guid_search_t *gsp = arg;80const char *path = NULL;81uint_t c, children;82nvlist_t **child;83uint64_t vdev_guid;8485/*86* First iterate over any children.87*/88if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,89&child, &children) == 0) {90for (c = 0; c < children; c++) {91if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {92gsp->gs_vdev_type = DEVICE_TYPE_PRIMARY;93return (B_TRUE);94}95}96}97/*98* Iterate over any spares and cache devices99*/100if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,101&child, &children) == 0) {102for (c = 0; c < children; c++) {103if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {104gsp->gs_vdev_type = DEVICE_TYPE_SPARE;105return (B_TRUE);106}107}108}109if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,110&child, &children) == 0) {111for (c = 0; c < children; c++) {112if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {113gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;114return (B_TRUE);115}116}117}118/*119* On a devid match, grab the vdev guid and expansion time, if any.120*/121if (gsp->gs_devid != NULL &&122(nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) &&123(strcmp(gsp->gs_devid, path) == 0)) {124(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,125&gsp->gs_vdev_guid);126(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,127&gsp->gs_vdev_expandtime);128return (B_TRUE);129}130/*131* Otherwise, on a vdev guid match, grab the devid and expansion132* time. The devid might be missing on removal since its not part133* of blkid cache and L2ARC VDEV does not contain pool guid in its134* blkid, so this is a special case for L2ARC VDEV.135*/136else if (gsp->gs_vdev_guid != 0 &&137nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &vdev_guid) == 0 &&138gsp->gs_vdev_guid == vdev_guid) {139if (gsp->gs_devid == NULL) {140(void) nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID,141&gsp->gs_devid);142}143(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,144&gsp->gs_vdev_expandtime);145return (B_TRUE);146}147148return (B_FALSE);149}150151static int152zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)153{154guid_search_t *gsp = arg;155nvlist_t *config, *nvl;156157/*158* For each vdev in this pool, look for a match by devid159*/160boolean_t found = B_FALSE;161uint64_t pool_guid;162163/* Get pool configuration and extract pool GUID */164if ((config = zpool_get_config(zhp, NULL)) == NULL ||165nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,166&pool_guid) != 0)167goto out;168169/* Skip this pool if we're looking for a specific pool */170if (gsp->gs_pool_guid != 0 && pool_guid != gsp->gs_pool_guid)171goto out;172173if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) == 0)174found = zfs_agent_iter_vdev(zhp, nvl, gsp);175176if (found && gsp->gs_pool_guid == 0)177gsp->gs_pool_guid = pool_guid;178179out:180zpool_close(zhp);181return (found);182}183184void185zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)186{187agent_event_t *event;188189if (subclass == NULL)190subclass = "";191192event = malloc(sizeof (agent_event_t));193if (event == NULL || nvlist_dup(nvl, &event->ae_nvl, 0) != 0) {194if (event)195free(event);196return;197}198199if (strcmp(class, "sysevent.fs.zfs.vdev_check") == 0) {200class = EC_ZFS;201subclass = ESC_ZFS_VDEV_CHECK;202}203204/*205* On Linux, we don't get the expected FM_RESOURCE_REMOVED ereport206* from the vdev_disk layer after a hot unplug. Fortunately we do207* get an EC_DEV_REMOVE from our disk monitor and it is a suitable208* proxy so we remap it here for the benefit of the diagnosis engine.209* Starting in OpenZFS 2.0, we do get FM_RESOURCE_REMOVED from the spa210* layer. Processing multiple FM_RESOURCE_REMOVED events is not harmful.211*/212if ((strcmp(class, EC_DEV_REMOVE) == 0) &&213(strcmp(subclass, ESC_DISK) == 0) &&214(nvlist_exists(nvl, ZFS_EV_VDEV_GUID) ||215nvlist_exists(nvl, DEV_IDENTIFIER))) {216nvlist_t *payload = event->ae_nvl;217struct timeval tv;218int64_t tod[2];219uint64_t pool_guid = 0, vdev_guid = 0;220guid_search_t search = { 0 };221device_type_t devtype = DEVICE_TYPE_PRIMARY;222const char *devid = NULL;223224class = "resource.fs.zfs.removed";225subclass = "";226227(void) nvlist_add_string(payload, FM_CLASS, class);228(void) nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid);229(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);230(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);231232(void) gettimeofday(&tv, NULL);233tod[0] = tv.tv_sec;234tod[1] = tv.tv_usec;235(void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);236237/*238* If devid is missing but vdev_guid is available, find devid239* and pool_guid from vdev_guid.240* For multipath, spare and l2arc devices ZFS_EV_VDEV_GUID or241* ZFS_EV_POOL_GUID may be missing so find them.242*/243search.gs_devid = devid;244search.gs_vdev_guid = vdev_guid;245search.gs_pool_guid = pool_guid;246zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);247if (devid == NULL)248devid = search.gs_devid;249if (pool_guid == 0)250pool_guid = search.gs_pool_guid;251if (vdev_guid == 0)252vdev_guid = search.gs_vdev_guid;253devtype = search.gs_vdev_type;254255/*256* We want to avoid reporting "remove" events coming from257* libudev for VDEVs which were expanded recently (10s) and258* avoid activating spares in response to partitions being259* deleted and created in rapid succession.260*/261if (search.gs_vdev_expandtime != 0 &&262search.gs_vdev_expandtime + 10 > tv.tv_sec) {263zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' "264"for recently expanded device '%s'", EC_DEV_REMOVE,265devid);266fnvlist_free(payload);267free(event);268goto out;269}270271(void) nvlist_add_uint64(payload,272FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid);273(void) nvlist_add_uint64(payload,274FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid);275switch (devtype) {276case DEVICE_TYPE_L2ARC:277(void) nvlist_add_string(payload,278FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,279VDEV_TYPE_L2CACHE);280break;281case DEVICE_TYPE_SPARE:282(void) nvlist_add_string(payload,283FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE);284break;285case DEVICE_TYPE_PRIMARY:286(void) nvlist_add_string(payload,287FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_DISK);288break;289}290291zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'",292EC_DEV_REMOVE, class);293}294295(void) strlcpy(event->ae_class, class, sizeof (event->ae_class));296(void) strlcpy(event->ae_subclass, subclass,297sizeof (event->ae_subclass));298299(void) pthread_mutex_lock(&agent_lock);300list_insert_tail(&agent_events, event);301(void) pthread_mutex_unlock(&agent_lock);302303out:304(void) pthread_cond_signal(&agent_cond);305}306307static void308zfs_agent_dispatch(const char *class, const char *subclass, nvlist_t *nvl)309{310/*311* The diagnosis engine subscribes to the following events.312* On illumos these subscriptions reside in:313* /usr/lib/fm/fmd/plugins/zfs-diagnosis.conf314*/315if (strstr(class, "ereport.fs.zfs.") != NULL ||316strstr(class, "resource.fs.zfs.") != NULL ||317strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0 ||318strcmp(class, "sysevent.fs.zfs.vdev_remove_dev") == 0 ||319strcmp(class, "sysevent.fs.zfs.pool_destroy") == 0) {320fmd_module_recv(fmd_module_hdl("zfs-diagnosis"), nvl, class);321}322323/*324* The retire agent subscribes to the following events.325* On illumos these subscriptions reside in:326* /usr/lib/fm/fmd/plugins/zfs-retire.conf327*328* NOTE: faults events come directly from our diagnosis engine329* and will not pass through the zfs kernel module.330*/331if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||332strcmp(class, "resource.fs.zfs.removed") == 0 ||333strcmp(class, "resource.fs.zfs.statechange") == 0 ||334strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0) {335fmd_module_recv(fmd_module_hdl("zfs-retire"), nvl, class);336}337338/*339* The SLM module only consumes disk events and vdev check events340*341* NOTE: disk events come directly from disk monitor and will342* not pass through the zfs kernel module.343*/344if (strstr(class, "EC_dev_") != NULL ||345strcmp(class, EC_ZFS) == 0) {346(void) zfs_slm_event(class, subclass, nvl);347}348}349350/*351* Events are consumed and dispatched from this thread352* An agent can also post an event so event list lock353* is not held when calling an agent.354* One event is consumed at a time.355*/356static void *357zfs_agent_consumer_thread(void *arg)358{359(void) arg;360361for (;;) {362agent_event_t *event;363364(void) pthread_mutex_lock(&agent_lock);365366/* wait for an event to show up */367while (!agent_exiting && list_is_empty(&agent_events))368(void) pthread_cond_wait(&agent_cond, &agent_lock);369370if (agent_exiting) {371(void) pthread_mutex_unlock(&agent_lock);372zed_log_msg(LOG_INFO, "zfs_agent_consumer_thread: "373"exiting");374return (NULL);375}376377if ((event = list_remove_head(&agent_events)) != NULL) {378(void) pthread_mutex_unlock(&agent_lock);379380/* dispatch to all event subscribers */381zfs_agent_dispatch(event->ae_class, event->ae_subclass,382event->ae_nvl);383384nvlist_free(event->ae_nvl);385free(event);386continue;387}388389(void) pthread_mutex_unlock(&agent_lock);390}391392return (NULL);393}394395void396zfs_agent_init(libzfs_handle_t *zfs_hdl)397{398fmd_hdl_t *hdl;399400g_zfs_hdl = zfs_hdl;401402if (zfs_slm_init() != 0)403zed_log_die("Failed to initialize zfs slm");404zed_log_msg(LOG_INFO, "Add Agent: init");405406hdl = fmd_module_hdl("zfs-diagnosis");407_zfs_diagnosis_init(hdl);408if (!fmd_module_initialized(hdl))409zed_log_die("Failed to initialize zfs diagnosis");410411hdl = fmd_module_hdl("zfs-retire");412_zfs_retire_init(hdl);413if (!fmd_module_initialized(hdl))414zed_log_die("Failed to initialize zfs retire");415416list_create(&agent_events, sizeof (agent_event_t),417offsetof(struct agent_event, ae_node));418419if (pthread_create(&g_agents_tid, NULL, zfs_agent_consumer_thread,420NULL) != 0) {421list_destroy(&agent_events);422zed_log_die("Failed to initialize agents");423}424pthread_setname_np(g_agents_tid, "agents");425}426427void428zfs_agent_fini(void)429{430fmd_hdl_t *hdl;431agent_event_t *event;432433agent_exiting = 1;434(void) pthread_cond_signal(&agent_cond);435436/* wait for zfs_enum_pools thread to complete */437(void) pthread_join(g_agents_tid, NULL);438439/* drain any pending events */440while ((event = list_remove_head(&agent_events)) != NULL) {441nvlist_free(event->ae_nvl);442free(event);443}444445list_destroy(&agent_events);446447if ((hdl = fmd_module_hdl("zfs-retire")) != NULL) {448_zfs_retire_fini(hdl);449fmd_hdl_unregister(hdl);450}451if ((hdl = fmd_module_hdl("zfs-diagnosis")) != NULL) {452_zfs_diagnosis_fini(hdl);453fmd_hdl_unregister(hdl);454}455456zed_log_msg(LOG_INFO, "Add Agent: fini");457zfs_slm_fini();458459g_zfs_hdl = NULL;460}461462463