/* Source: sys/contrib/openzfs/module/zfs/fm.c */
// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Fault Management Architecture (FMA) Resource and Protocol Support
 *
 * The routines contained herein provide services to support kernel subsystems
 * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
 *
 * Name-Value Pair Lists
 *
 * The embodiment of an FMA protocol element (event, fmri or authority) is a
 * name-value pair list (nvlist_t).  FMA-specific nvlist constructor and
 * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
 * to create an nvpair list using custom allocators.  Callers may choose to
 * allocate either from the kernel memory allocator, or from a preallocated
 * buffer, useful in constrained contexts like high-level interrupt routines.
 *
 * Protocol Event and FMRI Construction
 *
 * Convenience routines are provided to construct nvlist events according to
 * the FMA Event Protocol and Naming Schema specification for ereports and
 * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
 *
 * ENA Manipulation
 *
 * Routines to generate ENA formats 0, 1 and 2 are available as well as
 * routines to increment formats 1 and 2.  Individual fields within the
 * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
 * fm_ena_format_get() and fm_ena_gen_get().
 */

#include <sys/types.h>
#include <sys/time.h>
#include <sys/list.h>
#include <sys/nvpair.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/sunddi.h>
#include <sys/systeminfo.h>
#include <sys/fm/util.h>
#include <sys/fm/protocol.h>
#include <sys/kstat.h>
#include <sys/zfs_context.h>
#ifdef _KERNEL
#include <sys/atomic.h>
#include <sys/condvar.h>
#include <sys/zfs_ioctl.h>

/* Maximum number of zevents held on the queue (tunable via module param) */
static uint_t zfs_zevent_len_max = 512;

/* Current queue length; protected by zevent_lock */
static uint_t zevent_len_cur = 0;
static int zevent_waiters = 0;
static int zevent_flags = 0;

/* Num events rate limited since the last time zfs_zevent_next() was called */
static uint64_t ratelimit_dropped = 0;
/*
 * The EID (Event IDentifier) is used to uniquely tag a zevent when it is
 * posted.  The posted EIDs are monotonically increasing but not persistent.
 * They will be reset to the initial value (1) each time the kernel module is
 * loaded.
 */
static uint64_t zevent_eid = 0;

static kmutex_t zevent_lock;
static list_t zevent_list;
static kcondvar_t zevent_cv;
#endif /* _KERNEL */


/*
 * Common fault management kstats to record event generation failures
 */

struct erpt_kstat {
	kstat_named_t erpt_dropped;		/* num erpts dropped on post */
	kstat_named_t erpt_set_failed;		/* num erpt set failures */
	kstat_named_t fmri_set_failed;		/* num fmri set failures */
	kstat_named_t payload_set_failed;	/* num payload set failures */
	kstat_named_t erpt_duplicates;		/* num duplicate erpts */
};

static struct erpt_kstat erpt_kstat_data = {
	{ "erpt-dropped", KSTAT_DATA_UINT64 },
	{ "erpt-set-failed", KSTAT_DATA_UINT64 },
	{ "fmri-set-failed", KSTAT_DATA_UINT64 },
	{ "payload-set-failed", KSTAT_DATA_UINT64 },
	{ "erpt-duplicates", KSTAT_DATA_UINT64 }
};

kstat_t *fm_ksp;

#ifdef _KERNEL

/*
 * Allocate a new zevent with an empty subscriber (ze) list.  Uses KM_SLEEP,
 * so this never returns NULL in practice.
 */
static zevent_t *
zfs_zevent_alloc(void)
{
	zevent_t *ev;

	ev = kmem_zalloc(sizeof (zevent_t), KM_SLEEP);

	list_create(&ev->ev_ze_list, sizeof (zfs_zevent_t),
	    offsetof(zfs_zevent_t, ze_node));
	list_link_init(&ev->ev_node);

	return (ev);
}

/*
 * Free a zevent.  The poster's cleanup callback is responsible for
 * releasing ev_nvl and ev_detector.
 */
static void
zfs_zevent_free(zevent_t *ev)
{
	/* Run provided cleanup callback */
	ev->ev_cb(ev->ev_nvl, ev->ev_detector);

	list_destroy(&ev->ev_ze_list);
	kmem_free(ev, sizeof (zevent_t));
}

/*
 * Unlink 'ev' from the global list, detach any readers referencing it
 * (counting it as dropped for each of them), and free it.
 * Caller must hold zevent_lock.
 */
static void
zfs_zevent_drain(zevent_t *ev)
{
	zfs_zevent_t *ze;

	ASSERT(MUTEX_HELD(&zevent_lock));
	list_remove(&zevent_list, ev);

	/* Remove references to this event in all private file data */
	while ((ze = list_remove_head(&ev->ev_ze_list)) != NULL) {
		ze->ze_zevent = NULL;
		ze->ze_dropped++;
	}

	zfs_zevent_free(ev);
}

/*
 * Drain every queued zevent; the number of events which were queued is
 * returned via 'count'.
 */
void
zfs_zevent_drain_all(uint_t *count)
{
	zevent_t *ev;

	mutex_enter(&zevent_lock);
	while ((ev = list_head(&zevent_list)) != NULL)
		zfs_zevent_drain(ev);

	*count = zevent_len_cur;
	zevent_len_cur = 0;
	mutex_exit(&zevent_lock);
}

/*
 * New zevents are inserted at the head.  If the maximum queue
 * length is exceeded a zevent will be drained from the tail.
 * As part of this any user space processes which currently have
 * a reference to this zevent_t in their private data will have
 * this reference set to NULL.
 */
static void
zfs_zevent_insert(zevent_t *ev)
{
	ASSERT(MUTEX_HELD(&zevent_lock));
	list_insert_head(&zevent_list, ev);

	if (zevent_len_cur >= zfs_zevent_len_max)
		zfs_zevent_drain(list_tail(&zevent_list));
	else
		zevent_len_cur++;
}

/*
 * Post a zevent. The cb will be called when nvl and detector are no longer
 * needed, i.e.:
 * - An error happened and a zevent can't be posted. In this case, cb is
 *   called before zfs_zevent_post() returns.
 * - The event is being drained and freed.
 */
int
zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
{
	inode_timespec_t tv;
	int64_t tv_array[2];
	uint64_t eid;
	size_t nvl_size = 0;
	zevent_t *ev;
	int error;

	ASSERT(cb != NULL);

	/* Stamp the event with the current wall-clock time */
	gethrestime(&tv);
	tv_array[0] = tv.tv_sec;
	tv_array[1] = tv.tv_nsec;

	error = nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		goto out;
	}

	/* Assign the next monotonically-increasing event ID */
	eid = atomic_inc_64_nv(&zevent_eid);
	error = nvlist_add_uint64(nvl, FM_EREPORT_EID, eid);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		goto out;
	}

	error = nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		goto out;
	}

	/* Reject events which would not fit the ereport data buffer */
	if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		error = EOVERFLOW;
		goto out;
	}

	ev = zfs_zevent_alloc();
	if (ev == NULL) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		error = ENOMEM;
		goto out;
	}

	ev->ev_nvl = nvl;
	ev->ev_detector = detector;
	ev->ev_cb = cb;
	ev->ev_eid = eid;

	mutex_enter(&zevent_lock);
	zfs_zevent_insert(ev);
	cv_broadcast(&zevent_cv);	/* wake any zfs_zevent_wait() callers */
	mutex_exit(&zevent_lock);

out:
	/* On failure the event was never queued; release nvl/detector now */
	if (error)
		cb(nvl, detector);

	return (error);
}

/* Account for an ereport which was suppressed as a duplicate. */
void
zfs_zevent_track_duplicate(void)
{
	atomic_inc_64(&erpt_kstat_data.erpt_duplicates.value.ui64);
}

/* Translate a /dev/zfs minor number into its zevent state, if any. */
static int
zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
{
	*ze = zfsdev_get_state(minor, ZST_ZEVENT);
	if (*ze == NULL)
		return (SET_ERROR(EBADF));

	return (0);
}

/*
 * Hold the file underlying 'fd' and resolve its minor/zevent state.
 * Returns the held file on success (release with zfs_zevent_fd_rele()),
 * or NULL on failure.
 */
zfs_file_t *
zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
{
	zfs_file_t *fp = zfs_file_get(fd);
	if (fp == NULL)
		return (NULL);

	int error = zfsdev_getminor(fp, minorp);
	if (error == 0)
		error = zfs_zevent_minor_to_state(*minorp, ze);

	if (error) {
		zfs_zevent_fd_rele(fp);
		fp = NULL;
	}

	return (fp);
}

/* Release a file held by zfs_zevent_fd_hold(). */
void
zfs_zevent_fd_rele(zfs_file_t *fp)
{
	zfs_file_put(fp);
}
/*
 * Get the next zevent in the stream and place a copy in 'event'.  This
 * may fail with ENOMEM if the encoded nvlist size exceeds the passed
 * 'event_size'.  In this case the stream pointer is not advanced and
 * 'event_size' is set to the minimum required buffer size.
 */
int
zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
    uint64_t *dropped)
{
	zevent_t *ev;
	size_t size;
	int error = 0;

	mutex_enter(&zevent_lock);
	if (ze->ze_zevent == NULL) {
		/* New stream start at the beginning/tail */
		ev = list_tail(&zevent_list);
		if (ev == NULL) {
			error = ENOENT;
			goto out;
		}
	} else {
		/*
		 * Existing stream continue with the next element and remove
		 * ourselves from the wait queue for the previous element
		 */
		ev = list_prev(&zevent_list, ze->ze_zevent);
		if (ev == NULL) {
			error = ENOENT;
			goto out;
		}
	}

	VERIFY0(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE));
	if (size > *event_size) {
		/* Caller's buffer is too small; report the required size */
		*event_size = size;
		error = ENOMEM;
		goto out;
	}

	if (ze->ze_zevent)
		list_remove(&ze->ze_zevent->ev_ze_list, ze);

	ze->ze_zevent = ev;
	list_insert_head(&ev->ev_ze_list, ze);
	(void) nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
	*dropped = ze->ze_dropped;

#ifdef _KERNEL
	/* Include events dropped due to rate limiting */
	*dropped += atomic_swap_64(&ratelimit_dropped, 0);
#endif
	ze->ze_dropped = 0;
out:
	mutex_exit(&zevent_lock);

	return (error);
}

/*
 * Wait in an interruptible state for any new events.  Returns 0 when an
 * event is available, EINTR if the wait was interrupted by a signal, or
 * ESHUTDOWN if the module is unloading.
 */
int
zfs_zevent_wait(zfs_zevent_t *ze)
{
	int error = EAGAIN;

	mutex_enter(&zevent_lock);
	zevent_waiters++;

	while (error == EAGAIN) {
		if (zevent_flags & ZEVENT_SHUTDOWN) {
			error = SET_ERROR(ESHUTDOWN);
			break;
		}

		if (cv_wait_sig(&zevent_cv, &zevent_lock) == 0) {
			error = SET_ERROR(EINTR);
			break;
		} else if (!list_is_empty(&zevent_list)) {
			error = 0;
			continue;
		} else {
			/* Spurious wakeup; keep waiting */
			error = EAGAIN;
		}
	}

	zevent_waiters--;
	mutex_exit(&zevent_lock);

	return (error);
}
/*
 * The caller may seek to a specific EID by passing that EID.  If the EID
 * is still available in the posted list of events the cursor is positioned
 * there.  Otherwise ENOENT is returned and the cursor is not moved.
 *
 * There are two reserved EIDs which may be passed and will never fail.
 * ZEVENT_SEEK_START positions the cursor at the start of the list, and
 * ZEVENT_SEEK_END positions the cursor at the end of the list.
 */
int
zfs_zevent_seek(zfs_zevent_t *ze, uint64_t eid)
{
	zevent_t *ev;
	int error = 0;

	mutex_enter(&zevent_lock);

	if (eid == ZEVENT_SEEK_START) {
		if (ze->ze_zevent)
			list_remove(&ze->ze_zevent->ev_ze_list, ze);

		/* NULL cursor means "restart from the tail" in _next() */
		ze->ze_zevent = NULL;
		goto out;
	}

	if (eid == ZEVENT_SEEK_END) {
		if (ze->ze_zevent)
			list_remove(&ze->ze_zevent->ev_ze_list, ze);

		ev = list_head(&zevent_list);
		if (ev) {
			ze->ze_zevent = ev;
			list_insert_head(&ev->ev_ze_list, ze);
		} else {
			ze->ze_zevent = NULL;
		}

		goto out;
	}

	/* Walk oldest-to-newest looking for the requested EID */
	for (ev = list_tail(&zevent_list); ev != NULL;
	    ev = list_prev(&zevent_list, ev)) {
		if (ev->ev_eid == eid) {
			if (ze->ze_zevent)
				list_remove(&ze->ze_zevent->ev_ze_list, ze);

			ze->ze_zevent = ev;
			list_insert_head(&ev->ev_ze_list, ze);
			break;
		}
	}

	if (ev == NULL)
		error = ENOENT;

out:
	mutex_exit(&zevent_lock);

	return (error);
}

/* Allocate and initialize per-open zevent stream state. */
void
zfs_zevent_init(zfs_zevent_t **zep)
{
	zfs_zevent_t *ze;

	ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP);
	list_link_init(&ze->ze_node);
}

/* Tear down per-open zevent stream state, detaching from any event. */
void
zfs_zevent_destroy(zfs_zevent_t *ze)
{
	mutex_enter(&zevent_lock);
	if (ze->ze_zevent)
		list_remove(&ze->ze_zevent->ev_ze_list, ze);
	mutex_exit(&zevent_lock);

	kmem_free(ze, sizeof (zfs_zevent_t));
}
#endif /* _KERNEL */

/*
 * Wrappers for FM nvlist allocators
 */
static void *
i_fm_alloc(nv_alloc_t *nva, size_t size)
{
	(void) nva;
	return (kmem_alloc(size, KM_SLEEP));
}

static void
i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
{
	(void) nva;
	kmem_free(buf, size);
}

static const nv_alloc_ops_t fm_mem_alloc_ops = {
	.nv_ao_init = NULL,
	.nv_ao_fini = NULL,
	.nv_ao_alloc = i_fm_alloc,
	.nv_ao_free = i_fm_free,
	.nv_ao_reset = NULL
};

/*
 * Create and initialize a new nv_alloc_t for a fixed buffer, buf.  A pointer
 * to the newly allocated nv_alloc_t structure is returned upon success or NULL
 * is returned to indicate that the nv_alloc structure could not be created.
 */
nv_alloc_t *
fm_nva_xcreate(char *buf, size_t bufsz)
{
	nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

	if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) {
		kmem_free(nvhdl, sizeof (nv_alloc_t));
		return (NULL);
	}

	return (nvhdl);
}

/*
 * Destroy a previously allocated nv_alloc structure.  The fixed buffer
 * associated with nva must be freed by the caller.
 */
void
fm_nva_xdestroy(nv_alloc_t *nva)
{
	nv_alloc_fini(nva);
	kmem_free(nva, sizeof (nv_alloc_t));
}
/*
 * Create a new nv list.  A pointer to a new nv list structure is returned
 * upon success or NULL is returned to indicate that the structure could
 * not be created.  The newly created nv list is created and managed by the
 * operations installed in nva.  If nva is NULL, the default FMA nva
 * operations are installed and used.
 *
 * When called from the kernel and nva == NULL, this function must be called
 * from passive kernel context with no locks held that can prevent a
 * sleeping memory allocation from occurring.  Otherwise, this function may
 * be called from other kernel contexts as long a valid nva created via
 * fm_nva_create() is supplied.
 */
nvlist_t *
fm_nvlist_create(nv_alloc_t *nva)
{
	int hdl_alloced = 0;
	nvlist_t *nvl;
	nv_alloc_t *nvhdl;

	if (nva == NULL) {
		/* No allocator supplied; build a default sleeping one */
		nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

		if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
			kmem_free(nvhdl, sizeof (nv_alloc_t));
			return (NULL);
		}
		hdl_alloced = 1;
	} else {
		nvhdl = nva;
	}

	if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {
		/* Only tear down the handle if we created it here */
		if (hdl_alloced) {
			nv_alloc_fini(nvhdl);
			kmem_free(nvhdl, sizeof (nv_alloc_t));
		}
		return (NULL);
	}

	return (nvl);
}

/*
 * Destroy a previously allocated nvlist structure.  flag indicates whether
 * or not the associated nva structure should be freed (FM_NVA_FREE) or
 * retained (FM_NVA_RETAIN).  Retaining the nv alloc structure allows
 * it to be re-used for future nvlist creation operations.
 */
void
fm_nvlist_destroy(nvlist_t *nvl, int flag)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl);

	nvlist_free(nvl);

	if (nva != NULL) {
		if (flag == FM_NVA_FREE)
			fm_nva_xdestroy(nva);
	}
}

/*
 * Consume (name, type, value[, nelem]) tuples from 'ap' and add each to
 * 'payload', stopping at a NULL name.  Returns 0 on success, the first
 * nvlist_add_*() error encountered, or EINVAL for an unknown data type.
 */
int
i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap)
{
	int nelem, ret = 0;
	data_type_t type;

	while (ret == 0 && name != NULL) {
		type = va_arg(ap, data_type_t);
		switch (type) {
		case DATA_TYPE_BYTE:
			/* Sub-int types are promoted to int/uint in varargs */
			ret = nvlist_add_byte(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_BYTE_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_byte_array(payload, name,
			    va_arg(ap, uchar_t *), nelem);
			break;
		case DATA_TYPE_BOOLEAN_VALUE:
			ret = nvlist_add_boolean_value(payload, name,
			    va_arg(ap, boolean_t));
			break;
		case DATA_TYPE_BOOLEAN_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_boolean_array(payload, name,
			    va_arg(ap, boolean_t *), nelem);
			break;
		case DATA_TYPE_INT8:
			ret = nvlist_add_int8(payload, name,
			    va_arg(ap, int));
			break;
		case DATA_TYPE_INT8_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int8_array(payload, name,
			    va_arg(ap, int8_t *), nelem);
			break;
		case DATA_TYPE_UINT8:
			ret = nvlist_add_uint8(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_UINT8_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint8_array(payload, name,
			    va_arg(ap, uint8_t *), nelem);
			break;
		case DATA_TYPE_INT16:
			ret = nvlist_add_int16(payload, name,
			    va_arg(ap, int));
			break;
		case DATA_TYPE_INT16_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int16_array(payload, name,
			    va_arg(ap, int16_t *), nelem);
			break;
		case DATA_TYPE_UINT16:
			ret = nvlist_add_uint16(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_UINT16_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint16_array(payload, name,
			    va_arg(ap, uint16_t *), nelem);
			break;
		case DATA_TYPE_INT32:
			ret = nvlist_add_int32(payload, name,
			    va_arg(ap, int32_t));
			break;
		case DATA_TYPE_INT32_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int32_array(payload, name,
			    va_arg(ap, int32_t *), nelem);
			break;
		case DATA_TYPE_UINT32:
			ret = nvlist_add_uint32(payload, name,
			    va_arg(ap, uint32_t));
			break;
		case DATA_TYPE_UINT32_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint32_array(payload, name,
			    va_arg(ap, uint32_t *), nelem);
			break;
		case DATA_TYPE_INT64:
			ret = nvlist_add_int64(payload, name,
			    va_arg(ap, int64_t));
			break;
		case DATA_TYPE_INT64_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int64_array(payload, name,
			    va_arg(ap, int64_t *), nelem);
			break;
		case DATA_TYPE_UINT64:
			ret = nvlist_add_uint64(payload, name,
			    va_arg(ap, uint64_t));
			break;
		case DATA_TYPE_UINT64_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint64_array(payload, name,
			    va_arg(ap, uint64_t *), nelem);
			break;
		case DATA_TYPE_STRING:
			ret = nvlist_add_string(payload, name,
			    va_arg(ap, char *));
			break;
		case DATA_TYPE_STRING_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_string_array(payload, name,
			    va_arg(ap, const char **), nelem);
			break;
		case DATA_TYPE_NVLIST:
			ret = nvlist_add_nvlist(payload, name,
			    va_arg(ap, nvlist_t *));
			break;
		case DATA_TYPE_NVLIST_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_nvlist_array(payload, name,
			    va_arg(ap, const nvlist_t **), nelem);
			break;
		default:
			ret = EINVAL;
		}

		name = va_arg(ap, char *);
	}
	return (ret);
}

/*
 * Add a varargs payload (NULL-name terminated) to an event nvlist,
 * bumping the payload-set-failed kstat on error.
 */
void
fm_payload_set(nvlist_t *payload, ...)
{
	int ret;
	const char *name;
	va_list ap;

	va_start(ap, payload);
	name = va_arg(ap, char *);
	ret = i_fm_payload_set(payload, name, ap);
	va_end(ap);

	if (ret)
		atomic_inc_64(&erpt_kstat_data.payload_set_failed.value.ui64);
}
/*
 * Set-up and validate the members of an ereport event according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	class			string		ereport
 *	version			uint8_t		0
 *	ena			uint64_t	<ena>
 *	detector		nvlist_t	<detector>
 *	ereport-payload		nvlist_t	<var args>
 *
 * We don't actually add a 'version' member to the payload.  Really,
 * the version quoted to us by our caller is that of the category 1
 * "ereport" event class (and we require FM_EREPORT_VERS0) but
 * the payload version of the actual leaf class event under construction
 * may be something else.  Callers should supply a version in the varargs,
 * or (better) we could take two version arguments - one for the
 * ereport category 1 classification (expect FM_EREPORT_VERS0) and one
 * for the leaf class.
 */
void
fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
    uint64_t ena, const nvlist_t *detector, ...)
{
	char ereport_class[FM_MAX_CLASS];
	const char *name;
	va_list ap;
	int ret;

	if (version != FM_EREPORT_VERS0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		return;
	}

	/* Compose the full class name: "ereport.<erpt_class>" */
	(void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s",
	    FM_EREPORT_CLASS, erpt_class);
	if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
	}

	if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
	    (nvlist_t *)detector) != 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
	}

	va_start(ap, detector);
	name = va_arg(ap, const char *);
	ret = i_fm_payload_set(ereport, name, ap);
	va_end(ap);

	if (ret)
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
}

/*
 * Set-up and validate the members of an hc fmri according to;
 *
 *	Member name		Type		Value
 *	===================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	hc-name			string		<name>
 *	hc-id			string		<id>
 *
 * Note that auth and hc-id are optional members.
 */

#define	HC_MAXPAIRS	20
#define	HC_MAXNAMELEN	50

/*
 * Add the version/scheme/authority members common to all hc fmris.
 * Returns 1 on success, 0 on failure (kstat already bumped).
 */
static int
fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
{
	if (version != FM_HC_SCHEME_VERSION) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
	    nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
	    (nvlist_t *)auth) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	return (1);
}

/*
 * Build an hc fmri from (name, id) varargs pairs; at most HC_MAXPAIRS
 * pairs are consumed.
 */
void
fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    nvlist_t *snvl, int npairs, ...)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
	nvlist_t *pairs[HC_MAXPAIRS];
	va_list ap;
	int i;

	if (!fm_fmri_hc_set_common(fmri, version, auth))
		return;

	npairs = MIN(npairs, HC_MAXPAIRS);

	va_start(ap, npairs);
	for (i = 0; i < npairs; i++) {
		const char *name = va_arg(ap, const char *);
		uint32_t id = va_arg(ap, uint32_t);
		char idstr[11];

		(void) snprintf(idstr, sizeof (idstr), "%u", id);

		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
	va_end(ap);

	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST,
	    (const nvlist_t **)pairs, npairs) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
	}

	/* nvlist_add_nvlist_array() copied the pairs; free our temporaries */
	for (i = 0; i < npairs; i++)
		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);

	if (snvl != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}

/*
 * Build an hc fmri by prepending the hc-list pairs from an existing
 * 'bboard' fmri to (name, id) varargs pairs.
 */
void
fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
    nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
	nvlist_t *pairs[HC_MAXPAIRS];
	nvlist_t **hcl;
	uint_t n;
	int i, j;
	va_list ap;
	const char *hcname, *hcid;

	if (!fm_fmri_hc_set_common(fmri, version, auth))
		return;

	/*
	 * copy the bboard nvpairs to the pairs array
	 */
	if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
	    != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	for (i = 0; i < n; i++) {
		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
		    &hcname) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}

		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
			/* Unwind all pairs created so far before bailing */
			for (j = 0; j <= i; j++) {
				if (pairs[j] != NULL)
					fm_nvlist_destroy(pairs[j],
					    FM_NVA_RETAIN);
			}
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}

	/*
	 * create the pairs from passed in pairs
	 */
	npairs = MIN(npairs, HC_MAXPAIRS);

	va_start(ap, npairs);
	for (i = n; i < npairs + n; i++) {
		const char *name = va_arg(ap, const char *);
		uint32_t id = va_arg(ap, uint32_t);
		char idstr[11];
		(void) snprintf(idstr, sizeof (idstr), "%u", id);
		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
			for (j = 0; j <= i; j++) {
				if (pairs[j] != NULL)
					fm_nvlist_destroy(pairs[j],
					    FM_NVA_RETAIN);
			}
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			va_end(ap);
			return;
		}
	}
	va_end(ap);

	/*
	 * Create the fmri hc list
	 */
	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST,
	    (const nvlist_t **)pairs, npairs + n) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	for (i = 0; i < npairs + n; i++) {
		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
	}

	if (snvl != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}
}
/*
 * Set-up and validate the members of an dev fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	devpath			string		<devpath>
 *	[devid]			string		<devid>
 *	[target-port-l0id]	string		<target-port-lun0-id>
 *
 * Note that auth and devid are optional members.
 */
void
fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
    const char *devpath, const char *devid, const char *tpl0)
{
	int err = 0;

	if (version != DEV_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	/* Accumulate errors and report them once at the end */
	err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version);
	err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV);

	if (auth != NULL) {
		err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
		    (nvlist_t *)auth);
	}

	err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);

	if (devid != NULL)
		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid);

	if (tpl0 != NULL)
		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0);

	if (err)
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

}

/*
 * Set-up and validate the members of an cpu fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	cpuid			uint32_t	<cpu_id>
 *	cpumask			uint8_t		<cpu_mask>
 *	serial			uint64_t	<serial_id>
 *
 * Note that auth, cpumask, serial are optional members.
 *
 */
void
fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
    uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp)
{
	uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;

	if (version < CPU_SCHEME_VERSION1) {
		atomic_inc_64(failedp);
		return;
	}

	if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
		atomic_inc_64(failedp);
		return;
	}

	if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
	    FM_FMRI_SCHEME_CPU) != 0) {
		atomic_inc_64(failedp);
		return;
	}

	if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
	    (nvlist_t *)auth) != 0)
		atomic_inc_64(failedp);

	if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
		atomic_inc_64(failedp);

	if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
	    *cpu_maskp) != 0)
		atomic_inc_64(failedp);

	if (serial_idp == NULL || nvlist_add_string(fmri_cpu,
	    FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0)
		atomic_inc_64(failedp);
}

/*
 * Set-up and validate the members of a mem according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>		[optional]
 *	unum			string		<unum>
 *	serial			string		<serial>	[optional*]
 *	offset			uint64_t	<offset>	[optional]
 *
 *	* serial is required if offset is present
 */
void
fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    const char *unum, const char *serial, uint64_t offset)
{
	if (version != MEM_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	/* An offset without a serial number is invalid (see table above) */
	if (!serial && (offset != (uint64_t)-1)) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (auth != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
		    (nvlist_t *)auth) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}

	if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
	}

	if (serial != NULL) {
		if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
		    (const char **)&serial, 1) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
		if (offset != (uint64_t)-1 && nvlist_add_uint64(fmri,
		    FM_FMRI_MEM_OFFSET, offset) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}

/*
 * Build a zfs-scheme fmri from a pool GUID and (optionally, if non-zero)
 * a vdev GUID.
 */
void
fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid,
    uint64_t vdev_guid)
{
	if (version != ZFS_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
	}

	if (vdev_guid != 0) {
		if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}

/*
 * Increment the generation field of a format 1 or 2 ENA; returns 0 for
 * an unrecognized format.
 */
uint64_t
fm_ena_increment(uint64_t ena)
{
	uint64_t new_ena;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		new_ena = ena + (1 << ENA_FMT1_GEN_SHFT);
		break;
	case FM_ENA_FMT2:
		new_ena = ena + (1 << ENA_FMT2_GEN_SHFT);
		break;
	default:
		new_ena = 0;
	}

	return (new_ena);
}

/*
 * Generate an ENA for the given CPU id in the requested format.  A zero
 * 'timestamp' in format 1 means "use the current hrtime".  Returns 0 for
 * an unrecognized format.
 */
uint64_t
fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
{
	uint64_t ena = 0;

	switch (format) {
	case FM_ENA_FMT1:
		if (timestamp) {
			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
			    ENA_FMT1_CPUID_MASK) |
			    ((timestamp << ENA_FMT1_TIME_SHFT) &
			    ENA_FMT1_TIME_MASK));
		} else {
			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
			    ENA_FMT1_CPUID_MASK) |
			    ((gethrtime() << ENA_FMT1_TIME_SHFT) &
			    ENA_FMT1_TIME_MASK));
		}
		break;
	case FM_ENA_FMT2:
		ena = (uint64_t)((format & ENA_FORMAT_MASK) |
		    ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK));
		break;
	default:
		break;
	}

	return (ena);
}

/* Generate an ENA tagged with the current CPU (preemption disabled). */
uint64_t
fm_ena_generate(uint64_t timestamp, uchar_t format)
{
	uint64_t ena;

	kpreempt_disable();
	ena = fm_ena_generate_cpu(timestamp, getcpuid(), format);
	kpreempt_enable();

	return (ena);
}

/* Extract the generation field from a format 1 or 2 ENA. */
uint64_t
fm_ena_generation_get(uint64_t ena)
{
	uint64_t gen;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT;
		break;
	case FM_ENA_FMT2:
		gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT;
		break;
	default:
		gen = 0;
		break;
	}

	return (gen);
}

/* Extract the format field from an ENA. */
uchar_t
fm_ena_format_get(uint64_t ena)
{

	return (ENA_FORMAT(ena));
}

/* Extract the id field from a format 1 or 2 ENA. */
uint64_t
fm_ena_id_get(uint64_t ena)
{
	uint64_t id;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT;
		break;
	case FM_ENA_FMT2:
		id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT;
		break;
	default:
		id = 0;
	}

	return (id);
}

/* Extract the time field from a format 1 or 2 ENA. */
uint64_t
fm_ena_time_get(uint64_t ena)
{
	uint64_t time;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT;
		break;
	case FM_ENA_FMT2:
		time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT;
		break;
	default:
		time = 0;
	}

	return (time);
}

#ifdef _KERNEL
/*
 * Helper function to increment ereport dropped count.  Used by the event
 * rate limiting code to give feedback to the user about how many events were
 * rate limited by including them in the 'dropped' count.
 */
void
fm_erpt_dropped_increment(void)
{
	atomic_inc_64(&ratelimit_dropped);
}

/*
 * Module init: create the fm kstats and the global zevent lock, list,
 * and condition variable, then initialize the ereport subsystem.
 */
void
fm_init(void)
{
	zevent_len_cur = 0;
	zevent_flags = 0;

	/* Initialize zevent allocation and generation kstats */
	fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
	    sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);

	if (fm_ksp != NULL) {
		fm_ksp->ks_data = &erpt_kstat_data;
		kstat_install(fm_ksp);
	} else {
		cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
	}

	mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zevent_list, sizeof (zevent_t),
	    offsetof(zevent_t, ev_node));
	cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);

	zfs_ereport_init();
}

/*
 * Module fini: drain all queued zevents, wake and wait out any blocked
 * readers, then tear down the zevent primitives and kstats.
 */
void
fm_fini(void)
{
	uint_t count;

	zfs_ereport_fini();

	zfs_zevent_drain_all(&count);

	mutex_enter(&zevent_lock);
	cv_broadcast(&zevent_cv);

	/* Flag shutdown and spin until all waiters have dropped out */
	zevent_flags |= ZEVENT_SHUTDOWN;
	while (zevent_waiters > 0) {
		mutex_exit(&zevent_lock);
		kpreempt(KPREEMPT_SYNC);
		mutex_enter(&zevent_lock);
	}
	mutex_exit(&zevent_lock);

	cv_destroy(&zevent_cv);
	list_destroy(&zevent_list);
	mutex_destroy(&zevent_lock);

	if (fm_ksp != NULL) {
		kstat_delete(fm_ksp);
		fm_ksp = NULL;
	}
}

ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, len_max, UINT, ZMOD_RW,
	"Max event queue length");

#endif /* _KERNEL */