Path: blob/main/sys/contrib/openzfs/cmd/zed/zed.d/deadman-sync-slot_off.sh
48529 views
#!/bin/sh
# shellcheck disable=SC2154,SC2034
#
# Turn off disk's enclosure slot if an I/O is hung triggering the deadman.
#
# It's possible for outstanding I/O to a misbehaving SCSI disk to neither
# promptly complete nor return an error.  This can occur due to retry and
# recovery actions taken by the SCSI layer, driver, or disk.  When it occurs
# the pool will be unresponsive even though there may be sufficient redundancy
# configured to proceed without this single disk.
#
# When a hung I/O is detected by the kmods it will be posted as a deadman
# event.  By default an I/O is considered to be hung after 5 minutes.  This
# value can be changed with the zfs_deadman_ziotime_ms module parameter.
# If ZED_POWER_OFF_ENCLOSURE_SLOT_ON_DEADMAN is set the disk's enclosure
# slot will be powered off causing the outstanding I/O to fail.  The ZED
# will then handle this like a normal disk failure and FAULT the vdev.
#
# We assume the user will be responsible for turning the slot back on
# after replacing the disk.
#
# Note that this script requires that your enclosure be supported by the
# Linux SCSI Enclosure Services (SES) driver.  The script will do nothing
# if you have no enclosure, or if your enclosure isn't supported.
#
# Environment read by this script:
#   ZED_ZEDLET_DIR                           directory holding zed.rc and
#                                            zed-functions.sh
#   ZED_POWER_OFF_ENCLOSURE_SLOT_ON_DEADMAN  must be "1" to enable the script
#   ZEVENT_POOL_FAILMODE                     must be "wait"
#   ZEVENT_VDEV_ENC_SYSFS_PATH               enclosure slot sysfs directory
#   ZEVENT_VDEV_PATH                         vdev path (used for logging only)
#
# Exit codes:
#   0: slot successfully powered off
#   1: enclosure not available
#   2: ZED_POWER_OFF_ENCLOSURE_SLOT_ON_DEADMAN disabled
#   3: System not configured to wait on deadman
#   4: The enclosure sysfs path passed from ZFS does not exist
#   5: Enclosure slot didn't actually turn off after we told it to

[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
. "${ZED_ZEDLET_DIR}/zed-functions.sh"

if [ ! -d /sys/class/enclosure ] ; then
	# No JBOD enclosure or NVMe slots
	exit 1
fi

if [ "${ZED_POWER_OFF_ENCLOSURE_SLOT_ON_DEADMAN}" != "1" ] ; then
	exit 2
fi

if [ "$ZEVENT_POOL_FAILMODE" != "wait" ] ; then
	exit 3
fi

if [ ! -f "$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status" ] ; then
	exit 4
fi

# Turn off the slot and wait for sysfs to report that the slot is off.
# It can take ~400ms on some enclosures and multiple retries may be needed.
#
# The outer loop re-issues the "off" request (up to 20 times); the inner loop
# polls the reported status up to 5 times, 0.1s apart, after each request.
# The loop counters themselves are never read.
for i in $(seq 1 20) ; do
	echo "off" | tee "$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status"

	for j in $(seq 1 5) ; do
		# POSIX test(1) only defines '=' for string equality, and the
		# path is quoted so it cannot be word-split or globbed.
		if [ "$(cat "$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status")" = "off" ] ; then
			# Slot reports off: leave both loops.
			break 2
		fi
		sleep 0.1
	done
done

# Final check covers the case where every retry above was exhausted.
if [ "$(cat "$ZEVENT_VDEV_ENC_SYSFS_PATH/power_status")" != "off" ] ; then
	exit 5
fi

zed_log_msg "powered down slot $ZEVENT_VDEV_ENC_SYSFS_PATH for $ZEVENT_VDEV_PATH"