Path: sys/contrib/openzfs/cmd/zpool_influxdb/zpool_influxdb.c
// SPDX-License-Identifier: CDDL-1.0
/*
 * Gather top-level ZFS pool and resilver/scan statistics and print using
 * influxdb line protocol
 * usage: [options] [pool_name]
 * where options are:
 *   --execd, -e            run in telegraf execd input plugin mode, [CR] on
 *                          stdin causes a sample to be printed and wait for
 *                          the next [CR]
 *   --no-histograms, -n    don't print histogram data (reduces cardinality
 *                          if you don't care about histograms)
 *   --sum-histogram-buckets, -s    sum histogram bucket values
 *
 * To integrate into telegraf use one of:
 * 1. the `inputs.execd` plugin with the `--execd` option
 * 2. the `inputs.exec` plugin to simply run with no options
 *
 * NOTE: libzfs is an unstable interface. YMMV.
 *
 * The design goals of this software include:
 * + be as lightweight as possible
 * + reduce the number of external dependencies as far as possible, hence
 *   there is no dependency on a client library for managing the metric
 *   collection -- info is printed, KISS
 * + broken pools or kernel bugs can cause this process to hang in an
 *   unkillable state. For this reason, it is best to keep the damage limited
 *   to a small process like zpool_influxdb rather than a larger collector.
 *
 * Copyright 2018-2020 Richard Elling
 *
 * This software is dual-licensed MIT and CDDL.
 *
 * The MIT License (MIT)
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License Version 1.0 (CDDL-1.0).
 * You can obtain a copy of the license from the top-level file
 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
 * You may not use this file except in compliance with the license.
 *
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * CDDL HEADER END
 */
#include <string.h>
#include <getopt.h>
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <libzfs.h>

#define POOL_MEASUREMENT "zpool_stats"
#define SCAN_MEASUREMENT "zpool_scan_stats"
#define VDEV_MEASUREMENT "zpool_vdev_stats"
#define POOL_LATENCY_MEASUREMENT "zpool_latency"
#define POOL_QUEUE_MEASUREMENT "zpool_vdev_queue"
#define MIN_LAT_INDEX 10 /* minimum latency index 10 = 1024ns */
#define POOL_IO_SIZE_MEASUREMENT "zpool_io_size"
#define MIN_SIZE_INDEX 9 /* minimum size index 9 = 512 bytes */

/* global options */
int execd_mode = 0;
int no_histograms = 0;
int sum_histogram_buckets = 0;
char metric_data_type = 'u';
uint64_t metric_value_mask = UINT64_MAX;
uint64_t timestamp = 0;
int complained_about_sync = 0;
const char *tags = "";

typedef int (*stat_printer_f)(nvlist_t *, const char *, const char *);
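/*
 * Everything below prints influxdb line protocol:
 *   <measurement>[,<tag>=<value>...] <field>=<value>[,...] <timestamp>
 * where the timestamp is nanoseconds since the Epoch and unsigned integer
 * fields carry a 'u' suffix ('i' when --signed-int is given). An
 * illustrative, abbreviated sample (not verbatim output):
 *   zpool_stats,name=tank,state=ONLINE,vdev=root alloc=1234u,... 1590000000000000000
 */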
/*
 * influxdb line protocol rules for escaping are important because the
 * zpool name can include characters that need to be escaped
 *
 * caller is responsible for freeing result
 */
static char *
escape_string(const char *s)
{
    const char *c;
    char *d;
    /* worst case: every character needs escaping, plus the NUL */
    char *t = (char *)malloc(strlen(s) * 2 + 1);
    if (t == NULL) {
        fprintf(stderr, "error: cannot allocate memory\n");
        exit(1);
    }

    for (c = s, d = t; *c != '\0'; c++, d++) {
        switch (*c) {
        case ' ':
        case ',':
        case '=':
        case '\\':
            *d++ = '\\';
            zfs_fallthrough;
        default:
            *d = *c;
        }
    }
    *d = '\0';
    return (t);
}
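/*
 * For example (illustrative): a pool named "my pool,a=b" escapes to
 * "my\ pool\,a\=b" so that influxdb treats it as one tag value.
 */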
/*
 * print key=value where value is a uint64_t
 */
static void
print_kv(const char *key, uint64_t value)
{
    printf("%s=%llu%c", key,
        (u_longlong_t)value & metric_value_mask, metric_data_type);
}

/*
 * print_scan_status() prints the details as often seen in the "zpool status"
 * output. However, unlike the zpool command, which is intended for humans,
 * this output is suitable for long-term tracking in influxdb.
 * TODO: update to include issued scan data
 */
static int
print_scan_status(nvlist_t *nvroot, const char *pool_name)
{
    uint_t c;
    int64_t elapsed;
    uint64_t examined, pass_exam, paused_time, paused_ts, rate;
    uint64_t remaining_time;
    pool_scan_stat_t *ps = NULL;
    double pct_done;
    const char *const state[DSS_NUM_STATES] = {
        "none", "scanning", "finished", "canceled"};
    const char *func;

    (void) nvlist_lookup_uint64_array(nvroot,
        ZPOOL_CONFIG_SCAN_STATS,
        (uint64_t **)&ps, &c);

    /*
     * ignore if there are no stats
     */
    if (ps == NULL)
        return (0);

    /*
     * return error if state is bogus
     */
    if (ps->pss_state >= DSS_NUM_STATES ||
        ps->pss_func >= POOL_SCAN_FUNCS) {
        if (complained_about_sync % 1000 == 0) {
            fprintf(stderr, "error: cannot decode scan stats: "
                "ZFS is out of sync with compiled zpool_influxdb\n");
            complained_about_sync++;
        }
        return (1);
    }

    switch (ps->pss_func) {
    case POOL_SCAN_NONE:
        func = "none_requested";
        break;
    case POOL_SCAN_SCRUB:
        func = "scrub";
        break;
    case POOL_SCAN_RESILVER:
        func = "resilver";
        break;
#ifdef POOL_SCAN_REBUILD
    case POOL_SCAN_REBUILD:
        func = "rebuild";
        break;
#endif
    default:
        func = "scan";
    }

    /* overall progress */
    examined = ps->pss_examined ? ps->pss_examined : 1;
    pct_done = 0.0;
    if (ps->pss_to_examine > 0)
        pct_done = 100.0 * examined / ps->pss_to_examine;

#ifdef EZFS_SCRUB_PAUSED
    paused_ts = ps->pss_pass_scrub_pause;
    paused_time = ps->pss_pass_scrub_spent_paused;
#else
    paused_ts = 0;
    paused_time = 0;
#endif

    /* calculations for this pass */
    if (ps->pss_state == DSS_SCANNING) {
        elapsed = (int64_t)time(NULL) - (int64_t)ps->pss_pass_start -
            (int64_t)paused_time;
        elapsed = (elapsed > 0) ? elapsed : 1;
        pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
        rate = pass_exam / elapsed;
        rate = (rate > 0) ? rate : 1;
        /* seconds left = bytes remaining / bytes per second */
        if (ps->pss_to_examine > examined)
            remaining_time = (ps->pss_to_examine - examined) / rate;
        else
            remaining_time = 0;
    } else {
        elapsed =
            (int64_t)ps->pss_end_time - (int64_t)ps->pss_pass_start -
            (int64_t)paused_time;
        elapsed = (elapsed > 0) ? elapsed : 1;
        pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
        rate = pass_exam / elapsed;
        remaining_time = 0;
    }
    rate = rate ? rate : 1;

    /* influxdb line protocol format: "tags metrics timestamp" */
    printf("%s%s,function=%s,name=%s,state=%s ",
        SCAN_MEASUREMENT, tags, func, pool_name, state[ps->pss_state]);
    print_kv("end_ts", ps->pss_end_time);
    print_kv(",errors", ps->pss_errors);
    print_kv(",examined", examined);
    print_kv(",skipped", ps->pss_skipped);
    print_kv(",issued", ps->pss_issued);
    print_kv(",pass_examined", pass_exam);
    print_kv(",pass_issued", ps->pss_pass_issued);
    print_kv(",paused_ts", paused_ts);
    print_kv(",paused_t", paused_time);
    printf(",pct_done=%.2f", pct_done);
    print_kv(",processed", ps->pss_processed);
    print_kv(",rate", rate);
    print_kv(",remaining_t", remaining_time);
    print_kv(",start_ts", ps->pss_start_time);
    print_kv(",to_examine", ps->pss_to_examine);
    printf(" %llu\n", (u_longlong_t)timestamp);
    return (0);
}
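/*
 * Illustrative zpool_scan_stats sample, wrapped here for readability
 * (real output is a single line):
 *   zpool_scan_stats,function=scrub,name=tank,state=finished
 *       end_ts=1590000000u,errors=0u,examined=1000000u,...
 *       to_examine=1000000u 1590000001000000000
 */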
/*
 * get a vdev name that corresponds to the top-level vdev names
 * printed by `zpool status`
 */
static char *
get_vdev_name(nvlist_t *nvroot, const char *parent_name)
{
    static char vdev_name[256];
    uint64_t vdev_id = 0;

    const char *vdev_type = "unknown";
    (void) nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, &vdev_type);

    if (nvlist_lookup_uint64(
        nvroot, ZPOOL_CONFIG_ID, &vdev_id) != 0)
        vdev_id = UINT64_MAX;

    if (parent_name == NULL) {
        (void) snprintf(vdev_name, sizeof (vdev_name), "%s",
            vdev_type);
    } else {
        (void) snprintf(vdev_name, sizeof (vdev_name),
            "%.220s/%s-%llu",
            parent_name, vdev_type, (u_longlong_t)vdev_id);
    }
    return (vdev_name);
}

/*
 * get a string suitable for an influxdb tag that describes this vdev
 *
 * By default only the vdev hierarchical name is shown, separated by '/'
 * If the vdev has an associated path, which is typical of leaf vdevs,
 * then the path is added.
 * It would be nice to have the devid instead of the path, but under
 * Linux we cannot be sure a devid will exist and we'd rather have
 * something than nothing, so we'll use path instead.
 */
static char *
get_vdev_desc(nvlist_t *nvroot, const char *parent_name)
{
    static char vdev_desc[2 * MAXPATHLEN];
    char vdev_value[MAXPATHLEN];
    char *s, *t;

    const char *vdev_type = "unknown";
    uint64_t vdev_id = UINT64_MAX;
    const char *vdev_path = NULL;
    (void) nvlist_lookup_string(nvroot, ZPOOL_CONFIG_TYPE, &vdev_type);
    (void) nvlist_lookup_uint64(nvroot, ZPOOL_CONFIG_ID, &vdev_id);
    (void) nvlist_lookup_string(nvroot, ZPOOL_CONFIG_PATH, &vdev_path);

    if (parent_name == NULL) {
        s = escape_string(vdev_type);
        (void) snprintf(vdev_value, sizeof (vdev_value), "vdev=%s", s);
        free(s);
    } else {
        s = escape_string(parent_name);
        t = escape_string(vdev_type);
        (void) snprintf(vdev_value, sizeof (vdev_value),
            "vdev=%s/%s-%llu", s, t, (u_longlong_t)vdev_id);
        free(s);
        free(t);
    }
    if (vdev_path == NULL) {
        (void) snprintf(vdev_desc, sizeof (vdev_desc), "%s",
            vdev_value);
    } else {
        s = escape_string(vdev_path);
        (void) snprintf(vdev_desc, sizeof (vdev_desc), "path=%s,%s",
            s, vdev_value);
        free(s);
    }
    return (vdev_desc);
}
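/*
 * Illustrative vdev descriptors produced by get_vdev_desc():
 *   root vdev:            "vdev=root"
 *   top-level raidz vdev: "vdev=root/raidz-0"
 *   leaf disk:            "path=/dev/sda1,vdev=root/raidz-0/disk-3"
 * (paths and ids vary; the path tag appears only when the vdev has one)
 */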
/*
 * vdev summary stats are a combination of the data shown by
 * `zpool status` and `zpool list -v`
 */
static int
print_summary_stats(nvlist_t *nvroot, const char *pool_name,
    const char *parent_name)
{
    uint_t c;
    vdev_stat_t *vs;
    char *vdev_desc = NULL;
    vdev_desc = get_vdev_desc(nvroot, parent_name);
    if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
        (uint64_t **)&vs, &c) != 0) {
        return (1);
    }
    printf("%s%s,name=%s,state=%s,%s ", POOL_MEASUREMENT, tags,
        pool_name, zpool_state_to_name((vdev_state_t)vs->vs_state,
        (vdev_aux_t)vs->vs_aux), vdev_desc);
    print_kv("alloc", vs->vs_alloc);
    print_kv(",free", vs->vs_space - vs->vs_alloc);
    print_kv(",size", vs->vs_space);
    print_kv(",read_bytes", vs->vs_bytes[ZIO_TYPE_READ]);
    print_kv(",read_errors", vs->vs_read_errors);
    print_kv(",read_ops", vs->vs_ops[ZIO_TYPE_READ]);
    print_kv(",write_bytes", vs->vs_bytes[ZIO_TYPE_WRITE]);
    print_kv(",write_errors", vs->vs_write_errors);
    print_kv(",write_ops", vs->vs_ops[ZIO_TYPE_WRITE]);
    print_kv(",checksum_errors", vs->vs_checksum_errors);
    print_kv(",fragmentation", vs->vs_fragmentation);
    printf(" %llu\n", (u_longlong_t)timestamp);
    return (0);
}

/*
 * vdev latency stats are histograms stored as nvlist arrays of uint64.
 * Latency stats include the ZIO scheduler classes plus lower-level
 * vdev latencies.
 *
 * In many cases, the top-level "root" view obscures the underlying
 * top-level vdev operations. For example, if a pool has a log, special,
 * or cache device, then each can behave very differently. It is useful
 * to see how each is responding.
 */
static int
print_vdev_latency_stats(nvlist_t *nvroot, const char *pool_name,
    const char *parent_name)
{
    uint_t c, end = 0;
    nvlist_t *nv_ex;
    char *vdev_desc = NULL;

    /* short_names become part of the metric name and are influxdb-ready */
    struct lat_lookup {
        const char *name;
        const char *short_name;
        uint64_t sum;
        uint64_t *array;
    };
    struct lat_lookup lat_type[] = {
        {ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO, "total_read", 0},
        {ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO, "total_write", 0},
        {ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO, "disk_read", 0},
        {ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO, "disk_write", 0},
        {ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO, "sync_read", 0},
        {ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO, "sync_write", 0},
        {ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO, "async_read", 0},
        {ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO, "async_write", 0},
        {ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO, "scrub", 0},
#ifdef ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO
        {ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO, "trim", 0},
#endif
        {ZPOOL_CONFIG_VDEV_REBUILD_LAT_HISTO, "rebuild", 0},
        {NULL, NULL}
    };

    if (nvlist_lookup_nvlist(nvroot,
        ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
        return (6);
    }

    vdev_desc = get_vdev_desc(nvroot, parent_name);

    for (int i = 0; lat_type[i].name; i++) {
        if (nvlist_lookup_uint64_array(nv_ex,
            lat_type[i].name, &lat_type[i].array, &c) != 0) {
            fprintf(stderr, "error: can't get %s\n",
                lat_type[i].name);
            return (3);
        }
        /* end index: all of the arrays are the same size */
        end = c - 1;
    }

    for (int bucket = 0; bucket <= end; bucket++) {
        if (bucket < MIN_LAT_INDEX) {
            /* don't print, but collect the sum */
            for (int i = 0; lat_type[i].name; i++) {
                lat_type[i].sum += lat_type[i].array[bucket];
            }
            continue;
        }
        if (bucket < end) {
            printf("%s%s,le=%0.6f,name=%s,%s ",
                POOL_LATENCY_MEASUREMENT, tags,
                (float)(1ULL << bucket) * 1e-9,
                pool_name, vdev_desc);
        } else {
            printf("%s%s,le=+Inf,name=%s,%s ",
                POOL_LATENCY_MEASUREMENT, tags, pool_name,
                vdev_desc);
        }
        for (int i = 0; lat_type[i].name; i++) {
            if (bucket <= MIN_LAT_INDEX || sum_histogram_buckets) {
                lat_type[i].sum += lat_type[i].array[bucket];
            } else {
                lat_type[i].sum = lat_type[i].array[bucket];
            }
            print_kv(lat_type[i].short_name, lat_type[i].sum);
            if (lat_type[i + 1].name != NULL) {
                printf(",");
            }
        }
        printf(" %llu\n", (u_longlong_t)timestamp);
    }
    return (0);
}
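/*
 * Note on the le tag above: latency buckets are powers of two, so le is
 * the bucket upper bound in seconds (2^bucket nanoseconds), with le=+Inf
 * for the last bucket. With --sum-histogram-buckets the values accumulate
 * across buckets, giving Prometheus-style cumulative histograms.
 */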
/*
 * vdev request size stats are histograms stored as nvlist arrays of uint64.
 * Request size stats include the ZIO scheduler classes plus lower-level
 * vdev sizes. Both independent (ind) and aggregated (agg) sizes are reported.
 *
 * In many cases, the top-level "root" view obscures the underlying
 * top-level vdev operations. For example, if a pool has a log, special,
 * or cache device, then each can behave very differently. It is useful
 * to see how each is responding.
 */
static int
print_vdev_size_stats(nvlist_t *nvroot, const char *pool_name,
    const char *parent_name)
{
    uint_t c, end = 0;
    nvlist_t *nv_ex;
    char *vdev_desc = NULL;

    /* short_names become the field name */
    struct size_lookup {
        const char *name;
        const char *short_name;
        uint64_t sum;
        uint64_t *array;
    };
    struct size_lookup size_type[] = {
        {ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO, "sync_read_ind"},
        {ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO, "sync_write_ind"},
        {ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO, "async_read_ind"},
        {ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO, "async_write_ind"},
        {ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO, "scrub_read_ind"},
        {ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO, "sync_read_agg"},
        {ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO, "sync_write_agg"},
        {ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO, "async_read_agg"},
        {ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO, "async_write_agg"},
        {ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO, "scrub_read_agg"},
#ifdef ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO
        {ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO, "trim_write_ind"},
        {ZPOOL_CONFIG_VDEV_AGG_TRIM_HISTO, "trim_write_agg"},
#endif
        {ZPOOL_CONFIG_VDEV_IND_REBUILD_HISTO, "rebuild_write_ind"},
        {ZPOOL_CONFIG_VDEV_AGG_REBUILD_HISTO, "rebuild_write_agg"},
        {NULL, NULL}
    };

    if (nvlist_lookup_nvlist(nvroot,
        ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
        return (6);
    }

    vdev_desc = get_vdev_desc(nvroot, parent_name);

    for (int i = 0; size_type[i].name; i++) {
        if (nvlist_lookup_uint64_array(nv_ex, size_type[i].name,
            &size_type[i].array, &c) != 0) {
            fprintf(stderr, "error: can't get %s\n",
                size_type[i].name);
            return (3);
        }
        /* end index: all of the arrays are the same size */
        end = c - 1;
    }

    for (int bucket = 0; bucket <= end; bucket++) {
        if (bucket < MIN_SIZE_INDEX) {
            /* don't print, but collect the sum */
            for (int i = 0; size_type[i].name; i++) {
                size_type[i].sum += size_type[i].array[bucket];
            }
            continue;
        }

        if (bucket < end) {
            printf("%s%s,le=%llu,name=%s,%s ",
                POOL_IO_SIZE_MEASUREMENT, tags, 1ULL << bucket,
                pool_name, vdev_desc);
        } else {
            printf("%s%s,le=+Inf,name=%s,%s ",
                POOL_IO_SIZE_MEASUREMENT, tags, pool_name,
                vdev_desc);
        }
        for (int i = 0; size_type[i].name; i++) {
            if (bucket <= MIN_SIZE_INDEX || sum_histogram_buckets) {
                size_type[i].sum += size_type[i].array[bucket];
            } else {
                size_type[i].sum = size_type[i].array[bucket];
            }
            print_kv(size_type[i].short_name, size_type[i].sum);
            if (size_type[i + 1].name != NULL) {
                printf(",");
            }
        }
        printf(" %llu\n", (u_longlong_t)timestamp);
    }
    return (0);
}
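/*
 * Here le is the I/O size bucket upper bound in bytes (1 << bucket),
 * running from le=512 (MIN_SIZE_INDEX) to le=+Inf. Illustrative sample:
 *   zpool_io_size,le=512,name=tank,vdev=root sync_read_ind=0u,... <timestamp>
 */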
/*
 * ZIO scheduler queue stats are stored as gauges. This is unfortunate
 * because the values can change very rapidly and any point-in-time
 * value will quickly be obsoleted. It is also not easy to downsample.
 * Thus only the top-level queue stats might be beneficial... maybe.
 */
static int
print_queue_stats(nvlist_t *nvroot, const char *pool_name,
    const char *parent_name)
{
    nvlist_t *nv_ex;
    uint64_t value;

    /* short_names are used for the field name */
    struct queue_lookup {
        const char *name;
        const char *short_name;
    };
    struct queue_lookup queue_type[] = {
        {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active"},
        {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active"},
        {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active"},
        {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active"},
        {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active"},
        {ZPOOL_CONFIG_VDEV_REBUILD_ACTIVE_QUEUE, "rebuild_active"},
        {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend"},
        {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend"},
        {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend"},
        {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend"},
        {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend"},
        {ZPOOL_CONFIG_VDEV_REBUILD_PEND_QUEUE, "rebuild_pend"},
        {NULL, NULL}
    };

    if (nvlist_lookup_nvlist(nvroot,
        ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
        return (6);
    }

    printf("%s%s,name=%s,%s ", POOL_QUEUE_MEASUREMENT, tags, pool_name,
        get_vdev_desc(nvroot, parent_name));
    for (int i = 0; queue_type[i].name; i++) {
        if (nvlist_lookup_uint64(nv_ex,
            queue_type[i].name, &value) != 0) {
            fprintf(stderr, "error: can't get %s\n",
                queue_type[i].name);
            return (3);
        }
        print_kv(queue_type[i].short_name, value);
        if (queue_type[i + 1].name != NULL) {
            printf(",");
        }
    }
    printf(" %llu\n", (u_longlong_t)timestamp);
    return (0);
}

/*
 * top-level vdev stats are at the pool level
 */
static int
print_top_level_vdev_stats(nvlist_t *nvroot, const char *pool_name)
{
    nvlist_t *nv_ex;
    uint64_t value;

    /* short_names become part of the metric name */
    struct queue_lookup {
        const char *name;
        const char *short_name;
    };
    struct queue_lookup queue_type[] = {
        {ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE, "sync_r_active_queue"},
        {ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE, "sync_w_active_queue"},
        {ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active_queue"},
        {ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active_queue"},
        {ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active_queue"},
        {ZPOOL_CONFIG_VDEV_REBUILD_ACTIVE_QUEUE, "rebuild_active_queue"},
        {ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend_queue"},
        {ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend_queue"},
        {ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend_queue"},
        {ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend_queue"},
        {ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend_queue"},
        {ZPOOL_CONFIG_VDEV_REBUILD_PEND_QUEUE, "rebuild_pend_queue"},
        {NULL, NULL}
    };

    if (nvlist_lookup_nvlist(nvroot,
        ZPOOL_CONFIG_VDEV_STATS_EX, &nv_ex) != 0) {
        return (6);
    }

    printf("%s%s,name=%s,vdev=root ", VDEV_MEASUREMENT, tags,
        pool_name);
    for (int i = 0; queue_type[i].name; i++) {
        if (nvlist_lookup_uint64(nv_ex,
            queue_type[i].name, &value) != 0) {
            fprintf(stderr, "error: can't get %s\n",
                queue_type[i].name);
            return (3);
        }
        if (i > 0)
            printf(",");
        print_kv(queue_type[i].short_name, value);
    }

    printf(" %llu\n", (u_longlong_t)timestamp);
    return (0);
}
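/*
 * The recursive printer below applies a stat_printer_f callback to the
 * given vdev first, then depth-first to every child in
 * ZPOOL_CONFIG_CHILDREN, building hierarchical names such as
 * root/mirror-0/disk-1 (illustrative).
 */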
/*
 * recursive stats printer
 */
static int
print_recursive_stats(stat_printer_f func, nvlist_t *nvroot,
    const char *pool_name, const char *parent_name, int descend)
{
    uint_t c, children;
    nvlist_t **child;
    char vdev_name[256];
    int err;

    err = func(nvroot, pool_name, parent_name);
    if (err)
        return (err);

    if (descend && nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
        &child, &children) == 0) {
        (void) strlcpy(vdev_name, get_vdev_name(nvroot, parent_name),
            sizeof (vdev_name));

        for (c = 0; c < children; c++) {
            err = print_recursive_stats(func, child[c], pool_name,
                vdev_name, descend);
            if (err)
                return (err);
        }
    }
    return (0);
}

/*
 * call-back to print the stats from the pool config
 *
 * Note: if the pool is broken, this can hang indefinitely and perhaps in an
 * unkillable state.
 */
static int
print_stats(zpool_handle_t *zhp, void *data)
{
    uint_t c;
    int err;
    boolean_t missing;
    nvlist_t *config, *nvroot;
    vdev_stat_t *vs;
    struct timespec tv;
    char *pool_name;

    /* if not this pool return quickly */
    if (data &&
        strncmp(data, zpool_get_name(zhp), ZFS_MAX_DATASET_NAME_LEN) != 0) {
        zpool_close(zhp);
        return (0);
    }

    if (zpool_refresh_stats(zhp, &missing) != 0) {
        zpool_close(zhp);
        return (1);
    }

    config = zpool_get_config(zhp, NULL);
    if (clock_gettime(CLOCK_REALTIME, &tv) != 0)
        timestamp = (uint64_t)time(NULL) * 1000000000;
    else
        timestamp =
            ((uint64_t)tv.tv_sec * 1000000000) + (uint64_t)tv.tv_nsec;

    if (nvlist_lookup_nvlist(
        config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) != 0) {
        zpool_close(zhp);
        return (2);
    }
    if (nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
        (uint64_t **)&vs, &c) != 0) {
        zpool_close(zhp);
        return (3);
    }

    pool_name = escape_string(zpool_get_name(zhp));
    err = print_recursive_stats(print_summary_stats, nvroot,
        pool_name, NULL, 1);
    /* if any of these return an error, skip the rest */
    if (err == 0)
        err = print_top_level_vdev_stats(nvroot, pool_name);

    if (no_histograms == 0) {
        if (err == 0)
            err = print_recursive_stats(print_vdev_latency_stats,
                nvroot, pool_name, NULL, 1);
        if (err == 0)
            err = print_recursive_stats(print_vdev_size_stats,
                nvroot, pool_name, NULL, 1);
        if (err == 0)
            err = print_recursive_stats(print_queue_stats,
                nvroot, pool_name, NULL, 0);
    }
    if (err == 0)
        err = print_scan_status(nvroot, pool_name);

    free(pool_name);
    zpool_close(zhp);
    return (err);
}
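/*
 * Note: the timestamp used above is nanoseconds since the Epoch from
 * clock_gettime(CLOCK_REALTIME), with a time(NULL)-based fallback, and
 * is shared by every line printed for one pool's sample.
 */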
static void
usage(char *name)
{
    fprintf(stderr, "usage: %s [--execd] [--no-histograms] "
        "[--sum-histogram-buckets] [--signed-int] [--tags key=value] "
        "[poolname]\n", name);
    exit(EXIT_FAILURE);
}

int
main(int argc, char *argv[])
{
    int opt;
    int ret = 8;
    char *line = NULL, *ttags = NULL;
    size_t len = 0, tagslen = 0;
    struct option long_options[] = {
        {"execd", no_argument, NULL, 'e'},
        {"help", no_argument, NULL, 'h'},
        {"no-histograms", no_argument, NULL, 'n'},
        {"signed-int", no_argument, NULL, 'i'},
        {"sum-histogram-buckets", no_argument, NULL, 's'},
        {"tags", required_argument, NULL, 't'},
        {0, 0, 0, 0}
    };
    while ((opt = getopt_long(
        argc, argv, "ehinst:", long_options, NULL)) != -1) {
        switch (opt) {
        case 'e':
            execd_mode = 1;
            break;
        case 'i':
            metric_data_type = 'i';
            metric_value_mask = INT64_MAX;
            break;
        case 'n':
            no_histograms = 1;
            break;
        case 's':
            sum_histogram_buckets = 1;
            break;
        case 't':
            free(ttags);
            tagslen = strlen(optarg) + 2;
            ttags = calloc(1, tagslen);
            if (ttags == NULL) {
                fprintf(stderr,
                    "error: cannot allocate memory "
                    "for tags\n");
                exit(1);
            }
            (void) snprintf(ttags, tagslen, ",%s", optarg);
            tags = ttags;
            break;
        default:
            usage(argv[0]);
        }
    }

    libzfs_handle_t *g_zfs;
    if ((g_zfs = libzfs_init()) == NULL) {
        fprintf(stderr,
            "error: cannot initialize libzfs. "
            "Is the zfs module loaded or zrepl running?\n");
        exit(EXIT_FAILURE);
    }
    if (execd_mode == 0) {
        ret = zpool_iter(g_zfs, print_stats, argv[optind]);
        return (ret);
    }
    while (getline(&line, &len, stdin) != -1) {
        ret = zpool_iter(g_zfs, print_stats, argv[optind]);
        fflush(stdout);
    }
    return (ret);
}
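/*
 * Illustrative telegraf configuration for the two integration modes noted
 * at the top of this file (the binary path is an assumption; adjust it
 * for your install):
 *
 *   [[inputs.execd]]
 *     command = ["/usr/libexec/zfs/zpool_influxdb", "--execd"]
 *     signal = "STDIN"
 *     restart_delay = "10s"
 *     data_format = "influx"
 *
 * or, without execd mode:
 *
 *   [[inputs.exec]]
 *     commands = ["/usr/libexec/zfs/zpool_influxdb"]
 *     data_format = "influx"
 */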