Path: blob/main/cddl/contrib/opensolaris/lib/libdtrace/common/dt_consume.c
39562 views
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2023, Domagoj Stolfa. All rights reserved.
 * Copyright (c) 2017, Joyent, Inc. All rights reserved.
 * Copyright (c) 2012 by Delphix.
All rights reserved.29*/3031#include <stdlib.h>32#include <strings.h>33#include <errno.h>34#include <unistd.h>35#include <limits.h>36#include <assert.h>37#include <ctype.h>38#ifdef illumos39#include <alloca.h>40#endif41#include <dt_impl.h>42#include <dt_pq.h>43#include <dt_oformat.h>44#ifndef illumos45#include <libproc_compat.h>46#endif4748#define DT_MASK_LO 0x00000000FFFFFFFFULL4950#define dt_format_sym(dtp, addr) dt_print_sym((dtp), NULL, NULL, addr)5152typedef struct dt_prepare_args {53int first_bin;54int last_bin;55union {56struct lquantize_args {57#define lquantize_step u.lquantize.step58#define lquantize_levels u.lquantize.levels59#define lquantize_base u.lquantize.base60int base;61uint16_t step;62uint16_t levels;63} lquantize;64struct llquantize_args {65#define llquantize_next u.llquantize.next66#define llquantize_step u.llquantize.step67#define llquantize_value u.llquantize.value68#define llquantize_levels u.llquantize.levels69#define llquantize_order u.llquantize.order70#define llquantize_factor u.llquantize.factor71#define llquantize_low u.llquantize.low72#define llquantize_high u.llquantize.high73#define llquantize_nsteps u.llquantize.nsteps74int64_t next;75int64_t step;76int64_t value;77int levels;78int order;79uint16_t factor;80uint16_t low;81uint16_t high;82uint16_t nsteps;83} llquantize;84} u;85} dt_prepare_args_t;8687/*88* We declare this here because (1) we need it and (2) we want to avoid a89* dependency on libm in libdtrace.90*/91static long double92dt_fabsl(long double x)93{94if (x < 0)95return (-x);9697return (x);98}99100static int101dt_ndigits(long long val)102{103int rval = 1;104long long cmp = 10;105106if (val < 0) {107val = val == INT64_MIN ? INT64_MAX : -val;108rval++;109}110111while (val > cmp && cmp > 0) {112rval++;113cmp *= 10;114}115116return (rval < 4 ? 
4 : rval);117}118119/*120* 128-bit arithmetic functions needed to support the stddev() aggregating121* action.122*/123static int124dt_gt_128(uint64_t *a, uint64_t *b)125{126return (a[1] > b[1] || (a[1] == b[1] && a[0] > b[0]));127}128129static int130dt_ge_128(uint64_t *a, uint64_t *b)131{132return (a[1] > b[1] || (a[1] == b[1] && a[0] >= b[0]));133}134135static int136dt_le_128(uint64_t *a, uint64_t *b)137{138return (a[1] < b[1] || (a[1] == b[1] && a[0] <= b[0]));139}140141/*142* Shift the 128-bit value in a by b. If b is positive, shift left.143* If b is negative, shift right.144*/145static void146dt_shift_128(uint64_t *a, int b)147{148uint64_t mask;149150if (b == 0)151return;152153if (b < 0) {154b = -b;155if (b >= 64) {156a[0] = a[1] >> (b - 64);157a[1] = 0;158} else {159a[0] >>= b;160mask = 1LL << (64 - b);161mask -= 1;162a[0] |= ((a[1] & mask) << (64 - b));163a[1] >>= b;164}165} else {166if (b >= 64) {167a[1] = a[0] << (b - 64);168a[0] = 0;169} else {170a[1] <<= b;171mask = a[0] >> (64 - b);172a[1] |= mask;173a[0] <<= b;174}175}176}177178static int179dt_nbits_128(uint64_t *a)180{181int nbits = 0;182uint64_t tmp[2];183uint64_t zero[2] = { 0, 0 };184185tmp[0] = a[0];186tmp[1] = a[1];187188dt_shift_128(tmp, -1);189while (dt_gt_128(tmp, zero)) {190dt_shift_128(tmp, -1);191nbits++;192}193194return (nbits);195}196197static void198dt_subtract_128(uint64_t *minuend, uint64_t *subtrahend, uint64_t *difference)199{200uint64_t result[2];201202result[0] = minuend[0] - subtrahend[0];203result[1] = minuend[1] - subtrahend[1] -204(minuend[0] < subtrahend[0] ? 1 : 0);205206difference[0] = result[0];207difference[1] = result[1];208}209210static void211dt_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)212{213uint64_t result[2];214215result[0] = addend1[0] + addend2[0];216result[1] = addend1[1] + addend2[1] +217(result[0] < addend1[0] || result[0] < addend2[0] ? 
1 : 0);218219sum[0] = result[0];220sum[1] = result[1];221}222223/*224* The basic idea is to break the 2 64-bit values into 4 32-bit values,225* use native multiplication on those, and then re-combine into the226* resulting 128-bit value.227*228* (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =229* hi1 * hi2 << 64 +230* hi1 * lo2 << 32 +231* hi2 * lo1 << 32 +232* lo1 * lo2233*/234static void235dt_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)236{237uint64_t hi1, hi2, lo1, lo2;238uint64_t tmp[2];239240hi1 = factor1 >> 32;241hi2 = factor2 >> 32;242243lo1 = factor1 & DT_MASK_LO;244lo2 = factor2 & DT_MASK_LO;245246product[0] = lo1 * lo2;247product[1] = hi1 * hi2;248249tmp[0] = hi1 * lo2;250tmp[1] = 0;251dt_shift_128(tmp, 32);252dt_add_128(product, tmp, product);253254tmp[0] = hi2 * lo1;255tmp[1] = 0;256dt_shift_128(tmp, 32);257dt_add_128(product, tmp, product);258}259260/*261* This is long-hand division.262*263* We initialize subtrahend by shifting divisor left as far as possible. We264* loop, comparing subtrahend to dividend: if subtrahend is smaller, we265* subtract and set the appropriate bit in the result. 
We then shift266* subtrahend right by one bit for the next comparison.267*/268static void269dt_divide_128(uint64_t *dividend, uint64_t divisor, uint64_t *quotient)270{271uint64_t result[2] = { 0, 0 };272uint64_t remainder[2];273uint64_t subtrahend[2];274uint64_t divisor_128[2];275uint64_t mask[2] = { 1, 0 };276int log = 0;277278assert(divisor != 0);279280divisor_128[0] = divisor;281divisor_128[1] = 0;282283remainder[0] = dividend[0];284remainder[1] = dividend[1];285286subtrahend[0] = divisor;287subtrahend[1] = 0;288289while (divisor > 0) {290log++;291divisor >>= 1;292}293294dt_shift_128(subtrahend, 128 - log);295dt_shift_128(mask, 128 - log);296297while (dt_ge_128(remainder, divisor_128)) {298if (dt_ge_128(remainder, subtrahend)) {299dt_subtract_128(remainder, subtrahend, remainder);300result[0] |= mask[0];301result[1] |= mask[1];302}303304dt_shift_128(subtrahend, -1);305dt_shift_128(mask, -1);306}307308quotient[0] = result[0];309quotient[1] = result[1];310}311312/*313* This is the long-hand method of calculating a square root.314* The algorithm is as follows:315*316* 1. Group the digits by 2 from the right.317* 2. Over the leftmost group, find the largest single-digit number318* whose square is less than that group.319* 3. Subtract the result of the previous step (2 or 4, depending) and320* bring down the next two-digit group.321* 4. For the result R we have so far, find the largest single-digit number322* x such that 2 * R * 10 * x + x^2 is less than the result from step 3.323* (Note that this is doubling R and performing a decimal left-shift by 1324* and searching for the appropriate decimal to fill the one's place.)325* The value x is the next digit in the square root.326* Repeat steps 3 and 4 until the desired precision is reached. 
(We're327* dealing with integers, so the above is sufficient.)328*329* In decimal, the square root of 582,734 would be calculated as so:330*331* __7__6__3332* | 58 27 34333* -49 (7^2 == 49 => 7 is the first digit in the square root)334* --335* 9 27 (Subtract and bring down the next group.)336* 146 8 76 (2 * 7 * 10 * 6 + 6^2 == 876 => 6 is the next digit in337* ----- the square root)338* 51 34 (Subtract and bring down the next group.)339* 1523 45 69 (2 * 76 * 10 * 3 + 3^2 == 4569 => 3 is the next digit in340* ----- the square root)341* 5 65 (remainder)342*343* The above algorithm applies similarly in binary, but note that the344* only possible non-zero value for x in step 4 is 1, so step 4 becomes a345* simple decision: is 2 * R * 2 * 1 + 1^2 (aka R << 2 + 1) less than the346* preceding difference?347*348* In binary, the square root of 11011011 would be calculated as so:349*350* __1__1__1__0351* | 11 01 10 11352* 01 (0 << 2 + 1 == 1 < 11 => this bit is 1)353* --354* 10 01 10 11355* 101 1 01 (1 << 2 + 1 == 101 < 1001 => next bit is 1)356* -----357* 1 00 10 11358* 1101 11 01 (11 << 2 + 1 == 1101 < 10010 => next bit is 1)359* -------360* 1 01 11361* 11101 1 11 01 (111 << 2 + 1 == 11101 > 10111 => last bit is 0)362*363*/364static uint64_t365dt_sqrt_128(uint64_t *square)366{367uint64_t result[2] = { 0, 0 };368uint64_t diff[2] = { 0, 0 };369uint64_t one[2] = { 1, 0 };370uint64_t next_pair[2];371uint64_t next_try[2];372uint64_t bit_pairs, pair_shift;373int i;374375bit_pairs = dt_nbits_128(square) / 2;376pair_shift = bit_pairs * 2;377378for (i = 0; i <= bit_pairs; i++) {379/*380* Bring down the next pair of bits.381*/382next_pair[0] = square[0];383next_pair[1] = square[1];384dt_shift_128(next_pair, -pair_shift);385next_pair[0] &= 0x3;386next_pair[1] = 0;387388dt_shift_128(diff, 2);389dt_add_128(diff, next_pair, diff);390391/*392* next_try = R << 2 + 1393*/394next_try[0] = result[0];395next_try[1] = result[1];396dt_shift_128(next_try, 2);397dt_add_128(next_try, one, 
next_try);398399if (dt_le_128(next_try, diff)) {400dt_subtract_128(diff, next_try, diff);401dt_shift_128(result, 1);402dt_add_128(result, one, result);403} else {404dt_shift_128(result, 1);405}406407pair_shift -= 2;408}409410assert(result[1] == 0);411412return (result[0]);413}414415uint64_t416dt_stddev(uint64_t *data, uint64_t normal)417{418uint64_t avg_of_squares[2];419uint64_t square_of_avg[2];420int64_t norm_avg;421uint64_t diff[2];422423if (data[0] == 0)424return (0);425426/*427* The standard approximation for standard deviation is428* sqrt(average(x**2) - average(x)**2), i.e. the square root429* of the average of the squares minus the square of the average.430* When normalizing, we should divide the sum of x**2 by normal**2.431*/432dt_divide_128(data + 2, normal, avg_of_squares);433dt_divide_128(avg_of_squares, normal, avg_of_squares);434dt_divide_128(avg_of_squares, data[0], avg_of_squares);435436norm_avg = (int64_t)data[1] / (int64_t)normal / (int64_t)data[0];437438if (norm_avg < 0)439norm_avg = -norm_avg;440441dt_multiply_128((uint64_t)norm_avg, (uint64_t)norm_avg, square_of_avg);442443dt_subtract_128(avg_of_squares, square_of_avg, diff);444445return (dt_sqrt_128(diff));446}447448static int449dt_flowindent(dtrace_hdl_t *dtp, dtrace_probedata_t *data, dtrace_epid_t last,450dtrace_bufdesc_t *buf, size_t offs)451{452dtrace_probedesc_t *pd = data->dtpda_pdesc, *npd;453dtrace_eprobedesc_t *epd = data->dtpda_edesc, *nepd;454char *p = pd->dtpd_provider, *n = pd->dtpd_name, *sub;455dtrace_flowkind_t flow = DTRACEFLOW_NONE;456const char *str = NULL;457static const char *e_str[2] = { " -> ", " => " };458static const char *r_str[2] = { " <- ", " <= " };459static const char *ent = "entry", *ret = "return";460static int entlen = 0, retlen = 0;461dtrace_epid_t next, id = epd->dtepd_epid;462int rval;463464if (entlen == 0) {465assert(retlen == 0);466entlen = strlen(ent);467retlen = strlen(ret);468}469470/*471* If the name of the probe is "entry" or ends with "-entry", 
we472* treat it as an entry; if it is "return" or ends with "-return",473* we treat it as a return. (This allows application-provided probes474* like "method-entry" or "function-entry" to participate in flow475* indentation -- without accidentally misinterpreting popular probe476* names like "carpentry", "gentry" or "Coventry".)477*/478if ((sub = strstr(n, ent)) != NULL && sub[entlen] == '\0' &&479(sub == n || sub[-1] == '-')) {480flow = DTRACEFLOW_ENTRY;481str = e_str[strcmp(p, "syscall") == 0];482} else if ((sub = strstr(n, ret)) != NULL && sub[retlen] == '\0' &&483(sub == n || sub[-1] == '-')) {484flow = DTRACEFLOW_RETURN;485str = r_str[strcmp(p, "syscall") == 0];486}487488/*489* If we're going to indent this, we need to check the ID of our last490* call. If we're looking at the same probe ID but a different EPID,491* we _don't_ want to indent. (Yes, there are some minor holes in492* this scheme -- it's a heuristic.)493*/494if (flow == DTRACEFLOW_ENTRY) {495if ((last != DTRACE_EPIDNONE && id != last &&496pd->dtpd_id == dtp->dt_pdesc[last]->dtpd_id))497flow = DTRACEFLOW_NONE;498}499500/*501* If we're going to unindent this, it's more difficult to see if502* we don't actually want to unindent it -- we need to look at the503* _next_ EPID.504*/505if (flow == DTRACEFLOW_RETURN) {506offs += epd->dtepd_size;507508do {509if (offs >= buf->dtbd_size)510goto out;511512next = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);513514if (next == DTRACE_EPIDNONE)515offs += sizeof (id);516} while (next == DTRACE_EPIDNONE);517518if ((rval = dt_epid_lookup(dtp, next, &nepd, &npd)) != 0)519return (rval);520521if (next != id && npd->dtpd_id == pd->dtpd_id)522flow = DTRACEFLOW_NONE;523}524525out:526if (flow == DTRACEFLOW_ENTRY || flow == DTRACEFLOW_RETURN) {527data->dtpda_prefix = str;528} else {529data->dtpda_prefix = "| ";530}531532if (flow == DTRACEFLOW_RETURN && data->dtpda_indent > 0)533data->dtpda_indent -= 2;534535data->dtpda_flow = flow;536537return (0);538}539540static 
int541dt_nullprobe()542{543return (DTRACE_CONSUME_THIS);544}545546static int547dt_nullrec()548{549return (DTRACE_CONSUME_NEXT);550}551552static void553dt_quantize_total(dtrace_hdl_t *dtp, int64_t datum, long double *total)554{555long double val = dt_fabsl((long double)datum);556557if (dtp->dt_options[DTRACEOPT_AGGZOOM] == DTRACEOPT_UNSET) {558*total += val;559return;560}561562/*563* If we're zooming in on an aggregation, we want the height of the564* highest value to be approximately 95% of total bar height -- so we565* adjust up by the reciprocal of DTRACE_AGGZOOM_MAX when comparing to566* our highest value.567*/568val *= 1 / DTRACE_AGGZOOM_MAX;569570if (*total < val)571*total = val;572}573574static int575dt_print_quanthdr(dtrace_hdl_t *dtp, FILE *fp, int width)576{577return (dt_printf(dtp, fp, "\n%*s %41s %-9s\n",578width ? width : 16, width ? "key" : "value",579"------------- Distribution -------------", "count"));580}581582static int583dt_print_quanthdr_packed(dtrace_hdl_t *dtp, FILE *fp, int width,584const dtrace_aggdata_t *aggdata, dtrace_actkind_t action)585{586int min = aggdata->dtada_minbin, max = aggdata->dtada_maxbin;587int minwidth, maxwidth, i;588589assert(action == DTRACEAGG_QUANTIZE || action == DTRACEAGG_LQUANTIZE);590591if (action == DTRACEAGG_QUANTIZE) {592if (min != 0 && min != DTRACE_QUANTIZE_ZEROBUCKET)593min--;594595if (max < DTRACE_QUANTIZE_NBUCKETS - 1)596max++;597598minwidth = dt_ndigits(DTRACE_QUANTIZE_BUCKETVAL(min));599maxwidth = dt_ndigits(DTRACE_QUANTIZE_BUCKETVAL(max));600} else {601maxwidth = 8;602minwidth = maxwidth - 1;603max++;604}605606if (dt_printf(dtp, fp, "\n%*s %*s .",607width, width > 0 ? "key" : "", minwidth, "min") < 0)608return (-1);609610for (i = min; i <= max; i++) {611if (dt_printf(dtp, fp, "-") < 0)612return (-1);613}614615return (dt_printf(dtp, fp, ". 
%*s | count\n", -maxwidth, "max"));616}617618/*619* We use a subset of the Unicode Block Elements (U+2588 through U+258F,620* inclusive) to represent aggregations via UTF-8 -- which are expressed via621* 3-byte UTF-8 sequences.622*/623#define DTRACE_AGGUTF8_FULL 0x2588624#define DTRACE_AGGUTF8_BASE 0x258f625#define DTRACE_AGGUTF8_LEVELS 8626627#define DTRACE_AGGUTF8_BYTE0(val) (0xe0 | ((val) >> 12))628#define DTRACE_AGGUTF8_BYTE1(val) (0x80 | (((val) >> 6) & 0x3f))629#define DTRACE_AGGUTF8_BYTE2(val) (0x80 | ((val) & 0x3f))630631static int632dt_print_quantline_utf8(dtrace_hdl_t *dtp, FILE *fp, int64_t val,633uint64_t normal, long double total)634{635uint_t len = 40, i, whole, partial;636long double f = (dt_fabsl((long double)val) * len) / total;637const char *spaces = " ";638639whole = (uint_t)f;640partial = (uint_t)((f - (long double)(uint_t)f) *641(long double)DTRACE_AGGUTF8_LEVELS);642643if (dt_printf(dtp, fp, "|") < 0)644return (-1);645646for (i = 0; i < whole; i++) {647if (dt_printf(dtp, fp, "%c%c%c",648DTRACE_AGGUTF8_BYTE0(DTRACE_AGGUTF8_FULL),649DTRACE_AGGUTF8_BYTE1(DTRACE_AGGUTF8_FULL),650DTRACE_AGGUTF8_BYTE2(DTRACE_AGGUTF8_FULL)) < 0)651return (-1);652}653654if (partial != 0) {655partial = DTRACE_AGGUTF8_BASE - (partial - 1);656657if (dt_printf(dtp, fp, "%c%c%c",658DTRACE_AGGUTF8_BYTE0(partial),659DTRACE_AGGUTF8_BYTE1(partial),660DTRACE_AGGUTF8_BYTE2(partial)) < 0)661return (-1);662663i++;664}665666return (dt_printf(dtp, fp, "%s %-9lld\n", spaces + i,667(long long)val / normal));668}669670static int671dt_print_quantline(dtrace_hdl_t *dtp, FILE *fp, int64_t val,672uint64_t normal, long double total, char positives, char negatives)673{674long double f;675uint_t depth, len = 40;676677const char *ats = "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@";678const char *spaces = " ";679680assert(strlen(ats) == len && strlen(spaces) == len);681assert(!(total == 0 && (positives || negatives)));682assert(!(val < 0 && !negatives));683assert(!(val > 0 && 
!positives));684assert(!(val != 0 && total == 0));685686if (!negatives) {687if (positives) {688if (dtp->dt_encoding == DT_ENCODING_UTF8) {689return (dt_print_quantline_utf8(dtp, fp, val,690normal, total));691}692693f = (dt_fabsl((long double)val) * len) / total;694depth = (uint_t)(f + 0.5);695} else {696depth = 0;697}698699return (dt_printf(dtp, fp, "|%s%s %-9lld\n", ats + len - depth,700spaces + depth, (long long)val / normal));701}702703if (!positives) {704f = (dt_fabsl((long double)val) * len) / total;705depth = (uint_t)(f + 0.5);706707return (dt_printf(dtp, fp, "%s%s| %-9lld\n", spaces + depth,708ats + len - depth, (long long)val / normal));709}710711/*712* If we're here, we have both positive and negative bucket values.713* To express this graphically, we're going to generate both positive714* and negative bars separated by a centerline. These bars are half715* the size of normal quantize()/lquantize() bars, so we divide the716* length in half before calculating the bar length.717*/718len /= 2;719ats = &ats[len];720spaces = &spaces[len];721722f = (dt_fabsl((long double)val) * len) / total;723depth = (uint_t)(f + 0.5);724725if (val <= 0) {726return (dt_printf(dtp, fp, "%s%s|%*s %-9lld\n", spaces + depth,727ats + len - depth, len, "", (long long)val / normal));728} else {729return (dt_printf(dtp, fp, "%20s|%s%s %-9lld\n", "",730ats + len - depth, spaces + depth,731(long long)val / normal));732}733}734735/*736* As with UTF-8 printing of aggregations, we use a subset of the Unicode737* Block Elements (U+2581 through U+2588, inclusive) to represent our packed738* aggregation.739*/740#define DTRACE_AGGPACK_BASE 0x2581741#define DTRACE_AGGPACK_LEVELS 8742743static int744dt_print_packed(dtrace_hdl_t *dtp, FILE *fp,745long double datum, long double total)746{747static boolean_t utf8_checked = B_FALSE;748static boolean_t utf8;749char *ascii = "__xxxxXX";750char *neg = "vvvvVV";751unsigned int len;752long double val;753754if (!utf8_checked) {755char *term;756757/*758* We 
want to determine if we can reasonably emit UTF-8 for our759* packed aggregation. To do this, we will check for terminals760* that are known to be primitive to emit UTF-8 on these.761*/762utf8_checked = B_TRUE;763764if (dtp->dt_encoding == DT_ENCODING_ASCII) {765utf8 = B_FALSE;766} else if (dtp->dt_encoding == DT_ENCODING_UTF8) {767utf8 = B_TRUE;768} else if ((term = getenv("TERM")) != NULL &&769(strcmp(term, "sun") == 0 ||770strcmp(term, "sun-color") == 0 ||771strcmp(term, "dumb") == 0)) {772utf8 = B_FALSE;773} else {774utf8 = B_TRUE;775}776}777778if (datum == 0)779return (dt_printf(dtp, fp, " "));780781if (datum < 0) {782len = strlen(neg);783val = dt_fabsl(datum * (len - 1)) / total;784return (dt_printf(dtp, fp, "%c", neg[(uint_t)(val + 0.5)]));785}786787if (utf8) {788int block = DTRACE_AGGPACK_BASE + (unsigned int)(((datum *789(DTRACE_AGGPACK_LEVELS - 1)) / total) + 0.5);790791return (dt_printf(dtp, fp, "%c%c%c",792DTRACE_AGGUTF8_BYTE0(block),793DTRACE_AGGUTF8_BYTE1(block),794DTRACE_AGGUTF8_BYTE2(block)));795}796797len = strlen(ascii);798val = (datum * (len - 1)) / total;799return (dt_printf(dtp, fp, "%c", ascii[(uint_t)(val + 0.5)]));800}801802static const int64_t *803dt_format_quantize_prepare(dtrace_hdl_t *dtp, const void *addr, size_t size,804dt_prepare_args_t *args)805{806const int64_t *data = addr;807int first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1;808809if (size != DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t)) {810(void) dt_set_errno(dtp, EDT_DMISMATCH);811return (NULL);812}813814while (first_bin < DTRACE_QUANTIZE_NBUCKETS - 1 && data[first_bin] == 0)815first_bin++;816817if (first_bin == DTRACE_QUANTIZE_NBUCKETS - 1) {818/*819* There isn't any data. This is possible if the aggregation820* has been clear()'d or if negative increment values have been821* used. 
Regardless, we'll print the buckets around 0.822*/823first_bin = DTRACE_QUANTIZE_ZEROBUCKET - 1;824last_bin = DTRACE_QUANTIZE_ZEROBUCKET + 1;825} else {826if (first_bin > 0)827first_bin--;828829while (last_bin > 0 && data[last_bin] == 0)830last_bin--;831832if (last_bin < DTRACE_QUANTIZE_NBUCKETS - 1)833last_bin++;834}835836args->first_bin = first_bin;837args->last_bin = last_bin;838return (data);839}840841int842dt_format_quantize(dtrace_hdl_t *dtp, const void *addr, size_t size,843uint64_t normal)844{845const int64_t *data;846dt_prepare_args_t args = { 0 };847int i, first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1;848849data = dt_format_quantize_prepare(dtp, addr, size, &args);850/* dt_errno is set for us */851if (data == NULL)852return (-1);853854first_bin = args.first_bin;855last_bin = args.last_bin;856857xo_open_list("buckets");858for (i = first_bin; i <= last_bin; i++) {859long long value = (long long)DTRACE_QUANTIZE_BUCKETVAL(i);860xo_open_instance("buckets");861xo_emit("{:value/%lld} {:count/%lld}", value,862(long long)data[i] / normal);863xo_close_instance("buckets");864}865xo_close_list("buckets");866867return (0);868}869870int871dt_print_quantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,872size_t size, uint64_t normal)873{874const int64_t *data;875dt_prepare_args_t args = { 0 };876int i, first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1;877long double total = 0;878char positives = 0, negatives = 0;879880data = dt_format_quantize_prepare(dtp, addr, size, &args);881/* dt_errno is set for us */882if (data == NULL)883return (-1);884885first_bin = args.first_bin;886last_bin = args.last_bin;887888for (i = first_bin; i <= last_bin; i++) {889positives |= (data[i] > 0);890negatives |= (data[i] < 0);891dt_quantize_total(dtp, data[i], &total);892}893894if (dt_print_quanthdr(dtp, fp, 0) < 0)895return (-1);896897for (i = first_bin; i <= last_bin; i++) {898if (dt_printf(dtp, fp, "%16lld ",899(long long)DTRACE_QUANTIZE_BUCKETVAL(i)) < 0)900return 
(-1);901902if (dt_print_quantline(dtp, fp, data[i], normal, total,903positives, negatives) < 0)904return (-1);905}906907return (0);908}909910int911dt_print_quantize_packed(dtrace_hdl_t *dtp, FILE *fp, const void *addr,912size_t size, const dtrace_aggdata_t *aggdata)913{914const int64_t *data = addr;915long double total = 0, count = 0;916int min = aggdata->dtada_minbin, max = aggdata->dtada_maxbin, i;917int64_t minval, maxval;918919if (size != DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))920return (dt_set_errno(dtp, EDT_DMISMATCH));921922if (min != 0 && min != DTRACE_QUANTIZE_ZEROBUCKET)923min--;924925if (max < DTRACE_QUANTIZE_NBUCKETS - 1)926max++;927928minval = DTRACE_QUANTIZE_BUCKETVAL(min);929maxval = DTRACE_QUANTIZE_BUCKETVAL(max);930931if (dt_printf(dtp, fp, " %*lld :", dt_ndigits(minval),932(long long)minval) < 0)933return (-1);934935for (i = min; i <= max; i++) {936dt_quantize_total(dtp, data[i], &total);937count += data[i];938}939940for (i = min; i <= max; i++) {941if (dt_print_packed(dtp, fp, data[i], total) < 0)942return (-1);943}944945if (dt_printf(dtp, fp, ": %*lld | %lld\n",946-dt_ndigits(maxval), (long long)maxval, (long long)count) < 0)947return (-1);948949return (0);950}951952static const int64_t *953dt_format_lquantize_prepare(dtrace_hdl_t *dtp, const void *addr, size_t size,954dt_prepare_args_t *args)955{956const int64_t *data = addr;957int first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1, base;958uint64_t arg;959uint16_t step, levels;960961if (size < sizeof (uint64_t)) {962(void) dt_set_errno(dtp, EDT_DMISMATCH);963return (NULL);964}965966arg = *data++;967size -= sizeof (uint64_t);968969base = DTRACE_LQUANTIZE_BASE(arg);970step = DTRACE_LQUANTIZE_STEP(arg);971levels = DTRACE_LQUANTIZE_LEVELS(arg);972973first_bin = 0;974last_bin = levels + 1;975976if (size != sizeof (uint64_t) * (levels + 2)) {977(void) dt_set_errno(dtp, EDT_DMISMATCH);978return (NULL);979}980981while (first_bin <= levels + 1 && data[first_bin] == 0)982first_bin++;983984if 
(first_bin > levels + 1) {985first_bin = 0;986last_bin = 2;987} else {988if (first_bin > 0)989first_bin--;990991while (last_bin > 0 && data[last_bin] == 0)992last_bin--;993994if (last_bin < levels + 1)995last_bin++;996}997998args->first_bin = first_bin;999args->last_bin = last_bin;1000args->lquantize_base = base;1001args->lquantize_step = step;1002args->lquantize_levels = levels;1003return (data);1004}10051006int1007dt_format_lquantize(dtrace_hdl_t *dtp, const void *addr, size_t size,1008uint64_t normal)1009{1010const int64_t *data;1011dt_prepare_args_t args = { 0 };1012int i, first_bin, last_bin, base;1013uint16_t step, levels;10141015data = dt_format_lquantize_prepare(dtp, addr, size, &args);1016/* dt_errno is set for us */1017if (data == NULL)1018return (-1);10191020first_bin = args.first_bin;1021last_bin = args.last_bin;1022step = args.lquantize_step;1023levels = args.lquantize_levels;1024base = args.lquantize_base;10251026xo_open_list("buckets");1027for (i = first_bin; i <= last_bin; i++) {1028char c[32];1029int err;10301031xo_open_instance("buckets");1032if (i == 0) {1033xo_emit("{:value/%d} {:operator/%s}", base, "<");1034} else if (i == levels + 1) {1035xo_emit("{:value/%d} {:operator/%s}",1036base + (levels * step), ">=");1037} else {1038xo_emit("{:value/%d}", base + (i - 1) * step);1039}10401041xo_emit("{:count/%lld}", (long long)data[i] / normal);1042xo_close_instance("buckets");1043}1044xo_close_list("buckets");10451046return (0);1047}10481049int1050dt_print_lquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,1051size_t size, uint64_t normal)1052{1053const int64_t *data;1054dt_prepare_args_t args = { 0 };1055int i, first_bin, last_bin, base;1056uint64_t arg;1057long double total = 0;1058uint16_t step, levels;1059char positives = 0, negatives = 0;10601061data = dt_format_lquantize_prepare(dtp, addr, size, &args);1062/* dt_errno is set for us */1063if (data == NULL)1064return (-1);10651066first_bin = args.first_bin;1067last_bin = 
args.last_bin;1068step = args.lquantize_step;1069levels = args.lquantize_levels;1070base = args.lquantize_base;10711072for (i = first_bin; i <= last_bin; i++) {1073positives |= (data[i] > 0);1074negatives |= (data[i] < 0);1075dt_quantize_total(dtp, data[i], &total);1076}10771078if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",1079"------------- Distribution -------------", "count") < 0)1080return (-1);10811082for (i = first_bin; i <= last_bin; i++) {1083char c[32];1084int err;10851086if (i == 0) {1087(void) snprintf(c, sizeof (c), "< %d", base);1088err = dt_printf(dtp, fp, "%16s ", c);1089} else if (i == levels + 1) {1090(void) snprintf(c, sizeof (c), ">= %d",1091base + (levels * step));1092err = dt_printf(dtp, fp, "%16s ", c);1093} else {1094err = dt_printf(dtp, fp, "%16d ",1095base + (i - 1) * step);1096}10971098if (err < 0 || dt_print_quantline(dtp, fp, data[i], normal,1099total, positives, negatives) < 0)1100return (-1);1101}11021103return (0);1104}11051106/*ARGSUSED*/1107int1108dt_print_lquantize_packed(dtrace_hdl_t *dtp, FILE *fp, const void *addr,1109size_t size, const dtrace_aggdata_t *aggdata)1110{1111const int64_t *data = addr;1112long double total = 0, count = 0;1113int min, max, base, err;1114uint64_t arg;1115uint16_t step, levels;1116char c[32];1117unsigned int i;11181119if (size < sizeof (uint64_t))1120return (dt_set_errno(dtp, EDT_DMISMATCH));11211122arg = *data++;1123size -= sizeof (uint64_t);11241125base = DTRACE_LQUANTIZE_BASE(arg);1126step = DTRACE_LQUANTIZE_STEP(arg);1127levels = DTRACE_LQUANTIZE_LEVELS(arg);11281129if (size != sizeof (uint64_t) * (levels + 2))1130return (dt_set_errno(dtp, EDT_DMISMATCH));11311132min = 0;1133max = levels + 1;11341135if (min == 0) {1136(void) snprintf(c, sizeof (c), "< %d", base);1137err = dt_printf(dtp, fp, "%8s :", c);1138} else {1139err = dt_printf(dtp, fp, "%8d :", base + (min - 1) * step);1140}11411142if (err < 0)1143return (-1);11441145for (i = min; i <= max; i++) {1146dt_quantize_total(dtp, data[i], 
&total);1147count += data[i];1148}11491150for (i = min; i <= max; i++) {1151if (dt_print_packed(dtp, fp, data[i], total) < 0)1152return (-1);1153}11541155(void) snprintf(c, sizeof (c), ">= %d", base + (levels * step));1156return (dt_printf(dtp, fp, ": %-8s | %lld\n", c, (long long)count));1157}11581159static const int64_t *1160dt_format_llquantize_prepare(dtrace_hdl_t *dtp, const void *addr, size_t size,1161dt_prepare_args_t *args)1162{1163int i, first_bin, last_bin, bin = 1, order, levels;1164uint16_t factor, low, high, nsteps;1165const int64_t *data = addr;1166int64_t value = 1, next, step;1167uint64_t arg;11681169if (size < sizeof(uint64_t)) {1170(void) dt_set_errno(dtp, EDT_DMISMATCH);1171return (NULL);1172}11731174arg = *data++;1175size -= sizeof (uint64_t);11761177factor = DTRACE_LLQUANTIZE_FACTOR(arg);1178low = DTRACE_LLQUANTIZE_LOW(arg);1179high = DTRACE_LLQUANTIZE_HIGH(arg);1180nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);11811182/*1183* We don't expect to be handed invalid llquantize() parameters here,1184* but sanity check them (to a degree) nonetheless.1185*/1186if (size > INT32_MAX || factor < 2 || low >= high ||1187nsteps == 0 || factor > nsteps) {1188(void) dt_set_errno(dtp, EDT_DMISMATCH);1189return (NULL);1190}11911192levels = (int)size / sizeof (uint64_t);11931194first_bin = 0;1195last_bin = levels - 1;11961197while (first_bin < levels && data[first_bin] == 0)1198first_bin++;11991200if (first_bin == levels) {1201first_bin = 0;1202last_bin = 1;1203} else {1204if (first_bin > 0)1205first_bin--;12061207while (last_bin > 0 && data[last_bin] == 0)1208last_bin--;12091210if (last_bin < levels - 1)1211last_bin++;1212}12131214for (order = 0; order < low; order++)1215value *= factor;12161217next = value * factor;1218step = next > nsteps ? 
next / nsteps : 1;12191220args->first_bin = first_bin;1221args->last_bin = last_bin;1222args->llquantize_factor = factor;1223args->llquantize_low = low;1224args->llquantize_high = high;1225args->llquantize_nsteps = nsteps;1226args->llquantize_levels = levels;1227args->llquantize_order = order;1228args->llquantize_next = next;1229args->llquantize_step = step;1230args->llquantize_value = value;12311232return (data);1233}12341235int1236dt_format_llquantize(dtrace_hdl_t *dtp, const void *addr, size_t size,1237uint64_t normal)1238{1239int first_bin, last_bin, bin = 1, order, levels;1240uint16_t factor, low, high, nsteps;1241const int64_t *data;1242dt_prepare_args_t args = { 0 };1243int64_t value = 1, next, step;1244uint64_t arg;1245char c[32];12461247data = dt_format_llquantize_prepare(dtp, addr, size, &args);1248/* dt_errno is set for us */1249if (data == NULL)1250return (-1);12511252first_bin = args.first_bin;1253last_bin = args.last_bin;1254factor = args.llquantize_factor;1255low = args.llquantize_low;1256high = args.llquantize_high;1257nsteps = args.llquantize_nsteps;1258levels = args.llquantize_levels;1259order = args.llquantize_order;1260next = args.llquantize_next;1261step = args.llquantize_step;1262value = args.llquantize_value;12631264xo_open_list("buckets");1265if (first_bin == 0) {1266/*1267* We have to represent < value somehow in JSON, so we bundle an1268* optional "operator" in llquantize buckets.1269*/1270xo_open_instance("buckets");1271xo_emit("{:value/%lld} {:count/%lld} {:operator/%s}",1272(long long)value, (long long)data[0] / normal, "<");1273xo_close_instance("buckets");1274}12751276while (order <= high) {1277if (bin >= first_bin && bin <= last_bin) {1278xo_open_instance("buckets");1279xo_emit("{:value/%lld} {:count/%lld}", (long long)value,1280(long long)data[bin] / normal);1281xo_close_instance("buckets");1282}12831284assert(value < next);1285bin++;12861287if ((value += step) != next)1288continue;12891290next = value * factor;1291step = next > 
/*
 * Print an llquantize() aggregation record to 'fp' as a text histogram.
 *
 * 'addr'/'size' describe the raw record, which is validated and decoded by
 * dt_format_llquantize_prepare(); 'normal' is passed through unchanged to
 * dt_print_quantline() for every printed row.
 *
 * Returns 0 on success, or -1 if the record is rejected (dt_errno is set by
 * the prepare routine) or if any dt_printf()/dt_print_quantline() call
 * reports a negative result.
 */
int
dt_print_llquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
    size_t size, uint64_t normal)
{
	int i, first_bin, last_bin, bin = 1, order, levels;
	uint16_t factor, low, high, nsteps;
	const int64_t *data;
	dt_prepare_args_t args = { 0 };
	int64_t value = 1, next, step;
	char positives = 0, negatives = 0;
	long double total = 0;
	uint64_t arg;
	char c[32];

	data = dt_format_llquantize_prepare(dtp, addr, size, &args);
	/* dt_errno is set for us */
	if (data == NULL)
		return (-1);

	/* Unpack the decoded parameters and initial iteration state. */
	first_bin = args.first_bin;
	last_bin = args.last_bin;
	factor = args.llquantize_factor;
	low = args.llquantize_low;
	high = args.llquantize_high;
	nsteps = args.llquantize_nsteps;
	levels = args.llquantize_levels;
	order = args.llquantize_order;
	next = args.llquantize_next;
	step = args.llquantize_step;
	value = args.llquantize_value;

	/*
	 * Pre-scan the buckets that will be printed: note whether any of them
	 * is positive or negative and feed each one to dt_quantize_total()
	 * (presumably accumulating 'total' -- that helper is defined
	 * elsewhere in this file).
	 */
	for (i = first_bin; i <= last_bin; i++) {
		positives |= (data[i] > 0);
		negatives |= (data[i] < 0);
		dt_quantize_total(dtp, data[i], &total);
	}

	if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
	    "------------- Distribution -------------", "count") < 0)
		return (-1);

	/* Bucket 0 holds everything below the first value: "< value". */
	if (first_bin == 0) {
		(void) snprintf(c, sizeof (c), "< %lld", (long long)value);

		if (dt_printf(dtp, fp, "%16s ", c) < 0)
			return (-1);

		if (dt_print_quantline(dtp, fp, data[0], normal,
		    total, positives, negatives) < 0)
			return (-1);
	}

	/*
	 * Walk the regular buckets starting at bin 1.  Within an order of
	 * magnitude 'value' advances by 'step'; when it reaches 'next' we
	 * move to the following order and recompute 'next' and 'step'.  Rows
	 * outside [first_bin, last_bin] are walked but not printed.
	 */
	while (order <= high) {
		if (bin >= first_bin && bin <= last_bin) {
			if (dt_printf(dtp, fp, "%16lld ", (long long)value) < 0)
				return (-1);

			if (dt_print_quantline(dtp, fp, data[bin],
			    normal, total, positives, negatives) < 0)
				return (-1);
		}

		assert(value < next);
		bin++;

		if ((value += step) != next)
			continue;

		next = value * factor;
		step = next > nsteps ? next / nsteps : 1;
		order++;
	}

	/* The range to print ended before the final (overflow) bucket. */
	if (last_bin < bin)
		return (0);

	/* The final bucket holds everything at or above 'value': ">= value". */
	assert(last_bin == bin);
	(void) snprintf(c, sizeof (c), ">= %lld", (long long)value);

	if (dt_printf(dtp, fp, "%16s ", c) < 0)
		return (-1);

	return (dt_print_quantline(dtp, fp, data[bin], normal,
	    total, positives, negatives));
}
(unsigned long long)dt_stddev(data, normal) : 0);1425return (0);1426}14271428/*ARGSUSED*/1429static int1430dt_print_stddev(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,1431size_t size, uint64_t normal)1432{1433/* LINTED - alignment */1434uint64_t *data = (uint64_t *)addr;14351436return (dt_printf(dtp, fp, " %16llu", data[0] ?1437(unsigned long long) dt_stddev(data, normal) : 0));1438}14391440/*ARGSUSED*/1441static int1442dt_print_bytes(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,1443size_t nbytes, int width, int quiet, int forceraw)1444{1445/*1446* If the byte stream is a series of printable characters, followed by1447* a terminating byte, we print it out as a string. Otherwise, we1448* assume that it's something else and just print the bytes.1449*/1450int i, j, margin = 5;1451char *c = (char *)addr;14521453if (nbytes == 0)1454return (0);14551456if (forceraw)1457goto raw;14581459if (dtp->dt_options[DTRACEOPT_RAWBYTES] != DTRACEOPT_UNSET)1460goto raw;14611462for (i = 0; i < nbytes; i++) {1463/*1464* We define a "printable character" to be one for which1465* isprint(3C) returns non-zero, isspace(3C) returns non-zero,1466* or a character which is either backspace or the bell.1467* Backspace and the bell are regrettably special because1468* they fail the first two tests -- and yet they are entirely1469* printable. These are the only two control characters that1470* have meaning for the terminal and for which isprint(3C) and1471* isspace(3C) return 0.1472*/1473if (isprint(c[i]) || isspace(c[i]) ||1474c[i] == '\b' || c[i] == '\a')1475continue;14761477if (c[i] == '\0' && i > 0) {1478/*1479* This looks like it might be a string. 
Before we1480* assume that it is indeed a string, check the1481* remainder of the byte range; if it contains1482* additional non-nul characters, we'll assume that1483* it's a binary stream that just happens to look like1484* a string, and we'll print out the individual bytes.1485*/1486for (j = i + 1; j < nbytes; j++) {1487if (c[j] != '\0')1488break;1489}14901491if (j != nbytes)1492break;14931494if (quiet) {1495return (dt_printf(dtp, fp, "%s", c));1496} else {1497return (dt_printf(dtp, fp, " %s%*s",1498width < 0 ? " " : "", width, c));1499}1500}15011502break;1503}15041505if (i == nbytes) {1506/*1507* The byte range is all printable characters, but there is1508* no trailing nul byte. We'll assume that it's a string and1509* print it as such.1510*/1511char *s = alloca(nbytes + 1);1512bcopy(c, s, nbytes);1513s[nbytes] = '\0';1514return (dt_printf(dtp, fp, " %-*s", width, s));1515}15161517raw:1518if (dt_printf(dtp, fp, "\n%*s ", margin, "") < 0)1519return (-1);15201521for (i = 0; i < 16; i++)1522if (dt_printf(dtp, fp, " %c", "0123456789abcdef"[i]) < 0)1523return (-1);15241525if (dt_printf(dtp, fp, " 0123456789abcdef\n") < 0)1526return (-1);152715281529for (i = 0; i < nbytes; i += 16) {1530if (dt_printf(dtp, fp, "%*s%5x:", margin, "", i) < 0)1531return (-1);15321533for (j = i; j < i + 16 && j < nbytes; j++) {1534if (dt_printf(dtp, fp, " %02x", (uchar_t)c[j]) < 0)1535return (-1);1536}15371538while (j++ % 16) {1539if (dt_printf(dtp, fp, " ") < 0)1540return (-1);1541}15421543if (dt_printf(dtp, fp, " ") < 0)1544return (-1);15451546for (j = i; j < i + 16 && j < nbytes; j++) {1547if (dt_printf(dtp, fp, "%c",1548c[j] < ' ' || c[j] > '~' ? '.' 
: c[j]) < 0)1549return (-1);1550}15511552if (dt_printf(dtp, fp, "\n") < 0)1553return (-1);1554}15551556return (0);1557}15581559int1560dt_format_stack(dtrace_hdl_t *dtp, caddr_t addr, int depth, int size)1561{1562dtrace_syminfo_t dts;1563GElf_Sym sym;1564int i;1565uint64_t pc;15661567xo_open_list("stack-frames");1568for (i = 0; i < depth; i++) {1569switch (size) {1570case sizeof (uint32_t):1571pc = *((uint32_t *)addr);1572break;15731574case sizeof (uint64_t):1575pc = *((uint64_t *)addr);1576break;15771578default:1579return (dt_set_errno(dtp, EDT_BADSTACKPC));1580}15811582if (pc == 0)1583break;15841585addr += size;15861587xo_open_instance("stack-frames");1588if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {1589if (pc > sym.st_value) {1590xo_emit("{:symbol/%s`%s+0x%llx} {:module/%s} "1591"{:name/%s} {:offset/0x%llx}",1592dts.dts_object, dts.dts_name,1593(u_longlong_t)(pc - sym.st_value),1594dts.dts_object, dts.dts_name,1595(u_longlong_t)(pc - sym.st_value));1596} else {1597xo_emit("{:symbol/%s`%s} {:module/%s} "1598"{:name/%s}",1599dts.dts_object, dts.dts_name,1600dts.dts_object, dts.dts_name);1601}1602} else {1603/*1604* We'll repeat the lookup, but this time we'll specify1605* a NULL GElf_Sym -- indicating that we're only1606* interested in the containing module.1607*/1608if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {1609xo_emit("{:symbol/%s`0x%llx} {:module/%s} "1610"{:offset/0x%llx}",1611dts.dts_object, (u_longlong_t)pc,1612dts.dts_object, (u_longlong_t)pc);1613} else {1614xo_emit("{:symbol/0x%llx} {:offset/0x%llx}",1615(u_longlong_t)pc, (u_longlong_t)pc);1616}1617}1618xo_close_instance("stack-frames");1619}1620xo_close_list("stack-frames");16211622return (0);1623}16241625int1626dt_format_ustack(dtrace_hdl_t *dtp, caddr_t addr, uint64_t arg)1627{1628uint64_t *pc = (uint64_t *)addr;1629uint32_t depth = DTRACE_USTACK_NFRAMES(arg);1630uint32_t strsize = DTRACE_USTACK_STRSIZE(arg);1631const char *strbase = addr + (depth + 1) * sizeof (uint64_t);1632const 
char *str = strsize ? strbase : NULL;1633int err = 0;16341635char name[PATH_MAX], objname[PATH_MAX], c[PATH_MAX * 2];1636struct ps_prochandle *P;1637GElf_Sym sym;1638int i, indent;1639pid_t pid;16401641if (depth == 0)1642return (0);16431644pid = (pid_t)*pc++;16451646/*1647* Ultimately, we need to add an entry point in the library vector for1648* determining <symbol, offset> from <pid, address>. For now, if1649* this is a vector open, we just print the raw address or string.1650*/1651if (dtp->dt_vector == NULL)1652P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);1653else1654P = NULL;16551656if (P != NULL)1657dt_proc_lock(dtp, P); /* lock handle while we perform lookups */16581659xo_open_list("ustack-frames");1660for (i = 0; i < depth && pc[i] != 0; i++) {1661const prmap_t *map;16621663xo_open_instance("ustack-frames");1664if (P != NULL && Plookup_by_addr(P, pc[i],1665name, sizeof (name), &sym) == 0) {1666(void) Pobjname(P, pc[i], objname, sizeof (objname));16671668if (pc[i] > sym.st_value) {1669xo_emit("{:symbol/%s`%s+0x%llx} {:module/%s} "1670"{:name/%s} {:offset/0x%llx}",1671dt_basename(objname), name,1672(u_longlong_t)(pc[i] - sym.st_value),1673dt_basename(objname), name,1674(u_longlong_t)(pc[i] - sym.st_value));1675} else {1676xo_emit("{:symbol/%s`%s} {:module/%s} "1677"{:name/%s}",1678dt_basename(objname), name,1679dt_basename(objname), name);1680}1681} else if (str != NULL && str[0] != '\0' && str[0] != '@' &&1682(P != NULL && ((map = Paddr_to_map(P, pc[i])) == NULL ||1683(map->pr_mflags & MA_WRITE)))) {1684/*1685* If the current string pointer in the string table1686* does not point to an empty string _and_ the program1687* counter falls in a writable region, we'll use the1688* string from the string table instead of the raw1689* address. This last condition is necessary because1690* some (broken) ustack helpers will return a string1691* even for a program counter that they can't1692* identify. 
If we have a string for a program1693* counter that falls in a segment that isn't1694* writable, we assume that we have fallen into this1695* case and we refuse to use the string.1696*/1697xo_emit("{:symbol/%s}", str);1698} else {1699if (P != NULL && Pobjname(P, pc[i], objname,1700sizeof (objname)) != 0) {1701xo_emit("{:symbol/%s`0x%llx} {:module/%s} "1702"{:offset/0x%llx}",1703dt_basename(objname), (u_longlong_t)pc[i],1704dt_basename(objname), (u_longlong_t)pc[i]);1705} else {1706xo_emit("{:symbol/0x%llx} {:offset/0x%llx}",1707(u_longlong_t)pc[i], (u_longlong_t)pc[i]);1708}1709}17101711if (str != NULL && str[0] == '@') {1712/*1713* If the first character of the string is an "at" sign,1714* then the string is inferred to be an annotation --1715* and it is printed out beneath the frame and offset1716* with brackets.1717*/1718xo_emit("{:annotation/%s}", &str[1]);1719}17201721if (str != NULL) {1722str += strlen(str) + 1;1723if (str - strbase >= strsize)1724str = NULL;1725}1726xo_close_instance("ustack-frames");1727}1728xo_close_list("ustack-frames");17291730if (P != NULL) {1731dt_proc_unlock(dtp, P);1732dt_proc_release(dtp, P);1733}17341735return (err);1736}17371738int1739dt_print_stack(dtrace_hdl_t *dtp, FILE *fp, const char *format,1740caddr_t addr, int depth, int size)1741{1742dtrace_syminfo_t dts;1743GElf_Sym sym;1744int i, indent;1745char c[PATH_MAX * 2];1746uint64_t pc;17471748if (dt_printf(dtp, fp, "\n") < 0)1749return (-1);17501751if (format == NULL)1752format = "%s";17531754if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)1755indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];1756else1757indent = _dtrace_stkindent;17581759for (i = 0; i < depth; i++) {1760switch (size) {1761case sizeof (uint32_t):1762/* LINTED - alignment */1763pc = *((uint32_t *)addr);1764break;17651766case sizeof (uint64_t):1767/* LINTED - alignment */1768pc = *((uint64_t *)addr);1769break;17701771default:1772return (dt_set_errno(dtp, EDT_BADSTACKPC));1773}17741775if (pc == 
0)1776break;17771778addr += size;17791780if (dt_printf(dtp, fp, "%*s", indent, "") < 0)1781return (-1);17821783if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {1784if (pc > sym.st_value) {1785(void) snprintf(c, sizeof (c), "%s`%s+0x%llx",1786dts.dts_object, dts.dts_name,1787(u_longlong_t)(pc - sym.st_value));1788} else {1789(void) snprintf(c, sizeof (c), "%s`%s",1790dts.dts_object, dts.dts_name);1791}1792} else {1793/*1794* We'll repeat the lookup, but this time we'll specify1795* a NULL GElf_Sym -- indicating that we're only1796* interested in the containing module.1797*/1798if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {1799(void) snprintf(c, sizeof (c), "%s`0x%llx",1800dts.dts_object, (u_longlong_t)pc);1801} else {1802(void) snprintf(c, sizeof (c), "0x%llx",1803(u_longlong_t)pc);1804}1805}18061807if (dt_printf(dtp, fp, format, c) < 0)1808return (-1);18091810if (dt_printf(dtp, fp, "\n") < 0)1811return (-1);1812}18131814return (0);1815}18161817int1818dt_print_ustack(dtrace_hdl_t *dtp, FILE *fp, const char *format,1819caddr_t addr, uint64_t arg)1820{1821/* LINTED - alignment */1822uint64_t *pc = (uint64_t *)addr;1823uint32_t depth = DTRACE_USTACK_NFRAMES(arg);1824uint32_t strsize = DTRACE_USTACK_STRSIZE(arg);1825const char *strbase = addr + (depth + 1) * sizeof (uint64_t);1826const char *str = strsize ? strbase : NULL;1827int err = 0;18281829char name[PATH_MAX], objname[PATH_MAX], c[PATH_MAX * 2];1830struct ps_prochandle *P;1831GElf_Sym sym;1832int i, indent;1833pid_t pid;18341835if (depth == 0)1836return (0);18371838pid = (pid_t)*pc++;18391840if (dt_printf(dtp, fp, "\n") < 0)1841return (-1);18421843if (format == NULL)1844format = "%s";18451846if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)1847indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];1848else1849indent = _dtrace_stkindent;18501851/*1852* Ultimately, we need to add an entry point in the library vector for1853* determining <symbol, offset> from <pid, address>. 
For now, if1854* this is a vector open, we just print the raw address or string.1855*/1856if (dtp->dt_vector == NULL)1857P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);1858else1859P = NULL;18601861if (P != NULL)1862dt_proc_lock(dtp, P); /* lock handle while we perform lookups */18631864for (i = 0; i < depth && pc[i] != 0; i++) {1865const prmap_t *map;18661867if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)1868break;18691870if (P != NULL && Plookup_by_addr(P, pc[i],1871name, sizeof (name), &sym) == 0) {1872(void) Pobjname(P, pc[i], objname, sizeof (objname));18731874if (pc[i] > sym.st_value) {1875(void) snprintf(c, sizeof (c),1876"%s`%s+0x%llx", dt_basename(objname), name,1877(u_longlong_t)(pc[i] - sym.st_value));1878} else {1879(void) snprintf(c, sizeof (c),1880"%s`%s", dt_basename(objname), name);1881}1882} else if (str != NULL && str[0] != '\0' && str[0] != '@' &&1883(P != NULL && ((map = Paddr_to_map(P, pc[i])) == NULL ||1884(map->pr_mflags & MA_WRITE)))) {1885/*1886* If the current string pointer in the string table1887* does not point to an empty string _and_ the program1888* counter falls in a writable region, we'll use the1889* string from the string table instead of the raw1890* address. This last condition is necessary because1891* some (broken) ustack helpers will return a string1892* even for a program counter that they can't1893* identify. 
If we have a string for a program1894* counter that falls in a segment that isn't1895* writable, we assume that we have fallen into this1896* case and we refuse to use the string.1897*/1898(void) snprintf(c, sizeof (c), "%s", str);1899} else {1900if (P != NULL && Pobjname(P, pc[i], objname,1901sizeof (objname)) != 0) {1902(void) snprintf(c, sizeof (c), "%s`0x%llx",1903dt_basename(objname), (u_longlong_t)pc[i]);1904} else {1905(void) snprintf(c, sizeof (c), "0x%llx",1906(u_longlong_t)pc[i]);1907}1908}19091910if ((err = dt_printf(dtp, fp, format, c)) < 0)1911break;19121913if ((err = dt_printf(dtp, fp, "\n")) < 0)1914break;19151916if (str != NULL && str[0] == '@') {1917/*1918* If the first character of the string is an "at" sign,1919* then the string is inferred to be an annotation --1920* and it is printed out beneath the frame and offset1921* with brackets.1922*/1923if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)1924break;19251926(void) snprintf(c, sizeof (c), " [ %s ]", &str[1]);19271928if ((err = dt_printf(dtp, fp, format, c)) < 0)1929break;19301931if ((err = dt_printf(dtp, fp, "\n")) < 0)1932break;1933}19341935if (str != NULL) {1936str += strlen(str) + 1;1937if (str - strbase >= strsize)1938str = NULL;1939}1940}19411942if (P != NULL) {1943dt_proc_unlock(dtp, P);1944dt_proc_release(dtp, P);1945}19461947return (err);1948}19491950static int1951dt_format_usym(dtrace_hdl_t *dtp, caddr_t addr, dtrace_actkind_t act)1952{1953uint64_t pid = ((uint64_t *)addr)[0];1954uint64_t pc = ((uint64_t *)addr)[1];1955char *s;1956int n, len = 256;19571958if (act == DTRACEACT_USYM && dtp->dt_vector == NULL) {1959struct ps_prochandle *P;19601961if ((P = dt_proc_grab(dtp, pid,1962PGRAB_RDONLY | PGRAB_FORCE, 0)) != NULL) {1963GElf_Sym sym;19641965dt_proc_lock(dtp, P);19661967if (Plookup_by_addr(P, pc, NULL, 0, &sym) == 0)1968pc = sym.st_value;19691970dt_proc_unlock(dtp, P);1971dt_proc_release(dtp, P);1972}1973}19741975do {1976n = len;1977s = alloca(n);1978} while ((len = 
dtrace_uaddr2str(dtp, pid, pc, s, n)) > n);19791980xo_emit("{:usym/%s}", s);1981return (0);1982}198319841985static int1986dt_print_usym(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr, dtrace_actkind_t act)1987{1988/* LINTED - alignment */1989uint64_t pid = ((uint64_t *)addr)[0];1990/* LINTED - alignment */1991uint64_t pc = ((uint64_t *)addr)[1];1992const char *format = " %-50s";1993char *s;1994int n, len = 256;19951996if (act == DTRACEACT_USYM && dtp->dt_vector == NULL) {1997struct ps_prochandle *P;19981999if ((P = dt_proc_grab(dtp, pid,2000PGRAB_RDONLY | PGRAB_FORCE, 0)) != NULL) {2001GElf_Sym sym;20022003dt_proc_lock(dtp, P);20042005if (Plookup_by_addr(P, pc, NULL, 0, &sym) == 0)2006pc = sym.st_value;20072008dt_proc_unlock(dtp, P);2009dt_proc_release(dtp, P);2010}2011}20122013do {2014n = len;2015s = alloca(n);2016} while ((len = dtrace_uaddr2str(dtp, pid, pc, s, n)) > n);20172018return (dt_printf(dtp, fp, format, s));2019}20202021int2022dt_format_umod(dtrace_hdl_t *dtp, caddr_t addr)2023{2024uint64_t pid = ((uint64_t *)addr)[0];2025uint64_t pc = ((uint64_t *)addr)[1];2026int err = 0;20272028char objname[PATH_MAX];2029struct ps_prochandle *P;20302031/*2032* See the comment in dt_print_ustack() for the rationale for2033* printing raw addresses in the vectored case.2034*/2035if (dtp->dt_vector == NULL)2036P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);2037else2038P = NULL;20392040if (P != NULL)2041dt_proc_lock(dtp, P); /* lock handle while we perform lookups */20422043if (P != NULL && Pobjname(P, pc, objname, sizeof (objname)) != 0) {2044xo_emit("{:umod/%s}", dt_basename(objname));2045} else {2046xo_emit("{:umod/0x%llx}", (u_longlong_t)pc);2047}20482049if (P != NULL) {2050dt_proc_unlock(dtp, P);2051dt_proc_release(dtp, P);2052}20532054return (0);2055}20562057int2058dt_print_umod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)2059{2060/* LINTED - alignment */2061uint64_t pid = ((uint64_t *)addr)[0];2062/* LINTED - alignment */2063uint64_t pc = 
((uint64_t *)addr)[1];2064int err = 0;20652066char objname[PATH_MAX], c[PATH_MAX * 2];2067struct ps_prochandle *P;20682069if (format == NULL)2070format = " %-50s";20712072/*2073* See the comment in dt_print_ustack() for the rationale for2074* printing raw addresses in the vectored case.2075*/2076if (dtp->dt_vector == NULL)2077P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);2078else2079P = NULL;20802081if (P != NULL)2082dt_proc_lock(dtp, P); /* lock handle while we perform lookups */20832084if (P != NULL && Pobjname(P, pc, objname, sizeof (objname)) != 0) {2085(void) snprintf(c, sizeof (c), "%s", dt_basename(objname));2086} else {2087(void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);2088}20892090err = dt_printf(dtp, fp, format, c);20912092if (P != NULL) {2093dt_proc_unlock(dtp, P);2094dt_proc_release(dtp, P);2095}20962097return (err);2098}20992100static int2101dt_print_sym(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)2102{2103/* LINTED - alignment */2104uint64_t pc = *((uint64_t *)addr);2105dtrace_syminfo_t dts;2106GElf_Sym sym;2107char c[PATH_MAX * 2];21082109if (format == NULL)2110format = " %-50s";21112112if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {2113if (dtp->dt_oformat)2114xo_emit("{:sym/%s`%s} {:object/%s} {:name/%s}",2115dts.dts_object, dts.dts_name, dts.dts_object,2116dts.dts_name);2117else2118(void) snprintf(c, sizeof (c), "%s`%s",2119dts.dts_object, dts.dts_name);2120} else {2121/*2122* We'll repeat the lookup, but this time we'll specify a2123* NULL GElf_Sym -- indicating that we're only interested in2124* the containing module.2125*/2126if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {2127if (dtp->dt_oformat)2128xo_emit("{:sym/%s`0x%llx} {:object/%s} "2129"{:offset/0x%llx}",2130dts.dts_object, (u_longlong_t)pc,2131dts.dts_object, (u_longlong_t)pc);2132else2133(void) snprintf(c, sizeof (c), "%s`0x%llx",2134dts.dts_object, (u_longlong_t)pc);2135} else {2136if (dtp->dt_oformat)2137xo_emit("{:sym/0x%llx} 
{:offset/0x%llx}",2138(u_longlong_t)pc, (u_longlong_t)pc);2139else2140(void) snprintf(c, sizeof (c), "0x%llx",2141(u_longlong_t)pc);2142}2143}21442145if (dtp->dt_oformat != 0 && dt_printf(dtp, fp, format, c) < 0)2146return (-1);21472148return (0);2149}21502151int2152dt_format_mod(dtrace_hdl_t *dtp, caddr_t addr)2153{2154/* LINTED - alignment */2155uint64_t pc = *((uint64_t *)addr);2156dtrace_syminfo_t dts;21572158if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {2159xo_emit("{:mod/%s}", dts.dts_object);2160} else {2161xo_emit("{:mod/0x%llx}", (u_longlong_t)pc);2162}21632164return (0);2165}21662167int2168dt_print_mod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)2169{2170/* LINTED - alignment */2171uint64_t pc = *((uint64_t *)addr);2172dtrace_syminfo_t dts;2173char c[PATH_MAX * 2];21742175if (format == NULL)2176format = " %-50s";21772178if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {2179(void) snprintf(c, sizeof (c), "%s", dts.dts_object);2180} else {2181(void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);2182}21832184if (dt_printf(dtp, fp, format, c) < 0)2185return (-1);21862187return (0);2188}21892190static char *2191dt_format_bytes_get(dtrace_hdl_t *dtp, caddr_t addr, size_t nbytes)2192{2193char *s = dt_alloc(dtp, nbytes * 2 + 2 + 1); /* 2 bytes per byte + 0x + '\0' */2194char t[6];2195char *c = (char *)addr;2196size_t i, j;21972198if (s == NULL)2199return (NULL);22002201/*2202* XXX: Some duplication with dt_print_bytes().2203*/2204for (i = 0; i < nbytes; i++) {2205if (isprint(c[i]) || isspace(c[i]) || c[i] == '\b' || c[i] == '\a')2206continue;22072208if (c[i] == '\0' && i > 0) {2209for (j = i + 1; j < nbytes; j++) {2210if (c[j] != '\0')2211break;2212}22132214if (j != nbytes)2215break;22162217memcpy(s, c, nbytes);2218return (s);2219}22202221break;2222}22232224if (i == nbytes) {2225memcpy(s, c, nbytes);2226s[nbytes] = '\0';2227return (s);2228}22292230s[0] = '0';2231s[1] = 'x';2232for (i = 0; i < nbytes; i++) {2233snprintf(t, 
sizeof(t), "%02x", (uchar_t)c[i]);2234memcpy(s + (i * 2) + 2, t, 2);2235}22362237s[nbytes * 2 + 2] = 0;2238return (s);2239}22402241static int2242dt_format_memory(dtrace_hdl_t *dtp, caddr_t addr)2243{2244size_t nbytes = *((size_t *) addr);2245char *s;22462247s = dt_format_bytes_get(dtp, addr + sizeof(size_t), nbytes);2248if (s == NULL)2249return (-1);22502251xo_emit("{:printm/%s}", s);2252dt_free(dtp, s);22532254return (0);2255}22562257static int2258dt_print_memory(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr)2259{2260int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);2261size_t nbytes = *((size_t *) addr);22622263return (dt_print_bytes(dtp, fp, addr + sizeof(size_t),2264nbytes, 50, quiet, 1));2265}22662267typedef struct dt_normal {2268dtrace_aggvarid_t dtnd_id;2269uint64_t dtnd_normal;2270} dt_normal_t;22712272static int2273dt_normalize_agg(const dtrace_aggdata_t *aggdata, void *arg)2274{2275dt_normal_t *normal = arg;2276dtrace_aggdesc_t *agg = aggdata->dtada_desc;2277dtrace_aggvarid_t id = normal->dtnd_id;22782279if (agg->dtagd_nrecs == 0)2280return (DTRACE_AGGWALK_NEXT);22812282if (agg->dtagd_varid != id)2283return (DTRACE_AGGWALK_NEXT);22842285((dtrace_aggdata_t *)aggdata)->dtada_normal = normal->dtnd_normal;2286return (DTRACE_AGGWALK_NORMALIZE);2287}22882289static int2290dt_normalize(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)2291{2292dt_normal_t normal;2293caddr_t addr;22942295/*2296* We (should) have two records: the aggregation ID followed by the2297* normalization value.2298*/2299addr = base + rec->dtrd_offset;23002301if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))2302return (dt_set_errno(dtp, EDT_BADNORMAL));23032304/* LINTED - alignment */2305normal.dtnd_id = *((dtrace_aggvarid_t *)addr);2306rec++;23072308if (rec->dtrd_action != DTRACEACT_LIBACT)2309return (dt_set_errno(dtp, EDT_BADNORMAL));23102311if (rec->dtrd_arg != DT_ACT_NORMALIZE)2312return (dt_set_errno(dtp, EDT_BADNORMAL));23132314addr = base + 
rec->dtrd_offset;23152316switch (rec->dtrd_size) {2317case sizeof (uint64_t):2318/* LINTED - alignment */2319normal.dtnd_normal = *((uint64_t *)addr);2320break;2321case sizeof (uint32_t):2322/* LINTED - alignment */2323normal.dtnd_normal = *((uint32_t *)addr);2324break;2325case sizeof (uint16_t):2326/* LINTED - alignment */2327normal.dtnd_normal = *((uint16_t *)addr);2328break;2329case sizeof (uint8_t):2330normal.dtnd_normal = *((uint8_t *)addr);2331break;2332default:2333return (dt_set_errno(dtp, EDT_BADNORMAL));2334}23352336(void) dtrace_aggregate_walk(dtp, dt_normalize_agg, &normal);23372338return (0);2339}23402341static int2342dt_denormalize_agg(const dtrace_aggdata_t *aggdata, void *arg)2343{2344dtrace_aggdesc_t *agg = aggdata->dtada_desc;2345dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);23462347if (agg->dtagd_nrecs == 0)2348return (DTRACE_AGGWALK_NEXT);23492350if (agg->dtagd_varid != id)2351return (DTRACE_AGGWALK_NEXT);23522353return (DTRACE_AGGWALK_DENORMALIZE);2354}23552356static int2357dt_clear_agg(const dtrace_aggdata_t *aggdata, void *arg)2358{2359dtrace_aggdesc_t *agg = aggdata->dtada_desc;2360dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);23612362if (agg->dtagd_nrecs == 0)2363return (DTRACE_AGGWALK_NEXT);23642365if (agg->dtagd_varid != id)2366return (DTRACE_AGGWALK_NEXT);23672368return (DTRACE_AGGWALK_CLEAR);2369}23702371typedef struct dt_trunc {2372dtrace_aggvarid_t dttd_id;2373uint64_t dttd_remaining;2374} dt_trunc_t;23752376static int2377dt_trunc_agg(const dtrace_aggdata_t *aggdata, void *arg)2378{2379dt_trunc_t *trunc = arg;2380dtrace_aggdesc_t *agg = aggdata->dtada_desc;2381dtrace_aggvarid_t id = trunc->dttd_id;23822383if (agg->dtagd_nrecs == 0)2384return (DTRACE_AGGWALK_NEXT);23852386if (agg->dtagd_varid != id)2387return (DTRACE_AGGWALK_NEXT);23882389if (trunc->dttd_remaining == 0)2390return (DTRACE_AGGWALK_REMOVE);23912392trunc->dttd_remaining--;2393return (DTRACE_AGGWALK_NEXT);2394}23952396static int2397dt_trunc(dtrace_hdl_t *dtp, 
caddr_t base, dtrace_recdesc_t *rec)2398{2399dt_trunc_t trunc;2400caddr_t addr;2401int64_t remaining;2402int (*func)(dtrace_hdl_t *, dtrace_aggregate_f *, void *);24032404/*2405* We (should) have two records: the aggregation ID followed by the2406* number of aggregation entries after which the aggregation is to be2407* truncated.2408*/2409addr = base + rec->dtrd_offset;24102411if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))2412return (dt_set_errno(dtp, EDT_BADTRUNC));24132414/* LINTED - alignment */2415trunc.dttd_id = *((dtrace_aggvarid_t *)addr);2416rec++;24172418if (rec->dtrd_action != DTRACEACT_LIBACT)2419return (dt_set_errno(dtp, EDT_BADTRUNC));24202421if (rec->dtrd_arg != DT_ACT_TRUNC)2422return (dt_set_errno(dtp, EDT_BADTRUNC));24232424addr = base + rec->dtrd_offset;24252426switch (rec->dtrd_size) {2427case sizeof (uint64_t):2428/* LINTED - alignment */2429remaining = *((int64_t *)addr);2430break;2431case sizeof (uint32_t):2432/* LINTED - alignment */2433remaining = *((int32_t *)addr);2434break;2435case sizeof (uint16_t):2436/* LINTED - alignment */2437remaining = *((int16_t *)addr);2438break;2439case sizeof (uint8_t):2440remaining = *((int8_t *)addr);2441break;2442default:2443return (dt_set_errno(dtp, EDT_BADNORMAL));2444}24452446if (remaining < 0) {2447func = dtrace_aggregate_walk_valsorted;2448remaining = -remaining;2449} else {2450func = dtrace_aggregate_walk_valrevsorted;2451}24522453assert(remaining >= 0);2454trunc.dttd_remaining = remaining;24552456(void) func(dtp, dt_trunc_agg, &trunc);24572458return (0);2459}24602461static int2462dt_format_datum(dtrace_hdl_t *dtp, dtrace_recdesc_t *rec, caddr_t addr,2463size_t size, const dtrace_aggdata_t *aggdata, uint64_t normal,2464dt_print_aggdata_t *pd)2465{2466dtrace_actkind_t act = rec->dtrd_action;2467boolean_t packed = pd->dtpa_agghist || pd->dtpa_aggpack;2468dtrace_aggdesc_t *agg = aggdata->dtada_desc;2469char fmt[512];2470char *s;24712472if (packed && pd->dtpa_agghisthdr != 
agg->dtagd_varid)2473pd->dtpa_agghisthdr = agg->dtagd_varid;24742475switch (act) {2476case DTRACEACT_STACK:2477return (dt_format_stack(dtp, addr, rec->dtrd_arg,2478rec->dtrd_size / rec->dtrd_arg));24792480case DTRACEACT_USTACK:2481case DTRACEACT_JSTACK:2482return (dt_format_ustack(dtp, addr, rec->dtrd_arg));24832484case DTRACEACT_USYM:2485case DTRACEACT_UADDR:2486return (dt_format_usym(dtp, addr, act));24872488case DTRACEACT_UMOD:2489return (dt_format_umod(dtp, addr));24902491case DTRACEACT_SYM:2492return (dt_format_sym(dtp, addr));2493case DTRACEACT_MOD:2494return (dt_format_mod(dtp, addr));24952496case DTRACEAGG_QUANTIZE:2497return (dt_format_quantize(dtp, addr, size, normal));24982499case DTRACEAGG_LQUANTIZE:2500return (dt_format_lquantize(dtp, addr, size, normal));25012502case DTRACEAGG_LLQUANTIZE:2503return (dt_format_llquantize(dtp, addr, size, normal));25042505case DTRACEAGG_AVG:2506return (dt_format_average(dtp, addr, size, normal));25072508case DTRACEAGG_STDDEV:2509return (dt_format_stddev(dtp, addr, size, normal));25102511default:2512break;2513}25142515switch (size) {2516case sizeof (uint64_t):2517snprintf(fmt, sizeof(fmt), "{:%s/%%lld}", pd->dtpa_keyname);2518xo_emit(fmt, (long long)*((uint64_t *)addr) / normal);2519break;2520case sizeof (uint32_t):2521snprintf(fmt, sizeof(fmt), "{:%s/%%d}", pd->dtpa_keyname);2522xo_emit(fmt, *((uint32_t *)addr) / (uint32_t)normal);2523break;2524case sizeof (uint16_t):2525snprintf(fmt, sizeof(fmt), "{:%s/%%d}", pd->dtpa_keyname);2526xo_emit(fmt, *((uint16_t *)addr) / (uint32_t)normal);2527break;2528case sizeof (uint8_t):2529snprintf(fmt, sizeof(fmt), "{:%s/%%d}", pd->dtpa_keyname);2530xo_emit(fmt, *((uint8_t *)addr) / (uint32_t)normal);2531break;2532default:2533s = dt_format_bytes_get(dtp, addr, size);2534if (s == NULL)2535return (-1);25362537xo_emit("{:value/%s}", s);2538dt_free(dtp, s);2539break;2540}25412542return (0);2543}25442545static int2546dt_print_datum(dtrace_hdl_t *dtp, FILE *fp, dtrace_recdesc_t 
*rec,2547caddr_t addr, size_t size, const dtrace_aggdata_t *aggdata,2548uint64_t normal, dt_print_aggdata_t *pd)2549{2550int err, width;2551dtrace_actkind_t act = rec->dtrd_action;2552boolean_t packed = pd->dtpa_agghist || pd->dtpa_aggpack;2553dtrace_aggdesc_t *agg = aggdata->dtada_desc;25542555static struct {2556size_t size;2557int width;2558int packedwidth;2559} *fmt, fmttab[] = {2560{ sizeof (uint8_t), 3, 3 },2561{ sizeof (uint16_t), 5, 5 },2562{ sizeof (uint32_t), 8, 8 },2563{ sizeof (uint64_t), 16, 16 },2564{ 0, -50, 16 }2565};25662567if (packed && pd->dtpa_agghisthdr != agg->dtagd_varid) {2568dtrace_recdesc_t *r;25692570width = 0;25712572/*2573* To print our quantization header for either an agghist or2574* aggpack aggregation, we need to iterate through all of our2575* of our records to determine their width.2576*/2577for (r = rec; !DTRACEACT_ISAGG(r->dtrd_action); r++) {2578for (fmt = fmttab; fmt->size &&2579fmt->size != r->dtrd_size; fmt++)2580continue;25812582width += fmt->packedwidth + 1;2583}25842585if (pd->dtpa_agghist) {2586if (dt_print_quanthdr(dtp, fp, width) < 0)2587return (-1);2588} else {2589if (dt_print_quanthdr_packed(dtp, fp,2590width, aggdata, r->dtrd_action) < 0)2591return (-1);2592}25932594pd->dtpa_agghisthdr = agg->dtagd_varid;2595}25962597if (pd->dtpa_agghist && DTRACEACT_ISAGG(act)) {2598char positives = aggdata->dtada_flags & DTRACE_A_HASPOSITIVES;2599char negatives = aggdata->dtada_flags & DTRACE_A_HASNEGATIVES;2600int64_t val;26012602assert(act == DTRACEAGG_SUM || act == DTRACEAGG_COUNT);2603val = (long long)*((uint64_t *)addr);26042605if (dt_printf(dtp, fp, " ") < 0)2606return (-1);26072608return (dt_print_quantline(dtp, fp, val, normal,2609aggdata->dtada_total, positives, negatives));2610}26112612if (pd->dtpa_aggpack && DTRACEACT_ISAGG(act)) {2613switch (act) {2614case DTRACEAGG_QUANTIZE:2615return (dt_print_quantize_packed(dtp,2616fp, addr, size, aggdata));2617case DTRACEAGG_LQUANTIZE:2618return 
(dt_print_lquantize_packed(dtp,2619fp, addr, size, aggdata));2620default:2621break;2622}2623}26242625switch (act) {2626case DTRACEACT_STACK:2627return (dt_print_stack(dtp, fp, NULL, addr,2628rec->dtrd_arg, rec->dtrd_size / rec->dtrd_arg));26292630case DTRACEACT_USTACK:2631case DTRACEACT_JSTACK:2632return (dt_print_ustack(dtp, fp, NULL, addr, rec->dtrd_arg));26332634case DTRACEACT_USYM:2635case DTRACEACT_UADDR:2636return (dt_print_usym(dtp, fp, addr, act));26372638case DTRACEACT_UMOD:2639return (dt_print_umod(dtp, fp, NULL, addr));26402641case DTRACEACT_SYM:2642return (dt_print_sym(dtp, fp, NULL, addr));26432644case DTRACEACT_MOD:2645return (dt_print_mod(dtp, fp, NULL, addr));26462647case DTRACEAGG_QUANTIZE:2648return (dt_print_quantize(dtp, fp, addr, size, normal));26492650case DTRACEAGG_LQUANTIZE:2651return (dt_print_lquantize(dtp, fp, addr, size, normal));26522653case DTRACEAGG_LLQUANTIZE:2654return (dt_print_llquantize(dtp, fp, addr, size, normal));26552656case DTRACEAGG_AVG:2657return (dt_print_average(dtp, fp, addr, size, normal));26582659case DTRACEAGG_STDDEV:2660return (dt_print_stddev(dtp, fp, addr, size, normal));26612662default:2663break;2664}26652666for (fmt = fmttab; fmt->size && fmt->size != size; fmt++)2667continue;26682669width = packed ? 
fmt->packedwidth : fmt->width;26702671switch (size) {2672case sizeof (uint64_t):2673err = dt_printf(dtp, fp, " %*lld", width,2674/* LINTED - alignment */2675(long long)*((uint64_t *)addr) / normal);2676break;2677case sizeof (uint32_t):2678/* LINTED - alignment */2679err = dt_printf(dtp, fp, " %*d", width, *((uint32_t *)addr) /2680(uint32_t)normal);2681break;2682case sizeof (uint16_t):2683/* LINTED - alignment */2684err = dt_printf(dtp, fp, " %*d", width, *((uint16_t *)addr) /2685(uint32_t)normal);2686break;2687case sizeof (uint8_t):2688err = dt_printf(dtp, fp, " %*d", width, *((uint8_t *)addr) /2689(uint32_t)normal);2690break;2691default:2692err = dt_print_bytes(dtp, fp, addr, size, width, 0, 0);2693break;2694}26952696return (err);2697}26982699int2700dt_format_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)2701{2702int i, aggact = 0;2703dt_print_aggdata_t *pd = arg;2704const dtrace_aggdata_t *aggdata = aggsdata[0];2705dtrace_aggdesc_t *agg = aggdata->dtada_desc;2706dtrace_hdl_t *dtp = pd->dtpa_dtp;2707dtrace_recdesc_t *rec;2708dtrace_actkind_t act;2709caddr_t addr;2710size_t size;27112712if (pd->dtpa_aggname == NULL)2713pd->dtpa_aggname = agg->dtagd_name;27142715xo_open_instance("aggregation-data");2716strcpy(pd->dtpa_keyname, "value");2717xo_open_list("keys");27182719/*2720* Iterate over each record description in the key, printing the traced2721* data, skipping the first datum (the tuple member created by the2722* compiler).2723*/2724for (i = 1; i < agg->dtagd_nrecs; i++) {2725rec = &agg->dtagd_rec[i];2726act = rec->dtrd_action;2727addr = aggdata->dtada_data + rec->dtrd_offset;2728size = rec->dtrd_size;27292730if (DTRACEACT_ISAGG(act)) {2731aggact = i;2732break;2733}27342735xo_open_instance("keys");2736if (dt_format_datum(dtp, rec, addr,2737size, aggdata, 1, pd) < 0) {2738xo_close_instance("keys");2739xo_close_instance("aggregation-data");2740return (-1);2741}2742xo_close_instance("keys");27432744if (dt_buffered_flush(dtp, NULL, rec, 
aggdata,2745DTRACE_BUFDATA_AGGKEY) < 0) {2746xo_close_instance("aggregation-data");2747return (-1);2748}2749}2750xo_close_list("keys");27512752assert(aggact != 0);27532754for (i = (naggvars == 1 ? 0 : 1); i < naggvars; i++) {2755uint64_t normal;27562757aggdata = aggsdata[i];2758agg = aggdata->dtada_desc;2759rec = &agg->dtagd_rec[aggact];2760act = rec->dtrd_action;2761addr = aggdata->dtada_data + rec->dtrd_offset;2762size = rec->dtrd_size;27632764assert(DTRACEACT_ISAGG(act));27652766switch (act) {2767case DTRACEAGG_MIN:2768strcpy(pd->dtpa_keyname, "min");2769break;2770case DTRACEAGG_MAX:2771strcpy(pd->dtpa_keyname, "max");2772break;2773case DTRACEAGG_COUNT:2774strcpy(pd->dtpa_keyname, "count");2775break;2776case DTRACEAGG_SUM:2777strcpy(pd->dtpa_keyname, "sum");2778break;2779default:2780strcpy(pd->dtpa_keyname, "UNKNOWN");2781break;2782}27832784normal = aggdata->dtada_normal;27852786if (dt_format_datum(dtp, rec, addr, size,2787aggdata, normal, pd) < 0) {2788xo_close_instance("aggregation-data");2789return (-1);2790}27912792if (dt_buffered_flush(dtp, NULL, rec, aggdata,2793DTRACE_BUFDATA_AGGVAL) < 0) {2794xo_close_instance("aggregation-data");2795return (-1);2796}27972798if (!pd->dtpa_allunprint)2799agg->dtagd_flags |= DTRACE_AGD_PRINTED;2800}28012802if (dt_buffered_flush(dtp, NULL, NULL, aggdata,2803DTRACE_BUFDATA_AGGFORMAT | DTRACE_BUFDATA_AGGLAST) < 0) {2804xo_close_instance("aggregation-data");2805return (-1);2806}28072808xo_close_instance("aggregation-data");2809return (0);2810}28112812int2813dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)2814{2815int i, aggact = 0;2816dt_print_aggdata_t *pd = arg;2817const dtrace_aggdata_t *aggdata = aggsdata[0];2818dtrace_aggdesc_t *agg = aggdata->dtada_desc;2819FILE *fp = pd->dtpa_fp;2820dtrace_hdl_t *dtp = pd->dtpa_dtp;2821dtrace_recdesc_t *rec;2822dtrace_actkind_t act;2823caddr_t addr;2824size_t size;28252826pd->dtpa_agghist = (aggdata->dtada_flags & DTRACE_A_TOTAL);2827pd->dtpa_aggpack = 
(aggdata->dtada_flags & DTRACE_A_MINMAXBIN);28282829/*2830* Iterate over each record description in the key, printing the traced2831* data, skipping the first datum (the tuple member created by the2832* compiler).2833*/2834for (i = 1; i < agg->dtagd_nrecs; i++) {2835rec = &agg->dtagd_rec[i];2836act = rec->dtrd_action;2837addr = aggdata->dtada_data + rec->dtrd_offset;2838size = rec->dtrd_size;28392840if (DTRACEACT_ISAGG(act)) {2841aggact = i;2842break;2843}28442845if (dt_print_datum(dtp, fp, rec, addr,2846size, aggdata, 1, pd) < 0)2847return (-1);28482849if (dt_buffered_flush(dtp, NULL, rec, aggdata,2850DTRACE_BUFDATA_AGGKEY) < 0)2851return (-1);2852}28532854assert(aggact != 0);28552856for (i = (naggvars == 1 ? 0 : 1); i < naggvars; i++) {2857uint64_t normal;28582859aggdata = aggsdata[i];2860agg = aggdata->dtada_desc;2861rec = &agg->dtagd_rec[aggact];2862act = rec->dtrd_action;2863addr = aggdata->dtada_data + rec->dtrd_offset;2864size = rec->dtrd_size;28652866assert(DTRACEACT_ISAGG(act));2867normal = aggdata->dtada_normal;28682869if (dt_print_datum(dtp, fp, rec, addr,2870size, aggdata, normal, pd) < 0)2871return (-1);28722873if (dt_buffered_flush(dtp, NULL, rec, aggdata,2874DTRACE_BUFDATA_AGGVAL) < 0)2875return (-1);28762877if (!pd->dtpa_allunprint)2878agg->dtagd_flags |= DTRACE_AGD_PRINTED;2879}28802881if (!pd->dtpa_agghist && !pd->dtpa_aggpack) {2882if (dt_printf(dtp, fp, "\n") < 0)2883return (-1);2884}28852886if (dt_buffered_flush(dtp, NULL, NULL, aggdata,2887DTRACE_BUFDATA_AGGFORMAT | DTRACE_BUFDATA_AGGLAST) < 0)2888return (-1);28892890return (0);2891}28922893int2894dt_format_agg(const dtrace_aggdata_t *aggdata, void *arg)2895{2896dt_print_aggdata_t *pd = arg;2897dtrace_aggdesc_t *agg = aggdata->dtada_desc;2898dtrace_aggvarid_t aggvarid = pd->dtpa_id;28992900if (pd->dtpa_allunprint) {2901if (agg->dtagd_flags & DTRACE_AGD_PRINTED)2902return (0);2903} else {2904/*2905* If we're not printing all unprinted aggregations, then the2906* aggregation variable ID denotes 
a specific aggregation2907* variable that we should print -- skip any other aggregations2908* that we encounter.2909*/2910if (agg->dtagd_nrecs == 0)2911return (0);29122913if (aggvarid != agg->dtagd_varid)2914return (0);2915}29162917return (dt_format_aggs(&aggdata, 1, arg));2918}29192920int2921dt_print_agg(const dtrace_aggdata_t *aggdata, void *arg)2922{2923dt_print_aggdata_t *pd = arg;2924dtrace_aggdesc_t *agg = aggdata->dtada_desc;2925dtrace_aggvarid_t aggvarid = pd->dtpa_id;29262927if (pd->dtpa_allunprint) {2928if (agg->dtagd_flags & DTRACE_AGD_PRINTED)2929return (0);2930} else {2931/*2932* If we're not printing all unprinted aggregations, then the2933* aggregation variable ID denotes a specific aggregation2934* variable that we should print -- skip any other aggregations2935* that we encounter.2936*/2937if (agg->dtagd_nrecs == 0)2938return (0);29392940if (aggvarid != agg->dtagd_varid)2941return (0);2942}29432944return (dt_print_aggs(&aggdata, 1, arg));2945}29462947int2948dt_setopt(dtrace_hdl_t *dtp, const dtrace_probedata_t *data,2949const char *option, const char *value)2950{2951int len, rval;2952char *msg;2953const char *errstr;2954dtrace_setoptdata_t optdata;29552956bzero(&optdata, sizeof (optdata));2957(void) dtrace_getopt(dtp, option, &optdata.dtsda_oldval);29582959if (dtrace_setopt(dtp, option, value) == 0) {2960(void) dtrace_getopt(dtp, option, &optdata.dtsda_newval);2961optdata.dtsda_probe = data;2962optdata.dtsda_option = option;2963optdata.dtsda_handle = dtp;29642965if ((rval = dt_handle_setopt(dtp, &optdata)) != 0)2966return (rval);29672968return (0);2969}29702971errstr = dtrace_errmsg(dtp, dtrace_errno(dtp));2972len = strlen(option) + strlen(value) + strlen(errstr) + 80;2973msg = alloca(len);29742975(void) snprintf(msg, len, "couldn't set option \"%s\" to \"%s\": %s\n",2976option, value, errstr);29772978if ((rval = dt_handle_liberr(dtp, data, msg)) == 0)2979return (0);29802981return (rval);2982}29832984/*2985* Helper functions to help maintain 
style(9) in dt_consume_cpu().2986*/2987static int2988dt_oformat_agg_sorted(dtrace_hdl_t *dtp, dtrace_aggregate_f *func,2989dt_print_aggdata_t *pd)2990{2991int r;29922993r = dtrace_aggregate_walk_sorted(dtp, dt_format_agg, pd);2994if (r < 0) {2995xo_close_list("aggregation-data");2996xo_emit("{:aggregation-name/%s}", pd->dtpa_aggname);2997xo_close_instance("output");2998}29993000return (r);3001}30023003static void3004dt_oformat_agg_name(dt_print_aggdata_t *pd)3005{30063007xo_close_list("aggregation-data");3008xo_emit("{:aggregation-name/%s}", pd->dtpa_aggname);3009}30103011static int3012dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu,3013dtrace_bufdesc_t *buf, boolean_t just_one,3014dtrace_consume_probe_f *efunc, dtrace_consume_rec_f *rfunc, void *arg)3015{3016dtrace_epid_t id;3017size_t offs;3018int flow = (dtp->dt_options[DTRACEOPT_FLOWINDENT] != DTRACEOPT_UNSET);3019int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);3020int rval, i, n;3021uint64_t tracememsize = 0;3022dtrace_probedata_t data;3023uint64_t drops;3024size_t skip_format;30253026bzero(&data, sizeof (data));3027data.dtpda_handle = dtp;3028data.dtpda_cpu = cpu;3029data.dtpda_flow = dtp->dt_flow;3030data.dtpda_indent = dtp->dt_indent;3031data.dtpda_prefix = dtp->dt_prefix;30323033for (offs = buf->dtbd_oldest; offs < buf->dtbd_size; ) {3034dtrace_eprobedesc_t *epd;30353036/*3037* We're guaranteed to have an ID.3038*/3039id = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);30403041if (id == DTRACE_EPIDNONE) {3042/*3043* This is filler to assure proper alignment of the3044* next record; we simply ignore it.3045*/3046offs += sizeof (id);3047continue;3048}30493050if ((rval = dt_epid_lookup(dtp, id, &data.dtpda_edesc,3051&data.dtpda_pdesc)) != 0)3052return (rval);30533054epd = data.dtpda_edesc;3055data.dtpda_data = buf->dtbd_data + offs;3056data.dtpda_timestamp = DTRACE_RECORD_LOAD_TIMESTAMP(3057(struct dtrace_rechdr *)data.dtpda_data);30583059if (data.dtpda_edesc->dtepd_uarg != 
DT_ECB_DEFAULT) {3060rval = dt_handle(dtp, &data);30613062if (rval == DTRACE_CONSUME_NEXT)3063goto nextepid;30643065if (rval == DTRACE_CONSUME_ERROR)3066return (-1);3067}30683069if (flow)3070(void) dt_flowindent(dtp, &data, dtp->dt_last_epid,3071buf, offs);30723073if (dtp->dt_oformat)3074xo_open_instance("probes");3075rval = (*efunc)(&data, arg);30763077if (flow) {3078if (data.dtpda_flow == DTRACEFLOW_ENTRY)3079data.dtpda_indent += 2;3080}30813082if (rval == DTRACE_CONSUME_NEXT)3083goto nextepid;30843085if (rval == DTRACE_CONSUME_ABORT)3086return (dt_set_errno(dtp, EDT_DIRABORT));30873088if (rval != DTRACE_CONSUME_THIS)3089return (dt_set_errno(dtp, EDT_BADRVAL));30903091skip_format = 0;3092if (dtp->dt_oformat)3093xo_open_list("output");3094for (i = 0; i < epd->dtepd_nrecs; i++) {3095caddr_t addr;3096dtrace_recdesc_t *rec = &epd->dtepd_rec[i];3097dtrace_actkind_t act = rec->dtrd_action;30983099if (skip_format > 0)3100skip_format--;31013102data.dtpda_data = buf->dtbd_data + offs +3103rec->dtrd_offset;3104addr = data.dtpda_data;31053106if (act == DTRACEACT_LIBACT) {3107uint64_t arg = rec->dtrd_arg;3108dtrace_aggvarid_t id;31093110switch (arg) {3111case DT_ACT_CLEAR:3112/* LINTED - alignment */3113id = *((dtrace_aggvarid_t *)addr);3114(void) dtrace_aggregate_walk(dtp,3115dt_clear_agg, &id);3116continue;31173118case DT_ACT_DENORMALIZE:3119/* LINTED - alignment */3120id = *((dtrace_aggvarid_t *)addr);3121(void) dtrace_aggregate_walk(dtp,3122dt_denormalize_agg, &id);3123continue;31243125case DT_ACT_FTRUNCATE:3126if (fp == NULL)3127continue;31283129(void) fflush(fp);3130(void) ftruncate(fileno(fp), 0);3131(void) fseeko(fp, 0, SEEK_SET);3132continue;31333134case DT_ACT_NORMALIZE:3135if (i == epd->dtepd_nrecs - 1)3136return (dt_set_errno(dtp,3137EDT_BADNORMAL));31383139if (dt_normalize(dtp,3140buf->dtbd_data + offs, rec) != 0)3141return (-1);31423143i++;3144continue;31453146case DT_ACT_SETOPT: {3147uint64_t *opts = dtp->dt_options;3148dtrace_recdesc_t *valrec;3149uint32_t 
valsize;3150caddr_t val;3151int rv;31523153if (i == epd->dtepd_nrecs - 1) {3154return (dt_set_errno(dtp,3155EDT_BADSETOPT));3156}31573158valrec = &epd->dtepd_rec[++i];3159valsize = valrec->dtrd_size;31603161if (valrec->dtrd_action != act ||3162valrec->dtrd_arg != arg) {3163return (dt_set_errno(dtp,3164EDT_BADSETOPT));3165}31663167if (valsize > sizeof (uint64_t)) {3168val = buf->dtbd_data + offs +3169valrec->dtrd_offset;3170} else {3171val = "1";3172}31733174rv = dt_setopt(dtp, &data, addr, val);31753176if (rv != 0)3177return (-1);31783179flow = (opts[DTRACEOPT_FLOWINDENT] !=3180DTRACEOPT_UNSET);3181quiet = (opts[DTRACEOPT_QUIET] !=3182DTRACEOPT_UNSET);31833184continue;3185}31863187case DT_ACT_TRUNC:3188if (i == epd->dtepd_nrecs - 1)3189return (dt_set_errno(dtp,3190EDT_BADTRUNC));31913192if (dt_trunc(dtp,3193buf->dtbd_data + offs, rec) != 0)3194return (-1);31953196i++;3197continue;31983199default:3200continue;3201}3202}32033204if (act == DTRACEACT_TRACEMEM_DYNSIZE &&3205rec->dtrd_size == sizeof (uint64_t)) {3206/* LINTED - alignment */3207tracememsize = *((unsigned long long *)addr);3208continue;3209}32103211rval = (*rfunc)(&data, rec, arg);32123213if (rval == DTRACE_CONSUME_NEXT)3214continue;32153216if (rval == DTRACE_CONSUME_ABORT)3217return (dt_set_errno(dtp, EDT_DIRABORT));32183219if (rval != DTRACE_CONSUME_THIS)3220return (dt_set_errno(dtp, EDT_BADRVAL));32213222if (dtp->dt_oformat && rec->dtrd_size > 0)3223xo_open_instance("output");3224if (act == DTRACEACT_STACK) {3225int depth = rec->dtrd_arg;32263227if (dtp->dt_oformat) {3228if (dt_format_stack(dtp, addr, depth,3229rec->dtrd_size / depth) < 0) {3230xo_close_instance("output");3231return (-1);3232}3233} else {3234if (dt_print_stack(dtp,3235fp, NULL, addr, depth,3236rec->dtrd_size / depth) < 0)3237return (-1);3238}3239goto nextrec;3240}32413242if (act == DTRACEACT_USTACK ||3243act == DTRACEACT_JSTACK) {3244if (dtp->dt_oformat) {3245if (dt_format_ustack(dtp, addr,3246rec->dtrd_arg) < 0) 
{3247xo_close_instance("output");3248return (-1);3249}3250} else {3251if (dt_print_ustack(dtp, fp, NULL,3252addr, rec->dtrd_arg) < 0)3253return (-1);3254}3255goto nextrec;3256}32573258if (act == DTRACEACT_SYM) {3259if (dtp->dt_oformat) {3260if (dt_format_sym(dtp, addr) < 0) {3261xo_close_instance("output");3262return (-1);3263}3264} else {3265if (dt_print_sym(dtp, fp, NULL, addr) < 0)3266return (-1);3267}3268goto nextrec;3269}32703271if (act == DTRACEACT_MOD) {3272if (dtp->dt_oformat) {3273if (dt_format_mod(dtp, addr) < 0) {3274xo_close_instance("output");3275return (-1);3276}3277} else {3278if (dt_print_mod(dtp, fp, NULL, addr) < 0)3279return (-1);3280}3281goto nextrec;3282}32833284if (act == DTRACEACT_USYM || act == DTRACEACT_UADDR) {3285if (dtp->dt_oformat) {3286if (dt_format_usym(dtp, addr, act) < 0) {3287xo_close_instance("output");3288return (-1);3289}3290} else {3291if (dt_print_usym(dtp, fp, addr, act) < 0)3292return (-1);3293}3294goto nextrec;3295}32963297if (act == DTRACEACT_UMOD) {3298if (dtp->dt_oformat) {3299if (dt_format_umod(dtp, addr) < 0) {3300xo_close_instance("output");3301return (-1);3302}3303} else {3304if (dt_print_umod(dtp, fp, NULL, addr) < 0)3305return (-1);3306}3307goto nextrec;3308}33093310if (act == DTRACEACT_PRINTM) {3311if (dtp->dt_oformat) {3312if (dt_format_memory(dtp, addr) < 0) {3313xo_close_instance("output");3314return (-1);3315}3316} else {3317if (dt_print_memory(dtp, fp, addr) < 0)3318return (-1);3319}3320goto nextrec;3321}33223323if (dtp->dt_oformat == DTRACE_OFORMAT_TEXT &&3324DTRACEACT_ISPRINTFLIKE(act)) {3325void *fmtdata;3326int (*func)(dtrace_hdl_t *, FILE *, void *,3327const dtrace_probedata_t *,3328const dtrace_recdesc_t *, uint_t,3329const void *buf, size_t);33303331if ((fmtdata = dt_format_lookup(dtp,3332rec->dtrd_format)) == NULL)3333goto nofmt;33343335switch (act) {3336case DTRACEACT_PRINTF:3337func = dtrace_fprintf;3338break;3339case DTRACEACT_PRINTA:3340func = dtrace_fprinta;3341break;3342case 
DTRACEACT_SYSTEM:3343func = dtrace_system;3344break;3345case DTRACEACT_FREOPEN:3346func = dtrace_freopen;3347break;3348}33493350n = (*func)(dtp, fp, fmtdata, &data,3351rec, epd->dtepd_nrecs - i,3352(uchar_t *)buf->dtbd_data + offs,3353buf->dtbd_size - offs);33543355if (n < 0)3356return (-1); /* errno is set for us */33573358if (n > 0)3359i += n - 1;3360goto nextrec;3361}33623363/*3364* We don't care about a formatted printa, system or3365* freopen for oformat.3366*/3367if (dtp->dt_oformat && act == DTRACEACT_PRINTF &&3368skip_format == 0) {3369void *fmtdata;3370if ((fmtdata = dt_format_lookup(dtp,3371rec->dtrd_format)) == NULL)3372goto nofmt;33733374n = dtrace_sprintf(dtp, fp, fmtdata, rec,3375epd->dtepd_nrecs - i,3376(uchar_t *)buf->dtbd_data + offs,3377buf->dtbd_size - offs);33783379if (n < 0) {3380xo_close_instance("output");3381return (-1); /* errno is set for us */3382}33833384xo_emit("{:message/%s}", dtp->dt_sprintf_buf);3385skip_format += n;33863387/*3388* We want the "message" object to be its own3389* thing, but we still want to process the3390* current DIFEXPR in case there is a value3391* attached to it. 
If there is, we need to3392* re-open a new output instance, as otherwise3393* the message ends up bundled with the first3394* value.3395*3396* XXX: There is an edge case where a3397* printf("hello"); will produce a DIFO that3398* returns 0 attached to it and we have no good3399* way to determine if this 0 value is because3400* there's no real data attached to the printf3401* as an argument, or it's because the argument3402* actually returns 0.3403*/3404if (skip_format == 0)3405goto nextrec;34063407xo_close_instance("output");3408xo_open_instance("output");3409}34103411/*3412* If this is a DIF expression, and the record has a3413* format set, this indicates we have a CTF type name3414* associated with the data and we should try to print3415* it out by type.3416*/3417if (act == DTRACEACT_DIFEXPR) {3418const char *strdata = dt_strdata_lookup(dtp,3419rec->dtrd_format);3420if (strdata != NULL) {3421if (dtp->dt_oformat)3422n = dtrace_format_print(dtp, fp,3423strdata, addr,3424rec->dtrd_size);3425else3426n = dtrace_print(dtp, fp,3427strdata, addr,3428rec->dtrd_size);34293430/*3431* dtrace_print() will return -1 on3432* error, or return the number of bytes3433* consumed. It will return 0 if the3434* type couldn't be determined, and we3435* should fall through to the normal3436* trace method.3437*/3438if (n < 0) {3439if (dtp->dt_oformat)3440xo_close_instance(3441"output");3442return (-1);3443}34443445if (n > 0)3446goto nextrec;3447}3448}34493450nofmt:3451if (act == DTRACEACT_PRINTA) {3452dt_print_aggdata_t pd;3453dtrace_aggvarid_t *aggvars;3454int j, naggvars = 0;3455size_t size = ((epd->dtepd_nrecs - i) *3456sizeof (dtrace_aggvarid_t));34573458if ((aggvars = dt_alloc(dtp, size)) == NULL) {3459if (dtp->dt_oformat)3460xo_close_instance("output");3461return (-1);3462}34633464/*3465* This might be a printa() with multiple3466* aggregation variables. 
We need to scan3467* forward through the records until we find3468* a record from a different statement.3469*/3470for (j = i; j < epd->dtepd_nrecs; j++) {3471dtrace_recdesc_t *nrec;3472caddr_t naddr;34733474nrec = &epd->dtepd_rec[j];34753476if (nrec->dtrd_uarg != rec->dtrd_uarg)3477break;34783479if (nrec->dtrd_action != act) {3480if (dtp->dt_oformat)3481xo_close_instance(3482"output");3483return (dt_set_errno(dtp,3484EDT_BADAGG));3485}34863487naddr = buf->dtbd_data + offs +3488nrec->dtrd_offset;34893490aggvars[naggvars++] =3491/* LINTED - alignment */3492*((dtrace_aggvarid_t *)naddr);3493}34943495i = j - 1;3496bzero(&pd, sizeof (pd));3497pd.dtpa_dtp = dtp;3498pd.dtpa_fp = fp;34993500assert(naggvars >= 1);35013502if (dtp->dt_oformat)3503xo_open_list("aggregation-data");3504if (naggvars == 1) {3505pd.dtpa_id = aggvars[0];3506dt_free(dtp, aggvars);35073508if (dtp->dt_oformat) {3509n = dt_oformat_agg_sorted(dtp,3510dt_format_agg, &pd);3511if (n < 0)3512return (-1);3513} else {3514if (dt_printf(dtp, fp, "\n") < 0 ||3515dtrace_aggregate_walk_sorted(dtp,3516dt_print_agg, &pd) < 0)3517return (-1);3518}35193520if (dtp->dt_oformat)3521dt_oformat_agg_name(&pd);3522goto nextrec;3523}35243525if (dtp->dt_oformat) {3526if (dtrace_aggregate_walk_joined(dtp,3527aggvars, naggvars,3528dt_format_aggs, &pd) < 0) {3529dt_oformat_agg_name(&pd);3530xo_close_instance("output");3531dt_free(dtp, aggvars);3532return (-1);3533}3534} else {3535if (dt_printf(dtp, fp, "\n") < 0 ||3536dtrace_aggregate_walk_joined(dtp,3537aggvars, naggvars,3538dt_print_aggs, &pd) < 0) {3539dt_free(dtp, aggvars);3540return (-1);3541}3542}35433544if (dtp->dt_oformat)3545dt_oformat_agg_name(&pd);3546dt_free(dtp, aggvars);3547goto nextrec;3548}35493550if (act == DTRACEACT_TRACEMEM) {3551if (tracememsize == 0 ||3552tracememsize > rec->dtrd_size) {3553tracememsize = rec->dtrd_size;3554}35553556if (dtp->dt_oformat) {3557char *s;35583559s = dt_format_bytes_get(dtp, addr,3560tracememsize);3561n = xo_emit("{:tracemem/%s}", 
s);3562dt_free(dtp, s);3563} else {3564n = dt_print_bytes(dtp, fp, addr,3565tracememsize, -33, quiet, 1);3566}35673568tracememsize = 0;35693570if (n < 0)3571return (-1);35723573goto nextrec;3574}35753576switch (rec->dtrd_size) {3577case sizeof (uint64_t):3578if (dtp->dt_oformat) {3579xo_emit("{:value/%lld}",3580*((unsigned long long *)addr));3581n = 0;3582} else3583n = dt_printf(dtp, fp,3584quiet ? "%lld" : " %16lld",3585/* LINTED - alignment */3586*((unsigned long long *)addr));3587break;3588case sizeof (uint32_t):3589if (dtp->dt_oformat) {3590xo_emit("{:value/%d}",3591*((uint32_t *)addr));3592n = 0;3593} else3594n = dt_printf(dtp, fp,3595quiet ? "%d" : " %8d",3596/* LINTED - alignment */3597*((uint32_t *)addr));3598break;3599case sizeof (uint16_t):3600if (dtp->dt_oformat) {3601xo_emit("{:value/%d}",3602*((uint16_t *)addr));3603n = 0;3604} else3605n = dt_printf(dtp, fp,3606quiet ? "%d" : " %5d",3607/* LINTED - alignment */3608*((uint16_t *)addr));3609break;3610case sizeof (uint8_t):3611if (dtp->dt_oformat) {3612xo_emit("{:value/%d}",3613*((uint8_t *)addr));3614n = 0;3615} else3616n = dt_printf(dtp, fp,3617quiet ? 
"%d" : " %3d",3618*((uint8_t *)addr));3619break;3620default:3621if (dtp->dt_oformat && rec->dtrd_size > 0) {3622char *s;36233624s = dt_format_bytes_get(dtp, addr,3625rec->dtrd_size);3626xo_emit("{:value/%s}", s);3627dt_free(dtp, s);3628n = 0;3629} else {3630n = dt_print_bytes(dtp, fp, addr,3631rec->dtrd_size, -33, quiet, 0);3632}3633break;3634}36353636if (dtp->dt_oformat && rec->dtrd_size > 0)3637xo_close_instance("output");36383639if (n < 0)3640return (-1); /* errno is set for us */36413642nextrec:3643if (dt_buffered_flush(dtp, &data, rec, NULL, 0) < 0)3644return (-1); /* errno is set for us */3645}36463647/*3648* Call the record callback with a NULL record to indicate3649* that we're done processing this EPID.3650*/3651rval = (*rfunc)(&data, NULL, arg);3652nextepid:3653offs += epd->dtepd_size;3654dtp->dt_last_epid = id;36553656if (dtp->dt_oformat) {3657xo_close_list("output");3658xo_close_instance("probes");3659xo_flush();3660}3661if (just_one) {3662buf->dtbd_oldest = offs;3663break;3664}3665}36663667dtp->dt_flow = data.dtpda_flow;3668dtp->dt_indent = data.dtpda_indent;3669dtp->dt_prefix = data.dtpda_prefix;36703671if ((drops = buf->dtbd_drops) == 0)3672return (0);36733674/*3675* Explicitly zero the drops to prevent us from processing them again.3676*/3677buf->dtbd_drops = 0;36783679if (dtp->dt_oformat) {3680xo_open_instance("probes");3681dt_oformat_drop(dtp, cpu);3682}3683rval = dt_handle_cpudrop(dtp, cpu, DTRACEDROP_PRINCIPAL, drops);3684if (dtp->dt_oformat)3685xo_close_instance("probes");36863687return (rval);3688}36893690/*3691* Reduce memory usage by shrinking the buffer if it's no more than half full.3692* Note, we need to preserve the alignment of the data at dtbd_oldest, which is3693* only 4-byte aligned.3694*/3695static void3696dt_realloc_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf, int cursize)3697{3698uint64_t used = buf->dtbd_size - buf->dtbd_oldest;3699if (used < cursize / 2) {3700int misalign = buf->dtbd_oldest & (sizeof (uint64_t) - 1);3701char 
*newdata = dt_alloc(dtp, used + misalign);3702if (newdata == NULL)3703return;3704bzero(newdata, misalign);3705bcopy(buf->dtbd_data + buf->dtbd_oldest,3706newdata + misalign, used);3707dt_free(dtp, buf->dtbd_data);3708buf->dtbd_oldest = misalign;3709buf->dtbd_size = used + misalign;3710buf->dtbd_data = newdata;3711}3712}37133714/*3715* If the ring buffer has wrapped, the data is not in order. Rearrange it3716* so that it is. Note, we need to preserve the alignment of the data at3717* dtbd_oldest, which is only 4-byte aligned.3718*/3719static int3720dt_unring_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf)3721{3722int misalign;3723char *newdata, *ndp;37243725if (buf->dtbd_oldest == 0)3726return (0);37273728misalign = buf->dtbd_oldest & (sizeof (uint64_t) - 1);3729newdata = ndp = dt_alloc(dtp, buf->dtbd_size + misalign);37303731if (newdata == NULL)3732return (-1);37333734assert(0 == (buf->dtbd_size & (sizeof (uint64_t) - 1)));37353736bzero(ndp, misalign);3737ndp += misalign;37383739bcopy(buf->dtbd_data + buf->dtbd_oldest, ndp,3740buf->dtbd_size - buf->dtbd_oldest);3741ndp += buf->dtbd_size - buf->dtbd_oldest;37423743bcopy(buf->dtbd_data, ndp, buf->dtbd_oldest);37443745dt_free(dtp, buf->dtbd_data);3746buf->dtbd_oldest = misalign;3747buf->dtbd_data = newdata;3748buf->dtbd_size += misalign;37493750return (0);3751}37523753static void3754dt_put_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf)3755{3756dt_free(dtp, buf->dtbd_data);3757dt_free(dtp, buf);3758}37593760/*3761* Returns 0 on success, in which case *cbp will be filled in if we retrieved3762* data, or NULL if there is no data for this CPU.3763* Returns -1 on failure and sets dt_errno.3764*/3765static int3766dt_get_buf(dtrace_hdl_t *dtp, int cpu, dtrace_bufdesc_t **bufp)3767{3768dtrace_optval_t size;3769dtrace_bufdesc_t *buf = dt_zalloc(dtp, sizeof (*buf));3770int error, rval;37713772if (buf == NULL)3773return (-1);37743775(void) dtrace_getopt(dtp, "bufsize", &size);3776buf->dtbd_data = dt_alloc(dtp, size);3777if 
(buf->dtbd_data == NULL) {3778dt_free(dtp, buf);3779return (-1);3780}3781buf->dtbd_size = size;3782buf->dtbd_cpu = cpu;37833784#ifdef illumos3785if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {3786#else3787if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) {3788#endif3789/*3790* If we failed with ENOENT, it may be because the3791* CPU was unconfigured -- this is okay. Any other3792* error, however, is unexpected.3793*/3794if (errno == ENOENT) {3795*bufp = NULL;3796rval = 0;3797} else3798rval = dt_set_errno(dtp, errno);37993800dt_put_buf(dtp, buf);3801return (rval);3802}38033804error = dt_unring_buf(dtp, buf);3805if (error != 0) {3806dt_put_buf(dtp, buf);3807return (error);3808}3809dt_realloc_buf(dtp, buf, size);38103811*bufp = buf;3812return (0);3813}38143815typedef struct dt_begin {3816dtrace_consume_probe_f *dtbgn_probefunc;3817dtrace_consume_rec_f *dtbgn_recfunc;3818void *dtbgn_arg;3819dtrace_handle_err_f *dtbgn_errhdlr;3820void *dtbgn_errarg;3821int dtbgn_beginonly;3822} dt_begin_t;38233824static int3825dt_consume_begin_probe(const dtrace_probedata_t *data, void *arg)3826{3827dt_begin_t *begin = arg;3828dtrace_probedesc_t *pd = data->dtpda_pdesc;38293830int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);3831int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);38323833if (begin->dtbgn_beginonly) {3834if (!(r1 && r2))3835return (DTRACE_CONSUME_NEXT);3836} else {3837if (r1 && r2)3838return (DTRACE_CONSUME_NEXT);3839}38403841/*3842* We have a record that we're interested in. 
Now call the underlying3843* probe function...3844*/3845return (begin->dtbgn_probefunc(data, begin->dtbgn_arg));3846}38473848static int3849dt_consume_begin_record(const dtrace_probedata_t *data,3850const dtrace_recdesc_t *rec, void *arg)3851{3852dt_begin_t *begin = arg;38533854return (begin->dtbgn_recfunc(data, rec, begin->dtbgn_arg));3855}38563857static int3858dt_consume_begin_error(const dtrace_errdata_t *data, void *arg)3859{3860dt_begin_t *begin = (dt_begin_t *)arg;3861dtrace_probedesc_t *pd = data->dteda_pdesc;38623863int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);3864int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);38653866if (begin->dtbgn_beginonly) {3867if (!(r1 && r2))3868return (DTRACE_HANDLE_OK);3869} else {3870if (r1 && r2)3871return (DTRACE_HANDLE_OK);3872}38733874return (begin->dtbgn_errhdlr(data, begin->dtbgn_errarg));3875}38763877static int3878dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp,3879dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)3880{3881/*3882* There's this idea that the BEGIN probe should be processed before3883* everything else, and that the END probe should be processed after3884* anything else. In the common case, this is pretty easy to deal3885* with. However, a situation may arise where the BEGIN enabling and3886* END enabling are on the same CPU, and some enabling in the middle3887* occurred on a different CPU. To deal with this (blech!) we need to3888* consume the BEGIN buffer up until the end of the BEGIN probe, and3889* then set it aside. We will then process every other CPU, and then3890* we'll return to the BEGIN CPU and process the rest of the data3891* (which will inevitably include the END probe, if any). Making this3892* even more complicated (!) is the library's ERROR enabling. 
Because3893* this enabling is processed before we even get into the consume call3894* back, any ERROR firing would result in the library's ERROR enabling3895* being processed twice -- once in our first pass (for BEGIN probes),3896* and again in our second pass (for everything but BEGIN probes). To3897* deal with this, we interpose on the ERROR handler to assure that we3898* only process ERROR enablings induced by BEGIN enablings in the3899* first pass, and that we only process ERROR enablings _not_ induced3900* by BEGIN enablings in the second pass.3901*/39023903dt_begin_t begin;3904processorid_t cpu = dtp->dt_beganon;3905int rval, i;3906static int max_ncpus;3907dtrace_bufdesc_t *buf;39083909dtp->dt_beganon = -1;39103911if (dt_get_buf(dtp, cpu, &buf) != 0)3912return (-1);3913if (buf == NULL)3914return (0);39153916if (!dtp->dt_stopped || buf->dtbd_cpu != dtp->dt_endedon) {3917/*3918* This is the simple case. We're either not stopped, or if3919* we are, we actually processed any END probes on another3920* CPU. 
We can simply consume this buffer and return.3921*/3922rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE,3923pf, rf, arg);3924dt_put_buf(dtp, buf);3925return (rval);3926}39273928begin.dtbgn_probefunc = pf;3929begin.dtbgn_recfunc = rf;3930begin.dtbgn_arg = arg;3931begin.dtbgn_beginonly = 1;39323933/*3934* We need to interpose on the ERROR handler to be sure that we3935* only process ERRORs induced by BEGIN.3936*/3937begin.dtbgn_errhdlr = dtp->dt_errhdlr;3938begin.dtbgn_errarg = dtp->dt_errarg;3939dtp->dt_errhdlr = dt_consume_begin_error;3940dtp->dt_errarg = &begin;39413942rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE,3943dt_consume_begin_probe, dt_consume_begin_record, &begin);39443945dtp->dt_errhdlr = begin.dtbgn_errhdlr;3946dtp->dt_errarg = begin.dtbgn_errarg;39473948if (rval != 0) {3949dt_put_buf(dtp, buf);3950return (rval);3951}39523953if (max_ncpus == 0 && (max_ncpus = dt_cpu_maxid(dtp) + 1) <= 0)3954return (-1);39553956for (i = 0; i < max_ncpus; i++) {3957dtrace_bufdesc_t *nbuf;3958if (i == cpu)3959continue;39603961if (dt_get_buf(dtp, i, &nbuf) != 0) {3962dt_put_buf(dtp, buf);3963return (-1);3964}3965if (nbuf == NULL)3966continue;39673968rval = dt_consume_cpu(dtp, fp, i, nbuf, B_FALSE,3969pf, rf, arg);3970dt_put_buf(dtp, nbuf);3971if (rval != 0) {3972dt_put_buf(dtp, buf);3973return (rval);3974}3975}39763977/*3978* Okay -- we're done with the other buffers. Now we want to3979* reconsume the first buffer -- but this time we're looking for3980* everything _but_ BEGIN. 
And of course, in order to only consume3981* those ERRORs _not_ associated with BEGIN, we need to reinstall our3982* ERROR interposition function...3983*/3984begin.dtbgn_beginonly = 0;39853986assert(begin.dtbgn_errhdlr == dtp->dt_errhdlr);3987assert(begin.dtbgn_errarg == dtp->dt_errarg);3988dtp->dt_errhdlr = dt_consume_begin_error;3989dtp->dt_errarg = &begin;39903991rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE,3992dt_consume_begin_probe, dt_consume_begin_record, &begin);39933994dtp->dt_errhdlr = begin.dtbgn_errhdlr;3995dtp->dt_errarg = begin.dtbgn_errarg;39963997return (rval);3998}39994000/* ARGSUSED */4001static uint64_t4002dt_buf_oldest(void *elem, void *arg)4003{4004dtrace_bufdesc_t *buf = elem;4005size_t offs = buf->dtbd_oldest;40064007while (offs < buf->dtbd_size) {4008dtrace_rechdr_t *dtrh =4009/* LINTED - alignment */4010(dtrace_rechdr_t *)(buf->dtbd_data + offs);4011if (dtrh->dtrh_epid == DTRACE_EPIDNONE) {4012offs += sizeof (dtrace_epid_t);4013} else {4014return (DTRACE_RECORD_LOAD_TIMESTAMP(dtrh));4015}4016}40174018/* There are no records left; use the time the buffer was retrieved. 
*/4019return (buf->dtbd_timestamp);4020}40214022int4023dtrace_consume(dtrace_hdl_t *dtp, FILE *fp,4024dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)4025{4026dtrace_optval_t size;4027static int max_ncpus;4028int i, rval;4029dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_SWITCHRATE];4030hrtime_t now = gethrtime();40314032if (dtp->dt_lastswitch != 0) {4033if (now - dtp->dt_lastswitch < interval)4034return (0);40354036dtp->dt_lastswitch += interval;4037} else {4038dtp->dt_lastswitch = now;4039}40404041if (!dtp->dt_active)4042return (dt_set_errno(dtp, EINVAL));40434044if (max_ncpus == 0 && (max_ncpus = dt_cpu_maxid(dtp) + 1) <= 0)4045return (-1);40464047if (pf == NULL)4048pf = (dtrace_consume_probe_f *)dt_nullprobe;40494050if (rf == NULL)4051rf = (dtrace_consume_rec_f *)dt_nullrec;40524053if (dtp->dt_options[DTRACEOPT_TEMPORAL] == DTRACEOPT_UNSET) {4054/*4055* The output will not be in the order it was traced. Rather,4056* we will consume all of the data from each CPU's buffer in4057* turn. 
We apply special handling for the records from BEGIN4058* and END probes so that they are consumed first and last,4059* respectively.4060*4061* If we have just begun, we want to first process the CPU that4062* executed the BEGIN probe (if any).4063*/4064if (dtp->dt_active && dtp->dt_beganon != -1 &&4065(rval = dt_consume_begin(dtp, fp, pf, rf, arg)) != 0)4066return (rval);40674068for (i = 0; i < max_ncpus; i++) {4069dtrace_bufdesc_t *buf;40704071/*4072* If we have stopped, we want to process the CPU on4073* which the END probe was processed only _after_ we4074* have processed everything else.4075*/4076if (dtp->dt_stopped && (i == dtp->dt_endedon))4077continue;40784079if (dt_get_buf(dtp, i, &buf) != 0)4080return (-1);4081if (buf == NULL)4082continue;40834084dtp->dt_flow = 0;4085dtp->dt_indent = 0;4086dtp->dt_prefix = NULL;4087rval = dt_consume_cpu(dtp, fp, i,4088buf, B_FALSE, pf, rf, arg);4089dt_put_buf(dtp, buf);4090if (rval != 0)4091return (rval);4092}4093if (dtp->dt_stopped) {4094dtrace_bufdesc_t *buf;40954096if (dt_get_buf(dtp, dtp->dt_endedon, &buf) != 0)4097return (-1);4098if (buf == NULL)4099return (0);41004101rval = dt_consume_cpu(dtp, fp, dtp->dt_endedon,4102buf, B_FALSE, pf, rf, arg);4103dt_put_buf(dtp, buf);4104return (rval);4105}4106} else {4107/*4108* The output will be in the order it was traced (or for4109* speculations, when it was committed). We retrieve a buffer4110* from each CPU and put it into a priority queue, which sorts4111* based on the first entry in the buffer. This is sufficient4112* because entries within a buffer are already sorted.4113*4114* We then consume records one at a time, always consuming the4115* oldest record, as determined by the priority queue. When4116* we reach the end of the time covered by these buffers,4117* we need to stop and retrieve more records on the next pass.4118* The kernel tells us the time covered by each buffer, in4119* dtbd_timestamp. 
The first buffer's timestamp tells us the4120* time covered by all buffers, as subsequently retrieved4121* buffers will cover to a more recent time.4122*/41234124uint64_t *drops = alloca(max_ncpus * sizeof (uint64_t));4125uint64_t first_timestamp = 0;4126uint_t cookie = 0;4127dtrace_bufdesc_t *buf;41284129bzero(drops, max_ncpus * sizeof (uint64_t));41304131if (dtp->dt_bufq == NULL) {4132dtp->dt_bufq = dt_pq_init(dtp, max_ncpus * 2,4133dt_buf_oldest, NULL);4134if (dtp->dt_bufq == NULL) /* ENOMEM */4135return (-1);4136}41374138/* Retrieve data from each CPU. */4139(void) dtrace_getopt(dtp, "bufsize", &size);4140for (i = 0; i < max_ncpus; i++) {4141dtrace_bufdesc_t *buf;41424143if (dt_get_buf(dtp, i, &buf) != 0)4144return (-1);4145if (buf != NULL) {4146if (first_timestamp == 0)4147first_timestamp = buf->dtbd_timestamp;4148assert(buf->dtbd_timestamp >= first_timestamp);41494150dt_pq_insert(dtp->dt_bufq, buf);4151drops[i] = buf->dtbd_drops;4152buf->dtbd_drops = 0;4153}4154}41554156/* Consume records. */4157for (;;) {4158dtrace_bufdesc_t *buf = dt_pq_pop(dtp->dt_bufq);4159uint64_t timestamp;41604161if (buf == NULL)4162break;41634164timestamp = dt_buf_oldest(buf, dtp);4165if (timestamp == buf->dtbd_timestamp) {4166/*4167* We've reached the end of the time covered4168* by this buffer. If this is the oldest4169* buffer, we must do another pass4170* to retrieve more data.4171*/4172dt_put_buf(dtp, buf);4173if (timestamp == first_timestamp &&4174!dtp->dt_stopped)4175break;4176continue;4177}4178assert(timestamp >= dtp->dt_last_timestamp);4179dtp->dt_last_timestamp = timestamp;41804181if ((rval = dt_consume_cpu(dtp, fp,4182buf->dtbd_cpu, buf, B_TRUE, pf, rf, arg)) != 0)4183return (rval);4184dt_pq_insert(dtp->dt_bufq, buf);4185}41864187/* Consume drops. 
*/4188for (i = 0; i < max_ncpus; i++) {4189if (drops[i] != 0) {4190int error;41914192if (dtp->dt_oformat) {4193xo_open_instance("probes");4194dt_oformat_drop(dtp, i);4195}4196error = dt_handle_cpudrop(dtp, i,4197DTRACEDROP_PRINCIPAL, drops[i]);4198if (dtp->dt_oformat)4199xo_close_instance("probes");4200if (error != 0)4201return (error);4202}4203}42044205/*4206* Reduce memory usage by re-allocating smaller buffers4207* for the "remnants".4208*/4209while (buf = dt_pq_walk(dtp->dt_bufq, &cookie))4210dt_realloc_buf(dtp, buf, buf->dtbd_size);4211}42124213return (0);4214}42154216void4217dtrace_oformat_probe(dtrace_hdl_t *dtp __unused, const dtrace_probedata_t *data,4218processorid_t cpu, dtrace_probedesc_t *pd)4219{42204221xo_emit("{:timestamp/%llu} {:cpu/%d} {:id/%d} {:provider/%s} "4222"{:module/%s} {:function/%s} {:name/%s}",4223(unsigned long long)data->dtpda_timestamp, cpu, pd->dtpd_id,4224pd->dtpd_provider, pd->dtpd_mod, pd->dtpd_func, pd->dtpd_name);4225}42264227void4228dt_oformat_drop(dtrace_hdl_t *dtp, processorid_t cpu)4229{4230xo_emit("{:cpu/%d} {:id/%d} {:provider/%s} "4231"{:module/%s} {:function/%s} {:name/%s}",4232cpu, -1, "dtrace", "INTERNAL", "INTERNAL", "DROP");4233}423442354236