Path: blob/main/cddl/contrib/opensolaris/lib/libdtrace/common/dt_aggregate.c
39562 views
/*1* CDDL HEADER START2*3* The contents of this file are subject to the terms of the4* Common Development and Distribution License (the "License").5* You may not use this file except in compliance with the License.6*7* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE8* or http://www.opensolaris.org/os/licensing.9* See the License for the specific language governing permissions10* and limitations under the License.11*12* When distributing Covered Code, include this CDDL HEADER in each13* file and include the License file at usr/src/OPENSOLARIS.LICENSE.14* If applicable, add the following below this CDDL HEADER, with the15* fields enclosed by brackets "[]" replaced with your own identifying16* information: Portions Copyright [yyyy] [name of copyright owner]17*18* CDDL HEADER END19*/2021/*22* Copyright 2008 Sun Microsystems, Inc. All rights reserved.23* Use is subject to license terms.24*/2526/*27* Copyright (c) 2013, Joyent, Inc. All rights reserved.28* Copyright (c) 2012 by Delphix. All rights reserved.29*/3031#include <stdlib.h>32#include <strings.h>33#include <errno.h>34#include <unistd.h>35#include <dt_impl.h>36#include <assert.h>37#include <dt_oformat.h>38#ifdef illumos39#include <alloca.h>40#else41#include <sys/sysctl.h>42#include <libproc_compat.h>43#endif44#include <limits.h>4546#define DTRACE_AHASHSIZE 32779 /* big 'ol prime */4748/*49* Because qsort(3C) does not allow an argument to be passed to a comparison50* function, the variables that affect comparison must regrettably be global;51* they are protected by a global static lock, dt_qsort_lock.52*/53static pthread_mutex_t dt_qsort_lock = PTHREAD_MUTEX_INITIALIZER;5455static int dt_revsort;56static int dt_keysort;57static int dt_keypos;5859#define DT_LESSTHAN (dt_revsort == 0 ? -1 : 1)60#define DT_GREATERTHAN (dt_revsort == 0 ? 
1 : -1)6162static void63dt_aggregate_count(int64_t *existing, int64_t *new, size_t size)64{65uint_t i;6667for (i = 0; i < size / sizeof (int64_t); i++)68existing[i] = existing[i] + new[i];69}7071static int72dt_aggregate_countcmp(int64_t *lhs, int64_t *rhs)73{74int64_t lvar = *lhs;75int64_t rvar = *rhs;7677if (lvar < rvar)78return (DT_LESSTHAN);7980if (lvar > rvar)81return (DT_GREATERTHAN);8283return (0);84}8586/*ARGSUSED*/87static void88dt_aggregate_min(int64_t *existing, int64_t *new, size_t size)89{90if (*new < *existing)91*existing = *new;92}9394/*ARGSUSED*/95static void96dt_aggregate_max(int64_t *existing, int64_t *new, size_t size)97{98if (*new > *existing)99*existing = *new;100}101102static int103dt_aggregate_averagecmp(int64_t *lhs, int64_t *rhs)104{105int64_t lavg = lhs[0] ? (lhs[1] / lhs[0]) : 0;106int64_t ravg = rhs[0] ? (rhs[1] / rhs[0]) : 0;107108if (lavg < ravg)109return (DT_LESSTHAN);110111if (lavg > ravg)112return (DT_GREATERTHAN);113114return (0);115}116117static int118dt_aggregate_stddevcmp(int64_t *lhs, int64_t *rhs)119{120uint64_t lsd = dt_stddev((uint64_t *)lhs, 1);121uint64_t rsd = dt_stddev((uint64_t *)rhs, 1);122123if (lsd < rsd)124return (DT_LESSTHAN);125126if (lsd > rsd)127return (DT_GREATERTHAN);128129return (0);130}131132/*ARGSUSED*/133static void134dt_aggregate_lquantize(int64_t *existing, int64_t *new, size_t size)135{136int64_t arg = *existing++;137uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);138int i;139140for (i = 0; i <= levels + 1; i++)141existing[i] = existing[i] + new[i + 1];142}143144static long double145dt_aggregate_lquantizedsum(int64_t *lquanta)146{147int64_t arg = *lquanta++;148int32_t base = DTRACE_LQUANTIZE_BASE(arg);149uint16_t step = DTRACE_LQUANTIZE_STEP(arg);150uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg), i;151long double total = (long double)lquanta[0] * (long double)(base - 1);152153for (i = 0; i < levels; base += step, i++)154total += (long double)lquanta[i + 1] * (long double)base;155156return (total + (long 
double)lquanta[levels + 1] *157(long double)(base + 1));158}159160static int64_t161dt_aggregate_lquantizedzero(int64_t *lquanta)162{163int64_t arg = *lquanta++;164int32_t base = DTRACE_LQUANTIZE_BASE(arg);165uint16_t step = DTRACE_LQUANTIZE_STEP(arg);166uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg), i;167168if (base - 1 == 0)169return (lquanta[0]);170171for (i = 0; i < levels; base += step, i++) {172if (base != 0)173continue;174175return (lquanta[i + 1]);176}177178if (base + 1 == 0)179return (lquanta[levels + 1]);180181return (0);182}183184static int185dt_aggregate_lquantizedcmp(int64_t *lhs, int64_t *rhs)186{187long double lsum = dt_aggregate_lquantizedsum(lhs);188long double rsum = dt_aggregate_lquantizedsum(rhs);189int64_t lzero, rzero;190191if (lsum < rsum)192return (DT_LESSTHAN);193194if (lsum > rsum)195return (DT_GREATERTHAN);196197/*198* If they're both equal, then we will compare based on the weights at199* zero. If the weights at zero are equal (or if zero is not within200* the range of the linear quantization), then this will be judged a201* tie and will be resolved based on the key comparison.202*/203lzero = dt_aggregate_lquantizedzero(lhs);204rzero = dt_aggregate_lquantizedzero(rhs);205206if (lzero < rzero)207return (DT_LESSTHAN);208209if (lzero > rzero)210return (DT_GREATERTHAN);211212return (0);213}214215static void216dt_aggregate_llquantize(int64_t *existing, int64_t *new, size_t size)217{218int i;219220for (i = 1; i < size / sizeof (int64_t); i++)221existing[i] = existing[i] + new[i];222}223224static long double225dt_aggregate_llquantizedsum(int64_t *llquanta)226{227int64_t arg = *llquanta++;228uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg);229uint16_t low = DTRACE_LLQUANTIZE_LOW(arg);230uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg);231uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);232int bin = 0, order;233int64_t value = 1, next, step;234long double total;235236assert(nsteps >= factor);237assert(nsteps % factor == 0);238239for (order = 0; order < 
low; order++)240value *= factor;241242total = (long double)llquanta[bin++] * (long double)(value - 1);243244next = value * factor;245step = next > nsteps ? next / nsteps : 1;246247while (order <= high) {248assert(value < next);249total += (long double)llquanta[bin++] * (long double)(value);250251if ((value += step) != next)252continue;253254next = value * factor;255step = next > nsteps ? next / nsteps : 1;256order++;257}258259return (total + (long double)llquanta[bin] * (long double)value);260}261262static int263dt_aggregate_llquantizedcmp(int64_t *lhs, int64_t *rhs)264{265long double lsum = dt_aggregate_llquantizedsum(lhs);266long double rsum = dt_aggregate_llquantizedsum(rhs);267int64_t lzero, rzero;268269if (lsum < rsum)270return (DT_LESSTHAN);271272if (lsum > rsum)273return (DT_GREATERTHAN);274275/*276* If they're both equal, then we will compare based on the weights at277* zero. If the weights at zero are equal, then this will be judged a278* tie and will be resolved based on the key comparison.279*/280lzero = lhs[1];281rzero = rhs[1];282283if (lzero < rzero)284return (DT_LESSTHAN);285286if (lzero > rzero)287return (DT_GREATERTHAN);288289return (0);290}291292static int293dt_aggregate_quantizedcmp(int64_t *lhs, int64_t *rhs)294{295int nbuckets = DTRACE_QUANTIZE_NBUCKETS;296long double ltotal = 0, rtotal = 0;297int64_t lzero, rzero;298uint_t i;299300for (i = 0; i < nbuckets; i++) {301int64_t bucketval = DTRACE_QUANTIZE_BUCKETVAL(i);302303if (bucketval == 0) {304lzero = lhs[i];305rzero = rhs[i];306}307308ltotal += (long double)bucketval * (long double)lhs[i];309rtotal += (long double)bucketval * (long double)rhs[i];310}311312if (ltotal < rtotal)313return (DT_LESSTHAN);314315if (ltotal > rtotal)316return (DT_GREATERTHAN);317318/*319* If they're both equal, then we will compare based on the weights at320* zero. 
If the weights at zero are equal, then this will be judged a321* tie and will be resolved based on the key comparison.322*/323if (lzero < rzero)324return (DT_LESSTHAN);325326if (lzero > rzero)327return (DT_GREATERTHAN);328329return (0);330}331332static void333dt_aggregate_usym(dtrace_hdl_t *dtp, uint64_t *data)334{335uint64_t pid = data[0];336uint64_t *pc = &data[1];337struct ps_prochandle *P;338GElf_Sym sym;339340if (dtp->dt_vector != NULL)341return;342343if ((P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0)) == NULL)344return;345346dt_proc_lock(dtp, P);347348if (Plookup_by_addr(P, *pc, NULL, 0, &sym) == 0)349*pc = sym.st_value;350351dt_proc_unlock(dtp, P);352dt_proc_release(dtp, P);353}354355static void356dt_aggregate_umod(dtrace_hdl_t *dtp, uint64_t *data)357{358uint64_t pid = data[0];359uint64_t *pc = &data[1];360struct ps_prochandle *P;361const prmap_t *map;362363if (dtp->dt_vector != NULL)364return;365366if ((P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0)) == NULL)367return;368369dt_proc_lock(dtp, P);370371if ((map = Paddr_to_map(P, *pc)) != NULL)372*pc = map->pr_vaddr;373374dt_proc_unlock(dtp, P);375dt_proc_release(dtp, P);376}377378static void379dt_aggregate_sym(dtrace_hdl_t *dtp, uint64_t *data)380{381GElf_Sym sym;382uint64_t *pc = data;383384if (dtrace_lookup_by_addr(dtp, *pc, &sym, NULL) == 0)385*pc = sym.st_value;386}387388static void389dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *data)390{391uint64_t *pc = data;392dt_module_t *dmp;393394if (dtp->dt_vector != NULL) {395/*396* We don't have a way of just getting the module for a397* vectored open, and it doesn't seem to be worth defining398* one. This means that use of mod() won't get true399* aggregation in the postmortem case (some modules may400* appear more than once in aggregation output). 
It seems401* unlikely that anyone will ever notice or care...402*/403return;404}405406for (dmp = dt_list_next(&dtp->dt_modlist); dmp != NULL;407dmp = dt_list_next(dmp)) {408if (*pc - dmp->dm_text_va < dmp->dm_text_size) {409*pc = dmp->dm_text_va;410return;411}412}413}414415static dtrace_aggvarid_t416dt_aggregate_aggvarid(dt_ahashent_t *ent)417{418dtrace_aggdesc_t *agg = ent->dtahe_data.dtada_desc;419caddr_t data = ent->dtahe_data.dtada_data;420dtrace_recdesc_t *rec = agg->dtagd_rec;421422/*423* First, we'll check the variable ID in the aggdesc. If it's valid,424* we'll return it. If not, we'll use the compiler-generated ID425* present as the first record.426*/427if (agg->dtagd_varid != DTRACE_AGGVARIDNONE)428return (agg->dtagd_varid);429430agg->dtagd_varid = *((dtrace_aggvarid_t *)(uintptr_t)(data +431rec->dtrd_offset));432433return (agg->dtagd_varid);434}435436437static int438dt_aggregate_snap_cpu(dtrace_hdl_t *dtp, processorid_t cpu)439{440dtrace_epid_t id;441uint64_t hashval;442size_t offs, roffs, size, ndx;443int i, j, rval;444caddr_t addr, data;445dtrace_recdesc_t *rec;446dt_aggregate_t *agp = &dtp->dt_aggregate;447dtrace_aggdesc_t *agg;448dt_ahash_t *hash = &agp->dtat_hash;449dt_ahashent_t *h;450dtrace_bufdesc_t b = agp->dtat_buf, *buf = &b;451dtrace_aggdata_t *aggdata;452int flags = agp->dtat_flags;453454buf->dtbd_cpu = cpu;455456#ifdef illumos457if (dt_ioctl(dtp, DTRACEIOC_AGGSNAP, buf) == -1) {458#else459if (dt_ioctl(dtp, DTRACEIOC_AGGSNAP, &buf) == -1) {460#endif461if (errno == ENOENT) {462/*463* If that failed with ENOENT, it may be because the464* CPU was unconfigured. 
This is okay; we'll just465* do nothing but return success.466*/467return (0);468}469470return (dt_set_errno(dtp, errno));471}472473if (buf->dtbd_drops != 0) {474int error;475476if (dtp->dt_oformat) {477xo_open_instance("probes");478dt_oformat_drop(dtp, cpu);479}480error = dt_handle_cpudrop(dtp, cpu, DTRACEDROP_AGGREGATION,481buf->dtbd_drops);482if (dtp->dt_oformat)483xo_close_instance("probes");484if (error != 0)485return (-1);486}487488if (buf->dtbd_size == 0)489return (0);490491if (hash->dtah_hash == NULL) {492size_t size;493494hash->dtah_size = DTRACE_AHASHSIZE;495size = hash->dtah_size * sizeof (dt_ahashent_t *);496497if ((hash->dtah_hash = malloc(size)) == NULL)498return (dt_set_errno(dtp, EDT_NOMEM));499500bzero(hash->dtah_hash, size);501}502503for (offs = 0; offs < buf->dtbd_size; ) {504/*505* We're guaranteed to have an ID.506*/507id = *((dtrace_epid_t *)((uintptr_t)buf->dtbd_data +508(uintptr_t)offs));509510if (id == DTRACE_AGGIDNONE) {511/*512* This is filler to assure proper alignment of the513* next record; we simply ignore it.514*/515offs += sizeof (id);516continue;517}518519if ((rval = dt_aggid_lookup(dtp, id, &agg)) != 0)520return (rval);521522addr = buf->dtbd_data + offs;523size = agg->dtagd_size;524hashval = 0;525526for (j = 0; j < agg->dtagd_nrecs - 1; j++) {527rec = &agg->dtagd_rec[j];528roffs = rec->dtrd_offset;529530switch (rec->dtrd_action) {531case DTRACEACT_USYM:532dt_aggregate_usym(dtp,533/* LINTED - alignment */534(uint64_t *)&addr[roffs]);535break;536537case DTRACEACT_UMOD:538dt_aggregate_umod(dtp,539/* LINTED - alignment */540(uint64_t *)&addr[roffs]);541break;542543case DTRACEACT_SYM:544/* LINTED - alignment */545dt_aggregate_sym(dtp, (uint64_t *)&addr[roffs]);546break;547548case DTRACEACT_MOD:549/* LINTED - alignment */550dt_aggregate_mod(dtp, (uint64_t *)&addr[roffs]);551break;552553default:554break;555}556557for (i = 0; i < rec->dtrd_size; i++)558hashval += addr[roffs + i];559}560561ndx = hashval % hash->dtah_size;562563for (h = 
hash->dtah_hash[ndx]; h != NULL; h = h->dtahe_next) {564if (h->dtahe_hashval != hashval)565continue;566567if (h->dtahe_size != size)568continue;569570aggdata = &h->dtahe_data;571data = aggdata->dtada_data;572573for (j = 0; j < agg->dtagd_nrecs - 1; j++) {574rec = &agg->dtagd_rec[j];575roffs = rec->dtrd_offset;576577for (i = 0; i < rec->dtrd_size; i++)578if (addr[roffs + i] != data[roffs + i])579goto hashnext;580}581582/*583* We found it. Now we need to apply the aggregating584* action on the data here.585*/586rec = &agg->dtagd_rec[agg->dtagd_nrecs - 1];587roffs = rec->dtrd_offset;588/* LINTED - alignment */589h->dtahe_aggregate((int64_t *)&data[roffs],590/* LINTED - alignment */591(int64_t *)&addr[roffs], rec->dtrd_size);592593/*594* If we're keeping per CPU data, apply the aggregating595* action there as well.596*/597if (aggdata->dtada_percpu != NULL) {598data = aggdata->dtada_percpu[cpu];599600/* LINTED - alignment */601h->dtahe_aggregate((int64_t *)data,602/* LINTED - alignment */603(int64_t *)&addr[roffs], rec->dtrd_size);604}605606goto bufnext;607hashnext:608continue;609}610611/*612* If we're here, we couldn't find an entry for this record.613*/614if ((h = malloc(sizeof (dt_ahashent_t))) == NULL)615return (dt_set_errno(dtp, EDT_NOMEM));616bzero(h, sizeof (dt_ahashent_t));617aggdata = &h->dtahe_data;618619if ((aggdata->dtada_data = malloc(size)) == NULL) {620free(h);621return (dt_set_errno(dtp, EDT_NOMEM));622}623624bcopy(addr, aggdata->dtada_data, size);625aggdata->dtada_size = size;626aggdata->dtada_desc = agg;627aggdata->dtada_handle = dtp;628(void) dt_epid_lookup(dtp, agg->dtagd_epid,629&aggdata->dtada_edesc, &aggdata->dtada_pdesc);630aggdata->dtada_normal = 1;631632h->dtahe_hashval = hashval;633h->dtahe_size = size;634(void) dt_aggregate_aggvarid(h);635636rec = &agg->dtagd_rec[agg->dtagd_nrecs - 1];637638if (flags & DTRACE_A_PERCPU) {639int max_cpus = agp->dtat_maxcpu;640caddr_t *percpu = malloc(max_cpus * sizeof (caddr_t));641642if (percpu == NULL) 
{643free(aggdata->dtada_data);644free(h);645return (dt_set_errno(dtp, EDT_NOMEM));646}647648for (j = 0; j < max_cpus; j++) {649percpu[j] = malloc(rec->dtrd_size);650651if (percpu[j] == NULL) {652while (--j >= 0)653free(percpu[j]);654655free(aggdata->dtada_data);656free(h);657return (dt_set_errno(dtp, EDT_NOMEM));658}659660if (j == cpu) {661bcopy(&addr[rec->dtrd_offset],662percpu[j], rec->dtrd_size);663} else {664bzero(percpu[j], rec->dtrd_size);665}666}667668aggdata->dtada_percpu = percpu;669}670671switch (rec->dtrd_action) {672case DTRACEAGG_MIN:673h->dtahe_aggregate = dt_aggregate_min;674break;675676case DTRACEAGG_MAX:677h->dtahe_aggregate = dt_aggregate_max;678break;679680case DTRACEAGG_LQUANTIZE:681h->dtahe_aggregate = dt_aggregate_lquantize;682break;683684case DTRACEAGG_LLQUANTIZE:685h->dtahe_aggregate = dt_aggregate_llquantize;686break;687688case DTRACEAGG_COUNT:689case DTRACEAGG_SUM:690case DTRACEAGG_AVG:691case DTRACEAGG_STDDEV:692case DTRACEAGG_QUANTIZE:693h->dtahe_aggregate = dt_aggregate_count;694break;695696default:697return (dt_set_errno(dtp, EDT_BADAGG));698}699700if (hash->dtah_hash[ndx] != NULL)701hash->dtah_hash[ndx]->dtahe_prev = h;702703h->dtahe_next = hash->dtah_hash[ndx];704hash->dtah_hash[ndx] = h;705706if (hash->dtah_all != NULL)707hash->dtah_all->dtahe_prevall = h;708709h->dtahe_nextall = hash->dtah_all;710hash->dtah_all = h;711bufnext:712offs += agg->dtagd_size;713}714715return (0);716}717718int719dtrace_aggregate_snap(dtrace_hdl_t *dtp)720{721int i, rval;722dt_aggregate_t *agp = &dtp->dt_aggregate;723hrtime_t now = gethrtime();724dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_AGGRATE];725726if (dtp->dt_lastagg != 0) {727if (now - dtp->dt_lastagg < interval)728return (0);729730dtp->dt_lastagg += interval;731} else {732dtp->dt_lastagg = now;733}734735if (!dtp->dt_active)736return (dt_set_errno(dtp, EINVAL));737738if (agp->dtat_buf.dtbd_size == 0)739return (0);740741for (i = 0; i < agp->dtat_ncpus; i++) {742if ((rval = 
dt_aggregate_snap_cpu(dtp, agp->dtat_cpus[i])))743return (rval);744}745746return (0);747}748749static int750dt_aggregate_hashcmp(const void *lhs, const void *rhs)751{752dt_ahashent_t *lh = *((dt_ahashent_t **)lhs);753dt_ahashent_t *rh = *((dt_ahashent_t **)rhs);754dtrace_aggdesc_t *lagg = lh->dtahe_data.dtada_desc;755dtrace_aggdesc_t *ragg = rh->dtahe_data.dtada_desc;756757if (lagg->dtagd_nrecs < ragg->dtagd_nrecs)758return (DT_LESSTHAN);759760if (lagg->dtagd_nrecs > ragg->dtagd_nrecs)761return (DT_GREATERTHAN);762763return (0);764}765766static int767dt_aggregate_varcmp(const void *lhs, const void *rhs)768{769dt_ahashent_t *lh = *((dt_ahashent_t **)lhs);770dt_ahashent_t *rh = *((dt_ahashent_t **)rhs);771dtrace_aggvarid_t lid, rid;772773lid = dt_aggregate_aggvarid(lh);774rid = dt_aggregate_aggvarid(rh);775776if (lid < rid)777return (DT_LESSTHAN);778779if (lid > rid)780return (DT_GREATERTHAN);781782return (0);783}784785static int786dt_aggregate_keycmp(const void *lhs, const void *rhs)787{788dt_ahashent_t *lh = *((dt_ahashent_t **)lhs);789dt_ahashent_t *rh = *((dt_ahashent_t **)rhs);790dtrace_aggdesc_t *lagg = lh->dtahe_data.dtada_desc;791dtrace_aggdesc_t *ragg = rh->dtahe_data.dtada_desc;792dtrace_recdesc_t *lrec, *rrec;793char *ldata, *rdata;794int rval, i, j, keypos, nrecs;795796if ((rval = dt_aggregate_hashcmp(lhs, rhs)) != 0)797return (rval);798799nrecs = lagg->dtagd_nrecs - 1;800assert(nrecs == ragg->dtagd_nrecs - 1);801802keypos = dt_keypos + 1 >= nrecs ? 
0 : dt_keypos;803804for (i = 1; i < nrecs; i++) {805uint64_t lval, rval;806int ndx = i + keypos;807808if (ndx >= nrecs)809ndx = ndx - nrecs + 1;810811lrec = &lagg->dtagd_rec[ndx];812rrec = &ragg->dtagd_rec[ndx];813814ldata = lh->dtahe_data.dtada_data + lrec->dtrd_offset;815rdata = rh->dtahe_data.dtada_data + rrec->dtrd_offset;816817if (lrec->dtrd_size < rrec->dtrd_size)818return (DT_LESSTHAN);819820if (lrec->dtrd_size > rrec->dtrd_size)821return (DT_GREATERTHAN);822823switch (lrec->dtrd_size) {824case sizeof (uint64_t):825/* LINTED - alignment */826lval = *((uint64_t *)ldata);827/* LINTED - alignment */828rval = *((uint64_t *)rdata);829break;830831case sizeof (uint32_t):832/* LINTED - alignment */833lval = *((uint32_t *)ldata);834/* LINTED - alignment */835rval = *((uint32_t *)rdata);836break;837838case sizeof (uint16_t):839/* LINTED - alignment */840lval = *((uint16_t *)ldata);841/* LINTED - alignment */842rval = *((uint16_t *)rdata);843break;844845case sizeof (uint8_t):846lval = *((uint8_t *)ldata);847rval = *((uint8_t *)rdata);848break;849850default:851switch (lrec->dtrd_action) {852case DTRACEACT_UMOD:853case DTRACEACT_UADDR:854case DTRACEACT_USYM:855for (j = 0; j < 2; j++) {856/* LINTED - alignment */857lval = ((uint64_t *)ldata)[j];858/* LINTED - alignment */859rval = ((uint64_t *)rdata)[j];860861if (lval < rval)862return (DT_LESSTHAN);863864if (lval > rval)865return (DT_GREATERTHAN);866}867868break;869870default:871for (j = 0; j < lrec->dtrd_size; j++) {872lval = ((uint8_t *)ldata)[j];873rval = ((uint8_t *)rdata)[j];874875if (lval < rval)876return (DT_LESSTHAN);877878if (lval > rval)879return (DT_GREATERTHAN);880}881}882883continue;884}885886if (lval < rval)887return (DT_LESSTHAN);888889if (lval > rval)890return (DT_GREATERTHAN);891}892893return (0);894}895896static int897dt_aggregate_valcmp(const void *lhs, const void *rhs)898{899dt_ahashent_t *lh = *((dt_ahashent_t **)lhs);900dt_ahashent_t *rh = *((dt_ahashent_t **)rhs);901dtrace_aggdesc_t *lagg = 
lh->dtahe_data.dtada_desc;902dtrace_aggdesc_t *ragg = rh->dtahe_data.dtada_desc;903caddr_t ldata = lh->dtahe_data.dtada_data;904caddr_t rdata = rh->dtahe_data.dtada_data;905dtrace_recdesc_t *lrec, *rrec;906int64_t *laddr, *raddr;907int rval;908909assert(lagg->dtagd_nrecs == ragg->dtagd_nrecs);910911lrec = &lagg->dtagd_rec[lagg->dtagd_nrecs - 1];912rrec = &ragg->dtagd_rec[ragg->dtagd_nrecs - 1];913914assert(lrec->dtrd_action == rrec->dtrd_action);915916laddr = (int64_t *)(uintptr_t)(ldata + lrec->dtrd_offset);917raddr = (int64_t *)(uintptr_t)(rdata + rrec->dtrd_offset);918919switch (lrec->dtrd_action) {920case DTRACEAGG_AVG:921rval = dt_aggregate_averagecmp(laddr, raddr);922break;923924case DTRACEAGG_STDDEV:925rval = dt_aggregate_stddevcmp(laddr, raddr);926break;927928case DTRACEAGG_QUANTIZE:929rval = dt_aggregate_quantizedcmp(laddr, raddr);930break;931932case DTRACEAGG_LQUANTIZE:933rval = dt_aggregate_lquantizedcmp(laddr, raddr);934break;935936case DTRACEAGG_LLQUANTIZE:937rval = dt_aggregate_llquantizedcmp(laddr, raddr);938break;939940case DTRACEAGG_COUNT:941case DTRACEAGG_SUM:942case DTRACEAGG_MIN:943case DTRACEAGG_MAX:944rval = dt_aggregate_countcmp(laddr, raddr);945break;946947default:948assert(0);949}950951return (rval);952}953954static int955dt_aggregate_valkeycmp(const void *lhs, const void *rhs)956{957int rval;958959if ((rval = dt_aggregate_valcmp(lhs, rhs)) != 0)960return (rval);961962/*963* If we're here, the values for the two aggregation elements are964* equal. 
We already know that the key layout is the same for the two965* elements; we must now compare the keys themselves as a tie-breaker.966*/967return (dt_aggregate_keycmp(lhs, rhs));968}969970static int971dt_aggregate_keyvarcmp(const void *lhs, const void *rhs)972{973int rval;974975if ((rval = dt_aggregate_keycmp(lhs, rhs)) != 0)976return (rval);977978return (dt_aggregate_varcmp(lhs, rhs));979}980981static int982dt_aggregate_varkeycmp(const void *lhs, const void *rhs)983{984int rval;985986if ((rval = dt_aggregate_varcmp(lhs, rhs)) != 0)987return (rval);988989return (dt_aggregate_keycmp(lhs, rhs));990}991992static int993dt_aggregate_valvarcmp(const void *lhs, const void *rhs)994{995int rval;996997if ((rval = dt_aggregate_valkeycmp(lhs, rhs)) != 0)998return (rval);9991000return (dt_aggregate_varcmp(lhs, rhs));1001}10021003static int1004dt_aggregate_varvalcmp(const void *lhs, const void *rhs)1005{1006int rval;10071008if ((rval = dt_aggregate_varcmp(lhs, rhs)) != 0)1009return (rval);10101011return (dt_aggregate_valkeycmp(lhs, rhs));1012}10131014static int1015dt_aggregate_keyvarrevcmp(const void *lhs, const void *rhs)1016{1017return (dt_aggregate_keyvarcmp(rhs, lhs));1018}10191020static int1021dt_aggregate_varkeyrevcmp(const void *lhs, const void *rhs)1022{1023return (dt_aggregate_varkeycmp(rhs, lhs));1024}10251026static int1027dt_aggregate_valvarrevcmp(const void *lhs, const void *rhs)1028{1029return (dt_aggregate_valvarcmp(rhs, lhs));1030}10311032static int1033dt_aggregate_varvalrevcmp(const void *lhs, const void *rhs)1034{1035return (dt_aggregate_varvalcmp(rhs, lhs));1036}10371038static int1039dt_aggregate_bundlecmp(const void *lhs, const void *rhs)1040{1041dt_ahashent_t **lh = *((dt_ahashent_t ***)lhs);1042dt_ahashent_t **rh = *((dt_ahashent_t ***)rhs);1043int i, rval;10441045if (dt_keysort) {1046/*1047* If we're sorting on keys, we need to scan until we find the1048* last entry -- that's the representative key. 
(The order of1049* the bundle is values followed by key to accommodate the1050* default behavior of sorting by value.) If the keys are1051* equal, we'll fall into the value comparison loop, below.1052*/1053for (i = 0; lh[i + 1] != NULL; i++)1054continue;10551056assert(i != 0);1057assert(rh[i + 1] == NULL);10581059if ((rval = dt_aggregate_keycmp(&lh[i], &rh[i])) != 0)1060return (rval);1061}10621063for (i = 0; ; i++) {1064if (lh[i + 1] == NULL) {1065/*1066* All of the values are equal; if we're sorting on1067* keys, then we're only here because the keys were1068* found to be equal and these records are therefore1069* equal. If we're not sorting on keys, we'll use the1070* key comparison from the representative key as the1071* tie-breaker.1072*/1073if (dt_keysort)1074return (0);10751076assert(i != 0);1077assert(rh[i + 1] == NULL);1078return (dt_aggregate_keycmp(&lh[i], &rh[i]));1079} else {1080if ((rval = dt_aggregate_valcmp(&lh[i], &rh[i])) != 0)1081return (rval);1082}1083}1084}10851086int1087dt_aggregate_go(dtrace_hdl_t *dtp)1088{1089dt_aggregate_t *agp = &dtp->dt_aggregate;1090dtrace_optval_t size, cpu;1091dtrace_bufdesc_t *buf = &agp->dtat_buf;1092int rval, i;10931094assert(agp->dtat_maxcpu == 0);1095assert(agp->dtat_ncpu == 0);1096assert(agp->dtat_cpus == NULL);10971098agp->dtat_maxcpu = dt_cpu_maxid(dtp) + 1;1099if (agp->dtat_maxcpu <= 0)1100return (-1);1101agp->dtat_ncpu = dt_sysconf(dtp, _SC_NPROCESSORS_CONF);1102agp->dtat_cpus = malloc(agp->dtat_ncpu * sizeof (processorid_t));11031104if (agp->dtat_cpus == NULL)1105return (dt_set_errno(dtp, EDT_NOMEM));11061107/*1108* Use the aggregation buffer size as reloaded from the kernel.1109*/1110size = dtp->dt_options[DTRACEOPT_AGGSIZE];11111112rval = dtrace_getopt(dtp, "aggsize", &size);1113assert(rval == 0);11141115if (size == 0 || size == DTRACEOPT_UNSET)1116return (0);11171118buf = &agp->dtat_buf;1119buf->dtbd_size = size;11201121if ((buf->dtbd_data = malloc(buf->dtbd_size)) == NULL)1122return (dt_set_errno(dtp, 
EDT_NOMEM));11231124/*1125* Now query for the CPUs enabled.1126*/1127rval = dtrace_getopt(dtp, "cpu", &cpu);1128assert(rval == 0 && cpu != DTRACEOPT_UNSET);11291130if (cpu != DTRACE_CPUALL) {1131assert(cpu < agp->dtat_ncpu);1132agp->dtat_cpus[agp->dtat_ncpus++] = (processorid_t)cpu;11331134return (0);1135}11361137agp->dtat_ncpus = 0;1138for (i = 0; i < agp->dtat_maxcpu; i++) {1139if (dt_status(dtp, i) == -1)1140continue;11411142agp->dtat_cpus[agp->dtat_ncpus++] = i;1143}11441145return (0);1146}11471148static int1149dt_aggwalk_rval(dtrace_hdl_t *dtp, dt_ahashent_t *h, int rval)1150{1151dt_aggregate_t *agp = &dtp->dt_aggregate;1152dtrace_aggdata_t *data;1153dtrace_aggdesc_t *aggdesc;1154dtrace_recdesc_t *rec;1155int i;11561157switch (rval) {1158case DTRACE_AGGWALK_NEXT:1159break;11601161case DTRACE_AGGWALK_CLEAR: {1162uint32_t size, offs = 0;11631164aggdesc = h->dtahe_data.dtada_desc;1165rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];1166size = rec->dtrd_size;1167data = &h->dtahe_data;11681169if (rec->dtrd_action == DTRACEAGG_LQUANTIZE) {1170offs = sizeof (uint64_t);1171size -= sizeof (uint64_t);1172}11731174bzero(&data->dtada_data[rec->dtrd_offset] + offs, size);11751176if (data->dtada_percpu == NULL)1177break;11781179for (i = 0; i < dtp->dt_aggregate.dtat_maxcpu; i++)1180bzero(data->dtada_percpu[i] + offs, size);1181break;1182}11831184case DTRACE_AGGWALK_ERROR:1185/*1186* We assume that errno is already set in this case.1187*/1188return (dt_set_errno(dtp, errno));11891190case DTRACE_AGGWALK_ABORT:1191return (dt_set_errno(dtp, EDT_DIRABORT));11921193case DTRACE_AGGWALK_DENORMALIZE:1194h->dtahe_data.dtada_normal = 1;1195return (0);11961197case DTRACE_AGGWALK_NORMALIZE:1198if (h->dtahe_data.dtada_normal == 0) {1199h->dtahe_data.dtada_normal = 1;1200return (dt_set_errno(dtp, EDT_BADRVAL));1201}12021203return (0);12041205case DTRACE_AGGWALK_REMOVE: {1206dtrace_aggdata_t *aggdata = &h->dtahe_data;1207int max_cpus = agp->dtat_maxcpu;12081209/*1210* First, remove this 
hash entry from its hash chain.1211*/1212if (h->dtahe_prev != NULL) {1213h->dtahe_prev->dtahe_next = h->dtahe_next;1214} else {1215dt_ahash_t *hash = &agp->dtat_hash;1216size_t ndx = h->dtahe_hashval % hash->dtah_size;12171218assert(hash->dtah_hash[ndx] == h);1219hash->dtah_hash[ndx] = h->dtahe_next;1220}12211222if (h->dtahe_next != NULL)1223h->dtahe_next->dtahe_prev = h->dtahe_prev;12241225/*1226* Now remove it from the list of all hash entries.1227*/1228if (h->dtahe_prevall != NULL) {1229h->dtahe_prevall->dtahe_nextall = h->dtahe_nextall;1230} else {1231dt_ahash_t *hash = &agp->dtat_hash;12321233assert(hash->dtah_all == h);1234hash->dtah_all = h->dtahe_nextall;1235}12361237if (h->dtahe_nextall != NULL)1238h->dtahe_nextall->dtahe_prevall = h->dtahe_prevall;12391240/*1241* We're unlinked. We can safely destroy the data.1242*/1243if (aggdata->dtada_percpu != NULL) {1244for (i = 0; i < max_cpus; i++)1245free(aggdata->dtada_percpu[i]);1246free(aggdata->dtada_percpu);1247}12481249free(aggdata->dtada_data);1250free(h);12511252return (0);1253}12541255default:1256return (dt_set_errno(dtp, EDT_BADRVAL));1257}12581259return (0);1260}12611262void1263dt_aggregate_qsort(dtrace_hdl_t *dtp, void *base, size_t nel, size_t width,1264int (*compar)(const void *, const void *))1265{1266int rev = dt_revsort, key = dt_keysort, keypos = dt_keypos;1267dtrace_optval_t keyposopt = dtp->dt_options[DTRACEOPT_AGGSORTKEYPOS];12681269dt_revsort = (dtp->dt_options[DTRACEOPT_AGGSORTREV] != DTRACEOPT_UNSET);1270dt_keysort = (dtp->dt_options[DTRACEOPT_AGGSORTKEY] != DTRACEOPT_UNSET);12711272if (keyposopt != DTRACEOPT_UNSET && keyposopt <= INT_MAX) {1273dt_keypos = (int)keyposopt;1274} else {1275dt_keypos = 0;1276}12771278if (compar == NULL) {1279if (!dt_keysort) {1280compar = dt_aggregate_varvalcmp;1281} else {1282compar = dt_aggregate_varkeycmp;1283}1284}12851286qsort(base, nel, width, compar);12871288dt_revsort = rev;1289dt_keysort = key;1290dt_keypos = 
keypos;1291}12921293int1294dtrace_aggregate_walk(dtrace_hdl_t *dtp, dtrace_aggregate_f *func, void *arg)1295{1296dt_ahashent_t *h, *next;1297dt_ahash_t *hash = &dtp->dt_aggregate.dtat_hash;12981299for (h = hash->dtah_all; h != NULL; h = next) {1300/*1301* dt_aggwalk_rval() can potentially remove the current hash1302* entry; we need to load the next hash entry before calling1303* into it.1304*/1305next = h->dtahe_nextall;13061307if (dt_aggwalk_rval(dtp, h, func(&h->dtahe_data, arg)) == -1)1308return (-1);1309}13101311return (0);1312}13131314static int1315dt_aggregate_total(dtrace_hdl_t *dtp, boolean_t clear)1316{1317dt_ahashent_t *h;1318dtrace_aggdata_t **total;1319dtrace_aggid_t max = DTRACE_AGGVARIDNONE, id;1320dt_aggregate_t *agp = &dtp->dt_aggregate;1321dt_ahash_t *hash = &agp->dtat_hash;1322uint32_t tflags;13231324tflags = DTRACE_A_TOTAL | DTRACE_A_HASNEGATIVES | DTRACE_A_HASPOSITIVES;13251326/*1327* If we need to deliver per-aggregation totals, we're going to take1328* three passes over the aggregate: one to clear everything out and1329* determine our maximum aggregation ID, one to actually total1330* everything up, and a final pass to assign the totals to the1331* individual elements.1332*/1333for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {1334dtrace_aggdata_t *aggdata = &h->dtahe_data;13351336if ((id = dt_aggregate_aggvarid(h)) > max)1337max = id;13381339aggdata->dtada_total = 0;1340aggdata->dtada_flags &= ~tflags;1341}13421343if (clear || max == DTRACE_AGGVARIDNONE)1344return (0);13451346total = dt_zalloc(dtp, (max + 1) * sizeof (dtrace_aggdata_t *));13471348if (total == NULL)1349return (-1);13501351for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {1352dtrace_aggdata_t *aggdata = &h->dtahe_data;1353dtrace_aggdesc_t *agg = aggdata->dtada_desc;1354dtrace_recdesc_t *rec;1355caddr_t data;1356int64_t val, *addr;13571358rec = &agg->dtagd_rec[agg->dtagd_nrecs - 1];1359data = aggdata->dtada_data;1360addr = (int64_t *)(uintptr_t)(data + 
rec->dtrd_offset);13611362switch (rec->dtrd_action) {1363case DTRACEAGG_STDDEV:1364val = dt_stddev((uint64_t *)addr, 1);1365break;13661367case DTRACEAGG_SUM:1368case DTRACEAGG_COUNT:1369val = *addr;1370break;13711372case DTRACEAGG_AVG:1373val = addr[0] ? (addr[1] / addr[0]) : 0;1374break;13751376default:1377continue;1378}13791380if (total[agg->dtagd_varid] == NULL) {1381total[agg->dtagd_varid] = aggdata;1382aggdata->dtada_flags |= DTRACE_A_TOTAL;1383} else {1384aggdata = total[agg->dtagd_varid];1385}13861387if (val > 0)1388aggdata->dtada_flags |= DTRACE_A_HASPOSITIVES;13891390if (val < 0) {1391aggdata->dtada_flags |= DTRACE_A_HASNEGATIVES;1392val = -val;1393}13941395if (dtp->dt_options[DTRACEOPT_AGGZOOM] != DTRACEOPT_UNSET) {1396val = (int64_t)((long double)val *1397(1 / DTRACE_AGGZOOM_MAX));13981399if (val > aggdata->dtada_total)1400aggdata->dtada_total = val;1401} else {1402aggdata->dtada_total += val;1403}1404}14051406/*1407* And now one final pass to set everyone's total.1408*/1409for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {1410dtrace_aggdata_t *aggdata = &h->dtahe_data, *t;1411dtrace_aggdesc_t *agg = aggdata->dtada_desc;14121413if ((t = total[agg->dtagd_varid]) == NULL || aggdata == t)1414continue;14151416aggdata->dtada_total = t->dtada_total;1417aggdata->dtada_flags |= (t->dtada_flags & tflags);1418}14191420dt_free(dtp, total);14211422return (0);1423}14241425static int1426dt_aggregate_minmaxbin(dtrace_hdl_t *dtp, boolean_t clear)1427{1428dt_ahashent_t *h;1429dtrace_aggdata_t **minmax;1430dtrace_aggid_t max = DTRACE_AGGVARIDNONE, id;1431dt_aggregate_t *agp = &dtp->dt_aggregate;1432dt_ahash_t *hash = &agp->dtat_hash;14331434for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {1435dtrace_aggdata_t *aggdata = &h->dtahe_data;14361437if ((id = dt_aggregate_aggvarid(h)) > max)1438max = id;14391440aggdata->dtada_minbin = 0;1441aggdata->dtada_maxbin = 0;1442aggdata->dtada_flags &= ~DTRACE_A_MINMAXBIN;1443}14441445if (clear || max == 
DTRACE_AGGVARIDNONE)1446return (0);14471448minmax = dt_zalloc(dtp, (max + 1) * sizeof (dtrace_aggdata_t *));14491450if (minmax == NULL)1451return (-1);14521453for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {1454dtrace_aggdata_t *aggdata = &h->dtahe_data;1455dtrace_aggdesc_t *agg = aggdata->dtada_desc;1456dtrace_recdesc_t *rec;1457caddr_t data;1458int64_t *addr;1459int minbin = -1, maxbin = -1, i;1460int start = 0, size;14611462rec = &agg->dtagd_rec[agg->dtagd_nrecs - 1];1463size = rec->dtrd_size / sizeof (int64_t);1464data = aggdata->dtada_data;1465addr = (int64_t *)(uintptr_t)(data + rec->dtrd_offset);14661467switch (rec->dtrd_action) {1468case DTRACEAGG_LQUANTIZE:1469/*1470* For lquantize(), we always display the entire range1471* of the aggregation when aggpack is set.1472*/1473start = 1;1474minbin = start;1475maxbin = size - 1 - start;1476break;14771478case DTRACEAGG_QUANTIZE:1479for (i = start; i < size; i++) {1480if (!addr[i])1481continue;14821483if (minbin == -1)1484minbin = i - start;14851486maxbin = i - start;1487}14881489if (minbin == -1) {1490/*1491* If we have no data (e.g., due to a clear()1492* or negative increments), we'll use the1493* zero bucket as both our min and max.1494*/1495minbin = maxbin = DTRACE_QUANTIZE_ZEROBUCKET;1496}14971498break;14991500default:1501continue;1502}15031504if (minmax[agg->dtagd_varid] == NULL) {1505minmax[agg->dtagd_varid] = aggdata;1506aggdata->dtada_flags |= DTRACE_A_MINMAXBIN;1507aggdata->dtada_minbin = minbin;1508aggdata->dtada_maxbin = maxbin;1509continue;1510}15111512if (minbin < minmax[agg->dtagd_varid]->dtada_minbin)1513minmax[agg->dtagd_varid]->dtada_minbin = minbin;15141515if (maxbin > minmax[agg->dtagd_varid]->dtada_maxbin)1516minmax[agg->dtagd_varid]->dtada_maxbin = maxbin;1517}15181519/*1520* And now one final pass to set everyone's minbin and maxbin.1521*/1522for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {1523dtrace_aggdata_t *aggdata = &h->dtahe_data, *mm;1524dtrace_aggdesc_t *agg = 
aggdata->dtada_desc;15251526if ((mm = minmax[agg->dtagd_varid]) == NULL || aggdata == mm)1527continue;15281529aggdata->dtada_minbin = mm->dtada_minbin;1530aggdata->dtada_maxbin = mm->dtada_maxbin;1531aggdata->dtada_flags |= DTRACE_A_MINMAXBIN;1532}15331534dt_free(dtp, minmax);15351536return (0);1537}15381539static int1540dt_aggregate_walk_sorted(dtrace_hdl_t *dtp,1541dtrace_aggregate_f *func, void *arg,1542int (*sfunc)(const void *, const void *))1543{1544dt_aggregate_t *agp = &dtp->dt_aggregate;1545dt_ahashent_t *h, **sorted;1546dt_ahash_t *hash = &agp->dtat_hash;1547size_t i, nentries = 0;1548int rval = -1;15491550agp->dtat_flags &= ~(DTRACE_A_TOTAL | DTRACE_A_MINMAXBIN);15511552if (dtp->dt_options[DTRACEOPT_AGGHIST] != DTRACEOPT_UNSET) {1553agp->dtat_flags |= DTRACE_A_TOTAL;15541555if (dt_aggregate_total(dtp, B_FALSE) != 0)1556return (-1);1557}15581559if (dtp->dt_options[DTRACEOPT_AGGPACK] != DTRACEOPT_UNSET) {1560agp->dtat_flags |= DTRACE_A_MINMAXBIN;15611562if (dt_aggregate_minmaxbin(dtp, B_FALSE) != 0)1563return (-1);1564}15651566for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall)1567nentries++;15681569sorted = dt_alloc(dtp, nentries * sizeof (dt_ahashent_t *));15701571if (sorted == NULL)1572goto out;15731574for (h = hash->dtah_all, i = 0; h != NULL; h = h->dtahe_nextall)1575sorted[i++] = h;15761577(void) pthread_mutex_lock(&dt_qsort_lock);15781579if (sfunc == NULL) {1580dt_aggregate_qsort(dtp, sorted, nentries,1581sizeof (dt_ahashent_t *), NULL);1582} else {1583/*1584* If we've been explicitly passed a sorting function,1585* we'll use that -- ignoring the values of the "aggsortrev",1586* "aggsortkey" and "aggsortkeypos" options.1587*/1588qsort(sorted, nentries, sizeof (dt_ahashent_t *), sfunc);1589}15901591(void) pthread_mutex_unlock(&dt_qsort_lock);15921593for (i = 0; i < nentries; i++) {1594h = sorted[i];15951596if (dt_aggwalk_rval(dtp, h, func(&h->dtahe_data, arg)) == -1)1597goto out;1598}15991600rval = 0;1601out:1602if (agp->dtat_flags & 
DTRACE_A_TOTAL)1603(void) dt_aggregate_total(dtp, B_TRUE);16041605if (agp->dtat_flags & DTRACE_A_MINMAXBIN)1606(void) dt_aggregate_minmaxbin(dtp, B_TRUE);16071608dt_free(dtp, sorted);1609return (rval);1610}16111612int1613dtrace_aggregate_walk_sorted(dtrace_hdl_t *dtp,1614dtrace_aggregate_f *func, void *arg)1615{1616return (dt_aggregate_walk_sorted(dtp, func, arg, NULL));1617}16181619int1620dtrace_aggregate_walk_keysorted(dtrace_hdl_t *dtp,1621dtrace_aggregate_f *func, void *arg)1622{1623return (dt_aggregate_walk_sorted(dtp, func,1624arg, dt_aggregate_varkeycmp));1625}16261627int1628dtrace_aggregate_walk_valsorted(dtrace_hdl_t *dtp,1629dtrace_aggregate_f *func, void *arg)1630{1631return (dt_aggregate_walk_sorted(dtp, func,1632arg, dt_aggregate_varvalcmp));1633}16341635int1636dtrace_aggregate_walk_keyvarsorted(dtrace_hdl_t *dtp,1637dtrace_aggregate_f *func, void *arg)1638{1639return (dt_aggregate_walk_sorted(dtp, func,1640arg, dt_aggregate_keyvarcmp));1641}16421643int1644dtrace_aggregate_walk_valvarsorted(dtrace_hdl_t *dtp,1645dtrace_aggregate_f *func, void *arg)1646{1647return (dt_aggregate_walk_sorted(dtp, func,1648arg, dt_aggregate_valvarcmp));1649}16501651int1652dtrace_aggregate_walk_keyrevsorted(dtrace_hdl_t *dtp,1653dtrace_aggregate_f *func, void *arg)1654{1655return (dt_aggregate_walk_sorted(dtp, func,1656arg, dt_aggregate_varkeyrevcmp));1657}16581659int1660dtrace_aggregate_walk_valrevsorted(dtrace_hdl_t *dtp,1661dtrace_aggregate_f *func, void *arg)1662{1663return (dt_aggregate_walk_sorted(dtp, func,1664arg, dt_aggregate_varvalrevcmp));1665}16661667int1668dtrace_aggregate_walk_keyvarrevsorted(dtrace_hdl_t *dtp,1669dtrace_aggregate_f *func, void *arg)1670{1671return (dt_aggregate_walk_sorted(dtp, func,1672arg, dt_aggregate_keyvarrevcmp));1673}16741675int1676dtrace_aggregate_walk_valvarrevsorted(dtrace_hdl_t *dtp,1677dtrace_aggregate_f *func, void *arg)1678{1679return (dt_aggregate_walk_sorted(dtp, func,1680arg, 
dt_aggregate_valvarrevcmp));
}

/*
 * Walk several aggregations at once, joined on their keys.
 *
 * 'aggvars' lists 'naggvars' aggregation variable IDs. Hash entries that
 * belong to one of those variables are sorted with dt_aggregate_keyvarcmp()
 * and grouped into "bundles" of entries for which dt_aggregate_keycmp()
 * reports equal keys. The bundles are then sorted with
 * dt_aggregate_bundlecmp() via dt_aggregate_qsort(), and func() is called
 * once per bundle with an array of naggvars + 1 aggregation data pointers:
 * element 0 is the data of the bundle's representative entry, and element
 * j + 1 is the data for aggvars[j]. Where a variable has no entry for the
 * bundle's key, zero-filled data built for that variable is supplied
 * instead.
 *
 * Returns -1 with EDT_BADAGGVAR if an ID in 'aggvars' is DTRACE_AGGVARIDNONE
 * or negative, -1 if an allocation fails or func() returns -1, and 0
 * otherwise (including when no matching entries exist, in which case func()
 * is never called).
 */
int
dtrace_aggregate_walk_joined(dtrace_hdl_t *dtp, dtrace_aggvarid_t *aggvars,
    int naggvars, dtrace_aggregate_walk_joined_f *func, void *arg)
{
	dt_aggregate_t *agp = &dtp->dt_aggregate;
	dt_ahashent_t *h, **sorted = NULL, ***bundle, **nbundle;
	const dtrace_aggdata_t **data;
	dt_ahashent_t *zaggdata = NULL;
	dt_ahash_t *hash = &agp->dtat_hash;
	size_t nentries = 0, nbundles = 0, start, zsize = 0, bundlesize;
	dtrace_aggvarid_t max = 0, aggvar;
	int rval = -1, *map, *remap = NULL;
	int i, j;
	dtrace_optval_t sortpos = dtp->dt_options[DTRACEOPT_AGGSORTPOS];

	/*
	 * If the sorting position is greater than the number of aggregation
	 * variable IDs, we silently set it to 0.
	 */
	if (sortpos == DTRACEOPT_UNSET || sortpos >= naggvars)
		sortpos = 0;

	/*
	 * First we need to translate the specified aggregation variable IDs
	 * into a linear map that will allow us to translate an aggregation
	 * variable ID into its position in the specified aggvars.
	 */
	for (i = 0; i < naggvars; i++) {
		if (aggvars[i] == DTRACE_AGGVARIDNONE || aggvars[i] < 0)
			return (dt_set_errno(dtp, EDT_BADAGGVAR));

		if (aggvars[i] > max)
			max = aggvars[i];
	}

	if ((map = dt_zalloc(dtp, (max + 1) * sizeof (int))) == NULL)
		return (-1);

	zaggdata = dt_zalloc(dtp, naggvars * sizeof (dt_ahashent_t));

	if (zaggdata == NULL)
		goto out;

	/*
	 * map[] stores position + 1 so that 0 can mean "not requested".
	 * Positions are rotated by sortpos, so the variable at the sort
	 * position lands in slot 0.
	 */
	for (i = 0; i < naggvars; i++) {
		int ndx = i + sortpos;

		if (ndx >= naggvars)
			ndx -= naggvars;

		aggvar = aggvars[ndx];
		assert(aggvar <= max);

		if (map[aggvar]) {
			/*
			 * We have an aggregation variable that is present
			 * more than once in the array of aggregation
			 * variables. While it's unclear why one might want
			 * to do this, it's legal. To support this construct,
			 * we will allocate a remap that will indicate the
			 * position from which this aggregation variable
			 * should be pulled. (That is, where the remap will
			 * map from one position to another.)
			 */
			if (remap == NULL) {
				remap = dt_zalloc(dtp, naggvars * sizeof (int));

				if (remap == NULL)
					goto out;
			}

			/*
			 * Given that the variable is already present, assert
			 * that following through the mapping and adjusting
			 * for the sort position yields the same aggregation
			 * variable ID.
			 */
			assert(aggvars[(map[aggvar] - 1 + sortpos) %
			    naggvars] == aggvars[ndx]);

			remap[i] = map[aggvar];
			continue;
		}

		map[aggvar] = i + 1;
	}

	/*
	 * We need to take two passes over the data to size our allocation, so
	 * we'll use the first pass to also fill in the zero-filled data to be
	 * used to properly format a zero-valued aggregation.
	 */
	for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {
		dtrace_aggvarid_t id;
		int ndx;

		if ((id = dt_aggregate_aggvarid(h)) > max || !(ndx = map[id]))
			continue;

		/*
		 * Note that this copies the entry's aggregation data by
		 * value, so dtada_data still points at the live entry's
		 * buffer until it is replaced in the zero-fill loop below.
		 */
		if (zaggdata[ndx - 1].dtahe_size == 0) {
			zaggdata[ndx - 1].dtahe_size = h->dtahe_size;
			zaggdata[ndx - 1].dtahe_data = h->dtahe_data;
		}

		nentries++;
	}

	if (nentries == 0) {
		/*
		 * We couldn't find any entries; there is nothing else to do.
		 */
		rval = 0;
		goto out;
	}

	/*
	 * Before we sort the data, we're going to look for any holes in our
	 * zero-filled data. This will occur if an aggregation variable that
	 * we are being asked to print has not yet been assigned the result of
	 * any aggregating action for _any_ tuple. The issue becomes that we
	 * would like a zero value to be printed for all columns for this
	 * aggregation, but without any record description, we don't know the
	 * aggregating action that corresponds to the aggregation variable. To
	 * try to find a match, we're simply going to lookup aggregation IDs
	 * (which are guaranteed to be contiguous and to start from 1), looking
	 * for the specified aggregation variable ID. If we find a match,
	 * we'll use that. If we iterate over all aggregation IDs and don't
	 * find a match, then we must be an anonymous enabling. (Anonymous
	 * enablings can't currently derive either aggregation variable IDs or
	 * aggregation variable names given only an aggregation ID.) In this
	 * obscure case (anonymous enabling, multiple aggregation printa() with
	 * some aggregations not represented for any tuple), our defined
	 * behavior is that the zero will be printed in the format of the first
	 * aggregation variable that contains any non-zero value.
	 */
	for (i = 0; i < naggvars; i++) {
		if (zaggdata[i].dtahe_size == 0) {
			dtrace_aggvarid_t aggvar;

			aggvar = aggvars[(i - sortpos + naggvars) % naggvars];
			assert(zaggdata[i].dtahe_data.dtada_data == NULL);

			for (j = DTRACE_AGGIDNONE + 1; ; j++) {
				dtrace_aggdesc_t *agg;
				dtrace_aggdata_t *aggdata;

				if (dt_aggid_lookup(dtp, j, &agg) != 0)
					break;

				if (agg->dtagd_varid != aggvar)
					continue;

				/*
				 * We have our description -- now we need to
				 * cons up the zaggdata entry for it.
				 */
				aggdata = &zaggdata[i].dtahe_data;
				aggdata->dtada_size = agg->dtagd_size;
				aggdata->dtada_desc = agg;
				aggdata->dtada_handle = dtp;
				(void) dt_epid_lookup(dtp, agg->dtagd_epid,
				    &aggdata->dtada_edesc,
				    &aggdata->dtada_pdesc);
				aggdata->dtada_normal = 1;
				zaggdata[i].dtahe_hashval = 0;
				zaggdata[i].dtahe_size = agg->dtagd_size;
				break;
			}

			if (zaggdata[i].dtahe_size == 0) {
				caddr_t data;

				/*
				 * We couldn't find this aggregation, meaning
				 * that we have never seen it before for any
				 * tuple _and_ this is an anonymous enabling.
				 * That is, we're in the obscure case outlined
				 * above. In this case, our defined behavior
				 * is to format the data in the format of the
				 * first non-zero aggregation -- of which, of
				 * course, we know there to be at least one
				 * (or nentries would have been zero).
				 */
				for (j = 0; j < naggvars; j++) {
					if (zaggdata[j].dtahe_size != 0)
						break;
				}

				assert(j < naggvars);
				zaggdata[i] = zaggdata[j];

				data = zaggdata[i].dtahe_data.dtada_data;
				assert(data != NULL);
			}
		}
	}

	/*
	 * Now we need to allocate our zero-filled data for use for
	 * aggregations that don't have a value corresponding to a given key.
	 */
	for (i = 0; i < naggvars; i++) {
		dtrace_aggdata_t *aggdata = &zaggdata[i].dtahe_data;
		dtrace_aggdesc_t *aggdesc = aggdata->dtada_desc;
		dtrace_recdesc_t *rec;
		uint64_t larg;
		caddr_t zdata;

		zsize = zaggdata[i].dtahe_size;
		assert(zsize != 0);

		if ((zdata = dt_zalloc(dtp, zsize)) == NULL) {
			/*
			 * If we failed to allocate some zero-filled data, we
			 * need to zero out the remaining dtada_data pointers
			 * to prevent the wrong data from being freed below.
			 */
			for (j = i; j < naggvars; j++)
				zaggdata[j].dtahe_data.dtada_data = NULL;
			goto out;
		}

		aggvar = aggvars[(i - sortpos + naggvars) % naggvars];

		/*
		 * First, the easy bit. To maintain compatibility with
		 * consumers that pull the compiler-generated ID out of the
		 * data, we put that ID at the top of the zero-filled data.
		 */
		rec = &aggdesc->dtagd_rec[0];
		/* LINTED - alignment */
		*((dtrace_aggvarid_t *)(zdata + rec->dtrd_offset)) = aggvar;

		rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];

		/*
		 * Now for the more complicated part. If (and only if) this
		 * is an lquantize() aggregating action, zero-filled data is
		 * not equivalent to an empty record: we must also get the
		 * parameters for the lquantize().
		 */
		if (rec->dtrd_action == DTRACEAGG_LQUANTIZE) {
			if (aggdata->dtada_data != NULL) {
				/*
				 * The easier case here is if we actually have
				 * some prototype data -- in which case we
				 * manually dig it out of the aggregation
				 * record.
				 */
				/* LINTED - alignment */
				larg = *((uint64_t *)(aggdata->dtada_data +
				    rec->dtrd_offset));
			} else {
				/*
				 * We don't have any prototype data. As a
				 * result, we know that we _do_ have the
				 * compiler-generated information. (If this
				 * were an anonymous enabling, all of our
				 * zero-filled data would have prototype data
				 * -- either directly or indirectly.) So as
				 * gross as it is, we'll grovel around in the
				 * compiler-generated information to find the
				 * lquantize() parameters.
				 */
				dtrace_stmtdesc_t *sdp;
				dt_ident_t *aid;
				dt_idsig_t *isp;

				sdp = (dtrace_stmtdesc_t *)(uintptr_t)
				    aggdesc->dtagd_rec[0].dtrd_uarg;
				aid = sdp->dtsd_aggdata;
				isp = (dt_idsig_t *)aid->di_data;
				assert(isp->dis_auxinfo != 0);
				larg = isp->dis_auxinfo;
			}

			/* LINTED - alignment */
			*((uint64_t *)(zdata + rec->dtrd_offset)) = larg;
		}

		/*
		 * From here on this slot owns zdata; it is released in the
		 * cleanup at 'out'.
		 */
		aggdata->dtada_data = zdata;
	}

	/*
	 * Now that we've dealt with setting up our zero-filled data, we can
	 * allocate our sorted array, and take another pass over the data to
	 * fill it.
	 */
	sorted = dt_alloc(dtp, nentries * sizeof (dt_ahashent_t *));

	if (sorted == NULL)
		goto out;

	for (h = hash->dtah_all, i = 0; h != NULL; h = h->dtahe_nextall) {
		dtrace_aggvarid_t id;

		if ((id = dt_aggregate_aggvarid(h)) > max || !map[id])
			continue;

		sorted[i++] = h;
	}

	assert(i == nentries);

	/*
	 * We've loaded our array; now we need to sort by value to allow us
	 * to create bundles of like value. We're going to acquire the
	 * dt_qsort_lock here, and hold it across all of our subsequent
	 * comparison and sorting.
	 */
	(void) pthread_mutex_lock(&dt_qsort_lock);

	qsort(sorted, nentries, sizeof (dt_ahashent_t *),
	    dt_aggregate_keyvarcmp);

	/*
	 * Now we need to go through and create bundles. Because the number
	 * of bundles is bounded by the size of the sorted array, we're going
	 * to reuse the underlying storage. And note that "bundle" is an
	 * array of pointers to arrays of pointers to dt_ahashent_t -- making
	 * its type (regrettably) "dt_ahashent_t ***". (Regrettable because
	 * '*' -- like '_' and 'X' -- should never appear in triplicate in
	 * an ideal world.)
	 */
	bundle = (dt_ahashent_t ***)sorted;

	for (i = 1, start = 0; i <= nentries; i++) {
		if (i < nentries &&
		    dt_aggregate_keycmp(&sorted[i], &sorted[i - 1]) == 0)
			continue;

		/*
		 * We have a bundle boundary. Everything from start to
		 * (i - 1) belongs in one bundle.
		 */
		assert(i - start <= naggvars);
		bundlesize = (naggvars + 2) * sizeof (dt_ahashent_t *);

		if ((nbundle = dt_zalloc(dtp, bundlesize)) == NULL) {
			(void) pthread_mutex_unlock(&dt_qsort_lock);
			goto out;
		}

		for (j = start; j < i; j++) {
			dtrace_aggvarid_t id = dt_aggregate_aggvarid(sorted[j]);

			assert(id <= max);
			assert(map[id] != 0);
			assert(map[id] - 1 < naggvars);
			assert(nbundle[map[id] - 1] == NULL);
			nbundle[map[id] - 1] = sorted[j];

			/*
			 * Slot naggvars holds the bundle's representative
			 * entry: the first one placed into it.
			 */
			if (nbundle[naggvars] == NULL)
				nbundle[naggvars] = sorted[j];
		}

		for (j = 0; j < naggvars; j++) {
			if (nbundle[j] != NULL)
				continue;

			/*
			 * Before we assume that this aggregation variable
			 * isn't present (and fall back to using the
			 * zero-filled data allocated earlier), check the
			 * remap. If we have a remapping, we'll drop it in
			 * here. Note that we might be remapping an
			 * aggregation variable that isn't present for this
			 * key; in this case, the aggregation data that we
			 * copy will point to the zeroed data.
			 */
			if (remap != NULL && remap[j]) {
				assert(remap[j] - 1 < j);
				assert(nbundle[remap[j] - 1] != NULL);
				nbundle[j] = nbundle[remap[j] - 1];
			} else {
				nbundle[j] = &zaggdata[j];
			}
		}

		/*
		 * Every bundle consumes at least one sorted entry, so
		 * nbundles never exceeds start and this store only reuses
		 * slots of sorted[] that have already been consumed.
		 */
		bundle[nbundles++] = nbundle;
		start = i;
	}

	/*
	 * Now we need to re-sort based on the first value.
	 */
	dt_aggregate_qsort(dtp, bundle, nbundles, sizeof (dt_ahashent_t **),
	    dt_aggregate_bundlecmp);

	(void) pthread_mutex_unlock(&dt_qsort_lock);

	/*
	 * We're done! Now we just need to go back over the sorted bundles,
	 * calling the function.
	 */
	data = alloca((naggvars + 1) * sizeof (dtrace_aggdata_t *));

	for (i = 0; i < nbundles; i++) {
		for (j = 0; j < naggvars; j++)
			data[j + 1] = NULL;

		/*
		 * Undo the sortpos rotation so that data[j + 1] corresponds
		 * to aggvars[j].
		 */
		for (j = 0; j < naggvars; j++) {
			int ndx = j - sortpos;

			if (ndx < 0)
				ndx += naggvars;

			assert(bundle[i][ndx] != NULL);
			data[j + 1] = &bundle[i][ndx]->dtahe_data;
		}

		for (j = 0; j < naggvars; j++)
			assert(data[j + 1] != NULL);

		/*
		 * The representative key is the last element in the bundle.
		 * Assert that we have one, and then set it to be the first
		 * element of data.
		 */
		assert(bundle[i][j] != NULL);
		data[0] = &bundle[i][j]->dtahe_data;

		if ((rval = func(data, naggvars + 1, arg)) == -1)
			goto out;
	}

	rval = 0;
out:
	/*
	 * nbundles is still 0 on every path that reaches here before
	 * 'bundle' has been assigned, so this loop never reads it
	 * uninitialized.
	 */
	for (i = 0; i < nbundles; i++)
		dt_free(dtp, bundle[i]);

	if (zaggdata != NULL) {
		for (i = 0; i < naggvars; i++)
			dt_free(dtp, zaggdata[i].dtahe_data.dtada_data);
	}

	dt_free(dtp, zaggdata);
	dt_free(dtp, sorted);
	dt_free(dtp, remap);
	dt_free(dtp, map);

	return (rval);
}

/*
 * Emit every aggregation by running the walker 'func' (or
 * dtrace_aggregate_walk_sorted() when 'func' is NULL) with
 * dt_format_agg() as the callback when dtp->dt_oformat is set and
 * dt_print_agg() otherwise. 'fp' is handed to the callback through the
 * print state. Returns 0 on success; if the walker returns -1, the
 * result of dt_set_errno(dtp, dtp->dt_errno) is returned.
 */
int
dtrace_aggregate_print(dtrace_hdl_t *dtp, FILE *fp,
    dtrace_aggregate_walk_f *func)
{
	dt_print_aggdata_t
pd;21312132bzero(&pd, sizeof (pd));21332134pd.dtpa_dtp = dtp;2135pd.dtpa_fp = fp;2136pd.dtpa_allunprint = 1;21372138if (func == NULL)2139func = dtrace_aggregate_walk_sorted;21402141if (dtp->dt_oformat) {2142if ((*func)(dtp, dt_format_agg, &pd) == -1)2143return (dt_set_errno(dtp, dtp->dt_errno));2144} else {2145if ((*func)(dtp, dt_print_agg, &pd) == -1)2146return (dt_set_errno(dtp, dtp->dt_errno));2147}21482149return (0);2150}21512152void2153dtrace_aggregate_clear(dtrace_hdl_t *dtp)2154{2155dt_aggregate_t *agp = &dtp->dt_aggregate;2156dt_ahash_t *hash = &agp->dtat_hash;2157dt_ahashent_t *h;2158dtrace_aggdata_t *data;2159dtrace_aggdesc_t *aggdesc;2160dtrace_recdesc_t *rec;2161int i, max_cpus = agp->dtat_maxcpu;21622163for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {2164aggdesc = h->dtahe_data.dtada_desc;2165rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];2166data = &h->dtahe_data;21672168bzero(&data->dtada_data[rec->dtrd_offset], rec->dtrd_size);21692170if (data->dtada_percpu == NULL)2171continue;21722173for (i = 0; i < max_cpus; i++)2174bzero(data->dtada_percpu[i], rec->dtrd_size);2175}2176}21772178void2179dt_aggregate_destroy(dtrace_hdl_t *dtp)2180{2181dt_aggregate_t *agp = &dtp->dt_aggregate;2182dt_ahash_t *hash = &agp->dtat_hash;2183dt_ahashent_t *h, *next;2184dtrace_aggdata_t *aggdata;2185int i, max_cpus = agp->dtat_maxcpu;21862187if (hash->dtah_hash == NULL) {2188assert(hash->dtah_all == NULL);2189} else {2190free(hash->dtah_hash);21912192for (h = hash->dtah_all; h != NULL; h = next) {2193next = h->dtahe_nextall;21942195aggdata = &h->dtahe_data;21962197if (aggdata->dtada_percpu != NULL) {2198for (i = 0; i < max_cpus; i++)2199free(aggdata->dtada_percpu[i]);2200free(aggdata->dtada_percpu);2201}22022203free(aggdata->dtada_data);2204free(h);2205}22062207hash->dtah_hash = NULL;2208hash->dtah_all = NULL;2209hash->dtah_size = 0;2210}22112212free(agp->dtat_buf.dtbd_data);2213free(agp->dtat_cpus);2214}221522162217