Path: blob/main/sys/contrib/zstd/programs/benchfn.c
48254 views
/*1* Copyright (c) Yann Collet, Facebook, Inc.2* All rights reserved.3*4* This source code is licensed under both the BSD-style license (found in the5* LICENSE file in the root directory of this source tree) and the GPLv2 (found6* in the COPYING file in the root directory of this source tree).7* You may select, at your option, one of the above-listed licenses.8*/9101112/* *************************************13* Includes14***************************************/15#include <stdlib.h> /* malloc, free */16#include <string.h> /* memset */17#include <assert.h> /* assert */1819#include "timefn.h" /* UTIL_time_t, UTIL_getTime */20#include "benchfn.h"212223/* *************************************24* Constants25***************************************/26#define TIMELOOP_MICROSEC SEC_TO_MICRO /* 1 second */27#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */2829#define KB *(1 <<10)30#define MB *(1 <<20)31#define GB *(1U<<30)323334/* *************************************35* Debug errors36***************************************/37#if defined(DEBUG) && (DEBUG >= 1)38# include <stdio.h> /* fprintf */39# define DISPLAY(...) fprintf(stderr, __VA_ARGS__)40# define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }41#else42# define DEBUGOUTPUT(...)43#endif444546/* error without displaying */47#define RETURN_QUIET_ERROR(retValue, ...) { \48DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \49DEBUGOUTPUT("Error : "); \50DEBUGOUTPUT(__VA_ARGS__); \51DEBUGOUTPUT(" \n"); \52return retValue; \53}5455/* Abort execution if a condition is not met */56#define CONTROL(c) { if (!(c)) { DEBUGOUTPUT("error: %s \n", #c); abort(); } }575859/* *************************************60* Benchmarking an arbitrary function61***************************************/6263int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)64{65return outcome.error_tag_never_ever_use_directly == 0;66}6768/* warning : this function will stop program execution if outcome is invalid !69* check outcome validity first, using BMK_isValid_runResult() */70BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)71{72CONTROL(outcome.error_tag_never_ever_use_directly == 0);73return outcome.internal_never_ever_use_directly;74}7576size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)77{78CONTROL(outcome.error_tag_never_ever_use_directly != 0);79return outcome.error_result_never_ever_use_directly;80}8182static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult)83{84BMK_runOutcome_t b;85memset(&b, 0, sizeof(b));86b.error_tag_never_ever_use_directly = 1;87b.error_result_never_ever_use_directly = errorResult;88return b;89}9091static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)92{93BMK_runOutcome_t outcome;94outcome.error_tag_never_ever_use_directly = 0;95outcome.internal_never_ever_use_directly = runTime;96return outcome;97}9899100/* initFn will be measured once, benchFn will be measured `nbLoops` times */101/* initFn is optional, provide NULL if none */102/* benchFn must return a size_t value that errorFn can interpret */103/* takes # of blocks and list of size & stuff for each. */104/* can report result of benchFn for each block into blockResult. */105/* blockResult is optional, provide NULL if this information is not required */106/* note : time per loop can be reported as zero if run time < timer resolution */107BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,108unsigned nbLoops)109{110size_t dstSize = 0;111nbLoops += !nbLoops; /* minimum nbLoops is 1 */112113/* init */114{ size_t i;115for(i = 0; i < p.blockCount; i++) {116memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */117} }118119/* benchmark */120{ UTIL_time_t const clockStart = UTIL_getTime();121unsigned loopNb, blockNb;122if (p.initFn != NULL) p.initFn(p.initPayload);123for (loopNb = 0; loopNb < nbLoops; loopNb++) {124for (blockNb = 0; blockNb < p.blockCount; blockNb++) {125size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb],126p.dstBuffers[blockNb], p.dstCapacities[blockNb],127p.benchPayload);128if (loopNb == 0) {129if (p.blockResults != NULL) p.blockResults[blockNb] = res;130if ((p.errorFn != NULL) && (p.errorFn(res))) {131RETURN_QUIET_ERROR(BMK_runOutcome_error(res),132"Function benchmark failed on block %u (of size %u) with error %i",133blockNb, (unsigned)p.srcSizes[blockNb], (int)res);134}135dstSize += res;136} }137} /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */138139{ PTime const totalTime = UTIL_clockSpanNano(clockStart);140BMK_runTime_t rt;141rt.nanoSecPerRun = (double)totalTime / nbLoops;142rt.sumOfReturn = dstSize;143return BMK_setValid_runTime(rt);144} }145}146147148/* ==== Benchmarking any function, providing intermediate results ==== */149150struct BMK_timedFnState_s {151PTime timeSpent_ns;152PTime timeBudget_ns;153PTime runBudget_ns;154BMK_runTime_t fastestRun;155unsigned nbLoops;156UTIL_time_t coolTime;157}; /* typedef'd to BMK_timedFnState_t within bench.h */158159BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)160{161BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));162if (r == NULL) return NULL; /* malloc() error */163BMK_resetTimedFnState(r, total_ms, run_ms);164return r;165}166167void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); }168169BMK_timedFnState_t*170BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)171{172typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1]; /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */173typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align; /* force tfs to be aligned at its next best position */174size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */175BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer;176if (buffer == NULL) return NULL;177if (size < sizeof(struct BMK_timedFnState_s)) return NULL;178if ((size_t)buffer % tfs_alignment) return NULL; /* buffer must be properly aligned */179BMK_resetTimedFnState(r, total_ms, run_ms);180return r;181}182183void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)184{185if (!total_ms) total_ms = 1 ;186if (!run_ms) run_ms = 1;187if (run_ms > total_ms) run_ms = total_ms;188timedFnState->timeSpent_ns = 0;189timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000;190timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000;191timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000; /* hopefully large enough : must be larger than any potential measurement */192timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);193timedFnState->nbLoops = 1;194timedFnState->coolTime = UTIL_getTime();195}196197/* Tells if nb of seconds set in timedFnState for all runs is spent.198* note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */199int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)200{201return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);202}203204205#undef MIN206#define MIN(a,b) ( (a) < (b) ? (a) : (b) )207208#define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds */209210BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,211BMK_benchParams_t p)212{213PTime const runBudget_ns = cont->runBudget_ns;214PTime const runTimeMin_ns = runBudget_ns / 2;215int completed = 0;216BMK_runTime_t bestRunTime = cont->fastestRun;217218while (!completed) {219BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops);220221if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */222return runResult;223}224225{ BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);226double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;227228cont->timeSpent_ns += (unsigned long long)loopDuration_ns;229230/* estimate nbLoops for next run to last approximately 1 second */231if (loopDuration_ns > (runBudget_ns / 50)) {232double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);233cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1;234} else {235/* previous run was too short : blindly increase workload by x multiplier */236const unsigned multiplier = 10;237assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */238cont->nbLoops *= multiplier;239}240241if(loopDuration_ns < runTimeMin_ns) {242/* don't report results for which benchmark run time was too small : increased risks of rounding errors */243assert(completed == 0);244continue;245} else {246if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {247bestRunTime = newRunTime;248}249completed = 1;250}251}252} /* while (!completed) */253254return BMK_setValid_runTime(bestRunTime);255}256257258