Path: blob/master/thirdparty/embree/kernels/common/profile.h
9905 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#pragma once45#include "default.h"67namespace embree8{9/*! helper structure for the implementation of the profile functions below */10struct ProfileTimer11{12static const size_t N = 20;1314ProfileTimer () {}1516ProfileTimer (const size_t numSkip) : i(0), j(0), maxJ(0), numSkip(numSkip), t0(0)17{18for (size_t i=0; i<N; i++) names[i] = nullptr;19for (size_t i=0; i<N; i++) dt_fst[i] = 0.0;20for (size_t i=0; i<N; i++) dt_min[i] = pos_inf;21for (size_t i=0; i<N; i++) dt_avg[i] = 0.0;22for (size_t i=0; i<N; i++) dt_max[i] = neg_inf;23}2425__forceinline void begin()26{27j=0;28t0 = tj = getSeconds();29}3031__forceinline void end() {32absolute("total");33i++;34}3536__forceinline void operator() (const char* name) {37relative(name);38}3940__forceinline void absolute (const char* name)41{42const double t1 = getSeconds();43const double dt = t1-t0;44assert(names[j] == nullptr || names[j] == name);45names[j] = name;46if (i == 0) dt_fst[j] = dt;47if (i>=numSkip) {48dt_min[j] = min(dt_min[j],dt);49dt_avg[j] = dt_avg[j] + dt;50dt_max[j] = max(dt_max[j],dt);51}52j++;53maxJ = max(maxJ,j);54}5556__forceinline void relative (const char* name)57{58const double t1 = getSeconds();59const double dt = t1-tj;60tj = t1;61assert(names[j] == nullptr || names[j] == name);62names[j] = name;63if (i == 0) dt_fst[j] = dt;64if (i>=numSkip) {65dt_min[j] = min(dt_min[j],dt);66dt_avg[j] = dt_avg[j] + dt;67dt_max[j] = max(dt_max[j],dt);68}69j++;70maxJ = max(maxJ,j);71}7273void print(size_t numElements)74{75for (size_t k=0; k<N; k++)76dt_avg[k] /= double(i-numSkip);7778printf(" profile [M/s]:\n");79for (size_t j=0; j<maxJ; j++)80printf("%20s: fst = %7.2f M/s, min = %7.2f M/s, avg = %7.2f M/s, max = %7.2f M/s\n",81names[j],numElements/dt_fst[j]*1E-6,numElements/dt_max[j]*1E-6,numElements/dt_avg[j]*1E-6,numElements/dt_min[j]*1E-6);8283printf(" profile [ms]:\n");84for (size_t j=0; j<maxJ; j++)85printf("%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n",86names[j],1000.0*dt_fst[j],1000.0*dt_min[j],1000.0*dt_avg[j],1000.0*dt_max[j]);87}8889void print()90{91printf(" profile:\n");9293for (size_t k=0; k<N; k++)94dt_avg[k] /= double(i-numSkip);9596for (size_t j=0; j<maxJ; j++) {97printf("%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n",98names[j],1000.0*dt_fst[j],1000.0*dt_min[j],1000.0*dt_avg[j],1000.0*dt_max[j]);99}100}101102double avg() {103return dt_avg[maxJ-1]/double(i-numSkip);104}105106private:107size_t i;108size_t j;109size_t maxJ;110size_t numSkip;111double t0;112double tj;113const char* names[N];114double dt_fst[N];115double dt_min[N];116double dt_avg[N];117double dt_max[N];118};119120/*! This function executes some code block multiple times and measured sections of it.121Use the following way:122123profile(1,10,1000,[&](ProfileTimer& timer) {124// code125timer("A");126// code127timer("B");128});129*/130template<typename Closure>131void profile(const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure)132{133ProfileTimer timer(numSkip);134135for (size_t i=0; i<numSkip+numIter; i++)136{137timer.begin();138closure(timer);139timer.end();140}141timer.print(numElements);142}143144/*! similar as the function above, but the timer object comes externally */145template<typename Closure>146void profile(ProfileTimer& timer, const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure)147{148timer = ProfileTimer(numSkip);149150for (size_t i=0; i<numSkip+numIter; i++)151{152timer.begin();153closure(timer);154timer.end();155}156timer.print(numElements);157}158}159160161