CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Common/Math/math_util.h
Views: 1401
#pragma once12// Some of the stuff in this file are snippets from all over the web, esp. dspmusic.org. I think it's all public domain.3// In any case, very little of it is used anywhere at the moment.45#include <cmath>6#include <cstring>7#include <cstdint>89typedef unsigned short float16;1011// This ain't a 1.5.10 float16, it's a stupid hack format where we chop 16 bits off a float.12// This choice is subject to change. Don't think I'm using this for anything at all now anyway.13// DEPRECATED14inline float16 FloatToFloat16(float x) {15int ix;16memcpy(&ix, &x, sizeof(float));17return ix >> 16;18}1920inline float Float16ToFloat(float16 ix) {21float x;22memcpy(&x, &ix, sizeof(float));23return x;24}2526inline bool isPowerOf2(int n) {27return n == 1 || (n & (n - 1)) == 0;28}2930// Next power of 2.31inline uint32_t RoundUpToPowerOf2(uint32_t v) {32v--;33v |= v >> 1;34v |= v >> 2;35v |= v >> 4;36v |= v >> 8;37v |= v >> 16;38v++;39return v;40}4142inline uint32_t RoundUpToPowerOf2(uint32_t v, uint32_t power) {43return (v + power - 1) & ~(power - 1);44}4546inline uint32_t log2i(uint32_t val) {47unsigned int ret = -1;48while (val != 0) {49val >>= 1; ret++;50}51return ret;52}5354#define PI 3.141592653589793f55#ifndef M_PI56#define M_PI 3.141592653589793f57#endif5859template<class T>60inline T clamp_value(T val, T floor, T cap) {61if (val > cap)62return cap;63else if (val < floor)64return floor;65else66return val;67}6869// Very common operation, familiar from shaders.70inline float saturatef(float x) {71if (x > 1.0f) return 1.0f;72else if (x < 0.0f) return 0.0f;73else return x;74}7576#define ROUND_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))77#define ROUND_DOWN(x, a) ((x) & ~((a) - 1))7879template<class T>80inline void Clamp(T* val, const T& min, const T& max)81{82if (*val < min)83*val = min;84else if (*val > max)85*val = max;86}8788template<class T>89inline T Clamp(const T val, const T& min, const T& max)90{91T ret = val;92Clamp(&ret, min, max);93return ret;94}9596union FP32 {97uint32_t u;98float f;99};100101struct FP16 {102uint16_t u;103};104105inline bool my_isinf(float f) {106FP32 f2u;107f2u.f = f;108return f2u.u == 0x7f800000 ||109f2u.u == 0xff800000;110}111112inline bool my_isinf_u(uint32_t u) {113return u == 0x7f800000 || u == 0xff800000;114}115116inline bool my_isnan(float f) {117FP32 f2u;118f2u.f = f;119// NaNs have non-zero mantissa120return ((f2u.u & 0x7F800000) == 0x7F800000) && (f2u.u & 0x7FFFFF);121}122123inline bool my_isnanorinf(float f) {124FP32 f2u;125f2u.f = f;126// NaNs have non-zero mantissa, infs have zero mantissa. That is, we just ignore the mantissa here.127return ((f2u.u & 0x7F800000) == 0x7F800000);128}129130inline float InfToZero(float f) {131return my_isinf(f) ? 0.0f : f;132}133134inline int is_even(float d) {135float int_part;136modff(d / 2.0f, &int_part);137return 2.0f * int_part == d;138}139140// Rounds *.5 to closest even number141inline double round_ieee_754(double d) {142float i = (float)floor(d);143d -= i;144if (d < 0.5f)145return i;146if (d > 0.5f)147return i + 1.0f;148if (is_even(i))149return i;150return i + 1.0f;151}152153// magic code from ryg: http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/154// See also SSE2 version: https://gist.github.com/rygorous/2144712155inline FP32 half_to_float_fast5(FP16 h)156{157static const FP32 magic = { (127 + (127 - 15)) << 23 };158static const FP32 was_infnan = { (127 + 16) << 23 };159FP32 o;160o.u = (h.u & 0x7fff) << 13; // exponent/mantissa bits161o.f *= magic.f; // exponent adjust162if (o.f >= was_infnan.f) // make sure Inf/NaN survive (retain the low bits)163o.u = (255 << 23) | (h.u & 0x03ff);164o.u |= (h.u & 0x8000) << 16; // sign bit165return o;166}167168inline float ExpandHalf(uint16_t half) {169FP16 fp16;170fp16.u = half;171FP32 fp = half_to_float_fast5(fp16);172return fp.f;173}174175// More magic code: https://gist.github.com/rygorous/2156668176inline FP16 float_to_half_fast3(FP32 f)177{178static const FP32 f32infty = { 255 << 23 };179static const FP32 f16infty = { 31 << 23 };180static const FP32 magic = { 15 << 23 };181static const uint32_t sign_mask = 0x80000000u;182static const uint32_t round_mask = ~0xfffu;183FP16 o = { 0 };184185uint32_t sign = f.u & sign_mask;186f.u ^= sign;187188if (f.u >= f32infty.u) // Inf or NaN (all exponent bits set)189o.u = (f.u > f32infty.u) ? (0x7e00 | (f.u & 0x3ff)) : 0x7c00; // NaN->qNaN and Inf->Inf190else // (De)normalized number or zero191{192f.u &= round_mask;193f.f *= magic.f;194f.u -= round_mask;195if (f.u > f16infty.u) f.u = f16infty.u; // Clamp to signed infinity if overflowed196197o.u = f.u >> 13; // Take the bits!198}199200o.u |= sign >> 16;201return o;202}203204inline uint16_t ShrinkToHalf(float full) {205FP32 fp32;206fp32.f = full;207FP16 fp = float_to_half_fast3(fp32);208return fp.u;209}210211// FPU control.212void EnableFZ();213214// Enable both FZ and Default-NaN. Is documented to flip some ARM implementation into a "run-fast" mode215// where they can schedule VFP instructions on the NEON unit (these implementations have216// very slow VFP units).217// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0274h/Babffifj.html218void FPU_SetFastMode();219220221