Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/Math/math_util.h
5659 views
1
#pragma once
2
3
// Some of the stuff in this file are snippets from all over the web, esp. dspmusic.org. I think it's all public domain.
4
// In any case, very little of it is used anywhere at the moment.
5
6
#include <cmath>
7
#include <cstring>
8
#include <cstdint>
9
10
// Returns true only when n is a positive power of two (1, 2, 4, 8, ...).
// Zero and negative values are rejected up front: the bit trick below would otherwise
// report 0 as a power of two, and n - 1 would overflow for the most negative int.
inline constexpr bool isPowerOf2(int n) {
	// A power of two has exactly one bit set, so clearing the lowest set bit leaves zero.
	return n > 0 && (n & (n - 1)) == 0;
}
13
14
// Next power of 2 (NOTE: Not next multiple of a power of two, like the below function!)
// Values that already are a power of two are returned unchanged. An input of 0, or one
// above 0x80000000, wraps around and yields 0.
inline constexpr uint32_t RoundToNextPowerOf2(uint32_t v) {
	uint32_t smeared = v - 1;
	// Copy the highest set bit into every lower position: shifts of 1, 2, 4, 8 and 16.
	for (uint32_t shift = 1; shift <= 16; shift <<= 1) {
		smeared |= smeared >> shift;
	}
	return smeared + 1;
}
25
26
// Rounds v up to the nearest multiple of `multiple`.
// NOTE: multiple must be a power of two!
inline constexpr uint32_t RoundUpToMultipleOf(uint32_t v, uint32_t multiple) {
	const uint32_t low_bits = multiple - 1;
	return (v + low_bits) & ~low_bits;
}
30
31
// Rounds v down to the nearest multiple of `multiple` by clearing the low bits.
// As with RoundUpToMultipleOf, the bit mask only works when multiple is a power of two.
inline constexpr uint32_t RoundDownToMultipleOf(uint32_t v, uint32_t multiple) {
	const uint32_t low_bits = multiple - 1;
	return v & ~low_bits;
}
34
35
// Integer base-2 logarithm: the index of the highest set bit in val.
// For val == 0 the counter is never advanced, so the all-ones value is returned.
// TODO: this should just use a bitscan.
inline uint32_t log2i(uint32_t val) {
	// Starts one below zero so that the first shift brings the count to 0.
	unsigned int highest_bit = ~0u;
	for (; val != 0; val >>= 1) {
		++highest_bit;
	}
	return highest_bit;
}
43
44
// Pi as a single-precision literal (note the 'f' suffix).
#define PI 3.141592653589793f
// Only supply M_PI when nothing included earlier has defined it already.
// This fallback is also a float literal.
#if !defined(M_PI)
#define M_PI 3.141592653589793f
#endif
48
49
// Limits val to the range [floor, cap] and returns the result by value.
// The upper bound is tested first, so it wins if the bounds are passed in inverted order
// and val exceeds both. Only operator> and operator< are required of T.
template<class T>
inline T clamp_value(T val, T floor, T cap) {
	return (val > cap) ? cap
		: (val < floor) ? floor
		: val;
}
58
59
// Very common operation, familiar from shaders: limits x to the range [0, 1].
// A NaN input fails both comparisons and is returned as-is.
inline float saturatef(float x) {
	if (x > 1.0f) {
		return 1.0f;
	}
	if (x < 0.0f) {
		return 0.0f;
	}
	return x;
}
65
66
// Macro forms of rounding to a multiple of `align`. The bit masking only gives a correct
// result when `align` is a power of two. Both arguments are fully parenthesized, but
// `align` is expanded twice in ROUND_UP, so avoid passing expressions with side effects.
#define ROUND_UP(value, align) (((value) + (align) - 1) & ~((align) - 1))
#define ROUND_DOWN(value, align) ((value) & ~((align) - 1))
68
69
// In-place clamp: overwrites *val with min if it is below min, otherwise with max if it is
// above max. A value already inside [min, max] is left untouched.
template<class T>
inline void Clamp(T* val, const T& min, const T& max)
{
	T& target = *val;
	if (target < min) {
		target = min;
		return;
	}
	if (target > max) {
		target = max;
	}
}

// By-value clamp: returns a clamped copy of val, delegating to the in-place overload.
template<class T>
inline T Clamp(const T val, const T& min, const T& max)
{
	T clamped = val;
	Clamp(&clamped, min, max);
	return clamped;
}
85
86
// A 32-bit value that can be addressed either as a raw unsigned integer or as a float.
// The integer member is declared first, so brace-initialization (FP32 x = { bits })
// sets the bit pattern.
union FP32 {
	uint32_t u;  // raw bit pattern
	float f;     // the same storage, as a float
};
90
91
// Wrapper around the raw 16 bits of a half-precision value.
// There is no native half type here, so only the bit pattern is stored.
struct FP16 {
	uint16_t u;  // raw bit pattern
};
94
95
// Returns true when f is positive or negative infinity, judged purely from its bit pattern.
// The bits are copied out with memcpy rather than read through a union: reading a union
// member other than the one last written is undefined behaviour in standard C++.
inline bool my_isinf(float f) {
	static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");
	uint32_t bits;
	std::memcpy(&bits, &f, sizeof(bits));
	// Exponent all ones with a zero mantissa, either sign.
	return bits == 0x7f800000 || bits == 0xff800000;
}
101
102
// Same infinity test as my_isinf, but for a float that is already available as raw bits.
inline bool my_isinf_u(uint32_t u) {
	// Drop the sign bit; what remains must be exactly "exponent all ones, mantissa zero".
	const uint32_t magnitude = u & 0x7fffffff;
	return magnitude == 0x7f800000;
}
105
106
// Returns true when f is a NaN, judged purely from its bit pattern.
// The bits are copied out with memcpy rather than read through a union: reading a union
// member other than the one last written is undefined behaviour in standard C++.
inline bool my_isnan(float f) {
	static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");
	uint32_t bits;
	std::memcpy(&bits, &f, sizeof(bits));
	// NaNs have an all-ones exponent and a non-zero mantissa.
	const bool exponent_all_ones = (bits & 0x7F800000) == 0x7F800000;
	const bool mantissa_nonzero = (bits & 0x7FFFFF) != 0;
	return exponent_all_ones && mantissa_nonzero;
}
112
113
// Returns true when f is either a NaN or an infinity, judged purely from its bit pattern.
// The bits are copied out with memcpy rather than read through a union: reading a union
// member other than the one last written is undefined behaviour in standard C++.
inline bool my_isnanorinf(float f) {
	static_assert(sizeof(float) == sizeof(uint32_t), "float must be 32 bits wide");
	uint32_t bits;
	std::memcpy(&bits, &f, sizeof(bits));
	// NaNs have non-zero mantissa, infs have zero mantissa. That is, we just ignore the mantissa here.
	return (bits & 0x7F800000) == 0x7F800000;
}
119
120
inline float InfToZero(float f) {
121
return my_isinf(f) ? 0.0f : f;
122
}
123
124
// Returns 1 when d is an even whole number, 0 otherwise.
// Halves d, discards the fractional part, and checks that doubling gets back to d exactly.
inline int is_even(float d) {
	float half_whole;
	modff(d / 2.0f, &half_whole);
	const bool even = (half_whole * 2.0f == d);
	return even ? 1 : 0;
}
129
130
// Rounds d to the nearest whole number; exact *.5 ties go to the closest even number.
// All arithmetic stays in double precision. Squeezing the floor through a float would
// corrupt whole numbers above 2^24 and overflow for inputs beyond float's range.
inline double round_ieee_754(double d) {
	const double whole = std::floor(d);
	const double frac = d - whole;
	if (frac < 0.5)
		return whole;
	if (frac > 0.5)
		return whole + 1.0;
	// Exact tie: keep the floor when it is even, otherwise step up to the even neighbour.
	// (NaN and infinite inputs also land here, since both comparisons above are false;
	// they come back out as NaN / the same infinity.)
	const bool whole_is_even = std::fmod(whole, 2.0) == 0.0;
	return whole_is_even ? whole : whole + 1.0;
}
142
143
// magic code from ryg: http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
144
// See also SSE2 version: https://gist.github.com/rygorous/2144712
145
inline FP32 half_to_float_fast5(FP16 h)
146
{
147
static const FP32 magic = { (127 + (127 - 15)) << 23 };
148
static const FP32 was_infnan = { (127 + 16) << 23 };
149
FP32 o;
150
o.u = (h.u & 0x7fff) << 13; // exponent/mantissa bits
151
o.f *= magic.f; // exponent adjust
152
if (o.f >= was_infnan.f) // make sure Inf/NaN survive (retain the low bits)
153
o.u = (255 << 23) | (h.u & 0x03ff);
154
o.u |= (h.u & 0x8000) << 16; // sign bit
155
return o;
156
}
157
158
inline float ExpandHalf(uint16_t half) {
159
FP16 fp16;
160
fp16.u = half;
161
FP32 fp = half_to_float_fast5(fp16);
162
return fp.f;
163
}
164
165
// More magic code: https://gist.github.com/rygorous/2156668
166
inline FP16 float_to_half_fast3(FP32 f)
167
{
168
static const FP32 f32infty = { 255 << 23 };
169
static const FP32 f16infty = { 31 << 23 };
170
static const FP32 magic = { 15 << 23 };
171
static const uint32_t sign_mask = 0x80000000u;
172
static const uint32_t round_mask = ~0xfffu;
173
FP16 o = { 0 };
174
175
uint32_t sign = f.u & sign_mask;
176
f.u ^= sign;
177
178
if (f.u >= f32infty.u) // Inf or NaN (all exponent bits set)
179
o.u = (f.u > f32infty.u) ? (0x7e00 | (f.u & 0x3ff)) : 0x7c00; // NaN->qNaN and Inf->Inf
180
else // (De)normalized number or zero
181
{
182
f.u &= round_mask;
183
f.f *= magic.f;
184
f.u -= round_mask;
185
if (f.u > f16infty.u) f.u = f16infty.u; // Clamp to signed infinity if overflowed
186
187
o.u = f.u >> 13; // Take the bits!
188
}
189
190
o.u |= sign >> 16;
191
return o;
192
}
193
194
inline uint16_t ShrinkToHalf(float full) {
195
FP32 fp32;
196
fp32.f = full;
197
FP16 fp = float_to_half_fast3(fp32);
198
return fp.u;
199
}
200
201
// FPU control.
202
// Declaration only; the definition is not visible in this file.
// NOTE(review): presumably switches the FPU into flush-to-zero (FZ) mode - confirm against the definition.
void EnableFZ();
203
204
// Enables both FZ and Default-NaN. This is documented to flip some ARM implementations into a "run-fast" mode
205
// where they can schedule VFP instructions on the NEON unit (these implementations have
206
// very slow VFP units).
207
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0274h/Babffifj.html
208
// Declaration only; the definition is not visible in this file.
void FPU_SetFastMode();
209
210