CoCalc -- math

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/Math/math_util.h
Views: 1401
1
#pragma once
2

3
// Some of the code in this file consists of snippets from all over the web, esp. dspmusic.org. I think it's all public domain.
// In any case, very little of it is used anywhere at the moment.
5

6
#include <cmath>
7
#include <cstring>
8
#include <cstdint>
9

10
// Storage type for the truncated-float format handled by the two helpers below.
typedef unsigned short float16;

// This is NOT a 1.5.10 float16. It is a hack format that keeps only the upper 16 bits
// of a 32-bit float and chops off the lower 16.
// This choice is subject to change, and the format is not believed to be used anywhere.
// DEPRECATED

// Returns the upper 16 bits of x's bit pattern (plain truncation, no rounding).
inline float16 FloatToFloat16(float x) {
	// Work on an unsigned copy of the bits so the shift never sees a negative value.
	uint32_t bits;
	memcpy(&bits, &x, sizeof(bits));
	return (float16)(bits >> 16);
}

// Rebuilds a float whose upper 16 bits are ix and whose lower 16 bits are zero.
inline float Float16ToFloat(float16 ix) {
	// Widen and reposition first: copying sizeof(float) bytes straight out of the
	// 2-byte argument would read past its end.
	const uint32_t bits = (uint32_t)ix << 16;
	float x;
	memcpy(&x, &bits, sizeof(x));
	return x;
}
26

27
// Returns true if n has at most one bit set.
// Note that this means 0 is also reported as a power of 2; negative values other than
// the most negative int are not.
inline bool isPowerOf2(int n) {
	// Use unsigned arithmetic so that n - 1 cannot overflow for the most negative int.
	const unsigned int bits = (unsigned int)n;
	// Clearing the lowest set bit leaves zero exactly when no other bit was set.
	return (bits & (bits - 1)) == 0;
}
30

31
// Rounds v up to the next power of 2. A value that already is a power of 2 is returned unchanged.
// Edge cases: 0 yields 0, and any value above 0x80000000 wraps around to 0.
inline uint32_t RoundUpToPowerOf2(uint32_t v) {
	// Step back by one so that exact powers of 2 map to themselves.
	v--;
	// Smear the highest set bit into every lower bit position (shifts of 1, 2, 4, 8, 16).
	for (uint32_t shift = 1; shift < 32; shift <<= 1) {
		v |= v >> shift;
	}
	// All bits below the top one are now set; adding one carries into the next power of 2.
	v++;
	return v;
}
42

43
// Rounds v up to the next multiple of power.
// The masking only gives a multiple when power itself is a power of 2.
inline uint32_t RoundUpToPowerOf2(uint32_t v, uint32_t power) {
	const uint32_t lowBits = power - 1;
	// Bump v past the boundary, then clear the bits below it.
	return (v + lowBits) & ~lowBits;
}
46

47
// Returns the index of the highest set bit of val, i.e. floor(log2(val)).
// For val == 0 there is no set bit and the result is 0xFFFFFFFF (the unsigned form of -1).
inline uint32_t log2i(uint32_t val) {
	// Start one below zero so the first shift lands on index 0.
	uint32_t highestBit = (uint32_t)-1;
	for (; val != 0; val >>= 1) {
		highestBit++;
	}
	return highestBit;
}
54

55
// Pi as a single-precision literal.
#define PI 3.141592653589793f
// Fallback definition, used only when M_PI has not been defined already.
// NOTE(review): this fallback is a float literal, whereas a library-provided M_PI is
// normally a double - confirm that no caller depends on the type.
#ifndef M_PI
#define M_PI 3.141592653589793f
#endif
59

60
// Returns val limited to the range [floor, cap].
// The upper bound is tested before the lower one. A value that compares neither above
// cap nor below floor is returned unchanged.
template<class T>
inline T clamp_value(T val, T floor, T cap) {
	if (val > cap) {
		return cap;
	}
	return (val < floor) ? floor : val;
}
69

70
// Clamps x to the range [0, 1]. Very common operation, familiar from shaders.
// A value that is neither above 1 nor below 0 (NaN included, since it fails both
// comparisons) is returned unchanged.
inline float saturatef(float x) {
	if (x > 1.0f) {
		return 1.0f;
	}
	if (x < 0.0f) {
		return 0.0f;
	}
	return x;
}
76

77
// Round x up / down to a multiple of a. The masking only gives a multiple when a is a power of 2.
// ROUND_UP expands a twice, so avoid passing an expression with side effects for it.
#define ROUND_UP(x, a)   (((x) + (a) - 1) & ~((a) - 1))
#define ROUND_DOWN(x, a) ((x) & ~((a) - 1))
79

80
// Clamps *val in place to the range [min, max].
// The lower bound is tested first; *val is left untouched when it is already in range.
template<class T>
inline void Clamp(T* val, const T& min, const T& max)
{
	if (*val < min) {
		*val = min;
	} else if (*val > max) {
		*val = max;
	}
}

// Returns a copy of val clamped to the range [min, max], using the in-place overload.
template<class T>
inline T Clamp(const T val, const T& min, const T& max)
{
	T clamped = val;
	Clamp(&clamped, min, max);
	return clamped;
}
96

97
// A 32-bit float that can be accessed either as its raw bit pattern or as a value.
union FP32 {
	uint32_t u;  // raw bits
	float f;     // the same storage read as a float
};
101

102
// Raw bit pattern of a 16-bit half float, as consumed and produced by the half/float converters.
struct FP16 {
	uint16_t u;
};
105

106
// Returns true if f is +infinity or -infinity, decided purely from its bit pattern.
inline bool my_isinf(float f) {
	// memcpy is the well-defined way to get at the bits of a float.
	uint32_t bits;
	memcpy(&bits, &f, sizeof(bits));
	// All exponent bits set and a zero mantissa, with either sign.
	return bits == 0x7f800000 || bits == 0xff800000;
}
112

113
// Same check as my_isinf, for a caller that already holds the raw 32-bit pattern of a float.
inline bool my_isinf_u(uint32_t u) {
	const bool positiveInf = (u == 0x7f800000);
	const bool negativeInf = (u == 0xff800000);
	return positiveInf || negativeInf;
}
116

117
// Returns true if f is a NaN (of either sign), decided purely from its bit pattern.
inline bool my_isnan(float f) {
	// memcpy is the well-defined way to get at the bits of a float.
	uint32_t bits;
	memcpy(&bits, &f, sizeof(bits));
	// NaNs have all exponent bits set and a non-zero mantissa (a zero mantissa is an infinity).
	const bool exponentAllOnes = (bits & 0x7F800000) == 0x7F800000;
	const bool mantissaNonZero = (bits & 0x7FFFFF) != 0;
	return exponentAllOnes && mantissaNonZero;
}
123

124
// Returns true if f is a NaN or an infinity, decided purely from its bit pattern.
inline bool my_isnanorinf(float f) {
	// memcpy is the well-defined way to get at the bits of a float.
	uint32_t bits;
	memcpy(&bits, &f, sizeof(bits));
	// NaNs have a non-zero mantissa and infinities a zero one, so the mantissa is simply
	// ignored: an all-ones exponent is enough.
	return (bits & 0x7F800000) == 0x7F800000;
}
130

131
inline float InfToZero(float f) {
132
	return my_isinf(f) ? 0.0f : f;
133
}
134

135
// Returns 1 if d is an even integer value and 0 otherwise (odd values, non-integers, NaN).
// Infinities also yield 1, because doubling their halved integer part compares equal to d.
inline int is_even(float d) {
	float halfWhole;
	// Keep only the integer part of d / 2; d is even exactly when doubling it gives d back.
	modff(d / 2.0f, &halfWhole);
	return 2.0f * halfWhole == d;
}
140

141
// Rounds d to the nearest integer value; exact *.5 ties go to the closest even number.
// The work is done in double throughout, so inputs beyond float precision (above 2^24)
// are not truncated on the way.
inline double round_ieee_754(double d) {
	const double whole = floor(d);
	const double frac = d - whole;  // in [0, 1) for finite d
	if (frac < 0.5)
		return whole;
	if (frac > 0.5)
		return whole + 1.0;
	// Exact tie: keep whichever neighbour is even.
	// (NaN and infinities also end up here, since neither comparison above holds for
	// them, and they come back out as NaN / the same infinity.)
	if (fmod(whole, 2.0) == 0.0)
		return whole;
	return whole + 1.0;
}
153

154
// magic code from ryg: http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
155
// See also SSE2 version: https://gist.github.com/rygorous/2144712
156
inline FP32 half_to_float_fast5(FP16 h)
157
{
158
	static const FP32 magic = { (127 + (127 - 15)) << 23 };
159
	static const FP32 was_infnan = { (127 + 16) << 23 };
160
	FP32 o;
161
	o.u = (h.u & 0x7fff) << 13;     // exponent/mantissa bits
162
	o.f *= magic.f;                 // exponent adjust
163
	if (o.f >= was_infnan.f)        // make sure Inf/NaN survive (retain the low bits)
164
		o.u = (255 << 23) | (h.u & 0x03ff);
165
	o.u |= (h.u & 0x8000) << 16;    // sign bit
166
	return o;
167
}
168

169
inline float ExpandHalf(uint16_t half) {
170
	FP16 fp16;
171
	fp16.u = half;
172
	FP32 fp = half_to_float_fast5(fp16);
173
	return fp.f;
174
}
175

176
// More magic code: https://gist.github.com/rygorous/2156668
177
inline FP16 float_to_half_fast3(FP32 f)
178
{
179
	static const FP32 f32infty = { 255 << 23 };
180
	static const FP32 f16infty = { 31 << 23 };
181
	static const FP32 magic = { 15 << 23 };
182
	static const uint32_t sign_mask = 0x80000000u;
183
	static const uint32_t round_mask = ~0xfffu;
184
	FP16 o = { 0 };
185

186
	uint32_t sign = f.u & sign_mask;
187
	f.u ^= sign;
188

189
	if (f.u >= f32infty.u) // Inf or NaN (all exponent bits set)
190
		o.u = (f.u > f32infty.u) ? (0x7e00 | (f.u & 0x3ff)) : 0x7c00; // NaN->qNaN and Inf->Inf
191
	else // (De)normalized number or zero
192
	{
193
		f.u &= round_mask;
194
		f.f *= magic.f;
195
		f.u -= round_mask;
196
		if (f.u > f16infty.u) f.u = f16infty.u; // Clamp to signed infinity if overflowed
197

198
		o.u = f.u >> 13; // Take the bits!
199
	}
200

201
	o.u |= sign >> 16;
202
	return o;
203
}
204

205
inline uint16_t ShrinkToHalf(float full) {
206
	FP32 fp32;
207
	fp32.f = full;
208
	FP16 fp = float_to_half_fast3(fp32);
209
	return fp.u;
210
}
211

212
// FPU control. Both functions are only declared here; they are defined elsewhere.

// NOTE(review): presumably enables flush-to-zero (FZ) mode on the FPU - confirm against the definition.
void EnableFZ();

// Enable both FZ and Default-NaN. This is documented to flip some ARM implementations into
// a "run-fast" mode where they can schedule VFP instructions on the NEON unit (these
// implementations have very slow VFP units).
// http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0274h/Babffifj.html
void FPU_SetFastMode();
220

221
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

Product

Resources

Company