Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/meshoptimizer/quantization.cpp
9903 views
1
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
2
#include "meshoptimizer.h"
3
4
#include <assert.h>
5
6
// Overlays a float with an unsigned integer so the functions in this file can
// read and write the raw bit pattern of a float value.
// NOTE(review): the bit manipulation below assumes both members are 32 bits wide
// and that float uses the IEEE 754 binary32 layout - confirm for exotic targets.
// Keep `f` as the first member: callers brace-initialize the union with a float.
union FloatBits
{
	float f;     // value viewed as a floating-point number
	unsigned ui; // the same storage viewed as raw bits
};
11
12
unsigned short meshopt_quantizeHalf(float v)
13
{
14
FloatBits u = {v};
15
unsigned int ui = u.ui;
16
17
int s = (ui >> 16) & 0x8000;
18
int em = ui & 0x7fffffff;
19
20
// bias exponent and round to nearest; 112 is relative exponent bias (127-15)
21
int h = (em - (112 << 23) + (1 << 12)) >> 13;
22
23
// underflow: flush to zero; 113 encodes exponent -14
24
h = (em < (113 << 23)) ? 0 : h;
25
26
// overflow: infinity; 143 encodes exponent 16
27
h = (em >= (143 << 23)) ? 0x7c00 : h;
28
29
// NaN; note that we convert all types of NaN to qNaN
30
h = (em > (255 << 23)) ? 0x7e00 : h;
31
32
return (unsigned short)(s | h);
33
}
34
35
float meshopt_quantizeFloat(float v, int N)
36
{
37
assert(N >= 0 && N <= 23);
38
39
FloatBits u = {v};
40
unsigned int ui = u.ui;
41
42
const int mask = (1 << (23 - N)) - 1;
43
const int round = (1 << (23 - N)) >> 1;
44
45
int e = ui & 0x7f800000;
46
unsigned int rui = (ui + round) & ~mask;
47
48
// round all numbers except inf/nan; this is important to make sure nan doesn't overflow into -0
49
ui = e == 0x7f800000 ? ui : rui;
50
51
// flush denormals to zero
52
ui = e == 0 ? 0 : ui;
53
54
u.ui = ui;
55
return u.f;
56
}
57
58
float meshopt_dequantizeHalf(unsigned short h)
59
{
60
unsigned int s = unsigned(h & 0x8000) << 16;
61
int em = h & 0x7fff;
62
63
// bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15)
64
int r = (em + (112 << 10)) << 13;
65
66
// denormal: flush to zero
67
r = (em < (1 << 10)) ? 0 : r;
68
69
// infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases
70
// 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255
71
r += (em >= (31 << 10)) ? (112 << 23) : 0;
72
73
FloatBits u;
74
u.ui = s | r;
75
return u.f;
76
}
77
78