CoCalc -- quantization.cpp

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/meshoptimizer/quantization.cpp
20971 views
1
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
2
#include "meshoptimizer.h"
3

4
#include <assert.h>
5

6
// Overlays a float and an unsigned int on the same storage so the conversion
// routines in this file can read and write the raw bit pattern of a float.
// NOTE(review): the bit manipulation in this file presumes a 32-bit IEEE 754
// float and a 32-bit unsigned int -- confirm for the target platform.
union FloatBits
7
{
8
	// must stay the first member: `FloatBits u = {v}` initializes the first member
	float f;
9
	// the same storage viewed as an integer bit pattern
	unsigned int ui;
10
};
11

12
unsigned short meshopt_quantizeHalf(float v)
13
{
14
	FloatBits u = {v};
15
	unsigned int ui = u.ui;
16

17
	int s = (ui >> 16) & 0x8000;
18
	int em = ui & 0x7fffffff;
19

20
	// bias exponent and round to nearest; 112 is relative exponent bias (127-15)
21
	int h = (em - (112 << 23) + (1 << 12)) >> 13;
22

23
	// underflow: flush to zero; 113 encodes exponent -14
24
	h = (em < (113 << 23)) ? 0 : h;
25

26
	// overflow: infinity; 143 encodes exponent 16
27
	h = (em >= (143 << 23)) ? 0x7c00 : h;
28

29
	// NaN; note that we convert all types of NaN to qNaN
30
	h = (em > (255 << 23)) ? 0x7e00 : h;
31

32
	return (unsigned short)(s | h);
33
}
34

35
float meshopt_quantizeFloat(float v, int N)
36
{
37
	assert(N >= 0 && N <= 23);
38

39
	FloatBits u = {v};
40
	unsigned int ui = u.ui;
41

42
	const int mask = (1 << (23 - N)) - 1;
43
	const int round = (1 << (23 - N)) >> 1;
44

45
	int e = ui & 0x7f800000;
46
	unsigned int rui = (ui + round) & ~mask;
47

48
	// round all numbers except inf/nan; this is important to make sure nan doesn't overflow into -0
49
	ui = e == 0x7f800000 ? ui : rui;
50

51
	// flush denormals to zero
52
	ui = e == 0 ? 0 : ui;
53

54
	u.ui = ui;
55
	return u.f;
56
}
57

58
float meshopt_dequantizeHalf(unsigned short h)
59
{
60
	unsigned int s = unsigned(h & 0x8000) << 16;
61
	int em = h & 0x7fff;
62

63
	// bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15)
64
	int r = (em + (112 << 10)) << 13;
65

66
	// denormal: flush to zero
67
	r = (em < (1 << 10)) ? 0 : r;
68

69
	// infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases
70
	// 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255
71
	r += (em >= (31 << 10)) ? (112 << 23) : 0;
72

73
	FloatBits u;
74
	u.ui = s | r;
75
	return u.f;
76
}
77

78
Product

Resources

Company