// Source: GitHub repository godotengine/godot
// Path: blob/master/thirdparty/jolt_physics/Jolt/Math/HalfFloat.h
1
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
2
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
3
// SPDX-License-Identifier: MIT
4
5
#pragma once
6
7
#include <Jolt/Math/Vec4.h>
8
#include <Jolt/Core/FPException.h>
9
10
JPH_NAMESPACE_BEGIN
11
12
using HalfFloat = uint16;
13
14
// Define half float constant values
15
static constexpr HalfFloat HALF_FLT_MAX = 0x7bff;
16
static constexpr HalfFloat HALF_FLT_MAX_NEGATIVE = 0xfbff;
17
static constexpr HalfFloat HALF_FLT_INF = 0x7c00;
18
static constexpr HalfFloat HALF_FLT_INF_NEGATIVE = 0xfc00;
19
static constexpr HalfFloat HALF_FLT_NANQ = 0x7e00;
20
static constexpr HalfFloat HALF_FLT_NANQ_NEGATIVE = 0xfe00;
21
22
namespace HalfFloatConversion {
23
24
// Layout of a float (32-bits): sign bit at the top, then the exponent bits, then the mantissa bits
static constexpr int FLOAT_SIGN_POS = 31;																					///< Bit index of the sign bit
static constexpr int FLOAT_EXPONENT_POS = 23;																				///< Bit index of the lowest exponent bit
static constexpr int FLOAT_EXPONENT_BITS = 8;																				///< Number of exponent bits
static constexpr int FLOAT_EXPONENT_MASK = (1 << FLOAT_EXPONENT_BITS) - 1;													///< Exponent mask after shifting the exponent down to bit 0
static constexpr int FLOAT_EXPONENT_BIAS = 127;																				///< Value that is added to the exponent before it is stored
static constexpr int FLOAT_MANTISSA_BITS = 23;																				///< Number of mantissa bits
static constexpr int FLOAT_MANTISSA_MASK = (1 << FLOAT_MANTISSA_BITS) - 1;													///< Mask that selects the mantissa bits
static constexpr int FLOAT_EXPONENT_AND_MANTISSA_MASK = FLOAT_MANTISSA_MASK + (FLOAT_EXPONENT_MASK << FLOAT_EXPONENT_POS);	///< Mask that selects everything except the sign bit
33
34
// Layout of half float (16-bits): sign bit at the top, then the exponent bits, then the mantissa bits
static constexpr int HALF_FLT_SIGN_POS = 15;																								///< Bit index of the sign bit
static constexpr int HALF_FLT_EXPONENT_POS = 10;																							///< Bit index of the lowest exponent bit
static constexpr int HALF_FLT_EXPONENT_BITS = 5;																							///< Number of exponent bits
static constexpr int HALF_FLT_EXPONENT_MASK = (1 << HALF_FLT_EXPONENT_BITS) - 1;															///< Exponent mask after shifting the exponent down to bit 0
static constexpr int HALF_FLT_EXPONENT_BIAS = 15;																							///< Value that is added to the exponent before it is stored
static constexpr int HALF_FLT_MANTISSA_BITS = 10;																							///< Number of mantissa bits
static constexpr int HALF_FLT_MANTISSA_MASK = (1 << HALF_FLT_MANTISSA_BITS) - 1;															///< Mask that selects the mantissa bits
static constexpr int HALF_FLT_EXPONENT_AND_MANTISSA_MASK = HALF_FLT_MANTISSA_MASK + (HALF_FLT_EXPONENT_MASK << HALF_FLT_EXPONENT_POS);	///< Mask that selects everything except the sign bit
43
44
/// Define half-float rounding modes.
/// This is deliberately a plain enum: the values are passed as an `int` template argument to the conversion functions.
enum ERoundingMode
{
	ROUND_TO_NEG_INF,		///< Round to negative infinity
	ROUND_TO_POS_INF,		///< Round to positive infinity
	ROUND_TO_NEAREST,		///< Round to nearest value
};
51
52
/// Convert a float (32-bits) to a half float (16-bits), fallback version when no intrinsics available
53
template <int RoundingMode>
54
inline HalfFloat FromFloatFallback(float inV)
55
{
56
// Reinterpret the float as an uint32
57
uint32 value = BitCast<uint32>(inV);
58
59
// Extract exponent
60
uint32 exponent = (value >> FLOAT_EXPONENT_POS) & FLOAT_EXPONENT_MASK;
61
62
// Extract mantissa
63
uint32 mantissa = value & FLOAT_MANTISSA_MASK;
64
65
// Extract the sign and move it into the right spot for the half float (so we can just or it in at the end)
66
HalfFloat hf_sign = HalfFloat(value >> (FLOAT_SIGN_POS - HALF_FLT_SIGN_POS)) & (1 << HALF_FLT_SIGN_POS);
67
68
// Check NaN or INF
69
if (exponent == FLOAT_EXPONENT_MASK) // NaN or INF
70
return hf_sign | (mantissa == 0? HALF_FLT_INF : HALF_FLT_NANQ);
71
72
// Rebias the exponent for half floats
73
int rebiased_exponent = int(exponent) - FLOAT_EXPONENT_BIAS + HALF_FLT_EXPONENT_BIAS;
74
75
// Check overflow to infinity
76
if (rebiased_exponent >= HALF_FLT_EXPONENT_MASK)
77
{
78
bool round_up = RoundingMode == ROUND_TO_NEAREST || (hf_sign == 0) == (RoundingMode == ROUND_TO_POS_INF);
79
return hf_sign | (round_up? HALF_FLT_INF : HALF_FLT_MAX);
80
}
81
82
// Check underflow to zero
83
if (rebiased_exponent < -HALF_FLT_MANTISSA_BITS)
84
{
85
bool round_up = RoundingMode != ROUND_TO_NEAREST && (hf_sign == 0) == (RoundingMode == ROUND_TO_POS_INF) && (value & FLOAT_EXPONENT_AND_MANTISSA_MASK) != 0;
86
return hf_sign | (round_up? 1 : 0);
87
}
88
89
HalfFloat hf_exponent;
90
int shift;
91
if (rebiased_exponent <= 0)
92
{
93
// Underflow to denormalized number
94
hf_exponent = 0;
95
mantissa |= 1 << FLOAT_MANTISSA_BITS; // Add the implicit 1 bit to the mantissa
96
shift = FLOAT_MANTISSA_BITS - HALF_FLT_MANTISSA_BITS + 1 - rebiased_exponent;
97
}
98
else
99
{
100
// Normal half float
101
hf_exponent = HalfFloat(rebiased_exponent << HALF_FLT_EXPONENT_POS);
102
shift = FLOAT_MANTISSA_BITS - HALF_FLT_MANTISSA_BITS;
103
}
104
105
// Compose the half float
106
HalfFloat hf_mantissa = HalfFloat(mantissa >> shift);
107
HalfFloat hf = hf_sign | hf_exponent | hf_mantissa;
108
109
// Calculate the remaining bits that we're discarding
110
uint remainder = mantissa & ((1 << shift) - 1);
111
112
if constexpr (RoundingMode == ROUND_TO_NEAREST)
113
{
114
// Round to nearest
115
uint round_threshold = 1 << (shift - 1);
116
if (remainder > round_threshold // Above threshold, we must always round
117
|| (remainder == round_threshold && (hf_mantissa & 1))) // When equal, round to nearest even
118
hf++; // May overflow to infinity
119
}
120
else
121
{
122
// Round up or down (truncate) depending on the rounding mode
123
bool round_up = (hf_sign == 0) == (RoundingMode == ROUND_TO_POS_INF) && remainder != 0;
124
if (round_up)
125
hf++; // May overflow to infinity
126
}
127
128
return hf;
129
}
130
131
/// Convert a float (32-bits) to a half float (16-bits)
132
template <int RoundingMode>
133
JPH_INLINE HalfFloat FromFloat(float inV)
134
{
135
#ifdef JPH_USE_F16C
136
FPExceptionDisableOverflow disable_overflow;
137
JPH_UNUSED(disable_overflow);
138
139
union
140
{
141
__m128i u128;
142
HalfFloat u16[8];
143
} hf;
144
__m128 val = _mm_load_ss(&inV);
145
switch (RoundingMode)
146
{
147
case ROUND_TO_NEG_INF:
148
hf.u128 = _mm_cvtps_ph(val, _MM_FROUND_TO_NEG_INF);
149
break;
150
case ROUND_TO_POS_INF:
151
hf.u128 = _mm_cvtps_ph(val, _MM_FROUND_TO_POS_INF);
152
break;
153
case ROUND_TO_NEAREST:
154
hf.u128 = _mm_cvtps_ph(val, _MM_FROUND_TO_NEAREST_INT);
155
break;
156
}
157
return hf.u16[0];
158
#else
159
return FromFloatFallback<RoundingMode>(inV);
160
#endif
161
}
162
163
/// Convert 4 half floats (lower 64 bits) to floats, fallback version when no intrinsics available
164
inline Vec4 ToFloatFallback(UVec4Arg inValue)
165
{
166
// Unpack half floats to 4 uint32's
167
UVec4 value = inValue.Expand4Uint16Lo();
168
169
// Normal half float path, extract the exponent and mantissa, shift them into place and update the exponent bias
170
UVec4 exponent_mantissa = UVec4::sAnd(value, UVec4::sReplicate(HALF_FLT_EXPONENT_AND_MANTISSA_MASK)).LogicalShiftLeft<FLOAT_EXPONENT_POS - HALF_FLT_EXPONENT_POS>() + UVec4::sReplicate((FLOAT_EXPONENT_BIAS - HALF_FLT_EXPONENT_BIAS) << FLOAT_EXPONENT_POS);
171
172
// Denormalized half float path, renormalize the float
173
UVec4 exponent_mantissa_denormalized = ((exponent_mantissa + UVec4::sReplicate(1 << FLOAT_EXPONENT_POS)).ReinterpretAsFloat() - UVec4::sReplicate((FLOAT_EXPONENT_BIAS - HALF_FLT_EXPONENT_BIAS + 1) << FLOAT_EXPONENT_POS).ReinterpretAsFloat()).ReinterpretAsInt();
174
175
// NaN / INF path, set all exponent bits
176
UVec4 exponent_mantissa_nan_inf = UVec4::sOr(exponent_mantissa, UVec4::sReplicate(FLOAT_EXPONENT_MASK << FLOAT_EXPONENT_POS));
177
178
// Get the exponent to determine which of the paths we should take
179
UVec4 exponent_mask = UVec4::sReplicate(HALF_FLT_EXPONENT_MASK << HALF_FLT_EXPONENT_POS);
180
UVec4 exponent = UVec4::sAnd(value, exponent_mask);
181
UVec4 is_denormalized = UVec4::sEquals(exponent, UVec4::sZero());
182
UVec4 is_nan_inf = UVec4::sEquals(exponent, exponent_mask);
183
184
// Select the correct result
185
UVec4 result_exponent_mantissa = UVec4::sSelect(UVec4::sSelect(exponent_mantissa, exponent_mantissa_nan_inf, is_nan_inf), exponent_mantissa_denormalized, is_denormalized);
186
187
// Extract the sign bit and shift it to the left
188
UVec4 sign = UVec4::sAnd(value, UVec4::sReplicate(1 << HALF_FLT_SIGN_POS)).LogicalShiftLeft<FLOAT_SIGN_POS - HALF_FLT_SIGN_POS>();
189
190
// Construct the float
191
return UVec4::sOr(sign, result_exponent_mantissa).ReinterpretAsFloat();
192
}
193
194
/// Convert 4 half floats (lower 64 bits) to floats
195
JPH_INLINE Vec4 ToFloat(UVec4Arg inValue)
196
{
197
#if defined(JPH_USE_F16C)
198
return _mm_cvtph_ps(inValue.mValue);
199
#elif defined(JPH_USE_NEON)
200
return vcvt_f32_f16(vreinterpret_f16_u32(vget_low_u32(inValue.mValue)));
201
#else
202
return ToFloatFallback(inValue);
203
#endif
204
}
205
206
} // HalfFloatConversion
207
208
JPH_NAMESPACE_END
209
210