CoCalc -- HalfFloat.h

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/jolt_physics/Jolt/Math/HalfFloat.h
9913 views
1
// Jolt Physics Library (https://github.com/jrouwe/JoltPhysics)
2
// SPDX-FileCopyrightText: 2021 Jorrit Rouwe
3
// SPDX-License-Identifier: MIT
4

5
#pragma once
6

7
#include <Jolt/Math/Vec4.h>
8
#include <Jolt/Core/FPException.h>
9

10
JPH_NAMESPACE_BEGIN
11

12
using HalfFloat = uint16;
13

14
// Define half float constant values
15
static constexpr HalfFloat HALF_FLT_MAX				= 0x7bff;
16
static constexpr HalfFloat HALF_FLT_MAX_NEGATIVE	= 0xfbff;
17
static constexpr HalfFloat HALF_FLT_INF				= 0x7c00;
18
static constexpr HalfFloat HALF_FLT_INF_NEGATIVE	= 0xfc00;
19
static constexpr HalfFloat HALF_FLT_NANQ			= 0x7e00;
20
static constexpr HalfFloat HALF_FLT_NANQ_NEGATIVE	= 0xfe00;
21

22
namespace HalfFloatConversion {
23

24
// Bit layout of a 32-bit float: [31] sign, [30..23] exponent, [22..0] mantissa

// Sign
static constexpr int FLOAT_SIGN_POS = 31;

// Mantissa: number of bits and the mask that isolates them
static constexpr int FLOAT_MANTISSA_BITS = 23;
static constexpr int FLOAT_MANTISSA_MASK = (1 << FLOAT_MANTISSA_BITS) - 1;

// Exponent: position of its lowest bit, number of bits, bias and the (unshifted) mask
static constexpr int FLOAT_EXPONENT_POS = 23;
static constexpr int FLOAT_EXPONENT_BITS = 8;
static constexpr int FLOAT_EXPONENT_BIAS = 127;
static constexpr int FLOAT_EXPONENT_MASK = (1 << FLOAT_EXPONENT_BITS) - 1;

// Everything except the sign bit (the exponent and mantissa bits do not overlap)
static constexpr int FLOAT_EXPONENT_AND_MANTISSA_MASK = (FLOAT_EXPONENT_MASK << FLOAT_EXPONENT_POS) | FLOAT_MANTISSA_MASK;
33

34
// Bit layout of a 16-bit half float: [15] sign, [14..10] exponent, [9..0] mantissa

// Sign
static constexpr int HALF_FLT_SIGN_POS = 15;

// Mantissa: number of bits and the mask that isolates them
static constexpr int HALF_FLT_MANTISSA_BITS = 10;
static constexpr int HALF_FLT_MANTISSA_MASK = (1 << HALF_FLT_MANTISSA_BITS) - 1;

// Exponent: position of its lowest bit, number of bits, bias and the (unshifted) mask
static constexpr int HALF_FLT_EXPONENT_POS = 10;
static constexpr int HALF_FLT_EXPONENT_BITS = 5;
static constexpr int HALF_FLT_EXPONENT_BIAS = 15;
static constexpr int HALF_FLT_EXPONENT_MASK = (1 << HALF_FLT_EXPONENT_BITS) - 1;

// Everything except the sign bit (the exponent and mantissa bits do not overlap)
static constexpr int HALF_FLT_EXPONENT_AND_MANTISSA_MASK = (HALF_FLT_EXPONENT_MASK << HALF_FLT_EXPONENT_POS) | HALF_FLT_MANTISSA_MASK;
43

44
/// Rounding modes that can be selected (as template argument) when converting a float to a half float
enum ERoundingMode
{
	ROUND_TO_NEG_INF = 0,			///< Round towards negative infinity
	ROUND_TO_POS_INF = 1,			///< Round towards positive infinity
	ROUND_TO_NEAREST = 2,			///< Round to the nearest representable value
};
51

52
/// Convert a float (32-bits) to a half float (16-bits), fallback version when no intrinsics available
53
template <int RoundingMode>
54
inline HalfFloat FromFloatFallback(float inV)
55
{
56
	// Reinterpret the float as an uint32
57
	uint32 value = BitCast<uint32>(inV);
58

59
	// Extract exponent
60
	uint32 exponent = (value >> FLOAT_EXPONENT_POS) & FLOAT_EXPONENT_MASK;
61

62
	// Extract mantissa
63
	uint32 mantissa = value & FLOAT_MANTISSA_MASK;
64

65
	// Extract the sign and move it into the right spot for the half float (so we can just or it in at the end)
66
	HalfFloat hf_sign = HalfFloat(value >> (FLOAT_SIGN_POS - HALF_FLT_SIGN_POS)) & (1 << HALF_FLT_SIGN_POS);
67

68
	// Check NaN or INF
69
	if (exponent == FLOAT_EXPONENT_MASK) // NaN or INF
70
		return hf_sign | (mantissa == 0? HALF_FLT_INF : HALF_FLT_NANQ);
71

72
	// Rebias the exponent for half floats
73
	int rebiased_exponent = int(exponent) - FLOAT_EXPONENT_BIAS + HALF_FLT_EXPONENT_BIAS;
74

75
	// Check overflow to infinity
76
	if (rebiased_exponent >= HALF_FLT_EXPONENT_MASK)
77
	{
78
		bool round_up = RoundingMode == ROUND_TO_NEAREST || (hf_sign == 0) == (RoundingMode == ROUND_TO_POS_INF);
79
		return hf_sign | (round_up? HALF_FLT_INF : HALF_FLT_MAX);
80
	}
81

82
	// Check underflow to zero
83
	if (rebiased_exponent < -HALF_FLT_MANTISSA_BITS)
84
	{
85
		bool round_up = RoundingMode != ROUND_TO_NEAREST && (hf_sign == 0) == (RoundingMode == ROUND_TO_POS_INF) && (value & FLOAT_EXPONENT_AND_MANTISSA_MASK) != 0;
86
		return hf_sign | (round_up? 1 : 0);
87
	}
88

89
	HalfFloat hf_exponent;
90
	int shift;
91
	if (rebiased_exponent <= 0)
92
	{
93
		// Underflow to denormalized number
94
		hf_exponent = 0;
95
		mantissa |= 1 << FLOAT_MANTISSA_BITS; // Add the implicit 1 bit to the mantissa
96
		shift = FLOAT_MANTISSA_BITS - HALF_FLT_MANTISSA_BITS + 1 - rebiased_exponent;
97
	}
98
	else
99
	{
100
		// Normal half float
101
		hf_exponent = HalfFloat(rebiased_exponent << HALF_FLT_EXPONENT_POS);
102
		shift = FLOAT_MANTISSA_BITS - HALF_FLT_MANTISSA_BITS;
103
	}
104

105
	// Compose the half float
106
	HalfFloat hf_mantissa = HalfFloat(mantissa >> shift);
107
	HalfFloat hf = hf_sign | hf_exponent | hf_mantissa;
108

109
	// Calculate the remaining bits that we're discarding
110
	uint remainder = mantissa & ((1 << shift) - 1);
111

112
	if constexpr (RoundingMode == ROUND_TO_NEAREST)
113
	{
114
		// Round to nearest
115
		uint round_threshold = 1 << (shift - 1);
116
		if (remainder > round_threshold // Above threshold, we must always round
117
			|| (remainder == round_threshold && (hf_mantissa & 1))) // When equal, round to nearest even
118
			hf++; // May overflow to infinity
119
	}
120
	else
121
	{
122
		// Round up or down (truncate) depending on the rounding mode
123
		bool round_up = (hf_sign == 0) == (RoundingMode == ROUND_TO_POS_INF) && remainder != 0;
124
		if (round_up)
125
			hf++; // May overflow to infinity
126
	}
127

128
	return hf;
129
}
130

131
/// Convert a float (32-bits) to a half float (16-bits)
132
template <int RoundingMode>
133
JPH_INLINE HalfFloat FromFloat(float inV)
134
{
135
#ifdef JPH_USE_F16C
136
	FPExceptionDisableOverflow disable_overflow;
137
	JPH_UNUSED(disable_overflow);
138

139
	union
140
	{
141
		__m128i		u128;
142
		HalfFloat	u16[8];
143
	} hf;
144
	__m128 val = _mm_load_ss(&inV);
145
	switch (RoundingMode)
146
	{
147
	case ROUND_TO_NEG_INF:
148
		hf.u128 = _mm_cvtps_ph(val, _MM_FROUND_TO_NEG_INF);
149
		break;
150
	case ROUND_TO_POS_INF:
151
		hf.u128 = _mm_cvtps_ph(val, _MM_FROUND_TO_POS_INF);
152
		break;
153
	case ROUND_TO_NEAREST:
154
		hf.u128 = _mm_cvtps_ph(val, _MM_FROUND_TO_NEAREST_INT);
155
		break;
156
	}
157
	return hf.u16[0];
158
#else
159
	return FromFloatFallback<RoundingMode>(inV);
160
#endif
161
}
162

163
/// Convert 4 half floats (lower 64 bits) to floats, fallback version when no intrinsics available
164
inline Vec4 ToFloatFallback(UVec4Arg inValue)
165
{
166
	// Unpack half floats to 4 uint32's
167
	UVec4 value = inValue.Expand4Uint16Lo();
168

169
	// Normal half float path, extract the exponent and mantissa, shift them into place and update the exponent bias
170
	UVec4 exponent_mantissa = UVec4::sAnd(value, UVec4::sReplicate(HALF_FLT_EXPONENT_AND_MANTISSA_MASK)).LogicalShiftLeft<FLOAT_EXPONENT_POS - HALF_FLT_EXPONENT_POS>() + UVec4::sReplicate((FLOAT_EXPONENT_BIAS - HALF_FLT_EXPONENT_BIAS) << FLOAT_EXPONENT_POS);
171

172
	// Denormalized half float path, renormalize the float
173
	UVec4 exponent_mantissa_denormalized = ((exponent_mantissa + UVec4::sReplicate(1 << FLOAT_EXPONENT_POS)).ReinterpretAsFloat() - UVec4::sReplicate((FLOAT_EXPONENT_BIAS - HALF_FLT_EXPONENT_BIAS + 1) << FLOAT_EXPONENT_POS).ReinterpretAsFloat()).ReinterpretAsInt();
174

175
	// NaN / INF path, set all exponent bits
176
	UVec4 exponent_mantissa_nan_inf = UVec4::sOr(exponent_mantissa, UVec4::sReplicate(FLOAT_EXPONENT_MASK << FLOAT_EXPONENT_POS));
177

178
	// Get the exponent to determine which of the paths we should take
179
	UVec4 exponent_mask = UVec4::sReplicate(HALF_FLT_EXPONENT_MASK << HALF_FLT_EXPONENT_POS);
180
	UVec4 exponent = UVec4::sAnd(value, exponent_mask);
181
	UVec4 is_denormalized = UVec4::sEquals(exponent, UVec4::sZero());
182
	UVec4 is_nan_inf = UVec4::sEquals(exponent, exponent_mask);
183

184
	// Select the correct result
185
	UVec4 result_exponent_mantissa = UVec4::sSelect(UVec4::sSelect(exponent_mantissa, exponent_mantissa_nan_inf, is_nan_inf), exponent_mantissa_denormalized, is_denormalized);
186

187
	// Extract the sign bit and shift it to the left
188
	UVec4 sign = UVec4::sAnd(value, UVec4::sReplicate(1 << HALF_FLT_SIGN_POS)).LogicalShiftLeft<FLOAT_SIGN_POS - HALF_FLT_SIGN_POS>();
189

190
	// Construct the float
191
	return UVec4::sOr(sign, result_exponent_mantissa).ReinterpretAsFloat();
192
}
193

194
/// Convert 4 half floats (lower 64 bits) to floats
195
JPH_INLINE Vec4 ToFloat(UVec4Arg inValue)
196
{
197
#if defined(JPH_USE_F16C)
198
	return _mm_cvtph_ps(inValue.mValue);
199
#elif defined(JPH_USE_NEON)
200
	return vcvt_f32_f16(vreinterpret_f16_u32(vget_low_u32(inValue.mValue)));
201
#else
202
	return ToFloatFallback(inValue);
203
#endif
204
}
205

206
} // HalfFloatConversion
207

208
JPH_NAMESPACE_END
209

210
Product

Resources

Company