Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/cvtt/ConvectionKernels_EndpointRefiner.h
9903 views
1
#pragma once
2
#ifndef __CVTT_ENDPOINTREFINER_H__
3
#define __CVTT_ENDPOINTREFINER_H__
4
5
#include "ConvectionKernels_ParallelMath.h"
6
7
namespace cvtt
8
{
9
namespace Internal
10
{
11
// Solve for a, b where v = a*t + b
12
// This allows endpoints to be mapped to where T=0 and T=1
13
// Least squares from totals:
14
// a = (tv - t*v/w)/(tt - t*t/w)
15
// b = (v - a*t)/w
16
template<int TVectorSize>
17
class EndpointRefiner
18
{
19
public:
20
typedef ParallelMath::Float MFloat;
21
typedef ParallelMath::UInt16 MUInt16;
22
typedef ParallelMath::UInt15 MUInt15;
23
typedef ParallelMath::AInt16 MAInt16;
24
typedef ParallelMath::SInt16 MSInt16;
25
typedef ParallelMath::SInt32 MSInt32;
26
27
MFloat m_tv[TVectorSize];
28
MFloat m_v[TVectorSize];
29
MFloat m_tt;
30
MFloat m_t;
31
MFloat m_w;
32
int m_wu;
33
34
float m_rcpMaxIndex;
35
float m_channelWeights[TVectorSize];
36
float m_rcpChannelWeights[TVectorSize];
37
38
void Init(int indexRange, const float channelWeights[TVectorSize])
39
{
40
for (int ch = 0; ch < TVectorSize; ch++)
41
{
42
m_tv[ch] = ParallelMath::MakeFloatZero();
43
m_v[ch] = ParallelMath::MakeFloatZero();
44
}
45
m_tt = ParallelMath::MakeFloatZero();
46
m_t = ParallelMath::MakeFloatZero();
47
m_w = ParallelMath::MakeFloatZero();
48
49
m_rcpMaxIndex = 1.0f / static_cast<float>(indexRange - 1);
50
51
for (int ch = 0; ch < TVectorSize; ch++)
52
{
53
m_channelWeights[ch] = channelWeights[ch];
54
m_rcpChannelWeights[ch] = 1.0f;
55
if (m_channelWeights[ch] != 0.0f)
56
m_rcpChannelWeights[ch] = 1.0f / channelWeights[ch];
57
}
58
59
m_wu = 0;
60
}
61
62
void ContributePW(const MFloat *pwFloatPixel, const MUInt15 &index, const MFloat &weight)
63
{
64
MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex;
65
66
for (int ch = 0; ch < TVectorSize; ch++)
67
{
68
MFloat v = pwFloatPixel[ch] * weight;
69
70
m_tv[ch] = m_tv[ch] + t * v;
71
m_v[ch] = m_v[ch] + v;
72
}
73
m_tt = m_tt + weight * t * t;
74
m_t = m_t + weight * t;
75
m_w = m_w + weight;
76
}
77
78
void ContributeUnweightedPW(const MFloat *pwFloatPixel, const MUInt15 &index, int numRealChannels)
79
{
80
MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex;
81
82
for (int ch = 0; ch < numRealChannels; ch++)
83
{
84
MFloat v = pwFloatPixel[ch];
85
86
m_tv[ch] = m_tv[ch] + t * v;
87
m_v[ch] = m_v[ch] + v;
88
}
89
m_tt = m_tt + t * t;
90
m_t = m_t + t;
91
m_wu++;
92
}
93
94
void ContributeUnweightedPW(const MFloat *floatPixel, const MUInt15 &index)
95
{
96
ContributeUnweightedPW(floatPixel, index, TVectorSize);
97
}
98
99
void GetRefinedEndpoints(MFloat endPoint[2][TVectorSize])
100
{
101
// a = (tv - t*v/w)/(tt - t*t/w)
102
// b = (v - a*t)/w
103
MFloat w = m_w + ParallelMath::MakeFloat(static_cast<float>(m_wu));
104
105
ParallelMath::MakeSafeDenominator(w);
106
MFloat wRcp = ParallelMath::Reciprocal(w);
107
108
MFloat adenom = (m_tt * w - m_t * m_t) * wRcp;
109
110
ParallelMath::FloatCompFlag adenomZero = ParallelMath::Equal(adenom, ParallelMath::MakeFloatZero());
111
ParallelMath::ConditionalSet(adenom, adenomZero, ParallelMath::MakeFloat(1.0f));
112
113
for (int ch = 0; ch < TVectorSize; ch++)
114
{
115
/*
116
if (adenom == 0.0)
117
p1 = p2 = er.v / er.w;
118
else
119
{
120
float4 a = (er.tv - er.t*er.v / er.w) / adenom;
121
float4 b = (er.v - a * er.t) / er.w;
122
p1 = b;
123
p2 = a + b;
124
}
125
*/
126
127
MFloat a = (m_tv[ch] - m_t * m_v[ch] * wRcp) / adenom;
128
MFloat b = (m_v[ch] - a * m_t) * wRcp;
129
130
MFloat p1 = b;
131
MFloat p2 = a + b;
132
133
ParallelMath::ConditionalSet(p1, adenomZero, (m_v[ch] * wRcp));
134
ParallelMath::ConditionalSet(p2, adenomZero, p1);
135
136
// Unweight
137
float inverseWeight = m_rcpChannelWeights[ch];
138
139
endPoint[0][ch] = p1 * inverseWeight;
140
endPoint[1][ch] = p2 * inverseWeight;
141
}
142
}
143
144
void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], int numRealChannels, const ParallelMath::RoundTowardNearestForScope *roundingMode)
145
{
146
MFloat floatEndPoint[2][TVectorSize];
147
GetRefinedEndpoints(floatEndPoint);
148
149
for (int epi = 0; epi < 2; epi++)
150
for (int ch = 0; ch < TVectorSize; ch++)
151
endPoint[epi][ch] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(floatEndPoint[epi][ch], 0.0f, 255.0f), roundingMode);
152
}
153
154
void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], const ParallelMath::RoundTowardNearestForScope *roundingMode)
155
{
156
GetRefinedEndpointsLDR(endPoint, TVectorSize, roundingMode);
157
}
158
159
void GetRefinedEndpointsHDR(MSInt16 endPoint[2][TVectorSize], bool isSigned, const ParallelMath::RoundTowardNearestForScope *roundingMode)
160
{
161
MFloat floatEndPoint[2][TVectorSize];
162
GetRefinedEndpoints(floatEndPoint);
163
164
for (int epi = 0; epi < 2; epi++)
165
{
166
for (int ch = 0; ch < TVectorSize; ch++)
167
{
168
MFloat f = floatEndPoint[epi][ch];
169
if (isSigned)
170
endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToS16(ParallelMath::Clamp(f, -31743.0f, 31743.0f), roundingMode));
171
else
172
endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(f, 0.0f, 31743.0f), roundingMode));
173
}
174
}
175
}
176
};
177
}
178
}
179
180
#endif
181
182
183