CoCalc -- ConvectionKernels

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/cvtt/ConvectionKernels_EndpointRefiner.h
22004 views
1
#pragma once
2
#ifndef __CVTT_ENDPOINTREFINER_H__
3
#define __CVTT_ENDPOINTREFINER_H__
4

5
#include "ConvectionKernels_ParallelMath.h"
6

7
namespace cvtt
8
{
9
    namespace Internal
10
    {
11
        // Solve for a, b where v = a*t + b
12
        // This allows endpoints to be mapped to where T=0 and T=1
13
        // Least squares from totals:
14
        // a = (tv - t*v/w)/(tt - t*t/w)
15
        // b = (v - a*t)/w
16
        template<int TVectorSize>
17
        class EndpointRefiner
18
        {
19
        public:
20
            typedef ParallelMath::Float MFloat;
21
            typedef ParallelMath::UInt16 MUInt16;
22
            typedef ParallelMath::UInt15 MUInt15;
23
            typedef ParallelMath::AInt16 MAInt16;
24
            typedef ParallelMath::SInt16 MSInt16;
25
            typedef ParallelMath::SInt32 MSInt32;
26

27
            MFloat m_tv[TVectorSize];
28
            MFloat m_v[TVectorSize];
29
            MFloat m_tt;
30
            MFloat m_t;
31
            MFloat m_w;
32
            int m_wu;
33

34
            float m_rcpMaxIndex;
35
            float m_channelWeights[TVectorSize];
36
            float m_rcpChannelWeights[TVectorSize];
37

38
            void Init(int indexRange, const float channelWeights[TVectorSize])
39
            {
40
                for (int ch = 0; ch < TVectorSize; ch++)
41
                {
42
                    m_tv[ch] = ParallelMath::MakeFloatZero();
43
                    m_v[ch] = ParallelMath::MakeFloatZero();
44
                }
45
                m_tt = ParallelMath::MakeFloatZero();
46
                m_t = ParallelMath::MakeFloatZero();
47
                m_w = ParallelMath::MakeFloatZero();
48

49
                m_rcpMaxIndex = 1.0f / static_cast<float>(indexRange - 1);
50

51
                for (int ch = 0; ch < TVectorSize; ch++)
52
                {
53
                    m_channelWeights[ch] = channelWeights[ch];
54
                    m_rcpChannelWeights[ch] = 1.0f;
55
                    if (m_channelWeights[ch] != 0.0f)
56
                        m_rcpChannelWeights[ch] = 1.0f / channelWeights[ch];
57
                }
58

59
                m_wu = 0;
60
            }
61

62
            void ContributePW(const MFloat *pwFloatPixel, const MUInt15 &index, const MFloat &weight)
63
            {
64
                MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex;
65

66
                for (int ch = 0; ch < TVectorSize; ch++)
67
                {
68
                    MFloat v = pwFloatPixel[ch] * weight;
69

70
                    m_tv[ch] = m_tv[ch] + t * v;
71
                    m_v[ch] = m_v[ch] + v;
72
                }
73
                m_tt = m_tt + weight * t * t;
74
                m_t = m_t + weight * t;
75
                m_w = m_w + weight;
76
            }
77

78
            void ContributeUnweightedPW(const MFloat *pwFloatPixel, const MUInt15 &index, int numRealChannels)
79
            {
80
                MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex;
81

82
                for (int ch = 0; ch < numRealChannels; ch++)
83
                {
84
                    MFloat v = pwFloatPixel[ch];
85

86
                    m_tv[ch] = m_tv[ch] + t * v;
87
                    m_v[ch] = m_v[ch] + v;
88
                }
89
                m_tt = m_tt + t * t;
90
                m_t = m_t + t;
91
                m_wu++;
92
            }
93

94
            void ContributeUnweightedPW(const MFloat *floatPixel, const MUInt15 &index)
95
            {
96
                ContributeUnweightedPW(floatPixel, index, TVectorSize);
97
            }
98

99
            void GetRefinedEndpoints(MFloat endPoint[2][TVectorSize])
100
            {
101
                // a = (tv - t*v/w)/(tt - t*t/w)
102
                // b = (v - a*t)/w
103
                MFloat w = m_w + ParallelMath::MakeFloat(static_cast<float>(m_wu));
104

105
                ParallelMath::MakeSafeDenominator(w);
106
                MFloat wRcp = ParallelMath::Reciprocal(w);
107

108
                MFloat adenom = (m_tt * w - m_t * m_t) * wRcp;
109

110
                ParallelMath::FloatCompFlag adenomZero = ParallelMath::Equal(adenom, ParallelMath::MakeFloatZero());
111
                ParallelMath::ConditionalSet(adenom, adenomZero, ParallelMath::MakeFloat(1.0f));
112

113
                for (int ch = 0; ch < TVectorSize; ch++)
114
                {
115
                    /*
116
                    if (adenom == 0.0)
117
                    p1 = p2 = er.v / er.w;
118
                    else
119
                    {
120
                    float4 a = (er.tv - er.t*er.v / er.w) / adenom;
121
                    float4 b = (er.v - a * er.t) / er.w;
122
                    p1 = b;
123
                    p2 = a + b;
124
                    }
125
                    */
126

127
                    MFloat a = (m_tv[ch] - m_t * m_v[ch] * wRcp) / adenom;
128
                    MFloat b = (m_v[ch] - a * m_t) * wRcp;
129

130
                    MFloat p1 = b;
131
                    MFloat p2 = a + b;
132

133
                    ParallelMath::ConditionalSet(p1, adenomZero, (m_v[ch] * wRcp));
134
                    ParallelMath::ConditionalSet(p2, adenomZero, p1);
135

136
                    // Unweight
137
                    float inverseWeight = m_rcpChannelWeights[ch];
138

139
                    endPoint[0][ch] = p1 * inverseWeight;
140
                    endPoint[1][ch] = p2 * inverseWeight;
141
                }
142
            }
143

144
            void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], int numRealChannels, const ParallelMath::RoundTowardNearestForScope *roundingMode)
145
            {
146
                MFloat floatEndPoint[2][TVectorSize];
147
                GetRefinedEndpoints(floatEndPoint);
148

149
                for (int epi = 0; epi < 2; epi++)
150
                    for (int ch = 0; ch < TVectorSize; ch++)
151
                        endPoint[epi][ch] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(floatEndPoint[epi][ch], 0.0f, 255.0f), roundingMode);
152
            }
153

154
            void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], const ParallelMath::RoundTowardNearestForScope *roundingMode)
155
            {
156
                GetRefinedEndpointsLDR(endPoint, TVectorSize, roundingMode);
157
            }
158

159
            void GetRefinedEndpointsHDR(MSInt16 endPoint[2][TVectorSize], bool isSigned, const ParallelMath::RoundTowardNearestForScope *roundingMode)
160
            {
161
                MFloat floatEndPoint[2][TVectorSize];
162
                GetRefinedEndpoints(floatEndPoint);
163

164
                for (int epi = 0; epi < 2; epi++)
165
                {
166
                    for (int ch = 0; ch < TVectorSize; ch++)
167
                    {
168
                        MFloat f = floatEndPoint[epi][ch];
169
                        if (isSigned)
170
                            endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToS16(ParallelMath::Clamp(f, -31743.0f, 31743.0f), roundingMode));
171
                        else
172
                            endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(f, 0.0f, 31743.0f), roundingMode));
173
                    }
174
                }
175
            }
176
        };
177
    }
178
}
179

180
#endif
181

182

183
Product

Resources

Company