// Path: blob/master/thirdparty/cvtt/ConvectionKernels_EndpointRefiner.h
// 9903 views
#pragma once1#ifndef __CVTT_ENDPOINTREFINER_H__2#define __CVTT_ENDPOINTREFINER_H__34#include "ConvectionKernels_ParallelMath.h"56namespace cvtt7{8namespace Internal9{10// Solve for a, b where v = a*t + b11// This allows endpoints to be mapped to where T=0 and T=112// Least squares from totals:13// a = (tv - t*v/w)/(tt - t*t/w)14// b = (v - a*t)/w15template<int TVectorSize>16class EndpointRefiner17{18public:19typedef ParallelMath::Float MFloat;20typedef ParallelMath::UInt16 MUInt16;21typedef ParallelMath::UInt15 MUInt15;22typedef ParallelMath::AInt16 MAInt16;23typedef ParallelMath::SInt16 MSInt16;24typedef ParallelMath::SInt32 MSInt32;2526MFloat m_tv[TVectorSize];27MFloat m_v[TVectorSize];28MFloat m_tt;29MFloat m_t;30MFloat m_w;31int m_wu;3233float m_rcpMaxIndex;34float m_channelWeights[TVectorSize];35float m_rcpChannelWeights[TVectorSize];3637void Init(int indexRange, const float channelWeights[TVectorSize])38{39for (int ch = 0; ch < TVectorSize; ch++)40{41m_tv[ch] = ParallelMath::MakeFloatZero();42m_v[ch] = ParallelMath::MakeFloatZero();43}44m_tt = ParallelMath::MakeFloatZero();45m_t = ParallelMath::MakeFloatZero();46m_w = ParallelMath::MakeFloatZero();4748m_rcpMaxIndex = 1.0f / static_cast<float>(indexRange - 1);4950for (int ch = 0; ch < TVectorSize; ch++)51{52m_channelWeights[ch] = channelWeights[ch];53m_rcpChannelWeights[ch] = 1.0f;54if (m_channelWeights[ch] != 0.0f)55m_rcpChannelWeights[ch] = 1.0f / channelWeights[ch];56}5758m_wu = 0;59}6061void ContributePW(const MFloat *pwFloatPixel, const MUInt15 &index, const MFloat &weight)62{63MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex;6465for (int ch = 0; ch < TVectorSize; ch++)66{67MFloat v = pwFloatPixel[ch] * weight;6869m_tv[ch] = m_tv[ch] + t * v;70m_v[ch] = m_v[ch] + v;71}72m_tt = m_tt + weight * t * t;73m_t = m_t + weight * t;74m_w = m_w + weight;75}7677void ContributeUnweightedPW(const MFloat *pwFloatPixel, const MUInt15 &index, int numRealChannels)78{79MFloat t = ParallelMath::ToFloat(index) * 
m_rcpMaxIndex;8081for (int ch = 0; ch < numRealChannels; ch++)82{83MFloat v = pwFloatPixel[ch];8485m_tv[ch] = m_tv[ch] + t * v;86m_v[ch] = m_v[ch] + v;87}88m_tt = m_tt + t * t;89m_t = m_t + t;90m_wu++;91}9293void ContributeUnweightedPW(const MFloat *floatPixel, const MUInt15 &index)94{95ContributeUnweightedPW(floatPixel, index, TVectorSize);96}9798void GetRefinedEndpoints(MFloat endPoint[2][TVectorSize])99{100// a = (tv - t*v/w)/(tt - t*t/w)101// b = (v - a*t)/w102MFloat w = m_w + ParallelMath::MakeFloat(static_cast<float>(m_wu));103104ParallelMath::MakeSafeDenominator(w);105MFloat wRcp = ParallelMath::Reciprocal(w);106107MFloat adenom = (m_tt * w - m_t * m_t) * wRcp;108109ParallelMath::FloatCompFlag adenomZero = ParallelMath::Equal(adenom, ParallelMath::MakeFloatZero());110ParallelMath::ConditionalSet(adenom, adenomZero, ParallelMath::MakeFloat(1.0f));111112for (int ch = 0; ch < TVectorSize; ch++)113{114/*115if (adenom == 0.0)116p1 = p2 = er.v / er.w;117else118{119float4 a = (er.tv - er.t*er.v / er.w) / adenom;120float4 b = (er.v - a * er.t) / er.w;121p1 = b;122p2 = a + b;123}124*/125126MFloat a = (m_tv[ch] - m_t * m_v[ch] * wRcp) / adenom;127MFloat b = (m_v[ch] - a * m_t) * wRcp;128129MFloat p1 = b;130MFloat p2 = a + b;131132ParallelMath::ConditionalSet(p1, adenomZero, (m_v[ch] * wRcp));133ParallelMath::ConditionalSet(p2, adenomZero, p1);134135// Unweight136float inverseWeight = m_rcpChannelWeights[ch];137138endPoint[0][ch] = p1 * inverseWeight;139endPoint[1][ch] = p2 * inverseWeight;140}141}142143void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], int numRealChannels, const ParallelMath::RoundTowardNearestForScope *roundingMode)144{145MFloat floatEndPoint[2][TVectorSize];146GetRefinedEndpoints(floatEndPoint);147148for (int epi = 0; epi < 2; epi++)149for (int ch = 0; ch < TVectorSize; ch++)150endPoint[epi][ch] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(floatEndPoint[epi][ch], 0.0f, 255.0f), roundingMode);151}152153void 
GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], const ParallelMath::RoundTowardNearestForScope *roundingMode)154{155GetRefinedEndpointsLDR(endPoint, TVectorSize, roundingMode);156}157158void GetRefinedEndpointsHDR(MSInt16 endPoint[2][TVectorSize], bool isSigned, const ParallelMath::RoundTowardNearestForScope *roundingMode)159{160MFloat floatEndPoint[2][TVectorSize];161GetRefinedEndpoints(floatEndPoint);162163for (int epi = 0; epi < 2; epi++)164{165for (int ch = 0; ch < TVectorSize; ch++)166{167MFloat f = floatEndPoint[epi][ch];168if (isSigned)169endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToS16(ParallelMath::Clamp(f, -31743.0f, 31743.0f), roundingMode));170else171endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(f, 0.0f, 31743.0f), roundingMode));172}173}174}175};176}177}178179#endif180181182183