// Path: blob/master/thirdparty/cvtt/ConvectionKernels_IndexSelector.h
// (page header captured with the file: 9896 views)
#pragma once1#ifndef __CVTT_INDEXSELECTOR_H__2#define __CVTT_INDEXSELECTOR_H__34#include "ConvectionKernels_ParallelMath.h"56namespace cvtt7{8namespace Internal9{10extern const ParallelMath::UInt16 g_weightReciprocals[17];1112template<int TVectorSize>13class IndexSelector14{15public:16typedef ParallelMath::Float MFloat;17typedef ParallelMath::UInt16 MUInt16;18typedef ParallelMath::UInt15 MUInt15;19typedef ParallelMath::SInt16 MSInt16;20typedef ParallelMath::AInt16 MAInt16;21typedef ParallelMath::SInt32 MSInt32;22typedef ParallelMath::UInt31 MUInt31;232425template<class TInterpolationEPType, class TColorEPType>26void Init(const float *channelWeights, const TInterpolationEPType interpolationEndPoints[2][TVectorSize], const TColorEPType colorSpaceEndpoints[2][TVectorSize], int range)27{28// In BC6H, the interpolation endpoints are higher-precision than the endpoints in color space.29// We need to select indexes using the color-space endpoints.3031m_isUniform = true;32for (int ch = 1; ch < TVectorSize; ch++)33{34if (channelWeights[ch] != channelWeights[0])35m_isUniform = false;36}3738// To work with channel weights, we need something where:39// pxDiff = px - ep[0]40// epDiff = ep[1] - ep[0]41//42// weightedEPDiff = epDiff * channelWeights43// normalizedWeightedAxis = weightedEPDiff / len(weightedEPDiff)44// normalizedIndex = dot(pxDiff * channelWeights, normalizedWeightedAxis) / len(weightedEPDiff)45// index = normalizedIndex * maxValue46//47// Equivalent to:48// axis = channelWeights * maxValue * epDiff * channelWeights / lenSquared(epDiff * channelWeights)49// index = dot(axis, pxDiff)5051for (int ep = 0; ep < 2; ep++)52for (int ch = 0; ch < TVectorSize; ch++)53m_endPoint[ep][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(interpolationEndPoints[ep][ch]);5455m_range = range;56m_maxValue = static_cast<float>(range - 1);5758MFloat epDiffWeighted[TVectorSize];59for (int ch = 0; ch < TVectorSize; ch++)60{61m_origin[ch] = 
ParallelMath::ToFloat(colorSpaceEndpoints[0][ch]);62MFloat opposingOriginCh = ParallelMath::ToFloat(colorSpaceEndpoints[1][ch]);63epDiffWeighted[ch] = (opposingOriginCh - m_origin[ch]) * channelWeights[ch];64}6566MFloat lenSquared = epDiffWeighted[0] * epDiffWeighted[0];67for (int ch = 1; ch < TVectorSize; ch++)68lenSquared = lenSquared + epDiffWeighted[ch] * epDiffWeighted[ch];6970ParallelMath::MakeSafeDenominator(lenSquared);7172MFloat maxValueDividedByLengthSquared = ParallelMath::MakeFloat(m_maxValue) / lenSquared;7374for (int ch = 0; ch < TVectorSize; ch++)75m_axis[ch] = epDiffWeighted[ch] * channelWeights[ch] * maxValueDividedByLengthSquared;76}7778template<bool TSigned>79void Init(const float channelWeights[TVectorSize], const MUInt15 endPoints[2][TVectorSize], int range)80{81MAInt16 converted[2][TVectorSize];82for (int epi = 0; epi < 2; epi++)83for (int ch = 0; ch < TVectorSize; ch++)84converted[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(endPoints[epi][ch]);8586Init<MUInt15, MUInt15>(channelWeights, endPoints, endPoints, range);87}8889void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel, int numRealChannels)90{91MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9));9293for (int ch = 0; ch < numRealChannels; ch++)94{95MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(64) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch])));96MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch])));97pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(32), 6));98}99}100101void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel, int numRealChannels)102{103MUInt15 weight = 
ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 64, 7));104105for (int ch = 0; ch < numRealChannels; ch++)106{107MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(256) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch])));108MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch])));109pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(128), 8));110}111}112113void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel)114{115ReconstructLDR_BC7(index, pixel, TVectorSize);116}117118void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel)119{120ReconstructLDRPrecise(index, pixel, TVectorSize);121}122123MUInt15 SelectIndexLDR(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const124{125MFloat dist = (pixel[0] - m_origin[0]) * m_axis[0];126for (int ch = 1; ch < TVectorSize; ch++)127dist = dist + (pixel[ch] - m_origin[ch]) * m_axis[ch];128129return ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(dist, 0.0f, m_maxValue), rtn);130}131132protected:133MAInt16 m_endPoint[2][TVectorSize];134135private:136MFloat m_origin[TVectorSize];137MFloat m_axis[TVectorSize];138int m_range;139float m_maxValue;140bool m_isUniform;141};142}143}144145#endif146147148149