Path: blob/master/thirdparty/cvtt/ConvectionKernels_BCCommon.h
9896 views
#pragma once1#ifndef __CVTT_BCCOMMON_H__2#define __CVTT_BCCOMMON_H__34#include "ConvectionKernels_AggregatedError.h"5#include "ConvectionKernels_ParallelMath.h"67namespace cvtt8{9namespace Internal10{11class BCCommon12{13public:14typedef ParallelMath::Float MFloat;15typedef ParallelMath::UInt16 MUInt16;16typedef ParallelMath::UInt15 MUInt15;17typedef ParallelMath::AInt16 MAInt16;18typedef ParallelMath::SInt16 MSInt16;19typedef ParallelMath::SInt32 MSInt32;2021static int TweakRoundsForRange(int range);2223template<int TVectorSize>24static void ComputeErrorLDR(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], int numRealChannels, AggregatedError<TVectorSize> &aggError)25{26for (int ch = 0; ch < numRealChannels; ch++)27aggError.Add(ParallelMath::SqDiffUInt8(reconstructed[ch], original[ch]), ch);28}2930template<int TVectorSize>31static void ComputeErrorLDR(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], AggregatedError<TVectorSize> &aggError)32{33ComputeErrorLDR<TVectorSize>(flags, reconstructed, original, TVectorSize, aggError);34}3536template<int TVectorSize>37static MFloat ComputeErrorLDRSimple(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], int numRealChannels, const float *channelWeightsSq)38{39AggregatedError<TVectorSize> aggError;40ComputeErrorLDR<TVectorSize>(flags, reconstructed, original, numRealChannels, aggError);41return aggError.Finalize(flags, channelWeightsSq);42}4344template<int TVectorSize>45static MFloat ComputeErrorHDRFast(uint32_t flags, const MSInt16 reconstructed[TVectorSize], const MSInt16 original[TVectorSize], const float channelWeightsSq[TVectorSize])46{47MFloat error = ParallelMath::MakeFloatZero();48if (flags & Flags::Uniform)49{50for (int ch = 0; ch < TVectorSize; ch++)51error = error + ParallelMath::SqDiffSInt16(reconstructed[ch], original[ch]);52}53else54{55for (int ch = 0; ch < TVectorSize; ch++)56error = error + ParallelMath::SqDiffSInt16(reconstructed[ch], original[ch]) * ParallelMath::MakeFloat(channelWeightsSq[ch]);57}5859return error;60}6162template<int TVectorSize>63static MFloat ComputeErrorHDRSlow(uint32_t flags, const MSInt16 reconstructed[TVectorSize], const MSInt16 original[TVectorSize], const float channelWeightsSq[TVectorSize])64{65MFloat error = ParallelMath::MakeFloatZero();66if (flags & Flags::Uniform)67{68for (int ch = 0; ch < TVectorSize; ch++)69error = error + ParallelMath::SqDiff2CL(reconstructed[ch], original[ch]);70}71else72{73for (int ch = 0; ch < TVectorSize; ch++)74error = error + ParallelMath::SqDiff2CL(reconstructed[ch], original[ch]) * ParallelMath::MakeFloat(channelWeightsSq[ch]);75}7677return error;78}7980template<int TChannelCount>81static void PreWeightPixelsLDR(MFloat preWeightedPixels[16][TChannelCount], const MUInt15 pixels[16][TChannelCount], const float channelWeights[TChannelCount])82{83for (int px = 0; px < 16; px++)84{85for (int ch = 0; ch < TChannelCount; ch++)86preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]) * channelWeights[ch];87}88}8990template<int TChannelCount>91static void PreWeightPixelsHDR(MFloat preWeightedPixels[16][TChannelCount], const MSInt16 pixels[16][TChannelCount], const float channelWeights[TChannelCount])92{93for (int px = 0; px < 16; px++)94{95for (int ch = 0; ch < TChannelCount; ch++)96preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]) * channelWeights[ch];97}98}99};100}101}102103#endif104105106