Path: blob/master/thirdparty/cvtt/ConvectionKernels_BC67.cpp
9903 views
/*1Convection Texture Tools2Copyright (c) 2018-2019 Eric Lasota34Permission is hereby granted, free of charge, to any person obtaining5a copy of this software and associated documentation files (the6"Software"), to deal in the Software without restriction, including7without limitation the rights to use, copy, modify, merge, publish,8distribute, sublicense, and/or sell copies of the Software, and to9permit persons to whom the Software is furnished to do so, subject10to the following conditions:1112The above copyright notice and this permission notice shall be included13in all copies or substantial portions of the Software.1415THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS16OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF17MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.18IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY19CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,20TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE21SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.2223-------------------------------------------------------------------------------------2425Portions based on DirectX Texture Library (DirectXTex)2627Copyright (c) Microsoft Corporation. All rights reserved.28Licensed under the MIT License.2930http://go.microsoft.com/fwlink/?LinkId=24892631*/32#include "ConvectionKernels_Config.h"3334#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)3536#include "ConvectionKernels_BC67.h"3738#include "ConvectionKernels_AggregatedError.h"39#include "ConvectionKernels_BCCommon.h"40#include "ConvectionKernels_BC7_Prio.h"41#include "ConvectionKernels_BC7_SingleColor.h"42#include "ConvectionKernels_BC6H_IO.h"43#include "ConvectionKernels_EndpointRefiner.h"44#include "ConvectionKernels_EndpointSelector.h"45#include "ConvectionKernels_IndexSelectorHDR.h"46#include "ConvectionKernels_ParallelMath.h"47#include "ConvectionKernels_UnfinishedEndpoints.h"4849namespace cvtt50{51namespace Internal52{53namespace BC6754{55typedef ParallelMath::Float MFloat;56typedef ParallelMath::UInt15 MUInt15;5758struct WorkInfo59{60MUInt15 m_mode;61MFloat m_error;62MUInt15 m_ep[3][2][4];63MUInt15 m_indexes[16];64MUInt15 m_indexes2[16];6566union67{68MUInt15 m_partition;69struct IndexSelectorAndRotation70{71MUInt15 m_indexSelector;72MUInt15 m_rotation;73} m_isr;74} m_u;75};76}7778namespace BC6HData79{80enum EField81{82NA, // N/A83M, // Mode84D, // Shape85RW,86RX,87RY,88RZ,89GW,90GX,91GY,92GZ,93BW,94BX,95BY,96BZ,97};9899struct ModeDescriptor100{101EField m_eField;102uint8_t m_uBit;103};104105const ModeDescriptor g_modeDescriptors[14][82] =106{107{ // Mode 1 (0x00) - 10 5 5 5108{ M, 0 },{ M, 1 },{ GY, 4 },{ BY, 4 },{ BZ, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },109{ RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },110{ GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },111{ BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },112{ GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },113{ BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },114{ BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },115{ BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },116{ D, 3 },{ D, 4 },117},118119{ // Mode 2 (0x01) - 7 6 6 6120{ M, 0 },{ M, 1 },{ GY, 5 },{ GZ, 4 },{ GZ, 5 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },121{ RW, 5 },{ RW, 6 },{ BZ, 0 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },122{ GW, 5 },{ GW, 6 },{ BY, 5 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },123{ BW, 5 },{ BW, 6 },{ BZ, 3 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },124{ RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },125{ GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },126{ BX, 5 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },127{ RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 },128{ D, 3 },{ D, 4 },129},130131{ // Mode 3 (0x02) - 11 5 4 4132{ M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },133{ RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },134{ GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },135{ BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },136{ RW,10 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,10 },137{ BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,10 },138{ BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },139{ BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },140{ D, 3 },{ D, 4 },141},142143{ // Mode 4 (0x06) - 11 4 5 4144{ M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },145{ RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },146{ GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },147{ BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,10 },148{ GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },149{ GW,10 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,10 },150{ BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ BZ, 0 },151{ BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ GY, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },152{ D, 3 },{ D, 4 },153},154155{ // Mode 5 (0x0a) - 11 4 4 5156{ M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },157{ RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },158{ GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },159{ BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,10 },160{ BY, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,10 },161{ BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },162{ BW,10 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ BZ, 1 },163{ BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ BZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },164{ D, 3 },{ D, 4 },165},166167{ // Mode 6 (0x0e) - 9 5 5 5168{ M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },169{ RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },170{ GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },171{ BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },172{ GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },173{ BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },174{ BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },175{ BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },176{ D, 3 },{ D, 4 },177},178179{ // Mode 7 (0x12) - 8 6 5 5180{ M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },181{ RW, 5 },{ RW, 6 },{ RW, 7 },{ GZ, 4 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },182{ GW, 5 },{ GW, 6 },{ GW, 7 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },183{ BW, 5 },{ BW, 6 },{ BW, 7 },{ BZ, 3 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },184{ RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },185{ BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },186{ BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },187{ RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 },188{ D, 3 },{ D, 4 },189},190191{ // Mode 8 (0x16) - 8 5 6 5192{ M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },193{ RW, 5 },{ RW, 6 },{ RW, 7 },{ BZ, 0 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },194{ GW, 5 },{ GW, 6 },{ GW, 7 },{ GY, 5 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },195{ BW, 5 },{ BW, 6 },{ BW, 7 },{ GZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },196{ GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },197{ GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },198{ BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },199{ BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },200{ D, 3 },{ D, 4 },201},202203{ // Mode 9 (0x1a) - 8 5 5 6204{ M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },205{ RW, 5 },{ RW, 6 },{ RW, 7 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },206{ GW, 5 },{ GW, 6 },{ GW, 7 },{ BY, 5 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },207{ BW, 5 },{ BW, 6 },{ BW, 7 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },208{ GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },209{ BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },210{ BX, 5 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },211{ BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },212{ D, 3 },{ D, 4 },213},214215{ // Mode 10 (0x1e) - 6 6 6 6216{ M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },217{ RW, 5 },{ GZ, 4 },{ BZ, 0 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },218{ GW, 5 },{ GY, 5 },{ BY, 5 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },219{ BW, 5 },{ GZ, 5 },{ BZ, 3 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },220{ RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },221{ GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },222{ BX, 5 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },223{ RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 },224{ D, 3 },{ D, 4 },225},226227{ // Mode 11 (0x03) - 10 10228{ M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },229{ RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },230{ GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },231{ BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },232{ RX, 5 },{ RX, 6 },{ RX, 7 },{ RX, 8 },{ RX, 9 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },233{ GX, 5 },{ GX, 6 },{ GX, 7 },{ GX, 8 },{ GX, 9 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },234{ BX, 5 },{ BX, 6 },{ BX, 7 },{ BX, 8 },{ BX, 9 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },235{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },236{ NA, 0 },{ NA, 0 },237},238239{ // Mode 12 (0x07) - 11 9240{ M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },241{ RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },242{ GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },243{ BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },244{ RX, 5 },{ RX, 6 },{ RX, 7 },{ RX, 8 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },245{ GX, 5 },{ GX, 6 },{ GX, 7 },{ GX, 8 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },246{ BX, 5 },{ BX, 6 },{ BX, 7 },{ BX, 8 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },247{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },248{ NA, 0 },{ NA, 0 },249},250251{ // Mode 13 (0x0b) - 12 8252{ M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },253{ RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },254{ GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },255{ BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },256{ RX, 5 },{ RX, 6 },{ RX, 7 },{ RW,11 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },257{ GX, 5 },{ GX, 6 },{ GX, 7 },{ GW,11 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },258{ BX, 5 },{ BX, 6 },{ BX, 7 },{ BW,11 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },259{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },260{ NA, 0 },{ NA, 0 },261},262263{ // Mode 14 (0x0f) - 16 4264{ M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },265{ RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },266{ GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },267{ BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,15 },268{ RW,14 },{ RW,13 },{ RW,12 },{ RW,11 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,15 },269{ GW,14 },{ GW,13 },{ GW,12 },{ GW,11 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,15 },270{ BW,14 },{ BW,13 },{ BW,12 },{ BW,11 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },271{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },272{ NA, 0 },{ NA, 0 },273},274};275}276277namespace BC7Data278{279enum AlphaMode280{281AlphaMode_Combined,282AlphaMode_Separate,283AlphaMode_None,284};285286enum PBitMode287{288PBitMode_PerEndpoint,289PBitMode_PerSubset,290PBitMode_None291};292293struct BC7ModeInfo294{295PBitMode m_pBitMode;296AlphaMode m_alphaMode;297int m_rgbBits;298int m_alphaBits;299int m_partitionBits;300int m_numSubsets;301int m_indexBits;302int m_alphaIndexBits;303bool m_hasIndexSelector;304};305306BC7ModeInfo g_modes[] =307{308{ PBitMode_PerEndpoint, AlphaMode_None, 4, 0, 4, 3, 3, 0, false }, // 0309{ PBitMode_PerSubset, AlphaMode_None, 6, 0, 6, 2, 3, 0, false }, // 1310{ PBitMode_None, AlphaMode_None, 5, 0, 6, 3, 2, 0, false }, // 2311{ PBitMode_PerEndpoint, AlphaMode_None, 7, 0, 6, 2, 2, 0, false }, // 3 (Mode reference has an error, P-bit is really per-endpoint)312313{ PBitMode_None, AlphaMode_Separate, 5, 6, 0, 1, 2, 3, true }, // 4314{ PBitMode_None, AlphaMode_Separate, 7, 8, 0, 1, 2, 2, false }, // 5315{ PBitMode_PerEndpoint, AlphaMode_Combined, 7, 7, 0, 1, 4, 0, false }, // 6316{ PBitMode_PerEndpoint, AlphaMode_Combined, 5, 5, 6, 2, 2, 0, false } // 7317};318319const int g_weight2[] = { 0, 21, 43, 64 };320const int g_weight3[] = { 0, 9, 18, 27, 37, 46, 55, 64 };321const int g_weight4[] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };322323const int *g_weightTables[] =324{325NULL,326NULL,327g_weight2,328g_weight3,329g_weight4330};331332struct BC6HModeInfo333{334uint16_t m_modeID;335bool m_partitioned;336bool m_transformed;337int m_aPrec;338int m_bPrec[3];339};340341// [partitioned][precision]342bool g_hdrModesExistForPrecision[2][17] =343{344//0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16345{ false, false, false, false, false, false, false, false, false, false, true, true, true, false, false, false, true },346{ false, false, false, false, false, false, true, true, true, true, true, true, false, false, false, false, false },347};348349BC6HModeInfo g_hdrModes[] =350{351{ 0x00, true, true, 10,{ 5, 5, 5 } },352{ 0x01, true, true, 7,{ 6, 6, 6 } },353{ 0x02, true, true, 11,{ 5, 4, 4 } },354{ 0x06, true, true, 11,{ 4, 5, 4 } },355{ 0x0a, true, true, 11,{ 4, 4, 5 } },356{ 0x0e, true, true, 9,{ 5, 5, 5 } },357{ 0x12, true, true, 8,{ 6, 5, 5 } },358{ 0x16, true, true, 8,{ 5, 6, 5 } },359{ 0x1a, true, true, 8,{ 5, 5, 6 } },360{ 0x1e, true, false, 6,{ 6, 6, 6 } },361{ 0x03, false, false, 10,{ 10, 10, 10 } },362{ 0x07, false, true, 11,{ 9, 9, 9 } },363{ 0x0b, false, true, 12,{ 8, 8, 8 } },364{ 0x0f, false, true, 16,{ 4, 4, 4 } },365};366367const int g_maxHDRPrecision = 16;368369static const size_t g_numHDRModes = sizeof(g_hdrModes) / sizeof(g_hdrModes[0]);370371static uint16_t g_partitionMap[64] =372{3730xCCCC, 0x8888, 0xEEEE, 0xECC8,3740xC880, 0xFEEC, 0xFEC8, 0xEC80,3750xC800, 0xFFEC, 0xFE80, 0xE800,3760xFFE8, 0xFF00, 0xFFF0, 0xF000,3770xF710, 0x008E, 0x7100, 0x08CE,3780x008C, 0x7310, 0x3100, 0x8CCE,3790x088C, 0x3110, 0x6666, 0x366C,3800x17E8, 0x0FF0, 0x718E, 0x399C,3810xaaaa, 0xf0f0, 0x5a5a, 0x33cc,3820x3c3c, 0x55aa, 0x9696, 0xa55a,3830x73ce, 0x13c8, 0x324c, 0x3bdc,3840x6996, 0xc33c, 0x9966, 0x660,3850x272, 0x4e4, 0x4e40, 0x2720,3860xc936, 0x936c, 0x39c6, 0x639c,3870x9336, 0x9cc6, 0x817e, 0xe718,3880xccf0, 0xfcc, 0x7744, 0xee22,389};390391static uint32_t g_partitionMap2[64] =392{3930xaa685050, 0x6a5a5040, 0x5a5a4200, 0x5450a0a8,3940xa5a50000, 0xa0a05050, 0x5555a0a0, 0x5a5a5050,3950xaa550000, 0xaa555500, 0xaaaa5500, 0x90909090,3960x94949494, 0xa4a4a4a4, 0xa9a59450, 0x2a0a4250,3970xa5945040, 0x0a425054, 0xa5a5a500, 0x55a0a0a0,3980xa8a85454, 0x6a6a4040, 0xa4a45000, 0x1a1a0500,3990x0050a4a4, 0xaaa59090, 0x14696914, 0x69691400,4000xa08585a0, 0xaa821414, 0x50a4a450, 0x6a5a0200,4010xa9a58000, 0x5090a0a8, 0xa8a09050, 0x24242424,4020x00aa5500, 0x24924924, 0x24499224, 0x50a50a50,4030x500aa550, 0xaaaa4444, 0x66660000, 0xa5a0a5a0,4040x50a050a0, 0x69286928, 0x44aaaa44, 0x66666600,4050xaa444444, 0x54a854a8, 0x95809580, 0x96969600,4060xa85454a8, 0x80959580, 0xaa141414, 0x96960000,4070xaaaa1414, 0xa05050a0, 0xa0a5a5a0, 0x96000000,4080x40804080, 0xa9a8a9a8, 0xaaaaaa44, 0x2a4a5254,409};410411static int g_fixupIndexes2[64] =412{41315,15,15,15,41415,15,15,15,41515,15,15,15,41615,15,15,15,41715, 2, 8, 2,4182, 8, 8,15,4192, 8, 2, 2,4208, 8, 2, 2,42142215,15, 6, 8,4232, 8,15,15,4242, 8, 2, 2,4252,15,15, 6,4266, 2, 6, 8,42715,15, 2, 2,42815,15,15,15,42915, 2, 2,15,430};431432static int g_fixupIndexes3[64][2] =433{434{ 3,15 },{ 3, 8 },{ 15, 8 },{ 15, 3 },435{ 8,15 },{ 3,15 },{ 15, 3 },{ 15, 8 },436{ 8,15 },{ 8,15 },{ 6,15 },{ 6,15 },437{ 6,15 },{ 5,15 },{ 3,15 },{ 3, 8 },438{ 3,15 },{ 3, 8 },{ 8,15 },{ 15, 3 },439{ 3,15 },{ 3, 8 },{ 6,15 },{ 10, 8 },440{ 5, 3 },{ 8,15 },{ 8, 6 },{ 6,10 },441{ 8,15 },{ 5,15 },{ 15,10 },{ 15, 8 },442443{ 8,15 },{ 15, 3 },{ 3,15 },{ 5,10 },444{ 6,10 },{ 10, 8 },{ 8, 9 },{ 15,10 },445{ 15, 6 },{ 3,15 },{ 15, 8 },{ 5,15 },446{ 15, 3 },{ 15, 6 },{ 15, 6 },{ 15, 8 },447{ 3,15 },{ 15, 3 },{ 5,15 },{ 5,15 },448{ 5,15 },{ 8,15 },{ 5,15 },{ 10,15 },449{ 5,15 },{ 10,15 },{ 8,15 },{ 13,15 },450{ 15, 3 },{ 12,15 },{ 3,15 },{ 3, 8 },451};452453static const unsigned char g_fragments[] =454{4550, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 0, 164560, 1, 2, 3, // 16, 44570, 1, 4, // 20, 34580, 1, 2, 4, // 23, 44592, 3, 7, // 27, 34601, 2, 3, 7, // 30, 44610, 1, 2, 3, 4, 5, 6, 7, // 34, 84620, 1, 4, 8, // 42, 44630, 1, 2, 4, 5, 8, // 46, 64640, 1, 2, 3, 4, 5, 6, 8, // 52, 84651, 4, 5, 6, 9, // 60, 54662, 5, 6, 7, 10, // 65, 54675, 6, 9, 10, // 70, 44682, 3, 7, 11, // 74, 44691, 2, 3, 6, 7, 11, // 78, 64700, 1, 2, 3, 5, 6, 7, 11, // 84, 84710, 1, 2, 3, 8, 9, 10, 11, // 92, 84722, 3, 6, 7, 8, 9, 10, 11, // 100, 84734, 5, 6, 7, 8, 9, 10, 11, // 108, 84740, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, // 116, 124750, 4, 8, 12, // 128, 44760, 2, 3, 4, 6, 7, 8, 12, // 132, 84770, 1, 2, 4, 5, 8, 9, 12, // 140, 84780, 1, 2, 3, 4, 5, 6, 8, 9, 12, // 148, 104793, 6, 7, 8, 9, 12, // 158, 64803, 5, 6, 7, 8, 9, 10, 12, // 164, 84810, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, // 172, 124820, 1, 2, 5, 6, 7, 11, 12, // 184, 84835, 8, 9, 10, 13, // 192, 54848, 12, 13, // 197, 34854, 8, 12, 13, // 200, 44862, 3, 6, 9, 12, 13, // 204, 64870, 1, 2, 3, 8, 9, 12, 13, // 210, 84880, 1, 4, 5, 8, 9, 12, 13, // 218, 84892, 3, 6, 7, 8, 9, 12, 13, // 226, 84902, 3, 5, 6, 9, 10, 12, 13, // 234, 84910, 3, 6, 7, 9, 10, 12, 13, // 242, 84920, 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 13, // 250, 124930, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, // 262, 134942, 3, 4, 7, 8, 11, 12, 13, // 275, 84951, 2, 6, 7, 8, 11, 12, 13, // 283, 84962, 3, 4, 6, 7, 8, 9, 11, 12, 13, // 291, 104972, 3, 4, 5, 10, 11, 12, 13, // 301, 84980, 1, 6, 7, 10, 11, 12, 13, // 309, 84996, 9, 10, 11, 14, // 317, 55000, 2, 4, 6, 8, 10, 12, 14, // 322, 85011, 3, 5, 7, 8, 10, 12, 14, // 330, 85021, 3, 4, 6, 9, 11, 12, 14, // 338, 85030, 2, 5, 7, 9, 11, 12, 14, // 346, 85040, 3, 4, 5, 8, 9, 13, 14, // 354, 85052, 3, 4, 7, 8, 9, 13, 14, // 362, 85061, 2, 5, 6, 9, 10, 13, 14, // 370, 85070, 3, 4, 7, 9, 10, 13, 14, // 378, 85080, 3, 5, 6, 8, 11, 13, 14, // 386, 85091, 2, 4, 7, 8, 11, 13, 14, // 394, 85100, 1, 4, 7, 10, 11, 13, 14, // 402, 85110, 3, 6, 7, 10, 11, 13, 14, // 410, 85128, 12, 13, 14, // 418, 45131, 2, 3, 7, 8, 12, 13, 14, // 422, 85144, 8, 9, 12, 13, 14, // 430, 65150, 4, 5, 8, 9, 12, 13, 14, // 436, 85161, 2, 3, 6, 7, 8, 9, 12, 13, 14, // 444, 105172, 6, 8, 9, 10, 12, 13, 14, // 454, 85180, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, // 462, 125190, 7, 9, 10, 11, 12, 13, 14, // 474, 85201, 2, 3, 4, 5, 6, 8, 15, // 482, 85213, 7, 11, 15, // 490, 45220, 1, 3, 4, 5, 7, 11, 15, // 494, 85230, 4, 5, 10, 11, 15, // 502, 65241, 2, 3, 6, 7, 10, 11, 15, // 508, 85250, 1, 2, 3, 5, 6, 7, 10, 11, 15, // 516, 105260, 4, 5, 6, 9, 10, 11, 15, // 526, 85270, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 15, // 534, 125281, 2, 4, 5, 8, 9, 12, 15, // 546, 85292, 3, 5, 6, 8, 9, 12, 15, // 554, 85300, 3, 5, 6, 9, 10, 12, 15, // 562, 85311, 2, 4, 7, 9, 10, 12, 15, // 570, 85321, 2, 5, 6, 8, 11, 12, 15, // 578, 85330, 3, 4, 7, 8, 11, 12, 15, // 586, 85340, 1, 5, 6, 10, 11, 12, 15, // 594, 85351, 2, 6, 7, 10, 11, 12, 15, // 602, 85361, 3, 4, 6, 8, 10, 13, 15, // 610, 85370, 2, 5, 7, 8, 10, 13, 15, // 618, 85380, 2, 4, 6, 9, 11, 13, 15, // 626, 85391, 3, 5, 7, 9, 11, 13, 15, // 634, 85400, 1, 2, 3, 4, 5, 7, 8, 12, 13, 15, // 642, 115412, 3, 4, 5, 8, 9, 14, 15, // 653, 85420, 1, 6, 7, 8, 9, 14, 15, // 661, 85430, 1, 5, 10, 14, 15, // 669, 65440, 3, 4, 5, 9, 10, 14, 15, // 675, 85450, 1, 5, 6, 9, 10, 14, 15, // 683, 854611, 14, 15, // 691, 35477, 11, 14, 15, // 694, 45481, 2, 4, 5, 8, 11, 14, 15, // 698, 85490, 1, 4, 7, 8, 11, 14, 15, // 706, 85500, 1, 4, 5, 10, 11, 14, 15, // 714, 85512, 3, 6, 7, 10, 11, 14, 15, // 722, 85524, 5, 6, 7, 10, 11, 14, 15, // 730, 85530, 1, 4, 5, 7, 8, 10, 11, 14, 15, // 738, 105540, 1, 2, 3, 5, 6, 7, 9, 10, 11, 14, 15, // 748, 125550, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 14, 15, // 760, 135560, 1, 2, 3, 4, 6, 7, 11, 12, 14, 15, // 773, 115573, 4, 8, 9, 10, 13, 14, 15, // 784, 855811, 13, 14, 15, // 792, 45590, 1, 2, 4, 11, 13, 14, 15, // 796, 85600, 1, 2, 4, 5, 10, 11, 13, 14, 15, // 804, 105617, 10, 11, 13, 14, 15, // 814, 65623, 6, 7, 10, 11, 13, 14, 15, // 820, 85631, 5, 9, 10, 11, 13, 14, 15, // 828, 85641, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, // 836, 1256512, 13, 14, 15, // 848, 45660, 1, 2, 3, 12, 13, 14, 15, // 852, 85670, 1, 4, 5, 12, 13, 14, 15, // 860, 85684, 5, 6, 7, 12, 13, 14, 15, // 868, 85694, 8, 9, 10, 12, 13, 14, 15, // 876, 85700, 4, 5, 8, 9, 10, 12, 13, 14, 15, // 884, 105710, 1, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, // 894, 125720, 1, 2, 3, 4, 7, 8, 11, 12, 13, 14, 15, // 906, 125730, 1, 3, 4, 8, 9, 11, 12, 13, 14, 15, // 918, 115740, 2, 3, 7, 8, 10, 11, 12, 13, 14, 15, // 929, 115757, 9, 10, 11, 12, 13, 14, 15, // 940, 85763, 6, 7, 9, 10, 11, 12, 13, 14, 15, // 948, 105772, 3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, // 958, 125788, 9, 10, 11, 12, 13, 14, 15, // 970, 85790, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, // 978, 125800, 1, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, // 990, 135813, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1003, 125822, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1015, 135834, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1028, 125840, 2, // 1040, 25851, 3, // 1042, 25860, 1, 4, 5, // 1044, 45870, 1, 2, 4, 5, // 1048, 55882, 3, 6, // 1053, 35890, 2, 4, 6, // 1056, 45901, 2, 5, 6, // 1060, 45910, 1, 2, 3, 5, 6, // 1064, 65920, 1, 2, 4, 5, 6, // 1070, 65930, 1, 2, 3, 4, 5, 6, // 1076, 75940, 3, 4, 7, // 1083, 45950, 1, 2, 3, 4, 7, // 1087, 65961, 3, 5, 7, // 1093, 45972, 3, 6, 7, // 1097, 45981, 2, 3, 6, 7, // 1101, 55991, 2, 3, 5, 6, 7, // 1106, 66000, 1, 2, 3, 5, 6, 7, // 1112, 76014, 5, 6, 7, // 1119, 46020, 8, // 1123, 26030, 1, 4, 5, 8, // 1125, 56040, 1, 8, 9, // 1130, 46054, 5, 8, 9, // 1134, 46060, 1, 4, 5, 8, 9, // 1138, 66072, 6, 8, 9, // 1144, 46086, 7, 8, 9, // 1148, 46090, 2, 4, 6, 8, 10, // 1152, 66101, 2, 5, 6, 9, 10, // 1158, 66110, 3, 4, 7, 9, 10, // 1164, 66120, 1, 2, 8, 9, 10, // 1170, 66134, 5, 6, 8, 9, 10, // 1176, 66143, 11, // 1182, 26152, 3, 6, 7, 11, // 1184, 56160, 3, 8, 11, // 1189, 46170, 3, 4, 7, 8, 11, // 1193, 66181, 3, 5, 7, 9, 11, // 1199, 66192, 3, 10, 11, // 1205, 46201, 5, 10, 11, // 1209, 46214, 5, 10, 11, // 1213, 46226, 7, 10, 11, // 1217, 46232, 3, 6, 7, 10, 11, // 1221, 66241, 2, 3, 9, 10, 11, // 1227, 66255, 6, 7, 9, 10, 11, // 1233, 66268, 9, 10, 11, // 1239, 46274, 12, // 1243, 26280, 1, 2, 3, 4, 5, 8, 12, // 1245, 86298, 9, 12, // 1253, 36300, 4, 5, 8, 9, 12, // 1256, 66310, 1, 4, 5, 8, 9, 12, // 1262, 76322, 3, 5, 6, 8, 9, 12, // 1269, 76331, 5, 9, 13, // 1276, 46346, 7, 9, 13, // 1280, 46351, 4, 7, 10, 13, // 1284, 56361, 6, 8, 11, 13, // 1289, 56370, 1, 12, 13, // 1294, 46384, 5, 12, 13, // 1298, 46390, 1, 6, 7, 12, 13, // 1302, 66400, 1, 4, 8, 12, 13, // 1308, 66418, 9, 12, 13, // 1314, 46424, 8, 9, 12, 13, // 1318, 56434, 5, 8, 9, 12, 13, // 1323, 66440, 4, 5, 8, 9, 12, 13, // 1329, 76450, 1, 6, 10, 12, 13, // 1336, 66463, 6, 7, 9, 10, 12, 13, // 1342, 76470, 1, 10, 11, 12, 13, // 1349, 66482, 4, 7, 9, 14, // 1355, 56494, 5, 10, 14, // 1360, 46502, 6, 10, 14, // 1364, 46512, 5, 8, 11, 14, // 1368, 56520, 2, 12, 14, // 1373, 46538, 10, 12, 14, // 1377, 46544, 6, 8, 10, 12, 14, // 1381, 665513, 14, // 1387, 26569, 10, 13, 14, // 1389, 46575, 6, 9, 10, 13, 14, // 1393, 66580, 1, 2, 12, 13, 14, // 1399, 66594, 5, 6, 12, 13, 14, // 1405, 66608, 9, 12, 13, 14, // 1411, 56618, 9, 10, 12, 13, 14, // 1416, 66627, 15, // 1422, 26630, 5, 10, 15, // 1424, 46640, 1, 2, 3, 6, 7, 11, 15, // 1428, 866510, 11, 15, // 1436, 36660, 1, 5, 6, 10, 11, 15, // 1439, 76673, 6, 7, 10, 11, 15, // 1446, 666812, 15, // 1452, 26690, 3, 12, 15, // 1454, 46704, 7, 12, 15, // 1458, 46710, 3, 6, 9, 12, 15, // 1462, 66720, 3, 5, 10, 12, 15, // 1468, 66738, 11, 12, 15, // 1474, 46745, 6, 8, 11, 12, 15, // 1478, 66754, 7, 8, 11, 12, 15, // 1484, 66761, 3, 13, 15, // 1490, 46779, 11, 13, 15, // 1494, 46785, 7, 9, 11, 13, 15, // 1498, 66792, 3, 14, 15, // 1504, 46802, 3, 4, 5, 14, 15, // 1508, 66816, 7, 14, 15, // 1514, 46822, 3, 5, 9, 14, 15, // 1518, 66832, 3, 8, 9, 14, 15, // 1524, 668410, 14, 15, // 1530, 36850, 4, 5, 9, 10, 14, 15, // 1533, 76862, 3, 7, 11, 14, 15, // 1540, 668710, 11, 14, 15, // 1546, 46887, 10, 11, 14, 15, // 1550, 56896, 7, 10, 11, 14, 15, // 1555, 66901, 2, 3, 13, 14, 15, // 1561, 66915, 6, 7, 13, 14, 15, // 1567, 669210, 11, 13, 14, 15, // 1573, 56939, 10, 11, 13, 14, 15, // 1578, 66940, 4, 8, 9, 12, 13, 14, 15, // 1584, 86959, 10, 12, 13, 14, 15, // 1592, 66968, 11, 12, 13, 14, 15, // 1598, 66973, 7, 10, 11, 12, 13, 14, 15, // 1604, 8698};699static const int g_shapeRanges[][2] =700{701{ 0, 16 },{ 16, 4 },{ 20, 3 },{ 23, 4 },{ 27, 3 },{ 30, 4 },{ 34, 8 },{ 42, 4 },{ 46, 6 },{ 52, 8 },{ 60, 5 },702{ 65, 5 },{ 70, 4 },{ 74, 4 },{ 78, 6 },{ 84, 8 },{ 92, 8 },{ 100, 8 },{ 108, 8 },{ 116, 12 },{ 128, 4 },{ 132, 8 },703{ 140, 8 },{ 148, 10 },{ 158, 6 },{ 164, 8 },{ 172, 12 },{ 184, 8 },{ 192, 5 },{ 197, 3 },{ 200, 4 },{ 204, 6 },{ 210, 8 },704{ 218, 8 },{ 226, 8 },{ 234, 8 },{ 242, 8 },{ 250, 12 },{ 262, 13 },{ 275, 8 },{ 283, 8 },{ 291, 10 },{ 301, 8 },{ 309, 8 },705{ 317, 5 },{ 322, 8 },{ 330, 8 },{ 338, 8 },{ 346, 8 },{ 354, 8 },{ 362, 8 },{ 370, 8 },{ 378, 8 },{ 386, 8 },{ 394, 8 },706{ 402, 8 },{ 410, 8 },{ 418, 4 },{ 422, 8 },{ 430, 6 },{ 436, 8 },{ 444, 10 },{ 454, 8 },{ 462, 12 },{ 474, 8 },{ 482, 8 },707{ 490, 4 },{ 494, 8 },{ 502, 6 },{ 508, 8 },{ 516, 10 },{ 526, 8 },{ 534, 12 },{ 546, 8 },{ 554, 8 },{ 562, 8 },{ 570, 8 },708{ 578, 8 },{ 586, 8 },{ 594, 8 },{ 602, 8 },{ 610, 8 },{ 618, 8 },{ 626, 8 },{ 634, 8 },{ 642, 11 },{ 653, 8 },{ 661, 8 },709{ 669, 6 },{ 675, 8 },{ 683, 8 },{ 691, 3 },{ 694, 4 },{ 698, 8 },{ 706, 8 },{ 714, 8 },{ 722, 8 },{ 730, 8 },{ 738, 10 },710{ 748, 12 },{ 760, 13 },{ 773, 11 },{ 784, 8 },{ 792, 4 },{ 796, 8 },{ 804, 10 },{ 814, 6 },{ 820, 8 },{ 828, 8 },{ 836, 12 },711{ 848, 4 },{ 852, 8 },{ 860, 8 },{ 868, 8 },{ 876, 8 },{ 884, 10 },{ 894, 12 },{ 906, 12 },{ 918, 11 },{ 929, 11 },{ 940, 8 },712{ 948, 10 },{ 958, 12 },{ 970, 8 },{ 978, 12 },{ 990, 13 },{ 1003, 12 },{ 1015, 13 },{ 1028, 12 },{ 1040, 2 },{ 1042, 2 },{ 1044, 4 },713{ 1048, 5 },{ 1053, 3 },{ 1056, 4 },{ 1060, 4 },{ 1064, 6 },{ 1070, 6 },{ 1076, 7 },{ 1083, 4 },{ 1087, 6 },{ 1093, 4 },{ 1097, 4 },714{ 1101, 5 },{ 1106, 6 },{ 1112, 7 },{ 1119, 4 },{ 1123, 2 },{ 1125, 5 },{ 1130, 4 },{ 1134, 4 },{ 1138, 6 },{ 1144, 4 },{ 1148, 4 },715{ 1152, 6 },{ 1158, 6 },{ 1164, 6 },{ 1170, 6 },{ 1176, 6 },{ 1182, 2 },{ 1184, 5 },{ 1189, 4 },{ 1193, 6 },{ 1199, 6 },{ 1205, 4 },716{ 1209, 4 },{ 1213, 4 },{ 1217, 4 },{ 1221, 6 },{ 1227, 6 },{ 1233, 6 },{ 1239, 4 },{ 1243, 2 },{ 1245, 8 },{ 1253, 3 },{ 1256, 6 },717{ 1262, 7 },{ 1269, 7 },{ 1276, 4 },{ 1280, 4 },{ 1284, 5 },{ 1289, 5 },{ 1294, 4 },{ 1298, 4 },{ 1302, 6 },{ 1308, 6 },{ 1314, 4 },718{ 1318, 5 },{ 1323, 6 },{ 1329, 7 },{ 1336, 6 },{ 1342, 7 },{ 1349, 6 },{ 1355, 5 },{ 1360, 4 },{ 1364, 4 },{ 1368, 5 },{ 1373, 4 },719{ 1377, 4 },{ 1381, 6 },{ 1387, 2 },{ 1389, 4 },{ 1393, 6 },{ 1399, 6 },{ 1405, 6 },{ 1411, 5 },{ 1416, 6 },{ 1422, 2 },{ 1424, 4 },720{ 1428, 8 },{ 1436, 3 },{ 1439, 7 },{ 1446, 6 },{ 1452, 2 },{ 1454, 4 },{ 1458, 4 },{ 1462, 6 },{ 1468, 6 },{ 1474, 4 },{ 1478, 6 },721{ 1484, 6 },{ 1490, 4 },{ 1494, 4 },{ 1498, 6 },{ 1504, 4 },{ 1508, 6 },{ 1514, 4 },{ 1518, 6 },{ 1524, 6 },{ 1530, 3 },{ 1533, 7 },722{ 1540, 6 },{ 1546, 4 },{ 1550, 5 },{ 1555, 6 },{ 1561, 6 },{ 1567, 6 },{ 1573, 5 },{ 1578, 6 },{ 1584, 8 },{ 1592, 6 },{ 1598, 6 },723{ 1604, 8 },724};725static const int g_shapes1[][2] =726{727{ 0, 16 }728};729static const int g_shapes2[64][2] =730{731{ 33, 96 },{ 63, 66 },{ 20, 109 },{ 22, 107 },{ 37, 92 },{ 7, 122 },{ 8, 121 },{ 23, 106 },732{ 38, 91 },{ 2, 127 },{ 9, 120 },{ 26, 103 },{ 3, 126 },{ 6, 123 },{ 1, 128 },{ 19, 110 },733{ 15, 114 },{ 124, 5 },{ 72, 57 },{ 115, 14 },{ 125, 4 },{ 70, 59 },{ 100, 29 },{ 60, 69 },734{ 116, 13 },{ 99, 30 },{ 78, 51 },{ 94, 35 },{ 104, 25 },{ 111, 18 },{ 71, 58 },{ 90, 39 },735{ 45, 84 },{ 16, 113 },{ 82, 47 },{ 95, 34 },{ 87, 42 },{ 83, 46 },{ 53, 76 },{ 48, 81 },736{ 68, 61 },{ 105, 24 },{ 98, 31 },{ 88, 41 },{ 75, 54 },{ 43, 86 },{ 52, 77 },{ 117, 12 },737{ 119, 10 },{ 118, 11 },{ 85, 44 },{ 101, 28 },{ 36, 93 },{ 55, 74 },{ 89, 40 },{ 79, 50 },738{ 56, 73 },{ 49, 80 },{ 64, 65 },{ 27, 102 },{ 32, 97 },{ 112, 17 },{ 67, 62 },{ 21, 108 },739};740static const int g_shapes3[64][3] =741{742{ 148, 160, 240 },{ 132, 212, 205 },{ 136, 233, 187 },{ 175, 237, 143 },{ 6, 186, 232 },{ 33, 142, 232 },{ 131, 123, 142 },{ 131, 96, 186 },743{ 6, 171, 110 },{ 1, 18, 110 },{ 1, 146, 123 },{ 33, 195, 66 },{ 20, 51, 66 },{ 20, 178, 96 },{ 2, 177, 106 },{ 211, 4, 59 },744{ 8, 191, 91 },{ 230, 14, 29 },{ 1, 188, 234 },{ 151, 110, 168 },{ 20, 144, 238 },{ 137, 66, 206 },{ 173, 179, 232 },{ 209, 194, 186 },745{ 239, 165, 142 },{ 131, 152, 242 },{ 214, 54, 12 },{ 140, 219, 201 },{ 190, 150, 231 },{ 156, 135, 241 },{ 185, 227, 167 },{ 145, 210, 59 },746{ 138, 174, 106 },{ 189, 229, 14 },{ 176, 133, 106 },{ 78, 178, 195 },{ 111, 146, 171 },{ 216, 180, 196 },{ 217, 181, 193 },{ 184, 228, 166 },747{ 192, 225, 153 },{ 134, 141, 123 },{ 6, 222, 198 },{ 149, 183, 96 },{ 33, 226, 164 },{ 161, 215, 51 },{ 197, 221, 18 },{ 1, 223, 199 },748{ 154, 163, 110 },{ 20, 236, 169 },{ 157, 204, 66 },{ 1, 202, 220 },{ 20, 170, 235 },{ 203, 158, 66 },{ 162, 155, 110 },{ 6, 201, 218 },749{ 139, 135, 123 },{ 33, 167, 224 },{ 182, 150, 96 },{ 19, 200, 213 },{ 63, 207, 159 },{ 147, 172, 109 },{ 129, 130, 128 },{ 208, 14, 59 },750};751752static const int g_shapeList1[] =753{7540,755};756757static const int g_shapeList2[] =758{7591, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,76012, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,76123, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,76234, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,76345, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,76456, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,76567, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,76678, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88,76789, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,768100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,769111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,770122, 123, 124, 125, 126, 127, 128,771};772773static const int g_shapeList12[] =774{7750, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,77611, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,77722, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,77833, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,77944, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,78055, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,78166, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,78277, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,78388, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,78499, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,785110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,786121, 122, 123, 124, 125, 126, 127, 128,787};788789static const int g_shapeList3[] =790{7911, 2, 4, 6, 8, 12, 14, 18, 19, 20, 29,79233, 51, 54, 59, 63, 66, 78, 91, 96, 106, 109,793110, 111, 123, 128, 129, 130, 131, 132, 133, 134, 135,794136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,795147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157,796158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,797169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,798180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190,799191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201,800202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212,801213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,802224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234,803235, 236, 237, 238, 239, 240, 241, 242,804};805806static const int g_shapeList3Short[] =807{8081, 2, 4, 6, 18, 20, 33, 51, 59, 66, 96,809106, 110, 123, 131, 132, 136, 142, 143, 146, 148, 160,810171, 175, 177, 178, 186, 187, 195, 205, 211, 212, 232,811233, 237, 240,812};813814static const int g_shapeListAll[] =815{8160, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,81711, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,81822, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,81933, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,82044, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,82155, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65,82266, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,82377, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,82488, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,82599, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,826110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120,827121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,828132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,829143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153,830154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,831165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,832176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186,833187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197,834198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208,835209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,836220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230,837231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241,838242,839};840841static const int g_numShapes1 = sizeof(g_shapeList1) / sizeof(g_shapeList1[0]);842static const int g_numShapes2 = sizeof(g_shapeList2) / sizeof(g_shapeList2[0]);843static const int g_numShapes12 = sizeof(g_shapeList12) / sizeof(g_shapeList12[0]);844static const int g_numShapes3 = sizeof(g_shapeList3) / sizeof(g_shapeList3[0]);845static const int g_numShapes3Short = sizeof(g_shapeList3Short) / sizeof(g_shapeList3Short[0]);846static const int g_numShapesAll = sizeof(g_shapeListAll) / sizeof(g_shapeListAll[0]);847static const int g_numFragments = sizeof(g_fragments) / sizeof(g_fragments[0]);848}849850struct PackingVector851{852uint32_t m_vector[4];853int m_offset;854855void Init()856{857for (int i = 0; i < 4; i++)858m_vector[i] = 0;859860m_offset = 0;861}862863void InitPacked(const uint32_t *v, int bits)864{865for (int b = 0; b < bits; b += 32)866m_vector[b / 32] = v[b / 32];867868m_offset = bits;869}870871inline void Pack(ParallelMath::ScalarUInt16 value, int bits)872{873int vOffset = m_offset >> 5;874int bitOffset = m_offset & 0x1f;875876m_vector[vOffset] |= (static_cast<uint32_t>(value) << bitOffset) & static_cast<uint32_t>(0xffffffff);877878int overflowBits = bitOffset + bits - 32;879if (overflowBits > 0)880m_vector[vOffset + 1] |= (static_cast<uint32_t>(value) >> (bits - overflowBits));881882m_offset += bits;883}884885inline void Flush(uint8_t* output)886{887assert(m_offset == 128);888889for (int v = 0; v < 4; v++)890{891uint32_t chunk = m_vector[v];892for (int b = 0; b < 4; b++)893output[v * 4 + b] = static_cast<uint8_t>((chunk >> (b * 8)) & 0xff);894}895}896};897898899struct UnpackingVector900{901uint32_t m_vector[4];902903void Init(const uint8_t *bytes)904{905for (int i = 0; i < 4; i++)906m_vector[i] = 0;907908for (int b = 0; b < 16; b++)909m_vector[b / 4] |= (bytes[b] << ((b % 4) * 8));910}911912inline void UnpackStart(uint32_t *v, int bits)913{914for (int b = 0; b < bits; b += 32)915v[b / 32] = m_vector[b / 32];916917int entriesShifted = bits / 32;918int carry = bits % 32;919920for (int i = entriesShifted; i < 4; i++)921m_vector[i - entriesShifted] = m_vector[i];922923int entriesRemaining = 4 - entriesShifted;924if (carry)925{926uint32_t bitMask = (1 << carry) - 1;927for (int i = 0; i < entriesRemaining; i++)928{929m_vector[i] >>= carry;930if (i != entriesRemaining - 1)931m_vector[i] |= (m_vector[i + 1] & bitMask) << (32 - carry);932}933}934}935936inline ParallelMath::ScalarUInt16 Unpack(int bits)937{938uint32_t bitMask = (1 << bits) - 1;939940ParallelMath::ScalarUInt16 result = static_cast<ParallelMath::ScalarUInt16>(m_vector[0] & bitMask);941942for (int i = 0; i < 4; i++)943{944m_vector[i] >>= bits;945if (i != 3)946m_vector[i] |= (m_vector[i + 1] & bitMask) << (32 - bits);947}948949return result;950}951};952953ParallelMath::Float ScaleHDRValue(const ParallelMath::Float &v, bool isSigned)954{955if (isSigned)956{957ParallelMath::Float offset = ParallelMath::Select(ParallelMath::Less(v, ParallelMath::MakeFloatZero()), ParallelMath::MakeFloat(-30.0f), ParallelMath::MakeFloat(30.0f));958return (v * 32.0f + offset) / 31.0f;959}960else961return (v * 64.0f + 30.0f) / 31.0f;962}963964ParallelMath::SInt16 UnscaleHDRValueSigned(const ParallelMath::SInt16 &v)965{966#ifdef CVTT_ENABLE_ASSERTS967for (int i = 0; i < ParallelMath::ParallelSize; i++)968assert(ParallelMath::Extract(v, i) != -32768)969#endif970971ParallelMath::Int16CompFlag negative = ParallelMath::Less(v, ParallelMath::MakeSInt16(0));972ParallelMath::UInt15 absComp = ParallelMath::LosslessCast<ParallelMath::UInt15>::Cast(ParallelMath::Select(negative, ParallelMath::SInt16(ParallelMath::MakeSInt16(0) - v), v));973974ParallelMath::UInt31 multiplied = ParallelMath::XMultiply(absComp, ParallelMath::MakeUInt15(31));975ParallelMath::UInt31 shifted = ParallelMath::RightShift(multiplied, 5);976ParallelMath::UInt15 absCompScaled = ParallelMath::ToUInt15(shifted);977ParallelMath::SInt16 signBits = ParallelMath::SelectOrZero(negative, ParallelMath::MakeSInt16(-32768));978979return ParallelMath::LosslessCast<ParallelMath::SInt16>::Cast(absCompScaled) | signBits;980}981982ParallelMath::UInt15 UnscaleHDRValueUnsigned(const ParallelMath::UInt16 &v)983{984return ParallelMath::ToUInt15(ParallelMath::RightShift(ParallelMath::XMultiply(v, ParallelMath::MakeUInt15(31)), 6));985}986987void UnscaleHDREndpoints(const ParallelMath::AInt16 inEP[2][3], ParallelMath::AInt16 outEP[2][3], bool isSigned)988{989for (int epi = 0; epi < 2; epi++)990{991for (int ch = 0; ch < 3; ch++)992{993if (isSigned)994outEP[epi][ch] = ParallelMath::LosslessCast<ParallelMath::AInt16>::Cast(UnscaleHDRValueSigned(ParallelMath::LosslessCast<ParallelMath::SInt16>::Cast(inEP[epi][ch])));995else996outEP[epi][ch] = ParallelMath::LosslessCast<ParallelMath::AInt16>::Cast(UnscaleHDRValueUnsigned(ParallelMath::LosslessCast<ParallelMath::UInt16>::Cast(inEP[epi][ch])));997}998}999}10001001struct SinglePlaneTemporaries1002{1003UnfinishedEndpoints<3> unfinishedRGB[BC7Data::g_numShapesAll];1004UnfinishedEndpoints<4> unfinishedRGBA[BC7Data::g_numShapes12];10051006ParallelMath::UInt15 fragmentBestIndexes[BC7Data::g_numFragments];1007ParallelMath::UInt15 shapeBestEP[BC7Data::g_numShapesAll][2][4];1008ParallelMath::Float shapeBestError[BC7Data::g_numShapesAll];1009};1010}1011}10121013void cvtt::Internal::BC7Computer::TweakAlpha(const MUInt15 original[2], int tweak, int range, MUInt15 result[2])1014{1015ParallelMath::RoundTowardNearestForScope roundingMode;10161017float tf[2];1018Util::ComputeTweakFactors(tweak, range, tf);10191020MFloat base = ParallelMath::ToFloat(original[0]);1021MFloat offs = ParallelMath::ToFloat(original[1]) - base;10221023result[0] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(base + offs * tf[0], 0.0f, 255.0f), &roundingMode);1024result[1] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(base + offs * tf[1], 0.0f, 255.0f), &roundingMode);1025}10261027void cvtt::Internal::BC7Computer::Quantize(MUInt15* color, int bits, int channels)1028{1029for (int ch = 0; ch < channels; ch++)1030color[ch] = ParallelMath::RightShift(((color[ch] << bits) - color[ch]) + ParallelMath::MakeUInt15(127 + (1 << (7 - bits))), 8);1031}10321033void cvtt::Internal::BC7Computer::QuantizeP(MUInt15* color, int bits, uint16_t p, int channels)1034{1035int16_t addend;1036if (p)1037addend = ((1 << (8 - bits)) - 1);1038else1039addend = 255;10401041for (int ch = 0; ch < channels; ch++)1042{1043MUInt16 ch16 = ParallelMath::LosslessCast<MUInt16>::Cast(color[ch]);1044ch16 = ParallelMath::RightShift((ch16 << (bits + 1)) - ch16 + addend, 9);1045ch16 = (ch16 << 1) | ParallelMath::MakeUInt16(p);1046color[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ch16);1047}1048}10491050void cvtt::Internal::BC7Computer::Unquantize(MUInt15* color, int bits, int channels)1051{1052for (int ch = 0; ch < channels; ch++)1053{1054MUInt15 clr = color[ch];1055clr = clr << (8 - bits);1056color[ch] = clr | ParallelMath::RightShift(clr, bits);1057}1058}10591060void cvtt::Internal::BC7Computer::CompressEndpoints0(MUInt15 ep[2][4], uint16_t p[2])1061{1062for (int j = 0; j < 2; j++)1063{1064QuantizeP(ep[j], 4, p[j], 3);1065Unquantize(ep[j], 5, 3);1066ep[j][3] = ParallelMath::MakeUInt15(255);1067}1068}10691070void cvtt::Internal::BC7Computer::CompressEndpoints1(MUInt15 ep[2][4], uint16_t p)1071{1072for (int j = 0; j < 2; j++)1073{1074QuantizeP(ep[j], 6, p, 3);1075Unquantize(ep[j], 7, 3);1076ep[j][3] = ParallelMath::MakeUInt15(255);1077}1078}10791080void cvtt::Internal::BC7Computer::CompressEndpoints2(MUInt15 ep[2][4])1081{1082for (int j = 0; j < 2; j++)1083{1084Quantize(ep[j], 5, 3);1085Unquantize(ep[j], 5, 3);1086ep[j][3] = ParallelMath::MakeUInt15(255);1087}1088}10891090void cvtt::Internal::BC7Computer::CompressEndpoints3(MUInt15 ep[2][4], uint16_t p[2])1091{1092for (int j = 0; j < 2; j++)1093{1094QuantizeP(ep[j], 7, p[j], 3);1095ep[j][3] = ParallelMath::MakeUInt15(255);1096}1097}10981099void cvtt::Internal::BC7Computer::CompressEndpoints4(MUInt15 epRGB[2][3], MUInt15 epA[2])1100{1101for (int j = 0; j < 2; j++)1102{1103Quantize(epRGB[j], 5, 3);1104Unquantize(epRGB[j], 5, 3);11051106Quantize(epA + j, 6, 1);1107Unquantize(epA + j, 6, 1);1108}1109}11101111void cvtt::Internal::BC7Computer::CompressEndpoints5(MUInt15 epRGB[2][3], MUInt15 epA[2])1112{1113for (int j = 0; j < 2; j++)1114{1115Quantize(epRGB[j], 7, 3);1116Unquantize(epRGB[j], 7, 3);1117}11181119// Alpha is full precision1120(void)epA;1121}11221123void cvtt::Internal::BC7Computer::CompressEndpoints6(MUInt15 ep[2][4], uint16_t p[2])1124{1125for (int j = 0; j < 2; j++)1126QuantizeP(ep[j], 7, p[j], 4);1127}11281129void cvtt::Internal::BC7Computer::CompressEndpoints7(MUInt15 ep[2][4], uint16_t p[2])1130{1131for (int j = 0; j < 2; j++)1132{1133QuantizeP(ep[j], 5, p[j], 4);1134Unquantize(ep[j], 6, 4);1135}1136}11371138void cvtt::Internal::BC7Computer::TrySingleColorRGBAMultiTable(uint32_t flags, const MUInt15 pixels[16][4], const MFloat average[4], int numRealChannels, const uint8_t *fragmentStart, int shapeLength, const MFloat &staticAlphaError, const ParallelMath::Int16CompFlag punchThroughInvalid[4], MFloat& shapeBestError, MUInt15 shapeBestEP[2][4], MUInt15 *fragmentBestIndexes, const float *channelWeightsSq, const cvtt::Tables::BC7SC::Table*const* tables, int numTables, const ParallelMath::RoundTowardNearestForScope *rtn)1139{1140MFloat bestAverageError = ParallelMath::MakeFloat(FLT_MAX);11411142MUInt15 intAverage[4];1143for (int ch = 0; ch < 4; ch++)1144intAverage[ch] = ParallelMath::RoundAndConvertToU15(average[ch], rtn);11451146MUInt15 eps[2][4];1147MUInt15 reconstructed[4];1148MUInt15 index = ParallelMath::MakeUInt15(0);11491150for (int epi = 0; epi < 2; epi++)1151{1152for (int ch = 0; ch < 3; ch++)1153eps[epi][ch] = ParallelMath::MakeUInt15(0);1154eps[epi][3] = ParallelMath::MakeUInt15(255);1155}11561157for (int ch = 0; ch < 3; ch++)1158reconstructed[ch] = ParallelMath::MakeUInt15(0);1159reconstructed[3] = ParallelMath::MakeUInt15(255);11601161// Depending on the target index and parity bits, there are multiple valid solid colors.1162// We want to find the one closest to the actual average.1163MFloat epsAverageDiff = ParallelMath::MakeFloat(FLT_MAX);1164for (int t = 0; t < numTables; t++)1165{1166const cvtt::Tables::BC7SC::Table& table = *(tables[t]);11671168ParallelMath::Int16CompFlag pti = punchThroughInvalid[table.m_pBits];11691170MUInt15 candidateReconstructed[4];1171MUInt15 candidateEPs[2][4];11721173for (int i = 0; i < ParallelMath::ParallelSize; i++)1174{1175for (int ch = 0; ch < numRealChannels; ch++)1176{1177ParallelMath::ScalarUInt16 avgValue = ParallelMath::Extract(intAverage[ch], i);1178assert(avgValue >= 0 && avgValue <= 255);11791180const cvtt::Tables::BC7SC::TableEntry &entry = table.m_entries[avgValue];11811182ParallelMath::PutUInt15(candidateEPs[0][ch], i, entry.m_min);1183ParallelMath::PutUInt15(candidateEPs[1][ch], i, entry.m_max);1184ParallelMath::PutUInt15(candidateReconstructed[ch], i, entry.m_actualColor);1185}1186}11871188MFloat avgError = ParallelMath::MakeFloatZero();1189for (int ch = 0; ch < numRealChannels; ch++)1190{1191MFloat delta = ParallelMath::ToFloat(candidateReconstructed[ch]) - average[ch];1192avgError = avgError + delta * delta * channelWeightsSq[ch];1193}11941195ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(avgError, bestAverageError));1196better = ParallelMath::AndNot(pti, better); // Mask out punch-through invalidations11971198if (ParallelMath::AnySet(better))1199{1200ParallelMath::ConditionalSet(bestAverageError, ParallelMath::Int16FlagToFloat(better), avgError);12011202MUInt15 candidateIndex = ParallelMath::MakeUInt15(table.m_index);12031204ParallelMath::ConditionalSet(index, better, candidateIndex);12051206for (int ch = 0; ch < numRealChannels; ch++)1207ParallelMath::ConditionalSet(reconstructed[ch], better, candidateReconstructed[ch]);12081209for (int epi = 0; epi < 2; epi++)1210for (int ch = 0; ch < numRealChannels; ch++)1211ParallelMath::ConditionalSet(eps[epi][ch], better, candidateEPs[epi][ch]);1212}1213}12141215AggregatedError<4> aggError;1216for (int pxi = 0; pxi < shapeLength; pxi++)1217{1218int px = fragmentStart[pxi];12191220BCCommon::ComputeErrorLDR<4>(flags, reconstructed, pixels[px], numRealChannels, aggError);1221}12221223MFloat error = aggError.Finalize(flags, channelWeightsSq) + staticAlphaError;12241225ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(error, shapeBestError));1226if (ParallelMath::AnySet(better))1227{1228shapeBestError = ParallelMath::Min(shapeBestError, error);1229for (int epi = 0; epi < 2; epi++)1230{1231for (int ch = 0; ch < numRealChannels; ch++)1232ParallelMath::ConditionalSet(shapeBestEP[epi][ch], better, eps[epi][ch]);1233}12341235for (int pxi = 0; pxi < shapeLength; pxi++)1236ParallelMath::ConditionalSet(fragmentBestIndexes[pxi], better, index);1237}1238}12391240void cvtt::Internal::BC7Computer::TrySinglePlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn)1241{1242if (numRefineRounds < 1)1243numRefineRounds = 1;12441245float channelWeightsSq[4];12461247for (int ch = 0; ch < 4; ch++)1248channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch];12491250SinglePlaneTemporaries temps;12511252MUInt15 maxAlpha = ParallelMath::MakeUInt15(0);1253MUInt15 minAlpha = ParallelMath::MakeUInt15(255);1254ParallelMath::Int16CompFlag isPunchThrough = ParallelMath::MakeBoolInt16(true);1255for (int px = 0; px < 16; px++)1256{1257MUInt15 a = pixels[px][3];1258maxAlpha = ParallelMath::Max(maxAlpha, a);1259minAlpha = ParallelMath::Min(minAlpha, a);12601261isPunchThrough = (isPunchThrough & (ParallelMath::Equal(a, ParallelMath::MakeUInt15(0)) | ParallelMath::Equal(a, ParallelMath::MakeUInt15(255))));1262}12631264ParallelMath::Int16CompFlag blockHasNonMaxAlpha = ParallelMath::Less(minAlpha, ParallelMath::MakeUInt15(255));1265ParallelMath::Int16CompFlag blockHasNonZeroAlpha = ParallelMath::Less(ParallelMath::MakeUInt15(0), maxAlpha);12661267bool anyBlockHasAlpha = ParallelMath::AnySet(blockHasNonMaxAlpha);12681269// Try RGB modes if any block has a min alpha 251 or higher1270bool allowRGBModes = ParallelMath::AnySet(ParallelMath::Less(ParallelMath::MakeUInt15(250), minAlpha));12711272// Try mode 7 if any block has alpha.1273// Mode 7 is almost never selected for RGB blocks because mode 4 has very accurate 7.7.7.1 endpoints1274// and its parity bit doesn't affect alpha, meaning mode 7 can only be better in extremely specific1275// situations, and only by at most 1 unit of error per pixel.1276bool allowMode7 = anyBlockHasAlpha || (encodingPlan.mode7RGBPartitionEnabled != 0);12771278MFloat preWeightedPixels[16][4];12791280BCCommon::PreWeightPixelsLDR<4>(preWeightedPixels, pixels, channelWeights);12811282// Get initial RGB endpoints1283if (allowRGBModes)1284{1285const uint8_t *shapeList = encodingPlan.rgbShapeList;1286int numShapesToEvaluate = encodingPlan.rgbNumShapesToEvaluate;12871288for (int shapeIter = 0; shapeIter < numShapesToEvaluate; shapeIter++)1289{1290int shape = shapeList[shapeIter];12911292int shapeStart = BC7Data::g_shapeRanges[shape][0];1293int shapeSize = BC7Data::g_shapeRanges[shape][1];12941295EndpointSelector<3, 8> epSelector;12961297for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++)1298{1299for (int spx = 0; spx < shapeSize; spx++)1300{1301int px = BC7Data::g_fragments[shapeStart + spx];1302epSelector.ContributePass(preWeightedPixels[px], epPass, ParallelMath::MakeFloat(1.0f));1303}1304epSelector.FinishPass(epPass);1305}1306temps.unfinishedRGB[shape] = epSelector.GetEndpoints(channelWeights);1307}1308}13091310// Get initial RGBA endpoints1311{1312const uint8_t *shapeList = encodingPlan.rgbaShapeList;1313int numShapesToEvaluate = encodingPlan.rgbaNumShapesToEvaluate;13141315for (int shapeIter = 0; shapeIter < numShapesToEvaluate; shapeIter++)1316{1317int shape = shapeList[shapeIter];13181319if (anyBlockHasAlpha || !allowRGBModes)1320{1321int shapeStart = BC7Data::g_shapeRanges[shape][0];1322int shapeSize = BC7Data::g_shapeRanges[shape][1];13231324EndpointSelector<4, 8> epSelector;13251326for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++)1327{1328for (int spx = 0; spx < shapeSize; spx++)1329{1330int px = BC7Data::g_fragments[shapeStart + spx];1331epSelector.ContributePass(preWeightedPixels[px], epPass, ParallelMath::MakeFloat(1.0f));1332}1333epSelector.FinishPass(epPass);1334}1335temps.unfinishedRGBA[shape] = epSelector.GetEndpoints(channelWeights);1336}1337else1338{1339temps.unfinishedRGBA[shape] = temps.unfinishedRGB[shape].ExpandTo<4>(255);1340}1341}1342}13431344for (uint16_t mode = 0; mode <= 7; mode++)1345{1346if (mode == 4 || mode == 5)1347continue;13481349if (mode < 4 && !allowRGBModes)1350continue;13511352if (mode == 7 && !allowMode7)1353continue;13541355uint64_t partitionEnabledBits = 0;1356switch (mode)1357{1358case 0:1359partitionEnabledBits = encodingPlan.mode0PartitionEnabled;1360break;1361case 1:1362partitionEnabledBits = encodingPlan.mode1PartitionEnabled;1363break;1364case 2:1365partitionEnabledBits = encodingPlan.mode2PartitionEnabled;1366break;1367case 3:1368partitionEnabledBits = encodingPlan.mode3PartitionEnabled;1369break;1370case 6:1371partitionEnabledBits = encodingPlan.mode6Enabled ? 1 : 0;1372break;1373case 7:1374if (anyBlockHasAlpha)1375partitionEnabledBits = encodingPlan.mode7RGBAPartitionEnabled;1376else1377partitionEnabledBits = encodingPlan.mode7RGBPartitionEnabled;1378break;1379default:1380break;1381}13821383bool isRGB = (mode < 4);13841385unsigned int numPartitions = 1 << BC7Data::g_modes[mode].m_partitionBits;1386int numSubsets = BC7Data::g_modes[mode].m_numSubsets;1387int indexPrec = BC7Data::g_modes[mode].m_indexBits;13881389int parityBitMax = 1;1390if (BC7Data::g_modes[mode].m_pBitMode == BC7Data::PBitMode_PerEndpoint)1391parityBitMax = 4;1392else if (BC7Data::g_modes[mode].m_pBitMode == BC7Data::PBitMode_PerSubset)1393parityBitMax = 2;13941395int numRealChannels = isRGB ? 3 : 4;13961397int numShapes;1398const int *shapeList;13991400if (numSubsets == 1)1401{1402numShapes = BC7Data::g_numShapes1;1403shapeList = BC7Data::g_shapeList1;1404}1405else if (numSubsets == 2)1406{1407numShapes = BC7Data::g_numShapes2;1408shapeList = BC7Data::g_shapeList2;1409}1410else1411{1412assert(numSubsets == 3);1413if (numPartitions == 16)1414{1415numShapes = BC7Data::g_numShapes3Short;1416shapeList = BC7Data::g_shapeList3Short;1417}1418else1419{1420assert(numPartitions == 64);1421numShapes = BC7Data::g_numShapes3;1422shapeList = BC7Data::g_shapeList3;1423}1424}14251426for (int slot = 0; slot < BC7Data::g_numShapesAll; slot++)1427temps.shapeBestError[slot] = ParallelMath::MakeFloat(FLT_MAX);14281429for (int shapeIter = 0; shapeIter < numShapes; shapeIter++)1430{1431int shape = shapeList[shapeIter];14321433int numTweakRounds = 0;1434if (isRGB)1435numTweakRounds = encodingPlan.seedPointsForShapeRGB[shape];1436else1437numTweakRounds = encodingPlan.seedPointsForShapeRGBA[shape];14381439if (numTweakRounds == 0)1440continue;14411442if (numTweakRounds > MaxTweakRounds)1443numTweakRounds = MaxTweakRounds;14441445int shapeStart = BC7Data::g_shapeRanges[shape][0];1446int shapeLength = BC7Data::g_shapeRanges[shape][1];14471448AggregatedError<1> alphaAggError;1449if (isRGB && anyBlockHasAlpha)1450{1451MUInt15 filledAlpha[1] = { ParallelMath::MakeUInt15(255) };14521453for (int pxi = 0; pxi < shapeLength; pxi++)1454{1455int px = BC7Data::g_fragments[shapeStart + pxi];1456MUInt15 original[1] = { pixels[px][3] };1457BCCommon::ComputeErrorLDR<1>(flags, filledAlpha, original, alphaAggError);1458}1459}14601461float alphaWeightsSq[1] = { channelWeightsSq[3] };1462MFloat staticAlphaError = alphaAggError.Finalize(flags, alphaWeightsSq);14631464MUInt15 tweakBaseEP[MaxTweakRounds][2][4];14651466for (int tweak = 0; tweak < numTweakRounds; tweak++)1467{1468if (isRGB)1469{1470temps.unfinishedRGB[shape].FinishLDR(tweak, 1 << indexPrec, tweakBaseEP[tweak][0], tweakBaseEP[tweak][1]);1471tweakBaseEP[tweak][0][3] = tweakBaseEP[tweak][1][3] = ParallelMath::MakeUInt15(255);1472}1473else1474{1475temps.unfinishedRGBA[shape].FinishLDR(tweak, 1 << indexPrec, tweakBaseEP[tweak][0], tweakBaseEP[tweak][1]);1476}1477}14781479ParallelMath::Int16CompFlag punchThroughInvalid[4];1480for (int pIter = 0; pIter < parityBitMax; pIter++)1481{1482punchThroughInvalid[pIter] = ParallelMath::MakeBoolInt16(false);14831484if ((flags & Flags::BC7_RespectPunchThrough) && (mode == 6 || mode == 7))1485{1486// Modes 6 and 7 have parity bits that affect alpha1487if (pIter == 0)1488punchThroughInvalid[pIter] = (isPunchThrough & blockHasNonZeroAlpha);1489else if (pIter == parityBitMax - 1)1490punchThroughInvalid[pIter] = (isPunchThrough & blockHasNonMaxAlpha);1491else1492punchThroughInvalid[pIter] = isPunchThrough;1493}1494}14951496for (int pIter = 0; pIter < parityBitMax; pIter++)1497{1498if (ParallelMath::AllSet(punchThroughInvalid[pIter]))1499continue;15001501bool needPunchThroughCheck = ParallelMath::AnySet(punchThroughInvalid[pIter]);15021503for (int tweak = 0; tweak < numTweakRounds; tweak++)1504{1505uint16_t p[2];1506p[0] = (pIter & 1);1507p[1] = ((pIter >> 1) & 1);15081509MUInt15 ep[2][4];15101511for (int epi = 0; epi < 2; epi++)1512for (int ch = 0; ch < 4; ch++)1513ep[epi][ch] = tweakBaseEP[tweak][epi][ch];15141515for (int refine = 0; refine < numRefineRounds; refine++)1516{1517switch (mode)1518{1519case 0:1520CompressEndpoints0(ep, p);1521break;1522case 1:1523CompressEndpoints1(ep, p[0]);1524break;1525case 2:1526CompressEndpoints2(ep);1527break;1528case 3:1529CompressEndpoints3(ep, p);1530break;1531case 6:1532CompressEndpoints6(ep, p);1533break;1534case 7:1535CompressEndpoints7(ep, p);1536break;1537default:1538assert(false);1539break;1540};15411542MFloat shapeError = ParallelMath::MakeFloatZero();15431544IndexSelector<4> indexSelector;1545indexSelector.Init<false>(channelWeights, ep, 1 << indexPrec);15461547EndpointRefiner<4> epRefiner;1548epRefiner.Init(1 << indexPrec, channelWeights);15491550MUInt15 indexes[16];15511552AggregatedError<4> aggError;1553for (int pxi = 0; pxi < shapeLength; pxi++)1554{1555int px = BC7Data::g_fragments[shapeStart + pxi];15561557MUInt15 index;1558MUInt15 reconstructed[4];15591560index = indexSelector.SelectIndexLDR(floatPixels[px], rtn);1561indexSelector.ReconstructLDR_BC7(index, reconstructed, numRealChannels);15621563if (flags & cvtt::Flags::BC7_FastIndexing)1564BCCommon::ComputeErrorLDR<4>(flags, reconstructed, pixels[px], numRealChannels, aggError);1565else1566{1567MFloat error = BCCommon::ComputeErrorLDRSimple<4>(flags, reconstructed, pixels[px], numRealChannels, channelWeightsSq);15681569MUInt15 altIndexes[2];1570altIndexes[0] = ParallelMath::Max(index, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1);1571altIndexes[1] = ParallelMath::Min(index + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << indexPrec) - 1)));15721573for (int ii = 0; ii < 2; ii++)1574{1575indexSelector.ReconstructLDR_BC7(altIndexes[ii], reconstructed, numRealChannels);15761577MFloat altError = BCCommon::ComputeErrorLDRSimple<4>(flags, reconstructed, pixels[px], numRealChannels, channelWeightsSq);1578ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altError, error));1579error = ParallelMath::Min(error, altError);1580ParallelMath::ConditionalSet(index, better, altIndexes[ii]);1581}15821583shapeError = shapeError + error;1584}15851586if (refine != numRefineRounds - 1)1587epRefiner.ContributeUnweightedPW(preWeightedPixels[px], index, numRealChannels);15881589indexes[pxi] = index;1590}15911592if (flags & cvtt::Flags::BC7_FastIndexing)1593shapeError = aggError.Finalize(flags, channelWeightsSq);15941595if (isRGB)1596shapeError = shapeError + staticAlphaError;15971598ParallelMath::FloatCompFlag shapeErrorBetter;1599ParallelMath::Int16CompFlag shapeErrorBetter16;16001601shapeErrorBetter = ParallelMath::Less(shapeError, temps.shapeBestError[shape]);1602shapeErrorBetter16 = ParallelMath::FloatFlagToInt16(shapeErrorBetter);16031604if (ParallelMath::AnySet(shapeErrorBetter16))1605{1606bool punchThroughOK = true;1607if (needPunchThroughCheck)1608{1609shapeErrorBetter16 = ParallelMath::AndNot(punchThroughInvalid[pIter], shapeErrorBetter16);1610shapeErrorBetter = ParallelMath::Int16FlagToFloat(shapeErrorBetter16);16111612if (!ParallelMath::AnySet(shapeErrorBetter16))1613punchThroughOK = false;1614}16151616if (punchThroughOK)1617{1618ParallelMath::ConditionalSet(temps.shapeBestError[shape], shapeErrorBetter, shapeError);1619for (int epi = 0; epi < 2; epi++)1620for (int ch = 0; ch < numRealChannels; ch++)1621ParallelMath::ConditionalSet(temps.shapeBestEP[shape][epi][ch], shapeErrorBetter16, ep[epi][ch]);16221623for (int pxi = 0; pxi < shapeLength; pxi++)1624ParallelMath::ConditionalSet(temps.fragmentBestIndexes[shapeStart + pxi], shapeErrorBetter16, indexes[pxi]);1625}1626}16271628if (refine != numRefineRounds - 1)1629epRefiner.GetRefinedEndpointsLDR(ep, numRealChannels, rtn);1630} // refine1631} // tweak1632} // p16331634if (flags & cvtt::Flags::BC7_TrySingleColor)1635{1636MUInt15 total[4];1637for (int ch = 0; ch < 4; ch++)1638total[ch] = ParallelMath::MakeUInt15(0);16391640for (int pxi = 0; pxi < shapeLength; pxi++)1641{1642int px = BC7Data::g_fragments[shapeStart + pxi];1643for (int ch = 0; ch < 4; ch++)1644total[ch] = total[ch] + pixels[pxi][ch];1645}16461647MFloat rcpShapeLength = ParallelMath::MakeFloat(1.0f / static_cast<float>(shapeLength));1648MFloat average[4];1649for (int ch = 0; ch < 4; ch++)1650average[ch] = ParallelMath::ToFloat(total[ch]) * rcpShapeLength;16511652const uint8_t *fragment = BC7Data::g_fragments + shapeStart;1653MFloat &shapeBestError = temps.shapeBestError[shape];1654MUInt15 (&shapeBestEP)[2][4] = temps.shapeBestEP[shape];1655MUInt15 *fragmentBestIndexes = temps.fragmentBestIndexes + shapeStart;16561657const cvtt::Tables::BC7SC::Table **scTables = NULL;1658int numSCTables = 0;16591660const cvtt::Tables::BC7SC::Table *tables0[] =1661{1662&cvtt::Tables::BC7SC::g_mode0_p00_i1,1663&cvtt::Tables::BC7SC::g_mode0_p00_i2,1664&cvtt::Tables::BC7SC::g_mode0_p00_i3,1665&cvtt::Tables::BC7SC::g_mode0_p01_i1,1666&cvtt::Tables::BC7SC::g_mode0_p01_i2,1667&cvtt::Tables::BC7SC::g_mode0_p01_i3,1668&cvtt::Tables::BC7SC::g_mode0_p10_i1,1669&cvtt::Tables::BC7SC::g_mode0_p10_i2,1670&cvtt::Tables::BC7SC::g_mode0_p10_i3,1671&cvtt::Tables::BC7SC::g_mode0_p11_i1,1672&cvtt::Tables::BC7SC::g_mode0_p11_i2,1673&cvtt::Tables::BC7SC::g_mode0_p11_i3,1674};16751676const cvtt::Tables::BC7SC::Table *tables1[] =1677{1678&cvtt::Tables::BC7SC::g_mode1_p0_i1,1679&cvtt::Tables::BC7SC::g_mode1_p0_i2,1680&cvtt::Tables::BC7SC::g_mode1_p0_i3,1681&cvtt::Tables::BC7SC::g_mode1_p1_i1,1682&cvtt::Tables::BC7SC::g_mode1_p1_i2,1683&cvtt::Tables::BC7SC::g_mode1_p1_i3,1684};16851686const cvtt::Tables::BC7SC::Table *tables2[] =1687{1688&cvtt::Tables::BC7SC::g_mode2,1689};16901691const cvtt::Tables::BC7SC::Table *tables3[] =1692{1693&cvtt::Tables::BC7SC::g_mode3_p0,1694&cvtt::Tables::BC7SC::g_mode3_p1,1695};16961697const cvtt::Tables::BC7SC::Table *tables6[] =1698{1699&cvtt::Tables::BC7SC::g_mode6_p0_i1,1700&cvtt::Tables::BC7SC::g_mode6_p0_i2,1701&cvtt::Tables::BC7SC::g_mode6_p0_i3,1702&cvtt::Tables::BC7SC::g_mode6_p0_i4,1703&cvtt::Tables::BC7SC::g_mode6_p0_i5,1704&cvtt::Tables::BC7SC::g_mode6_p0_i6,1705&cvtt::Tables::BC7SC::g_mode6_p0_i7,1706&cvtt::Tables::BC7SC::g_mode6_p1_i1,1707&cvtt::Tables::BC7SC::g_mode6_p1_i2,1708&cvtt::Tables::BC7SC::g_mode6_p1_i3,1709&cvtt::Tables::BC7SC::g_mode6_p1_i4,1710&cvtt::Tables::BC7SC::g_mode6_p1_i5,1711&cvtt::Tables::BC7SC::g_mode6_p1_i6,1712&cvtt::Tables::BC7SC::g_mode6_p1_i7,1713};17141715const cvtt::Tables::BC7SC::Table *tables7[] =1716{1717&cvtt::Tables::BC7SC::g_mode7_p00,1718&cvtt::Tables::BC7SC::g_mode7_p01,1719&cvtt::Tables::BC7SC::g_mode7_p10,1720&cvtt::Tables::BC7SC::g_mode7_p11,1721};17221723switch (mode)1724{1725case 0:1726{1727scTables = tables0;1728numSCTables = sizeof(tables0) / sizeof(tables0[0]);1729}1730break;1731case 1:1732{1733scTables = tables1;1734numSCTables = sizeof(tables1) / sizeof(tables1[0]);1735}1736break;1737case 2:1738{17391740scTables = tables2;1741numSCTables = sizeof(tables2) / sizeof(tables2[0]);1742}1743break;1744case 3:1745{1746scTables = tables3;1747numSCTables = sizeof(tables3) / sizeof(tables3[0]);1748}1749break;1750case 6:1751{1752scTables = tables6;1753numSCTables = sizeof(tables6) / sizeof(tables6[0]);1754}1755break;1756case 7:1757{1758scTables = tables7;1759numSCTables = sizeof(tables7) / sizeof(tables7[0]);1760}1761break;1762default:1763assert(false);1764break;1765}17661767TrySingleColorRGBAMultiTable(flags, pixels, average, numRealChannels, fragment, shapeLength, staticAlphaError, punchThroughInvalid, shapeBestError, shapeBestEP, fragmentBestIndexes, channelWeightsSq, scTables, numSCTables, rtn);1768}1769} // shapeIter17701771uint64_t partitionsEnabledBits = 0xffffffffffffffffULL;17721773switch (mode)1774{1775case 0:1776partitionsEnabledBits = encodingPlan.mode0PartitionEnabled;1777break;1778case 1:1779partitionsEnabledBits = encodingPlan.mode1PartitionEnabled;1780break;1781case 2:1782partitionsEnabledBits = encodingPlan.mode2PartitionEnabled;1783break;1784case 3:1785partitionsEnabledBits = encodingPlan.mode3PartitionEnabled;1786break;1787case 6:1788partitionsEnabledBits = encodingPlan.mode6Enabled ? 1 : 0;1789break;1790case 7:1791if (anyBlockHasAlpha)1792partitionEnabledBits = encodingPlan.mode7RGBAPartitionEnabled;1793else1794partitionEnabledBits = encodingPlan.mode7RGBPartitionEnabled;1795break;1796default:1797break;1798};17991800for (uint16_t partition = 0; partition < numPartitions; partition++)1801{1802if (((partitionsEnabledBits >> partition) & 1) == 0)1803continue;18041805const int *partitionShapes;1806if (numSubsets == 1)1807partitionShapes = BC7Data::g_shapes1[partition];1808else if (numSubsets == 2)1809partitionShapes = BC7Data::g_shapes2[partition];1810else1811{1812assert(numSubsets == 3);1813partitionShapes = BC7Data::g_shapes3[partition];1814}18151816MFloat totalError = ParallelMath::MakeFloatZero();1817for (int subset = 0; subset < numSubsets; subset++)1818totalError = totalError + temps.shapeBestError[partitionShapes[subset]];18191820ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(totalError, work.m_error);1821ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter);18221823if (mode == 7 && anyBlockHasAlpha)1824{1825// Some lanes could be better, but we filter them out to ensure consistency with scalar1826bool isRGBAllowedForThisPartition = (((encodingPlan.mode7RGBPartitionEnabled >> partition) & 1) != 0);18271828if (!isRGBAllowedForThisPartition)1829{1830errorBetter16 = (errorBetter16 & blockHasNonMaxAlpha);1831errorBetter = ParallelMath::Int16FlagToFloat(errorBetter16);1832}1833}18341835if (ParallelMath::AnySet(errorBetter16))1836{1837for (int subset = 0; subset < numSubsets; subset++)1838{1839int shape = partitionShapes[subset];1840int shapeStart = BC7Data::g_shapeRanges[shape][0];1841int shapeLength = BC7Data::g_shapeRanges[shape][1];18421843for (int epi = 0; epi < 2; epi++)1844for (int ch = 0; ch < 4; ch++)1845ParallelMath::ConditionalSet(work.m_ep[subset][epi][ch], errorBetter16, temps.shapeBestEP[shape][epi][ch]);18461847for (int pxi = 0; pxi < shapeLength; pxi++)1848{1849int px = BC7Data::g_fragments[shapeStart + pxi];1850ParallelMath::ConditionalSet(work.m_indexes[px], errorBetter16, temps.fragmentBestIndexes[shapeStart + pxi]);1851}1852}18531854ParallelMath::ConditionalSet(work.m_error, errorBetter, totalError);1855ParallelMath::ConditionalSet(work.m_mode, errorBetter16, ParallelMath::MakeUInt15(mode));1856ParallelMath::ConditionalSet(work.m_u.m_partition, errorBetter16, ParallelMath::MakeUInt15(partition));1857}1858}1859}1860}18611862void cvtt::Internal::BC7Computer::TryDualPlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn)1863{1864// TODO: These error calculations are not optimal for weight-by-alpha, but this routine needs to be mostly rewritten for that.1865// The alpha/color solutions are co-dependent in that case, but a good way to solve it would probably be to1866// solve the alpha channel first, then solve the RGB channels, which in turn breaks down into two cases:1867// - Separate alpha channel, then weighted RGB1868// - Alpha+2 other channels, then the independent channel1869if (numRefineRounds < 1)1870numRefineRounds = 1;18711872float channelWeightsSq[4];1873for (int ch = 0; ch < 4; ch++)1874channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch];18751876for (uint16_t mode = 4; mode <= 5; mode++)1877{1878int numSP[2] = { 0, 0 };18791880for (uint16_t rotation = 0; rotation < 4; rotation++)1881{1882if (mode == 4)1883{1884numSP[0] = encodingPlan.mode4SP[rotation][0];1885numSP[1] = encodingPlan.mode4SP[rotation][1];1886}1887else1888numSP[0] = numSP[1] = encodingPlan.mode5SP[rotation];18891890if (numSP[0] == 0 && numSP[1] == 0)1891continue;18921893int alphaChannel = (rotation + 3) & 3;1894int redChannel = (rotation == 1) ? 3 : 0;1895int greenChannel = (rotation == 2) ? 3 : 1;1896int blueChannel = (rotation == 3) ? 3 : 2;18971898MUInt15 rotatedRGB[16][3];1899MFloat floatRotatedRGB[16][3];19001901for (int px = 0; px < 16; px++)1902{1903rotatedRGB[px][0] = pixels[px][redChannel];1904rotatedRGB[px][1] = pixels[px][greenChannel];1905rotatedRGB[px][2] = pixels[px][blueChannel];19061907for (int ch = 0; ch < 3; ch++)1908floatRotatedRGB[px][ch] = ParallelMath::ToFloat(rotatedRGB[px][ch]);1909}19101911uint16_t maxIndexSelector = (mode == 4) ? 2 : 1;19121913float rotatedRGBWeights[3] = { channelWeights[redChannel], channelWeights[greenChannel], channelWeights[blueChannel] };1914float rotatedRGBWeightsSq[3] = { channelWeightsSq[redChannel], channelWeightsSq[greenChannel], channelWeightsSq[blueChannel] };1915float rotatedAlphaWeight[1] = { channelWeights[alphaChannel] };1916float rotatedAlphaWeightSq[1] = { channelWeightsSq[alphaChannel] };19171918float uniformWeight[1] = { 1.0f }; // Since the alpha channel is independent, there's no need to bother with weights when doing refinement or selection, only error19191920MFloat preWeightedRotatedRGB[16][3];1921BCCommon::PreWeightPixelsLDR<3>(preWeightedRotatedRGB, rotatedRGB, rotatedRGBWeights);19221923for (uint16_t indexSelector = 0; indexSelector < maxIndexSelector; indexSelector++)1924{1925int numTweakRounds = numSP[indexSelector];19261927if (numTweakRounds <= 0)1928continue;19291930if (numTweakRounds > MaxTweakRounds)1931numTweakRounds = MaxTweakRounds;19321933EndpointSelector<3, 8> rgbSelector;19341935for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++)1936{1937for (int px = 0; px < 16; px++)1938rgbSelector.ContributePass(preWeightedRotatedRGB[px], epPass, ParallelMath::MakeFloat(1.0f));19391940rgbSelector.FinishPass(epPass);1941}19421943MUInt15 alphaRange[2];19441945alphaRange[0] = alphaRange[1] = pixels[0][alphaChannel];1946for (int px = 1; px < 16; px++)1947{1948alphaRange[0] = ParallelMath::Min(pixels[px][alphaChannel], alphaRange[0]);1949alphaRange[1] = ParallelMath::Max(pixels[px][alphaChannel], alphaRange[1]);1950}19511952int rgbPrec = 0;1953int alphaPrec = 0;19541955if (mode == 4)1956{1957rgbPrec = indexSelector ? 3 : 2;1958alphaPrec = indexSelector ? 2 : 3;1959}1960else1961rgbPrec = alphaPrec = 2;19621963UnfinishedEndpoints<3> unfinishedRGB = rgbSelector.GetEndpoints(rotatedRGBWeights);19641965MFloat bestRGBError = ParallelMath::MakeFloat(FLT_MAX);1966MFloat bestAlphaError = ParallelMath::MakeFloat(FLT_MAX);19671968MUInt15 bestRGBIndexes[16];1969MUInt15 bestAlphaIndexes[16];1970MUInt15 bestEP[2][4];19711972for (int px = 0; px < 16; px++)1973bestRGBIndexes[px] = bestAlphaIndexes[px] = ParallelMath::MakeUInt15(0);19741975for (int tweak = 0; tweak < numTweakRounds; tweak++)1976{1977MUInt15 rgbEP[2][3];1978MUInt15 alphaEP[2];19791980unfinishedRGB.FinishLDR(tweak, 1 << rgbPrec, rgbEP[0], rgbEP[1]);19811982TweakAlpha(alphaRange, tweak, 1 << alphaPrec, alphaEP);19831984for (int refine = 0; refine < numRefineRounds; refine++)1985{1986if (mode == 4)1987CompressEndpoints4(rgbEP, alphaEP);1988else1989CompressEndpoints5(rgbEP, alphaEP);199019911992IndexSelector<1> alphaIndexSelector;1993IndexSelector<3> rgbIndexSelector;19941995{1996MUInt15 alphaEPTemp[2][1] = { { alphaEP[0] },{ alphaEP[1] } };1997alphaIndexSelector.Init<false>(uniformWeight, alphaEPTemp, 1 << alphaPrec);1998}1999rgbIndexSelector.Init<false>(rotatedRGBWeights, rgbEP, 1 << rgbPrec);20002001EndpointRefiner<3> rgbRefiner;2002EndpointRefiner<1> alphaRefiner;20032004rgbRefiner.Init(1 << rgbPrec, rotatedRGBWeights);2005alphaRefiner.Init(1 << alphaPrec, uniformWeight);20062007MFloat errorRGB = ParallelMath::MakeFloatZero();2008MFloat errorA = ParallelMath::MakeFloatZero();20092010MUInt15 rgbIndexes[16];2011MUInt15 alphaIndexes[16];20122013AggregatedError<3> rgbAggError;2014AggregatedError<1> alphaAggError;20152016for (int px = 0; px < 16; px++)2017{2018MUInt15 rgbIndex = rgbIndexSelector.SelectIndexLDR(floatRotatedRGB[px], rtn);2019MUInt15 alphaIndex = alphaIndexSelector.SelectIndexLDR(floatPixels[px] + alphaChannel, rtn);20202021MUInt15 reconstructedRGB[3];2022MUInt15 reconstructedAlpha[1];20232024rgbIndexSelector.ReconstructLDR_BC7(rgbIndex, reconstructedRGB);2025alphaIndexSelector.ReconstructLDR_BC7(alphaIndex, reconstructedAlpha);20262027if (flags & cvtt::Flags::BC7_FastIndexing)2028{2029BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], rgbAggError);2030BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, alphaAggError);2031}2032else2033{2034AggregatedError<3> baseRGBAggError;2035AggregatedError<1> baseAlphaAggError;20362037BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], baseRGBAggError);2038BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, baseAlphaAggError);20392040MFloat rgbError = baseRGBAggError.Finalize(flags, rotatedRGBWeightsSq);2041MFloat alphaError = baseAlphaAggError.Finalize(flags, rotatedAlphaWeightSq);20422043MUInt15 altRGBIndexes[2];2044MUInt15 altAlphaIndexes[2];20452046altRGBIndexes[0] = ParallelMath::Max(rgbIndex, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1);2047altRGBIndexes[1] = ParallelMath::Min(rgbIndex + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << rgbPrec) - 1)));20482049altAlphaIndexes[0] = ParallelMath::Max(alphaIndex, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1);2050altAlphaIndexes[1] = ParallelMath::Min(alphaIndex + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << alphaPrec) - 1)));20512052for (int ii = 0; ii < 2; ii++)2053{2054rgbIndexSelector.ReconstructLDR_BC7(altRGBIndexes[ii], reconstructedRGB);2055alphaIndexSelector.ReconstructLDR_BC7(altAlphaIndexes[ii], reconstructedAlpha);20562057AggregatedError<3> altRGBAggError;2058AggregatedError<1> altAlphaAggError;20592060BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], altRGBAggError);2061BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, altAlphaAggError);20622063MFloat altRGBError = altRGBAggError.Finalize(flags, rotatedRGBWeightsSq);2064MFloat altAlphaError = altAlphaAggError.Finalize(flags, rotatedAlphaWeightSq);20652066ParallelMath::Int16CompFlag rgbBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altRGBError, rgbError));2067ParallelMath::Int16CompFlag alphaBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altAlphaError, alphaError));20682069rgbError = ParallelMath::Min(altRGBError, rgbError);2070alphaError = ParallelMath::Min(altAlphaError, alphaError);20712072ParallelMath::ConditionalSet(rgbIndex, rgbBetter, altRGBIndexes[ii]);2073ParallelMath::ConditionalSet(alphaIndex, alphaBetter, altAlphaIndexes[ii]);2074}20752076errorRGB = errorRGB + rgbError;2077errorA = errorA + alphaError;2078}20792080if (refine != numRefineRounds - 1)2081{2082rgbRefiner.ContributeUnweightedPW(preWeightedRotatedRGB[px], rgbIndex);2083alphaRefiner.ContributeUnweightedPW(floatPixels[px] + alphaChannel, alphaIndex);2084}20852086if (flags & Flags::BC7_FastIndexing)2087{2088errorRGB = rgbAggError.Finalize(flags, rotatedRGBWeightsSq);2089errorA = alphaAggError.Finalize(flags, rotatedAlphaWeightSq);2090}20912092rgbIndexes[px] = rgbIndex;2093alphaIndexes[px] = alphaIndex;2094}20952096ParallelMath::FloatCompFlag rgbBetter = ParallelMath::Less(errorRGB, bestRGBError);2097ParallelMath::FloatCompFlag alphaBetter = ParallelMath::Less(errorA, bestAlphaError);20982099ParallelMath::Int16CompFlag rgbBetterInt16 = ParallelMath::FloatFlagToInt16(rgbBetter);2100ParallelMath::Int16CompFlag alphaBetterInt16 = ParallelMath::FloatFlagToInt16(alphaBetter);21012102if (ParallelMath::AnySet(rgbBetterInt16))2103{2104bestRGBError = ParallelMath::Min(errorRGB, bestRGBError);21052106for (int px = 0; px < 16; px++)2107ParallelMath::ConditionalSet(bestRGBIndexes[px], rgbBetterInt16, rgbIndexes[px]);21082109for (int ep = 0; ep < 2; ep++)2110{2111for (int ch = 0; ch < 3; ch++)2112ParallelMath::ConditionalSet(bestEP[ep][ch], rgbBetterInt16, rgbEP[ep][ch]);2113}2114}21152116if (ParallelMath::AnySet(alphaBetterInt16))2117{2118bestAlphaError = ParallelMath::Min(errorA, bestAlphaError);21192120for (int px = 0; px < 16; px++)2121ParallelMath::ConditionalSet(bestAlphaIndexes[px], alphaBetterInt16, alphaIndexes[px]);21222123for (int ep = 0; ep < 2; ep++)2124ParallelMath::ConditionalSet(bestEP[ep][3], alphaBetterInt16, alphaEP[ep]);2125}21262127if (refine != numRefineRounds - 1)2128{2129rgbRefiner.GetRefinedEndpointsLDR(rgbEP, rtn);21302131MUInt15 alphaEPTemp[2][1];2132alphaRefiner.GetRefinedEndpointsLDR(alphaEPTemp, rtn);21332134for (int i = 0; i < 2; i++)2135alphaEP[i] = alphaEPTemp[i][0];2136}2137} // refine2138} // tweak21392140MFloat combinedError = bestRGBError + bestAlphaError;21412142ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(combinedError, work.m_error);2143ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter);21442145work.m_error = ParallelMath::Min(combinedError, work.m_error);21462147ParallelMath::ConditionalSet(work.m_mode, errorBetter16, ParallelMath::MakeUInt15(mode));2148ParallelMath::ConditionalSet(work.m_u.m_isr.m_rotation, errorBetter16, ParallelMath::MakeUInt15(rotation));2149ParallelMath::ConditionalSet(work.m_u.m_isr.m_indexSelector, errorBetter16, ParallelMath::MakeUInt15(indexSelector));21502151for (int px = 0; px < 16; px++)2152{2153ParallelMath::ConditionalSet(work.m_indexes[px], errorBetter16, indexSelector ? bestAlphaIndexes[px] : bestRGBIndexes[px]);2154ParallelMath::ConditionalSet(work.m_indexes2[px], errorBetter16, indexSelector ? bestRGBIndexes[px] : bestAlphaIndexes[px]);2155}21562157for (int ep = 0; ep < 2; ep++)2158for (int ch = 0; ch < 4; ch++)2159ParallelMath::ConditionalSet(work.m_ep[0][ep][ch], errorBetter16, bestEP[ep][ch]);2160}2161}2162}2163}21642165template<class T>2166void cvtt::Internal::BC7Computer::Swap(T& a, T& b)2167{2168T temp = a;2169a = b;2170b = temp;2171}21722173void cvtt::Internal::BC7Computer::Pack(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds)2174{2175MUInt15 pixels[16][4];2176MFloat floatPixels[16][4];21772178for (int px = 0; px < 16; px++)2179{2180for (int ch = 0; ch < 4; ch++)2181ParallelMath::ConvertLDRInputs(inputs, px, ch, pixels[px][ch]);2182}21832184for (int px = 0; px < 16; px++)2185{2186for (int ch = 0; ch < 4; ch++)2187floatPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]);2188}21892190BC67::WorkInfo work;2191memset(&work, 0, sizeof(work));21922193work.m_error = ParallelMath::MakeFloat(FLT_MAX);21942195{2196ParallelMath::RoundTowardNearestForScope rtn;2197TrySinglePlane(flags, pixels, floatPixels, channelWeights, encodingPlan, numRefineRounds, work, &rtn);2198TryDualPlane(flags, pixels, floatPixels, channelWeights, encodingPlan, numRefineRounds, work, &rtn);2199}22002201for (int block = 0; block < ParallelMath::ParallelSize; block++)2202{2203PackingVector pv;2204pv.Init();22052206ParallelMath::ScalarUInt16 mode = ParallelMath::Extract(work.m_mode, block);2207ParallelMath::ScalarUInt16 partition = ParallelMath::Extract(work.m_u.m_partition, block);2208ParallelMath::ScalarUInt16 indexSelector = ParallelMath::Extract(work.m_u.m_isr.m_indexSelector, block);22092210const BC7Data::BC7ModeInfo& modeInfo = BC7Data::g_modes[mode];22112212ParallelMath::ScalarUInt16 indexes[16];2213ParallelMath::ScalarUInt16 indexes2[16];2214ParallelMath::ScalarUInt16 endPoints[3][2][4];22152216for (int i = 0; i < 16; i++)2217{2218indexes[i] = ParallelMath::Extract(work.m_indexes[i], block);2219if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)2220indexes2[i] = ParallelMath::Extract(work.m_indexes2[i], block);2221}22222223for (int subset = 0; subset < 3; subset++)2224{2225for (int ep = 0; ep < 2; ep++)2226{2227for (int ch = 0; ch < 4; ch++)2228endPoints[subset][ep][ch] = ParallelMath::Extract(work.m_ep[subset][ep][ch], block);2229}2230}22312232int fixups[3] = { 0, 0, 0 };22332234if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)2235{2236bool flipRGB = ((indexes[0] & (1 << (modeInfo.m_indexBits - 1))) != 0);2237bool flipAlpha = ((indexes2[0] & (1 << (modeInfo.m_alphaIndexBits - 1))) != 0);22382239if (flipRGB)2240{2241uint16_t highIndex = (1 << modeInfo.m_indexBits) - 1;2242for (int px = 0; px < 16; px++)2243indexes[px] = highIndex - indexes[px];2244}22452246if (flipAlpha)2247{2248uint16_t highIndex = (1 << modeInfo.m_alphaIndexBits) - 1;2249for (int px = 0; px < 16; px++)2250indexes2[px] = highIndex - indexes2[px];2251}22522253if (indexSelector)2254Swap(flipRGB, flipAlpha);22552256if (flipRGB)2257{2258for (int ch = 0; ch < 3; ch++)2259Swap(endPoints[0][0][ch], endPoints[0][1][ch]);2260}2261if (flipAlpha)2262Swap(endPoints[0][0][3], endPoints[0][1][3]);22632264}2265else2266{2267if (modeInfo.m_numSubsets == 2)2268fixups[1] = BC7Data::g_fixupIndexes2[partition];2269else if (modeInfo.m_numSubsets == 3)2270{2271fixups[1] = BC7Data::g_fixupIndexes3[partition][0];2272fixups[2] = BC7Data::g_fixupIndexes3[partition][1];2273}22742275bool flip[3] = { false, false, false };2276for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)2277flip[subset] = ((indexes[fixups[subset]] & (1 << (modeInfo.m_indexBits - 1))) != 0);22782279if (flip[0] || flip[1] || flip[2])2280{2281uint16_t highIndex = (1 << modeInfo.m_indexBits) - 1;2282for (int px = 0; px < 16; px++)2283{2284int subset = 0;2285if (modeInfo.m_numSubsets == 2)2286subset = (BC7Data::g_partitionMap[partition] >> px) & 1;2287else if (modeInfo.m_numSubsets == 3)2288subset = (BC7Data::g_partitionMap2[partition] >> (px * 2)) & 3;22892290if (flip[subset])2291indexes[px] = highIndex - indexes[px];2292}22932294int maxCH = (modeInfo.m_alphaMode == BC7Data::AlphaMode_Combined) ? 4 : 3;2295for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)2296{2297if (flip[subset])2298for (int ch = 0; ch < maxCH; ch++)2299Swap(endPoints[subset][0][ch], endPoints[subset][1][ch]);2300}2301}2302}23032304pv.Pack(static_cast<uint8_t>(1 << mode), mode + 1);23052306if (modeInfo.m_partitionBits)2307pv.Pack(partition, modeInfo.m_partitionBits);23082309if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)2310{2311ParallelMath::ScalarUInt16 rotation = ParallelMath::Extract(work.m_u.m_isr.m_rotation, block);2312pv.Pack(rotation, 2);2313}23142315if (modeInfo.m_hasIndexSelector)2316pv.Pack(indexSelector, 1);23172318// Encode RGB2319for (int ch = 0; ch < 3; ch++)2320{2321for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)2322{2323for (int ep = 0; ep < 2; ep++)2324{2325ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][ch];2326epPart >>= (8 - modeInfo.m_rgbBits);23272328pv.Pack(epPart, modeInfo.m_rgbBits);2329}2330}2331}23322333// Encode alpha2334if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)2335{2336for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)2337{2338for (int ep = 0; ep < 2; ep++)2339{2340ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][3];2341epPart >>= (8 - modeInfo.m_alphaBits);23422343pv.Pack(epPart, modeInfo.m_alphaBits);2344}2345}2346}23472348// Encode parity bits2349if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerSubset)2350{2351for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)2352{2353ParallelMath::ScalarUInt16 epPart = endPoints[subset][0][0];2354epPart >>= (7 - modeInfo.m_rgbBits);2355epPart &= 1;23562357pv.Pack(epPart, 1);2358}2359}2360else if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerEndpoint)2361{2362for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)2363{2364for (int ep = 0; ep < 2; ep++)2365{2366ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][0];2367epPart >>= (7 - modeInfo.m_rgbBits);2368epPart &= 1;23692370pv.Pack(epPart, 1);2371}2372}2373}23742375// Encode indexes2376for (int px = 0; px < 16; px++)2377{2378int bits = modeInfo.m_indexBits;2379if ((px == 0) || (px == fixups[1]) || (px == fixups[2]))2380bits--;23812382pv.Pack(indexes[px], bits);2383}23842385// Encode secondary indexes2386if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)2387{2388for (int px = 0; px < 16; px++)2389{2390int bits = modeInfo.m_alphaIndexBits;2391if (px == 0)2392bits--;23932394pv.Pack(indexes2[px], bits);2395}2396}23972398pv.Flush(packedBlocks);23992400packedBlocks += 16;2401}2402}24032404void cvtt::Internal::BC7Computer::UnpackOne(PixelBlockU8 &output, const uint8_t* packedBlock)2405{2406UnpackingVector pv;2407pv.Init(packedBlock);24082409int mode = 8;2410for (int i = 0; i < 8; i++)2411{2412if (pv.Unpack(1) == 1)2413{2414mode = i;2415break;2416}2417}24182419if (mode > 7)2420{2421for (int px = 0; px < 16; px++)2422for (int ch = 0; ch < 4; ch++)2423output.m_pixels[px][ch] = 0;24242425return;2426}24272428const BC7Data::BC7ModeInfo &modeInfo = BC7Data::g_modes[mode];24292430int partition = 0;2431if (modeInfo.m_partitionBits)2432partition = pv.Unpack(modeInfo.m_partitionBits);24332434int rotation = 0;2435if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)2436rotation = pv.Unpack(2);24372438int indexSelector = 0;2439if (modeInfo.m_hasIndexSelector)2440indexSelector = pv.Unpack(1);24412442// Resolve fixups2443int fixups[3] = { 0, 0, 0 };24442445if (modeInfo.m_alphaMode != BC7Data::AlphaMode_Separate)2446{2447if (modeInfo.m_numSubsets == 2)2448fixups[1] = BC7Data::g_fixupIndexes2[partition];2449else if (modeInfo.m_numSubsets == 3)2450{2451fixups[1] = BC7Data::g_fixupIndexes3[partition][0];2452fixups[2] = BC7Data::g_fixupIndexes3[partition][1];2453}2454}24552456int endPoints[3][2][4];24572458// Decode RGB2459for (int ch = 0; ch < 3; ch++)2460{2461for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)2462{2463for (int ep = 0; ep < 2; ep++)2464endPoints[subset][ep][ch] = (pv.Unpack(modeInfo.m_rgbBits) << (8 - modeInfo.m_rgbBits));2465}2466}24672468// Decode alpha2469if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)2470{2471for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)2472{2473for (int ep = 0; ep < 2; ep++)2474endPoints[subset][ep][3] = (pv.Unpack(modeInfo.m_alphaBits) << (8 - modeInfo.m_alphaBits));2475}2476}2477else2478{2479for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)2480{2481for (int ep = 0; ep < 2; ep++)2482endPoints[subset][ep][3] = 255;2483}2484}24852486int parityBits = 0;24872488// Decode parity bits2489if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerSubset)2490{2491for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)2492{2493int p = pv.Unpack(1);24942495for (int ep = 0; ep < 2; ep++)2496{2497for (int ch = 0; ch < 3; ch++)2498endPoints[subset][ep][ch] |= p << (7 - modeInfo.m_rgbBits);24992500if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)2501endPoints[subset][ep][3] |= p << (7 - modeInfo.m_alphaBits);2502}2503}25042505parityBits = 1;2506}2507else if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerEndpoint)2508{2509for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)2510{2511for (int ep = 0; ep < 2; ep++)2512{2513int p = pv.Unpack(1);25142515for (int ch = 0; ch < 3; ch++)2516endPoints[subset][ep][ch] |= p << (7 - modeInfo.m_rgbBits);25172518if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)2519endPoints[subset][ep][3] |= p << (7 - modeInfo.m_alphaBits);2520}2521}25222523parityBits = 1;2524}25252526// Fill endpoint bits2527for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)2528{2529for (int ep = 0; ep < 2; ep++)2530{2531for (int ch = 0; ch < 3; ch++)2532endPoints[subset][ep][ch] |= (endPoints[subset][ep][ch] >> (modeInfo.m_rgbBits + parityBits));25332534if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)2535endPoints[subset][ep][3] |= (endPoints[subset][ep][3] >> (modeInfo.m_alphaBits + parityBits));2536}2537}25382539int indexes[16];2540int indexes2[16];25412542// Decode indexes2543for (int px = 0; px < 16; px++)2544{2545int bits = modeInfo.m_indexBits;2546if ((px == 0) || (px == fixups[1]) || (px == fixups[2]))2547bits--;25482549indexes[px] = pv.Unpack(bits);2550}25512552// Decode secondary indexes2553if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)2554{2555for (int px = 0; px < 16; px++)2556{2557int bits = modeInfo.m_alphaIndexBits;2558if (px == 0)2559bits--;25602561indexes2[px] = pv.Unpack(bits);2562}2563}2564else2565{2566for (int px = 0; px < 16; px++)2567indexes2[px] = 0;2568}25692570const int *alphaWeights = BC7Data::g_weightTables[modeInfo.m_alphaIndexBits];2571const int *rgbWeights = BC7Data::g_weightTables[modeInfo.m_indexBits];25722573// Decode each pixel2574for (int px = 0; px < 16; px++)2575{2576int rgbWeight = 0;2577int alphaWeight = 0;25782579int rgbIndex = indexes[px];25802581rgbWeight = rgbWeights[indexes[px]];25822583if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Combined)2584alphaWeight = rgbWeight;2585else if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)2586alphaWeight = alphaWeights[indexes2[px]];25872588if (indexSelector == 1)2589{2590int temp = rgbWeight;2591rgbWeight = alphaWeight;2592alphaWeight = temp;2593}25942595int pixel[4] = { 0, 0, 0, 255 };25962597int subset = 0;25982599if (modeInfo.m_numSubsets == 2)2600subset = (BC7Data::g_partitionMap[partition] >> px) & 1;2601else if (modeInfo.m_numSubsets == 3)2602subset = (BC7Data::g_partitionMap2[partition] >> (px * 2)) & 3;26032604for (int ch = 0; ch < 3; ch++)2605pixel[ch] = ((64 - rgbWeight) * endPoints[subset][0][ch] + rgbWeight * endPoints[subset][1][ch] + 32) >> 6;26062607if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)2608pixel[3] = ((64 - alphaWeight) * endPoints[subset][0][3] + alphaWeight * endPoints[subset][1][3] + 32) >> 6;26092610if (rotation != 0)2611{2612int ch = rotation - 1;2613int temp = pixel[ch];2614pixel[ch] = pixel[3];2615pixel[3] = temp;2616}26172618for (int ch = 0; ch < 4; ch++)2619output.m_pixels[px][ch] = static_cast<uint8_t>(pixel[ch]);2620}2621}26222623cvtt::ParallelMath::SInt16 cvtt::Internal::BC6HComputer::QuantizeSingleEndpointElementSigned(const MSInt16 &elem2CL, int precision, const ParallelMath::RoundUpForScope* ru)2624{2625assert(ParallelMath::AllSet(ParallelMath::Less(elem2CL, ParallelMath::MakeSInt16(31744))));2626assert(ParallelMath::AllSet(ParallelMath::Less(ParallelMath::MakeSInt16(-31744), elem2CL)));26272628// Expand to full range2629ParallelMath::Int16CompFlag isNegative = ParallelMath::Less(elem2CL, ParallelMath::MakeSInt16(0));2630MUInt15 absElem = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Select(isNegative, ParallelMath::MakeSInt16(0) - elem2CL, elem2CL));26312632absElem = ParallelMath::RightShift(ParallelMath::RoundAndConvertToU15(ParallelMath::ToFloat(absElem) * 32.0f / 31.0f, ru), 16 - precision);26332634MSInt16 absElemS16 = ParallelMath::LosslessCast<MSInt16>::Cast(absElem);26352636return ParallelMath::Select(isNegative, ParallelMath::MakeSInt16(0) - absElemS16, absElemS16);2637}26382639cvtt::ParallelMath::UInt15 cvtt::Internal::BC6HComputer::QuantizeSingleEndpointElementUnsigned(const MUInt15 &elem, int precision, const ParallelMath::RoundUpForScope* ru)2640{2641MUInt16 expandedElem = ParallelMath::RoundAndConvertToU16(ParallelMath::Min(ParallelMath::ToFloat(elem) * 64.0f / 31.0f, ParallelMath::MakeFloat(65535.0f)), ru);2642return ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(expandedElem, 16 - precision));2643}26442645void cvtt::Internal::BC6HComputer::UnquantizeSingleEndpointElementSigned(const MSInt16 &comp, int precision, MSInt16 &outUnquantized, MSInt16 &outUnquantizedFinished2CL)2646{2647MSInt16 zero = ParallelMath::MakeSInt16(0);26482649ParallelMath::Int16CompFlag negative = ParallelMath::Less(comp, zero);2650MUInt15 absComp = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Select(negative, MSInt16(zero - comp), comp));26512652MSInt16 unq;2653MUInt15 absUnq;26542655if (precision >= 16)2656{2657unq = comp;2658absUnq = absComp;2659}2660else2661{2662MSInt16 maxCompMinusOne = ParallelMath::MakeSInt16(static_cast<int16_t>((1 << (precision - 1)) - 2));2663ParallelMath::Int16CompFlag isZero = ParallelMath::Equal(comp, zero);2664ParallelMath::Int16CompFlag isMax = ParallelMath::Less(maxCompMinusOne, comp);26652666absUnq = (absComp << (16 - precision)) + ParallelMath::MakeUInt15(static_cast<uint16_t>(0x4000 >> (precision - 1)));2667ParallelMath::ConditionalSet(absUnq, isZero, ParallelMath::MakeUInt15(0));2668ParallelMath::ConditionalSet(absUnq, isMax, ParallelMath::MakeUInt15(0x7fff));26692670unq = ParallelMath::ConditionalNegate(negative, ParallelMath::LosslessCast<MSInt16>::Cast(absUnq));2671}26722673outUnquantized = unq;26742675MUInt15 funq = ParallelMath::ToUInt15(ParallelMath::RightShift(ParallelMath::XMultiply(absUnq, ParallelMath::MakeUInt15(31)), 5));26762677outUnquantizedFinished2CL = ParallelMath::ConditionalNegate(negative, ParallelMath::LosslessCast<MSInt16>::Cast(funq));2678}26792680void cvtt::Internal::BC6HComputer::UnquantizeSingleEndpointElementUnsigned(const MUInt15 &comp, int precision, MUInt16 &outUnquantized, MUInt16 &outUnquantizedFinished)2681{2682MUInt16 unq = ParallelMath::LosslessCast<MUInt16>::Cast(comp);2683if (precision < 15)2684{2685MUInt15 zero = ParallelMath::MakeUInt15(0);2686MUInt15 maxCompMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << precision) - 2));26872688ParallelMath::Int16CompFlag isZero = ParallelMath::Equal(comp, zero);2689ParallelMath::Int16CompFlag isMax = ParallelMath::Less(maxCompMinusOne, comp);26902691unq = (ParallelMath::LosslessCast<MUInt16>::Cast(comp) << (16 - precision)) + ParallelMath::MakeUInt16(static_cast<uint16_t>(0x8000 >> precision));26922693ParallelMath::ConditionalSet(unq, isZero, ParallelMath::MakeUInt16(0));2694ParallelMath::ConditionalSet(unq, isMax, ParallelMath::MakeUInt16(0xffff));2695}26962697outUnquantized = unq;2698outUnquantizedFinished = ParallelMath::ToUInt16(ParallelMath::RightShift(ParallelMath::XMultiply(unq, ParallelMath::MakeUInt15(31)), 6));2699}27002701void cvtt::Internal::BC6HComputer::QuantizeEndpointsSigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn)2702{2703MSInt16 unquantizedEP[2][3];2704MSInt16 finishedUnquantizedEP[2][3];27052706{2707ParallelMath::RoundUpForScope ru;27082709for (int epi = 0; epi < 2; epi++)2710{2711for (int ch = 0; ch < 3; ch++)2712{2713MSInt16 qee = QuantizeSingleEndpointElementSigned(endPoints[epi][ch], precision, &ru);2714UnquantizeSingleEndpointElementSigned(qee, precision, unquantizedEP[epi][ch], finishedUnquantizedEP[epi][ch]);2715quantizedEndPoints[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(qee);2716}2717}2718}27192720indexSelector.Init(channelWeights, unquantizedEP, finishedUnquantizedEP, indexRange);2721indexSelector.InitHDR(indexRange, true, fastIndexing, channelWeights);27222723MUInt15 halfRangeMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange / 2) - 1);27242725MUInt15 index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixelsColorSpace[fixupIndex], rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[fixupIndex], rtn);27262727ParallelMath::Int16CompFlag invert = ParallelMath::Less(halfRangeMinusOne, index);27282729if (ParallelMath::AnySet(invert))2730{2731ParallelMath::ConditionalSet(index, invert, MUInt15(ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange - 1)) - index));27322733indexSelector.ConditionalInvert(invert);27342735for (int ch = 0; ch < 3; ch++)2736{2737MAInt16 firstEP = quantizedEndPoints[0][ch];2738MAInt16 secondEP = quantizedEndPoints[1][ch];27392740quantizedEndPoints[0][ch] = ParallelMath::Select(invert, secondEP, firstEP);2741quantizedEndPoints[1][ch] = ParallelMath::Select(invert, firstEP, secondEP);2742}2743}27442745indexes[fixupIndex] = index;2746}27472748void cvtt::Internal::BC6HComputer::QuantizeEndpointsUnsigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn)2749{2750MUInt16 unquantizedEP[2][3];2751MUInt16 finishedUnquantizedEP[2][3];27522753{2754ParallelMath::RoundUpForScope ru;27552756for (int epi = 0; epi < 2; epi++)2757{2758for (int ch = 0; ch < 3; ch++)2759{2760MUInt15 qee = QuantizeSingleEndpointElementUnsigned(ParallelMath::LosslessCast<MUInt15>::Cast(endPoints[epi][ch]), precision, &ru);2761UnquantizeSingleEndpointElementUnsigned(qee, precision, unquantizedEP[epi][ch], finishedUnquantizedEP[epi][ch]);2762quantizedEndPoints[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(qee);2763}2764}2765}27662767indexSelector.Init(channelWeights, unquantizedEP, finishedUnquantizedEP, indexRange);2768indexSelector.InitHDR(indexRange, false, fastIndexing, channelWeights);27692770MUInt15 halfRangeMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange / 2) - 1);27712772MUInt15 index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixelsColorSpace[fixupIndex], rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[fixupIndex], rtn);27732774ParallelMath::Int16CompFlag invert = ParallelMath::Less(halfRangeMinusOne, index);27752776if (ParallelMath::AnySet(invert))2777{2778ParallelMath::ConditionalSet(index, invert, MUInt15(ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange - 1)) - index));27792780indexSelector.ConditionalInvert(invert);27812782for (int ch = 0; ch < 3; ch++)2783{2784MAInt16 firstEP = quantizedEndPoints[0][ch];2785MAInt16 secondEP = quantizedEndPoints[1][ch];27862787quantizedEndPoints[0][ch] = ParallelMath::Select(invert, secondEP, firstEP);2788quantizedEndPoints[1][ch] = ParallelMath::Select(invert, firstEP, secondEP);2789}2790}27912792indexes[fixupIndex] = index;2793}27942795void cvtt::Internal::BC6HComputer::EvaluatePartitionedLegality(const MAInt16 ep0[2][3], const MAInt16 ep1[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][2][3], ParallelMath::Int16CompFlag& outIsLegal)2796{2797ParallelMath::Int16CompFlag allLegal = ParallelMath::MakeBoolInt16(true);27982799MAInt16 aSignificantMask = ParallelMath::MakeAInt16(static_cast<int16_t>((1 << aPrec) - 1));28002801for (int ch = 0; ch < 3; ch++)2802{2803outEncodedEPs[0][0][ch] = ep0[0][ch];2804outEncodedEPs[0][1][ch] = ep0[1][ch];2805outEncodedEPs[1][0][ch] = ep1[0][ch];2806outEncodedEPs[1][1][ch] = ep1[1][ch];28072808if (isTransformed)2809{2810for (int subset = 0; subset < 2; subset++)2811{2812for (int epi = 0; epi < 2; epi++)2813{2814if (epi == 0 && subset == 0)2815continue;28162817MAInt16 bReduced = (outEncodedEPs[subset][epi][ch] & aSignificantMask);28182819MSInt16 delta = ParallelMath::TruncateToPrecisionSigned(ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::AbstractSubtract(outEncodedEPs[subset][epi][ch], outEncodedEPs[0][0][ch])), bPrec[ch]);28202821outEncodedEPs[subset][epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(delta);28222823MAInt16 reconstructed = (ParallelMath::AbstractAdd(outEncodedEPs[subset][epi][ch], outEncodedEPs[0][0][ch]) & aSignificantMask);2824allLegal = allLegal & ParallelMath::Equal(reconstructed, bReduced);2825}2826}2827}28282829if (!ParallelMath::AnySet(allLegal))2830break;2831}28322833outIsLegal = allLegal;2834}28352836void cvtt::Internal::BC6HComputer::EvaluateSingleLegality(const MAInt16 ep[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][3], ParallelMath::Int16CompFlag& outIsLegal)2837{2838ParallelMath::Int16CompFlag allLegal = ParallelMath::MakeBoolInt16(true);28392840MAInt16 aSignificantMask = ParallelMath::MakeAInt16(static_cast<int16_t>((1 << aPrec) - 1));28412842for (int ch = 0; ch < 3; ch++)2843{2844outEncodedEPs[0][ch] = ep[0][ch];2845outEncodedEPs[1][ch] = ep[1][ch];28462847if (isTransformed)2848{2849MAInt16 bReduced = (outEncodedEPs[1][ch] & aSignificantMask);28502851MSInt16 delta = ParallelMath::TruncateToPrecisionSigned(ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::AbstractSubtract(outEncodedEPs[1][ch], outEncodedEPs[0][ch])), bPrec[ch]);28522853outEncodedEPs[1][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(delta);28542855MAInt16 reconstructed = (ParallelMath::AbstractAdd(outEncodedEPs[1][ch], outEncodedEPs[0][ch]) & aSignificantMask);2856allLegal = allLegal & ParallelMath::Equal(reconstructed, bReduced);2857}2858}28592860outIsLegal = allLegal;2861}28622863void cvtt::Internal::BC6HComputer::Pack(uint32_t flags, const PixelBlockF16* inputs, uint8_t* packedBlocks, const float channelWeights[4], bool isSigned, int numTweakRounds, int numRefineRounds)2864{2865if (numTweakRounds < 1)2866numTweakRounds = 1;2867else if (numTweakRounds > MaxTweakRounds)2868numTweakRounds = MaxTweakRounds;28692870if (numRefineRounds < 1)2871numRefineRounds = 1;2872else if (numRefineRounds > MaxRefineRounds)2873numRefineRounds = MaxRefineRounds;28742875bool fastIndexing = ((flags & cvtt::Flags::BC6H_FastIndexing) != 0);2876float channelWeightsSq[3];28772878ParallelMath::RoundTowardNearestForScope rtn;28792880MSInt16 pixels[16][3];2881MFloat floatPixels2CL[16][3];2882MFloat floatPixelsLinearWeighted[16][3];28832884MSInt16 low15Bits = ParallelMath::MakeSInt16(32767);28852886for (int ch = 0; ch < 3; ch++)2887channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch];28882889for (int px = 0; px < 16; px++)2890{2891for (int ch = 0; ch < 3; ch++)2892{2893MSInt16 pixelValue;2894ParallelMath::ConvertHDRInputs(inputs, px, ch, pixelValue);28952896// Convert from sign+magnitude to 2CL2897if (isSigned)2898{2899ParallelMath::Int16CompFlag negative = ParallelMath::Less(pixelValue, ParallelMath::MakeSInt16(0));2900MSInt16 magnitude = (pixelValue & low15Bits);2901ParallelMath::ConditionalSet(pixelValue, negative, ParallelMath::MakeSInt16(0) - magnitude);2902pixelValue = ParallelMath::Max(pixelValue, ParallelMath::MakeSInt16(-31743));2903}2904else2905pixelValue = ParallelMath::Max(pixelValue, ParallelMath::MakeSInt16(0));29062907pixelValue = ParallelMath::Min(pixelValue, ParallelMath::MakeSInt16(31743));29082909pixels[px][ch] = pixelValue;2910floatPixels2CL[px][ch] = ParallelMath::ToFloat(pixelValue);2911floatPixelsLinearWeighted[px][ch] = ParallelMath::TwosCLHalfToFloat(pixelValue) * channelWeights[ch];2912}2913}29142915MFloat preWeightedPixels[16][3];29162917BCCommon::PreWeightPixelsHDR<3>(preWeightedPixels, pixels, channelWeights);29182919MAInt16 bestEndPoints[2][2][3];2920MUInt15 bestIndexes[16];2921MFloat bestError = ParallelMath::MakeFloat(FLT_MAX);2922MUInt15 bestMode = ParallelMath::MakeUInt15(0);2923MUInt15 bestPartition = ParallelMath::MakeUInt15(0);29242925for (int px = 0; px < 16; px++)2926bestIndexes[px] = ParallelMath::MakeUInt15(0);29272928for (int subset = 0; subset < 2; subset++)2929for (int epi = 0; epi < 2; epi++)2930for (int ch = 0; ch < 3; ch++)2931bestEndPoints[subset][epi][ch] = ParallelMath::MakeAInt16(0);29322933UnfinishedEndpoints<3> partitionedUFEP[32][2];2934UnfinishedEndpoints<3> singleUFEP;29352936// Generate UFEP for partitions2937for (int p = 0; p < 32; p++)2938{2939int partitionMask = BC7Data::g_partitionMap[p];29402941EndpointSelector<3, 8> epSelectors[2];29422943for (int pass = 0; pass < NumEndpointSelectorPasses; pass++)2944{2945for (int px = 0; px < 16; px++)2946{2947int subset = (partitionMask >> px) & 1;2948epSelectors[subset].ContributePass(preWeightedPixels[px], pass, ParallelMath::MakeFloat(1.0f));2949}29502951for (int subset = 0; subset < 2; subset++)2952epSelectors[subset].FinishPass(pass);2953}29542955for (int subset = 0; subset < 2; subset++)2956partitionedUFEP[p][subset] = epSelectors[subset].GetEndpoints(channelWeights);2957}29582959// Generate UFEP for single2960{2961EndpointSelector<3, 8> epSelector;29622963for (int pass = 0; pass < NumEndpointSelectorPasses; pass++)2964{2965for (int px = 0; px < 16; px++)2966epSelector.ContributePass(preWeightedPixels[px], pass, ParallelMath::MakeFloat(1.0f));29672968epSelector.FinishPass(pass);2969}29702971singleUFEP = epSelector.GetEndpoints(channelWeights);2972}29732974for (int partitionedInt = 0; partitionedInt < 2; partitionedInt++)2975{2976bool partitioned = (partitionedInt == 1);29772978for (int aPrec = BC7Data::g_maxHDRPrecision; aPrec >= 0; aPrec--)2979{2980if (!BC7Data::g_hdrModesExistForPrecision[partitionedInt][aPrec])2981continue;29822983int numPartitions = partitioned ? 32 : 1;2984int numSubsets = partitioned ? 2 : 1;2985int indexBits = partitioned ? 3 : 4;2986int indexRange = (1 << indexBits);29872988for (int p = 0; p < numPartitions; p++)2989{2990int partitionMask = partitioned ? BC7Data::g_partitionMap[p] : 0;29912992const int MaxMetaRounds = MaxTweakRounds * MaxRefineRounds;29932994MAInt16 metaEndPointsQuantized[MaxMetaRounds][2][2][3];2995MUInt15 metaIndexes[MaxMetaRounds][16];2996MFloat metaError[MaxMetaRounds][2];29972998bool roundValid[MaxMetaRounds][2];29993000for (int r = 0; r < MaxMetaRounds; r++)3001for (int subset = 0; subset < 2; subset++)3002roundValid[r][subset] = true;30033004for (int subset = 0; subset < numSubsets; subset++)3005{3006for (int tweak = 0; tweak < MaxTweakRounds; tweak++)3007{3008EndpointRefiner<3> refiners[2];30093010bool abortRemainingRefines = false;3011for (int refinePass = 0; refinePass < MaxRefineRounds; refinePass++)3012{3013int metaRound = tweak * MaxRefineRounds + refinePass;30143015if (tweak >= numTweakRounds || refinePass >= numRefineRounds)3016abortRemainingRefines = true;30173018if (abortRemainingRefines)3019{3020roundValid[metaRound][subset] = false;3021continue;3022}30233024MAInt16(&mrQuantizedEndPoints)[2][2][3] = metaEndPointsQuantized[metaRound];3025MUInt15(&mrIndexes)[16] = metaIndexes[metaRound];30263027MSInt16 endPointsColorSpace[2][3];30283029if (refinePass == 0)3030{3031UnfinishedEndpoints<3> ufep = partitioned ? partitionedUFEP[p][subset] : singleUFEP;30323033if (isSigned)3034ufep.FinishHDRSigned(tweak, indexRange, endPointsColorSpace[0], endPointsColorSpace[1], &rtn);3035else3036ufep.FinishHDRUnsigned(tweak, indexRange, endPointsColorSpace[0], endPointsColorSpace[1], &rtn);3037}3038else3039refiners[subset].GetRefinedEndpointsHDR(endPointsColorSpace, isSigned, &rtn);30403041refiners[subset].Init(indexRange, channelWeights);30423043int fixupIndex = (subset == 0) ? 0 : BC7Data::g_fixupIndexes2[p];30443045IndexSelectorHDR<3> indexSelector;3046if (isSigned)3047QuantizeEndpointsSigned(endPointsColorSpace, floatPixels2CL, floatPixelsLinearWeighted, mrQuantizedEndPoints[subset], mrIndexes, indexSelector, fixupIndex, aPrec, indexRange, channelWeights, fastIndexing, &rtn);3048else3049QuantizeEndpointsUnsigned(endPointsColorSpace, floatPixels2CL, floatPixelsLinearWeighted, mrQuantizedEndPoints[subset], mrIndexes, indexSelector, fixupIndex, aPrec, indexRange, channelWeights, fastIndexing, &rtn);30503051if (metaRound > 0)3052{3053ParallelMath::Int16CompFlag anySame = ParallelMath::MakeBoolInt16(false);30543055for (int prevRound = 0; prevRound < metaRound; prevRound++)3056{3057MAInt16(&prevRoundEPs)[2][3] = metaEndPointsQuantized[prevRound][subset];30583059ParallelMath::Int16CompFlag same = ParallelMath::MakeBoolInt16(true);30603061for (int epi = 0; epi < 2; epi++)3062for (int ch = 0; ch < 3; ch++)3063same = (same & ParallelMath::Equal(prevRoundEPs[epi][ch], mrQuantizedEndPoints[subset][epi][ch]));30643065anySame = (anySame | same);3066if (ParallelMath::AllSet(anySame))3067break;3068}30693070if (ParallelMath::AllSet(anySame))3071{3072roundValid[metaRound][subset] = false;3073continue;3074}3075}30763077MFloat subsetError = ParallelMath::MakeFloatZero();30783079{3080for (int px = 0; px < 16; px++)3081{3082if (subset != ((partitionMask >> px) & 1))3083continue;30843085MUInt15 index;3086if (px == fixupIndex)3087index = mrIndexes[px];3088else3089{3090index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixels2CL[px], &rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[px], &rtn);3091mrIndexes[px] = index;3092}30933094MSInt16 reconstructed[3];3095if (isSigned)3096indexSelector.ReconstructHDRSigned(mrIndexes[px], reconstructed);3097else3098indexSelector.ReconstructHDRUnsigned(mrIndexes[px], reconstructed);30993100subsetError = subsetError + (fastIndexing ? BCCommon::ComputeErrorHDRFast<3>(flags, reconstructed, pixels[px], channelWeightsSq) : BCCommon::ComputeErrorHDRSlow<3>(flags, reconstructed, pixels[px], channelWeightsSq));31013102if (refinePass != numRefineRounds - 1)3103refiners[subset].ContributeUnweightedPW(preWeightedPixels[px], index);3104}3105}31063107metaError[metaRound][subset] = subsetError;3108}3109}3110}31113112// Now we have a bunch of attempts, but not all of them will fit in the delta coding scheme3113int numMeta1 = partitioned ? MaxMetaRounds : 1;3114for (int meta0 = 0; meta0 < MaxMetaRounds; meta0++)3115{3116if (!roundValid[meta0][0])3117continue;31183119for (int meta1 = 0; meta1 < numMeta1; meta1++)3120{3121MFloat combinedError = metaError[meta0][0];3122if (partitioned)3123{3124if (!roundValid[meta1][1])3125continue;31263127combinedError = combinedError + metaError[meta1][1];3128}31293130ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(combinedError, bestError);3131if (!ParallelMath::AnySet(errorBetter))3132continue;31333134ParallelMath::Int16CompFlag needsCommit = ParallelMath::FloatFlagToInt16(errorBetter);31353136// Figure out if this is encodable3137for (int mode = 0; mode < BC7Data::g_numHDRModes; mode++)3138{3139const BC7Data::BC6HModeInfo &modeInfo = BC7Data::g_hdrModes[mode];31403141if (modeInfo.m_partitioned != partitioned || modeInfo.m_aPrec != aPrec)3142continue;31433144MAInt16 encodedEPs[2][2][3];3145ParallelMath::Int16CompFlag isLegal;3146if (partitioned)3147EvaluatePartitionedLegality(metaEndPointsQuantized[meta0][0], metaEndPointsQuantized[meta1][1], modeInfo.m_aPrec, modeInfo.m_bPrec, modeInfo.m_transformed, encodedEPs, isLegal);3148else3149EvaluateSingleLegality(metaEndPointsQuantized[meta0][0], modeInfo.m_aPrec, modeInfo.m_bPrec, modeInfo.m_transformed, encodedEPs[0], isLegal);31503151ParallelMath::Int16CompFlag isLegalAndBetter = (ParallelMath::FloatFlagToInt16(errorBetter) & isLegal);3152if (!ParallelMath::AnySet(isLegalAndBetter))3153continue;31543155ParallelMath::FloatCompFlag isLegalAndBetterFloat = ParallelMath::Int16FlagToFloat(isLegalAndBetter);31563157ParallelMath::ConditionalSet(bestError, isLegalAndBetterFloat, combinedError);3158ParallelMath::ConditionalSet(bestMode, isLegalAndBetter, ParallelMath::MakeUInt15(static_cast<uint16_t>(mode)));3159ParallelMath::ConditionalSet(bestPartition, isLegalAndBetter, ParallelMath::MakeUInt15(static_cast<uint16_t>(p)));31603161for (int subset = 0; subset < numSubsets; subset++)3162{3163for (int epi = 0; epi < 2; epi++)3164{3165for (int ch = 0; ch < 3; ch++)3166ParallelMath::ConditionalSet(bestEndPoints[subset][epi][ch], isLegalAndBetter, encodedEPs[subset][epi][ch]);3167}3168}31693170for (int px = 0; px < 16; px++)3171{3172int subset = ((partitionMask >> px) & 1);3173if (subset == 0)3174ParallelMath::ConditionalSet(bestIndexes[px], isLegalAndBetter, metaIndexes[meta0][px]);3175else3176ParallelMath::ConditionalSet(bestIndexes[px], isLegalAndBetter, metaIndexes[meta1][px]);3177}31783179needsCommit = ParallelMath::AndNot(needsCommit, isLegalAndBetter);3180if (!ParallelMath::AnySet(needsCommit))3181break;3182}3183}3184}3185}3186}3187}31883189// At this point, everything should be set3190for (int block = 0; block < ParallelMath::ParallelSize; block++)3191{3192ParallelMath::ScalarUInt16 mode = ParallelMath::Extract(bestMode, block);3193ParallelMath::ScalarUInt16 partition = ParallelMath::Extract(bestPartition, block);3194int32_t eps[2][2][3];3195ParallelMath::ScalarUInt16 indexes[16];31963197const BC7Data::BC6HModeInfo& modeInfo = BC7Data::g_hdrModes[mode];31983199const BC6HData::ModeDescriptor *desc = BC6HData::g_modeDescriptors[mode];32003201const size_t headerBits = modeInfo.m_partitioned ? 82 : 65;32023203for (int subset = 0; subset < 2; subset++)3204{3205for (int epi = 0; epi < 2; epi++)3206{3207for (int ch = 0; ch < 3; ch++)3208eps[subset][epi][ch] = ParallelMath::Extract(bestEndPoints[subset][epi][ch], block);3209}3210}32113212for (int px = 0; px < 16; px++)3213indexes[px] = ParallelMath::Extract(bestIndexes[px], block);32143215uint16_t modeID = modeInfo.m_modeID;32163217PackingVector pv;3218pv.Init();32193220for (size_t i = 0; i < headerBits; i++) {3221int32_t codedValue = 0;3222switch (desc[i].m_eField) {3223case BC6HData::M:3224codedValue = modeID;3225break;3226case BC6HData::D:3227codedValue = partition;3228break;3229case BC6HData::RW:3230codedValue = eps[0][0][0];3231break;3232case BC6HData::RX:3233codedValue = eps[0][1][0];3234break;3235case BC6HData::RY:3236codedValue = eps[1][0][0];3237break;3238case BC6HData::RZ:3239codedValue = eps[1][1][0];3240break;3241case BC6HData::GW:3242codedValue = eps[0][0][1];3243break;3244case BC6HData::GX:3245codedValue = eps[0][1][1];3246break;3247case BC6HData::GY:3248codedValue = eps[1][0][1];3249break;3250case BC6HData::GZ:3251codedValue = eps[1][1][1];3252break;3253case BC6HData::BW:3254codedValue = eps[0][0][2];3255break;3256case BC6HData::BX:3257codedValue = eps[0][1][2];3258break;3259case BC6HData::BY:3260codedValue = eps[1][0][2];3261break;3262case BC6HData::BZ:3263codedValue = eps[1][1][2];3264break;3265default:3266assert(false);3267break;3268}3269pv.Pack(static_cast<uint16_t>((codedValue >> desc[i].m_uBit) & 1), 1);3270}32713272int fixupIndex1 = 0;3273int indexBits = 4;3274if (modeInfo.m_partitioned)3275{3276fixupIndex1 = BC7Data::g_fixupIndexes2[partition];3277indexBits = 3;3278}32793280for (int px = 0; px < 16; px++)3281{3282ParallelMath::ScalarUInt16 index = ParallelMath::Extract(bestIndexes[px], block);3283if (px == 0 || px == fixupIndex1)3284pv.Pack(index, indexBits - 1);3285else3286pv.Pack(index, indexBits);3287}32883289pv.Flush(packedBlocks + 16 * block);3290}3291}32923293void cvtt::Internal::BC6HComputer::SignExtendSingle(int &v, int bits)3294{3295if (v & (1 << (bits - 1)))3296v |= -(1 << bits);3297}32983299void cvtt::Internal::BC6HComputer::UnpackOne(PixelBlockF16 &output, const uint8_t *pBC, bool isSigned)3300{3301UnpackingVector pv;3302pv.Init(pBC);33033304int numModeBits = 2;3305int modeBits = pv.Unpack(2);3306if (modeBits != 0 && modeBits != 1)3307{3308modeBits |= pv.Unpack(3) << 2;3309numModeBits += 3;3310}33113312int mode = -1;3313for (int possibleMode = 0; possibleMode < BC7Data::g_numHDRModes; possibleMode++)3314{3315if (BC7Data::g_hdrModes[possibleMode].m_modeID == modeBits)3316{3317mode = possibleMode;3318break;3319}3320}33213322if (mode < 0)3323{3324for (int px = 0; px < 16; px++)3325{3326for (int ch = 0; ch < 3; ch++)3327output.m_pixels[px][ch] = 0;3328output.m_pixels[px][3] = 0x3c00; // 1.03329}3330return;3331}33323333const BC7Data::BC6HModeInfo& modeInfo = BC7Data::g_hdrModes[mode];3334const size_t headerBits = modeInfo.m_partitioned ? 82 : 65;3335const BC6HData::ModeDescriptor *desc = BC6HData::g_modeDescriptors[mode];33363337int32_t partition = 0;3338int32_t eps[2][2][3];33393340for (int subset = 0; subset < 2; subset++)3341for (int epi = 0; epi < 2; epi++)3342for (int ch = 0; ch < 3; ch++)3343eps[subset][epi][ch] = 0;33443345for (size_t i = numModeBits; i < headerBits; i++) {3346int32_t *pCodedValue = NULL;33473348switch (desc[i].m_eField) {3349case BC6HData::D:3350pCodedValue = &partition;3351break;3352case BC6HData::RW:3353pCodedValue = &eps[0][0][0];3354break;3355case BC6HData::RX:3356pCodedValue = &eps[0][1][0];3357break;3358case BC6HData::RY:3359pCodedValue = &eps[1][0][0];3360break;3361case BC6HData::RZ:3362pCodedValue = &eps[1][1][0];3363break;3364case BC6HData::GW:3365pCodedValue = &eps[0][0][1];3366break;3367case BC6HData::GX:3368pCodedValue = &eps[0][1][1];3369break;3370case BC6HData::GY:3371pCodedValue = &eps[1][0][1];3372break;3373case BC6HData::GZ:3374pCodedValue = &eps[1][1][1];3375break;3376case BC6HData::BW:3377pCodedValue = &eps[0][0][2];3378break;3379case BC6HData::BX:3380pCodedValue = &eps[0][1][2];3381break;3382case BC6HData::BY:3383pCodedValue = &eps[1][0][2];3384break;3385case BC6HData::BZ:3386pCodedValue = &eps[1][1][2];3387break;3388default:3389assert(false);3390break;3391}33923393(*pCodedValue) |= pv.Unpack(1) << desc[i].m_uBit;3394}33953396uint16_t modeID = modeInfo.m_modeID;33973398int fixupIndex1 = 0;3399int indexBits = 4;3400int numSubsets = 1;3401if (modeInfo.m_partitioned)3402{3403fixupIndex1 = BC7Data::g_fixupIndexes2[partition];3404indexBits = 3;3405numSubsets = 2;3406}34073408int indexes[16];3409for (int px = 0; px < 16; px++)3410{3411if (px == 0 || px == fixupIndex1)3412indexes[px] = pv.Unpack(indexBits - 1);3413else3414indexes[px] = pv.Unpack(indexBits);3415}34163417if (modeInfo.m_partitioned)3418{3419for (int ch = 0; ch < 3; ch++)3420{3421if (isSigned)3422SignExtendSingle(eps[0][0][ch], modeInfo.m_aPrec);3423if (modeInfo.m_transformed || isSigned)3424{3425SignExtendSingle(eps[0][1][ch], modeInfo.m_bPrec[ch]);3426SignExtendSingle(eps[1][0][ch], modeInfo.m_bPrec[ch]);3427SignExtendSingle(eps[1][1][ch], modeInfo.m_bPrec[ch]);3428}3429}3430}3431else3432{3433for (int ch = 0; ch < 3; ch++)3434{3435if (isSigned)3436SignExtendSingle(eps[0][0][ch], modeInfo.m_aPrec);3437if (modeInfo.m_transformed || isSigned)3438SignExtendSingle(eps[0][1][ch], modeInfo.m_bPrec[ch]);3439}3440}34413442int aPrec = modeInfo.m_aPrec;34433444if (modeInfo.m_transformed)3445{3446for (int ch = 0; ch < 3; ch++)3447{3448int wrapMask = (1 << aPrec) - 1;34493450eps[0][1][ch] = ((eps[0][0][ch] + eps[0][1][ch]) & wrapMask);3451if (isSigned)3452SignExtendSingle(eps[0][1][ch], aPrec);34533454if (modeInfo.m_partitioned)3455{3456eps[1][0][ch] = ((eps[0][0][ch] + eps[1][0][ch]) & wrapMask);3457eps[1][1][ch] = ((eps[0][0][ch] + eps[1][1][ch]) & wrapMask);34583459if (isSigned)3460{3461SignExtendSingle(eps[1][0][ch], aPrec);3462SignExtendSingle(eps[1][1][ch], aPrec);3463}3464}3465}3466}34673468// Unquantize endpoints3469for (int subset = 0; subset < numSubsets; subset++)3470{3471for (int epi = 0; epi < 2; epi++)3472{3473for (int ch = 0; ch < 3; ch++)3474{3475int &v = eps[subset][epi][ch];34763477if (isSigned)3478{3479if (aPrec >= 16)3480{3481// Nothing3482}3483else3484{3485bool s = false;3486int comp = v;3487if (v < 0)3488{3489s = true;3490comp = -comp;3491}34923493int unq = 0;3494if (comp == 0)3495unq = 0;3496else if (comp >= ((1 << (aPrec - 1)) - 1))3497unq = 0x7fff;3498else3499unq = ((comp << 15) + 0x4000) >> (aPrec - 1);35003501if (s)3502unq = -unq;35033504v = unq;3505}3506}3507else3508{3509if (aPrec >= 15)3510{3511// Nothing3512}3513else if (v == 0)3514{3515// Nothing3516}3517else if (v == ((1 << aPrec) - 1))3518v = 0xffff;3519else3520v = ((v << 16) + 0x8000) >> aPrec;3521}3522}3523}3524}35253526const int *weights = BC7Data::g_weightTables[indexBits];35273528for (int px = 0; px < 16; px++)3529{3530int subset = 0;3531if (modeInfo.m_partitioned)3532subset = (BC7Data::g_partitionMap[partition] >> px) & 1;35333534int w = weights[indexes[px]];3535for (int ch = 0; ch < 3; ch++)3536{3537int comp = ((64 - w) * eps[subset][0][ch] + w * eps[subset][1][ch] + 32) >> 6;35383539if (isSigned)3540{3541if (comp < 0)3542comp = -(((-comp) * 31) >> 5);3543else3544comp = (comp * 31) >> 5;35453546int s = 0;3547if (comp < 0)3548{3549s = 0x8000;3550comp = -comp;3551}35523553output.m_pixels[px][ch] = static_cast<uint16_t>(s | comp);3554}3555else3556{3557comp = (comp * 31) >> 6;3558output.m_pixels[px][ch] = static_cast<uint16_t>(comp);3559}3560}3561output.m_pixels[px][3] = 0x3c00; // 1.03562}3563}35643565void cvtt::Kernels::ConfigureBC7EncodingPlanFromQuality(BC7EncodingPlan &encodingPlan, int quality)3566{3567static const int kMaxQuality = 100;35683569if (quality < 1)3570quality = 1;3571else if (quality > kMaxQuality)3572quality = kMaxQuality;35733574const int numRGBModes = cvtt::Tables::BC7Prio::g_bc7NumPrioCodesRGB * quality / kMaxQuality;3575const int numRGBAModes = cvtt::Tables::BC7Prio::g_bc7NumPrioCodesRGBA * quality / kMaxQuality;35763577const uint16_t *prioLists[] = { cvtt::Tables::BC7Prio::g_bc7PrioCodesRGB, cvtt::Tables::BC7Prio::g_bc7PrioCodesRGBA };3578const int prioListSizes[] = { numRGBModes, numRGBAModes };35793580BC7FineTuningParams ftParams;3581memset(&ftParams, 0, sizeof(ftParams));35823583for (int listIndex = 0; listIndex < 2; listIndex++)3584{3585int prioListSize = prioListSizes[listIndex];3586const uint16_t *prioList = prioLists[listIndex];35873588for (int prioIndex = 0; prioIndex < prioListSize; prioIndex++)3589{3590const uint16_t packedMode = prioList[prioIndex];35913592uint8_t seedPoints = static_cast<uint8_t>(cvtt::Tables::BC7Prio::UnpackSeedPointCount(packedMode));3593int mode = cvtt::Tables::BC7Prio::UnpackMode(packedMode);35943595switch (mode)3596{3597case 0:3598ftParams.mode0SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints;3599break;3600case 1:3601ftParams.mode1SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints;3602break;3603case 2:3604ftParams.mode2SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints;3605break;3606case 3:3607ftParams.mode3SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints;3608break;3609case 4:3610ftParams.mode4SP[cvtt::Tables::BC7Prio::UnpackRotation(packedMode)][cvtt::Tables::BC7Prio::UnpackIndexSelector(packedMode)] = seedPoints;3611break;3612case 5:3613ftParams.mode5SP[cvtt::Tables::BC7Prio::UnpackRotation(packedMode)] = seedPoints;3614break;3615case 6:3616ftParams.mode6SP = seedPoints;3617break;3618case 7:3619ftParams.mode7SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints;3620break;3621}3622}3623}36243625ConfigureBC7EncodingPlanFromFineTuningParams(encodingPlan, ftParams);3626}36273628// Generates a BC7 encoding plan from fine-tuning parameters.3629bool cvtt::Kernels::ConfigureBC7EncodingPlanFromFineTuningParams(BC7EncodingPlan &encodingPlan, const BC7FineTuningParams ¶ms)3630{3631memset(&encodingPlan, 0, sizeof(encodingPlan));36323633// Mode 03634for (int partition = 0; partition < 16; partition++)3635{3636uint8_t sp = params.mode0SP[partition];3637if (sp == 0)3638continue;36393640encodingPlan.mode0PartitionEnabled |= static_cast<uint16_t>(1) << partition;36413642for (int subset = 0; subset < 3; subset++)3643{3644int shape = cvtt::Internal::BC7Data::g_shapes3[partition][subset];3645encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp);3646}3647}36483649// Mode 13650for (int partition = 0; partition < 64; partition++)3651{3652uint8_t sp = params.mode1SP[partition];3653if (sp == 0)3654continue;36553656encodingPlan.mode1PartitionEnabled |= static_cast<uint64_t>(1) << partition;36573658for (int subset = 0; subset < 2; subset++)3659{3660int shape = cvtt::Internal::BC7Data::g_shapes2[partition][subset];3661encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp);3662}3663}36643665// Mode 23666for (int partition = 0; partition < 64; partition++)3667{3668uint8_t sp = params.mode2SP[partition];3669if (sp == 0)3670continue;36713672encodingPlan.mode2PartitionEnabled |= static_cast<uint64_t>(1) << partition;36733674for (int subset = 0; subset < 3; subset++)3675{3676int shape = cvtt::Internal::BC7Data::g_shapes3[partition][subset];3677encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp);3678}3679}36803681// Mode 33682for (int partition = 0; partition < 64; partition++)3683{3684uint8_t sp = params.mode3SP[partition];3685if (sp == 0)3686continue;36873688encodingPlan.mode3PartitionEnabled |= static_cast<uint64_t>(1) << partition;36893690for (int subset = 0; subset < 2; subset++)3691{3692int shape = cvtt::Internal::BC7Data::g_shapes2[partition][subset];3693encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp);3694}3695}36963697// Mode 43698for (int rotation = 0; rotation < 4; rotation++)3699{3700for (int indexMode = 0; indexMode < 2; indexMode++)3701encodingPlan.mode4SP[rotation][indexMode] = params.mode4SP[rotation][indexMode];3702}37033704// Mode 53705for (int rotation = 0; rotation < 4; rotation++)3706encodingPlan.mode5SP[rotation] = params.mode5SP[rotation];37073708// Mode 63709{3710uint8_t sp = params.mode6SP;3711if (sp != 0)3712{3713encodingPlan.mode6Enabled = true;37143715int shape = cvtt::Internal::BC7Data::g_shapes1[0][0];3716encodingPlan.seedPointsForShapeRGBA[shape] = std::max(encodingPlan.seedPointsForShapeRGBA[shape], sp);3717}3718}37193720// Mode 73721for (int partition = 0; partition < 64; partition++)3722{3723uint8_t sp = params.mode7SP[partition];3724if (sp == 0)3725continue;37263727encodingPlan.mode7RGBAPartitionEnabled |= static_cast<uint64_t>(1) << partition;37283729for (int subset = 0; subset < 2; subset++)3730{3731int shape = cvtt::Internal::BC7Data::g_shapes2[partition][subset];3732encodingPlan.seedPointsForShapeRGBA[shape] = std::max(encodingPlan.seedPointsForShapeRGBA[shape], sp);3733}3734}37353736for (int i = 0; i < BC7EncodingPlan::kNumRGBShapes; i++)3737{3738if (encodingPlan.seedPointsForShapeRGB[i] > 0)3739{3740encodingPlan.rgbShapeList[encodingPlan.rgbNumShapesToEvaluate] = i;3741encodingPlan.rgbNumShapesToEvaluate++;3742}3743}37443745for (int i = 0; i < BC7EncodingPlan::kNumRGBAShapes; i++)3746{3747if (encodingPlan.seedPointsForShapeRGBA[i] > 0)3748{3749encodingPlan.rgbaShapeList[encodingPlan.rgbaNumShapesToEvaluate] = i;3750encodingPlan.rgbaNumShapesToEvaluate++;3751}3752}37533754encodingPlan.mode7RGBPartitionEnabled = (encodingPlan.mode7RGBAPartitionEnabled & ~encodingPlan.mode3PartitionEnabled);37553756return true;3757}37583759#endif376037613762