// Source: GitHub repository godotengine/godot, path blob/master/thirdparty/cvtt/ConvectionKernels_BC67.cpp
/*
Convection Texture Tools
Copyright (c) 2018-2019 Eric Lasota

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject
to the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

-------------------------------------------------------------------------------------

Portions based on DirectX Texture Library (DirectXTex)

Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.

http://go.microsoft.com/fwlink/?LinkId=248926
*/
#include "ConvectionKernels_Config.h"

#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)

#include "ConvectionKernels_BC67.h"

#include "ConvectionKernels_AggregatedError.h"
#include "ConvectionKernels_BCCommon.h"
#include "ConvectionKernels_BC7_Prio.h"
#include "ConvectionKernels_BC7_SingleColor.h"
#include "ConvectionKernels_BC6H_IO.h"
#include "ConvectionKernels_EndpointRefiner.h"
#include "ConvectionKernels_EndpointSelector.h"
#include "ConvectionKernels_IndexSelectorHDR.h"
#include "ConvectionKernels_ParallelMath.h"
#include "ConvectionKernels_UnfinishedEndpoints.h"
49
50
namespace cvtt
51
{
52
namespace Internal
53
{
54
namespace BC67
55
{
56
typedef ParallelMath::Float MFloat;
57
typedef ParallelMath::UInt15 MUInt15;
58
59
struct WorkInfo
60
{
61
MUInt15 m_mode;
62
MFloat m_error;
63
MUInt15 m_ep[3][2][4];
64
MUInt15 m_indexes[16];
65
MUInt15 m_indexes2[16];
66
67
union
68
{
69
MUInt15 m_partition;
70
struct IndexSelectorAndRotation
71
{
72
MUInt15 m_indexSelector;
73
MUInt15 m_rotation;
74
} m_isr;
75
} m_u;
76
};
77
}
78
79
namespace BC6HData
{
    // Identifies which field a descriptor entry refers to.
    // NOTE(review): R*/G*/B* presumably name the red/green/blue components of
    // endpoints W, X, Y and Z - confirm against the code that consumes the table.
    enum EField
    {
        NA, // N/A
        M,  // Mode
        D,  // Shape
        RW,
        RX,
        RY,
        RZ,
        GW,
        GX,
        GY,
        GZ,
        BW,
        BX,
        BY,
        BZ,
    };

    // One table entry: a field and a bit number associated with that field
    // (presumably the bit index inside the field - confirm).
    struct ModeDescriptor
    {
        EField m_eField;
        uint8_t m_uBit;
    };

    // 14 modes x 82 descriptors. The comment on each mode gives the mode number,
    // its mode ID in hex and the endpoint precisions; these match the entries of
    // g_hdrModes in BC7Data. Unused trailing slots are filled with { NA, 0 }.
    const ModeDescriptor g_modeDescriptors[14][82] =
    {
        {   // Mode 1 (0x00) - 10 5 5 5
            { M, 0 },{ M, 1 },{ GY, 4 },{ BY, 4 },{ BZ, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        {   // Mode 2 (0x01) - 7 6 6 6
            { M, 0 },{ M, 1 },{ GY, 5 },{ GZ, 4 },{ GZ, 5 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ BZ, 0 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ BY, 5 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BZ, 3 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BX, 5 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        {   // Mode 3 (0x02) - 11 5 4 4
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RW,10 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,10 },
            { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,10 },
            { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        {   // Mode 4 (0x06) - 11 4 5 4
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,10 },
            { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GW,10 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,10 },
            { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ BZ, 0 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ GY, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        {   // Mode 5 (0x0a) - 11 4 4 5
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,10 },
            { BY, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,10 },
            { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BW,10 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ BZ, 1 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ BZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        {   // Mode 6 (0x0e) - 9 5 5 5
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        {   // Mode 7 (0x12) - 8 6 5 5
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ GZ, 4 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BZ, 3 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        {   // Mode 8 (0x16) - 8 5 6 5
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ BZ, 0 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GY, 5 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ GZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        {   // Mode 9 (0x1a) - 8 5 5 6
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ BY, 5 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BX, 5 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        {   // Mode 10 (0x1e) - 6 6 6 6
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ GZ, 4 },{ BZ, 0 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GY, 5 },{ BY, 5 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ GZ, 5 },{ BZ, 3 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BX, 5 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 },
            { RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 },
            { D, 3 },{ D, 4 },
        },

        {   // Mode 11 (0x03) - 10 10
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RX, 5 },{ RX, 6 },{ RX, 7 },{ RX, 8 },{ RX, 9 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GX, 5 },{ GX, 6 },{ GX, 7 },{ GX, 8 },{ GX, 9 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BX, 5 },{ BX, 6 },{ BX, 7 },{ BX, 8 },{ BX, 9 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },
        },

        {   // Mode 12 (0x07) - 11 9
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RX, 5 },{ RX, 6 },{ RX, 7 },{ RX, 8 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GX, 5 },{ GX, 6 },{ GX, 7 },{ GX, 8 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BX, 5 },{ BX, 6 },{ BX, 7 },{ BX, 8 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },
        },

        {   // Mode 13 (0x0b) - 12 8
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 },
            { RX, 5 },{ RX, 6 },{ RX, 7 },{ RW,11 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 },
            { GX, 5 },{ GX, 6 },{ GX, 7 },{ GW,11 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 },
            { BX, 5 },{ BX, 6 },{ BX, 7 },{ BW,11 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },
        },

        {   // Mode 14 (0x0f) - 16 4
            { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 },
            { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 },
            { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 },
            { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,15 },
            { RW,14 },{ RW,13 },{ RW,12 },{ RW,11 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,15 },
            { GW,14 },{ GW,13 },{ GW,12 },{ GW,11 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,15 },
            { BW,14 },{ BW,13 },{ BW,12 },{ BW,11 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },
            { NA, 0 },{ NA, 0 },
        },
    };
}
277
278
namespace BC7Data
279
{
280
// How a mode stores alpha relative to colour.
enum AlphaMode
{
    AlphaMode_Combined,
    AlphaMode_Separate,
    AlphaMode_None,
};

// P-bit arrangement used by a mode.
enum PBitMode
{
    PBitMode_PerEndpoint,
    PBitMode_PerSubset,
    PBitMode_None
};

// Static description of one BC7 mode; g_modes below holds one entry per mode.
struct BC7ModeInfo
{
    PBitMode m_pBitMode;
    AlphaMode m_alphaMode;
    int m_rgbBits;
    int m_alphaBits;
    int m_partitionBits;
    int m_numSubsets;
    int m_indexBits;
    int m_alphaIndexBits;
    bool m_hasIndexSelector;
};

// Mode table, indexed by mode number (0-7).
// Columns follow the BC7ModeInfo member order:
//   pBitMode, alphaMode, rgbBits, alphaBits, partitionBits, numSubsets,
//   indexBits, alphaIndexBits, hasIndexSelector
BC7ModeInfo g_modes[] =
{
    { PBitMode_PerEndpoint, AlphaMode_None, 4, 0, 4, 3, 3, 0, false },     // 0
    { PBitMode_PerSubset, AlphaMode_None, 6, 0, 6, 2, 3, 0, false },       // 1
    { PBitMode_None, AlphaMode_None, 5, 0, 6, 3, 2, 0, false },            // 2
    { PBitMode_PerEndpoint, AlphaMode_None, 7, 0, 6, 2, 2, 0, false },     // 3 (Mode reference has an error, P-bit is really per-endpoint)

    { PBitMode_None, AlphaMode_Separate, 5, 6, 0, 1, 2, 3, true },         // 4
    { PBitMode_None, AlphaMode_Separate, 7, 8, 0, 1, 2, 2, false },        // 5
    { PBitMode_PerEndpoint, AlphaMode_Combined, 7, 7, 0, 1, 4, 0, false }, // 6
    { PBitMode_PerEndpoint, AlphaMode_Combined, 5, 5, 6, 2, 2, 0, false }  // 7
};
319
320
// Weight tables with 4, 8 and 16 entries, each running from 0 to 64.
// NOTE(review): presumably the interpolation weights for 2-, 3- and 4-bit indexes - confirm.
const int g_weight2[] = { 0, 21, 43, 64 };
const int g_weight3[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
const int g_weight4[] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };

// Lookup by table number: entry n points at g_weight<n>; entries 0 and 1 have no table.
const int *g_weightTables[] =
{
    NULL,
    NULL,
    g_weight2,
    g_weight3,
    g_weight4
};
332
333
// Static description of one BC6H mode; g_hdrModes below holds one entry per mode.
struct BC6HModeInfo
{
    uint16_t m_modeID;
    bool m_partitioned;
    bool m_transformed;
    int m_aPrec;
    int m_bPrec[3];
};

// [partitioned][precision]
// Row 0 covers non-partitioned modes, row 1 partitioned modes. An entry is true
// exactly when g_hdrModes contains a mode of that kind whose m_aPrec equals the
// column index.
bool g_hdrModesExistForPrecision[2][17] =
{
    //0      1      2      3      4      5      6      7      8      9      10     11     12     13     14     15     16
    { false, false, false, false, false, false, false, false, false, false, true,  true,  true,  false, false, false, true  },
    { false, false, false, false, false, false, true,  true,  true,  true,  true,  true,  false, false, false, false, false },
};

// Columns: modeID, partitioned, transformed, aPrec, { bPrec[0], bPrec[1], bPrec[2] }.
// The order and mode IDs match the "Mode N (0x..)" comments in BC6HData::g_modeDescriptors.
BC6HModeInfo g_hdrModes[] =
{
    { 0x00, true,  true,  10, { 5, 5, 5 } },
    { 0x01, true,  true,  7,  { 6, 6, 6 } },
    { 0x02, true,  true,  11, { 5, 4, 4 } },
    { 0x06, true,  true,  11, { 4, 5, 4 } },
    { 0x0a, true,  true,  11, { 4, 4, 5 } },
    { 0x0e, true,  true,  9,  { 5, 5, 5 } },
    { 0x12, true,  true,  8,  { 6, 5, 5 } },
    { 0x16, true,  true,  8,  { 5, 6, 5 } },
    { 0x1a, true,  true,  8,  { 5, 5, 6 } },
    { 0x1e, true,  false, 6,  { 6, 6, 6 } },
    { 0x03, false, false, 10, { 10, 10, 10 } },
    { 0x07, false, true,  11, { 9, 9, 9 } },
    { 0x0b, false, true,  12, { 8, 8, 8 } },
    { 0x0f, false, true,  16, { 4, 4, 4 } },
};

// Largest m_aPrec in g_hdrModes; also the last valid column of g_hdrModesExistForPrecision.
const int g_maxHDRPrecision = 16;

// Number of entries in g_hdrModes.
static const size_t g_numHDRModes = sizeof(g_hdrModes) / sizeof(g_hdrModes[0]);
371
372
// 64 sixteen-bit masks.
// NOTE(review): presumably one bit per pixel of a 4x4 block for each
// two-subset partition - confirm against the code that reads it.
static uint16_t g_partitionMap[64] =
{
    0xCCCC, 0x8888, 0xEEEE, 0xECC8,
    0xC880, 0xFEEC, 0xFEC8, 0xEC80,
    0xC800, 0xFFEC, 0xFE80, 0xE800,
    0xFFE8, 0xFF00, 0xFFF0, 0xF000,
    0xF710, 0x008E, 0x7100, 0x08CE,
    0x008C, 0x7310, 0x3100, 0x8CCE,
    0x088C, 0x3110, 0x6666, 0x366C,
    0x17E8, 0x0FF0, 0x718E, 0x399C,
    0xaaaa, 0xf0f0, 0x5a5a, 0x33cc,
    0x3c3c, 0x55aa, 0x9696, 0xa55a,
    0x73ce, 0x13c8, 0x324c, 0x3bdc,
    0x6996, 0xc33c, 0x9966, 0x660,
    0x272, 0x4e4, 0x4e40, 0x2720,
    0xc936, 0x936c, 0x39c6, 0x639c,
    0x9336, 0x9cc6, 0x817e, 0xe718,
    0xccf0, 0xfcc, 0x7744, 0xee22,
};
391
392
// 64 thirty-two-bit masks.
// NOTE(review): presumably two bits per pixel of a 4x4 block for each
// three-subset partition - confirm against the code that reads it.
static uint32_t g_partitionMap2[64] =
{
    0xaa685050, 0x6a5a5040, 0x5a5a4200, 0x5450a0a8,
    0xa5a50000, 0xa0a05050, 0x5555a0a0, 0x5a5a5050,
    0xaa550000, 0xaa555500, 0xaaaa5500, 0x90909090,
    0x94949494, 0xa4a4a4a4, 0xa9a59450, 0x2a0a4250,
    0xa5945040, 0x0a425054, 0xa5a5a500, 0x55a0a0a0,
    0xa8a85454, 0x6a6a4040, 0xa4a45000, 0x1a1a0500,
    0x0050a4a4, 0xaaa59090, 0x14696914, 0x69691400,
    0xa08585a0, 0xaa821414, 0x50a4a450, 0x6a5a0200,
    0xa9a58000, 0x5090a0a8, 0xa8a09050, 0x24242424,
    0x00aa5500, 0x24924924, 0x24499224, 0x50a50a50,
    0x500aa550, 0xaaaa4444, 0x66660000, 0xa5a0a5a0,
    0x50a050a0, 0x69286928, 0x44aaaa44, 0x66666600,
    0xaa444444, 0x54a854a8, 0x95809580, 0x96969600,
    0xa85454a8, 0x80959580, 0xaa141414, 0x96960000,
    0xaaaa1414, 0xa05050a0, 0xa0a5a5a0, 0x96000000,
    0x40804080, 0xa9a8a9a8, 0xaaaaaa44, 0x2a4a5254,
};
411
412
// One value (0-15) for each of 64 entries.
// NOTE(review): presumably the anchor ("fix-up") pixel index of the second
// subset for each two-subset partition - confirm against the callers.
static int g_fixupIndexes2[64] =
{
    15,15,15,15,
    15,15,15,15,
    15,15,15,15,
    15,15,15,15,
    15, 2, 8, 2,
     2, 8, 8,15,
     2, 8, 2, 2,
     8, 8, 2, 2,

    15,15, 6, 8,
     2, 8,15,15,
     2, 8, 2, 2,
     2,15,15, 6,
     6, 2, 6, 8,
    15,15, 2, 2,
    15,15,15,15,
    15, 2, 2,15,
};
432
433
// Two values (0-15) for each of 64 entries.
// NOTE(review): presumably the anchor ("fix-up") pixel indexes of the second
// and third subsets for each three-subset partition - confirm against the callers.
static int g_fixupIndexes3[64][2] =
{
    { 3,15 },{ 3, 8 },{ 15, 8 },{ 15, 3 },
    { 8,15 },{ 3,15 },{ 15, 3 },{ 15, 8 },
    { 8,15 },{ 8,15 },{ 6,15 },{ 6,15 },
    { 6,15 },{ 5,15 },{ 3,15 },{ 3, 8 },
    { 3,15 },{ 3, 8 },{ 8,15 },{ 15, 3 },
    { 3,15 },{ 3, 8 },{ 6,15 },{ 10, 8 },
    { 5, 3 },{ 8,15 },{ 8, 6 },{ 6,10 },
    { 8,15 },{ 5,15 },{ 15,10 },{ 15, 8 },

    { 8,15 },{ 15, 3 },{ 3,15 },{ 5,10 },
    { 6,10 },{ 10, 8 },{ 8, 9 },{ 15,10 },
    { 15, 6 },{ 3,15 },{ 15, 8 },{ 5,15 },
    { 15, 3 },{ 15, 6 },{ 15, 6 },{ 15, 8 },
    { 3,15 },{ 15, 3 },{ 5,15 },{ 5,15 },
    { 5,15 },{ 8,15 },{ 5,15 },{ 10,15 },
    { 5,15 },{ 10,15 },{ 8,15 },{ 13,15 },
    { 15, 3 },{ 12,15 },{ 3,15 },{ 3, 8 },
};
453
454
// Flat pool of value lists, every value in the range 0-15.
// Each row is one list; its trailing comment is "offset, count" of the row
// inside this array, and the same pairs are stored in g_shapeRanges.
// NOTE(review): the values are presumably pixel indexes within a 4x4 block - confirm.
static const unsigned char g_fragments[] =
{
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 0, 16
    0, 1, 2, 3, // 16, 4
    0, 1, 4, // 20, 3
    0, 1, 2, 4, // 23, 4
    2, 3, 7, // 27, 3
    1, 2, 3, 7, // 30, 4
    0, 1, 2, 3, 4, 5, 6, 7, // 34, 8
    0, 1, 4, 8, // 42, 4
    0, 1, 2, 4, 5, 8, // 46, 6
    0, 1, 2, 3, 4, 5, 6, 8, // 52, 8
    1, 4, 5, 6, 9, // 60, 5
    2, 5, 6, 7, 10, // 65, 5
    5, 6, 9, 10, // 70, 4
    2, 3, 7, 11, // 74, 4
    1, 2, 3, 6, 7, 11, // 78, 6
    0, 1, 2, 3, 5, 6, 7, 11, // 84, 8
    0, 1, 2, 3, 8, 9, 10, 11, // 92, 8
    2, 3, 6, 7, 8, 9, 10, 11, // 100, 8
    4, 5, 6, 7, 8, 9, 10, 11, // 108, 8
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, // 116, 12
    0, 4, 8, 12, // 128, 4
    0, 2, 3, 4, 6, 7, 8, 12, // 132, 8
    0, 1, 2, 4, 5, 8, 9, 12, // 140, 8
    0, 1, 2, 3, 4, 5, 6, 8, 9, 12, // 148, 10
    3, 6, 7, 8, 9, 12, // 158, 6
    3, 5, 6, 7, 8, 9, 10, 12, // 164, 8
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, // 172, 12
    0, 1, 2, 5, 6, 7, 11, 12, // 184, 8
    5, 8, 9, 10, 13, // 192, 5
    8, 12, 13, // 197, 3
    4, 8, 12, 13, // 200, 4
    2, 3, 6, 9, 12, 13, // 204, 6
    0, 1, 2, 3, 8, 9, 12, 13, // 210, 8
    0, 1, 4, 5, 8, 9, 12, 13, // 218, 8
    2, 3, 6, 7, 8, 9, 12, 13, // 226, 8
    2, 3, 5, 6, 9, 10, 12, 13, // 234, 8
    0, 3, 6, 7, 9, 10, 12, 13, // 242, 8
    0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 13, // 250, 12
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, // 262, 13
    2, 3, 4, 7, 8, 11, 12, 13, // 275, 8
    1, 2, 6, 7, 8, 11, 12, 13, // 283, 8
    2, 3, 4, 6, 7, 8, 9, 11, 12, 13, // 291, 10
    2, 3, 4, 5, 10, 11, 12, 13, // 301, 8
    0, 1, 6, 7, 10, 11, 12, 13, // 309, 8
    6, 9, 10, 11, 14, // 317, 5
    0, 2, 4, 6, 8, 10, 12, 14, // 322, 8
    1, 3, 5, 7, 8, 10, 12, 14, // 330, 8
    1, 3, 4, 6, 9, 11, 12, 14, // 338, 8
    0, 2, 5, 7, 9, 11, 12, 14, // 346, 8
    0, 3, 4, 5, 8, 9, 13, 14, // 354, 8
    2, 3, 4, 7, 8, 9, 13, 14, // 362, 8
    1, 2, 5, 6, 9, 10, 13, 14, // 370, 8
    0, 3, 4, 7, 9, 10, 13, 14, // 378, 8
    0, 3, 5, 6, 8, 11, 13, 14, // 386, 8
    1, 2, 4, 7, 8, 11, 13, 14, // 394, 8
    0, 1, 4, 7, 10, 11, 13, 14, // 402, 8
    0, 3, 6, 7, 10, 11, 13, 14, // 410, 8
    8, 12, 13, 14, // 418, 4
    1, 2, 3, 7, 8, 12, 13, 14, // 422, 8
    4, 8, 9, 12, 13, 14, // 430, 6
    0, 4, 5, 8, 9, 12, 13, 14, // 436, 8
    1, 2, 3, 6, 7, 8, 9, 12, 13, 14, // 444, 10
    2, 6, 8, 9, 10, 12, 13, 14, // 454, 8
    0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, // 462, 12
    0, 7, 9, 10, 11, 12, 13, 14, // 474, 8
    1, 2, 3, 4, 5, 6, 8, 15, // 482, 8
    3, 7, 11, 15, // 490, 4
    0, 1, 3, 4, 5, 7, 11, 15, // 494, 8
    0, 4, 5, 10, 11, 15, // 502, 6
    1, 2, 3, 6, 7, 10, 11, 15, // 508, 8
    0, 1, 2, 3, 5, 6, 7, 10, 11, 15, // 516, 10
    0, 4, 5, 6, 9, 10, 11, 15, // 526, 8
    0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 15, // 534, 12
    1, 2, 4, 5, 8, 9, 12, 15, // 546, 8
    2, 3, 5, 6, 8, 9, 12, 15, // 554, 8
    0, 3, 5, 6, 9, 10, 12, 15, // 562, 8
    1, 2, 4, 7, 9, 10, 12, 15, // 570, 8
    1, 2, 5, 6, 8, 11, 12, 15, // 578, 8
    0, 3, 4, 7, 8, 11, 12, 15, // 586, 8
    0, 1, 5, 6, 10, 11, 12, 15, // 594, 8
    1, 2, 6, 7, 10, 11, 12, 15, // 602, 8
    1, 3, 4, 6, 8, 10, 13, 15, // 610, 8
    0, 2, 5, 7, 8, 10, 13, 15, // 618, 8
    0, 2, 4, 6, 9, 11, 13, 15, // 626, 8
    1, 3, 5, 7, 9, 11, 13, 15, // 634, 8
    0, 1, 2, 3, 4, 5, 7, 8, 12, 13, 15, // 642, 11
    2, 3, 4, 5, 8, 9, 14, 15, // 653, 8
    0, 1, 6, 7, 8, 9, 14, 15, // 661, 8
    0, 1, 5, 10, 14, 15, // 669, 6
    0, 3, 4, 5, 9, 10, 14, 15, // 675, 8
    0, 1, 5, 6, 9, 10, 14, 15, // 683, 8
    11, 14, 15, // 691, 3
    7, 11, 14, 15, // 694, 4
    1, 2, 4, 5, 8, 11, 14, 15, // 698, 8
    0, 1, 4, 7, 8, 11, 14, 15, // 706, 8
    0, 1, 4, 5, 10, 11, 14, 15, // 714, 8
    2, 3, 6, 7, 10, 11, 14, 15, // 722, 8
    4, 5, 6, 7, 10, 11, 14, 15, // 730, 8
    0, 1, 4, 5, 7, 8, 10, 11, 14, 15, // 738, 10
    0, 1, 2, 3, 5, 6, 7, 9, 10, 11, 14, 15, // 748, 12
    0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 14, 15, // 760, 13
    0, 1, 2, 3, 4, 6, 7, 11, 12, 14, 15, // 773, 11
    3, 4, 8, 9, 10, 13, 14, 15, // 784, 8
    11, 13, 14, 15, // 792, 4
    0, 1, 2, 4, 11, 13, 14, 15, // 796, 8
    0, 1, 2, 4, 5, 10, 11, 13, 14, 15, // 804, 10
    7, 10, 11, 13, 14, 15, // 814, 6
    3, 6, 7, 10, 11, 13, 14, 15, // 820, 8
    1, 5, 9, 10, 11, 13, 14, 15, // 828, 8
    1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, // 836, 12
    12, 13, 14, 15, // 848, 4
    0, 1, 2, 3, 12, 13, 14, 15, // 852, 8
    0, 1, 4, 5, 12, 13, 14, 15, // 860, 8
    4, 5, 6, 7, 12, 13, 14, 15, // 868, 8
    4, 8, 9, 10, 12, 13, 14, 15, // 876, 8
    0, 4, 5, 8, 9, 10, 12, 13, 14, 15, // 884, 10
    0, 1, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, // 894, 12
    0, 1, 2, 3, 4, 7, 8, 11, 12, 13, 14, 15, // 906, 12
    0, 1, 3, 4, 8, 9, 11, 12, 13, 14, 15, // 918, 11
    0, 2, 3, 7, 8, 10, 11, 12, 13, 14, 15, // 929, 11
    7, 9, 10, 11, 12, 13, 14, 15, // 940, 8
    3, 6, 7, 9, 10, 11, 12, 13, 14, 15, // 948, 10
    2, 3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, // 958, 12
    8, 9, 10, 11, 12, 13, 14, 15, // 970, 8
    0, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, // 978, 12
    0, 1, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, // 990, 13
    3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1003, 12
    2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1015, 13
    4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1028, 12
    0, 2, // 1040, 2
    1, 3, // 1042, 2
    0, 1, 4, 5, // 1044, 4
    0, 1, 2, 4, 5, // 1048, 5
    2, 3, 6, // 1053, 3
    0, 2, 4, 6, // 1056, 4
    1, 2, 5, 6, // 1060, 4
    0, 1, 2, 3, 5, 6, // 1064, 6
    0, 1, 2, 4, 5, 6, // 1070, 6
    0, 1, 2, 3, 4, 5, 6, // 1076, 7
    0, 3, 4, 7, // 1083, 4
    0, 1, 2, 3, 4, 7, // 1087, 6
    1, 3, 5, 7, // 1093, 4
    2, 3, 6, 7, // 1097, 4
    1, 2, 3, 6, 7, // 1101, 5
    1, 2, 3, 5, 6, 7, // 1106, 6
    0, 1, 2, 3, 5, 6, 7, // 1112, 7
    4, 5, 6, 7, // 1119, 4
    0, 8, // 1123, 2
    0, 1, 4, 5, 8, // 1125, 5
    0, 1, 8, 9, // 1130, 4
    4, 5, 8, 9, // 1134, 4
    0, 1, 4, 5, 8, 9, // 1138, 6
    2, 6, 8, 9, // 1144, 4
    6, 7, 8, 9, // 1148, 4
    0, 2, 4, 6, 8, 10, // 1152, 6
    1, 2, 5, 6, 9, 10, // 1158, 6
    0, 3, 4, 7, 9, 10, // 1164, 6
    0, 1, 2, 8, 9, 10, // 1170, 6
    4, 5, 6, 8, 9, 10, // 1176, 6
    3, 11, // 1182, 2
    2, 3, 6, 7, 11, // 1184, 5
    0, 3, 8, 11, // 1189, 4
    0, 3, 4, 7, 8, 11, // 1193, 6
    1, 3, 5, 7, 9, 11, // 1199, 6
    2, 3, 10, 11, // 1205, 4
    1, 5, 10, 11, // 1209, 4
    4, 5, 10, 11, // 1213, 4
    6, 7, 10, 11, // 1217, 4
    2, 3, 6, 7, 10, 11, // 1221, 6
    1, 2, 3, 9, 10, 11, // 1227, 6
    5, 6, 7, 9, 10, 11, // 1233, 6
    8, 9, 10, 11, // 1239, 4
    4, 12, // 1243, 2
    0, 1, 2, 3, 4, 5, 8, 12, // 1245, 8
    8, 9, 12, // 1253, 3
    0, 4, 5, 8, 9, 12, // 1256, 6
    0, 1, 4, 5, 8, 9, 12, // 1262, 7
    2, 3, 5, 6, 8, 9, 12, // 1269, 7
    1, 5, 9, 13, // 1276, 4
    6, 7, 9, 13, // 1280, 4
    1, 4, 7, 10, 13, // 1284, 5
    1, 6, 8, 11, 13, // 1289, 5
    0, 1, 12, 13, // 1294, 4
    4, 5, 12, 13, // 1298, 4
    0, 1, 6, 7, 12, 13, // 1302, 6
    0, 1, 4, 8, 12, 13, // 1308, 6
    8, 9, 12, 13, // 1314, 4
    4, 8, 9, 12, 13, // 1318, 5
    4, 5, 8, 9, 12, 13, // 1323, 6
    0, 4, 5, 8, 9, 12, 13, // 1329, 7
    0, 1, 6, 10, 12, 13, // 1336, 6
    3, 6, 7, 9, 10, 12, 13, // 1342, 7
    0, 1, 10, 11, 12, 13, // 1349, 6
    2, 4, 7, 9, 14, // 1355, 5
    4, 5, 10, 14, // 1360, 4
    2, 6, 10, 14, // 1364, 4
    2, 5, 8, 11, 14, // 1368, 5
    0, 2, 12, 14, // 1373, 4
    8, 10, 12, 14, // 1377, 4
    4, 6, 8, 10, 12, 14, // 1381, 6
    13, 14, // 1387, 2
    9, 10, 13, 14, // 1389, 4
    5, 6, 9, 10, 13, 14, // 1393, 6
    0, 1, 2, 12, 13, 14, // 1399, 6
    4, 5, 6, 12, 13, 14, // 1405, 6
    8, 9, 12, 13, 14, // 1411, 5
    8, 9, 10, 12, 13, 14, // 1416, 6
    7, 15, // 1422, 2
    0, 5, 10, 15, // 1424, 4
    0, 1, 2, 3, 6, 7, 11, 15, // 1428, 8
    10, 11, 15, // 1436, 3
    0, 1, 5, 6, 10, 11, 15, // 1439, 7
    3, 6, 7, 10, 11, 15, // 1446, 6
    12, 15, // 1452, 2
    0, 3, 12, 15, // 1454, 4
    4, 7, 12, 15, // 1458, 4
    0, 3, 6, 9, 12, 15, // 1462, 6
    0, 3, 5, 10, 12, 15, // 1468, 6
    8, 11, 12, 15, // 1474, 4
    5, 6, 8, 11, 12, 15, // 1478, 6
    4, 7, 8, 11, 12, 15, // 1484, 6
    1, 3, 13, 15, // 1490, 4
    9, 11, 13, 15, // 1494, 4
    5, 7, 9, 11, 13, 15, // 1498, 6
    2, 3, 14, 15, // 1504, 4
    2, 3, 4, 5, 14, 15, // 1508, 6
    6, 7, 14, 15, // 1514, 4
    2, 3, 5, 9, 14, 15, // 1518, 6
    2, 3, 8, 9, 14, 15, // 1524, 6
    10, 14, 15, // 1530, 3
    0, 4, 5, 9, 10, 14, 15, // 1533, 7
    2, 3, 7, 11, 14, 15, // 1540, 6
    10, 11, 14, 15, // 1546, 4
    7, 10, 11, 14, 15, // 1550, 5
    6, 7, 10, 11, 14, 15, // 1555, 6
    1, 2, 3, 13, 14, 15, // 1561, 6
    5, 6, 7, 13, 14, 15, // 1567, 6
    10, 11, 13, 14, 15, // 1573, 5
    9, 10, 11, 13, 14, 15, // 1578, 6
    0, 4, 8, 9, 12, 13, 14, 15, // 1584, 8
    9, 10, 12, 13, 14, 15, // 1592, 6
    8, 11, 12, 13, 14, 15, // 1598, 6
    3, 7, 10, 11, 12, 13, 14, 15, // 1604, 8
};
700
// { offset, count } pairs, one per row of g_fragments (243 in total).
// The ranges are contiguous: each offset equals the previous offset plus the
// previous count, so together they tile g_fragments from 0 to 1612.
static const int g_shapeRanges[][2] =
{
    { 0, 16 },{ 16, 4 },{ 20, 3 },{ 23, 4 },{ 27, 3 },{ 30, 4 },{ 34, 8 },{ 42, 4 },{ 46, 6 },{ 52, 8 },{ 60, 5 },
    { 65, 5 },{ 70, 4 },{ 74, 4 },{ 78, 6 },{ 84, 8 },{ 92, 8 },{ 100, 8 },{ 108, 8 },{ 116, 12 },{ 128, 4 },{ 132, 8 },
    { 140, 8 },{ 148, 10 },{ 158, 6 },{ 164, 8 },{ 172, 12 },{ 184, 8 },{ 192, 5 },{ 197, 3 },{ 200, 4 },{ 204, 6 },{ 210, 8 },
    { 218, 8 },{ 226, 8 },{ 234, 8 },{ 242, 8 },{ 250, 12 },{ 262, 13 },{ 275, 8 },{ 283, 8 },{ 291, 10 },{ 301, 8 },{ 309, 8 },
    { 317, 5 },{ 322, 8 },{ 330, 8 },{ 338, 8 },{ 346, 8 },{ 354, 8 },{ 362, 8 },{ 370, 8 },{ 378, 8 },{ 386, 8 },{ 394, 8 },
    { 402, 8 },{ 410, 8 },{ 418, 4 },{ 422, 8 },{ 430, 6 },{ 436, 8 },{ 444, 10 },{ 454, 8 },{ 462, 12 },{ 474, 8 },{ 482, 8 },
    { 490, 4 },{ 494, 8 },{ 502, 6 },{ 508, 8 },{ 516, 10 },{ 526, 8 },{ 534, 12 },{ 546, 8 },{ 554, 8 },{ 562, 8 },{ 570, 8 },
    { 578, 8 },{ 586, 8 },{ 594, 8 },{ 602, 8 },{ 610, 8 },{ 618, 8 },{ 626, 8 },{ 634, 8 },{ 642, 11 },{ 653, 8 },{ 661, 8 },
    { 669, 6 },{ 675, 8 },{ 683, 8 },{ 691, 3 },{ 694, 4 },{ 698, 8 },{ 706, 8 },{ 714, 8 },{ 722, 8 },{ 730, 8 },{ 738, 10 },
    { 748, 12 },{ 760, 13 },{ 773, 11 },{ 784, 8 },{ 792, 4 },{ 796, 8 },{ 804, 10 },{ 814, 6 },{ 820, 8 },{ 828, 8 },{ 836, 12 },
    { 848, 4 },{ 852, 8 },{ 860, 8 },{ 868, 8 },{ 876, 8 },{ 884, 10 },{ 894, 12 },{ 906, 12 },{ 918, 11 },{ 929, 11 },{ 940, 8 },
    { 948, 10 },{ 958, 12 },{ 970, 8 },{ 978, 12 },{ 990, 13 },{ 1003, 12 },{ 1015, 13 },{ 1028, 12 },{ 1040, 2 },{ 1042, 2 },{ 1044, 4 },
    { 1048, 5 },{ 1053, 3 },{ 1056, 4 },{ 1060, 4 },{ 1064, 6 },{ 1070, 6 },{ 1076, 7 },{ 1083, 4 },{ 1087, 6 },{ 1093, 4 },{ 1097, 4 },
    { 1101, 5 },{ 1106, 6 },{ 1112, 7 },{ 1119, 4 },{ 1123, 2 },{ 1125, 5 },{ 1130, 4 },{ 1134, 4 },{ 1138, 6 },{ 1144, 4 },{ 1148, 4 },
    { 1152, 6 },{ 1158, 6 },{ 1164, 6 },{ 1170, 6 },{ 1176, 6 },{ 1182, 2 },{ 1184, 5 },{ 1189, 4 },{ 1193, 6 },{ 1199, 6 },{ 1205, 4 },
    { 1209, 4 },{ 1213, 4 },{ 1217, 4 },{ 1221, 6 },{ 1227, 6 },{ 1233, 6 },{ 1239, 4 },{ 1243, 2 },{ 1245, 8 },{ 1253, 3 },{ 1256, 6 },
    { 1262, 7 },{ 1269, 7 },{ 1276, 4 },{ 1280, 4 },{ 1284, 5 },{ 1289, 5 },{ 1294, 4 },{ 1298, 4 },{ 1302, 6 },{ 1308, 6 },{ 1314, 4 },
    { 1318, 5 },{ 1323, 6 },{ 1329, 7 },{ 1336, 6 },{ 1342, 7 },{ 1349, 6 },{ 1355, 5 },{ 1360, 4 },{ 1364, 4 },{ 1368, 5 },{ 1373, 4 },
    { 1377, 4 },{ 1381, 6 },{ 1387, 2 },{ 1389, 4 },{ 1393, 6 },{ 1399, 6 },{ 1405, 6 },{ 1411, 5 },{ 1416, 6 },{ 1422, 2 },{ 1424, 4 },
    { 1428, 8 },{ 1436, 3 },{ 1439, 7 },{ 1446, 6 },{ 1452, 2 },{ 1454, 4 },{ 1458, 4 },{ 1462, 6 },{ 1468, 6 },{ 1474, 4 },{ 1478, 6 },
    { 1484, 6 },{ 1490, 4 },{ 1494, 4 },{ 1498, 6 },{ 1504, 4 },{ 1508, 6 },{ 1514, 4 },{ 1518, 6 },{ 1524, 6 },{ 1530, 3 },{ 1533, 7 },
    { 1540, 6 },{ 1546, 4 },{ 1550, 5 },{ 1555, 6 },{ 1561, 6 },{ 1567, 6 },{ 1573, 5 },{ 1578, 6 },{ 1584, 8 },{ 1592, 6 },{ 1598, 6 },
    { 1604, 8 },
};
726
// Single row used for the one-subset case.
// NOTE(review): the meaning of the two columns is not visible in this part of
// the file -- confirm against the code that reads g_shapes1.
static const int g_shapes1[][2] =
{
	{ 0, 16 },
};
730
// 64 rows of two values each.
// NOTE(review): presumably one row per two-subset partition, holding the shape
// ID of each subset (every value also appears in g_shapeList2) -- confirm
// against the code that reads g_shapes2.
static const int g_shapes2[64][2] =
{
	{ 33, 96 },  { 63, 66 },  { 20, 109 }, { 22, 107 },
	{ 37, 92 },  { 7, 122 },  { 8, 121 },  { 23, 106 },
	{ 38, 91 },  { 2, 127 },  { 9, 120 },  { 26, 103 },
	{ 3, 126 },  { 6, 123 },  { 1, 128 },  { 19, 110 },
	{ 15, 114 }, { 124, 5 },  { 72, 57 },  { 115, 14 },
	{ 125, 4 },  { 70, 59 },  { 100, 29 }, { 60, 69 },
	{ 116, 13 }, { 99, 30 },  { 78, 51 },  { 94, 35 },
	{ 104, 25 }, { 111, 18 }, { 71, 58 },  { 90, 39 },
	{ 45, 84 },  { 16, 113 }, { 82, 47 },  { 95, 34 },
	{ 87, 42 },  { 83, 46 },  { 53, 76 },  { 48, 81 },
	{ 68, 61 },  { 105, 24 }, { 98, 31 },  { 88, 41 },
	{ 75, 54 },  { 43, 86 },  { 52, 77 },  { 117, 12 },
	{ 119, 10 }, { 118, 11 }, { 85, 44 },  { 101, 28 },
	{ 36, 93 },  { 55, 74 },  { 89, 40 },  { 79, 50 },
	{ 56, 73 },  { 49, 80 },  { 64, 65 },  { 27, 102 },
	{ 32, 97 },  { 112, 17 }, { 67, 62 },  { 21, 108 },
};
741
// 64 rows of three values each.
// NOTE(review): presumably one row per three-subset partition, holding the
// shape ID of each subset (every value also appears in g_shapeList3) --
// confirm against the code that reads g_shapes3.
static const int g_shapes3[64][3] =
{
	{ 148, 160, 240 }, { 132, 212, 205 }, { 136, 233, 187 }, { 175, 237, 143 },
	{ 6, 186, 232 },   { 33, 142, 232 },  { 131, 123, 142 }, { 131, 96, 186 },
	{ 6, 171, 110 },   { 1, 18, 110 },    { 1, 146, 123 },   { 33, 195, 66 },
	{ 20, 51, 66 },    { 20, 178, 96 },   { 2, 177, 106 },   { 211, 4, 59 },
	{ 8, 191, 91 },    { 230, 14, 29 },   { 1, 188, 234 },   { 151, 110, 168 },
	{ 20, 144, 238 },  { 137, 66, 206 },  { 173, 179, 232 }, { 209, 194, 186 },
	{ 239, 165, 142 }, { 131, 152, 242 }, { 214, 54, 12 },   { 140, 219, 201 },
	{ 190, 150, 231 }, { 156, 135, 241 }, { 185, 227, 167 }, { 145, 210, 59 },
	{ 138, 174, 106 }, { 189, 229, 14 },  { 176, 133, 106 }, { 78, 178, 195 },
	{ 111, 146, 171 }, { 216, 180, 196 }, { 217, 181, 193 }, { 184, 228, 166 },
	{ 192, 225, 153 }, { 134, 141, 123 }, { 6, 222, 198 },   { 149, 183, 96 },
	{ 33, 226, 164 },  { 161, 215, 51 },  { 197, 221, 18 },  { 1, 223, 199 },
	{ 154, 163, 110 }, { 20, 236, 169 },  { 157, 204, 66 },  { 1, 202, 220 },
	{ 20, 170, 235 },  { 203, 158, 66 },  { 162, 155, 110 }, { 6, 201, 218 },
	{ 139, 135, 123 }, { 33, 167, 224 },  { 182, 150, 96 },  { 19, 200, 213 },
	{ 63, 207, 159 },  { 147, 172, 109 }, { 129, 130, 128 }, { 208, 14, 59 },
};
752
753
// Shape IDs tried by modes with a single subset (TrySinglePlane selects this
// list when numSubsets == 1). Each entry indexes g_shapeRanges.
static const int g_shapeList1[] = { 0 };
757
758
// Shape IDs tried by modes with two subsets (TrySinglePlane selects this list
// when numSubsets == 2). Each entry indexes g_shapeRanges. Covers 1..128.
static const int g_shapeList2[] =
{
	1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
	17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
	33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
	49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
	65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
	81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
	97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
	113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128,
};
773
774
// Union of the one-subset and two-subset shape IDs: 0..128.
// Its element count (g_numShapes12) sizes SinglePlaneTemporaries::unfinishedRGBA.
static const int g_shapeList12[] =
{
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
	32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
	48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
	64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
	96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
	112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
	128,
};
789
790
// Shape IDs tried by three-subset modes with 64 partitions (TrySinglePlane
// selects this list when numSubsets == 3 and numPartitions == 64).
// Each entry indexes g_shapeRanges.
static const int g_shapeList3[] =
{
	// Non-contiguous IDs at or below 128
	1, 2, 4, 6, 8, 12, 14, 18, 19, 20, 29, 33, 51,
	54, 59, 63, 66, 78, 91, 96, 106, 109, 110, 111, 123, 128,

	// Contiguous run 129..242
	129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144,
	145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
	161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176,
	177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192,
	193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208,
	209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
	225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240,
	241, 242,
};
806
807
// Shape IDs tried by three-subset modes with only 16 partitions (TrySinglePlane
// selects this list when numSubsets == 3 and numPartitions == 16).
// Each entry indexes g_shapeRanges.
static const int g_shapeList3Short[] =
{
	1, 2, 4, 6, 18, 20, 33, 51, 59,
	66, 96, 106, 110, 123, 131, 132, 136, 142,
	143, 146, 148, 160, 171, 175, 177, 178, 186,
	187, 195, 205, 211, 212, 232, 233, 237, 240,
};
814
815
// Every shape ID: 0..242.
// Its element count (g_numShapesAll) sizes the per-shape arrays in
// SinglePlaneTemporaries.
static const int g_shapeListAll[] =
{
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
	32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
	48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
	64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
	80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
	96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
	112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
	128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
	144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
	176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
	192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
	208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
	224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
	240, 241, 242,
};
841
842
static const int g_numShapes1 = sizeof(g_shapeList1) / sizeof(g_shapeList1[0]);
843
static const int g_numShapes2 = sizeof(g_shapeList2) / sizeof(g_shapeList2[0]);
844
static const int g_numShapes12 = sizeof(g_shapeList12) / sizeof(g_shapeList12[0]);
845
static const int g_numShapes3 = sizeof(g_shapeList3) / sizeof(g_shapeList3[0]);
846
static const int g_numShapes3Short = sizeof(g_shapeList3Short) / sizeof(g_shapeList3Short[0]);
847
static const int g_numShapesAll = sizeof(g_shapeListAll) / sizeof(g_shapeListAll[0]);
848
static const int g_numFragments = sizeof(g_fragments) / sizeof(g_fragments[0]);
849
}
850
851
struct PackingVector
852
{
853
uint32_t m_vector[4];
854
int m_offset;
855
856
void Init()
857
{
858
for (int i = 0; i < 4; i++)
859
m_vector[i] = 0;
860
861
m_offset = 0;
862
}
863
864
void InitPacked(const uint32_t *v, int bits)
865
{
866
for (int b = 0; b < bits; b += 32)
867
m_vector[b / 32] = v[b / 32];
868
869
m_offset = bits;
870
}
871
872
inline void Pack(ParallelMath::ScalarUInt16 value, int bits)
873
{
874
int vOffset = m_offset >> 5;
875
int bitOffset = m_offset & 0x1f;
876
877
m_vector[vOffset] |= (static_cast<uint32_t>(value) << bitOffset) & static_cast<uint32_t>(0xffffffff);
878
879
int overflowBits = bitOffset + bits - 32;
880
if (overflowBits > 0)
881
m_vector[vOffset + 1] |= (static_cast<uint32_t>(value) >> (bits - overflowBits));
882
883
m_offset += bits;
884
}
885
886
inline void Flush(uint8_t* output)
887
{
888
assert(m_offset == 128);
889
890
for (int v = 0; v < 4; v++)
891
{
892
uint32_t chunk = m_vector[v];
893
for (int b = 0; b < 4; b++)
894
output[v * 4 + b] = static_cast<uint8_t>((chunk >> (b * 8)) & 0xff);
895
}
896
}
897
};
898
899
900
struct UnpackingVector
901
{
902
uint32_t m_vector[4];
903
904
void Init(const uint8_t *bytes)
905
{
906
for (int i = 0; i < 4; i++)
907
m_vector[i] = 0;
908
909
for (int b = 0; b < 16; b++)
910
m_vector[b / 4] |= (bytes[b] << ((b % 4) * 8));
911
}
912
913
inline void UnpackStart(uint32_t *v, int bits)
914
{
915
for (int b = 0; b < bits; b += 32)
916
v[b / 32] = m_vector[b / 32];
917
918
int entriesShifted = bits / 32;
919
int carry = bits % 32;
920
921
for (int i = entriesShifted; i < 4; i++)
922
m_vector[i - entriesShifted] = m_vector[i];
923
924
int entriesRemaining = 4 - entriesShifted;
925
if (carry)
926
{
927
uint32_t bitMask = (1 << carry) - 1;
928
for (int i = 0; i < entriesRemaining; i++)
929
{
930
m_vector[i] >>= carry;
931
if (i != entriesRemaining - 1)
932
m_vector[i] |= (m_vector[i + 1] & bitMask) << (32 - carry);
933
}
934
}
935
}
936
937
inline ParallelMath::ScalarUInt16 Unpack(int bits)
938
{
939
uint32_t bitMask = (1 << bits) - 1;
940
941
ParallelMath::ScalarUInt16 result = static_cast<ParallelMath::ScalarUInt16>(m_vector[0] & bitMask);
942
943
for (int i = 0; i < 4; i++)
944
{
945
m_vector[i] >>= bits;
946
if (i != 3)
947
m_vector[i] |= (m_vector[i + 1] & bitMask) << (32 - bits);
948
}
949
950
return result;
951
}
952
};
953
954
// Rescales an HDR component value.
// Unsigned: (v * 64 + 30) / 31.
// Signed:   (v * 32 +/- 30) / 31, where the bias takes the sign of v
//           (-30 for v < 0, +30 otherwise).
ParallelMath::Float ScaleHDRValue(const ParallelMath::Float &v, bool isSigned)
{
	if (!isSigned)
		return (v * 64.0f + 30.0f) / 31.0f;

	// Per-lane bias that matches the sign of the input.
	ParallelMath::Float bias = ParallelMath::Select(
		ParallelMath::Less(v, ParallelMath::MakeFloatZero()),
		ParallelMath::MakeFloat(-30.0f),
		ParallelMath::MakeFloat(30.0f));

	return (v * 32.0f + bias) / 31.0f;
}
964
965
// Unscales a signed HDR endpoint value.
// The magnitude |v| is multiplied by 31 and shifted right by 5; lanes that
// were negative then get bit 15 (-32768) ORed in, so the result carries the
// sign as a separate top bit rather than as a two's-complement value.
// v must not be -32768, because its magnitude cannot be negated into range.
ParallelMath::SInt16 UnscaleHDRValueSigned(const ParallelMath::SInt16 &v)
{
#ifdef CVTT_ENABLE_ASSERTS
	for (int i = 0; i < ParallelMath::ParallelSize; i++)
	{
		assert(ParallelMath::Extract(v, i) != -32768);
	}
#endif

	// Split into sign and magnitude.
	ParallelMath::Int16CompFlag negative = ParallelMath::Less(v, ParallelMath::MakeSInt16(0));
	ParallelMath::UInt15 absComp = ParallelMath::LosslessCast<ParallelMath::UInt15>::Cast(ParallelMath::Select(negative, ParallelMath::SInt16(ParallelMath::MakeSInt16(0) - v), v));

	// magnitude * 31 / 32
	ParallelMath::UInt31 multiplied = ParallelMath::XMultiply(absComp, ParallelMath::MakeUInt15(31));
	ParallelMath::UInt31 shifted = ParallelMath::RightShift(multiplied, 5);
	ParallelMath::UInt15 absCompScaled = ParallelMath::ToUInt15(shifted);

	// Re-attach the sign as the top bit.
	ParallelMath::SInt16 signBits = ParallelMath::SelectOrZero(negative, ParallelMath::MakeSInt16(-32768));

	return ParallelMath::LosslessCast<ParallelMath::SInt16>::Cast(absCompScaled) | signBits;
}
982
983
// Unscales an unsigned HDR endpoint value: (v * 31) >> 6, narrowed to 15 bits.
ParallelMath::UInt15 UnscaleHDRValueUnsigned(const ParallelMath::UInt16 &v)
{
	return ParallelMath::ToUInt15(
		ParallelMath::RightShift(
			ParallelMath::XMultiply(v, ParallelMath::MakeUInt15(31)),
			6));
}
987
988
// Unscales both endpoints (three channels each) from inEP into outEP, using
// the signed or unsigned conversion according to isSigned.
void UnscaleHDREndpoints(const ParallelMath::AInt16 inEP[2][3], ParallelMath::AInt16 outEP[2][3], bool isSigned)
{
	if (isSigned)
	{
		for (int endpoint = 0; endpoint < 2; endpoint++)
		{
			for (int channel = 0; channel < 3; channel++)
			{
				outEP[endpoint][channel] = ParallelMath::LosslessCast<ParallelMath::AInt16>::Cast(
					UnscaleHDRValueSigned(ParallelMath::LosslessCast<ParallelMath::SInt16>::Cast(inEP[endpoint][channel])));
			}
		}
		return;
	}

	for (int endpoint = 0; endpoint < 2; endpoint++)
	{
		for (int channel = 0; channel < 3; channel++)
		{
			outEP[endpoint][channel] = ParallelMath::LosslessCast<ParallelMath::AInt16>::Cast(
				UnscaleHDRValueUnsigned(ParallelMath::LosslessCast<ParallelMath::UInt16>::Cast(inEP[endpoint][channel])));
		}
	}
}
1001
1002
struct SinglePlaneTemporaries
1003
{
1004
UnfinishedEndpoints<3> unfinishedRGB[BC7Data::g_numShapesAll];
1005
UnfinishedEndpoints<4> unfinishedRGBA[BC7Data::g_numShapes12];
1006
1007
ParallelMath::UInt15 fragmentBestIndexes[BC7Data::g_numFragments];
1008
ParallelMath::UInt15 shapeBestEP[BC7Data::g_numShapesAll][2][4];
1009
ParallelMath::Float shapeBestError[BC7Data::g_numShapesAll];
1010
};
1011
}
1012
}
1013
1014
void cvtt::Internal::BC7Computer::TweakAlpha(const MUInt15 original[2], int tweak, int range, MUInt15 result[2])
1015
{
1016
ParallelMath::RoundTowardNearestForScope roundingMode;
1017
1018
float tf[2];
1019
Util::ComputeTweakFactors(tweak, range, tf);
1020
1021
MFloat base = ParallelMath::ToFloat(original[0]);
1022
MFloat offs = ParallelMath::ToFloat(original[1]) - base;
1023
1024
result[0] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(base + offs * tf[0], 0.0f, 255.0f), &roundingMode);
1025
result[1] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(base + offs * tf[1], 0.0f, 255.0f), &roundingMode);
1026
}
1027
1028
// Quantizes each of the first `channels` entries of color in place, from
// 8 bits down to `bits` bits:
//   q = (c * (2^bits - 1) + 127 + 2^(7 - bits)) >> 8
void cvtt::Internal::BC7Computer::Quantize(MUInt15* color, int bits, int channels)
{
	for (int i = 0; i < channels; i++)
	{
		// (c << bits) - c == c * (2^bits - 1)
		color[i] = ParallelMath::RightShift(
			((color[i] << bits) - color[i]) + ParallelMath::MakeUInt15(127 + (1 << (7 - bits))),
			8);
	}
}
1033
1034
// Quantizes each of the first `channels` entries of color in place to `bits`
// bits plus a fixed parity bit p. The stored value is (q << 1) | p, i.e. p
// occupies the lowest bit. The rounding addend depends on p.
void cvtt::Internal::BC7Computer::QuantizeP(MUInt15* color, int bits, uint16_t p, int channels)
{
	int16_t addend = 255;
	if (p)
		addend = ((1 << (8 - bits)) - 1);

	for (int i = 0; i < channels; i++)
	{
		MUInt16 wide = ParallelMath::LosslessCast<MUInt16>::Cast(color[i]);

		// (c << (bits + 1)) - c == c * (2^(bits + 1) - 1)
		wide = ParallelMath::RightShift((wide << (bits + 1)) - wide + addend, 9);

		// Append the parity bit below the quantized value.
		wide = (wide << 1) | ParallelMath::MakeUInt16(p);

		color[i] = ParallelMath::LosslessCast<MUInt15>::Cast(wide);
	}
}
1050
1051
// Expands each of the first `channels` entries of color in place from `bits`
// bits back to 8 bits: the value is moved to the top of the byte and its own
// high bits are replicated into the vacated low bits.
void cvtt::Internal::BC7Computer::Unquantize(MUInt15* color, int bits, int channels)
{
	for (int i = 0; i < channels; i++)
	{
		MUInt15 expanded = color[i] << (8 - bits);
		color[i] = expanded | ParallelMath::RightShift(expanded, bits);
	}
}
1060
1061
// Mode 0 endpoint round trip: RGB quantized to 4 bits plus a per-endpoint
// parity bit (5 bits total), then expanded back to 8 bits. Alpha is forced to
// 255.
void cvtt::Internal::BC7Computer::CompressEndpoints0(MUInt15 ep[2][4], uint16_t p[2])
{
	for (int epi = 0; epi < 2; epi++)
	{
		MUInt15 *endpoint = ep[epi];

		QuantizeP(endpoint, 4, p[epi], 3);
		Unquantize(endpoint, 5, 3);
		endpoint[3] = ParallelMath::MakeUInt15(255);
	}
}
1070
1071
// Mode 1 endpoint round trip: RGB quantized to 6 bits plus one parity bit
// shared by both endpoints (7 bits total), then expanded back to 8 bits.
// Alpha is forced to 255.
void cvtt::Internal::BC7Computer::CompressEndpoints1(MUInt15 ep[2][4], uint16_t p)
{
	for (int epi = 0; epi < 2; epi++)
	{
		MUInt15 *endpoint = ep[epi];

		QuantizeP(endpoint, 6, p, 3);
		Unquantize(endpoint, 7, 3);
		endpoint[3] = ParallelMath::MakeUInt15(255);
	}
}
1080
1081
// Mode 2 endpoint round trip: RGB quantized to 5 bits with no parity bit, then
// expanded back to 8 bits. Alpha is forced to 255.
void cvtt::Internal::BC7Computer::CompressEndpoints2(MUInt15 ep[2][4])
{
	for (int epi = 0; epi < 2; epi++)
	{
		MUInt15 *endpoint = ep[epi];

		Quantize(endpoint, 5, 3);
		Unquantize(endpoint, 5, 3);
		endpoint[3] = ParallelMath::MakeUInt15(255);
	}
}
1090
1091
// Mode 3 endpoint round trip: RGB quantized to 7 bits plus a per-endpoint
// parity bit. That already totals 8 bits, so no expansion step is needed.
// Alpha is forced to 255.
void cvtt::Internal::BC7Computer::CompressEndpoints3(MUInt15 ep[2][4], uint16_t p[2])
{
	for (int epi = 0; epi < 2; epi++)
	{
		MUInt15 *endpoint = ep[epi];

		QuantizeP(endpoint, 7, p[epi], 3);
		endpoint[3] = ParallelMath::MakeUInt15(255);
	}
}
1099
1100
// Mode 4 endpoint round trip: RGB quantized to 5 bits and alpha to 6 bits,
// each expanded back to 8 bits. No parity bits.
void cvtt::Internal::BC7Computer::CompressEndpoints4(MUInt15 epRGB[2][3], MUInt15 epA[2])
{
	for (int epi = 0; epi < 2; epi++)
	{
		MUInt15 *rgb = epRGB[epi];
		MUInt15 *alpha = epA + epi;

		Quantize(rgb, 5, 3);
		Unquantize(rgb, 5, 3);

		Quantize(alpha, 6, 1);
		Unquantize(alpha, 6, 1);
	}
}
1111
1112
// Mode 5 endpoint round trip: RGB quantized to 7 bits and expanded back to
// 8 bits. Alpha is stored at full precision, so epA is left untouched.
void cvtt::Internal::BC7Computer::CompressEndpoints5(MUInt15 epRGB[2][3], MUInt15 epA[2])
{
	// Alpha is full precision
	(void)epA;

	for (int epi = 0; epi < 2; epi++)
	{
		MUInt15 *rgb = epRGB[epi];

		Quantize(rgb, 7, 3);
		Unquantize(rgb, 7, 3);
	}
}
1123
1124
// Mode 6 endpoint round trip: all four channels quantized to 7 bits plus a
// per-endpoint parity bit. That already totals 8 bits, so no expansion step is
// needed.
void cvtt::Internal::BC7Computer::CompressEndpoints6(MUInt15 ep[2][4], uint16_t p[2])
{
	for (int epi = 0; epi < 2; epi++)
	{
		QuantizeP(ep[epi], 7, p[epi], 4);
	}
}
1129
1130
// Mode 7 endpoint round trip: all four channels quantized to 5 bits plus a
// per-endpoint parity bit (6 bits total), then expanded back to 8 bits.
void cvtt::Internal::BC7Computer::CompressEndpoints7(MUInt15 ep[2][4], uint16_t p[2])
{
	for (int epi = 0; epi < 2; epi++)
	{
		MUInt15 *endpoint = ep[epi];

		QuantizeP(endpoint, 5, p[epi], 4);
		Unquantize(endpoint, 6, 4);
	}
}
1138
1139
// Tries to encode one shape as a single solid color using precomputed
// single-color tables, and records the result if it beats the shape's current
// best.
//
// For every table, each real channel of the rounded average is looked up to
// get an endpoint pair and the color it actually reconstructs to. Per lane,
// the table whose reconstructed color is closest (weighted squared distance)
// to the true average is kept, skipping tables whose parity-bit combination is
// flagged in punchThroughInvalid. The chosen color is then scored against the
// shape's pixels.
//
//   flags                - encoder flags, forwarded to the error metric
//   pixels               - the block's 16 pixels
//   average              - average color of the shape's pixels
//   numRealChannels      - 3 for RGB modes, 4 for RGBA modes
//   fragmentStart        - pixel indices belonging to the shape
//   shapeLength          - number of entries in fragmentStart
//   staticAlphaError     - fixed error term added to the computed error
//   punchThroughInvalid  - per parity-bit combination, lanes where that
//                          combination must not be used
//   shapeBestError       - in/out: best error for the shape so far
//   shapeBestEP          - in/out: endpoints for that best error
//   fragmentBestIndexes  - in/out: per-pixel indices for that best error
//   channelWeightsSq     - squared channel weights
//   tables, numTables    - candidate single-color tables
//   rtn                  - rounding-mode scope used for float->int conversion
void cvtt::Internal::BC7Computer::TrySingleColorRGBAMultiTable(uint32_t flags, const MUInt15 pixels[16][4], const MFloat average[4], int numRealChannels, const uint8_t *fragmentStart, int shapeLength, const MFloat &staticAlphaError, const ParallelMath::Int16CompFlag punchThroughInvalid[4], MFloat& shapeBestError, MUInt15 shapeBestEP[2][4], MUInt15 *fragmentBestIndexes, const float *channelWeightsSq, const cvtt::Tables::BC7SC::Table*const* tables, int numTables, const ParallelMath::RoundTowardNearestForScope *rtn)
{
	MFloat bestAverageError = ParallelMath::MakeFloat(FLT_MAX);

	MUInt15 intAverage[4];
	for (int ch = 0; ch < 4; ch++)
		intAverage[ch] = ParallelMath::RoundAndConvertToU15(average[ch], rtn);

	// Placeholder result (black, opaque, index 0). Channels at or above
	// numRealChannels keep these values.
	MUInt15 eps[2][4];
	MUInt15 reconstructed[4];
	MUInt15 index = ParallelMath::MakeUInt15(0);

	for (int epi = 0; epi < 2; epi++)
	{
		for (int ch = 0; ch < 3; ch++)
			eps[epi][ch] = ParallelMath::MakeUInt15(0);
		eps[epi][3] = ParallelMath::MakeUInt15(255);
	}

	for (int ch = 0; ch < 3; ch++)
		reconstructed[ch] = ParallelMath::MakeUInt15(0);
	reconstructed[3] = ParallelMath::MakeUInt15(255);

	// Lanes for which at least one table has been accepted. Lanes that never
	// get set still hold the placeholder above, which does not correspond to
	// any table and must not be stored as a result.
	ParallelMath::Int16CompFlag anyTableAccepted = ParallelMath::MakeBoolInt16(false);

	// Depending on the target index and parity bits, there are multiple valid solid colors.
	// We want to find the one closest to the actual average.
	for (int t = 0; t < numTables; t++)
	{
		const cvtt::Tables::BC7SC::Table& table = *(tables[t]);

		ParallelMath::Int16CompFlag pti = punchThroughInvalid[table.m_pBits];

		MUInt15 candidateReconstructed[4];
		MUInt15 candidateEPs[2][4];

		// Table lookup is scalar: one lookup per lane and channel.
		for (int i = 0; i < ParallelMath::ParallelSize; i++)
		{
			for (int ch = 0; ch < numRealChannels; ch++)
			{
				ParallelMath::ScalarUInt16 avgValue = ParallelMath::Extract(intAverage[ch], i);
				assert(avgValue >= 0 && avgValue <= 255);

				const cvtt::Tables::BC7SC::TableEntry &entry = table.m_entries[avgValue];

				ParallelMath::PutUInt15(candidateEPs[0][ch], i, entry.m_min);
				ParallelMath::PutUInt15(candidateEPs[1][ch], i, entry.m_max);
				ParallelMath::PutUInt15(candidateReconstructed[ch], i, entry.m_actualColor);
			}
		}

		MFloat avgError = ParallelMath::MakeFloatZero();
		for (int ch = 0; ch < numRealChannels; ch++)
		{
			MFloat delta = ParallelMath::ToFloat(candidateReconstructed[ch]) - average[ch];
			avgError = avgError + delta * delta * channelWeightsSq[ch];
		}

		ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(avgError, bestAverageError));
		better = ParallelMath::AndNot(pti, better); // Mask out punch-through invalidations

		if (ParallelMath::AnySet(better))
		{
			anyTableAccepted = (anyTableAccepted | better);

			ParallelMath::ConditionalSet(bestAverageError, ParallelMath::Int16FlagToFloat(better), avgError);

			MUInt15 candidateIndex = ParallelMath::MakeUInt15(table.m_index);

			ParallelMath::ConditionalSet(index, better, candidateIndex);

			for (int ch = 0; ch < numRealChannels; ch++)
				ParallelMath::ConditionalSet(reconstructed[ch], better, candidateReconstructed[ch]);

			for (int epi = 0; epi < 2; epi++)
				for (int ch = 0; ch < numRealChannels; ch++)
					ParallelMath::ConditionalSet(eps[epi][ch], better, candidateEPs[epi][ch]);
		}
	}

	// Nothing usable in any lane: leave the caller's best result untouched.
	if (!ParallelMath::AnySet(anyTableAccepted))
		return;

	// Score the chosen solid color against the actual pixels of the shape.
	AggregatedError<4> aggError;
	for (int pxi = 0; pxi < shapeLength; pxi++)
	{
		int px = fragmentStart[pxi];

		BCCommon::ComputeErrorLDR<4>(flags, reconstructed, pixels[px], numRealChannels, aggError);
	}

	MFloat error = aggError.Finalize(flags, channelWeightsSq) + staticAlphaError;

	// Only lanes that both improve on the current best and have a real table
	// behind them may update the result.
	ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(error, shapeBestError));
	better = (better & anyTableAccepted);

	if (ParallelMath::AnySet(better))
	{
		ParallelMath::ConditionalSet(shapeBestError, ParallelMath::Int16FlagToFloat(better), error);
		for (int epi = 0; epi < 2; epi++)
		{
			for (int ch = 0; ch < numRealChannels; ch++)
				ParallelMath::ConditionalSet(shapeBestEP[epi][ch], better, eps[epi][ch]);
		}

		for (int pxi = 0; pxi < shapeLength; pxi++)
			ParallelMath::ConditionalSet(fragmentBestIndexes[pxi], better, index);
	}
}
1240
1241
void cvtt::Internal::BC7Computer::TrySinglePlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn)
1242
{
1243
if (numRefineRounds < 1)
1244
numRefineRounds = 1;
1245
1246
float channelWeightsSq[4];
1247
1248
for (int ch = 0; ch < 4; ch++)
1249
channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch];
1250
1251
SinglePlaneTemporaries temps;
1252
1253
MUInt15 maxAlpha = ParallelMath::MakeUInt15(0);
1254
MUInt15 minAlpha = ParallelMath::MakeUInt15(255);
1255
ParallelMath::Int16CompFlag isPunchThrough = ParallelMath::MakeBoolInt16(true);
1256
for (int px = 0; px < 16; px++)
1257
{
1258
MUInt15 a = pixels[px][3];
1259
maxAlpha = ParallelMath::Max(maxAlpha, a);
1260
minAlpha = ParallelMath::Min(minAlpha, a);
1261
1262
isPunchThrough = (isPunchThrough & (ParallelMath::Equal(a, ParallelMath::MakeUInt15(0)) | ParallelMath::Equal(a, ParallelMath::MakeUInt15(255))));
1263
}
1264
1265
ParallelMath::Int16CompFlag blockHasNonMaxAlpha = ParallelMath::Less(minAlpha, ParallelMath::MakeUInt15(255));
1266
ParallelMath::Int16CompFlag blockHasNonZeroAlpha = ParallelMath::Less(ParallelMath::MakeUInt15(0), maxAlpha);
1267
1268
bool anyBlockHasAlpha = ParallelMath::AnySet(blockHasNonMaxAlpha);
1269
1270
// Try RGB modes if any block has a min alpha 251 or higher
1271
bool allowRGBModes = ParallelMath::AnySet(ParallelMath::Less(ParallelMath::MakeUInt15(250), minAlpha));
1272
1273
// Try mode 7 if any block has alpha.
1274
// Mode 7 is almost never selected for RGB blocks because mode 4 has very accurate 7.7.7.1 endpoints
1275
// and its parity bit doesn't affect alpha, meaning mode 7 can only be better in extremely specific
1276
// situations, and only by at most 1 unit of error per pixel.
1277
bool allowMode7 = anyBlockHasAlpha || (encodingPlan.mode7RGBPartitionEnabled != 0);
1278
1279
MFloat preWeightedPixels[16][4];
1280
1281
BCCommon::PreWeightPixelsLDR<4>(preWeightedPixels, pixels, channelWeights);
1282
1283
// Get initial RGB endpoints
1284
if (allowRGBModes)
1285
{
1286
const uint8_t *shapeList = encodingPlan.rgbShapeList;
1287
int numShapesToEvaluate = encodingPlan.rgbNumShapesToEvaluate;
1288
1289
for (int shapeIter = 0; shapeIter < numShapesToEvaluate; shapeIter++)
1290
{
1291
int shape = shapeList[shapeIter];
1292
1293
int shapeStart = BC7Data::g_shapeRanges[shape][0];
1294
int shapeSize = BC7Data::g_shapeRanges[shape][1];
1295
1296
EndpointSelector<3, 8> epSelector;
1297
1298
for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++)
1299
{
1300
for (int spx = 0; spx < shapeSize; spx++)
1301
{
1302
int px = BC7Data::g_fragments[shapeStart + spx];
1303
epSelector.ContributePass(preWeightedPixels[px], epPass, ParallelMath::MakeFloat(1.0f));
1304
}
1305
epSelector.FinishPass(epPass);
1306
}
1307
temps.unfinishedRGB[shape] = epSelector.GetEndpoints(channelWeights);
1308
}
1309
}
1310
1311
// Get initial RGBA endpoints
1312
{
1313
const uint8_t *shapeList = encodingPlan.rgbaShapeList;
1314
int numShapesToEvaluate = encodingPlan.rgbaNumShapesToEvaluate;
1315
1316
for (int shapeIter = 0; shapeIter < numShapesToEvaluate; shapeIter++)
1317
{
1318
int shape = shapeList[shapeIter];
1319
1320
if (anyBlockHasAlpha || !allowRGBModes)
1321
{
1322
int shapeStart = BC7Data::g_shapeRanges[shape][0];
1323
int shapeSize = BC7Data::g_shapeRanges[shape][1];
1324
1325
EndpointSelector<4, 8> epSelector;
1326
1327
for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++)
1328
{
1329
for (int spx = 0; spx < shapeSize; spx++)
1330
{
1331
int px = BC7Data::g_fragments[shapeStart + spx];
1332
epSelector.ContributePass(preWeightedPixels[px], epPass, ParallelMath::MakeFloat(1.0f));
1333
}
1334
epSelector.FinishPass(epPass);
1335
}
1336
temps.unfinishedRGBA[shape] = epSelector.GetEndpoints(channelWeights);
1337
}
1338
else
1339
{
1340
temps.unfinishedRGBA[shape] = temps.unfinishedRGB[shape].ExpandTo<4>(255);
1341
}
1342
}
1343
}
1344
1345
for (uint16_t mode = 0; mode <= 7; mode++)
1346
{
1347
if (mode == 4 || mode == 5)
1348
continue;
1349
1350
if (mode < 4 && !allowRGBModes)
1351
continue;
1352
1353
if (mode == 7 && !allowMode7)
1354
continue;
1355
1356
uint64_t partitionEnabledBits = 0;
1357
switch (mode)
1358
{
1359
case 0:
1360
partitionEnabledBits = encodingPlan.mode0PartitionEnabled;
1361
break;
1362
case 1:
1363
partitionEnabledBits = encodingPlan.mode1PartitionEnabled;
1364
break;
1365
case 2:
1366
partitionEnabledBits = encodingPlan.mode2PartitionEnabled;
1367
break;
1368
case 3:
1369
partitionEnabledBits = encodingPlan.mode3PartitionEnabled;
1370
break;
1371
case 6:
1372
partitionEnabledBits = encodingPlan.mode6Enabled ? 1 : 0;
1373
break;
1374
case 7:
1375
if (anyBlockHasAlpha)
1376
partitionEnabledBits = encodingPlan.mode7RGBAPartitionEnabled;
1377
else
1378
partitionEnabledBits = encodingPlan.mode7RGBPartitionEnabled;
1379
break;
1380
default:
1381
break;
1382
}
1383
1384
bool isRGB = (mode < 4);
1385
1386
unsigned int numPartitions = 1 << BC7Data::g_modes[mode].m_partitionBits;
1387
int numSubsets = BC7Data::g_modes[mode].m_numSubsets;
1388
int indexPrec = BC7Data::g_modes[mode].m_indexBits;
1389
1390
int parityBitMax = 1;
1391
if (BC7Data::g_modes[mode].m_pBitMode == BC7Data::PBitMode_PerEndpoint)
1392
parityBitMax = 4;
1393
else if (BC7Data::g_modes[mode].m_pBitMode == BC7Data::PBitMode_PerSubset)
1394
parityBitMax = 2;
1395
1396
int numRealChannels = isRGB ? 3 : 4;
1397
1398
int numShapes;
1399
const int *shapeList;
1400
1401
if (numSubsets == 1)
1402
{
1403
numShapes = BC7Data::g_numShapes1;
1404
shapeList = BC7Data::g_shapeList1;
1405
}
1406
else if (numSubsets == 2)
1407
{
1408
numShapes = BC7Data::g_numShapes2;
1409
shapeList = BC7Data::g_shapeList2;
1410
}
1411
else
1412
{
1413
assert(numSubsets == 3);
1414
if (numPartitions == 16)
1415
{
1416
numShapes = BC7Data::g_numShapes3Short;
1417
shapeList = BC7Data::g_shapeList3Short;
1418
}
1419
else
1420
{
1421
assert(numPartitions == 64);
1422
numShapes = BC7Data::g_numShapes3;
1423
shapeList = BC7Data::g_shapeList3;
1424
}
1425
}
1426
1427
for (int slot = 0; slot < BC7Data::g_numShapesAll; slot++)
1428
temps.shapeBestError[slot] = ParallelMath::MakeFloat(FLT_MAX);
1429
1430
for (int shapeIter = 0; shapeIter < numShapes; shapeIter++)
1431
{
1432
int shape = shapeList[shapeIter];
1433
1434
int numTweakRounds = 0;
1435
if (isRGB)
1436
numTweakRounds = encodingPlan.seedPointsForShapeRGB[shape];
1437
else
1438
numTweakRounds = encodingPlan.seedPointsForShapeRGBA[shape];
1439
1440
if (numTweakRounds == 0)
1441
continue;
1442
1443
if (numTweakRounds > MaxTweakRounds)
1444
numTweakRounds = MaxTweakRounds;
1445
1446
int shapeStart = BC7Data::g_shapeRanges[shape][0];
1447
int shapeLength = BC7Data::g_shapeRanges[shape][1];
1448
1449
AggregatedError<1> alphaAggError;
1450
if (isRGB && anyBlockHasAlpha)
1451
{
1452
MUInt15 filledAlpha[1] = { ParallelMath::MakeUInt15(255) };
1453
1454
for (int pxi = 0; pxi < shapeLength; pxi++)
1455
{
1456
int px = BC7Data::g_fragments[shapeStart + pxi];
1457
MUInt15 original[1] = { pixels[px][3] };
1458
BCCommon::ComputeErrorLDR<1>(flags, filledAlpha, original, alphaAggError);
1459
}
1460
}
1461
1462
float alphaWeightsSq[1] = { channelWeightsSq[3] };
1463
MFloat staticAlphaError = alphaAggError.Finalize(flags, alphaWeightsSq);
1464
1465
MUInt15 tweakBaseEP[MaxTweakRounds][2][4];
1466
1467
for (int tweak = 0; tweak < numTweakRounds; tweak++)
1468
{
1469
if (isRGB)
1470
{
1471
temps.unfinishedRGB[shape].FinishLDR(tweak, 1 << indexPrec, tweakBaseEP[tweak][0], tweakBaseEP[tweak][1]);
1472
tweakBaseEP[tweak][0][3] = tweakBaseEP[tweak][1][3] = ParallelMath::MakeUInt15(255);
1473
}
1474
else
1475
{
1476
temps.unfinishedRGBA[shape].FinishLDR(tweak, 1 << indexPrec, tweakBaseEP[tweak][0], tweakBaseEP[tweak][1]);
1477
}
1478
}
1479
1480
ParallelMath::Int16CompFlag punchThroughInvalid[4];
1481
for (int pIter = 0; pIter < parityBitMax; pIter++)
1482
{
1483
punchThroughInvalid[pIter] = ParallelMath::MakeBoolInt16(false);
1484
1485
if ((flags & Flags::BC7_RespectPunchThrough) && (mode == 6 || mode == 7))
1486
{
1487
// Modes 6 and 7 have parity bits that affect alpha
1488
if (pIter == 0)
1489
punchThroughInvalid[pIter] = (isPunchThrough & blockHasNonZeroAlpha);
1490
else if (pIter == parityBitMax - 1)
1491
punchThroughInvalid[pIter] = (isPunchThrough & blockHasNonMaxAlpha);
1492
else
1493
punchThroughInvalid[pIter] = isPunchThrough;
1494
}
1495
}
1496
1497
for (int pIter = 0; pIter < parityBitMax; pIter++)
1498
{
1499
if (ParallelMath::AllSet(punchThroughInvalid[pIter]))
1500
continue;
1501
1502
bool needPunchThroughCheck = ParallelMath::AnySet(punchThroughInvalid[pIter]);
1503
1504
for (int tweak = 0; tweak < numTweakRounds; tweak++)
1505
{
1506
uint16_t p[2];
1507
p[0] = (pIter & 1);
1508
p[1] = ((pIter >> 1) & 1);
1509
1510
MUInt15 ep[2][4];
1511
1512
for (int epi = 0; epi < 2; epi++)
1513
for (int ch = 0; ch < 4; ch++)
1514
ep[epi][ch] = tweakBaseEP[tweak][epi][ch];
1515
1516
for (int refine = 0; refine < numRefineRounds; refine++)
1517
{
1518
switch (mode)
1519
{
1520
case 0:
1521
CompressEndpoints0(ep, p);
1522
break;
1523
case 1:
1524
CompressEndpoints1(ep, p[0]);
1525
break;
1526
case 2:
1527
CompressEndpoints2(ep);
1528
break;
1529
case 3:
1530
CompressEndpoints3(ep, p);
1531
break;
1532
case 6:
1533
CompressEndpoints6(ep, p);
1534
break;
1535
case 7:
1536
CompressEndpoints7(ep, p);
1537
break;
1538
default:
1539
assert(false);
1540
break;
1541
};
1542
1543
MFloat shapeError = ParallelMath::MakeFloatZero();
1544
1545
IndexSelector<4> indexSelector;
1546
indexSelector.Init<false>(channelWeights, ep, 1 << indexPrec);
1547
1548
EndpointRefiner<4> epRefiner;
1549
epRefiner.Init(1 << indexPrec, channelWeights);
1550
1551
MUInt15 indexes[16];
1552
1553
AggregatedError<4> aggError;
1554
for (int pxi = 0; pxi < shapeLength; pxi++)
1555
{
1556
int px = BC7Data::g_fragments[shapeStart + pxi];
1557
1558
MUInt15 index;
1559
MUInt15 reconstructed[4];
1560
1561
index = indexSelector.SelectIndexLDR(floatPixels[px], rtn);
1562
indexSelector.ReconstructLDR_BC7(index, reconstructed, numRealChannels);
1563
1564
if (flags & cvtt::Flags::BC7_FastIndexing)
1565
BCCommon::ComputeErrorLDR<4>(flags, reconstructed, pixels[px], numRealChannels, aggError);
1566
else
1567
{
1568
MFloat error = BCCommon::ComputeErrorLDRSimple<4>(flags, reconstructed, pixels[px], numRealChannels, channelWeightsSq);
1569
1570
MUInt15 altIndexes[2];
1571
altIndexes[0] = ParallelMath::Max(index, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1);
1572
altIndexes[1] = ParallelMath::Min(index + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << indexPrec) - 1)));
1573
1574
for (int ii = 0; ii < 2; ii++)
1575
{
1576
indexSelector.ReconstructLDR_BC7(altIndexes[ii], reconstructed, numRealChannels);
1577
1578
MFloat altError = BCCommon::ComputeErrorLDRSimple<4>(flags, reconstructed, pixels[px], numRealChannels, channelWeightsSq);
1579
ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altError, error));
1580
error = ParallelMath::Min(error, altError);
1581
ParallelMath::ConditionalSet(index, better, altIndexes[ii]);
1582
}
1583
1584
shapeError = shapeError + error;
1585
}
1586
1587
if (refine != numRefineRounds - 1)
1588
epRefiner.ContributeUnweightedPW(preWeightedPixels[px], index, numRealChannels);
1589
1590
indexes[pxi] = index;
1591
}
1592
1593
if (flags & cvtt::Flags::BC7_FastIndexing)
1594
shapeError = aggError.Finalize(flags, channelWeightsSq);
1595
1596
if (isRGB)
1597
shapeError = shapeError + staticAlphaError;
1598
1599
ParallelMath::FloatCompFlag shapeErrorBetter;
1600
ParallelMath::Int16CompFlag shapeErrorBetter16;
1601
1602
shapeErrorBetter = ParallelMath::Less(shapeError, temps.shapeBestError[shape]);
1603
shapeErrorBetter16 = ParallelMath::FloatFlagToInt16(shapeErrorBetter);
1604
1605
if (ParallelMath::AnySet(shapeErrorBetter16))
1606
{
1607
bool punchThroughOK = true;
1608
if (needPunchThroughCheck)
1609
{
1610
shapeErrorBetter16 = ParallelMath::AndNot(punchThroughInvalid[pIter], shapeErrorBetter16);
1611
shapeErrorBetter = ParallelMath::Int16FlagToFloat(shapeErrorBetter16);
1612
1613
if (!ParallelMath::AnySet(shapeErrorBetter16))
1614
punchThroughOK = false;
1615
}
1616
1617
if (punchThroughOK)
1618
{
1619
ParallelMath::ConditionalSet(temps.shapeBestError[shape], shapeErrorBetter, shapeError);
1620
for (int epi = 0; epi < 2; epi++)
1621
for (int ch = 0; ch < numRealChannels; ch++)
1622
ParallelMath::ConditionalSet(temps.shapeBestEP[shape][epi][ch], shapeErrorBetter16, ep[epi][ch]);
1623
1624
for (int pxi = 0; pxi < shapeLength; pxi++)
1625
ParallelMath::ConditionalSet(temps.fragmentBestIndexes[shapeStart + pxi], shapeErrorBetter16, indexes[pxi]);
1626
}
1627
}
1628
1629
if (refine != numRefineRounds - 1)
1630
epRefiner.GetRefinedEndpointsLDR(ep, numRealChannels, rtn);
1631
} // refine
1632
} // tweak
1633
} // p
1634
1635
if (flags & cvtt::Flags::BC7_TrySingleColor)
1636
{
1637
MUInt15 total[4];
1638
for (int ch = 0; ch < 4; ch++)
1639
total[ch] = ParallelMath::MakeUInt15(0);
1640
1641
for (int pxi = 0; pxi < shapeLength; pxi++)
1642
{
1643
int px = BC7Data::g_fragments[shapeStart + pxi];
1644
for (int ch = 0; ch < 4; ch++)
1645
total[ch] = total[ch] + pixels[pxi][ch];
1646
}
1647
1648
MFloat rcpShapeLength = ParallelMath::MakeFloat(1.0f / static_cast<float>(shapeLength));
1649
MFloat average[4];
1650
for (int ch = 0; ch < 4; ch++)
1651
average[ch] = ParallelMath::ToFloat(total[ch]) * rcpShapeLength;
1652
1653
const uint8_t *fragment = BC7Data::g_fragments + shapeStart;
1654
MFloat &shapeBestError = temps.shapeBestError[shape];
1655
MUInt15 (&shapeBestEP)[2][4] = temps.shapeBestEP[shape];
1656
MUInt15 *fragmentBestIndexes = temps.fragmentBestIndexes + shapeStart;
1657
1658
const cvtt::Tables::BC7SC::Table **scTables = NULL;
1659
int numSCTables = 0;
1660
1661
const cvtt::Tables::BC7SC::Table *tables0[] =
1662
{
1663
&cvtt::Tables::BC7SC::g_mode0_p00_i1,
1664
&cvtt::Tables::BC7SC::g_mode0_p00_i2,
1665
&cvtt::Tables::BC7SC::g_mode0_p00_i3,
1666
&cvtt::Tables::BC7SC::g_mode0_p01_i1,
1667
&cvtt::Tables::BC7SC::g_mode0_p01_i2,
1668
&cvtt::Tables::BC7SC::g_mode0_p01_i3,
1669
&cvtt::Tables::BC7SC::g_mode0_p10_i1,
1670
&cvtt::Tables::BC7SC::g_mode0_p10_i2,
1671
&cvtt::Tables::BC7SC::g_mode0_p10_i3,
1672
&cvtt::Tables::BC7SC::g_mode0_p11_i1,
1673
&cvtt::Tables::BC7SC::g_mode0_p11_i2,
1674
&cvtt::Tables::BC7SC::g_mode0_p11_i3,
1675
};
1676
1677
const cvtt::Tables::BC7SC::Table *tables1[] =
1678
{
1679
&cvtt::Tables::BC7SC::g_mode1_p0_i1,
1680
&cvtt::Tables::BC7SC::g_mode1_p0_i2,
1681
&cvtt::Tables::BC7SC::g_mode1_p0_i3,
1682
&cvtt::Tables::BC7SC::g_mode1_p1_i1,
1683
&cvtt::Tables::BC7SC::g_mode1_p1_i2,
1684
&cvtt::Tables::BC7SC::g_mode1_p1_i3,
1685
};
1686
1687
const cvtt::Tables::BC7SC::Table *tables2[] =
1688
{
1689
&cvtt::Tables::BC7SC::g_mode2,
1690
};
1691
1692
const cvtt::Tables::BC7SC::Table *tables3[] =
1693
{
1694
&cvtt::Tables::BC7SC::g_mode3_p0,
1695
&cvtt::Tables::BC7SC::g_mode3_p1,
1696
};
1697
1698
const cvtt::Tables::BC7SC::Table *tables6[] =
1699
{
1700
&cvtt::Tables::BC7SC::g_mode6_p0_i1,
1701
&cvtt::Tables::BC7SC::g_mode6_p0_i2,
1702
&cvtt::Tables::BC7SC::g_mode6_p0_i3,
1703
&cvtt::Tables::BC7SC::g_mode6_p0_i4,
1704
&cvtt::Tables::BC7SC::g_mode6_p0_i5,
1705
&cvtt::Tables::BC7SC::g_mode6_p0_i6,
1706
&cvtt::Tables::BC7SC::g_mode6_p0_i7,
1707
&cvtt::Tables::BC7SC::g_mode6_p1_i1,
1708
&cvtt::Tables::BC7SC::g_mode6_p1_i2,
1709
&cvtt::Tables::BC7SC::g_mode6_p1_i3,
1710
&cvtt::Tables::BC7SC::g_mode6_p1_i4,
1711
&cvtt::Tables::BC7SC::g_mode6_p1_i5,
1712
&cvtt::Tables::BC7SC::g_mode6_p1_i6,
1713
&cvtt::Tables::BC7SC::g_mode6_p1_i7,
1714
};
1715
1716
const cvtt::Tables::BC7SC::Table *tables7[] =
1717
{
1718
&cvtt::Tables::BC7SC::g_mode7_p00,
1719
&cvtt::Tables::BC7SC::g_mode7_p01,
1720
&cvtt::Tables::BC7SC::g_mode7_p10,
1721
&cvtt::Tables::BC7SC::g_mode7_p11,
1722
};
1723
1724
switch (mode)
1725
{
1726
case 0:
1727
{
1728
scTables = tables0;
1729
numSCTables = sizeof(tables0) / sizeof(tables0[0]);
1730
}
1731
break;
1732
case 1:
1733
{
1734
scTables = tables1;
1735
numSCTables = sizeof(tables1) / sizeof(tables1[0]);
1736
}
1737
break;
1738
case 2:
1739
{
1740
1741
scTables = tables2;
1742
numSCTables = sizeof(tables2) / sizeof(tables2[0]);
1743
}
1744
break;
1745
case 3:
1746
{
1747
scTables = tables3;
1748
numSCTables = sizeof(tables3) / sizeof(tables3[0]);
1749
}
1750
break;
1751
case 6:
1752
{
1753
scTables = tables6;
1754
numSCTables = sizeof(tables6) / sizeof(tables6[0]);
1755
}
1756
break;
1757
case 7:
1758
{
1759
scTables = tables7;
1760
numSCTables = sizeof(tables7) / sizeof(tables7[0]);
1761
}
1762
break;
1763
default:
1764
assert(false);
1765
break;
1766
}
1767
1768
TrySingleColorRGBAMultiTable(flags, pixels, average, numRealChannels, fragment, shapeLength, staticAlphaError, punchThroughInvalid, shapeBestError, shapeBestEP, fragmentBestIndexes, channelWeightsSq, scTables, numSCTables, rtn);
1769
}
1770
} // shapeIter
1771
1772
uint64_t partitionsEnabledBits = 0xffffffffffffffffULL;
1773
1774
switch (mode)
1775
{
1776
case 0:
1777
partitionsEnabledBits = encodingPlan.mode0PartitionEnabled;
1778
break;
1779
case 1:
1780
partitionsEnabledBits = encodingPlan.mode1PartitionEnabled;
1781
break;
1782
case 2:
1783
partitionsEnabledBits = encodingPlan.mode2PartitionEnabled;
1784
break;
1785
case 3:
1786
partitionsEnabledBits = encodingPlan.mode3PartitionEnabled;
1787
break;
1788
case 6:
1789
partitionsEnabledBits = encodingPlan.mode6Enabled ? 1 : 0;
1790
break;
1791
case 7:
1792
if (anyBlockHasAlpha)
1793
partitionEnabledBits = encodingPlan.mode7RGBAPartitionEnabled;
1794
else
1795
partitionEnabledBits = encodingPlan.mode7RGBPartitionEnabled;
1796
break;
1797
default:
1798
break;
1799
};
1800
1801
for (uint16_t partition = 0; partition < numPartitions; partition++)
1802
{
1803
if (((partitionsEnabledBits >> partition) & 1) == 0)
1804
continue;
1805
1806
const int *partitionShapes;
1807
if (numSubsets == 1)
1808
partitionShapes = BC7Data::g_shapes1[partition];
1809
else if (numSubsets == 2)
1810
partitionShapes = BC7Data::g_shapes2[partition];
1811
else
1812
{
1813
assert(numSubsets == 3);
1814
partitionShapes = BC7Data::g_shapes3[partition];
1815
}
1816
1817
MFloat totalError = ParallelMath::MakeFloatZero();
1818
for (int subset = 0; subset < numSubsets; subset++)
1819
totalError = totalError + temps.shapeBestError[partitionShapes[subset]];
1820
1821
ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(totalError, work.m_error);
1822
ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter);
1823
1824
if (mode == 7 && anyBlockHasAlpha)
1825
{
1826
// Some lanes could be better, but we filter them out to ensure consistency with scalar
1827
bool isRGBAllowedForThisPartition = (((encodingPlan.mode7RGBPartitionEnabled >> partition) & 1) != 0);
1828
1829
if (!isRGBAllowedForThisPartition)
1830
{
1831
errorBetter16 = (errorBetter16 & blockHasNonMaxAlpha);
1832
errorBetter = ParallelMath::Int16FlagToFloat(errorBetter16);
1833
}
1834
}
1835
1836
if (ParallelMath::AnySet(errorBetter16))
1837
{
1838
for (int subset = 0; subset < numSubsets; subset++)
1839
{
1840
int shape = partitionShapes[subset];
1841
int shapeStart = BC7Data::g_shapeRanges[shape][0];
1842
int shapeLength = BC7Data::g_shapeRanges[shape][1];
1843
1844
for (int epi = 0; epi < 2; epi++)
1845
for (int ch = 0; ch < 4; ch++)
1846
ParallelMath::ConditionalSet(work.m_ep[subset][epi][ch], errorBetter16, temps.shapeBestEP[shape][epi][ch]);
1847
1848
for (int pxi = 0; pxi < shapeLength; pxi++)
1849
{
1850
int px = BC7Data::g_fragments[shapeStart + pxi];
1851
ParallelMath::ConditionalSet(work.m_indexes[px], errorBetter16, temps.fragmentBestIndexes[shapeStart + pxi]);
1852
}
1853
}
1854
1855
ParallelMath::ConditionalSet(work.m_error, errorBetter, totalError);
1856
ParallelMath::ConditionalSet(work.m_mode, errorBetter16, ParallelMath::MakeUInt15(mode));
1857
ParallelMath::ConditionalSet(work.m_u.m_partition, errorBetter16, ParallelMath::MakeUInt15(partition));
1858
}
1859
}
1860
}
1861
}
1862
1863
void cvtt::Internal::BC7Computer::TryDualPlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn)
1864
{
1865
// TODO: These error calculations are not optimal for weight-by-alpha, but this routine needs to be mostly rewritten for that.
1866
// The alpha/color solutions are co-dependent in that case, but a good way to solve it would probably be to
1867
// solve the alpha channel first, then solve the RGB channels, which in turn breaks down into two cases:
1868
// - Separate alpha channel, then weighted RGB
1869
// - Alpha+2 other channels, then the independent channel
1870
if (numRefineRounds < 1)
1871
numRefineRounds = 1;
1872
1873
float channelWeightsSq[4];
1874
for (int ch = 0; ch < 4; ch++)
1875
channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch];
1876
1877
for (uint16_t mode = 4; mode <= 5; mode++)
1878
{
1879
int numSP[2] = { 0, 0 };
1880
1881
for (uint16_t rotation = 0; rotation < 4; rotation++)
1882
{
1883
if (mode == 4)
1884
{
1885
numSP[0] = encodingPlan.mode4SP[rotation][0];
1886
numSP[1] = encodingPlan.mode4SP[rotation][1];
1887
}
1888
else
1889
numSP[0] = numSP[1] = encodingPlan.mode5SP[rotation];
1890
1891
if (numSP[0] == 0 && numSP[1] == 0)
1892
continue;
1893
1894
int alphaChannel = (rotation + 3) & 3;
1895
int redChannel = (rotation == 1) ? 3 : 0;
1896
int greenChannel = (rotation == 2) ? 3 : 1;
1897
int blueChannel = (rotation == 3) ? 3 : 2;
1898
1899
MUInt15 rotatedRGB[16][3];
1900
MFloat floatRotatedRGB[16][3];
1901
1902
for (int px = 0; px < 16; px++)
1903
{
1904
rotatedRGB[px][0] = pixels[px][redChannel];
1905
rotatedRGB[px][1] = pixels[px][greenChannel];
1906
rotatedRGB[px][2] = pixels[px][blueChannel];
1907
1908
for (int ch = 0; ch < 3; ch++)
1909
floatRotatedRGB[px][ch] = ParallelMath::ToFloat(rotatedRGB[px][ch]);
1910
}
1911
1912
uint16_t maxIndexSelector = (mode == 4) ? 2 : 1;
1913
1914
float rotatedRGBWeights[3] = { channelWeights[redChannel], channelWeights[greenChannel], channelWeights[blueChannel] };
1915
float rotatedRGBWeightsSq[3] = { channelWeightsSq[redChannel], channelWeightsSq[greenChannel], channelWeightsSq[blueChannel] };
1916
float rotatedAlphaWeight[1] = { channelWeights[alphaChannel] };
1917
float rotatedAlphaWeightSq[1] = { channelWeightsSq[alphaChannel] };
1918
1919
float uniformWeight[1] = { 1.0f }; // Since the alpha channel is independent, there's no need to bother with weights when doing refinement or selection, only error
1920
1921
MFloat preWeightedRotatedRGB[16][3];
1922
BCCommon::PreWeightPixelsLDR<3>(preWeightedRotatedRGB, rotatedRGB, rotatedRGBWeights);
1923
1924
for (uint16_t indexSelector = 0; indexSelector < maxIndexSelector; indexSelector++)
1925
{
1926
int numTweakRounds = numSP[indexSelector];
1927
1928
if (numTweakRounds <= 0)
1929
continue;
1930
1931
if (numTweakRounds > MaxTweakRounds)
1932
numTweakRounds = MaxTweakRounds;
1933
1934
EndpointSelector<3, 8> rgbSelector;
1935
1936
for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++)
1937
{
1938
for (int px = 0; px < 16; px++)
1939
rgbSelector.ContributePass(preWeightedRotatedRGB[px], epPass, ParallelMath::MakeFloat(1.0f));
1940
1941
rgbSelector.FinishPass(epPass);
1942
}
1943
1944
MUInt15 alphaRange[2];
1945
1946
alphaRange[0] = alphaRange[1] = pixels[0][alphaChannel];
1947
for (int px = 1; px < 16; px++)
1948
{
1949
alphaRange[0] = ParallelMath::Min(pixels[px][alphaChannel], alphaRange[0]);
1950
alphaRange[1] = ParallelMath::Max(pixels[px][alphaChannel], alphaRange[1]);
1951
}
1952
1953
int rgbPrec = 0;
1954
int alphaPrec = 0;
1955
1956
if (mode == 4)
1957
{
1958
rgbPrec = indexSelector ? 3 : 2;
1959
alphaPrec = indexSelector ? 2 : 3;
1960
}
1961
else
1962
rgbPrec = alphaPrec = 2;
1963
1964
UnfinishedEndpoints<3> unfinishedRGB = rgbSelector.GetEndpoints(rotatedRGBWeights);
1965
1966
MFloat bestRGBError = ParallelMath::MakeFloat(FLT_MAX);
1967
MFloat bestAlphaError = ParallelMath::MakeFloat(FLT_MAX);
1968
1969
MUInt15 bestRGBIndexes[16];
1970
MUInt15 bestAlphaIndexes[16];
1971
MUInt15 bestEP[2][4];
1972
1973
for (int px = 0; px < 16; px++)
1974
bestRGBIndexes[px] = bestAlphaIndexes[px] = ParallelMath::MakeUInt15(0);
1975
1976
for (int tweak = 0; tweak < numTweakRounds; tweak++)
1977
{
1978
MUInt15 rgbEP[2][3];
1979
MUInt15 alphaEP[2];
1980
1981
unfinishedRGB.FinishLDR(tweak, 1 << rgbPrec, rgbEP[0], rgbEP[1]);
1982
1983
TweakAlpha(alphaRange, tweak, 1 << alphaPrec, alphaEP);
1984
1985
for (int refine = 0; refine < numRefineRounds; refine++)
1986
{
1987
if (mode == 4)
1988
CompressEndpoints4(rgbEP, alphaEP);
1989
else
1990
CompressEndpoints5(rgbEP, alphaEP);
1991
1992
1993
IndexSelector<1> alphaIndexSelector;
1994
IndexSelector<3> rgbIndexSelector;
1995
1996
{
1997
MUInt15 alphaEPTemp[2][1] = { { alphaEP[0] },{ alphaEP[1] } };
1998
alphaIndexSelector.Init<false>(uniformWeight, alphaEPTemp, 1 << alphaPrec);
1999
}
2000
rgbIndexSelector.Init<false>(rotatedRGBWeights, rgbEP, 1 << rgbPrec);
2001
2002
EndpointRefiner<3> rgbRefiner;
2003
EndpointRefiner<1> alphaRefiner;
2004
2005
rgbRefiner.Init(1 << rgbPrec, rotatedRGBWeights);
2006
alphaRefiner.Init(1 << alphaPrec, uniformWeight);
2007
2008
MFloat errorRGB = ParallelMath::MakeFloatZero();
2009
MFloat errorA = ParallelMath::MakeFloatZero();
2010
2011
MUInt15 rgbIndexes[16];
2012
MUInt15 alphaIndexes[16];
2013
2014
AggregatedError<3> rgbAggError;
2015
AggregatedError<1> alphaAggError;
2016
2017
for (int px = 0; px < 16; px++)
2018
{
2019
MUInt15 rgbIndex = rgbIndexSelector.SelectIndexLDR(floatRotatedRGB[px], rtn);
2020
MUInt15 alphaIndex = alphaIndexSelector.SelectIndexLDR(floatPixels[px] + alphaChannel, rtn);
2021
2022
MUInt15 reconstructedRGB[3];
2023
MUInt15 reconstructedAlpha[1];
2024
2025
rgbIndexSelector.ReconstructLDR_BC7(rgbIndex, reconstructedRGB);
2026
alphaIndexSelector.ReconstructLDR_BC7(alphaIndex, reconstructedAlpha);
2027
2028
if (flags & cvtt::Flags::BC7_FastIndexing)
2029
{
2030
BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], rgbAggError);
2031
BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, alphaAggError);
2032
}
2033
else
2034
{
2035
AggregatedError<3> baseRGBAggError;
2036
AggregatedError<1> baseAlphaAggError;
2037
2038
BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], baseRGBAggError);
2039
BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, baseAlphaAggError);
2040
2041
MFloat rgbError = baseRGBAggError.Finalize(flags, rotatedRGBWeightsSq);
2042
MFloat alphaError = baseAlphaAggError.Finalize(flags, rotatedAlphaWeightSq);
2043
2044
MUInt15 altRGBIndexes[2];
2045
MUInt15 altAlphaIndexes[2];
2046
2047
altRGBIndexes[0] = ParallelMath::Max(rgbIndex, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1);
2048
altRGBIndexes[1] = ParallelMath::Min(rgbIndex + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << rgbPrec) - 1)));
2049
2050
altAlphaIndexes[0] = ParallelMath::Max(alphaIndex, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1);
2051
altAlphaIndexes[1] = ParallelMath::Min(alphaIndex + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << alphaPrec) - 1)));
2052
2053
for (int ii = 0; ii < 2; ii++)
2054
{
2055
rgbIndexSelector.ReconstructLDR_BC7(altRGBIndexes[ii], reconstructedRGB);
2056
alphaIndexSelector.ReconstructLDR_BC7(altAlphaIndexes[ii], reconstructedAlpha);
2057
2058
AggregatedError<3> altRGBAggError;
2059
AggregatedError<1> altAlphaAggError;
2060
2061
BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], altRGBAggError);
2062
BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, altAlphaAggError);
2063
2064
MFloat altRGBError = altRGBAggError.Finalize(flags, rotatedRGBWeightsSq);
2065
MFloat altAlphaError = altAlphaAggError.Finalize(flags, rotatedAlphaWeightSq);
2066
2067
ParallelMath::Int16CompFlag rgbBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altRGBError, rgbError));
2068
ParallelMath::Int16CompFlag alphaBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altAlphaError, alphaError));
2069
2070
rgbError = ParallelMath::Min(altRGBError, rgbError);
2071
alphaError = ParallelMath::Min(altAlphaError, alphaError);
2072
2073
ParallelMath::ConditionalSet(rgbIndex, rgbBetter, altRGBIndexes[ii]);
2074
ParallelMath::ConditionalSet(alphaIndex, alphaBetter, altAlphaIndexes[ii]);
2075
}
2076
2077
errorRGB = errorRGB + rgbError;
2078
errorA = errorA + alphaError;
2079
}
2080
2081
if (refine != numRefineRounds - 1)
2082
{
2083
rgbRefiner.ContributeUnweightedPW(preWeightedRotatedRGB[px], rgbIndex);
2084
alphaRefiner.ContributeUnweightedPW(floatPixels[px] + alphaChannel, alphaIndex);
2085
}
2086
2087
if (flags & Flags::BC7_FastIndexing)
2088
{
2089
errorRGB = rgbAggError.Finalize(flags, rotatedRGBWeightsSq);
2090
errorA = alphaAggError.Finalize(flags, rotatedAlphaWeightSq);
2091
}
2092
2093
rgbIndexes[px] = rgbIndex;
2094
alphaIndexes[px] = alphaIndex;
2095
}
2096
2097
ParallelMath::FloatCompFlag rgbBetter = ParallelMath::Less(errorRGB, bestRGBError);
2098
ParallelMath::FloatCompFlag alphaBetter = ParallelMath::Less(errorA, bestAlphaError);
2099
2100
ParallelMath::Int16CompFlag rgbBetterInt16 = ParallelMath::FloatFlagToInt16(rgbBetter);
2101
ParallelMath::Int16CompFlag alphaBetterInt16 = ParallelMath::FloatFlagToInt16(alphaBetter);
2102
2103
if (ParallelMath::AnySet(rgbBetterInt16))
2104
{
2105
bestRGBError = ParallelMath::Min(errorRGB, bestRGBError);
2106
2107
for (int px = 0; px < 16; px++)
2108
ParallelMath::ConditionalSet(bestRGBIndexes[px], rgbBetterInt16, rgbIndexes[px]);
2109
2110
for (int ep = 0; ep < 2; ep++)
2111
{
2112
for (int ch = 0; ch < 3; ch++)
2113
ParallelMath::ConditionalSet(bestEP[ep][ch], rgbBetterInt16, rgbEP[ep][ch]);
2114
}
2115
}
2116
2117
if (ParallelMath::AnySet(alphaBetterInt16))
2118
{
2119
bestAlphaError = ParallelMath::Min(errorA, bestAlphaError);
2120
2121
for (int px = 0; px < 16; px++)
2122
ParallelMath::ConditionalSet(bestAlphaIndexes[px], alphaBetterInt16, alphaIndexes[px]);
2123
2124
for (int ep = 0; ep < 2; ep++)
2125
ParallelMath::ConditionalSet(bestEP[ep][3], alphaBetterInt16, alphaEP[ep]);
2126
}
2127
2128
if (refine != numRefineRounds - 1)
2129
{
2130
rgbRefiner.GetRefinedEndpointsLDR(rgbEP, rtn);
2131
2132
MUInt15 alphaEPTemp[2][1];
2133
alphaRefiner.GetRefinedEndpointsLDR(alphaEPTemp, rtn);
2134
2135
for (int i = 0; i < 2; i++)
2136
alphaEP[i] = alphaEPTemp[i][0];
2137
}
2138
} // refine
2139
} // tweak
2140
2141
MFloat combinedError = bestRGBError + bestAlphaError;
2142
2143
ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(combinedError, work.m_error);
2144
ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter);
2145
2146
work.m_error = ParallelMath::Min(combinedError, work.m_error);
2147
2148
ParallelMath::ConditionalSet(work.m_mode, errorBetter16, ParallelMath::MakeUInt15(mode));
2149
ParallelMath::ConditionalSet(work.m_u.m_isr.m_rotation, errorBetter16, ParallelMath::MakeUInt15(rotation));
2150
ParallelMath::ConditionalSet(work.m_u.m_isr.m_indexSelector, errorBetter16, ParallelMath::MakeUInt15(indexSelector));
2151
2152
for (int px = 0; px < 16; px++)
2153
{
2154
ParallelMath::ConditionalSet(work.m_indexes[px], errorBetter16, indexSelector ? bestAlphaIndexes[px] : bestRGBIndexes[px]);
2155
ParallelMath::ConditionalSet(work.m_indexes2[px], errorBetter16, indexSelector ? bestRGBIndexes[px] : bestAlphaIndexes[px]);
2156
}
2157
2158
for (int ep = 0; ep < 2; ep++)
2159
for (int ch = 0; ch < 4; ch++)
2160
ParallelMath::ConditionalSet(work.m_ep[0][ep][ch], errorBetter16, bestEP[ep][ch]);
2161
}
2162
}
2163
}
2164
}
2165
2166
template<class T>
2167
void cvtt::Internal::BC7Computer::Swap(T& a, T& b)
2168
{
2169
T temp = a;
2170
a = b;
2171
b = temp;
2172
}
2173
2174
void cvtt::Internal::BC7Computer::Pack(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds)
2175
{
2176
MUInt15 pixels[16][4];
2177
MFloat floatPixels[16][4];
2178
2179
for (int px = 0; px < 16; px++)
2180
{
2181
for (int ch = 0; ch < 4; ch++)
2182
ParallelMath::ConvertLDRInputs(inputs, px, ch, pixels[px][ch]);
2183
}
2184
2185
for (int px = 0; px < 16; px++)
2186
{
2187
for (int ch = 0; ch < 4; ch++)
2188
floatPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]);
2189
}
2190
2191
BC67::WorkInfo work;
2192
memset(&work, 0, sizeof(work));
2193
2194
work.m_error = ParallelMath::MakeFloat(FLT_MAX);
2195
2196
{
2197
ParallelMath::RoundTowardNearestForScope rtn;
2198
TrySinglePlane(flags, pixels, floatPixels, channelWeights, encodingPlan, numRefineRounds, work, &rtn);
2199
TryDualPlane(flags, pixels, floatPixels, channelWeights, encodingPlan, numRefineRounds, work, &rtn);
2200
}
2201
2202
for (int block = 0; block < ParallelMath::ParallelSize; block++)
2203
{
2204
PackingVector pv;
2205
pv.Init();
2206
2207
ParallelMath::ScalarUInt16 mode = ParallelMath::Extract(work.m_mode, block);
2208
ParallelMath::ScalarUInt16 partition = ParallelMath::Extract(work.m_u.m_partition, block);
2209
ParallelMath::ScalarUInt16 indexSelector = ParallelMath::Extract(work.m_u.m_isr.m_indexSelector, block);
2210
2211
const BC7Data::BC7ModeInfo& modeInfo = BC7Data::g_modes[mode];
2212
2213
ParallelMath::ScalarUInt16 indexes[16];
2214
ParallelMath::ScalarUInt16 indexes2[16];
2215
ParallelMath::ScalarUInt16 endPoints[3][2][4];
2216
2217
for (int i = 0; i < 16; i++)
2218
{
2219
indexes[i] = ParallelMath::Extract(work.m_indexes[i], block);
2220
if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
2221
indexes2[i] = ParallelMath::Extract(work.m_indexes2[i], block);
2222
}
2223
2224
for (int subset = 0; subset < 3; subset++)
2225
{
2226
for (int ep = 0; ep < 2; ep++)
2227
{
2228
for (int ch = 0; ch < 4; ch++)
2229
endPoints[subset][ep][ch] = ParallelMath::Extract(work.m_ep[subset][ep][ch], block);
2230
}
2231
}
2232
2233
int fixups[3] = { 0, 0, 0 };
2234
2235
if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
2236
{
2237
bool flipRGB = ((indexes[0] & (1 << (modeInfo.m_indexBits - 1))) != 0);
2238
bool flipAlpha = ((indexes2[0] & (1 << (modeInfo.m_alphaIndexBits - 1))) != 0);
2239
2240
if (flipRGB)
2241
{
2242
uint16_t highIndex = (1 << modeInfo.m_indexBits) - 1;
2243
for (int px = 0; px < 16; px++)
2244
indexes[px] = highIndex - indexes[px];
2245
}
2246
2247
if (flipAlpha)
2248
{
2249
uint16_t highIndex = (1 << modeInfo.m_alphaIndexBits) - 1;
2250
for (int px = 0; px < 16; px++)
2251
indexes2[px] = highIndex - indexes2[px];
2252
}
2253
2254
if (indexSelector)
2255
Swap(flipRGB, flipAlpha);
2256
2257
if (flipRGB)
2258
{
2259
for (int ch = 0; ch < 3; ch++)
2260
Swap(endPoints[0][0][ch], endPoints[0][1][ch]);
2261
}
2262
if (flipAlpha)
2263
Swap(endPoints[0][0][3], endPoints[0][1][3]);
2264
2265
}
2266
else
2267
{
2268
if (modeInfo.m_numSubsets == 2)
2269
fixups[1] = BC7Data::g_fixupIndexes2[partition];
2270
else if (modeInfo.m_numSubsets == 3)
2271
{
2272
fixups[1] = BC7Data::g_fixupIndexes3[partition][0];
2273
fixups[2] = BC7Data::g_fixupIndexes3[partition][1];
2274
}
2275
2276
bool flip[3] = { false, false, false };
2277
for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
2278
flip[subset] = ((indexes[fixups[subset]] & (1 << (modeInfo.m_indexBits - 1))) != 0);
2279
2280
if (flip[0] || flip[1] || flip[2])
2281
{
2282
uint16_t highIndex = (1 << modeInfo.m_indexBits) - 1;
2283
for (int px = 0; px < 16; px++)
2284
{
2285
int subset = 0;
2286
if (modeInfo.m_numSubsets == 2)
2287
subset = (BC7Data::g_partitionMap[partition] >> px) & 1;
2288
else if (modeInfo.m_numSubsets == 3)
2289
subset = (BC7Data::g_partitionMap2[partition] >> (px * 2)) & 3;
2290
2291
if (flip[subset])
2292
indexes[px] = highIndex - indexes[px];
2293
}
2294
2295
int maxCH = (modeInfo.m_alphaMode == BC7Data::AlphaMode_Combined) ? 4 : 3;
2296
for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
2297
{
2298
if (flip[subset])
2299
for (int ch = 0; ch < maxCH; ch++)
2300
Swap(endPoints[subset][0][ch], endPoints[subset][1][ch]);
2301
}
2302
}
2303
}
2304
2305
pv.Pack(static_cast<uint8_t>(1 << mode), mode + 1);
2306
2307
if (modeInfo.m_partitionBits)
2308
pv.Pack(partition, modeInfo.m_partitionBits);
2309
2310
if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
2311
{
2312
ParallelMath::ScalarUInt16 rotation = ParallelMath::Extract(work.m_u.m_isr.m_rotation, block);
2313
pv.Pack(rotation, 2);
2314
}
2315
2316
if (modeInfo.m_hasIndexSelector)
2317
pv.Pack(indexSelector, 1);
2318
2319
// Encode RGB
2320
for (int ch = 0; ch < 3; ch++)
2321
{
2322
for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
2323
{
2324
for (int ep = 0; ep < 2; ep++)
2325
{
2326
ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][ch];
2327
epPart >>= (8 - modeInfo.m_rgbBits);
2328
2329
pv.Pack(epPart, modeInfo.m_rgbBits);
2330
}
2331
}
2332
}
2333
2334
// Encode alpha
2335
if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
2336
{
2337
for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
2338
{
2339
for (int ep = 0; ep < 2; ep++)
2340
{
2341
ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][3];
2342
epPart >>= (8 - modeInfo.m_alphaBits);
2343
2344
pv.Pack(epPart, modeInfo.m_alphaBits);
2345
}
2346
}
2347
}
2348
2349
// Encode parity bits
2350
if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerSubset)
2351
{
2352
for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
2353
{
2354
ParallelMath::ScalarUInt16 epPart = endPoints[subset][0][0];
2355
epPart >>= (7 - modeInfo.m_rgbBits);
2356
epPart &= 1;
2357
2358
pv.Pack(epPart, 1);
2359
}
2360
}
2361
else if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerEndpoint)
2362
{
2363
for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
2364
{
2365
for (int ep = 0; ep < 2; ep++)
2366
{
2367
ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][0];
2368
epPart >>= (7 - modeInfo.m_rgbBits);
2369
epPart &= 1;
2370
2371
pv.Pack(epPart, 1);
2372
}
2373
}
2374
}
2375
2376
// Encode indexes
2377
for (int px = 0; px < 16; px++)
2378
{
2379
int bits = modeInfo.m_indexBits;
2380
if ((px == 0) || (px == fixups[1]) || (px == fixups[2]))
2381
bits--;
2382
2383
pv.Pack(indexes[px], bits);
2384
}
2385
2386
// Encode secondary indexes
2387
if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
2388
{
2389
for (int px = 0; px < 16; px++)
2390
{
2391
int bits = modeInfo.m_alphaIndexBits;
2392
if (px == 0)
2393
bits--;
2394
2395
pv.Pack(indexes2[px], bits);
2396
}
2397
}
2398
2399
pv.Flush(packedBlocks);
2400
2401
packedBlocks += 16;
2402
}
2403
}
2404
2405
// Decodes one packed BC7 block into 16 RGBA8 pixels.
//
// output      - receives the decoded pixels in output.m_pixels[px][ch].
// packedBlock - the encoded block, read through an UnpackingVector.
//
// If none of the first 8 bits is set (no valid mode), every output channel is set to 0.
void cvtt::Internal::BC7Computer::UnpackOne(PixelBlockU8 &output, const uint8_t* packedBlock)
{
	UnpackingVector pv;
	pv.Init(packedBlock);

	// The mode is the position of the first set bit.
	int mode = 8;
	for (int i = 0; i < 8; i++)
	{
		if (pv.Unpack(1) == 1)
		{
			mode = i;
			break;
		}
	}

	if (mode > 7)
	{
		for (int px = 0; px < 16; px++)
			for (int ch = 0; ch < 4; ch++)
				output.m_pixels[px][ch] = 0;

		return;
	}

	const BC7Data::BC7ModeInfo &modeInfo = BC7Data::g_modes[mode];

	int partition = 0;
	if (modeInfo.m_partitionBits)
		partition = pv.Unpack(modeInfo.m_partitionBits);

	int rotation = 0;
	if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
		rotation = pv.Unpack(2);

	int indexSelector = 0;
	if (modeInfo.m_hasIndexSelector)
		indexSelector = pv.Unpack(1);

	// Resolve fixups (anchor pixels whose index is stored with one bit fewer)
	int fixups[3] = { 0, 0, 0 };

	if (modeInfo.m_alphaMode != BC7Data::AlphaMode_Separate)
	{
		if (modeInfo.m_numSubsets == 2)
			fixups[1] = BC7Data::g_fixupIndexes2[partition];
		else if (modeInfo.m_numSubsets == 3)
		{
			fixups[1] = BC7Data::g_fixupIndexes3[partition][0];
			fixups[2] = BC7Data::g_fixupIndexes3[partition][1];
		}
	}

	int endPoints[3][2][4];

	// Decode RGB: stored bits are placed at the top of an 8-bit value
	for (int ch = 0; ch < 3; ch++)
	{
		for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
		{
			for (int ep = 0; ep < 2; ep++)
				endPoints[subset][ep][ch] = (pv.Unpack(modeInfo.m_rgbBits) << (8 - modeInfo.m_rgbBits));
		}
	}

	// Decode alpha, or default it to opaque for modes without alpha
	if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
	{
		for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
		{
			for (int ep = 0; ep < 2; ep++)
				endPoints[subset][ep][3] = (pv.Unpack(modeInfo.m_alphaBits) << (8 - modeInfo.m_alphaBits));
		}
	}
	else
	{
		for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
		{
			for (int ep = 0; ep < 2; ep++)
				endPoints[subset][ep][3] = 255;
		}
	}

	int parityBits = 0;

	// Decode parity bits: each one is inserted directly below the stored endpoint bits
	if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerSubset)
	{
		for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
		{
			int p = pv.Unpack(1);

			for (int ep = 0; ep < 2; ep++)
			{
				for (int ch = 0; ch < 3; ch++)
					endPoints[subset][ep][ch] |= p << (7 - modeInfo.m_rgbBits);

				if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
					endPoints[subset][ep][3] |= p << (7 - modeInfo.m_alphaBits);
			}
		}

		parityBits = 1;
	}
	else if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerEndpoint)
	{
		for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
		{
			for (int ep = 0; ep < 2; ep++)
			{
				int p = pv.Unpack(1);

				for (int ch = 0; ch < 3; ch++)
					endPoints[subset][ep][ch] |= p << (7 - modeInfo.m_rgbBits);

				if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
					endPoints[subset][ep][3] |= p << (7 - modeInfo.m_alphaBits);
			}
		}

		parityBits = 1;
	}

	// Fill endpoint bits: replicate the high bits into the unused low bits
	for (int subset = 0; subset < modeInfo.m_numSubsets; subset++)
	{
		for (int ep = 0; ep < 2; ep++)
		{
			for (int ch = 0; ch < 3; ch++)
				endPoints[subset][ep][ch] |= (endPoints[subset][ep][ch] >> (modeInfo.m_rgbBits + parityBits));

			if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
				endPoints[subset][ep][3] |= (endPoints[subset][ep][3] >> (modeInfo.m_alphaBits + parityBits));
		}
	}

	int indexes[16];
	int indexes2[16];

	// Decode indexes
	for (int px = 0; px < 16; px++)
	{
		int bits = modeInfo.m_indexBits;
		if ((px == 0) || (px == fixups[1]) || (px == fixups[2]))
			bits--;

		indexes[px] = pv.Unpack(bits);
	}

	// Decode secondary indexes
	if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
	{
		for (int px = 0; px < 16; px++)
		{
			int bits = modeInfo.m_alphaIndexBits;
			if (px == 0)
				bits--;

			indexes2[px] = pv.Unpack(bits);
		}
	}
	else
	{
		for (int px = 0; px < 16; px++)
			indexes2[px] = 0;
	}

	const int *alphaWeights = BC7Data::g_weightTables[modeInfo.m_alphaIndexBits];
	const int *rgbWeights = BC7Data::g_weightTables[modeInfo.m_indexBits];

	// Decode each pixel
	for (int px = 0; px < 16; px++)
	{
		int rgbWeight = rgbWeights[indexes[px]];
		int alphaWeight = 0;

		if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Combined)
			alphaWeight = rgbWeight;
		else if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate)
			alphaWeight = alphaWeights[indexes2[px]];

		// The index selector exchanges which index list drives which plane.
		if (indexSelector == 1)
		{
			int temp = rgbWeight;
			rgbWeight = alphaWeight;
			alphaWeight = temp;
		}

		int pixel[4] = { 0, 0, 0, 255 };

		int subset = 0;

		if (modeInfo.m_numSubsets == 2)
			subset = (BC7Data::g_partitionMap[partition] >> px) & 1;
		else if (modeInfo.m_numSubsets == 3)
			subset = (BC7Data::g_partitionMap2[partition] >> (px * 2)) & 3;

		// Interpolate between the two endpoints with 6-bit weights, rounding to nearest.
		for (int ch = 0; ch < 3; ch++)
			pixel[ch] = ((64 - rgbWeight) * endPoints[subset][0][ch] + rgbWeight * endPoints[subset][1][ch] + 32) >> 6;

		if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None)
			pixel[3] = ((64 - alphaWeight) * endPoints[subset][0][3] + alphaWeight * endPoints[subset][1][3] + 32) >> 6;

		// Undo the channel rotation: rotation N swaps channel N - 1 with alpha.
		if (rotation != 0)
		{
			int ch = rotation - 1;
			int temp = pixel[ch];
			pixel[ch] = pixel[3];
			pixel[3] = temp;
		}

		for (int ch = 0; ch < 4; ch++)
			output.m_pixels[px][ch] = static_cast<uint8_t>(pixel[ch]);
	}
}
2623
2624
// Quantizes one signed endpoint component to `precision` bits.
//
// elem2CL   - component to quantize; asserted to lie strictly between
//             -31744 and 31744 in every lane.
// precision - target bit count; the rounded magnitude is shifted right by
//             (16 - precision).
// ru        - rounding scope forwarded to RoundAndConvertToU15.
//
// Returns the quantized value with the sign of the input reapplied.
cvtt::ParallelMath::SInt16 cvtt::Internal::BC6HComputer::QuantizeSingleEndpointElementSigned(const MSInt16 &elem2CL, int precision, const ParallelMath::RoundUpForScope* ru)
{
    assert(ParallelMath::AllSet(ParallelMath::Less(elem2CL, ParallelMath::MakeSInt16(31744))));
    assert(ParallelMath::AllSet(ParallelMath::Less(ParallelMath::MakeSInt16(-31744), elem2CL)));

    MSInt16 zero = ParallelMath::MakeSInt16(0);

    // Work on the magnitude and remember which lanes were negative
    ParallelMath::Int16CompFlag negativeLanes = ParallelMath::Less(elem2CL, zero);
    MUInt15 magnitude = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Select(negativeLanes, zero - elem2CL, elem2CL));

    // Expand to full range, then discard the bits that do not fit the target precision
    MFloat expanded = ParallelMath::ToFloat(magnitude) * 32.0f / 31.0f;
    magnitude = ParallelMath::RightShift(ParallelMath::RoundAndConvertToU15(expanded, ru), 16 - precision);

    // Reapply the sign
    MSInt16 quantized = ParallelMath::LosslessCast<MSInt16>::Cast(magnitude);
    return ParallelMath::Select(negativeLanes, zero - quantized, quantized);
}
2639
2640
// Quantizes one unsigned endpoint component to `precision` bits.
//
// The component is scaled by 64/31, clamped to 65535, rounded to an unsigned
// 16-bit value using the supplied rounding scope, and then shifted right by
// (16 - precision).
cvtt::ParallelMath::UInt15 cvtt::Internal::BC6HComputer::QuantizeSingleEndpointElementUnsigned(const MUInt15 &elem, int precision, const ParallelMath::RoundUpForScope* ru)
{
    // Expand to the full 16-bit range, saturating at the top
    MFloat scaled = ParallelMath::ToFloat(elem) * 64.0f / 31.0f;
    MFloat clamped = ParallelMath::Min(scaled, ParallelMath::MakeFloat(65535.0f));
    MUInt16 fullRange = ParallelMath::RoundAndConvertToU16(clamped, ru);

    // Keep only the top `precision` bits
    return ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(fullRange, 16 - precision));
}
2645
2646
void cvtt::Internal::BC6HComputer::UnquantizeSingleEndpointElementSigned(const MSInt16 &comp, int precision, MSInt16 &outUnquantized, MSInt16 &outUnquantizedFinished2CL)
2647
{
2648
MSInt16 zero = ParallelMath::MakeSInt16(0);
2649
2650
ParallelMath::Int16CompFlag negative = ParallelMath::Less(comp, zero);
2651
MUInt15 absComp = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Select(negative, MSInt16(zero - comp), comp));
2652
2653
MSInt16 unq;
2654
MUInt15 absUnq;
2655
2656
if (precision >= 16)
2657
{
2658
unq = comp;
2659
absUnq = absComp;
2660
}
2661
else
2662
{
2663
MSInt16 maxCompMinusOne = ParallelMath::MakeSInt16(static_cast<int16_t>((1 << (precision - 1)) - 2));
2664
ParallelMath::Int16CompFlag isZero = ParallelMath::Equal(comp, zero);
2665
ParallelMath::Int16CompFlag isMax = ParallelMath::Less(maxCompMinusOne, comp);
2666
2667
absUnq = (absComp << (16 - precision)) + ParallelMath::MakeUInt15(static_cast<uint16_t>(0x4000 >> (precision - 1)));
2668
ParallelMath::ConditionalSet(absUnq, isZero, ParallelMath::MakeUInt15(0));
2669
ParallelMath::ConditionalSet(absUnq, isMax, ParallelMath::MakeUInt15(0x7fff));
2670
2671
unq = ParallelMath::ConditionalNegate(negative, ParallelMath::LosslessCast<MSInt16>::Cast(absUnq));
2672
}
2673
2674
outUnquantized = unq;
2675
2676
MUInt15 funq = ParallelMath::ToUInt15(ParallelMath::RightShift(ParallelMath::XMultiply(absUnq, ParallelMath::MakeUInt15(31)), 5));
2677
2678
outUnquantizedFinished2CL = ParallelMath::ConditionalNegate(negative, ParallelMath::LosslessCast<MSInt16>::Cast(funq));
2679
}
2680
2681
void cvtt::Internal::BC6HComputer::UnquantizeSingleEndpointElementUnsigned(const MUInt15 &comp, int precision, MUInt16 &outUnquantized, MUInt16 &outUnquantizedFinished)
2682
{
2683
MUInt16 unq = ParallelMath::LosslessCast<MUInt16>::Cast(comp);
2684
if (precision < 15)
2685
{
2686
MUInt15 zero = ParallelMath::MakeUInt15(0);
2687
MUInt15 maxCompMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << precision) - 2));
2688
2689
ParallelMath::Int16CompFlag isZero = ParallelMath::Equal(comp, zero);
2690
ParallelMath::Int16CompFlag isMax = ParallelMath::Less(maxCompMinusOne, comp);
2691
2692
unq = (ParallelMath::LosslessCast<MUInt16>::Cast(comp) << (16 - precision)) + ParallelMath::MakeUInt16(static_cast<uint16_t>(0x8000 >> precision));
2693
2694
ParallelMath::ConditionalSet(unq, isZero, ParallelMath::MakeUInt16(0));
2695
ParallelMath::ConditionalSet(unq, isMax, ParallelMath::MakeUInt16(0xffff));
2696
}
2697
2698
outUnquantized = unq;
2699
outUnquantizedFinished = ParallelMath::ToUInt16(ParallelMath::RightShift(ParallelMath::XMultiply(unq, ParallelMath::MakeUInt15(31)), 6));
2700
}
2701
2702
void cvtt::Internal::BC6HComputer::QuantizeEndpointsSigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn)
2703
{
2704
MSInt16 unquantizedEP[2][3];
2705
MSInt16 finishedUnquantizedEP[2][3];
2706
2707
{
2708
ParallelMath::RoundUpForScope ru;
2709
2710
for (int epi = 0; epi < 2; epi++)
2711
{
2712
for (int ch = 0; ch < 3; ch++)
2713
{
2714
MSInt16 qee = QuantizeSingleEndpointElementSigned(endPoints[epi][ch], precision, &ru);
2715
UnquantizeSingleEndpointElementSigned(qee, precision, unquantizedEP[epi][ch], finishedUnquantizedEP[epi][ch]);
2716
quantizedEndPoints[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(qee);
2717
}
2718
}
2719
}
2720
2721
indexSelector.Init(channelWeights, unquantizedEP, finishedUnquantizedEP, indexRange);
2722
indexSelector.InitHDR(indexRange, true, fastIndexing, channelWeights);
2723
2724
MUInt15 halfRangeMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange / 2) - 1);
2725
2726
MUInt15 index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixelsColorSpace[fixupIndex], rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[fixupIndex], rtn);
2727
2728
ParallelMath::Int16CompFlag invert = ParallelMath::Less(halfRangeMinusOne, index);
2729
2730
if (ParallelMath::AnySet(invert))
2731
{
2732
ParallelMath::ConditionalSet(index, invert, MUInt15(ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange - 1)) - index));
2733
2734
indexSelector.ConditionalInvert(invert);
2735
2736
for (int ch = 0; ch < 3; ch++)
2737
{
2738
MAInt16 firstEP = quantizedEndPoints[0][ch];
2739
MAInt16 secondEP = quantizedEndPoints[1][ch];
2740
2741
quantizedEndPoints[0][ch] = ParallelMath::Select(invert, secondEP, firstEP);
2742
quantizedEndPoints[1][ch] = ParallelMath::Select(invert, firstEP, secondEP);
2743
}
2744
}
2745
2746
indexes[fixupIndex] = index;
2747
}
2748
2749
void cvtt::Internal::BC6HComputer::QuantizeEndpointsUnsigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn)
2750
{
2751
MUInt16 unquantizedEP[2][3];
2752
MUInt16 finishedUnquantizedEP[2][3];
2753
2754
{
2755
ParallelMath::RoundUpForScope ru;
2756
2757
for (int epi = 0; epi < 2; epi++)
2758
{
2759
for (int ch = 0; ch < 3; ch++)
2760
{
2761
MUInt15 qee = QuantizeSingleEndpointElementUnsigned(ParallelMath::LosslessCast<MUInt15>::Cast(endPoints[epi][ch]), precision, &ru);
2762
UnquantizeSingleEndpointElementUnsigned(qee, precision, unquantizedEP[epi][ch], finishedUnquantizedEP[epi][ch]);
2763
quantizedEndPoints[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(qee);
2764
}
2765
}
2766
}
2767
2768
indexSelector.Init(channelWeights, unquantizedEP, finishedUnquantizedEP, indexRange);
2769
indexSelector.InitHDR(indexRange, false, fastIndexing, channelWeights);
2770
2771
MUInt15 halfRangeMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange / 2) - 1);
2772
2773
MUInt15 index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixelsColorSpace[fixupIndex], rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[fixupIndex], rtn);
2774
2775
ParallelMath::Int16CompFlag invert = ParallelMath::Less(halfRangeMinusOne, index);
2776
2777
if (ParallelMath::AnySet(invert))
2778
{
2779
ParallelMath::ConditionalSet(index, invert, MUInt15(ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange - 1)) - index));
2780
2781
indexSelector.ConditionalInvert(invert);
2782
2783
for (int ch = 0; ch < 3; ch++)
2784
{
2785
MAInt16 firstEP = quantizedEndPoints[0][ch];
2786
MAInt16 secondEP = quantizedEndPoints[1][ch];
2787
2788
quantizedEndPoints[0][ch] = ParallelMath::Select(invert, secondEP, firstEP);
2789
quantizedEndPoints[1][ch] = ParallelMath::Select(invert, firstEP, secondEP);
2790
}
2791
}
2792
2793
indexes[fixupIndex] = index;
2794
}
2795
2796
void cvtt::Internal::BC6HComputer::EvaluatePartitionedLegality(const MAInt16 ep0[2][3], const MAInt16 ep1[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][2][3], ParallelMath::Int16CompFlag& outIsLegal)
2797
{
2798
ParallelMath::Int16CompFlag allLegal = ParallelMath::MakeBoolInt16(true);
2799
2800
MAInt16 aSignificantMask = ParallelMath::MakeAInt16(static_cast<int16_t>((1 << aPrec) - 1));
2801
2802
for (int ch = 0; ch < 3; ch++)
2803
{
2804
outEncodedEPs[0][0][ch] = ep0[0][ch];
2805
outEncodedEPs[0][1][ch] = ep0[1][ch];
2806
outEncodedEPs[1][0][ch] = ep1[0][ch];
2807
outEncodedEPs[1][1][ch] = ep1[1][ch];
2808
2809
if (isTransformed)
2810
{
2811
for (int subset = 0; subset < 2; subset++)
2812
{
2813
for (int epi = 0; epi < 2; epi++)
2814
{
2815
if (epi == 0 && subset == 0)
2816
continue;
2817
2818
MAInt16 bReduced = (outEncodedEPs[subset][epi][ch] & aSignificantMask);
2819
2820
MSInt16 delta = ParallelMath::TruncateToPrecisionSigned(ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::AbstractSubtract(outEncodedEPs[subset][epi][ch], outEncodedEPs[0][0][ch])), bPrec[ch]);
2821
2822
outEncodedEPs[subset][epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(delta);
2823
2824
MAInt16 reconstructed = (ParallelMath::AbstractAdd(outEncodedEPs[subset][epi][ch], outEncodedEPs[0][0][ch]) & aSignificantMask);
2825
allLegal = allLegal & ParallelMath::Equal(reconstructed, bReduced);
2826
}
2827
}
2828
}
2829
2830
if (!ParallelMath::AnySet(allLegal))
2831
break;
2832
}
2833
2834
outIsLegal = allLegal;
2835
}
2836
2837
void cvtt::Internal::BC6HComputer::EvaluateSingleLegality(const MAInt16 ep[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][3], ParallelMath::Int16CompFlag& outIsLegal)
2838
{
2839
ParallelMath::Int16CompFlag allLegal = ParallelMath::MakeBoolInt16(true);
2840
2841
MAInt16 aSignificantMask = ParallelMath::MakeAInt16(static_cast<int16_t>((1 << aPrec) - 1));
2842
2843
for (int ch = 0; ch < 3; ch++)
2844
{
2845
outEncodedEPs[0][ch] = ep[0][ch];
2846
outEncodedEPs[1][ch] = ep[1][ch];
2847
2848
if (isTransformed)
2849
{
2850
MAInt16 bReduced = (outEncodedEPs[1][ch] & aSignificantMask);
2851
2852
MSInt16 delta = ParallelMath::TruncateToPrecisionSigned(ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::AbstractSubtract(outEncodedEPs[1][ch], outEncodedEPs[0][ch])), bPrec[ch]);
2853
2854
outEncodedEPs[1][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(delta);
2855
2856
MAInt16 reconstructed = (ParallelMath::AbstractAdd(outEncodedEPs[1][ch], outEncodedEPs[0][ch]) & aSignificantMask);
2857
allLegal = allLegal & ParallelMath::Equal(reconstructed, bReduced);
2858
}
2859
}
2860
2861
outIsLegal = allLegal;
2862
}
2863
2864
// Encodes ParallelMath::ParallelSize blocks of 16 half-float pixels into BC6H,
// writing 16 bytes per block to packedBlocks.
//
// flags            - encoder flags; cvtt::Flags::BC6H_FastIndexing selects the
//                    fast index-selection and error paths. The flags are also
//                    forwarded to the error functions.
// inputs           - source pixel blocks, read through ConvertHDRInputs.
// packedBlocks     - output buffer; block b is written at packedBlocks + 16 * b.
// channelWeights   - per-channel weights; only the first three entries are
//                    squared here, and the pointer is also passed to helpers.
// isSigned         - selects the signed or unsigned quantization/reconstruction
//                    paths.
// numTweakRounds   - clamped to [1, MaxTweakRounds].
// numRefineRounds  - clamped to [1, MaxRefineRounds].
//
// Overview: candidate endpoints are generated for the single-subset case and
// for each of the 32 two-subset partitions, quantized at every endpoint
// precision that has a mode, and scored. The lowest-error candidate that is
// legal in some mode is kept per lane and finally bit-packed.
void cvtt::Internal::BC6HComputer::Pack(uint32_t flags, const PixelBlockF16* inputs, uint8_t* packedBlocks, const float channelWeights[4], bool isSigned, int numTweakRounds, int numRefineRounds)
2865
{
2866
// Clamp the requested round counts to the supported range
if (numTweakRounds < 1)
2867
numTweakRounds = 1;
2868
else if (numTweakRounds > MaxTweakRounds)
2869
numTweakRounds = MaxTweakRounds;
2870
2871
if (numRefineRounds < 1)
2872
numRefineRounds = 1;
2873
else if (numRefineRounds > MaxRefineRounds)
2874
numRefineRounds = MaxRefineRounds;
2875
2876
bool fastIndexing = ((flags & cvtt::Flags::BC6H_FastIndexing) != 0);
2877
float channelWeightsSq[3];
2878
2879
ParallelMath::RoundTowardNearestForScope rtn;
2880
2881
MSInt16 pixels[16][3];
2882
MFloat floatPixels2CL[16][3];
2883
MFloat floatPixelsLinearWeighted[16][3];
2884
2885
MSInt16 low15Bits = ParallelMath::MakeSInt16(32767);
2886
2887
for (int ch = 0; ch < 3; ch++)
2888
channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch];
2889
2890
// Load every input component and clamp it: to [-31743, 31743] when signed,
// to [0, 31743] when unsigned
for (int px = 0; px < 16; px++)
2891
{
2892
for (int ch = 0; ch < 3; ch++)
2893
{
2894
MSInt16 pixelValue;
2895
ParallelMath::ConvertHDRInputs(inputs, px, ch, pixelValue);
2896
2897
// Convert from sign+magnitude to 2CL
2898
if (isSigned)
2899
{
2900
ParallelMath::Int16CompFlag negative = ParallelMath::Less(pixelValue, ParallelMath::MakeSInt16(0));
2901
MSInt16 magnitude = (pixelValue & low15Bits);
2902
ParallelMath::ConditionalSet(pixelValue, negative, ParallelMath::MakeSInt16(0) - magnitude);
2903
pixelValue = ParallelMath::Max(pixelValue, ParallelMath::MakeSInt16(-31743));
2904
}
2905
else
2906
pixelValue = ParallelMath::Max(pixelValue, ParallelMath::MakeSInt16(0));
2907
2908
pixelValue = ParallelMath::Min(pixelValue, ParallelMath::MakeSInt16(31743));
2909
2910
pixels[px][ch] = pixelValue;
2911
floatPixels2CL[px][ch] = ParallelMath::ToFloat(pixelValue);
2912
floatPixelsLinearWeighted[px][ch] = ParallelMath::TwosCLHalfToFloat(pixelValue) * channelWeights[ch];
2913
}
2914
}
2915
2916
MFloat preWeightedPixels[16][3];
2917
2918
BCCommon::PreWeightPixelsHDR<3>(preWeightedPixels, pixels, channelWeights);
2919
2920
// Best result found so far. These are updated with ConditionalSet only in
// lanes where a legal candidate has lower error than the current best.
MAInt16 bestEndPoints[2][2][3];
2921
MUInt15 bestIndexes[16];
2922
MFloat bestError = ParallelMath::MakeFloat(FLT_MAX);
2923
MUInt15 bestMode = ParallelMath::MakeUInt15(0);
2924
MUInt15 bestPartition = ParallelMath::MakeUInt15(0);
2925
2926
for (int px = 0; px < 16; px++)
2927
bestIndexes[px] = ParallelMath::MakeUInt15(0);
2928
2929
for (int subset = 0; subset < 2; subset++)
2930
for (int epi = 0; epi < 2; epi++)
2931
for (int ch = 0; ch < 3; ch++)
2932
bestEndPoints[subset][epi][ch] = ParallelMath::MakeAInt16(0);
2933
2934
UnfinishedEndpoints<3> partitionedUFEP[32][2];
2935
UnfinishedEndpoints<3> singleUFEP;
2936
2937
// Generate UFEP for partitions
2938
for (int p = 0; p < 32; p++)
2939
{
2940
int partitionMask = BC7Data::g_partitionMap[p];
2941
2942
EndpointSelector<3, 8> epSelectors[2];
2943
2944
for (int pass = 0; pass < NumEndpointSelectorPasses; pass++)
2945
{
2946
for (int px = 0; px < 16; px++)
2947
{
2948
// Bit px of the partition mask gives the subset that pixel px belongs to
int subset = (partitionMask >> px) & 1;
2949
epSelectors[subset].ContributePass(preWeightedPixels[px], pass, ParallelMath::MakeFloat(1.0f));
2950
}
2951
2952
for (int subset = 0; subset < 2; subset++)
2953
epSelectors[subset].FinishPass(pass);
2954
}
2955
2956
for (int subset = 0; subset < 2; subset++)
2957
partitionedUFEP[p][subset] = epSelectors[subset].GetEndpoints(channelWeights);
2958
}
2959
2960
// Generate UFEP for single
2961
{
2962
EndpointSelector<3, 8> epSelector;
2963
2964
for (int pass = 0; pass < NumEndpointSelectorPasses; pass++)
2965
{
2966
for (int px = 0; px < 16; px++)
2967
epSelector.ContributePass(preWeightedPixels[px], pass, ParallelMath::MakeFloat(1.0f));
2968
2969
epSelector.FinishPass(pass);
2970
}
2971
2972
singleUFEP = epSelector.GetEndpoints(channelWeights);
2973
}
2974
2975
// Search: single-subset candidates first (partitionedInt == 0), then
// two-subset candidates, at every endpoint precision that has a mode
for (int partitionedInt = 0; partitionedInt < 2; partitionedInt++)
2976
{
2977
bool partitioned = (partitionedInt == 1);
2978
2979
for (int aPrec = BC7Data::g_maxHDRPrecision; aPrec >= 0; aPrec--)
2980
{
2981
if (!BC7Data::g_hdrModesExistForPrecision[partitionedInt][aPrec])
2982
continue;
2983
2984
int numPartitions = partitioned ? 32 : 1;
2985
int numSubsets = partitioned ? 2 : 1;
2986
int indexBits = partitioned ? 3 : 4;
2987
int indexRange = (1 << indexBits);
2988
2989
for (int p = 0; p < numPartitions; p++)
2990
{
2991
int partitionMask = partitioned ? BC7Data::g_partitionMap[p] : 0;
2992
2993
// A "meta round" is one (tweak, refinePass) combination; results are
// stored per meta round and per subset
const int MaxMetaRounds = MaxTweakRounds * MaxRefineRounds;
2994
2995
MAInt16 metaEndPointsQuantized[MaxMetaRounds][2][2][3];
2996
MUInt15 metaIndexes[MaxMetaRounds][16];
2997
MFloat metaError[MaxMetaRounds][2];
2998
2999
bool roundValid[MaxMetaRounds][2];
3000
3001
for (int r = 0; r < MaxMetaRounds; r++)
3002
for (int subset = 0; subset < 2; subset++)
3003
roundValid[r][subset] = true;
3004
3005
for (int subset = 0; subset < numSubsets; subset++)
3006
{
3007
for (int tweak = 0; tweak < MaxTweakRounds; tweak++)
3008
{
3009
EndpointRefiner<3> refiners[2];
3010
3011
bool abortRemainingRefines = false;
3012
for (int refinePass = 0; refinePass < MaxRefineRounds; refinePass++)
3013
{
3014
int metaRound = tweak * MaxRefineRounds + refinePass;
3015
3016
// Rounds beyond the requested counts are marked invalid rather than run
if (tweak >= numTweakRounds || refinePass >= numRefineRounds)
3017
abortRemainingRefines = true;
3018
3019
if (abortRemainingRefines)
3020
{
3021
roundValid[metaRound][subset] = false;
3022
continue;
3023
}
3024
3025
MAInt16(&mrQuantizedEndPoints)[2][2][3] = metaEndPointsQuantized[metaRound];
3026
MUInt15(&mrIndexes)[16] = metaIndexes[metaRound];
3027
3028
MSInt16 endPointsColorSpace[2][3];
3029
3030
// The first refine pass starts from the selector's endpoints; later
// passes start from the refiner's output
if (refinePass == 0)
3031
{
3032
UnfinishedEndpoints<3> ufep = partitioned ? partitionedUFEP[p][subset] : singleUFEP;
3033
3034
if (isSigned)
3035
ufep.FinishHDRSigned(tweak, indexRange, endPointsColorSpace[0], endPointsColorSpace[1], &rtn);
3036
else
3037
ufep.FinishHDRUnsigned(tweak, indexRange, endPointsColorSpace[0], endPointsColorSpace[1], &rtn);
3038
}
3039
else
3040
refiners[subset].GetRefinedEndpointsHDR(endPointsColorSpace, isSigned, &rtn);
3041
3042
refiners[subset].Init(indexRange, channelWeights);
3043
3044
int fixupIndex = (subset == 0) ? 0 : BC7Data::g_fixupIndexes2[p];
3045
3046
IndexSelectorHDR<3> indexSelector;
3047
if (isSigned)
3048
QuantizeEndpointsSigned(endPointsColorSpace, floatPixels2CL, floatPixelsLinearWeighted, mrQuantizedEndPoints[subset], mrIndexes, indexSelector, fixupIndex, aPrec, indexRange, channelWeights, fastIndexing, &rtn);
3049
else
3050
QuantizeEndpointsUnsigned(endPointsColorSpace, floatPixels2CL, floatPixelsLinearWeighted, mrQuantizedEndPoints[subset], mrIndexes, indexSelector, fixupIndex, aPrec, indexRange, channelWeights, fastIndexing, &rtn);
3051
3052
// Skip this round if, in every lane, its quantized endpoints match
// those of some earlier round
if (metaRound > 0)
3053
{
3054
ParallelMath::Int16CompFlag anySame = ParallelMath::MakeBoolInt16(false);
3055
3056
for (int prevRound = 0; prevRound < metaRound; prevRound++)
3057
{
3058
MAInt16(&prevRoundEPs)[2][3] = metaEndPointsQuantized[prevRound][subset];
3059
3060
ParallelMath::Int16CompFlag same = ParallelMath::MakeBoolInt16(true);
3061
3062
for (int epi = 0; epi < 2; epi++)
3063
for (int ch = 0; ch < 3; ch++)
3064
same = (same & ParallelMath::Equal(prevRoundEPs[epi][ch], mrQuantizedEndPoints[subset][epi][ch]));
3065
3066
anySame = (anySame | same);
3067
if (ParallelMath::AllSet(anySame))
3068
break;
3069
}
3070
3071
if (ParallelMath::AllSet(anySame))
3072
{
3073
roundValid[metaRound][subset] = false;
3074
continue;
3075
}
3076
}
3077
3078
MFloat subsetError = ParallelMath::MakeFloatZero();
3079
3080
{
3081
for (int px = 0; px < 16; px++)
3082
{
3083
if (subset != ((partitionMask >> px) & 1))
3084
continue;
3085
3086
// The fixup pixel's index was already chosen by QuantizeEndpoints*
MUInt15 index;
3087
if (px == fixupIndex)
3088
index = mrIndexes[px];
3089
else
3090
{
3091
index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixels2CL[px], &rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[px], &rtn);
3092
mrIndexes[px] = index;
3093
}
3094
3095
MSInt16 reconstructed[3];
3096
if (isSigned)
3097
indexSelector.ReconstructHDRSigned(mrIndexes[px], reconstructed);
3098
else
3099
indexSelector.ReconstructHDRUnsigned(mrIndexes[px], reconstructed);
3100
3101
subsetError = subsetError + (fastIndexing ? BCCommon::ComputeErrorHDRFast<3>(flags, reconstructed, pixels[px], channelWeightsSq) : BCCommon::ComputeErrorHDRSlow<3>(flags, reconstructed, pixels[px], channelWeightsSq));
3102
3103
// Refiner input is only gathered when another requested refine pass follows
if (refinePass != numRefineRounds - 1)
3104
refiners[subset].ContributeUnweightedPW(preWeightedPixels[px], index);
3105
}
3106
}
3107
3108
metaError[metaRound][subset] = subsetError;
3109
}
3110
}
3111
}
3112
3113
// Now we have a bunch of attempts, but not all of them will fit in the delta coding scheme
3114
int numMeta1 = partitioned ? MaxMetaRounds : 1;
3115
for (int meta0 = 0; meta0 < MaxMetaRounds; meta0++)
3116
{
3117
if (!roundValid[meta0][0])
3118
continue;
3119
3120
for (int meta1 = 0; meta1 < numMeta1; meta1++)
3121
{
3122
// Error of subset 0 from round meta0, plus subset 1 from round meta1 when partitioned
MFloat combinedError = metaError[meta0][0];
3123
if (partitioned)
3124
{
3125
if (!roundValid[meta1][1])
3126
continue;
3127
3128
combinedError = combinedError + metaError[meta1][1];
3129
}
3130
3131
ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(combinedError, bestError);
3132
if (!ParallelMath::AnySet(errorBetter))
3133
continue;
3134
3135
ParallelMath::Int16CompFlag needsCommit = ParallelMath::FloatFlagToInt16(errorBetter);
3136
3137
// Figure out if this is encodable
3138
for (int mode = 0; mode < BC7Data::g_numHDRModes; mode++)
3139
{
3140
const BC7Data::BC6HModeInfo &modeInfo = BC7Data::g_hdrModes[mode];
3141
3142
if (modeInfo.m_partitioned != partitioned || modeInfo.m_aPrec != aPrec)
3143
continue;
3144
3145
MAInt16 encodedEPs[2][2][3];
3146
ParallelMath::Int16CompFlag isLegal;
3147
if (partitioned)
3148
EvaluatePartitionedLegality(metaEndPointsQuantized[meta0][0], metaEndPointsQuantized[meta1][1], modeInfo.m_aPrec, modeInfo.m_bPrec, modeInfo.m_transformed, encodedEPs, isLegal);
3149
else
3150
EvaluateSingleLegality(metaEndPointsQuantized[meta0][0], modeInfo.m_aPrec, modeInfo.m_bPrec, modeInfo.m_transformed, encodedEPs[0], isLegal);
3151
3152
ParallelMath::Int16CompFlag isLegalAndBetter = (ParallelMath::FloatFlagToInt16(errorBetter) & isLegal);
3153
if (!ParallelMath::AnySet(isLegalAndBetter))
3154
continue;
3155
3156
ParallelMath::FloatCompFlag isLegalAndBetterFloat = ParallelMath::Int16FlagToFloat(isLegalAndBetter);
3157
3158
// Commit this candidate in the lanes where it is both legal and better
ParallelMath::ConditionalSet(bestError, isLegalAndBetterFloat, combinedError);
3159
ParallelMath::ConditionalSet(bestMode, isLegalAndBetter, ParallelMath::MakeUInt15(static_cast<uint16_t>(mode)));
3160
ParallelMath::ConditionalSet(bestPartition, isLegalAndBetter, ParallelMath::MakeUInt15(static_cast<uint16_t>(p)));
3161
3162
for (int subset = 0; subset < numSubsets; subset++)
3163
{
3164
for (int epi = 0; epi < 2; epi++)
3165
{
3166
for (int ch = 0; ch < 3; ch++)
3167
ParallelMath::ConditionalSet(bestEndPoints[subset][epi][ch], isLegalAndBetter, encodedEPs[subset][epi][ch]);
3168
}
3169
}
3170
3171
for (int px = 0; px < 16; px++)
3172
{
3173
int subset = ((partitionMask >> px) & 1);
3174
if (subset == 0)
3175
ParallelMath::ConditionalSet(bestIndexes[px], isLegalAndBetter, metaIndexes[meta0][px]);
3176
else
3177
ParallelMath::ConditionalSet(bestIndexes[px], isLegalAndBetter, metaIndexes[meta1][px]);
3178
}
3179
3180
// Stop trying modes once every improved lane has been committed
needsCommit = ParallelMath::AndNot(needsCommit, isLegalAndBetter);
3181
if (!ParallelMath::AnySet(needsCommit))
3182
break;
3183
}
3184
}
3185
}
3186
}
3187
}
3188
}
3189
3190
// At this point, everything should be set
3191
for (int block = 0; block < ParallelMath::ParallelSize; block++)
3192
{
3193
ParallelMath::ScalarUInt16 mode = ParallelMath::Extract(bestMode, block);
3194
ParallelMath::ScalarUInt16 partition = ParallelMath::Extract(bestPartition, block);
3195
int32_t eps[2][2][3];
3196
// NOTE(review): indexes[] is filled below but the index-packing loop at the
// end of this block re-extracts from bestIndexes instead; it looks unused here.
ParallelMath::ScalarUInt16 indexes[16];
3197
3198
const BC7Data::BC6HModeInfo& modeInfo = BC7Data::g_hdrModes[mode];
3199
3200
const BC6HData::ModeDescriptor *desc = BC6HData::g_modeDescriptors[mode];
3201
3202
// Number of header bits (mode, partition and endpoint fields) before the indexes
const size_t headerBits = modeInfo.m_partitioned ? 82 : 65;
3203
3204
for (int subset = 0; subset < 2; subset++)
3205
{
3206
for (int epi = 0; epi < 2; epi++)
3207
{
3208
for (int ch = 0; ch < 3; ch++)
3209
eps[subset][epi][ch] = ParallelMath::Extract(bestEndPoints[subset][epi][ch], block);
3210
}
3211
}
3212
3213
for (int px = 0; px < 16; px++)
3214
indexes[px] = ParallelMath::Extract(bestIndexes[px], block);
3215
3216
uint16_t modeID = modeInfo.m_modeID;
3217
3218
PackingVector pv;
3219
pv.Init();
3220
3221
// Emit the header one bit at a time: each descriptor entry names the field
// to read from and the bit of that field to store
for (size_t i = 0; i < headerBits; i++) {
3222
int32_t codedValue = 0;
3223
switch (desc[i].m_eField) {
3224
case BC6HData::M:
3225
codedValue = modeID;
3226
break;
3227
case BC6HData::D:
3228
codedValue = partition;
3229
break;
3230
case BC6HData::RW:
3231
codedValue = eps[0][0][0];
3232
break;
3233
case BC6HData::RX:
3234
codedValue = eps[0][1][0];
3235
break;
3236
case BC6HData::RY:
3237
codedValue = eps[1][0][0];
3238
break;
3239
case BC6HData::RZ:
3240
codedValue = eps[1][1][0];
3241
break;
3242
case BC6HData::GW:
3243
codedValue = eps[0][0][1];
3244
break;
3245
case BC6HData::GX:
3246
codedValue = eps[0][1][1];
3247
break;
3248
case BC6HData::GY:
3249
codedValue = eps[1][0][1];
3250
break;
3251
case BC6HData::GZ:
3252
codedValue = eps[1][1][1];
3253
break;
3254
case BC6HData::BW:
3255
codedValue = eps[0][0][2];
3256
break;
3257
case BC6HData::BX:
3258
codedValue = eps[0][1][2];
3259
break;
3260
case BC6HData::BY:
3261
codedValue = eps[1][0][2];
3262
break;
3263
case BC6HData::BZ:
3264
codedValue = eps[1][1][2];
3265
break;
3266
default:
3267
assert(false);
3268
break;
3269
}
3270
pv.Pack(static_cast<uint16_t>((codedValue >> desc[i].m_uBit) & 1), 1);
3271
}
3272
3273
int fixupIndex1 = 0;
3274
int indexBits = 4;
3275
if (modeInfo.m_partitioned)
3276
{
3277
fixupIndex1 = BC7Data::g_fixupIndexes2[partition];
3278
indexBits = 3;
3279
}
3280
3281
// Pixel 0 and, for partitioned modes, the second subset's fixup pixel are
// stored with one bit fewer than the other indexes
for (int px = 0; px < 16; px++)
3282
{
3283
ParallelMath::ScalarUInt16 index = ParallelMath::Extract(bestIndexes[px], block);
3284
if (px == 0 || px == fixupIndex1)
3285
pv.Pack(index, indexBits - 1);
3286
else
3287
pv.Pack(index, indexBits);
3288
}
3289
3290
pv.Flush(packedBlocks + 16 * block);
3291
}
3292
}
3293
3294
// Sign-extends, in place, a value that occupies the low `bits` bits of v.
// If bit (bits - 1) is set, every bit from position `bits` upward is set too;
// otherwise v is left unchanged.
void cvtt::Internal::BC6HComputer::SignExtendSingle(int &v, int bits)
{
    const int signBit = 1 << (bits - 1);

    // Non-negative values need no extension
    if ((v & signBit) == 0)
        return;

    v |= -(1 << bits);
}
3299
3300
// Decodes a single 16-byte BC6H block into 16 half-float RGBA pixels.
//
// output   - receives the decoded pixels; alpha is always 0x3c00 (1.0).
// pBC      - pointer to the 16 encoded bytes.
// isSigned - selects signed or unsigned decoding.
//
// If the mode ID in the block matches no entry of BC7Data::g_hdrModes, every
// pixel is written as zero RGB with alpha 1.0.
void cvtt::Internal::BC6HComputer::UnpackOne(PixelBlockF16 &output, const uint8_t *pBC, bool isSigned)
{
    UnpackingVector pv;
    pv.Init(pBC);

    // The mode ID is 2 bits long when those bits read 0 or 1, otherwise 5 bits
    int numModeBits = 2;
    int modeBits = pv.Unpack(2);
    if (modeBits != 0 && modeBits != 1)
    {
        modeBits |= pv.Unpack(3) << 2;
        numModeBits += 3;
    }

    int mode = -1;
    for (int possibleMode = 0; possibleMode < BC7Data::g_numHDRModes; possibleMode++)
    {
        if (BC7Data::g_hdrModes[possibleMode].m_modeID == modeBits)
        {
            mode = possibleMode;
            break;
        }
    }

    if (mode < 0)
    {
        // Unrecognized mode ID
        for (int px = 0; px < 16; px++)
        {
            for (int ch = 0; ch < 3; ch++)
                output.m_pixels[px][ch] = 0;
            output.m_pixels[px][3] = 0x3c00; // 1.0
        }
        return;
    }

    const BC7Data::BC6HModeInfo& modeInfo = BC7Data::g_hdrModes[mode];
    const size_t headerBits = modeInfo.m_partitioned ? 82 : 65;
    const BC6HData::ModeDescriptor *desc = BC6HData::g_modeDescriptors[mode];

    int32_t partition = 0;
    int32_t eps[2][2][3];

    for (int subset = 0; subset < 2; subset++)
        for (int epi = 0; epi < 2; epi++)
            for (int ch = 0; ch < 3; ch++)
                eps[subset][epi][ch] = 0;

    // Read the rest of the header one bit at a time: each descriptor entry
    // names the field the bit belongs to and its position within that field
    for (size_t i = numModeBits; i < headerBits; i++) {
        int32_t *pCodedValue = NULL;

        switch (desc[i].m_eField) {
        case BC6HData::D:
            pCodedValue = &partition;
            break;
        case BC6HData::RW:
            pCodedValue = &eps[0][0][0];
            break;
        case BC6HData::RX:
            pCodedValue = &eps[0][1][0];
            break;
        case BC6HData::RY:
            pCodedValue = &eps[1][0][0];
            break;
        case BC6HData::RZ:
            pCodedValue = &eps[1][1][0];
            break;
        case BC6HData::GW:
            pCodedValue = &eps[0][0][1];
            break;
        case BC6HData::GX:
            pCodedValue = &eps[0][1][1];
            break;
        case BC6HData::GY:
            pCodedValue = &eps[1][0][1];
            break;
        case BC6HData::GZ:
            pCodedValue = &eps[1][1][1];
            break;
        case BC6HData::BW:
            pCodedValue = &eps[0][0][2];
            break;
        case BC6HData::BX:
            pCodedValue = &eps[0][1][2];
            break;
        case BC6HData::BY:
            pCodedValue = &eps[1][0][2];
            break;
        case BC6HData::BZ:
            pCodedValue = &eps[1][1][2];
            break;
        default:
            assert(false);
            break;
        }

        // Always consume the bit so the stream stays aligned, but never write
        // through a null pointer when asserts are compiled out
        const int bit = pv.Unpack(1);
        if (pCodedValue != NULL)
            (*pCodedValue) |= bit << desc[i].m_uBit;
    }

    int fixupIndex1 = 0;
    int indexBits = 4;
    int numSubsets = 1;
    if (modeInfo.m_partitioned)
    {
        fixupIndex1 = BC7Data::g_fixupIndexes2[partition];
        indexBits = 3;
        numSubsets = 2;
    }

    // Pixel 0 and, for partitioned modes, the second subset's fixup pixel are
    // stored with one bit fewer than the other indexes
    int indexes[16];
    for (int px = 0; px < 16; px++)
    {
        if (px == 0 || px == fixupIndex1)
            indexes[px] = pv.Unpack(indexBits - 1);
        else
            indexes[px] = pv.Unpack(indexBits);
    }

    // Sign-extend the raw fields: the base endpoint only in signed mode, the
    // other endpoints in signed mode or when they are stored as deltas
    for (int ch = 0; ch < 3; ch++)
    {
        if (isSigned)
            SignExtendSingle(eps[0][0][ch], modeInfo.m_aPrec);

        if (modeInfo.m_transformed || isSigned)
        {
            SignExtendSingle(eps[0][1][ch], modeInfo.m_bPrec[ch]);

            if (modeInfo.m_partitioned)
            {
                SignExtendSingle(eps[1][0][ch], modeInfo.m_bPrec[ch]);
                SignExtendSingle(eps[1][1][ch], modeInfo.m_bPrec[ch]);
            }
        }
    }

    int aPrec = modeInfo.m_aPrec;

    // Transformed modes store the non-base endpoints as deltas from the base;
    // add the base back and wrap to aPrec bits
    if (modeInfo.m_transformed)
    {
        for (int ch = 0; ch < 3; ch++)
        {
            int wrapMask = (1 << aPrec) - 1;

            eps[0][1][ch] = ((eps[0][0][ch] + eps[0][1][ch]) & wrapMask);
            if (isSigned)
                SignExtendSingle(eps[0][1][ch], aPrec);

            if (modeInfo.m_partitioned)
            {
                eps[1][0][ch] = ((eps[0][0][ch] + eps[1][0][ch]) & wrapMask);
                eps[1][1][ch] = ((eps[0][0][ch] + eps[1][1][ch]) & wrapMask);

                if (isSigned)
                {
                    SignExtendSingle(eps[1][0][ch], aPrec);
                    SignExtendSingle(eps[1][1][ch], aPrec);
                }
            }
        }
    }

    // Unquantize endpoints
    for (int subset = 0; subset < numSubsets; subset++)
    {
        for (int epi = 0; epi < 2; epi++)
        {
            for (int ch = 0; ch < 3; ch++)
            {
                int &v = eps[subset][epi][ch];

                if (isSigned)
                {
                    if (aPrec >= 16)
                    {
                        // Nothing
                    }
                    else
                    {
                        bool s = false;
                        int comp = v;
                        if (v < 0)
                        {
                            s = true;
                            comp = -comp;
                        }

                        int unq = 0;
                        if (comp == 0)
                            unq = 0;
                        else if (comp >= ((1 << (aPrec - 1)) - 1))
                            unq = 0x7fff;
                        else
                            unq = ((comp << 15) + 0x4000) >> (aPrec - 1);

                        if (s)
                            unq = -unq;

                        v = unq;
                    }
                }
                else
                {
                    if (aPrec >= 15)
                    {
                        // Nothing
                    }
                    else if (v == 0)
                    {
                        // Nothing
                    }
                    else if (v == ((1 << aPrec) - 1))
                        v = 0xffff;
                    else
                        v = ((v << 16) + 0x8000) >> aPrec;
                }
            }
        }
    }

    const int *weights = BC7Data::g_weightTables[indexBits];

    // Interpolate each pixel between its subset's endpoints and apply the
    // final scaling
    for (int px = 0; px < 16; px++)
    {
        int subset = 0;
        if (modeInfo.m_partitioned)
            subset = (BC7Data::g_partitionMap[partition] >> px) & 1;

        int w = weights[indexes[px]];
        for (int ch = 0; ch < 3; ch++)
        {
            int comp = ((64 - w) * eps[subset][0][ch] + w * eps[subset][1][ch] + 32) >> 6;

            if (isSigned)
            {
                // Scale the magnitude by 31/32, then store as sign + magnitude
                if (comp < 0)
                    comp = -(((-comp) * 31) >> 5);
                else
                    comp = (comp * 31) >> 5;

                int s = 0;
                if (comp < 0)
                {
                    s = 0x8000;
                    comp = -comp;
                }

                output.m_pixels[px][ch] = static_cast<uint16_t>(s | comp);
            }
            else
            {
                comp = (comp * 31) >> 6;
                output.m_pixels[px][ch] = static_cast<uint16_t>(comp);
            }
        }
        output.m_pixels[px][3] = 0x3c00; // 1.0
    }
}
3565
3566
// Configures a BC7 encoding plan from a single quality setting.
//
// quality is clamped to the range [1, 100].  The clamped value scales how many
// leading entries of the RGB and RGBA priority code tables are consumed: each
// table contributes (table size * quality / 100) codes.  Every consumed code
// is unpacked into a mode plus that mode's selector (partition, rotation
// and/or index selector), and the code's seed point count is stored in the
// matching slot of a zero-filled BC7FineTuningParams.  Slots that no consumed
// code refers to keep a seed point count of 0; if several codes address the
// same slot, the one processed last wins (the RGB table is processed before
// the RGBA table).  The resulting parameters are then turned into encodingPlan
// by ConfigureBC7EncodingPlanFromFineTuningParams.
void cvtt::Kernels::ConfigureBC7EncodingPlanFromQuality(BC7EncodingPlan &encodingPlan, int quality)
{
    namespace Prio = cvtt::Tables::BC7Prio;

    static const int kMaxQuality = 100;

    // Clamp the requested quality into [1, kMaxQuality].
    const int clampedQuality = (quality < 1) ? 1 : ((quality > kMaxQuality) ? kMaxQuality : quality);

    // Number of leading codes taken from each priority table.
    const int rgbCodeCount = Prio::g_bc7NumPrioCodesRGB * clampedQuality / kMaxQuality;
    const int rgbaCodeCount = Prio::g_bc7NumPrioCodesRGBA * clampedQuality / kMaxQuality;

    const uint16_t *codeTables[] = { Prio::g_bc7PrioCodesRGB, Prio::g_bc7PrioCodesRGBA };
    const int codeCounts[] = { rgbCodeCount, rgbaCodeCount };

    BC7FineTuningParams fineTuning;
    memset(&fineTuning, 0, sizeof(fineTuning));

    for (int table = 0; table < 2; table++)
    {
        const uint16_t *codes = codeTables[table];
        const int codeCount = codeCounts[table];

        for (int i = 0; i < codeCount; i++)
        {
            const uint16_t code = codes[i];

            const uint8_t seedPointCount = static_cast<uint8_t>(Prio::UnpackSeedPointCount(code));
            const int mode = Prio::UnpackMode(code);

            // Route the seed point count to the slot selected by the code.
            switch (mode)
            {
            case 0:
            {
                const int partition = Prio::UnpackPartition(code);
                fineTuning.mode0SP[partition] = seedPointCount;
                break;
            }
            case 1:
            {
                const int partition = Prio::UnpackPartition(code);
                fineTuning.mode1SP[partition] = seedPointCount;
                break;
            }
            case 2:
            {
                const int partition = Prio::UnpackPartition(code);
                fineTuning.mode2SP[partition] = seedPointCount;
                break;
            }
            case 3:
            {
                const int partition = Prio::UnpackPartition(code);
                fineTuning.mode3SP[partition] = seedPointCount;
                break;
            }
            case 4:
            {
                const int rotation = Prio::UnpackRotation(code);
                const int indexSelector = Prio::UnpackIndexSelector(code);
                fineTuning.mode4SP[rotation][indexSelector] = seedPointCount;
                break;
            }
            case 5:
            {
                const int rotation = Prio::UnpackRotation(code);
                fineTuning.mode5SP[rotation] = seedPointCount;
                break;
            }
            case 6:
                fineTuning.mode6SP = seedPointCount;
                break;
            case 7:
            {
                const int partition = Prio::UnpackPartition(code);
                fineTuning.mode7SP[partition] = seedPointCount;
                break;
            }
            }
        }
    }

    ConfigureBC7EncodingPlanFromFineTuningParams(encodingPlan, fineTuning);
}
3628
3629
// Generates a BC7 encoding plan from fine-tuning parameters.
3630
bool cvtt::Kernels::ConfigureBC7EncodingPlanFromFineTuningParams(BC7EncodingPlan &encodingPlan, const BC7FineTuningParams &params)
3631
{
3632
memset(&encodingPlan, 0, sizeof(encodingPlan));
3633
3634
// Mode 0
3635
for (int partition = 0; partition < 16; partition++)
3636
{
3637
uint8_t sp = params.mode0SP[partition];
3638
if (sp == 0)
3639
continue;
3640
3641
encodingPlan.mode0PartitionEnabled |= static_cast<uint16_t>(1) << partition;
3642
3643
for (int subset = 0; subset < 3; subset++)
3644
{
3645
int shape = cvtt::Internal::BC7Data::g_shapes3[partition][subset];
3646
encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp);
3647
}
3648
}
3649
3650
// Mode 1
3651
for (int partition = 0; partition < 64; partition++)
3652
{
3653
uint8_t sp = params.mode1SP[partition];
3654
if (sp == 0)
3655
continue;
3656
3657
encodingPlan.mode1PartitionEnabled |= static_cast<uint64_t>(1) << partition;
3658
3659
for (int subset = 0; subset < 2; subset++)
3660
{
3661
int shape = cvtt::Internal::BC7Data::g_shapes2[partition][subset];
3662
encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp);
3663
}
3664
}
3665
3666
// Mode 2
3667
for (int partition = 0; partition < 64; partition++)
3668
{
3669
uint8_t sp = params.mode2SP[partition];
3670
if (sp == 0)
3671
continue;
3672
3673
encodingPlan.mode2PartitionEnabled |= static_cast<uint64_t>(1) << partition;
3674
3675
for (int subset = 0; subset < 3; subset++)
3676
{
3677
int shape = cvtt::Internal::BC7Data::g_shapes3[partition][subset];
3678
encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp);
3679
}
3680
}
3681
3682
// Mode 3
3683
for (int partition = 0; partition < 64; partition++)
3684
{
3685
uint8_t sp = params.mode3SP[partition];
3686
if (sp == 0)
3687
continue;
3688
3689
encodingPlan.mode3PartitionEnabled |= static_cast<uint64_t>(1) << partition;
3690
3691
for (int subset = 0; subset < 2; subset++)
3692
{
3693
int shape = cvtt::Internal::BC7Data::g_shapes2[partition][subset];
3694
encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp);
3695
}
3696
}
3697
3698
// Mode 4
3699
for (int rotation = 0; rotation < 4; rotation++)
3700
{
3701
for (int indexMode = 0; indexMode < 2; indexMode++)
3702
encodingPlan.mode4SP[rotation][indexMode] = params.mode4SP[rotation][indexMode];
3703
}
3704
3705
// Mode 5
3706
for (int rotation = 0; rotation < 4; rotation++)
3707
encodingPlan.mode5SP[rotation] = params.mode5SP[rotation];
3708
3709
// Mode 6
3710
{
3711
uint8_t sp = params.mode6SP;
3712
if (sp != 0)
3713
{
3714
encodingPlan.mode6Enabled = true;
3715
3716
int shape = cvtt::Internal::BC7Data::g_shapes1[0][0];
3717
encodingPlan.seedPointsForShapeRGBA[shape] = std::max(encodingPlan.seedPointsForShapeRGBA[shape], sp);
3718
}
3719
}
3720
3721
// Mode 7
3722
for (int partition = 0; partition < 64; partition++)
3723
{
3724
uint8_t sp = params.mode7SP[partition];
3725
if (sp == 0)
3726
continue;
3727
3728
encodingPlan.mode7RGBAPartitionEnabled |= static_cast<uint64_t>(1) << partition;
3729
3730
for (int subset = 0; subset < 2; subset++)
3731
{
3732
int shape = cvtt::Internal::BC7Data::g_shapes2[partition][subset];
3733
encodingPlan.seedPointsForShapeRGBA[shape] = std::max(encodingPlan.seedPointsForShapeRGBA[shape], sp);
3734
}
3735
}
3736
3737
for (int i = 0; i < BC7EncodingPlan::kNumRGBShapes; i++)
3738
{
3739
if (encodingPlan.seedPointsForShapeRGB[i] > 0)
3740
{
3741
encodingPlan.rgbShapeList[encodingPlan.rgbNumShapesToEvaluate] = i;
3742
encodingPlan.rgbNumShapesToEvaluate++;
3743
}
3744
}
3745
3746
for (int i = 0; i < BC7EncodingPlan::kNumRGBAShapes; i++)
3747
{
3748
if (encodingPlan.seedPointsForShapeRGBA[i] > 0)
3749
{
3750
encodingPlan.rgbaShapeList[encodingPlan.rgbaNumShapesToEvaluate] = i;
3751
encodingPlan.rgbaNumShapesToEvaluate++;
3752
}
3753
}
3754
3755
encodingPlan.mode7RGBPartitionEnabled = (encodingPlan.mode7RGBAPartitionEnabled & ~encodingPlan.mode3PartitionEnabled);
3756
3757
return true;
3758
}
3759
3760
#endif
3761
3762