// Source: GitHub repository godotengine/godot, path blob/master/thirdparty/cvtt/ConvectionKernels.h
/*
Convection Texture Tools
Copyright (c) 2018 Eric Lasota

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject
to the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
24
#pragma once
25
#ifndef __CVTT_CONVECTION_KERNELS__
26
#define __CVTT_CONVECTION_KERNELS__
27
28
#include <stddef.h>
29
#include <stdint.h>
30
31
namespace cvtt
32
{
33
// Encoder tuning flags.  Each constant is a single bit; combine them with
// bitwise OR to form the mask stored in Options::flags.
namespace Flags
{
    // Use fast indexing in BC7 encoding (about 2x faster, slightly worse quality)
    const uint32_t BC7_FastIndexing = 0x008;

    // Try precomputed single-color lookups where applicable (slightly slower, small quality increase on specific blocks)
    const uint32_t BC7_TrySingleColor = 0x010;

    // Don't allow non-zero or non-max alpha values in blocks that only contain one or the other
    const uint32_t BC7_RespectPunchThrough = 0x020;

    // Use fast indexing in HDR formats (faster, worse quality)
    const uint32_t BC6H_FastIndexing = 0x040;

    // Exhaustive search RGB orderings when encoding BC1-BC3 (much slower, better quality)
    const uint32_t S3TC_Exhaustive = 0x080;

    // Penalize distant endpoints, improving quality on inaccurate GPU decoders
    const uint32_t S3TC_Paranoid = 0x100;

    // Uniform color channel importance
    const uint32_t Uniform = 0x200;

    // Use fake BT.709 color space for etc2comp compatibility (slower)
    const uint32_t ETC_UseFakeBT709 = 0x400;

    // Use accurate quantization functions when quantizing fake BT.709 (much slower, marginal improvement on specific blocks)
    const uint32_t ETC_FakeBT709Accurate = 0x800;

    // Misc useful default flag combinations.
    // NOTE: Fastest and Faster expand to the same mask, and so do Fast and Default.
    const uint32_t Fastest = (BC6H_FastIndexing | BC7_FastIndexing | S3TC_Paranoid);
    const uint32_t Faster = (BC6H_FastIndexing | BC7_FastIndexing | S3TC_Paranoid);
    const uint32_t Fast = (BC7_FastIndexing | S3TC_Paranoid);
    const uint32_t Default = (BC7_FastIndexing | S3TC_Paranoid);
    const uint32_t Better = (S3TC_Paranoid | S3TC_Exhaustive);
    const uint32_t Ultra = (BC7_TrySingleColor | S3TC_Paranoid | S3TC_Exhaustive | ETC_FakeBT709Accurate);
}

// Number of blocks every Kernels encode/decode entry point consumes and
// produces per call.
const unsigned int NumParallelBlocks = 8;

struct Options
74
{
75
uint32_t flags; // Bitmask of cvtt::Flags values
76
float threshold; // Alpha test threshold for BC1
77
float redWeight; // Red channel importance
78
float greenWeight; // Green channel importance
79
float blueWeight; // Blue channel importance
80
float alphaWeight; // Alpha channel importance
81
82
int refineRoundsBC7; // Number of refine rounds for BC7
83
int refineRoundsBC6H; // Number of refine rounds for BC6H (max 3)
84
int refineRoundsIIC; // Number of refine rounds for independent interpolated channels (BC3 alpha, BC4, BC5)
85
int refineRoundsS3TC; // Number of refine rounds for S3TC RGB
86
87
int seedPoints; // Number of seed points (min 1, max 4)
88
89
Options()
90
: flags(Flags::Default)
91
, threshold(0.5f)
92
, redWeight(0.2125f / 0.7154f)
93
, greenWeight(1.0f)
94
, blueWeight(0.0721f / 0.7154f)
95
, alphaWeight(1.0f)
96
, refineRoundsBC7(2)
97
, refineRoundsBC6H(3)
98
, refineRoundsIIC(8)
99
, refineRoundsS3TC(2)
100
, seedPoints(4)
101
{
102
}
103
};
104
105
// Fine-grained BC7 tuning input: one seed point count per mode/configuration
// slot.  Consumed by Kernels::ConfigureBC7EncodingPlanFromFineTuningParams.
struct BC7FineTuningParams
{
    // Seed point counts for each mode+configuration combination
    uint8_t mode0SP[16];
    uint8_t mode1SP[64];
    uint8_t mode2SP[64];
    uint8_t mode3SP[64];
    uint8_t mode4SP[4][2];
    uint8_t mode5SP[4];
    uint8_t mode6SP;
    uint8_t mode7SP[64];

    // Sets every seed point count, in every mode, to 4.
    BC7FineTuningParams()
    {
        const uint8_t defaultSeedPoints = 4;

        for (int i = 0; i < 16; i++)
            mode0SP[i] = defaultSeedPoints;

        // Modes 1, 2, 3 and 7 all have 64 entries, so they share one loop.
        for (int i = 0; i < 64; i++)
        {
            mode1SP[i] = defaultSeedPoints;
            mode2SP[i] = defaultSeedPoints;
            mode3SP[i] = defaultSeedPoints;
            mode7SP[i] = defaultSeedPoints;
        }

        // Modes 4 and 5 both have 4 outer entries; mode 4 has 2 values per entry.
        for (int i = 0; i < 4; i++)
        {
            for (int j = 0; j < 2; j++)
                mode4SP[i][j] = defaultSeedPoints;

            mode5SP[i] = defaultSeedPoints;
        }

        mode6SP = defaultSeedPoints;
    }
};

// Describes which BC7 modes, partitions and shapes the encoder evaluates and
// how many seed points it uses for each.  A default-constructed plan enables
// everything with 4 seed points.
struct BC7EncodingPlan
{
    // Shape table sizes.  Both are <= 255, which is what allows shape indices
    // and counts to be stored in uint8_t below.
    static const int kNumRGBAShapes = 129;
    static const int kNumRGBShapes = 243;

    // Per-mode enable masks.
    // NOTE(review): presumably bit N enables partition N - confirm against the encoder.
    uint64_t mode1PartitionEnabled;
    uint64_t mode2PartitionEnabled;
    uint64_t mode3PartitionEnabled;
    uint16_t mode0PartitionEnabled;
    uint64_t mode7RGBAPartitionEnabled;
    uint64_t mode7RGBPartitionEnabled;
    uint8_t mode4SP[4][2];
    uint8_t mode5SP[4];
    bool mode6Enabled;

    // Seed point count for each shape, indexed by shape number.
    uint8_t seedPointsForShapeRGB[kNumRGBShapes];
    uint8_t seedPointsForShapeRGBA[kNumRGBAShapes];

    // Shape lists; the constructor fills all entries and sets the matching
    // *NumShapesToEvaluate count to the full table size.
    uint8_t rgbaShapeList[kNumRGBAShapes];
    uint8_t rgbaNumShapesToEvaluate;

    uint8_t rgbShapeList[kNumRGBShapes];
    uint8_t rgbNumShapesToEvaluate;

    // Builds the "evaluate everything" plan: identity shape lists, all
    // partition bits set, mode 6 enabled, 4 seed points everywhere.
    BC7EncodingPlan()
    {
        const uint8_t defaultSeedPoints = 4;

        for (int i = 0; i < kNumRGBShapes; i++)
        {
            // Narrowing is safe: i < kNumRGBShapes (243) fits in uint8_t.
            rgbShapeList[i] = static_cast<uint8_t>(i);
            seedPointsForShapeRGB[i] = defaultSeedPoints;
        }
        rgbNumShapesToEvaluate = static_cast<uint8_t>(kNumRGBShapes);

        for (int i = 0; i < kNumRGBAShapes; i++)
        {
            // Narrowing is safe: i < kNumRGBAShapes (129) fits in uint8_t.
            rgbaShapeList[i] = static_cast<uint8_t>(i);
            seedPointsForShapeRGBA[i] = defaultSeedPoints;
        }
        rgbaNumShapesToEvaluate = static_cast<uint8_t>(kNumRGBAShapes);

        mode0PartitionEnabled = 0xffff;
        mode1PartitionEnabled = 0xffffffffffffffffULL;
        mode2PartitionEnabled = 0xffffffffffffffffULL;
        mode3PartitionEnabled = 0xffffffffffffffffULL;
        mode6Enabled = true;
        mode7RGBPartitionEnabled = 0xffffffffffffffffULL;
        mode7RGBAPartitionEnabled = 0xffffffffffffffffULL;

        for (int i = 0; i < 4; i++)
        {
            for (int j = 0; j < 2; j++)
                mode4SP[i][j] = defaultSeedPoints;

            mode5SP[i] = defaultSeedPoints;
        }
    }
};

// RGBA input block for unsigned 8-bit formats.
// Holds 16 pixels with 4 unsigned 8-bit components each.
struct PixelBlockU8
{
    uint8_t m_pixels[16][4];
};

// RGBA input block for signed 8-bit formats.
// Holds 16 pixels with 4 signed 8-bit components each.
struct PixelBlockS8
{
    int8_t m_pixels[16][4];
};

// Single-channel block: 16 values of 16 bits each, one per pixel.
// Used as the input type of Kernels::EncodeETC2Alpha11.
struct PixelBlockScalarS16
{
    int16_t m_pixels[16];
};

// RGBA input block for half-precision float formats (bit-cast to int16_t).
// Holds 16 pixels with 4 components each; every component is the raw 16-bit
// pattern of a half float, not a numeric integer value.
struct PixelBlockF16
{
    int16_t m_pixels[16][4];
};

// Opaque handle type for the temporary storage used by the ETC2 encoders.
// The constructor is protected, so client code cannot create one directly;
// instances are obtained from Kernels::AllocETC2Data and handed back to
// Kernels::ReleaseETC2Data.
class ETC2CompressionData
{
protected:
    ETC2CompressionData() {}
};

// Opaque handle type for the temporary storage used by the ETC1 encoder.
// The constructor is protected, so client code cannot create one directly;
// instances are obtained from Kernels::AllocETC1Data and handed back to
// Kernels::ReleaseETC1Data.
class ETC1CompressionData
{
protected:
    ETC1CompressionData() {}
};

namespace Kernels
237
{
238
typedef void* allocFunc_t(void *context, size_t size);
239
typedef void freeFunc_t(void *context, void* ptr, size_t size);
240
241
// NOTE: All functions accept and output NumParallelBlocks blocks at once
242
void EncodeBC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
243
void EncodeBC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
244
void EncodeBC3(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
245
void EncodeBC4U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
246
void EncodeBC4S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options);
247
void EncodeBC5U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
248
void EncodeBC5S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options);
249
void EncodeBC6HU(uint8_t *pBC, const PixelBlockF16 *pBlocks, const Options &options);
250
void EncodeBC6HS(uint8_t *pBC, const PixelBlockF16 *pBlocks, const Options &options);
251
void EncodeBC7(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, const BC7EncodingPlan &encodingPlan);
252
void EncodeETC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, ETC1CompressionData *compressionData);
253
void EncodeETC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, ETC2CompressionData *compressionData);
254
void EncodeETC2RGBA(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData);
255
void EncodeETC2PunchthroughAlpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData);
256
257
void EncodeETC2Alpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options);
258
void EncodeETC2Alpha11(uint8_t *pBC, const PixelBlockScalarS16 *pBlocks, bool isSigned, const cvtt::Options &options);
259
260
// Generates a BC7 encoding plan from a quality parameter that ranges from 1 (fastest) to 100 (best)
261
void ConfigureBC7EncodingPlanFromQuality(BC7EncodingPlan &encodingPlan, int quality);
262
263
// Generates a BC7 encoding plan from fine-tuning parameters.
264
bool ConfigureBC7EncodingPlanFromFineTuningParams(BC7EncodingPlan &encodingPlan, const BC7FineTuningParams &params);
265
266
// ETC compression requires temporary storage that normally consumes a large amount of stack space.
267
// To allocate and release it, use one of these functions.
268
ETC2CompressionData *AllocETC2Data(allocFunc_t allocFunc, void *context, const cvtt::Options &options);
269
void ReleaseETC2Data(ETC2CompressionData *compressionData, freeFunc_t freeFunc);
270
271
ETC1CompressionData *AllocETC1Data(allocFunc_t allocFunc, void *context);
272
void ReleaseETC1Data(ETC1CompressionData *compressionData, freeFunc_t freeFunc);
273
274
void DecodeBC6HU(PixelBlockF16 *pBlocks, const uint8_t *pBC);
275
void DecodeBC6HS(PixelBlockF16 *pBlocks, const uint8_t *pBC);
276
void DecodeBC7(PixelBlockU8 *pBlocks, const uint8_t *pBC);
277
}
278
}
279
280
#endif
281
282