Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/basis_universal/encoder/basisu_astc_hdr_common.cpp
9903 views
1
// File: basisu_astc_hdr_common.cpp
2
#include "basisu_enc.h"
3
#include "basisu_gpu_texture.h"
4
#include "../transcoder/basisu_astc_helpers.h"
5
#include "../transcoder/basisu_astc_hdr_core.h"
6
#include "basisu_astc_hdr_common.h"
7
8
using namespace basist;
9
10
#ifndef __EMSCRIPTEN__
11
#define BASISU_MULTITHREADED_INIT (0)
12
#endif
13
14
namespace basisu
15
{
16
17
// Per-ISE-weight-range interpolation weights (0-64).
// Row layout: [0] = number of weight levels in the range, [1..levels] = the weight for each index.
// The weights are stored in ASTC index order (several rows are visibly not monotonic); see
// g_map_astc_to_linear_order/g_map_linear_to_astc_order for the sorted ordering.
const uint8_t g_ise_weight_lerps[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][33] =
{
	// ISE range 0: 2 levels. Note ise range=0 is invalid for 4x4 block sizes (<24 weight bits in the block).
	{ 2, 0, 64 },
	// ISE range 1: 3 levels
	{ 3, 0, 32, 64 },
	// ISE range 2: 4 levels
	{ 4, 0, 21, 43, 64 },
	// ISE range 3: 5 levels
	{ 5, 0, 16, 32, 48, 64 },
	// ISE range 4: 6 levels
	{ 6, 0, 64, 12, 52, 25, 39 },
	// ISE range 5: 8 levels
	{ 8, 0, 9, 18, 27, 37, 46, 55, 64 },
	// ISE range 6: 10 levels
	{ 10, 0, 64, 7, 57, 14, 50, 21, 43, 28, 36 },
	// ISE range 7: 12 levels
	{ 12, 0, 64, 17, 47, 5, 59, 23, 41, 11, 53, 28, 36 },
	// ISE range 8: 16 levels
	{ 16, 0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64 },
	// ISE range 9: 20 levels
	{ 20, 0, 64, 16, 48, 3, 61, 19, 45, 6, 58, 23, 41, 9, 55, 26, 38, 13, 51, 29, 35 },
	// ISE range 10: 24 levels
	{ 24, 0, 64, 8, 56, 16, 48, 24, 40, 2, 62, 11, 53, 19, 45, 27, 37, 5, 59, 13, 51, 22, 42, 30, 34 },
	// ISE range 11: 32 levels
	{ 32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64 },
};
32
33
//--------------------------------------------------------------------------------------------------------------------------
34
35
const float DEF_R_ERROR_SCALE = 2.0f;
36
const float DEF_G_ERROR_SCALE = 3.0f;
37
38
void astc_hdr_codec_base_options::init()
39
{
40
m_r_err_scale = DEF_R_ERROR_SCALE;
41
m_g_err_scale = DEF_G_ERROR_SCALE;
42
m_q_log_bias = Q_LOG_BIAS_4x4;
43
44
m_ultra_quant = false;
45
46
// Disabling by default to avoid transcoding outliers (try kodim26). The quality lost is very low. TODO: Could include the uber result in the output.
47
m_allow_uber_mode = false;
48
49
m_mode7_full_s_optimization = true;
50
51
m_take_first_non_clamping_mode11_submode = false;
52
m_take_first_non_clamping_mode7_submode = false;
53
54
m_disable_weight_plane_optimization = true;
55
}
56
57
//--------------------------------------------------------------------------------------------------------------------------
58
// max usable qlog8 value is 247, 248=inf, >=249 is nan
59
// max usable qlog7 value is 123, 124=inf, >=125 is nan
60
61
//const uint32_t TOTAL_USABLE_QLOG8 = 248; // 0-247 are usable, 0=0, 247=60416.0, 246=55296.0
62
63
// Nearest qlog7/qlog8 code for each positive half float bit pattern (0-32767 only).
// Filled in by init_qlog_tables(); entries for inf/NaN half values are skipped by the table builder.
static uint16_t g_half_to_qlog7[32768];
static uint16_t g_half_to_qlog8[32768];

// Inclusive range of qlog bit depths that have a lookup table.
const uint32_t HALF_TO_QLOG_TABS_MIN_BITS = 7;
const uint32_t HALF_TO_QLOG_TABS_MAX_BITS = 8;

// Lookup tables indexed by (bits - HALF_TO_QLOG_TABS_MIN_BITS).
static uint16_t* g_pHalf_to_qlog_tabs[2] = { g_half_to_qlog7, g_half_to_qlog8 };

#if 0
// Disabled: table-driven half float -> qlog7/qlog8 conversion.
static inline uint32_t half_to_qlog7_8(half_float h, uint32_t bits)
{
	assert((bits >= HALF_TO_QLOG_TABS_MIN_BITS) && (bits <= HALF_TO_QLOG_TABS_MAX_BITS));
	assert(h < 32768);

	return g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_MIN_BITS][h];
}
#endif
83
84
// TODO: Tune this
// Quantizes a 16-bit qlog value down to desired_bits (7-12) bits.
// The dropped low bits are rounded using a bias of (half - 1), and the result is clamped to the
// largest code representable in desired_bits.
static inline uint32_t quant_qlog16(uint32_t q16, uint32_t desired_bits)
{
	assert((desired_bits >= 7) && (desired_bits <= 12));
	assert(q16 <= 65535);

	const uint32_t num_dropped_bits = 16 - desired_bits;
	const uint32_t rounding_bias = (1U << (num_dropped_bits - 1U)) - 1U;
	const uint32_t largest_code = (1U << desired_bits) - 1U;

	const uint32_t quantized = (q16 + rounding_bias) >> num_dropped_bits;

	// Rounding up near the top of the range can overshoot by one code.
	return (quantized > largest_code) ? largest_code : quantized;
}
98
99
// Builds a lookup table mapping each positive, finite half float bit pattern (0-32767) to the
// 'bits'-wide qlog code whose decoded float value is nearest to it.
//  bits            - qlog precision of the codes being searched.
//  pTable          - receives one entry per half float; entries for inf/NaN inputs are not written.
//  qlog16_to_float - decoded float value of every 16-bit qlog code (65536 entries).
static void compute_half_to_qlog_table(uint32_t bits, uint16_t* pTable, const basisu::vector<float>& qlog16_to_float)
{
	assert(bits >= 5 && bits <= 12);

	const uint32_t num_codes = 1U << bits;
	const uint32_t qlog16_shift = 16 - bits;

	// First qlog16 code that decodes to inf; everything at or above it is inf/NaN.
	const uint32_t FIRST_INVALID_QLOG16_INDEX = 63488;
	assert(std::isinf(qlog16_to_float[FIRST_INVALID_QLOG16_INDEX]));
	assert(std::isinf(qlog16_to_float[FIRST_INVALID_QLOG16_INDEX + 1]));
	assert(!std::isnan(qlog16_to_float[FIRST_INVALID_QLOG16_INDEX - 1]));
	assert(!std::isinf(qlog16_to_float[FIRST_INVALID_QLOG16_INDEX - 1]));

	for (uint32_t h = 0; h < 32768; h++)
	{
		const half_float half_val = (half_float)h;

		// inf/NaN inputs have no meaningful nearest code.
		if (is_half_inf_or_nan(half_val))
			continue;

		const float target = half_to_float(half_val);

		uint32_t nearest_code = 0;
		float nearest_dist = BIG_FLOAT_VAL;
		double last_dist = BIG_FLOAT_VAL;

		for (uint32_t code = 0; code < num_codes; code++)
		{
			// Expand the code to its qlog16 equivalent and stop once the inf/NaN region is reached.
			const uint32_t qlog16_index = code << qlog16_shift;
			if (qlog16_index >= FIRST_INVALID_QLOG16_INDEX)
				break;

			// Checking each value for inf/NaN here is too costly in debug builds.
			const float dist = fabsf(qlog16_to_float[qlog16_index] - target);

			// Once the distance starts growing, every remaining code is guaranteed to be worse.
			if (dist > last_dist)
				break;

			last_dist = dist;

			if (dist < nearest_dist)
			{
				nearest_dist = dist;
				nearest_code = code;

				// An exact match can't be improved on.
				if (nearest_dist == 0.0f)
					break;
			}
		}

		pTable[h] = (uint16_t)nearest_code;
	}
}
159
160
static void init_qlog_tables()
161
{
162
basisu::vector<float> qlog16_to_float(65536);
163
164
// for all possible qlog16, compute the corresponding half float
165
for (uint32_t i = 0; i <= 65535; i++)
166
{
167
half_float h = astc_helpers::qlog16_to_half(i);
168
169
qlog16_to_float[i] = half_to_float(h);
170
}
171
172
#if BASISU_MULTITHREADED_INIT
173
job_pool jp(3);
174
175
for (uint32_t bits = HALF_TO_QLOG_TABS_MIN_BITS; bits <= HALF_TO_QLOG_TABS_MAX_BITS; bits++)
176
{
177
jp.add_job( [bits, &qlog16_to_float]() { compute_half_to_qlog_table(bits, g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_MIN_BITS], qlog16_to_float); });
178
}
179
180
jp.wait_for_all();
181
#else
182
// for all possible half floats, find the nearest qlog5-12 float
183
for (uint32_t bits = HALF_TO_QLOG_TABS_MIN_BITS; bits <= HALF_TO_QLOG_TABS_MAX_BITS; bits++)
184
{
185
compute_half_to_qlog_table(bits, g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_MIN_BITS], qlog16_to_float);
186
187
#if 0
188
std::vector<uint16_t> check_tab(32768);
189
compute_half_to_qlog_table_orig(bits, check_tab.data(), qlog16_to_float);
190
for (uint32_t i = 0; i < (1 << bits); i++)
191
{
192
assert(check_tab[i] == g_pHalf_to_qlog_tabs[bits - HALF_TO_QLOG_TABS_MIN_BITS][i]);
193
}
194
#endif
195
}
196
#endif // BASISU_MULTITHREADED_INIT
197
}
198
199
//--------------------------------------------------------------------------------------------------------------------------
200
201
// Returns the average of the first three components of num_pixels pixels.
// The fourth component of each pixel is ignored.
static vec3F calc_mean(uint32_t num_pixels, const vec4F* pPixels)
{
	vec3F sum(0.0f);

	const vec4F* const pEnd = pPixels + num_pixels;
	for (const vec4F* pCur = pPixels; pCur != pEnd; ++pCur)
	{
		for (uint32_t c = 0; c < 3; c++)
			sum[c] += (*pCur)[c];
	}

	return sum / static_cast<float>(num_pixels);
}
216
217
// Estimates the principal axis of the pixels' RGB distribution around mean_color.
// Accumulates the (unnormalized) 3x3 covariance matrix, then runs three power iterations from a
// fixed starting vector. Returns a unit-length axis, or the normalized diagonal (1,1,1)/sqrt(3)
// when the result is too short to normalize.
static vec3F calc_rgb_pca(uint32_t num_pixels, const vec4F* pPixels, const vec3F& mean_color)
{
	// Upper triangle of the symmetric covariance matrix.
	float cov_rr = 0.0f, cov_rg = 0.0f, cov_rb = 0.0f;
	float cov_gg = 0.0f, cov_gb = 0.0f, cov_bb = 0.0f;

	for (uint32_t i = 0; i < num_pixels; i++)
	{
		const vec4F& px = pPixels[i];

		const float dr = px[0] - mean_color[0];
		const float dg = px[1] - mean_color[1];
		const float db = px[2] - mean_color[2];

		cov_rr += dr * dr;
		cov_rg += dr * dg;
		cov_rb += dr * db;
		cov_gg += dg * dg;
		cov_gb += dg * db;
		cov_bb += db * db;
	}

	// Power iteration: repeatedly multiply by the covariance matrix, rescaling by the largest
	// magnitude component each pass.
	float axis_r = .9f, axis_g = 1.0f, axis_b = .7f;

	for (uint32_t pass = 0; pass < 3; pass++)
	{
		float next_r = axis_r * cov_rr + axis_g * cov_rg + axis_b * cov_rb;
		float next_g = axis_r * cov_rg + axis_g * cov_gg + axis_b * cov_gb;
		float next_b = axis_r * cov_rb + axis_g * cov_gb + axis_b * cov_bb;

		const float largest = maximumf(maximumf(fabsf(next_r), fabsf(next_g)), fabsf(next_b));

		if (largest > 1e-10f)
		{
			const float inv_largest = 1.0f / largest;

			next_r *= inv_largest;
			next_g *= inv_largest;
			next_b *= inv_largest;
		}

		axis_r = next_r;
		axis_g = next_g;
		axis_b = next_b;
	}

	const float sq_len = axis_r * axis_r + axis_g * axis_g + axis_b * axis_b;

	// Fallback used when the iterated vector is degenerate.
	vec3F axis(0.5773502691f);

	if (sq_len >= 1e-10f)
	{
		const float inv_len = 1.0f / sqrtf(sq_len);

		axis.set(axis_r * inv_len, axis_g * inv_len, axis_b * inv_len);
	}

	return axis;
}
277
278
void encode_astc_block_stats::init(uint32_t num_pixels, const vec4F pBlock_pixels_q16[])
279
{
280
m_num_pixels = num_pixels;
281
m_mean_q16 = calc_mean(num_pixels, pBlock_pixels_q16);
282
m_axis_q16 = calc_rgb_pca(num_pixels, pBlock_pixels_q16, m_mean_q16);
283
}
284
285
// Computes the point mean + dir * df. If that point lies inside colorspace_box it is returned
// as-is; otherwise a ray is cast from the point back towards 'mean' and the intersection with
// input_box is returned (or the unmodified point if the ray test fails).
// pInside, if non-null, reports whether the point was inside colorspace_box.
static vec3F interp_color(const vec3F& mean, const vec3F& dir, float df, const aabb3F& colorspace_box, const aabb3F& input_box, bool* pInside = nullptr)
{
#if 0
	assert(mean[0] >= input_box[0][0]);
	assert(mean[1] >= input_box[0][1]);
	assert(mean[2] >= input_box[0][2]);
	assert(mean[0] <= input_box[1][0]);
	assert(mean[1] <= input_box[1][1]);
	assert(mean[2] <= input_box[1][2]);
#endif

	const vec3F target(mean + dir * df);

	const bool target_is_inside = colorspace_box.contains(target);

	if (pInside)
		*pInside = target_is_inside;

	if (target_is_inside)
		return target;

	// Cast a ray from the out-of-range target back towards the mean.
	ray3F inward_ray(target, (mean - target).normalize_in_place());

	vec3F hit_point;
	float hit_t = 0.0f;

	if (intersection::ray_aabb(hit_point, hit_t, inward_ray, input_box) != intersection::cSuccess)
		return target;

	return hit_point;
}
325
326
// all in Q16 space, 0-65535
327
static bool compute_least_squares_endpoints_rgb(
328
uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights,
329
vec3F* pXl, vec3F* pXh, const vec4F* pColors, const aabb3F& input_box)
330
{
331
// Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
332
// https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
333
// I did this in matrix form first, expanded out all the ops, then optimized it a bit.
334
float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;
335
float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;
336
float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f;
337
float q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f;
338
339
for (uint32_t i = 0; i < N; i++)
340
{
341
const uint32_t sel = pSelectors[i];
342
343
z00 += pSelector_weights[sel][0];
344
z10 += pSelector_weights[sel][1];
345
z11 += pSelector_weights[sel][2];
346
347
float w = pSelector_weights[sel][3];
348
349
q00_r += w * pColors[i][0];
350
t_r += pColors[i][0];
351
352
q00_g += w * pColors[i][1];
353
t_g += pColors[i][1];
354
355
q00_b += w * pColors[i][2];
356
t_b += pColors[i][2];
357
}
358
359
q10_r = t_r - q00_r;
360
q10_g = t_g - q00_g;
361
q10_b = t_b - q00_b;
362
363
z01 = z10;
364
365
float det = z00 * z11 - z01 * z10;
366
if (det == 0.0f)
367
return false;
368
369
det = 1.0f / det;
370
371
float iz00, iz01, iz10, iz11;
372
iz00 = z11 * det;
373
iz01 = -z01 * det;
374
iz10 = -z10 * det;
375
iz11 = z00 * det;
376
377
(*pXl)[0] = (float)(iz00 * q00_r + iz01 * q10_r);
378
(*pXh)[0] = (float)(iz10 * q00_r + iz11 * q10_r);
379
380
(*pXl)[1] = (float)(iz00 * q00_g + iz01 * q10_g);
381
(*pXh)[1] = (float)(iz10 * q00_g + iz11 * q10_g);
382
383
(*pXl)[2] = (float)(iz00 * q00_b + iz01 * q10_b);
384
(*pXh)[2] = (float)(iz10 * q00_b + iz11 * q10_b);
385
386
for (uint32_t c = 0; c < 3; c++)
387
{
388
float l = (*pXl)[c], h = (*pXh)[c];
389
390
if (input_box.get_dim(c) < .0000125f)
391
{
392
l = input_box[0][c];
393
h = input_box[1][c];
394
}
395
396
(*pXl)[c] = l;
397
(*pXh)[c] = h;
398
}
399
400
vec3F mean((*pXl + *pXh) * .5f);
401
vec3F dir(*pXh - *pXl);
402
403
float ln = dir.length();
404
if (ln)
405
{
406
dir /= ln;
407
408
float ld = (*pXl - mean).dot(dir);
409
float hd = (*pXh - mean).dot(dir);
410
411
aabb3F colorspace_box(vec3F(0.0f), vec3F(MAX_QLOG16_VAL));
412
413
bool was_inside1 = false;
414
415
vec3F l = interp_color(mean, dir, ld, colorspace_box, input_box, &was_inside1);
416
if (!was_inside1)
417
*pXl = l;
418
419
bool was_inside2 = false;
420
vec3F h = interp_color(mean, dir, hd, colorspace_box, input_box, &was_inside2);
421
if (!was_inside2)
422
*pXh = h;
423
}
424
425
pXl->clamp(0.0f, MAX_QLOG16_VAL);
426
pXh->clamp(0.0f, MAX_QLOG16_VAL);
427
428
return true;
429
}
430
431
static bool compute_least_squares_endpoints_rgb_raw_weights(
432
uint32_t N, const uint8_t* pRaw_weights,
433
vec3F* pXl, vec3F* pXh, const vec4F* pColors, const aabb3F& input_box)
434
{
435
// Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
436
// https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
437
// I did this in matrix form first, expanded out all the ops, then optimized it a bit.
438
float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;
439
float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;
440
float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f;
441
float q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f;
442
443
for (uint32_t i = 0; i < N; i++)
444
{
445
const float wt = (float)pRaw_weights[i] * (1.0f / 64.0f);
446
assert(wt <= 1.0f);
447
448
const float w0 = wt * wt;
449
const float w1 = (1.0f - wt) * wt;
450
const float w2 = (1.0f - wt) * (1.0f - wt);
451
const float w3 = wt;
452
453
z00 += w0;
454
z10 += w1;
455
z11 += w2;
456
457
float w = w3;
458
q00_r += w * pColors[i][0];
459
t_r += pColors[i][0];
460
461
q00_g += w * pColors[i][1];
462
t_g += pColors[i][1];
463
464
q00_b += w * pColors[i][2];
465
t_b += pColors[i][2];
466
}
467
468
q10_r = t_r - q00_r;
469
q10_g = t_g - q00_g;
470
q10_b = t_b - q00_b;
471
472
z01 = z10;
473
474
float det = z00 * z11 - z01 * z10;
475
if (det == 0.0f)
476
return false;
477
478
det = 1.0f / det;
479
480
float iz00, iz01, iz10, iz11;
481
iz00 = z11 * det;
482
iz01 = -z01 * det;
483
iz10 = -z10 * det;
484
iz11 = z00 * det;
485
486
(*pXl)[0] = (float)(iz00 * q00_r + iz01 * q10_r);
487
(*pXh)[0] = (float)(iz10 * q00_r + iz11 * q10_r);
488
489
(*pXl)[1] = (float)(iz00 * q00_g + iz01 * q10_g);
490
(*pXh)[1] = (float)(iz10 * q00_g + iz11 * q10_g);
491
492
(*pXl)[2] = (float)(iz00 * q00_b + iz01 * q10_b);
493
(*pXh)[2] = (float)(iz10 * q00_b + iz11 * q10_b);
494
495
for (uint32_t c = 0; c < 3; c++)
496
{
497
float l = (*pXl)[c], h = (*pXh)[c];
498
499
if (input_box.get_dim(c) < .0000125f)
500
{
501
l = input_box[0][c];
502
h = input_box[1][c];
503
}
504
505
(*pXl)[c] = l;
506
(*pXh)[c] = h;
507
}
508
509
vec3F mean((*pXl + *pXh) * .5f);
510
vec3F dir(*pXh - *pXl);
511
512
float ln = dir.length();
513
if (ln)
514
{
515
dir /= ln;
516
517
float ld = (*pXl - mean).dot(dir);
518
float hd = (*pXh - mean).dot(dir);
519
520
aabb3F colorspace_box(vec3F(0.0f), vec3F(MAX_QLOG16_VAL));
521
522
bool was_inside1 = false;
523
524
vec3F l = interp_color(mean, dir, ld, colorspace_box, input_box, &was_inside1);
525
if (!was_inside1)
526
*pXl = l;
527
528
bool was_inside2 = false;
529
vec3F h = interp_color(mean, dir, hd, colorspace_box, input_box, &was_inside2);
530
if (!was_inside2)
531
*pXh = h;
532
}
533
534
pXl->clamp(0.0f, MAX_QLOG16_VAL);
535
pXh->clamp(0.0f, MAX_QLOG16_VAL);
536
537
return true;
538
}
539
540
static bool compute_least_squares_endpoints_2D(
541
uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights,
542
vec2F* pXl, vec2F* pXh, const vec2F* pColors, const aabb2F& input_box)
543
{
544
// Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
545
// https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
546
// I did this in matrix form first, expanded out all the ops, then optimized it a bit.
547
float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;
548
float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;
549
float q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f;
550
551
for (uint32_t i = 0; i < N; i++)
552
{
553
const uint32_t sel = pSelectors[i];
554
z00 += pSelector_weights[sel][0];
555
z10 += pSelector_weights[sel][1];
556
z11 += pSelector_weights[sel][2];
557
558
float w = pSelector_weights[sel][3];
559
q00_r += w * pColors[i][0];
560
t_r += pColors[i][0];
561
562
q00_g += w * pColors[i][1];
563
t_g += pColors[i][1];
564
}
565
566
q10_r = t_r - q00_r;
567
q10_g = t_g - q00_g;
568
569
z01 = z10;
570
571
float det = z00 * z11 - z01 * z10;
572
if (det == 0.0f)
573
return false;
574
575
det = 1.0f / det;
576
577
float iz00, iz01, iz10, iz11;
578
iz00 = z11 * det;
579
iz01 = -z01 * det;
580
iz10 = -z10 * det;
581
iz11 = z00 * det;
582
583
(*pXl)[0] = (float)(iz00 * q00_r + iz01 * q10_r);
584
(*pXh)[0] = (float)(iz10 * q00_r + iz11 * q10_r);
585
586
(*pXl)[1] = (float)(iz00 * q00_g + iz01 * q10_g);
587
(*pXh)[1] = (float)(iz10 * q00_g + iz11 * q10_g);
588
589
for (uint32_t c = 0; c < 2; c++)
590
{
591
float l = (*pXl)[c], h = (*pXh)[c];
592
593
if (input_box.get_dim(c) < .0000125f)
594
{
595
l = input_box[0][c];
596
h = input_box[1][c];
597
}
598
599
(*pXl)[c] = l;
600
(*pXh)[c] = h;
601
}
602
603
pXl->clamp(0.0f, MAX_QLOG16_VAL);
604
pXh->clamp(0.0f, MAX_QLOG16_VAL);
605
606
return true;
607
}
608
609
static bool compute_least_squares_endpoints_1D(
610
uint32_t N, const uint8_t* pSelectors, const vec4F* pSelector_weights,
611
vec1F* pXl, vec1F* pXh, const vec1F* pColors, const aabb1F& input_box)
612
{
613
// Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
614
// https://web.archive.org/web/20150319232457/http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf
615
// I did this in matrix form first, expanded out all the ops, then optimized it a bit.
616
float z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f;
617
float q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f;
618
619
for (uint32_t i = 0; i < N; i++)
620
{
621
const uint32_t sel = pSelectors[i];
622
z00 += pSelector_weights[sel][0];
623
z10 += pSelector_weights[sel][1];
624
z11 += pSelector_weights[sel][2];
625
626
float w = pSelector_weights[sel][3];
627
q00_r += w * pColors[i][0];
628
t_r += pColors[i][0];
629
}
630
631
q10_r = t_r - q00_r;
632
633
z01 = z10;
634
635
float det = z00 * z11 - z01 * z10;
636
if (det == 0.0f)
637
return false;
638
639
det = 1.0f / det;
640
641
float iz00, iz01, iz10, iz11;
642
iz00 = z11 * det;
643
iz01 = -z01 * det;
644
iz10 = -z10 * det;
645
iz11 = z00 * det;
646
647
(*pXl)[0] = (float)(iz00 * q00_r + iz01 * q10_r);
648
(*pXh)[0] = (float)(iz10 * q00_r + iz11 * q10_r);
649
650
for (uint32_t c = 0; c < 1; c++)
651
{
652
float l = (*pXl)[c], h = (*pXh)[c];
653
654
if (input_box.get_dim(c) < .0000125f)
655
{
656
l = input_box[0][c];
657
h = input_box[1][c];
658
}
659
660
(*pXl)[c] = l;
661
(*pXh)[c] = h;
662
}
663
664
pXl->clamp(0.0f, MAX_QLOG16_VAL);
665
pXh->clamp(0.0f, MAX_QLOG16_VAL);
666
667
return true;
668
}
669
670
static bool compute_weighted_least_squares_endpoints_rgb(
671
uint32_t N,
672
const uint8_t* pSelectors, const vec4F* pSelector_weights, const float* pRaw_weights, /* ti */
673
const float* pEmphasis_weights /* wi */,
674
vec3F* pXl, vec3F* pXh,
675
const vec4F* pColors, /* pi */
676
const aabb3F& input_box)
677
{
678
(void)input_box;
679
680
assert(N);
681
assert((pSelectors && pSelector_weights) || pRaw_weights);
682
assert(pEmphasis_weights);
683
684
// Pi = pixel colors
685
// Ti = project weights, [0,1]
686
// Wi = emphasis weights
687
688
float total_wi = 0.0f;
689
for (uint32_t i = 0; i < N; i++)
690
total_wi += pEmphasis_weights[i];
691
692
if (total_wi == 0.0f)
693
return false;
694
695
float weighted_mean_tw = 0.0f;
696
float weighted_mean_pw[3] = { 0.0f };
697
698
for (uint32_t i = 0; i < N; i++)
699
{
700
const float wi = pEmphasis_weights[i];
701
const float ti = pSelectors ? pSelector_weights[pSelectors[i]][3] : pRaw_weights[i];
702
const float pi_r = pColors[i][0], pi_g = pColors[i][1], pi_b = pColors[i][2];
703
704
weighted_mean_tw += wi * ti;
705
706
weighted_mean_pw[0] += wi * pi_r;
707
weighted_mean_pw[1] += wi * pi_g;
708
weighted_mean_pw[2] += wi * pi_b;
709
}
710
711
weighted_mean_tw /= total_wi;
712
713
weighted_mean_pw[0] /= total_wi;
714
weighted_mean_pw[1] /= total_wi;
715
weighted_mean_pw[2] /= total_wi;
716
717
float spt[3] = { 0.0f };
718
float stt = 0.0f;
719
720
for (uint32_t i = 0; i < N; i++)
721
{
722
const float wi = pEmphasis_weights[i];
723
const float ti = pSelectors ? pSelector_weights[pSelectors[i]][3] : pRaw_weights[i];
724
const float pi_r = pColors[i][0], pi_g = pColors[i][1], pi_b = pColors[i][2];
725
726
spt[0] += wi * (pi_r - weighted_mean_pw[0]) * (ti - weighted_mean_tw);
727
spt[1] += wi * (pi_g - weighted_mean_pw[1]) * (ti - weighted_mean_tw);
728
spt[2] += wi * (pi_b - weighted_mean_pw[2]) * (ti - weighted_mean_tw);
729
730
stt += wi * square(ti - weighted_mean_tw);
731
}
732
733
if (stt == 0.0f)
734
return false;
735
736
for (uint32_t i = 0; i < 3; i++)
737
{
738
float h = weighted_mean_pw[i] + (spt[i] / stt) * (1.0f - weighted_mean_tw);
739
float l = weighted_mean_pw[i] - (spt[i] / stt) * weighted_mean_tw;
740
741
(*pXh)[i] = h;
742
(*pXl)[i] = l;
743
}
744
745
pXl->clamp(0.0f, MAX_QLOG16_VAL);
746
pXh->clamp(0.0f, MAX_QLOG16_VAL);
747
748
return true;
749
}
750
751
static vec4F g_astc_ls_weights_ise[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS];
752
753
static uint8_t g_map_astc_to_linear_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; // [ise_range][astc_index] -> linear index
754
static uint8_t g_map_linear_to_astc_order[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][MAX_SUPPORTED_WEIGHT_LEVELS]; // [ise_range][linear_index] -> astc_index
755
756
static void encode_astc_hdr_init()
757
{
758
// Precomputed weight constants used during least fit determination. For each entry: w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w
759
for (uint32_t range = MIN_SUPPORTED_ISE_WEIGHT_INDEX; range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX; range++)
760
{
761
const uint32_t num_levels = g_ise_weight_lerps[range][0];
762
assert(num_levels == astc_helpers::get_ise_levels(range));
763
assert((num_levels >= MIN_SUPPORTED_WEIGHT_LEVELS) && (num_levels <= MAX_SUPPORTED_WEIGHT_LEVELS));
764
765
for (uint32_t i = 0; i < num_levels; i++)
766
{
767
float w = g_ise_weight_lerps[range][1 + i] * (1.0f / 64.0f);
768
769
g_astc_ls_weights_ise[range][i].set(w * w, (1.0f - w) * w, (1.0f - w) * (1.0f - w), w);
770
}
771
}
772
773
for (uint32_t ise_range = MIN_SUPPORTED_ISE_WEIGHT_INDEX; ise_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX; ise_range++)
774
{
775
const uint32_t num_levels = g_ise_weight_lerps[ise_range][0];
776
assert((num_levels >= MIN_SUPPORTED_WEIGHT_LEVELS) && (num_levels <= MAX_SUPPORTED_WEIGHT_LEVELS));
777
778
uint32_t s[MAX_SUPPORTED_WEIGHT_LEVELS];
779
for (uint32_t i = 0; i < num_levels; i++)
780
s[i] = (g_ise_weight_lerps[ise_range][1 + i] << 8) + i;
781
782
std::sort(s, s + num_levels);
783
784
for (uint32_t i = 0; i < num_levels; i++)
785
g_map_linear_to_astc_order[ise_range][i] = (uint8_t)(s[i] & 0xFF);
786
787
for (uint32_t i = 0; i < num_levels; i++)
788
g_map_astc_to_linear_order[ise_range][g_map_linear_to_astc_order[ise_range][i]] = (uint8_t)i;
789
}
790
791
//init_quantize_tables();
792
}
793
794
bool g_astc_hdr_enc_initialized;
795
796
void astc_hdr_enc_init()
797
{
798
if (g_astc_hdr_enc_initialized)
799
return;
800
801
astc_hdr_core_init();
802
803
astc_helpers::init_tables(true);
804
805
init_qlog_tables();
806
807
encode_astc_hdr_init();
808
809
g_astc_hdr_enc_initialized = true;
810
}
811
812
void interpolate_qlog12_colors(
813
const int e[2][3],
814
half_float* pDecoded_half,
815
vec3F* pDecoded_float,
816
uint32_t n, uint32_t ise_weight_range)
817
{
818
assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
819
820
for (uint32_t i = 0; i < 2; i++)
821
{
822
for (uint32_t j = 0; j < 3; j++)
823
{
824
assert(in_range(e[i][j], 0, 0xFFF));
825
}
826
}
827
828
for (uint32_t i = 0; i < n; i++)
829
{
830
const int c = g_ise_weight_lerps[ise_weight_range][1 + i];
831
assert(c == (int)astc_helpers::dequant_bise_weight(i, ise_weight_range));
832
833
half_float rf, gf, bf;
834
835
{
836
uint32_t r0 = e[0][0] << 4;
837
uint32_t r1 = e[1][0] << 4;
838
int ri = (r0 * (64 - c) + r1 * c + 32) / 64;
839
rf = astc_helpers::qlog16_to_half(ri);
840
}
841
842
{
843
uint32_t g0 = e[0][1] << 4;
844
uint32_t g1 = e[1][1] << 4;
845
int gi = (g0 * (64 - c) + g1 * c + 32) / 64;
846
gf = astc_helpers::qlog16_to_half(gi);
847
}
848
849
{
850
uint32_t b0 = e[0][2] << 4;
851
uint32_t b1 = e[1][2] << 4;
852
int bi = (b0 * (64 - c) + b1 * c + 32) / 64;
853
bf = astc_helpers::qlog16_to_half(bi);
854
}
855
856
if (pDecoded_half)
857
{
858
pDecoded_half[i * 3 + 0] = rf;
859
pDecoded_half[i * 3 + 1] = gf;
860
pDecoded_half[i * 3 + 2] = bf;
861
}
862
863
if (pDecoded_float)
864
{
865
pDecoded_float[i][0] = half_to_float(rf);
866
pDecoded_float[i][1] = half_to_float(gf);
867
pDecoded_float[i][2] = half_to_float(bf);
868
}
869
}
870
}
871
872
// decoded in ASTC order, not linear order
873
// return false if the ISE endpoint quantization leads to non-valid endpoints being decoded
874
bool get_astc_hdr_mode_11_block_colors(
875
const uint8_t* pEndpoints,
876
half_float* pDecoded_half,
877
vec3F* pDecoded_float,
878
uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range)
879
{
880
assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
881
882
int e[2][3];
883
if (!decode_mode11_to_qlog12(pEndpoints, e, ise_endpoint_range))
884
return false;
885
886
interpolate_qlog12_colors(e, pDecoded_half, pDecoded_float, n, ise_weight_range);
887
888
return true;
889
}
890
891
// decoded in ASTC order, not linear order
892
// return false if the ISE endpoint quantization leads to non-valid endpoints being decoded
893
bool get_astc_hdr_mode_7_block_colors(
894
const uint8_t* pEndpoints,
895
half_float* pDecoded_half,
896
vec3F* pDecoded_float,
897
uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range)
898
{
899
assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
900
901
int e[2][3];
902
if (!decode_mode7_to_qlog12(pEndpoints, e, nullptr, ise_endpoint_range))
903
return false;
904
905
interpolate_qlog12_colors(e, pDecoded_half, pDecoded_float, n, ise_weight_range);
906
907
return true;
908
}
909
910
double eval_selectors_f(
911
uint32_t num_pixels,
912
uint8_t* pWeights,
913
const half_float* pBlock_pixels_half,
914
uint32_t num_weight_levels,
915
const half_float* pDecoded_half,
916
const astc_hdr_codec_base_options& coptions,
917
uint32_t usable_selector_bitmask)
918
{
919
assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));
920
assert(usable_selector_bitmask);
921
922
const float R_WEIGHT = coptions.m_r_err_scale;
923
const float G_WEIGHT = coptions.m_g_err_scale;
924
925
double total_error = 0;
926
927
#ifdef _DEBUG
928
for (uint32_t i = 0; i < num_weight_levels; i++)
929
{
930
assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 0]));
931
assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 1]));
932
assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 2]));
933
}
934
#endif
935
936
double decoded_half_q[MAX_SUPPORTED_WEIGHT_LEVELS][3];
937
938
for (uint32_t i = 0; i < num_weight_levels; i++)
939
{
940
const half_float* p = &pDecoded_half[i * 3];
941
942
decoded_half_q[i][0] = q(p[0], coptions.m_q_log_bias);
943
decoded_half_q[i][1] = q(p[1], coptions.m_q_log_bias);
944
decoded_half_q[i][2] = q(p[2], coptions.m_q_log_bias);
945
}
946
947
for (uint32_t p = 0; p < num_pixels; p++)
948
{
949
const half_float* pDesired_half = &pBlock_pixels_half[p * 3];
950
951
const double desired_half_r_q = q(pDesired_half[0], coptions.m_q_log_bias);
952
const double desired_half_g_q = q(pDesired_half[1], coptions.m_q_log_bias);
953
const double desired_half_b_q = q(pDesired_half[2], coptions.m_q_log_bias);
954
955
double lowest_e = BIG_FLOAT_VAL;
956
957
//double dists[MAX_SUPPORTED_WEIGHT_LEVELS];
958
959
// this is an approximation of MSLE
960
for (uint32_t i = 0; i < num_weight_levels; i++)
961
{
962
if (((1 << i) & usable_selector_bitmask) == 0)
963
continue;
964
965
// compute piecewise linear approximation of log2(a+eps)-log2(b+eps), for each component, then MSLE
966
double rd = decoded_half_q[i][0] - desired_half_r_q;
967
double gd = decoded_half_q[i][1] - desired_half_g_q;
968
double bd = decoded_half_q[i][2] - desired_half_b_q;
969
970
double e = R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;
971
972
//dists[i] = e;
973
974
if (e < lowest_e)
975
{
976
lowest_e = e;
977
pWeights[p] = (uint8_t)i;
978
}
979
}
980
981
total_error += lowest_e;
982
983
} // p
984
985
return total_error;
986
}
987
988
double eval_selectors(
989
uint32_t num_pixels,
990
uint8_t* pWeights,
991
uint32_t ise_weight_range,
992
const half_float* pBlock_pixels_half,
993
uint32_t num_weight_levels,
994
const half_float* pDecoded_half,
995
const astc_hdr_codec_base_options& coptions,
996
uint32_t usable_selector_bitmask)
997
{
998
if ((coptions.m_r_err_scale != 2.0f) || (coptions.m_g_err_scale != 3.0f))
999
{
1000
return eval_selectors_f(
1001
num_pixels,
1002
pWeights,
1003
pBlock_pixels_half,
1004
num_weight_levels,
1005
pDecoded_half,
1006
coptions,
1007
usable_selector_bitmask);
1008
}
1009
1010
assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));
1011
assert(usable_selector_bitmask);
1012
1013
uint64_t total_error = 0;
1014
1015
#ifdef _DEBUG
1016
for (uint32_t i = 0; i < num_weight_levels; i++)
1017
{
1018
assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 0]));
1019
assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 1]));
1020
assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 2]));
1021
}
1022
#endif
1023
1024
int64_t decoded_half_q[MAX_SUPPORTED_WEIGHT_LEVELS][3];
1025
1026
for (uint32_t i = 0; i < num_weight_levels; i++)
1027
{
1028
const half_float* p = &pDecoded_half[i * 3];
1029
1030
decoded_half_q[i][0] = q2(p[0], coptions.m_q_log_bias);
1031
decoded_half_q[i][1] = q2(p[1], coptions.m_q_log_bias);
1032
decoded_half_q[i][2] = q2(p[2], coptions.m_q_log_bias);
1033
}
1034
1035
if (usable_selector_bitmask != UINT32_MAX)
1036
{
1037
for (uint32_t p = 0; p < num_pixels; p++)
1038
{
1039
const half_float* pDesired_half = &pBlock_pixels_half[p * 3];
1040
1041
const int64_t desired_half_r_q = q2(pDesired_half[0], coptions.m_q_log_bias);
1042
const int64_t desired_half_g_q = q2(pDesired_half[1], coptions.m_q_log_bias);
1043
const int64_t desired_half_b_q = q2(pDesired_half[2], coptions.m_q_log_bias);
1044
1045
int64_t lowest_e = INT64_MAX;
1046
1047
for (uint32_t i = 0; i < num_weight_levels; i++)
1048
{
1049
if (((1 << i) & usable_selector_bitmask) == 0)
1050
continue;
1051
1052
int64_t rd = decoded_half_q[i][0] - desired_half_r_q;
1053
int64_t gd = decoded_half_q[i][1] - desired_half_g_q;
1054
int64_t bd = decoded_half_q[i][2] - desired_half_b_q;
1055
1056
int64_t e = 2 * (rd * rd) + 3 * (gd * gd) + bd * bd;
1057
1058
if (e < lowest_e)
1059
{
1060
lowest_e = e;
1061
pWeights[p] = (uint8_t)i;
1062
}
1063
}
1064
1065
total_error += lowest_e;
1066
1067
} // p
1068
}
1069
else
1070
{
1071
if ((num_weight_levels <= 4) || (coptions.m_disable_weight_plane_optimization))
1072
{
1073
for (uint32_t p = 0; p < num_pixels; p++)
1074
{
1075
const half_float* pDesired_half = &pBlock_pixels_half[p * 3];
1076
1077
const half_float desired_r = pDesired_half[0], desired_g = pDesired_half[1], desired_b = pDesired_half[2];
1078
1079
const int64_t desired_half_r_q = q2(desired_r, coptions.m_q_log_bias);
1080
const int64_t desired_half_g_q = q2(desired_g, coptions.m_q_log_bias);
1081
const int64_t desired_half_b_q = q2(desired_b, coptions.m_q_log_bias);
1082
1083
int64_t lowest_e = INT64_MAX;
1084
1085
uint32_t i;
1086
for (i = 0; (i + 1) < num_weight_levels; i += 2)
1087
{
1088
int64_t e0, e1;
1089
1090
{
1091
int64_t rd0 = decoded_half_q[i][0] - desired_half_r_q; // 27 bits maximum with half float inputs
1092
int64_t gd0 = decoded_half_q[i][1] - desired_half_g_q;
1093
int64_t bd0 = decoded_half_q[i][2] - desired_half_b_q;
1094
e0 = ((2 * (rd0 * rd0) + 3 * (gd0 * gd0) + bd0 * bd0) << 5) | i; // max 62 bits (27*2+3+5)
1095
}
1096
1097
{
1098
int64_t rd1 = decoded_half_q[i + 1][0] - desired_half_r_q;
1099
int64_t gd1 = decoded_half_q[i + 1][1] - desired_half_g_q;
1100
int64_t bd1 = decoded_half_q[i + 1][2] - desired_half_b_q;
1101
e1 = ((2 * (rd1 * rd1) + 3 * (gd1 * gd1) + bd1 * bd1) << 5) | (i + 1);
1102
}
1103
1104
lowest_e = minimum(lowest_e, e0, e1);
1105
}
1106
1107
if (i != num_weight_levels)
1108
{
1109
int64_t rd0 = decoded_half_q[i][0] - desired_half_r_q;
1110
int64_t gd0 = decoded_half_q[i][1] - desired_half_g_q;
1111
int64_t bd0 = decoded_half_q[i][2] - desired_half_b_q;
1112
int64_t e0 = ((2 * (rd0 * rd0) + 3 * (gd0 * gd0) + bd0 * bd0) << 5) | i;
1113
1114
lowest_e = minimum(lowest_e, e0);
1115
}
1116
1117
pWeights[p] = (uint8_t)(lowest_e & 31);
1118
1119
total_error += (lowest_e >> 5);
1120
1121
} // p
1122
}
1123
else
1124
{
1125
const auto& weight_val_to_ise_tab = astc_helpers::g_dequant_tables.get_weight_tab(ise_weight_range).m_val_to_ise;
1126
const int lo_index = weight_val_to_ise_tab[0], hi_index = weight_val_to_ise_tab[64], mid_index = weight_val_to_ise_tab[32];
1127
1128
const vec3F low_color((float)pDecoded_half[lo_index * 3 + 0], (float)pDecoded_half[lo_index * 3 + 1], (float)pDecoded_half[lo_index * 3 + 2]);
1129
const vec3F high_color((float)pDecoded_half[hi_index * 3 + 0], (float)pDecoded_half[hi_index * 3 + 1], (float)pDecoded_half[hi_index * 3 + 2]);
1130
const vec3F mid_color((float)pDecoded_half[mid_index * 3 + 0], (float)pDecoded_half[mid_index * 3 + 1], (float)pDecoded_half[mid_index * 3 + 2]);
1131
1132
const vec3F block_dir(high_color - low_color);
1133
1134
for (uint32_t p = 0; p < num_pixels; p++)
1135
{
1136
const half_float* pDesired_half = &pBlock_pixels_half[p * 3];
1137
1138
const half_float desired_r = pDesired_half[0], desired_g = pDesired_half[1], desired_b = pDesired_half[2];
1139
1140
const int64_t desired_half_r_q = q2(desired_r, coptions.m_q_log_bias);
1141
const int64_t desired_half_g_q = q2(desired_g, coptions.m_q_log_bias);
1142
const int64_t desired_half_b_q = q2(desired_b, coptions.m_q_log_bias);
1143
1144
// Determine which side of the middle plane the point is for a modest gain
1145
vec3F c((float)desired_r - mid_color[0], (float)desired_g - mid_color[1], (float)desired_b - mid_color[2]);
1146
float d = c.dot(block_dir);
1147
1148
int i = 0, high_index = (num_weight_levels / 2) + 1;
1149
if (d >= 0.0f)
1150
{
1151
i = num_weight_levels / 2;
1152
high_index = num_weight_levels;
1153
}
1154
1155
int64_t lowest_e = INT64_MAX;
1156
1157
for (; (i + 1) < high_index; i += 2)
1158
{
1159
int64_t e0, e1;
1160
1161
{
1162
int64_t rd0 = decoded_half_q[i][0] - desired_half_r_q; // 27 bits maximum with half float inputs
1163
int64_t gd0 = decoded_half_q[i][1] - desired_half_g_q;
1164
int64_t bd0 = decoded_half_q[i][2] - desired_half_b_q;
1165
e0 = ((2 * (rd0 * rd0) + 3 * (gd0 * gd0) + bd0 * bd0) << 5) | i; // max 62 bits (27*2+3+5)
1166
}
1167
1168
{
1169
int64_t rd1 = decoded_half_q[i + 1][0] - desired_half_r_q;
1170
int64_t gd1 = decoded_half_q[i + 1][1] - desired_half_g_q;
1171
int64_t bd1 = decoded_half_q[i + 1][2] - desired_half_b_q;
1172
e1 = ((2 * (rd1 * rd1) + 3 * (gd1 * gd1) + bd1 * bd1) << 5) | (i + 1);
1173
}
1174
1175
lowest_e = minimum(lowest_e, e0, e1);
1176
}
1177
1178
if (i != high_index)
1179
{
1180
int64_t rd0 = decoded_half_q[i][0] - desired_half_r_q;
1181
int64_t gd0 = decoded_half_q[i][1] - desired_half_g_q;
1182
int64_t bd0 = decoded_half_q[i][2] - desired_half_b_q;
1183
int64_t e0 = ((2 * (rd0 * rd0) + 3 * (gd0 * gd0) + bd0 * bd0) << 5) | i;
1184
1185
lowest_e = minimum(lowest_e, e0);
1186
}
1187
1188
pWeights[p] = (uint8_t)(lowest_e & 31);
1189
1190
total_error += (lowest_e >> 5);
1191
1192
} // p
1193
}
1194
}
1195
1196
return (double)total_error;
1197
}
1198
1199
//--------------------------------------------------------------------------------------------------------------------------
1200
1201
double eval_selectors_dual_plane(
1202
uint32_t channel_index,
1203
uint32_t num_pixels,
1204
uint8_t* pWeights0, uint8_t* pWeights1,
1205
const half_float* pBlock_pixels_half,
1206
uint32_t num_weight_levels,
1207
const half_float* pDecoded_half,
1208
const astc_hdr_codec_base_options& coptions,
1209
uint32_t usable_selector_bitmask)
1210
{
1211
assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));
1212
assert(usable_selector_bitmask);
1213
1214
const float R_WEIGHT = coptions.m_r_err_scale;
1215
const float G_WEIGHT = coptions.m_g_err_scale;
1216
1217
double total_error = 0;
1218
1219
#ifdef _DEBUG
1220
for (uint32_t i = 0; i < num_weight_levels; i++)
1221
{
1222
assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 0]));
1223
assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 1]));
1224
assert(!is_half_inf_or_nan(pDecoded_half[i * 3 + 2]));
1225
}
1226
#endif
1227
1228
double decoded_half_q[MAX_SUPPORTED_WEIGHT_LEVELS][3];
1229
1230
for (uint32_t i = 0; i < num_weight_levels; i++)
1231
{
1232
const half_float* p = &pDecoded_half[i * 3];
1233
1234
decoded_half_q[i][0] = q(p[0], coptions.m_q_log_bias);
1235
decoded_half_q[i][1] = q(p[1], coptions.m_q_log_bias);
1236
decoded_half_q[i][2] = q(p[2], coptions.m_q_log_bias);
1237
}
1238
1239
const double channel_weights[3] = { R_WEIGHT, G_WEIGHT, 1.0f };
1240
1241
const uint32_t first_channel = (channel_index + 1) % 3;
1242
const uint32_t second_channel = (channel_index + 2) % 3;
1243
1244
// First plane
1245
const double first_channel_weight = channel_weights[first_channel];
1246
const double second_channel_weight = channel_weights[second_channel];
1247
1248
for (uint32_t p = 0; p < num_pixels; p++)
1249
{
1250
const half_float* pDesired_half = &pBlock_pixels_half[p * 3];
1251
1252
const double desired_half_x_q = q(pDesired_half[first_channel], coptions.m_q_log_bias);
1253
const double desired_half_y_q = q(pDesired_half[second_channel], coptions.m_q_log_bias);
1254
1255
double lowest_e = BIG_FLOAT_VAL;
1256
1257
// this is an approximation of MSLE
1258
for (uint32_t i = 0; i < num_weight_levels; i++)
1259
{
1260
if (((1 << i) & usable_selector_bitmask) == 0)
1261
continue;
1262
1263
double xd = decoded_half_q[i][first_channel] - desired_half_x_q;
1264
double yd = decoded_half_q[i][second_channel] - desired_half_y_q;
1265
1266
double e = first_channel_weight * (xd * xd) + second_channel_weight * (yd * yd);
1267
1268
if (e < lowest_e)
1269
{
1270
lowest_e = e;
1271
pWeights0[p] = (uint8_t)i;
1272
}
1273
}
1274
1275
total_error += lowest_e;
1276
1277
} // p
1278
1279
// Second plane
1280
const double alt_channel_weight = channel_weights[channel_index];
1281
1282
for (uint32_t p = 0; p < num_pixels; p++)
1283
{
1284
const half_float* pDesired_half = &pBlock_pixels_half[p * 3];
1285
1286
const double desired_half_a_q = q(pDesired_half[channel_index], coptions.m_q_log_bias);
1287
1288
double lowest_e = BIG_FLOAT_VAL;
1289
1290
// this is an approximation of MSLE
1291
for (uint32_t i = 0; i < num_weight_levels; i++)
1292
{
1293
if (((1 << i) & usable_selector_bitmask) == 0)
1294
continue;
1295
1296
double ad = decoded_half_q[i][channel_index] - desired_half_a_q;
1297
1298
double e = alt_channel_weight * (ad * ad);
1299
1300
if (e < lowest_e)
1301
{
1302
lowest_e = e;
1303
pWeights1[p] = (uint8_t)i;
1304
}
1305
}
1306
1307
total_error += lowest_e;
1308
1309
} // p
1310
1311
return total_error;
1312
}
1313
1314
//--------------------------------------------------------------------------------------------------------------------------
1315
1316
double compute_block_error(uint32_t num_pixels, const half_float* pOrig_block, const half_float* pPacked_block, const astc_hdr_codec_base_options& coptions)
1317
{
1318
const float R_WEIGHT = coptions.m_r_err_scale;
1319
const float G_WEIGHT = coptions.m_g_err_scale;
1320
1321
double total_error = 0;
1322
1323
for (uint32_t p = 0; p < num_pixels; p++)
1324
{
1325
double rd = q(pOrig_block[p * 3 + 0], coptions.m_q_log_bias) - q(pPacked_block[p * 3 + 0], coptions.m_q_log_bias);
1326
double gd = q(pOrig_block[p * 3 + 1], coptions.m_q_log_bias) - q(pPacked_block[p * 3 + 1], coptions.m_q_log_bias);
1327
double bd = q(pOrig_block[p * 3 + 2], coptions.m_q_log_bias) - q(pPacked_block[p * 3 + 2], coptions.m_q_log_bias);
1328
1329
double e = R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;
1330
1331
total_error += e;
1332
}
1333
1334
return total_error;
1335
}
1336
1337
//--------------------------------------------------------------------------------------------------------------------------
1338
1339
double compute_block_error_from_raw_weights(
1340
uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3],
1341
const uint8_t* pRaw_weights,
1342
int endpoints_qlog12[2][3],
1343
const astc_hdr_codec_base_options& coptions)
1344
{
1345
// qlog12->qlog16
1346
int trial_e[2][3];
1347
for (uint32_t i = 0; i < 3; i++)
1348
{
1349
assert(endpoints_qlog12[0][i] <= (int)basist::MAX_QLOG12);
1350
assert(endpoints_qlog12[1][i] <= (int)basist::MAX_QLOG12);
1351
1352
trial_e[0][i] = endpoints_qlog12[0][i] << 4;
1353
trial_e[1][i] = endpoints_qlog12[1][i] << 4;
1354
}
1355
1356
const float R_WEIGHT = coptions.m_r_err_scale, G_WEIGHT = coptions.m_g_err_scale;
1357
1358
double trial_error = 0;
1359
for (uint32_t p = 0; p < num_pixels; p++)
1360
{
1361
const half_float* pDesired_half = &pBlock_pixels_half[p][0];
1362
1363
const double desired_half_r_q = q(pDesired_half[0], coptions.m_q_log_bias), desired_half_g_q = q(pDesired_half[1], coptions.m_q_log_bias), desired_half_b_q = q(pDesired_half[2], coptions.m_q_log_bias);
1364
1365
const uint32_t c = pRaw_weights[p];
1366
assert(c <= 64);
1367
1368
{
1369
half_float rf, gf, bf;
1370
{
1371
uint32_t r0 = trial_e[0][0], r1 = trial_e[1][0];
1372
int ri = (r0 * (64 - c) + r1 * c + 32) / 64;
1373
rf = astc_helpers::qlog16_to_half(ri);
1374
}
1375
{
1376
uint32_t g0 = trial_e[0][1], g1 = trial_e[1][1];
1377
int gi = (g0 * (64 - c) + g1 * c + 32) / 64;
1378
gf = astc_helpers::qlog16_to_half(gi);
1379
}
1380
{
1381
uint32_t b0 = trial_e[0][2], b1 = trial_e[1][2];
1382
int bi = (b0 * (64 - c) + b1 * c + 32) / 64;
1383
bf = astc_helpers::qlog16_to_half(bi);
1384
}
1385
1386
const double decoded_half_q0 = q(rf, coptions.m_q_log_bias), decoded_half_q1 = q(gf, coptions.m_q_log_bias), decoded_half_q2 = q(bf, coptions.m_q_log_bias);
1387
const double rd = decoded_half_q0 - desired_half_r_q, gd = decoded_half_q1 - desired_half_g_q, bd = decoded_half_q2 - desired_half_b_q;
1388
trial_error += R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;
1389
}
1390
}
1391
1392
return trial_error;
1393
}
1394
1395
//--------------------------------------------------------------------------------------------------------------------------
1396
1397
// Clamps v to the inclusive range [l, h] and returns the result.
// When v lies outside the range, did_clamp is set to true and max_clamp_mag is raised to the distance
// between v and the violated bound if that distance is larger than its current value.
// When v is already in range, neither output is touched.
static inline int compute_clamped_val(int v, int l, int h, bool& did_clamp, int& max_clamp_mag)
{
	assert(l < h);

	const bool is_below = (v < l);

	// In range: nothing to report.
	if ((!is_below) && (v <= h))
		return v;

	const int bound = is_below ? l : h;
	const int overshoot = is_below ? (l - v) : (v - h);

	if (overshoot > max_clamp_mag)
		max_clamp_mag = overshoot;

	did_clamp = true;

	return bound;
}
1418
1419
//--------------------------------------------------------------------------------------------------------------------------
1420
1421
const uint8_t s_b_bits[8] = { 7, 8, 6, 7, 8, 6, 7, 6 };
1422
const uint8_t s_c_bits[8] = { 6, 6, 7, 7, 6, 7, 7, 7 };
1423
const uint8_t s_d_bits[8] = { 7, 6, 7, 6, 5, 6, 5, 6 };
1424
1425
// val_q[] must be already packed to qlog9-qlog12.
1426
bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, int val_q[2][3], int& max_clamp_mag, bool early_out_if_clamped, int max_clamp_mag_accept_thresh)
1427
{
1428
assert(submode <= 7);
1429
1430
const uint32_t a_bits = 9 + (submode >> 1);
1431
const uint32_t b_bits = s_b_bits[submode];
1432
const uint32_t c_bits = s_c_bits[submode];
1433
const uint32_t d_bits = s_d_bits[submode];
1434
1435
const int max_a_val = (1 << a_bits) - 1;
1436
const int max_b_val = (1 << b_bits) - 1;
1437
const int max_c_val = (1 << c_bits) - 1;
1438
1439
// The maximum usable value before it turns to NaN/Inf
1440
const int max_a_qlog = get_max_qlog(a_bits);
1441
BASISU_NOTE_UNUSED(max_a_qlog);
1442
1443
const int min_d_val = -(1 << (d_bits - 1));
1444
const int max_d_val = -min_d_val - 1;
1445
assert((max_d_val - min_d_val + 1) == (1 << d_bits));
1446
1447
int highest_q = -1, highest_val = 0, highest_comp = 0;
1448
1449
for (uint32_t c = 0; c < 3; c++)
1450
{
1451
assert(val_q[0][c] <= max_a_qlog);
1452
assert(val_q[1][c] <= max_a_qlog);
1453
}
1454
1455
for (uint32_t v = 0; v < 2; v++)
1456
{
1457
for (uint32_t c = 0; c < 3; c++)
1458
{
1459
assert(val_q[v][c] >= 0 && val_q[v][c] <= max_a_val);
1460
1461
if (val_q[v][c] > highest_q)
1462
{
1463
highest_q = val_q[v][c];
1464
highest_val = v;
1465
highest_comp = c;
1466
}
1467
}
1468
}
1469
1470
const bool had_tie = (val_q[highest_val ^ 1][highest_comp] == highest_q);
1471
1472
if (highest_val != 1)
1473
{
1474
for (uint32_t c = 0; c < 3; c++)
1475
{
1476
std::swap(val_q[0][c], val_q[1][c]);
1477
}
1478
}
1479
1480
if (highest_comp)
1481
{
1482
std::swap(val_q[0][0], val_q[0][highest_comp]);
1483
std::swap(val_q[1][0], val_q[1][highest_comp]);
1484
}
1485
1486
int orig_q[2][3];
1487
memcpy(orig_q, val_q, sizeof(int) * 6);
1488
1489
// val[1][0] is now guaranteed to be highest
1490
int best_va = 0, best_vb0 = 0, best_vb1 = 0, best_vc = 0, best_vd0 = 0, best_vd1 = 0;
1491
int best_max_clamp_mag = 0;
1492
bool best_did_clamp = false;
1493
int best_q[2][3] = { { 0, 0, 0}, { 0, 0, 0 } };
1494
BASISU_NOTE_UNUSED(best_q);
1495
uint32_t best_dist = UINT_MAX;
1496
1497
for (uint32_t pass = 0; pass < 2; pass++)
1498
{
1499
int trial_va = val_q[1][0];
1500
1501
assert(trial_va <= max_a_val);
1502
assert(trial_va >= val_q[1][1]);
1503
assert(trial_va >= val_q[1][2]);
1504
1505
assert(trial_va >= val_q[0][0]);
1506
assert(trial_va >= val_q[0][1]);
1507
assert(trial_va >= val_q[0][2]);
1508
1509
bool did_clamp = false;
1510
int trial_max_clamp_mag = 0;
1511
1512
int trial_vb0 = compute_clamped_val(trial_va - val_q[1][1], 0, max_b_val, did_clamp, trial_max_clamp_mag);
1513
int trial_vb1 = compute_clamped_val(trial_va - val_q[1][2], 0, max_b_val, did_clamp, trial_max_clamp_mag);
1514
int trial_vc = compute_clamped_val(trial_va - val_q[0][0], 0, max_c_val, did_clamp, trial_max_clamp_mag);
1515
int trial_vd0 = compute_clamped_val((trial_va - trial_vb0 - trial_vc) - val_q[0][1], min_d_val, max_d_val, did_clamp, trial_max_clamp_mag);
1516
int trial_vd1 = compute_clamped_val((trial_va - trial_vb1 - trial_vc) - val_q[0][2], min_d_val, max_d_val, did_clamp, trial_max_clamp_mag);
1517
1518
if ((early_out_if_clamped) && (did_clamp) && (trial_max_clamp_mag > max_clamp_mag_accept_thresh))
1519
{
1520
if ((!had_tie) || (pass == 1))
1521
{
1522
max_clamp_mag = trial_max_clamp_mag;
1523
return true;
1524
}
1525
}
1526
1527
if (!did_clamp)
1528
{
1529
// Make sure decoder gets the expected values
1530
assert(trial_va == val_q[1][0]);
1531
assert(trial_va - trial_vb0 == val_q[1][1]);
1532
assert(trial_va - trial_vb1 == val_q[1][2]);
1533
1534
assert((trial_va - trial_vc) == val_q[0][0]);
1535
assert((trial_va - trial_vb0 - trial_vc - trial_vd0) == val_q[0][1]);
1536
assert((trial_va - trial_vb1 - trial_vc - trial_vd1) == val_q[0][2]);
1537
}
1538
1539
const int r_e0 = clamp<int>(trial_va, 0, max_a_val);
1540
const int r_e1 = clamp<int>(trial_va - trial_vb0, 0, max_a_val);
1541
const int r_e2 = clamp<int>(trial_va - trial_vb1, 0, max_a_val);
1542
1543
const int r_f0 = clamp<int>(trial_va - trial_vc, 0, max_a_val);
1544
const int r_f1 = clamp<int>(trial_va - trial_vb0 - trial_vc - trial_vd0, 0, max_a_val);
1545
const int r_f2 = clamp<int>(trial_va - trial_vb1 - trial_vc - trial_vd1, 0, max_a_val);
1546
1547
assert(r_e0 <= max_a_qlog);
1548
assert(r_e1 <= max_a_qlog);
1549
assert(r_e2 <= max_a_qlog);
1550
1551
assert(r_f0 <= max_a_qlog);
1552
assert(r_f1 <= max_a_qlog);
1553
assert(r_f2 <= max_a_qlog);
1554
1555
if ((!did_clamp) || (!had_tie))
1556
{
1557
best_va = trial_va;
1558
best_vb0 = trial_vb0;
1559
best_vb1 = trial_vb1;
1560
best_vc = trial_vc;
1561
best_vd0 = trial_vd0;
1562
best_vd1 = trial_vd1;
1563
best_max_clamp_mag = trial_max_clamp_mag;
1564
best_did_clamp = did_clamp;
1565
1566
best_q[1][0] = r_e0;
1567
best_q[1][1] = r_e1;
1568
best_q[1][2] = r_e2;
1569
best_q[0][0] = r_f0;
1570
best_q[0][1] = r_f1;
1571
best_q[0][2] = r_f2;
1572
break;
1573
}
1574
1575
// we had a tie and it did clamp, try swapping L/H for a potential slight gain
1576
1577
const uint32_t r_dist1 = basisu::square<int>(r_e0 - val_q[1][0]) + basisu::square<int>(r_e1 - val_q[1][1]) + basisu::square<int>(r_e2 - val_q[1][2]);
1578
const uint32_t r_dist0 = basisu::square<int>(r_f0 - val_q[0][0]) + basisu::square<int>(r_f1 - val_q[0][1]) + basisu::square<int>(r_f2 - val_q[0][2]);
1579
1580
const uint32_t total_dist = r_dist1 + r_dist0;
1581
1582
if (total_dist < best_dist)
1583
{
1584
best_dist = total_dist;
1585
1586
best_va = trial_va;
1587
best_vb0 = trial_vb0;
1588
best_vb1 = trial_vb1;
1589
best_vc = trial_vc;
1590
best_vd0 = trial_vd0;
1591
best_vd1 = trial_vd1;
1592
best_did_clamp = did_clamp;
1593
1594
best_q[1][0] = r_e0;
1595
best_q[1][1] = r_e1;
1596
best_q[1][2] = r_e2;
1597
best_q[0][0] = r_f0;
1598
best_q[0][1] = r_f1;
1599
best_q[0][2] = r_f2;
1600
}
1601
1602
for (uint32_t c = 0; c < 3; c++)
1603
std::swap(val_q[0][c], val_q[1][c]);
1604
}
1605
1606
// pack bits now
1607
int v0 = 0, v1 = 0, v2 = 0, v3 = 0, v4 = 0, v5 = 0;
1608
1609
int x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0;
1610
switch (submode)
1611
{
1612
case 0:
1613
x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vd0, 6); x3 = get_bit(best_vd1, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
1614
break;
1615
case 1:
1616
x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vb0, 7); x3 = get_bit(best_vb1, 7); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
1617
break;
1618
case 2:
1619
x0 = get_bit(best_va, 9); x1 = get_bit(best_vc, 6); x2 = get_bit(best_vd0, 6); x3 = get_bit(best_vd1, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
1620
break;
1621
case 3:
1622
x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_va, 9); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
1623
break;
1624
case 4:
1625
x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_vb0, 7); x3 = get_bit(best_vb1, 7); x4 = get_bit(best_va, 9); x5 = get_bit(best_va, 10);
1626
break;
1627
case 5:
1628
x0 = get_bit(best_va, 9); x1 = get_bit(best_va, 10); x2 = get_bit(best_vc, 7); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
1629
break;
1630
case 6:
1631
x0 = get_bit(best_vb0, 6); x1 = get_bit(best_vb1, 6); x2 = get_bit(best_va, 11); x3 = get_bit(best_vc, 6); x4 = get_bit(best_va, 9); x5 = get_bit(best_va, 10);
1632
break;
1633
case 7:
1634
x0 = get_bit(best_va, 9); x1 = get_bit(best_va, 10); x2 = get_bit(best_va, 11); x3 = get_bit(best_vc, 6); x4 = get_bit(best_vd0, 5); x5 = get_bit(best_vd1, 5);
1635
break;
1636
default:
1637
break;
1638
}
1639
1640
// write mode
1641
pack_bit(v1, 7, submode, 0);
1642
pack_bit(v2, 7, submode, 1);
1643
pack_bit(v3, 7, submode, 2);
1644
1645
// highest component
1646
pack_bit(v4, 7, highest_comp, 0);
1647
pack_bit(v5, 7, highest_comp, 1);
1648
1649
// write bit 8 of va
1650
pack_bit(v1, 6, best_va, 8);
1651
1652
// extra bits
1653
pack_bit(v2, 6, x0);
1654
pack_bit(v3, 6, x1);
1655
pack_bit(v4, 6, x2);
1656
pack_bit(v5, 6, x3);
1657
pack_bit(v4, 5, x4);
1658
pack_bit(v5, 5, x5);
1659
1660
v0 = best_va & 0xFF;
1661
v1 |= (best_vc & 63);
1662
v2 |= (best_vb0 & 63);
1663
v3 |= (best_vb1 & 63);
1664
v4 |= (best_vd0 & 31);
1665
v5 |= (best_vd1 & 31);
1666
1667
assert(in_range(v0, 0, 255) && in_range(v1, 0, 255) && in_range(v2, 0, 255) && in_range(v3, 0, 255) && in_range(v4, 0, 255) && in_range(v5, 0, 255));
1668
1669
pEndpoints[0] = (uint8_t)v0;
1670
pEndpoints[1] = (uint8_t)v1;
1671
pEndpoints[2] = (uint8_t)v2;
1672
pEndpoints[3] = (uint8_t)v3;
1673
pEndpoints[4] = (uint8_t)v4;
1674
pEndpoints[5] = (uint8_t)v5;
1675
1676
#ifdef _DEBUG
1677
// Test for valid pack by unpacking
1678
{
1679
if (highest_comp)
1680
{
1681
std::swap(best_q[0][0], best_q[0][highest_comp]);
1682
std::swap(best_q[1][0], best_q[1][highest_comp]);
1683
1684
std::swap(orig_q[0][0], orig_q[0][highest_comp]);
1685
std::swap(orig_q[1][0], orig_q[1][highest_comp]);
1686
}
1687
1688
int test_e[2][3];
1689
decode_mode11_to_qlog12(pEndpoints, test_e, astc_helpers::BISE_256_LEVELS);
1690
for (uint32_t i = 0; i < 2; i++)
1691
{
1692
for (uint32_t j = 0; j < 3; j++)
1693
{
1694
assert(best_q[i][j] == test_e[i][j] >> (12 - a_bits));
1695
1696
if (!best_did_clamp)
1697
{
1698
assert((orig_q[i][j] == test_e[i][j] >> (12 - a_bits)) ||
1699
(orig_q[1 - i][j] == test_e[i][j] >> (12 - a_bits)));
1700
}
1701
}
1702
}
1703
}
1704
#endif
1705
1706
max_clamp_mag = best_max_clamp_mag;
1707
1708
return best_did_clamp;
1709
}
1710
1711
bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& low_q16, const vec3F& high_q16, int& max_clamp_mag, bool early_out_if_clamped, int max_clamp_mag_accept_thresh)
1712
{
1713
assert(submode <= 7);
1714
1715
const uint32_t a_bits = 9 + (submode >> 1);
1716
const int max_a_val = (1 << a_bits) - 1;
1717
1718
// The maximum usable value before it turns to NaN/Inf
1719
const int max_a_qlog = get_max_qlog(a_bits);
1720
1721
int val_q[2][3];
1722
1723
for (uint32_t c = 0; c < 3; c++)
1724
{
1725
#if 0
1726
// This is very slightly better, but ~8% slower likely due to the table lookups.
1727
const half_float l = astc_helpers::qlog16_to_half((uint32_t)std::round(low_q16[c]));
1728
val_q[0][c] = half_to_qlog7_12(l, a_bits);
1729
1730
const half_float h = astc_helpers::qlog16_to_half((uint32_t)std::round(high_q16[c]));
1731
val_q[1][c] = half_to_qlog7_12(h, a_bits);
1732
#else
1733
// TODO: Tune quant_qlog16() for higher precision.
1734
val_q[0][c] = quant_qlog16((uint32_t)std::round(low_q16[c]), a_bits);
1735
val_q[1][c] = quant_qlog16((uint32_t)std::round(high_q16[c]), a_bits);
1736
#endif
1737
1738
#if 1
1739
if (val_q[0][c] == val_q[1][c])
1740
{
1741
#if 0
1742
if (l <= h)
1743
#else
1744
if (low_q16[c] < high_q16[c])
1745
#endif
1746
{
1747
if (val_q[0][c])
1748
val_q[0][c]--;
1749
1750
if (val_q[1][c] != max_a_val)
1751
val_q[1][c]++;
1752
}
1753
else
1754
{
1755
if (val_q[0][c] != max_a_val)
1756
val_q[0][c]++;
1757
1758
if (val_q[1][c])
1759
val_q[1][c]--;
1760
}
1761
}
1762
#endif
1763
1764
val_q[0][c] = minimum<uint32_t>(val_q[0][c], max_a_qlog);
1765
val_q[1][c] = minimum<uint32_t>(val_q[1][c], max_a_qlog);
1766
}
1767
1768
return pack_astc_mode11_submode(submode, pEndpoints, val_q, max_clamp_mag, early_out_if_clamped, max_clamp_mag_accept_thresh);
1769
}
1770
1771
//--------------------------------------------------------------------------------------------------------------------------
1772
1773
void pack_astc_mode11_direct(uint8_t* pEndpoints, vec3F l_q16, vec3F h_q16)
1774
{
1775
float lg = l_q16.dot(vec3F(1.0f)), hg = h_q16.dot(vec3F(1.0f));
1776
if (lg > hg)
1777
{
1778
// Ensure low endpoint is generally less bright than high in direct mode.
1779
std::swap(l_q16, h_q16);
1780
}
1781
1782
for (uint32_t i = 0; i < 3; i++)
1783
{
1784
// TODO: This goes from QLOG16->HALF->QLOG8/7
1785
half_float l_half = astc_helpers::qlog16_to_half(clamp((int)std::round(l_q16[i]), 0, 65535));
1786
half_float h_half = astc_helpers::qlog16_to_half(clamp((int)std::round(h_q16[i]), 0, 65535));
1787
1788
int l_q, h_q;
1789
1790
if (i == 2)
1791
{
1792
l_q = g_half_to_qlog7[bounds_check((uint32_t)l_half, 0U, 32768U)];
1793
h_q = g_half_to_qlog7[bounds_check((uint32_t)h_half, 0U, 32768U)];
1794
1795
l_q = minimum<uint32_t>(l_q, MAX_QLOG7);
1796
h_q = minimum<uint32_t>(h_q, MAX_QLOG7);
1797
}
1798
else
1799
{
1800
l_q = g_half_to_qlog8[bounds_check((uint32_t)l_half, 0U, 32768U)];
1801
h_q = g_half_to_qlog8[bounds_check((uint32_t)h_half, 0U, 32768U)];
1802
1803
// this quantizes R and G as 7 bits vs. 8, for grayscale.
1804
//l_q = g_half_to_qlog7[bounds_check((uint32_t)l_half, 0U, 32768U)] << 1;
1805
//h_q = g_half_to_qlog7[bounds_check((uint32_t)h_half, 0U, 32768U)] << 1;
1806
1807
l_q = minimum<uint32_t>(l_q, MAX_QLOG8);
1808
h_q = minimum<uint32_t>(h_q, MAX_QLOG8);
1809
}
1810
1811
#if 1
1812
if (l_q == h_q)
1813
{
1814
const int m = (i == 2) ? MAX_QLOG7 : MAX_QLOG8;
1815
1816
if (l_q16[i] <= h_q16[i])
1817
{
1818
if (l_q)
1819
l_q--;
1820
1821
if (h_q != m)
1822
h_q++;
1823
}
1824
else
1825
{
1826
if (h_q)
1827
h_q--;
1828
1829
if (l_q != m)
1830
l_q++;
1831
}
1832
}
1833
#endif
1834
1835
if (i == 2)
1836
{
1837
assert(l_q <= (int)MAX_QLOG7 && h_q <= (int)MAX_QLOG7);
1838
l_q |= 128;
1839
h_q |= 128;
1840
}
1841
else
1842
{
1843
assert(l_q <= (int)MAX_QLOG8 && h_q <= (int)MAX_QLOG8);
1844
}
1845
1846
pEndpoints[2 * i + 0] = (uint8_t)l_q;
1847
pEndpoints[2 * i + 1] = (uint8_t)h_q;
1848
}
1849
}
1850
1851
//--------------------------------------------------------------------------------------------------------------------------
1852
1853
bool pack_astc_mode7_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& rgb_q16, float s_q16, int& max_clamp_mag, uint32_t ise_weight_range, bool early_out_if_clamped, int max_clamp_mag_accept_thresh)
1854
{
1855
assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
1856
1857
assert(submode <= 5);
1858
max_clamp_mag = 0;
1859
1860
static const uint8_t s_r_bits[6] = { 11, 11, 10, 9, 8, 7 };
1861
static const uint8_t s_g_b_bits[6] = { 5, 6, 5, 6, 7, 7 };
1862
static const uint8_t s_s_bits[6] = { 7, 5, 8, 7, 6, 7 };
1863
1864
// The precision of the components
1865
const uint32_t prec_bits = s_r_bits[submode];
1866
1867
int qlog[4], pack_bits[4];
1868
1869
for (uint32_t i = 0; i < 4; i++)
1870
{
1871
const float f = (i == 3) ? s_q16 : rgb_q16[i];
1872
1873
// The # of bits the component is packed into
1874
if (i == 0)
1875
pack_bits[i] = s_r_bits[submode];
1876
else if (i == 3)
1877
pack_bits[i] = s_s_bits[submode];
1878
else
1879
pack_bits[i] = s_g_b_bits[submode];
1880
1881
#if 0
1882
// this is slightly worse
1883
// TODO: going from qlog16 to half loses some precision. Then going from half to qlog 7-12 will have extra error.
1884
half_float h = qlog_to_half(clamp((int)std::round(f), 0, MAX_QLOG16), 16);
1885
qlog[i] = half_to_qlog7_12((half_float)bounds_check((uint32_t)h, 0U, 32768U), prec_bits);
1886
#else
1887
qlog[i] = quant_qlog16(clamp<int>((int)std::round(f), 0, MAX_QLOG16), prec_bits);
1888
1889
// Only bias if there are enough texel weights, 4=6 weights
1890
if (ise_weight_range >= 4)
1891
{
1892
// Explictly bias the high color, and the scale up, to better exploit the weights.
1893
// The quantized range also then encompases the complete input range.
1894
const uint32_t max_val = (1 << prec_bits) - 1;
1895
const uint32_t K = 3;
1896
if (i == 3)
1897
{
1898
qlog[i] = minimum<uint32_t>(qlog[i] + K * 2, max_val);
1899
}
1900
else
1901
{
1902
qlog[i] = minimum<uint32_t>(qlog[i] + K, max_val);
1903
}
1904
}
1905
#endif
1906
1907
if (i != 3)
1908
qlog[i] = minimum<uint32_t>(qlog[i], get_max_qlog(prec_bits));
1909
1910
// If S=0, we lose freedom for the texel weights to add any value.
1911
if ((i == 3) && (qlog[i] == 0))
1912
qlog[i] = 1;
1913
}
1914
1915
uint32_t maj_index = 0;
1916
1917
bool did_clamp = false;
1918
1919
if (submode != 5)
1920
{
1921
int largest_qlog = 0;
1922
for (uint32_t i = 0; i < 3; i++)
1923
{
1924
if (qlog[i] > largest_qlog)
1925
{
1926
largest_qlog = qlog[i];
1927
maj_index = i;
1928
}
1929
}
1930
1931
if (maj_index)
1932
{
1933
std::swap(qlog[0], qlog[maj_index]);
1934
}
1935
1936
assert(qlog[0] >= qlog[1]);
1937
assert(qlog[0] >= qlog[2]);
1938
1939
qlog[1] = qlog[0] - qlog[1];
1940
qlog[2] = qlog[0] - qlog[2];
1941
1942
for (uint32_t i = 1; i < 4; i++)
1943
{
1944
const int max_val = (1 << pack_bits[i]) - 1;
1945
1946
if (qlog[i] > max_val)
1947
{
1948
max_clamp_mag = maximum<int>(max_clamp_mag, qlog[i] - max_val);
1949
qlog[i] = max_val;
1950
did_clamp = true;
1951
1952
if ((early_out_if_clamped) && (max_clamp_mag > max_clamp_mag_accept_thresh))
1953
return true;
1954
}
1955
}
1956
}
1957
1958
for (uint32_t i = 0; i < 4; i++)
1959
{
1960
const int max_val = (1 << pack_bits[i]) - 1; (void)max_val;
1961
1962
assert(qlog[i] <= max_val);
1963
}
1964
1965
int mode = 0;
1966
1967
int r = qlog[0] & 63; // 6-bits
1968
int g = qlog[1] & 31; // 5-bits
1969
int b = qlog[2] & 31; // 5-bits
1970
int s = qlog[3] & 31; // 5-bits
1971
1972
int x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0, x6 = 0;
1973
1974
switch (submode)
1975
{
1976
case 0:
1977
{
1978
mode = (maj_index << 2) | 0;
1979
assert((mode & 0xC) != 0xC);
1980
1981
x0 = get_bit(qlog[0], 9); // R9
1982
x1 = get_bit(qlog[0], 8); // R8
1983
x2 = get_bit(qlog[0], 7); // R7
1984
x3 = get_bit(qlog[0], 10); // R10
1985
x4 = get_bit(qlog[0], 6); // R6
1986
x5 = get_bit(qlog[3], 6); // S6
1987
x6 = get_bit(qlog[3], 5); // S5
1988
break;
1989
}
1990
case 1:
1991
{
1992
mode = (maj_index << 2) | 1;
1993
assert((mode & 0xC) != 0xC);
1994
1995
x0 = get_bit(qlog[0], 8); // R8
1996
x1 = get_bit(qlog[1], 5); // G5
1997
x2 = get_bit(qlog[0], 7); // R7
1998
x3 = get_bit(qlog[2], 5); // B5
1999
x4 = get_bit(qlog[0], 6); // R6
2000
x5 = get_bit(qlog[0], 10); // R10
2001
x6 = get_bit(qlog[0], 9); // R9
2002
break;
2003
}
2004
case 2:
2005
{
2006
mode = (maj_index << 2) | 2;
2007
assert((mode & 0xC) != 0xC);
2008
2009
x0 = get_bit(qlog[0], 9); // R9
2010
x1 = get_bit(qlog[0], 8); // R8
2011
x2 = get_bit(qlog[0], 7); // R7
2012
x3 = get_bit(qlog[0], 6); // R6
2013
x4 = get_bit(qlog[3], 7); // S7
2014
x5 = get_bit(qlog[3], 6); // S6
2015
x6 = get_bit(qlog[3], 5); // S5
2016
break;
2017
}
2018
case 3:
2019
{
2020
mode = (maj_index << 2) | 3;
2021
assert((mode & 0xC) != 0xC);
2022
2023
x0 = get_bit(qlog[0], 8); // R8
2024
x1 = get_bit(qlog[1], 5); // G5
2025
x2 = get_bit(qlog[0], 7); // R7
2026
x3 = get_bit(qlog[2], 5); // B5
2027
x4 = get_bit(qlog[0], 6); // R6
2028
x5 = get_bit(qlog[3], 6); // S6
2029
x6 = get_bit(qlog[3], 5); // S5
2030
break;
2031
}
2032
case 4:
2033
{
2034
mode = maj_index | 0xC; // 0b1100
2035
assert((mode & 0xC) == 0xC);
2036
assert(mode != 0xF);
2037
2038
x0 = get_bit(qlog[1], 6); // G6
2039
x1 = get_bit(qlog[1], 5); // G5
2040
x2 = get_bit(qlog[2], 6); // B6
2041
x3 = get_bit(qlog[2], 5); // B5
2042
x4 = get_bit(qlog[0], 6); // R6
2043
x5 = get_bit(qlog[0], 7); // R7
2044
x6 = get_bit(qlog[3], 5); // S5
2045
break;
2046
}
2047
case 5:
2048
{
2049
mode = 0xF;
2050
2051
x0 = get_bit(qlog[1], 6); // G6
2052
x1 = get_bit(qlog[1], 5); // G5
2053
x2 = get_bit(qlog[2], 6); // B6
2054
x3 = get_bit(qlog[2], 5); // B5
2055
x4 = get_bit(qlog[0], 6); // R6
2056
x5 = get_bit(qlog[3], 6); // S6
2057
x6 = get_bit(qlog[3], 5); // S5
2058
break;
2059
}
2060
default:
2061
{
2062
assert(0);
2063
break;
2064
}
2065
}
2066
2067
pEndpoints[0] = (uint8_t)((get_bit(mode, 1) << 7) | (get_bit(mode, 0) << 6) | r);
2068
pEndpoints[1] = (uint8_t)((get_bit(mode, 2) << 7) | (x0 << 6) | (x1 << 5) | g);
2069
pEndpoints[2] = (uint8_t)((get_bit(mode, 3) << 7) | (x2 << 6) | (x3 << 5) | b);
2070
pEndpoints[3] = (uint8_t)((x4 << 7) | (x5 << 6) | (x6 << 5) | s);
2071
2072
#ifdef _DEBUG
2073
// Test for valid pack by unpacking
2074
{
2075
const int inv_shift = 12 - prec_bits;
2076
2077
int unpacked_e[2][3];
2078
if (submode != 5)
2079
{
2080
unpacked_e[1][0] = left_shift32(qlog[0], inv_shift);
2081
unpacked_e[1][1] = clamp(left_shift32((qlog[0] - qlog[1]), inv_shift), 0, 0xFFF);
2082
unpacked_e[1][2] = clamp(left_shift32((qlog[0] - qlog[2]), inv_shift), 0, 0xFFF);
2083
2084
unpacked_e[0][0] = clamp(left_shift32((qlog[0] - qlog[3]), inv_shift), 0, 0xFFF);
2085
unpacked_e[0][1] = clamp(left_shift32(((qlog[0] - qlog[1]) - qlog[3]), inv_shift), 0, 0xFFF);
2086
unpacked_e[0][2] = clamp(left_shift32(((qlog[0] - qlog[2]) - qlog[3]), inv_shift), 0, 0xFFF);
2087
}
2088
else
2089
{
2090
unpacked_e[1][0] = left_shift32(qlog[0], inv_shift);
2091
unpacked_e[1][1] = left_shift32(qlog[1], inv_shift);
2092
unpacked_e[1][2] = left_shift32(qlog[2], inv_shift);
2093
2094
unpacked_e[0][0] = clamp(left_shift32((qlog[0] - qlog[3]), inv_shift), 0, 0xFFF);
2095
unpacked_e[0][1] = clamp(left_shift32((qlog[1] - qlog[3]), inv_shift), 0, 0xFFF);
2096
unpacked_e[0][2] = clamp(left_shift32((qlog[2] - qlog[3]), inv_shift), 0, 0xFFF);
2097
}
2098
2099
if (maj_index)
2100
{
2101
std::swap(unpacked_e[0][0], unpacked_e[0][maj_index]);
2102
std::swap(unpacked_e[1][0], unpacked_e[1][maj_index]);
2103
}
2104
2105
int e[2][3];
2106
decode_mode7_to_qlog12_ise20(pEndpoints, e, nullptr);
2107
2108
for (uint32_t i = 0; i < 3; i++)
2109
{
2110
assert(unpacked_e[0][i] == e[0][i]);
2111
assert(unpacked_e[1][i] == e[1][i]);
2112
}
2113
}
2114
#endif
2115
2116
return did_clamp;
2117
}
2118
2119
//--------------------------------------------------------------------------------------------------------------------------
2120
2121
// Packs a mode 11 descriptor into its NUM_MODE11_ENDPOINTS unquantized endpoint bytes.
//
// desc       - descriptor to pack. Either a "direct" descriptor (desc.is_direct()) or a
//              submode descriptor carrying m_submode, m_maj_comp, the a/b0/b1/c/d0/d1
//              values and their valid ranges (m_max_*_val, m_min_d_val/m_max_d_val).
// pEndpoints - receives the packed bytes; always cleared to zero first.
//
// Returns true on success. Returns false if any field is outside its valid range, in
// which case pEndpoints is left all zeros (every range check happens before the first
// byte is written, on both the direct and the submode path).
bool pack_mode11(mode11_log_desc& desc, uint8_t* pEndpoints)
{
	memset(pEndpoints, 0, NUM_MODE11_ENDPOINTS);

	if (desc.is_direct())
	{
		// Direct encoding: a, c, b1, d0 take a full byte; b0 and d1 only have 7 bits
		// because bit 7 of bytes 4 and 5 is forced to 1 to flag the direct encoding.
		const bool low_ok =
			(desc.m_a >= 0) && (desc.m_a <= 255) &&
			(desc.m_c >= 0) && (desc.m_c <= 255) &&
			(desc.m_b0 >= 0) && (desc.m_b0 <= 127);

		const bool high_ok =
			(desc.m_b1 >= 0) && (desc.m_b1 <= 255) &&
			(desc.m_d0 >= 0) && (desc.m_d0 <= 255) &&
			(desc.m_d1 >= 0) && (desc.m_d1 <= 127);

		// Validate everything up front so a failure never leaves a half-written buffer.
		if (!low_ok || !high_ok)
			return false;

		pEndpoints[0] = (uint8_t)desc.m_a;
		pEndpoints[1] = (uint8_t)desc.m_b1;
		pEndpoints[2] = (uint8_t)desc.m_c;
		pEndpoints[3] = (uint8_t)desc.m_d0;
		pEndpoints[4] = (uint8_t)(desc.m_b0 | 128);
		pEndpoints[5] = (uint8_t)(desc.m_d1 | 128);

		return true;
	}

	// Submode encoding: every value must fit the bit budget of the selected submode.
	if ((desc.m_a < 0) || (desc.m_a > desc.m_max_a_val))
		return false;
	if ((desc.m_c < 0) || (desc.m_c > desc.m_max_c_val))
		return false;
	if ((desc.m_b0 < 0) || (desc.m_b0 > desc.m_max_b_val))
		return false;
	if ((desc.m_b1 < 0) || (desc.m_b1 > desc.m_max_b_val))
		return false;
	if ((desc.m_d0 < desc.m_min_d_val) || (desc.m_d0 > desc.m_max_d_val))
		return false;
	if ((desc.m_d1 < desc.m_min_d_val) || (desc.m_d1 > desc.m_max_d_val))
		return false;

	const int va = desc.m_a, vb0 = desc.m_b0, vb1 = desc.m_b1, vc = desc.m_c, vd0 = desc.m_d0, vd1 = desc.m_d1;

	// x0..x5 are the six "extra" bit slots whose meaning depends on the submode.
	int x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0;
	switch (desc.m_submode)
	{
	case 0:
		x0 = get_bit(vb0, 6); x1 = get_bit(vb1, 6);
		x2 = get_bit(vd0, 6); x3 = get_bit(vd1, 6);
		x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5);
		break;
	case 1:
		x0 = get_bit(vb0, 6); x1 = get_bit(vb1, 6);
		x2 = get_bit(vb0, 7); x3 = get_bit(vb1, 7);
		x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5);
		break;
	case 2:
		x0 = get_bit(va, 9);  x1 = get_bit(vc, 6);
		x2 = get_bit(vd0, 6); x3 = get_bit(vd1, 6);
		x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5);
		break;
	case 3:
		x0 = get_bit(vb0, 6); x1 = get_bit(vb1, 6);
		x2 = get_bit(va, 9);  x3 = get_bit(vc, 6);
		x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5);
		break;
	case 4:
		x0 = get_bit(vb0, 6); x1 = get_bit(vb1, 6);
		x2 = get_bit(vb0, 7); x3 = get_bit(vb1, 7);
		x4 = get_bit(va, 9);  x5 = get_bit(va, 10);
		break;
	case 5:
		x0 = get_bit(va, 9);  x1 = get_bit(va, 10);
		x2 = get_bit(vc, 7);  x3 = get_bit(vc, 6);
		x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5);
		break;
	case 6:
		x0 = get_bit(vb0, 6); x1 = get_bit(vb1, 6);
		x2 = get_bit(va, 11); x3 = get_bit(vc, 6);
		x4 = get_bit(va, 9);  x5 = get_bit(va, 10);
		break;
	case 7:
		x0 = get_bit(va, 9);  x1 = get_bit(va, 10);
		x2 = get_bit(va, 11); x3 = get_bit(vc, 6);
		x4 = get_bit(vd0, 5); x5 = get_bit(vd1, 5);
		break;
	default:
		break;
	}

	int v0 = 0, v1 = 0, v2 = 0, v3 = 0, v4 = 0, v5 = 0;

	// Submode index: one bit in the top bit of each of bytes 1..3.
	pack_bit(v1, 7, desc.m_submode, 0);
	pack_bit(v2, 7, desc.m_submode, 1);
	pack_bit(v3, 7, desc.m_submode, 2);

	// Major (highest) component index: top bits of bytes 4 and 5.
	pack_bit(v4, 7, desc.m_maj_comp, 0);
	pack_bit(v5, 7, desc.m_maj_comp, 1);

	// Bit 8 of a is always stored in bit 6 of byte 1.
	pack_bit(v1, 6, va, 8);

	// Submode-dependent extra bits.
	pack_bit(v2, 6, x0);
	pack_bit(v3, 6, x1);
	pack_bit(v4, 6, x2);
	pack_bit(v5, 6, x3);
	pack_bit(v4, 5, x4);
	pack_bit(v5, 5, x5);

	// Low-order payload bits that live at the same position in every submode.
	v0 = va & 0xFF;
	v1 |= (vc & 63);
	v2 |= (vb0 & 63);
	v3 |= (vb1 & 63);
	v4 |= (vd0 & 31);
	v5 |= (vd1 & 31);

	assert(in_range(v0, 0, 255) && in_range(v1, 0, 255) && in_range(v2, 0, 255) && in_range(v3, 0, 255) && in_range(v4, 0, 255) && in_range(v5, 0, 255));

	pEndpoints[0] = (uint8_t)v0;
	pEndpoints[1] = (uint8_t)v1;
	pEndpoints[2] = (uint8_t)v2;
	pEndpoints[3] = (uint8_t)v3;
	pEndpoints[4] = (uint8_t)v4;
	pEndpoints[5] = (uint8_t)v5;

	return true;
}
2236
2237
// Sign-extends the low num_src_bits bits of src to a full int.
//
// Bit (num_src_bits - 1) of src is treated as the sign bit: if it is set, all higher
// bits of the result are set; otherwise all higher bits are cleared.
// num_src_bits must be in [2, 31].
//
// The masks are built with unsigned arithmetic so that num_src_bits == 31 (which the
// assert allows) does not overflow a signed int while computing (1 << 31) - 1.
static inline int astc_hdr_sign_extend(int src, int num_src_bits)
{
	assert((num_src_bits >= 2) && (num_src_bits <= 31));

	const uint32_t value_mask = (1u << num_src_bits) - 1u;
	const uint32_t sign_mask = 1u << (num_src_bits - 1);
	const uint32_t bits = (uint32_t)src;

	if (bits & sign_mask)
		return (int)(bits | ~value_mask);

	return (int)(bits & value_mask);
}
2247
2248
void unpack_mode11(const uint8_t* pEndpoints, mode11_log_desc& desc)
2249
{
2250
clear_obj(desc);
2251
2252
pack_bit(desc.m_maj_comp, 0, pEndpoints[4], 7);
2253
pack_bit(desc.m_maj_comp, 1, pEndpoints[5], 7);
2254
2255
if (desc.m_maj_comp == 3)
2256
{
2257
desc.m_a = pEndpoints[0];
2258
desc.m_c = pEndpoints[2];
2259
desc.m_b0 = pEndpoints[4] & 0x7F;
2260
2261
desc.m_b1 = pEndpoints[1];
2262
desc.m_d0 = pEndpoints[3];
2263
desc.m_d1 = pEndpoints[5] & 0x7F;
2264
2265
return;
2266
}
2267
2268
pack_bit(desc.m_submode, 0, pEndpoints[1], 7);
2269
pack_bit(desc.m_submode, 1, pEndpoints[2], 7);
2270
pack_bit(desc.m_submode, 2, pEndpoints[3], 7);
2271
2272
desc.m_a = pEndpoints[0]; // 8 bits
2273
pack_bit(desc.m_a, 8, pEndpoints[1], 6);
2274
2275
desc.m_c = pEndpoints[1] & 63; // 6 bits
2276
desc.m_b0 = pEndpoints[2] & 63; // 6 bits
2277
desc.m_b1 = pEndpoints[3] & 63; // 6 bits
2278
desc.m_d0 = pEndpoints[4] & 31; // 5 bits
2279
desc.m_d1 = pEndpoints[5] & 31; // 5 bits
2280
2281
const int x0 = get_bit(pEndpoints[2], 6);
2282
const int x1 = get_bit(pEndpoints[3], 6);
2283
const int x2 = get_bit(pEndpoints[4], 6);
2284
const int x3 = get_bit(pEndpoints[5], 6);
2285
const int x4 = get_bit(pEndpoints[4], 5);
2286
const int x5 = get_bit(pEndpoints[5], 5);
2287
2288
switch (desc.m_submode)
2289
{
2290
case 0:
2291
pack_bit(desc.m_b0, 6, x0, 0); pack_bit(desc.m_b1, 6, x1, 0); pack_bit(desc.m_d0, 6, x2, 0); pack_bit(desc.m_d1, 6, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0);
2292
break;
2293
case 1:
2294
pack_bit(desc.m_b0, 6, x0, 0); pack_bit(desc.m_b1, 6, x1, 0); pack_bit(desc.m_b0, 7, x2, 0); pack_bit(desc.m_b1, 7, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0);
2295
break;
2296
case 2:
2297
pack_bit(desc.m_a, 9, x0, 0); pack_bit(desc.m_c, 6, x1, 0); pack_bit(desc.m_d0, 6, x2, 0); pack_bit(desc.m_d1, 6, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0);
2298
break;
2299
case 3:
2300
pack_bit(desc.m_b0, 6, x0, 0); pack_bit(desc.m_b1, 6, x1, 0); pack_bit(desc.m_a, 9, x2, 0); pack_bit(desc.m_c, 6, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0);
2301
break;
2302
case 4:
2303
pack_bit(desc.m_b0, 6, x0, 0); pack_bit(desc.m_b1, 6, x1, 0); pack_bit(desc.m_b0, 7, x2, 0); pack_bit(desc.m_b1, 7, x3, 0); pack_bit(desc.m_a, 9, x4, 0); pack_bit(desc.m_a, 10, x5, 0);
2304
break;
2305
case 5:
2306
pack_bit(desc.m_a, 9, x0, 0); pack_bit(desc.m_a, 10, x1, 0); pack_bit(desc.m_c, 7, x2, 0); pack_bit(desc.m_c, 6, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0);
2307
break;
2308
case 6:
2309
pack_bit(desc.m_b0, 6, x0, 0); pack_bit(desc.m_b1, 6, x1, 0); pack_bit(desc.m_a, 11, x2, 0); pack_bit(desc.m_c, 6, x3, 0); pack_bit(desc.m_a, 9, x4, 0); pack_bit(desc.m_a, 10, x5, 0);
2310
break;
2311
case 7:
2312
default:
2313
pack_bit(desc.m_a, 9, x0, 0); pack_bit(desc.m_a, 10, x1, 0); pack_bit(desc.m_a, 11, x2, 0); pack_bit(desc.m_c, 6, x3, 0); pack_bit(desc.m_d0, 5, x4, 0); pack_bit(desc.m_d1, 5, x5, 0);
2314
break;
2315
}
2316
2317
desc.m_a_bits = 9 + (desc.m_submode >> 1);
2318
desc.m_b_bits = s_b_bits[desc.m_submode];
2319
desc.m_c_bits = s_c_bits[desc.m_submode];
2320
desc.m_d_bits = s_d_bits[desc.m_submode];
2321
2322
desc.m_max_a_val = (1 << desc.m_a_bits) - 1;
2323
desc.m_max_b_val = (1 << desc.m_b_bits) - 1;
2324
desc.m_max_c_val = (1 << desc.m_c_bits) - 1;
2325
2326
desc.m_min_d_val = -(1 << (desc.m_d_bits - 1));
2327
desc.m_max_d_val = -desc.m_min_d_val - 1;
2328
2329
desc.m_d0 = astc_hdr_sign_extend(desc.m_d0, desc.m_d_bits);
2330
desc.m_d1 = astc_hdr_sign_extend(desc.m_d1, desc.m_d_bits);
2331
2332
assert((desc.m_a >= 0) && (desc.m_a <= desc.m_max_a_val));
2333
assert((desc.m_c >= 0) && (desc.m_c <= desc.m_max_c_val));
2334
assert((desc.m_b0 >= 0) && (desc.m_b0 <= desc.m_max_b_val));
2335
assert((desc.m_b1 >= 0) && (desc.m_b1 <= desc.m_max_b_val));
2336
assert((desc.m_d0 >= desc.m_min_d_val) && (desc.m_d0 <= desc.m_max_d_val));
2337
assert((desc.m_d1 >= desc.m_min_d_val) && (desc.m_d1 <= desc.m_max_d_val));
2338
}
2339
2340
//--------------------------------------------------------------------------------------------------------------------------
2341
2342
void decode_cem_11_config(const uint8_t* pEndpoints, int& submode_index, int& maj_index)
2343
{
2344
submode_index = 0;
2345
maj_index = 0;
2346
2347
pack_bit(submode_index, 0, pEndpoints[1], 7);
2348
pack_bit(submode_index, 1, pEndpoints[2], 7);
2349
pack_bit(submode_index, 2, pEndpoints[3], 7);
2350
2351
pack_bit(maj_index, 0, pEndpoints[4], 7);
2352
pack_bit(maj_index, 1, pEndpoints[5], 7);
2353
}
2354
2355
//--------------------------------------------------------------------------------------------------------------------------
2356
2357
// Extracts the submode index and major component index from packed mode 7 endpoint bytes.
//
// The 4-bit mode value is assembled from bits 7..6 of byte 0 (mode bits 1..0), bit 7 of
// byte 1 (mode bit 2) and bit 7 of byte 2 (mode bit 3), then split as follows:
//   top two bits != 0b11 : maj_index = top two bits, submode_index = low two bits (0..3)
//   0b1100..0b1110       : maj_index = low two bits, submode_index = 4
//   0b1111               : maj_index = 0,            submode_index = 5
void decode_cem_7_config(const uint8_t* pEndpoints, int& submode_index, int &maj_index)
{
	const int mode_bits =
		(pEndpoints[0] >> 6) |
		((pEndpoints[1] >> 7) << 2) |
		((pEndpoints[2] >> 7) << 3);

	const int upper_pair = mode_bits >> 2;
	const int lower_pair = mode_bits & 3;

	if (upper_pair != 3)
	{
		maj_index = upper_pair;
		submode_index = lower_pair;
		return;
	}

	if (lower_pair != 3)
	{
		maj_index = lower_pair;
		submode_index = 4;
		return;
	}

	maj_index = 0;
	submode_index = 5;
}
2381
2382
//--------------------------------------------------------------------------------------------------------------------------
2383
// TODO: Use pack_mode11() as a shared function.
2384
2385
bool pack_mode11(
2386
const vec3F& low_color_q16, const vec3F& high_color_q16,
2387
uint32_t ise_endpoint_range, uint8_t* pEndpoints,
2388
const astc_hdr_codec_base_options& coptions,
2389
bool direct_only, int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used)
2390
{
2391
uint8_t orig_trial_endpoints[NUM_MODE11_ENDPOINTS];
2392
2393
if (direct_only)
2394
{
2395
first_submode = -1;
2396
last_submode = -1;
2397
}
2398
2399
assert(first_submode <= last_submode);
2400
assert((first_submode >= -1) && (first_submode <= 7));
2401
assert((last_submode >= -1) && (last_submode <= 7));
2402
2403
memset(pEndpoints, 0, NUM_MODE11_ENDPOINTS);
2404
2405
double best_trial_dist = BIG_FLOAT_VAL;
2406
int best_submode = 0;
2407
2408
for (int submode = last_submode; submode >= first_submode; submode--)
2409
{
2410
bool did_clamp = false;
2411
int max_clamp_mag = 0;
2412
if (submode == -1)
2413
{
2414
// If it had to clamp with one of the submodes, try direct which can't clamp, but has low precision.
2415
pack_astc_mode11_direct(orig_trial_endpoints, low_color_q16, high_color_q16);
2416
}
2417
else
2418
{
2419
const int MAX_CLAMP_MAG_ACCEPT_THRESH = 32;
2420
did_clamp = pack_astc_mode11_submode(submode, orig_trial_endpoints, low_color_q16, high_color_q16, max_clamp_mag, !ignore_clamping, MAX_CLAMP_MAG_ACCEPT_THRESH);
2421
2422
if (!ignore_clamping)
2423
{
2424
// If it had to clamp and the clamp was too high, it'll distort the endpoint colors too much, which could lead to noticeable artifacts.
2425
if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH))
2426
continue;
2427
}
2428
}
2429
2430
uint8_t trial_endpoints[NUM_MODE11_ENDPOINTS];
2431
2432
// This will distort the endpoints if the ISE endpoint range isn't 256 levels (20).
2433
// It could massively distort the endpoints, but still result in a valid encoding.
2434
basist::astc_6x6_hdr::requantize_ise_endpoints(11, astc_helpers::BISE_256_LEVELS, orig_trial_endpoints, ise_endpoint_range, trial_endpoints);
2435
2436
int e[2][3];
2437
if (!decode_mode11_to_qlog12(trial_endpoints, e, ise_endpoint_range))
2438
continue;
2439
2440
vec3F e0(
2441
(float)(e[0][0] << 4),
2442
(float)(e[0][1] << 4),
2443
(float)(e[0][2] << 4)
2444
);
2445
2446
vec3F e1(
2447
(float)(e[1][0] << 4),
2448
(float)(e[1][1] << 4),
2449
(float)(e[1][2] << 4)
2450
);
2451
2452
double dist0 = e0.squared_distance_d(low_color_q16) + e1.squared_distance_d(high_color_q16);
2453
double dist1 = e1.squared_distance_d(low_color_q16) + e0.squared_distance_d(high_color_q16);
2454
double dist = helpers::minimum(dist0, dist1);
2455
2456
if (dist < best_trial_dist)
2457
{
2458
best_trial_dist = dist;
2459
best_submode = submode;
2460
memcpy(pEndpoints, trial_endpoints, NUM_MODE11_ENDPOINTS);
2461
}
2462
2463
if (coptions.m_take_first_non_clamping_mode11_submode)
2464
{
2465
if (!did_clamp)
2466
break;
2467
}
2468
2469
} // submode
2470
2471
if ((coptions.m_ultra_quant) &&
2472
(ise_endpoint_range < astc_helpers::BISE_256_LEVELS) &&
2473
(best_trial_dist != BIG_FLOAT_VAL))
2474
{
2475
uint8_t orig_best_trial_endpoints[NUM_MODE11_ENDPOINTS];
2476
memcpy(orig_best_trial_endpoints, pEndpoints, NUM_MODE11_ENDPOINTS);
2477
2478
for (uint32_t c = 0; c < NUM_MODE11_ENDPOINTS; c++)
2479
{
2480
for (int dt = 0; dt <= 1; dt++)
2481
{
2482
const int d = dt ? 1 : -1;
2483
2484
uint8_t varied_endpoints[NUM_MODE11_ENDPOINTS];
2485
memcpy(varied_endpoints, orig_best_trial_endpoints, NUM_MODE11_ENDPOINTS);
2486
2487
int ise = varied_endpoints[c];
2488
2489
int rank = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_rank[ise];
2490
rank = clamp<int>(rank + d, 0, astc_helpers::get_ise_levels(ise_endpoint_range) - 1);
2491
2492
ise = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_rank_to_ISE[rank];
2493
2494
varied_endpoints[c] = (uint8_t)ise;
2495
2496
int e[2][3];
2497
if (!decode_mode11_to_qlog12(varied_endpoints, e, ise_endpoint_range))
2498
continue;
2499
2500
vec3F e0(
2501
(float)(e[0][0] << 4),
2502
(float)(e[0][1] << 4),
2503
(float)(e[0][2] << 4)
2504
);
2505
2506
vec3F e1(
2507
(float)(e[1][0] << 4),
2508
(float)(e[1][1] << 4),
2509
(float)(e[1][2] << 4)
2510
);
2511
2512
double dist0 = e0.squared_distance_d(low_color_q16) + e1.squared_distance_d(high_color_q16);
2513
double dist1 = e1.squared_distance_d(low_color_q16) + e0.squared_distance_d(high_color_q16);
2514
double dist = helpers::minimum(dist0, dist1);
2515
2516
if (dist < best_trial_dist)
2517
{
2518
best_trial_dist = dist;
2519
memcpy(pEndpoints, varied_endpoints, NUM_MODE11_ENDPOINTS);
2520
}
2521
} // d
2522
} // c
2523
} // if (coptions.m_ultra_quant)
2524
2525
submode_used = best_submode + 1;
2526
2527
return (best_trial_dist != BIG_FLOAT_VAL);
2528
}
2529
2530
bool try_mode11(uint32_t num_pixels,
2531
uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used,
2532
const vec3F& low_color_q16, const vec3F& high_color_q16,
2533
const basist::half_float block_pixels_half[][3],
2534
uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, bool direct_only, uint32_t ise_endpoint_range,
2535
bool constrain_ise_weight_selectors,
2536
int32_t first_submode, int32_t last_submode, bool ignore_clamping) // -1, 7
2537
{
2538
assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
2539
assert((num_weight_levels >= MIN_SUPPORTED_WEIGHT_LEVELS) && (num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS));
2540
assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));
2541
assert(num_weight_levels == astc_helpers::get_ise_levels(ise_weight_range));
2542
2543
half_float decoded_half[MAX_SUPPORTED_WEIGHT_LEVELS][3];
2544
uint8_t orig_trial_endpoints[NUM_MODE11_ENDPOINTS], trial_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
2545
2546
if (direct_only)
2547
{
2548
first_submode = -1;
2549
last_submode = -1;
2550
}
2551
2552
assert(first_submode <= last_submode);
2553
assert((first_submode >= -1) && (first_submode <= 7));
2554
assert((last_submode >= -1) && (last_submode <= 7));
2555
2556
uint8_t best_trial_endpoints[NUM_MODE11_ENDPOINTS];
2557
clear_obj(best_trial_endpoints);
2558
double best_trial_dist = BIG_FLOAT_VAL;
2559
int best_submode = 0;
2560
2561
for (int submode = last_submode; submode >= first_submode; submode--)
2562
{
2563
bool did_clamp = false;
2564
int max_clamp_mag = 0;
2565
if (submode == -1)
2566
{
2567
// If it had to clamp with one of the submodes, try direct which can't clamp, but has low precision.
2568
pack_astc_mode11_direct(orig_trial_endpoints, low_color_q16, high_color_q16);
2569
}
2570
else
2571
{
2572
const int MAX_CLAMP_MAG_ACCEPT_THRESH = 32;
2573
did_clamp = pack_astc_mode11_submode(submode, orig_trial_endpoints, low_color_q16, high_color_q16, max_clamp_mag, !ignore_clamping, MAX_CLAMP_MAG_ACCEPT_THRESH);
2574
2575
if (!ignore_clamping)
2576
{
2577
// If it had to clamp and the clamp was too high, it'll distort the endpoint colors too much, which could lead to noticeable artifacts.
2578
if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH))
2579
continue;
2580
}
2581
}
2582
2583
uint8_t trial_endpoints[NUM_MODE11_ENDPOINTS];
2584
2585
// This will distort the endpoints if the ISE endpoint range isn't 256 levels (20).
2586
// It could massively distort the endpoints, but still result in a valid encoding.
2587
basist::astc_6x6_hdr::requantize_ise_endpoints(11, astc_helpers::BISE_256_LEVELS, orig_trial_endpoints, ise_endpoint_range, trial_endpoints);
2588
2589
int e[2][3];
2590
if (!decode_mode11_to_qlog12(trial_endpoints, e, ise_endpoint_range))
2591
continue;
2592
2593
vec3F e0(
2594
(float)(e[0][0] << 4),
2595
(float)(e[0][1] << 4),
2596
(float)(e[0][2] << 4)
2597
);
2598
2599
vec3F e1(
2600
(float)(e[1][0] << 4),
2601
(float)(e[1][1] << 4),
2602
(float)(e[1][2] << 4)
2603
);
2604
2605
double dist0 = e0.squared_distance_d(low_color_q16) + e1.squared_distance_d(high_color_q16);
2606
double dist1 = e1.squared_distance_d(low_color_q16) + e0.squared_distance_d(high_color_q16);
2607
double dist = helpers::minimum(dist0, dist1);
2608
2609
if (dist < best_trial_dist)
2610
{
2611
best_trial_dist = dist;
2612
best_submode = submode;
2613
memcpy(best_trial_endpoints, trial_endpoints, sizeof(best_trial_endpoints));
2614
}
2615
2616
if (coptions.m_take_first_non_clamping_mode11_submode)
2617
{
2618
if (!did_clamp)
2619
break;
2620
}
2621
2622
} // submode
2623
2624
if ((coptions.m_ultra_quant) &&
2625
(ise_endpoint_range < astc_helpers::BISE_256_LEVELS) &&
2626
(best_trial_dist != BIG_FLOAT_VAL))
2627
{
2628
uint8_t orig_best_trial_endpoints[NUM_MODE11_ENDPOINTS];
2629
memcpy(orig_best_trial_endpoints, best_trial_endpoints, NUM_MODE11_ENDPOINTS);
2630
2631
for (uint32_t c = 0; c < NUM_MODE11_ENDPOINTS; c++)
2632
{
2633
for (int dt = 0; dt <= 1; dt++)
2634
{
2635
const int d = dt ? 1 : -1;
2636
2637
uint8_t varied_endpoints[NUM_MODE11_ENDPOINTS];
2638
memcpy(varied_endpoints, orig_best_trial_endpoints, NUM_MODE11_ENDPOINTS);
2639
2640
int ise = varied_endpoints[c];
2641
2642
int rank = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_rank[ise];
2643
rank = clamp<int>(rank + d, 0, astc_helpers::get_ise_levels(ise_endpoint_range) - 1);
2644
2645
ise = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_rank_to_ISE[rank];
2646
2647
varied_endpoints[c] = (uint8_t)ise;
2648
2649
int e[2][3];
2650
if (!decode_mode11_to_qlog12(varied_endpoints, e, ise_endpoint_range))
2651
continue;
2652
2653
vec3F e0(
2654
(float)(e[0][0] << 4),
2655
(float)(e[0][1] << 4),
2656
(float)(e[0][2] << 4)
2657
);
2658
2659
vec3F e1(
2660
(float)(e[1][0] << 4),
2661
(float)(e[1][1] << 4),
2662
(float)(e[1][2] << 4)
2663
);
2664
2665
double dist0 = e0.squared_distance_d(low_color_q16) + e1.squared_distance_d(high_color_q16);
2666
double dist1 = e1.squared_distance_d(low_color_q16) + e0.squared_distance_d(high_color_q16);
2667
double dist = helpers::minimum(dist0, dist1);
2668
2669
if (dist < best_trial_dist)
2670
{
2671
best_trial_dist = dist;
2672
memcpy(best_trial_endpoints, varied_endpoints, NUM_MODE11_ENDPOINTS);
2673
}
2674
} // d
2675
} // c
2676
} // if (coptions.m_ultra_quant)
2677
2678
bool improved_flag = false;
2679
2680
if (best_trial_dist != BIG_FLOAT_VAL)
2681
{
2682
if (get_astc_hdr_mode_11_block_colors(best_trial_endpoints, &decoded_half[0][0], nullptr, num_weight_levels, ise_weight_range, ise_endpoint_range))
2683
{
2684
uint32_t usable_selector_bitmask = UINT32_MAX;
2685
if ((constrain_ise_weight_selectors) && (ise_weight_range == astc_helpers::BISE_16_LEVELS))
2686
usable_selector_bitmask = (1 << 0) | (1 << 1) | (1 << 4) | (1 << 5) | (1 << 10) | (1 << 11) | (1 << 14) | (1 << 15);
2687
else if ((constrain_ise_weight_selectors) && (ise_weight_range == astc_helpers::BISE_12_LEVELS))
2688
usable_selector_bitmask = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3);
2689
2690
double trial_blk_error = eval_selectors(num_pixels, trial_weights, ise_weight_range, &block_pixels_half[0][0], num_weight_levels, &decoded_half[0][0], coptions, usable_selector_bitmask);
2691
if (trial_blk_error < cur_block_error)
2692
{
2693
cur_block_error = trial_blk_error;
2694
memcpy(pEndpoints, best_trial_endpoints, NUM_MODE11_ENDPOINTS);
2695
memcpy(pWeights, trial_weights, num_pixels);
2696
submode_used = best_submode + 1;
2697
improved_flag = true;
2698
}
2699
}
2700
}
2701
2702
return improved_flag;
2703
}
2704
2705
//--------------------------------------------------------------------------------------------------------------------------
2706
2707
bool try_mode11_dual_plane(uint32_t channel_index, uint32_t num_pixels,
2708
uint8_t* pEndpoints, uint8_t* pWeights0, uint8_t* pWeights1, double& cur_block_error, uint32_t& submode_used,
2709
const vec3F& low_color_q16, const vec3F& high_color_q16,
2710
const basist::half_float block_pixels_half[][3],
2711
uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, bool direct_only, uint32_t ise_endpoint_range,
2712
bool constrain_ise_weight_selectors,
2713
int32_t first_submode, int32_t last_submode, bool ignore_clamping) // -1, 7
2714
{
2715
assert(channel_index <= 2);
2716
assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
2717
assert((num_weight_levels >= MIN_SUPPORTED_WEIGHT_LEVELS) && (num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS));
2718
assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));
2719
assert(num_weight_levels == astc_helpers::get_ise_levels(ise_weight_range));
2720
2721
half_float decoded_half[MAX_SUPPORTED_WEIGHT_LEVELS][3];
2722
uint8_t orig_trial_endpoints[NUM_MODE11_ENDPOINTS], trial_weights0[MAX_ASTC_HDR_ENC_BLOCK_PIXELS], trial_weights1[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
2723
2724
if (direct_only)
2725
{
2726
first_submode = -1;
2727
last_submode = -1;
2728
}
2729
2730
assert(first_submode <= last_submode);
2731
assert((first_submode >= -1) && (first_submode <= 7));
2732
assert((last_submode >= -1) && (last_submode <= 7));
2733
2734
uint8_t best_trial_endpoints[NUM_MODE11_ENDPOINTS];
2735
clear_obj(best_trial_endpoints);
2736
2737
double best_trial_dist = BIG_FLOAT_VAL;
2738
int best_submode = 0;
2739
2740
for (int submode = last_submode; submode >= first_submode; submode--)
2741
{
2742
bool did_clamp = false;
2743
int max_clamp_mag = 0;
2744
if (submode == -1)
2745
{
2746
// If it had to clamp with one of the submodes, try direct which can't clamp, but has low precision.
2747
pack_astc_mode11_direct(orig_trial_endpoints, low_color_q16, high_color_q16);
2748
}
2749
else
2750
{
2751
const int MAX_CLAMP_MAG_ACCEPT_THRESH = 32;
2752
did_clamp = pack_astc_mode11_submode(submode, orig_trial_endpoints, low_color_q16, high_color_q16, max_clamp_mag, !ignore_clamping, MAX_CLAMP_MAG_ACCEPT_THRESH);
2753
2754
if (!ignore_clamping)
2755
{
2756
// If it had to clamp and the clamp was too high, it'll distort the endpoint colors too much, which could lead to noticeable artifacts.
2757
if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH))
2758
continue;
2759
}
2760
}
2761
2762
uint8_t trial_endpoints[NUM_MODE11_ENDPOINTS];
2763
2764
// This will distort the endpoints if the ISE endpoint range isn't 256 levels (20).
2765
// It could massively distort the endpoints, but still result in a valid encoding.
2766
basist::astc_6x6_hdr::requantize_ise_endpoints(11, astc_helpers::BISE_256_LEVELS, orig_trial_endpoints, ise_endpoint_range, trial_endpoints);
2767
2768
int e[2][3];
2769
if (!decode_mode11_to_qlog12(trial_endpoints, e, ise_endpoint_range))
2770
continue;
2771
2772
vec3F e0(
2773
(float)(e[0][0] << 4),
2774
(float)(e[0][1] << 4),
2775
(float)(e[0][2] << 4)
2776
);
2777
2778
vec3F e1(
2779
(float)(e[1][0] << 4),
2780
(float)(e[1][1] << 4),
2781
(float)(e[1][2] << 4)
2782
);
2783
2784
double dist0 = e0.squared_distance_d(low_color_q16) + e1.squared_distance_d(high_color_q16);
2785
double dist1 = e1.squared_distance_d(low_color_q16) + e0.squared_distance_d(high_color_q16);
2786
double dist = helpers::minimum(dist0, dist1);
2787
2788
if (dist < best_trial_dist)
2789
{
2790
best_trial_dist = dist;
2791
best_submode = submode;
2792
memcpy(best_trial_endpoints, trial_endpoints, sizeof(best_trial_endpoints));
2793
}
2794
2795
if (coptions.m_take_first_non_clamping_mode11_submode)
2796
{
2797
if (!did_clamp)
2798
break;
2799
}
2800
2801
} // submode
2802
2803
if ((coptions.m_ultra_quant) &&
2804
(ise_endpoint_range < astc_helpers::BISE_256_LEVELS) &&
2805
(best_trial_dist != BIG_FLOAT_VAL))
2806
{
2807
uint8_t orig_best_trial_endpoints[NUM_MODE11_ENDPOINTS];
2808
memcpy(orig_best_trial_endpoints, best_trial_endpoints, NUM_MODE11_ENDPOINTS);
2809
2810
for (uint32_t c = 0; c < NUM_MODE11_ENDPOINTS; c++)
2811
{
2812
for (int dt = 0; dt <= 1; dt++)
2813
{
2814
const int d = dt ? 1 : -1;
2815
2816
uint8_t varied_endpoints[NUM_MODE11_ENDPOINTS];
2817
memcpy(varied_endpoints, orig_best_trial_endpoints, NUM_MODE11_ENDPOINTS);
2818
2819
int ise = varied_endpoints[c];
2820
2821
int rank = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_rank[ise];
2822
rank = clamp<int>(rank + d, 0, astc_helpers::get_ise_levels(ise_endpoint_range) - 1);
2823
2824
ise = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_rank_to_ISE[rank];
2825
2826
varied_endpoints[c] = (uint8_t)ise;
2827
2828
int e[2][3];
2829
if (!decode_mode11_to_qlog12(varied_endpoints, e, ise_endpoint_range))
2830
continue;
2831
2832
vec3F e0(
2833
(float)(e[0][0] << 4),
2834
(float)(e[0][1] << 4),
2835
(float)(e[0][2] << 4)
2836
);
2837
2838
vec3F e1(
2839
(float)(e[1][0] << 4),
2840
(float)(e[1][1] << 4),
2841
(float)(e[1][2] << 4)
2842
);
2843
2844
double dist0 = e0.squared_distance_d(low_color_q16) + e1.squared_distance_d(high_color_q16);
2845
double dist1 = e1.squared_distance_d(low_color_q16) + e0.squared_distance_d(high_color_q16);
2846
double dist = helpers::minimum(dist0, dist1);
2847
2848
if (dist < best_trial_dist)
2849
{
2850
best_trial_dist = dist;
2851
memcpy(best_trial_endpoints, varied_endpoints, NUM_MODE11_ENDPOINTS);
2852
}
2853
} // d
2854
} // c
2855
} // if (coptions.m_ultra_quant)
2856
2857
bool improved_flag = false;
2858
2859
if (best_trial_dist != BIG_FLOAT_VAL)
2860
{
2861
if (get_astc_hdr_mode_11_block_colors(best_trial_endpoints, &decoded_half[0][0], nullptr, num_weight_levels, ise_weight_range, ise_endpoint_range))
2862
{
2863
uint32_t usable_selector_bitmask = UINT32_MAX;
2864
if ((constrain_ise_weight_selectors) && (ise_weight_range == astc_helpers::BISE_16_LEVELS))
2865
usable_selector_bitmask = (1 << 0) | (1 << 1) | (1 << 4) | (1 << 5) | (1 << 10) | (1 << 11) | (1 << 14) | (1 << 15);
2866
else if ((constrain_ise_weight_selectors) && (ise_weight_range == astc_helpers::BISE_12_LEVELS))
2867
usable_selector_bitmask = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3);
2868
2869
double trial_blk_error = eval_selectors_dual_plane(channel_index, num_pixels, trial_weights0, trial_weights1, &block_pixels_half[0][0], num_weight_levels, &decoded_half[0][0], coptions, usable_selector_bitmask);
2870
if (trial_blk_error < cur_block_error)
2871
{
2872
cur_block_error = trial_blk_error;
2873
memcpy(pEndpoints, best_trial_endpoints, NUM_MODE11_ENDPOINTS);
2874
memcpy(pWeights0, trial_weights0, num_pixels);
2875
memcpy(pWeights1, trial_weights1, num_pixels);
2876
submode_used = best_submode + 1;
2877
improved_flag = true;
2878
}
2879
}
2880
}
2881
2882
return improved_flag;
2883
}
2884
2885
//--------------------------------------------------------------------------------------------------------------------------
2886
2887
bool pack_mode7(
2888
const vec3F& high_color_q16, const float s_q16,
2889
uint32_t ise_endpoint_range, uint8_t* pEndpoints,
2890
uint32_t ise_weight_range, // only used for determining biasing during packing
2891
const astc_hdr_codec_base_options& coptions,
2892
int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used)
2893
{
2894
assert(first_submode <= last_submode);
2895
assert((first_submode >= 0) && (first_submode <= (int)MAX_MODE7_SUBMODE_INDEX));
2896
assert(last_submode <= (int)MAX_MODE7_SUBMODE_INDEX);
2897
2898
uint8_t unquant_trial_endpoints[NUM_MODE7_ENDPOINTS];
2899
2900
memset(pEndpoints, 0, NUM_MODE7_ENDPOINTS);
2901
2902
double best_trial_dist = BIG_FLOAT_VAL;
2903
int best_trial_submode = 0;
2904
2905
for (int submode = first_submode; submode <= last_submode; submode++)
2906
{
2907
const int MAX_CLAMP_MAG_ACCEPT_THRESH = 16;
2908
2909
int max_clamp_mag = 0;
2910
const bool did_clamp = pack_astc_mode7_submode(submode, unquant_trial_endpoints, high_color_q16, s_q16, max_clamp_mag, ise_weight_range, !ignore_clamping, MAX_CLAMP_MAG_ACCEPT_THRESH);
2911
2912
if (submode < 5)
2913
{
2914
if (!ignore_clamping)
2915
{
2916
if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH))
2917
continue;
2918
}
2919
}
2920
2921
uint8_t trial_endpoints[NUM_MODE7_ENDPOINTS];
2922
2923
// This will distort the endpoints if the ISE endpoint range isn't 256 levels (20).
2924
// It could massively distort the endpoints, but still result in a valid encoding.
2925
basist::astc_6x6_hdr::requantize_ise_endpoints(7, astc_helpers::BISE_256_LEVELS, unquant_trial_endpoints, ise_endpoint_range, trial_endpoints);
2926
2927
int e[2][3];
2928
int decoded_s = 0;
2929
if (!decode_mode7_to_qlog12(trial_endpoints, e, &decoded_s, ise_endpoint_range))
2930
continue;
2931
2932
// e1 is always the high color
2933
vec3F e1(
2934
(float)(e[1][0] << 4),
2935
(float)(e[1][1] << 4),
2936
(float)(e[1][2] << 4)
2937
);
2938
2939
decoded_s <<= 4;
2940
2941
double dist = e1.squared_distance_d(high_color_q16) + squared((double)decoded_s - s_q16) * 3;
2942
2943
if (dist < best_trial_dist)
2944
{
2945
best_trial_dist = dist;
2946
best_trial_submode = submode;
2947
memcpy(pEndpoints, trial_endpoints, NUM_MODE7_ENDPOINTS);
2948
}
2949
2950
if (coptions.m_take_first_non_clamping_mode7_submode)
2951
{
2952
if (!did_clamp)
2953
break;
2954
}
2955
2956
} // submode
2957
2958
if ((coptions.m_ultra_quant) &&
2959
(ise_endpoint_range < astc_helpers::BISE_256_LEVELS) &&
2960
(best_trial_dist != BIG_FLOAT_VAL))
2961
{
2962
uint8_t orig_best_trial_endpoints[NUM_MODE7_ENDPOINTS];
2963
memcpy(orig_best_trial_endpoints, pEndpoints, NUM_MODE7_ENDPOINTS);
2964
2965
vec3F low_color_q16(high_color_q16 - vec3F(s_q16));
2966
low_color_q16.clamp(0.0f, 65535.0f);
2967
2968
for (uint32_t c = 0; c < NUM_MODE7_ENDPOINTS; c++)
2969
{
2970
for (int dt = 0; dt <= 1; dt++)
2971
{
2972
const int d = dt ? 1 : -1;
2973
2974
uint8_t varied_endpoints[NUM_MODE7_ENDPOINTS];
2975
memcpy(varied_endpoints, orig_best_trial_endpoints, NUM_MODE7_ENDPOINTS);
2976
2977
int ise = varied_endpoints[c];
2978
2979
int rank = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_rank[ise];
2980
rank = clamp<int>(rank + d, 0, astc_helpers::get_ise_levels(ise_endpoint_range) - 1);
2981
2982
ise = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_rank_to_ISE[rank];
2983
2984
varied_endpoints[c] = (uint8_t)ise;
2985
2986
int e[2][3];
2987
int decoded_s = 0;
2988
if (!decode_mode7_to_qlog12(varied_endpoints, e, &decoded_s, ise_endpoint_range))
2989
continue;
2990
2991
// e1 is always the high color
2992
vec3F e1(
2993
(float)(e[1][0] << 4),
2994
(float)(e[1][1] << 4),
2995
(float)(e[1][2] << 4)
2996
);
2997
2998
decoded_s <<= 4;
2999
3000
double dist = e1.squared_distance_d(high_color_q16) + squared((double)decoded_s - s_q16) * 3;
3001
3002
if (dist < best_trial_dist)
3003
{
3004
best_trial_dist = dist;
3005
memcpy(pEndpoints, varied_endpoints, NUM_MODE7_ENDPOINTS);
3006
}
3007
3008
} // d
3009
} // c
3010
}
3011
3012
submode_used = best_trial_submode;
3013
3014
return (best_trial_dist != BIG_FLOAT_VAL);
3015
}
3016
3017
//--------------------------------------------------------------------------------------------------------------------------
3018
3019
bool try_mode7(
3020
uint32_t num_pixels,
3021
uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used,
3022
const vec3F& high_color_q16, const float s_q16,
3023
const half_float block_pixels_half[][3],
3024
uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions,
3025
uint32_t ise_endpoint_range,
3026
int32_t first_submode, int32_t last_submode)
3027
{
3028
assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
3029
assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));
3030
3031
assert(first_submode <= last_submode);
3032
assert((first_submode >= 0) && (first_submode <= (int)MAX_MODE7_SUBMODE_INDEX));
3033
assert(last_submode <= (int)MAX_MODE7_SUBMODE_INDEX);
3034
assert(num_weight_levels == astc_helpers::get_ise_levels(ise_weight_range));
3035
3036
uint8_t unquant_trial_endpoints[NUM_MODE7_ENDPOINTS];
3037
3038
uint8_t best_trial_endpoints[NUM_MODE7_ENDPOINTS];
3039
clear_obj(best_trial_endpoints);
3040
double best_trial_dist = BIG_FLOAT_VAL;
3041
int best_trial_submode = 0;
3042
3043
for (int submode = first_submode; submode <= last_submode; submode++)
3044
{
3045
const int MAX_CLAMP_MAG_ACCEPT_THRESH = 16;
3046
3047
int max_clamp_mag = 0;
3048
const bool did_clamp = pack_astc_mode7_submode(submode, unquant_trial_endpoints, high_color_q16, s_q16, max_clamp_mag, ise_weight_range, true, MAX_CLAMP_MAG_ACCEPT_THRESH);
3049
3050
if (submode < 5)
3051
{
3052
if ((did_clamp) && (max_clamp_mag > MAX_CLAMP_MAG_ACCEPT_THRESH))
3053
continue;
3054
}
3055
3056
uint8_t trial_endpoints[NUM_MODE7_ENDPOINTS];
3057
3058
// This will distort the endpoints if the ISE endpoint range isn't 256 levels (20).
3059
// It could massively distort the endpoints, but still result in a valid encoding.
3060
basist::astc_6x6_hdr::requantize_ise_endpoints(7, astc_helpers::BISE_256_LEVELS, unquant_trial_endpoints, ise_endpoint_range, trial_endpoints);
3061
3062
int e[2][3];
3063
int decoded_s = 0;
3064
if (!decode_mode7_to_qlog12(trial_endpoints, e, &decoded_s, ise_endpoint_range))
3065
continue;
3066
3067
// e1 is always the high color
3068
vec3F e1(
3069
(float)(e[1][0] << 4),
3070
(float)(e[1][1] << 4),
3071
(float)(e[1][2] << 4)
3072
);
3073
3074
decoded_s <<= 4;
3075
3076
double dist = e1.squared_distance_d(high_color_q16) + squared((double)decoded_s - s_q16) * 3;
3077
3078
if (dist < best_trial_dist)
3079
{
3080
best_trial_dist = dist;
3081
best_trial_submode = submode;
3082
memcpy(best_trial_endpoints, trial_endpoints, sizeof(best_trial_endpoints));
3083
}
3084
3085
if (coptions.m_take_first_non_clamping_mode7_submode)
3086
{
3087
if (!did_clamp)
3088
break;
3089
}
3090
3091
} // submode
3092
3093
if ((coptions.m_ultra_quant) &&
3094
(ise_endpoint_range < astc_helpers::BISE_256_LEVELS) &&
3095
(best_trial_dist != BIG_FLOAT_VAL))
3096
{
3097
uint8_t orig_best_trial_endpoints[NUM_MODE7_ENDPOINTS];
3098
memcpy(orig_best_trial_endpoints, best_trial_endpoints, NUM_MODE7_ENDPOINTS);
3099
3100
vec3F low_color_q16(high_color_q16 - vec3F(s_q16));
3101
low_color_q16.clamp(0.0f, 65535.0f);
3102
3103
for (uint32_t c = 0; c < NUM_MODE7_ENDPOINTS; c++)
3104
{
3105
for (int dt = 0; dt <= 1; dt++)
3106
{
3107
const int d = dt ? 1 : -1;
3108
3109
uint8_t varied_endpoints[NUM_MODE7_ENDPOINTS];
3110
memcpy(varied_endpoints, orig_best_trial_endpoints, NUM_MODE7_ENDPOINTS);
3111
3112
int ise = varied_endpoints[c];
3113
3114
int rank = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_ISE_to_rank[ise];
3115
rank = clamp<int>(rank + d, 0, astc_helpers::get_ise_levels(ise_endpoint_range) - 1);
3116
3117
ise = astc_helpers::g_dequant_tables.get_endpoint_tab(ise_endpoint_range).m_rank_to_ISE[rank];
3118
3119
varied_endpoints[c] = (uint8_t)ise;
3120
3121
int e[2][3];
3122
int decoded_s = 0;
3123
if (!decode_mode7_to_qlog12(varied_endpoints, e, &decoded_s, ise_endpoint_range))
3124
continue;
3125
3126
// e1 is always the high color
3127
vec3F e1(
3128
(float)(e[1][0] << 4),
3129
(float)(e[1][1] << 4),
3130
(float)(e[1][2] << 4)
3131
);
3132
3133
decoded_s <<= 4;
3134
3135
double dist = e1.squared_distance_d(high_color_q16) + squared((double)decoded_s - s_q16) * 3;
3136
3137
if (dist < best_trial_dist)
3138
{
3139
best_trial_dist = dist;
3140
memcpy(best_trial_endpoints, varied_endpoints, NUM_MODE7_ENDPOINTS);
3141
}
3142
3143
} // d
3144
} // c
3145
}
3146
3147
bool improved_flag = false;
3148
3149
if (best_trial_dist != BIG_FLOAT_VAL)
3150
{
3151
half_float decoded_half[MAX_SUPPORTED_WEIGHT_LEVELS][3];
3152
uint8_t trial_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
3153
3154
if (get_astc_hdr_mode_7_block_colors(best_trial_endpoints, &decoded_half[0][0], nullptr, num_weight_levels, ise_weight_range, ise_endpoint_range))
3155
{
3156
double trial_blk_error = eval_selectors(num_pixels, trial_weights, ise_weight_range, &block_pixels_half[0][0], num_weight_levels, &decoded_half[0][0], coptions);
3157
if (trial_blk_error < cur_block_error)
3158
{
3159
cur_block_error = trial_blk_error;
3160
memcpy(pEndpoints, best_trial_endpoints, NUM_MODE7_ENDPOINTS);
3161
memcpy(pWeights, trial_weights, num_pixels);
3162
submode_used = best_trial_submode;
3163
improved_flag = true;
3164
}
3165
}
3166
}
3167
3168
return improved_flag;
3169
}
3170
3171
//--------------------------------------------------------------------------------------------------------------------------
3172
// Per-pixel emphasis weights used by the weighted least squares endpoint refinement.
// A pixel's weight is interpolated from LOW to MIDDLE to HIGH according to where its projection
// onto the block's principal axis falls between the lowest projection, the block mean and the
// highest projection.
const float LOW_EMPHASIS_WEIGHT = 1.0f;
const float MIDDLE_EMPHASIS_WEIGHT = 1.25f;
const float HIGH_EMPHASIS_WEIGHT = 1.0f;

// Variant used when opt_mode == cWeightedLeastSquaresHeavy.
const float LOW_EMPHASIS_WEIGHT_HEAVY = 1.0f;
const float MIDDLE_EMPHASIS_WEIGHT_HEAVY = 4.0f;
const float HIGH_EMPHASIS_WEIGHT_HEAVY = 1.0f;
3174
3175
double encode_astc_hdr_block_mode_11(
3176
uint32_t num_pixels,
3177
const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
3178
uint32_t ise_weight_range,
3179
uint32_t& best_submode,
3180
double cur_block_error,
3181
uint8_t* blk_endpoints, uint8_t* blk_weights,
3182
const astc_hdr_codec_base_options& coptions,
3183
bool direct_only,
3184
uint32_t ise_endpoint_range,
3185
bool uber_mode,
3186
bool constrain_ise_weight_selectors,
3187
int32_t first_submode, int32_t last_submode, bool ignore_clamping, opt_mode_t opt_mode,
3188
const encode_astc_block_stats* pBlock_stats)
3189
{
3190
assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
3191
assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
3192
assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));
3193
3194
assert((first_submode >= FIRST_MODE11_SUBMODE_INDEX) && (first_submode <= last_submode));
3195
assert(last_submode <= MAX_MODE11_SUBMODE_INDEX);
3196
3197
best_submode = 0;
3198
3199
const uint32_t num_weight_levels = astc_helpers::get_ise_levels(ise_weight_range);
3200
assert(num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS);
3201
3202
vec3F block_mean_color_q16, block_axis_q16;
3203
if (!pBlock_stats)
3204
{
3205
block_mean_color_q16 = calc_mean(num_pixels, pBlock_pixels_q16);
3206
block_axis_q16 = calc_rgb_pca(num_pixels, pBlock_pixels_q16, block_mean_color_q16);
3207
}
3208
else
3209
{
3210
assert(num_pixels == pBlock_stats->m_num_pixels);
3211
block_mean_color_q16 = pBlock_stats->m_mean_q16;
3212
block_axis_q16 = pBlock_stats->m_axis_q16;
3213
}
3214
3215
aabb3F color_box_q16(cInitExpand);
3216
3217
float l = BIG_FLOAT_VAL, h = -BIG_FLOAT_VAL;
3218
vec3F low_color_q16, high_color_q16;
3219
3220
for (uint32_t i = 0; i < num_pixels; i++)
3221
{
3222
color_box_q16.expand(pBlock_pixels_q16[i]);
3223
3224
vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);
3225
float kd = k.dot(block_axis_q16);
3226
3227
if (kd < l)
3228
{
3229
l = kd;
3230
low_color_q16 = pBlock_pixels_q16[i];
3231
}
3232
3233
if (kd > h)
3234
{
3235
h = kd;
3236
high_color_q16 = pBlock_pixels_q16[i];
3237
}
3238
}
3239
3240
vec3F old_low_color_q16(low_color_q16), old_high_color_q16(high_color_q16);
3241
3242
for (uint32_t i = 0; i < 3; i++)
3243
{
3244
low_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 1.0f / 64.0f);
3245
high_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 63.0f / 64.0f);
3246
}
3247
3248
uint8_t trial_blk_endpoints[NUM_MODE11_ENDPOINTS];
3249
uint8_t trial_blk_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
3250
uint32_t trial_best_submode = 0;
3251
3252
clear_obj(trial_blk_endpoints);
3253
clear_obj(trial_blk_weights);
3254
3255
double trial_blk_error = BIG_FLOAT_VAL;
3256
3257
bool did_improve = try_mode11(num_pixels, trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_best_submode,
3258
low_color_q16, high_color_q16,
3259
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,
3260
first_submode, last_submode, ignore_clamping);
3261
3262
// If we couldn't find ANY usable solution due to endpoint quantization, just return. There's nothing we can do.
3263
if (!did_improve)
3264
return cur_block_error;
3265
3266
// Did the solution improve?
3267
if (trial_blk_error < cur_block_error)
3268
{
3269
cur_block_error = trial_blk_error;
3270
memcpy(blk_endpoints, trial_blk_endpoints, NUM_MODE11_ENDPOINTS);
3271
memcpy(blk_weights, trial_blk_weights, num_pixels);
3272
best_submode = trial_best_submode;
3273
}
3274
3275
if (opt_mode == cNoOpt)
3276
return cur_block_error;
3277
3278
// least squares on the most promising trial weight indices found
3279
const uint32_t NUM_LS_PASSES = 3;
3280
3281
float emphasis_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
3282
3283
if (opt_mode == cWeightedAverage)
3284
{
3285
const uint32_t NUM_OPT_PASSES = 3;
3286
for (uint32_t pass = 0; pass < NUM_OPT_PASSES; pass++)
3287
{
3288
vec3F low_p(0.0f);
3289
float total_low = 0.0f;
3290
3291
vec3F high_p(0.0f);
3292
float total_high = 0.0f;
3293
3294
for (uint32_t i = 0; i < num_pixels; i++)
3295
{
3296
vec3F p(pBlock_pixels_q16[i]);
3297
float lerp = g_ise_weight_lerps[ise_weight_range][trial_blk_weights[i] + 1] * (1.0f / 64.0f);
3298
3299
low_p += p * (1.0f - lerp);
3300
total_low += (1.0f - lerp);
3301
3302
high_p += p * lerp;
3303
total_high += lerp;
3304
}
3305
3306
if (total_low != 0.0f)
3307
low_p *= (1.0f / total_low);
3308
3309
if (total_high != 0.0f)
3310
high_p *= (1.0f / total_high);
3311
3312
vec3F low, high;
3313
3314
bool was_improved = try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
3315
low_p, high_p,
3316
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,
3317
first_submode, last_submode, ignore_clamping);
3318
3319
if (!was_improved)
3320
break;
3321
3322
memcpy(trial_blk_weights, blk_weights, num_pixels);
3323
}
3324
}
3325
else if (opt_mode == cOrdinaryLeastSquares)
3326
{
3327
for (uint32_t pass = 0; pass < NUM_LS_PASSES; pass++)
3328
{
3329
vec3F l_q16, h_q16;
3330
3331
if (!compute_least_squares_endpoints_rgb(num_pixels, trial_blk_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16))
3332
break;
3333
3334
bool was_improved = try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
3335
l_q16, h_q16,
3336
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,
3337
first_submode, last_submode, ignore_clamping);
3338
3339
if (!was_improved)
3340
break;
3341
3342
// It's improved, so let's take the new weight indices.
3343
memcpy(trial_blk_weights, blk_weights, num_pixels);
3344
3345
} // pass
3346
}
3347
else
3348
{
3349
if (h == l)
3350
{
3351
for (uint32_t i = 0; i < num_pixels; i++)
3352
emphasis_weights[i] = 1.0f;
3353
}
3354
else
3355
{
3356
float mid = (0.0f - l) / (h - l);
3357
mid = clamp(mid, .01f, .99f);
3358
3359
float lw = LOW_EMPHASIS_WEIGHT, mw = MIDDLE_EMPHASIS_WEIGHT, hw = HIGH_EMPHASIS_WEIGHT;
3360
if (opt_mode == cWeightedLeastSquaresHeavy)
3361
lw = LOW_EMPHASIS_WEIGHT_HEAVY, mw = MIDDLE_EMPHASIS_WEIGHT_HEAVY, hw = HIGH_EMPHASIS_WEIGHT_HEAVY;
3362
3363
for (uint32_t i = 0; i < num_pixels; i++)
3364
{
3365
vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);
3366
float kd = k.dot(block_axis_q16);
3367
3368
assert((kd >= l) && (kd <= h));
3369
3370
float v = (kd - l) / (h - l);
3371
3372
if (v < mid)
3373
v = lerp(lw, mw, v / mid);
3374
else
3375
v = lerp(mw, hw, (v - mid) * (1.0f - mid));
3376
3377
emphasis_weights[i] = v;
3378
}
3379
3380
#if 0
3381
if (num_pixels == 6 * 6)
3382
{
3383
const float EDGE_WEIGHT = .1f;
3384
for (uint32_t i = 0; i < 6; i++)
3385
{
3386
emphasis_weights[i] += EDGE_WEIGHT;
3387
emphasis_weights[i + 5 * 6] += EDGE_WEIGHT;
3388
emphasis_weights[i * 6] += EDGE_WEIGHT;
3389
emphasis_weights[5 + i * 6] += EDGE_WEIGHT;
3390
}
3391
}
3392
#endif
3393
}
3394
3395
for (uint32_t pass = 0; pass < NUM_LS_PASSES; pass++)
3396
{
3397
vec3F l_q16, h_q16;
3398
3399
if (!compute_weighted_least_squares_endpoints_rgb(
3400
num_pixels,
3401
trial_blk_weights, &g_astc_ls_weights_ise[ise_weight_range][0], nullptr,
3402
emphasis_weights,
3403
&l_q16, &h_q16,
3404
pBlock_pixels_q16,
3405
color_box_q16))
3406
break;
3407
3408
bool was_improved = try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
3409
l_q16, h_q16,
3410
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,
3411
first_submode, last_submode, ignore_clamping);
3412
3413
if (!was_improved)
3414
break;
3415
3416
// It's improved, so let's take the new weight indices.
3417
memcpy(trial_blk_weights, blk_weights, num_pixels);
3418
3419
} // pass
3420
}
3421
3422
if ( (uber_mode) && (ise_weight_range >= astc_helpers::BISE_3_LEVELS) &&
3423
((opt_mode == cOrdinaryLeastSquares) || (opt_mode == cWeightedLeastSquares) || (opt_mode == cWeightedLeastSquaresHeavy)) )
3424
{
3425
// Try varying the current best weight indices. This can be expanded/improved, but at potentially great cost.
3426
3427
uint8_t temp_astc_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
3428
memcpy(temp_astc_weights, trial_blk_weights, num_pixels);
3429
3430
uint32_t min_lin_sel = 256, max_lin_sel = 0;
3431
for (uint32_t i = 0; i < num_pixels; i++)
3432
{
3433
const uint32_t astc_sel = temp_astc_weights[i];
3434
3435
const uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel];
3436
assert(lin_sel < num_weight_levels);
3437
3438
min_lin_sel = minimumu(min_lin_sel, lin_sel);
3439
max_lin_sel = maximumu(max_lin_sel, lin_sel);
3440
}
3441
3442
bool was_improved = false;
3443
(void)was_improved;
3444
3445
{
3446
bool weights_changed = false;
3447
uint8_t trial_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
3448
for (uint32_t i = 0; i < num_pixels; i++)
3449
{
3450
uint32_t astc_sel = temp_astc_weights[i];
3451
uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel];
3452
3453
if ((lin_sel == min_lin_sel) && (lin_sel < (num_weight_levels - 1)))
3454
{
3455
lin_sel++;
3456
weights_changed = true;
3457
}
3458
3459
trial_weights[i] = g_map_linear_to_astc_order[ise_weight_range][lin_sel];
3460
}
3461
3462
if (weights_changed)
3463
{
3464
vec3F l_q16, h_q16;
3465
3466
bool succeeded;
3467
if (opt_mode == cOrdinaryLeastSquares)
3468
succeeded = compute_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16);
3469
else
3470
succeeded = compute_weighted_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], nullptr, emphasis_weights, &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16);
3471
3472
if (succeeded)
3473
{
3474
if (try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
3475
l_q16, h_q16,
3476
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,
3477
first_submode, last_submode, ignore_clamping))
3478
{
3479
was_improved = true;
3480
}
3481
}
3482
}
3483
}
3484
3485
{
3486
bool weights_changed = false;
3487
uint8_t trial_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
3488
3489
for (uint32_t i = 0; i < num_pixels; i++)
3490
{
3491
uint32_t astc_sel = temp_astc_weights[i];
3492
uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel];
3493
3494
if ((lin_sel == max_lin_sel) && (lin_sel > 0))
3495
{
3496
lin_sel--;
3497
weights_changed = true;
3498
}
3499
3500
trial_weights[i] = g_map_linear_to_astc_order[ise_weight_range][lin_sel];
3501
}
3502
3503
if (weights_changed)
3504
{
3505
vec3F l_q16, h_q16;
3506
3507
bool succeeded;
3508
if (opt_mode == cOrdinaryLeastSquares)
3509
succeeded = compute_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16);
3510
else
3511
succeeded = compute_weighted_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], nullptr, emphasis_weights, &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16);
3512
3513
if (succeeded)
3514
{
3515
if (try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
3516
l_q16, h_q16,
3517
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,
3518
first_submode, last_submode, ignore_clamping))
3519
{
3520
was_improved = true;
3521
}
3522
}
3523
}
3524
}
3525
3526
{
3527
bool weights_changed = false;
3528
uint8_t trial_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
3529
for (uint32_t i = 0; i < num_pixels; i++)
3530
{
3531
uint32_t astc_sel = temp_astc_weights[i];
3532
uint32_t lin_sel = g_map_astc_to_linear_order[ise_weight_range][astc_sel];
3533
3534
if ((lin_sel == max_lin_sel) && (lin_sel > 0))
3535
{
3536
lin_sel--;
3537
weights_changed = true;
3538
}
3539
else if ((lin_sel == min_lin_sel) && (lin_sel < (num_weight_levels - 1)))
3540
{
3541
lin_sel++;
3542
weights_changed = true;
3543
}
3544
3545
trial_weights[i] = g_map_linear_to_astc_order[ise_weight_range][lin_sel];
3546
}
3547
3548
if (weights_changed)
3549
{
3550
vec3F l_q16, h_q16;
3551
bool succeeded;
3552
if (opt_mode == cOrdinaryLeastSquares)
3553
succeeded = compute_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16);
3554
else
3555
succeeded = compute_weighted_least_squares_endpoints_rgb(num_pixels, trial_weights, &g_astc_ls_weights_ise[ise_weight_range][0], nullptr, emphasis_weights, &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16);
3556
3557
if (succeeded)
3558
{
3559
if (try_mode11(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
3560
l_q16, h_q16,
3561
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,
3562
first_submode, last_submode, ignore_clamping))
3563
{
3564
was_improved = true;
3565
}
3566
}
3567
}
3568
}
3569
3570
} // uber_mode
3571
3572
return cur_block_error;
3573
}
3574
3575
//--------------------------------------------------------------------------------------------------------------------------
3576
3577
double encode_astc_hdr_block_downsampled_mode_11(
3578
uint32_t block_x, uint32_t block_y, uint32_t grid_x, uint32_t grid_y,
3579
uint32_t ise_weight_range, uint32_t ise_endpoint_range,
3580
uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
3581
double cur_block_error,
3582
int32_t first_submode, int32_t last_submode, bool ignore_clamping, opt_mode_t opt_mode,
3583
uint8_t* pBlk_endpoints, uint8_t* pBlk_weights, uint32_t& best_submode,
3584
const astc_hdr_codec_base_options& coptions,
3585
const encode_astc_block_stats* pBlock_stats)
3586
{
3587
assert((block_x >= 4) && (block_y >= 4) && (block_x <= MAX_ASTC_HDR_BLOCK_W) && (block_y <= MAX_ASTC_HDR_BLOCK_H));
3588
assert((grid_x >= 2) && (grid_y >= 2) && (grid_x <= block_x) && (grid_y <= block_y));
3589
3590
assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
3591
assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
3592
assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));
3593
3594
assert((first_submode >= FIRST_MODE11_SUBMODE_INDEX) && (first_submode <= last_submode));
3595
assert(last_submode <= MAX_MODE11_SUBMODE_INDEX);
3596
3597
best_submode = 0;
3598
3599
assert(astc_helpers::get_ise_levels(ise_weight_range) <= MAX_SUPPORTED_WEIGHT_LEVELS);
3600
3601
const uint32_t num_weights = grid_x * grid_y;
3602
3603
vec3F block_mean_color_q16, block_axis_q16;
3604
if (!pBlock_stats)
3605
{
3606
block_mean_color_q16 = calc_mean(num_pixels, pBlock_pixels_q16);
3607
block_axis_q16 = calc_rgb_pca(num_pixels, pBlock_pixels_q16, block_mean_color_q16);
3608
}
3609
else
3610
{
3611
assert(num_pixels == pBlock_stats->m_num_pixels);
3612
block_mean_color_q16 = pBlock_stats->m_mean_q16;
3613
block_axis_q16 = pBlock_stats->m_axis_q16;
3614
}
3615
3616
aabb3F color_box_q16(cInitExpand);
3617
3618
float l = BIG_FLOAT_VAL, h = -BIG_FLOAT_VAL;
3619
vec3F low_color_q16, high_color_q16;
3620
3621
for (uint32_t i = 0; i < num_pixels; i++)
3622
{
3623
color_box_q16.expand(pBlock_pixels_q16[i]);
3624
3625
vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);
3626
float kd = k.dot(block_axis_q16);
3627
3628
if (kd < l)
3629
{
3630
l = kd;
3631
low_color_q16 = pBlock_pixels_q16[i];
3632
}
3633
3634
if (kd > h)
3635
{
3636
h = kd;
3637
high_color_q16 = pBlock_pixels_q16[i];
3638
}
3639
}
3640
3641
vec3F old_low_color_q16(low_color_q16), old_high_color_q16(high_color_q16);
3642
3643
for (uint32_t i = 0; i < 3; i++)
3644
{
3645
low_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 1.0f / 64.0f);
3646
high_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 63.0f / 64.0f);
3647
}
3648
3649
const uint32_t NUM_PASSES = 3;
3650
for (uint32_t pass = 0; pass < NUM_PASSES; pass++)
3651
{
3652
uint8_t trial_blk_endpoints[NUM_MODE11_ENDPOINTS];
3653
uint8_t trial_blk_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // at block resolution, not grid res
3654
uint32_t trial_best_submode = 0;
3655
3656
clear_obj(trial_blk_endpoints);
3657
clear_obj(trial_blk_weights);
3658
3659
double trial_blk_error = BIG_FLOAT_VAL;
3660
3661
bool could_pack = try_mode11(num_pixels, trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_best_submode,
3662
low_color_q16, high_color_q16,
3663
pBlock_pixels_half, 32, astc_helpers::BISE_32_LEVELS, coptions, false, ise_endpoint_range, false,
3664
first_submode, last_submode, ignore_clamping);
3665
3666
if (!could_pack)
3667
break;
3668
3669
uint8_t trial_downsampled_ise_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
3670
3671
downsample_ise_weights(
3672
astc_helpers::BISE_32_LEVELS, ise_weight_range,
3673
block_x, block_y, grid_x, grid_y,
3674
trial_blk_weights, trial_downsampled_ise_weights);
3675
3676
uint8_t trial_downsampled_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
3677
dequantize_astc_weights(num_weights, trial_downsampled_ise_weights, ise_weight_range, trial_downsampled_raw_weights);
3678
3679
uint8_t trial_upsampled_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // raw weights, NOT ISE
3680
astc_helpers::upsample_weight_grid(block_x, block_y, grid_x, grid_y, trial_downsampled_raw_weights, trial_upsampled_raw_weights);
3681
3682
//------
3683
3684
int trial_e[2][3];
3685
if (!decode_mode11_to_qlog12(trial_blk_endpoints, trial_e, ise_endpoint_range))
3686
return cur_block_error;
3687
3688
double trial_error = compute_block_error_from_raw_weights(num_pixels, pBlock_pixels_half, trial_upsampled_raw_weights, trial_e, coptions);
3689
3690
if (trial_error < cur_block_error)
3691
{
3692
cur_block_error = trial_error;
3693
memcpy(pBlk_endpoints, trial_blk_endpoints, NUM_MODE11_ENDPOINTS);
3694
memcpy(pBlk_weights, trial_downsampled_ise_weights, num_weights);
3695
best_submode = trial_best_submode;
3696
}
3697
else if (pass)
3698
break;
3699
3700
if ((opt_mode == cWeightedLeastSquares) || (opt_mode == cWeightedLeastSquaresHeavy))
3701
{
3702
float emphasis_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
3703
if (h == l)
3704
{
3705
for (uint32_t i = 0; i < num_pixels; i++)
3706
emphasis_weights[i] = 1.0f;
3707
}
3708
else
3709
{
3710
float mid = (0.0f - l) / (h - l);
3711
mid = clamp(mid, .01f, .99f);
3712
3713
float lw = LOW_EMPHASIS_WEIGHT, mw = MIDDLE_EMPHASIS_WEIGHT, hw = HIGH_EMPHASIS_WEIGHT;
3714
if (opt_mode == cWeightedLeastSquaresHeavy)
3715
lw = LOW_EMPHASIS_WEIGHT_HEAVY, mw = MIDDLE_EMPHASIS_WEIGHT_HEAVY, hw = HIGH_EMPHASIS_WEIGHT_HEAVY;
3716
3717
for (uint32_t i = 0; i < num_pixels; i++)
3718
{
3719
vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);
3720
float kd = k.dot(block_axis_q16);
3721
3722
assert((kd >= l) && (kd <= h));
3723
3724
float v = (kd - l) / (h - l);
3725
3726
if (v < mid)
3727
v = lerp(lw, mw, v / mid);
3728
else
3729
v = lerp(mw, hw, (v - mid) * (1.0f - mid));
3730
3731
emphasis_weights[i] = v;
3732
}
3733
}
3734
3735
float trial_upsampled_raw_weightsf[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
3736
for (uint32_t i = 0; i < num_pixels; i++)
3737
trial_upsampled_raw_weightsf[i] = (float)trial_upsampled_raw_weights[i] * (1.0f / 64.0f);
3738
3739
if (!compute_weighted_least_squares_endpoints_rgb(num_pixels, nullptr, nullptr, trial_upsampled_raw_weightsf, emphasis_weights, &low_color_q16, &high_color_q16, pBlock_pixels_q16, color_box_q16))
3740
return false;
3741
}
3742
else
3743
{
3744
if (!compute_least_squares_endpoints_rgb_raw_weights(num_pixels, trial_upsampled_raw_weights, &low_color_q16, &high_color_q16, pBlock_pixels_q16, color_box_q16))
3745
break;
3746
}
3747
3748
bool pack_succeeded = pack_mode11(low_color_q16, high_color_q16, ise_endpoint_range, trial_blk_endpoints, coptions, false, first_submode, last_submode, false, trial_best_submode);
3749
if (!pack_succeeded)
3750
break;
3751
3752
if (!decode_mode11_to_qlog12(trial_blk_endpoints, trial_e, ise_endpoint_range))
3753
break;
3754
3755
trial_error = compute_block_error_from_raw_weights(num_pixels, pBlock_pixels_half, trial_upsampled_raw_weights, trial_e, coptions);
3756
3757
if (trial_error < cur_block_error)
3758
{
3759
cur_block_error = trial_error;
3760
memcpy(pBlk_endpoints, trial_blk_endpoints, NUM_MODE11_ENDPOINTS);
3761
memcpy(pBlk_weights, trial_downsampled_ise_weights, num_weights);
3762
best_submode = trial_best_submode;
3763
}
3764
else
3765
{
3766
break;
3767
}
3768
3769
} // pass
3770
3771
return cur_block_error;
3772
}
3773
3774
//--------------------------------------------------------------------------------------------------------------------------
3775
3776
double encode_astc_hdr_block_mode_11_dual_plane(
3777
uint32_t num_pixels,
3778
const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
3779
uint32_t channel_index, // 0-2
3780
uint32_t ise_weight_range,
3781
uint32_t& best_submode,
3782
double cur_block_error,
3783
uint8_t* blk_endpoints, uint8_t* blk_weights0, uint8_t* blk_weights1,
3784
const astc_hdr_codec_base_options& coptions,
3785
bool direct_only,
3786
uint32_t ise_endpoint_range,
3787
bool uber_mode,
3788
bool constrain_ise_weight_selectors,
3789
int32_t first_submode, int32_t last_submode, bool ignore_clamping)
3790
{
3791
(void)uber_mode;
3792
3793
assert(channel_index <= 2);
3794
assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
3795
assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
3796
assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));
3797
3798
assert((first_submode >= FIRST_MODE11_SUBMODE_INDEX) && (first_submode <= last_submode));
3799
assert(last_submode <= MAX_MODE11_SUBMODE_INDEX);
3800
3801
assert(num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS);
3802
3803
best_submode = 0;
3804
3805
const uint32_t num_weight_levels = astc_helpers::get_ise_levels(ise_weight_range);
3806
assert(num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS);
3807
3808
vec4F temp_block_pixels_q16[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
3809
for (uint32_t i = 0; i < num_pixels; i++)
3810
{
3811
temp_block_pixels_q16[i] = pBlock_pixels_q16[i];
3812
temp_block_pixels_q16[i][channel_index] = 0.0f;
3813
}
3814
3815
vec3F block_mean_color_q16(calc_mean(num_pixels, temp_block_pixels_q16));
3816
vec3F block_axis_q16(calc_rgb_pca(num_pixels, temp_block_pixels_q16, block_mean_color_q16));
3817
3818
float l = BIG_FLOAT_VAL, h = -BIG_FLOAT_VAL;
3819
vec3F low_color_q16, high_color_q16;
3820
3821
aabb3F color_box_q16(cInitExpand);
3822
3823
for (uint32_t i = 0; i < num_pixels; i++)
3824
{
3825
color_box_q16.expand(pBlock_pixels_q16[i]);
3826
3827
vec3F k(vec3F(temp_block_pixels_q16[i]) - block_mean_color_q16);
3828
float kd = k.dot(block_axis_q16);
3829
3830
if (kd < l)
3831
{
3832
l = kd;
3833
low_color_q16 = pBlock_pixels_q16[i];
3834
}
3835
3836
if (kd > h)
3837
{
3838
h = kd;
3839
high_color_q16 = pBlock_pixels_q16[i];
3840
}
3841
}
3842
3843
low_color_q16[channel_index] = 0.0f;
3844
high_color_q16[channel_index] = 0.0f;
3845
3846
float a = low_color_q16.dot(vec3F(1.0f)), b = high_color_q16.dot(vec3F(1.0f));
3847
if (a <= b)
3848
{
3849
low_color_q16[channel_index] = color_box_q16.get_low()[channel_index];
3850
high_color_q16[channel_index] = color_box_q16.get_high()[channel_index];
3851
}
3852
else
3853
{
3854
high_color_q16[channel_index] = color_box_q16.get_low()[channel_index];
3855
low_color_q16[channel_index] = color_box_q16.get_high()[channel_index];
3856
}
3857
3858
vec3F old_low_color_q16(low_color_q16), old_high_color_q16(high_color_q16);
3859
for (uint32_t i = 0; i < 3; i++)
3860
{
3861
low_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 1.0f / 64.0f);
3862
high_color_q16[i] = lerp<float>(old_low_color_q16[i], old_high_color_q16[i], 63.0f / 64.0f);
3863
}
3864
3865
uint8_t trial_blk_endpoints[NUM_MODE11_ENDPOINTS];
3866
uint8_t trial_blk_weights0[MAX_ASTC_HDR_ENC_BLOCK_PIXELS], trial_blk_weights1[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
3867
uint32_t trial_best_submode = 0;
3868
3869
clear_obj(trial_blk_endpoints);
3870
clear_obj(trial_blk_weights0);
3871
clear_obj(trial_blk_weights1);
3872
3873
double trial_blk_error = BIG_FLOAT_VAL;
3874
3875
bool did_improve = try_mode11_dual_plane(channel_index, num_pixels, trial_blk_endpoints, trial_blk_weights0, trial_blk_weights1, trial_blk_error, trial_best_submode,
3876
low_color_q16, high_color_q16,
3877
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,
3878
first_submode, last_submode, ignore_clamping);
3879
3880
// If we couldn't find ANY usable solution due to endpoint quantization, just return. There's nothing we can do.
3881
if (!did_improve)
3882
return cur_block_error;
3883
3884
// Did the solution improve?
3885
if (trial_blk_error < cur_block_error)
3886
{
3887
cur_block_error = trial_blk_error;
3888
memcpy(blk_endpoints, trial_blk_endpoints, NUM_MODE11_ENDPOINTS);
3889
memcpy(blk_weights0, trial_blk_weights0, num_pixels);
3890
memcpy(blk_weights1, trial_blk_weights1, num_pixels);
3891
best_submode = trial_best_submode;
3892
}
3893
3894
const uint32_t chan0 = (channel_index + 1) % 3, chan1 = (channel_index + 2) % 3;
3895
3896
vec2F plane0_q16[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
3897
aabb2F plane0_bounds;
3898
plane0_bounds[0].set(color_box_q16.get_low()[chan0], color_box_q16.get_low()[chan1]);
3899
plane0_bounds[1].set(color_box_q16.get_high()[chan0], color_box_q16.get_high()[chan1]);
3900
3901
vec1F plane1_q16[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
3902
aabb1F plane1_bounds;
3903
plane1_bounds[0].set(color_box_q16.get_low()[channel_index]);
3904
plane1_bounds[1].set(color_box_q16.get_high()[channel_index]);
3905
3906
for (uint32_t i = 0; i < num_pixels; i++)
3907
{
3908
plane0_q16[i][0] = pBlock_pixels_q16[i][chan0];
3909
plane0_q16[i][1] = pBlock_pixels_q16[i][chan1];
3910
3911
plane1_q16[i][0] = pBlock_pixels_q16[i][channel_index];
3912
}
3913
3914
const uint32_t NUM_LS_PASSES = 3;
3915
3916
for (uint32_t pass = 0; pass < NUM_LS_PASSES; pass++)
3917
{
3918
vec2F l0_q16, h0_q16;
3919
if (!compute_least_squares_endpoints_2D(num_pixels, trial_blk_weights0, &g_astc_ls_weights_ise[ise_weight_range][0], &l0_q16, &h0_q16, plane0_q16, plane0_bounds))
3920
break;
3921
3922
vec1F l1_q16, h1_q16;
3923
if (!compute_least_squares_endpoints_1D(num_pixels, trial_blk_weights1, &g_astc_ls_weights_ise[ise_weight_range][0], &l1_q16, &h1_q16, plane1_q16, plane1_bounds))
3924
break;
3925
3926
vec3F l_q16, h_q16;
3927
3928
l_q16[channel_index] = l1_q16[0];
3929
h_q16[channel_index] = h1_q16[0];
3930
3931
l_q16[chan0] = l0_q16[0];
3932
h_q16[chan0] = h0_q16[0];
3933
3934
l_q16[chan1] = l0_q16[1];
3935
h_q16[chan1] = h0_q16[1];
3936
3937
bool was_improved = try_mode11_dual_plane(channel_index, num_pixels, blk_endpoints, blk_weights0, blk_weights1, cur_block_error, best_submode,
3938
l_q16, h_q16,
3939
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, direct_only, ise_endpoint_range, constrain_ise_weight_selectors,
3940
first_submode, last_submode, ignore_clamping);
3941
3942
if (!was_improved)
3943
break;
3944
3945
// It's improved, so let's take the new weight indices.
3946
memcpy(trial_blk_weights0, blk_weights0, num_pixels);
3947
memcpy(trial_blk_weights1, blk_weights1, num_pixels);
3948
3949
} // pass
3950
3951
return cur_block_error;
3952
}
3953
3954
//--------------------------------------------------------------------------------------------------------------------------
3955
3956
double encode_astc_hdr_block_mode_7(
3957
uint32_t num_pixels,
3958
const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
3959
uint32_t ise_weight_range,
3960
uint32_t& best_submode,
3961
double cur_block_error,
3962
uint8_t* blk_endpoints, //[4]
3963
uint8_t* blk_weights, // [num_pixels]
3964
const astc_hdr_codec_base_options& coptions,
3965
uint32_t ise_endpoint_range,
3966
int first_submode, int last_submode,
3967
const encode_astc_block_stats* pBlock_stats)
3968
{
3969
assert((num_pixels >= 1) && (num_pixels <= MAX_ASTC_HDR_ENC_BLOCK_PIXELS));
3970
assert((ise_weight_range >= MIN_SUPPORTED_ISE_WEIGHT_INDEX) && (ise_weight_range <= MAX_SUPPORTED_ISE_WEIGHT_INDEX));
3971
assert((ise_endpoint_range >= astc_helpers::FIRST_VALID_ENDPOINT_ISE_RANGE) && (ise_endpoint_range <= astc_helpers::LAST_VALID_ENDPOINT_ISE_RANGE));
3972
3973
const uint32_t num_weight_levels = astc_helpers::get_ise_levels(ise_weight_range);
3974
assert(num_weight_levels <= MAX_SUPPORTED_WEIGHT_LEVELS);
3975
3976
best_submode = 0;
3977
3978
vec3F block_mean_color_q16;
3979
if (!pBlock_stats)
3980
block_mean_color_q16 = calc_mean(num_pixels, pBlock_pixels_q16);
3981
else
3982
{
3983
assert(num_pixels == pBlock_stats->m_num_pixels);
3984
block_mean_color_q16 = pBlock_stats->m_mean_q16;
3985
}
3986
3987
vec3F block_axis_q16(0.577350259f);
3988
3989
aabb3F color_box_q16(cInitExpand);
3990
3991
float l = BIG_FLOAT_VAL, h = -BIG_FLOAT_VAL;
3992
for (uint32_t i = 0; i < num_pixels; i++)
3993
{
3994
color_box_q16.expand(pBlock_pixels_q16[i]);
3995
3996
vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);
3997
float kd = k.dot(block_axis_q16);
3998
3999
l = basisu::minimum<float>(l, kd);
4000
h = basisu::maximum<float>(h, kd);
4001
}
4002
4003
vec3F low_color_q16(interp_color(block_mean_color_q16, block_axis_q16, l, color_box_q16, color_box_q16));
4004
vec3F high_color_q16(interp_color(block_mean_color_q16, block_axis_q16, h, color_box_q16, color_box_q16));
4005
4006
low_color_q16.clamp(0.0f, MAX_QLOG16_VAL);
4007
high_color_q16.clamp(0.0f, MAX_QLOG16_VAL);
4008
4009
vec3F diff(high_color_q16 - low_color_q16);
4010
4011
// The mul here (* block_axis_q16[0]) is because the "S" or scale value is subtracted from the high color with a scale of 1.0,
4012
// i.e. it's equivalent to a vector of (1,1,1) multiplied by scale before the sub. We want to actually move along the grayscale axis, or (0.577350259, 0.577350259, 0.577350259).
4013
float s_q16 = diff.dot(block_axis_q16) * block_axis_q16[0];
4014
4015
uint8_t trial_blk_endpoints[NUM_MODE7_ENDPOINTS];
4016
uint8_t trial_blk_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
4017
uint32_t trial_best_submode = 0;
4018
4019
clear_obj(trial_blk_endpoints);
4020
clear_obj(trial_blk_weights);
4021
4022
double trial_blk_error = BIG_FLOAT_VAL;
4023
4024
bool did_improve = try_mode7(num_pixels, trial_blk_endpoints, trial_blk_weights, trial_blk_error, trial_best_submode,
4025
high_color_q16, ceilf(s_q16),
4026
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode);
4027
4028
// If we couldn't find ANY usable solution due to endpoint quantization, just return. There's nothing we can do.
4029
if (!did_improve)
4030
{
4031
return cur_block_error;
4032
}
4033
4034
// Did the solution improve?
4035
if (trial_blk_error < cur_block_error)
4036
{
4037
cur_block_error = trial_blk_error;
4038
memcpy(blk_endpoints, trial_blk_endpoints, NUM_MODE7_ENDPOINTS);
4039
memcpy(blk_weights, trial_blk_weights, num_pixels);
4040
best_submode = trial_best_submode;
4041
}
4042
4043
#if 1
4044
{
4045
//const float TL = 8830.0f;// (float)half_to_qlog16(float_to_half(0.00061f));
4046
//const float TH = 41600.0f;// (float)half_to_qlog16(float_to_half(40.0f));
4047
//float zl = minimum<float>(color_box_q16[0][0], color_box_q16[0][1], color_box_q16[0][2]);
4048
//float zh = minimum<float>(color_box_q16[1][0], color_box_q16[1][1], color_box_q16[1][2]);
4049
4050
//if ((zl <= TL) && (zh >= TH))
4051
{
4052
// Try a simpler technique for artifact reduction
4053
l = BIG_FLOAT_VAL;
4054
h = -BIG_FLOAT_VAL;
4055
4056
vec3F alt_low_color_q16(0.0f), alt_high_color_q16(0.0f);
4057
for (uint32_t i = 0; i < num_pixels; i++)
4058
{
4059
color_box_q16.expand(pBlock_pixels_q16[i]);
4060
4061
vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);
4062
float kd = k.dot(block_axis_q16);
4063
4064
if (kd < l)
4065
{
4066
alt_low_color_q16 = pBlock_pixels_q16[i];
4067
l = kd;
4068
}
4069
4070
if (kd > h)
4071
{
4072
alt_high_color_q16 = pBlock_pixels_q16[i];
4073
h = kd;
4074
}
4075
}
4076
4077
vec3F old_alt_low_color_q16(alt_low_color_q16);
4078
4079
for (uint32_t i = 0; i < 3; i++)
4080
alt_low_color_q16[i] = lerp<float>(old_alt_low_color_q16[i], alt_high_color_q16[i], 1.0f / 64.0f);
4081
4082
vec3F alt_diff(alt_high_color_q16 - alt_low_color_q16);
4083
4084
// The mul here (* block_axis_q16[0]) is because the "S" or scale value is subtracted from the high color with a scale of 1.0,
4085
// i.e. it's equivalent to a vector of (1,1,1) multiplied by scale before the sub. We want to actually move along the grayscale axis, or (0.577350259, 0.577350259, 0.577350259).
4086
float alt_s_q16 = alt_diff.dot(block_axis_q16) * block_axis_q16[0];
4087
4088
try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
4089
alt_high_color_q16, ceilf(alt_s_q16),
4090
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode);
4091
}
4092
}
4093
#endif
4094
4095
const float one_over_num_pixels = 1.0f / (float)num_pixels;
4096
4097
const uint32_t NUM_TRIALS = 2;
4098
for (uint32_t trial = 0; trial < NUM_TRIALS; trial++)
4099
{
4100
// Given a set of selectors and S, try to compute a better high color
4101
vec3F new_high_color_q16(block_mean_color_q16);
4102
4103
int e[2][3];
4104
int cur_s = 0;
4105
if (!decode_mode7_to_qlog12(trial_blk_endpoints, e, &cur_s, ise_endpoint_range))
4106
break;
4107
4108
cur_s <<= 4;
4109
4110
for (uint32_t i = 0; i < num_pixels; i++)
4111
{
4112
uint32_t astc_sel = trial_blk_weights[i];
4113
float lerp = g_ise_weight_lerps[ise_weight_range][astc_sel + 1] * (1.0f / 64.0f);
4114
4115
float k = (float)cur_s * (1.0f - lerp) * one_over_num_pixels;
4116
new_high_color_q16[0] += k;
4117
new_high_color_q16[1] += k;
4118
new_high_color_q16[2] += k;
4119
}
4120
4121
bool improved = try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
4122
new_high_color_q16, (float)cur_s,
4123
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode);
4124
4125
if (improved)
4126
{
4127
memcpy(trial_blk_endpoints, blk_endpoints, NUM_MODE7_ENDPOINTS);
4128
memcpy(trial_blk_weights, blk_weights, num_pixels);
4129
}
4130
4131
// Given a set of selectors and a high color, try to compute a better S.
4132
float t = 0.0f;
4133
4134
for (uint32_t i = 0; i < num_pixels; i++)
4135
{
4136
uint32_t astc_sel = trial_blk_weights[i];
4137
float lerp = g_ise_weight_lerps[ise_weight_range][astc_sel + 1] * (1.0f / 64.0f);
4138
4139
t += (1.0f) - lerp;
4140
}
4141
4142
t *= one_over_num_pixels;
4143
4144
//int e[2][3];
4145
if (!decode_mode7_to_qlog12(trial_blk_endpoints, e, nullptr, ise_endpoint_range))
4146
break;
4147
4148
vec3F cur_h_q16((float)(e[1][0] << 4), (float)(e[1][1] << 4), (float)(e[1][2] << 4));
4149
4150
if (fabs(t) > .0000125f)
4151
{
4152
float s_r = (cur_h_q16[0] - block_mean_color_q16[0]) / t;
4153
float s_g = (cur_h_q16[1] - block_mean_color_q16[1]) / t;
4154
float s_b = (cur_h_q16[2] - block_mean_color_q16[2]) / t;
4155
4156
// TODO: gather statistics on these
4157
if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
4158
cur_h_q16, ceilf(s_r),
4159
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode))
4160
{
4161
improved = true;
4162
}
4163
4164
if (coptions.m_mode7_full_s_optimization)
4165
{
4166
if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
4167
cur_h_q16, ceilf(s_g),
4168
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode))
4169
{
4170
improved = true;
4171
}
4172
4173
if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
4174
cur_h_q16, ceilf(s_b),
4175
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode))
4176
{
4177
improved = true;
4178
}
4179
4180
if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
4181
cur_h_q16, ceilf((s_r + s_g + s_b) / 3.0f),
4182
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode))
4183
{
4184
improved = true;
4185
}
4186
4187
// Added this - quite strong.
4188
if (try_mode7(num_pixels, blk_endpoints, blk_weights, cur_block_error, best_submode,
4189
cur_h_q16, minimum(maximum(s_r, s_g, s_b) * 1.1f, 65535.0f),
4190
pBlock_pixels_half, num_weight_levels, ise_weight_range, coptions, ise_endpoint_range, first_submode, last_submode))
4191
{
4192
improved = true;
4193
}
4194
} // if (coptions.m_mode7_full_s_optimization)
4195
4196
} // if (fabs(t) > .0000125f)
4197
4198
if (!improved)
4199
break;
4200
4201
memcpy(trial_blk_endpoints, blk_endpoints, NUM_MODE7_ENDPOINTS);
4202
memcpy(trial_blk_weights, blk_weights, num_pixels);
4203
4204
} // trial
4205
4206
return cur_block_error;
4207
}
4208
4209
//--------------------------------------------------------------------------------------------------------------------------
4210
4211
void dequantize_astc_weights(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t from_ise_range, uint8_t* pDst_raw_weights)
4212
{
4213
const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(from_ise_range).m_ISE_to_val;
4214
4215
for (uint32_t i = 0; i < n; i++)
4216
pDst_raw_weights[i] = dequant_tab[pSrc_ise_vals[i]];
4217
}
4218
4219
//--------------------------------------------------------------------------------------------------------------------------
4220
4221
// Downsampling filter: 6x6 input samples -> 2x2 output samples.
// For each output (2x2) sample, the weight of each input (6x6) sample.
// Row index is the output sample (y * 2 + x) and column index is the input sample (y * 6 + x); the non-zero
// pattern shows this (row 0 covers the upper-left inputs, row 3 the lower-right). Each row sums to ~1.0.
static const float g_weight_downsample_6x6_to_2x2[4][36] = {
	{0.165438f, 0.132609f, 0.092681f, 0.028953f, 0.000000f, 0.000000f, 0.133716f, 0.111240f, 0.065133f, 0.022236f, 0.000000f, 0.000000f, 0.092623f, 0.063898f, 0.039120f, 0.000000f, 0.000000f, 0.000000f, 0.028168f, 0.024184f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.027262f, 0.091051f, 0.132446f, 0.164791f, 0.000000f, 0.000000f, 0.026038f, 0.066511f, 0.111644f, 0.133197f, 0.000000f, 0.000000f, 0.000000f, 0.040053f, 0.064757f, 0.091196f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024265f, 0.026789f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028282f, 0.024804f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.092871f, 0.066580f, 0.042024f, 0.000000f, 0.000000f, 0.000000f, 0.132115f, 0.107586f, 0.061943f, 0.025551f, 0.000000f, 0.000000f, 0.166111f, 0.132946f, 0.089043f, 0.030145f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024535f, 0.028835f, 0.000000f, 0.000000f, 0.000000f, 0.044465f, 0.063652f, 0.093251f, 0.000000f, 0.000000f, 0.025961f, 0.063339f, 0.107329f, 0.132240f, 0.000000f, 0.000000f, 0.029844f, 0.089249f, 0.132200f, 0.165099f},
};
4228
4229
// Downsampling filter: 6x6 input samples -> 3x2 (3 wide, 2 high) output samples.
// For each output (3x2) sample, the weight of each input (6x6) sample.
// Row index is the output sample (y * 3 + x) and column index is the input sample (y * 6 + x); the non-zero
// pattern shows this (rows 0-2 cover the upper inputs left to right, rows 3-5 the lower). Each row sums to ~1.0.
static const float g_weight_downsample_6x6_to_3x2[6][36] = {
	{0.257933f, 0.144768f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.213754f, 0.109376f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.140969f, 0.064128f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.041270f, 0.027803f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.046066f, 0.153691f, 0.153395f, 0.042845f, 0.000000f, 0.000000f, 0.038497f, 0.131674f, 0.126804f, 0.041513f, 0.000000f, 0.000000f, 0.028434f, 0.081152f, 0.075499f, 0.025372f, 0.000000f, 0.000000f, 0.000000f, 0.030067f, 0.024989f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.147088f, 0.258980f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.105549f, 0.211746f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066714f, 0.144015f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027755f, 0.038152f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044268f, 0.030990f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.141642f, 0.069930f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.207393f, 0.105354f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.255911f, 0.144511f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026658f, 0.032535f, 0.000000f, 0.000000f, 0.000000f, 0.024618f, 0.079487f, 0.080415f, 0.026311f, 0.000000f, 0.000000f, 0.038382f, 0.133569f, 0.133162f, 0.033451f, 0.000000f, 0.000000f, 0.043697f, 0.152483f, 0.154345f, 0.040885f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026401f, 0.040228f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066688f, 0.142350f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.108504f, 0.210286f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.149666f, 0.255876f},
};
4238
4239
// Downsampling filter: 6x6 input samples -> 4x2 (4 wide, 2 high) output samples.
// For each output (4x2) sample, the weight of each input (6x6) sample.
// Row index is the output sample (y * 4 + x) and column index is the input sample (y * 6 + x); the non-zero
// pattern shows this (rows 0-3 cover the upper inputs left to right, rows 4-7 the lower). Each row sums to ~1.0.
static const float g_weight_downsample_6x6_to_4x2[8][36] = {
	{0.318857f, 0.081413f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.262816f, 0.064811f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.175211f, 0.046152f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.050740f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.163830f, 0.223661f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.128904f, 0.194332f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.080369f, 0.121162f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.041941f, 0.045801f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.230801f, 0.166220f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.193495f, 0.136548f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.113816f, 0.085890f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043771f, 0.029459f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.087528f, 0.318213f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.059739f, 0.262039f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.046515f, 0.175973f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.049993f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.054078f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.173243f, 0.055145f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.254561f, 0.059695f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.319463f, 0.083816f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.038171f, 0.037447f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.076263f, 0.117360f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.134218f, 0.202503f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.163759f, 0.230278f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044607f, 0.035170f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.114466f, 0.088407f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.201026f, 0.127983f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.224148f, 0.164194f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.052817f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043531f, 0.174390f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.060164f, 0.262636f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.089340f, 0.317122f},
};
4250
4251
// Downsampling filter: 6x6 input samples -> 5x2 (5 wide, 2 high) output samples.
// For each output (5x2) sample, the weight of each input (6x6) sample.
// Row index is the output sample (y * 5 + x) and column index is the input sample (y * 6 + x); the non-zero
// pattern shows this (rows 0-4 cover the upper inputs left to right, rows 5-9 the lower). Each row sums to ~1.0.
static const float g_weight_downsample_6x6_to_5x2[10][36] = {
	{0.393855f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.327491f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.216089f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062565f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.303101f, 0.078223f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.261199f, 0.068761f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.160056f, 0.054634f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.074026f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.202529f, 0.207447f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.151013f, 0.157673f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.100074f, 0.095239f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043623f, 0.042402f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.083336f, 0.309647f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061432f, 0.269582f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.046328f, 0.166035f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063640f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.397684f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.326178f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.217856f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.058282f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.065541f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.215996f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.321124f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.397338f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.069030f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.159434f, 0.051902f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.266327f, 0.065732f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.305627f, 0.081948f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.038550f, 0.046259f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.092606f, 0.100038f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.162523f, 0.163345f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.199767f, 0.196912f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066709f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.050841f, 0.169003f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061591f, 0.265094f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.081426f, 0.305335f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063517f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.210896f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.316133f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027674f, 0.381781f},
};
4264
4265
// Downsampling filter: 6x6 input samples -> 6x2 (6 wide, 2 high) output samples.
// For each output (6x2) sample, the weight of each input (6x6) sample.
// Row index is the output sample (y * 6 + x) and column index is the input sample (y * 6 + x). Horizontal
// resolution is unchanged, so each row only has weights within a single input column. Each row sums to ~1.0.
static const float g_weight_downsample_6x6_to_6x2[12][36] = {
	{0.395563f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.328397f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.214936f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061104f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.395041f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.323513f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.208086f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.073360f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.393200f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.317339f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.218679f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.070782f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.399071f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.321356f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.214689f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.064883f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.399159f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.326009f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.212426f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062406f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398973f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.326510f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.217446f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.057071f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.065386f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.215039f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.321113f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398462f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.072234f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.211515f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.319185f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.397066f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.053184f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.213286f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.332634f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.400895f, 0.000000f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063501f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.207210f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.334096f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395193f, 0.000000f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.074315f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.216723f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.320827f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.388135f, 0.000000f},
	{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063571f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.215814f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.325843f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.394772f},
};
4280
4281
// For each output (2x3) sample, the weight of each input (6x6) sample.
4282
static const float g_weight_downsample_6x6_to_2x3[6][36] = {
4283
{0.253933f, 0.211745f, 0.142964f, 0.043509f, 0.000000f, 0.000000f, 0.146094f, 0.108119f, 0.068727f, 0.024908f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4284
{0.000000f, 0.000000f, 0.043336f, 0.140540f, 0.208745f, 0.253069f, 0.000000f, 0.000000f, 0.031333f, 0.069242f, 0.108596f, 0.145138f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4285
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044780f, 0.036916f, 0.026808f, 0.000000f, 0.000000f, 0.000000f, 0.151455f, 0.129189f, 0.076266f, 0.030885f, 0.000000f, 0.000000f, 0.151915f, 0.131628f, 0.081598f, 0.031903f, 0.000000f, 0.000000f, 0.043838f, 0.032645f, 0.030173f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4286
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028998f, 0.038454f, 0.046460f, 0.000000f, 0.000000f, 0.033717f, 0.076274f, 0.130140f, 0.153377f, 0.000000f, 0.000000f, 0.025762f, 0.077843f, 0.130195f, 0.150217f, 0.000000f, 0.000000f, 0.000000f, 0.029422f, 0.034493f, 0.044648f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4287
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.145243f, 0.107655f, 0.062280f, 0.033041f, 0.000000f, 0.000000f, 0.257369f, 0.210260f, 0.139667f, 0.044485f, 0.000000f, 0.000000f},
4288
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.037604f, 0.064104f, 0.105759f, 0.144848f, 0.000000f, 0.000000f, 0.042699f, 0.141511f, 0.207704f, 0.255772f},
4289
};
4290
4291
// For each output (3x3) sample, the weight of each input (6x6) sample.
4292
static const float g_weight_downsample_6x6_to_3x3[9][36] = {
4293
{0.412913f, 0.237773f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.237370f, 0.111944f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4294
{0.000000f, 0.066531f, 0.251421f, 0.245639f, 0.065785f, 0.000000f, 0.000000f, 0.047059f, 0.143642f, 0.128760f, 0.051164f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4295
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.234587f, 0.419421f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.110765f, 0.235227f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4296
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.067391f, 0.044131f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.248992f, 0.133218f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.247568f, 0.139987f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.072238f, 0.046475f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4297
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.040674f, 0.048555f, 0.000000f, 0.000000f, 0.000000f, 0.049640f, 0.158199f, 0.158521f, 0.046044f, 0.000000f, 0.000000f, 0.043591f, 0.153956f, 0.155258f, 0.049378f, 0.000000f, 0.000000f, 0.000000f, 0.046674f, 0.049509f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4298
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.049528f, 0.063611f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.137662f, 0.252612f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.134924f, 0.246668f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.042655f, 0.072341f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4299
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.237403f, 0.114850f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.418506f, 0.229241f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4300
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.049009f, 0.142093f, 0.136891f, 0.036294f, 0.000000f, 0.000000f, 0.074433f, 0.244437f, 0.251631f, 0.065212f, 0.000000f},
4301
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.121166f, 0.231108f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.236230f, 0.411495f},
4302
};
4303
4304
// For each output (4x3) sample, the weight of each input (6x6) sample.
4305
static const float g_weight_downsample_6x6_to_4x3[12][36] = {
4306
{0.508292f, 0.132529f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.285382f, 0.073798f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4307
{0.000000f, 0.266624f, 0.378457f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.144380f, 0.210539f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4308
{0.000000f, 0.000000f, 0.000000f, 0.380292f, 0.270590f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200825f, 0.148293f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4309
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.130560f, 0.507542f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.071578f, 0.290320f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4310
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.094051f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.322294f, 0.082665f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.316365f, 0.092271f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.092353f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4311
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.046081f, 0.061377f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.158151f, 0.235006f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.152896f, 0.232594f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.052844f, 0.061053f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4312
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.061619f, 0.046867f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.227763f, 0.158202f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.222620f, 0.155545f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.073398f, 0.053986f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4313
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.082287f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.084098f, 0.330283f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.085224f, 0.323658f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.094451f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4314
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.286413f, 0.077046f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.512915f, 0.123625f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4315
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.140389f, 0.213324f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.267125f, 0.379163f, 0.000000f, 0.000000f, 0.000000f},
4316
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.208464f, 0.139969f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.382876f, 0.268691f, 0.000000f},
4317
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.080416f, 0.285653f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.131803f, 0.502128f},
4318
};
4319
4320
// For each output (5x3) sample, the weight of each input (6x6) sample.
4321
static const float g_weight_downsample_6x6_to_5x3[15][36] = {
4322
{0.618662f, 0.032137f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.349200f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4323
{0.000000f, 0.497060f, 0.129255f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.281642f, 0.092043f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4324
{0.000000f, 0.000000f, 0.333166f, 0.338337f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.164333f, 0.164165f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4325
{0.000000f, 0.000000f, 0.000000f, 0.129409f, 0.504176f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.085525f, 0.280890f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4326
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.636943f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.363057f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4327
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.113467f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.394204f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.386741f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.105588f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4328
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.086925f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.317750f, 0.095763f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.321008f, 0.086368f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.092185f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4329
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.057696f, 0.061462f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.184995f, 0.197656f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.186342f, 0.186715f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.059712f, 0.065422f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4330
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.091939f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.079906f, 0.328876f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.085955f, 0.320229f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.093096f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4331
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.099585f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398489f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.388782f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.113144f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4332
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.360655f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.639345f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4333
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.285578f, 0.088663f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.495946f, 0.129812f, 0.000000f, 0.000000f, 0.000000f},
4334
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.177513f, 0.166195f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.329950f, 0.326342f, 0.000000f, 0.000000f},
4335
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.082692f, 0.279744f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.134353f, 0.503211f, 0.000000f},
4336
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.361178f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.638822f},
4337
};
4338
4339
// For each output (6x3) sample, the weight of each input (6x6) sample.
4340
static const float g_weight_downsample_6x6_to_6x3[18][36] = {
4341
{0.640623f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359377f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4342
{0.000000f, 0.638697f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.361303f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4343
{0.000000f, 0.000000f, 0.640672f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359328f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4344
{0.000000f, 0.000000f, 0.000000f, 0.637721f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.362279f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4345
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.647342f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.352658f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4346
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.638418f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.361582f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4347
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111041f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395972f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.387932f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.105054f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4348
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.101949f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395728f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.401263f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.101060f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4349
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.098132f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.388180f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.402030f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111659f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4350
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.096173f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.393865f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.386312f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.123650f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4351
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.104357f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398062f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.393265f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.104316f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4352
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.097666f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.400772f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.390396f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111166f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4353
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359466f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.640534f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4354
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.360569f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.639431f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4355
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.355750f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.644250f, 0.000000f, 0.000000f, 0.000000f},
4356
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.353865f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.646135f, 0.000000f, 0.000000f},
4357
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.357727f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.642273f, 0.000000f},
4358
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359539f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.640461f},
4359
};
4360
4361
// For each output (2x4) sample, the weight of each input (6x6) sample.
4362
static const float g_weight_downsample_6x6_to_2x4[8][36] = {
4363
{0.312206f, 0.261492f, 0.177496f, 0.055798f, 0.000000f, 0.000000f, 0.081944f, 0.062361f, 0.048703f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4364
{0.000000f, 0.000000f, 0.054679f, 0.172805f, 0.260561f, 0.314742f, 0.000000f, 0.000000f, 0.000000f, 0.049040f, 0.065652f, 0.082520f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4365
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.164115f, 0.129589f, 0.083879f, 0.029309f, 0.000000f, 0.000000f, 0.231202f, 0.198851f, 0.118719f, 0.044334f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4366
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.035855f, 0.083276f, 0.127764f, 0.166965f, 0.000000f, 0.000000f, 0.045347f, 0.116503f, 0.193645f, 0.230645f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4367
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.223790f, 0.194804f, 0.115855f, 0.047371f, 0.000000f, 0.000000f, 0.164616f, 0.125798f, 0.087268f, 0.040497f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4368
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.044738f, 0.118365f, 0.198854f, 0.230745f, 0.000000f, 0.000000f, 0.029646f, 0.078141f, 0.131405f, 0.168106f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4369
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.080206f, 0.060505f, 0.041197f, 0.000000f, 0.000000f, 0.000000f, 0.320486f, 0.265233f, 0.174992f, 0.057380f, 0.000000f, 0.000000f},
4370
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.051057f, 0.058139f, 0.082120f, 0.000000f, 0.000000f, 0.056168f, 0.174118f, 0.260525f, 0.317873f},
4371
};
4372
4373
// For each output (3x4) sample, the weight of each input (6x6) sample.
4374
static const float g_weight_downsample_6x6_to_3x4[12][36] = {
4375
{0.503381f, 0.288537f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.130806f, 0.077275f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4376
{0.000000f, 0.088808f, 0.319226f, 0.312498f, 0.086797f, 0.000000f, 0.000000f, 0.000000f, 0.092065f, 0.079421f, 0.021185f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4377
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.286250f, 0.514036f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.072999f, 0.126714f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4378
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.261935f, 0.133191f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.376226f, 0.207118f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021529f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4379
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.059585f, 0.153016f, 0.152552f, 0.043373f, 0.000000f, 0.000000f, 0.063990f, 0.231504f, 0.235283f, 0.060696f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4380
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.146403f, 0.262394f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.208547f, 0.382656f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4381
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.374676f, 0.209306f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.270440f, 0.145577f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4382
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.059636f, 0.233975f, 0.235944f, 0.069029f, 0.000000f, 0.000000f, 0.048950f, 0.150198f, 0.154340f, 0.047929f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4383
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200921f, 0.380881f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.146928f, 0.271271f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4384
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.128883f, 0.075468f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.509859f, 0.285791f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4385
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.095842f, 0.086878f, 0.000000f, 0.000000f, 0.000000f, 0.092942f, 0.314169f, 0.319263f, 0.090906f, 0.000000f},
4386
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.079652f, 0.124852f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.289868f, 0.505628f},
4387
};
4388
4389
// For each output (4x4) sample, the weight of each input (6x6) sample.
4390
static const float g_weight_downsample_6x6_to_4x4[16][36] = {
4391
{0.665277f, 0.167914f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.166809f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4392
{0.000000f, 0.325854f, 0.449938f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.094690f, 0.129518f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4393
{0.000000f, 0.000000f, 0.000000f, 0.455174f, 0.326025f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.109174f, 0.109627f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4394
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.166733f, 0.664155f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.169112f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4395
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.320619f, 0.090788f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.462066f, 0.126527f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4396
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.165890f, 0.235855f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.233931f, 0.364324f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4397
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.239319f, 0.151533f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.363629f, 0.245519f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4398
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.106763f, 0.311932f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.119451f, 0.461853f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4399
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.451893f, 0.124086f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.326160f, 0.097861f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4400
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.239712f, 0.365585f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.164178f, 0.230525f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4401
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.360274f, 0.237862f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.246139f, 0.155726f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4402
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.121863f, 0.457051f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.097828f, 0.323258f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4403
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.163634f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.667648f, 0.168718f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4404
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.094870f, 0.132660f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.316878f, 0.455591f, 0.000000f, 0.000000f, 0.000000f},
4405
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.116917f, 0.098433f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.458816f, 0.325834f, 0.000000f},
4406
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.168403f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.172019f, 0.659578f},
4407
};
4408
4409
// For each output (5x4) sample, the weight of each input (6x6) sample.
4410
static const float g_weight_downsample_6x6_to_5x4[20][36] = {
4411
{0.773702f, 0.033711f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.192588f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4412
{0.000000f, 0.633422f, 0.166577f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.170080f, 0.029921f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4413
{0.000000f, 0.000000f, 0.388335f, 0.403694f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.100996f, 0.106975f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4414
{0.000000f, 0.000000f, 0.000000f, 0.161122f, 0.655288f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.183590f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4415
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.801705f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.198295f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4416
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.400989f, 0.025097f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.573915f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4417
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.309345f, 0.085396f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.478694f, 0.126565f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4418
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.194664f, 0.187267f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.292735f, 0.308960f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.016375f, 0.000000f, 0.000000f},
4419
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.098049f, 0.295983f, 0.000000f, 0.000000f, 0.017892f, 0.000000f, 0.111938f, 0.476138f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4420
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.043545f, 0.386448f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.570007f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4421
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.566407f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.402307f, 0.031286f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4422
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.463145f, 0.120696f, 0.000000f, 0.019497f, 0.000000f, 0.000000f, 0.311721f, 0.084942f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4423
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.296730f, 0.300781f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.204639f, 0.197849f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4424
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.122117f, 0.469302f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.102545f, 0.306036f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4425
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.562064f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.041534f, 0.396403f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4426
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.190134f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.773971f, 0.035896f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4427
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.169927f, 0.035812f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.630284f, 0.163977f, 0.000000f, 0.000000f, 0.000000f},
4428
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.112667f, 0.106813f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.393502f, 0.387018f, 0.000000f, 0.000000f},
4429
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.177024f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.170482f, 0.652494f, 0.000000f},
4430
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.192274f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033039f, 0.774687f},
4431
};
4432
4433
// For each output (6x4) sample, the weight of each input (6x6) sample.
4434
static const float g_weight_downsample_6x6_to_6x4[24][36] = {
4435
{0.804254f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195746f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4436
{0.000000f, 0.804177f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195823f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4437
{0.000000f, 0.000000f, 0.799585f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200415f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4438
{0.000000f, 0.000000f, 0.000000f, 0.803604f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.196396f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4439
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.807256f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.192744f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4440
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.805135f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.194865f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4441
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.410532f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.589468f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4442
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.408690f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.591310f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4443
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.416225f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.583775f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4444
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.414279f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.585721f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4445
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.406723f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.593277f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4446
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.402510f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.597490f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4447
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.584784f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.415216f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4448
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.590427f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.409573f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4449
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.590073f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.409927f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4450
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.580348f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.419652f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4451
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.588321f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.411679f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4452
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.587022f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.412978f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4453
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.193281f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.806719f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4454
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.189163f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.810837f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4455
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195108f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.804892f, 0.000000f, 0.000000f, 0.000000f},
4456
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.188290f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.811710f, 0.000000f, 0.000000f},
4457
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.192914f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.807086f, 0.000000f},
4458
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195292f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.804708f},
4459
};
4460
4461
// For each output (2x5) sample, the weight of each input (6x6) sample.
4462
static const float g_weight_downsample_6x6_to_2x5[10][36] = {
4463
{0.387593f, 0.325123f, 0.221104f, 0.066180f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4464
{0.000000f, 0.000000f, 0.065940f, 0.214659f, 0.326737f, 0.392664f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4465
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.309603f, 0.265953f, 0.168780f, 0.060600f, 0.000000f, 0.000000f, 0.084707f, 0.063017f, 0.047341f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4466
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062836f, 0.170767f, 0.261053f, 0.307978f, 0.000000f, 0.000000f, 0.000000f, 0.049286f, 0.064361f, 0.083719f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4467
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.195787f, 0.153943f, 0.095706f, 0.042417f, 0.000000f, 0.000000f, 0.190695f, 0.154435f, 0.097288f, 0.040258f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029471f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4468
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.017536f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.039307f, 0.094677f, 0.158696f, 0.199136f, 0.000000f, 0.000000f, 0.040959f, 0.093353f, 0.155294f, 0.201042f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4469
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.079432f, 0.065739f, 0.044876f, 0.000000f, 0.000000f, 0.000000f, 0.309205f, 0.264700f, 0.167247f, 0.068801f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4470
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.052112f, 0.064829f, 0.081363f, 0.000000f, 0.000000f, 0.064024f, 0.161136f, 0.263743f, 0.312793f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4471
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.393277f, 0.324792f, 0.213188f, 0.068743f, 0.000000f, 0.000000f},
4472
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.066964f, 0.215440f, 0.323005f, 0.394591f},
4473
};
4474
4475
// For each output (3x5) sample, the weight of each input (6x6) sample.
4476
static const float g_weight_downsample_6x6_to_3x5[15][36] = {
4477
{0.620557f, 0.350797f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028646f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4478
{0.000000f, 0.110170f, 0.397489f, 0.386326f, 0.106015f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4479
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.357348f, 0.642652f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4480
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.503934f, 0.275289f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.128280f, 0.092497f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4481
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.102294f, 0.316223f, 0.313576f, 0.092518f, 0.000000f, 0.000000f, 0.000000f, 0.081158f, 0.094231f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4482
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.279079f, 0.502163f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.086083f, 0.132675f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4483
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.325483f, 0.157739f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.322567f, 0.172225f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021986f, 0.000000f, 0.000000f},
4484
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.063342f, 0.192228f, 0.186950f, 0.057021f, 0.000000f, 0.000000f, 0.054779f, 0.186114f, 0.185666f, 0.073901f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4485
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.172195f, 0.331802f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.148212f, 0.322038f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.025751f, 0.000000f, 0.000000f},
4486
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.123726f, 0.081188f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.507339f, 0.287746f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4487
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.093924f, 0.094021f, 0.000000f, 0.000000f, 0.000000f, 0.097070f, 0.315697f, 0.314560f, 0.084728f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4488
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.082560f, 0.129771f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.277014f, 0.486817f, 0.023837f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4489
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.644191f, 0.355809f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4490
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.107771f, 0.387615f, 0.393454f, 0.111159f, 0.000000f},
4491
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.360886f, 0.639114f},
4492
};
4493
4494
// For each output (4x5) sample, the weight of each input (6x6) sample.
4495
static const float g_weight_downsample_6x6_to_4x5[20][36] = {
4496
{0.778254f, 0.190730f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.031016f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4497
{0.000000f, 0.401147f, 0.570243f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028610f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4498
{0.000000f, 0.000000f, 0.000000f, 0.563768f, 0.394241f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.041992f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4499
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.196238f, 0.767548f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.036214f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4500
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.637514f, 0.166734f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.167634f, 0.028118f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4501
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.322778f, 0.473312f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.085399f, 0.118511f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4502
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.471429f, 0.308185f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.118025f, 0.102361f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4503
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.176592f, 0.643933f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.179475f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4504
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.391609f, 0.100882f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.390531f, 0.116978f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4505
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.017259f, 0.000000f, 0.201618f, 0.301555f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.197600f, 0.281968f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4506
{0.000000f, 0.000000f, 0.016735f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.293309f, 0.192842f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.268674f, 0.208109f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.020330f, 0.000000f, 0.000000f, 0.000000f},
4507
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.118514f, 0.380746f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.097621f, 0.381305f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021814f, 0.000000f, 0.000000f},
4508
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.157977f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.657533f, 0.184490f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4509
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.097522f, 0.128585f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.309864f, 0.464029f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4510
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.128900f, 0.090864f, 0.000000f, 0.025393f, 0.000000f, 0.000000f, 0.464029f, 0.290814f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4511
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.024593f, 0.172268f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.173412f, 0.629727f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4512
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029582f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.778816f, 0.191602f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4513
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.036297f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.394454f, 0.569249f, 0.000000f, 0.000000f, 0.000000f},
4514
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.039685f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.561207f, 0.399108f, 0.000000f},
4515
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.034683f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.193744f, 0.771574f},
4516
};
4517
4518
// For each output (5x5) sample, the weight of each input (6x6) sample.
4519
static const float g_weight_downsample_6x6_to_5x5[25][36] = {
4520
{1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4521
{0.000000f, 0.794727f, 0.205273f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4522
{0.000000f, 0.000000f, 0.465125f, 0.484079f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028881f, 0.000000f, 0.000000f, 0.021914f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4523
{0.000000f, 0.000000f, 0.000000f, 0.192446f, 0.772941f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.034613f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4524
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033123f, 0.930510f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.036367f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4525
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.800234f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.199766f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4526
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.629079f, 0.165939f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.166390f, 0.019675f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.018918f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4527
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.378734f, 0.373861f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111597f, 0.135808f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4528
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.177492f, 0.641195f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.181313f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4529
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028722f, 0.761781f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.209497f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4530
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.475763f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.471882f, 0.029551f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022804f, 0.000000f, 0.000000f},
4531
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.382714f, 0.116167f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.383377f, 0.117742f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4532
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.254151f, 0.249987f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.241972f, 0.253891f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4533
{0.000000f, 0.000000f, 0.017950f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.122722f, 0.376847f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.095099f, 0.369986f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.017396f, 0.000000f, 0.000000f, 0.000000f},
4534
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029442f, 0.472507f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.471751f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.026300f, 0.000000f, 0.000000f},
4535
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.190299f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.776924f, 0.032778f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4536
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.171498f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.666385f, 0.162117f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4537
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.125713f, 0.117624f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.387084f, 0.369579f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4538
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.028493f, 0.169318f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.173770f, 0.628419f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4539
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.198951f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.035634f, 0.765415f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4540
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.963102f, 0.036898f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4541
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.030322f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.771054f, 0.198624f, 0.000000f, 0.000000f, 0.000000f},
4542
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021816f, 0.020944f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.481761f, 0.475479f, 0.000000f, 0.000000f},
4543
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.032816f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.198418f, 0.768766f, 0.000000f},
4544
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033338f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966662f},
4545
};
4546
4547
// For each output (6x5) sample, the weight of each input (6x6) sample.
4548
static const float g_weight_downsample_6x6_to_6x5[30][36] = {
4549
{0.966284f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033716f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4550
{0.000000f, 0.966287f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033713f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4551
{0.000000f, 0.000000f, 0.966287f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033713f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4552
{0.000000f, 0.000000f, 0.000000f, 0.966290f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033710f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4553
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966125f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033875f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4554
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966273f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033727f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4555
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.800857f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.199143f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4556
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.773463f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.201165f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.025372f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4557
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.805735f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.194265f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4558
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.788791f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.211209f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4559
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.785975f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.214025f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4560
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.787286f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.212714f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4561
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.490845f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.487242f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.021913f, 0.000000f, 0.000000f},
4562
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.490663f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.486878f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.022459f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4563
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.505452f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.494548f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4564
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.495383f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.482180f, 0.000000f, 0.022437f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4565
{0.000000f, 0.000000f, 0.022727f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.496545f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.480728f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4566
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.486261f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.486387f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.027352f, 0.000000f, 0.000000f},
4567
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.196272f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.803728f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4568
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.210059f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.789941f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4569
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.212947f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.787053f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4570
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.215261f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.784739f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4571
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.209116f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.790884f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4572
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.205881f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.794119f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4573
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033710f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966290f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4574
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033711f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966289f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4575
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033713f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966287f, 0.000000f, 0.000000f, 0.000000f},
4576
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033719f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966281f, 0.000000f, 0.000000f},
4577
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033712f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966288f, 0.000000f},
4578
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033712f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966288f},
4579
};
4580
4581
// For each output (2x6) sample, the weight of each input (6x6) sample.
4582
static const float g_weight_downsample_6x6_to_2x6[12][36] = {
4583
{0.388815f, 0.325435f, 0.220189f, 0.065562f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4584
{0.000000f, 0.000000f, 0.064515f, 0.214042f, 0.327700f, 0.393742f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4585
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398821f, 0.326200f, 0.217851f, 0.057128f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4586
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062546f, 0.216408f, 0.322269f, 0.398777f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4587
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.396575f, 0.330631f, 0.212857f, 0.059936f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4588
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.070253f, 0.215326f, 0.317576f, 0.396845f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4589
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.398130f, 0.324745f, 0.213572f, 0.063553f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4590
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.062009f, 0.216253f, 0.324683f, 0.397055f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4591
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.397646f, 0.321346f, 0.212334f, 0.068675f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4592
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.067073f, 0.210768f, 0.318165f, 0.403993f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4593
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.395756f, 0.325048f, 0.211862f, 0.067334f, 0.000000f, 0.000000f},
4594
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.065475f, 0.214113f, 0.324009f, 0.396403f},
4595
};
4596
4597
// For each output (3x6) sample, the weight of each input (6x6) sample.
4598
static const float g_weight_downsample_6x6_to_3x6[18][36] = {
4599
{0.640136f, 0.359864f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4600
{0.000000f, 0.108112f, 0.399968f, 0.388087f, 0.103833f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4601
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.356122f, 0.643878f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4602
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.646308f, 0.353692f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4603
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.122937f, 0.390166f, 0.380558f, 0.106339f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4604
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.355015f, 0.644985f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4605
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.642874f, 0.357126f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4606
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.111570f, 0.398638f, 0.387639f, 0.102153f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4607
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359134f, 0.640866f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4608
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.640159f, 0.359841f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4609
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.098908f, 0.393303f, 0.400421f, 0.107369f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4610
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.357119f, 0.642881f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4611
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.640541f, 0.359459f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4612
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.116318f, 0.397635f, 0.395084f, 0.090964f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4613
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.361948f, 0.638052f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4614
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.645448f, 0.354552f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4615
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.106981f, 0.389214f, 0.395056f, 0.108749f, 0.000000f},
4616
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.359592f, 0.640408f},
4617
};
4618
4619
// For each output (4x6) sample, the weight of each input (6x6) sample.
4620
static const float g_weight_downsample_6x6_to_4x6[24][36] = {
4621
{0.806928f, 0.193072f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4622
{0.000000f, 0.412216f, 0.587784f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4623
{0.000000f, 0.000000f, 0.000000f, 0.590075f, 0.409925f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4624
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200682f, 0.799318f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4625
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.809822f, 0.190178f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4626
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.423474f, 0.576526f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4627
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.580816f, 0.419184f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4628
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.190240f, 0.809760f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4629
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.800320f, 0.199680f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4630
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.408625f, 0.591375f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4631
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.583392f, 0.416608f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4632
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.200372f, 0.799628f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4633
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.798914f, 0.201086f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4634
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.411243f, 0.588757f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4635
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.586520f, 0.413480f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4636
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.203588f, 0.796412f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4637
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.802040f, 0.197960f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4638
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.411175f, 0.588825f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4639
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.599873f, 0.400127f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4640
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.193060f, 0.806940f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4641
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.806073f, 0.193927f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4642
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.408705f, 0.591295f, 0.000000f, 0.000000f, 0.000000f},
4643
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.585711f, 0.414289f, 0.000000f},
4644
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.197672f, 0.802328f},
4645
};
4646
4647
// For each output (5x6) sample, the weight of each input (6x6) sample.
4648
static const float g_weight_downsample_6x6_to_5x6[30][36] = {
4649
{0.966289f, 0.033711f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4650
{0.000000f, 0.794848f, 0.205152f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4651
{0.000000f, 0.000000f, 0.473272f, 0.496525f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.030202f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4652
{0.000000f, 0.000000f, 0.000000f, 0.196955f, 0.803045f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4653
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033711f, 0.966289f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4654
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966284f, 0.033716f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4655
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.795787f, 0.204213f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4656
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.500928f, 0.499072f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4657
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.198603f, 0.801397f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4658
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033716f, 0.966284f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4659
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966283f, 0.033717f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4660
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.788424f, 0.211576f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4661
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.029276f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.484227f, 0.486497f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4662
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.201499f, 0.798501f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4663
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033724f, 0.966276f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4664
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966283f, 0.033717f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4665
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.791336f, 0.208664f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4666
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.490188f, 0.509812f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4667
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.204835f, 0.795165f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4668
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033703f, 0.966297f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4669
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.966276f, 0.033724f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4670
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.799276f, 0.200724f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4671
{0.000000f, 0.000000f, 0.022501f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.494443f, 0.483055f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4672
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.205967f, 0.794033f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4673
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033726f, 0.966274f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4674
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.965971f, 0.034029f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4675
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.798640f, 0.201360f, 0.000000f, 0.000000f, 0.000000f},
4676
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.502577f, 0.497423f, 0.000000f, 0.000000f},
4677
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.203927f, 0.796073f, 0.000000f},
4678
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.033706f, 0.966294f},
4679
};
4680
4681
// For each output (6x6) sample, the weight of each input (6x6) sample.
4682
static const float g_weight_downsample_6x6_to_6x6[36][36] = {
4683
{1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4684
{0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4685
{0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4686
{0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4687
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4688
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4689
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4690
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4691
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4692
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4693
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4694
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4695
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4696
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4697
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4698
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4699
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4700
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4701
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4702
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4703
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4704
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4705
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4706
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4707
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4708
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4709
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4710
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4711
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4712
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4713
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4714
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f},
4715
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f, 0.000000f},
4716
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f, 0.000000f},
4717
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f, 0.000000f},
4718
{0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 1.000000f},
4719
};
4720
4721
//--------------------------------------------------------------------------------------------------------------------------
4722
4723
const struct downsample_matrix_6x6
4724
{
4725
uint32_t m_grid_width, m_grid_height;
4726
const float* m_p;
4727
} g_downsample_matrices_6x6[] = {
4728
{ 2, 2, (const float*)g_weight_downsample_6x6_to_2x2 },
4729
{ 3, 2, (const float*)g_weight_downsample_6x6_to_3x2 },
4730
{ 4, 2, (const float*)g_weight_downsample_6x6_to_4x2 },
4731
{ 5, 2, (const float*)g_weight_downsample_6x6_to_5x2 },
4732
{ 6, 2, (const float*)g_weight_downsample_6x6_to_6x2 },
4733
{ 2, 3, (const float*)g_weight_downsample_6x6_to_2x3 },
4734
{ 3, 3, (const float*)g_weight_downsample_6x6_to_3x3 },
4735
{ 4, 3, (const float*)g_weight_downsample_6x6_to_4x3 },
4736
{ 5, 3, (const float*)g_weight_downsample_6x6_to_5x3 },
4737
{ 6, 3, (const float*)g_weight_downsample_6x6_to_6x3 },
4738
{ 2, 4, (const float*)g_weight_downsample_6x6_to_2x4 },
4739
{ 3, 4, (const float*)g_weight_downsample_6x6_to_3x4 },
4740
{ 4, 4, (const float*)g_weight_downsample_6x6_to_4x4 },
4741
{ 5, 4, (const float*)g_weight_downsample_6x6_to_5x4 },
4742
{ 6, 4, (const float*)g_weight_downsample_6x6_to_6x4 },
4743
{ 2, 5, (const float*)g_weight_downsample_6x6_to_2x5 },
4744
{ 3, 5, (const float*)g_weight_downsample_6x6_to_3x5 },
4745
{ 4, 5, (const float*)g_weight_downsample_6x6_to_4x5 },
4746
{ 5, 5, (const float*)g_weight_downsample_6x6_to_5x5 },
4747
{ 6, 5, (const float*)g_weight_downsample_6x6_to_6x5 },
4748
{ 2, 6, (const float*)g_weight_downsample_6x6_to_2x6 },
4749
{ 3, 6, (const float*)g_weight_downsample_6x6_to_3x6 },
4750
{ 4, 6, (const float*)g_weight_downsample_6x6_to_4x6 },
4751
{ 5, 6, (const float*)g_weight_downsample_6x6_to_5x6 },
4752
{ 6, 6, (const float*)g_weight_downsample_6x6_to_6x6 }
4753
};
4754
//const uint32_t NUM_DOWNSAMPLE_MATRICES_6x6 = sizeof(g_downsample_matrices_6x6) / sizeof(g_downsample_matrices_6x6[0]);
4755
4756
//--------------------------------------------------------------------------------------------------------------------------
4757
4758
const float* get_6x6_downsample_matrix(uint32_t grid_width, uint32_t grid_height)
4759
{
4760
// TODO: Use hash or map lookup.
4761
for (const auto& m : g_downsample_matrices_6x6)
4762
if ((m.m_grid_width == grid_width) && (m.m_grid_height == grid_height))
4763
return m.m_p;
4764
4765
assert(0);
4766
return nullptr;
4767
}
4768
4769
// Resamples a block of weights to a smaller grid using a precomputed weight matrix.
// pMatrix_weights holds one row per destination sample (row-major over [wy][wx]); each row has
// bx*by coefficients, one per source sample. Each output is the coefficient-weighted sum of the
// source weights plus 0.5, truncated to int and clamped to [0, 64].
void downsample_weight_grid(
	const float* pMatrix_weights,
	uint32_t bx, uint32_t by,		// source/from dimension (block size)
	uint32_t wx, uint32_t wy,		// dest/to dimension (grid size)
	const uint8_t* pSrc_weights,	// these are dequantized weights, NOT ISE symbols, [by][bx]
	uint8_t* pDst_weights)			// [wy][wx]
{
	const uint32_t num_src_samples = bx * by;
	const uint32_t num_dst_samples = wx * wy;

	// The destination grid is visited in row-major order, which matches the order of the matrix rows.
	const float* pRow = pMatrix_weights;

	for (uint32_t dst_index = 0; dst_index < num_dst_samples; dst_index++, pRow += num_src_samples)
	{
		// Start at 0.5 so the truncation below rounds non-negative sums to the nearest integer.
		float sum = 0.5f;

		for (uint32_t src_index = 0; src_index < num_src_samples; src_index++)
		{
			const float coeff = pRow[src_index];

			// Zero coefficients contribute nothing, so skip the multiply.
			if (coeff != 0.0f)
				sum += coeff * (float)pSrc_weights[src_index];
		}

		int weight = (int)sum;
		if (weight < 0)
			weight = 0;
		else if (weight > 64)
			weight = 64;

		pDst_weights[dst_index] = (uint8_t)weight;
	}
}
4794
4795
//--------------------------------------------------------------------------------------------------------------------------
4796
4797
void downsample_ise_weights(
4798
uint32_t dequant_weight_ise_range, uint32_t quant_weight_ise_range,
4799
uint32_t block_w, uint32_t block_h,
4800
uint32_t grid_w, uint32_t grid_h,
4801
const uint8_t* pSrc_weights, uint8_t* pDst_weights)
4802
{
4803
assert((block_w <= MAX_ASTC_HDR_BLOCK_W) && (block_h <= MAX_ASTC_HDR_BLOCK_H));
4804
assert((grid_w >= 2) && (grid_w <= MAX_ASTC_HDR_BLOCK_W));
4805
assert((grid_h >= 2) && (grid_h <= MAX_ASTC_HDR_BLOCK_H));
4806
4807
assert(dequant_weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE);
4808
assert(dequant_weight_ise_range <= astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE);
4809
4810
assert(quant_weight_ise_range >= astc_helpers::FIRST_VALID_WEIGHT_ISE_RANGE);
4811
assert(quant_weight_ise_range <= astc_helpers::LAST_VALID_WEIGHT_ISE_RANGE);
4812
4813
if ((block_w == grid_w) && (block_h == grid_h))
4814
{
4815
if (dequant_weight_ise_range != quant_weight_ise_range)
4816
{
4817
basist::astc_6x6_hdr::requantize_astc_weights(block_w * block_h, pSrc_weights, dequant_weight_ise_range, pDst_weights, quant_weight_ise_range);
4818
}
4819
else
4820
{
4821
if (pDst_weights != pSrc_weights)
4822
memcpy(pDst_weights, pSrc_weights, block_w * block_h);
4823
}
4824
4825
return;
4826
}
4827
4828
uint8_t desired_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
4829
4830
const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(dequant_weight_ise_range).m_ISE_to_val;
4831
4832
for (uint32_t by = 0; by < block_h; by++)
4833
for (uint32_t bx = 0; bx < block_w; bx++)
4834
desired_weights[bx + by * block_w] = dequant_tab[pSrc_weights[bx + by * block_w]];
4835
4836
uint8_t downsampled_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
4837
4838
const float* pDownsample_matrix = get_6x6_downsample_matrix(grid_w, grid_h);
4839
assert(pDownsample_matrix);
4840
4841
downsample_weight_grid(
4842
pDownsample_matrix,
4843
block_w, block_h, // source/from dimension (block size)
4844
grid_w, grid_h, // dest/to dimension (grid size)
4845
desired_weights, // these are dequantized weights, NOT ISE symbols, [by][bx]
4846
downsampled_weights); // [wy][wx]
4847
4848
const auto& weight_quant_tab = astc_helpers::g_dequant_tables.get_weight_tab(quant_weight_ise_range).m_val_to_ise;
4849
4850
for (uint32_t gy = 0; gy < grid_h; gy++)
4851
for (uint32_t gx = 0; gx < grid_w; gx++)
4852
pDst_weights[gx + gy * grid_w] = weight_quant_tab[downsampled_weights[gx + gy * grid_w]];
4853
}
4854
4855
void downsample_ise_weights_dual_plane(
4856
uint32_t dequant_weight_ise_range, uint32_t quant_weight_ise_range,
4857
uint32_t block_w, uint32_t block_h,
4858
uint32_t grid_w, uint32_t grid_h,
4859
const uint8_t* pSrc_weights0, const uint8_t* pSrc_weights1,
4860
uint8_t* pDst_weights)
4861
{
4862
uint8_t downsampled_weights0[MAX_ASTC_HDR_BLOCK_W * MAX_ASTC_HDR_BLOCK_H], downsampled_weights1[MAX_ASTC_HDR_BLOCK_W * MAX_ASTC_HDR_BLOCK_H];
4863
4864
downsample_ise_weights(
4865
dequant_weight_ise_range, quant_weight_ise_range,
4866
block_w, block_h,
4867
grid_w, grid_h,
4868
pSrc_weights0, downsampled_weights0);
4869
4870
downsample_ise_weights(
4871
dequant_weight_ise_range, quant_weight_ise_range,
4872
block_w, block_h,
4873
grid_w, grid_h,
4874
pSrc_weights1, downsampled_weights1);
4875
4876
const uint32_t num_grid_samples = grid_w * grid_h;
4877
for (uint32_t i = 0; i < num_grid_samples; i++)
4878
{
4879
pDst_weights[i * 2 + 0] = downsampled_weights0[i];
4880
pDst_weights[i * 2 + 1] = downsampled_weights1[i];
4881
}
4882
}
4883
4884
static bool refine_endpoints_mode11(
4885
uint32_t endpoint_ise_range,
4886
uint8_t* pEndpoint_vals, // the endpoints to optimize
4887
uint32_t block_w, uint32_t block_h, // block dimensions
4888
uint32_t grid_w, uint32_t grid_h, const uint8_t* pWeights, uint32_t weight_ise_range, // weight grid
4889
uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
4890
const uint8_t* pPixel_block_ofs, // maps this subset's pixels to block offsets
4891
astc_hdr_codec_base_options& coptions,
4892
bool direct_only, int first_submode, int last_submode,
4893
opt_mode_t opt_mode)
4894
{
4895
if (opt_mode == cNoOpt)
4896
return false;
4897
4898
const uint32_t num_block_pixels = block_w * block_h;
4899
4900
uint8_t def_pixel_block_ofs[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
4901
if (!pPixel_block_ofs)
4902
{
4903
for (uint32_t i = 0; i < num_block_pixels; i++)
4904
def_pixel_block_ofs[i] = (uint8_t)i;
4905
4906
pPixel_block_ofs = def_pixel_block_ofs;
4907
}
4908
4909
const uint32_t num_weights = grid_w * grid_h;
4910
4911
uint8_t dequantized_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
4912
for (uint32_t i = 0; i < num_weights; i++)
4913
dequantized_raw_weights[i] = astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_ISE_to_val[pWeights[i]];
4914
4915
uint8_t upsampled_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // raw weights, NOT ISE
4916
astc_helpers::upsample_weight_grid(block_w, block_h, grid_w, grid_h, dequantized_raw_weights, upsampled_weights);
4917
4918
aabb3F color_box_q16(cInitExpand);
4919
4920
uint8_t trial_blk_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // raw weights, NOT ISE
4921
float trial_blk_raw_weightsf[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
4922
for (uint32_t i = 0; i < num_pixels; i++)
4923
{
4924
color_box_q16.expand(pBlock_pixels_q16[i]);
4925
4926
assert(pPixel_block_ofs[i] < num_block_pixels);
4927
4928
trial_blk_raw_weights[i] = upsampled_weights[pPixel_block_ofs[i]];
4929
trial_blk_raw_weightsf[i] = (float)trial_blk_raw_weights[i] * (1.0f / 64.0f);
4930
}
4931
4932
vec3F l_q16, h_q16;
4933
if (opt_mode == cOrdinaryLeastSquares)
4934
{
4935
if (!compute_least_squares_endpoints_rgb_raw_weights(num_pixels, trial_blk_raw_weights, &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16))
4936
return false;
4937
}
4938
else if ((opt_mode == cWeightedLeastSquares) || (opt_mode == cWeightedLeastSquaresHeavy))
4939
{
4940
vec3F block_mean_color_q16(calc_mean(num_pixels, pBlock_pixels_q16));
4941
vec3F block_axis_q16(calc_rgb_pca(num_pixels, pBlock_pixels_q16, block_mean_color_q16));
4942
float l = BIG_FLOAT_VAL, h = -BIG_FLOAT_VAL;
4943
for (uint32_t i = 0; i < num_pixels; i++)
4944
{
4945
vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);
4946
float kd = k.dot(block_axis_q16);
4947
if (kd < l)
4948
l = kd;
4949
if (kd > h)
4950
h = kd;
4951
}
4952
float emphasis_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
4953
if (h == l)
4954
{
4955
for (uint32_t i = 0; i < num_pixels; i++)
4956
emphasis_weights[i] = 1.0f;
4957
}
4958
else
4959
{
4960
float mid = (0.0f - l) / (h - l);
4961
mid = clamp(mid, .01f, .99f);
4962
4963
float lw = LOW_EMPHASIS_WEIGHT, mw = MIDDLE_EMPHASIS_WEIGHT, hw = HIGH_EMPHASIS_WEIGHT;
4964
if (opt_mode == cWeightedLeastSquaresHeavy)
4965
lw = LOW_EMPHASIS_WEIGHT_HEAVY, mw = MIDDLE_EMPHASIS_WEIGHT_HEAVY, hw = HIGH_EMPHASIS_WEIGHT_HEAVY;
4966
4967
for (uint32_t i = 0; i < num_pixels; i++)
4968
{
4969
vec3F k(vec3F(pBlock_pixels_q16[i]) - block_mean_color_q16);
4970
float kd = k.dot(block_axis_q16);
4971
4972
assert((kd >= l) && (kd <= h));
4973
4974
float v = (kd - l) / (h - l);
4975
4976
if (v < mid)
4977
v = lerp(lw, mw, v / mid);
4978
else
4979
v = lerp(mw, hw, (v - mid) * (1.0f - mid));
4980
4981
emphasis_weights[i] = v;
4982
}
4983
}
4984
4985
if (!compute_weighted_least_squares_endpoints_rgb(num_pixels, nullptr, nullptr, trial_blk_raw_weightsf, emphasis_weights, &l_q16, &h_q16, pBlock_pixels_q16, color_box_q16))
4986
return false;
4987
}
4988
else
4989
{
4990
assert(opt_mode == cWeightedAverage);
4991
4992
l_q16.set(0.0f);
4993
float total_low = 0.0f;
4994
4995
h_q16.set(0.0f);
4996
float total_high = 0.0f;
4997
4998
for (uint32_t i = 0; i < num_pixels; i++)
4999
{
5000
vec3F p(pBlock_pixels_q16[i]);
5001
float lerp = (float)trial_blk_raw_weights[i] * (1.0f / 64.0f);
5002
5003
l_q16 += p * (1.0f - lerp);
5004
total_low += (1.0f - lerp);
5005
5006
h_q16 += p * lerp;
5007
total_high += lerp;
5008
}
5009
5010
if (total_low != 0.0f)
5011
l_q16 *= (1.0f / total_low);
5012
else
5013
return false;
5014
5015
if (total_high != 0.0f)
5016
h_q16 *= (1.0f / total_high);
5017
else
5018
return false;
5019
}
5020
5021
uint8_t trial_endpoints[NUM_MODE11_ENDPOINTS];
5022
5023
uint32_t submode_used;
5024
5025
bool pack_succeeded = pack_mode11(l_q16, h_q16, endpoint_ise_range, trial_endpoints, coptions, direct_only, first_submode, last_submode, false, submode_used);
5026
if (!pack_succeeded)
5027
return false;
5028
5029
int cur_e[2][3];
5030
if (!decode_mode11_to_qlog12(pEndpoint_vals, cur_e, endpoint_ise_range))
5031
return false;
5032
5033
int trial_e[2][3];
5034
if (!decode_mode11_to_qlog12(trial_endpoints, trial_e, endpoint_ise_range))
5035
return false;
5036
5037
for (uint32_t i = 0; i < 3; i++)
5038
{
5039
cur_e[0][i] <<= 4;
5040
cur_e[1][i] <<= 4;
5041
5042
trial_e[0][i] <<= 4;
5043
trial_e[1][i] <<= 4;
5044
}
5045
5046
const float R_WEIGHT = coptions.m_r_err_scale, G_WEIGHT = coptions.m_g_err_scale;
5047
5048
double cur_error = 0, trial_error = 0;
5049
5050
for (uint32_t p = 0; p < num_pixels; p++)
5051
{
5052
const half_float* pDesired_half = &pBlock_pixels_half[p][0];
5053
5054
const double desired_half_r_q = q(pDesired_half[0], coptions.m_q_log_bias), desired_half_g_q = q(pDesired_half[1], coptions.m_q_log_bias), desired_half_b_q = q(pDesired_half[2], coptions.m_q_log_bias);
5055
5056
const uint32_t c = trial_blk_raw_weights[p];
5057
assert(c <= 64);
5058
5059
{
5060
half_float rf, gf, bf;
5061
5062
{
5063
uint32_t r0 = cur_e[0][0], r1 = cur_e[1][0];
5064
int ri = (r0 * (64 - c) + r1 * c + 32) / 64;
5065
rf = astc_helpers::qlog16_to_half(ri);
5066
}
5067
5068
{
5069
uint32_t g0 = cur_e[0][1], g1 = cur_e[1][1];
5070
int gi = (g0 * (64 - c) + g1 * c + 32) / 64;
5071
gf = astc_helpers::qlog16_to_half(gi);
5072
}
5073
5074
{
5075
uint32_t b0 = cur_e[0][2], b1 = cur_e[1][2];
5076
int bi = (b0 * (64 - c) + b1 * c + 32) / 64;
5077
bf = astc_helpers::qlog16_to_half(bi);
5078
}
5079
5080
const double decoded_half_q0 = q(rf, coptions.m_q_log_bias), decoded_half_q1 = q(gf, coptions.m_q_log_bias), decoded_half_q2 = q(bf, coptions.m_q_log_bias);
5081
5082
const double rd = decoded_half_q0 - desired_half_r_q, gd = decoded_half_q1 - desired_half_g_q, bd = decoded_half_q2 - desired_half_b_q;
5083
5084
cur_error += R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;
5085
}
5086
5087
{
5088
half_float rf, gf, bf;
5089
5090
{
5091
uint32_t r0 = trial_e[0][0], r1 = trial_e[1][0];
5092
int ri = (r0 * (64 - c) + r1 * c + 32) / 64;
5093
rf = astc_helpers::qlog16_to_half(ri);
5094
}
5095
5096
{
5097
uint32_t g0 = trial_e[0][1], g1 = trial_e[1][1];
5098
int gi = (g0 * (64 - c) + g1 * c + 32) / 64;
5099
gf = astc_helpers::qlog16_to_half(gi);
5100
}
5101
5102
{
5103
uint32_t b0 = trial_e[0][2], b1 = trial_e[1][2];
5104
int bi = (b0 * (64 - c) + b1 * c + 32) / 64;
5105
bf = astc_helpers::qlog16_to_half(bi);
5106
}
5107
5108
const double decoded_half_q0 = q(rf, coptions.m_q_log_bias), decoded_half_q1 = q(gf, coptions.m_q_log_bias), decoded_half_q2 = q(bf, coptions.m_q_log_bias);
5109
5110
const double rd = decoded_half_q0 - desired_half_r_q, gd = decoded_half_q1 - desired_half_g_q, bd = decoded_half_q2 - desired_half_b_q;
5111
5112
trial_error += R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;
5113
}
5114
5115
} // p
5116
5117
if (trial_error < cur_error)
5118
{
5119
memcpy(pEndpoint_vals, trial_endpoints, NUM_MODE11_ENDPOINTS);
5120
return true;
5121
}
5122
5123
return false;
5124
}
5125
5126
static bool refine_endpoints_mode7(
5127
uint32_t endpoint_ise_range,
5128
uint8_t* pEndpoint_vals, // the endpoints to optimize
5129
uint32_t block_w, uint32_t block_h, // block dimensions
5130
uint32_t grid_w, uint32_t grid_h, const uint8_t* pWeights, uint32_t weight_ise_range, // weight grid
5131
uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
5132
const uint8_t* pPixel_block_ofs, // maps this subset's pixels to block offsets
5133
astc_hdr_codec_base_options& coptions,
5134
int first_submode, int last_submode)
5135
{
5136
const uint32_t num_block_pixels = block_w * block_h;
5137
5138
uint8_t def_pixel_block_ofs[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
5139
if (!pPixel_block_ofs)
5140
{
5141
for (uint32_t i = 0; i < num_block_pixels; i++)
5142
def_pixel_block_ofs[i] = (uint8_t)i;
5143
5144
pPixel_block_ofs = def_pixel_block_ofs;
5145
}
5146
5147
const uint32_t num_weights = grid_w * grid_h;
5148
5149
uint8_t dequantized_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS];
5150
for (uint32_t i = 0; i < num_weights; i++)
5151
dequantized_raw_weights[i] = astc_helpers::g_dequant_tables.get_weight_tab(weight_ise_range).m_ISE_to_val[pWeights[i]];
5152
5153
uint8_t upsampled_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // raw weights, NOT ISE
5154
astc_helpers::upsample_weight_grid(block_w, block_h, grid_w, grid_h, dequantized_raw_weights, upsampled_weights);
5155
5156
uint8_t trial_blk_raw_weights[MAX_ASTC_HDR_ENC_BLOCK_PIXELS]; // raw weights, NOT ISE
5157
for (uint32_t i = 0; i < num_pixels; i++)
5158
{
5159
assert(pPixel_block_ofs[i] < num_block_pixels);
5160
5161
trial_blk_raw_weights[i] = upsampled_weights[pPixel_block_ofs[i]];
5162
}
5163
5164
//--
5165
5166
int cur_e[2][3];
5167
int cur_s = 0;
5168
if (!decode_mode7_to_qlog12(pEndpoint_vals, cur_e, &cur_s, endpoint_ise_range))
5169
return false;
5170
5171
cur_s <<= 4;
5172
5173
vec3F block_mean_color_q16(calc_mean(num_pixels, pBlock_pixels_q16));
5174
5175
vec3F new_high_color_q16(block_mean_color_q16);
5176
5177
const float one_over_num_pixels = 1.0f / (float)num_pixels;
5178
5179
for (uint32_t i = 0; i < num_pixels; i++)
5180
{
5181
float lerp = trial_blk_raw_weights[i] * (1.0f / 64.0f);
5182
5183
float k = (float)cur_s * (1.0f - lerp) * one_over_num_pixels;
5184
new_high_color_q16[0] += k;
5185
new_high_color_q16[1] += k;
5186
new_high_color_q16[2] += k;
5187
}
5188
5189
// Given a set of selectors and a high color, try to compute a better S.
5190
float t = 0.0f;
5191
5192
for (uint32_t i = 0; i < num_pixels; i++)
5193
{
5194
float lerp = trial_blk_raw_weights[i] * (1.0f / 64.0f);
5195
5196
t += (1.0f) - lerp;
5197
}
5198
5199
t *= one_over_num_pixels;
5200
5201
if (fabs(t) < .0000125f)
5202
return false;
5203
5204
uint8_t trial_endpoints[NUM_MODE7_ENDPOINTS];
5205
5206
uint32_t submode_used;
5207
if (!pack_mode7(new_high_color_q16, (float)cur_s, endpoint_ise_range, trial_endpoints, weight_ise_range, coptions, first_submode, last_submode, false, submode_used))
5208
return false;
5209
5210
int trial_e[2][3];
5211
if (!decode_mode7_to_qlog12(trial_endpoints, trial_e, nullptr, endpoint_ise_range))
5212
return false;
5213
5214
vec3F cur_h_q16((float)(trial_e[1][0] << 4), (float)(trial_e[1][1] << 4), (float)(trial_e[1][2] << 4));
5215
5216
float s_r = (cur_h_q16[0] - block_mean_color_q16[0]) / t;
5217
//float s_g = (cur_h_q16[1] - block_mean_color_q16[1]) / t;
5218
//float s_b = (cur_h_q16[2] - block_mean_color_q16[2]) / t;
5219
float new_s_q16 = ceilf(s_r);
5220
5221
if (!pack_mode7(new_high_color_q16, new_s_q16, endpoint_ise_range, trial_endpoints, weight_ise_range, coptions, first_submode, last_submode, false, submode_used))
5222
return false;
5223
5224
if (!decode_mode7_to_qlog12(trial_endpoints, trial_e, nullptr, endpoint_ise_range))
5225
return false;
5226
5227
// --
5228
5229
for (uint32_t i = 0; i < 3; i++)
5230
{
5231
cur_e[0][i] <<= 4;
5232
cur_e[1][i] <<= 4;
5233
5234
trial_e[0][i] <<= 4;
5235
trial_e[1][i] <<= 4;
5236
}
5237
5238
const float R_WEIGHT = coptions.m_r_err_scale, G_WEIGHT = coptions.m_g_err_scale;
5239
5240
double cur_error = 0, trial_error = 0;
5241
5242
for (uint32_t p = 0; p < num_pixels; p++)
5243
{
5244
const half_float* pDesired_half = &pBlock_pixels_half[p][0];
5245
5246
const double desired_half_r_q = q(pDesired_half[0], coptions.m_q_log_bias), desired_half_g_q = q(pDesired_half[1], coptions.m_q_log_bias), desired_half_b_q = q(pDesired_half[2], coptions.m_q_log_bias);
5247
5248
const uint32_t c = trial_blk_raw_weights[p];
5249
assert(c <= 64);
5250
5251
{
5252
half_float rf, gf, bf;
5253
5254
{
5255
uint32_t r0 = cur_e[0][0], r1 = cur_e[1][0];
5256
int ri = (r0 * (64 - c) + r1 * c + 32) / 64;
5257
rf = astc_helpers::qlog16_to_half(ri);
5258
}
5259
5260
{
5261
uint32_t g0 = cur_e[0][1], g1 = cur_e[1][1];
5262
int gi = (g0 * (64 - c) + g1 * c + 32) / 64;
5263
gf = astc_helpers::qlog16_to_half(gi);
5264
}
5265
5266
{
5267
uint32_t b0 = cur_e[0][2], b1 = cur_e[1][2];
5268
int bi = (b0 * (64 - c) + b1 * c + 32) / 64;
5269
bf = astc_helpers::qlog16_to_half(bi);
5270
}
5271
5272
const double decoded_half_q0 = q(rf, coptions.m_q_log_bias), decoded_half_q1 = q(gf, coptions.m_q_log_bias), decoded_half_q2 = q(bf, coptions.m_q_log_bias);
5273
5274
const double rd = decoded_half_q0 - desired_half_r_q, gd = decoded_half_q1 - desired_half_g_q, bd = decoded_half_q2 - desired_half_b_q;
5275
5276
cur_error += R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;
5277
}
5278
5279
{
5280
half_float rf, gf, bf;
5281
5282
{
5283
uint32_t r0 = trial_e[0][0], r1 = trial_e[1][0];
5284
int ri = (r0 * (64 - c) + r1 * c + 32) / 64;
5285
rf = astc_helpers::qlog16_to_half(ri);
5286
}
5287
5288
{
5289
uint32_t g0 = trial_e[0][1], g1 = trial_e[1][1];
5290
int gi = (g0 * (64 - c) + g1 * c + 32) / 64;
5291
gf = astc_helpers::qlog16_to_half(gi);
5292
}
5293
5294
{
5295
uint32_t b0 = trial_e[0][2], b1 = trial_e[1][2];
5296
int bi = (b0 * (64 - c) + b1 * c + 32) / 64;
5297
bf = astc_helpers::qlog16_to_half(bi);
5298
}
5299
5300
const double decoded_half_q0 = q(rf, coptions.m_q_log_bias), decoded_half_q1 = q(gf, coptions.m_q_log_bias), decoded_half_q2 = q(bf, coptions.m_q_log_bias);
5301
5302
const double rd = decoded_half_q0 - desired_half_r_q, gd = decoded_half_q1 - desired_half_g_q, bd = decoded_half_q2 - desired_half_b_q;
5303
5304
trial_error += R_WEIGHT * (rd * rd) + G_WEIGHT * (gd * gd) + bd * bd;
5305
}
5306
5307
} // p
5308
5309
if (trial_error < cur_error)
5310
{
5311
memcpy(pEndpoint_vals, trial_endpoints, NUM_MODE7_ENDPOINTS);
5312
return true;
5313
}
5314
5315
return false;
5316
}
5317
5318
bool refine_endpoints(
5319
uint32_t cem,
5320
uint32_t endpoint_ise_range,
5321
uint8_t* pEndpoint_vals, // the endpoints to optimize
5322
uint32_t block_w, uint32_t block_h, // block dimensions
5323
uint32_t grid_w, uint32_t grid_h, const uint8_t* pWeights, uint32_t weight_ise_range, // weight grid
5324
uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
5325
const uint8_t* pPixel_block_ofs, // maps this subset's pixels to block offsets
5326
astc_hdr_codec_base_options& coptions, opt_mode_t opt_mode)
5327
{
5328
if (cem == 7)
5329
{
5330
return refine_endpoints_mode7(
5331
endpoint_ise_range,
5332
pEndpoint_vals,
5333
block_w, block_h,
5334
grid_w, grid_h, pWeights, weight_ise_range,
5335
num_pixels, pBlock_pixels_half, pBlock_pixels_q16,
5336
pPixel_block_ofs,
5337
coptions,
5338
FIRST_MODE7_SUBMODE_INDEX, MAX_MODE7_SUBMODE_INDEX);
5339
}
5340
else if (cem == 11)
5341
{
5342
return refine_endpoints_mode11(
5343
endpoint_ise_range,
5344
pEndpoint_vals,
5345
block_w, block_h,
5346
grid_w, grid_h, pWeights, weight_ise_range,
5347
num_pixels, pBlock_pixels_half, pBlock_pixels_q16,
5348
pPixel_block_ofs,
5349
coptions,
5350
false, FIRST_MODE11_SUBMODE_INDEX, MAX_MODE11_SUBMODE_INDEX, opt_mode);
5351
}
5352
5353
return false;
5354
}
5355
5356
} // namespace basisu
5357
5358
5359