Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/util/format/u_format_fxt1.c
7132 views
1
/**************************************************************************
2
*
3
* Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
4
* Copyright (c) 2008 VMware, Inc.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a
7
* copy of this software and associated documentation files (the "Software"),
8
* to deal in the Software without restriction, including without limitation
9
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
10
* and/or sell copies of the Software, and to permit persons to whom the
11
* Software is furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included
14
* in all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
* OTHER DEALINGS IN THE SOFTWARE.
23
*
24
**************************************************************************/
25
26
#include "util/format/u_format.h"
27
#include "util/format/u_format_fxt1.h"
28
#include "util/format/u_format_pack.h"
29
#include "util/format_srgb.h"
30
#include "util/u_math.h"
31
32
/* Indices of the colour components inside one texel (used as array
 * subscripts on the [MAX_COMP]-wide texel arrays below).
 */
#define RCOMP 0   /* red   */
#define GCOMP 1   /* green */
#define BCOMP 2   /* blue  */
#define ACOMP 3   /* alpha */

/* FXT1 block size; presumably in bytes (one encoded 8x4 block is 128 bits). */
#define FXT1_BLOCK_SIZE 16
38
39
/* Forward declaration of the encoder entry point (defined further down):
 * compresses a width x height image with `comps` (3 or 4) bytes per texel.
 */
static void
fxt1_encode(uint32_t width, uint32_t height, int32_t comps,
            const void *source, int32_t srcRowStride,
            void *dest, int32_t destRowStride);

/* Forward declaration of the single-texel decoder; the decoded texel at
 * column i, row j is written to rgba.
 */
static void
fxt1_decode_1(const void *texture, int32_t stride,
              int32_t i, int32_t j, uint8_t *rgba);
47
48
/***************************************************************************\
49
* FXT1 encoder
50
*
51
* The encoder was built by reversing the decoder,
52
* and is vaguely based on Texus2 by 3dfx. Note that this code
53
* is merely a proof of concept, since it is highly UNoptimized;
54
* moreover, it is sub-optimal due to initial conditions passed
55
* to Lloyd's algorithm (the interpolation modes are even worse).
56
\***************************************************************************/
57
58
59
/* Encoder limits and tuning knobs. */
#define MAX_COMP 4    /* largest number of components a texel ever has */
#define MAX_VECT 4    /* largest number of base vectors ever searched for */
#define N_TEXELS 32   /* texels per block (always 32) */
#define LL_N_REP 50   /* iteration cap for Lloyd's vector quantizer */
#define LL_RMS_D 10   /* fault tolerance: maximum delta between iterations */
#define LL_RMS_E 255  /* fault tolerance: maximum accepted error */
#define ALPHA_TS 2    /* alpha threshold: (255 - ALPHA_TS) is deemed opaque */

/* All-zero 32-bit pattern that ISTBLACK() compares texels against. */
static const uint32_t zero = 0;

/* True when the first sizeof(zero) bytes of v are all zero, i.e. the
 * texel is "transparent black".
 */
#define ISTBLACK(v) (memcmp(&(v), &zero, sizeof(zero)) == 0)
68
69
/*
 * 64-bit unsigned accumulator used to assemble the high quadword of a
 * block, together with the three operations the encoder needs on it:
 *
 *   FX64_MOV32(a, b)  load a 32-bit value into the low part of a
 *   FX64_OR32(a, b)   OR a 32-bit value into the low part of a
 *   FX64_SHL(a, c)    shift a left by c bits
 *
 * The native uint64_t flavour is selected; the two-word struct flavour is
 * kept as a fallback.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b)  ((a) |= (b))
#define FX64_SHL(a, c)   ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   uint32_t lo, hi;
} Fx64;

#define FX64_MOV32(a, b) ((a).lo = (b))
#define FX64_OR32(a, b)  ((a).lo |= (b))

#define FX64_SHL(a, c)                                          \
   do {                                                         \
      if ((c) >= 32) {                                          \
         (a).hi = (a).lo << ((c) - 32);                         \
         (a).lo = 0;                                            \
      } else {                                                  \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c)));     \
         (a).lo <<= (c);                                        \
      }                                                         \
   } while (0)

#endif
105
106
107
/* Per-component weight; can be used to obtain an oblong metric
 * (0.30 / 0.59 / 0.11).  Currently every component weighs 1.
 */
#define F(i) (float)1

/* When non-zero, CALCCDOT clamps its result to [0, NV] (for paranoids). */
#define SAFECDOT 1

/*
 * Compute the interpolation vector IV and bias B that project a texel
 * onto the line from V0 to V1, scaled so that V0 maps to 0 and V1 to NV.
 *
 * NOTE: both macros below use a variable named `i` from the enclosing
 * scope as their loop counter, and declare locals named d2/rd2/dot.
 * V0 and V1 must differ, otherwise d2 is zero and the division blows up.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)                \
   do {                                                \
      float d2 = 0.0F;                                 \
      float rd2;                                       \
                                                       \
      /* direction vector and its squared length */    \
      for (i = 0; i < (NC); i++) {                     \
         (IV)[i] = ((V1)[i] - (V0)[i]) * F(i);         \
         d2 += (IV)[i] * (IV)[i];                      \
      }                                                \
      rd2 = (float)(NV) / d2;                          \
      (B) = 0;                                         \
      for (i = 0; i < (NC); i++) {                     \
         (IV)[i] *= F(i);                              \
         (B) -= (IV)[i] * (V0)[i];                     \
         (IV)[i] *= rd2;                               \
      }                                                \
      /* the extra 0.5 rounds on truncation */         \
      (B) = (B) * rd2 + 0.5f;                          \
   } while (0)

/*
 * Project texel V onto the interpolation vector prepared by MAKEIVEC and
 * store the resulting selector (truncated to int32_t) in TEXEL.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)              \
   do {                                                \
      float dot = 0.0F;                                \
      for (i = 0; i < (NC); i++) {                     \
         dot += (V)[i] * (IV)[i];                      \
      }                                                \
      (TEXEL) = (int32_t)(dot + (B));                  \
      if (SAFECDOT) {                                  \
         if ((TEXEL) < 0) {                            \
            (TEXEL) = 0;                               \
         } else if ((TEXEL) > (NV)) {                  \
            (TEXEL) = (NV);                            \
         }                                             \
      }                                                \
   } while (0)
145
146
147
static int32_t
148
fxt1_bestcol (float vec[][MAX_COMP], int32_t nv,
149
uint8_t input[MAX_COMP], int32_t nc)
150
{
151
int32_t i, j, best = -1;
152
float err = 1e9; /* big enough */
153
154
for (j = 0; j < nv; j++) {
155
float e = 0.0F;
156
for (i = 0; i < nc; i++) {
157
e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
158
}
159
if (e < err) {
160
err = e;
161
best = j;
162
}
163
}
164
165
return best;
166
}
167
168
169
static int32_t
170
fxt1_worst (float vec[MAX_COMP],
171
uint8_t input[N_TEXELS][MAX_COMP], int32_t nc, int32_t n)
172
{
173
int32_t i, k, worst = -1;
174
float err = -1.0F; /* small enough */
175
176
for (k = 0; k < n; k++) {
177
float e = 0.0F;
178
for (i = 0; i < nc; i++) {
179
e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
180
}
181
if (e > err) {
182
err = e;
183
worst = k;
184
}
185
}
186
187
return worst;
188
}
189
190
191
static int32_t
192
fxt1_variance (uint8_t input[N_TEXELS / 2][MAX_COMP], int32_t nc)
193
{
194
const int n = N_TEXELS / 2;
195
int32_t i, k, best = 0;
196
int32_t sx, sx2;
197
double var, maxvar = -1; /* small enough */
198
double teenth = 1.0 / n;
199
200
for (i = 0; i < nc; i++) {
201
sx = sx2 = 0;
202
for (k = 0; k < n; k++) {
203
int32_t t = input[k][i];
204
sx += t;
205
sx2 += t * t;
206
}
207
var = sx2 * teenth - sx * sx * teenth * teenth;
208
if (maxvar < var) {
209
maxvar = var;
210
best = i;
211
}
212
}
213
214
return best;
215
}
216
217
218
static int32_t
219
fxt1_choose (float vec[][MAX_COMP], int32_t nv,
220
uint8_t input[N_TEXELS][MAX_COMP], int32_t nc, int32_t n)
221
{
222
#if 0
223
/* Choose colors from a grid.
224
*/
225
int32_t i, j;
226
227
for (j = 0; j < nv; j++) {
228
int32_t m = j * (n - 1) / (nv - 1);
229
for (i = 0; i < nc; i++) {
230
vec[j][i] = input[m][i];
231
}
232
}
233
#else
234
/* Our solution here is to find the darkest and brightest colors in
235
* the 8x4 tile and use those as the two representative colors.
236
* There are probably better algorithms to use (histogram-based).
237
*/
238
int32_t i, j, k;
239
int32_t minSum = 2000; /* big enough */
240
int32_t maxSum = -1; /* small enough */
241
int32_t minCol = 0; /* phoudoin: silent compiler! */
242
int32_t maxCol = 0; /* phoudoin: silent compiler! */
243
244
struct {
245
int32_t flag;
246
int32_t key;
247
int32_t freq;
248
int32_t idx;
249
} hist[N_TEXELS];
250
int32_t lenh = 0;
251
252
memset(hist, 0, sizeof(hist));
253
254
for (k = 0; k < n; k++) {
255
int32_t l;
256
int32_t key = 0;
257
int32_t sum = 0;
258
for (i = 0; i < nc; i++) {
259
key <<= 8;
260
key |= input[k][i];
261
sum += input[k][i];
262
}
263
for (l = 0; l < n; l++) {
264
if (!hist[l].flag) {
265
/* alloc new slot */
266
hist[l].flag = !0;
267
hist[l].key = key;
268
hist[l].freq = 1;
269
hist[l].idx = k;
270
lenh = l + 1;
271
break;
272
} else if (hist[l].key == key) {
273
hist[l].freq++;
274
break;
275
}
276
}
277
if (minSum > sum) {
278
minSum = sum;
279
minCol = k;
280
}
281
if (maxSum < sum) {
282
maxSum = sum;
283
maxCol = k;
284
}
285
}
286
287
if (lenh <= nv) {
288
for (j = 0; j < lenh; j++) {
289
for (i = 0; i < nc; i++) {
290
vec[j][i] = (float)input[hist[j].idx][i];
291
}
292
}
293
for (; j < nv; j++) {
294
for (i = 0; i < nc; i++) {
295
vec[j][i] = vec[0][i];
296
}
297
}
298
return 0;
299
}
300
301
for (j = 0; j < nv; j++) {
302
for (i = 0; i < nc; i++) {
303
vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (float)(nv - 1);
304
}
305
}
306
#endif
307
308
return !0;
309
}
310
311
312
static int32_t
313
fxt1_lloyd (float vec[][MAX_COMP], int32_t nv,
314
uint8_t input[N_TEXELS][MAX_COMP], int32_t nc, int32_t n)
315
{
316
/* Use the generalized lloyd's algorithm for VQ:
317
* find 4 color vectors.
318
*
319
* for each sample color
320
* sort to nearest vector.
321
*
322
* replace each vector with the centroid of its matching colors.
323
*
324
* repeat until RMS doesn't improve.
325
*
326
* if a color vector has no samples, or becomes the same as another
327
* vector, replace it with the color which is farthest from a sample.
328
*
329
* vec[][MAX_COMP] initial vectors and resulting colors
330
* nv number of resulting colors required
331
* input[N_TEXELS][MAX_COMP] input texels
332
* nc number of components in input / vec
333
* n number of input samples
334
*/
335
336
int32_t sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
337
int32_t cnt[MAX_VECT]; /* how many times a certain vector was chosen */
338
float error, lasterror = 1e9;
339
340
int32_t i, j, k, rep;
341
342
/* the quantizer */
343
for (rep = 0; rep < LL_N_REP; rep++) {
344
/* reset sums & counters */
345
for (j = 0; j < nv; j++) {
346
for (i = 0; i < nc; i++) {
347
sum[j][i] = 0;
348
}
349
cnt[j] = 0;
350
}
351
error = 0;
352
353
/* scan whole block */
354
for (k = 0; k < n; k++) {
355
#if 1
356
int32_t best = -1;
357
float err = 1e9; /* big enough */
358
/* determine best vector */
359
for (j = 0; j < nv; j++) {
360
float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
361
(vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
362
(vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
363
if (nc == 4) {
364
e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
365
}
366
if (e < err) {
367
err = e;
368
best = j;
369
}
370
}
371
#else
372
int32_t best = fxt1_bestcol(vec, nv, input[k], nc, &err);
373
#endif
374
assert(best >= 0);
375
/* add in closest color */
376
for (i = 0; i < nc; i++) {
377
sum[best][i] += input[k][i];
378
}
379
/* mark this vector as used */
380
cnt[best]++;
381
/* accumulate error */
382
error += err;
383
}
384
385
/* check RMS */
386
if ((error < LL_RMS_E) ||
387
((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
388
return !0; /* good match */
389
}
390
lasterror = error;
391
392
/* move each vector to the barycenter of its closest colors */
393
for (j = 0; j < nv; j++) {
394
if (cnt[j]) {
395
float div = 1.0F / cnt[j];
396
for (i = 0; i < nc; i++) {
397
vec[j][i] = div * sum[j][i];
398
}
399
} else {
400
/* this vec has no samples or is identical with a previous vec */
401
int32_t worst = fxt1_worst(vec[j], input, nc, n);
402
for (i = 0; i < nc; i++) {
403
vec[j][i] = input[worst][i];
404
}
405
}
406
}
407
}
408
409
return 0; /* could not converge fast enough */
410
}
411
412
413
static void
414
fxt1_quantize_CHROMA (uint32_t *cc,
415
uint8_t input[N_TEXELS][MAX_COMP])
416
{
417
const int32_t n_vect = 4; /* 4 base vectors to find */
418
const int32_t n_comp = 3; /* 3 components: R, G, B */
419
float vec[MAX_VECT][MAX_COMP];
420
int32_t i, j, k;
421
Fx64 hi; /* high quadword */
422
uint32_t lohi, lolo; /* low quadword: hi dword, lo dword */
423
424
if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
425
fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
426
}
427
428
FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
429
for (j = n_vect - 1; j >= 0; j--) {
430
for (i = 0; i < n_comp; i++) {
431
/* add in colors */
432
FX64_SHL(hi, 5);
433
FX64_OR32(hi, (uint32_t)(vec[j][i] / 8.0F));
434
}
435
}
436
((Fx64 *)cc)[1] = hi;
437
438
lohi = lolo = 0;
439
/* right microtile */
440
for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
441
lohi <<= 2;
442
lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
443
}
444
/* left microtile */
445
for (; k >= 0; k--) {
446
lolo <<= 2;
447
lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
448
}
449
cc[1] = lohi;
450
cc[0] = lolo;
451
}
452
453
454
static void
455
fxt1_quantize_ALPHA0 (uint32_t *cc,
456
uint8_t input[N_TEXELS][MAX_COMP],
457
uint8_t reord[N_TEXELS][MAX_COMP], int32_t n)
458
{
459
const int32_t n_vect = 3; /* 3 base vectors to find */
460
const int32_t n_comp = 4; /* 4 components: R, G, B, A */
461
float vec[MAX_VECT][MAX_COMP];
462
int32_t i, j, k;
463
Fx64 hi; /* high quadword */
464
uint32_t lohi, lolo; /* low quadword: hi dword, lo dword */
465
466
/* the last vector indicates zero */
467
for (i = 0; i < n_comp; i++) {
468
vec[n_vect][i] = 0;
469
}
470
471
/* the first n texels in reord are guaranteed to be non-zero */
472
if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
473
fxt1_lloyd(vec, n_vect, reord, n_comp, n);
474
}
475
476
FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
477
for (j = n_vect - 1; j >= 0; j--) {
478
/* add in alphas */
479
FX64_SHL(hi, 5);
480
FX64_OR32(hi, (uint32_t)(vec[j][ACOMP] / 8.0F));
481
}
482
for (j = n_vect - 1; j >= 0; j--) {
483
for (i = 0; i < n_comp - 1; i++) {
484
/* add in colors */
485
FX64_SHL(hi, 5);
486
FX64_OR32(hi, (uint32_t)(vec[j][i] / 8.0F));
487
}
488
}
489
((Fx64 *)cc)[1] = hi;
490
491
lohi = lolo = 0;
492
/* right microtile */
493
for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
494
lohi <<= 2;
495
lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
496
}
497
/* left microtile */
498
for (; k >= 0; k--) {
499
lolo <<= 2;
500
lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
501
}
502
cc[1] = lohi;
503
cc[0] = lolo;
504
}
505
506
507
static void
508
fxt1_quantize_ALPHA1 (uint32_t *cc,
509
uint8_t input[N_TEXELS][MAX_COMP])
510
{
511
const int32_t n_vect = 3; /* highest vector number in each microtile */
512
const int32_t n_comp = 4; /* 4 components: R, G, B, A */
513
float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
514
float b, iv[MAX_COMP]; /* interpolation vector */
515
int32_t i, j, k;
516
Fx64 hi; /* high quadword */
517
uint32_t lohi, lolo; /* low quadword: hi dword, lo dword */
518
519
int32_t minSum;
520
int32_t maxSum;
521
int32_t minColL = 0, maxColL = 0;
522
int32_t minColR = 0, maxColR = 0;
523
int32_t sumL = 0, sumR = 0;
524
int32_t nn_comp;
525
/* Our solution here is to find the darkest and brightest colors in
526
* the 4x4 tile and use those as the two representative colors.
527
* There are probably better algorithms to use (histogram-based).
528
*/
529
nn_comp = n_comp;
530
while ((minColL == maxColL) && nn_comp) {
531
minSum = 2000; /* big enough */
532
maxSum = -1; /* small enough */
533
for (k = 0; k < N_TEXELS / 2; k++) {
534
int32_t sum = 0;
535
for (i = 0; i < nn_comp; i++) {
536
sum += input[k][i];
537
}
538
if (minSum > sum) {
539
minSum = sum;
540
minColL = k;
541
}
542
if (maxSum < sum) {
543
maxSum = sum;
544
maxColL = k;
545
}
546
sumL += sum;
547
}
548
549
nn_comp--;
550
}
551
552
nn_comp = n_comp;
553
while ((minColR == maxColR) && nn_comp) {
554
minSum = 2000; /* big enough */
555
maxSum = -1; /* small enough */
556
for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
557
int32_t sum = 0;
558
for (i = 0; i < nn_comp; i++) {
559
sum += input[k][i];
560
}
561
if (minSum > sum) {
562
minSum = sum;
563
minColR = k;
564
}
565
if (maxSum < sum) {
566
maxSum = sum;
567
maxColR = k;
568
}
569
sumR += sum;
570
}
571
572
nn_comp--;
573
}
574
575
/* choose the common vector (yuck!) */
576
{
577
int32_t j1, j2;
578
int32_t v1 = 0, v2 = 0;
579
float err = 1e9; /* big enough */
580
float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
581
for (i = 0; i < n_comp; i++) {
582
tv[0][i] = input[minColL][i];
583
tv[1][i] = input[maxColL][i];
584
tv[2][i] = input[minColR][i];
585
tv[3][i] = input[maxColR][i];
586
}
587
for (j1 = 0; j1 < 2; j1++) {
588
for (j2 = 2; j2 < 4; j2++) {
589
float e = 0.0F;
590
for (i = 0; i < n_comp; i++) {
591
e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
592
}
593
if (e < err) {
594
err = e;
595
v1 = j1;
596
v2 = j2;
597
}
598
}
599
}
600
for (i = 0; i < n_comp; i++) {
601
vec[0][i] = tv[1 - v1][i];
602
vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
603
vec[2][i] = tv[5 - v2][i];
604
}
605
}
606
607
/* left microtile */
608
cc[0] = 0;
609
if (minColL != maxColL) {
610
/* compute interpolation vector */
611
MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
612
613
/* add in texels */
614
lolo = 0;
615
for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
616
int32_t texel;
617
/* interpolate color */
618
CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
619
/* add in texel */
620
lolo <<= 2;
621
lolo |= texel;
622
}
623
624
cc[0] = lolo;
625
}
626
627
/* right microtile */
628
cc[1] = 0;
629
if (minColR != maxColR) {
630
/* compute interpolation vector */
631
MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
632
633
/* add in texels */
634
lohi = 0;
635
for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
636
int32_t texel;
637
/* interpolate color */
638
CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
639
/* add in texel */
640
lohi <<= 2;
641
lohi |= texel;
642
}
643
644
cc[1] = lohi;
645
}
646
647
FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
648
for (j = n_vect - 1; j >= 0; j--) {
649
/* add in alphas */
650
FX64_SHL(hi, 5);
651
FX64_OR32(hi, (uint32_t)(vec[j][ACOMP] / 8.0F));
652
}
653
for (j = n_vect - 1; j >= 0; j--) {
654
for (i = 0; i < n_comp - 1; i++) {
655
/* add in colors */
656
FX64_SHL(hi, 5);
657
FX64_OR32(hi, (uint32_t)(vec[j][i] / 8.0F));
658
}
659
}
660
((Fx64 *)cc)[1] = hi;
661
}
662
663
664
static void
665
fxt1_quantize_HI (uint32_t *cc,
666
uint8_t input[N_TEXELS][MAX_COMP],
667
uint8_t reord[N_TEXELS][MAX_COMP], int32_t n)
668
{
669
const int32_t n_vect = 6; /* highest vector number */
670
const int32_t n_comp = 3; /* 3 components: R, G, B */
671
float b = 0.0F; /* phoudoin: silent compiler! */
672
float iv[MAX_COMP]; /* interpolation vector */
673
int32_t i, k;
674
uint32_t hihi; /* high quadword: hi dword */
675
676
int32_t minSum = 2000; /* big enough */
677
int32_t maxSum = -1; /* small enough */
678
int32_t minCol = 0; /* phoudoin: silent compiler! */
679
int32_t maxCol = 0; /* phoudoin: silent compiler! */
680
681
/* Our solution here is to find the darkest and brightest colors in
682
* the 8x4 tile and use those as the two representative colors.
683
* There are probably better algorithms to use (histogram-based).
684
*/
685
for (k = 0; k < n; k++) {
686
int32_t sum = 0;
687
for (i = 0; i < n_comp; i++) {
688
sum += reord[k][i];
689
}
690
if (minSum > sum) {
691
minSum = sum;
692
minCol = k;
693
}
694
if (maxSum < sum) {
695
maxSum = sum;
696
maxCol = k;
697
}
698
}
699
700
hihi = 0; /* cc-hi = "00" */
701
for (i = 0; i < n_comp; i++) {
702
/* add in colors */
703
hihi <<= 5;
704
hihi |= reord[maxCol][i] >> 3;
705
}
706
for (i = 0; i < n_comp; i++) {
707
/* add in colors */
708
hihi <<= 5;
709
hihi |= reord[minCol][i] >> 3;
710
}
711
cc[3] = hihi;
712
cc[0] = cc[1] = cc[2] = 0;
713
714
/* compute interpolation vector */
715
if (minCol != maxCol) {
716
MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
717
}
718
719
/* add in texels */
720
for (k = N_TEXELS - 1; k >= 0; k--) {
721
int32_t t = k * 3;
722
uint32_t *kk = (uint32_t *)((char *)cc + t / 8);
723
int32_t texel = n_vect + 1; /* transparent black */
724
725
if (!ISTBLACK(input[k])) {
726
if (minCol != maxCol) {
727
/* interpolate color */
728
CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
729
/* add in texel */
730
kk[0] |= texel << (t & 7);
731
}
732
} else {
733
/* add in texel */
734
kk[0] |= texel << (t & 7);
735
}
736
}
737
}
738
739
740
static void
741
fxt1_quantize_MIXED1 (uint32_t *cc,
742
uint8_t input[N_TEXELS][MAX_COMP])
743
{
744
const int32_t n_vect = 2; /* highest vector number in each microtile */
745
const int32_t n_comp = 3; /* 3 components: R, G, B */
746
uint8_t vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
747
float b, iv[MAX_COMP]; /* interpolation vector */
748
int32_t i, j, k;
749
Fx64 hi; /* high quadword */
750
uint32_t lohi, lolo; /* low quadword: hi dword, lo dword */
751
752
int32_t minSum;
753
int32_t maxSum;
754
int32_t minColL = 0, maxColL = -1;
755
int32_t minColR = 0, maxColR = -1;
756
757
/* Our solution here is to find the darkest and brightest colors in
758
* the 4x4 tile and use those as the two representative colors.
759
* There are probably better algorithms to use (histogram-based).
760
*/
761
minSum = 2000; /* big enough */
762
maxSum = -1; /* small enough */
763
for (k = 0; k < N_TEXELS / 2; k++) {
764
if (!ISTBLACK(input[k])) {
765
int32_t sum = 0;
766
for (i = 0; i < n_comp; i++) {
767
sum += input[k][i];
768
}
769
if (minSum > sum) {
770
minSum = sum;
771
minColL = k;
772
}
773
if (maxSum < sum) {
774
maxSum = sum;
775
maxColL = k;
776
}
777
}
778
}
779
minSum = 2000; /* big enough */
780
maxSum = -1; /* small enough */
781
for (; k < N_TEXELS; k++) {
782
if (!ISTBLACK(input[k])) {
783
int32_t sum = 0;
784
for (i = 0; i < n_comp; i++) {
785
sum += input[k][i];
786
}
787
if (minSum > sum) {
788
minSum = sum;
789
minColR = k;
790
}
791
if (maxSum < sum) {
792
maxSum = sum;
793
maxColR = k;
794
}
795
}
796
}
797
798
/* left microtile */
799
if (maxColL == -1) {
800
/* all transparent black */
801
cc[0] = ~0u;
802
for (i = 0; i < n_comp; i++) {
803
vec[0][i] = 0;
804
vec[1][i] = 0;
805
}
806
} else {
807
cc[0] = 0;
808
for (i = 0; i < n_comp; i++) {
809
vec[0][i] = input[minColL][i];
810
vec[1][i] = input[maxColL][i];
811
}
812
if (minColL != maxColL) {
813
/* compute interpolation vector */
814
MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
815
816
/* add in texels */
817
lolo = 0;
818
for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
819
int32_t texel = n_vect + 1; /* transparent black */
820
if (!ISTBLACK(input[k])) {
821
/* interpolate color */
822
CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
823
}
824
/* add in texel */
825
lolo <<= 2;
826
lolo |= texel;
827
}
828
cc[0] = lolo;
829
}
830
}
831
832
/* right microtile */
833
if (maxColR == -1) {
834
/* all transparent black */
835
cc[1] = ~0u;
836
for (i = 0; i < n_comp; i++) {
837
vec[2][i] = 0;
838
vec[3][i] = 0;
839
}
840
} else {
841
cc[1] = 0;
842
for (i = 0; i < n_comp; i++) {
843
vec[2][i] = input[minColR][i];
844
vec[3][i] = input[maxColR][i];
845
}
846
if (minColR != maxColR) {
847
/* compute interpolation vector */
848
MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
849
850
/* add in texels */
851
lohi = 0;
852
for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
853
int32_t texel = n_vect + 1; /* transparent black */
854
if (!ISTBLACK(input[k])) {
855
/* interpolate color */
856
CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
857
}
858
/* add in texel */
859
lohi <<= 2;
860
lohi |= texel;
861
}
862
cc[1] = lohi;
863
}
864
}
865
866
FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
867
for (j = 2 * 2 - 1; j >= 0; j--) {
868
for (i = 0; i < n_comp; i++) {
869
/* add in colors */
870
FX64_SHL(hi, 5);
871
FX64_OR32(hi, vec[j][i] >> 3);
872
}
873
}
874
((Fx64 *)cc)[1] = hi;
875
}
876
877
878
static void
879
fxt1_quantize_MIXED0 (uint32_t *cc,
880
uint8_t input[N_TEXELS][MAX_COMP])
881
{
882
const int32_t n_vect = 3; /* highest vector number in each microtile */
883
const int32_t n_comp = 3; /* 3 components: R, G, B */
884
uint8_t vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
885
float b, iv[MAX_COMP]; /* interpolation vector */
886
int32_t i, j, k;
887
Fx64 hi; /* high quadword */
888
uint32_t lohi, lolo; /* low quadword: hi dword, lo dword */
889
890
int32_t minColL = 0, maxColL = 0;
891
int32_t minColR = 0, maxColR = 0;
892
#if 0
893
int32_t minSum;
894
int32_t maxSum;
895
896
/* Our solution here is to find the darkest and brightest colors in
897
* the 4x4 tile and use those as the two representative colors.
898
* There are probably better algorithms to use (histogram-based).
899
*/
900
minSum = 2000; /* big enough */
901
maxSum = -1; /* small enough */
902
for (k = 0; k < N_TEXELS / 2; k++) {
903
int32_t sum = 0;
904
for (i = 0; i < n_comp; i++) {
905
sum += input[k][i];
906
}
907
if (minSum > sum) {
908
minSum = sum;
909
minColL = k;
910
}
911
if (maxSum < sum) {
912
maxSum = sum;
913
maxColL = k;
914
}
915
}
916
minSum = 2000; /* big enough */
917
maxSum = -1; /* small enough */
918
for (; k < N_TEXELS; k++) {
919
int32_t sum = 0;
920
for (i = 0; i < n_comp; i++) {
921
sum += input[k][i];
922
}
923
if (minSum > sum) {
924
minSum = sum;
925
minColR = k;
926
}
927
if (maxSum < sum) {
928
maxSum = sum;
929
maxColR = k;
930
}
931
}
932
#else
933
int32_t minVal;
934
int32_t maxVal;
935
int32_t maxVarL = fxt1_variance(input, n_comp);
936
int32_t maxVarR = fxt1_variance(&input[N_TEXELS / 2], n_comp);
937
938
/* Scan the channel with max variance for lo & hi
939
* and use those as the two representative colors.
940
*/
941
minVal = 2000; /* big enough */
942
maxVal = -1; /* small enough */
943
for (k = 0; k < N_TEXELS / 2; k++) {
944
int32_t t = input[k][maxVarL];
945
if (minVal > t) {
946
minVal = t;
947
minColL = k;
948
}
949
if (maxVal < t) {
950
maxVal = t;
951
maxColL = k;
952
}
953
}
954
minVal = 2000; /* big enough */
955
maxVal = -1; /* small enough */
956
for (; k < N_TEXELS; k++) {
957
int32_t t = input[k][maxVarR];
958
if (minVal > t) {
959
minVal = t;
960
minColR = k;
961
}
962
if (maxVal < t) {
963
maxVal = t;
964
maxColR = k;
965
}
966
}
967
#endif
968
969
/* left microtile */
970
cc[0] = 0;
971
for (i = 0; i < n_comp; i++) {
972
vec[0][i] = input[minColL][i];
973
vec[1][i] = input[maxColL][i];
974
}
975
if (minColL != maxColL) {
976
/* compute interpolation vector */
977
MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
978
979
/* add in texels */
980
lolo = 0;
981
for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
982
int32_t texel;
983
/* interpolate color */
984
CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
985
/* add in texel */
986
lolo <<= 2;
987
lolo |= texel;
988
}
989
990
/* funky encoding for LSB of green */
991
if ((int32_t)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
992
for (i = 0; i < n_comp; i++) {
993
vec[1][i] = input[minColL][i];
994
vec[0][i] = input[maxColL][i];
995
}
996
lolo = ~lolo;
997
}
998
999
cc[0] = lolo;
1000
}
1001
1002
/* right microtile */
1003
cc[1] = 0;
1004
for (i = 0; i < n_comp; i++) {
1005
vec[2][i] = input[minColR][i];
1006
vec[3][i] = input[maxColR][i];
1007
}
1008
if (minColR != maxColR) {
1009
/* compute interpolation vector */
1010
MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1011
1012
/* add in texels */
1013
lohi = 0;
1014
for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1015
int32_t texel;
1016
/* interpolate color */
1017
CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1018
/* add in texel */
1019
lohi <<= 2;
1020
lohi |= texel;
1021
}
1022
1023
/* funky encoding for LSB of green */
1024
if ((int32_t)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1025
for (i = 0; i < n_comp; i++) {
1026
vec[3][i] = input[minColR][i];
1027
vec[2][i] = input[maxColR][i];
1028
}
1029
lohi = ~lohi;
1030
}
1031
1032
cc[1] = lohi;
1033
}
1034
1035
FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1036
for (j = 2 * 2 - 1; j >= 0; j--) {
1037
for (i = 0; i < n_comp; i++) {
1038
/* add in colors */
1039
FX64_SHL(hi, 5);
1040
FX64_OR32(hi, vec[j][i] >> 3);
1041
}
1042
}
1043
((Fx64 *)cc)[1] = hi;
1044
}
1045
1046
1047
static void
1048
fxt1_quantize (uint32_t *cc, const uint8_t *lines[], int32_t comps)
1049
{
1050
int32_t trualpha;
1051
uint8_t reord[N_TEXELS][MAX_COMP];
1052
1053
uint8_t input[N_TEXELS][MAX_COMP];
1054
int32_t i, k, l;
1055
1056
if (comps == 3) {
1057
/* make the whole block opaque */
1058
memset(input, -1, sizeof(input));
1059
}
1060
1061
/* 8 texels each line */
1062
for (l = 0; l < 4; l++) {
1063
for (k = 0; k < 4; k++) {
1064
for (i = 0; i < comps; i++) {
1065
input[k + l * 4][i] = *lines[l]++;
1066
}
1067
}
1068
for (; k < 8; k++) {
1069
for (i = 0; i < comps; i++) {
1070
input[k + l * 4 + 12][i] = *lines[l]++;
1071
}
1072
}
1073
}
1074
1075
/* block layout:
1076
* 00, 01, 02, 03, 08, 09, 0a, 0b
1077
* 10, 11, 12, 13, 18, 19, 1a, 1b
1078
* 04, 05, 06, 07, 0c, 0d, 0e, 0f
1079
* 14, 15, 16, 17, 1c, 1d, 1e, 1f
1080
*/
1081
1082
/* [dBorca]
1083
* stupidity flows forth from this
1084
*/
1085
l = N_TEXELS;
1086
trualpha = 0;
1087
if (comps == 4) {
1088
/* skip all transparent black texels */
1089
l = 0;
1090
for (k = 0; k < N_TEXELS; k++) {
1091
/* test all components against 0 */
1092
if (!ISTBLACK(input[k])) {
1093
/* texel is not transparent black */
1094
memcpy(reord[l], input[k], 4);
1095
if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1096
/* non-opaque texel */
1097
trualpha = !0;
1098
}
1099
l++;
1100
}
1101
}
1102
}
1103
1104
#if 0
1105
if (trualpha) {
1106
fxt1_quantize_ALPHA0(cc, input, reord, l);
1107
} else if (l == 0) {
1108
cc[0] = cc[1] = cc[2] = -1;
1109
cc[3] = 0;
1110
} else if (l < N_TEXELS) {
1111
fxt1_quantize_HI(cc, input, reord, l);
1112
} else {
1113
fxt1_quantize_CHROMA(cc, input);
1114
}
1115
(void)fxt1_quantize_ALPHA1;
1116
(void)fxt1_quantize_MIXED1;
1117
(void)fxt1_quantize_MIXED0;
1118
#else
1119
if (trualpha) {
1120
fxt1_quantize_ALPHA1(cc, input);
1121
} else if (l == 0) {
1122
cc[0] = cc[1] = cc[2] = ~0u;
1123
cc[3] = 0;
1124
} else if (l < N_TEXELS) {
1125
fxt1_quantize_MIXED1(cc, input);
1126
} else {
1127
fxt1_quantize_MIXED0(cc, input);
1128
}
1129
(void)fxt1_quantize_ALPHA0;
1130
(void)fxt1_quantize_HI;
1131
(void)fxt1_quantize_CHROMA;
1132
#endif
1133
}
1134
1135
1136
1137
/**
 * Enlarge an image by tiling (replicating) it, not by stretching it.
 * Used when the image width or height is below the block size (8, 4)
 * and a full block is needed.
 *
 * \param inWidth, inHeight    size of the source image in texels
 * \param outWidth, outHeight  size of the destination image in texels
 * \param comps                bytes per texel
 * \param src                  source texels
 * \param srcRowStride         bytes between the starts of two source rows
 * \param dest                 tightly packed destination of
 *                             outWidth * outHeight * comps bytes
 */
static void
upscale_teximage2d(int32_t inWidth, int32_t inHeight,
                   int32_t outWidth, int32_t outHeight,
                   int32_t comps, const uint8_t *src, int32_t srcRowStride,
                   uint8_t *dest )
{
   uint8_t *out = dest; /* destination bytes are produced strictly in order */
   int32_t row, col, c;

   assert(outWidth >= inWidth);
   assert(outHeight >= inHeight);
#if 0
   assert(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
   assert((outWidth & 3) == 0);
   assert((outHeight & 3) == 0);
#endif

   for (row = 0; row < outHeight; row++) {
      const int32_t srcRow = row % inHeight;
      for (col = 0; col < outWidth; col++) {
         const int32_t srcCol = col % inWidth;
         for (c = 0; c < comps; c++) {
            *out++ = src[srcRow * srcRowStride + srcCol * comps + c];
         }
      }
   }
}
1169
1170
1171
/**
 * Compress an image to FXT1.
 *
 * The image is processed in 8x4 texel blocks, each of which becomes four
 * 32-bit words.  If the width is not a multiple of 8 or the height not a
 * multiple of 4, the image is first tiled up to the next such size in a
 * temporary buffer; if that allocation fails the function returns without
 * writing anything.
 *
 * \param width, height   image size in texels
 * \param comps           bytes per source texel, 3 or 4
 * \param source          source texels
 * \param srcRowStride    bytes between the starts of two source rows
 * \param dest            receives the encoded blocks
 * \param destRowStride   bytes between the starts of two rows of blocks
 */
static void
fxt1_encode (uint32_t width, uint32_t height, int32_t comps,
             const void *source, int32_t srcRowStride,
             void *dest, int32_t destRowStride)
{
   uint32_t *encoded = (uint32_t *)dest;
   void *newSource = NULL;
   const uint8_t *data;
   uint32_t x, y;

   assert(comps == 3 || comps == 4);

   /* Replicate image if width is not M8 or height is not M4 */
   if ((width & 7) | (height & 3)) {
      int32_t newWidth = (width + 7) & ~7;
      int32_t newHeight = (height + 3) & ~3;

      newSource = malloc(comps * newWidth * newHeight * sizeof(uint8_t));
      if (!newSource)
         return;
      upscale_teximage2d(width, height, newWidth, newHeight,
                         comps, (const uint8_t *) source,
                         srcRowStride, (uint8_t *) newSource);

      /* from here on work on the padded copy */
      source = newSource;
      width = newWidth;
      height = newHeight;
      srcRowStride = comps * newWidth;
   }

   data = (const uint8_t *) source;

   /* One row of blocks takes width * 2 bytes; what remains of the
    * destination stride is skipped after each row, counted in 32-bit words.
    */
   destRowStride = (destRowStride - width * 2) / 4;

   for (y = 0; y < height; y += 4) {
      for (x = 0; x < width; x += 8) {
         const uint32_t offs = y * srcRowStride + x * comps;
         const uint8_t *lines[4];
         int32_t r;

         lines[0] = &data[offs];
         for (r = 1; r < 4; r++) {
            lines[r] = lines[r - 1] + srcRowStride;
         }

         fxt1_quantize(encoded, lines, comps);
         /* 128 bits per 8x4 block */
         encoded += 4;
      }
      encoded += destRowStride;
   }

   free(newSource);
}
1219
1220
1221
/***************************************************************************\
1222
* FXT1 decoder
1223
*
1224
* The decoder is based on GL_3DFX_texture_compression_FXT1
1225
* specification and serves as a concept for the encoder.
1226
\***************************************************************************/
1227
1228
1229
/* Expands a 5-bit channel value to 8 bits: entry i is i * 255 / 31,
 * rounded to the nearest integer.
 */
static const uint8_t _rgb_scale_5[32] = {
     0,   8,  16,  25,
    33,  41,  49,  58,
    66,  74,  82,  90,
    99, 107, 115, 123,
   132, 140, 148, 156,
   165, 173, 181, 189,
   197, 206, 214, 222,
   230, 239, 247, 255
};
1236
1237
/* Expands a 6-bit channel value to 8 bits: entry i is i * 255 / 63,
 * rounded to the nearest integer.
 */
static const uint8_t _rgb_scale_6[64] = {
     0,   4,   8,  12,
    16,  20,  24,  28,
    32,  36,  40,  45,
    49,  53,  57,  61,
    65,  69,  73,  77,
    81,  85,  89,  93,
    97, 101, 105, 109,
   113, 117, 121, 125,
   130, 134, 138, 142,
   146, 150, 154, 158,
   162, 166, 170, 174,
   178, 182, 186, 190,
   194, 198, 202, 206,
   210, 215, 219, 223,
   227, 231, 235, 239,
   243, 247, 251, 255
};
1248
1249
1250
/*
 * Decoder helpers.
 *
 * CC_SEL(cc, which)  - view cc as an array of 32-bit words and return the
 *                      word holding bit number `which`, shifted right so
 *                      that bit lands in bit 0.  Upper bits are NOT masked
 *                      off; callers mask (UP5/UP6 do it for them).
 * UP5(c)             - expand the low 5 bits of c to 8 bits.
 * UP6(c, b)          - expand the 6-bit value formed by the low 5 bits of
 *                      c followed by the low bit of b to 8 bits.
 * LERP(n, t, c0, c1) - c0 + (c1 - c0) * t / n in integer arithmetic,
 *                      rounded to nearest by adding n / 2 before dividing.
 *
 * Every expansion is fully parenthesized so the macros can be used inside
 * larger expressions.
 */
#define CC_SEL(cc, which) (((const uint32_t *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) (_rgb_scale_5[(c) & 31])
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1254
1255
1256
static void
1257
fxt1_decode_1HI (const uint8_t *code, int32_t t, uint8_t *rgba)
1258
{
1259
const uint32_t *cc;
1260
1261
t *= 3;
1262
cc = (const uint32_t *)(code + t / 8);
1263
t = (cc[0] >> (t & 7)) & 7;
1264
1265
if (t == 7) {
1266
rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1267
} else {
1268
uint8_t r, g, b;
1269
cc = (const uint32_t *)(code + 12);
1270
if (t == 0) {
1271
b = UP5(CC_SEL(cc, 0));
1272
g = UP5(CC_SEL(cc, 5));
1273
r = UP5(CC_SEL(cc, 10));
1274
} else if (t == 6) {
1275
b = UP5(CC_SEL(cc, 15));
1276
g = UP5(CC_SEL(cc, 20));
1277
r = UP5(CC_SEL(cc, 25));
1278
} else {
1279
b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1280
g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1281
r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1282
}
1283
rgba[RCOMP] = r;
1284
rgba[GCOMP] = g;
1285
rgba[BCOMP] = b;
1286
rgba[ACOMP] = 255;
1287
}
1288
}
1289
1290
1291
static void
1292
fxt1_decode_1CHROMA (const uint8_t *code, int32_t t, uint8_t *rgba)
1293
{
1294
const uint32_t *cc;
1295
uint32_t kk;
1296
1297
cc = (const uint32_t *)code;
1298
if (t & 16) {
1299
cc++;
1300
t &= 15;
1301
}
1302
t = (cc[0] >> (t * 2)) & 3;
1303
1304
t *= 15;
1305
cc = (const uint32_t *)(code + 8 + t / 8);
1306
kk = cc[0] >> (t & 7);
1307
rgba[BCOMP] = UP5(kk);
1308
rgba[GCOMP] = UP5(kk >> 5);
1309
rgba[RCOMP] = UP5(kk >> 10);
1310
rgba[ACOMP] = 255;
1311
}
1312
1313
1314
static void
1315
fxt1_decode_1MIXED (const uint8_t *code, int32_t t, uint8_t *rgba)
1316
{
1317
const uint32_t *cc;
1318
uint32_t col[2][3];
1319
int32_t glsb, selb;
1320
1321
cc = (const uint32_t *)code;
1322
if (t & 16) {
1323
t &= 15;
1324
t = (cc[1] >> (t * 2)) & 3;
1325
/* col 2 */
1326
col[0][BCOMP] = (*(const uint32_t *)(code + 11)) >> 6;
1327
col[0][GCOMP] = CC_SEL(cc, 99);
1328
col[0][RCOMP] = CC_SEL(cc, 104);
1329
/* col 3 */
1330
col[1][BCOMP] = CC_SEL(cc, 109);
1331
col[1][GCOMP] = CC_SEL(cc, 114);
1332
col[1][RCOMP] = CC_SEL(cc, 119);
1333
glsb = CC_SEL(cc, 126);
1334
selb = CC_SEL(cc, 33);
1335
} else {
1336
t = (cc[0] >> (t * 2)) & 3;
1337
/* col 0 */
1338
col[0][BCOMP] = CC_SEL(cc, 64);
1339
col[0][GCOMP] = CC_SEL(cc, 69);
1340
col[0][RCOMP] = CC_SEL(cc, 74);
1341
/* col 1 */
1342
col[1][BCOMP] = CC_SEL(cc, 79);
1343
col[1][GCOMP] = CC_SEL(cc, 84);
1344
col[1][RCOMP] = CC_SEL(cc, 89);
1345
glsb = CC_SEL(cc, 125);
1346
selb = CC_SEL(cc, 1);
1347
}
1348
1349
if (CC_SEL(cc, 124) & 1) {
1350
/* alpha[0] == 1 */
1351
1352
if (t == 3) {
1353
/* zero */
1354
rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1355
} else {
1356
uint8_t r, g, b;
1357
if (t == 0) {
1358
b = UP5(col[0][BCOMP]);
1359
g = UP5(col[0][GCOMP]);
1360
r = UP5(col[0][RCOMP]);
1361
} else if (t == 2) {
1362
b = UP5(col[1][BCOMP]);
1363
g = UP6(col[1][GCOMP], glsb);
1364
r = UP5(col[1][RCOMP]);
1365
} else {
1366
b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1367
g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1368
r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1369
}
1370
rgba[RCOMP] = r;
1371
rgba[GCOMP] = g;
1372
rgba[BCOMP] = b;
1373
rgba[ACOMP] = 255;
1374
}
1375
} else {
1376
/* alpha[0] == 0 */
1377
uint8_t r, g, b;
1378
if (t == 0) {
1379
b = UP5(col[0][BCOMP]);
1380
g = UP6(col[0][GCOMP], glsb ^ selb);
1381
r = UP5(col[0][RCOMP]);
1382
} else if (t == 3) {
1383
b = UP5(col[1][BCOMP]);
1384
g = UP6(col[1][GCOMP], glsb);
1385
r = UP5(col[1][RCOMP]);
1386
} else {
1387
b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1388
g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1389
UP6(col[1][GCOMP], glsb));
1390
r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1391
}
1392
rgba[RCOMP] = r;
1393
rgba[GCOMP] = g;
1394
rgba[BCOMP] = b;
1395
rgba[ACOMP] = 255;
1396
}
1397
}
1398
1399
1400
static void
1401
fxt1_decode_1ALPHA (const uint8_t *code, int32_t t, uint8_t *rgba)
1402
{
1403
const uint32_t *cc;
1404
uint8_t r, g, b, a;
1405
1406
cc = (const uint32_t *)code;
1407
if (CC_SEL(cc, 124) & 1) {
1408
/* lerp == 1 */
1409
uint32_t col0[4];
1410
1411
if (t & 16) {
1412
t &= 15;
1413
t = (cc[1] >> (t * 2)) & 3;
1414
/* col 2 */
1415
col0[BCOMP] = (*(const uint32_t *)(code + 11)) >> 6;
1416
col0[GCOMP] = CC_SEL(cc, 99);
1417
col0[RCOMP] = CC_SEL(cc, 104);
1418
col0[ACOMP] = CC_SEL(cc, 119);
1419
} else {
1420
t = (cc[0] >> (t * 2)) & 3;
1421
/* col 0 */
1422
col0[BCOMP] = CC_SEL(cc, 64);
1423
col0[GCOMP] = CC_SEL(cc, 69);
1424
col0[RCOMP] = CC_SEL(cc, 74);
1425
col0[ACOMP] = CC_SEL(cc, 109);
1426
}
1427
1428
if (t == 0) {
1429
b = UP5(col0[BCOMP]);
1430
g = UP5(col0[GCOMP]);
1431
r = UP5(col0[RCOMP]);
1432
a = UP5(col0[ACOMP]);
1433
} else if (t == 3) {
1434
b = UP5(CC_SEL(cc, 79));
1435
g = UP5(CC_SEL(cc, 84));
1436
r = UP5(CC_SEL(cc, 89));
1437
a = UP5(CC_SEL(cc, 114));
1438
} else {
1439
b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1440
g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1441
r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1442
a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1443
}
1444
} else {
1445
/* lerp == 0 */
1446
1447
if (t & 16) {
1448
cc++;
1449
t &= 15;
1450
}
1451
t = (cc[0] >> (t * 2)) & 3;
1452
1453
if (t == 3) {
1454
/* zero */
1455
r = g = b = a = 0;
1456
} else {
1457
uint32_t kk;
1458
cc = (const uint32_t *)code;
1459
a = UP5(cc[3] >> (t * 5 + 13));
1460
t *= 15;
1461
cc = (const uint32_t *)(code + 8 + t / 8);
1462
kk = cc[0] >> (t & 7);
1463
b = UP5(kk);
1464
g = UP5(kk >> 5);
1465
r = UP5(kk >> 10);
1466
}
1467
}
1468
rgba[RCOMP] = r;
1469
rgba[GCOMP] = g;
1470
rgba[BCOMP] = b;
1471
rgba[ACOMP] = a;
1472
}
1473
1474
1475
/*
 * Decode the texel at (i, j) of an FXT1 image into rgba[4].
 *
 * The 16-byte block containing the texel is located from i, j and stride;
 * the top three bits of the block (bits 125-127) select the block mode and
 * with it the per-mode decoder.
 */
static void
fxt1_decode_1 (const void *texture, int32_t stride, /* in pixels */
               int32_t i, int32_t j, uint8_t *rgba)
{
   const int32_t blockIndex = (j / 4) * (stride / 8) + (i / 8);
   const uint8_t *code = (const uint8_t *)texture + blockIndex * 16;
   const int32_t mode = CC_SEL(code, 125);

   /* Texel number inside the 8x4 block: the left 4x4 half is numbered
    * 0-15 and the right half 16-31, four texels per row in each half.
    */
   int32_t t = (i & 7) + (j & 3) * 4;
   if (i & 4) {
      t += 12;
   }

   switch (mode) {
   case 0:
   case 1:
      /* cc-high = "00?" */
      fxt1_decode_1HI(code, t, rgba);
      break;
   case 2:
      /* cc-chroma = "010" */
      fxt1_decode_1CHROMA(code, t, rgba);
      break;
   case 3:
      /* alpha = "011" */
      fxt1_decode_1ALPHA(code, t, rgba);
      break;
   default:
      /* mixed = "1??" (modes 4..7) */
      fxt1_decode_1MIXED(code, t, rgba);
      break;
   }
}
1502
1503
/*
1504
* Pixel fetch within a block.
1505
*/
1506
1507
/*
 * Fetch texel (i, j) of the FXT1 RGB block at src as four 8-bit channels.
 *
 * The format has no alpha channel, so alpha is forced to opaque whatever
 * the block decoder produced (it can yield alpha 0 for "transparent"
 * texels).  This matches the float fetch and the unpack paths for the RGB
 * format.
 */
void
util_format_fxt1_rgb_fetch_rgba_8unorm(uint8_t *restrict dst, const uint8_t *restrict src, unsigned i, unsigned j)
{
   fxt1_decode_1(src, 0, i, j, dst);
   dst[3] = 0xff;
}
1512
1513
/*
 * Fetch texel (i, j) of the FXT1 RGBA block at src as four 8-bit channels.
 *
 * The decoded alpha is returned as is.  Overwriting it with 0xff would
 * discard the block's alpha and disagree with the float fetch and the
 * unpack paths for the RGBA format.
 */
void
util_format_fxt1_rgba_fetch_rgba_8unorm(uint8_t *restrict dst, const uint8_t *restrict src, unsigned i, unsigned j)
{
   fxt1_decode_1(src, 0, i, j, dst);
}
1519
1520
/*
 * Fetch texel (i, j) of the FXT1 RGB block at src as four floats.
 * Color channels are converted with ubyte_to_float(); alpha is always 1.0.
 */
void
util_format_fxt1_rgb_fetch_rgba(void *restrict in_dst, const uint8_t *restrict src, unsigned i, unsigned j)
{
   float *out = in_dst;
   uint8_t texel[4];
   unsigned c;

   fxt1_decode_1(src, 0, i, j, texel);

   for (c = 0; c < 3; c++) {
      out[c] = ubyte_to_float(texel[c]);
   }
   out[3] = 1.0;
}
1531
1532
/*
 * Fetch texel (i, j) of the FXT1 RGBA block at src as four floats.
 * All four decoded channels are converted with ubyte_to_float().
 */
void
util_format_fxt1_rgba_fetch_rgba(void *restrict in_dst, const uint8_t *restrict src, unsigned i, unsigned j)
{
   float *out = in_dst;
   uint8_t texel[4];
   unsigned c;

   fxt1_decode_1(src, 0, i, j, texel);

   for (c = 0; c < 4; c++) {
      out[c] = ubyte_to_float(texel[c]);
   }
}
1543
1544
/*
1545
* Block decompression.
1546
*/
1547
1548
static inline void
1549
util_format_fxtn_rgb_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
1550
const uint8_t *restrict src_row, unsigned src_stride,
1551
unsigned width, unsigned height,
1552
boolean rgba)
1553
{
1554
const unsigned bw = 8, bh = 4, comps = 4;
1555
unsigned x, y, i, j;
1556
for (y = 0; y < height; y += bh) {
1557
const uint8_t *src = src_row;
1558
for (x = 0; x < width; x += bw) {
1559
for (j = 0; j < bh; ++j) {
1560
for (i = 0; i < bw; ++i) {
1561
uint8_t *dst = dst_row + (y + j) * dst_stride / sizeof(*dst_row) + (x + i) * comps;
1562
fxt1_decode_1(src, 0, i, j, dst);
1563
if (!rgba)
1564
dst[3] = 0xff;
1565
}
1566
}
1567
src += FXT1_BLOCK_SIZE;
1568
}
1569
src_row += src_stride;
1570
}
1571
}
1572
1573
void
1574
util_format_fxt1_rgb_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
1575
const uint8_t *restrict src_row, unsigned src_stride,
1576
unsigned width, unsigned height)
1577
{
1578
util_format_fxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride,
1579
src_row, src_stride,
1580
width, height,
1581
false);
1582
}
1583
1584
void
1585
util_format_fxt1_rgba_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
1586
const uint8_t *restrict src_row, unsigned src_stride,
1587
unsigned width, unsigned height)
1588
{
1589
util_format_fxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride,
1590
src_row, src_stride,
1591
width, height,
1592
true);
1593
}
1594
1595
static inline void
1596
util_format_fxtn_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride,
1597
const uint8_t *restrict src_row, unsigned src_stride,
1598
unsigned width, unsigned height,
1599
boolean rgba)
1600
{
1601
const unsigned bw = 8, bh = 4, comps = 4;
1602
unsigned x, y, i, j;
1603
for (y = 0; y < height; y += 4) {
1604
const uint8_t *src = src_row;
1605
for (x = 0; x < width; x += 8) {
1606
for (j = 0; j < bh; ++j) {
1607
for (i = 0; i < bw; ++i) {
1608
float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i) * comps;
1609
uint8_t tmp[4];
1610
fxt1_decode_1(src, 0, i, j, tmp);
1611
dst[0] = ubyte_to_float(tmp[0]);
1612
dst[1] = ubyte_to_float(tmp[1]);
1613
dst[2] = ubyte_to_float(tmp[2]);
1614
if (rgba)
1615
dst[3] = ubyte_to_float(tmp[3]);
1616
else
1617
dst[3] = 1.0;
1618
}
1619
}
1620
src += FXT1_BLOCK_SIZE;
1621
}
1622
src_row += src_stride;
1623
}
1624
}
1625
1626
void
1627
util_format_fxt1_rgb_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride,
1628
const uint8_t *restrict src_row, unsigned src_stride,
1629
unsigned width, unsigned height)
1630
{
1631
util_format_fxtn_rgb_unpack_rgba_float(dst_row, dst_stride,
1632
src_row, src_stride,
1633
width, height,
1634
false);
1635
}
1636
1637
void
1638
util_format_fxt1_rgba_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride,
1639
const uint8_t *restrict src_row, unsigned src_stride,
1640
unsigned width, unsigned height)
1641
{
1642
util_format_fxtn_rgb_unpack_rgba_float(dst_row, dst_stride,
1643
src_row, src_stride,
1644
width, height,
1645
true);
1646
}
1647
1648
/*
1649
* Block compression.
1650
*/
1651
1652
/*
 * Compress 8-bit RGBA rows to FXT1 RGB.
 *
 * The encoder is given 3-byte texels for this format, so the source is
 * first repacked into a tightly packed 24bpp temporary, dropping the
 * fourth byte of every texel.  Returns without writing to dst_row if the
 * temporary cannot be allocated.
 */
void
util_format_fxt1_rgb_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
                                      const uint8_t *restrict src, unsigned src_stride,
                                      unsigned width, unsigned height)
{
   const int rgb_stride = width * 3;
   uint8_t *rgb = malloc(height * rgb_stride);
   uint8_t *out_row;

   if (rgb == NULL)
      return;

   out_row = rgb;
   for (unsigned row = 0; row < height; row++) {
      const uint8_t *in = src;
      uint8_t *out = out_row;

      for (unsigned col = 0; col < width; col++) {
         out[0] = in[0];
         out[1] = in[1];
         out[2] = in[2];
         in += 4;
         out += 3;
      }
      src += src_stride;
      out_row += rgb_stride;
   }

   fxt1_encode(width, height, 3, rgb, rgb_stride, dst_row, dst_stride);

   free(rgb);
}
1677
1678
/*
 * Compress 8-bit RGBA rows to FXT1 RGBA.  The source already has the
 * 4-bytes-per-texel layout the encoder takes, so it is passed through
 * unchanged.
 */
void
util_format_fxt1_rgba_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
                                       const uint8_t *restrict src, unsigned src_stride,
                                       unsigned width, unsigned height)
{
   const int32_t bytes_per_texel = 4;

   fxt1_encode(width, height, bytes_per_texel, src, src_stride, dst_row, dst_stride);
}
1685
1686
/*
 * Compress float RGBA rows to FXT1 RGB.
 *
 * The floats are first converted to a tightly packed 8-bit RGBA temporary,
 * which is then handed to the 8-bit RGB packer.  Returns without writing
 * to dst_row if the temporary cannot be allocated.
 */
void
util_format_fxt1_rgb_pack_rgba_float(uint8_t *restrict dst_row, unsigned dst_stride,
                                     const float *restrict src, unsigned src_stride,
                                     unsigned width, unsigned height)
{
   const int rgba8_stride = width * 4;
   uint8_t *rgba8 = malloc(height * rgba8_stride);

   if (rgba8 == NULL)
      return;

   util_format_r8g8b8a8_unorm_pack_rgba_float(rgba8, rgba8_stride, src, src_stride,
                                              width, height);
   util_format_fxt1_rgb_pack_rgba_8unorm(dst_row, dst_stride, rgba8, rgba8_stride,
                                         width, height);

   free(rgba8);
}
1706
1707
/*
 * Compress float RGBA rows to FXT1 RGBA.
 *
 * The floats are first converted to a tightly packed 8-bit RGBA temporary,
 * which is then handed to the 8-bit RGBA packer.  Returns without writing
 * to dst_row if the temporary cannot be allocated.
 */
void
util_format_fxt1_rgba_pack_rgba_float(uint8_t *restrict dst_row, unsigned dst_stride,
                                      const float *restrict src, unsigned src_stride,
                                      unsigned width, unsigned height)
{
   const int rgba8_stride = width * 4;
   uint8_t *rgba8 = malloc(height * rgba8_stride);

   if (rgba8 == NULL)
      return;

   util_format_r8g8b8a8_unorm_pack_rgba_float(rgba8, rgba8_stride, src, src_stride,
                                              width, height);
   util_format_fxt1_rgba_pack_rgba_8unorm(dst_row, dst_stride, rgba8, rgba8_stride,
                                          width, height);

   free(rgba8);
}
1727
1728