CoCalc -- u_format

GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/util/format/u_format_fxt1.c
⁷¹³² views
1
/**************************************************************************
2
 *
3
 * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
4
 * Copyright (c) 2008 VMware, Inc.
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 * copy of this software and associated documentation files (the "Software"),
8
 * to deal in the Software without restriction, including without limitation
9
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10
 * and/or sell copies of the Software, and to permit persons to whom the
11
 * Software is furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included
14
 * in all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
 * OTHER DEALINGS IN THE SOFTWARE.
23
 *
24
 **************************************************************************/
25

26
#include <string.h>

#include "util/format/u_format.h"
#include "util/format/u_format_fxt1.h"
#include "util/format/u_format_pack.h"
#include "util/format_srgb.h"
#include "util/u_math.h"
31

32
#define RCOMP 0
33
#define GCOMP 1
34
#define BCOMP 2
35
#define ACOMP 3
36

37
#define FXT1_BLOCK_SIZE 16
38

39
/* Forward declarations of two file-local routines.  Their definitions are
 * not part of this section of the file. */
static void
fxt1_encode (uint32_t width, uint32_t height, int32_t comps,
             const void *source, int32_t srcRowStride,
             void *dest, int32_t destRowStride);

static void
fxt1_decode_1 (const void *texture, int32_t stride,
               int32_t i, int32_t j, uint8_t *rgba);
47

48
/***************************************************************************\
49
 * FXT1 encoder
50
 *
51
 * The encoder was built by reversing the decoder,
52
 * and is vaguely based on Texus2 by 3dfx. Note that this code
53
 * is merely a proof of concept, since it is highly UNoptimized;
54
 * moreover, it is sub-optimal due to initial conditions passed
55
 * to Lloyd's algorithm (the interpolation modes are even worse).
56
\***************************************************************************/
57

58

59
/* Block geometry and quantizer tuning constants. */
#define MAX_COMP 4   /* largest number of components a texel ever has */
#define MAX_VECT 4   /* largest number of base vectors ever searched for */
#define N_TEXELS 32  /* texels per block (always 32) */
#define LL_N_REP 50  /* iteration cap for lloyd's vq */
#define LL_RMS_D 10  /* fault tolerance (maximum delta) */
#define LL_RMS_E 255 /* fault tolerance (maximum error) */
#define ALPHA_TS 2   /* alpha threshold: (255 - ALPHA_TS) deemed opaque */

/* Reference pattern for ISTBLACK: sizeof(uint32_t) zero bytes. */
static const uint32_t zero = 0;
/* Non-zero when the first sizeof(zero) bytes of object v are all zero. */
#define ISTBLACK(v) (memcmp(&(v), &zero, sizeof(zero)) == 0)
68

69
/*
 * Define a 64-bit unsigned integer type and the three operations the
 * encoder needs on it:
 *
 *   FX64_MOV32(a, b)  load the 32-bit value b into a (upper half cleared)
 *   FX64_OR32(a, b)   OR the 32-bit value b into the low half of a
 *   FX64_SHL(a, c)    shift a left by c bits, 0 <= c < 64
 *
 * The native uint64_t variant is the one compiled.  The struct fallback is
 * kept for reference and mirrors the native semantics: MOV32 clears the
 * upper half and a shift count of zero is a no-op (a plain ">> 32" on a
 * 32-bit value would be undefined).
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b)  ((a) |= (b))
#define FX64_SHL(a, c)   ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   uint32_t lo, hi;
} Fx64;

#define FX64_MOV32(a, b)                                     \
   do {                                                      \
      (a).lo = (b);                                          \
      (a).hi = 0;                                            \
   } while (0)

#define FX64_OR32(a, b)  ((a).lo |= (b))

#define FX64_SHL(a, c)                                       \
   do {                                                      \
      if ((c) >= 32) {                                       \
         (a).hi = (a).lo << ((c) - 32);                      \
         (a).lo = 0;                                         \
      } else if ((c) > 0) {                                  \
         (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c)));  \
         (a).lo <<= (c);                                     \
      }                                                      \
   } while (0)

#endif
105

106

107
/* Per-component weight of the colour metric.  It is uniformly 1 here; it
 * can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define F(i) (float)1
/* When non-zero, CALCCDOT clamps its result into [0, NV] (for paranoids). */
#define SAFECDOT 1

/*
 * MAKEIVEC -- compute an interpolation vector.
 *
 * Given the two endpoints V0 and V1 (NC components each), fill IV and B so
 * that for a colour V the value dot(V, IV) + B runs from 0.5 at V0 to
 * NV + 0.5 at V1; truncating it yields an index in 0..NV along the segment.
 *
 * If both endpoints coincide the segment has zero length; IV is then set
 * to all zeros and B to 0.5 (every colour maps to index 0) instead of
 * dividing by zero.
 *
 * The macro uses an integer variable named `i` from the enclosing scope as
 * its loop counter.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)                      \
   do {                                                      \
      float d2 = 0.0F;                                       \
      float rd2;                                             \
                                                             \
      for (i = 0; i < (NC); i++) {                           \
         (IV)[i] = ((V1)[i] - (V0)[i]) * F(i);               \
         d2 += (IV)[i] * (IV)[i];                            \
      }                                                      \
      /* guard against identical endpoints */                \
      rd2 = (d2 > 0.0F) ? (float)(NV) / d2 : 0.0F;           \
      (B) = 0;                                               \
      for (i = 0; i < (NC); i++) {                           \
         (IV)[i] *= F(i);                                    \
         (B) -= (IV)[i] * (V0)[i];                           \
         (IV)[i] *= rd2;                                     \
      }                                                      \
      (B) = (B) * rd2 + 0.5f;                                \
   } while (0)

/*
 * CALCCDOT -- map colour V onto the index range using IV/B from MAKEIVEC.
 *
 * Stores (int32_t)(dot(V, IV) + B) into TEXEL and, when SAFECDOT is set,
 * clamps it into [0, NV].  Uses the enclosing scope's `i` as loop counter.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)                    \
   do {                                                      \
      float dot = 0.0F;                                      \
      for (i = 0; i < (NC); i++) {                           \
         dot += (V)[i] * (IV)[i];                            \
      }                                                      \
      (TEXEL) = (int32_t)(dot + (B));                        \
      if (SAFECDOT) {                                        \
         if ((TEXEL) < 0) {                                  \
            (TEXEL) = 0;                                     \
         } else if ((TEXEL) > (NV)) {                        \
            (TEXEL) = (NV);                                  \
         }                                                   \
      }                                                      \
   } while (0)
145

146

147
static int32_t
148
fxt1_bestcol (float vec[][MAX_COMP], int32_t nv,
149
              uint8_t input[MAX_COMP], int32_t nc)
150
{
151
   int32_t i, j, best = -1;
152
   float err = 1e9; /* big enough */
153

154
   for (j = 0; j < nv; j++) {
155
      float e = 0.0F;
156
      for (i = 0; i < nc; i++) {
157
         e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
158
      }
159
      if (e < err) {
160
         err = e;
161
         best = j;
162
      }
163
   }
164

165
   return best;
166
}
167

168

169
static int32_t
170
fxt1_worst (float vec[MAX_COMP],
171
            uint8_t input[N_TEXELS][MAX_COMP], int32_t nc, int32_t n)
172
{
173
   int32_t i, k, worst = -1;
174
   float err = -1.0F; /* small enough */
175

176
   for (k = 0; k < n; k++) {
177
      float e = 0.0F;
178
      for (i = 0; i < nc; i++) {
179
         e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
180
      }
181
      if (e > err) {
182
         err = e;
183
         worst = k;
184
      }
185
   }
186

187
   return worst;
188
}
189

190

191
static int32_t
192
fxt1_variance (uint8_t input[N_TEXELS / 2][MAX_COMP], int32_t nc)
193
{
194
   const int n = N_TEXELS / 2;
195
   int32_t i, k, best = 0;
196
   int32_t sx, sx2;
197
   double var, maxvar = -1; /* small enough */
198
   double teenth = 1.0 / n;
199

200
   for (i = 0; i < nc; i++) {
201
      sx = sx2 = 0;
202
      for (k = 0; k < n; k++) {
203
         int32_t t = input[k][i];
204
         sx += t;
205
         sx2 += t * t;
206
      }
207
      var = sx2 * teenth - sx * sx * teenth * teenth;
208
      if (maxvar < var) {
209
         maxvar = var;
210
         best = i;
211
      }
212
   }
213

214
   return best;
215
}
216

217

218
static int32_t
219
fxt1_choose (float vec[][MAX_COMP], int32_t nv,
220
             uint8_t input[N_TEXELS][MAX_COMP], int32_t nc, int32_t n)
221
{
222
#if 0
223
   /* Choose colors from a grid.
224
    */
225
   int32_t i, j;
226

227
   for (j = 0; j < nv; j++) {
228
      int32_t m = j * (n - 1) / (nv - 1);
229
      for (i = 0; i < nc; i++) {
230
         vec[j][i] = input[m][i];
231
      }
232
   }
233
#else
234
   /* Our solution here is to find the darkest and brightest colors in
235
    * the 8x4 tile and use those as the two representative colors.
236
    * There are probably better algorithms to use (histogram-based).
237
    */
238
   int32_t i, j, k;
239
   int32_t minSum = 2000; /* big enough */
240
   int32_t maxSum = -1; /* small enough */
241
   int32_t minCol = 0; /* phoudoin: silent compiler! */
242
   int32_t maxCol = 0; /* phoudoin: silent compiler! */
243

244
   struct {
245
      int32_t flag;
246
      int32_t key;
247
      int32_t freq;
248
      int32_t idx;
249
   } hist[N_TEXELS];
250
   int32_t lenh = 0;
251

252
   memset(hist, 0, sizeof(hist));
253

254
   for (k = 0; k < n; k++) {
255
      int32_t l;
256
      int32_t key = 0;
257
      int32_t sum = 0;
258
      for (i = 0; i < nc; i++) {
259
         key <<= 8;
260
         key |= input[k][i];
261
         sum += input[k][i];
262
      }
263
      for (l = 0; l < n; l++) {
264
         if (!hist[l].flag) {
265
            /* alloc new slot */
266
            hist[l].flag = !0;
267
            hist[l].key = key;
268
            hist[l].freq = 1;
269
            hist[l].idx = k;
270
            lenh = l + 1;
271
            break;
272
         } else if (hist[l].key == key) {
273
            hist[l].freq++;
274
            break;
275
         }
276
      }
277
      if (minSum > sum) {
278
         minSum = sum;
279
         minCol = k;
280
      }
281
      if (maxSum < sum) {
282
         maxSum = sum;
283
         maxCol = k;
284
      }
285
   }
286

287
   if (lenh <= nv) {
288
      for (j = 0; j < lenh; j++) {
289
         for (i = 0; i < nc; i++) {
290
            vec[j][i] = (float)input[hist[j].idx][i];
291
         }
292
      }
293
      for (; j < nv; j++) {
294
         for (i = 0; i < nc; i++) {
295
            vec[j][i] = vec[0][i];
296
         }
297
      }
298
      return 0;
299
   }
300

301
   for (j = 0; j < nv; j++) {
302
      for (i = 0; i < nc; i++) {
303
         vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (float)(nv - 1);
304
      }
305
   }
306
#endif
307

308
   return !0;
309
}
310

311

312
static int32_t
313
fxt1_lloyd (float vec[][MAX_COMP], int32_t nv,
314
            uint8_t input[N_TEXELS][MAX_COMP], int32_t nc, int32_t n)
315
{
316
   /* Use the generalized lloyd's algorithm for VQ:
317
    *     find 4 color vectors.
318
    *
319
    *     for each sample color
320
    *         sort to nearest vector.
321
    *
322
    *     replace each vector with the centroid of its matching colors.
323
    *
324
    *     repeat until RMS doesn't improve.
325
    *
326
    *     if a color vector has no samples, or becomes the same as another
327
    *     vector, replace it with the color which is farthest from a sample.
328
    *
329
    * vec[][MAX_COMP]           initial vectors and resulting colors
330
    * nv                        number of resulting colors required
331
    * input[N_TEXELS][MAX_COMP] input texels
332
    * nc                        number of components in input / vec
333
    * n                         number of input samples
334
    */
335

336
   int32_t sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
337
   int32_t cnt[MAX_VECT]; /* how many times a certain vector was chosen */
338
   float error, lasterror = 1e9;
339

340
   int32_t i, j, k, rep;
341

342
   /* the quantizer */
343
   for (rep = 0; rep < LL_N_REP; rep++) {
344
      /* reset sums & counters */
345
      for (j = 0; j < nv; j++) {
346
         for (i = 0; i < nc; i++) {
347
            sum[j][i] = 0;
348
         }
349
         cnt[j] = 0;
350
      }
351
      error = 0;
352

353
      /* scan whole block */
354
      for (k = 0; k < n; k++) {
355
#if 1
356
         int32_t best = -1;
357
         float err = 1e9; /* big enough */
358
         /* determine best vector */
359
         for (j = 0; j < nv; j++) {
360
            float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
361
                      (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
362
                      (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
363
            if (nc == 4) {
364
               e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
365
            }
366
            if (e < err) {
367
               err = e;
368
               best = j;
369
            }
370
         }
371
#else
372
         int32_t best = fxt1_bestcol(vec, nv, input[k], nc, &err);
373
#endif
374
         assert(best >= 0);
375
         /* add in closest color */
376
         for (i = 0; i < nc; i++) {
377
            sum[best][i] += input[k][i];
378
         }
379
         /* mark this vector as used */
380
         cnt[best]++;
381
         /* accumulate error */
382
         error += err;
383
      }
384

385
      /* check RMS */
386
      if ((error < LL_RMS_E) ||
387
          ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
388
         return !0; /* good match */
389
      }
390
      lasterror = error;
391

392
      /* move each vector to the barycenter of its closest colors */
393
      for (j = 0; j < nv; j++) {
394
         if (cnt[j]) {
395
            float div = 1.0F / cnt[j];
396
            for (i = 0; i < nc; i++) {
397
               vec[j][i] = div * sum[j][i];
398
            }
399
         } else {
400
            /* this vec has no samples or is identical with a previous vec */
401
            int32_t worst = fxt1_worst(vec[j], input, nc, n);
402
            for (i = 0; i < nc; i++) {
403
               vec[j][i] = input[worst][i];
404
            }
405
         }
406
      }
407
   }
408

409
   return 0; /* could not converge fast enough */
410
}
411

412

413
static void
414
fxt1_quantize_CHROMA (uint32_t *cc,
415
                      uint8_t input[N_TEXELS][MAX_COMP])
416
{
417
   const int32_t n_vect = 4; /* 4 base vectors to find */
418
   const int32_t n_comp = 3; /* 3 components: R, G, B */
419
   float vec[MAX_VECT][MAX_COMP];
420
   int32_t i, j, k;
421
   Fx64 hi; /* high quadword */
422
   uint32_t lohi, lolo; /* low quadword: hi dword, lo dword */
423

424
   if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
425
      fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
426
   }
427

428
   FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
429
   for (j = n_vect - 1; j >= 0; j--) {
430
      for (i = 0; i < n_comp; i++) {
431
         /* add in colors */
432
         FX64_SHL(hi, 5);
433
         FX64_OR32(hi, (uint32_t)(vec[j][i] / 8.0F));
434
      }
435
   }
436
   ((Fx64 *)cc)[1] = hi;
437

438
   lohi = lolo = 0;
439
   /* right microtile */
440
   for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
441
      lohi <<= 2;
442
      lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
443
   }
444
   /* left microtile */
445
   for (; k >= 0; k--) {
446
      lolo <<= 2;
447
      lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
448
   }
449
   cc[1] = lohi;
450
   cc[0] = lolo;
451
}
452

453

454
static void
455
fxt1_quantize_ALPHA0 (uint32_t *cc,
456
                      uint8_t input[N_TEXELS][MAX_COMP],
457
                      uint8_t reord[N_TEXELS][MAX_COMP], int32_t n)
458
{
459
   const int32_t n_vect = 3; /* 3 base vectors to find */
460
   const int32_t n_comp = 4; /* 4 components: R, G, B, A */
461
   float vec[MAX_VECT][MAX_COMP];
462
   int32_t i, j, k;
463
   Fx64 hi; /* high quadword */
464
   uint32_t lohi, lolo; /* low quadword: hi dword, lo dword */
465

466
   /* the last vector indicates zero */
467
   for (i = 0; i < n_comp; i++) {
468
      vec[n_vect][i] = 0;
469
   }
470

471
   /* the first n texels in reord are guaranteed to be non-zero */
472
   if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
473
      fxt1_lloyd(vec, n_vect, reord, n_comp, n);
474
   }
475

476
   FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
477
   for (j = n_vect - 1; j >= 0; j--) {
478
      /* add in alphas */
479
      FX64_SHL(hi, 5);
480
      FX64_OR32(hi, (uint32_t)(vec[j][ACOMP] / 8.0F));
481
   }
482
   for (j = n_vect - 1; j >= 0; j--) {
483
      for (i = 0; i < n_comp - 1; i++) {
484
         /* add in colors */
485
         FX64_SHL(hi, 5);
486
         FX64_OR32(hi, (uint32_t)(vec[j][i] / 8.0F));
487
      }
488
   }
489
   ((Fx64 *)cc)[1] = hi;
490

491
   lohi = lolo = 0;
492
   /* right microtile */
493
   for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
494
      lohi <<= 2;
495
      lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
496
   }
497
   /* left microtile */
498
   for (; k >= 0; k--) {
499
      lolo <<= 2;
500
      lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
501
   }
502
   cc[1] = lohi;
503
   cc[0] = lolo;
504
}
505

506

507
static void
508
fxt1_quantize_ALPHA1 (uint32_t *cc,
509
                      uint8_t input[N_TEXELS][MAX_COMP])
510
{
511
   const int32_t n_vect = 3; /* highest vector number in each microtile */
512
   const int32_t n_comp = 4; /* 4 components: R, G, B, A */
513
   float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
514
   float b, iv[MAX_COMP]; /* interpolation vector */
515
   int32_t i, j, k;
516
   Fx64 hi; /* high quadword */
517
   uint32_t lohi, lolo; /* low quadword: hi dword, lo dword */
518

519
   int32_t minSum;
520
   int32_t maxSum;
521
   int32_t minColL = 0, maxColL = 0;
522
   int32_t minColR = 0, maxColR = 0;
523
   int32_t sumL = 0, sumR = 0;
524
   int32_t nn_comp;
525
   /* Our solution here is to find the darkest and brightest colors in
526
    * the 4x4 tile and use those as the two representative colors.
527
    * There are probably better algorithms to use (histogram-based).
528
    */
529
   nn_comp = n_comp;
530
   while ((minColL == maxColL) && nn_comp) {
531
       minSum = 2000; /* big enough */
532
       maxSum = -1; /* small enough */
533
       for (k = 0; k < N_TEXELS / 2; k++) {
534
           int32_t sum = 0;
535
           for (i = 0; i < nn_comp; i++) {
536
               sum += input[k][i];
537
           }
538
           if (minSum > sum) {
539
               minSum = sum;
540
               minColL = k;
541
           }
542
           if (maxSum < sum) {
543
               maxSum = sum;
544
               maxColL = k;
545
           }
546
           sumL += sum;
547
       }
548

549
       nn_comp--;
550
   }
551

552
   nn_comp = n_comp;
553
   while ((minColR == maxColR) && nn_comp) {
554
       minSum = 2000; /* big enough */
555
       maxSum = -1; /* small enough */
556
       for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
557
           int32_t sum = 0;
558
           for (i = 0; i < nn_comp; i++) {
559
               sum += input[k][i];
560
           }
561
           if (minSum > sum) {
562
               minSum = sum;
563
               minColR = k;
564
           }
565
           if (maxSum < sum) {
566
               maxSum = sum;
567
               maxColR = k;
568
           }
569
           sumR += sum;
570
       }
571

572
       nn_comp--;
573
   }
574

575
   /* choose the common vector (yuck!) */
576
   {
577
      int32_t j1, j2;
578
      int32_t v1 = 0, v2 = 0;
579
      float err = 1e9; /* big enough */
580
      float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
581
      for (i = 0; i < n_comp; i++) {
582
         tv[0][i] = input[minColL][i];
583
         tv[1][i] = input[maxColL][i];
584
         tv[2][i] = input[minColR][i];
585
         tv[3][i] = input[maxColR][i];
586
      }
587
      for (j1 = 0; j1 < 2; j1++) {
588
         for (j2 = 2; j2 < 4; j2++) {
589
            float e = 0.0F;
590
            for (i = 0; i < n_comp; i++) {
591
               e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
592
            }
593
            if (e < err) {
594
               err = e;
595
               v1 = j1;
596
               v2 = j2;
597
            }
598
         }
599
      }
600
      for (i = 0; i < n_comp; i++) {
601
         vec[0][i] = tv[1 - v1][i];
602
         vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
603
         vec[2][i] = tv[5 - v2][i];
604
      }
605
   }
606

607
   /* left microtile */
608
   cc[0] = 0;
609
   if (minColL != maxColL) {
610
      /* compute interpolation vector */
611
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
612

613
      /* add in texels */
614
      lolo = 0;
615
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
616
         int32_t texel;
617
         /* interpolate color */
618
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
619
         /* add in texel */
620
         lolo <<= 2;
621
         lolo |= texel;
622
      }
623

624
      cc[0] = lolo;
625
   }
626

627
   /* right microtile */
628
   cc[1] = 0;
629
   if (minColR != maxColR) {
630
      /* compute interpolation vector */
631
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
632

633
      /* add in texels */
634
      lohi = 0;
635
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
636
         int32_t texel;
637
         /* interpolate color */
638
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
639
         /* add in texel */
640
         lohi <<= 2;
641
         lohi |= texel;
642
      }
643

644
      cc[1] = lohi;
645
   }
646

647
   FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
648
   for (j = n_vect - 1; j >= 0; j--) {
649
      /* add in alphas */
650
      FX64_SHL(hi, 5);
651
      FX64_OR32(hi, (uint32_t)(vec[j][ACOMP] / 8.0F));
652
   }
653
   for (j = n_vect - 1; j >= 0; j--) {
654
      for (i = 0; i < n_comp - 1; i++) {
655
         /* add in colors */
656
         FX64_SHL(hi, 5);
657
         FX64_OR32(hi, (uint32_t)(vec[j][i] / 8.0F));
658
      }
659
   }
660
   ((Fx64 *)cc)[1] = hi;
661
}
662

663

664
static void
665
fxt1_quantize_HI (uint32_t *cc,
666
                  uint8_t input[N_TEXELS][MAX_COMP],
667
                  uint8_t reord[N_TEXELS][MAX_COMP], int32_t n)
668
{
669
   const int32_t n_vect = 6; /* highest vector number */
670
   const int32_t n_comp = 3; /* 3 components: R, G, B */
671
   float b = 0.0F;       /* phoudoin: silent compiler! */
672
   float iv[MAX_COMP];   /* interpolation vector */
673
   int32_t i, k;
674
   uint32_t hihi; /* high quadword: hi dword */
675

676
   int32_t minSum = 2000; /* big enough */
677
   int32_t maxSum = -1; /* small enough */
678
   int32_t minCol = 0; /* phoudoin: silent compiler! */
679
   int32_t maxCol = 0; /* phoudoin: silent compiler! */
680

681
   /* Our solution here is to find the darkest and brightest colors in
682
    * the 8x4 tile and use those as the two representative colors.
683
    * There are probably better algorithms to use (histogram-based).
684
    */
685
   for (k = 0; k < n; k++) {
686
      int32_t sum = 0;
687
      for (i = 0; i < n_comp; i++) {
688
         sum += reord[k][i];
689
      }
690
      if (minSum > sum) {
691
         minSum = sum;
692
         minCol = k;
693
      }
694
      if (maxSum < sum) {
695
         maxSum = sum;
696
         maxCol = k;
697
      }
698
   }
699

700
   hihi = 0; /* cc-hi = "00" */
701
   for (i = 0; i < n_comp; i++) {
702
      /* add in colors */
703
      hihi <<= 5;
704
      hihi |= reord[maxCol][i] >> 3;
705
   }
706
   for (i = 0; i < n_comp; i++) {
707
      /* add in colors */
708
      hihi <<= 5;
709
      hihi |= reord[minCol][i] >> 3;
710
   }
711
   cc[3] = hihi;
712
   cc[0] = cc[1] = cc[2] = 0;
713

714
   /* compute interpolation vector */
715
   if (minCol != maxCol) {
716
      MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
717
   }
718

719
   /* add in texels */
720
   for (k = N_TEXELS - 1; k >= 0; k--) {
721
      int32_t t = k * 3;
722
      uint32_t *kk = (uint32_t *)((char *)cc + t / 8);
723
      int32_t texel = n_vect + 1; /* transparent black */
724

725
      if (!ISTBLACK(input[k])) {
726
         if (minCol != maxCol) {
727
            /* interpolate color */
728
            CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
729
            /* add in texel */
730
            kk[0] |= texel << (t & 7);
731
         }
732
      } else {
733
         /* add in texel */
734
         kk[0] |= texel << (t & 7);
735
      }
736
   }
737
}
738

739

740
static void
741
fxt1_quantize_MIXED1 (uint32_t *cc,
742
                      uint8_t input[N_TEXELS][MAX_COMP])
743
{
744
   const int32_t n_vect = 2; /* highest vector number in each microtile */
745
   const int32_t n_comp = 3; /* 3 components: R, G, B */
746
   uint8_t vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
747
   float b, iv[MAX_COMP]; /* interpolation vector */
748
   int32_t i, j, k;
749
   Fx64 hi; /* high quadword */
750
   uint32_t lohi, lolo; /* low quadword: hi dword, lo dword */
751

752
   int32_t minSum;
753
   int32_t maxSum;
754
   int32_t minColL = 0, maxColL = -1;
755
   int32_t minColR = 0, maxColR = -1;
756

757
   /* Our solution here is to find the darkest and brightest colors in
758
    * the 4x4 tile and use those as the two representative colors.
759
    * There are probably better algorithms to use (histogram-based).
760
    */
761
   minSum = 2000; /* big enough */
762
   maxSum = -1; /* small enough */
763
   for (k = 0; k < N_TEXELS / 2; k++) {
764
      if (!ISTBLACK(input[k])) {
765
         int32_t sum = 0;
766
         for (i = 0; i < n_comp; i++) {
767
            sum += input[k][i];
768
         }
769
         if (minSum > sum) {
770
            minSum = sum;
771
            minColL = k;
772
         }
773
         if (maxSum < sum) {
774
            maxSum = sum;
775
            maxColL = k;
776
         }
777
      }
778
   }
779
   minSum = 2000; /* big enough */
780
   maxSum = -1; /* small enough */
781
   for (; k < N_TEXELS; k++) {
782
      if (!ISTBLACK(input[k])) {
783
         int32_t sum = 0;
784
         for (i = 0; i < n_comp; i++) {
785
            sum += input[k][i];
786
         }
787
         if (minSum > sum) {
788
            minSum = sum;
789
            minColR = k;
790
         }
791
         if (maxSum < sum) {
792
            maxSum = sum;
793
            maxColR = k;
794
         }
795
      }
796
   }
797

798
   /* left microtile */
799
   if (maxColL == -1) {
800
      /* all transparent black */
801
      cc[0] = ~0u;
802
      for (i = 0; i < n_comp; i++) {
803
         vec[0][i] = 0;
804
         vec[1][i] = 0;
805
      }
806
   } else {
807
      cc[0] = 0;
808
      for (i = 0; i < n_comp; i++) {
809
         vec[0][i] = input[minColL][i];
810
         vec[1][i] = input[maxColL][i];
811
      }
812
      if (minColL != maxColL) {
813
         /* compute interpolation vector */
814
         MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
815

816
         /* add in texels */
817
         lolo = 0;
818
         for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
819
            int32_t texel = n_vect + 1; /* transparent black */
820
            if (!ISTBLACK(input[k])) {
821
               /* interpolate color */
822
               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
823
            }
824
            /* add in texel */
825
            lolo <<= 2;
826
            lolo |= texel;
827
         }
828
         cc[0] = lolo;
829
      }
830
   }
831

832
   /* right microtile */
833
   if (maxColR == -1) {
834
      /* all transparent black */
835
      cc[1] = ~0u;
836
      for (i = 0; i < n_comp; i++) {
837
         vec[2][i] = 0;
838
         vec[3][i] = 0;
839
      }
840
   } else {
841
      cc[1] = 0;
842
      for (i = 0; i < n_comp; i++) {
843
         vec[2][i] = input[minColR][i];
844
         vec[3][i] = input[maxColR][i];
845
      }
846
      if (minColR != maxColR) {
847
         /* compute interpolation vector */
848
         MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
849

850
         /* add in texels */
851
         lohi = 0;
852
         for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
853
            int32_t texel = n_vect + 1; /* transparent black */
854
            if (!ISTBLACK(input[k])) {
855
               /* interpolate color */
856
               CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
857
            }
858
            /* add in texel */
859
            lohi <<= 2;
860
            lohi |= texel;
861
         }
862
         cc[1] = lohi;
863
      }
864
   }
865

866
   FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
867
   for (j = 2 * 2 - 1; j >= 0; j--) {
868
      for (i = 0; i < n_comp; i++) {
869
         /* add in colors */
870
         FX64_SHL(hi, 5);
871
         FX64_OR32(hi, vec[j][i] >> 3);
872
      }
873
   }
874
   ((Fx64 *)cc)[1] = hi;
875
}
876

877

878
static void
879
fxt1_quantize_MIXED0 (uint32_t *cc,
880
                      uint8_t input[N_TEXELS][MAX_COMP])
881
{
882
   const int32_t n_vect = 3; /* highest vector number in each microtile */
883
   const int32_t n_comp = 3; /* 3 components: R, G, B */
884
   uint8_t vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
885
   float b, iv[MAX_COMP]; /* interpolation vector */
886
   int32_t i, j, k;
887
   Fx64 hi; /* high quadword */
888
   uint32_t lohi, lolo; /* low quadword: hi dword, lo dword */
889

890
   int32_t minColL = 0, maxColL = 0;
891
   int32_t minColR = 0, maxColR = 0;
892
#if 0
893
   int32_t minSum;
894
   int32_t maxSum;
895

896
   /* Our solution here is to find the darkest and brightest colors in
897
    * the 4x4 tile and use those as the two representative colors.
898
    * There are probably better algorithms to use (histogram-based).
899
    */
900
   minSum = 2000; /* big enough */
901
   maxSum = -1; /* small enough */
902
   for (k = 0; k < N_TEXELS / 2; k++) {
903
      int32_t sum = 0;
904
      for (i = 0; i < n_comp; i++) {
905
         sum += input[k][i];
906
      }
907
      if (minSum > sum) {
908
         minSum = sum;
909
         minColL = k;
910
      }
911
      if (maxSum < sum) {
912
         maxSum = sum;
913
         maxColL = k;
914
      }
915
   }
916
   minSum = 2000; /* big enough */
917
   maxSum = -1; /* small enough */
918
   for (; k < N_TEXELS; k++) {
919
      int32_t sum = 0;
920
      for (i = 0; i < n_comp; i++) {
921
         sum += input[k][i];
922
      }
923
      if (minSum > sum) {
924
         minSum = sum;
925
         minColR = k;
926
      }
927
      if (maxSum < sum) {
928
         maxSum = sum;
929
         maxColR = k;
930
      }
931
   }
932
#else
933
   int32_t minVal;
934
   int32_t maxVal;
935
   int32_t maxVarL = fxt1_variance(input, n_comp);
936
   int32_t maxVarR = fxt1_variance(&input[N_TEXELS / 2], n_comp);
937

938
   /* Scan the channel with max variance for lo & hi
939
    * and use those as the two representative colors.
940
    */
941
   minVal = 2000; /* big enough */
942
   maxVal = -1; /* small enough */
943
   for (k = 0; k < N_TEXELS / 2; k++) {
944
      int32_t t = input[k][maxVarL];
945
      if (minVal > t) {
946
         minVal = t;
947
         minColL = k;
948
      }
949
      if (maxVal < t) {
950
         maxVal = t;
951
         maxColL = k;
952
      }
953
   }
954
   minVal = 2000; /* big enough */
955
   maxVal = -1; /* small enough */
956
   for (; k < N_TEXELS; k++) {
957
      int32_t t = input[k][maxVarR];
958
      if (minVal > t) {
959
         minVal = t;
960
         minColR = k;
961
      }
962
      if (maxVal < t) {
963
         maxVal = t;
964
         maxColR = k;
965
      }
966
   }
967
#endif
968

969
   /* left microtile */
970
   cc[0] = 0;
971
   for (i = 0; i < n_comp; i++) {
972
      vec[0][i] = input[minColL][i];
973
      vec[1][i] = input[maxColL][i];
974
   }
975
   if (minColL != maxColL) {
976
      /* compute interpolation vector */
977
      MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
978

979
      /* add in texels */
980
      lolo = 0;
981
      for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
982
         int32_t texel;
983
         /* interpolate color */
984
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
985
         /* add in texel */
986
         lolo <<= 2;
987
         lolo |= texel;
988
      }
989

990
      /* funky encoding for LSB of green */
991
      if ((int32_t)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
992
         for (i = 0; i < n_comp; i++) {
993
            vec[1][i] = input[minColL][i];
994
            vec[0][i] = input[maxColL][i];
995
         }
996
         lolo = ~lolo;
997
      }
998

999
      cc[0] = lolo;
1000
   }
1001

1002
   /* right microtile */
1003
   cc[1] = 0;
1004
   for (i = 0; i < n_comp; i++) {
1005
      vec[2][i] = input[minColR][i];
1006
      vec[3][i] = input[maxColR][i];
1007
   }
1008
   if (minColR != maxColR) {
1009
      /* compute interpolation vector */
1010
      MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1011

1012
      /* add in texels */
1013
      lohi = 0;
1014
      for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1015
         int32_t texel;
1016
         /* interpolate color */
1017
         CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1018
         /* add in texel */
1019
         lohi <<= 2;
1020
         lohi |= texel;
1021
      }
1022

1023
      /* funky encoding for LSB of green */
1024
      if ((int32_t)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1025
         for (i = 0; i < n_comp; i++) {
1026
            vec[3][i] = input[minColR][i];
1027
            vec[2][i] = input[maxColR][i];
1028
         }
1029
         lohi = ~lohi;
1030
      }
1031

1032
      cc[1] = lohi;
1033
   }
1034

1035
   FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1036
   for (j = 2 * 2 - 1; j >= 0; j--) {
1037
      for (i = 0; i < n_comp; i++) {
1038
         /* add in colors */
1039
         FX64_SHL(hi, 5);
1040
         FX64_OR32(hi, vec[j][i] >> 3);
1041
      }
1042
   }
1043
   ((Fx64 *)cc)[1] = hi;
1044
}
1045

1046

1047
static void
1048
fxt1_quantize (uint32_t *cc, const uint8_t *lines[], int32_t comps)
1049
{
1050
   int32_t trualpha;
1051
   uint8_t reord[N_TEXELS][MAX_COMP];
1052

1053
   uint8_t input[N_TEXELS][MAX_COMP];
1054
   int32_t i, k, l;
1055

1056
   if (comps == 3) {
1057
      /* make the whole block opaque */
1058
      memset(input, -1, sizeof(input));
1059
   }
1060

1061
   /* 8 texels each line */
1062
   for (l = 0; l < 4; l++) {
1063
      for (k = 0; k < 4; k++) {
1064
         for (i = 0; i < comps; i++) {
1065
            input[k + l * 4][i] = *lines[l]++;
1066
         }
1067
      }
1068
      for (; k < 8; k++) {
1069
         for (i = 0; i < comps; i++) {
1070
            input[k + l * 4 + 12][i] = *lines[l]++;
1071
         }
1072
      }
1073
   }
1074

1075
   /* block layout:
1076
    * 00, 01, 02, 03, 08, 09, 0a, 0b
1077
    * 10, 11, 12, 13, 18, 19, 1a, 1b
1078
    * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1079
    * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1080
    */
1081

1082
   /* [dBorca]
1083
    * stupidity flows forth from this
1084
    */
1085
   l = N_TEXELS;
1086
   trualpha = 0;
1087
   if (comps == 4) {
1088
      /* skip all transparent black texels */
1089
      l = 0;
1090
      for (k = 0; k < N_TEXELS; k++) {
1091
         /* test all components against 0 */
1092
         if (!ISTBLACK(input[k])) {
1093
            /* texel is not transparent black */
1094
            memcpy(reord[l], input[k], 4);
1095
            if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1096
               /* non-opaque texel */
1097
               trualpha = !0;
1098
            }
1099
            l++;
1100
         }
1101
      }
1102
   }
1103

1104
#if 0
1105
   if (trualpha) {
1106
      fxt1_quantize_ALPHA0(cc, input, reord, l);
1107
   } else if (l == 0) {
1108
      cc[0] = cc[1] = cc[2] = -1;
1109
      cc[3] = 0;
1110
   } else if (l < N_TEXELS) {
1111
      fxt1_quantize_HI(cc, input, reord, l);
1112
   } else {
1113
      fxt1_quantize_CHROMA(cc, input);
1114
   }
1115
   (void)fxt1_quantize_ALPHA1;
1116
   (void)fxt1_quantize_MIXED1;
1117
   (void)fxt1_quantize_MIXED0;
1118
#else
1119
   if (trualpha) {
1120
      fxt1_quantize_ALPHA1(cc, input);
1121
   } else if (l == 0) {
1122
      cc[0] = cc[1] = cc[2] = ~0u;
1123
      cc[3] = 0;
1124
   } else if (l < N_TEXELS) {
1125
      fxt1_quantize_MIXED1(cc, input);
1126
   } else {
1127
      fxt1_quantize_MIXED0(cc, input);
1128
   }
1129
   (void)fxt1_quantize_ALPHA0;
1130
   (void)fxt1_quantize_HI;
1131
   (void)fxt1_quantize_CHROMA;
1132
#endif
1133
}
1134

1135

1136

1137
/**
 * Enlarge an image by tiling the source, not by stretching it: output texel
 * (x, y) is a copy of source texel (x % inWidth, y % inHeight).
 *
 * \param inWidth, inHeight    source size in texels
 * \param outWidth, outHeight  destination size in texels (asserted >= source)
 * \param comps                bytes per texel
 * \param src                  source texels
 * \param srcRowStride         bytes between successive source rows
 * \param dest                 destination, written tightly packed
 *                             (outWidth * comps bytes per row)
 */
static void
upscale_teximage2d(int32_t inWidth, int32_t inHeight,
                   int32_t outWidth, int32_t outHeight,
                   int32_t comps, const uint8_t *src, int32_t srcRowStride,
                   uint8_t *dest )
{
   uint8_t *out = dest;
   int32_t x, y, c;

   assert(outWidth >= inWidth);
   assert(outHeight >= inHeight);
#if 0
   assert(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
   assert((outWidth & 3) == 0);
   assert((outHeight & 3) == 0);
#endif

   /* the destination is filled in memory order, so one running pointer is
    * enough on the output side
    */
   for (y = 0; y < outHeight; y++) {
      const uint8_t *srcRow = src + (y % inHeight) * srcRowStride;

      for (x = 0; x < outWidth; x++) {
         const uint8_t *texel = srcRow + (x % inWidth) * comps;

         for (c = 0; c < comps; c++) {
            *out++ = texel[c];
         }
      }
   }
}
1169

1170

1171
/**
 * Compress an image to FXT1, one 8x4 block (128 bits) at a time.
 *
 * \param width, height  image size in texels
 * \param comps          bytes per source texel, 3 or 4 (asserted)
 * \param source         source texels
 * \param srcRowStride   bytes between successive source rows
 * \param dest           destination for the encoded blocks
 * \param destRowStride  bytes between successive rows of blocks in dest
 *
 * If width is not a multiple of 8 or height not a multiple of 4, the image
 * is first tiled up to the next block-aligned size in a temporary buffer.
 * When that buffer cannot be allocated the function returns without
 * writing anything.
 */
static void
fxt1_encode (uint32_t width, uint32_t height, int32_t comps,
             const void *source, int32_t srcRowStride,
             void *dest, int32_t destRowStride)
{
   uint32_t *out = (uint32_t *)dest;
   void *padded = NULL;
   const uint8_t *pixels;
   uint32_t bx, by;

   assert(comps == 3 || comps == 4);

   /* Replicate image if width is not M8 or height is not M4 */
   if ((width & 7) | (height & 3)) {
      int32_t newWidth = (width + 7) & ~7;
      int32_t newHeight = (height + 3) & ~3;

      padded = malloc(comps * newWidth * newHeight * sizeof(uint8_t));
      if (!padded)
         return;
      upscale_teximage2d(width, height, newWidth, newHeight,
                         comps, (const uint8_t *) source,
                         srcRowStride, (uint8_t *) padded);

      /* from here on, encode the padded copy */
      source = padded;
      width = newWidth;
      height = newHeight;
      srcRowStride = comps * newWidth;
   }

   pixels = (const uint8_t *) source;

   /* One row of blocks emits width / 8 blocks of 16 bytes, i.e. width * 2
    * bytes; turn the stride into the number of 32-bit words to skip after
    * each such row.
    */
   destRowStride = (destRowStride - width * 2) / 4;

   for (by = 0; by < height; by += 4) {
      uint32_t offs = by * srcRowStride;

      for (bx = 0; bx < width; bx += 8) {
         const uint8_t *lines[4];
         int32_t r;

         /* four consecutive source rows of this block */
         lines[0] = &pixels[offs];
         for (r = 1; r < 4; r++) {
            lines[r] = lines[r - 1] + srcRowStride;
         }
         offs += 8 * comps;

         fxt1_quantize(out, lines, comps);
         /* 128 bits per 8x4 block */
         out += 4;
      }
      out += destRowStride;
   }

   free(padded);
}
1219

1220

1221
/***************************************************************************\
1222
 * FXT1 decoder
1223
 *
1224
 * The decoder is based on GL_3DFX_texture_compression_FXT1
1225
 * specification and serves as a concept for the encoder.
1226
\***************************************************************************/
1227

1228

1229
/* Lookup table expanding a 5-bit color value (index 0..31) to 8 bits.
 * Entry i equals (255 * i + 15) / 31 in integer arithmetic.
 */
static const uint8_t _rgb_scale_5[] = {
   /*  0..15 */
     0,   8,  16,  25,  33,  41,  49,  58,  66,  74,  82,  90,  99, 107, 115, 123,
   /* 16..31 */
   132, 140, 148, 156, 165, 173, 181, 189, 197, 206, 214, 222, 230, 239, 247, 255
};
1236

1237
/* Lookup table expanding a 6-bit color value (index 0..63) to 8 bits.
 * Entry i equals (255 * i + 31) / 63 in integer arithmetic.
 */
static const uint8_t _rgb_scale_6[] = {
   /*  0..15 */
     0,   4,   8,  12,  16,  20,  24,  28,  32,  36,  40,  45,  49,  53,  57,  61,
   /* 16..31 */
    65,  69,  73,  77,  81,  85,  89,  93,  97, 101, 105, 109, 113, 117, 121, 125,
   /* 32..47 */
   130, 134, 138, 142, 146, 150, 154, 158, 162, 166, 170, 174, 178, 182, 186, 190,
   /* 48..63 */
   194, 198, 202, 206, 210, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251, 255
};
1248

1249

1250
/* Bits of the code word from bit index `which` upwards, shifted down to
 * bit 0; the caller masks off the field width it needs.  Nothing is
 * gathered across a 32-bit word boundary.  The data is only read, so the
 * cast keeps it const-qualified.
 */
#define CC_SEL(cc, which) (((const uint32_t *)(cc))[(which) / 32] >> ((which) & 31))
/* 5-bit channel value -> 8 bits */
#define UP5(c) _rgb_scale_5[(c) & 31]
/* 5-bit channel value plus one extra low-order bit b -> 8 bits (6-bit table) */
#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]
/* Rounded interpolation t/n of the way from c0 to c1.  The whole expansion
 * is parenthesized so the macro can be used as an operand of a larger
 * expression.
 */
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1254

1255

1256
static void
1257
fxt1_decode_1HI (const uint8_t *code, int32_t t, uint8_t *rgba)
1258
{
1259
   const uint32_t *cc;
1260

1261
   t *= 3;
1262
   cc = (const uint32_t *)(code + t / 8);
1263
   t = (cc[0] >> (t & 7)) & 7;
1264

1265
   if (t == 7) {
1266
      rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1267
   } else {
1268
      uint8_t r, g, b;
1269
      cc = (const uint32_t *)(code + 12);
1270
      if (t == 0) {
1271
         b = UP5(CC_SEL(cc, 0));
1272
         g = UP5(CC_SEL(cc, 5));
1273
         r = UP5(CC_SEL(cc, 10));
1274
      } else if (t == 6) {
1275
         b = UP5(CC_SEL(cc, 15));
1276
         g = UP5(CC_SEL(cc, 20));
1277
         r = UP5(CC_SEL(cc, 25));
1278
      } else {
1279
         b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1280
         g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1281
         r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1282
      }
1283
      rgba[RCOMP] = r;
1284
      rgba[GCOMP] = g;
1285
      rgba[BCOMP] = b;
1286
      rgba[ACOMP] = 255;
1287
   }
1288
}
1289

1290

1291
static void
1292
fxt1_decode_1CHROMA (const uint8_t *code, int32_t t, uint8_t *rgba)
1293
{
1294
   const uint32_t *cc;
1295
   uint32_t kk;
1296

1297
   cc = (const uint32_t *)code;
1298
   if (t & 16) {
1299
      cc++;
1300
      t &= 15;
1301
   }
1302
   t = (cc[0] >> (t * 2)) & 3;
1303

1304
   t *= 15;
1305
   cc = (const uint32_t *)(code + 8 + t / 8);
1306
   kk = cc[0] >> (t & 7);
1307
   rgba[BCOMP] = UP5(kk);
1308
   rgba[GCOMP] = UP5(kk >> 5);
1309
   rgba[RCOMP] = UP5(kk >> 10);
1310
   rgba[ACOMP] = 255;
1311
}
1312

1313

1314
static void
1315
fxt1_decode_1MIXED (const uint8_t *code, int32_t t, uint8_t *rgba)
1316
{
1317
   const uint32_t *cc;
1318
   uint32_t col[2][3];
1319
   int32_t glsb, selb;
1320

1321
   cc = (const uint32_t *)code;
1322
   if (t & 16) {
1323
      t &= 15;
1324
      t = (cc[1] >> (t * 2)) & 3;
1325
      /* col 2 */
1326
      col[0][BCOMP] = (*(const uint32_t *)(code + 11)) >> 6;
1327
      col[0][GCOMP] = CC_SEL(cc, 99);
1328
      col[0][RCOMP] = CC_SEL(cc, 104);
1329
      /* col 3 */
1330
      col[1][BCOMP] = CC_SEL(cc, 109);
1331
      col[1][GCOMP] = CC_SEL(cc, 114);
1332
      col[1][RCOMP] = CC_SEL(cc, 119);
1333
      glsb = CC_SEL(cc, 126);
1334
      selb = CC_SEL(cc, 33);
1335
   } else {
1336
      t = (cc[0] >> (t * 2)) & 3;
1337
      /* col 0 */
1338
      col[0][BCOMP] = CC_SEL(cc, 64);
1339
      col[0][GCOMP] = CC_SEL(cc, 69);
1340
      col[0][RCOMP] = CC_SEL(cc, 74);
1341
      /* col 1 */
1342
      col[1][BCOMP] = CC_SEL(cc, 79);
1343
      col[1][GCOMP] = CC_SEL(cc, 84);
1344
      col[1][RCOMP] = CC_SEL(cc, 89);
1345
      glsb = CC_SEL(cc, 125);
1346
      selb = CC_SEL(cc, 1);
1347
   }
1348

1349
   if (CC_SEL(cc, 124) & 1) {
1350
      /* alpha[0] == 1 */
1351

1352
      if (t == 3) {
1353
         /* zero */
1354
         rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1355
      } else {
1356
         uint8_t r, g, b;
1357
         if (t == 0) {
1358
            b = UP5(col[0][BCOMP]);
1359
            g = UP5(col[0][GCOMP]);
1360
            r = UP5(col[0][RCOMP]);
1361
         } else if (t == 2) {
1362
            b = UP5(col[1][BCOMP]);
1363
            g = UP6(col[1][GCOMP], glsb);
1364
            r = UP5(col[1][RCOMP]);
1365
         } else {
1366
            b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1367
            g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1368
            r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1369
         }
1370
         rgba[RCOMP] = r;
1371
         rgba[GCOMP] = g;
1372
         rgba[BCOMP] = b;
1373
         rgba[ACOMP] = 255;
1374
      }
1375
   } else {
1376
      /* alpha[0] == 0 */
1377
      uint8_t r, g, b;
1378
      if (t == 0) {
1379
         b = UP5(col[0][BCOMP]);
1380
         g = UP6(col[0][GCOMP], glsb ^ selb);
1381
         r = UP5(col[0][RCOMP]);
1382
      } else if (t == 3) {
1383
         b = UP5(col[1][BCOMP]);
1384
         g = UP6(col[1][GCOMP], glsb);
1385
         r = UP5(col[1][RCOMP]);
1386
      } else {
1387
         b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1388
         g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1389
                        UP6(col[1][GCOMP], glsb));
1390
         r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1391
      }
1392
      rgba[RCOMP] = r;
1393
      rgba[GCOMP] = g;
1394
      rgba[BCOMP] = b;
1395
      rgba[ACOMP] = 255;
1396
   }
1397
}
1398

1399

1400
static void
1401
fxt1_decode_1ALPHA (const uint8_t *code, int32_t t, uint8_t *rgba)
1402
{
1403
   const uint32_t *cc;
1404
   uint8_t r, g, b, a;
1405

1406
   cc = (const uint32_t *)code;
1407
   if (CC_SEL(cc, 124) & 1) {
1408
      /* lerp == 1 */
1409
      uint32_t col0[4];
1410

1411
      if (t & 16) {
1412
         t &= 15;
1413
         t = (cc[1] >> (t * 2)) & 3;
1414
         /* col 2 */
1415
         col0[BCOMP] = (*(const uint32_t *)(code + 11)) >> 6;
1416
         col0[GCOMP] = CC_SEL(cc, 99);
1417
         col0[RCOMP] = CC_SEL(cc, 104);
1418
         col0[ACOMP] = CC_SEL(cc, 119);
1419
      } else {
1420
         t = (cc[0] >> (t * 2)) & 3;
1421
         /* col 0 */
1422
         col0[BCOMP] = CC_SEL(cc, 64);
1423
         col0[GCOMP] = CC_SEL(cc, 69);
1424
         col0[RCOMP] = CC_SEL(cc, 74);
1425
         col0[ACOMP] = CC_SEL(cc, 109);
1426
      }
1427

1428
      if (t == 0) {
1429
         b = UP5(col0[BCOMP]);
1430
         g = UP5(col0[GCOMP]);
1431
         r = UP5(col0[RCOMP]);
1432
         a = UP5(col0[ACOMP]);
1433
      } else if (t == 3) {
1434
         b = UP5(CC_SEL(cc, 79));
1435
         g = UP5(CC_SEL(cc, 84));
1436
         r = UP5(CC_SEL(cc, 89));
1437
         a = UP5(CC_SEL(cc, 114));
1438
      } else {
1439
         b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1440
         g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1441
         r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1442
         a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1443
      }
1444
   } else {
1445
      /* lerp == 0 */
1446

1447
      if (t & 16) {
1448
         cc++;
1449
         t &= 15;
1450
      }
1451
      t = (cc[0] >> (t * 2)) & 3;
1452

1453
      if (t == 3) {
1454
         /* zero */
1455
         r = g = b = a = 0;
1456
      } else {
1457
         uint32_t kk;
1458
         cc = (const uint32_t *)code;
1459
         a = UP5(cc[3] >> (t * 5 + 13));
1460
         t *= 15;
1461
         cc = (const uint32_t *)(code + 8 + t / 8);
1462
         kk = cc[0] >> (t & 7);
1463
         b = UP5(kk);
1464
         g = UP5(kk >> 5);
1465
         r = UP5(kk >> 10);
1466
      }
1467
   }
1468
   rgba[RCOMP] = r;
1469
   rgba[GCOMP] = g;
1470
   rgba[BCOMP] = b;
1471
   rgba[ACOMP] = a;
1472
}
1473

1474

1475
static void
1476
fxt1_decode_1 (const void *texture, int32_t stride, /* in pixels */
1477
               int32_t i, int32_t j, uint8_t *rgba)
1478
{
1479
   static void (*decode_1[]) (const uint8_t *, int32_t, uint8_t *) = {
1480
      fxt1_decode_1HI,     /* cc-high   = "00?" */
1481
      fxt1_decode_1HI,     /* cc-high   = "00?" */
1482
      fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1483
      fxt1_decode_1ALPHA,  /* alpha     = "011" */
1484
      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1485
      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1486
      fxt1_decode_1MIXED,  /* mixed     = "1??" */
1487
      fxt1_decode_1MIXED   /* mixed     = "1??" */
1488
   };
1489

1490
   const uint8_t *code = (const uint8_t *)texture +
1491
                         ((j / 4) * (stride / 8) + (i / 8)) * 16;
1492
   int32_t mode = CC_SEL(code, 125);
1493
   int32_t t = i & 7;
1494

1495
   if (t & 4) {
1496
      t += 12;
1497
   }
1498
   t += (j & 3) * 4;
1499

1500
   decode_1[mode](code, t, rgba);
1501
}
1502

1503
/*
1504
 * Pixel fetch within a block.
1505
 */
1506

1507
/**
 * Fetch texel (i, j) of a single FXT1 RGB block as 8-bit RGBA.
 *
 * \param dst   output, 4 bytes
 * \param src   the encoded block
 * \param i, j  texel coordinates
 *
 * Alpha is forced to 0xff, as the float fetch and the unpack path do for
 * the RGB format; the decoder alone can return alpha 0 for some texels.
 */
void
util_format_fxt1_rgb_fetch_rgba_8unorm(uint8_t *restrict dst, const uint8_t *restrict src, unsigned i, unsigned j)
{
   fxt1_decode_1(src, 0, i, j, dst);
   dst[3] = 0xff;
}
1512

1513
/**
 * Fetch texel (i, j) of a single FXT1 RGBA block as 8-bit RGBA.
 *
 * \param dst   output, 4 bytes
 * \param src   the encoded block
 * \param i, j  texel coordinates
 *
 * The decoded alpha is kept, as the float fetch and the unpack path do for
 * the RGBA format; overwriting it with 0xff would discard the alpha
 * channel this format exists to carry.
 */
void
util_format_fxt1_rgba_fetch_rgba_8unorm(uint8_t *restrict dst, const uint8_t *restrict src, unsigned i, unsigned j)
{
   fxt1_decode_1(src, 0, i, j, dst);
}
1519

1520
/**
 * Fetch texel (i, j) of a single FXT1 RGB block as four floats.
 * The color channels are converted with ubyte_to_float(); alpha is
 * always 1.0.
 */
void
util_format_fxt1_rgb_fetch_rgba(void *restrict in_dst, const uint8_t *restrict src, unsigned i, unsigned j)
{
   uint8_t texel[4];
   float *out = in_dst;
   unsigned c;

   fxt1_decode_1(src, 0, i, j, texel);
   for (c = 0; c < 3; c++) {
      out[c] = ubyte_to_float(texel[c]);
   }
   /* RGB format: opaque regardless of what the decoder returned */
   out[3] = 1.0f;
}
1531

1532
/**
 * Fetch texel (i, j) of a single FXT1 RGBA block as four floats.
 * All four channels, alpha included, are converted with ubyte_to_float().
 */
void
util_format_fxt1_rgba_fetch_rgba(void *restrict in_dst, const uint8_t *restrict src, unsigned i, unsigned j)
{
   uint8_t texel[4];
   float *out = in_dst;
   unsigned c;

   fxt1_decode_1(src, 0, i, j, texel);
   for (c = 0; c < 4; c++) {
      out[c] = ubyte_to_float(texel[c]);
   }
}
1543

1544
/*
1545
 * Block decompression.
1546
 */
1547

1548
static inline void
1549
util_format_fxtn_rgb_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
1550
                                        const uint8_t *restrict src_row, unsigned src_stride,
1551
                                        unsigned width, unsigned height,
1552
                                        boolean rgba)
1553
{
1554
   const unsigned bw = 8, bh = 4, comps = 4;
1555
   unsigned x, y, i, j;
1556
   for (y = 0; y < height; y += bh) {
1557
      const uint8_t *src = src_row;
1558
      for (x = 0; x < width; x += bw) {
1559
         for (j = 0; j < bh; ++j) {
1560
            for (i = 0; i < bw; ++i) {
1561
               uint8_t *dst = dst_row + (y + j) * dst_stride / sizeof(*dst_row) + (x + i) * comps;
1562
               fxt1_decode_1(src, 0, i, j, dst);
1563
               if (!rgba)
1564
                  dst[3] = 0xff;
1565
            }
1566
         }
1567
         src += FXT1_BLOCK_SIZE;
1568
      }
1569
      src_row += src_stride;
1570
   }
1571
}
1572

1573
void
1574
util_format_fxt1_rgb_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
1575
                                        const uint8_t *restrict src_row, unsigned src_stride,
1576
                                        unsigned width, unsigned height)
1577
{
1578
   util_format_fxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride,
1579
                                           src_row, src_stride,
1580
                                           width, height,
1581
                                           false);
1582
}
1583

1584
void
1585
util_format_fxt1_rgba_unpack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
1586
                                         const uint8_t *restrict src_row, unsigned src_stride,
1587
                                         unsigned width, unsigned height)
1588
{
1589
   util_format_fxtn_rgb_unpack_rgba_8unorm(dst_row, dst_stride,
1590
                                           src_row, src_stride,
1591
                                           width, height,
1592
                                           true);
1593
}
1594

1595
static inline void
1596
util_format_fxtn_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride,
1597
                                       const uint8_t *restrict src_row, unsigned src_stride,
1598
                                       unsigned width, unsigned height,
1599
                                       boolean rgba)
1600
{
1601
   const unsigned bw = 8, bh = 4, comps = 4;
1602
   unsigned x, y, i, j;
1603
   for (y = 0; y < height; y += 4) {
1604
      const uint8_t *src = src_row;
1605
      for (x = 0; x < width; x += 8) {
1606
         for (j = 0; j < bh; ++j) {
1607
            for (i = 0; i < bw; ++i) {
1608
               float *dst = dst_row + (y + j)*dst_stride/sizeof(*dst_row) + (x + i) * comps;
1609
               uint8_t tmp[4];
1610
               fxt1_decode_1(src, 0, i, j, tmp);
1611
               dst[0] = ubyte_to_float(tmp[0]);
1612
               dst[1] = ubyte_to_float(tmp[1]);
1613
               dst[2] = ubyte_to_float(tmp[2]);
1614
               if (rgba)
1615
                  dst[3] = ubyte_to_float(tmp[3]);
1616
               else
1617
                  dst[3] = 1.0;
1618
            }
1619
         }
1620
         src += FXT1_BLOCK_SIZE;
1621
      }
1622
      src_row += src_stride;
1623
   }
1624
}
1625

1626
void
1627
util_format_fxt1_rgb_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride,
1628
                                       const uint8_t *restrict src_row, unsigned src_stride,
1629
                                       unsigned width, unsigned height)
1630
{
1631
   util_format_fxtn_rgb_unpack_rgba_float(dst_row, dst_stride,
1632
                                          src_row, src_stride,
1633
                                          width, height,
1634
                                          false);
1635
}
1636

1637
void
1638
util_format_fxt1_rgba_unpack_rgba_float(void *restrict dst_row, unsigned dst_stride,
1639
                                        const uint8_t *restrict src_row, unsigned src_stride,
1640
                                        unsigned width, unsigned height)
1641
{
1642
   util_format_fxtn_rgb_unpack_rgba_float(dst_row, dst_stride,
1643
                                          src_row, src_stride,
1644
                                          width, height,
1645
                                          true);
1646
}
1647

1648
/*
1649
 * Block compression.
1650
 */
1651

1652
/**
 * Compress an 8-bit RGBA image to FXT1 RGB.
 *
 * \param dst_row     destination for the encoded blocks
 * \param dst_stride  bytes between successive rows of blocks
 * \param src         source texels, 4 bytes each
 * \param src_stride  bytes between successive source rows
 * \param width       image width in texels
 * \param height      image height in texels
 *
 * The encoder wants tightly packed 24bpp RGB for this format, so the
 * source is first repacked into a temporary buffer without its alpha
 * bytes.  Returns without writing anything if that buffer cannot be
 * allocated.
 */
void
util_format_fxt1_rgb_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
                                      const uint8_t *restrict src, unsigned src_stride,
                                      unsigned width, unsigned height)
{
   int temp_stride = width * 3;
   uint8_t *temp = malloc(height * temp_stride);
   uint8_t *out;
   unsigned x, y;

   if (!temp)
      return;

   /* temp is filled in memory order: rows are temp_stride bytes apart and
    * each row holds exactly width * 3 bytes
    */
   out = temp;
   for (y = 0; y < height; y++) {
      const uint8_t *in = src;

      for (x = 0; x < width; x++) {
         *out++ = in[0];
         *out++ = in[1];
         *out++ = in[2];
         in += 4;   /* step over alpha */
      }
      src += src_stride;
   }

   fxt1_encode(width, height, 3, temp, temp_stride, dst_row, dst_stride);

   free(temp);
}
1677

1678
/**
 * Compress an 8-bit RGBA image to FXT1 RGBA.  The source layout is already
 * what the encoder takes for 4 components, so it is passed straight through.
 */
void
util_format_fxt1_rgba_pack_rgba_8unorm(uint8_t *restrict dst_row, unsigned dst_stride,
                                       const uint8_t *restrict src, unsigned src_stride,
                                       unsigned width, unsigned height)
{
   fxt1_encode(width, height, 4,
               src, src_stride,
               dst_row, dst_stride);
}
1685

1686
/**
 * Compress a float RGBA image to FXT1 RGB.
 *
 * The source is first converted to 8-bit RGBA in a temporary buffer and
 * then handed to the 8-bit RGB packer.  Nothing is written if the
 * temporary buffer cannot be allocated.
 */
void
util_format_fxt1_rgb_pack_rgba_float(uint8_t *restrict dst_row, unsigned dst_stride,
                                     const float *restrict src, unsigned src_stride,
                                     unsigned width, unsigned height)
{
   int temp_stride = width * 4;
   uint8_t *temp = malloc(height * temp_stride);

   if (temp) {
      /* float -> RGBA8 */
      util_format_r8g8b8a8_unorm_pack_rgba_float(temp, temp_stride, src, src_stride,
                                                 width, height);
      /* RGBA8 -> FXT1 RGB */
      util_format_fxt1_rgb_pack_rgba_8unorm(dst_row, dst_stride, temp, temp_stride,
                                            width, height);
      free(temp);
   }
}
1706

1707
/**
 * Compress a float RGBA image to FXT1 RGBA.
 *
 * The source is first converted to 8-bit RGBA in a temporary buffer and
 * then handed to the 8-bit RGBA packer.  Nothing is written if the
 * temporary buffer cannot be allocated.
 */
void
util_format_fxt1_rgba_pack_rgba_float(uint8_t *restrict dst_row, unsigned dst_stride,
                                      const float *restrict src, unsigned src_stride,
                                      unsigned width, unsigned height)
{
   int temp_stride = width * 4;
   uint8_t *temp = malloc(height * temp_stride);

   if (temp) {
      /* float -> RGBA8 */
      util_format_r8g8b8a8_unorm_pack_rgba_float(temp, temp_stride, src, src_stride,
                                                 width, height);
      /* RGBA8 -> FXT1 RGBA */
      util_format_fxt1_rgba_pack_rgba_8unorm(dst_row, dst_stride, temp, temp_stride,
                                             width, height);
      free(temp);
   }
}
1727

1728
Product

Resources

Company