/* Path: blob/master/libmupen64plus/mupen64plus-video-glide64mk2/src/GlideHQ/tc-1.1+/fxt1.c
 * 2 views
 */
/*1* FXT1 codec2* Version: 1.13*4* Copyright (C) 2004 Daniel Borca All Rights Reserved.5*6* Permission is hereby granted, free of charge, to any person obtaining a7* copy of this software and associated documentation files (the "Software"),8* to deal in the Software without restriction, including without limitation9* the rights to use, copy, modify, merge, publish, distribute, sublicense,10* and/or sell copies of the Software, and to permit persons to whom the11* Software is furnished to do so, subject to the following conditions:12*13* The above copyright notice and this permission notice shall be included14* in all copies or substantial portions of the Software.15*16* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS17* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,18* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL19* DANIEL BORCA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN20* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN21* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.22*/2324/* Copyright (C) 2007 Hiroshi Morii <koolsmoky(at)users.sourceforge.net>25* Added support for ARGB inputs.26*/272829#include <stdlib.h>30#include <string.h>3132#include "types.h"33#include "internal.h"34#include "fxt1.h"353637/***************************************************************************\38* FXT1 encoder39*40* The encoder was built by reversing the decoder,41* and is vaguely based on Texus2 by 3dfx. 
Note that this code42* is merely a proof of concept, since it is highly UNoptimized;43* moreover, it is sub-optimal due to initial conditions passed44* to Lloyd's algorithm (the interpolation modes are even worse).45\***************************************************************************/464748#define MAX_COMP 4 /* ever needed maximum number of components in texel */49#define MAX_VECT 4 /* ever needed maximum number of base vectors to find */50#define N_TEXELS 32 /* number of texels in a block (always 32) */51#define LL_N_REP 50 /* number of iterations in lloyd's vq */52#define LL_RMS_D 10 /* fault tolerance (maximum delta) */53#define LL_RMS_E 255 /* fault tolerance (maximum error) */54#define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */55#define ISTBLACK(v) (*((dword *)(v)) == 0)56#define COPY_4UBV(DST, SRC) *((dword *)(DST)) = *((dword *)(SRC))575859static int60fxt1_bestcol (float vec[][MAX_COMP], int nv,61byte input[MAX_COMP], int nc)62{63int i, j, best = -1;64float err = 1e9; /* big enough */6566for (j = 0; j < nv; j++) {67float e = 0.0F;68for (i = 0; i < nc; i++) {69e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);70}71if (e < err) {72err = e;73best = j;74}75}7677return best;78}798081static int82fxt1_worst (float vec[MAX_COMP],83byte input[N_TEXELS][MAX_COMP], int nc, int n)84{85int i, k, worst = -1;86float err = -1.0F; /* small enough */8788for (k = 0; k < n; k++) {89float e = 0.0F;90for (i = 0; i < nc; i++) {91e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);92}93if (e > err) {94err = e;95worst = k;96}97}9899return worst;100}101102103static int104fxt1_variance (double variance[MAX_COMP],105byte input[N_TEXELS][MAX_COMP], int nc, int n)106{107int i, k, best = 0;108dword sx, sx2;109double var, maxvar = -1; /* small enough */110double teenth = 1.0 / n;111112for (i = 0; i < nc; i++) {113sx = sx2 = 0;114for (k = 0; k < n; k++) {115int t = input[k][i];116sx += t;117sx2 += t * t;118}119var = sx2 * teenth - sx * sx * teenth * 
teenth;120if (maxvar < var) {121maxvar = var;122best = i;123}124if (variance) {125variance[i] = var;126}127}128129return best;130}131132133static int134fxt1_choose (float vec[][MAX_COMP], int nv,135byte input[N_TEXELS][MAX_COMP], int nc, int n)136{137#if 0138/* Choose colors from a grid.139*/140int i, j;141142for (j = 0; j < nv; j++) {143int m = j * (n - 1) / (nv - 1);144for (i = 0; i < nc; i++) {145vec[j][i] = input[m][i];146}147}148#else149/* Our solution here is to find the darkest and brightest colors in150* the 8x4 tile and use those as the two representative colors.151* There are probably better algorithms to use (histogram-based).152*/153int i, j, k;154#ifndef YUV155int minSum = 2000; /* big enough */156#else157int minSum = 2000000;158#endif159int maxSum = -1; /* small enough */160int minCol = 0; /* phoudoin: silent compiler! */161int maxCol = 0; /* phoudoin: silent compiler! */162163struct {164int flag;165dword key;166int freq;167int idx;168} hist[N_TEXELS];169int lenh = 0;170171memset(hist, 0, sizeof(hist));172173for (k = 0; k < n; k++) {174int l;175dword key = 0;176int sum = 0;177for (i = 0; i < nc; i++) {178key <<= 8;179key |= input[k][i];180#ifndef YUV181sum += input[k][i];182#else183/* RGB to YUV conversion according to CCIR 601 specs184* Y = 0.299R+0.587G+0.114B185* U = 0.713(R - Y) = 0.500R-0.419G-0.081B186* V = 0.564(B - Y) = -0.169R-0.331G+0.500B187*/188sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];189#endif190}191for (l = 0; l < n; l++) {192if (!hist[l].flag) {193/* alloc new slot */194hist[l].flag = !0;195hist[l].key = key;196hist[l].freq = 1;197hist[l].idx = k;198lenh = l + 1;199break;200} else if (hist[l].key == key) {201hist[l].freq++;202break;203}204}205if (minSum > sum) {206minSum = sum;207minCol = k;208}209if (maxSum < sum) {210maxSum = sum;211maxCol = k;212}213}214215if (lenh <= nv) {216for (j = 0; j < lenh; j++) {217for (i = 0; i < nc; i++) {218vec[j][i] = (float)input[hist[j].idx][i];219}220}221for (; j < 
nv; j++) {222for (i = 0; i < nc; i++) {223vec[j][i] = vec[0][i];224}225}226return 0;227}228229for (j = 0; j < nv; j++) {230for (i = 0; i < nc; i++) {231vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (float)(nv - 1);232}233}234#endif235236return !0;237}238239240static int241fxt1_lloyd (float vec[][MAX_COMP], int nv,242byte input[N_TEXELS][MAX_COMP], int nc, int n)243{244/* Use the generalized lloyd's algorithm for VQ:245* find 4 color vectors.246*247* for each sample color248* sort to nearest vector.249*250* replace each vector with the centroid of it's matching colors.251*252* repeat until RMS doesn't improve.253*254* if a color vector has no samples, or becomes the same as another255* vector, replace it with the color which is farthest from a sample.256*257* vec[][MAX_COMP] initial vectors and resulting colors258* nv number of resulting colors required259* input[N_TEXELS][MAX_COMP] input texels260* nc number of components in input / vec261* n number of input samples262*/263264int sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */265int cnt[MAX_VECT]; /* how many times a certain vector was chosen */266float error, lasterror = 1e9;267268int i, j, k, rep;269270/* the quantizer */271for (rep = 0; rep < LL_N_REP; rep++) {272/* reset sums & counters */273for (j = 0; j < nv; j++) {274for (i = 0; i < nc; i++) {275sum[j][i] = 0;276}277cnt[j] = 0;278}279error = 0;280281/* scan whole block */282for (k = 0; k < n; k++) {283#if 1284int best = -1;285float err = 1e9; /* big enough */286/* determine best vector */287for (j = 0; j < nv; j++) {288float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +289(vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +290(vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);291if (nc == 4) {292e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);293}294if (e < err) {295err = e;296best = j;297}298}299#else300int best = fxt1_bestcol(vec, nv, input[k], nc, &err);301#endif302/* 
add in closest color */303for (i = 0; i < nc; i++) {304sum[best][i] += input[k][i];305}306/* mark this vector as used */307cnt[best]++;308/* accumulate error */309error += err;310}311312/* check RMS */313if ((error < LL_RMS_E) ||314((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {315return !0; /* good match */316}317lasterror = error;318319/* move each vector to the barycenter of its closest colors */320for (j = 0; j < nv; j++) {321if (cnt[j]) {322float div = 1.0F / cnt[j];323for (i = 0; i < nc; i++) {324vec[j][i] = div * sum[j][i];325}326} else {327/* this vec has no samples or is identical with a previous vec */328int worst = fxt1_worst(vec[j], input, nc, n);329for (i = 0; i < nc; i++) {330vec[j][i] = input[worst][i];331}332}333}334}335336return 0; /* could not converge fast enough */337}338339340static void341fxt1_quantize_CHROMA (dword *cc,342byte input[N_TEXELS][MAX_COMP])343{344const int n_vect = 4; /* 4 base vectors to find */345const int n_comp = 3; /* 3 components: R, G, B */346float vec[MAX_VECT][MAX_COMP];347int i, j, k;348qword hi; /* high quadword */349dword lohi, lolo; /* low quadword: hi dword, lo dword */350351if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {352fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);353}354355Q_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */356for (j = n_vect - 1; j >= 0; j--) {357for (i = 0; i < n_comp; i++) {358/* add in colors */359Q_SHL(hi, 5);360Q_OR32(hi, (dword)(vec[j][i] / 8.0F));361}362}363((qword *)cc)[1] = hi;364365lohi = lolo = 0;366/* right microtile */367for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {368lohi <<= 2;369lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);370}371/* left microtile */372for (; k >= 0; k--) {373lolo <<= 2;374lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);375}376cc[1] = lohi;377cc[0] = lolo;378}379380381static void382fxt1_quantize_ALPHA0 (dword *cc,383byte input[N_TEXELS][MAX_COMP],384byte reord[N_TEXELS][MAX_COMP], int n)385{386const int n_vect = 3; 
/* 3 base vectors to find */387const int n_comp = 4; /* 4 components: R, G, B, A */388float vec[MAX_VECT][MAX_COMP];389int i, j, k;390qword hi; /* high quadword */391dword lohi, lolo; /* low quadword: hi dword, lo dword */392393/* the last vector indicates zero */394for (i = 0; i < n_comp; i++) {395vec[n_vect][i] = 0;396}397398/* the first n texels in reord are guaranteed to be non-zero */399if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {400fxt1_lloyd(vec, n_vect, reord, n_comp, n);401}402403Q_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */404for (j = n_vect - 1; j >= 0; j--) {405/* add in alphas */406Q_SHL(hi, 5);407Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));408}409for (j = n_vect - 1; j >= 0; j--) {410for (i = 0; i < n_comp - 1; i++) {411/* add in colors */412Q_SHL(hi, 5);413Q_OR32(hi, (dword)(vec[j][i] / 8.0F));414}415}416((qword *)cc)[1] = hi;417418lohi = lolo = 0;419/* right microtile */420for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {421lohi <<= 2;422lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);423}424/* left microtile */425for (; k >= 0; k--) {426lolo <<= 2;427lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);428}429cc[1] = lohi;430cc[0] = lolo;431}432433434static void435fxt1_quantize_ALPHA1 (dword *cc,436byte input[N_TEXELS][MAX_COMP])437{438const int n_vect = 3; /* highest vector number in each microtile */439const int n_comp = 4; /* 4 components: R, G, B, A */440float vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */441float b, iv[MAX_COMP]; /* interpolation vector */442int i, j, k;443qword hi; /* high quadword */444dword lohi, lolo; /* low quadword: hi dword, lo dword */445446int minSum;447int maxSum;448int minColL = 0, maxColL = 0;449int minColR = 0, maxColR = 0;450int sumL = 0, sumR = 0;451452/* Our solution here is to find the darkest and brightest colors in453* the 4x4 tile and use those as the two representative colors.454* There are probably better algorithms to use (histogram-based).455*/456#ifndef YUV457minSum = 
2000; /* big enough */458#else459minSum = 2000000;460#endif461maxSum = -1; /* small enough */462for (k = 0; k < N_TEXELS / 2; k++) {463int sum = 0;464#ifndef YUV465for (i = 0; i < n_comp; i++) {466sum += input[k][i];467}468#else469sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];470#endif471if (minSum > sum) {472minSum = sum;473minColL = k;474}475if (maxSum < sum) {476maxSum = sum;477maxColL = k;478}479sumL += sum;480}481#ifndef YUV482minSum = 2000; /* big enough */483#else484minSum = 2000000;485#endif486maxSum = -1; /* small enough */487for (; k < N_TEXELS; k++) {488int sum = 0;489#ifndef YUV490for (i = 0; i < n_comp; i++) {491sum += input[k][i];492}493#else494sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];495#endif496if (minSum > sum) {497minSum = sum;498minColR = k;499}500if (maxSum < sum) {501maxSum = sum;502maxColR = k;503}504sumR += sum;505}506507/* choose the common vector (yuck!) */508{509int j1, j2;510int v1 = 0, v2 = 0;511float err = 1e9; /* big enough */512float tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */513for (i = 0; i < n_comp; i++) {514tv[0][i] = input[minColL][i];515tv[1][i] = input[maxColL][i];516tv[2][i] = input[minColR][i];517tv[3][i] = input[maxColR][i];518}519for (j1 = 0; j1 < 2; j1++) {520for (j2 = 2; j2 < 4; j2++) {521float e = 0.0F;522for (i = 0; i < n_comp; i++) {523e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);524}525if (e < err) {526err = e;527v1 = j1;528v2 = j2;529}530}531}532for (i = 0; i < n_comp; i++) {533vec[0][i] = tv[1 - v1][i];534vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);535vec[2][i] = tv[5 - v2][i];536}537}538539/* left microtile */540cc[0] = 0;541if (minColL != maxColL) {542/* compute interpolation vector */543MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);544545/* add in texels */546lolo = 0;547for (k = N_TEXELS / 2 - 1; k >= 0; k--) {548int texel;549/* interpolate color */550CALCCDOT(texel, n_vect, n_comp, iv, b, 
input[k]);551/* add in texel */552lolo <<= 2;553lolo |= texel;554}555556cc[0] = lolo;557}558559/* right microtile */560cc[1] = 0;561if (minColR != maxColR) {562/* compute interpolation vector */563MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);564565/* add in texels */566lohi = 0;567for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {568int texel;569/* interpolate color */570CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);571/* add in texel */572lohi <<= 2;573lohi |= texel;574}575576cc[1] = lohi;577}578579Q_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */580for (j = n_vect - 1; j >= 0; j--) {581/* add in alphas */582Q_SHL(hi, 5);583Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F));584}585for (j = n_vect - 1; j >= 0; j--) {586for (i = 0; i < n_comp - 1; i++) {587/* add in colors */588Q_SHL(hi, 5);589Q_OR32(hi, (dword)(vec[j][i] / 8.0F));590}591}592((qword *)cc)[1] = hi;593}594595596static void597fxt1_quantize_HI (dword *cc,598byte input[N_TEXELS][MAX_COMP],599byte reord[N_TEXELS][MAX_COMP], int n)600{601const int n_vect = 6; /* highest vector number */602const int n_comp = 3; /* 3 components: R, G, B */603float b = 0.0F; /* phoudoin: silent compiler! */604float iv[MAX_COMP]; /* interpolation vector */605int i, k;606dword hihi; /* high quadword: hi dword */607608#ifndef YUV609int minSum = 2000; /* big enough */610#else611int minSum = 2000000;612#endif613int maxSum = -1; /* small enough */614int minCol = 0; /* phoudoin: silent compiler! */615int maxCol = 0; /* phoudoin: silent compiler! 
*/616617/* Our solution here is to find the darkest and brightest colors in618* the 8x4 tile and use those as the two representative colors.619* There are probably better algorithms to use (histogram-based).620*/621for (k = 0; k < n; k++) {622int sum = 0;623#ifndef YUV624for (i = 0; i < n_comp; i++) {625sum += reord[k][i];626}627#else628sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];629#endif630if (minSum > sum) {631minSum = sum;632minCol = k;633}634if (maxSum < sum) {635maxSum = sum;636maxCol = k;637}638}639640hihi = 0; /* cc-hi = "00" */641for (i = 0; i < n_comp; i++) {642/* add in colors */643hihi <<= 5;644hihi |= reord[maxCol][i] >> 3;645}646for (i = 0; i < n_comp; i++) {647/* add in colors */648hihi <<= 5;649hihi |= reord[minCol][i] >> 3;650}651cc[3] = hihi;652cc[0] = cc[1] = cc[2] = 0;653654/* compute interpolation vector */655if (minCol != maxCol) {656MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);657}658659/* add in texels */660for (k = N_TEXELS - 1; k >= 0; k--) {661int t = k * 3;662dword *kk = (dword *)((byte *)cc + t / 8);663int texel = n_vect + 1; /* transparent black */664665if (!ISTBLACK(input[k])) {666if (minCol != maxCol) {667/* interpolate color */668CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);669/* add in texel */670kk[0] |= texel << (t & 7);671}672} else {673/* add in texel */674kk[0] |= texel << (t & 7);675}676}677}678679680static void681fxt1_quantize_MIXED1 (dword *cc,682byte input[N_TEXELS][MAX_COMP])683{684const int n_vect = 2; /* highest vector number in each microtile */685const int n_comp = 3; /* 3 components: R, G, B */686byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */687float b, iv[MAX_COMP]; /* interpolation vector */688int i, j, k;689qword hi; /* high quadword */690dword lohi, lolo; /* low quadword: hi dword, lo dword */691692int minSum;693int maxSum;694int minColL = 0, maxColL = -1;695int minColR = 0, maxColR = -1;696697/* Our solution here is to find the darkest and 
brightest colors in698* the 4x4 tile and use those as the two representative colors.699* There are probably better algorithms to use (histogram-based).700*/701#ifndef YUV702minSum = 2000; /* big enough */703#else704minSum = 2000000;705#endif706maxSum = -1; /* small enough */707for (k = 0; k < N_TEXELS / 2; k++) {708if (!ISTBLACK(input[k])) {709int sum = 0;710#ifndef YUV711for (i = 0; i < n_comp; i++) {712sum += input[k][i];713}714#else715sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];716#endif717if (minSum > sum) {718minSum = sum;719minColL = k;720}721if (maxSum < sum) {722maxSum = sum;723maxColL = k;724}725}726}727#ifndef YUV728minSum = 2000; /* big enough */729#else730minSum = 2000000;731#endif732maxSum = -1; /* small enough */733for (; k < N_TEXELS; k++) {734if (!ISTBLACK(input[k])) {735int sum = 0;736#ifndef YUV737for (i = 0; i < n_comp; i++) {738sum += input[k][i];739}740#else741sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];742#endif743if (minSum > sum) {744minSum = sum;745minColR = k;746}747if (maxSum < sum) {748maxSum = sum;749maxColR = k;750}751}752}753754/* left microtile */755if (maxColL == -1) {756/* all transparent black */757cc[0] = 0xFFFFFFFF;758for (i = 0; i < n_comp; i++) {759vec[0][i] = 0;760vec[1][i] = 0;761}762} else {763cc[0] = 0;764for (i = 0; i < n_comp; i++) {765vec[0][i] = input[minColL][i];766vec[1][i] = input[maxColL][i];767}768if (minColL != maxColL) {769/* compute interpolation vector */770MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);771772/* add in texels */773lolo = 0;774for (k = N_TEXELS / 2 - 1; k >= 0; k--) {775int texel = n_vect + 1; /* transparent black */776if (!ISTBLACK(input[k])) {777/* interpolate color */778CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);779}780/* add in texel */781lolo <<= 2;782lolo |= texel;783}784cc[0] = lolo;785}786}787788/* right microtile */789if (maxColR == -1) {790/* all transparent black */791cc[1] = 0xFFFFFFFF;792for (i = 0; i < n_comp; 
i++) {793vec[2][i] = 0;794vec[3][i] = 0;795}796} else {797cc[1] = 0;798for (i = 0; i < n_comp; i++) {799vec[2][i] = input[minColR][i];800vec[3][i] = input[maxColR][i];801}802if (minColR != maxColR) {803/* compute interpolation vector */804MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);805806/* add in texels */807lohi = 0;808for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {809int texel = n_vect + 1; /* transparent black */810if (!ISTBLACK(input[k])) {811/* interpolate color */812CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);813}814/* add in texel */815lohi <<= 2;816lohi |= texel;817}818cc[1] = lohi;819}820}821822Q_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */823for (j = 2 * 2 - 1; j >= 0; j--) {824for (i = 0; i < n_comp; i++) {825/* add in colors */826Q_SHL(hi, 5);827Q_OR32(hi, vec[j][i] >> 3);828}829}830((qword *)cc)[1] = hi;831}832833834static void835fxt1_quantize_MIXED0 (dword *cc,836byte input[N_TEXELS][MAX_COMP])837{838const int n_vect = 3; /* highest vector number in each microtile */839const int n_comp = 3; /* 3 components: R, G, B */840byte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */841float b, iv[MAX_COMP]; /* interpolation vector */842int i, j, k;843qword hi; /* high quadword */844dword lohi, lolo; /* low quadword: hi dword, lo dword */845846int minColL = 0, maxColL = 0;847int minColR = 0, maxColR = 0;848#if 0849int minSum;850int maxSum;851852/* Our solution here is to find the darkest and brightest colors in853* the 4x4 tile and use those as the two representative colors.854* There are probably better algorithms to use (histogram-based).855*/856#ifndef YUV857minSum = 2000; /* big enough */858#else859minSum = 2000000;860#endif861maxSum = -1; /* small enough */862for (k = 0; k < N_TEXELS / 2; k++) {863int sum = 0;864#ifndef YUV865for (i = 0; i < n_comp; i++) {866sum += input[k][i];867}868#else869sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];870#endif871if (minSum > sum) 
{872minSum = sum;873minColL = k;874}875if (maxSum < sum) {876maxSum = sum;877maxColL = k;878}879}880minSum = 2000; /* big enough */881maxSum = -1; /* small enough */882for (; k < N_TEXELS; k++) {883int sum = 0;884#ifndef YUV885for (i = 0; i < n_comp; i++) {886sum += input[k][i];887}888#else889sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];890#endif891if (minSum > sum) {892minSum = sum;893minColR = k;894}895if (maxSum < sum) {896maxSum = sum;897maxColR = k;898}899}900#else901int minVal;902int maxVal;903int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);904int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);905906/* Scan the channel with max variance for lo & hi907* and use those as the two representative colors.908*/909minVal = 2000; /* big enough */910maxVal = -1; /* small enough */911for (k = 0; k < N_TEXELS / 2; k++) {912int t = input[k][maxVarL];913if (minVal > t) {914minVal = t;915minColL = k;916}917if (maxVal < t) {918maxVal = t;919maxColL = k;920}921}922minVal = 2000; /* big enough */923maxVal = -1; /* small enough */924for (; k < N_TEXELS; k++) {925int t = input[k][maxVarR];926if (minVal > t) {927minVal = t;928minColR = k;929}930if (maxVal < t) {931maxVal = t;932maxColR = k;933}934}935#endif936937/* left microtile */938cc[0] = 0;939for (i = 0; i < n_comp; i++) {940vec[0][i] = input[minColL][i];941vec[1][i] = input[maxColL][i];942}943if (minColL != maxColL) {944/* compute interpolation vector */945MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);946947/* add in texels */948lolo = 0;949for (k = N_TEXELS / 2 - 1; k >= 0; k--) {950int texel;951/* interpolate color */952CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);953/* add in texel */954lolo <<= 2;955lolo |= texel;956}957958/* funky encoding for LSB of green */959if ((int)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {960for (i = 0; i < n_comp; i++) {961vec[1][i] = input[minColL][i];962vec[0][i] = 
input[maxColL][i];963}964lolo = ~lolo;965}966967cc[0] = lolo;968}969970/* right microtile */971cc[1] = 0;972for (i = 0; i < n_comp; i++) {973vec[2][i] = input[minColR][i];974vec[3][i] = input[maxColR][i];975}976if (minColR != maxColR) {977/* compute interpolation vector */978MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);979980/* add in texels */981lohi = 0;982for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {983int texel;984/* interpolate color */985CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);986/* add in texel */987lohi <<= 2;988lohi |= texel;989}990991/* funky encoding for LSB of green */992if ((int)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {993for (i = 0; i < n_comp; i++) {994vec[3][i] = input[minColR][i];995vec[2][i] = input[maxColR][i];996}997lohi = ~lohi;998}9991000cc[1] = lohi;1001}10021003Q_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */1004for (j = 2 * 2 - 1; j >= 0; j--) {1005for (i = 0; i < n_comp; i++) {1006/* add in colors */1007Q_SHL(hi, 5);1008Q_OR32(hi, vec[j][i] >> 3);1009}1010}1011((qword *)cc)[1] = hi;1012}101310141015static void1016fxt1_quantize (dword *cc, const byte *lines[], int comps)1017{1018int trualpha;1019byte reord[N_TEXELS][MAX_COMP];10201021byte input[N_TEXELS][MAX_COMP];1022#ifndef ARGB1023int i;1024#endif1025int k, l;10261027if (comps == 3) {1028/* make the whole block opaque */1029memset(input, -1, sizeof(input));1030}10311032/* 8 texels each line */1033#ifndef ARGB1034for (l = 0; l < 4; l++) {1035for (k = 0; k < 4; k++) {1036for (i = 0; i < comps; i++) {1037input[k + l * 4][i] = *lines[l]++;1038}1039}1040for (; k < 8; k++) {1041for (i = 0; i < comps; i++) {1042input[k + l * 4 + 12][i] = *lines[l]++;1043}1044}1045}1046#else1047/* H.Morii - support for ARGB inputs */1048for (l = 0; l < 4; l++) {1049for (k = 0; k < 4; k++) {1050input[k + l * 4][2] = *lines[l]++;1051input[k + l * 4][1] = *lines[l]++;1052input[k + l * 4][0] = *lines[l]++;1053if (comps == 4) input[k + l * 
4][3] = *lines[l]++;1054}1055for (; k < 8; k++) {1056input[k + l * 4 + 12][2] = *lines[l]++;1057input[k + l * 4 + 12][1] = *lines[l]++;1058input[k + l * 4 + 12][0] = *lines[l]++;1059if (comps == 4) input[k + l * 4 + 12][3] = *lines[l]++;1060}1061}1062#endif10631064/* block layout:1065* 00, 01, 02, 03, 08, 09, 0a, 0b1066* 10, 11, 12, 13, 18, 19, 1a, 1b1067* 04, 05, 06, 07, 0c, 0d, 0e, 0f1068* 14, 15, 16, 17, 1c, 1d, 1e, 1f1069*/10701071/* [dBorca]1072* stupidity flows forth from this1073*/1074l = N_TEXELS;1075trualpha = 0;1076if (comps == 4) {1077/* skip all transparent black texels */1078l = 0;1079for (k = 0; k < N_TEXELS; k++) {1080/* test all components against 0 */1081if (!ISTBLACK(input[k])) {1082/* texel is not transparent black */1083COPY_4UBV(reord[l], input[k]);1084if (reord[l][ACOMP] < (255 - ALPHA_TS)) {1085/* non-opaque texel */1086trualpha = !0;1087}1088l++;1089}1090}1091}10921093#if 01094if (trualpha) {1095fxt1_quantize_ALPHA0(cc, input, reord, l);1096} else if (l == 0) {1097cc[0] = cc[1] = cc[2] = -1;1098cc[3] = 0;1099} else if (l < N_TEXELS) {1100fxt1_quantize_HI(cc, input, reord, l);1101} else {1102fxt1_quantize_CHROMA(cc, input);1103}1104(void)fxt1_quantize_ALPHA1;1105(void)fxt1_quantize_MIXED1;1106(void)fxt1_quantize_MIXED0;1107#else1108if (trualpha) {1109fxt1_quantize_ALPHA1(cc, input);1110} else if (l == 0) {1111cc[0] = cc[1] = cc[2] = 0xFFFFFFFF;1112cc[3] = 0;1113} else if (l < N_TEXELS) {1114fxt1_quantize_MIXED1(cc, input);1115} else {1116fxt1_quantize_MIXED0(cc, input);1117}1118(void)fxt1_quantize_ALPHA0;1119(void)fxt1_quantize_HI;1120(void)fxt1_quantize_CHROMA;1121#endif1122}112311241125TAPI int TAPIENTRY1126fxt1_encode (int width, int height, int comps,1127const void *source, int srcRowStride,1128void *dest, int destRowStride)1129{1130int x, y;1131const byte *data;1132dword *encoded = (dword *)dest;1133void *newSource = NULL;11341135/* Replicate image if width is not M8 or height is not M4 */1136if ((width & 7) | (height & 3)) {1137int 
newWidth = (width + 7) & ~7;1138int newHeight = (height + 3) & ~3;1139newSource = malloc(comps * newWidth * newHeight * sizeof(byte *));1140_mesa_upscale_teximage2d(width, height, newWidth, newHeight,1141comps, (const byte *)source,1142srcRowStride, (byte *)newSource);1143source = newSource;1144width = newWidth;1145height = newHeight;1146srcRowStride = comps * newWidth;1147}11481149data = (const byte *)source;1150destRowStride = (destRowStride - width * 2) / 4;1151for (y = 0; y < height; y += 4) {1152unsigned int offs = 0 + (y + 0) * srcRowStride;1153for (x = 0; x < width; x += 8) {1154const byte *lines[4];1155lines[0] = &data[offs];1156lines[1] = lines[0] + srcRowStride;1157lines[2] = lines[1] + srcRowStride;1158lines[3] = lines[2] + srcRowStride;1159offs += 8 * comps;1160fxt1_quantize(encoded, lines, comps);1161/* 128 bits per 8x4 block */1162encoded += 4;1163}1164encoded += destRowStride;1165}11661167if (newSource != NULL) {1168free(newSource);1169}11701171return 0;1172}117311741175/***************************************************************************\1176* FXT1 decoder1177*1178* The decoder is based on GL_3DFX_texture_compression_FXT11179* specification and serves as a concept for the encoder.1180\***************************************************************************/118111821183/* lookup table for scaling 5 bit colors up to 8 bits */1184static const byte _rgb_scale_5[] = {11850, 8, 16, 25, 33, 41, 49, 58,118666, 74, 82, 90, 99, 107, 115, 123,1187132, 140, 148, 156, 165, 173, 181, 189,1188197, 206, 214, 222, 230, 239, 247, 2551189};11901191/* lookup table for scaling 6 bit colors up to 8 bits */1192static const byte _rgb_scale_6[] = {11930, 4, 8, 12, 16, 20, 24, 28,119432, 36, 40, 45, 49, 53, 57, 61,119565, 69, 73, 77, 81, 85, 89, 93,119697, 101, 105, 109, 113, 117, 121, 125,1197130, 134, 138, 142, 146, 150, 154, 158,1198162, 166, 170, 174, 178, 182, 186, 190,1199194, 198, 202, 206, 210, 215, 219, 223,1200227, 231, 235, 239, 243, 247, 251, 
2551201};120212031204#define CC_SEL(cc, which) (((dword *)(cc))[(which) / 32] >> ((which) & 31))1205#define UP5(c) _rgb_scale_5[(c) & 31]1206#define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)]1207#define LERP(n, t, c0, c1) (((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n)1208#define ZERO_4UBV(v) *((dword *)(v)) = 0120912101211static void1212fxt1_decode_1HI (const byte *code, int t, byte *rgba)1213{1214const dword *cc;12151216t *= 3;1217cc = (const dword *)(code + t / 8);1218t = (cc[0] >> (t & 7)) & 7;12191220if (t == 7) {1221ZERO_4UBV(rgba);1222} else {1223cc = (const dword *)(code + 12);1224if (t == 0) {1225rgba[BCOMP] = UP5(CC_SEL(cc, 0));1226rgba[GCOMP] = UP5(CC_SEL(cc, 5));1227rgba[RCOMP] = UP5(CC_SEL(cc, 10));1228} else if (t == 6) {1229rgba[BCOMP] = UP5(CC_SEL(cc, 15));1230rgba[GCOMP] = UP5(CC_SEL(cc, 20));1231rgba[RCOMP] = UP5(CC_SEL(cc, 25));1232} else {1233rgba[BCOMP] = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));1234rgba[GCOMP] = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));1235rgba[RCOMP] = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));1236}1237rgba[ACOMP] = 255;1238}1239}124012411242static void1243fxt1_decode_1CHROMA (const byte *code, int t, byte *rgba)1244{1245const dword *cc;1246dword kk;12471248cc = (const dword *)code;1249if (t & 16) {1250cc++;1251t &= 15;1252}1253t = (cc[0] >> (t * 2)) & 3;12541255t *= 15;1256cc = (const dword *)(code + 8 + t / 8);1257kk = cc[0] >> (t & 7);1258rgba[BCOMP] = UP5(kk);1259rgba[GCOMP] = UP5(kk >> 5);1260rgba[RCOMP] = UP5(kk >> 10);1261rgba[ACOMP] = 255;1262}126312641265static void1266fxt1_decode_1MIXED (const byte *code, int t, byte *rgba)1267{1268const dword *cc;1269int col[2][3];1270int glsb, selb;12711272cc = (const dword *)code;1273if (t & 16) {1274t &= 15;1275t = (cc[1] >> (t * 2)) & 3;1276/* col 2 */1277col[0][BCOMP] = (*(const dword *)(code + 11)) >> 6;1278col[0][GCOMP] = CC_SEL(cc, 99);1279col[0][RCOMP] = CC_SEL(cc, 104);1280/* col 3 */1281col[1][BCOMP] = CC_SEL(cc, 
109);1282col[1][GCOMP] = CC_SEL(cc, 114);1283col[1][RCOMP] = CC_SEL(cc, 119);1284glsb = CC_SEL(cc, 126);1285selb = CC_SEL(cc, 33);1286} else {1287t = (cc[0] >> (t * 2)) & 3;1288/* col 0 */1289col[0][BCOMP] = CC_SEL(cc, 64);1290col[0][GCOMP] = CC_SEL(cc, 69);1291col[0][RCOMP] = CC_SEL(cc, 74);1292/* col 1 */1293col[1][BCOMP] = CC_SEL(cc, 79);1294col[1][GCOMP] = CC_SEL(cc, 84);1295col[1][RCOMP] = CC_SEL(cc, 89);1296glsb = CC_SEL(cc, 125);1297selb = CC_SEL(cc, 1);1298}12991300if (CC_SEL(cc, 124) & 1) {1301/* alpha[0] == 1 */13021303if (t == 3) {1304ZERO_4UBV(rgba);1305} else {1306if (t == 0) {1307rgba[BCOMP] = UP5(col[0][BCOMP]);1308rgba[GCOMP] = UP5(col[0][GCOMP]);1309rgba[RCOMP] = UP5(col[0][RCOMP]);1310} else if (t == 2) {1311rgba[BCOMP] = UP5(col[1][BCOMP]);1312rgba[GCOMP] = UP6(col[1][GCOMP], glsb);1313rgba[RCOMP] = UP5(col[1][RCOMP]);1314} else {1315rgba[BCOMP] = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;1316rgba[GCOMP] = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;1317rgba[RCOMP] = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;1318}1319rgba[ACOMP] = 255;1320}1321} else {1322/* alpha[0] == 0 */13231324if (t == 0) {1325rgba[BCOMP] = UP5(col[0][BCOMP]);1326rgba[GCOMP] = UP6(col[0][GCOMP], glsb ^ selb);1327rgba[RCOMP] = UP5(col[0][RCOMP]);1328} else if (t == 3) {1329rgba[BCOMP] = UP5(col[1][BCOMP]);1330rgba[GCOMP] = UP6(col[1][GCOMP], glsb);1331rgba[RCOMP] = UP5(col[1][RCOMP]);1332} else {1333rgba[BCOMP] = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));1334rgba[GCOMP] = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),1335UP6(col[1][GCOMP], glsb));1336rgba[RCOMP] = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));1337}1338rgba[ACOMP] = 255;1339}1340}134113421343static void1344fxt1_decode_1ALPHA (const byte *code, int t, byte *rgba)1345{1346const dword *cc;13471348cc = (const dword *)code;1349if (CC_SEL(cc, 124) & 1) {1350/* lerp == 1 */1351int col0[4];13521353if (t & 16) {1354t &= 15;1355t = (cc[1] >> (t * 2)) & 3;1356/* col 2 */1357col0[BCOMP] = 
(*(const dword *)(code + 11)) >> 6;1358col0[GCOMP] = CC_SEL(cc, 99);1359col0[RCOMP] = CC_SEL(cc, 104);1360col0[ACOMP] = CC_SEL(cc, 119);1361} else {1362t = (cc[0] >> (t * 2)) & 3;1363/* col 0 */1364col0[BCOMP] = CC_SEL(cc, 64);1365col0[GCOMP] = CC_SEL(cc, 69);1366col0[RCOMP] = CC_SEL(cc, 74);1367col0[ACOMP] = CC_SEL(cc, 109);1368}13691370if (t == 0) {1371rgba[BCOMP] = UP5(col0[BCOMP]);1372rgba[GCOMP] = UP5(col0[GCOMP]);1373rgba[RCOMP] = UP5(col0[RCOMP]);1374rgba[ACOMP] = UP5(col0[ACOMP]);1375} else if (t == 3) {1376rgba[BCOMP] = UP5(CC_SEL(cc, 79));1377rgba[GCOMP] = UP5(CC_SEL(cc, 84));1378rgba[RCOMP] = UP5(CC_SEL(cc, 89));1379rgba[ACOMP] = UP5(CC_SEL(cc, 114));1380} else {1381rgba[BCOMP] = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));1382rgba[GCOMP] = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));1383rgba[RCOMP] = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));1384rgba[ACOMP] = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));1385}1386} else {1387/* lerp == 0 */13881389if (t & 16) {1390cc++;1391t &= 15;1392}1393t = (cc[0] >> (t * 2)) & 3;13941395if (t == 3) {1396ZERO_4UBV(rgba);1397} else {1398dword kk;1399cc = (const dword *)code;1400rgba[ACOMP] = UP5(cc[3] >> (t * 5 + 13));1401t *= 15;1402cc = (const dword *)(code + 8 + t / 8);1403kk = cc[0] >> (t & 7);1404rgba[BCOMP] = UP5(kk);1405rgba[GCOMP] = UP5(kk >> 5);1406rgba[RCOMP] = UP5(kk >> 10);1407}1408}1409}141014111412TAPI void TAPIENTRY1413fxt1_decode_1 (const void *texture, int stride,1414int i, int j, byte *rgba)1415{1416static void (*decode_1[]) (const byte *, int, byte *) = {1417fxt1_decode_1HI, /* cc-high = "00?" */1418fxt1_decode_1HI, /* cc-high = "00?" */1419fxt1_decode_1CHROMA, /* cc-chroma = "010" */1420fxt1_decode_1ALPHA, /* alpha = "011" */1421fxt1_decode_1MIXED, /* mixed = "1??" */1422fxt1_decode_1MIXED, /* mixed = "1??" */1423fxt1_decode_1MIXED, /* mixed = "1??" */1424fxt1_decode_1MIXED /* mixed = "1??" 
*/1425};14261427const byte *code = (const byte *)texture +1428((j / 4) * (stride / 8) + (i / 8)) * 16;1429int mode = CC_SEL(code, 125);1430int t = i & 7;14311432if (t & 4) {1433t += 12;1434}1435t += (j & 3) * 4;14361437decode_1[mode](code, t, rgba);14381439#if VERBOSE1440{1441extern int cc_chroma;1442extern int cc_alpha;1443extern int cc_high;1444extern int cc_mixed;1445static int *cctype[] = {1446&cc_high,1447&cc_high,1448&cc_chroma,1449&cc_alpha,1450&cc_mixed,1451&cc_mixed,1452&cc_mixed,1453&cc_mixed1454};1455(*cctype[mode])++;1456}1457#endif1458}145914601461