// Path: blob/master/libmupen64plus/mupen64plus-video-glide64mk2/src/GlideHQ/TxUtil.cpp
// 2 views
/*1* Texture Filtering2* Version: 1.03*4* Copyright (C) 2007 Hiroshi Morii All Rights Reserved.5* Email koolsmoky(at)users.sourceforge.net6* Web http://www.3dfxzone.it/koolsmoky7*8* this is free software; you can redistribute it and/or modify9* it under the terms of the GNU General Public License as published by10* the Free Software Foundation; either version 2, or (at your option)11* any later version.12*13* this is distributed in the hope that it will be useful,14* but WITHOUT ANY WARRANTY; without even the implied warranty of15* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the16* GNU General Public License for more details.17*18* You should have received a copy of the GNU General Public License19* along with GNU Make; see the file COPYING. If not, write to20* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.21*/2223#include "TxUtil.h"24#include "TxDbg.h"25#include <zlib.h>26#include <stdlib.h>27#ifdef _WIN3228#define WIN32_LEAN_AND_MEAN29#include <windows.h>30#else31#include <unistd.h>32#endif3334/*35* External libraries36******************************************************************************/37TxLoadLib::TxLoadLib()38{39#ifdef DXTN_DLL40if (!_dxtnlib)41_dxtnlib = LoadLibrary("dxtn");4243if (_dxtnlib) {44if (!_tx_compress_dxtn)45_tx_compress_dxtn = (dxtCompressTexFuncExt)DLSYM(_dxtnlib, "tx_compress_dxtn");4647if (!_tx_compress_fxt1)48_tx_compress_fxt1 = (fxtCompressTexFuncExt)DLSYM(_dxtnlib, "fxt1_encode");49}50#else51_tx_compress_dxtn = tx_compress_dxtn;52_tx_compress_fxt1 = fxt1_encode;5354#endif55}5657TxLoadLib::~TxLoadLib()58{59#ifdef DXTN_DLL60/* free dynamic library */61if (_dxtnlib)62FreeLibrary(_dxtnlib);63#endif6465}6667fxtCompressTexFuncExt68TxLoadLib::getfxtCompressTexFuncExt()69{70return _tx_compress_fxt1;71}7273dxtCompressTexFuncExt74TxLoadLib::getdxtCompressTexFuncExt()75{76return _tx_compress_dxtn;77}787980/*81* 
Utilities82******************************************************************************/83uint3284TxUtil::checksumTx(uint8 *src, int width, int height, uint16 format)85{86int dataSize = sizeofTx(width, height, format);8788/* for now we use adler32 if something else is better89* we can simply swtich later90*/91/* return (dataSize ? Adler32(src, dataSize, 1) : 0); */9293/* zlib crc32 */94return (dataSize ? crc32(crc32(0L, Z_NULL, 0), src, dataSize) : 0);95}9697int98TxUtil::sizeofTx(int width, int height, uint16 format)99{100int dataSize = 0;101102/* a lookup table for the shifts would be better */103switch (format) {104case GR_TEXFMT_ARGB_CMP_FXT1:105dataSize = (((width + 0x7) & ~0x7) * ((height + 0x3) & ~0x3)) >> 1;106break;107case GR_TEXFMT_ARGB_CMP_DXT1:108dataSize = (((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3)) >> 1;109break;110case GR_TEXFMT_ARGB_CMP_DXT3:111case GR_TEXFMT_ARGB_CMP_DXT5:112dataSize = ((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3);113break;114case GR_TEXFMT_ALPHA_INTENSITY_44:115case GR_TEXFMT_ALPHA_8:116case GR_TEXFMT_INTENSITY_8:117case GR_TEXFMT_P_8:118dataSize = width * height;119break;120case GR_TEXFMT_ARGB_4444:121case GR_TEXFMT_ARGB_1555:122case GR_TEXFMT_RGB_565:123case GR_TEXFMT_ALPHA_INTENSITY_88:124dataSize = (width * height) << 1;125break;126case GR_TEXFMT_ARGB_8888:127dataSize = (width * height) << 2;128break;129default:130/* unsupported format */131DBG_INFO(80, L"Error: cannot get size. 
unsupported gfmt:%x\n", format);132;133}134135return dataSize;136}137138#if 0 /* unused */139uint32140TxUtil::chkAlpha(uint32* src, int width, int height)141{142/* NOTE: _src must be ARGB8888143* return values144* 0x00000000: 8bit alpha145* 0x00000001: 1bit alpha146* 0xff000001: no alpha147*/148149int _size = width * height;150uint32 alpha = 0;151152__asm {153mov esi, dword ptr [src];154mov ecx, dword ptr [_size];155mov ebx, 0xff000000;156157tc1_loop:158mov eax, dword ptr [esi];159add esi, 4;160161and eax, 0xff000000;162jz alpha1bit;163cmp eax, 0xff000000;164je alpha1bit;165jmp done;166167alpha1bit:168and ebx, eax;169dec ecx;170jnz tc1_loop;171172or ebx, 0x00000001;173mov dword ptr [alpha], ebx;174175done:176}177178return alpha;179}180#endif181182uint32183TxUtil::checksum(uint8 *src, int width, int height, int size, int rowStride)184{185/* Rice CRC32 for now. We can switch this to Jabo MD5 or186* any other custom checksum.187* TODO: use *_HIRESTEXTURE option. */188189if (!src) return 0;190191return RiceCRC32(src, width, height, size, rowStride);192}193194uint64195TxUtil::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette)196{197/* Rice CRC32 for now. We can switch this to Jabo MD5 or198* any other custom checksum.199* TODO: use *_HIRESTEXTURE option. 
*/200/* Returned value is 64bits: hi=palette crc32 low=texture crc32 */201202if (!src) return 0;203204uint64 crc64Ret = 0;205206if (palette) {207uint32 crc32 = 0, cimax = 0;208switch (size & 0xff) {209case 1:210if (RiceCRC32_CI8(src, width, height, size, rowStride, &crc32, &cimax)) {211crc64Ret = (uint64)RiceCRC32(palette, cimax + 1, 1, 2, 512);212crc64Ret <<= 32;213crc64Ret |= (uint64)crc32;214}215break;216case 0:217if (RiceCRC32_CI4(src, width, height, size, rowStride, &crc32, &cimax)) {218crc64Ret = (uint64)RiceCRC32(palette, cimax + 1, 1, 2, 32);219crc64Ret <<= 32;220crc64Ret |= (uint64)crc32;221}222}223}224if (!crc64Ret) {225crc64Ret = (uint64)RiceCRC32(src, width, height, size, rowStride);226}227228return crc64Ret;229}230231/*232** Computes Adler32 checksum for a stream of data.233**234** From the specification found in RFC 1950: (ZLIB Compressed Data Format235** Specification version 3.3)236**237** ADLER32 (Adler-32 checksum) This contains a checksum value of the238** uncompressed data (excluding any dictionary data) computed according to239** Adler-32 algorithm. This algorithm is a 32-bit extension and improvement240** of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073 standard.241**242** Adler-32 is composed of two sums accumulated per byte: s1 is the sum of243** all bytes, s2 is the sum of all s1 values. Both sums are done modulo244** 65521. s1 is initialized to 1, s2 to zero. The Adler-32 checksum is stored245** as s2*65536 + s1 in most-significant-byte first (network) order.246**247** 8.2. The Adler-32 algorithm248**249** The Adler-32 algorithm is much faster than the CRC32 algorithm yet still250** provides an extremely low probability of undetected errors.251**252** The modulo on unsigned long accumulators can be delayed for 5552 bytes,253** so the modulo operation time is negligible. If the bytes are a, b, c,254** the second sum is 3a + 2b + c + 3, and so is position and order sensitive,255** unlike the first sum, which is just a checksum. 
That 65521 is prime is256** important to avoid a possible large class of two-byte errors that leave257** the check unchanged. (The Fletcher checksum uses 255, which is not prime258** and which also makes the Fletcher check insensitive to single byte259** changes 0 <-> 255.)260**261** The sum s1 is initialized to 1 instead of zero to make the length of262** the sequence part of s2, so that the length does not have to be checked263** separately. (Any sequence of zeroes has a Fletcher checksum of zero.)264*/265266uint32267TxUtil::Adler32(const uint8* data, int Len, uint32 dwAdler32)268{269#if 1270/* zlib adler32 */271return adler32(dwAdler32, data, Len);272#else273register uint32 s1 = dwAdler32 & 0xFFFF;274register uint32 s2 = (dwAdler32 >> 16) & 0xFFFF;275int k;276277while (Len > 0) {278/* 5552 is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */279k = (Len < 5552 ? Len : 5552);280Len -= k;281while (k--) {282s1 += *data++;283s2 += s1;284}285/* 65521 is the largest prime smaller than 65536 */286s1 %= 65521;287s2 %= 65521;288}289290return (s2 << 16) | s1;291#endif292}293294uint32295TxUtil::Adler32(const uint8* src, int width, int height, int size, int rowStride)296{297int i;298uint32 ret = 1;299uint32 width_in_bytes = width * size;300301for (i = 0; i < height; i++) {302ret = Adler32(src, width_in_bytes, ret);303src += rowStride;304}305306return ret;307}308309// rotate left310template<class T> static T __ROL__(T value, unsigned int count)311{312const unsigned int nbits = sizeof(T) * 8;313count %= nbits;314315T high = value >> (nbits - count);316value <<= count;317value |= high;318return value;319}320321/* Rice CRC32 for hires texture packs */322/* NOTE: The following is used in Glide64 to calculate the CRC32323* for Rice hires texture packs.324*325* BYTE* addr = (BYTE*)(gfx.RDRAM +326* rdp.addr[rdp.tiles[tile].t_mem] +327* (rdp.tiles[tile].ul_t * bpl) +328* (((rdp.tiles[tile].ul_s<<rdp.tiles[tile].size)+1)>>1));329* RiceCRC32(addr,330* 
rdp.tiles[tile].width,331* rdp.tiles[tile].height,332* (unsigned short)(rdp.tiles[tile].format << 8 | rdp.tiles[tile].size),333* bpl);334*/335uint32336TxUtil::RiceCRC32(const uint8* src, int width, int height, int size, int rowStride)337{338const uint8_t *row;339uint32_t crc32Ret;340int cur_height;341uint32_t pos;342uint32_t word;343uint32_t word_hash = 0;344uint32_t tmp;345const uint32_t bytes_per_width = ((width << size) + 1) >> 1;346347row = src;348crc32Ret = 0;349350for (cur_height = height - 1; cur_height >= 0; cur_height--) {351for (pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {352word = *(uint32_t *)&row[pos];353word_hash = pos ^ word;354tmp = __ROL__(crc32Ret, 4);355crc32Ret = word_hash + tmp;356}357crc32Ret += cur_height ^ word_hash;358row += rowStride;359}360return crc32Ret;361}362363boolean364TxUtil::RiceCRC32_CI4(const uint8* src, int width, int height, int size, int rowStride,365uint32* crc32, uint32* cimax)366{367const uint8_t *row;368uint32_t crc32Ret;369uint32_t cimaxRet;370int cur_height;371uint32_t pos;372uint32_t word;373uint32_t word_hash = 0;374uint32_t tmp;375const uint32_t bytes_per_width = ((width << size) + 1) >> 1;376377row = src;378crc32Ret = 0;379cimaxRet = 0;380381for (cur_height = height - 1; cur_height >= 0; cur_height--) {382for (pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {383word = *(uint32_t *)&row[pos];384if (cimaxRet != 15) {385if ((word & 0xF) >= cimaxRet)386cimaxRet = word & 0xF;387if ((uint32_t)((uint8_t)word >> 4) >= cimaxRet)388cimaxRet = (uint8_t)word >> 4;389if (((word >> 8) & 0xF) >= cimaxRet)390cimaxRet = (word >> 8) & 0xF;391if ((uint32_t)((uint16_t)word >> 12) >= cimaxRet)392cimaxRet = (uint16_t)word >> 12;393if (((word >> 16) & 0xF) >= cimaxRet)394cimaxRet = (word >> 16) & 0xF;395if (((word >> 20) & 0xF) >= cimaxRet)396cimaxRet = (word >> 20) & 0xF;397if (((word >> 24) & 0xF) >= cimaxRet)398cimaxRet = (word >> 24) & 0xF;399if (word >> 28 >= cimaxRet )400cimaxRet = word >> 28;401}402word_hash = 
pos ^ word;403tmp = __ROL__(crc32Ret, 4);404crc32Ret = word_hash + tmp;405}406crc32Ret += cur_height ^ word_hash;407row += rowStride;408}409*crc32 = crc32Ret;410*cimax = cimaxRet;411return 1;412}413414boolean415TxUtil::RiceCRC32_CI8(const uint8* src, int width, int height, int size, int rowStride,416uint32* crc32, uint32* cimax)417{418const uint8_t *row;419uint32_t crc32Ret;420uint32_t cimaxRet;421int cur_height;422uint32_t pos;423uint32_t word;424uint32_t word_hash = 0;425uint32_t tmp;426const uint32_t bytes_per_width = ((width << size) + 1) >> 1;427428row = src;429crc32Ret = 0;430cimaxRet = 0;431432for (cur_height = height - 1; cur_height >= 0; cur_height--) {433for (pos = bytes_per_width - 4; pos < 0x80000000u; pos -= 4) {434word = *(uint32_t *)&row[pos];435if (cimaxRet != 255) {436if ((uint8_t)word >= cimaxRet)437cimaxRet = (uint8_t)word;438if ((uint32_t)((uint16_t)word >> 8) >= cimaxRet)439cimaxRet = (uint16_t)word >> 8;440if (((word >> 16) & 0xFF) >= cimaxRet)441cimaxRet = (word >> 16) & 0xFF;442if (word >> 24 >= cimaxRet)443cimaxRet = word >> 24;444}445word_hash = pos ^ word;446tmp = __ROL__(crc32Ret, 4);447crc32Ret = word_hash + tmp;448}449crc32Ret += cur_height ^ word_hash;450row += rowStride;451}452*crc32 = crc32Ret;453*cimax = cimaxRet;454return 1;455}456457int458TxUtil::log2(int num)459{460#if defined(__GNUC__)461return __builtin_ctz(num);462#elif defined(_MSC_VER) && _MSC_VER >= 1400463uint32_t i;464_BitScanForward((DWORD *)&i, num);465return i;466#elif defined(__MSC__)467__asm {468mov eax, dword ptr [num];469bsr eax, eax;470mov dword ptr [i], eax;471}472#else473switch (num) {474case 1: return 0;475case 2: return 1;476case 4: return 2;477case 8: return 3;478case 16: return 4;479case 32: return 5;480case 64: return 6;481case 128: return 7;482case 256: return 8;483case 512: return 9;484case 1024: return 10;485case 2048: return 11;486}487#endif488}489490int491TxUtil::grLodLog2(int w, int h)492{493return (w >= h ? 
log2(w) : log2(h));494}495496int497TxUtil::grAspectRatioLog2(int w, int h)498{499return (w >= h ? log2(w/h) : -log2(h/w));500}501502int503TxUtil::getNumberofProcessors()504{505int numcore = 1, ret;506507#ifdef _WIN32508#ifndef _SC_NPROCESSORS_ONLN509SYSTEM_INFO info;510GetSystemInfo(&info);511#define sysconf(a) info.dwNumberOfProcessors512#define _SC_NPROCESSORS_ONLN513#endif514#endif515#ifdef _SC_NPROCESSORS_ONLN516ret = sysconf(_SC_NPROCESSORS_CONF);517if (ret >= 1) {518numcore = ret;519}520ret = sysconf(_SC_NPROCESSORS_ONLN);521if (ret < 1) {522numcore = ret;523}524#endif525526return numcore;527}528529530/*531* Memory buffers for texture manipulations532******************************************************************************/533TxMemBuf::TxMemBuf()534{535int i;536for (i = 0; i < 2; i++) {537_tex[i] = NULL;538_size[i] = 0;539}540}541542TxMemBuf::~TxMemBuf()543{544shutdown();545}546547boolean548TxMemBuf::init(int maxwidth, int maxheight)549{550int i;551for (i = 0; i < 2; i++) {552if (!_tex[i]) {553_tex[i] = (uint8 *)malloc(maxwidth * maxheight * 4);554_size[i] = maxwidth * maxheight * 4;555}556557if (!_tex[i]) {558shutdown();559return 0;560}561}562return 1;563}564565void566TxMemBuf::shutdown()567{568int i;569for (i = 0; i < 2; i++) {570if (_tex[i]) free(_tex[i]);571_tex[i] = NULL;572_size[i] = 0;573}574}575576uint8*577TxMemBuf::get(unsigned int num)578{579return ((num < 2) ? _tex[num] : NULL);580}581582uint32583TxMemBuf::size_of(unsigned int num)584{585return ((num < 2) ? _size[num] : 0);586}587588589