CoCalc -- TxUtil.cpp

GitHub Repository: alexbevi/BizHawk
Path: blob/master/libmupen64plus/mupen64plus-video-glide64mk2/src/GlideHQ/TxUtil.cpp
² views
1
/*
2
 * Texture Filtering
3
 * Version:  1.0
4
 *
5
 * Copyright (C) 2007  Hiroshi Morii   All Rights Reserved.
6
 * Email koolsmoky(at)users.sourceforge.net
7
 * Web   http://www.3dfxzone.it/koolsmoky
8
 *
9
 * this is free software; you can redistribute it and/or modify
10
 * it under the terms of the GNU General Public License as published by
11
 * the Free Software Foundation; either version 2, or (at your option)
12
 * any later version.
13
 *
14
 * this is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
 * GNU General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU General Public License
20
 * along with GNU Make; see the file COPYING.  If not, write to
21
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
22
 */
23

24
#include "TxUtil.h"
25
#include "TxDbg.h"
26
#include <zlib.h>
27
#include <stdlib.h>
28
#ifdef _WIN32
29
#define WIN32_LEAN_AND_MEAN
30
#include <windows.h>
31
#else
32
#include <unistd.h>
33
#endif
34

35
/*
36
 * External libraries
37
 ******************************************************************************/
38
TxLoadLib::TxLoadLib()
{
  /* Bind the two texture compressor entry points: looked up in the external
   * "dxtn" library when built with DXTN_DLL, otherwise taken from the
   * functions linked into this build. */
#ifdef DXTN_DLL
  /* open the library unless a handle is already held */
  if (!_dxtnlib)
    _dxtnlib = LoadLibrary("dxtn");

  /* resolve each entry point that has not been resolved yet */
  if (_dxtnlib && !_tx_compress_dxtn)
    _tx_compress_dxtn = (dxtCompressTexFuncExt)DLSYM(_dxtnlib, "tx_compress_dxtn");

  if (_dxtnlib && !_tx_compress_fxt1)
    _tx_compress_fxt1 = (fxtCompressTexFuncExt)DLSYM(_dxtnlib, "fxt1_encode");
#else
  _tx_compress_dxtn = tx_compress_dxtn;
  _tx_compress_fxt1 = fxt1_encode;
#endif
}
57

58
TxLoadLib::~TxLoadLib()
{
#ifdef DXTN_DLL
  /* release the dynamically loaded library, if one was opened */
  if (!_dxtnlib)
    return;

  FreeLibrary(_dxtnlib);
#endif
}
67

68
/* Returns the FXT1 compressor entry point bound by the constructor. */
fxtCompressTexFuncExt
TxLoadLib::getfxtCompressTexFuncExt()
{
  fxtCompressTexFuncExt encoder = _tx_compress_fxt1;
  return encoder;
}
73

74
/* Returns the DXTn compressor entry point bound by the constructor. */
dxtCompressTexFuncExt
TxLoadLib::getdxtCompressTexFuncExt()
{
  dxtCompressTexFuncExt encoder = _tx_compress_dxtn;
  return encoder;
}
79

80

81
/*
82
 * Utilities
83
 ******************************************************************************/
84
uint32
TxUtil::checksumTx(uint8 *src, int width, int height, uint16 format)
{
  /* Checksum of a whole texture image stored in the given format.
   *
   * zlib crc32 is used for now. If something else turns out to be better
   * we can simply switch later, e.g. to
   *   Adler32(src, dataSize, 1)
   *
   * Returns 0 when sizeofTx() reports a size of 0 (which includes
   * unsupported formats).
   */
  const int dataSize = sizeofTx(width, height, format);

  if (!dataSize)
    return 0;

  /* zlib crc32, seeded with zlib's initial value */
  return crc32(crc32(0L, Z_NULL, 0), src, dataSize);
}
97

98
int
99
TxUtil::sizeofTx(int width, int height, uint16 format)
100
{
101
  int dataSize = 0;
102

103
  /* a lookup table for the shifts would be better */
104
  switch (format) {
105
  case GR_TEXFMT_ARGB_CMP_FXT1:
106
    dataSize = (((width + 0x7) & ~0x7) * ((height + 0x3) & ~0x3)) >> 1;
107
    break;
108
  case GR_TEXFMT_ARGB_CMP_DXT1:
109
    dataSize = (((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3)) >> 1;
110
    break;
111
  case GR_TEXFMT_ARGB_CMP_DXT3:
112
  case GR_TEXFMT_ARGB_CMP_DXT5:
113
    dataSize = ((width + 0x3) & ~0x3) * ((height + 0x3) & ~0x3);
114
    break;
115
  case GR_TEXFMT_ALPHA_INTENSITY_44:
116
  case GR_TEXFMT_ALPHA_8:
117
  case GR_TEXFMT_INTENSITY_8:
118
  case GR_TEXFMT_P_8:
119
    dataSize = width * height;
120
    break;
121
  case GR_TEXFMT_ARGB_4444:
122
  case GR_TEXFMT_ARGB_1555:
123
  case GR_TEXFMT_RGB_565:
124
  case GR_TEXFMT_ALPHA_INTENSITY_88:
125
    dataSize = (width * height) << 1;
126
    break;
127
  case GR_TEXFMT_ARGB_8888:
128
    dataSize = (width * height) << 2;
129
    break;
130
  default:
131
    /* unsupported format */
132
    DBG_INFO(80, L"Error: cannot get size. unsupported gfmt:%x\n", format);
133
    ;
134
  }
135

136
  return dataSize;
137
}
138

139
#if 0 /* unused */
uint32
TxUtil::chkAlpha(uint32* src, int width, int height)
{
  /* Classifies the alpha channel of an image.
   *
   * NOTE: src must be ARGB8888
   * return values
   * 0x00000000: 8bit alpha  (some texel has an alpha other than 0x00/0xff)
   * 0x00000001: 1bit alpha  (only 0x00 and 0xff occur, at least one 0x00)
   * 0xff000001: no alpha    (every texel has alpha 0xff)
   *
   * Written in portable C++ instead of MSVC x86 inline assembly. An image
   * with no texels (width * height <= 0) is reported as "no alpha" without
   * touching src.
   */
  const int texels = width * height;
  uint32 common = 0xff000000; /* AND of all alpha bytes seen so far */

  for (int i = 0; i < texels; i++) {
    const uint32 a = src[i] & 0xff000000;

    /* any partially transparent texel needs the full 8 bits */
    if (a != 0 && a != 0xff000000)
      return 0;

    common &= a;
  }

  return common | 0x00000001;
}
#endif
182

183
uint32
TxUtil::checksum(uint8 *src, int width, int height, int size, int rowStride)
{
  /* Checksum of a texture as laid out in source memory.
   *
   * Rice CRC32 for now. We can switch this to Jabo MD5 or
   * any other custom checksum.
   * TODO: use *_HIRESTEXTURE option.
   *
   * A NULL src yields 0.
   */
  return src ? RiceCRC32(src, width, height, size, rowStride) : 0;
}
194

195
uint64
TxUtil::checksum64(uint8 *src, int width, int height, int size, int rowStride, uint8 *palette)
{
  /* Rice CRC32 for now. We can switch this to Jabo MD5 or
   * any other custom checksum.
   * TODO: use *_HIRESTEXTURE option.
   *
   * Returned value is 64bits: hi=palette crc32 low=texture crc32.
   * Without a palette, for a size that is neither 0 nor 1 in its low byte,
   * or when the combined value comes out as 0, the plain texture crc32 is
   * returned instead. A NULL src yields 0.
   */
  if (!src)
    return 0;

  uint64 combined = 0;

  if (palette) {
    uint32 texCrc = 0, maxIndex = 0;
    const int texelSize = size & 0xff;
    boolean hashed = 0;
    int palStride = 0;

    if (texelSize == 1) {
      /* 8-bit colour indices */
      hashed = RiceCRC32_CI8(src, width, height, size, rowStride, &texCrc, &maxIndex);
      palStride = 512;
    } else if (texelSize == 0) {
      /* 4-bit colour indices */
      hashed = RiceCRC32_CI4(src, width, height, size, rowStride, &texCrc, &maxIndex);
      palStride = 32;
    }

    if (hashed) {
      /* only the palette entries up to the largest index used are hashed */
      combined = (uint64)RiceCRC32(palette, maxIndex + 1, 1, 2, palStride);
      combined <<= 32;
      combined |= (uint64)texCrc;
    }
  }

  if (!combined)
    combined = (uint64)RiceCRC32(src, width, height, size, rowStride);

  return combined;
}
231

232
/*
233
** Computes Adler32 checksum for a stream of data.
234
**
235
** From the specification found in RFC 1950: (ZLIB Compressed Data Format
236
** Specification version 3.3)
237
**
238
** ADLER32 (Adler-32 checksum) This contains a checksum value of the
239
** uncompressed data (excluding any dictionary data) computed according to
240
** Adler-32 algorithm. This algorithm is a 32-bit extension and improvement
241
** of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073 standard.
242
**
243
** Adler-32 is composed of two sums accumulated per byte: s1 is the sum of
244
** all bytes, s2 is the sum of all s1 values. Both sums are done modulo
245
** 65521. s1 is initialized to 1, s2 to zero. The Adler-32 checksum is stored
246
** as s2*65536 + s1 in most-significant-byte first (network) order.
247
**
248
** 8.2. The Adler-32 algorithm 
249
**
250
** The Adler-32 algorithm is much faster than the CRC32 algorithm yet still
251
** provides an extremely low probability of undetected errors.
252
**
253
** The modulo on unsigned long accumulators can be delayed for 5552 bytes,
254
** so the modulo operation time is negligible. If the bytes are a, b, c,
255
** the second sum is 3a + 2b + c + 3, and so is position and order sensitive,
256
** unlike the first sum, which is just a checksum. That 65521 is prime is
257
** important to avoid a possible large class of two-byte errors that leave
258
** the check unchanged. (The Fletcher checksum uses 255, which is not prime
259
** and which also makes the Fletcher check insensitive to single byte
260
** changes 0 <-> 255.)
261
**
262
** The sum s1 is initialized to 1 instead of zero to make the length of
263
** the sequence part of s2, so that the length does not have to be checked
264
** separately. (Any sequence of zeroes has a Fletcher checksum of zero.)
265
*/
266

267
uint32
TxUtil::Adler32(const uint8* data, int Len, uint32 dwAdler32)
{
  /* Continues the running Adler-32 value dwAdler32 over Len bytes of data
   * and returns the updated value. */
#if 1
  /* zlib adler32 */
  return adler32(dwAdler32, data, Len);
#else
  /* hand-rolled implementation (disabled) */
  uint32 sumLo = dwAdler32 & 0xFFFF;          /* s1 */
  uint32 sumHi = (dwAdler32 >> 16) & 0xFFFF;  /* s2 */

  while (Len > 0) {
    /* 5552 is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
    int chunk = (Len < 5552 ? Len : 5552);
    Len -= chunk;

    for (; chunk > 0; chunk--) {
      sumLo += *data++;
      sumHi += sumLo;
    }

    /* 65521 is the largest prime smaller than 65536 */
    sumLo %= 65521;
    sumHi %= 65521;
  }

  return (sumHi << 16) | sumLo;
#endif
}
294

295
uint32
TxUtil::Adler32(const uint8* src, int width, int height, int size, int rowStride)
{
  /* Adler-32 over an image: the first width * size bytes of each of the
   * height rows are fed in turn into one running checksum, starting
   * from 1. Consecutive rows are rowStride bytes apart. */
  const uint32 rowBytes = width * size;
  const uint8 *row = src;
  uint32 sum = 1;

  for (int y = 0; y < height; y++, row += rowStride)
    sum = Adler32(row, rowBytes, sum);

  return sum;
}
309

310
// rotate left
// Rotates the bits of `value` left by `count` positions; `count` is taken
// modulo the bit width of T. T is expected to be an unsigned integer type.
template<class T> static T __ROL__(T value, unsigned int count)
{
  const unsigned int nbits = sizeof(T) * 8;
  count %= nbits;

  // A rotation by zero must not reach the shifts below: shifting right by
  // (nbits - 0), i.e. by the full width of the type, is undefined behaviour.
  if (count == 0)
    return value;

  T high = value >> (nbits - count);
  value <<= count;
  value |= high;
  return value;
}
321

322
/* Rice CRC32 for hires texture packs */
323
/* NOTE: The following is used in Glide64 to calculate the CRC32
324
 * for Rice hires texture packs.
325
 *
326
 * BYTE* addr = (BYTE*)(gfx.RDRAM +
327
 *                     rdp.addr[rdp.tiles[tile].t_mem] +
328
 *                     (rdp.tiles[tile].ul_t * bpl) +
329
 *                     (((rdp.tiles[tile].ul_s<<rdp.tiles[tile].size)+1)>>1));
330
 * RiceCRC32(addr,
331
 *          rdp.tiles[tile].width,
332
 *          rdp.tiles[tile].height,
333
 *          (unsigned short)(rdp.tiles[tile].format << 8 | rdp.tiles[tile].size),
334
 *          bpl);
335
 */
336
uint32
TxUtil::RiceCRC32(const uint8* src, int width, int height, int size, int rowStride)
{
  /* Hashes height rows, rowStride bytes apart, reading each row as 32-bit
   * words from its last word down to its first. */
  const uint32_t rowBytes = ((width << size) + 1) >> 1;
  const uint8_t *line = src;
  uint32_t crc = 0;
  uint32_t mixed = 0; /* last offset^word value; carried over between rows */

  for (int y = height - 1; y >= 0; y--, line += rowStride) {
    /* The offset is unsigned: stepping below zero wraps around to a value
     * >= 0x80000000, which is what terminates the loop. */
    uint32_t offset = rowBytes - 4;
    while (offset < 0x80000000u) {
      const uint32_t word = *(const uint32_t *)&line[offset];
      mixed = offset ^ word;
      crc = mixed + __ROL__(crc, 4);
      offset -= 4;
    }
    /* fold the (descending) row counter into the checksum */
    crc += y ^ mixed;
  }

  return crc;
}
363

364
boolean
TxUtil::RiceCRC32_CI4(const uint8* src, int width, int height, int size, int rowStride,
                        uint32* crc32, uint32* cimax)
{
  /* Same hash as RiceCRC32(), additionally tracking the largest 4-bit
   * value found in the hashed words. Stores the checksum in *crc32 and
   * that maximum in *cimax; always returns 1. */
  const uint32_t rowBytes = ((width << size) + 1) >> 1;
  const uint8_t *line = src;
  uint32_t crc = 0;
  uint32_t maxIndex = 0;
  uint32_t mixed = 0; /* last offset^word value; carried over between rows */

  for (int y = height - 1; y >= 0; y--, line += rowStride) {
    /* unsigned offset: wrapping below zero ends the loop */
    uint32_t offset = rowBytes - 4;
    while (offset < 0x80000000u) {
      const uint32_t word = *(const uint32_t *)&line[offset];

      /* once the maximum possible index (15) was seen, stop looking */
      if (maxIndex != 15) {
        /* examine all eight nibbles of the word */
        for (unsigned int shift = 0; shift < 32; shift += 4) {
          const uint32_t index = (word >> shift) & 0xF;
          if (index > maxIndex)
            maxIndex = index;
        }
      }

      mixed = offset ^ word;
      crc = mixed + __ROL__(crc, 4);
      offset -= 4;
    }
    crc += y ^ mixed;
  }

  *crc32 = crc;
  *cimax = maxIndex;
  return 1;
}
414

415
boolean
TxUtil::RiceCRC32_CI8(const uint8* src, int width, int height, int size, int rowStride,
                      uint32* crc32, uint32* cimax)
{
  /* Same hash as RiceCRC32(), additionally tracking the largest byte value
   * found in the hashed words. Stores the checksum in *crc32 and that
   * maximum in *cimax; always returns 1. */
  const uint32_t rowBytes = ((width << size) + 1) >> 1;
  const uint8_t *line = src;
  uint32_t crc = 0;
  uint32_t maxIndex = 0;
  uint32_t mixed = 0; /* last offset^word value; carried over between rows */

  for (int y = height - 1; y >= 0; y--, line += rowStride) {
    /* unsigned offset: wrapping below zero ends the loop */
    uint32_t offset = rowBytes - 4;
    while (offset < 0x80000000u) {
      const uint32_t word = *(const uint32_t *)&line[offset];

      /* once the maximum possible index (255) was seen, stop looking */
      if (maxIndex != 255) {
        /* examine all four bytes of the word */
        for (unsigned int shift = 0; shift < 32; shift += 8) {
          const uint32_t index = (word >> shift) & 0xFF;
          if (index > maxIndex)
            maxIndex = index;
        }
      }

      mixed = offset ^ word;
      crc = mixed + __ROL__(crc, 4);
      offset -= 4;
    }
    crc += y ^ mixed;
  }

  *crc32 = crc;
  *cimax = maxIndex;
  return 1;
}
457

458
int
459
TxUtil::log2(int num)
460
{
461
#if defined(__GNUC__)
462
  return __builtin_ctz(num);
463
#elif defined(_MSC_VER) && _MSC_VER >= 1400
464
  uint32_t i;
465
  _BitScanForward((DWORD *)&i, num);
466
  return i;
467
#elif defined(__MSC__)
468
  __asm {
469
    mov eax, dword ptr [num];
470
    bsr eax, eax;
471
    mov dword ptr [i], eax;
472
  }
473
#else
474
  switch (num) {
475
    case 1:    return 0;
476
    case 2:    return 1;
477
    case 4:    return 2;
478
    case 8:    return 3;
479
    case 16:   return 4;
480
    case 32:   return 5;
481
    case 64:   return 6;
482
    case 128:  return 7;
483
    case 256:  return 8;
484
    case 512:  return 9;
485
    case 1024:  return 10;
486
    case 2048:  return 11;
487
  }
488
#endif
489
}
490

491
int
492
TxUtil::grLodLog2(int w, int h)
493
{
494
  return (w >= h ? log2(w) : log2(h));
495
}
496

497
int
498
TxUtil::grAspectRatioLog2(int w, int h)
499
{
500
  return (w >= h ? log2(w/h) : -log2(h/w));
501
}
502

503
int
504
TxUtil::getNumberofProcessors()
505
{
506
  int numcore = 1, ret;
507

508
#ifdef _WIN32
509
#ifndef _SC_NPROCESSORS_ONLN
510
  SYSTEM_INFO info;
511
  GetSystemInfo(&info);
512
#define sysconf(a) info.dwNumberOfProcessors
513
#define _SC_NPROCESSORS_ONLN
514
#endif
515
#endif
516
#ifdef _SC_NPROCESSORS_ONLN
517
  ret = sysconf(_SC_NPROCESSORS_CONF);
518
  if (ret >= 1) {
519
    numcore = ret;
520
  }
521
  ret = sysconf(_SC_NPROCESSORS_ONLN);
522
  if (ret < 1) {
523
    numcore = ret;
524
  }
525
#endif
526

527
  return numcore;
528
}
529

530

531
/*
532
 * Memory buffers for texture manipulations
533
 ******************************************************************************/
534
TxMemBuf::TxMemBuf()
{
  /* both buffers start out unallocated */
  _tex[0] = NULL;
  _tex[1] = NULL;
  _size[0] = 0;
  _size[1] = 0;
}
542

543
TxMemBuf::~TxMemBuf()
{
  /* release whatever buffers are still held */
  this->shutdown();
}
547

548
boolean
TxMemBuf::init(int maxwidth, int maxheight)
{
  /* Allocates each of the two buffers with maxwidth * maxheight * 4 bytes.
   * A buffer that is already allocated is kept as it is (it is not
   * resized). Returns 1 on success; if an allocation fails, all buffers
   * are released and 0 is returned. */
  for (int slot = 0; slot < 2; slot++) {
    if (_tex[slot])
      continue;

    _tex[slot] = (uint8 *)malloc(maxwidth * maxheight * 4);
    _size[slot] = maxwidth * maxheight * 4;

    if (!_tex[slot]) {
      shutdown();
      return 0;
    }
  }

  return 1;
}
565

566
void
567
TxMemBuf::shutdown()
568
{
569
  int i;
570
  for (i = 0; i < 2; i++) {
571
    if (_tex[i]) free(_tex[i]);
572
    _tex[i] = NULL;
573
    _size[i] = 0;
574
  }
575
}
576

577
uint8*
TxMemBuf::get(unsigned int num)
{
  /* buffer number num (0 or 1); NULL for any other number */
  if (num >= 2)
    return NULL;

  return _tex[num];
}
582

583
uint32
TxMemBuf::size_of(unsigned int num)
{
  /* recorded size in bytes of buffer number num (0 or 1); 0 for any other
   * number */
  if (num >= 2)
    return 0;

  return _size[num];
}
588

589
Product

Resources

Company