Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
alexbevi
GitHub Repository: alexbevi/BizHawk
Path: blob/master/libmupen64plus/mupen64plus-rsp-hle/src/jpeg.c
2 views
1
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2
* Mupen64plus-rsp-hle - jpeg.c *
3
* Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ *
4
* Copyright (C) 2012 Bobby Smiles *
5
* Copyright (C) 2009 Richard Goedeken *
6
* Copyright (C) 2002 Hacktarux *
7
* *
8
* This program is free software; you can redistribute it and/or modify *
9
* it under the terms of the GNU General Public License as published by *
10
* the Free Software Foundation; either version 2 of the License, or *
11
* (at your option) any later version. *
12
* *
13
* This program is distributed in the hope that it will be useful, *
14
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
15
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
16
* GNU General Public License for more details. *
17
* *
18
* You should have received a copy of the GNU General Public License *
19
* along with this program; if not, write to the *
20
* Free Software Foundation, Inc., *
21
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
22
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
23
24
#include <assert.h>
25
#include <stdlib.h>
26
#include <stdint.h>
27
28
#define M64P_PLUGIN_PROTOTYPES 1
29
#include "m64p_types.h"
30
#include "m64p_plugin.h"
31
#include "hle.h"
32
33
#define SUBBLOCK_SIZE 64
34
35
typedef void (*tile_line_emitter_t)(const int16_t *y, const int16_t *u, uint32_t address);
36
typedef void (*std_macroblock_decoder_t)(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]);
37
38
/* rdram operations */
39
// FIXME: these functions deserve their own module
40
static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count);
41
static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count);
42
static uint32_t rdram_read_u32(uint32_t address);
43
static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count);
44
45
/* standard jpeg ucode decoder */
46
static void jpeg_decode_std(const char * const version, const std_macroblock_decoder_t decode_mb, const tile_line_emitter_t emit_line);
47
48
/* helper functions */
49
static uint8_t clamp_u8(int16_t x);
50
static int16_t clamp_s12(int16_t x);
51
static int16_t clamp_s16(int32_t x);
52
static uint16_t clamp_RGBA_component(int16_t x);
53
54
/* pixel conversion & foratting */
55
static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v);
56
static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v);
57
58
/* tile line emitters */
59
static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address);
60
static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address);
61
62
/* macroblocks operations */
63
static void DecodeMacroblock1(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable);
64
static void DecodeMacroblock2(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]);
65
static void DecodeMacroblock3(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]);
66
static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address);
67
static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address);
68
69
/* subblocks operations */
70
static void TransposeSubBlock(int16_t *dst, const int16_t *src);
71
static void ZigZagSubBlock(int16_t *dst, const int16_t *src);
72
static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table);
73
static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift);
74
static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale);
75
static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift);
76
static void InverseDCT1D(const float * const x, float *dst, unsigned int stride);
77
static void InverseDCTSubBlock(int16_t *dst, const int16_t *src);
78
static void RescaleYSubBlock(int16_t *dst, const int16_t *src);
79
static void RescaleUVSubBlock(int16_t *dst, const int16_t *src);
80
81
/* transposed dequantization table */
82
static const int16_t DEFAULT_QTABLE[SUBBLOCK_SIZE] =
83
{
84
16, 12, 14, 14, 18, 24, 49, 72,
85
11, 12, 13, 17, 22, 35, 64, 92,
86
10, 14, 16, 22, 37, 55, 78, 95,
87
16, 19, 24, 29, 56, 64, 87, 98,
88
24, 26, 40, 51, 68, 81, 103, 112,
89
40, 58, 57, 87, 109, 104, 121, 100,
90
51, 60, 69, 80, 103, 113, 120, 103,
91
61, 55, 56, 62, 77, 92, 101, 99
92
};
93
94
/* zig-zag indices */
95
static const unsigned int ZIGZAG_TABLE[SUBBLOCK_SIZE] =
96
{
97
0, 1, 5, 6, 14, 15, 27, 28,
98
2, 4, 7, 13, 16, 26, 29, 42,
99
3, 8, 12, 17, 25, 30, 41, 43,
100
9, 11, 18, 24, 31, 40, 44, 53,
101
10, 19, 23, 32, 39, 45, 52, 54,
102
20, 22, 33, 38, 46, 51, 55, 60,
103
21, 34, 37, 47, 50, 56, 59, 61,
104
35, 36, 48, 49, 57, 58, 62, 63
105
};
106
107
/* transposition indices */
108
static const unsigned int TRANSPOSE_TABLE[SUBBLOCK_SIZE] =
109
{
110
0, 8, 16, 24, 32, 40, 48, 56,
111
1, 9, 17, 25, 33, 41, 49, 57,
112
2, 10, 18, 26, 34, 42, 50, 58,
113
3, 11, 19, 27, 35, 43, 51, 59,
114
4, 12, 20, 28, 36, 44, 52, 60,
115
5, 13, 21, 29, 37, 45, 53, 61,
116
6, 14, 22, 30, 38, 46, 54, 62,
117
7, 15, 23, 31, 39, 47, 55, 63
118
};
119
120
121
122
/* Constants used by InverseDCT1D.
 * Cn = alpha * cos(n * PI / 16), with alpha chosen such that C4 = 1. */
static const float IDCT_C3 = 1.175875602f;
static const float IDCT_C6 = 0.541196100f;

/* Pre-combined sums and differences of the Cn factors. */
static const float IDCT_K[10] =
{
     0.765366865f,  /* [0]   C2 - C6           */
    -1.847759065f,  /* [1]  -C2 - C6           */
    -0.390180644f,  /* [2]   C5 - C3           */
    -1.961570561f,  /* [3]  -C5 - C3           */
     1.501321110f,  /* [4]   C1 + C3 - C5 - C7 */
     2.053119869f,  /* [5]   C1 + C3 - C5 + C7 */
     3.072711027f,  /* [6]   C1 + C3 + C5 - C7 */
     0.298631336f,  /* [7]  -C1 + C3 + C5 - C7 */
    -0.899976223f,  /* [8]   C7 - C3           */
    -2.562915448f   /* [9]  -C1 - C3           */
};
139
140
141
/* global functions */
142
143
/***************************************************************************
144
* JPEG decoding ucode found in Japanese exclusive version of Pokemon Stadium.
145
**************************************************************************/
146
void jpeg_decode_PS0()
147
{
148
jpeg_decode_std("PS0", DecodeMacroblock3, EmitYUVTileLine);
149
}
150
151
/***************************************************************************
152
* JPEG decoding ucode found in Ocarina of Time, Pokemon Stadium 1 and
153
* Pokemon Stadium 2.
154
**************************************************************************/
155
void jpeg_decode_PS()
156
{
157
jpeg_decode_std("PS", DecodeMacroblock2, EmitRGBATileLine);
158
}
159
160
/***************************************************************************
161
* JPEG decoding ucode found in Ogre Battle and Bottom of the 9th.
162
**************************************************************************/
163
void jpeg_decode_OB()
164
{
165
int16_t qtable[SUBBLOCK_SIZE];
166
unsigned int mb;
167
168
int32_t y_dc = 0;
169
int32_t u_dc = 0;
170
int32_t v_dc = 0;
171
172
const OSTask_t * const task = get_task();
173
174
uint32_t address = task->data_ptr;
175
const unsigned int macroblock_count = task->data_size;
176
const int qscale = task->yield_data_size;
177
178
DebugMessage(M64MSG_VERBOSE, "jpeg_decode_OB: *buffer=%x, #MB=%d, qscale=%d",
179
address,
180
macroblock_count,
181
qscale);
182
183
if (qscale != 0)
184
{
185
if (qscale > 0)
186
{
187
ScaleSubBlock(qtable, DEFAULT_QTABLE, qscale);
188
}
189
else
190
{
191
RShiftSubBlock(qtable, DEFAULT_QTABLE, -qscale);
192
}
193
}
194
195
for (mb = 0; mb < macroblock_count; ++mb)
196
{
197
int16_t macroblock[6*SUBBLOCK_SIZE];
198
199
rdram_read_many_u16((uint16_t*)macroblock, address, 6*SUBBLOCK_SIZE);
200
DecodeMacroblock1(macroblock, &y_dc, &u_dc, &v_dc, (qscale != 0) ? qtable : NULL);
201
EmitTilesMode2(EmitYUVTileLine, macroblock, address);
202
203
address += (2*6*SUBBLOCK_SIZE);
204
}
205
}
206
207
208
/* local functions */
209
static void jpeg_decode_std(const char * const version, const std_macroblock_decoder_t decode_mb, const tile_line_emitter_t emit_line)
210
{
211
int16_t qtables[3][SUBBLOCK_SIZE];
212
unsigned int mb;
213
uint32_t address;
214
uint32_t macroblock_count;
215
uint32_t mode;
216
uint32_t qtableY_ptr;
217
uint32_t qtableU_ptr;
218
uint32_t qtableV_ptr;
219
unsigned int subblock_count;
220
unsigned int macroblock_size;
221
int16_t *macroblock;
222
const OSTask_t * const task = get_task();
223
224
if (task->flags & 0x1)
225
{
226
DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: task yielding not implemented", version);
227
return;
228
}
229
230
address = rdram_read_u32(task->data_ptr);
231
macroblock_count = rdram_read_u32(task->data_ptr + 4);
232
mode = rdram_read_u32(task->data_ptr + 8);
233
qtableY_ptr = rdram_read_u32(task->data_ptr + 12);
234
qtableU_ptr = rdram_read_u32(task->data_ptr + 16);
235
qtableV_ptr = rdram_read_u32(task->data_ptr + 20);
236
237
DebugMessage(M64MSG_VERBOSE, "jpeg_decode_%s: *buffer=%x, #MB=%d, mode=%d, *Qy=%x, *Qu=%x, *Qv=%x",
238
version,
239
address,
240
macroblock_count,
241
mode,
242
qtableY_ptr,
243
qtableU_ptr,
244
qtableV_ptr);
245
246
if (mode != 0 && mode != 2)
247
{
248
DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: invalid mode %d", version, mode);
249
return;
250
}
251
252
subblock_count = mode + 4;
253
macroblock_size = 2*subblock_count*SUBBLOCK_SIZE;
254
255
rdram_read_many_u16((uint16_t*)qtables[0], qtableY_ptr, SUBBLOCK_SIZE);
256
rdram_read_many_u16((uint16_t*)qtables[1], qtableU_ptr, SUBBLOCK_SIZE);
257
rdram_read_many_u16((uint16_t*)qtables[2], qtableV_ptr, SUBBLOCK_SIZE);
258
259
macroblock = malloc(sizeof(*macroblock) * macroblock_size);
260
if (!macroblock)
261
{
262
DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: could not allocate macroblock", version);
263
return;
264
}
265
266
for (mb = 0; mb < macroblock_count; ++mb)
267
{
268
rdram_read_many_u16((uint16_t*)macroblock, address, macroblock_size >> 1);
269
decode_mb(macroblock, subblock_count, (const int16_t (*)[SUBBLOCK_SIZE])qtables);
270
271
if (mode == 0)
272
{
273
EmitTilesMode0(emit_line, macroblock, address);
274
}
275
else
276
{
277
EmitTilesMode2(emit_line, macroblock, address);
278
}
279
280
address += macroblock_size;
281
}
282
free(macroblock);
283
}
284
285
/* Saturates x to the [0, 255] range.
 * Plain comparisons are used so that every input, including -32768, is
 * handled without relying on right shifts of negative values. */
static uint8_t clamp_u8(int16_t x)
{
    if (x < 0)
    {
        return 0;
    }
    if (x > 0xff)
    {
        return 0xff;
    }
    return (uint8_t)x;
}
289
290
/* Limits x to the [-0x800, 0x7f0] range (note the upper bound is 0x7f0). */
static int16_t clamp_s12(int16_t x)
{
    const int16_t lower = -0x800;
    const int16_t upper = 0x7f0;

    if (x < lower)
    {
        return lower;
    }
    return (x > upper) ? upper : x;
}
295
296
/* Saturates a 32-bit value to the signed 16-bit range [-32768, 32767]. */
static int16_t clamp_s16(int32_t x)
{
    if (x < -32768)
    {
        return -32768;
    }
    if (x > 32767)
    {
        return 32767;
    }
    return (int16_t)x;
}
301
302
/* Limits x to [0, 0xff0], then keeps only bits 7..11 (mask 0xf80). */
static uint16_t clamp_RGBA_component(int16_t x)
{
    int16_t bounded = x;

    if (x < 0)
    {
        bounded = 0;
    }
    else if (x > 0xff0)
    {
        bounded = 0xff0;
    }

    return (bounded & 0xf80);
}
307
308
/* Packs two luma samples and one chroma pair into a 32-bit word laid out,
 * from most to least significant byte, as U, Y1, V, Y2. Every component is
 * first clamped to 8 bits. */
static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v)
{
    const uint32_t byte_u  = clamp_u8(u);
    const uint32_t byte_y1 = clamp_u8(y1);
    const uint32_t byte_v  = clamp_u8(v);
    const uint32_t byte_y2 = clamp_u8(y2);

    return (byte_u << 24) | (byte_y1 << 16) | (byte_v << 8) | byte_y2;
}
315
316
/* Converts a colour value to int16_t after limiting it to [0, 0xff0], the
 * range clamp_RGBA_component() keeps anyway. Limiting before the conversion
 * avoids the undefined behaviour of casting an out-of-range floating point
 * value to an integer; in-range values are truncated exactly as before. */
static int16_t limit_RGBA_input(double value)
{
    if (value < 0.0)
    {
        return 0;
    }
    if (value > 4080.0) /* 0xff0 */
    {
        return 0xff0;
    }
    return (int16_t)value;
}

/* Converts one Y/U/V sample to a 16-bit pixel: 5 bits each of red, green
 * and blue (from most significant bit down) with the lowest bit always set.
 * Luma is offset by 2048 before the conversion. */
static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v)
{
    const float fY = (float)y + 2048.0f;
    const float fU = (float)u;
    const float fV = (float)v;

    const uint16_t r = clamp_RGBA_component(limit_RGBA_input(fY + 1.4025*fV));
    const uint16_t g = clamp_RGBA_component(limit_RGBA_input(fY - 0.3443*fU - 0.7144*fV));
    const uint16_t b = clamp_RGBA_component(limit_RGBA_input(fY + 1.7729*fU));

    return (r << 4) | (g >> 1) | (b >> 6) | 1;
}
328
329
static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address)
330
{
331
uint32_t uyvy[8];
332
333
const int16_t * const v = u + SUBBLOCK_SIZE;
334
const int16_t * const y2 = y + SUBBLOCK_SIZE;
335
336
uyvy[0] = GetUYVY(y[0], y[1], u[0], v[0]);
337
uyvy[1] = GetUYVY(y[2], y[3], u[1], v[1]);
338
uyvy[2] = GetUYVY(y[4], y[5], u[2], v[2]);
339
uyvy[3] = GetUYVY(y[6], y[7], u[3], v[3]);
340
uyvy[4] = GetUYVY(y2[0], y2[1], u[4], v[4]);
341
uyvy[5] = GetUYVY(y2[2], y2[3], u[5], v[5]);
342
uyvy[6] = GetUYVY(y2[4], y2[5], u[6], v[6]);
343
uyvy[7] = GetUYVY(y2[6], y2[7], u[7], v[7]);
344
345
rdram_write_many_u32(uyvy, address, 8);
346
}
347
348
static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address)
349
{
350
uint16_t rgba[16];
351
352
const int16_t * const v = u + SUBBLOCK_SIZE;
353
const int16_t * const y2 = y + SUBBLOCK_SIZE;
354
355
rgba[0] = GetRGBA(y[0], u[0], v[0]);
356
rgba[1] = GetRGBA(y[1], u[0], v[0]);
357
rgba[2] = GetRGBA(y[2], u[1], v[1]);
358
rgba[3] = GetRGBA(y[3], u[1], v[1]);
359
rgba[4] = GetRGBA(y[4], u[2], v[2]);
360
rgba[5] = GetRGBA(y[5], u[2], v[2]);
361
rgba[6] = GetRGBA(y[6], u[3], v[3]);
362
rgba[7] = GetRGBA(y[7], u[3], v[3]);
363
rgba[8] = GetRGBA(y2[0], u[4], v[4]);
364
rgba[9] = GetRGBA(y2[1], u[4], v[4]);
365
rgba[10] = GetRGBA(y2[2], u[5], v[5]);
366
rgba[11] = GetRGBA(y2[3], u[5], v[5]);
367
rgba[12] = GetRGBA(y2[4], u[6], v[6]);
368
rgba[13] = GetRGBA(y2[5], u[6], v[6]);
369
rgba[14] = GetRGBA(y2[6], u[7], v[7]);
370
rgba[15] = GetRGBA(y2[7], u[7], v[7]);
371
372
rdram_write_many_u16(rgba, address, 16);
373
}
374
375
static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address)
376
{
377
unsigned int i;
378
379
unsigned int y_offset = 0;
380
unsigned int u_offset = 2*SUBBLOCK_SIZE;
381
382
for (i = 0; i < 8; ++i)
383
{
384
emit_line(&macroblock[y_offset], &macroblock[u_offset], address);
385
386
y_offset += 8;
387
u_offset += 8;
388
address += 32;
389
}
390
}
391
392
static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address)
393
{
394
unsigned int i;
395
396
unsigned int y_offset = 0;
397
unsigned int u_offset = 4*SUBBLOCK_SIZE;
398
399
for (i = 0; i < 8; ++i)
400
{
401
emit_line(&macroblock[y_offset], &macroblock[u_offset], address);
402
emit_line(&macroblock[y_offset + 8], &macroblock[u_offset], address + 32);
403
404
y_offset += (i == 3) ? SUBBLOCK_SIZE+16 : 16;
405
u_offset += 8;
406
address += 64;
407
}
408
}
409
410
static void DecodeMacroblock1(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable)
411
{
412
int sb;
413
414
for (sb = 0; sb < 6; ++sb)
415
{
416
int16_t tmp_sb[SUBBLOCK_SIZE];
417
418
/* update DC */
419
int32_t dc = (int32_t)macroblock[0];
420
switch(sb)
421
{
422
case 0: case 1: case 2: case 3:
423
*y_dc += dc; macroblock[0] = *y_dc & 0xffff; break;
424
case 4: *u_dc += dc; macroblock[0] = *u_dc & 0xffff; break;
425
case 5: *v_dc += dc; macroblock[0] = *v_dc & 0xffff; break;
426
}
427
428
ZigZagSubBlock(tmp_sb, macroblock);
429
if (qtable != NULL) { MultSubBlocks(tmp_sb, tmp_sb, qtable, 0); }
430
TransposeSubBlock(macroblock, tmp_sb);
431
InverseDCTSubBlock(macroblock, macroblock);
432
433
macroblock += SUBBLOCK_SIZE;
434
}
435
}
436
437
static void DecodeMacroblock2(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE])
438
{
439
unsigned int sb;
440
unsigned int q = 0;
441
442
for (sb = 0; sb < subblock_count; ++sb)
443
{
444
int16_t tmp_sb[SUBBLOCK_SIZE];
445
const int isChromaSubBlock = (subblock_count - sb <= 2);
446
447
if (isChromaSubBlock) { ++q; }
448
449
MultSubBlocks(macroblock, macroblock, qtables[q], 4);
450
ZigZagSubBlock(tmp_sb, macroblock);
451
InverseDCTSubBlock(macroblock, tmp_sb);
452
453
macroblock += SUBBLOCK_SIZE;
454
}
455
456
}
457
458
static void DecodeMacroblock3(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE])
459
{
460
unsigned int sb;
461
unsigned int q = 0;
462
463
for (sb = 0; sb < subblock_count; ++sb)
464
{
465
int16_t tmp_sb[SUBBLOCK_SIZE];
466
const int isChromaSubBlock = (subblock_count - sb <= 2);
467
468
if (isChromaSubBlock) { ++q; }
469
470
MultSubBlocks(macroblock, macroblock, qtables[q], 4);
471
ZigZagSubBlock(tmp_sb, macroblock);
472
InverseDCTSubBlock(macroblock, tmp_sb);
473
474
if (isChromaSubBlock)
475
{
476
RescaleUVSubBlock(macroblock, macroblock);
477
}
478
else
479
{
480
RescaleYSubBlock(macroblock, macroblock);
481
}
482
483
macroblock += SUBBLOCK_SIZE;
484
}
485
}
486
487
static void TransposeSubBlock(int16_t *dst, const int16_t *src)
488
{
489
ReorderSubBlock(dst, src, TRANSPOSE_TABLE);
490
}
491
492
static void ZigZagSubBlock(int16_t *dst, const int16_t *src)
493
{
494
ReorderSubBlock(dst, src, ZIGZAG_TABLE);
495
}
496
497
static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table)
498
{
499
unsigned int i;
500
501
/* source and destination sublocks cannot overlap */
502
assert(abs(dst - src) > SUBBLOCK_SIZE);
503
504
for (i = 0; i < SUBBLOCK_SIZE; ++i)
505
{
506
dst[i] = src[table[i]];
507
}
508
}
509
510
static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift)
511
{
512
unsigned int i;
513
514
for (i = 0; i < SUBBLOCK_SIZE; ++i)
515
{
516
int32_t v = src1[i] * src2[i];
517
dst[i] = clamp_s16(v) << shift;
518
}
519
}
520
521
static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale)
522
{
523
unsigned int i;
524
525
for (i = 0; i < SUBBLOCK_SIZE; ++i)
526
{
527
int32_t v = src[i] * scale;
528
dst[i] = clamp_s16(v);
529
}
530
}
531
532
static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift)
533
{
534
unsigned int i;
535
536
for (i = 0; i < SUBBLOCK_SIZE; ++i)
537
{
538
dst[i] = src[i] >> shift;
539
}
540
}
541
542
/***************************************************************************
543
* Fast 2D IDCT using separable formulation and normalization
544
* Computations use single precision floats
545
* Implementation based on Wikipedia :
546
* http://fr.wikipedia.org/wiki/Transform%C3%A9e_en_cosinus_discr%C3%A8te
547
**************************************************************************/
548
static void InverseDCT1D(const float * const x, float *dst, unsigned int stride)
549
{
550
float e[4];
551
float f[4];
552
float x26, x1357, x15, x37, x17, x35;
553
554
x15 = IDCT_K[2] * (x[1] + x[5]);
555
x37 = IDCT_K[3] * (x[3] + x[7]);
556
x17 = IDCT_K[8] * (x[1] + x[7]);
557
x35 = IDCT_K[9] * (x[3] + x[5]);
558
x1357 = IDCT_C3 * (x[1] + x[3] + x[5] + x[7]);
559
x26 = IDCT_C6 * (x[2] + x[6]);
560
561
f[0] = x[0] + x[4];
562
f[1] = x[0] - x[4];
563
f[2] = x26 + IDCT_K[0]*x[2];
564
f[3] = x26 + IDCT_K[1]*x[6];
565
566
e[0] = x1357 + x15 + IDCT_K[4]*x[1] + x17;
567
e[1] = x1357 + x37 + IDCT_K[6]*x[3] + x35;
568
e[2] = x1357 + x15 + IDCT_K[5]*x[5] + x35;
569
e[3] = x1357 + x37 + IDCT_K[7]*x[7] + x17;
570
571
*dst = f[0] + f[2] + e[0]; dst += stride;
572
*dst = f[1] + f[3] + e[1]; dst += stride;
573
*dst = f[1] - f[3] + e[2]; dst += stride;
574
*dst = f[0] - f[2] + e[3]; dst += stride;
575
*dst = f[0] - f[2] - e[3]; dst += stride;
576
*dst = f[1] - f[3] - e[2]; dst += stride;
577
*dst = f[1] + f[3] - e[1]; dst += stride;
578
*dst = f[0] + f[2] - e[0]; dst += stride;
579
}
580
581
static void InverseDCTSubBlock(int16_t *dst, const int16_t *src)
582
{
583
float x[8];
584
float block[SUBBLOCK_SIZE];
585
unsigned int i, j;
586
587
/* idct 1d on rows (+transposition) */
588
for (i = 0; i < 8; ++i)
589
{
590
for (j = 0; j < 8; ++j)
591
{
592
x[j] = (float)src[i*8+j];
593
}
594
595
InverseDCT1D(x, &block[i], 8);
596
}
597
598
/* idct 1d on columns (thanks to previous transposition) */
599
for (i = 0; i < 8; ++i)
600
{
601
InverseDCT1D(&block[i*8], x, 1);
602
603
/* C4 = 1 normalization implies a division by 8 */
604
for (j = 0; j < 8; ++j)
605
{
606
dst[i+j*8] = (int16_t)x[j] >> 3;
607
}
608
}
609
}
610
611
static void RescaleYSubBlock(int16_t *dst, const int16_t *src)
612
{
613
unsigned int i;
614
615
for (i = 0; i < SUBBLOCK_SIZE; ++i)
616
{
617
dst[i] = (((uint32_t)(clamp_s12(src[i]) + 0x800) * 0xdb0) >> 16) + 0x10;
618
}
619
}
620
621
static void RescaleUVSubBlock(int16_t *dst, const int16_t *src)
622
{
623
unsigned int i;
624
625
for (i = 0; i < SUBBLOCK_SIZE; ++i)
626
{
627
dst[i] = (((int)clamp_s12(src[i]) * 0xe00) >> 16) + 0x80;
628
}
629
}
630
631
632
633
/* FIXME: assume presence of expansion pack */
634
#define MEMMASK 0x7fffff
635
636
static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count)
637
{
638
while (count != 0)
639
{
640
uint16_t s = rsp.RDRAM[((address++)^S8) & MEMMASK];
641
s <<= 8;
642
s |= rsp.RDRAM[((address++)^S8) & MEMMASK];
643
644
*(dst++) = s;
645
646
--count;
647
}
648
}
649
650
static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count)
651
{
652
while (count != 0)
653
{
654
rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8);
655
rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff);
656
657
--count;
658
}
659
}
660
661
static uint32_t rdram_read_u32(uint32_t address)
662
{
663
uint32_t r = rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8;
664
r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8;
665
r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8;
666
r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK];
667
668
return r;
669
}
670
671
static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count)
672
{
673
while (count != 0)
674
{
675
rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 24);
676
rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 16);
677
rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8);
678
rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff);
679
680
--count;
681
}
682
}
683
684
685