CoCalc -- jpeg.c

GitHub Repository: alexbevi/BizHawk
Path: blob/master/libmupen64plus/mupen64plus-rsp-hle/src/jpeg.c
² views
1
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2
 *   Mupen64plus-rsp-hle - jpeg.c                                          *
3
 *   Mupen64Plus homepage: http://code.google.com/p/mupen64plus/           *
4
 *   Copyright (C) 2012 Bobby Smiles                                       *
5
 *   Copyright (C) 2009 Richard Goedeken                                   *
6
 *   Copyright (C) 2002 Hacktarux                                          *
7
 *                                                                         *
8
 *   This program is free software; you can redistribute it and/or modify  *
9
 *   it under the terms of the GNU General Public License as published by  *
10
 *   the Free Software Foundation; either version 2 of the License, or     *
11
 *   (at your option) any later version.                                   *
12
 *                                                                         *
13
 *   This program is distributed in the hope that it will be useful,       *
14
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
15
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
16
 *   GNU General Public License for more details.                          *
17
 *                                                                         *
18
 *   You should have received a copy of the GNU General Public License     *
19
 *   along with this program; if not, write to the                         *
20
 *   Free Software Foundation, Inc.,                                       *
21
 *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
22
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
23

24
#include <assert.h>
25
#include <stdlib.h>
26
#include <stdint.h>
27

28
#define M64P_PLUGIN_PROTOTYPES 1
29
#include "m64p_types.h"
30
#include "m64p_plugin.h"
31
#include "hle.h"
32

33
#define SUBBLOCK_SIZE 64
34

35
typedef void (*tile_line_emitter_t)(const int16_t *y, const int16_t *u, uint32_t address);
36
typedef void (*std_macroblock_decoder_t)(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]);
37

38
/* rdram operations */
39
// FIXME: these functions deserve their own module
40
static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count);
41
static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count);
42
static uint32_t rdram_read_u32(uint32_t address);
43
static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count);
44

45
/* standard jpeg ucode decoder */
46
static void jpeg_decode_std(const char * const version, const std_macroblock_decoder_t decode_mb, const tile_line_emitter_t emit_line);
47

48
/* helper functions */
49
static uint8_t clamp_u8(int16_t x);
50
static int16_t clamp_s12(int16_t x);
51
static int16_t clamp_s16(int32_t x);
52
static uint16_t clamp_RGBA_component(int16_t x);
53

54
/* pixel conversion & formatting */
55
static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v);
56
static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v);
57

58
/* tile line emitters */
59
static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address);
60
static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address);
61

62
/* macroblocks operations */
63
static void DecodeMacroblock1(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable);
64
static void DecodeMacroblock2(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]);
65
static void DecodeMacroblock3(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE]);
66
static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address);
67
static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address);
68

69
/* subblocks operations */
70
static void TransposeSubBlock(int16_t *dst, const int16_t *src);
71
static void ZigZagSubBlock(int16_t *dst, const int16_t *src);
72
static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table);
73
static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift);
74
static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale);
75
static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift);
76
static void InverseDCT1D(const float * const x, float *dst, unsigned int stride);
77
static void InverseDCTSubBlock(int16_t *dst, const int16_t *src);
78
static void RescaleYSubBlock(int16_t *dst, const int16_t *src);
79
static void RescaleUVSubBlock(int16_t *dst, const int16_t *src);
80

81
/* transposed dequantization table */
82
static const int16_t DEFAULT_QTABLE[SUBBLOCK_SIZE] =
83
{
84
    16, 12, 14, 14,  18,  24,  49,  72,
85
    11, 12, 13, 17,  22,  35,  64,  92,
86
    10, 14, 16, 22,  37,  55,  78,  95,
87
    16, 19, 24, 29,  56,  64,  87,  98,
88
    24, 26, 40, 51,  68,  81, 103, 112,
89
    40, 58, 57, 87, 109, 104, 121, 100,
90
    51, 60, 69, 80, 103, 113, 120, 103,
91
    61, 55, 56, 62,  77,  92, 101,  99
92
};
93

94
/* zig-zag indices */
95
static const unsigned int ZIGZAG_TABLE[SUBBLOCK_SIZE] =
96
{
97
     0,  1,  5,  6, 14, 15, 27, 28,
98
     2,  4,  7, 13, 16, 26, 29, 42,
99
     3,  8, 12, 17, 25, 30, 41, 43,
100
     9, 11, 18, 24, 31, 40, 44, 53,
101
    10, 19, 23, 32, 39, 45, 52, 54,
102
    20, 22, 33, 38, 46, 51, 55, 60,
103
    21, 34, 37, 47, 50, 56, 59, 61,
104
    35, 36, 48, 49, 57, 58, 62, 63
105
};
106

107
/* transposition indices */
108
static const unsigned int TRANSPOSE_TABLE[SUBBLOCK_SIZE] =
109
{
110
    0,  8, 16, 24, 32, 40, 48, 56,
111
    1,  9, 17, 25, 33, 41, 49, 57,
112
    2, 10, 18, 26, 34, 42, 50, 58,
113
    3, 11, 19, 27, 35, 43, 51, 59,
114
    4, 12, 20, 28, 36, 44, 52, 60,
115
    5, 13, 21, 29, 37, 45, 53, 61,
116
    6, 14, 22, 30, 38, 46, 54, 62,
117
    7, 15, 23, 31, 39, 47, 55, 63
118
};
119

120

121

122
/* IDCT related constants
 * Cn = alpha * cos(n * PI / 16), where alpha is chosen such that C4 = 1.
 * IDCT_K holds the sums/differences of Cn terms used by InverseDCT1D. */
static const float IDCT_C3 = 1.175875602f;
static const float IDCT_C6 = 0.541196100f;
static const float IDCT_K[10] =
{
  0.765366865f,   /* [0]  C2-C6         */
 -1.847759065f,   /* [1] -C2-C6         */
 -0.390180644f,   /* [2]  C5-C3         */
 -1.961570561f,   /* [3] -C5-C3         */
  1.501321110f,   /* [4]  C1+C3-C5-C7   */
  2.053119869f,   /* [5]  C1+C3-C5+C7   */
  3.072711027f,   /* [6]  C1+C3+C5-C7   */
  0.298631336f,   /* [7] -C1+C3+C5-C7   */
 -0.899976223f,   /* [8]  C7-C3         */
 -2.562915448f    /* [9] -C1-C3         */
};
139

140

141
/* global functions */
142

143
/***************************************************************************
144
 * JPEG decoding ucode found in Japanese exclusive version of Pokemon Stadium.
145
 **************************************************************************/
146
void jpeg_decode_PS0()
147
{
148
    jpeg_decode_std("PS0", DecodeMacroblock3, EmitYUVTileLine);
149
}
150

151
/***************************************************************************
152
 * JPEG decoding ucode found in Ocarina of Time, Pokemon Stadium 1 and
153
 * Pokemon Stadium 2.
154
 **************************************************************************/
155
void jpeg_decode_PS()
156
{
157
    jpeg_decode_std("PS", DecodeMacroblock2, EmitRGBATileLine);
158
}
159

160
/***************************************************************************
161
 * JPEG decoding ucode found in Ogre Battle and Bottom of the 9th.
162
 **************************************************************************/
163
void jpeg_decode_OB()
164
{
165
    int16_t qtable[SUBBLOCK_SIZE];
166
    unsigned int mb;
167

168
    int32_t y_dc = 0;
169
    int32_t u_dc = 0;
170
    int32_t v_dc = 0;
171
    
172
    const OSTask_t * const task = get_task();
173

174
    uint32_t           address          = task->data_ptr;
175
    const unsigned int macroblock_count = task->data_size;
176
    const int          qscale           = task->yield_data_size;
177

178
    DebugMessage(M64MSG_VERBOSE, "jpeg_decode_OB: *buffer=%x, #MB=%d, qscale=%d",
179
            address,
180
            macroblock_count,
181
            qscale);
182

183
    if (qscale != 0)
184
    {
185
        if (qscale > 0)
186
        {
187
            ScaleSubBlock(qtable, DEFAULT_QTABLE, qscale);
188
        }
189
        else
190
        {
191
            RShiftSubBlock(qtable, DEFAULT_QTABLE, -qscale);
192
        }
193
    }
194

195
    for (mb = 0; mb < macroblock_count; ++mb)
196
    {
197
        int16_t macroblock[6*SUBBLOCK_SIZE];
198

199
        rdram_read_many_u16((uint16_t*)macroblock, address, 6*SUBBLOCK_SIZE);
200
        DecodeMacroblock1(macroblock, &y_dc, &u_dc, &v_dc, (qscale != 0) ? qtable : NULL);
201
        EmitTilesMode2(EmitYUVTileLine, macroblock, address);
202

203
        address += (2*6*SUBBLOCK_SIZE);
204
    }
205
}
206

207

208
/* local functions */
209
static void jpeg_decode_std(const char * const version, const std_macroblock_decoder_t decode_mb, const tile_line_emitter_t emit_line)
210
{
211
    int16_t qtables[3][SUBBLOCK_SIZE];
212
    unsigned int mb;
213
    uint32_t address;
214
    uint32_t macroblock_count;
215
    uint32_t mode;
216
    uint32_t qtableY_ptr;
217
    uint32_t qtableU_ptr;
218
    uint32_t qtableV_ptr;
219
    unsigned int subblock_count;
220
    unsigned int macroblock_size;
221
    int16_t *macroblock;
222
    const OSTask_t * const task = get_task();
223

224
    if (task->flags & 0x1)
225
    {
226
        DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: task yielding not implemented", version);
227
        return;
228
    }
229

230
    address          = rdram_read_u32(task->data_ptr);
231
    macroblock_count = rdram_read_u32(task->data_ptr + 4);
232
    mode             = rdram_read_u32(task->data_ptr + 8);
233
    qtableY_ptr      = rdram_read_u32(task->data_ptr + 12);
234
    qtableU_ptr      = rdram_read_u32(task->data_ptr + 16);
235
    qtableV_ptr      = rdram_read_u32(task->data_ptr + 20);
236

237
    DebugMessage(M64MSG_VERBOSE, "jpeg_decode_%s: *buffer=%x, #MB=%d, mode=%d, *Qy=%x, *Qu=%x, *Qv=%x",
238
            version,
239
            address,
240
            macroblock_count,
241
            mode,
242
            qtableY_ptr,
243
            qtableU_ptr,
244
            qtableV_ptr);
245

246
    if (mode != 0 && mode != 2)
247
    {
248
        DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: invalid mode %d", version, mode);
249
        return;
250
    }
251
    
252
    subblock_count = mode + 4;
253
    macroblock_size = 2*subblock_count*SUBBLOCK_SIZE;
254

255
    rdram_read_many_u16((uint16_t*)qtables[0], qtableY_ptr, SUBBLOCK_SIZE);
256
    rdram_read_many_u16((uint16_t*)qtables[1], qtableU_ptr, SUBBLOCK_SIZE);
257
    rdram_read_many_u16((uint16_t*)qtables[2], qtableV_ptr, SUBBLOCK_SIZE);
258

259
    macroblock = malloc(sizeof(*macroblock) * macroblock_size);
260
    if (!macroblock)
261
    {
262
        DebugMessage(M64MSG_WARNING, "jpeg_decode_%s: could not allocate macroblock", version);
263
        return;
264
    }
265

266
    for (mb = 0; mb < macroblock_count; ++mb)
267
    {
268
        rdram_read_many_u16((uint16_t*)macroblock, address, macroblock_size >> 1);
269
        decode_mb(macroblock, subblock_count, (const int16_t (*)[SUBBLOCK_SIZE])qtables);
270

271
        if (mode == 0)
272
        {
273
            EmitTilesMode0(emit_line, macroblock, address);
274
        }
275
        else
276
        {
277
            EmitTilesMode2(emit_line, macroblock, address);
278
        }
279

280
        address += macroblock_size;
281
    }
282
    free(macroblock);
283
}
284

285
/* Saturate a signed 16-bit value to the unsigned 8-bit range [0, 255].
 * Written with explicit comparisons: the former bit trick relied on
 * right-shifting a negative value and returned 1 instead of 0 for -32768. */
static uint8_t clamp_u8(int16_t x)
{
    if (x < 0)
    {
        return 0;
    }
    if (x > 0xff)
    {
        return 0xff;
    }
    return (uint8_t)x;
}
289

290
/* Clamp x to the range [-0x800, 0x7f0].
 * Note that the upper bound is 0x7f0, not 0x7ff. */
static int16_t clamp_s12(int16_t x)
{
    const int16_t lower = -0x800;
    const int16_t upper =  0x7f0;

    return (x < lower) ? lower
         : (x > upper) ? upper
         : x;
}
295

296
/* Saturate a signed 32-bit value to the signed 16-bit range [-32768, 32767]. */
static int16_t clamp_s16(int32_t x)
{
    if (x < -32768)
    {
        return -32768;
    }
    if (x > 32767)
    {
        return 32767;
    }
    return x;
}
301

302
/* Clamp x to [0, 0xff0], then keep only bits 7..11 (mask 0xf80),
 * i.e. the 5 most significant bits of the 12-bit component. */
static uint16_t clamp_RGBA_component(int16_t x)
{
    int16_t bounded = x;

    if (bounded < 0)
    {
        bounded = 0;
    }
    else if (bounded > 0xff0)
    {
        bounded = 0xff0;
    }

    return (bounded & 0xf80);
}
307

308
/* Pack two luma samples and one chroma pair into a 32-bit word laid out as
 * U (bits 31..24), Y1 (23..16), V (15..8), Y2 (7..0).
 * Every component is saturated to 8 bits with clamp_u8. */
static uint32_t GetUYVY(int16_t y1, int16_t y2, int16_t u, int16_t v)
{
    const uint32_t byte_u  = clamp_u8(u);
    const uint32_t byte_y1 = clamp_u8(y1);
    const uint32_t byte_v  = clamp_u8(v);
    const uint32_t byte_y2 = clamp_u8(y2);

    return (byte_u << 24) | (byte_y1 << 16) | (byte_v << 8) | byte_y2;
}
315

316
/* Convert one YUV sample to a 16-bit pixel with 5 bits per color component
 * (R in bits 15..11, G in bits 10..6, B in bits 5..1) and bit 0 always set.
 *
 * y is biased by +2048 before the conversion; u and v are used as signed
 * values. Each color component is clamped by clamp_RGBA_component. */
static uint16_t GetRGBA(int16_t y, int16_t u, int16_t v)
{
    const float fY = (float)y + 2048.0f;
    const float fU = (float)u;
    const float fV = (float)v;

    /* The sums can leave the int16_t range for extreme inputs, and a direct
     * floating-point -> int16_t conversion is undefined in that case.
     * Convert to int32_t (always large enough here) and saturate instead. */
    const int16_t red   = clamp_s16((int32_t)(fY             + 1.4025*fV));
    const int16_t green = clamp_s16((int32_t)(fY - 0.3443*fU - 0.7144*fV));
    const int16_t blue  = clamp_s16((int32_t)(fY + 1.7729*fU            ));

    const uint16_t r = clamp_RGBA_component(red);
    const uint16_t g = clamp_RGBA_component(green);
    const uint16_t b = clamp_RGBA_component(blue);

    return (r << 4) | (g >> 1) | (b >> 6) | 1;
}
328

329
static void EmitYUVTileLine(const int16_t *y, const int16_t *u, uint32_t address)
330
{
331
    uint32_t uyvy[8];
332

333
    const int16_t * const v  = u + SUBBLOCK_SIZE;
334
    const int16_t * const y2 = y + SUBBLOCK_SIZE;
335

336
    uyvy[0] = GetUYVY(y[0],  y[1],  u[0], v[0]);
337
    uyvy[1] = GetUYVY(y[2],  y[3],  u[1], v[1]);
338
    uyvy[2] = GetUYVY(y[4],  y[5],  u[2], v[2]);
339
    uyvy[3] = GetUYVY(y[6],  y[7],  u[3], v[3]);
340
    uyvy[4] = GetUYVY(y2[0], y2[1], u[4], v[4]);
341
    uyvy[5] = GetUYVY(y2[2], y2[3], u[5], v[5]);
342
    uyvy[6] = GetUYVY(y2[4], y2[5], u[6], v[6]);
343
    uyvy[7] = GetUYVY(y2[6], y2[7], u[7], v[7]);
344

345
    rdram_write_many_u32(uyvy, address, 8);
346
}
347

348
static void EmitRGBATileLine(const int16_t *y, const int16_t *u, uint32_t address)
349
{
350
    uint16_t rgba[16];
351

352
    const int16_t * const v  = u + SUBBLOCK_SIZE;
353
    const int16_t * const y2 = y + SUBBLOCK_SIZE;
354

355
    rgba[0]  = GetRGBA(y[0],  u[0], v[0]);
356
    rgba[1]  = GetRGBA(y[1],  u[0], v[0]);
357
    rgba[2]  = GetRGBA(y[2],  u[1], v[1]);
358
    rgba[3]  = GetRGBA(y[3],  u[1], v[1]);
359
    rgba[4]  = GetRGBA(y[4],  u[2], v[2]);
360
    rgba[5]  = GetRGBA(y[5],  u[2], v[2]);
361
    rgba[6]  = GetRGBA(y[6],  u[3], v[3]);
362
    rgba[7]  = GetRGBA(y[7],  u[3], v[3]);
363
    rgba[8]  = GetRGBA(y2[0], u[4], v[4]);
364
    rgba[9]  = GetRGBA(y2[1], u[4], v[4]);
365
    rgba[10] = GetRGBA(y2[2], u[5], v[5]);
366
    rgba[11] = GetRGBA(y2[3], u[5], v[5]);
367
    rgba[12] = GetRGBA(y2[4], u[6], v[6]);
368
    rgba[13] = GetRGBA(y2[5], u[6], v[6]);
369
    rgba[14] = GetRGBA(y2[6], u[7], v[7]);
370
    rgba[15] = GetRGBA(y2[7], u[7], v[7]);
371

372
    rdram_write_many_u16(rgba, address, 16);
373
}
374

375
static void EmitTilesMode0(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address)
376
{
377
    unsigned int i;
378

379
    unsigned int y_offset = 0;
380
    unsigned int u_offset = 2*SUBBLOCK_SIZE;
381

382
    for (i = 0; i < 8; ++i)
383
    {
384
        emit_line(&macroblock[y_offset], &macroblock[u_offset], address);
385

386
        y_offset += 8;
387
        u_offset += 8;
388
        address += 32;
389
    }
390
}
391

392
static void EmitTilesMode2(const tile_line_emitter_t emit_line, const int16_t *macroblock, uint32_t address)
393
{
394
    unsigned int i;
395

396
    unsigned int y_offset = 0;
397
    unsigned int u_offset = 4*SUBBLOCK_SIZE;
398

399
    for (i = 0; i < 8; ++i)
400
    {
401
        emit_line(&macroblock[y_offset],     &macroblock[u_offset], address);
402
        emit_line(&macroblock[y_offset + 8], &macroblock[u_offset], address + 32);
403

404
        y_offset += (i == 3) ? SUBBLOCK_SIZE+16 : 16;
405
        u_offset += 8;
406
        address += 64;
407
    }
408
}
409

410
static void DecodeMacroblock1(int16_t *macroblock, int32_t *y_dc, int32_t *u_dc, int32_t *v_dc, const int16_t *qtable)
411
{
412
    int sb;
413

414
    for (sb = 0; sb < 6; ++sb)
415
    {
416
        int16_t tmp_sb[SUBBLOCK_SIZE];
417

418
        /* update DC */
419
        int32_t dc = (int32_t)macroblock[0];
420
        switch(sb)
421
        {
422
        case 0: case 1: case 2: case 3:
423
                *y_dc += dc; macroblock[0] = *y_dc & 0xffff; break;
424
        case 4: *u_dc += dc; macroblock[0] = *u_dc & 0xffff; break;
425
        case 5: *v_dc += dc; macroblock[0] = *v_dc & 0xffff; break;
426
        }
427

428
        ZigZagSubBlock(tmp_sb, macroblock);
429
        if (qtable != NULL) { MultSubBlocks(tmp_sb, tmp_sb, qtable, 0); }
430
        TransposeSubBlock(macroblock, tmp_sb);
431
        InverseDCTSubBlock(macroblock, macroblock);
432
        
433
        macroblock += SUBBLOCK_SIZE;
434
    }
435
}
436

437
static void DecodeMacroblock2(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE])
438
{
439
    unsigned int sb;
440
    unsigned int q = 0;
441

442
    for (sb = 0; sb < subblock_count; ++sb)
443
    {
444
        int16_t tmp_sb[SUBBLOCK_SIZE];
445
        const int isChromaSubBlock = (subblock_count - sb <= 2);
446

447
        if (isChromaSubBlock) { ++q; }
448

449
        MultSubBlocks(macroblock, macroblock, qtables[q], 4);
450
        ZigZagSubBlock(tmp_sb, macroblock);
451
        InverseDCTSubBlock(macroblock, tmp_sb);
452

453
        macroblock += SUBBLOCK_SIZE;
454
    }
455

456
}
457

458
static void DecodeMacroblock3(int16_t *macroblock, unsigned int subblock_count, const int16_t qtables[3][SUBBLOCK_SIZE])
459
{
460
    unsigned int sb;
461
    unsigned int q = 0;
462

463
    for (sb = 0; sb < subblock_count; ++sb)
464
    {
465
        int16_t tmp_sb[SUBBLOCK_SIZE];
466
        const int isChromaSubBlock = (subblock_count - sb <= 2);
467

468
        if (isChromaSubBlock) { ++q; }
469

470
        MultSubBlocks(macroblock, macroblock, qtables[q], 4);
471
        ZigZagSubBlock(tmp_sb, macroblock);
472
        InverseDCTSubBlock(macroblock, tmp_sb);
473

474
        if (isChromaSubBlock)
475
        {
476
            RescaleUVSubBlock(macroblock, macroblock);
477
        }
478
        else
479
        {
480
            RescaleYSubBlock(macroblock, macroblock);
481
        }
482

483
        macroblock += SUBBLOCK_SIZE;
484
    }
485
}
486

487
static void TransposeSubBlock(int16_t *dst, const int16_t *src)
488
{
489
    ReorderSubBlock(dst, src, TRANSPOSE_TABLE);
490
}
491

492
static void ZigZagSubBlock(int16_t *dst, const int16_t *src)
493
{
494
    ReorderSubBlock(dst, src, ZIGZAG_TABLE);
495
}
496

497
static void ReorderSubBlock(int16_t *dst, const int16_t *src, const unsigned int *table)
498
{
499
    unsigned int i;
500

501
    /* source and destination sublocks cannot overlap */
502
    assert(abs(dst - src) > SUBBLOCK_SIZE);
503

504
    for (i = 0; i < SUBBLOCK_SIZE; ++i)
505
    {
506
        dst[i] = src[table[i]];
507
    }
508
}
509

510
static void MultSubBlocks(int16_t *dst, const int16_t *src1, const int16_t *src2, unsigned int shift)
511
{
512
    unsigned int i;
513

514
    for (i = 0; i < SUBBLOCK_SIZE; ++i)
515
    {
516
        int32_t v = src1[i] * src2[i];
517
        dst[i] = clamp_s16(v) << shift;
518
    }
519
}
520

521
static void ScaleSubBlock(int16_t *dst, const int16_t *src, int16_t scale)
522
{
523
    unsigned int i;
524

525
    for (i = 0; i < SUBBLOCK_SIZE; ++i)
526
    {
527
        int32_t v = src[i] * scale;
528
        dst[i] = clamp_s16(v);
529
    }
530
}
531

532
static void RShiftSubBlock(int16_t *dst, const int16_t *src, unsigned int shift)
533
{
534
    unsigned int i;
535

536
    for (i = 0; i < SUBBLOCK_SIZE; ++i)
537
    {
538
        dst[i] = src[i] >> shift;
539
    }
540
}
541

542
/***************************************************************************
543
 * Fast 2D IDCT using separable formulation and normalization
544
 * Computations use single precision floats
545
 * Implementation based on Wikipedia :
546
 * http://fr.wikipedia.org/wiki/Transform%C3%A9e_en_cosinus_discr%C3%A8te
547
 **************************************************************************/
548
static void InverseDCT1D(const float * const x, float *dst, unsigned int stride)
549
{
550
    float e[4];
551
    float f[4];
552
    float x26, x1357, x15, x37, x17, x35;
553

554
    x15   = IDCT_K[2] * (x[1] + x[5]);
555
    x37   = IDCT_K[3] * (x[3] + x[7]);
556
    x17   = IDCT_K[8] * (x[1] + x[7]);
557
    x35   = IDCT_K[9] * (x[3] + x[5]);
558
    x1357 = IDCT_C3   * (x[1] + x[3] + x[5] + x[7]);
559
    x26   = IDCT_C6   * (x[2] + x[6]);
560

561
    f[0] = x[0] + x[4];
562
    f[1] = x[0] - x[4];
563
    f[2] = x26  + IDCT_K[0]*x[2];
564
    f[3] = x26  + IDCT_K[1]*x[6];
565

566
    e[0] = x1357 + x15 + IDCT_K[4]*x[1] + x17;
567
    e[1] = x1357 + x37 + IDCT_K[6]*x[3] + x35;
568
    e[2] = x1357 + x15 + IDCT_K[5]*x[5] + x35;
569
    e[3] = x1357 + x37 + IDCT_K[7]*x[7] + x17;
570

571
    *dst = f[0] + f[2] + e[0]; dst += stride;
572
    *dst = f[1] + f[3] + e[1]; dst += stride;
573
    *dst = f[1] - f[3] + e[2]; dst += stride;
574
    *dst = f[0] - f[2] + e[3]; dst += stride;
575
    *dst = f[0] - f[2] - e[3]; dst += stride;
576
    *dst = f[1] - f[3] - e[2]; dst += stride;
577
    *dst = f[1] + f[3] - e[1]; dst += stride;
578
    *dst = f[0] + f[2] - e[0]; dst += stride;
579
}
580

581
static void InverseDCTSubBlock(int16_t *dst, const int16_t *src)
582
{
583
    float x[8];
584
    float block[SUBBLOCK_SIZE];
585
    unsigned int i, j;
586

587
    /* idct 1d on rows (+transposition) */
588
    for (i = 0; i < 8; ++i)
589
    {
590
        for (j = 0; j < 8; ++j)
591
        {
592
            x[j] = (float)src[i*8+j];
593
        }
594

595
        InverseDCT1D(x, &block[i], 8);
596
    }
597

598
    /* idct 1d on columns (thanks to previous transposition) */
599
    for (i = 0; i < 8; ++i)
600
    {
601
        InverseDCT1D(&block[i*8], x, 1);
602

603
        /* C4 = 1 normalization implies a division by 8 */
604
        for (j = 0; j < 8; ++j)
605
        {
606
            dst[i+j*8] = (int16_t)x[j] >> 3;
607
        }
608
    }
609
}
610

611
static void RescaleYSubBlock(int16_t *dst, const int16_t *src)
612
{
613
    unsigned int i;
614

615
    for (i = 0; i < SUBBLOCK_SIZE; ++i)
616
    {
617
        dst[i] = (((uint32_t)(clamp_s12(src[i]) + 0x800) * 0xdb0) >> 16) + 0x10;
618
    }
619
}
620

621
static void RescaleUVSubBlock(int16_t *dst, const int16_t *src)
622
{
623
    unsigned int i;
624

625
    for (i = 0; i < SUBBLOCK_SIZE; ++i)
626
    {
627
        dst[i] = (((int)clamp_s12(src[i]) * 0xe00) >> 16) + 0x80;
628
    }
629
}
630

631

632

633
/* FIXME: assume presence of expansion pack */
634
#define MEMMASK 0x7fffff
635

636
static void rdram_read_many_u16(uint16_t *dst, uint32_t address, unsigned int count)
637
{
638
    while (count != 0)
639
    {
640
        uint16_t s = rsp.RDRAM[((address++)^S8) & MEMMASK];
641
        s <<= 8;
642
        s |= rsp.RDRAM[((address++)^S8) & MEMMASK];
643

644
        *(dst++) = s;
645

646
        --count;
647
    }
648
}
649

650
static void rdram_write_many_u16(const uint16_t *src, uint32_t address, unsigned int count)
651
{
652
    while (count != 0)
653
    {
654
        rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8);
655
        rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff);
656

657
        --count;
658
    }
659
}
660

661
static uint32_t rdram_read_u32(uint32_t address)
662
{
663
    uint32_t r = rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8;
664
    r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8;
665
    r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK]; r <<= 8;
666
    r |= rsp.RDRAM[((address++) ^ S8) & MEMMASK];
667

668
    return r;
669
}
670

671
static void rdram_write_many_u32(const uint32_t *src, uint32_t address, unsigned int count)
672
{
673
    while (count != 0)
674
    {
675
        rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 24);
676
        rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 16);
677
        rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*src >> 8);
678
        rsp.RDRAM[((address++)^S8) & MEMMASK] = (uint8_t)(*(src++) & 0xff);
679

680
        --count;
681
    }
682
}
683

684

685
Product

Resources

Company