Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
wine-mirror
GitHub Repository: wine-mirror/wine
Path: blob/master/libs/jpeg/jidctint.c
8725 views
1
/*
2
* jidctint.c
3
*
4
* Copyright (C) 1991-1998, Thomas G. Lane.
5
* Modification developed 2002-2026 by Guido Vollbeding.
6
* This file is part of the Independent JPEG Group's software.
7
* For conditions of distribution and use, see the accompanying README file.
8
*
9
* This file contains a slow-but-accurate integer implementation of the
10
* inverse DCT (Discrete Cosine Transform). In the IJG code, this routine
11
* must also perform dequantization of the input coefficients.
12
*
13
* A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
14
* on each row (or vice versa, but it's more convenient to emit a row at
15
* a time). Direct algorithms are also available, but they are much more
16
* complex and seem not to be any faster when reduced to code.
17
*
18
* This implementation is based on an algorithm described in
19
* C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
20
* Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
21
* Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
22
* The primary algorithm described there uses 11 multiplies and 29 adds.
23
* We use their alternate method with 12 multiplies and 32 adds.
24
* The advantage of this method is that no data path contains more than one
25
* multiplication; this allows a very simple and accurate implementation in
26
* scaled fixed-point arithmetic, with a minimal number of shifts.
27
*
28
* We also provide IDCT routines with various output sample block sizes for
* direct resolution reduction or enlargement, and for directly resolving the
* common 2x1 and 1x2 subsampling cases without additional resampling: NxN
* (N=1...16), 2NxN, and Nx2N (N=1...8) samples for one 8x8 input DCT block.
32
*
33
* For N<8 we simply take the corresponding low-frequency coefficients of
34
* the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
35
* to yield the downscaled outputs.
36
* This can be seen as direct low-pass downsampling from the DCT domain
37
* point of view rather than the usual spatial domain point of view,
38
* yielding significant computational savings and results at least
39
* as good as common bilinear (averaging) spatial downsampling.
40
*
41
* For N>8 we apply a partial NxN IDCT, treating the 8 input coefficients as
* the lower frequencies and assuming the higher frequencies to be zero.
* It turns out that the computational effort is similar to that of the
* 8x8 IDCT relative to the output size.
45
* Furthermore, the scaling and descaling is the same for all IDCT sizes.
46
*
47
* CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
48
* since there would be too many additional constants to pre-calculate.
49
*/
50
51
#define JPEG_INTERNALS
52
#include "jinclude.h"
53
#include "jpeglib.h"
54
#include "jdct.h" /* Private declarations for DCT subsystem */
55
56
#ifdef DCT_ISLOW_SUPPORTED
57
58
59
/*
60
* This module is specialized to the case DCTSIZE = 8.
61
*/
62
63
#if DCTSIZE != 8
64
Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
65
#endif
66
67
68
/*
69
* The poop on this scaling stuff is as follows:
70
*
71
* Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
72
* larger than the true IDCT outputs. The final outputs are therefore
73
* a factor of N larger than desired; since N=8 this can be cured by
74
* a simple right shift at the end of the algorithm. The advantage of
75
* this arrangement is that we save two multiplications per 1-D IDCT,
76
* because the y0 and y4 inputs need not be divided by sqrt(N).
77
*
78
* We have to do addition and subtraction of the integer inputs, which
79
* is no problem, and multiplication by fractional constants, which is
80
* a problem to do in integer arithmetic. We multiply all the constants
81
* by CONST_SCALE and convert them to integer constants (thus retaining
82
* CONST_BITS bits of precision in the constants). After doing a
83
* multiplication we have to divide the product by CONST_SCALE, with
84
* proper rounding, to produce the correct output. This division can
85
* be done cheaply as a right shift of CONST_BITS bits. We postpone
86
* shifting as long as possible so that partial sums can be added
87
* together with full fractional precision.
88
*
89
* The outputs of the first pass are scaled up by PASS1_BITS bits so that
90
* they are represented to better-than-integral precision. These outputs
91
* require JPEG_DATA_PRECISION + PASS1_BITS + 3 bits; this fits in a
92
* 16-bit word with the recommended scaling. (To scale up higher bit
93
* depths further, an intermediate INT32 array would be needed.)
94
*
95
* To avoid overflow of the 32-bit intermediate results in pass 2, we
96
* must have JPEG_DATA_PRECISION + CONST_BITS + PASS1_BITS <= 26. Error
97
* analysis shows that the values given below are the most effective.
98
*/
99
100
/* Select the fixed-point scaling parameters from the configured data and
 * sample precision.  Both supported configurations keep 13 fractional bits
 * in the constants; only the pass 1 / pass 2 scaling differs.  If neither
 * condition holds, none of the three macros is defined.
 */
#if JPEG_DATA_PRECISION <= 10 && BITS_IN_JSAMPLE <= 13
#define CONST_BITS  13
#define PASS1_BITS  (10 - JPEG_DATA_PRECISION)
#define PASS2_BITS  (13 - BITS_IN_JSAMPLE)
#elif JPEG_DATA_PRECISION <= 13 && BITS_IN_JSAMPLE <= 16
#define CONST_BITS  13
#define PASS1_BITS  (13 - JPEG_DATA_PRECISION)
#define PASS2_BITS  (16 - BITS_IN_JSAMPLE)
#endif
111
112
/* Fixed-point versions of the 8-point kernel constants.
 *
 * For CONST_BITS == 13 the values are spelled out as integer literals,
 * because some C compilers do not fold "FIX(constant)" at compile time and
 * would otherwise perform floating-point work at run time.  For any other
 * CONST_BITS the FIX() macro is relied upon; add a literal table for the
 * new value if that matters for your compiler.
 */

#if CONST_BITS != 13
#define FIX_0_298631336  FIX(0.298631336)
#define FIX_0_390180644  FIX(0.390180644)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_765366865  FIX(0.765366865)
#define FIX_0_899976223  FIX(0.899976223)
#define FIX_1_175875602  FIX(1.175875602)
#define FIX_1_501321110  FIX(1.501321110)
#define FIX_1_847759065  FIX(1.847759065)
#define FIX_1_961570560  FIX(1.961570560)
#define FIX_2_053119869  FIX(2.053119869)
#define FIX_2_562915447  FIX(2.562915447)
#define FIX_3_072711026  FIX(3.072711026)
#else
#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
#endif
146
147
148
/* MULTIPLY(var,const): INT32 variable times INT32 constant, INT32 result.
 *
 * With the recommended scaling and data of at most 10 bits, neither operand
 * exceeds 16 bits, so the cheaper 16x16->32 multiply is sufficient.  Higher
 * bit depths need the full 32-bit multiplication.
 */

#if JPEG_DATA_PRECISION > 10 || BITS_IN_JSAMPLE > 13
#define MULTIPLY(var,const)  ((var) * (const))
#else
#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
#endif
160
161
162
/* DEQUANTIZE(coefficient, multiplier): scale a coefficient by its entry in
 * the multiplier table.  The coefficient is first converted to
 * ISLOW_MULT_TYPE.  In this module both operands and the product fit in
 * 16 bits, so an int or a short multiply is equally acceptable.
 */

#define DEQUANTIZE(coefficient,multiplier) \
	((multiplier) * ((ISLOW_MULT_TYPE) (coefficient)))
168
169
170
/* PASS2_OFFSET: value added to the DC term at the start of pass 2.
 *
 * It combines the range center (shifted up by PASS2_BITS) with the rounding
 * fudge factor for the final PASS2_BITS descale.  When PASS2_BITS is 0 there
 * is nothing to round at this stage, so only the range center remains.
 *
 * Every expansion is fully parenthesized so the macro behaves as a single
 * operand regardless of the surrounding expression.
 */

#if PASS2_BITS > 1
#define PASS2_OFFSET \
	((((INT32) RANGE_CENTER) << PASS2_BITS) + (ONE << (PASS2_BITS-1)))
#else
#if PASS2_BITS > 0
#define PASS2_OFFSET  ((((INT32) RANGE_CENTER) << 1) + ONE)
#else
#define PASS2_OFFSET  ((INT32) RANGE_CENTER)
#endif
#endif
182
183
184
/*
185
* Perform dequantization and inverse DCT on one block of coefficients.
186
*
187
* Optimized algorithm with 12 multiplications in the 1-D kernel.
188
* cK represents sqrt(2) * cos(K*pi/16).
189
*/
190
191
GLOBAL(void)
192
jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
193
JCOEFPTR coef_block,
194
JSAMPARRAY output_buf, JDIMENSION output_col)
195
{
196
INT32 tmp0, tmp1, tmp2, tmp3;
197
INT32 tmp10, tmp11, tmp12, tmp13;
198
INT32 z1, z2, z3;
199
JCOEFPTR inptr;
200
ISLOW_MULT_TYPE * quantptr;
201
int * wsptr;
202
JSAMPROW outptr;
203
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
204
int ctr;
205
int workspace[DCTSIZE2]; /* buffers data between passes */
206
SHIFT_TEMPS
207
208
/* Pass 1: process columns from input, store into work array.
209
* Note results are scaled up by sqrt(8) compared to a true IDCT;
210
* furthermore, we scale the results by 2**PASS1_BITS.
211
*/
212
213
inptr = coef_block;
214
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
215
wsptr = workspace;
216
for (ctr = DCTSIZE; ctr > 0; ctr--) {
217
/* Due to quantization, we will usually find that many of the input
218
* coefficients are zero, especially the AC terms. We can exploit this
219
* by short-circuiting the IDCT calculation for any column in which all
220
* the AC terms are zero. In that case each output is equal to the
221
* DC coefficient (with scale factor as needed).
222
* With typical images and quantization tables, half or more of the
223
* column DCT calculations can be simplified this way.
224
*/
225
226
if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
227
inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
228
inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
229
inptr[DCTSIZE*7] == 0) {
230
/* AC terms all zero */
231
#if PASS1_BITS > 0
232
int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
233
#else
234
int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
235
#endif
236
237
wsptr[DCTSIZE*0] = dcval;
238
wsptr[DCTSIZE*1] = dcval;
239
wsptr[DCTSIZE*2] = dcval;
240
wsptr[DCTSIZE*3] = dcval;
241
wsptr[DCTSIZE*4] = dcval;
242
wsptr[DCTSIZE*5] = dcval;
243
wsptr[DCTSIZE*6] = dcval;
244
wsptr[DCTSIZE*7] = dcval;
245
246
inptr++; /* advance pointers to next column */
247
quantptr++;
248
wsptr++;
249
continue;
250
}
251
252
/* Even part: reverse the even part of the forward DCT.
253
* The rotator is c(-6).
254
*/
255
256
z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
257
z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
258
z2 <<= CONST_BITS;
259
z3 <<= CONST_BITS;
260
/* Add fudge factor here for final descale. */
261
z2 += ONE << (CONST_BITS-PASS1_BITS-1);
262
263
tmp0 = z2 + z3;
264
tmp1 = z2 - z3;
265
266
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
267
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
268
269
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
270
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
271
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
272
273
tmp10 = tmp0 + tmp2;
274
tmp13 = tmp0 - tmp2;
275
tmp11 = tmp1 + tmp3;
276
tmp12 = tmp1 - tmp3;
277
278
/* Odd part per figure 8; the matrix is unitary and hence its
279
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
280
*/
281
282
tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
283
tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
284
tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
285
tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
286
287
z2 = tmp0 + tmp2;
288
z3 = tmp1 + tmp3;
289
290
z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
291
z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
292
z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
293
z2 += z1;
294
z3 += z1;
295
296
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
297
tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
298
tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
299
tmp0 += z1 + z2;
300
tmp3 += z1 + z3;
301
302
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
303
tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
304
tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
305
tmp1 += z1 + z3;
306
tmp2 += z1 + z2;
307
308
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
309
310
wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
311
wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
312
wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
313
wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
314
wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
315
wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
316
wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
317
wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
318
319
inptr++; /* advance pointers to next column */
320
quantptr++;
321
wsptr++;
322
}
323
324
/* Pass 2: process rows from work array, store into output array.
325
* Note that we must descale the results by a factor of 8 == 2**3,
326
* which is folded into the PASS2_BITS value.
327
*/
328
329
wsptr = workspace;
330
for (ctr = 0; ctr < DCTSIZE; ctr++) {
331
outptr = output_buf[ctr] + output_col;
332
333
/* Add range center and fudge factor for final descale and range-limit. */
334
z2 = (INT32) wsptr[0] + PASS2_OFFSET;
335
336
/* Rows of zeroes can be exploited in the same way as we did with columns.
337
* However, the column calculation has created many nonzero AC terms, so
338
* the simplification applies less often (typically 5% to 10% of the time).
339
* On machines with very fast multiplication, it's possible that the
340
* test takes more time than it's worth. In that case this section
341
* may be commented out.
342
*/
343
344
#ifndef NO_ZERO_ROW_TEST
345
if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
346
wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
347
/* AC terms all zero */
348
#if PASS2_BITS > 0
349
JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS2_BITS)
350
& RANGE_MASK];
351
#else
352
JSAMPLE dcval = range_limit[(int) z2 & RANGE_MASK];
353
#endif
354
355
outptr[0] = dcval;
356
outptr[1] = dcval;
357
outptr[2] = dcval;
358
outptr[3] = dcval;
359
outptr[4] = dcval;
360
outptr[5] = dcval;
361
outptr[6] = dcval;
362
outptr[7] = dcval;
363
364
wsptr += DCTSIZE; /* advance pointer to next row */
365
continue;
366
}
367
#endif
368
369
/* Even part: reverse the even part of the forward DCT.
370
* The rotator is c(-6).
371
*/
372
373
z3 = (INT32) wsptr[4];
374
z2 <<= CONST_BITS;
375
z3 <<= CONST_BITS;
376
#if PASS2_BITS == 0
377
/* Add fudge factor here for final descale. */
378
z2 += ONE << (CONST_BITS-1);
379
#endif
380
381
tmp0 = z2 + z3;
382
tmp1 = z2 - z3;
383
384
z2 = (INT32) wsptr[2];
385
z3 = (INT32) wsptr[6];
386
387
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
388
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
389
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
390
391
tmp10 = tmp0 + tmp2;
392
tmp13 = tmp0 - tmp2;
393
tmp11 = tmp1 + tmp3;
394
tmp12 = tmp1 - tmp3;
395
396
/* Odd part per figure 8; the matrix is unitary and hence its
397
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
398
*/
399
400
tmp0 = (INT32) wsptr[7];
401
tmp1 = (INT32) wsptr[5];
402
tmp2 = (INT32) wsptr[3];
403
tmp3 = (INT32) wsptr[1];
404
405
z2 = tmp0 + tmp2;
406
z3 = tmp1 + tmp3;
407
408
z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
409
z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
410
z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
411
z2 += z1;
412
z3 += z1;
413
414
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
415
tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
416
tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
417
tmp0 += z1 + z2;
418
tmp3 += z1 + z3;
419
420
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
421
tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
422
tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
423
tmp1 += z1 + z3;
424
tmp2 += z1 + z2;
425
426
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
427
428
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
429
CONST_BITS+PASS2_BITS)
430
& RANGE_MASK];
431
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
432
CONST_BITS+PASS2_BITS)
433
& RANGE_MASK];
434
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
435
CONST_BITS+PASS2_BITS)
436
& RANGE_MASK];
437
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
438
CONST_BITS+PASS2_BITS)
439
& RANGE_MASK];
440
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
441
CONST_BITS+PASS2_BITS)
442
& RANGE_MASK];
443
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
444
CONST_BITS+PASS2_BITS)
445
& RANGE_MASK];
446
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
447
CONST_BITS+PASS2_BITS)
448
& RANGE_MASK];
449
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
450
CONST_BITS+PASS2_BITS)
451
& RANGE_MASK];
452
453
wsptr += DCTSIZE; /* advance pointer to next row */
454
}
455
}
456
457
#ifdef IDCT_SCALING_SUPPORTED
458
459
460
/*
461
* Perform dequantization and inverse DCT on one block of coefficients,
462
* producing a reduced-size 7x7 output block.
463
*
464
* Optimized algorithm with 12 multiplications in the 1-D kernel.
465
* cK represents sqrt(2) * cos(K*pi/14).
466
*/
467
468
GLOBAL(void)
469
jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
470
JCOEFPTR coef_block,
471
JSAMPARRAY output_buf, JDIMENSION output_col)
472
{
473
INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
474
INT32 z1, z2, z3;
475
JCOEFPTR inptr;
476
ISLOW_MULT_TYPE * quantptr;
477
int * wsptr;
478
JSAMPROW outptr;
479
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
480
int ctr;
481
int workspace[7*7]; /* buffers data between passes */
482
SHIFT_TEMPS
483
484
/* Pass 1: process columns from input, store into work array. */
485
486
inptr = coef_block;
487
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
488
wsptr = workspace;
489
for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
490
/* Even part */
491
492
tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
493
tmp13 <<= CONST_BITS;
494
/* Add fudge factor here for final descale. */
495
tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
496
497
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
498
z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
499
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
500
501
tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
502
tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
503
tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
504
tmp0 = z1 + z3;
505
z2 -= tmp0;
506
tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
507
tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
508
tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
509
tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
510
511
/* Odd part */
512
513
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
514
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
515
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
516
517
tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
518
tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
519
tmp0 = tmp1 - tmp2;
520
tmp1 += tmp2;
521
tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
522
tmp1 += tmp2;
523
z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
524
tmp0 += z2;
525
tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
526
527
/* Final output stage */
528
529
wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
530
wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
531
wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
532
wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
533
wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
534
wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
535
wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
536
}
537
538
/* Pass 2: process 7 rows from work array, store into output array. */
539
540
wsptr = workspace;
541
for (ctr = 0; ctr < 7; ctr++) {
542
outptr = output_buf[ctr] + output_col;
543
544
/* Even part */
545
546
/* Add range center and fudge factor for final descale and range-limit. */
547
tmp13 = (INT32) wsptr[0] + PASS2_OFFSET;
548
tmp13 <<= CONST_BITS;
549
#if PASS2_BITS == 0
550
tmp13 += ONE << (CONST_BITS-1);
551
#endif
552
553
z1 = (INT32) wsptr[2];
554
z2 = (INT32) wsptr[4];
555
z3 = (INT32) wsptr[6];
556
557
tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
558
tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
559
tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
560
tmp0 = z1 + z3;
561
z2 -= tmp0;
562
tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
563
tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
564
tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
565
tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
566
567
/* Odd part */
568
569
z1 = (INT32) wsptr[1];
570
z2 = (INT32) wsptr[3];
571
z3 = (INT32) wsptr[5];
572
573
tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
574
tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
575
tmp0 = tmp1 - tmp2;
576
tmp1 += tmp2;
577
tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
578
tmp1 += tmp2;
579
z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
580
tmp0 += z2;
581
tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
582
583
/* Final output stage */
584
585
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
586
CONST_BITS+PASS2_BITS)
587
& RANGE_MASK];
588
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
589
CONST_BITS+PASS2_BITS)
590
& RANGE_MASK];
591
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
592
CONST_BITS+PASS2_BITS)
593
& RANGE_MASK];
594
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
595
CONST_BITS+PASS2_BITS)
596
& RANGE_MASK];
597
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
598
CONST_BITS+PASS2_BITS)
599
& RANGE_MASK];
600
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
601
CONST_BITS+PASS2_BITS)
602
& RANGE_MASK];
603
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
604
CONST_BITS+PASS2_BITS)
605
& RANGE_MASK];
606
607
wsptr += 7; /* advance pointer to next row */
608
}
609
}
610
611
612
/*
613
* Perform dequantization and inverse DCT on one block of coefficients,
614
* producing a reduced-size 6x6 output block.
615
*
616
* Optimized algorithm with 3 multiplications in the 1-D kernel.
617
* cK represents sqrt(2) * cos(K*pi/12).
618
*/
619
620
GLOBAL(void)
621
jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
622
JCOEFPTR coef_block,
623
JSAMPARRAY output_buf, JDIMENSION output_col)
624
{
625
INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
626
INT32 z1, z2, z3;
627
JCOEFPTR inptr;
628
ISLOW_MULT_TYPE * quantptr;
629
int * wsptr;
630
JSAMPROW outptr;
631
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
632
int ctr;
633
int workspace[6*6]; /* buffers data between passes */
634
SHIFT_TEMPS
635
636
/* Pass 1: process columns from input, store into work array. */
637
638
inptr = coef_block;
639
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
640
wsptr = workspace;
641
for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
642
/* Even part */
643
644
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
645
tmp0 <<= CONST_BITS;
646
/* Add fudge factor here for final descale. */
647
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
648
tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
649
tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
650
tmp1 = tmp0 + tmp10;
651
tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
652
tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
653
tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
654
tmp10 = tmp1 + tmp0;
655
tmp12 = tmp1 - tmp0;
656
657
/* Odd part */
658
659
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
660
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
661
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
662
tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
663
tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
664
tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
665
#if PASS1_BITS > 0
666
tmp1 = (z1 - z2 - z3) << PASS1_BITS;
667
#else
668
tmp1 = z1 - z2 - z3;
669
#endif
670
671
/* Final output stage */
672
673
wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
674
wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
675
wsptr[6*1] = (int) (tmp11 + tmp1);
676
wsptr[6*4] = (int) (tmp11 - tmp1);
677
wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
678
wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
679
}
680
681
/* Pass 2: process 6 rows from work array, store into output array. */
682
683
wsptr = workspace;
684
for (ctr = 0; ctr < 6; ctr++) {
685
outptr = output_buf[ctr] + output_col;
686
687
/* Even part */
688
689
/* Add range center and fudge factor for final descale and range-limit. */
690
tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
691
tmp0 <<= CONST_BITS;
692
#if PASS2_BITS == 0
693
tmp0 += ONE << (CONST_BITS-1);
694
#endif
695
tmp2 = (INT32) wsptr[4];
696
tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
697
tmp1 = tmp0 + tmp10;
698
tmp11 = tmp0 - tmp10 - tmp10;
699
tmp10 = (INT32) wsptr[2];
700
tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
701
tmp10 = tmp1 + tmp0;
702
tmp12 = tmp1 - tmp0;
703
704
/* Odd part */
705
706
z1 = (INT32) wsptr[1];
707
z2 = (INT32) wsptr[3];
708
z3 = (INT32) wsptr[5];
709
tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
710
tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
711
tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
712
tmp1 = (z1 - z2 - z3) << CONST_BITS;
713
714
/* Final output stage */
715
716
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
717
CONST_BITS+PASS2_BITS)
718
& RANGE_MASK];
719
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
720
CONST_BITS+PASS2_BITS)
721
& RANGE_MASK];
722
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
723
CONST_BITS+PASS2_BITS)
724
& RANGE_MASK];
725
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
726
CONST_BITS+PASS2_BITS)
727
& RANGE_MASK];
728
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
729
CONST_BITS+PASS2_BITS)
730
& RANGE_MASK];
731
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
732
CONST_BITS+PASS2_BITS)
733
& RANGE_MASK];
734
735
wsptr += 6; /* advance pointer to next row */
736
}
737
}
738
739
740
/*
741
* Perform dequantization and inverse DCT on one block of coefficients,
742
* producing a reduced-size 5x5 output block.
743
*
744
* Optimized algorithm with 5 multiplications in the 1-D kernel.
745
* cK represents sqrt(2) * cos(K*pi/10).
746
*/
747
748
GLOBAL(void)
749
jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
750
JCOEFPTR coef_block,
751
JSAMPARRAY output_buf, JDIMENSION output_col)
752
{
753
INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
754
INT32 z1, z2, z3;
755
JCOEFPTR inptr;
756
ISLOW_MULT_TYPE * quantptr;
757
int * wsptr;
758
JSAMPROW outptr;
759
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
760
int ctr;
761
int workspace[5*5]; /* buffers data between passes */
762
SHIFT_TEMPS
763
764
/* Pass 1: process columns from input, store into work array. */
765
766
inptr = coef_block;
767
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
768
wsptr = workspace;
769
for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
770
/* Even part */
771
772
tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
773
tmp12 <<= CONST_BITS;
774
/* Add fudge factor here for final descale. */
775
tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
776
tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
777
tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
778
z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
779
z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
780
z3 = tmp12 + z2;
781
tmp10 = z3 + z1;
782
tmp11 = z3 - z1;
783
tmp12 -= z2 << 2;
784
785
/* Odd part */
786
787
z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
788
z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
789
790
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
791
tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
792
tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
793
794
/* Final output stage */
795
796
wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
797
wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
798
wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
799
wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
800
wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
801
}
802
803
/* Pass 2: process 5 rows from work array, store into output array. */
804
805
wsptr = workspace;
806
for (ctr = 0; ctr < 5; ctr++) {
807
outptr = output_buf[ctr] + output_col;
808
809
/* Even part */
810
811
/* Add range center and fudge factor for final descale and range-limit. */
812
tmp12 = (INT32) wsptr[0] + PASS2_OFFSET;
813
tmp12 <<= CONST_BITS;
814
#if PASS2_BITS == 0
815
tmp12 += ONE << (CONST_BITS-1);
816
#endif
817
tmp0 = (INT32) wsptr[2];
818
tmp1 = (INT32) wsptr[4];
819
z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
820
z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
821
z3 = tmp12 + z2;
822
tmp10 = z3 + z1;
823
tmp11 = z3 - z1;
824
tmp12 -= z2 << 2;
825
826
/* Odd part */
827
828
z2 = (INT32) wsptr[1];
829
z3 = (INT32) wsptr[3];
830
831
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
832
tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
833
tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
834
835
/* Final output stage */
836
837
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
838
CONST_BITS+PASS2_BITS)
839
& RANGE_MASK];
840
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
841
CONST_BITS+PASS2_BITS)
842
& RANGE_MASK];
843
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
844
CONST_BITS+PASS2_BITS)
845
& RANGE_MASK];
846
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
847
CONST_BITS+PASS2_BITS)
848
& RANGE_MASK];
849
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
850
CONST_BITS+PASS2_BITS)
851
& RANGE_MASK];
852
853
wsptr += 5; /* advance pointer to next row */
854
}
855
}
856
857
858
/*
859
* Perform dequantization and inverse DCT on one block of coefficients,
860
* producing a reduced-size 4x4 output block.
861
*
862
* Optimized algorithm with 3 multiplications in the 1-D kernel.
863
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
864
*/
865
866
GLOBAL(void)
867
jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
868
JCOEFPTR coef_block,
869
JSAMPARRAY output_buf, JDIMENSION output_col)
870
{
871
INT32 tmp0, tmp2, tmp10, tmp12;
872
INT32 z1, z2, z3;
873
JCOEFPTR inptr;
874
ISLOW_MULT_TYPE * quantptr;
875
int * wsptr;
876
JSAMPROW outptr;
877
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
878
int ctr;
879
int workspace[4*4]; /* buffers data between passes */
880
SHIFT_TEMPS
881
882
/* Pass 1: process columns from input, store into work array. */
883
884
inptr = coef_block;
885
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
886
wsptr = workspace;
887
for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
888
/* Even part */
889
890
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
891
tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
892
893
#if PASS1_BITS > 0
894
tmp10 = (tmp0 + tmp2) << PASS1_BITS;
895
tmp12 = (tmp0 - tmp2) << PASS1_BITS;
896
#else
897
tmp10 = tmp0 + tmp2;
898
tmp12 = tmp0 - tmp2;
899
#endif
900
901
/* Odd part */
902
/* Same rotation as in the even part of the 8x8 LL&M IDCT */
903
904
z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
905
z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
906
907
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
908
/* Add fudge factor here for final descale. */
909
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
910
tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
911
CONST_BITS-PASS1_BITS);
912
tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
913
CONST_BITS-PASS1_BITS);
914
915
/* Final output stage */
916
917
wsptr[4*0] = (int) (tmp10 + tmp0);
918
wsptr[4*3] = (int) (tmp10 - tmp0);
919
wsptr[4*1] = (int) (tmp12 + tmp2);
920
wsptr[4*2] = (int) (tmp12 - tmp2);
921
}
922
923
/* Pass 2: process 4 rows from work array, store into output array. */
924
925
wsptr = workspace;
926
for (ctr = 0; ctr < 4; ctr++) {
927
outptr = output_buf[ctr] + output_col;
928
929
/* Even part */
930
931
/* Add range center and fudge factor for final descale and range-limit. */
932
tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
933
tmp2 = (INT32) wsptr[2];
934
tmp0 <<= CONST_BITS;
935
tmp2 <<= CONST_BITS;
936
#if PASS2_BITS == 0
937
tmp0 += ONE << (CONST_BITS-1);
938
#endif
939
940
tmp10 = tmp0 + tmp2;
941
tmp12 = tmp0 - tmp2;
942
943
/* Odd part */
944
/* Same rotation as in the even part of the 8x8 LL&M IDCT */
945
946
z2 = (INT32) wsptr[1];
947
z3 = (INT32) wsptr[3];
948
949
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
950
tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
951
tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
952
953
/* Final output stage */
954
955
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
956
CONST_BITS+PASS2_BITS)
957
& RANGE_MASK];
958
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
959
CONST_BITS+PASS2_BITS)
960
& RANGE_MASK];
961
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
962
CONST_BITS+PASS2_BITS)
963
& RANGE_MASK];
964
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
965
CONST_BITS+PASS2_BITS)
966
& RANGE_MASK];
967
968
wsptr += 4; /* advance pointer to next row */
969
}
970
}
971
972
973
/*
974
* Perform dequantization and inverse DCT on one block of coefficients,
975
* producing a reduced-size 3x3 output block.
976
*
977
* Optimized algorithm with 2 multiplications in the 1-D kernel.
978
* cK represents sqrt(2) * cos(K*pi/6).
979
*/
980
981
GLOBAL(void)
982
jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
983
JCOEFPTR coef_block,
984
JSAMPARRAY output_buf, JDIMENSION output_col)
985
{
986
INT32 tmp0, tmp2, tmp10, tmp12;
987
JCOEFPTR inptr;
988
ISLOW_MULT_TYPE * quantptr;
989
int * wsptr;
990
JSAMPROW outptr;
991
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
992
int ctr;
993
int workspace[3*3]; /* buffers data between passes */
994
SHIFT_TEMPS
995
996
/* Pass 1: process columns from input, store into work array. */
997
998
inptr = coef_block;
999
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1000
wsptr = workspace;
1001
for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
1002
/* Even part */
1003
1004
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1005
tmp0 <<= CONST_BITS;
1006
/* Add fudge factor here for final descale. */
1007
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
1008
tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1009
tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
1010
tmp10 = tmp0 + tmp12;
1011
tmp2 = tmp0 - tmp12 - tmp12;
1012
1013
/* Odd part */
1014
1015
tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1016
tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
1017
1018
/* Final output stage */
1019
1020
wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
1021
wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
1022
wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
1023
}
1024
1025
/* Pass 2: process 3 rows from work array, store into output array. */
1026
1027
wsptr = workspace;
1028
for (ctr = 0; ctr < 3; ctr++) {
1029
outptr = output_buf[ctr] + output_col;
1030
1031
/* Even part */
1032
1033
/* Add range center and fudge factor for final descale and range-limit. */
1034
tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
1035
tmp0 <<= CONST_BITS;
1036
#if PASS2_BITS == 0
1037
tmp0 += ONE << (CONST_BITS-1);
1038
#endif
1039
tmp2 = (INT32) wsptr[2];
1040
tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
1041
tmp10 = tmp0 + tmp12;
1042
tmp2 = tmp0 - tmp12 - tmp12;
1043
1044
/* Odd part */
1045
1046
tmp12 = (INT32) wsptr[1];
1047
tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
1048
1049
/* Final output stage */
1050
1051
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1052
CONST_BITS+PASS2_BITS)
1053
& RANGE_MASK];
1054
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1055
CONST_BITS+PASS2_BITS)
1056
& RANGE_MASK];
1057
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
1058
CONST_BITS+PASS2_BITS)
1059
& RANGE_MASK];
1060
1061
wsptr += 3; /* advance pointer to next row */
1062
}
1063
}
1064
1065
1066
/*
1067
* Perform dequantization and inverse DCT on one block of coefficients,
1068
* producing a reduced-size 2x2 output block.
1069
*
1070
* Multiplication-less algorithm.
1071
*/
1072
1073
GLOBAL(void)
1074
jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1075
JCOEFPTR coef_block,
1076
JSAMPARRAY output_buf, JDIMENSION output_col)
1077
{
1078
DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1079
ISLOW_MULT_TYPE * quantptr;
1080
JSAMPROW outptr;
1081
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1082
ISHIFT_TEMPS
1083
1084
/* Pass 1: process columns from input. */
1085
1086
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1087
1088
/* Column 0 */
1089
tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
1090
tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
1091
1092
#if PASS2_BITS > PASS1_BITS
1093
/* Add range center and fudge factor for final downscale and range-limit. */
1094
#if PASS2_BITS > PASS1_BITS + 1
1095
tmp4 += (((DCTELEM) RANGE_CENTER) << (PASS2_BITS-PASS1_BITS)) +
1096
(1 << (PASS2_BITS-PASS1_BITS-1));
1097
#else
1098
tmp4 += (((DCTELEM) RANGE_CENTER) << 1) + 1;
1099
#endif
1100
1101
tmp0 = tmp4 + tmp5;
1102
tmp2 = tmp4 - tmp5;
1103
1104
/* Column 1 */
1105
tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
1106
tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
1107
1108
tmp1 = tmp4 + tmp5;
1109
tmp3 = tmp4 - tmp5;
1110
1111
/* Pass 2: process 2 rows, store into output array. */
1112
1113
/* Row 0 */
1114
outptr = output_buf[0] + output_col;
1115
1116
outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1,
1117
PASS2_BITS-PASS1_BITS)
1118
& RANGE_MASK];
1119
outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1,
1120
PASS2_BITS-PASS1_BITS)
1121
& RANGE_MASK];
1122
1123
/* Row 1 */
1124
outptr = output_buf[1] + output_col;
1125
1126
outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3,
1127
PASS2_BITS-PASS1_BITS)
1128
& RANGE_MASK];
1129
outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3,
1130
PASS2_BITS-PASS1_BITS)
1131
& RANGE_MASK];
1132
#else
1133
#if PASS2_BITS == PASS1_BITS
1134
tmp4 += (DCTELEM) RANGE_CENTER; /* add range center for final range-limit */
1135
1136
tmp0 = tmp4 + tmp5;
1137
tmp2 = tmp4 - tmp5;
1138
1139
/* Column 1 */
1140
tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
1141
tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
1142
#else
1143
tmp4 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
1144
tmp5 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
1145
1146
tmp4 += (DCTELEM) RANGE_CENTER; /* add range center for final range-limit */
1147
1148
tmp0 = tmp4 + tmp5;
1149
tmp2 = tmp4 - tmp5;
1150
1151
/* Column 1 */
1152
tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
1153
tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
1154
1155
tmp4 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
1156
tmp5 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
1157
#endif
1158
1159
tmp1 = tmp4 + tmp5;
1160
tmp3 = tmp4 - tmp5;
1161
1162
/* Pass 2: process 2 rows, store into output array. */
1163
1164
/* Row 0 */
1165
outptr = output_buf[0] + output_col;
1166
1167
outptr[0] = range_limit[(int) (tmp0 + tmp1) & RANGE_MASK];
1168
outptr[1] = range_limit[(int) (tmp0 - tmp1) & RANGE_MASK];
1169
1170
/* Row 1 */
1171
outptr = output_buf[1] + output_col;
1172
1173
outptr[0] = range_limit[(int) (tmp2 + tmp3) & RANGE_MASK];
1174
outptr[1] = range_limit[(int) (tmp2 - tmp3) & RANGE_MASK];
1175
#endif
1176
}
1177
1178
1179
/*
1180
* Perform dequantization and inverse DCT on one block of coefficients,
1181
* producing a reduced-size 1x1 output block.
1182
*
1183
* This is just a rescale of the DC coefficient.
1184
*/
1185
1186
GLOBAL(void)
1187
jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1188
JCOEFPTR coef_block,
1189
JSAMPARRAY output_buf, JDIMENSION output_col)
1190
{
1191
DCTELEM dcval;
1192
ISLOW_MULT_TYPE * quantptr;
1193
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1194
ISHIFT_TEMPS
1195
1196
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1197
1198
dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
1199
1200
#if PASS2_BITS > PASS1_BITS
1201
/* Add range center and fudge factor for downscale and range-limit. */
1202
#if PASS2_BITS > PASS1_BITS + 1
1203
dcval += (((DCTELEM) RANGE_CENTER) << (PASS2_BITS-PASS1_BITS)) +
1204
(1 << (PASS2_BITS-PASS1_BITS-1));
1205
#else
1206
dcval += (((DCTELEM) RANGE_CENTER) << 1) + 1;
1207
#endif
1208
1209
output_buf[0][output_col] =
1210
range_limit[(int) IRIGHT_SHIFT(dcval, PASS2_BITS-PASS1_BITS) & RANGE_MASK];
1211
#else
1212
#if PASS2_BITS < PASS1_BITS
1213
dcval <<= (PASS1_BITS-PASS2_BITS); /* upscale */
1214
#endif
1215
1216
output_buf[0][output_col] =
1217
range_limit[((int) dcval + RANGE_CENTER) & RANGE_MASK];
1218
#endif
1219
}
1220
1221
1222
/*
1223
* Perform dequantization and inverse DCT on one block of coefficients,
1224
* producing a 9x9 output block.
1225
*
1226
* Optimized algorithm with 10 multiplications in the 1-D kernel.
1227
* cK represents sqrt(2) * cos(K*pi/18).
1228
*/
1229
1230
GLOBAL(void)
1231
jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1232
JCOEFPTR coef_block,
1233
JSAMPARRAY output_buf, JDIMENSION output_col)
1234
{
1235
INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
1236
INT32 z1, z2, z3, z4;
1237
JCOEFPTR inptr;
1238
ISLOW_MULT_TYPE * quantptr;
1239
int * wsptr;
1240
JSAMPROW outptr;
1241
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1242
int ctr;
1243
int workspace[8*9]; /* buffers data between passes */
1244
SHIFT_TEMPS
1245
1246
/* Pass 1: process columns from input, store into work array. */
1247
1248
inptr = coef_block;
1249
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1250
wsptr = workspace;
1251
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1252
/* Even part */
1253
1254
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1255
tmp0 <<= CONST_BITS;
1256
/* Add fudge factor here for final descale. */
1257
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
1258
1259
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1260
z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1261
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1262
1263
tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
1264
tmp1 = tmp0 + tmp3;
1265
tmp2 = tmp0 - tmp3 - tmp3;
1266
1267
tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1268
tmp11 = tmp2 + tmp0;
1269
tmp14 = tmp2 - tmp0 - tmp0;
1270
1271
tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1272
tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1273
tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1274
1275
tmp10 = tmp1 + tmp0 - tmp3;
1276
tmp12 = tmp1 - tmp0 + tmp2;
1277
tmp13 = tmp1 - tmp2 + tmp3;
1278
1279
/* Odd part */
1280
1281
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1282
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1283
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1284
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1285
1286
z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
1287
1288
tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
1289
tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
1290
tmp0 = tmp2 + tmp3 - z2;
1291
tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
1292
tmp2 += z2 - tmp1;
1293
tmp3 += z2 + tmp1;
1294
tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1295
1296
/* Final output stage */
1297
1298
wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
1299
wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
1300
wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
1301
wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
1302
wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
1303
wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
1304
wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
1305
wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
1306
wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
1307
}
1308
1309
/* Pass 2: process 9 rows from work array, store into output array. */
1310
1311
wsptr = workspace;
1312
for (ctr = 0; ctr < 9; ctr++) {
1313
outptr = output_buf[ctr] + output_col;
1314
1315
/* Even part */
1316
1317
/* Add range center and fudge factor for final descale and range-limit. */
1318
tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
1319
tmp0 <<= CONST_BITS;
1320
#if PASS2_BITS == 0
1321
tmp0 += ONE << (CONST_BITS-1);
1322
#endif
1323
1324
z1 = (INT32) wsptr[2];
1325
z2 = (INT32) wsptr[4];
1326
z3 = (INT32) wsptr[6];
1327
1328
tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
1329
tmp1 = tmp0 + tmp3;
1330
tmp2 = tmp0 - tmp3 - tmp3;
1331
1332
tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1333
tmp11 = tmp2 + tmp0;
1334
tmp14 = tmp2 - tmp0 - tmp0;
1335
1336
tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1337
tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1338
tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1339
1340
tmp10 = tmp1 + tmp0 - tmp3;
1341
tmp12 = tmp1 - tmp0 + tmp2;
1342
tmp13 = tmp1 - tmp2 + tmp3;
1343
1344
/* Odd part */
1345
1346
z1 = (INT32) wsptr[1];
1347
z2 = (INT32) wsptr[3];
1348
z3 = (INT32) wsptr[5];
1349
z4 = (INT32) wsptr[7];
1350
1351
z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
1352
1353
tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
1354
tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
1355
tmp0 = tmp2 + tmp3 - z2;
1356
tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
1357
tmp2 += z2 - tmp1;
1358
tmp3 += z2 + tmp1;
1359
tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1360
1361
/* Final output stage */
1362
1363
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1364
CONST_BITS+PASS2_BITS)
1365
& RANGE_MASK];
1366
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1367
CONST_BITS+PASS2_BITS)
1368
& RANGE_MASK];
1369
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
1370
CONST_BITS+PASS2_BITS)
1371
& RANGE_MASK];
1372
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
1373
CONST_BITS+PASS2_BITS)
1374
& RANGE_MASK];
1375
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
1376
CONST_BITS+PASS2_BITS)
1377
& RANGE_MASK];
1378
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
1379
CONST_BITS+PASS2_BITS)
1380
& RANGE_MASK];
1381
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
1382
CONST_BITS+PASS2_BITS)
1383
& RANGE_MASK];
1384
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
1385
CONST_BITS+PASS2_BITS)
1386
& RANGE_MASK];
1387
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
1388
CONST_BITS+PASS2_BITS)
1389
& RANGE_MASK];
1390
1391
wsptr += 8; /* advance pointer to next row */
1392
}
1393
}
1394
1395
1396
/*
1397
* Perform dequantization and inverse DCT on one block of coefficients,
1398
* producing a 10x10 output block.
1399
*
1400
* Optimized algorithm with 12 multiplications in the 1-D kernel.
1401
* cK represents sqrt(2) * cos(K*pi/20).
1402
*/
1403
1404
GLOBAL(void)
1405
jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1406
JCOEFPTR coef_block,
1407
JSAMPARRAY output_buf, JDIMENSION output_col)
1408
{
1409
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1410
INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
1411
INT32 z1, z2, z3, z4, z5;
1412
JCOEFPTR inptr;
1413
ISLOW_MULT_TYPE * quantptr;
1414
int * wsptr;
1415
JSAMPROW outptr;
1416
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1417
int ctr;
1418
int workspace[8*10]; /* buffers data between passes */
1419
SHIFT_TEMPS
1420
1421
/* Pass 1: process columns from input, store into work array. */
1422
1423
inptr = coef_block;
1424
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1425
wsptr = workspace;
1426
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1427
/* Even part */
1428
1429
z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1430
z3 <<= CONST_BITS;
1431
/* Add fudge factor here for final descale. */
1432
z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1433
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1434
z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1435
z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1436
tmp10 = z3 + z1;
1437
tmp11 = z3 - z2;
1438
1439
tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
1440
CONST_BITS-PASS1_BITS);
1441
1442
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1443
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1444
1445
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1446
tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1447
tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1448
1449
tmp20 = tmp10 + tmp12;
1450
tmp24 = tmp10 - tmp12;
1451
tmp21 = tmp11 + tmp13;
1452
tmp23 = tmp11 - tmp13;
1453
1454
/* Odd part */
1455
1456
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1457
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1458
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1459
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1460
1461
tmp11 = z2 + z4;
1462
tmp13 = z2 - z4;
1463
1464
tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1465
z5 = z3 << CONST_BITS;
1466
1467
z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1468
z4 = z5 + tmp12;
1469
1470
tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1471
tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1472
1473
z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1474
z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
1475
1476
#if PASS1_BITS > 0
1477
tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
1478
#else
1479
tmp12 = z1 - tmp13 - z3;
1480
#endif
1481
1482
tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1483
tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1484
1485
/* Final output stage */
1486
1487
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1488
wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1489
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1490
wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1491
wsptr[8*2] = (int) (tmp22 + tmp12);
1492
wsptr[8*7] = (int) (tmp22 - tmp12);
1493
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1494
wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1495
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1496
wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1497
}
1498
1499
/* Pass 2: process 10 rows from work array, store into output array. */
1500
1501
wsptr = workspace;
1502
for (ctr = 0; ctr < 10; ctr++) {
1503
outptr = output_buf[ctr] + output_col;
1504
1505
/* Even part */
1506
1507
/* Add range center and fudge factor for final descale and range-limit. */
1508
z3 = (INT32) wsptr[0] + PASS2_OFFSET;
1509
z3 <<= CONST_BITS;
1510
#if PASS2_BITS == 0
1511
z3 += ONE << (CONST_BITS-1);
1512
#endif
1513
z4 = (INT32) wsptr[4];
1514
z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1515
z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1516
tmp10 = z3 + z1;
1517
tmp11 = z3 - z2;
1518
1519
tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
1520
1521
z2 = (INT32) wsptr[2];
1522
z3 = (INT32) wsptr[6];
1523
1524
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1525
tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1526
tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1527
1528
tmp20 = tmp10 + tmp12;
1529
tmp24 = tmp10 - tmp12;
1530
tmp21 = tmp11 + tmp13;
1531
tmp23 = tmp11 - tmp13;
1532
1533
/* Odd part */
1534
1535
z1 = (INT32) wsptr[1];
1536
z2 = (INT32) wsptr[3];
1537
z3 = (INT32) wsptr[5];
1538
z3 <<= CONST_BITS;
1539
z4 = (INT32) wsptr[7];
1540
1541
tmp11 = z2 + z4;
1542
tmp13 = z2 - z4;
1543
1544
tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1545
1546
z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1547
z4 = z3 + tmp12;
1548
1549
tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1550
tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1551
1552
z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1553
z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
1554
1555
tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
1556
1557
tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1558
tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1559
1560
/* Final output stage */
1561
1562
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1563
CONST_BITS+PASS2_BITS)
1564
& RANGE_MASK];
1565
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1566
CONST_BITS+PASS2_BITS)
1567
& RANGE_MASK];
1568
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1569
CONST_BITS+PASS2_BITS)
1570
& RANGE_MASK];
1571
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1572
CONST_BITS+PASS2_BITS)
1573
& RANGE_MASK];
1574
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1575
CONST_BITS+PASS2_BITS)
1576
& RANGE_MASK];
1577
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1578
CONST_BITS+PASS2_BITS)
1579
& RANGE_MASK];
1580
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1581
CONST_BITS+PASS2_BITS)
1582
& RANGE_MASK];
1583
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1584
CONST_BITS+PASS2_BITS)
1585
& RANGE_MASK];
1586
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1587
CONST_BITS+PASS2_BITS)
1588
& RANGE_MASK];
1589
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1590
CONST_BITS+PASS2_BITS)
1591
& RANGE_MASK];
1592
1593
wsptr += 8; /* advance pointer to next row */
1594
}
1595
}
1596
1597
1598
/*
1599
* Perform dequantization and inverse DCT on one block of coefficients,
1600
* producing an 11x11 output block.
1601
*
1602
* Optimized algorithm with 24 multiplications in the 1-D kernel.
1603
* cK represents sqrt(2) * cos(K*pi/22).
1604
*/
1605
1606
GLOBAL(void)
1607
jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1608
JCOEFPTR coef_block,
1609
JSAMPARRAY output_buf, JDIMENSION output_col)
1610
{
1611
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1612
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1613
INT32 z1, z2, z3, z4;
1614
JCOEFPTR inptr;
1615
ISLOW_MULT_TYPE * quantptr;
1616
int * wsptr;
1617
JSAMPROW outptr;
1618
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1619
int ctr;
1620
int workspace[8*11]; /* buffers data between passes */
1621
SHIFT_TEMPS
1622
1623
/* Pass 1: process columns from input, store into work array. */
1624
1625
inptr = coef_block;
1626
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1627
wsptr = workspace;
1628
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1629
/* Even part */
1630
1631
tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1632
tmp10 <<= CONST_BITS;
1633
/* Add fudge factor here for final descale. */
1634
tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
1635
1636
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1637
z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1638
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1639
1640
tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1641
tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1642
z4 = z1 + z3;
1643
tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1644
z4 -= z2;
1645
tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1646
tmp21 = tmp20 + tmp23 + tmp25 -
1647
MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1648
tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1649
tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1650
tmp24 += tmp25;
1651
tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1652
tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1653
MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1654
tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1655
1656
/* Odd part */
1657
1658
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1659
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1660
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1661
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1662
1663
tmp11 = z1 + z2;
1664
tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1665
tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1666
tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1667
tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1668
tmp10 = tmp11 + tmp12 + tmp13 -
1669
MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1670
z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1671
tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1672
tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1673
z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
1674
tmp11 += z1;
1675
tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1676
tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
1677
MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1678
MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1679
1680
/* Final output stage */
1681
1682
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1683
wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1684
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1685
wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1686
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1687
wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1688
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1689
wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1690
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1691
wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1692
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
1693
}
1694
1695
/* Pass 2: process 11 rows from work array, store into output array. */
1696
1697
wsptr = workspace;
1698
for (ctr = 0; ctr < 11; ctr++) {
1699
outptr = output_buf[ctr] + output_col;
1700
1701
/* Even part */
1702
1703
/* Add range center and fudge factor for final descale and range-limit. */
1704
tmp10 = (INT32) wsptr[0] + PASS2_OFFSET;
1705
tmp10 <<= CONST_BITS;
1706
#if PASS2_BITS == 0
1707
tmp10 += ONE << (CONST_BITS-1);
1708
#endif
1709
1710
z1 = (INT32) wsptr[2];
1711
z2 = (INT32) wsptr[4];
1712
z3 = (INT32) wsptr[6];
1713
1714
tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1715
tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1716
z4 = z1 + z3;
1717
tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1718
z4 -= z2;
1719
tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1720
tmp21 = tmp20 + tmp23 + tmp25 -
1721
MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1722
tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1723
tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1724
tmp24 += tmp25;
1725
tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1726
tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1727
MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1728
tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1729
1730
/* Odd part */
1731
1732
z1 = (INT32) wsptr[1];
1733
z2 = (INT32) wsptr[3];
1734
z3 = (INT32) wsptr[5];
1735
z4 = (INT32) wsptr[7];
1736
1737
tmp11 = z1 + z2;
1738
tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1739
tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1740
tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1741
tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1742
tmp10 = tmp11 + tmp12 + tmp13 -
1743
MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1744
z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1745
tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1746
tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1747
z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
1748
tmp11 += z1;
1749
tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1750
tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
1751
MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1752
MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1753
1754
/* Final output stage */
1755
1756
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1757
CONST_BITS+PASS2_BITS)
1758
& RANGE_MASK];
1759
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1760
CONST_BITS+PASS2_BITS)
1761
& RANGE_MASK];
1762
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1763
CONST_BITS+PASS2_BITS)
1764
& RANGE_MASK];
1765
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1766
CONST_BITS+PASS2_BITS)
1767
& RANGE_MASK];
1768
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1769
CONST_BITS+PASS2_BITS)
1770
& RANGE_MASK];
1771
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1772
CONST_BITS+PASS2_BITS)
1773
& RANGE_MASK];
1774
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1775
CONST_BITS+PASS2_BITS)
1776
& RANGE_MASK];
1777
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1778
CONST_BITS+PASS2_BITS)
1779
& RANGE_MASK];
1780
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1781
CONST_BITS+PASS2_BITS)
1782
& RANGE_MASK];
1783
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1784
CONST_BITS+PASS2_BITS)
1785
& RANGE_MASK];
1786
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25,
1787
CONST_BITS+PASS2_BITS)
1788
& RANGE_MASK];
1789
1790
wsptr += 8; /* advance pointer to next row */
1791
}
1792
}
1793
1794
1795
/*
1796
* Perform dequantization and inverse DCT on one block of coefficients,
1797
* producing a 12x12 output block.
1798
*
1799
* Optimized algorithm with 15 multiplications in the 1-D kernel.
1800
* cK represents sqrt(2) * cos(K*pi/24).
1801
*/
1802
1803
GLOBAL(void)
1804
jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1805
JCOEFPTR coef_block,
1806
JSAMPARRAY output_buf, JDIMENSION output_col)
1807
{
1808
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1809
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1810
INT32 z1, z2, z3, z4;
1811
JCOEFPTR inptr;
1812
ISLOW_MULT_TYPE * quantptr;
1813
int * wsptr;
1814
JSAMPROW outptr;
1815
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1816
int ctr;
1817
int workspace[8*12]; /* buffers data between passes */
1818
SHIFT_TEMPS
1819
1820
/* Pass 1: process columns from input, store into work array. */
1821
1822
inptr = coef_block;
1823
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1824
wsptr = workspace;
1825
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1826
/* Even part */
1827
1828
z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1829
z3 <<= CONST_BITS;
1830
/* Add fudge factor here for final descale. */
1831
z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1832
1833
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1834
z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1835
1836
tmp10 = z3 + z4;
1837
tmp11 = z3 - z4;
1838
1839
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1840
z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1841
z1 <<= CONST_BITS;
1842
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1843
z2 <<= CONST_BITS;
1844
1845
tmp12 = z1 - z2;
1846
1847
tmp21 = z3 + tmp12;
1848
tmp24 = z3 - tmp12;
1849
1850
tmp12 = z4 + z2;
1851
1852
tmp20 = tmp10 + tmp12;
1853
tmp25 = tmp10 - tmp12;
1854
1855
tmp12 = z4 - z1 - z2;
1856
1857
tmp22 = tmp11 + tmp12;
1858
tmp23 = tmp11 - tmp12;
1859
1860
/* Odd part */
1861
1862
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1863
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1864
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1865
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1866
1867
tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1868
tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
1869
1870
tmp10 = z1 + z3;
1871
tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1872
tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1873
tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1874
tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
1875
tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1876
tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1877
tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1878
MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1879
1880
z1 -= z4;
1881
z2 -= z3;
1882
z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1883
tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1884
tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
1885
1886
/* Final output stage */
1887
1888
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1889
wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1890
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1891
wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1892
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1893
wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1894
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1895
wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1896
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1897
wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1898
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1899
wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1900
}
1901
1902
/* Pass 2: process 12 rows from work array, store into output array. */
1903
1904
wsptr = workspace;
1905
for (ctr = 0; ctr < 12; ctr++) {
1906
outptr = output_buf[ctr] + output_col;
1907
1908
/* Even part */
1909
1910
/* Add range center and fudge factor for final descale and range-limit. */
1911
z3 = (INT32) wsptr[0] + PASS2_OFFSET;
1912
z3 <<= CONST_BITS;
1913
#if PASS2_BITS == 0
1914
z3 += ONE << (CONST_BITS-1);
1915
#endif
1916
1917
z4 = (INT32) wsptr[4];
1918
z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1919
1920
tmp10 = z3 + z4;
1921
tmp11 = z3 - z4;
1922
1923
z1 = (INT32) wsptr[2];
1924
z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1925
z1 <<= CONST_BITS;
1926
z2 = (INT32) wsptr[6];
1927
z2 <<= CONST_BITS;
1928
1929
tmp12 = z1 - z2;
1930
1931
tmp21 = z3 + tmp12;
1932
tmp24 = z3 - tmp12;
1933
1934
tmp12 = z4 + z2;
1935
1936
tmp20 = tmp10 + tmp12;
1937
tmp25 = tmp10 - tmp12;
1938
1939
tmp12 = z4 - z1 - z2;
1940
1941
tmp22 = tmp11 + tmp12;
1942
tmp23 = tmp11 - tmp12;
1943
1944
/* Odd part */
1945
1946
z1 = (INT32) wsptr[1];
1947
z2 = (INT32) wsptr[3];
1948
z3 = (INT32) wsptr[5];
1949
z4 = (INT32) wsptr[7];
1950
1951
tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1952
tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
1953
1954
tmp10 = z1 + z3;
1955
tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1956
tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1957
tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1958
tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
1959
tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1960
tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1961
tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1962
MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1963
1964
z1 -= z4;
1965
z2 -= z3;
1966
z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1967
tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1968
tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
1969
1970
/* Final output stage */
1971
1972
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1973
CONST_BITS+PASS2_BITS)
1974
& RANGE_MASK];
1975
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1976
CONST_BITS+PASS2_BITS)
1977
& RANGE_MASK];
1978
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1979
CONST_BITS+PASS2_BITS)
1980
& RANGE_MASK];
1981
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1982
CONST_BITS+PASS2_BITS)
1983
& RANGE_MASK];
1984
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1985
CONST_BITS+PASS2_BITS)
1986
& RANGE_MASK];
1987
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1988
CONST_BITS+PASS2_BITS)
1989
& RANGE_MASK];
1990
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1991
CONST_BITS+PASS2_BITS)
1992
& RANGE_MASK];
1993
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1994
CONST_BITS+PASS2_BITS)
1995
& RANGE_MASK];
1996
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1997
CONST_BITS+PASS2_BITS)
1998
& RANGE_MASK];
1999
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2000
CONST_BITS+PASS2_BITS)
2001
& RANGE_MASK];
2002
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2003
CONST_BITS+PASS2_BITS)
2004
& RANGE_MASK];
2005
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2006
CONST_BITS+PASS2_BITS)
2007
& RANGE_MASK];
2008
2009
wsptr += 8; /* advance pointer to next row */
2010
}
2011
}
2012
2013
2014
/*
2015
* Perform dequantization and inverse DCT on one block of coefficients,
2016
* producing a 13x13 output block.
2017
*
2018
* Optimized algorithm with 29 multiplications in the 1-D kernel.
2019
* cK represents sqrt(2) * cos(K*pi/26).
2020
*/
2021
2022
GLOBAL(void)
2023
jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2024
JCOEFPTR coef_block,
2025
JSAMPARRAY output_buf, JDIMENSION output_col)
2026
{
2027
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
2028
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
2029
INT32 z1, z2, z3, z4;
2030
JCOEFPTR inptr;
2031
ISLOW_MULT_TYPE * quantptr;
2032
int * wsptr;
2033
JSAMPROW outptr;
2034
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2035
int ctr;
2036
int workspace[8*13]; /* buffers data between passes */
2037
SHIFT_TEMPS
2038
2039
/* Pass 1: process columns from input, store into work array. */
2040
2041
inptr = coef_block;
2042
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2043
wsptr = workspace;
2044
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2045
/* Even part */
2046
2047
z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2048
z1 <<= CONST_BITS;
2049
/* Add fudge factor here for final descale. */
2050
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2051
2052
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2053
z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2054
z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2055
2056
tmp10 = z3 + z4;
2057
tmp11 = z3 - z4;
2058
2059
tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
2060
tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
2061
2062
tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
2063
tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
2064
2065
tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
2066
tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
2067
2068
tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
2069
tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
2070
2071
tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
2072
tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
2073
2074
tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
2075
tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
2076
2077
tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
2078
2079
/* Odd part */
2080
2081
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2082
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2083
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2084
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2085
2086
tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
2087
tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
2088
tmp15 = z1 + z4;
2089
tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
2090
tmp10 = tmp11 + tmp12 + tmp13 -
2091
MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
2092
tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
2093
tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
2094
tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
2095
tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
2096
tmp11 += tmp14;
2097
tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
2098
tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
2099
tmp12 += tmp14;
2100
tmp13 += tmp14;
2101
tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
2102
tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
2103
MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
2104
z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
2105
tmp14 += z1;
2106
tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
2107
MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
2108
2109
/* Final output stage */
2110
2111
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2112
wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2113
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2114
wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2115
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2116
wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2117
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
2118
wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
2119
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2120
wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2121
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2122
wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2123
wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
2124
}
2125
2126
/* Pass 2: process 13 rows from work array, store into output array. */
2127
2128
wsptr = workspace;
2129
for (ctr = 0; ctr < 13; ctr++) {
2130
outptr = output_buf[ctr] + output_col;
2131
2132
/* Even part */
2133
2134
/* Add range center and fudge factor for final descale and range-limit. */
2135
z1 = (INT32) wsptr[0] + PASS2_OFFSET;
2136
z1 <<= CONST_BITS;
2137
#if PASS2_BITS == 0
2138
z1 += ONE << (CONST_BITS-1);
2139
#endif
2140
2141
z2 = (INT32) wsptr[2];
2142
z3 = (INT32) wsptr[4];
2143
z4 = (INT32) wsptr[6];
2144
2145
tmp10 = z3 + z4;
2146
tmp11 = z3 - z4;
2147
2148
tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
2149
tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
2150
2151
tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
2152
tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
2153
2154
tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
2155
tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
2156
2157
tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
2158
tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
2159
2160
tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
2161
tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
2162
2163
tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
2164
tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
2165
2166
tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
2167
2168
/* Odd part */
2169
2170
z1 = (INT32) wsptr[1];
2171
z2 = (INT32) wsptr[3];
2172
z3 = (INT32) wsptr[5];
2173
z4 = (INT32) wsptr[7];
2174
2175
tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
2176
tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
2177
tmp15 = z1 + z4;
2178
tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
2179
tmp10 = tmp11 + tmp12 + tmp13 -
2180
MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
2181
tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
2182
tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
2183
tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
2184
tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
2185
tmp11 += tmp14;
2186
tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
2187
tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
2188
tmp12 += tmp14;
2189
tmp13 += tmp14;
2190
tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
2191
tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
2192
MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
2193
z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
2194
tmp14 += z1;
2195
tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
2196
MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
2197
2198
/* Final output stage */
2199
2200
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2201
CONST_BITS+PASS2_BITS)
2202
& RANGE_MASK];
2203
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2204
CONST_BITS+PASS2_BITS)
2205
& RANGE_MASK];
2206
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2207
CONST_BITS+PASS2_BITS)
2208
& RANGE_MASK];
2209
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2210
CONST_BITS+PASS2_BITS)
2211
& RANGE_MASK];
2212
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2213
CONST_BITS+PASS2_BITS)
2214
& RANGE_MASK];
2215
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2216
CONST_BITS+PASS2_BITS)
2217
& RANGE_MASK];
2218
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2219
CONST_BITS+PASS2_BITS)
2220
& RANGE_MASK];
2221
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2222
CONST_BITS+PASS2_BITS)
2223
& RANGE_MASK];
2224
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2225
CONST_BITS+PASS2_BITS)
2226
& RANGE_MASK];
2227
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2228
CONST_BITS+PASS2_BITS)
2229
& RANGE_MASK];
2230
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2231
CONST_BITS+PASS2_BITS)
2232
& RANGE_MASK];
2233
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2234
CONST_BITS+PASS2_BITS)
2235
& RANGE_MASK];
2236
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26,
2237
CONST_BITS+PASS2_BITS)
2238
& RANGE_MASK];
2239
2240
wsptr += 8; /* advance pointer to next row */
2241
}
2242
}
2243
2244
2245
/*
2246
* Perform dequantization and inverse DCT on one block of coefficients,
2247
* producing a 14x14 output block.
2248
*
2249
* Optimized algorithm with 20 multiplications in the 1-D kernel.
2250
* cK represents sqrt(2) * cos(K*pi/28).
2251
*/
2252
2253
GLOBAL(void)
2254
jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2255
JCOEFPTR coef_block,
2256
JSAMPARRAY output_buf, JDIMENSION output_col)
2257
{
2258
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2259
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
2260
INT32 z1, z2, z3, z4;
2261
JCOEFPTR inptr;
2262
ISLOW_MULT_TYPE * quantptr;
2263
int * wsptr;
2264
JSAMPROW outptr;
2265
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2266
int ctr;
2267
int workspace[8*14]; /* buffers data between passes */
2268
SHIFT_TEMPS
2269
2270
/* Pass 1: process columns from input, store into work array. */
2271
2272
inptr = coef_block;
2273
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2274
wsptr = workspace;
2275
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2276
/* Even part */
2277
2278
z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2279
z1 <<= CONST_BITS;
2280
/* Add fudge factor here for final descale. */
2281
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2282
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2283
z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2284
z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2285
z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2286
2287
tmp10 = z1 + z2;
2288
tmp11 = z1 + z3;
2289
tmp12 = z1 - z4;
2290
2291
tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
2292
CONST_BITS-PASS1_BITS);
2293
2294
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2295
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2296
2297
z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2298
2299
tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2300
tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2301
tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2302
MULTIPLY(z2, FIX(1.378756276)); /* c2 */
2303
2304
tmp20 = tmp10 + tmp13;
2305
tmp26 = tmp10 - tmp13;
2306
tmp21 = tmp11 + tmp14;
2307
tmp25 = tmp11 - tmp14;
2308
tmp22 = tmp12 + tmp15;
2309
tmp24 = tmp12 - tmp15;
2310
2311
/* Odd part */
2312
2313
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2314
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2315
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2316
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2317
tmp13 = z4 << CONST_BITS;
2318
2319
tmp14 = z1 + z3;
2320
tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
2321
tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
2322
tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2323
tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
2324
tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
2325
z1 -= z2;
2326
tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
2327
tmp16 += tmp15;
2328
z1 += z4;
2329
z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
2330
tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
2331
tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
2332
z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
2333
tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2334
tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
2335
2336
#if PASS1_BITS > 0
2337
tmp13 = (z1 - z3) << PASS1_BITS;
2338
#else
2339
tmp13 = z1 - z3;
2340
#endif
2341
2342
/* Final output stage */
2343
2344
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2345
wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2346
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2347
wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2348
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2349
wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2350
wsptr[8*3] = (int) (tmp23 + tmp13);
2351
wsptr[8*10] = (int) (tmp23 - tmp13);
2352
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2353
wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2354
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2355
wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2356
wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2357
wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2358
}
2359
2360
/* Pass 2: process 14 rows from work array, store into output array. */
2361
2362
wsptr = workspace;
2363
for (ctr = 0; ctr < 14; ctr++) {
2364
outptr = output_buf[ctr] + output_col;
2365
2366
/* Even part */
2367
2368
/* Add range center and fudge factor for final descale and range-limit. */
2369
z1 = (INT32) wsptr[0] + PASS2_OFFSET;
2370
z1 <<= CONST_BITS;
2371
#if PASS2_BITS == 0
2372
z1 += ONE << (CONST_BITS-1);
2373
#endif
2374
z4 = (INT32) wsptr[4];
2375
z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2376
z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2377
z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2378
2379
tmp10 = z1 + z2;
2380
tmp11 = z1 + z3;
2381
tmp12 = z1 - z4;
2382
2383
tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
2384
2385
z1 = (INT32) wsptr[2];
2386
z2 = (INT32) wsptr[6];
2387
2388
z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2389
2390
tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2391
tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2392
tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2393
MULTIPLY(z2, FIX(1.378756276)); /* c2 */
2394
2395
tmp20 = tmp10 + tmp13;
2396
tmp26 = tmp10 - tmp13;
2397
tmp21 = tmp11 + tmp14;
2398
tmp25 = tmp11 - tmp14;
2399
tmp22 = tmp12 + tmp15;
2400
tmp24 = tmp12 - tmp15;
2401
2402
/* Odd part */
2403
2404
z1 = (INT32) wsptr[1];
2405
z2 = (INT32) wsptr[3];
2406
z3 = (INT32) wsptr[5];
2407
z4 = (INT32) wsptr[7];
2408
z4 <<= CONST_BITS;
2409
2410
tmp14 = z1 + z3;
2411
tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
2412
tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
2413
tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2414
tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
2415
tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
2416
z1 -= z2;
2417
tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
2418
tmp16 += tmp15;
2419
tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
2420
tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
2421
tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
2422
tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
2423
tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2424
tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
2425
2426
tmp13 = ((z1 - z3) << CONST_BITS) + z4;
2427
2428
/* Final output stage */
2429
2430
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2431
CONST_BITS+PASS2_BITS)
2432
& RANGE_MASK];
2433
outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2434
CONST_BITS+PASS2_BITS)
2435
& RANGE_MASK];
2436
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2437
CONST_BITS+PASS2_BITS)
2438
& RANGE_MASK];
2439
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2440
CONST_BITS+PASS2_BITS)
2441
& RANGE_MASK];
2442
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2443
CONST_BITS+PASS2_BITS)
2444
& RANGE_MASK];
2445
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2446
CONST_BITS+PASS2_BITS)
2447
& RANGE_MASK];
2448
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2449
CONST_BITS+PASS2_BITS)
2450
& RANGE_MASK];
2451
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2452
CONST_BITS+PASS2_BITS)
2453
& RANGE_MASK];
2454
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2455
CONST_BITS+PASS2_BITS)
2456
& RANGE_MASK];
2457
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2458
CONST_BITS+PASS2_BITS)
2459
& RANGE_MASK];
2460
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2461
CONST_BITS+PASS2_BITS)
2462
& RANGE_MASK];
2463
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2464
CONST_BITS+PASS2_BITS)
2465
& RANGE_MASK];
2466
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2467
CONST_BITS+PASS2_BITS)
2468
& RANGE_MASK];
2469
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2470
CONST_BITS+PASS2_BITS)
2471
& RANGE_MASK];
2472
2473
wsptr += 8; /* advance pointer to next row */
2474
}
2475
}
2476
2477
2478
/*
2479
* Perform dequantization and inverse DCT on one block of coefficients,
2480
* producing a 15x15 output block.
2481
*
2482
* Optimized algorithm with 22 multiplications in the 1-D kernel.
2483
* cK represents sqrt(2) * cos(K*pi/30).
2484
*/
2485
2486
GLOBAL(void)
2487
jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2488
JCOEFPTR coef_block,
2489
JSAMPARRAY output_buf, JDIMENSION output_col)
2490
{
2491
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2492
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2493
INT32 z1, z2, z3, z4;
2494
JCOEFPTR inptr;
2495
ISLOW_MULT_TYPE * quantptr;
2496
int * wsptr;
2497
JSAMPROW outptr;
2498
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2499
int ctr;
2500
int workspace[8*15]; /* buffers data between passes */
2501
SHIFT_TEMPS
2502
2503
/* Pass 1: process columns from input, store into work array. */
2504
2505
inptr = coef_block;
2506
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2507
wsptr = workspace;
2508
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2509
/* Even part */
2510
2511
z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2512
z1 <<= CONST_BITS;
2513
/* Add fudge factor here for final descale. */
2514
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2515
2516
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2517
z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2518
z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2519
2520
tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2521
tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2522
2523
tmp12 = z1 - tmp10;
2524
tmp13 = z1 + tmp11;
2525
z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2526
2527
z4 = z2 - z3;
2528
z3 += z2;
2529
tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2530
tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2531
z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2532
2533
tmp20 = tmp13 + tmp10 + tmp11;
2534
tmp23 = tmp12 - tmp10 + tmp11 + z2;
2535
2536
tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2537
tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2538
2539
tmp25 = tmp13 - tmp10 - tmp11;
2540
tmp26 = tmp12 + tmp10 - tmp11 - z2;
2541
2542
tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2543
tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2544
2545
tmp21 = tmp12 + tmp10 + tmp11;
2546
tmp24 = tmp13 - tmp10 + tmp11;
2547
tmp11 += tmp11;
2548
tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2549
tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
2550
2551
/* Odd part */
2552
2553
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2554
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2555
z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2556
z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2557
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2558
2559
tmp13 = z2 - z4;
2560
tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2561
tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2562
tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2563
2564
tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
2565
tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
2566
z2 = z1 - z4;
2567
tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2568
2569
tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2570
tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2571
tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2572
z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2573
tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2574
tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
2575
2576
/* Final output stage */
2577
2578
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2579
wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2580
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2581
wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2582
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2583
wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2584
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
2585
wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
2586
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2587
wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2588
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2589
wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2590
wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2591
wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2592
wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
2593
}
2594
2595
/* Pass 2: process 15 rows from work array, store into output array. */
2596
2597
wsptr = workspace;
2598
for (ctr = 0; ctr < 15; ctr++) {
2599
outptr = output_buf[ctr] + output_col;
2600
2601
/* Even part */
2602
2603
/* Add range center and fudge factor for final descale and range-limit. */
2604
z1 = (INT32) wsptr[0] + PASS2_OFFSET;
2605
z1 <<= CONST_BITS;
2606
#if PASS2_BITS == 0
2607
z1 += ONE << (CONST_BITS-1);
2608
#endif
2609
2610
z2 = (INT32) wsptr[2];
2611
z3 = (INT32) wsptr[4];
2612
z4 = (INT32) wsptr[6];
2613
2614
tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2615
tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2616
2617
tmp12 = z1 - tmp10;
2618
tmp13 = z1 + tmp11;
2619
z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2620
2621
z4 = z2 - z3;
2622
z3 += z2;
2623
tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2624
tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2625
z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2626
2627
tmp20 = tmp13 + tmp10 + tmp11;
2628
tmp23 = tmp12 - tmp10 + tmp11 + z2;
2629
2630
tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2631
tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2632
2633
tmp25 = tmp13 - tmp10 - tmp11;
2634
tmp26 = tmp12 + tmp10 - tmp11 - z2;
2635
2636
tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2637
tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2638
2639
tmp21 = tmp12 + tmp10 + tmp11;
2640
tmp24 = tmp13 - tmp10 + tmp11;
2641
tmp11 += tmp11;
2642
tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2643
tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
2644
2645
/* Odd part */
2646
2647
z1 = (INT32) wsptr[1];
2648
z2 = (INT32) wsptr[3];
2649
z4 = (INT32) wsptr[5];
2650
z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2651
z4 = (INT32) wsptr[7];
2652
2653
tmp13 = z2 - z4;
2654
tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2655
tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2656
tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2657
2658
tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
2659
tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
2660
z2 = z1 - z4;
2661
tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2662
2663
tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2664
tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2665
tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2666
z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2667
tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2668
tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
2669
2670
/* Final output stage */
2671
2672
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2673
CONST_BITS+PASS2_BITS)
2674
& RANGE_MASK];
2675
outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2676
CONST_BITS+PASS2_BITS)
2677
& RANGE_MASK];
2678
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2679
CONST_BITS+PASS2_BITS)
2680
& RANGE_MASK];
2681
outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2682
CONST_BITS+PASS2_BITS)
2683
& RANGE_MASK];
2684
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2685
CONST_BITS+PASS2_BITS)
2686
& RANGE_MASK];
2687
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2688
CONST_BITS+PASS2_BITS)
2689
& RANGE_MASK];
2690
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2691
CONST_BITS+PASS2_BITS)
2692
& RANGE_MASK];
2693
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2694
CONST_BITS+PASS2_BITS)
2695
& RANGE_MASK];
2696
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2697
CONST_BITS+PASS2_BITS)
2698
& RANGE_MASK];
2699
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2700
CONST_BITS+PASS2_BITS)
2701
& RANGE_MASK];
2702
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2703
CONST_BITS+PASS2_BITS)
2704
& RANGE_MASK];
2705
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2706
CONST_BITS+PASS2_BITS)
2707
& RANGE_MASK];
2708
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2709
CONST_BITS+PASS2_BITS)
2710
& RANGE_MASK];
2711
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2712
CONST_BITS+PASS2_BITS)
2713
& RANGE_MASK];
2714
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27,
2715
CONST_BITS+PASS2_BITS)
2716
& RANGE_MASK];
2717
2718
wsptr += 8; /* advance pointer to next row */
2719
}
2720
}
2721
2722
2723
/*
2724
* Perform dequantization and inverse DCT on one block of coefficients,
2725
* producing a 16x16 output block.
2726
*
2727
* Optimized algorithm with 28 multiplications in the 1-D kernel.
2728
* cK represents sqrt(2) * cos(K*pi/32).
2729
*/
2730
2731
GLOBAL(void)
2732
jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2733
JCOEFPTR coef_block,
2734
JSAMPARRAY output_buf, JDIMENSION output_col)
2735
{
2736
INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2737
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2738
INT32 z1, z2, z3, z4;
2739
JCOEFPTR inptr;
2740
ISLOW_MULT_TYPE * quantptr;
2741
int * wsptr;
2742
JSAMPROW outptr;
2743
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2744
int ctr;
2745
int workspace[8*16]; /* buffers data between passes */
2746
SHIFT_TEMPS
2747
2748
/* Pass 1: process columns from input, store into work array. */
2749
2750
inptr = coef_block;
2751
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2752
wsptr = workspace;
2753
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2754
/* Even part */
2755
2756
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2757
tmp0 <<= CONST_BITS;
2758
/* Add fudge factor here for final descale. */
2759
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
2760
2761
z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2762
tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2763
tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2764
2765
tmp10 = tmp0 + tmp1;
2766
tmp11 = tmp0 - tmp1;
2767
tmp12 = tmp0 + tmp2;
2768
tmp13 = tmp0 - tmp2;
2769
2770
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2771
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2772
z3 = z1 - z2;
2773
z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2774
z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2775
2776
tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2777
tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2778
tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2779
tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2780
2781
tmp20 = tmp10 + tmp0;
2782
tmp27 = tmp10 - tmp0;
2783
tmp21 = tmp12 + tmp1;
2784
tmp26 = tmp12 - tmp1;
2785
tmp22 = tmp13 + tmp2;
2786
tmp25 = tmp13 - tmp2;
2787
tmp23 = tmp11 + tmp3;
2788
tmp24 = tmp11 - tmp3;
2789
2790
/* Odd part */
2791
2792
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2793
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2794
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2795
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2796
2797
tmp11 = z1 + z3;
2798
2799
tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2800
tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2801
tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2802
tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2803
tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2804
tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2805
tmp0 = tmp1 + tmp2 + tmp3 -
2806
MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2807
tmp13 = tmp10 + tmp11 + tmp12 -
2808
MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2809
z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2810
tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2811
tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2812
z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2813
tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2814
tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2815
z2 += z4;
2816
z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
2817
tmp1 += z1;
2818
tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2819
z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
2820
tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2821
tmp12 += z2;
2822
z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2823
tmp2 += z2;
2824
tmp3 += z2;
2825
z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2826
tmp10 += z2;
2827
tmp11 += z2;
2828
2829
/* Final output stage */
2830
2831
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
2832
wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
2833
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
2834
wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
2835
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
2836
wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
2837
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
2838
wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
2839
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
2840
wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
2841
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
2842
wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
2843
wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
2844
wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
2845
wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
2846
wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
2847
}
2848
2849
/* Pass 2: process 16 rows from work array, store into output array. */
2850
2851
wsptr = workspace;
2852
for (ctr = 0; ctr < 16; ctr++) {
2853
outptr = output_buf[ctr] + output_col;
2854
2855
/* Even part */
2856
2857
/* Add range center and fudge factor for final descale and range-limit. */
2858
tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
2859
tmp0 <<= CONST_BITS;
2860
#if PASS2_BITS == 0
2861
tmp0 += ONE << (CONST_BITS-1);
2862
#endif
2863
2864
z1 = (INT32) wsptr[4];
2865
tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2866
tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2867
2868
tmp10 = tmp0 + tmp1;
2869
tmp11 = tmp0 - tmp1;
2870
tmp12 = tmp0 + tmp2;
2871
tmp13 = tmp0 - tmp2;
2872
2873
z1 = (INT32) wsptr[2];
2874
z2 = (INT32) wsptr[6];
2875
z3 = z1 - z2;
2876
z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2877
z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2878
2879
tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2880
tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2881
tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2882
tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2883
2884
tmp20 = tmp10 + tmp0;
2885
tmp27 = tmp10 - tmp0;
2886
tmp21 = tmp12 + tmp1;
2887
tmp26 = tmp12 - tmp1;
2888
tmp22 = tmp13 + tmp2;
2889
tmp25 = tmp13 - tmp2;
2890
tmp23 = tmp11 + tmp3;
2891
tmp24 = tmp11 - tmp3;
2892
2893
/* Odd part */
2894
2895
z1 = (INT32) wsptr[1];
2896
z2 = (INT32) wsptr[3];
2897
z3 = (INT32) wsptr[5];
2898
z4 = (INT32) wsptr[7];
2899
2900
tmp11 = z1 + z3;
2901
2902
tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2903
tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2904
tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2905
tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2906
tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2907
tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2908
tmp0 = tmp1 + tmp2 + tmp3 -
2909
MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2910
tmp13 = tmp10 + tmp11 + tmp12 -
2911
MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2912
z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2913
tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2914
tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2915
z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2916
tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2917
tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2918
z2 += z4;
2919
z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
2920
tmp1 += z1;
2921
tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2922
z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
2923
tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2924
tmp12 += z2;
2925
z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2926
tmp2 += z2;
2927
tmp3 += z2;
2928
z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2929
tmp10 += z2;
2930
tmp11 += z2;
2931
2932
/* Final output stage */
2933
2934
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
2935
CONST_BITS+PASS2_BITS)
2936
& RANGE_MASK];
2937
outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
2938
CONST_BITS+PASS2_BITS)
2939
& RANGE_MASK];
2940
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
2941
CONST_BITS+PASS2_BITS)
2942
& RANGE_MASK];
2943
outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
2944
CONST_BITS+PASS2_BITS)
2945
& RANGE_MASK];
2946
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
2947
CONST_BITS+PASS2_BITS)
2948
& RANGE_MASK];
2949
outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
2950
CONST_BITS+PASS2_BITS)
2951
& RANGE_MASK];
2952
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
2953
CONST_BITS+PASS2_BITS)
2954
& RANGE_MASK];
2955
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
2956
CONST_BITS+PASS2_BITS)
2957
& RANGE_MASK];
2958
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
2959
CONST_BITS+PASS2_BITS)
2960
& RANGE_MASK];
2961
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
2962
CONST_BITS+PASS2_BITS)
2963
& RANGE_MASK];
2964
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
2965
CONST_BITS+PASS2_BITS)
2966
& RANGE_MASK];
2967
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
2968
CONST_BITS+PASS2_BITS)
2969
& RANGE_MASK];
2970
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
2971
CONST_BITS+PASS2_BITS)
2972
& RANGE_MASK];
2973
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
2974
CONST_BITS+PASS2_BITS)
2975
& RANGE_MASK];
2976
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
2977
CONST_BITS+PASS2_BITS)
2978
& RANGE_MASK];
2979
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
2980
CONST_BITS+PASS2_BITS)
2981
& RANGE_MASK];
2982
2983
wsptr += 8; /* advance pointer to next row */
2984
}
2985
}
2986
2987
2988
/*
2989
* Perform dequantization and inverse DCT on one block of coefficients,
2990
* producing a 16x8 output block.
2991
*
2992
* 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
2993
*/
2994
2995
GLOBAL(void)
2996
jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2997
JCOEFPTR coef_block,
2998
JSAMPARRAY output_buf, JDIMENSION output_col)
2999
{
3000
INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
3001
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
3002
INT32 z1, z2, z3, z4;
3003
JCOEFPTR inptr;
3004
ISLOW_MULT_TYPE * quantptr;
3005
int * wsptr;
3006
JSAMPROW outptr;
3007
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3008
int ctr;
3009
int workspace[8*8]; /* buffers data between passes */
3010
SHIFT_TEMPS
3011
3012
/* Pass 1: process columns from input, store into work array.
3013
* Note results are scaled up by sqrt(8) compared to a true IDCT;
3014
* furthermore, we scale the results by 2**PASS1_BITS.
3015
* 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3016
*/
3017
3018
inptr = coef_block;
3019
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3020
wsptr = workspace;
3021
for (ctr = DCTSIZE; ctr > 0; ctr--) {
3022
/* Due to quantization, we will usually find that many of the input
3023
* coefficients are zero, especially the AC terms. We can exploit this
3024
* by short-circuiting the IDCT calculation for any column in which all
3025
* the AC terms are zero. In that case each output is equal to the
3026
* DC coefficient (with scale factor as needed).
3027
* With typical images and quantization tables, half or more of the
3028
* column DCT calculations can be simplified this way.
3029
*/
3030
3031
if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
3032
inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
3033
inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
3034
inptr[DCTSIZE*7] == 0) {
3035
/* AC terms all zero */
3036
#if PASS1_BITS > 0
3037
int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
3038
#else
3039
int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3040
#endif
3041
3042
wsptr[DCTSIZE*0] = dcval;
3043
wsptr[DCTSIZE*1] = dcval;
3044
wsptr[DCTSIZE*2] = dcval;
3045
wsptr[DCTSIZE*3] = dcval;
3046
wsptr[DCTSIZE*4] = dcval;
3047
wsptr[DCTSIZE*5] = dcval;
3048
wsptr[DCTSIZE*6] = dcval;
3049
wsptr[DCTSIZE*7] = dcval;
3050
3051
inptr++; /* advance pointers to next column */
3052
quantptr++;
3053
wsptr++;
3054
continue;
3055
}
3056
3057
/* Even part: reverse the even part of the forward DCT.
3058
* The rotator is c(-6).
3059
*/
3060
3061
z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3062
z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3063
z2 <<= CONST_BITS;
3064
z3 <<= CONST_BITS;
3065
/* Add fudge factor here for final descale. */
3066
z2 += ONE << (CONST_BITS-PASS1_BITS-1);
3067
3068
tmp0 = z2 + z3;
3069
tmp1 = z2 - z3;
3070
3071
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3072
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
3073
3074
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3075
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
3076
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
3077
3078
tmp10 = tmp0 + tmp2;
3079
tmp13 = tmp0 - tmp2;
3080
tmp11 = tmp1 + tmp3;
3081
tmp12 = tmp1 - tmp3;
3082
3083
/* Odd part per figure 8; the matrix is unitary and hence its
3084
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
3085
*/
3086
3087
tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
3088
tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3089
tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3090
tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3091
3092
z2 = tmp0 + tmp2;
3093
z3 = tmp1 + tmp3;
3094
3095
z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
3096
z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
3097
z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
3098
z2 += z1;
3099
z3 += z1;
3100
3101
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3102
tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
3103
tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
3104
tmp0 += z1 + z2;
3105
tmp3 += z1 + z3;
3106
3107
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3108
tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
3109
tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
3110
tmp1 += z1 + z3;
3111
tmp2 += z1 + z2;
3112
3113
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
3114
3115
wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
3116
wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
3117
wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
3118
wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
3119
wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
3120
wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
3121
wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
3122
wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
3123
3124
inptr++; /* advance pointers to next column */
3125
quantptr++;
3126
wsptr++;
3127
}
3128
3129
/* Pass 2: process 8 rows from work array, store into output array.
3130
* 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
3131
*/
3132
3133
wsptr = workspace;
3134
for (ctr = 0; ctr < 8; ctr++) {
3135
outptr = output_buf[ctr] + output_col;
3136
3137
/* Even part */
3138
3139
/* Add range center and fudge factor for final descale and range-limit. */
3140
tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
3141
tmp0 <<= CONST_BITS;
3142
#if PASS2_BITS == 0
3143
tmp0 += ONE << (CONST_BITS-1);
3144
#endif
3145
3146
z1 = (INT32) wsptr[4];
3147
tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
3148
tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
3149
3150
tmp10 = tmp0 + tmp1;
3151
tmp11 = tmp0 - tmp1;
3152
tmp12 = tmp0 + tmp2;
3153
tmp13 = tmp0 - tmp2;
3154
3155
z1 = (INT32) wsptr[2];
3156
z2 = (INT32) wsptr[6];
3157
z3 = z1 - z2;
3158
z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
3159
z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
3160
3161
tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
3162
tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
3163
tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
3164
tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
3165
3166
tmp20 = tmp10 + tmp0;
3167
tmp27 = tmp10 - tmp0;
3168
tmp21 = tmp12 + tmp1;
3169
tmp26 = tmp12 - tmp1;
3170
tmp22 = tmp13 + tmp2;
3171
tmp25 = tmp13 - tmp2;
3172
tmp23 = tmp11 + tmp3;
3173
tmp24 = tmp11 - tmp3;
3174
3175
/* Odd part */
3176
3177
z1 = (INT32) wsptr[1];
3178
z2 = (INT32) wsptr[3];
3179
z3 = (INT32) wsptr[5];
3180
z4 = (INT32) wsptr[7];
3181
3182
tmp11 = z1 + z3;
3183
3184
tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
3185
tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
3186
tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
3187
tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
3188
tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
3189
tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
3190
tmp0 = tmp1 + tmp2 + tmp3 -
3191
MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
3192
tmp13 = tmp10 + tmp11 + tmp12 -
3193
MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
3194
z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
3195
tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
3196
tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
3197
z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
3198
tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
3199
tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
3200
z2 += z4;
3201
z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
3202
tmp1 += z1;
3203
tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
3204
z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
3205
tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
3206
tmp12 += z2;
3207
z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
3208
tmp2 += z2;
3209
tmp3 += z2;
3210
z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
3211
tmp10 += z2;
3212
tmp11 += z2;
3213
3214
/* Final output stage */
3215
3216
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
3217
CONST_BITS+PASS2_BITS)
3218
& RANGE_MASK];
3219
outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
3220
CONST_BITS+PASS2_BITS)
3221
& RANGE_MASK];
3222
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
3223
CONST_BITS+PASS2_BITS)
3224
& RANGE_MASK];
3225
outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
3226
CONST_BITS+PASS2_BITS)
3227
& RANGE_MASK];
3228
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
3229
CONST_BITS+PASS2_BITS)
3230
& RANGE_MASK];
3231
outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
3232
CONST_BITS+PASS2_BITS)
3233
& RANGE_MASK];
3234
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
3235
CONST_BITS+PASS2_BITS)
3236
& RANGE_MASK];
3237
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
3238
CONST_BITS+PASS2_BITS)
3239
& RANGE_MASK];
3240
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
3241
CONST_BITS+PASS2_BITS)
3242
& RANGE_MASK];
3243
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
3244
CONST_BITS+PASS2_BITS)
3245
& RANGE_MASK];
3246
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
3247
CONST_BITS+PASS2_BITS)
3248
& RANGE_MASK];
3249
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
3250
CONST_BITS+PASS2_BITS)
3251
& RANGE_MASK];
3252
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
3253
CONST_BITS+PASS2_BITS)
3254
& RANGE_MASK];
3255
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
3256
CONST_BITS+PASS2_BITS)
3257
& RANGE_MASK];
3258
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
3259
CONST_BITS+PASS2_BITS)
3260
& RANGE_MASK];
3261
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
3262
CONST_BITS+PASS2_BITS)
3263
& RANGE_MASK];
3264
3265
wsptr += 8; /* advance pointer to next row */
3266
}
3267
}
3268
3269
3270
/*
3271
* Perform dequantization and inverse DCT on one block of coefficients,
3272
* producing a 14x7 output block.
3273
*
3274
* 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
3275
*/
3276
3277
GLOBAL(void)
3278
jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3279
JCOEFPTR coef_block,
3280
JSAMPARRAY output_buf, JDIMENSION output_col)
3281
{
3282
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
3283
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
3284
INT32 z1, z2, z3, z4;
3285
JCOEFPTR inptr;
3286
ISLOW_MULT_TYPE * quantptr;
3287
int * wsptr;
3288
JSAMPROW outptr;
3289
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3290
int ctr;
3291
int workspace[8*7]; /* buffers data between passes */
3292
SHIFT_TEMPS
3293
3294
/* Pass 1: process columns from input, store into work array.
3295
* 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
3296
*/
3297
3298
inptr = coef_block;
3299
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3300
wsptr = workspace;
3301
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3302
/* Even part */
3303
3304
tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3305
tmp23 <<= CONST_BITS;
3306
/* Add fudge factor here for final descale. */
3307
tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
3308
3309
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3310
z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3311
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
3312
3313
tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
3314
tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
3315
tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
3316
tmp10 = z1 + z3;
3317
z2 -= tmp10;
3318
tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
3319
tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
3320
tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
3321
tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
3322
3323
/* Odd part */
3324
3325
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3326
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3327
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3328
3329
tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
3330
tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
3331
tmp10 = tmp11 - tmp12;
3332
tmp11 += tmp12;
3333
tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
3334
tmp11 += tmp12;
3335
z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
3336
tmp10 += z2;
3337
tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
3338
3339
/* Final output stage */
3340
3341
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3342
wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3343
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
3344
wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
3345
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3346
wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3347
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
3348
}
3349
3350
/* Pass 2: process 7 rows from work array, store into output array.
3351
* 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
3352
*/
3353
3354
wsptr = workspace;
3355
for (ctr = 0; ctr < 7; ctr++) {
3356
outptr = output_buf[ctr] + output_col;
3357
3358
/* Even part */
3359
3360
/* Add range center and fudge factor for final descale and range-limit. */
3361
z1 = (INT32) wsptr[0] + PASS2_OFFSET;
3362
z1 <<= CONST_BITS;
3363
#if PASS2_BITS == 0
3364
z1 += ONE << (CONST_BITS-1);
3365
#endif
3366
z4 = (INT32) wsptr[4];
3367
z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
3368
z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
3369
z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
3370
3371
tmp10 = z1 + z2;
3372
tmp11 = z1 + z3;
3373
tmp12 = z1 - z4;
3374
3375
tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
3376
3377
z1 = (INT32) wsptr[2];
3378
z2 = (INT32) wsptr[6];
3379
3380
z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
3381
3382
tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
3383
tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
3384
tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
3385
MULTIPLY(z2, FIX(1.378756276)); /* c2 */
3386
3387
tmp20 = tmp10 + tmp13;
3388
tmp26 = tmp10 - tmp13;
3389
tmp21 = tmp11 + tmp14;
3390
tmp25 = tmp11 - tmp14;
3391
tmp22 = tmp12 + tmp15;
3392
tmp24 = tmp12 - tmp15;
3393
3394
/* Odd part */
3395
3396
z1 = (INT32) wsptr[1];
3397
z2 = (INT32) wsptr[3];
3398
z3 = (INT32) wsptr[5];
3399
z4 = (INT32) wsptr[7];
3400
z4 <<= CONST_BITS;
3401
3402
tmp14 = z1 + z3;
3403
tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
3404
tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
3405
tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
3406
tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
3407
tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
3408
z1 -= z2;
3409
tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
3410
tmp16 += tmp15;
3411
tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
3412
tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
3413
tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
3414
tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
3415
tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
3416
tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
3417
3418
tmp13 = ((z1 - z3) << CONST_BITS) + z4;
3419
3420
/* Final output stage */
3421
3422
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3423
CONST_BITS+PASS2_BITS)
3424
& RANGE_MASK];
3425
outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3426
CONST_BITS+PASS2_BITS)
3427
& RANGE_MASK];
3428
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3429
CONST_BITS+PASS2_BITS)
3430
& RANGE_MASK];
3431
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3432
CONST_BITS+PASS2_BITS)
3433
& RANGE_MASK];
3434
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3435
CONST_BITS+PASS2_BITS)
3436
& RANGE_MASK];
3437
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3438
CONST_BITS+PASS2_BITS)
3439
& RANGE_MASK];
3440
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3441
CONST_BITS+PASS2_BITS)
3442
& RANGE_MASK];
3443
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3444
CONST_BITS+PASS2_BITS)
3445
& RANGE_MASK];
3446
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3447
CONST_BITS+PASS2_BITS)
3448
& RANGE_MASK];
3449
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3450
CONST_BITS+PASS2_BITS)
3451
& RANGE_MASK];
3452
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3453
CONST_BITS+PASS2_BITS)
3454
& RANGE_MASK];
3455
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3456
CONST_BITS+PASS2_BITS)
3457
& RANGE_MASK];
3458
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
3459
CONST_BITS+PASS2_BITS)
3460
& RANGE_MASK];
3461
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
3462
CONST_BITS+PASS2_BITS)
3463
& RANGE_MASK];
3464
3465
wsptr += 8; /* advance pointer to next row */
3466
}
3467
}
3468
3469
3470
/*
3471
* Perform dequantization and inverse DCT on one block of coefficients,
3472
* producing a 12x6 output block.
3473
*
3474
* 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
3475
*/
3476
3477
GLOBAL(void)
3478
jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3479
JCOEFPTR coef_block,
3480
JSAMPARRAY output_buf, JDIMENSION output_col)
3481
{
3482
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
3483
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
3484
INT32 z1, z2, z3, z4;
3485
JCOEFPTR inptr;
3486
ISLOW_MULT_TYPE * quantptr;
3487
int * wsptr;
3488
JSAMPROW outptr;
3489
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3490
int ctr;
3491
int workspace[8*6]; /* buffers data between passes */
3492
SHIFT_TEMPS
3493
3494
/* Pass 1: process columns from input, store into work array.
3495
* 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3496
*/
3497
3498
inptr = coef_block;
3499
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3500
wsptr = workspace;
3501
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3502
/* Even part */
3503
3504
tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3505
tmp10 <<= CONST_BITS;
3506
/* Add fudge factor here for final descale. */
3507
tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
3508
tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3509
tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
3510
tmp11 = tmp10 + tmp20;
3511
tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
3512
tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3513
tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
3514
tmp20 = tmp11 + tmp10;
3515
tmp22 = tmp11 - tmp10;
3516
3517
/* Odd part */
3518
3519
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3520
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3521
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3522
tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3523
tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
3524
tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
3525
#if PASS1_BITS > 0
3526
tmp11 = (z1 - z2 - z3) << PASS1_BITS;
3527
#else
3528
tmp11 = z1 - z2 - z3;
3529
#endif
3530
3531
/* Final output stage */
3532
3533
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3534
wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3535
wsptr[8*1] = (int) (tmp21 + tmp11);
3536
wsptr[8*4] = (int) (tmp21 - tmp11);
3537
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3538
wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3539
}
3540
3541
/* Pass 2: process 6 rows from work array, store into output array.
3542
* 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
3543
*/
3544
3545
wsptr = workspace;
3546
for (ctr = 0; ctr < 6; ctr++) {
3547
outptr = output_buf[ctr] + output_col;
3548
3549
/* Even part */
3550
3551
/* Add range center and fudge factor for final descale and range-limit. */
3552
z3 = (INT32) wsptr[0] + PASS2_OFFSET;
3553
z3 <<= CONST_BITS;
3554
#if PASS2_BITS == 0
3555
z3 += ONE << (CONST_BITS-1);
3556
#endif
3557
3558
z4 = (INT32) wsptr[4];
3559
z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
3560
3561
tmp10 = z3 + z4;
3562
tmp11 = z3 - z4;
3563
3564
z1 = (INT32) wsptr[2];
3565
z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
3566
z1 <<= CONST_BITS;
3567
z2 = (INT32) wsptr[6];
3568
z2 <<= CONST_BITS;
3569
3570
tmp12 = z1 - z2;
3571
3572
tmp21 = z3 + tmp12;
3573
tmp24 = z3 - tmp12;
3574
3575
tmp12 = z4 + z2;
3576
3577
tmp20 = tmp10 + tmp12;
3578
tmp25 = tmp10 - tmp12;
3579
3580
tmp12 = z4 - z1 - z2;
3581
3582
tmp22 = tmp11 + tmp12;
3583
tmp23 = tmp11 - tmp12;
3584
3585
/* Odd part */
3586
3587
z1 = (INT32) wsptr[1];
3588
z2 = (INT32) wsptr[3];
3589
z3 = (INT32) wsptr[5];
3590
z4 = (INT32) wsptr[7];
3591
3592
tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
3593
tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
3594
3595
tmp10 = z1 + z3;
3596
tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
3597
tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
3598
tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
3599
tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
3600
tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
3601
tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
3602
tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
3603
MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
3604
3605
z1 -= z4;
3606
z2 -= z3;
3607
z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
3608
tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
3609
tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
3610
3611
/* Final output stage */
3612
3613
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3614
CONST_BITS+PASS2_BITS)
3615
& RANGE_MASK];
3616
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3617
CONST_BITS+PASS2_BITS)
3618
& RANGE_MASK];
3619
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3620
CONST_BITS+PASS2_BITS)
3621
& RANGE_MASK];
3622
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3623
CONST_BITS+PASS2_BITS)
3624
& RANGE_MASK];
3625
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3626
CONST_BITS+PASS2_BITS)
3627
& RANGE_MASK];
3628
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3629
CONST_BITS+PASS2_BITS)
3630
& RANGE_MASK];
3631
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3632
CONST_BITS+PASS2_BITS)
3633
& RANGE_MASK];
3634
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3635
CONST_BITS+PASS2_BITS)
3636
& RANGE_MASK];
3637
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3638
CONST_BITS+PASS2_BITS)
3639
& RANGE_MASK];
3640
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3641
CONST_BITS+PASS2_BITS)
3642
& RANGE_MASK];
3643
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3644
CONST_BITS+PASS2_BITS)
3645
& RANGE_MASK];
3646
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3647
CONST_BITS+PASS2_BITS)
3648
& RANGE_MASK];
3649
3650
wsptr += 8; /* advance pointer to next row */
3651
}
3652
}
3653
3654
3655
/*
3656
* Perform dequantization and inverse DCT on one block of coefficients,
3657
* producing a 10x5 output block.
3658
*
3659
* 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
3660
*/
3661
3662
GLOBAL(void)
3663
jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3664
JCOEFPTR coef_block,
3665
JSAMPARRAY output_buf, JDIMENSION output_col)
3666
{
3667
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
3668
INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
3669
INT32 z1, z2, z3, z4;
3670
JCOEFPTR inptr;
3671
ISLOW_MULT_TYPE * quantptr;
3672
int * wsptr;
3673
JSAMPROW outptr;
3674
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3675
int ctr;
3676
int workspace[8*5]; /* buffers data between passes */
3677
SHIFT_TEMPS
3678
3679
/* Pass 1: process columns from input, store into work array.
3680
* 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
3681
*/
3682
3683
inptr = coef_block;
3684
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3685
wsptr = workspace;
3686
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3687
/* Even part */
3688
3689
tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3690
tmp12 <<= CONST_BITS;
3691
/* Add fudge factor here for final descale. */
3692
tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
3693
tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3694
tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3695
z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
3696
z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
3697
z3 = tmp12 + z2;
3698
tmp10 = z3 + z1;
3699
tmp11 = z3 - z1;
3700
tmp12 -= z2 << 2;
3701
3702
/* Odd part */
3703
3704
z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3705
z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3706
3707
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
3708
tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
3709
tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
3710
3711
/* Final output stage */
3712
3713
wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
3714
wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
3715
wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
3716
wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
3717
wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
3718
}
3719
3720
/* Pass 2: process 5 rows from work array, store into output array.
3721
* 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
3722
*/
3723
3724
wsptr = workspace;
3725
for (ctr = 0; ctr < 5; ctr++) {
3726
outptr = output_buf[ctr] + output_col;
3727
3728
/* Even part */
3729
3730
/* Add range center and fudge factor for final descale and range-limit. */
3731
z3 = (INT32) wsptr[0] + PASS2_OFFSET;
3732
z3 <<= CONST_BITS;
3733
#if PASS2_BITS == 0
3734
z3 += ONE << (CONST_BITS-1);
3735
#endif
3736
z4 = (INT32) wsptr[4];
3737
z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
3738
z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
3739
tmp10 = z3 + z1;
3740
tmp11 = z3 - z2;
3741
3742
tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
3743
3744
z2 = (INT32) wsptr[2];
3745
z3 = (INT32) wsptr[6];
3746
3747
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
3748
tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
3749
tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
3750
3751
tmp20 = tmp10 + tmp12;
3752
tmp24 = tmp10 - tmp12;
3753
tmp21 = tmp11 + tmp13;
3754
tmp23 = tmp11 - tmp13;
3755
3756
/* Odd part */
3757
3758
z1 = (INT32) wsptr[1];
3759
z2 = (INT32) wsptr[3];
3760
z3 = (INT32) wsptr[5];
3761
z3 <<= CONST_BITS;
3762
z4 = (INT32) wsptr[7];
3763
3764
tmp11 = z2 + z4;
3765
tmp13 = z2 - z4;
3766
3767
tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
3768
3769
z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
3770
z4 = z3 + tmp12;
3771
3772
tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
3773
tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
3774
3775
z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
3776
z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
3777
3778
tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
3779
3780
tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
3781
tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
3782
3783
/* Final output stage */
3784
3785
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3786
CONST_BITS+PASS2_BITS)
3787
& RANGE_MASK];
3788
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3789
CONST_BITS+PASS2_BITS)
3790
& RANGE_MASK];
3791
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3792
CONST_BITS+PASS2_BITS)
3793
& RANGE_MASK];
3794
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3795
CONST_BITS+PASS2_BITS)
3796
& RANGE_MASK];
3797
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3798
CONST_BITS+PASS2_BITS)
3799
& RANGE_MASK];
3800
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3801
CONST_BITS+PASS2_BITS)
3802
& RANGE_MASK];
3803
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3804
CONST_BITS+PASS2_BITS)
3805
& RANGE_MASK];
3806
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3807
CONST_BITS+PASS2_BITS)
3808
& RANGE_MASK];
3809
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3810
CONST_BITS+PASS2_BITS)
3811
& RANGE_MASK];
3812
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3813
CONST_BITS+PASS2_BITS)
3814
& RANGE_MASK];
3815
3816
wsptr += 8; /* advance pointer to next row */
3817
}
3818
}
3819
3820
3821
/*
3822
* Perform dequantization and inverse DCT on one block of coefficients,
3823
* producing an 8x4 output block.
3824
*
3825
* 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
3826
*/
3827
3828
GLOBAL(void)
3829
jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3830
JCOEFPTR coef_block,
3831
JSAMPARRAY output_buf, JDIMENSION output_col)
3832
{
3833
INT32 tmp0, tmp1, tmp2, tmp3;
3834
INT32 tmp10, tmp11, tmp12, tmp13;
3835
INT32 z1, z2, z3;
3836
JCOEFPTR inptr;
3837
ISLOW_MULT_TYPE * quantptr;
3838
int * wsptr;
3839
JSAMPROW outptr;
3840
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3841
int ctr;
3842
int workspace[8*4]; /* buffers data between passes */
3843
SHIFT_TEMPS
3844
3845
/* Pass 1: process columns from input, store into work array.
3846
* 4-point IDCT kernel,
3847
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3848
*/
3849
3850
inptr = coef_block;
3851
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3852
wsptr = workspace;
3853
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3854
/* Even part */
3855
3856
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3857
tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3858
3859
#if PASS1_BITS > 0
3860
tmp10 = (tmp0 + tmp2) << PASS1_BITS;
3861
tmp12 = (tmp0 - tmp2) << PASS1_BITS;
3862
#else
3863
tmp10 = tmp0 + tmp2;
3864
tmp12 = tmp0 - tmp2;
3865
#endif
3866
3867
/* Odd part */
3868
/* Same rotation as in the even part of the 8x8 LL&M IDCT */
3869
3870
z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3871
z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3872
3873
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3874
/* Add fudge factor here for final descale. */
3875
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
3876
tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
3877
CONST_BITS-PASS1_BITS);
3878
tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
3879
CONST_BITS-PASS1_BITS);
3880
3881
/* Final output stage */
3882
3883
wsptr[8*0] = (int) (tmp10 + tmp0);
3884
wsptr[8*3] = (int) (tmp10 - tmp0);
3885
wsptr[8*1] = (int) (tmp12 + tmp2);
3886
wsptr[8*2] = (int) (tmp12 - tmp2);
3887
}
3888
3889
/* Pass 2: process rows from work array, store into output array.
3890
* Note that we must descale the results by a factor of 8 == 2**3,
3891
* which is folded into the PASS2_BITS value.
3892
* 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3893
*/
3894
3895
wsptr = workspace;
3896
for (ctr = 0; ctr < 4; ctr++) {
3897
outptr = output_buf[ctr] + output_col;
3898
3899
/* Even part: reverse the even part of the forward DCT.
3900
* The rotator is c(-6).
3901
*/
3902
3903
/* Add range center and fudge factor for final descale and range-limit. */
3904
z2 = (INT32) wsptr[0] + PASS2_OFFSET;
3905
z3 = (INT32) wsptr[4];
3906
z2 <<= CONST_BITS;
3907
z3 <<= CONST_BITS;
3908
#if PASS2_BITS == 0
3909
/* Add fudge factor here for final descale. */
3910
z2 += ONE << (CONST_BITS-1);
3911
#endif
3912
3913
tmp0 = z2 + z3;
3914
tmp1 = z2 - z3;
3915
3916
z2 = (INT32) wsptr[2];
3917
z3 = (INT32) wsptr[6];
3918
3919
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3920
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
3921
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
3922
3923
tmp10 = tmp0 + tmp2;
3924
tmp13 = tmp0 - tmp2;
3925
tmp11 = tmp1 + tmp3;
3926
tmp12 = tmp1 - tmp3;
3927
3928
/* Odd part per figure 8; the matrix is unitary and hence its
3929
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
3930
*/
3931
3932
tmp0 = (INT32) wsptr[7];
3933
tmp1 = (INT32) wsptr[5];
3934
tmp2 = (INT32) wsptr[3];
3935
tmp3 = (INT32) wsptr[1];
3936
3937
z2 = tmp0 + tmp2;
3938
z3 = tmp1 + tmp3;
3939
3940
z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
3941
z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
3942
z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
3943
z2 += z1;
3944
z3 += z1;
3945
3946
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3947
tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
3948
tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
3949
tmp0 += z1 + z2;
3950
tmp3 += z1 + z3;
3951
3952
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3953
tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
3954
tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
3955
tmp1 += z1 + z3;
3956
tmp2 += z1 + z2;
3957
3958
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
3959
3960
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
3961
CONST_BITS+PASS2_BITS)
3962
& RANGE_MASK];
3963
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
3964
CONST_BITS+PASS2_BITS)
3965
& RANGE_MASK];
3966
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
3967
CONST_BITS+PASS2_BITS)
3968
& RANGE_MASK];
3969
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
3970
CONST_BITS+PASS2_BITS)
3971
& RANGE_MASK];
3972
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
3973
CONST_BITS+PASS2_BITS)
3974
& RANGE_MASK];
3975
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
3976
CONST_BITS+PASS2_BITS)
3977
& RANGE_MASK];
3978
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
3979
CONST_BITS+PASS2_BITS)
3980
& RANGE_MASK];
3981
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
3982
CONST_BITS+PASS2_BITS)
3983
& RANGE_MASK];
3984
3985
wsptr += DCTSIZE; /* advance pointer to next row */
3986
}
3987
}
3988
3989
3990
/*
3991
* Perform dequantization and inverse DCT on one block of coefficients,
3992
* producing a 6x3 output block.
3993
*
3994
* 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
3995
*/
3996
3997
GLOBAL(void)
3998
jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3999
JCOEFPTR coef_block,
4000
JSAMPARRAY output_buf, JDIMENSION output_col)
4001
{
4002
INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
4003
INT32 z1, z2, z3;
4004
JCOEFPTR inptr;
4005
ISLOW_MULT_TYPE * quantptr;
4006
int * wsptr;
4007
JSAMPROW outptr;
4008
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4009
int ctr;
4010
int workspace[6*3]; /* buffers data between passes */
4011
SHIFT_TEMPS
4012
4013
/* Pass 1: process columns from input, store into work array.
4014
* 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
4015
*/
4016
4017
inptr = coef_block;
4018
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4019
wsptr = workspace;
4020
for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
4021
/* Even part */
4022
4023
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4024
tmp0 <<= CONST_BITS;
4025
/* Add fudge factor here for final descale. */
4026
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
4027
tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4028
tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
4029
tmp10 = tmp0 + tmp12;
4030
tmp2 = tmp0 - tmp12 - tmp12;
4031
4032
/* Odd part */
4033
4034
tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4035
tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
4036
4037
/* Final output stage */
4038
4039
wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
4040
wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
4041
wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
4042
}
4043
4044
/* Pass 2: process 3 rows from work array, store into output array.
4045
* 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
4046
*/
4047
4048
wsptr = workspace;
4049
for (ctr = 0; ctr < 3; ctr++) {
4050
outptr = output_buf[ctr] + output_col;
4051
4052
/* Even part */
4053
4054
/* Add range center and fudge factor for final descale and range-limit. */
4055
tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
4056
tmp0 <<= CONST_BITS;
4057
#if PASS2_BITS == 0
4058
tmp0 += ONE << (CONST_BITS-1);
4059
#endif
4060
tmp2 = (INT32) wsptr[4];
4061
tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
4062
tmp1 = tmp0 + tmp10;
4063
tmp11 = tmp0 - tmp10 - tmp10;
4064
tmp10 = (INT32) wsptr[2];
4065
tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
4066
tmp10 = tmp1 + tmp0;
4067
tmp12 = tmp1 - tmp0;
4068
4069
/* Odd part */
4070
4071
z1 = (INT32) wsptr[1];
4072
z2 = (INT32) wsptr[3];
4073
z3 = (INT32) wsptr[5];
4074
tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
4075
tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
4076
tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
4077
tmp1 = (z1 - z2 - z3) << CONST_BITS;
4078
4079
/* Final output stage */
4080
4081
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4082
CONST_BITS+PASS2_BITS)
4083
& RANGE_MASK];
4084
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4085
CONST_BITS+PASS2_BITS)
4086
& RANGE_MASK];
4087
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
4088
CONST_BITS+PASS2_BITS)
4089
& RANGE_MASK];
4090
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
4091
CONST_BITS+PASS2_BITS)
4092
& RANGE_MASK];
4093
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4094
CONST_BITS+PASS2_BITS)
4095
& RANGE_MASK];
4096
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4097
CONST_BITS+PASS2_BITS)
4098
& RANGE_MASK];
4099
4100
wsptr += 6; /* advance pointer to next row */
4101
}
4102
}
4103
4104
4105
/*
4106
* Perform dequantization and inverse DCT on one block of coefficients,
4107
* producing a 4x2 output block.
4108
*
4109
* 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
4110
*/
4111
4112
GLOBAL(void)
4113
jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4114
JCOEFPTR coef_block,
4115
JSAMPARRAY output_buf, JDIMENSION output_col)
4116
{
4117
INT32 tmp0, tmp2, tmp10, tmp12;
4118
INT32 z1, z2, z3;
4119
JCOEFPTR inptr;
4120
ISLOW_MULT_TYPE * quantptr;
4121
INT32 * wsptr;
4122
JSAMPROW outptr;
4123
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4124
int ctr;
4125
INT32 workspace[4*2]; /* buffers data between passes */
4126
SHIFT_TEMPS
4127
4128
/* Pass 1: process columns from input, store into work array. */
4129
4130
inptr = coef_block;
4131
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4132
wsptr = workspace;
4133
for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
4134
/* Even part */
4135
4136
tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4137
4138
/* Odd part */
4139
4140
tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4141
4142
/* Final output stage */
4143
4144
wsptr[4*0] = tmp10 + tmp0;
4145
wsptr[4*1] = tmp10 - tmp0;
4146
}
4147
4148
/* Pass 2: process 2 rows from work array, store into output array.
4149
* 4-point IDCT kernel,
4150
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
4151
*/
4152
4153
wsptr = workspace;
4154
for (ctr = 0; ctr < 2; ctr++) {
4155
outptr = output_buf[ctr] + output_col;
4156
4157
/* Even part */
4158
4159
tmp0 = wsptr[0];
4160
tmp2 = wsptr[2];
4161
4162
/* Add range center and fudge factor for final descale and range-limit. */
4163
#if PASS2_BITS > PASS1_BITS
4164
#if PASS2_BITS > PASS1_BITS + 1
4165
tmp0 += (((INT32) RANGE_CENTER) << (PASS2_BITS-PASS1_BITS)) +
4166
(ONE << (PASS2_BITS-PASS1_BITS-1));
4167
#else
4168
tmp0 += (((INT32) RANGE_CENTER) << 1) + ONE;
4169
#endif
4170
tmp0 <<= CONST_BITS;
4171
#else
4172
#if PASS2_BITS == PASS1_BITS
4173
tmp0 += (INT32) RANGE_CENTER;
4174
tmp0 <<= CONST_BITS;
4175
tmp0 += ONE << (CONST_BITS-1);
4176
#else
4177
tmp0 <<= CONST_BITS;
4178
tmp0 += (((INT32) RANGE_CENTER) << (CONST_BITS+PASS2_BITS-PASS1_BITS)) +
4179
(ONE << (CONST_BITS+PASS2_BITS-PASS1_BITS-1));
4180
#endif
4181
#endif
4182
4183
tmp2 <<= CONST_BITS;
4184
4185
tmp10 = tmp0 + tmp2;
4186
tmp12 = tmp0 - tmp2;
4187
4188
/* Odd part */
4189
/* Same rotation as in the even part of the 8x8 LL&M IDCT */
4190
4191
z2 = wsptr[1];
4192
z3 = wsptr[3];
4193
4194
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4195
tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4196
tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4197
4198
/* Final output stage */
4199
4200
outptr[0] =
4201
range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4202
CONST_BITS+PASS2_BITS-PASS1_BITS)
4203
& RANGE_MASK];
4204
outptr[3] =
4205
range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4206
CONST_BITS+PASS2_BITS-PASS1_BITS)
4207
& RANGE_MASK];
4208
outptr[1] =
4209
range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4210
CONST_BITS+PASS2_BITS-PASS1_BITS)
4211
& RANGE_MASK];
4212
outptr[2] =
4213
range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4214
CONST_BITS+PASS2_BITS-PASS1_BITS)
4215
& RANGE_MASK];
4216
4217
wsptr += 4; /* advance pointer to next row */
4218
}
4219
}
4220
4221
4222
/*
4223
* Perform dequantization and inverse DCT on one block of coefficients,
4224
* producing a 2x1 output block.
4225
*
4226
* 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
4227
*/
4228
4229
GLOBAL(void)
4230
jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4231
JCOEFPTR coef_block,
4232
JSAMPARRAY output_buf, JDIMENSION output_col)
4233
{
4234
DCTELEM tmp0, tmp1;
4235
ISLOW_MULT_TYPE * quantptr;
4236
JSAMPROW outptr;
4237
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4238
ISHIFT_TEMPS
4239
4240
/* Pass 1: empty. */
4241
4242
/* Pass 2: process 1 row from input, store into output array. */
4243
4244
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4245
outptr = output_buf[0] + output_col;
4246
4247
/* Even part */
4248
4249
tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]);
4250
4251
/* Odd part */
4252
4253
tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]);
4254
4255
/* Final output stage */
4256
4257
#if PASS2_BITS > PASS1_BITS
4258
/* Add range center and fudge factor for downscale and range-limit. */
4259
#if PASS2_BITS > PASS1_BITS + 1
4260
tmp0 += (((DCTELEM) RANGE_CENTER) << (PASS2_BITS-PASS1_BITS)) +
4261
(1 << (PASS2_BITS-PASS1_BITS-1));
4262
#else
4263
tmp0 += (((DCTELEM) RANGE_CENTER) << 1) + 1;
4264
#endif
4265
4266
outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1,
4267
PASS2_BITS-PASS1_BITS)
4268
& RANGE_MASK];
4269
outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1,
4270
PASS2_BITS-PASS1_BITS)
4271
& RANGE_MASK];
4272
#else
4273
#if PASS2_BITS < PASS1_BITS
4274
tmp0 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
4275
tmp1 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
4276
#endif
4277
4278
tmp0 += (DCTELEM) RANGE_CENTER; /* add range center for range-limit */
4279
4280
outptr[0] = range_limit[(int) (tmp0 + tmp1) & RANGE_MASK];
4281
outptr[1] = range_limit[(int) (tmp0 - tmp1) & RANGE_MASK];
4282
#endif
4283
}
4284
4285
4286
/*
4287
* Perform dequantization and inverse DCT on one block of coefficients,
4288
* producing an 8x16 output block.
4289
*
4290
* 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
4291
*/
4292
4293
GLOBAL(void)
4294
jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4295
JCOEFPTR coef_block,
4296
JSAMPARRAY output_buf, JDIMENSION output_col)
4297
{
4298
INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
4299
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
4300
INT32 z1, z2, z3, z4;
4301
JCOEFPTR inptr;
4302
ISLOW_MULT_TYPE * quantptr;
4303
int * wsptr;
4304
JSAMPROW outptr;
4305
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4306
int ctr;
4307
int workspace[8*16]; /* buffers data between passes */
4308
SHIFT_TEMPS
4309
4310
/* Pass 1: process columns from input, store into work array.
4311
* 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
4312
*/
4313
4314
inptr = coef_block;
4315
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4316
wsptr = workspace;
4317
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
4318
/* Even part */
4319
4320
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4321
tmp0 <<= CONST_BITS;
4322
/* Add fudge factor here for final descale. */
4323
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
4324
4325
z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4326
tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
4327
tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
4328
4329
tmp10 = tmp0 + tmp1;
4330
tmp11 = tmp0 - tmp1;
4331
tmp12 = tmp0 + tmp2;
4332
tmp13 = tmp0 - tmp2;
4333
4334
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4335
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4336
z3 = z1 - z2;
4337
z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
4338
z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
4339
4340
tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
4341
tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
4342
tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
4343
tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
4344
4345
tmp20 = tmp10 + tmp0;
4346
tmp27 = tmp10 - tmp0;
4347
tmp21 = tmp12 + tmp1;
4348
tmp26 = tmp12 - tmp1;
4349
tmp22 = tmp13 + tmp2;
4350
tmp25 = tmp13 - tmp2;
4351
tmp23 = tmp11 + tmp3;
4352
tmp24 = tmp11 - tmp3;
4353
4354
/* Odd part */
4355
4356
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4357
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4358
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4359
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4360
4361
tmp11 = z1 + z3;
4362
4363
tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
4364
tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
4365
tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
4366
tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
4367
tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
4368
tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
4369
tmp0 = tmp1 + tmp2 + tmp3 -
4370
MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
4371
tmp13 = tmp10 + tmp11 + tmp12 -
4372
MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
4373
z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
4374
tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
4375
tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
4376
z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
4377
tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
4378
tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
4379
z2 += z4;
4380
z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
4381
tmp1 += z1;
4382
tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
4383
z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
4384
tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
4385
tmp12 += z2;
4386
z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
4387
tmp2 += z2;
4388
tmp3 += z2;
4389
z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
4390
tmp10 += z2;
4391
tmp11 += z2;
4392
4393
/* Final output stage */
4394
4395
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
4396
wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
4397
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
4398
wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
4399
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
4400
wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
4401
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
4402
wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
4403
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
4404
wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
4405
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
4406
wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
4407
wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
4408
wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
4409
wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
4410
wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
4411
}
4412
4413
/* Pass 2: process rows from work array, store into output array.
4414
* Note that we must descale the results by a factor of 8 == 2**3,
4415
* which is folded into the PASS2_BITS value.
4416
* 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4417
*/
4418
4419
wsptr = workspace;
4420
for (ctr = 0; ctr < 16; ctr++) {
4421
outptr = output_buf[ctr] + output_col;
4422
4423
/* Even part: reverse the even part of the forward DCT.
4424
* The rotator is c(-6).
4425
*/
4426
4427
/* Add range center and fudge factor for final descale and range-limit. */
4428
z2 = (INT32) wsptr[0] + PASS2_OFFSET;
4429
z3 = (INT32) wsptr[4];
4430
z2 <<= CONST_BITS;
4431
z3 <<= CONST_BITS;
4432
#if PASS2_BITS == 0
4433
/* Add fudge factor here for final descale. */
4434
z2 += ONE << (CONST_BITS-1);
4435
#endif
4436
4437
tmp0 = z2 + z3;
4438
tmp1 = z2 - z3;
4439
4440
z2 = (INT32) wsptr[2];
4441
z3 = (INT32) wsptr[6];
4442
4443
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4444
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4445
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4446
4447
tmp10 = tmp0 + tmp2;
4448
tmp13 = tmp0 - tmp2;
4449
tmp11 = tmp1 + tmp3;
4450
tmp12 = tmp1 - tmp3;
4451
4452
/* Odd part per figure 8; the matrix is unitary and hence its
4453
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4454
*/
4455
4456
tmp0 = (INT32) wsptr[7];
4457
tmp1 = (INT32) wsptr[5];
4458
tmp2 = (INT32) wsptr[3];
4459
tmp3 = (INT32) wsptr[1];
4460
4461
z2 = tmp0 + tmp2;
4462
z3 = tmp1 + tmp3;
4463
4464
z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
4465
z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
4466
z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
4467
z2 += z1;
4468
z3 += z1;
4469
4470
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4471
tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
4472
tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
4473
tmp0 += z1 + z2;
4474
tmp3 += z1 + z3;
4475
4476
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4477
tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
4478
tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
4479
tmp1 += z1 + z3;
4480
tmp2 += z1 + z2;
4481
4482
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4483
4484
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
4485
CONST_BITS+PASS2_BITS)
4486
& RANGE_MASK];
4487
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
4488
CONST_BITS+PASS2_BITS)
4489
& RANGE_MASK];
4490
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
4491
CONST_BITS+PASS2_BITS)
4492
& RANGE_MASK];
4493
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
4494
CONST_BITS+PASS2_BITS)
4495
& RANGE_MASK];
4496
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
4497
CONST_BITS+PASS2_BITS)
4498
& RANGE_MASK];
4499
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
4500
CONST_BITS+PASS2_BITS)
4501
& RANGE_MASK];
4502
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
4503
CONST_BITS+PASS2_BITS)
4504
& RANGE_MASK];
4505
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
4506
CONST_BITS+PASS2_BITS)
4507
& RANGE_MASK];
4508
4509
wsptr += DCTSIZE; /* advance pointer to next row */
4510
}
4511
}
4512
4513
4514
/*
4515
* Perform dequantization and inverse DCT on one block of coefficients,
4516
* producing a 7x14 output block.
4517
*
4518
* 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
4519
*/
4520
4521
GLOBAL(void)
4522
jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4523
JCOEFPTR coef_block,
4524
JSAMPARRAY output_buf, JDIMENSION output_col)
4525
{
4526
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
4527
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
4528
INT32 z1, z2, z3, z4;
4529
JCOEFPTR inptr;
4530
ISLOW_MULT_TYPE * quantptr;
4531
int * wsptr;
4532
JSAMPROW outptr;
4533
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4534
int ctr;
4535
int workspace[7*14]; /* buffers data between passes */
4536
SHIFT_TEMPS
4537
4538
/* Pass 1: process columns from input, store into work array.
4539
* 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
4540
*/
4541
4542
inptr = coef_block;
4543
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4544
wsptr = workspace;
4545
for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
4546
/* Even part */
4547
4548
z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4549
z1 <<= CONST_BITS;
4550
/* Add fudge factor here for final descale. */
4551
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
4552
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4553
z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
4554
z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
4555
z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
4556
4557
tmp10 = z1 + z2;
4558
tmp11 = z1 + z3;
4559
tmp12 = z1 - z4;
4560
4561
tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
4562
CONST_BITS-PASS1_BITS);
4563
4564
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4565
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4566
4567
z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
4568
4569
tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
4570
tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
4571
tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
4572
MULTIPLY(z2, FIX(1.378756276)); /* c2 */
4573
4574
tmp20 = tmp10 + tmp13;
4575
tmp26 = tmp10 - tmp13;
4576
tmp21 = tmp11 + tmp14;
4577
tmp25 = tmp11 - tmp14;
4578
tmp22 = tmp12 + tmp15;
4579
tmp24 = tmp12 - tmp15;
4580
4581
/* Odd part */
4582
4583
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4584
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4585
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4586
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4587
tmp13 = z4 << CONST_BITS;
4588
4589
tmp14 = z1 + z3;
4590
tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
4591
tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
4592
tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
4593
tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
4594
tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
4595
z1 -= z2;
4596
tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
4597
tmp16 += tmp15;
4598
z1 += z4;
4599
z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
4600
tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
4601
tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
4602
z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
4603
tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
4604
tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
4605
4606
#if PASS1_BITS > 0
4607
tmp13 = (z1 - z3) << PASS1_BITS;
4608
#else
4609
tmp13 = z1 - z3;
4610
#endif
4611
4612
/* Final output stage */
4613
4614
wsptr[7*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4615
wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4616
wsptr[7*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4617
wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4618
wsptr[7*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4619
wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4620
wsptr[7*3] = (int) (tmp23 + tmp13);
4621
wsptr[7*10] = (int) (tmp23 - tmp13);
4622
wsptr[7*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4623
wsptr[7*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4624
wsptr[7*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4625
wsptr[7*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4626
wsptr[7*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
4627
wsptr[7*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
4628
}
4629
4630
/* Pass 2: process 14 rows from work array, store into output array.
4631
* 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
4632
*/
4633
4634
wsptr = workspace;
4635
for (ctr = 0; ctr < 14; ctr++) {
4636
outptr = output_buf[ctr] + output_col;
4637
4638
/* Even part */
4639
4640
/* Add range center and fudge factor for final descale and range-limit. */
4641
tmp23 = (INT32) wsptr[0] + PASS2_OFFSET;
4642
tmp23 <<= CONST_BITS;
4643
#if PASS2_BITS == 0
4644
tmp23 += ONE << (CONST_BITS-1);
4645
#endif
4646
4647
z1 = (INT32) wsptr[2];
4648
z2 = (INT32) wsptr[4];
4649
z3 = (INT32) wsptr[6];
4650
4651
tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
4652
tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
4653
tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
4654
tmp10 = z1 + z3;
4655
z2 -= tmp10;
4656
tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
4657
tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
4658
tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
4659
tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
4660
4661
/* Odd part */
4662
4663
z1 = (INT32) wsptr[1];
4664
z2 = (INT32) wsptr[3];
4665
z3 = (INT32) wsptr[5];
4666
4667
tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
4668
tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
4669
tmp10 = tmp11 - tmp12;
4670
tmp11 += tmp12;
4671
tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
4672
tmp11 += tmp12;
4673
z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
4674
tmp10 += z2;
4675
tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
4676
4677
/* Final output stage */
4678
4679
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4680
CONST_BITS+PASS2_BITS)
4681
& RANGE_MASK];
4682
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4683
CONST_BITS+PASS2_BITS)
4684
& RANGE_MASK];
4685
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4686
CONST_BITS+PASS2_BITS)
4687
& RANGE_MASK];
4688
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4689
CONST_BITS+PASS2_BITS)
4690
& RANGE_MASK];
4691
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4692
CONST_BITS+PASS2_BITS)
4693
& RANGE_MASK];
4694
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4695
CONST_BITS+PASS2_BITS)
4696
& RANGE_MASK];
4697
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23,
4698
CONST_BITS+PASS2_BITS)
4699
& RANGE_MASK];
4700
4701
wsptr += 7; /* advance pointer to next row */
4702
}
4703
}
4704
4705
4706
/*
4707
* Perform dequantization and inverse DCT on one block of coefficients,
4708
* producing a 6x12 output block.
4709
*
4710
* 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
4711
*/
4712
4713
GLOBAL(void)
4714
jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4715
JCOEFPTR coef_block,
4716
JSAMPARRAY output_buf, JDIMENSION output_col)
4717
{
4718
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
4719
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
4720
INT32 z1, z2, z3, z4;
4721
JCOEFPTR inptr;
4722
ISLOW_MULT_TYPE * quantptr;
4723
int * wsptr;
4724
JSAMPROW outptr;
4725
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4726
int ctr;
4727
int workspace[6*12]; /* buffers data between passes */
4728
SHIFT_TEMPS
4729
4730
/* Pass 1: process columns from input, store into work array.
4731
* 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
4732
*/
4733
4734
inptr = coef_block;
4735
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4736
wsptr = workspace;
4737
for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
4738
/* Even part */
4739
4740
z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4741
z3 <<= CONST_BITS;
4742
/* Add fudge factor here for final descale. */
4743
z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4744
4745
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4746
z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
4747
4748
tmp10 = z3 + z4;
4749
tmp11 = z3 - z4;
4750
4751
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4752
z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
4753
z1 <<= CONST_BITS;
4754
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4755
z2 <<= CONST_BITS;
4756
4757
tmp12 = z1 - z2;
4758
4759
tmp21 = z3 + tmp12;
4760
tmp24 = z3 - tmp12;
4761
4762
tmp12 = z4 + z2;
4763
4764
tmp20 = tmp10 + tmp12;
4765
tmp25 = tmp10 - tmp12;
4766
4767
tmp12 = z4 - z1 - z2;
4768
4769
tmp22 = tmp11 + tmp12;
4770
tmp23 = tmp11 - tmp12;
4771
4772
/* Odd part */
4773
4774
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4775
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4776
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4777
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4778
4779
tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
4780
tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
4781
4782
tmp10 = z1 + z3;
4783
tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
4784
tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
4785
tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
4786
tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
4787
tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
4788
tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
4789
tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
4790
MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
4791
4792
z1 -= z4;
4793
z2 -= z3;
4794
z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
4795
tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
4796
tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
4797
4798
/* Final output stage */
4799
4800
wsptr[6*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4801
wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4802
wsptr[6*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4803
wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4804
wsptr[6*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4805
wsptr[6*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4806
wsptr[6*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4807
wsptr[6*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4808
wsptr[6*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4809
wsptr[6*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4810
wsptr[6*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4811
wsptr[6*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4812
}
4813
4814
/* Pass 2: process 12 rows from work array, store into output array.
4815
* 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
4816
*/
4817
4818
wsptr = workspace;
4819
for (ctr = 0; ctr < 12; ctr++) {
4820
outptr = output_buf[ctr] + output_col;
4821
4822
/* Even part */
4823
4824
/* Add range center and fudge factor for final descale and range-limit. */
4825
tmp10 = (INT32) wsptr[0] + PASS2_OFFSET;
4826
tmp10 <<= CONST_BITS;
4827
#if PASS2_BITS == 0
4828
tmp10 += ONE << (CONST_BITS-1);
4829
#endif
4830
tmp12 = (INT32) wsptr[4];
4831
tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
4832
tmp11 = tmp10 + tmp20;
4833
tmp21 = tmp10 - tmp20 - tmp20;
4834
tmp20 = (INT32) wsptr[2];
4835
tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
4836
tmp20 = tmp11 + tmp10;
4837
tmp22 = tmp11 - tmp10;
4838
4839
/* Odd part */
4840
4841
z1 = (INT32) wsptr[1];
4842
z2 = (INT32) wsptr[3];
4843
z3 = (INT32) wsptr[5];
4844
tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
4845
tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
4846
tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
4847
tmp11 = (z1 - z2 - z3) << CONST_BITS;
4848
4849
/* Final output stage */
4850
4851
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4852
CONST_BITS+PASS2_BITS)
4853
& RANGE_MASK];
4854
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4855
CONST_BITS+PASS2_BITS)
4856
& RANGE_MASK];
4857
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4858
CONST_BITS+PASS2_BITS)
4859
& RANGE_MASK];
4860
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4861
CONST_BITS+PASS2_BITS)
4862
& RANGE_MASK];
4863
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4864
CONST_BITS+PASS2_BITS)
4865
& RANGE_MASK];
4866
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4867
CONST_BITS+PASS2_BITS)
4868
& RANGE_MASK];
4869
4870
wsptr += 6; /* advance pointer to next row */
4871
}
4872
}
4873
4874
4875
/*
4876
* Perform dequantization and inverse DCT on one block of coefficients,
4877
* producing a 5x10 output block.
4878
*
4879
* 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
4880
*/
4881
4882
GLOBAL(void)
4883
jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4884
JCOEFPTR coef_block,
4885
JSAMPARRAY output_buf, JDIMENSION output_col)
4886
{
4887
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
4888
INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
4889
INT32 z1, z2, z3, z4, z5;
4890
JCOEFPTR inptr;
4891
ISLOW_MULT_TYPE * quantptr;
4892
int * wsptr;
4893
JSAMPROW outptr;
4894
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4895
int ctr;
4896
int workspace[5*10]; /* buffers data between passes */
4897
SHIFT_TEMPS
4898
4899
/* Pass 1: process columns from input, store into work array.
4900
* 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
4901
*/
4902
4903
inptr = coef_block;
4904
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4905
wsptr = workspace;
4906
for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
4907
/* Even part */
4908
4909
z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4910
z3 <<= CONST_BITS;
4911
/* Add fudge factor here for final descale. */
4912
z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4913
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4914
z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
4915
z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
4916
tmp10 = z3 + z1;
4917
tmp11 = z3 - z2;
4918
4919
tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
4920
CONST_BITS-PASS1_BITS);
4921
4922
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4923
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4924
4925
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
4926
tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
4927
tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
4928
4929
tmp20 = tmp10 + tmp12;
4930
tmp24 = tmp10 - tmp12;
4931
tmp21 = tmp11 + tmp13;
4932
tmp23 = tmp11 - tmp13;
4933
4934
/* Odd part */
4935
4936
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4937
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4938
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4939
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4940
4941
tmp11 = z2 + z4;
4942
tmp13 = z2 - z4;
4943
4944
tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
4945
z5 = z3 << CONST_BITS;
4946
4947
z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
4948
z4 = z5 + tmp12;
4949
4950
tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
4951
tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
4952
4953
z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
4954
z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
4955
4956
#if PASS1_BITS > 0
4957
tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
4958
#else
4959
tmp12 = z1 - tmp13 - z3;
4960
#endif
4961
4962
tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
4963
tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
4964
4965
/* Final output stage */
4966
4967
wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4968
wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4969
wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4970
wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4971
wsptr[5*2] = (int) (tmp22 + tmp12);
4972
wsptr[5*7] = (int) (tmp22 - tmp12);
4973
wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4974
wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4975
wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4976
wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4977
}
4978
4979
/* Pass 2: process 10 rows from work array, store into output array.
4980
* 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
4981
*/
4982
4983
wsptr = workspace;
4984
for (ctr = 0; ctr < 10; ctr++) {
4985
outptr = output_buf[ctr] + output_col;
4986
4987
/* Even part */
4988
4989
/* Add range center and fudge factor for final descale and range-limit. */
4990
tmp12 = (INT32) wsptr[0] + PASS2_OFFSET;
4991
tmp12 <<= CONST_BITS;
4992
#if PASS2_BITS == 0
4993
tmp12 += ONE << (CONST_BITS-1);
4994
#endif
4995
tmp13 = (INT32) wsptr[2];
4996
tmp14 = (INT32) wsptr[4];
4997
z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
4998
z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
4999
z3 = tmp12 + z2;
5000
tmp10 = z3 + z1;
5001
tmp11 = z3 - z1;
5002
tmp12 -= z2 << 2;
5003
5004
/* Odd part */
5005
5006
z2 = (INT32) wsptr[1];
5007
z3 = (INT32) wsptr[3];
5008
5009
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
5010
tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
5011
tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
5012
5013
/* Final output stage */
5014
5015
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13,
5016
CONST_BITS+PASS2_BITS)
5017
& RANGE_MASK];
5018
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13,
5019
CONST_BITS+PASS2_BITS)
5020
& RANGE_MASK];
5021
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14,
5022
CONST_BITS+PASS2_BITS)
5023
& RANGE_MASK];
5024
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14,
5025
CONST_BITS+PASS2_BITS)
5026
& RANGE_MASK];
5027
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
5028
CONST_BITS+PASS2_BITS)
5029
& RANGE_MASK];
5030
5031
wsptr += 5; /* advance pointer to next row */
5032
}
5033
}
5034
5035
5036
/*
5037
* Perform dequantization and inverse DCT on one block of coefficients,
5038
* producing a 4x8 output block.
5039
*
5040
* 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
5041
*/
5042
5043
GLOBAL(void)
5044
jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5045
JCOEFPTR coef_block,
5046
JSAMPARRAY output_buf, JDIMENSION output_col)
5047
{
5048
INT32 tmp0, tmp1, tmp2, tmp3;
5049
INT32 tmp10, tmp11, tmp12, tmp13;
5050
INT32 z1, z2, z3;
5051
JCOEFPTR inptr;
5052
ISLOW_MULT_TYPE * quantptr;
5053
int * wsptr;
5054
JSAMPROW outptr;
5055
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5056
int ctr;
5057
int workspace[4*8]; /* buffers data between passes */
5058
SHIFT_TEMPS
5059
5060
/* Pass 1: process columns from input, store into work array.
5061
* Note results are scaled up by sqrt(8) compared to a true IDCT;
5062
* furthermore, we scale the results by 2**PASS1_BITS.
5063
* 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
5064
*/
5065
5066
inptr = coef_block;
5067
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5068
wsptr = workspace;
5069
for (ctr = 4; ctr > 0; ctr--) {
5070
/* Due to quantization, we will usually find that many of the input
5071
* coefficients are zero, especially the AC terms. We can exploit this
5072
* by short-circuiting the IDCT calculation for any column in which all
5073
* the AC terms are zero. In that case each output is equal to the
5074
* DC coefficient (with scale factor as needed).
5075
* With typical images and quantization tables, half or more of the
5076
* column DCT calculations can be simplified this way.
5077
*/
5078
5079
if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
5080
inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
5081
inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
5082
inptr[DCTSIZE*7] == 0) {
5083
/* AC terms all zero */
5084
#if PASS1_BITS > 0
5085
int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
5086
#else
5087
int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5088
#endif
5089
5090
wsptr[4*0] = dcval;
5091
wsptr[4*1] = dcval;
5092
wsptr[4*2] = dcval;
5093
wsptr[4*3] = dcval;
5094
wsptr[4*4] = dcval;
5095
wsptr[4*5] = dcval;
5096
wsptr[4*6] = dcval;
5097
wsptr[4*7] = dcval;
5098
5099
inptr++; /* advance pointers to next column */
5100
quantptr++;
5101
wsptr++;
5102
continue;
5103
}
5104
5105
/* Even part: reverse the even part of the forward DCT.
5106
* The rotator is c(-6).
5107
*/
5108
5109
z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5110
z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
5111
z2 <<= CONST_BITS;
5112
z3 <<= CONST_BITS;
5113
/* Add fudge factor here for final descale. */
5114
z2 += ONE << (CONST_BITS-PASS1_BITS-1);
5115
5116
tmp0 = z2 + z3;
5117
tmp1 = z2 - z3;
5118
5119
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5120
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
5121
5122
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
5123
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
5124
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
5125
5126
tmp10 = tmp0 + tmp2;
5127
tmp13 = tmp0 - tmp2;
5128
tmp11 = tmp1 + tmp3;
5129
tmp12 = tmp1 - tmp3;
5130
5131
/* Odd part per figure 8; the matrix is unitary and hence its
5132
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
5133
*/
5134
5135
tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
5136
tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
5137
tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5138
tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5139
5140
z2 = tmp0 + tmp2;
5141
z3 = tmp1 + tmp3;
5142
5143
z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
5144
z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
5145
z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
5146
z2 += z1;
5147
z3 += z1;
5148
5149
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
5150
tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
5151
tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
5152
tmp0 += z1 + z2;
5153
tmp3 += z1 + z3;
5154
5155
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
5156
tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
5157
tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
5158
tmp1 += z1 + z3;
5159
tmp2 += z1 + z2;
5160
5161
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
5162
5163
wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
5164
wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
5165
wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
5166
wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
5167
wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
5168
wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
5169
wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
5170
wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
5171
5172
inptr++; /* advance pointers to next column */
5173
quantptr++;
5174
wsptr++;
5175
}
5176
5177
/* Pass 2: process 8 rows from work array, store into output array.
5178
* 4-point IDCT kernel,
5179
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
5180
*/
5181
5182
wsptr = workspace;
5183
for (ctr = 0; ctr < 8; ctr++) {
5184
outptr = output_buf[ctr] + output_col;
5185
5186
/* Even part */
5187
5188
/* Add range center and fudge factor for final descale and range-limit. */
5189
tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
5190
tmp2 = (INT32) wsptr[2];
5191
tmp0 <<= CONST_BITS;
5192
tmp2 <<= CONST_BITS;
5193
#if PASS2_BITS == 0
5194
tmp0 += ONE << (CONST_BITS-1);
5195
#endif
5196
5197
tmp10 = tmp0 + tmp2;
5198
tmp12 = tmp0 - tmp2;
5199
5200
/* Odd part */
5201
/* Same rotation as in the even part of the 8x8 LL&M IDCT */
5202
5203
z2 = (INT32) wsptr[1];
5204
z3 = (INT32) wsptr[3];
5205
5206
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
5207
tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
5208
tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
5209
5210
/* Final output stage */
5211
5212
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
5213
CONST_BITS+PASS2_BITS)
5214
& RANGE_MASK];
5215
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
5216
CONST_BITS+PASS2_BITS)
5217
& RANGE_MASK];
5218
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
5219
CONST_BITS+PASS2_BITS)
5220
& RANGE_MASK];
5221
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
5222
CONST_BITS+PASS2_BITS)
5223
& RANGE_MASK];
5224
5225
wsptr += 4; /* advance pointer to next row */
5226
}
5227
}
5228
5229
5230
/*
5231
* Perform dequantization and inverse DCT on one block of coefficients,
5232
* producing a 3x6 output block.
5233
*
5234
* 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
5235
*/
5236
5237
GLOBAL(void)
5238
jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5239
JCOEFPTR coef_block,
5240
JSAMPARRAY output_buf, JDIMENSION output_col)
5241
{
5242
INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
5243
INT32 z1, z2, z3;
5244
JCOEFPTR inptr;
5245
ISLOW_MULT_TYPE * quantptr;
5246
int * wsptr;
5247
JSAMPROW outptr;
5248
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5249
int ctr;
5250
int workspace[3*6]; /* buffers data between passes */
5251
SHIFT_TEMPS
5252
5253
/* Pass 1: process columns from input, store into work array.
5254
* 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
5255
*/
5256
5257
inptr = coef_block;
5258
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5259
wsptr = workspace;
5260
for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
5261
/* Even part */
5262
5263
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5264
tmp0 <<= CONST_BITS;
5265
/* Add fudge factor here for final descale. */
5266
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
5267
tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
5268
tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
5269
tmp1 = tmp0 + tmp10;
5270
tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
5271
tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5272
tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
5273
tmp10 = tmp1 + tmp0;
5274
tmp12 = tmp1 - tmp0;
5275
5276
/* Odd part */
5277
5278
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5279
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5280
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
5281
tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
5282
tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
5283
tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
5284
#if PASS1_BITS > 0
5285
tmp1 = (z1 - z2 - z3) << PASS1_BITS;
5286
#else
5287
tmp1 = z1 - z2 - z3;
5288
#endif
5289
5290
/* Final output stage */
5291
5292
wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
5293
wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
5294
wsptr[3*1] = (int) (tmp11 + tmp1);
5295
wsptr[3*4] = (int) (tmp11 - tmp1);
5296
wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
5297
wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
5298
}
5299
5300
/* Pass 2: process 6 rows from work array, store into output array.
5301
* 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
5302
*/
5303
5304
wsptr = workspace;
5305
for (ctr = 0; ctr < 6; ctr++) {
5306
outptr = output_buf[ctr] + output_col;
5307
5308
/* Even part */
5309
5310
/* Add range center and fudge factor for final descale and range-limit. */
5311
tmp0 = (INT32) wsptr[0] + PASS2_OFFSET;
5312
tmp0 <<= CONST_BITS;
5313
#if PASS2_BITS == 0
5314
tmp0 += ONE << (CONST_BITS-1);
5315
#endif
5316
tmp2 = (INT32) wsptr[2];
5317
tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
5318
tmp10 = tmp0 + tmp12;
5319
tmp2 = tmp0 - tmp12 - tmp12;
5320
5321
/* Odd part */
5322
5323
tmp12 = (INT32) wsptr[1];
5324
tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
5325
5326
/* Final output stage */
5327
5328
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
5329
CONST_BITS+PASS2_BITS)
5330
& RANGE_MASK];
5331
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
5332
CONST_BITS+PASS2_BITS)
5333
& RANGE_MASK];
5334
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
5335
CONST_BITS+PASS2_BITS)
5336
& RANGE_MASK];
5337
5338
wsptr += 3; /* advance pointer to next row */
5339
}
5340
}
5341
5342
5343
/*
5344
* Perform dequantization and inverse DCT on one block of coefficients,
5345
* producing a 2x4 output block.
5346
*
5347
* 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
5348
*/
5349
5350
GLOBAL(void)
5351
jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5352
JCOEFPTR coef_block,
5353
JSAMPARRAY output_buf, JDIMENSION output_col)
5354
{
5355
INT32 tmp0, tmp2, tmp10, tmp12;
5356
INT32 z1, z2, z3;
5357
JCOEFPTR inptr;
5358
ISLOW_MULT_TYPE * quantptr;
5359
INT32 * wsptr;
5360
JSAMPROW outptr;
5361
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5362
int ctr;
5363
INT32 workspace[2*4]; /* buffers data between passes */
5364
SHIFT_TEMPS
5365
5366
/* Pass 1: process columns from input, store into work array.
5367
* 4-point IDCT kernel,
5368
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
5369
*/
5370
5371
inptr = coef_block;
5372
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5373
wsptr = workspace;
5374
for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
5375
/* Even part */
5376
5377
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5378
tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5379
5380
tmp10 = (tmp0 + tmp2) << CONST_BITS;
5381
tmp12 = (tmp0 - tmp2) << CONST_BITS;
5382
5383
/* Odd part */
5384
/* Same rotation as in the even part of the 8x8 LL&M IDCT */
5385
5386
z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5387
z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5388
5389
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
5390
tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
5391
tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
5392
5393
/* Final output stage */
5394
5395
wsptr[2*0] = tmp10 + tmp0;
5396
wsptr[2*3] = tmp10 - tmp0;
5397
wsptr[2*1] = tmp12 + tmp2;
5398
wsptr[2*2] = tmp12 - tmp2;
5399
}
5400
5401
/* Pass 2: process 4 rows from work array, store into output array. */
5402
5403
wsptr = workspace;
5404
for (ctr = 0; ctr < 4; ctr++) {
5405
outptr = output_buf[ctr] + output_col;
5406
5407
/* Even part */
5408
5409
/* Add range center and fudge factor for final descale and range-limit. */
5410
tmp10 = wsptr[0] +
5411
((((INT32) RANGE_CENTER) << (CONST_BITS+PASS2_BITS-PASS1_BITS)) +
5412
(ONE << (CONST_BITS+PASS2_BITS-PASS1_BITS-1)));
5413
5414
/* Odd part */
5415
5416
tmp0 = wsptr[1];
5417
5418
/* Final output stage */
5419
5420
outptr[0] =
5421
range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
5422
CONST_BITS+PASS2_BITS-PASS1_BITS)
5423
& RANGE_MASK];
5424
outptr[1] =
5425
range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
5426
CONST_BITS+PASS2_BITS-PASS1_BITS)
5427
& RANGE_MASK];
5428
5429
wsptr += 2; /* advance pointer to next row */
5430
}
5431
}
5432
5433
5434
/*
5435
* Perform dequantization and inverse DCT on one block of coefficients,
5436
* producing a 1x2 output block.
5437
*
5438
* 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
5439
*/
5440
5441
GLOBAL(void)
5442
jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5443
JCOEFPTR coef_block,
5444
JSAMPARRAY output_buf, JDIMENSION output_col)
5445
{
5446
DCTELEM tmp0, tmp1;
5447
ISLOW_MULT_TYPE * quantptr;
5448
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5449
ISHIFT_TEMPS
5450
5451
/* Process 1 column from input, store into output array. */
5452
5453
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5454
5455
/* Even part */
5456
5457
tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
5458
5459
/* Odd part */
5460
5461
tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
5462
5463
/* Final output stage */
5464
5465
#if PASS2_BITS > PASS1_BITS
5466
/* Add range center and fudge factor for downscale and range-limit. */
5467
#if PASS2_BITS > PASS1_BITS + 1
5468
tmp0 += (((DCTELEM) RANGE_CENTER) << (PASS2_BITS-PASS1_BITS)) +
5469
(1 << (PASS2_BITS-PASS1_BITS-1));
5470
#else
5471
tmp0 += (((DCTELEM) RANGE_CENTER) << 1) + 1;
5472
#endif
5473
5474
output_buf[0][output_col] =
5475
range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, PASS2_BITS-PASS1_BITS)
5476
& RANGE_MASK];
5477
output_buf[1][output_col] =
5478
range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, PASS2_BITS-PASS1_BITS)
5479
& RANGE_MASK];
5480
#else
5481
#if PASS2_BITS < PASS1_BITS
5482
tmp0 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
5483
tmp1 <<= (PASS1_BITS-PASS2_BITS); /* upscale */
5484
#endif
5485
5486
tmp0 += (DCTELEM) RANGE_CENTER; /* add range center for range-limit */
5487
5488
output_buf[0][output_col] =
5489
range_limit[(int) (tmp0 + tmp1) & RANGE_MASK];
5490
output_buf[1][output_col] =
5491
range_limit[(int) (tmp0 - tmp1) & RANGE_MASK];
5492
#endif
5493
}
5494
5495
#endif /* IDCT_SCALING_SUPPORTED */
5496
#endif /* DCT_ISLOW_SUPPORTED */
5497
5498