/* Source: GitHub repository wine-mirror/wine, path blob/master/libs/jpeg/jidctint.c */
/*
 * jidctint.c
 *
 * Copyright (C) 1991-1998, Thomas G. Lane.
 * Modification developed 2002-2018 by Guido Vollbeding.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
 * This file contains a slow-but-accurate integer implementation of the
 * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
 * must also perform dequantization of the input coefficients.
 *
 * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
 * on each row (or vice versa, but it's more convenient to emit a row at
 * a time).  Direct algorithms are also available, but they are much more
 * complex and seem not to be any faster when reduced to code.
 *
 * This implementation is based on an algorithm described in
 *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
 *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
 *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
 * The primary algorithm described there uses 11 multiplies and 29 adds.
 * We use their alternate method with 12 multiplies and 32 adds.
 * The advantage of this method is that no data path contains more than one
 * multiplication; this allows a very simple and accurate implementation in
 * scaled fixed-point arithmetic, with a minimal number of shifts.
 *
 * We also provide IDCT routines with various output sample block sizes for
 * direct resolution reduction or enlargement and for direct resolving the
 * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
 * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block.
 *
 * For N<8 we simply take the corresponding low-frequency coefficients of
 * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
 * to yield the downscaled outputs.
 * This can be seen as direct low-pass downsampling from the DCT domain
 * point of view rather than the usual spatial domain point of view,
 * yielding significant computational savings and results at least
 * as good as common bilinear (averaging) spatial downsampling.
 *
 * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as
 * lower frequencies and higher frequencies assumed to be zero.
 * It turns out that the computational effort is similar to the 8x8 IDCT
 * regarding the output size.
 * Furthermore, the scaling and descaling is the same for all IDCT sizes.
 *
 * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
 * since there would be too many additional constants to pre-calculate.
 */
50
51
#define JPEG_INTERNALS
52
#include "jinclude.h"
53
#include "jpeglib.h"
54
#include "jdct.h" /* Private declarations for DCT subsystem */
55
56
#ifdef DCT_ISLOW_SUPPORTED
57
58
59
/*
60
* This module is specialized to the case DCTSIZE = 8.
61
*/
62
63
#if DCTSIZE != 8
64
Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
65
#endif
66
67
68
/*
69
* The poop on this scaling stuff is as follows:
70
*
71
* Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
72
* larger than the true IDCT outputs. The final outputs are therefore
73
* a factor of N larger than desired; since N=8 this can be cured by
74
* a simple right shift at the end of the algorithm. The advantage of
75
* this arrangement is that we save two multiplications per 1-D IDCT,
76
* because the y0 and y4 inputs need not be divided by sqrt(N).
77
*
78
* We have to do addition and subtraction of the integer inputs, which
79
* is no problem, and multiplication by fractional constants, which is
80
* a problem to do in integer arithmetic. We multiply all the constants
81
* by CONST_SCALE and convert them to integer constants (thus retaining
82
* CONST_BITS bits of precision in the constants). After doing a
83
* multiplication we have to divide the product by CONST_SCALE, with proper
84
* rounding, to produce the correct output. This division can be done
85
* cheaply as a right shift of CONST_BITS bits. We postpone shifting
86
* as long as possible so that partial sums can be added together with
87
* full fractional precision.
88
*
89
* The outputs of the first pass are scaled up by PASS1_BITS bits so that
90
* they are represented to better-than-integral precision. These outputs
91
* require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
92
* with the recommended scaling. (To scale up 12-bit sample data further, an
93
* intermediate INT32 array would be needed.)
94
*
95
* To avoid overflow of the 32-bit intermediate results in pass 2, we must
96
* have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
97
* shows that the values given below are the most effective.
98
*/
99
100
#if BITS_IN_JSAMPLE == 8
101
#define CONST_BITS 13
102
#define PASS1_BITS 2
103
#else
104
#define CONST_BITS 13
105
#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
106
#endif
107
108
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
109
* causing a lot of useless floating-point operations at run time.
110
* To get around this we use the following pre-calculated constants.
111
* If you change CONST_BITS you may want to add appropriate values.
112
* (With a reasonable C compiler, you can just rely on the FIX() macro...)
113
*/
114
115
#if CONST_BITS == 13
116
#define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */
117
#define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */
118
#define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */
119
#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */
120
#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */
121
#define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */
122
#define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */
123
#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */
124
#define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */
125
#define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */
126
#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */
127
#define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */
128
#else
129
#define FIX_0_298631336 FIX(0.298631336)
130
#define FIX_0_390180644 FIX(0.390180644)
131
#define FIX_0_541196100 FIX(0.541196100)
132
#define FIX_0_765366865 FIX(0.765366865)
133
#define FIX_0_899976223 FIX(0.899976223)
134
#define FIX_1_175875602 FIX(1.175875602)
135
#define FIX_1_501321110 FIX(1.501321110)
136
#define FIX_1_847759065 FIX(1.847759065)
137
#define FIX_1_961570560 FIX(1.961570560)
138
#define FIX_2_053119869 FIX(2.053119869)
139
#define FIX_2_562915447 FIX(2.562915447)
140
#define FIX_3_072711026 FIX(3.072711026)
141
#endif
142
143
144
/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
145
* For 8-bit samples with the recommended scaling, all the variable
146
* and constant values involved are no more than 16 bits wide, so a
147
* 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
148
* For 12-bit samples, a full 32-bit multiplication will be needed.
149
*/
150
151
#if BITS_IN_JSAMPLE == 8
152
#define MULTIPLY(var,const) MULTIPLY16C16(var,const)
153
#else
154
#define MULTIPLY(var,const) ((var) * (const))
155
#endif
156
157
158
/* Dequantize a coefficient by multiplying it by the multiplier-table
159
* entry; produce an int result. In this module, both inputs and result
160
* are 16 bits or less, so either int or short multiply will work.
161
*/
162
163
#define DEQUANTIZE(coef,quantval) (((ISLOW_MULT_TYPE) (coef)) * (quantval))
164
165
166
/*
167
* Perform dequantization and inverse DCT on one block of coefficients.
168
*
169
* Optimized algorithm with 12 multiplications in the 1-D kernel.
170
* cK represents sqrt(2) * cos(K*pi/16).
171
*/
172
173
GLOBAL(void)
174
jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
175
JCOEFPTR coef_block,
176
JSAMPARRAY output_buf, JDIMENSION output_col)
177
{
178
INT32 tmp0, tmp1, tmp2, tmp3;
179
INT32 tmp10, tmp11, tmp12, tmp13;
180
INT32 z1, z2, z3;
181
JCOEFPTR inptr;
182
ISLOW_MULT_TYPE * quantptr;
183
int * wsptr;
184
JSAMPROW outptr;
185
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
186
int ctr;
187
int workspace[DCTSIZE2]; /* buffers data between passes */
188
SHIFT_TEMPS
189
190
/* Pass 1: process columns from input, store into work array.
191
* Note results are scaled up by sqrt(8) compared to a true IDCT;
192
* furthermore, we scale the results by 2**PASS1_BITS.
193
*/
194
195
inptr = coef_block;
196
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
197
wsptr = workspace;
198
for (ctr = DCTSIZE; ctr > 0; ctr--) {
199
/* Due to quantization, we will usually find that many of the input
200
* coefficients are zero, especially the AC terms. We can exploit this
201
* by short-circuiting the IDCT calculation for any column in which all
202
* the AC terms are zero. In that case each output is equal to the
203
* DC coefficient (with scale factor as needed).
204
* With typical images and quantization tables, half or more of the
205
* column DCT calculations can be simplified this way.
206
*/
207
208
if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
209
inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
210
inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
211
inptr[DCTSIZE*7] == 0) {
212
/* AC terms all zero */
213
int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
214
215
wsptr[DCTSIZE*0] = dcval;
216
wsptr[DCTSIZE*1] = dcval;
217
wsptr[DCTSIZE*2] = dcval;
218
wsptr[DCTSIZE*3] = dcval;
219
wsptr[DCTSIZE*4] = dcval;
220
wsptr[DCTSIZE*5] = dcval;
221
wsptr[DCTSIZE*6] = dcval;
222
wsptr[DCTSIZE*7] = dcval;
223
224
inptr++; /* advance pointers to next column */
225
quantptr++;
226
wsptr++;
227
continue;
228
}
229
230
/* Even part: reverse the even part of the forward DCT.
231
* The rotator is c(-6).
232
*/
233
234
z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
235
z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
236
z2 <<= CONST_BITS;
237
z3 <<= CONST_BITS;
238
/* Add fudge factor here for final descale. */
239
z2 += ONE << (CONST_BITS-PASS1_BITS-1);
240
241
tmp0 = z2 + z3;
242
tmp1 = z2 - z3;
243
244
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
245
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
246
247
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
248
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
249
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
250
251
tmp10 = tmp0 + tmp2;
252
tmp13 = tmp0 - tmp2;
253
tmp11 = tmp1 + tmp3;
254
tmp12 = tmp1 - tmp3;
255
256
/* Odd part per figure 8; the matrix is unitary and hence its
257
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
258
*/
259
260
tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
261
tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
262
tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
263
tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
264
265
z2 = tmp0 + tmp2;
266
z3 = tmp1 + tmp3;
267
268
z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
269
z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
270
z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
271
z2 += z1;
272
z3 += z1;
273
274
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
275
tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
276
tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
277
tmp0 += z1 + z2;
278
tmp3 += z1 + z3;
279
280
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
281
tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
282
tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
283
tmp1 += z1 + z3;
284
tmp2 += z1 + z2;
285
286
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
287
288
wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
289
wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
290
wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
291
wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
292
wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
293
wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
294
wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
295
wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
296
297
inptr++; /* advance pointers to next column */
298
quantptr++;
299
wsptr++;
300
}
301
302
/* Pass 2: process rows from work array, store into output array.
303
* Note that we must descale the results by a factor of 8 == 2**3,
304
* and also undo the PASS1_BITS scaling.
305
*/
306
307
wsptr = workspace;
308
for (ctr = 0; ctr < DCTSIZE; ctr++) {
309
outptr = output_buf[ctr] + output_col;
310
311
/* Add range center and fudge factor for final descale and range-limit. */
312
z2 = (INT32) wsptr[0] +
313
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
314
(ONE << (PASS1_BITS+2)));
315
316
/* Rows of zeroes can be exploited in the same way as we did with columns.
317
* However, the column calculation has created many nonzero AC terms, so
318
* the simplification applies less often (typically 5% to 10% of the time).
319
* On machines with very fast multiplication, it's possible that the
320
* test takes more time than it's worth. In that case this section
321
* may be commented out.
322
*/
323
324
#ifndef NO_ZERO_ROW_TEST
325
if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
326
wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
327
/* AC terms all zero */
328
JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS1_BITS+3)
329
& RANGE_MASK];
330
331
outptr[0] = dcval;
332
outptr[1] = dcval;
333
outptr[2] = dcval;
334
outptr[3] = dcval;
335
outptr[4] = dcval;
336
outptr[5] = dcval;
337
outptr[6] = dcval;
338
outptr[7] = dcval;
339
340
wsptr += DCTSIZE; /* advance pointer to next row */
341
continue;
342
}
343
#endif
344
345
/* Even part: reverse the even part of the forward DCT.
346
* The rotator is c(-6).
347
*/
348
349
z3 = (INT32) wsptr[4];
350
351
tmp0 = (z2 + z3) << CONST_BITS;
352
tmp1 = (z2 - z3) << CONST_BITS;
353
354
z2 = (INT32) wsptr[2];
355
z3 = (INT32) wsptr[6];
356
357
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
358
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
359
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
360
361
tmp10 = tmp0 + tmp2;
362
tmp13 = tmp0 - tmp2;
363
tmp11 = tmp1 + tmp3;
364
tmp12 = tmp1 - tmp3;
365
366
/* Odd part per figure 8; the matrix is unitary and hence its
367
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
368
*/
369
370
tmp0 = (INT32) wsptr[7];
371
tmp1 = (INT32) wsptr[5];
372
tmp2 = (INT32) wsptr[3];
373
tmp3 = (INT32) wsptr[1];
374
375
z2 = tmp0 + tmp2;
376
z3 = tmp1 + tmp3;
377
378
z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
379
z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
380
z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
381
z2 += z1;
382
z3 += z1;
383
384
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
385
tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
386
tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
387
tmp0 += z1 + z2;
388
tmp3 += z1 + z3;
389
390
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
391
tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
392
tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
393
tmp1 += z1 + z3;
394
tmp2 += z1 + z2;
395
396
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
397
398
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
399
CONST_BITS+PASS1_BITS+3)
400
& RANGE_MASK];
401
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
402
CONST_BITS+PASS1_BITS+3)
403
& RANGE_MASK];
404
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
405
CONST_BITS+PASS1_BITS+3)
406
& RANGE_MASK];
407
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
408
CONST_BITS+PASS1_BITS+3)
409
& RANGE_MASK];
410
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
411
CONST_BITS+PASS1_BITS+3)
412
& RANGE_MASK];
413
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
414
CONST_BITS+PASS1_BITS+3)
415
& RANGE_MASK];
416
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
417
CONST_BITS+PASS1_BITS+3)
418
& RANGE_MASK];
419
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
420
CONST_BITS+PASS1_BITS+3)
421
& RANGE_MASK];
422
423
wsptr += DCTSIZE; /* advance pointer to next row */
424
}
425
}
426
427
#ifdef IDCT_SCALING_SUPPORTED
428
429
430
/*
431
* Perform dequantization and inverse DCT on one block of coefficients,
432
* producing a reduced-size 7x7 output block.
433
*
434
* Optimized algorithm with 12 multiplications in the 1-D kernel.
435
* cK represents sqrt(2) * cos(K*pi/14).
436
*/
437
438
GLOBAL(void)
439
jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
440
JCOEFPTR coef_block,
441
JSAMPARRAY output_buf, JDIMENSION output_col)
442
{
443
INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
444
INT32 z1, z2, z3;
445
JCOEFPTR inptr;
446
ISLOW_MULT_TYPE * quantptr;
447
int * wsptr;
448
JSAMPROW outptr;
449
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
450
int ctr;
451
int workspace[7*7]; /* buffers data between passes */
452
SHIFT_TEMPS
453
454
/* Pass 1: process columns from input, store into work array. */
455
456
inptr = coef_block;
457
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
458
wsptr = workspace;
459
for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
460
/* Even part */
461
462
tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
463
tmp13 <<= CONST_BITS;
464
/* Add fudge factor here for final descale. */
465
tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
466
467
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
468
z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
469
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
470
471
tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
472
tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
473
tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
474
tmp0 = z1 + z3;
475
z2 -= tmp0;
476
tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
477
tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
478
tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
479
tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
480
481
/* Odd part */
482
483
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
484
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
485
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
486
487
tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
488
tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
489
tmp0 = tmp1 - tmp2;
490
tmp1 += tmp2;
491
tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
492
tmp1 += tmp2;
493
z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
494
tmp0 += z2;
495
tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
496
497
/* Final output stage */
498
499
wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
500
wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
501
wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
502
wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
503
wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
504
wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
505
wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
506
}
507
508
/* Pass 2: process 7 rows from work array, store into output array. */
509
510
wsptr = workspace;
511
for (ctr = 0; ctr < 7; ctr++) {
512
outptr = output_buf[ctr] + output_col;
513
514
/* Even part */
515
516
/* Add range center and fudge factor for final descale and range-limit. */
517
tmp13 = (INT32) wsptr[0] +
518
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
519
(ONE << (PASS1_BITS+2)));
520
tmp13 <<= CONST_BITS;
521
522
z1 = (INT32) wsptr[2];
523
z2 = (INT32) wsptr[4];
524
z3 = (INT32) wsptr[6];
525
526
tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
527
tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
528
tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
529
tmp0 = z1 + z3;
530
z2 -= tmp0;
531
tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
532
tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
533
tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
534
tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
535
536
/* Odd part */
537
538
z1 = (INT32) wsptr[1];
539
z2 = (INT32) wsptr[3];
540
z3 = (INT32) wsptr[5];
541
542
tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
543
tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
544
tmp0 = tmp1 - tmp2;
545
tmp1 += tmp2;
546
tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
547
tmp1 += tmp2;
548
z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
549
tmp0 += z2;
550
tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
551
552
/* Final output stage */
553
554
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
555
CONST_BITS+PASS1_BITS+3)
556
& RANGE_MASK];
557
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
558
CONST_BITS+PASS1_BITS+3)
559
& RANGE_MASK];
560
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
561
CONST_BITS+PASS1_BITS+3)
562
& RANGE_MASK];
563
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
564
CONST_BITS+PASS1_BITS+3)
565
& RANGE_MASK];
566
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
567
CONST_BITS+PASS1_BITS+3)
568
& RANGE_MASK];
569
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
570
CONST_BITS+PASS1_BITS+3)
571
& RANGE_MASK];
572
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
573
CONST_BITS+PASS1_BITS+3)
574
& RANGE_MASK];
575
576
wsptr += 7; /* advance pointer to next row */
577
}
578
}
579
580
581
/*
582
* Perform dequantization and inverse DCT on one block of coefficients,
583
* producing a reduced-size 6x6 output block.
584
*
585
* Optimized algorithm with 3 multiplications in the 1-D kernel.
586
* cK represents sqrt(2) * cos(K*pi/12).
587
*/
588
589
GLOBAL(void)
590
jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
591
JCOEFPTR coef_block,
592
JSAMPARRAY output_buf, JDIMENSION output_col)
593
{
594
INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
595
INT32 z1, z2, z3;
596
JCOEFPTR inptr;
597
ISLOW_MULT_TYPE * quantptr;
598
int * wsptr;
599
JSAMPROW outptr;
600
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
601
int ctr;
602
int workspace[6*6]; /* buffers data between passes */
603
SHIFT_TEMPS
604
605
/* Pass 1: process columns from input, store into work array. */
606
607
inptr = coef_block;
608
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
609
wsptr = workspace;
610
for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
611
/* Even part */
612
613
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
614
tmp0 <<= CONST_BITS;
615
/* Add fudge factor here for final descale. */
616
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
617
tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
618
tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
619
tmp1 = tmp0 + tmp10;
620
tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
621
tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
622
tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
623
tmp10 = tmp1 + tmp0;
624
tmp12 = tmp1 - tmp0;
625
626
/* Odd part */
627
628
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
629
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
630
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
631
tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
632
tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
633
tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
634
tmp1 = (z1 - z2 - z3) << PASS1_BITS;
635
636
/* Final output stage */
637
638
wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
639
wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
640
wsptr[6*1] = (int) (tmp11 + tmp1);
641
wsptr[6*4] = (int) (tmp11 - tmp1);
642
wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
643
wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
644
}
645
646
/* Pass 2: process 6 rows from work array, store into output array. */
647
648
wsptr = workspace;
649
for (ctr = 0; ctr < 6; ctr++) {
650
outptr = output_buf[ctr] + output_col;
651
652
/* Even part */
653
654
/* Add range center and fudge factor for final descale and range-limit. */
655
tmp0 = (INT32) wsptr[0] +
656
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
657
(ONE << (PASS1_BITS+2)));
658
tmp0 <<= CONST_BITS;
659
tmp2 = (INT32) wsptr[4];
660
tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
661
tmp1 = tmp0 + tmp10;
662
tmp11 = tmp0 - tmp10 - tmp10;
663
tmp10 = (INT32) wsptr[2];
664
tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
665
tmp10 = tmp1 + tmp0;
666
tmp12 = tmp1 - tmp0;
667
668
/* Odd part */
669
670
z1 = (INT32) wsptr[1];
671
z2 = (INT32) wsptr[3];
672
z3 = (INT32) wsptr[5];
673
tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
674
tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
675
tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
676
tmp1 = (z1 - z2 - z3) << CONST_BITS;
677
678
/* Final output stage */
679
680
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
681
CONST_BITS+PASS1_BITS+3)
682
& RANGE_MASK];
683
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
684
CONST_BITS+PASS1_BITS+3)
685
& RANGE_MASK];
686
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
687
CONST_BITS+PASS1_BITS+3)
688
& RANGE_MASK];
689
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
690
CONST_BITS+PASS1_BITS+3)
691
& RANGE_MASK];
692
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
693
CONST_BITS+PASS1_BITS+3)
694
& RANGE_MASK];
695
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
696
CONST_BITS+PASS1_BITS+3)
697
& RANGE_MASK];
698
699
wsptr += 6; /* advance pointer to next row */
700
}
701
}
702
703
704
/*
705
* Perform dequantization and inverse DCT on one block of coefficients,
706
* producing a reduced-size 5x5 output block.
707
*
708
* Optimized algorithm with 5 multiplications in the 1-D kernel.
709
* cK represents sqrt(2) * cos(K*pi/10).
710
*/
711
712
GLOBAL(void)
713
jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
714
JCOEFPTR coef_block,
715
JSAMPARRAY output_buf, JDIMENSION output_col)
716
{
717
INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
718
INT32 z1, z2, z3;
719
JCOEFPTR inptr;
720
ISLOW_MULT_TYPE * quantptr;
721
int * wsptr;
722
JSAMPROW outptr;
723
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
724
int ctr;
725
int workspace[5*5]; /* buffers data between passes */
726
SHIFT_TEMPS
727
728
/* Pass 1: process columns from input, store into work array. */
729
730
inptr = coef_block;
731
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
732
wsptr = workspace;
733
for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
734
/* Even part */
735
736
tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
737
tmp12 <<= CONST_BITS;
738
/* Add fudge factor here for final descale. */
739
tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
740
tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
741
tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
742
z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
743
z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
744
z3 = tmp12 + z2;
745
tmp10 = z3 + z1;
746
tmp11 = z3 - z1;
747
tmp12 -= z2 << 2;
748
749
/* Odd part */
750
751
z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
752
z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
753
754
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
755
tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
756
tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
757
758
/* Final output stage */
759
760
wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
761
wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
762
wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
763
wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
764
wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
765
}
766
767
/* Pass 2: process 5 rows from work array, store into output array. */
768
769
wsptr = workspace;
770
for (ctr = 0; ctr < 5; ctr++) {
771
outptr = output_buf[ctr] + output_col;
772
773
/* Even part */
774
775
/* Add range center and fudge factor for final descale and range-limit. */
776
tmp12 = (INT32) wsptr[0] +
777
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
778
(ONE << (PASS1_BITS+2)));
779
tmp12 <<= CONST_BITS;
780
tmp0 = (INT32) wsptr[2];
781
tmp1 = (INT32) wsptr[4];
782
z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
783
z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
784
z3 = tmp12 + z2;
785
tmp10 = z3 + z1;
786
tmp11 = z3 - z1;
787
tmp12 -= z2 << 2;
788
789
/* Odd part */
790
791
z2 = (INT32) wsptr[1];
792
z3 = (INT32) wsptr[3];
793
794
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
795
tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
796
tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
797
798
/* Final output stage */
799
800
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
801
CONST_BITS+PASS1_BITS+3)
802
& RANGE_MASK];
803
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
804
CONST_BITS+PASS1_BITS+3)
805
& RANGE_MASK];
806
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
807
CONST_BITS+PASS1_BITS+3)
808
& RANGE_MASK];
809
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
810
CONST_BITS+PASS1_BITS+3)
811
& RANGE_MASK];
812
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
813
CONST_BITS+PASS1_BITS+3)
814
& RANGE_MASK];
815
816
wsptr += 5; /* advance pointer to next row */
817
}
818
}
819
820
821
/*
822
* Perform dequantization and inverse DCT on one block of coefficients,
823
* producing a reduced-size 4x4 output block.
824
*
825
* Optimized algorithm with 3 multiplications in the 1-D kernel.
826
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
827
*/
828
829
GLOBAL(void)
830
jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
831
JCOEFPTR coef_block,
832
JSAMPARRAY output_buf, JDIMENSION output_col)
833
{
834
INT32 tmp0, tmp2, tmp10, tmp12;
835
INT32 z1, z2, z3;
836
JCOEFPTR inptr;
837
ISLOW_MULT_TYPE * quantptr;
838
int * wsptr;
839
JSAMPROW outptr;
840
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
841
int ctr;
842
int workspace[4*4]; /* buffers data between passes */
843
SHIFT_TEMPS
844
845
/* Pass 1: process columns from input, store into work array. */
846
847
inptr = coef_block;
848
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
849
wsptr = workspace;
850
for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
851
/* Even part */
852
853
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
854
tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
855
856
tmp10 = (tmp0 + tmp2) << PASS1_BITS;
857
tmp12 = (tmp0 - tmp2) << PASS1_BITS;
858
859
/* Odd part */
860
/* Same rotation as in the even part of the 8x8 LL&M IDCT */
861
862
z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
863
z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
864
865
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
866
/* Add fudge factor here for final descale. */
867
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
868
tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
869
CONST_BITS-PASS1_BITS);
870
tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
871
CONST_BITS-PASS1_BITS);
872
873
/* Final output stage */
874
875
wsptr[4*0] = (int) (tmp10 + tmp0);
876
wsptr[4*3] = (int) (tmp10 - tmp0);
877
wsptr[4*1] = (int) (tmp12 + tmp2);
878
wsptr[4*2] = (int) (tmp12 - tmp2);
879
}
880
881
/* Pass 2: process 4 rows from work array, store into output array. */
882
883
wsptr = workspace;
884
for (ctr = 0; ctr < 4; ctr++) {
885
outptr = output_buf[ctr] + output_col;
886
887
/* Even part */
888
889
/* Add range center and fudge factor for final descale and range-limit. */
890
tmp0 = (INT32) wsptr[0] +
891
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
892
(ONE << (PASS1_BITS+2)));
893
tmp2 = (INT32) wsptr[2];
894
895
tmp10 = (tmp0 + tmp2) << CONST_BITS;
896
tmp12 = (tmp0 - tmp2) << CONST_BITS;
897
898
/* Odd part */
899
/* Same rotation as in the even part of the 8x8 LL&M IDCT */
900
901
z2 = (INT32) wsptr[1];
902
z3 = (INT32) wsptr[3];
903
904
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
905
tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
906
tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
907
908
/* Final output stage */
909
910
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
911
CONST_BITS+PASS1_BITS+3)
912
& RANGE_MASK];
913
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
914
CONST_BITS+PASS1_BITS+3)
915
& RANGE_MASK];
916
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
917
CONST_BITS+PASS1_BITS+3)
918
& RANGE_MASK];
919
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
920
CONST_BITS+PASS1_BITS+3)
921
& RANGE_MASK];
922
923
wsptr += 4; /* advance pointer to next row */
924
}
925
}
926
927
928
/*
929
* Perform dequantization and inverse DCT on one block of coefficients,
930
* producing a reduced-size 3x3 output block.
931
*
932
* Optimized algorithm with 2 multiplications in the 1-D kernel.
933
* cK represents sqrt(2) * cos(K*pi/6).
934
*/
935
936
GLOBAL(void)
937
jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
938
JCOEFPTR coef_block,
939
JSAMPARRAY output_buf, JDIMENSION output_col)
940
{
941
INT32 tmp0, tmp2, tmp10, tmp12;
942
JCOEFPTR inptr;
943
ISLOW_MULT_TYPE * quantptr;
944
int * wsptr;
945
JSAMPROW outptr;
946
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
947
int ctr;
948
int workspace[3*3]; /* buffers data between passes */
949
SHIFT_TEMPS
950
951
/* Pass 1: process columns from input, store into work array. */
952
953
inptr = coef_block;
954
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
955
wsptr = workspace;
956
for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
957
/* Even part */
958
959
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
960
tmp0 <<= CONST_BITS;
961
/* Add fudge factor here for final descale. */
962
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
963
tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
964
tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
965
tmp10 = tmp0 + tmp12;
966
tmp2 = tmp0 - tmp12 - tmp12;
967
968
/* Odd part */
969
970
tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
971
tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
972
973
/* Final output stage */
974
975
wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
976
wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
977
wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
978
}
979
980
/* Pass 2: process 3 rows from work array, store into output array. */
981
982
wsptr = workspace;
983
for (ctr = 0; ctr < 3; ctr++) {
984
outptr = output_buf[ctr] + output_col;
985
986
/* Even part */
987
988
/* Add range center and fudge factor for final descale and range-limit. */
989
tmp0 = (INT32) wsptr[0] +
990
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
991
(ONE << (PASS1_BITS+2)));
992
tmp0 <<= CONST_BITS;
993
tmp2 = (INT32) wsptr[2];
994
tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
995
tmp10 = tmp0 + tmp12;
996
tmp2 = tmp0 - tmp12 - tmp12;
997
998
/* Odd part */
999
1000
tmp12 = (INT32) wsptr[1];
1001
tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
1002
1003
/* Final output stage */
1004
1005
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1006
CONST_BITS+PASS1_BITS+3)
1007
& RANGE_MASK];
1008
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1009
CONST_BITS+PASS1_BITS+3)
1010
& RANGE_MASK];
1011
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
1012
CONST_BITS+PASS1_BITS+3)
1013
& RANGE_MASK];
1014
1015
wsptr += 3; /* advance pointer to next row */
1016
}
1017
}
1018
1019
1020
/*
1021
* Perform dequantization and inverse DCT on one block of coefficients,
1022
* producing a reduced-size 2x2 output block.
1023
*
1024
* Multiplication-less algorithm.
1025
*/
1026
1027
GLOBAL(void)
1028
jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1029
JCOEFPTR coef_block,
1030
JSAMPARRAY output_buf, JDIMENSION output_col)
1031
{
1032
DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1033
ISLOW_MULT_TYPE * quantptr;
1034
JSAMPROW outptr;
1035
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1036
ISHIFT_TEMPS
1037
1038
/* Pass 1: process columns from input. */
1039
1040
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1041
1042
/* Column 0 */
1043
tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
1044
tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
1045
/* Add range center and fudge factor for final descale and range-limit. */
1046
tmp4 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
1047
1048
tmp0 = tmp4 + tmp5;
1049
tmp2 = tmp4 - tmp5;
1050
1051
/* Column 1 */
1052
tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
1053
tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
1054
1055
tmp1 = tmp4 + tmp5;
1056
tmp3 = tmp4 - tmp5;
1057
1058
/* Pass 2: process 2 rows, store into output array. */
1059
1060
/* Row 0 */
1061
outptr = output_buf[0] + output_col;
1062
1063
outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
1064
outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
1065
1066
/* Row 1 */
1067
outptr = output_buf[1] + output_col;
1068
1069
outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK];
1070
outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK];
1071
}
1072
1073
1074
/*
1075
* Perform dequantization and inverse DCT on one block of coefficients,
1076
* producing a reduced-size 1x1 output block.
1077
*
1078
* We hardly need an inverse DCT routine for this: just take the
1079
* average pixel value, which is one-eighth of the DC coefficient.
1080
*/
1081
1082
GLOBAL(void)
1083
jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1084
JCOEFPTR coef_block,
1085
JSAMPARRAY output_buf, JDIMENSION output_col)
1086
{
1087
DCTELEM dcval;
1088
ISLOW_MULT_TYPE * quantptr;
1089
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1090
ISHIFT_TEMPS
1091
1092
/* 1x1 is trivial: just take the DC coefficient divided by 8. */
1093
1094
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1095
1096
dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
1097
/* Add range center and fudge factor for descale and range-limit. */
1098
dcval += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
1099
1100
output_buf[0][output_col] =
1101
range_limit[(int) IRIGHT_SHIFT(dcval, 3) & RANGE_MASK];
1102
}
1103
1104
1105
/*
1106
* Perform dequantization and inverse DCT on one block of coefficients,
1107
* producing a 9x9 output block.
1108
*
1109
* Optimized algorithm with 10 multiplications in the 1-D kernel.
1110
* cK represents sqrt(2) * cos(K*pi/18).
1111
*/
1112
1113
GLOBAL(void)
1114
jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1115
JCOEFPTR coef_block,
1116
JSAMPARRAY output_buf, JDIMENSION output_col)
1117
{
1118
INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
1119
INT32 z1, z2, z3, z4;
1120
JCOEFPTR inptr;
1121
ISLOW_MULT_TYPE * quantptr;
1122
int * wsptr;
1123
JSAMPROW outptr;
1124
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1125
int ctr;
1126
int workspace[8*9]; /* buffers data between passes */
1127
SHIFT_TEMPS
1128
1129
/* Pass 1: process columns from input, store into work array. */
1130
1131
inptr = coef_block;
1132
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1133
wsptr = workspace;
1134
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1135
/* Even part */
1136
1137
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1138
tmp0 <<= CONST_BITS;
1139
/* Add fudge factor here for final descale. */
1140
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
1141
1142
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1143
z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1144
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1145
1146
tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
1147
tmp1 = tmp0 + tmp3;
1148
tmp2 = tmp0 - tmp3 - tmp3;
1149
1150
tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1151
tmp11 = tmp2 + tmp0;
1152
tmp14 = tmp2 - tmp0 - tmp0;
1153
1154
tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1155
tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1156
tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1157
1158
tmp10 = tmp1 + tmp0 - tmp3;
1159
tmp12 = tmp1 - tmp0 + tmp2;
1160
tmp13 = tmp1 - tmp2 + tmp3;
1161
1162
/* Odd part */
1163
1164
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1165
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1166
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1167
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1168
1169
z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
1170
1171
tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
1172
tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
1173
tmp0 = tmp2 + tmp3 - z2;
1174
tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
1175
tmp2 += z2 - tmp1;
1176
tmp3 += z2 + tmp1;
1177
tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1178
1179
/* Final output stage */
1180
1181
wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
1182
wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
1183
wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
1184
wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
1185
wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
1186
wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
1187
wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
1188
wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
1189
wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
1190
}
1191
1192
/* Pass 2: process 9 rows from work array, store into output array. */
1193
1194
wsptr = workspace;
1195
for (ctr = 0; ctr < 9; ctr++) {
1196
outptr = output_buf[ctr] + output_col;
1197
1198
/* Even part */
1199
1200
/* Add range center and fudge factor for final descale and range-limit. */
1201
tmp0 = (INT32) wsptr[0] +
1202
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1203
(ONE << (PASS1_BITS+2)));
1204
tmp0 <<= CONST_BITS;
1205
1206
z1 = (INT32) wsptr[2];
1207
z2 = (INT32) wsptr[4];
1208
z3 = (INT32) wsptr[6];
1209
1210
tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
1211
tmp1 = tmp0 + tmp3;
1212
tmp2 = tmp0 - tmp3 - tmp3;
1213
1214
tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1215
tmp11 = tmp2 + tmp0;
1216
tmp14 = tmp2 - tmp0 - tmp0;
1217
1218
tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1219
tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1220
tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1221
1222
tmp10 = tmp1 + tmp0 - tmp3;
1223
tmp12 = tmp1 - tmp0 + tmp2;
1224
tmp13 = tmp1 - tmp2 + tmp3;
1225
1226
/* Odd part */
1227
1228
z1 = (INT32) wsptr[1];
1229
z2 = (INT32) wsptr[3];
1230
z3 = (INT32) wsptr[5];
1231
z4 = (INT32) wsptr[7];
1232
1233
z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
1234
1235
tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
1236
tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
1237
tmp0 = tmp2 + tmp3 - z2;
1238
tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
1239
tmp2 += z2 - tmp1;
1240
tmp3 += z2 + tmp1;
1241
tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1242
1243
/* Final output stage */
1244
1245
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1246
CONST_BITS+PASS1_BITS+3)
1247
& RANGE_MASK];
1248
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1249
CONST_BITS+PASS1_BITS+3)
1250
& RANGE_MASK];
1251
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
1252
CONST_BITS+PASS1_BITS+3)
1253
& RANGE_MASK];
1254
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
1255
CONST_BITS+PASS1_BITS+3)
1256
& RANGE_MASK];
1257
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
1258
CONST_BITS+PASS1_BITS+3)
1259
& RANGE_MASK];
1260
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
1261
CONST_BITS+PASS1_BITS+3)
1262
& RANGE_MASK];
1263
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
1264
CONST_BITS+PASS1_BITS+3)
1265
& RANGE_MASK];
1266
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
1267
CONST_BITS+PASS1_BITS+3)
1268
& RANGE_MASK];
1269
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
1270
CONST_BITS+PASS1_BITS+3)
1271
& RANGE_MASK];
1272
1273
wsptr += 8; /* advance pointer to next row */
1274
}
1275
}
1276
1277
1278
/*
1279
* Perform dequantization and inverse DCT on one block of coefficients,
1280
* producing a 10x10 output block.
1281
*
1282
* Optimized algorithm with 12 multiplications in the 1-D kernel.
1283
* cK represents sqrt(2) * cos(K*pi/20).
1284
*/
1285
1286
GLOBAL(void)
1287
jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1288
JCOEFPTR coef_block,
1289
JSAMPARRAY output_buf, JDIMENSION output_col)
1290
{
1291
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1292
INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
1293
INT32 z1, z2, z3, z4, z5;
1294
JCOEFPTR inptr;
1295
ISLOW_MULT_TYPE * quantptr;
1296
int * wsptr;
1297
JSAMPROW outptr;
1298
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1299
int ctr;
1300
int workspace[8*10]; /* buffers data between passes */
1301
SHIFT_TEMPS
1302
1303
/* Pass 1: process columns from input, store into work array. */
1304
1305
inptr = coef_block;
1306
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1307
wsptr = workspace;
1308
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1309
/* Even part */
1310
1311
z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1312
z3 <<= CONST_BITS;
1313
/* Add fudge factor here for final descale. */
1314
z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1315
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1316
z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1317
z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1318
tmp10 = z3 + z1;
1319
tmp11 = z3 - z2;
1320
1321
tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
1322
CONST_BITS-PASS1_BITS);
1323
1324
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1325
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1326
1327
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1328
tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1329
tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1330
1331
tmp20 = tmp10 + tmp12;
1332
tmp24 = tmp10 - tmp12;
1333
tmp21 = tmp11 + tmp13;
1334
tmp23 = tmp11 - tmp13;
1335
1336
/* Odd part */
1337
1338
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1339
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1340
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1341
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1342
1343
tmp11 = z2 + z4;
1344
tmp13 = z2 - z4;
1345
1346
tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1347
z5 = z3 << CONST_BITS;
1348
1349
z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1350
z4 = z5 + tmp12;
1351
1352
tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1353
tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1354
1355
z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1356
z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
1357
1358
tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
1359
1360
tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1361
tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1362
1363
/* Final output stage */
1364
1365
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1366
wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1367
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1368
wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1369
wsptr[8*2] = (int) (tmp22 + tmp12);
1370
wsptr[8*7] = (int) (tmp22 - tmp12);
1371
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1372
wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1373
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1374
wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1375
}
1376
1377
/* Pass 2: process 10 rows from work array, store into output array. */
1378
1379
wsptr = workspace;
1380
for (ctr = 0; ctr < 10; ctr++) {
1381
outptr = output_buf[ctr] + output_col;
1382
1383
/* Even part */
1384
1385
/* Add range center and fudge factor for final descale and range-limit. */
1386
z3 = (INT32) wsptr[0] +
1387
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1388
(ONE << (PASS1_BITS+2)));
1389
z3 <<= CONST_BITS;
1390
z4 = (INT32) wsptr[4];
1391
z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1392
z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1393
tmp10 = z3 + z1;
1394
tmp11 = z3 - z2;
1395
1396
tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
1397
1398
z2 = (INT32) wsptr[2];
1399
z3 = (INT32) wsptr[6];
1400
1401
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1402
tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1403
tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1404
1405
tmp20 = tmp10 + tmp12;
1406
tmp24 = tmp10 - tmp12;
1407
tmp21 = tmp11 + tmp13;
1408
tmp23 = tmp11 - tmp13;
1409
1410
/* Odd part */
1411
1412
z1 = (INT32) wsptr[1];
1413
z2 = (INT32) wsptr[3];
1414
z3 = (INT32) wsptr[5];
1415
z3 <<= CONST_BITS;
1416
z4 = (INT32) wsptr[7];
1417
1418
tmp11 = z2 + z4;
1419
tmp13 = z2 - z4;
1420
1421
tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1422
1423
z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1424
z4 = z3 + tmp12;
1425
1426
tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1427
tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1428
1429
z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1430
z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
1431
1432
tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
1433
1434
tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1435
tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1436
1437
/* Final output stage */
1438
1439
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1440
CONST_BITS+PASS1_BITS+3)
1441
& RANGE_MASK];
1442
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1443
CONST_BITS+PASS1_BITS+3)
1444
& RANGE_MASK];
1445
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1446
CONST_BITS+PASS1_BITS+3)
1447
& RANGE_MASK];
1448
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1449
CONST_BITS+PASS1_BITS+3)
1450
& RANGE_MASK];
1451
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1452
CONST_BITS+PASS1_BITS+3)
1453
& RANGE_MASK];
1454
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1455
CONST_BITS+PASS1_BITS+3)
1456
& RANGE_MASK];
1457
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1458
CONST_BITS+PASS1_BITS+3)
1459
& RANGE_MASK];
1460
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1461
CONST_BITS+PASS1_BITS+3)
1462
& RANGE_MASK];
1463
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1464
CONST_BITS+PASS1_BITS+3)
1465
& RANGE_MASK];
1466
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1467
CONST_BITS+PASS1_BITS+3)
1468
& RANGE_MASK];
1469
1470
wsptr += 8; /* advance pointer to next row */
1471
}
1472
}
1473
1474
1475
/*
1476
* Perform dequantization and inverse DCT on one block of coefficients,
1477
* producing an 11x11 output block.
1478
*
1479
* Optimized algorithm with 24 multiplications in the 1-D kernel.
1480
* cK represents sqrt(2) * cos(K*pi/22).
1481
*/
1482
1483
GLOBAL(void)
1484
jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1485
JCOEFPTR coef_block,
1486
JSAMPARRAY output_buf, JDIMENSION output_col)
1487
{
1488
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1489
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1490
INT32 z1, z2, z3, z4;
1491
JCOEFPTR inptr;
1492
ISLOW_MULT_TYPE * quantptr;
1493
int * wsptr;
1494
JSAMPROW outptr;
1495
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1496
int ctr;
1497
int workspace[8*11]; /* buffers data between passes */
1498
SHIFT_TEMPS
1499
1500
/* Pass 1: process columns from input, store into work array. */
1501
1502
inptr = coef_block;
1503
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1504
wsptr = workspace;
1505
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1506
/* Even part */
1507
1508
tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1509
tmp10 <<= CONST_BITS;
1510
/* Add fudge factor here for final descale. */
1511
tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
1512
1513
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1514
z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1515
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1516
1517
tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1518
tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1519
z4 = z1 + z3;
1520
tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1521
z4 -= z2;
1522
tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1523
tmp21 = tmp20 + tmp23 + tmp25 -
1524
MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1525
tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1526
tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1527
tmp24 += tmp25;
1528
tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1529
tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1530
MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1531
tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1532
1533
/* Odd part */
1534
1535
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1536
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1537
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1538
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1539
1540
tmp11 = z1 + z2;
1541
tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1542
tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1543
tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1544
tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1545
tmp10 = tmp11 + tmp12 + tmp13 -
1546
MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1547
z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1548
tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1549
tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1550
z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
1551
tmp11 += z1;
1552
tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1553
tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
1554
MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1555
MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1556
1557
/* Final output stage */
1558
1559
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1560
wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1561
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1562
wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1563
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1564
wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1565
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1566
wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1567
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1568
wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1569
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
1570
}
1571
1572
/* Pass 2: process 11 rows from work array, store into output array. */
1573
1574
wsptr = workspace;
1575
for (ctr = 0; ctr < 11; ctr++) {
1576
outptr = output_buf[ctr] + output_col;
1577
1578
/* Even part */
1579
1580
/* Add range center and fudge factor for final descale and range-limit. */
1581
tmp10 = (INT32) wsptr[0] +
1582
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1583
(ONE << (PASS1_BITS+2)));
1584
tmp10 <<= CONST_BITS;
1585
1586
z1 = (INT32) wsptr[2];
1587
z2 = (INT32) wsptr[4];
1588
z3 = (INT32) wsptr[6];
1589
1590
tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1591
tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1592
z4 = z1 + z3;
1593
tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1594
z4 -= z2;
1595
tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1596
tmp21 = tmp20 + tmp23 + tmp25 -
1597
MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1598
tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1599
tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1600
tmp24 += tmp25;
1601
tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1602
tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1603
MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1604
tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1605
1606
/* Odd part */
1607
1608
z1 = (INT32) wsptr[1];
1609
z2 = (INT32) wsptr[3];
1610
z3 = (INT32) wsptr[5];
1611
z4 = (INT32) wsptr[7];
1612
1613
tmp11 = z1 + z2;
1614
tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1615
tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1616
tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1617
tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1618
tmp10 = tmp11 + tmp12 + tmp13 -
1619
MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1620
z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1621
tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1622
tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1623
z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
1624
tmp11 += z1;
1625
tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1626
tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
1627
MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1628
MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1629
1630
/* Final output stage */
1631
1632
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1633
CONST_BITS+PASS1_BITS+3)
1634
& RANGE_MASK];
1635
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1636
CONST_BITS+PASS1_BITS+3)
1637
& RANGE_MASK];
1638
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1639
CONST_BITS+PASS1_BITS+3)
1640
& RANGE_MASK];
1641
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1642
CONST_BITS+PASS1_BITS+3)
1643
& RANGE_MASK];
1644
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1645
CONST_BITS+PASS1_BITS+3)
1646
& RANGE_MASK];
1647
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1648
CONST_BITS+PASS1_BITS+3)
1649
& RANGE_MASK];
1650
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1651
CONST_BITS+PASS1_BITS+3)
1652
& RANGE_MASK];
1653
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1654
CONST_BITS+PASS1_BITS+3)
1655
& RANGE_MASK];
1656
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1657
CONST_BITS+PASS1_BITS+3)
1658
& RANGE_MASK];
1659
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1660
CONST_BITS+PASS1_BITS+3)
1661
& RANGE_MASK];
1662
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25,
1663
CONST_BITS+PASS1_BITS+3)
1664
& RANGE_MASK];
1665
1666
wsptr += 8; /* advance pointer to next row */
1667
}
1668
}
1669
1670
1671
/*
1672
* Perform dequantization and inverse DCT on one block of coefficients,
1673
* producing a 12x12 output block.
1674
*
1675
* Optimized algorithm with 15 multiplications in the 1-D kernel.
1676
* cK represents sqrt(2) * cos(K*pi/24).
1677
*/
1678
1679
GLOBAL(void)
1680
jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1681
JCOEFPTR coef_block,
1682
JSAMPARRAY output_buf, JDIMENSION output_col)
1683
{
1684
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1685
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1686
INT32 z1, z2, z3, z4;
1687
JCOEFPTR inptr;
1688
ISLOW_MULT_TYPE * quantptr;
1689
int * wsptr;
1690
JSAMPROW outptr;
1691
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1692
int ctr;
1693
int workspace[8*12]; /* buffers data between passes */
1694
SHIFT_TEMPS
1695
1696
/* Pass 1: process columns from input, store into work array. */
1697
1698
inptr = coef_block;
1699
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1700
wsptr = workspace;
1701
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1702
/* Even part */
1703
1704
z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1705
z3 <<= CONST_BITS;
1706
/* Add fudge factor here for final descale. */
1707
z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1708
1709
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1710
z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1711
1712
tmp10 = z3 + z4;
1713
tmp11 = z3 - z4;
1714
1715
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1716
z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1717
z1 <<= CONST_BITS;
1718
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1719
z2 <<= CONST_BITS;
1720
1721
tmp12 = z1 - z2;
1722
1723
tmp21 = z3 + tmp12;
1724
tmp24 = z3 - tmp12;
1725
1726
tmp12 = z4 + z2;
1727
1728
tmp20 = tmp10 + tmp12;
1729
tmp25 = tmp10 - tmp12;
1730
1731
tmp12 = z4 - z1 - z2;
1732
1733
tmp22 = tmp11 + tmp12;
1734
tmp23 = tmp11 - tmp12;
1735
1736
/* Odd part */
1737
1738
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1739
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1740
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1741
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1742
1743
tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1744
tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
1745
1746
tmp10 = z1 + z3;
1747
tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1748
tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1749
tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1750
tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
1751
tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1752
tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1753
tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1754
MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1755
1756
z1 -= z4;
1757
z2 -= z3;
1758
z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1759
tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1760
tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
1761
1762
/* Final output stage */
1763
1764
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1765
wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1766
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1767
wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1768
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1769
wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1770
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1771
wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1772
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1773
wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1774
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1775
wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1776
}
1777
1778
/* Pass 2: process 12 rows from work array, store into output array. */
1779
1780
wsptr = workspace;
1781
for (ctr = 0; ctr < 12; ctr++) {
1782
outptr = output_buf[ctr] + output_col;
1783
1784
/* Even part */
1785
1786
/* Add range center and fudge factor for final descale and range-limit. */
1787
z3 = (INT32) wsptr[0] +
1788
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1789
(ONE << (PASS1_BITS+2)));
1790
z3 <<= CONST_BITS;
1791
1792
z4 = (INT32) wsptr[4];
1793
z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1794
1795
tmp10 = z3 + z4;
1796
tmp11 = z3 - z4;
1797
1798
z1 = (INT32) wsptr[2];
1799
z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1800
z1 <<= CONST_BITS;
1801
z2 = (INT32) wsptr[6];
1802
z2 <<= CONST_BITS;
1803
1804
tmp12 = z1 - z2;
1805
1806
tmp21 = z3 + tmp12;
1807
tmp24 = z3 - tmp12;
1808
1809
tmp12 = z4 + z2;
1810
1811
tmp20 = tmp10 + tmp12;
1812
tmp25 = tmp10 - tmp12;
1813
1814
tmp12 = z4 - z1 - z2;
1815
1816
tmp22 = tmp11 + tmp12;
1817
tmp23 = tmp11 - tmp12;
1818
1819
/* Odd part */
1820
1821
z1 = (INT32) wsptr[1];
1822
z2 = (INT32) wsptr[3];
1823
z3 = (INT32) wsptr[5];
1824
z4 = (INT32) wsptr[7];
1825
1826
tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1827
tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
1828
1829
tmp10 = z1 + z3;
1830
tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1831
tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1832
tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1833
tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
1834
tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1835
tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1836
tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1837
MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1838
1839
z1 -= z4;
1840
z2 -= z3;
1841
z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1842
tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1843
tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
1844
1845
/* Final output stage */
1846
1847
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1848
CONST_BITS+PASS1_BITS+3)
1849
& RANGE_MASK];
1850
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1851
CONST_BITS+PASS1_BITS+3)
1852
& RANGE_MASK];
1853
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1854
CONST_BITS+PASS1_BITS+3)
1855
& RANGE_MASK];
1856
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1857
CONST_BITS+PASS1_BITS+3)
1858
& RANGE_MASK];
1859
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1860
CONST_BITS+PASS1_BITS+3)
1861
& RANGE_MASK];
1862
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1863
CONST_BITS+PASS1_BITS+3)
1864
& RANGE_MASK];
1865
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1866
CONST_BITS+PASS1_BITS+3)
1867
& RANGE_MASK];
1868
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1869
CONST_BITS+PASS1_BITS+3)
1870
& RANGE_MASK];
1871
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1872
CONST_BITS+PASS1_BITS+3)
1873
& RANGE_MASK];
1874
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1875
CONST_BITS+PASS1_BITS+3)
1876
& RANGE_MASK];
1877
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
1878
CONST_BITS+PASS1_BITS+3)
1879
& RANGE_MASK];
1880
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
1881
CONST_BITS+PASS1_BITS+3)
1882
& RANGE_MASK];
1883
1884
wsptr += 8; /* advance pointer to next row */
1885
}
1886
}
1887
1888
1889
/*
1890
* Perform dequantization and inverse DCT on one block of coefficients,
1891
* producing a 13x13 output block.
1892
*
1893
* Optimized algorithm with 29 multiplications in the 1-D kernel.
1894
* cK represents sqrt(2) * cos(K*pi/26).
1895
*/
1896
1897
GLOBAL(void)
1898
jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1899
JCOEFPTR coef_block,
1900
JSAMPARRAY output_buf, JDIMENSION output_col)
1901
{
1902
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1903
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
1904
INT32 z1, z2, z3, z4;
1905
JCOEFPTR inptr;
1906
ISLOW_MULT_TYPE * quantptr;
1907
int * wsptr;
1908
JSAMPROW outptr;
1909
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1910
int ctr;
1911
int workspace[8*13]; /* buffers data between passes */
1912
SHIFT_TEMPS
1913
1914
/* Pass 1: process columns from input, store into work array. */
1915
1916
inptr = coef_block;
1917
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1918
wsptr = workspace;
1919
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1920
/* Even part */
1921
1922
z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1923
z1 <<= CONST_BITS;
1924
/* Add fudge factor here for final descale. */
1925
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
1926
1927
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1928
z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1929
z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1930
1931
tmp10 = z3 + z4;
1932
tmp11 = z3 - z4;
1933
1934
tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
1935
tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
1936
1937
tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
1938
tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
1939
1940
tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
1941
tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
1942
1943
tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
1944
tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
1945
1946
tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
1947
tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
1948
1949
tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
1950
tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
1951
1952
tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
1953
1954
/* Odd part */
1955
1956
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1957
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1958
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1959
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1960
1961
tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
1962
tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
1963
tmp15 = z1 + z4;
1964
tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
1965
tmp10 = tmp11 + tmp12 + tmp13 -
1966
MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
1967
tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
1968
tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
1969
tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
1970
tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
1971
tmp11 += tmp14;
1972
tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
1973
tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
1974
tmp12 += tmp14;
1975
tmp13 += tmp14;
1976
tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
1977
tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
1978
MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
1979
z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
1980
tmp14 += z1;
1981
tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
1982
MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
1983
1984
/* Final output stage */
1985
1986
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1987
wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1988
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1989
wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1990
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1991
wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1992
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1993
wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1994
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1995
wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1996
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1997
wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1998
wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
1999
}
2000
2001
/* Pass 2: process 13 rows from work array, store into output array. */
2002
2003
wsptr = workspace;
2004
for (ctr = 0; ctr < 13; ctr++) {
2005
outptr = output_buf[ctr] + output_col;
2006
2007
/* Even part */
2008
2009
/* Add range center and fudge factor for final descale and range-limit. */
2010
z1 = (INT32) wsptr[0] +
2011
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2012
(ONE << (PASS1_BITS+2)));
2013
z1 <<= CONST_BITS;
2014
2015
z2 = (INT32) wsptr[2];
2016
z3 = (INT32) wsptr[4];
2017
z4 = (INT32) wsptr[6];
2018
2019
tmp10 = z3 + z4;
2020
tmp11 = z3 - z4;
2021
2022
tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
2023
tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
2024
2025
tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
2026
tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
2027
2028
tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
2029
tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
2030
2031
tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
2032
tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
2033
2034
tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
2035
tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
2036
2037
tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
2038
tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
2039
2040
tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
2041
2042
/* Odd part */
2043
2044
z1 = (INT32) wsptr[1];
2045
z2 = (INT32) wsptr[3];
2046
z3 = (INT32) wsptr[5];
2047
z4 = (INT32) wsptr[7];
2048
2049
tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
2050
tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
2051
tmp15 = z1 + z4;
2052
tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
2053
tmp10 = tmp11 + tmp12 + tmp13 -
2054
MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
2055
tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
2056
tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
2057
tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
2058
tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
2059
tmp11 += tmp14;
2060
tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
2061
tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
2062
tmp12 += tmp14;
2063
tmp13 += tmp14;
2064
tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
2065
tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
2066
MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
2067
z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
2068
tmp14 += z1;
2069
tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
2070
MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
2071
2072
/* Final output stage */
2073
2074
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2075
CONST_BITS+PASS1_BITS+3)
2076
& RANGE_MASK];
2077
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2078
CONST_BITS+PASS1_BITS+3)
2079
& RANGE_MASK];
2080
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2081
CONST_BITS+PASS1_BITS+3)
2082
& RANGE_MASK];
2083
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2084
CONST_BITS+PASS1_BITS+3)
2085
& RANGE_MASK];
2086
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2087
CONST_BITS+PASS1_BITS+3)
2088
& RANGE_MASK];
2089
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2090
CONST_BITS+PASS1_BITS+3)
2091
& RANGE_MASK];
2092
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2093
CONST_BITS+PASS1_BITS+3)
2094
& RANGE_MASK];
2095
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2096
CONST_BITS+PASS1_BITS+3)
2097
& RANGE_MASK];
2098
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2099
CONST_BITS+PASS1_BITS+3)
2100
& RANGE_MASK];
2101
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2102
CONST_BITS+PASS1_BITS+3)
2103
& RANGE_MASK];
2104
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2105
CONST_BITS+PASS1_BITS+3)
2106
& RANGE_MASK];
2107
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2108
CONST_BITS+PASS1_BITS+3)
2109
& RANGE_MASK];
2110
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26,
2111
CONST_BITS+PASS1_BITS+3)
2112
& RANGE_MASK];
2113
2114
wsptr += 8; /* advance pointer to next row */
2115
}
2116
}
2117
2118
2119
/*
2120
* Perform dequantization and inverse DCT on one block of coefficients,
2121
* producing a 14x14 output block.
2122
*
2123
* Optimized algorithm with 20 multiplications in the 1-D kernel.
2124
* cK represents sqrt(2) * cos(K*pi/28).
2125
*/
2126
2127
GLOBAL(void)
2128
jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2129
JCOEFPTR coef_block,
2130
JSAMPARRAY output_buf, JDIMENSION output_col)
2131
{
2132
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2133
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
2134
INT32 z1, z2, z3, z4;
2135
JCOEFPTR inptr;
2136
ISLOW_MULT_TYPE * quantptr;
2137
int * wsptr;
2138
JSAMPROW outptr;
2139
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2140
int ctr;
2141
int workspace[8*14]; /* buffers data between passes */
2142
SHIFT_TEMPS
2143
2144
/* Pass 1: process columns from input, store into work array. */
2145
2146
inptr = coef_block;
2147
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2148
wsptr = workspace;
2149
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2150
/* Even part */
2151
2152
z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2153
z1 <<= CONST_BITS;
2154
/* Add fudge factor here for final descale. */
2155
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2156
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2157
z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2158
z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2159
z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2160
2161
tmp10 = z1 + z2;
2162
tmp11 = z1 + z3;
2163
tmp12 = z1 - z4;
2164
2165
tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
2166
CONST_BITS-PASS1_BITS);
2167
2168
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2169
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2170
2171
z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2172
2173
tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2174
tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2175
tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2176
MULTIPLY(z2, FIX(1.378756276)); /* c2 */
2177
2178
tmp20 = tmp10 + tmp13;
2179
tmp26 = tmp10 - tmp13;
2180
tmp21 = tmp11 + tmp14;
2181
tmp25 = tmp11 - tmp14;
2182
tmp22 = tmp12 + tmp15;
2183
tmp24 = tmp12 - tmp15;
2184
2185
/* Odd part */
2186
2187
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2188
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2189
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2190
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2191
tmp13 = z4 << CONST_BITS;
2192
2193
tmp14 = z1 + z3;
2194
tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
2195
tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
2196
tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2197
tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
2198
tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
2199
z1 -= z2;
2200
tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
2201
tmp16 += tmp15;
2202
z1 += z4;
2203
z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
2204
tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
2205
tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
2206
z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
2207
tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2208
tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
2209
2210
tmp13 = (z1 - z3) << PASS1_BITS;
2211
2212
/* Final output stage */
2213
2214
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2215
wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2216
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2217
wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2218
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2219
wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2220
wsptr[8*3] = (int) (tmp23 + tmp13);
2221
wsptr[8*10] = (int) (tmp23 - tmp13);
2222
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2223
wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2224
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2225
wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2226
wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2227
wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2228
}
2229
2230
/* Pass 2: process 14 rows from work array, store into output array. */
2231
2232
wsptr = workspace;
2233
for (ctr = 0; ctr < 14; ctr++) {
2234
outptr = output_buf[ctr] + output_col;
2235
2236
/* Even part */
2237
2238
/* Add range center and fudge factor for final descale and range-limit. */
2239
z1 = (INT32) wsptr[0] +
2240
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2241
(ONE << (PASS1_BITS+2)));
2242
z1 <<= CONST_BITS;
2243
z4 = (INT32) wsptr[4];
2244
z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2245
z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2246
z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2247
2248
tmp10 = z1 + z2;
2249
tmp11 = z1 + z3;
2250
tmp12 = z1 - z4;
2251
2252
tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
2253
2254
z1 = (INT32) wsptr[2];
2255
z2 = (INT32) wsptr[6];
2256
2257
z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2258
2259
tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2260
tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2261
tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2262
MULTIPLY(z2, FIX(1.378756276)); /* c2 */
2263
2264
tmp20 = tmp10 + tmp13;
2265
tmp26 = tmp10 - tmp13;
2266
tmp21 = tmp11 + tmp14;
2267
tmp25 = tmp11 - tmp14;
2268
tmp22 = tmp12 + tmp15;
2269
tmp24 = tmp12 - tmp15;
2270
2271
/* Odd part */
2272
2273
z1 = (INT32) wsptr[1];
2274
z2 = (INT32) wsptr[3];
2275
z3 = (INT32) wsptr[5];
2276
z4 = (INT32) wsptr[7];
2277
z4 <<= CONST_BITS;
2278
2279
tmp14 = z1 + z3;
2280
tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
2281
tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
2282
tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2283
tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
2284
tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
2285
z1 -= z2;
2286
tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
2287
tmp16 += tmp15;
2288
tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
2289
tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
2290
tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
2291
tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
2292
tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2293
tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
2294
2295
tmp13 = ((z1 - z3) << CONST_BITS) + z4;
2296
2297
/* Final output stage */
2298
2299
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2300
CONST_BITS+PASS1_BITS+3)
2301
& RANGE_MASK];
2302
outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2303
CONST_BITS+PASS1_BITS+3)
2304
& RANGE_MASK];
2305
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2306
CONST_BITS+PASS1_BITS+3)
2307
& RANGE_MASK];
2308
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2309
CONST_BITS+PASS1_BITS+3)
2310
& RANGE_MASK];
2311
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2312
CONST_BITS+PASS1_BITS+3)
2313
& RANGE_MASK];
2314
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2315
CONST_BITS+PASS1_BITS+3)
2316
& RANGE_MASK];
2317
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2318
CONST_BITS+PASS1_BITS+3)
2319
& RANGE_MASK];
2320
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2321
CONST_BITS+PASS1_BITS+3)
2322
& RANGE_MASK];
2323
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2324
CONST_BITS+PASS1_BITS+3)
2325
& RANGE_MASK];
2326
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2327
CONST_BITS+PASS1_BITS+3)
2328
& RANGE_MASK];
2329
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2330
CONST_BITS+PASS1_BITS+3)
2331
& RANGE_MASK];
2332
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2333
CONST_BITS+PASS1_BITS+3)
2334
& RANGE_MASK];
2335
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2336
CONST_BITS+PASS1_BITS+3)
2337
& RANGE_MASK];
2338
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2339
CONST_BITS+PASS1_BITS+3)
2340
& RANGE_MASK];
2341
2342
wsptr += 8; /* advance pointer to next row */
2343
}
2344
}
2345
2346
2347
/*
2348
* Perform dequantization and inverse DCT on one block of coefficients,
2349
* producing a 15x15 output block.
2350
*
2351
* Optimized algorithm with 22 multiplications in the 1-D kernel.
2352
* cK represents sqrt(2) * cos(K*pi/30).
2353
*/
2354
2355
GLOBAL(void)
2356
jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2357
JCOEFPTR coef_block,
2358
JSAMPARRAY output_buf, JDIMENSION output_col)
2359
{
2360
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2361
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2362
INT32 z1, z2, z3, z4;
2363
JCOEFPTR inptr;
2364
ISLOW_MULT_TYPE * quantptr;
2365
int * wsptr;
2366
JSAMPROW outptr;
2367
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2368
int ctr;
2369
int workspace[8*15]; /* buffers data between passes */
2370
SHIFT_TEMPS
2371
2372
/* Pass 1: process columns from input, store into work array. */
2373
2374
inptr = coef_block;
2375
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2376
wsptr = workspace;
2377
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2378
/* Even part */
2379
2380
z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2381
z1 <<= CONST_BITS;
2382
/* Add fudge factor here for final descale. */
2383
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2384
2385
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2386
z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2387
z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2388
2389
tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2390
tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2391
2392
tmp12 = z1 - tmp10;
2393
tmp13 = z1 + tmp11;
2394
z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2395
2396
z4 = z2 - z3;
2397
z3 += z2;
2398
tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2399
tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2400
z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2401
2402
tmp20 = tmp13 + tmp10 + tmp11;
2403
tmp23 = tmp12 - tmp10 + tmp11 + z2;
2404
2405
tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2406
tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2407
2408
tmp25 = tmp13 - tmp10 - tmp11;
2409
tmp26 = tmp12 + tmp10 - tmp11 - z2;
2410
2411
tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2412
tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2413
2414
tmp21 = tmp12 + tmp10 + tmp11;
2415
tmp24 = tmp13 - tmp10 + tmp11;
2416
tmp11 += tmp11;
2417
tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2418
tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
2419
2420
/* Odd part */
2421
2422
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2423
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2424
z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2425
z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2426
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2427
2428
tmp13 = z2 - z4;
2429
tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2430
tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2431
tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2432
2433
tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
2434
tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
2435
z2 = z1 - z4;
2436
tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2437
2438
tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2439
tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2440
tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2441
z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2442
tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2443
tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
2444
2445
/* Final output stage */
2446
2447
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2448
wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2449
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2450
wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2451
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2452
wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2453
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
2454
wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
2455
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2456
wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2457
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2458
wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2459
wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2460
wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2461
wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
2462
}
2463
2464
/* Pass 2: process 15 rows from work array, store into output array. */
2465
2466
wsptr = workspace;
2467
for (ctr = 0; ctr < 15; ctr++) {
2468
outptr = output_buf[ctr] + output_col;
2469
2470
/* Even part */
2471
2472
/* Add range center and fudge factor for final descale and range-limit. */
2473
z1 = (INT32) wsptr[0] +
2474
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2475
(ONE << (PASS1_BITS+2)));
2476
z1 <<= CONST_BITS;
2477
2478
z2 = (INT32) wsptr[2];
2479
z3 = (INT32) wsptr[4];
2480
z4 = (INT32) wsptr[6];
2481
2482
tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2483
tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2484
2485
tmp12 = z1 - tmp10;
2486
tmp13 = z1 + tmp11;
2487
z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2488
2489
z4 = z2 - z3;
2490
z3 += z2;
2491
tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2492
tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2493
z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2494
2495
tmp20 = tmp13 + tmp10 + tmp11;
2496
tmp23 = tmp12 - tmp10 + tmp11 + z2;
2497
2498
tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2499
tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2500
2501
tmp25 = tmp13 - tmp10 - tmp11;
2502
tmp26 = tmp12 + tmp10 - tmp11 - z2;
2503
2504
tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2505
tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2506
2507
tmp21 = tmp12 + tmp10 + tmp11;
2508
tmp24 = tmp13 - tmp10 + tmp11;
2509
tmp11 += tmp11;
2510
tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2511
tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
2512
2513
/* Odd part */
2514
2515
z1 = (INT32) wsptr[1];
2516
z2 = (INT32) wsptr[3];
2517
z4 = (INT32) wsptr[5];
2518
z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2519
z4 = (INT32) wsptr[7];
2520
2521
tmp13 = z2 - z4;
2522
tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2523
tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2524
tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2525
2526
tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
2527
tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
2528
z2 = z1 - z4;
2529
tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2530
2531
tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2532
tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2533
tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2534
z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2535
tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2536
tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
2537
2538
/* Final output stage */
2539
2540
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2541
CONST_BITS+PASS1_BITS+3)
2542
& RANGE_MASK];
2543
outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2544
CONST_BITS+PASS1_BITS+3)
2545
& RANGE_MASK];
2546
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2547
CONST_BITS+PASS1_BITS+3)
2548
& RANGE_MASK];
2549
outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2550
CONST_BITS+PASS1_BITS+3)
2551
& RANGE_MASK];
2552
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2553
CONST_BITS+PASS1_BITS+3)
2554
& RANGE_MASK];
2555
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2556
CONST_BITS+PASS1_BITS+3)
2557
& RANGE_MASK];
2558
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2559
CONST_BITS+PASS1_BITS+3)
2560
& RANGE_MASK];
2561
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2562
CONST_BITS+PASS1_BITS+3)
2563
& RANGE_MASK];
2564
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2565
CONST_BITS+PASS1_BITS+3)
2566
& RANGE_MASK];
2567
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2568
CONST_BITS+PASS1_BITS+3)
2569
& RANGE_MASK];
2570
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2571
CONST_BITS+PASS1_BITS+3)
2572
& RANGE_MASK];
2573
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2574
CONST_BITS+PASS1_BITS+3)
2575
& RANGE_MASK];
2576
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2577
CONST_BITS+PASS1_BITS+3)
2578
& RANGE_MASK];
2579
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2580
CONST_BITS+PASS1_BITS+3)
2581
& RANGE_MASK];
2582
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27,
2583
CONST_BITS+PASS1_BITS+3)
2584
& RANGE_MASK];
2585
2586
wsptr += 8; /* advance pointer to next row */
2587
}
2588
}
2589
2590
2591
/*
2592
* Perform dequantization and inverse DCT on one block of coefficients,
2593
* producing a 16x16 output block.
2594
*
2595
* Optimized algorithm with 28 multiplications in the 1-D kernel.
2596
* cK represents sqrt(2) * cos(K*pi/32).
2597
*/
2598
2599
GLOBAL(void)
2600
jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2601
JCOEFPTR coef_block,
2602
JSAMPARRAY output_buf, JDIMENSION output_col)
2603
{
2604
INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2605
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2606
INT32 z1, z2, z3, z4;
2607
JCOEFPTR inptr;
2608
ISLOW_MULT_TYPE * quantptr;
2609
int * wsptr;
2610
JSAMPROW outptr;
2611
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2612
int ctr;
2613
int workspace[8*16]; /* buffers data between passes */
2614
SHIFT_TEMPS
2615
2616
/* Pass 1: process columns from input, store into work array. */
2617
2618
inptr = coef_block;
2619
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2620
wsptr = workspace;
2621
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2622
/* Even part */
2623
2624
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2625
tmp0 <<= CONST_BITS;
2626
/* Add fudge factor here for final descale. */
2627
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
2628
2629
z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2630
tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2631
tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2632
2633
tmp10 = tmp0 + tmp1;
2634
tmp11 = tmp0 - tmp1;
2635
tmp12 = tmp0 + tmp2;
2636
tmp13 = tmp0 - tmp2;
2637
2638
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2639
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2640
z3 = z1 - z2;
2641
z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2642
z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2643
2644
tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2645
tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2646
tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2647
tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2648
2649
tmp20 = tmp10 + tmp0;
2650
tmp27 = tmp10 - tmp0;
2651
tmp21 = tmp12 + tmp1;
2652
tmp26 = tmp12 - tmp1;
2653
tmp22 = tmp13 + tmp2;
2654
tmp25 = tmp13 - tmp2;
2655
tmp23 = tmp11 + tmp3;
2656
tmp24 = tmp11 - tmp3;
2657
2658
/* Odd part */
2659
2660
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2661
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2662
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2663
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2664
2665
tmp11 = z1 + z3;
2666
2667
tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2668
tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2669
tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2670
tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2671
tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2672
tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2673
tmp0 = tmp1 + tmp2 + tmp3 -
2674
MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2675
tmp13 = tmp10 + tmp11 + tmp12 -
2676
MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2677
z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2678
tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2679
tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2680
z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2681
tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2682
tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2683
z2 += z4;
2684
z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
2685
tmp1 += z1;
2686
tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2687
z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
2688
tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2689
tmp12 += z2;
2690
z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2691
tmp2 += z2;
2692
tmp3 += z2;
2693
z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2694
tmp10 += z2;
2695
tmp11 += z2;
2696
2697
/* Final output stage */
2698
2699
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
2700
wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
2701
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
2702
wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
2703
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
2704
wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
2705
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
2706
wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
2707
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
2708
wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
2709
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
2710
wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
2711
wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
2712
wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
2713
wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
2714
wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
2715
}
2716
2717
/* Pass 2: process 16 rows from work array, store into output array. */
2718
2719
wsptr = workspace;
2720
for (ctr = 0; ctr < 16; ctr++) {
2721
outptr = output_buf[ctr] + output_col;
2722
2723
/* Even part */
2724
2725
/* Add range center and fudge factor for final descale and range-limit. */
2726
tmp0 = (INT32) wsptr[0] +
2727
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2728
(ONE << (PASS1_BITS+2)));
2729
tmp0 <<= CONST_BITS;
2730
2731
z1 = (INT32) wsptr[4];
2732
tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2733
tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2734
2735
tmp10 = tmp0 + tmp1;
2736
tmp11 = tmp0 - tmp1;
2737
tmp12 = tmp0 + tmp2;
2738
tmp13 = tmp0 - tmp2;
2739
2740
z1 = (INT32) wsptr[2];
2741
z2 = (INT32) wsptr[6];
2742
z3 = z1 - z2;
2743
z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2744
z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2745
2746
tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2747
tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2748
tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2749
tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2750
2751
tmp20 = tmp10 + tmp0;
2752
tmp27 = tmp10 - tmp0;
2753
tmp21 = tmp12 + tmp1;
2754
tmp26 = tmp12 - tmp1;
2755
tmp22 = tmp13 + tmp2;
2756
tmp25 = tmp13 - tmp2;
2757
tmp23 = tmp11 + tmp3;
2758
tmp24 = tmp11 - tmp3;
2759
2760
/* Odd part */
2761
2762
z1 = (INT32) wsptr[1];
2763
z2 = (INT32) wsptr[3];
2764
z3 = (INT32) wsptr[5];
2765
z4 = (INT32) wsptr[7];
2766
2767
tmp11 = z1 + z3;
2768
2769
tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2770
tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2771
tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2772
tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2773
tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2774
tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2775
tmp0 = tmp1 + tmp2 + tmp3 -
2776
MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2777
tmp13 = tmp10 + tmp11 + tmp12 -
2778
MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2779
z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2780
tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2781
tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2782
z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2783
tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2784
tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2785
z2 += z4;
2786
z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
2787
tmp1 += z1;
2788
tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2789
z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
2790
tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2791
tmp12 += z2;
2792
z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2793
tmp2 += z2;
2794
tmp3 += z2;
2795
z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2796
tmp10 += z2;
2797
tmp11 += z2;
2798
2799
/* Final output stage */
2800
2801
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
2802
CONST_BITS+PASS1_BITS+3)
2803
& RANGE_MASK];
2804
outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
2805
CONST_BITS+PASS1_BITS+3)
2806
& RANGE_MASK];
2807
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
2808
CONST_BITS+PASS1_BITS+3)
2809
& RANGE_MASK];
2810
outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
2811
CONST_BITS+PASS1_BITS+3)
2812
& RANGE_MASK];
2813
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
2814
CONST_BITS+PASS1_BITS+3)
2815
& RANGE_MASK];
2816
outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
2817
CONST_BITS+PASS1_BITS+3)
2818
& RANGE_MASK];
2819
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
2820
CONST_BITS+PASS1_BITS+3)
2821
& RANGE_MASK];
2822
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
2823
CONST_BITS+PASS1_BITS+3)
2824
& RANGE_MASK];
2825
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
2826
CONST_BITS+PASS1_BITS+3)
2827
& RANGE_MASK];
2828
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
2829
CONST_BITS+PASS1_BITS+3)
2830
& RANGE_MASK];
2831
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
2832
CONST_BITS+PASS1_BITS+3)
2833
& RANGE_MASK];
2834
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
2835
CONST_BITS+PASS1_BITS+3)
2836
& RANGE_MASK];
2837
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
2838
CONST_BITS+PASS1_BITS+3)
2839
& RANGE_MASK];
2840
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
2841
CONST_BITS+PASS1_BITS+3)
2842
& RANGE_MASK];
2843
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
2844
CONST_BITS+PASS1_BITS+3)
2845
& RANGE_MASK];
2846
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
2847
CONST_BITS+PASS1_BITS+3)
2848
& RANGE_MASK];
2849
2850
wsptr += 8; /* advance pointer to next row */
2851
}
2852
}
2853
2854
2855
/*
2856
* Perform dequantization and inverse DCT on one block of coefficients,
2857
* producing a 16x8 output block.
2858
*
2859
* 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
2860
*/
2861
2862
GLOBAL(void)
2863
jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2864
JCOEFPTR coef_block,
2865
JSAMPARRAY output_buf, JDIMENSION output_col)
2866
{
2867
INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2868
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2869
INT32 z1, z2, z3, z4;
2870
JCOEFPTR inptr;
2871
ISLOW_MULT_TYPE * quantptr;
2872
int * wsptr;
2873
JSAMPROW outptr;
2874
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2875
int ctr;
2876
int workspace[8*8]; /* buffers data between passes */
2877
SHIFT_TEMPS
2878
2879
/* Pass 1: process columns from input, store into work array.
2880
* Note results are scaled up by sqrt(8) compared to a true IDCT;
2881
* furthermore, we scale the results by 2**PASS1_BITS.
2882
* 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
2883
*/
2884
2885
inptr = coef_block;
2886
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2887
wsptr = workspace;
2888
for (ctr = DCTSIZE; ctr > 0; ctr--) {
2889
/* Due to quantization, we will usually find that many of the input
2890
* coefficients are zero, especially the AC terms. We can exploit this
2891
* by short-circuiting the IDCT calculation for any column in which all
2892
* the AC terms are zero. In that case each output is equal to the
2893
* DC coefficient (with scale factor as needed).
2894
* With typical images and quantization tables, half or more of the
2895
* column DCT calculations can be simplified this way.
2896
*/
2897
2898
if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
2899
inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
2900
inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
2901
inptr[DCTSIZE*7] == 0) {
2902
/* AC terms all zero */
2903
int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
2904
2905
wsptr[DCTSIZE*0] = dcval;
2906
wsptr[DCTSIZE*1] = dcval;
2907
wsptr[DCTSIZE*2] = dcval;
2908
wsptr[DCTSIZE*3] = dcval;
2909
wsptr[DCTSIZE*4] = dcval;
2910
wsptr[DCTSIZE*5] = dcval;
2911
wsptr[DCTSIZE*6] = dcval;
2912
wsptr[DCTSIZE*7] = dcval;
2913
2914
inptr++; /* advance pointers to next column */
2915
quantptr++;
2916
wsptr++;
2917
continue;
2918
}
2919
2920
/* Even part: reverse the even part of the forward DCT.
2921
* The rotator is c(-6).
2922
*/
2923
2924
z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2925
z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2926
z2 <<= CONST_BITS;
2927
z3 <<= CONST_BITS;
2928
/* Add fudge factor here for final descale. */
2929
z2 += ONE << (CONST_BITS-PASS1_BITS-1);
2930
2931
tmp0 = z2 + z3;
2932
tmp1 = z2 - z3;
2933
2934
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2935
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2936
2937
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
2938
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
2939
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
2940
2941
tmp10 = tmp0 + tmp2;
2942
tmp13 = tmp0 - tmp2;
2943
tmp11 = tmp1 + tmp3;
2944
tmp12 = tmp1 - tmp3;
2945
2946
/* Odd part per figure 8; the matrix is unitary and hence its
2947
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
2948
*/
2949
2950
tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2951
tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2952
tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2953
tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2954
2955
z2 = tmp0 + tmp2;
2956
z3 = tmp1 + tmp3;
2957
2958
z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
2959
z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
2960
z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
2961
z2 += z1;
2962
z3 += z1;
2963
2964
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
2965
tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
2966
tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
2967
tmp0 += z1 + z2;
2968
tmp3 += z1 + z3;
2969
2970
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
2971
tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
2972
tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
2973
tmp1 += z1 + z3;
2974
tmp2 += z1 + z2;
2975
2976
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
2977
2978
wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
2979
wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
2980
wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
2981
wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
2982
wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
2983
wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
2984
wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
2985
wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
2986
2987
inptr++; /* advance pointers to next column */
2988
quantptr++;
2989
wsptr++;
2990
}
2991
2992
/* Pass 2: process 8 rows from work array, store into output array.
2993
* 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
2994
*/
2995
2996
wsptr = workspace;
2997
for (ctr = 0; ctr < 8; ctr++) {
2998
outptr = output_buf[ctr] + output_col;
2999
3000
/* Even part */
3001
3002
/* Add range center and fudge factor for final descale and range-limit. */
3003
tmp0 = (INT32) wsptr[0] +
3004
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3005
(ONE << (PASS1_BITS+2)));
3006
tmp0 <<= CONST_BITS;
3007
3008
z1 = (INT32) wsptr[4];
3009
tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
3010
tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
3011
3012
tmp10 = tmp0 + tmp1;
3013
tmp11 = tmp0 - tmp1;
3014
tmp12 = tmp0 + tmp2;
3015
tmp13 = tmp0 - tmp2;
3016
3017
z1 = (INT32) wsptr[2];
3018
z2 = (INT32) wsptr[6];
3019
z3 = z1 - z2;
3020
z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
3021
z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
3022
3023
tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
3024
tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
3025
tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
3026
tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
3027
3028
tmp20 = tmp10 + tmp0;
3029
tmp27 = tmp10 - tmp0;
3030
tmp21 = tmp12 + tmp1;
3031
tmp26 = tmp12 - tmp1;
3032
tmp22 = tmp13 + tmp2;
3033
tmp25 = tmp13 - tmp2;
3034
tmp23 = tmp11 + tmp3;
3035
tmp24 = tmp11 - tmp3;
3036
3037
/* Odd part */
3038
3039
z1 = (INT32) wsptr[1];
3040
z2 = (INT32) wsptr[3];
3041
z3 = (INT32) wsptr[5];
3042
z4 = (INT32) wsptr[7];
3043
3044
tmp11 = z1 + z3;
3045
3046
tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
3047
tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
3048
tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
3049
tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
3050
tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
3051
tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
3052
tmp0 = tmp1 + tmp2 + tmp3 -
3053
MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
3054
tmp13 = tmp10 + tmp11 + tmp12 -
3055
MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
3056
z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
3057
tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
3058
tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
3059
z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
3060
tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
3061
tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
3062
z2 += z4;
3063
z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
3064
tmp1 += z1;
3065
tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
3066
z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
3067
tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
3068
tmp12 += z2;
3069
z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
3070
tmp2 += z2;
3071
tmp3 += z2;
3072
z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
3073
tmp10 += z2;
3074
tmp11 += z2;
3075
3076
/* Final output stage */
3077
3078
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
3079
CONST_BITS+PASS1_BITS+3)
3080
& RANGE_MASK];
3081
outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
3082
CONST_BITS+PASS1_BITS+3)
3083
& RANGE_MASK];
3084
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
3085
CONST_BITS+PASS1_BITS+3)
3086
& RANGE_MASK];
3087
outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
3088
CONST_BITS+PASS1_BITS+3)
3089
& RANGE_MASK];
3090
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
3091
CONST_BITS+PASS1_BITS+3)
3092
& RANGE_MASK];
3093
outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
3094
CONST_BITS+PASS1_BITS+3)
3095
& RANGE_MASK];
3096
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
3097
CONST_BITS+PASS1_BITS+3)
3098
& RANGE_MASK];
3099
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
3100
CONST_BITS+PASS1_BITS+3)
3101
& RANGE_MASK];
3102
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
3103
CONST_BITS+PASS1_BITS+3)
3104
& RANGE_MASK];
3105
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
3106
CONST_BITS+PASS1_BITS+3)
3107
& RANGE_MASK];
3108
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
3109
CONST_BITS+PASS1_BITS+3)
3110
& RANGE_MASK];
3111
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
3112
CONST_BITS+PASS1_BITS+3)
3113
& RANGE_MASK];
3114
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
3115
CONST_BITS+PASS1_BITS+3)
3116
& RANGE_MASK];
3117
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
3118
CONST_BITS+PASS1_BITS+3)
3119
& RANGE_MASK];
3120
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
3121
CONST_BITS+PASS1_BITS+3)
3122
& RANGE_MASK];
3123
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
3124
CONST_BITS+PASS1_BITS+3)
3125
& RANGE_MASK];
3126
3127
wsptr += 8; /* advance pointer to next row */
3128
}
3129
}
3130
3131
3132
/*
3133
* Perform dequantization and inverse DCT on one block of coefficients,
3134
* producing a 14x7 output block.
3135
*
3136
* 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
3137
*/
3138
3139
GLOBAL(void)
3140
jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3141
JCOEFPTR coef_block,
3142
JSAMPARRAY output_buf, JDIMENSION output_col)
3143
{
3144
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
3145
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
3146
INT32 z1, z2, z3, z4;
3147
JCOEFPTR inptr;
3148
ISLOW_MULT_TYPE * quantptr;
3149
int * wsptr;
3150
JSAMPROW outptr;
3151
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3152
int ctr;
3153
int workspace[8*7]; /* buffers data between passes */
3154
SHIFT_TEMPS
3155
3156
/* Pass 1: process columns from input, store into work array.
3157
* 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
3158
*/
3159
3160
inptr = coef_block;
3161
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3162
wsptr = workspace;
3163
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3164
/* Even part */
3165
3166
tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3167
tmp23 <<= CONST_BITS;
3168
/* Add fudge factor here for final descale. */
3169
tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
3170
3171
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3172
z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3173
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
3174
3175
tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
3176
tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
3177
tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
3178
tmp10 = z1 + z3;
3179
z2 -= tmp10;
3180
tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
3181
tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
3182
tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
3183
tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
3184
3185
/* Odd part */
3186
3187
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3188
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3189
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3190
3191
tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
3192
tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
3193
tmp10 = tmp11 - tmp12;
3194
tmp11 += tmp12;
3195
tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
3196
tmp11 += tmp12;
3197
z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
3198
tmp10 += z2;
3199
tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
3200
3201
/* Final output stage */
3202
3203
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3204
wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3205
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
3206
wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
3207
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3208
wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3209
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
3210
}
3211
3212
/* Pass 2: process 7 rows from work array, store into output array.
3213
* 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
3214
*/
3215
3216
wsptr = workspace;
3217
for (ctr = 0; ctr < 7; ctr++) {
3218
outptr = output_buf[ctr] + output_col;
3219
3220
/* Even part */
3221
3222
/* Add range center and fudge factor for final descale and range-limit. */
3223
z1 = (INT32) wsptr[0] +
3224
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3225
(ONE << (PASS1_BITS+2)));
3226
z1 <<= CONST_BITS;
3227
z4 = (INT32) wsptr[4];
3228
z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
3229
z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
3230
z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
3231
3232
tmp10 = z1 + z2;
3233
tmp11 = z1 + z3;
3234
tmp12 = z1 - z4;
3235
3236
tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
3237
3238
z1 = (INT32) wsptr[2];
3239
z2 = (INT32) wsptr[6];
3240
3241
z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
3242
3243
tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
3244
tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
3245
tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
3246
MULTIPLY(z2, FIX(1.378756276)); /* c2 */
3247
3248
tmp20 = tmp10 + tmp13;
3249
tmp26 = tmp10 - tmp13;
3250
tmp21 = tmp11 + tmp14;
3251
tmp25 = tmp11 - tmp14;
3252
tmp22 = tmp12 + tmp15;
3253
tmp24 = tmp12 - tmp15;
3254
3255
/* Odd part */
3256
3257
z1 = (INT32) wsptr[1];
3258
z2 = (INT32) wsptr[3];
3259
z3 = (INT32) wsptr[5];
3260
z4 = (INT32) wsptr[7];
3261
z4 <<= CONST_BITS;
3262
3263
tmp14 = z1 + z3;
3264
tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
3265
tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
3266
tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
3267
tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
3268
tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
3269
z1 -= z2;
3270
tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
3271
tmp16 += tmp15;
3272
tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
3273
tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
3274
tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
3275
tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
3276
tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
3277
tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
3278
3279
tmp13 = ((z1 - z3) << CONST_BITS) + z4;
3280
3281
/* Final output stage */
3282
3283
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3284
CONST_BITS+PASS1_BITS+3)
3285
& RANGE_MASK];
3286
outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3287
CONST_BITS+PASS1_BITS+3)
3288
& RANGE_MASK];
3289
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3290
CONST_BITS+PASS1_BITS+3)
3291
& RANGE_MASK];
3292
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3293
CONST_BITS+PASS1_BITS+3)
3294
& RANGE_MASK];
3295
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3296
CONST_BITS+PASS1_BITS+3)
3297
& RANGE_MASK];
3298
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3299
CONST_BITS+PASS1_BITS+3)
3300
& RANGE_MASK];
3301
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3302
CONST_BITS+PASS1_BITS+3)
3303
& RANGE_MASK];
3304
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3305
CONST_BITS+PASS1_BITS+3)
3306
& RANGE_MASK];
3307
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3308
CONST_BITS+PASS1_BITS+3)
3309
& RANGE_MASK];
3310
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3311
CONST_BITS+PASS1_BITS+3)
3312
& RANGE_MASK];
3313
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3314
CONST_BITS+PASS1_BITS+3)
3315
& RANGE_MASK];
3316
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3317
CONST_BITS+PASS1_BITS+3)
3318
& RANGE_MASK];
3319
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
3320
CONST_BITS+PASS1_BITS+3)
3321
& RANGE_MASK];
3322
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
3323
CONST_BITS+PASS1_BITS+3)
3324
& RANGE_MASK];
3325
3326
wsptr += 8; /* advance pointer to next row */
3327
}
3328
}
3329
3330
3331
/*
3332
* Perform dequantization and inverse DCT on one block of coefficients,
3333
* producing a 12x6 output block.
3334
*
3335
* 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
3336
*/
3337
3338
GLOBAL(void)
3339
jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3340
JCOEFPTR coef_block,
3341
JSAMPARRAY output_buf, JDIMENSION output_col)
3342
{
3343
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
3344
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
3345
INT32 z1, z2, z3, z4;
3346
JCOEFPTR inptr;
3347
ISLOW_MULT_TYPE * quantptr;
3348
int * wsptr;
3349
JSAMPROW outptr;
3350
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3351
int ctr;
3352
int workspace[8*6]; /* buffers data between passes */
3353
SHIFT_TEMPS
3354
3355
/* Pass 1: process columns from input, store into work array.
3356
* 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3357
*/
3358
3359
inptr = coef_block;
3360
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3361
wsptr = workspace;
3362
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3363
/* Even part */
3364
3365
tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3366
tmp10 <<= CONST_BITS;
3367
/* Add fudge factor here for final descale. */
3368
tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
3369
tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3370
tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
3371
tmp11 = tmp10 + tmp20;
3372
tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
3373
tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3374
tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
3375
tmp20 = tmp11 + tmp10;
3376
tmp22 = tmp11 - tmp10;
3377
3378
/* Odd part */
3379
3380
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3381
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3382
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3383
tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3384
tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
3385
tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
3386
tmp11 = (z1 - z2 - z3) << PASS1_BITS;
3387
3388
/* Final output stage */
3389
3390
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3391
wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3392
wsptr[8*1] = (int) (tmp21 + tmp11);
3393
wsptr[8*4] = (int) (tmp21 - tmp11);
3394
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3395
wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3396
}
3397
3398
/* Pass 2: process 6 rows from work array, store into output array.
3399
* 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
3400
*/
3401
3402
wsptr = workspace;
3403
for (ctr = 0; ctr < 6; ctr++) {
3404
outptr = output_buf[ctr] + output_col;
3405
3406
/* Even part */
3407
3408
/* Add range center and fudge factor for final descale and range-limit. */
3409
z3 = (INT32) wsptr[0] +
3410
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3411
(ONE << (PASS1_BITS+2)));
3412
z3 <<= CONST_BITS;
3413
3414
z4 = (INT32) wsptr[4];
3415
z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
3416
3417
tmp10 = z3 + z4;
3418
tmp11 = z3 - z4;
3419
3420
z1 = (INT32) wsptr[2];
3421
z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
3422
z1 <<= CONST_BITS;
3423
z2 = (INT32) wsptr[6];
3424
z2 <<= CONST_BITS;
3425
3426
tmp12 = z1 - z2;
3427
3428
tmp21 = z3 + tmp12;
3429
tmp24 = z3 - tmp12;
3430
3431
tmp12 = z4 + z2;
3432
3433
tmp20 = tmp10 + tmp12;
3434
tmp25 = tmp10 - tmp12;
3435
3436
tmp12 = z4 - z1 - z2;
3437
3438
tmp22 = tmp11 + tmp12;
3439
tmp23 = tmp11 - tmp12;
3440
3441
/* Odd part */
3442
3443
z1 = (INT32) wsptr[1];
3444
z2 = (INT32) wsptr[3];
3445
z3 = (INT32) wsptr[5];
3446
z4 = (INT32) wsptr[7];
3447
3448
tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
3449
tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
3450
3451
tmp10 = z1 + z3;
3452
tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
3453
tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
3454
tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
3455
tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
3456
tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
3457
tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
3458
tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
3459
MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
3460
3461
z1 -= z4;
3462
z2 -= z3;
3463
z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
3464
tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
3465
tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
3466
3467
/* Final output stage */
3468
3469
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3470
CONST_BITS+PASS1_BITS+3)
3471
& RANGE_MASK];
3472
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3473
CONST_BITS+PASS1_BITS+3)
3474
& RANGE_MASK];
3475
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3476
CONST_BITS+PASS1_BITS+3)
3477
& RANGE_MASK];
3478
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3479
CONST_BITS+PASS1_BITS+3)
3480
& RANGE_MASK];
3481
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3482
CONST_BITS+PASS1_BITS+3)
3483
& RANGE_MASK];
3484
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3485
CONST_BITS+PASS1_BITS+3)
3486
& RANGE_MASK];
3487
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3488
CONST_BITS+PASS1_BITS+3)
3489
& RANGE_MASK];
3490
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3491
CONST_BITS+PASS1_BITS+3)
3492
& RANGE_MASK];
3493
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3494
CONST_BITS+PASS1_BITS+3)
3495
& RANGE_MASK];
3496
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3497
CONST_BITS+PASS1_BITS+3)
3498
& RANGE_MASK];
3499
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3500
CONST_BITS+PASS1_BITS+3)
3501
& RANGE_MASK];
3502
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3503
CONST_BITS+PASS1_BITS+3)
3504
& RANGE_MASK];
3505
3506
wsptr += 8; /* advance pointer to next row */
3507
}
3508
}
3509
3510
3511
/*
3512
* Perform dequantization and inverse DCT on one block of coefficients,
3513
* producing a 10x5 output block.
3514
*
3515
* 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
3516
*/
3517
3518
GLOBAL(void)
3519
jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3520
JCOEFPTR coef_block,
3521
JSAMPARRAY output_buf, JDIMENSION output_col)
3522
{
3523
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
3524
INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
3525
INT32 z1, z2, z3, z4;
3526
JCOEFPTR inptr;
3527
ISLOW_MULT_TYPE * quantptr;
3528
int * wsptr;
3529
JSAMPROW outptr;
3530
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3531
int ctr;
3532
int workspace[8*5]; /* buffers data between passes */
3533
SHIFT_TEMPS
3534
3535
/* Pass 1: process columns from input, store into work array.
3536
* 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
3537
*/
3538
3539
inptr = coef_block;
3540
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3541
wsptr = workspace;
3542
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3543
/* Even part */
3544
3545
tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3546
tmp12 <<= CONST_BITS;
3547
/* Add fudge factor here for final descale. */
3548
tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
3549
tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3550
tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3551
z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
3552
z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
3553
z3 = tmp12 + z2;
3554
tmp10 = z3 + z1;
3555
tmp11 = z3 - z1;
3556
tmp12 -= z2 << 2;
3557
3558
/* Odd part */
3559
3560
z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3561
z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3562
3563
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
3564
tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
3565
tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
3566
3567
/* Final output stage */
3568
3569
wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
3570
wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
3571
wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
3572
wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
3573
wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
3574
}
3575
3576
/* Pass 2: process 5 rows from work array, store into output array.
3577
* 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
3578
*/
3579
3580
wsptr = workspace;
3581
for (ctr = 0; ctr < 5; ctr++) {
3582
outptr = output_buf[ctr] + output_col;
3583
3584
/* Even part */
3585
3586
/* Add range center and fudge factor for final descale and range-limit. */
3587
z3 = (INT32) wsptr[0] +
3588
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3589
(ONE << (PASS1_BITS+2)));
3590
z3 <<= CONST_BITS;
3591
z4 = (INT32) wsptr[4];
3592
z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
3593
z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
3594
tmp10 = z3 + z1;
3595
tmp11 = z3 - z2;
3596
3597
tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
3598
3599
z2 = (INT32) wsptr[2];
3600
z3 = (INT32) wsptr[6];
3601
3602
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
3603
tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
3604
tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
3605
3606
tmp20 = tmp10 + tmp12;
3607
tmp24 = tmp10 - tmp12;
3608
tmp21 = tmp11 + tmp13;
3609
tmp23 = tmp11 - tmp13;
3610
3611
/* Odd part */
3612
3613
z1 = (INT32) wsptr[1];
3614
z2 = (INT32) wsptr[3];
3615
z3 = (INT32) wsptr[5];
3616
z3 <<= CONST_BITS;
3617
z4 = (INT32) wsptr[7];
3618
3619
tmp11 = z2 + z4;
3620
tmp13 = z2 - z4;
3621
3622
tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
3623
3624
z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
3625
z4 = z3 + tmp12;
3626
3627
tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
3628
tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
3629
3630
z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
3631
z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
3632
3633
tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
3634
3635
tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
3636
tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
3637
3638
/* Final output stage */
3639
3640
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3641
CONST_BITS+PASS1_BITS+3)
3642
& RANGE_MASK];
3643
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3644
CONST_BITS+PASS1_BITS+3)
3645
& RANGE_MASK];
3646
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3647
CONST_BITS+PASS1_BITS+3)
3648
& RANGE_MASK];
3649
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3650
CONST_BITS+PASS1_BITS+3)
3651
& RANGE_MASK];
3652
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3653
CONST_BITS+PASS1_BITS+3)
3654
& RANGE_MASK];
3655
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3656
CONST_BITS+PASS1_BITS+3)
3657
& RANGE_MASK];
3658
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3659
CONST_BITS+PASS1_BITS+3)
3660
& RANGE_MASK];
3661
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3662
CONST_BITS+PASS1_BITS+3)
3663
& RANGE_MASK];
3664
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3665
CONST_BITS+PASS1_BITS+3)
3666
& RANGE_MASK];
3667
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3668
CONST_BITS+PASS1_BITS+3)
3669
& RANGE_MASK];
3670
3671
wsptr += 8; /* advance pointer to next row */
3672
}
3673
}
3674
3675
3676
/*
3677
* Perform dequantization and inverse DCT on one block of coefficients,
3678
* producing an 8x4 output block.
3679
*
3680
* 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
3681
*/
3682
3683
GLOBAL(void)
3684
jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3685
JCOEFPTR coef_block,
3686
JSAMPARRAY output_buf, JDIMENSION output_col)
3687
{
3688
INT32 tmp0, tmp1, tmp2, tmp3;
3689
INT32 tmp10, tmp11, tmp12, tmp13;
3690
INT32 z1, z2, z3;
3691
JCOEFPTR inptr;
3692
ISLOW_MULT_TYPE * quantptr;
3693
int * wsptr;
3694
JSAMPROW outptr;
3695
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3696
int ctr;
3697
int workspace[8*4]; /* buffers data between passes */
3698
SHIFT_TEMPS
3699
3700
/* Pass 1: process columns from input, store into work array.
3701
* 4-point IDCT kernel,
3702
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3703
*/
3704
3705
inptr = coef_block;
3706
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3707
wsptr = workspace;
3708
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3709
/* Even part */
3710
3711
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3712
tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3713
3714
tmp10 = (tmp0 + tmp2) << PASS1_BITS;
3715
tmp12 = (tmp0 - tmp2) << PASS1_BITS;
3716
3717
/* Odd part */
3718
/* Same rotation as in the even part of the 8x8 LL&M IDCT */
3719
3720
z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3721
z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3722
3723
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3724
/* Add fudge factor here for final descale. */
3725
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
3726
tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
3727
CONST_BITS-PASS1_BITS);
3728
tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
3729
CONST_BITS-PASS1_BITS);
3730
3731
/* Final output stage */
3732
3733
wsptr[8*0] = (int) (tmp10 + tmp0);
3734
wsptr[8*3] = (int) (tmp10 - tmp0);
3735
wsptr[8*1] = (int) (tmp12 + tmp2);
3736
wsptr[8*2] = (int) (tmp12 - tmp2);
3737
}
3738
3739
/* Pass 2: process rows from work array, store into output array.
3740
* Note that we must descale the results by a factor of 8 == 2**3,
3741
* and also undo the PASS1_BITS scaling.
3742
* 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3743
*/
3744
3745
wsptr = workspace;
3746
for (ctr = 0; ctr < 4; ctr++) {
3747
outptr = output_buf[ctr] + output_col;
3748
3749
/* Even part: reverse the even part of the forward DCT.
3750
* The rotator is c(-6).
3751
*/
3752
3753
/* Add range center and fudge factor for final descale and range-limit. */
3754
z2 = (INT32) wsptr[0] +
3755
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3756
(ONE << (PASS1_BITS+2)));
3757
z3 = (INT32) wsptr[4];
3758
3759
tmp0 = (z2 + z3) << CONST_BITS;
3760
tmp1 = (z2 - z3) << CONST_BITS;
3761
3762
z2 = (INT32) wsptr[2];
3763
z3 = (INT32) wsptr[6];
3764
3765
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3766
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
3767
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
3768
3769
tmp10 = tmp0 + tmp2;
3770
tmp13 = tmp0 - tmp2;
3771
tmp11 = tmp1 + tmp3;
3772
tmp12 = tmp1 - tmp3;
3773
3774
/* Odd part per figure 8; the matrix is unitary and hence its
3775
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
3776
*/
3777
3778
tmp0 = (INT32) wsptr[7];
3779
tmp1 = (INT32) wsptr[5];
3780
tmp2 = (INT32) wsptr[3];
3781
tmp3 = (INT32) wsptr[1];
3782
3783
z2 = tmp0 + tmp2;
3784
z3 = tmp1 + tmp3;
3785
3786
z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
3787
z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
3788
z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
3789
z2 += z1;
3790
z3 += z1;
3791
3792
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3793
tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
3794
tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
3795
tmp0 += z1 + z2;
3796
tmp3 += z1 + z3;
3797
3798
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3799
tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
3800
tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
3801
tmp1 += z1 + z3;
3802
tmp2 += z1 + z2;
3803
3804
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
3805
3806
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
3807
CONST_BITS+PASS1_BITS+3)
3808
& RANGE_MASK];
3809
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
3810
CONST_BITS+PASS1_BITS+3)
3811
& RANGE_MASK];
3812
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
3813
CONST_BITS+PASS1_BITS+3)
3814
& RANGE_MASK];
3815
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
3816
CONST_BITS+PASS1_BITS+3)
3817
& RANGE_MASK];
3818
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
3819
CONST_BITS+PASS1_BITS+3)
3820
& RANGE_MASK];
3821
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
3822
CONST_BITS+PASS1_BITS+3)
3823
& RANGE_MASK];
3824
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
3825
CONST_BITS+PASS1_BITS+3)
3826
& RANGE_MASK];
3827
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
3828
CONST_BITS+PASS1_BITS+3)
3829
& RANGE_MASK];
3830
3831
wsptr += DCTSIZE; /* advance pointer to next row */
3832
}
3833
}
3834
3835
3836
/*
3837
* Perform dequantization and inverse DCT on one block of coefficients,
3838
* producing a 6x3 output block.
3839
*
3840
* 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
3841
*/
3842
3843
GLOBAL(void)
3844
jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3845
JCOEFPTR coef_block,
3846
JSAMPARRAY output_buf, JDIMENSION output_col)
3847
{
3848
INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
3849
INT32 z1, z2, z3;
3850
JCOEFPTR inptr;
3851
ISLOW_MULT_TYPE * quantptr;
3852
int * wsptr;
3853
JSAMPROW outptr;
3854
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3855
int ctr;
3856
int workspace[6*3]; /* buffers data between passes */
3857
SHIFT_TEMPS
3858
3859
/* Pass 1: process columns from input, store into work array.
3860
* 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
3861
*/
3862
3863
inptr = coef_block;
3864
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3865
wsptr = workspace;
3866
for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
3867
/* Even part */
3868
3869
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3870
tmp0 <<= CONST_BITS;
3871
/* Add fudge factor here for final descale. */
3872
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
3873
tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3874
tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
3875
tmp10 = tmp0 + tmp12;
3876
tmp2 = tmp0 - tmp12 - tmp12;
3877
3878
/* Odd part */
3879
3880
tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3881
tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
3882
3883
/* Final output stage */
3884
3885
wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
3886
wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
3887
wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
3888
}
3889
3890
/* Pass 2: process 3 rows from work array, store into output array.
3891
* 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3892
*/
3893
3894
wsptr = workspace;
3895
for (ctr = 0; ctr < 3; ctr++) {
3896
outptr = output_buf[ctr] + output_col;
3897
3898
/* Even part */
3899
3900
/* Add range center and fudge factor for final descale and range-limit. */
3901
tmp0 = (INT32) wsptr[0] +
3902
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3903
(ONE << (PASS1_BITS+2)));
3904
tmp0 <<= CONST_BITS;
3905
tmp2 = (INT32) wsptr[4];
3906
tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
3907
tmp1 = tmp0 + tmp10;
3908
tmp11 = tmp0 - tmp10 - tmp10;
3909
tmp10 = (INT32) wsptr[2];
3910
tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
3911
tmp10 = tmp1 + tmp0;
3912
tmp12 = tmp1 - tmp0;
3913
3914
/* Odd part */
3915
3916
z1 = (INT32) wsptr[1];
3917
z2 = (INT32) wsptr[3];
3918
z3 = (INT32) wsptr[5];
3919
tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3920
tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
3921
tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
3922
tmp1 = (z1 - z2 - z3) << CONST_BITS;
3923
3924
/* Final output stage */
3925
3926
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
3927
CONST_BITS+PASS1_BITS+3)
3928
& RANGE_MASK];
3929
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
3930
CONST_BITS+PASS1_BITS+3)
3931
& RANGE_MASK];
3932
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
3933
CONST_BITS+PASS1_BITS+3)
3934
& RANGE_MASK];
3935
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
3936
CONST_BITS+PASS1_BITS+3)
3937
& RANGE_MASK];
3938
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
3939
CONST_BITS+PASS1_BITS+3)
3940
& RANGE_MASK];
3941
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
3942
CONST_BITS+PASS1_BITS+3)
3943
& RANGE_MASK];
3944
3945
wsptr += 6; /* advance pointer to next row */
3946
}
3947
}
3948
3949
3950
/*
3951
* Perform dequantization and inverse DCT on one block of coefficients,
3952
* producing a 4x2 output block.
3953
*
3954
* 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
3955
*/
3956
3957
GLOBAL(void)
3958
jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3959
JCOEFPTR coef_block,
3960
JSAMPARRAY output_buf, JDIMENSION output_col)
3961
{
3962
INT32 tmp0, tmp2, tmp10, tmp12;
3963
INT32 z1, z2, z3;
3964
JCOEFPTR inptr;
3965
ISLOW_MULT_TYPE * quantptr;
3966
INT32 * wsptr;
3967
JSAMPROW outptr;
3968
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3969
int ctr;
3970
INT32 workspace[4*2]; /* buffers data between passes */
3971
SHIFT_TEMPS
3972
3973
/* Pass 1: process columns from input, store into work array. */
3974
3975
inptr = coef_block;
3976
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3977
wsptr = workspace;
3978
for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
3979
/* Even part */
3980
3981
tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3982
3983
/* Odd part */
3984
3985
tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3986
3987
/* Final output stage */
3988
3989
wsptr[4*0] = tmp10 + tmp0;
3990
wsptr[4*1] = tmp10 - tmp0;
3991
}
3992
3993
/* Pass 2: process 2 rows from work array, store into output array.
3994
* 4-point IDCT kernel,
3995
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3996
*/
3997
3998
wsptr = workspace;
3999
for (ctr = 0; ctr < 2; ctr++) {
4000
outptr = output_buf[ctr] + output_col;
4001
4002
/* Even part */
4003
4004
/* Add range center and fudge factor for final descale and range-limit. */
4005
tmp0 = wsptr[0] + ((((INT32) RANGE_CENTER) << 3) + (ONE << 2));
4006
tmp2 = wsptr[2];
4007
4008
tmp10 = (tmp0 + tmp2) << CONST_BITS;
4009
tmp12 = (tmp0 - tmp2) << CONST_BITS;
4010
4011
/* Odd part */
4012
/* Same rotation as in the even part of the 8x8 LL&M IDCT */
4013
4014
z2 = wsptr[1];
4015
z3 = wsptr[3];
4016
4017
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4018
tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4019
tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4020
4021
/* Final output stage */
4022
4023
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4024
CONST_BITS+3)
4025
& RANGE_MASK];
4026
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4027
CONST_BITS+3)
4028
& RANGE_MASK];
4029
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4030
CONST_BITS+3)
4031
& RANGE_MASK];
4032
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4033
CONST_BITS+3)
4034
& RANGE_MASK];
4035
4036
wsptr += 4; /* advance pointer to next row */
4037
}
4038
}
4039
4040
4041
/*
4042
* Perform dequantization and inverse DCT on one block of coefficients,
4043
* producing a 2x1 output block.
4044
*
4045
* 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
4046
*/
4047
4048
GLOBAL(void)
4049
jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4050
JCOEFPTR coef_block,
4051
JSAMPARRAY output_buf, JDIMENSION output_col)
4052
{
4053
DCTELEM tmp0, tmp1;
4054
ISLOW_MULT_TYPE * quantptr;
4055
JSAMPROW outptr;
4056
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4057
ISHIFT_TEMPS
4058
4059
/* Pass 1: empty. */
4060
4061
/* Pass 2: process 1 row from input, store into output array. */
4062
4063
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4064
outptr = output_buf[0] + output_col;
4065
4066
/* Even part */
4067
4068
tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]);
4069
/* Add range center and fudge factor for final descale and range-limit. */
4070
tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
4071
4072
/* Odd part */
4073
4074
tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]);
4075
4076
/* Final output stage */
4077
4078
outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
4079
outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
4080
}
4081
4082
4083
/*
4084
* Perform dequantization and inverse DCT on one block of coefficients,
4085
* producing an 8x16 output block.
4086
*
4087
* 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
4088
*/
4089
4090
GLOBAL(void)
4091
jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4092
JCOEFPTR coef_block,
4093
JSAMPARRAY output_buf, JDIMENSION output_col)
4094
{
4095
INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
4096
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
4097
INT32 z1, z2, z3, z4;
4098
JCOEFPTR inptr;
4099
ISLOW_MULT_TYPE * quantptr;
4100
int * wsptr;
4101
JSAMPROW outptr;
4102
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4103
int ctr;
4104
int workspace[8*16]; /* buffers data between passes */
4105
SHIFT_TEMPS
4106
4107
/* Pass 1: process columns from input, store into work array.
4108
* 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
4109
*/
4110
4111
inptr = coef_block;
4112
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4113
wsptr = workspace;
4114
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
4115
/* Even part */
4116
4117
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4118
tmp0 <<= CONST_BITS;
4119
/* Add fudge factor here for final descale. */
4120
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
4121
4122
z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4123
tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
4124
tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
4125
4126
tmp10 = tmp0 + tmp1;
4127
tmp11 = tmp0 - tmp1;
4128
tmp12 = tmp0 + tmp2;
4129
tmp13 = tmp0 - tmp2;
4130
4131
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4132
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4133
z3 = z1 - z2;
4134
z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
4135
z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
4136
4137
tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
4138
tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
4139
tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
4140
tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
4141
4142
tmp20 = tmp10 + tmp0;
4143
tmp27 = tmp10 - tmp0;
4144
tmp21 = tmp12 + tmp1;
4145
tmp26 = tmp12 - tmp1;
4146
tmp22 = tmp13 + tmp2;
4147
tmp25 = tmp13 - tmp2;
4148
tmp23 = tmp11 + tmp3;
4149
tmp24 = tmp11 - tmp3;
4150
4151
/* Odd part */
4152
4153
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4154
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4155
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4156
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4157
4158
tmp11 = z1 + z3;
4159
4160
tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
4161
tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
4162
tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
4163
tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
4164
tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
4165
tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
4166
tmp0 = tmp1 + tmp2 + tmp3 -
4167
MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
4168
tmp13 = tmp10 + tmp11 + tmp12 -
4169
MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
4170
z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
4171
tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
4172
tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
4173
z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
4174
tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
4175
tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
4176
z2 += z4;
4177
z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
4178
tmp1 += z1;
4179
tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
4180
z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
4181
tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
4182
tmp12 += z2;
4183
z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
4184
tmp2 += z2;
4185
tmp3 += z2;
4186
z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
4187
tmp10 += z2;
4188
tmp11 += z2;
4189
4190
/* Final output stage */
4191
4192
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
4193
wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
4194
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
4195
wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
4196
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
4197
wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
4198
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
4199
wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
4200
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
4201
wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
4202
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
4203
wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
4204
wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
4205
wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
4206
wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
4207
wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
4208
}
4209
4210
/* Pass 2: process rows from work array, store into output array.
4211
* Note that we must descale the results by a factor of 8 == 2**3,
4212
* and also undo the PASS1_BITS scaling.
4213
* 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4214
*/
4215
4216
wsptr = workspace;
4217
for (ctr = 0; ctr < 16; ctr++) {
4218
outptr = output_buf[ctr] + output_col;
4219
4220
/* Even part: reverse the even part of the forward DCT.
4221
* The rotator is c(-6).
4222
*/
4223
4224
/* Add range center and fudge factor for final descale and range-limit. */
4225
z2 = (INT32) wsptr[0] +
4226
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4227
(ONE << (PASS1_BITS+2)));
4228
z3 = (INT32) wsptr[4];
4229
4230
tmp0 = (z2 + z3) << CONST_BITS;
4231
tmp1 = (z2 - z3) << CONST_BITS;
4232
4233
z2 = (INT32) wsptr[2];
4234
z3 = (INT32) wsptr[6];
4235
4236
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4237
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4238
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4239
4240
tmp10 = tmp0 + tmp2;
4241
tmp13 = tmp0 - tmp2;
4242
tmp11 = tmp1 + tmp3;
4243
tmp12 = tmp1 - tmp3;
4244
4245
/* Odd part per figure 8; the matrix is unitary and hence its
4246
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4247
*/
4248
4249
tmp0 = (INT32) wsptr[7];
4250
tmp1 = (INT32) wsptr[5];
4251
tmp2 = (INT32) wsptr[3];
4252
tmp3 = (INT32) wsptr[1];
4253
4254
z2 = tmp0 + tmp2;
4255
z3 = tmp1 + tmp3;
4256
4257
z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
4258
z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
4259
z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
4260
z2 += z1;
4261
z3 += z1;
4262
4263
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4264
tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
4265
tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
4266
tmp0 += z1 + z2;
4267
tmp3 += z1 + z3;
4268
4269
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4270
tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
4271
tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
4272
tmp1 += z1 + z3;
4273
tmp2 += z1 + z2;
4274
4275
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4276
4277
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
4278
CONST_BITS+PASS1_BITS+3)
4279
& RANGE_MASK];
4280
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
4281
CONST_BITS+PASS1_BITS+3)
4282
& RANGE_MASK];
4283
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
4284
CONST_BITS+PASS1_BITS+3)
4285
& RANGE_MASK];
4286
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
4287
CONST_BITS+PASS1_BITS+3)
4288
& RANGE_MASK];
4289
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
4290
CONST_BITS+PASS1_BITS+3)
4291
& RANGE_MASK];
4292
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
4293
CONST_BITS+PASS1_BITS+3)
4294
& RANGE_MASK];
4295
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
4296
CONST_BITS+PASS1_BITS+3)
4297
& RANGE_MASK];
4298
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
4299
CONST_BITS+PASS1_BITS+3)
4300
& RANGE_MASK];
4301
4302
wsptr += DCTSIZE; /* advance pointer to next row */
4303
}
4304
}
4305
4306
4307
/*
4308
* Perform dequantization and inverse DCT on one block of coefficients,
4309
* producing a 7x14 output block.
4310
*
4311
* 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
4312
*/
4313
4314
GLOBAL(void)
4315
jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4316
JCOEFPTR coef_block,
4317
JSAMPARRAY output_buf, JDIMENSION output_col)
4318
{
4319
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
4320
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
4321
INT32 z1, z2, z3, z4;
4322
JCOEFPTR inptr;
4323
ISLOW_MULT_TYPE * quantptr;
4324
int * wsptr;
4325
JSAMPROW outptr;
4326
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4327
int ctr;
4328
int workspace[7*14]; /* buffers data between passes */
4329
SHIFT_TEMPS
4330
4331
/* Pass 1: process columns from input, store into work array.
4332
* 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
4333
*/
4334
4335
inptr = coef_block;
4336
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4337
wsptr = workspace;
4338
for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
4339
/* Even part */
4340
4341
z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4342
z1 <<= CONST_BITS;
4343
/* Add fudge factor here for final descale. */
4344
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
4345
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4346
z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
4347
z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
4348
z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
4349
4350
tmp10 = z1 + z2;
4351
tmp11 = z1 + z3;
4352
tmp12 = z1 - z4;
4353
4354
tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
4355
CONST_BITS-PASS1_BITS);
4356
4357
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4358
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4359
4360
z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
4361
4362
tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
4363
tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
4364
tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
4365
MULTIPLY(z2, FIX(1.378756276)); /* c2 */
4366
4367
tmp20 = tmp10 + tmp13;
4368
tmp26 = tmp10 - tmp13;
4369
tmp21 = tmp11 + tmp14;
4370
tmp25 = tmp11 - tmp14;
4371
tmp22 = tmp12 + tmp15;
4372
tmp24 = tmp12 - tmp15;
4373
4374
/* Odd part */
4375
4376
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4377
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4378
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4379
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4380
tmp13 = z4 << CONST_BITS;
4381
4382
tmp14 = z1 + z3;
4383
tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
4384
tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
4385
tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
4386
tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
4387
tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
4388
z1 -= z2;
4389
tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
4390
tmp16 += tmp15;
4391
z1 += z4;
4392
z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
4393
tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
4394
tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
4395
z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
4396
tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
4397
tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
4398
4399
tmp13 = (z1 - z3) << PASS1_BITS;
4400
4401
/* Final output stage */
4402
4403
wsptr[7*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4404
wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4405
wsptr[7*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4406
wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4407
wsptr[7*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4408
wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4409
wsptr[7*3] = (int) (tmp23 + tmp13);
4410
wsptr[7*10] = (int) (tmp23 - tmp13);
4411
wsptr[7*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4412
wsptr[7*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4413
wsptr[7*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4414
wsptr[7*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4415
wsptr[7*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
4416
wsptr[7*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
4417
}
4418
4419
/* Pass 2: process 14 rows from work array, store into output array.
4420
* 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
4421
*/
4422
4423
wsptr = workspace;
4424
for (ctr = 0; ctr < 14; ctr++) {
4425
outptr = output_buf[ctr] + output_col;
4426
4427
/* Even part */
4428
4429
/* Add range center and fudge factor for final descale and range-limit. */
4430
tmp23 = (INT32) wsptr[0] +
4431
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4432
(ONE << (PASS1_BITS+2)));
4433
tmp23 <<= CONST_BITS;
4434
4435
z1 = (INT32) wsptr[2];
4436
z2 = (INT32) wsptr[4];
4437
z3 = (INT32) wsptr[6];
4438
4439
tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
4440
tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
4441
tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
4442
tmp10 = z1 + z3;
4443
z2 -= tmp10;
4444
tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
4445
tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
4446
tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
4447
tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
4448
4449
/* Odd part */
4450
4451
z1 = (INT32) wsptr[1];
4452
z2 = (INT32) wsptr[3];
4453
z3 = (INT32) wsptr[5];
4454
4455
tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
4456
tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
4457
tmp10 = tmp11 - tmp12;
4458
tmp11 += tmp12;
4459
tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
4460
tmp11 += tmp12;
4461
z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
4462
tmp10 += z2;
4463
tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
4464
4465
/* Final output stage */
4466
4467
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4468
CONST_BITS+PASS1_BITS+3)
4469
& RANGE_MASK];
4470
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4471
CONST_BITS+PASS1_BITS+3)
4472
& RANGE_MASK];
4473
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4474
CONST_BITS+PASS1_BITS+3)
4475
& RANGE_MASK];
4476
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4477
CONST_BITS+PASS1_BITS+3)
4478
& RANGE_MASK];
4479
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4480
CONST_BITS+PASS1_BITS+3)
4481
& RANGE_MASK];
4482
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4483
CONST_BITS+PASS1_BITS+3)
4484
& RANGE_MASK];
4485
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23,
4486
CONST_BITS+PASS1_BITS+3)
4487
& RANGE_MASK];
4488
4489
wsptr += 7; /* advance pointer to next row */
4490
}
4491
}
4492
4493
4494
/*
4495
* Perform dequantization and inverse DCT on one block of coefficients,
4496
* producing a 6x12 output block.
4497
*
4498
* 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
4499
*/
4500
4501
GLOBAL(void)
4502
jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4503
JCOEFPTR coef_block,
4504
JSAMPARRAY output_buf, JDIMENSION output_col)
4505
{
4506
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
4507
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
4508
INT32 z1, z2, z3, z4;
4509
JCOEFPTR inptr;
4510
ISLOW_MULT_TYPE * quantptr;
4511
int * wsptr;
4512
JSAMPROW outptr;
4513
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4514
int ctr;
4515
int workspace[6*12]; /* buffers data between passes */
4516
SHIFT_TEMPS
4517
4518
/* Pass 1: process columns from input, store into work array.
4519
* 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
4520
*/
4521
4522
inptr = coef_block;
4523
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4524
wsptr = workspace;
4525
for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
4526
/* Even part */
4527
4528
z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4529
z3 <<= CONST_BITS;
4530
/* Add fudge factor here for final descale. */
4531
z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4532
4533
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4534
z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
4535
4536
tmp10 = z3 + z4;
4537
tmp11 = z3 - z4;
4538
4539
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4540
z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
4541
z1 <<= CONST_BITS;
4542
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4543
z2 <<= CONST_BITS;
4544
4545
tmp12 = z1 - z2;
4546
4547
tmp21 = z3 + tmp12;
4548
tmp24 = z3 - tmp12;
4549
4550
tmp12 = z4 + z2;
4551
4552
tmp20 = tmp10 + tmp12;
4553
tmp25 = tmp10 - tmp12;
4554
4555
tmp12 = z4 - z1 - z2;
4556
4557
tmp22 = tmp11 + tmp12;
4558
tmp23 = tmp11 - tmp12;
4559
4560
/* Odd part */
4561
4562
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4563
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4564
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4565
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4566
4567
tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
4568
tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
4569
4570
tmp10 = z1 + z3;
4571
tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
4572
tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
4573
tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
4574
tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
4575
tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
4576
tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
4577
tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
4578
MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
4579
4580
z1 -= z4;
4581
z2 -= z3;
4582
z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
4583
tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
4584
tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
4585
4586
/* Final output stage */
4587
4588
wsptr[6*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4589
wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4590
wsptr[6*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4591
wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4592
wsptr[6*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4593
wsptr[6*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4594
wsptr[6*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4595
wsptr[6*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4596
wsptr[6*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4597
wsptr[6*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4598
wsptr[6*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4599
wsptr[6*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4600
}
4601
4602
/* Pass 2: process 12 rows from work array, store into output array.
4603
* 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
4604
*/
4605
4606
wsptr = workspace;
4607
for (ctr = 0; ctr < 12; ctr++) {
4608
outptr = output_buf[ctr] + output_col;
4609
4610
/* Even part */
4611
4612
/* Add range center and fudge factor for final descale and range-limit. */
4613
tmp10 = (INT32) wsptr[0] +
4614
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4615
(ONE << (PASS1_BITS+2)));
4616
tmp10 <<= CONST_BITS;
4617
tmp12 = (INT32) wsptr[4];
4618
tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
4619
tmp11 = tmp10 + tmp20;
4620
tmp21 = tmp10 - tmp20 - tmp20;
4621
tmp20 = (INT32) wsptr[2];
4622
tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
4623
tmp20 = tmp11 + tmp10;
4624
tmp22 = tmp11 - tmp10;
4625
4626
/* Odd part */
4627
4628
z1 = (INT32) wsptr[1];
4629
z2 = (INT32) wsptr[3];
4630
z3 = (INT32) wsptr[5];
4631
tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
4632
tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
4633
tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
4634
tmp11 = (z1 - z2 - z3) << CONST_BITS;
4635
4636
/* Final output stage */
4637
4638
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4639
CONST_BITS+PASS1_BITS+3)
4640
& RANGE_MASK];
4641
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4642
CONST_BITS+PASS1_BITS+3)
4643
& RANGE_MASK];
4644
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4645
CONST_BITS+PASS1_BITS+3)
4646
& RANGE_MASK];
4647
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4648
CONST_BITS+PASS1_BITS+3)
4649
& RANGE_MASK];
4650
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4651
CONST_BITS+PASS1_BITS+3)
4652
& RANGE_MASK];
4653
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4654
CONST_BITS+PASS1_BITS+3)
4655
& RANGE_MASK];
4656
4657
wsptr += 6; /* advance pointer to next row */
4658
}
4659
}
4660
4661
4662
/*
4663
* Perform dequantization and inverse DCT on one block of coefficients,
4664
* producing a 5x10 output block.
4665
*
4666
* 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
4667
*/
4668
4669
GLOBAL(void)
4670
jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4671
JCOEFPTR coef_block,
4672
JSAMPARRAY output_buf, JDIMENSION output_col)
4673
{
4674
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
4675
INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
4676
INT32 z1, z2, z3, z4, z5;
4677
JCOEFPTR inptr;
4678
ISLOW_MULT_TYPE * quantptr;
4679
int * wsptr;
4680
JSAMPROW outptr;
4681
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4682
int ctr;
4683
int workspace[5*10]; /* buffers data between passes */
4684
SHIFT_TEMPS
4685
4686
/* Pass 1: process columns from input, store into work array.
4687
* 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
4688
*/
4689
4690
inptr = coef_block;
4691
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4692
wsptr = workspace;
4693
for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
4694
/* Even part */
4695
4696
z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4697
z3 <<= CONST_BITS;
4698
/* Add fudge factor here for final descale. */
4699
z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4700
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4701
z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
4702
z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
4703
tmp10 = z3 + z1;
4704
tmp11 = z3 - z2;
4705
4706
tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
4707
CONST_BITS-PASS1_BITS);
4708
4709
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4710
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4711
4712
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
4713
tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
4714
tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
4715
4716
tmp20 = tmp10 + tmp12;
4717
tmp24 = tmp10 - tmp12;
4718
tmp21 = tmp11 + tmp13;
4719
tmp23 = tmp11 - tmp13;
4720
4721
/* Odd part */
4722
4723
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4724
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4725
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4726
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4727
4728
tmp11 = z2 + z4;
4729
tmp13 = z2 - z4;
4730
4731
tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
4732
z5 = z3 << CONST_BITS;
4733
4734
z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
4735
z4 = z5 + tmp12;
4736
4737
tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
4738
tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
4739
4740
z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
4741
z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
4742
4743
tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
4744
4745
tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
4746
tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
4747
4748
/* Final output stage */
4749
4750
wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4751
wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4752
wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4753
wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4754
wsptr[5*2] = (int) (tmp22 + tmp12);
4755
wsptr[5*7] = (int) (tmp22 - tmp12);
4756
wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4757
wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4758
wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4759
wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4760
}
4761
4762
/* Pass 2: process 10 rows from work array, store into output array.
4763
* 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
4764
*/
4765
4766
wsptr = workspace;
4767
for (ctr = 0; ctr < 10; ctr++) {
4768
outptr = output_buf[ctr] + output_col;
4769
4770
/* Even part */
4771
4772
/* Add range center and fudge factor for final descale and range-limit. */
4773
tmp12 = (INT32) wsptr[0] +
4774
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4775
(ONE << (PASS1_BITS+2)));
4776
tmp12 <<= CONST_BITS;
4777
tmp13 = (INT32) wsptr[2];
4778
tmp14 = (INT32) wsptr[4];
4779
z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
4780
z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
4781
z3 = tmp12 + z2;
4782
tmp10 = z3 + z1;
4783
tmp11 = z3 - z1;
4784
tmp12 -= z2 << 2;
4785
4786
/* Odd part */
4787
4788
z2 = (INT32) wsptr[1];
4789
z3 = (INT32) wsptr[3];
4790
4791
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
4792
tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
4793
tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
4794
4795
/* Final output stage */
4796
4797
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13,
4798
CONST_BITS+PASS1_BITS+3)
4799
& RANGE_MASK];
4800
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13,
4801
CONST_BITS+PASS1_BITS+3)
4802
& RANGE_MASK];
4803
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14,
4804
CONST_BITS+PASS1_BITS+3)
4805
& RANGE_MASK];
4806
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14,
4807
CONST_BITS+PASS1_BITS+3)
4808
& RANGE_MASK];
4809
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
4810
CONST_BITS+PASS1_BITS+3)
4811
& RANGE_MASK];
4812
4813
wsptr += 5; /* advance pointer to next row */
4814
}
4815
}
4816
4817
4818
/*
4819
* Perform dequantization and inverse DCT on one block of coefficients,
4820
* producing a 4x8 output block.
4821
*
4822
* 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
4823
*/
4824
4825
GLOBAL(void)
4826
jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4827
JCOEFPTR coef_block,
4828
JSAMPARRAY output_buf, JDIMENSION output_col)
4829
{
4830
INT32 tmp0, tmp1, tmp2, tmp3;
4831
INT32 tmp10, tmp11, tmp12, tmp13;
4832
INT32 z1, z2, z3;
4833
JCOEFPTR inptr;
4834
ISLOW_MULT_TYPE * quantptr;
4835
int * wsptr;
4836
JSAMPROW outptr;
4837
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4838
int ctr;
4839
int workspace[4*8]; /* buffers data between passes */
4840
SHIFT_TEMPS
4841
4842
/* Pass 1: process columns from input, store into work array.
4843
* Note results are scaled up by sqrt(8) compared to a true IDCT;
4844
* furthermore, we scale the results by 2**PASS1_BITS.
4845
* 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4846
*/
4847
4848
inptr = coef_block;
4849
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4850
wsptr = workspace;
4851
for (ctr = 4; ctr > 0; ctr--) {
4852
/* Due to quantization, we will usually find that many of the input
4853
* coefficients are zero, especially the AC terms. We can exploit this
4854
* by short-circuiting the IDCT calculation for any column in which all
4855
* the AC terms are zero. In that case each output is equal to the
4856
* DC coefficient (with scale factor as needed).
4857
* With typical images and quantization tables, half or more of the
4858
* column DCT calculations can be simplified this way.
4859
*/
4860
4861
if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
4862
inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
4863
inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
4864
inptr[DCTSIZE*7] == 0) {
4865
/* AC terms all zero */
4866
int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
4867
4868
wsptr[4*0] = dcval;
4869
wsptr[4*1] = dcval;
4870
wsptr[4*2] = dcval;
4871
wsptr[4*3] = dcval;
4872
wsptr[4*4] = dcval;
4873
wsptr[4*5] = dcval;
4874
wsptr[4*6] = dcval;
4875
wsptr[4*7] = dcval;
4876
4877
inptr++; /* advance pointers to next column */
4878
quantptr++;
4879
wsptr++;
4880
continue;
4881
}
4882
4883
/* Even part: reverse the even part of the forward DCT.
4884
* The rotator is c(-6).
4885
*/
4886
4887
z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4888
z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4889
z2 <<= CONST_BITS;
4890
z3 <<= CONST_BITS;
4891
/* Add fudge factor here for final descale. */
4892
z2 += ONE << (CONST_BITS-PASS1_BITS-1);
4893
4894
tmp0 = z2 + z3;
4895
tmp1 = z2 - z3;
4896
4897
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4898
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4899
4900
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4901
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4902
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4903
4904
tmp10 = tmp0 + tmp2;
4905
tmp13 = tmp0 - tmp2;
4906
tmp11 = tmp1 + tmp3;
4907
tmp12 = tmp1 - tmp3;
4908
4909
/* Odd part per figure 8; the matrix is unitary and hence its
4910
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4911
*/
4912
4913
tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4914
tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4915
tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4916
tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4917
4918
z2 = tmp0 + tmp2;
4919
z3 = tmp1 + tmp3;
4920
4921
z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
4922
z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
4923
z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
4924
z2 += z1;
4925
z3 += z1;
4926
4927
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4928
tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
4929
tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
4930
tmp0 += z1 + z2;
4931
tmp3 += z1 + z3;
4932
4933
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4934
tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
4935
tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
4936
tmp1 += z1 + z3;
4937
tmp2 += z1 + z2;
4938
4939
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4940
4941
wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
4942
wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
4943
wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
4944
wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
4945
wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
4946
wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
4947
wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
4948
wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
4949
4950
inptr++; /* advance pointers to next column */
4951
quantptr++;
4952
wsptr++;
4953
}
4954
4955
/* Pass 2: process 8 rows from work array, store into output array.
4956
* 4-point IDCT kernel,
4957
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
4958
*/
4959
4960
wsptr = workspace;
4961
for (ctr = 0; ctr < 8; ctr++) {
4962
outptr = output_buf[ctr] + output_col;
4963
4964
/* Even part */
4965
4966
/* Add range center and fudge factor for final descale and range-limit. */
4967
tmp0 = (INT32) wsptr[0] +
4968
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4969
(ONE << (PASS1_BITS+2)));
4970
tmp2 = (INT32) wsptr[2];
4971
4972
tmp10 = (tmp0 + tmp2) << CONST_BITS;
4973
tmp12 = (tmp0 - tmp2) << CONST_BITS;
4974
4975
/* Odd part */
4976
/* Same rotation as in the even part of the 8x8 LL&M IDCT */
4977
4978
z2 = (INT32) wsptr[1];
4979
z3 = (INT32) wsptr[3];
4980
4981
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4982
tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4983
tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4984
4985
/* Final output stage */
4986
4987
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4988
CONST_BITS+PASS1_BITS+3)
4989
& RANGE_MASK];
4990
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4991
CONST_BITS+PASS1_BITS+3)
4992
& RANGE_MASK];
4993
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4994
CONST_BITS+PASS1_BITS+3)
4995
& RANGE_MASK];
4996
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4997
CONST_BITS+PASS1_BITS+3)
4998
& RANGE_MASK];
4999
5000
wsptr += 4; /* advance pointer to next row */
5001
}
5002
}
5003
5004
5005
/*
5006
* Perform dequantization and inverse DCT on one block of coefficients,
5007
* producing a 3x6 output block.
5008
*
5009
* 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
5010
*/
5011
5012
GLOBAL(void)
5013
jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5014
JCOEFPTR coef_block,
5015
JSAMPARRAY output_buf, JDIMENSION output_col)
5016
{
5017
INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
5018
INT32 z1, z2, z3;
5019
JCOEFPTR inptr;
5020
ISLOW_MULT_TYPE * quantptr;
5021
int * wsptr;
5022
JSAMPROW outptr;
5023
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5024
int ctr;
5025
int workspace[3*6]; /* buffers data between passes */
5026
SHIFT_TEMPS
5027
5028
/* Pass 1: process columns from input, store into work array.
5029
* 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
5030
*/
5031
5032
inptr = coef_block;
5033
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5034
wsptr = workspace;
5035
for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
5036
/* Even part */
5037
5038
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5039
tmp0 <<= CONST_BITS;
5040
/* Add fudge factor here for final descale. */
5041
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
5042
tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
5043
tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
5044
tmp1 = tmp0 + tmp10;
5045
tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
5046
tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5047
tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
5048
tmp10 = tmp1 + tmp0;
5049
tmp12 = tmp1 - tmp0;
5050
5051
/* Odd part */
5052
5053
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5054
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5055
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
5056
tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
5057
tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
5058
tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
5059
tmp1 = (z1 - z2 - z3) << PASS1_BITS;
5060
5061
/* Final output stage */
5062
5063
wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
5064
wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
5065
wsptr[3*1] = (int) (tmp11 + tmp1);
5066
wsptr[3*4] = (int) (tmp11 - tmp1);
5067
wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
5068
wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
5069
}
5070
5071
/* Pass 2: process 6 rows from work array, store into output array.
5072
* 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
5073
*/
5074
5075
wsptr = workspace;
5076
for (ctr = 0; ctr < 6; ctr++) {
5077
outptr = output_buf[ctr] + output_col;
5078
5079
/* Even part */
5080
5081
/* Add range center and fudge factor for final descale and range-limit. */
5082
tmp0 = (INT32) wsptr[0] +
5083
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
5084
(ONE << (PASS1_BITS+2)));
5085
tmp0 <<= CONST_BITS;
5086
tmp2 = (INT32) wsptr[2];
5087
tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
5088
tmp10 = tmp0 + tmp12;
5089
tmp2 = tmp0 - tmp12 - tmp12;
5090
5091
/* Odd part */
5092
5093
tmp12 = (INT32) wsptr[1];
5094
tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
5095
5096
/* Final output stage */
5097
5098
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
5099
CONST_BITS+PASS1_BITS+3)
5100
& RANGE_MASK];
5101
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
5102
CONST_BITS+PASS1_BITS+3)
5103
& RANGE_MASK];
5104
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
5105
CONST_BITS+PASS1_BITS+3)
5106
& RANGE_MASK];
5107
5108
wsptr += 3; /* advance pointer to next row */
5109
}
5110
}
5111
5112
5113
/*
5114
* Perform dequantization and inverse DCT on one block of coefficients,
5115
* producing a 2x4 output block.
5116
*
5117
* 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
5118
*/
5119
5120
GLOBAL(void)
5121
jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5122
JCOEFPTR coef_block,
5123
JSAMPARRAY output_buf, JDIMENSION output_col)
5124
{
5125
INT32 tmp0, tmp2, tmp10, tmp12;
5126
INT32 z1, z2, z3;
5127
JCOEFPTR inptr;
5128
ISLOW_MULT_TYPE * quantptr;
5129
INT32 * wsptr;
5130
JSAMPROW outptr;
5131
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5132
int ctr;
5133
INT32 workspace[2*4]; /* buffers data between passes */
5134
SHIFT_TEMPS
5135
5136
/* Pass 1: process columns from input, store into work array.
5137
* 4-point IDCT kernel,
5138
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
5139
*/
5140
5141
inptr = coef_block;
5142
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5143
wsptr = workspace;
5144
for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
5145
/* Even part */
5146
5147
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5148
tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5149
5150
tmp10 = (tmp0 + tmp2) << CONST_BITS;
5151
tmp12 = (tmp0 - tmp2) << CONST_BITS;
5152
5153
/* Odd part */
5154
/* Same rotation as in the even part of the 8x8 LL&M IDCT */
5155
5156
z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5157
z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5158
5159
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
5160
tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
5161
tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
5162
5163
/* Final output stage */
5164
5165
wsptr[2*0] = tmp10 + tmp0;
5166
wsptr[2*3] = tmp10 - tmp0;
5167
wsptr[2*1] = tmp12 + tmp2;
5168
wsptr[2*2] = tmp12 - tmp2;
5169
}
5170
5171
/* Pass 2: process 4 rows from work array, store into output array. */
5172
5173
wsptr = workspace;
5174
for (ctr = 0; ctr < 4; ctr++) {
5175
outptr = output_buf[ctr] + output_col;
5176
5177
/* Even part */
5178
5179
/* Add range center and fudge factor for final descale and range-limit. */
5180
tmp10 = wsptr[0] +
5181
((((INT32) RANGE_CENTER) << (CONST_BITS+3)) +
5182
(ONE << (CONST_BITS+2)));
5183
5184
/* Odd part */
5185
5186
tmp0 = wsptr[1];
5187
5188
/* Final output stage */
5189
5190
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3)
5191
& RANGE_MASK];
5192
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3)
5193
& RANGE_MASK];
5194
5195
wsptr += 2; /* advance pointer to next row */
5196
}
5197
}
5198
5199
5200
/*
5201
* Perform dequantization and inverse DCT on one block of coefficients,
5202
* producing a 1x2 output block.
5203
*
5204
* 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
5205
*/
5206
5207
GLOBAL(void)
5208
jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5209
JCOEFPTR coef_block,
5210
JSAMPARRAY output_buf, JDIMENSION output_col)
5211
{
5212
DCTELEM tmp0, tmp1;
5213
ISLOW_MULT_TYPE * quantptr;
5214
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5215
ISHIFT_TEMPS
5216
5217
/* Process 1 column from input, store into output array. */
5218
5219
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5220
5221
/* Even part */
5222
5223
tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
5224
/* Add range center and fudge factor for final descale and range-limit. */
5225
tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
5226
5227
/* Odd part */
5228
5229
tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
5230
5231
/* Final output stage */
5232
5233
output_buf[0][output_col] =
5234
range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
5235
output_buf[1][output_col] =
5236
range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
5237
}
5238
5239
#endif /* IDCT_SCALING_SUPPORTED */
5240
#endif /* DCT_ISLOW_SUPPORTED */
5241
5242