Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/3rdparty/libjpeg/jidctint.c
16337 views
1
/*
 * jidctint.c
 *
 * Copyright (C) 1991-1998, Thomas G. Lane.
 * Modification developed 2002-2015 by Guido Vollbeding.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
 * This file contains a slow-but-accurate integer implementation of the
 * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
 * must also perform dequantization of the input coefficients.
 *
 * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
 * on each row (or vice versa, but it's more convenient to emit a row at
 * a time).  Direct algorithms are also available, but they are much more
 * complex and seem not to be any faster when reduced to code.
 *
 * This implementation is based on an algorithm described in
 *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
 *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
 *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
 * The primary algorithm described there uses 11 multiplies and 29 adds.
 * We use their alternate method with 12 multiplies and 32 adds.
 * The advantage of this method is that no data path contains more than one
 * multiplication; this allows a very simple and accurate implementation in
 * scaled fixed-point arithmetic, with a minimal number of shifts.
 *
 * We also provide IDCT routines with various output sample block sizes for
 * direct resolution reduction or enlargement and for direct resolving the
 * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
 * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block.
 *
 * For N<8 we simply take the corresponding low-frequency coefficients of
 * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
 * to yield the downscaled outputs.
 * This can be seen as direct low-pass downsampling from the DCT domain
 * point of view rather than the usual spatial domain point of view,
 * yielding significant computational savings and results at least
 * as good as common bilinear (averaging) spatial downsampling.
 *
 * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as
 * lower frequencies and higher frequencies assumed to be zero.
 * It turns out that the computational effort is similar to the 8x8 IDCT
 * regarding the output size.
 * Furthermore, the scaling and descaling is the same for all IDCT sizes.
 *
 * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
 * since there would be too many additional constants to pre-calculate.
 */
50
51
#define JPEG_INTERNALS
52
#include "jinclude.h"
53
#include "jpeglib.h"
54
#include "jdct.h" /* Private declarations for DCT subsystem */
55
56
#ifdef DCT_ISLOW_SUPPORTED
57
58
59
/*
60
* This module is specialized to the case DCTSIZE = 8.
61
*/
62
63
#if DCTSIZE != 8
64
Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
65
#endif
66
67
68
/*
 * The poop on this scaling stuff is as follows:
 *
 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
 * larger than the true IDCT outputs.  The final outputs are therefore
 * a factor of N larger than desired; since N=8 this can be cured by
 * a simple right shift at the end of the algorithm.  The advantage of
 * this arrangement is that we save two multiplications per 1-D IDCT,
 * because the y0 and y4 inputs need not be divided by sqrt(N).
 *
 * We have to do addition and subtraction of the integer inputs, which
 * is no problem, and multiplication by fractional constants, which is
 * a problem to do in integer arithmetic.  We multiply all the constants
 * by CONST_SCALE and convert them to integer constants (thus retaining
 * CONST_BITS bits of precision in the constants).  After doing a
 * multiplication we have to divide the product by CONST_SCALE, with proper
 * rounding, to produce the correct output.  This division can be done
 * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
 * as long as possible so that partial sums can be added together with
 * full fractional precision.
 *
 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
 * they are represented to better-than-integral precision.  These outputs
 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
 * with the recommended scaling.  (To scale up 12-bit sample data further, an
 * intermediate INT32 array would be needed.)
 *
 * To avoid overflow of the 32-bit intermediate results in pass 2, we must
 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
 * shows that the values given below are the most effective.
 */

#if BITS_IN_JSAMPLE == 8
#define CONST_BITS  13
#define PASS1_BITS  2
#else
#define CONST_BITS  13
#define PASS1_BITS  1   /* lose a little precision to avoid overflow */
#endif
107
108
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
 * causing a lot of useless floating-point operations at run time.
 * To get around this we use the following pre-calculated constants.
 * If you change CONST_BITS you may want to add appropriate values.
 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
 *
 * Each literal below is the named constant scaled by 2**13 and rounded
 * to the nearest integer, which is why the table is only selected when
 * CONST_BITS == 13.
 */

#if CONST_BITS == 13
#define FIX_0_298631336  ((INT32)  2446)        /* FIX(0.298631336) */
#define FIX_0_390180644  ((INT32)  3196)        /* FIX(0.390180644) */
#define FIX_0_541196100  ((INT32)  4433)        /* FIX(0.541196100) */
#define FIX_0_765366865  ((INT32)  6270)        /* FIX(0.765366865) */
#define FIX_0_899976223  ((INT32)  7373)        /* FIX(0.899976223) */
#define FIX_1_175875602  ((INT32)  9633)        /* FIX(1.175875602) */
#define FIX_1_501321110  ((INT32)  12299)       /* FIX(1.501321110) */
#define FIX_1_847759065  ((INT32)  15137)       /* FIX(1.847759065) */
#define FIX_1_961570560  ((INT32)  16069)       /* FIX(1.961570560) */
#define FIX_2_053119869  ((INT32)  16819)       /* FIX(2.053119869) */
#define FIX_2_562915447  ((INT32)  20995)       /* FIX(2.562915447) */
#define FIX_3_072711026  ((INT32)  25172)       /* FIX(3.072711026) */
#else
#define FIX_0_298631336  FIX(0.298631336)
#define FIX_0_390180644  FIX(0.390180644)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_765366865  FIX(0.765366865)
#define FIX_0_899976223  FIX(0.899976223)
#define FIX_1_175875602  FIX(1.175875602)
#define FIX_1_501321110  FIX(1.501321110)
#define FIX_1_847759065  FIX(1.847759065)
#define FIX_1_961570560  FIX(1.961570560)
#define FIX_2_053119869  FIX(2.053119869)
#define FIX_2_562915447  FIX(2.562915447)
#define FIX_3_072711026  FIX(3.072711026)
#endif
142
143
144
/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
 * For 8-bit samples with the recommended scaling, all the variable
 * and constant values involved are no more than 16 bits wide, so a
 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
 * For 12-bit samples, a full 32-bit multiplication will be needed.
 *
 * The second macro parameter is named "constval" rather than "const" so
 * that no C keyword is used as a macro parameter name.
 */

#if BITS_IN_JSAMPLE == 8
#define MULTIPLY(var,constval)  MULTIPLY16C16(var,constval)
#else
#define MULTIPLY(var,constval)  ((var) * (constval))
#endif
156
157
158
/* Dequantize a coefficient by multiplying it by the multiplier-table
 * entry; produce an int result.  In this module, both inputs and result
 * are 16 bits or less, so either int or short multiply will work.
 *
 * The coefficient is cast to ISLOW_MULT_TYPE before the multiplication.
 */

#define DEQUANTIZE(coef,quantval)  (((ISLOW_MULT_TYPE) (coef)) * (quantval))
164
165
166
/*
167
* Perform dequantization and inverse DCT on one block of coefficients.
168
*
169
* cK represents sqrt(2) * cos(K*pi/16).
170
*/
171
172
GLOBAL(void)
173
jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
174
JCOEFPTR coef_block,
175
JSAMPARRAY output_buf, JDIMENSION output_col)
176
{
177
INT32 tmp0, tmp1, tmp2, tmp3;
178
INT32 tmp10, tmp11, tmp12, tmp13;
179
INT32 z1, z2, z3;
180
JCOEFPTR inptr;
181
ISLOW_MULT_TYPE * quantptr;
182
int * wsptr;
183
JSAMPROW outptr;
184
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
185
int ctr;
186
int workspace[DCTSIZE2]; /* buffers data between passes */
187
SHIFT_TEMPS
188
189
/* Pass 1: process columns from input, store into work array.
190
* Note results are scaled up by sqrt(8) compared to a true IDCT;
191
* furthermore, we scale the results by 2**PASS1_BITS.
192
*/
193
194
inptr = coef_block;
195
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
196
wsptr = workspace;
197
for (ctr = DCTSIZE; ctr > 0; ctr--) {
198
/* Due to quantization, we will usually find that many of the input
199
* coefficients are zero, especially the AC terms. We can exploit this
200
* by short-circuiting the IDCT calculation for any column in which all
201
* the AC terms are zero. In that case each output is equal to the
202
* DC coefficient (with scale factor as needed).
203
* With typical images and quantization tables, half or more of the
204
* column DCT calculations can be simplified this way.
205
*/
206
207
if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
208
inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
209
inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
210
inptr[DCTSIZE*7] == 0) {
211
/* AC terms all zero */
212
int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
213
214
wsptr[DCTSIZE*0] = dcval;
215
wsptr[DCTSIZE*1] = dcval;
216
wsptr[DCTSIZE*2] = dcval;
217
wsptr[DCTSIZE*3] = dcval;
218
wsptr[DCTSIZE*4] = dcval;
219
wsptr[DCTSIZE*5] = dcval;
220
wsptr[DCTSIZE*6] = dcval;
221
wsptr[DCTSIZE*7] = dcval;
222
223
inptr++; /* advance pointers to next column */
224
quantptr++;
225
wsptr++;
226
continue;
227
}
228
229
/* Even part: reverse the even part of the forward DCT.
230
* The rotator is c(-6).
231
*/
232
233
z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
234
z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
235
z2 <<= CONST_BITS;
236
z3 <<= CONST_BITS;
237
/* Add fudge factor here for final descale. */
238
z2 += ONE << (CONST_BITS-PASS1_BITS-1);
239
240
tmp0 = z2 + z3;
241
tmp1 = z2 - z3;
242
243
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
244
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
245
246
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
247
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
248
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
249
250
tmp10 = tmp0 + tmp2;
251
tmp13 = tmp0 - tmp2;
252
tmp11 = tmp1 + tmp3;
253
tmp12 = tmp1 - tmp3;
254
255
/* Odd part per figure 8; the matrix is unitary and hence its
256
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
257
*/
258
259
tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
260
tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
261
tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
262
tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
263
264
z2 = tmp0 + tmp2;
265
z3 = tmp1 + tmp3;
266
267
z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
268
z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
269
z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
270
z2 += z1;
271
z3 += z1;
272
273
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
274
tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
275
tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
276
tmp0 += z1 + z2;
277
tmp3 += z1 + z3;
278
279
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
280
tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
281
tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
282
tmp1 += z1 + z3;
283
tmp2 += z1 + z2;
284
285
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
286
287
wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
288
wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
289
wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
290
wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
291
wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
292
wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
293
wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
294
wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
295
296
inptr++; /* advance pointers to next column */
297
quantptr++;
298
wsptr++;
299
}
300
301
/* Pass 2: process rows from work array, store into output array.
302
* Note that we must descale the results by a factor of 8 == 2**3,
303
* and also undo the PASS1_BITS scaling.
304
*/
305
306
wsptr = workspace;
307
for (ctr = 0; ctr < DCTSIZE; ctr++) {
308
outptr = output_buf[ctr] + output_col;
309
310
/* Add range center and fudge factor for final descale and range-limit. */
311
z2 = (INT32) wsptr[0] +
312
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
313
(ONE << (PASS1_BITS+2)));
314
315
/* Rows of zeroes can be exploited in the same way as we did with columns.
316
* However, the column calculation has created many nonzero AC terms, so
317
* the simplification applies less often (typically 5% to 10% of the time).
318
* On machines with very fast multiplication, it's possible that the
319
* test takes more time than it's worth. In that case this section
320
* may be commented out.
321
*/
322
323
#ifndef NO_ZERO_ROW_TEST
324
if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
325
wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
326
/* AC terms all zero */
327
JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS1_BITS+3)
328
& RANGE_MASK];
329
330
outptr[0] = dcval;
331
outptr[1] = dcval;
332
outptr[2] = dcval;
333
outptr[3] = dcval;
334
outptr[4] = dcval;
335
outptr[5] = dcval;
336
outptr[6] = dcval;
337
outptr[7] = dcval;
338
339
wsptr += DCTSIZE; /* advance pointer to next row */
340
continue;
341
}
342
#endif
343
344
/* Even part: reverse the even part of the forward DCT.
345
* The rotator is c(-6).
346
*/
347
348
z3 = (INT32) wsptr[4];
349
350
tmp0 = (z2 + z3) << CONST_BITS;
351
tmp1 = (z2 - z3) << CONST_BITS;
352
353
z2 = (INT32) wsptr[2];
354
z3 = (INT32) wsptr[6];
355
356
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
357
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
358
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
359
360
tmp10 = tmp0 + tmp2;
361
tmp13 = tmp0 - tmp2;
362
tmp11 = tmp1 + tmp3;
363
tmp12 = tmp1 - tmp3;
364
365
/* Odd part per figure 8; the matrix is unitary and hence its
366
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
367
*/
368
369
tmp0 = (INT32) wsptr[7];
370
tmp1 = (INT32) wsptr[5];
371
tmp2 = (INT32) wsptr[3];
372
tmp3 = (INT32) wsptr[1];
373
374
z2 = tmp0 + tmp2;
375
z3 = tmp1 + tmp3;
376
377
z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
378
z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
379
z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
380
z2 += z1;
381
z3 += z1;
382
383
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
384
tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
385
tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
386
tmp0 += z1 + z2;
387
tmp3 += z1 + z3;
388
389
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
390
tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
391
tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
392
tmp1 += z1 + z3;
393
tmp2 += z1 + z2;
394
395
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
396
397
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
398
CONST_BITS+PASS1_BITS+3)
399
& RANGE_MASK];
400
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
401
CONST_BITS+PASS1_BITS+3)
402
& RANGE_MASK];
403
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
404
CONST_BITS+PASS1_BITS+3)
405
& RANGE_MASK];
406
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
407
CONST_BITS+PASS1_BITS+3)
408
& RANGE_MASK];
409
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
410
CONST_BITS+PASS1_BITS+3)
411
& RANGE_MASK];
412
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
413
CONST_BITS+PASS1_BITS+3)
414
& RANGE_MASK];
415
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
416
CONST_BITS+PASS1_BITS+3)
417
& RANGE_MASK];
418
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
419
CONST_BITS+PASS1_BITS+3)
420
& RANGE_MASK];
421
422
wsptr += DCTSIZE; /* advance pointer to next row */
423
}
424
}
425
426
#ifdef IDCT_SCALING_SUPPORTED
427
428
429
/*
430
* Perform dequantization and inverse DCT on one block of coefficients,
431
* producing a 7x7 output block.
432
*
433
* Optimized algorithm with 12 multiplications in the 1-D kernel.
434
* cK represents sqrt(2) * cos(K*pi/14).
435
*/
436
437
GLOBAL(void)
438
jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
439
JCOEFPTR coef_block,
440
JSAMPARRAY output_buf, JDIMENSION output_col)
441
{
442
INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
443
INT32 z1, z2, z3;
444
JCOEFPTR inptr;
445
ISLOW_MULT_TYPE * quantptr;
446
int * wsptr;
447
JSAMPROW outptr;
448
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
449
int ctr;
450
int workspace[7*7]; /* buffers data between passes */
451
SHIFT_TEMPS
452
453
/* Pass 1: process columns from input, store into work array. */
454
455
inptr = coef_block;
456
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
457
wsptr = workspace;
458
for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
459
/* Even part */
460
461
tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
462
tmp13 <<= CONST_BITS;
463
/* Add fudge factor here for final descale. */
464
tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
465
466
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
467
z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
468
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
469
470
tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
471
tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
472
tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
473
tmp0 = z1 + z3;
474
z2 -= tmp0;
475
tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
476
tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
477
tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
478
tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
479
480
/* Odd part */
481
482
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
483
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
484
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
485
486
tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
487
tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
488
tmp0 = tmp1 - tmp2;
489
tmp1 += tmp2;
490
tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
491
tmp1 += tmp2;
492
z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
493
tmp0 += z2;
494
tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
495
496
/* Final output stage */
497
498
wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
499
wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
500
wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
501
wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
502
wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
503
wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
504
wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
505
}
506
507
/* Pass 2: process 7 rows from work array, store into output array. */
508
509
wsptr = workspace;
510
for (ctr = 0; ctr < 7; ctr++) {
511
outptr = output_buf[ctr] + output_col;
512
513
/* Even part */
514
515
/* Add range center and fudge factor for final descale and range-limit. */
516
tmp13 = (INT32) wsptr[0] +
517
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
518
(ONE << (PASS1_BITS+2)));
519
tmp13 <<= CONST_BITS;
520
521
z1 = (INT32) wsptr[2];
522
z2 = (INT32) wsptr[4];
523
z3 = (INT32) wsptr[6];
524
525
tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
526
tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
527
tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
528
tmp0 = z1 + z3;
529
z2 -= tmp0;
530
tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
531
tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
532
tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
533
tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
534
535
/* Odd part */
536
537
z1 = (INT32) wsptr[1];
538
z2 = (INT32) wsptr[3];
539
z3 = (INT32) wsptr[5];
540
541
tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
542
tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
543
tmp0 = tmp1 - tmp2;
544
tmp1 += tmp2;
545
tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
546
tmp1 += tmp2;
547
z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
548
tmp0 += z2;
549
tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
550
551
/* Final output stage */
552
553
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
554
CONST_BITS+PASS1_BITS+3)
555
& RANGE_MASK];
556
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
557
CONST_BITS+PASS1_BITS+3)
558
& RANGE_MASK];
559
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
560
CONST_BITS+PASS1_BITS+3)
561
& RANGE_MASK];
562
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
563
CONST_BITS+PASS1_BITS+3)
564
& RANGE_MASK];
565
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
566
CONST_BITS+PASS1_BITS+3)
567
& RANGE_MASK];
568
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
569
CONST_BITS+PASS1_BITS+3)
570
& RANGE_MASK];
571
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
572
CONST_BITS+PASS1_BITS+3)
573
& RANGE_MASK];
574
575
wsptr += 7; /* advance pointer to next row */
576
}
577
}
578
579
580
/*
581
* Perform dequantization and inverse DCT on one block of coefficients,
582
* producing a reduced-size 6x6 output block.
583
*
584
* Optimized algorithm with 3 multiplications in the 1-D kernel.
585
* cK represents sqrt(2) * cos(K*pi/12).
586
*/
587
588
GLOBAL(void)
589
jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
590
JCOEFPTR coef_block,
591
JSAMPARRAY output_buf, JDIMENSION output_col)
592
{
593
INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
594
INT32 z1, z2, z3;
595
JCOEFPTR inptr;
596
ISLOW_MULT_TYPE * quantptr;
597
int * wsptr;
598
JSAMPROW outptr;
599
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
600
int ctr;
601
int workspace[6*6]; /* buffers data between passes */
602
SHIFT_TEMPS
603
604
/* Pass 1: process columns from input, store into work array. */
605
606
inptr = coef_block;
607
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
608
wsptr = workspace;
609
for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
610
/* Even part */
611
612
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
613
tmp0 <<= CONST_BITS;
614
/* Add fudge factor here for final descale. */
615
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
616
tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
617
tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
618
tmp1 = tmp0 + tmp10;
619
tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
620
tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
621
tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
622
tmp10 = tmp1 + tmp0;
623
tmp12 = tmp1 - tmp0;
624
625
/* Odd part */
626
627
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
628
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
629
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
630
tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
631
tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
632
tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
633
tmp1 = (z1 - z2 - z3) << PASS1_BITS;
634
635
/* Final output stage */
636
637
wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
638
wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
639
wsptr[6*1] = (int) (tmp11 + tmp1);
640
wsptr[6*4] = (int) (tmp11 - tmp1);
641
wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
642
wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
643
}
644
645
/* Pass 2: process 6 rows from work array, store into output array. */
646
647
wsptr = workspace;
648
for (ctr = 0; ctr < 6; ctr++) {
649
outptr = output_buf[ctr] + output_col;
650
651
/* Even part */
652
653
/* Add range center and fudge factor for final descale and range-limit. */
654
tmp0 = (INT32) wsptr[0] +
655
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
656
(ONE << (PASS1_BITS+2)));
657
tmp0 <<= CONST_BITS;
658
tmp2 = (INT32) wsptr[4];
659
tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
660
tmp1 = tmp0 + tmp10;
661
tmp11 = tmp0 - tmp10 - tmp10;
662
tmp10 = (INT32) wsptr[2];
663
tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
664
tmp10 = tmp1 + tmp0;
665
tmp12 = tmp1 - tmp0;
666
667
/* Odd part */
668
669
z1 = (INT32) wsptr[1];
670
z2 = (INT32) wsptr[3];
671
z3 = (INT32) wsptr[5];
672
tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
673
tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
674
tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
675
tmp1 = (z1 - z2 - z3) << CONST_BITS;
676
677
/* Final output stage */
678
679
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
680
CONST_BITS+PASS1_BITS+3)
681
& RANGE_MASK];
682
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
683
CONST_BITS+PASS1_BITS+3)
684
& RANGE_MASK];
685
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
686
CONST_BITS+PASS1_BITS+3)
687
& RANGE_MASK];
688
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
689
CONST_BITS+PASS1_BITS+3)
690
& RANGE_MASK];
691
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
692
CONST_BITS+PASS1_BITS+3)
693
& RANGE_MASK];
694
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
695
CONST_BITS+PASS1_BITS+3)
696
& RANGE_MASK];
697
698
wsptr += 6; /* advance pointer to next row */
699
}
700
}
701
702
703
/*
704
* Perform dequantization and inverse DCT on one block of coefficients,
705
* producing a reduced-size 5x5 output block.
706
*
707
* Optimized algorithm with 5 multiplications in the 1-D kernel.
708
* cK represents sqrt(2) * cos(K*pi/10).
709
*/
710
711
GLOBAL(void)
712
jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
713
JCOEFPTR coef_block,
714
JSAMPARRAY output_buf, JDIMENSION output_col)
715
{
716
INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
717
INT32 z1, z2, z3;
718
JCOEFPTR inptr;
719
ISLOW_MULT_TYPE * quantptr;
720
int * wsptr;
721
JSAMPROW outptr;
722
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
723
int ctr;
724
int workspace[5*5]; /* buffers data between passes */
725
SHIFT_TEMPS
726
727
/* Pass 1: process columns from input, store into work array. */
728
729
inptr = coef_block;
730
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
731
wsptr = workspace;
732
for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
733
/* Even part */
734
735
tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
736
tmp12 <<= CONST_BITS;
737
/* Add fudge factor here for final descale. */
738
tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
739
tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
740
tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
741
z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
742
z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
743
z3 = tmp12 + z2;
744
tmp10 = z3 + z1;
745
tmp11 = z3 - z1;
746
tmp12 -= z2 << 2;
747
748
/* Odd part */
749
750
z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
751
z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
752
753
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
754
tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
755
tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
756
757
/* Final output stage */
758
759
wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
760
wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
761
wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
762
wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
763
wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
764
}
765
766
/* Pass 2: process 5 rows from work array, store into output array. */
767
768
wsptr = workspace;
769
for (ctr = 0; ctr < 5; ctr++) {
770
outptr = output_buf[ctr] + output_col;
771
772
/* Even part */
773
774
/* Add range center and fudge factor for final descale and range-limit. */
775
tmp12 = (INT32) wsptr[0] +
776
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
777
(ONE << (PASS1_BITS+2)));
778
tmp12 <<= CONST_BITS;
779
tmp0 = (INT32) wsptr[2];
780
tmp1 = (INT32) wsptr[4];
781
z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
782
z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
783
z3 = tmp12 + z2;
784
tmp10 = z3 + z1;
785
tmp11 = z3 - z1;
786
tmp12 -= z2 << 2;
787
788
/* Odd part */
789
790
z2 = (INT32) wsptr[1];
791
z3 = (INT32) wsptr[3];
792
793
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
794
tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
795
tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
796
797
/* Final output stage */
798
799
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
800
CONST_BITS+PASS1_BITS+3)
801
& RANGE_MASK];
802
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
803
CONST_BITS+PASS1_BITS+3)
804
& RANGE_MASK];
805
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
806
CONST_BITS+PASS1_BITS+3)
807
& RANGE_MASK];
808
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
809
CONST_BITS+PASS1_BITS+3)
810
& RANGE_MASK];
811
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
812
CONST_BITS+PASS1_BITS+3)
813
& RANGE_MASK];
814
815
wsptr += 5; /* advance pointer to next row */
816
}
817
}
818
819
820
/*
821
* Perform dequantization and inverse DCT on one block of coefficients,
822
* producing a reduced-size 4x4 output block.
823
*
824
* Optimized algorithm with 3 multiplications in the 1-D kernel.
825
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
826
*/
827
828
GLOBAL(void)
829
jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
830
JCOEFPTR coef_block,
831
JSAMPARRAY output_buf, JDIMENSION output_col)
832
{
833
INT32 tmp0, tmp2, tmp10, tmp12;
834
INT32 z1, z2, z3;
835
JCOEFPTR inptr;
836
ISLOW_MULT_TYPE * quantptr;
837
int * wsptr;
838
JSAMPROW outptr;
839
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
840
int ctr;
841
int workspace[4*4]; /* buffers data between passes */
842
SHIFT_TEMPS
843
844
/* Pass 1: process columns from input, store into work array. */
845
846
inptr = coef_block;
847
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
848
wsptr = workspace;
849
for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
850
/* Even part */
851
852
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
853
tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
854
855
tmp10 = (tmp0 + tmp2) << PASS1_BITS;
856
tmp12 = (tmp0 - tmp2) << PASS1_BITS;
857
858
/* Odd part */
859
/* Same rotation as in the even part of the 8x8 LL&M IDCT */
860
861
z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
862
z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
863
864
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
865
/* Add fudge factor here for final descale. */
866
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
867
tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
868
CONST_BITS-PASS1_BITS);
869
tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
870
CONST_BITS-PASS1_BITS);
871
872
/* Final output stage */
873
874
wsptr[4*0] = (int) (tmp10 + tmp0);
875
wsptr[4*3] = (int) (tmp10 - tmp0);
876
wsptr[4*1] = (int) (tmp12 + tmp2);
877
wsptr[4*2] = (int) (tmp12 - tmp2);
878
}
879
880
/* Pass 2: process 4 rows from work array, store into output array. */
881
882
wsptr = workspace;
883
for (ctr = 0; ctr < 4; ctr++) {
884
outptr = output_buf[ctr] + output_col;
885
886
/* Even part */
887
888
/* Add range center and fudge factor for final descale and range-limit. */
889
tmp0 = (INT32) wsptr[0] +
890
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
891
(ONE << (PASS1_BITS+2)));
892
tmp2 = (INT32) wsptr[2];
893
894
tmp10 = (tmp0 + tmp2) << CONST_BITS;
895
tmp12 = (tmp0 - tmp2) << CONST_BITS;
896
897
/* Odd part */
898
/* Same rotation as in the even part of the 8x8 LL&M IDCT */
899
900
z2 = (INT32) wsptr[1];
901
z3 = (INT32) wsptr[3];
902
903
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
904
tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
905
tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
906
907
/* Final output stage */
908
909
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
910
CONST_BITS+PASS1_BITS+3)
911
& RANGE_MASK];
912
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
913
CONST_BITS+PASS1_BITS+3)
914
& RANGE_MASK];
915
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
916
CONST_BITS+PASS1_BITS+3)
917
& RANGE_MASK];
918
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
919
CONST_BITS+PASS1_BITS+3)
920
& RANGE_MASK];
921
922
wsptr += 4; /* advance pointer to next row */
923
}
924
}
925
926
927
/*
928
* Perform dequantization and inverse DCT on one block of coefficients,
929
* producing a reduced-size 3x3 output block.
930
*
931
* Optimized algorithm with 2 multiplications in the 1-D kernel.
932
* cK represents sqrt(2) * cos(K*pi/6).
933
*/
934
935
GLOBAL(void)
936
jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
937
JCOEFPTR coef_block,
938
JSAMPARRAY output_buf, JDIMENSION output_col)
939
{
940
INT32 tmp0, tmp2, tmp10, tmp12;
941
JCOEFPTR inptr;
942
ISLOW_MULT_TYPE * quantptr;
943
int * wsptr;
944
JSAMPROW outptr;
945
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
946
int ctr;
947
int workspace[3*3]; /* buffers data between passes */
948
SHIFT_TEMPS
949
950
/* Pass 1: process columns from input, store into work array. */
951
952
inptr = coef_block;
953
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
954
wsptr = workspace;
955
for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
956
/* Even part */
957
958
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
959
tmp0 <<= CONST_BITS;
960
/* Add fudge factor here for final descale. */
961
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
962
tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
963
tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
964
tmp10 = tmp0 + tmp12;
965
tmp2 = tmp0 - tmp12 - tmp12;
966
967
/* Odd part */
968
969
tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
970
tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
971
972
/* Final output stage */
973
974
wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
975
wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
976
wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
977
}
978
979
/* Pass 2: process 3 rows from work array, store into output array. */
980
981
wsptr = workspace;
982
for (ctr = 0; ctr < 3; ctr++) {
983
outptr = output_buf[ctr] + output_col;
984
985
/* Even part */
986
987
/* Add range center and fudge factor for final descale and range-limit. */
988
tmp0 = (INT32) wsptr[0] +
989
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
990
(ONE << (PASS1_BITS+2)));
991
tmp0 <<= CONST_BITS;
992
tmp2 = (INT32) wsptr[2];
993
tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
994
tmp10 = tmp0 + tmp12;
995
tmp2 = tmp0 - tmp12 - tmp12;
996
997
/* Odd part */
998
999
tmp12 = (INT32) wsptr[1];
1000
tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
1001
1002
/* Final output stage */
1003
1004
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1005
CONST_BITS+PASS1_BITS+3)
1006
& RANGE_MASK];
1007
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1008
CONST_BITS+PASS1_BITS+3)
1009
& RANGE_MASK];
1010
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
1011
CONST_BITS+PASS1_BITS+3)
1012
& RANGE_MASK];
1013
1014
wsptr += 3; /* advance pointer to next row */
1015
}
1016
}
1017
1018
1019
/*
1020
* Perform dequantization and inverse DCT on one block of coefficients,
1021
* producing a reduced-size 2x2 output block.
1022
*
1023
* Multiplication-less algorithm.
1024
*/
1025
1026
GLOBAL(void)
1027
jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1028
JCOEFPTR coef_block,
1029
JSAMPARRAY output_buf, JDIMENSION output_col)
1030
{
1031
DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1032
ISLOW_MULT_TYPE * quantptr;
1033
JSAMPROW outptr;
1034
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1035
ISHIFT_TEMPS
1036
1037
/* Pass 1: process columns from input. */
1038
1039
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1040
1041
/* Column 0 */
1042
tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
1043
tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
1044
/* Add range center and fudge factor for final descale and range-limit. */
1045
tmp4 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
1046
1047
tmp0 = tmp4 + tmp5;
1048
tmp2 = tmp4 - tmp5;
1049
1050
/* Column 1 */
1051
tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
1052
tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
1053
1054
tmp1 = tmp4 + tmp5;
1055
tmp3 = tmp4 - tmp5;
1056
1057
/* Pass 2: process 2 rows, store into output array. */
1058
1059
/* Row 0 */
1060
outptr = output_buf[0] + output_col;
1061
1062
outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
1063
outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
1064
1065
/* Row 1 */
1066
outptr = output_buf[1] + output_col;
1067
1068
outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK];
1069
outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK];
1070
}
1071
1072
1073
/*
1074
* Perform dequantization and inverse DCT on one block of coefficients,
1075
* producing a reduced-size 1x1 output block.
1076
*
1077
* We hardly need an inverse DCT routine for this: just take the
1078
* average pixel value, which is one-eighth of the DC coefficient.
1079
*/
1080
1081
GLOBAL(void)
1082
jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1083
JCOEFPTR coef_block,
1084
JSAMPARRAY output_buf, JDIMENSION output_col)
1085
{
1086
DCTELEM dcval;
1087
ISLOW_MULT_TYPE * quantptr;
1088
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1089
ISHIFT_TEMPS
1090
1091
/* 1x1 is trivial: just take the DC coefficient divided by 8. */
1092
1093
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1094
1095
dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
1096
/* Add range center and fudge factor for descale and range-limit. */
1097
dcval += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
1098
1099
output_buf[0][output_col] =
1100
range_limit[(int) IRIGHT_SHIFT(dcval, 3) & RANGE_MASK];
1101
}
1102
1103
1104
/*
1105
* Perform dequantization and inverse DCT on one block of coefficients,
1106
* producing a 9x9 output block.
1107
*
1108
* Optimized algorithm with 10 multiplications in the 1-D kernel.
1109
* cK represents sqrt(2) * cos(K*pi/18).
1110
*/
1111
1112
GLOBAL(void)
1113
jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1114
JCOEFPTR coef_block,
1115
JSAMPARRAY output_buf, JDIMENSION output_col)
1116
{
1117
INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
1118
INT32 z1, z2, z3, z4;
1119
JCOEFPTR inptr;
1120
ISLOW_MULT_TYPE * quantptr;
1121
int * wsptr;
1122
JSAMPROW outptr;
1123
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1124
int ctr;
1125
int workspace[8*9]; /* buffers data between passes */
1126
SHIFT_TEMPS
1127
1128
/* Pass 1: process columns from input, store into work array. */
1129
1130
inptr = coef_block;
1131
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1132
wsptr = workspace;
1133
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1134
/* Even part */
1135
1136
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1137
tmp0 <<= CONST_BITS;
1138
/* Add fudge factor here for final descale. */
1139
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
1140
1141
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1142
z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1143
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1144
1145
tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
1146
tmp1 = tmp0 + tmp3;
1147
tmp2 = tmp0 - tmp3 - tmp3;
1148
1149
tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1150
tmp11 = tmp2 + tmp0;
1151
tmp14 = tmp2 - tmp0 - tmp0;
1152
1153
tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1154
tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1155
tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1156
1157
tmp10 = tmp1 + tmp0 - tmp3;
1158
tmp12 = tmp1 - tmp0 + tmp2;
1159
tmp13 = tmp1 - tmp2 + tmp3;
1160
1161
/* Odd part */
1162
1163
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1164
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1165
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1166
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1167
1168
z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
1169
1170
tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
1171
tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
1172
tmp0 = tmp2 + tmp3 - z2;
1173
tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
1174
tmp2 += z2 - tmp1;
1175
tmp3 += z2 + tmp1;
1176
tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1177
1178
/* Final output stage */
1179
1180
wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
1181
wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
1182
wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
1183
wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
1184
wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
1185
wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
1186
wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
1187
wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
1188
wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
1189
}
1190
1191
/* Pass 2: process 9 rows from work array, store into output array. */
1192
1193
wsptr = workspace;
1194
for (ctr = 0; ctr < 9; ctr++) {
1195
outptr = output_buf[ctr] + output_col;
1196
1197
/* Even part */
1198
1199
/* Add range center and fudge factor for final descale and range-limit. */
1200
tmp0 = (INT32) wsptr[0] +
1201
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1202
(ONE << (PASS1_BITS+2)));
1203
tmp0 <<= CONST_BITS;
1204
1205
z1 = (INT32) wsptr[2];
1206
z2 = (INT32) wsptr[4];
1207
z3 = (INT32) wsptr[6];
1208
1209
tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */
1210
tmp1 = tmp0 + tmp3;
1211
tmp2 = tmp0 - tmp3 - tmp3;
1212
1213
tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
1214
tmp11 = tmp2 + tmp0;
1215
tmp14 = tmp2 - tmp0 - tmp0;
1216
1217
tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
1218
tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */
1219
tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */
1220
1221
tmp10 = tmp1 + tmp0 - tmp3;
1222
tmp12 = tmp1 - tmp0 + tmp2;
1223
tmp13 = tmp1 - tmp2 + tmp3;
1224
1225
/* Odd part */
1226
1227
z1 = (INT32) wsptr[1];
1228
z2 = (INT32) wsptr[3];
1229
z3 = (INT32) wsptr[5];
1230
z4 = (INT32) wsptr[7];
1231
1232
z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */
1233
1234
tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */
1235
tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */
1236
tmp0 = tmp2 + tmp3 - z2;
1237
tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */
1238
tmp2 += z2 - tmp1;
1239
tmp3 += z2 + tmp1;
1240
tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
1241
1242
/* Final output stage */
1243
1244
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
1245
CONST_BITS+PASS1_BITS+3)
1246
& RANGE_MASK];
1247
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
1248
CONST_BITS+PASS1_BITS+3)
1249
& RANGE_MASK];
1250
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
1251
CONST_BITS+PASS1_BITS+3)
1252
& RANGE_MASK];
1253
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
1254
CONST_BITS+PASS1_BITS+3)
1255
& RANGE_MASK];
1256
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
1257
CONST_BITS+PASS1_BITS+3)
1258
& RANGE_MASK];
1259
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
1260
CONST_BITS+PASS1_BITS+3)
1261
& RANGE_MASK];
1262
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
1263
CONST_BITS+PASS1_BITS+3)
1264
& RANGE_MASK];
1265
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
1266
CONST_BITS+PASS1_BITS+3)
1267
& RANGE_MASK];
1268
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
1269
CONST_BITS+PASS1_BITS+3)
1270
& RANGE_MASK];
1271
1272
wsptr += 8; /* advance pointer to next row */
1273
}
1274
}
1275
1276
1277
/*
1278
* Perform dequantization and inverse DCT on one block of coefficients,
1279
* producing a 10x10 output block.
1280
*
1281
* Optimized algorithm with 12 multiplications in the 1-D kernel.
1282
* cK represents sqrt(2) * cos(K*pi/20).
1283
*/
1284
1285
GLOBAL(void)
1286
jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1287
JCOEFPTR coef_block,
1288
JSAMPARRAY output_buf, JDIMENSION output_col)
1289
{
1290
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1291
INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
1292
INT32 z1, z2, z3, z4, z5;
1293
JCOEFPTR inptr;
1294
ISLOW_MULT_TYPE * quantptr;
1295
int * wsptr;
1296
JSAMPROW outptr;
1297
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1298
int ctr;
1299
int workspace[8*10]; /* buffers data between passes */
1300
SHIFT_TEMPS
1301
1302
/* Pass 1: process columns from input, store into work array. */
1303
1304
inptr = coef_block;
1305
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1306
wsptr = workspace;
1307
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1308
/* Even part */
1309
1310
z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1311
z3 <<= CONST_BITS;
1312
/* Add fudge factor here for final descale. */
1313
z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1314
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1315
z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1316
z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1317
tmp10 = z3 + z1;
1318
tmp11 = z3 - z2;
1319
1320
tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
1321
CONST_BITS-PASS1_BITS);
1322
1323
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1324
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1325
1326
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1327
tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1328
tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1329
1330
tmp20 = tmp10 + tmp12;
1331
tmp24 = tmp10 - tmp12;
1332
tmp21 = tmp11 + tmp13;
1333
tmp23 = tmp11 - tmp13;
1334
1335
/* Odd part */
1336
1337
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1338
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1339
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1340
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1341
1342
tmp11 = z2 + z4;
1343
tmp13 = z2 - z4;
1344
1345
tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1346
z5 = z3 << CONST_BITS;
1347
1348
z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1349
z4 = z5 + tmp12;
1350
1351
tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1352
tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1353
1354
z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1355
z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
1356
1357
tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
1358
1359
tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1360
tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1361
1362
/* Final output stage */
1363
1364
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1365
wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1366
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1367
wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1368
wsptr[8*2] = (int) (tmp22 + tmp12);
1369
wsptr[8*7] = (int) (tmp22 - tmp12);
1370
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1371
wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1372
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1373
wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1374
}
1375
1376
/* Pass 2: process 10 rows from work array, store into output array. */
1377
1378
wsptr = workspace;
1379
for (ctr = 0; ctr < 10; ctr++) {
1380
outptr = output_buf[ctr] + output_col;
1381
1382
/* Even part */
1383
1384
/* Add range center and fudge factor for final descale and range-limit. */
1385
z3 = (INT32) wsptr[0] +
1386
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1387
(ONE << (PASS1_BITS+2)));
1388
z3 <<= CONST_BITS;
1389
z4 = (INT32) wsptr[4];
1390
z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
1391
z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
1392
tmp10 = z3 + z1;
1393
tmp11 = z3 - z2;
1394
1395
tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
1396
1397
z2 = (INT32) wsptr[2];
1398
z3 = (INT32) wsptr[6];
1399
1400
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
1401
tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
1402
tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
1403
1404
tmp20 = tmp10 + tmp12;
1405
tmp24 = tmp10 - tmp12;
1406
tmp21 = tmp11 + tmp13;
1407
tmp23 = tmp11 - tmp13;
1408
1409
/* Odd part */
1410
1411
z1 = (INT32) wsptr[1];
1412
z2 = (INT32) wsptr[3];
1413
z3 = (INT32) wsptr[5];
1414
z3 <<= CONST_BITS;
1415
z4 = (INT32) wsptr[7];
1416
1417
tmp11 = z2 + z4;
1418
tmp13 = z2 - z4;
1419
1420
tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
1421
1422
z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
1423
z4 = z3 + tmp12;
1424
1425
tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
1426
tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
1427
1428
z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
1429
z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
1430
1431
tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
1432
1433
tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
1434
tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
1435
1436
/* Final output stage */
1437
1438
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1439
CONST_BITS+PASS1_BITS+3)
1440
& RANGE_MASK];
1441
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1442
CONST_BITS+PASS1_BITS+3)
1443
& RANGE_MASK];
1444
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1445
CONST_BITS+PASS1_BITS+3)
1446
& RANGE_MASK];
1447
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1448
CONST_BITS+PASS1_BITS+3)
1449
& RANGE_MASK];
1450
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1451
CONST_BITS+PASS1_BITS+3)
1452
& RANGE_MASK];
1453
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1454
CONST_BITS+PASS1_BITS+3)
1455
& RANGE_MASK];
1456
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1457
CONST_BITS+PASS1_BITS+3)
1458
& RANGE_MASK];
1459
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1460
CONST_BITS+PASS1_BITS+3)
1461
& RANGE_MASK];
1462
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1463
CONST_BITS+PASS1_BITS+3)
1464
& RANGE_MASK];
1465
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1466
CONST_BITS+PASS1_BITS+3)
1467
& RANGE_MASK];
1468
1469
wsptr += 8; /* advance pointer to next row */
1470
}
1471
}
1472
1473
1474
/*
1475
* Perform dequantization and inverse DCT on one block of coefficients,
1476
* producing a 11x11 output block.
1477
*
1478
* Optimized algorithm with 24 multiplications in the 1-D kernel.
1479
* cK represents sqrt(2) * cos(K*pi/22).
1480
*/
1481
1482
GLOBAL(void)
1483
jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1484
JCOEFPTR coef_block,
1485
JSAMPARRAY output_buf, JDIMENSION output_col)
1486
{
1487
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1488
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1489
INT32 z1, z2, z3, z4;
1490
JCOEFPTR inptr;
1491
ISLOW_MULT_TYPE * quantptr;
1492
int * wsptr;
1493
JSAMPROW outptr;
1494
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1495
int ctr;
1496
int workspace[8*11]; /* buffers data between passes */
1497
SHIFT_TEMPS
1498
1499
/* Pass 1: process columns from input, store into work array. */
1500
1501
inptr = coef_block;
1502
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1503
wsptr = workspace;
1504
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1505
/* Even part */
1506
1507
tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1508
tmp10 <<= CONST_BITS;
1509
/* Add fudge factor here for final descale. */
1510
tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
1511
1512
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1513
z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1514
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1515
1516
tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1517
tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1518
z4 = z1 + z3;
1519
tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1520
z4 -= z2;
1521
tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1522
tmp21 = tmp20 + tmp23 + tmp25 -
1523
MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1524
tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1525
tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1526
tmp24 += tmp25;
1527
tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1528
tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1529
MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1530
tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1531
1532
/* Odd part */
1533
1534
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1535
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1536
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1537
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1538
1539
tmp11 = z1 + z2;
1540
tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1541
tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1542
tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1543
tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1544
tmp10 = tmp11 + tmp12 + tmp13 -
1545
MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1546
z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1547
tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1548
tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1549
z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
1550
tmp11 += z1;
1551
tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1552
tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
1553
MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1554
MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1555
1556
/* Final output stage */
1557
1558
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1559
wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1560
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1561
wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1562
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1563
wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1564
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1565
wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1566
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1567
wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1568
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
1569
}
1570
1571
/* Pass 2: process 11 rows from work array, store into output array. */
1572
1573
wsptr = workspace;
1574
for (ctr = 0; ctr < 11; ctr++) {
1575
outptr = output_buf[ctr] + output_col;
1576
1577
/* Even part */
1578
1579
/* Add range center and fudge factor for final descale and range-limit. */
1580
tmp10 = (INT32) wsptr[0] +
1581
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1582
(ONE << (PASS1_BITS+2)));
1583
tmp10 <<= CONST_BITS;
1584
1585
z1 = (INT32) wsptr[2];
1586
z2 = (INT32) wsptr[4];
1587
z3 = (INT32) wsptr[6];
1588
1589
tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */
1590
tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */
1591
z4 = z1 + z3;
1592
tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */
1593
z4 -= z2;
1594
tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */
1595
tmp21 = tmp20 + tmp23 + tmp25 -
1596
MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */
1597
tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
1598
tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
1599
tmp24 += tmp25;
1600
tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */
1601
tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */
1602
MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */
1603
tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */
1604
1605
/* Odd part */
1606
1607
z1 = (INT32) wsptr[1];
1608
z2 = (INT32) wsptr[3];
1609
z3 = (INT32) wsptr[5];
1610
z4 = (INT32) wsptr[7];
1611
1612
tmp11 = z1 + z2;
1613
tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
1614
tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */
1615
tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */
1616
tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
1617
tmp10 = tmp11 + tmp12 + tmp13 -
1618
MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */
1619
z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
1620
tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */
1621
tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */
1622
z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */
1623
tmp11 += z1;
1624
tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */
1625
tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */
1626
MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */
1627
MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */
1628
1629
/* Final output stage */
1630
1631
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1632
CONST_BITS+PASS1_BITS+3)
1633
& RANGE_MASK];
1634
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1635
CONST_BITS+PASS1_BITS+3)
1636
& RANGE_MASK];
1637
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1638
CONST_BITS+PASS1_BITS+3)
1639
& RANGE_MASK];
1640
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1641
CONST_BITS+PASS1_BITS+3)
1642
& RANGE_MASK];
1643
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1644
CONST_BITS+PASS1_BITS+3)
1645
& RANGE_MASK];
1646
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1647
CONST_BITS+PASS1_BITS+3)
1648
& RANGE_MASK];
1649
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1650
CONST_BITS+PASS1_BITS+3)
1651
& RANGE_MASK];
1652
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1653
CONST_BITS+PASS1_BITS+3)
1654
& RANGE_MASK];
1655
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1656
CONST_BITS+PASS1_BITS+3)
1657
& RANGE_MASK];
1658
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1659
CONST_BITS+PASS1_BITS+3)
1660
& RANGE_MASK];
1661
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25,
1662
CONST_BITS+PASS1_BITS+3)
1663
& RANGE_MASK];
1664
1665
wsptr += 8; /* advance pointer to next row */
1666
}
1667
}
1668
1669
1670
/*
1671
* Perform dequantization and inverse DCT on one block of coefficients,
1672
* producing a 12x12 output block.
1673
*
1674
* Optimized algorithm with 15 multiplications in the 1-D kernel.
1675
* cK represents sqrt(2) * cos(K*pi/24).
1676
*/
1677
1678
GLOBAL(void)
1679
jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1680
JCOEFPTR coef_block,
1681
JSAMPARRAY output_buf, JDIMENSION output_col)
1682
{
1683
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1684
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
1685
INT32 z1, z2, z3, z4;
1686
JCOEFPTR inptr;
1687
ISLOW_MULT_TYPE * quantptr;
1688
int * wsptr;
1689
JSAMPROW outptr;
1690
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1691
int ctr;
1692
int workspace[8*12]; /* buffers data between passes */
1693
SHIFT_TEMPS
1694
1695
/* Pass 1: process columns from input, store into work array. */
1696
1697
inptr = coef_block;
1698
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1699
wsptr = workspace;
1700
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1701
/* Even part */
1702
1703
z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1704
z3 <<= CONST_BITS;
1705
/* Add fudge factor here for final descale. */
1706
z3 += ONE << (CONST_BITS-PASS1_BITS-1);
1707
1708
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1709
z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1710
1711
tmp10 = z3 + z4;
1712
tmp11 = z3 - z4;
1713
1714
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1715
z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1716
z1 <<= CONST_BITS;
1717
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1718
z2 <<= CONST_BITS;
1719
1720
tmp12 = z1 - z2;
1721
1722
tmp21 = z3 + tmp12;
1723
tmp24 = z3 - tmp12;
1724
1725
tmp12 = z4 + z2;
1726
1727
tmp20 = tmp10 + tmp12;
1728
tmp25 = tmp10 - tmp12;
1729
1730
tmp12 = z4 - z1 - z2;
1731
1732
tmp22 = tmp11 + tmp12;
1733
tmp23 = tmp11 - tmp12;
1734
1735
/* Odd part */
1736
1737
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1738
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1739
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1740
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1741
1742
tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1743
tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
1744
1745
tmp10 = z1 + z3;
1746
tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1747
tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1748
tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1749
tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
1750
tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1751
tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1752
tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1753
MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1754
1755
z1 -= z4;
1756
z2 -= z3;
1757
z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1758
tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1759
tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
1760
1761
/* Final output stage */
1762
1763
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1764
wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1765
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1766
wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1767
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1768
wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1769
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1770
wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1771
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1772
wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1773
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1774
wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1775
}
1776
1777
/* Pass 2: process 12 rows from work array, store into output array. */
1778
1779
wsptr = workspace;
1780
for (ctr = 0; ctr < 12; ctr++) {
1781
outptr = output_buf[ctr] + output_col;
1782
1783
/* Even part */
1784
1785
/* Add range center and fudge factor for final descale and range-limit. */
1786
z3 = (INT32) wsptr[0] +
1787
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
1788
(ONE << (PASS1_BITS+2)));
1789
z3 <<= CONST_BITS;
1790
1791
z4 = (INT32) wsptr[4];
1792
z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
1793
1794
tmp10 = z3 + z4;
1795
tmp11 = z3 - z4;
1796
1797
z1 = (INT32) wsptr[2];
1798
z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
1799
z1 <<= CONST_BITS;
1800
z2 = (INT32) wsptr[6];
1801
z2 <<= CONST_BITS;
1802
1803
tmp12 = z1 - z2;
1804
1805
tmp21 = z3 + tmp12;
1806
tmp24 = z3 - tmp12;
1807
1808
tmp12 = z4 + z2;
1809
1810
tmp20 = tmp10 + tmp12;
1811
tmp25 = tmp10 - tmp12;
1812
1813
tmp12 = z4 - z1 - z2;
1814
1815
tmp22 = tmp11 + tmp12;
1816
tmp23 = tmp11 - tmp12;
1817
1818
/* Odd part */
1819
1820
z1 = (INT32) wsptr[1];
1821
z2 = (INT32) wsptr[3];
1822
z3 = (INT32) wsptr[5];
1823
z4 = (INT32) wsptr[7];
1824
1825
tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
1826
tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
1827
1828
tmp10 = z1 + z3;
1829
tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
1830
tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
1831
tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
1832
tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
1833
tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
1834
tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
1835
tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
1836
MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
1837
1838
z1 -= z4;
1839
z2 -= z3;
1840
z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
1841
tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
1842
tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
1843
1844
/* Final output stage */
1845
1846
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
1847
CONST_BITS+PASS1_BITS+3)
1848
& RANGE_MASK];
1849
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
1850
CONST_BITS+PASS1_BITS+3)
1851
& RANGE_MASK];
1852
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
1853
CONST_BITS+PASS1_BITS+3)
1854
& RANGE_MASK];
1855
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
1856
CONST_BITS+PASS1_BITS+3)
1857
& RANGE_MASK];
1858
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
1859
CONST_BITS+PASS1_BITS+3)
1860
& RANGE_MASK];
1861
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
1862
CONST_BITS+PASS1_BITS+3)
1863
& RANGE_MASK];
1864
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
1865
CONST_BITS+PASS1_BITS+3)
1866
& RANGE_MASK];
1867
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
1868
CONST_BITS+PASS1_BITS+3)
1869
& RANGE_MASK];
1870
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
1871
CONST_BITS+PASS1_BITS+3)
1872
& RANGE_MASK];
1873
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
1874
CONST_BITS+PASS1_BITS+3)
1875
& RANGE_MASK];
1876
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
1877
CONST_BITS+PASS1_BITS+3)
1878
& RANGE_MASK];
1879
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
1880
CONST_BITS+PASS1_BITS+3)
1881
& RANGE_MASK];
1882
1883
wsptr += 8; /* advance pointer to next row */
1884
}
1885
}
1886
1887
1888
/*
1889
* Perform dequantization and inverse DCT on one block of coefficients,
1890
* producing a 13x13 output block.
1891
*
1892
* Optimized algorithm with 29 multiplications in the 1-D kernel.
1893
* cK represents sqrt(2) * cos(K*pi/26).
1894
*/
1895
1896
GLOBAL(void)
1897
jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
1898
JCOEFPTR coef_block,
1899
JSAMPARRAY output_buf, JDIMENSION output_col)
1900
{
1901
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1902
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
1903
INT32 z1, z2, z3, z4;
1904
JCOEFPTR inptr;
1905
ISLOW_MULT_TYPE * quantptr;
1906
int * wsptr;
1907
JSAMPROW outptr;
1908
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
1909
int ctr;
1910
int workspace[8*13]; /* buffers data between passes */
1911
SHIFT_TEMPS
1912
1913
/* Pass 1: process columns from input, store into work array. */
1914
1915
inptr = coef_block;
1916
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
1917
wsptr = workspace;
1918
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
1919
/* Even part */
1920
1921
z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
1922
z1 <<= CONST_BITS;
1923
/* Add fudge factor here for final descale. */
1924
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
1925
1926
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
1927
z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
1928
z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
1929
1930
tmp10 = z3 + z4;
1931
tmp11 = z3 - z4;
1932
1933
tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
1934
tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
1935
1936
tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
1937
tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
1938
1939
tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
1940
tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
1941
1942
tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
1943
tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
1944
1945
tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
1946
tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
1947
1948
tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
1949
tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
1950
1951
tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
1952
1953
/* Odd part */
1954
1955
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
1956
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
1957
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
1958
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
1959
1960
tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
1961
tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
1962
tmp15 = z1 + z4;
1963
tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
1964
tmp10 = tmp11 + tmp12 + tmp13 -
1965
MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
1966
tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
1967
tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
1968
tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
1969
tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
1970
tmp11 += tmp14;
1971
tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
1972
tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
1973
tmp12 += tmp14;
1974
tmp13 += tmp14;
1975
tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
1976
tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
1977
MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
1978
z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
1979
tmp14 += z1;
1980
tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
1981
MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
1982
1983
/* Final output stage */
1984
1985
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
1986
wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
1987
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
1988
wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
1989
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
1990
wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
1991
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
1992
wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
1993
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
1994
wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
1995
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
1996
wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
1997
wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
1998
}
1999
2000
/* Pass 2: process 13 rows from work array, store into output array. */
2001
2002
wsptr = workspace;
2003
for (ctr = 0; ctr < 13; ctr++) {
2004
outptr = output_buf[ctr] + output_col;
2005
2006
/* Even part */
2007
2008
/* Add range center and fudge factor for final descale and range-limit. */
2009
z1 = (INT32) wsptr[0] +
2010
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2011
(ONE << (PASS1_BITS+2)));
2012
z1 <<= CONST_BITS;
2013
2014
z2 = (INT32) wsptr[2];
2015
z3 = (INT32) wsptr[4];
2016
z4 = (INT32) wsptr[6];
2017
2018
tmp10 = z3 + z4;
2019
tmp11 = z3 - z4;
2020
2021
tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */
2022
tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */
2023
2024
tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */
2025
tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */
2026
2027
tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */
2028
tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */
2029
2030
tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */
2031
tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
2032
2033
tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */
2034
tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */
2035
2036
tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
2037
tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
2038
2039
tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */
2040
2041
/* Odd part */
2042
2043
z1 = (INT32) wsptr[1];
2044
z2 = (INT32) wsptr[3];
2045
z3 = (INT32) wsptr[5];
2046
z4 = (INT32) wsptr[7];
2047
2048
tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */
2049
tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */
2050
tmp15 = z1 + z4;
2051
tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */
2052
tmp10 = tmp11 + tmp12 + tmp13 -
2053
MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */
2054
tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */
2055
tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
2056
tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
2057
tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */
2058
tmp11 += tmp14;
2059
tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
2060
tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */
2061
tmp12 += tmp14;
2062
tmp13 += tmp14;
2063
tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */
2064
tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
2065
MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */
2066
z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */
2067
tmp14 += z1;
2068
tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */
2069
MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */
2070
2071
/* Final output stage */
2072
2073
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2074
CONST_BITS+PASS1_BITS+3)
2075
& RANGE_MASK];
2076
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2077
CONST_BITS+PASS1_BITS+3)
2078
& RANGE_MASK];
2079
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2080
CONST_BITS+PASS1_BITS+3)
2081
& RANGE_MASK];
2082
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2083
CONST_BITS+PASS1_BITS+3)
2084
& RANGE_MASK];
2085
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2086
CONST_BITS+PASS1_BITS+3)
2087
& RANGE_MASK];
2088
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2089
CONST_BITS+PASS1_BITS+3)
2090
& RANGE_MASK];
2091
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2092
CONST_BITS+PASS1_BITS+3)
2093
& RANGE_MASK];
2094
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2095
CONST_BITS+PASS1_BITS+3)
2096
& RANGE_MASK];
2097
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2098
CONST_BITS+PASS1_BITS+3)
2099
& RANGE_MASK];
2100
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2101
CONST_BITS+PASS1_BITS+3)
2102
& RANGE_MASK];
2103
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2104
CONST_BITS+PASS1_BITS+3)
2105
& RANGE_MASK];
2106
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2107
CONST_BITS+PASS1_BITS+3)
2108
& RANGE_MASK];
2109
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26,
2110
CONST_BITS+PASS1_BITS+3)
2111
& RANGE_MASK];
2112
2113
wsptr += 8; /* advance pointer to next row */
2114
}
2115
}
2116
2117
2118
/*
2119
* Perform dequantization and inverse DCT on one block of coefficients,
2120
* producing a 14x14 output block.
2121
*
2122
* Optimized algorithm with 20 multiplications in the 1-D kernel.
2123
* cK represents sqrt(2) * cos(K*pi/28).
2124
*/
2125
2126
GLOBAL(void)
2127
jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2128
JCOEFPTR coef_block,
2129
JSAMPARRAY output_buf, JDIMENSION output_col)
2130
{
2131
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2132
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
2133
INT32 z1, z2, z3, z4;
2134
JCOEFPTR inptr;
2135
ISLOW_MULT_TYPE * quantptr;
2136
int * wsptr;
2137
JSAMPROW outptr;
2138
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2139
int ctr;
2140
int workspace[8*14]; /* buffers data between passes */
2141
SHIFT_TEMPS
2142
2143
/* Pass 1: process columns from input, store into work array. */
2144
2145
inptr = coef_block;
2146
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2147
wsptr = workspace;
2148
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2149
/* Even part */
2150
2151
z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2152
z1 <<= CONST_BITS;
2153
/* Add fudge factor here for final descale. */
2154
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2155
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2156
z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2157
z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2158
z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2159
2160
tmp10 = z1 + z2;
2161
tmp11 = z1 + z3;
2162
tmp12 = z1 - z4;
2163
2164
tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
2165
CONST_BITS-PASS1_BITS);
2166
2167
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2168
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2169
2170
z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2171
2172
tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2173
tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2174
tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2175
MULTIPLY(z2, FIX(1.378756276)); /* c2 */
2176
2177
tmp20 = tmp10 + tmp13;
2178
tmp26 = tmp10 - tmp13;
2179
tmp21 = tmp11 + tmp14;
2180
tmp25 = tmp11 - tmp14;
2181
tmp22 = tmp12 + tmp15;
2182
tmp24 = tmp12 - tmp15;
2183
2184
/* Odd part */
2185
2186
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2187
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2188
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2189
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2190
tmp13 = z4 << CONST_BITS;
2191
2192
tmp14 = z1 + z3;
2193
tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
2194
tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
2195
tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2196
tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
2197
tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
2198
z1 -= z2;
2199
tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
2200
tmp16 += tmp15;
2201
z1 += z4;
2202
z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
2203
tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
2204
tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
2205
z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
2206
tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2207
tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
2208
2209
tmp13 = (z1 - z3) << PASS1_BITS;
2210
2211
/* Final output stage */
2212
2213
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2214
wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2215
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2216
wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2217
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2218
wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2219
wsptr[8*3] = (int) (tmp23 + tmp13);
2220
wsptr[8*10] = (int) (tmp23 - tmp13);
2221
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2222
wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2223
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2224
wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2225
wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2226
wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2227
}
2228
2229
/* Pass 2: process 14 rows from work array, store into output array. */
2230
2231
wsptr = workspace;
2232
for (ctr = 0; ctr < 14; ctr++) {
2233
outptr = output_buf[ctr] + output_col;
2234
2235
/* Even part */
2236
2237
/* Add range center and fudge factor for final descale and range-limit. */
2238
z1 = (INT32) wsptr[0] +
2239
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2240
(ONE << (PASS1_BITS+2)));
2241
z1 <<= CONST_BITS;
2242
z4 = (INT32) wsptr[4];
2243
z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
2244
z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
2245
z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
2246
2247
tmp10 = z1 + z2;
2248
tmp11 = z1 + z3;
2249
tmp12 = z1 - z4;
2250
2251
tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
2252
2253
z1 = (INT32) wsptr[2];
2254
z2 = (INT32) wsptr[6];
2255
2256
z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
2257
2258
tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
2259
tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
2260
tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
2261
MULTIPLY(z2, FIX(1.378756276)); /* c2 */
2262
2263
tmp20 = tmp10 + tmp13;
2264
tmp26 = tmp10 - tmp13;
2265
tmp21 = tmp11 + tmp14;
2266
tmp25 = tmp11 - tmp14;
2267
tmp22 = tmp12 + tmp15;
2268
tmp24 = tmp12 - tmp15;
2269
2270
/* Odd part */
2271
2272
z1 = (INT32) wsptr[1];
2273
z2 = (INT32) wsptr[3];
2274
z3 = (INT32) wsptr[5];
2275
z4 = (INT32) wsptr[7];
2276
z4 <<= CONST_BITS;
2277
2278
tmp14 = z1 + z3;
2279
tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
2280
tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
2281
tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
2282
tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
2283
tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
2284
z1 -= z2;
2285
tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
2286
tmp16 += tmp15;
2287
tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
2288
tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
2289
tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
2290
tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
2291
tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
2292
tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
2293
2294
tmp13 = ((z1 - z3) << CONST_BITS) + z4;
2295
2296
/* Final output stage */
2297
2298
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2299
CONST_BITS+PASS1_BITS+3)
2300
& RANGE_MASK];
2301
outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2302
CONST_BITS+PASS1_BITS+3)
2303
& RANGE_MASK];
2304
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2305
CONST_BITS+PASS1_BITS+3)
2306
& RANGE_MASK];
2307
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2308
CONST_BITS+PASS1_BITS+3)
2309
& RANGE_MASK];
2310
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2311
CONST_BITS+PASS1_BITS+3)
2312
& RANGE_MASK];
2313
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2314
CONST_BITS+PASS1_BITS+3)
2315
& RANGE_MASK];
2316
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2317
CONST_BITS+PASS1_BITS+3)
2318
& RANGE_MASK];
2319
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2320
CONST_BITS+PASS1_BITS+3)
2321
& RANGE_MASK];
2322
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2323
CONST_BITS+PASS1_BITS+3)
2324
& RANGE_MASK];
2325
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2326
CONST_BITS+PASS1_BITS+3)
2327
& RANGE_MASK];
2328
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2329
CONST_BITS+PASS1_BITS+3)
2330
& RANGE_MASK];
2331
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2332
CONST_BITS+PASS1_BITS+3)
2333
& RANGE_MASK];
2334
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2335
CONST_BITS+PASS1_BITS+3)
2336
& RANGE_MASK];
2337
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2338
CONST_BITS+PASS1_BITS+3)
2339
& RANGE_MASK];
2340
2341
wsptr += 8; /* advance pointer to next row */
2342
}
2343
}
2344
2345
2346
/*
2347
* Perform dequantization and inverse DCT on one block of coefficients,
2348
* producing a 15x15 output block.
2349
*
2350
* Optimized algorithm with 22 multiplications in the 1-D kernel.
2351
* cK represents sqrt(2) * cos(K*pi/30).
2352
*/
2353
2354
GLOBAL(void)
2355
jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2356
JCOEFPTR coef_block,
2357
JSAMPARRAY output_buf, JDIMENSION output_col)
2358
{
2359
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2360
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2361
INT32 z1, z2, z3, z4;
2362
JCOEFPTR inptr;
2363
ISLOW_MULT_TYPE * quantptr;
2364
int * wsptr;
2365
JSAMPROW outptr;
2366
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2367
int ctr;
2368
int workspace[8*15]; /* buffers data between passes */
2369
SHIFT_TEMPS
2370
2371
/* Pass 1: process columns from input, store into work array. */
2372
2373
inptr = coef_block;
2374
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2375
wsptr = workspace;
2376
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2377
/* Even part */
2378
2379
z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2380
z1 <<= CONST_BITS;
2381
/* Add fudge factor here for final descale. */
2382
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
2383
2384
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2385
z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2386
z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2387
2388
tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2389
tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2390
2391
tmp12 = z1 - tmp10;
2392
tmp13 = z1 + tmp11;
2393
z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2394
2395
z4 = z2 - z3;
2396
z3 += z2;
2397
tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2398
tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2399
z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2400
2401
tmp20 = tmp13 + tmp10 + tmp11;
2402
tmp23 = tmp12 - tmp10 + tmp11 + z2;
2403
2404
tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2405
tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2406
2407
tmp25 = tmp13 - tmp10 - tmp11;
2408
tmp26 = tmp12 + tmp10 - tmp11 - z2;
2409
2410
tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2411
tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2412
2413
tmp21 = tmp12 + tmp10 + tmp11;
2414
tmp24 = tmp13 - tmp10 + tmp11;
2415
tmp11 += tmp11;
2416
tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2417
tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
2418
2419
/* Odd part */
2420
2421
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2422
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2423
z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2424
z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2425
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2426
2427
tmp13 = z2 - z4;
2428
tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2429
tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2430
tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2431
2432
tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
2433
tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
2434
z2 = z1 - z4;
2435
tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2436
2437
tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2438
tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2439
tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2440
z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2441
tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2442
tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
2443
2444
/* Final output stage */
2445
2446
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
2447
wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
2448
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
2449
wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
2450
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
2451
wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
2452
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
2453
wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
2454
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
2455
wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
2456
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
2457
wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
2458
wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
2459
wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
2460
wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
2461
}
2462
2463
/* Pass 2: process 15 rows from work array, store into output array. */
2464
2465
wsptr = workspace;
2466
for (ctr = 0; ctr < 15; ctr++) {
2467
outptr = output_buf[ctr] + output_col;
2468
2469
/* Even part */
2470
2471
/* Add range center and fudge factor for final descale and range-limit. */
2472
z1 = (INT32) wsptr[0] +
2473
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2474
(ONE << (PASS1_BITS+2)));
2475
z1 <<= CONST_BITS;
2476
2477
z2 = (INT32) wsptr[2];
2478
z3 = (INT32) wsptr[4];
2479
z4 = (INT32) wsptr[6];
2480
2481
tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
2482
tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
2483
2484
tmp12 = z1 - tmp10;
2485
tmp13 = z1 + tmp11;
2486
z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */
2487
2488
z4 = z2 - z3;
2489
z3 += z2;
2490
tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
2491
tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
2492
z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */
2493
2494
tmp20 = tmp13 + tmp10 + tmp11;
2495
tmp23 = tmp12 - tmp10 + tmp11 + z2;
2496
2497
tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
2498
tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
2499
2500
tmp25 = tmp13 - tmp10 - tmp11;
2501
tmp26 = tmp12 + tmp10 - tmp11 - z2;
2502
2503
tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
2504
tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
2505
2506
tmp21 = tmp12 + tmp10 + tmp11;
2507
tmp24 = tmp13 - tmp10 + tmp11;
2508
tmp11 += tmp11;
2509
tmp22 = z1 + tmp11; /* c10 = c6-c12 */
2510
tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */
2511
2512
/* Odd part */
2513
2514
z1 = (INT32) wsptr[1];
2515
z2 = (INT32) wsptr[3];
2516
z4 = (INT32) wsptr[5];
2517
z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */
2518
z4 = (INT32) wsptr[7];
2519
2520
tmp13 = z2 - z4;
2521
tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */
2522
tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */
2523
tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */
2524
2525
tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */
2526
tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */
2527
z2 = z1 - z4;
2528
tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */
2529
2530
tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
2531
tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
2532
tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */
2533
z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */
2534
tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */
2535
tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */
2536
2537
/* Final output stage */
2538
2539
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
2540
CONST_BITS+PASS1_BITS+3)
2541
& RANGE_MASK];
2542
outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
2543
CONST_BITS+PASS1_BITS+3)
2544
& RANGE_MASK];
2545
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
2546
CONST_BITS+PASS1_BITS+3)
2547
& RANGE_MASK];
2548
outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
2549
CONST_BITS+PASS1_BITS+3)
2550
& RANGE_MASK];
2551
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
2552
CONST_BITS+PASS1_BITS+3)
2553
& RANGE_MASK];
2554
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
2555
CONST_BITS+PASS1_BITS+3)
2556
& RANGE_MASK];
2557
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
2558
CONST_BITS+PASS1_BITS+3)
2559
& RANGE_MASK];
2560
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
2561
CONST_BITS+PASS1_BITS+3)
2562
& RANGE_MASK];
2563
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
2564
CONST_BITS+PASS1_BITS+3)
2565
& RANGE_MASK];
2566
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
2567
CONST_BITS+PASS1_BITS+3)
2568
& RANGE_MASK];
2569
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
2570
CONST_BITS+PASS1_BITS+3)
2571
& RANGE_MASK];
2572
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
2573
CONST_BITS+PASS1_BITS+3)
2574
& RANGE_MASK];
2575
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
2576
CONST_BITS+PASS1_BITS+3)
2577
& RANGE_MASK];
2578
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
2579
CONST_BITS+PASS1_BITS+3)
2580
& RANGE_MASK];
2581
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27,
2582
CONST_BITS+PASS1_BITS+3)
2583
& RANGE_MASK];
2584
2585
wsptr += 8; /* advance pointer to next row */
2586
}
2587
}
2588
2589
2590
/*
2591
* Perform dequantization and inverse DCT on one block of coefficients,
2592
* producing a 16x16 output block.
2593
*
2594
* Optimized algorithm with 28 multiplications in the 1-D kernel.
2595
* cK represents sqrt(2) * cos(K*pi/32).
2596
*/
2597
2598
GLOBAL(void)
2599
jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2600
JCOEFPTR coef_block,
2601
JSAMPARRAY output_buf, JDIMENSION output_col)
2602
{
2603
INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2604
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2605
INT32 z1, z2, z3, z4;
2606
JCOEFPTR inptr;
2607
ISLOW_MULT_TYPE * quantptr;
2608
int * wsptr;
2609
JSAMPROW outptr;
2610
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2611
int ctr;
2612
int workspace[8*16]; /* buffers data between passes */
2613
SHIFT_TEMPS
2614
2615
/* Pass 1: process columns from input, store into work array. */
2616
2617
inptr = coef_block;
2618
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2619
wsptr = workspace;
2620
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
2621
/* Even part */
2622
2623
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2624
tmp0 <<= CONST_BITS;
2625
/* Add fudge factor here for final descale. */
2626
tmp0 += 1 << (CONST_BITS-PASS1_BITS-1);
2627
2628
z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2629
tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2630
tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2631
2632
tmp10 = tmp0 + tmp1;
2633
tmp11 = tmp0 - tmp1;
2634
tmp12 = tmp0 + tmp2;
2635
tmp13 = tmp0 - tmp2;
2636
2637
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2638
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2639
z3 = z1 - z2;
2640
z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2641
z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2642
2643
tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2644
tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2645
tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2646
tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2647
2648
tmp20 = tmp10 + tmp0;
2649
tmp27 = tmp10 - tmp0;
2650
tmp21 = tmp12 + tmp1;
2651
tmp26 = tmp12 - tmp1;
2652
tmp22 = tmp13 + tmp2;
2653
tmp25 = tmp13 - tmp2;
2654
tmp23 = tmp11 + tmp3;
2655
tmp24 = tmp11 - tmp3;
2656
2657
/* Odd part */
2658
2659
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2660
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2661
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2662
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2663
2664
tmp11 = z1 + z3;
2665
2666
tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2667
tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2668
tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2669
tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2670
tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2671
tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2672
tmp0 = tmp1 + tmp2 + tmp3 -
2673
MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2674
tmp13 = tmp10 + tmp11 + tmp12 -
2675
MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2676
z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2677
tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2678
tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2679
z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2680
tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2681
tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2682
z2 += z4;
2683
z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
2684
tmp1 += z1;
2685
tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2686
z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
2687
tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2688
tmp12 += z2;
2689
z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2690
tmp2 += z2;
2691
tmp3 += z2;
2692
z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2693
tmp10 += z2;
2694
tmp11 += z2;
2695
2696
/* Final output stage */
2697
2698
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
2699
wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
2700
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
2701
wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
2702
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
2703
wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
2704
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
2705
wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
2706
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
2707
wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
2708
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
2709
wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
2710
wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
2711
wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
2712
wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
2713
wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
2714
}
2715
2716
/* Pass 2: process 16 rows from work array, store into output array. */
2717
2718
wsptr = workspace;
2719
for (ctr = 0; ctr < 16; ctr++) {
2720
outptr = output_buf[ctr] + output_col;
2721
2722
/* Even part */
2723
2724
/* Add range center and fudge factor for final descale and range-limit. */
2725
tmp0 = (INT32) wsptr[0] +
2726
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
2727
(ONE << (PASS1_BITS+2)));
2728
tmp0 <<= CONST_BITS;
2729
2730
z1 = (INT32) wsptr[4];
2731
tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
2732
tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
2733
2734
tmp10 = tmp0 + tmp1;
2735
tmp11 = tmp0 - tmp1;
2736
tmp12 = tmp0 + tmp2;
2737
tmp13 = tmp0 - tmp2;
2738
2739
z1 = (INT32) wsptr[2];
2740
z2 = (INT32) wsptr[6];
2741
z3 = z1 - z2;
2742
z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
2743
z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
2744
2745
tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
2746
tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
2747
tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
2748
tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
2749
2750
tmp20 = tmp10 + tmp0;
2751
tmp27 = tmp10 - tmp0;
2752
tmp21 = tmp12 + tmp1;
2753
tmp26 = tmp12 - tmp1;
2754
tmp22 = tmp13 + tmp2;
2755
tmp25 = tmp13 - tmp2;
2756
tmp23 = tmp11 + tmp3;
2757
tmp24 = tmp11 - tmp3;
2758
2759
/* Odd part */
2760
2761
z1 = (INT32) wsptr[1];
2762
z2 = (INT32) wsptr[3];
2763
z3 = (INT32) wsptr[5];
2764
z4 = (INT32) wsptr[7];
2765
2766
tmp11 = z1 + z3;
2767
2768
tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
2769
tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
2770
tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
2771
tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
2772
tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
2773
tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
2774
tmp0 = tmp1 + tmp2 + tmp3 -
2775
MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
2776
tmp13 = tmp10 + tmp11 + tmp12 -
2777
MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
2778
z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
2779
tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
2780
tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
2781
z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
2782
tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
2783
tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
2784
z2 += z4;
2785
z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
2786
tmp1 += z1;
2787
tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
2788
z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
2789
tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
2790
tmp12 += z2;
2791
z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
2792
tmp2 += z2;
2793
tmp3 += z2;
2794
z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
2795
tmp10 += z2;
2796
tmp11 += z2;
2797
2798
/* Final output stage */
2799
2800
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
2801
CONST_BITS+PASS1_BITS+3)
2802
& RANGE_MASK];
2803
outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
2804
CONST_BITS+PASS1_BITS+3)
2805
& RANGE_MASK];
2806
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
2807
CONST_BITS+PASS1_BITS+3)
2808
& RANGE_MASK];
2809
outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
2810
CONST_BITS+PASS1_BITS+3)
2811
& RANGE_MASK];
2812
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
2813
CONST_BITS+PASS1_BITS+3)
2814
& RANGE_MASK];
2815
outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
2816
CONST_BITS+PASS1_BITS+3)
2817
& RANGE_MASK];
2818
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
2819
CONST_BITS+PASS1_BITS+3)
2820
& RANGE_MASK];
2821
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
2822
CONST_BITS+PASS1_BITS+3)
2823
& RANGE_MASK];
2824
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
2825
CONST_BITS+PASS1_BITS+3)
2826
& RANGE_MASK];
2827
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
2828
CONST_BITS+PASS1_BITS+3)
2829
& RANGE_MASK];
2830
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
2831
CONST_BITS+PASS1_BITS+3)
2832
& RANGE_MASK];
2833
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
2834
CONST_BITS+PASS1_BITS+3)
2835
& RANGE_MASK];
2836
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
2837
CONST_BITS+PASS1_BITS+3)
2838
& RANGE_MASK];
2839
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
2840
CONST_BITS+PASS1_BITS+3)
2841
& RANGE_MASK];
2842
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
2843
CONST_BITS+PASS1_BITS+3)
2844
& RANGE_MASK];
2845
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
2846
CONST_BITS+PASS1_BITS+3)
2847
& RANGE_MASK];
2848
2849
wsptr += 8; /* advance pointer to next row */
2850
}
2851
}
2852
2853
2854
/*
2855
* Perform dequantization and inverse DCT on one block of coefficients,
2856
* producing a 16x8 output block.
2857
*
2858
* 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
2859
*/
2860
2861
GLOBAL(void)
2862
jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
2863
JCOEFPTR coef_block,
2864
JSAMPARRAY output_buf, JDIMENSION output_col)
2865
{
2866
INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
2867
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
2868
INT32 z1, z2, z3, z4;
2869
JCOEFPTR inptr;
2870
ISLOW_MULT_TYPE * quantptr;
2871
int * wsptr;
2872
JSAMPROW outptr;
2873
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
2874
int ctr;
2875
int workspace[8*8]; /* buffers data between passes */
2876
SHIFT_TEMPS
2877
2878
/* Pass 1: process columns from input, store into work array.
2879
* Note results are scaled up by sqrt(8) compared to a true IDCT;
2880
* furthermore, we scale the results by 2**PASS1_BITS.
2881
* 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
2882
*/
2883
2884
inptr = coef_block;
2885
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
2886
wsptr = workspace;
2887
for (ctr = DCTSIZE; ctr > 0; ctr--) {
2888
/* Due to quantization, we will usually find that many of the input
2889
* coefficients are zero, especially the AC terms. We can exploit this
2890
* by short-circuiting the IDCT calculation for any column in which all
2891
* the AC terms are zero. In that case each output is equal to the
2892
* DC coefficient (with scale factor as needed).
2893
* With typical images and quantization tables, half or more of the
2894
* column DCT calculations can be simplified this way.
2895
*/
2896
2897
if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
2898
inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
2899
inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
2900
inptr[DCTSIZE*7] == 0) {
2901
/* AC terms all zero */
2902
int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
2903
2904
wsptr[DCTSIZE*0] = dcval;
2905
wsptr[DCTSIZE*1] = dcval;
2906
wsptr[DCTSIZE*2] = dcval;
2907
wsptr[DCTSIZE*3] = dcval;
2908
wsptr[DCTSIZE*4] = dcval;
2909
wsptr[DCTSIZE*5] = dcval;
2910
wsptr[DCTSIZE*6] = dcval;
2911
wsptr[DCTSIZE*7] = dcval;
2912
2913
inptr++; /* advance pointers to next column */
2914
quantptr++;
2915
wsptr++;
2916
continue;
2917
}
2918
2919
/* Even part: reverse the even part of the forward DCT.
2920
* The rotator is c(-6).
2921
*/
2922
2923
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
2924
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
2925
2926
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
2927
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
2928
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
2929
2930
z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
2931
z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
2932
z2 <<= CONST_BITS;
2933
z3 <<= CONST_BITS;
2934
/* Add fudge factor here for final descale. */
2935
z2 += ONE << (CONST_BITS-PASS1_BITS-1);
2936
2937
tmp0 = z2 + z3;
2938
tmp1 = z2 - z3;
2939
2940
tmp10 = tmp0 + tmp2;
2941
tmp13 = tmp0 - tmp2;
2942
tmp11 = tmp1 + tmp3;
2943
tmp12 = tmp1 - tmp3;
2944
2945
/* Odd part per figure 8; the matrix is unitary and hence its
2946
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
2947
*/
2948
2949
tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
2950
tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
2951
tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
2952
tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
2953
2954
z2 = tmp0 + tmp2;
2955
z3 = tmp1 + tmp3;
2956
2957
z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
2958
z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
2959
z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
2960
z2 += z1;
2961
z3 += z1;
2962
2963
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
2964
tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
2965
tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
2966
tmp0 += z1 + z2;
2967
tmp3 += z1 + z3;
2968
2969
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
2970
tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
2971
tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
2972
tmp1 += z1 + z3;
2973
tmp2 += z1 + z2;
2974
2975
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
2976
2977
wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
2978
wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
2979
wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
2980
wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
2981
wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
2982
wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
2983
wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
2984
wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
2985
2986
inptr++; /* advance pointers to next column */
2987
quantptr++;
2988
wsptr++;
2989
}
2990
2991
/* Pass 2: process 8 rows from work array, store into output array.
2992
* 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
2993
*/
2994
2995
wsptr = workspace;
2996
for (ctr = 0; ctr < 8; ctr++) {
2997
outptr = output_buf[ctr] + output_col;
2998
2999
/* Even part */
3000
3001
/* Add range center and fudge factor for final descale and range-limit. */
3002
tmp0 = (INT32) wsptr[0] +
3003
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3004
(ONE << (PASS1_BITS+2)));
3005
tmp0 <<= CONST_BITS;
3006
3007
z1 = (INT32) wsptr[4];
3008
tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
3009
tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
3010
3011
tmp10 = tmp0 + tmp1;
3012
tmp11 = tmp0 - tmp1;
3013
tmp12 = tmp0 + tmp2;
3014
tmp13 = tmp0 - tmp2;
3015
3016
z1 = (INT32) wsptr[2];
3017
z2 = (INT32) wsptr[6];
3018
z3 = z1 - z2;
3019
z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
3020
z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
3021
3022
tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
3023
tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
3024
tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
3025
tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
3026
3027
tmp20 = tmp10 + tmp0;
3028
tmp27 = tmp10 - tmp0;
3029
tmp21 = tmp12 + tmp1;
3030
tmp26 = tmp12 - tmp1;
3031
tmp22 = tmp13 + tmp2;
3032
tmp25 = tmp13 - tmp2;
3033
tmp23 = tmp11 + tmp3;
3034
tmp24 = tmp11 - tmp3;
3035
3036
/* Odd part */
3037
3038
z1 = (INT32) wsptr[1];
3039
z2 = (INT32) wsptr[3];
3040
z3 = (INT32) wsptr[5];
3041
z4 = (INT32) wsptr[7];
3042
3043
tmp11 = z1 + z3;
3044
3045
tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
3046
tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
3047
tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
3048
tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
3049
tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
3050
tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
3051
tmp0 = tmp1 + tmp2 + tmp3 -
3052
MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
3053
tmp13 = tmp10 + tmp11 + tmp12 -
3054
MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
3055
z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
3056
tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
3057
tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
3058
z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
3059
tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
3060
tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
3061
z2 += z4;
3062
z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
3063
tmp1 += z1;
3064
tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
3065
z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
3066
tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
3067
tmp12 += z2;
3068
z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
3069
tmp2 += z2;
3070
tmp3 += z2;
3071
z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
3072
tmp10 += z2;
3073
tmp11 += z2;
3074
3075
/* Final output stage */
3076
3077
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
3078
CONST_BITS+PASS1_BITS+3)
3079
& RANGE_MASK];
3080
outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
3081
CONST_BITS+PASS1_BITS+3)
3082
& RANGE_MASK];
3083
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
3084
CONST_BITS+PASS1_BITS+3)
3085
& RANGE_MASK];
3086
outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
3087
CONST_BITS+PASS1_BITS+3)
3088
& RANGE_MASK];
3089
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
3090
CONST_BITS+PASS1_BITS+3)
3091
& RANGE_MASK];
3092
outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
3093
CONST_BITS+PASS1_BITS+3)
3094
& RANGE_MASK];
3095
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
3096
CONST_BITS+PASS1_BITS+3)
3097
& RANGE_MASK];
3098
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
3099
CONST_BITS+PASS1_BITS+3)
3100
& RANGE_MASK];
3101
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
3102
CONST_BITS+PASS1_BITS+3)
3103
& RANGE_MASK];
3104
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
3105
CONST_BITS+PASS1_BITS+3)
3106
& RANGE_MASK];
3107
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
3108
CONST_BITS+PASS1_BITS+3)
3109
& RANGE_MASK];
3110
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
3111
CONST_BITS+PASS1_BITS+3)
3112
& RANGE_MASK];
3113
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
3114
CONST_BITS+PASS1_BITS+3)
3115
& RANGE_MASK];
3116
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
3117
CONST_BITS+PASS1_BITS+3)
3118
& RANGE_MASK];
3119
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
3120
CONST_BITS+PASS1_BITS+3)
3121
& RANGE_MASK];
3122
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
3123
CONST_BITS+PASS1_BITS+3)
3124
& RANGE_MASK];
3125
3126
wsptr += 8; /* advance pointer to next row */
3127
}
3128
}
3129
3130
3131
/*
3132
* Perform dequantization and inverse DCT on one block of coefficients,
3133
* producing a 14x7 output block.
3134
*
3135
* 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
3136
*/
3137
3138
GLOBAL(void)
3139
jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3140
JCOEFPTR coef_block,
3141
JSAMPARRAY output_buf, JDIMENSION output_col)
3142
{
3143
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
3144
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
3145
INT32 z1, z2, z3, z4;
3146
JCOEFPTR inptr;
3147
ISLOW_MULT_TYPE * quantptr;
3148
int * wsptr;
3149
JSAMPROW outptr;
3150
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3151
int ctr;
3152
int workspace[8*7]; /* buffers data between passes */
3153
SHIFT_TEMPS
3154
3155
/* Pass 1: process columns from input, store into work array.
3156
* 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
3157
*/
3158
3159
inptr = coef_block;
3160
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3161
wsptr = workspace;
3162
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3163
/* Even part */
3164
3165
tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3166
tmp23 <<= CONST_BITS;
3167
/* Add fudge factor here for final descale. */
3168
tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
3169
3170
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3171
z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3172
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
3173
3174
tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
3175
tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
3176
tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
3177
tmp10 = z1 + z3;
3178
z2 -= tmp10;
3179
tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
3180
tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
3181
tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
3182
tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
3183
3184
/* Odd part */
3185
3186
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3187
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3188
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3189
3190
tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
3191
tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
3192
tmp10 = tmp11 - tmp12;
3193
tmp11 += tmp12;
3194
tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
3195
tmp11 += tmp12;
3196
z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
3197
tmp10 += z2;
3198
tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
3199
3200
/* Final output stage */
3201
3202
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3203
wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3204
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
3205
wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
3206
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3207
wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3208
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
3209
}
3210
3211
/* Pass 2: process 7 rows from work array, store into output array.
3212
* 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
3213
*/
3214
3215
wsptr = workspace;
3216
for (ctr = 0; ctr < 7; ctr++) {
3217
outptr = output_buf[ctr] + output_col;
3218
3219
/* Even part */
3220
3221
/* Add range center and fudge factor for final descale and range-limit. */
3222
z1 = (INT32) wsptr[0] +
3223
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3224
(ONE << (PASS1_BITS+2)));
3225
z1 <<= CONST_BITS;
3226
z4 = (INT32) wsptr[4];
3227
z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
3228
z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
3229
z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
3230
3231
tmp10 = z1 + z2;
3232
tmp11 = z1 + z3;
3233
tmp12 = z1 - z4;
3234
3235
tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */
3236
3237
z1 = (INT32) wsptr[2];
3238
z2 = (INT32) wsptr[6];
3239
3240
z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
3241
3242
tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
3243
tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
3244
tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
3245
MULTIPLY(z2, FIX(1.378756276)); /* c2 */
3246
3247
tmp20 = tmp10 + tmp13;
3248
tmp26 = tmp10 - tmp13;
3249
tmp21 = tmp11 + tmp14;
3250
tmp25 = tmp11 - tmp14;
3251
tmp22 = tmp12 + tmp15;
3252
tmp24 = tmp12 - tmp15;
3253
3254
/* Odd part */
3255
3256
z1 = (INT32) wsptr[1];
3257
z2 = (INT32) wsptr[3];
3258
z3 = (INT32) wsptr[5];
3259
z4 = (INT32) wsptr[7];
3260
z4 <<= CONST_BITS;
3261
3262
tmp14 = z1 + z3;
3263
tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
3264
tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
3265
tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
3266
tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
3267
tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
3268
z1 -= z2;
3269
tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */
3270
tmp16 += tmp15;
3271
tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */
3272
tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
3273
tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
3274
tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
3275
tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
3276
tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
3277
3278
tmp13 = ((z1 - z3) << CONST_BITS) + z4;
3279
3280
/* Final output stage */
3281
3282
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3283
CONST_BITS+PASS1_BITS+3)
3284
& RANGE_MASK];
3285
outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3286
CONST_BITS+PASS1_BITS+3)
3287
& RANGE_MASK];
3288
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3289
CONST_BITS+PASS1_BITS+3)
3290
& RANGE_MASK];
3291
outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3292
CONST_BITS+PASS1_BITS+3)
3293
& RANGE_MASK];
3294
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3295
CONST_BITS+PASS1_BITS+3)
3296
& RANGE_MASK];
3297
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3298
CONST_BITS+PASS1_BITS+3)
3299
& RANGE_MASK];
3300
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3301
CONST_BITS+PASS1_BITS+3)
3302
& RANGE_MASK];
3303
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3304
CONST_BITS+PASS1_BITS+3)
3305
& RANGE_MASK];
3306
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3307
CONST_BITS+PASS1_BITS+3)
3308
& RANGE_MASK];
3309
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3310
CONST_BITS+PASS1_BITS+3)
3311
& RANGE_MASK];
3312
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3313
CONST_BITS+PASS1_BITS+3)
3314
& RANGE_MASK];
3315
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3316
CONST_BITS+PASS1_BITS+3)
3317
& RANGE_MASK];
3318
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
3319
CONST_BITS+PASS1_BITS+3)
3320
& RANGE_MASK];
3321
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
3322
CONST_BITS+PASS1_BITS+3)
3323
& RANGE_MASK];
3324
3325
wsptr += 8; /* advance pointer to next row */
3326
}
3327
}
3328
3329
3330
/*
3331
* Perform dequantization and inverse DCT on one block of coefficients,
3332
* producing a 12x6 output block.
3333
*
3334
* 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
3335
*/
3336
3337
GLOBAL(void)
3338
jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3339
JCOEFPTR coef_block,
3340
JSAMPARRAY output_buf, JDIMENSION output_col)
3341
{
3342
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
3343
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
3344
INT32 z1, z2, z3, z4;
3345
JCOEFPTR inptr;
3346
ISLOW_MULT_TYPE * quantptr;
3347
int * wsptr;
3348
JSAMPROW outptr;
3349
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3350
int ctr;
3351
int workspace[8*6]; /* buffers data between passes */
3352
SHIFT_TEMPS
3353
3354
/* Pass 1: process columns from input, store into work array.
3355
* 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3356
*/
3357
3358
inptr = coef_block;
3359
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3360
wsptr = workspace;
3361
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3362
/* Even part */
3363
3364
tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3365
tmp10 <<= CONST_BITS;
3366
/* Add fudge factor here for final descale. */
3367
tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
3368
tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3369
tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
3370
tmp11 = tmp10 + tmp20;
3371
tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
3372
tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3373
tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
3374
tmp20 = tmp11 + tmp10;
3375
tmp22 = tmp11 - tmp10;
3376
3377
/* Odd part */
3378
3379
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3380
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3381
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
3382
tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3383
tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
3384
tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
3385
tmp11 = (z1 - z2 - z3) << PASS1_BITS;
3386
3387
/* Final output stage */
3388
3389
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
3390
wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
3391
wsptr[8*1] = (int) (tmp21 + tmp11);
3392
wsptr[8*4] = (int) (tmp21 - tmp11);
3393
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
3394
wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
3395
}
3396
3397
/* Pass 2: process 6 rows from work array, store into output array.
3398
* 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
3399
*/
3400
3401
wsptr = workspace;
3402
for (ctr = 0; ctr < 6; ctr++) {
3403
outptr = output_buf[ctr] + output_col;
3404
3405
/* Even part */
3406
3407
/* Add range center and fudge factor for final descale and range-limit. */
3408
z3 = (INT32) wsptr[0] +
3409
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3410
(ONE << (PASS1_BITS+2)));
3411
z3 <<= CONST_BITS;
3412
3413
z4 = (INT32) wsptr[4];
3414
z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
3415
3416
tmp10 = z3 + z4;
3417
tmp11 = z3 - z4;
3418
3419
z1 = (INT32) wsptr[2];
3420
z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
3421
z1 <<= CONST_BITS;
3422
z2 = (INT32) wsptr[6];
3423
z2 <<= CONST_BITS;
3424
3425
tmp12 = z1 - z2;
3426
3427
tmp21 = z3 + tmp12;
3428
tmp24 = z3 - tmp12;
3429
3430
tmp12 = z4 + z2;
3431
3432
tmp20 = tmp10 + tmp12;
3433
tmp25 = tmp10 - tmp12;
3434
3435
tmp12 = z4 - z1 - z2;
3436
3437
tmp22 = tmp11 + tmp12;
3438
tmp23 = tmp11 - tmp12;
3439
3440
/* Odd part */
3441
3442
z1 = (INT32) wsptr[1];
3443
z2 = (INT32) wsptr[3];
3444
z3 = (INT32) wsptr[5];
3445
z4 = (INT32) wsptr[7];
3446
3447
tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
3448
tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
3449
3450
tmp10 = z1 + z3;
3451
tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
3452
tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
3453
tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
3454
tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
3455
tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
3456
tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
3457
tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
3458
MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
3459
3460
z1 -= z4;
3461
z2 -= z3;
3462
z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
3463
tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
3464
tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
3465
3466
/* Final output stage */
3467
3468
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3469
CONST_BITS+PASS1_BITS+3)
3470
& RANGE_MASK];
3471
outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3472
CONST_BITS+PASS1_BITS+3)
3473
& RANGE_MASK];
3474
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3475
CONST_BITS+PASS1_BITS+3)
3476
& RANGE_MASK];
3477
outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3478
CONST_BITS+PASS1_BITS+3)
3479
& RANGE_MASK];
3480
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3481
CONST_BITS+PASS1_BITS+3)
3482
& RANGE_MASK];
3483
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3484
CONST_BITS+PASS1_BITS+3)
3485
& RANGE_MASK];
3486
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3487
CONST_BITS+PASS1_BITS+3)
3488
& RANGE_MASK];
3489
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3490
CONST_BITS+PASS1_BITS+3)
3491
& RANGE_MASK];
3492
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3493
CONST_BITS+PASS1_BITS+3)
3494
& RANGE_MASK];
3495
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3496
CONST_BITS+PASS1_BITS+3)
3497
& RANGE_MASK];
3498
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
3499
CONST_BITS+PASS1_BITS+3)
3500
& RANGE_MASK];
3501
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
3502
CONST_BITS+PASS1_BITS+3)
3503
& RANGE_MASK];
3504
3505
wsptr += 8; /* advance pointer to next row */
3506
}
3507
}
3508
3509
3510
/*
3511
* Perform dequantization and inverse DCT on one block of coefficients,
3512
* producing a 10x5 output block.
3513
*
3514
* 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
3515
*/
3516
3517
GLOBAL(void)
3518
jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3519
JCOEFPTR coef_block,
3520
JSAMPARRAY output_buf, JDIMENSION output_col)
3521
{
3522
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
3523
INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
3524
INT32 z1, z2, z3, z4;
3525
JCOEFPTR inptr;
3526
ISLOW_MULT_TYPE * quantptr;
3527
int * wsptr;
3528
JSAMPROW outptr;
3529
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3530
int ctr;
3531
int workspace[8*5]; /* buffers data between passes */
3532
SHIFT_TEMPS
3533
3534
/* Pass 1: process columns from input, store into work array.
3535
* 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
3536
*/
3537
3538
inptr = coef_block;
3539
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3540
wsptr = workspace;
3541
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3542
/* Even part */
3543
3544
tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3545
tmp12 <<= CONST_BITS;
3546
/* Add fudge factor here for final descale. */
3547
tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
3548
tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3549
tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
3550
z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
3551
z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
3552
z3 = tmp12 + z2;
3553
tmp10 = z3 + z1;
3554
tmp11 = z3 - z1;
3555
tmp12 -= z2 << 2;
3556
3557
/* Odd part */
3558
3559
z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3560
z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3561
3562
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
3563
tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
3564
tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
3565
3566
/* Final output stage */
3567
3568
wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
3569
wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
3570
wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
3571
wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
3572
wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
3573
}
3574
3575
/* Pass 2: process 5 rows from work array, store into output array.
3576
* 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
3577
*/
3578
3579
wsptr = workspace;
3580
for (ctr = 0; ctr < 5; ctr++) {
3581
outptr = output_buf[ctr] + output_col;
3582
3583
/* Even part */
3584
3585
/* Add range center and fudge factor for final descale and range-limit. */
3586
z3 = (INT32) wsptr[0] +
3587
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3588
(ONE << (PASS1_BITS+2)));
3589
z3 <<= CONST_BITS;
3590
z4 = (INT32) wsptr[4];
3591
z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
3592
z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
3593
tmp10 = z3 + z1;
3594
tmp11 = z3 - z2;
3595
3596
tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */
3597
3598
z2 = (INT32) wsptr[2];
3599
z3 = (INT32) wsptr[6];
3600
3601
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
3602
tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
3603
tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
3604
3605
tmp20 = tmp10 + tmp12;
3606
tmp24 = tmp10 - tmp12;
3607
tmp21 = tmp11 + tmp13;
3608
tmp23 = tmp11 - tmp13;
3609
3610
/* Odd part */
3611
3612
z1 = (INT32) wsptr[1];
3613
z2 = (INT32) wsptr[3];
3614
z3 = (INT32) wsptr[5];
3615
z3 <<= CONST_BITS;
3616
z4 = (INT32) wsptr[7];
3617
3618
tmp11 = z2 + z4;
3619
tmp13 = z2 - z4;
3620
3621
tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
3622
3623
z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
3624
z4 = z3 + tmp12;
3625
3626
tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
3627
tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
3628
3629
z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
3630
z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
3631
3632
tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
3633
3634
tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
3635
tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
3636
3637
/* Final output stage */
3638
3639
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
3640
CONST_BITS+PASS1_BITS+3)
3641
& RANGE_MASK];
3642
outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
3643
CONST_BITS+PASS1_BITS+3)
3644
& RANGE_MASK];
3645
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
3646
CONST_BITS+PASS1_BITS+3)
3647
& RANGE_MASK];
3648
outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
3649
CONST_BITS+PASS1_BITS+3)
3650
& RANGE_MASK];
3651
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
3652
CONST_BITS+PASS1_BITS+3)
3653
& RANGE_MASK];
3654
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
3655
CONST_BITS+PASS1_BITS+3)
3656
& RANGE_MASK];
3657
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
3658
CONST_BITS+PASS1_BITS+3)
3659
& RANGE_MASK];
3660
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
3661
CONST_BITS+PASS1_BITS+3)
3662
& RANGE_MASK];
3663
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
3664
CONST_BITS+PASS1_BITS+3)
3665
& RANGE_MASK];
3666
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
3667
CONST_BITS+PASS1_BITS+3)
3668
& RANGE_MASK];
3669
3670
wsptr += 8; /* advance pointer to next row */
3671
}
3672
}
3673
3674
3675
/*
3676
* Perform dequantization and inverse DCT on one block of coefficients,
3677
* producing a 8x4 output block.
3678
*
3679
* 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
3680
*/
3681
3682
GLOBAL(void)
3683
jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3684
JCOEFPTR coef_block,
3685
JSAMPARRAY output_buf, JDIMENSION output_col)
3686
{
3687
INT32 tmp0, tmp1, tmp2, tmp3;
3688
INT32 tmp10, tmp11, tmp12, tmp13;
3689
INT32 z1, z2, z3;
3690
JCOEFPTR inptr;
3691
ISLOW_MULT_TYPE * quantptr;
3692
int * wsptr;
3693
JSAMPROW outptr;
3694
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3695
int ctr;
3696
int workspace[8*4]; /* buffers data between passes */
3697
SHIFT_TEMPS
3698
3699
/* Pass 1: process columns from input, store into work array.
3700
* 4-point IDCT kernel,
3701
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3702
*/
3703
3704
inptr = coef_block;
3705
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3706
wsptr = workspace;
3707
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
3708
/* Even part */
3709
3710
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3711
tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3712
3713
tmp10 = (tmp0 + tmp2) << PASS1_BITS;
3714
tmp12 = (tmp0 - tmp2) << PASS1_BITS;
3715
3716
/* Odd part */
3717
/* Same rotation as in the even part of the 8x8 LL&M IDCT */
3718
3719
z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3720
z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
3721
3722
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3723
/* Add fudge factor here for final descale. */
3724
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
3725
tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
3726
CONST_BITS-PASS1_BITS);
3727
tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
3728
CONST_BITS-PASS1_BITS);
3729
3730
/* Final output stage */
3731
3732
wsptr[8*0] = (int) (tmp10 + tmp0);
3733
wsptr[8*3] = (int) (tmp10 - tmp0);
3734
wsptr[8*1] = (int) (tmp12 + tmp2);
3735
wsptr[8*2] = (int) (tmp12 - tmp2);
3736
}
3737
3738
/* Pass 2: process rows from work array, store into output array.
3739
* Note that we must descale the results by a factor of 8 == 2**3,
3740
* and also undo the PASS1_BITS scaling.
3741
* 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3742
*/
3743
3744
wsptr = workspace;
3745
for (ctr = 0; ctr < 4; ctr++) {
3746
outptr = output_buf[ctr] + output_col;
3747
3748
/* Even part: reverse the even part of the forward DCT.
3749
* The rotator is c(-6).
3750
*/
3751
3752
/* Add range center and fudge factor for final descale and range-limit. */
3753
z2 = (INT32) wsptr[0] +
3754
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3755
(ONE << (PASS1_BITS+2)));
3756
z3 = (INT32) wsptr[4];
3757
3758
tmp0 = (z2 + z3) << CONST_BITS;
3759
tmp1 = (z2 - z3) << CONST_BITS;
3760
3761
z2 = (INT32) wsptr[2];
3762
z3 = (INT32) wsptr[6];
3763
3764
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3765
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
3766
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
3767
3768
tmp10 = tmp0 + tmp2;
3769
tmp13 = tmp0 - tmp2;
3770
tmp11 = tmp1 + tmp3;
3771
tmp12 = tmp1 - tmp3;
3772
3773
/* Odd part per figure 8; the matrix is unitary and hence its
3774
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
3775
*/
3776
3777
tmp0 = (INT32) wsptr[7];
3778
tmp1 = (INT32) wsptr[5];
3779
tmp2 = (INT32) wsptr[3];
3780
tmp3 = (INT32) wsptr[1];
3781
3782
z2 = tmp0 + tmp2;
3783
z3 = tmp1 + tmp3;
3784
3785
z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
3786
z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
3787
z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
3788
z2 += z1;
3789
z3 += z1;
3790
3791
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3792
tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
3793
tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
3794
tmp0 += z1 + z2;
3795
tmp3 += z1 + z3;
3796
3797
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3798
tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
3799
tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
3800
tmp1 += z1 + z3;
3801
tmp2 += z1 + z2;
3802
3803
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
3804
3805
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
3806
CONST_BITS+PASS1_BITS+3)
3807
& RANGE_MASK];
3808
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
3809
CONST_BITS+PASS1_BITS+3)
3810
& RANGE_MASK];
3811
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
3812
CONST_BITS+PASS1_BITS+3)
3813
& RANGE_MASK];
3814
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
3815
CONST_BITS+PASS1_BITS+3)
3816
& RANGE_MASK];
3817
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
3818
CONST_BITS+PASS1_BITS+3)
3819
& RANGE_MASK];
3820
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
3821
CONST_BITS+PASS1_BITS+3)
3822
& RANGE_MASK];
3823
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
3824
CONST_BITS+PASS1_BITS+3)
3825
& RANGE_MASK];
3826
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
3827
CONST_BITS+PASS1_BITS+3)
3828
& RANGE_MASK];
3829
3830
wsptr += DCTSIZE; /* advance pointer to next row */
3831
}
3832
}
3833
3834
3835
/*
3836
* Perform dequantization and inverse DCT on one block of coefficients,
3837
* producing a reduced-size 6x3 output block.
3838
*
3839
* 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
3840
*/
3841
3842
GLOBAL(void)
3843
jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3844
JCOEFPTR coef_block,
3845
JSAMPARRAY output_buf, JDIMENSION output_col)
3846
{
3847
INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
3848
INT32 z1, z2, z3;
3849
JCOEFPTR inptr;
3850
ISLOW_MULT_TYPE * quantptr;
3851
int * wsptr;
3852
JSAMPROW outptr;
3853
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3854
int ctr;
3855
int workspace[6*3]; /* buffers data between passes */
3856
SHIFT_TEMPS
3857
3858
/* Pass 1: process columns from input, store into work array.
3859
* 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
3860
*/
3861
3862
inptr = coef_block;
3863
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3864
wsptr = workspace;
3865
for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
3866
/* Even part */
3867
3868
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3869
tmp0 <<= CONST_BITS;
3870
/* Add fudge factor here for final descale. */
3871
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
3872
tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
3873
tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
3874
tmp10 = tmp0 + tmp12;
3875
tmp2 = tmp0 - tmp12 - tmp12;
3876
3877
/* Odd part */
3878
3879
tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3880
tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
3881
3882
/* Final output stage */
3883
3884
wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
3885
wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
3886
wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
3887
}
3888
3889
/* Pass 2: process 3 rows from work array, store into output array.
3890
* 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3891
*/
3892
3893
wsptr = workspace;
3894
for (ctr = 0; ctr < 3; ctr++) {
3895
outptr = output_buf[ctr] + output_col;
3896
3897
/* Even part */
3898
3899
/* Add range center and fudge factor for final descale and range-limit. */
3900
tmp0 = (INT32) wsptr[0] +
3901
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
3902
(ONE << (PASS1_BITS+2)));
3903
tmp0 <<= CONST_BITS;
3904
tmp2 = (INT32) wsptr[4];
3905
tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
3906
tmp1 = tmp0 + tmp10;
3907
tmp11 = tmp0 - tmp10 - tmp10;
3908
tmp10 = (INT32) wsptr[2];
3909
tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
3910
tmp10 = tmp1 + tmp0;
3911
tmp12 = tmp1 - tmp0;
3912
3913
/* Odd part */
3914
3915
z1 = (INT32) wsptr[1];
3916
z2 = (INT32) wsptr[3];
3917
z3 = (INT32) wsptr[5];
3918
tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
3919
tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
3920
tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
3921
tmp1 = (z1 - z2 - z3) << CONST_BITS;
3922
3923
/* Final output stage */
3924
3925
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
3926
CONST_BITS+PASS1_BITS+3)
3927
& RANGE_MASK];
3928
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
3929
CONST_BITS+PASS1_BITS+3)
3930
& RANGE_MASK];
3931
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
3932
CONST_BITS+PASS1_BITS+3)
3933
& RANGE_MASK];
3934
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
3935
CONST_BITS+PASS1_BITS+3)
3936
& RANGE_MASK];
3937
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
3938
CONST_BITS+PASS1_BITS+3)
3939
& RANGE_MASK];
3940
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
3941
CONST_BITS+PASS1_BITS+3)
3942
& RANGE_MASK];
3943
3944
wsptr += 6; /* advance pointer to next row */
3945
}
3946
}
3947
3948
3949
/*
3950
* Perform dequantization and inverse DCT on one block of coefficients,
3951
* producing a 4x2 output block.
3952
*
3953
* 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
3954
*/
3955
3956
GLOBAL(void)
3957
jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
3958
JCOEFPTR coef_block,
3959
JSAMPARRAY output_buf, JDIMENSION output_col)
3960
{
3961
INT32 tmp0, tmp2, tmp10, tmp12;
3962
INT32 z1, z2, z3;
3963
JCOEFPTR inptr;
3964
ISLOW_MULT_TYPE * quantptr;
3965
INT32 * wsptr;
3966
JSAMPROW outptr;
3967
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
3968
int ctr;
3969
INT32 workspace[4*2]; /* buffers data between passes */
3970
SHIFT_TEMPS
3971
3972
/* Pass 1: process columns from input, store into work array. */
3973
3974
inptr = coef_block;
3975
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
3976
wsptr = workspace;
3977
for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
3978
/* Even part */
3979
3980
tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
3981
3982
/* Odd part */
3983
3984
tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
3985
3986
/* Final output stage */
3987
3988
wsptr[4*0] = tmp10 + tmp0;
3989
wsptr[4*1] = tmp10 - tmp0;
3990
}
3991
3992
/* Pass 2: process 2 rows from work array, store into output array.
3993
* 4-point IDCT kernel,
3994
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3995
*/
3996
3997
wsptr = workspace;
3998
for (ctr = 0; ctr < 2; ctr++) {
3999
outptr = output_buf[ctr] + output_col;
4000
4001
/* Even part */
4002
4003
/* Add range center and fudge factor for final descale and range-limit. */
4004
tmp0 = wsptr[0] + ((((INT32) RANGE_CENTER) << 3) + (ONE << 2));
4005
tmp2 = wsptr[2];
4006
4007
tmp10 = (tmp0 + tmp2) << CONST_BITS;
4008
tmp12 = (tmp0 - tmp2) << CONST_BITS;
4009
4010
/* Odd part */
4011
/* Same rotation as in the even part of the 8x8 LL&M IDCT */
4012
4013
z2 = wsptr[1];
4014
z3 = wsptr[3];
4015
4016
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4017
tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4018
tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4019
4020
/* Final output stage */
4021
4022
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4023
CONST_BITS+3)
4024
& RANGE_MASK];
4025
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4026
CONST_BITS+3)
4027
& RANGE_MASK];
4028
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4029
CONST_BITS+3)
4030
& RANGE_MASK];
4031
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4032
CONST_BITS+3)
4033
& RANGE_MASK];
4034
4035
wsptr += 4; /* advance pointer to next row */
4036
}
4037
}
4038
4039
4040
/*
4041
* Perform dequantization and inverse DCT on one block of coefficients,
4042
* producing a 2x1 output block.
4043
*
4044
* 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
4045
*/
4046
4047
GLOBAL(void)
4048
jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4049
JCOEFPTR coef_block,
4050
JSAMPARRAY output_buf, JDIMENSION output_col)
4051
{
4052
DCTELEM tmp0, tmp1;
4053
ISLOW_MULT_TYPE * quantptr;
4054
JSAMPROW outptr;
4055
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4056
ISHIFT_TEMPS
4057
4058
/* Pass 1: empty. */
4059
4060
/* Pass 2: process 1 row from input, store into output array. */
4061
4062
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4063
outptr = output_buf[0] + output_col;
4064
4065
/* Even part */
4066
4067
tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]);
4068
/* Add range center and fudge factor for final descale and range-limit. */
4069
tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
4070
4071
/* Odd part */
4072
4073
tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]);
4074
4075
/* Final output stage */
4076
4077
outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
4078
outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
4079
}
4080
4081
4082
/*
4083
* Perform dequantization and inverse DCT on one block of coefficients,
4084
* producing a 8x16 output block.
4085
*
4086
* 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
4087
*/
4088
4089
GLOBAL(void)
4090
jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4091
JCOEFPTR coef_block,
4092
JSAMPARRAY output_buf, JDIMENSION output_col)
4093
{
4094
INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
4095
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
4096
INT32 z1, z2, z3, z4;
4097
JCOEFPTR inptr;
4098
ISLOW_MULT_TYPE * quantptr;
4099
int * wsptr;
4100
JSAMPROW outptr;
4101
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4102
int ctr;
4103
int workspace[8*16]; /* buffers data between passes */
4104
SHIFT_TEMPS
4105
4106
/* Pass 1: process columns from input, store into work array.
4107
* 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
4108
*/
4109
4110
inptr = coef_block;
4111
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4112
wsptr = workspace;
4113
for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
4114
/* Even part */
4115
4116
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4117
tmp0 <<= CONST_BITS;
4118
/* Add fudge factor here for final descale. */
4119
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
4120
4121
z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4122
tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */
4123
tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */
4124
4125
tmp10 = tmp0 + tmp1;
4126
tmp11 = tmp0 - tmp1;
4127
tmp12 = tmp0 + tmp2;
4128
tmp13 = tmp0 - tmp2;
4129
4130
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4131
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4132
z3 = z1 - z2;
4133
z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */
4134
z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */
4135
4136
tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */
4137
tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */
4138
tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
4139
tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
4140
4141
tmp20 = tmp10 + tmp0;
4142
tmp27 = tmp10 - tmp0;
4143
tmp21 = tmp12 + tmp1;
4144
tmp26 = tmp12 - tmp1;
4145
tmp22 = tmp13 + tmp2;
4146
tmp25 = tmp13 - tmp2;
4147
tmp23 = tmp11 + tmp3;
4148
tmp24 = tmp11 - tmp3;
4149
4150
/* Odd part */
4151
4152
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4153
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4154
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4155
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4156
4157
tmp11 = z1 + z3;
4158
4159
tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */
4160
tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */
4161
tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */
4162
tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */
4163
tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */
4164
tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */
4165
tmp0 = tmp1 + tmp2 + tmp3 -
4166
MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */
4167
tmp13 = tmp10 + tmp11 + tmp12 -
4168
MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */
4169
z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */
4170
tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */
4171
tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */
4172
z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */
4173
tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */
4174
tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */
4175
z2 += z4;
4176
z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */
4177
tmp1 += z1;
4178
tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */
4179
z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */
4180
tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */
4181
tmp12 += z2;
4182
z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
4183
tmp2 += z2;
4184
tmp3 += z2;
4185
z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */
4186
tmp10 += z2;
4187
tmp11 += z2;
4188
4189
/* Final output stage */
4190
4191
wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS);
4192
wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS);
4193
wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS);
4194
wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS);
4195
wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS);
4196
wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS);
4197
wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS);
4198
wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS);
4199
wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
4200
wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
4201
wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
4202
wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
4203
wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
4204
wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
4205
wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
4206
wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
4207
}
4208
4209
/* Pass 2: process rows from work array, store into output array.
4210
* Note that we must descale the results by a factor of 8 == 2**3,
4211
* and also undo the PASS1_BITS scaling.
4212
* 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4213
*/
4214
4215
wsptr = workspace;
4216
for (ctr = 0; ctr < 16; ctr++) {
4217
outptr = output_buf[ctr] + output_col;
4218
4219
/* Even part: reverse the even part of the forward DCT.
4220
* The rotator is c(-6).
4221
*/
4222
4223
/* Add range center and fudge factor for final descale and range-limit. */
4224
z2 = (INT32) wsptr[0] +
4225
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4226
(ONE << (PASS1_BITS+2)));
4227
z3 = (INT32) wsptr[4];
4228
4229
tmp0 = (z2 + z3) << CONST_BITS;
4230
tmp1 = (z2 - z3) << CONST_BITS;
4231
4232
z2 = (INT32) wsptr[2];
4233
z3 = (INT32) wsptr[6];
4234
4235
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4236
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4237
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4238
4239
tmp10 = tmp0 + tmp2;
4240
tmp13 = tmp0 - tmp2;
4241
tmp11 = tmp1 + tmp3;
4242
tmp12 = tmp1 - tmp3;
4243
4244
/* Odd part per figure 8; the matrix is unitary and hence its
4245
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4246
*/
4247
4248
tmp0 = (INT32) wsptr[7];
4249
tmp1 = (INT32) wsptr[5];
4250
tmp2 = (INT32) wsptr[3];
4251
tmp3 = (INT32) wsptr[1];
4252
4253
z2 = tmp0 + tmp2;
4254
z3 = tmp1 + tmp3;
4255
4256
z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
4257
z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
4258
z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
4259
z2 += z1;
4260
z3 += z1;
4261
4262
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4263
tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
4264
tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
4265
tmp0 += z1 + z2;
4266
tmp3 += z1 + z3;
4267
4268
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4269
tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
4270
tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
4271
tmp1 += z1 + z3;
4272
tmp2 += z1 + z2;
4273
4274
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4275
4276
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
4277
CONST_BITS+PASS1_BITS+3)
4278
& RANGE_MASK];
4279
outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
4280
CONST_BITS+PASS1_BITS+3)
4281
& RANGE_MASK];
4282
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
4283
CONST_BITS+PASS1_BITS+3)
4284
& RANGE_MASK];
4285
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
4286
CONST_BITS+PASS1_BITS+3)
4287
& RANGE_MASK];
4288
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
4289
CONST_BITS+PASS1_BITS+3)
4290
& RANGE_MASK];
4291
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
4292
CONST_BITS+PASS1_BITS+3)
4293
& RANGE_MASK];
4294
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
4295
CONST_BITS+PASS1_BITS+3)
4296
& RANGE_MASK];
4297
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
4298
CONST_BITS+PASS1_BITS+3)
4299
& RANGE_MASK];
4300
4301
wsptr += DCTSIZE; /* advance pointer to next row */
4302
}
4303
}
4304
4305
4306
/*
4307
* Perform dequantization and inverse DCT on one block of coefficients,
4308
* producing a 7x14 output block.
4309
*
4310
* 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
4311
*/
4312
4313
GLOBAL(void)
4314
jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4315
JCOEFPTR coef_block,
4316
JSAMPARRAY output_buf, JDIMENSION output_col)
4317
{
4318
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
4319
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
4320
INT32 z1, z2, z3, z4;
4321
JCOEFPTR inptr;
4322
ISLOW_MULT_TYPE * quantptr;
4323
int * wsptr;
4324
JSAMPROW outptr;
4325
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4326
int ctr;
4327
int workspace[7*14]; /* buffers data between passes */
4328
SHIFT_TEMPS
4329
4330
/* Pass 1: process columns from input, store into work array.
4331
* 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
4332
*/
4333
4334
inptr = coef_block;
4335
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4336
wsptr = workspace;
4337
for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
4338
/* Even part */
4339
4340
z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4341
z1 <<= CONST_BITS;
4342
/* Add fudge factor here for final descale. */
4343
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
4344
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4345
z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */
4346
z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */
4347
z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */
4348
4349
tmp10 = z1 + z2;
4350
tmp11 = z1 + z3;
4351
tmp12 = z1 - z4;
4352
4353
tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
4354
CONST_BITS-PASS1_BITS);
4355
4356
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4357
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4358
4359
z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */
4360
4361
tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
4362
tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
4363
tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */
4364
MULTIPLY(z2, FIX(1.378756276)); /* c2 */
4365
4366
tmp20 = tmp10 + tmp13;
4367
tmp26 = tmp10 - tmp13;
4368
tmp21 = tmp11 + tmp14;
4369
tmp25 = tmp11 - tmp14;
4370
tmp22 = tmp12 + tmp15;
4371
tmp24 = tmp12 - tmp15;
4372
4373
/* Odd part */
4374
4375
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4376
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4377
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4378
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4379
tmp13 = z4 << CONST_BITS;
4380
4381
tmp14 = z1 + z3;
4382
tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */
4383
tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */
4384
tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
4385
tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */
4386
tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */
4387
z1 -= z2;
4388
tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */
4389
tmp16 += tmp15;
4390
z1 += z4;
4391
z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
4392
tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */
4393
tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */
4394
z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */
4395
tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
4396
tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */
4397
4398
tmp13 = (z1 - z3) << PASS1_BITS;
4399
4400
/* Final output stage */
4401
4402
wsptr[7*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4403
wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4404
wsptr[7*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4405
wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4406
wsptr[7*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4407
wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4408
wsptr[7*3] = (int) (tmp23 + tmp13);
4409
wsptr[7*10] = (int) (tmp23 - tmp13);
4410
wsptr[7*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4411
wsptr[7*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4412
wsptr[7*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4413
wsptr[7*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4414
wsptr[7*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
4415
wsptr[7*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
4416
}
4417
4418
/* Pass 2: process 14 rows from work array, store into output array.
4419
* 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
4420
*/
4421
4422
wsptr = workspace;
4423
for (ctr = 0; ctr < 14; ctr++) {
4424
outptr = output_buf[ctr] + output_col;
4425
4426
/* Even part */
4427
4428
/* Add range center and fudge factor for final descale and range-limit. */
4429
tmp23 = (INT32) wsptr[0] +
4430
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4431
(ONE << (PASS1_BITS+2)));
4432
tmp23 <<= CONST_BITS;
4433
4434
z1 = (INT32) wsptr[2];
4435
z2 = (INT32) wsptr[4];
4436
z3 = (INT32) wsptr[6];
4437
4438
tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */
4439
tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */
4440
tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
4441
tmp10 = z1 + z3;
4442
z2 -= tmp10;
4443
tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
4444
tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */
4445
tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */
4446
tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */
4447
4448
/* Odd part */
4449
4450
z1 = (INT32) wsptr[1];
4451
z2 = (INT32) wsptr[3];
4452
z3 = (INT32) wsptr[5];
4453
4454
tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */
4455
tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */
4456
tmp10 = tmp11 - tmp12;
4457
tmp11 += tmp12;
4458
tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */
4459
tmp11 += tmp12;
4460
z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */
4461
tmp10 += z2;
4462
tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */
4463
4464
/* Final output stage */
4465
4466
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4467
CONST_BITS+PASS1_BITS+3)
4468
& RANGE_MASK];
4469
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4470
CONST_BITS+PASS1_BITS+3)
4471
& RANGE_MASK];
4472
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4473
CONST_BITS+PASS1_BITS+3)
4474
& RANGE_MASK];
4475
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4476
CONST_BITS+PASS1_BITS+3)
4477
& RANGE_MASK];
4478
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4479
CONST_BITS+PASS1_BITS+3)
4480
& RANGE_MASK];
4481
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4482
CONST_BITS+PASS1_BITS+3)
4483
& RANGE_MASK];
4484
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23,
4485
CONST_BITS+PASS1_BITS+3)
4486
& RANGE_MASK];
4487
4488
wsptr += 7; /* advance pointer to next row */
4489
}
4490
}
4491
4492
4493
/*
4494
* Perform dequantization and inverse DCT on one block of coefficients,
4495
* producing a 6x12 output block.
4496
*
4497
* 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
4498
*/
4499
4500
GLOBAL(void)
4501
jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4502
JCOEFPTR coef_block,
4503
JSAMPARRAY output_buf, JDIMENSION output_col)
4504
{
4505
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
4506
INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
4507
INT32 z1, z2, z3, z4;
4508
JCOEFPTR inptr;
4509
ISLOW_MULT_TYPE * quantptr;
4510
int * wsptr;
4511
JSAMPROW outptr;
4512
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4513
int ctr;
4514
int workspace[6*12]; /* buffers data between passes */
4515
SHIFT_TEMPS
4516
4517
/* Pass 1: process columns from input, store into work array.
4518
* 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
4519
*/
4520
4521
inptr = coef_block;
4522
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4523
wsptr = workspace;
4524
for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
4525
/* Even part */
4526
4527
z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4528
z3 <<= CONST_BITS;
4529
/* Add fudge factor here for final descale. */
4530
z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4531
4532
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4533
z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
4534
4535
tmp10 = z3 + z4;
4536
tmp11 = z3 - z4;
4537
4538
z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4539
z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
4540
z1 <<= CONST_BITS;
4541
z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4542
z2 <<= CONST_BITS;
4543
4544
tmp12 = z1 - z2;
4545
4546
tmp21 = z3 + tmp12;
4547
tmp24 = z3 - tmp12;
4548
4549
tmp12 = z4 + z2;
4550
4551
tmp20 = tmp10 + tmp12;
4552
tmp25 = tmp10 - tmp12;
4553
4554
tmp12 = z4 - z1 - z2;
4555
4556
tmp22 = tmp11 + tmp12;
4557
tmp23 = tmp11 - tmp12;
4558
4559
/* Odd part */
4560
4561
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4562
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4563
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4564
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4565
4566
tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */
4567
tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */
4568
4569
tmp10 = z1 + z3;
4570
tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */
4571
tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */
4572
tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */
4573
tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */
4574
tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
4575
tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
4576
tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */
4577
MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */
4578
4579
z1 -= z4;
4580
z2 -= z3;
4581
z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */
4582
tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */
4583
tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */
4584
4585
/* Final output stage */
4586
4587
wsptr[6*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4588
wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4589
wsptr[6*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4590
wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4591
wsptr[6*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
4592
wsptr[6*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
4593
wsptr[6*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4594
wsptr[6*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4595
wsptr[6*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4596
wsptr[6*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4597
wsptr[6*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
4598
wsptr[6*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
4599
}
4600
4601
/* Pass 2: process 12 rows from work array, store into output array.
4602
* 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
4603
*/
4604
4605
wsptr = workspace;
4606
for (ctr = 0; ctr < 12; ctr++) {
4607
outptr = output_buf[ctr] + output_col;
4608
4609
/* Even part */
4610
4611
/* Add range center and fudge factor for final descale and range-limit. */
4612
tmp10 = (INT32) wsptr[0] +
4613
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4614
(ONE << (PASS1_BITS+2)));
4615
tmp10 <<= CONST_BITS;
4616
tmp12 = (INT32) wsptr[4];
4617
tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */
4618
tmp11 = tmp10 + tmp20;
4619
tmp21 = tmp10 - tmp20 - tmp20;
4620
tmp20 = (INT32) wsptr[2];
4621
tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */
4622
tmp20 = tmp11 + tmp10;
4623
tmp22 = tmp11 - tmp10;
4624
4625
/* Odd part */
4626
4627
z1 = (INT32) wsptr[1];
4628
z2 = (INT32) wsptr[3];
4629
z3 = (INT32) wsptr[5];
4630
tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
4631
tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
4632
tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
4633
tmp11 = (z1 - z2 - z3) << CONST_BITS;
4634
4635
/* Final output stage */
4636
4637
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
4638
CONST_BITS+PASS1_BITS+3)
4639
& RANGE_MASK];
4640
outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
4641
CONST_BITS+PASS1_BITS+3)
4642
& RANGE_MASK];
4643
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
4644
CONST_BITS+PASS1_BITS+3)
4645
& RANGE_MASK];
4646
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
4647
CONST_BITS+PASS1_BITS+3)
4648
& RANGE_MASK];
4649
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
4650
CONST_BITS+PASS1_BITS+3)
4651
& RANGE_MASK];
4652
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
4653
CONST_BITS+PASS1_BITS+3)
4654
& RANGE_MASK];
4655
4656
wsptr += 6; /* advance pointer to next row */
4657
}
4658
}
4659
4660
4661
/*
4662
* Perform dequantization and inverse DCT on one block of coefficients,
4663
* producing a 5x10 output block.
4664
*
4665
* 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
4666
*/
4667
4668
GLOBAL(void)
4669
jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4670
JCOEFPTR coef_block,
4671
JSAMPARRAY output_buf, JDIMENSION output_col)
4672
{
4673
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
4674
INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
4675
INT32 z1, z2, z3, z4, z5;
4676
JCOEFPTR inptr;
4677
ISLOW_MULT_TYPE * quantptr;
4678
int * wsptr;
4679
JSAMPROW outptr;
4680
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4681
int ctr;
4682
int workspace[5*10]; /* buffers data between passes */
4683
SHIFT_TEMPS
4684
4685
/* Pass 1: process columns from input, store into work array.
4686
* 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
4687
*/
4688
4689
inptr = coef_block;
4690
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4691
wsptr = workspace;
4692
for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
4693
/* Even part */
4694
4695
z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4696
z3 <<= CONST_BITS;
4697
/* Add fudge factor here for final descale. */
4698
z3 += ONE << (CONST_BITS-PASS1_BITS-1);
4699
z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4700
z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */
4701
z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */
4702
tmp10 = z3 + z1;
4703
tmp11 = z3 - z2;
4704
4705
tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */
4706
CONST_BITS-PASS1_BITS);
4707
4708
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4709
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4710
4711
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */
4712
tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
4713
tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
4714
4715
tmp20 = tmp10 + tmp12;
4716
tmp24 = tmp10 - tmp12;
4717
tmp21 = tmp11 + tmp13;
4718
tmp23 = tmp11 - tmp13;
4719
4720
/* Odd part */
4721
4722
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4723
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4724
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4725
z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4726
4727
tmp11 = z2 + z4;
4728
tmp13 = z2 - z4;
4729
4730
tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */
4731
z5 = z3 << CONST_BITS;
4732
4733
z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */
4734
z4 = z5 + tmp12;
4735
4736
tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
4737
tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
4738
4739
z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */
4740
z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
4741
4742
tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
4743
4744
tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
4745
tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
4746
4747
/* Final output stage */
4748
4749
wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
4750
wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
4751
wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
4752
wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
4753
wsptr[5*2] = (int) (tmp22 + tmp12);
4754
wsptr[5*7] = (int) (tmp22 - tmp12);
4755
wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
4756
wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
4757
wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
4758
wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
4759
}
4760
4761
/* Pass 2: process 10 rows from work array, store into output array.
4762
* 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
4763
*/
4764
4765
wsptr = workspace;
4766
for (ctr = 0; ctr < 10; ctr++) {
4767
outptr = output_buf[ctr] + output_col;
4768
4769
/* Even part */
4770
4771
/* Add range center and fudge factor for final descale and range-limit. */
4772
tmp12 = (INT32) wsptr[0] +
4773
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4774
(ONE << (PASS1_BITS+2)));
4775
tmp12 <<= CONST_BITS;
4776
tmp13 = (INT32) wsptr[2];
4777
tmp14 = (INT32) wsptr[4];
4778
z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
4779
z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
4780
z3 = tmp12 + z2;
4781
tmp10 = z3 + z1;
4782
tmp11 = z3 - z1;
4783
tmp12 -= z2 << 2;
4784
4785
/* Odd part */
4786
4787
z2 = (INT32) wsptr[1];
4788
z3 = (INT32) wsptr[3];
4789
4790
z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */
4791
tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */
4792
tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */
4793
4794
/* Final output stage */
4795
4796
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13,
4797
CONST_BITS+PASS1_BITS+3)
4798
& RANGE_MASK];
4799
outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13,
4800
CONST_BITS+PASS1_BITS+3)
4801
& RANGE_MASK];
4802
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14,
4803
CONST_BITS+PASS1_BITS+3)
4804
& RANGE_MASK];
4805
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14,
4806
CONST_BITS+PASS1_BITS+3)
4807
& RANGE_MASK];
4808
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
4809
CONST_BITS+PASS1_BITS+3)
4810
& RANGE_MASK];
4811
4812
wsptr += 5; /* advance pointer to next row */
4813
}
4814
}
4815
4816
4817
/*
4818
* Perform dequantization and inverse DCT on one block of coefficients,
4819
* producing a 4x8 output block.
4820
*
4821
* 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
4822
*/
4823
4824
GLOBAL(void)
4825
jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
4826
JCOEFPTR coef_block,
4827
JSAMPARRAY output_buf, JDIMENSION output_col)
4828
{
4829
INT32 tmp0, tmp1, tmp2, tmp3;
4830
INT32 tmp10, tmp11, tmp12, tmp13;
4831
INT32 z1, z2, z3;
4832
JCOEFPTR inptr;
4833
ISLOW_MULT_TYPE * quantptr;
4834
int * wsptr;
4835
JSAMPROW outptr;
4836
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
4837
int ctr;
4838
int workspace[4*8]; /* buffers data between passes */
4839
SHIFT_TEMPS
4840
4841
/* Pass 1: process columns from input, store into work array.
4842
* Note results are scaled up by sqrt(8) compared to a true IDCT;
4843
* furthermore, we scale the results by 2**PASS1_BITS.
4844
* 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4845
*/
4846
4847
inptr = coef_block;
4848
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
4849
wsptr = workspace;
4850
for (ctr = 4; ctr > 0; ctr--) {
4851
/* Due to quantization, we will usually find that many of the input
4852
* coefficients are zero, especially the AC terms. We can exploit this
4853
* by short-circuiting the IDCT calculation for any column in which all
4854
* the AC terms are zero. In that case each output is equal to the
4855
* DC coefficient (with scale factor as needed).
4856
* With typical images and quantization tables, half or more of the
4857
* column DCT calculations can be simplified this way.
4858
*/
4859
4860
if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
4861
inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
4862
inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
4863
inptr[DCTSIZE*7] == 0) {
4864
/* AC terms all zero */
4865
int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
4866
4867
wsptr[4*0] = dcval;
4868
wsptr[4*1] = dcval;
4869
wsptr[4*2] = dcval;
4870
wsptr[4*3] = dcval;
4871
wsptr[4*4] = dcval;
4872
wsptr[4*5] = dcval;
4873
wsptr[4*6] = dcval;
4874
wsptr[4*7] = dcval;
4875
4876
inptr++; /* advance pointers to next column */
4877
quantptr++;
4878
wsptr++;
4879
continue;
4880
}
4881
4882
/* Even part: reverse the even part of the forward DCT.
4883
* The rotator is c(-6).
4884
*/
4885
4886
z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
4887
z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
4888
4889
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4890
tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4891
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4892
4893
z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
4894
z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
4895
z2 <<= CONST_BITS;
4896
z3 <<= CONST_BITS;
4897
/* Add fudge factor here for final descale. */
4898
z2 += ONE << (CONST_BITS-PASS1_BITS-1);
4899
4900
tmp0 = z2 + z3;
4901
tmp1 = z2 - z3;
4902
4903
tmp10 = tmp0 + tmp2;
4904
tmp13 = tmp0 - tmp2;
4905
tmp11 = tmp1 + tmp3;
4906
tmp12 = tmp1 - tmp3;
4907
4908
/* Odd part per figure 8; the matrix is unitary and hence its
4909
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4910
*/
4911
4912
tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
4913
tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
4914
tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
4915
tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
4916
4917
z2 = tmp0 + tmp2;
4918
z3 = tmp1 + tmp3;
4919
4920
z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */
4921
z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */
4922
z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */
4923
z2 += z1;
4924
z3 += z1;
4925
4926
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4927
tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */
4928
tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */
4929
tmp0 += z1 + z2;
4930
tmp3 += z1 + z3;
4931
4932
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4933
tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */
4934
tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */
4935
tmp1 += z1 + z3;
4936
tmp2 += z1 + z2;
4937
4938
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
4939
4940
wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
4941
wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
4942
wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
4943
wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
4944
wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
4945
wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
4946
wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
4947
wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
4948
4949
inptr++; /* advance pointers to next column */
4950
quantptr++;
4951
wsptr++;
4952
}
4953
4954
/* Pass 2: process 8 rows from work array, store into output array.
4955
* 4-point IDCT kernel,
4956
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
4957
*/
4958
4959
wsptr = workspace;
4960
for (ctr = 0; ctr < 8; ctr++) {
4961
outptr = output_buf[ctr] + output_col;
4962
4963
/* Even part */
4964
4965
/* Add range center and fudge factor for final descale and range-limit. */
4966
tmp0 = (INT32) wsptr[0] +
4967
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
4968
(ONE << (PASS1_BITS+2)));
4969
tmp2 = (INT32) wsptr[2];
4970
4971
tmp10 = (tmp0 + tmp2) << CONST_BITS;
4972
tmp12 = (tmp0 - tmp2) << CONST_BITS;
4973
4974
/* Odd part */
4975
/* Same rotation as in the even part of the 8x8 LL&M IDCT */
4976
4977
z2 = (INT32) wsptr[1];
4978
z3 = (INT32) wsptr[3];
4979
4980
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
4981
tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
4982
tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
4983
4984
/* Final output stage */
4985
4986
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
4987
CONST_BITS+PASS1_BITS+3)
4988
& RANGE_MASK];
4989
outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
4990
CONST_BITS+PASS1_BITS+3)
4991
& RANGE_MASK];
4992
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
4993
CONST_BITS+PASS1_BITS+3)
4994
& RANGE_MASK];
4995
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
4996
CONST_BITS+PASS1_BITS+3)
4997
& RANGE_MASK];
4998
4999
wsptr += 4; /* advance pointer to next row */
5000
}
5001
}
5002
5003
5004
/*
5005
* Perform dequantization and inverse DCT on one block of coefficients,
5006
* producing a reduced-size 3x6 output block.
5007
*
5008
* 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
5009
*/
5010
5011
GLOBAL(void)
5012
jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5013
JCOEFPTR coef_block,
5014
JSAMPARRAY output_buf, JDIMENSION output_col)
5015
{
5016
INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
5017
INT32 z1, z2, z3;
5018
JCOEFPTR inptr;
5019
ISLOW_MULT_TYPE * quantptr;
5020
int * wsptr;
5021
JSAMPROW outptr;
5022
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5023
int ctr;
5024
int workspace[3*6]; /* buffers data between passes */
5025
SHIFT_TEMPS
5026
5027
/* Pass 1: process columns from input, store into work array.
5028
* 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
5029
*/
5030
5031
inptr = coef_block;
5032
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5033
wsptr = workspace;
5034
for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
5035
/* Even part */
5036
5037
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5038
tmp0 <<= CONST_BITS;
5039
/* Add fudge factor here for final descale. */
5040
tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
5041
tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
5042
tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */
5043
tmp1 = tmp0 + tmp10;
5044
tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
5045
tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5046
tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */
5047
tmp10 = tmp1 + tmp0;
5048
tmp12 = tmp1 - tmp0;
5049
5050
/* Odd part */
5051
5052
z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5053
z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5054
z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
5055
tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
5056
tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
5057
tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
5058
tmp1 = (z1 - z2 - z3) << PASS1_BITS;
5059
5060
/* Final output stage */
5061
5062
wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
5063
wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
5064
wsptr[3*1] = (int) (tmp11 + tmp1);
5065
wsptr[3*4] = (int) (tmp11 - tmp1);
5066
wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
5067
wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
5068
}
5069
5070
/* Pass 2: process 6 rows from work array, store into output array.
5071
* 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
5072
*/
5073
5074
wsptr = workspace;
5075
for (ctr = 0; ctr < 6; ctr++) {
5076
outptr = output_buf[ctr] + output_col;
5077
5078
/* Even part */
5079
5080
/* Add range center and fudge factor for final descale and range-limit. */
5081
tmp0 = (INT32) wsptr[0] +
5082
((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
5083
(ONE << (PASS1_BITS+2)));
5084
tmp0 <<= CONST_BITS;
5085
tmp2 = (INT32) wsptr[2];
5086
tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
5087
tmp10 = tmp0 + tmp12;
5088
tmp2 = tmp0 - tmp12 - tmp12;
5089
5090
/* Odd part */
5091
5092
tmp12 = (INT32) wsptr[1];
5093
tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
5094
5095
/* Final output stage */
5096
5097
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
5098
CONST_BITS+PASS1_BITS+3)
5099
& RANGE_MASK];
5100
outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
5101
CONST_BITS+PASS1_BITS+3)
5102
& RANGE_MASK];
5103
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
5104
CONST_BITS+PASS1_BITS+3)
5105
& RANGE_MASK];
5106
5107
wsptr += 3; /* advance pointer to next row */
5108
}
5109
}
5110
5111
5112
/*
5113
* Perform dequantization and inverse DCT on one block of coefficients,
5114
* producing a 2x4 output block.
5115
*
5116
* 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
5117
*/
5118
5119
GLOBAL(void)
5120
jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5121
JCOEFPTR coef_block,
5122
JSAMPARRAY output_buf, JDIMENSION output_col)
5123
{
5124
INT32 tmp0, tmp2, tmp10, tmp12;
5125
INT32 z1, z2, z3;
5126
JCOEFPTR inptr;
5127
ISLOW_MULT_TYPE * quantptr;
5128
INT32 * wsptr;
5129
JSAMPROW outptr;
5130
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5131
int ctr;
5132
INT32 workspace[2*4]; /* buffers data between passes */
5133
SHIFT_TEMPS
5134
5135
/* Pass 1: process columns from input, store into work array.
5136
* 4-point IDCT kernel,
5137
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
5138
*/
5139
5140
inptr = coef_block;
5141
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5142
wsptr = workspace;
5143
for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
5144
/* Even part */
5145
5146
tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
5147
tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
5148
5149
tmp10 = (tmp0 + tmp2) << CONST_BITS;
5150
tmp12 = (tmp0 - tmp2) << CONST_BITS;
5151
5152
/* Odd part */
5153
/* Same rotation as in the even part of the 8x8 LL&M IDCT */
5154
5155
z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
5156
z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
5157
5158
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
5159
tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
5160
tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
5161
5162
/* Final output stage */
5163
5164
wsptr[2*0] = tmp10 + tmp0;
5165
wsptr[2*3] = tmp10 - tmp0;
5166
wsptr[2*1] = tmp12 + tmp2;
5167
wsptr[2*2] = tmp12 - tmp2;
5168
}
5169
5170
/* Pass 2: process 4 rows from work array, store into output array. */
5171
5172
wsptr = workspace;
5173
for (ctr = 0; ctr < 4; ctr++) {
5174
outptr = output_buf[ctr] + output_col;
5175
5176
/* Even part */
5177
5178
/* Add range center and fudge factor for final descale and range-limit. */
5179
tmp10 = wsptr[0] +
5180
((((INT32) RANGE_CENTER) << (CONST_BITS+3)) +
5181
(ONE << (CONST_BITS+2)));
5182
5183
/* Odd part */
5184
5185
tmp0 = wsptr[1];
5186
5187
/* Final output stage */
5188
5189
outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3)
5190
& RANGE_MASK];
5191
outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3)
5192
& RANGE_MASK];
5193
5194
wsptr += 2; /* advance pointer to next row */
5195
}
5196
}
5197
5198
5199
/*
5200
* Perform dequantization and inverse DCT on one block of coefficients,
5201
* producing a 1x2 output block.
5202
*
5203
* 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
5204
*/
5205
5206
GLOBAL(void)
5207
jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
5208
JCOEFPTR coef_block,
5209
JSAMPARRAY output_buf, JDIMENSION output_col)
5210
{
5211
DCTELEM tmp0, tmp1;
5212
ISLOW_MULT_TYPE * quantptr;
5213
JSAMPLE *range_limit = IDCT_range_limit(cinfo);
5214
ISHIFT_TEMPS
5215
5216
/* Process 1 column from input, store into output array. */
5217
5218
quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
5219
5220
/* Even part */
5221
5222
tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
5223
/* Add range center and fudge factor for final descale and range-limit. */
5224
tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
5225
5226
/* Odd part */
5227
5228
tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
5229
5230
/* Final output stage */
5231
5232
output_buf[0][output_col] =
5233
range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
5234
output_buf[1][output_col] =
5235
range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
5236
}
5237
5238
#endif /* IDCT_SCALING_SUPPORTED */
5239
#endif /* DCT_ISLOW_SUPPORTED */
5240
5241