Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
wine-mirror
GitHub Repository: wine-mirror/wine
Path: blob/master/libs/jpeg/jfdctint.c
8693 views
/*
 * jfdctint.c
 *
 * Copyright (C) 1991-1996, Thomas G. Lane.
 * Modification developed 2003-2026 by Guido Vollbeding.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
 * This file contains a slow-but-accurate integer implementation of the
 * forward DCT (Discrete Cosine Transform).
 *
 * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
 * on each column.  Direct algorithms are also available, but they are
 * much more complex and seem not to be any faster when reduced to code.
 *
 * This implementation is based on an algorithm described in
 *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
 *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
 *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
 * The primary algorithm described there uses 11 multiplies and 29 adds.
 * We use their alternate method with 12 multiplies and 32 adds.
 * The advantage of this method is that no data path contains more than one
 * multiplication; this allows a very simple and accurate implementation in
 * scaled fixed-point arithmetic, with a minimal number of shifts.
 *
 * We also provide FDCT routines with various input sample block sizes for
 * direct resolution reduction or enlargement and for direct resolving the
 * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
 * (N=1...16), 2NxN, and Nx2N (N=1...8) samples for one 8x8 output DCT block.
 *
 * For N<8 we fill the remaining block coefficients with zero.
 * For N>8 we apply a partial N-point FDCT on the input samples, computing
 * just the lower 8 frequency coefficients and discarding the rest.
 *
 * We must scale the output coefficients of the N-point FDCT appropriately
 * to the standard 8-point FDCT level by 8/N per 1-D pass.  This scaling
 * is folded into the constant multipliers (pass 2) and/or final/initial
 * shifting.
 *
 * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
 * since there would be too many additional constants to pre-calculate.
 */
43
44
#define JPEG_INTERNALS
45
#include "jinclude.h"
46
#include "jpeglib.h"
47
#include "jdct.h" /* Private declarations for DCT subsystem */
48
49
#ifdef DCT_ISLOW_SUPPORTED
50
51
52
/*
53
* This module is specialized to the case DCTSIZE = 8.
54
*/
55
56
#if DCTSIZE != 8
57
Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
58
#endif
59
60
/*
 * The poop on this scaling stuff is as follows:
 *
 * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
 * larger than the true DCT outputs.  The final outputs are therefore
 * a factor of N larger than desired; since N=8 this can be cured by
 * a simple right shift at the end of the algorithm.  The advantage of
 * this arrangement is that we save two multiplications per 1-D DCT,
 * because the y0 and y4 outputs need not be divided by sqrt(N).
 * In the IJG code, this factor of 8 is removed by the quantization step
 * (in jcdctmgr.c), NOT in this module.
 *
 * We have to do addition and subtraction of the integer inputs, which
 * is no problem, and multiplication by fractional constants, which is
 * a problem to do in integer arithmetic.  We multiply all the constants
 * by CONST_SCALE and convert them to integer constants (thus retaining
 * CONST_BITS bits of precision in the constants).  After doing a
 * multiplication we have to divide the product by CONST_SCALE, with
 * proper rounding, to produce the correct output.  This division can
 * be done cheaply as a right shift of CONST_BITS bits.  We postpone
 * shifting as long as possible so that partial sums can be added
 * together with full fractional precision.
 *
 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
 * they are represented to better-than-integral precision.  These outputs
 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit
 * word with the recommended scaling.  (For higher bit depths, the
 * intermediate array is INT32 anyway.)
 *
 * To avoid overflow of the 32-bit intermediate results in pass 2, we
 * must have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error
 * analysis shows that the values given below are the most effective.
 *
 * PASS1_BITS is derived from the input sample precision (BITS_IN_JSAMPLE)
 * and PASS2_BITS from the output data precision (JPEG_DATA_PRECISION), so
 * that both sum to the same target (10 or 13) within a branch.
 *
 * NOTE(review): no branch below defines these macros when either precision
 * exceeds 13 bits; presumably such configurations are rejected elsewhere --
 * confirm against the headers.
 */

#if BITS_IN_JSAMPLE <= 10 && JPEG_DATA_PRECISION <= 10
#define CONST_BITS  13
#define PASS1_BITS  (10 - BITS_IN_JSAMPLE)
#define PASS2_BITS  (10 - JPEG_DATA_PRECISION)
#else
#if BITS_IN_JSAMPLE <= 13 && JPEG_DATA_PRECISION <= 13
#define CONST_BITS  13
#define PASS1_BITS  (13 - BITS_IN_JSAMPLE)
#define PASS2_BITS  (13 - JPEG_DATA_PRECISION)
#endif
#endif
106
/* Some C compilers fail to reduce "FIX(constant)" at compile time,
 * thus causing a lot of useless floating-point operations at run time.
 * To get around this we use the following pre-calculated constants.
 * If you change CONST_BITS you may want to add appropriate values.
 * (With a reasonable C compiler, you can just rely on the FIX() macro...)
 *
 * Each FIX_a_bbbbbbbbb name encodes the real value a.bbbbbbbbb; the
 * pre-calculated table holds that value scaled for CONST_BITS == 13.
 */

#if CONST_BITS == 13
#define FIX_0_298631336  ((INT32)  2446)	/* FIX(0.298631336) */
#define FIX_0_390180644  ((INT32)  3196)	/* FIX(0.390180644) */
#define FIX_0_541196100  ((INT32)  4433)	/* FIX(0.541196100) */
#define FIX_0_765366865  ((INT32)  6270)	/* FIX(0.765366865) */
#define FIX_0_899976223  ((INT32)  7373)	/* FIX(0.899976223) */
#define FIX_1_175875602  ((INT32)  9633)	/* FIX(1.175875602) */
#define FIX_1_501321110  ((INT32)  12299)	/* FIX(1.501321110) */
#define FIX_1_847759065  ((INT32)  15137)	/* FIX(1.847759065) */
#define FIX_1_961570560  ((INT32)  16069)	/* FIX(1.961570560) */
#define FIX_2_053119869  ((INT32)  16819)	/* FIX(2.053119869) */
#define FIX_2_562915447  ((INT32)  20995)	/* FIX(2.562915447) */
#define FIX_3_072711026  ((INT32)  25172)	/* FIX(3.072711026) */
#else
/* Any other CONST_BITS: let FIX() compute the scaled constants. */
#define FIX_0_298631336  FIX(0.298631336)
#define FIX_0_390180644  FIX(0.390180644)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_765366865  FIX(0.765366865)
#define FIX_0_899976223  FIX(0.899976223)
#define FIX_1_175875602  FIX(1.175875602)
#define FIX_1_501321110  FIX(1.501321110)
#define FIX_1_847759065  FIX(1.847759065)
#define FIX_1_961570560  FIX(1.961570560)
#define FIX_2_053119869  FIX(2.053119869)
#define FIX_2_562915447  FIX(2.562915447)
#define FIX_3_072711026  FIX(3.072711026)
#endif
141
142
/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
 * For up to 10-bit data with the recommended scaling, all the variable
 * and constant values involved are no more than 16 bits wide, so a
 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
 * For higher bit depths, a full 32-bit multiplication will be needed.
 *
 * The condition here is the same one that selects the 10-bit scaling
 * branch for CONST_BITS/PASS1_BITS/PASS2_BITS.
 */

#if BITS_IN_JSAMPLE <= 10 && JPEG_DATA_PRECISION <= 10
#define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
#else
#define MULTIPLY(var,const)  ((var) * (const))
#endif
155
156
/* Pass 1 output: smart scale up.
 * Converts a pass-1 result to DCTELEM, scaled up by 2**PASS1_BITS.
 * The shift is omitted entirely when PASS1_BITS is not positive.
 */

#if PASS1_BITS > 0
#define PASS1_OUTPUT(x)  (DCTELEM) ((x) << PASS1_BITS)
#else
#define PASS1_OUTPUT(x)  (DCTELEM) (x)
#endif
164
165
/* Pass 2 output: smart scale down.
 * Converts a pass-2 result to DCTELEM, scaled down by 2**PASS2_BITS.
 * No rounding term is added here; callers add their fudge factor to the
 * operand beforehand.  The shift is omitted when PASS2_BITS is not positive.
 */

#if PASS2_BITS > 0
#define PASS2_OUTPUT(x)  (DCTELEM) RIGHT_SHIFT(x, PASS2_BITS)
#else
#define PASS2_OUTPUT(x)  (DCTELEM) (x)
#endif
173
174
175
/*
176
* Perform the forward DCT on one block of samples.
177
*/
178
179
GLOBAL(void)
180
jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
181
{
182
INT32 tmp0, tmp1, tmp2, tmp3;
183
INT32 tmp10, tmp11, tmp12, tmp13;
184
INT32 z1;
185
DCTELEM *dataptr;
186
JSAMPROW elemptr;
187
int ctr;
188
SHIFT_TEMPS
189
190
/* Pass 1: process rows.
191
* Note results are scaled up by sqrt(8) compared to a true DCT;
192
* furthermore, we scale the results by 2**PASS1_BITS.
193
* cK represents sqrt(2) * cos(K*pi/16).
194
*/
195
196
dataptr = data;
197
for (ctr = 0; ctr < DCTSIZE; ctr++) {
198
elemptr = sample_data[ctr] + start_col;
199
200
/* Even part per LL&M figure 1 --- note that published figure is faulty;
201
* rotator "c1" should be "c6".
202
*/
203
204
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
205
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
206
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
207
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
208
209
tmp10 = tmp0 + tmp3;
210
tmp12 = tmp0 - tmp3;
211
tmp11 = tmp1 + tmp2;
212
tmp13 = tmp1 - tmp2;
213
214
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
215
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
216
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
217
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
218
219
/* Apply unsigned->signed conversion. */
220
dataptr[0] = PASS1_OUTPUT(tmp10 + tmp11 - 8 * CENTERJSAMPLE);
221
dataptr[4] = PASS1_OUTPUT(tmp10 - tmp11);
222
223
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
224
/* Add fudge factor here for final descale. */
225
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
226
227
dataptr[2] = (DCTELEM)
228
RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
229
CONST_BITS-PASS1_BITS);
230
dataptr[6] = (DCTELEM)
231
RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
232
CONST_BITS-PASS1_BITS);
233
234
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
235
* i0..i3 in the paper are tmp0..tmp3 here.
236
*/
237
238
tmp12 = tmp0 + tmp2;
239
tmp13 = tmp1 + tmp3;
240
241
z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
242
/* Add fudge factor here for final descale. */
243
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
244
245
tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
246
tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
247
tmp12 += z1;
248
tmp13 += z1;
249
250
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
251
tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
252
tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
253
tmp0 += z1 + tmp12;
254
tmp3 += z1 + tmp13;
255
256
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
257
tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
258
tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
259
tmp1 += z1 + tmp13;
260
tmp2 += z1 + tmp12;
261
262
dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS);
263
dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS);
264
dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
265
dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS);
266
267
dataptr += DCTSIZE; /* advance pointer to next row */
268
}
269
270
/* Pass 2: process columns.
271
* We apply the PASS2_BITS scaling, but leave the
272
* results scaled up by an overall factor of 8.
273
* cK represents sqrt(2) * cos(K*pi/16).
274
*/
275
276
dataptr = data;
277
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
278
/* Even part per LL&M figure 1 --- note that published figure is faulty;
279
* rotator "c1" should be "c6".
280
*/
281
282
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
283
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
284
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
285
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
286
287
/* Add fudge factor here for final descale. */
288
#if PASS2_BITS > 1
289
tmp10 = tmp0 + tmp3 + (ONE << (PASS2_BITS-1));
290
#else
291
#if PASS2_BITS > 0
292
tmp10 = tmp0 + tmp3 + ONE;
293
#else
294
tmp10 = tmp0 + tmp3;
295
#endif
296
#endif
297
tmp12 = tmp0 - tmp3;
298
tmp11 = tmp1 + tmp2;
299
tmp13 = tmp1 - tmp2;
300
301
tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
302
tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
303
tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
304
tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
305
306
dataptr[DCTSIZE*0] = PASS2_OUTPUT(tmp10 + tmp11);
307
dataptr[DCTSIZE*4] = PASS2_OUTPUT(tmp10 - tmp11);
308
309
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
310
/* Add fudge factor here for final descale. */
311
z1 += ONE << (CONST_BITS+PASS2_BITS-1);
312
313
dataptr[DCTSIZE*2] = (DCTELEM)
314
RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
315
CONST_BITS+PASS2_BITS);
316
dataptr[DCTSIZE*6] = (DCTELEM)
317
RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
318
CONST_BITS+PASS2_BITS);
319
320
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
321
* i0..i3 in the paper are tmp0..tmp3 here.
322
*/
323
324
tmp12 = tmp0 + tmp2;
325
tmp13 = tmp1 + tmp3;
326
327
z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
328
/* Add fudge factor here for final descale. */
329
z1 += ONE << (CONST_BITS+PASS2_BITS-1);
330
331
tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
332
tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
333
tmp12 += z1;
334
tmp13 += z1;
335
336
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
337
tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
338
tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
339
tmp0 += z1 + tmp12;
340
tmp3 += z1 + tmp13;
341
342
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
343
tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
344
tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
345
tmp1 += z1 + tmp13;
346
tmp2 += z1 + tmp12;
347
348
dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS2_BITS);
349
dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS2_BITS);
350
dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS2_BITS);
351
dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS2_BITS);
352
353
dataptr++; /* advance pointer to next column */
354
}
355
}
356
357
#ifdef DCT_SCALING_SUPPORTED
358
359
360
/*
361
* Perform the forward DCT on a 7x7 sample block.
362
*/
363
364
GLOBAL(void)
365
jpeg_fdct_7x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
366
{
367
INT32 tmp0, tmp1, tmp2, tmp3;
368
INT32 tmp10, tmp11, tmp12;
369
INT32 z1, z2, z3;
370
DCTELEM *dataptr;
371
JSAMPROW elemptr;
372
int ctr;
373
SHIFT_TEMPS
374
375
/* Pre-zero output coefficient block. */
376
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
377
378
/* Pass 1: process rows.
379
* Note results are scaled up by sqrt(8) compared to a true DCT;
380
* furthermore, we scale the results by 2**PASS1_BITS.
381
* cK represents sqrt(2) * cos(K*pi/14).
382
*/
383
384
dataptr = data;
385
for (ctr = 0; ctr < 7; ctr++) {
386
elemptr = sample_data[ctr] + start_col;
387
388
/* Even part */
389
390
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
391
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
392
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
393
tmp3 = GETJSAMPLE(elemptr[3]);
394
395
tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
396
tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
397
tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
398
399
z1 = tmp0 + tmp2;
400
/* Apply unsigned->signed conversion. */
401
dataptr[0] = PASS1_OUTPUT(z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE);
402
tmp3 += tmp3;
403
z1 -= tmp3;
404
z1 -= tmp3;
405
z1 = MULTIPLY(z1, FIX(0.353553391)); /* (c2+c6-c4)/2 */
406
z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002)); /* (c2+c4-c6)/2 */
407
z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123)); /* c6 */
408
dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
409
z1 -= z2;
410
z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734)); /* c4 */
411
dataptr[4] = (DCTELEM)
412
DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
413
CONST_BITS-PASS1_BITS);
414
dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
415
416
/* Odd part */
417
418
tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347)); /* (c3+c1-c5)/2 */
419
tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339)); /* (c3+c5-c1)/2 */
420
tmp0 = tmp1 - tmp2;
421
tmp1 += tmp2;
422
tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
423
tmp1 += tmp2;
424
tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268)); /* c5 */
425
tmp0 += tmp3;
426
tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693)); /* c3+c1-c5 */
427
428
dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
429
dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
430
dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
431
432
dataptr += DCTSIZE; /* advance pointer to next row */
433
}
434
435
/* Pass 2: process columns.
436
* We apply the PASS2_BITS scaling, but leave the
437
* results scaled up by an overall factor of 8.
438
* We must also scale the output by (8/7)**2 = 64/49,
439
* which we fold into the constant multipliers:
440
* cK now represents sqrt(2) * cos(K*pi/14) * 64/49.
441
*/
442
443
dataptr = data;
444
for (ctr = 0; ctr < 7; ctr++) {
445
/* Even part */
446
447
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
448
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
449
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
450
tmp3 = dataptr[DCTSIZE*3];
451
452
tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
453
tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
454
tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
455
456
z1 = tmp0 + tmp2;
457
dataptr[DCTSIZE*0] = (DCTELEM)
458
DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
459
CONST_BITS+PASS2_BITS);
460
tmp3 += tmp3;
461
z1 -= tmp3;
462
z1 -= tmp3;
463
z1 = MULTIPLY(z1, FIX(0.461784020)); /* (c2+c6-c4)/2 */
464
z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084)); /* (c2+c4-c6)/2 */
465
z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446)); /* c6 */
466
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS2_BITS);
467
z1 -= z2;
468
z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509)); /* c4 */
469
dataptr[DCTSIZE*4] = (DCTELEM)
470
DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
471
CONST_BITS+PASS2_BITS);
472
dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS2_BITS);
473
474
/* Odd part */
475
476
tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677)); /* (c3+c1-c5)/2 */
477
tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464)); /* (c3+c5-c1)/2 */
478
tmp0 = tmp1 - tmp2;
479
tmp1 += tmp2;
480
tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
481
tmp1 += tmp2;
482
tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310)); /* c5 */
483
tmp0 += tmp3;
484
tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355)); /* c3+c1-c5 */
485
486
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS2_BITS);
487
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS2_BITS);
488
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS2_BITS);
489
490
dataptr++; /* advance pointer to next column */
491
}
492
}
493
494
495
/*
496
* Perform the forward DCT on a 6x6 sample block.
497
*/
498
499
GLOBAL(void)
500
jpeg_fdct_6x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
501
{
502
INT32 tmp0, tmp1, tmp2;
503
INT32 tmp10, tmp11, tmp12;
504
DCTELEM *dataptr;
505
JSAMPROW elemptr;
506
int ctr;
507
SHIFT_TEMPS
508
509
/* Pre-zero output coefficient block. */
510
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
511
512
/* Pass 1: process rows.
513
* Note results are scaled up by sqrt(8) compared to a true DCT;
514
* furthermore, we scale the results by 2**PASS1_BITS.
515
* cK represents sqrt(2) * cos(K*pi/12).
516
*/
517
518
dataptr = data;
519
for (ctr = 0; ctr < 6; ctr++) {
520
elemptr = sample_data[ctr] + start_col;
521
522
/* Even part */
523
524
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
525
tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
526
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
527
528
tmp10 = tmp0 + tmp2;
529
tmp12 = tmp0 - tmp2;
530
531
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
532
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
533
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
534
535
/* Apply unsigned->signed conversion. */
536
dataptr[0] = PASS1_OUTPUT(tmp10 + tmp11 - 6 * CENTERJSAMPLE);
537
dataptr[2] = (DCTELEM)
538
DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */
539
CONST_BITS-PASS1_BITS);
540
dataptr[4] = (DCTELEM)
541
DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
542
CONST_BITS-PASS1_BITS);
543
544
/* Odd part */
545
546
tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */
547
CONST_BITS-PASS1_BITS);
548
549
#if PASS1_BITS > 0
550
dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
551
dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
552
dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
553
#else
554
dataptr[1] = (DCTELEM) (tmp10 + tmp0 + tmp1);
555
dataptr[3] = (DCTELEM) (tmp0 - tmp1 - tmp2);
556
dataptr[5] = (DCTELEM) (tmp10 + tmp2 - tmp1);
557
#endif
558
559
dataptr += DCTSIZE; /* advance pointer to next row */
560
}
561
562
/* Pass 2: process columns.
563
* We apply the PASS2_BITS scaling, but leave the
564
* results scaled up by an overall factor of 8.
565
* We must also scale the output by (8/6)**2 = 16/9,
566
* which we fold into the constant multipliers:
567
* cK now represents sqrt(2) * cos(K*pi/12) * 16/9.
568
*/
569
570
dataptr = data;
571
for (ctr = 0; ctr < 6; ctr++) {
572
/* Even part */
573
574
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
575
tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
576
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
577
578
tmp10 = tmp0 + tmp2;
579
tmp12 = tmp0 - tmp2;
580
581
tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
582
tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
583
tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
584
585
dataptr[DCTSIZE*0] = (DCTELEM)
586
DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */
587
CONST_BITS+PASS2_BITS);
588
dataptr[DCTSIZE*2] = (DCTELEM)
589
DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */
590
CONST_BITS+PASS2_BITS);
591
dataptr[DCTSIZE*4] = (DCTELEM)
592
DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
593
CONST_BITS+PASS2_BITS);
594
595
/* Odd part */
596
597
tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */
598
599
dataptr[DCTSIZE*1] = (DCTELEM)
600
DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */
601
CONST_BITS+PASS2_BITS);
602
dataptr[DCTSIZE*3] = (DCTELEM)
603
DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */
604
CONST_BITS+PASS2_BITS);
605
dataptr[DCTSIZE*5] = (DCTELEM)
606
DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */
607
CONST_BITS+PASS2_BITS);
608
609
dataptr++; /* advance pointer to next column */
610
}
611
}
612
613
614
/*
615
* Perform the forward DCT on a 5x5 sample block.
616
*/
617
618
GLOBAL(void)
619
jpeg_fdct_5x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
620
{
621
INT32 tmp0, tmp1, tmp2;
622
INT32 tmp10, tmp11;
623
DCTELEM *dataptr;
624
JSAMPROW elemptr;
625
int ctr;
626
SHIFT_TEMPS
627
628
/* Pre-zero output coefficient block. */
629
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
630
631
/* Pass 1: process rows.
632
* Note results are scaled up by sqrt(8) compared to a true DCT;
633
* furthermore, we scale the results by 2**PASS1_BITS.
634
* We scale the results further by 2 as part of output adaption
635
* scaling for different DCT size.
636
* cK represents sqrt(2) * cos(K*pi/10).
637
*/
638
639
dataptr = data;
640
for (ctr = 0; ctr < 5; ctr++) {
641
elemptr = sample_data[ctr] + start_col;
642
643
/* Even part */
644
645
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
646
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
647
tmp2 = GETJSAMPLE(elemptr[2]);
648
649
tmp10 = tmp0 + tmp1;
650
tmp11 = tmp0 - tmp1;
651
652
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
653
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
654
655
/* Apply unsigned->signed conversion. */
656
dataptr[0] = (DCTELEM)
657
((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << (PASS1_BITS+1));
658
tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */
659
tmp10 -= tmp2 << 2;
660
tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */
661
dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS-1);
662
dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS-1);
663
664
/* Odd part */
665
666
tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876)); /* c3 */
667
668
dataptr[1] = (DCTELEM)
669
DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
670
CONST_BITS-PASS1_BITS-1);
671
dataptr[3] = (DCTELEM)
672
DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
673
CONST_BITS-PASS1_BITS-1);
674
675
dataptr += DCTSIZE; /* advance pointer to next row */
676
}
677
678
/* Pass 2: process columns.
679
* We apply the PASS2_BITS scaling, but leave the
680
* results scaled up by an overall factor of 8.
681
* We must also scale the output by (8/5)**2 = 64/25, which we partially
682
* fold into the constant multipliers (other part was done in pass 1):
683
* cK now represents sqrt(2) * cos(K*pi/10) * 32/25.
684
*/
685
686
dataptr = data;
687
for (ctr = 0; ctr < 5; ctr++) {
688
/* Even part */
689
690
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
691
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
692
tmp2 = dataptr[DCTSIZE*2];
693
694
tmp10 = tmp0 + tmp1;
695
tmp11 = tmp0 - tmp1;
696
697
tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
698
tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
699
700
dataptr[DCTSIZE*0] = (DCTELEM)
701
DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)), /* 32/25 */
702
CONST_BITS+PASS2_BITS);
703
tmp11 = MULTIPLY(tmp11, FIX(1.011928851)); /* (c2+c4)/2 */
704
tmp10 -= tmp2 << 2;
705
tmp10 = MULTIPLY(tmp10, FIX(0.452548340)); /* (c2-c4)/2 */
706
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS2_BITS);
707
dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS2_BITS);
708
709
/* Odd part */
710
711
tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961)); /* c3 */
712
713
dataptr[DCTSIZE*1] = (DCTELEM)
714
DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
715
CONST_BITS+PASS2_BITS);
716
dataptr[DCTSIZE*3] = (DCTELEM)
717
DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
718
CONST_BITS+PASS2_BITS);
719
720
dataptr++; /* advance pointer to next column */
721
}
722
}
723
724
725
/*
726
* Perform the forward DCT on a 4x4 sample block.
727
*/
728
729
GLOBAL(void)
730
jpeg_fdct_4x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
731
{
732
INT32 tmp0, tmp1;
733
INT32 tmp10, tmp11;
734
DCTELEM *dataptr;
735
JSAMPROW elemptr;
736
int ctr;
737
SHIFT_TEMPS
738
739
/* Pre-zero output coefficient block. */
740
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
741
742
/* Pass 1: process rows.
743
* Note results are scaled up by sqrt(8) compared to a true DCT;
744
* furthermore, we scale the results by 2**PASS1_BITS.
745
* We must also scale the output by (8/4)**2 = 2**2, which we add here.
746
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
747
*/
748
749
dataptr = data;
750
for (ctr = 0; ctr < 4; ctr++) {
751
elemptr = sample_data[ctr] + start_col;
752
753
/* Even part */
754
755
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
756
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
757
758
tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
759
tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
760
761
/* Apply unsigned->signed conversion. */
762
dataptr[0] = (DCTELEM)
763
((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+2));
764
dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+2));
765
766
/* Odd part */
767
768
tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
769
/* Add fudge factor here for final descale. */
770
tmp0 += ONE << (CONST_BITS-PASS1_BITS-3);
771
772
dataptr[1] = (DCTELEM)
773
RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
774
CONST_BITS-PASS1_BITS-2);
775
dataptr[3] = (DCTELEM)
776
RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
777
CONST_BITS-PASS1_BITS-2);
778
779
dataptr += DCTSIZE; /* advance pointer to next row */
780
}
781
782
/* Pass 2: process columns.
783
* We apply the PASS2_BITS scaling, but leave the
784
* results scaled up by an overall factor of 8.
785
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
786
*/
787
788
dataptr = data;
789
for (ctr = 0; ctr < 4; ctr++) {
790
/* Even part */
791
792
/* Add fudge factor here for final descale. */
793
#if PASS2_BITS > 1
794
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS2_BITS-1));
795
#else
796
#if PASS2_BITS > 0
797
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + ONE;
798
#else
799
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3];
800
#endif
801
#endif
802
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
803
804
tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
805
tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
806
807
dataptr[DCTSIZE*0] = PASS2_OUTPUT(tmp0 + tmp1);
808
dataptr[DCTSIZE*2] = PASS2_OUTPUT(tmp0 - tmp1);
809
810
/* Odd part */
811
812
tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
813
/* Add fudge factor here for final descale. */
814
tmp0 += ONE << (CONST_BITS+PASS2_BITS-1);
815
816
dataptr[DCTSIZE*1] = (DCTELEM)
817
RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
818
CONST_BITS+PASS2_BITS);
819
dataptr[DCTSIZE*3] = (DCTELEM)
820
RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
821
CONST_BITS+PASS2_BITS);
822
823
dataptr++; /* advance pointer to next column */
824
}
825
}
826
827
828
/*
829
* Perform the forward DCT on a 3x3 sample block.
830
*/
831
832
GLOBAL(void)
833
jpeg_fdct_3x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
834
{
835
INT32 tmp0, tmp1, tmp2;
836
DCTELEM *dataptr;
837
JSAMPROW elemptr;
838
int ctr;
839
SHIFT_TEMPS
840
841
/* Pre-zero output coefficient block. */
842
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
843
844
/* Pass 1: process rows.
845
* Note results are scaled up by sqrt(8) compared to a true DCT;
846
* furthermore, we scale the results by 2**PASS1_BITS.
847
* We scale the results further by 2**2 as part of output adaption
848
* scaling for different DCT size.
849
* cK represents sqrt(2) * cos(K*pi/6).
850
*/
851
852
dataptr = data;
853
for (ctr = 0; ctr < 3; ctr++) {
854
elemptr = sample_data[ctr] + start_col;
855
856
/* Even part */
857
858
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
859
tmp1 = GETJSAMPLE(elemptr[1]);
860
861
tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
862
863
/* Apply unsigned->signed conversion. */
864
dataptr[0] = (DCTELEM)
865
((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+2));
866
dataptr[2] = (DCTELEM)
867
DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
868
CONST_BITS-PASS1_BITS-2);
869
870
/* Odd part */
871
872
dataptr[1] = (DCTELEM)
873
DESCALE(MULTIPLY(tmp2, FIX(1.224744871)), /* c1 */
874
CONST_BITS-PASS1_BITS-2);
875
876
dataptr += DCTSIZE; /* advance pointer to next row */
877
}
878
879
/* Pass 2: process columns.
880
* We apply the PASS2_BITS scaling, but leave the
881
* results scaled up by an overall factor of 8.
882
* We must also scale the output by (8/3)**2 = 64/9, which we partially
883
* fold into the constant multipliers (other part was done in pass 1):
884
* cK now represents sqrt(2) * cos(K*pi/6) * 16/9.
885
*/
886
887
dataptr = data;
888
for (ctr = 0; ctr < 3; ctr++) {
889
/* Even part */
890
891
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
892
tmp1 = dataptr[DCTSIZE*1];
893
894
tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
895
896
dataptr[DCTSIZE*0] = (DCTELEM)
897
DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */
898
CONST_BITS+PASS2_BITS);
899
dataptr[DCTSIZE*2] = (DCTELEM)
900
DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
901
CONST_BITS+PASS2_BITS);
902
903
/* Odd part */
904
905
dataptr[DCTSIZE*1] = (DCTELEM)
906
DESCALE(MULTIPLY(tmp2, FIX(2.177324216)), /* c1 */
907
CONST_BITS+PASS2_BITS);
908
909
dataptr++; /* advance pointer to next column */
910
}
911
}
912
913
914
/*
915
* Perform the forward DCT on a 2x2 sample block.
916
*/
917
918
GLOBAL(void)
919
jpeg_fdct_2x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
920
{
921
DCTELEM tmp0, tmp1, tmp2, tmp3;
922
JSAMPROW elemptr;
923
924
/* Pre-zero output coefficient block. */
925
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
926
927
/* Pass 1: process rows.
928
* Note results are scaled up by sqrt(8) compared to a true DCT.
929
*/
930
931
/* Row 0 */
932
elemptr = sample_data[0] + start_col;
933
934
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
935
tmp1 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
936
937
/* Row 1 */
938
elemptr = sample_data[1] + start_col;
939
940
tmp2 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
941
tmp3 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
942
943
/* Pass 2: process columns.
944
* We leave the results scaled up by an overall factor of 8.
945
* We must also scale the output by (8/2)**2 = 2**4.
946
*/
947
948
/* Column 0 */
949
/* Apply unsigned->signed conversion. */
950
951
#if PASS2_BITS < PASS1_BITS + 4
952
data[DCTSIZE*0] =
953
(tmp0 + tmp2 - 4 * CENTERJSAMPLE) << (4+PASS1_BITS-PASS2_BITS);
954
data[DCTSIZE*1] = (tmp0 - tmp2) << (4+PASS1_BITS-PASS2_BITS);
955
956
/* Column 1 */
957
data[DCTSIZE*0+1] = (tmp1 + tmp3) << (4+PASS1_BITS-PASS2_BITS);
958
data[DCTSIZE*1+1] = (tmp1 - tmp3) << (4+PASS1_BITS-PASS2_BITS);
959
#else
960
data[DCTSIZE*0] = tmp0 + tmp2 - 4 * CENTERJSAMPLE;
961
data[DCTSIZE*1] = tmp0 - tmp2;
962
963
/* Column 1 */
964
data[DCTSIZE*0+1] = tmp1 + tmp3;
965
data[DCTSIZE*1+1] = tmp1 - tmp3;
966
#endif
967
}
968
969
970
/*
971
* Perform the forward DCT on a 1x1 sample block.
972
*/
973
974
GLOBAL(void)
975
jpeg_fdct_1x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
976
{
977
DCTELEM dcval;
978
979
/* Pre-zero output coefficient block. */
980
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
981
982
dcval = GETJSAMPLE(sample_data[0][start_col]);
983
984
/* We leave the result scaled up by an overall factor of 8. */
985
/* We must also scale the output by (8/1)**2 = 2**6. */
986
/* Apply unsigned->signed conversion. */
987
data[0] = (dcval - CENTERJSAMPLE) << (6+PASS1_BITS-PASS2_BITS);
988
}
989
990
991
/* Pass 1 bits decrement scaling for block sizes 9, 10, 11.
 *
 * PASS1_DECR is PASS1_BITS - 1, or 0 when PASS1_BITS is not positive.
 * PASS1_OUTDEC(x) converts x to DCTELEM, scaling it up by 2**PASS1_DECR
 * first when PASS1_DECR is positive.
 */

#if PASS1_BITS > 0
#define PASS1_DECR  (PASS1_BITS - 1)
#else
#define PASS1_DECR  0
#endif

/* The expansions are fully parenthesized so that the cast cannot combine
 * with operators surrounding the macro invocation.
 */
#if PASS1_DECR > 0
#define PASS1_OUTDEC(x)  ((DCTELEM) ((x) << PASS1_DECR))
#else
#define PASS1_OUTDEC(x)  ((DCTELEM) (x))
#endif
1004
1005
1006
/*
1007
* Perform the forward DCT on a 9x9 sample block.
1008
*/
1009
1010
GLOBAL(void)
1011
jpeg_fdct_9x9 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1012
{
1013
INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
1014
INT32 tmp10, tmp11, tmp12, tmp13;
1015
INT32 z1, z2;
1016
DCTELEM workspace[8];
1017
DCTELEM *dataptr;
1018
DCTELEM *wsptr;
1019
JSAMPROW elemptr;
1020
int ctr;
1021
SHIFT_TEMPS
1022
1023
/* Pass 1: process rows.
1024
* Note results are scaled up by sqrt(8) compared to a true DCT;
1025
* furthermore, we scale the results by 2**PASS1_DECR.
1026
* cK represents sqrt(2) * cos(K*pi/18).
1027
*/
1028
1029
dataptr = data;
1030
ctr = 0;
1031
for (;;) {
1032
elemptr = sample_data[ctr] + start_col;
1033
1034
/* Even part */
1035
1036
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[8]);
1037
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[7]);
1038
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[6]);
1039
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[5]);
1040
tmp4 = GETJSAMPLE(elemptr[4]);
1041
1042
tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[8]);
1043
tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[7]);
1044
tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[6]);
1045
tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[5]);
1046
1047
z1 = tmp0 + tmp2 + tmp3;
1048
z2 = tmp1 + tmp4;
1049
/* Apply unsigned->signed conversion. */
1050
dataptr[0] = PASS1_OUTDEC(z1 + z2 - 9 * CENTERJSAMPLE);
1051
dataptr[6] = (DCTELEM)
1052
DESCALE(MULTIPLY(z1 - z2 - z2, FIX(0.707106781)), /* c6 */
1053
CONST_BITS-PASS1_DECR);
1054
z1 = MULTIPLY(tmp0 - tmp2, FIX(1.328926049)); /* c2 */
1055
z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(0.707106781)); /* c6 */
1056
dataptr[2] = (DCTELEM)
1057
DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.083350441)) /* c4 */
1058
+ z1 + z2, CONST_BITS-PASS1_DECR);
1059
dataptr[4] = (DCTELEM)
1060
DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.245575608)) /* c8 */
1061
+ z1 - z2, CONST_BITS-PASS1_DECR);
1062
1063
/* Odd part */
1064
1065
dataptr[3] = (DCTELEM)
1066
DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.224744871)), /* c3 */
1067
CONST_BITS-PASS1_DECR);
1068
1069
tmp11 = MULTIPLY(tmp11, FIX(1.224744871)); /* c3 */
1070
tmp0 = MULTIPLY(tmp10 + tmp12, FIX(0.909038955)); /* c5 */
1071
tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.483689525)); /* c7 */
1072
1073
dataptr[1] = (DCTELEM)
1074
DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS-PASS1_DECR);
1075
1076
tmp2 = MULTIPLY(tmp12 - tmp13, FIX(1.392728481)); /* c1 */
1077
1078
dataptr[5] = (DCTELEM)
1079
DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS-PASS1_DECR);
1080
dataptr[7] = (DCTELEM)
1081
DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS-PASS1_DECR);
1082
1083
ctr++;
1084
1085
if (ctr != DCTSIZE) {
1086
if (ctr == 9)
1087
break; /* Done. */
1088
dataptr += DCTSIZE; /* advance pointer to next row */
1089
} else
1090
dataptr = workspace; /* switch pointer to extended workspace */
1091
}
1092
1093
/* Pass 2: process columns.
1094
* We remove the PASS1_DECR scaling, but leave the results scaled up
1095
* by an overall factor of 8.
1096
* We must also scale the output by (8/9)**2 = 64/81, which we partially
1097
* fold into the constant multipliers and final shifting:
1098
* cK now represents sqrt(2) * cos(K*pi/18) * 128/81.
1099
*/
1100
1101
dataptr = data;
1102
wsptr = workspace;
1103
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1104
/* Even part */
1105
1106
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*0];
1107
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*7];
1108
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*6];
1109
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*5];
1110
tmp4 = dataptr[DCTSIZE*4];
1111
1112
tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*0];
1113
tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*7];
1114
tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*6];
1115
tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*5];
1116
1117
z1 = tmp0 + tmp2 + tmp3;
1118
z2 = tmp1 + tmp4;
1119
dataptr[DCTSIZE*0] = (DCTELEM)
1120
DESCALE(MULTIPLY(z1 + z2, FIX(1.580246914)), /* 128/81 */
1121
CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1122
dataptr[DCTSIZE*6] = (DCTELEM)
1123
DESCALE(MULTIPLY(z1 - z2 - z2, FIX(1.117403309)), /* c6 */
1124
CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1125
z1 = MULTIPLY(tmp0 - tmp2, FIX(2.100031287)); /* c2 */
1126
z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(1.117403309)); /* c6 */
1127
dataptr[DCTSIZE*2] = (DCTELEM)
1128
DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.711961190)) /* c4 */
1129
+ z1 + z2, CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1130
dataptr[DCTSIZE*4] = (DCTELEM)
1131
DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.388070096)) /* c8 */
1132
+ z1 - z2, CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1133
1134
/* Odd part */
1135
1136
dataptr[DCTSIZE*3] = (DCTELEM)
1137
DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.935399303)), /* c3 */
1138
CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1139
1140
tmp11 = MULTIPLY(tmp11, FIX(1.935399303)); /* c3 */
1141
tmp0 = MULTIPLY(tmp10 + tmp12, FIX(1.436506004)); /* c5 */
1142
tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.764348879)); /* c7 */
1143
1144
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp0 + tmp1,
1145
CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1146
1147
tmp2 = MULTIPLY(tmp12 - tmp13, FIX(2.200854883)); /* c1 */
1148
1149
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp0 - tmp11 - tmp2,
1150
CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1151
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp1 - tmp11 + tmp2,
1152
CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1153
1154
dataptr++; /* advance pointer to next column */
1155
wsptr++; /* advance pointer to next column */
1156
}
1157
}
1158
1159
1160
/*
1161
* Perform the forward DCT on a 10x10 sample block.
1162
*/
1163
1164
GLOBAL(void)
1165
jpeg_fdct_10x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1166
{
1167
INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
1168
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1169
DCTELEM workspace[8*2];
1170
DCTELEM *dataptr;
1171
DCTELEM *wsptr;
1172
JSAMPROW elemptr;
1173
int ctr;
1174
SHIFT_TEMPS
1175
1176
/* Pass 1: process rows.
1177
* Note results are scaled up by sqrt(8) compared to a true DCT;
1178
* furthermore, we scale the results by 2**PASS1_DECR.
1179
* cK represents sqrt(2) * cos(K*pi/20).
1180
*/
1181
1182
dataptr = data;
1183
ctr = 0;
1184
for (;;) {
1185
elemptr = sample_data[ctr] + start_col;
1186
1187
/* Even part */
1188
1189
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
1190
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
1191
tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
1192
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
1193
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
1194
1195
tmp10 = tmp0 + tmp4;
1196
tmp13 = tmp0 - tmp4;
1197
tmp11 = tmp1 + tmp3;
1198
tmp14 = tmp1 - tmp3;
1199
1200
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
1201
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
1202
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
1203
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
1204
tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
1205
1206
/* Apply unsigned->signed conversion. */
1207
dataptr[0] =
1208
PASS1_OUTDEC(tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE);
1209
tmp12 += tmp12;
1210
dataptr[4] = (DCTELEM)
1211
DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
1212
MULTIPLY(tmp11 - tmp12, FIX(0.437016024)), /* c8 */
1213
CONST_BITS-PASS1_DECR);
1214
tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876)); /* c6 */
1215
dataptr[2] = (DCTELEM)
1216
DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)), /* c2-c6 */
1217
CONST_BITS-PASS1_DECR);
1218
dataptr[6] = (DCTELEM)
1219
DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)), /* c2+c6 */
1220
CONST_BITS-PASS1_DECR);
1221
1222
/* Odd part */
1223
1224
tmp10 = tmp0 + tmp4;
1225
tmp11 = tmp1 - tmp3;
1226
dataptr[5] = PASS1_OUTDEC(tmp10 - tmp11 - tmp2);
1227
tmp2 <<= CONST_BITS;
1228
dataptr[1] = (DCTELEM)
1229
DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) + /* c1 */
1230
MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 + /* c3 */
1231
MULTIPLY(tmp3, FIX(0.642039522)) + /* c7 */
1232
MULTIPLY(tmp4, FIX(0.221231742)), /* c9 */
1233
CONST_BITS-PASS1_DECR);
1234
tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) - /* (c3+c7)/2 */
1235
MULTIPLY(tmp1 + tmp3, FIX(0.587785252)); /* (c1-c9)/2 */
1236
tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) + /* (c3-c7)/2 */
1237
(tmp11 << (CONST_BITS - 1)) - tmp2;
1238
dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-PASS1_DECR);
1239
dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-PASS1_DECR);
1240
1241
ctr++;
1242
1243
if (ctr != DCTSIZE) {
1244
if (ctr == 10)
1245
break; /* Done. */
1246
dataptr += DCTSIZE; /* advance pointer to next row */
1247
} else
1248
dataptr = workspace; /* switch pointer to extended workspace */
1249
}
1250
1251
/* Pass 2: process columns.
1252
* We remove the PASS1_DECR scaling, but leave the results scaled up
1253
* by an overall factor of 8.
1254
* We must also scale the output by (8/10)**2 = 16/25, which we partially
1255
* fold into the constant multipliers and final shifting:
1256
* cK now represents sqrt(2) * cos(K*pi/20) * 32/25.
1257
*/
1258
1259
dataptr = data;
1260
wsptr = workspace;
1261
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1262
/* Even part */
1263
1264
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
1265
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
1266
tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
1267
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
1268
tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
1269
1270
tmp10 = tmp0 + tmp4;
1271
tmp13 = tmp0 - tmp4;
1272
tmp11 = tmp1 + tmp3;
1273
tmp14 = tmp1 - tmp3;
1274
1275
tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
1276
tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
1277
tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
1278
tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
1279
tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
1280
1281
dataptr[DCTSIZE*0] = (DCTELEM)
1282
DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
1283
CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1284
tmp12 += tmp12;
1285
dataptr[DCTSIZE*4] = (DCTELEM)
1286
DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
1287
MULTIPLY(tmp11 - tmp12, FIX(0.559380511)), /* c8 */
1288
CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1289
tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961)); /* c6 */
1290
dataptr[DCTSIZE*2] = (DCTELEM)
1291
DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)), /* c2-c6 */
1292
CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1293
dataptr[DCTSIZE*6] = (DCTELEM)
1294
DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)), /* c2+c6 */
1295
CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1296
1297
/* Odd part */
1298
1299
tmp10 = tmp0 + tmp4;
1300
tmp11 = tmp1 - tmp3;
1301
dataptr[DCTSIZE*5] = (DCTELEM)
1302
DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)), /* 32/25 */
1303
CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1304
tmp2 = MULTIPLY(tmp2, FIX(1.28)); /* 32/25 */
1305
dataptr[DCTSIZE*1] = (DCTELEM)
1306
DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) + /* c1 */
1307
MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 + /* c3 */
1308
MULTIPLY(tmp3, FIX(0.821810588)) + /* c7 */
1309
MULTIPLY(tmp4, FIX(0.283176630)), /* c9 */
1310
CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1311
tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) - /* (c3+c7)/2 */
1312
MULTIPLY(tmp1 + tmp3, FIX(0.752365123)); /* (c1-c9)/2 */
1313
tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) + /* (c3-c7)/2 */
1314
MULTIPLY(tmp11, FIX(0.64)) - tmp2; /* 16/25 */
1315
dataptr[DCTSIZE*3] = (DCTELEM)
1316
DESCALE(tmp12 + tmp13, CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1317
dataptr[DCTSIZE*7] = (DCTELEM)
1318
DESCALE(tmp12 - tmp13, CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1319
1320
dataptr++; /* advance pointer to next column */
1321
wsptr++; /* advance pointer to next column */
1322
}
1323
}
1324
1325
1326
/*
1327
* Perform the forward DCT on an 11x11 sample block.
1328
*/
1329
1330
GLOBAL(void)
1331
jpeg_fdct_11x11 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1332
{
1333
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1334
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1335
INT32 z1, z2, z3;
1336
DCTELEM workspace[8*3];
1337
DCTELEM *dataptr;
1338
DCTELEM *wsptr;
1339
JSAMPROW elemptr;
1340
int ctr;
1341
SHIFT_TEMPS
1342
1343
/* Pass 1: process rows.
1344
* Note results are scaled up by sqrt(8) compared to a true DCT;
1345
* furthermore, we scale the results by 2**PASS1_DECR.
1346
* cK represents sqrt(2) * cos(K*pi/22).
1347
*/
1348
1349
dataptr = data;
1350
ctr = 0;
1351
for (;;) {
1352
elemptr = sample_data[ctr] + start_col;
1353
1354
/* Even part */
1355
1356
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[10]);
1357
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[9]);
1358
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[8]);
1359
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[7]);
1360
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[6]);
1361
tmp5 = GETJSAMPLE(elemptr[5]);
1362
1363
tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[10]);
1364
tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[9]);
1365
tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[8]);
1366
tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[7]);
1367
tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[6]);
1368
1369
/* Apply unsigned->signed conversion. */
1370
dataptr[0] =
1371
PASS1_OUTDEC(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 - 11 * CENTERJSAMPLE);
1372
tmp5 += tmp5;
1373
tmp0 -= tmp5;
1374
tmp1 -= tmp5;
1375
tmp2 -= tmp5;
1376
tmp3 -= tmp5;
1377
tmp4 -= tmp5;
1378
z1 = MULTIPLY(tmp0 + tmp3, FIX(1.356927976)) + /* c2 */
1379
MULTIPLY(tmp2 + tmp4, FIX(0.201263574)); /* c10 */
1380
z2 = MULTIPLY(tmp1 - tmp3, FIX(0.926112931)); /* c6 */
1381
z3 = MULTIPLY(tmp0 - tmp1, FIX(1.189712156)); /* c4 */
1382
dataptr[2] = (DCTELEM)
1383
DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.018300590)) /* c2+c8-c6 */
1384
- MULTIPLY(tmp4, FIX(1.390975730)), /* c4+c10 */
1385
CONST_BITS-PASS1_DECR);
1386
dataptr[4] = (DCTELEM)
1387
DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.062335650)) /* c4-c6-c10 */
1388
- MULTIPLY(tmp2, FIX(1.356927976)) /* c2 */
1389
+ MULTIPLY(tmp4, FIX(0.587485545)), /* c8 */
1390
CONST_BITS-PASS1_DECR);
1391
dataptr[6] = (DCTELEM)
1392
DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.620527200)) /* c2+c4-c6 */
1393
- MULTIPLY(tmp2, FIX(0.788749120)), /* c8+c10 */
1394
CONST_BITS-PASS1_DECR);
1395
1396
/* Odd part */
1397
1398
tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.286413905)); /* c3 */
1399
tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.068791298)); /* c5 */
1400
tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.764581576)); /* c7 */
1401
tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.719967871)) /* c7+c5+c3-c1 */
1402
+ MULTIPLY(tmp14, FIX(0.398430003)); /* c9 */
1403
tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.764581576)); /* -c7 */
1404
tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.399818907)); /* -c1 */
1405
tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.276416582)) /* c9+c7+c1-c3 */
1406
- MULTIPLY(tmp14, FIX(1.068791298)); /* c5 */
1407
tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.398430003)); /* c9 */
1408
tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(1.989053629)) /* c9+c5+c3-c7 */
1409
+ MULTIPLY(tmp14, FIX(1.399818907)); /* c1 */
1410
tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.305598626)) /* c1+c5-c9-c7 */
1411
- MULTIPLY(tmp14, FIX(1.286413905)); /* c3 */
1412
1413
dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_DECR);
1414
dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_DECR);
1415
dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_DECR);
1416
dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-PASS1_DECR);
1417
1418
ctr++;
1419
1420
if (ctr != DCTSIZE) {
1421
if (ctr == 11)
1422
break; /* Done. */
1423
dataptr += DCTSIZE; /* advance pointer to next row */
1424
} else
1425
dataptr = workspace; /* switch pointer to extended workspace */
1426
}
1427
1428
/* Pass 2: process columns.
1429
* We remove the PASS1_DECR scaling, but leave the results scaled up
1430
* by an overall factor of 8.
1431
* We must also scale the output by (8/11)**2 = 64/121, which we partially
1432
* fold into the constant multipliers and final shifting:
1433
* cK now represents sqrt(2) * cos(K*pi/22) * 128/121.
1434
*/
1435
1436
dataptr = data;
1437
wsptr = workspace;
1438
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1439
/* Even part */
1440
1441
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*2];
1442
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*1];
1443
tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*0];
1444
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*7];
1445
tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*6];
1446
tmp5 = dataptr[DCTSIZE*5];
1447
1448
tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*2];
1449
tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*1];
1450
tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*0];
1451
tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*7];
1452
tmp14 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*6];
1453
1454
dataptr[DCTSIZE*0] = (DCTELEM)
1455
DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5,
1456
FIX(1.057851240)), /* 128/121 */
1457
CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1458
tmp5 += tmp5;
1459
tmp0 -= tmp5;
1460
tmp1 -= tmp5;
1461
tmp2 -= tmp5;
1462
tmp3 -= tmp5;
1463
tmp4 -= tmp5;
1464
z1 = MULTIPLY(tmp0 + tmp3, FIX(1.435427942)) + /* c2 */
1465
MULTIPLY(tmp2 + tmp4, FIX(0.212906922)); /* c10 */
1466
z2 = MULTIPLY(tmp1 - tmp3, FIX(0.979689713)); /* c6 */
1467
z3 = MULTIPLY(tmp0 - tmp1, FIX(1.258538479)); /* c4 */
1468
dataptr[DCTSIZE*2] = (DCTELEM)
1469
DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.077210542)) /* c2+c8-c6 */
1470
- MULTIPLY(tmp4, FIX(1.471445400)), /* c4+c10 */
1471
CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1472
dataptr[DCTSIZE*4] = (DCTELEM)
1473
DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.065941844)) /* c4-c6-c10 */
1474
- MULTIPLY(tmp2, FIX(1.435427942)) /* c2 */
1475
+ MULTIPLY(tmp4, FIX(0.621472312)), /* c8 */
1476
CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1477
dataptr[DCTSIZE*6] = (DCTELEM)
1478
DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.714276708)) /* c2+c4-c6 */
1479
- MULTIPLY(tmp2, FIX(0.834379234)), /* c8+c10 */
1480
CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1481
1482
/* Odd part */
1483
1484
tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.360834544)); /* c3 */
1485
tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.130622199)); /* c5 */
1486
tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.808813568)); /* c7 */
1487
tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.819470145)) /* c7+c5+c3-c1 */
1488
+ MULTIPLY(tmp14, FIX(0.421479672)); /* c9 */
1489
tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.808813568)); /* -c7 */
1490
tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.480800167)); /* -c1 */
1491
tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.350258864)) /* c9+c7+c1-c3 */
1492
- MULTIPLY(tmp14, FIX(1.130622199)); /* c5 */
1493
tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.421479672)); /* c9 */
1494
tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(2.104122847)) /* c9+c5+c3-c7 */
1495
+ MULTIPLY(tmp14, FIX(1.480800167)); /* c1 */
1496
tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.381129125)) /* c1+c5-c9-c7 */
1497
- MULTIPLY(tmp14, FIX(1.360834544)); /* c3 */
1498
1499
dataptr[DCTSIZE*1] = (DCTELEM)
1500
DESCALE(tmp0, CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1501
dataptr[DCTSIZE*3] = (DCTELEM)
1502
DESCALE(tmp1, CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1503
dataptr[DCTSIZE*5] = (DCTELEM)
1504
DESCALE(tmp2, CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1505
dataptr[DCTSIZE*7] = (DCTELEM)
1506
DESCALE(tmp3, CONST_BITS+PASS1_DECR+1+PASS2_BITS-PASS1_BITS);
1507
1508
dataptr++; /* advance pointer to next column */
1509
wsptr++; /* advance pointer to next column */
1510
}
1511
}
1512
1513
1514
/*
1515
* Perform the forward DCT on a 12x12 sample block.
1516
*/
1517
1518
GLOBAL(void)
1519
jpeg_fdct_12x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1520
{
1521
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1522
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1523
DCTELEM workspace[8*4];
1524
DCTELEM *dataptr;
1525
DCTELEM *wsptr;
1526
JSAMPROW elemptr;
1527
int ctr;
1528
SHIFT_TEMPS
1529
1530
/* Pass 1: process rows.
1531
* Note results are scaled up by sqrt(8) compared to a true DCT.
1532
* cK represents sqrt(2) * cos(K*pi/24).
1533
*/
1534
1535
dataptr = data;
1536
ctr = 0;
1537
for (;;) {
1538
elemptr = sample_data[ctr] + start_col;
1539
1540
/* Even part */
1541
1542
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
1543
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
1544
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
1545
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
1546
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
1547
tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
1548
1549
tmp10 = tmp0 + tmp5;
1550
tmp13 = tmp0 - tmp5;
1551
tmp11 = tmp1 + tmp4;
1552
tmp14 = tmp1 - tmp4;
1553
tmp12 = tmp2 + tmp3;
1554
tmp15 = tmp2 - tmp3;
1555
1556
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
1557
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
1558
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
1559
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
1560
tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
1561
tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
1562
1563
/* Apply unsigned->signed conversion. */
1564
dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE);
1565
dataptr[6] = (DCTELEM) (tmp13 - tmp14 - tmp15);
1566
dataptr[4] = (DCTELEM)
1567
DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
1568
CONST_BITS);
1569
dataptr[2] = (DCTELEM)
1570
DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
1571
CONST_BITS);
1572
1573
/* Odd part */
1574
1575
tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100); /* c9 */
1576
tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865); /* c3-c9 */
1577
tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065); /* c3+c9 */
1578
tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054)); /* c5 */
1579
tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669)); /* c7 */
1580
tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
1581
+ MULTIPLY(tmp5, FIX(0.184591911)); /* c11 */
1582
tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
1583
tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
1584
+ MULTIPLY(tmp5, FIX(0.860918669)); /* c7 */
1585
tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
1586
- MULTIPLY(tmp5, FIX(1.121971054)); /* c5 */
1587
tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
1588
- MULTIPLY(tmp2 + tmp5, FIX_0_541196100); /* c9 */
1589
1590
dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS);
1591
dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS);
1592
dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS);
1593
dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS);
1594
1595
ctr++;
1596
1597
if (ctr != DCTSIZE) {
1598
if (ctr == 12)
1599
break; /* Done. */
1600
dataptr += DCTSIZE; /* advance pointer to next row */
1601
} else
1602
dataptr = workspace; /* switch pointer to extended workspace */
1603
}
1604
1605
/* Pass 2: process columns.
1606
* We leave the results scaled up by an overall factor of 8.
1607
* We must also scale the output by (8/12)**2 = 4/9, which we partially
1608
* fold into the constant multipliers and final shifting:
1609
* cK now represents sqrt(2) * cos(K*pi/24) * 8/9.
1610
*/
1611
1612
dataptr = data;
1613
wsptr = workspace;
1614
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1615
/* Even part */
1616
1617
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
1618
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
1619
tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
1620
tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
1621
tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
1622
tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
1623
1624
tmp10 = tmp0 + tmp5;
1625
tmp13 = tmp0 - tmp5;
1626
tmp11 = tmp1 + tmp4;
1627
tmp14 = tmp1 - tmp4;
1628
tmp12 = tmp2 + tmp3;
1629
tmp15 = tmp2 - tmp3;
1630
1631
tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
1632
tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
1633
tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
1634
tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
1635
tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
1636
tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
1637
1638
dataptr[DCTSIZE*0] = (DCTELEM)
1639
DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
1640
CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1641
dataptr[DCTSIZE*6] = (DCTELEM)
1642
DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
1643
CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1644
dataptr[DCTSIZE*4] = (DCTELEM)
1645
DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)), /* c4 */
1646
CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1647
dataptr[DCTSIZE*2] = (DCTELEM)
1648
DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) + /* 8/9 */
1649
MULTIPLY(tmp13 + tmp15, FIX(1.214244803)), /* c2 */
1650
CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1651
1652
/* Odd part */
1653
1654
tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200)); /* c9 */
1655
tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102)); /* c3-c9 */
1656
tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502)); /* c3+c9 */
1657
tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603)); /* c5 */
1658
tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039)); /* c7 */
1659
tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
1660
+ MULTIPLY(tmp5, FIX(0.164081699)); /* c11 */
1661
tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
1662
tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
1663
+ MULTIPLY(tmp5, FIX(0.765261039)); /* c7 */
1664
tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
1665
- MULTIPLY(tmp5, FIX(0.997307603)); /* c5 */
1666
tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
1667
- MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
1668
1669
dataptr[DCTSIZE*1] = (DCTELEM)
1670
DESCALE(tmp10, CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1671
dataptr[DCTSIZE*3] = (DCTELEM)
1672
DESCALE(tmp11, CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1673
dataptr[DCTSIZE*5] = (DCTELEM)
1674
DESCALE(tmp12, CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1675
dataptr[DCTSIZE*7] = (DCTELEM)
1676
DESCALE(tmp13, CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1677
1678
dataptr++; /* advance pointer to next column */
1679
wsptr++; /* advance pointer to next column */
1680
}
1681
}
1682
1683
1684
/*
1685
* Perform the forward DCT on a 13x13 sample block.
1686
*/
1687
1688
GLOBAL(void)
1689
jpeg_fdct_13x13 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1690
{
1691
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
1692
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1693
INT32 z1, z2;
1694
DCTELEM workspace[8*5];
1695
DCTELEM *dataptr;
1696
DCTELEM *wsptr;
1697
JSAMPROW elemptr;
1698
int ctr;
1699
SHIFT_TEMPS
1700
1701
/* Pass 1: process rows.
1702
* Note results are scaled up by sqrt(8) compared to a true DCT.
1703
* cK represents sqrt(2) * cos(K*pi/26).
1704
*/
1705
1706
dataptr = data;
1707
ctr = 0;
1708
for (;;) {
1709
elemptr = sample_data[ctr] + start_col;
1710
1711
/* Even part */
1712
1713
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[12]);
1714
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[11]);
1715
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[10]);
1716
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[9]);
1717
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[8]);
1718
tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[7]);
1719
tmp6 = GETJSAMPLE(elemptr[6]);
1720
1721
tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[12]);
1722
tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[11]);
1723
tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[10]);
1724
tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[9]);
1725
tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[8]);
1726
tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[7]);
1727
1728
/* Apply unsigned->signed conversion. */
1729
dataptr[0] = (DCTELEM)
1730
(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 - 13 * CENTERJSAMPLE);
1731
tmp6 += tmp6;
1732
tmp0 -= tmp6;
1733
tmp1 -= tmp6;
1734
tmp2 -= tmp6;
1735
tmp3 -= tmp6;
1736
tmp4 -= tmp6;
1737
tmp5 -= tmp6;
1738
dataptr[2] = (DCTELEM)
1739
DESCALE(MULTIPLY(tmp0, FIX(1.373119086)) + /* c2 */
1740
MULTIPLY(tmp1, FIX(1.058554052)) + /* c6 */
1741
MULTIPLY(tmp2, FIX(0.501487041)) - /* c10 */
1742
MULTIPLY(tmp3, FIX(0.170464608)) - /* c12 */
1743
MULTIPLY(tmp4, FIX(0.803364869)) - /* c8 */
1744
MULTIPLY(tmp5, FIX(1.252223920)), /* c4 */
1745
CONST_BITS);
1746
z1 = MULTIPLY(tmp0 - tmp2, FIX(1.155388986)) - /* (c4+c6)/2 */
1747
MULTIPLY(tmp3 - tmp4, FIX(0.435816023)) - /* (c2-c10)/2 */
1748
MULTIPLY(tmp1 - tmp5, FIX(0.316450131)); /* (c8-c12)/2 */
1749
z2 = MULTIPLY(tmp0 + tmp2, FIX(0.096834934)) - /* (c4-c6)/2 */
1750
MULTIPLY(tmp3 + tmp4, FIX(0.937303064)) + /* (c2+c10)/2 */
1751
MULTIPLY(tmp1 + tmp5, FIX(0.486914739)); /* (c8+c12)/2 */
1752
1753
dataptr[4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS);
1754
dataptr[6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS);
1755
1756
/* Odd part */
1757
1758
tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.322312651)); /* c3 */
1759
tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.163874945)); /* c5 */
1760
tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.937797057)) + /* c7 */
1761
MULTIPLY(tmp14 + tmp15, FIX(0.338443458)); /* c11 */
1762
tmp0 = tmp1 + tmp2 + tmp3 -
1763
MULTIPLY(tmp10, FIX(2.020082300)) + /* c3+c5+c7-c1 */
1764
MULTIPLY(tmp14, FIX(0.318774355)); /* c9-c11 */
1765
tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.937797057)) - /* c7 */
1766
MULTIPLY(tmp11 + tmp12, FIX(0.338443458)); /* c11 */
1767
tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.163874945)); /* -c5 */
1768
tmp1 += tmp4 + tmp5 +
1769
MULTIPLY(tmp11, FIX(0.837223564)) - /* c5+c9+c11-c3 */
1770
MULTIPLY(tmp14, FIX(2.341699410)); /* c1+c7 */
1771
tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.657217813)); /* -c9 */
1772
tmp2 += tmp4 + tmp6 -
1773
MULTIPLY(tmp12, FIX(1.572116027)) + /* c1+c5-c9-c11 */
1774
MULTIPLY(tmp15, FIX(2.260109708)); /* c3+c7 */
1775
tmp3 += tmp5 + tmp6 +
1776
MULTIPLY(tmp13, FIX(2.205608352)) - /* c3+c5+c9-c7 */
1777
MULTIPLY(tmp15, FIX(1.742345811)); /* c1+c11 */
1778
1779
dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
1780
dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
1781
dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
1782
dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
1783
1784
ctr++;
1785
1786
if (ctr != DCTSIZE) {
1787
if (ctr == 13)
1788
break; /* Done. */
1789
dataptr += DCTSIZE; /* advance pointer to next row */
1790
} else
1791
dataptr = workspace; /* switch pointer to extended workspace */
1792
}
1793
1794
/* Pass 2: process columns.
1795
* We leave the results scaled up by an overall factor of 8.
1796
* We must also scale the output by (8/13)**2 = 64/169, which we partially
1797
* fold into the constant multipliers and final shifting:
1798
* cK now represents sqrt(2) * cos(K*pi/26) * 128/169.
1799
*/
1800
1801
dataptr = data;
1802
wsptr = workspace;
1803
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1804
/* Even part */
1805
1806
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*4];
1807
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*3];
1808
tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*2];
1809
tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*1];
1810
tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*0];
1811
tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*7];
1812
tmp6 = dataptr[DCTSIZE*6];
1813
1814
tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*4];
1815
tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*3];
1816
tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*2];
1817
tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*1];
1818
tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*0];
1819
tmp15 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*7];
1820
1821
dataptr[DCTSIZE*0] = (DCTELEM)
1822
DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6,
1823
FIX(0.757396450)), /* 128/169 */
1824
CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1825
tmp6 += tmp6;
1826
tmp0 -= tmp6;
1827
tmp1 -= tmp6;
1828
tmp2 -= tmp6;
1829
tmp3 -= tmp6;
1830
tmp4 -= tmp6;
1831
tmp5 -= tmp6;
1832
dataptr[DCTSIZE*2] = (DCTELEM)
1833
DESCALE(MULTIPLY(tmp0, FIX(1.039995521)) + /* c2 */
1834
MULTIPLY(tmp1, FIX(0.801745081)) + /* c6 */
1835
MULTIPLY(tmp2, FIX(0.379824504)) - /* c10 */
1836
MULTIPLY(tmp3, FIX(0.129109289)) - /* c12 */
1837
MULTIPLY(tmp4, FIX(0.608465700)) - /* c8 */
1838
MULTIPLY(tmp5, FIX(0.948429952)), /* c4 */
1839
CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1840
z1 = MULTIPLY(tmp0 - tmp2, FIX(0.875087516)) - /* (c4+c6)/2 */
1841
MULTIPLY(tmp3 - tmp4, FIX(0.330085509)) - /* (c2-c10)/2 */
1842
MULTIPLY(tmp1 - tmp5, FIX(0.239678205)); /* (c8-c12)/2 */
1843
z2 = MULTIPLY(tmp0 + tmp2, FIX(0.073342435)) - /* (c4-c6)/2 */
1844
MULTIPLY(tmp3 + tmp4, FIX(0.709910013)) + /* (c2+c10)/2 */
1845
MULTIPLY(tmp1 + tmp5, FIX(0.368787494)); /* (c8+c12)/2 */
1846
1847
dataptr[DCTSIZE*4] = (DCTELEM)
1848
DESCALE(z1 + z2, CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1849
dataptr[DCTSIZE*6] = (DCTELEM)
1850
DESCALE(z1 - z2, CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1851
1852
/* Odd part */
1853
1854
tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.001514908)); /* c3 */
1855
tmp2 = MULTIPLY(tmp10 + tmp12, FIX(0.881514751)); /* c5 */
1856
tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.710284161)) + /* c7 */
1857
MULTIPLY(tmp14 + tmp15, FIX(0.256335874)); /* c11 */
1858
tmp0 = tmp1 + tmp2 + tmp3 -
1859
MULTIPLY(tmp10, FIX(1.530003162)) + /* c3+c5+c7-c1 */
1860
MULTIPLY(tmp14, FIX(0.241438564)); /* c9-c11 */
1861
tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.710284161)) - /* c7 */
1862
MULTIPLY(tmp11 + tmp12, FIX(0.256335874)); /* c11 */
1863
tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(0.881514751)); /* -c5 */
1864
tmp1 += tmp4 + tmp5 +
1865
MULTIPLY(tmp11, FIX(0.634110155)) - /* c5+c9+c11-c3 */
1866
MULTIPLY(tmp14, FIX(1.773594819)); /* c1+c7 */
1867
tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.497774438)); /* -c9 */
1868
tmp2 += tmp4 + tmp6 -
1869
MULTIPLY(tmp12, FIX(1.190715098)) + /* c1+c5-c9-c11 */
1870
MULTIPLY(tmp15, FIX(1.711799069)); /* c3+c7 */
1871
tmp3 += tmp5 + tmp6 +
1872
MULTIPLY(tmp13, FIX(1.670519935)) - /* c3+c5+c9-c7 */
1873
MULTIPLY(tmp15, FIX(1.319646532)); /* c1+c11 */
1874
1875
dataptr[DCTSIZE*1] = (DCTELEM)
1876
DESCALE(tmp0, CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1877
dataptr[DCTSIZE*3] = (DCTELEM)
1878
DESCALE(tmp1, CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1879
dataptr[DCTSIZE*5] = (DCTELEM)
1880
DESCALE(tmp2, CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1881
dataptr[DCTSIZE*7] = (DCTELEM)
1882
DESCALE(tmp3, CONST_BITS+1+PASS2_BITS-PASS1_BITS);
1883
1884
dataptr++; /* advance pointer to next column */
1885
wsptr++; /* advance pointer to next column */
1886
}
1887
}
1888
1889
1890
/*
1891
* Perform the forward DCT on a 14x14 sample block.
1892
*/
1893
1894
GLOBAL(void)
1895
jpeg_fdct_14x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1896
{
1897
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
1898
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
1899
DCTELEM workspace[8*6];
1900
DCTELEM *dataptr;
1901
DCTELEM *wsptr;
1902
JSAMPROW elemptr;
1903
int ctr;
1904
SHIFT_TEMPS
1905
1906
/* Pass 1: process rows.
1907
* Note results are scaled up by sqrt(8) compared to a true DCT.
1908
* cK represents sqrt(2) * cos(K*pi/28).
1909
*/
1910
1911
dataptr = data;
1912
ctr = 0;
1913
for (;;) {
1914
elemptr = sample_data[ctr] + start_col;
1915
1916
/* Even part */
1917
1918
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
1919
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
1920
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
1921
tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
1922
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
1923
tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
1924
tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
1925
1926
tmp10 = tmp0 + tmp6;
1927
tmp14 = tmp0 - tmp6;
1928
tmp11 = tmp1 + tmp5;
1929
tmp15 = tmp1 - tmp5;
1930
tmp12 = tmp2 + tmp4;
1931
tmp16 = tmp2 - tmp4;
1932
1933
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
1934
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
1935
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
1936
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
1937
tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
1938
tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
1939
tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
1940
1941
/* Apply unsigned->signed conversion. */
1942
dataptr[0] = (DCTELEM)
1943
(tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE);
1944
tmp13 += tmp13;
1945
dataptr[4] = (DCTELEM)
1946
DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
1947
MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
1948
MULTIPLY(tmp12 - tmp13, FIX(0.881747734)), /* c8 */
1949
CONST_BITS);
1950
1951
tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686)); /* c6 */
1952
1953
dataptr[2] = (DCTELEM)
1954
DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590)) /* c2-c6 */
1955
+ MULTIPLY(tmp16, FIX(0.613604268)), /* c10 */
1956
CONST_BITS);
1957
dataptr[6] = (DCTELEM)
1958
DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954)) /* c6+c10 */
1959
- MULTIPLY(tmp16, FIX(1.378756276)), /* c2 */
1960
CONST_BITS);
1961
1962
/* Odd part */
1963
1964
tmp10 = tmp1 + tmp2;
1965
tmp11 = tmp5 - tmp4;
1966
dataptr[7] = (DCTELEM) (tmp0 - tmp10 + tmp3 - tmp11 - tmp6);
1967
tmp3 <<= CONST_BITS;
1968
tmp10 = MULTIPLY(tmp10, - FIX(0.158341681)); /* -c13 */
1969
tmp11 = MULTIPLY(tmp11, FIX(1.405321284)); /* c1 */
1970
tmp10 += tmp11 - tmp3;
1971
tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) + /* c5 */
1972
MULTIPLY(tmp4 + tmp6, FIX(0.752406978)); /* c9 */
1973
dataptr[5] = (DCTELEM)
1974
DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
1975
+ MULTIPLY(tmp4, FIX(1.119999435)), /* c1+c11-c9 */
1976
CONST_BITS);
1977
tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) + /* c3 */
1978
MULTIPLY(tmp5 - tmp6, FIX(0.467085129)); /* c11 */
1979
dataptr[3] = (DCTELEM)
1980
DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
1981
- MULTIPLY(tmp5, FIX(3.069855259)), /* c1+c5+c11 */
1982
CONST_BITS);
1983
dataptr[1] = (DCTELEM)
1984
DESCALE(tmp11 + tmp12 + tmp3 + tmp6 -
1985
MULTIPLY(tmp0 + tmp6, FIX(1.126980169)), /* c3+c5-c1 */
1986
CONST_BITS);
1987
1988
ctr++;
1989
1990
if (ctr != DCTSIZE) {
1991
if (ctr == 14)
1992
break; /* Done. */
1993
dataptr += DCTSIZE; /* advance pointer to next row */
1994
} else
1995
dataptr = workspace; /* switch pointer to extended workspace */
1996
}
1997
1998
/* Pass 2: process columns.
1999
* We leave the results scaled up by an overall factor of 8.
2000
* We must also scale the output by (8/14)**2 = 16/49, which we partially
2001
* fold into the constant multipliers and final shifting:
2002
* cK now represents sqrt(2) * cos(K*pi/28) * 32/49.
2003
*/
2004
2005
dataptr = data;
2006
wsptr = workspace;
2007
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2008
/* Even part */
2009
2010
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
2011
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
2012
tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
2013
tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
2014
tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
2015
tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
2016
tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
2017
2018
tmp10 = tmp0 + tmp6;
2019
tmp14 = tmp0 - tmp6;
2020
tmp11 = tmp1 + tmp5;
2021
tmp15 = tmp1 - tmp5;
2022
tmp12 = tmp2 + tmp4;
2023
tmp16 = tmp2 - tmp4;
2024
2025
tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
2026
tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
2027
tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
2028
tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
2029
tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
2030
tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
2031
tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
2032
2033
dataptr[DCTSIZE*0] = (DCTELEM)
2034
DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
2035
FIX(0.653061224)), /* 32/49 */
2036
CONST_BITS+1+PASS2_BITS-PASS1_BITS);
2037
tmp13 += tmp13;
2038
dataptr[DCTSIZE*4] = (DCTELEM)
2039
DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
2040
MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
2041
MULTIPLY(tmp12 - tmp13, FIX(0.575835255)), /* c8 */
2042
CONST_BITS+1+PASS2_BITS-PASS1_BITS);
2043
2044
tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570)); /* c6 */
2045
2046
dataptr[DCTSIZE*2] = (DCTELEM)
2047
DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691)) /* c2-c6 */
2048
+ MULTIPLY(tmp16, FIX(0.400721155)), /* c10 */
2049
CONST_BITS+1+PASS2_BITS-PASS1_BITS);
2050
dataptr[DCTSIZE*6] = (DCTELEM)
2051
DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725)) /* c6+c10 */
2052
- MULTIPLY(tmp16, FIX(0.900412262)), /* c2 */
2053
CONST_BITS+1+PASS2_BITS-PASS1_BITS);
2054
2055
/* Odd part */
2056
2057
tmp10 = tmp1 + tmp2;
2058
tmp11 = tmp5 - tmp4;
2059
dataptr[DCTSIZE*7] = (DCTELEM)
2060
DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
2061
FIX(0.653061224)), /* 32/49 */
2062
CONST_BITS+1+PASS2_BITS-PASS1_BITS);
2063
tmp3 = MULTIPLY(tmp3 , FIX(0.653061224)); /* 32/49 */
2064
tmp10 = MULTIPLY(tmp10, - FIX(0.103406812)); /* -c13 */
2065
tmp11 = MULTIPLY(tmp11, FIX(0.917760839)); /* c1 */
2066
tmp10 += tmp11 - tmp3;
2067
tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) + /* c5 */
2068
MULTIPLY(tmp4 + tmp6, FIX(0.491367823)); /* c9 */
2069
dataptr[DCTSIZE*5] = (DCTELEM)
2070
DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
2071
+ MULTIPLY(tmp4, FIX(0.731428202)), /* c1+c11-c9 */
2072
CONST_BITS+1+PASS2_BITS-PASS1_BITS);
2073
tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) + /* c3 */
2074
MULTIPLY(tmp5 - tmp6, FIX(0.305035186)); /* c11 */
2075
dataptr[DCTSIZE*3] = (DCTELEM)
2076
DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
2077
- MULTIPLY(tmp5, FIX(2.004803435)), /* c1+c5+c11 */
2078
CONST_BITS+1+PASS2_BITS-PASS1_BITS);
2079
dataptr[DCTSIZE*1] = (DCTELEM)
2080
DESCALE(tmp11 + tmp12 + tmp3
2081
- MULTIPLY(tmp0, FIX(0.735987049)) /* c3+c5-c1 */
2082
- MULTIPLY(tmp6, FIX(0.082925825)), /* c9-c11-c13 */
2083
CONST_BITS+1+PASS2_BITS-PASS1_BITS);
2084
2085
dataptr++; /* advance pointer to next column */
2086
wsptr++; /* advance pointer to next column */
2087
}
2088
}
2089
2090
2091
/*
2092
* Perform the forward DCT on a 15x15 sample block.
2093
*/
2094
2095
GLOBAL(void)
2096
jpeg_fdct_15x15 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2097
{
2098
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
2099
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2100
INT32 z1, z2, z3;
2101
DCTELEM workspace[8*7];
2102
DCTELEM *dataptr;
2103
DCTELEM *wsptr;
2104
JSAMPROW elemptr;
2105
int ctr;
2106
SHIFT_TEMPS
2107
2108
/* Pass 1: process rows.
2109
* Note results are scaled up by sqrt(8) compared to a true DCT.
2110
* cK represents sqrt(2) * cos(K*pi/30).
2111
*/
2112
2113
dataptr = data;
2114
ctr = 0;
2115
for (;;) {
2116
elemptr = sample_data[ctr] + start_col;
2117
2118
/* Even part */
2119
2120
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[14]);
2121
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[13]);
2122
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[12]);
2123
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[11]);
2124
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[10]);
2125
tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[9]);
2126
tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[8]);
2127
tmp7 = GETJSAMPLE(elemptr[7]);
2128
2129
tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[14]);
2130
tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[13]);
2131
tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[12]);
2132
tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[11]);
2133
tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[10]);
2134
tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[9]);
2135
tmp16 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[8]);
2136
2137
z1 = tmp0 + tmp4 + tmp5;
2138
z2 = tmp1 + tmp3 + tmp6;
2139
z3 = tmp2 + tmp7;
2140
/* Apply unsigned->signed conversion. */
2141
dataptr[0] = (DCTELEM) (z1 + z2 + z3 - 15 * CENTERJSAMPLE);
2142
z3 += z3;
2143
dataptr[6] = (DCTELEM)
2144
DESCALE(MULTIPLY(z1 - z3, FIX(1.144122806)) - /* c6 */
2145
MULTIPLY(z2 - z3, FIX(0.437016024)), /* c12 */
2146
CONST_BITS);
2147
tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7;
2148
z1 = MULTIPLY(tmp3 - tmp2, FIX(1.531135173)) - /* c2+c14 */
2149
MULTIPLY(tmp6 - tmp2, FIX(2.238241955)); /* c4+c8 */
2150
z2 = MULTIPLY(tmp5 - tmp2, FIX(0.798468008)) - /* c8-c14 */
2151
MULTIPLY(tmp0 - tmp2, FIX(0.091361227)); /* c2-c4 */
2152
z3 = MULTIPLY(tmp0 - tmp3, FIX(1.383309603)) + /* c2 */
2153
MULTIPLY(tmp6 - tmp5, FIX(0.946293579)) + /* c8 */
2154
MULTIPLY(tmp1 - tmp4, FIX(0.790569415)); /* (c6+c12)/2 */
2155
2156
dataptr[2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS);
2157
dataptr[4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS);
2158
2159
/* Odd part */
2160
2161
tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
2162
FIX(1.224744871)); /* c5 */
2163
tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.344997024)) + /* c3 */
2164
MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.831253876)); /* c9 */
2165
tmp12 = MULTIPLY(tmp12, FIX(1.224744871)); /* c5 */
2166
tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.406466353)) + /* c1 */
2167
MULTIPLY(tmp11 + tmp14, FIX(1.344997024)) + /* c3 */
2168
MULTIPLY(tmp13 + tmp15, FIX(0.575212477)); /* c11 */
2169
tmp0 = MULTIPLY(tmp13, FIX(0.475753014)) - /* c7-c11 */
2170
MULTIPLY(tmp14, FIX(0.513743148)) + /* c3-c9 */
2171
MULTIPLY(tmp16, FIX(1.700497885)) + tmp4 + tmp12; /* c1+c13 */
2172
tmp3 = MULTIPLY(tmp10, - FIX(0.355500862)) - /* -(c1-c7) */
2173
MULTIPLY(tmp11, FIX(2.176250899)) - /* c3+c9 */
2174
MULTIPLY(tmp15, FIX(0.869244010)) + tmp4 - tmp12; /* c11+c13 */
2175
2176
dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
2177
dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
2178
dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
2179
dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
2180
2181
ctr++;
2182
2183
if (ctr != DCTSIZE) {
2184
if (ctr == 15)
2185
break; /* Done. */
2186
dataptr += DCTSIZE; /* advance pointer to next row */
2187
} else
2188
dataptr = workspace; /* switch pointer to extended workspace */
2189
}
2190
2191
/* Pass 2: process columns.
2192
* We leave the results scaled up by an overall factor of 8.
2193
* We must also scale the output by (8/15)**2 = 64/225, which we partially
2194
* fold into the constant multipliers and final shifting:
2195
* cK now represents sqrt(2) * cos(K*pi/30) * 256/225.
2196
*/
2197
2198
dataptr = data;
2199
wsptr = workspace;
2200
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2201
/* Even part */
2202
2203
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*6];
2204
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*5];
2205
tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*4];
2206
tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*3];
2207
tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*2];
2208
tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*1];
2209
tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*0];
2210
tmp7 = dataptr[DCTSIZE*7];
2211
2212
tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*6];
2213
tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*5];
2214
tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*4];
2215
tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*3];
2216
tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*2];
2217
tmp15 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*1];
2218
tmp16 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*0];
2219
2220
z1 = tmp0 + tmp4 + tmp5;
2221
z2 = tmp1 + tmp3 + tmp6;
2222
z3 = tmp2 + tmp7;
2223
dataptr[DCTSIZE*0] = (DCTELEM)
2224
DESCALE(MULTIPLY(z1 + z2 + z3, FIX(1.137777778)), /* 256/225 */
2225
CONST_BITS+2+PASS2_BITS-PASS1_BITS);
2226
z3 += z3;
2227
dataptr[DCTSIZE*6] = (DCTELEM)
2228
DESCALE(MULTIPLY(z1 - z3, FIX(1.301757503)) - /* c6 */
2229
MULTIPLY(z2 - z3, FIX(0.497227121)), /* c12 */
2230
CONST_BITS+2+PASS2_BITS-PASS1_BITS);
2231
tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7;
2232
z1 = MULTIPLY(tmp3 - tmp2, FIX(1.742091575)) - /* c2+c14 */
2233
MULTIPLY(tmp6 - tmp2, FIX(2.546621957)); /* c4+c8 */
2234
z2 = MULTIPLY(tmp5 - tmp2, FIX(0.908479156)) - /* c8-c14 */
2235
MULTIPLY(tmp0 - tmp2, FIX(0.103948774)); /* c2-c4 */
2236
z3 = MULTIPLY(tmp0 - tmp3, FIX(1.573898926)) + /* c2 */
2237
MULTIPLY(tmp6 - tmp5, FIX(1.076671805)) + /* c8 */
2238
MULTIPLY(tmp1 - tmp4, FIX(0.899492312)); /* (c6+c12)/2 */
2239
2240
dataptr[DCTSIZE*2] = (DCTELEM)
2241
DESCALE(z1 + z3, CONST_BITS+2+PASS2_BITS-PASS1_BITS);
2242
dataptr[DCTSIZE*4] = (DCTELEM)
2243
DESCALE(z2 + z3, CONST_BITS+2+PASS2_BITS-PASS1_BITS);
2244
2245
/* Odd part */
2246
2247
tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
2248
FIX(1.393487498)); /* c5 */
2249
tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.530307725)) + /* c3 */
2250
MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.945782187)); /* c9 */
2251
tmp12 = MULTIPLY(tmp12, FIX(1.393487498)); /* c5 */
2252
tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.600246161)) + /* c1 */
2253
MULTIPLY(tmp11 + tmp14, FIX(1.530307725)) + /* c3 */
2254
MULTIPLY(tmp13 + tmp15, FIX(0.654463974)); /* c11 */
2255
tmp0 = MULTIPLY(tmp13, FIX(0.541301207)) - /* c7-c11 */
2256
MULTIPLY(tmp14, FIX(0.584525538)) + /* c3-c9 */
2257
MULTIPLY(tmp16, FIX(1.934788705)) + tmp4 + tmp12; /* c1+c13 */
2258
tmp3 = MULTIPLY(tmp10, - FIX(0.404480980)) - /* -(c1-c7) */
2259
MULTIPLY(tmp11, FIX(2.476089912)) - /* c3+c9 */
2260
MULTIPLY(tmp15, FIX(0.989006518)) + tmp4 - tmp12; /* c11+c13 */
2261
2262
dataptr[DCTSIZE*1] = (DCTELEM)
2263
DESCALE(tmp0, CONST_BITS+2+PASS2_BITS-PASS1_BITS);
2264
dataptr[DCTSIZE*3] = (DCTELEM)
2265
DESCALE(tmp1, CONST_BITS+2+PASS2_BITS-PASS1_BITS);
2266
dataptr[DCTSIZE*5] = (DCTELEM)
2267
DESCALE(tmp2, CONST_BITS+2+PASS2_BITS-PASS1_BITS);
2268
dataptr[DCTSIZE*7] = (DCTELEM)
2269
DESCALE(tmp3, CONST_BITS+2+PASS2_BITS-PASS1_BITS);
2270
2271
dataptr++; /* advance pointer to next column */
2272
wsptr++; /* advance pointer to next column */
2273
}
2274
}
2275
2276
2277
/*
2278
* Perform the forward DCT on a 16x16 sample block.
2279
*/
2280
2281
GLOBAL(void)
2282
jpeg_fdct_16x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2283
{
2284
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
2285
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
2286
DCTELEM workspace[DCTSIZE2];
2287
DCTELEM *dataptr;
2288
DCTELEM *wsptr;
2289
JSAMPROW elemptr;
2290
int ctr;
2291
SHIFT_TEMPS
2292
2293
/* Pass 1: process rows.
2294
* Note results are scaled up by sqrt(8) compared to a true DCT;
2295
* furthermore, we scale the results by 2**PASS1_BITS.
2296
* cK represents sqrt(2) * cos(K*pi/32).
2297
*/
2298
2299
dataptr = data;
2300
ctr = 0;
2301
for (;;) {
2302
elemptr = sample_data[ctr] + start_col;
2303
2304
/* Even part */
2305
2306
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
2307
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
2308
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
2309
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
2310
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
2311
tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
2312
tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
2313
tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
2314
2315
tmp10 = tmp0 + tmp7;
2316
tmp14 = tmp0 - tmp7;
2317
tmp11 = tmp1 + tmp6;
2318
tmp15 = tmp1 - tmp6;
2319
tmp12 = tmp2 + tmp5;
2320
tmp16 = tmp2 - tmp5;
2321
tmp13 = tmp3 + tmp4;
2322
tmp17 = tmp3 - tmp4;
2323
2324
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
2325
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
2326
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
2327
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
2328
tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
2329
tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
2330
tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
2331
tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
2332
2333
/* Apply unsigned->signed conversion. */
2334
dataptr[0] =
2335
PASS1_OUTPUT(tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE);
2336
dataptr[4] = (DCTELEM)
2337
DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
2338
MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
2339
CONST_BITS-PASS1_BITS);
2340
2341
tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */
2342
MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */
2343
2344
dataptr[2] = (DCTELEM)
2345
DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */
2346
+ MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */
2347
CONST_BITS-PASS1_BITS);
2348
dataptr[6] = (DCTELEM)
2349
DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */
2350
- MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */
2351
CONST_BITS-PASS1_BITS);
2352
2353
/* Odd part */
2354
2355
tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */
2356
MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */
2357
tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */
2358
MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */
2359
tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */
2360
MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */
2361
tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */
2362
MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */
2363
tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */
2364
MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */
2365
tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */
2366
MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */
2367
tmp10 = tmp11 + tmp12 + tmp13 -
2368
MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */
2369
MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */
2370
tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
2371
- MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */
2372
tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
2373
+ MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */
2374
tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
2375
+ MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */
2376
2377
dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
2378
dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
2379
dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
2380
dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
2381
2382
ctr++;
2383
2384
if (ctr != DCTSIZE) {
2385
if (ctr == DCTSIZE * 2)
2386
break; /* Done. */
2387
dataptr += DCTSIZE; /* advance pointer to next row */
2388
} else
2389
dataptr = workspace; /* switch pointer to extended workspace */
2390
}
2391
2392
/* Pass 2: process columns.
2393
* We apply the PASS2_BITS scaling, but leave the
2394
* results scaled up by an overall factor of 8.
2395
* We must also scale the output by (8/16)**2 = 1/2**2.
2396
* cK represents sqrt(2) * cos(K*pi/32).
2397
*/
2398
2399
dataptr = data;
2400
wsptr = workspace;
2401
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2402
/* Even part */
2403
2404
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
2405
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
2406
tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
2407
tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
2408
tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
2409
tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
2410
tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
2411
tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
2412
2413
tmp10 = tmp0 + tmp7;
2414
tmp14 = tmp0 - tmp7;
2415
tmp11 = tmp1 + tmp6;
2416
tmp15 = tmp1 - tmp6;
2417
tmp12 = tmp2 + tmp5;
2418
tmp16 = tmp2 - tmp5;
2419
tmp13 = tmp3 + tmp4;
2420
tmp17 = tmp3 - tmp4;
2421
2422
tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
2423
tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
2424
tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
2425
tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
2426
tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
2427
tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
2428
tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
2429
tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
2430
2431
dataptr[DCTSIZE*0] = (DCTELEM)
2432
DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS2_BITS+2);
2433
dataptr[DCTSIZE*4] = (DCTELEM)
2434
DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
2435
MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
2436
CONST_BITS+PASS2_BITS+2);
2437
2438
tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */
2439
MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */
2440
2441
dataptr[DCTSIZE*2] = (DCTELEM)
2442
DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */
2443
+ MULTIPLY(tmp16, FIX(2.172734804)), /* c2+10 */
2444
CONST_BITS+PASS2_BITS+2);
2445
dataptr[DCTSIZE*6] = (DCTELEM)
2446
DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */
2447
- MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */
2448
CONST_BITS+PASS2_BITS+2);
2449
2450
/* Odd part */
2451
2452
tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */
2453
MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */
2454
tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */
2455
MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */
2456
tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */
2457
MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */
2458
tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */
2459
MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */
2460
tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */
2461
MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */
2462
tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */
2463
MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */
2464
tmp10 = tmp11 + tmp12 + tmp13 -
2465
MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */
2466
MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */
2467
tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
2468
- MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */
2469
tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
2470
+ MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */
2471
tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
2472
+ MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */
2473
2474
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS2_BITS+2);
2475
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS2_BITS+2);
2476
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS2_BITS+2);
2477
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS2_BITS+2);
2478
2479
dataptr++; /* advance pointer to next column */
2480
wsptr++; /* advance pointer to next column */
2481
}
2482
}
2483
2484
2485
/*
2486
* Perform the forward DCT on a 16x8 sample block.
2487
*
2488
* 16-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
2489
*/
2490
2491
GLOBAL(void)
2492
jpeg_fdct_16x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2493
{
2494
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
2495
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
2496
INT32 z1;
2497
DCTELEM *dataptr;
2498
JSAMPROW elemptr;
2499
int ctr;
2500
SHIFT_TEMPS
2501
2502
/* Pass 1: process rows.
2503
* Note results are scaled up by sqrt(8) compared to a true DCT;
2504
* furthermore, we scale the results by 2**PASS1_BITS.
2505
* 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
2506
*/
2507
2508
dataptr = data;
2509
ctr = 0;
2510
for (ctr = 0; ctr < DCTSIZE; ctr++) {
2511
elemptr = sample_data[ctr] + start_col;
2512
2513
/* Even part */
2514
2515
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
2516
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
2517
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
2518
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
2519
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
2520
tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
2521
tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
2522
tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
2523
2524
tmp10 = tmp0 + tmp7;
2525
tmp14 = tmp0 - tmp7;
2526
tmp11 = tmp1 + tmp6;
2527
tmp15 = tmp1 - tmp6;
2528
tmp12 = tmp2 + tmp5;
2529
tmp16 = tmp2 - tmp5;
2530
tmp13 = tmp3 + tmp4;
2531
tmp17 = tmp3 - tmp4;
2532
2533
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
2534
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
2535
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
2536
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
2537
tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
2538
tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
2539
tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
2540
tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
2541
2542
/* Apply unsigned->signed conversion. */
2543
dataptr[0] =
2544
PASS1_OUTPUT(tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE);
2545
dataptr[4] = (DCTELEM)
2546
DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
2547
MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
2548
CONST_BITS-PASS1_BITS);
2549
2550
tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */
2551
MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */
2552
2553
dataptr[2] = (DCTELEM)
2554
DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */
2555
+ MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */
2556
CONST_BITS-PASS1_BITS);
2557
dataptr[6] = (DCTELEM)
2558
DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */
2559
- MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */
2560
CONST_BITS-PASS1_BITS);
2561
2562
/* Odd part */
2563
2564
tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */
2565
MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */
2566
tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */
2567
MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */
2568
tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */
2569
MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */
2570
tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */
2571
MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */
2572
tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */
2573
MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */
2574
tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */
2575
MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */
2576
tmp10 = tmp11 + tmp12 + tmp13 -
2577
MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */
2578
MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */
2579
tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
2580
- MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */
2581
tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
2582
+ MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */
2583
tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
2584
+ MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */
2585
2586
dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
2587
dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
2588
dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
2589
dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
2590
2591
dataptr += DCTSIZE; /* advance pointer to next row */
2592
}
2593
2594
/* Pass 2: process columns.
2595
* We apply the PASS2_BITS scaling, but leave the
2596
* results scaled up by an overall factor of 8.
2597
* We must also scale the output by 8/16 = 1/2.
2598
* 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
2599
*/
2600
2601
dataptr = data;
2602
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2603
/* Even part per LL&M figure 1 --- note that published figure is faulty;
2604
* rotator "c1" should be "c6".
2605
*/
2606
2607
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
2608
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
2609
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
2610
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
2611
2612
/* Add fudge factor here for final descale. */
2613
#if PASS2_BITS > 0
2614
tmp10 = tmp0 + tmp3 + (ONE << PASS2_BITS);
2615
#else
2616
tmp10 = tmp0 + tmp3 + ONE;
2617
#endif
2618
tmp12 = tmp0 - tmp3;
2619
tmp11 = tmp1 + tmp2;
2620
tmp13 = tmp1 - tmp2;
2621
2622
tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
2623
tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
2624
tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
2625
tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
2626
2627
dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS2_BITS+1);
2628
dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS2_BITS+1);
2629
2630
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
2631
/* Add fudge factor here for final descale. */
2632
z1 += ONE << (CONST_BITS+PASS2_BITS);
2633
2634
dataptr[DCTSIZE*2] = (DCTELEM)
2635
RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
2636
CONST_BITS+PASS2_BITS+1);
2637
dataptr[DCTSIZE*6] = (DCTELEM)
2638
RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
2639
CONST_BITS+PASS2_BITS+1);
2640
2641
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
2642
* i0..i3 in the paper are tmp0..tmp3 here.
2643
*/
2644
2645
tmp12 = tmp0 + tmp2;
2646
tmp13 = tmp1 + tmp3;
2647
2648
z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
2649
/* Add fudge factor here for final descale. */
2650
z1 += ONE << (CONST_BITS+PASS2_BITS);
2651
2652
tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
2653
tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
2654
tmp12 += z1;
2655
tmp13 += z1;
2656
2657
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
2658
tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
2659
tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
2660
tmp0 += z1 + tmp12;
2661
tmp3 += z1 + tmp13;
2662
2663
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
2664
tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
2665
tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
2666
tmp1 += z1 + tmp13;
2667
tmp2 += z1 + tmp12;
2668
2669
dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS2_BITS+1);
2670
dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS2_BITS+1);
2671
dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS2_BITS+1);
2672
dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS2_BITS+1);
2673
2674
dataptr++; /* advance pointer to next column */
2675
}
2676
}
2677
2678
2679
/*
2680
* Perform the forward DCT on a 14x7 sample block.
2681
*
2682
* 14-point FDCT in pass 1 (rows), 7-point in pass 2 (columns).
2683
*/
2684
2685
GLOBAL(void)
2686
jpeg_fdct_14x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2687
{
2688
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
2689
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2690
INT32 z1, z2, z3;
2691
DCTELEM *dataptr;
2692
JSAMPROW elemptr;
2693
int ctr;
2694
SHIFT_TEMPS
2695
2696
/* Zero bottom row of output coefficient block. */
2697
MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE);
2698
2699
/* Pass 1: process rows.
2700
* Note results are scaled up by sqrt(8) compared to a true DCT;
2701
* furthermore, we scale the results by 2**PASS1_BITS.
2702
* 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
2703
*/
2704
2705
dataptr = data;
2706
for (ctr = 0; ctr < 7; ctr++) {
2707
elemptr = sample_data[ctr] + start_col;
2708
2709
/* Even part */
2710
2711
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
2712
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
2713
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
2714
tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
2715
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
2716
tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
2717
tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
2718
2719
tmp10 = tmp0 + tmp6;
2720
tmp14 = tmp0 - tmp6;
2721
tmp11 = tmp1 + tmp5;
2722
tmp15 = tmp1 - tmp5;
2723
tmp12 = tmp2 + tmp4;
2724
tmp16 = tmp2 - tmp4;
2725
2726
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
2727
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
2728
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
2729
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
2730
tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
2731
tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
2732
tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
2733
2734
/* Apply unsigned->signed conversion. */
2735
dataptr[0] =
2736
PASS1_OUTPUT(tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE);
2737
tmp13 += tmp13;
2738
dataptr[4] = (DCTELEM)
2739
DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
2740
MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
2741
MULTIPLY(tmp12 - tmp13, FIX(0.881747734)), /* c8 */
2742
CONST_BITS-PASS1_BITS);
2743
2744
tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686)); /* c6 */
2745
2746
dataptr[2] = (DCTELEM)
2747
DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590)) /* c2-c6 */
2748
+ MULTIPLY(tmp16, FIX(0.613604268)), /* c10 */
2749
CONST_BITS-PASS1_BITS);
2750
dataptr[6] = (DCTELEM)
2751
DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954)) /* c6+c10 */
2752
- MULTIPLY(tmp16, FIX(1.378756276)), /* c2 */
2753
CONST_BITS-PASS1_BITS);
2754
2755
/* Odd part */
2756
2757
tmp10 = tmp1 + tmp2;
2758
tmp11 = tmp5 - tmp4;
2759
dataptr[7] = PASS1_OUTPUT(tmp0 - tmp10 + tmp3 - tmp11 - tmp6);
2760
tmp3 <<= CONST_BITS;
2761
tmp10 = MULTIPLY(tmp10, - FIX(0.158341681)); /* -c13 */
2762
tmp11 = MULTIPLY(tmp11, FIX(1.405321284)); /* c1 */
2763
tmp10 += tmp11 - tmp3;
2764
tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) + /* c5 */
2765
MULTIPLY(tmp4 + tmp6, FIX(0.752406978)); /* c9 */
2766
dataptr[5] = (DCTELEM)
2767
DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
2768
+ MULTIPLY(tmp4, FIX(1.119999435)), /* c1+c11-c9 */
2769
CONST_BITS-PASS1_BITS);
2770
tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) + /* c3 */
2771
MULTIPLY(tmp5 - tmp6, FIX(0.467085129)); /* c11 */
2772
dataptr[3] = (DCTELEM)
2773
DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
2774
- MULTIPLY(tmp5, FIX(3.069855259)), /* c1+c5+c11 */
2775
CONST_BITS-PASS1_BITS);
2776
dataptr[1] = (DCTELEM)
2777
DESCALE(tmp11 + tmp12 + tmp3 + tmp6 -
2778
MULTIPLY(tmp0 + tmp6, FIX(1.126980169)), /* c3+c5-c1 */
2779
CONST_BITS-PASS1_BITS);
2780
2781
dataptr += DCTSIZE; /* advance pointer to next row */
2782
}
2783
2784
/* Pass 2: process columns.
2785
* We apply the PASS2_BITS scaling, but leave the
2786
* results scaled up by an overall factor of 8.
2787
* We must also scale the output by (8/14)*(8/7) = 32/49, which we
2788
* partially fold into the constant multipliers and final shifting:
2789
* 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14) * 64/49.
2790
*/
2791
2792
dataptr = data;
2793
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2794
/* Even part */
2795
2796
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
2797
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
2798
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
2799
tmp3 = dataptr[DCTSIZE*3];
2800
2801
tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
2802
tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
2803
tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
2804
2805
z1 = tmp0 + tmp2;
2806
dataptr[DCTSIZE*0] = (DCTELEM)
2807
DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
2808
CONST_BITS+PASS2_BITS+1);
2809
tmp3 += tmp3;
2810
z1 -= tmp3;
2811
z1 -= tmp3;
2812
z1 = MULTIPLY(z1, FIX(0.461784020)); /* (c2+c6-c4)/2 */
2813
z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084)); /* (c2+c4-c6)/2 */
2814
z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446)); /* c6 */
2815
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS2_BITS+1);
2816
z1 -= z2;
2817
z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509)); /* c4 */
2818
dataptr[DCTSIZE*4] = (DCTELEM)
2819
DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
2820
CONST_BITS+PASS2_BITS+1);
2821
dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS2_BITS+1);
2822
2823
/* Odd part */
2824
2825
tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677)); /* (c3+c1-c5)/2 */
2826
tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464)); /* (c3+c5-c1)/2 */
2827
tmp0 = tmp1 - tmp2;
2828
tmp1 += tmp2;
2829
tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
2830
tmp1 += tmp2;
2831
tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310)); /* c5 */
2832
tmp0 += tmp3;
2833
tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355)); /* c3+c1-c5 */
2834
2835
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS2_BITS+1);
2836
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS2_BITS+1);
2837
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS2_BITS+1);
2838
2839
dataptr++; /* advance pointer to next column */
2840
}
2841
}
2842
2843
2844
/*
2845
* Perform the forward DCT on a 12x6 sample block.
2846
*
2847
* 12-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
2848
*/
2849
2850
GLOBAL(void)
2851
jpeg_fdct_12x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2852
{
2853
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
2854
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
2855
DCTELEM *dataptr;
2856
JSAMPROW elemptr;
2857
int ctr;
2858
SHIFT_TEMPS
2859
2860
/* Zero 2 bottom rows of output coefficient block. */
2861
MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2);
2862
2863
/* Pass 1: process rows.
2864
* Note results are scaled up by sqrt(8) compared to a true DCT;
2865
* furthermore, we scale the results by 2**PASS1_BITS.
2866
* 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
2867
*/
2868
2869
dataptr = data;
2870
for (ctr = 0; ctr < 6; ctr++) {
2871
elemptr = sample_data[ctr] + start_col;
2872
2873
/* Even part */
2874
2875
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
2876
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
2877
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
2878
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
2879
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
2880
tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
2881
2882
tmp10 = tmp0 + tmp5;
2883
tmp13 = tmp0 - tmp5;
2884
tmp11 = tmp1 + tmp4;
2885
tmp14 = tmp1 - tmp4;
2886
tmp12 = tmp2 + tmp3;
2887
tmp15 = tmp2 - tmp3;
2888
2889
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
2890
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
2891
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
2892
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
2893
tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
2894
tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
2895
2896
/* Apply unsigned->signed conversion. */
2897
dataptr[0] =
2898
PASS1_OUTPUT(tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE);
2899
dataptr[6] = PASS1_OUTPUT(tmp13 - tmp14 - tmp15);
2900
dataptr[4] = (DCTELEM)
2901
DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
2902
CONST_BITS-PASS1_BITS);
2903
dataptr[2] = (DCTELEM)
2904
DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
2905
CONST_BITS-PASS1_BITS);
2906
2907
/* Odd part */
2908
2909
tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100); /* c9 */
2910
tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865); /* c3-c9 */
2911
tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065); /* c3+c9 */
2912
tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054)); /* c5 */
2913
tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669)); /* c7 */
2914
tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
2915
+ MULTIPLY(tmp5, FIX(0.184591911)); /* c11 */
2916
tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
2917
tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
2918
+ MULTIPLY(tmp5, FIX(0.860918669)); /* c7 */
2919
tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
2920
- MULTIPLY(tmp5, FIX(1.121971054)); /* c5 */
2921
tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
2922
- MULTIPLY(tmp2 + tmp5, FIX_0_541196100); /* c9 */
2923
2924
dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
2925
dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
2926
dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
2927
dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
2928
2929
dataptr += DCTSIZE; /* advance pointer to next row */
2930
}
2931
2932
/* Pass 2: process columns.
2933
* We apply the PASS2_BITS scaling, but leave the
2934
* results scaled up by an overall factor of 8.
2935
* We must also scale the output by (8/12)*(8/6) = 8/9, which we
2936
* partially fold into the constant multipliers and final shifting:
2937
* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
2938
*/
2939
2940
dataptr = data;
2941
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2942
/* Even part */
2943
2944
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
2945
tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
2946
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
2947
2948
tmp10 = tmp0 + tmp2;
2949
tmp12 = tmp0 - tmp2;
2950
2951
tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
2952
tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
2953
tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
2954
2955
dataptr[DCTSIZE*0] = (DCTELEM)
2956
DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */
2957
CONST_BITS+PASS2_BITS+1);
2958
dataptr[DCTSIZE*2] = (DCTELEM)
2959
DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */
2960
CONST_BITS+PASS2_BITS+1);
2961
dataptr[DCTSIZE*4] = (DCTELEM)
2962
DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
2963
CONST_BITS+PASS2_BITS+1);
2964
2965
/* Odd part */
2966
2967
tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */
2968
2969
dataptr[DCTSIZE*1] = (DCTELEM)
2970
DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */
2971
CONST_BITS+PASS2_BITS+1);
2972
dataptr[DCTSIZE*3] = (DCTELEM)
2973
DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */
2974
CONST_BITS+PASS2_BITS+1);
2975
dataptr[DCTSIZE*5] = (DCTELEM)
2976
DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */
2977
CONST_BITS+PASS2_BITS+1);
2978
2979
dataptr++; /* advance pointer to next column */
2980
}
2981
}
2982
2983
2984
/*
2985
* Perform the forward DCT on a 10x5 sample block.
2986
*
2987
* 10-point FDCT in pass 1 (rows), 5-point in pass 2 (columns).
2988
*/
2989
2990
GLOBAL(void)
2991
jpeg_fdct_10x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2992
{
2993
INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
2994
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
2995
DCTELEM *dataptr;
2996
JSAMPROW elemptr;
2997
int ctr;
2998
SHIFT_TEMPS
2999
3000
/* Zero 3 bottom rows of output coefficient block. */
3001
MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3);
3002
3003
/* Pass 1: process rows.
3004
* Note results are scaled up by sqrt(8) compared to a true DCT;
3005
* furthermore, we scale the results by 2**PASS1_BITS.
3006
* 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
3007
*/
3008
3009
dataptr = data;
3010
for (ctr = 0; ctr < 5; ctr++) {
3011
elemptr = sample_data[ctr] + start_col;
3012
3013
/* Even part */
3014
3015
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
3016
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
3017
tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
3018
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
3019
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
3020
3021
tmp10 = tmp0 + tmp4;
3022
tmp13 = tmp0 - tmp4;
3023
tmp11 = tmp1 + tmp3;
3024
tmp14 = tmp1 - tmp3;
3025
3026
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
3027
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
3028
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
3029
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
3030
tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
3031
3032
/* Apply unsigned->signed conversion. */
3033
dataptr[0] =
3034
PASS1_OUTPUT(tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE);
3035
tmp12 += tmp12;
3036
dataptr[4] = (DCTELEM)
3037
DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
3038
MULTIPLY(tmp11 - tmp12, FIX(0.437016024)), /* c8 */
3039
CONST_BITS-PASS1_BITS);
3040
tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876)); /* c6 */
3041
dataptr[2] = (DCTELEM)
3042
DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)), /* c2-c6 */
3043
CONST_BITS-PASS1_BITS);
3044
dataptr[6] = (DCTELEM)
3045
DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)), /* c2+c6 */
3046
CONST_BITS-PASS1_BITS);
3047
3048
/* Odd part */
3049
3050
tmp10 = tmp0 + tmp4;
3051
tmp11 = tmp1 - tmp3;
3052
dataptr[5] = PASS1_OUTPUT(tmp10 - tmp11 - tmp2);
3053
tmp2 <<= CONST_BITS;
3054
dataptr[1] = (DCTELEM)
3055
DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) + /* c1 */
3056
MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 + /* c3 */
3057
MULTIPLY(tmp3, FIX(0.642039522)) + /* c7 */
3058
MULTIPLY(tmp4, FIX(0.221231742)), /* c9 */
3059
CONST_BITS-PASS1_BITS);
3060
tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) - /* (c3+c7)/2 */
3061
MULTIPLY(tmp1 + tmp3, FIX(0.587785252)); /* (c1-c9)/2 */
3062
tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) + /* (c3-c7)/2 */
3063
(tmp11 << (CONST_BITS - 1)) - tmp2;
3064
dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-PASS1_BITS);
3065
dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-PASS1_BITS);
3066
3067
dataptr += DCTSIZE; /* advance pointer to next row */
3068
}
3069
3070
/* Pass 2: process columns.
3071
* We apply the PASS2_BITS scaling, but leave the
3072
* results scaled up by an overall factor of 8.
3073
* We must also scale the output by (8/10)*(8/5) = 32/25,
3074
* which we fold into the constant multipliers:
3075
* 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10) * 32/25.
3076
*/
3077
3078
dataptr = data;
3079
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
3080
/* Even part */
3081
3082
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
3083
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
3084
tmp2 = dataptr[DCTSIZE*2];
3085
3086
tmp10 = tmp0 + tmp1;
3087
tmp11 = tmp0 - tmp1;
3088
3089
tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
3090
tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
3091
3092
dataptr[DCTSIZE*0] = (DCTELEM)
3093
DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)), /* 32/25 */
3094
CONST_BITS+PASS2_BITS);
3095
tmp11 = MULTIPLY(tmp11, FIX(1.011928851)); /* (c2+c4)/2 */
3096
tmp10 -= tmp2 << 2;
3097
tmp10 = MULTIPLY(tmp10, FIX(0.452548340)); /* (c2-c4)/2 */
3098
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS2_BITS);
3099
dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS2_BITS);
3100
3101
/* Odd part */
3102
3103
tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961)); /* c3 */
3104
3105
dataptr[DCTSIZE*1] = (DCTELEM)
3106
DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
3107
CONST_BITS+PASS2_BITS);
3108
dataptr[DCTSIZE*3] = (DCTELEM)
3109
DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
3110
CONST_BITS+PASS2_BITS);
3111
3112
dataptr++; /* advance pointer to next column */
3113
}
3114
}
3115
3116
3117
/*
3118
* Perform the forward DCT on an 8x4 sample block.
3119
*
3120
* 8-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
3121
*/
3122
3123
GLOBAL(void)
3124
jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3125
{
3126
INT32 tmp0, tmp1, tmp2, tmp3;
3127
INT32 tmp10, tmp11, tmp12, tmp13;
3128
INT32 z1;
3129
DCTELEM *dataptr;
3130
JSAMPROW elemptr;
3131
int ctr;
3132
SHIFT_TEMPS
3133
3134
/* Zero 4 bottom rows of output coefficient block. */
3135
MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4);
3136
3137
/* Pass 1: process rows.
3138
* Note results are scaled up by sqrt(8) compared to a true DCT;
3139
* furthermore, we scale the results by 2**PASS1_BITS.
3140
* We must also scale the output by 8/4 = 2, which we add here.
3141
* 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3142
*/
3143
3144
dataptr = data;
3145
for (ctr = 0; ctr < 4; ctr++) {
3146
elemptr = sample_data[ctr] + start_col;
3147
3148
/* Even part per LL&M figure 1 --- note that published figure is faulty;
3149
* rotator "c1" should be "c6".
3150
*/
3151
3152
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
3153
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
3154
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
3155
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
3156
3157
tmp10 = tmp0 + tmp3;
3158
tmp12 = tmp0 - tmp3;
3159
tmp11 = tmp1 + tmp2;
3160
tmp13 = tmp1 - tmp2;
3161
3162
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
3163
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
3164
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
3165
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
3166
3167
/* Apply unsigned->signed conversion. */
3168
dataptr[0] = (DCTELEM)
3169
((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1));
3170
dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1));
3171
3172
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
3173
/* Add fudge factor here for final descale. */
3174
z1 += ONE << (CONST_BITS-PASS1_BITS-2);
3175
3176
dataptr[2] = (DCTELEM)
3177
RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
3178
CONST_BITS-PASS1_BITS-1);
3179
dataptr[6] = (DCTELEM)
3180
RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
3181
CONST_BITS-PASS1_BITS-1);
3182
3183
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
3184
* i0..i3 in the paper are tmp0..tmp3 here.
3185
*/
3186
3187
tmp12 = tmp0 + tmp2;
3188
tmp13 = tmp1 + tmp3;
3189
3190
z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
3191
/* Add fudge factor here for final descale. */
3192
z1 += ONE << (CONST_BITS-PASS1_BITS-2);
3193
3194
tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
3195
tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
3196
tmp12 += z1;
3197
tmp13 += z1;
3198
3199
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3200
tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
3201
tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
3202
tmp0 += z1 + tmp12;
3203
tmp3 += z1 + tmp13;
3204
3205
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3206
tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
3207
tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
3208
tmp1 += z1 + tmp13;
3209
tmp2 += z1 + tmp12;
3210
3211
dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS-1);
3212
dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS-1);
3213
dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS-1);
3214
dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS-1);
3215
3216
dataptr += DCTSIZE; /* advance pointer to next row */
3217
}
3218
3219
/* Pass 2: process columns.
3220
* We apply the PASS2_BITS scaling, but leave the
3221
* results scaled up by an overall factor of 8.
3222
* 4-point FDCT kernel,
3223
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
3224
*/
3225
3226
dataptr = data;
3227
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
3228
/* Even part */
3229
3230
/* Add fudge factor here for final descale. */
3231
#if PASS2_BITS > 1
3232
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS2_BITS-1));
3233
#else
3234
#if PASS2_BITS > 0
3235
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + ONE;
3236
#else
3237
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3];
3238
#endif
3239
#endif
3240
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
3241
3242
tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
3243
tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
3244
3245
dataptr[DCTSIZE*0] = PASS2_OUTPUT(tmp0 + tmp1);
3246
dataptr[DCTSIZE*2] = PASS2_OUTPUT(tmp0 - tmp1);
3247
3248
/* Odd part */
3249
3250
tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
3251
/* Add fudge factor here for final descale. */
3252
tmp0 += ONE << (CONST_BITS+PASS2_BITS-1);
3253
3254
dataptr[DCTSIZE*1] = (DCTELEM)
3255
RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
3256
CONST_BITS+PASS2_BITS);
3257
dataptr[DCTSIZE*3] = (DCTELEM)
3258
RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
3259
CONST_BITS+PASS2_BITS);
3260
3261
dataptr++; /* advance pointer to next column */
3262
}
3263
}
3264
3265
3266
/*
3267
* Perform the forward DCT on a 6x3 sample block.
3268
*
3269
* 6-point FDCT in pass 1 (rows), 3-point in pass 2 (columns).
3270
*/
3271
3272
GLOBAL(void)
3273
jpeg_fdct_6x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3274
{
3275
INT32 tmp0, tmp1, tmp2;
3276
INT32 tmp10, tmp11, tmp12;
3277
DCTELEM *dataptr;
3278
JSAMPROW elemptr;
3279
int ctr;
3280
SHIFT_TEMPS
3281
3282
/* Pre-zero output coefficient block. */
3283
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3284
3285
/* Pass 1: process rows.
3286
* Note results are scaled up by sqrt(8) compared to a true DCT;
3287
* furthermore, we scale the results by 2**PASS1_BITS.
3288
* We scale the results further by 2 as part of output adaption
3289
* scaling for different DCT size.
3290
* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3291
*/
3292
3293
dataptr = data;
3294
for (ctr = 0; ctr < 3; ctr++) {
3295
elemptr = sample_data[ctr] + start_col;
3296
3297
/* Even part */
3298
3299
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
3300
tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
3301
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
3302
3303
tmp10 = tmp0 + tmp2;
3304
tmp12 = tmp0 - tmp2;
3305
3306
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
3307
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
3308
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
3309
3310
/* Apply unsigned->signed conversion. */
3311
dataptr[0] = (DCTELEM)
3312
((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << (PASS1_BITS+1));
3313
dataptr[2] = (DCTELEM)
3314
DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */
3315
CONST_BITS-PASS1_BITS-1);
3316
dataptr[4] = (DCTELEM)
3317
DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
3318
CONST_BITS-PASS1_BITS-1);
3319
3320
/* Odd part */
3321
3322
tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */
3323
CONST_BITS-PASS1_BITS-1);
3324
3325
dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << (PASS1_BITS+1)));
3326
dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << (PASS1_BITS+1));
3327
dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << (PASS1_BITS+1)));
3328
3329
dataptr += DCTSIZE; /* advance pointer to next row */
3330
}
3331
3332
/* Pass 2: process columns.
3333
* We apply the PASS2_BITS scaling, but leave the
3334
* results scaled up by an overall factor of 8.
3335
* We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
3336
* fold into the constant multipliers (other part was done in pass 1):
3337
* 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6) * 16/9.
3338
*/
3339
3340
dataptr = data;
3341
for (ctr = 0; ctr < 6; ctr++) {
3342
/* Even part */
3343
3344
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
3345
tmp1 = dataptr[DCTSIZE*1];
3346
3347
tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
3348
3349
dataptr[DCTSIZE*0] = (DCTELEM)
3350
DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */
3351
CONST_BITS+PASS2_BITS);
3352
dataptr[DCTSIZE*2] = (DCTELEM)
3353
DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
3354
CONST_BITS+PASS2_BITS);
3355
3356
/* Odd part */
3357
3358
dataptr[DCTSIZE*1] = (DCTELEM)
3359
DESCALE(MULTIPLY(tmp2, FIX(2.177324216)), /* c1 */
3360
CONST_BITS+PASS2_BITS);
3361
3362
dataptr++; /* advance pointer to next column */
3363
}
3364
}
3365
3366
3367
/*
3368
* Perform the forward DCT on a 4x2 sample block.
3369
*
3370
* 4-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
3371
*/
3372
3373
GLOBAL(void)
3374
jpeg_fdct_4x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3375
{
3376
DCTELEM tmp0, tmp2, tmp10, tmp12, tmp4, tmp5;
3377
INT32 tmp1, tmp3, tmp11, tmp13;
3378
INT32 z1, z2, z3;
3379
JSAMPROW elemptr;
3380
SHIFT_TEMPS
3381
#if PASS2_BITS > PASS1_BITS + 3
3382
ISHIFT_TEMPS
3383
#endif
3384
3385
/* Pre-zero output coefficient block. */
3386
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3387
3388
/* Pass 1: process rows.
3389
* Note results are scaled up by sqrt(8) compared to a true DCT.
3390
* 4-point FDCT kernel,
3391
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
3392
*/
3393
3394
/* Row 0 */
3395
elemptr = sample_data[0] + start_col;
3396
3397
/* Even part */
3398
3399
tmp4 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
3400
tmp5 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
3401
3402
#if PASS2_BITS > PASS1_BITS + 3
3403
/* Add fudge factor here for final downscale. */
3404
#if PASS2_BITS > PASS1_BITS + 4
3405
tmp4 += 1 << (PASS2_BITS-PASS1_BITS-3-1);
3406
#else
3407
tmp4 += 1;
3408
#endif
3409
#endif
3410
3411
tmp0 = tmp4 + tmp5;
3412
tmp2 = tmp4 - tmp5;
3413
3414
/* Odd part */
3415
3416
z2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
3417
z3 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
3418
3419
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3420
/* Add fudge factor here for final descale. */
3421
z1 += ONE << (CONST_BITS+PASS2_BITS-PASS1_BITS-3-1);
3422
tmp1 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
3423
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
3424
3425
/* Row 1 */
3426
elemptr = sample_data[1] + start_col;
3427
3428
/* Even part */
3429
3430
tmp4 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
3431
tmp5 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
3432
3433
tmp10 = tmp4 + tmp5;
3434
tmp12 = tmp4 - tmp5;
3435
3436
/* Odd part */
3437
3438
z2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
3439
z3 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
3440
3441
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3442
tmp11 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
3443
tmp13 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
3444
3445
/* Pass 2: process columns.
3446
* We leave the results scaled up by an overall factor of 8.
3447
* We must also scale the output by (8/4)*(8/2) = 2**3.
3448
*/
3449
3450
/* Column 0 */
3451
/* Apply unsigned->signed conversion. */
3452
3453
#if PASS2_BITS < PASS1_BITS + 3
3454
data[DCTSIZE*0] =
3455
(tmp0 + tmp10 - 8 * CENTERJSAMPLE) << (3+PASS1_BITS-PASS2_BITS);
3456
data[DCTSIZE*1] = (tmp0 - tmp10) << (3+PASS1_BITS-PASS2_BITS);
3457
3458
/* Column 2 */
3459
data[DCTSIZE*0+2] = (tmp2 + tmp12) << (3+PASS1_BITS-PASS2_BITS);
3460
data[DCTSIZE*1+2] = (tmp2 - tmp12) << (3+PASS1_BITS-PASS2_BITS);
3461
#else
3462
#if PASS2_BITS == PASS1_BITS + 3
3463
data[DCTSIZE*0] = tmp0 + tmp10 - 8 * CENTERJSAMPLE;
3464
data[DCTSIZE*1] = tmp0 - tmp10;
3465
3466
/* Column 2 */
3467
data[DCTSIZE*0+2] = tmp2 + tmp12;
3468
data[DCTSIZE*1+2] = tmp2 - tmp12;
3469
#else
3470
data[DCTSIZE*0] =
3471
IRIGHT_SHIFT(tmp0 + tmp10 - 8 * CENTERJSAMPLE,
3472
PASS2_BITS-PASS1_BITS-3);
3473
data[DCTSIZE*1] =
3474
IRIGHT_SHIFT(tmp0 - tmp10, PASS2_BITS-PASS1_BITS-3);
3475
3476
/* Column 2 */
3477
data[DCTSIZE*0+2] =
3478
IRIGHT_SHIFT(tmp2 + tmp12, PASS2_BITS-PASS1_BITS-3);
3479
data[DCTSIZE*1+2] =
3480
IRIGHT_SHIFT(tmp2 - tmp12, PASS2_BITS-PASS1_BITS-3);
3481
#endif
3482
#endif
3483
3484
/* Column 1 */
3485
data[DCTSIZE*0+1] = (DCTELEM)
3486
RIGHT_SHIFT(tmp1 + tmp11, CONST_BITS+PASS2_BITS-PASS1_BITS-3);
3487
data[DCTSIZE*1+1] = (DCTELEM)
3488
RIGHT_SHIFT(tmp1 - tmp11, CONST_BITS+PASS2_BITS-PASS1_BITS-3);
3489
3490
/* Column 3 */
3491
data[DCTSIZE*0+3] = (DCTELEM)
3492
RIGHT_SHIFT(tmp3 + tmp13, CONST_BITS+PASS2_BITS-PASS1_BITS-3);
3493
data[DCTSIZE*1+3] = (DCTELEM)
3494
RIGHT_SHIFT(tmp3 - tmp13, CONST_BITS+PASS2_BITS-PASS1_BITS-3);
3495
}
3496
3497
3498
/*
3499
* Perform the forward DCT on a 2x1 sample block.
3500
*
3501
* 2-point FDCT in pass 1 (rows), 1-point in pass 2 (columns).
3502
*/
3503
3504
GLOBAL(void)
3505
jpeg_fdct_2x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3506
{
3507
DCTELEM tmp0, tmp1;
3508
JSAMPROW elemptr;
3509
3510
/* Pre-zero output coefficient block. */
3511
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3512
3513
elemptr = sample_data[0] + start_col;
3514
3515
tmp0 = GETJSAMPLE(elemptr[0]);
3516
tmp1 = GETJSAMPLE(elemptr[1]);
3517
3518
/* We leave the results scaled up by an overall factor of 8.
3519
* We must also scale the output by (8/2)*(8/1) = 2**5.
3520
*/
3521
3522
/* Even part */
3523
3524
/* Apply unsigned->signed conversion. */
3525
data[0] =
3526
(tmp0 + tmp1 - 2 * CENTERJSAMPLE) << (5+PASS1_BITS-PASS2_BITS);
3527
3528
/* Odd part */
3529
3530
data[1] = (tmp0 - tmp1) << (5+PASS1_BITS-PASS2_BITS);
3531
}
3532
3533
3534
/*
3535
* Perform the forward DCT on an 8x16 sample block.
3536
*
3537
* 8-point FDCT in pass 1 (rows), 16-point in pass 2 (columns).
3538
*/
3539
3540
GLOBAL(void)
3541
jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3542
{
3543
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
3544
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
3545
INT32 z1;
3546
DCTELEM workspace[DCTSIZE2];
3547
DCTELEM *dataptr;
3548
DCTELEM *wsptr;
3549
JSAMPROW elemptr;
3550
int ctr;
3551
SHIFT_TEMPS
3552
3553
/* Pass 1: process rows.
3554
* Note results are scaled up by sqrt(8) compared to a true DCT;
3555
* furthermore, we scale the results by 2**PASS1_BITS.
3556
* 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3557
*/
3558
3559
dataptr = data;
3560
ctr = 0;
3561
for (;;) {
3562
elemptr = sample_data[ctr] + start_col;
3563
3564
/* Even part per LL&M figure 1 --- note that published figure is faulty;
3565
* rotator "c1" should be "c6".
3566
*/
3567
3568
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
3569
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
3570
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
3571
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
3572
3573
tmp10 = tmp0 + tmp3;
3574
tmp12 = tmp0 - tmp3;
3575
tmp11 = tmp1 + tmp2;
3576
tmp13 = tmp1 - tmp2;
3577
3578
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
3579
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
3580
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
3581
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
3582
3583
/* Apply unsigned->signed conversion. */
3584
dataptr[0] = PASS1_OUTPUT(tmp10 + tmp11 - 8 * CENTERJSAMPLE);
3585
dataptr[4] = PASS1_OUTPUT(tmp10 - tmp11);
3586
3587
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
3588
/* Add fudge factor here for final descale. */
3589
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
3590
3591
dataptr[2] = (DCTELEM)
3592
RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
3593
CONST_BITS-PASS1_BITS);
3594
dataptr[6] = (DCTELEM)
3595
RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
3596
CONST_BITS-PASS1_BITS);
3597
3598
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
3599
* i0..i3 in the paper are tmp0..tmp3 here.
3600
*/
3601
3602
tmp12 = tmp0 + tmp2;
3603
tmp13 = tmp1 + tmp3;
3604
3605
z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
3606
/* Add fudge factor here for final descale. */
3607
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
3608
3609
tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
3610
tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
3611
tmp12 += z1;
3612
tmp13 += z1;
3613
3614
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3615
tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
3616
tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
3617
tmp0 += z1 + tmp12;
3618
tmp3 += z1 + tmp13;
3619
3620
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3621
tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
3622
tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
3623
tmp1 += z1 + tmp13;
3624
tmp2 += z1 + tmp12;
3625
3626
dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS);
3627
dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS);
3628
dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
3629
dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS);
3630
3631
ctr++;
3632
3633
if (ctr != DCTSIZE) {
3634
if (ctr == DCTSIZE * 2)
3635
break; /* Done. */
3636
dataptr += DCTSIZE; /* advance pointer to next row */
3637
} else
3638
dataptr = workspace; /* switch pointer to extended workspace */
3639
}
3640
3641
/* Pass 2: process columns.
3642
* We apply the PASS2_BITS scaling, but leave the
3643
* results scaled up by an overall factor of 8.
3644
* We must also scale the output by 8/16 = 1/2.
3645
* 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
3646
*/
3647
3648
dataptr = data;
3649
wsptr = workspace;
3650
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
3651
/* Even part */
3652
3653
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
3654
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
3655
tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
3656
tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
3657
tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
3658
tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
3659
tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
3660
tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
3661
3662
tmp10 = tmp0 + tmp7;
3663
tmp14 = tmp0 - tmp7;
3664
tmp11 = tmp1 + tmp6;
3665
tmp15 = tmp1 - tmp6;
3666
tmp12 = tmp2 + tmp5;
3667
tmp16 = tmp2 - tmp5;
3668
tmp13 = tmp3 + tmp4;
3669
tmp17 = tmp3 - tmp4;
3670
3671
tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
3672
tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
3673
tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
3674
tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
3675
tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
3676
tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
3677
tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
3678
tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
3679
3680
dataptr[DCTSIZE*0] = (DCTELEM)
3681
#if PASS2_BITS > 0
3682
RIGHT_SHIFT(tmp10 + tmp11 + tmp12 + tmp13 + (ONE << PASS2_BITS),
3683
PASS2_BITS+1);
3684
#else
3685
RIGHT_SHIFT(tmp10 + tmp11 + tmp12 + tmp13 + ONE, 1);
3686
#endif
3687
dataptr[DCTSIZE*4] = (DCTELEM)
3688
DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
3689
MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
3690
CONST_BITS+PASS2_BITS+1);
3691
3692
tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */
3693
MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */
3694
3695
dataptr[DCTSIZE*2] = (DCTELEM)
3696
DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */
3697
+ MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */
3698
CONST_BITS+PASS2_BITS+1);
3699
dataptr[DCTSIZE*6] = (DCTELEM)
3700
DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */
3701
- MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */
3702
CONST_BITS+PASS2_BITS+1);
3703
3704
/* Odd part */
3705
3706
tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */
3707
MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */
3708
tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */
3709
MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */
3710
tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */
3711
MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */
3712
tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */
3713
MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */
3714
tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */
3715
MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */
3716
tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */
3717
MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */
3718
tmp10 = tmp11 + tmp12 + tmp13 -
3719
MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */
3720
MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */
3721
tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
3722
- MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */
3723
tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
3724
+ MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */
3725
tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
3726
+ MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */
3727
3728
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS2_BITS+1);
3729
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS2_BITS+1);
3730
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS2_BITS+1);
3731
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS2_BITS+1);
3732
3733
dataptr++; /* advance pointer to next column */
3734
wsptr++; /* advance pointer to next column */
3735
}
3736
}
3737
3738
3739
/*
3740
* Perform the forward DCT on a 7x14 sample block.
3741
*
3742
* 7-point FDCT in pass 1 (rows), 14-point in pass 2 (columns).
3743
*/
3744
3745
GLOBAL(void)
3746
jpeg_fdct_7x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3747
{
3748
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
3749
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
3750
INT32 z1, z2, z3;
3751
DCTELEM workspace[8*6];
3752
DCTELEM *dataptr;
3753
DCTELEM *wsptr;
3754
JSAMPROW elemptr;
3755
int ctr;
3756
SHIFT_TEMPS
3757
3758
/* Pre-zero output coefficient block. */
3759
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3760
3761
/* Pass 1: process rows.
3762
* Note results are scaled up by sqrt(8) compared to a true DCT;
3763
* furthermore, we scale the results by 2**PASS1_BITS.
3764
* 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
3765
*/
3766
3767
dataptr = data;
3768
ctr = 0;
3769
for (;;) {
3770
elemptr = sample_data[ctr] + start_col;
3771
3772
/* Even part */
3773
3774
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
3775
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
3776
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
3777
tmp3 = GETJSAMPLE(elemptr[3]);
3778
3779
tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
3780
tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
3781
tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
3782
3783
z1 = tmp0 + tmp2;
3784
/* Apply unsigned->signed conversion. */
3785
dataptr[0] = PASS1_OUTPUT(z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE);
3786
tmp3 += tmp3;
3787
z1 -= tmp3;
3788
z1 -= tmp3;
3789
z1 = MULTIPLY(z1, FIX(0.353553391)); /* (c2+c6-c4)/2 */
3790
z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002)); /* (c2+c4-c6)/2 */
3791
z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123)); /* c6 */
3792
dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
3793
z1 -= z2;
3794
z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734)); /* c4 */
3795
dataptr[4] = (DCTELEM)
3796
DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
3797
CONST_BITS-PASS1_BITS);
3798
dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
3799
3800
/* Odd part */
3801
3802
tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347)); /* (c3+c1-c5)/2 */
3803
tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339)); /* (c3+c5-c1)/2 */
3804
tmp0 = tmp1 - tmp2;
3805
tmp1 += tmp2;
3806
tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
3807
tmp1 += tmp2;
3808
tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268)); /* c5 */
3809
tmp0 += tmp3;
3810
tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693)); /* c3+c1-c5 */
3811
3812
dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
3813
dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
3814
dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
3815
3816
ctr++;
3817
3818
if (ctr != DCTSIZE) {
3819
if (ctr == 14)
3820
break; /* Done. */
3821
dataptr += DCTSIZE; /* advance pointer to next row */
3822
} else
3823
dataptr = workspace; /* switch pointer to extended workspace */
3824
}
3825
3826
/* Pass 2: process columns.
3827
* We apply the PASS2_BITS scaling, but leave the
3828
* results scaled up by an overall factor of 8.
3829
* We must also scale the output by (8/7)*(8/14) = 32/49,
3830
* which we fold into the constant multipliers:
3831
* 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28) * 32/49.
3832
*/
3833
3834
dataptr = data;
3835
wsptr = workspace;
3836
for (ctr = 0; ctr < 7; ctr++) {
3837
/* Even part */
3838
3839
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
3840
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
3841
tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
3842
tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
3843
tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
3844
tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
3845
tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
3846
3847
tmp10 = tmp0 + tmp6;
3848
tmp14 = tmp0 - tmp6;
3849
tmp11 = tmp1 + tmp5;
3850
tmp15 = tmp1 - tmp5;
3851
tmp12 = tmp2 + tmp4;
3852
tmp16 = tmp2 - tmp4;
3853
3854
tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
3855
tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
3856
tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
3857
tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
3858
tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
3859
tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
3860
tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
3861
3862
dataptr[DCTSIZE*0] = (DCTELEM)
3863
DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
3864
FIX(0.653061224)), /* 32/49 */
3865
CONST_BITS+PASS2_BITS);
3866
tmp13 += tmp13;
3867
dataptr[DCTSIZE*4] = (DCTELEM)
3868
DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
3869
MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
3870
MULTIPLY(tmp12 - tmp13, FIX(0.575835255)), /* c8 */
3871
CONST_BITS+PASS2_BITS);
3872
3873
tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570)); /* c6 */
3874
3875
dataptr[DCTSIZE*2] = (DCTELEM)
3876
DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691)) /* c2-c6 */
3877
+ MULTIPLY(tmp16, FIX(0.400721155)), /* c10 */
3878
CONST_BITS+PASS2_BITS);
3879
dataptr[DCTSIZE*6] = (DCTELEM)
3880
DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725)) /* c6+c10 */
3881
- MULTIPLY(tmp16, FIX(0.900412262)), /* c2 */
3882
CONST_BITS+PASS2_BITS);
3883
3884
/* Odd part */
3885
3886
tmp10 = tmp1 + tmp2;
3887
tmp11 = tmp5 - tmp4;
3888
dataptr[DCTSIZE*7] = (DCTELEM)
3889
DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
3890
FIX(0.653061224)), /* 32/49 */
3891
CONST_BITS+PASS2_BITS);
3892
tmp3 = MULTIPLY(tmp3 , FIX(0.653061224)); /* 32/49 */
3893
tmp10 = MULTIPLY(tmp10, - FIX(0.103406812)); /* -c13 */
3894
tmp11 = MULTIPLY(tmp11, FIX(0.917760839)); /* c1 */
3895
tmp10 += tmp11 - tmp3;
3896
tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) + /* c5 */
3897
MULTIPLY(tmp4 + tmp6, FIX(0.491367823)); /* c9 */
3898
dataptr[DCTSIZE*5] = (DCTELEM)
3899
DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
3900
+ MULTIPLY(tmp4, FIX(0.731428202)), /* c1+c11-c9 */
3901
CONST_BITS+PASS2_BITS);
3902
tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) + /* c3 */
3903
MULTIPLY(tmp5 - tmp6, FIX(0.305035186)); /* c11 */
3904
dataptr[DCTSIZE*3] = (DCTELEM)
3905
DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
3906
- MULTIPLY(tmp5, FIX(2.004803435)), /* c1+c5+c11 */
3907
CONST_BITS+PASS2_BITS);
3908
dataptr[DCTSIZE*1] = (DCTELEM)
3909
DESCALE(tmp11 + tmp12 + tmp3
3910
- MULTIPLY(tmp0, FIX(0.735987049)) /* c3+c5-c1 */
3911
- MULTIPLY(tmp6, FIX(0.082925825)), /* c9-c11-c13 */
3912
CONST_BITS+PASS2_BITS);
3913
3914
dataptr++; /* advance pointer to next column */
3915
wsptr++; /* advance pointer to next column */
3916
}
3917
}
3918
3919
3920
/*
3921
* Perform the forward DCT on a 6x12 sample block.
3922
*
3923
* 6-point FDCT in pass 1 (rows), 12-point in pass 2 (columns).
3924
*/
3925
3926
GLOBAL(void)
3927
jpeg_fdct_6x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3928
{
3929
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
3930
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
3931
DCTELEM workspace[8*4];
3932
DCTELEM *dataptr;
3933
DCTELEM *wsptr;
3934
JSAMPROW elemptr;
3935
int ctr;
3936
SHIFT_TEMPS
3937
3938
/* Pre-zero output coefficient block. */
3939
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3940
3941
/* Pass 1: process rows.
3942
* Note results are scaled up by sqrt(8) compared to a true DCT;
3943
* furthermore, we scale the results by 2**PASS1_BITS.
3944
* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3945
*/
3946
3947
dataptr = data;
3948
ctr = 0;
3949
for (;;) {
3950
elemptr = sample_data[ctr] + start_col;
3951
3952
/* Even part */
3953
3954
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
3955
tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
3956
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
3957
3958
tmp10 = tmp0 + tmp2;
3959
tmp12 = tmp0 - tmp2;
3960
3961
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
3962
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
3963
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
3964
3965
/* Apply unsigned->signed conversion. */
3966
dataptr[0] = PASS1_OUTPUT(tmp10 + tmp11 - 6 * CENTERJSAMPLE);
3967
dataptr[2] = (DCTELEM)
3968
DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */
3969
CONST_BITS-PASS1_BITS);
3970
dataptr[4] = (DCTELEM)
3971
DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
3972
CONST_BITS-PASS1_BITS);
3973
3974
/* Odd part */
3975
3976
tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */
3977
CONST_BITS-PASS1_BITS);
3978
3979
#if PASS1_BITS > 0
3980
dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
3981
dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
3982
dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
3983
#else
3984
dataptr[1] = (DCTELEM) (tmp10 + tmp0 + tmp1);
3985
dataptr[3] = (DCTELEM) (tmp0 - tmp1 - tmp2);
3986
dataptr[5] = (DCTELEM) (tmp10 + tmp2 - tmp1);
3987
#endif
3988
3989
ctr++;
3990
3991
if (ctr != DCTSIZE) {
3992
if (ctr == 12)
3993
break; /* Done. */
3994
dataptr += DCTSIZE; /* advance pointer to next row */
3995
} else
3996
dataptr = workspace; /* switch pointer to extended workspace */
3997
}
3998
3999
/* Pass 2: process columns.
4000
* We apply the PASS2_BITS scaling, but leave the
4001
* results scaled up by an overall factor of 8.
4002
* We must also scale the output by (8/6)*(8/12) = 8/9,
4003
* which we fold into the constant multipliers:
4004
* 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24) * 8/9.
4005
*/
4006
4007
dataptr = data;
4008
wsptr = workspace;
4009
for (ctr = 0; ctr < 6; ctr++) {
4010
/* Even part */
4011
4012
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
4013
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
4014
tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
4015
tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
4016
tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
4017
tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
4018
4019
tmp10 = tmp0 + tmp5;
4020
tmp13 = tmp0 - tmp5;
4021
tmp11 = tmp1 + tmp4;
4022
tmp14 = tmp1 - tmp4;
4023
tmp12 = tmp2 + tmp3;
4024
tmp15 = tmp2 - tmp3;
4025
4026
tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
4027
tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
4028
tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
4029
tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
4030
tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
4031
tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
4032
4033
dataptr[DCTSIZE*0] = (DCTELEM)
4034
DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
4035
CONST_BITS+PASS2_BITS);
4036
dataptr[DCTSIZE*6] = (DCTELEM)
4037
DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
4038
CONST_BITS+PASS2_BITS);
4039
dataptr[DCTSIZE*4] = (DCTELEM)
4040
DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)), /* c4 */
4041
CONST_BITS+PASS2_BITS);
4042
dataptr[DCTSIZE*2] = (DCTELEM)
4043
DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) + /* 8/9 */
4044
MULTIPLY(tmp13 + tmp15, FIX(1.214244803)), /* c2 */
4045
CONST_BITS+PASS2_BITS);
4046
4047
/* Odd part */
4048
4049
tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200)); /* c9 */
4050
tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102)); /* c3-c9 */
4051
tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502)); /* c3+c9 */
4052
tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603)); /* c5 */
4053
tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039)); /* c7 */
4054
tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
4055
+ MULTIPLY(tmp5, FIX(0.164081699)); /* c11 */
4056
tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
4057
tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
4058
+ MULTIPLY(tmp5, FIX(0.765261039)); /* c7 */
4059
tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
4060
- MULTIPLY(tmp5, FIX(0.997307603)); /* c5 */
4061
tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
4062
- MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
4063
4064
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS2_BITS);
4065
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS2_BITS);
4066
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS2_BITS);
4067
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS2_BITS);
4068
4069
dataptr++; /* advance pointer to next column */
4070
wsptr++; /* advance pointer to next column */
4071
}
4072
}
4073
4074
4075
/*
4076
* Perform the forward DCT on a 5x10 sample block.
4077
*
4078
* 5-point FDCT in pass 1 (rows), 10-point in pass 2 (columns).
4079
*/
4080
4081
GLOBAL(void)
4082
jpeg_fdct_5x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4083
{
4084
INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
4085
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
4086
DCTELEM workspace[8*2];
4087
DCTELEM *dataptr;
4088
DCTELEM *wsptr;
4089
JSAMPROW elemptr;
4090
int ctr;
4091
SHIFT_TEMPS
4092
4093
/* Pre-zero output coefficient block. */
4094
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4095
4096
/* Pass 1: process rows.
4097
* Note results are scaled up by sqrt(8) compared to a true DCT;
4098
* furthermore, we scale the results by 2**PASS1_BITS.
4099
* 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
4100
*/
4101
4102
dataptr = data;
4103
ctr = 0;
4104
for (;;) {
4105
elemptr = sample_data[ctr] + start_col;
4106
4107
/* Even part */
4108
4109
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
4110
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
4111
tmp2 = GETJSAMPLE(elemptr[2]);
4112
4113
tmp10 = tmp0 + tmp1;
4114
tmp11 = tmp0 - tmp1;
4115
4116
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
4117
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
4118
4119
/* Apply unsigned->signed conversion. */
4120
dataptr[0] = PASS1_OUTPUT(tmp10 + tmp2 - 5 * CENTERJSAMPLE);
4121
tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */
4122
tmp10 -= tmp2 << 2;
4123
tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */
4124
dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS);
4125
dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS);
4126
4127
/* Odd part */
4128
4129
tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876)); /* c3 */
4130
4131
dataptr[1] = (DCTELEM)
4132
DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
4133
CONST_BITS-PASS1_BITS);
4134
dataptr[3] = (DCTELEM)
4135
DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
4136
CONST_BITS-PASS1_BITS);
4137
4138
ctr++;
4139
4140
if (ctr != DCTSIZE) {
4141
if (ctr == 10)
4142
break; /* Done. */
4143
dataptr += DCTSIZE; /* advance pointer to next row */
4144
} else
4145
dataptr = workspace; /* switch pointer to extended workspace */
4146
}
4147
4148
/* Pass 2: process columns.
4149
* We apply the PASS2_BITS scaling, but leave the
4150
* results scaled up by an overall factor of 8.
4151
* We must also scale the output by (8/5)*(8/10) = 32/25,
4152
* which we fold into the constant multipliers:
4153
* 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20) * 32/25.
4154
*/
4155
4156
dataptr = data;
4157
wsptr = workspace;
4158
for (ctr = 0; ctr < 5; ctr++) {
4159
/* Even part */
4160
4161
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
4162
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
4163
tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
4164
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
4165
tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
4166
4167
tmp10 = tmp0 + tmp4;
4168
tmp13 = tmp0 - tmp4;
4169
tmp11 = tmp1 + tmp3;
4170
tmp14 = tmp1 - tmp3;
4171
4172
tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
4173
tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
4174
tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
4175
tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
4176
tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
4177
4178
dataptr[DCTSIZE*0] = (DCTELEM)
4179
DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
4180
CONST_BITS+PASS2_BITS);
4181
tmp12 += tmp12;
4182
dataptr[DCTSIZE*4] = (DCTELEM)
4183
DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
4184
MULTIPLY(tmp11 - tmp12, FIX(0.559380511)), /* c8 */
4185
CONST_BITS+PASS2_BITS);
4186
tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961)); /* c6 */
4187
dataptr[DCTSIZE*2] = (DCTELEM)
4188
DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)), /* c2-c6 */
4189
CONST_BITS+PASS2_BITS);
4190
dataptr[DCTSIZE*6] = (DCTELEM)
4191
DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)), /* c2+c6 */
4192
CONST_BITS+PASS2_BITS);
4193
4194
/* Odd part */
4195
4196
tmp10 = tmp0 + tmp4;
4197
tmp11 = tmp1 - tmp3;
4198
dataptr[DCTSIZE*5] = (DCTELEM)
4199
DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)), /* 32/25 */
4200
CONST_BITS+PASS2_BITS);
4201
tmp2 = MULTIPLY(tmp2, FIX(1.28)); /* 32/25 */
4202
dataptr[DCTSIZE*1] = (DCTELEM)
4203
DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) + /* c1 */
4204
MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 + /* c3 */
4205
MULTIPLY(tmp3, FIX(0.821810588)) + /* c7 */
4206
MULTIPLY(tmp4, FIX(0.283176630)), /* c9 */
4207
CONST_BITS+PASS2_BITS);
4208
tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) - /* (c3+c7)/2 */
4209
MULTIPLY(tmp1 + tmp3, FIX(0.752365123)); /* (c1-c9)/2 */
4210
tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) + /* (c3-c7)/2 */
4211
MULTIPLY(tmp11, FIX(0.64)) - tmp2; /* 16/25 */
4212
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+PASS2_BITS);
4213
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+PASS2_BITS);
4214
4215
dataptr++; /* advance pointer to next column */
4216
wsptr++; /* advance pointer to next column */
4217
}
4218
}
4219
4220
4221
/*
4222
* Perform the forward DCT on a 4x8 sample block.
4223
*
4224
* 4-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
4225
*/
4226
4227
GLOBAL(void)
4228
jpeg_fdct_4x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4229
{
4230
INT32 tmp0, tmp1, tmp2, tmp3;
4231
INT32 tmp10, tmp11, tmp12, tmp13;
4232
INT32 z1;
4233
DCTELEM *dataptr;
4234
JSAMPROW elemptr;
4235
int ctr;
4236
SHIFT_TEMPS
4237
4238
/* Pre-zero output coefficient block. */
4239
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4240
4241
/* Pass 1: process rows.
4242
* Note results are scaled up by sqrt(8) compared to a true DCT;
4243
* furthermore, we scale the results by 2**PASS1_BITS.
4244
* We must also scale the output by 8/4 = 2, which we add here.
4245
* 4-point FDCT kernel,
4246
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
4247
*/
4248
4249
dataptr = data;
4250
for (ctr = 0; ctr < DCTSIZE; ctr++) {
4251
elemptr = sample_data[ctr] + start_col;
4252
4253
/* Even part */
4254
4255
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
4256
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
4257
4258
tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
4259
tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
4260
4261
/* Apply unsigned->signed conversion. */
4262
dataptr[0] = (DCTELEM)
4263
((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+1));
4264
dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+1));
4265
4266
/* Odd part */
4267
4268
tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
4269
/* Add fudge factor here for final descale. */
4270
tmp0 += ONE << (CONST_BITS-PASS1_BITS-2);
4271
4272
dataptr[1] = (DCTELEM)
4273
RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
4274
CONST_BITS-PASS1_BITS-1);
4275
dataptr[3] = (DCTELEM)
4276
RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
4277
CONST_BITS-PASS1_BITS-1);
4278
4279
dataptr += DCTSIZE; /* advance pointer to next row */
4280
}
4281
4282
/* Pass 2: process columns.
4283
* We apply the PASS2_BITS scaling, but leave the
4284
* results scaled up by an overall factor of 8.
4285
* 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4286
*/
4287
4288
dataptr = data;
4289
for (ctr = 0; ctr < 4; ctr++) {
4290
/* Even part per LL&M figure 1 --- note that published figure is faulty;
4291
* rotator "c1" should be "c6".
4292
*/
4293
4294
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
4295
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
4296
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
4297
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
4298
4299
/* Add fudge factor here for final descale. */
4300
#if PASS2_BITS > 1
4301
tmp10 = tmp0 + tmp3 + (ONE << (PASS2_BITS-1));
4302
#else
4303
#if PASS2_BITS > 0
4304
tmp10 = tmp0 + tmp3 + ONE;
4305
#else
4306
tmp10 = tmp0 + tmp3;
4307
#endif
4308
#endif
4309
tmp12 = tmp0 - tmp3;
4310
tmp11 = tmp1 + tmp2;
4311
tmp13 = tmp1 - tmp2;
4312
4313
tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
4314
tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
4315
tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
4316
tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
4317
4318
dataptr[DCTSIZE*0] = PASS2_OUTPUT(tmp10 + tmp11);
4319
dataptr[DCTSIZE*4] = PASS2_OUTPUT(tmp10 - tmp11);
4320
4321
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
4322
/* Add fudge factor here for final descale. */
4323
z1 += ONE << (CONST_BITS+PASS2_BITS-1);
4324
4325
dataptr[DCTSIZE*2] = (DCTELEM)
4326
RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
4327
CONST_BITS+PASS2_BITS);
4328
dataptr[DCTSIZE*6] = (DCTELEM)
4329
RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
4330
CONST_BITS+PASS2_BITS);
4331
4332
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
4333
* i0..i3 in the paper are tmp0..tmp3 here.
4334
*/
4335
4336
tmp12 = tmp0 + tmp2;
4337
tmp13 = tmp1 + tmp3;
4338
4339
z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
4340
/* Add fudge factor here for final descale. */
4341
z1 += ONE << (CONST_BITS+PASS2_BITS-1);
4342
4343
tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
4344
tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
4345
tmp12 += z1;
4346
tmp13 += z1;
4347
4348
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4349
tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
4350
tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
4351
tmp0 += z1 + tmp12;
4352
tmp3 += z1 + tmp13;
4353
4354
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4355
tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
4356
tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
4357
tmp1 += z1 + tmp13;
4358
tmp2 += z1 + tmp12;
4359
4360
dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS2_BITS);
4361
dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS2_BITS);
4362
dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS2_BITS);
4363
dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS2_BITS);
4364
4365
dataptr++; /* advance pointer to next column */
4366
}
4367
}
4368
4369
4370
/*
4371
* Perform the forward DCT on a 3x6 sample block.
4372
*
4373
* 3-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
4374
*/
4375
4376
GLOBAL(void)
4377
jpeg_fdct_3x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4378
{
4379
INT32 tmp0, tmp1, tmp2;
4380
INT32 tmp10, tmp11, tmp12;
4381
DCTELEM *dataptr;
4382
JSAMPROW elemptr;
4383
int ctr;
4384
SHIFT_TEMPS
4385
4386
/* Pre-zero output coefficient block. */
4387
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4388
4389
/* Pass 1: process rows.
4390
* Note results are scaled up by sqrt(8) compared to a true DCT;
4391
* furthermore, we scale the results by 2**PASS1_BITS.
4392
* We scale the results further by 2 as part of output adaption
4393
* scaling for different DCT size.
4394
* 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
4395
*/
4396
4397
dataptr = data;
4398
for (ctr = 0; ctr < 6; ctr++) {
4399
elemptr = sample_data[ctr] + start_col;
4400
4401
/* Even part */
4402
4403
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
4404
tmp1 = GETJSAMPLE(elemptr[1]);
4405
4406
tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
4407
4408
/* Apply unsigned->signed conversion. */
4409
dataptr[0] = (DCTELEM)
4410
((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+1));
4411
dataptr[2] = (DCTELEM)
4412
DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
4413
CONST_BITS-PASS1_BITS-1);
4414
4415
/* Odd part */
4416
4417
dataptr[1] = (DCTELEM)
4418
DESCALE(MULTIPLY(tmp2, FIX(1.224744871)), /* c1 */
4419
CONST_BITS-PASS1_BITS-1);
4420
4421
dataptr += DCTSIZE; /* advance pointer to next row */
4422
}
4423
4424
/* Pass 2: process columns.
4425
* We apply the PASS2_BITS scaling, but leave the
4426
* results scaled up by an overall factor of 8.
4427
* We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
4428
* fold into the constant multipliers (other part was done in pass 1):
4429
* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
4430
*/
4431
4432
dataptr = data;
4433
for (ctr = 0; ctr < 3; ctr++) {
4434
/* Even part */
4435
4436
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
4437
tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
4438
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
4439
4440
tmp10 = tmp0 + tmp2;
4441
tmp12 = tmp0 - tmp2;
4442
4443
tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
4444
tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
4445
tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
4446
4447
dataptr[DCTSIZE*0] = (DCTELEM)
4448
DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */
4449
CONST_BITS+PASS2_BITS);
4450
dataptr[DCTSIZE*2] = (DCTELEM)
4451
DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */
4452
CONST_BITS+PASS2_BITS);
4453
dataptr[DCTSIZE*4] = (DCTELEM)
4454
DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
4455
CONST_BITS+PASS2_BITS);
4456
4457
/* Odd part */
4458
4459
tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */
4460
4461
dataptr[DCTSIZE*1] = (DCTELEM)
4462
DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */
4463
CONST_BITS+PASS2_BITS);
4464
dataptr[DCTSIZE*3] = (DCTELEM)
4465
DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */
4466
CONST_BITS+PASS2_BITS);
4467
dataptr[DCTSIZE*5] = (DCTELEM)
4468
DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */
4469
CONST_BITS+PASS2_BITS);
4470
4471
dataptr++; /* advance pointer to next column */
4472
}
4473
}
4474
4475
4476
/*
4477
* Perform the forward DCT on a 2x4 sample block.
4478
*
4479
* 2-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
4480
*/
4481
4482
GLOBAL(void)
4483
jpeg_fdct_2x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4484
{
4485
INT32 tmp0, tmp1;
4486
INT32 tmp10, tmp11;
4487
DCTELEM *dataptr;
4488
JSAMPROW elemptr;
4489
int ctr;
4490
SHIFT_TEMPS
4491
4492
/* Pre-zero output coefficient block. */
4493
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4494
4495
/* Pass 1: process rows.
4496
* Note results are scaled up by sqrt(8) compared to a true DCT.
4497
*/
4498
4499
dataptr = data;
4500
for (ctr = 0; ctr < 4; ctr++) {
4501
elemptr = sample_data[ctr] + start_col;
4502
4503
/* Even part */
4504
4505
tmp0 = GETJSAMPLE(elemptr[0]);
4506
tmp1 = GETJSAMPLE(elemptr[1]);
4507
4508
/* Apply unsigned->signed conversion. */
4509
dataptr[0] = (DCTELEM) (tmp0 + tmp1 - 2 * CENTERJSAMPLE);
4510
4511
/* Odd part */
4512
4513
dataptr[1] = (DCTELEM) (tmp0 - tmp1);
4514
4515
dataptr += DCTSIZE; /* advance pointer to next row */
4516
}
4517
4518
/* Pass 2: process columns.
4519
* We leave the results scaled up by an overall factor of 8.
4520
* We must also scale the output by (8/2)*(8/4) = 2**3.
4521
* 4-point FDCT kernel,
4522
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
4523
*/
4524
4525
dataptr = data;
4526
for (ctr = 0; ctr < 2; ctr++) {
4527
/* Even part */
4528
4529
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3];
4530
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
4531
4532
tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
4533
tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
4534
4535
#if PASS2_BITS < PASS1_BITS + 3
4536
dataptr[DCTSIZE*0] = (DCTELEM)
4537
((tmp0 + tmp1) << (3+PASS1_BITS-PASS2_BITS));
4538
dataptr[DCTSIZE*2] = (DCTELEM)
4539
((tmp0 - tmp1) << (3+PASS1_BITS-PASS2_BITS));
4540
#else
4541
#if PASS2_BITS == PASS1_BITS + 3
4542
dataptr[DCTSIZE*0] = (DCTELEM) (tmp0 + tmp1);
4543
dataptr[DCTSIZE*2] = (DCTELEM) (tmp0 - tmp1);
4544
#else
4545
/* Add fudge factor for descale. */
4546
tmp0 += ONE << (PASS2_BITS-PASS1_BITS-3-1);
4547
4548
dataptr[DCTSIZE*0] = (DCTELEM)
4549
RIGHT_SHIFT(tmp0 + tmp1, PASS2_BITS-PASS1_BITS-3);
4550
dataptr[DCTSIZE*2] = (DCTELEM)
4551
RIGHT_SHIFT(tmp0 - tmp1, PASS2_BITS-PASS1_BITS-3);
4552
#endif
4553
#endif
4554
4555
/* Odd part */
4556
4557
tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
4558
/* Add fudge factor for descale. */
4559
tmp0 += ONE << (CONST_BITS+PASS2_BITS-PASS1_BITS-3-1);
4560
4561
dataptr[DCTSIZE*1] = (DCTELEM)
4562
RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
4563
CONST_BITS+PASS2_BITS-PASS1_BITS-3);
4564
dataptr[DCTSIZE*3] = (DCTELEM)
4565
RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
4566
CONST_BITS+PASS2_BITS-PASS1_BITS-3);
4567
4568
dataptr++; /* advance pointer to next column */
4569
}
4570
}
4571
4572
4573
/*
4574
* Perform the forward DCT on a 1x2 sample block.
4575
*
4576
* 1-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
4577
*/
4578
4579
GLOBAL(void)
4580
jpeg_fdct_1x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4581
{
4582
DCTELEM tmp0, tmp1;
4583
4584
/* Pre-zero output coefficient block. */
4585
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4586
4587
/* Pass 1: empty. */
4588
4589
/* Pass 2: process columns.
4590
* We leave the results scaled up by an overall factor of 8.
4591
* We must also scale the output by (8/1)*(8/2) = 2**5.
4592
*/
4593
4594
/* Even part */
4595
4596
tmp0 = GETJSAMPLE(sample_data[0][start_col]);
4597
tmp1 = GETJSAMPLE(sample_data[1][start_col]);
4598
4599
/* Apply unsigned->signed conversion. */
4600
data[DCTSIZE*0] =
4601
(tmp0 + tmp1 - 2 * CENTERJSAMPLE) << (5+PASS1_BITS-PASS2_BITS);
4602
4603
/* Odd part */
4604
4605
data[DCTSIZE*1] = (tmp0 - tmp1) << (5+PASS1_BITS-PASS2_BITS);
4606
}
4607
4608
#endif /* DCT_SCALING_SUPPORTED */
4609
#endif /* DCT_ISLOW_SUPPORTED */
4610
4611