Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
wine-mirror
GitHub Repository: wine-mirror/wine
Path: blob/master/libs/jpeg/jfdctint.c
4389 views
1
/*
2
* jfdctint.c
3
*
4
* Copyright (C) 1991-1996, Thomas G. Lane.
5
* Modification developed 2003-2018 by Guido Vollbeding.
6
* This file is part of the Independent JPEG Group's software.
7
* For conditions of distribution and use, see the accompanying README file.
8
*
9
* This file contains a slow-but-accurate integer implementation of the
10
* forward DCT (Discrete Cosine Transform).
11
*
12
* A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
13
* on each column. Direct algorithms are also available, but they are
14
* much more complex and seem not to be any faster when reduced to code.
15
*
16
* This implementation is based on an algorithm described in
17
* C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
18
* Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
19
* Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
20
* The primary algorithm described there uses 11 multiplies and 29 adds.
21
* We use their alternate method with 12 multiplies and 32 adds.
22
* The advantage of this method is that no data path contains more than one
23
* multiplication; this allows a very simple and accurate implementation in
24
* scaled fixed-point arithmetic, with a minimal number of shifts.
25
*
26
* We also provide FDCT routines with various input sample block sizes for
27
* direct resolution reduction or enlargement and for direct resolving the
28
* common 2x1 and 1x2 subsampling cases without additional resampling: NxN
29
* (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 output DCT block.
30
*
31
* For N<8 we fill the remaining block coefficients with zero.
32
* For N>8 we apply a partial N-point FDCT on the input samples, computing
33
* just the lower 8 frequency coefficients and discarding the rest.
34
*
35
* We must scale the output coefficients of the N-point FDCT appropriately
36
* to the standard 8-point FDCT level by 8/N per 1-D pass. This scaling
37
* is folded into the constant multipliers (pass 2) and/or final/initial
38
* shifting.
39
*
40
* CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
41
* since there would be too many additional constants to pre-calculate.
42
*/
43
44
#define JPEG_INTERNALS
45
#include "jinclude.h"
46
#include "jpeglib.h"
47
#include "jdct.h" /* Private declarations for DCT subsystem */
48
49
#ifdef DCT_ISLOW_SUPPORTED
50
51
52
/*
53
* This module is specialized to the case DCTSIZE = 8.
54
*/
55
56
#if DCTSIZE != 8
57
Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
58
#endif
59
60
61
/*
62
* The poop on this scaling stuff is as follows:
63
*
64
* Each 1-D DCT step produces outputs which are a factor of sqrt(N)
65
* larger than the true DCT outputs. The final outputs are therefore
66
* a factor of N larger than desired; since N=8 this can be cured by
67
* a simple right shift at the end of the algorithm. The advantage of
68
* this arrangement is that we save two multiplications per 1-D DCT,
69
* because the y0 and y4 outputs need not be divided by sqrt(N).
70
* In the IJG code, this factor of 8 is removed by the quantization step
71
* (in jcdctmgr.c), NOT in this module.
72
*
73
* We have to do addition and subtraction of the integer inputs, which
74
* is no problem, and multiplication by fractional constants, which is
75
* a problem to do in integer arithmetic. We multiply all the constants
76
* by CONST_SCALE and convert them to integer constants (thus retaining
77
* CONST_BITS bits of precision in the constants). After doing a
78
* multiplication we have to divide the product by CONST_SCALE, with proper
79
* rounding, to produce the correct output. This division can be done
80
* cheaply as a right shift of CONST_BITS bits. We postpone shifting
81
* as long as possible so that partial sums can be added together with
82
* full fractional precision.
83
*
84
* The outputs of the first pass are scaled up by PASS1_BITS bits so that
85
* they are represented to better-than-integral precision. These outputs
86
* require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
87
* with the recommended scaling. (For 12-bit sample data, the intermediate
88
* array is INT32 anyway.)
89
*
90
* To avoid overflow of the 32-bit intermediate results in pass 2, we must
91
* have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
92
* shows that the values given below are the most effective.
93
*/
94
95
#if BITS_IN_JSAMPLE == 8
96
#define CONST_BITS 13
97
#define PASS1_BITS 2
98
#else
99
#define CONST_BITS 13
100
#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
101
#endif
102
103
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
104
* causing a lot of useless floating-point operations at run time.
105
* To get around this we use the following pre-calculated constants.
106
* If you change CONST_BITS you may want to add appropriate values.
107
* (With a reasonable C compiler, you can just rely on the FIX() macro...)
108
*/
109
110
#if CONST_BITS == 13
111
#define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */
112
#define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */
113
#define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */
114
#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */
115
#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */
116
#define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */
117
#define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */
118
#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */
119
#define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */
120
#define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */
121
#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */
122
#define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */
123
#else
124
#define FIX_0_298631336 FIX(0.298631336)
125
#define FIX_0_390180644 FIX(0.390180644)
126
#define FIX_0_541196100 FIX(0.541196100)
127
#define FIX_0_765366865 FIX(0.765366865)
128
#define FIX_0_899976223 FIX(0.899976223)
129
#define FIX_1_175875602 FIX(1.175875602)
130
#define FIX_1_501321110 FIX(1.501321110)
131
#define FIX_1_847759065 FIX(1.847759065)
132
#define FIX_1_961570560 FIX(1.961570560)
133
#define FIX_2_053119869 FIX(2.053119869)
134
#define FIX_2_562915447 FIX(2.562915447)
135
#define FIX_3_072711026 FIX(3.072711026)
136
#endif
137
138
139
/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
140
* For 8-bit samples with the recommended scaling, all the variable
141
* and constant values involved are no more than 16 bits wide, so a
142
* 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
143
* For 12-bit samples, a full 32-bit multiplication will be needed.
144
*/
145
146
#if BITS_IN_JSAMPLE == 8
147
#define MULTIPLY(var,const) MULTIPLY16C16(var,const)
148
#else
149
#define MULTIPLY(var,const) ((var) * (const))
150
#endif
151
152
153
/*
154
* Perform the forward DCT on one block of samples.
155
*/
156
157
GLOBAL(void)
158
jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
159
{
160
INT32 tmp0, tmp1, tmp2, tmp3;
161
INT32 tmp10, tmp11, tmp12, tmp13;
162
INT32 z1;
163
DCTELEM *dataptr;
164
JSAMPROW elemptr;
165
int ctr;
166
SHIFT_TEMPS
167
168
/* Pass 1: process rows.
169
* Note results are scaled up by sqrt(8) compared to a true DCT;
170
* furthermore, we scale the results by 2**PASS1_BITS.
171
* cK represents sqrt(2) * cos(K*pi/16).
172
*/
173
174
dataptr = data;
175
for (ctr = 0; ctr < DCTSIZE; ctr++) {
176
elemptr = sample_data[ctr] + start_col;
177
178
/* Even part per LL&M figure 1 --- note that published figure is faulty;
179
* rotator "c1" should be "c6".
180
*/
181
182
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
183
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
184
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
185
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
186
187
tmp10 = tmp0 + tmp3;
188
tmp12 = tmp0 - tmp3;
189
tmp11 = tmp1 + tmp2;
190
tmp13 = tmp1 - tmp2;
191
192
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
193
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
194
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
195
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
196
197
/* Apply unsigned->signed conversion. */
198
dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
199
dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
200
201
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
202
/* Add fudge factor here for final descale. */
203
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
204
205
dataptr[2] = (DCTELEM)
206
RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
207
CONST_BITS-PASS1_BITS);
208
dataptr[6] = (DCTELEM)
209
RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
210
CONST_BITS-PASS1_BITS);
211
212
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
213
* i0..i3 in the paper are tmp0..tmp3 here.
214
*/
215
216
tmp12 = tmp0 + tmp2;
217
tmp13 = tmp1 + tmp3;
218
219
z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
220
/* Add fudge factor here for final descale. */
221
z1 += ONE << (CONST_BITS-PASS1_BITS-1);
222
223
tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
224
tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
225
tmp12 += z1;
226
tmp13 += z1;
227
228
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
229
tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
230
tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
231
tmp0 += z1 + tmp12;
232
tmp3 += z1 + tmp13;
233
234
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
235
tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
236
tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
237
tmp1 += z1 + tmp13;
238
tmp2 += z1 + tmp12;
239
240
dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS);
241
dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS);
242
dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
243
dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS);
244
245
dataptr += DCTSIZE; /* advance pointer to next row */
246
}
247
248
/* Pass 2: process columns.
249
* We remove the PASS1_BITS scaling, but leave the results scaled up
250
* by an overall factor of 8.
251
* cK represents sqrt(2) * cos(K*pi/16).
252
*/
253
254
dataptr = data;
255
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
256
/* Even part per LL&M figure 1 --- note that published figure is faulty;
257
* rotator "c1" should be "c6".
258
*/
259
260
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
261
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
262
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
263
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
264
265
/* Add fudge factor here for final descale. */
266
tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1));
267
tmp12 = tmp0 - tmp3;
268
tmp11 = tmp1 + tmp2;
269
tmp13 = tmp1 - tmp2;
270
271
tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
272
tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
273
tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
274
tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
275
276
dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
277
dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
278
279
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
280
/* Add fudge factor here for final descale. */
281
z1 += ONE << (CONST_BITS+PASS1_BITS-1);
282
283
dataptr[DCTSIZE*2] = (DCTELEM)
284
RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
285
CONST_BITS+PASS1_BITS);
286
dataptr[DCTSIZE*6] = (DCTELEM)
287
RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
288
CONST_BITS+PASS1_BITS);
289
290
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
291
* i0..i3 in the paper are tmp0..tmp3 here.
292
*/
293
294
tmp12 = tmp0 + tmp2;
295
tmp13 = tmp1 + tmp3;
296
297
z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
298
/* Add fudge factor here for final descale. */
299
z1 += ONE << (CONST_BITS+PASS1_BITS-1);
300
301
tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
302
tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
303
tmp12 += z1;
304
tmp13 += z1;
305
306
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
307
tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
308
tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
309
tmp0 += z1 + tmp12;
310
tmp3 += z1 + tmp13;
311
312
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
313
tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
314
tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
315
tmp1 += z1 + tmp13;
316
tmp2 += z1 + tmp12;
317
318
dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS);
319
dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS);
320
dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS);
321
dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS);
322
323
dataptr++; /* advance pointer to next column */
324
}
325
}
326
327
#ifdef DCT_SCALING_SUPPORTED
328
329
330
/*
331
* Perform the forward DCT on a 7x7 sample block.
332
*/
333
334
GLOBAL(void)
335
jpeg_fdct_7x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
336
{
337
INT32 tmp0, tmp1, tmp2, tmp3;
338
INT32 tmp10, tmp11, tmp12;
339
INT32 z1, z2, z3;
340
DCTELEM *dataptr;
341
JSAMPROW elemptr;
342
int ctr;
343
SHIFT_TEMPS
344
345
/* Pre-zero output coefficient block. */
346
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
347
348
/* Pass 1: process rows.
349
* Note results are scaled up by sqrt(8) compared to a true DCT;
350
* furthermore, we scale the results by 2**PASS1_BITS.
351
* cK represents sqrt(2) * cos(K*pi/14).
352
*/
353
354
dataptr = data;
355
for (ctr = 0; ctr < 7; ctr++) {
356
elemptr = sample_data[ctr] + start_col;
357
358
/* Even part */
359
360
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
361
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
362
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
363
tmp3 = GETJSAMPLE(elemptr[3]);
364
365
tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
366
tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
367
tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
368
369
z1 = tmp0 + tmp2;
370
/* Apply unsigned->signed conversion. */
371
dataptr[0] = (DCTELEM)
372
((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
373
tmp3 += tmp3;
374
z1 -= tmp3;
375
z1 -= tmp3;
376
z1 = MULTIPLY(z1, FIX(0.353553391)); /* (c2+c6-c4)/2 */
377
z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002)); /* (c2+c4-c6)/2 */
378
z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123)); /* c6 */
379
dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
380
z1 -= z2;
381
z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734)); /* c4 */
382
dataptr[4] = (DCTELEM)
383
DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
384
CONST_BITS-PASS1_BITS);
385
dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
386
387
/* Odd part */
388
389
tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347)); /* (c3+c1-c5)/2 */
390
tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339)); /* (c3+c5-c1)/2 */
391
tmp0 = tmp1 - tmp2;
392
tmp1 += tmp2;
393
tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
394
tmp1 += tmp2;
395
tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268)); /* c5 */
396
tmp0 += tmp3;
397
tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693)); /* c3+c1-c5 */
398
399
dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
400
dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
401
dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
402
403
dataptr += DCTSIZE; /* advance pointer to next row */
404
}
405
406
/* Pass 2: process columns.
407
* We remove the PASS1_BITS scaling, but leave the results scaled up
408
* by an overall factor of 8.
409
* We must also scale the output by (8/7)**2 = 64/49, which we fold
410
* into the constant multipliers:
411
* cK now represents sqrt(2) * cos(K*pi/14) * 64/49.
412
*/
413
414
dataptr = data;
415
for (ctr = 0; ctr < 7; ctr++) {
416
/* Even part */
417
418
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
419
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
420
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
421
tmp3 = dataptr[DCTSIZE*3];
422
423
tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
424
tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
425
tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
426
427
z1 = tmp0 + tmp2;
428
dataptr[DCTSIZE*0] = (DCTELEM)
429
DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
430
CONST_BITS+PASS1_BITS);
431
tmp3 += tmp3;
432
z1 -= tmp3;
433
z1 -= tmp3;
434
z1 = MULTIPLY(z1, FIX(0.461784020)); /* (c2+c6-c4)/2 */
435
z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084)); /* (c2+c4-c6)/2 */
436
z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446)); /* c6 */
437
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS);
438
z1 -= z2;
439
z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509)); /* c4 */
440
dataptr[DCTSIZE*4] = (DCTELEM)
441
DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
442
CONST_BITS+PASS1_BITS);
443
dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS);
444
445
/* Odd part */
446
447
tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677)); /* (c3+c1-c5)/2 */
448
tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464)); /* (c3+c5-c1)/2 */
449
tmp0 = tmp1 - tmp2;
450
tmp1 += tmp2;
451
tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
452
tmp1 += tmp2;
453
tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310)); /* c5 */
454
tmp0 += tmp3;
455
tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355)); /* c3+c1-c5 */
456
457
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS);
458
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS);
459
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS);
460
461
dataptr++; /* advance pointer to next column */
462
}
463
}
464
465
466
/*
467
* Perform the forward DCT on a 6x6 sample block.
468
*/
469
470
GLOBAL(void)
471
jpeg_fdct_6x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
472
{
473
INT32 tmp0, tmp1, tmp2;
474
INT32 tmp10, tmp11, tmp12;
475
DCTELEM *dataptr;
476
JSAMPROW elemptr;
477
int ctr;
478
SHIFT_TEMPS
479
480
/* Pre-zero output coefficient block. */
481
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
482
483
/* Pass 1: process rows.
484
* Note results are scaled up by sqrt(8) compared to a true DCT;
485
* furthermore, we scale the results by 2**PASS1_BITS.
486
* cK represents sqrt(2) * cos(K*pi/12).
487
*/
488
489
dataptr = data;
490
for (ctr = 0; ctr < 6; ctr++) {
491
elemptr = sample_data[ctr] + start_col;
492
493
/* Even part */
494
495
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
496
tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
497
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
498
499
tmp10 = tmp0 + tmp2;
500
tmp12 = tmp0 - tmp2;
501
502
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
503
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
504
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
505
506
/* Apply unsigned->signed conversion. */
507
dataptr[0] = (DCTELEM)
508
((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
509
dataptr[2] = (DCTELEM)
510
DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */
511
CONST_BITS-PASS1_BITS);
512
dataptr[4] = (DCTELEM)
513
DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
514
CONST_BITS-PASS1_BITS);
515
516
/* Odd part */
517
518
tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */
519
CONST_BITS-PASS1_BITS);
520
521
dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
522
dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
523
dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
524
525
dataptr += DCTSIZE; /* advance pointer to next row */
526
}
527
528
/* Pass 2: process columns.
529
* We remove the PASS1_BITS scaling, but leave the results scaled up
530
* by an overall factor of 8.
531
* We must also scale the output by (8/6)**2 = 16/9, which we fold
532
* into the constant multipliers:
533
* cK now represents sqrt(2) * cos(K*pi/12) * 16/9.
534
*/
535
536
dataptr = data;
537
for (ctr = 0; ctr < 6; ctr++) {
538
/* Even part */
539
540
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
541
tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
542
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
543
544
tmp10 = tmp0 + tmp2;
545
tmp12 = tmp0 - tmp2;
546
547
tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
548
tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
549
tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
550
551
dataptr[DCTSIZE*0] = (DCTELEM)
552
DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */
553
CONST_BITS+PASS1_BITS);
554
dataptr[DCTSIZE*2] = (DCTELEM)
555
DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */
556
CONST_BITS+PASS1_BITS);
557
dataptr[DCTSIZE*4] = (DCTELEM)
558
DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
559
CONST_BITS+PASS1_BITS);
560
561
/* Odd part */
562
563
tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */
564
565
dataptr[DCTSIZE*1] = (DCTELEM)
566
DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */
567
CONST_BITS+PASS1_BITS);
568
dataptr[DCTSIZE*3] = (DCTELEM)
569
DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */
570
CONST_BITS+PASS1_BITS);
571
dataptr[DCTSIZE*5] = (DCTELEM)
572
DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */
573
CONST_BITS+PASS1_BITS);
574
575
dataptr++; /* advance pointer to next column */
576
}
577
}
578
579
580
/*
581
* Perform the forward DCT on a 5x5 sample block.
582
*/
583
584
GLOBAL(void)
585
jpeg_fdct_5x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
586
{
587
INT32 tmp0, tmp1, tmp2;
588
INT32 tmp10, tmp11;
589
DCTELEM *dataptr;
590
JSAMPROW elemptr;
591
int ctr;
592
SHIFT_TEMPS
593
594
/* Pre-zero output coefficient block. */
595
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
596
597
/* Pass 1: process rows.
598
* Note results are scaled up by sqrt(8) compared to a true DCT;
599
* furthermore, we scale the results by 2**PASS1_BITS.
600
* We scale the results further by 2 as part of output adaption
601
* scaling for different DCT size.
602
* cK represents sqrt(2) * cos(K*pi/10).
603
*/
604
605
dataptr = data;
606
for (ctr = 0; ctr < 5; ctr++) {
607
elemptr = sample_data[ctr] + start_col;
608
609
/* Even part */
610
611
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
612
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
613
tmp2 = GETJSAMPLE(elemptr[2]);
614
615
tmp10 = tmp0 + tmp1;
616
tmp11 = tmp0 - tmp1;
617
618
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
619
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
620
621
/* Apply unsigned->signed conversion. */
622
dataptr[0] = (DCTELEM)
623
((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << (PASS1_BITS+1));
624
tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */
625
tmp10 -= tmp2 << 2;
626
tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */
627
dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS-1);
628
dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS-1);
629
630
/* Odd part */
631
632
tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876)); /* c3 */
633
634
dataptr[1] = (DCTELEM)
635
DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
636
CONST_BITS-PASS1_BITS-1);
637
dataptr[3] = (DCTELEM)
638
DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
639
CONST_BITS-PASS1_BITS-1);
640
641
dataptr += DCTSIZE; /* advance pointer to next row */
642
}
643
644
/* Pass 2: process columns.
645
* We remove the PASS1_BITS scaling, but leave the results scaled up
646
* by an overall factor of 8.
647
* We must also scale the output by (8/5)**2 = 64/25, which we partially
648
* fold into the constant multipliers (other part was done in pass 1):
649
* cK now represents sqrt(2) * cos(K*pi/10) * 32/25.
650
*/
651
652
dataptr = data;
653
for (ctr = 0; ctr < 5; ctr++) {
654
/* Even part */
655
656
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
657
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
658
tmp2 = dataptr[DCTSIZE*2];
659
660
tmp10 = tmp0 + tmp1;
661
tmp11 = tmp0 - tmp1;
662
663
tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
664
tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
665
666
dataptr[DCTSIZE*0] = (DCTELEM)
667
DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)), /* 32/25 */
668
CONST_BITS+PASS1_BITS);
669
tmp11 = MULTIPLY(tmp11, FIX(1.011928851)); /* (c2+c4)/2 */
670
tmp10 -= tmp2 << 2;
671
tmp10 = MULTIPLY(tmp10, FIX(0.452548340)); /* (c2-c4)/2 */
672
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS);
673
dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS);
674
675
/* Odd part */
676
677
tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961)); /* c3 */
678
679
dataptr[DCTSIZE*1] = (DCTELEM)
680
DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
681
CONST_BITS+PASS1_BITS);
682
dataptr[DCTSIZE*3] = (DCTELEM)
683
DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
684
CONST_BITS+PASS1_BITS);
685
686
dataptr++; /* advance pointer to next column */
687
}
688
}
689
690
691
/*
692
* Perform the forward DCT on a 4x4 sample block.
693
*/
694
695
GLOBAL(void)
696
jpeg_fdct_4x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
697
{
698
INT32 tmp0, tmp1;
699
INT32 tmp10, tmp11;
700
DCTELEM *dataptr;
701
JSAMPROW elemptr;
702
int ctr;
703
SHIFT_TEMPS
704
705
/* Pre-zero output coefficient block. */
706
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
707
708
/* Pass 1: process rows.
709
* Note results are scaled up by sqrt(8) compared to a true DCT;
710
* furthermore, we scale the results by 2**PASS1_BITS.
711
* We must also scale the output by (8/4)**2 = 2**2, which we add here.
712
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
713
*/
714
715
dataptr = data;
716
for (ctr = 0; ctr < 4; ctr++) {
717
elemptr = sample_data[ctr] + start_col;
718
719
/* Even part */
720
721
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
722
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
723
724
tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
725
tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
726
727
/* Apply unsigned->signed conversion. */
728
dataptr[0] = (DCTELEM)
729
((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+2));
730
dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+2));
731
732
/* Odd part */
733
734
tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
735
/* Add fudge factor here for final descale. */
736
tmp0 += ONE << (CONST_BITS-PASS1_BITS-3);
737
738
dataptr[1] = (DCTELEM)
739
RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
740
CONST_BITS-PASS1_BITS-2);
741
dataptr[3] = (DCTELEM)
742
RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
743
CONST_BITS-PASS1_BITS-2);
744
745
dataptr += DCTSIZE; /* advance pointer to next row */
746
}
747
748
/* Pass 2: process columns.
749
* We remove the PASS1_BITS scaling, but leave the results scaled up
750
* by an overall factor of 8.
751
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
752
*/
753
754
dataptr = data;
755
for (ctr = 0; ctr < 4; ctr++) {
756
/* Even part */
757
758
/* Add fudge factor here for final descale. */
759
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1));
760
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
761
762
tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
763
tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
764
765
dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
766
dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
767
768
/* Odd part */
769
770
tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
771
/* Add fudge factor here for final descale. */
772
tmp0 += ONE << (CONST_BITS+PASS1_BITS-1);
773
774
dataptr[DCTSIZE*1] = (DCTELEM)
775
RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
776
CONST_BITS+PASS1_BITS);
777
dataptr[DCTSIZE*3] = (DCTELEM)
778
RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
779
CONST_BITS+PASS1_BITS);
780
781
dataptr++; /* advance pointer to next column */
782
}
783
}
784
785
786
/*
787
* Perform the forward DCT on a 3x3 sample block.
788
*/
789
790
GLOBAL(void)
791
jpeg_fdct_3x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
792
{
793
INT32 tmp0, tmp1, tmp2;
794
DCTELEM *dataptr;
795
JSAMPROW elemptr;
796
int ctr;
797
SHIFT_TEMPS
798
799
/* Pre-zero output coefficient block. */
800
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
801
802
/* Pass 1: process rows.
803
* Note results are scaled up by sqrt(8) compared to a true DCT;
804
* furthermore, we scale the results by 2**PASS1_BITS.
805
* We scale the results further by 2**2 as part of output adaption
806
* scaling for different DCT size.
807
* cK represents sqrt(2) * cos(K*pi/6).
808
*/
809
810
dataptr = data;
811
for (ctr = 0; ctr < 3; ctr++) {
812
elemptr = sample_data[ctr] + start_col;
813
814
/* Even part */
815
816
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
817
tmp1 = GETJSAMPLE(elemptr[1]);
818
819
tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
820
821
/* Apply unsigned->signed conversion. */
822
dataptr[0] = (DCTELEM)
823
((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+2));
824
dataptr[2] = (DCTELEM)
825
DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
826
CONST_BITS-PASS1_BITS-2);
827
828
/* Odd part */
829
830
dataptr[1] = (DCTELEM)
831
DESCALE(MULTIPLY(tmp2, FIX(1.224744871)), /* c1 */
832
CONST_BITS-PASS1_BITS-2);
833
834
dataptr += DCTSIZE; /* advance pointer to next row */
835
}
836
837
/* Pass 2: process columns.
838
* We remove the PASS1_BITS scaling, but leave the results scaled up
839
* by an overall factor of 8.
840
* We must also scale the output by (8/3)**2 = 64/9, which we partially
841
* fold into the constant multipliers (other part was done in pass 1):
842
* cK now represents sqrt(2) * cos(K*pi/6) * 16/9.
843
*/
844
845
dataptr = data;
846
for (ctr = 0; ctr < 3; ctr++) {
847
/* Even part */
848
849
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
850
tmp1 = dataptr[DCTSIZE*1];
851
852
tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
853
854
dataptr[DCTSIZE*0] = (DCTELEM)
855
DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */
856
CONST_BITS+PASS1_BITS);
857
dataptr[DCTSIZE*2] = (DCTELEM)
858
DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
859
CONST_BITS+PASS1_BITS);
860
861
/* Odd part */
862
863
dataptr[DCTSIZE*1] = (DCTELEM)
864
DESCALE(MULTIPLY(tmp2, FIX(2.177324216)), /* c1 */
865
CONST_BITS+PASS1_BITS);
866
867
dataptr++; /* advance pointer to next column */
868
}
869
}
870
871
872
/*
873
* Perform the forward DCT on a 2x2 sample block.
874
*/
875
876
GLOBAL(void)
877
jpeg_fdct_2x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
878
{
879
DCTELEM tmp0, tmp1, tmp2, tmp3;
880
JSAMPROW elemptr;
881
882
/* Pre-zero output coefficient block. */
883
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
884
885
/* Pass 1: process rows.
886
* Note results are scaled up by sqrt(8) compared to a true DCT.
887
*/
888
889
/* Row 0 */
890
elemptr = sample_data[0] + start_col;
891
892
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
893
tmp1 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
894
895
/* Row 1 */
896
elemptr = sample_data[1] + start_col;
897
898
tmp2 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
899
tmp3 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
900
901
/* Pass 2: process columns.
902
* We leave the results scaled up by an overall factor of 8.
903
* We must also scale the output by (8/2)**2 = 2**4.
904
*/
905
906
/* Column 0 */
907
/* Apply unsigned->signed conversion. */
908
data[DCTSIZE*0] = (tmp0 + tmp2 - 4 * CENTERJSAMPLE) << 4;
909
data[DCTSIZE*1] = (tmp0 - tmp2) << 4;
910
911
/* Column 1 */
912
data[DCTSIZE*0+1] = (tmp1 + tmp3) << 4;
913
data[DCTSIZE*1+1] = (tmp1 - tmp3) << 4;
914
}
915
916
917
/*
918
* Perform the forward DCT on a 1x1 sample block.
919
*/
920
921
GLOBAL(void)
922
jpeg_fdct_1x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
923
{
924
DCTELEM dcval;
925
926
/* Pre-zero output coefficient block. */
927
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
928
929
dcval = GETJSAMPLE(sample_data[0][start_col]);
930
931
/* We leave the result scaled up by an overall factor of 8. */
932
/* We must also scale the output by (8/1)**2 = 2**6. */
933
/* Apply unsigned->signed conversion. */
934
data[0] = (dcval - CENTERJSAMPLE) << 6;
935
}
936
937
938
/*
939
* Perform the forward DCT on a 9x9 sample block.
940
*/
941
942
GLOBAL(void)
943
jpeg_fdct_9x9 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
944
{
945
INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
946
INT32 tmp10, tmp11, tmp12, tmp13;
947
INT32 z1, z2;
948
DCTELEM workspace[8];
949
DCTELEM *dataptr;
950
DCTELEM *wsptr;
951
JSAMPROW elemptr;
952
int ctr;
953
SHIFT_TEMPS
954
955
/* Pass 1: process rows.
956
* Note results are scaled up by sqrt(8) compared to a true DCT;
957
* we scale the results further by 2 as part of output adaption
958
* scaling for different DCT size.
959
* cK represents sqrt(2) * cos(K*pi/18).
960
*/
961
962
dataptr = data;
963
ctr = 0;
964
for (;;) {
965
elemptr = sample_data[ctr] + start_col;
966
967
/* Even part */
968
969
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[8]);
970
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[7]);
971
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[6]);
972
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[5]);
973
tmp4 = GETJSAMPLE(elemptr[4]);
974
975
tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[8]);
976
tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[7]);
977
tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[6]);
978
tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[5]);
979
980
z1 = tmp0 + tmp2 + tmp3;
981
z2 = tmp1 + tmp4;
982
/* Apply unsigned->signed conversion. */
983
dataptr[0] = (DCTELEM) ((z1 + z2 - 9 * CENTERJSAMPLE) << 1);
984
dataptr[6] = (DCTELEM)
985
DESCALE(MULTIPLY(z1 - z2 - z2, FIX(0.707106781)), /* c6 */
986
CONST_BITS-1);
987
z1 = MULTIPLY(tmp0 - tmp2, FIX(1.328926049)); /* c2 */
988
z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(0.707106781)); /* c6 */
989
dataptr[2] = (DCTELEM)
990
DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.083350441)) /* c4 */
991
+ z1 + z2, CONST_BITS-1);
992
dataptr[4] = (DCTELEM)
993
DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.245575608)) /* c8 */
994
+ z1 - z2, CONST_BITS-1);
995
996
/* Odd part */
997
998
dataptr[3] = (DCTELEM)
999
DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.224744871)), /* c3 */
1000
CONST_BITS-1);
1001
1002
tmp11 = MULTIPLY(tmp11, FIX(1.224744871)); /* c3 */
1003
tmp0 = MULTIPLY(tmp10 + tmp12, FIX(0.909038955)); /* c5 */
1004
tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.483689525)); /* c7 */
1005
1006
dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS-1);
1007
1008
tmp2 = MULTIPLY(tmp12 - tmp13, FIX(1.392728481)); /* c1 */
1009
1010
dataptr[5] = (DCTELEM) DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS-1);
1011
dataptr[7] = (DCTELEM) DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS-1);
1012
1013
ctr++;
1014
1015
if (ctr != DCTSIZE) {
1016
if (ctr == 9)
1017
break; /* Done. */
1018
dataptr += DCTSIZE; /* advance pointer to next row */
1019
} else
1020
dataptr = workspace; /* switch pointer to extended workspace */
1021
}
1022
1023
/* Pass 2: process columns.
1024
* We leave the results scaled up by an overall factor of 8.
1025
* We must also scale the output by (8/9)**2 = 64/81, which we partially
1026
* fold into the constant multipliers and final/initial shifting:
1027
* cK now represents sqrt(2) * cos(K*pi/18) * 128/81.
1028
*/
1029
1030
dataptr = data;
1031
wsptr = workspace;
1032
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1033
/* Even part */
1034
1035
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*0];
1036
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*7];
1037
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*6];
1038
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*5];
1039
tmp4 = dataptr[DCTSIZE*4];
1040
1041
tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*0];
1042
tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*7];
1043
tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*6];
1044
tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*5];
1045
1046
z1 = tmp0 + tmp2 + tmp3;
1047
z2 = tmp1 + tmp4;
1048
dataptr[DCTSIZE*0] = (DCTELEM)
1049
DESCALE(MULTIPLY(z1 + z2, FIX(1.580246914)), /* 128/81 */
1050
CONST_BITS+2);
1051
dataptr[DCTSIZE*6] = (DCTELEM)
1052
DESCALE(MULTIPLY(z1 - z2 - z2, FIX(1.117403309)), /* c6 */
1053
CONST_BITS+2);
1054
z1 = MULTIPLY(tmp0 - tmp2, FIX(2.100031287)); /* c2 */
1055
z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(1.117403309)); /* c6 */
1056
dataptr[DCTSIZE*2] = (DCTELEM)
1057
DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.711961190)) /* c4 */
1058
+ z1 + z2, CONST_BITS+2);
1059
dataptr[DCTSIZE*4] = (DCTELEM)
1060
DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.388070096)) /* c8 */
1061
+ z1 - z2, CONST_BITS+2);
1062
1063
/* Odd part */
1064
1065
dataptr[DCTSIZE*3] = (DCTELEM)
1066
DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.935399303)), /* c3 */
1067
CONST_BITS+2);
1068
1069
tmp11 = MULTIPLY(tmp11, FIX(1.935399303)); /* c3 */
1070
tmp0 = MULTIPLY(tmp10 + tmp12, FIX(1.436506004)); /* c5 */
1071
tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.764348879)); /* c7 */
1072
1073
dataptr[DCTSIZE*1] = (DCTELEM)
1074
DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS+2);
1075
1076
tmp2 = MULTIPLY(tmp12 - tmp13, FIX(2.200854883)); /* c1 */
1077
1078
dataptr[DCTSIZE*5] = (DCTELEM)
1079
DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS+2);
1080
dataptr[DCTSIZE*7] = (DCTELEM)
1081
DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS+2);
1082
1083
dataptr++; /* advance pointer to next column */
1084
wsptr++; /* advance pointer to next column */
1085
}
1086
}
1087
1088
1089
/*
1090
* Perform the forward DCT on a 10x10 sample block.
1091
*/
1092
1093
GLOBAL(void)
1094
jpeg_fdct_10x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1095
{
1096
INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
1097
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1098
DCTELEM workspace[8*2];
1099
DCTELEM *dataptr;
1100
DCTELEM *wsptr;
1101
JSAMPROW elemptr;
1102
int ctr;
1103
SHIFT_TEMPS
1104
1105
/* Pass 1: process rows.
1106
* Note results are scaled up by sqrt(8) compared to a true DCT;
1107
* we scale the results further by 2 as part of output adaption
1108
* scaling for different DCT size.
1109
* cK represents sqrt(2) * cos(K*pi/20).
1110
*/
1111
1112
dataptr = data;
1113
ctr = 0;
1114
for (;;) {
1115
elemptr = sample_data[ctr] + start_col;
1116
1117
/* Even part */
1118
1119
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
1120
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
1121
tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
1122
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
1123
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
1124
1125
tmp10 = tmp0 + tmp4;
1126
tmp13 = tmp0 - tmp4;
1127
tmp11 = tmp1 + tmp3;
1128
tmp14 = tmp1 - tmp3;
1129
1130
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
1131
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
1132
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
1133
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
1134
tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
1135
1136
/* Apply unsigned->signed conversion. */
1137
dataptr[0] = (DCTELEM)
1138
((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << 1);
1139
tmp12 += tmp12;
1140
dataptr[4] = (DCTELEM)
1141
DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
1142
MULTIPLY(tmp11 - tmp12, FIX(0.437016024)), /* c8 */
1143
CONST_BITS-1);
1144
tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876)); /* c6 */
1145
dataptr[2] = (DCTELEM)
1146
DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)), /* c2-c6 */
1147
CONST_BITS-1);
1148
dataptr[6] = (DCTELEM)
1149
DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)), /* c2+c6 */
1150
CONST_BITS-1);
1151
1152
/* Odd part */
1153
1154
tmp10 = tmp0 + tmp4;
1155
tmp11 = tmp1 - tmp3;
1156
dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << 1);
1157
tmp2 <<= CONST_BITS;
1158
dataptr[1] = (DCTELEM)
1159
DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) + /* c1 */
1160
MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 + /* c3 */
1161
MULTIPLY(tmp3, FIX(0.642039522)) + /* c7 */
1162
MULTIPLY(tmp4, FIX(0.221231742)), /* c9 */
1163
CONST_BITS-1);
1164
tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) - /* (c3+c7)/2 */
1165
MULTIPLY(tmp1 + tmp3, FIX(0.587785252)); /* (c1-c9)/2 */
1166
tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) + /* (c3-c7)/2 */
1167
(tmp11 << (CONST_BITS - 1)) - tmp2;
1168
dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-1);
1169
dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-1);
1170
1171
ctr++;
1172
1173
if (ctr != DCTSIZE) {
1174
if (ctr == 10)
1175
break; /* Done. */
1176
dataptr += DCTSIZE; /* advance pointer to next row */
1177
} else
1178
dataptr = workspace; /* switch pointer to extended workspace */
1179
}
1180
1181
/* Pass 2: process columns.
1182
* We leave the results scaled up by an overall factor of 8.
1183
* We must also scale the output by (8/10)**2 = 16/25, which we partially
1184
* fold into the constant multipliers and final/initial shifting:
1185
* cK now represents sqrt(2) * cos(K*pi/20) * 32/25.
1186
*/
1187
1188
dataptr = data;
1189
wsptr = workspace;
1190
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1191
/* Even part */
1192
1193
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
1194
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
1195
tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
1196
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
1197
tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
1198
1199
tmp10 = tmp0 + tmp4;
1200
tmp13 = tmp0 - tmp4;
1201
tmp11 = tmp1 + tmp3;
1202
tmp14 = tmp1 - tmp3;
1203
1204
tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
1205
tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
1206
tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
1207
tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
1208
tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
1209
1210
dataptr[DCTSIZE*0] = (DCTELEM)
1211
DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
1212
CONST_BITS+2);
1213
tmp12 += tmp12;
1214
dataptr[DCTSIZE*4] = (DCTELEM)
1215
DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
1216
MULTIPLY(tmp11 - tmp12, FIX(0.559380511)), /* c8 */
1217
CONST_BITS+2);
1218
tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961)); /* c6 */
1219
dataptr[DCTSIZE*2] = (DCTELEM)
1220
DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)), /* c2-c6 */
1221
CONST_BITS+2);
1222
dataptr[DCTSIZE*6] = (DCTELEM)
1223
DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)), /* c2+c6 */
1224
CONST_BITS+2);
1225
1226
/* Odd part */
1227
1228
tmp10 = tmp0 + tmp4;
1229
tmp11 = tmp1 - tmp3;
1230
dataptr[DCTSIZE*5] = (DCTELEM)
1231
DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)), /* 32/25 */
1232
CONST_BITS+2);
1233
tmp2 = MULTIPLY(tmp2, FIX(1.28)); /* 32/25 */
1234
dataptr[DCTSIZE*1] = (DCTELEM)
1235
DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) + /* c1 */
1236
MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 + /* c3 */
1237
MULTIPLY(tmp3, FIX(0.821810588)) + /* c7 */
1238
MULTIPLY(tmp4, FIX(0.283176630)), /* c9 */
1239
CONST_BITS+2);
1240
tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) - /* (c3+c7)/2 */
1241
MULTIPLY(tmp1 + tmp3, FIX(0.752365123)); /* (c1-c9)/2 */
1242
tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) + /* (c3-c7)/2 */
1243
MULTIPLY(tmp11, FIX(0.64)) - tmp2; /* 16/25 */
1244
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+2);
1245
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+2);
1246
1247
dataptr++; /* advance pointer to next column */
1248
wsptr++; /* advance pointer to next column */
1249
}
1250
}
1251
1252
1253
/*
1254
* Perform the forward DCT on an 11x11 sample block.
1255
*/
1256
1257
GLOBAL(void)
1258
jpeg_fdct_11x11 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1259
{
1260
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1261
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
1262
INT32 z1, z2, z3;
1263
DCTELEM workspace[8*3];
1264
DCTELEM *dataptr;
1265
DCTELEM *wsptr;
1266
JSAMPROW elemptr;
1267
int ctr;
1268
SHIFT_TEMPS
1269
1270
/* Pass 1: process rows.
1271
* Note results are scaled up by sqrt(8) compared to a true DCT;
1272
* we scale the results further by 2 as part of output adaption
1273
* scaling for different DCT size.
1274
* cK represents sqrt(2) * cos(K*pi/22).
1275
*/
1276
1277
dataptr = data;
1278
ctr = 0;
1279
for (;;) {
1280
elemptr = sample_data[ctr] + start_col;
1281
1282
/* Even part */
1283
1284
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[10]);
1285
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[9]);
1286
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[8]);
1287
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[7]);
1288
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[6]);
1289
tmp5 = GETJSAMPLE(elemptr[5]);
1290
1291
tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[10]);
1292
tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[9]);
1293
tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[8]);
1294
tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[7]);
1295
tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[6]);
1296
1297
/* Apply unsigned->signed conversion. */
1298
dataptr[0] = (DCTELEM)
1299
((tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 - 11 * CENTERJSAMPLE) << 1);
1300
tmp5 += tmp5;
1301
tmp0 -= tmp5;
1302
tmp1 -= tmp5;
1303
tmp2 -= tmp5;
1304
tmp3 -= tmp5;
1305
tmp4 -= tmp5;
1306
z1 = MULTIPLY(tmp0 + tmp3, FIX(1.356927976)) + /* c2 */
1307
MULTIPLY(tmp2 + tmp4, FIX(0.201263574)); /* c10 */
1308
z2 = MULTIPLY(tmp1 - tmp3, FIX(0.926112931)); /* c6 */
1309
z3 = MULTIPLY(tmp0 - tmp1, FIX(1.189712156)); /* c4 */
1310
dataptr[2] = (DCTELEM)
1311
DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.018300590)) /* c2+c8-c6 */
1312
- MULTIPLY(tmp4, FIX(1.390975730)), /* c4+c10 */
1313
CONST_BITS-1);
1314
dataptr[4] = (DCTELEM)
1315
DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.062335650)) /* c4-c6-c10 */
1316
- MULTIPLY(tmp2, FIX(1.356927976)) /* c2 */
1317
+ MULTIPLY(tmp4, FIX(0.587485545)), /* c8 */
1318
CONST_BITS-1);
1319
dataptr[6] = (DCTELEM)
1320
DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.620527200)) /* c2+c4-c6 */
1321
- MULTIPLY(tmp2, FIX(0.788749120)), /* c8+c10 */
1322
CONST_BITS-1);
1323
1324
/* Odd part */
1325
1326
tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.286413905)); /* c3 */
1327
tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.068791298)); /* c5 */
1328
tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.764581576)); /* c7 */
1329
tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.719967871)) /* c7+c5+c3-c1 */
1330
+ MULTIPLY(tmp14, FIX(0.398430003)); /* c9 */
1331
tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.764581576)); /* -c7 */
1332
tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.399818907)); /* -c1 */
1333
tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.276416582)) /* c9+c7+c1-c3 */
1334
- MULTIPLY(tmp14, FIX(1.068791298)); /* c5 */
1335
tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.398430003)); /* c9 */
1336
tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(1.989053629)) /* c9+c5+c3-c7 */
1337
+ MULTIPLY(tmp14, FIX(1.399818907)); /* c1 */
1338
tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.305598626)) /* c1+c5-c9-c7 */
1339
- MULTIPLY(tmp14, FIX(1.286413905)); /* c3 */
1340
1341
dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-1);
1342
dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-1);
1343
dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-1);
1344
dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-1);
1345
1346
ctr++;
1347
1348
if (ctr != DCTSIZE) {
1349
if (ctr == 11)
1350
break; /* Done. */
1351
dataptr += DCTSIZE; /* advance pointer to next row */
1352
} else
1353
dataptr = workspace; /* switch pointer to extended workspace */
1354
}
1355
1356
/* Pass 2: process columns.
1357
* We leave the results scaled up by an overall factor of 8.
1358
* We must also scale the output by (8/11)**2 = 64/121, which we partially
1359
* fold into the constant multipliers and final/initial shifting:
1360
* cK now represents sqrt(2) * cos(K*pi/22) * 128/121.
1361
*/
1362
1363
dataptr = data;
1364
wsptr = workspace;
1365
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1366
/* Even part */
1367
1368
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*2];
1369
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*1];
1370
tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*0];
1371
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*7];
1372
tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*6];
1373
tmp5 = dataptr[DCTSIZE*5];
1374
1375
tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*2];
1376
tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*1];
1377
tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*0];
1378
tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*7];
1379
tmp14 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*6];
1380
1381
dataptr[DCTSIZE*0] = (DCTELEM)
1382
DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5,
1383
FIX(1.057851240)), /* 128/121 */
1384
CONST_BITS+2);
1385
tmp5 += tmp5;
1386
tmp0 -= tmp5;
1387
tmp1 -= tmp5;
1388
tmp2 -= tmp5;
1389
tmp3 -= tmp5;
1390
tmp4 -= tmp5;
1391
z1 = MULTIPLY(tmp0 + tmp3, FIX(1.435427942)) + /* c2 */
1392
MULTIPLY(tmp2 + tmp4, FIX(0.212906922)); /* c10 */
1393
z2 = MULTIPLY(tmp1 - tmp3, FIX(0.979689713)); /* c6 */
1394
z3 = MULTIPLY(tmp0 - tmp1, FIX(1.258538479)); /* c4 */
1395
dataptr[DCTSIZE*2] = (DCTELEM)
1396
DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.077210542)) /* c2+c8-c6 */
1397
- MULTIPLY(tmp4, FIX(1.471445400)), /* c4+c10 */
1398
CONST_BITS+2);
1399
dataptr[DCTSIZE*4] = (DCTELEM)
1400
DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.065941844)) /* c4-c6-c10 */
1401
- MULTIPLY(tmp2, FIX(1.435427942)) /* c2 */
1402
+ MULTIPLY(tmp4, FIX(0.621472312)), /* c8 */
1403
CONST_BITS+2);
1404
dataptr[DCTSIZE*6] = (DCTELEM)
1405
DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.714276708)) /* c2+c4-c6 */
1406
- MULTIPLY(tmp2, FIX(0.834379234)), /* c8+c10 */
1407
CONST_BITS+2);
1408
1409
/* Odd part */
1410
1411
tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.360834544)); /* c3 */
1412
tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.130622199)); /* c5 */
1413
tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.808813568)); /* c7 */
1414
tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.819470145)) /* c7+c5+c3-c1 */
1415
+ MULTIPLY(tmp14, FIX(0.421479672)); /* c9 */
1416
tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.808813568)); /* -c7 */
1417
tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.480800167)); /* -c1 */
1418
tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.350258864)) /* c9+c7+c1-c3 */
1419
- MULTIPLY(tmp14, FIX(1.130622199)); /* c5 */
1420
tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.421479672)); /* c9 */
1421
tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(2.104122847)) /* c9+c5+c3-c7 */
1422
+ MULTIPLY(tmp14, FIX(1.480800167)); /* c1 */
1423
tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.381129125)) /* c1+c5-c9-c7 */
1424
- MULTIPLY(tmp14, FIX(1.360834544)); /* c3 */
1425
1426
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2);
1427
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2);
1428
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2);
1429
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2);
1430
1431
dataptr++; /* advance pointer to next column */
1432
wsptr++; /* advance pointer to next column */
1433
}
1434
}
1435
1436
1437
/*
1438
* Perform the forward DCT on a 12x12 sample block.
1439
*/
1440
1441
GLOBAL(void)
1442
jpeg_fdct_12x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1443
{
1444
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
1445
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1446
DCTELEM workspace[8*4];
1447
DCTELEM *dataptr;
1448
DCTELEM *wsptr;
1449
JSAMPROW elemptr;
1450
int ctr;
1451
SHIFT_TEMPS
1452
1453
/* Pass 1: process rows.
1454
* Note results are scaled up by sqrt(8) compared to a true DCT.
1455
* cK represents sqrt(2) * cos(K*pi/24).
1456
*/
1457
1458
dataptr = data;
1459
ctr = 0;
1460
for (;;) {
1461
elemptr = sample_data[ctr] + start_col;
1462
1463
/* Even part */
1464
1465
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
1466
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
1467
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
1468
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
1469
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
1470
tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
1471
1472
tmp10 = tmp0 + tmp5;
1473
tmp13 = tmp0 - tmp5;
1474
tmp11 = tmp1 + tmp4;
1475
tmp14 = tmp1 - tmp4;
1476
tmp12 = tmp2 + tmp3;
1477
tmp15 = tmp2 - tmp3;
1478
1479
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
1480
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
1481
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
1482
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
1483
tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
1484
tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
1485
1486
/* Apply unsigned->signed conversion. */
1487
dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE);
1488
dataptr[6] = (DCTELEM) (tmp13 - tmp14 - tmp15);
1489
dataptr[4] = (DCTELEM)
1490
DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
1491
CONST_BITS);
1492
dataptr[2] = (DCTELEM)
1493
DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
1494
CONST_BITS);
1495
1496
/* Odd part */
1497
1498
tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100); /* c9 */
1499
tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865); /* c3-c9 */
1500
tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065); /* c3+c9 */
1501
tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054)); /* c5 */
1502
tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669)); /* c7 */
1503
tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
1504
+ MULTIPLY(tmp5, FIX(0.184591911)); /* c11 */
1505
tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
1506
tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
1507
+ MULTIPLY(tmp5, FIX(0.860918669)); /* c7 */
1508
tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
1509
- MULTIPLY(tmp5, FIX(1.121971054)); /* c5 */
1510
tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
1511
- MULTIPLY(tmp2 + tmp5, FIX_0_541196100); /* c9 */
1512
1513
dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS);
1514
dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS);
1515
dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS);
1516
dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS);
1517
1518
ctr++;
1519
1520
if (ctr != DCTSIZE) {
1521
if (ctr == 12)
1522
break; /* Done. */
1523
dataptr += DCTSIZE; /* advance pointer to next row */
1524
} else
1525
dataptr = workspace; /* switch pointer to extended workspace */
1526
}
1527
1528
/* Pass 2: process columns.
1529
* We leave the results scaled up by an overall factor of 8.
1530
* We must also scale the output by (8/12)**2 = 4/9, which we partially
1531
* fold into the constant multipliers and final shifting:
1532
* cK now represents sqrt(2) * cos(K*pi/24) * 8/9.
1533
*/
1534
1535
dataptr = data;
1536
wsptr = workspace;
1537
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1538
/* Even part */
1539
1540
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
1541
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
1542
tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
1543
tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
1544
tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
1545
tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
1546
1547
tmp10 = tmp0 + tmp5;
1548
tmp13 = tmp0 - tmp5;
1549
tmp11 = tmp1 + tmp4;
1550
tmp14 = tmp1 - tmp4;
1551
tmp12 = tmp2 + tmp3;
1552
tmp15 = tmp2 - tmp3;
1553
1554
tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
1555
tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
1556
tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
1557
tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
1558
tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
1559
tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
1560
1561
dataptr[DCTSIZE*0] = (DCTELEM)
1562
DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
1563
CONST_BITS+1);
1564
dataptr[DCTSIZE*6] = (DCTELEM)
1565
DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
1566
CONST_BITS+1);
1567
dataptr[DCTSIZE*4] = (DCTELEM)
1568
DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)), /* c4 */
1569
CONST_BITS+1);
1570
dataptr[DCTSIZE*2] = (DCTELEM)
1571
DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) + /* 8/9 */
1572
MULTIPLY(tmp13 + tmp15, FIX(1.214244803)), /* c2 */
1573
CONST_BITS+1);
1574
1575
/* Odd part */
1576
1577
tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200)); /* c9 */
1578
tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102)); /* c3-c9 */
1579
tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502)); /* c3+c9 */
1580
tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603)); /* c5 */
1581
tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039)); /* c7 */
1582
tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
1583
+ MULTIPLY(tmp5, FIX(0.164081699)); /* c11 */
1584
tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
1585
tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
1586
+ MULTIPLY(tmp5, FIX(0.765261039)); /* c7 */
1587
tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
1588
- MULTIPLY(tmp5, FIX(0.997307603)); /* c5 */
1589
tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
1590
- MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
1591
1592
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+1);
1593
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+1);
1594
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+1);
1595
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+1);
1596
1597
dataptr++; /* advance pointer to next column */
1598
wsptr++; /* advance pointer to next column */
1599
}
1600
}
1601
1602
1603
/*
1604
* Perform the forward DCT on a 13x13 sample block.
1605
*/
1606
1607
GLOBAL(void)
1608
jpeg_fdct_13x13 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1609
{
1610
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
1611
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
1612
INT32 z1, z2;
1613
DCTELEM workspace[8*5];
1614
DCTELEM *dataptr;
1615
DCTELEM *wsptr;
1616
JSAMPROW elemptr;
1617
int ctr;
1618
SHIFT_TEMPS
1619
1620
/* Pass 1: process rows.
1621
* Note results are scaled up by sqrt(8) compared to a true DCT.
1622
* cK represents sqrt(2) * cos(K*pi/26).
1623
*/
1624
1625
dataptr = data;
1626
ctr = 0;
1627
for (;;) {
1628
elemptr = sample_data[ctr] + start_col;
1629
1630
/* Even part */
1631
1632
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[12]);
1633
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[11]);
1634
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[10]);
1635
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[9]);
1636
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[8]);
1637
tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[7]);
1638
tmp6 = GETJSAMPLE(elemptr[6]);
1639
1640
tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[12]);
1641
tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[11]);
1642
tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[10]);
1643
tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[9]);
1644
tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[8]);
1645
tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[7]);
1646
1647
/* Apply unsigned->signed conversion. */
1648
dataptr[0] = (DCTELEM)
1649
(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 - 13 * CENTERJSAMPLE);
1650
tmp6 += tmp6;
1651
tmp0 -= tmp6;
1652
tmp1 -= tmp6;
1653
tmp2 -= tmp6;
1654
tmp3 -= tmp6;
1655
tmp4 -= tmp6;
1656
tmp5 -= tmp6;
1657
dataptr[2] = (DCTELEM)
1658
DESCALE(MULTIPLY(tmp0, FIX(1.373119086)) + /* c2 */
1659
MULTIPLY(tmp1, FIX(1.058554052)) + /* c6 */
1660
MULTIPLY(tmp2, FIX(0.501487041)) - /* c10 */
1661
MULTIPLY(tmp3, FIX(0.170464608)) - /* c12 */
1662
MULTIPLY(tmp4, FIX(0.803364869)) - /* c8 */
1663
MULTIPLY(tmp5, FIX(1.252223920)), /* c4 */
1664
CONST_BITS);
1665
z1 = MULTIPLY(tmp0 - tmp2, FIX(1.155388986)) - /* (c4+c6)/2 */
1666
MULTIPLY(tmp3 - tmp4, FIX(0.435816023)) - /* (c2-c10)/2 */
1667
MULTIPLY(tmp1 - tmp5, FIX(0.316450131)); /* (c8-c12)/2 */
1668
z2 = MULTIPLY(tmp0 + tmp2, FIX(0.096834934)) - /* (c4-c6)/2 */
1669
MULTIPLY(tmp3 + tmp4, FIX(0.937303064)) + /* (c2+c10)/2 */
1670
MULTIPLY(tmp1 + tmp5, FIX(0.486914739)); /* (c8+c12)/2 */
1671
1672
dataptr[4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS);
1673
dataptr[6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS);
1674
1675
/* Odd part */
1676
1677
tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.322312651)); /* c3 */
1678
tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.163874945)); /* c5 */
1679
tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.937797057)) + /* c7 */
1680
MULTIPLY(tmp14 + tmp15, FIX(0.338443458)); /* c11 */
1681
tmp0 = tmp1 + tmp2 + tmp3 -
1682
MULTIPLY(tmp10, FIX(2.020082300)) + /* c3+c5+c7-c1 */
1683
MULTIPLY(tmp14, FIX(0.318774355)); /* c9-c11 */
1684
tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.937797057)) - /* c7 */
1685
MULTIPLY(tmp11 + tmp12, FIX(0.338443458)); /* c11 */
1686
tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.163874945)); /* -c5 */
1687
tmp1 += tmp4 + tmp5 +
1688
MULTIPLY(tmp11, FIX(0.837223564)) - /* c5+c9+c11-c3 */
1689
MULTIPLY(tmp14, FIX(2.341699410)); /* c1+c7 */
1690
tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.657217813)); /* -c9 */
1691
tmp2 += tmp4 + tmp6 -
1692
MULTIPLY(tmp12, FIX(1.572116027)) + /* c1+c5-c9-c11 */
1693
MULTIPLY(tmp15, FIX(2.260109708)); /* c3+c7 */
1694
tmp3 += tmp5 + tmp6 +
1695
MULTIPLY(tmp13, FIX(2.205608352)) - /* c3+c5+c9-c7 */
1696
MULTIPLY(tmp15, FIX(1.742345811)); /* c1+c11 */
1697
1698
dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
1699
dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
1700
dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
1701
dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
1702
1703
ctr++;
1704
1705
if (ctr != DCTSIZE) {
1706
if (ctr == 13)
1707
break; /* Done. */
1708
dataptr += DCTSIZE; /* advance pointer to next row */
1709
} else
1710
dataptr = workspace; /* switch pointer to extended workspace */
1711
}
1712
1713
/* Pass 2: process columns.
1714
* We leave the results scaled up by an overall factor of 8.
1715
* We must also scale the output by (8/13)**2 = 64/169, which we partially
1716
* fold into the constant multipliers and final shifting:
1717
* cK now represents sqrt(2) * cos(K*pi/26) * 128/169.
1718
*/
1719
1720
dataptr = data;
1721
wsptr = workspace;
1722
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1723
/* Even part */
1724
1725
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*4];
1726
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*3];
1727
tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*2];
1728
tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*1];
1729
tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*0];
1730
tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*7];
1731
tmp6 = dataptr[DCTSIZE*6];
1732
1733
tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*4];
1734
tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*3];
1735
tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*2];
1736
tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*1];
1737
tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*0];
1738
tmp15 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*7];
1739
1740
dataptr[DCTSIZE*0] = (DCTELEM)
1741
DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6,
1742
FIX(0.757396450)), /* 128/169 */
1743
CONST_BITS+1);
1744
tmp6 += tmp6;
1745
tmp0 -= tmp6;
1746
tmp1 -= tmp6;
1747
tmp2 -= tmp6;
1748
tmp3 -= tmp6;
1749
tmp4 -= tmp6;
1750
tmp5 -= tmp6;
1751
dataptr[DCTSIZE*2] = (DCTELEM)
1752
DESCALE(MULTIPLY(tmp0, FIX(1.039995521)) + /* c2 */
1753
MULTIPLY(tmp1, FIX(0.801745081)) + /* c6 */
1754
MULTIPLY(tmp2, FIX(0.379824504)) - /* c10 */
1755
MULTIPLY(tmp3, FIX(0.129109289)) - /* c12 */
1756
MULTIPLY(tmp4, FIX(0.608465700)) - /* c8 */
1757
MULTIPLY(tmp5, FIX(0.948429952)), /* c4 */
1758
CONST_BITS+1);
1759
z1 = MULTIPLY(tmp0 - tmp2, FIX(0.875087516)) - /* (c4+c6)/2 */
1760
MULTIPLY(tmp3 - tmp4, FIX(0.330085509)) - /* (c2-c10)/2 */
1761
MULTIPLY(tmp1 - tmp5, FIX(0.239678205)); /* (c8-c12)/2 */
1762
z2 = MULTIPLY(tmp0 + tmp2, FIX(0.073342435)) - /* (c4-c6)/2 */
1763
MULTIPLY(tmp3 + tmp4, FIX(0.709910013)) + /* (c2+c10)/2 */
1764
MULTIPLY(tmp1 + tmp5, FIX(0.368787494)); /* (c8+c12)/2 */
1765
1766
dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+1);
1767
dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS+1);
1768
1769
/* Odd part */
1770
1771
tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.001514908)); /* c3 */
1772
tmp2 = MULTIPLY(tmp10 + tmp12, FIX(0.881514751)); /* c5 */
1773
tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.710284161)) + /* c7 */
1774
MULTIPLY(tmp14 + tmp15, FIX(0.256335874)); /* c11 */
1775
tmp0 = tmp1 + tmp2 + tmp3 -
1776
MULTIPLY(tmp10, FIX(1.530003162)) + /* c3+c5+c7-c1 */
1777
MULTIPLY(tmp14, FIX(0.241438564)); /* c9-c11 */
1778
tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.710284161)) - /* c7 */
1779
MULTIPLY(tmp11 + tmp12, FIX(0.256335874)); /* c11 */
1780
tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(0.881514751)); /* -c5 */
1781
tmp1 += tmp4 + tmp5 +
1782
MULTIPLY(tmp11, FIX(0.634110155)) - /* c5+c9+c11-c3 */
1783
MULTIPLY(tmp14, FIX(1.773594819)); /* c1+c7 */
1784
tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.497774438)); /* -c9 */
1785
tmp2 += tmp4 + tmp6 -
1786
MULTIPLY(tmp12, FIX(1.190715098)) + /* c1+c5-c9-c11 */
1787
MULTIPLY(tmp15, FIX(1.711799069)); /* c3+c7 */
1788
tmp3 += tmp5 + tmp6 +
1789
MULTIPLY(tmp13, FIX(1.670519935)) - /* c3+c5+c9-c7 */
1790
MULTIPLY(tmp15, FIX(1.319646532)); /* c1+c11 */
1791
1792
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+1);
1793
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+1);
1794
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+1);
1795
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+1);
1796
1797
dataptr++; /* advance pointer to next column */
1798
wsptr++; /* advance pointer to next column */
1799
}
1800
}
1801
1802
1803
/*
1804
* Perform the forward DCT on a 14x14 sample block.
1805
*/
1806
1807
GLOBAL(void)
1808
jpeg_fdct_14x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
1809
{
1810
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
1811
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
1812
DCTELEM workspace[8*6];
1813
DCTELEM *dataptr;
1814
DCTELEM *wsptr;
1815
JSAMPROW elemptr;
1816
int ctr;
1817
SHIFT_TEMPS
1818
1819
/* Pass 1: process rows.
1820
* Note results are scaled up by sqrt(8) compared to a true DCT.
1821
* cK represents sqrt(2) * cos(K*pi/28).
1822
*/
1823
1824
dataptr = data;
1825
ctr = 0;
1826
for (;;) {
1827
elemptr = sample_data[ctr] + start_col;
1828
1829
/* Even part */
1830
1831
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
1832
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
1833
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
1834
tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
1835
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
1836
tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
1837
tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
1838
1839
tmp10 = tmp0 + tmp6;
1840
tmp14 = tmp0 - tmp6;
1841
tmp11 = tmp1 + tmp5;
1842
tmp15 = tmp1 - tmp5;
1843
tmp12 = tmp2 + tmp4;
1844
tmp16 = tmp2 - tmp4;
1845
1846
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
1847
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
1848
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
1849
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
1850
tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
1851
tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
1852
tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
1853
1854
/* Apply unsigned->signed conversion. */
1855
dataptr[0] = (DCTELEM)
1856
(tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE);
1857
tmp13 += tmp13;
1858
dataptr[4] = (DCTELEM)
1859
DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
1860
MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
1861
MULTIPLY(tmp12 - tmp13, FIX(0.881747734)), /* c8 */
1862
CONST_BITS);
1863
1864
tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686)); /* c6 */
1865
1866
dataptr[2] = (DCTELEM)
1867
DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590)) /* c2-c6 */
1868
+ MULTIPLY(tmp16, FIX(0.613604268)), /* c10 */
1869
CONST_BITS);
1870
dataptr[6] = (DCTELEM)
1871
DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954)) /* c6+c10 */
1872
- MULTIPLY(tmp16, FIX(1.378756276)), /* c2 */
1873
CONST_BITS);
1874
1875
/* Odd part */
1876
1877
tmp10 = tmp1 + tmp2;
1878
tmp11 = tmp5 - tmp4;
1879
dataptr[7] = (DCTELEM) (tmp0 - tmp10 + tmp3 - tmp11 - tmp6);
1880
tmp3 <<= CONST_BITS;
1881
tmp10 = MULTIPLY(tmp10, - FIX(0.158341681)); /* -c13 */
1882
tmp11 = MULTIPLY(tmp11, FIX(1.405321284)); /* c1 */
1883
tmp10 += tmp11 - tmp3;
1884
tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) + /* c5 */
1885
MULTIPLY(tmp4 + tmp6, FIX(0.752406978)); /* c9 */
1886
dataptr[5] = (DCTELEM)
1887
DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
1888
+ MULTIPLY(tmp4, FIX(1.119999435)), /* c1+c11-c9 */
1889
CONST_BITS);
1890
tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) + /* c3 */
1891
MULTIPLY(tmp5 - tmp6, FIX(0.467085129)); /* c11 */
1892
dataptr[3] = (DCTELEM)
1893
DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
1894
- MULTIPLY(tmp5, FIX(3.069855259)), /* c1+c5+c11 */
1895
CONST_BITS);
1896
dataptr[1] = (DCTELEM)
1897
DESCALE(tmp11 + tmp12 + tmp3 + tmp6 -
1898
MULTIPLY(tmp0 + tmp6, FIX(1.126980169)), /* c3+c5-c1 */
1899
CONST_BITS);
1900
1901
ctr++;
1902
1903
if (ctr != DCTSIZE) {
1904
if (ctr == 14)
1905
break; /* Done. */
1906
dataptr += DCTSIZE; /* advance pointer to next row */
1907
} else
1908
dataptr = workspace; /* switch pointer to extended workspace */
1909
}
1910
1911
/* Pass 2: process columns.
1912
* We leave the results scaled up by an overall factor of 8.
1913
* We must also scale the output by (8/14)**2 = 16/49, which we partially
1914
* fold into the constant multipliers and final shifting:
1915
* cK now represents sqrt(2) * cos(K*pi/28) * 32/49.
1916
*/
1917
1918
dataptr = data;
1919
wsptr = workspace;
1920
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
1921
/* Even part */
1922
1923
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
1924
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
1925
tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
1926
tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
1927
tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
1928
tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
1929
tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
1930
1931
tmp10 = tmp0 + tmp6;
1932
tmp14 = tmp0 - tmp6;
1933
tmp11 = tmp1 + tmp5;
1934
tmp15 = tmp1 - tmp5;
1935
tmp12 = tmp2 + tmp4;
1936
tmp16 = tmp2 - tmp4;
1937
1938
tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
1939
tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
1940
tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
1941
tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
1942
tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
1943
tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
1944
tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
1945
1946
dataptr[DCTSIZE*0] = (DCTELEM)
1947
DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
1948
FIX(0.653061224)), /* 32/49 */
1949
CONST_BITS+1);
1950
tmp13 += tmp13;
1951
dataptr[DCTSIZE*4] = (DCTELEM)
1952
DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
1953
MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
1954
MULTIPLY(tmp12 - tmp13, FIX(0.575835255)), /* c8 */
1955
CONST_BITS+1);
1956
1957
tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570)); /* c6 */
1958
1959
dataptr[DCTSIZE*2] = (DCTELEM)
1960
DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691)) /* c2-c6 */
1961
+ MULTIPLY(tmp16, FIX(0.400721155)), /* c10 */
1962
CONST_BITS+1);
1963
dataptr[DCTSIZE*6] = (DCTELEM)
1964
DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725)) /* c6+c10 */
1965
- MULTIPLY(tmp16, FIX(0.900412262)), /* c2 */
1966
CONST_BITS+1);
1967
1968
/* Odd part */
1969
1970
tmp10 = tmp1 + tmp2;
1971
tmp11 = tmp5 - tmp4;
1972
dataptr[DCTSIZE*7] = (DCTELEM)
1973
DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
1974
FIX(0.653061224)), /* 32/49 */
1975
CONST_BITS+1);
1976
tmp3 = MULTIPLY(tmp3 , FIX(0.653061224)); /* 32/49 */
1977
tmp10 = MULTIPLY(tmp10, - FIX(0.103406812)); /* -c13 */
1978
tmp11 = MULTIPLY(tmp11, FIX(0.917760839)); /* c1 */
1979
tmp10 += tmp11 - tmp3;
1980
tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) + /* c5 */
1981
MULTIPLY(tmp4 + tmp6, FIX(0.491367823)); /* c9 */
1982
dataptr[DCTSIZE*5] = (DCTELEM)
1983
DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
1984
+ MULTIPLY(tmp4, FIX(0.731428202)), /* c1+c11-c9 */
1985
CONST_BITS+1);
1986
tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) + /* c3 */
1987
MULTIPLY(tmp5 - tmp6, FIX(0.305035186)); /* c11 */
1988
dataptr[DCTSIZE*3] = (DCTELEM)
1989
DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
1990
- MULTIPLY(tmp5, FIX(2.004803435)), /* c1+c5+c11 */
1991
CONST_BITS+1);
1992
dataptr[DCTSIZE*1] = (DCTELEM)
1993
DESCALE(tmp11 + tmp12 + tmp3
1994
- MULTIPLY(tmp0, FIX(0.735987049)) /* c3+c5-c1 */
1995
- MULTIPLY(tmp6, FIX(0.082925825)), /* c9-c11-c13 */
1996
CONST_BITS+1);
1997
1998
dataptr++; /* advance pointer to next column */
1999
wsptr++; /* advance pointer to next column */
2000
}
2001
}
2002
2003
2004
/*
2005
* Perform the forward DCT on a 15x15 sample block.
2006
*/
2007
2008
GLOBAL(void)
2009
jpeg_fdct_15x15 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2010
{
2011
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
2012
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2013
INT32 z1, z2, z3;
2014
DCTELEM workspace[8*7];
2015
DCTELEM *dataptr;
2016
DCTELEM *wsptr;
2017
JSAMPROW elemptr;
2018
int ctr;
2019
SHIFT_TEMPS
2020
2021
/* Pass 1: process rows.
2022
* Note results are scaled up by sqrt(8) compared to a true DCT.
2023
* cK represents sqrt(2) * cos(K*pi/30).
2024
*/
2025
2026
dataptr = data;
2027
ctr = 0;
2028
for (;;) {
2029
elemptr = sample_data[ctr] + start_col;
2030
2031
/* Even part */
2032
2033
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[14]);
2034
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[13]);
2035
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[12]);
2036
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[11]);
2037
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[10]);
2038
tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[9]);
2039
tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[8]);
2040
tmp7 = GETJSAMPLE(elemptr[7]);
2041
2042
tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[14]);
2043
tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[13]);
2044
tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[12]);
2045
tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[11]);
2046
tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[10]);
2047
tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[9]);
2048
tmp16 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[8]);
2049
2050
z1 = tmp0 + tmp4 + tmp5;
2051
z2 = tmp1 + tmp3 + tmp6;
2052
z3 = tmp2 + tmp7;
2053
/* Apply unsigned->signed conversion. */
2054
dataptr[0] = (DCTELEM) (z1 + z2 + z3 - 15 * CENTERJSAMPLE);
2055
z3 += z3;
2056
dataptr[6] = (DCTELEM)
2057
DESCALE(MULTIPLY(z1 - z3, FIX(1.144122806)) - /* c6 */
2058
MULTIPLY(z2 - z3, FIX(0.437016024)), /* c12 */
2059
CONST_BITS);
2060
tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7;
2061
z1 = MULTIPLY(tmp3 - tmp2, FIX(1.531135173)) - /* c2+c14 */
2062
MULTIPLY(tmp6 - tmp2, FIX(2.238241955)); /* c4+c8 */
2063
z2 = MULTIPLY(tmp5 - tmp2, FIX(0.798468008)) - /* c8-c14 */
2064
MULTIPLY(tmp0 - tmp2, FIX(0.091361227)); /* c2-c4 */
2065
z3 = MULTIPLY(tmp0 - tmp3, FIX(1.383309603)) + /* c2 */
2066
MULTIPLY(tmp6 - tmp5, FIX(0.946293579)) + /* c8 */
2067
MULTIPLY(tmp1 - tmp4, FIX(0.790569415)); /* (c6+c12)/2 */
2068
2069
dataptr[2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS);
2070
dataptr[4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS);
2071
2072
/* Odd part */
2073
2074
tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
2075
FIX(1.224744871)); /* c5 */
2076
tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.344997024)) + /* c3 */
2077
MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.831253876)); /* c9 */
2078
tmp12 = MULTIPLY(tmp12, FIX(1.224744871)); /* c5 */
2079
tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.406466353)) + /* c1 */
2080
MULTIPLY(tmp11 + tmp14, FIX(1.344997024)) + /* c3 */
2081
MULTIPLY(tmp13 + tmp15, FIX(0.575212477)); /* c11 */
2082
tmp0 = MULTIPLY(tmp13, FIX(0.475753014)) - /* c7-c11 */
2083
MULTIPLY(tmp14, FIX(0.513743148)) + /* c3-c9 */
2084
MULTIPLY(tmp16, FIX(1.700497885)) + tmp4 + tmp12; /* c1+c13 */
2085
tmp3 = MULTIPLY(tmp10, - FIX(0.355500862)) - /* -(c1-c7) */
2086
MULTIPLY(tmp11, FIX(2.176250899)) - /* c3+c9 */
2087
MULTIPLY(tmp15, FIX(0.869244010)) + tmp4 - tmp12; /* c11+c13 */
2088
2089
dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
2090
dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
2091
dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
2092
dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
2093
2094
ctr++;
2095
2096
if (ctr != DCTSIZE) {
2097
if (ctr == 15)
2098
break; /* Done. */
2099
dataptr += DCTSIZE; /* advance pointer to next row */
2100
} else
2101
dataptr = workspace; /* switch pointer to extended workspace */
2102
}
2103
2104
/* Pass 2: process columns.
2105
* We leave the results scaled up by an overall factor of 8.
2106
* We must also scale the output by (8/15)**2 = 64/225, which we partially
2107
* fold into the constant multipliers and final shifting:
2108
* cK now represents sqrt(2) * cos(K*pi/30) * 256/225.
2109
*/
2110
2111
dataptr = data;
2112
wsptr = workspace;
2113
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2114
/* Even part */
2115
2116
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*6];
2117
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*5];
2118
tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*4];
2119
tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*3];
2120
tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*2];
2121
tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*1];
2122
tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*0];
2123
tmp7 = dataptr[DCTSIZE*7];
2124
2125
tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*6];
2126
tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*5];
2127
tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*4];
2128
tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*3];
2129
tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*2];
2130
tmp15 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*1];
2131
tmp16 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*0];
2132
2133
z1 = tmp0 + tmp4 + tmp5;
2134
z2 = tmp1 + tmp3 + tmp6;
2135
z3 = tmp2 + tmp7;
2136
dataptr[DCTSIZE*0] = (DCTELEM)
2137
DESCALE(MULTIPLY(z1 + z2 + z3, FIX(1.137777778)), /* 256/225 */
2138
CONST_BITS+2);
2139
z3 += z3;
2140
dataptr[DCTSIZE*6] = (DCTELEM)
2141
DESCALE(MULTIPLY(z1 - z3, FIX(1.301757503)) - /* c6 */
2142
MULTIPLY(z2 - z3, FIX(0.497227121)), /* c12 */
2143
CONST_BITS+2);
2144
tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7;
2145
z1 = MULTIPLY(tmp3 - tmp2, FIX(1.742091575)) - /* c2+c14 */
2146
MULTIPLY(tmp6 - tmp2, FIX(2.546621957)); /* c4+c8 */
2147
z2 = MULTIPLY(tmp5 - tmp2, FIX(0.908479156)) - /* c8-c14 */
2148
MULTIPLY(tmp0 - tmp2, FIX(0.103948774)); /* c2-c4 */
2149
z3 = MULTIPLY(tmp0 - tmp3, FIX(1.573898926)) + /* c2 */
2150
MULTIPLY(tmp6 - tmp5, FIX(1.076671805)) + /* c8 */
2151
MULTIPLY(tmp1 - tmp4, FIX(0.899492312)); /* (c6+c12)/2 */
2152
2153
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS+2);
2154
dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS+2);
2155
2156
/* Odd part */
2157
2158
tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
2159
FIX(1.393487498)); /* c5 */
2160
tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.530307725)) + /* c3 */
2161
MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.945782187)); /* c9 */
2162
tmp12 = MULTIPLY(tmp12, FIX(1.393487498)); /* c5 */
2163
tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.600246161)) + /* c1 */
2164
MULTIPLY(tmp11 + tmp14, FIX(1.530307725)) + /* c3 */
2165
MULTIPLY(tmp13 + tmp15, FIX(0.654463974)); /* c11 */
2166
tmp0 = MULTIPLY(tmp13, FIX(0.541301207)) - /* c7-c11 */
2167
MULTIPLY(tmp14, FIX(0.584525538)) + /* c3-c9 */
2168
MULTIPLY(tmp16, FIX(1.934788705)) + tmp4 + tmp12; /* c1+c13 */
2169
tmp3 = MULTIPLY(tmp10, - FIX(0.404480980)) - /* -(c1-c7) */
2170
MULTIPLY(tmp11, FIX(2.476089912)) - /* c3+c9 */
2171
MULTIPLY(tmp15, FIX(0.989006518)) + tmp4 - tmp12; /* c11+c13 */
2172
2173
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2);
2174
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2);
2175
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2);
2176
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2);
2177
2178
dataptr++; /* advance pointer to next column */
2179
wsptr++; /* advance pointer to next column */
2180
}
2181
}
2182
2183
2184
/*
2185
* Perform the forward DCT on a 16x16 sample block.
2186
*/
2187
2188
GLOBAL(void)
2189
jpeg_fdct_16x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2190
{
2191
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
2192
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
2193
DCTELEM workspace[DCTSIZE2];
2194
DCTELEM *dataptr;
2195
DCTELEM *wsptr;
2196
JSAMPROW elemptr;
2197
int ctr;
2198
SHIFT_TEMPS
2199
2200
/* Pass 1: process rows.
2201
* Note results are scaled up by sqrt(8) compared to a true DCT;
2202
* furthermore, we scale the results by 2**PASS1_BITS.
2203
* cK represents sqrt(2) * cos(K*pi/32).
2204
*/
2205
2206
dataptr = data;
2207
ctr = 0;
2208
for (;;) {
2209
elemptr = sample_data[ctr] + start_col;
2210
2211
/* Even part */
2212
2213
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
2214
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
2215
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
2216
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
2217
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
2218
tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
2219
tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
2220
tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
2221
2222
tmp10 = tmp0 + tmp7;
2223
tmp14 = tmp0 - tmp7;
2224
tmp11 = tmp1 + tmp6;
2225
tmp15 = tmp1 - tmp6;
2226
tmp12 = tmp2 + tmp5;
2227
tmp16 = tmp2 - tmp5;
2228
tmp13 = tmp3 + tmp4;
2229
tmp17 = tmp3 - tmp4;
2230
2231
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
2232
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
2233
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
2234
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
2235
tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
2236
tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
2237
tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
2238
tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
2239
2240
/* Apply unsigned->signed conversion. */
2241
dataptr[0] = (DCTELEM)
2242
((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
2243
dataptr[4] = (DCTELEM)
2244
DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
2245
MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
2246
CONST_BITS-PASS1_BITS);
2247
2248
tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */
2249
MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */
2250
2251
dataptr[2] = (DCTELEM)
2252
DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */
2253
+ MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */
2254
CONST_BITS-PASS1_BITS);
2255
dataptr[6] = (DCTELEM)
2256
DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */
2257
- MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */
2258
CONST_BITS-PASS1_BITS);
2259
2260
/* Odd part */
2261
2262
tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */
2263
MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */
2264
tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */
2265
MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */
2266
tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */
2267
MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */
2268
tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */
2269
MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */
2270
tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */
2271
MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */
2272
tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */
2273
MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */
2274
tmp10 = tmp11 + tmp12 + tmp13 -
2275
MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */
2276
MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */
2277
tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
2278
- MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */
2279
tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
2280
+ MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */
2281
tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
2282
+ MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */
2283
2284
dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
2285
dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
2286
dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
2287
dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
2288
2289
ctr++;
2290
2291
if (ctr != DCTSIZE) {
2292
if (ctr == DCTSIZE * 2)
2293
break; /* Done. */
2294
dataptr += DCTSIZE; /* advance pointer to next row */
2295
} else
2296
dataptr = workspace; /* switch pointer to extended workspace */
2297
}
2298
2299
/* Pass 2: process columns.
2300
* We remove the PASS1_BITS scaling, but leave the results scaled up
2301
* by an overall factor of 8.
2302
* We must also scale the output by (8/16)**2 = 1/2**2.
2303
* cK represents sqrt(2) * cos(K*pi/32).
2304
*/
2305
2306
dataptr = data;
2307
wsptr = workspace;
2308
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2309
/* Even part */
2310
2311
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
2312
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
2313
tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
2314
tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
2315
tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
2316
tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
2317
tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
2318
tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
2319
2320
tmp10 = tmp0 + tmp7;
2321
tmp14 = tmp0 - tmp7;
2322
tmp11 = tmp1 + tmp6;
2323
tmp15 = tmp1 - tmp6;
2324
tmp12 = tmp2 + tmp5;
2325
tmp16 = tmp2 - tmp5;
2326
tmp13 = tmp3 + tmp4;
2327
tmp17 = tmp3 - tmp4;
2328
2329
tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
2330
tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
2331
tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
2332
tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
2333
tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
2334
tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
2335
tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
2336
tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
2337
2338
dataptr[DCTSIZE*0] = (DCTELEM)
2339
DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+2);
2340
dataptr[DCTSIZE*4] = (DCTELEM)
2341
DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
2342
MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
2343
CONST_BITS+PASS1_BITS+2);
2344
2345
tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */
2346
MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */
2347
2348
dataptr[DCTSIZE*2] = (DCTELEM)
2349
DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */
2350
+ MULTIPLY(tmp16, FIX(2.172734804)), /* c2+10 */
2351
CONST_BITS+PASS1_BITS+2);
2352
dataptr[DCTSIZE*6] = (DCTELEM)
2353
DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */
2354
- MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */
2355
CONST_BITS+PASS1_BITS+2);
2356
2357
/* Odd part */
2358
2359
tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */
2360
MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */
2361
tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */
2362
MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */
2363
tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */
2364
MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */
2365
tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */
2366
MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */
2367
tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */
2368
MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */
2369
tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */
2370
MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */
2371
tmp10 = tmp11 + tmp12 + tmp13 -
2372
MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */
2373
MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */
2374
tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
2375
- MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */
2376
tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
2377
+ MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */
2378
tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
2379
+ MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */
2380
2381
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+2);
2382
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+2);
2383
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+2);
2384
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+2);
2385
2386
dataptr++; /* advance pointer to next column */
2387
wsptr++; /* advance pointer to next column */
2388
}
2389
}
2390
2391
2392
/*
2393
* Perform the forward DCT on a 16x8 sample block.
2394
*
2395
* 16-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
2396
*/
2397
2398
GLOBAL(void)
2399
jpeg_fdct_16x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2400
{
2401
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
2402
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
2403
INT32 z1;
2404
DCTELEM *dataptr;
2405
JSAMPROW elemptr;
2406
int ctr;
2407
SHIFT_TEMPS
2408
2409
/* Pass 1: process rows.
2410
* Note results are scaled up by sqrt(8) compared to a true DCT;
2411
* furthermore, we scale the results by 2**PASS1_BITS.
2412
* 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
2413
*/
2414
2415
dataptr = data;
2416
ctr = 0;
2417
for (ctr = 0; ctr < DCTSIZE; ctr++) {
2418
elemptr = sample_data[ctr] + start_col;
2419
2420
/* Even part */
2421
2422
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
2423
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
2424
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
2425
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
2426
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
2427
tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
2428
tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
2429
tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
2430
2431
tmp10 = tmp0 + tmp7;
2432
tmp14 = tmp0 - tmp7;
2433
tmp11 = tmp1 + tmp6;
2434
tmp15 = tmp1 - tmp6;
2435
tmp12 = tmp2 + tmp5;
2436
tmp16 = tmp2 - tmp5;
2437
tmp13 = tmp3 + tmp4;
2438
tmp17 = tmp3 - tmp4;
2439
2440
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
2441
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
2442
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
2443
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
2444
tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
2445
tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
2446
tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
2447
tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
2448
2449
/* Apply unsigned->signed conversion. */
2450
dataptr[0] = (DCTELEM)
2451
((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
2452
dataptr[4] = (DCTELEM)
2453
DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
2454
MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
2455
CONST_BITS-PASS1_BITS);
2456
2457
tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */
2458
MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */
2459
2460
dataptr[2] = (DCTELEM)
2461
DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */
2462
+ MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */
2463
CONST_BITS-PASS1_BITS);
2464
dataptr[6] = (DCTELEM)
2465
DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */
2466
- MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */
2467
CONST_BITS-PASS1_BITS);
2468
2469
/* Odd part */
2470
2471
tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */
2472
MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */
2473
tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */
2474
MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */
2475
tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */
2476
MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */
2477
tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */
2478
MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */
2479
tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */
2480
MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */
2481
tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */
2482
MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */
2483
tmp10 = tmp11 + tmp12 + tmp13 -
2484
MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */
2485
MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */
2486
tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
2487
- MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */
2488
tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
2489
+ MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */
2490
tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
2491
+ MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */
2492
2493
dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
2494
dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
2495
dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
2496
dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
2497
2498
dataptr += DCTSIZE; /* advance pointer to next row */
2499
}
2500
2501
/* Pass 2: process columns.
2502
* We remove the PASS1_BITS scaling, but leave the results scaled up
2503
* by an overall factor of 8.
2504
* We must also scale the output by 8/16 = 1/2.
2505
* 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
2506
*/
2507
2508
dataptr = data;
2509
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2510
/* Even part per LL&M figure 1 --- note that published figure is faulty;
2511
* rotator "c1" should be "c6".
2512
*/
2513
2514
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
2515
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
2516
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
2517
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
2518
2519
tmp10 = tmp0 + tmp3;
2520
tmp12 = tmp0 - tmp3;
2521
tmp11 = tmp1 + tmp2;
2522
tmp13 = tmp1 - tmp2;
2523
2524
tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
2525
tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
2526
tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
2527
tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
2528
2529
dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1);
2530
dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1);
2531
2532
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
2533
dataptr[DCTSIZE*2] = (DCTELEM)
2534
DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
2535
CONST_BITS+PASS1_BITS+1);
2536
dataptr[DCTSIZE*6] = (DCTELEM)
2537
DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
2538
CONST_BITS+PASS1_BITS+1);
2539
2540
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
2541
* i0..i3 in the paper are tmp0..tmp3 here.
2542
*/
2543
2544
tmp12 = tmp0 + tmp2;
2545
tmp13 = tmp1 + tmp3;
2546
2547
z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
2548
tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
2549
tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
2550
tmp12 += z1;
2551
tmp13 += z1;
2552
2553
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
2554
tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
2555
tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
2556
tmp0 += z1 + tmp12;
2557
tmp3 += z1 + tmp13;
2558
2559
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
2560
tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
2561
tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
2562
tmp1 += z1 + tmp13;
2563
tmp2 += z1 + tmp12;
2564
2565
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1);
2566
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1);
2567
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1);
2568
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+PASS1_BITS+1);
2569
2570
dataptr++; /* advance pointer to next column */
2571
}
2572
}
2573
2574
2575
/*
2576
* Perform the forward DCT on a 14x7 sample block.
2577
*
2578
* 14-point FDCT in pass 1 (rows), 7-point in pass 2 (columns).
2579
*/
2580
2581
GLOBAL(void)
2582
jpeg_fdct_14x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2583
{
2584
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
2585
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
2586
INT32 z1, z2, z3;
2587
DCTELEM *dataptr;
2588
JSAMPROW elemptr;
2589
int ctr;
2590
SHIFT_TEMPS
2591
2592
/* Zero bottom row of output coefficient block. */
2593
MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE);
2594
2595
/* Pass 1: process rows.
2596
* Note results are scaled up by sqrt(8) compared to a true DCT;
2597
* furthermore, we scale the results by 2**PASS1_BITS.
2598
* 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
2599
*/
2600
2601
dataptr = data;
2602
for (ctr = 0; ctr < 7; ctr++) {
2603
elemptr = sample_data[ctr] + start_col;
2604
2605
/* Even part */
2606
2607
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
2608
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
2609
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
2610
tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
2611
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
2612
tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
2613
tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
2614
2615
tmp10 = tmp0 + tmp6;
2616
tmp14 = tmp0 - tmp6;
2617
tmp11 = tmp1 + tmp5;
2618
tmp15 = tmp1 - tmp5;
2619
tmp12 = tmp2 + tmp4;
2620
tmp16 = tmp2 - tmp4;
2621
2622
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
2623
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
2624
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
2625
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
2626
tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
2627
tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
2628
tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
2629
2630
/* Apply unsigned->signed conversion. */
2631
dataptr[0] = (DCTELEM)
2632
((tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE) << PASS1_BITS);
2633
tmp13 += tmp13;
2634
dataptr[4] = (DCTELEM)
2635
DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
2636
MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
2637
MULTIPLY(tmp12 - tmp13, FIX(0.881747734)), /* c8 */
2638
CONST_BITS-PASS1_BITS);
2639
2640
tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686)); /* c6 */
2641
2642
dataptr[2] = (DCTELEM)
2643
DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590)) /* c2-c6 */
2644
+ MULTIPLY(tmp16, FIX(0.613604268)), /* c10 */
2645
CONST_BITS-PASS1_BITS);
2646
dataptr[6] = (DCTELEM)
2647
DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954)) /* c6+c10 */
2648
- MULTIPLY(tmp16, FIX(1.378756276)), /* c2 */
2649
CONST_BITS-PASS1_BITS);
2650
2651
/* Odd part */
2652
2653
tmp10 = tmp1 + tmp2;
2654
tmp11 = tmp5 - tmp4;
2655
dataptr[7] = (DCTELEM) ((tmp0 - tmp10 + tmp3 - tmp11 - tmp6) << PASS1_BITS);
2656
tmp3 <<= CONST_BITS;
2657
tmp10 = MULTIPLY(tmp10, - FIX(0.158341681)); /* -c13 */
2658
tmp11 = MULTIPLY(tmp11, FIX(1.405321284)); /* c1 */
2659
tmp10 += tmp11 - tmp3;
2660
tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) + /* c5 */
2661
MULTIPLY(tmp4 + tmp6, FIX(0.752406978)); /* c9 */
2662
dataptr[5] = (DCTELEM)
2663
DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
2664
+ MULTIPLY(tmp4, FIX(1.119999435)), /* c1+c11-c9 */
2665
CONST_BITS-PASS1_BITS);
2666
tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) + /* c3 */
2667
MULTIPLY(tmp5 - tmp6, FIX(0.467085129)); /* c11 */
2668
dataptr[3] = (DCTELEM)
2669
DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
2670
- MULTIPLY(tmp5, FIX(3.069855259)), /* c1+c5+c11 */
2671
CONST_BITS-PASS1_BITS);
2672
dataptr[1] = (DCTELEM)
2673
DESCALE(tmp11 + tmp12 + tmp3 + tmp6 -
2674
MULTIPLY(tmp0 + tmp6, FIX(1.126980169)), /* c3+c5-c1 */
2675
CONST_BITS-PASS1_BITS);
2676
2677
dataptr += DCTSIZE; /* advance pointer to next row */
2678
}
2679
2680
/* Pass 2: process columns.
2681
* We remove the PASS1_BITS scaling, but leave the results scaled up
2682
* by an overall factor of 8.
2683
* We must also scale the output by (8/14)*(8/7) = 32/49, which we
2684
* partially fold into the constant multipliers and final shifting:
2685
* 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14) * 64/49.
2686
*/
2687
2688
dataptr = data;
2689
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2690
/* Even part */
2691
2692
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
2693
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
2694
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
2695
tmp3 = dataptr[DCTSIZE*3];
2696
2697
tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
2698
tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
2699
tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
2700
2701
z1 = tmp0 + tmp2;
2702
dataptr[DCTSIZE*0] = (DCTELEM)
2703
DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
2704
CONST_BITS+PASS1_BITS+1);
2705
tmp3 += tmp3;
2706
z1 -= tmp3;
2707
z1 -= tmp3;
2708
z1 = MULTIPLY(z1, FIX(0.461784020)); /* (c2+c6-c4)/2 */
2709
z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084)); /* (c2+c4-c6)/2 */
2710
z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446)); /* c6 */
2711
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS+1);
2712
z1 -= z2;
2713
z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509)); /* c4 */
2714
dataptr[DCTSIZE*4] = (DCTELEM)
2715
DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
2716
CONST_BITS+PASS1_BITS+1);
2717
dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS+1);
2718
2719
/* Odd part */
2720
2721
tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677)); /* (c3+c1-c5)/2 */
2722
tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464)); /* (c3+c5-c1)/2 */
2723
tmp0 = tmp1 - tmp2;
2724
tmp1 += tmp2;
2725
tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
2726
tmp1 += tmp2;
2727
tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310)); /* c5 */
2728
tmp0 += tmp3;
2729
tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355)); /* c3+c1-c5 */
2730
2731
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1);
2732
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1);
2733
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1);
2734
2735
dataptr++; /* advance pointer to next column */
2736
}
2737
}
2738
2739
2740
/*
2741
* Perform the forward DCT on a 12x6 sample block.
2742
*
2743
* 12-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
2744
*/
2745
2746
GLOBAL(void)
2747
jpeg_fdct_12x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2748
{
2749
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
2750
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
2751
DCTELEM *dataptr;
2752
JSAMPROW elemptr;
2753
int ctr;
2754
SHIFT_TEMPS
2755
2756
/* Zero 2 bottom rows of output coefficient block. */
2757
MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2);
2758
2759
/* Pass 1: process rows.
2760
* Note results are scaled up by sqrt(8) compared to a true DCT;
2761
* furthermore, we scale the results by 2**PASS1_BITS.
2762
* 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
2763
*/
2764
2765
dataptr = data;
2766
for (ctr = 0; ctr < 6; ctr++) {
2767
elemptr = sample_data[ctr] + start_col;
2768
2769
/* Even part */
2770
2771
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
2772
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
2773
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
2774
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
2775
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
2776
tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
2777
2778
tmp10 = tmp0 + tmp5;
2779
tmp13 = tmp0 - tmp5;
2780
tmp11 = tmp1 + tmp4;
2781
tmp14 = tmp1 - tmp4;
2782
tmp12 = tmp2 + tmp3;
2783
tmp15 = tmp2 - tmp3;
2784
2785
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
2786
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
2787
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
2788
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
2789
tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
2790
tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
2791
2792
/* Apply unsigned->signed conversion. */
2793
dataptr[0] = (DCTELEM)
2794
((tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE) << PASS1_BITS);
2795
dataptr[6] = (DCTELEM) ((tmp13 - tmp14 - tmp15) << PASS1_BITS);
2796
dataptr[4] = (DCTELEM)
2797
DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
2798
CONST_BITS-PASS1_BITS);
2799
dataptr[2] = (DCTELEM)
2800
DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
2801
CONST_BITS-PASS1_BITS);
2802
2803
/* Odd part */
2804
2805
tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100); /* c9 */
2806
tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865); /* c3-c9 */
2807
tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065); /* c3+c9 */
2808
tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054)); /* c5 */
2809
tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669)); /* c7 */
2810
tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
2811
+ MULTIPLY(tmp5, FIX(0.184591911)); /* c11 */
2812
tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
2813
tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
2814
+ MULTIPLY(tmp5, FIX(0.860918669)); /* c7 */
2815
tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
2816
- MULTIPLY(tmp5, FIX(1.121971054)); /* c5 */
2817
tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
2818
- MULTIPLY(tmp2 + tmp5, FIX_0_541196100); /* c9 */
2819
2820
dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
2821
dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
2822
dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
2823
dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
2824
2825
dataptr += DCTSIZE; /* advance pointer to next row */
2826
}
2827
2828
/* Pass 2: process columns.
2829
* We remove the PASS1_BITS scaling, but leave the results scaled up
2830
* by an overall factor of 8.
2831
* We must also scale the output by (8/12)*(8/6) = 8/9, which we
2832
* partially fold into the constant multipliers and final shifting:
2833
* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
2834
*/
2835
2836
dataptr = data;
2837
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2838
/* Even part */
2839
2840
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
2841
tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
2842
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
2843
2844
tmp10 = tmp0 + tmp2;
2845
tmp12 = tmp0 - tmp2;
2846
2847
tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
2848
tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
2849
tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
2850
2851
dataptr[DCTSIZE*0] = (DCTELEM)
2852
DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */
2853
CONST_BITS+PASS1_BITS+1);
2854
dataptr[DCTSIZE*2] = (DCTELEM)
2855
DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */
2856
CONST_BITS+PASS1_BITS+1);
2857
dataptr[DCTSIZE*4] = (DCTELEM)
2858
DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
2859
CONST_BITS+PASS1_BITS+1);
2860
2861
/* Odd part */
2862
2863
tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */
2864
2865
dataptr[DCTSIZE*1] = (DCTELEM)
2866
DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */
2867
CONST_BITS+PASS1_BITS+1);
2868
dataptr[DCTSIZE*3] = (DCTELEM)
2869
DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */
2870
CONST_BITS+PASS1_BITS+1);
2871
dataptr[DCTSIZE*5] = (DCTELEM)
2872
DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */
2873
CONST_BITS+PASS1_BITS+1);
2874
2875
dataptr++; /* advance pointer to next column */
2876
}
2877
}
2878
2879
2880
/*
2881
* Perform the forward DCT on a 10x5 sample block.
2882
*
2883
* 10-point FDCT in pass 1 (rows), 5-point in pass 2 (columns).
2884
*/
2885
2886
GLOBAL(void)
2887
jpeg_fdct_10x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
2888
{
2889
INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
2890
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
2891
DCTELEM *dataptr;
2892
JSAMPROW elemptr;
2893
int ctr;
2894
SHIFT_TEMPS
2895
2896
/* Zero 3 bottom rows of output coefficient block. */
2897
MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3);
2898
2899
/* Pass 1: process rows.
2900
* Note results are scaled up by sqrt(8) compared to a true DCT;
2901
* furthermore, we scale the results by 2**PASS1_BITS.
2902
* 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
2903
*/
2904
2905
dataptr = data;
2906
for (ctr = 0; ctr < 5; ctr++) {
2907
elemptr = sample_data[ctr] + start_col;
2908
2909
/* Even part */
2910
2911
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
2912
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
2913
tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
2914
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
2915
tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
2916
2917
tmp10 = tmp0 + tmp4;
2918
tmp13 = tmp0 - tmp4;
2919
tmp11 = tmp1 + tmp3;
2920
tmp14 = tmp1 - tmp3;
2921
2922
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
2923
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
2924
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
2925
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
2926
tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
2927
2928
/* Apply unsigned->signed conversion. */
2929
dataptr[0] = (DCTELEM)
2930
((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << PASS1_BITS);
2931
tmp12 += tmp12;
2932
dataptr[4] = (DCTELEM)
2933
DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
2934
MULTIPLY(tmp11 - tmp12, FIX(0.437016024)), /* c8 */
2935
CONST_BITS-PASS1_BITS);
2936
tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876)); /* c6 */
2937
dataptr[2] = (DCTELEM)
2938
DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)), /* c2-c6 */
2939
CONST_BITS-PASS1_BITS);
2940
dataptr[6] = (DCTELEM)
2941
DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)), /* c2+c6 */
2942
CONST_BITS-PASS1_BITS);
2943
2944
/* Odd part */
2945
2946
tmp10 = tmp0 + tmp4;
2947
tmp11 = tmp1 - tmp3;
2948
dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << PASS1_BITS);
2949
tmp2 <<= CONST_BITS;
2950
dataptr[1] = (DCTELEM)
2951
DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) + /* c1 */
2952
MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 + /* c3 */
2953
MULTIPLY(tmp3, FIX(0.642039522)) + /* c7 */
2954
MULTIPLY(tmp4, FIX(0.221231742)), /* c9 */
2955
CONST_BITS-PASS1_BITS);
2956
tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) - /* (c3+c7)/2 */
2957
MULTIPLY(tmp1 + tmp3, FIX(0.587785252)); /* (c1-c9)/2 */
2958
tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) + /* (c3-c7)/2 */
2959
(tmp11 << (CONST_BITS - 1)) - tmp2;
2960
dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-PASS1_BITS);
2961
dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-PASS1_BITS);
2962
2963
dataptr += DCTSIZE; /* advance pointer to next row */
2964
}
2965
2966
/* Pass 2: process columns.
2967
* We remove the PASS1_BITS scaling, but leave the results scaled up
2968
* by an overall factor of 8.
2969
* We must also scale the output by (8/10)*(8/5) = 32/25, which we
2970
* fold into the constant multipliers:
2971
* 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10) * 32/25.
2972
*/
2973
2974
dataptr = data;
2975
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
2976
/* Even part */
2977
2978
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
2979
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
2980
tmp2 = dataptr[DCTSIZE*2];
2981
2982
tmp10 = tmp0 + tmp1;
2983
tmp11 = tmp0 - tmp1;
2984
2985
tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
2986
tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
2987
2988
dataptr[DCTSIZE*0] = (DCTELEM)
2989
DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)), /* 32/25 */
2990
CONST_BITS+PASS1_BITS);
2991
tmp11 = MULTIPLY(tmp11, FIX(1.011928851)); /* (c2+c4)/2 */
2992
tmp10 -= tmp2 << 2;
2993
tmp10 = MULTIPLY(tmp10, FIX(0.452548340)); /* (c2-c4)/2 */
2994
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS);
2995
dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS);
2996
2997
/* Odd part */
2998
2999
tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961)); /* c3 */
3000
3001
dataptr[DCTSIZE*1] = (DCTELEM)
3002
DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
3003
CONST_BITS+PASS1_BITS);
3004
dataptr[DCTSIZE*3] = (DCTELEM)
3005
DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
3006
CONST_BITS+PASS1_BITS);
3007
3008
dataptr++; /* advance pointer to next column */
3009
}
3010
}
3011
3012
3013
/*
3014
* Perform the forward DCT on an 8x4 sample block.
3015
*
3016
* 8-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
3017
*/
3018
3019
GLOBAL(void)
3020
jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3021
{
3022
INT32 tmp0, tmp1, tmp2, tmp3;
3023
INT32 tmp10, tmp11, tmp12, tmp13;
3024
INT32 z1;
3025
DCTELEM *dataptr;
3026
JSAMPROW elemptr;
3027
int ctr;
3028
SHIFT_TEMPS
3029
3030
/* Zero 4 bottom rows of output coefficient block. */
3031
MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4);
3032
3033
/* Pass 1: process rows.
3034
* Note results are scaled up by sqrt(8) compared to a true DCT;
3035
* furthermore, we scale the results by 2**PASS1_BITS.
3036
* We must also scale the output by 8/4 = 2, which we add here.
3037
* 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3038
*/
3039
3040
dataptr = data;
3041
for (ctr = 0; ctr < 4; ctr++) {
3042
elemptr = sample_data[ctr] + start_col;
3043
3044
/* Even part per LL&M figure 1 --- note that published figure is faulty;
3045
* rotator "c1" should be "c6".
3046
*/
3047
3048
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
3049
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
3050
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
3051
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
3052
3053
tmp10 = tmp0 + tmp3;
3054
tmp12 = tmp0 - tmp3;
3055
tmp11 = tmp1 + tmp2;
3056
tmp13 = tmp1 - tmp2;
3057
3058
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
3059
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
3060
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
3061
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
3062
3063
/* Apply unsigned->signed conversion. */
3064
dataptr[0] = (DCTELEM)
3065
((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1));
3066
dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1));
3067
3068
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
3069
/* Add fudge factor here for final descale. */
3070
z1 += ONE << (CONST_BITS-PASS1_BITS-2);
3071
3072
dataptr[2] = (DCTELEM)
3073
RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
3074
CONST_BITS-PASS1_BITS-1);
3075
dataptr[6] = (DCTELEM)
3076
RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
3077
CONST_BITS-PASS1_BITS-1);
3078
3079
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
3080
* i0..i3 in the paper are tmp0..tmp3 here.
3081
*/
3082
3083
tmp12 = tmp0 + tmp2;
3084
tmp13 = tmp1 + tmp3;
3085
3086
z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
3087
/* Add fudge factor here for final descale. */
3088
z1 += ONE << (CONST_BITS-PASS1_BITS-2);
3089
3090
tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
3091
tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
3092
tmp12 += z1;
3093
tmp13 += z1;
3094
3095
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3096
tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
3097
tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
3098
tmp0 += z1 + tmp12;
3099
tmp3 += z1 + tmp13;
3100
3101
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3102
tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
3103
tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
3104
tmp1 += z1 + tmp13;
3105
tmp2 += z1 + tmp12;
3106
3107
dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS-1);
3108
dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS-1);
3109
dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS-1);
3110
dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS-1);
3111
3112
dataptr += DCTSIZE; /* advance pointer to next row */
3113
}
3114
3115
/* Pass 2: process columns.
3116
* We remove the PASS1_BITS scaling, but leave the results scaled up
3117
* by an overall factor of 8.
3118
* 4-point FDCT kernel,
3119
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
3120
*/
3121
3122
dataptr = data;
3123
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
3124
/* Even part */
3125
3126
/* Add fudge factor here for final descale. */
3127
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1));
3128
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
3129
3130
tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
3131
tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
3132
3133
dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
3134
dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
3135
3136
/* Odd part */
3137
3138
tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
3139
/* Add fudge factor here for final descale. */
3140
tmp0 += ONE << (CONST_BITS+PASS1_BITS-1);
3141
3142
dataptr[DCTSIZE*1] = (DCTELEM)
3143
RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
3144
CONST_BITS+PASS1_BITS);
3145
dataptr[DCTSIZE*3] = (DCTELEM)
3146
RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
3147
CONST_BITS+PASS1_BITS);
3148
3149
dataptr++; /* advance pointer to next column */
3150
}
3151
}
3152
3153
3154
/*
3155
* Perform the forward DCT on a 6x3 sample block.
3156
*
3157
* 6-point FDCT in pass 1 (rows), 3-point in pass 2 (columns).
3158
*/
3159
3160
GLOBAL(void)
3161
jpeg_fdct_6x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3162
{
3163
INT32 tmp0, tmp1, tmp2;
3164
INT32 tmp10, tmp11, tmp12;
3165
DCTELEM *dataptr;
3166
JSAMPROW elemptr;
3167
int ctr;
3168
SHIFT_TEMPS
3169
3170
/* Pre-zero output coefficient block. */
3171
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3172
3173
/* Pass 1: process rows.
3174
* Note results are scaled up by sqrt(8) compared to a true DCT;
3175
* furthermore, we scale the results by 2**PASS1_BITS.
3176
* We scale the results further by 2 as part of output adaption
3177
* scaling for different DCT size.
3178
* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3179
*/
3180
3181
dataptr = data;
3182
for (ctr = 0; ctr < 3; ctr++) {
3183
elemptr = sample_data[ctr] + start_col;
3184
3185
/* Even part */
3186
3187
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
3188
tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
3189
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
3190
3191
tmp10 = tmp0 + tmp2;
3192
tmp12 = tmp0 - tmp2;
3193
3194
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
3195
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
3196
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
3197
3198
/* Apply unsigned->signed conversion. */
3199
dataptr[0] = (DCTELEM)
3200
((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << (PASS1_BITS+1));
3201
dataptr[2] = (DCTELEM)
3202
DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */
3203
CONST_BITS-PASS1_BITS-1);
3204
dataptr[4] = (DCTELEM)
3205
DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
3206
CONST_BITS-PASS1_BITS-1);
3207
3208
/* Odd part */
3209
3210
tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */
3211
CONST_BITS-PASS1_BITS-1);
3212
3213
dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << (PASS1_BITS+1)));
3214
dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << (PASS1_BITS+1));
3215
dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << (PASS1_BITS+1)));
3216
3217
dataptr += DCTSIZE; /* advance pointer to next row */
3218
}
3219
3220
/* Pass 2: process columns.
3221
* We remove the PASS1_BITS scaling, but leave the results scaled up
3222
* by an overall factor of 8.
3223
* We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
3224
* fold into the constant multipliers (other part was done in pass 1):
3225
* 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6) * 16/9.
3226
*/
3227
3228
dataptr = data;
3229
for (ctr = 0; ctr < 6; ctr++) {
3230
/* Even part */
3231
3232
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
3233
tmp1 = dataptr[DCTSIZE*1];
3234
3235
tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
3236
3237
dataptr[DCTSIZE*0] = (DCTELEM)
3238
DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */
3239
CONST_BITS+PASS1_BITS);
3240
dataptr[DCTSIZE*2] = (DCTELEM)
3241
DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
3242
CONST_BITS+PASS1_BITS);
3243
3244
/* Odd part */
3245
3246
dataptr[DCTSIZE*1] = (DCTELEM)
3247
DESCALE(MULTIPLY(tmp2, FIX(2.177324216)), /* c1 */
3248
CONST_BITS+PASS1_BITS);
3249
3250
dataptr++; /* advance pointer to next column */
3251
}
3252
}
3253
3254
3255
/*
3256
* Perform the forward DCT on a 4x2 sample block.
3257
*
3258
* 4-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
3259
*/
3260
3261
GLOBAL(void)
3262
jpeg_fdct_4x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3263
{
3264
DCTELEM tmp0, tmp2, tmp10, tmp12, tmp4, tmp5;
3265
INT32 tmp1, tmp3, tmp11, tmp13;
3266
INT32 z1, z2, z3;
3267
JSAMPROW elemptr;
3268
SHIFT_TEMPS
3269
3270
/* Pre-zero output coefficient block. */
3271
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3272
3273
/* Pass 1: process rows.
3274
* Note results are scaled up by sqrt(8) compared to a true DCT.
3275
* 4-point FDCT kernel,
3276
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
3277
*/
3278
3279
/* Row 0 */
3280
elemptr = sample_data[0] + start_col;
3281
3282
/* Even part */
3283
3284
tmp4 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
3285
tmp5 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
3286
3287
tmp0 = tmp4 + tmp5;
3288
tmp2 = tmp4 - tmp5;
3289
3290
/* Odd part */
3291
3292
z2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
3293
z3 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
3294
3295
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3296
/* Add fudge factor here for final descale. */
3297
z1 += ONE << (CONST_BITS-3-1);
3298
tmp1 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
3299
tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
3300
3301
/* Row 1 */
3302
elemptr = sample_data[1] + start_col;
3303
3304
/* Even part */
3305
3306
tmp4 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
3307
tmp5 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
3308
3309
tmp10 = tmp4 + tmp5;
3310
tmp12 = tmp4 - tmp5;
3311
3312
/* Odd part */
3313
3314
z2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
3315
z3 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
3316
3317
z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */
3318
tmp11 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
3319
tmp13 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
3320
3321
/* Pass 2: process columns.
3322
* We leave the results scaled up by an overall factor of 8.
3323
* We must also scale the output by (8/4)*(8/2) = 2**3.
3324
*/
3325
3326
/* Column 0 */
3327
/* Apply unsigned->signed conversion. */
3328
data[DCTSIZE*0] = (tmp0 + tmp10 - 8 * CENTERJSAMPLE) << 3;
3329
data[DCTSIZE*1] = (tmp0 - tmp10) << 3;
3330
3331
/* Column 1 */
3332
data[DCTSIZE*0+1] = (DCTELEM) RIGHT_SHIFT(tmp1 + tmp11, CONST_BITS-3);
3333
data[DCTSIZE*1+1] = (DCTELEM) RIGHT_SHIFT(tmp1 - tmp11, CONST_BITS-3);
3334
3335
/* Column 2 */
3336
data[DCTSIZE*0+2] = (tmp2 + tmp12) << 3;
3337
data[DCTSIZE*1+2] = (tmp2 - tmp12) << 3;
3338
3339
/* Column 3 */
3340
data[DCTSIZE*0+3] = (DCTELEM) RIGHT_SHIFT(tmp3 + tmp13, CONST_BITS-3);
3341
data[DCTSIZE*1+3] = (DCTELEM) RIGHT_SHIFT(tmp3 - tmp13, CONST_BITS-3);
3342
}
3343
3344
3345
/*
3346
* Perform the forward DCT on a 2x1 sample block.
3347
*
3348
* 2-point FDCT in pass 1 (rows), 1-point in pass 2 (columns).
3349
*/
3350
3351
GLOBAL(void)
3352
jpeg_fdct_2x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3353
{
3354
DCTELEM tmp0, tmp1;
3355
JSAMPROW elemptr;
3356
3357
/* Pre-zero output coefficient block. */
3358
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3359
3360
elemptr = sample_data[0] + start_col;
3361
3362
tmp0 = GETJSAMPLE(elemptr[0]);
3363
tmp1 = GETJSAMPLE(elemptr[1]);
3364
3365
/* We leave the results scaled up by an overall factor of 8.
3366
* We must also scale the output by (8/2)*(8/1) = 2**5.
3367
*/
3368
3369
/* Even part */
3370
3371
/* Apply unsigned->signed conversion. */
3372
data[0] = (tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5;
3373
3374
/* Odd part */
3375
3376
data[1] = (tmp0 - tmp1) << 5;
3377
}
3378
3379
3380
/*
3381
* Perform the forward DCT on an 8x16 sample block.
3382
*
3383
* 8-point FDCT in pass 1 (rows), 16-point in pass 2 (columns).
3384
*/
3385
3386
GLOBAL(void)
3387
jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3388
{
3389
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
3390
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
3391
INT32 z1;
3392
DCTELEM workspace[DCTSIZE2];
3393
DCTELEM *dataptr;
3394
DCTELEM *wsptr;
3395
JSAMPROW elemptr;
3396
int ctr;
3397
SHIFT_TEMPS
3398
3399
/* Pass 1: process rows.
3400
* Note results are scaled up by sqrt(8) compared to a true DCT;
3401
* furthermore, we scale the results by 2**PASS1_BITS.
3402
* 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
3403
*/
3404
3405
dataptr = data;
3406
ctr = 0;
3407
for (;;) {
3408
elemptr = sample_data[ctr] + start_col;
3409
3410
/* Even part per LL&M figure 1 --- note that published figure is faulty;
3411
* rotator "c1" should be "c6".
3412
*/
3413
3414
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
3415
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
3416
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
3417
tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
3418
3419
tmp10 = tmp0 + tmp3;
3420
tmp12 = tmp0 - tmp3;
3421
tmp11 = tmp1 + tmp2;
3422
tmp13 = tmp1 - tmp2;
3423
3424
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
3425
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
3426
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
3427
tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
3428
3429
/* Apply unsigned->signed conversion. */
3430
dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
3431
dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
3432
3433
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
3434
dataptr[2] = (DCTELEM)
3435
DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
3436
CONST_BITS-PASS1_BITS);
3437
dataptr[6] = (DCTELEM)
3438
DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
3439
CONST_BITS-PASS1_BITS);
3440
3441
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
3442
* i0..i3 in the paper are tmp0..tmp3 here.
3443
*/
3444
3445
tmp12 = tmp0 + tmp2;
3446
tmp13 = tmp1 + tmp3;
3447
3448
z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
3449
tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
3450
tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
3451
tmp12 += z1;
3452
tmp13 += z1;
3453
3454
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
3455
tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
3456
tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
3457
tmp0 += z1 + tmp12;
3458
tmp3 += z1 + tmp13;
3459
3460
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
3461
tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
3462
tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
3463
tmp1 += z1 + tmp13;
3464
tmp2 += z1 + tmp12;
3465
3466
dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
3467
dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
3468
dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
3469
dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-PASS1_BITS);
3470
3471
ctr++;
3472
3473
if (ctr != DCTSIZE) {
3474
if (ctr == DCTSIZE * 2)
3475
break; /* Done. */
3476
dataptr += DCTSIZE; /* advance pointer to next row */
3477
} else
3478
dataptr = workspace; /* switch pointer to extended workspace */
3479
}
3480
3481
/* Pass 2: process columns.
3482
* We remove the PASS1_BITS scaling, but leave the results scaled up
3483
* by an overall factor of 8.
3484
* We must also scale the output by 8/16 = 1/2.
3485
* 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
3486
*/
3487
3488
dataptr = data;
3489
wsptr = workspace;
3490
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
3491
/* Even part */
3492
3493
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
3494
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
3495
tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
3496
tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
3497
tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
3498
tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
3499
tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
3500
tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
3501
3502
tmp10 = tmp0 + tmp7;
3503
tmp14 = tmp0 - tmp7;
3504
tmp11 = tmp1 + tmp6;
3505
tmp15 = tmp1 - tmp6;
3506
tmp12 = tmp2 + tmp5;
3507
tmp16 = tmp2 - tmp5;
3508
tmp13 = tmp3 + tmp4;
3509
tmp17 = tmp3 - tmp4;
3510
3511
tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
3512
tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
3513
tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
3514
tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
3515
tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
3516
tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
3517
tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
3518
tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
3519
3520
dataptr[DCTSIZE*0] = (DCTELEM)
3521
DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+1);
3522
dataptr[DCTSIZE*4] = (DCTELEM)
3523
DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
3524
MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */
3525
CONST_BITS+PASS1_BITS+1);
3526
3527
tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */
3528
MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */
3529
3530
dataptr[DCTSIZE*2] = (DCTELEM)
3531
DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */
3532
+ MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */
3533
CONST_BITS+PASS1_BITS+1);
3534
dataptr[DCTSIZE*6] = (DCTELEM)
3535
DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */
3536
- MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */
3537
CONST_BITS+PASS1_BITS+1);
3538
3539
/* Odd part */
3540
3541
tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */
3542
MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */
3543
tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */
3544
MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */
3545
tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */
3546
MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */
3547
tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */
3548
MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */
3549
tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */
3550
MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */
3551
tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */
3552
MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */
3553
tmp10 = tmp11 + tmp12 + tmp13 -
3554
MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */
3555
MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */
3556
tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
3557
- MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */
3558
tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
3559
+ MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */
3560
tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
3561
+ MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */
3562
3563
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+1);
3564
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+1);
3565
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+1);
3566
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+1);
3567
3568
dataptr++; /* advance pointer to next column */
3569
wsptr++; /* advance pointer to next column */
3570
}
3571
}
3572
3573
3574
/*
3575
* Perform the forward DCT on a 7x14 sample block.
3576
*
3577
* 7-point FDCT in pass 1 (rows), 14-point in pass 2 (columns).
3578
*/
3579
3580
GLOBAL(void)
3581
jpeg_fdct_7x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3582
{
3583
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
3584
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
3585
INT32 z1, z2, z3;
3586
DCTELEM workspace[8*6];
3587
DCTELEM *dataptr;
3588
DCTELEM *wsptr;
3589
JSAMPROW elemptr;
3590
int ctr;
3591
SHIFT_TEMPS
3592
3593
/* Pre-zero output coefficient block. */
3594
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3595
3596
/* Pass 1: process rows.
3597
* Note results are scaled up by sqrt(8) compared to a true DCT;
3598
* furthermore, we scale the results by 2**PASS1_BITS.
3599
* 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
3600
*/
3601
3602
dataptr = data;
3603
ctr = 0;
3604
for (;;) {
3605
elemptr = sample_data[ctr] + start_col;
3606
3607
/* Even part */
3608
3609
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
3610
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
3611
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
3612
tmp3 = GETJSAMPLE(elemptr[3]);
3613
3614
tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
3615
tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
3616
tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
3617
3618
z1 = tmp0 + tmp2;
3619
/* Apply unsigned->signed conversion. */
3620
dataptr[0] = (DCTELEM)
3621
((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
3622
tmp3 += tmp3;
3623
z1 -= tmp3;
3624
z1 -= tmp3;
3625
z1 = MULTIPLY(z1, FIX(0.353553391)); /* (c2+c6-c4)/2 */
3626
z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002)); /* (c2+c4-c6)/2 */
3627
z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123)); /* c6 */
3628
dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
3629
z1 -= z2;
3630
z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734)); /* c4 */
3631
dataptr[4] = (DCTELEM)
3632
DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
3633
CONST_BITS-PASS1_BITS);
3634
dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
3635
3636
/* Odd part */
3637
3638
tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347)); /* (c3+c1-c5)/2 */
3639
tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339)); /* (c3+c5-c1)/2 */
3640
tmp0 = tmp1 - tmp2;
3641
tmp1 += tmp2;
3642
tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
3643
tmp1 += tmp2;
3644
tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268)); /* c5 */
3645
tmp0 += tmp3;
3646
tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693)); /* c3+c1-c5 */
3647
3648
dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
3649
dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
3650
dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
3651
3652
ctr++;
3653
3654
if (ctr != DCTSIZE) {
3655
if (ctr == 14)
3656
break; /* Done. */
3657
dataptr += DCTSIZE; /* advance pointer to next row */
3658
} else
3659
dataptr = workspace; /* switch pointer to extended workspace */
3660
}
3661
3662
/* Pass 2: process columns.
3663
* We remove the PASS1_BITS scaling, but leave the results scaled up
3664
* by an overall factor of 8.
3665
* We must also scale the output by (8/7)*(8/14) = 32/49, which we
3666
* fold into the constant multipliers:
3667
* 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28) * 32/49.
3668
*/
3669
3670
dataptr = data;
3671
wsptr = workspace;
3672
for (ctr = 0; ctr < 7; ctr++) {
3673
/* Even part */
3674
3675
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
3676
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
3677
tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
3678
tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
3679
tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
3680
tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
3681
tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
3682
3683
tmp10 = tmp0 + tmp6;
3684
tmp14 = tmp0 - tmp6;
3685
tmp11 = tmp1 + tmp5;
3686
tmp15 = tmp1 - tmp5;
3687
tmp12 = tmp2 + tmp4;
3688
tmp16 = tmp2 - tmp4;
3689
3690
tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
3691
tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
3692
tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
3693
tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
3694
tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
3695
tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
3696
tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
3697
3698
dataptr[DCTSIZE*0] = (DCTELEM)
3699
DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
3700
FIX(0.653061224)), /* 32/49 */
3701
CONST_BITS+PASS1_BITS);
3702
tmp13 += tmp13;
3703
dataptr[DCTSIZE*4] = (DCTELEM)
3704
DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
3705
MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
3706
MULTIPLY(tmp12 - tmp13, FIX(0.575835255)), /* c8 */
3707
CONST_BITS+PASS1_BITS);
3708
3709
tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570)); /* c6 */
3710
3711
dataptr[DCTSIZE*2] = (DCTELEM)
3712
DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691)) /* c2-c6 */
3713
+ MULTIPLY(tmp16, FIX(0.400721155)), /* c10 */
3714
CONST_BITS+PASS1_BITS);
3715
dataptr[DCTSIZE*6] = (DCTELEM)
3716
DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725)) /* c6+c10 */
3717
- MULTIPLY(tmp16, FIX(0.900412262)), /* c2 */
3718
CONST_BITS+PASS1_BITS);
3719
3720
/* Odd part */
3721
3722
tmp10 = tmp1 + tmp2;
3723
tmp11 = tmp5 - tmp4;
3724
dataptr[DCTSIZE*7] = (DCTELEM)
3725
DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
3726
FIX(0.653061224)), /* 32/49 */
3727
CONST_BITS+PASS1_BITS);
3728
tmp3 = MULTIPLY(tmp3 , FIX(0.653061224)); /* 32/49 */
3729
tmp10 = MULTIPLY(tmp10, - FIX(0.103406812)); /* -c13 */
3730
tmp11 = MULTIPLY(tmp11, FIX(0.917760839)); /* c1 */
3731
tmp10 += tmp11 - tmp3;
3732
tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) + /* c5 */
3733
MULTIPLY(tmp4 + tmp6, FIX(0.491367823)); /* c9 */
3734
dataptr[DCTSIZE*5] = (DCTELEM)
3735
DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
3736
+ MULTIPLY(tmp4, FIX(0.731428202)), /* c1+c11-c9 */
3737
CONST_BITS+PASS1_BITS);
3738
tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) + /* c3 */
3739
MULTIPLY(tmp5 - tmp6, FIX(0.305035186)); /* c11 */
3740
dataptr[DCTSIZE*3] = (DCTELEM)
3741
DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
3742
- MULTIPLY(tmp5, FIX(2.004803435)), /* c1+c5+c11 */
3743
CONST_BITS+PASS1_BITS);
3744
dataptr[DCTSIZE*1] = (DCTELEM)
3745
DESCALE(tmp11 + tmp12 + tmp3
3746
- MULTIPLY(tmp0, FIX(0.735987049)) /* c3+c5-c1 */
3747
- MULTIPLY(tmp6, FIX(0.082925825)), /* c9-c11-c13 */
3748
CONST_BITS+PASS1_BITS);
3749
3750
dataptr++; /* advance pointer to next column */
3751
wsptr++; /* advance pointer to next column */
3752
}
3753
}
3754
3755
3756
/*
3757
* Perform the forward DCT on a 6x12 sample block.
3758
*
3759
* 6-point FDCT in pass 1 (rows), 12-point in pass 2 (columns).
3760
*/
3761
3762
GLOBAL(void)
3763
jpeg_fdct_6x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3764
{
3765
INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
3766
INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
3767
DCTELEM workspace[8*4];
3768
DCTELEM *dataptr;
3769
DCTELEM *wsptr;
3770
JSAMPROW elemptr;
3771
int ctr;
3772
SHIFT_TEMPS
3773
3774
/* Pre-zero output coefficient block. */
3775
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3776
3777
/* Pass 1: process rows.
3778
* Note results are scaled up by sqrt(8) compared to a true DCT;
3779
* furthermore, we scale the results by 2**PASS1_BITS.
3780
* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
3781
*/
3782
3783
dataptr = data;
3784
ctr = 0;
3785
for (;;) {
3786
elemptr = sample_data[ctr] + start_col;
3787
3788
/* Even part */
3789
3790
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
3791
tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
3792
tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
3793
3794
tmp10 = tmp0 + tmp2;
3795
tmp12 = tmp0 - tmp2;
3796
3797
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
3798
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
3799
tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
3800
3801
/* Apply unsigned->signed conversion. */
3802
dataptr[0] = (DCTELEM)
3803
((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
3804
dataptr[2] = (DCTELEM)
3805
DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */
3806
CONST_BITS-PASS1_BITS);
3807
dataptr[4] = (DCTELEM)
3808
DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
3809
CONST_BITS-PASS1_BITS);
3810
3811
/* Odd part */
3812
3813
tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */
3814
CONST_BITS-PASS1_BITS);
3815
3816
dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
3817
dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
3818
dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
3819
3820
ctr++;
3821
3822
if (ctr != DCTSIZE) {
3823
if (ctr == 12)
3824
break; /* Done. */
3825
dataptr += DCTSIZE; /* advance pointer to next row */
3826
} else
3827
dataptr = workspace; /* switch pointer to extended workspace */
3828
}
3829
3830
/* Pass 2: process columns.
3831
* We remove the PASS1_BITS scaling, but leave the results scaled up
3832
* by an overall factor of 8.
3833
* We must also scale the output by (8/6)*(8/12) = 8/9, which we
3834
* fold into the constant multipliers:
3835
* 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24) * 8/9.
3836
*/
3837
3838
dataptr = data;
3839
wsptr = workspace;
3840
for (ctr = 0; ctr < 6; ctr++) {
3841
/* Even part */
3842
3843
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
3844
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
3845
tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
3846
tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
3847
tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
3848
tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
3849
3850
tmp10 = tmp0 + tmp5;
3851
tmp13 = tmp0 - tmp5;
3852
tmp11 = tmp1 + tmp4;
3853
tmp14 = tmp1 - tmp4;
3854
tmp12 = tmp2 + tmp3;
3855
tmp15 = tmp2 - tmp3;
3856
3857
tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
3858
tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
3859
tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
3860
tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
3861
tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
3862
tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
3863
3864
dataptr[DCTSIZE*0] = (DCTELEM)
3865
DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
3866
CONST_BITS+PASS1_BITS);
3867
dataptr[DCTSIZE*6] = (DCTELEM)
3868
DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
3869
CONST_BITS+PASS1_BITS);
3870
dataptr[DCTSIZE*4] = (DCTELEM)
3871
DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)), /* c4 */
3872
CONST_BITS+PASS1_BITS);
3873
dataptr[DCTSIZE*2] = (DCTELEM)
3874
DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) + /* 8/9 */
3875
MULTIPLY(tmp13 + tmp15, FIX(1.214244803)), /* c2 */
3876
CONST_BITS+PASS1_BITS);
3877
3878
/* Odd part */
3879
3880
tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200)); /* c9 */
3881
tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102)); /* c3-c9 */
3882
tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502)); /* c3+c9 */
3883
tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603)); /* c5 */
3884
tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039)); /* c7 */
3885
tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
3886
+ MULTIPLY(tmp5, FIX(0.164081699)); /* c11 */
3887
tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
3888
tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
3889
+ MULTIPLY(tmp5, FIX(0.765261039)); /* c7 */
3890
tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
3891
- MULTIPLY(tmp5, FIX(0.997307603)); /* c5 */
3892
tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
3893
- MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
3894
3895
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS);
3896
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS);
3897
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS);
3898
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS);
3899
3900
dataptr++; /* advance pointer to next column */
3901
wsptr++; /* advance pointer to next column */
3902
}
3903
}
3904
3905
3906
/*
3907
* Perform the forward DCT on a 5x10 sample block.
3908
*
3909
* 5-point FDCT in pass 1 (rows), 10-point in pass 2 (columns).
3910
*/
3911
3912
GLOBAL(void)
3913
jpeg_fdct_5x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
3914
{
3915
INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
3916
INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
3917
DCTELEM workspace[8*2];
3918
DCTELEM *dataptr;
3919
DCTELEM *wsptr;
3920
JSAMPROW elemptr;
3921
int ctr;
3922
SHIFT_TEMPS
3923
3924
/* Pre-zero output coefficient block. */
3925
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
3926
3927
/* Pass 1: process rows.
3928
* Note results are scaled up by sqrt(8) compared to a true DCT;
3929
* furthermore, we scale the results by 2**PASS1_BITS.
3930
* 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
3931
*/
3932
3933
dataptr = data;
3934
ctr = 0;
3935
for (;;) {
3936
elemptr = sample_data[ctr] + start_col;
3937
3938
/* Even part */
3939
3940
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
3941
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
3942
tmp2 = GETJSAMPLE(elemptr[2]);
3943
3944
tmp10 = tmp0 + tmp1;
3945
tmp11 = tmp0 - tmp1;
3946
3947
tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
3948
tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
3949
3950
/* Apply unsigned->signed conversion. */
3951
dataptr[0] = (DCTELEM)
3952
((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << PASS1_BITS);
3953
tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */
3954
tmp10 -= tmp2 << 2;
3955
tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */
3956
dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS);
3957
dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS);
3958
3959
/* Odd part */
3960
3961
tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876)); /* c3 */
3962
3963
dataptr[1] = (DCTELEM)
3964
DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
3965
CONST_BITS-PASS1_BITS);
3966
dataptr[3] = (DCTELEM)
3967
DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
3968
CONST_BITS-PASS1_BITS);
3969
3970
ctr++;
3971
3972
if (ctr != DCTSIZE) {
3973
if (ctr == 10)
3974
break; /* Done. */
3975
dataptr += DCTSIZE; /* advance pointer to next row */
3976
} else
3977
dataptr = workspace; /* switch pointer to extended workspace */
3978
}
3979
3980
/* Pass 2: process columns.
3981
* We remove the PASS1_BITS scaling, but leave the results scaled up
3982
* by an overall factor of 8.
3983
* We must also scale the output by (8/5)*(8/10) = 32/25, which we
3984
* fold into the constant multipliers:
3985
* 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20) * 32/25.
3986
*/
3987
3988
dataptr = data;
3989
wsptr = workspace;
3990
for (ctr = 0; ctr < 5; ctr++) {
3991
/* Even part */
3992
3993
tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
3994
tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
3995
tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
3996
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
3997
tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
3998
3999
tmp10 = tmp0 + tmp4;
4000
tmp13 = tmp0 - tmp4;
4001
tmp11 = tmp1 + tmp3;
4002
tmp14 = tmp1 - tmp3;
4003
4004
tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
4005
tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
4006
tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
4007
tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
4008
tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
4009
4010
dataptr[DCTSIZE*0] = (DCTELEM)
4011
DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
4012
CONST_BITS+PASS1_BITS);
4013
tmp12 += tmp12;
4014
dataptr[DCTSIZE*4] = (DCTELEM)
4015
DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
4016
MULTIPLY(tmp11 - tmp12, FIX(0.559380511)), /* c8 */
4017
CONST_BITS+PASS1_BITS);
4018
tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961)); /* c6 */
4019
dataptr[DCTSIZE*2] = (DCTELEM)
4020
DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)), /* c2-c6 */
4021
CONST_BITS+PASS1_BITS);
4022
dataptr[DCTSIZE*6] = (DCTELEM)
4023
DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)), /* c2+c6 */
4024
CONST_BITS+PASS1_BITS);
4025
4026
/* Odd part */
4027
4028
tmp10 = tmp0 + tmp4;
4029
tmp11 = tmp1 - tmp3;
4030
dataptr[DCTSIZE*5] = (DCTELEM)
4031
DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)), /* 32/25 */
4032
CONST_BITS+PASS1_BITS);
4033
tmp2 = MULTIPLY(tmp2, FIX(1.28)); /* 32/25 */
4034
dataptr[DCTSIZE*1] = (DCTELEM)
4035
DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) + /* c1 */
4036
MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 + /* c3 */
4037
MULTIPLY(tmp3, FIX(0.821810588)) + /* c7 */
4038
MULTIPLY(tmp4, FIX(0.283176630)), /* c9 */
4039
CONST_BITS+PASS1_BITS);
4040
tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) - /* (c3+c7)/2 */
4041
MULTIPLY(tmp1 + tmp3, FIX(0.752365123)); /* (c1-c9)/2 */
4042
tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) + /* (c3-c7)/2 */
4043
MULTIPLY(tmp11, FIX(0.64)) - tmp2; /* 16/25 */
4044
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+PASS1_BITS);
4045
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+PASS1_BITS);
4046
4047
dataptr++; /* advance pointer to next column */
4048
wsptr++; /* advance pointer to next column */
4049
}
4050
}
4051
4052
4053
/*
4054
* Perform the forward DCT on a 4x8 sample block.
4055
*
4056
* 4-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
4057
*/
4058
4059
GLOBAL(void)
4060
jpeg_fdct_4x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4061
{
4062
INT32 tmp0, tmp1, tmp2, tmp3;
4063
INT32 tmp10, tmp11, tmp12, tmp13;
4064
INT32 z1;
4065
DCTELEM *dataptr;
4066
JSAMPROW elemptr;
4067
int ctr;
4068
SHIFT_TEMPS
4069
4070
/* Pre-zero output coefficient block. */
4071
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4072
4073
/* Pass 1: process rows.
4074
* Note results are scaled up by sqrt(8) compared to a true DCT;
4075
* furthermore, we scale the results by 2**PASS1_BITS.
4076
* We must also scale the output by 8/4 = 2, which we add here.
4077
* 4-point FDCT kernel,
4078
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
4079
*/
4080
4081
dataptr = data;
4082
for (ctr = 0; ctr < DCTSIZE; ctr++) {
4083
elemptr = sample_data[ctr] + start_col;
4084
4085
/* Even part */
4086
4087
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
4088
tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
4089
4090
tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
4091
tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
4092
4093
/* Apply unsigned->signed conversion. */
4094
dataptr[0] = (DCTELEM)
4095
((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+1));
4096
dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+1));
4097
4098
/* Odd part */
4099
4100
tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
4101
/* Add fudge factor here for final descale. */
4102
tmp0 += ONE << (CONST_BITS-PASS1_BITS-2);
4103
4104
dataptr[1] = (DCTELEM)
4105
RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
4106
CONST_BITS-PASS1_BITS-1);
4107
dataptr[3] = (DCTELEM)
4108
RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
4109
CONST_BITS-PASS1_BITS-1);
4110
4111
dataptr += DCTSIZE; /* advance pointer to next row */
4112
}
4113
4114
/* Pass 2: process columns.
4115
* We remove the PASS1_BITS scaling, but leave the results scaled up
4116
* by an overall factor of 8.
4117
* 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
4118
*/
4119
4120
dataptr = data;
4121
for (ctr = 0; ctr < 4; ctr++) {
4122
/* Even part per LL&M figure 1 --- note that published figure is faulty;
4123
* rotator "c1" should be "c6".
4124
*/
4125
4126
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
4127
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
4128
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
4129
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
4130
4131
/* Add fudge factor here for final descale. */
4132
tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1));
4133
tmp12 = tmp0 - tmp3;
4134
tmp11 = tmp1 + tmp2;
4135
tmp13 = tmp1 - tmp2;
4136
4137
tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
4138
tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
4139
tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
4140
tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
4141
4142
dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
4143
dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
4144
4145
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); /* c6 */
4146
/* Add fudge factor here for final descale. */
4147
z1 += ONE << (CONST_BITS+PASS1_BITS-1);
4148
4149
dataptr[DCTSIZE*2] = (DCTELEM)
4150
RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
4151
CONST_BITS+PASS1_BITS);
4152
dataptr[DCTSIZE*6] = (DCTELEM)
4153
RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
4154
CONST_BITS+PASS1_BITS);
4155
4156
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
4157
* i0..i3 in the paper are tmp0..tmp3 here.
4158
*/
4159
4160
tmp12 = tmp0 + tmp2;
4161
tmp13 = tmp1 + tmp3;
4162
4163
z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */
4164
/* Add fudge factor here for final descale. */
4165
z1 += ONE << (CONST_BITS+PASS1_BITS-1);
4166
4167
tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* -c3+c5 */
4168
tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */
4169
tmp12 += z1;
4170
tmp13 += z1;
4171
4172
z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
4173
tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */
4174
tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */
4175
tmp0 += z1 + tmp12;
4176
tmp3 += z1 + tmp13;
4177
4178
z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
4179
tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */
4180
tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */
4181
tmp1 += z1 + tmp13;
4182
tmp2 += z1 + tmp12;
4183
4184
dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS);
4185
dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS);
4186
dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS);
4187
dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS);
4188
4189
dataptr++; /* advance pointer to next column */
4190
}
4191
}
4192
4193
4194
/*
4195
* Perform the forward DCT on a 3x6 sample block.
4196
*
4197
* 3-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
4198
*/
4199
4200
GLOBAL(void)
4201
jpeg_fdct_3x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4202
{
4203
INT32 tmp0, tmp1, tmp2;
4204
INT32 tmp10, tmp11, tmp12;
4205
DCTELEM *dataptr;
4206
JSAMPROW elemptr;
4207
int ctr;
4208
SHIFT_TEMPS
4209
4210
/* Pre-zero output coefficient block. */
4211
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4212
4213
/* Pass 1: process rows.
4214
* Note results are scaled up by sqrt(8) compared to a true DCT;
4215
* furthermore, we scale the results by 2**PASS1_BITS.
4216
* We scale the results further by 2 as part of output adaption
4217
* scaling for different DCT size.
4218
* 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
4219
*/
4220
4221
dataptr = data;
4222
for (ctr = 0; ctr < 6; ctr++) {
4223
elemptr = sample_data[ctr] + start_col;
4224
4225
/* Even part */
4226
4227
tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
4228
tmp1 = GETJSAMPLE(elemptr[1]);
4229
4230
tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
4231
4232
/* Apply unsigned->signed conversion. */
4233
dataptr[0] = (DCTELEM)
4234
((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+1));
4235
dataptr[2] = (DCTELEM)
4236
DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
4237
CONST_BITS-PASS1_BITS-1);
4238
4239
/* Odd part */
4240
4241
dataptr[1] = (DCTELEM)
4242
DESCALE(MULTIPLY(tmp2, FIX(1.224744871)), /* c1 */
4243
CONST_BITS-PASS1_BITS-1);
4244
4245
dataptr += DCTSIZE; /* advance pointer to next row */
4246
}
4247
4248
/* Pass 2: process columns.
4249
* We remove the PASS1_BITS scaling, but leave the results scaled up
4250
* by an overall factor of 8.
4251
* We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
4252
* fold into the constant multipliers (other part was done in pass 1):
4253
* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
4254
*/
4255
4256
dataptr = data;
4257
for (ctr = 0; ctr < 3; ctr++) {
4258
/* Even part */
4259
4260
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
4261
tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
4262
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
4263
4264
tmp10 = tmp0 + tmp2;
4265
tmp12 = tmp0 - tmp2;
4266
4267
tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
4268
tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
4269
tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
4270
4271
dataptr[DCTSIZE*0] = (DCTELEM)
4272
DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */
4273
CONST_BITS+PASS1_BITS);
4274
dataptr[DCTSIZE*2] = (DCTELEM)
4275
DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */
4276
CONST_BITS+PASS1_BITS);
4277
dataptr[DCTSIZE*4] = (DCTELEM)
4278
DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
4279
CONST_BITS+PASS1_BITS);
4280
4281
/* Odd part */
4282
4283
tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */
4284
4285
dataptr[DCTSIZE*1] = (DCTELEM)
4286
DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */
4287
CONST_BITS+PASS1_BITS);
4288
dataptr[DCTSIZE*3] = (DCTELEM)
4289
DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */
4290
CONST_BITS+PASS1_BITS);
4291
dataptr[DCTSIZE*5] = (DCTELEM)
4292
DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */
4293
CONST_BITS+PASS1_BITS);
4294
4295
dataptr++; /* advance pointer to next column */
4296
}
4297
}
4298
4299
4300
/*
4301
* Perform the forward DCT on a 2x4 sample block.
4302
*
4303
* 2-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
4304
*/
4305
4306
GLOBAL(void)
4307
jpeg_fdct_2x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4308
{
4309
INT32 tmp0, tmp1;
4310
INT32 tmp10, tmp11;
4311
DCTELEM *dataptr;
4312
JSAMPROW elemptr;
4313
int ctr;
4314
SHIFT_TEMPS
4315
4316
/* Pre-zero output coefficient block. */
4317
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4318
4319
/* Pass 1: process rows.
4320
* Note results are scaled up by sqrt(8) compared to a true DCT.
4321
*/
4322
4323
dataptr = data;
4324
for (ctr = 0; ctr < 4; ctr++) {
4325
elemptr = sample_data[ctr] + start_col;
4326
4327
/* Even part */
4328
4329
tmp0 = GETJSAMPLE(elemptr[0]);
4330
tmp1 = GETJSAMPLE(elemptr[1]);
4331
4332
/* Apply unsigned->signed conversion. */
4333
dataptr[0] = (DCTELEM) (tmp0 + tmp1 - 2 * CENTERJSAMPLE);
4334
4335
/* Odd part */
4336
4337
dataptr[1] = (DCTELEM) (tmp0 - tmp1);
4338
4339
dataptr += DCTSIZE; /* advance pointer to next row */
4340
}
4341
4342
/* Pass 2: process columns.
4343
* We leave the results scaled up by an overall factor of 8.
4344
* We must also scale the output by (8/2)*(8/4) = 2**3.
4345
* 4-point FDCT kernel,
4346
* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
4347
*/
4348
4349
dataptr = data;
4350
for (ctr = 0; ctr < 2; ctr++) {
4351
/* Even part */
4352
4353
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3];
4354
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
4355
4356
tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
4357
tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
4358
4359
dataptr[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp1) << 3);
4360
dataptr[DCTSIZE*2] = (DCTELEM) ((tmp0 - tmp1) << 3);
4361
4362
/* Odd part */
4363
4364
tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */
4365
/* Add fudge factor here for final descale. */
4366
tmp0 += ONE << (CONST_BITS-3-1);
4367
4368
dataptr[DCTSIZE*1] = (DCTELEM)
4369
RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
4370
CONST_BITS-3);
4371
dataptr[DCTSIZE*3] = (DCTELEM)
4372
RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
4373
CONST_BITS-3);
4374
4375
dataptr++; /* advance pointer to next column */
4376
}
4377
}
4378
4379
4380
/*
4381
* Perform the forward DCT on a 1x2 sample block.
4382
*
4383
* 1-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
4384
*/
4385
4386
GLOBAL(void)
4387
jpeg_fdct_1x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
4388
{
4389
DCTELEM tmp0, tmp1;
4390
4391
/* Pre-zero output coefficient block. */
4392
MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
4393
4394
/* Pass 1: empty. */
4395
4396
/* Pass 2: process columns.
4397
* We leave the results scaled up by an overall factor of 8.
4398
* We must also scale the output by (8/1)*(8/2) = 2**5.
4399
*/
4400
4401
/* Even part */
4402
4403
tmp0 = GETJSAMPLE(sample_data[0][start_col]);
4404
tmp1 = GETJSAMPLE(sample_data[1][start_col]);
4405
4406
/* Apply unsigned->signed conversion. */
4407
data[DCTSIZE*0] = (tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5;
4408
4409
/* Odd part */
4410
4411
data[DCTSIZE*1] = (tmp0 - tmp1) << 5;
4412
}
4413
4414
#endif /* DCT_SCALING_SUPPORTED */
4415
#endif /* DCT_ISLOW_SUPPORTED */
4416
4417