/*Scraped from a web code viewer; the site navigation text was removed.
  GitHub Repository: godotengine/godot
  Path: blob/master/thirdparty/libtheora/fdct.c*/
/********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
 * by the Xiph.Org Foundation https://www.xiph.org/                 *
 *                                                                  *
 ********************************************************************

  function:

 ********************************************************************/
16
#include "encint.h"
17
#include "dct.h"
18
19
20
21
/*Performs a forward 8 point Type-II DCT transform.
22
The output is scaled by a factor of 2 from the orthonormal version of the
23
transform.
24
_y: The buffer to store the result in.
25
Data will be placed the first 8 entries (e.g., in a row of an 8x8 block).
26
_x: The input coefficients.
27
Every 8th entry is used (e.g., from a column of an 8x8 block).*/
28
static void oc_fdct8(ogg_int16_t _y[8],const ogg_int16_t *_x){
29
int t0;
30
int t1;
31
int t2;
32
int t3;
33
int t4;
34
int t5;
35
int t6;
36
int t7;
37
int r;
38
int s;
39
int u;
40
int v;
41
/*Stage 1:*/
42
/*0-7 butterfly.*/
43
t0=_x[0<<3]+(int)_x[7<<3];
44
t7=_x[0<<3]-(int)_x[7<<3];
45
/*1-6 butterfly.*/
46
t1=_x[1<<3]+(int)_x[6<<3];
47
t6=_x[1<<3]-(int)_x[6<<3];
48
/*2-5 butterfly.*/
49
t2=_x[2<<3]+(int)_x[5<<3];
50
t5=_x[2<<3]-(int)_x[5<<3];
51
/*3-4 butterfly.*/
52
t3=_x[3<<3]+(int)_x[4<<3];
53
t4=_x[3<<3]-(int)_x[4<<3];
54
/*Stage 2:*/
55
/*0-3 butterfly.*/
56
r=t0+t3;
57
t3=t0-t3;
58
t0=r;
59
/*1-2 butterfly.*/
60
r=t1+t2;
61
t2=t1-t2;
62
t1=r;
63
/*6-5 butterfly.*/
64
r=t6+t5;
65
t5=t6-t5;
66
t6=r;
67
/*Stages 3 and 4 are where all the approximation occurs.
68
These are chosen to be as close to an exact inverse of the approximations
69
made in the iDCT as possible, while still using mostly 16-bit arithmetic.
70
We use some 16x16->32 signed MACs, but those still commonly execute in 1
71
cycle on a 16-bit DSP.
72
For example, s=(27146*t5+0x4000>>16)+t5+(t5!=0) is an exact inverse of
73
t5=(OC_C4S4*s>>16).
74
That is, applying the latter to the output of the former will recover t5
75
exactly (over the valid input range of t5, -23171...23169).
76
We increase the rounding bias to 0xB500 in this particular case so that
77
errors inverting the subsequent butterfly are not one-sided (e.g., the
78
mean error is very close to zero).
79
The (t5!=0) term could be replaced simply by 1, but we want to send 0 to 0.
80
The fDCT of an all-zeros block will still not be zero, because of the
81
biases we added at the very beginning of the process, but it will be close
82
enough that it is guaranteed to round to zero.*/
83
/*Stage 3:*/
84
/*4-5 butterfly.*/
85
s=(27146*t5+0xB500>>16)+t5+(t5!=0)>>1;
86
r=t4+s;
87
t5=t4-s;
88
t4=r;
89
/*7-6 butterfly.*/
90
s=(27146*t6+0xB500>>16)+t6+(t6!=0)>>1;
91
r=t7+s;
92
t6=t7-s;
93
t7=r;
94
/*Stage 4:*/
95
/*0-1 butterfly.*/
96
r=(27146*t0+0x4000>>16)+t0+(t0!=0);
97
s=(27146*t1+0xB500>>16)+t1+(t1!=0);
98
u=r+s>>1;
99
v=r-u;
100
_y[0]=u;
101
_y[4]=v;
102
/*3-2 rotation by 6pi/16*/
103
u=(OC_C6S2*t2+OC_C2S6*t3+0x6CB7>>16)+(t3!=0);
104
s=(OC_C6S2*u>>16)-t2;
105
v=(s*21600+0x2800>>18)+s+(s!=0);
106
_y[2]=u;
107
_y[6]=v;
108
/*6-5 rotation by 3pi/16*/
109
u=(OC_C5S3*t6+OC_C3S5*t5+0x0E3D>>16)+(t5!=0);
110
s=t6-(OC_C5S3*u>>16);
111
v=(s*26568+0x3400>>17)+s+(s!=0);
112
_y[5]=u;
113
_y[3]=v;
114
/*7-4 rotation by 7pi/16*/
115
u=(OC_C7S1*t4+OC_C1S7*t7+0x7B1B>>16)+(t7!=0);
116
s=(OC_C7S1*u>>16)-t4;
117
v=(s*20539+0x3000>>20)+s+(s!=0);
118
_y[1]=u;
119
_y[7]=v;
120
}
121
122
/*Performs a forward 8x8 Type-II DCT transform.
123
The output is scaled by a factor of 4 relative to the orthonormal version
124
of the transform.
125
_y: The buffer to store the result in.
126
This may be the same as _x.
127
_x: The input coefficients. */
128
void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
129
const ogg_int16_t *in;
130
ogg_int16_t *end;
131
ogg_int16_t *out;
132
ogg_int16_t w[64];
133
int i;
134
/*Add two extra bits of working precision to improve accuracy; any more and
135
we could overflow.*/
136
for(i=0;i<64;i++)w[i]=_x[i]<<2;
137
/*These biases correct for some systematic error that remains in the full
138
fDCT->iDCT round trip.*/
139
w[0]+=(w[0]!=0)+1;
140
w[1]++;
141
w[8]--;
142
/*Transform columns of w into rows of _y.*/
143
for(in=w,out=_y,end=out+64;out<end;in++,out+=8)oc_fdct8(out,in);
144
/*Transform columns of _y into rows of w.*/
145
for(in=_y,out=w,end=out+64;out<end;in++,out+=8)oc_fdct8(out,in);
146
/*Round the result back to the external working precision (which is still
147
scaled by four relative to the orthogonal result).
148
TODO: We should just update the external working precision.*/
149
for(i=0;i<64;i++)_y[i]=w[OC_FZIG_ZAG[i]]+2>>2;
150
}
151
152
153
154
/*This does not seem to outperform simple LFE border padding before MC.
  It yields higher PSNR, but much higher bitrate usage.*/
156
#if 0
157
typedef struct oc_extension_info oc_extension_info;
158
159
160
161
/*Information needed to pad boundary blocks.
162
We multiply each row/column by an extension matrix that fills in the padding
163
values as a linear combination of the active values, so that an equivalent
164
number of coefficients are forced to zero.
165
This costs at most 16 multiplies, the same as a 1-D fDCT itself, and as
166
little as 7 multiplies.
167
We compute the extension matrices for every possible shape in advance, as
168
there are only 35.
169
The coefficients for all matrices are stored in a single array to take
170
advantage of the overlap and repetitiveness of many of the shapes.
171
A similar technique is applied to the offsets into this array.
172
This reduces the required table storage by about 48%.
173
See tools/extgen.c for details.
174
We could conceivably do the same for all 256 possible shapes.*/
175
struct oc_extension_info{
176
/*The mask of the active pixels in the shape.*/
177
short mask;
178
/*The number of active pixels in the shape.*/
179
short na;
180
/*The extension matrix.
181
This is (8-na)xna*/
182
const ogg_int16_t *const *ext;
183
/*The pixel indices: na active pixels followed by 8-na padding pixels.*/
184
unsigned char pi[8];
185
/*The coefficient indices: na unconstrained coefficients followed by 8-na
186
coefficients to be forced to zero.*/
187
unsigned char ci[8];
188
};
189
190
191
/*The number of shapes we need (the number of entries in the extension info
   table).*/
#define OC_NSHAPES (35)
193
194
/*The shared pool of fixed-point extension matrix coefficients.
  Rows of the individual extension matrices are overlapping runs of this
   array; OC_EXT_ROWS holds the starting offset of each row.
  Initializers above 0x7FFF rely on the conversion to ogg_int16_t producing
   the corresponding negative value.*/
static const ogg_int16_t OC_EXT_COEFFS[229]={
  0x7FFF,0xE1F8,0x6903,0xAA79,0x5587,0x7FFF,0x1E08,0x7FFF,
  0x5587,0xAA79,0x6903,0xE1F8,0x7FFF,0x0000,0x0000,0x0000,
  0x7FFF,0x0000,0x0000,0x7FFF,0x8000,0x7FFF,0x0000,0x0000,
  0x7FFF,0xE1F8,0x1E08,0xB0A7,0xAA1D,0x337C,0x7FFF,0x4345,
  0x2267,0x4345,0x7FFF,0x337C,0xAA1D,0xB0A7,0x8A8C,0x4F59,
  0x03B4,0xE2D6,0x7FFF,0x2CF3,0x7FFF,0xE2D6,0x03B4,0x4F59,
  0x8A8C,0x1103,0x7AEF,0x5225,0xDF60,0xC288,0xDF60,0x5225,
  0x7AEF,0x1103,0x668A,0xD6EE,0x3A16,0x0E6C,0xFA07,0x0E6C,
  0x3A16,0xD6EE,0x668A,0x2A79,0x2402,0x980F,0x50F5,0x4882,
  0x50F5,0x980F,0x2402,0x2A79,0xF976,0x2768,0x5F22,0x2768,
  0xF976,0x1F91,0x76C1,0xE9AE,0x76C1,0x1F91,0x7FFF,0xD185,
  0x0FC8,0xD185,0x7FFF,0x4F59,0x4345,0xED62,0x4345,0x4F59,
  0xF574,0x5D99,0x2CF3,0x5D99,0xF574,0x5587,0x3505,0x30FC,
  0xF482,0x953C,0xEAC4,0x7FFF,0x4F04,0x7FFF,0xEAC4,0x953C,
  0xF482,0x30FC,0x4F04,0x273D,0xD8C3,0x273D,0x1E09,0x61F7,
  0x1E09,0x273D,0xD8C3,0x273D,0x4F04,0x30FC,0xA57E,0x153C,
  0x6AC4,0x3C7A,0x1E08,0x3C7A,0x6AC4,0x153C,0xA57E,0x7FFF,
  0xA57E,0x5A82,0x6AC4,0x153C,0xC386,0xE1F8,0xC386,0x153C,
  0x6AC4,0x5A82,0xD8C3,0x273D,0x7FFF,0xE1F7,0x7FFF,0x273D,
  0xD8C3,0x4F04,0x30FC,0xD8C3,0x273D,0xD8C3,0x30FC,0x4F04,
  0x1FC8,0x67AD,0x1853,0xE038,0x1853,0x67AD,0x1FC8,0x4546,
  0xE038,0x1FC8,0x3ABA,0x1FC8,0xE038,0x4546,0x3505,0x5587,
  0xF574,0xBC11,0x78F4,0x4AFB,0xE6F3,0x4E12,0x3C11,0xF8F4,
  0x4AFB,0x3C7A,0xF88B,0x3C11,0x78F4,0xCAFB,0x7FFF,0x08CC,
  0x070C,0x236D,0x5587,0x236D,0x070C,0xF88B,0x3C7A,0x4AFB,
  0xF8F4,0x3C11,0x7FFF,0x153C,0xCAFB,0x153C,0x7FFF,0x1E08,
  0xE1F8,0x7FFF,0x08CC,0x7FFF,0xCAFB,0x78F4,0x3C11,0x4E12,
  0xE6F3,0x4AFB,0x78F4,0xBC11,0xFE3D,0x7FFF,0xFE3D,0x2F3A,
  0x7FFF,0x2F3A,0x89BC,0x7FFF,0x89BC
};
225
226
static const ogg_int16_t *const OC_EXT_ROWS[96]={
227
OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,
228
OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 0,OC_EXT_COEFFS+ 6,
229
OC_EXT_COEFFS+ 27,OC_EXT_COEFFS+ 38,OC_EXT_COEFFS+ 43,OC_EXT_COEFFS+ 32,
230
OC_EXT_COEFFS+ 49,OC_EXT_COEFFS+ 58,OC_EXT_COEFFS+ 67,OC_EXT_COEFFS+ 71,
231
OC_EXT_COEFFS+ 62,OC_EXT_COEFFS+ 53,OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15,
232
OC_EXT_COEFFS+ 14,OC_EXT_COEFFS+ 13,OC_EXT_COEFFS+ 76,OC_EXT_COEFFS+ 81,
233
OC_EXT_COEFFS+ 86,OC_EXT_COEFFS+ 91,OC_EXT_COEFFS+ 96,OC_EXT_COEFFS+ 98,
234
OC_EXT_COEFFS+ 93,OC_EXT_COEFFS+ 88,OC_EXT_COEFFS+ 83,OC_EXT_COEFFS+ 78,
235
OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 12,
236
OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15,
237
OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 103,OC_EXT_COEFFS+ 108,
238
OC_EXT_COEFFS+ 126,OC_EXT_COEFFS+ 16,OC_EXT_COEFFS+ 137,OC_EXT_COEFFS+ 141,
239
OC_EXT_COEFFS+ 20,OC_EXT_COEFFS+ 130,OC_EXT_COEFFS+ 113,OC_EXT_COEFFS+ 116,
240
OC_EXT_COEFFS+ 146,OC_EXT_COEFFS+ 153,OC_EXT_COEFFS+ 160,OC_EXT_COEFFS+ 167,
241
OC_EXT_COEFFS+ 170,OC_EXT_COEFFS+ 163,OC_EXT_COEFFS+ 156,OC_EXT_COEFFS+ 149,
242
OC_EXT_COEFFS+ 119,OC_EXT_COEFFS+ 122,OC_EXT_COEFFS+ 174,OC_EXT_COEFFS+ 177,
243
OC_EXT_COEFFS+ 182,OC_EXT_COEFFS+ 187,OC_EXT_COEFFS+ 192,OC_EXT_COEFFS+ 197,
244
OC_EXT_COEFFS+ 202,OC_EXT_COEFFS+ 207,OC_EXT_COEFFS+ 210,OC_EXT_COEFFS+ 215,
245
OC_EXT_COEFFS+ 179,OC_EXT_COEFFS+ 189,OC_EXT_COEFFS+ 24,OC_EXT_COEFFS+ 204,
246
OC_EXT_COEFFS+ 184,OC_EXT_COEFFS+ 194,OC_EXT_COEFFS+ 212,OC_EXT_COEFFS+ 199,
247
OC_EXT_COEFFS+ 217,OC_EXT_COEFFS+ 100,OC_EXT_COEFFS+ 134,OC_EXT_COEFFS+ 135,
248
OC_EXT_COEFFS+ 135,OC_EXT_COEFFS+ 12,OC_EXT_COEFFS+ 15,OC_EXT_COEFFS+ 134,
249
OC_EXT_COEFFS+ 134,OC_EXT_COEFFS+ 135,OC_EXT_COEFFS+ 220,OC_EXT_COEFFS+ 223,
250
OC_EXT_COEFFS+ 226,OC_EXT_COEFFS+ 227,OC_EXT_COEFFS+ 224,OC_EXT_COEFFS+ 221
251
};
252
253
static const oc_extension_info OC_EXTENSION_INFO[OC_NSHAPES]={
254
{0x7F,7,OC_EXT_ROWS+ 0,{0,1,2,3,4,5,6,7},{0,1,2,4,5,6,7,3}},
255
{0xFE,7,OC_EXT_ROWS+ 7,{1,2,3,4,5,6,7,0},{0,1,2,4,5,6,7,3}},
256
{0x3F,6,OC_EXT_ROWS+ 8,{0,1,2,3,4,5,7,6},{0,1,3,4,6,7,5,2}},
257
{0xFC,6,OC_EXT_ROWS+ 10,{2,3,4,5,6,7,1,0},{0,1,3,4,6,7,5,2}},
258
{0x1F,5,OC_EXT_ROWS+ 12,{0,1,2,3,4,7,6,5},{0,2,3,5,7,6,4,1}},
259
{0xF8,5,OC_EXT_ROWS+ 15,{3,4,5,6,7,2,1,0},{0,2,3,5,7,6,4,1}},
260
{0x0F,4,OC_EXT_ROWS+ 18,{0,1,2,3,7,6,5,4},{0,2,4,6,7,5,3,1}},
261
{0xF0,4,OC_EXT_ROWS+ 18,{4,5,6,7,3,2,1,0},{0,2,4,6,7,5,3,1}},
262
{0x07,3,OC_EXT_ROWS+ 22,{0,1,2,7,6,5,4,3},{0,3,6,7,5,4,2,1}},
263
{0xE0,3,OC_EXT_ROWS+ 27,{5,6,7,4,3,2,1,0},{0,3,6,7,5,4,2,1}},
264
{0x03,2,OC_EXT_ROWS+ 32,{0,1,7,6,5,4,3,2},{0,4,7,6,5,3,2,1}},
265
{0xC0,2,OC_EXT_ROWS+ 32,{6,7,5,4,3,2,1,0},{0,4,7,6,5,3,2,1}},
266
{0x01,1,OC_EXT_ROWS+ 0,{0,7,6,5,4,3,2,1},{0,7,6,5,4,3,2,1}},
267
{0x80,1,OC_EXT_ROWS+ 0,{7,6,5,4,3,2,1,0},{0,7,6,5,4,3,2,1}},
268
{0x7E,6,OC_EXT_ROWS+ 42,{1,2,3,4,5,6,7,0},{0,1,2,5,6,7,4,3}},
269
{0x7C,5,OC_EXT_ROWS+ 44,{2,3,4,5,6,7,1,0},{0,1,4,5,7,6,3,2}},
270
{0x3E,5,OC_EXT_ROWS+ 47,{1,2,3,4,5,7,6,0},{0,1,4,5,7,6,3,2}},
271
{0x78,4,OC_EXT_ROWS+ 50,{3,4,5,6,7,2,1,0},{0,4,5,7,6,3,2,1}},
272
{0x3C,4,OC_EXT_ROWS+ 54,{2,3,4,5,7,6,1,0},{0,3,4,7,6,5,2,1}},
273
{0x1E,4,OC_EXT_ROWS+ 58,{1,2,3,4,7,6,5,0},{0,4,5,7,6,3,2,1}},
274
{0x70,3,OC_EXT_ROWS+ 62,{4,5,6,7,3,2,1,0},{0,5,7,6,4,3,2,1}},
275
{0x38,3,OC_EXT_ROWS+ 67,{3,4,5,7,6,2,1,0},{0,5,6,7,4,3,2,1}},
276
{0x1C,3,OC_EXT_ROWS+ 72,{2,3,4,7,6,5,1,0},{0,5,6,7,4,3,2,1}},
277
{0x0E,3,OC_EXT_ROWS+ 77,{1,2,3,7,6,5,4,0},{0,5,7,6,4,3,2,1}},
278
{0x60,2,OC_EXT_ROWS+ 82,{5,6,7,4,3,2,1,0},{0,2,7,6,5,4,3,1}},
279
{0x30,2,OC_EXT_ROWS+ 36,{4,5,7,6,3,2,1,0},{0,4,7,6,5,3,2,1}},
280
{0x18,2,OC_EXT_ROWS+ 90,{3,4,7,6,5,2,1,0},{0,1,7,6,5,4,3,2}},
281
{0x0C,2,OC_EXT_ROWS+ 34,{2,3,7,6,5,4,1,0},{0,4,7,6,5,3,2,1}},
282
{0x06,2,OC_EXT_ROWS+ 84,{1,2,7,6,5,4,3,0},{0,2,7,6,5,4,3,1}},
283
{0x40,1,OC_EXT_ROWS+ 0,{6,7,5,4,3,2,1,0},{0,7,6,5,4,3,2,1}},
284
{0x20,1,OC_EXT_ROWS+ 0,{5,7,6,4,3,2,1,0},{0,7,6,5,4,3,2,1}},
285
{0x10,1,OC_EXT_ROWS+ 0,{4,7,6,5,3,2,1,0},{0,7,6,5,4,3,2,1}},
286
{0x08,1,OC_EXT_ROWS+ 0,{3,7,6,5,4,2,1,0},{0,7,6,5,4,3,2,1}},
287
{0x04,1,OC_EXT_ROWS+ 0,{2,7,6,5,4,3,1,0},{0,7,6,5,4,3,2,1}},
288
{0x02,1,OC_EXT_ROWS+ 0,{1,7,6,5,4,3,2,0},{0,7,6,5,4,3,2,1}}
289
};
290
291
292
293
/*Pads a single column of a partial block and then performs a forward Type-II
294
DCT on the result.
295
The input is scaled by a factor of 4 and biased appropriately for the current
296
fDCT implementation.
297
The output is scaled by an additional factor of 2 from the orthonormal
298
version of the transform.
299
_y: The buffer to store the result in.
300
Data will be placed the first 8 entries (e.g., in a row of an 8x8 block).
301
_x: The input coefficients.
302
Every 8th entry is used (e.g., from a column of an 8x8 block).
303
_e: The extension information for the shape.*/
304
static void oc_fdct8_ext(ogg_int16_t _y[8],ogg_int16_t *_x,
305
const oc_extension_info *_e){
306
const unsigned char *pi;
307
int na;
308
na=_e->na;
309
pi=_e->pi;
310
if(na==1){
311
int ci;
312
/*While the branch below is still correct for shapes with na==1, we can
313
perform the entire transform with just 1 multiply in this case instead
314
of 23.*/
315
_y[0]=(ogg_int16_t)(OC_DIV2_16(OC_C4S4*(_x[pi[0]])));
316
for(ci=1;ci<8;ci++)_y[ci]=0;
317
}
318
else{
319
const ogg_int16_t *const *ext;
320
int zpi;
321
int api;
322
int nz;
323
/*First multiply by the extension matrix to compute the padding values.*/
324
nz=8-na;
325
ext=_e->ext;
326
for(zpi=0;zpi<nz;zpi++){
327
ogg_int32_t v;
328
v=0;
329
for(api=0;api<na;api++){
330
v+=ext[zpi][api]*(ogg_int32_t)(_x[pi[api]<<3]<<1);
331
}
332
_x[pi[na+zpi]<<3]=(ogg_int16_t)(v+0x8000>>16)+1>>1;
333
}
334
oc_fdct8(_y,_x);
335
}
336
}
337
338
/*Performs a forward 8x8 Type-II DCT transform on blocks which overlap the
339
border of the picture region.
340
This method ONLY works with rectangular regions.
341
_border: A description of which pixels are inside the border.
342
_y: The buffer to store the result in.
343
This may be the same as _x.
344
_x: The input pixel values.
345
Pixel values outside the border will be ignored.*/
346
void oc_fdct8x8_border(const oc_border_info *_border,
347
ogg_int16_t _y[64],const ogg_int16_t _x[64]){
348
ogg_int16_t *in;
349
ogg_int16_t *out;
350
ogg_int16_t w[64];
351
ogg_int64_t mask;
352
const oc_extension_info *cext;
353
const oc_extension_info *rext;
354
int cmask;
355
int rmask;
356
int ri;
357
int ci;
358
/*Identify the shapes of the non-zero rows and columns.*/
359
rmask=cmask=0;
360
mask=_border->mask;
361
for(ri=0;ri<8;ri++){
362
/*This aggregation is _only_ correct for rectangular masks.*/
363
cmask|=((mask&0xFF)!=0)<<ri;
364
rmask|=mask&0xFF;
365
mask>>=8;
366
}
367
/*Find the associated extension info for these shapes.*/
368
if(cmask==0xFF)cext=NULL;
369
else for(cext=OC_EXTENSION_INFO;cext->mask!=cmask;){
370
/*If we somehow can't find the shape, then just do an unpadded fDCT.
371
It won't be efficient, but it should still be correct.*/
372
if(++cext>=OC_EXTENSION_INFO+OC_NSHAPES){
373
oc_enc_fdct8x8_c(_y,_x);
374
return;
375
}
376
}
377
if(rmask==0xFF)rext=NULL;
378
else for(rext=OC_EXTENSION_INFO;rext->mask!=rmask;){
379
/*If we somehow can't find the shape, then just do an unpadded fDCT.
380
It won't be efficient, but it should still be correct.*/
381
if(++rext>=OC_EXTENSION_INFO+OC_NSHAPES){
382
oc_enc_fdct8x8_c(_y,_x);
383
return;
384
}
385
}
386
/*Add two extra bits of working precision to improve accuracy; any more and
387
we could overflow.*/
388
for(ci=0;ci<64;ci++)w[ci]=_x[ci]<<2;
389
/*These biases correct for some systematic error that remains in the full
390
fDCT->iDCT round trip.
391
We can safely add them before padding, since if these pixel values are
392
overwritten, we didn't care what they were anyway (and the unbiased values
393
will usually yield smaller DCT coefficient magnitudes).*/
394
w[0]+=(w[0]!=0)+1;
395
w[1]++;
396
w[8]--;
397
/*Transform the columns.
398
We can ignore zero columns without a problem.*/
399
in=w;
400
out=_y;
401
if(cext==NULL)for(ci=0;ci<8;ci++)oc_fdct8(out+(ci<<3),in+ci);
402
else for(ci=0;ci<8;ci++)if(rmask&(1<<ci))oc_fdct8_ext(out+(ci<<3),in+ci,cext);
403
/*Transform the rows.
404
We transform even rows that are supposedly zero, because rounding errors
405
may make them slightly non-zero, and this will give a more precise
406
reconstruction with very small quantizers.*/
407
in=_y;
408
out=w;
409
if(rext==NULL)for(ri=0;ri<8;ri++)oc_fdct8(out+(ri<<3),in+ri);
410
else for(ri=0;ri<8;ri++)oc_fdct8_ext(out+(ri<<3),in+ri,rext);
411
/*Round the result back to the external working precision (which is still
412
scaled by four relative to the orthogonal result).
413
TODO: We should just update the external working precision.*/
414
for(ci=0;ci<64;ci++)_y[ci]=w[ci]+2>>2;
415
}
416
#endif
417
418