CoCalc -- fdct.c

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/libtheora/fdct.c
⁹⁹⁰² views
1
/********************************************************************
2
 *                                                                  *
3
 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
4
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7
 *                                                                  *
8
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
9
 * by the Xiph.Org Foundation https://www.xiph.org/                 *
10
 *                                                                  *
11
 ********************************************************************
12

13
  function:
14

15
 ********************************************************************/
16
#include "encint.h"
17
#include "dct.h"
18

19

20

21
/*Performs a forward 8 point Type-II DCT transform.
22
  The output is scaled by a factor of 2 from the orthonormal version of the
23
   transform.
24
  _y: The buffer to store the result in.
25
      Data will be placed the first 8 entries (e.g., in a row of an 8x8 block).
26
  _x: The input coefficients.
27
      Every 8th entry is used (e.g., from a column of an 8x8 block).*/
28
static void oc_fdct8(ogg_int16_t _y[8],const ogg_int16_t *_x){
29
  int t0;
30
  int t1;
31
  int t2;
32
  int t3;
33
  int t4;
34
  int t5;
35
  int t6;
36
  int t7;
37
  int r;
38
  int s;
39
  int u;
40
  int v;
41
  /*Stage 1:*/
42
  /*0-7 butterfly.*/
43
  t0=_x[0<<3]+(int)_x[7<<3];
44
  t7=_x[0<<3]-(int)_x[7<<3];
45
  /*1-6 butterfly.*/
46
  t1=_x[1<<3]+(int)_x[6<<3];
47
  t6=_x[1<<3]-(int)_x[6<<3];
48
  /*2-5 butterfly.*/
49
  t2=_x[2<<3]+(int)_x[5<<3];
50
  t5=_x[2<<3]-(int)_x[5<<3];
51
  /*3-4 butterfly.*/
52
  t3=_x[3<<3]+(int)_x[4<<3];
53
  t4=_x[3<<3]-(int)_x[4<<3];
54
  /*Stage 2:*/
55
  /*0-3 butterfly.*/
56
  r=t0+t3;
57
  t3=t0-t3;
58
  t0=r;
59
  /*1-2 butterfly.*/
60
  r=t1+t2;
61
  t2=t1-t2;
62
  t1=r;
63
  /*6-5 butterfly.*/
64
  r=t6+t5;
65
  t5=t6-t5;
66
  t6=r;
67
  /*Stages 3 and 4 are where all the approximation occurs.
68
    These are chosen to be as close to an exact inverse of the approximations
69
     made in the iDCT as possible, while still using mostly 16-bit arithmetic.
70
    We use some 16x16->32 signed MACs, but those still commonly execute in 1
71
     cycle on a 16-bit DSP.
72
    For example, s=(27146*t5+0x4000>>16)+t5+(t5!=0) is an exact inverse of
73
     t5=(OC_C4S4*s>>16).
74
    That is, applying the latter to the output of the former will recover t5
75
     exactly (over the valid input range of t5, -23171...23169).
76
    We increase the rounding bias to 0xB500 in this particular case so that
77
     errors inverting the subsequent butterfly are not one-sided (e.g., the
78
     mean error is very close to zero).
79
    The (t5!=0) term could be replaced simply by 1, but we want to send 0 to 0.
80
    The fDCT of an all-zeros block will still not be zero, because of the
81
     biases we added at the very beginning of the process, but it will be close
82
     enough that it is guaranteed to round to zero.*/
83
  /*Stage 3:*/
84
  /*4-5 butterfly.*/
85
  s=(27146*t5+0xB500>>16)+t5+(t5!=0)>>1;
86
  r=t4+s;
87
  t5=t4-s;
88
  t4=r;
89
  /*7-6 butterfly.*/
90
  s=(27146*t6+0xB500>>16)+t6+(t6!=0)>>1;
91
  r=t7+s;
92
  t6=t7-s;
93
  t7=r;
94
  /*Stage 4:*/
95
  /*0-1 butterfly.*/
96
  r=(27146*t0+0x4000>>16)+t0+(t0!=0);
97
  s=(27146*t1+0xB500>>16)+t1+(t1!=0);
98
  u=r+s>>1;
99
  v=r-u;
100
  _y[0]=u;
101
  _y[4]=v;
102
  /*3-2 rotation by 6pi/16*/
103
  u=(OC_C6S2*t2+OC_C2S6*t3+0x6CB7>>16)+(t3!=0);
104
  s=(OC_C6S2*u>>16)-t2;
105
  v=(s*21600+0x2800>>18)+s+(s!=0);
106
  _y[2]=u;
107
  _y[6]=v;
108
  /*6-5 rotation by 3pi/16*/
109
  u=(OC_C5S3*t6+OC_C3S5*t5+0x0E3D>>16)+(t5!=0);
110
  s=t6-(OC_C5S3*u>>16);
111
  v=(s*26568+0x3400>>17)+s+(s!=0);
112
  _y[5]=u;
113
  _y[3]=v;
114
  /*7-4 rotation by 7pi/16*/
115
  u=(OC_C7S1*t4+OC_C1S7*t7+0x7B1B>>16)+(t7!=0);
116
  s=(OC_C7S1*u>>16)-t4;
117
  v=(s*20539+0x3000>>20)+s+(s!=0);
118
  _y[1]=u;
119
  _y[7]=v;
120
}
121

122
/*Performs a forward 8x8 Type-II DCT transform.
123
  The output is scaled by a factor of 4 relative to the orthonormal version
124
   of the transform.
125
  _y: The buffer to store the result in.
126
      This may be the same as _x.
127
  _x: The input coefficients. */
128
void oc_enc_fdct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
129
  const ogg_int16_t *in;
130
  ogg_int16_t       *end;
131
  ogg_int16_t       *out;
132
  ogg_int16_t        w[64];
133
  int                i;
134
  /*Add two extra bits of working precision to improve accuracy; any more and
135
     we could overflow.*/
136
  for(i=0;i<64;i++)w[i]=_x[i]<<2;
137
  /*These biases correct for some systematic error that remains in the full
138
     fDCT->iDCT round trip.*/
139
  w[0]+=(w[0]!=0)+1;
140
  w[1]++;
141
  w[8]--;
142
  /*Transform columns of w into rows of _y.*/
143
  for(in=w,out=_y,end=out+64;out<end;in++,out+=8)oc_fdct8(out,in);
144
  /*Transform columns of _y into rows of w.*/
145
  for(in=_y,out=w,end=out+64;out<end;in++,out+=8)oc_fdct8(out,in);
146
  /*Round the result back to the external working precision (which is still
147
     scaled by four relative to the orthogonal result).
148
    TODO: We should just update the external working precision.*/
149
  for(i=0;i<64;i++)_y[i]=w[OC_FZIG_ZAG[i]]+2>>2;
150
}
151

152

153

154
/*This does not seem to outperform simple LFE border padding before MC.
155
  It yields higher PSNR, but much higher bitrate usage.*/
156
#if 0
157
typedef struct oc_extension_info oc_extension_info;
158

159

160

161
/*Information needed to pad boundary blocks.
162
  We multiply each row/column by an extension matrix that fills in the padding
163
   values as a linear combination of the active values, so that an equivalent
164
   number of coefficients are forced to zero.
165
  This costs at most 16 multiplies, the same as a 1-D fDCT itself, and as
166
   little as 7 multiplies.
167
  We compute the extension matrices for every possible shape in advance, as
168
   there are only 35.
169
  The coefficients for all matrices are stored in a single array to take
170
   advantage of the overlap and repetitiveness of many of the shapes.
171
  A similar technique is applied to the offsets into this array.
172
  This reduces the required table storage by about 48%.
173
  See tools/extgen.c for details.
174
  We could conceivably do the same for all 256 possible shapes.*/
175
struct oc_extension_info{
176
  /*The mask of the active pixels in the shape.*/
177
  short                     mask;
178
  /*The number of active pixels in the shape.*/
179
  short                     na;
180
  /*The extension matrix.
181
    This is (8-na)xna*/
182
  const ogg_int16_t *const *ext;
183
  /*The pixel indices: na active pixels followed by 8-na padding pixels.*/
184
  unsigned char             pi[8];
185
  /*The coefficient indices: na unconstrained coefficients followed by 8-na
186
     coefficients to be forced to zero.*/
187
  unsigned char             ci[8];
188
};
189

190

191
/*The number of shapes we need.*/
192
#define OC_NSHAPES   (35)
193

194
/*The shared pool of extension matrix coefficients.
  OC_EXT_ROWS points into this array; the comment at the start of each line
   gives the array offset of the first value on that line.*/
static const ogg_int16_t OC_EXT_COEFFS[229]={
  /*  0*/0x7FFF,0xE1F8,0x6903,0xAA79,0x5587,0x7FFF,0x1E08,0x7FFF,
  /*  8*/0x5587,0xAA79,0x6903,0xE1F8,0x7FFF,0x0000,0x0000,0x0000,
  /* 16*/0x7FFF,0x0000,0x0000,0x7FFF,0x8000,0x7FFF,0x0000,0x0000,
  /* 24*/0x7FFF,0xE1F8,0x1E08,0xB0A7,0xAA1D,0x337C,0x7FFF,0x4345,
  /* 32*/0x2267,0x4345,0x7FFF,0x337C,0xAA1D,0xB0A7,0x8A8C,0x4F59,
  /* 40*/0x03B4,0xE2D6,0x7FFF,0x2CF3,0x7FFF,0xE2D6,0x03B4,0x4F59,
  /* 48*/0x8A8C,0x1103,0x7AEF,0x5225,0xDF60,0xC288,0xDF60,0x5225,
  /* 56*/0x7AEF,0x1103,0x668A,0xD6EE,0x3A16,0x0E6C,0xFA07,0x0E6C,
  /* 64*/0x3A16,0xD6EE,0x668A,0x2A79,0x2402,0x980F,0x50F5,0x4882,
  /* 72*/0x50F5,0x980F,0x2402,0x2A79,0xF976,0x2768,0x5F22,0x2768,
  /* 80*/0xF976,0x1F91,0x76C1,0xE9AE,0x76C1,0x1F91,0x7FFF,0xD185,
  /* 88*/0x0FC8,0xD185,0x7FFF,0x4F59,0x4345,0xED62,0x4345,0x4F59,
  /* 96*/0xF574,0x5D99,0x2CF3,0x5D99,0xF574,0x5587,0x3505,0x30FC,
  /*104*/0xF482,0x953C,0xEAC4,0x7FFF,0x4F04,0x7FFF,0xEAC4,0x953C,
  /*112*/0xF482,0x30FC,0x4F04,0x273D,0xD8C3,0x273D,0x1E09,0x61F7,
  /*120*/0x1E09,0x273D,0xD8C3,0x273D,0x4F04,0x30FC,0xA57E,0x153C,
  /*128*/0x6AC4,0x3C7A,0x1E08,0x3C7A,0x6AC4,0x153C,0xA57E,0x7FFF,
  /*136*/0xA57E,0x5A82,0x6AC4,0x153C,0xC386,0xE1F8,0xC386,0x153C,
  /*144*/0x6AC4,0x5A82,0xD8C3,0x273D,0x7FFF,0xE1F7,0x7FFF,0x273D,
  /*152*/0xD8C3,0x4F04,0x30FC,0xD8C3,0x273D,0xD8C3,0x30FC,0x4F04,
  /*160*/0x1FC8,0x67AD,0x1853,0xE038,0x1853,0x67AD,0x1FC8,0x4546,
  /*168*/0xE038,0x1FC8,0x3ABA,0x1FC8,0xE038,0x4546,0x3505,0x5587,
  /*176*/0xF574,0xBC11,0x78F4,0x4AFB,0xE6F3,0x4E12,0x3C11,0xF8F4,
  /*184*/0x4AFB,0x3C7A,0xF88B,0x3C11,0x78F4,0xCAFB,0x7FFF,0x08CC,
  /*192*/0x070C,0x236D,0x5587,0x236D,0x070C,0xF88B,0x3C7A,0x4AFB,
  /*200*/0xF8F4,0x3C11,0x7FFF,0x153C,0xCAFB,0x153C,0x7FFF,0x1E08,
  /*208*/0xE1F8,0x7FFF,0x08CC,0x7FFF,0xCAFB,0x78F4,0x3C11,0x4E12,
  /*216*/0xE6F3,0x4AFB,0x78F4,0xBC11,0xFE3D,0x7FFF,0xFE3D,0x2F3A,
  /*224*/0x7FFF,0x2F3A,0x89BC,0x7FFF,0x89BC
};
225

226
static const ogg_int16_t *const OC_EXT_ROWS[96]={
227
  OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   0,
228
  OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   0,OC_EXT_COEFFS+   6,
229
  OC_EXT_COEFFS+  27,OC_EXT_COEFFS+  38,OC_EXT_COEFFS+  43,OC_EXT_COEFFS+  32,
230
  OC_EXT_COEFFS+  49,OC_EXT_COEFFS+  58,OC_EXT_COEFFS+  67,OC_EXT_COEFFS+  71,
231
  OC_EXT_COEFFS+  62,OC_EXT_COEFFS+  53,OC_EXT_COEFFS+  12,OC_EXT_COEFFS+  15,
232
  OC_EXT_COEFFS+  14,OC_EXT_COEFFS+  13,OC_EXT_COEFFS+  76,OC_EXT_COEFFS+  81,
233
  OC_EXT_COEFFS+  86,OC_EXT_COEFFS+  91,OC_EXT_COEFFS+  96,OC_EXT_COEFFS+  98,
234
  OC_EXT_COEFFS+  93,OC_EXT_COEFFS+  88,OC_EXT_COEFFS+  83,OC_EXT_COEFFS+  78,
235
  OC_EXT_COEFFS+  12,OC_EXT_COEFFS+  15,OC_EXT_COEFFS+  15,OC_EXT_COEFFS+  12,
236
  OC_EXT_COEFFS+  12,OC_EXT_COEFFS+  15,OC_EXT_COEFFS+  12,OC_EXT_COEFFS+  15,
237
  OC_EXT_COEFFS+  15,OC_EXT_COEFFS+  12,OC_EXT_COEFFS+ 103,OC_EXT_COEFFS+ 108,
238
  OC_EXT_COEFFS+ 126,OC_EXT_COEFFS+  16,OC_EXT_COEFFS+ 137,OC_EXT_COEFFS+ 141,
239
  OC_EXT_COEFFS+  20,OC_EXT_COEFFS+ 130,OC_EXT_COEFFS+ 113,OC_EXT_COEFFS+ 116,
240
  OC_EXT_COEFFS+ 146,OC_EXT_COEFFS+ 153,OC_EXT_COEFFS+ 160,OC_EXT_COEFFS+ 167,
241
  OC_EXT_COEFFS+ 170,OC_EXT_COEFFS+ 163,OC_EXT_COEFFS+ 156,OC_EXT_COEFFS+ 149,
242
  OC_EXT_COEFFS+ 119,OC_EXT_COEFFS+ 122,OC_EXT_COEFFS+ 174,OC_EXT_COEFFS+ 177,
243
  OC_EXT_COEFFS+ 182,OC_EXT_COEFFS+ 187,OC_EXT_COEFFS+ 192,OC_EXT_COEFFS+ 197,
244
  OC_EXT_COEFFS+ 202,OC_EXT_COEFFS+ 207,OC_EXT_COEFFS+ 210,OC_EXT_COEFFS+ 215,
245
  OC_EXT_COEFFS+ 179,OC_EXT_COEFFS+ 189,OC_EXT_COEFFS+  24,OC_EXT_COEFFS+ 204,
246
  OC_EXT_COEFFS+ 184,OC_EXT_COEFFS+ 194,OC_EXT_COEFFS+ 212,OC_EXT_COEFFS+ 199,
247
  OC_EXT_COEFFS+ 217,OC_EXT_COEFFS+ 100,OC_EXT_COEFFS+ 134,OC_EXT_COEFFS+ 135,
248
  OC_EXT_COEFFS+ 135,OC_EXT_COEFFS+  12,OC_EXT_COEFFS+  15,OC_EXT_COEFFS+ 134,
249
  OC_EXT_COEFFS+ 134,OC_EXT_COEFFS+ 135,OC_EXT_COEFFS+ 220,OC_EXT_COEFFS+ 223,
250
  OC_EXT_COEFFS+ 226,OC_EXT_COEFFS+ 227,OC_EXT_COEFFS+ 224,OC_EXT_COEFFS+ 221
251
};
252

253
static const oc_extension_info OC_EXTENSION_INFO[OC_NSHAPES]={
254
  {0x7F,7,OC_EXT_ROWS+  0,{0,1,2,3,4,5,6,7},{0,1,2,4,5,6,7,3}},
255
  {0xFE,7,OC_EXT_ROWS+  7,{1,2,3,4,5,6,7,0},{0,1,2,4,5,6,7,3}},
256
  {0x3F,6,OC_EXT_ROWS+  8,{0,1,2,3,4,5,7,6},{0,1,3,4,6,7,5,2}},
257
  {0xFC,6,OC_EXT_ROWS+ 10,{2,3,4,5,6,7,1,0},{0,1,3,4,6,7,5,2}},
258
  {0x1F,5,OC_EXT_ROWS+ 12,{0,1,2,3,4,7,6,5},{0,2,3,5,7,6,4,1}},
259
  {0xF8,5,OC_EXT_ROWS+ 15,{3,4,5,6,7,2,1,0},{0,2,3,5,7,6,4,1}},
260
  {0x0F,4,OC_EXT_ROWS+ 18,{0,1,2,3,7,6,5,4},{0,2,4,6,7,5,3,1}},
261
  {0xF0,4,OC_EXT_ROWS+ 18,{4,5,6,7,3,2,1,0},{0,2,4,6,7,5,3,1}},
262
  {0x07,3,OC_EXT_ROWS+ 22,{0,1,2,7,6,5,4,3},{0,3,6,7,5,4,2,1}},
263
  {0xE0,3,OC_EXT_ROWS+ 27,{5,6,7,4,3,2,1,0},{0,3,6,7,5,4,2,1}},
264
  {0x03,2,OC_EXT_ROWS+ 32,{0,1,7,6,5,4,3,2},{0,4,7,6,5,3,2,1}},
265
  {0xC0,2,OC_EXT_ROWS+ 32,{6,7,5,4,3,2,1,0},{0,4,7,6,5,3,2,1}},
266
  {0x01,1,OC_EXT_ROWS+  0,{0,7,6,5,4,3,2,1},{0,7,6,5,4,3,2,1}},
267
  {0x80,1,OC_EXT_ROWS+  0,{7,6,5,4,3,2,1,0},{0,7,6,5,4,3,2,1}},
268
  {0x7E,6,OC_EXT_ROWS+ 42,{1,2,3,4,5,6,7,0},{0,1,2,5,6,7,4,3}},
269
  {0x7C,5,OC_EXT_ROWS+ 44,{2,3,4,5,6,7,1,0},{0,1,4,5,7,6,3,2}},
270
  {0x3E,5,OC_EXT_ROWS+ 47,{1,2,3,4,5,7,6,0},{0,1,4,5,7,6,3,2}},
271
  {0x78,4,OC_EXT_ROWS+ 50,{3,4,5,6,7,2,1,0},{0,4,5,7,6,3,2,1}},
272
  {0x3C,4,OC_EXT_ROWS+ 54,{2,3,4,5,7,6,1,0},{0,3,4,7,6,5,2,1}},
273
  {0x1E,4,OC_EXT_ROWS+ 58,{1,2,3,4,7,6,5,0},{0,4,5,7,6,3,2,1}},
274
  {0x70,3,OC_EXT_ROWS+ 62,{4,5,6,7,3,2,1,0},{0,5,7,6,4,3,2,1}},
275
  {0x38,3,OC_EXT_ROWS+ 67,{3,4,5,7,6,2,1,0},{0,5,6,7,4,3,2,1}},
276
  {0x1C,3,OC_EXT_ROWS+ 72,{2,3,4,7,6,5,1,0},{0,5,6,7,4,3,2,1}},
277
  {0x0E,3,OC_EXT_ROWS+ 77,{1,2,3,7,6,5,4,0},{0,5,7,6,4,3,2,1}},
278
  {0x60,2,OC_EXT_ROWS+ 82,{5,6,7,4,3,2,1,0},{0,2,7,6,5,4,3,1}},
279
  {0x30,2,OC_EXT_ROWS+ 36,{4,5,7,6,3,2,1,0},{0,4,7,6,5,3,2,1}},
280
  {0x18,2,OC_EXT_ROWS+ 90,{3,4,7,6,5,2,1,0},{0,1,7,6,5,4,3,2}},
281
  {0x0C,2,OC_EXT_ROWS+ 34,{2,3,7,6,5,4,1,0},{0,4,7,6,5,3,2,1}},
282
  {0x06,2,OC_EXT_ROWS+ 84,{1,2,7,6,5,4,3,0},{0,2,7,6,5,4,3,1}},
283
  {0x40,1,OC_EXT_ROWS+  0,{6,7,5,4,3,2,1,0},{0,7,6,5,4,3,2,1}},
284
  {0x20,1,OC_EXT_ROWS+  0,{5,7,6,4,3,2,1,0},{0,7,6,5,4,3,2,1}},
285
  {0x10,1,OC_EXT_ROWS+  0,{4,7,6,5,3,2,1,0},{0,7,6,5,4,3,2,1}},
286
  {0x08,1,OC_EXT_ROWS+  0,{3,7,6,5,4,2,1,0},{0,7,6,5,4,3,2,1}},
287
  {0x04,1,OC_EXT_ROWS+  0,{2,7,6,5,4,3,1,0},{0,7,6,5,4,3,2,1}},
288
  {0x02,1,OC_EXT_ROWS+  0,{1,7,6,5,4,3,2,0},{0,7,6,5,4,3,2,1}}
289
};
290

291

292

293
/*Pads a single column of a partial block and then performs a forward Type-II
294
   DCT on the result.
295
  The input is scaled by a factor of 4 and biased appropriately for the current
296
   fDCT implementation.
297
  The output is scaled by an additional factor of 2 from the orthonormal
298
   version of the transform.
299
  _y: The buffer to store the result in.
300
      Data will be placed the first 8 entries (e.g., in a row of an 8x8 block).
301
  _x: The input coefficients.
302
      Every 8th entry is used (e.g., from a column of an 8x8 block).
303
  _e: The extension information for the shape.*/
304
static void oc_fdct8_ext(ogg_int16_t _y[8],ogg_int16_t *_x,
305
 const oc_extension_info *_e){
306
  const unsigned char *pi;
307
  int                  na;
308
  na=_e->na;
309
  pi=_e->pi;
310
  if(na==1){
311
    int ci;
312
    /*While the branch below is still correct for shapes with na==1, we can
313
       perform the entire transform with just 1 multiply in this case instead
314
       of 23.*/
315
    _y[0]=(ogg_int16_t)(OC_DIV2_16(OC_C4S4*(_x[pi[0]])));
316
    for(ci=1;ci<8;ci++)_y[ci]=0;
317
  }
318
  else{
319
    const ogg_int16_t *const *ext;
320
    int                       zpi;
321
    int                       api;
322
    int                       nz;
323
    /*First multiply by the extension matrix to compute the padding values.*/
324
    nz=8-na;
325
    ext=_e->ext;
326
    for(zpi=0;zpi<nz;zpi++){
327
      ogg_int32_t v;
328
      v=0;
329
      for(api=0;api<na;api++){
330
        v+=ext[zpi][api]*(ogg_int32_t)(_x[pi[api]<<3]<<1);
331
      }
332
      _x[pi[na+zpi]<<3]=(ogg_int16_t)(v+0x8000>>16)+1>>1;
333
    }
334
    oc_fdct8(_y,_x);
335
  }
336
}
337

338
/*Performs a forward 8x8 Type-II DCT transform on blocks which overlap the
339
   border of the picture region.
340
  This method ONLY works with rectangular regions.
341
  _border: A description of which pixels are inside the border.
342
  _y:      The buffer to store the result in.
343
           This may be the same as _x.
344
  _x:      The input pixel values.
345
           Pixel values outside the border will be ignored.*/
346
void oc_fdct8x8_border(const oc_border_info *_border,
347
 ogg_int16_t _y[64],const ogg_int16_t _x[64]){
348
  ogg_int16_t             *in;
349
  ogg_int16_t             *out;
350
  ogg_int16_t              w[64];
351
  ogg_int64_t              mask;
352
  const oc_extension_info *cext;
353
  const oc_extension_info *rext;
354
  int                      cmask;
355
  int                      rmask;
356
  int                      ri;
357
  int                      ci;
358
  /*Identify the shapes of the non-zero rows and columns.*/
359
  rmask=cmask=0;
360
  mask=_border->mask;
361
  for(ri=0;ri<8;ri++){
362
    /*This aggregation is _only_ correct for rectangular masks.*/
363
    cmask|=((mask&0xFF)!=0)<<ri;
364
    rmask|=mask&0xFF;
365
    mask>>=8;
366
  }
367
  /*Find the associated extension info for these shapes.*/
368
  if(cmask==0xFF)cext=NULL;
369
  else for(cext=OC_EXTENSION_INFO;cext->mask!=cmask;){
370
    /*If we somehow can't find the shape, then just do an unpadded fDCT.
371
      It won't be efficient, but it should still be correct.*/
372
    if(++cext>=OC_EXTENSION_INFO+OC_NSHAPES){
373
      oc_enc_fdct8x8_c(_y,_x);
374
      return;
375
    }
376
  }
377
  if(rmask==0xFF)rext=NULL;
378
  else for(rext=OC_EXTENSION_INFO;rext->mask!=rmask;){
379
    /*If we somehow can't find the shape, then just do an unpadded fDCT.
380
      It won't be efficient, but it should still be correct.*/
381
    if(++rext>=OC_EXTENSION_INFO+OC_NSHAPES){
382
      oc_enc_fdct8x8_c(_y,_x);
383
      return;
384
    }
385
  }
386
  /*Add two extra bits of working precision to improve accuracy; any more and
387
     we could overflow.*/
388
  for(ci=0;ci<64;ci++)w[ci]=_x[ci]<<2;
389
  /*These biases correct for some systematic error that remains in the full
390
     fDCT->iDCT round trip.
391
    We can safely add them before padding, since if these pixel values are
392
     overwritten, we didn't care what they were anyway (and the unbiased values
393
     will usually yield smaller DCT coefficient magnitudes).*/
394
  w[0]+=(w[0]!=0)+1;
395
  w[1]++;
396
  w[8]--;
397
  /*Transform the columns.
398
    We can ignore zero columns without a problem.*/
399
  in=w;
400
  out=_y;
401
  if(cext==NULL)for(ci=0;ci<8;ci++)oc_fdct8(out+(ci<<3),in+ci);
402
  else for(ci=0;ci<8;ci++)if(rmask&(1<<ci))oc_fdct8_ext(out+(ci<<3),in+ci,cext);
403
  /*Transform the rows.
404
    We transform even rows that are supposedly zero, because rounding errors
405
     may make them slightly non-zero, and this will give a more precise
406
     reconstruction with very small quantizers.*/
407
  in=_y;
408
  out=w;
409
  if(rext==NULL)for(ri=0;ri<8;ri++)oc_fdct8(out+(ri<<3),in+ri);
410
  else for(ri=0;ri<8;ri++)oc_fdct8_ext(out+(ri<<3),in+ri,rext);
411
  /*Round the result back to the external working precision (which is still
412
     scaled by four relative to the orthogonal result).
413
    TODO: We should just update the external working precision.*/
414
  for(ci=0;ci<64;ci++)_y[ci]=w[ci]+2>>2;
415
}
416
#endif
417

418
Product

Resources

Company