CoCalc -- idct.c

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/libtheora/idct.c
⁹⁸⁹⁶ views
1
/********************************************************************
2
 *                                                                  *
3
 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
4
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7
 *                                                                  *
8
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009                *
9
 * by the Xiph.Org Foundation and contributors                      *
10
 * https://www.xiph.org/                                            *
11
 *                                                                  *
12
 ********************************************************************
13

14
  function:
15

16
 ********************************************************************/
17

18
#include <string.h>
19
#include "internal.h"
20
#include "dct.h"
21

22
/*Performs an inverse 8 point Type-II DCT transform.
23
  The output is scaled by a factor of 2 relative to the orthonormal version of
24
   the transform.
25
  _y: The buffer to store the result in.
26
      Data will be placed in every 8th entry (e.g., in a column of an 8x8
27
       block).
28
  _x: The input coefficients.
29
      The first 8 entries are used (e.g., from a row of an 8x8 block).*/
30
static void idct8(ogg_int16_t *_y,const ogg_int16_t _x[8]){
31
  ogg_int32_t t[8];
32
  ogg_int32_t r;
33
  /*Stage 1:*/
34
  /*0-1 butterfly.*/
35
  t[0]=OC_C4S4*(ogg_int16_t)(_x[0]+_x[4])>>16;
36
  t[1]=OC_C4S4*(ogg_int16_t)(_x[0]-_x[4])>>16;
37
  /*2-3 rotation by 6pi/16.*/
38
  t[2]=(OC_C6S2*_x[2]>>16)-(OC_C2S6*_x[6]>>16);
39
  t[3]=(OC_C2S6*_x[2]>>16)+(OC_C6S2*_x[6]>>16);
40
  /*4-7 rotation by 7pi/16.*/
41
  t[4]=(OC_C7S1*_x[1]>>16)-(OC_C1S7*_x[7]>>16);
42
  /*5-6 rotation by 3pi/16.*/
43
  t[5]=(OC_C3S5*_x[5]>>16)-(OC_C5S3*_x[3]>>16);
44
  t[6]=(OC_C5S3*_x[5]>>16)+(OC_C3S5*_x[3]>>16);
45
  t[7]=(OC_C1S7*_x[1]>>16)+(OC_C7S1*_x[7]>>16);
46
  /*Stage 2:*/
47
  /*4-5 butterfly.*/
48
  r=t[4]+t[5];
49
  t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16;
50
  t[4]=r;
51
  /*7-6 butterfly.*/
52
  r=t[7]+t[6];
53
  t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16;
54
  t[7]=r;
55
  /*Stage 3:*/
56
  /*0-3 butterfly.*/
57
  r=t[0]+t[3];
58
  t[3]=t[0]-t[3];
59
  t[0]=r;
60
  /*1-2 butterfly.*/
61
  r=t[1]+t[2];
62
  t[2]=t[1]-t[2];
63
  t[1]=r;
64
  /*6-5 butterfly.*/
65
  r=t[6]+t[5];
66
  t[5]=t[6]-t[5];
67
  t[6]=r;
68
  /*Stage 4:*/
69
  /*0-7 butterfly.*/
70
  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
71
  /*1-6 butterfly.*/
72
  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
73
  /*2-5 butterfly.*/
74
  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
75
  /*3-4 butterfly.*/
76
  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
77
  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
78
  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
79
  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
80
  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
81
}
82

83
/*Performs an inverse 8 point Type-II DCT transform.
84
  The output is scaled by a factor of 2 relative to the orthonormal version of
85
   the transform.
86
  _y: The buffer to store the result in.
87
      Data will be placed in every 8th entry (e.g., in a column of an 8x8
88
       block).
89
  _x: The input coefficients.
90
      Only the first 4 entries are used.
91
      The other 4 are assumed to be 0.*/
92
static void idct8_4(ogg_int16_t *_y,const ogg_int16_t _x[8]){
93
  ogg_int32_t t[8];
94
  ogg_int32_t r;
95
  /*Stage 1:*/
96
  t[0]=OC_C4S4*_x[0]>>16;
97
  t[2]=OC_C6S2*_x[2]>>16;
98
  t[3]=OC_C2S6*_x[2]>>16;
99
  t[4]=OC_C7S1*_x[1]>>16;
100
  t[5]=-(OC_C5S3*_x[3]>>16);
101
  t[6]=OC_C3S5*_x[3]>>16;
102
  t[7]=OC_C1S7*_x[1]>>16;
103
  /*Stage 2:*/
104
  r=t[4]+t[5];
105
  t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16;
106
  t[4]=r;
107
  r=t[7]+t[6];
108
  t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16;
109
  t[7]=r;
110
  /*Stage 3:*/
111
  t[1]=t[0]+t[2];
112
  t[2]=t[0]-t[2];
113
  r=t[0]+t[3];
114
  t[3]=t[0]-t[3];
115
  t[0]=r;
116
  r=t[6]+t[5];
117
  t[5]=t[6]-t[5];
118
  t[6]=r;
119
  /*Stage 4:*/
120
  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
121
  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
122
  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
123
  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
124
  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
125
  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
126
  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
127
  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
128
}
129

130
/*Performs an inverse 8 point Type-II DCT transform.
131
  The output is scaled by a factor of 2 relative to the orthonormal version of
132
   the transform.
133
  _y: The buffer to store the result in.
134
      Data will be placed in every 8th entry (e.g., in a column of an 8x8
135
       block).
136
  _x: The input coefficients.
137
      Only the first 3 entries are used.
138
      The other 5 are assumed to be 0.*/
139
static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){
140
  ogg_int32_t t[8];
141
  ogg_int32_t r;
142
  /*Stage 1:*/
143
  t[0]=OC_C4S4*_x[0]>>16;
144
  t[2]=OC_C6S2*_x[2]>>16;
145
  t[3]=OC_C2S6*_x[2]>>16;
146
  t[4]=OC_C7S1*_x[1]>>16;
147
  t[7]=OC_C1S7*_x[1]>>16;
148
  /*Stage 2:*/
149
  t[5]=OC_C4S4*t[4]>>16;
150
  t[6]=OC_C4S4*t[7]>>16;
151
  /*Stage 3:*/
152
  t[1]=t[0]+t[2];
153
  t[2]=t[0]-t[2];
154
  r=t[0]+t[3];
155
  t[3]=t[0]-t[3];
156
  t[0]=r;
157
  r=t[6]+t[5];
158
  t[5]=t[6]-t[5];
159
  t[6]=r;
160
  /*Stage 4:*/
161
  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
162
  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
163
  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
164
  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
165
  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
166
  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
167
  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
168
  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
169
}
170

171
/*Performs an inverse 8 point Type-II DCT transform.
172
  The output is scaled by a factor of 2 relative to the orthonormal version of
173
   the transform.
174
  _y: The buffer to store the result in.
175
      Data will be placed in every 8th entry (e.g., in a column of an 8x8
176
       block).
177
  _x: The input coefficients.
178
      Only the first 2 entries are used.
179
      The other 6 are assumed to be 0.*/
180
static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){
181
  ogg_int32_t t[8];
182
  ogg_int32_t r;
183
  /*Stage 1:*/
184
  t[0]=OC_C4S4*_x[0]>>16;
185
  t[4]=OC_C7S1*_x[1]>>16;
186
  t[7]=OC_C1S7*_x[1]>>16;
187
  /*Stage 2:*/
188
  t[5]=OC_C4S4*t[4]>>16;
189
  t[6]=OC_C4S4*t[7]>>16;
190
  /*Stage 3:*/
191
  r=t[6]+t[5];
192
  t[5]=t[6]-t[5];
193
  t[6]=r;
194
  /*Stage 4:*/
195
  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
196
  _y[1<<3]=(ogg_int16_t)(t[0]+t[6]);
197
  _y[2<<3]=(ogg_int16_t)(t[0]+t[5]);
198
  _y[3<<3]=(ogg_int16_t)(t[0]+t[4]);
199
  _y[4<<3]=(ogg_int16_t)(t[0]-t[4]);
200
  _y[5<<3]=(ogg_int16_t)(t[0]-t[5]);
201
  _y[6<<3]=(ogg_int16_t)(t[0]-t[6]);
202
  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
203
}
204

205
/*Performs an inverse 8 point Type-II DCT transform.
206
  The output is scaled by a factor of 2 relative to the orthonormal version of
207
   the transform.
208
  _y: The buffer to store the result in.
209
      Data will be placed in every 8th entry (e.g., in a column of an 8x8
210
       block).
211
  _x: The input coefficients.
212
      Only the first entry is used.
213
      The other 7 are assumed to be 0.*/
214
static void idct8_1(ogg_int16_t *_y,const ogg_int16_t _x[1]){
215
  _y[0<<3]=_y[1<<3]=_y[2<<3]=_y[3<<3]=
216
   _y[4<<3]=_y[5<<3]=_y[6<<3]=_y[7<<3]=(ogg_int16_t)(OC_C4S4*_x[0]>>16);
217
}
218

219
/*Performs an inverse 8x8 Type-II DCT transform.
220
  The input is assumed to be scaled by a factor of 4 relative to orthonormal
221
   version of the transform.
222
  All coefficients but the first 3 in zig-zag scan order are assumed to be 0:
223
   x  x  0  0  0  0  0  0
224
   x  0  0  0  0  0  0  0
225
   0  0  0  0  0  0  0  0
226
   0  0  0  0  0  0  0  0
227
   0  0  0  0  0  0  0  0
228
   0  0  0  0  0  0  0  0
229
   0  0  0  0  0  0  0  0
230
   0  0  0  0  0  0  0  0
231
  _y: The buffer to store the result in.
232
      This may be the same as _x.
233
  _x: The input coefficients.*/
234
static void oc_idct8x8_3(ogg_int16_t _y[64],ogg_int16_t _x[64]){
235
  ogg_int16_t w[64];
236
  int         i;
237
  /*Transform rows of x into columns of w.*/
238
  idct8_2(w,_x);
239
  idct8_1(w+1,_x+8);
240
  /*Transform rows of w into columns of y.*/
241
  for(i=0;i<8;i++)idct8_2(_y+i,w+i*8);
242
  /*Adjust for the scale factor.*/
243
  for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4);
244
  /*Clear input data for next block.*/
245
  _x[0]=_x[1]=_x[8]=0;
246
}
247

248
/*Performs an inverse 8x8 Type-II DCT transform.
249
  The input is assumed to be scaled by a factor of 4 relative to orthonormal
250
   version of the transform.
251
  All coefficients but the first 10 in zig-zag scan order are assumed to be 0:
252
   x  x  x  x  0  0  0  0
253
   x  x  x  0  0  0  0  0
254
   x  x  0  0  0  0  0  0
255
   x  0  0  0  0  0  0  0
256
   0  0  0  0  0  0  0  0
257
   0  0  0  0  0  0  0  0
258
   0  0  0  0  0  0  0  0
259
   0  0  0  0  0  0  0  0
260
  _y: The buffer to store the result in.
261
      This may be the same as _x.
262
  _x: The input coefficients.*/
263
static void oc_idct8x8_10(ogg_int16_t _y[64],ogg_int16_t _x[64]){
264
  ogg_int16_t w[64];
265
  int         i;
266
  /*Transform rows of x into columns of w.*/
267
  idct8_4(w,_x);
268
  idct8_3(w+1,_x+8);
269
  idct8_2(w+2,_x+16);
270
  idct8_1(w+3,_x+24);
271
  /*Transform rows of w into columns of y.*/
272
  for(i=0;i<8;i++)idct8_4(_y+i,w+i*8);
273
  /*Adjust for the scale factor.*/
274
  for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4);
275
  /*Clear input data for next block.*/
276
  _x[0]=_x[1]=_x[2]=_x[3]=_x[8]=_x[9]=_x[10]=_x[16]=_x[17]=_x[24]=0;
277
}
278

279
/*Performs an inverse 8x8 Type-II DCT transform.
280
  The input is assumed to be scaled by a factor of 4 relative to orthonormal
281
   version of the transform.
282
  _y: The buffer to store the result in.
283
      This may be the same as _x.
284
  _x: The input coefficients.*/
285
static void oc_idct8x8_slow(ogg_int16_t _y[64],ogg_int16_t _x[64]){
286
  ogg_int16_t w[64];
287
  int         i;
288
  /*Transform rows of x into columns of w.*/
289
  for(i=0;i<8;i++)idct8(w+i,_x+i*8);
290
  /*Transform rows of w into columns of y.*/
291
  for(i=0;i<8;i++)idct8(_y+i,w+i*8);
292
  /*Adjust for the scale factor.*/
293
  for(i=0;i<64;i++)_y[i]=(ogg_int16_t)(_y[i]+8>>4);
294
  /*Clear input data for next block.*/
295
  for(i=0;i<64;i++)_x[i]=0;
296
}
297

298
/*Performs an inverse 8x8 Type-II DCT transform.
299
  The input is assumed to be scaled by a factor of 4 relative to orthonormal
300
   version of the transform.*/
301
void oc_idct8x8_c(ogg_int16_t _y[64],ogg_int16_t _x[64],int _last_zzi){
302
  /*_last_zzi is subtly different from an actual count of the number of
303
     coefficients we decoded for this block.
304
    It contains the value of zzi BEFORE the final token in the block was
305
     decoded.
306
    In most cases this is an EOB token (the continuation of an EOB run from a
307
     previous block counts), and so this is the same as the coefficient count.
308
    However, in the case that the last token was NOT an EOB token, but filled
309
     the block up with exactly 64 coefficients, _last_zzi will be less than 64.
310
    Provided the last token was not a pure zero run, the minimum value it can
311
     be is 46, and so that doesn't affect any of the cases in this routine.
312
    However, if the last token WAS a pure zero run of length 63, then _last_zzi
313
     will be 1 while the number of coefficients decoded is 64.
314
    Thus, we will trigger the following special case, where the real
315
     coefficient count would not.
316
    Note also that a zero run of length 64 will give _last_zzi a value of 0,
317
     but we still process the DC coefficient, which might have a non-zero value
318
     due to DC prediction.
319
    Although convoluted, this is arguably the correct behavior: it allows us to
320
     use a smaller transform when the block ends with a long zero run instead
321
     of a normal EOB token.
322
    It could be smarter... multiple separate zero runs at the end of a block
323
     will fool it, but an encoder that generates these really deserves what it
324
     gets.
325
    Needless to say we inherited this approach from VP3.*/
326
  /*Then perform the iDCT.*/
327
  if(_last_zzi<=3)oc_idct8x8_3(_y,_x);
328
  else if(_last_zzi<=10)oc_idct8x8_10(_y,_x);
329
  else oc_idct8x8_slow(_y,_x);
330
}
331

332
Product

Resources

Company