Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/core/src/dxt.cpp
16337 views
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
8
//
9
//
10
// Intel License Agreement
11
// For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2000, Intel Corporation, all rights reserved.
14
// Third party copyrights are property of their respective owners.
15
//
16
// Redistribution and use in source and binary forms, with or without modification,
17
// are permitted provided that the following conditions are met:
18
//
19
// * Redistribution's of source code must retain the above copyright notice,
20
// this list of conditions and the following disclaimer.
21
//
22
// * Redistribution's in binary form must reproduce the above copyright notice,
23
// this list of conditions and the following disclaimer in the documentation
24
// and/or other materials provided with the distribution.
25
//
26
// * The name of Intel Corporation may not be used to endorse or promote products
27
// derived from this software without specific prior written permission.
28
//
29
// This software is provided by the copyright holders and contributors "as is" and
30
// any express or implied warranties, including, but not limited to, the implied
31
// warranties of merchantability and fitness for a particular purpose are disclaimed.
32
// In no event shall the Intel Corporation or contributors be liable for any direct,
33
// indirect, incidental, special, exemplary, or consequential damages
34
// (including, but not limited to, procurement of substitute goods or services;
35
// loss of use, data, or profits; or business interruption) however caused
36
// and on any theory of liability, whether in contract, strict liability,
37
// or tort (including negligence or otherwise) arising in any way out of
38
// the use of this software, even if advised of the possibility of such damage.
39
//
40
//M*/
41
42
#include "precomp.hpp"
43
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
44
#include "opencv2/core/opencl/runtime/opencl_core.hpp"
45
#include "opencl_kernels_core.hpp"
46
#include <map>
47
48
namespace cv
49
{
50
51
// On Win64 optimized versions of DFT and DCT fail the tests (fixed in VS2010)
52
#if defined _MSC_VER && !defined CV_ICC && defined _M_X64 && _MSC_VER < 1600
53
# pragma optimize("", off)
54
# pragma warning(disable: 4748)
55
#endif
56
57
#if IPP_VERSION_X100 >= 710
58
#define USE_IPP_DFT 1
59
#else
60
#undef USE_IPP_DFT
61
#endif
62
63
/****************************************************************************************\
64
Discrete Fourier Transform
65
\****************************************************************************************/
66
67
#define CV_MAX_LOCAL_DFT_SIZE (1 << 15)
68
69
static unsigned char bitrevTab[] =
70
{
71
0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0,0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0,
72
0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8,0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8,
73
0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4,0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4,
74
0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec,0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc,
75
0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2,0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2,
76
0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea,0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa,
77
0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6,0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6,
78
0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee,0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe,
79
0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1,0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1,
80
0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9,0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9,
81
0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5,0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5,
82
0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed,0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd,
83
0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3,0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3,
84
0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb,0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb,
85
0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7,0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7,
86
0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef,0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
87
};
88
89
static const double DFTTab[][2] =
90
{
91
{ 1.00000000000000000, 0.00000000000000000 },
92
{-1.00000000000000000, 0.00000000000000000 },
93
{ 0.00000000000000000, 1.00000000000000000 },
94
{ 0.70710678118654757, 0.70710678118654746 },
95
{ 0.92387953251128674, 0.38268343236508978 },
96
{ 0.98078528040323043, 0.19509032201612825 },
97
{ 0.99518472667219693, 0.09801714032956060 },
98
{ 0.99879545620517241, 0.04906767432741802 },
99
{ 0.99969881869620425, 0.02454122852291229 },
100
{ 0.99992470183914450, 0.01227153828571993 },
101
{ 0.99998117528260111, 0.00613588464915448 },
102
{ 0.99999529380957619, 0.00306795676296598 },
103
{ 0.99999882345170188, 0.00153398018628477 },
104
{ 0.99999970586288223, 0.00076699031874270 },
105
{ 0.99999992646571789, 0.00038349518757140 },
106
{ 0.99999998161642933, 0.00019174759731070 },
107
{ 0.99999999540410733, 0.00009587379909598 },
108
{ 0.99999999885102686, 0.00004793689960307 },
109
{ 0.99999999971275666, 0.00002396844980842 },
110
{ 0.99999999992818922, 0.00001198422490507 },
111
{ 0.99999999998204725, 0.00000599211245264 },
112
{ 0.99999999999551181, 0.00000299605622633 },
113
{ 0.99999999999887801, 0.00000149802811317 },
114
{ 0.99999999999971945, 0.00000074901405658 },
115
{ 0.99999999999992983, 0.00000037450702829 },
116
{ 0.99999999999998246, 0.00000018725351415 },
117
{ 0.99999999999999567, 0.00000009362675707 },
118
{ 0.99999999999999889, 0.00000004681337854 },
119
{ 0.99999999999999978, 0.00000002340668927 },
120
{ 0.99999999999999989, 0.00000001170334463 },
121
{ 1.00000000000000000, 0.00000000585167232 },
122
{ 1.00000000000000000, 0.00000000292583616 }
123
};
124
125
#define BitRev(i,shift) \
126
((int)((((unsigned)bitrevTab[(i)&255] << 24)+ \
127
((unsigned)bitrevTab[((i)>> 8)&255] << 16)+ \
128
((unsigned)bitrevTab[((i)>>16)&255] << 8)+ \
129
((unsigned)bitrevTab[((i)>>24)])) >> (shift)))
130
131
static int
132
DFTFactorize( int n, int* factors )
133
{
134
int nf = 0, f, i, j;
135
136
if( n <= 5 )
137
{
138
factors[0] = n;
139
return 1;
140
}
141
142
f = (((n - 1)^n)+1) >> 1;
143
if( f > 1 )
144
{
145
factors[nf++] = f;
146
n = f == n ? 1 : n/f;
147
}
148
149
for( f = 3; n > 1; )
150
{
151
int d = n/f;
152
if( d*f == n )
153
{
154
factors[nf++] = f;
155
n = d;
156
}
157
else
158
{
159
f += 2;
160
if( f*f > n )
161
break;
162
}
163
}
164
165
if( n > 1 )
166
factors[nf++] = n;
167
168
f = (factors[0] & 1) == 0;
169
for( i = f; i < (nf+f)/2; i++ )
170
CV_SWAP( factors[i], factors[nf-i-1+f], j );
171
172
return nf;
173
}
174
175
static void
176
DFTInit( int n0, int nf, const int* factors, int* itab, int elem_size, void* _wave, int inv_itab )
177
{
178
int digits[34], radix[34];
179
int n = factors[0], m = 0;
180
int* itab0 = itab;
181
int i, j, k;
182
Complex<double> w, w1;
183
double t;
184
185
if( n0 <= 5 )
186
{
187
itab[0] = 0;
188
itab[n0-1] = n0-1;
189
190
if( n0 != 4 )
191
{
192
for( i = 1; i < n0-1; i++ )
193
itab[i] = i;
194
}
195
else
196
{
197
itab[1] = 2;
198
itab[2] = 1;
199
}
200
if( n0 == 5 )
201
{
202
if( elem_size == sizeof(Complex<double>) )
203
((Complex<double>*)_wave)[0] = Complex<double>(1.,0.);
204
else
205
((Complex<float>*)_wave)[0] = Complex<float>(1.f,0.f);
206
}
207
if( n0 != 4 )
208
return;
209
m = 2;
210
}
211
else
212
{
213
// radix[] is initialized from index 'nf' down to zero
214
assert (nf < 34);
215
radix[nf] = 1;
216
digits[nf] = 0;
217
for( i = 0; i < nf; i++ )
218
{
219
digits[i] = 0;
220
radix[nf-i-1] = radix[nf-i]*factors[nf-i-1];
221
}
222
223
if( inv_itab && factors[0] != factors[nf-1] )
224
itab = (int*)_wave;
225
226
if( (n & 1) == 0 )
227
{
228
int a = radix[1], na2 = n*a>>1, na4 = na2 >> 1;
229
for( m = 0; (unsigned)(1 << m) < (unsigned)n; m++ )
230
;
231
if( n <= 2 )
232
{
233
itab[0] = 0;
234
itab[1] = na2;
235
}
236
else if( n <= 256 )
237
{
238
int shift = 10 - m;
239
for( i = 0; i <= n - 4; i += 4 )
240
{
241
j = (bitrevTab[i>>2]>>shift)*a;
242
itab[i] = j;
243
itab[i+1] = j + na2;
244
itab[i+2] = j + na4;
245
itab[i+3] = j + na2 + na4;
246
}
247
}
248
else
249
{
250
int shift = 34 - m;
251
for( i = 0; i < n; i += 4 )
252
{
253
int i4 = i >> 2;
254
j = BitRev(i4,shift)*a;
255
itab[i] = j;
256
itab[i+1] = j + na2;
257
itab[i+2] = j + na4;
258
itab[i+3] = j + na2 + na4;
259
}
260
}
261
262
digits[1]++;
263
264
if( nf >= 2 )
265
{
266
for( i = n, j = radix[2]; i < n0; )
267
{
268
for( k = 0; k < n; k++ )
269
itab[i+k] = itab[k] + j;
270
if( (i += n) >= n0 )
271
break;
272
j += radix[2];
273
for( k = 1; ++digits[k] >= factors[k]; k++ )
274
{
275
digits[k] = 0;
276
j += radix[k+2] - radix[k];
277
}
278
}
279
}
280
}
281
else
282
{
283
for( i = 0, j = 0;; )
284
{
285
itab[i] = j;
286
if( ++i >= n0 )
287
break;
288
j += radix[1];
289
for( k = 0; ++digits[k] >= factors[k]; k++ )
290
{
291
digits[k] = 0;
292
j += radix[k+2] - radix[k];
293
}
294
}
295
}
296
297
if( itab != itab0 )
298
{
299
itab0[0] = 0;
300
for( i = n0 & 1; i < n0; i += 2 )
301
{
302
int k0 = itab[i];
303
int k1 = itab[i+1];
304
itab0[k0] = i;
305
itab0[k1] = i+1;
306
}
307
}
308
}
309
310
if( (n0 & (n0-1)) == 0 )
311
{
312
w.re = w1.re = DFTTab[m][0];
313
w.im = w1.im = -DFTTab[m][1];
314
}
315
else
316
{
317
t = -CV_PI*2/n0;
318
w.im = w1.im = sin(t);
319
w.re = w1.re = std::sqrt(1. - w1.im*w1.im);
320
}
321
n = (n0+1)/2;
322
323
if( elem_size == sizeof(Complex<double>) )
324
{
325
Complex<double>* wave = (Complex<double>*)_wave;
326
327
wave[0].re = 1.;
328
wave[0].im = 0.;
329
330
if( (n0 & 1) == 0 )
331
{
332
wave[n].re = -1.;
333
wave[n].im = 0;
334
}
335
336
for( i = 1; i < n; i++ )
337
{
338
wave[i] = w;
339
wave[n0-i].re = w.re;
340
wave[n0-i].im = -w.im;
341
342
t = w.re*w1.re - w.im*w1.im;
343
w.im = w.re*w1.im + w.im*w1.re;
344
w.re = t;
345
}
346
}
347
else
348
{
349
Complex<float>* wave = (Complex<float>*)_wave;
350
assert( elem_size == sizeof(Complex<float>) );
351
352
wave[0].re = 1.f;
353
wave[0].im = 0.f;
354
355
if( (n0 & 1) == 0 )
356
{
357
wave[n].re = -1.f;
358
wave[n].im = 0.f;
359
}
360
361
for( i = 1; i < n; i++ )
362
{
363
wave[i].re = (float)w.re;
364
wave[i].im = (float)w.im;
365
wave[n0-i].re = (float)w.re;
366
wave[n0-i].im = (float)-w.im;
367
368
t = w.re*w1.re - w.im*w1.im;
369
w.im = w.re*w1.im + w.im*w1.re;
370
w.re = t;
371
}
372
}
373
}
374
375
template<typename T> struct DFT_VecR4
376
{
377
int operator()(Complex<T>*, int, int, int&, const Complex<T>*) const { return 1; }
378
};
379
380
#if CV_SSE3
381
382
// optimized radix-4 transform
383
template<> struct DFT_VecR4<float>
384
{
385
int operator()(Complex<float>* dst, int N, int n0, int& _dw0, const Complex<float>* wave) const
386
{
387
int n = 1, i, j, nx, dw, dw0 = _dw0;
388
__m128 z = _mm_setzero_ps(), x02=z, x13=z, w01=z, w23=z, y01, y23, t0, t1;
389
Cv32suf t; t.i = 0x80000000;
390
__m128 neg0_mask = _mm_load_ss(&t.f);
391
__m128 neg3_mask = _mm_shuffle_ps(neg0_mask, neg0_mask, _MM_SHUFFLE(0,1,2,3));
392
393
for( ; n*4 <= N; )
394
{
395
nx = n;
396
n *= 4;
397
dw0 /= 4;
398
399
for( i = 0; i < n0; i += n )
400
{
401
Complexf *v0, *v1;
402
403
v0 = dst + i;
404
v1 = v0 + nx*2;
405
406
x02 = _mm_loadl_pi(x02, (const __m64*)&v0[0]);
407
x13 = _mm_loadl_pi(x13, (const __m64*)&v0[nx]);
408
x02 = _mm_loadh_pi(x02, (const __m64*)&v1[0]);
409
x13 = _mm_loadh_pi(x13, (const __m64*)&v1[nx]);
410
411
y01 = _mm_add_ps(x02, x13);
412
y23 = _mm_sub_ps(x02, x13);
413
t1 = _mm_xor_ps(_mm_shuffle_ps(y01, y23, _MM_SHUFFLE(2,3,3,2)), neg3_mask);
414
t0 = _mm_movelh_ps(y01, y23);
415
y01 = _mm_add_ps(t0, t1);
416
y23 = _mm_sub_ps(t0, t1);
417
418
_mm_storel_pi((__m64*)&v0[0], y01);
419
_mm_storeh_pi((__m64*)&v0[nx], y01);
420
_mm_storel_pi((__m64*)&v1[0], y23);
421
_mm_storeh_pi((__m64*)&v1[nx], y23);
422
423
for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
424
{
425
v0 = dst + i + j;
426
v1 = v0 + nx*2;
427
428
x13 = _mm_loadl_pi(x13, (const __m64*)&v0[nx]);
429
w23 = _mm_loadl_pi(w23, (const __m64*)&wave[dw*2]);
430
x13 = _mm_loadh_pi(x13, (const __m64*)&v1[nx]); // x1, x3 = r1 i1 r3 i3
431
w23 = _mm_loadh_pi(w23, (const __m64*)&wave[dw*3]); // w2, w3 = wr2 wi2 wr3 wi3
432
433
t0 = _mm_mul_ps(_mm_moveldup_ps(x13), w23);
434
t1 = _mm_mul_ps(_mm_movehdup_ps(x13), _mm_shuffle_ps(w23, w23, _MM_SHUFFLE(2,3,0,1)));
435
x13 = _mm_addsub_ps(t0, t1);
436
// re(x1*w2), im(x1*w2), re(x3*w3), im(x3*w3)
437
x02 = _mm_loadl_pi(x02, (const __m64*)&v1[0]); // x2 = r2 i2
438
w01 = _mm_loadl_pi(w01, (const __m64*)&wave[dw]); // w1 = wr1 wi1
439
x02 = _mm_shuffle_ps(x02, x02, _MM_SHUFFLE(0,0,1,1));
440
w01 = _mm_shuffle_ps(w01, w01, _MM_SHUFFLE(1,0,0,1));
441
x02 = _mm_mul_ps(x02, w01);
442
x02 = _mm_addsub_ps(x02, _mm_movelh_ps(x02, x02));
443
// re(x0) im(x0) re(x2*w1), im(x2*w1)
444
x02 = _mm_loadl_pi(x02, (const __m64*)&v0[0]);
445
446
y01 = _mm_add_ps(x02, x13);
447
y23 = _mm_sub_ps(x02, x13);
448
t1 = _mm_xor_ps(_mm_shuffle_ps(y01, y23, _MM_SHUFFLE(2,3,3,2)), neg3_mask);
449
t0 = _mm_movelh_ps(y01, y23);
450
y01 = _mm_add_ps(t0, t1);
451
y23 = _mm_sub_ps(t0, t1);
452
453
_mm_storel_pi((__m64*)&v0[0], y01);
454
_mm_storeh_pi((__m64*)&v0[nx], y01);
455
_mm_storel_pi((__m64*)&v1[0], y23);
456
_mm_storeh_pi((__m64*)&v1[nx], y23);
457
}
458
}
459
}
460
461
_dw0 = dw0;
462
return n;
463
}
464
};
465
466
#endif
467
468
#ifdef USE_IPP_DFT
469
static IppStatus ippsDFTFwd_CToC( const Complex<float>* src, Complex<float>* dst,
470
const void* spec, uchar* buf)
471
{
472
return CV_INSTRUMENT_FUN_IPP(ippsDFTFwd_CToC_32fc, (const Ipp32fc*)src, (Ipp32fc*)dst,
473
(const IppsDFTSpec_C_32fc*)spec, buf);
474
}
475
476
static IppStatus ippsDFTFwd_CToC( const Complex<double>* src, Complex<double>* dst,
477
const void* spec, uchar* buf)
478
{
479
return CV_INSTRUMENT_FUN_IPP(ippsDFTFwd_CToC_64fc, (const Ipp64fc*)src, (Ipp64fc*)dst,
480
(const IppsDFTSpec_C_64fc*)spec, buf);
481
}
482
483
static IppStatus ippsDFTInv_CToC( const Complex<float>* src, Complex<float>* dst,
484
const void* spec, uchar* buf)
485
{
486
return CV_INSTRUMENT_FUN_IPP(ippsDFTInv_CToC_32fc, (const Ipp32fc*)src, (Ipp32fc*)dst,
487
(const IppsDFTSpec_C_32fc*)spec, buf);
488
}
489
490
static IppStatus ippsDFTInv_CToC( const Complex<double>* src, Complex<double>* dst,
491
const void* spec, uchar* buf)
492
{
493
return CV_INSTRUMENT_FUN_IPP(ippsDFTInv_CToC_64fc, (const Ipp64fc*)src, (Ipp64fc*)dst,
494
(const IppsDFTSpec_C_64fc*)spec, buf);
495
}
496
497
static IppStatus ippsDFTFwd_RToPack( const float* src, float* dst,
498
const void* spec, uchar* buf)
499
{
500
return CV_INSTRUMENT_FUN_IPP(ippsDFTFwd_RToPack_32f, src, dst, (const IppsDFTSpec_R_32f*)spec, buf);
501
}
502
503
static IppStatus ippsDFTFwd_RToPack( const double* src, double* dst,
504
const void* spec, uchar* buf)
505
{
506
return CV_INSTRUMENT_FUN_IPP(ippsDFTFwd_RToPack_64f, src, dst, (const IppsDFTSpec_R_64f*)spec, buf);
507
}
508
509
static IppStatus ippsDFTInv_PackToR( const float* src, float* dst,
510
const void* spec, uchar* buf)
511
{
512
return CV_INSTRUMENT_FUN_IPP(ippsDFTInv_PackToR_32f, src, dst, (const IppsDFTSpec_R_32f*)spec, buf);
513
}
514
515
static IppStatus ippsDFTInv_PackToR( const double* src, double* dst,
516
const void* spec, uchar* buf)
517
{
518
return CV_INSTRUMENT_FUN_IPP(ippsDFTInv_PackToR_64f, src, dst, (const IppsDFTSpec_R_64f*)spec, buf);
519
}
520
#endif
521
522
struct OcvDftOptions;
523
524
typedef void (*DFTFunc)(const OcvDftOptions & c, const void* src, void* dst);
525
526
struct OcvDftOptions {
527
int nf;
528
int *factors;
529
double scale;
530
531
int* itab;
532
void* wave;
533
int tab_size;
534
int n;
535
536
bool isInverse;
537
bool noPermute;
538
bool isComplex;
539
540
bool haveSSE3;
541
542
DFTFunc dft_func;
543
bool useIpp;
544
545
#ifdef USE_IPP_DFT
546
uchar* ipp_spec;
547
uchar* ipp_work;
548
#endif
549
550
OcvDftOptions()
551
{
552
nf = 0;
553
factors = 0;
554
scale = 0;
555
itab = 0;
556
wave = 0;
557
tab_size = 0;
558
n = 0;
559
isInverse = false;
560
noPermute = false;
561
isComplex = false;
562
useIpp = false;
563
#ifdef USE_IPP_DFT
564
ipp_spec = 0;
565
ipp_work = 0;
566
#endif
567
dft_func = 0;
568
haveSSE3 = checkHardwareSupport(CV_CPU_SSE3);
569
}
570
};
571
572
// mixed-radix complex discrete Fourier transform: double-precision version
573
template<typename T> static void
574
DFT(const OcvDftOptions & c, const Complex<T>* src, Complex<T>* dst)
575
{
576
static const T sin_120 = (T)0.86602540378443864676372317075294;
577
static const T fft5_2 = (T)0.559016994374947424102293417182819;
578
static const T fft5_3 = (T)-0.951056516295153572116439333379382;
579
static const T fft5_4 = (T)-1.538841768587626701285145288018455;
580
static const T fft5_5 = (T)0.363271264002680442947733378740309;
581
582
const Complex<T>* wave = (Complex<T>*)c.wave;
583
const int * itab = c.itab;
584
585
int n = c.n;
586
int f_idx, nx;
587
int inv = c.isInverse;
588
int dw0 = c.tab_size, dw;
589
int i, j, k;
590
Complex<T> t;
591
T scale = (T)c.scale;
592
593
if( c.useIpp )
594
{
595
#ifdef USE_IPP_DFT
596
if( !inv )
597
{
598
if (ippsDFTFwd_CToC( src, dst, c.ipp_spec, c.ipp_work ) >= 0)
599
{
600
CV_IMPL_ADD(CV_IMPL_IPP);
601
return;
602
}
603
}
604
else
605
{
606
if (ippsDFTInv_CToC( src, dst, c.ipp_spec, c.ipp_work ) >= 0)
607
{
608
CV_IMPL_ADD(CV_IMPL_IPP);
609
return;
610
}
611
}
612
setIppErrorStatus();
613
#endif
614
}
615
616
int tab_step = c.tab_size == n ? 1 : c.tab_size == n*2 ? 2 : c.tab_size/n;
617
618
// 0. shuffle data
619
if( dst != src )
620
{
621
assert( !c.noPermute );
622
if( !inv )
623
{
624
for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step )
625
{
626
int k0 = itab[0], k1 = itab[tab_step];
627
assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n );
628
dst[i] = src[k0]; dst[i+1] = src[k1];
629
}
630
631
if( i < n )
632
dst[n-1] = src[n-1];
633
}
634
else
635
{
636
for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step )
637
{
638
int k0 = itab[0], k1 = itab[tab_step];
639
assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n );
640
t.re = src[k0].re; t.im = -src[k0].im;
641
dst[i] = t;
642
t.re = src[k1].re; t.im = -src[k1].im;
643
dst[i+1] = t;
644
}
645
646
if( i < n )
647
{
648
t.re = src[n-1].re; t.im = -src[n-1].im;
649
dst[i] = t;
650
}
651
}
652
}
653
else
654
{
655
if( !c.noPermute )
656
{
657
CV_Assert( c.factors[0] == c.factors[c.nf-1] );
658
if( c.nf == 1 )
659
{
660
if( (n & 3) == 0 )
661
{
662
int n2 = n/2;
663
Complex<T>* dsth = dst + n2;
664
665
for( i = 0; i < n2; i += 2, itab += tab_step*2 )
666
{
667
j = itab[0];
668
assert( (unsigned)j < (unsigned)n2 );
669
670
CV_SWAP(dst[i+1], dsth[j], t);
671
if( j > i )
672
{
673
CV_SWAP(dst[i], dst[j], t);
674
CV_SWAP(dsth[i+1], dsth[j+1], t);
675
}
676
}
677
}
678
// else do nothing
679
}
680
else
681
{
682
for( i = 0; i < n; i++, itab += tab_step )
683
{
684
j = itab[0];
685
assert( (unsigned)j < (unsigned)n );
686
if( j > i )
687
CV_SWAP(dst[i], dst[j], t);
688
}
689
}
690
}
691
692
if( inv )
693
{
694
for( i = 0; i <= n - 2; i += 2 )
695
{
696
T t0 = -dst[i].im;
697
T t1 = -dst[i+1].im;
698
dst[i].im = t0; dst[i+1].im = t1;
699
}
700
701
if( i < n )
702
dst[n-1].im = -dst[n-1].im;
703
}
704
}
705
706
n = 1;
707
// 1. power-2 transforms
708
if( (c.factors[0] & 1) == 0 )
709
{
710
if( c.factors[0] >= 4 && c.haveSSE3)
711
{
712
DFT_VecR4<T> vr4;
713
n = vr4(dst, c.factors[0], c.n, dw0, wave);
714
}
715
716
// radix-4 transform
717
for( ; n*4 <= c.factors[0]; )
718
{
719
nx = n;
720
n *= 4;
721
dw0 /= 4;
722
723
for( i = 0; i < c.n; i += n )
724
{
725
Complex<T> *v0, *v1;
726
T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4;
727
728
v0 = dst + i;
729
v1 = v0 + nx*2;
730
731
r0 = v1[0].re; i0 = v1[0].im;
732
r4 = v1[nx].re; i4 = v1[nx].im;
733
734
r1 = r0 + r4; i1 = i0 + i4;
735
r3 = i0 - i4; i3 = r4 - r0;
736
737
r2 = v0[0].re; i2 = v0[0].im;
738
r4 = v0[nx].re; i4 = v0[nx].im;
739
740
r0 = r2 + r4; i0 = i2 + i4;
741
r2 -= r4; i2 -= i4;
742
743
v0[0].re = r0 + r1; v0[0].im = i0 + i1;
744
v1[0].re = r0 - r1; v1[0].im = i0 - i1;
745
v0[nx].re = r2 + r3; v0[nx].im = i2 + i3;
746
v1[nx].re = r2 - r3; v1[nx].im = i2 - i3;
747
748
for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
749
{
750
v0 = dst + i + j;
751
v1 = v0 + nx*2;
752
753
r2 = v0[nx].re*wave[dw*2].re - v0[nx].im*wave[dw*2].im;
754
i2 = v0[nx].re*wave[dw*2].im + v0[nx].im*wave[dw*2].re;
755
r0 = v1[0].re*wave[dw].im + v1[0].im*wave[dw].re;
756
i0 = v1[0].re*wave[dw].re - v1[0].im*wave[dw].im;
757
r3 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re;
758
i3 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im;
759
760
r1 = i0 + i3; i1 = r0 + r3;
761
r3 = r0 - r3; i3 = i3 - i0;
762
r4 = v0[0].re; i4 = v0[0].im;
763
764
r0 = r4 + r2; i0 = i4 + i2;
765
r2 = r4 - r2; i2 = i4 - i2;
766
767
v0[0].re = r0 + r1; v0[0].im = i0 + i1;
768
v1[0].re = r0 - r1; v1[0].im = i0 - i1;
769
v0[nx].re = r2 + r3; v0[nx].im = i2 + i3;
770
v1[nx].re = r2 - r3; v1[nx].im = i2 - i3;
771
}
772
}
773
}
774
775
for( ; n < c.factors[0]; )
776
{
777
// do the remaining radix-2 transform
778
nx = n;
779
n *= 2;
780
dw0 /= 2;
781
782
for( i = 0; i < c.n; i += n )
783
{
784
Complex<T>* v = dst + i;
785
T r0 = v[0].re + v[nx].re;
786
T i0 = v[0].im + v[nx].im;
787
T r1 = v[0].re - v[nx].re;
788
T i1 = v[0].im - v[nx].im;
789
v[0].re = r0; v[0].im = i0;
790
v[nx].re = r1; v[nx].im = i1;
791
792
for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
793
{
794
v = dst + i + j;
795
r1 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im;
796
i1 = v[nx].im*wave[dw].re + v[nx].re*wave[dw].im;
797
r0 = v[0].re; i0 = v[0].im;
798
799
v[0].re = r0 + r1; v[0].im = i0 + i1;
800
v[nx].re = r0 - r1; v[nx].im = i0 - i1;
801
}
802
}
803
}
804
}
805
806
// 2. all the other transforms
807
for( f_idx = (c.factors[0]&1) ? 0 : 1; f_idx < c.nf; f_idx++ )
808
{
809
int factor = c.factors[f_idx];
810
nx = n;
811
n *= factor;
812
dw0 /= factor;
813
814
if( factor == 3 )
815
{
816
// radix-3
817
for( i = 0; i < c.n; i += n )
818
{
819
Complex<T>* v = dst + i;
820
821
T r1 = v[nx].re + v[nx*2].re;
822
T i1 = v[nx].im + v[nx*2].im;
823
T r0 = v[0].re;
824
T i0 = v[0].im;
825
T r2 = sin_120*(v[nx].im - v[nx*2].im);
826
T i2 = sin_120*(v[nx*2].re - v[nx].re);
827
v[0].re = r0 + r1; v[0].im = i0 + i1;
828
r0 -= (T)0.5*r1; i0 -= (T)0.5*i1;
829
v[nx].re = r0 + r2; v[nx].im = i0 + i2;
830
v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2;
831
832
for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
833
{
834
v = dst + i + j;
835
r0 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im;
836
i0 = v[nx].re*wave[dw].im + v[nx].im*wave[dw].re;
837
i2 = v[nx*2].re*wave[dw*2].re - v[nx*2].im*wave[dw*2].im;
838
r2 = v[nx*2].re*wave[dw*2].im + v[nx*2].im*wave[dw*2].re;
839
r1 = r0 + i2; i1 = i0 + r2;
840
841
r2 = sin_120*(i0 - r2); i2 = sin_120*(i2 - r0);
842
r0 = v[0].re; i0 = v[0].im;
843
v[0].re = r0 + r1; v[0].im = i0 + i1;
844
r0 -= (T)0.5*r1; i0 -= (T)0.5*i1;
845
v[nx].re = r0 + r2; v[nx].im = i0 + i2;
846
v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2;
847
}
848
}
849
}
850
else if( factor == 5 )
851
{
852
// radix-5
853
for( i = 0; i < c.n; i += n )
854
{
855
for( j = 0, dw = 0; j < nx; j++, dw += dw0 )
856
{
857
Complex<T>* v0 = dst + i + j;
858
Complex<T>* v1 = v0 + nx*2;
859
Complex<T>* v2 = v1 + nx*2;
860
861
T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4, r5, i5;
862
863
r3 = v0[nx].re*wave[dw].re - v0[nx].im*wave[dw].im;
864
i3 = v0[nx].re*wave[dw].im + v0[nx].im*wave[dw].re;
865
r2 = v2[0].re*wave[dw*4].re - v2[0].im*wave[dw*4].im;
866
i2 = v2[0].re*wave[dw*4].im + v2[0].im*wave[dw*4].re;
867
868
r1 = r3 + r2; i1 = i3 + i2;
869
r3 -= r2; i3 -= i2;
870
871
r4 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im;
872
i4 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re;
873
r0 = v1[0].re*wave[dw*2].re - v1[0].im*wave[dw*2].im;
874
i0 = v1[0].re*wave[dw*2].im + v1[0].im*wave[dw*2].re;
875
876
r2 = r4 + r0; i2 = i4 + i0;
877
r4 -= r0; i4 -= i0;
878
879
r0 = v0[0].re; i0 = v0[0].im;
880
r5 = r1 + r2; i5 = i1 + i2;
881
882
v0[0].re = r0 + r5; v0[0].im = i0 + i5;
883
884
r0 -= (T)0.25*r5; i0 -= (T)0.25*i5;
885
r1 = fft5_2*(r1 - r2); i1 = fft5_2*(i1 - i2);
886
r2 = -fft5_3*(i3 + i4); i2 = fft5_3*(r3 + r4);
887
888
i3 *= -fft5_5; r3 *= fft5_5;
889
i4 *= -fft5_4; r4 *= fft5_4;
890
891
r5 = r2 + i3; i5 = i2 + r3;
892
r2 -= i4; i2 -= r4;
893
894
r3 = r0 + r1; i3 = i0 + i1;
895
r0 -= r1; i0 -= i1;
896
897
v0[nx].re = r3 + r2; v0[nx].im = i3 + i2;
898
v2[0].re = r3 - r2; v2[0].im = i3 - i2;
899
900
v1[0].re = r0 + r5; v1[0].im = i0 + i5;
901
v1[nx].re = r0 - r5; v1[nx].im = i0 - i5;
902
}
903
}
904
}
905
else
906
{
907
// radix-"factor" - an odd number
908
int p, q, factor2 = (factor - 1)/2;
909
int d, dd, dw_f = c.tab_size/factor;
910
AutoBuffer<Complex<T> > buf(factor2 * 2);
911
Complex<T>* a = buf.data();
912
Complex<T>* b = a + factor2;
913
914
for( i = 0; i < c.n; i += n )
915
{
916
for( j = 0, dw = 0; j < nx; j++, dw += dw0 )
917
{
918
Complex<T>* v = dst + i + j;
919
Complex<T> v_0 = v[0];
920
Complex<T> vn_0 = v_0;
921
922
if( j == 0 )
923
{
924
for( p = 1, k = nx; p <= factor2; p++, k += nx )
925
{
926
T r0 = v[k].re + v[n-k].re;
927
T i0 = v[k].im - v[n-k].im;
928
T r1 = v[k].re - v[n-k].re;
929
T i1 = v[k].im + v[n-k].im;
930
931
vn_0.re += r0; vn_0.im += i1;
932
a[p-1].re = r0; a[p-1].im = i0;
933
b[p-1].re = r1; b[p-1].im = i1;
934
}
935
}
936
else
937
{
938
const Complex<T>* wave_ = wave + dw*factor;
939
d = dw;
940
941
for( p = 1, k = nx; p <= factor2; p++, k += nx, d += dw )
942
{
943
T r2 = v[k].re*wave[d].re - v[k].im*wave[d].im;
944
T i2 = v[k].re*wave[d].im + v[k].im*wave[d].re;
945
946
T r1 = v[n-k].re*wave_[-d].re - v[n-k].im*wave_[-d].im;
947
T i1 = v[n-k].re*wave_[-d].im + v[n-k].im*wave_[-d].re;
948
949
T r0 = r2 + r1;
950
T i0 = i2 - i1;
951
r1 = r2 - r1;
952
i1 = i2 + i1;
953
954
vn_0.re += r0; vn_0.im += i1;
955
a[p-1].re = r0; a[p-1].im = i0;
956
b[p-1].re = r1; b[p-1].im = i1;
957
}
958
}
959
960
v[0] = vn_0;
961
962
for( p = 1, k = nx; p <= factor2; p++, k += nx )
963
{
964
Complex<T> s0 = v_0, s1 = v_0;
965
d = dd = dw_f*p;
966
967
for( q = 0; q < factor2; q++ )
968
{
969
T r0 = wave[d].re * a[q].re;
970
T i0 = wave[d].im * a[q].im;
971
T r1 = wave[d].re * b[q].im;
972
T i1 = wave[d].im * b[q].re;
973
974
s1.re += r0 + i0; s0.re += r0 - i0;
975
s1.im += r1 - i1; s0.im += r1 + i1;
976
977
d += dd;
978
d -= -(d >= c.tab_size) & c.tab_size;
979
}
980
981
v[k] = s0;
982
v[n-k] = s1;
983
}
984
}
985
}
986
}
987
}
988
989
if( scale != 1 )
990
{
991
T re_scale = scale, im_scale = scale;
992
if( inv )
993
im_scale = -im_scale;
994
995
for( i = 0; i < c.n; i++ )
996
{
997
T t0 = dst[i].re*re_scale;
998
T t1 = dst[i].im*im_scale;
999
dst[i].re = t0;
1000
dst[i].im = t1;
1001
}
1002
}
1003
else if( inv )
1004
{
1005
for( i = 0; i <= c.n - 2; i += 2 )
1006
{
1007
T t0 = -dst[i].im;
1008
T t1 = -dst[i+1].im;
1009
dst[i].im = t0;
1010
dst[i+1].im = t1;
1011
}
1012
1013
if( i < c.n )
1014
dst[c.n-1].im = -dst[c.n-1].im;
1015
}
1016
}
1017
1018
1019
/* FFT of real vector
1020
output vector format:
1021
re(0), re(1), im(1), ... , re(n/2-1), im((n+1)/2-1) [, re((n+1)/2)] OR ...
1022
re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */
1023
template<typename T> static void
1024
RealDFT(const OcvDftOptions & c, const T* src, T* dst)
1025
{
1026
int n = c.n;
1027
int complex_output = c.isComplex;
1028
T scale = (T)c.scale;
1029
int j;
1030
dst += complex_output;
1031
1032
if( c.useIpp )
1033
{
1034
#ifdef USE_IPP_DFT
1035
if (ippsDFTFwd_RToPack( src, dst, c.ipp_spec, c.ipp_work ) >=0)
1036
{
1037
if( complex_output )
1038
{
1039
dst[-1] = dst[0];
1040
dst[0] = 0;
1041
if( (n & 1) == 0 )
1042
dst[n] = 0;
1043
}
1044
CV_IMPL_ADD(CV_IMPL_IPP);
1045
return;
1046
}
1047
setIppErrorStatus();
1048
#endif
1049
}
1050
assert( c.tab_size == n );
1051
1052
if( n == 1 )
1053
{
1054
dst[0] = src[0]*scale;
1055
}
1056
else if( n == 2 )
1057
{
1058
T t = (src[0] + src[1])*scale;
1059
dst[1] = (src[0] - src[1])*scale;
1060
dst[0] = t;
1061
}
1062
else if( n & 1 )
1063
{
1064
dst -= complex_output;
1065
Complex<T>* _dst = (Complex<T>*)dst;
1066
_dst[0].re = src[0]*scale;
1067
_dst[0].im = 0;
1068
for( j = 1; j < n; j += 2 )
1069
{
1070
T t0 = src[c.itab[j]]*scale;
1071
T t1 = src[c.itab[j+1]]*scale;
1072
_dst[j].re = t0;
1073
_dst[j].im = 0;
1074
_dst[j+1].re = t1;
1075
_dst[j+1].im = 0;
1076
}
1077
OcvDftOptions sub_c = c;
1078
sub_c.isComplex = false;
1079
sub_c.isInverse = false;
1080
sub_c.noPermute = true;
1081
sub_c.scale = 1.;
1082
DFT(sub_c, _dst, _dst);
1083
if( !complex_output )
1084
dst[1] = dst[0];
1085
}
1086
else
1087
{
1088
T t0, t;
1089
T h1_re, h1_im, h2_re, h2_im;
1090
T scale2 = scale*(T)0.5;
1091
int n2 = n >> 1;
1092
1093
c.factors[0] >>= 1;
1094
1095
OcvDftOptions sub_c = c;
1096
sub_c.factors += (c.factors[0] == 1);
1097
sub_c.nf -= (c.factors[0] == 1);
1098
sub_c.isComplex = false;
1099
sub_c.isInverse = false;
1100
sub_c.noPermute = false;
1101
sub_c.scale = 1.;
1102
sub_c.n = n2;
1103
1104
DFT(sub_c, (Complex<T>*)src, (Complex<T>*)dst);
1105
1106
c.factors[0] <<= 1;
1107
1108
t = dst[0] - dst[1];
1109
dst[0] = (dst[0] + dst[1])*scale;
1110
dst[1] = t*scale;
1111
1112
t0 = dst[n2];
1113
t = dst[n-1];
1114
dst[n-1] = dst[1];
1115
1116
const Complex<T> *wave = (const Complex<T>*)c.wave;
1117
1118
for( j = 2, wave++; j < n2; j += 2, wave++ )
1119
{
1120
/* calc odd */
1121
h2_re = scale2*(dst[j+1] + t);
1122
h2_im = scale2*(dst[n-j] - dst[j]);
1123
1124
/* calc even */
1125
h1_re = scale2*(dst[j] + dst[n-j]);
1126
h1_im = scale2*(dst[j+1] - t);
1127
1128
/* rotate */
1129
t = h2_re*wave->re - h2_im*wave->im;
1130
h2_im = h2_re*wave->im + h2_im*wave->re;
1131
h2_re = t;
1132
t = dst[n-j-1];
1133
1134
dst[j-1] = h1_re + h2_re;
1135
dst[n-j-1] = h1_re - h2_re;
1136
dst[j] = h1_im + h2_im;
1137
dst[n-j] = h2_im - h1_im;
1138
}
1139
1140
if( j <= n2 )
1141
{
1142
dst[n2-1] = t0*scale;
1143
dst[n2] = -t*scale;
1144
}
1145
}
1146
1147
if( complex_output && ((n & 1) == 0 || n == 1))
1148
{
1149
dst[-1] = dst[0];
1150
dst[0] = 0;
1151
if( n > 1 )
1152
dst[n] = 0;
1153
}
1154
}
1155
1156
/* Inverse FFT of complex conjugate-symmetric vector
1157
input vector format:
1158
re[0], re[1], im[1], ... , re[n/2-1], im[n/2-1], re[n/2] OR
1159
re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */
1160
template<typename T> static void
1161
CCSIDFT(const OcvDftOptions & c, const T* src, T* dst)
1162
{
1163
int n = c.n;
1164
int complex_input = c.isComplex;
1165
int j, k;
1166
T scale = (T)c.scale;
1167
T save_s1 = 0.;
1168
T t0, t1, t2, t3, t;
1169
1170
assert( c.tab_size == n );
1171
1172
if( complex_input )
1173
{
1174
assert( src != dst );
1175
save_s1 = src[1];
1176
((T*)src)[1] = src[0];
1177
src++;
1178
}
1179
if( c.useIpp )
1180
{
1181
#ifdef USE_IPP_DFT
1182
if (ippsDFTInv_PackToR( src, dst, c.ipp_spec, c.ipp_work ) >=0)
1183
{
1184
if( complex_input )
1185
((T*)src)[0] = (T)save_s1;
1186
CV_IMPL_ADD(CV_IMPL_IPP);
1187
return;
1188
}
1189
1190
setIppErrorStatus();
1191
#endif
1192
}
1193
if( n == 1 )
1194
{
1195
dst[0] = (T)(src[0]*scale);
1196
}
1197
else if( n == 2 )
1198
{
1199
t = (src[0] + src[1])*scale;
1200
dst[1] = (src[0] - src[1])*scale;
1201
dst[0] = t;
1202
}
1203
else if( n & 1 )
1204
{
1205
Complex<T>* _src = (Complex<T>*)(src-1);
1206
Complex<T>* _dst = (Complex<T>*)dst;
1207
1208
_dst[0].re = src[0];
1209
_dst[0].im = 0;
1210
1211
int n2 = (n+1) >> 1;
1212
1213
for( j = 1; j < n2; j++ )
1214
{
1215
int k0 = c.itab[j], k1 = c.itab[n-j];
1216
t0 = _src[j].re; t1 = _src[j].im;
1217
_dst[k0].re = t0; _dst[k0].im = -t1;
1218
_dst[k1].re = t0; _dst[k1].im = t1;
1219
}
1220
1221
OcvDftOptions sub_c = c;
1222
sub_c.isComplex = false;
1223
sub_c.isInverse = false;
1224
sub_c.noPermute = true;
1225
sub_c.scale = 1.;
1226
sub_c.n = n;
1227
1228
DFT(sub_c, _dst, _dst);
1229
dst[0] *= scale;
1230
for( j = 1; j < n; j += 2 )
1231
{
1232
t0 = dst[j*2]*scale;
1233
t1 = dst[j*2+2]*scale;
1234
dst[j] = t0;
1235
dst[j+1] = t1;
1236
}
1237
}
1238
else
1239
{
1240
int inplace = src == dst;
1241
const Complex<T>* w = (const Complex<T>*)c.wave;
1242
1243
t = src[1];
1244
t0 = (src[0] + src[n-1]);
1245
t1 = (src[n-1] - src[0]);
1246
dst[0] = t0;
1247
dst[1] = t1;
1248
1249
int n2 = (n+1) >> 1;
1250
1251
for( j = 2, w++; j < n2; j += 2, w++ )
1252
{
1253
T h1_re, h1_im, h2_re, h2_im;
1254
1255
h1_re = (t + src[n-j-1]);
1256
h1_im = (src[j] - src[n-j]);
1257
1258
h2_re = (t - src[n-j-1]);
1259
h2_im = (src[j] + src[n-j]);
1260
1261
t = h2_re*w->re + h2_im*w->im;
1262
h2_im = h2_im*w->re - h2_re*w->im;
1263
h2_re = t;
1264
1265
t = src[j+1];
1266
t0 = h1_re - h2_im;
1267
t1 = -h1_im - h2_re;
1268
t2 = h1_re + h2_im;
1269
t3 = h1_im - h2_re;
1270
1271
if( inplace )
1272
{
1273
dst[j] = t0;
1274
dst[j+1] = t1;
1275
dst[n-j] = t2;
1276
dst[n-j+1]= t3;
1277
}
1278
else
1279
{
1280
int j2 = j >> 1;
1281
k = c.itab[j2];
1282
dst[k] = t0;
1283
dst[k+1] = t1;
1284
k = c.itab[n2-j2];
1285
dst[k] = t2;
1286
dst[k+1]= t3;
1287
}
1288
}
1289
1290
if( j <= n2 )
1291
{
1292
t0 = t*2;
1293
t1 = src[n2]*2;
1294
1295
if( inplace )
1296
{
1297
dst[n2] = t0;
1298
dst[n2+1] = t1;
1299
}
1300
else
1301
{
1302
k = c.itab[n2];
1303
dst[k*2] = t0;
1304
dst[k*2+1] = t1;
1305
}
1306
}
1307
1308
c.factors[0] >>= 1;
1309
1310
OcvDftOptions sub_c = c;
1311
sub_c.factors += (c.factors[0] == 1);
1312
sub_c.nf -= (c.factors[0] == 1);
1313
sub_c.isComplex = false;
1314
sub_c.isInverse = false;
1315
sub_c.noPermute = !inplace;
1316
sub_c.scale = 1.;
1317
sub_c.n = n2;
1318
1319
DFT(sub_c, (Complex<T>*)dst, (Complex<T>*)dst);
1320
1321
c.factors[0] <<= 1;
1322
1323
for( j = 0; j < n; j += 2 )
1324
{
1325
t0 = dst[j]*scale;
1326
t1 = dst[j+1]*(-scale);
1327
dst[j] = t0;
1328
dst[j+1] = t1;
1329
}
1330
}
1331
if( complex_input )
1332
((T*)src)[0] = (T)save_s1;
1333
}
1334
1335
static void
1336
CopyColumn( const uchar* _src, size_t src_step,
1337
uchar* _dst, size_t dst_step,
1338
int len, size_t elem_size )
1339
{
1340
int i, t0, t1;
1341
const int* src = (const int*)_src;
1342
int* dst = (int*)_dst;
1343
src_step /= sizeof(src[0]);
1344
dst_step /= sizeof(dst[0]);
1345
1346
if( elem_size == sizeof(int) )
1347
{
1348
for( i = 0; i < len; i++, src += src_step, dst += dst_step )
1349
dst[0] = src[0];
1350
}
1351
else if( elem_size == sizeof(int)*2 )
1352
{
1353
for( i = 0; i < len; i++, src += src_step, dst += dst_step )
1354
{
1355
t0 = src[0]; t1 = src[1];
1356
dst[0] = t0; dst[1] = t1;
1357
}
1358
}
1359
else if( elem_size == sizeof(int)*4 )
1360
{
1361
for( i = 0; i < len; i++, src += src_step, dst += dst_step )
1362
{
1363
t0 = src[0]; t1 = src[1];
1364
dst[0] = t0; dst[1] = t1;
1365
t0 = src[2]; t1 = src[3];
1366
dst[2] = t0; dst[3] = t1;
1367
}
1368
}
1369
}
1370
1371
1372
static void
1373
CopyFrom2Columns( const uchar* _src, size_t src_step,
1374
uchar* _dst0, uchar* _dst1,
1375
int len, size_t elem_size )
1376
{
1377
int i, t0, t1;
1378
const int* src = (const int*)_src;
1379
int* dst0 = (int*)_dst0;
1380
int* dst1 = (int*)_dst1;
1381
src_step /= sizeof(src[0]);
1382
1383
if( elem_size == sizeof(int) )
1384
{
1385
for( i = 0; i < len; i++, src += src_step )
1386
{
1387
t0 = src[0]; t1 = src[1];
1388
dst0[i] = t0; dst1[i] = t1;
1389
}
1390
}
1391
else if( elem_size == sizeof(int)*2 )
1392
{
1393
for( i = 0; i < len*2; i += 2, src += src_step )
1394
{
1395
t0 = src[0]; t1 = src[1];
1396
dst0[i] = t0; dst0[i+1] = t1;
1397
t0 = src[2]; t1 = src[3];
1398
dst1[i] = t0; dst1[i+1] = t1;
1399
}
1400
}
1401
else if( elem_size == sizeof(int)*4 )
1402
{
1403
for( i = 0; i < len*4; i += 4, src += src_step )
1404
{
1405
t0 = src[0]; t1 = src[1];
1406
dst0[i] = t0; dst0[i+1] = t1;
1407
t0 = src[2]; t1 = src[3];
1408
dst0[i+2] = t0; dst0[i+3] = t1;
1409
t0 = src[4]; t1 = src[5];
1410
dst1[i] = t0; dst1[i+1] = t1;
1411
t0 = src[6]; t1 = src[7];
1412
dst1[i+2] = t0; dst1[i+3] = t1;
1413
}
1414
}
1415
}
1416
1417
1418
static void
1419
CopyTo2Columns( const uchar* _src0, const uchar* _src1,
1420
uchar* _dst, size_t dst_step,
1421
int len, size_t elem_size )
1422
{
1423
int i, t0, t1;
1424
const int* src0 = (const int*)_src0;
1425
const int* src1 = (const int*)_src1;
1426
int* dst = (int*)_dst;
1427
dst_step /= sizeof(dst[0]);
1428
1429
if( elem_size == sizeof(int) )
1430
{
1431
for( i = 0; i < len; i++, dst += dst_step )
1432
{
1433
t0 = src0[i]; t1 = src1[i];
1434
dst[0] = t0; dst[1] = t1;
1435
}
1436
}
1437
else if( elem_size == sizeof(int)*2 )
1438
{
1439
for( i = 0; i < len*2; i += 2, dst += dst_step )
1440
{
1441
t0 = src0[i]; t1 = src0[i+1];
1442
dst[0] = t0; dst[1] = t1;
1443
t0 = src1[i]; t1 = src1[i+1];
1444
dst[2] = t0; dst[3] = t1;
1445
}
1446
}
1447
else if( elem_size == sizeof(int)*4 )
1448
{
1449
for( i = 0; i < len*4; i += 4, dst += dst_step )
1450
{
1451
t0 = src0[i]; t1 = src0[i+1];
1452
dst[0] = t0; dst[1] = t1;
1453
t0 = src0[i+2]; t1 = src0[i+3];
1454
dst[2] = t0; dst[3] = t1;
1455
t0 = src1[i]; t1 = src1[i+1];
1456
dst[4] = t0; dst[5] = t1;
1457
t0 = src1[i+2]; t1 = src1[i+3];
1458
dst[6] = t0; dst[7] = t1;
1459
}
1460
}
1461
}
1462
1463
1464
static void
1465
ExpandCCS( uchar* _ptr, int n, int elem_size )
1466
{
1467
int i;
1468
if( elem_size == (int)sizeof(float) )
1469
{
1470
float* p = (float*)_ptr;
1471
for( i = 1; i < (n+1)/2; i++ )
1472
{
1473
p[(n-i)*2] = p[i*2-1];
1474
p[(n-i)*2+1] = -p[i*2];
1475
}
1476
if( (n & 1) == 0 )
1477
{
1478
p[n] = p[n-1];
1479
p[n+1] = 0.f;
1480
n--;
1481
}
1482
for( i = n-1; i > 0; i-- )
1483
p[i+1] = p[i];
1484
p[1] = 0.f;
1485
}
1486
else
1487
{
1488
double* p = (double*)_ptr;
1489
for( i = 1; i < (n+1)/2; i++ )
1490
{
1491
p[(n-i)*2] = p[i*2-1];
1492
p[(n-i)*2+1] = -p[i*2];
1493
}
1494
if( (n & 1) == 0 )
1495
{
1496
p[n] = p[n-1];
1497
p[n+1] = 0.f;
1498
n--;
1499
}
1500
for( i = n-1; i > 0; i-- )
1501
p[i+1] = p[i];
1502
p[1] = 0.f;
1503
}
1504
}
1505
1506
static void DFT_32f(const OcvDftOptions & c, const Complexf* src, Complexf* dst)
1507
{
1508
DFT(c, src, dst);
1509
}
1510
1511
static void DFT_64f(const OcvDftOptions & c, const Complexd* src, Complexd* dst)
1512
{
1513
DFT(c, src, dst);
1514
}
1515
1516
1517
static void RealDFT_32f(const OcvDftOptions & c, const float* src, float* dst)
1518
{
1519
RealDFT(c, src, dst);
1520
}
1521
1522
static void RealDFT_64f(const OcvDftOptions & c, const double* src, double* dst)
1523
{
1524
RealDFT(c, src, dst);
1525
}
1526
1527
static void CCSIDFT_32f(const OcvDftOptions & c, const float* src, float* dst)
1528
{
1529
CCSIDFT(c, src, dst);
1530
}
1531
1532
static void CCSIDFT_64f(const OcvDftOptions & c, const double* src, double* dst)
1533
{
1534
CCSIDFT(c, src, dst);
1535
}
1536
1537
}
1538
1539
#ifdef USE_IPP_DFT
1540
typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, int*, int*, int*);
1541
typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void*, uchar*);
1542
#endif
1543
1544
namespace cv
1545
{
1546
#if defined USE_IPP_DFT
1547
1548
typedef IppStatus (CV_STDCALL* ippiDFT_C_Func)(const Ipp32fc*, int, Ipp32fc*, int, const IppiDFTSpec_C_32fc*, Ipp8u*);
1549
typedef IppStatus (CV_STDCALL* ippiDFT_R_Func)(const Ipp32f* , int, Ipp32f* , int, const IppiDFTSpec_R_32f* , Ipp8u*);
1550
1551
template <typename Dft>
1552
class Dft_C_IPPLoop_Invoker : public ParallelLoopBody
1553
{
1554
public:
1555
1556
Dft_C_IPPLoop_Invoker(const uchar * _src, size_t _src_step, uchar * _dst, size_t _dst_step, int _width,
1557
const Dft& _ippidft, int _norm_flag, bool *_ok) :
1558
ParallelLoopBody(),
1559
src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width),
1560
ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
1561
{
1562
*ok = true;
1563
}
1564
1565
virtual void operator()(const Range& range) const CV_OVERRIDE
1566
{
1567
IppStatus status;
1568
Ipp8u* pBuffer = 0;
1569
Ipp8u* pMemInit= 0;
1570
int sizeBuffer=0;
1571
int sizeSpec=0;
1572
int sizeInit=0;
1573
1574
IppiSize srcRoiSize = {width, 1};
1575
1576
status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
1577
if ( status < 0 )
1578
{
1579
*ok = false;
1580
return;
1581
}
1582
1583
IppiDFTSpec_C_32fc* pDFTSpec = (IppiDFTSpec_C_32fc*)CV_IPP_MALLOC( sizeSpec );
1584
1585
if ( sizeInit > 0 )
1586
pMemInit = (Ipp8u*)CV_IPP_MALLOC( sizeInit );
1587
1588
if ( sizeBuffer > 0 )
1589
pBuffer = (Ipp8u*)CV_IPP_MALLOC( sizeBuffer );
1590
1591
status = ippiDFTInit_C_32fc( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
1592
1593
if ( sizeInit > 0 )
1594
ippFree( pMemInit );
1595
1596
if ( status < 0 )
1597
{
1598
ippFree( pDFTSpec );
1599
if ( sizeBuffer > 0 )
1600
ippFree( pBuffer );
1601
*ok = false;
1602
return;
1603
}
1604
1605
for( int i = range.start; i < range.end; ++i)
1606
if(!ippidft((Ipp32fc*)(src + src_step * i), src_step, (Ipp32fc*)(dst + dst_step * i), dst_step,
1607
pDFTSpec, (Ipp8u*)pBuffer))
1608
{
1609
*ok = false;
1610
}
1611
1612
if ( sizeBuffer > 0 )
1613
ippFree( pBuffer );
1614
1615
ippFree( pDFTSpec );
1616
CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
1617
}
1618
1619
private:
1620
const uchar * src;
1621
size_t src_step;
1622
uchar * dst;
1623
size_t dst_step;
1624
int width;
1625
const Dft& ippidft;
1626
int norm_flag;
1627
bool *ok;
1628
1629
const Dft_C_IPPLoop_Invoker& operator= (const Dft_C_IPPLoop_Invoker&);
1630
};
1631
1632
template <typename Dft>
1633
class Dft_R_IPPLoop_Invoker : public ParallelLoopBody
1634
{
1635
public:
1636
1637
Dft_R_IPPLoop_Invoker(const uchar * _src, size_t _src_step, uchar * _dst, size_t _dst_step, int _width,
1638
const Dft& _ippidft, int _norm_flag, bool *_ok) :
1639
ParallelLoopBody(),
1640
src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width),
1641
ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
1642
{
1643
*ok = true;
1644
}
1645
1646
virtual void operator()(const Range& range) const CV_OVERRIDE
1647
{
1648
IppStatus status;
1649
Ipp8u* pBuffer = 0;
1650
Ipp8u* pMemInit= 0;
1651
int sizeBuffer=0;
1652
int sizeSpec=0;
1653
int sizeInit=0;
1654
1655
IppiSize srcRoiSize = {width, 1};
1656
1657
status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
1658
if ( status < 0 )
1659
{
1660
*ok = false;
1661
return;
1662
}
1663
1664
IppiDFTSpec_R_32f* pDFTSpec = (IppiDFTSpec_R_32f*)CV_IPP_MALLOC( sizeSpec );
1665
1666
if ( sizeInit > 0 )
1667
pMemInit = (Ipp8u*)CV_IPP_MALLOC( sizeInit );
1668
1669
if ( sizeBuffer > 0 )
1670
pBuffer = (Ipp8u*)CV_IPP_MALLOC( sizeBuffer );
1671
1672
status = ippiDFTInit_R_32f( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
1673
1674
if ( sizeInit > 0 )
1675
ippFree( pMemInit );
1676
1677
if ( status < 0 )
1678
{
1679
ippFree( pDFTSpec );
1680
if ( sizeBuffer > 0 )
1681
ippFree( pBuffer );
1682
*ok = false;
1683
return;
1684
}
1685
1686
for( int i = range.start; i < range.end; ++i)
1687
if(!ippidft((float*)(src + src_step * i), src_step, (float*)(dst + dst_step * i), dst_step,
1688
pDFTSpec, (Ipp8u*)pBuffer))
1689
{
1690
*ok = false;
1691
}
1692
1693
if ( sizeBuffer > 0 )
1694
ippFree( pBuffer );
1695
1696
ippFree( pDFTSpec );
1697
CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
1698
}
1699
1700
private:
1701
const uchar * src;
1702
size_t src_step;
1703
uchar * dst;
1704
size_t dst_step;
1705
int width;
1706
const Dft& ippidft;
1707
int norm_flag;
1708
bool *ok;
1709
1710
const Dft_R_IPPLoop_Invoker& operator= (const Dft_R_IPPLoop_Invoker&);
1711
};
1712
1713
template <typename Dft>
1714
bool Dft_C_IPPLoop(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, const Dft& ippidft, int norm_flag)
1715
{
1716
bool ok;
1717
parallel_for_(Range(0, height), Dft_C_IPPLoop_Invoker<Dft>(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) );
1718
return ok;
1719
}
1720
1721
template <typename Dft>
1722
bool Dft_R_IPPLoop(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, const Dft& ippidft, int norm_flag)
1723
{
1724
bool ok;
1725
parallel_for_(Range(0, height), Dft_R_IPPLoop_Invoker<Dft>(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) );
1726
return ok;
1727
}
1728
1729
struct IPPDFT_C_Functor
1730
{
1731
IPPDFT_C_Functor(ippiDFT_C_Func _func) : ippiDFT_CToC_32fc_C1R(_func){}
1732
1733
bool operator()(const Ipp32fc* src, size_t srcStep, Ipp32fc* dst, size_t dstStep, const IppiDFTSpec_C_32fc* pDFTSpec, Ipp8u* pBuffer) const
1734
{
1735
return ippiDFT_CToC_32fc_C1R ? CV_INSTRUMENT_FUN_IPP(ippiDFT_CToC_32fc_C1R, src, static_cast<int>(srcStep), dst, static_cast<int>(dstStep), pDFTSpec, pBuffer) >= 0 : false;
1736
}
1737
private:
1738
ippiDFT_C_Func ippiDFT_CToC_32fc_C1R;
1739
};
1740
1741
struct IPPDFT_R_Functor
1742
{
1743
IPPDFT_R_Functor(ippiDFT_R_Func _func) : ippiDFT_PackToR_32f_C1R(_func){}
1744
1745
bool operator()(const Ipp32f* src, size_t srcStep, Ipp32f* dst, size_t dstStep, const IppiDFTSpec_R_32f* pDFTSpec, Ipp8u* pBuffer) const
1746
{
1747
return ippiDFT_PackToR_32f_C1R ? CV_INSTRUMENT_FUN_IPP(ippiDFT_PackToR_32f_C1R, src, static_cast<int>(srcStep), dst, static_cast<int>(dstStep), pDFTSpec, pBuffer) >= 0 : false;
1748
}
1749
private:
1750
ippiDFT_R_Func ippiDFT_PackToR_32f_C1R;
1751
};
1752
1753
static bool ippi_DFT_C_32F(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv, int norm_flag)
1754
{
1755
CV_INSTRUMENT_REGION_IPP();
1756
1757
IppStatus status;
1758
Ipp8u* pBuffer = 0;
1759
Ipp8u* pMemInit= 0;
1760
int sizeBuffer=0;
1761
int sizeSpec=0;
1762
int sizeInit=0;
1763
1764
IppiSize srcRoiSize = {width, height};
1765
1766
status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
1767
if ( status < 0 )
1768
return false;
1769
1770
IppiDFTSpec_C_32fc* pDFTSpec = (IppiDFTSpec_C_32fc*)CV_IPP_MALLOC( sizeSpec );
1771
1772
if ( sizeInit > 0 )
1773
pMemInit = (Ipp8u*)CV_IPP_MALLOC( sizeInit );
1774
1775
if ( sizeBuffer > 0 )
1776
pBuffer = (Ipp8u*)CV_IPP_MALLOC( sizeBuffer );
1777
1778
status = ippiDFTInit_C_32fc( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
1779
1780
if ( sizeInit > 0 )
1781
ippFree( pMemInit );
1782
1783
if ( status < 0 )
1784
{
1785
ippFree( pDFTSpec );
1786
if ( sizeBuffer > 0 )
1787
ippFree( pBuffer );
1788
return false;
1789
}
1790
1791
if (!inv)
1792
status = CV_INSTRUMENT_FUN_IPP(ippiDFTFwd_CToC_32fc_C1R, (Ipp32fc*)src, static_cast<int>(src_step), (Ipp32fc*)dst, static_cast<int>(dst_step), pDFTSpec, pBuffer);
1793
else
1794
status = CV_INSTRUMENT_FUN_IPP(ippiDFTInv_CToC_32fc_C1R, (Ipp32fc*)src, static_cast<int>(src_step), (Ipp32fc*)dst, static_cast<int>(dst_step), pDFTSpec, pBuffer);
1795
1796
if ( sizeBuffer > 0 )
1797
ippFree( pBuffer );
1798
1799
ippFree( pDFTSpec );
1800
1801
if(status >= 0)
1802
{
1803
CV_IMPL_ADD(CV_IMPL_IPP);
1804
return true;
1805
}
1806
return false;
1807
}
1808
1809
static bool ippi_DFT_R_32F(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv, int norm_flag)
1810
{
1811
CV_INSTRUMENT_REGION_IPP();
1812
1813
IppStatus status;
1814
Ipp8u* pBuffer = 0;
1815
Ipp8u* pMemInit= 0;
1816
int sizeBuffer=0;
1817
int sizeSpec=0;
1818
int sizeInit=0;
1819
1820
IppiSize srcRoiSize = {width, height};
1821
1822
status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
1823
if ( status < 0 )
1824
return false;
1825
1826
IppiDFTSpec_R_32f* pDFTSpec = (IppiDFTSpec_R_32f*)CV_IPP_MALLOC( sizeSpec );
1827
1828
if ( sizeInit > 0 )
1829
pMemInit = (Ipp8u*)CV_IPP_MALLOC( sizeInit );
1830
1831
if ( sizeBuffer > 0 )
1832
pBuffer = (Ipp8u*)CV_IPP_MALLOC( sizeBuffer );
1833
1834
status = ippiDFTInit_R_32f( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
1835
1836
if ( sizeInit > 0 )
1837
ippFree( pMemInit );
1838
1839
if ( status < 0 )
1840
{
1841
ippFree( pDFTSpec );
1842
if ( sizeBuffer > 0 )
1843
ippFree( pBuffer );
1844
return false;
1845
}
1846
1847
if (!inv)
1848
status = CV_INSTRUMENT_FUN_IPP(ippiDFTFwd_RToPack_32f_C1R, (float*)src, static_cast<int>(src_step), (float*)dst, static_cast<int>(dst_step), pDFTSpec, pBuffer);
1849
else
1850
status = CV_INSTRUMENT_FUN_IPP(ippiDFTInv_PackToR_32f_C1R, (float*)src, static_cast<int>(src_step), (float*)dst, static_cast<int>(dst_step), pDFTSpec, pBuffer);
1851
1852
if ( sizeBuffer > 0 )
1853
ippFree( pBuffer );
1854
1855
ippFree( pDFTSpec );
1856
1857
if(status >= 0)
1858
{
1859
CV_IMPL_ADD(CV_IMPL_IPP);
1860
return true;
1861
}
1862
return false;
1863
}
1864
1865
#endif
1866
}
1867
1868
#ifdef HAVE_OPENCL
1869
1870
namespace cv
1871
{
1872
1873
enum FftType
1874
{
1875
R2R = 0, // real to CCS in case forward transform, CCS to real otherwise
1876
C2R = 1, // complex to real in case inverse transform
1877
R2C = 2, // real to complex in case forward transform
1878
C2C = 3 // complex to complex
1879
};
1880
1881
struct OCL_FftPlan
1882
{
1883
private:
1884
UMat twiddles;
1885
String buildOptions;
1886
int thread_count;
1887
int dft_size;
1888
int dft_depth;
1889
bool status;
1890
1891
public:
1892
OCL_FftPlan(int _size, int _depth) : dft_size(_size), dft_depth(_depth), status(true)
1893
{
1894
CV_Assert( dft_depth == CV_32F || dft_depth == CV_64F );
1895
1896
int min_radix;
1897
std::vector<int> radixes, blocks;
1898
ocl_getRadixes(dft_size, radixes, blocks, min_radix);
1899
thread_count = dft_size / min_radix;
1900
1901
if (thread_count > (int) ocl::Device::getDefault().maxWorkGroupSize())
1902
{
1903
status = false;
1904
return;
1905
}
1906
1907
// generate string with radix calls
1908
String radix_processing;
1909
int n = 1, twiddle_size = 0;
1910
for (size_t i=0; i<radixes.size(); i++)
1911
{
1912
int radix = radixes[i], block = blocks[i];
1913
if (block > 1)
1914
radix_processing += format("fft_radix%d_B%d(smem,twiddles+%d,ind,%d,%d);", radix, block, twiddle_size, n, dft_size/radix);
1915
else
1916
radix_processing += format("fft_radix%d(smem,twiddles+%d,ind,%d,%d);", radix, twiddle_size, n, dft_size/radix);
1917
twiddle_size += (radix-1)*n;
1918
n *= radix;
1919
}
1920
1921
twiddles.create(1, twiddle_size, CV_MAKE_TYPE(dft_depth, 2));
1922
if (dft_depth == CV_32F)
1923
fillRadixTable<float>(twiddles, radixes);
1924
else
1925
fillRadixTable<double>(twiddles, radixes);
1926
1927
buildOptions = format("-D LOCAL_SIZE=%d -D kercn=%d -D FT=%s -D CT=%s%s -D RADIX_PROCESS=%s",
1928
dft_size, min_radix, ocl::typeToStr(dft_depth), ocl::typeToStr(CV_MAKE_TYPE(dft_depth, 2)),
1929
dft_depth == CV_64F ? " -D DOUBLE_SUPPORT" : "", radix_processing.c_str());
1930
}
1931
1932
bool enqueueTransform(InputArray _src, OutputArray _dst, int num_dfts, int flags, int fftType, bool rows = true) const
1933
{
1934
if (!status)
1935
return false;
1936
1937
UMat src = _src.getUMat();
1938
UMat dst = _dst.getUMat();
1939
1940
size_t globalsize[2];
1941
size_t localsize[2];
1942
String kernel_name;
1943
1944
bool is1d = (flags & DFT_ROWS) != 0 || num_dfts == 1;
1945
bool inv = (flags & DFT_INVERSE) != 0;
1946
String options = buildOptions;
1947
1948
if (rows)
1949
{
1950
globalsize[0] = thread_count; globalsize[1] = src.rows;
1951
localsize[0] = thread_count; localsize[1] = 1;
1952
kernel_name = !inv ? "fft_multi_radix_rows" : "ifft_multi_radix_rows";
1953
if ((is1d || inv) && (flags & DFT_SCALE))
1954
options += " -D DFT_SCALE";
1955
}
1956
else
1957
{
1958
globalsize[0] = num_dfts; globalsize[1] = thread_count;
1959
localsize[0] = 1; localsize[1] = thread_count;
1960
kernel_name = !inv ? "fft_multi_radix_cols" : "ifft_multi_radix_cols";
1961
if (flags & DFT_SCALE)
1962
options += " -D DFT_SCALE";
1963
}
1964
1965
options += src.channels() == 1 ? " -D REAL_INPUT" : " -D COMPLEX_INPUT";
1966
options += dst.channels() == 1 ? " -D REAL_OUTPUT" : " -D COMPLEX_OUTPUT";
1967
options += is1d ? " -D IS_1D" : "";
1968
1969
if (!inv)
1970
{
1971
if ((is1d && src.channels() == 1) || (rows && (fftType == R2R)))
1972
options += " -D NO_CONJUGATE";
1973
}
1974
else
1975
{
1976
if (rows && (fftType == C2R || fftType == R2R))
1977
options += " -D NO_CONJUGATE";
1978
if (dst.cols % 2 == 0)
1979
options += " -D EVEN";
1980
}
1981
1982
ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, options);
1983
if (k.empty())
1984
return false;
1985
1986
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::ReadOnlyNoSize(twiddles), thread_count, num_dfts);
1987
return k.run(2, globalsize, localsize, false);
1988
}
1989
1990
private:
1991
static void ocl_getRadixes(int cols, std::vector<int>& radixes, std::vector<int>& blocks, int& min_radix)
1992
{
1993
int factors[34];
1994
int nf = DFTFactorize(cols, factors);
1995
1996
int n = 1;
1997
int factor_index = 0;
1998
min_radix = INT_MAX;
1999
2000
// 2^n transforms
2001
if ((factors[factor_index] & 1) == 0)
2002
{
2003
for( ; n < factors[factor_index];)
2004
{
2005
int radix = 2, block = 1;
2006
if (8*n <= factors[0])
2007
radix = 8;
2008
else if (4*n <= factors[0])
2009
{
2010
radix = 4;
2011
if (cols % 12 == 0)
2012
block = 3;
2013
else if (cols % 8 == 0)
2014
block = 2;
2015
}
2016
else
2017
{
2018
if (cols % 10 == 0)
2019
block = 5;
2020
else if (cols % 8 == 0)
2021
block = 4;
2022
else if (cols % 6 == 0)
2023
block = 3;
2024
else if (cols % 4 == 0)
2025
block = 2;
2026
}
2027
2028
radixes.push_back(radix);
2029
blocks.push_back(block);
2030
min_radix = min(min_radix, block*radix);
2031
n *= radix;
2032
}
2033
factor_index++;
2034
}
2035
2036
// all the other transforms
2037
for( ; factor_index < nf; factor_index++)
2038
{
2039
int radix = factors[factor_index], block = 1;
2040
if (radix == 3)
2041
{
2042
if (cols % 12 == 0)
2043
block = 4;
2044
else if (cols % 9 == 0)
2045
block = 3;
2046
else if (cols % 6 == 0)
2047
block = 2;
2048
}
2049
else if (radix == 5)
2050
{
2051
if (cols % 10 == 0)
2052
block = 2;
2053
}
2054
radixes.push_back(radix);
2055
blocks.push_back(block);
2056
min_radix = min(min_radix, block*radix);
2057
}
2058
}
2059
2060
template <typename T>
2061
static void fillRadixTable(UMat twiddles, const std::vector<int>& radixes)
2062
{
2063
Mat tw = twiddles.getMat(ACCESS_WRITE);
2064
T* ptr = tw.ptr<T>();
2065
int ptr_index = 0;
2066
2067
int n = 1;
2068
for (size_t i=0; i<radixes.size(); i++)
2069
{
2070
int radix = radixes[i];
2071
n *= radix;
2072
2073
for (int j=1; j<radix; j++)
2074
{
2075
double theta = -CV_2PI*j/n;
2076
2077
for (int k=0; k<(n/radix); k++)
2078
{
2079
ptr[ptr_index++] = (T) cos(k*theta);
2080
ptr[ptr_index++] = (T) sin(k*theta);
2081
}
2082
}
2083
}
2084
}
2085
};
2086
2087
class OCL_FftPlanCache
2088
{
2089
public:
2090
static OCL_FftPlanCache & getInstance()
2091
{
2092
CV_SINGLETON_LAZY_INIT_REF(OCL_FftPlanCache, new OCL_FftPlanCache())
2093
}
2094
2095
Ptr<OCL_FftPlan> getFftPlan(int dft_size, int depth)
2096
{
2097
int key = (dft_size << 16) | (depth & 0xFFFF);
2098
std::map<int, Ptr<OCL_FftPlan> >::iterator f = planStorage.find(key);
2099
if (f != planStorage.end())
2100
{
2101
return f->second;
2102
}
2103
else
2104
{
2105
Ptr<OCL_FftPlan> newPlan = Ptr<OCL_FftPlan>(new OCL_FftPlan(dft_size, depth));
2106
planStorage[key] = newPlan;
2107
return newPlan;
2108
}
2109
}
2110
2111
~OCL_FftPlanCache()
2112
{
2113
planStorage.clear();
2114
}
2115
2116
protected:
2117
OCL_FftPlanCache() :
2118
planStorage()
2119
{
2120
}
2121
std::map<int, Ptr<OCL_FftPlan> > planStorage;
2122
};
2123
2124
static bool ocl_dft_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType)
2125
{
2126
int type = _src.type(), depth = CV_MAT_DEPTH(type);
2127
Ptr<OCL_FftPlan> plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols(), depth);
2128
return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, fftType, true);
2129
}
2130
2131
static bool ocl_dft_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags, int fftType)
2132
{
2133
int type = _src.type(), depth = CV_MAT_DEPTH(type);
2134
Ptr<OCL_FftPlan> plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows(), depth);
2135
return plan->enqueueTransform(_src, _dst, nonzero_cols, flags, fftType, false);
2136
}
2137
2138
inline FftType determineFFTType(bool real_input, bool complex_input, bool real_output, bool complex_output, bool inv)
2139
{
2140
// output format is not specified
2141
if (!real_output && !complex_output)
2142
complex_output = true;
2143
2144
// input or output format is ambiguous
2145
if (real_input == complex_input || real_output == complex_output)
2146
CV_Error(Error::StsBadArg, "Invalid FFT input or output format");
2147
2148
FftType result = real_input ? (real_output ? R2R : R2C) : (real_output ? C2R : C2C);
2149
2150
// Forward Complex to CCS not supported
2151
if (result == C2R && !inv)
2152
result = C2C;
2153
2154
// Inverse CCS to Complex not supported
2155
if (result == R2C && inv)
2156
result = R2R;
2157
2158
return result;
2159
}
2160
2161
static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_rows)
2162
{
2163
int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
2164
Size ssize = _src.size();
2165
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
2166
2167
if (!(cn == 1 || cn == 2)
2168
|| !(depth == CV_32F || (depth == CV_64F && doubleSupport))
2169
|| ((flags & DFT_REAL_OUTPUT) && (flags & DFT_COMPLEX_OUTPUT)))
2170
return false;
2171
2172
// if is not a multiplication of prime numbers { 2, 3, 5 }
2173
if (ssize.area() != getOptimalDFTSize(ssize.area()))
2174
return false;
2175
2176
UMat src = _src.getUMat();
2177
bool inv = (flags & DFT_INVERSE) != 0 ? 1 : 0;
2178
2179
if( nonzero_rows <= 0 || nonzero_rows > _src.rows() )
2180
nonzero_rows = _src.rows();
2181
bool is1d = (flags & DFT_ROWS) != 0 || nonzero_rows == 1;
2182
2183
FftType fftType = determineFFTType(cn == 1, cn == 2,
2184
(flags & DFT_REAL_OUTPUT) != 0, (flags & DFT_COMPLEX_OUTPUT) != 0, inv);
2185
2186
UMat output;
2187
if (fftType == C2C || fftType == R2C)
2188
{
2189
// complex output
2190
_dst.create(src.size(), CV_MAKETYPE(depth, 2));
2191
output = _dst.getUMat();
2192
}
2193
else
2194
{
2195
// real output
2196
if (is1d)
2197
{
2198
_dst.create(src.size(), CV_MAKETYPE(depth, 1));
2199
output = _dst.getUMat();
2200
}
2201
else
2202
{
2203
_dst.create(src.size(), CV_MAKETYPE(depth, 1));
2204
output.create(src.size(), CV_MAKETYPE(depth, 2));
2205
}
2206
}
2207
2208
bool result = false;
2209
if (!inv)
2210
{
2211
int nonzero_cols = fftType == R2R ? output.cols/2 + 1 : output.cols;
2212
result = ocl_dft_rows(src, output, nonzero_rows, flags, fftType);
2213
if (!is1d)
2214
result = result && ocl_dft_cols(output, _dst, nonzero_cols, flags, fftType);
2215
}
2216
else
2217
{
2218
if (fftType == C2C)
2219
{
2220
// complex output
2221
result = ocl_dft_rows(src, output, nonzero_rows, flags, fftType);
2222
if (!is1d)
2223
result = result && ocl_dft_cols(output, output, output.cols, flags, fftType);
2224
}
2225
else
2226
{
2227
if (is1d)
2228
{
2229
result = ocl_dft_rows(src, output, nonzero_rows, flags, fftType);
2230
}
2231
else
2232
{
2233
int nonzero_cols = src.cols/2 + 1;
2234
result = ocl_dft_cols(src, output, nonzero_cols, flags, fftType);
2235
result = result && ocl_dft_rows(output, _dst, nonzero_rows, flags, fftType);
2236
}
2237
}
2238
}
2239
return result;
2240
}
2241
2242
} // namespace cv;
2243
2244
#endif
2245
2246
#ifdef HAVE_CLAMDFFT
2247
2248
namespace cv {
2249
2250
#define CLAMDDFT_Assert(func) \
2251
{ \
2252
clAmdFftStatus s = (func); \
2253
CV_Assert(s == CLFFT_SUCCESS); \
2254
}
2255
2256
class PlanCache
2257
{
2258
struct FftPlan
2259
{
2260
FftPlan(const Size & _dft_size, int _src_step, int _dst_step, bool _doubleFP, bool _inplace, int _flags, FftType _fftType) :
2261
dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step),
2262
doubleFP(_doubleFP), inplace(_inplace), flags(_flags), fftType(_fftType),
2263
context((cl_context)ocl::Context::getDefault().ptr()), plHandle(0)
2264
{
2265
bool dft_inverse = (flags & DFT_INVERSE) != 0;
2266
bool dft_scale = (flags & DFT_SCALE) != 0;
2267
bool dft_rows = (flags & DFT_ROWS) != 0;
2268
2269
clAmdFftLayout inLayout = CLFFT_REAL, outLayout = CLFFT_REAL;
2270
clAmdFftDim dim = dft_size.height == 1 || dft_rows ? CLFFT_1D : CLFFT_2D;
2271
2272
size_t batchSize = dft_rows ? dft_size.height : 1;
2273
size_t clLengthsIn[3] = { (size_t)dft_size.width, dft_rows ? 1 : (size_t)dft_size.height, 1 };
2274
size_t clStridesIn[3] = { 1, 1, 1 };
2275
size_t clStridesOut[3] = { 1, 1, 1 };
2276
int elemSize = doubleFP ? sizeof(double) : sizeof(float);
2277
2278
switch (fftType)
2279
{
2280
case C2C:
2281
inLayout = CLFFT_COMPLEX_INTERLEAVED;
2282
outLayout = CLFFT_COMPLEX_INTERLEAVED;
2283
clStridesIn[1] = src_step / (elemSize << 1);
2284
clStridesOut[1] = dst_step / (elemSize << 1);
2285
break;
2286
case R2C:
2287
inLayout = CLFFT_REAL;
2288
outLayout = CLFFT_HERMITIAN_INTERLEAVED;
2289
clStridesIn[1] = src_step / elemSize;
2290
clStridesOut[1] = dst_step / (elemSize << 1);
2291
break;
2292
case C2R:
2293
inLayout = CLFFT_HERMITIAN_INTERLEAVED;
2294
outLayout = CLFFT_REAL;
2295
clStridesIn[1] = src_step / (elemSize << 1);
2296
clStridesOut[1] = dst_step / elemSize;
2297
break;
2298
case R2R:
2299
default:
2300
CV_Error(Error::StsNotImplemented, "AMD Fft does not support this type");
2301
break;
2302
}
2303
2304
clStridesIn[2] = dft_rows ? clStridesIn[1] : dft_size.width * clStridesIn[1];
2305
clStridesOut[2] = dft_rows ? clStridesOut[1] : dft_size.width * clStridesOut[1];
2306
2307
CLAMDDFT_Assert(clAmdFftCreateDefaultPlan(&plHandle, (cl_context)ocl::Context::getDefault().ptr(), dim, clLengthsIn))
2308
2309
// setting plan properties
2310
CLAMDDFT_Assert(clAmdFftSetPlanPrecision(plHandle, doubleFP ? CLFFT_DOUBLE : CLFFT_SINGLE));
2311
CLAMDDFT_Assert(clAmdFftSetResultLocation(plHandle, inplace ? CLFFT_INPLACE : CLFFT_OUTOFPLACE))
2312
CLAMDDFT_Assert(clAmdFftSetLayout(plHandle, inLayout, outLayout))
2313
CLAMDDFT_Assert(clAmdFftSetPlanBatchSize(plHandle, batchSize))
2314
CLAMDDFT_Assert(clAmdFftSetPlanInStride(plHandle, dim, clStridesIn))
2315
CLAMDDFT_Assert(clAmdFftSetPlanOutStride(plHandle, dim, clStridesOut))
2316
CLAMDDFT_Assert(clAmdFftSetPlanDistance(plHandle, clStridesIn[dim], clStridesOut[dim]))
2317
2318
float scale = dft_scale ? 1.0f / (dft_rows ? dft_size.width : dft_size.area()) : 1.0f;
2319
CLAMDDFT_Assert(clAmdFftSetPlanScale(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale))
2320
2321
// ready to bake
2322
cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
2323
CLAMDDFT_Assert(clAmdFftBakePlan(plHandle, 1, &queue, NULL, NULL))
2324
}
2325
2326
~FftPlan()
2327
{
2328
// clAmdFftDestroyPlan(&plHandle);
2329
}
2330
2331
friend class PlanCache;
2332
2333
private:
2334
Size dft_size;
2335
int src_step, dst_step;
2336
bool doubleFP;
2337
bool inplace;
2338
int flags;
2339
FftType fftType;
2340
2341
cl_context context;
2342
clAmdFftPlanHandle plHandle;
2343
};
2344
2345
public:
2346
static PlanCache & getInstance()
2347
{
2348
CV_SINGLETON_LAZY_INIT_REF(PlanCache, new PlanCache())
2349
}
2350
2351
clAmdFftPlanHandle getPlanHandle(const Size & dft_size, int src_step, int dst_step, bool doubleFP,
2352
bool inplace, int flags, FftType fftType)
2353
{
2354
cl_context currentContext = (cl_context)ocl::Context::getDefault().ptr();
2355
2356
for (size_t i = 0, size = planStorage.size(); i < size; ++i)
2357
{
2358
const FftPlan * const plan = planStorage[i];
2359
2360
if (plan->dft_size == dft_size &&
2361
plan->flags == flags &&
2362
plan->src_step == src_step &&
2363
plan->dst_step == dst_step &&
2364
plan->doubleFP == doubleFP &&
2365
plan->fftType == fftType &&
2366
plan->inplace == inplace)
2367
{
2368
if (plan->context != currentContext)
2369
{
2370
planStorage.erase(planStorage.begin() + i);
2371
break;
2372
}
2373
2374
return plan->plHandle;
2375
}
2376
}
2377
2378
// no baked plan is found, so let's create a new one
2379
Ptr<FftPlan> newPlan = Ptr<FftPlan>(new FftPlan(dft_size, src_step, dst_step, doubleFP, inplace, flags, fftType));
2380
planStorage.push_back(newPlan);
2381
2382
return newPlan->plHandle;
2383
}
2384
2385
~PlanCache()
2386
{
2387
planStorage.clear();
2388
}
2389
2390
protected:
2391
PlanCache() :
2392
planStorage()
2393
{
2394
}
2395
2396
std::vector<Ptr<FftPlan> > planStorage;
2397
};
2398
2399
extern "C" {
2400
2401
static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
2402
{
2403
UMatData * u = (UMatData *)p;
2404
2405
if( u && CV_XADD(&u->urefcount, -1) == 1 )
2406
u->currAllocator->deallocate(u);
2407
u = 0;
2408
2409
clReleaseEvent(e), e = 0;
2410
}
2411
2412
}
2413
2414
static bool ocl_dft_amdfft(InputArray _src, OutputArray _dst, int flags)
2415
{
2416
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
2417
Size ssize = _src.size();
2418
2419
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
2420
if ( (!doubleSupport && depth == CV_64F) ||
2421
!(type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2) ||
2422
_src.offset() != 0)
2423
return false;
2424
2425
// if is not a multiplication of prime numbers { 2, 3, 5 }
2426
if (ssize.area() != getOptimalDFTSize(ssize.area()))
2427
return false;
2428
2429
int dst_complex_input = cn == 2 ? 1 : 0;
2430
bool dft_inverse = (flags & DFT_INVERSE) != 0 ? 1 : 0;
2431
int dft_complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0;
2432
bool dft_real_output = (flags & DFT_REAL_OUTPUT) != 0;
2433
2434
CV_Assert(dft_complex_output + dft_real_output < 2);
2435
FftType fftType = (FftType)(dst_complex_input << 0 | dft_complex_output << 1);
2436
2437
switch (fftType)
2438
{
2439
case C2C:
2440
_dst.create(ssize.height, ssize.width, CV_MAKE_TYPE(depth, 2));
2441
break;
2442
case R2C: // TODO implement it if possible
2443
case C2R: // TODO implement it if possible
2444
case R2R: // AMD Fft does not support this type
2445
default:
2446
return false;
2447
}
2448
2449
UMat src = _src.getUMat(), dst = _dst.getUMat();
2450
bool inplace = src.u == dst.u;
2451
2452
clAmdFftPlanHandle plHandle = PlanCache::getInstance().
2453
getPlanHandle(ssize, (int)src.step, (int)dst.step,
2454
depth == CV_64F, inplace, flags, fftType);
2455
2456
// get the bufferSize
2457
size_t bufferSize = 0;
2458
CLAMDDFT_Assert(clAmdFftGetTmpBufSize(plHandle, &bufferSize))
2459
UMat tmpBuffer(1, (int)bufferSize, CV_8UC1);
2460
2461
cl_mem srcarg = (cl_mem)src.handle(ACCESS_READ);
2462
cl_mem dstarg = (cl_mem)dst.handle(ACCESS_RW);
2463
2464
cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
2465
cl_event e = 0;
2466
2467
CLAMDDFT_Assert(clAmdFftEnqueueTransform(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
2468
1, &queue, 0, NULL, &e,
2469
&srcarg, &dstarg, (cl_mem)tmpBuffer.handle(ACCESS_RW)))
2470
2471
tmpBuffer.addref();
2472
clSetEventCallback(e, CL_COMPLETE, oclCleanupCallback, tmpBuffer.u);
2473
return true;
2474
}
2475
2476
#undef DFT_ASSERT
2477
2478
}
2479
2480
#endif // HAVE_CLAMDFFT
2481
2482
namespace cv
2483
{
2484
2485
template <typename T>
2486
static void complementComplex(T * ptr, size_t step, int n, int len, int dft_dims)
2487
{
2488
T* p0 = (T*)ptr;
2489
size_t dstep = step/sizeof(p0[0]);
2490
for(int i = 0; i < len; i++ )
2491
{
2492
T* p = p0 + dstep*i;
2493
T* q = dft_dims == 1 || i == 0 || i*2 == len ? p : p0 + dstep*(len-i);
2494
2495
for( int j = 1; j < (n+1)/2; j++ )
2496
{
2497
p[(n-j)*2] = q[j*2];
2498
p[(n-j)*2+1] = -q[j*2+1];
2499
}
2500
}
2501
}
2502
2503
static void complementComplexOutput(int depth, uchar * ptr, size_t step, int count, int len, int dft_dims)
2504
{
2505
if( depth == CV_32F )
2506
complementComplex((float*)ptr, step, count, len, dft_dims);
2507
else
2508
complementComplex((double*)ptr, step, count, len, dft_dims);
2509
}
2510
2511
enum DftMode {
2512
InvalidDft = 0,
2513
FwdRealToCCS,
2514
FwdRealToComplex,
2515
FwdComplexToComplex,
2516
InvCCSToReal,
2517
InvComplexToReal,
2518
InvComplexToComplex,
2519
};
2520
2521
enum DftDims {
2522
InvalidDim = 0,
2523
OneDim,
2524
OneDimColWise,
2525
TwoDims
2526
};
2527
2528
inline const char * modeName(DftMode m)
2529
{
2530
switch (m)
2531
{
2532
case InvalidDft: return "InvalidDft";
2533
case FwdRealToCCS: return "FwdRealToCCS";
2534
case FwdRealToComplex: return "FwdRealToComplex";
2535
case FwdComplexToComplex: return "FwdComplexToComplex";
2536
case InvCCSToReal: return "InvCCSToReal";
2537
case InvComplexToReal: return "InvComplexToReal";
2538
case InvComplexToComplex: return "InvComplexToComplex";
2539
}
2540
return 0;
2541
}
2542
2543
inline const char * dimsName(DftDims d)
2544
{
2545
switch (d)
2546
{
2547
case InvalidDim: return "InvalidDim";
2548
case OneDim: return "OneDim";
2549
case OneDimColWise: return "OneDimColWise";
2550
case TwoDims: return "TwoDims";
2551
};
2552
return 0;
2553
}
2554
2555
template <typename T>
2556
inline bool isInv(T mode)
2557
{
2558
switch ((DftMode)mode)
2559
{
2560
case InvCCSToReal:
2561
case InvComplexToReal:
2562
case InvComplexToComplex: return true;
2563
default: return false;
2564
}
2565
}
2566
2567
inline DftMode determineMode(bool inv, int cn1, int cn2)
2568
{
2569
if (!inv)
2570
{
2571
if (cn1 == 1 && cn2 == 1)
2572
return FwdRealToCCS;
2573
else if (cn1 == 1 && cn2 == 2)
2574
return FwdRealToComplex;
2575
else if (cn1 == 2 && cn2 == 2)
2576
return FwdComplexToComplex;
2577
}
2578
else
2579
{
2580
if (cn1 == 1 && cn2 == 1)
2581
return InvCCSToReal;
2582
else if (cn1 == 2 && cn2 == 1)
2583
return InvComplexToReal;
2584
else if (cn1 == 2 && cn2 == 2)
2585
return InvComplexToComplex;
2586
}
2587
return InvalidDft;
2588
}
2589
2590
2591
inline DftDims determineDims(int rows, int cols, bool isRowWise, bool isContinuous)
2592
{
2593
// printf("%d x %d (%d, %d)\n", rows, cols, isRowWise, isContinuous);
2594
if (isRowWise)
2595
return OneDim;
2596
if (cols == 1 && rows > 1) // one-column-shaped input
2597
{
2598
if (isContinuous)
2599
return OneDim;
2600
else
2601
return OneDimColWise;
2602
}
2603
if (rows == 1)
2604
return OneDim;
2605
if (cols > 1 && rows > 1)
2606
return TwoDims;
2607
return InvalidDim;
2608
}
2609
2610
class OcvDftImpl CV_FINAL : public hal::DFT2D
2611
{
2612
protected:
2613
Ptr<hal::DFT1D> contextA;
2614
Ptr<hal::DFT1D> contextB;
2615
bool needBufferA;
2616
bool needBufferB;
2617
bool inv;
2618
int width;
2619
int height;
2620
DftMode mode;
2621
int elem_size;
2622
int complex_elem_size;
2623
int depth;
2624
bool real_transform;
2625
int nonzero_rows;
2626
bool isRowTransform;
2627
bool isScaled;
2628
std::vector<int> stages;
2629
bool useIpp;
2630
int src_channels;
2631
int dst_channels;
2632
2633
AutoBuffer<uchar> tmp_bufA;
2634
AutoBuffer<uchar> tmp_bufB;
2635
AutoBuffer<uchar> buf0;
2636
AutoBuffer<uchar> buf1;
2637
2638
public:
2639
OcvDftImpl()
2640
{
2641
needBufferA = false;
2642
needBufferB = false;
2643
inv = false;
2644
width = 0;
2645
height = 0;
2646
mode = InvalidDft;
2647
elem_size = 0;
2648
complex_elem_size = 0;
2649
depth = 0;
2650
real_transform = false;
2651
nonzero_rows = 0;
2652
isRowTransform = false;
2653
isScaled = false;
2654
useIpp = false;
2655
src_channels = 0;
2656
dst_channels = 0;
2657
}
2658
2659
void init(int _width, int _height, int _depth, int _src_channels, int _dst_channels, int flags, int _nonzero_rows)
2660
{
2661
bool isComplex = _src_channels != _dst_channels;
2662
nonzero_rows = _nonzero_rows;
2663
width = _width;
2664
height = _height;
2665
depth = _depth;
2666
src_channels = _src_channels;
2667
dst_channels = _dst_channels;
2668
bool isInverse = (flags & CV_HAL_DFT_INVERSE) != 0;
2669
bool isInplace = (flags & CV_HAL_DFT_IS_INPLACE) != 0;
2670
bool isContinuous = (flags & CV_HAL_DFT_IS_CONTINUOUS) != 0;
2671
mode = determineMode(isInverse, _src_channels, _dst_channels);
2672
inv = isInverse;
2673
isRowTransform = (flags & CV_HAL_DFT_ROWS) != 0;
2674
isScaled = (flags & CV_HAL_DFT_SCALE) != 0;
2675
needBufferA = false;
2676
needBufferB = false;
2677
real_transform = (mode != FwdComplexToComplex && mode != InvComplexToComplex);
2678
2679
elem_size = (depth == CV_32F) ? sizeof(float) : sizeof(double);
2680
complex_elem_size = elem_size * 2;
2681
if( !real_transform )
2682
elem_size = complex_elem_size;
2683
2684
#if defined USE_IPP_DFT
2685
CV_IPP_CHECK()
2686
{
2687
if (nonzero_rows == 0 && depth == CV_32F && ((width * height)>(int)(1<<6)))
2688
{
2689
if (mode == FwdComplexToComplex || mode == InvComplexToComplex || mode == FwdRealToCCS || mode == InvCCSToReal)
2690
{
2691
useIpp = true;
2692
return;
2693
}
2694
}
2695
}
2696
#endif
2697
2698
DftDims dims = determineDims(height, width, isRowTransform, isContinuous);
2699
if (dims == TwoDims)
2700
{
2701
stages.resize(2);
2702
if (mode == InvCCSToReal || mode == InvComplexToReal)
2703
{
2704
stages[0] = 1;
2705
stages[1] = 0;
2706
}
2707
else
2708
{
2709
stages[0] = 0;
2710
stages[1] = 1;
2711
}
2712
}
2713
else
2714
{
2715
stages.resize(1);
2716
if (dims == OneDimColWise)
2717
stages[0] = 1;
2718
else
2719
stages[0] = 0;
2720
}
2721
2722
for(uint stageIndex = 0; stageIndex < stages.size(); ++stageIndex)
2723
{
2724
if (stageIndex == 1)
2725
{
2726
isInplace = true;
2727
isComplex = false;
2728
}
2729
2730
int stage = stages[stageIndex];
2731
bool isLastStage = (stageIndex + 1 == stages.size());
2732
2733
int len, count;
2734
2735
int f = 0;
2736
if (inv)
2737
f |= CV_HAL_DFT_INVERSE;
2738
if (isScaled)
2739
f |= CV_HAL_DFT_SCALE;
2740
if (isRowTransform)
2741
f |= CV_HAL_DFT_ROWS;
2742
if (isComplex)
2743
f |= CV_HAL_DFT_COMPLEX_OUTPUT;
2744
if (real_transform)
2745
f |= CV_HAL_DFT_REAL_OUTPUT;
2746
if (!isLastStage)
2747
f |= CV_HAL_DFT_TWO_STAGE;
2748
2749
if( stage == 0 ) // row-wise transform
2750
{
2751
if (width == 1 && !isRowTransform )
2752
{
2753
len = height;
2754
count = width;
2755
}
2756
else
2757
{
2758
len = width;
2759
count = height;
2760
}
2761
needBufferA = isInplace;
2762
contextA = hal::DFT1D::create(len, count, depth, f, &needBufferA);
2763
if (needBufferA)
2764
tmp_bufA.allocate(len * complex_elem_size);
2765
}
2766
else
2767
{
2768
len = height;
2769
count = width;
2770
f |= CV_HAL_DFT_STAGE_COLS;
2771
needBufferB = isInplace;
2772
contextB = hal::DFT1D::create(len, count, depth, f, &needBufferB);
2773
if (needBufferB)
2774
tmp_bufB.allocate(len * complex_elem_size);
2775
2776
buf0.allocate(len * complex_elem_size);
2777
buf1.allocate(len * complex_elem_size);
2778
}
2779
}
2780
}
2781
2782
void apply(const uchar * src, size_t src_step, uchar * dst, size_t dst_step) CV_OVERRIDE
2783
{
2784
#if defined USE_IPP_DFT
2785
if (useIpp)
2786
{
2787
int ipp_norm_flag = !isScaled ? 8 : inv ? 2 : 1;
2788
if (!isRowTransform)
2789
{
2790
if (mode == FwdComplexToComplex || mode == InvComplexToComplex)
2791
{
2792
if (ippi_DFT_C_32F(src, src_step, dst, dst_step, width, height, inv, ipp_norm_flag))
2793
{
2794
CV_IMPL_ADD(CV_IMPL_IPP);
2795
return;
2796
}
2797
setIppErrorStatus();
2798
}
2799
else if (mode == FwdRealToCCS || mode == InvCCSToReal)
2800
{
2801
if (ippi_DFT_R_32F(src, src_step, dst, dst_step, width, height, inv, ipp_norm_flag))
2802
{
2803
CV_IMPL_ADD(CV_IMPL_IPP);
2804
return;
2805
}
2806
setIppErrorStatus();
2807
}
2808
}
2809
else
2810
{
2811
if (mode == FwdComplexToComplex || mode == InvComplexToComplex)
2812
{
2813
ippiDFT_C_Func ippiFunc = inv ? (ippiDFT_C_Func)ippiDFTInv_CToC_32fc_C1R : (ippiDFT_C_Func)ippiDFTFwd_CToC_32fc_C1R;
2814
if (Dft_C_IPPLoop(src, src_step, dst, dst_step, width, height, IPPDFT_C_Functor(ippiFunc),ipp_norm_flag))
2815
{
2816
CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
2817
return;
2818
}
2819
setIppErrorStatus();
2820
}
2821
else if (mode == FwdRealToCCS || mode == InvCCSToReal)
2822
{
2823
ippiDFT_R_Func ippiFunc = inv ? (ippiDFT_R_Func)ippiDFTInv_PackToR_32f_C1R : (ippiDFT_R_Func)ippiDFTFwd_RToPack_32f_C1R;
2824
if (Dft_R_IPPLoop(src, src_step, dst, dst_step, width, height, IPPDFT_R_Functor(ippiFunc),ipp_norm_flag))
2825
{
2826
CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
2827
return;
2828
}
2829
setIppErrorStatus();
2830
}
2831
}
2832
return;
2833
}
2834
#endif
2835
2836
for(uint stageIndex = 0; stageIndex < stages.size(); ++stageIndex)
2837
{
2838
int stage_src_channels = src_channels;
2839
int stage_dst_channels = dst_channels;
2840
2841
if (stageIndex == 1)
2842
{
2843
src = dst;
2844
src_step = dst_step;
2845
stage_src_channels = stage_dst_channels;
2846
}
2847
2848
int stage = stages[stageIndex];
2849
bool isLastStage = (stageIndex + 1 == stages.size());
2850
bool isComplex = stage_src_channels != stage_dst_channels;
2851
2852
if( stage == 0 )
2853
rowDft(src, src_step, dst, dst_step, isComplex, isLastStage);
2854
else
2855
colDft(src, src_step, dst, dst_step, stage_src_channels, stage_dst_channels, isLastStage);
2856
}
2857
}
2858
2859
protected:
2860
2861
void rowDft(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, bool isComplex, bool isLastStage)
2862
{
2863
int len, count;
2864
if (width == 1 && !isRowTransform )
2865
{
2866
len = height;
2867
count = width;
2868
}
2869
else
2870
{
2871
len = width;
2872
count = height;
2873
}
2874
int dptr_offset = 0;
2875
int dst_full_len = len*elem_size;
2876
2877
if( needBufferA )
2878
{
2879
if (mode == FwdRealToCCS && (len & 1) && len > 1)
2880
dptr_offset = elem_size;
2881
}
2882
2883
if( !inv && isComplex )
2884
dst_full_len += (len & 1) ? elem_size : complex_elem_size;
2885
2886
int nz = nonzero_rows;
2887
if( nz <= 0 || nz > count )
2888
nz = count;
2889
2890
int i;
2891
for( i = 0; i < nz; i++ )
2892
{
2893
const uchar* sptr = src_data + src_step * i;
2894
uchar* dptr0 = dst_data + dst_step * i;
2895
uchar* dptr = dptr0;
2896
2897
if( needBufferA )
2898
dptr = tmp_bufA.data();
2899
2900
contextA->apply(sptr, dptr);
2901
2902
if( needBufferA )
2903
memcpy( dptr0, dptr + dptr_offset, dst_full_len );
2904
}
2905
2906
for( ; i < count; i++ )
2907
{
2908
uchar* dptr0 = dst_data + dst_step * i;
2909
memset( dptr0, 0, dst_full_len );
2910
}
2911
if(isLastStage && mode == FwdRealToComplex)
2912
complementComplexOutput(depth, dst_data, dst_step, len, nz, 1);
2913
}
2914
2915
void colDft(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int stage_src_channels, int stage_dst_channels, bool isLastStage)
2916
{
2917
int len = height;
2918
int count = width;
2919
int a = 0, b = count;
2920
uchar *dbuf0, *dbuf1;
2921
const uchar* sptr0 = src_data;
2922
uchar* dptr0 = dst_data;
2923
2924
dbuf0 = buf0.data(), dbuf1 = buf1.data();
2925
2926
if( needBufferB )
2927
{
2928
dbuf1 = tmp_bufB.data();
2929
dbuf0 = buf1.data();
2930
}
2931
2932
if( real_transform )
2933
{
2934
int even;
2935
a = 1;
2936
even = (count & 1) == 0;
2937
b = (count+1)/2;
2938
if( !inv )
2939
{
2940
memset( buf0.data(), 0, len*complex_elem_size );
2941
CopyColumn( sptr0, src_step, buf0.data(), complex_elem_size, len, elem_size );
2942
sptr0 += stage_dst_channels*elem_size;
2943
if( even )
2944
{
2945
memset( buf1.data(), 0, len*complex_elem_size );
2946
CopyColumn( sptr0 + (count-2)*elem_size, src_step,
2947
buf1.data(), complex_elem_size, len, elem_size );
2948
}
2949
}
2950
else if( stage_src_channels == 1 )
2951
{
2952
CopyColumn( sptr0, src_step, buf0.data(), elem_size, len, elem_size );
2953
ExpandCCS( buf0.data(), len, elem_size );
2954
if( even )
2955
{
2956
CopyColumn( sptr0 + (count-1)*elem_size, src_step,
2957
buf1.data(), elem_size, len, elem_size );
2958
ExpandCCS( buf1.data(), len, elem_size );
2959
}
2960
sptr0 += elem_size;
2961
}
2962
else
2963
{
2964
CopyColumn( sptr0, src_step, buf0.data(), complex_elem_size, len, complex_elem_size );
2965
if( even )
2966
{
2967
CopyColumn( sptr0 + b*complex_elem_size, src_step,
2968
buf1.data(), complex_elem_size, len, complex_elem_size );
2969
}
2970
sptr0 += complex_elem_size;
2971
}
2972
2973
if( even )
2974
contextB->apply(buf1.data(), dbuf1);
2975
contextB->apply(buf0.data(), dbuf0);
2976
2977
if( stage_dst_channels == 1 )
2978
{
2979
if( !inv )
2980
{
2981
// copy the half of output vector to the first/last column.
2982
// before doing that, defgragment the vector
2983
memcpy( dbuf0 + elem_size, dbuf0, elem_size );
2984
CopyColumn( dbuf0 + elem_size, elem_size, dptr0,
2985
dst_step, len, elem_size );
2986
if( even )
2987
{
2988
memcpy( dbuf1 + elem_size, dbuf1, elem_size );
2989
CopyColumn( dbuf1 + elem_size, elem_size,
2990
dptr0 + (count-1)*elem_size,
2991
dst_step, len, elem_size );
2992
}
2993
dptr0 += elem_size;
2994
}
2995
else
2996
{
2997
// copy the real part of the complex vector to the first/last column
2998
CopyColumn( dbuf0, complex_elem_size, dptr0, dst_step, len, elem_size );
2999
if( even )
3000
CopyColumn( dbuf1, complex_elem_size, dptr0 + (count-1)*elem_size,
3001
dst_step, len, elem_size );
3002
dptr0 += elem_size;
3003
}
3004
}
3005
else
3006
{
3007
assert( !inv );
3008
CopyColumn( dbuf0, complex_elem_size, dptr0,
3009
dst_step, len, complex_elem_size );
3010
if( even )
3011
CopyColumn( dbuf1, complex_elem_size,
3012
dptr0 + b*complex_elem_size,
3013
dst_step, len, complex_elem_size );
3014
dptr0 += complex_elem_size;
3015
}
3016
}
3017
3018
for(int i = a; i < b; i += 2 )
3019
{
3020
if( i+1 < b )
3021
{
3022
CopyFrom2Columns( sptr0, src_step, buf0.data(), buf1.data(), len, complex_elem_size );
3023
contextB->apply(buf1.data(), dbuf1);
3024
}
3025
else
3026
CopyColumn( sptr0, src_step, buf0.data(), complex_elem_size, len, complex_elem_size );
3027
3028
contextB->apply(buf0.data(), dbuf0);
3029
3030
if( i+1 < b )
3031
CopyTo2Columns( dbuf0, dbuf1, dptr0, dst_step, len, complex_elem_size );
3032
else
3033
CopyColumn( dbuf0, complex_elem_size, dptr0, dst_step, len, complex_elem_size );
3034
sptr0 += 2*complex_elem_size;
3035
dptr0 += 2*complex_elem_size;
3036
}
3037
if(isLastStage && mode == FwdRealToComplex)
3038
complementComplexOutput(depth, dst_data, dst_step, count, len, 2);
3039
}
3040
};
3041
3042
class OcvDftBasicImpl CV_FINAL : public hal::DFT1D
3043
{
3044
public:
3045
OcvDftOptions opt;
3046
int _factors[34];
3047
AutoBuffer<uchar> wave_buf;
3048
AutoBuffer<int> itab_buf;
3049
#ifdef USE_IPP_DFT
3050
AutoBuffer<uchar> ippbuf;
3051
AutoBuffer<uchar> ippworkbuf;
3052
#endif
3053
3054
public:
3055
OcvDftBasicImpl()
3056
{
3057
opt.factors = _factors;
3058
}
3059
void init(int len, int count, int depth, int flags, bool *needBuffer)
3060
{
3061
int prev_len = opt.n;
3062
3063
int stage = (flags & CV_HAL_DFT_STAGE_COLS) != 0 ? 1 : 0;
3064
int complex_elem_size = depth == CV_32F ? sizeof(Complex<float>) : sizeof(Complex<double>);
3065
opt.isInverse = (flags & CV_HAL_DFT_INVERSE) != 0;
3066
bool real_transform = (flags & CV_HAL_DFT_REAL_OUTPUT) != 0;
3067
opt.isComplex = (stage == 0) && (flags & CV_HAL_DFT_COMPLEX_OUTPUT) != 0;
3068
bool needAnotherStage = (flags & CV_HAL_DFT_TWO_STAGE) != 0;
3069
3070
opt.scale = 1;
3071
opt.tab_size = len;
3072
opt.n = len;
3073
3074
opt.useIpp = false;
3075
#ifdef USE_IPP_DFT
3076
opt.ipp_spec = 0;
3077
opt.ipp_work = 0;
3078
3079
if( CV_IPP_CHECK_COND && (opt.n*count >= 64) ) // use IPP DFT if available
3080
{
3081
int ipp_norm_flag = (flags & CV_HAL_DFT_SCALE) == 0 ? 8 : opt.isInverse ? 2 : 1;
3082
int specsize=0, initsize=0, worksize=0;
3083
IppDFTGetSizeFunc getSizeFunc = 0;
3084
IppDFTInitFunc initFunc = 0;
3085
3086
if( real_transform && stage == 0 )
3087
{
3088
if( depth == CV_32F )
3089
{
3090
getSizeFunc = ippsDFTGetSize_R_32f;
3091
initFunc = (IppDFTInitFunc)ippsDFTInit_R_32f;
3092
}
3093
else
3094
{
3095
getSizeFunc = ippsDFTGetSize_R_64f;
3096
initFunc = (IppDFTInitFunc)ippsDFTInit_R_64f;
3097
}
3098
}
3099
else
3100
{
3101
if( depth == CV_32F )
3102
{
3103
getSizeFunc = ippsDFTGetSize_C_32fc;
3104
initFunc = (IppDFTInitFunc)ippsDFTInit_C_32fc;
3105
}
3106
else
3107
{
3108
getSizeFunc = ippsDFTGetSize_C_64fc;
3109
initFunc = (IppDFTInitFunc)ippsDFTInit_C_64fc;
3110
}
3111
}
3112
if( getSizeFunc(opt.n, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= 0 )
3113
{
3114
ippbuf.allocate(specsize + initsize + 64);
3115
opt.ipp_spec = alignPtr(&ippbuf[0], 32);
3116
ippworkbuf.allocate(worksize + 32);
3117
opt.ipp_work = alignPtr(&ippworkbuf[0], 32);
3118
uchar* initbuf = alignPtr((uchar*)opt.ipp_spec + specsize, 32);
3119
if( initFunc(opt.n, ipp_norm_flag, ippAlgHintNone, opt.ipp_spec, initbuf) >= 0 )
3120
opt.useIpp = true;
3121
}
3122
else
3123
setIppErrorStatus();
3124
}
3125
#endif
3126
3127
if (!opt.useIpp)
3128
{
3129
if (len != prev_len)
3130
{
3131
opt.nf = DFTFactorize( opt.n, opt.factors );
3132
}
3133
bool inplace_transform = opt.factors[0] == opt.factors[opt.nf-1];
3134
if (len != prev_len || (!inplace_transform && opt.isInverse && real_transform))
3135
{
3136
wave_buf.allocate(opt.n*complex_elem_size);
3137
opt.wave = wave_buf.data();
3138
itab_buf.allocate(opt.n);
3139
opt.itab = itab_buf.data();
3140
DFTInit( opt.n, opt.nf, opt.factors, opt.itab, complex_elem_size,
3141
opt.wave, stage == 0 && opt.isInverse && real_transform );
3142
}
3143
// otherwise reuse the tables calculated on the previous stage
3144
if (needBuffer)
3145
{
3146
if( (stage == 0 && ((*needBuffer && !inplace_transform) || (real_transform && (len & 1)))) ||
3147
(stage == 1 && !inplace_transform) )
3148
{
3149
*needBuffer = true;
3150
}
3151
}
3152
}
3153
else
3154
{
3155
if (needBuffer)
3156
{
3157
*needBuffer = false;
3158
}
3159
}
3160
3161
{
3162
static DFTFunc dft_tbl[6] =
3163
{
3164
(DFTFunc)DFT_32f,
3165
(DFTFunc)RealDFT_32f,
3166
(DFTFunc)CCSIDFT_32f,
3167
(DFTFunc)DFT_64f,
3168
(DFTFunc)RealDFT_64f,
3169
(DFTFunc)CCSIDFT_64f
3170
};
3171
int idx = 0;
3172
if (stage == 0)
3173
{
3174
if (real_transform)
3175
{
3176
if (!opt.isInverse)
3177
idx = 1;
3178
else
3179
idx = 2;
3180
}
3181
}
3182
if (depth == CV_64F)
3183
idx += 3;
3184
3185
opt.dft_func = dft_tbl[idx];
3186
}
3187
3188
if(!needAnotherStage && (flags & CV_HAL_DFT_SCALE) != 0)
3189
{
3190
int rowCount = count;
3191
if (stage == 0 && (flags & CV_HAL_DFT_ROWS) != 0)
3192
rowCount = 1;
3193
opt.scale = 1./(len * rowCount);
3194
}
3195
}
3196
3197
void apply(const uchar *src, uchar *dst) CV_OVERRIDE
3198
{
3199
opt.dft_func(opt, src, dst);
3200
}
3201
3202
void free() {}
3203
};
3204
3205
struct ReplacementDFT1D : public hal::DFT1D
3206
{
3207
cvhalDFT *context;
3208
bool isInitialized;
3209
3210
ReplacementDFT1D() : context(0), isInitialized(false) {}
3211
bool init(int len, int count, int depth, int flags, bool *needBuffer)
3212
{
3213
int res = cv_hal_dftInit1D(&context, len, count, depth, flags, needBuffer);
3214
isInitialized = (res == CV_HAL_ERROR_OK);
3215
return isInitialized;
3216
}
3217
void apply(const uchar *src, uchar *dst) CV_OVERRIDE
3218
{
3219
if (isInitialized)
3220
{
3221
CALL_HAL(dft1D, cv_hal_dft1D, context, src, dst);
3222
}
3223
}
3224
~ReplacementDFT1D()
3225
{
3226
if (isInitialized)
3227
{
3228
CALL_HAL(dftFree1D, cv_hal_dftFree1D, context);
3229
}
3230
}
3231
};
3232
3233
struct ReplacementDFT2D : public hal::DFT2D
3234
{
3235
cvhalDFT *context;
3236
bool isInitialized;
3237
3238
ReplacementDFT2D() : context(0), isInitialized(false) {}
3239
bool init(int width, int height, int depth,
3240
int src_channels, int dst_channels,
3241
int flags, int nonzero_rows)
3242
{
3243
int res = cv_hal_dftInit2D(&context, width, height, depth, src_channels, dst_channels, flags, nonzero_rows);
3244
isInitialized = (res == CV_HAL_ERROR_OK);
3245
return isInitialized;
3246
}
3247
void apply(const uchar *src, size_t src_step, uchar *dst, size_t dst_step) CV_OVERRIDE
3248
{
3249
if (isInitialized)
3250
{
3251
CALL_HAL(dft2D, cv_hal_dft2D, context, src, src_step, dst, dst_step);
3252
}
3253
}
3254
~ReplacementDFT2D()
3255
{
3256
if (isInitialized)
3257
{
3258
CALL_HAL(dftFree2D, cv_hal_dftFree1D, context);
3259
}
3260
}
3261
};
3262
3263
namespace hal {
3264
3265
//================== 1D ======================
3266
3267
Ptr<DFT1D> DFT1D::create(int len, int count, int depth, int flags, bool *needBuffer)
3268
{
3269
{
3270
ReplacementDFT1D *impl = new ReplacementDFT1D();
3271
if (impl->init(len, count, depth, flags, needBuffer))
3272
{
3273
return Ptr<DFT1D>(impl);
3274
}
3275
delete impl;
3276
}
3277
{
3278
OcvDftBasicImpl *impl = new OcvDftBasicImpl();
3279
impl->init(len, count, depth, flags, needBuffer);
3280
return Ptr<DFT1D>(impl);
3281
}
3282
}
3283
3284
//================== 2D ======================
3285
3286
Ptr<DFT2D> DFT2D::create(int width, int height, int depth,
3287
int src_channels, int dst_channels,
3288
int flags, int nonzero_rows)
3289
{
3290
{
3291
ReplacementDFT2D *impl = new ReplacementDFT2D();
3292
if (impl->init(width, height, depth, src_channels, dst_channels, flags, nonzero_rows))
3293
{
3294
return Ptr<DFT2D>(impl);
3295
}
3296
delete impl;
3297
}
3298
{
3299
if(width == 1 && nonzero_rows > 0 )
3300
{
3301
CV_Error( CV_StsNotImplemented,
3302
"This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n"
3303
"For fast convolution/correlation use 2-column matrix or single-row matrix instead" );
3304
}
3305
OcvDftImpl *impl = new OcvDftImpl();
3306
impl->init(width, height, depth, src_channels, dst_channels, flags, nonzero_rows);
3307
return Ptr<DFT2D>(impl);
3308
}
3309
}
3310
3311
} // cv::hal::
3312
} // cv::
3313
3314
3315
void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
3316
{
3317
CV_INSTRUMENT_REGION();
3318
3319
#ifdef HAVE_CLAMDFFT
3320
CV_OCL_RUN(ocl::haveAmdFft() && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU &&
3321
_dst.isUMat() && _src0.dims() <= 2 && nonzero_rows == 0,
3322
ocl_dft_amdfft(_src0, _dst, flags))
3323
#endif
3324
3325
#ifdef HAVE_OPENCL
3326
CV_OCL_RUN(_dst.isUMat() && _src0.dims() <= 2,
3327
ocl_dft(_src0, _dst, flags, nonzero_rows))
3328
#endif
3329
3330
Mat src0 = _src0.getMat(), src = src0;
3331
bool inv = (flags & DFT_INVERSE) != 0;
3332
int type = src.type();
3333
int depth = src.depth();
3334
3335
CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
3336
3337
// Fail if DFT_COMPLEX_INPUT is specified, but src is not 2 channels.
3338
CV_Assert( !((flags & DFT_COMPLEX_INPUT) && src.channels() != 2) );
3339
3340
if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) )
3341
_dst.create( src.size(), CV_MAKETYPE(depth, 2) );
3342
else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) )
3343
_dst.create( src.size(), depth );
3344
else
3345
_dst.create( src.size(), type );
3346
3347
Mat dst = _dst.getMat();
3348
3349
int f = 0;
3350
if (src.isContinuous() && dst.isContinuous())
3351
f |= CV_HAL_DFT_IS_CONTINUOUS;
3352
if (inv)
3353
f |= CV_HAL_DFT_INVERSE;
3354
if (flags & DFT_ROWS)
3355
f |= CV_HAL_DFT_ROWS;
3356
if (flags & DFT_SCALE)
3357
f |= CV_HAL_DFT_SCALE;
3358
if (src.data == dst.data)
3359
f |= CV_HAL_DFT_IS_INPLACE;
3360
Ptr<hal::DFT2D> c = hal::DFT2D::create(src.cols, src.rows, depth, src.channels(), dst.channels(), f, nonzero_rows);
3361
c->apply(src.data, src.step, dst.data, dst.step);
3362
}
3363
3364
3365
void cv::idft( InputArray src, OutputArray dst, int flags, int nonzero_rows )
3366
{
3367
CV_INSTRUMENT_REGION();
3368
3369
dft( src, dst, flags | DFT_INVERSE, nonzero_rows );
3370
}
3371
3372
#ifdef HAVE_OPENCL
3373
3374
namespace cv {
3375
3376
static bool ocl_mulSpectrums( InputArray _srcA, InputArray _srcB,
3377
OutputArray _dst, int flags, bool conjB )
3378
{
3379
int atype = _srcA.type(), btype = _srcB.type(),
3380
rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
3381
Size asize = _srcA.size(), bsize = _srcB.size();
3382
CV_Assert(asize == bsize);
3383
3384
if ( !(atype == CV_32FC2 && btype == CV_32FC2) || flags != 0 )
3385
return false;
3386
3387
UMat A = _srcA.getUMat(), B = _srcB.getUMat();
3388
CV_Assert(A.size() == B.size());
3389
3390
_dst.create(A.size(), atype);
3391
UMat dst = _dst.getUMat();
3392
3393
ocl::Kernel k("mulAndScaleSpectrums",
3394
ocl::core::mulspectrums_oclsrc,
3395
format("%s", conjB ? "-D CONJ" : ""));
3396
if (k.empty())
3397
return false;
3398
3399
k.args(ocl::KernelArg::ReadOnlyNoSize(A), ocl::KernelArg::ReadOnlyNoSize(B),
3400
ocl::KernelArg::WriteOnly(dst), rowsPerWI);
3401
3402
size_t globalsize[2] = { (size_t)asize.width, ((size_t)asize.height + rowsPerWI - 1) / rowsPerWI };
3403
return k.run(2, globalsize, NULL, false);
3404
}
3405
3406
}
3407
3408
#endif
3409
3410
namespace {
3411
3412
#define VAL(buf, elem) (((T*)((char*)data ## buf + (step ## buf * (elem))))[0])
3413
#define MUL_SPECTRUMS_COL(A, B, C) \
3414
VAL(C, 0) = VAL(A, 0) * VAL(B, 0); \
3415
for (size_t j = 1; j <= rows - 2; j += 2) \
3416
{ \
3417
double a_re = VAL(A, j), a_im = VAL(A, j + 1); \
3418
double b_re = VAL(B, j), b_im = VAL(B, j + 1); \
3419
if (conjB) b_im = -b_im; \
3420
double c_re = a_re * b_re - a_im * b_im; \
3421
double c_im = a_re * b_im + a_im * b_re; \
3422
VAL(C, j) = (T)c_re; VAL(C, j + 1) = (T)c_im; \
3423
} \
3424
if ((rows & 1) == 0) \
3425
VAL(C, rows-1) = VAL(A, rows-1) * VAL(B, rows-1)
3426
3427
template <typename T, bool conjB> static inline
3428
void mulSpectrums_processCol_noinplace(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows)
3429
{
3430
MUL_SPECTRUMS_COL(A, B, C);
3431
}
3432
3433
template <typename T, bool conjB> static inline
3434
void mulSpectrums_processCol_inplaceA(const T* dataB, T* dataAC, size_t stepB, size_t stepAC, size_t rows)
3435
{
3436
MUL_SPECTRUMS_COL(AC, B, AC);
3437
}
3438
template <typename T, bool conjB, bool inplaceA> static inline
3439
void mulSpectrums_processCol(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows)
3440
{
3441
if (inplaceA)
3442
mulSpectrums_processCol_inplaceA<T, conjB>(dataB, dataC, stepB, stepC, rows);
3443
else
3444
mulSpectrums_processCol_noinplace<T, conjB>(dataA, dataB, dataC, stepA, stepB, stepC, rows);
3445
}
3446
#undef MUL_SPECTRUMS_COL
3447
#undef VAL
3448
3449
template <typename T, bool conjB, bool inplaceA> static inline
3450
void mulSpectrums_processCols(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols)
3451
{
3452
mulSpectrums_processCol<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows);
3453
if ((cols & 1) == 0)
3454
{
3455
mulSpectrums_processCol<T, conjB, inplaceA>(dataA + cols - 1, dataB + cols - 1, dataC + cols - 1, stepA, stepB, stepC, rows);
3456
}
3457
}
3458
3459
#define VAL(buf, elem) (data ## buf[(elem)])
3460
#define MUL_SPECTRUMS_ROW(A, B, C) \
3461
for (size_t j = j0; j < j1; j += 2) \
3462
{ \
3463
double a_re = VAL(A, j), a_im = VAL(A, j + 1); \
3464
double b_re = VAL(B, j), b_im = VAL(B, j + 1); \
3465
if (conjB) b_im = -b_im; \
3466
double c_re = a_re * b_re - a_im * b_im; \
3467
double c_im = a_re * b_im + a_im * b_re; \
3468
VAL(C, j) = (T)c_re; VAL(C, j + 1) = (T)c_im; \
3469
}
3470
template <typename T, bool conjB> static inline
3471
void mulSpectrums_processRow_noinplace(const T* dataA, const T* dataB, T* dataC, size_t j0, size_t j1)
3472
{
3473
MUL_SPECTRUMS_ROW(A, B, C);
3474
}
3475
template <typename T, bool conjB> static inline
3476
void mulSpectrums_processRow_inplaceA(const T* dataB, T* dataAC, size_t j0, size_t j1)
3477
{
3478
MUL_SPECTRUMS_ROW(AC, B, AC);
3479
}
3480
template <typename T, bool conjB, bool inplaceA> static inline
3481
void mulSpectrums_processRow(const T* dataA, const T* dataB, T* dataC, size_t j0, size_t j1)
3482
{
3483
if (inplaceA)
3484
mulSpectrums_processRow_inplaceA<T, conjB>(dataB, dataC, j0, j1);
3485
else
3486
mulSpectrums_processRow_noinplace<T, conjB>(dataA, dataB, dataC, j0, j1);
3487
}
3488
#undef MUL_SPECTRUMS_ROW
3489
#undef VAL
3490
3491
template <typename T, bool conjB, bool inplaceA> static inline
3492
void mulSpectrums_processRows(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d_CN1)
3493
{
3494
while (rows-- > 0)
3495
{
3496
if (is_1d_CN1)
3497
dataC[0] = dataA[0]*dataB[0];
3498
mulSpectrums_processRow<T, conjB, inplaceA>(dataA, dataB, dataC, j0, j1);
3499
if (is_1d_CN1 && (cols & 1) == 0)
3500
dataC[j1] = dataA[j1]*dataB[j1];
3501
3502
dataA = (const T*)(((char*)dataA) + stepA);
3503
dataB = (const T*)(((char*)dataB) + stepB);
3504
dataC = (T*)(((char*)dataC) + stepC);
3505
}
3506
}
3507
3508
3509
template <typename T, bool conjB, bool inplaceA> static inline
3510
void mulSpectrums_Impl_(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d, bool isCN1)
3511
{
3512
if (!is_1d && isCN1)
3513
{
3514
mulSpectrums_processCols<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols);
3515
}
3516
mulSpectrums_processRows<T, conjB, inplaceA>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d && isCN1);
3517
}
3518
template <typename T, bool conjB> static inline
3519
void mulSpectrums_Impl(const T* dataA, const T* dataB, T* dataC, size_t stepA, size_t stepB, size_t stepC, size_t rows, size_t cols, size_t j0, size_t j1, bool is_1d, bool isCN1)
3520
{
3521
if (dataA == dataC)
3522
mulSpectrums_Impl_<T, conjB, true>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d, isCN1);
3523
else
3524
mulSpectrums_Impl_<T, conjB, false>(dataA, dataB, dataC, stepA, stepB, stepC, rows, cols, j0, j1, is_1d, isCN1);
3525
}
3526
3527
} // namespace
3528
3529
void cv::mulSpectrums( InputArray _srcA, InputArray _srcB,
3530
OutputArray _dst, int flags, bool conjB )
3531
{
3532
CV_INSTRUMENT_REGION();
3533
3534
CV_OCL_RUN(_dst.isUMat() && _srcA.dims() <= 2 && _srcB.dims() <= 2,
3535
ocl_mulSpectrums(_srcA, _srcB, _dst, flags, conjB))
3536
3537
Mat srcA = _srcA.getMat(), srcB = _srcB.getMat();
3538
int depth = srcA.depth(), cn = srcA.channels(), type = srcA.type();
3539
size_t rows = srcA.rows, cols = srcA.cols;
3540
3541
CV_Assert( type == srcB.type() && srcA.size() == srcB.size() );
3542
CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
3543
3544
_dst.create( srcA.rows, srcA.cols, type );
3545
Mat dst = _dst.getMat();
3546
3547
// correct inplace support
3548
// Case 'dst.data == srcA.data' is handled by implementation,
3549
// because it is used frequently (filter2D, matchTemplate)
3550
if (dst.data == srcB.data)
3551
srcB = srcB.clone(); // workaround for B only
3552
3553
bool is_1d = (flags & DFT_ROWS)
3554
|| (rows == 1)
3555
|| (cols == 1 && srcA.isContinuous() && srcB.isContinuous() && dst.isContinuous());
3556
3557
if( is_1d && !(flags & DFT_ROWS) )
3558
cols = cols + rows - 1, rows = 1;
3559
3560
bool isCN1 = cn == 1;
3561
size_t j0 = isCN1 ? 1 : 0;
3562
size_t j1 = cols*cn - (((cols & 1) == 0 && cn == 1) ? 1 : 0);
3563
3564
if (depth == CV_32F)
3565
{
3566
const float* dataA = srcA.ptr<float>();
3567
const float* dataB = srcB.ptr<float>();
3568
float* dataC = dst.ptr<float>();
3569
if (!conjB)
3570
mulSpectrums_Impl<float, false>(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1);
3571
else
3572
mulSpectrums_Impl<float, true>(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1);
3573
}
3574
else
3575
{
3576
const double* dataA = srcA.ptr<double>();
3577
const double* dataB = srcB.ptr<double>();
3578
double* dataC = dst.ptr<double>();
3579
if (!conjB)
3580
mulSpectrums_Impl<double, false>(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1);
3581
else
3582
mulSpectrums_Impl<double, true>(dataA, dataB, dataC, srcA.step, srcB.step, dst.step, rows, cols, j0, j1, is_1d, isCN1);
3583
}
3584
}
3585
3586
3587
/****************************************************************************************\
3588
Discrete Cosine Transform
3589
\****************************************************************************************/
3590
3591
namespace cv
3592
{
3593
3594
/* DCT is calculated using DFT, as described here:
3595
http://www.ece.utexas.edu/~bevans/courses/ee381k/lectures/09_DCT/lecture9/:
3596
*/
3597
template<typename T> static void
3598
DCT( const OcvDftOptions & c, const T* src, size_t src_step, T* dft_src, T* dft_dst, T* dst, size_t dst_step,
3599
const Complex<T>* dct_wave )
3600
{
3601
static const T sin_45 = (T)0.70710678118654752440084436210485;
3602
3603
int n = c.n;
3604
int j, n2 = n >> 1;
3605
3606
src_step /= sizeof(src[0]);
3607
dst_step /= sizeof(dst[0]);
3608
T* dst1 = dst + (n-1)*dst_step;
3609
3610
if( n == 1 )
3611
{
3612
dst[0] = src[0];
3613
return;
3614
}
3615
3616
for( j = 0; j < n2; j++, src += src_step*2 )
3617
{
3618
dft_src[j] = src[0];
3619
dft_src[n-j-1] = src[src_step];
3620
}
3621
3622
RealDFT(c, dft_src, dft_dst);
3623
src = dft_dst;
3624
3625
dst[0] = (T)(src[0]*dct_wave->re*sin_45);
3626
dst += dst_step;
3627
for( j = 1, dct_wave++; j < n2; j++, dct_wave++,
3628
dst += dst_step, dst1 -= dst_step )
3629
{
3630
T t0 = dct_wave->re*src[j*2-1] - dct_wave->im*src[j*2];
3631
T t1 = -dct_wave->im*src[j*2-1] - dct_wave->re*src[j*2];
3632
dst[0] = t0;
3633
dst1[0] = t1;
3634
}
3635
3636
dst[0] = src[n-1]*dct_wave->re;
3637
}
3638
3639
3640
template<typename T> static void
3641
IDCT( const OcvDftOptions & c, const T* src, size_t src_step, T* dft_src, T* dft_dst, T* dst, size_t dst_step,
3642
const Complex<T>* dct_wave)
3643
{
3644
static const T sin_45 = (T)0.70710678118654752440084436210485;
3645
int n = c.n;
3646
int j, n2 = n >> 1;
3647
3648
src_step /= sizeof(src[0]);
3649
dst_step /= sizeof(dst[0]);
3650
const T* src1 = src + (n-1)*src_step;
3651
3652
if( n == 1 )
3653
{
3654
dst[0] = src[0];
3655
return;
3656
}
3657
3658
dft_src[0] = (T)(src[0]*2*dct_wave->re*sin_45);
3659
src += src_step;
3660
for( j = 1, dct_wave++; j < n2; j++, dct_wave++,
3661
src += src_step, src1 -= src_step )
3662
{
3663
T t0 = dct_wave->re*src[0] - dct_wave->im*src1[0];
3664
T t1 = -dct_wave->im*src[0] - dct_wave->re*src1[0];
3665
dft_src[j*2-1] = t0;
3666
dft_src[j*2] = t1;
3667
}
3668
3669
dft_src[n-1] = (T)(src[0]*2*dct_wave->re);
3670
CCSIDFT(c, dft_src, dft_dst);
3671
3672
for( j = 0; j < n2; j++, dst += dst_step*2 )
3673
{
3674
dst[0] = dft_dst[j];
3675
dst[dst_step] = dft_dst[n-j-1];
3676
}
3677
}
3678
3679
3680
static void
3681
DCTInit( int n, int elem_size, void* _wave, int inv )
3682
{
3683
static const double DctScale[] =
3684
{
3685
0.707106781186547570, 0.500000000000000000, 0.353553390593273790,
3686
0.250000000000000000, 0.176776695296636890, 0.125000000000000000,
3687
0.088388347648318447, 0.062500000000000000, 0.044194173824159223,
3688
0.031250000000000000, 0.022097086912079612, 0.015625000000000000,
3689
0.011048543456039806, 0.007812500000000000, 0.005524271728019903,
3690
0.003906250000000000, 0.002762135864009952, 0.001953125000000000,
3691
0.001381067932004976, 0.000976562500000000, 0.000690533966002488,
3692
0.000488281250000000, 0.000345266983001244, 0.000244140625000000,
3693
0.000172633491500622, 0.000122070312500000, 0.000086316745750311,
3694
0.000061035156250000, 0.000043158372875155, 0.000030517578125000
3695
};
3696
3697
int i;
3698
Complex<double> w, w1;
3699
double t, scale;
3700
3701
if( n == 1 )
3702
return;
3703
3704
assert( (n&1) == 0 );
3705
3706
if( (n & (n - 1)) == 0 )
3707
{
3708
int m;
3709
for( m = 0; (unsigned)(1 << m) < (unsigned)n; m++ )
3710
;
3711
scale = (!inv ? 2 : 1)*DctScale[m];
3712
w1.re = DFTTab[m+2][0];
3713
w1.im = -DFTTab[m+2][1];
3714
}
3715
else
3716
{
3717
t = 1./(2*n);
3718
scale = (!inv ? 2 : 1)*std::sqrt(t);
3719
w1.im = sin(-CV_PI*t);
3720
w1.re = std::sqrt(1. - w1.im*w1.im);
3721
}
3722
n >>= 1;
3723
3724
if( elem_size == sizeof(Complex<double>) )
3725
{
3726
Complex<double>* wave = (Complex<double>*)_wave;
3727
3728
w.re = scale;
3729
w.im = 0.;
3730
3731
for( i = 0; i <= n; i++ )
3732
{
3733
wave[i] = w;
3734
t = w.re*w1.re - w.im*w1.im;
3735
w.im = w.re*w1.im + w.im*w1.re;
3736
w.re = t;
3737
}
3738
}
3739
else
3740
{
3741
Complex<float>* wave = (Complex<float>*)_wave;
3742
assert( elem_size == sizeof(Complex<float>) );
3743
3744
w.re = (float)scale;
3745
w.im = 0.f;
3746
3747
for( i = 0; i <= n; i++ )
3748
{
3749
wave[i].re = (float)w.re;
3750
wave[i].im = (float)w.im;
3751
t = w.re*w1.re - w.im*w1.im;
3752
w.im = w.re*w1.im + w.im*w1.re;
3753
w.re = t;
3754
}
3755
}
3756
}
3757
3758
3759
typedef void (*DCTFunc)(const OcvDftOptions & c, const void* src, size_t src_step, void* dft_src,
3760
void* dft_dst, void* dst, size_t dst_step, const void* dct_wave);
3761
3762
static void DCT_32f(const OcvDftOptions & c, const float* src, size_t src_step, float* dft_src, float* dft_dst,
3763
float* dst, size_t dst_step, const Complexf* dct_wave)
3764
{
3765
DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
3766
}
3767
3768
static void IDCT_32f(const OcvDftOptions & c, const float* src, size_t src_step, float* dft_src, float* dft_dst,
3769
float* dst, size_t dst_step, const Complexf* dct_wave)
3770
{
3771
IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
3772
}
3773
3774
static void DCT_64f(const OcvDftOptions & c, const double* src, size_t src_step, double* dft_src, double* dft_dst,
3775
double* dst, size_t dst_step, const Complexd* dct_wave)
3776
{
3777
DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
3778
}
3779
3780
static void IDCT_64f(const OcvDftOptions & c, const double* src, size_t src_step, double* dft_src, double* dft_dst,
3781
double* dst, size_t dst_step, const Complexd* dct_wave)
3782
{
3783
IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave);
3784
}
3785
3786
}
3787
3788
#ifdef HAVE_IPP
3789
namespace cv
3790
{
3791
3792
#if IPP_VERSION_X100 >= 900
3793
typedef IppStatus (CV_STDCALL * ippiDCTFunc)(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, const void* pDCTSpec, Ipp8u* pBuffer);
3794
typedef IppStatus (CV_STDCALL * ippiDCTInit)(void* pDCTSpec, IppiSize roiSize, Ipp8u* pMemInit );
3795
typedef IppStatus (CV_STDCALL * ippiDCTGetSize)(IppiSize roiSize, int* pSizeSpec, int* pSizeInit, int* pSizeBuf);
3796
#elif IPP_VERSION_X100 >= 700
3797
typedef IppStatus (CV_STDCALL * ippiDCTFunc)(const Ipp32f*, int, Ipp32f*, int, const void*, Ipp8u*);
3798
typedef IppStatus (CV_STDCALL * ippiDCTInitAlloc)(void**, IppiSize, IppHintAlgorithm);
3799
typedef IppStatus (CV_STDCALL * ippiDCTFree)(void* pDCTSpec);
3800
typedef IppStatus (CV_STDCALL * ippiDCTGetBufSize)(const void*, int*);
3801
#endif
3802
3803
class DctIPPLoop_Invoker : public ParallelLoopBody
3804
{
3805
public:
3806
DctIPPLoop_Invoker(const uchar * _src, size_t _src_step, uchar * _dst, size_t _dst_step, int _width, bool _inv, bool *_ok) :
3807
ParallelLoopBody(), src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width), inv(_inv), ok(_ok)
3808
{
3809
*ok = true;
3810
}
3811
3812
virtual void operator()(const Range& range) const CV_OVERRIDE
3813
{
3814
if(*ok == false)
3815
return;
3816
3817
#if IPP_VERSION_X100 >= 900
3818
IppiSize srcRoiSize = {width, 1};
3819
3820
int specSize = 0;
3821
int initSize = 0;
3822
int bufferSize = 0;
3823
3824
Ipp8u* pDCTSpec = NULL;
3825
Ipp8u* pBuffer = NULL;
3826
Ipp8u* pInitBuf = NULL;
3827
3828
#define IPP_RETURN \
3829
if(pDCTSpec) \
3830
ippFree(pDCTSpec); \
3831
if(pBuffer) \
3832
ippFree(pBuffer); \
3833
if(pInitBuf) \
3834
ippFree(pInitBuf); \
3835
return;
3836
3837
ippiDCTFunc ippiDCT_32f_C1R = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
3838
ippiDCTInit ippDctInit = inv ? (ippiDCTInit)ippiDCTInvInit_32f : (ippiDCTInit)ippiDCTFwdInit_32f;
3839
ippiDCTGetSize ippDctGetSize = inv ? (ippiDCTGetSize)ippiDCTInvGetSize_32f : (ippiDCTGetSize)ippiDCTFwdGetSize_32f;
3840
3841
if(ippDctGetSize(srcRoiSize, &specSize, &initSize, &bufferSize) < 0)
3842
{
3843
*ok = false;
3844
return;
3845
}
3846
3847
pDCTSpec = (Ipp8u*)CV_IPP_MALLOC(specSize);
3848
if(!pDCTSpec && specSize)
3849
{
3850
*ok = false;
3851
return;
3852
}
3853
3854
pBuffer = (Ipp8u*)CV_IPP_MALLOC(bufferSize);
3855
if(!pBuffer && bufferSize)
3856
{
3857
*ok = false;
3858
IPP_RETURN
3859
}
3860
pInitBuf = (Ipp8u*)CV_IPP_MALLOC(initSize);
3861
if(!pInitBuf && initSize)
3862
{
3863
*ok = false;
3864
IPP_RETURN
3865
}
3866
3867
if(ippDctInit(pDCTSpec, srcRoiSize, pInitBuf) < 0)
3868
{
3869
*ok = false;
3870
IPP_RETURN
3871
}
3872
3873
for(int i = range.start; i < range.end; ++i)
3874
{
3875
if(CV_INSTRUMENT_FUN_IPP(ippiDCT_32f_C1R, (float*)(src + src_step * i), static_cast<int>(src_step), (float*)(dst + dst_step * i), static_cast<int>(dst_step), pDCTSpec, pBuffer) < 0)
3876
{
3877
*ok = false;
3878
IPP_RETURN
3879
}
3880
}
3881
IPP_RETURN
3882
#undef IPP_RETURN
3883
#elif IPP_VERSION_X100 >= 700
3884
void* pDCTSpec;
3885
AutoBuffer<uchar> buf;
3886
uchar* pBuffer = 0;
3887
int bufSize=0;
3888
3889
IppiSize srcRoiSize = {width, 1};
3890
3891
CV_SUPPRESS_DEPRECATED_START
3892
3893
ippiDCTFunc ippDctFun = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
3894
ippiDCTInitAlloc ippInitAlloc = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
3895
ippiDCTFree ippFree = inv ? (ippiDCTFree)ippiDCTInvFree_32f : (ippiDCTFree)ippiDCTFwdFree_32f;
3896
ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
3897
3898
if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=0 && ippGetBufSize(pDCTSpec, &bufSize)>=0)
3899
{
3900
buf.allocate( bufSize );
3901
pBuffer = (uchar*)buf;
3902
3903
for( int i = range.start; i < range.end; ++i)
3904
{
3905
if(ippDctFun((float*)(src + src_step * i), static_cast<int>(src_step), (float*)(dst + dst_step * i), static_cast<int>(dst_step), pDCTSpec, (Ipp8u*)pBuffer) < 0)
3906
{
3907
*ok = false;
3908
break;
3909
}
3910
}
3911
}
3912
else
3913
*ok = false;
3914
3915
if (pDCTSpec)
3916
ippFree(pDCTSpec);
3917
3918
CV_SUPPRESS_DEPRECATED_END
3919
#else
3920
CV_UNUSED(range);
3921
*ok = false;
3922
#endif
3923
}
3924
3925
private:
3926
const uchar * src;
3927
size_t src_step;
3928
uchar * dst;
3929
size_t dst_step;
3930
int width;
3931
bool inv;
3932
bool *ok;
3933
};
3934
3935
static bool DctIPPLoop(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv)
3936
{
3937
bool ok;
3938
parallel_for_(Range(0, height), DctIPPLoop_Invoker(src, src_step, dst, dst_step, width, inv, &ok), height/(double)(1<<4) );
3939
return ok;
3940
}
3941
3942
static bool ippi_DCT_32f(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv, bool row)
3943
{
3944
CV_INSTRUMENT_REGION_IPP();
3945
3946
if(row)
3947
return DctIPPLoop(src, src_step, dst, dst_step, width, height, inv);
3948
else
3949
{
3950
#if IPP_VERSION_X100 >= 900
3951
IppiSize srcRoiSize = {width, height};
3952
3953
int specSize = 0;
3954
int initSize = 0;
3955
int bufferSize = 0;
3956
3957
Ipp8u* pDCTSpec = NULL;
3958
Ipp8u* pBuffer = NULL;
3959
Ipp8u* pInitBuf = NULL;
3960
3961
#define IPP_RELEASE \
3962
if(pDCTSpec) \
3963
ippFree(pDCTSpec); \
3964
if(pBuffer) \
3965
ippFree(pBuffer); \
3966
if(pInitBuf) \
3967
ippFree(pInitBuf); \
3968
3969
ippiDCTFunc ippiDCT_32f_C1R = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
3970
ippiDCTInit ippDctInit = inv ? (ippiDCTInit)ippiDCTInvInit_32f : (ippiDCTInit)ippiDCTFwdInit_32f;
3971
ippiDCTGetSize ippDctGetSize = inv ? (ippiDCTGetSize)ippiDCTInvGetSize_32f : (ippiDCTGetSize)ippiDCTFwdGetSize_32f;
3972
3973
if(ippDctGetSize(srcRoiSize, &specSize, &initSize, &bufferSize) < 0)
3974
return false;
3975
3976
pDCTSpec = (Ipp8u*)CV_IPP_MALLOC(specSize);
3977
if(!pDCTSpec && specSize)
3978
return false;
3979
3980
pBuffer = (Ipp8u*)CV_IPP_MALLOC(bufferSize);
3981
if(!pBuffer && bufferSize)
3982
{
3983
IPP_RELEASE
3984
return false;
3985
}
3986
pInitBuf = (Ipp8u*)CV_IPP_MALLOC(initSize);
3987
if(!pInitBuf && initSize)
3988
{
3989
IPP_RELEASE
3990
return false;
3991
}
3992
3993
if(ippDctInit(pDCTSpec, srcRoiSize, pInitBuf) < 0)
3994
{
3995
IPP_RELEASE
3996
return false;
3997
}
3998
3999
if(CV_INSTRUMENT_FUN_IPP(ippiDCT_32f_C1R, (float*)src, static_cast<int>(src_step), (float*)dst, static_cast<int>(dst_step), pDCTSpec, pBuffer) < 0)
4000
{
4001
IPP_RELEASE
4002
return false;
4003
}
4004
4005
IPP_RELEASE
4006
return true;
4007
#undef IPP_RELEASE
4008
#elif IPP_VERSION_X100 >= 700
4009
IppStatus status;
4010
void* pDCTSpec;
4011
AutoBuffer<uchar> buf;
4012
uchar* pBuffer = 0;
4013
int bufSize=0;
4014
4015
IppiSize srcRoiSize = {width, height};
4016
4017
CV_SUPPRESS_DEPRECATED_START
4018
4019
ippiDCTFunc ippDctFun = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
4020
ippiDCTInitAlloc ippInitAlloc = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
4021
ippiDCTFree ippFree = inv ? (ippiDCTFree)ippiDCTInvFree_32f : (ippiDCTFree)ippiDCTFwdFree_32f;
4022
ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
4023
4024
status = ippStsErr;
4025
4026
if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=0 && ippGetBufSize(pDCTSpec, &bufSize)>=0)
4027
{
4028
buf.allocate( bufSize );
4029
pBuffer = (uchar*)buf;
4030
4031
status = ippDctFun((float*)src, static_cast<int>(src_step), (float*)dst, static_cast<int>(dst_step), pDCTSpec, (Ipp8u*)pBuffer);
4032
}
4033
4034
if (pDCTSpec)
4035
ippFree(pDCTSpec);
4036
4037
CV_SUPPRESS_DEPRECATED_END
4038
4039
return status >= 0;
4040
#else
4041
CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(inv); CV_UNUSED(row);
4042
return false;
4043
#endif
4044
}
4045
}
4046
}
4047
#endif
4048
4049
namespace cv {
4050
4051
class OcvDctImpl CV_FINAL : public hal::DCT2D
4052
{
4053
public:
4054
OcvDftOptions opt;
4055
4056
int _factors[34];
4057
AutoBuffer<uint> wave_buf;
4058
AutoBuffer<int> itab_buf;
4059
4060
DCTFunc dct_func;
4061
bool isRowTransform;
4062
bool isInverse;
4063
bool isContinuous;
4064
int start_stage;
4065
int end_stage;
4066
int width;
4067
int height;
4068
int depth;
4069
4070
void init(int _width, int _height, int _depth, int flags)
4071
{
4072
width = _width;
4073
height = _height;
4074
depth = _depth;
4075
isInverse = (flags & CV_HAL_DFT_INVERSE) != 0;
4076
isRowTransform = (flags & CV_HAL_DFT_ROWS) != 0;
4077
isContinuous = (flags & CV_HAL_DFT_IS_CONTINUOUS) != 0;
4078
static DCTFunc dct_tbl[4] =
4079
{
4080
(DCTFunc)DCT_32f,
4081
(DCTFunc)IDCT_32f,
4082
(DCTFunc)DCT_64f,
4083
(DCTFunc)IDCT_64f
4084
};
4085
dct_func = dct_tbl[(int)isInverse + (depth == CV_64F)*2];
4086
opt.nf = 0;
4087
opt.isComplex = false;
4088
opt.isInverse = false;
4089
opt.noPermute = false;
4090
opt.scale = 1.;
4091
opt.factors = _factors;
4092
4093
if (isRowTransform || height == 1 || (width == 1 && isContinuous))
4094
{
4095
start_stage = end_stage = 0;
4096
}
4097
else
4098
{
4099
start_stage = (width == 1);
4100
end_stage = 1;
4101
}
4102
}
4103
void apply(const uchar *src, size_t src_step, uchar *dst, size_t dst_step) CV_OVERRIDE
4104
{
4105
CV_IPP_RUN(IPP_VERSION_X100 >= 700 && depth == CV_32F, ippi_DCT_32f(src, src_step, dst, dst_step, width, height, isInverse, isRowTransform))
4106
4107
AutoBuffer<uchar> dct_wave;
4108
AutoBuffer<uchar> src_buf, dst_buf;
4109
uchar *src_dft_buf = 0, *dst_dft_buf = 0;
4110
int prev_len = 0;
4111
int elem_size = (depth == CV_32F) ? sizeof(float) : sizeof(double);
4112
int complex_elem_size = elem_size*2;
4113
4114
for(int stage = start_stage ; stage <= end_stage; stage++ )
4115
{
4116
const uchar* sptr = src;
4117
uchar* dptr = dst;
4118
size_t sstep0, sstep1, dstep0, dstep1;
4119
int len, count;
4120
4121
if( stage == 0 )
4122
{
4123
len = width;
4124
count = height;
4125
if( len == 1 && !isRowTransform )
4126
{
4127
len = height;
4128
count = 1;
4129
}
4130
sstep0 = src_step;
4131
dstep0 = dst_step;
4132
sstep1 = dstep1 = elem_size;
4133
}
4134
else
4135
{
4136
len = height;
4137
count = width;
4138
sstep1 = src_step;
4139
dstep1 = dst_step;
4140
sstep0 = dstep0 = elem_size;
4141
}
4142
4143
opt.n = len;
4144
opt.tab_size = len;
4145
4146
if( len != prev_len )
4147
{
4148
if( len > 1 && (len & 1) )
4149
CV_Error( CV_StsNotImplemented, "Odd-size DCT\'s are not implemented" );
4150
4151
opt.nf = DFTFactorize( len, opt.factors );
4152
bool inplace_transform = opt.factors[0] == opt.factors[opt.nf-1];
4153
4154
wave_buf.allocate(len*complex_elem_size);
4155
opt.wave = wave_buf.data();
4156
itab_buf.allocate(len);
4157
opt.itab = itab_buf.data();
4158
DFTInit( len, opt.nf, opt.factors, opt.itab, complex_elem_size, opt.wave, isInverse );
4159
4160
dct_wave.allocate((len/2 + 1)*complex_elem_size);
4161
src_buf.allocate(len*elem_size);
4162
src_dft_buf = src_buf.data();
4163
if(!inplace_transform)
4164
{
4165
dst_buf.allocate(len*elem_size);
4166
dst_dft_buf = dst_buf.data();
4167
}
4168
else
4169
{
4170
dst_dft_buf = src_buf.data();
4171
}
4172
DCTInit( len, complex_elem_size, dct_wave.data(), isInverse);
4173
prev_len = len;
4174
}
4175
// otherwise reuse the tables calculated on the previous stage
4176
for(unsigned i = 0; i < static_cast<unsigned>(count); i++ )
4177
{
4178
dct_func( opt, sptr + i*sstep0, sstep1, src_dft_buf, dst_dft_buf,
4179
dptr + i*dstep0, dstep1, dct_wave.data());
4180
}
4181
src = dst;
4182
src_step = dst_step;
4183
}
4184
}
4185
};
4186
4187
struct ReplacementDCT2D : public hal::DCT2D
4188
{
4189
cvhalDFT *context;
4190
bool isInitialized;
4191
4192
ReplacementDCT2D() : context(0), isInitialized(false) {}
4193
bool init(int width, int height, int depth, int flags)
4194
{
4195
int res = hal_ni_dctInit2D(&context, width, height, depth, flags);
4196
isInitialized = (res == CV_HAL_ERROR_OK);
4197
return isInitialized;
4198
}
4199
void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) CV_OVERRIDE
4200
{
4201
if (isInitialized)
4202
{
4203
CALL_HAL(dct2D, cv_hal_dct2D, context, src_data, src_step, dst_data, dst_step);
4204
}
4205
}
4206
~ReplacementDCT2D()
4207
{
4208
if (isInitialized)
4209
{
4210
CALL_HAL(dctFree2D, cv_hal_dctFree2D, context);
4211
}
4212
}
4213
};
4214
4215
namespace hal {
4216
4217
Ptr<DCT2D> DCT2D::create(int width, int height, int depth, int flags)
4218
{
4219
{
4220
ReplacementDCT2D *impl = new ReplacementDCT2D();
4221
if (impl->init(width, height, depth, flags))
4222
{
4223
return Ptr<DCT2D>(impl);
4224
}
4225
delete impl;
4226
}
4227
{
4228
OcvDctImpl *impl = new OcvDctImpl();
4229
impl->init(width, height, depth, flags);
4230
return Ptr<DCT2D>(impl);
4231
}
4232
}
4233
4234
} // cv::hal::
4235
} // cv::
4236
4237
void cv::dct( InputArray _src0, OutputArray _dst, int flags )
4238
{
4239
CV_INSTRUMENT_REGION();
4240
4241
Mat src0 = _src0.getMat(), src = src0;
4242
int type = src.type(), depth = src.depth();
4243
4244
CV_Assert( type == CV_32FC1 || type == CV_64FC1 );
4245
_dst.create( src.rows, src.cols, type );
4246
Mat dst = _dst.getMat();
4247
4248
int f = 0;
4249
if ((flags & DFT_ROWS) != 0)
4250
f |= CV_HAL_DFT_ROWS;
4251
if ((flags & DCT_INVERSE) != 0)
4252
f |= CV_HAL_DFT_INVERSE;
4253
if (src.isContinuous() && dst.isContinuous())
4254
f |= CV_HAL_DFT_IS_CONTINUOUS;
4255
4256
Ptr<hal::DCT2D> c = hal::DCT2D::create(src.cols, src.rows, depth, f);
4257
c->apply(src.data, src.step, dst.data, dst.step);
4258
}
4259
4260
4261
void cv::idct( InputArray src, OutputArray dst, int flags )
4262
{
4263
CV_INSTRUMENT_REGION();
4264
4265
dct( src, dst, flags | DCT_INVERSE );
4266
}
4267
4268
namespace cv
4269
{
4270
4271
static const int optimalDFTSizeTab[] = {
4272
1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 15, 16, 18, 20, 24, 25, 27, 30, 32, 36, 40, 45, 48,
4273
50, 54, 60, 64, 72, 75, 80, 81, 90, 96, 100, 108, 120, 125, 128, 135, 144, 150, 160,
4274
162, 180, 192, 200, 216, 225, 240, 243, 250, 256, 270, 288, 300, 320, 324, 360, 375,
4275
384, 400, 405, 432, 450, 480, 486, 500, 512, 540, 576, 600, 625, 640, 648, 675, 720,
4276
729, 750, 768, 800, 810, 864, 900, 960, 972, 1000, 1024, 1080, 1125, 1152, 1200,
4277
1215, 1250, 1280, 1296, 1350, 1440, 1458, 1500, 1536, 1600, 1620, 1728, 1800, 1875,
4278
1920, 1944, 2000, 2025, 2048, 2160, 2187, 2250, 2304, 2400, 2430, 2500, 2560, 2592,
4279
2700, 2880, 2916, 3000, 3072, 3125, 3200, 3240, 3375, 3456, 3600, 3645, 3750, 3840,
4280
3888, 4000, 4050, 4096, 4320, 4374, 4500, 4608, 4800, 4860, 5000, 5120, 5184, 5400,
4281
5625, 5760, 5832, 6000, 6075, 6144, 6250, 6400, 6480, 6561, 6750, 6912, 7200, 7290,
4282
7500, 7680, 7776, 8000, 8100, 8192, 8640, 8748, 9000, 9216, 9375, 9600, 9720, 10000,
4283
10125, 10240, 10368, 10800, 10935, 11250, 11520, 11664, 12000, 12150, 12288, 12500,
4284
12800, 12960, 13122, 13500, 13824, 14400, 14580, 15000, 15360, 15552, 15625, 16000,
4285
16200, 16384, 16875, 17280, 17496, 18000, 18225, 18432, 18750, 19200, 19440, 19683,
4286
20000, 20250, 20480, 20736, 21600, 21870, 22500, 23040, 23328, 24000, 24300, 24576,
4287
25000, 25600, 25920, 26244, 27000, 27648, 28125, 28800, 29160, 30000, 30375, 30720,
4288
31104, 31250, 32000, 32400, 32768, 32805, 33750, 34560, 34992, 36000, 36450, 36864,
4289
37500, 38400, 38880, 39366, 40000, 40500, 40960, 41472, 43200, 43740, 45000, 46080,
4290
46656, 46875, 48000, 48600, 49152, 50000, 50625, 51200, 51840, 52488, 54000, 54675,
4291
55296, 56250, 57600, 58320, 59049, 60000, 60750, 61440, 62208, 62500, 64000, 64800,
4292
65536, 65610, 67500, 69120, 69984, 72000, 72900, 73728, 75000, 76800, 77760, 78125,
4293
78732, 80000, 81000, 81920, 82944, 84375, 86400, 87480, 90000, 91125, 92160, 93312,
4294
93750, 96000, 97200, 98304, 98415, 100000, 101250, 102400, 103680, 104976, 108000,
4295
109350, 110592, 112500, 115200, 116640, 118098, 120000, 121500, 122880, 124416, 125000,
4296
128000, 129600, 131072, 131220, 135000, 138240, 139968, 140625, 144000, 145800, 147456,
4297
150000, 151875, 153600, 155520, 156250, 157464, 160000, 162000, 163840, 164025, 165888,
4298
168750, 172800, 174960, 177147, 180000, 182250, 184320, 186624, 187500, 192000, 194400,
4299
196608, 196830, 200000, 202500, 204800, 207360, 209952, 216000, 218700, 221184, 225000,
4300
230400, 233280, 234375, 236196, 240000, 243000, 245760, 248832, 250000, 253125, 256000,
4301
259200, 262144, 262440, 270000, 273375, 276480, 279936, 281250, 288000, 291600, 294912,
4302
295245, 300000, 303750, 307200, 311040, 312500, 314928, 320000, 324000, 327680, 328050,
4303
331776, 337500, 345600, 349920, 354294, 360000, 364500, 368640, 373248, 375000, 384000,
4304
388800, 390625, 393216, 393660, 400000, 405000, 409600, 414720, 419904, 421875, 432000,
4305
437400, 442368, 450000, 455625, 460800, 466560, 468750, 472392, 480000, 486000, 491520,
4306
492075, 497664, 500000, 506250, 512000, 518400, 524288, 524880, 531441, 540000, 546750,
4307
552960, 559872, 562500, 576000, 583200, 589824, 590490, 600000, 607500, 614400, 622080,
4308
625000, 629856, 640000, 648000, 655360, 656100, 663552, 675000, 691200, 699840, 703125,
4309
708588, 720000, 729000, 737280, 746496, 750000, 759375, 768000, 777600, 781250, 786432,
4310
787320, 800000, 810000, 819200, 820125, 829440, 839808, 843750, 864000, 874800, 884736,
4311
885735, 900000, 911250, 921600, 933120, 937500, 944784, 960000, 972000, 983040, 984150,
4312
995328, 1000000, 1012500, 1024000, 1036800, 1048576, 1049760, 1062882, 1080000, 1093500,
4313
1105920, 1119744, 1125000, 1152000, 1166400, 1171875, 1179648, 1180980, 1200000,
4314
1215000, 1228800, 1244160, 1250000, 1259712, 1265625, 1280000, 1296000, 1310720,
4315
1312200, 1327104, 1350000, 1366875, 1382400, 1399680, 1406250, 1417176, 1440000,
4316
1458000, 1474560, 1476225, 1492992, 1500000, 1518750, 1536000, 1555200, 1562500,
4317
1572864, 1574640, 1594323, 1600000, 1620000, 1638400, 1640250, 1658880, 1679616,
4318
1687500, 1728000, 1749600, 1769472, 1771470, 1800000, 1822500, 1843200, 1866240,
4319
1875000, 1889568, 1920000, 1944000, 1953125, 1966080, 1968300, 1990656, 2000000,
4320
2025000, 2048000, 2073600, 2097152, 2099520, 2109375, 2125764, 2160000, 2187000,
4321
2211840, 2239488, 2250000, 2278125, 2304000, 2332800, 2343750, 2359296, 2361960,
4322
2400000, 2430000, 2457600, 2460375, 2488320, 2500000, 2519424, 2531250, 2560000,
4323
2592000, 2621440, 2624400, 2654208, 2657205, 2700000, 2733750, 2764800, 2799360,
4324
2812500, 2834352, 2880000, 2916000, 2949120, 2952450, 2985984, 3000000, 3037500,
4325
3072000, 3110400, 3125000, 3145728, 3149280, 3188646, 3200000, 3240000, 3276800,
4326
3280500, 3317760, 3359232, 3375000, 3456000, 3499200, 3515625, 3538944, 3542940,
4327
3600000, 3645000, 3686400, 3732480, 3750000, 3779136, 3796875, 3840000, 3888000,
4328
3906250, 3932160, 3936600, 3981312, 4000000, 4050000, 4096000, 4100625, 4147200,
4329
4194304, 4199040, 4218750, 4251528, 4320000, 4374000, 4423680, 4428675, 4478976,
4330
4500000, 4556250, 4608000, 4665600, 4687500, 4718592, 4723920, 4782969, 4800000,
4331
4860000, 4915200, 4920750, 4976640, 5000000, 5038848, 5062500, 5120000, 5184000,
4332
5242880, 5248800, 5308416, 5314410, 5400000, 5467500, 5529600, 5598720, 5625000,
4333
5668704, 5760000, 5832000, 5859375, 5898240, 5904900, 5971968, 6000000, 6075000,
4334
6144000, 6220800, 6250000, 6291456, 6298560, 6328125, 6377292, 6400000, 6480000,
4335
6553600, 6561000, 6635520, 6718464, 6750000, 6834375, 6912000, 6998400, 7031250,
4336
7077888, 7085880, 7200000, 7290000, 7372800, 7381125, 7464960, 7500000, 7558272,
4337
7593750, 7680000, 7776000, 7812500, 7864320, 7873200, 7962624, 7971615, 8000000,
4338
8100000, 8192000, 8201250, 8294400, 8388608, 8398080, 8437500, 8503056, 8640000,
4339
8748000, 8847360, 8857350, 8957952, 9000000, 9112500, 9216000, 9331200, 9375000,
4340
9437184, 9447840, 9565938, 9600000, 9720000, 9765625, 9830400, 9841500, 9953280,
4341
10000000, 10077696, 10125000, 10240000, 10368000, 10485760, 10497600, 10546875, 10616832,
4342
10628820, 10800000, 10935000, 11059200, 11197440, 11250000, 11337408, 11390625, 11520000,
4343
11664000, 11718750, 11796480, 11809800, 11943936, 12000000, 12150000, 12288000, 12301875,
4344
12441600, 12500000, 12582912, 12597120, 12656250, 12754584, 12800000, 12960000, 13107200,
4345
13122000, 13271040, 13286025, 13436928, 13500000, 13668750, 13824000, 13996800, 14062500,
4346
14155776, 14171760, 14400000, 14580000, 14745600, 14762250, 14929920, 15000000, 15116544,
4347
15187500, 15360000, 15552000, 15625000, 15728640, 15746400, 15925248, 15943230, 16000000,
4348
16200000, 16384000, 16402500, 16588800, 16777216, 16796160, 16875000, 17006112, 17280000,
4349
17496000, 17578125, 17694720, 17714700, 17915904, 18000000, 18225000, 18432000, 18662400,
4350
18750000, 18874368, 18895680, 18984375, 19131876, 19200000, 19440000, 19531250, 19660800,
4351
19683000, 19906560, 20000000, 20155392, 20250000, 20480000, 20503125, 20736000, 20971520,
4352
20995200, 21093750, 21233664, 21257640, 21600000, 21870000, 22118400, 22143375, 22394880,
4353
22500000, 22674816, 22781250, 23040000, 23328000, 23437500, 23592960, 23619600, 23887872,
4354
23914845, 24000000, 24300000, 24576000, 24603750, 24883200, 25000000, 25165824, 25194240,
4355
25312500, 25509168, 25600000, 25920000, 26214400, 26244000, 26542080, 26572050, 26873856,
4356
27000000, 27337500, 27648000, 27993600, 28125000, 28311552, 28343520, 28800000, 29160000,
4357
29296875, 29491200, 29524500, 29859840, 30000000, 30233088, 30375000, 30720000, 31104000,
4358
31250000, 31457280, 31492800, 31640625, 31850496, 31886460, 32000000, 32400000, 32768000,
4359
32805000, 33177600, 33554432, 33592320, 33750000, 34012224, 34171875, 34560000, 34992000,
4360
35156250, 35389440, 35429400, 35831808, 36000000, 36450000, 36864000, 36905625, 37324800,
4361
37500000, 37748736, 37791360, 37968750, 38263752, 38400000, 38880000, 39062500, 39321600,
4362
39366000, 39813120, 39858075, 40000000, 40310784, 40500000, 40960000, 41006250, 41472000,
4363
41943040, 41990400, 42187500, 42467328, 42515280, 43200000, 43740000, 44236800, 44286750,
4364
44789760, 45000000, 45349632, 45562500, 46080000, 46656000, 46875000, 47185920, 47239200,
4365
47775744, 47829690, 48000000, 48600000, 48828125, 49152000, 49207500, 49766400, 50000000,
4366
50331648, 50388480, 50625000, 51018336, 51200000, 51840000, 52428800, 52488000, 52734375,
4367
53084160, 53144100, 53747712, 54000000, 54675000, 55296000, 55987200, 56250000, 56623104,
4368
56687040, 56953125, 57600000, 58320000, 58593750, 58982400, 59049000, 59719680, 60000000,
4369
60466176, 60750000, 61440000, 61509375, 62208000, 62500000, 62914560, 62985600, 63281250,
4370
63700992, 63772920, 64000000, 64800000, 65536000, 65610000, 66355200, 66430125, 67108864,
4371
67184640, 67500000, 68024448, 68343750, 69120000, 69984000, 70312500, 70778880, 70858800,
4372
71663616, 72000000, 72900000, 73728000, 73811250, 74649600, 75000000, 75497472, 75582720,
4373
75937500, 76527504, 76800000, 77760000, 78125000, 78643200, 78732000, 79626240, 79716150,
4374
80000000, 80621568, 81000000, 81920000, 82012500, 82944000, 83886080, 83980800, 84375000,
4375
84934656, 85030560, 86400000, 87480000, 87890625, 88473600, 88573500, 89579520, 90000000,
4376
90699264, 91125000, 92160000, 93312000, 93750000, 94371840, 94478400, 94921875, 95551488,
4377
95659380, 96000000, 97200000, 97656250, 98304000, 98415000, 99532800, 100000000,
4378
100663296, 100776960, 101250000, 102036672, 102400000, 102515625, 103680000, 104857600,
4379
104976000, 105468750, 106168320, 106288200, 107495424, 108000000, 109350000, 110592000,
4380
110716875, 111974400, 112500000, 113246208, 113374080, 113906250, 115200000, 116640000,
4381
117187500, 117964800, 118098000, 119439360, 119574225, 120000000, 120932352, 121500000,
4382
122880000, 123018750, 124416000, 125000000, 125829120, 125971200, 126562500, 127401984,
4383
127545840, 128000000, 129600000, 131072000, 131220000, 132710400, 132860250, 134217728,
4384
134369280, 135000000, 136048896, 136687500, 138240000, 139968000, 140625000, 141557760,
4385
141717600, 143327232, 144000000, 145800000, 146484375, 147456000, 147622500, 149299200,
4386
150000000, 150994944, 151165440, 151875000, 153055008, 153600000, 155520000, 156250000,
4387
157286400, 157464000, 158203125, 159252480, 159432300, 160000000, 161243136, 162000000,
4388
163840000, 164025000, 165888000, 167772160, 167961600, 168750000, 169869312, 170061120,
4389
170859375, 172800000, 174960000, 175781250, 176947200, 177147000, 179159040, 180000000,
4390
181398528, 182250000, 184320000, 184528125, 186624000, 187500000, 188743680, 188956800,
4391
189843750, 191102976, 191318760, 192000000, 194400000, 195312500, 196608000, 196830000,
4392
199065600, 199290375, 200000000, 201326592, 201553920, 202500000, 204073344, 204800000,
4393
205031250, 207360000, 209715200, 209952000, 210937500, 212336640, 212576400, 214990848,
4394
216000000, 218700000, 221184000, 221433750, 223948800, 225000000, 226492416, 226748160,
4395
227812500, 230400000, 233280000, 234375000, 235929600, 236196000, 238878720, 239148450,
4396
240000000, 241864704, 243000000, 244140625, 245760000, 246037500, 248832000, 250000000,
4397
251658240, 251942400, 253125000, 254803968, 255091680, 256000000, 259200000, 262144000,
4398
262440000, 263671875, 265420800, 265720500, 268435456, 268738560, 270000000, 272097792,
4399
273375000, 276480000, 279936000, 281250000, 283115520, 283435200, 284765625, 286654464,
4400
288000000, 291600000, 292968750, 294912000, 295245000, 298598400, 300000000, 301989888,
4401
302330880, 303750000, 306110016, 307200000, 307546875, 311040000, 312500000, 314572800,
4402
314928000, 316406250, 318504960, 318864600, 320000000, 322486272, 324000000, 327680000,
4403
328050000, 331776000, 332150625, 335544320, 335923200, 337500000, 339738624, 340122240,
4404
341718750, 345600000, 349920000, 351562500, 353894400, 354294000, 358318080, 360000000,
4405
362797056, 364500000, 368640000, 369056250, 373248000, 375000000, 377487360, 377913600,
4406
379687500, 382205952, 382637520, 384000000, 388800000, 390625000, 393216000, 393660000,
4407
398131200, 398580750, 400000000, 402653184, 403107840, 405000000, 408146688, 409600000,
4408
410062500, 414720000, 419430400, 419904000, 421875000, 424673280, 425152800, 429981696,
4409
432000000, 437400000, 439453125, 442368000, 442867500, 447897600, 450000000, 452984832,
4410
453496320, 455625000, 460800000, 466560000, 468750000, 471859200, 472392000, 474609375,
4411
477757440, 478296900, 480000000, 483729408, 486000000, 488281250, 491520000, 492075000,
4412
497664000, 500000000, 503316480, 503884800, 506250000, 509607936, 510183360, 512000000,
4413
512578125, 518400000, 524288000, 524880000, 527343750, 530841600, 531441000, 536870912,
4414
537477120, 540000000, 544195584, 546750000, 552960000, 553584375, 559872000, 562500000,
4415
566231040, 566870400, 569531250, 573308928, 576000000, 583200000, 585937500, 589824000,
4416
590490000, 597196800, 597871125, 600000000, 603979776, 604661760, 607500000, 612220032,
4417
614400000, 615093750, 622080000, 625000000, 629145600, 629856000, 632812500, 637009920,
4418
637729200, 640000000, 644972544, 648000000, 655360000, 656100000, 663552000, 664301250,
4419
671088640, 671846400, 675000000, 679477248, 680244480, 683437500, 691200000, 699840000,
4420
703125000, 707788800, 708588000, 716636160, 720000000, 725594112, 729000000, 732421875,
4421
737280000, 738112500, 746496000, 750000000, 754974720, 755827200, 759375000, 764411904,
4422
765275040, 768000000, 777600000, 781250000, 786432000, 787320000, 791015625, 796262400,
4423
797161500, 800000000, 805306368, 806215680, 810000000, 816293376, 819200000, 820125000,
4424
829440000, 838860800, 839808000, 843750000, 849346560, 850305600, 854296875, 859963392,
4425
864000000, 874800000, 878906250, 884736000, 885735000, 895795200, 900000000, 905969664,
4426
906992640, 911250000, 921600000, 922640625, 933120000, 937500000, 943718400, 944784000,
4427
949218750, 955514880, 956593800, 960000000, 967458816, 972000000, 976562500, 983040000,
4428
984150000, 995328000, 996451875, 1000000000, 1006632960, 1007769600, 1012500000,
4429
1019215872, 1020366720, 1024000000, 1025156250, 1036800000, 1048576000, 1049760000,
4430
1054687500, 1061683200, 1062882000, 1073741824, 1074954240, 1080000000, 1088391168,
4431
1093500000, 1105920000, 1107168750, 1119744000, 1125000000, 1132462080, 1133740800,
4432
1139062500, 1146617856, 1152000000, 1166400000, 1171875000, 1179648000, 1180980000,
4433
1194393600, 1195742250, 1200000000, 1207959552, 1209323520, 1215000000, 1220703125,
4434
1224440064, 1228800000, 1230187500, 1244160000, 1250000000, 1258291200, 1259712000,
4435
1265625000, 1274019840, 1275458400, 1280000000, 1289945088, 1296000000, 1310720000,
4436
1312200000, 1318359375, 1327104000, 1328602500, 1342177280, 1343692800, 1350000000,
4437
1358954496, 1360488960, 1366875000, 1382400000, 1399680000, 1406250000, 1415577600,
4438
1417176000, 1423828125, 1433272320, 1440000000, 1451188224, 1458000000, 1464843750,
4439
1474560000, 1476225000, 1492992000, 1500000000, 1509949440, 1511654400, 1518750000,
4440
1528823808, 1530550080, 1536000000, 1537734375, 1555200000, 1562500000, 1572864000,
4441
1574640000, 1582031250, 1592524800, 1594323000, 1600000000, 1610612736, 1612431360,
4442
1620000000, 1632586752, 1638400000, 1640250000, 1658880000, 1660753125, 1677721600,
4443
1679616000, 1687500000, 1698693120, 1700611200, 1708593750, 1719926784, 1728000000,
4444
1749600000, 1757812500, 1769472000, 1771470000, 1791590400, 1800000000, 1811939328,
4445
1813985280, 1822500000, 1843200000, 1845281250, 1866240000, 1875000000, 1887436800,
4446
1889568000, 1898437500, 1911029760, 1913187600, 1920000000, 1934917632, 1944000000,
4447
1953125000, 1966080000, 1968300000, 1990656000, 1992903750, 2000000000, 2013265920,
4448
2015539200, 2025000000, 2038431744, 2040733440, 2048000000, 2050312500, 2073600000,
4449
2097152000, 2099520000, 2109375000, 2123366400, 2125764000
4450
};
4451
4452
}
4453
4454
int cv::getOptimalDFTSize( int size0 )
4455
{
4456
int a = 0, b = sizeof(optimalDFTSizeTab)/sizeof(optimalDFTSizeTab[0]) - 1;
4457
if( (unsigned)size0 >= (unsigned)optimalDFTSizeTab[b] )
4458
return -1;
4459
4460
while( a < b )
4461
{
4462
int c = (a + b) >> 1;
4463
if( size0 <= optimalDFTSizeTab[c] )
4464
b = c;
4465
else
4466
a = c+1;
4467
}
4468
4469
return optimalDFTSizeTab[b];
4470
}
4471
4472
CV_IMPL void
4473
cvDFT( const CvArr* srcarr, CvArr* dstarr, int flags, int nonzero_rows )
4474
{
4475
cv::Mat src = cv::cvarrToMat(srcarr), dst0 = cv::cvarrToMat(dstarr), dst = dst0;
4476
int _flags = ((flags & CV_DXT_INVERSE) ? cv::DFT_INVERSE : 0) |
4477
((flags & CV_DXT_SCALE) ? cv::DFT_SCALE : 0) |
4478
((flags & CV_DXT_ROWS) ? cv::DFT_ROWS : 0);
4479
4480
CV_Assert( src.size == dst.size );
4481
4482
if( src.type() != dst.type() )
4483
{
4484
if( dst.channels() == 2 )
4485
_flags |= cv::DFT_COMPLEX_OUTPUT;
4486
else
4487
_flags |= cv::DFT_REAL_OUTPUT;
4488
}
4489
4490
cv::dft( src, dst, _flags, nonzero_rows );
4491
CV_Assert( dst.data == dst0.data ); // otherwise it means that the destination size or type was incorrect
4492
}
4493
4494
4495
CV_IMPL void
4496
cvMulSpectrums( const CvArr* srcAarr, const CvArr* srcBarr,
4497
CvArr* dstarr, int flags )
4498
{
4499
cv::Mat srcA = cv::cvarrToMat(srcAarr),
4500
srcB = cv::cvarrToMat(srcBarr),
4501
dst = cv::cvarrToMat(dstarr);
4502
CV_Assert( srcA.size == dst.size && srcA.type() == dst.type() );
4503
4504
cv::mulSpectrums(srcA, srcB, dst,
4505
(flags & CV_DXT_ROWS) ? cv::DFT_ROWS : 0,
4506
(flags & CV_DXT_MUL_CONJ) != 0 );
4507
}
4508
4509
4510
CV_IMPL void
4511
cvDCT( const CvArr* srcarr, CvArr* dstarr, int flags )
4512
{
4513
cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
4514
CV_Assert( src.size == dst.size && src.type() == dst.type() );
4515
int _flags = ((flags & CV_DXT_INVERSE) ? cv::DCT_INVERSE : 0) |
4516
((flags & CV_DXT_ROWS) ? cv::DCT_ROWS : 0);
4517
cv::dct( src, dst, _flags );
4518
}
4519
4520
4521
CV_IMPL int
4522
cvGetOptimalDFTSize( int size0 )
4523
{
4524
return cv::getOptimalDFTSize(size0);
4525
}
4526
4527
/* End of file. */
4528
4529