the do / gr-peach-opencv-project

Fork of gr-peach-opencv-project by the do

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers dxt.cpp Source File

dxt.cpp

00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                        Intel License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000, Intel Corporation, all rights reserved.
00014 // Third party copyrights are property of their respective owners.
00015 //
00016 // Redistribution and use in source and binary forms, with or without modification,
00017 // are permitted provided that the following conditions are met:
00018 //
00019 //   * Redistribution's of source code must retain the above copyright notice,
00020 //     this list of conditions and the following disclaimer.
00021 //
00022 //   * Redistribution's in binary form must reproduce the above copyright notice,
00023 //     this list of conditions and the following disclaimer in the documentation
00024 //     and/or other materials provided with the distribution.
00025 //
00026 //   * The name of Intel Corporation may not be used to endorse or promote products
00027 //     derived from this software without specific prior written permission.
00028 //
00029 // This software is provided by the copyright holders and contributors "as is" and
00030 // any express or implied warranties, including, but not limited to, the implied
00031 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00032 // In no event shall the Intel Corporation or contributors be liable for any direct,
00033 // indirect, incidental, special, exemplary, or consequential damages
00034 // (including, but not limited to, procurement of substitute goods or services;
00035 // loss of use, data, or profits; or business interruption) however caused
00036 // and on any theory of liability, whether in contract, strict liability,
00037 // or tort (including negligence or otherwise) arising in any way out of
00038 // the use of this software, even if advised of the possibility of such damage.
00039 //
00040 //M*/
00041 
00042 #include "precomp.hpp"
00043 #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
00044 #include "opencv2/core/opencl/runtime/opencl_core.hpp"
00045 #include "opencl_kernels_core.hpp"
00046 #include <map>
00047 
00048 namespace cv
00049 {
00050 
00051 // On Win64 optimized versions of DFT and DCT fail the tests (fixed in VS2010)
00052 #if defined _MSC_VER && !defined CV_ICC && defined _M_X64 && _MSC_VER < 1600
00053 # pragma optimize("", off)
00054 # pragma warning(disable: 4748)
00055 #endif
00056 
00057 #if IPP_VERSION_X100 >= 710
00058 #define USE_IPP_DFT 1
00059 #else
00060 #undef USE_IPP_DFT
00061 #endif
00062 
00063 /****************************************************************************************\
00064                                Discrete Fourier Transform
00065 \****************************************************************************************/
00066 
00067 #define CV_MAX_LOCAL_DFT_SIZE  (1 << 15)
00068 
00069 static unsigned char bitrevTab[] =
00070 {
00071   0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0,0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0,
00072   0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8,0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8,
00073   0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4,0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4,
00074   0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec,0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc,
00075   0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2,0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2,
00076   0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea,0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa,
00077   0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6,0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6,
00078   0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee,0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe,
00079   0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1,0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1,
00080   0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9,0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9,
00081   0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5,0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5,
00082   0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed,0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd,
00083   0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3,0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3,
00084   0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb,0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb,
00085   0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7,0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7,
00086   0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef,0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
00087 };
00088 
00089 static const double DFTTab[][2] =
00090 {
00091 { 1.00000000000000000, 0.00000000000000000 },
00092 {-1.00000000000000000, 0.00000000000000000 },
00093 { 0.00000000000000000, 1.00000000000000000 },
00094 { 0.70710678118654757, 0.70710678118654746 },
00095 { 0.92387953251128674, 0.38268343236508978 },
00096 { 0.98078528040323043, 0.19509032201612825 },
00097 { 0.99518472667219693, 0.09801714032956060 },
00098 { 0.99879545620517241, 0.04906767432741802 },
00099 { 0.99969881869620425, 0.02454122852291229 },
00100 { 0.99992470183914450, 0.01227153828571993 },
00101 { 0.99998117528260111, 0.00613588464915448 },
00102 { 0.99999529380957619, 0.00306795676296598 },
00103 { 0.99999882345170188, 0.00153398018628477 },
00104 { 0.99999970586288223, 0.00076699031874270 },
00105 { 0.99999992646571789, 0.00038349518757140 },
00106 { 0.99999998161642933, 0.00019174759731070 },
00107 { 0.99999999540410733, 0.00009587379909598 },
00108 { 0.99999999885102686, 0.00004793689960307 },
00109 { 0.99999999971275666, 0.00002396844980842 },
00110 { 0.99999999992818922, 0.00001198422490507 },
00111 { 0.99999999998204725, 0.00000599211245264 },
00112 { 0.99999999999551181, 0.00000299605622633 },
00113 { 0.99999999999887801, 0.00000149802811317 },
00114 { 0.99999999999971945, 0.00000074901405658 },
00115 { 0.99999999999992983, 0.00000037450702829 },
00116 { 0.99999999999998246, 0.00000018725351415 },
00117 { 0.99999999999999567, 0.00000009362675707 },
00118 { 0.99999999999999889, 0.00000004681337854 },
00119 { 0.99999999999999978, 0.00000002340668927 },
00120 { 0.99999999999999989, 0.00000001170334463 },
00121 { 1.00000000000000000, 0.00000000585167232 },
00122 { 1.00000000000000000, 0.00000000292583616 }
00123 };
00124 
00125 #define BitRev(i,shift) \
00126    ((int)((((unsigned)bitrevTab[(i)&255] << 24)+ \
00127            ((unsigned)bitrevTab[((i)>> 8)&255] << 16)+ \
00128            ((unsigned)bitrevTab[((i)>>16)&255] <<  8)+ \
00129            ((unsigned)bitrevTab[((i)>>24)])) >> (shift)))
00130 
00131 static int
00132 DFTFactorize( int n, int* factors )
00133 {
00134     int nf = 0, f, i, j;
00135 
00136     if( n <= 5 )
00137     {
00138         factors[0] = n;
00139         return 1;
00140     }
00141 
00142     f = (((n - 1)^n)+1) >> 1;
00143     if( f > 1 )
00144     {
00145         factors[nf++] = f;
00146         n = f == n ? 1 : n/f;
00147     }
00148 
00149     for( f = 3; n > 1; )
00150     {
00151         int d = n/f;
00152         if( d*f == n )
00153         {
00154             factors[nf++] = f;
00155             n = d;
00156         }
00157         else
00158         {
00159             f += 2;
00160             if( f*f > n )
00161                 break;
00162         }
00163     }
00164 
00165     if( n > 1 )
00166         factors[nf++] = n;
00167 
00168     f = (factors[0] & 1) == 0;
00169     for( i = f; i < (nf+f)/2; i++ )
00170         CV_SWAP( factors[i], factors[nf-i-1+f], j );
00171 
00172     return nf;
00173 }
00174 
00175 static void
00176 DFTInit( int n0, int nf, int* factors, int* itab, int elem_size, void* _wave, int inv_itab )
00177 {
00178     int digits[34], radix[34];
00179     int n = factors[0], m = 0;
00180     int* itab0 = itab;
00181     int i, j, k;
00182     Complex<double> w, w1;
00183     double t;
00184 
00185     if( n0 <= 5 )
00186     {
00187         itab[0] = 0;
00188         itab[n0-1] = n0-1;
00189 
00190         if( n0 != 4 )
00191         {
00192             for( i = 1; i < n0-1; i++ )
00193                 itab[i] = i;
00194         }
00195         else
00196         {
00197             itab[1] = 2;
00198             itab[2] = 1;
00199         }
00200         if( n0 == 5 )
00201         {
00202             if( elem_size == sizeof(Complex<double>) )
00203                 ((Complex<double>*)_wave)[0] = Complex<double>(1.,0.);
00204             else
00205                 ((Complex<float>*)_wave)[0] = Complex<float>(1.f,0.f);
00206         }
00207         if( n0 != 4 )
00208             return;
00209         m = 2;
00210     }
00211     else
00212     {
00213         // radix[] is initialized from index 'nf' down to zero
00214         assert (nf < 34);
00215         radix[nf] = 1;
00216         digits[nf] = 0;
00217         for( i = 0; i < nf; i++ )
00218         {
00219             digits[i] = 0;
00220             radix[nf-i-1] = radix[nf-i]*factors[nf-i-1];
00221         }
00222 
00223         if( inv_itab && factors[0] != factors[nf-1] )
00224             itab = (int*)_wave;
00225 
00226         if( (n & 1) == 0 )
00227         {
00228             int a = radix[1], na2 = n*a>>1, na4 = na2 >> 1;
00229             for( m = 0; (unsigned)(1 << m) < (unsigned)n; m++ )
00230                 ;
00231             if( n <= 2  )
00232             {
00233                 itab[0] = 0;
00234                 itab[1] = na2;
00235             }
00236             else if( n <= 256 )
00237             {
00238                 int shift = 10 - m;
00239                 for( i = 0; i <= n - 4; i += 4 )
00240                 {
00241                     j = (bitrevTab[i>>2]>>shift)*a;
00242                     itab[i] = j;
00243                     itab[i+1] = j + na2;
00244                     itab[i+2] = j + na4;
00245                     itab[i+3] = j + na2 + na4;
00246                 }
00247             }
00248             else
00249             {
00250                 int shift = 34 - m;
00251                 for( i = 0; i < n; i += 4 )
00252                 {
00253                     int i4 = i >> 2;
00254                     j = BitRev(i4,shift)*a;
00255                     itab[i] = j;
00256                     itab[i+1] = j + na2;
00257                     itab[i+2] = j + na4;
00258                     itab[i+3] = j + na2 + na4;
00259                 }
00260             }
00261 
00262             digits[1]++;
00263 
00264             if( nf >= 2 )
00265             {
00266                 for( i = n, j = radix[2]; i < n0; )
00267                 {
00268                     for( k = 0; k < n; k++ )
00269                         itab[i+k] = itab[k] + j;
00270                     if( (i += n) >= n0 )
00271                         break;
00272                     j += radix[2];
00273                     for( k = 1; ++digits[k] >= factors[k]; k++ )
00274                     {
00275                         digits[k] = 0;
00276                         j += radix[k+2] - radix[k];
00277                     }
00278                 }
00279             }
00280         }
00281         else
00282         {
00283             for( i = 0, j = 0;; )
00284             {
00285                 itab[i] = j;
00286                 if( ++i >= n0 )
00287                     break;
00288                 j += radix[1];
00289                 for( k = 0; ++digits[k] >= factors[k]; k++ )
00290                 {
00291                     digits[k] = 0;
00292                     j += radix[k+2] - radix[k];
00293                 }
00294             }
00295         }
00296 
00297         if( itab != itab0 )
00298         {
00299             itab0[0] = 0;
00300             for( i = n0 & 1; i < n0; i += 2 )
00301             {
00302                 int k0 = itab[i];
00303                 int k1 = itab[i+1];
00304                 itab0[k0] = i;
00305                 itab0[k1] = i+1;
00306             }
00307         }
00308     }
00309 
00310     if( (n0 & (n0-1)) == 0 )
00311     {
00312         w.re = w1.re = DFTTab[m][0];
00313         w.im = w1.im = -DFTTab[m][1];
00314     }
00315     else
00316     {
00317         t = -CV_PI*2/n0;
00318         w.im = w1.im = sin(t);
00319         w.re = w1.re = std::sqrt(1. - w1.im*w1.im);
00320     }
00321     n = (n0+1)/2;
00322 
00323     if( elem_size == sizeof(Complex<double>) )
00324     {
00325         Complex<double>* wave = (Complex<double>*)_wave;
00326 
00327         wave[0].re = 1.;
00328         wave[0].im = 0.;
00329 
00330         if( (n0 & 1) == 0 )
00331         {
00332             wave[n].re = -1.;
00333             wave[n].im = 0;
00334         }
00335 
00336         for( i = 1; i < n; i++ )
00337         {
00338             wave[i] = w;
00339             wave[n0-i].re = w.re;
00340             wave[n0-i].im = -w.im;
00341 
00342             t = w.re*w1.re - w.im*w1.im;
00343             w.im = w.re*w1.im + w.im*w1.re;
00344             w.re = t;
00345         }
00346     }
00347     else
00348     {
00349         Complex<float>* wave = (Complex<float>*)_wave;
00350         assert( elem_size == sizeof(Complex<float>) );
00351 
00352         wave[0].re = 1.f;
00353         wave[0].im = 0.f;
00354 
00355         if( (n0 & 1) == 0 )
00356         {
00357             wave[n].re = -1.f;
00358             wave[n].im = 0.f;
00359         }
00360 
00361         for( i = 1; i < n; i++ )
00362         {
00363             wave[i].re = (float)w.re;
00364             wave[i].im = (float)w.im;
00365             wave[n0-i].re = (float)w.re;
00366             wave[n0-i].im = (float)-w.im;
00367 
00368             t = w.re*w1.re - w.im*w1.im;
00369             w.im = w.re*w1.im + w.im*w1.re;
00370             w.re = t;
00371         }
00372     }
00373 }
00374 
00375 template<typename T> struct DFT_VecR4
00376 {
00377     int operator()(Complex<T>*, int, int, int&, const Complex<T>*) const { return 1; }
00378 };
00379 
00380 #if CV_SSE3
00381 
00382 // optimized radix-4 transform
00383 template<> struct DFT_VecR4<float>
00384 {
00385     int operator()(Complex<float>* dst, int N, int n0, int& _dw0, const Complex<float>* wave) const
00386     {
00387         int n = 1, i, j, nx, dw, dw0 = _dw0;
00388         __m128 z = _mm_setzero_ps(), x02=z, x13=z, w01=z, w23=z, y01, y23, t0, t1;
00389         Cv32suf t; t.i = 0x80000000;
00390         __m128 neg0_mask = _mm_load_ss(&t.f);
00391         __m128 neg3_mask = _mm_shuffle_ps(neg0_mask, neg0_mask, _MM_SHUFFLE(0,1,2,3));
00392 
00393         for( ; n*4 <= N; )
00394         {
00395             nx = n;
00396             n *= 4;
00397             dw0 /= 4;
00398 
00399             for( i = 0; i < n0; i += n )
00400             {
00401                 Complexf *v0, *v1;
00402 
00403                 v0 = dst + i;
00404                 v1 = v0 + nx*2;
00405 
00406                 x02 = _mm_loadl_pi(x02, (const __m64*)&v0[0]);
00407                 x13 = _mm_loadl_pi(x13, (const __m64*)&v0[nx]);
00408                 x02 = _mm_loadh_pi(x02, (const __m64*)&v1[0]);
00409                 x13 = _mm_loadh_pi(x13, (const __m64*)&v1[nx]);
00410 
00411                 y01 = _mm_add_ps(x02, x13);
00412                 y23 = _mm_sub_ps(x02, x13);
00413                 t1 = _mm_xor_ps(_mm_shuffle_ps(y01, y23, _MM_SHUFFLE(2,3,3,2)), neg3_mask);
00414                 t0 = _mm_movelh_ps(y01, y23);
00415                 y01 = _mm_add_ps(t0, t1);
00416                 y23 = _mm_sub_ps(t0, t1);
00417 
00418                 _mm_storel_pi((__m64*)&v0[0], y01);
00419                 _mm_storeh_pi((__m64*)&v0[nx], y01);
00420                 _mm_storel_pi((__m64*)&v1[0], y23);
00421                 _mm_storeh_pi((__m64*)&v1[nx], y23);
00422 
00423                 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
00424                 {
00425                     v0 = dst + i + j;
00426                     v1 = v0 + nx*2;
00427 
00428                     x13 = _mm_loadl_pi(x13, (const __m64*)&v0[nx]);
00429                     w23 = _mm_loadl_pi(w23, (const __m64*)&wave[dw*2]);
00430                     x13 = _mm_loadh_pi(x13, (const __m64*)&v1[nx]); // x1, x3 = r1 i1 r3 i3
00431                     w23 = _mm_loadh_pi(w23, (const __m64*)&wave[dw*3]); // w2, w3 = wr2 wi2 wr3 wi3
00432 
00433                     t0 = _mm_mul_ps(_mm_moveldup_ps(x13), w23);
00434                     t1 = _mm_mul_ps(_mm_movehdup_ps(x13), _mm_shuffle_ps(w23, w23, _MM_SHUFFLE(2,3,0,1)));
00435                     x13 = _mm_addsub_ps(t0, t1);
00436                     // re(x1*w2), im(x1*w2), re(x3*w3), im(x3*w3)
00437                     x02 = _mm_loadl_pi(x02, (const __m64*)&v1[0]); // x2 = r2 i2
00438                     w01 = _mm_loadl_pi(w01, (const __m64*)&wave[dw]); // w1 = wr1 wi1
00439                     x02 = _mm_shuffle_ps(x02, x02, _MM_SHUFFLE(0,0,1,1));
00440                     w01 = _mm_shuffle_ps(w01, w01, _MM_SHUFFLE(1,0,0,1));
00441                     x02 = _mm_mul_ps(x02, w01);
00442                     x02 = _mm_addsub_ps(x02, _mm_movelh_ps(x02, x02));
00443                     // re(x0) im(x0) re(x2*w1), im(x2*w1)
00444                     x02 = _mm_loadl_pi(x02, (const __m64*)&v0[0]);
00445 
00446                     y01 = _mm_add_ps(x02, x13);
00447                     y23 = _mm_sub_ps(x02, x13);
00448                     t1 = _mm_xor_ps(_mm_shuffle_ps(y01, y23, _MM_SHUFFLE(2,3,3,2)), neg3_mask);
00449                     t0 = _mm_movelh_ps(y01, y23);
00450                     y01 = _mm_add_ps(t0, t1);
00451                     y23 = _mm_sub_ps(t0, t1);
00452 
00453                     _mm_storel_pi((__m64*)&v0[0], y01);
00454                     _mm_storeh_pi((__m64*)&v0[nx], y01);
00455                     _mm_storel_pi((__m64*)&v1[0], y23);
00456                     _mm_storeh_pi((__m64*)&v1[nx], y23);
00457                 }
00458             }
00459         }
00460 
00461         _dw0 = dw0;
00462         return n;
00463     }
00464 };
00465 
00466 #endif
00467 
00468 #ifdef USE_IPP_DFT
00469 static IppStatus ippsDFTFwd_CToC( const Complex<float>* src, Complex<float>* dst,
00470                              const void* spec, uchar* buf)
00471 {
00472     return ippsDFTFwd_CToC_32fc( (const Ipp32fc*)src, (Ipp32fc*)dst,
00473                                  (const IppsDFTSpec_C_32fc*)spec, buf);
00474 }
00475 
00476 static IppStatus ippsDFTFwd_CToC( const Complex<double>* src, Complex<double>* dst,
00477                              const void* spec, uchar* buf)
00478 {
00479     return ippsDFTFwd_CToC_64fc( (const Ipp64fc*)src, (Ipp64fc*)dst,
00480                                  (const IppsDFTSpec_C_64fc*)spec, buf);
00481 }
00482 
00483 static IppStatus ippsDFTInv_CToC( const Complex<float>* src, Complex<float>* dst,
00484                              const void* spec, uchar* buf)
00485 {
00486     return ippsDFTInv_CToC_32fc( (const Ipp32fc*)src, (Ipp32fc*)dst,
00487                                  (const IppsDFTSpec_C_32fc*)spec, buf);
00488 }
00489 
00490 static IppStatus ippsDFTInv_CToC( const Complex<double>* src, Complex<double>* dst,
00491                                   const void* spec, uchar* buf)
00492 {
00493     return ippsDFTInv_CToC_64fc( (const Ipp64fc*)src, (Ipp64fc*)dst,
00494                                  (const IppsDFTSpec_C_64fc*)spec, buf);
00495 }
00496 
00497 static IppStatus ippsDFTFwd_RToPack( const float* src, float* dst,
00498                                      const void* spec, uchar* buf)
00499 {
00500     return ippsDFTFwd_RToPack_32f( src, dst, (const IppsDFTSpec_R_32f*)spec, buf);
00501 }
00502 
00503 static IppStatus ippsDFTFwd_RToPack( const double* src, double* dst,
00504                                      const void* spec, uchar* buf)
00505 {
00506     return ippsDFTFwd_RToPack_64f( src, dst, (const IppsDFTSpec_R_64f*)spec, buf);
00507 }
00508 
00509 static IppStatus ippsDFTInv_PackToR( const float* src, float* dst,
00510                                      const void* spec, uchar* buf)
00511 {
00512     return ippsDFTInv_PackToR_32f( src, dst, (const IppsDFTSpec_R_32f*)spec, buf);
00513 }
00514 
00515 static IppStatus ippsDFTInv_PackToR( const double* src, double* dst,
00516                                      const void* spec, uchar* buf)
00517 {
00518     return ippsDFTInv_PackToR_64f( src, dst, (const IppsDFTSpec_R_64f*)spec, buf);
00519 }
00520 #endif
00521 
00522 enum { DFT_NO_PERMUTE=256, DFT_COMPLEX_INPUT_OR_OUTPUT=512 };
00523 
00524 // mixed-radix complex discrete Fourier transform: double-precision version
00525 template<typename T> static void
00526 DFT( const Complex<T>* src, Complex<T>* dst, int n,
00527      int nf, const int* factors, const int* itab,
00528      const Complex<T>* wave, int tab_size,
00529      const void*
00530 #ifdef USE_IPP_DFT
00531      spec
00532 #endif
00533      , Complex<T>* buf,
00534      int flags, double _scale )
00535 {
00536     static const T sin_120 = (T)0.86602540378443864676372317075294;
00537     static const T fft5_2 = (T)0.559016994374947424102293417182819;
00538     static const T fft5_3 = (T)-0.951056516295153572116439333379382;
00539     static const T fft5_4 = (T)-1.538841768587626701285145288018455;
00540     static const T fft5_5 = (T)0.363271264002680442947733378740309;
00541 
00542     int n0 = n, f_idx, nx;
00543     int inv = flags & DFT_INVERSE;
00544     int dw0 = tab_size, dw;
00545     int i, j, k;
00546     Complex<T> t;
00547     T scale = (T)_scale;
00548     int tab_step;
00549 
00550 #ifdef USE_IPP_DFT
00551     if( spec )
00552     {
00553         if( !inv )
00554         {
00555             if (ippsDFTFwd_CToC( src, dst, spec, (uchar*)buf ) >= 0)
00556             {
00557                 CV_IMPL_ADD(CV_IMPL_IPP);
00558                 return;
00559             }
00560         }
00561         else
00562         {
00563             if (ippsDFTInv_CToC( src, dst, spec, (uchar*)buf ) >= 0)
00564             {
00565                 CV_IMPL_ADD(CV_IMPL_IPP);
00566                 return;
00567             }
00568         }
00569         setIppErrorStatus();
00570     }
00571 #endif
00572 
00573     tab_step = tab_size == n ? 1 : tab_size == n*2 ? 2 : tab_size/n;
00574 
00575     // 0. shuffle data
00576     if( dst != src )
00577     {
00578         assert( (flags & DFT_NO_PERMUTE) == 0 );
00579         if( !inv )
00580         {
00581             for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step )
00582             {
00583                 int k0 = itab[0], k1 = itab[tab_step];
00584                 assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n );
00585                 dst[i] = src[k0]; dst[i+1] = src[k1];
00586             }
00587 
00588             if( i < n )
00589                 dst[n-1] = src[n-1];
00590         }
00591         else
00592         {
00593             for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step )
00594             {
00595                 int k0 = itab[0], k1 = itab[tab_step];
00596                 assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n );
00597                 t.re = src[k0].re; t.im = -src[k0].im;
00598                 dst[i] = t;
00599                 t.re = src[k1].re; t.im = -src[k1].im;
00600                 dst[i+1] = t;
00601             }
00602 
00603             if( i < n )
00604             {
00605                 t.re = src[n-1].re; t.im = -src[n-1].im;
00606                 dst[i] = t;
00607             }
00608         }
00609     }
00610     else
00611     {
00612         if( (flags & DFT_NO_PERMUTE) == 0 )
00613         {
00614             CV_Assert( factors[0] == factors[nf-1] );
00615             if( nf == 1 )
00616             {
00617                 if( (n & 3) == 0 )
00618                 {
00619                     int n2 = n/2;
00620                     Complex<T>* dsth = dst + n2;
00621 
00622                     for( i = 0; i < n2; i += 2, itab += tab_step*2 )
00623                     {
00624                         j = itab[0];
00625                         assert( (unsigned)j < (unsigned)n2 );
00626 
00627                         CV_SWAP(dst[i+1], dsth[j], t);
00628                         if( j > i )
00629                         {
00630                             CV_SWAP(dst[i], dst[j], t);
00631                             CV_SWAP(dsth[i+1], dsth[j+1], t);
00632                         }
00633                     }
00634                 }
00635                 // else do nothing
00636             }
00637             else
00638             {
00639                 for( i = 0; i < n; i++, itab += tab_step )
00640                 {
00641                     j = itab[0];
00642                     assert( (unsigned)j < (unsigned)n );
00643                     if( j > i )
00644                         CV_SWAP(dst[i], dst[j], t);
00645                 }
00646             }
00647         }
00648 
00649         if( inv )
00650         {
00651             for( i = 0; i <= n - 2; i += 2 )
00652             {
00653                 T t0 = -dst[i].im;
00654                 T t1 = -dst[i+1].im;
00655                 dst[i].im = t0; dst[i+1].im = t1;
00656             }
00657 
00658             if( i < n )
00659                 dst[n-1].im = -dst[n-1].im;
00660         }
00661     }
00662 
00663     n = 1;
00664     // 1. power-2 transforms
00665     if( (factors[0] & 1) == 0 )
00666     {
00667         if( factors[0] >= 4 && checkHardwareSupport(CV_CPU_SSE3))
00668         {
00669             DFT_VecR4<T> vr4;
00670             n = vr4(dst, factors[0], n0, dw0, wave);
00671         }
00672 
00673         // radix-4 transform
00674         for( ; n*4 <= factors[0]; )
00675         {
00676             nx = n;
00677             n *= 4;
00678             dw0 /= 4;
00679 
00680             for( i = 0; i < n0; i += n )
00681             {
00682                 Complex<T> *v0, *v1;
00683                 T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4;
00684 
00685                 v0 = dst + i;
00686                 v1 = v0 + nx*2;
00687 
00688                 r0 = v1[0].re; i0 = v1[0].im;
00689                 r4 = v1[nx].re; i4 = v1[nx].im;
00690 
00691                 r1 = r0 + r4; i1 = i0 + i4;
00692                 r3 = i0 - i4; i3 = r4 - r0;
00693 
00694                 r2 = v0[0].re; i2 = v0[0].im;
00695                 r4 = v0[nx].re; i4 = v0[nx].im;
00696 
00697                 r0 = r2 + r4; i0 = i2 + i4;
00698                 r2 -= r4; i2 -= i4;
00699 
00700                 v0[0].re = r0 + r1; v0[0].im = i0 + i1;
00701                 v1[0].re = r0 - r1; v1[0].im = i0 - i1;
00702                 v0[nx].re = r2 + r3; v0[nx].im = i2 + i3;
00703                 v1[nx].re = r2 - r3; v1[nx].im = i2 - i3;
00704 
00705                 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
00706                 {
00707                     v0 = dst + i + j;
00708                     v1 = v0 + nx*2;
00709 
00710                     r2 = v0[nx].re*wave[dw*2].re - v0[nx].im*wave[dw*2].im;
00711                     i2 = v0[nx].re*wave[dw*2].im + v0[nx].im*wave[dw*2].re;
00712                     r0 = v1[0].re*wave[dw].im + v1[0].im*wave[dw].re;
00713                     i0 = v1[0].re*wave[dw].re - v1[0].im*wave[dw].im;
00714                     r3 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re;
00715                     i3 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im;
00716 
00717                     r1 = i0 + i3; i1 = r0 + r3;
00718                     r3 = r0 - r3; i3 = i3 - i0;
00719                     r4 = v0[0].re; i4 = v0[0].im;
00720 
00721                     r0 = r4 + r2; i0 = i4 + i2;
00722                     r2 = r4 - r2; i2 = i4 - i2;
00723 
00724                     v0[0].re = r0 + r1; v0[0].im = i0 + i1;
00725                     v1[0].re = r0 - r1; v1[0].im = i0 - i1;
00726                     v0[nx].re = r2 + r3; v0[nx].im = i2 + i3;
00727                     v1[nx].re = r2 - r3; v1[nx].im = i2 - i3;
00728                 }
00729             }
00730         }
00731 
00732         for( ; n < factors[0]; )
00733         {
00734             // do the remaining radix-2 transform
00735             nx = n;
00736             n *= 2;
00737             dw0 /= 2;
00738 
00739             for( i = 0; i < n0; i += n )
00740             {
00741                 Complex<T>* v = dst + i;
00742                 T r0 = v[0].re + v[nx].re;
00743                 T i0 = v[0].im + v[nx].im;
00744                 T r1 = v[0].re - v[nx].re;
00745                 T i1 = v[0].im - v[nx].im;
00746                 v[0].re = r0; v[0].im = i0;
00747                 v[nx].re = r1; v[nx].im = i1;
00748 
00749                 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
00750                 {
00751                     v = dst + i + j;
00752                     r1 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im;
00753                     i1 = v[nx].im*wave[dw].re + v[nx].re*wave[dw].im;
00754                     r0 = v[0].re; i0 = v[0].im;
00755 
00756                     v[0].re = r0 + r1; v[0].im = i0 + i1;
00757                     v[nx].re = r0 - r1; v[nx].im = i0 - i1;
00758                 }
00759             }
00760         }
00761     }
00762 
00763     // 2. all the other transforms
00764     for( f_idx = (factors[0]&1) ? 0 : 1; f_idx < nf; f_idx++ )
00765     {
00766         int factor = factors[f_idx];
00767         nx = n;
00768         n *= factor;
00769         dw0 /= factor;
00770 
00771         if( factor == 3 )
00772         {
00773             // radix-3
00774             for( i = 0; i < n0; i += n )
00775             {
00776                 Complex<T>* v = dst + i;
00777 
00778                 T r1 = v[nx].re + v[nx*2].re;
00779                 T i1 = v[nx].im + v[nx*2].im;
00780                 T r0 = v[0].re;
00781                 T i0 = v[0].im;
00782                 T r2 = sin_120*(v[nx].im - v[nx*2].im);
00783                 T i2 = sin_120*(v[nx*2].re - v[nx].re);
00784                 v[0].re = r0 + r1; v[0].im = i0 + i1;
00785                 r0 -= (T)0.5*r1; i0 -= (T)0.5*i1;
00786                 v[nx].re = r0 + r2; v[nx].im = i0 + i2;
00787                 v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2;
00788 
00789                 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
00790                 {
00791                     v = dst + i + j;
00792                     r0 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im;
00793                     i0 = v[nx].re*wave[dw].im + v[nx].im*wave[dw].re;
00794                     i2 = v[nx*2].re*wave[dw*2].re - v[nx*2].im*wave[dw*2].im;
00795                     r2 = v[nx*2].re*wave[dw*2].im + v[nx*2].im*wave[dw*2].re;
00796                     r1 = r0 + i2; i1 = i0 + r2;
00797 
00798                     r2 = sin_120*(i0 - r2); i2 = sin_120*(i2 - r0);
00799                     r0 = v[0].re; i0 = v[0].im;
00800                     v[0].re = r0 + r1; v[0].im = i0 + i1;
00801                     r0 -= (T)0.5*r1; i0 -= (T)0.5*i1;
00802                     v[nx].re = r0 + r2; v[nx].im = i0 + i2;
00803                     v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2;
00804                 }
00805             }
00806         }
00807         else if( factor == 5 )
00808         {
00809             // radix-5
00810             for( i = 0; i < n0; i += n )
00811             {
00812                 for( j = 0, dw = 0; j < nx; j++, dw += dw0 )
00813                 {
00814                     Complex<T>* v0 = dst + i + j;
00815                     Complex<T>* v1 = v0 + nx*2;
00816                     Complex<T>* v2 = v1 + nx*2;
00817 
00818                     T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4, r5, i5;
00819 
00820                     r3 = v0[nx].re*wave[dw].re - v0[nx].im*wave[dw].im;
00821                     i3 = v0[nx].re*wave[dw].im + v0[nx].im*wave[dw].re;
00822                     r2 = v2[0].re*wave[dw*4].re - v2[0].im*wave[dw*4].im;
00823                     i2 = v2[0].re*wave[dw*4].im + v2[0].im*wave[dw*4].re;
00824 
00825                     r1 = r3 + r2; i1 = i3 + i2;
00826                     r3 -= r2; i3 -= i2;
00827 
00828                     r4 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im;
00829                     i4 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re;
00830                     r0 = v1[0].re*wave[dw*2].re - v1[0].im*wave[dw*2].im;
00831                     i0 = v1[0].re*wave[dw*2].im + v1[0].im*wave[dw*2].re;
00832 
00833                     r2 = r4 + r0; i2 = i4 + i0;
00834                     r4 -= r0; i4 -= i0;
00835 
00836                     r0 = v0[0].re; i0 = v0[0].im;
00837                     r5 = r1 + r2; i5 = i1 + i2;
00838 
00839                     v0[0].re = r0 + r5; v0[0].im = i0 + i5;
00840 
00841                     r0 -= (T)0.25*r5; i0 -= (T)0.25*i5;
00842                     r1 = fft5_2*(r1 - r2); i1 = fft5_2*(i1 - i2);
00843                     r2 = -fft5_3*(i3 + i4); i2 = fft5_3*(r3 + r4);
00844 
00845                     i3 *= -fft5_5; r3 *= fft5_5;
00846                     i4 *= -fft5_4; r4 *= fft5_4;
00847 
00848                     r5 = r2 + i3; i5 = i2 + r3;
00849                     r2 -= i4; i2 -= r4;
00850 
00851                     r3 = r0 + r1; i3 = i0 + i1;
00852                     r0 -= r1; i0 -= i1;
00853 
00854                     v0[nx].re = r3 + r2; v0[nx].im = i3 + i2;
00855                     v2[0].re = r3 - r2; v2[0].im = i3 - i2;
00856 
00857                     v1[0].re = r0 + r5; v1[0].im = i0 + i5;
00858                     v1[nx].re = r0 - r5; v1[nx].im = i0 - i5;
00859                 }
00860             }
00861         }
00862         else
00863         {
00864             // radix-"factor" - an odd number
00865             int p, q, factor2 = (factor - 1)/2;
00866             int d, dd, dw_f = tab_size/factor;
00867             Complex<T>* a = buf;
00868             Complex<T>* b = buf + factor2;
00869 
00870             for( i = 0; i < n0; i += n )
00871             {
00872                 for( j = 0, dw = 0; j < nx; j++, dw += dw0 )
00873                 {
00874                     Complex<T>* v = dst + i + j;
00875                     Complex<T> v_0 = v[0];
00876                     Complex<T> vn_0 = v_0;
00877 
00878                     if( j == 0 )
00879                     {
00880                         for( p = 1, k = nx; p <= factor2; p++, k += nx )
00881                         {
00882                             T r0 = v[k].re + v[n-k].re;
00883                             T i0 = v[k].im - v[n-k].im;
00884                             T r1 = v[k].re - v[n-k].re;
00885                             T i1 = v[k].im + v[n-k].im;
00886 
00887                             vn_0.re += r0; vn_0.im += i1;
00888                             a[p-1].re = r0; a[p-1].im = i0;
00889                             b[p-1].re = r1; b[p-1].im = i1;
00890                         }
00891                     }
00892                     else
00893                     {
00894                         const Complex<T>* wave_ = wave + dw*factor;
00895                         d = dw;
00896 
00897                         for( p = 1, k = nx; p <= factor2; p++, k += nx, d += dw )
00898                         {
00899                             T r2 = v[k].re*wave[d].re - v[k].im*wave[d].im;
00900                             T i2 = v[k].re*wave[d].im + v[k].im*wave[d].re;
00901 
00902                             T r1 = v[n-k].re*wave_[-d].re - v[n-k].im*wave_[-d].im;
00903                             T i1 = v[n-k].re*wave_[-d].im + v[n-k].im*wave_[-d].re;
00904 
00905                             T r0 = r2 + r1;
00906                             T i0 = i2 - i1;
00907                             r1 = r2 - r1;
00908                             i1 = i2 + i1;
00909 
00910                             vn_0.re += r0; vn_0.im += i1;
00911                             a[p-1].re = r0; a[p-1].im = i0;
00912                             b[p-1].re = r1; b[p-1].im = i1;
00913                         }
00914                     }
00915 
00916                     v[0] = vn_0;
00917 
00918                     for( p = 1, k = nx; p <= factor2; p++, k += nx )
00919                     {
00920                         Complex<T> s0 = v_0, s1 = v_0;
00921                         d = dd = dw_f*p;
00922 
00923                         for( q = 0; q < factor2; q++ )
00924                         {
00925                             T r0 = wave[d].re * a[q].re;
00926                             T i0 = wave[d].im * a[q].im;
00927                             T r1 = wave[d].re * b[q].im;
00928                             T i1 = wave[d].im * b[q].re;
00929 
00930                             s1.re += r0 + i0; s0.re += r0 - i0;
00931                             s1.im += r1 - i1; s0.im += r1 + i1;
00932 
00933                             d += dd;
00934                             d -= -(d >= tab_size) & tab_size;
00935                         }
00936 
00937                         v[k] = s0;
00938                         v[n-k] = s1;
00939                     }
00940                 }
00941             }
00942         }
00943     }
00944 
00945     if( scale != 1 )
00946     {
00947         T re_scale = scale, im_scale = scale;
00948         if( inv )
00949             im_scale = -im_scale;
00950 
00951         for( i = 0; i < n0; i++ )
00952         {
00953             T t0 = dst[i].re*re_scale;
00954             T t1 = dst[i].im*im_scale;
00955             dst[i].re = t0;
00956             dst[i].im = t1;
00957         }
00958     }
00959     else if( inv )
00960     {
00961         for( i = 0; i <= n0 - 2; i += 2 )
00962         {
00963             T t0 = -dst[i].im;
00964             T t1 = -dst[i+1].im;
00965             dst[i].im = t0;
00966             dst[i+1].im = t1;
00967         }
00968 
00969         if( i < n0 )
00970             dst[n0-1].im = -dst[n0-1].im;
00971     }
00972 }
00973 
00974 
00975 /* FFT of real vector
00976    output vector format:
00977      re(0), re(1), im(1), ... , re(n/2-1), im((n+1)/2-1) [, re((n+1)/2)] OR ...
00978      re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */
00979 template<typename T> static void
00980 RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
00981          const Complex<T>* wave, int tab_size, const void*
00982 #ifdef USE_IPP_DFT
00983          spec
00984 #endif
00985          ,
00986          Complex<T>* buf, int flags, double _scale )
00987 {
00988     int complex_output = (flags & DFT_COMPLEX_INPUT_OR_OUTPUT) != 0;
00989     T scale = (T)_scale;
00990     int j, n2 = n >> 1;
00991     dst += complex_output;
00992 
00993 #ifdef USE_IPP_DFT
00994     if( spec )
00995     {
00996         if (ippsDFTFwd_RToPack( src, dst, spec, (uchar*)buf ) >=0)
00997         {
00998             if( complex_output )
00999             {
01000                 dst[-1] = dst[0];
01001                 dst[0] = 0;
01002                 if( (n & 1) == 0 )
01003                     dst[n] = 0;
01004             }
01005             CV_IMPL_ADD(CV_IMPL_IPP);
01006             return;
01007         }
01008         setIppErrorStatus();
01009     }
01010 #endif
01011     assert( tab_size == n );
01012 
01013     if( n == 1 )
01014     {
01015         dst[0] = src[0]*scale;
01016     }
01017     else if( n == 2 )
01018     {
01019         T t = (src[0] + src[1])*scale;
01020         dst[1] = (src[0] - src[1])*scale;
01021         dst[0] = t;
01022     }
01023     else if( n & 1 )
01024     {
01025         dst -= complex_output;
01026         Complex<T>* _dst = (Complex<T>*)dst;
01027         _dst[0].re = src[0]*scale;
01028         _dst[0].im = 0;
01029         for( j = 1; j < n; j += 2 )
01030         {
01031             T t0 = src[itab[j]]*scale;
01032             T t1 = src[itab[j+1]]*scale;
01033             _dst[j].re = t0;
01034             _dst[j].im = 0;
01035             _dst[j+1].re = t1;
01036             _dst[j+1].im = 0;
01037         }
01038         DFT( _dst, _dst, n, nf, factors, itab, wave,
01039              tab_size, 0, buf, DFT_NO_PERMUTE, 1 );
01040         if( !complex_output )
01041             dst[1] = dst[0];
01042     }
01043     else
01044     {
01045         T t0, t;
01046         T h1_re, h1_im, h2_re, h2_im;
01047         T scale2 = scale*(T)0.5;
01048         factors[0] >>= 1;
01049 
01050         DFT( (Complex<T>*)src, (Complex<T>*)dst, n2, nf - (factors[0] == 1),
01051              factors + (factors[0] == 1),
01052              itab, wave, tab_size, 0, buf, 0, 1 );
01053         factors[0] <<= 1;
01054 
01055         t = dst[0] - dst[1];
01056         dst[0] = (dst[0] + dst[1])*scale;
01057         dst[1] = t*scale;
01058 
01059         t0 = dst[n2];
01060         t = dst[n-1];
01061         dst[n-1] = dst[1];
01062 
01063         for( j = 2, wave++; j < n2; j += 2, wave++ )
01064         {
01065             /* calc odd */
01066             h2_re = scale2*(dst[j+1] + t);
01067             h2_im = scale2*(dst[n-j] - dst[j]);
01068 
01069             /* calc even */
01070             h1_re = scale2*(dst[j] + dst[n-j]);
01071             h1_im = scale2*(dst[j+1] - t);
01072 
01073             /* rotate */
01074             t = h2_re*wave->re - h2_im*wave->im;
01075             h2_im = h2_re*wave->im + h2_im*wave->re;
01076             h2_re = t;
01077             t = dst[n-j-1];
01078 
01079             dst[j-1] = h1_re + h2_re;
01080             dst[n-j-1] = h1_re - h2_re;
01081             dst[j] = h1_im + h2_im;
01082             dst[n-j] = h2_im - h1_im;
01083         }
01084 
01085         if( j <= n2 )
01086         {
01087             dst[n2-1] = t0*scale;
01088             dst[n2] = -t*scale;
01089         }
01090     }
01091 
01092     if( complex_output && ((n & 1) == 0 || n == 1))
01093     {
01094         dst[-1] = dst[0];
01095         dst[0] = 0;
01096         if( n > 1 )
01097             dst[n] = 0;
01098     }
01099 }
01100 
01101 /* Inverse FFT of complex conjugate-symmetric vector
01102    input vector format:
01103       re[0], re[1], im[1], ... , re[n/2-1], im[n/2-1], re[n/2] OR
01104       re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */
01105 template<typename T> static void
01106 CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
01107          const Complex<T>* wave, int tab_size,
01108          const void*
01109 #ifdef USE_IPP_DFT
01110          spec
01111 #endif
01112          , Complex<T>* buf,
01113          int flags, double _scale )
01114 {
01115     int complex_input = (flags & DFT_COMPLEX_INPUT_OR_OUTPUT) != 0;
01116     int j, k, n2 = (n+1) >> 1;
01117     T scale = (T)_scale;
01118     T save_s1 = 0.;
01119     T t0, t1, t2, t3, t;
01120 
01121     assert( tab_size == n );
01122 
01123     if( complex_input )
01124     {
01125         assert( src != dst );
01126         save_s1 = src[1];
01127         ((T*)src)[1] = src[0];
01128         src++;
01129     }
01130 #ifdef USE_IPP_DFT
01131     if( spec )
01132     {
01133         if (ippsDFTInv_PackToR( src, dst, spec, (uchar*)buf ) >=0)
01134         {
01135             if( complex_input )
01136                 ((T*)src)[0] = (T)save_s1;
01137             CV_IMPL_ADD(CV_IMPL_IPP);
01138             return;
01139         }
01140 
01141         setIppErrorStatus();
01142     }
01143 #endif
01144     if( n == 1 )
01145     {
01146         dst[0] = (T)(src[0]*scale);
01147     }
01148     else if( n == 2 )
01149     {
01150         t = (src[0] + src[1])*scale;
01151         dst[1] = (src[0] - src[1])*scale;
01152         dst[0] = t;
01153     }
01154     else if( n & 1 )
01155     {
01156         Complex<T>* _src = (Complex<T>*)(src-1);
01157         Complex<T>* _dst = (Complex<T>*)dst;
01158 
01159         _dst[0].re = src[0];
01160         _dst[0].im = 0;
01161         for( j = 1; j < n2; j++ )
01162         {
01163             int k0 = itab[j], k1 = itab[n-j];
01164             t0 = _src[j].re; t1 = _src[j].im;
01165             _dst[k0].re = t0; _dst[k0].im = -t1;
01166             _dst[k1].re = t0; _dst[k1].im = t1;
01167         }
01168 
01169         DFT( _dst, _dst, n, nf, factors, itab, wave,
01170              tab_size, 0, buf, DFT_NO_PERMUTE, 1. );
01171         dst[0] *= scale;
01172         for( j = 1; j < n; j += 2 )
01173         {
01174             t0 = dst[j*2]*scale;
01175             t1 = dst[j*2+2]*scale;
01176             dst[j] = t0;
01177             dst[j+1] = t1;
01178         }
01179     }
01180     else
01181     {
01182         int inplace = src == dst;
01183         const Complex<T>* w = wave;
01184 
01185         t = src[1];
01186         t0 = (src[0] + src[n-1]);
01187         t1 = (src[n-1] - src[0]);
01188         dst[0] = t0;
01189         dst[1] = t1;
01190 
01191         for( j = 2, w++; j < n2; j += 2, w++ )
01192         {
01193             T h1_re, h1_im, h2_re, h2_im;
01194 
01195             h1_re = (t + src[n-j-1]);
01196             h1_im = (src[j] - src[n-j]);
01197 
01198             h2_re = (t - src[n-j-1]);
01199             h2_im = (src[j] + src[n-j]);
01200 
01201             t = h2_re*w->re + h2_im*w->im;
01202             h2_im = h2_im*w->re - h2_re*w->im;
01203             h2_re = t;
01204 
01205             t = src[j+1];
01206             t0 = h1_re - h2_im;
01207             t1 = -h1_im - h2_re;
01208             t2 = h1_re + h2_im;
01209             t3 = h1_im - h2_re;
01210 
01211             if( inplace )
01212             {
01213                 dst[j] = t0;
01214                 dst[j+1] = t1;
01215                 dst[n-j] = t2;
01216                 dst[n-j+1]= t3;
01217             }
01218             else
01219             {
01220                 int j2 = j >> 1;
01221                 k = itab[j2];
01222                 dst[k] = t0;
01223                 dst[k+1] = t1;
01224                 k = itab[n2-j2];
01225                 dst[k] = t2;
01226                 dst[k+1]= t3;
01227             }
01228         }
01229 
01230         if( j <= n2 )
01231         {
01232             t0 = t*2;
01233             t1 = src[n2]*2;
01234 
01235             if( inplace )
01236             {
01237                 dst[n2] = t0;
01238                 dst[n2+1] = t1;
01239             }
01240             else
01241             {
01242                 k = itab[n2];
01243                 dst[k*2] = t0;
01244                 dst[k*2+1] = t1;
01245             }
01246         }
01247 
01248         factors[0] >>= 1;
01249         DFT( (Complex<T>*)dst, (Complex<T>*)dst, n2,
01250              nf - (factors[0] == 1),
01251              factors + (factors[0] == 1), itab,
01252              wave, tab_size, 0, buf,
01253              inplace ? 0 : DFT_NO_PERMUTE, 1. );
01254         factors[0] <<= 1;
01255 
01256         for( j = 0; j < n; j += 2 )
01257         {
01258             t0 = dst[j]*scale;
01259             t1 = dst[j+1]*(-scale);
01260             dst[j] = t0;
01261             dst[j+1] = t1;
01262         }
01263     }
01264     if( complex_input )
01265         ((T*)src)[0] = (T)save_s1;
01266 }
01267 
01268 static void
01269 CopyColumn( const uchar* _src, size_t src_step,
01270             uchar* _dst, size_t dst_step,
01271             int len, size_t elem_size )
01272 {
01273     int i, t0, t1;
01274     const int* src = (const int*)_src;
01275     int* dst = (int*)_dst;
01276     src_step /= sizeof(src[0]);
01277     dst_step /= sizeof(dst[0]);
01278 
01279     if( elem_size == sizeof(int) )
01280     {
01281         for( i = 0; i < len; i++, src += src_step, dst += dst_step )
01282             dst[0] = src[0];
01283     }
01284     else if( elem_size == sizeof(int)*2 )
01285     {
01286         for( i = 0; i < len; i++, src += src_step, dst += dst_step )
01287         {
01288             t0 = src[0]; t1 = src[1];
01289             dst[0] = t0; dst[1] = t1;
01290         }
01291     }
01292     else if( elem_size == sizeof(int)*4 )
01293     {
01294         for( i = 0; i < len; i++, src += src_step, dst += dst_step )
01295         {
01296             t0 = src[0]; t1 = src[1];
01297             dst[0] = t0; dst[1] = t1;
01298             t0 = src[2]; t1 = src[3];
01299             dst[2] = t0; dst[3] = t1;
01300         }
01301     }
01302 }
01303 
01304 
01305 static void
01306 CopyFrom2Columns( const uchar* _src, size_t src_step,
01307                   uchar* _dst0, uchar* _dst1,
01308                   int len, size_t elem_size )
01309 {
01310     int i, t0, t1;
01311     const int* src = (const int*)_src;
01312     int* dst0 = (int*)_dst0;
01313     int* dst1 = (int*)_dst1;
01314     src_step /= sizeof(src[0]);
01315 
01316     if( elem_size == sizeof(int) )
01317     {
01318         for( i = 0; i < len; i++, src += src_step )
01319         {
01320             t0 = src[0]; t1 = src[1];
01321             dst0[i] = t0; dst1[i] = t1;
01322         }
01323     }
01324     else if( elem_size == sizeof(int)*2 )
01325     {
01326         for( i = 0; i < len*2; i += 2, src += src_step )
01327         {
01328             t0 = src[0]; t1 = src[1];
01329             dst0[i] = t0; dst0[i+1] = t1;
01330             t0 = src[2]; t1 = src[3];
01331             dst1[i] = t0; dst1[i+1] = t1;
01332         }
01333     }
01334     else if( elem_size == sizeof(int)*4 )
01335     {
01336         for( i = 0; i < len*4; i += 4, src += src_step )
01337         {
01338             t0 = src[0]; t1 = src[1];
01339             dst0[i] = t0; dst0[i+1] = t1;
01340             t0 = src[2]; t1 = src[3];
01341             dst0[i+2] = t0; dst0[i+3] = t1;
01342             t0 = src[4]; t1 = src[5];
01343             dst1[i] = t0; dst1[i+1] = t1;
01344             t0 = src[6]; t1 = src[7];
01345             dst1[i+2] = t0; dst1[i+3] = t1;
01346         }
01347     }
01348 }
01349 
01350 
01351 static void
01352 CopyTo2Columns( const uchar* _src0, const uchar* _src1,
01353                 uchar* _dst, size_t dst_step,
01354                 int len, size_t elem_size )
01355 {
01356     int i, t0, t1;
01357     const int* src0 = (const int*)_src0;
01358     const int* src1 = (const int*)_src1;
01359     int* dst = (int*)_dst;
01360     dst_step /= sizeof(dst[0]);
01361 
01362     if( elem_size == sizeof(int) )
01363     {
01364         for( i = 0; i < len; i++, dst += dst_step )
01365         {
01366             t0 = src0[i]; t1 = src1[i];
01367             dst[0] = t0; dst[1] = t1;
01368         }
01369     }
01370     else if( elem_size == sizeof(int)*2 )
01371     {
01372         for( i = 0; i < len*2; i += 2, dst += dst_step )
01373         {
01374             t0 = src0[i]; t1 = src0[i+1];
01375             dst[0] = t0; dst[1] = t1;
01376             t0 = src1[i]; t1 = src1[i+1];
01377             dst[2] = t0; dst[3] = t1;
01378         }
01379     }
01380     else if( elem_size == sizeof(int)*4 )
01381     {
01382         for( i = 0; i < len*4; i += 4, dst += dst_step )
01383         {
01384             t0 = src0[i]; t1 = src0[i+1];
01385             dst[0] = t0; dst[1] = t1;
01386             t0 = src0[i+2]; t1 = src0[i+3];
01387             dst[2] = t0; dst[3] = t1;
01388             t0 = src1[i]; t1 = src1[i+1];
01389             dst[4] = t0; dst[5] = t1;
01390             t0 = src1[i+2]; t1 = src1[i+3];
01391             dst[6] = t0; dst[7] = t1;
01392         }
01393     }
01394 }
01395 
01396 
01397 static void
01398 ExpandCCS( uchar* _ptr, int n, int elem_size )
01399 {
01400     int i;
01401     if( elem_size == (int)sizeof(float) )
01402     {
01403         float* p = (float*)_ptr;
01404         for( i = 1; i < (n+1)/2; i++ )
01405         {
01406             p[(n-i)*2] = p[i*2-1];
01407             p[(n-i)*2+1] = -p[i*2];
01408         }
01409         if( (n & 1) == 0 )
01410         {
01411             p[n] = p[n-1];
01412             p[n+1] = 0.f;
01413             n--;
01414         }
01415         for( i = n-1; i > 0; i-- )
01416             p[i+1] = p[i];
01417         p[1] = 0.f;
01418     }
01419     else
01420     {
01421         double* p = (double*)_ptr;
01422         for( i = 1; i < (n+1)/2; i++ )
01423         {
01424             p[(n-i)*2] = p[i*2-1];
01425             p[(n-i)*2+1] = -p[i*2];
01426         }
01427         if( (n & 1) == 0 )
01428         {
01429             p[n] = p[n-1];
01430             p[n+1] = 0.f;
01431             n--;
01432         }
01433         for( i = n-1; i > 0; i-- )
01434             p[i+1] = p[i];
01435         p[1] = 0.f;
01436     }
01437 }
01438 
01439 
01440 typedef void (*DFTFunc)(
01441      const void* src, void* dst, int n, int nf, int* factors,
01442      const int* itab, const void* wave, int tab_size,
01443      const void* spec, void* buf, int inv, double scale );
01444 
01445 static void DFT_32f( const Complexf* src, Complexf* dst, int n,
01446     int nf, const int* factors, const int* itab,
01447     const Complexf* wave, int tab_size,
01448     const void* spec, Complexf* buf,
01449     int flags, double scale )
01450 {
01451     DFT(src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
01452 }
01453 
01454 static void DFT_64f( const Complexd* src, Complexd* dst, int n,
01455     int nf, const int* factors, const int* itab,
01456     const Complexd* wave, int tab_size,
01457     const void* spec, Complexd* buf,
01458     int flags, double scale )
01459 {
01460     DFT(src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
01461 }
01462 
01463 
01464 static void RealDFT_32f( const float* src, float* dst, int n, int nf, int* factors,
01465         const int* itab,  const Complexf* wave, int tab_size, const void* spec,
01466         Complexf* buf, int flags, double scale )
01467 {
01468     RealDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
01469 }
01470 
01471 static void RealDFT_64f( const double* src, double* dst, int n, int nf, int* factors,
01472         const int* itab,  const Complexd* wave, int tab_size, const void* spec,
01473         Complexd* buf, int flags, double scale )
01474 {
01475     RealDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
01476 }
01477 
01478 static void CCSIDFT_32f( const float* src, float* dst, int n, int nf, int* factors,
01479                          const int* itab,  const Complexf* wave, int tab_size, const void* spec,
01480                          Complexf* buf, int flags, double scale )
01481 {
01482     CCSIDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
01483 }
01484 
01485 static void CCSIDFT_64f( const double* src, double* dst, int n, int nf, int* factors,
01486                          const int* itab,  const Complexd* wave, int tab_size, const void* spec,
01487                          Complexd* buf, int flags, double scale )
01488 {
01489     CCSIDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
01490 }
01491 
01492 }
01493 
01494 #ifdef USE_IPP_DFT
01495 typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, int*, int*, int*);
01496 typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void*, uchar*);
01497 #endif
01498 
01499 namespace cv
01500 {
01501 #if defined USE_IPP_DFT
01502 
01503 typedef IppStatus (CV_STDCALL* ippiDFT_C_Func)(const Ipp32fc*, int, Ipp32fc*, int, const IppiDFTSpec_C_32fc*, Ipp8u*);
01504 typedef IppStatus (CV_STDCALL* ippiDFT_R_Func)(const Ipp32f* , int, Ipp32f* , int, const IppiDFTSpec_R_32f* , Ipp8u*);
01505 
01506 template <typename Dft>
01507 class Dft_C_IPPLoop_Invoker : public ParallelLoopBody
01508 {
01509 public:
01510 
01511     Dft_C_IPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dft& _ippidft, int _norm_flag, bool *_ok) :
01512         ParallelLoopBody(), src(_src), dst(_dst), ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
01513     {
01514         *ok = true;
01515     }
01516 
01517     virtual void operator()(const Range& range) const
01518     {
01519         IppStatus status;
01520         Ipp8u* pBuffer = 0;
01521         Ipp8u* pMemInit= 0;
01522         int sizeBuffer=0;
01523         int sizeSpec=0;
01524         int sizeInit=0;
01525 
01526         IppiSize srcRoiSize = {src.cols, 1};
01527 
01528         status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
01529         if ( status < 0 )
01530         {
01531             *ok = false;
01532             return;
01533         }
01534 
01535         IppiDFTSpec_C_32fc* pDFTSpec = (IppiDFTSpec_C_32fc*)ippMalloc( sizeSpec );
01536 
01537         if ( sizeInit > 0 )
01538             pMemInit = (Ipp8u*)ippMalloc( sizeInit );
01539 
01540         if ( sizeBuffer > 0 )
01541             pBuffer = (Ipp8u*)ippMalloc( sizeBuffer );
01542 
01543         status = ippiDFTInit_C_32fc( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
01544 
01545         if ( sizeInit > 0 )
01546             ippFree( pMemInit );
01547 
01548         if ( status < 0 )
01549         {
01550             ippFree( pDFTSpec );
01551             if ( sizeBuffer > 0 )
01552                 ippFree( pBuffer );
01553             *ok = false;
01554             return;
01555         }
01556 
01557         for( int i = range.start; i < range.end; ++i)
01558             if(!ippidft(src.ptr<Ipp32fc>(i), (int)src.step,dst.ptr<Ipp32fc>(i), (int)dst.step, pDFTSpec, (Ipp8u*)pBuffer))
01559             {
01560                 *ok = false;
01561             }
01562 
01563         if ( sizeBuffer > 0 )
01564             ippFree( pBuffer );
01565 
01566         ippFree( pDFTSpec );
01567         CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
01568     }
01569 
01570 private:
01571     const Mat& src;
01572     Mat& dst;
01573     const Dft& ippidft;
01574     int norm_flag;
01575     bool *ok;
01576 
01577     const Dft_C_IPPLoop_Invoker& operator= (const Dft_C_IPPLoop_Invoker&);
01578 };
01579 
01580 template <typename Dft>
01581 class Dft_R_IPPLoop_Invoker : public ParallelLoopBody
01582 {
01583 public:
01584 
01585     Dft_R_IPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dft& _ippidft, int _norm_flag, bool *_ok) :
01586         ParallelLoopBody(), src(_src), dst(_dst), ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
01587     {
01588         *ok = true;
01589     }
01590 
01591     virtual void operator()(const Range& range) const
01592     {
01593         IppStatus status;
01594         Ipp8u* pBuffer = 0;
01595         Ipp8u* pMemInit= 0;
01596         int sizeBuffer=0;
01597         int sizeSpec=0;
01598         int sizeInit=0;
01599 
01600         IppiSize srcRoiSize = {src.cols, 1};
01601 
01602         status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
01603         if ( status < 0 )
01604         {
01605             *ok = false;
01606             return;
01607         }
01608 
01609         IppiDFTSpec_R_32f* pDFTSpec = (IppiDFTSpec_R_32f*)ippMalloc( sizeSpec );
01610 
01611         if ( sizeInit > 0 )
01612             pMemInit = (Ipp8u*)ippMalloc( sizeInit );
01613 
01614         if ( sizeBuffer > 0 )
01615             pBuffer = (Ipp8u*)ippMalloc( sizeBuffer );
01616 
01617         status = ippiDFTInit_R_32f( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
01618 
01619         if ( sizeInit > 0 )
01620             ippFree( pMemInit );
01621 
01622         if ( status < 0 )
01623         {
01624             ippFree( pDFTSpec );
01625             if ( sizeBuffer > 0 )
01626                 ippFree( pBuffer );
01627             *ok = false;
01628             return;
01629         }
01630 
01631         for( int i = range.start; i < range.end; ++i)
01632             if(!ippidft(src.ptr<float>(i), (int)src.step,dst.ptr<float>(i), (int)dst.step, pDFTSpec, (Ipp8u*)pBuffer))
01633             {
01634                 *ok = false;
01635             }
01636 
01637         if ( sizeBuffer > 0 )
01638             ippFree( pBuffer );
01639 
01640         ippFree( pDFTSpec );
01641         CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
01642     }
01643 
01644 private:
01645     const Mat& src;
01646     Mat& dst;
01647     const Dft& ippidft;
01648     int norm_flag;
01649     bool *ok;
01650 
01651     const Dft_R_IPPLoop_Invoker& operator= (const Dft_R_IPPLoop_Invoker&);
01652 };
01653 
01654 template <typename Dft>
01655 bool Dft_C_IPPLoop(const Mat& src, Mat& dst, const Dft& ippidft, int norm_flag)
01656 {
01657     bool ok;
01658     parallel_for_(Range(0, src.rows), Dft_C_IPPLoop_Invoker<Dft>(src, dst, ippidft, norm_flag, &ok), src.total()/(double)(1<<16) );
01659     return ok;
01660 }
01661 
01662 template <typename Dft>
01663 bool Dft_R_IPPLoop(const Mat& src, Mat& dst, const Dft& ippidft, int norm_flag)
01664 {
01665     bool ok;
01666     parallel_for_(Range(0, src.rows), Dft_R_IPPLoop_Invoker<Dft>(src, dst, ippidft, norm_flag, &ok), src.total()/(double)(1<<16) );
01667     return ok;
01668 }
01669 
01670 struct IPPDFT_C_Functor
01671 {
01672     IPPDFT_C_Functor(ippiDFT_C_Func _func) : func(_func){}
01673 
01674     bool operator()(const Ipp32fc* src, int srcStep, Ipp32fc* dst, int dstStep, const IppiDFTSpec_C_32fc* pDFTSpec, Ipp8u* pBuffer) const
01675     {
01676         return func ? func(src, srcStep, dst, dstStep, pDFTSpec, pBuffer) >= 0 : false;
01677     }
01678 private:
01679     ippiDFT_C_Func func;
01680 };
01681 
01682 struct IPPDFT_R_Functor
01683 {
01684     IPPDFT_R_Functor(ippiDFT_R_Func _func) : func(_func){}
01685 
01686     bool operator()(const Ipp32f* src, int srcStep, Ipp32f* dst, int dstStep, const IppiDFTSpec_R_32f* pDFTSpec, Ipp8u* pBuffer) const
01687     {
01688         return func ? func(src, srcStep, dst, dstStep, pDFTSpec, pBuffer) >= 0 : false;
01689     }
01690 private:
01691     ippiDFT_R_Func func;
01692 };
01693 
01694 static bool ippi_DFT_C_32F(const Mat& src, Mat& dst, bool inv, int norm_flag)
01695 {
01696     IppStatus status;
01697     Ipp8u* pBuffer = 0;
01698     Ipp8u* pMemInit= 0;
01699     int sizeBuffer=0;
01700     int sizeSpec=0;
01701     int sizeInit=0;
01702 
01703     IppiSize srcRoiSize = {src.cols, src.rows};
01704 
01705     status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
01706     if ( status < 0 )
01707         return false;
01708 
01709     IppiDFTSpec_C_32fc* pDFTSpec = (IppiDFTSpec_C_32fc*)ippMalloc( sizeSpec );
01710 
01711     if ( sizeInit > 0 )
01712         pMemInit = (Ipp8u*)ippMalloc( sizeInit );
01713 
01714     if ( sizeBuffer > 0 )
01715         pBuffer = (Ipp8u*)ippMalloc( sizeBuffer );
01716 
01717     status = ippiDFTInit_C_32fc( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
01718 
01719     if ( sizeInit > 0 )
01720         ippFree( pMemInit );
01721 
01722     if ( status < 0 )
01723     {
01724         ippFree( pDFTSpec );
01725         if ( sizeBuffer > 0 )
01726             ippFree( pBuffer );
01727         return false;
01728     }
01729 
01730     if (!inv)
01731         status = ippiDFTFwd_CToC_32fc_C1R( src.ptr<Ipp32fc>(), (int)src.step, dst.ptr<Ipp32fc>(), (int)dst.step, pDFTSpec, pBuffer );
01732     else
01733         status = ippiDFTInv_CToC_32fc_C1R( src.ptr<Ipp32fc>(), (int)src.step, dst.ptr<Ipp32fc>(), (int)dst.step, pDFTSpec, pBuffer );
01734 
01735     if ( sizeBuffer > 0 )
01736         ippFree( pBuffer );
01737 
01738     ippFree( pDFTSpec );
01739 
01740     if(status >= 0)
01741     {
01742         CV_IMPL_ADD(CV_IMPL_IPP);
01743         return true;
01744     }
01745     return false;
01746 }
01747 
01748 static bool ippi_DFT_R_32F(const Mat& src, Mat& dst, bool inv, int norm_flag)
01749 {
01750     IppStatus status;
01751     Ipp8u* pBuffer = 0;
01752     Ipp8u* pMemInit= 0;
01753     int sizeBuffer=0;
01754     int sizeSpec=0;
01755     int sizeInit=0;
01756 
01757     IppiSize srcRoiSize = {src.cols, src.rows};
01758 
01759     status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
01760     if ( status < 0 )
01761         return false;
01762 
01763     IppiDFTSpec_R_32f* pDFTSpec = (IppiDFTSpec_R_32f*)ippMalloc( sizeSpec );
01764 
01765     if ( sizeInit > 0 )
01766         pMemInit = (Ipp8u*)ippMalloc( sizeInit );
01767 
01768     if ( sizeBuffer > 0 )
01769         pBuffer = (Ipp8u*)ippMalloc( sizeBuffer );
01770 
01771     status = ippiDFTInit_R_32f( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
01772 
01773     if ( sizeInit > 0 )
01774         ippFree( pMemInit );
01775 
01776     if ( status < 0 )
01777     {
01778         ippFree( pDFTSpec );
01779         if ( sizeBuffer > 0 )
01780             ippFree( pBuffer );
01781         return false;
01782     }
01783 
01784     if (!inv)
01785         status = ippiDFTFwd_RToPack_32f_C1R( src.ptr<float>(), (int)(src.step), dst.ptr<float>(), (int)dst.step, pDFTSpec, pBuffer );
01786     else
01787         status = ippiDFTInv_PackToR_32f_C1R( src.ptr<float>(), (int)src.step, dst.ptr<float>(), (int)dst.step, pDFTSpec, pBuffer );
01788 
01789     if ( sizeBuffer > 0 )
01790         ippFree( pBuffer );
01791 
01792     ippFree( pDFTSpec );
01793 
01794     if(status >= 0)
01795     {
01796         CV_IMPL_ADD(CV_IMPL_IPP);
01797         return true;
01798     }
01799     return false;
01800 }
01801 
01802 #endif
01803 }
01804 
01805 #ifdef HAVE_OPENCL
01806 
01807 namespace cv
01808 {
01809 
01810 enum FftType
01811 {
01812     R2R = 0, // real to CCS in case forward transform, CCS to real otherwise
01813     C2R = 1, // complex to real in case inverse transform
01814     R2C = 2, // real to complex in case forward transform
01815     C2C = 3  // complex to complex
01816 };
01817 
01818 struct OCL_FftPlan
01819 {
01820 private:
01821     UMat twiddles;
01822     String buildOptions;
01823     int thread_count;
01824     int dft_size;
01825     int dft_depth;
01826     bool status;
01827 
01828 public:
01829     OCL_FftPlan(int _size, int _depth) : dft_size(_size), dft_depth(_depth), status(true)
01830     {
01831         CV_Assert( dft_depth == CV_32F || dft_depth == CV_64F );
01832 
01833         int min_radix;
01834         std::vector<int> radixes, blocks;
01835         ocl_getRadixes(dft_size, radixes, blocks, min_radix);
01836         thread_count = dft_size / min_radix;
01837 
01838         if (thread_count > (int) ocl::Device::getDefault().maxWorkGroupSize())
01839         {
01840             status = false;
01841             return;
01842         }
01843 
01844         // generate string with radix calls
01845         String radix_processing;
01846         int n = 1, twiddle_size = 0;
01847         for (size_t i=0; i<radixes.size(); i++)
01848         {
01849             int radix = radixes[i], block = blocks[i];
01850             if (block > 1)
01851                 radix_processing += format("fft_radix%d_B%d(smem,twiddles+%d,ind,%d,%d);", radix, block, twiddle_size, n, dft_size/radix);
01852             else
01853                 radix_processing += format("fft_radix%d(smem,twiddles+%d,ind,%d,%d);", radix, twiddle_size, n, dft_size/radix);
01854             twiddle_size += (radix-1)*n;
01855             n *= radix;
01856         }
01857 
01858         twiddles.create(1, twiddle_size, CV_MAKE_TYPE(dft_depth, 2));
01859         if (dft_depth == CV_32F)
01860             fillRadixTable<float>(twiddles, radixes);
01861         else
01862             fillRadixTable<double>(twiddles, radixes);
01863 
01864         buildOptions = format("-D LOCAL_SIZE=%d -D kercn=%d -D FT=%s -D CT=%s%s -D RADIX_PROCESS=%s",
01865                               dft_size, min_radix, ocl::typeToStr(dft_depth), ocl::typeToStr(CV_MAKE_TYPE(dft_depth, 2)),
01866                               dft_depth == CV_64F ? " -D DOUBLE_SUPPORT" : "", radix_processing.c_str());
01867     }
01868 
01869     bool enqueueTransform(InputArray _src, OutputArray _dst, int num_dfts, int flags, int fftType, bool rows = true) const
01870     {
01871         if (!status)
01872             return false;
01873 
01874         UMat src = _src.getUMat();
01875         UMat dst = _dst.getUMat();
01876 
01877         size_t globalsize[2];
01878         size_t localsize[2];
01879         String kernel_name;
01880 
01881         bool is1d = (flags & DFT_ROWS) != 0 || num_dfts == 1;
01882         bool inv = (flags & DFT_INVERSE) != 0;
01883         String options = buildOptions;
01884 
01885         if (rows)
01886         {
01887             globalsize[0] = thread_count; globalsize[1] = src.rows;
01888             localsize[0] = thread_count; localsize[1] = 1;
01889             kernel_name = !inv ? "fft_multi_radix_rows" : "ifft_multi_radix_rows";
01890             if ((is1d || inv) && (flags & DFT_SCALE))
01891                 options += " -D DFT_SCALE";
01892         }
01893         else
01894         {
01895             globalsize[0] = num_dfts; globalsize[1] = thread_count;
01896             localsize[0] = 1; localsize[1] = thread_count;
01897             kernel_name = !inv ? "fft_multi_radix_cols" : "ifft_multi_radix_cols";
01898             if (flags & DFT_SCALE)
01899                 options += " -D DFT_SCALE";
01900         }
01901 
01902         options += src.channels() == 1 ? " -D REAL_INPUT" : " -D COMPLEX_INPUT";
01903         options += dst.channels() == 1 ? " -D REAL_OUTPUT" : " -D COMPLEX_OUTPUT";
01904         options += is1d ? " -D IS_1D" : "";
01905 
01906         if (!inv)
01907         {
01908             if ((is1d && src.channels() == 1) || (rows && (fftType == R2R)))
01909                 options += " -D NO_CONJUGATE";
01910         }
01911         else
01912         {
01913             if (rows && (fftType == C2R || fftType == R2R))
01914                 options += " -D NO_CONJUGATE";
01915             if (dst.cols % 2 == 0)
01916                 options += " -D EVEN";
01917         }
01918 
01919         ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, options);
01920         if (k.empty())
01921             return false;
01922 
01923         k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::ReadOnlyNoSize(twiddles), thread_count, num_dfts);
01924         return k.run(2, globalsize, localsize, false);
01925     }
01926 
01927 private:
01928     static void ocl_getRadixes(int cols, std::vector<int>& radixes, std::vector<int>& blocks, int& min_radix)
01929     {
01930         int factors[34];
01931         int nf = DFTFactorize(cols, factors);
01932 
01933         int n = 1;
01934         int factor_index = 0;
01935         min_radix = INT_MAX;
01936 
01937         // 2^n transforms
01938         if ((factors[factor_index] & 1) == 0)
01939         {
01940             for( ; n < factors[factor_index];)
01941             {
01942                 int radix = 2, block = 1;
01943                 if (8*n <= factors[0])
01944                     radix = 8;
01945                 else if (4*n <= factors[0])
01946                 {
01947                     radix = 4;
01948                     if (cols % 12 == 0)
01949                         block = 3;
01950                     else if (cols % 8 == 0)
01951                         block = 2;
01952                 }
01953                 else
01954                 {
01955                     if (cols % 10 == 0)
01956                         block = 5;
01957                     else if (cols % 8 == 0)
01958                         block = 4;
01959                     else if (cols % 6 == 0)
01960                         block = 3;
01961                     else if (cols % 4 == 0)
01962                         block = 2;
01963                 }
01964 
01965                 radixes.push_back(radix);
01966                 blocks.push_back(block);
01967                 min_radix = min(min_radix, block*radix);
01968                 n *= radix;
01969             }
01970             factor_index++;
01971         }
01972 
01973         // all the other transforms
01974         for( ; factor_index < nf; factor_index++)
01975         {
01976             int radix = factors[factor_index], block = 1;
01977             if (radix == 3)
01978             {
01979                 if (cols % 12 == 0)
01980                     block = 4;
01981                 else if (cols % 9 == 0)
01982                     block = 3;
01983                 else if (cols % 6 == 0)
01984                     block = 2;
01985             }
01986             else if (radix == 5)
01987             {
01988                 if (cols % 10 == 0)
01989                     block = 2;
01990             }
01991             radixes.push_back(radix);
01992             blocks.push_back(block);
01993             min_radix = min(min_radix, block*radix);
01994         }
01995     }
01996 
01997     template <typename T>
01998     static void fillRadixTable(UMat twiddles, const std::vector<int>& radixes)
01999     {
02000         Mat tw = twiddles.getMat(ACCESS_WRITE);
02001         T* ptr = tw.ptr<T>();
02002         int ptr_index = 0;
02003 
02004         int n = 1;
02005         for (size_t i=0; i<radixes.size(); i++)
02006         {
02007             int radix = radixes[i];
02008             n *= radix;
02009 
02010             for (int j=1; j<radix; j++)
02011             {
02012                 double theta = -CV_2PI*j/n;
02013 
02014                 for (int k=0; k<(n/radix); k++)
02015                 {
02016                     ptr[ptr_index++] = (T) cos(k*theta);
02017                     ptr[ptr_index++] = (T) sin(k*theta);
02018                 }
02019             }
02020         }
02021     }
02022 };
02023 
02024 class OCL_FftPlanCache
02025 {
02026 public:
02027     static OCL_FftPlanCache & getInstance()
02028     {
02029         CV_SINGLETON_LAZY_INIT_REF(OCL_FftPlanCache, new OCL_FftPlanCache())
02030     }
02031 
02032     Ptr<OCL_FftPlan> getFftPlan(int dft_size, int depth)
02033     {
02034         int key = (dft_size << 16) | (depth & 0xFFFF);
02035         std::map<int, Ptr<OCL_FftPlan> >::iterator f = planStorage.find(key);
02036         if (f != planStorage.end())
02037         {
02038             return f->second;
02039         }
02040         else
02041         {
02042             Ptr<OCL_FftPlan> newPlan = Ptr<OCL_FftPlan>(new OCL_FftPlan(dft_size, depth));
02043             planStorage[key] = newPlan;
02044             return newPlan;
02045         }
02046     }
02047 
02048     ~OCL_FftPlanCache()
02049     {
02050         planStorage.clear();
02051     }
02052 
02053 protected:
02054     OCL_FftPlanCache() :
02055         planStorage()
02056     {
02057     }
02058     std::map<int, Ptr<OCL_FftPlan> > planStorage;
02059 };
02060 
02061 static bool ocl_dft_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType)
02062 {
02063     int type = _src.type(), depth = CV_MAT_DEPTH(type);
02064     Ptr<OCL_FftPlan> plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols(), depth);
02065     return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, fftType, true);
02066 }
02067 
02068 static bool ocl_dft_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags, int fftType)
02069 {
02070     int type = _src.type(), depth = CV_MAT_DEPTH(type);
02071     Ptr<OCL_FftPlan> plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows(), depth);
02072     return plan->enqueueTransform(_src, _dst, nonzero_cols, flags, fftType, false);
02073 }
02074 
02075 static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_rows)
02076 {
02077     int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
02078     Size ssize = _src.size();
02079     bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
02080 
02081     if ( !((cn == 1 || cn == 2) && (depth == CV_32F || (depth == CV_64F && doubleSupport))) )
02082         return false;
02083 
02084     // if is not a multiplication of prime numbers { 2, 3, 5 }
02085     if (ssize.area() != getOptimalDFTSize(ssize.area()))
02086         return false;
02087 
02088     UMat src = _src.getUMat();
02089     int complex_input = cn == 2 ? 1 : 0;
02090     int complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0;
02091     int real_input = cn == 1 ? 1 : 0;
02092     int real_output = (flags & DFT_REAL_OUTPUT) != 0;
02093     bool inv = (flags & DFT_INVERSE) != 0 ? 1 : 0;
02094 
02095     if( nonzero_rows <= 0 || nonzero_rows > _src.rows() )
02096         nonzero_rows = _src.rows();
02097     bool is1d = (flags & DFT_ROWS) != 0 || nonzero_rows == 1;
02098 
02099     // if output format is not specified
02100     if (complex_output + real_output == 0)
02101     {
02102         if (real_input)
02103             real_output = 1;
02104         else
02105             complex_output = 1;
02106     }
02107 
02108     FftType fftType = (FftType)(complex_input << 0 | complex_output << 1);
02109 
02110     // Forward Complex to CCS not supported
02111     if (fftType == C2R && !inv)
02112         fftType = C2C;
02113 
02114     // Inverse CCS to Complex not supported
02115     if (fftType == R2C && inv)
02116         fftType = R2R;
02117 
02118     UMat output;
02119     if (fftType == C2C || fftType == R2C)
02120     {
02121         // complex output
02122         _dst.create(src.size(), CV_MAKETYPE(depth, 2));
02123         output = _dst.getUMat();
02124     }
02125     else
02126     {
02127         // real output
02128         if (is1d)
02129         {
02130             _dst.create(src.size(), CV_MAKETYPE(depth, 1));
02131             output = _dst.getUMat();
02132         }
02133         else
02134         {
02135             _dst.create(src.size(), CV_MAKETYPE(depth, 1));
02136             output.create(src.size(), CV_MAKETYPE(depth, 2));
02137         }
02138     }
02139 
02140     if (!inv)
02141     {
02142         if (!ocl_dft_rows(src, output, nonzero_rows, flags, fftType))
02143             return false;
02144 
02145         if (!is1d)
02146         {
02147             int nonzero_cols = fftType == R2R ? output.cols/2 + 1 : output.cols;
02148             if (!ocl_dft_cols(output, _dst, nonzero_cols, flags, fftType))
02149                 return false;
02150         }
02151     }
02152     else
02153     {
02154         if (fftType == C2C)
02155         {
02156             // complex output
02157             if (!ocl_dft_rows(src, output, nonzero_rows, flags, fftType))
02158                 return false;
02159 
02160             if (!is1d)
02161             {
02162                 if (!ocl_dft_cols(output, output, output.cols, flags, fftType))
02163                     return false;
02164             }
02165         }
02166         else
02167         {
02168             if (is1d)
02169             {
02170                 if (!ocl_dft_rows(src, output, nonzero_rows, flags, fftType))
02171                     return false;
02172             }
02173             else
02174             {
02175                 int nonzero_cols = src.cols/2 + 1;
02176                 if (!ocl_dft_cols(src, output, nonzero_cols, flags, fftType))
02177                     return false;
02178 
02179                 if (!ocl_dft_rows(output, _dst, nonzero_rows, flags, fftType))
02180                     return false;
02181             }
02182         }
02183     }
02184     return true;
02185 }
02186 
02187 } // namespace cv;
02188 
02189 #endif
02190 
02191 #ifdef HAVE_CLAMDFFT
02192 
02193 namespace cv {
02194 
02195 #define CLAMDDFT_Assert(func) \
02196     { \
02197         clAmdFftStatus s = (func); \
02198         CV_Assert(s == CLFFT_SUCCESS); \
02199     }
02200 
02201 class PlanCache
02202 {
02203     struct FftPlan
02204     {
02205         FftPlan(const Size & _dft_size, int _src_step, int _dst_step, bool _doubleFP, bool _inplace, int _flags, FftType _fftType) :
02206             dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step),
02207             doubleFP(_doubleFP), inplace(_inplace), flags(_flags), fftType(_fftType),
02208             context((cl_context)ocl::Context::getDefault().ptr()), plHandle(0)
02209         {
02210             bool dft_inverse = (flags & DFT_INVERSE) != 0;
02211             bool dft_scale = (flags & DFT_SCALE) != 0;
02212             bool dft_rows = (flags & DFT_ROWS) != 0;
02213 
02214             clAmdFftLayout inLayout = CLFFT_REAL, outLayout = CLFFT_REAL;
02215             clAmdFftDim dim = dft_size.height == 1 || dft_rows ? CLFFT_1D : CLFFT_2D;
02216 
02217             size_t batchSize = dft_rows ? dft_size.height : 1;
02218             size_t clLengthsIn[3] = { (size_t)dft_size.width, dft_rows ? 1 : (size_t)dft_size.height, 1 };
02219             size_t clStridesIn[3] = { 1, 1, 1 };
02220             size_t clStridesOut[3]  = { 1, 1, 1 };
02221             int elemSize = doubleFP ? sizeof(double) : sizeof(float);
02222 
02223             switch (fftType)
02224             {
02225             case C2C:
02226                 inLayout = CLFFT_COMPLEX_INTERLEAVED;
02227                 outLayout = CLFFT_COMPLEX_INTERLEAVED;
02228                 clStridesIn[1] = src_step / (elemSize << 1);
02229                 clStridesOut[1] = dst_step / (elemSize << 1);
02230                 break;
02231             case R2C:
02232                 inLayout = CLFFT_REAL;
02233                 outLayout = CLFFT_HERMITIAN_INTERLEAVED;
02234                 clStridesIn[1] = src_step / elemSize;
02235                 clStridesOut[1] = dst_step / (elemSize << 1);
02236                 break;
02237             case C2R:
02238                 inLayout = CLFFT_HERMITIAN_INTERLEAVED;
02239                 outLayout = CLFFT_REAL;
02240                 clStridesIn[1] = src_step / (elemSize << 1);
02241                 clStridesOut[1] = dst_step / elemSize;
02242                 break;
02243             case R2R:
02244             default:
02245                 CV_Error(Error::StsNotImplemented, "AMD Fft does not support this type");
02246                 break;
02247             }
02248 
02249             clStridesIn[2] = dft_rows ? clStridesIn[1] : dft_size.width * clStridesIn[1];
02250             clStridesOut[2] = dft_rows ? clStridesOut[1] : dft_size.width * clStridesOut[1];
02251 
02252             CLAMDDFT_Assert(clAmdFftCreateDefaultPlan(&plHandle, (cl_context)ocl::Context::getDefault().ptr(), dim, clLengthsIn))
02253 
02254             // setting plan properties
02255             CLAMDDFT_Assert(clAmdFftSetPlanPrecision(plHandle, doubleFP ? CLFFT_DOUBLE : CLFFT_SINGLE));
02256             CLAMDDFT_Assert(clAmdFftSetResultLocation(plHandle, inplace ? CLFFT_INPLACE : CLFFT_OUTOFPLACE))
02257             CLAMDDFT_Assert(clAmdFftSetLayout(plHandle, inLayout, outLayout))
02258             CLAMDDFT_Assert(clAmdFftSetPlanBatchSize(plHandle, batchSize))
02259             CLAMDDFT_Assert(clAmdFftSetPlanInStride(plHandle, dim, clStridesIn))
02260             CLAMDDFT_Assert(clAmdFftSetPlanOutStride(plHandle, dim, clStridesOut))
02261             CLAMDDFT_Assert(clAmdFftSetPlanDistance(plHandle, clStridesIn[dim], clStridesOut[dim]))
02262 
02263             float scale = dft_scale ? 1.0f / (dft_rows ? dft_size.width : dft_size.area()) : 1.0f;
02264             CLAMDDFT_Assert(clAmdFftSetPlanScale(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale))
02265 
02266             // ready to bake
02267             cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
02268             CLAMDDFT_Assert(clAmdFftBakePlan(plHandle, 1, &queue, NULL, NULL))
02269         }
02270 
02271         ~FftPlan()
02272         {
02273 //            clAmdFftDestroyPlan(&plHandle);
02274         }
02275 
02276         friend class PlanCache;
02277 
02278     private:
02279         Size dft_size;
02280         int src_step, dst_step;
02281         bool doubleFP;
02282         bool inplace;
02283         int flags;
02284         FftType fftType;
02285 
02286         cl_context context;
02287         clAmdFftPlanHandle plHandle;
02288     };
02289 
02290 public:
02291     static PlanCache & getInstance()
02292     {
02293         CV_SINGLETON_LAZY_INIT_REF(PlanCache, new PlanCache())
02294     }
02295 
02296     clAmdFftPlanHandle getPlanHandle(const Size & dft_size, int src_step, int dst_step, bool doubleFP,
02297                                      bool inplace, int flags, FftType fftType)
02298     {
02299         cl_context currentContext = (cl_context)ocl::Context::getDefault().ptr();
02300 
02301         for (size_t i = 0, size = planStorage.size(); i < size; ++i)
02302         {
02303             const FftPlan * const plan = planStorage[i];
02304 
02305             if (plan->dft_size == dft_size &&
02306                 plan->flags == flags &&
02307                 plan->src_step == src_step &&
02308                 plan->dst_step == dst_step &&
02309                 plan->doubleFP == doubleFP &&
02310                 plan->fftType == fftType &&
02311                 plan->inplace == inplace)
02312             {
02313                 if (plan->context != currentContext)
02314                 {
02315                     planStorage.erase(planStorage.begin() + i);
02316                     break;
02317                 }
02318 
02319                 return plan->plHandle;
02320             }
02321         }
02322 
02323         // no baked plan is found, so let's create a new one
02324         Ptr<FftPlan> newPlan = Ptr<FftPlan>(new FftPlan(dft_size, src_step, dst_step, doubleFP, inplace, flags, fftType));
02325         planStorage.push_back(newPlan);
02326 
02327         return newPlan->plHandle;
02328     }
02329 
02330     ~PlanCache()
02331     {
02332         planStorage.clear();
02333     }
02334 
02335 protected:
02336     PlanCache() :
02337         planStorage()
02338     {
02339     }
02340 
02341     std::vector<Ptr<FftPlan> > planStorage;
02342 };
02343 
02344 extern "C" {
02345 
02346 static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
02347 {
02348     UMatData * u = (UMatData *)p;
02349 
02350     if( u && CV_XADD(&u->urefcount, -1) == 1 )
02351         u->currAllocator->deallocate(u);
02352     u = 0;
02353 
02354     clReleaseEvent(e), e = 0;
02355 }
02356 
02357 }
02358 
02359 static bool ocl_dft_amdfft(InputArray _src, OutputArray _dst, int flags)
02360 {
02361     int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
02362     Size ssize = _src.size();
02363 
02364     bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
02365     if ( (!doubleSupport && depth == CV_64F) ||
02366          !(type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2) ||
02367          _src.offset() != 0)
02368         return false;
02369 
02370     // if is not a multiplication of prime numbers { 2, 3, 5 }
02371     if (ssize.area() != getOptimalDFTSize(ssize.area()))
02372         return false;
02373 
02374     int dst_complex_input = cn == 2 ? 1 : 0;
02375     bool dft_inverse = (flags & DFT_INVERSE) != 0 ? 1 : 0;
02376     int dft_complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0;
02377     bool dft_real_output = (flags & DFT_REAL_OUTPUT) != 0;
02378 
02379     CV_Assert(dft_complex_output + dft_real_output < 2);
02380     FftType fftType = (FftType)(dst_complex_input << 0 | dft_complex_output << 1);
02381 
02382     switch (fftType)
02383     {
02384     case C2C:
02385         _dst.create(ssize.height, ssize.width, CV_MAKE_TYPE(depth, 2));
02386         break;
02387     case R2C: // TODO implement it if possible
02388     case C2R: // TODO implement it if possible
02389     case R2R: // AMD Fft does not support this type
02390     default:
02391         return false;
02392     }
02393 
02394     UMat src = _src.getUMat(), dst = _dst.getUMat();
02395     bool inplace = src.u == dst.u;
02396 
02397     clAmdFftPlanHandle plHandle = PlanCache::getInstance().
02398             getPlanHandle(ssize, (int)src.step, (int)dst.step,
02399                           depth == CV_64F, inplace, flags, fftType);
02400 
02401     // get the bufferSize
02402     size_t bufferSize = 0;
02403     CLAMDDFT_Assert(clAmdFftGetTmpBufSize(plHandle, &bufferSize))
02404     UMat tmpBuffer(1, (int)bufferSize, CV_8UC1);
02405 
02406     cl_mem srcarg = (cl_mem)src.handle(ACCESS_READ);
02407     cl_mem dstarg = (cl_mem)dst.handle(ACCESS_RW);
02408 
02409     cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
02410     cl_event e = 0;
02411 
02412     CLAMDDFT_Assert(clAmdFftEnqueueTransform(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
02413                                        1, &queue, 0, NULL, &e,
02414                                        &srcarg, &dstarg, (cl_mem)tmpBuffer.handle(ACCESS_RW)))
02415 
02416     tmpBuffer.addref();
02417     clSetEventCallback(e, CL_COMPLETE, oclCleanupCallback, tmpBuffer.u);
02418     return true;
02419 }
02420 
02421 #undef DFT_ASSERT
02422 
02423 }
02424 
02425 #endif // HAVE_CLAMDFFT
02426 
02427 namespace cv
02428 {
02429 static void complementComplexOutput(Mat& dst, int len, int dft_dims)
02430 {
02431     int i, n = dst.cols;
02432     size_t elem_size = dst.elemSize1();
02433     if( elem_size == sizeof(float) )
02434     {
02435         float* p0 = dst.ptr<float>();
02436         size_t dstep = dst.step/sizeof(p0[0]);
02437         for( i = 0; i < len; i++ )
02438         {
02439             float* p = p0 + dstep*i;
02440             float* q = dft_dims == 1 || i == 0 || i*2 == len ? p : p0 + dstep*(len-i);
02441 
02442             for( int j = 1; j < (n+1)/2; j++ )
02443             {
02444                 p[(n-j)*2] = q[j*2];
02445                 p[(n-j)*2+1] = -q[j*2+1];
02446             }
02447         }
02448     }
02449     else
02450     {
02451         double* p0 = dst.ptr<double>();
02452         size_t dstep = dst.step/sizeof(p0[0]);
02453         for( i = 0; i < len; i++ )
02454         {
02455             double* p = p0 + dstep*i;
02456             double* q = dft_dims == 1 || i == 0 || i*2 == len ? p : p0 + dstep*(len-i);
02457 
02458             for( int j = 1; j < (n+1)/2; j++ )
02459             {
02460                 p[(n-j)*2] = q[j*2];
02461                 p[(n-j)*2+1] = -q[j*2+1];
02462             }
02463         }
02464     }
02465 }
02466 }
02467 
02468 void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
02469 {
02470 #ifdef HAVE_CLAMDFFT
02471 #ifdef HAVE_OPENCL
02472     CV_OCL_RUN(ocl::haveAmdFft() && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU &&
02473             _dst.isUMat() && _src0.dims() <= 2 && nonzero_rows == 0,
02474                ocl_dft_amdfft(_src0, _dst, flags))
02475 #endif
02476 #endif
02477 
02478 #ifdef HAVE_OPENCL
02479     CV_OCL_RUN(_dst.isUMat() && _src0.dims() <= 2,
02480                ocl_dft(_src0, _dst, flags, nonzero_rows))
02481 #endif
02482 
02483     static DFTFunc dft_tbl[6] =
02484     {
02485         (DFTFunc)DFT_32f,
02486         (DFTFunc)RealDFT_32f,
02487         (DFTFunc)CCSIDFT_32f,
02488         (DFTFunc)DFT_64f,
02489         (DFTFunc)RealDFT_64f,
02490         (DFTFunc)CCSIDFT_64f
02491     };
02492     AutoBuffer<uchar>  buf;
02493     Mat src0 = _src0.getMat(), src = src0;
02494     int prev_len = 0, stage = 0;
02495     bool inv = (flags & DFT_INVERSE) != 0;
02496     int nf = 0, real_transform = src.channels() == 1 || (inv && (flags & DFT_REAL_OUTPUT)!=0);
02497     int type = src.type(), depth = src.depth();
02498     int elem_size = (int)src.elemSize1(), complex_elem_size = elem_size*2;
02499     int factors[34];
02500     bool inplace_transform = false;
02501 #ifdef USE_IPP_DFT
02502     AutoBuffer<uchar>  ippbuf;
02503     int ipp_norm_flag = !(flags & DFT_SCALE) ? 8 : inv ? 2 : 1;
02504 #endif
02505 
02506     CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
02507 
02508     if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) )
02509         _dst.create( src.size(), CV_MAKETYPE(depth, 2) );
02510     else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) )
02511         _dst.create( src.size(), depth );
02512     else
02513         _dst.create( src.size(), type );
02514 
02515     Mat dst = _dst.getMat();
02516 
02517 #if defined USE_IPP_DFT
02518     CV_IPP_CHECK()
02519     {
02520         if ((src.depth() == CV_32F) && (src.total()>(int)(1<<6)) && nonzero_rows == 0)
02521         {
02522             if ((flags & DFT_ROWS) == 0)
02523             {
02524                 if (src.channels() == 2 && !(inv && (flags & DFT_REAL_OUTPUT)))
02525                 {
02526                     if (ippi_DFT_C_32F(src, dst, inv, ipp_norm_flag))
02527                     {
02528                         CV_IMPL_ADD(CV_IMPL_IPP);
02529                         return;
02530                     }
02531                     setIppErrorStatus();
02532                 }
02533                 if (src.channels() == 1 && (inv || !(flags & DFT_COMPLEX_OUTPUT)))
02534                 {
02535                     if (ippi_DFT_R_32F(src, dst, inv, ipp_norm_flag))
02536                     {
02537                         CV_IMPL_ADD(CV_IMPL_IPP);
02538                         return;
02539                     }
02540                     setIppErrorStatus();
02541                 }
02542             }
02543             else
02544             {
02545                 if (src.channels() == 2 && !(inv && (flags & DFT_REAL_OUTPUT)))
02546                 {
02547                     ippiDFT_C_Func ippiFunc = inv ? (ippiDFT_C_Func)ippiDFTInv_CToC_32fc_C1R : (ippiDFT_C_Func)ippiDFTFwd_CToC_32fc_C1R;
02548                     if (Dft_C_IPPLoop(src, dst, IPPDFT_C_Functor(ippiFunc),ipp_norm_flag))
02549                     {
02550                         CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
02551                         return;
02552                     }
02553                     setIppErrorStatus();
02554                 }
02555                 if (src.channels() == 1 && (inv || !(flags & DFT_COMPLEX_OUTPUT)))
02556                 {
02557                     ippiDFT_R_Func ippiFunc = inv ? (ippiDFT_R_Func)ippiDFTInv_PackToR_32f_C1R : (ippiDFT_R_Func)ippiDFTFwd_RToPack_32f_C1R;
02558                     if (Dft_R_IPPLoop(src, dst, IPPDFT_R_Functor(ippiFunc),ipp_norm_flag))
02559                     {
02560                         CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
02561                         return;
02562                     }
02563                     setIppErrorStatus();
02564                 }
02565             }
02566         }
02567     }
02568 #endif
02569 
02570     if( !real_transform )
02571         elem_size = complex_elem_size;
02572 
02573     if( src.cols == 1 && nonzero_rows > 0 )
02574         CV_Error( CV_StsNotImplemented,
02575         "This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n"
02576         "For fast convolution/correlation use 2-column matrix or single-row matrix instead" );
02577 
02578     // determine, which transform to do first - row-wise
02579     // (stage 0) or column-wise (stage 1) transform
02580     if( !(flags & DFT_ROWS) && src.rows > 1 &&
02581         ((src.cols == 1 && (!src.isContinuous() || !dst.isContinuous())) ||
02582          (src.cols > 1 && inv && real_transform)) )
02583         stage = 1;
02584 
02585     for(;;)
02586     {
02587         double scale = 1;
02588         uchar* wave = 0;
02589         int* itab = 0;
02590         uchar* ptr;
02591         int i, len, count, sz = 0;
02592         int use_buf = 0, odd_real = 0;
02593         DFTFunc dft_func;
02594 
02595         if( stage == 0 ) // row-wise transform
02596         {
02597             len = !inv ? src.cols : dst.cols;
02598             count = src.rows;
02599             if( len == 1 && !(flags & DFT_ROWS) )
02600             {
02601                 len = !inv ? src.rows : dst.rows;
02602                 count = 1;
02603             }
02604             odd_real = real_transform && (len & 1);
02605         }
02606         else
02607         {
02608             len = dst.rows;
02609             count = !inv ? src0.cols : dst.cols;
02610             sz = 2*len*complex_elem_size;
02611         }
02612 
02613         void *spec = 0;
02614 #ifdef USE_IPP_DFT
02615         if( CV_IPP_CHECK_COND && (len*count >= 64) ) // use IPP DFT if available
02616         {
02617             int specsize=0, initsize=0, worksize=0;
02618             IppDFTGetSizeFunc getSizeFunc = 0;
02619             IppDFTInitFunc initFunc = 0;
02620 
02621             if( real_transform && stage == 0 )
02622             {
02623                 if( depth == CV_32F )
02624                 {
02625                     getSizeFunc = ippsDFTGetSize_R_32f;
02626                     initFunc = (IppDFTInitFunc)ippsDFTInit_R_32f;
02627                 }
02628                 else
02629                 {
02630                     getSizeFunc = ippsDFTGetSize_R_64f;
02631                     initFunc = (IppDFTInitFunc)ippsDFTInit_R_64f;
02632                 }
02633             }
02634             else
02635             {
02636                 if( depth == CV_32F )
02637                 {
02638                     getSizeFunc = ippsDFTGetSize_C_32fc;
02639                     initFunc = (IppDFTInitFunc)ippsDFTInit_C_32fc;
02640                 }
02641                 else
02642                 {
02643                     getSizeFunc = ippsDFTGetSize_C_64fc;
02644                     initFunc = (IppDFTInitFunc)ippsDFTInit_C_64fc;
02645                 }
02646             }
02647             if( getSizeFunc(len, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= 0 )
02648             {
02649                 ippbuf.allocate(specsize + initsize + 64);
02650                 spec = alignPtr(&ippbuf[0], 32);
02651                 uchar* initbuf = alignPtr((uchar*)spec + specsize, 32);
02652                 if( initFunc(len, ipp_norm_flag, ippAlgHintNone, spec, initbuf) < 0 )
02653                     spec = 0;
02654                 sz += worksize;
02655             }
02656             else
02657                 setIppErrorStatus();
02658         }
02659         else
02660 #endif
02661         {
02662             if( len != prev_len )
02663                 nf = DFTFactorize( len, factors );
02664 
02665             inplace_transform = factors[0] == factors[nf-1];
02666             sz += len*(complex_elem_size + sizeof(int));
02667             i = nf > 1 && (factors[0] & 1) == 0;
02668             if( (factors[i] & 1) != 0 && factors[i] > 5 )
02669                 sz += (factors[i]+1)*complex_elem_size;
02670 
02671             if( (stage == 0 && ((src.data == dst.data && !inplace_transform) || odd_real)) ||
02672                 (stage == 1 && !inplace_transform) )
02673             {
02674                 use_buf = 1;
02675                 sz += len*complex_elem_size;
02676             }
02677         }
02678 
02679         ptr = (uchar*)buf;
02680         buf.allocate( sz + 32 );
02681         if( ptr != (uchar*)buf )
02682             prev_len = 0; // because we release the buffer,
02683                           // force recalculation of
02684                           // twiddle factors and permutation table
02685         ptr = (uchar*)buf;
02686         if( !spec )
02687         {
02688             wave = ptr;
02689             ptr += len*complex_elem_size;
02690             itab = (int*)ptr;
02691             ptr = (uchar*)cvAlignPtr( ptr + len*sizeof(int), 16 );
02692 
02693             if( len != prev_len || (!inplace_transform && inv && real_transform))
02694                 DFTInit( len, nf, factors, itab, complex_elem_size,
02695                             wave, stage == 0 && inv && real_transform );
02696             // otherwise reuse the tables calculated on the previous stage
02697         }
02698 
02699         if( stage == 0 )
02700         {
02701             uchar* tmp_buf = 0;
02702             int dptr_offset = 0;
02703             int dst_full_len = len*elem_size;
02704             int _flags = (int)inv + (src.channels() != dst.channels() ?
02705                          DFT_COMPLEX_INPUT_OR_OUTPUT : 0);
02706             if( use_buf )
02707             {
02708                 tmp_buf = ptr;
02709                 ptr += len*complex_elem_size;
02710                 if( odd_real && !inv && len > 1 &&
02711                     !(_flags & DFT_COMPLEX_INPUT_OR_OUTPUT))
02712                     dptr_offset = elem_size;
02713             }
02714 
02715             if( !inv && (_flags & DFT_COMPLEX_INPUT_OR_OUTPUT) )
02716                 dst_full_len += (len & 1) ? elem_size : complex_elem_size;
02717 
02718             dft_func = dft_tbl[(!real_transform ? 0 : !inv ? 1 : 2) + (depth == CV_64F)*3];
02719 
02720             if( count > 1 && !(flags & DFT_ROWS) && (!inv || !real_transform) )
02721                 stage = 1;
02722             else if( flags & CV_DXT_SCALE )
02723                 scale = 1./(len * (flags & DFT_ROWS ? 1 : count));
02724 
02725             if( nonzero_rows <= 0 || nonzero_rows > count )
02726                 nonzero_rows = count;
02727 
02728             for( i = 0; i < nonzero_rows; i++ )
02729             {
02730                 const uchar* sptr = src.ptr(i);
02731                 uchar* dptr0 = dst.ptr(i);
02732                 uchar* dptr = dptr0;
02733 
02734                 if( tmp_buf )
02735                     dptr = tmp_buf;
02736 
02737                 dft_func( sptr, dptr, len, nf, factors, itab, wave, len, spec, ptr, _flags, scale );
02738                 if( dptr != dptr0 )
02739                     memcpy( dptr0, dptr + dptr_offset, dst_full_len );
02740             }
02741 
02742             for( ; i < count; i++ )
02743             {
02744                 uchar* dptr0 = dst.ptr(i);
02745                 memset( dptr0, 0, dst_full_len );
02746             }
02747 
02748             if( stage != 1 )
02749             {
02750                 if( !inv && real_transform && dst.channels() == 2 )
02751                     complementComplexOutput(dst, nonzero_rows, 1);
02752                 break;
02753             }
02754             src = dst;
02755         }
02756         else
02757         {
02758             int a = 0, b = count;
02759             uchar *buf0, *buf1, *dbuf0, *dbuf1;
02760             const uchar* sptr0 = src.ptr();
02761             uchar* dptr0 = dst.ptr();
02762             buf0 = ptr;
02763             ptr += len*complex_elem_size;
02764             buf1 = ptr;
02765             ptr += len*complex_elem_size;
02766             dbuf0 = buf0, dbuf1 = buf1;
02767 
02768             if( use_buf )
02769             {
02770                 dbuf1 = ptr;
02771                 dbuf0 = buf1;
02772                 ptr += len*complex_elem_size;
02773             }
02774 
02775             dft_func = dft_tbl[(depth == CV_64F)*3];
02776 
02777             if( real_transform && inv && src.cols > 1 )
02778                 stage = 0;
02779             else if( flags & CV_DXT_SCALE )
02780                 scale = 1./(len * count);
02781 
02782             if( real_transform )
02783             {
02784                 int even;
02785                 a = 1;
02786                 even = (count & 1) == 0;
02787                 b = (count+1)/2;
02788                 if( !inv )
02789                 {
02790                     memset( buf0, 0, len*complex_elem_size );
02791                     CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, elem_size );
02792                     sptr0 += dst.channels()*elem_size;
02793                     if( even )
02794                     {
02795                         memset( buf1, 0, len*complex_elem_size );
02796                         CopyColumn( sptr0 + (count-2)*elem_size, src.step,
02797                                     buf1, complex_elem_size, len, elem_size );
02798                     }
02799                 }
02800                 else if( src.channels() == 1 )
02801                 {
02802                     CopyColumn( sptr0, src.step, buf0, elem_size, len, elem_size );
02803                     ExpandCCS( buf0, len, elem_size );
02804                     if( even )
02805                     {
02806                         CopyColumn( sptr0 + (count-1)*elem_size, src.step,
02807                                     buf1, elem_size, len, elem_size );
02808                         ExpandCCS( buf1, len, elem_size );
02809                     }
02810                     sptr0 += elem_size;
02811                 }
02812                 else
02813                 {
02814                     CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, complex_elem_size );
02815                     if( even )
02816                     {
02817                         CopyColumn( sptr0 + b*complex_elem_size, src.step,
02818                                        buf1, complex_elem_size, len, complex_elem_size );
02819                     }
02820                     sptr0 += complex_elem_size;
02821                 }
02822 
02823                 if( even )
02824                     dft_func( buf1, dbuf1, len, nf, factors, itab,
02825                               wave, len, spec, ptr, inv, scale );
02826                 dft_func( buf0, dbuf0, len, nf, factors, itab,
02827                           wave, len, spec, ptr, inv, scale );
02828 
02829                 if( dst.channels() == 1 )
02830                 {
02831                     if( !inv )
02832                     {
02833                         // copy the half of output vector to the first/last column.
02834                         // before doing that, defgragment the vector
02835                         memcpy( dbuf0 + elem_size, dbuf0, elem_size );
02836                         CopyColumn( dbuf0 + elem_size, elem_size, dptr0,
02837                                        dst.step, len, elem_size );
02838                         if( even )
02839                         {
02840                             memcpy( dbuf1 + elem_size, dbuf1, elem_size );
02841                             CopyColumn( dbuf1 + elem_size, elem_size,
02842                                            dptr0 + (count-1)*elem_size,
02843                                            dst.step, len, elem_size );
02844                         }
02845                         dptr0 += elem_size;
02846                     }
02847                     else
02848                     {
02849                         // copy the real part of the complex vector to the first/last column
02850                         CopyColumn( dbuf0, complex_elem_size, dptr0, dst.step, len, elem_size );
02851                         if( even )
02852                             CopyColumn( dbuf1, complex_elem_size, dptr0 + (count-1)*elem_size,
02853                                            dst.step, len, elem_size );
02854                         dptr0 += elem_size;
02855                     }
02856                 }
02857                 else
02858                 {
02859                     assert( !inv );
02860                     CopyColumn( dbuf0, complex_elem_size, dptr0,
02861                                    dst.step, len, complex_elem_size );
02862                     if( even )
02863                         CopyColumn( dbuf1, complex_elem_size,
02864                                        dptr0 + b*complex_elem_size,
02865                                        dst.step, len, complex_elem_size );
02866                     dptr0 += complex_elem_size;
02867                 }
02868             }
02869 
02870             for( i = a; i < b; i += 2 )
02871             {
02872                 if( i+1 < b )
02873                 {
02874                     CopyFrom2Columns( sptr0, src.step, buf0, buf1, len, complex_elem_size );
02875                     dft_func( buf1, dbuf1, len, nf, factors, itab,
02876                               wave, len, spec, ptr, inv, scale );
02877                 }
02878                 else
02879                     CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, complex_elem_size );
02880 
02881                 dft_func( buf0, dbuf0, len, nf, factors, itab,
02882                           wave, len, spec, ptr, inv, scale );
02883 
02884                 if( i+1 < b )
02885                     CopyTo2Columns( dbuf0, dbuf1, dptr0, dst.step, len, complex_elem_size );
02886                 else
02887                     CopyColumn( dbuf0, complex_elem_size, dptr0, dst.step, len, complex_elem_size );
02888                 sptr0 += 2*complex_elem_size;
02889                 dptr0 += 2*complex_elem_size;
02890             }
02891 
02892             if( stage != 0 )
02893             {
02894                 if( !inv && real_transform && dst.channels() == 2 && len > 1 )
02895                     complementComplexOutput(dst, len, 2);
02896                 break;
02897             }
02898             src = dst;
02899         }
02900     }
02901 }
02902 
02903 
02904 void cv::idft( InputArray src, OutputArray dst, int flags, int nonzero_rows )
02905 {
02906     dft( src, dst, flags | DFT_INVERSE, nonzero_rows );
02907 }
02908 
02909 #ifdef HAVE_OPENCL
02910 
02911 namespace cv {
02912 
02913 static bool ocl_mulSpectrums( InputArray _srcA, InputArray _srcB,
02914                               OutputArray _dst, int flags, bool conjB )
02915 {
02916     int atype = _srcA.type(), btype = _srcB.type(),
02917             rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
02918     Size asize = _srcA.size(), bsize = _srcB.size();
02919     CV_Assert(asize == bsize);
02920 
02921     if ( !(atype == CV_32FC2 && btype == CV_32FC2) || flags != 0 )
02922         return false;
02923 
02924     UMat A = _srcA.getUMat(), B = _srcB.getUMat();
02925     CV_Assert(A.size() == B.size());
02926 
02927     _dst.create(A.size(), atype);
02928     UMat dst = _dst.getUMat();
02929 
02930     ocl::Kernel k("mulAndScaleSpectrums",
02931                   ocl::core::mulspectrums_oclsrc,
02932                   format("%s", conjB ? "-D CONJ" : ""));
02933     if (k.empty())
02934         return false;
02935 
02936     k.args(ocl::KernelArg::ReadOnlyNoSize(A), ocl::KernelArg::ReadOnlyNoSize(B),
02937            ocl::KernelArg::WriteOnly(dst), rowsPerWI);
02938 
02939     size_t globalsize[2] = { (size_t)asize.width, ((size_t)asize.height + rowsPerWI - 1) / rowsPerWI };
02940     return k.run(2, globalsize, NULL, false);
02941 }
02942 
02943 }
02944 
02945 #endif
02946 
02947 void cv::mulSpectrums( InputArray _srcA, InputArray _srcB,
02948                        OutputArray _dst, int flags, bool conjB )
02949 {
02950 #ifdef HAVE_OPENCL
02951     CV_OCL_RUN(_dst.isUMat() && _srcA.dims() <= 2 && _srcB.dims() <= 2,
02952             ocl_mulSpectrums(_srcA, _srcB, _dst, flags, conjB))
02953 #endif
02954 
02955     Mat srcA = _srcA.getMat(), srcB = _srcB.getMat();
02956     int depth = srcA.depth(), cn = srcA.channels(), type = srcA.type();
02957     int rows = srcA.rows, cols = srcA.cols;
02958     int j, k;
02959 
02960     CV_Assert( type == srcB.type() && srcA.size() == srcB.size() );
02961     CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
02962 
02963     _dst.create( srcA.rows, srcA.cols, type );
02964     Mat dst = _dst.getMat();
02965 
02966     bool is_1d = (flags & DFT_ROWS) || (rows == 1 || (cols == 1 &&
02967              srcA.isContinuous() && srcB.isContinuous() && dst.isContinuous()));
02968 
02969     if( is_1d && !(flags & DFT_ROWS) )
02970         cols = cols + rows - 1, rows = 1;
02971 
02972     int ncols = cols*cn;
02973     int j0 = cn == 1;
02974     int j1 = ncols - (cols % 2 == 0 && cn == 1);
02975 
02976     if( depth == CV_32F )
02977     {
02978         const float* dataA = srcA.ptr<float>();
02979         const float* dataB = srcB.ptr<float>();
02980         float* dataC = dst.ptr<float>();
02981 
02982         size_t stepA = srcA.step/sizeof(dataA[0]);
02983         size_t stepB = srcB.step/sizeof(dataB[0]);
02984         size_t stepC = dst.step/sizeof(dataC[0]);
02985 
02986         if( !is_1d && cn == 1 )
02987         {
02988             for( k = 0; k < (cols % 2 ? 1 : 2); k++ )
02989             {
02990                 if( k == 1 )
02991                     dataA += cols - 1, dataB += cols - 1, dataC += cols - 1;
02992                 dataC[0] = dataA[0]*dataB[0];
02993                 if( rows % 2 == 0 )
02994                     dataC[(rows-1)*stepC] = dataA[(rows-1)*stepA]*dataB[(rows-1)*stepB];
02995                 if( !conjB )
02996                     for( j = 1; j <= rows - 2; j += 2 )
02997                     {
02998                         double re = (double)dataA[j*stepA]*dataB[j*stepB] -
02999                                     (double)dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
03000                         double im = (double)dataA[j*stepA]*dataB[(j+1)*stepB] +
03001                                     (double)dataA[(j+1)*stepA]*dataB[j*stepB];
03002                         dataC[j*stepC] = (float)re; dataC[(j+1)*stepC] = (float)im;
03003                     }
03004                 else
03005                     for( j = 1; j <= rows - 2; j += 2 )
03006                     {
03007                         double re = (double)dataA[j*stepA]*dataB[j*stepB] +
03008                                     (double)dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
03009                         double im = (double)dataA[(j+1)*stepA]*dataB[j*stepB] -
03010                                     (double)dataA[j*stepA]*dataB[(j+1)*stepB];
03011                         dataC[j*stepC] = (float)re; dataC[(j+1)*stepC] = (float)im;
03012                     }
03013                 if( k == 1 )
03014                     dataA -= cols - 1, dataB -= cols - 1, dataC -= cols - 1;
03015             }
03016         }
03017 
03018         for( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC )
03019         {
03020             if( is_1d && cn == 1 )
03021             {
03022                 dataC[0] = dataA[0]*dataB[0];
03023                 if( cols % 2 == 0 )
03024                     dataC[j1] = dataA[j1]*dataB[j1];
03025             }
03026 
03027             if( !conjB )
03028                 for( j = j0; j < j1; j += 2 )
03029                 {
03030                     double re = (double)dataA[j]*dataB[j] - (double)dataA[j+1]*dataB[j+1];
03031                     double im = (double)dataA[j+1]*dataB[j] + (double)dataA[j]*dataB[j+1];
03032                     dataC[j] = (float)re; dataC[j+1] = (float)im;
03033                 }
03034             else
03035                 for( j = j0; j < j1; j += 2 )
03036                 {
03037                     double re = (double)dataA[j]*dataB[j] + (double)dataA[j+1]*dataB[j+1];
03038                     double im = (double)dataA[j+1]*dataB[j] - (double)dataA[j]*dataB[j+1];
03039                     dataC[j] = (float)re; dataC[j+1] = (float)im;
03040                 }
03041         }
03042     }
03043     else
03044     {
03045         const double* dataA = srcA.ptr<double>();
03046         const double* dataB = srcB.ptr<double>();
03047         double* dataC = dst.ptr<double>();
03048 
03049         size_t stepA = srcA.step/sizeof(dataA[0]);
03050         size_t stepB = srcB.step/sizeof(dataB[0]);
03051         size_t stepC = dst.step/sizeof(dataC[0]);
03052 
03053         if( !is_1d && cn == 1 )
03054         {
03055             for( k = 0; k < (cols % 2 ? 1 : 2); k++ )
03056             {
03057                 if( k == 1 )
03058                     dataA += cols - 1, dataB += cols - 1, dataC += cols - 1;
03059                 dataC[0] = dataA[0]*dataB[0];
03060                 if( rows % 2 == 0 )
03061                     dataC[(rows-1)*stepC] = dataA[(rows-1)*stepA]*dataB[(rows-1)*stepB];
03062                 if( !conjB )
03063                     for( j = 1; j <= rows - 2; j += 2 )
03064                     {
03065                         double re = dataA[j*stepA]*dataB[j*stepB] -
03066                                     dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
03067                         double im = dataA[j*stepA]*dataB[(j+1)*stepB] +
03068                                     dataA[(j+1)*stepA]*dataB[j*stepB];
03069                         dataC[j*stepC] = re; dataC[(j+1)*stepC] = im;
03070                     }
03071                 else
03072                     for( j = 1; j <= rows - 2; j += 2 )
03073                     {
03074                         double re = dataA[j*stepA]*dataB[j*stepB] +
03075                                     dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
03076                         double im = dataA[(j+1)*stepA]*dataB[j*stepB] -
03077                                     dataA[j*stepA]*dataB[(j+1)*stepB];
03078                         dataC[j*stepC] = re; dataC[(j+1)*stepC] = im;
03079                     }
03080                 if( k == 1 )
03081                     dataA -= cols - 1, dataB -= cols - 1, dataC -= cols - 1;
03082             }
03083         }
03084 
03085         for( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC )
03086         {
03087             if( is_1d && cn == 1 )
03088             {
03089                 dataC[0] = dataA[0]*dataB[0];
03090                 if( cols % 2 == 0 )
03091                     dataC[j1] = dataA[j1]*dataB[j1];
03092             }
03093 
03094             if( !conjB )
03095                 for( j = j0; j < j1; j += 2 )
03096                 {
03097                     double re = dataA[j]*dataB[j] - dataA[j+1]*dataB[j+1];
03098                     double im = dataA[j+1]*dataB[j] + dataA[j]*dataB[j+1];
03099                     dataC[j] = re; dataC[j+1] = im;
03100                 }
03101             else
03102                 for( j = j0; j < j1; j += 2 )
03103                 {
03104                     double re = dataA[j]*dataB[j] + dataA[j+1]*dataB[j+1];
03105                     double im = dataA[j+1]*dataB[j] - dataA[j]*dataB[j+1];
03106                     dataC[j] = re; dataC[j+1] = im;
03107                 }
03108         }
03109     }
03110 }
03111 
03112 
03113 /****************************************************************************************\
03114                                Discrete Cosine Transform
03115 \****************************************************************************************/
03116 
03117 namespace cv
03118 {
03119 
03120 /* DCT is calculated using DFT, as described here:
03121    http://www.ece.utexas.edu/~bevans/courses/ee381k/lectures/09_DCT/lecture9/:
03122 */
03123 template<typename T> static void
03124 DCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step,
03125      int n, int nf, int* factors, const int* itab, const Complex<T>* dft_wave,
03126      const Complex<T>* dct_wave, const void* spec, Complex<T>* buf )
03127 {
03128     static const T sin_45 = (T)0.70710678118654752440084436210485;
03129     int j, n2 = n >> 1;
03130 
03131     src_step /= sizeof(src[0]);
03132     dst_step /= sizeof(dst[0]);
03133     T* dst1 = dst + (n-1)*dst_step;
03134 
03135     if( n == 1 )
03136     {
03137         dst[0] = src[0];
03138         return;
03139     }
03140 
03141     for( j = 0; j < n2; j++, src += src_step*2 )
03142     {
03143         dft_src[j] = src[0];
03144         dft_src[n-j-1] = src[src_step];
03145     }
03146 
03147     RealDFT( dft_src, dft_dst, n, nf, factors,
03148              itab, dft_wave, n, spec, buf, 0, 1.0 );
03149     src = dft_dst;
03150 
03151     dst[0] = (T)(src[0]*dct_wave->re*sin_45);
03152     dst += dst_step;
03153     for( j = 1, dct_wave++; j < n2; j++, dct_wave++,
03154                                     dst += dst_step, dst1 -= dst_step )
03155     {
03156         T t0 = dct_wave->re*src[j*2-1] - dct_wave->im*src[j*2];
03157         T t1 = -dct_wave->im*src[j*2-1] - dct_wave->re*src[j*2];
03158         dst[0] = t0;
03159         dst1[0] = t1;
03160     }
03161 
03162     dst[0] = src[n-1]*dct_wave->re;
03163 }
03164 
03165 
03166 template<typename T> static void
03167 IDCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step,
03168       int n, int nf, int* factors, const int* itab, const Complex<T>* dft_wave,
03169       const Complex<T>* dct_wave, const void* spec, Complex<T>* buf )
03170 {
03171     static const T sin_45 = (T)0.70710678118654752440084436210485;
03172     int j, n2 = n >> 1;
03173 
03174     src_step /= sizeof(src[0]);
03175     dst_step /= sizeof(dst[0]);
03176     const T* src1 = src + (n-1)*src_step;
03177 
03178     if( n == 1 )
03179     {
03180         dst[0] = src[0];
03181         return;
03182     }
03183 
03184     dft_src[0] = (T)(src[0]*2*dct_wave->re*sin_45);
03185     src += src_step;
03186     for( j = 1, dct_wave++; j < n2; j++, dct_wave++,
03187                                     src += src_step, src1 -= src_step )
03188     {
03189         T t0 = dct_wave->re*src[0] - dct_wave->im*src1[0];
03190         T t1 = -dct_wave->im*src[0] - dct_wave->re*src1[0];
03191         dft_src[j*2-1] = t0;
03192         dft_src[j*2] = t1;
03193     }
03194 
03195     dft_src[n-1] = (T)(src[0]*2*dct_wave->re);
03196     CCSIDFT( dft_src, dft_dst, n, nf, factors, itab,
03197              dft_wave, n, spec, buf, 0, 1.0 );
03198 
03199     for( j = 0; j < n2; j++, dst += dst_step*2 )
03200     {
03201         dst[0] = dft_dst[j];
03202         dst[dst_step] = dft_dst[n-j-1];
03203     }
03204 }
03205 
03206 
03207 static void
03208 DCTInit( int n, int elem_size, void* _wave, int inv )
03209 {
03210     static const double DctScale[] =
03211     {
03212     0.707106781186547570, 0.500000000000000000, 0.353553390593273790,
03213     0.250000000000000000, 0.176776695296636890, 0.125000000000000000,
03214     0.088388347648318447, 0.062500000000000000, 0.044194173824159223,
03215     0.031250000000000000, 0.022097086912079612, 0.015625000000000000,
03216     0.011048543456039806, 0.007812500000000000, 0.005524271728019903,
03217     0.003906250000000000, 0.002762135864009952, 0.001953125000000000,
03218     0.001381067932004976, 0.000976562500000000, 0.000690533966002488,
03219     0.000488281250000000, 0.000345266983001244, 0.000244140625000000,
03220     0.000172633491500622, 0.000122070312500000, 0.000086316745750311,
03221     0.000061035156250000, 0.000043158372875155, 0.000030517578125000
03222     };
03223 
03224     int i;
03225     Complex<double> w, w1;
03226     double t, scale;
03227 
03228     if( n == 1 )
03229         return;
03230 
03231     assert( (n&1) == 0 );
03232 
03233     if( (n & (n - 1)) == 0 )
03234     {
03235         int m;
03236         for( m = 0; (unsigned)(1 << m) < (unsigned)n; m++ )
03237             ;
03238         scale = (!inv ? 2 : 1)*DctScale[m];
03239         w1.re = DFTTab[m+2][0];
03240         w1.im = -DFTTab[m+2][1];
03241     }
03242     else
03243     {
03244         t = 1./(2*n);
03245         scale = (!inv ? 2 : 1)*std::sqrt(t);
03246         w1.im = sin(-CV_PI*t);
03247         w1.re = std::sqrt(1. - w1.im*w1.im);
03248     }
03249     n >>= 1;
03250 
03251     if( elem_size == sizeof(Complex<double>) )
03252     {
03253         Complex<double>* wave = (Complex<double>*)_wave;
03254 
03255         w.re = scale;
03256         w.im = 0.;
03257 
03258         for( i = 0; i <= n; i++ )
03259         {
03260             wave[i] = w;
03261             t = w.re*w1.re - w.im*w1.im;
03262             w.im = w.re*w1.im + w.im*w1.re;
03263             w.re = t;
03264         }
03265     }
03266     else
03267     {
03268         Complex<float>* wave = (Complex<float>*)_wave;
03269         assert( elem_size == sizeof(Complex<float>) );
03270 
03271         w.re = (float)scale;
03272         w.im = 0.f;
03273 
03274         for( i = 0; i <= n; i++ )
03275         {
03276             wave[i].re = (float)w.re;
03277             wave[i].im = (float)w.im;
03278             t = w.re*w1.re - w.im*w1.im;
03279             w.im = w.re*w1.im + w.im*w1.re;
03280             w.re = t;
03281         }
03282     }
03283 }
03284 
03285 
03286 typedef void (*DCTFunc)(const void* src, int src_step, void* dft_src,
03287                         void* dft_dst, void* dst, int dst_step, int n,
03288                         int nf, int* factors, const int* itab, const void* dft_wave,
03289                         const void* dct_wave, const void* spec, void* buf );
03290 
03291 static void DCT_32f(const float* src, int src_step, float* dft_src, float* dft_dst,
03292                     float* dst, int dst_step, int n, int nf, int* factors, const int* itab,
03293                     const Complexf* dft_wave, const Complexf* dct_wave, const void* spec, Complexf* buf )
03294 {
03295     DCT(src, src_step, dft_src, dft_dst, dst, dst_step,
03296         n, nf, factors, itab, dft_wave, dct_wave, spec, buf);
03297 }
03298 
03299 static void IDCT_32f(const float* src, int src_step, float* dft_src, float* dft_dst,
03300                     float* dst, int dst_step, int n, int nf, int* factors, const int* itab,
03301                     const Complexf* dft_wave, const Complexf* dct_wave, const void* spec, Complexf* buf )
03302 {
03303     IDCT(src, src_step, dft_src, dft_dst, dst, dst_step,
03304          n, nf, factors, itab, dft_wave, dct_wave, spec, buf);
03305 }
03306 
03307 static void DCT_64f(const double* src, int src_step, double* dft_src, double* dft_dst,
03308                     double* dst, int dst_step, int n, int nf, int* factors, const int* itab,
03309                     const Complexd* dft_wave, const Complexd* dct_wave, const void* spec, Complexd* buf )
03310 {
03311     DCT(src, src_step, dft_src, dft_dst, dst, dst_step,
03312         n, nf, factors, itab, dft_wave, dct_wave, spec, buf);
03313 }
03314 
03315 static void IDCT_64f(const double* src, int src_step, double* dft_src, double* dft_dst,
03316                      double* dst, int dst_step, int n, int nf, int* factors, const int* itab,
03317                      const Complexd* dft_wave, const Complexd* dct_wave, const void* spec, Complexd* buf )
03318 {
03319     IDCT(src, src_step, dft_src, dft_dst, dst, dst_step,
03320          n, nf, factors, itab, dft_wave, dct_wave, spec, buf);
03321 }
03322 
03323 }
03324 
03325 #ifdef HAVE_IPP
03326 namespace cv
03327 {
03328 
03329 #if IPP_VERSION_X100 >= 900
03330 typedef IppStatus (CV_STDCALL * ippiDCTFunc)(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, const void* pDCTSpec, Ipp8u* pBuffer);
03331 typedef IppStatus (CV_STDCALL * ippiDCTInit)(void* pDCTSpec, IppiSize roiSize, Ipp8u* pMemInit );
03332 typedef IppStatus (CV_STDCALL * ippiDCTGetSize)(IppiSize roiSize, int* pSizeSpec, int* pSizeInit, int* pSizeBuf);
03333 #elif IPP_VERSION_X100 >= 700
03334 typedef IppStatus (CV_STDCALL * ippiDCTFunc)(const Ipp32f*, int, Ipp32f*, int, const void*, Ipp8u*);
03335 typedef IppStatus (CV_STDCALL * ippiDCTInitAlloc)(void**, IppiSize, IppHintAlgorithm);
03336 typedef IppStatus (CV_STDCALL * ippiDCTFree)(void* pDCTSpec);
03337 typedef IppStatus (CV_STDCALL * ippiDCTGetBufSize)(const void*, int*);
03338 #endif
03339 
03340 class DctIPPLoop_Invoker : public ParallelLoopBody
03341 {
03342 public:
03343     DctIPPLoop_Invoker(const Mat& _src, Mat& _dst, bool _inv, bool *_ok) :
03344         ParallelLoopBody(), src(&_src), dst(&_dst), inv(_inv), ok(_ok)
03345     {
03346         *ok = true;
03347     }
03348 
03349     virtual void operator()(const Range& range) const
03350     {
03351         if(*ok == false)
03352             return;
03353 
03354 #if IPP_VERSION_X100 >= 900
03355         IppiSize srcRoiSize = {src->cols, 1};
03356 
03357         int specSize    = 0;
03358         int initSize    = 0;
03359         int bufferSize  = 0;
03360 
03361         Ipp8u* pDCTSpec = NULL;
03362         Ipp8u* pBuffer  = NULL;
03363         Ipp8u* pInitBuf = NULL;
03364 
03365         #define IPP_RETURN              \
03366             if(pDCTSpec)                \
03367                 ippFree(pDCTSpec);      \
03368             if(pBuffer)                 \
03369                 ippFree(pBuffer);       \
03370             if(pInitBuf)                \
03371                 ippFree(pInitBuf);      \
03372             return;
03373 
03374         ippiDCTFunc     ippDctFun   = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R         : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
03375         ippiDCTInit     ippDctInit     = inv ? (ippiDCTInit)ippiDCTInvInit_32f         : (ippiDCTInit)ippiDCTFwdInit_32f;
03376         ippiDCTGetSize  ippDctGetSize  = inv ? (ippiDCTGetSize)ippiDCTInvGetSize_32f   : (ippiDCTGetSize)ippiDCTFwdGetSize_32f;
03377 
03378         if(ippDctGetSize(srcRoiSize, &specSize, &initSize, &bufferSize) < 0)
03379         {
03380             *ok = false;
03381             return;
03382         }
03383 
03384         pDCTSpec = (Ipp8u*)ippMalloc(specSize);
03385         if(!pDCTSpec && specSize)
03386         {
03387             *ok = false;
03388             return;
03389         }
03390 
03391         pBuffer  = (Ipp8u*)ippMalloc(bufferSize);
03392         if(!pBuffer && bufferSize)
03393         {
03394             *ok = false;
03395             IPP_RETURN
03396         }
03397         pInitBuf = (Ipp8u*)ippMalloc(initSize);
03398         if(!pInitBuf && initSize)
03399         {
03400             *ok = false;
03401             IPP_RETURN
03402         }
03403 
03404         if(ippDctInit(pDCTSpec, srcRoiSize, pInitBuf) < 0)
03405         {
03406             *ok = false;
03407             IPP_RETURN
03408         }
03409 
03410         for(int i = range.start; i < range.end; ++i)
03411         {
03412             if(ippDctFun(src->ptr<float>(i), (int)src->step,dst->ptr<float>(i), (int)dst->step, pDCTSpec, pBuffer) < 0)
03413             {
03414                 *ok = false;
03415                 IPP_RETURN
03416             }
03417         }
03418         IPP_RETURN
03419 #undef IPP_RETURN
03420 #elif IPP_VERSION_X100 >= 700
03421         void* pDCTSpec;
03422         AutoBuffer<uchar> buf;
03423         uchar* pBuffer = 0;
03424         int bufSize=0;
03425 
03426         IppiSize srcRoiSize = {src->cols, 1};
03427 
03428         CV_SUPPRESS_DEPRECATED_START
03429 
03430         ippiDCTFunc ippDctFun           = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R             : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
03431         ippiDCTInitAlloc ippInitAlloc   = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f   : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
03432         ippiDCTFree ippFree             = inv ? (ippiDCTFree)ippiDCTInvFree_32f             : (ippiDCTFree)ippiDCTFwdFree_32f;
03433         ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
03434 
03435         if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=0 && ippGetBufSize(pDCTSpec, &bufSize)>=0)
03436         {
03437             buf.allocate( bufSize );
03438             pBuffer = (uchar*)buf;
03439 
03440             for( int i = range.start; i < range.end; ++i)
03441             {
03442                 if(ippDctFun(src->ptr<float>(i), (int)src->step,dst->ptr<float>(i), (int)dst->step, pDCTSpec, (Ipp8u*)pBuffer) < 0)
03443                 {
03444                     *ok = false;
03445                     break;
03446                 }
03447             }
03448         }
03449         else
03450             *ok = false;
03451 
03452         if (pDCTSpec)
03453             ippFree(pDCTSpec);
03454 
03455         CV_SUPPRESS_DEPRECATED_END
03456 #else
03457         CV_UNUSED(range);
03458         *ok = false;
03459 #endif
03460     }
03461 
03462 private:
03463     const Mat* src;
03464     Mat* dst;
03465     bool inv;
03466     bool *ok;
03467 };
03468 
03469 static bool DctIPPLoop(const Mat& src, Mat& dst, bool inv)
03470 {
03471     bool ok;
03472     parallel_for_(Range(0, src.rows), DctIPPLoop_Invoker(src, dst, inv, &ok), src.rows/(double)(1<<4) );
03473     return ok;
03474 }
03475 
03476 static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row)
03477 {
03478     if(row)
03479         return DctIPPLoop(src, dst, inv);
03480     else
03481     {
03482 #if IPP_VERSION_X100 >= 900
03483         IppiSize srcRoiSize = {src.cols, src.rows};
03484 
03485         int specSize    = 0;
03486         int initSize    = 0;
03487         int bufferSize  = 0;
03488 
03489         Ipp8u* pDCTSpec = NULL;
03490         Ipp8u* pBuffer  = NULL;
03491         Ipp8u* pInitBuf = NULL;
03492 
03493         #define IPP_RELEASE             \
03494             if(pDCTSpec)                \
03495                 ippFree(pDCTSpec);      \
03496             if(pBuffer)                 \
03497                 ippFree(pBuffer);       \
03498             if(pInitBuf)                \
03499                 ippFree(pInitBuf);      \
03500 
03501         ippiDCTFunc     ippDctFun      = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R         : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
03502         ippiDCTInit     ippDctInit     = inv ? (ippiDCTInit)ippiDCTInvInit_32f         : (ippiDCTInit)ippiDCTFwdInit_32f;
03503         ippiDCTGetSize  ippDctGetSize  = inv ? (ippiDCTGetSize)ippiDCTInvGetSize_32f   : (ippiDCTGetSize)ippiDCTFwdGetSize_32f;
03504 
03505         if(ippDctGetSize(srcRoiSize, &specSize, &initSize, &bufferSize) < 0)
03506             return false;
03507 
03508         pDCTSpec = (Ipp8u*)ippMalloc(specSize);
03509         if(!pDCTSpec && specSize)
03510             return false;
03511 
03512         pBuffer  = (Ipp8u*)ippMalloc(bufferSize);
03513         if(!pBuffer && bufferSize)
03514         {
03515             IPP_RELEASE
03516             return false;
03517         }
03518         pInitBuf = (Ipp8u*)ippMalloc(initSize);
03519         if(!pInitBuf && initSize)
03520         {
03521             IPP_RELEASE
03522             return false;
03523         }
03524 
03525         if(ippDctInit(pDCTSpec, srcRoiSize, pInitBuf) < 0)
03526         {
03527             IPP_RELEASE
03528             return false;
03529         }
03530 
03531         if(ippDctFun(src.ptr<float>(), (int)src.step,dst.ptr<float>(), (int)dst.step, pDCTSpec, pBuffer) < 0)
03532         {
03533             IPP_RELEASE
03534             return false;
03535         }
03536 
03537         IPP_RELEASE
03538         return true;
03539 #undef IPP_RELEASE
03540 #elif IPP_VERSION_X100 >= 700
03541         IppStatus status;
03542         void* pDCTSpec;
03543         AutoBuffer<uchar> buf;
03544         uchar* pBuffer = 0;
03545         int bufSize=0;
03546 
03547         IppiSize srcRoiSize = {src.cols, src.rows};
03548 
03549         CV_SUPPRESS_DEPRECATED_START
03550 
03551         ippiDCTFunc ippDctFun           = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R             : (ippiDCTFunc)ippiDCTFwd_32f_C1R;
03552         ippiDCTInitAlloc ippInitAlloc   = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f   : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
03553         ippiDCTFree ippFree             = inv ? (ippiDCTFree)ippiDCTInvFree_32f             : (ippiDCTFree)ippiDCTFwdFree_32f;
03554         ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
03555 
03556         status = ippStsErr;
03557 
03558         if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=0 && ippGetBufSize(pDCTSpec, &bufSize)>=0)
03559         {
03560             buf.allocate( bufSize );
03561             pBuffer = (uchar*)buf;
03562 
03563             status = ippDctFun(src.ptr<float>(), (int)src.step, dst.ptr<float>(), (int)dst.step, pDCTSpec, (Ipp8u*)pBuffer);
03564         }
03565 
03566         if (pDCTSpec)
03567             ippFree(pDCTSpec);
03568 
03569         CV_SUPPRESS_DEPRECATED_END
03570 
03571         return status >= 0;
03572 #else
03573         CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(inv); CV_UNUSED(row);
03574         return false;
03575 #endif
03576     }
03577 }
03578 }
03579 #endif
03580 
03581 void cv::dct( InputArray _src0, OutputArray _dst, int flags )
03582 {
03583     static DCTFunc dct_tbl[4] =
03584     {
03585         (DCTFunc)DCT_32f,
03586         (DCTFunc)IDCT_32f,
03587         (DCTFunc)DCT_64f,
03588         (DCTFunc)IDCT_64f
03589     };
03590 
03591     bool inv = (flags & DCT_INVERSE) != 0;
03592     Mat src0 = _src0.getMat(), src = src0;
03593     int type = src.type(), depth = src.depth();
03594     void *spec = 0;
03595 
03596     double scale = 1.;
03597     int prev_len = 0, nf = 0, stage, end_stage;
03598     uchar *src_dft_buf = 0, *dst_dft_buf = 0;
03599     uchar *dft_wave = 0, *dct_wave = 0;
03600     int* itab = 0;
03601     uchar* ptr = 0;
03602     int elem_size = (int)src.elemSize(), complex_elem_size = elem_size*2;
03603     int factors[34], inplace_transform;
03604     int i, len, count;
03605     AutoBuffer<uchar>  buf;
03606 
03607     CV_Assert( type == CV_32FC1 || type == CV_64FC1 );
03608     _dst.create( src.rows, src.cols, type );
03609     Mat dst = _dst.getMat();
03610 
03611     CV_IPP_RUN(IPP_VERSION_X100 >= 700 && src.type() == CV_32F, ippi_DCT_32f(src, dst, inv, ((flags & DCT_ROWS) != 0)))
03612 
03613     DCTFunc dct_func = dct_tbl[(int)inv + (depth == CV_64F)*2];
03614 
03615     if( (flags & DCT_ROWS) || src.rows == 1 ||
03616         (src.cols == 1 && (src.isContinuous() && dst.isContinuous())))
03617     {
03618         stage = end_stage = 0;
03619     }
03620     else
03621     {
03622         stage = src.cols == 1;
03623         end_stage = 1;
03624     }
03625 
03626     for( ; stage <= end_stage; stage++ )
03627     {
03628         const uchar* sptr = src.ptr();
03629         uchar* dptr = dst.ptr();
03630         size_t sstep0, sstep1, dstep0, dstep1;
03631 
03632         if( stage == 0 )
03633         {
03634             len = src.cols;
03635             count = src.rows;
03636             if( len == 1 && !(flags & DCT_ROWS) )
03637             {
03638                 len = src.rows;
03639                 count = 1;
03640             }
03641             sstep0 = src.step;
03642             dstep0 = dst.step;
03643             sstep1 = dstep1 = elem_size;
03644         }
03645         else
03646         {
03647             len = dst.rows;
03648             count = dst.cols;
03649             sstep1 = src.step;
03650             dstep1 = dst.step;
03651             sstep0 = dstep0 = elem_size;
03652         }
03653 
03654         if( len != prev_len )
03655         {
03656             int sz;
03657 
03658             if( len > 1 && (len & 1) )
03659                 CV_Error( CV_StsNotImplemented, "Odd-size DCT\'s are not implemented" );
03660 
03661             sz = len*elem_size;
03662             sz += (len/2 + 1)*complex_elem_size;
03663 
03664             spec = 0;
03665             inplace_transform = 1;
03666             {
03667                 sz += len*(complex_elem_size + sizeof(int)) + complex_elem_size;
03668 
03669                 nf = DFTFactorize( len, factors );
03670                 inplace_transform = factors[0] == factors[nf-1];
03671 
03672                 i = nf > 1 && (factors[0] & 1) == 0;
03673                 if( (factors[i] & 1) != 0 && factors[i] > 5 )
03674                     sz += (factors[i]+1)*complex_elem_size;
03675 
03676                 if( !inplace_transform )
03677                     sz += len*elem_size;
03678             }
03679 
03680             buf.allocate( sz + 32 );
03681             ptr = (uchar*)buf;
03682 
03683             if( !spec )
03684             {
03685                 dft_wave = ptr;
03686                 ptr += len*complex_elem_size;
03687                 itab = (int*)ptr;
03688                 ptr = (uchar*)cvAlignPtr( ptr + len*sizeof(int), 16 );
03689                 DFTInit( len, nf, factors, itab, complex_elem_size, dft_wave, inv );
03690             }
03691 
03692             dct_wave = ptr;
03693             ptr += (len/2 + 1)*complex_elem_size;
03694             src_dft_buf = dst_dft_buf = ptr;
03695             ptr += len*elem_size;
03696             if( !inplace_transform )
03697             {
03698                 dst_dft_buf = ptr;
03699                 ptr += len*elem_size;
03700             }
03701             DCTInit( len, complex_elem_size, dct_wave, inv );
03702             if( !inv )
03703                 scale += scale;
03704             prev_len = len;
03705         }
03706         // otherwise reuse the tables calculated on the previous stage
03707         for( i = 0; i < count; i++ )
03708         {
03709             dct_func( sptr + i*sstep0, (int)sstep1, src_dft_buf, dst_dft_buf,
03710                       dptr + i*dstep0, (int)dstep1, len, nf, factors,
03711                       itab, dft_wave, dct_wave, spec, ptr );
03712         }
03713         src = dst;
03714     }
03715 }
03716 
03717 
03718 void cv::idct( InputArray src, OutputArray dst, int flags )
03719 {
03720     dct( src, dst, flags | DCT_INVERSE );
03721 }
03722 
03723 namespace cv
03724 {
03725 
03726 static const int optimalDFTSizeTab[] = {
03727 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 15, 16, 18, 20, 24, 25, 27, 30, 32, 36, 40, 45, 48,
03728 50, 54, 60, 64, 72, 75, 80, 81, 90, 96, 100, 108, 120, 125, 128, 135, 144, 150, 160,
03729 162, 180, 192, 200, 216, 225, 240, 243, 250, 256, 270, 288, 300, 320, 324, 360, 375,
03730 384, 400, 405, 432, 450, 480, 486, 500, 512, 540, 576, 600, 625, 640, 648, 675, 720,
03731 729, 750, 768, 800, 810, 864, 900, 960, 972, 1000, 1024, 1080, 1125, 1152, 1200,
03732 1215, 1250, 1280, 1296, 1350, 1440, 1458, 1500, 1536, 1600, 1620, 1728, 1800, 1875,
03733 1920, 1944, 2000, 2025, 2048, 2160, 2187, 2250, 2304, 2400, 2430, 2500, 2560, 2592,
03734 2700, 2880, 2916, 3000, 3072, 3125, 3200, 3240, 3375, 3456, 3600, 3645, 3750, 3840,
03735 3888, 4000, 4050, 4096, 4320, 4374, 4500, 4608, 4800, 4860, 5000, 5120, 5184, 5400,
03736 5625, 5760, 5832, 6000, 6075, 6144, 6250, 6400, 6480, 6561, 6750, 6912, 7200, 7290,
03737 7500, 7680, 7776, 8000, 8100, 8192, 8640, 8748, 9000, 9216, 9375, 9600, 9720, 10000,
03738 10125, 10240, 10368, 10800, 10935, 11250, 11520, 11664, 12000, 12150, 12288, 12500,
03739 12800, 12960, 13122, 13500, 13824, 14400, 14580, 15000, 15360, 15552, 15625, 16000,
03740 16200, 16384, 16875, 17280, 17496, 18000, 18225, 18432, 18750, 19200, 19440, 19683,
03741 20000, 20250, 20480, 20736, 21600, 21870, 22500, 23040, 23328, 24000, 24300, 24576,
03742 25000, 25600, 25920, 26244, 27000, 27648, 28125, 28800, 29160, 30000, 30375, 30720,
03743 31104, 31250, 32000, 32400, 32768, 32805, 33750, 34560, 34992, 36000, 36450, 36864,
03744 37500, 38400, 38880, 39366, 40000, 40500, 40960, 41472, 43200, 43740, 45000, 46080,
03745 46656, 46875, 48000, 48600, 49152, 50000, 50625, 51200, 51840, 52488, 54000, 54675,
03746 55296, 56250, 57600, 58320, 59049, 60000, 60750, 61440, 62208, 62500, 64000, 64800,
03747 65536, 65610, 67500, 69120, 69984, 72000, 72900, 73728, 75000, 76800, 77760, 78125,
03748 78732, 80000, 81000, 81920, 82944, 84375, 86400, 87480, 90000, 91125, 92160, 93312,
03749 93750, 96000, 97200, 98304, 98415, 100000, 101250, 102400, 103680, 104976, 108000,
03750 109350, 110592, 112500, 115200, 116640, 118098, 120000, 121500, 122880, 124416, 125000,
03751 128000, 129600, 131072, 131220, 135000, 138240, 139968, 140625, 144000, 145800, 147456,
03752 150000, 151875, 153600, 155520, 156250, 157464, 160000, 162000, 163840, 164025, 165888,
03753 168750, 172800, 174960, 177147, 180000, 182250, 184320, 186624, 187500, 192000, 194400,
03754 196608, 196830, 200000, 202500, 204800, 207360, 209952, 216000, 218700, 221184, 225000,
03755 230400, 233280, 234375, 236196, 240000, 243000, 245760, 248832, 250000, 253125, 256000,
03756 259200, 262144, 262440, 270000, 273375, 276480, 279936, 281250, 288000, 291600, 294912,
03757 295245, 300000, 303750, 307200, 311040, 312500, 314928, 320000, 324000, 327680, 328050,
03758 331776, 337500, 345600, 349920, 354294, 360000, 364500, 368640, 373248, 375000, 384000,
03759 388800, 390625, 393216, 393660, 400000, 405000, 409600, 414720, 419904, 421875, 432000,
03760 437400, 442368, 450000, 455625, 460800, 466560, 468750, 472392, 480000, 486000, 491520,
03761 492075, 497664, 500000, 506250, 512000, 518400, 524288, 524880, 531441, 540000, 546750,
03762 552960, 559872, 562500, 576000, 583200, 589824, 590490, 600000, 607500, 614400, 622080,
03763 625000, 629856, 640000, 648000, 655360, 656100, 663552, 675000, 691200, 699840, 703125,
03764 708588, 720000, 729000, 737280, 746496, 750000, 759375, 768000, 777600, 781250, 786432,
03765 787320, 800000, 810000, 819200, 820125, 829440, 839808, 843750, 864000, 874800, 884736,
03766 885735, 900000, 911250, 921600, 933120, 937500, 944784, 960000, 972000, 983040, 984150,
03767 995328, 1000000, 1012500, 1024000, 1036800, 1048576, 1049760, 1062882, 1080000, 1093500,
03768 1105920, 1119744, 1125000, 1152000, 1166400, 1171875, 1179648, 1180980, 1200000,
03769 1215000, 1228800, 1244160, 1250000, 1259712, 1265625, 1280000, 1296000, 1310720,
03770 1312200, 1327104, 1350000, 1366875, 1382400, 1399680, 1406250, 1417176, 1440000,
03771 1458000, 1474560, 1476225, 1492992, 1500000, 1518750, 1536000, 1555200, 1562500,
03772 1572864, 1574640, 1594323, 1600000, 1620000, 1638400, 1640250, 1658880, 1679616,
03773 1687500, 1728000, 1749600, 1769472, 1771470, 1800000, 1822500, 1843200, 1866240,
03774 1875000, 1889568, 1920000, 1944000, 1953125, 1966080, 1968300, 1990656, 2000000,
03775 2025000, 2048000, 2073600, 2097152, 2099520, 2109375, 2125764, 2160000, 2187000,
03776 2211840, 2239488, 2250000, 2278125, 2304000, 2332800, 2343750, 2359296, 2361960,
03777 2400000, 2430000, 2457600, 2460375, 2488320, 2500000, 2519424, 2531250, 2560000,
03778 2592000, 2621440, 2624400, 2654208, 2657205, 2700000, 2733750, 2764800, 2799360,
03779 2812500, 2834352, 2880000, 2916000, 2949120, 2952450, 2985984, 3000000, 3037500,
03780 3072000, 3110400, 3125000, 3145728, 3149280, 3188646, 3200000, 3240000, 3276800,
03781 3280500, 3317760, 3359232, 3375000, 3456000, 3499200, 3515625, 3538944, 3542940,
03782 3600000, 3645000, 3686400, 3732480, 3750000, 3779136, 3796875, 3840000, 3888000,
03783 3906250, 3932160, 3936600, 3981312, 4000000, 4050000, 4096000, 4100625, 4147200,
03784 4194304, 4199040, 4218750, 4251528, 4320000, 4374000, 4423680, 4428675, 4478976,
03785 4500000, 4556250, 4608000, 4665600, 4687500, 4718592, 4723920, 4782969, 4800000,
03786 4860000, 4915200, 4920750, 4976640, 5000000, 5038848, 5062500, 5120000, 5184000,
03787 5242880, 5248800, 5308416, 5314410, 5400000, 5467500, 5529600, 5598720, 5625000,
03788 5668704, 5760000, 5832000, 5859375, 5898240, 5904900, 5971968, 6000000, 6075000,
03789 6144000, 6220800, 6250000, 6291456, 6298560, 6328125, 6377292, 6400000, 6480000,
03790 6553600, 6561000, 6635520, 6718464, 6750000, 6834375, 6912000, 6998400, 7031250,
03791 7077888, 7085880, 7200000, 7290000, 7372800, 7381125, 7464960, 7500000, 7558272,
03792 7593750, 7680000, 7776000, 7812500, 7864320, 7873200, 7962624, 7971615, 8000000,
03793 8100000, 8192000, 8201250, 8294400, 8388608, 8398080, 8437500, 8503056, 8640000,
03794 8748000, 8847360, 8857350, 8957952, 9000000, 9112500, 9216000, 9331200, 9375000,
03795 9437184, 9447840, 9565938, 9600000, 9720000, 9765625, 9830400, 9841500, 9953280,
03796 10000000, 10077696, 10125000, 10240000, 10368000, 10485760, 10497600, 10546875, 10616832,
03797 10628820, 10800000, 10935000, 11059200, 11197440, 11250000, 11337408, 11390625, 11520000,
03798 11664000, 11718750, 11796480, 11809800, 11943936, 12000000, 12150000, 12288000, 12301875,
03799 12441600, 12500000, 12582912, 12597120, 12656250, 12754584, 12800000, 12960000, 13107200,
03800 13122000, 13271040, 13286025, 13436928, 13500000, 13668750, 13824000, 13996800, 14062500,
03801 14155776, 14171760, 14400000, 14580000, 14745600, 14762250, 14929920, 15000000, 15116544,
03802 15187500, 15360000, 15552000, 15625000, 15728640, 15746400, 15925248, 15943230, 16000000,
03803 16200000, 16384000, 16402500, 16588800, 16777216, 16796160, 16875000, 17006112, 17280000,
03804 17496000, 17578125, 17694720, 17714700, 17915904, 18000000, 18225000, 18432000, 18662400,
03805 18750000, 18874368, 18895680, 18984375, 19131876, 19200000, 19440000, 19531250, 19660800,
03806 19683000, 19906560, 20000000, 20155392, 20250000, 20480000, 20503125, 20736000, 20971520,
03807 20995200, 21093750, 21233664, 21257640, 21600000, 21870000, 22118400, 22143375, 22394880,
03808 22500000, 22674816, 22781250, 23040000, 23328000, 23437500, 23592960, 23619600, 23887872,
03809 23914845, 24000000, 24300000, 24576000, 24603750, 24883200, 25000000, 25165824, 25194240,
03810 25312500, 25509168, 25600000, 25920000, 26214400, 26244000, 26542080, 26572050, 26873856,
03811 27000000, 27337500, 27648000, 27993600, 28125000, 28311552, 28343520, 28800000, 29160000,
03812 29296875, 29491200, 29524500, 29859840, 30000000, 30233088, 30375000, 30720000, 31104000,
03813 31250000, 31457280, 31492800, 31640625, 31850496, 31886460, 32000000, 32400000, 32768000,
03814 32805000, 33177600, 33554432, 33592320, 33750000, 34012224, 34171875, 34560000, 34992000,
03815 35156250, 35389440, 35429400, 35831808, 36000000, 36450000, 36864000, 36905625, 37324800,
03816 37500000, 37748736, 37791360, 37968750, 38263752, 38400000, 38880000, 39062500, 39321600,
03817 39366000, 39813120, 39858075, 40000000, 40310784, 40500000, 40960000, 41006250, 41472000,
03818 41943040, 41990400, 42187500, 42467328, 42515280, 43200000, 43740000, 44236800, 44286750,
03819 44789760, 45000000, 45349632, 45562500, 46080000, 46656000, 46875000, 47185920, 47239200,
03820 47775744, 47829690, 48000000, 48600000, 48828125, 49152000, 49207500, 49766400, 50000000,
03821 50331648, 50388480, 50625000, 51018336, 51200000, 51840000, 52428800, 52488000, 52734375,
03822 53084160, 53144100, 53747712, 54000000, 54675000, 55296000, 55987200, 56250000, 56623104,
03823 56687040, 56953125, 57600000, 58320000, 58593750, 58982400, 59049000, 59719680, 60000000,
03824 60466176, 60750000, 61440000, 61509375, 62208000, 62500000, 62914560, 62985600, 63281250,
03825 63700992, 63772920, 64000000, 64800000, 65536000, 65610000, 66355200, 66430125, 67108864,
03826 67184640, 67500000, 68024448, 68343750, 69120000, 69984000, 70312500, 70778880, 70858800,
03827 71663616, 72000000, 72900000, 73728000, 73811250, 74649600, 75000000, 75497472, 75582720,
03828 75937500, 76527504, 76800000, 77760000, 78125000, 78643200, 78732000, 79626240, 79716150,
03829 80000000, 80621568, 81000000, 81920000, 82012500, 82944000, 83886080, 83980800, 84375000,
03830 84934656, 85030560, 86400000, 87480000, 87890625, 88473600, 88573500, 89579520, 90000000,
03831 90699264, 91125000, 92160000, 93312000, 93750000, 94371840, 94478400, 94921875, 95551488,
03832 95659380, 96000000, 97200000, 97656250, 98304000, 98415000, 99532800, 100000000,
03833 100663296, 100776960, 101250000, 102036672, 102400000, 102515625, 103680000, 104857600,
03834 104976000, 105468750, 106168320, 106288200, 107495424, 108000000, 109350000, 110592000,
03835 110716875, 111974400, 112500000, 113246208, 113374080, 113906250, 115200000, 116640000,
03836 117187500, 117964800, 118098000, 119439360, 119574225, 120000000, 120932352, 121500000,
03837 122880000, 123018750, 124416000, 125000000, 125829120, 125971200, 126562500, 127401984,
03838 127545840, 128000000, 129600000, 131072000, 131220000, 132710400, 132860250, 134217728,
03839 134369280, 135000000, 136048896, 136687500, 138240000, 139968000, 140625000, 141557760,
03840 141717600, 143327232, 144000000, 145800000, 146484375, 147456000, 147622500, 149299200,
03841 150000000, 150994944, 151165440, 151875000, 153055008, 153600000, 155520000, 156250000,
03842 157286400, 157464000, 158203125, 159252480, 159432300, 160000000, 161243136, 162000000,
03843 163840000, 164025000, 165888000, 167772160, 167961600, 168750000, 169869312, 170061120,
03844 170859375, 172800000, 174960000, 175781250, 176947200, 177147000, 179159040, 180000000,
03845 181398528, 182250000, 184320000, 184528125, 186624000, 187500000, 188743680, 188956800,
03846 189843750, 191102976, 191318760, 192000000, 194400000, 195312500, 196608000, 196830000,
03847 199065600, 199290375, 200000000, 201326592, 201553920, 202500000, 204073344, 204800000,
03848 205031250, 207360000, 209715200, 209952000, 210937500, 212336640, 212576400, 214990848,
03849 216000000, 218700000, 221184000, 221433750, 223948800, 225000000, 226492416, 226748160,
03850 227812500, 230400000, 233280000, 234375000, 235929600, 236196000, 238878720, 239148450,
03851 240000000, 241864704, 243000000, 244140625, 245760000, 246037500, 248832000, 250000000,
03852 251658240, 251942400, 253125000, 254803968, 255091680, 256000000, 259200000, 262144000,
03853 262440000, 263671875, 265420800, 265720500, 268435456, 268738560, 270000000, 272097792,
03854 273375000, 276480000, 279936000, 281250000, 283115520, 283435200, 284765625, 286654464,
03855 288000000, 291600000, 292968750, 294912000, 295245000, 298598400, 300000000, 301989888,
03856 302330880, 303750000, 306110016, 307200000, 307546875, 311040000, 312500000, 314572800,
03857 314928000, 316406250, 318504960, 318864600, 320000000, 322486272, 324000000, 327680000,
03858 328050000, 331776000, 332150625, 335544320, 335923200, 337500000, 339738624, 340122240,
03859 341718750, 345600000, 349920000, 351562500, 353894400, 354294000, 358318080, 360000000,
03860 362797056, 364500000, 368640000, 369056250, 373248000, 375000000, 377487360, 377913600,
03861 379687500, 382205952, 382637520, 384000000, 388800000, 390625000, 393216000, 393660000,
03862 398131200, 398580750, 400000000, 402653184, 403107840, 405000000, 408146688, 409600000,
03863 410062500, 414720000, 419430400, 419904000, 421875000, 424673280, 425152800, 429981696,
03864 432000000, 437400000, 439453125, 442368000, 442867500, 447897600, 450000000, 452984832,
03865 453496320, 455625000, 460800000, 466560000, 468750000, 471859200, 472392000, 474609375,
03866 477757440, 478296900, 480000000, 483729408, 486000000, 488281250, 491520000, 492075000,
03867 497664000, 500000000, 503316480, 503884800, 506250000, 509607936, 510183360, 512000000,
03868 512578125, 518400000, 524288000, 524880000, 527343750, 530841600, 531441000, 536870912,
03869 537477120, 540000000, 544195584, 546750000, 552960000, 553584375, 559872000, 562500000,
03870 566231040, 566870400, 569531250, 573308928, 576000000, 583200000, 585937500, 589824000,
03871 590490000, 597196800, 597871125, 600000000, 603979776, 604661760, 607500000, 612220032,
03872 614400000, 615093750, 622080000, 625000000, 629145600, 629856000, 632812500, 637009920,
03873 637729200, 640000000, 644972544, 648000000, 655360000, 656100000, 663552000, 664301250,
03874 671088640, 671846400, 675000000, 679477248, 680244480, 683437500, 691200000, 699840000,
03875 703125000, 707788800, 708588000, 716636160, 720000000, 725594112, 729000000, 732421875,
03876 737280000, 738112500, 746496000, 750000000, 754974720, 755827200, 759375000, 764411904,
03877 765275040, 768000000, 777600000, 781250000, 786432000, 787320000, 791015625, 796262400,
03878 797161500, 800000000, 805306368, 806215680, 810000000, 816293376, 819200000, 820125000,
03879 829440000, 838860800, 839808000, 843750000, 849346560, 850305600, 854296875, 859963392,
03880 864000000, 874800000, 878906250, 884736000, 885735000, 895795200, 900000000, 905969664,
03881 906992640, 911250000, 921600000, 922640625, 933120000, 937500000, 943718400, 944784000,
03882 949218750, 955514880, 956593800, 960000000, 967458816, 972000000, 976562500, 983040000,
03883 984150000, 995328000, 996451875, 1000000000, 1006632960, 1007769600, 1012500000,
03884 1019215872, 1020366720, 1024000000, 1025156250, 1036800000, 1048576000, 1049760000,
03885 1054687500, 1061683200, 1062882000, 1073741824, 1074954240, 1080000000, 1088391168,
03886 1093500000, 1105920000, 1107168750, 1119744000, 1125000000, 1132462080, 1133740800,
03887 1139062500, 1146617856, 1152000000, 1166400000, 1171875000, 1179648000, 1180980000,
03888 1194393600, 1195742250, 1200000000, 1207959552, 1209323520, 1215000000, 1220703125,
03889 1224440064, 1228800000, 1230187500, 1244160000, 1250000000, 1258291200, 1259712000,
03890 1265625000, 1274019840, 1275458400, 1280000000, 1289945088, 1296000000, 1310720000,
03891 1312200000, 1318359375, 1327104000, 1328602500, 1342177280, 1343692800, 1350000000,
03892 1358954496, 1360488960, 1366875000, 1382400000, 1399680000, 1406250000, 1415577600,
03893 1417176000, 1423828125, 1433272320, 1440000000, 1451188224, 1458000000, 1464843750,
03894 1474560000, 1476225000, 1492992000, 1500000000, 1509949440, 1511654400, 1518750000,
03895 1528823808, 1530550080, 1536000000, 1537734375, 1555200000, 1562500000, 1572864000,
03896 1574640000, 1582031250, 1592524800, 1594323000, 1600000000, 1610612736, 1612431360,
03897 1620000000, 1632586752, 1638400000, 1640250000, 1658880000, 1660753125, 1677721600,
03898 1679616000, 1687500000, 1698693120, 1700611200, 1708593750, 1719926784, 1728000000,
03899 1749600000, 1757812500, 1769472000, 1771470000, 1791590400, 1800000000, 1811939328,
03900 1813985280, 1822500000, 1843200000, 1845281250, 1866240000, 1875000000, 1887436800,
03901 1889568000, 1898437500, 1911029760, 1913187600, 1920000000, 1934917632, 1944000000,
03902 1953125000, 1966080000, 1968300000, 1990656000, 1992903750, 2000000000, 2013265920,
03903 2015539200, 2025000000, 2038431744, 2040733440, 2048000000, 2050312500, 2073600000,
03904 2097152000, 2099520000, 2109375000, 2123366400, 2125764000
03905 };
03906 
03907 }
03908 
03909 int cv::getOptimalDFTSize( int size0 )
03910 {
03911     int a = 0, b = sizeof(optimalDFTSizeTab)/sizeof(optimalDFTSizeTab[0]) - 1;
03912     if( (unsigned)size0 >= (unsigned)optimalDFTSizeTab[b] )
03913         return -1;
03914 
03915     while( a < b )
03916     {
03917         int c = (a + b) >> 1;
03918         if( size0 <= optimalDFTSizeTab[c] )
03919             b = c;
03920         else
03921             a = c+1;
03922     }
03923 
03924     return optimalDFTSizeTab[b];
03925 }
03926 
03927 CV_IMPL void
03928 cvDFT( const CvArr* srcarr, CvArr* dstarr, int flags, int nonzero_rows )
03929 {
03930     cv::Mat src = cv::cvarrToMat(srcarr), dst0 = cv::cvarrToMat(dstarr), dst = dst0;
03931     int _flags = ((flags & CV_DXT_INVERSE) ? cv::DFT_INVERSE : 0) |
03932         ((flags & CV_DXT_SCALE) ? cv::DFT_SCALE : 0) |
03933         ((flags & CV_DXT_ROWS) ? cv::DFT_ROWS : 0);
03934 
03935     CV_Assert( src.size == dst.size );
03936 
03937     if( src.type() != dst.type() )
03938     {
03939         if( dst.channels() == 2 )
03940             _flags |= cv::DFT_COMPLEX_OUTPUT;
03941         else
03942             _flags |= cv::DFT_REAL_OUTPUT;
03943     }
03944 
03945     cv::dft( src, dst, _flags, nonzero_rows );
03946     CV_Assert( dst.data == dst0.data ); // otherwise it means that the destination size or type was incorrect
03947 }
03948 
03949 
03950 CV_IMPL void
03951 cvMulSpectrums( const CvArr* srcAarr, const CvArr* srcBarr,
03952                 CvArr* dstarr, int flags )
03953 {
03954     cv::Mat srcA = cv::cvarrToMat(srcAarr),
03955         srcB = cv::cvarrToMat(srcBarr),
03956         dst = cv::cvarrToMat(dstarr);
03957     CV_Assert( srcA.size == dst.size && srcA.type() == dst.type() );
03958 
03959     cv::mulSpectrums(srcA, srcB, dst,
03960         (flags & CV_DXT_ROWS) ? cv::DFT_ROWS : 0,
03961         (flags & CV_DXT_MUL_CONJ) != 0 );
03962 }
03963 
03964 
03965 CV_IMPL void
03966 cvDCT( const CvArr* srcarr, CvArr* dstarr, int flags )
03967 {
03968     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
03969     CV_Assert( src.size == dst.size && src.type() == dst.type() );
03970     int _flags = ((flags & CV_DXT_INVERSE) ? cv::DCT_INVERSE : 0) |
03971             ((flags & CV_DXT_ROWS) ? cv::DCT_ROWS : 0);
03972     cv::dct( src, dst, _flags );
03973 }
03974 
03975 
03976 CV_IMPL int
03977 cvGetOptimalDFTSize( int size0 )
03978 {
03979     return cv::getOptimalDFTSize(size0);
03980 }
03981 
03982 /* End of file. */
03983