the do / gr-peach-opencv-project

Fork of gr-peach-opencv-project by the do

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers mathfuncs.cpp Source File

mathfuncs.cpp

00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
00015 // Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
00016 // Third party copyrights are property of their respective owners.
00017 //
00018 // Redistribution and use in source and binary forms, with or without modification,
00019 // are permitted provided that the following conditions are met:
00020 //
00021 //   * Redistribution's of source code must retain the above copyright notice,
00022 //     this list of conditions and the following disclaimer.
00023 //
00024 //   * Redistribution's in binary form must reproduce the above copyright notice,
00025 //     this list of conditions and the following disclaimer in the documentation
00026 //     and/or other materials provided with the distribution.
00027 //
00028 //   * The name of the copyright holders may not be used to endorse or promote products
00029 //     derived from this software without specific prior written permission.
00030 //
00031 // This software is provided by the copyright holders and contributors "as is" and
00032 // any express or implied warranties, including, but not limited to, the implied
00033 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00034 // In no event shall the Intel Corporation or contributors be liable for any direct,
00035 // indirect, incidental, special, exemplary, or consequential damages
00036 // (including, but not limited to, procurement of substitute goods or services;
00037 // loss of use, data, or profits; or business interruption) however caused
00038 // and on any theory of liability, whether in contract, strict liability,
00039 // or tort (including negligence or otherwise) arising in any way out of
00040 // the use of this software, even if advised of the possibility of such damage.
00041 //
00042 //M*/
00043 
00044 #include "precomp.hpp"
00045 #include "opencl_kernels_core.hpp"
00046 #include <limits>
00047 #include <iostream>
00048 
00049 namespace cv
00050 {
00051 
00052 typedef void (*MathFunc)(const void* src, void* dst, int len);
00053 
00054 static const float atan2_p1 = 0.9997878412794807f*(float)(180/CV_PI);
00055 static const float atan2_p3 = -0.3258083974640975f*(float)(180/CV_PI);
00056 static const float atan2_p5 = 0.1555786518463281f*(float)(180/CV_PI);
00057 static const float atan2_p7 = -0.04432655554792128f*(float)(180/CV_PI);
00058 
00059 #ifdef HAVE_OPENCL
00060 
00061 enum { OCL_OP_LOG=0, OCL_OP_EXP=1, OCL_OP_MAG=2, OCL_OP_PHASE_DEGREES=3, OCL_OP_PHASE_RADIANS=4 };
00062 
00063 static const char* oclop2str[] = { "OP_LOG", "OP_EXP", "OP_MAG", "OP_PHASE_DEGREES", "OP_PHASE_RADIANS", 0 };
00064 
00065 static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, int oclop)
00066 {
00067     int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
00068     int kercn = oclop == OCL_OP_PHASE_DEGREES ||
00069             oclop == OCL_OP_PHASE_RADIANS ? 1 : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
00070 
00071     const ocl::Device d = ocl::Device::getDefault();
00072     bool double_support = d.doubleFPConfig() > 0;
00073     if (!double_support && depth == CV_64F)
00074         return false;
00075     int rowsPerWI = d.isIntel() ? 4 : 1;
00076 
00077     ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
00078                   format("-D %s -D %s -D dstT=%s -D rowsPerWI=%d%s", _src2.empty() ? "UNARY_OP" : "BINARY_OP",
00079                          oclop2str[oclop], ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), rowsPerWI,
00080                          double_support ? " -D DOUBLE_SUPPORT" : ""));
00081     if (k.empty())
00082         return false;
00083 
00084     UMat src1 = _src1.getUMat(), src2 = _src2.getUMat();
00085     _dst.create(src1.size(), type);
00086     UMat dst = _dst.getUMat();
00087 
00088     ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
00089             src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
00090             dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);
00091 
00092     if (src2.empty())
00093         k.args(src1arg, dstarg);
00094     else
00095         k.args(src1arg, src2arg, dstarg);
00096 
00097     size_t globalsize[] = { (size_t)src1.cols * cn / kercn, ((size_t)src1.rows + rowsPerWI - 1) / rowsPerWI };
00098     return k.run(2, globalsize, 0, false);
00099 }
00100 
00101 #endif
00102 
00103 float fastAtan2( float y, float x )
00104 {
00105     float ax = std::abs(x), ay = std::abs(y);
00106     float a, c, c2;
00107     if( ax >= ay )
00108     {
00109         c = ay/(ax + (float)DBL_EPSILON);
00110         c2 = c*c;
00111         a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
00112     }
00113     else
00114     {
00115         c = ax/(ay + (float)DBL_EPSILON);
00116         c2 = c*c;
00117         a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
00118     }
00119     if( x < 0 )
00120         a = 180.f - a;
00121     if( y < 0 )
00122         a = 360.f - a;
00123     return a;
00124 }
00125 
00126 /* ************************************************************************** *\
00127    Fast cube root by Ken Turkowski
00128    (http://www.worldserver.com/turk/computergraphics/papers.html)
00129 \* ************************************************************************** */
00130 float  cubeRoot( float value )
00131 {
00132     float fr;
00133     Cv32suf v, m;
00134     int ix, s;
00135     int ex, shx;
00136 
00137     v.f = value;
00138     ix = v.i & 0x7fffffff;
00139     s = v.i & 0x80000000;
00140     ex = (ix >> 23) - 127;
00141     shx = ex % 3;
00142     shx -= shx >= 0 ? 3 : 0;
00143     ex = (ex - shx) / 3; /* exponent of cube root */
00144     v.i = (ix & ((1<<23)-1)) | ((shx + 127)<<23);
00145     fr = v.f;
00146 
00147     /* 0.125 <= fr < 1.0 */
00148     /* Use quartic rational polynomial with error < 2^(-24) */
00149     fr = (float)(((((45.2548339756803022511987494 * fr +
00150     192.2798368355061050458134625) * fr +
00151     119.1654824285581628956914143) * fr +
00152     13.43250139086239872172837314) * fr +
00153     0.1636161226585754240958355063)/
00154     ((((14.80884093219134573786480845 * fr +
00155     151.9714051044435648658557668) * fr +
00156     168.5254414101568283957668343) * fr +
00157     33.9905941350215598754191872) * fr +
00158     1.0));
00159 
00160     /* fr *= 2^ex * sign */
00161     m.f = value;
00162     v.f = fr;
00163     v.i = (v.i + (ex << 23) + s) & (m.i*2 != 0 ? -1 : 0);
00164     return v.f;
00165 }
00166 
00167 /****************************************************************************************\
00168 *                                  Cartezian -> Polar                                    *
00169 \****************************************************************************************/
00170 
00171 void magnitude( InputArray src1, InputArray src2, OutputArray dst )
00172 {
00173     int type = src1.type(), depth = src1.depth(), cn = src1.channels();
00174     CV_Assert( src1.size() == src2.size() && type == src2.type() && (depth == CV_32F || depth == CV_64F));
00175 
00176 #ifdef HAVE_OPENCL
00177     CV_OCL_RUN(dst.isUMat() && src1.dims() <= 2 && src2.dims() <= 2,
00178                ocl_math_op(src1, src2, dst, OCL_OP_MAG))
00179 #endif
00180 
00181     Mat X = src1.getMat(), Y = src2.getMat();
00182     dst.create(X.dims, X.size, X.type());
00183     Mat Mag = dst.getMat();
00184 
00185     const Mat* arrays[] = {&X, &Y, &Mag, 0};
00186     uchar* ptrs[3];
00187     NAryMatIterator it(arrays, ptrs);
00188     int len = (int)it.size*cn;
00189 
00190     for( size_t i = 0; i < it.nplanes; i++, ++it )
00191     {
00192         if( depth == CV_32F )
00193         {
00194             const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
00195             float *mag = (float*)ptrs[2];
00196             hal::magnitude32f( x, y, mag, len );
00197         }
00198         else
00199         {
00200             const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
00201             double *mag = (double*)ptrs[2];
00202             hal::magnitude64f( x, y, mag, len );
00203         }
00204     }
00205 }
00206 
00207 
00208 void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegrees )
00209 {
00210     int type = src1.type(), depth = src1.depth(), cn = src1.channels();
00211     CV_Assert( src1.size() == src2.size() && type == src2.type() && (depth == CV_32F || depth == CV_64F));
00212 
00213 #ifdef HAVE_OPENCL
00214     CV_OCL_RUN(dst.isUMat() && src1.dims() <= 2 && src2.dims() <= 2,
00215                ocl_math_op(src1, src2, dst, angleInDegrees ? OCL_OP_PHASE_DEGREES : OCL_OP_PHASE_RADIANS))
00216 #endif
00217 
00218     Mat X = src1.getMat(), Y = src2.getMat();
00219     dst.create( X.dims, X.size, type );
00220     Mat Angle = dst.getMat();
00221 
00222     const Mat* arrays[] = {&X, &Y, &Angle, 0};
00223     uchar* ptrs[3];
00224     NAryMatIterator it(arrays, ptrs);
00225     cv::AutoBuffer<float> _buf;
00226     float* buf[2] = {0, 0};
00227     int j, k, total = (int)(it.size*cn), blockSize = total;
00228     size_t esz1 = X.elemSize1();
00229 
00230     if( depth == CV_64F )
00231     {
00232         blockSize = std::min(blockSize, ((BLOCK_SIZE+cn-1)/cn)*cn);
00233         _buf.allocate(blockSize*2);
00234         buf[0] = _buf;
00235         buf[1] = buf[0] + blockSize;
00236     }
00237 
00238     for( size_t i = 0; i < it.nplanes; i++, ++it )
00239     {
00240         for( j = 0; j < total; j += blockSize )
00241         {
00242             int len = std::min(total - j, blockSize);
00243             if( depth == CV_32F )
00244             {
00245                 const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
00246                 float *angle = (float*)ptrs[2];
00247                 hal::fastAtan2( y, x, angle, len, angleInDegrees );
00248             }
00249             else
00250             {
00251                 const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
00252                 double *angle = (double*)ptrs[2];
00253                 k = 0;
00254 
00255 #if CV_SSE2
00256                 if (USE_SSE2)
00257                 {
00258                     for ( ; k <= len - 4; k += 4)
00259                     {
00260                         __m128 v_dst0 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(x + k)),
00261                                                       _mm_cvtpd_ps(_mm_loadu_pd(x + k + 2)));
00262                         __m128 v_dst1 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(y + k)),
00263                                                       _mm_cvtpd_ps(_mm_loadu_pd(y + k + 2)));
00264 
00265                         _mm_storeu_ps(buf[0] + k, v_dst0);
00266                         _mm_storeu_ps(buf[1] + k, v_dst1);
00267                     }
00268                 }
00269 #endif
00270 
00271                 for( ; k < len; k++ )
00272                 {
00273                     buf[0][k] = (float)x[k];
00274                     buf[1][k] = (float)y[k];
00275                 }
00276 
00277                 hal::fastAtan2( buf[1], buf[0], buf[0], len, angleInDegrees );
00278                 k = 0;
00279 
00280 #if CV_SSE2
00281                 if (USE_SSE2)
00282                 {
00283                     for ( ; k <= len - 4; k += 4)
00284                     {
00285                         __m128 v_src = _mm_loadu_ps(buf[0] + k);
00286                         _mm_storeu_pd(angle + k, _mm_cvtps_pd(v_src));
00287                         _mm_storeu_pd(angle + k + 2, _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_src), 8))));
00288                     }
00289                 }
00290 #endif
00291 
00292                 for( ; k < len; k++ )
00293                     angle[k] = buf[0][k];
00294             }
00295             ptrs[0] += len*esz1;
00296             ptrs[1] += len*esz1;
00297             ptrs[2] += len*esz1;
00298         }
00299     }
00300 }
00301 
00302 #ifdef HAVE_OPENCL
00303 
00304 static bool ocl_cartToPolar( InputArray _src1, InputArray _src2,
00305                              OutputArray _dst1, OutputArray _dst2, bool angleInDegrees )
00306 {
00307     const ocl::Device & d = ocl::Device::getDefault();
00308     int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
00309             rowsPerWI = d.isIntel() ? 4 : 1;
00310     bool doubleSupport = d.doubleFPConfig() > 0;
00311 
00312     if ( !(_src1.dims() <= 2 && _src2.dims() <= 2 &&
00313            (depth == CV_32F || depth == CV_64F) && type == _src2.type()) ||
00314          (depth == CV_64F && !doubleSupport) )
00315         return false;
00316 
00317     ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
00318                   format("-D BINARY_OP -D dstT=%s -D depth=%d -D rowsPerWI=%d -D OP_CTP_%s%s",
00319                          ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
00320                          depth, rowsPerWI, angleInDegrees ? "AD" : "AR",
00321                          doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
00322     if (k.empty())
00323         return false;
00324 
00325     UMat src1 = _src1.getUMat(), src2 = _src2.getUMat();
00326     Size size = src1.size();
00327     CV_Assert( size == src2.size() );
00328 
00329     _dst1.create(size, type);
00330     _dst2.create(size, type);
00331     UMat dst1 = _dst1.getUMat(), dst2 = _dst2.getUMat();
00332 
00333     k.args(ocl::KernelArg::ReadOnlyNoSize(src1),
00334            ocl::KernelArg::ReadOnlyNoSize(src2),
00335            ocl::KernelArg::WriteOnly(dst1, cn),
00336            ocl::KernelArg::WriteOnlyNoSize(dst2));
00337 
00338     size_t globalsize[2] = { (size_t)dst1.cols * cn, ((size_t)dst1.rows + rowsPerWI - 1) / rowsPerWI };
00339     return k.run(2, globalsize, NULL, false);
00340 }
00341 
00342 #endif
00343 
00344 void cartToPolar( InputArray src1, InputArray src2,
00345                   OutputArray dst1, OutputArray dst2, bool angleInDegrees )
00346 {
00347 #ifdef HAVE_OPENCL
00348     CV_OCL_RUN(dst1.isUMat() && dst2.isUMat(),
00349             ocl_cartToPolar(src1, src2, dst1, dst2, angleInDegrees))
00350 #endif
00351 
00352     Mat X = src1.getMat(), Y = src2.getMat();
00353     int type = X.type(), depth = X.depth(), cn = X.channels();
00354     CV_Assert( X.size == Y.size && type == Y.type() && (depth == CV_32F || depth == CV_64F));
00355     dst1.create( X.dims, X.size, type );
00356     dst2.create( X.dims, X.size, type );
00357     Mat Mag = dst1.getMat(), Angle = dst2.getMat();
00358 
00359     const Mat* arrays[] = {&X, &Y, &Mag, &Angle, 0};
00360     uchar* ptrs[4];
00361     NAryMatIterator it(arrays, ptrs);
00362     cv::AutoBuffer<float> _buf;
00363     float* buf[2] = {0, 0};
00364     int j, k, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn);
00365     size_t esz1 = X.elemSize1();
00366 
00367     if( depth == CV_64F )
00368     {
00369         _buf.allocate(blockSize*2);
00370         buf[0] = _buf;
00371         buf[1] = buf[0] + blockSize;
00372     }
00373 
00374     for( size_t i = 0; i < it.nplanes; i++, ++it )
00375     {
00376         for( j = 0; j < total; j += blockSize )
00377         {
00378             int len = std::min(total - j, blockSize);
00379             if( depth == CV_32F )
00380             {
00381                 const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
00382                 float *mag = (float*)ptrs[2], *angle = (float*)ptrs[3];
00383                 hal::magnitude32f( x, y, mag, len );
00384                 hal::fastAtan2( y, x, angle, len, angleInDegrees );
00385             }
00386             else
00387             {
00388                 const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
00389                 double *angle = (double*)ptrs[3];
00390 
00391                 hal::magnitude64f(x, y, (double*)ptrs[2], len);
00392                 k = 0;
00393 
00394 #if CV_SSE2
00395                 if (USE_SSE2)
00396                 {
00397                     for ( ; k <= len - 4; k += 4)
00398                     {
00399                         __m128 v_dst0 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(x + k)),
00400                                                       _mm_cvtpd_ps(_mm_loadu_pd(x + k + 2)));
00401                         __m128 v_dst1 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(y + k)),
00402                                                       _mm_cvtpd_ps(_mm_loadu_pd(y + k + 2)));
00403 
00404                         _mm_storeu_ps(buf[0] + k, v_dst0);
00405                         _mm_storeu_ps(buf[1] + k, v_dst1);
00406                     }
00407                 }
00408 #endif
00409 
00410                 for( ; k < len; k++ )
00411                 {
00412                     buf[0][k] = (float)x[k];
00413                     buf[1][k] = (float)y[k];
00414                 }
00415 
00416                 hal::fastAtan2( buf[1], buf[0], buf[0], len, angleInDegrees );
00417                 k = 0;
00418 
00419 #if CV_SSE2
00420                 if (USE_SSE2)
00421                 {
00422                     for ( ; k <= len - 4; k += 4)
00423                     {
00424                         __m128 v_src = _mm_loadu_ps(buf[0] + k);
00425                         _mm_storeu_pd(angle + k, _mm_cvtps_pd(v_src));
00426                         _mm_storeu_pd(angle + k + 2, _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_src), 8))));
00427                     }
00428                 }
00429 #endif
00430 
00431                 for( ; k < len; k++ )
00432                     angle[k] = buf[0][k];
00433             }
00434             ptrs[0] += len*esz1;
00435             ptrs[1] += len*esz1;
00436             ptrs[2] += len*esz1;
00437             ptrs[3] += len*esz1;
00438         }
00439     }
00440 }
00441 
00442 
00443 /****************************************************************************************\
00444 *                                  Polar -> Cartezian                                    *
00445 \****************************************************************************************/
00446 
00447 static void SinCos_32f( const float *angle, float *sinval, float* cosval,
00448                         int len, int angle_in_degrees )
00449 {
00450     const int N = 64;
00451 
00452     static const double sin_table[] =
00453     {
00454      0.00000000000000000000,     0.09801714032956060400,
00455      0.19509032201612825000,     0.29028467725446233000,
00456      0.38268343236508978000,     0.47139673682599764000,
00457      0.55557023301960218000,     0.63439328416364549000,
00458      0.70710678118654746000,     0.77301045336273699000,
00459      0.83146961230254524000,     0.88192126434835494000,
00460      0.92387953251128674000,     0.95694033573220894000,
00461      0.98078528040323043000,     0.99518472667219682000,
00462      1.00000000000000000000,     0.99518472667219693000,
00463      0.98078528040323043000,     0.95694033573220894000,
00464      0.92387953251128674000,     0.88192126434835505000,
00465      0.83146961230254546000,     0.77301045336273710000,
00466      0.70710678118654757000,     0.63439328416364549000,
00467      0.55557023301960218000,     0.47139673682599786000,
00468      0.38268343236508989000,     0.29028467725446239000,
00469      0.19509032201612861000,     0.09801714032956082600,
00470      0.00000000000000012246,    -0.09801714032956059000,
00471     -0.19509032201612836000,    -0.29028467725446211000,
00472     -0.38268343236508967000,    -0.47139673682599764000,
00473     -0.55557023301960196000,    -0.63439328416364527000,
00474     -0.70710678118654746000,    -0.77301045336273666000,
00475     -0.83146961230254524000,    -0.88192126434835494000,
00476     -0.92387953251128652000,    -0.95694033573220882000,
00477     -0.98078528040323032000,    -0.99518472667219693000,
00478     -1.00000000000000000000,    -0.99518472667219693000,
00479     -0.98078528040323043000,    -0.95694033573220894000,
00480     -0.92387953251128663000,    -0.88192126434835505000,
00481     -0.83146961230254546000,    -0.77301045336273688000,
00482     -0.70710678118654768000,    -0.63439328416364593000,
00483     -0.55557023301960218000,    -0.47139673682599792000,
00484     -0.38268343236509039000,    -0.29028467725446250000,
00485     -0.19509032201612872000,    -0.09801714032956050600,
00486     };
00487 
00488     static const double k2 = (2*CV_PI)/N;
00489 
00490     static const double sin_a0 = -0.166630293345647*k2*k2*k2;
00491     static const double sin_a2 = k2;
00492 
00493     static const double cos_a0 = -0.499818138450326*k2*k2;
00494     /*static const double cos_a2 =  1;*/
00495 
00496     double k1;
00497     int i = 0;
00498 
00499     if( !angle_in_degrees )
00500         k1 = N/(2*CV_PI);
00501     else
00502         k1 = N/360.;
00503 
00504 #if CV_AVX2
00505     if (USE_AVX2)
00506     {
00507         __m128d v_k1 = _mm_set1_pd(k1);
00508         __m128d v_1 = _mm_set1_pd(1);
00509         __m128i v_N1 = _mm_set1_epi32(N - 1);
00510         __m128i v_N4 = _mm_set1_epi32(N >> 2);
00511         __m128d v_sin_a0 = _mm_set1_pd(sin_a0);
00512         __m128d v_sin_a2 = _mm_set1_pd(sin_a2);
00513         __m128d v_cos_a0 = _mm_set1_pd(cos_a0);
00514 
00515         for ( ; i <= len - 4; i += 4)
00516         {
00517             __m128 v_angle = _mm_loadu_ps(angle + i);
00518 
00519             // 0-1
00520             __m128d v_t = _mm_mul_pd(_mm_cvtps_pd(v_angle), v_k1);
00521             __m128i v_it = _mm_cvtpd_epi32(v_t);
00522             v_t = _mm_sub_pd(v_t, _mm_cvtepi32_pd(v_it));
00523 
00524             __m128i v_sin_idx = _mm_and_si128(v_it, v_N1);
00525             __m128i v_cos_idx = _mm_and_si128(_mm_sub_epi32(v_N4, v_sin_idx), v_N1);
00526 
00527             __m128d v_t2 = _mm_mul_pd(v_t, v_t);
00528             __m128d v_sin_b = _mm_mul_pd(_mm_add_pd(_mm_mul_pd(v_sin_a0, v_t2), v_sin_a2), v_t);
00529             __m128d v_cos_b = _mm_add_pd(_mm_mul_pd(v_cos_a0, v_t2), v_1);
00530 
00531             __m128d v_sin_a = _mm_i32gather_pd(sin_table, v_sin_idx, 8);
00532             __m128d v_cos_a = _mm_i32gather_pd(sin_table, v_cos_idx, 8);
00533 
00534             __m128d v_sin_val_0 = _mm_add_pd(_mm_mul_pd(v_sin_a, v_cos_b),
00535                                              _mm_mul_pd(v_cos_a, v_sin_b));
00536             __m128d v_cos_val_0 = _mm_sub_pd(_mm_mul_pd(v_cos_a, v_cos_b),
00537                                              _mm_mul_pd(v_sin_a, v_sin_b));
00538 
00539             // 2-3
00540             v_t = _mm_mul_pd(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_angle), 8))), v_k1);
00541             v_it = _mm_cvtpd_epi32(v_t);
00542             v_t = _mm_sub_pd(v_t, _mm_cvtepi32_pd(v_it));
00543 
00544             v_sin_idx = _mm_and_si128(v_it, v_N1);
00545             v_cos_idx = _mm_and_si128(_mm_sub_epi32(v_N4, v_sin_idx), v_N1);
00546 
00547             v_t2 = _mm_mul_pd(v_t, v_t);
00548             v_sin_b = _mm_mul_pd(_mm_add_pd(_mm_mul_pd(v_sin_a0, v_t2), v_sin_a2), v_t);
00549             v_cos_b = _mm_add_pd(_mm_mul_pd(v_cos_a0, v_t2), v_1);
00550 
00551             v_sin_a = _mm_i32gather_pd(sin_table, v_sin_idx, 8);
00552             v_cos_a = _mm_i32gather_pd(sin_table, v_cos_idx, 8);
00553 
00554             __m128d v_sin_val_1 = _mm_add_pd(_mm_mul_pd(v_sin_a, v_cos_b),
00555                                              _mm_mul_pd(v_cos_a, v_sin_b));
00556             __m128d v_cos_val_1 = _mm_sub_pd(_mm_mul_pd(v_cos_a, v_cos_b),
00557                                              _mm_mul_pd(v_sin_a, v_sin_b));
00558 
00559             _mm_storeu_ps(sinval + i, _mm_movelh_ps(_mm_cvtpd_ps(v_sin_val_0),
00560                                                     _mm_cvtpd_ps(v_sin_val_1)));
00561             _mm_storeu_ps(cosval + i, _mm_movelh_ps(_mm_cvtpd_ps(v_cos_val_0),
00562                                                     _mm_cvtpd_ps(v_cos_val_1)));
00563         }
00564     }
00565 #endif
00566 
00567     for( ; i < len; i++ )
00568     {
00569         double t = angle[i]*k1;
00570         int it = cvRound(t);
00571         t -= it;
00572         int sin_idx = it & (N - 1);
00573         int cos_idx = (N/4 - sin_idx) & (N - 1);
00574 
00575         double sin_b = (sin_a0*t*t + sin_a2)*t;
00576         double cos_b = cos_a0*t*t + 1;
00577 
00578         double sin_a = sin_table[sin_idx];
00579         double cos_a = sin_table[cos_idx];
00580 
00581         double sin_val = sin_a*cos_b + cos_a*sin_b;
00582         double cos_val = cos_a*cos_b - sin_a*sin_b;
00583 
00584         sinval[i] = (float)sin_val;
00585         cosval[i] = (float)cos_val;
00586     }
00587 }
00588 
00589 
00590 #ifdef HAVE_OPENCL
00591 
00592 static bool ocl_polarToCart( InputArray _mag, InputArray _angle,
00593                              OutputArray _dst1, OutputArray _dst2, bool angleInDegrees )
00594 {
00595     const ocl::Device & d = ocl::Device::getDefault();
00596     int type = _angle.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
00597             rowsPerWI = d.isIntel() ? 4 : 1;
00598     bool doubleSupport = d.doubleFPConfig() > 0;
00599 
00600     if ( !doubleSupport && depth == CV_64F )
00601         return false;
00602 
00603     ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
00604                   format("-D dstT=%s -D rowsPerWI=%d -D depth=%d -D BINARY_OP -D OP_PTC_%s%s",
00605                          ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), rowsPerWI,
00606                          depth, angleInDegrees ? "AD" : "AR",
00607                          doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
00608     if (k.empty())
00609         return false;
00610 
00611     UMat mag = _mag.getUMat(), angle = _angle.getUMat();
00612     Size size = angle.size();
00613     CV_Assert(mag.size() == size);
00614 
00615     _dst1.create(size, type);
00616     _dst2.create(size, type);
00617     UMat dst1 = _dst1.getUMat(), dst2 = _dst2.getUMat();
00618 
00619     k.args(ocl::KernelArg::ReadOnlyNoSize(mag), ocl::KernelArg::ReadOnlyNoSize(angle),
00620            ocl::KernelArg::WriteOnly(dst1, cn), ocl::KernelArg::WriteOnlyNoSize(dst2));
00621 
00622     size_t globalsize[2] = { (size_t)dst1.cols * cn, ((size_t)dst1.rows + rowsPerWI - 1) / rowsPerWI };
00623     return k.run(2, globalsize, NULL, false);
00624 }
00625 
00626 #endif
00627 
00628 void polarToCart( InputArray src1, InputArray src2,
00629                   OutputArray dst1, OutputArray dst2, bool angleInDegrees )
00630 {
00631     int type = src2.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
00632     CV_Assert((depth == CV_32F || depth == CV_64F) && (src1.empty() || src1.type() == type));
00633 
00634 #ifdef HAVE_OPENCL
00635     CV_OCL_RUN(!src1.empty() && src2.dims() <= 2 && dst1.isUMat() && dst2.isUMat(),
00636                ocl_polarToCart(src1, src2, dst1, dst2, angleInDegrees))
00637 #endif
00638 
00639     Mat Mag = src1.getMat(), Angle = src2.getMat();
00640     CV_Assert( Mag.empty() || Angle.size == Mag.size);
00641     dst1.create( Angle.dims, Angle.size, type );
00642     dst2.create( Angle.dims, Angle.size, type );
00643     Mat X = dst1.getMat(), Y = dst2.getMat();
00644 
00645 #if defined(HAVE_IPP)
00646     CV_IPP_CHECK()
00647     {
00648         if (Mag.isContinuous() && Angle.isContinuous() && X.isContinuous() && Y.isContinuous() && !angleInDegrees)
00649         {
00650             typedef IppStatus (CV_STDCALL * ippsPolarToCart)(const void * pSrcMagn, const void * pSrcPhase,
00651                                                              void * pDstRe, void * pDstIm, int len);
00652             ippsPolarToCart ippFunc =
00653             depth == CV_32F ? (ippsPolarToCart)ippsPolarToCart_32f :
00654             depth == CV_64F ? (ippsPolarToCart)ippsPolarToCart_64f : 0;
00655             CV_Assert(ippFunc != 0);
00656 
00657             IppStatus status = ippFunc(Mag.ptr(), Angle.ptr(), X.ptr(), Y.ptr(), static_cast<int>(cn * X.total()));
00658             if (status >= 0)
00659             {
00660                 CV_IMPL_ADD(CV_IMPL_IPP);
00661                 return;
00662             }
00663             setIppErrorStatus();
00664         }
00665     }
00666 #endif
00667 
00668     const Mat* arrays[] = {&Mag, &Angle, &X, &Y, 0};
00669     uchar* ptrs[4];
00670     NAryMatIterator it(arrays, ptrs);
00671     cv::AutoBuffer<float> _buf;
00672     float* buf[2] = {0, 0};
00673     int j, k, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn);
00674     size_t esz1 = Angle.elemSize1();
00675 
00676     if( depth == CV_64F )
00677     {
00678         _buf.allocate(blockSize*2);
00679         buf[0] = _buf;
00680         buf[1] = buf[0] + blockSize;
00681     }
00682 
00683     for( size_t i = 0; i < it.nplanes; i++, ++it )
00684     {
00685         for( j = 0; j < total; j += blockSize )
00686         {
00687             int len = std::min(total - j, blockSize);
00688             if( depth == CV_32F )
00689             {
00690                 const float *mag = (const float*)ptrs[0], *angle = (const float*)ptrs[1];
00691                 float *x = (float*)ptrs[2], *y = (float*)ptrs[3];
00692 
00693                 SinCos_32f( angle, y, x, len, angleInDegrees );
00694                 if( mag )
00695                 {
00696                     k = 0;
00697 
00698                     #if CV_NEON
00699                     for( ; k <= len - 4; k += 4 )
00700                     {
00701                         float32x4_t v_m = vld1q_f32(mag + k);
00702                         vst1q_f32(x + k, vmulq_f32(vld1q_f32(x + k), v_m));
00703                         vst1q_f32(y + k, vmulq_f32(vld1q_f32(y + k), v_m));
00704                     }
00705                     #elif CV_SSE2
00706                     if (USE_SSE2)
00707                     {
00708                         for( ; k <= len - 4; k += 4 )
00709                         {
00710                             __m128 v_m = _mm_loadu_ps(mag + k);
00711                             _mm_storeu_ps(x + k, _mm_mul_ps(_mm_loadu_ps(x + k), v_m));
00712                             _mm_storeu_ps(y + k, _mm_mul_ps(_mm_loadu_ps(y + k), v_m));
00713                         }
00714                     }
00715                     #endif
00716 
00717                     for( ; k < len; k++ )
00718                     {
00719                         float m = mag[k];
00720                         x[k] *= m; y[k] *= m;
00721                     }
00722                 }
00723             }
00724             else
00725             {
00726                 const double *mag = (const double*)ptrs[0], *angle = (const double*)ptrs[1];
00727                 double *x = (double*)ptrs[2], *y = (double*)ptrs[3];
00728 
00729                 for( k = 0; k < len; k++ )
00730                     buf[0][k] = (float)angle[k];
00731 
00732                 SinCos_32f( buf[0], buf[1], buf[0], len, angleInDegrees );
00733                 if( mag )
00734                     for( k = 0; k < len; k++ )
00735                     {
00736                         double m = mag[k];
00737                         x[k] = buf[0][k]*m; y[k] = buf[1][k]*m;
00738                     }
00739                 else
00740                 {
00741                     std::memcpy(x, buf[0], sizeof(float) * len);
00742                     std::memcpy(y, buf[1], sizeof(float) * len);
00743                 }
00744             }
00745 
00746             if( ptrs[0] )
00747                 ptrs[0] += len*esz1;
00748             ptrs[1] += len*esz1;
00749             ptrs[2] += len*esz1;
00750             ptrs[3] += len*esz1;
00751         }
00752     }
00753 }
00754 
00755 /****************************************************************************************\
00756 *                                          E X P                                         *
00757 \****************************************************************************************/
00758 
00759 #ifdef HAVE_IPP
00760 static void Exp_32f_ipp(const float *x, float *y, int n)
00761 {
00762     CV_IPP_CHECK()
00763     {
00764         if (0 <= ippsExp_32f_A21(x, y, n))
00765         {
00766             CV_IMPL_ADD(CV_IMPL_IPP);
00767             return;
00768         }
00769         setIppErrorStatus();
00770     }
00771     hal::exp32f(x, y, n);
00772 }
00773 
00774 static void Exp_64f_ipp(const double *x, double *y, int n)
00775 {
00776     CV_IPP_CHECK()
00777     {
00778         if (0 <= ippsExp_64f_A50(x, y, n))
00779         {
00780             CV_IMPL_ADD(CV_IMPL_IPP);
00781             return;
00782         }
00783         setIppErrorStatus();
00784     }
00785     hal::exp64f(x, y, n);
00786 }
00787 
00788 #define Exp_32f Exp_32f_ipp
00789 #define Exp_64f Exp_64f_ipp
00790 #else
00791 #define Exp_32f hal::exp32f
00792 #define Exp_64f hal::exp64f
00793 #endif
00794 
00795 
00796 void exp( InputArray _src, OutputArray _dst )
00797 {
00798     int type = _src.type(), depth = _src.depth(), cn = _src.channels();
00799     CV_Assert( depth == CV_32F || depth == CV_64F );
00800 
00801 #ifdef HAVE_OPENCL
00802     CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2,
00803                ocl_math_op(_src, noArray(), _dst, OCL_OP_EXP))
00804 #endif
00805 
00806     Mat src = _src.getMat();
00807     _dst.create( src.dims, src.size, type );
00808     Mat dst = _dst.getMat();
00809 
00810     const Mat* arrays[] = {&src, &dst, 0};
00811     uchar* ptrs[2];
00812     NAryMatIterator it(arrays, ptrs);
00813     int len = (int)(it.size*cn);
00814 
00815     for( size_t i = 0; i < it.nplanes; i++, ++it )
00816     {
00817         if( depth == CV_32F )
00818             Exp_32f((const float*)ptrs[0], (float*)ptrs[1], len);
00819         else
00820             Exp_64f((const double*)ptrs[0], (double*)ptrs[1], len);
00821     }
00822 }
00823 
00824 
00825 /****************************************************************************************\
00826 *                                          L O G                                         *
00827 \****************************************************************************************/
00828 
00829 #ifdef HAVE_IPP
00830 static void Log_32f_ipp(const float *x, float *y, int n)
00831 {
00832     CV_IPP_CHECK()
00833     {
00834         if (0 <= ippsLn_32f_A21(x, y, n))
00835         {
00836             CV_IMPL_ADD(CV_IMPL_IPP);
00837             return;
00838         }
00839         setIppErrorStatus();
00840     }
00841     hal::log32f(x, y, n);
00842 }
00843 
00844 static void Log_64f_ipp(const double *x, double *y, int n)
00845 {
00846     CV_IPP_CHECK()
00847     {
00848         if (0 <= ippsLn_64f_A50(x, y, n))
00849         {
00850             CV_IMPL_ADD(CV_IMPL_IPP);
00851             return;
00852         }
00853         setIppErrorStatus();
00854     }
00855     hal::log64f(x, y, n);
00856 }
00857 
00858 #define Log_32f Log_32f_ipp
00859 #define Log_64f Log_64f_ipp
00860 #else
00861 #define Log_32f hal::log32f
00862 #define Log_64f hal::log64f
00863 #endif
00864 
00865 void log( InputArray _src, OutputArray _dst )
00866 {
00867     int type = _src.type(), depth = _src.depth(), cn = _src.channels();
00868     CV_Assert( depth == CV_32F || depth == CV_64F );
00869 
00870 #ifdef HAVE_OPENCL
00871     CV_OCL_RUN( _dst.isUMat() && _src.dims() <= 2,
00872                 ocl_math_op(_src, noArray(), _dst, OCL_OP_LOG))
00873 #endif
00874 
00875     Mat src = _src.getMat();
00876     _dst.create( src.dims, src.size, type );
00877     Mat dst = _dst.getMat();
00878 
00879     const Mat* arrays[] = {&src, &dst, 0};
00880     uchar* ptrs[2];
00881     NAryMatIterator it(arrays, ptrs);
00882     int len = (int)(it.size*cn);
00883 
00884     for( size_t i = 0; i < it.nplanes; i++, ++it )
00885     {
00886         if( depth == CV_32F )
00887             Log_32f( (const float*)ptrs[0], (float*)ptrs[1], len );
00888         else
00889             Log_64f( (const double*)ptrs[0], (double*)ptrs[1], len );
00890     }
00891 }
00892 
00893 /****************************************************************************************\
00894 *                                    P O W E R                                           *
00895 \****************************************************************************************/
00896 
00897 template <typename T, typename WT>
00898 struct iPow_SIMD
00899 {
00900     int operator() ( const T *, T *, int, int)
00901     {
00902         return 0;
00903     }
00904 };
00905 
00906 #if CV_SIMD128
00907 
00908 template <>
00909 struct iPow_SIMD<uchar, int>
00910 {
00911     int operator() ( const uchar * src, uchar * dst, int len, int power )
00912     {
00913         int i = 0;
00914         v_uint32x4 v_1 = v_setall_u32(1u);
00915 
00916         for ( ; i <= len - 8; i += 8)
00917         {
00918             v_uint32x4 v_a1 = v_1, v_a2 = v_1;
00919             v_uint16x8 v = v_load_expand(src + i);
00920             v_uint32x4 v_b1, v_b2;
00921             v_expand(v, v_b1, v_b2);
00922             int p = power;
00923 
00924             while( p > 1 )
00925             {
00926                 if (p & 1)
00927                 {
00928                     v_a1 *= v_b1;
00929                     v_a2 *= v_b2;
00930                 }
00931                 v_b1 *= v_b1;
00932                 v_b2 *= v_b2;
00933                 p >>= 1;
00934             }
00935 
00936             v_a1 *= v_b1;
00937             v_a2 *= v_b2;
00938 
00939             v = v_pack(v_a1, v_a2);
00940             v_pack_store(dst + i, v);
00941         }
00942 
00943         return i;
00944     }
00945 };
00946 
00947 template <>
00948 struct iPow_SIMD<schar, int>
00949 {
00950     int operator() ( const schar * src, schar * dst, int len, int power)
00951     {
00952         int i = 0;
00953         v_int32x4 v_1 = v_setall_s32(1);
00954 
00955         for ( ; i <= len - 8; i += 8)
00956         {
00957             v_int32x4 v_a1 = v_1, v_a2 = v_1;
00958             v_int16x8 v = v_load_expand(src + i);
00959             v_int32x4 v_b1, v_b2;
00960             v_expand(v, v_b1, v_b2);
00961             int p = power;
00962 
00963             while( p > 1 )
00964             {
00965                 if (p & 1)
00966                 {
00967                     v_a1 *= v_b1;
00968                     v_a2 *= v_b2;
00969                 }
00970                 v_b1 *= v_b1;
00971                 v_b2 *= v_b2;
00972                 p >>= 1;
00973             }
00974 
00975             v_a1 *= v_b1;
00976             v_a2 *= v_b2;
00977 
00978             v = v_pack(v_a1, v_a2);
00979             v_pack_store(dst + i, v);
00980         }
00981 
00982         return i;
00983     }
00984 };
00985 
00986 template <>
00987 struct iPow_SIMD<ushort, int>
00988 {
00989     int operator() ( const ushort * src, ushort * dst, int len, int power)
00990     {
00991         int i = 0;
00992         v_uint32x4 v_1 = v_setall_u32(1u);
00993 
00994         for ( ; i <= len - 8; i += 8)
00995         {
00996             v_uint32x4 v_a1 = v_1, v_a2 = v_1;
00997             v_uint16x8 v = v_load(src + i);
00998             v_uint32x4 v_b1, v_b2;
00999             v_expand(v, v_b1, v_b2);
01000             int p = power;
01001 
01002             while( p > 1 )
01003             {
01004                 if (p & 1)
01005                 {
01006                     v_a1 *= v_b1;
01007                     v_a2 *= v_b2;
01008                 }
01009                 v_b1 *= v_b1;
01010                 v_b2 *= v_b2;
01011                 p >>= 1;
01012             }
01013 
01014             v_a1 *= v_b1;
01015             v_a2 *= v_b2;
01016 
01017             v = v_pack(v_a1, v_a2);
01018             v_store(dst + i, v);
01019         }
01020 
01021         return i;
01022     }
01023 };
01024 
01025 template <>
01026 struct iPow_SIMD<short, int>
01027 {
01028     int operator() ( const short * src, short * dst, int len, int power)
01029     {
01030         int i = 0;
01031         v_int32x4 v_1 = v_setall_s32(1);
01032 
01033         for ( ; i <= len - 8; i += 8)
01034         {
01035             v_int32x4 v_a1 = v_1, v_a2 = v_1;
01036             v_int16x8 v = v_load(src + i);
01037             v_int32x4 v_b1, v_b2;
01038             v_expand(v, v_b1, v_b2);
01039             int p = power;
01040 
01041             while( p > 1 )
01042             {
01043                 if (p & 1)
01044                 {
01045                     v_a1 *= v_b1;
01046                     v_a2 *= v_b2;
01047                 }
01048                 v_b1 *= v_b1;
01049                 v_b2 *= v_b2;
01050                 p >>= 1;
01051             }
01052 
01053             v_a1 *= v_b1;
01054             v_a2 *= v_b2;
01055 
01056             v = v_pack(v_a1, v_a2);
01057             v_store(dst + i, v);
01058         }
01059 
01060         return i;
01061     }
01062 };
01063 
01064 template <>
01065 struct iPow_SIMD<int, int>
01066 {
01067     int operator() ( const int * src, int * dst, int len, int power)
01068     {
01069         int i = 0;
01070         v_int32x4 v_1 = v_setall_s32(1);
01071 
01072         for ( ; i <= len - 8; i += 8)
01073         {
01074             v_int32x4 v_a1 = v_1, v_a2 = v_1;
01075             v_int32x4 v_b1 = v_load(src + i), v_b2 = v_load(src + i + 4);
01076             int p = power;
01077 
01078             while( p > 1 )
01079             {
01080                 if (p & 1)
01081                 {
01082                     v_a1 *= v_b1;
01083                     v_a2 *= v_b2;
01084                 }
01085                 v_b1 *= v_b1;
01086                 v_b2 *= v_b2;
01087                 p >>= 1;
01088             }
01089 
01090             v_a1 *= v_b1;
01091             v_a2 *= v_b2;
01092 
01093             v_store(dst + i, v_a1);
01094             v_store(dst + i + 4, v_a2);
01095         }
01096 
01097         return i;
01098     }
01099 };
01100 
01101 template <>
01102 struct iPow_SIMD<float, float>
01103 {
01104     int operator() ( const float * src, float * dst, int len, int power)
01105     {
01106         int i = 0;
01107         v_float32x4 v_1 = v_setall_f32(1.f);
01108 
01109         for ( ; i <= len - 8; i += 8)
01110         {
01111             v_float32x4 v_a1 = v_1, v_a2 = v_1;
01112             v_float32x4 v_b1 = v_load(src + i), v_b2 = v_load(src + i + 4);
01113             int p = std::abs(power);
01114             if( power < 0 )
01115             {
01116                 v_b1 = v_1 / v_b1;
01117                 v_b2 = v_1 / v_b2;
01118             }
01119 
01120             while( p > 1 )
01121             {
01122                 if (p & 1)
01123                 {
01124                     v_a1 *= v_b1;
01125                     v_a2 *= v_b2;
01126                 }
01127                 v_b1 *= v_b1;
01128                 v_b2 *= v_b2;
01129                 p >>= 1;
01130             }
01131 
01132             v_a1 *= v_b1;
01133             v_a2 *= v_b2;
01134 
01135             v_store(dst + i, v_a1);
01136             v_store(dst + i + 4, v_a2);
01137         }
01138 
01139         return i;
01140     }
01141 };
01142 
01143 #if CV_SIMD128_64F
01144 template <>
01145 struct iPow_SIMD<double, double>
01146 {
01147     int operator() ( const double * src, double * dst, int len, int power)
01148     {
01149         int i = 0;
01150         v_float64x2 v_1 = v_setall_f64(1.);
01151 
01152         for ( ; i <= len - 4; i += 4)
01153         {
01154             v_float64x2 v_a1 = v_1, v_a2 = v_1;
01155             v_float64x2 v_b1 = v_load(src + i), v_b2 = v_load(src + i + 2);
01156             int p = std::abs(power);
01157             if( power < 0 )
01158             {
01159                 v_b1 = v_1 / v_b1;
01160                 v_b2 = v_1 / v_b2;
01161             }
01162 
01163             while( p > 1 )
01164             {
01165                 if (p & 1)
01166                 {
01167                     v_a1 *= v_b1;
01168                     v_a2 *= v_b2;
01169                 }
01170                 v_b1 *= v_b1;
01171                 v_b2 *= v_b2;
01172                 p >>= 1;
01173             }
01174 
01175             v_a1 *= v_b1;
01176             v_a2 *= v_b2;
01177 
01178             v_store(dst + i, v_a1);
01179             v_store(dst + i + 2, v_a2);
01180         }
01181 
01182         return i;
01183     }
01184 };
01185 #endif
01186 
01187 #endif
01188 
01189 template<typename T, typename WT>
01190 static void
01191 iPow_i( const T* src, T* dst, int len, int power )
01192 {
01193     if( power < 0 )
01194     {
01195         T tab[5] =
01196         {
01197             saturate_cast<T>(power == -1 ? -1 : 0), saturate_cast<T>((power & 1) ? -1 : 1),
01198             std::numeric_limits<T>::max(), 1, saturate_cast<T>(power == -1 ? 1 : 0)
01199         };
01200         for( int i = 0; i < len; i++ )
01201         {
01202             T val = src[i];
01203             dst[i] = cv_abs(val) <= 2 ? tab[val + 2] : (T)0;
01204         }
01205     }
01206     else
01207     {
01208         iPow_SIMD<T, WT> vop;
01209         int i = vop(src, dst, len, power);
01210 
01211         for( ; i < len; i++ )
01212         {
01213             WT a = 1, b = src[i];
01214             int p = power;
01215             while( p > 1 )
01216             {
01217                 if( p & 1 )
01218                     a *= b;
01219                 b *= b;
01220                 p >>= 1;
01221             }
01222 
01223             a *= b;
01224             dst[i] = saturate_cast<T>(a);
01225         }
01226     }
01227 }
01228 
01229 template<typename T>
01230 static void
01231 iPow_f( const T* src, T* dst, int len, int power0 )
01232 {
01233     iPow_SIMD<T, T> vop;
01234     int i = vop(src, dst, len, power0);
01235     int power = std::abs(power0);
01236 
01237     for( ; i < len; i++ )
01238     {
01239         T a = 1, b = src[i];
01240         int p = power;
01241         if( power0 < 0 )
01242             b = 1/b;
01243 
01244         while( p > 1 )
01245         {
01246             if( p & 1 )
01247                 a *= b;
01248             b *= b;
01249             p >>= 1;
01250         }
01251 
01252         a *= b;
01253         dst[i] = a;
01254     }
01255 }
01256 
01257 static void iPow8u(const uchar* src, uchar* dst, int len, int power)
01258 {
01259     iPow_i<uchar, unsigned>(src, dst, len, power);
01260 }
01261 
01262 static void iPow8s(const schar* src, schar* dst, int len, int power)
01263 {
01264     iPow_i<schar, int>(src, dst, len, power);
01265 }
01266 
01267 static void iPow16u(const ushort* src, ushort* dst, int len, int power)
01268 {
01269     iPow_i<ushort, unsigned>(src, dst, len, power);
01270 }
01271 
01272 static void iPow16s(const short* src, short* dst, int len, int power)
01273 {
01274     iPow_i<short, int>(src, dst, len, power);
01275 }
01276 
01277 static void iPow32s(const int* src, int* dst, int len, int power)
01278 {
01279     iPow_i<int, int>(src, dst, len, power);
01280 }
01281 
01282 static void iPow32f(const float* src, float* dst, int len, int power)
01283 {
01284     iPow_f<float>(src, dst, len, power);
01285 }
01286 
01287 static void iPow64f(const double* src, double* dst, int len, int power)
01288 {
01289     iPow_f<double>(src, dst, len, power);
01290 }
01291 
01292 
01293 typedef void (*IPowFunc)( const uchar* src, uchar* dst, int len, int power );
01294 
01295 static IPowFunc ipowTab[] =
01296 {
01297     (IPowFunc)iPow8u, (IPowFunc)iPow8s, (IPowFunc)iPow16u, (IPowFunc)iPow16s,
01298     (IPowFunc)iPow32s, (IPowFunc)iPow32f, (IPowFunc)iPow64f, 0
01299 };
01300 
01301 #ifdef HAVE_OPENCL
01302 
01303 static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
01304                     bool is_ipower, int ipower)
01305 {
01306     const ocl::Device & d = ocl::Device::getDefault();
01307     int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
01308             rowsPerWI = d.isIntel() ? 4 : 1;
01309     bool doubleSupport = d.doubleFPConfig() > 0;
01310 
01311     _dst.createSameSize(_src, type);
01312     if (is_ipower)
01313     {
01314         if (ipower == 0)
01315         {
01316             _dst.setTo(Scalar::all(1));
01317             return true;
01318         }
01319         if (ipower == 1)
01320         {
01321             _src.copyTo(_dst);
01322             return true;
01323         }
01324         if( ipower < 0 )
01325         {
01326             if( depth == CV_32F || depth == CV_64F )
01327                 is_ipower = false;
01328             else
01329                 return false;
01330         }
01331     }
01332 
01333     if (depth == CV_64F && !doubleSupport)
01334         return false;
01335 
01336     bool issqrt = std::abs(power - 0.5) < DBL_EPSILON;
01337     const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW";
01338 
01339     ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
01340                   format("-D dstT=%s -D depth=%d -D rowsPerWI=%d -D %s -D UNARY_OP%s",
01341                          ocl::typeToStr(depth), depth,  rowsPerWI, op,
01342                          doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
01343     if (k.empty())
01344         return false;
01345 
01346     UMat src = _src.getUMat();
01347     _dst.create(src.size(), type);
01348     UMat dst = _dst.getUMat();
01349 
01350     ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
01351             dstarg = ocl::KernelArg::WriteOnly(dst, cn);
01352 
01353     if (issqrt)
01354         k.args(srcarg, dstarg);
01355     else if (is_ipower)
01356         k.args(srcarg, dstarg, ipower);
01357     else
01358     {
01359         if (depth == CV_32F)
01360             k.args(srcarg, dstarg, (float)power);
01361         else
01362             k.args(srcarg, dstarg, power);
01363     }
01364 
01365     size_t globalsize[2] = { (size_t)dst.cols *  cn, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
01366     return k.run(2, globalsize, NULL, false);
01367 }
01368 
01369 #endif
01370 
01371 static void InvSqrt_32f(const float* src, float* dst, int n) { hal::invSqrt32f(src, dst, n); }
01372 static void InvSqrt_64f(const double* src, double* dst, int n) { hal::invSqrt64f(src, dst, n); }
01373 static void Sqrt_32f(const float* src, float* dst, int n) { hal::sqrt32f(src, dst, n); }
01374 static void Sqrt_64f(const double* src, double* dst, int n) { hal::sqrt64f(src, dst, n); }
01375 
01376 void pow( InputArray _src, double power, OutputArray _dst )
01377 {
01378     int type = _src.type(), depth = CV_MAT_DEPTH(type),
01379             cn = CV_MAT_CN(type), ipower = cvRound(power);
01380     bool is_ipower = fabs(ipower - power) < DBL_EPSILON;
01381 #ifdef HAVE_OPENCL
01382     bool useOpenCL = _dst.isUMat() && _src.dims() <= 2;
01383 #endif
01384 
01385     if( is_ipower
01386 #ifdef HAVE_OPENCL
01387             && !(useOpenCL && ocl::Device::getDefault().isIntel() && depth != CV_64F)
01388 #endif
01389       )
01390     {
01391         switch( ipower )
01392         {
01393         case 0:
01394             _dst.createSameSize(_src, type);
01395             _dst.setTo(Scalar::all(1));
01396             return;
01397         case 1:
01398             _src.copyTo(_dst);
01399             return;
01400         case 2:
01401             multiply(_src, _src, _dst);
01402             return;
01403         }
01404     }
01405     else
01406         CV_Assert( depth == CV_32F || depth == CV_64F );
01407 
01408 #ifdef HAVE_OPENCL
01409     CV_OCL_RUN(useOpenCL, ocl_pow(_src, power, _dst, is_ipower, ipower))
01410 #endif
01411 
01412     Mat src = _src.getMat();
01413     _dst.create( src.dims, src.size, type );
01414     Mat dst = _dst.getMat();
01415 
01416     const Mat* arrays[] = {&src, &dst, 0};
01417     uchar* ptrs[2];
01418     NAryMatIterator it(arrays, ptrs);
01419     int len = (int)(it.size*cn);
01420 
01421     if( is_ipower )
01422     {
01423         IPowFunc func = ipowTab[depth];
01424         CV_Assert( func != 0 );
01425 
01426         for( size_t i = 0; i < it.nplanes; i++, ++it )
01427             func( ptrs[0], ptrs[1], len, ipower );
01428     }
01429     else if( fabs(fabs(power) - 0.5) < DBL_EPSILON )
01430     {
01431         MathFunc func = power < 0 ?
01432             (depth == CV_32F ? (MathFunc)InvSqrt_32f : (MathFunc)InvSqrt_64f) :
01433             (depth == CV_32F ? (MathFunc)Sqrt_32f : (MathFunc)Sqrt_64f);
01434 
01435         for( size_t i = 0; i < it.nplanes; i++, ++it )
01436             func( ptrs[0], ptrs[1], len );
01437     }
01438     else
01439     {
01440         int j, k, blockSize = std::min(len, ((BLOCK_SIZE + cn-1)/cn)*cn);
01441         size_t esz1 = src.elemSize1();
01442         AutoBuffer<uchar>  buf;
01443         Cv32suf inf32, nan32;
01444         Cv64suf inf64, nan64;
01445         float* fbuf = 0;
01446         double* dbuf = 0;
01447         inf32.i = 0x7f800000;
01448         nan32.i = 0x7fffffff;
01449         inf64.i = CV_BIG_INT(0x7FF0000000000000);
01450         nan64.i = CV_BIG_INT(0x7FFFFFFFFFFFFFFF);
01451 
01452         if( src.ptr() == dst.ptr() )
01453         {
01454             buf.allocate(blockSize*esz1);
01455             fbuf = (float*)(uchar*)buf;
01456             dbuf = (double*)(uchar*)buf;
01457         }
01458 
01459         for( size_t i = 0; i < it.nplanes; i++, ++it )
01460         {
01461             for( j = 0; j < len; j += blockSize )
01462             {
01463                 int bsz = std::min(len - j, blockSize);
01464 
01465             #if defined(HAVE_IPP)
01466                 CV_IPP_CHECK()
01467                 {
01468                     IppStatus status = depth == CV_32F ?
01469                     ippsPowx_32f_A21((const float*)ptrs[0], (float)power, (float*)ptrs[1], bsz) :
01470                     ippsPowx_64f_A50((const double*)ptrs[0], (double)power, (double*)ptrs[1], bsz);
01471 
01472                     if (status >= 0)
01473                     {
01474                         CV_IMPL_ADD(CV_IMPL_IPP);
01475                         ptrs[0] += bsz*esz1;
01476                         ptrs[1] += bsz*esz1;
01477                         continue;
01478                     }
01479                     setIppErrorStatus();
01480                 }
01481             #endif
01482 
01483                 if( depth == CV_32F )
01484                 {
01485                     float* x0 = (float*)ptrs[0];
01486                     float* x = fbuf ? fbuf : x0;
01487                     float* y = (float*)ptrs[1];
01488 
01489                     if( x != x0 )
01490                         memcpy(x, x0, bsz*esz1);
01491 
01492                     Log_32f(x, y, bsz);
01493                     for( k = 0; k < bsz; k++ )
01494                         y[k] = (float)(y[k]*power);
01495                     Exp_32f(y, y, bsz);
01496                     for( k = 0; k < bsz; k++ )
01497                     {
01498                         if( x0[k] <= 0 )
01499                         {
01500                             if( x0[k] == 0.f )
01501                             {
01502                                 if( power < 0 )
01503                                     y[k] = inf32.f;
01504                             }
01505                             else
01506                                 y[k] = nan32.f;
01507                         }
01508                     }
01509                 }
01510                 else
01511                 {
01512                     double* x0 = (double*)ptrs[0];
01513                     double* x = dbuf ? dbuf : x0;
01514                     double* y = (double*)ptrs[1];
01515 
01516                     if( x != x0 )
01517                         memcpy(x, x0, bsz*esz1);
01518 
01519                     Log_64f(x, y, bsz);
01520                     for( k = 0; k < bsz; k++ )
01521                         y[k] *= power;
01522                     Exp_64f(y, y, bsz);
01523 
01524                     for( k = 0; k < bsz; k++ )
01525                     {
01526                         if( x0[k] <= 0 )
01527                         {
01528                             if( x0[k] == 0. )
01529                             {
01530                                 if( power < 0 )
01531                                     y[k] = inf64.f;
01532                             }
01533                             else
01534                                 y[k] = nan64.f;
01535                         }
01536                     }
01537                 }
01538                 ptrs[0] += bsz*esz1;
01539                 ptrs[1] += bsz*esz1;
01540             }
01541         }
01542     }
01543 }
01544 
01545 void sqrt(InputArray a, OutputArray b)
01546 {
01547     cv::pow(a, 0.5, b);
01548 }
01549 
01550 /************************** CheckArray for NaN's, Inf's *********************************/
01551 
01552 template<int cv_mat_type> struct mat_type_assotiations{};
01553 
01554 template<> struct mat_type_assotiations<CV_8U>
01555 {
01556     typedef unsigned char type;
01557     static const type min_allowable = 0x0;
01558     static const type max_allowable = 0xFF;
01559 };
01560 
01561 template<> struct mat_type_assotiations<CV_8S>
01562 {
01563     typedef signed char type;
01564     static const type min_allowable = SCHAR_MIN;
01565     static const type max_allowable = SCHAR_MAX;
01566 };
01567 
01568 template<> struct mat_type_assotiations<CV_16U>
01569 {
01570     typedef unsigned short type;
01571     static const type min_allowable = 0x0;
01572     static const type max_allowable = USHRT_MAX;
01573 };
01574 template<> struct mat_type_assotiations<CV_16S>
01575 {
01576     typedef signed short type;
01577     static const type min_allowable = SHRT_MIN;
01578     static const type max_allowable = SHRT_MAX;
01579 };
01580 
01581 template<> struct mat_type_assotiations<CV_32S>
01582 {
01583     typedef int type;
01584     static const type min_allowable = (-INT_MAX - 1);
01585     static const type max_allowable = INT_MAX;
01586 };
01587 
01588 template<int depth>
01589 static bool checkIntegerRange(cv::Mat src, Point& bad_pt, int minVal, int maxVal)
01590 {
01591     typedef mat_type_assotiations<depth> type_ass;
01592 
01593     if (minVal < type_ass::min_allowable && maxVal > type_ass::max_allowable)
01594     {
01595         return true;
01596     }
01597     else if (minVal > type_ass::max_allowable || maxVal < type_ass::min_allowable || maxVal < minVal)
01598     {
01599         bad_pt = cv::Point (0,0);
01600         return false;
01601     }
01602     cv::Mat as_one_channel = src.reshape(1,0);
01603 
01604     for (int j = 0; j < as_one_channel.rows; ++j)
01605         for (int i = 0; i < as_one_channel.cols; ++i)
01606         {
01607             typename type_ass::type v = as_one_channel.at<typename type_ass::type>(j ,i);
01608             if (v < minVal || v > maxVal)
01609             {
01610                 bad_pt.y = j;
01611                 bad_pt.x = i / src.channels();
01612                 return false;
01613             }
01614         }
01615 
01616     return true;
01617 }
01618 
01619 typedef bool (*check_range_function)(cv::Mat src, Point& bad_pt, int minVal, int maxVal);
01620 
01621 check_range_function check_range_functions[] =
01622 {
01623     &checkIntegerRange<CV_8U>,
01624     &checkIntegerRange<CV_8S>,
01625     &checkIntegerRange<CV_16U>,
01626     &checkIntegerRange<CV_16S>,
01627     &checkIntegerRange<CV_32S>
01628 };
01629 
01630 bool checkRange(InputArray _src, bool quiet, Point* pt, double minVal, double maxVal)
01631 {
01632     Mat src = _src.getMat();
01633 
01634     if ( src.dims > 2 )
01635     {
01636         CV_Assert(pt == NULL); // no way to provide location info
01637 
01638         const Mat* arrays[] = {&src, 0};
01639         Mat planes[1];
01640         NAryMatIterator it(arrays, planes);
01641 
01642         for ( size_t i = 0; i < it.nplanes; i++, ++it )
01643         {
01644             if (!checkRange( it.planes[0], quiet, NULL, minVal, maxVal ))
01645             {
01646                 return false;
01647             }
01648         }
01649         return true;
01650     }
01651 
01652     int depth = src.depth();
01653     Point badPt(-1, -1);
01654 
01655     if (depth < CV_32F)
01656     {
01657         int minVali = minVal <= INT_MIN ? INT_MIN : cvFloor(minVal);
01658         int maxVali = maxVal > INT_MAX ? INT_MAX : cvCeil(maxVal) - 1;
01659 
01660         (check_range_functions[depth])(src, badPt, minVali, maxVali);
01661     }
01662     else
01663     {
01664         int i, loc = 0;
01665         int cn = src.channels();
01666         Size size = getContinuousSize( src, cn );
01667 
01668         if( depth == CV_32F )
01669         {
01670             Cv32suf a, b;
01671             int ia, ib;
01672             const int* isrc = src.ptr<int>();
01673             size_t step = src.step/sizeof(isrc[0]);
01674 
01675             a.f = (float)std::max(minVal, (double)-FLT_MAX);
01676             b.f = (float)std::min(maxVal, (double)FLT_MAX);
01677 
01678             ia = CV_TOGGLE_FLT(a.i);
01679             ib = CV_TOGGLE_FLT(b.i);
01680 
01681             for( ; badPt.x < 0 && size.height--; loc += size.width, isrc += step )
01682             {
01683                 for( i = 0; i < size.width; i++ )
01684                 {
01685                     int val = isrc[i];
01686                     val = CV_TOGGLE_FLT(val);
01687 
01688                     if( val < ia || val >= ib )
01689                     {
01690                         int pixelId = (loc + i) / cn;
01691                         badPt = Point(pixelId % src.cols, pixelId / src.cols);
01692                         break;
01693                     }
01694                 }
01695             }
01696         }
01697         else
01698         {
01699             Cv64suf a, b;
01700             int64 ia, ib;
01701             const int64* isrc = src.ptr<int64>();
01702             size_t step = src.step/sizeof(isrc[0]);
01703 
01704             a.f = minVal;
01705             b.f = maxVal;
01706 
01707             ia = CV_TOGGLE_DBL(a.i);
01708             ib = CV_TOGGLE_DBL(b.i);
01709 
01710             for( ; badPt.x < 0 && size.height--; loc += size.width, isrc += step )
01711             {
01712                 for( i = 0; i < size.width; i++ )
01713                 {
01714                     int64 val = isrc[i];
01715                     val = CV_TOGGLE_DBL(val);
01716 
01717                     if( val < ia || val >= ib )
01718                     {
01719                         int pixelId = (loc + i) / cn;
01720                         badPt = Point(pixelId % src.cols, pixelId / src.cols);
01721                         break;
01722                     }
01723                 }
01724             }
01725         }
01726     }
01727 
01728     if( badPt.x >= 0 )
01729     {
01730         if( pt )
01731             *pt = badPt;
01732         if( !quiet )
01733         {
01734             cv::String value_str;
01735             value_str << src(cv::Range(badPt.y, badPt.y + 1), cv::Range(badPt.x, badPt.x + 1));
01736             CV_Error_( CV_StsOutOfRange,
01737             ("the value at (%d, %d)=%s is out of range [%f, %f)", badPt.x, badPt.y, value_str.c_str(), minVal, maxVal));
01738         }
01739         return false;
01740     }
01741     return true;
01742 }
01743 
01744 #ifdef HAVE_OPENCL
01745 
01746 static bool ocl_patchNaNs( InputOutputArray _a, float value )
01747 {
01748     int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
01749     ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
01750                      format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=float -D rowsPerWI=%d",
01751                             rowsPerWI));
01752     if (k.empty())
01753         return false;
01754 
01755     UMat a = _a.getUMat();
01756     int cn = a.channels();
01757 
01758     k.args(ocl::KernelArg::ReadOnlyNoSize(a),
01759            ocl::KernelArg::WriteOnly(a, cn), (float)value);
01760 
01761     size_t globalsize[2] = { (size_t)a.cols * cn, ((size_t)a.rows + rowsPerWI - 1) / rowsPerWI };
01762     return k.run(2, globalsize, NULL, false);
01763 }
01764 
01765 #endif
01766 
01767 void patchNaNs( InputOutputArray _a, double _val )
01768 {
01769     CV_Assert( _a.depth() == CV_32F );
01770 
01771 #ifdef HAVE_OPENCL
01772     CV_OCL_RUN(_a.isUMat() && _a.dims() <= 2,
01773                ocl_patchNaNs(_a, (float)_val))
01774 #endif
01775 
01776     Mat a = _a.getMat();
01777     const Mat* arrays[] = {&a, 0};
01778     int* ptrs[1];
01779     NAryMatIterator it(arrays, (uchar**)ptrs);
01780     size_t len = it.size*a.channels();
01781     Cv32suf val;
01782     val.f = (float)_val;
01783 
01784 #if CV_SSE2
01785     __m128i v_mask1 = _mm_set1_epi32(0x7fffffff), v_mask2 = _mm_set1_epi32(0x7f800000);
01786     __m128i v_val = _mm_set1_epi32(val.i);
01787 #elif CV_NEON
01788     int32x4_t v_mask1 = vdupq_n_s32(0x7fffffff), v_mask2 = vdupq_n_s32(0x7f800000),
01789         v_val = vdupq_n_s32(val.i);
01790 #endif
01791 
01792     for( size_t i = 0; i < it.nplanes; i++, ++it )
01793     {
01794         int* tptr = ptrs[0];
01795         size_t j = 0;
01796 
01797 #if CV_SSE2
01798         if (USE_SSE2)
01799         {
01800             for ( ; j + 4 <= len; j += 4)
01801             {
01802                 __m128i v_src = _mm_loadu_si128((__m128i const *)(tptr + j));
01803                 __m128i v_cmp_mask = _mm_cmplt_epi32(v_mask2, _mm_and_si128(v_src, v_mask1));
01804                 __m128i v_res = _mm_or_si128(_mm_andnot_si128(v_cmp_mask, v_src), _mm_and_si128(v_cmp_mask, v_val));
01805                 _mm_storeu_si128((__m128i *)(tptr + j), v_res);
01806             }
01807         }
01808 #elif CV_NEON
01809         for ( ; j + 4 <= len; j += 4)
01810         {
01811             int32x4_t v_src = vld1q_s32(tptr + j);
01812             uint32x4_t v_cmp_mask = vcltq_s32(v_mask2, vandq_s32(v_src, v_mask1));
01813             int32x4_t v_dst = vbslq_s32(v_cmp_mask, v_val, v_src);
01814             vst1q_s32(tptr + j, v_dst);
01815         }
01816 #endif
01817 
01818         for( ; j < len; j++ )
01819             if( (tptr[j] & 0x7fffffff) > 0x7f800000 )
01820                 tptr[j] = val.i;
01821     }
01822 }
01823 
01824 }
01825 
01826 CV_IMPL float cvCbrt(float value) { return cv::cubeRoot(value); }
01827 CV_IMPL float cvFastArctan(float y, float x) { return cv::fastAtan2(y, x); }
01828 
01829 CV_IMPL void
01830 cvCartToPolar( const CvArr* xarr, const CvArr* yarr,
01831                CvArr* magarr, CvArr* anglearr,
01832                int angle_in_degrees )
01833 {
01834     cv::Mat X = cv::cvarrToMat(xarr), Y = cv::cvarrToMat(yarr), Mag, Angle;
01835     if( magarr )
01836     {
01837         Mag = cv::cvarrToMat(magarr);
01838         CV_Assert( Mag.size() == X.size() && Mag.type() == X.type() );
01839     }
01840     if( anglearr )
01841     {
01842         Angle = cv::cvarrToMat(anglearr);
01843         CV_Assert( Angle.size() == X.size() && Angle.type() == X.type() );
01844     }
01845     if( magarr )
01846     {
01847         if( anglearr )
01848             cv::cartToPolar( X, Y, Mag, Angle, angle_in_degrees != 0 );
01849         else
01850             cv::magnitude( X, Y, Mag );
01851     }
01852     else
01853         cv::phase( X, Y, Angle, angle_in_degrees != 0 );
01854 }
01855 
01856 CV_IMPL void
01857 cvPolarToCart( const CvArr* magarr, const CvArr* anglearr,
01858                CvArr* xarr, CvArr* yarr, int angle_in_degrees )
01859 {
01860     cv::Mat X, Y, Angle = cv::cvarrToMat(anglearr), Mag;
01861     if( magarr )
01862     {
01863         Mag = cv::cvarrToMat(magarr);
01864         CV_Assert( Mag.size() == Angle.size() && Mag.type() == Angle.type() );
01865     }
01866     if( xarr )
01867     {
01868         X = cv::cvarrToMat(xarr);
01869         CV_Assert( X.size() == Angle.size() && X.type() == Angle.type() );
01870     }
01871     if( yarr )
01872     {
01873         Y = cv::cvarrToMat(yarr);
01874         CV_Assert( Y.size() == Angle.size() && Y.type() == Angle.type() );
01875     }
01876 
01877     cv::polarToCart( Mag, Angle, X, Y, angle_in_degrees != 0 );
01878 }
01879 
01880 CV_IMPL void cvExp( const CvArr* srcarr, CvArr* dstarr )
01881 {
01882     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
01883     CV_Assert( src.type() == dst.type() && src.size == dst.size );
01884     cv::exp( src, dst );
01885 }
01886 
01887 CV_IMPL void cvLog( const CvArr* srcarr, CvArr* dstarr )
01888 {
01889     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
01890     CV_Assert( src.type() == dst.type() && src.size == dst.size );
01891     cv::log( src, dst );
01892 }
01893 
01894 CV_IMPL void cvPow( const CvArr* srcarr, CvArr* dstarr, double power )
01895 {
01896     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
01897     CV_Assert( src.type() == dst.type() && src.size == dst.size );
01898     cv::pow( src, power, dst );
01899 }
01900 
01901 CV_IMPL int cvCheckArr( const CvArr* arr, int flags,
01902                         double minVal, double maxVal )
01903 {
01904     if( (flags & CV_CHECK_RANGE) == 0 )
01905         minVal = -DBL_MAX, maxVal = DBL_MAX;
01906     return cv::checkRange(cv::cvarrToMat(arr), (flags & CV_CHECK_QUIET) != 0, 0, minVal, maxVal );
01907 }
01908 
01909 
01910 /*
01911   Finds real roots of cubic, quadratic or linear equation.
01912   The original code has been taken from Ken Turkowski web page
01913   (http://www.worldserver.com/turk/opensource/) and adopted for OpenCV.
01914   Here is the copyright notice.
01915 
01916   -----------------------------------------------------------------------
01917   Copyright (C) 1978-1999 Ken Turkowski. <turk@computer.org>
01918 
01919     All rights reserved.
01920 
01921     Warranty Information
01922       Even though I have reviewed this software, I make no warranty
01923       or representation, either express or implied, with respect to this
01924       software, its quality, accuracy, merchantability, or fitness for a
01925       particular purpose.  As a result, this software is provided "as is,"
01926       and you, its user, are assuming the entire risk as to its quality
01927       and accuracy.
01928 
01929     This code may be used and freely distributed as long as it includes
01930     this copyright notice and the above warranty information.
01931   -----------------------------------------------------------------------
01932 */
01933 
01934 int cv::solveCubic( InputArray _coeffs, OutputArray _roots )
01935 {
01936     const int n0 = 3;
01937     Mat coeffs = _coeffs.getMat();
01938     int ctype = coeffs.type();
01939 
01940     CV_Assert( ctype == CV_32F || ctype == CV_64F );
01941     CV_Assert( (coeffs.size() == Size(n0, 1) ||
01942                 coeffs.size() == Size(n0+1, 1) ||
01943                 coeffs.size() == Size(1, n0) ||
01944                 coeffs.size() == Size(1, n0+1)) );
01945 
01946     _roots.create(n0, 1, ctype, -1, true, _OutputArray::DEPTH_MASK_FLT);
01947     Mat roots = _roots.getMat();
01948 
01949     int i = -1, n = 0;
01950     double a0 = 1., a1, a2, a3;
01951     double x0 = 0., x1 = 0., x2 = 0.;
01952     int ncoeffs = coeffs.rows + coeffs.cols - 1;
01953 
01954     if( ctype == CV_32FC1 )
01955     {
01956         if( ncoeffs == 4 )
01957             a0 = coeffs.at<float>(++i);
01958 
01959         a1 = coeffs.at<float>(i+1);
01960         a2 = coeffs.at<float>(i+2);
01961         a3 = coeffs.at<float>(i+3);
01962     }
01963     else
01964     {
01965         if( ncoeffs == 4 )
01966             a0 = coeffs.at<double>(++i);
01967 
01968         a1 = coeffs.at<double>(i+1);
01969         a2 = coeffs.at<double>(i+2);
01970         a3 = coeffs.at<double>(i+3);
01971     }
01972 
01973     if( a0 == 0 )
01974     {
01975         if( a1 == 0 )
01976         {
01977             if( a2 == 0 )
01978                 n = a3 == 0 ? -1 : 0;
01979             else
01980             {
01981                 // linear equation
01982                 x0 = -a3/a2;
01983                 n = 1;
01984             }
01985         }
01986         else
01987         {
01988             // quadratic equation
01989             double d = a2*a2 - 4*a1*a3;
01990             if( d >= 0 )
01991             {
01992                 d = std::sqrt(d);
01993                 double q1 = (-a2 + d) * 0.5;
01994                 double q2 = (a2 + d) * -0.5;
01995                 if( fabs(q1) > fabs(q2) )
01996                 {
01997                     x0 = q1 / a1;
01998                     x1 = a3 / q1;
01999                 }
02000                 else
02001                 {
02002                     x0 = q2 / a1;
02003                     x1 = a3 / q2;
02004                 }
02005                 n = d > 0 ? 2 : 1;
02006             }
02007         }
02008     }
02009     else
02010     {
02011         a0 = 1./a0;
02012         a1 *= a0;
02013         a2 *= a0;
02014         a3 *= a0;
02015 
02016         double Q = (a1 * a1 - 3 * a2) * (1./9);
02017         double R = (2 * a1 * a1 * a1 - 9 * a1 * a2 + 27 * a3) * (1./54);
02018         double Qcubed = Q * Q * Q;
02019         double d = Qcubed - R * R;
02020 
02021         if( d > 0 )
02022         {
02023             double theta = acos(R / sqrt(Qcubed));
02024             double sqrtQ = sqrt(Q);
02025             double t0 = -2 * sqrtQ;
02026             double t1 = theta * (1./3);
02027             double t2 = a1 * (1./3);
02028             x0 = t0 * cos(t1) - t2;
02029             x1 = t0 * cos(t1 + (2.*CV_PI/3)) - t2;
02030             x2 = t0 * cos(t1 + (4.*CV_PI/3)) - t2;
02031             n = 3;
02032         }
02033         else if( d == 0 )
02034         {
02035             if(R >= 0)
02036             {
02037                 x0 = -2*pow(R, 1./3) - a1/3;
02038                 x1 = pow(R, 1./3) - a1/3;
02039             }
02040             else
02041             {
02042                 x0 = 2*pow(-R, 1./3) - a1/3;
02043                 x1 = -pow(-R, 1./3) - a1/3;
02044             }
02045             x2 = 0;
02046             n = x0 == x1 ? 1 : 2;
02047             x1 = x0 == x1 ? 0 : x1;
02048         }
02049         else
02050         {
02051             double e;
02052             d = sqrt(-d);
02053             e = pow(d + fabs(R), 1./3);
02054             if( R > 0 )
02055                 e = -e;
02056             x0 = (e + Q / e) - a1 * (1./3);
02057             n = 1;
02058         }
02059     }
02060 
02061     if( roots.type() == CV_32FC1 )
02062     {
02063         roots.at<float>(0) = (float)x0;
02064         roots.at<float>(1) = (float)x1;
02065         roots.at<float>(2) = (float)x2;
02066     }
02067     else
02068     {
02069         roots.at<double>(0) = x0;
02070         roots.at<double>(1) = x1;
02071         roots.at<double>(2) = x2;
02072     }
02073 
02074     return n;
02075 }
02076 
02077 /* finds complex roots of a polynomial using Durand-Kerner method:
02078    http://en.wikipedia.org/wiki/Durand%E2%80%93Kerner_method */
02079 double cv::solvePoly( InputArray _coeffs0, OutputArray _roots0, int maxIters )
02080 {
02081     typedef Complex<double> C;
02082 
02083     double maxDiff = 0;
02084     int iter, i, j;
02085     Mat coeffs0 = _coeffs0.getMat();
02086     int ctype = _coeffs0.type();
02087     int cdepth = CV_MAT_DEPTH(ctype);
02088 
02089     CV_Assert( CV_MAT_DEPTH(ctype) >= CV_32F && CV_MAT_CN(ctype) <= 2 );
02090     CV_Assert( coeffs0.rows == 1 || coeffs0.cols == 1 );
02091 
02092     int n0 = coeffs0.cols + coeffs0.rows - 2, n = n0;
02093 
02094     _roots0.create(n, 1, CV_MAKETYPE(cdepth, 2), -1, true, _OutputArray::DEPTH_MASK_FLT);
02095     Mat roots0 = _roots0.getMat();
02096 
02097     AutoBuffer<C> buf(n*2+2);
02098     C *coeffs = buf, *roots = coeffs + n + 1;
02099     Mat coeffs1(coeffs0.size(), CV_MAKETYPE(CV_64F, coeffs0.channels()), coeffs0.channels() == 2 ? coeffs : roots);
02100     coeffs0.convertTo(coeffs1, coeffs1.type());
02101     if( coeffs0.channels() == 1 )
02102     {
02103         const double* rcoeffs = (const double*)roots;
02104         for( i = 0; i <= n; i++ )
02105             coeffs[i] = C(rcoeffs[i], 0);
02106     }
02107 
02108     for( ; n > 1; n-- )
02109     {
02110         if( std::abs(coeffs[n].re) + std::abs(coeffs[n].im) > DBL_EPSILON )
02111             break;
02112     }
02113 
02114     C p(1, 0), r(1, 1);
02115 
02116     for( i = 0; i < n; i++ )
02117     {
02118         roots[i] = p;
02119         p = p * r;
02120     }
02121 
02122     maxIters = maxIters <= 0 ? 1000 : maxIters;
02123     for( iter = 0; iter < maxIters; iter++ )
02124     {
02125         maxDiff = 0;
02126         for( i = 0; i < n; i++ )
02127         {
02128             p = roots[i];
02129             C num = coeffs[n], denom = coeffs[n];
02130             int num_same_root = 1;
02131             for( j = 0; j < n; j++ )
02132             {
02133                 num = num*p + coeffs[n-j-1];
02134                 if( j != i )
02135                 {
02136                     if ( (p - roots[j]).re != 0 || (p - roots[j]).im != 0 )
02137                         denom = denom * (p - roots[j]);
02138                     else
02139                         num_same_root++;
02140                 }
02141             }
02142             num /= denom;
02143             if( num_same_root > 1)
02144             {
02145                 double old_num_re = num.re;
02146                 double old_num_im = num.im;
02147                 int square_root_times = num_same_root % 2 == 0 ? num_same_root / 2 : num_same_root / 2 - 1;
02148 
02149                 for( j = 0; j < square_root_times; j++)
02150                 {
02151                     num.re = old_num_re*old_num_re + old_num_im*old_num_im;
02152                     num.re = sqrt(num.re);
02153                     num.re += old_num_re;
02154                     num.im = num.re - old_num_re;
02155                     num.re /= 2;
02156                     num.re = sqrt(num.re);
02157 
02158                     num.im /= 2;
02159                     num.im = sqrt(num.im);
02160                     if( old_num_re < 0 ) num.im = -num.im;
02161                 }
02162 
02163                 if( num_same_root % 2 != 0){
02164                     Mat cube_coefs(4, 1, CV_64FC1);
02165                     Mat cube_roots(3, 1, CV_64FC2);
02166                     cube_coefs.at<double>(3) = -(pow(old_num_re, 3));
02167                     cube_coefs.at<double>(2) = -(15*pow(old_num_re, 2) + 27*pow(old_num_im, 2));
02168                     cube_coefs.at<double>(1) = -48*old_num_re;
02169                     cube_coefs.at<double>(0) = 64;
02170                     solveCubic(cube_coefs, cube_roots);
02171 
02172                     if(cube_roots.at<double>(0) >= 0) num.re = pow(cube_roots.at<double>(0), 1./3);
02173                     else num.re = -pow(-cube_roots.at<double>(0), 1./3);
02174                     num.im = sqrt(pow(num.re, 2) / 3 - old_num_re / (3*num.re));
02175                 }
02176             }
02177             roots[i] = p - num;
02178             maxDiff = std::max(maxDiff, cv::abs(num));
02179         }
02180         if( maxDiff <= 0 )
02181             break;
02182     }
02183 
02184     if( coeffs0.channels() == 1 )
02185     {
02186         const double verySmallEps = 1e-100;
02187         for( i = 0; i < n; i++ )
02188             if( fabs(roots[i].im) < verySmallEps )
02189                 roots[i].im = 0;
02190     }
02191 
02192     for( ; n < n0; n++ )
02193         roots[n+1] = roots[n];
02194 
02195     Mat(roots0.size(), CV_64FC2, roots).convertTo(roots0, roots0.type());
02196     return maxDiff;
02197 }
02198 
02199 
02200 CV_IMPL int
02201 cvSolveCubic( const CvMat* coeffs, CvMat* roots )
02202 {
02203     cv::Mat _coeffs = cv::cvarrToMat(coeffs), _roots = cv::cvarrToMat(roots), _roots0 = _roots;
02204     int nroots = cv::solveCubic(_coeffs, _roots);
02205     CV_Assert( _roots.data == _roots0.data ); // check that the array of roots was not reallocated
02206     return nroots;
02207 }
02208 
02209 
02210 void cvSolvePoly(const CvMat* a, CvMat *r, int maxiter, int)
02211 {
02212     cv::Mat _a = cv::cvarrToMat(a);
02213     cv::Mat _r = cv::cvarrToMat(r);
02214     cv::Mat _r0 = _r;
02215     cv::solvePoly(_a, _r, maxiter);
02216     CV_Assert( _r.data == _r0.data ); // check that the array of roots was not reallocated
02217 }
02218 
02219 
02220 /* End of file. */
02221