Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of gr-peach-opencv-project-sd-card by
mathfuncs.cpp
00001 /*M/////////////////////////////////////////////////////////////////////////////////////// 00002 // 00003 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 00004 // 00005 // By downloading, copying, installing or using the software you agree to this license. 00006 // If you do not agree to this license, do not download, install, 00007 // copy or use the software. 00008 // 00009 // 00010 // License Agreement 00011 // For Open Source Computer Vision Library 00012 // 00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 00014 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved. 00015 // Copyright (C) 2014-2015, Itseez Inc., all rights reserved. 00016 // Third party copyrights are property of their respective owners. 00017 // 00018 // Redistribution and use in source and binary forms, with or without modification, 00019 // are permitted provided that the following conditions are met: 00020 // 00021 // * Redistribution's of source code must retain the above copyright notice, 00022 // this list of conditions and the following disclaimer. 00023 // 00024 // * Redistribution's in binary form must reproduce the above copyright notice, 00025 // this list of conditions and the following disclaimer in the documentation 00026 // and/or other materials provided with the distribution. 00027 // 00028 // * The name of the copyright holders may not be used to endorse or promote products 00029 // derived from this software without specific prior written permission. 00030 // 00031 // This software is provided by the copyright holders and contributors "as is" and 00032 // any express or implied warranties, including, but not limited to, the implied 00033 // warranties of merchantability and fitness for a particular purpose are disclaimed. 00034 // In no event shall the Intel Corporation or contributors be liable for any direct, 00035 // indirect, incidental, special, exemplary, or consequential damages 00036 // (including, but not limited to, procurement of substitute goods or services; 00037 // loss of use, data, or profits; or business interruption) however caused 00038 // and on any theory of liability, whether in contract, strict liability, 00039 // or tort (including negligence or otherwise) arising in any way out of 00040 // the use of this software, even if advised of the possibility of such damage. 00041 // 00042 //M*/ 00043 00044 #include "precomp.hpp" 00045 #include "opencl_kernels_core.hpp" 00046 #include <limits> 00047 #include <iostream> 00048 00049 namespace cv 00050 { 00051 00052 typedef void (*MathFunc)(const void* src, void* dst, int len); 00053 00054 static const float atan2_p1 = 0.9997878412794807f*(float)(180/CV_PI); 00055 static const float atan2_p3 = -0.3258083974640975f*(float)(180/CV_PI); 00056 static const float atan2_p5 = 0.1555786518463281f*(float)(180/CV_PI); 00057 static const float atan2_p7 = -0.04432655554792128f*(float)(180/CV_PI); 00058 00059 #ifdef HAVE_OPENCL 00060 00061 enum { OCL_OP_LOG=0, OCL_OP_EXP=1, OCL_OP_MAG=2, OCL_OP_PHASE_DEGREES=3, OCL_OP_PHASE_RADIANS=4 }; 00062 00063 static const char* oclop2str[] = { "OP_LOG", "OP_EXP", "OP_MAG", "OP_PHASE_DEGREES", "OP_PHASE_RADIANS", 0 }; 00064 00065 static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, int oclop) 00066 { 00067 int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); 00068 int kercn = oclop == OCL_OP_PHASE_DEGREES || 00069 oclop == OCL_OP_PHASE_RADIANS ? 1 : ocl::predictOptimalVectorWidth(_src1, _src2, _dst); 00070 00071 const ocl::Device d = ocl::Device::getDefault(); 00072 bool double_support = d.doubleFPConfig() > 0; 00073 if (!double_support && depth == CV_64F) 00074 return false; 00075 int rowsPerWI = d.isIntel() ? 4 : 1; 00076 00077 ocl::Kernel k("KF", ocl::core::arithm_oclsrc, 00078 format("-D %s -D %s -D dstT=%s -D rowsPerWI=%d%s", _src2.empty() ? "UNARY_OP" : "BINARY_OP", 00079 oclop2str[oclop], ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), rowsPerWI, 00080 double_support ? " -D DOUBLE_SUPPORT" : "")); 00081 if (k.empty()) 00082 return false; 00083 00084 UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(); 00085 _dst.create(src1.size(), type); 00086 UMat dst = _dst.getUMat(); 00087 00088 ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1), 00089 src2arg = ocl::KernelArg::ReadOnlyNoSize(src2), 00090 dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn); 00091 00092 if (src2.empty()) 00093 k.args(src1arg, dstarg); 00094 else 00095 k.args(src1arg, src2arg, dstarg); 00096 00097 size_t globalsize[] = { (size_t)src1.cols * cn / kercn, ((size_t)src1.rows + rowsPerWI - 1) / rowsPerWI }; 00098 return k.run(2, globalsize, 0, false); 00099 } 00100 00101 #endif 00102 00103 float fastAtan2( float y, float x ) 00104 { 00105 float ax = std::abs(x), ay = std::abs(y); 00106 float a, c, c2; 00107 if( ax >= ay ) 00108 { 00109 c = ay/(ax + (float)DBL_EPSILON); 00110 c2 = c*c; 00111 a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c; 00112 } 00113 else 00114 { 00115 c = ax/(ay + (float)DBL_EPSILON); 00116 c2 = c*c; 00117 a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c; 00118 } 00119 if( x < 0 ) 00120 a = 180.f - a; 00121 if( y < 0 ) 00122 a = 360.f - a; 00123 return a; 00124 } 00125 00126 /* ************************************************************************** *\ 00127 Fast cube root by Ken Turkowski 00128 (http://www.worldserver.com/turk/computergraphics/papers.html) 00129 \* ************************************************************************** */ 00130 float cubeRoot( float value ) 00131 { 00132 float fr; 00133 Cv32suf v, m; 00134 int ix, s; 00135 int ex, shx; 00136 00137 v.f = value; 00138 ix = v.i & 0x7fffffff; 00139 s = v.i & 0x80000000; 00140 ex = (ix >> 23) - 127; 00141 shx = ex % 3; 00142 shx -= shx >= 0 ? 3 : 0; 00143 ex = (ex - shx) / 3; /* exponent of cube root */ 00144 v.i = (ix & ((1<<23)-1)) | ((shx + 127)<<23); 00145 fr = v.f; 00146 00147 /* 0.125 <= fr < 1.0 */ 00148 /* Use quartic rational polynomial with error < 2^(-24) */ 00149 fr = (float)(((((45.2548339756803022511987494 * fr + 00150 192.2798368355061050458134625) * fr + 00151 119.1654824285581628956914143) * fr + 00152 13.43250139086239872172837314) * fr + 00153 0.1636161226585754240958355063)/ 00154 ((((14.80884093219134573786480845 * fr + 00155 151.9714051044435648658557668) * fr + 00156 168.5254414101568283957668343) * fr + 00157 33.9905941350215598754191872) * fr + 00158 1.0)); 00159 00160 /* fr *= 2^ex * sign */ 00161 m.f = value; 00162 v.f = fr; 00163 v.i = (v.i + (ex << 23) + s) & (m.i*2 != 0 ? -1 : 0); 00164 return v.f; 00165 } 00166 00167 /****************************************************************************************\ 00168 * Cartezian -> Polar * 00169 \****************************************************************************************/ 00170 00171 void magnitude( InputArray src1, InputArray src2, OutputArray dst ) 00172 { 00173 int type = src1.type(), depth = src1.depth(), cn = src1.channels(); 00174 CV_Assert( src1.size() == src2.size() && type == src2.type() && (depth == CV_32F || depth == CV_64F)); 00175 00176 #ifdef HAVE_OPENCL 00177 CV_OCL_RUN(dst.isUMat() && src1.dims() <= 2 && src2.dims() <= 2, 00178 ocl_math_op(src1, src2, dst, OCL_OP_MAG)) 00179 #endif 00180 00181 Mat X = src1.getMat(), Y = src2.getMat(); 00182 dst.create(X.dims, X.size, X.type()); 00183 Mat Mag = dst.getMat(); 00184 00185 const Mat* arrays[] = {&X, &Y, &Mag, 0}; 00186 uchar* ptrs[3]; 00187 NAryMatIterator it(arrays, ptrs); 00188 int len = (int)it.size*cn; 00189 00190 for( size_t i = 0; i < it.nplanes; i++, ++it ) 00191 { 00192 if( depth == CV_32F ) 00193 { 00194 const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1]; 00195 float *mag = (float*)ptrs[2]; 00196 hal::magnitude32f( x, y, mag, len ); 00197 } 00198 else 00199 { 00200 const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1]; 00201 double *mag = (double*)ptrs[2]; 00202 hal::magnitude64f( x, y, mag, len ); 00203 } 00204 } 00205 } 00206 00207 00208 void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegrees ) 00209 { 00210 int type = src1.type(), depth = src1.depth(), cn = src1.channels(); 00211 CV_Assert( src1.size() == src2.size() && type == src2.type() && (depth == CV_32F || depth == CV_64F)); 00212 00213 #ifdef HAVE_OPENCL 00214 CV_OCL_RUN(dst.isUMat() && src1.dims() <= 2 && src2.dims() <= 2, 00215 ocl_math_op(src1, src2, dst, angleInDegrees ? OCL_OP_PHASE_DEGREES : OCL_OP_PHASE_RADIANS)) 00216 #endif 00217 00218 Mat X = src1.getMat(), Y = src2.getMat(); 00219 dst.create( X.dims, X.size, type ); 00220 Mat Angle = dst.getMat(); 00221 00222 const Mat* arrays[] = {&X, &Y, &Angle, 0}; 00223 uchar* ptrs[3]; 00224 NAryMatIterator it(arrays, ptrs); 00225 cv::AutoBuffer<float> _buf; 00226 float* buf[2] = {0, 0}; 00227 int j, k, total = (int)(it.size*cn), blockSize = total; 00228 size_t esz1 = X.elemSize1(); 00229 00230 if( depth == CV_64F ) 00231 { 00232 blockSize = std::min(blockSize, ((BLOCK_SIZE+cn-1)/cn)*cn); 00233 _buf.allocate(blockSize*2); 00234 buf[0] = _buf; 00235 buf[1] = buf[0] + blockSize; 00236 } 00237 00238 for( size_t i = 0; i < it.nplanes; i++, ++it ) 00239 { 00240 for( j = 0; j < total; j += blockSize ) 00241 { 00242 int len = std::min(total - j, blockSize); 00243 if( depth == CV_32F ) 00244 { 00245 const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1]; 00246 float *angle = (float*)ptrs[2]; 00247 hal::fastAtan2( y, x, angle, len, angleInDegrees ); 00248 } 00249 else 00250 { 00251 const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1]; 00252 double *angle = (double*)ptrs[2]; 00253 k = 0; 00254 00255 #if CV_SSE2 00256 if (USE_SSE2) 00257 { 00258 for ( ; k <= len - 4; k += 4) 00259 { 00260 __m128 v_dst0 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(x + k)), 00261 _mm_cvtpd_ps(_mm_loadu_pd(x + k + 2))); 00262 __m128 v_dst1 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(y + k)), 00263 _mm_cvtpd_ps(_mm_loadu_pd(y + k + 2))); 00264 00265 _mm_storeu_ps(buf[0] + k, v_dst0); 00266 _mm_storeu_ps(buf[1] + k, v_dst1); 00267 } 00268 } 00269 #endif 00270 00271 for( ; k < len; k++ ) 00272 { 00273 buf[0][k] = (float)x[k]; 00274 buf[1][k] = (float)y[k]; 00275 } 00276 00277 hal::fastAtan2( buf[1], buf[0], buf[0], len, angleInDegrees ); 00278 k = 0; 00279 00280 #if CV_SSE2 00281 if (USE_SSE2) 00282 { 00283 for ( ; k <= len - 4; k += 4) 00284 { 00285 __m128 v_src = _mm_loadu_ps(buf[0] + k); 00286 _mm_storeu_pd(angle + k, _mm_cvtps_pd(v_src)); 00287 _mm_storeu_pd(angle + k + 2, _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_src), 8)))); 00288 } 00289 } 00290 #endif 00291 00292 for( ; k < len; k++ ) 00293 angle[k] = buf[0][k]; 00294 } 00295 ptrs[0] += len*esz1; 00296 ptrs[1] += len*esz1; 00297 ptrs[2] += len*esz1; 00298 } 00299 } 00300 } 00301 00302 #ifdef HAVE_OPENCL 00303 00304 static bool ocl_cartToPolar( InputArray _src1, InputArray _src2, 00305 OutputArray _dst1, OutputArray _dst2, bool angleInDegrees ) 00306 { 00307 const ocl::Device & d = ocl::Device::getDefault(); 00308 int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), 00309 rowsPerWI = d.isIntel() ? 4 : 1; 00310 bool doubleSupport = d.doubleFPConfig() > 0; 00311 00312 if ( !(_src1.dims() <= 2 && _src2.dims() <= 2 && 00313 (depth == CV_32F || depth == CV_64F) && type == _src2.type()) || 00314 (depth == CV_64F && !doubleSupport) ) 00315 return false; 00316 00317 ocl::Kernel k("KF", ocl::core::arithm_oclsrc, 00318 format("-D BINARY_OP -D dstT=%s -D depth=%d -D rowsPerWI=%d -D OP_CTP_%s%s", 00319 ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), 00320 depth, rowsPerWI, angleInDegrees ? "AD" : "AR", 00321 doubleSupport ? " -D DOUBLE_SUPPORT" : "")); 00322 if (k.empty()) 00323 return false; 00324 00325 UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(); 00326 Size size = src1.size(); 00327 CV_Assert( size == src2.size() ); 00328 00329 _dst1.create(size, type); 00330 _dst2.create(size, type); 00331 UMat dst1 = _dst1.getUMat(), dst2 = _dst2.getUMat(); 00332 00333 k.args(ocl::KernelArg::ReadOnlyNoSize(src1), 00334 ocl::KernelArg::ReadOnlyNoSize(src2), 00335 ocl::KernelArg::WriteOnly(dst1, cn), 00336 ocl::KernelArg::WriteOnlyNoSize(dst2)); 00337 00338 size_t globalsize[2] = { (size_t)dst1.cols * cn, ((size_t)dst1.rows + rowsPerWI - 1) / rowsPerWI }; 00339 return k.run(2, globalsize, NULL, false); 00340 } 00341 00342 #endif 00343 00344 void cartToPolar( InputArray src1, InputArray src2, 00345 OutputArray dst1, OutputArray dst2, bool angleInDegrees ) 00346 { 00347 #ifdef HAVE_OPENCL 00348 CV_OCL_RUN(dst1.isUMat() && dst2.isUMat(), 00349 ocl_cartToPolar(src1, src2, dst1, dst2, angleInDegrees)) 00350 #endif 00351 00352 Mat X = src1.getMat(), Y = src2.getMat(); 00353 int type = X.type(), depth = X.depth(), cn = X.channels(); 00354 CV_Assert( X.size == Y.size && type == Y.type() && (depth == CV_32F || depth == CV_64F)); 00355 dst1.create( X.dims, X.size, type ); 00356 dst2.create( X.dims, X.size, type ); 00357 Mat Mag = dst1.getMat(), Angle = dst2.getMat(); 00358 00359 const Mat* arrays[] = {&X, &Y, &Mag, &Angle, 0}; 00360 uchar* ptrs[4]; 00361 NAryMatIterator it(arrays, ptrs); 00362 cv::AutoBuffer<float> _buf; 00363 float* buf[2] = {0, 0}; 00364 int j, k, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn); 00365 size_t esz1 = X.elemSize1(); 00366 00367 if( depth == CV_64F ) 00368 { 00369 _buf.allocate(blockSize*2); 00370 buf[0] = _buf; 00371 buf[1] = buf[0] + blockSize; 00372 } 00373 00374 for( size_t i = 0; i < it.nplanes; i++, ++it ) 00375 { 00376 for( j = 0; j < total; j += blockSize ) 00377 { 00378 int len = std::min(total - j, blockSize); 00379 if( depth == CV_32F ) 00380 { 00381 const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1]; 00382 float *mag = (float*)ptrs[2], *angle = (float*)ptrs[3]; 00383 hal::magnitude32f( x, y, mag, len ); 00384 hal::fastAtan2( y, x, angle, len, angleInDegrees ); 00385 } 00386 else 00387 { 00388 const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1]; 00389 double *angle = (double*)ptrs[3]; 00390 00391 hal::magnitude64f(x, y, (double*)ptrs[2], len); 00392 k = 0; 00393 00394 #if CV_SSE2 00395 if (USE_SSE2) 00396 { 00397 for ( ; k <= len - 4; k += 4) 00398 { 00399 __m128 v_dst0 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(x + k)), 00400 _mm_cvtpd_ps(_mm_loadu_pd(x + k + 2))); 00401 __m128 v_dst1 = _mm_movelh_ps(_mm_cvtpd_ps(_mm_loadu_pd(y + k)), 00402 _mm_cvtpd_ps(_mm_loadu_pd(y + k + 2))); 00403 00404 _mm_storeu_ps(buf[0] + k, v_dst0); 00405 _mm_storeu_ps(buf[1] + k, v_dst1); 00406 } 00407 } 00408 #endif 00409 00410 for( ; k < len; k++ ) 00411 { 00412 buf[0][k] = (float)x[k]; 00413 buf[1][k] = (float)y[k]; 00414 } 00415 00416 hal::fastAtan2( buf[1], buf[0], buf[0], len, angleInDegrees ); 00417 k = 0; 00418 00419 #if CV_SSE2 00420 if (USE_SSE2) 00421 { 00422 for ( ; k <= len - 4; k += 4) 00423 { 00424 __m128 v_src = _mm_loadu_ps(buf[0] + k); 00425 _mm_storeu_pd(angle + k, _mm_cvtps_pd(v_src)); 00426 _mm_storeu_pd(angle + k + 2, _mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_src), 8)))); 00427 } 00428 } 00429 #endif 00430 00431 for( ; k < len; k++ ) 00432 angle[k] = buf[0][k]; 00433 } 00434 ptrs[0] += len*esz1; 00435 ptrs[1] += len*esz1; 00436 ptrs[2] += len*esz1; 00437 ptrs[3] += len*esz1; 00438 } 00439 } 00440 } 00441 00442 00443 /****************************************************************************************\ 00444 * Polar -> Cartezian * 00445 \****************************************************************************************/ 00446 00447 static void SinCos_32f( const float *angle, float *sinval, float* cosval, 00448 int len, int angle_in_degrees ) 00449 { 00450 const int N = 64; 00451 00452 static const double sin_table[] = 00453 { 00454 0.00000000000000000000, 0.09801714032956060400, 00455 0.19509032201612825000, 0.29028467725446233000, 00456 0.38268343236508978000, 0.47139673682599764000, 00457 0.55557023301960218000, 0.63439328416364549000, 00458 0.70710678118654746000, 0.77301045336273699000, 00459 0.83146961230254524000, 0.88192126434835494000, 00460 0.92387953251128674000, 0.95694033573220894000, 00461 0.98078528040323043000, 0.99518472667219682000, 00462 1.00000000000000000000, 0.99518472667219693000, 00463 0.98078528040323043000, 0.95694033573220894000, 00464 0.92387953251128674000, 0.88192126434835505000, 00465 0.83146961230254546000, 0.77301045336273710000, 00466 0.70710678118654757000, 0.63439328416364549000, 00467 0.55557023301960218000, 0.47139673682599786000, 00468 0.38268343236508989000, 0.29028467725446239000, 00469 0.19509032201612861000, 0.09801714032956082600, 00470 0.00000000000000012246, -0.09801714032956059000, 00471 -0.19509032201612836000, -0.29028467725446211000, 00472 -0.38268343236508967000, -0.47139673682599764000, 00473 -0.55557023301960196000, -0.63439328416364527000, 00474 -0.70710678118654746000, -0.77301045336273666000, 00475 -0.83146961230254524000, -0.88192126434835494000, 00476 -0.92387953251128652000, -0.95694033573220882000, 00477 -0.98078528040323032000, -0.99518472667219693000, 00478 -1.00000000000000000000, -0.99518472667219693000, 00479 -0.98078528040323043000, -0.95694033573220894000, 00480 -0.92387953251128663000, -0.88192126434835505000, 00481 -0.83146961230254546000, -0.77301045336273688000, 00482 -0.70710678118654768000, -0.63439328416364593000, 00483 -0.55557023301960218000, -0.47139673682599792000, 00484 -0.38268343236509039000, -0.29028467725446250000, 00485 -0.19509032201612872000, -0.09801714032956050600, 00486 }; 00487 00488 static const double k2 = (2*CV_PI)/N; 00489 00490 static const double sin_a0 = -0.166630293345647*k2*k2*k2; 00491 static const double sin_a2 = k2; 00492 00493 static const double cos_a0 = -0.499818138450326*k2*k2; 00494 /*static const double cos_a2 = 1;*/ 00495 00496 double k1; 00497 int i = 0; 00498 00499 if( !angle_in_degrees ) 00500 k1 = N/(2*CV_PI); 00501 else 00502 k1 = N/360.; 00503 00504 #if CV_AVX2 00505 if (USE_AVX2) 00506 { 00507 __m128d v_k1 = _mm_set1_pd(k1); 00508 __m128d v_1 = _mm_set1_pd(1); 00509 __m128i v_N1 = _mm_set1_epi32(N - 1); 00510 __m128i v_N4 = _mm_set1_epi32(N >> 2); 00511 __m128d v_sin_a0 = _mm_set1_pd(sin_a0); 00512 __m128d v_sin_a2 = _mm_set1_pd(sin_a2); 00513 __m128d v_cos_a0 = _mm_set1_pd(cos_a0); 00514 00515 for ( ; i <= len - 4; i += 4) 00516 { 00517 __m128 v_angle = _mm_loadu_ps(angle + i); 00518 00519 // 0-1 00520 __m128d v_t = _mm_mul_pd(_mm_cvtps_pd(v_angle), v_k1); 00521 __m128i v_it = _mm_cvtpd_epi32(v_t); 00522 v_t = _mm_sub_pd(v_t, _mm_cvtepi32_pd(v_it)); 00523 00524 __m128i v_sin_idx = _mm_and_si128(v_it, v_N1); 00525 __m128i v_cos_idx = _mm_and_si128(_mm_sub_epi32(v_N4, v_sin_idx), v_N1); 00526 00527 __m128d v_t2 = _mm_mul_pd(v_t, v_t); 00528 __m128d v_sin_b = _mm_mul_pd(_mm_add_pd(_mm_mul_pd(v_sin_a0, v_t2), v_sin_a2), v_t); 00529 __m128d v_cos_b = _mm_add_pd(_mm_mul_pd(v_cos_a0, v_t2), v_1); 00530 00531 __m128d v_sin_a = _mm_i32gather_pd(sin_table, v_sin_idx, 8); 00532 __m128d v_cos_a = _mm_i32gather_pd(sin_table, v_cos_idx, 8); 00533 00534 __m128d v_sin_val_0 = _mm_add_pd(_mm_mul_pd(v_sin_a, v_cos_b), 00535 _mm_mul_pd(v_cos_a, v_sin_b)); 00536 __m128d v_cos_val_0 = _mm_sub_pd(_mm_mul_pd(v_cos_a, v_cos_b), 00537 _mm_mul_pd(v_sin_a, v_sin_b)); 00538 00539 // 2-3 00540 v_t = _mm_mul_pd(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_angle), 8))), v_k1); 00541 v_it = _mm_cvtpd_epi32(v_t); 00542 v_t = _mm_sub_pd(v_t, _mm_cvtepi32_pd(v_it)); 00543 00544 v_sin_idx = _mm_and_si128(v_it, v_N1); 00545 v_cos_idx = _mm_and_si128(_mm_sub_epi32(v_N4, v_sin_idx), v_N1); 00546 00547 v_t2 = _mm_mul_pd(v_t, v_t); 00548 v_sin_b = _mm_mul_pd(_mm_add_pd(_mm_mul_pd(v_sin_a0, v_t2), v_sin_a2), v_t); 00549 v_cos_b = _mm_add_pd(_mm_mul_pd(v_cos_a0, v_t2), v_1); 00550 00551 v_sin_a = _mm_i32gather_pd(sin_table, v_sin_idx, 8); 00552 v_cos_a = _mm_i32gather_pd(sin_table, v_cos_idx, 8); 00553 00554 __m128d v_sin_val_1 = _mm_add_pd(_mm_mul_pd(v_sin_a, v_cos_b), 00555 _mm_mul_pd(v_cos_a, v_sin_b)); 00556 __m128d v_cos_val_1 = _mm_sub_pd(_mm_mul_pd(v_cos_a, v_cos_b), 00557 _mm_mul_pd(v_sin_a, v_sin_b)); 00558 00559 _mm_storeu_ps(sinval + i, _mm_movelh_ps(_mm_cvtpd_ps(v_sin_val_0), 00560 _mm_cvtpd_ps(v_sin_val_1))); 00561 _mm_storeu_ps(cosval + i, _mm_movelh_ps(_mm_cvtpd_ps(v_cos_val_0), 00562 _mm_cvtpd_ps(v_cos_val_1))); 00563 } 00564 } 00565 #endif 00566 00567 for( ; i < len; i++ ) 00568 { 00569 double t = angle[i]*k1; 00570 int it = cvRound(t); 00571 t -= it; 00572 int sin_idx = it & (N - 1); 00573 int cos_idx = (N/4 - sin_idx) & (N - 1); 00574 00575 double sin_b = (sin_a0*t*t + sin_a2)*t; 00576 double cos_b = cos_a0*t*t + 1; 00577 00578 double sin_a = sin_table[sin_idx]; 00579 double cos_a = sin_table[cos_idx]; 00580 00581 double sin_val = sin_a*cos_b + cos_a*sin_b; 00582 double cos_val = cos_a*cos_b - sin_a*sin_b; 00583 00584 sinval[i] = (float)sin_val; 00585 cosval[i] = (float)cos_val; 00586 } 00587 } 00588 00589 00590 #ifdef HAVE_OPENCL 00591 00592 static bool ocl_polarToCart( InputArray _mag, InputArray _angle, 00593 OutputArray _dst1, OutputArray _dst2, bool angleInDegrees ) 00594 { 00595 const ocl::Device & d = ocl::Device::getDefault(); 00596 int type = _angle.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), 00597 rowsPerWI = d.isIntel() ? 4 : 1; 00598 bool doubleSupport = d.doubleFPConfig() > 0; 00599 00600 if ( !doubleSupport && depth == CV_64F ) 00601 return false; 00602 00603 ocl::Kernel k("KF", ocl::core::arithm_oclsrc, 00604 format("-D dstT=%s -D rowsPerWI=%d -D depth=%d -D BINARY_OP -D OP_PTC_%s%s", 00605 ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), rowsPerWI, 00606 depth, angleInDegrees ? "AD" : "AR", 00607 doubleSupport ? " -D DOUBLE_SUPPORT" : "")); 00608 if (k.empty()) 00609 return false; 00610 00611 UMat mag = _mag.getUMat(), angle = _angle.getUMat(); 00612 Size size = angle.size(); 00613 CV_Assert(mag.size() == size); 00614 00615 _dst1.create(size, type); 00616 _dst2.create(size, type); 00617 UMat dst1 = _dst1.getUMat(), dst2 = _dst2.getUMat(); 00618 00619 k.args(ocl::KernelArg::ReadOnlyNoSize(mag), ocl::KernelArg::ReadOnlyNoSize(angle), 00620 ocl::KernelArg::WriteOnly(dst1, cn), ocl::KernelArg::WriteOnlyNoSize(dst2)); 00621 00622 size_t globalsize[2] = { (size_t)dst1.cols * cn, ((size_t)dst1.rows + rowsPerWI - 1) / rowsPerWI }; 00623 return k.run(2, globalsize, NULL, false); 00624 } 00625 00626 #endif 00627 00628 void polarToCart( InputArray src1, InputArray src2, 00629 OutputArray dst1, OutputArray dst2, bool angleInDegrees ) 00630 { 00631 int type = src2.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); 00632 CV_Assert((depth == CV_32F || depth == CV_64F) && (src1.empty() || src1.type() == type)); 00633 00634 #ifdef HAVE_OPENCL 00635 CV_OCL_RUN(!src1.empty() && src2.dims() <= 2 && dst1.isUMat() && dst2.isUMat(), 00636 ocl_polarToCart(src1, src2, dst1, dst2, angleInDegrees)) 00637 #endif 00638 00639 Mat Mag = src1.getMat(), Angle = src2.getMat(); 00640 CV_Assert( Mag.empty() || Angle.size == Mag.size); 00641 dst1.create( Angle.dims, Angle.size, type ); 00642 dst2.create( Angle.dims, Angle.size, type ); 00643 Mat X = dst1.getMat(), Y = dst2.getMat(); 00644 00645 #if defined(HAVE_IPP) 00646 CV_IPP_CHECK() 00647 { 00648 if (Mag.isContinuous() && Angle.isContinuous() && X.isContinuous() && Y.isContinuous() && !angleInDegrees) 00649 { 00650 typedef IppStatus (CV_STDCALL * ippsPolarToCart)(const void * pSrcMagn, const void * pSrcPhase, 00651 void * pDstRe, void * pDstIm, int len); 00652 ippsPolarToCart ippFunc = 00653 depth == CV_32F ? (ippsPolarToCart)ippsPolarToCart_32f : 00654 depth == CV_64F ? (ippsPolarToCart)ippsPolarToCart_64f : 0; 00655 CV_Assert(ippFunc != 0); 00656 00657 IppStatus status = ippFunc(Mag.ptr(), Angle.ptr(), X.ptr(), Y.ptr(), static_cast<int>(cn * X.total())); 00658 if (status >= 0) 00659 { 00660 CV_IMPL_ADD(CV_IMPL_IPP); 00661 return; 00662 } 00663 setIppErrorStatus(); 00664 } 00665 } 00666 #endif 00667 00668 const Mat* arrays[] = {&Mag, &Angle, &X, &Y, 0}; 00669 uchar* ptrs[4]; 00670 NAryMatIterator it(arrays, ptrs); 00671 cv::AutoBuffer<float> _buf; 00672 float* buf[2] = {0, 0}; 00673 int j, k, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn); 00674 size_t esz1 = Angle.elemSize1(); 00675 00676 if( depth == CV_64F ) 00677 { 00678 _buf.allocate(blockSize*2); 00679 buf[0] = _buf; 00680 buf[1] = buf[0] + blockSize; 00681 } 00682 00683 for( size_t i = 0; i < it.nplanes; i++, ++it ) 00684 { 00685 for( j = 0; j < total; j += blockSize ) 00686 { 00687 int len = std::min(total - j, blockSize); 00688 if( depth == CV_32F ) 00689 { 00690 const float *mag = (const float*)ptrs[0], *angle = (const float*)ptrs[1]; 00691 float *x = (float*)ptrs[2], *y = (float*)ptrs[3]; 00692 00693 SinCos_32f( angle, y, x, len, angleInDegrees ); 00694 if( mag ) 00695 { 00696 k = 0; 00697 00698 #if CV_NEON 00699 for( ; k <= len - 4; k += 4 ) 00700 { 00701 float32x4_t v_m = vld1q_f32(mag + k); 00702 vst1q_f32(x + k, vmulq_f32(vld1q_f32(x + k), v_m)); 00703 vst1q_f32(y + k, vmulq_f32(vld1q_f32(y + k), v_m)); 00704 } 00705 #elif CV_SSE2 00706 if (USE_SSE2) 00707 { 00708 for( ; k <= len - 4; k += 4 ) 00709 { 00710 __m128 v_m = _mm_loadu_ps(mag + k); 00711 _mm_storeu_ps(x + k, _mm_mul_ps(_mm_loadu_ps(x + k), v_m)); 00712 _mm_storeu_ps(y + k, _mm_mul_ps(_mm_loadu_ps(y + k), v_m)); 00713 } 00714 } 00715 #endif 00716 00717 for( ; k < len; k++ ) 00718 { 00719 float m = mag[k]; 00720 x[k] *= m; y[k] *= m; 00721 } 00722 } 00723 } 00724 else 00725 { 00726 const double *mag = (const double*)ptrs[0], *angle = (const double*)ptrs[1]; 00727 double *x = (double*)ptrs[2], *y = (double*)ptrs[3]; 00728 00729 for( k = 0; k < len; k++ ) 00730 buf[0][k] = (float)angle[k]; 00731 00732 SinCos_32f( buf[0], buf[1], buf[0], len, angleInDegrees ); 00733 if( mag ) 00734 for( k = 0; k < len; k++ ) 00735 { 00736 double m = mag[k]; 00737 x[k] = buf[0][k]*m; y[k] = buf[1][k]*m; 00738 } 00739 else 00740 { 00741 std::memcpy(x, buf[0], sizeof(float) * len); 00742 std::memcpy(y, buf[1], sizeof(float) * len); 00743 } 00744 } 00745 00746 if( ptrs[0] ) 00747 ptrs[0] += len*esz1; 00748 ptrs[1] += len*esz1; 00749 ptrs[2] += len*esz1; 00750 ptrs[3] += len*esz1; 00751 } 00752 } 00753 } 00754 00755 /****************************************************************************************\ 00756 * E X P * 00757 \****************************************************************************************/ 00758 00759 #ifdef HAVE_IPP 00760 static void Exp_32f_ipp(const float *x, float *y, int n) 00761 { 00762 CV_IPP_CHECK() 00763 { 00764 if (0 <= ippsExp_32f_A21(x, y, n)) 00765 { 00766 CV_IMPL_ADD(CV_IMPL_IPP); 00767 return; 00768 } 00769 setIppErrorStatus(); 00770 } 00771 hal::exp32f(x, y, n); 00772 } 00773 00774 static void Exp_64f_ipp(const double *x, double *y, int n) 00775 { 00776 CV_IPP_CHECK() 00777 { 00778 if (0 <= ippsExp_64f_A50(x, y, n)) 00779 { 00780 CV_IMPL_ADD(CV_IMPL_IPP); 00781 return; 00782 } 00783 setIppErrorStatus(); 00784 } 00785 hal::exp64f(x, y, n); 00786 } 00787 00788 #define Exp_32f Exp_32f_ipp 00789 #define Exp_64f Exp_64f_ipp 00790 #else 00791 #define Exp_32f hal::exp32f 00792 #define Exp_64f hal::exp64f 00793 #endif 00794 00795 00796 void exp( InputArray _src, OutputArray _dst ) 00797 { 00798 int type = _src.type(), depth = _src.depth(), cn = _src.channels(); 00799 CV_Assert( depth == CV_32F || depth == CV_64F ); 00800 00801 #ifdef HAVE_OPENCL 00802 CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2, 00803 ocl_math_op(_src, noArray(), _dst, OCL_OP_EXP)) 00804 #endif 00805 00806 Mat src = _src.getMat(); 00807 _dst.create( src.dims, src.size, type ); 00808 Mat dst = _dst.getMat(); 00809 00810 const Mat* arrays[] = {&src, &dst, 0}; 00811 uchar* ptrs[2]; 00812 NAryMatIterator it(arrays, ptrs); 00813 int len = (int)(it.size*cn); 00814 00815 for( size_t i = 0; i < it.nplanes; i++, ++it ) 00816 { 00817 if( depth == CV_32F ) 00818 Exp_32f((const float*)ptrs[0], (float*)ptrs[1], len); 00819 else 00820 Exp_64f((const double*)ptrs[0], (double*)ptrs[1], len); 00821 } 00822 } 00823 00824 00825 /****************************************************************************************\ 00826 * L O G * 00827 \****************************************************************************************/ 00828 00829 #ifdef HAVE_IPP 00830 static void Log_32f_ipp(const float *x, float *y, int n) 00831 { 00832 CV_IPP_CHECK() 00833 { 00834 if (0 <= ippsLn_32f_A21(x, y, n)) 00835 { 00836 CV_IMPL_ADD(CV_IMPL_IPP); 00837 return; 00838 } 00839 setIppErrorStatus(); 00840 } 00841 hal::log32f(x, y, n); 00842 } 00843 00844 static void Log_64f_ipp(const double *x, double *y, int n) 00845 { 00846 CV_IPP_CHECK() 00847 { 00848 if (0 <= ippsLn_64f_A50(x, y, n)) 00849 { 00850 CV_IMPL_ADD(CV_IMPL_IPP); 00851 return; 00852 } 00853 setIppErrorStatus(); 00854 } 00855 hal::log64f(x, y, n); 00856 } 00857 00858 #define Log_32f Log_32f_ipp 00859 #define Log_64f Log_64f_ipp 00860 #else 00861 #define Log_32f hal::log32f 00862 #define Log_64f hal::log64f 00863 #endif 00864 00865 void log( InputArray _src, OutputArray _dst ) 00866 { 00867 int type = _src.type(), depth = _src.depth(), cn = _src.channels(); 00868 CV_Assert( depth == CV_32F || depth == CV_64F ); 00869 00870 #ifdef HAVE_OPENCL 00871 CV_OCL_RUN( _dst.isUMat() && _src.dims() <= 2, 00872 ocl_math_op(_src, noArray(), _dst, OCL_OP_LOG)) 00873 #endif 00874 00875 Mat src = _src.getMat(); 00876 _dst.create( src.dims, src.size, type ); 00877 Mat dst = _dst.getMat(); 00878 00879 const Mat* arrays[] = {&src, &dst, 0}; 00880 uchar* ptrs[2]; 00881 NAryMatIterator it(arrays, ptrs); 00882 int len = (int)(it.size*cn); 00883 00884 for( size_t i = 0; i < it.nplanes; i++, ++it ) 00885 { 00886 if( depth == CV_32F ) 00887 Log_32f( (const float*)ptrs[0], (float*)ptrs[1], len ); 00888 else 00889 Log_64f( (const double*)ptrs[0], (double*)ptrs[1], len ); 00890 } 00891 } 00892 00893 /****************************************************************************************\ 00894 * P O W E R * 00895 \****************************************************************************************/ 00896 00897 template <typename T, typename WT> 00898 struct iPow_SIMD 00899 { 00900 int operator() ( const T *, T *, int, int) 00901 { 00902 return 0; 00903 } 00904 }; 00905 00906 #if CV_SIMD128 00907 00908 template <> 00909 struct iPow_SIMD<uchar, int> 00910 { 00911 int operator() ( const uchar * src, uchar * dst, int len, int power ) 00912 { 00913 int i = 0; 00914 v_uint32x4 v_1 = v_setall_u32(1u); 00915 00916 for ( ; i <= len - 8; i += 8) 00917 { 00918 v_uint32x4 v_a1 = v_1, v_a2 = v_1; 00919 v_uint16x8 v = v_load_expand(src + i); 00920 v_uint32x4 v_b1, v_b2; 00921 v_expand(v, v_b1, v_b2); 00922 int p = power; 00923 00924 while( p > 1 ) 00925 { 00926 if (p & 1) 00927 { 00928 v_a1 *= v_b1; 00929 v_a2 *= v_b2; 00930 } 00931 v_b1 *= v_b1; 00932 v_b2 *= v_b2; 00933 p >>= 1; 00934 } 00935 00936 v_a1 *= v_b1; 00937 v_a2 *= v_b2; 00938 00939 v = v_pack(v_a1, v_a2); 00940 v_pack_store(dst + i, v); 00941 } 00942 00943 return i; 00944 } 00945 }; 00946 00947 template <> 00948 struct iPow_SIMD<schar, int> 00949 { 00950 int operator() ( const schar * src, schar * dst, int len, int power) 00951 { 00952 int i = 0; 00953 v_int32x4 v_1 = v_setall_s32(1); 00954 00955 for ( ; i <= len - 8; i += 8) 00956 { 00957 v_int32x4 v_a1 = v_1, v_a2 = v_1; 00958 v_int16x8 v = v_load_expand(src + i); 00959 v_int32x4 v_b1, v_b2; 00960 v_expand(v, v_b1, v_b2); 00961 int p = power; 00962 00963 while( p > 1 ) 00964 { 00965 if (p & 1) 00966 { 00967 v_a1 *= v_b1; 00968 v_a2 *= v_b2; 00969 } 00970 v_b1 *= v_b1; 00971 v_b2 *= v_b2; 00972 p >>= 1; 00973 } 00974 00975 v_a1 *= v_b1; 00976 v_a2 *= v_b2; 00977 00978 v = v_pack(v_a1, v_a2); 00979 v_pack_store(dst + i, v); 00980 } 00981 00982 return i; 00983 } 00984 }; 00985 00986 template <> 00987 struct iPow_SIMD<ushort, int> 00988 { 00989 int operator() ( const ushort * src, ushort * dst, int len, int power) 00990 { 00991 int i = 0; 00992 v_uint32x4 v_1 = v_setall_u32(1u); 00993 00994 for ( ; i <= len - 8; i += 8) 00995 { 00996 v_uint32x4 v_a1 = v_1, v_a2 = v_1; 00997 v_uint16x8 v = v_load(src + i); 00998 v_uint32x4 v_b1, v_b2; 00999 v_expand(v, v_b1, v_b2); 01000 int p = power; 01001 01002 while( p > 1 ) 01003 { 01004 if (p & 1) 01005 { 01006 v_a1 *= v_b1; 01007 v_a2 *= v_b2; 01008 } 01009 v_b1 *= v_b1; 01010 v_b2 *= v_b2; 01011 p >>= 1; 01012 } 01013 01014 v_a1 *= v_b1; 01015 v_a2 *= v_b2; 01016 01017 v = v_pack(v_a1, v_a2); 01018 v_store(dst + i, v); 01019 } 01020 01021 return i; 01022 } 01023 }; 01024 01025 template <> 01026 struct iPow_SIMD<short, int> 01027 { 01028 int operator() ( const short * src, short * dst, int len, int power) 01029 { 01030 int i = 0; 01031 v_int32x4 v_1 = v_setall_s32(1); 01032 01033 for ( ; i <= len - 8; i += 8) 01034 { 01035 v_int32x4 v_a1 = v_1, v_a2 = v_1; 01036 v_int16x8 v = v_load(src + i); 01037 v_int32x4 v_b1, v_b2; 01038 v_expand(v, v_b1, v_b2); 01039 int p = power; 01040 01041 while( p > 1 ) 01042 { 01043 if (p & 1) 01044 { 01045 v_a1 *= v_b1; 01046 v_a2 *= v_b2; 01047 } 01048 v_b1 *= v_b1; 01049 v_b2 *= v_b2; 01050 p >>= 1; 01051 } 01052 01053 v_a1 *= v_b1; 01054 v_a2 *= v_b2; 01055 01056 v = v_pack(v_a1, v_a2); 01057 v_store(dst + i, v); 01058 } 01059 01060 return i; 01061 } 01062 }; 01063 01064 template <> 01065 struct iPow_SIMD<int, int> 01066 { 01067 int operator() ( const int * src, int * dst, int len, int power) 01068 { 01069 int i = 0; 01070 v_int32x4 v_1 = v_setall_s32(1); 01071 01072 for ( ; i <= len - 8; i += 8) 01073 { 01074 v_int32x4 v_a1 = v_1, v_a2 = v_1; 01075 v_int32x4 v_b1 = v_load(src + i), v_b2 = v_load(src + i + 4); 01076 int p = power; 01077 01078 while( p > 1 ) 01079 { 01080 if (p & 1) 01081 { 01082 v_a1 *= v_b1; 01083 v_a2 *= v_b2; 01084 } 01085 v_b1 *= v_b1; 01086 v_b2 *= v_b2; 01087 p >>= 1; 01088 } 01089 01090 v_a1 *= v_b1; 01091 v_a2 *= v_b2; 01092 01093 v_store(dst + i, v_a1); 01094 v_store(dst + i + 4, v_a2); 01095 } 01096 01097 return i; 01098 } 01099 }; 01100 01101 template <> 01102 struct iPow_SIMD<float, float> 01103 { 01104 int operator() ( const float * src, float * dst, int len, int power) 01105 { 01106 int i = 0; 01107 v_float32x4 v_1 = v_setall_f32(1.f); 01108 01109 for ( ; i <= len - 8; i += 8) 01110 { 01111 v_float32x4 v_a1 = v_1, v_a2 = v_1; 01112 v_float32x4 v_b1 = v_load(src + i), v_b2 = v_load(src + i + 4); 01113 int p = std::abs(power); 01114 if( power < 0 ) 01115 { 01116 v_b1 = v_1 / v_b1; 01117 v_b2 = v_1 / v_b2; 01118 } 01119 01120 while( p > 1 ) 01121 { 01122 if (p & 1) 01123 { 01124 v_a1 *= v_b1; 01125 v_a2 *= v_b2; 01126 } 01127 v_b1 *= v_b1; 01128 v_b2 *= v_b2; 01129 p >>= 1; 01130 } 01131 01132 v_a1 *= v_b1; 01133 v_a2 *= v_b2; 01134 01135 v_store(dst + i, v_a1); 01136 v_store(dst + i + 4, v_a2); 01137 } 01138 01139 return i; 01140 } 01141 }; 01142 01143 #if CV_SIMD128_64F 01144 template <> 01145 struct iPow_SIMD<double, double> 01146 { 01147 int operator() ( const double * src, double * dst, int len, int power) 01148 { 01149 int i = 0; 01150 v_float64x2 v_1 = v_setall_f64(1.); 01151 01152 for ( ; i <= len - 4; i += 4) 01153 { 01154 v_float64x2 v_a1 = v_1, v_a2 = v_1; 01155 v_float64x2 v_b1 = v_load(src + i), v_b2 = v_load(src + i + 2); 01156 int p = std::abs(power); 01157 if( power < 0 ) 01158 { 01159 v_b1 = v_1 / v_b1; 01160 v_b2 = v_1 / v_b2; 01161 } 01162 01163 while( p > 1 ) 01164 { 01165 if (p & 1) 01166 { 01167 v_a1 *= v_b1; 01168 v_a2 *= v_b2; 01169 } 01170 v_b1 *= v_b1; 01171 v_b2 *= v_b2; 01172 p >>= 1; 01173 } 01174 01175 v_a1 *= v_b1; 01176 v_a2 *= v_b2; 01177 01178 v_store(dst + i, v_a1); 01179 v_store(dst + i + 2, v_a2); 01180 } 01181 01182 return i; 01183 } 01184 }; 01185 #endif 01186 01187 #endif 01188 01189 template<typename T, typename WT> 01190 static void 01191 iPow_i( const T* src, T* dst, int len, int power ) 01192 { 01193 if( power < 0 ) 01194 { 01195 T tab[5] = 01196 { 01197 saturate_cast<T>(power == -1 ? -1 : 0), saturate_cast<T>((power & 1) ? -1 : 1), 01198 std::numeric_limits<T>::max(), 1, saturate_cast<T>(power == -1 ? 1 : 0) 01199 }; 01200 for( int i = 0; i < len; i++ ) 01201 { 01202 T val = src[i]; 01203 dst[i] = cv_abs(val) <= 2 ? tab[val + 2] : (T)0; 01204 } 01205 } 01206 else 01207 { 01208 iPow_SIMD<T, WT> vop; 01209 int i = vop(src, dst, len, power); 01210 01211 for( ; i < len; i++ ) 01212 { 01213 WT a = 1, b = src[i]; 01214 int p = power; 01215 while( p > 1 ) 01216 { 01217 if( p & 1 ) 01218 a *= b; 01219 b *= b; 01220 p >>= 1; 01221 } 01222 01223 a *= b; 01224 dst[i] = saturate_cast<T>(a); 01225 } 01226 } 01227 } 01228 01229 template<typename T> 01230 static void 01231 iPow_f( const T* src, T* dst, int len, int power0 ) 01232 { 01233 iPow_SIMD<T, T> vop; 01234 int i = vop(src, dst, len, power0); 01235 int power = std::abs(power0); 01236 01237 for( ; i < len; i++ ) 01238 { 01239 T a = 1, b = src[i]; 01240 int p = power; 01241 if( power0 < 0 ) 01242 b = 1/b; 01243 01244 while( p > 1 ) 01245 { 01246 if( p & 1 ) 01247 a *= b; 01248 b *= b; 01249 p >>= 1; 01250 } 01251 01252 a *= b; 01253 dst[i] = a; 01254 } 01255 } 01256 01257 static void iPow8u(const uchar* src, uchar* dst, int len, int power) 01258 { 01259 iPow_i<uchar, unsigned>(src, dst, len, power); 01260 } 01261 01262 static void iPow8s(const schar* src, schar* dst, int len, int power) 01263 { 01264 iPow_i<schar, int>(src, dst, len, power); 01265 } 01266 01267 static void iPow16u(const ushort* src, ushort* dst, int len, int power) 01268 { 01269 iPow_i<ushort, unsigned>(src, dst, len, power); 01270 } 01271 01272 static void iPow16s(const short* src, short* dst, int len, int power) 01273 { 01274 iPow_i<short, int>(src, dst, len, power); 01275 } 01276 01277 static void iPow32s(const int* src, int* dst, int len, int power) 01278 { 01279 iPow_i<int, int>(src, dst, len, power); 01280 } 01281 01282 static void iPow32f(const float* src, float* dst, int len, int power) 01283 { 01284 iPow_f<float>(src, dst, len, power); 01285 } 01286 01287 static void iPow64f(const double* src, double* dst, int len, int power) 01288 { 01289 iPow_f<double>(src, dst, len, power); 01290 } 01291 01292 01293 typedef void (*IPowFunc)( const uchar* src, uchar* dst, int len, int power ); 01294 01295 static IPowFunc ipowTab[] = 01296 { 01297 (IPowFunc)iPow8u, (IPowFunc)iPow8s, (IPowFunc)iPow16u, (IPowFunc)iPow16s, 01298 (IPowFunc)iPow32s, (IPowFunc)iPow32f, (IPowFunc)iPow64f, 0 01299 }; 01300 01301 #ifdef HAVE_OPENCL 01302 01303 static bool ocl_pow(InputArray _src, double power, OutputArray _dst, 01304 bool is_ipower, int ipower) 01305 { 01306 const ocl::Device & d = ocl::Device::getDefault(); 01307 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), 01308 rowsPerWI = d.isIntel() ? 4 : 1; 01309 bool doubleSupport = d.doubleFPConfig() > 0; 01310 01311 _dst.createSameSize(_src, type); 01312 if (is_ipower) 01313 { 01314 if (ipower == 0) 01315 { 01316 _dst.setTo(Scalar::all(1)); 01317 return true; 01318 } 01319 if (ipower == 1) 01320 { 01321 _src.copyTo(_dst); 01322 return true; 01323 } 01324 if( ipower < 0 ) 01325 { 01326 if( depth == CV_32F || depth == CV_64F ) 01327 is_ipower = false; 01328 else 01329 return false; 01330 } 01331 } 01332 01333 if (depth == CV_64F && !doubleSupport) 01334 return false; 01335 01336 bool issqrt = std::abs(power - 0.5) < DBL_EPSILON; 01337 const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW"; 01338 01339 ocl::Kernel k("KF", ocl::core::arithm_oclsrc, 01340 format("-D dstT=%s -D depth=%d -D rowsPerWI=%d -D %s -D UNARY_OP%s", 01341 ocl::typeToStr(depth), depth, rowsPerWI, op, 01342 doubleSupport ? " -D DOUBLE_SUPPORT" : "")); 01343 if (k.empty()) 01344 return false; 01345 01346 UMat src = _src.getUMat(); 01347 _dst.create(src.size(), type); 01348 UMat dst = _dst.getUMat(); 01349 01350 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), 01351 dstarg = ocl::KernelArg::WriteOnly(dst, cn); 01352 01353 if (issqrt) 01354 k.args(srcarg, dstarg); 01355 else if (is_ipower) 01356 k.args(srcarg, dstarg, ipower); 01357 else 01358 { 01359 if (depth == CV_32F) 01360 k.args(srcarg, dstarg, (float)power); 01361 else 01362 k.args(srcarg, dstarg, power); 01363 } 01364 01365 size_t globalsize[2] = { (size_t)dst.cols * cn, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI }; 01366 return k.run(2, globalsize, NULL, false); 01367 } 01368 01369 #endif 01370 01371 static void InvSqrt_32f(const float* src, float* dst, int n) { hal::invSqrt32f(src, dst, n); } 01372 static void InvSqrt_64f(const double* src, double* dst, int n) { hal::invSqrt64f(src, dst, n); } 01373 static void Sqrt_32f(const float* src, float* dst, int n) { hal::sqrt32f(src, dst, n); } 01374 static void Sqrt_64f(const double* src, double* dst, int n) { hal::sqrt64f(src, dst, n); } 01375 01376 void pow( InputArray _src, double power, OutputArray _dst ) 01377 { 01378 int type = _src.type(), depth = CV_MAT_DEPTH(type), 01379 cn = CV_MAT_CN(type), ipower = cvRound(power); 01380 bool is_ipower = fabs(ipower - power) < DBL_EPSILON; 01381 #ifdef HAVE_OPENCL 01382 bool useOpenCL = _dst.isUMat() && _src.dims() <= 2; 01383 #endif 01384 01385 if( is_ipower 01386 #ifdef HAVE_OPENCL 01387 && !(useOpenCL && ocl::Device::getDefault().isIntel() && depth != CV_64F) 01388 #endif 01389 ) 01390 { 01391 switch( ipower ) 01392 { 01393 case 0: 01394 _dst.createSameSize(_src, type); 01395 _dst.setTo(Scalar::all(1)); 01396 return; 01397 case 1: 01398 _src.copyTo(_dst); 01399 return; 01400 case 2: 01401 multiply(_src, _src, _dst); 01402 return; 01403 } 01404 } 01405 else 01406 CV_Assert( depth == CV_32F || depth == CV_64F ); 01407 01408 #ifdef HAVE_OPENCL 01409 CV_OCL_RUN(useOpenCL, ocl_pow(_src, power, _dst, is_ipower, ipower)) 01410 #endif 01411 01412 Mat src = _src.getMat(); 01413 _dst.create( src.dims, src.size, type ); 01414 Mat dst = _dst.getMat(); 01415 01416 const Mat* arrays[] = {&src, &dst, 0}; 01417 uchar* ptrs[2]; 01418 NAryMatIterator it(arrays, ptrs); 01419 int len = (int)(it.size*cn); 01420 01421 if( is_ipower ) 01422 { 01423 IPowFunc func = ipowTab[depth]; 01424 CV_Assert( func != 0 ); 01425 01426 for( size_t i = 0; i < it.nplanes; i++, ++it ) 01427 func( ptrs[0], ptrs[1], len, ipower ); 01428 } 01429 else if( fabs(fabs(power) - 0.5) < DBL_EPSILON ) 01430 { 01431 MathFunc func = power < 0 ? 01432 (depth == CV_32F ? (MathFunc)InvSqrt_32f : (MathFunc)InvSqrt_64f) : 01433 (depth == CV_32F ? (MathFunc)Sqrt_32f : (MathFunc)Sqrt_64f); 01434 01435 for( size_t i = 0; i < it.nplanes; i++, ++it ) 01436 func( ptrs[0], ptrs[1], len ); 01437 } 01438 else 01439 { 01440 int j, k, blockSize = std::min(len, ((BLOCK_SIZE + cn-1)/cn)*cn); 01441 size_t esz1 = src.elemSize1(); 01442 AutoBuffer<uchar> buf; 01443 Cv32suf inf32, nan32; 01444 Cv64suf inf64, nan64; 01445 float* fbuf = 0; 01446 double* dbuf = 0; 01447 inf32.i = 0x7f800000; 01448 nan32.i = 0x7fffffff; 01449 inf64.i = CV_BIG_INT(0x7FF0000000000000); 01450 nan64.i = CV_BIG_INT(0x7FFFFFFFFFFFFFFF); 01451 01452 if( src.ptr() == dst.ptr() ) 01453 { 01454 buf.allocate(blockSize*esz1); 01455 fbuf = (float*)(uchar*)buf; 01456 dbuf = (double*)(uchar*)buf; 01457 } 01458 01459 for( size_t i = 0; i < it.nplanes; i++, ++it ) 01460 { 01461 for( j = 0; j < len; j += blockSize ) 01462 { 01463 int bsz = std::min(len - j, blockSize); 01464 01465 #if defined(HAVE_IPP) 01466 CV_IPP_CHECK() 01467 { 01468 IppStatus status = depth == CV_32F ? 01469 ippsPowx_32f_A21((const float*)ptrs[0], (float)power, (float*)ptrs[1], bsz) : 01470 ippsPowx_64f_A50((const double*)ptrs[0], (double)power, (double*)ptrs[1], bsz); 01471 01472 if (status >= 0) 01473 { 01474 CV_IMPL_ADD(CV_IMPL_IPP); 01475 ptrs[0] += bsz*esz1; 01476 ptrs[1] += bsz*esz1; 01477 continue; 01478 } 01479 setIppErrorStatus(); 01480 } 01481 #endif 01482 01483 if( depth == CV_32F ) 01484 { 01485 float* x0 = (float*)ptrs[0]; 01486 float* x = fbuf ? fbuf : x0; 01487 float* y = (float*)ptrs[1]; 01488 01489 if( x != x0 ) 01490 memcpy(x, x0, bsz*esz1); 01491 01492 Log_32f(x, y, bsz); 01493 for( k = 0; k < bsz; k++ ) 01494 y[k] = (float)(y[k]*power); 01495 Exp_32f(y, y, bsz); 01496 for( k = 0; k < bsz; k++ ) 01497 { 01498 if( x0[k] <= 0 ) 01499 { 01500 if( x0[k] == 0.f ) 01501 { 01502 if( power < 0 ) 01503 y[k] = inf32.f; 01504 } 01505 else 01506 y[k] = nan32.f; 01507 } 01508 } 01509 } 01510 else 01511 { 01512 double* x0 = (double*)ptrs[0]; 01513 double* x = dbuf ? dbuf : x0; 01514 double* y = (double*)ptrs[1]; 01515 01516 if( x != x0 ) 01517 memcpy(x, x0, bsz*esz1); 01518 01519 Log_64f(x, y, bsz); 01520 for( k = 0; k < bsz; k++ ) 01521 y[k] *= power; 01522 Exp_64f(y, y, bsz); 01523 01524 for( k = 0; k < bsz; k++ ) 01525 { 01526 if( x0[k] <= 0 ) 01527 { 01528 if( x0[k] == 0. ) 01529 { 01530 if( power < 0 ) 01531 y[k] = inf64.f; 01532 } 01533 else 01534 y[k] = nan64.f; 01535 } 01536 } 01537 } 01538 ptrs[0] += bsz*esz1; 01539 ptrs[1] += bsz*esz1; 01540 } 01541 } 01542 } 01543 } 01544 01545 void sqrt(InputArray a, OutputArray b) 01546 { 01547 cv::pow(a, 0.5, b); 01548 } 01549 01550 /************************** CheckArray for NaN's, Inf's *********************************/ 01551 01552 template<int cv_mat_type> struct mat_type_assotiations{}; 01553 01554 template<> struct mat_type_assotiations<CV_8U> 01555 { 01556 typedef unsigned char type; 01557 static const type min_allowable = 0x0; 01558 static const type max_allowable = 0xFF; 01559 }; 01560 01561 template<> struct mat_type_assotiations<CV_8S> 01562 { 01563 typedef signed char type; 01564 static const type min_allowable = SCHAR_MIN; 01565 static const type max_allowable = SCHAR_MAX; 01566 }; 01567 01568 template<> struct mat_type_assotiations<CV_16U> 01569 { 01570 typedef unsigned short type; 01571 static const type min_allowable = 0x0; 01572 static const type max_allowable = USHRT_MAX; 01573 }; 01574 template<> struct mat_type_assotiations<CV_16S> 01575 { 01576 typedef signed short type; 01577 static const type min_allowable = SHRT_MIN; 01578 static const type max_allowable = SHRT_MAX; 01579 }; 01580 01581 template<> struct mat_type_assotiations<CV_32S> 01582 { 01583 typedef int type; 01584 static const type min_allowable = (-INT_MAX - 1); 01585 static const type max_allowable = INT_MAX; 01586 }; 01587 01588 template<int depth> 01589 static bool checkIntegerRange(cv::Mat src, Point& bad_pt, int minVal, int maxVal) 01590 { 01591 typedef mat_type_assotiations<depth> type_ass; 01592 01593 if (minVal < type_ass::min_allowable && maxVal > type_ass::max_allowable) 01594 { 01595 return true; 01596 } 01597 else if (minVal > type_ass::max_allowable || maxVal < type_ass::min_allowable || maxVal < minVal) 01598 { 01599 bad_pt = cv::Point (0,0); 01600 return false; 01601 } 01602 cv::Mat as_one_channel = src.reshape(1,0); 01603 01604 for (int j = 0; j < as_one_channel.rows; ++j) 01605 for (int i = 0; i < as_one_channel.cols; ++i) 01606 { 01607 typename type_ass::type v = as_one_channel.at<typename type_ass::type>(j ,i); 01608 if (v < minVal || v > maxVal) 01609 { 01610 bad_pt.y = j; 01611 bad_pt.x = i / src.channels(); 01612 return false; 01613 } 01614 } 01615 01616 return true; 01617 } 01618 01619 typedef bool (*check_range_function)(cv::Mat src, Point& bad_pt, int minVal, int maxVal); 01620 01621 check_range_function check_range_functions[] = 01622 { 01623 &checkIntegerRange<CV_8U>, 01624 &checkIntegerRange<CV_8S>, 01625 &checkIntegerRange<CV_16U>, 01626 &checkIntegerRange<CV_16S>, 01627 &checkIntegerRange<CV_32S> 01628 }; 01629 01630 bool checkRange(InputArray _src, bool quiet, Point* pt, double minVal, double maxVal) 01631 { 01632 Mat src = _src.getMat(); 01633 01634 if ( src.dims > 2 ) 01635 { 01636 CV_Assert(pt == NULL); // no way to provide location info 01637 01638 const Mat* arrays[] = {&src, 0}; 01639 Mat planes[1]; 01640 NAryMatIterator it(arrays, planes); 01641 01642 for ( size_t i = 0; i < it.nplanes; i++, ++it ) 01643 { 01644 if (!checkRange( it.planes[0], quiet, NULL, minVal, maxVal )) 01645 { 01646 return false; 01647 } 01648 } 01649 return true; 01650 } 01651 01652 int depth = src.depth(); 01653 Point badPt(-1, -1); 01654 01655 if (depth < CV_32F) 01656 { 01657 int minVali = minVal <= INT_MIN ? INT_MIN : cvFloor(minVal); 01658 int maxVali = maxVal > INT_MAX ? INT_MAX : cvCeil(maxVal) - 1; 01659 01660 (check_range_functions[depth])(src, badPt, minVali, maxVali); 01661 } 01662 else 01663 { 01664 int i, loc = 0; 01665 int cn = src.channels(); 01666 Size size = getContinuousSize( src, cn ); 01667 01668 if( depth == CV_32F ) 01669 { 01670 Cv32suf a, b; 01671 int ia, ib; 01672 const int* isrc = src.ptr<int>(); 01673 size_t step = src.step/sizeof(isrc[0]); 01674 01675 a.f = (float)std::max(minVal, (double)-FLT_MAX); 01676 b.f = (float)std::min(maxVal, (double)FLT_MAX); 01677 01678 ia = CV_TOGGLE_FLT(a.i); 01679 ib = CV_TOGGLE_FLT(b.i); 01680 01681 for( ; badPt.x < 0 && size.height--; loc += size.width, isrc += step ) 01682 { 01683 for( i = 0; i < size.width; i++ ) 01684 { 01685 int val = isrc[i]; 01686 val = CV_TOGGLE_FLT(val); 01687 01688 if( val < ia || val >= ib ) 01689 { 01690 int pixelId = (loc + i) / cn; 01691 badPt = Point(pixelId % src.cols, pixelId / src.cols); 01692 break; 01693 } 01694 } 01695 } 01696 } 01697 else 01698 { 01699 Cv64suf a, b; 01700 int64 ia, ib; 01701 const int64* isrc = src.ptr<int64>(); 01702 size_t step = src.step/sizeof(isrc[0]); 01703 01704 a.f = minVal; 01705 b.f = maxVal; 01706 01707 ia = CV_TOGGLE_DBL(a.i); 01708 ib = CV_TOGGLE_DBL(b.i); 01709 01710 for( ; badPt.x < 0 && size.height--; loc += size.width, isrc += step ) 01711 { 01712 for( i = 0; i < size.width; i++ ) 01713 { 01714 int64 val = isrc[i]; 01715 val = CV_TOGGLE_DBL(val); 01716 01717 if( val < ia || val >= ib ) 01718 { 01719 int pixelId = (loc + i) / cn; 01720 badPt = Point(pixelId % src.cols, pixelId / src.cols); 01721 break; 01722 } 01723 } 01724 } 01725 } 01726 } 01727 01728 if( badPt.x >= 0 ) 01729 { 01730 if( pt ) 01731 *pt = badPt; 01732 if( !quiet ) 01733 { 01734 cv::String value_str; 01735 value_str << src(cv::Range(badPt.y, badPt.y + 1), cv::Range(badPt.x, badPt.x + 1)); 01736 CV_Error_( CV_StsOutOfRange, 01737 ("the value at (%d, %d)=%s is out of range [%f, %f)", badPt.x, badPt.y, value_str.c_str(), minVal, maxVal)); 01738 } 01739 return false; 01740 } 01741 return true; 01742 } 01743 01744 #ifdef HAVE_OPENCL 01745 01746 static bool ocl_patchNaNs( InputOutputArray _a, float value ) 01747 { 01748 int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1; 01749 ocl::Kernel k("KF", ocl::core::arithm_oclsrc, 01750 format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=float -D rowsPerWI=%d", 01751 rowsPerWI)); 01752 if (k.empty()) 01753 return false; 01754 01755 UMat a = _a.getUMat(); 01756 int cn = a.channels(); 01757 01758 k.args(ocl::KernelArg::ReadOnlyNoSize(a), 01759 ocl::KernelArg::WriteOnly(a, cn), (float)value); 01760 01761 size_t globalsize[2] = { (size_t)a.cols * cn, ((size_t)a.rows + rowsPerWI - 1) / rowsPerWI }; 01762 return k.run(2, globalsize, NULL, false); 01763 } 01764 01765 #endif 01766 01767 void patchNaNs( InputOutputArray _a, double _val ) 01768 { 01769 CV_Assert( _a.depth() == CV_32F ); 01770 01771 #ifdef HAVE_OPENCL 01772 CV_OCL_RUN(_a.isUMat() && _a.dims() <= 2, 01773 ocl_patchNaNs(_a, (float)_val)) 01774 #endif 01775 01776 Mat a = _a.getMat(); 01777 const Mat* arrays[] = {&a, 0}; 01778 int* ptrs[1]; 01779 NAryMatIterator it(arrays, (uchar**)ptrs); 01780 size_t len = it.size*a.channels(); 01781 Cv32suf val; 01782 val.f = (float)_val; 01783 01784 #if CV_SSE2 01785 __m128i v_mask1 = _mm_set1_epi32(0x7fffffff), v_mask2 = _mm_set1_epi32(0x7f800000); 01786 __m128i v_val = _mm_set1_epi32(val.i); 01787 #elif CV_NEON 01788 int32x4_t v_mask1 = vdupq_n_s32(0x7fffffff), v_mask2 = vdupq_n_s32(0x7f800000), 01789 v_val = vdupq_n_s32(val.i); 01790 #endif 01791 01792 for( size_t i = 0; i < it.nplanes; i++, ++it ) 01793 { 01794 int* tptr = ptrs[0]; 01795 size_t j = 0; 01796 01797 #if CV_SSE2 01798 if (USE_SSE2) 01799 { 01800 for ( ; j + 4 <= len; j += 4) 01801 { 01802 __m128i v_src = _mm_loadu_si128((__m128i const *)(tptr + j)); 01803 __m128i v_cmp_mask = _mm_cmplt_epi32(v_mask2, _mm_and_si128(v_src, v_mask1)); 01804 __m128i v_res = _mm_or_si128(_mm_andnot_si128(v_cmp_mask, v_src), _mm_and_si128(v_cmp_mask, v_val)); 01805 _mm_storeu_si128((__m128i *)(tptr + j), v_res); 01806 } 01807 } 01808 #elif CV_NEON 01809 for ( ; j + 4 <= len; j += 4) 01810 { 01811 int32x4_t v_src = vld1q_s32(tptr + j); 01812 uint32x4_t v_cmp_mask = vcltq_s32(v_mask2, vandq_s32(v_src, v_mask1)); 01813 int32x4_t v_dst = vbslq_s32(v_cmp_mask, v_val, v_src); 01814 vst1q_s32(tptr + j, v_dst); 01815 } 01816 #endif 01817 01818 for( ; j < len; j++ ) 01819 if( (tptr[j] & 0x7fffffff) > 0x7f800000 ) 01820 tptr[j] = val.i; 01821 } 01822 } 01823 01824 } 01825 01826 CV_IMPL float cvCbrt(float value) { return cv::cubeRoot(value); } 01827 CV_IMPL float cvFastArctan(float y, float x) { return cv::fastAtan2(y, x); } 01828 01829 CV_IMPL void 01830 cvCartToPolar( const CvArr* xarr, const CvArr* yarr, 01831 CvArr* magarr, CvArr* anglearr, 01832 int angle_in_degrees ) 01833 { 01834 cv::Mat X = cv::cvarrToMat(xarr), Y = cv::cvarrToMat(yarr), Mag, Angle; 01835 if( magarr ) 01836 { 01837 Mag = cv::cvarrToMat(magarr); 01838 CV_Assert( Mag.size() == X.size() && Mag.type() == X.type() ); 01839 } 01840 if( anglearr ) 01841 { 01842 Angle = cv::cvarrToMat(anglearr); 01843 CV_Assert( Angle.size() == X.size() && Angle.type() == X.type() ); 01844 } 01845 if( magarr ) 01846 { 01847 if( anglearr ) 01848 cv::cartToPolar( X, Y, Mag, Angle, angle_in_degrees != 0 ); 01849 else 01850 cv::magnitude( X, Y, Mag ); 01851 } 01852 else 01853 cv::phase( X, Y, Angle, angle_in_degrees != 0 ); 01854 } 01855 01856 CV_IMPL void 01857 cvPolarToCart( const CvArr* magarr, const CvArr* anglearr, 01858 CvArr* xarr, CvArr* yarr, int angle_in_degrees ) 01859 { 01860 cv::Mat X, Y, Angle = cv::cvarrToMat(anglearr), Mag; 01861 if( magarr ) 01862 { 01863 Mag = cv::cvarrToMat(magarr); 01864 CV_Assert( Mag.size() == Angle.size() && Mag.type() == Angle.type() ); 01865 } 01866 if( xarr ) 01867 { 01868 X = cv::cvarrToMat(xarr); 01869 CV_Assert( X.size() == Angle.size() && X.type() == Angle.type() ); 01870 } 01871 if( yarr ) 01872 { 01873 Y = cv::cvarrToMat(yarr); 01874 CV_Assert( Y.size() == Angle.size() && Y.type() == Angle.type() ); 01875 } 01876 01877 cv::polarToCart( Mag, Angle, X, Y, angle_in_degrees != 0 ); 01878 } 01879 01880 CV_IMPL void cvExp( const CvArr* srcarr, CvArr* dstarr ) 01881 { 01882 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr); 01883 CV_Assert( src.type() == dst.type() && src.size == dst.size ); 01884 cv::exp( src, dst ); 01885 } 01886 01887 CV_IMPL void cvLog( const CvArr* srcarr, CvArr* dstarr ) 01888 { 01889 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr); 01890 CV_Assert( src.type() == dst.type() && src.size == dst.size ); 01891 cv::log( src, dst ); 01892 } 01893 01894 CV_IMPL void cvPow( const CvArr* srcarr, CvArr* dstarr, double power ) 01895 { 01896 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr); 01897 CV_Assert( src.type() == dst.type() && src.size == dst.size ); 01898 cv::pow( src, power, dst ); 01899 } 01900 01901 CV_IMPL int cvCheckArr( const CvArr* arr, int flags, 01902 double minVal, double maxVal ) 01903 { 01904 if( (flags & CV_CHECK_RANGE) == 0 ) 01905 minVal = -DBL_MAX, maxVal = DBL_MAX; 01906 return cv::checkRange(cv::cvarrToMat(arr), (flags & CV_CHECK_QUIET) != 0, 0, minVal, maxVal ); 01907 } 01908 01909 01910 /* 01911 Finds real roots of cubic, quadratic or linear equation. 01912 The original code has been taken from Ken Turkowski web page 01913 (http://www.worldserver.com/turk/opensource/) and adopted for OpenCV. 01914 Here is the copyright notice. 01915 01916 ----------------------------------------------------------------------- 01917 Copyright (C) 1978-1999 Ken Turkowski. <turk@computer.org> 01918 01919 All rights reserved. 01920 01921 Warranty Information 01922 Even though I have reviewed this software, I make no warranty 01923 or representation, either express or implied, with respect to this 01924 software, its quality, accuracy, merchantability, or fitness for a 01925 particular purpose. As a result, this software is provided "as is," 01926 and you, its user, are assuming the entire risk as to its quality 01927 and accuracy. 01928 01929 This code may be used and freely distributed as long as it includes 01930 this copyright notice and the above warranty information. 01931 ----------------------------------------------------------------------- 01932 */ 01933 01934 int cv::solveCubic( InputArray _coeffs, OutputArray _roots ) 01935 { 01936 const int n0 = 3; 01937 Mat coeffs = _coeffs.getMat(); 01938 int ctype = coeffs.type(); 01939 01940 CV_Assert( ctype == CV_32F || ctype == CV_64F ); 01941 CV_Assert( (coeffs.size() == Size(n0, 1) || 01942 coeffs.size() == Size(n0+1, 1) || 01943 coeffs.size() == Size(1, n0) || 01944 coeffs.size() == Size(1, n0+1)) ); 01945 01946 _roots.create(n0, 1, ctype, -1, true, _OutputArray::DEPTH_MASK_FLT); 01947 Mat roots = _roots.getMat(); 01948 01949 int i = -1, n = 0; 01950 double a0 = 1., a1, a2, a3; 01951 double x0 = 0., x1 = 0., x2 = 0.; 01952 int ncoeffs = coeffs.rows + coeffs.cols - 1; 01953 01954 if( ctype == CV_32FC1 ) 01955 { 01956 if( ncoeffs == 4 ) 01957 a0 = coeffs.at<float>(++i); 01958 01959 a1 = coeffs.at<float>(i+1); 01960 a2 = coeffs.at<float>(i+2); 01961 a3 = coeffs.at<float>(i+3); 01962 } 01963 else 01964 { 01965 if( ncoeffs == 4 ) 01966 a0 = coeffs.at<double>(++i); 01967 01968 a1 = coeffs.at<double>(i+1); 01969 a2 = coeffs.at<double>(i+2); 01970 a3 = coeffs.at<double>(i+3); 01971 } 01972 01973 if( a0 == 0 ) 01974 { 01975 if( a1 == 0 ) 01976 { 01977 if( a2 == 0 ) 01978 n = a3 == 0 ? -1 : 0; 01979 else 01980 { 01981 // linear equation 01982 x0 = -a3/a2; 01983 n = 1; 01984 } 01985 } 01986 else 01987 { 01988 // quadratic equation 01989 double d = a2*a2 - 4*a1*a3; 01990 if( d >= 0 ) 01991 { 01992 d = std::sqrt(d); 01993 double q1 = (-a2 + d) * 0.5; 01994 double q2 = (a2 + d) * -0.5; 01995 if( fabs(q1) > fabs(q2) ) 01996 { 01997 x0 = q1 / a1; 01998 x1 = a3 / q1; 01999 } 02000 else 02001 { 02002 x0 = q2 / a1; 02003 x1 = a3 / q2; 02004 } 02005 n = d > 0 ? 2 : 1; 02006 } 02007 } 02008 } 02009 else 02010 { 02011 a0 = 1./a0; 02012 a1 *= a0; 02013 a2 *= a0; 02014 a3 *= a0; 02015 02016 double Q = (a1 * a1 - 3 * a2) * (1./9); 02017 double R = (2 * a1 * a1 * a1 - 9 * a1 * a2 + 27 * a3) * (1./54); 02018 double Qcubed = Q * Q * Q; 02019 double d = Qcubed - R * R; 02020 02021 if( d > 0 ) 02022 { 02023 double theta = acos(R / sqrt(Qcubed)); 02024 double sqrtQ = sqrt(Q); 02025 double t0 = -2 * sqrtQ; 02026 double t1 = theta * (1./3); 02027 double t2 = a1 * (1./3); 02028 x0 = t0 * cos(t1) - t2; 02029 x1 = t0 * cos(t1 + (2.*CV_PI/3)) - t2; 02030 x2 = t0 * cos(t1 + (4.*CV_PI/3)) - t2; 02031 n = 3; 02032 } 02033 else if( d == 0 ) 02034 { 02035 if(R >= 0) 02036 { 02037 x0 = -2*pow(R, 1./3) - a1/3; 02038 x1 = pow(R, 1./3) - a1/3; 02039 } 02040 else 02041 { 02042 x0 = 2*pow(-R, 1./3) - a1/3; 02043 x1 = -pow(-R, 1./3) - a1/3; 02044 } 02045 x2 = 0; 02046 n = x0 == x1 ? 1 : 2; 02047 x1 = x0 == x1 ? 0 : x1; 02048 } 02049 else 02050 { 02051 double e; 02052 d = sqrt(-d); 02053 e = pow(d + fabs(R), 1./3); 02054 if( R > 0 ) 02055 e = -e; 02056 x0 = (e + Q / e) - a1 * (1./3); 02057 n = 1; 02058 } 02059 } 02060 02061 if( roots.type() == CV_32FC1 ) 02062 { 02063 roots.at<float>(0) = (float)x0; 02064 roots.at<float>(1) = (float)x1; 02065 roots.at<float>(2) = (float)x2; 02066 } 02067 else 02068 { 02069 roots.at<double>(0) = x0; 02070 roots.at<double>(1) = x1; 02071 roots.at<double>(2) = x2; 02072 } 02073 02074 return n; 02075 } 02076 02077 /* finds complex roots of a polynomial using Durand-Kerner method: 02078 http://en.wikipedia.org/wiki/Durand%E2%80%93Kerner_method */ 02079 double cv::solvePoly( InputArray _coeffs0, OutputArray _roots0, int maxIters ) 02080 { 02081 typedef Complex<double> C; 02082 02083 double maxDiff = 0; 02084 int iter, i, j; 02085 Mat coeffs0 = _coeffs0.getMat(); 02086 int ctype = _coeffs0.type(); 02087 int cdepth = CV_MAT_DEPTH(ctype); 02088 02089 CV_Assert( CV_MAT_DEPTH(ctype) >= CV_32F && CV_MAT_CN(ctype) <= 2 ); 02090 CV_Assert( coeffs0.rows == 1 || coeffs0.cols == 1 ); 02091 02092 int n0 = coeffs0.cols + coeffs0.rows - 2, n = n0; 02093 02094 _roots0.create(n, 1, CV_MAKETYPE(cdepth, 2), -1, true, _OutputArray::DEPTH_MASK_FLT); 02095 Mat roots0 = _roots0.getMat(); 02096 02097 AutoBuffer<C> buf(n*2+2); 02098 C *coeffs = buf, *roots = coeffs + n + 1; 02099 Mat coeffs1(coeffs0.size(), CV_MAKETYPE(CV_64F, coeffs0.channels()), coeffs0.channels() == 2 ? coeffs : roots); 02100 coeffs0.convertTo(coeffs1, coeffs1.type()); 02101 if( coeffs0.channels() == 1 ) 02102 { 02103 const double* rcoeffs = (const double*)roots; 02104 for( i = 0; i <= n; i++ ) 02105 coeffs[i] = C(rcoeffs[i], 0); 02106 } 02107 02108 for( ; n > 1; n-- ) 02109 { 02110 if( std::abs(coeffs[n].re) + std::abs(coeffs[n].im) > DBL_EPSILON ) 02111 break; 02112 } 02113 02114 C p(1, 0), r(1, 1); 02115 02116 for( i = 0; i < n; i++ ) 02117 { 02118 roots[i] = p; 02119 p = p * r; 02120 } 02121 02122 maxIters = maxIters <= 0 ? 1000 : maxIters; 02123 for( iter = 0; iter < maxIters; iter++ ) 02124 { 02125 maxDiff = 0; 02126 for( i = 0; i < n; i++ ) 02127 { 02128 p = roots[i]; 02129 C num = coeffs[n], denom = coeffs[n]; 02130 int num_same_root = 1; 02131 for( j = 0; j < n; j++ ) 02132 { 02133 num = num*p + coeffs[n-j-1]; 02134 if( j != i ) 02135 { 02136 if ( (p - roots[j]).re != 0 || (p - roots[j]).im != 0 ) 02137 denom = denom * (p - roots[j]); 02138 else 02139 num_same_root++; 02140 } 02141 } 02142 num /= denom; 02143 if( num_same_root > 1) 02144 { 02145 double old_num_re = num.re; 02146 double old_num_im = num.im; 02147 int square_root_times = num_same_root % 2 == 0 ? num_same_root / 2 : num_same_root / 2 - 1; 02148 02149 for( j = 0; j < square_root_times; j++) 02150 { 02151 num.re = old_num_re*old_num_re + old_num_im*old_num_im; 02152 num.re = sqrt(num.re); 02153 num.re += old_num_re; 02154 num.im = num.re - old_num_re; 02155 num.re /= 2; 02156 num.re = sqrt(num.re); 02157 02158 num.im /= 2; 02159 num.im = sqrt(num.im); 02160 if( old_num_re < 0 ) num.im = -num.im; 02161 } 02162 02163 if( num_same_root % 2 != 0){ 02164 Mat cube_coefs(4, 1, CV_64FC1); 02165 Mat cube_roots(3, 1, CV_64FC2); 02166 cube_coefs.at<double>(3) = -(pow(old_num_re, 3)); 02167 cube_coefs.at<double>(2) = -(15*pow(old_num_re, 2) + 27*pow(old_num_im, 2)); 02168 cube_coefs.at<double>(1) = -48*old_num_re; 02169 cube_coefs.at<double>(0) = 64; 02170 solveCubic(cube_coefs, cube_roots); 02171 02172 if(cube_roots.at<double>(0) >= 0) num.re = pow(cube_roots.at<double>(0), 1./3); 02173 else num.re = -pow(-cube_roots.at<double>(0), 1./3); 02174 num.im = sqrt(pow(num.re, 2) / 3 - old_num_re / (3*num.re)); 02175 } 02176 } 02177 roots[i] = p - num; 02178 maxDiff = std::max(maxDiff, cv::abs(num)); 02179 } 02180 if( maxDiff <= 0 ) 02181 break; 02182 } 02183 02184 if( coeffs0.channels() == 1 ) 02185 { 02186 const double verySmallEps = 1e-100; 02187 for( i = 0; i < n; i++ ) 02188 if( fabs(roots[i].im) < verySmallEps ) 02189 roots[i].im = 0; 02190 } 02191 02192 for( ; n < n0; n++ ) 02193 roots[n+1] = roots[n]; 02194 02195 Mat(roots0.size(), CV_64FC2, roots).convertTo(roots0, roots0.type()); 02196 return maxDiff; 02197 } 02198 02199 02200 CV_IMPL int 02201 cvSolveCubic( const CvMat* coeffs, CvMat* roots ) 02202 { 02203 cv::Mat _coeffs = cv::cvarrToMat(coeffs), _roots = cv::cvarrToMat(roots), _roots0 = _roots; 02204 int nroots = cv::solveCubic(_coeffs, _roots); 02205 CV_Assert( _roots.data == _roots0.data ); // check that the array of roots was not reallocated 02206 return nroots; 02207 } 02208 02209 02210 void cvSolvePoly(const CvMat* a, CvMat *r, int maxiter, int) 02211 { 02212 cv::Mat _a = cv::cvarrToMat(a); 02213 cv::Mat _r = cv::cvarrToMat(r); 02214 cv::Mat _r0 = _r; 02215 cv::solvePoly(_a, _r, maxiter); 02216 CV_Assert( _r.data == _r0.data ); // check that the array of roots was not reallocated 02217 } 02218 02219 02220 /* End of file. */ 02221
Generated on Tue Jul 12 2022 14:47:18 by
