Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of gr-peach-opencv-project-sd-card by
mathfuncs_core.cpp
00001 /*M/////////////////////////////////////////////////////////////////////////////////////// 00002 // 00003 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 00004 // 00005 // By downloading, copying, installing or using the software you agree to this license. 00006 // If you do not agree to this license, do not download, install, 00007 // copy or use the software. 00008 // 00009 // 00010 // License Agreement 00011 // For Open Source Computer Vision Library 00012 // 00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 00014 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved. 00015 // Third party copyrights are property of their respective owners. 00016 // 00017 // Redistribution and use in source and binary forms, with or without modification, 00018 // are permitted provided that the following conditions are met: 00019 // 00020 // * Redistribution's of source code must retain the above copyright notice, 00021 // this list of conditions and the following disclaimer. 00022 // 00023 // * Redistribution's in binary form must reproduce the above copyright notice, 00024 // this list of conditions and the following disclaimer in the documentation 00025 // and/or other materials provided with the distribution. 00026 // 00027 // * The name of the copyright holders may not be used to endorse or promote products 00028 // derived from this software without specific prior written permission. 00029 // 00030 // This software is provided by the copyright holders and contributors "as is" and 00031 // any express or implied warranties, including, but not limited to, the implied 00032 // warranties of merchantability and fitness for a particular purpose are disclaimed. 
00033 // In no event shall the Intel Corporation or contributors be liable for any direct, 00034 // indirect, incidental, special, exemplary, or consequential damages 00035 // (including, but not limited to, procurement of substitute goods or services; 00036 // loss of use, data, or profits; or business interruption) however caused 00037 // and on any theory of liability, whether in contract, strict liability, 00038 // or tort (including negligence or otherwise) arising in any way out of 00039 // the use of this software, even if advised of the possibility of such damage. 00040 // 00041 //M*/ 00042 00043 #include "precomp.hpp" 00044 00045 #undef HAVE_IPP 00046 00047 namespace cv { namespace hal { 00048 00049 ///////////////////////////////////// ATAN2 //////////////////////////////////// 00050 static const float atan2_p1 = 0.9997878412794807f*(float)(180/CV_PI); 00051 static const float atan2_p3 = -0.3258083974640975f*(float)(180/CV_PI); 00052 static const float atan2_p5 = 0.1555786518463281f*(float)(180/CV_PI); 00053 static const float atan2_p7 = -0.04432655554792128f*(float)(180/CV_PI); 00054 00055 void fastAtan2(const float *Y, const float *X, float *angle, int len, bool angleInDegrees ) 00056 { 00057 int i = 0; 00058 float scale = angleInDegrees ? 
1 : (float)(CV_PI/180); 00059 00060 #ifdef HAVE_TEGRA_OPTIMIZATION 00061 if (tegra::useTegra() && tegra::FastAtan2_32f(Y, X, angle, len, scale)) 00062 return; 00063 #endif 00064 00065 #if CV_SSE2 00066 Cv32suf iabsmask; iabsmask.i = 0x7fffffff; 00067 __m128 eps = _mm_set1_ps((float)DBL_EPSILON), absmask = _mm_set1_ps(iabsmask.f); 00068 __m128 _90 = _mm_set1_ps(90.f), _180 = _mm_set1_ps(180.f), _360 = _mm_set1_ps(360.f); 00069 __m128 z = _mm_setzero_ps(), scale4 = _mm_set1_ps(scale); 00070 __m128 p1 = _mm_set1_ps(atan2_p1), p3 = _mm_set1_ps(atan2_p3); 00071 __m128 p5 = _mm_set1_ps(atan2_p5), p7 = _mm_set1_ps(atan2_p7); 00072 00073 for( ; i <= len - 4; i += 4 ) 00074 { 00075 __m128 x = _mm_loadu_ps(X + i), y = _mm_loadu_ps(Y + i); 00076 __m128 ax = _mm_and_ps(x, absmask), ay = _mm_and_ps(y, absmask); 00077 __m128 mask = _mm_cmplt_ps(ax, ay); 00078 __m128 tmin = _mm_min_ps(ax, ay), tmax = _mm_max_ps(ax, ay); 00079 __m128 c = _mm_div_ps(tmin, _mm_add_ps(tmax, eps)); 00080 __m128 c2 = _mm_mul_ps(c, c); 00081 __m128 a = _mm_mul_ps(c2, p7); 00082 a = _mm_mul_ps(_mm_add_ps(a, p5), c2); 00083 a = _mm_mul_ps(_mm_add_ps(a, p3), c2); 00084 a = _mm_mul_ps(_mm_add_ps(a, p1), c); 00085 00086 __m128 b = _mm_sub_ps(_90, a); 00087 a = _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(a, b), mask)); 00088 00089 b = _mm_sub_ps(_180, a); 00090 mask = _mm_cmplt_ps(x, z); 00091 a = _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(a, b), mask)); 00092 00093 b = _mm_sub_ps(_360, a); 00094 mask = _mm_cmplt_ps(y, z); 00095 a = _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(a, b), mask)); 00096 00097 a = _mm_mul_ps(a, scale4); 00098 _mm_storeu_ps(angle + i, a); 00099 } 00100 #elif CV_NEON 00101 float32x4_t eps = vdupq_n_f32((float)DBL_EPSILON); 00102 float32x4_t _90 = vdupq_n_f32(90.f), _180 = vdupq_n_f32(180.f), _360 = vdupq_n_f32(360.f); 00103 float32x4_t z = vdupq_n_f32(0.0f), scale4 = vdupq_n_f32(scale); 00104 float32x4_t p1 = vdupq_n_f32(atan2_p1), p3 = vdupq_n_f32(atan2_p3); 00105 float32x4_t p5 = vdupq_n_f32(atan2_p5), p7 
= vdupq_n_f32(atan2_p7); 00106 00107 for( ; i <= len - 4; i += 4 ) 00108 { 00109 float32x4_t x = vld1q_f32(X + i), y = vld1q_f32(Y + i); 00110 float32x4_t ax = vabsq_f32(x), ay = vabsq_f32(y); 00111 float32x4_t tmin = vminq_f32(ax, ay), tmax = vmaxq_f32(ax, ay); 00112 float32x4_t c = vmulq_f32(tmin, cv_vrecpq_f32(vaddq_f32(tmax, eps))); 00113 float32x4_t c2 = vmulq_f32(c, c); 00114 float32x4_t a = vmulq_f32(c2, p7); 00115 a = vmulq_f32(vaddq_f32(a, p5), c2); 00116 a = vmulq_f32(vaddq_f32(a, p3), c2); 00117 a = vmulq_f32(vaddq_f32(a, p1), c); 00118 00119 a = vbslq_f32(vcgeq_f32(ax, ay), a, vsubq_f32(_90, a)); 00120 a = vbslq_f32(vcltq_f32(x, z), vsubq_f32(_180, a), a); 00121 a = vbslq_f32(vcltq_f32(y, z), vsubq_f32(_360, a), a); 00122 00123 vst1q_f32(angle + i, vmulq_f32(a, scale4)); 00124 } 00125 #endif 00126 00127 for( ; i < len; i++ ) 00128 { 00129 float x = X[i], y = Y[i]; 00130 float ax = std::abs(x), ay = std::abs(y); 00131 float a, c, c2; 00132 if( ax >= ay ) 00133 { 00134 c = ay/(ax + (float)DBL_EPSILON); 00135 c2 = c*c; 00136 a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c; 00137 } 00138 else 00139 { 00140 c = ax/(ay + (float)DBL_EPSILON); 00141 c2 = c*c; 00142 a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c; 00143 } 00144 if( x < 0 ) 00145 a = 180.f - a; 00146 if( y < 0 ) 00147 a = 360.f - a; 00148 angle[i] = (float)(a*scale); 00149 } 00150 } 00151 00152 00153 void magnitude32f(const float* x, const float* y, float* mag, int len) 00154 { 00155 #if defined HAVE_IPP 00156 CV_IPP_CHECK() 00157 { 00158 IppStatus status = ippsMagnitude_32f(x, y, mag, len); 00159 if (status >= 0) 00160 { 00161 CV_IMPL_ADD(CV_IMPL_IPP); 00162 return; 00163 } 00164 setIppErrorStatus(); 00165 } 00166 #endif 00167 00168 int i = 0; 00169 00170 #if CV_SIMD128 00171 for( ; i <= len - 8; i += 8 ) 00172 { 00173 v_float32x4 x0 = v_load(x + i), x1 = v_load(x + i + 4); 00174 v_float32x4 y0 = v_load(y + i), y1 = v_load(y + i + 4); 00175 x0 = 
v_sqrt(v_muladd(x0, x0, y0*y0)); 00176 x1 = v_sqrt(v_muladd(x1, x1, y1*y1)); 00177 v_store(mag + i, x0); 00178 v_store(mag + i + 4, x1); 00179 } 00180 #endif 00181 00182 for( ; i < len; i++ ) 00183 { 00184 float x0 = x[i], y0 = y[i]; 00185 mag[i] = std::sqrt(x0*x0 + y0*y0); 00186 } 00187 } 00188 00189 void magnitude64f(const double* x, const double* y, double* mag, int len) 00190 { 00191 #if defined(HAVE_IPP) 00192 CV_IPP_CHECK() 00193 { 00194 IppStatus status = ippsMagnitude_64f(x, y, mag, len); 00195 if (status >= 0) 00196 { 00197 CV_IMPL_ADD(CV_IMPL_IPP); 00198 return; 00199 } 00200 setIppErrorStatus(); 00201 } 00202 #endif 00203 00204 int i = 0; 00205 00206 #if CV_SIMD128_64F 00207 for( ; i <= len - 4; i += 4 ) 00208 { 00209 v_float64x2 x0 = v_load(x + i), x1 = v_load(x + i + 2); 00210 v_float64x2 y0 = v_load(y + i), y1 = v_load(y + i + 2); 00211 x0 = v_sqrt(v_muladd(x0, x0, y0*y0)); 00212 x1 = v_sqrt(v_muladd(x1, x1, y1*y1)); 00213 v_store(mag + i, x0); 00214 v_store(mag + i + 2, x1); 00215 } 00216 #endif 00217 00218 for( ; i < len; i++ ) 00219 { 00220 double x0 = x[i], y0 = y[i]; 00221 mag[i] = std::sqrt(x0*x0 + y0*y0); 00222 } 00223 } 00224 00225 00226 void invSqrt32f(const float* src, float* dst, int len) 00227 { 00228 #if defined(HAVE_IPP) 00229 CV_IPP_CHECK() 00230 { 00231 if (ippsInvSqrt_32f_A21(src, dst, len) >= 0) 00232 { 00233 CV_IMPL_ADD(CV_IMPL_IPP); 00234 return; 00235 } 00236 setIppErrorStatus(); 00237 } 00238 #endif 00239 00240 int i = 0; 00241 00242 #if CV_SIMD128 00243 for( ; i <= len - 8; i += 8 ) 00244 { 00245 v_float32x4 t0 = v_load(src + i), t1 = v_load(src + i + 4); 00246 t0 = v_invsqrt(t0); 00247 t1 = v_invsqrt(t1); 00248 v_store(dst + i, t0); v_store(dst + i + 4, t1); 00249 } 00250 #endif 00251 00252 for( ; i < len; i++ ) 00253 dst[i] = 1/std::sqrt(src[i]); 00254 } 00255 00256 00257 void invSqrt64f(const double* src, double* dst, int len) 00258 { 00259 int i = 0; 00260 00261 #if CV_SSE2 00262 __m128d v_1 = _mm_set1_pd(1.0); 00263 for ( ; 
i <= len - 2; i += 2) 00264 _mm_storeu_pd(dst + i, _mm_div_pd(v_1, _mm_sqrt_pd(_mm_loadu_pd(src + i)))); 00265 #endif 00266 00267 for( ; i < len; i++ ) 00268 dst[i] = 1/std::sqrt(src[i]); 00269 } 00270 00271 00272 void sqrt32f(const float* src, float* dst, int len) 00273 { 00274 #if defined(HAVE_IPP) 00275 CV_IPP_CHECK() 00276 { 00277 if (ippsSqrt_32f_A21(src, dst, len) >= 0) 00278 { 00279 CV_IMPL_ADD(CV_IMPL_IPP); 00280 return; 00281 } 00282 setIppErrorStatus(); 00283 } 00284 #endif 00285 00286 int i = 0; 00287 00288 #if CV_SIMD128 00289 for( ; i <= len - 8; i += 8 ) 00290 { 00291 v_float32x4 t0 = v_load(src + i), t1 = v_load(src + i + 4); 00292 t0 = v_sqrt(t0); 00293 t1 = v_sqrt(t1); 00294 v_store(dst + i, t0); v_store(dst + i + 4, t1); 00295 } 00296 #endif 00297 00298 for( ; i < len; i++ ) 00299 dst[i] = std::sqrt(src[i]); 00300 } 00301 00302 00303 void sqrt64f(const double* src, double* dst, int len) 00304 { 00305 #if defined(HAVE_IPP) 00306 CV_IPP_CHECK() 00307 { 00308 if (ippsSqrt_64f_A50(src, dst, len) >= 0) 00309 { 00310 CV_IMPL_ADD(CV_IMPL_IPP); 00311 return; 00312 } 00313 setIppErrorStatus(); 00314 } 00315 #endif 00316 00317 int i = 0; 00318 00319 #if CV_SIMD128_64F 00320 for( ; i <= len - 4; i += 4 ) 00321 { 00322 v_float64x2 t0 = v_load(src + i), t1 = v_load(src + i + 2); 00323 t0 = v_sqrt(t0); 00324 t1 = v_sqrt(t1); 00325 v_store(dst + i, t0); v_store(dst + i + 2, t1); 00326 } 00327 #endif 00328 00329 for( ; i < len; i++ ) 00330 dst[i] = std::sqrt(src[i]); 00331 } 00332 00333 ////////////////////////////////////// EXP ///////////////////////////////////// 00334 00335 typedef union 00336 { 00337 struct { 00338 #if ( defined( WORDS_BIGENDIAN ) && !defined( OPENCV_UNIVERSAL_BUILD ) ) || defined( __BIG_ENDIAN__ ) 00339 int hi; 00340 int lo; 00341 #else 00342 int lo; 00343 int hi; 00344 #endif 00345 } i; 00346 double d; 00347 } 00348 DBLINT; 00349 00350 #define EXPTAB_SCALE 6 00351 #define EXPTAB_MASK ((1 << EXPTAB_SCALE) - 1) 00352 00353 #define 
EXPPOLY_32F_A0 .9670371139572337719125840413672004409288e-2 00354 00355 static const double expTab[] = { 00356 1.0 * EXPPOLY_32F_A0, 00357 1.0108892860517004600204097905619 * EXPPOLY_32F_A0, 00358 1.0218971486541166782344801347833 * EXPPOLY_32F_A0, 00359 1.0330248790212284225001082839705 * EXPPOLY_32F_A0, 00360 1.0442737824274138403219664787399 * EXPPOLY_32F_A0, 00361 1.0556451783605571588083413251529 * EXPPOLY_32F_A0, 00362 1.0671404006768236181695211209928 * EXPPOLY_32F_A0, 00363 1.0787607977571197937406800374385 * EXPPOLY_32F_A0, 00364 1.0905077326652576592070106557607 * EXPPOLY_32F_A0, 00365 1.1023825833078409435564142094256 * EXPPOLY_32F_A0, 00366 1.1143867425958925363088129569196 * EXPPOLY_32F_A0, 00367 1.126521618608241899794798643787 * EXPPOLY_32F_A0, 00368 1.1387886347566916537038302838415 * EXPPOLY_32F_A0, 00369 1.151189229952982705817759635202 * EXPPOLY_32F_A0, 00370 1.1637248587775775138135735990922 * EXPPOLY_32F_A0, 00371 1.1763969916502812762846457284838 * EXPPOLY_32F_A0, 00372 1.1892071150027210667174999705605 * EXPPOLY_32F_A0, 00373 1.2021567314527031420963969574978 * EXPPOLY_32F_A0, 00374 1.2152473599804688781165202513388 * EXPPOLY_32F_A0, 00375 1.2284805361068700056940089577928 * EXPPOLY_32F_A0, 00376 1.2418578120734840485936774687266 * EXPPOLY_32F_A0, 00377 1.2553807570246910895793906574423 * EXPPOLY_32F_A0, 00378 1.2690509571917332225544190810323 * EXPPOLY_32F_A0, 00379 1.2828700160787782807266697810215 * EXPPOLY_32F_A0, 00380 1.2968395546510096659337541177925 * EXPPOLY_32F_A0, 00381 1.3109612115247643419229917863308 * EXPPOLY_32F_A0, 00382 1.3252366431597412946295370954987 * EXPPOLY_32F_A0, 00383 1.3396675240533030053600306697244 * EXPPOLY_32F_A0, 00384 1.3542555469368927282980147401407 * EXPPOLY_32F_A0, 00385 1.3690024229745906119296011329822 * EXPPOLY_32F_A0, 00386 1.3839098819638319548726595272652 * EXPPOLY_32F_A0, 00387 1.3989796725383111402095281367152 * EXPPOLY_32F_A0, 00388 1.4142135623730950488016887242097 * EXPPOLY_32F_A0, 00389 
1.4296133383919700112350657782751 * EXPPOLY_32F_A0, 00390 1.4451808069770466200370062414717 * EXPPOLY_32F_A0, 00391 1.4609177941806469886513028903106 * EXPPOLY_32F_A0, 00392 1.476826145939499311386907480374 * EXPPOLY_32F_A0, 00393 1.4929077282912648492006435314867 * EXPPOLY_32F_A0, 00394 1.5091644275934227397660195510332 * EXPPOLY_32F_A0, 00395 1.5255981507445383068512536895169 * EXPPOLY_32F_A0, 00396 1.5422108254079408236122918620907 * EXPPOLY_32F_A0, 00397 1.5590044002378369670337280894749 * EXPPOLY_32F_A0, 00398 1.5759808451078864864552701601819 * EXPPOLY_32F_A0, 00399 1.5931421513422668979372486431191 * EXPPOLY_32F_A0, 00400 1.6104903319492543081795206673574 * EXPPOLY_32F_A0, 00401 1.628027421857347766848218522014 * EXPPOLY_32F_A0, 00402 1.6457554781539648445187567247258 * EXPPOLY_32F_A0, 00403 1.6636765803267364350463364569764 * EXPPOLY_32F_A0, 00404 1.6817928305074290860622509524664 * EXPPOLY_32F_A0, 00405 1.7001063537185234695013625734975 * EXPPOLY_32F_A0, 00406 1.7186192981224779156293443764563 * EXPPOLY_32F_A0, 00407 1.7373338352737062489942020818722 * EXPPOLY_32F_A0, 00408 1.7562521603732994831121606193753 * EXPPOLY_32F_A0, 00409 1.7753764925265212525505592001993 * EXPPOLY_32F_A0, 00410 1.7947090750031071864277032421278 * EXPPOLY_32F_A0, 00411 1.8142521755003987562498346003623 * EXPPOLY_32F_A0, 00412 1.8340080864093424634870831895883 * EXPPOLY_32F_A0, 00413 1.8539791250833855683924530703377 * EXPPOLY_32F_A0, 00414 1.8741676341102999013299989499544 * EXPPOLY_32F_A0, 00415 1.8945759815869656413402186534269 * EXPPOLY_32F_A0, 00416 1.9152065613971472938726112702958 * EXPPOLY_32F_A0, 00417 1.9360617934922944505980559045667 * EXPPOLY_32F_A0, 00418 1.9571441241754002690183222516269 * EXPPOLY_32F_A0, 00419 1.9784560263879509682582499181312 * EXPPOLY_32F_A0, 00420 }; 00421 00422 00423 // the code below uses _mm_cast* intrinsics, which are not avialable on VS2005 00424 #if (defined _MSC_VER && _MSC_VER < 1500) || \ 00425 (!defined __APPLE__ && defined __GNUC__ && 
__GNUC__*100 + __GNUC_MINOR__ < 402) 00426 #undef CV_SSE2 00427 #define CV_SSE2 0 00428 #endif 00429 00430 static const double exp_prescale = 1.4426950408889634073599246810019 * (1 << EXPTAB_SCALE); 00431 static const double exp_postscale = 1./(1 << EXPTAB_SCALE); 00432 static const double exp_max_val = 3000.*(1 << EXPTAB_SCALE); // log10(DBL_MAX) < 3000 00433 00434 void exp32f( const float *_x, float *y, int n ) 00435 { 00436 static const float 00437 A4 = (float)(1.000000000000002438532970795181890933776 / EXPPOLY_32F_A0), 00438 A3 = (float)(.6931471805521448196800669615864773144641 / EXPPOLY_32F_A0), 00439 A2 = (float)(.2402265109513301490103372422686535526573 / EXPPOLY_32F_A0), 00440 A1 = (float)(.5550339366753125211915322047004666939128e-1 / EXPPOLY_32F_A0); 00441 00442 #undef EXPPOLY 00443 #define EXPPOLY(x) \ 00444 (((((x) + A1)*(x) + A2)*(x) + A3)*(x) + A4) 00445 00446 int i = 0; 00447 const Cv32suf* x = (const Cv32suf*)_x; 00448 Cv32suf buf[4]; 00449 00450 #if CV_SSE2 00451 if( n >= 8 ) 00452 { 00453 static const __m128d prescale2 = _mm_set1_pd(exp_prescale); 00454 static const __m128 postscale4 = _mm_set1_ps((float)exp_postscale); 00455 static const __m128 maxval4 = _mm_set1_ps((float)(exp_max_val/exp_prescale)); 00456 static const __m128 minval4 = _mm_set1_ps((float)(-exp_max_val/exp_prescale)); 00457 00458 static const __m128 mA1 = _mm_set1_ps(A1); 00459 static const __m128 mA2 = _mm_set1_ps(A2); 00460 static const __m128 mA3 = _mm_set1_ps(A3); 00461 static const __m128 mA4 = _mm_set1_ps(A4); 00462 bool y_aligned = (size_t)(void*)y % 16 == 0; 00463 00464 ushort CV_DECL_ALIGNED(16) tab_idx[8]; 00465 00466 for( ; i <= n - 8; i += 8 ) 00467 { 00468 __m128 xf0, xf1; 00469 xf0 = _mm_loadu_ps(&x[i].f); 00470 xf1 = _mm_loadu_ps(&x[i+4].f); 00471 __m128i xi0, xi1, xi2, xi3; 00472 00473 xf0 = _mm_min_ps(_mm_max_ps(xf0, minval4), maxval4); 00474 xf1 = _mm_min_ps(_mm_max_ps(xf1, minval4), maxval4); 00475 00476 __m128d xd0 = _mm_cvtps_pd(xf0); 00477 __m128d xd2 = 
_mm_cvtps_pd(_mm_movehl_ps(xf0, xf0)); 00478 __m128d xd1 = _mm_cvtps_pd(xf1); 00479 __m128d xd3 = _mm_cvtps_pd(_mm_movehl_ps(xf1, xf1)); 00480 00481 xd0 = _mm_mul_pd(xd0, prescale2); 00482 xd2 = _mm_mul_pd(xd2, prescale2); 00483 xd1 = _mm_mul_pd(xd1, prescale2); 00484 xd3 = _mm_mul_pd(xd3, prescale2); 00485 00486 xi0 = _mm_cvtpd_epi32(xd0); 00487 xi2 = _mm_cvtpd_epi32(xd2); 00488 00489 xi1 = _mm_cvtpd_epi32(xd1); 00490 xi3 = _mm_cvtpd_epi32(xd3); 00491 00492 xd0 = _mm_sub_pd(xd0, _mm_cvtepi32_pd(xi0)); 00493 xd2 = _mm_sub_pd(xd2, _mm_cvtepi32_pd(xi2)); 00494 xd1 = _mm_sub_pd(xd1, _mm_cvtepi32_pd(xi1)); 00495 xd3 = _mm_sub_pd(xd3, _mm_cvtepi32_pd(xi3)); 00496 00497 xf0 = _mm_movelh_ps(_mm_cvtpd_ps(xd0), _mm_cvtpd_ps(xd2)); 00498 xf1 = _mm_movelh_ps(_mm_cvtpd_ps(xd1), _mm_cvtpd_ps(xd3)); 00499 00500 xf0 = _mm_mul_ps(xf0, postscale4); 00501 xf1 = _mm_mul_ps(xf1, postscale4); 00502 00503 xi0 = _mm_unpacklo_epi64(xi0, xi2); 00504 xi1 = _mm_unpacklo_epi64(xi1, xi3); 00505 xi0 = _mm_packs_epi32(xi0, xi1); 00506 00507 _mm_store_si128((__m128i*)tab_idx, _mm_and_si128(xi0, _mm_set1_epi16(EXPTAB_MASK))); 00508 00509 xi0 = _mm_add_epi16(_mm_srai_epi16(xi0, EXPTAB_SCALE), _mm_set1_epi16(127)); 00510 xi0 = _mm_max_epi16(xi0, _mm_setzero_si128()); 00511 xi0 = _mm_min_epi16(xi0, _mm_set1_epi16(255)); 00512 xi1 = _mm_unpackhi_epi16(xi0, _mm_setzero_si128()); 00513 xi0 = _mm_unpacklo_epi16(xi0, _mm_setzero_si128()); 00514 00515 __m128d yd0 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[0]), _mm_load_sd(expTab + tab_idx[1])); 00516 __m128d yd1 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[2]), _mm_load_sd(expTab + tab_idx[3])); 00517 __m128d yd2 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[4]), _mm_load_sd(expTab + tab_idx[5])); 00518 __m128d yd3 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[6]), _mm_load_sd(expTab + tab_idx[7])); 00519 00520 __m128 yf0 = _mm_movelh_ps(_mm_cvtpd_ps(yd0), _mm_cvtpd_ps(yd1)); 00521 __m128 yf1 = _mm_movelh_ps(_mm_cvtpd_ps(yd2), _mm_cvtpd_ps(yd3)); 
00522 00523 yf0 = _mm_mul_ps(yf0, _mm_castsi128_ps(_mm_slli_epi32(xi0, 23))); 00524 yf1 = _mm_mul_ps(yf1, _mm_castsi128_ps(_mm_slli_epi32(xi1, 23))); 00525 00526 __m128 zf0 = _mm_add_ps(xf0, mA1); 00527 __m128 zf1 = _mm_add_ps(xf1, mA1); 00528 00529 zf0 = _mm_add_ps(_mm_mul_ps(zf0, xf0), mA2); 00530 zf1 = _mm_add_ps(_mm_mul_ps(zf1, xf1), mA2); 00531 00532 zf0 = _mm_add_ps(_mm_mul_ps(zf0, xf0), mA3); 00533 zf1 = _mm_add_ps(_mm_mul_ps(zf1, xf1), mA3); 00534 00535 zf0 = _mm_add_ps(_mm_mul_ps(zf0, xf0), mA4); 00536 zf1 = _mm_add_ps(_mm_mul_ps(zf1, xf1), mA4); 00537 00538 zf0 = _mm_mul_ps(zf0, yf0); 00539 zf1 = _mm_mul_ps(zf1, yf1); 00540 00541 if( y_aligned ) 00542 { 00543 _mm_store_ps(y + i, zf0); 00544 _mm_store_ps(y + i + 4, zf1); 00545 } 00546 else 00547 { 00548 _mm_storeu_ps(y + i, zf0); 00549 _mm_storeu_ps(y + i + 4, zf1); 00550 } 00551 } 00552 } 00553 else 00554 #endif 00555 for( ; i <= n - 4; i += 4 ) 00556 { 00557 double x0 = x[i].f * exp_prescale; 00558 double x1 = x[i + 1].f * exp_prescale; 00559 double x2 = x[i + 2].f * exp_prescale; 00560 double x3 = x[i + 3].f * exp_prescale; 00561 int val0, val1, val2, val3, t; 00562 00563 if( ((x[i].i >> 23) & 255) > 127 + 10 ) 00564 x0 = x[i].i < 0 ? -exp_max_val : exp_max_val; 00565 00566 if( ((x[i+1].i >> 23) & 255) > 127 + 10 ) 00567 x1 = x[i+1].i < 0 ? -exp_max_val : exp_max_val; 00568 00569 if( ((x[i+2].i >> 23) & 255) > 127 + 10 ) 00570 x2 = x[i+2].i < 0 ? -exp_max_val : exp_max_val; 00571 00572 if( ((x[i+3].i >> 23) & 255) > 127 + 10 ) 00573 x3 = x[i+3].i < 0 ? -exp_max_val : exp_max_val; 00574 00575 val0 = cvRound(x0); 00576 val1 = cvRound(x1); 00577 val2 = cvRound(x2); 00578 val3 = cvRound(x3); 00579 00580 x0 = (x0 - val0)*exp_postscale; 00581 x1 = (x1 - val1)*exp_postscale; 00582 x2 = (x2 - val2)*exp_postscale; 00583 x3 = (x3 - val3)*exp_postscale; 00584 00585 t = (val0 >> EXPTAB_SCALE) + 127; 00586 t = !(t & ~255) ? t : t < 0 ? 
0 : 255; 00587 buf[0].i = t << 23; 00588 00589 t = (val1 >> EXPTAB_SCALE) + 127; 00590 t = !(t & ~255) ? t : t < 0 ? 0 : 255; 00591 buf[1].i = t << 23; 00592 00593 t = (val2 >> EXPTAB_SCALE) + 127; 00594 t = !(t & ~255) ? t : t < 0 ? 0 : 255; 00595 buf[2].i = t << 23; 00596 00597 t = (val3 >> EXPTAB_SCALE) + 127; 00598 t = !(t & ~255) ? t : t < 0 ? 0 : 255; 00599 buf[3].i = t << 23; 00600 00601 x0 = buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY( x0 ); 00602 x1 = buf[1].f * expTab[val1 & EXPTAB_MASK] * EXPPOLY( x1 ); 00603 00604 y[i] = (float)x0; 00605 y[i + 1] = (float)x1; 00606 00607 x2 = buf[2].f * expTab[val2 & EXPTAB_MASK] * EXPPOLY( x2 ); 00608 x3 = buf[3].f * expTab[val3 & EXPTAB_MASK] * EXPPOLY( x3 ); 00609 00610 y[i + 2] = (float)x2; 00611 y[i + 3] = (float)x3; 00612 } 00613 00614 for( ; i < n; i++ ) 00615 { 00616 double x0 = x[i].f * exp_prescale; 00617 int val0, t; 00618 00619 if( ((x[i].i >> 23) & 255) > 127 + 10 ) 00620 x0 = x[i].i < 0 ? -exp_max_val : exp_max_val; 00621 00622 val0 = cvRound(x0); 00623 t = (val0 >> EXPTAB_SCALE) + 127; 00624 t = !(t & ~255) ? t : t < 0 ? 
0 : 255; 00625 00626 buf[0].i = t << 23; 00627 x0 = (x0 - val0)*exp_postscale; 00628 00629 y[i] = (float)(buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY(x0)); 00630 } 00631 } 00632 00633 void exp64f( const double *_x, double *y, int n ) 00634 { 00635 static const double 00636 A5 = .99999999999999999998285227504999 / EXPPOLY_32F_A0, 00637 A4 = .69314718055994546743029643825322 / EXPPOLY_32F_A0, 00638 A3 = .24022650695886477918181338054308 / EXPPOLY_32F_A0, 00639 A2 = .55504108793649567998466049042729e-1 / EXPPOLY_32F_A0, 00640 A1 = .96180973140732918010002372686186e-2 / EXPPOLY_32F_A0, 00641 A0 = .13369713757180123244806654839424e-2 / EXPPOLY_32F_A0; 00642 00643 #undef EXPPOLY 00644 #define EXPPOLY(x) (((((A0*(x) + A1)*(x) + A2)*(x) + A3)*(x) + A4)*(x) + A5) 00645 00646 int i = 0; 00647 Cv64suf buf[4]; 00648 const Cv64suf* x = (const Cv64suf*)_x; 00649 00650 #if CV_SSE2 00651 static const __m128d prescale2 = _mm_set1_pd(exp_prescale); 00652 static const __m128d postscale2 = _mm_set1_pd(exp_postscale); 00653 static const __m128d maxval2 = _mm_set1_pd(exp_max_val); 00654 static const __m128d minval2 = _mm_set1_pd(-exp_max_val); 00655 00656 static const __m128d mA0 = _mm_set1_pd(A0); 00657 static const __m128d mA1 = _mm_set1_pd(A1); 00658 static const __m128d mA2 = _mm_set1_pd(A2); 00659 static const __m128d mA3 = _mm_set1_pd(A3); 00660 static const __m128d mA4 = _mm_set1_pd(A4); 00661 static const __m128d mA5 = _mm_set1_pd(A5); 00662 00663 int CV_DECL_ALIGNED(16) tab_idx[4]; 00664 00665 for( ; i <= n - 4; i += 4 ) 00666 { 00667 __m128d xf0 = _mm_loadu_pd(&x[i].f), xf1 = _mm_loadu_pd(&x[i+2].f); 00668 __m128i xi0, xi1; 00669 xf0 = _mm_min_pd(_mm_max_pd(xf0, minval2), maxval2); 00670 xf1 = _mm_min_pd(_mm_max_pd(xf1, minval2), maxval2); 00671 xf0 = _mm_mul_pd(xf0, prescale2); 00672 xf1 = _mm_mul_pd(xf1, prescale2); 00673 00674 xi0 = _mm_cvtpd_epi32(xf0); 00675 xi1 = _mm_cvtpd_epi32(xf1); 00676 xf0 = _mm_mul_pd(_mm_sub_pd(xf0, _mm_cvtepi32_pd(xi0)), postscale2); 00677 
xf1 = _mm_mul_pd(_mm_sub_pd(xf1, _mm_cvtepi32_pd(xi1)), postscale2); 00678 00679 xi0 = _mm_unpacklo_epi64(xi0, xi1); 00680 _mm_store_si128((__m128i*)tab_idx, _mm_and_si128(xi0, _mm_set1_epi32(EXPTAB_MASK))); 00681 00682 xi0 = _mm_add_epi32(_mm_srai_epi32(xi0, EXPTAB_SCALE), _mm_set1_epi32(1023)); 00683 xi0 = _mm_packs_epi32(xi0, xi0); 00684 xi0 = _mm_max_epi16(xi0, _mm_setzero_si128()); 00685 xi0 = _mm_min_epi16(xi0, _mm_set1_epi16(2047)); 00686 xi0 = _mm_unpacklo_epi16(xi0, _mm_setzero_si128()); 00687 xi1 = _mm_unpackhi_epi32(xi0, _mm_setzero_si128()); 00688 xi0 = _mm_unpacklo_epi32(xi0, _mm_setzero_si128()); 00689 00690 __m128d yf0 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[0]), _mm_load_sd(expTab + tab_idx[1])); 00691 __m128d yf1 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[2]), _mm_load_sd(expTab + tab_idx[3])); 00692 yf0 = _mm_mul_pd(yf0, _mm_castsi128_pd(_mm_slli_epi64(xi0, 52))); 00693 yf1 = _mm_mul_pd(yf1, _mm_castsi128_pd(_mm_slli_epi64(xi1, 52))); 00694 00695 __m128d zf0 = _mm_add_pd(_mm_mul_pd(mA0, xf0), mA1); 00696 __m128d zf1 = _mm_add_pd(_mm_mul_pd(mA0, xf1), mA1); 00697 00698 zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA2); 00699 zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA2); 00700 00701 zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA3); 00702 zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA3); 00703 00704 zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA4); 00705 zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA4); 00706 00707 zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA5); 00708 zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA5); 00709 00710 zf0 = _mm_mul_pd(zf0, yf0); 00711 zf1 = _mm_mul_pd(zf1, yf1); 00712 00713 _mm_storeu_pd(y + i, zf0); 00714 _mm_storeu_pd(y + i + 2, zf1); 00715 } 00716 #endif 00717 for( ; i <= n - 4; i += 4 ) 00718 { 00719 double x0 = x[i].f * exp_prescale; 00720 double x1 = x[i + 1].f * exp_prescale; 00721 double x2 = x[i + 2].f * exp_prescale; 00722 double x3 = x[i + 3].f * exp_prescale; 00723 00724 double y0, y1, y2, y3; 00725 int val0, val1, val2, val3, t; 
00726 00727 t = (int)(x[i].i >> 52); 00728 if( (t & 2047) > 1023 + 10 ) 00729 x0 = t < 0 ? -exp_max_val : exp_max_val; 00730 00731 t = (int)(x[i+1].i >> 52); 00732 if( (t & 2047) > 1023 + 10 ) 00733 x1 = t < 0 ? -exp_max_val : exp_max_val; 00734 00735 t = (int)(x[i+2].i >> 52); 00736 if( (t & 2047) > 1023 + 10 ) 00737 x2 = t < 0 ? -exp_max_val : exp_max_val; 00738 00739 t = (int)(x[i+3].i >> 52); 00740 if( (t & 2047) > 1023 + 10 ) 00741 x3 = t < 0 ? -exp_max_val : exp_max_val; 00742 00743 val0 = cvRound(x0); 00744 val1 = cvRound(x1); 00745 val2 = cvRound(x2); 00746 val3 = cvRound(x3); 00747 00748 x0 = (x0 - val0)*exp_postscale; 00749 x1 = (x1 - val1)*exp_postscale; 00750 x2 = (x2 - val2)*exp_postscale; 00751 x3 = (x3 - val3)*exp_postscale; 00752 00753 t = (val0 >> EXPTAB_SCALE) + 1023; 00754 t = !(t & ~2047) ? t : t < 0 ? 0 : 2047; 00755 buf[0].i = (int64)t << 52; 00756 00757 t = (val1 >> EXPTAB_SCALE) + 1023; 00758 t = !(t & ~2047) ? t : t < 0 ? 0 : 2047; 00759 buf[1].i = (int64)t << 52; 00760 00761 t = (val2 >> EXPTAB_SCALE) + 1023; 00762 t = !(t & ~2047) ? t : t < 0 ? 0 : 2047; 00763 buf[2].i = (int64)t << 52; 00764 00765 t = (val3 >> EXPTAB_SCALE) + 1023; 00766 t = !(t & ~2047) ? t : t < 0 ? 0 : 2047; 00767 buf[3].i = (int64)t << 52; 00768 00769 y0 = buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY( x0 ); 00770 y1 = buf[1].f * expTab[val1 & EXPTAB_MASK] * EXPPOLY( x1 ); 00771 00772 y[i] = y0; 00773 y[i + 1] = y1; 00774 00775 y2 = buf[2].f * expTab[val2 & EXPTAB_MASK] * EXPPOLY( x2 ); 00776 y3 = buf[3].f * expTab[val3 & EXPTAB_MASK] * EXPPOLY( x3 ); 00777 00778 y[i + 2] = y2; 00779 y[i + 3] = y3; 00780 } 00781 00782 for( ; i < n; i++ ) 00783 { 00784 double x0 = x[i].f * exp_prescale; 00785 int val0, t; 00786 00787 t = (int)(x[i].i >> 52); 00788 if( (t & 2047) > 1023 + 10 ) 00789 x0 = t < 0 ? -exp_max_val : exp_max_val; 00790 00791 val0 = cvRound(x0); 00792 t = (val0 >> EXPTAB_SCALE) + 1023; 00793 t = !(t & ~2047) ? t : t < 0 ? 
0 : 2047; 00794 00795 buf[0].i = (int64)t << 52; 00796 x0 = (x0 - val0)*exp_postscale; 00797 00798 y[i] = buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY( x0 ); 00799 } 00800 } 00801 00802 #undef EXPTAB_SCALE 00803 #undef EXPTAB_MASK 00804 #undef EXPPOLY_32F_A0 00805 00806 /////////////////////////////////////////// LOG /////////////////////////////////////// 00807 00808 #define LOGTAB_SCALE 8 00809 #define LOGTAB_MASK ((1 << LOGTAB_SCALE) - 1) 00810 #define LOGTAB_MASK2 ((1 << (20 - LOGTAB_SCALE)) - 1) 00811 #define LOGTAB_MASK2_32F ((1 << (23 - LOGTAB_SCALE)) - 1) 00812 00813 static const double CV_DECL_ALIGNED(16) icvLogTab[] = { 00814 0.0000000000000000000000000000000000000000, 1.000000000000000000000000000000000000000, 00815 .00389864041565732288852075271279318258166, .9961089494163424124513618677042801556420, 00816 .00778214044205494809292034119607706088573, .9922480620155038759689922480620155038760, 00817 .01165061721997527263705585198749759001657, .9884169884169884169884169884169884169884, 00818 .01550418653596525274396267235488267033361, .9846153846153846153846153846153846153846, 00819 .01934296284313093139406447562578250654042, .9808429118773946360153256704980842911877, 00820 .02316705928153437593630670221500622574241, .9770992366412213740458015267175572519084, 00821 .02697658769820207233514075539915211265906, .9733840304182509505703422053231939163498, 00822 .03077165866675368732785500469617545604706, .9696969696969696969696969696969696969697, 00823 .03455238150665972812758397481047722976656, .9660377358490566037735849056603773584906, 00824 .03831886430213659461285757856785494368522, .9624060150375939849624060150375939849624, 00825 .04207121392068705056921373852674150839447, .9588014981273408239700374531835205992509, 00826 .04580953603129420126371940114040626212953, .9552238805970149253731343283582089552239, 00827 .04953393512227662748292900118940451648088, .9516728624535315985130111524163568773234, 00828 .05324451451881227759255210685296333394944, 
.9481481481481481481481481481481481481481, 00829 .05694137640013842427411105973078520037234, .9446494464944649446494464944649446494465, 00830 .06062462181643483993820353816772694699466, .9411764705882352941176470588235294117647, 00831 .06429435070539725460836422143984236754475, .9377289377289377289377289377289377289377, 00832 .06795066190850773679699159401934593915938, .9343065693430656934306569343065693430657, 00833 .07159365318700880442825962290953611955044, .9309090909090909090909090909090909090909, 00834 .07522342123758751775142172846244648098944, .9275362318840579710144927536231884057971, 00835 .07884006170777602129362549021607264876369, .9241877256317689530685920577617328519856, 00836 .08244366921107458556772229485432035289706, .9208633093525179856115107913669064748201, 00837 .08603433734180314373940490213499288074675, .9175627240143369175627240143369175627240, 00838 .08961215868968712416897659522874164395031, .9142857142857142857142857142857142857143, 00839 .09317722485418328259854092721070628613231, .9110320284697508896797153024911032028470, 00840 .09672962645855109897752299730200320482256, .9078014184397163120567375886524822695035, 00841 .10026945316367513738597949668474029749630, .9045936395759717314487632508833922261484, 00842 .10379679368164355934833764649738441221420, .9014084507042253521126760563380281690141, 00843 .10731173578908805021914218968959175981580, .8982456140350877192982456140350877192982, 00844 .11081436634029011301105782649756292812530, .8951048951048951048951048951048951048951, 00845 .11430477128005862852422325204315711744130, .8919860627177700348432055749128919860627, 00846 .11778303565638344185817487641543266363440, .8888888888888888888888888888888888888889, 00847 .12124924363286967987640707633545389398930, .8858131487889273356401384083044982698962, 00848 .12470347850095722663787967121606925502420, .8827586206896551724137931034482758620690, 00849 .12814582269193003360996385708858724683530, .8797250859106529209621993127147766323024, 
00850 .13157635778871926146571524895989568904040, .8767123287671232876712328767123287671233, 00851 .13499516453750481925766280255629681050780, .8737201365187713310580204778156996587031, 00852 .13840232285911913123754857224412262439730, .8707482993197278911564625850340136054422, 00853 .14179791186025733629172407290752744302150, .8677966101694915254237288135593220338983, 00854 .14518200984449788903951628071808954700830, .8648648648648648648648648648648648648649, 00855 .14855469432313711530824207329715136438610, .8619528619528619528619528619528619528620, 00856 .15191604202584196858794030049466527998450, .8590604026845637583892617449664429530201, 00857 .15526612891112392955683674244937719777230, .8561872909698996655518394648829431438127, 00858 .15860503017663857283636730244325008243330, .8533333333333333333333333333333333333333, 00859 .16193282026931324346641360989451641216880, .8504983388704318936877076411960132890365, 00860 .16524957289530714521497145597095368430010, .8476821192052980132450331125827814569536, 00861 .16855536102980664403538924034364754334090, .8448844884488448844884488448844884488449, 00862 .17185025692665920060697715143760433420540, .8421052631578947368421052631578947368421, 00863 .17513433212784912385018287750426679849630, .8393442622950819672131147540983606557377, 00864 .17840765747281828179637841458315961062910, .8366013071895424836601307189542483660131, 00865 .18167030310763465639212199675966985523700, .8338762214983713355048859934853420195440, 00866 .18492233849401198964024217730184318497780, .8311688311688311688311688311688311688312, 00867 .18816383241818296356839823602058459073300, .8284789644012944983818770226537216828479, 00868 .19139485299962943898322009772527962923050, .8258064516129032258064516129032258064516, 00869 .19461546769967164038916962454095482826240, .8231511254019292604501607717041800643087, 00870 .19782574332991986754137769821682013571260, .8205128205128205128205128205128205128205, 00871 
.20102574606059073203390141770796617493040, .8178913738019169329073482428115015974441, 00872 .20421554142869088876999228432396193966280, .8152866242038216560509554140127388535032, 00873 .20739519434607056602715147164417430758480, .8126984126984126984126984126984126984127, 00874 .21056476910734961416338251183333341032260, .8101265822784810126582278481012658227848, 00875 .21372432939771812687723695489694364368910, .8075709779179810725552050473186119873817, 00876 .21687393830061435506806333251006435602900, .8050314465408805031446540880503144654088, 00877 .22001365830528207823135744547471404075630, .8025078369905956112852664576802507836991, 00878 .22314355131420973710199007200571941211830, .8000000000000000000000000000000000000000, 00879 .22626367865045338145790765338460914790630, .7975077881619937694704049844236760124611, 00880 .22937410106484582006380890106811420992010, .7950310559006211180124223602484472049689, 00881 .23247487874309405442296849741978803649550, .7925696594427244582043343653250773993808, 00882 .23556607131276688371634975283086532726890, .7901234567901234567901234567901234567901, 00883 .23864773785017498464178231643018079921600, .7876923076923076923076923076923076923077, 00884 .24171993688714515924331749374687206000090, .7852760736196319018404907975460122699387, 00885 .24478272641769091566565919038112042471760, .7828746177370030581039755351681957186544, 00886 .24783616390458124145723672882013488560910, .7804878048780487804878048780487804878049, 00887 .25088030628580937353433455427875742316250, .7781155015197568389057750759878419452888, 00888 .25391520998096339667426946107298135757450, .7757575757575757575757575757575757575758, 00889 .25694093089750041913887912414793390780680, .7734138972809667673716012084592145015106, 00890 .25995752443692604627401010475296061486000, .7710843373493975903614457831325301204819, 00891 .26296504550088134477547896494797896593800, .7687687687687687687687687687687687687688, 00892 .26596354849713793599974565040611196309330, 
.7664670658682634730538922155688622754491, 00893 .26895308734550393836570947314612567424780, .7641791044776119402985074626865671641791, 00894 .27193371548364175804834985683555714786050, .7619047619047619047619047619047619047619, 00895 .27490548587279922676529508862586226314300, .7596439169139465875370919881305637982196, 00896 .27786845100345625159121709657483734190480, .7573964497041420118343195266272189349112, 00897 .28082266290088775395616949026589281857030, .7551622418879056047197640117994100294985, 00898 .28376817313064456316240580235898960381750, .7529411764705882352941176470588235294118, 00899 .28670503280395426282112225635501090437180, .7507331378299120234604105571847507331378, 00900 .28963329258304265634293983566749375313530, .7485380116959064327485380116959064327485, 00901 .29255300268637740579436012922087684273730, .7463556851311953352769679300291545189504, 00902 .29546421289383584252163927885703742504130, .7441860465116279069767441860465116279070, 00903 .29836697255179722709783618483925238251680, .7420289855072463768115942028985507246377, 00904 .30126133057816173455023545102449133992200, .7398843930635838150289017341040462427746, 00905 .30414733546729666446850615102448500692850, .7377521613832853025936599423631123919308, 00906 .30702503529491181888388950937951449304830, .7356321839080459770114942528735632183908, 00907 .30989447772286465854207904158101882785550, .7335243553008595988538681948424068767908, 00908 .31275571000389684739317885942000430077330, .7314285714285714285714285714285714285714, 00909 .31560877898630329552176476681779604405180, .7293447293447293447293447293447293447293, 00910 .31845373111853458869546784626436419785030, .7272727272727272727272727272727272727273, 00911 .32129061245373424782201254856772720813750, .7252124645892351274787535410764872521246, 00912 .32411946865421192853773391107097268104550, .7231638418079096045197740112994350282486, 00913 .32694034499585328257253991068864706903700, .7211267605633802816901408450704225352113, 
00914 .32975328637246797969240219572384376078850, .7191011235955056179775280898876404494382, 00915 .33255833730007655635318997155991382896900, .7170868347338935574229691876750700280112, 00916 .33535554192113781191153520921943709254280, .7150837988826815642458100558659217877095, 00917 .33814494400871636381467055798566434532400, .7130919220055710306406685236768802228412, 00918 .34092658697059319283795275623560883104800, .7111111111111111111111111111111111111111, 00919 .34370051385331840121395430287520866841080, .7091412742382271468144044321329639889197, 00920 .34646676734620857063262633346312213689100, .7071823204419889502762430939226519337017, 00921 .34922538978528827602332285096053965389730, .7052341597796143250688705234159779614325, 00922 .35197642315717814209818925519357435405250, .7032967032967032967032967032967032967033, 00923 .35471990910292899856770532096561510115850, .7013698630136986301369863013698630136986, 00924 .35745588892180374385176833129662554711100, .6994535519125683060109289617486338797814, 00925 .36018440357500774995358483465679455548530, .6975476839237057220708446866485013623978, 00926 .36290549368936841911903457003063522279280, .6956521739130434782608695652173913043478, 00927 .36561919956096466943762379742111079394830, .6937669376693766937669376693766937669377, 00928 .36832556115870762614150635272380895912650, .6918918918918918918918918918918918918919, 00929 .37102461812787262962487488948681857436900, .6900269541778975741239892183288409703504, 00930 .37371640979358405898480555151763837784530, .6881720430107526881720430107526881720430, 00931 .37640097516425302659470730759494472295050, .6863270777479892761394101876675603217158, 00932 .37907835293496944251145919224654790014030, .6844919786096256684491978609625668449198, 00933 .38174858149084833769393299007788300514230, .6826666666666666666666666666666666666667, 00934 .38441169891033200034513583887019194662580, .6808510638297872340425531914893617021277, 00935 
.38706774296844825844488013899535872042180, .6790450928381962864721485411140583554377, 00936 .38971675114002518602873692543653305619950, .6772486772486772486772486772486772486772, 00937 .39235876060286384303665840889152605086580, .6754617414248021108179419525065963060686, 00938 .39499380824086893770896722344332374632350, .6736842105263157894736842105263157894737, 00939 .39762193064713846624158577469643205404280, .6719160104986876640419947506561679790026, 00940 .40024316412701266276741307592601515352730, .6701570680628272251308900523560209424084, 00941 .40285754470108348090917615991202183067800, .6684073107049608355091383812010443864230, 00942 .40546510810816432934799991016916465014230, .6666666666666666666666666666666666666667, 00943 .40806588980822172674223224930756259709600, .6649350649350649350649350649350649350649, 00944 .41065992498526837639616360320360399782650, .6632124352331606217616580310880829015544, 00945 .41324724855021932601317757871584035456180, .6614987080103359173126614987080103359173, 00946 .41582789514371093497757669865677598863850, .6597938144329896907216494845360824742268, 00947 .41840189913888381489925905043492093682300, .6580976863753213367609254498714652956298, 00948 .42096929464412963239894338585145305842150, .6564102564102564102564102564102564102564, 00949 .42353011550580327293502591601281892508280, .6547314578005115089514066496163682864450, 00950 .42608439531090003260516141381231136620050, .6530612244897959183673469387755102040816, 00951 .42863216738969872610098832410585600882780, .6513994910941475826972010178117048346056, 00952 .43117346481837132143866142541810404509300, .6497461928934010152284263959390862944162, 00953 .43370832042155937902094819946796633303180, .6481012658227848101265822784810126582278, 00954 .43623676677491801667585491486534010618930, .6464646464646464646464646464646464646465, 00955 .43875883620762790027214350629947148263450, .6448362720403022670025188916876574307305, 00956 .44127456080487520440058801796112675219780, 
.6432160804020100502512562814070351758794, 00957 .44378397241030093089975139264424797147500, .6416040100250626566416040100250626566416, 00958 .44628710262841947420398014401143882423650, .6400000000000000000000000000000000000000, 00959 .44878398282700665555822183705458883196130, .6384039900249376558603491271820448877805, 00960 .45127464413945855836729492693848442286250, .6368159203980099502487562189054726368159, 00961 .45375911746712049854579618113348260521900, .6352357320099255583126550868486352357320, 00962 .45623743348158757315857769754074979573500, .6336633663366336633663366336633663366337, 00963 .45870962262697662081833982483658473938700, .6320987654320987654320987654320987654321, 00964 .46117571512217014895185229761409573256980, .6305418719211822660098522167487684729064, 00965 .46363574096303250549055974261136725544930, .6289926289926289926289926289926289926290, 00966 .46608972992459918316399125615134835243230, .6274509803921568627450980392156862745098, 00967 .46853771156323925639597405279346276074650, .6259168704156479217603911980440097799511, 00968 .47097971521879100631480241645476780831830, .6243902439024390243902439024390243902439, 00969 .47341577001667212165614273544633761048330, .6228710462287104622871046228710462287105, 00970 .47584590486996386493601107758877333253630, .6213592233009708737864077669902912621359, 00971 .47827014848147025860569669930555392056700, .6198547215496368038740920096852300242131, 00972 .48068852934575190261057286988943815231330, .6183574879227053140096618357487922705314, 00973 .48310107575113581113157579238759353756900, .6168674698795180722891566265060240963855, 00974 .48550781578170076890899053978500887751580, .6153846153846153846153846153846153846154, 00975 .48790877731923892879351001283794175833480, .6139088729016786570743405275779376498801, 00976 .49030398804519381705802061333088204264650, .6124401913875598086124401913875598086124, 00977 .49269347544257524607047571407747454941280, .6109785202863961813842482100238663484487, 
00978 .49507726679785146739476431321236304938800, .6095238095238095238095238095238095238095, 00979 .49745538920281889838648226032091770321130, .6080760095011876484560570071258907363420, 00980 .49982786955644931126130359189119189977650, .6066350710900473933649289099526066350711, 00981 .50219473456671548383667413872899487614650, .6052009456264775413711583924349881796690, 00982 .50455601075239520092452494282042607665050, .6037735849056603773584905660377358490566, 00983 .50691172444485432801997148999362252652650, .6023529411764705882352941176470588235294, 00984 .50926190178980790257412536448100581765150, .6009389671361502347417840375586854460094, 00985 .51160656874906207391973111953120678663250, .5995316159250585480093676814988290398126, 00986 .51394575110223428282552049495279788970950, .5981308411214953271028037383177570093458, 00987 .51627947444845445623684554448118433356300, .5967365967365967365967365967365967365967, 00988 .51860776420804555186805373523384332656850, .5953488372093023255813953488372093023256, 00989 .52093064562418522900344441950437612831600, .5939675174013921113689095127610208816705, 00990 .52324814376454775732838697877014055848100, .5925925925925925925925925925925925925926, 00991 .52556028352292727401362526507000438869000, .5912240184757505773672055427251732101617, 00992 .52786708962084227803046587723656557500350, .5898617511520737327188940092165898617512, 00993 .53016858660912158374145519701414741575700, .5885057471264367816091954022988505747126, 00994 .53246479886947173376654518506256863474850, .5871559633027522935779816513761467889908, 00995 .53475575061602764748158733709715306758900, .5858123569794050343249427917620137299771, 00996 .53704146589688361856929077475797384977350, .5844748858447488584474885844748858447489, 00997 .53932196859560876944783558428753167390800, .5831435079726651480637813211845102505695, 00998 .54159728243274429804188230264117009937750, .5818181818181818181818181818181818181818, 00999 
.54386743096728351609669971367111429572100, .5804988662131519274376417233560090702948, 01000 .54613243759813556721383065450936555862450, .5791855203619909502262443438914027149321, 01001 .54839232556557315767520321969641372561450, .5778781038374717832957110609480812641084, 01002 .55064711795266219063194057525834068655950, .5765765765765765765765765765765765765766, 01003 .55289683768667763352766542084282264113450, .5752808988764044943820224719101123595506, 01004 .55514150754050151093110798683483153581600, .5739910313901345291479820627802690582960, 01005 .55738115013400635344709144192165695130850, .5727069351230425055928411633109619686801, 01006 .55961578793542265941596269840374588966350, .5714285714285714285714285714285714285714, 01007 .56184544326269181269140062795486301183700, .5701559020044543429844097995545657015590, 01008 .56407013828480290218436721261241473257550, .5688888888888888888888888888888888888889, 01009 .56628989502311577464155334382667206227800, .5676274944567627494456762749445676274945, 01010 .56850473535266865532378233183408156037350, .5663716814159292035398230088495575221239, 01011 .57071468100347144680739575051120482385150, .5651214128035320088300220750551876379691, 01012 .57291975356178548306473885531886480748650, .5638766519823788546255506607929515418502, 01013 .57511997447138785144460371157038025558000, .5626373626373626373626373626373626373626, 01014 .57731536503482350219940144597785547375700, .5614035087719298245614035087719298245614, 01015 .57950594641464214795689713355386629700650, .5601750547045951859956236323851203501094, 01016 .58169173963462239562716149521293118596100, .5589519650655021834061135371179039301310, 01017 .58387276558098266665552955601015128195300, .5577342047930283224400871459694989106754, 01018 .58604904500357812846544902640744112432000, .5565217391304347826086956521739130434783, 01019 .58822059851708596855957011939608491957200, .5553145336225596529284164859002169197397, 01020 .59038744660217634674381770309992134571100, 
.5541125541125541125541125541125541125541, 01021 .59254960960667157898740242671919986605650, .5529157667386609071274298056155507559395, 01022 .59470710774669277576265358220553025603300, .5517241379310344827586206896551724137931, 01023 .59685996110779382384237123915227130055450, .5505376344086021505376344086021505376344, 01024 .59900818964608337768851242799428291618800, .5493562231759656652360515021459227467811, 01025 .60115181318933474940990890900138765573500, .5481798715203426124197002141327623126338, 01026 .60329085143808425240052883964381180703650, .5470085470085470085470085470085470085470, 01027 .60542532396671688843525771517306566238400, .5458422174840085287846481876332622601279, 01028 .60755525022454170969155029524699784815300, .5446808510638297872340425531914893617021, 01029 .60968064953685519036241657886421307921400, .5435244161358811040339702760084925690021, 01030 .61180154110599282990534675263916142284850, .5423728813559322033898305084745762711864, 01031 .61391794401237043121710712512140162289150, .5412262156448202959830866807610993657505, 01032 .61602987721551394351138242200249806046500, .5400843881856540084388185654008438818565, 01033 .61813735955507864705538167982012964785100, .5389473684210526315789473684210526315789, 01034 .62024040975185745772080281312810257077200, .5378151260504201680672268907563025210084, 01035 .62233904640877868441606324267922900617100, .5366876310272536687631027253668763102725, 01036 .62443328801189346144440150965237990021700, .5355648535564853556485355648535564853556, 01037 .62652315293135274476554741340805776417250, .5344467640918580375782881002087682672234, 01038 .62860865942237409420556559780379757285100, .5333333333333333333333333333333333333333, 01039 .63068982562619868570408243613201193511500, .5322245322245322245322245322245322245322, 01040 .63276666957103777644277897707070223987100, .5311203319502074688796680497925311203320, 01041 .63483920917301017716738442686619237065300, .5300207039337474120082815734989648033126, 
01042 .63690746223706917739093569252872839570050, .5289256198347107438016528925619834710744, 01043 .63897144645792069983514238629140891134750, .5278350515463917525773195876288659793814, 01044 .64103117942093124081992527862894348800200, .5267489711934156378600823045267489711934, 01045 .64308667860302726193566513757104985415950, .5256673511293634496919917864476386036961, 01046 .64513796137358470073053240412264131009600, .5245901639344262295081967213114754098361, 01047 .64718504499530948859131740391603671014300, .5235173824130879345603271983640081799591, 01048 .64922794662510974195157587018911726772800, .5224489795918367346938775510204081632653, 01049 .65126668331495807251485530287027359008800, .5213849287169042769857433808553971486762, 01050 .65330127201274557080523663898929953575150, .5203252032520325203252032520325203252033, 01051 .65533172956312757406749369692988693714150, .5192697768762677484787018255578093306288, 01052 .65735807270835999727154330685152672231200, .5182186234817813765182186234817813765182, 01053 .65938031808912778153342060249997302889800, .5171717171717171717171717171717171717172, 01054 .66139848224536490484126716182800009846700, .5161290322580645161290322580645161290323, 01055 .66341258161706617713093692145776003599150, .5150905432595573440643863179074446680080, 01056 .66542263254509037562201001492212526500250, .5140562248995983935742971887550200803213, 01057 .66742865127195616370414654738851822912700, .5130260521042084168336673346693386773547, 01058 .66943065394262923906154583164607174694550, .5120000000000000000000000000000000000000, 01059 .67142865660530226534774556057527661323550, .5109780439121756487025948103792415169661, 01060 .67342267521216669923234121597488410770900, .5099601593625498007968127490039840637450, 01061 .67541272562017662384192817626171745359900, .5089463220675944333996023856858846918489, 01062 .67739882359180603188519853574689477682100, .5079365079365079365079365079365079365079, 01063 
.67938098479579733801614338517538271844400, .5069306930693069306930693069306930693069, 01064 .68135922480790300781450241629499942064300, .5059288537549407114624505928853754940711, 01065 .68333355911162063645036823800182901322850, .5049309664694280078895463510848126232742, 01066 .68530400309891936760919861626462079584600, .5039370078740157480314960629921259842520, 01067 .68727057207096020619019327568821609020250, .5029469548133595284872298624754420432220, 01068 .68923328123880889251040571252815425395950, .5019607843137254901960784313725490196078, 01069 .69314718055994530941723212145818, 5.0e-01, 01070 }; 01071 01072 01073 01074 #define LOGTAB_TRANSLATE(x,h) (((x) - 1.)*icvLogTab[(h)+1]) 01075 static const double ln_2 = 0.69314718055994530941723212145818; 01076 01077 void log32f( const float *_x, float *y, int n ) 01078 { 01079 static const float shift[] = { 0, -1.f/512 }; 01080 static const float 01081 A0 = 0.3333333333333333333333333f, 01082 A1 = -0.5f, 01083 A2 = 1.f; 01084 01085 #undef LOGPOLY 01086 #define LOGPOLY(x) (((A0*(x) + A1)*(x) + A2)*(x)) 01087 01088 int i = 0; 01089 Cv32suf buf[4]; 01090 const int* x = (const int*)_x; 01091 01092 #if CV_SSE2 01093 static const __m128d ln2_2 = _mm_set1_pd(ln_2); 01094 static const __m128 _1_4 = _mm_set1_ps(1.f); 01095 static const __m128 shift4 = _mm_set1_ps(-1.f/512); 01096 01097 static const __m128 mA0 = _mm_set1_ps(A0); 01098 static const __m128 mA1 = _mm_set1_ps(A1); 01099 static const __m128 mA2 = _mm_set1_ps(A2); 01100 01101 int CV_DECL_ALIGNED(16) idx[4]; 01102 01103 for( ; i <= n - 4; i += 4 ) 01104 { 01105 __m128i h0 = _mm_loadu_si128((const __m128i*)(x + i)); 01106 __m128i yi0 = _mm_sub_epi32(_mm_and_si128(_mm_srli_epi32(h0, 23), _mm_set1_epi32(255)), _mm_set1_epi32(127)); 01107 __m128d yd0 = _mm_mul_pd(_mm_cvtepi32_pd(yi0), ln2_2); 01108 __m128d yd1 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_unpackhi_epi64(yi0,yi0)), ln2_2); 01109 01110 __m128i xi0 = _mm_or_si128(_mm_and_si128(h0, _mm_set1_epi32(LOGTAB_MASK2_32F)), 
_mm_set1_epi32(127 << 23)); 01111 01112 h0 = _mm_and_si128(_mm_srli_epi32(h0, 23 - LOGTAB_SCALE - 1), _mm_set1_epi32(LOGTAB_MASK*2)); 01113 _mm_store_si128((__m128i*)idx, h0); 01114 h0 = _mm_cmpeq_epi32(h0, _mm_set1_epi32(510)); 01115 01116 __m128d t0, t1, t2, t3, t4; 01117 t0 = _mm_load_pd(icvLogTab + idx[0]); 01118 t2 = _mm_load_pd(icvLogTab + idx[1]); 01119 t1 = _mm_unpackhi_pd(t0, t2); 01120 t0 = _mm_unpacklo_pd(t0, t2); 01121 t2 = _mm_load_pd(icvLogTab + idx[2]); 01122 t4 = _mm_load_pd(icvLogTab + idx[3]); 01123 t3 = _mm_unpackhi_pd(t2, t4); 01124 t2 = _mm_unpacklo_pd(t2, t4); 01125 01126 yd0 = _mm_add_pd(yd0, t0); 01127 yd1 = _mm_add_pd(yd1, t2); 01128 01129 __m128 yf0 = _mm_movelh_ps(_mm_cvtpd_ps(yd0), _mm_cvtpd_ps(yd1)); 01130 01131 __m128 xf0 = _mm_sub_ps(_mm_castsi128_ps(xi0), _1_4); 01132 xf0 = _mm_mul_ps(xf0, _mm_movelh_ps(_mm_cvtpd_ps(t1), _mm_cvtpd_ps(t3))); 01133 xf0 = _mm_add_ps(xf0, _mm_and_ps(_mm_castsi128_ps(h0), shift4)); 01134 01135 __m128 zf0 = _mm_mul_ps(xf0, mA0); 01136 zf0 = _mm_mul_ps(_mm_add_ps(zf0, mA1), xf0); 01137 zf0 = _mm_mul_ps(_mm_add_ps(zf0, mA2), xf0); 01138 yf0 = _mm_add_ps(yf0, zf0); 01139 01140 _mm_storeu_ps(y + i, yf0); 01141 } 01142 #endif 01143 for( ; i <= n - 4; i += 4 ) 01144 { 01145 double x0, x1, x2, x3; 01146 double y0, y1, y2, y3; 01147 int h0, h1, h2, h3; 01148 01149 h0 = x[i]; 01150 h1 = x[i+1]; 01151 buf[0].i = (h0 & LOGTAB_MASK2_32F) | (127 << 23); 01152 buf[1].i = (h1 & LOGTAB_MASK2_32F) | (127 << 23); 01153 01154 y0 = (((h0 >> 23) & 0xff) - 127) * ln_2; 01155 y1 = (((h1 >> 23) & 0xff) - 127) * ln_2; 01156 01157 h0 = (h0 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2; 01158 h1 = (h1 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2; 01159 01160 y0 += icvLogTab[h0]; 01161 y1 += icvLogTab[h1]; 01162 01163 h2 = x[i+2]; 01164 h3 = x[i+3]; 01165 01166 x0 = LOGTAB_TRANSLATE( buf[0].f, h0 ); 01167 x1 = LOGTAB_TRANSLATE( buf[1].f, h1 ); 01168 01169 buf[2].i = (h2 & LOGTAB_MASK2_32F) | (127 << 23); 01170 buf[3].i = (h3 & 
LOGTAB_MASK2_32F) | (127 << 23); 01171 01172 y2 = (((h2 >> 23) & 0xff) - 127) * ln_2; 01173 y3 = (((h3 >> 23) & 0xff) - 127) * ln_2; 01174 01175 h2 = (h2 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2; 01176 h3 = (h3 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2; 01177 01178 y2 += icvLogTab[h2]; 01179 y3 += icvLogTab[h3]; 01180 01181 x2 = LOGTAB_TRANSLATE( buf[2].f, h2 ); 01182 x3 = LOGTAB_TRANSLATE( buf[3].f, h3 ); 01183 01184 x0 += shift[h0 == 510]; 01185 x1 += shift[h1 == 510]; 01186 y0 += LOGPOLY( x0 ); 01187 y1 += LOGPOLY( x1 ); 01188 01189 y[i] = (float) y0; 01190 y[i + 1] = (float) y1; 01191 01192 x2 += shift[h2 == 510]; 01193 x3 += shift[h3 == 510]; 01194 y2 += LOGPOLY( x2 ); 01195 y3 += LOGPOLY( x3 ); 01196 01197 y[i + 2] = (float) y2; 01198 y[i + 3] = (float) y3; 01199 } 01200 01201 for( ; i < n; i++ ) 01202 { 01203 int h0 = x[i]; 01204 double y0; 01205 float x0; 01206 01207 y0 = (((h0 >> 23) & 0xff) - 127) * ln_2; 01208 01209 buf[0].i = (h0 & LOGTAB_MASK2_32F) | (127 << 23); 01210 h0 = (h0 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2; 01211 01212 y0 += icvLogTab[h0]; 01213 x0 = (float)LOGTAB_TRANSLATE( buf[0].f, h0 ); 01214 x0 += shift[h0 == 510]; 01215 y0 += LOGPOLY( x0 ); 01216 01217 y[i] = (float)y0; 01218 } 01219 } 01220 01221 void log64f( const double *x, double *y, int n ) 01222 { 01223 static const double shift[] = { 0, -1./512 }; 01224 static const double 01225 A7 = 1.0, 01226 A6 = -0.5, 01227 A5 = 0.333333333333333314829616256247390992939472198486328125, 01228 A4 = -0.25, 01229 A3 = 0.2, 01230 A2 = -0.1666666666666666574148081281236954964697360992431640625, 01231 A1 = 0.1428571428571428769682682968777953647077083587646484375, 01232 A0 = -0.125; 01233 01234 #undef LOGPOLY 01235 #define LOGPOLY(x,k) ((x)+=shift[k], xq = (x)*(x),\ 01236 (((A0*xq + A2)*xq + A4)*xq + A6)*xq + \ 01237 (((A1*xq + A3)*xq + A5)*xq + A7)*(x)) 01238 01239 int i = 0; 01240 DBLINT buf[4]; 01241 DBLINT *X = (DBLINT *) x; 01242 01243 #if CV_SSE2 01244 static const __m128d 
ln2_2 = _mm_set1_pd(ln_2); 01245 static const __m128d _1_2 = _mm_set1_pd(1.); 01246 static const __m128d shift2 = _mm_set1_pd(-1./512); 01247 01248 static const __m128i log_and_mask2 = _mm_set_epi32(LOGTAB_MASK2, 0xffffffff, LOGTAB_MASK2, 0xffffffff); 01249 static const __m128i log_or_mask2 = _mm_set_epi32(1023 << 20, 0, 1023 << 20, 0); 01250 01251 static const __m128d mA0 = _mm_set1_pd(A0); 01252 static const __m128d mA1 = _mm_set1_pd(A1); 01253 static const __m128d mA2 = _mm_set1_pd(A2); 01254 static const __m128d mA3 = _mm_set1_pd(A3); 01255 static const __m128d mA4 = _mm_set1_pd(A4); 01256 static const __m128d mA5 = _mm_set1_pd(A5); 01257 static const __m128d mA6 = _mm_set1_pd(A6); 01258 static const __m128d mA7 = _mm_set1_pd(A7); 01259 01260 int CV_DECL_ALIGNED(16) idx[4]; 01261 01262 for( ; i <= n - 4; i += 4 ) 01263 { 01264 __m128i h0 = _mm_loadu_si128((const __m128i*)(x + i)); 01265 __m128i h1 = _mm_loadu_si128((const __m128i*)(x + i + 2)); 01266 01267 __m128d xd0 = _mm_castsi128_pd(_mm_or_si128(_mm_and_si128(h0, log_and_mask2), log_or_mask2)); 01268 __m128d xd1 = _mm_castsi128_pd(_mm_or_si128(_mm_and_si128(h1, log_and_mask2), log_or_mask2)); 01269 01270 h0 = _mm_unpackhi_epi32(_mm_unpacklo_epi32(h0, h1), _mm_unpackhi_epi32(h0, h1)); 01271 01272 __m128i yi0 = _mm_sub_epi32(_mm_and_si128(_mm_srli_epi32(h0, 20), 01273 _mm_set1_epi32(2047)), _mm_set1_epi32(1023)); 01274 __m128d yd0 = _mm_mul_pd(_mm_cvtepi32_pd(yi0), ln2_2); 01275 __m128d yd1 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_unpackhi_epi64(yi0, yi0)), ln2_2); 01276 01277 h0 = _mm_and_si128(_mm_srli_epi32(h0, 20 - LOGTAB_SCALE - 1), _mm_set1_epi32(LOGTAB_MASK * 2)); 01278 _mm_store_si128((__m128i*)idx, h0); 01279 h0 = _mm_cmpeq_epi32(h0, _mm_set1_epi32(510)); 01280 01281 __m128d t0, t1, t2, t3, t4; 01282 t0 = _mm_load_pd(icvLogTab + idx[0]); 01283 t2 = _mm_load_pd(icvLogTab + idx[1]); 01284 t1 = _mm_unpackhi_pd(t0, t2); 01285 t0 = _mm_unpacklo_pd(t0, t2); 01286 t2 = _mm_load_pd(icvLogTab + idx[2]); 01287 t4 = 
_mm_load_pd(icvLogTab + idx[3]); 01288 t3 = _mm_unpackhi_pd(t2, t4); 01289 t2 = _mm_unpacklo_pd(t2, t4); 01290 01291 yd0 = _mm_add_pd(yd0, t0); 01292 yd1 = _mm_add_pd(yd1, t2); 01293 01294 xd0 = _mm_mul_pd(_mm_sub_pd(xd0, _1_2), t1); 01295 xd1 = _mm_mul_pd(_mm_sub_pd(xd1, _1_2), t3); 01296 01297 xd0 = _mm_add_pd(xd0, _mm_and_pd(_mm_castsi128_pd(_mm_unpacklo_epi32(h0, h0)), shift2)); 01298 xd1 = _mm_add_pd(xd1, _mm_and_pd(_mm_castsi128_pd(_mm_unpackhi_epi32(h0, h0)), shift2)); 01299 01300 __m128d zd0 = _mm_mul_pd(xd0, mA0); 01301 __m128d zd1 = _mm_mul_pd(xd1, mA0); 01302 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA1), xd0); 01303 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA1), xd1); 01304 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA2), xd0); 01305 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA2), xd1); 01306 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA3), xd0); 01307 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA3), xd1); 01308 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA4), xd0); 01309 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA4), xd1); 01310 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA5), xd0); 01311 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA5), xd1); 01312 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA6), xd0); 01313 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA6), xd1); 01314 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA7), xd0); 01315 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA7), xd1); 01316 01317 yd0 = _mm_add_pd(yd0, zd0); 01318 yd1 = _mm_add_pd(yd1, zd1); 01319 01320 _mm_storeu_pd(y + i, yd0); 01321 _mm_storeu_pd(y + i + 2, yd1); 01322 } 01323 #endif 01324 for( ; i <= n - 4; i += 4 ) 01325 { 01326 double xq; 01327 double x0, x1, x2, x3; 01328 double y0, y1, y2, y3; 01329 int h0, h1, h2, h3; 01330 01331 h0 = X[i].i.lo; 01332 h1 = X[i + 1].i.lo; 01333 buf[0].i.lo = h0; 01334 buf[1].i.lo = h1; 01335 01336 h0 = X[i].i.hi; 01337 h1 = X[i + 1].i.hi; 01338 buf[0].i.hi = (h0 & LOGTAB_MASK2) | (1023 << 20); 01339 buf[1].i.hi = (h1 & LOGTAB_MASK2) | (1023 << 20); 01340 01341 y0 = (((h0 >> 20) & 0x7ff) - 1023) * ln_2; 01342 y1 = (((h1 >> 20) & 0x7ff) - 1023) * ln_2; 01343 01344 h2 = X[i + 
2].i.lo; 01345 h3 = X[i + 3].i.lo; 01346 buf[2].i.lo = h2; 01347 buf[3].i.lo = h3; 01348 01349 h0 = (h0 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2; 01350 h1 = (h1 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2; 01351 01352 y0 += icvLogTab[h0]; 01353 y1 += icvLogTab[h1]; 01354 01355 h2 = X[i + 2].i.hi; 01356 h3 = X[i + 3].i.hi; 01357 01358 x0 = LOGTAB_TRANSLATE( buf[0].d, h0 ); 01359 x1 = LOGTAB_TRANSLATE( buf[1].d, h1 ); 01360 01361 buf[2].i.hi = (h2 & LOGTAB_MASK2) | (1023 << 20); 01362 buf[3].i.hi = (h3 & LOGTAB_MASK2) | (1023 << 20); 01363 01364 y2 = (((h2 >> 20) & 0x7ff) - 1023) * ln_2; 01365 y3 = (((h3 >> 20) & 0x7ff) - 1023) * ln_2; 01366 01367 h2 = (h2 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2; 01368 h3 = (h3 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2; 01369 01370 y2 += icvLogTab[h2]; 01371 y3 += icvLogTab[h3]; 01372 01373 x2 = LOGTAB_TRANSLATE( buf[2].d, h2 ); 01374 x3 = LOGTAB_TRANSLATE( buf[3].d, h3 ); 01375 01376 y0 += LOGPOLY( x0, h0 == 510 ); 01377 y1 += LOGPOLY( x1, h1 == 510 ); 01378 01379 y[i] = y0; 01380 y[i + 1] = y1; 01381 01382 y2 += LOGPOLY( x2, h2 == 510 ); 01383 y3 += LOGPOLY( x3, h3 == 510 ); 01384 01385 y[i + 2] = y2; 01386 y[i + 3] = y3; 01387 } 01388 01389 for( ; i < n; i++ ) 01390 { 01391 int h0 = X[i].i.hi; 01392 double xq; 01393 double x0, y0 = (((h0 >> 20) & 0x7ff) - 1023) * ln_2; 01394 01395 buf[0].i.hi = (h0 & LOGTAB_MASK2) | (1023 << 20); 01396 buf[0].i.lo = X[i].i.lo; 01397 h0 = (h0 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2; 01398 01399 y0 += icvLogTab[h0]; 01400 x0 = LOGTAB_TRANSLATE( buf[0].d, h0 ); 01401 y0 += LOGPOLY( x0, h0 == 510 ); 01402 y[i] = y0; 01403 } 01404 } 01405 01406 //============================================================================= 01407 // for compatibility with 3.0 01408 01409 void exp(const float* src, float* dst, int n) 01410 { 01411 exp32f(src, dst, n); 01412 } 01413 01414 void exp(const double* src, double* dst, int n) 01415 { 01416 exp64f(src, dst, n); 01417 } 01418 01419 void 
log(const float* src, float* dst, int n) 01420 { 01421 log32f(src, dst, n); 01422 } 01423 01424 void log(const double* src, double* dst, int n) 01425 { 01426 log64f(src, dst, n); 01427 } 01428 01429 void magnitude(const float* x, const float* y, float* dst, int n) 01430 { 01431 magnitude32f(x, y, dst, n); 01432 } 01433 01434 void magnitude(const double* x, const double* y, double* dst, int n) 01435 { 01436 magnitude64f(x, y, dst, n); 01437 } 01438 01439 void sqrt(const float* src, float* dst, int len) 01440 { 01441 sqrt32f(src, dst, len); 01442 } 01443 01444 void sqrt(const double* src, double* dst, int len) 01445 { 01446 sqrt64f(src, dst, len); 01447 } 01448 01449 void invSqrt(const float* src, float* dst, int len) 01450 { 01451 invSqrt32f(src, dst, len); 01452 } 01453 01454 void invSqrt(const double* src, double* dst, int len) 01455 { 01456 invSqrt64f(src, dst, len); 01457 } 01458 01459 01460 }} // cv::hal:: 01461
Generated on Tue Jul 12 2022 14:47:20 by
Doxygen 1.7.2
