Renesas GR-PEACH OpenCV Development / gr-peach-opencv-project-sd-card_update

Fork of gr-peach-opencv-project-sd-card by the do

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers mathfuncs_core.cpp Source File

mathfuncs_core.cpp

00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
00015 // Third party copyrights are property of their respective owners.
00016 //
00017 // Redistribution and use in source and binary forms, with or without modification,
00018 // are permitted provided that the following conditions are met:
00019 //
00020 //   * Redistribution's of source code must retain the above copyright notice,
00021 //     this list of conditions and the following disclaimer.
00022 //
00023 //   * Redistribution's in binary form must reproduce the above copyright notice,
00024 //     this list of conditions and the following disclaimer in the documentation
00025 //     and/or other materials provided with the distribution.
00026 //
00027 //   * The name of the copyright holders may not be used to endorse or promote products
00028 //     derived from this software without specific prior written permission.
00029 //
00030 // This software is provided by the copyright holders and contributors "as is" and
00031 // any express or implied warranties, including, but not limited to, the implied
00032 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00033 // In no event shall the Intel Corporation or contributors be liable for any direct,
00034 // indirect, incidental, special, exemplary, or consequential damages
00035 // (including, but not limited to, procurement of substitute goods or services;
00036 // loss of use, data, or profits; or business interruption) however caused
00037 // and on any theory of liability, whether in contract, strict liability,
00038 // or tort (including negligence or otherwise) arising in any way out of
00039 // the use of this software, even if advised of the possibility of such damage.
00040 //
00041 //M*/
00042 
00043 #include "precomp.hpp"
00044 
00045 #undef HAVE_IPP
00046 
00047 namespace cv { namespace hal {
00048 
00049 ///////////////////////////////////// ATAN2 ////////////////////////////////////
// Coefficients of the odd polynomial p1*c + p3*c^3 + p5*c^5 + p7*c^7 that
// fastAtan2() evaluates to approximate the arctangent. Each coefficient is
// pre-multiplied by 180/pi.
00050 static const float atan2_p1 = 0.9997878412794807f*(float)(180/CV_PI);
00051 static const float atan2_p3 = -0.3258083974640975f*(float)(180/CV_PI);
00052 static const float atan2_p5 = 0.1555786518463281f*(float)(180/CV_PI);
00053 static const float atan2_p7 = -0.04432655554792128f*(float)(180/CV_PI);
00054 
00055 void fastAtan2(const float *Y, const float *X, float *angle, int len, bool angleInDegrees )
00056 {
00057     int i = 0;
00058     float scale = angleInDegrees ? 1 : (float)(CV_PI/180);
00059 
00060 #ifdef HAVE_TEGRA_OPTIMIZATION
00061     if (tegra::useTegra() && tegra::FastAtan2_32f(Y, X, angle, len, scale))
00062         return;
00063 #endif
00064 
00065 #if CV_SSE2
00066     Cv32suf iabsmask; iabsmask.i = 0x7fffffff;
00067     __m128 eps = _mm_set1_ps((float)DBL_EPSILON), absmask = _mm_set1_ps(iabsmask.f);
00068     __m128 _90 = _mm_set1_ps(90.f), _180 = _mm_set1_ps(180.f), _360 = _mm_set1_ps(360.f);
00069     __m128 z = _mm_setzero_ps(), scale4 = _mm_set1_ps(scale);
00070     __m128 p1 = _mm_set1_ps(atan2_p1), p3 = _mm_set1_ps(atan2_p3);
00071     __m128 p5 = _mm_set1_ps(atan2_p5), p7 = _mm_set1_ps(atan2_p7);
00072 
00073     for( ; i <= len - 4; i += 4 )
00074     {
00075         __m128 x = _mm_loadu_ps(X + i), y = _mm_loadu_ps(Y + i);
00076         __m128 ax = _mm_and_ps(x, absmask), ay = _mm_and_ps(y, absmask);
00077         __m128 mask = _mm_cmplt_ps(ax, ay);
00078         __m128 tmin = _mm_min_ps(ax, ay), tmax = _mm_max_ps(ax, ay);
00079         __m128 c = _mm_div_ps(tmin, _mm_add_ps(tmax, eps));
00080         __m128 c2 = _mm_mul_ps(c, c);
00081         __m128 a = _mm_mul_ps(c2, p7);
00082         a = _mm_mul_ps(_mm_add_ps(a, p5), c2);
00083         a = _mm_mul_ps(_mm_add_ps(a, p3), c2);
00084         a = _mm_mul_ps(_mm_add_ps(a, p1), c);
00085 
00086         __m128 b = _mm_sub_ps(_90, a);
00087         a = _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(a, b), mask));
00088 
00089         b = _mm_sub_ps(_180, a);
00090         mask = _mm_cmplt_ps(x, z);
00091         a = _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(a, b), mask));
00092 
00093         b = _mm_sub_ps(_360, a);
00094         mask = _mm_cmplt_ps(y, z);
00095         a = _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(a, b), mask));
00096 
00097         a = _mm_mul_ps(a, scale4);
00098         _mm_storeu_ps(angle + i, a);
00099     }
00100 #elif CV_NEON
00101     float32x4_t eps = vdupq_n_f32((float)DBL_EPSILON);
00102     float32x4_t _90 = vdupq_n_f32(90.f), _180 = vdupq_n_f32(180.f), _360 = vdupq_n_f32(360.f);
00103     float32x4_t z = vdupq_n_f32(0.0f), scale4 = vdupq_n_f32(scale);
00104     float32x4_t p1 = vdupq_n_f32(atan2_p1), p3 = vdupq_n_f32(atan2_p3);
00105     float32x4_t p5 = vdupq_n_f32(atan2_p5), p7 = vdupq_n_f32(atan2_p7);
00106 
00107     for( ; i <= len - 4; i += 4 )
00108     {
00109         float32x4_t x = vld1q_f32(X + i), y = vld1q_f32(Y + i);
00110         float32x4_t ax = vabsq_f32(x), ay = vabsq_f32(y);
00111         float32x4_t tmin = vminq_f32(ax, ay), tmax = vmaxq_f32(ax, ay);
00112         float32x4_t c = vmulq_f32(tmin, cv_vrecpq_f32(vaddq_f32(tmax, eps)));
00113         float32x4_t c2 = vmulq_f32(c, c);
00114         float32x4_t a = vmulq_f32(c2, p7);
00115         a = vmulq_f32(vaddq_f32(a, p5), c2);
00116         a = vmulq_f32(vaddq_f32(a, p3), c2);
00117         a = vmulq_f32(vaddq_f32(a, p1), c);
00118 
00119         a = vbslq_f32(vcgeq_f32(ax, ay), a, vsubq_f32(_90, a));
00120         a = vbslq_f32(vcltq_f32(x, z), vsubq_f32(_180, a), a);
00121         a = vbslq_f32(vcltq_f32(y, z), vsubq_f32(_360, a), a);
00122 
00123         vst1q_f32(angle + i, vmulq_f32(a, scale4));
00124     }
00125 #endif
00126 
00127     for( ; i < len; i++ )
00128     {
00129         float x = X[i], y = Y[i];
00130         float ax = std::abs(x), ay = std::abs(y);
00131         float a, c, c2;
00132         if( ax >= ay )
00133         {
00134             c = ay/(ax + (float)DBL_EPSILON);
00135             c2 = c*c;
00136             a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
00137         }
00138         else
00139         {
00140             c = ax/(ay + (float)DBL_EPSILON);
00141             c2 = c*c;
00142             a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
00143         }
00144         if( x < 0 )
00145             a = 180.f - a;
00146         if( y < 0 )
00147             a = 360.f - a;
00148         angle[i] = (float)(a*scale);
00149     }
00150 }
00151 
00152 
/**
 * Element-wise Euclidean norm in single precision:
 * mag[k] = sqrt(x[k]*x[k] + y[k]*y[k]) for k in [0, len).
 *
 * @param x    first components (len elements)
 * @param y    second components (len elements)
 * @param mag  output buffer (len elements)
 * @param len  number of elements
 */
void magnitude32f(const float* x, const float* y, float* mag, int len)
{
#if defined HAVE_IPP
    CV_IPP_CHECK()
    {
        const IppStatus ippResult = ippsMagnitude_32f(x, y, mag, len);
        if (ippResult < 0)
        {
            setIppErrorStatus();
        }
        else
        {
            CV_IMPL_ADD(CV_IMPL_IPP);
            return;
        }
    }
#endif

    int k = 0;

#if CV_SIMD128
    // Two 4-lane vectors per iteration; all loads happen before any store.
    for( ; k <= len - 8; k += 8 )
    {
        const v_float32x4 xa = v_load(x + k), xb = v_load(x + k + 4);
        const v_float32x4 ya = v_load(y + k), yb = v_load(y + k + 4);
        const v_float32x4 ma = v_sqrt(v_muladd(xa, xa, ya*ya));
        const v_float32x4 mb = v_sqrt(v_muladd(xb, xb, yb*yb));
        v_store(mag + k, ma);
        v_store(mag + k + 4, mb);
    }
#endif

    // Scalar remainder.
    while( k < len )
    {
        const float re = x[k], im = y[k];
        mag[k] = std::sqrt(re*re + im*im);
        ++k;
    }
}
00188 
/**
 * Element-wise Euclidean norm in double precision:
 * mag[k] = sqrt(x[k]*x[k] + y[k]*y[k]) for k in [0, len).
 *
 * @param x    first components (len elements)
 * @param y    second components (len elements)
 * @param mag  output buffer (len elements)
 * @param len  number of elements
 */
void magnitude64f(const double* x, const double* y, double* mag, int len)
{
#if defined(HAVE_IPP)
    CV_IPP_CHECK()
    {
        const IppStatus ippResult = ippsMagnitude_64f(x, y, mag, len);
        if (ippResult < 0)
        {
            setIppErrorStatus();
        }
        else
        {
            CV_IMPL_ADD(CV_IMPL_IPP);
            return;
        }
    }
#endif

    int k = 0;

#if CV_SIMD128_64F
    // Two 2-lane vectors per iteration; all loads happen before any store.
    for( ; k <= len - 4; k += 4 )
    {
        const v_float64x2 xa = v_load(x + k), xb = v_load(x + k + 2);
        const v_float64x2 ya = v_load(y + k), yb = v_load(y + k + 2);
        const v_float64x2 ma = v_sqrt(v_muladd(xa, xa, ya*ya));
        const v_float64x2 mb = v_sqrt(v_muladd(xb, xb, yb*yb));
        v_store(mag + k, ma);
        v_store(mag + k + 2, mb);
    }
#endif

    // Scalar remainder.
    while( k < len )
    {
        const double re = x[k], im = y[k];
        mag[k] = std::sqrt(re*re + im*im);
        ++k;
    }
}
00224 
00225 
/**
 * Element-wise reciprocal square root in single precision:
 * dst[k] = 1 / sqrt(src[k]) for k in [0, len).
 *
 * @param src  input values (len elements)
 * @param dst  output buffer (len elements)
 * @param len  number of elements
 */
void invSqrt32f(const float* src, float* dst, int len)
{
#if defined(HAVE_IPP)
    CV_IPP_CHECK()
    {
        if (ippsInvSqrt_32f_A21(src, dst, len) < 0)
        {
            setIppErrorStatus();
        }
        else
        {
            CV_IMPL_ADD(CV_IMPL_IPP);
            return;
        }
    }
#endif

    int k = 0;

#if CV_SIMD128
    // Eight elements (two 4-lane vectors) per iteration.
    for( ; k <= len - 8; k += 8 )
    {
        const v_float32x4 lo = v_load(src + k), hi = v_load(src + k + 4);
        const v_float32x4 rlo = v_invsqrt(lo);
        const v_float32x4 rhi = v_invsqrt(hi);
        v_store(dst + k, rlo);
        v_store(dst + k + 4, rhi);
    }
#endif

    // Scalar remainder.
    while( k < len )
    {
        dst[k] = 1/std::sqrt(src[k]);
        ++k;
    }
}
00255 
00256 
/**
 * Element-wise reciprocal square root in double precision:
 * dst[k] = 1 / sqrt(src[k]) for k in [0, len).
 *
 * @param src  input values (len elements)
 * @param dst  output buffer (len elements)
 * @param len  number of elements
 */
void invSqrt64f(const double* src, double* dst, int len)
{
    int k = 0;

#if CV_SSE2
    // Two doubles per iteration: full-precision sqrt followed by a divide.
    const __m128d vOne = _mm_set1_pd(1.0);
    for ( ; k <= len - 2; k += 2)
    {
        const __m128d root = _mm_sqrt_pd(_mm_loadu_pd(src + k));
        _mm_storeu_pd(dst + k, _mm_div_pd(vOne, root));
    }
#endif

    // Scalar remainder.
    while( k < len )
    {
        dst[k] = 1/std::sqrt(src[k]);
        ++k;
    }
}
00270 
00271 
/**
 * Element-wise square root in single precision:
 * dst[k] = sqrt(src[k]) for k in [0, len).
 *
 * @param src  input values (len elements)
 * @param dst  output buffer (len elements)
 * @param len  number of elements
 */
void sqrt32f(const float* src, float* dst, int len)
{
#if defined(HAVE_IPP)
    CV_IPP_CHECK()
    {
        if (ippsSqrt_32f_A21(src, dst, len) < 0)
        {
            setIppErrorStatus();
        }
        else
        {
            CV_IMPL_ADD(CV_IMPL_IPP);
            return;
        }
    }
#endif

    int k = 0;

#if CV_SIMD128
    // Eight elements (two 4-lane vectors) per iteration.
    for( ; k <= len - 8; k += 8 )
    {
        const v_float32x4 lo = v_load(src + k), hi = v_load(src + k + 4);
        const v_float32x4 rlo = v_sqrt(lo);
        const v_float32x4 rhi = v_sqrt(hi);
        v_store(dst + k, rlo);
        v_store(dst + k + 4, rhi);
    }
#endif

    // Scalar remainder.
    while( k < len )
    {
        dst[k] = std::sqrt(src[k]);
        ++k;
    }
}
00301 
00302 
/**
 * Element-wise square root in double precision:
 * dst[k] = sqrt(src[k]) for k in [0, len).
 *
 * @param src  input values (len elements)
 * @param dst  output buffer (len elements)
 * @param len  number of elements
 */
void sqrt64f(const double* src, double* dst, int len)
{
#if defined(HAVE_IPP)
    CV_IPP_CHECK()
    {
        if (ippsSqrt_64f_A50(src, dst, len) < 0)
        {
            setIppErrorStatus();
        }
        else
        {
            CV_IMPL_ADD(CV_IMPL_IPP);
            return;
        }
    }
#endif

    int k = 0;

#if CV_SIMD128_64F
    // Four elements (two 2-lane vectors) per iteration.
    for( ; k <= len - 4; k += 4 )
    {
        const v_float64x2 lo = v_load(src + k), hi = v_load(src + k + 2);
        const v_float64x2 rlo = v_sqrt(lo);
        const v_float64x2 rhi = v_sqrt(hi);
        v_store(dst + k, rlo);
        v_store(dst + k + 2, rhi);
    }
#endif

    // Scalar remainder.
    while( k < len )
    {
        dst[k] = std::sqrt(src[k]);
        ++k;
    }
}
00332 
00333 ////////////////////////////////////// EXP /////////////////////////////////////
00334 
// Union that overlays a double (`d`) with a struct of two ints (`i.lo`, `i.hi`).
// The declaration order of the two ints is swapped when a big-endian macro is
// defined, so the member order follows the selected byte order.
00335 typedef union
00336 {
00337     struct {
00338 #if ( defined( WORDS_BIGENDIAN ) && !defined( OPENCV_UNIVERSAL_BUILD ) ) || defined( __BIG_ENDIAN__ )
00339         int hi;
00340         int lo;
00341 #else
00342         int lo;
00343         int hi;
00344 #endif
00345     } i;
00346     double d;
00347 }
00348 DBLINT;
00349 
// EXPTAB_SCALE: number of low bits of the scaled, rounded argument that are
// used as the expTab index. EXPTAB_MASK extracts those bits.
00350 #define EXPTAB_SCALE 6
00351 #define EXPTAB_MASK  ((1 << EXPTAB_SCALE) - 1)
00352 
// Common factor folded into every expTab entry; the polynomial coefficients in
// exp32f()/exp64f() are divided by the same value.
00353 #define EXPPOLY_32F_A0 .9670371139572337719125840413672004409288e-2
00354 
// Lookup table with 1 << EXPTAB_SCALE (64) entries. The literals run from 1.0
// up to just below 2.0 (entry 32 is sqrt(2)), i.e. entry k is 2^(k/64), each
// multiplied by EXPPOLY_32F_A0.
00355 static const double expTab[] = {
00356     1.0 * EXPPOLY_32F_A0,
00357     1.0108892860517004600204097905619 * EXPPOLY_32F_A0,
00358     1.0218971486541166782344801347833 * EXPPOLY_32F_A0,
00359     1.0330248790212284225001082839705 * EXPPOLY_32F_A0,
00360     1.0442737824274138403219664787399 * EXPPOLY_32F_A0,
00361     1.0556451783605571588083413251529 * EXPPOLY_32F_A0,
00362     1.0671404006768236181695211209928 * EXPPOLY_32F_A0,
00363     1.0787607977571197937406800374385 * EXPPOLY_32F_A0,
00364     1.0905077326652576592070106557607 * EXPPOLY_32F_A0,
00365     1.1023825833078409435564142094256 * EXPPOLY_32F_A0,
00366     1.1143867425958925363088129569196 * EXPPOLY_32F_A0,
00367     1.126521618608241899794798643787 * EXPPOLY_32F_A0,
00368     1.1387886347566916537038302838415 * EXPPOLY_32F_A0,
00369     1.151189229952982705817759635202 * EXPPOLY_32F_A0,
00370     1.1637248587775775138135735990922 * EXPPOLY_32F_A0,
00371     1.1763969916502812762846457284838 * EXPPOLY_32F_A0,
00372     1.1892071150027210667174999705605 * EXPPOLY_32F_A0,
00373     1.2021567314527031420963969574978 * EXPPOLY_32F_A0,
00374     1.2152473599804688781165202513388 * EXPPOLY_32F_A0,
00375     1.2284805361068700056940089577928 * EXPPOLY_32F_A0,
00376     1.2418578120734840485936774687266 * EXPPOLY_32F_A0,
00377     1.2553807570246910895793906574423 * EXPPOLY_32F_A0,
00378     1.2690509571917332225544190810323 * EXPPOLY_32F_A0,
00379     1.2828700160787782807266697810215 * EXPPOLY_32F_A0,
00380     1.2968395546510096659337541177925 * EXPPOLY_32F_A0,
00381     1.3109612115247643419229917863308 * EXPPOLY_32F_A0,
00382     1.3252366431597412946295370954987 * EXPPOLY_32F_A0,
00383     1.3396675240533030053600306697244 * EXPPOLY_32F_A0,
00384     1.3542555469368927282980147401407 * EXPPOLY_32F_A0,
00385     1.3690024229745906119296011329822 * EXPPOLY_32F_A0,
00386     1.3839098819638319548726595272652 * EXPPOLY_32F_A0,
00387     1.3989796725383111402095281367152 * EXPPOLY_32F_A0,
00388     1.4142135623730950488016887242097 * EXPPOLY_32F_A0,
00389     1.4296133383919700112350657782751 * EXPPOLY_32F_A0,
00390     1.4451808069770466200370062414717 * EXPPOLY_32F_A0,
00391     1.4609177941806469886513028903106 * EXPPOLY_32F_A0,
00392     1.476826145939499311386907480374 * EXPPOLY_32F_A0,
00393     1.4929077282912648492006435314867 * EXPPOLY_32F_A0,
00394     1.5091644275934227397660195510332 * EXPPOLY_32F_A0,
00395     1.5255981507445383068512536895169 * EXPPOLY_32F_A0,
00396     1.5422108254079408236122918620907 * EXPPOLY_32F_A0,
00397     1.5590044002378369670337280894749 * EXPPOLY_32F_A0,
00398     1.5759808451078864864552701601819 * EXPPOLY_32F_A0,
00399     1.5931421513422668979372486431191 * EXPPOLY_32F_A0,
00400     1.6104903319492543081795206673574 * EXPPOLY_32F_A0,
00401     1.628027421857347766848218522014 * EXPPOLY_32F_A0,
00402     1.6457554781539648445187567247258 * EXPPOLY_32F_A0,
00403     1.6636765803267364350463364569764 * EXPPOLY_32F_A0,
00404     1.6817928305074290860622509524664 * EXPPOLY_32F_A0,
00405     1.7001063537185234695013625734975 * EXPPOLY_32F_A0,
00406     1.7186192981224779156293443764563 * EXPPOLY_32F_A0,
00407     1.7373338352737062489942020818722 * EXPPOLY_32F_A0,
00408     1.7562521603732994831121606193753 * EXPPOLY_32F_A0,
00409     1.7753764925265212525505592001993 * EXPPOLY_32F_A0,
00410     1.7947090750031071864277032421278 * EXPPOLY_32F_A0,
00411     1.8142521755003987562498346003623 * EXPPOLY_32F_A0,
00412     1.8340080864093424634870831895883 * EXPPOLY_32F_A0,
00413     1.8539791250833855683924530703377 * EXPPOLY_32F_A0,
00414     1.8741676341102999013299989499544 * EXPPOLY_32F_A0,
00415     1.8945759815869656413402186534269 * EXPPOLY_32F_A0,
00416     1.9152065613971472938726112702958 * EXPPOLY_32F_A0,
00417     1.9360617934922944505980559045667 * EXPPOLY_32F_A0,
00418     1.9571441241754002690183222516269 * EXPPOLY_32F_A0,
00419     1.9784560263879509682582499181312 * EXPPOLY_32F_A0,
00420 };
00421 
00422 
00423 // the code below uses _mm_cast* intrinsics, which are not available on VS2005
// Force CV_SSE2 to 0 for MSVC older than _MSC_VER 1500 and for non-Apple GCC
// older than 4.2, which disables the SSE2 branches that follow.
00424 #if (defined _MSC_VER && _MSC_VER < 1500) || \
00425 (!defined __APPLE__ && defined __GNUC__ && __GNUC__*100 + __GNUC_MINOR__ < 402)
00426 #undef CV_SSE2
00427 #define CV_SSE2 0
00428 #endif
00429 
// exp_prescale:  log2(e) * 2^EXPTAB_SCALE; multiplying x by it turns exp(x)
//                into a power of two expressed in 1/64 steps.
// exp_postscale: 2^-EXPTAB_SCALE; rescales the rounding remainder before it
//                is passed to the polynomial.
// exp_max_val:   magnitude limit applied to the pre-scaled argument.
00430 static const double exp_prescale = 1.4426950408889634073599246810019 * (1 << EXPTAB_SCALE);
00431 static const double exp_postscale = 1./(1 << EXPTAB_SCALE);
00432 static const double exp_max_val = 3000.*(1 << EXPTAB_SCALE); // log10(DBL_MAX) < 3000
00433 
/**
 * Computes y[k] = exp(_x[k]) for k in [0, n) in single precision.
 *
 * Each input is multiplied by exp_prescale and rounded to an integer `val`.
 * The low EXPTAB_SCALE bits of `val` index expTab. The remaining bits, plus
 * the bias 127 and clamped to [0, 255], are shifted to bit 23 of a float to
 * form a power-of-two factor. The rounding remainder, times exp_postscale,
 * goes through the EXPPOLY polynomial. The result is the product of the
 * three factors.
 *
 * @param _x  input array (n floats)
 * @param y   output array (n floats)
 * @param n   number of elements
 */
00434 void exp32f( const float *_x, float *y, int n )
00435 {
          // Polynomial coefficients, divided by EXPPOLY_32F_A0 because every
          // expTab entry is multiplied by that same factor.
00436     static const float
00437     A4 = (float)(1.000000000000002438532970795181890933776 / EXPPOLY_32F_A0),
00438     A3 = (float)(.6931471805521448196800669615864773144641 / EXPPOLY_32F_A0),
00439     A2 = (float)(.2402265109513301490103372422686535526573 / EXPPOLY_32F_A0),
00440     A1 = (float)(.5550339366753125211915322047004666939128e-1 / EXPPOLY_32F_A0);
00441 
      // Monic 4th-degree polynomial in x, evaluated in Horner form.
00442 #undef EXPPOLY
00443 #define EXPPOLY(x)  \
00444 (((((x) + A1)*(x) + A2)*(x) + A3)*(x) + A4)
00445 
00446     int i = 0;
          // View the input through Cv32suf so both the float value (.f) and its
          // integer bit pattern (.i) can be read.
00447     const Cv32suf* x = (const Cv32suf*)_x;
00448     Cv32suf buf[4];
00449 
00450 #if CV_SSE2
          // The SSE2 path is taken only for n >= 8; it handles 8 elements per
          // iteration and leaves the remainder to the scalar tail loop below.
00451     if( n >= 8 )
00452     {
00453         static const __m128d prescale2 = _mm_set1_pd(exp_prescale);
00454         static const __m128 postscale4 = _mm_set1_ps((float)exp_postscale);
00455         static const __m128 maxval4 = _mm_set1_ps((float)(exp_max_val/exp_prescale));
00456         static const __m128 minval4 = _mm_set1_ps((float)(-exp_max_val/exp_prescale));
00457 
00458         static const __m128 mA1 = _mm_set1_ps(A1);
00459         static const __m128 mA2 = _mm_set1_ps(A2);
00460         static const __m128 mA3 = _mm_set1_ps(A3);
00461         static const __m128 mA4 = _mm_set1_ps(A4);
              // Selects aligned vs. unaligned stores for the output.
00462         bool y_aligned = (size_t)(void*)y % 16 == 0;
00463 
00464         ushort CV_DECL_ALIGNED(16) tab_idx[8];
00465 
00466         for( ; i <= n - 8; i += 8 )
00467         {
00468             __m128 xf0, xf1;
00469             xf0 = _mm_loadu_ps(&x[i].f);
00470             xf1 = _mm_loadu_ps(&x[i+4].f);
00471             __m128i xi0, xi1, xi2, xi3;
00472 
                  // Clamp the inputs to [minval4, maxval4].
00473             xf0 = _mm_min_ps(_mm_max_ps(xf0, minval4), maxval4);
00474             xf1 = _mm_min_ps(_mm_max_ps(xf1, minval4), maxval4);
00475 
                  // Widen to double (two lanes per register) and apply the prescale.
00476             __m128d xd0 = _mm_cvtps_pd(xf0);
00477             __m128d xd2 = _mm_cvtps_pd(_mm_movehl_ps(xf0, xf0));
00478             __m128d xd1 = _mm_cvtps_pd(xf1);
00479             __m128d xd3 = _mm_cvtps_pd(_mm_movehl_ps(xf1, xf1));
00480 
00481             xd0 = _mm_mul_pd(xd0, prescale2);
00482             xd2 = _mm_mul_pd(xd2, prescale2);
00483             xd1 = _mm_mul_pd(xd1, prescale2);
00484             xd3 = _mm_mul_pd(xd3, prescale2);
00485 
                  // Convert to integer, then keep the remainder (value - integer).
00486             xi0 = _mm_cvtpd_epi32(xd0);
00487             xi2 = _mm_cvtpd_epi32(xd2);
00488 
00489             xi1 = _mm_cvtpd_epi32(xd1);
00490             xi3 = _mm_cvtpd_epi32(xd3);
00491 
00492             xd0 = _mm_sub_pd(xd0, _mm_cvtepi32_pd(xi0));
00493             xd2 = _mm_sub_pd(xd2, _mm_cvtepi32_pd(xi2));
00494             xd1 = _mm_sub_pd(xd1, _mm_cvtepi32_pd(xi1));
00495             xd3 = _mm_sub_pd(xd3, _mm_cvtepi32_pd(xi3));
00496 
                  // Narrow the remainders back to float and rescale them.
00497             xf0 = _mm_movelh_ps(_mm_cvtpd_ps(xd0), _mm_cvtpd_ps(xd2));
00498             xf1 = _mm_movelh_ps(_mm_cvtpd_ps(xd1), _mm_cvtpd_ps(xd3));
00499 
00500             xf0 = _mm_mul_ps(xf0, postscale4);
00501             xf1 = _mm_mul_ps(xf1, postscale4);
00502 
                  // Pack the eight 32-bit integers into eight 16-bit lanes.
00503             xi0 = _mm_unpacklo_epi64(xi0, xi2);
00504             xi1 = _mm_unpacklo_epi64(xi1, xi3);
00505             xi0 = _mm_packs_epi32(xi0, xi1);
00506 
                  // Low EXPTAB_SCALE bits -> table indices.
00507             _mm_store_si128((__m128i*)tab_idx, _mm_and_si128(xi0, _mm_set1_epi16(EXPTAB_MASK)));
00508 
                  // High bits + 127, clamped to [0, 255], widened back to 32-bit lanes.
00509             xi0 = _mm_add_epi16(_mm_srai_epi16(xi0, EXPTAB_SCALE), _mm_set1_epi16(127));
00510             xi0 = _mm_max_epi16(xi0, _mm_setzero_si128());
00511             xi0 = _mm_min_epi16(xi0, _mm_set1_epi16(255));
00512             xi1 = _mm_unpackhi_epi16(xi0, _mm_setzero_si128());
00513             xi0 = _mm_unpacklo_epi16(xi0, _mm_setzero_si128());
00514 
                  // Gather the table entries two at a time and narrow them to float.
00515             __m128d yd0 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[0]), _mm_load_sd(expTab + tab_idx[1]));
00516             __m128d yd1 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[2]), _mm_load_sd(expTab + tab_idx[3]));
00517             __m128d yd2 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[4]), _mm_load_sd(expTab + tab_idx[5]));
00518             __m128d yd3 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[6]), _mm_load_sd(expTab + tab_idx[7]));
00519 
00520             __m128 yf0 = _mm_movelh_ps(_mm_cvtpd_ps(yd0), _mm_cvtpd_ps(yd1));
00521             __m128 yf1 = _mm_movelh_ps(_mm_cvtpd_ps(yd2), _mm_cvtpd_ps(yd3));
00522 
                  // Multiply by the float whose bits are (clamped value << 23).
00523             yf0 = _mm_mul_ps(yf0, _mm_castsi128_ps(_mm_slli_epi32(xi0, 23)));
00524             yf1 = _mm_mul_ps(yf1, _mm_castsi128_ps(_mm_slli_epi32(xi1, 23)));
00525 
                  // Horner evaluation of the same polynomial as EXPPOLY.
00526             __m128 zf0 = _mm_add_ps(xf0, mA1);
00527             __m128 zf1 = _mm_add_ps(xf1, mA1);
00528 
00529             zf0 = _mm_add_ps(_mm_mul_ps(zf0, xf0), mA2);
00530             zf1 = _mm_add_ps(_mm_mul_ps(zf1, xf1), mA2);
00531 
00532             zf0 = _mm_add_ps(_mm_mul_ps(zf0, xf0), mA3);
00533             zf1 = _mm_add_ps(_mm_mul_ps(zf1, xf1), mA3);
00534 
00535             zf0 = _mm_add_ps(_mm_mul_ps(zf0, xf0), mA4);
00536             zf1 = _mm_add_ps(_mm_mul_ps(zf1, xf1), mA4);
00537 
00538             zf0 = _mm_mul_ps(zf0, yf0);
00539             zf1 = _mm_mul_ps(zf1, yf1);
00540 
00541             if( y_aligned )
00542             {
00543                 _mm_store_ps(y + i, zf0);
00544                 _mm_store_ps(y + i + 4, zf1);
00545             }
00546             else
00547             {
00548                 _mm_storeu_ps(y + i, zf0);
00549                 _mm_storeu_ps(y + i + 4, zf1);
00550             }
00551         }
00552     }
00553     else
00554 #endif
              // Scalar path, unrolled by four. When CV_SSE2 is enabled this loop
              // is the `else` branch of the n >= 8 test above.
00555         for( ; i <= n - 4; i += 4 )
00556         {
00557             double x0 = x[i].f * exp_prescale;
00558             double x1 = x[i + 1].f * exp_prescale;
00559             double x2 = x[i + 2].f * exp_prescale;
00560             double x3 = x[i + 3].f * exp_prescale;
00561             int val0, val1, val2, val3, t;
00562 
                  // If bits 23..30 of the input (the float exponent field) exceed
                  // 127 + 10, replace the scaled value by +/-exp_max_val based on
                  // the sign of the bit pattern.
00563             if( ((x[i].i >> 23) & 255) > 127 + 10 )
00564                 x0 = x[i].i < 0 ? -exp_max_val : exp_max_val;
00565 
00566             if( ((x[i+1].i >> 23) & 255) > 127 + 10 )
00567                 x1 = x[i+1].i < 0 ? -exp_max_val : exp_max_val;
00568 
00569             if( ((x[i+2].i >> 23) & 255) > 127 + 10 )
00570                 x2 = x[i+2].i < 0 ? -exp_max_val : exp_max_val;
00571 
00572             if( ((x[i+3].i >> 23) & 255) > 127 + 10 )
00573                 x3 = x[i+3].i < 0 ? -exp_max_val : exp_max_val;
00574 
00575             val0 = cvRound(x0);
00576             val1 = cvRound(x1);
00577             val2 = cvRound(x2);
00578             val3 = cvRound(x3);
00579 
                  // Rounding remainder, rescaled for the polynomial.
00580             x0 = (x0 - val0)*exp_postscale;
00581             x1 = (x1 - val1)*exp_postscale;
00582             x2 = (x2 - val2)*exp_postscale;
00583             x3 = (x3 - val3)*exp_postscale;
00584 
                  // High bits + 127, clamped to [0, 255], placed at bit 23 of a float.
00585             t = (val0 >> EXPTAB_SCALE) + 127;
00586             t = !(t & ~255) ? t : t < 0 ? 0 : 255;
00587             buf[0].i = t << 23;
00588 
00589             t = (val1 >> EXPTAB_SCALE) + 127;
00590             t = !(t & ~255) ? t : t < 0 ? 0 : 255;
00591             buf[1].i = t << 23;
00592 
00593             t = (val2 >> EXPTAB_SCALE) + 127;
00594             t = !(t & ~255) ? t : t < 0 ? 0 : 255;
00595             buf[2].i = t << 23;
00596 
00597             t = (val3 >> EXPTAB_SCALE) + 127;
00598             t = !(t & ~255) ? t : t < 0 ? 0 : 255;
00599             buf[3].i = t << 23;
00600 
                  // power-of-two factor * table entry * polynomial
00601             x0 = buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY( x0 );
00602             x1 = buf[1].f * expTab[val1 & EXPTAB_MASK] * EXPPOLY( x1 );
00603 
00604             y[i] = (float)x0;
00605             y[i + 1] = (float)x1;
00606 
00607             x2 = buf[2].f * expTab[val2 & EXPTAB_MASK] * EXPPOLY( x2 );
00608             x3 = buf[3].f * expTab[val3 & EXPTAB_MASK] * EXPPOLY( x3 );
00609 
00610             y[i + 2] = (float)x2;
00611             y[i + 3] = (float)x3;
00612         }
00613 
          // Tail: one element at a time, same steps as the unrolled loop.
00614     for( ; i < n; i++ )
00615     {
00616         double x0 = x[i].f * exp_prescale;
00617         int val0, t;
00618 
00619         if( ((x[i].i >> 23) & 255) > 127 + 10 )
00620             x0 = x[i].i < 0 ? -exp_max_val : exp_max_val;
00621 
00622         val0 = cvRound(x0);
00623         t = (val0 >> EXPTAB_SCALE) + 127;
00624         t = !(t & ~255) ? t : t < 0 ? 0 : 255;
00625 
00626         buf[0].i = t << 23;
00627         x0 = (x0 - val0)*exp_postscale;
00628 
00629         y[i] = (float)(buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY(x0));
00630     }
00631 }
00632 
/**
 * Computes y[k] = exp(_x[k]) for k in [0, n) in double precision.
 *
 * Each input is multiplied by exp_prescale and rounded to an integer `val`.
 * The low EXPTAB_SCALE bits of `val` index expTab. The remaining bits, plus
 * the bias 1023 and clamped to [0, 2047], are shifted to bit 52 of a double
 * to form a power-of-two factor. The rounding remainder, times
 * exp_postscale, goes through the 5th-degree EXPPOLY polynomial. The result
 * is the product of the three factors.
 *
 * @param _x  input array (n doubles)
 * @param y   output array (n doubles)
 * @param n   number of elements
 */
00633 void exp64f( const double *_x, double *y, int n )
00634 {
          // Polynomial coefficients, divided by EXPPOLY_32F_A0 because every
          // expTab entry is multiplied by that same factor.
00635     static const double
00636     A5 = .99999999999999999998285227504999 / EXPPOLY_32F_A0,
00637     A4 = .69314718055994546743029643825322 / EXPPOLY_32F_A0,
00638     A3 = .24022650695886477918181338054308 / EXPPOLY_32F_A0,
00639     A2 = .55504108793649567998466049042729e-1 / EXPPOLY_32F_A0,
00640     A1 = .96180973140732918010002372686186e-2 / EXPPOLY_32F_A0,
00641     A0 = .13369713757180123244806654839424e-2 / EXPPOLY_32F_A0;
00642 
      // 5th-degree polynomial in x, evaluated in Horner form.
00643 #undef EXPPOLY
00644 #define EXPPOLY(x)  (((((A0*(x) + A1)*(x) + A2)*(x) + A3)*(x) + A4)*(x) + A5)
00645 
00646     int i = 0;
00647     Cv64suf buf[4];
          // View the input through Cv64suf so both the double value (.f) and its
          // integer bit pattern (.i) can be read.
00648     const Cv64suf* x = (const Cv64suf*)_x;
00649 
00650 #if CV_SSE2
00651     static const __m128d prescale2 = _mm_set1_pd(exp_prescale);
00652     static const __m128d postscale2 = _mm_set1_pd(exp_postscale);
00653     static const __m128d maxval2 = _mm_set1_pd(exp_max_val);
00654     static const __m128d minval2 = _mm_set1_pd(-exp_max_val);
00655 
00656     static const __m128d mA0 = _mm_set1_pd(A0);
00657     static const __m128d mA1 = _mm_set1_pd(A1);
00658     static const __m128d mA2 = _mm_set1_pd(A2);
00659     static const __m128d mA3 = _mm_set1_pd(A3);
00660     static const __m128d mA4 = _mm_set1_pd(A4);
00661     static const __m128d mA5 = _mm_set1_pd(A5);
00662 
00663     int CV_DECL_ALIGNED(16) tab_idx[4];
00664 
          // SSE2 path: four doubles (two registers) per iteration.
00665     for( ; i <= n - 4; i += 4 )
00666     {
00667         __m128d xf0 = _mm_loadu_pd(&x[i].f), xf1 = _mm_loadu_pd(&x[i+2].f);
00668         __m128i xi0, xi1;
              // Clamp the inputs to [minval2, maxval2], then apply the prescale.
00669         xf0 = _mm_min_pd(_mm_max_pd(xf0, minval2), maxval2);
00670         xf1 = _mm_min_pd(_mm_max_pd(xf1, minval2), maxval2);
00671         xf0 = _mm_mul_pd(xf0, prescale2);
00672         xf1 = _mm_mul_pd(xf1, prescale2);
00673 
              // Convert to integer; keep the rescaled remainder for the polynomial.
00674         xi0 = _mm_cvtpd_epi32(xf0);
00675         xi1 = _mm_cvtpd_epi32(xf1);
00676         xf0 = _mm_mul_pd(_mm_sub_pd(xf0, _mm_cvtepi32_pd(xi0)), postscale2);
00677         xf1 = _mm_mul_pd(_mm_sub_pd(xf1, _mm_cvtepi32_pd(xi1)), postscale2);
00678 
              // Low EXPTAB_SCALE bits of the four integers -> table indices.
00679         xi0 = _mm_unpacklo_epi64(xi0, xi1);
00680         _mm_store_si128((__m128i*)tab_idx, _mm_and_si128(xi0, _mm_set1_epi32(EXPTAB_MASK)));
00681 
              // High bits + 1023, clamped to [0, 2047] in 16-bit lanes, then
              // zero-extended step by step to 64-bit lanes.
00682         xi0 = _mm_add_epi32(_mm_srai_epi32(xi0, EXPTAB_SCALE), _mm_set1_epi32(1023));
00683         xi0 = _mm_packs_epi32(xi0, xi0);
00684         xi0 = _mm_max_epi16(xi0, _mm_setzero_si128());
00685         xi0 = _mm_min_epi16(xi0, _mm_set1_epi16(2047));
00686         xi0 = _mm_unpacklo_epi16(xi0, _mm_setzero_si128());
00687         xi1 = _mm_unpackhi_epi32(xi0, _mm_setzero_si128());
00688         xi0 = _mm_unpacklo_epi32(xi0, _mm_setzero_si128());
00689 
              // Gather the table entries and multiply by the double whose bits are
              // (clamped value << 52).
00690         __m128d yf0 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[0]), _mm_load_sd(expTab + tab_idx[1]));
00691         __m128d yf1 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[2]), _mm_load_sd(expTab + tab_idx[3]));
00692         yf0 = _mm_mul_pd(yf0, _mm_castsi128_pd(_mm_slli_epi64(xi0, 52)));
00693         yf1 = _mm_mul_pd(yf1, _mm_castsi128_pd(_mm_slli_epi64(xi1, 52)));
00694 
              // Horner evaluation of the same polynomial as EXPPOLY.
00695         __m128d zf0 = _mm_add_pd(_mm_mul_pd(mA0, xf0), mA1);
00696         __m128d zf1 = _mm_add_pd(_mm_mul_pd(mA0, xf1), mA1);
00697 
00698         zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA2);
00699         zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA2);
00700 
00701         zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA3);
00702         zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA3);
00703 
00704         zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA4);
00705         zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA4);
00706 
00707         zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA5);
00708         zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA5);
00709 
00710         zf0 = _mm_mul_pd(zf0, yf0);
00711         zf1 = _mm_mul_pd(zf1, yf1);
00712 
00713         _mm_storeu_pd(y + i, zf0);
00714         _mm_storeu_pd(y + i + 2, zf1);
00715     }
00716 #endif
          // Scalar path, unrolled by four. When the SSE2 loop above ran, fewer
          // than four elements remain and this loop body is not entered.
00717     for( ; i <= n - 4; i += 4 )
00718     {
00719         double x0 = x[i].f * exp_prescale;
00720         double x1 = x[i + 1].f * exp_prescale;
00721         double x2 = x[i + 2].f * exp_prescale;
00722         double x3 = x[i + 3].f * exp_prescale;
00723 
00724         double y0, y1, y2, y3;
00725         int val0, val1, val2, val3, t;
00726 
              // t holds the top 12 bits of the input pattern (sign + exponent
              // field). If the exponent field exceeds 1023 + 10, replace the
              // scaled value by +/-exp_max_val based on the sign of t.
00727         t = (int)(x[i].i >> 52);
00728         if( (t & 2047) > 1023 + 10 )
00729             x0 = t < 0 ? -exp_max_val : exp_max_val;
00730 
00731         t = (int)(x[i+1].i >> 52);
00732         if( (t & 2047) > 1023 + 10 )
00733             x1 = t < 0 ? -exp_max_val : exp_max_val;
00734 
00735         t = (int)(x[i+2].i >> 52);
00736         if( (t & 2047) > 1023 + 10 )
00737             x2 = t < 0 ? -exp_max_val : exp_max_val;
00738 
00739         t = (int)(x[i+3].i >> 52);
00740         if( (t & 2047) > 1023 + 10 )
00741             x3 = t < 0 ? -exp_max_val : exp_max_val;
00742 
00743         val0 = cvRound(x0);
00744         val1 = cvRound(x1);
00745         val2 = cvRound(x2);
00746         val3 = cvRound(x3);
00747 
              // Rounding remainder, rescaled for the polynomial.
00748         x0 = (x0 - val0)*exp_postscale;
00749         x1 = (x1 - val1)*exp_postscale;
00750         x2 = (x2 - val2)*exp_postscale;
00751         x3 = (x3 - val3)*exp_postscale;
00752 
              // High bits + 1023, clamped to [0, 2047], placed at bit 52 of a double.
00753         t = (val0 >> EXPTAB_SCALE) + 1023;
00754         t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
00755         buf[0].i = (int64)t << 52;
00756 
00757         t = (val1 >> EXPTAB_SCALE) + 1023;
00758         t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
00759         buf[1].i = (int64)t << 52;
00760 
00761         t = (val2 >> EXPTAB_SCALE) + 1023;
00762         t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
00763         buf[2].i = (int64)t << 52;
00764 
00765         t = (val3 >> EXPTAB_SCALE) + 1023;
00766         t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
00767         buf[3].i = (int64)t << 52;
00768 
              // power-of-two factor * table entry * polynomial
00769         y0 = buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY( x0 );
00770         y1 = buf[1].f * expTab[val1 & EXPTAB_MASK] * EXPPOLY( x1 );
00771 
00772         y[i] = y0;
00773         y[i + 1] = y1;
00774 
00775         y2 = buf[2].f * expTab[val2 & EXPTAB_MASK] * EXPPOLY( x2 );
00776         y3 = buf[3].f * expTab[val3 & EXPTAB_MASK] * EXPPOLY( x3 );
00777 
00778         y[i + 2] = y2;
00779         y[i + 3] = y3;
00780     }
00781 
          // Tail: one element at a time, same steps as the unrolled loop.
00782     for( ; i < n; i++ )
00783     {
00784         double x0 = x[i].f * exp_prescale;
00785         int val0, t;
00786 
00787         t = (int)(x[i].i >> 52);
00788         if( (t & 2047) > 1023 + 10 )
00789             x0 = t < 0 ? -exp_max_val : exp_max_val;
00790 
00791         val0 = cvRound(x0);
00792         t = (val0 >> EXPTAB_SCALE) + 1023;
00793         t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
00794 
00795         buf[0].i = (int64)t << 52;
00796         x0 = (x0 - val0)*exp_postscale;
00797 
00798         y[i] = buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY( x0 );
00799     }
00800 }
00801 
// Drop the helper macros used by the exp code above so that they do not
// leak into the rest of the file.
00802 #undef EXPTAB_SCALE
00803 #undef EXPTAB_MASK
00804 #undef EXPPOLY_32F_A0
00805 
00806 /////////////////////////////////////////// LOG ///////////////////////////////////////
00807 
// Number of leading mantissa bits that select an entry pair in icvLogTab.
00808 #define LOGTAB_SCALE    8
// Mask of LOGTAB_SCALE one-bits (0xff). The log routines use LOGTAB_MASK*2
// so that the extracted index is always even, i.e. addresses the first
// element of a table pair.
00809 #define LOGTAB_MASK         ((1 << LOGTAB_SCALE) - 1)
// Mask of (20 - LOGTAB_SCALE) low bits; log64f applies it to the upper
// 32-bit word of each double to keep the mantissa bits below the index bits.
00810 #define LOGTAB_MASK2        ((1 << (20 - LOGTAB_SCALE)) - 1)
// Mask of (23 - LOGTAB_SCALE) low bits; log32f applies it to the raw float
// bits to keep the mantissa bits below the index bits.
00811 #define LOGTAB_MASK2_32F    ((1 << (23 - LOGTAB_SCALE)) - 1)
00812 
// Lookup table shared by the log routines: 256 interleaved pairs (512 doubles).
// For k = 0..254 the pair stored at icvLogTab[2*k], icvLogTab[2*k+1] is
// consistent with ( ln(1 + k/256), 1/(1 + k/256) ); e.g. k = 64 holds
// (0.2231..., 0.8) and k = 128 holds (0.4054..., 0.6666...).
// The last pair (index 510) deliberately breaks that pattern and holds
// (ln 2, 0.5); the log routines add an extra -1/512 shift to the reduced
// argument whenever the index equals 510.
// The even entry is added to the result directly; the odd entry is applied
// through LOGTAB_TRANSLATE. The array is declared 16-byte aligned and the
// SSE2 path of log32f fetches each pair with an aligned _mm_load_pd.
00813 static const double CV_DECL_ALIGNED(16) icvLogTab[] = {
00814     0.0000000000000000000000000000000000000000,    1.000000000000000000000000000000000000000,
00815     .00389864041565732288852075271279318258166,    .9961089494163424124513618677042801556420,
00816     .00778214044205494809292034119607706088573,    .9922480620155038759689922480620155038760,
00817     .01165061721997527263705585198749759001657,    .9884169884169884169884169884169884169884,
00818     .01550418653596525274396267235488267033361,    .9846153846153846153846153846153846153846,
00819     .01934296284313093139406447562578250654042,    .9808429118773946360153256704980842911877,
00820     .02316705928153437593630670221500622574241,    .9770992366412213740458015267175572519084,
00821     .02697658769820207233514075539915211265906,    .9733840304182509505703422053231939163498,
00822     .03077165866675368732785500469617545604706,    .9696969696969696969696969696969696969697,
00823     .03455238150665972812758397481047722976656,    .9660377358490566037735849056603773584906,
00824     .03831886430213659461285757856785494368522,    .9624060150375939849624060150375939849624,
00825     .04207121392068705056921373852674150839447,    .9588014981273408239700374531835205992509,
00826     .04580953603129420126371940114040626212953,    .9552238805970149253731343283582089552239,
00827     .04953393512227662748292900118940451648088,    .9516728624535315985130111524163568773234,
00828     .05324451451881227759255210685296333394944,    .9481481481481481481481481481481481481481,
00829     .05694137640013842427411105973078520037234,    .9446494464944649446494464944649446494465,
00830     .06062462181643483993820353816772694699466,    .9411764705882352941176470588235294117647,
00831     .06429435070539725460836422143984236754475,    .9377289377289377289377289377289377289377,
00832     .06795066190850773679699159401934593915938,    .9343065693430656934306569343065693430657,
00833     .07159365318700880442825962290953611955044,    .9309090909090909090909090909090909090909,
00834     .07522342123758751775142172846244648098944,    .9275362318840579710144927536231884057971,
00835     .07884006170777602129362549021607264876369,    .9241877256317689530685920577617328519856,
00836     .08244366921107458556772229485432035289706,    .9208633093525179856115107913669064748201,
00837     .08603433734180314373940490213499288074675,    .9175627240143369175627240143369175627240,
00838     .08961215868968712416897659522874164395031,    .9142857142857142857142857142857142857143,
00839     .09317722485418328259854092721070628613231,    .9110320284697508896797153024911032028470,
00840     .09672962645855109897752299730200320482256,    .9078014184397163120567375886524822695035,
00841     .10026945316367513738597949668474029749630,    .9045936395759717314487632508833922261484,
00842     .10379679368164355934833764649738441221420,    .9014084507042253521126760563380281690141,
00843     .10731173578908805021914218968959175981580,    .8982456140350877192982456140350877192982,
00844     .11081436634029011301105782649756292812530,    .8951048951048951048951048951048951048951,
00845     .11430477128005862852422325204315711744130,    .8919860627177700348432055749128919860627,
00846     .11778303565638344185817487641543266363440,    .8888888888888888888888888888888888888889,
00847     .12124924363286967987640707633545389398930,    .8858131487889273356401384083044982698962,
00848     .12470347850095722663787967121606925502420,    .8827586206896551724137931034482758620690,
00849     .12814582269193003360996385708858724683530,    .8797250859106529209621993127147766323024,
00850     .13157635778871926146571524895989568904040,    .8767123287671232876712328767123287671233,
00851     .13499516453750481925766280255629681050780,    .8737201365187713310580204778156996587031,
00852     .13840232285911913123754857224412262439730,    .8707482993197278911564625850340136054422,
00853     .14179791186025733629172407290752744302150,    .8677966101694915254237288135593220338983,
00854     .14518200984449788903951628071808954700830,    .8648648648648648648648648648648648648649,
00855     .14855469432313711530824207329715136438610,    .8619528619528619528619528619528619528620,
00856     .15191604202584196858794030049466527998450,    .8590604026845637583892617449664429530201,
00857     .15526612891112392955683674244937719777230,    .8561872909698996655518394648829431438127,
00858     .15860503017663857283636730244325008243330,    .8533333333333333333333333333333333333333,
00859     .16193282026931324346641360989451641216880,    .8504983388704318936877076411960132890365,
00860     .16524957289530714521497145597095368430010,    .8476821192052980132450331125827814569536,
00861     .16855536102980664403538924034364754334090,    .8448844884488448844884488448844884488449,
00862     .17185025692665920060697715143760433420540,    .8421052631578947368421052631578947368421,
00863     .17513433212784912385018287750426679849630,    .8393442622950819672131147540983606557377,
00864     .17840765747281828179637841458315961062910,    .8366013071895424836601307189542483660131,
00865     .18167030310763465639212199675966985523700,    .8338762214983713355048859934853420195440,
00866     .18492233849401198964024217730184318497780,    .8311688311688311688311688311688311688312,
00867     .18816383241818296356839823602058459073300,    .8284789644012944983818770226537216828479,
00868     .19139485299962943898322009772527962923050,    .8258064516129032258064516129032258064516,
00869     .19461546769967164038916962454095482826240,    .8231511254019292604501607717041800643087,
00870     .19782574332991986754137769821682013571260,    .8205128205128205128205128205128205128205,
00871     .20102574606059073203390141770796617493040,    .8178913738019169329073482428115015974441,
00872     .20421554142869088876999228432396193966280,    .8152866242038216560509554140127388535032,
00873     .20739519434607056602715147164417430758480,    .8126984126984126984126984126984126984127,
00874     .21056476910734961416338251183333341032260,    .8101265822784810126582278481012658227848,
00875     .21372432939771812687723695489694364368910,    .8075709779179810725552050473186119873817,
00876     .21687393830061435506806333251006435602900,    .8050314465408805031446540880503144654088,
00877     .22001365830528207823135744547471404075630,    .8025078369905956112852664576802507836991,
00878     .22314355131420973710199007200571941211830,    .8000000000000000000000000000000000000000,
00879     .22626367865045338145790765338460914790630,    .7975077881619937694704049844236760124611,
00880     .22937410106484582006380890106811420992010,    .7950310559006211180124223602484472049689,
00881     .23247487874309405442296849741978803649550,    .7925696594427244582043343653250773993808,
00882     .23556607131276688371634975283086532726890,    .7901234567901234567901234567901234567901,
00883     .23864773785017498464178231643018079921600,    .7876923076923076923076923076923076923077,
00884     .24171993688714515924331749374687206000090,    .7852760736196319018404907975460122699387,
00885     .24478272641769091566565919038112042471760,    .7828746177370030581039755351681957186544,
00886     .24783616390458124145723672882013488560910,    .7804878048780487804878048780487804878049,
00887     .25088030628580937353433455427875742316250,    .7781155015197568389057750759878419452888,
00888     .25391520998096339667426946107298135757450,    .7757575757575757575757575757575757575758,
00889     .25694093089750041913887912414793390780680,    .7734138972809667673716012084592145015106,
00890     .25995752443692604627401010475296061486000,    .7710843373493975903614457831325301204819,
00891     .26296504550088134477547896494797896593800,    .7687687687687687687687687687687687687688,
00892     .26596354849713793599974565040611196309330,    .7664670658682634730538922155688622754491,
00893     .26895308734550393836570947314612567424780,    .7641791044776119402985074626865671641791,
00894     .27193371548364175804834985683555714786050,    .7619047619047619047619047619047619047619,
00895     .27490548587279922676529508862586226314300,    .7596439169139465875370919881305637982196,
00896     .27786845100345625159121709657483734190480,    .7573964497041420118343195266272189349112,
00897     .28082266290088775395616949026589281857030,    .7551622418879056047197640117994100294985,
00898     .28376817313064456316240580235898960381750,    .7529411764705882352941176470588235294118,
00899     .28670503280395426282112225635501090437180,    .7507331378299120234604105571847507331378,
00900     .28963329258304265634293983566749375313530,    .7485380116959064327485380116959064327485,
00901     .29255300268637740579436012922087684273730,    .7463556851311953352769679300291545189504,
00902     .29546421289383584252163927885703742504130,    .7441860465116279069767441860465116279070,
00903     .29836697255179722709783618483925238251680,    .7420289855072463768115942028985507246377,
00904     .30126133057816173455023545102449133992200,    .7398843930635838150289017341040462427746,
00905     .30414733546729666446850615102448500692850,    .7377521613832853025936599423631123919308,
00906     .30702503529491181888388950937951449304830,    .7356321839080459770114942528735632183908,
00907     .30989447772286465854207904158101882785550,    .7335243553008595988538681948424068767908,
00908     .31275571000389684739317885942000430077330,    .7314285714285714285714285714285714285714,
00909     .31560877898630329552176476681779604405180,    .7293447293447293447293447293447293447293,
00910     .31845373111853458869546784626436419785030,    .7272727272727272727272727272727272727273,
00911     .32129061245373424782201254856772720813750,    .7252124645892351274787535410764872521246,
00912     .32411946865421192853773391107097268104550,    .7231638418079096045197740112994350282486,
00913     .32694034499585328257253991068864706903700,    .7211267605633802816901408450704225352113,
00914     .32975328637246797969240219572384376078850,    .7191011235955056179775280898876404494382,
00915     .33255833730007655635318997155991382896900,    .7170868347338935574229691876750700280112,
00916     .33535554192113781191153520921943709254280,    .7150837988826815642458100558659217877095,
00917     .33814494400871636381467055798566434532400,    .7130919220055710306406685236768802228412,
00918     .34092658697059319283795275623560883104800,    .7111111111111111111111111111111111111111,
00919     .34370051385331840121395430287520866841080,    .7091412742382271468144044321329639889197,
00920     .34646676734620857063262633346312213689100,    .7071823204419889502762430939226519337017,
00921     .34922538978528827602332285096053965389730,    .7052341597796143250688705234159779614325,
00922     .35197642315717814209818925519357435405250,    .7032967032967032967032967032967032967033,
00923     .35471990910292899856770532096561510115850,    .7013698630136986301369863013698630136986,
00924     .35745588892180374385176833129662554711100,    .6994535519125683060109289617486338797814,
00925     .36018440357500774995358483465679455548530,    .6975476839237057220708446866485013623978,
00926     .36290549368936841911903457003063522279280,    .6956521739130434782608695652173913043478,
00927     .36561919956096466943762379742111079394830,    .6937669376693766937669376693766937669377,
00928     .36832556115870762614150635272380895912650,    .6918918918918918918918918918918918918919,
00929     .37102461812787262962487488948681857436900,    .6900269541778975741239892183288409703504,
00930     .37371640979358405898480555151763837784530,    .6881720430107526881720430107526881720430,
00931     .37640097516425302659470730759494472295050,    .6863270777479892761394101876675603217158,
00932     .37907835293496944251145919224654790014030,    .6844919786096256684491978609625668449198,
00933     .38174858149084833769393299007788300514230,    .6826666666666666666666666666666666666667,
00934     .38441169891033200034513583887019194662580,    .6808510638297872340425531914893617021277,
00935     .38706774296844825844488013899535872042180,    .6790450928381962864721485411140583554377,
00936     .38971675114002518602873692543653305619950,    .6772486772486772486772486772486772486772,
00937     .39235876060286384303665840889152605086580,    .6754617414248021108179419525065963060686,
00938     .39499380824086893770896722344332374632350,    .6736842105263157894736842105263157894737,
00939     .39762193064713846624158577469643205404280,    .6719160104986876640419947506561679790026,
00940     .40024316412701266276741307592601515352730,    .6701570680628272251308900523560209424084,
00941     .40285754470108348090917615991202183067800,    .6684073107049608355091383812010443864230,
00942     .40546510810816432934799991016916465014230,    .6666666666666666666666666666666666666667,
00943     .40806588980822172674223224930756259709600,    .6649350649350649350649350649350649350649,
00944     .41065992498526837639616360320360399782650,    .6632124352331606217616580310880829015544,
00945     .41324724855021932601317757871584035456180,    .6614987080103359173126614987080103359173,
00946     .41582789514371093497757669865677598863850,    .6597938144329896907216494845360824742268,
00947     .41840189913888381489925905043492093682300,    .6580976863753213367609254498714652956298,
00948     .42096929464412963239894338585145305842150,    .6564102564102564102564102564102564102564,
00949     .42353011550580327293502591601281892508280,    .6547314578005115089514066496163682864450,
00950     .42608439531090003260516141381231136620050,    .6530612244897959183673469387755102040816,
00951     .42863216738969872610098832410585600882780,    .6513994910941475826972010178117048346056,
00952     .43117346481837132143866142541810404509300,    .6497461928934010152284263959390862944162,
00953     .43370832042155937902094819946796633303180,    .6481012658227848101265822784810126582278,
00954     .43623676677491801667585491486534010618930,    .6464646464646464646464646464646464646465,
00955     .43875883620762790027214350629947148263450,    .6448362720403022670025188916876574307305,
00956     .44127456080487520440058801796112675219780,    .6432160804020100502512562814070351758794,
00957     .44378397241030093089975139264424797147500,    .6416040100250626566416040100250626566416,
00958     .44628710262841947420398014401143882423650,    .6400000000000000000000000000000000000000,
00959     .44878398282700665555822183705458883196130,    .6384039900249376558603491271820448877805,
00960     .45127464413945855836729492693848442286250,    .6368159203980099502487562189054726368159,
00961     .45375911746712049854579618113348260521900,    .6352357320099255583126550868486352357320,
00962     .45623743348158757315857769754074979573500,    .6336633663366336633663366336633663366337,
00963     .45870962262697662081833982483658473938700,    .6320987654320987654320987654320987654321,
00964     .46117571512217014895185229761409573256980,    .6305418719211822660098522167487684729064,
00965     .46363574096303250549055974261136725544930,    .6289926289926289926289926289926289926290,
00966     .46608972992459918316399125615134835243230,    .6274509803921568627450980392156862745098,
00967     .46853771156323925639597405279346276074650,    .6259168704156479217603911980440097799511,
00968     .47097971521879100631480241645476780831830,    .6243902439024390243902439024390243902439,
00969     .47341577001667212165614273544633761048330,    .6228710462287104622871046228710462287105,
00970     .47584590486996386493601107758877333253630,    .6213592233009708737864077669902912621359,
00971     .47827014848147025860569669930555392056700,    .6198547215496368038740920096852300242131,
00972     .48068852934575190261057286988943815231330,    .6183574879227053140096618357487922705314,
00973     .48310107575113581113157579238759353756900,    .6168674698795180722891566265060240963855,
00974     .48550781578170076890899053978500887751580,    .6153846153846153846153846153846153846154,
00975     .48790877731923892879351001283794175833480,    .6139088729016786570743405275779376498801,
00976     .49030398804519381705802061333088204264650,    .6124401913875598086124401913875598086124,
00977     .49269347544257524607047571407747454941280,    .6109785202863961813842482100238663484487,
00978     .49507726679785146739476431321236304938800,    .6095238095238095238095238095238095238095,
00979     .49745538920281889838648226032091770321130,    .6080760095011876484560570071258907363420,
00980     .49982786955644931126130359189119189977650,    .6066350710900473933649289099526066350711,
00981     .50219473456671548383667413872899487614650,    .6052009456264775413711583924349881796690,
00982     .50455601075239520092452494282042607665050,    .6037735849056603773584905660377358490566,
00983     .50691172444485432801997148999362252652650,    .6023529411764705882352941176470588235294,
00984     .50926190178980790257412536448100581765150,    .6009389671361502347417840375586854460094,
00985     .51160656874906207391973111953120678663250,    .5995316159250585480093676814988290398126,
00986     .51394575110223428282552049495279788970950,    .5981308411214953271028037383177570093458,
00987     .51627947444845445623684554448118433356300,    .5967365967365967365967365967365967365967,
00988     .51860776420804555186805373523384332656850,    .5953488372093023255813953488372093023256,
00989     .52093064562418522900344441950437612831600,    .5939675174013921113689095127610208816705,
00990     .52324814376454775732838697877014055848100,    .5925925925925925925925925925925925925926,
00991     .52556028352292727401362526507000438869000,    .5912240184757505773672055427251732101617,
00992     .52786708962084227803046587723656557500350,    .5898617511520737327188940092165898617512,
00993     .53016858660912158374145519701414741575700,    .5885057471264367816091954022988505747126,
00994     .53246479886947173376654518506256863474850,    .5871559633027522935779816513761467889908,
00995     .53475575061602764748158733709715306758900,    .5858123569794050343249427917620137299771,
00996     .53704146589688361856929077475797384977350,    .5844748858447488584474885844748858447489,
00997     .53932196859560876944783558428753167390800,    .5831435079726651480637813211845102505695,
00998     .54159728243274429804188230264117009937750,    .5818181818181818181818181818181818181818,
00999     .54386743096728351609669971367111429572100,    .5804988662131519274376417233560090702948,
01000     .54613243759813556721383065450936555862450,    .5791855203619909502262443438914027149321,
01001     .54839232556557315767520321969641372561450,    .5778781038374717832957110609480812641084,
01002     .55064711795266219063194057525834068655950,    .5765765765765765765765765765765765765766,
01003     .55289683768667763352766542084282264113450,    .5752808988764044943820224719101123595506,
01004     .55514150754050151093110798683483153581600,    .5739910313901345291479820627802690582960,
01005     .55738115013400635344709144192165695130850,    .5727069351230425055928411633109619686801,
01006     .55961578793542265941596269840374588966350,    .5714285714285714285714285714285714285714,
01007     .56184544326269181269140062795486301183700,    .5701559020044543429844097995545657015590,
01008     .56407013828480290218436721261241473257550,    .5688888888888888888888888888888888888889,
01009     .56628989502311577464155334382667206227800,    .5676274944567627494456762749445676274945,
01010     .56850473535266865532378233183408156037350,    .5663716814159292035398230088495575221239,
01011     .57071468100347144680739575051120482385150,    .5651214128035320088300220750551876379691,
01012     .57291975356178548306473885531886480748650,    .5638766519823788546255506607929515418502,
01013     .57511997447138785144460371157038025558000,    .5626373626373626373626373626373626373626,
01014     .57731536503482350219940144597785547375700,    .5614035087719298245614035087719298245614,
01015     .57950594641464214795689713355386629700650,    .5601750547045951859956236323851203501094,
01016     .58169173963462239562716149521293118596100,    .5589519650655021834061135371179039301310,
01017     .58387276558098266665552955601015128195300,    .5577342047930283224400871459694989106754,
01018     .58604904500357812846544902640744112432000,    .5565217391304347826086956521739130434783,
01019     .58822059851708596855957011939608491957200,    .5553145336225596529284164859002169197397,
01020     .59038744660217634674381770309992134571100,    .5541125541125541125541125541125541125541,
01021     .59254960960667157898740242671919986605650,    .5529157667386609071274298056155507559395,
01022     .59470710774669277576265358220553025603300,    .5517241379310344827586206896551724137931,
01023     .59685996110779382384237123915227130055450,    .5505376344086021505376344086021505376344,
01024     .59900818964608337768851242799428291618800,    .5493562231759656652360515021459227467811,
01025     .60115181318933474940990890900138765573500,    .5481798715203426124197002141327623126338,
01026     .60329085143808425240052883964381180703650,    .5470085470085470085470085470085470085470,
01027     .60542532396671688843525771517306566238400,    .5458422174840085287846481876332622601279,
01028     .60755525022454170969155029524699784815300,    .5446808510638297872340425531914893617021,
01029     .60968064953685519036241657886421307921400,    .5435244161358811040339702760084925690021,
01030     .61180154110599282990534675263916142284850,    .5423728813559322033898305084745762711864,
01031     .61391794401237043121710712512140162289150,    .5412262156448202959830866807610993657505,
01032     .61602987721551394351138242200249806046500,    .5400843881856540084388185654008438818565,
01033     .61813735955507864705538167982012964785100,    .5389473684210526315789473684210526315789,
01034     .62024040975185745772080281312810257077200,    .5378151260504201680672268907563025210084,
01035     .62233904640877868441606324267922900617100,    .5366876310272536687631027253668763102725,
01036     .62443328801189346144440150965237990021700,    .5355648535564853556485355648535564853556,
01037     .62652315293135274476554741340805776417250,    .5344467640918580375782881002087682672234,
01038     .62860865942237409420556559780379757285100,    .5333333333333333333333333333333333333333,
01039     .63068982562619868570408243613201193511500,    .5322245322245322245322245322245322245322,
01040     .63276666957103777644277897707070223987100,    .5311203319502074688796680497925311203320,
01041     .63483920917301017716738442686619237065300,    .5300207039337474120082815734989648033126,
01042     .63690746223706917739093569252872839570050,    .5289256198347107438016528925619834710744,
01043     .63897144645792069983514238629140891134750,    .5278350515463917525773195876288659793814,
01044     .64103117942093124081992527862894348800200,    .5267489711934156378600823045267489711934,
01045     .64308667860302726193566513757104985415950,    .5256673511293634496919917864476386036961,
01046     .64513796137358470073053240412264131009600,    .5245901639344262295081967213114754098361,
01047     .64718504499530948859131740391603671014300,    .5235173824130879345603271983640081799591,
01048     .64922794662510974195157587018911726772800,    .5224489795918367346938775510204081632653,
01049     .65126668331495807251485530287027359008800,    .5213849287169042769857433808553971486762,
01050     .65330127201274557080523663898929953575150,    .5203252032520325203252032520325203252033,
01051     .65533172956312757406749369692988693714150,    .5192697768762677484787018255578093306288,
01052     .65735807270835999727154330685152672231200,    .5182186234817813765182186234817813765182,
01053     .65938031808912778153342060249997302889800,    .5171717171717171717171717171717171717172,
01054     .66139848224536490484126716182800009846700,    .5161290322580645161290322580645161290323,
01055     .66341258161706617713093692145776003599150,    .5150905432595573440643863179074446680080,
01056     .66542263254509037562201001492212526500250,    .5140562248995983935742971887550200803213,
01057     .66742865127195616370414654738851822912700,    .5130260521042084168336673346693386773547,
01058     .66943065394262923906154583164607174694550,    .5120000000000000000000000000000000000000,
01059     .67142865660530226534774556057527661323550,    .5109780439121756487025948103792415169661,
01060     .67342267521216669923234121597488410770900,    .5099601593625498007968127490039840637450,
01061     .67541272562017662384192817626171745359900,    .5089463220675944333996023856858846918489,
01062     .67739882359180603188519853574689477682100,    .5079365079365079365079365079365079365079,
01063     .67938098479579733801614338517538271844400,    .5069306930693069306930693069306930693069,
01064     .68135922480790300781450241629499942064300,    .5059288537549407114624505928853754940711,
01065     .68333355911162063645036823800182901322850,    .5049309664694280078895463510848126232742,
01066     .68530400309891936760919861626462079584600,    .5039370078740157480314960629921259842520,
01067     .68727057207096020619019327568821609020250,    .5029469548133595284872298624754420432220,
01068     .68923328123880889251040571252815425395950,    .5019607843137254901960784313725490196078,
01069     .69314718055994530941723212145818, 5.0e-01,
01070 };
01071 
01072 
01073 
// Reduced argument for the log polynomial: the offset of x from 1, scaled by
// the second (odd-indexed) value of the icvLogTab pair that starts at index h.
01074 #define LOGTAB_TRANSLATE(x,h) (((x) - 1.)*icvLogTab[(h)+1])
// ln(2); the log routines multiply it by the unbiased binary exponent of the input.
01075 static const double ln_2 = 0.69314718055994530941723212145818;
01076 
01077 void log32f( const float *_x, float *y, int n )
01078 {
01079     static const float shift[] = { 0, -1.f/512 };
01080     static const float
01081     A0 = 0.3333333333333333333333333f,
01082     A1 = -0.5f,
01083     A2 = 1.f;
01084 
01085 #undef LOGPOLY
01086 #define LOGPOLY(x) (((A0*(x) + A1)*(x) + A2)*(x))
01087 
01088     int i = 0;
01089     Cv32suf buf[4];
01090     const int* x = (const int*)_x;
01091 
01092 #if CV_SSE2
01093     static const __m128d ln2_2 = _mm_set1_pd(ln_2);
01094     static const __m128 _1_4 = _mm_set1_ps(1.f);
01095     static const __m128 shift4 = _mm_set1_ps(-1.f/512);
01096 
01097     static const __m128 mA0 = _mm_set1_ps(A0);
01098     static const __m128 mA1 = _mm_set1_ps(A1);
01099     static const __m128 mA2 = _mm_set1_ps(A2);
01100 
01101     int CV_DECL_ALIGNED(16) idx[4];
01102 
01103     for( ; i <= n - 4; i += 4 )
01104     {
01105         __m128i h0 = _mm_loadu_si128((const __m128i*)(x + i));
01106         __m128i yi0 = _mm_sub_epi32(_mm_and_si128(_mm_srli_epi32(h0, 23), _mm_set1_epi32(255)), _mm_set1_epi32(127));
01107         __m128d yd0 = _mm_mul_pd(_mm_cvtepi32_pd(yi0), ln2_2);
01108         __m128d yd1 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_unpackhi_epi64(yi0,yi0)), ln2_2);
01109 
01110         __m128i xi0 = _mm_or_si128(_mm_and_si128(h0, _mm_set1_epi32(LOGTAB_MASK2_32F)), _mm_set1_epi32(127 << 23));
01111 
01112         h0 = _mm_and_si128(_mm_srli_epi32(h0, 23 - LOGTAB_SCALE - 1), _mm_set1_epi32(LOGTAB_MASK*2));
01113         _mm_store_si128((__m128i*)idx, h0);
01114         h0 = _mm_cmpeq_epi32(h0, _mm_set1_epi32(510));
01115 
01116         __m128d t0, t1, t2, t3, t4;
01117         t0 = _mm_load_pd(icvLogTab + idx[0]);
01118         t2 = _mm_load_pd(icvLogTab + idx[1]);
01119         t1 = _mm_unpackhi_pd(t0, t2);
01120         t0 = _mm_unpacklo_pd(t0, t2);
01121         t2 = _mm_load_pd(icvLogTab + idx[2]);
01122         t4 = _mm_load_pd(icvLogTab + idx[3]);
01123         t3 = _mm_unpackhi_pd(t2, t4);
01124         t2 = _mm_unpacklo_pd(t2, t4);
01125 
01126         yd0 = _mm_add_pd(yd0, t0);
01127         yd1 = _mm_add_pd(yd1, t2);
01128 
01129         __m128 yf0 = _mm_movelh_ps(_mm_cvtpd_ps(yd0), _mm_cvtpd_ps(yd1));
01130 
01131         __m128 xf0 = _mm_sub_ps(_mm_castsi128_ps(xi0), _1_4);
01132         xf0 = _mm_mul_ps(xf0, _mm_movelh_ps(_mm_cvtpd_ps(t1), _mm_cvtpd_ps(t3)));
01133         xf0 = _mm_add_ps(xf0, _mm_and_ps(_mm_castsi128_ps(h0), shift4));
01134 
01135         __m128 zf0 = _mm_mul_ps(xf0, mA0);
01136         zf0 = _mm_mul_ps(_mm_add_ps(zf0, mA1), xf0);
01137         zf0 = _mm_mul_ps(_mm_add_ps(zf0, mA2), xf0);
01138         yf0 = _mm_add_ps(yf0, zf0);
01139 
01140         _mm_storeu_ps(y + i, yf0);
01141     }
01142 #endif
01143     for( ; i <= n - 4; i += 4 )
01144     {
01145         double x0, x1, x2, x3;
01146         double y0, y1, y2, y3;
01147         int h0, h1, h2, h3;
01148 
01149         h0 = x[i];
01150         h1 = x[i+1];
01151         buf[0].i = (h0 & LOGTAB_MASK2_32F) | (127 << 23);
01152         buf[1].i = (h1 & LOGTAB_MASK2_32F) | (127 << 23);
01153 
01154         y0 = (((h0 >> 23) & 0xff) - 127) * ln_2;
01155         y1 = (((h1 >> 23) & 0xff) - 127) * ln_2;
01156 
01157         h0 = (h0 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
01158         h1 = (h1 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
01159 
01160         y0 += icvLogTab[h0];
01161         y1 += icvLogTab[h1];
01162 
01163         h2 = x[i+2];
01164         h3 = x[i+3];
01165 
01166         x0 = LOGTAB_TRANSLATE( buf[0].f, h0 );
01167         x1 = LOGTAB_TRANSLATE( buf[1].f, h1 );
01168 
01169         buf[2].i = (h2 & LOGTAB_MASK2_32F) | (127 << 23);
01170         buf[3].i = (h3 & LOGTAB_MASK2_32F) | (127 << 23);
01171 
01172         y2 = (((h2 >> 23) & 0xff) - 127) * ln_2;
01173         y3 = (((h3 >> 23) & 0xff) - 127) * ln_2;
01174 
01175         h2 = (h2 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
01176         h3 = (h3 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
01177 
01178         y2 += icvLogTab[h2];
01179         y3 += icvLogTab[h3];
01180 
01181         x2 = LOGTAB_TRANSLATE( buf[2].f, h2 );
01182         x3 = LOGTAB_TRANSLATE( buf[3].f, h3 );
01183 
01184         x0 += shift[h0 == 510];
01185         x1 += shift[h1 == 510];
01186         y0 += LOGPOLY( x0 );
01187         y1 += LOGPOLY( x1 );
01188 
01189         y[i] = (float) y0;
01190         y[i + 1] = (float) y1;
01191 
01192         x2 += shift[h2 == 510];
01193         x3 += shift[h3 == 510];
01194         y2 += LOGPOLY( x2 );
01195         y3 += LOGPOLY( x3 );
01196 
01197         y[i + 2] = (float) y2;
01198         y[i + 3] = (float) y3;
01199     }
01200 
01201     for( ; i < n; i++ )
01202     {
01203         int h0 = x[i];
01204         double y0;
01205         float x0;
01206 
01207         y0 = (((h0 >> 23) & 0xff) - 127) * ln_2;
01208 
01209         buf[0].i = (h0 & LOGTAB_MASK2_32F) | (127 << 23);
01210         h0 = (h0 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
01211 
01212         y0 += icvLogTab[h0];
01213         x0 = (float)LOGTAB_TRANSLATE( buf[0].f, h0 );
01214         x0 += shift[h0 == 510];
01215         y0 += LOGPOLY( x0 );
01216 
01217         y[i] = (float)y0;
01218     }
01219 }
01220 
01221 void log64f( const double *x, double *y, int n )
01222 {
01223     static const double shift[] = { 0, -1./512 };
01224     static const double
01225     A7 = 1.0,
01226     A6 = -0.5,
01227     A5 = 0.333333333333333314829616256247390992939472198486328125,
01228     A4 = -0.25,
01229     A3 = 0.2,
01230     A2 = -0.1666666666666666574148081281236954964697360992431640625,
01231     A1 = 0.1428571428571428769682682968777953647077083587646484375,
01232     A0 = -0.125;
01233 
01234 #undef LOGPOLY
01235 #define LOGPOLY(x,k) ((x)+=shift[k], xq = (x)*(x),\
01236 (((A0*xq + A2)*xq + A4)*xq + A6)*xq + \
01237 (((A1*xq + A3)*xq + A5)*xq + A7)*(x))
01238 
01239     int i = 0;
01240     DBLINT buf[4];
01241     DBLINT *X = (DBLINT *) x;
01242 
01243 #if CV_SSE2
01244     static const __m128d ln2_2 = _mm_set1_pd(ln_2);
01245     static const __m128d _1_2 = _mm_set1_pd(1.);
01246     static const __m128d shift2 = _mm_set1_pd(-1./512);
01247 
01248     static const __m128i log_and_mask2 = _mm_set_epi32(LOGTAB_MASK2, 0xffffffff, LOGTAB_MASK2, 0xffffffff);
01249     static const __m128i log_or_mask2 = _mm_set_epi32(1023 << 20, 0, 1023 << 20, 0);
01250 
01251     static const __m128d mA0 = _mm_set1_pd(A0);
01252     static const __m128d mA1 = _mm_set1_pd(A1);
01253     static const __m128d mA2 = _mm_set1_pd(A2);
01254     static const __m128d mA3 = _mm_set1_pd(A3);
01255     static const __m128d mA4 = _mm_set1_pd(A4);
01256     static const __m128d mA5 = _mm_set1_pd(A5);
01257     static const __m128d mA6 = _mm_set1_pd(A6);
01258     static const __m128d mA7 = _mm_set1_pd(A7);
01259 
01260     int CV_DECL_ALIGNED(16) idx[4];
01261 
01262     for( ; i <= n - 4; i += 4 )
01263     {
01264         __m128i h0 = _mm_loadu_si128((const __m128i*)(x + i));
01265         __m128i h1 = _mm_loadu_si128((const __m128i*)(x + i + 2));
01266 
01267         __m128d xd0 = _mm_castsi128_pd(_mm_or_si128(_mm_and_si128(h0, log_and_mask2), log_or_mask2));
01268         __m128d xd1 = _mm_castsi128_pd(_mm_or_si128(_mm_and_si128(h1, log_and_mask2), log_or_mask2));
01269 
01270         h0 = _mm_unpackhi_epi32(_mm_unpacklo_epi32(h0, h1), _mm_unpackhi_epi32(h0, h1));
01271 
01272         __m128i yi0 = _mm_sub_epi32(_mm_and_si128(_mm_srli_epi32(h0, 20),
01273                                                   _mm_set1_epi32(2047)), _mm_set1_epi32(1023));
01274         __m128d yd0 = _mm_mul_pd(_mm_cvtepi32_pd(yi0), ln2_2);
01275         __m128d yd1 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_unpackhi_epi64(yi0, yi0)), ln2_2);
01276 
01277         h0 = _mm_and_si128(_mm_srli_epi32(h0, 20 - LOGTAB_SCALE - 1), _mm_set1_epi32(LOGTAB_MASK * 2));
01278         _mm_store_si128((__m128i*)idx, h0);
01279         h0 = _mm_cmpeq_epi32(h0, _mm_set1_epi32(510));
01280 
01281         __m128d t0, t1, t2, t3, t4;
01282         t0 = _mm_load_pd(icvLogTab + idx[0]);
01283         t2 = _mm_load_pd(icvLogTab + idx[1]);
01284         t1 = _mm_unpackhi_pd(t0, t2);
01285         t0 = _mm_unpacklo_pd(t0, t2);
01286         t2 = _mm_load_pd(icvLogTab + idx[2]);
01287         t4 = _mm_load_pd(icvLogTab + idx[3]);
01288         t3 = _mm_unpackhi_pd(t2, t4);
01289         t2 = _mm_unpacklo_pd(t2, t4);
01290 
01291         yd0 = _mm_add_pd(yd0, t0);
01292         yd1 = _mm_add_pd(yd1, t2);
01293 
01294         xd0 = _mm_mul_pd(_mm_sub_pd(xd0, _1_2), t1);
01295         xd1 = _mm_mul_pd(_mm_sub_pd(xd1, _1_2), t3);
01296 
01297         xd0 = _mm_add_pd(xd0, _mm_and_pd(_mm_castsi128_pd(_mm_unpacklo_epi32(h0, h0)), shift2));
01298         xd1 = _mm_add_pd(xd1, _mm_and_pd(_mm_castsi128_pd(_mm_unpackhi_epi32(h0, h0)), shift2));
01299 
01300         __m128d zd0 = _mm_mul_pd(xd0, mA0);
01301         __m128d zd1 = _mm_mul_pd(xd1, mA0);
01302         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA1), xd0);
01303         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA1), xd1);
01304         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA2), xd0);
01305         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA2), xd1);
01306         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA3), xd0);
01307         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA3), xd1);
01308         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA4), xd0);
01309         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA4), xd1);
01310         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA5), xd0);
01311         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA5), xd1);
01312         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA6), xd0);
01313         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA6), xd1);
01314         zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA7), xd0);
01315         zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA7), xd1);
01316 
01317         yd0 = _mm_add_pd(yd0, zd0);
01318         yd1 = _mm_add_pd(yd1, zd1);
01319 
01320         _mm_storeu_pd(y + i, yd0);
01321         _mm_storeu_pd(y + i + 2, yd1);
01322     }
01323 #endif
01324     for( ; i <= n - 4; i += 4 )
01325     {
01326         double xq;
01327         double x0, x1, x2, x3;
01328         double y0, y1, y2, y3;
01329         int h0, h1, h2, h3;
01330 
01331         h0 = X[i].i.lo;
01332         h1 = X[i + 1].i.lo;
01333         buf[0].i.lo = h0;
01334         buf[1].i.lo = h1;
01335 
01336         h0 = X[i].i.hi;
01337         h1 = X[i + 1].i.hi;
01338         buf[0].i.hi = (h0 & LOGTAB_MASK2) | (1023 << 20);
01339         buf[1].i.hi = (h1 & LOGTAB_MASK2) | (1023 << 20);
01340 
01341         y0 = (((h0 >> 20) & 0x7ff) - 1023) * ln_2;
01342         y1 = (((h1 >> 20) & 0x7ff) - 1023) * ln_2;
01343 
01344         h2 = X[i + 2].i.lo;
01345         h3 = X[i + 3].i.lo;
01346         buf[2].i.lo = h2;
01347         buf[3].i.lo = h3;
01348 
01349         h0 = (h0 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
01350         h1 = (h1 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
01351 
01352         y0 += icvLogTab[h0];
01353         y1 += icvLogTab[h1];
01354 
01355         h2 = X[i + 2].i.hi;
01356         h3 = X[i + 3].i.hi;
01357 
01358         x0 = LOGTAB_TRANSLATE( buf[0].d, h0 );
01359         x1 = LOGTAB_TRANSLATE( buf[1].d, h1 );
01360 
01361         buf[2].i.hi = (h2 & LOGTAB_MASK2) | (1023 << 20);
01362         buf[3].i.hi = (h3 & LOGTAB_MASK2) | (1023 << 20);
01363 
01364         y2 = (((h2 >> 20) & 0x7ff) - 1023) * ln_2;
01365         y3 = (((h3 >> 20) & 0x7ff) - 1023) * ln_2;
01366 
01367         h2 = (h2 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
01368         h3 = (h3 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
01369 
01370         y2 += icvLogTab[h2];
01371         y3 += icvLogTab[h3];
01372 
01373         x2 = LOGTAB_TRANSLATE( buf[2].d, h2 );
01374         x3 = LOGTAB_TRANSLATE( buf[3].d, h3 );
01375 
01376         y0 += LOGPOLY( x0, h0 == 510 );
01377         y1 += LOGPOLY( x1, h1 == 510 );
01378 
01379         y[i] = y0;
01380         y[i + 1] = y1;
01381 
01382         y2 += LOGPOLY( x2, h2 == 510 );
01383         y3 += LOGPOLY( x3, h3 == 510 );
01384 
01385         y[i + 2] = y2;
01386         y[i + 3] = y3;
01387     }
01388 
01389     for( ; i < n; i++ )
01390     {
01391         int h0 = X[i].i.hi;
01392         double xq;
01393         double x0, y0 = (((h0 >> 20) & 0x7ff) - 1023) * ln_2;
01394 
01395         buf[0].i.hi = (h0 & LOGTAB_MASK2) | (1023 << 20);
01396         buf[0].i.lo = X[i].i.lo;
01397         h0 = (h0 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
01398 
01399         y0 += icvLogTab[h0];
01400         x0 = LOGTAB_TRANSLATE( buf[0].d, h0 );
01401         y0 += LOGPOLY( x0, h0 == 510 );
01402         y[i] = y0;
01403     }
01404 }
01405 
01406 //=============================================================================
01407 // for compatibility with 3.0
01408 
01409 void exp(const float* src, float* dst, int n)
01410 {
01411     exp32f(src, dst, n);
01412 }
01413 
01414 void exp(const double* src, double* dst, int n)
01415 {
01416     exp64f(src, dst, n);
01417 }
01418 
01419 void log(const float* src, float* dst, int n)
01420 {
01421     log32f(src, dst, n);
01422 }
01423 
01424 void log(const double* src, double* dst, int n)
01425 {
01426     log64f(src, dst, n);
01427 }
01428 
01429 void magnitude(const float* x, const float* y, float* dst, int n)
01430 {
01431     magnitude32f(x, y, dst, n);
01432 }
01433 
01434 void magnitude(const double* x, const double* y, double* dst, int n)
01435 {
01436     magnitude64f(x, y, dst, n);
01437 }
01438 
01439 void sqrt(const float* src, float* dst, int len)
01440 {
01441     sqrt32f(src, dst, len);
01442 }
01443 
01444 void sqrt(const double* src, double* dst, int len)
01445 {
01446     sqrt64f(src, dst, len);
01447 }
01448 
01449 void invSqrt(const float* src, float* dst, int len)
01450 {
01451     invSqrt32f(src, dst, len);
01452 }
01453 
01454 void invSqrt(const double* src, double* dst, int len)
01455 {
01456     invSqrt64f(src, dst, len);
01457 }
01458 
01459 
01460 }} // cv::hal::
01461