Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Dependents: RZ_A2M_Mbed_samples
fast_math.hpp
00001 /*M/////////////////////////////////////////////////////////////////////////////////////// 00002 // 00003 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 00004 // 00005 // By downloading, copying, installing or using the software you agree to this license. 00006 // If you do not agree to this license, do not download, install, 00007 // copy or use the software. 00008 // 00009 // 00010 // License Agreement 00011 // For Open Source Computer Vision Library 00012 // 00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 00014 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 00015 // Copyright (C) 2013, OpenCV Foundation, all rights reserved. 00016 // Copyright (C) 2015, Itseez Inc., all rights reserved. 00017 // Third party copyrights are property of their respective owners. 00018 // 00019 // Redistribution and use in source and binary forms, with or without modification, 00020 // are permitted provided that the following conditions are met: 00021 // 00022 // * Redistribution's of source code must retain the above copyright notice, 00023 // this list of conditions and the following disclaimer. 00024 // 00025 // * Redistribution's in binary form must reproduce the above copyright notice, 00026 // this list of conditions and the following disclaimer in the documentation 00027 // and/or other materials provided with the distribution. 00028 // 00029 // * The name of the copyright holders may not be used to endorse or promote products 00030 // derived from this software without specific prior written permission. 00031 // 00032 // This software is provided by the copyright holders and contributors "as is" and 00033 // any express or implied warranties, including, but not limited to, the implied 00034 // warranties of merchantability and fitness for a particular purpose are disclaimed. 00035 // In no event shall the Intel Corporation or contributors be liable for any direct, 00036 // indirect, incidental, special, exemplary, or consequential damages 00037 // (including, but not limited to, procurement of substitute goods or services; 00038 // loss of use, data, or profits; or business interruption) however caused 00039 // and on any theory of liability, whether in contract, strict liability, 00040 // or tort (including negligence or otherwise) arising in any way out of 00041 // the use of this software, even if advised of the possibility of such damage. 00042 // 00043 //M*/ 00044 00045 #ifndef OPENCV_CORE_FAST_MATH_HPP 00046 #define OPENCV_CORE_FAST_MATH_HPP 00047 00048 #include "opencv2/core/cvdef.h" 00049 00050 //! @addtogroup core_utils 00051 //! @{ 00052 00053 /****************************************************************************************\ 00054 * fast math * 00055 \****************************************************************************************/ 00056 00057 #if defined __BORLANDC__ 00058 # include <fastmath.h> 00059 #elif defined __cplusplus 00060 # include <cmath> 00061 #else 00062 # include <math.h> 00063 #endif 00064 00065 #ifdef HAVE_TEGRA_OPTIMIZATION 00066 # include "tegra_round.hpp" 00067 #endif 00068 00069 #if CV_VFP 00070 // 1. general scheme 00071 #define ARM_ROUND(_value, _asm_string) \ 00072 int res; \ 00073 float temp; \ 00074 (void)temp; \ 00075 asm(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \ 00076 return res 00077 // 2. version for double 00078 #ifdef __clang__ 00079 #define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]") 00080 #else 00081 #define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]") 00082 #endif 00083 // 3. version for float 00084 #define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]") 00085 #endif // CV_VFP 00086 00087 /** @brief Rounds floating-point number to the nearest integer 00088 00089 @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the 00090 result is not defined. 00091 */ 00092 CV_INLINE int 00093 cvRound( double value ) 00094 { 00095 #if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ 00096 && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) 00097 __m128d t = _mm_set_sd( value ); 00098 return _mm_cvtsd_si32(t); 00099 #elif defined _MSC_VER && defined _M_IX86 00100 int t; 00101 __asm 00102 { 00103 fld value; 00104 fistp t; 00105 } 00106 return t; 00107 #elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \ 00108 defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION 00109 TEGRA_ROUND_DBL(value); 00110 #elif defined CV_ICC || defined __GNUC__ 00111 # if CV_VFP 00112 ARM_ROUND_DBL(value); 00113 # else 00114 return (int)lrint(value); 00115 # endif 00116 #else 00117 /* it's ok if round does not comply with IEEE754 standard; 00118 the tests should allow +/-1 difference when the tested functions use round */ 00119 return (int)(value + (value >= 0 ? 0.5 : -0.5)); 00120 #endif 00121 } 00122 00123 00124 /** @brief Rounds floating-point number to the nearest integer not larger than the original. 00125 00126 The function computes an integer i such that: 00127 \f[i \le \texttt{value} < i+1\f] 00128 @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the 00129 result is not defined. 00130 */ 00131 CV_INLINE int cvFloor( double value ) 00132 { 00133 #if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) 00134 __m128d t = _mm_set_sd( value ); 00135 int i = _mm_cvtsd_si32(t); 00136 return i - _mm_movemask_pd(_mm_cmplt_sd(t, _mm_cvtsi32_sd(t,i))); 00137 #elif defined __GNUC__ 00138 int i = (int)value; 00139 return i - (i > value); 00140 #else 00141 int i = cvRound(value); 00142 float diff = (float)(value - i); 00143 return i - (diff < 0); 00144 #endif 00145 } 00146 00147 /** @brief Rounds floating-point number to the nearest integer not smaller than the original. 00148 00149 The function computes an integer i such that: 00150 \f[i \le \texttt{value} < i+1\f] 00151 @param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the 00152 result is not defined. 00153 */ 00154 CV_INLINE int cvCeil( double value ) 00155 { 00156 #if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__) 00157 __m128d t = _mm_set_sd( value ); 00158 int i = _mm_cvtsd_si32(t); 00159 return i + _mm_movemask_pd(_mm_cmplt_sd(_mm_cvtsi32_sd(t,i), t)); 00160 #elif defined __GNUC__ 00161 int i = (int)value; 00162 return i + (i < value); 00163 #else 00164 int i = cvRound(value); 00165 float diff = (float)(i - value); 00166 return i + (diff < 0); 00167 #endif 00168 } 00169 00170 /** @brief Determines if the argument is Not A Number. 00171 00172 @param value The input floating-point value 00173 00174 The function returns 1 if the argument is Not A Number (as defined by IEEE754 standard), 0 00175 otherwise. */ 00176 CV_INLINE int cvIsNaN( double value ) 00177 { 00178 Cv64suf ieee754; 00179 ieee754.f = value; 00180 return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) + 00181 ((unsigned)ieee754.u != 0) > 0x7ff00000; 00182 } 00183 00184 /** @brief Determines if the argument is Infinity. 00185 00186 @param value The input floating-point value 00187 00188 The function returns 1 if the argument is a plus or minus infinity (as defined by IEEE754 standard) 00189 and 0 otherwise. */ 00190 CV_INLINE int cvIsInf( double value ) 00191 { 00192 Cv64suf ieee754; 00193 ieee754.f = value; 00194 return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 && 00195 (unsigned)ieee754.u == 0; 00196 } 00197 00198 #ifdef __cplusplus 00199 00200 /** @overload */ 00201 CV_INLINE int cvRound(float value) 00202 { 00203 #if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && \ 00204 defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) 00205 __m128 t = _mm_set_ss( value ); 00206 return _mm_cvtss_si32(t); 00207 #elif defined _MSC_VER && defined _M_IX86 00208 int t; 00209 __asm 00210 { 00211 fld value; 00212 fistp t; 00213 } 00214 return t; 00215 #elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \ 00216 defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION 00217 TEGRA_ROUND_FLT(value); 00218 #elif defined CV_ICC || defined __GNUC__ 00219 # if CV_VFP 00220 ARM_ROUND_FLT(value); 00221 # else 00222 return (int)lrintf(value); 00223 # endif 00224 #else 00225 /* it's ok if round does not comply with IEEE754 standard; 00226 the tests should allow +/-1 difference when the tested functions use round */ 00227 return (int)(value + (value >= 0 ? 0.5f : -0.5f)); 00228 #endif 00229 } 00230 00231 /** @overload */ 00232 CV_INLINE int cvRound( int value ) 00233 { 00234 return value; 00235 } 00236 00237 /** @overload */ 00238 CV_INLINE int cvFloor( float value ) 00239 { 00240 #if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) 00241 __m128 t = _mm_set_ss( value ); 00242 int i = _mm_cvtss_si32(t); 00243 return i - _mm_movemask_ps(_mm_cmplt_ss(t, _mm_cvtsi32_ss(t,i))); 00244 #elif defined __GNUC__ 00245 int i = (int)value; 00246 return i - (i > value); 00247 #else 00248 int i = cvRound(value); 00249 float diff = (float)(value - i); 00250 return i - (diff < 0); 00251 #endif 00252 } 00253 00254 /** @overload */ 00255 CV_INLINE int cvFloor( int value ) 00256 { 00257 return value; 00258 } 00259 00260 /** @overload */ 00261 CV_INLINE int cvCeil( float value ) 00262 { 00263 #if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__) 00264 __m128 t = _mm_set_ss( value ); 00265 int i = _mm_cvtss_si32(t); 00266 return i + _mm_movemask_ps(_mm_cmplt_ss(_mm_cvtsi32_ss(t,i), t)); 00267 #elif defined __GNUC__ 00268 int i = (int)value; 00269 return i + (i < value); 00270 #else 00271 int i = cvRound(value); 00272 float diff = (float)(i - value); 00273 return i + (diff < 0); 00274 #endif 00275 } 00276 00277 /** @overload */ 00278 CV_INLINE int cvCeil( int value ) 00279 { 00280 return value; 00281 } 00282 00283 /** @overload */ 00284 CV_INLINE int cvIsNaN( float value ) 00285 { 00286 Cv32suf ieee754; 00287 ieee754.f = value; 00288 return (ieee754.u & 0x7fffffff) > 0x7f800000; 00289 } 00290 00291 /** @overload */ 00292 CV_INLINE int cvIsInf( float value ) 00293 { 00294 Cv32suf ieee754; 00295 ieee754.f = value; 00296 return (ieee754.u & 0x7fffffff) == 0x7f800000; 00297 } 00298 00299 #endif // __cplusplus 00300 00301 //! @} core_utils 00302 00303 #endif
Generated on Tue Jul 12 2022 18:20:17 by
