Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of gr-peach-opencv-project-sd-card by
vec_distance.hpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_CUDA_VEC_DISTANCE_HPP__
#define __OPENCV_CUDA_VEC_DISTANCE_HPP__

#include "reduce.hpp"
#include "functional.hpp"
#include "detail/vec_distance_detail.hpp"

/** @file
 * @deprecated Use @ref cudev instead.
 */

//! @cond IGNORED

namespace cv { namespace cuda { namespace device
{
    // Per-thread accumulator for an L1 (sum of absolute differences) distance
    // over integer values. Each thread feeds its own element pairs through
    // reduceIter(); reduceAll() then hands the partial sum to reduce<>()
    // (declared in reduce.hpp) together with a caller-provided scratch buffer.
    template <typename T> struct L1Dist
    {
        typedef int value_type;
        typedef int result_type;

        __device__ __forceinline__ L1Dist() : mySum(0) {}

        // Adds |val1 - val2| to the running sum; __sad(x, y, z) computes |x - y| + z.
        __device__ __forceinline__ void reduceIter(int val1, int val2)
        {
            mySum = __sad(val1, val2, mySum);
        }

        // Combines this thread's partial sum with the others using plus<int>.
        // smem: scratch buffer passed straight to reduce<THREAD_DIM>().
        // tid:  this thread's index within the THREAD_DIM cooperating threads.
        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
        {
            reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
        }

        // Current value of the accumulated distance.
        __device__ __forceinline__ operator int() const
        {
            return mySum;
        }

        int mySum;
    };

    // Single-precision specialisation of the L1 accumulator.
    template <> struct L1Dist<float>
    {
        typedef float value_type;
        typedef float result_type;

        __device__ __forceinline__ L1Dist() : mySum(0.0f) {}

        // Adds |val1 - val2| to the running sum. fabsf keeps the computation in
        // single precision regardless of which fabs overloads are visible.
        __device__ __forceinline__ void reduceIter(float val1, float val2)
        {
            mySum += ::fabsf(val1 - val2);
        }

        // Combines this thread's partial sum with the others using plus<float>.
        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
        {
            reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
        }

        // Current value of the accumulated distance.
        __device__ __forceinline__ operator float() const
        {
            return mySum;
        }

        float mySum;
    };

    // Per-thread accumulator for an L2 (Euclidean) distance. The squared
    // differences are summed; the square root is only taken when the object
    // is converted to float.
    struct L2Dist
    {
        typedef float value_type;
        typedef float result_type;

        __device__ __forceinline__ L2Dist() : mySum(0.0f) {}

        // Adds (val1 - val2)^2 to the running sum.
        __device__ __forceinline__ void reduceIter(float val1, float val2)
        {
            float reg = val1 - val2;
            mySum += reg * reg;
        }

        // Combines this thread's partial sum of squares with the others using plus<float>.
        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
        {
            reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
        }

        // Square root of the accumulated sum of squares.
        __device__ __forceinline__ operator float() const
        {
            return sqrtf(mySum);
        }

        float mySum;
    };

    // Per-thread accumulator for a Hamming distance over 32-bit words.
    struct HammingDist
    {
        typedef int value_type;
        typedef int result_type;

        __device__ __forceinline__ HammingDist() : mySum(0) {}

        // Adds the number of differing bits between val1 and val2
        // (population count of their XOR) to the running sum.
        __device__ __forceinline__ void reduceIter(int val1, int val2)
        {
            mySum += __popc(val1 ^ val2);
        }

        // Combines this thread's partial bit count with the others using plus<int>.
        template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
        {
            reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
        }

        // Current value of the accumulated distance.
        __device__ __forceinline__ operator int() const
        {
            return mySum;
        }

        int mySum;
    };

    // calc distance between two vectors in global memory
    //
    // Thread `tid` visits elements tid, tid + THREAD_DIM, tid + 2 * THREAD_DIM, ...
    // below `len`, feeding each pair to dist.reduceIter(), and then calls
    // dist.reduceAll<THREAD_DIM>(smem, tid).
    // NOTE(review): ForceGlob is declared outside this file; it is presumably a
    // forced global-memory load helper - confirm against its definition.
    template <int THREAD_DIM, typename Dist, typename T1, typename T2>
    __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid)
    {
        for (int i = tid; i < len; i += THREAD_DIM)
        {
            T1 val1;
            ForceGlob<T1>::Load(vec1, i, val1);

            T2 val2;
            ForceGlob<T2>::Load(vec2, i, val2);

            dist.reduceIter(val1, val2);
        }

        // Dist is a dependent type, so the member template call needs the
        // `template` disambiguator to be well-formed C++.
        dist.template reduceAll<THREAD_DIM>(smem, tid);
    }

    // calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory
    //
    // The per-element accumulation is delegated to
    // vec_distance_detail::VecDiffCachedCalculator (detail/vec_distance_detail.hpp);
    // afterwards dist.reduceAll<THREAD_DIM>(smem, tid) is called.
    template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
    __device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid)
    {
        vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);

        // `template` disambiguator required because Dist is a dependent type.
        dist.template reduceAll<THREAD_DIM>(smem, tid);
    }

    // calc distance between two vectors in global memory
    //
    // Holds the pointer to the first vector. Only the first constructor
    // argument is used; the remaining four are unnamed, defaulted and ignored.
    template <int THREAD_DIM, typename T1> struct VecDiffGlobal
    {
        explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0)
        {
            vec1 = vec1_;
        }

        // Accumulates the distance between the stored vector and vec2 into dist
        // by forwarding to calcVecDiffGlobal<THREAD_DIM>.
        template <typename T2, typename Dist>
        __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
        {
            calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid);
        }

        const T1* vec1;
    };

    // calc distance between two vectors, first vector is cached in register memory, second vector is in global memory
    //
    // The constructor stages vec1 through `smem` and then copies this thread's
    // share of it (elements tid, tid + THREAD_DIM, ... below MAX_LEN) into the
    // per-thread array vec1Vals.
    // NOTE(review): vec1Vals has MAX_LEN / THREAD_DIM entries, so this appears to
    // assume MAX_LEN is a multiple of THREAD_DIM - confirm with the callers.
    // NOTE(review): smem entries at indices >= len are not written here but are
    // read when len < MAX_LEN; presumably the values are ignored downstream or
    // callers guarantee initialisation - confirm.
    template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister
    {
        template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
        {
            // Each thread with glob_tid < len stages one element of vec1 into smem.
            if (glob_tid < len)
                smem[glob_tid] = vec1[glob_tid];
            // Barrier is outside the branch so every thread reaches it.
            __syncthreads();

            U* vec1ValsPtr = vec1Vals;

            #pragma unroll
            for (int i = tid; i < MAX_LEN; i += THREAD_DIM)
                *vec1ValsPtr++ = smem[i];

            // Second barrier: all reads of smem are complete before the
            // constructor returns.
            __syncthreads();
        }

        // Accumulates the distance between the cached vector and vec2 into dist
        // by forwarding to calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>.
        template <typename T2, typename Dist>
        __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
        {
            calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
        }

        U vec1Vals[MAX_LEN / THREAD_DIM];
    };
}}} // namespace cv { namespace cuda { namespace device

//! @endcond

#endif // __OPENCV_CUDA_VEC_DISTANCE_HPP__
Generated on Tue Jul 12 2022 14:47:51 by
1.7.2
