Renesas GR-PEACH OpenCV Development / gr-peach-opencv-project-sd-card_update

Fork of gr-peach-opencv-project-sd-card by the do

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers vec_distance.hpp Source File

vec_distance.hpp

Go to the documentation of this file.
00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
00015 // Third party copyrights are property of their respective owners.
00016 //
00017 // Redistribution and use in source and binary forms, with or without modification,
00018 // are permitted provided that the following conditions are met:
00019 //
00020 //   * Redistribution's of source code must retain the above copyright notice,
00021 //     this list of conditions and the following disclaimer.
00022 //
00023 //   * Redistribution's in binary form must reproduce the above copyright notice,
00024 //     this list of conditions and the following disclaimer in the documentation
00025 //     and/or other materials provided with the distribution.
00026 //
00027 //   * The name of the copyright holders may not be used to endorse or promote products
00028 //     derived from this software without specific prior written permission.
00029 //
00030 // This software is provided by the copyright holders and contributors "as is" and
00031 // any express or implied warranties, including, but not limited to, the implied
00032 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00033 // In no event shall the Intel Corporation or contributors be liable for any direct,
00034 // indirect, incidental, special, exemplary, or consequential damages
00035 // (including, but not limited to, procurement of substitute goods or services;
00036 // loss of use, data, or profits; or business interruption) however caused
00037 // and on any theory of liability, whether in contract, strict liability,
00038 // or tort (including negligence or otherwise) arising in any way out of
00039 // the use of this software, even if advised of the possibility of such damage.
00040 //
00041 //M*/
00042 
00043 #ifndef __OPENCV_CUDA_VEC_DISTANCE_HPP__
00044 #define __OPENCV_CUDA_VEC_DISTANCE_HPP__
00045 
00046 #include "reduce.hpp"
00047 #include "functional.hpp"
00048 #include "detail/vec_distance_detail.hpp"
00049 
00050 /** @file
00051  * @deprecated Use @ref cudev instead.
00052  */
00053 
00054 //! @cond IGNORED
00055 
00056 namespace cv { namespace cuda { namespace device
00057 {
00058     template <typename T> struct L1Dist
00059     {
          // Generic L1 (sum of absolute differences) accumulator. The template parameter T
          // is not used by this primary template: values and result are plain int.
00060         typedef int value_type;
00061         typedef int result_type;
00062 
          // Every instance starts from an empty partial sum.
00063         __device__ __forceinline__ L1Dist() { mySum = 0; }
00064 
          // Fold one element pair into the partial sum; __sad(x, y, z) yields |x - y| + z.
00065         __device__ __forceinline__ void reduceIter(int lhs, int rhs)
00066         {
00067             mySum = __sad(lhs, rhs, mySum);
00068         }
00069 
          // Hand this thread's partial sum to reduce<THREAD_DIM>, combining with plus<int>.
00070         template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(result_type* smem, int tid)
00071         {
              plus<result_type> sumOp;
00072             reduce<THREAD_DIM>(smem, mySum, tid, sumOp);
00073         }
00074 
          // Implicit conversion exposes the accumulated value.
00075         __device__ __forceinline__ operator result_type() const
00076         {
00077             return mySum;
00078         }
00079 
00080         result_type mySum;
00081     };
00082     template <> struct L1Dist<float>
00083     {
          // Explicit specialization for float: accumulates |a - b| into a float partial sum.
00084         typedef float value_type;
00085         typedef float result_type;
00086 
          // Every instance starts from an empty partial sum.
00087         __device__ __forceinline__ L1Dist() { mySum = 0.0f; }
00088 
          // Fold one element pair into the partial sum.
00089         __device__ __forceinline__ void reduceIter(float lhs, float rhs)
00090         {
              const float delta = lhs - rhs;
00091             mySum += ::fabs(delta);
00092         }
00093 
          // Hand this thread's partial sum to reduce<THREAD_DIM>, combining with plus<float>.
00094         template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(result_type* smem, int tid)
00095         {
              plus<result_type> sumOp;
00096             reduce<THREAD_DIM>(smem, mySum, tid, sumOp);
00097         }
00098 
          // Implicit conversion exposes the accumulated value.
00099         __device__ __forceinline__ operator result_type() const
00100         {
00101             return mySum;
00102         }
00103 
00104         result_type mySum;
00105     };
00106 
00107     struct L2Dist
00108     {
          // Euclidean (L2) accumulator: mySum holds the running sum of squared differences;
          // the square root is applied only when the object is converted to float.
00109         typedef float value_type;
00110         typedef float result_type;
00111 
          // Every instance starts from an empty partial sum.
00112         __device__ __forceinline__ L2Dist() { mySum = 0.0f; }
00113 
          // Fold one element pair into the partial sum of squares.
00114         __device__ __forceinline__ void reduceIter(float lhs, float rhs)
00115         {
00116             const float delta = lhs - rhs;
00117             mySum += delta * delta;
00118         }
00119 
          // Hand this thread's partial sum of squares to reduce<THREAD_DIM>, combining with plus<float>.
00120         template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(result_type* smem, int tid)
00121         {
              plus<result_type> sumOp;
00122             reduce<THREAD_DIM>(smem, mySum, tid, sumOp);
00123         }
00124 
          // Implicit conversion returns the square root of the accumulated sum of squares.
00125         __device__ __forceinline__ operator result_type() const
00126         {
              const float root = sqrtf(mySum);
00127             return root;
00128         }
00129 
00130         result_type mySum;
00131     };
00132 
00133     struct HammingDist
00134     {
          // Hamming accumulator: counts the bit positions in which the int operands differ.
00135         typedef int value_type;
00136         typedef int result_type;
00137 
          // Every instance starts from an empty bit count.
00138         __device__ __forceinline__ HammingDist() { mySum = 0; }
00139 
          // XOR leaves a 1 in every bit position where the operands differ; __popc counts those bits.
00140         __device__ __forceinline__ void reduceIter(int lhs, int rhs)
00141         {
              const int differing = lhs ^ rhs;
00142             mySum += __popc(differing);
00143         }
00144 
          // Hand this thread's partial count to reduce<THREAD_DIM>, combining with plus<int>.
00145         template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(result_type* smem, int tid)
00146         {
              plus<result_type> sumOp;
00147             reduce<THREAD_DIM>(smem, mySum, tid, sumOp);
00148         }
00149 
          // Implicit conversion exposes the accumulated bit count.
00150         __device__ __forceinline__ operator result_type() const
00151         {
00152             return mySum;
00153         }
00154 
00155         result_type mySum;
00156     };
00157 
00158     // Calc distance between two vectors in global memory.
      //
      // Thread tid feeds elements tid, tid + THREAD_DIM, tid + 2 * THREAD_DIM, ... (while < len)
      // of both vectors into dist.reduceIter; afterwards dist.reduceAll<THREAD_DIM> is invoked
      // with smem and tid to merge the per-thread partial results.
      //   vec1, vec2 - input vectors, both read through ForceGlob<T>::Load
      //   len        - number of elements to compare
      //   dist       - accumulator object (e.g. L1Dist, L2Dist, HammingDist); updated in place
      //   smem       - buffer forwarded unchanged to dist.reduceAll
      //   tid        - index of the calling thread; presumably in [0, THREAD_DIM) - TODO confirm
00159     template <int THREAD_DIM, typename Dist, typename T1, typename T2>
00160     __device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid)
00161     {
00162         for (int i = tid; i < len; i += THREAD_DIM)
00163         {
00164             T1 val1;
00165             ForceGlob<T1>::Load(vec1, i, val1);
00166 
00167             T2 val2;
00168             ForceGlob<T2>::Load(vec2, i, val2);
00169 
00170             dist.reduceIter(val1, val2);
00171         }
00172 
          // Dist is a dependent type, so the member template must be named with the
          // 'template' disambiguator; without it '<' is parsed as a less-than operator.
00173         dist.template reduceAll<THREAD_DIM>(smem, tid);
00174     }
00175 
00176     // Calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory.
      //
      // vec_distance_detail::VecDiffCachedCalculator<...>::calc receives both vectors, len, dist
      // and tid; afterwards dist.reduceAll<THREAD_DIM> is invoked with smem and tid to merge the
      // per-thread partial results.
      //   vecCached - the cached first vector
      //   vecGlob   - the second vector
      //   len       - vector length forwarded to the calculator
      //   dist      - accumulator object; updated in place
      //   smem      - buffer forwarded unchanged to dist.reduceAll
      //   tid       - index of the calling thread; presumably in [0, THREAD_DIM) - TODO confirm
00177     template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
00178     __device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid)
00179     {
00180         vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);
00181 
          // Dist is a dependent type, so the member template must be named with the
          // 'template' disambiguator; without it '<' is parsed as a less-than operator.
00182         dist.template reduceAll<THREAD_DIM>(smem, tid);
00183     }
00184 
00185     // Distance helper for the case where both vectors are read directly from global memory.
      // Only the pointer to the first vector is stored; nothing is cached.
00186     template <int THREAD_DIM, typename T1> struct VecDiffGlobal
00187     {
          // The four unnamed, defaulted parameters are ignored; they let this constructor be
          // called with the same five-argument shape as VecDiffCachedRegister's constructor.
00188         explicit __device__ __forceinline__ VecDiffGlobal(const T1* firstVec, int = 0, void* = 0, int = 0, int = 0)
00189             : vec1(firstVec)
00190         {
00191         }
00192 
          // Forward to calcVecDiffGlobal with the stored first vector and the supplied second one.
00193         template <typename T2, typename Dist>
00194         __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
00195         {
00196             calcVecDiffGlobal<THREAD_DIM>(this->vec1, vec2, len, dist, smem, tid);
00197         }
00198 
00199         const T1* vec1;
00200     };
00201 
00202     // Calc distance between two vectors, first vector is cached in register memory, second vector is in global memory.
      //
      // The constructor stages the first vector through smem and then copies this thread's
      // share of it into the per-thread member array vec1Vals; calc() later hands vec1Vals to
      // calcVecDiffCached.
      // NOTE(review): vec1Vals has MAX_LEN / THREAD_DIM slots (integer division) while the
      // caching loop writes one slot per iteration of i = tid, tid + THREAD_DIM, ... < MAX_LEN;
      // this presumably relies on MAX_LEN being a multiple of THREAD_DIM - confirm with callers.
00203     template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister
00204     {
          // vec1     - first vector; element glob_tid is read when glob_tid < len
          // len      - number of valid elements in vec1
          // smem     - staging buffer; presumably holds at least MAX_LEN elements - TODO confirm
          // glob_tid - index used for the vec1 -> smem copy
          // tid      - index used for the smem -> vec1Vals copy (strided by THREAD_DIM)
          // Both __syncthreads() calls are unconditional, so every thread of the block has to
          // execute this constructor.
00205         template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
00206         {
              // Stage: one element per thread, converted to U on assignment.
00207             if (glob_tid < len)
00208                 smem[glob_tid] = vec1[glob_tid];
              // Make the staged elements visible to all threads before they are read back.
00209             __syncthreads();
00210 
00211             U* vec1ValsPtr = vec1Vals;
00212 
              // NOTE(review): slots smem[i] with i >= len are not written above, so when
              // len < MAX_LEN the tail of vec1Vals holds whatever smem contained before;
              // presumably the LEN_EQ_MAX_LEN == false path never uses those values - confirm.
00213             #pragma unroll
00214             for (int i = tid; i < MAX_LEN; i += THREAD_DIM)
00215                 *vec1ValsPtr++ = smem[i];
00216 
              // Wait until every thread has finished reading smem; presumably this lets the
              // caller reuse the buffer afterwards (calc() receives an smem argument) - confirm.
00217             __syncthreads();
00218         }
00219 
          // Forward to calcVecDiffCached with the cached first vector and the supplied second one.
00220         template <typename T2, typename Dist>
00221         __device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
00222         {
00223             calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
00224         }
00225 
          // Per-thread cache of the first vector's elements, filled by the constructor.
00226         U vec1Vals[MAX_LEN / THREAD_DIM];
00227     };
00228 }}} // namespace cv { namespace cuda { namespace device
00229 
00230 //! @endcond
00231 
00232 #endif // __OPENCV_CUDA_VEC_DISTANCE_HPP__
00233