Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of gr-peach-opencv-project-sd-card.
scan.hpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_CUDA_SCAN_HPP__
#define __OPENCV_CUDA_SCAN_HPP__

// NOTE(review): the original (scraped) text had a trailing space inside each
// quoted include path ("...common.hpp "), which breaks include resolution on
// case/byte-exact filesystems; the stray spaces are removed here.
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/utility.hpp"
#include "opencv2/core/cuda/warp.hpp"
#include "opencv2/core/cuda/warp_shuffle.hpp"

/** @file
 * @deprecated Use @ref cudev instead.
 */

//! @cond IGNORED

namespace cv { namespace cuda { namespace device
{
    //! Selects whether a scan produces inclusive or exclusive prefix results.
    enum ScanKind { EXCLUSIVE = 0, INCLUSIVE = 1 };

    //! Warp-level scan over a shared-memory segment using a doubling scheme:
    //! each lane combines in the elements 1, 2, 4, 8 and 16 slots below it,
    //! with `lane >= step` guards so lanes never read below their warp's
    //! 32-element segment.
    //!   T - element type
    //!   F - binary functor combining two elements
    //! Relies on `volatile` plus (pre-Volta style) implicit warp-synchronous
    //! execution for intra-warp ordering; no explicit barriers are used.
    template <ScanKind Kind, typename T, typename F> struct WarpScan
    {
        __device__ __forceinline__ WarpScan() {}
        __device__ __forceinline__ WarpScan(const WarpScan& other) { (void)other; }

        //! Scans this warp's segment of `ptr` in place and returns the calling
        //! lane's result. For EXCLUSIVE scans lane 0 returns 0 and every other
        //! lane returns the element one slot below its inclusive result.
        __device__ __forceinline__ T operator()(volatile T* ptr, const unsigned int idx)
        {
            const unsigned int lane = idx & 31;
            F op;

            if (lane >=  1) ptr[idx] = op(ptr[idx -  1], ptr[idx]);
            if (lane >=  2) ptr[idx] = op(ptr[idx -  2], ptr[idx]);
            if (lane >=  4) ptr[idx] = op(ptr[idx -  4], ptr[idx]);
            if (lane >=  8) ptr[idx] = op(ptr[idx -  8], ptr[idx]);
            if (lane >= 16) ptr[idx] = op(ptr[idx - 16], ptr[idx]);

            if (Kind == INCLUSIVE)
                return ptr[idx];
            else
                return (lane > 0) ? ptr[idx - 1] : 0;
        }

        //! Identity layout: the thread id is used directly as the shared-memory slot.
        __device__ __forceinline__ unsigned int index(const unsigned int tid)
        {
            return tid;
        }

        //! No padding is required by the guarded variant, so init is a no-op.
        __device__ __forceinline__ void init(volatile T* ptr) {}

        static const int warp_offset = 0;

        //! Scan type BlockScan uses for its second pass over per-warp totals.
        typedef WarpScan<INCLUSIVE, T, F> merge;
    };

    //! Warp scan without the per-step lane guards. Correctness depends on the
    //! padded layout produced by index()/init(): each warp's 32 data slots sit
    //! above 16 pad slots, so the unguarded `idx - step` reads land in padding
    //! rather than in a neighbouring warp's data.
    template <ScanKind Kind, typename T, typename F> struct WarpScanNoComp
    {
        __device__ __forceinline__ WarpScanNoComp() {}
        __device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { (void)other; }

        //! Same contract as WarpScan::operator(), but with no comparisons in
        //! the combine steps. Note the lane is derived from threadIdx.x here
        //! (idx is a padded slot index, not a lane number).
        __device__ __forceinline__ T operator()(volatile T* ptr, const unsigned int idx)
        {
            const unsigned int lane = threadIdx.x & 31;
            F op;

            ptr[idx] = op(ptr[idx -  1], ptr[idx]);
            ptr[idx] = op(ptr[idx -  2], ptr[idx]);
            ptr[idx] = op(ptr[idx -  4], ptr[idx]);
            ptr[idx] = op(ptr[idx -  8], ptr[idx]);
            ptr[idx] = op(ptr[idx - 16], ptr[idx]);

            if (Kind == INCLUSIVE)
                return ptr[idx];
            else
                return (lane > 0) ? ptr[idx - 1] : 0;
        }

        //! Maps a thread id into the padded layout: each warp owns
        //! warp_smem_stride slots, with its data starting 16 slots in
        //! (the +1 in the stride presumably avoids bank conflicts — see note
        //! on warp_smem_stride below).
        __device__ __forceinline__ unsigned int index(const unsigned int tid)
        {
            return (tid >> warp_log) * warp_smem_stride + 16 + (tid & warp_mask);
        }

        //! Each calling thread zeroes ptr[threadIdx.x]; BlockScan invokes this
        //! from warp 0 only, clearing the first 32 slots of the buffer.
        __device__ __forceinline__ void init(volatile T* ptr)
        {
            ptr[threadIdx.x] = 0;
        }

        // 32 data + 16 pad slots per warp; the extra +1 looks like
        // bank-conflict padding — TODO(review): confirm.
        static const int warp_smem_stride = 32 + 16 + 1;
        static const int warp_offset = 16;
        static const int warp_log  = 5;
        static const int warp_mask = 31;

        //! Scan type BlockScan uses for its second pass over per-warp totals.
        typedef WarpScanNoComp<INCLUSIVE, T, F> merge;
    };

    //! Block-wide scan composed from two warp-scan passes:
    //!   1) each warp scans its own segment of `ptr`,
    //!   2) warp 0 scans the published per-warp totals,
    //!   3) every thread folds its warp's prefix into its own value.
    //!   Sc - warp-scan policy (WarpScan or WarpScanNoComp)
    //! `ptr` must be a shared-memory buffer laid out per Sc::index().
    template <ScanKind Kind, typename T, typename Sc, typename F> struct BlockScan
    {
        __device__ __forceinline__ BlockScan() {}
        __device__ __forceinline__ BlockScan(const BlockScan& other) { (void)other; }

        __device__ __forceinline__ T operator()(volatile T* ptr)
        {
            const unsigned int tid  = threadIdx.x;
            const unsigned int lane = tid & warp_mask;
            const unsigned int warp = tid >> warp_log;

            Sc scan;
            typename Sc::merge merge_scan;
            const unsigned int idx = scan.index(tid);

            // Pass 1: per-warp scan of each warp's segment.
            T val = scan(ptr, idx);
            __syncthreads();

            // Reinitialise the scratch area used for the totals pass.
            if (warp == 0)
                scan.init(ptr);
            __syncthreads();

            // The last lane of each warp publishes that warp's total.
            if (lane == 31)
                ptr[scan.warp_offset + warp] = (Kind == INCLUSIVE) ? val : ptr[idx];
            __syncthreads();

            // Pass 2: warp 0 scans the per-warp totals in place.
            if (warp == 0)
                merge_scan(ptr, idx);
            __syncthreads();

            // Fold the preceding warps' prefix into this thread's value.
            if (warp > 0)
                val = ptr[scan.warp_offset + warp - 1] + val;
            __syncthreads();

            ptr[idx] = val;
            __syncthreads();

            return val;
        }

        static const int warp_log  = 5;
        static const int warp_mask = 31;
    };

    //! Inclusive prefix sum across one warp.
    //! On SM30+ the scan runs entirely in registers via warp shuffles
    //! (s_Data and tid are unused on that path); otherwise it falls back to a
    //! zero-padded shared-memory scan, for which s_Data must provide
    //! 2 * OPENCV_CUDA_WARP_SIZE slots per warp in the layout computed below.
    template <typename T>
    __device__ T warpScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
    {
    #if __CUDA_ARCH__ >= 300
        const unsigned int laneId = cv::cuda::device::Warp::laneId();

        // scan on shuffle functions
        #pragma unroll
        for (int i = 1; i <= (OPENCV_CUDA_WARP_SIZE / 2); i *= 2)
        {
            const T n = cv::cuda::device::shfl_up(idata, i);
            if (laneId >= i)
                idata += n;
        }

        return idata;
    #else
        // `pos` places OPENCV_CUDA_WARP_SIZE zero slots below each warp's
        // data segment, so the unguarded subtractive reads below hit zeros.
        unsigned int pos = 2 * tid - (tid & (OPENCV_CUDA_WARP_SIZE - 1));
        s_Data[pos] = 0;
        pos += OPENCV_CUDA_WARP_SIZE;
        s_Data[pos] = idata;

        s_Data[pos] += s_Data[pos -  1];
        s_Data[pos] += s_Data[pos -  2];
        s_Data[pos] += s_Data[pos -  4];
        s_Data[pos] += s_Data[pos -  8];
        s_Data[pos] += s_Data[pos - 16];

        return s_Data[pos];
    #endif
    }

    //! Exclusive warp scan, derived by subtracting the thread's own input
    //! from its inclusive scan result.
    template <typename T>
    __device__ __forceinline__ T warpScanExclusive(T idata, volatile T* s_Data, unsigned int tid)
    {
        return warpScanInclusive(idata, s_Data, tid) - idata;
    }

    //! Block-wide inclusive scan over tiNumScanThreads threads. When more than
    //! one warp participates, per-warp inclusive scans are combined with an
    //! exclusive scan over the warp totals; otherwise it reduces to a single
    //! warp scan.
    template <int tiNumScanThreads, typename T>
    __device__ T blockScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
    {
        if (tiNumScanThreads > OPENCV_CUDA_WARP_SIZE)
        {
            // Bottom-level inclusive warp scan.
            T warpResult = warpScanInclusive(idata, s_Data, tid);

            // Save the top element of each warp for the totals scan; sync first
            // because s_Data is about to be overwritten.
            __syncthreads();
            if ((tid & (OPENCV_CUDA_WARP_SIZE - 1)) == (OPENCV_CUDA_WARP_SIZE - 1))
            {
                s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE] = warpResult;
            }

            // Wait for all warp totals to be published.
            __syncthreads();

            if (tid < (tiNumScanThreads / OPENCV_CUDA_WARP_SIZE))
            {
                // Grab this warp's total and replace it with its exclusive scan.
                T val = s_Data[tid];
                s_Data[tid] = warpScanExclusive(val, s_Data, tid);
            }

            // Wait for the exclusive totals before combining.
            __syncthreads();

            return warpResult + s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE];
        }
        else
        {
            return warpScanInclusive(idata, s_Data, tid);
        }
    }
}}}

//! @endcond

#endif // __OPENCV_CUDA_SCAN_HPP__
Generated on Tue Jul 12 2022 14:47:34 by Doxygen 1.7.2
