Renesas GR-PEACH OpenCV Development / gr-peach-opencv-project-sd-card_update

Fork of gr-peach-opencv-project-sd-card by the do

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers block.hpp Source File

block.hpp

Go to the documentation of this file.
00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
00015 // Third party copyrights are property of their respective owners.
00016 //
00017 // Redistribution and use in source and binary forms, with or without modification,
00018 // are permitted provided that the following conditions are met:
00019 //
00020 //   * Redistribution's of source code must retain the above copyright notice,
00021 //     this list of conditions and the following disclaimer.
00022 //
00023 //   * Redistribution's in binary form must reproduce the above copyright notice,
00024 //     this list of conditions and the following disclaimer in the documentation
00025 //     and/or other materials provided with the distribution.
00026 //
00027 //   * The name of the copyright holders may not be used to endorse or promote products
00028 //     derived from this software without specific prior written permission.
00029 //
00030 // This software is provided by the copyright holders and contributors "as is" and
00031 // any express or implied warranties, including, but not limited to, the implied
00032 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00033 // In no event shall the Intel Corporation or contributors be liable for any direct,
00034 // indirect, incidental, special, exemplary, or consequential damages
00035 // (including, but not limited to, procurement of substitute goods or services;
00036 // loss of use, data, or profits; or business interruption) however caused
00037 // and on any theory of liability, whether in contract, strict liability,
00038 // or tort (including negligence or otherwise) arising in any way out of
00039 // the use of this software, even if advised of the possibility of such damage.
00040 //
00041 //M*/
00042 
00043 #ifndef __OPENCV_CUDA_DEVICE_BLOCK_HPP__
00044 #define __OPENCV_CUDA_DEVICE_BLOCK_HPP__
00045 
00046 /** @file
00047  * @deprecated Use @ref cudev instead.
00048  */
00049 
00050 //! @cond IGNORED
00051 
00052 namespace cv { namespace cuda { namespace device
00053 {
// Block-level cooperative helpers for CUDA device code.
//
// Every routine here is a static __device__ function intended to be called by
// the threads of one thread block. Work is split using the flattened
// (x + y * dimX + z * dimX * dimY) thread index and the total number of
// threads in the block, so the helpers work for 1D, 2D and 3D block shapes.
//
// The misspelled public names `yota` and `transfrom` are kept as they are so
// that existing callers continue to compile.
struct Block
{
    // Returns blockIdx.x. Only the x component of the block index is reported;
    // blockIdx.y and blockIdx.z are not folded into the result.
    static __device__ __forceinline__ unsigned int id()
    {
        return blockIdx.x;
    }

    // Returns the number of threads in the block (blockDim.x * blockDim.y * blockDim.z).
    // This is the step each thread uses when striding over a range.
    static __device__ __forceinline__ unsigned int stride()
    {
        return blockDim.x * blockDim.y * blockDim.z;
    }

    // Block-wide barrier; thin wrapper around __syncthreads().
    static __device__ __forceinline__ void sync()
    {
        __syncthreads();
    }

    // Returns the linear index of the calling thread inside its block, with x
    // varying fastest, then y, then z.
    static __device__ __forceinline__ int flattenedThreadId()
    {
        return threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
    }

    // Assigns `value` to every element of [beg, end). Thread k writes elements
    // k, k + stride(), k + 2 * stride(), ... No barrier is issued; the caller
    // must synchronize before other threads read the range.
    template<typename It, typename T>
    static __device__ __forceinline__ void fill(It beg, It end, const T& value)
    {
        int STRIDE = stride();
        It t = beg + flattenedThreadId();

        for(; t < end; t += STRIDE)
            *t = value;
    }

    // Writes the sequence value, value + 1, value + 2, ... into [beg, end)
    // (an "iota"; the historical spelling of the name is preserved).
    // No barrier is issued.
    template<typename OutIt, typename T>
    static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
    {
        int STRIDE = stride();
        int tid = flattenedThreadId();
        // Element i receives (initial value + i); start at this thread's first element.
        value += tid;

        for(OutIt t = beg + tid; t < end; t += STRIDE, value += STRIDE)
            *t = value;
    }

    // Copies [beg, end) to the range starting at `out`, one element per thread
    // per iteration. No barrier is issued.
    template<typename InIt, typename OutIt>
    static __device__ __forceinline__ void copy(InIt beg, InIt end, OutIt out)
    {
        int STRIDE = stride();
        InIt  t = beg + flattenedThreadId();
        OutIt o = out + (t - beg);

        for(; t < end; t += STRIDE, o += STRIDE)
            *o = *t;
    }

    // Unary transform: out[i] = op(beg[i]) for every i in [0, end - beg).
    // No barrier is issued.
    template<typename InIt, typename OutIt, class UnOp>
    static __device__ __forceinline__ void transfrom(InIt beg, InIt end, OutIt out, UnOp op)
    {
        int STRIDE = stride();
        InIt  t = beg + flattenedThreadId();
        OutIt o = out + (t - beg);

        for(; t < end; t += STRIDE, o += STRIDE)
            *o = op(*t);
    }

    // Binary transform: out[i] = op(beg1[i], beg2[i]) for every i in
    // [0, end1 - beg1). The second input range is read at the same offsets as
    // the first, so it must hold at least as many elements. No barrier is issued.
    template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
    static __device__ __forceinline__ void transfrom(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
    {
        int STRIDE = stride();
        InIt1 t1 = beg1 + flattenedThreadId();
        InIt2 t2 = beg2 + flattenedThreadId();
        OutIt o  = out + (t1 - beg1);

        for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, o += STRIDE)
            *o = op(*t1, *t2);
    }

    // In-place tree reduction of buffer[0 .. CTA_SIZE) with `op`; the result is
    // left in buffer[0].
    //
    // Preconditions (not checked):
    //   * every thread of the block calls this function (it contains block barriers);
    //   * buffer[tid] was written and made visible (e.g. by __syncthreads()) before the call;
    //   * the halving steps only fold in every element when CTA_SIZE is a power of two.
    // Assumes CTA_SIZE equals the number of threads in the block and that
    // `buffer` is block-shared storage -- TODO confirm against callers.
    //
    // No barrier is issued after the last step: thread 0 may read buffer[0]
    // right away, other threads must synchronize first. Elements other than
    // buffer[0] hold intermediate partial results on return.
    template<int CTA_SIZE, typename T, class BinOp>
    static __device__ __forceinline__ void reduce(volatile T* buffer, BinOp op)
    {
        int tid = flattenedThreadId();
        T val =  buffer[tid];

        reduceImpl<CTA_SIZE>(buffer, val, tid, op);
    }

    // Stores the per-thread value `init` into buffer[tid], reduces
    // buffer[0 .. CTA_SIZE) with `op` and returns the block-wide result to
    // every thread. The same preconditions on CTA_SIZE and on all threads
    // participating apply as for the overload above.
    //
    // The function does not synchronize on entry or after reading the result,
    // so the caller must place a barrier between this call and any earlier or
    // later use of `buffer`.
    template<int CTA_SIZE, typename T, class BinOp>
    static __device__ __forceinline__ T reduce(volatile T* buffer, T init, BinOp op)
    {
        int tid = flattenedThreadId();
        T val =  buffer[tid] = init;
        // Make every thread's contribution visible before the tree starts reading.
        __syncthreads();

        reduceImpl<CTA_SIZE>(buffer, val, tid, op);

        // Publish buffer[0] (written by thread 0) to the whole block.
        __syncthreads();
        return buffer[0];
    }

    // Reduces data[0 .. n) in place with `op` for an arbitrary n; the result is
    // left in data[0] and the other elements hold partial results.
    //
    // `n` must be the same for all threads of the block, and all threads must
    // call the function: the barriers below sit inside branches and a loop
    // that depend only on `n` and the block size.
    template <typename T, class BinOp>
    static __device__ __forceinline__ void reduce_n(T* data, unsigned int n, BinOp op)
    {
        int ftid = flattenedThreadId();
        int sft = stride();

        if (sft < n)
        {
            // More elements than threads: each thread first folds the elements
            // that lie a whole number of block sizes above its own slot.
            for (unsigned int i = sft + ftid; i < n; i += sft)
                data[ftid] = op(data[ftid], data[i]);

            __syncthreads();

            n = sft;
        }

        // Fold the upper part of the remaining range onto the lower part,
        // mirrored around the middle. For odd n the middle element is left in
        // place, so the range shrinks to n - n/2 elements each round.
        while (n > 1)
        {
            unsigned int half = n/2;

            if (ftid < half)
                data[ftid] = op(data[ftid], data[n - ftid - 1]);

            __syncthreads();

            n = n - half;
        }
    }

private:
    // Warp-level barrier with memory ordering among the lanes of the calling warp.
    // __syncwarp() is only available from CUDA 9.0 on; older toolkits target
    // hardware that executes a warp in lockstep, where the `volatile` buffer
    // accesses used by the callers are sufficient.
    static __device__ __forceinline__ void syncWarp()
    {
#if defined(CUDART_VERSION) && (CUDART_VERSION >= 9000)
        __syncwarp();
#endif
    }

    // One halving step inside the first warp: lanes [0, OFFSET) fold
    // buffer[tid + OFFSET] into buffer[tid].
    //
    // The `tid < OFFSET` guard keeps reads inside [OFFSET, 2 * OFFSET), i.e.
    // inside the CTA_SIZE elements being reduced, and makes the read and write
    // ranges of a step disjoint. A single warp barrier after the write is then
    // enough to order this step against the next one. Must be called by all
    // lanes of the warp.
    template<int OFFSET, typename T, class BinOp>
    static __device__ __forceinline__ void warpStep(volatile T* buffer, T& val, int tid, BinOp op)
    {
        if (tid < OFFSET)
            buffer[tid] = val = op(val, buffer[tid + OFFSET]);

        syncWarp();
    }

    // Shared reduction tree used by both reduce() overloads. `val` must hold
    // the caller's current value of buffer[tid].
    template<int CTA_SIZE, typename T, class BinOp>
    static __device__ __forceinline__ void reduceImpl(volatile T* buffer, T val, int tid, BinOp op)
    {
        // Steps that span several warps need a block-wide barrier. The barrier
        // is outside the `tid <` test so that every thread reaches it.
        if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
        if (CTA_SIZE >=  512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
        if (CTA_SIZE >=  256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
        if (CTA_SIZE >=  128) { if (tid <  64) buffer[tid] = val = op(val, buffer[tid +  64]); __syncthreads(); }

        // The remaining steps only involve the first warp. The branch is
        // uniform within that warp, so all of its lanes reach each warp barrier.
        if (tid < 32)
        {
            if (CTA_SIZE >=   64) warpStep<32>(buffer, val, tid, op);
            if (CTA_SIZE >=   32) warpStep<16>(buffer, val, tid, op);
            if (CTA_SIZE >=   16) warpStep< 8>(buffer, val, tid, op);
            if (CTA_SIZE >=    8) warpStep< 4>(buffer, val, tid, op);
            if (CTA_SIZE >=    4) warpStep< 2>(buffer, val, tid, op);
            if (CTA_SIZE >=    2) warpStep< 1>(buffer, val, tid, op);
        }
    }
};
00207 }}}
00208 
00209 //! @endcond
00210 
00211 #endif /* __OPENCV_CUDA_DEVICE_BLOCK_HPP__ */
00212