Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository ZIP archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of gr-peach-opencv-project-sd-card by
block.hpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_CUDA_DEVICE_BLOCK_HPP__
#define __OPENCV_CUDA_DEVICE_BLOCK_HPP__

/** @file
 * @deprecated Use @ref cudev instead.
 */

//! @cond IGNORED

namespace cv { namespace cuda { namespace device
{
    /**
     * Block-cooperative helpers. Every member is a static device function that is
     * meant to be called by all threads of a thread block with the same arguments;
     * the work is split between threads by their flattened thread index.
     */
    struct Block
    {
        /** Returns blockIdx.x (the y and z components of the block index are not used). */
        static __device__ __forceinline__ unsigned int id()
        {
            return blockIdx.x;
        }

        /** Returns the number of threads in the block: blockDim.x * blockDim.y * blockDim.z. */
        static __device__ __forceinline__ unsigned int stride()
        {
            return blockDim.x * blockDim.y * blockDim.z;
        }

        /** Block-wide barrier; thin wrapper around __syncthreads(). */
        static __device__ __forceinline__ void sync()
        {
            __syncthreads();
        }

        /** Returns the linear index of the calling thread inside its block (x fastest, then y, then z). */
        static __device__ __forceinline__ int flattenedThreadId()
        {
            return threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
        }

        /**
         * Assigns @p value to every element of [beg, end).
         * Each thread starts at its flattened index and advances by the block size.
         * No barrier is issued; synchronise before other threads read the range.
         */
        template<typename It, typename T>
        static __device__ __forceinline__ void fill(It beg, It end, const T& value)
        {
            int STRIDE = stride();
            It t = beg + flattenedThreadId();

            for(; t < end; t += STRIDE)
                *t = value;
        }

        /**
         * Writes the increasing sequence value, value + 1, value + 2, ... into [beg, end)
         * (an "iota"; the historical spelling of the name is kept for compatibility).
         * No barrier is issued.
         */
        template<typename OutIt, typename T>
        static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
        {
            int STRIDE = stride();
            int tid = flattenedThreadId();
            value += tid;   // element at offset k receives value + k

            for(OutIt t = beg + tid; t < end; t += STRIDE, value += STRIDE)
                *t = value;
        }

        /**
         * Copies [beg, end) to the range starting at @p out, element by element.
         * No barrier is issued.
         */
        template<typename InIt, typename OutIt>
        static __device__ __forceinline__ void copy(InIt beg, InIt end, OutIt out)
        {
            int STRIDE = stride();
            InIt t = beg + flattenedThreadId();
            OutIt o = out + (t - beg);   // same offset in the output as in the input

            for(; t < end; t += STRIDE, o += STRIDE)
                *o = *t;
        }

        /**
         * Unary transform: out[k] = op(beg[k]) for every k in [0, end - beg).
         * The misspelled name ("transfrom") is part of the existing interface and is
         * kept so that callers continue to compile. No barrier is issued.
         */
        template<typename InIt, typename OutIt, class UnOp>
        static __device__ __forceinline__ void transfrom(InIt beg, InIt end, OutIt out, UnOp op)
        {
            int STRIDE = stride();
            InIt t = beg + flattenedThreadId();
            OutIt o = out + (t - beg);

            for(; t < end; t += STRIDE, o += STRIDE)
                *o = op(*t);
        }

        /**
         * Binary transform: out[k] = op(beg1[k], beg2[k]) for every k in [0, end1 - beg1).
         * The second input range must provide at least as many elements as the first.
         * No barrier is issued.
         */
        template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
        static __device__ __forceinline__ void transfrom(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
        {
            int STRIDE = stride();
            InIt1 t1 = beg1 + flattenedThreadId();
            InIt2 t2 = beg2 + flattenedThreadId();
            OutIt o = out + (t1 - beg1);

            for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, o += STRIDE)
                *o = op(*t1, *t2);
        }

        /**
         * In-place tree reduction of buffer[0 .. CTA_SIZE) with @p op; the result ends
         * up in buffer[0]. The remaining elements hold intermediate partial results
         * and must not be relied upon.
         *
         * Preconditions (not checked):
         *  - every thread has already written its element AND the block has been
         *    synchronised (this overload does not issue a barrier before its first read);
         *  - buffer holds at least CTA_SIZE elements;
         *  - the tree only folds all CTA_SIZE elements when CTA_SIZE is a power of two.
         * NOTE(review): CTA_SIZE is presumably the number of threads in the block and
         * buffer presumably lives in shared memory - confirm against callers.
         *
         * For CTA_SIZE >= 128 the function ends on a block-wide barrier; for smaller
         * sizes only the first warp is synchronised, so issue a barrier before threads
         * of other warps read buffer[0].
         */
        template<int CTA_SIZE, typename T, class BinOp>
        static __device__ __forceinline__ void reduce(volatile T* buffer, BinOp op)
        {
            int tid = flattenedThreadId();
            T val = buffer[tid];

            reduceTree<CTA_SIZE>(buffer, val, tid, op);
        }

        /**
         * Reduces one value per thread. Each thread stores its own contribution
         * @p init into buffer[tid]; the values are then combined with @p op and the
         * result (buffer[0]) is returned to every thread.
         *
         * The function returns right after reading buffer[0] without a trailing
         * barrier, so callers that reuse the same buffer immediately must synchronise
         * the block first. The size requirements of the other overload apply.
         */
        template<int CTA_SIZE, typename T, class BinOp>
        static __device__ __forceinline__ T reduce(volatile T* buffer, T init, BinOp op)
        {
            int tid = flattenedThreadId();
            T val = buffer[tid] = init;
            __syncthreads();   // all contributions are visible before the tree starts

            reduceTree<CTA_SIZE>(buffer, val, tid, op);

            __syncthreads();   // publish buffer[0] to every warp
            return buffer[0];
        }

        /**
         * In-place reduction of data[0 .. n) for an arbitrary @p n; the result ends up
         * in data[0]. The caller must make the input visible to the whole block
         * (barrier) before the call. Every path that modifies data ends on a
         * block-wide barrier.
         */
        template <typename T, class BinOp>
        static __device__ __forceinline__ void reduce_n(T* data, unsigned int n, BinOp op)
        {
            // Unsigned locals: both values are non-negative and are compared with n.
            const unsigned int ftid = static_cast<unsigned int>(flattenedThreadId());
            const unsigned int sft = stride();

            if (sft < n)
            {
                // More elements than threads: fold the tail onto the first sft slots.
                for (unsigned int i = sft + ftid; i < n; i += sft)
                    data[ftid] = op(data[ftid], data[i]);

                __syncthreads();

                n = sft;
            }

            while (n > 1)
            {
                unsigned int half = n/2;

                // Fold the upper half onto the lower half, mirrored; for odd n the
                // middle element is left untouched and carried to the next round.
                if (ftid < half)
                    data[ftid] = op(data[ftid], data[n - ftid - 1]);

                __syncthreads();

                n = n - half;
            }
        }

    private:
        /**
         * Barrier and memory ordering between the lanes of the calling warp.
         * __syncwarp is only available from CUDA 9 on; older toolchains fall back to
         * the historical behaviour (volatile accesses, no explicit barrier).
         */
        static __device__ __forceinline__ void warpSync()
        {
        #if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
            __syncwarp(0xffffffffu);
        #endif
        }

        /**
         * Shared reduction tree used by both reduce() overloads.
         * @p val is the calling thread's current partial result (equal to buffer[tid]).
         */
        template<int CTA_SIZE, typename T, class BinOp>
        static __device__ __forceinline__ void reduceTree(volatile T* buffer, T val, int tid, BinOp op)
        {
            // Stages that span several warps: block-wide barrier after each one.
            if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
            if (CTA_SIZE >=  512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
            if (CTA_SIZE >=  256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
            if (CTA_SIZE >=  128) { if (tid <  64) buffer[tid] = val = op(val, buffer[tid +  64]); __syncthreads(); }

            // Final stages run inside the first warp only. Each step is limited to the
            // lanes that still own a live partial result, so a step never reads a slot
            // that is written in the same step and never reads beyond CTA_SIZE elements.
            // The warp barrier after each step orders its writes before the next reads;
            // implicit warp lockstep cannot be assumed on Volta and newer GPUs.
            // All lanes with tid < 32 reach every warpSync() call.
            if (tid < 32)
            {
                if (CTA_SIZE >= 64) {               buffer[tid] = val = op(val, buffer[tid + 32]); warpSync(); }
                if (CTA_SIZE >= 32) { if (tid < 16) buffer[tid] = val = op(val, buffer[tid + 16]); warpSync(); }
                if (CTA_SIZE >= 16) { if (tid <  8) buffer[tid] = val = op(val, buffer[tid +  8]); warpSync(); }
                if (CTA_SIZE >=  8) { if (tid <  4) buffer[tid] = val = op(val, buffer[tid +  4]); warpSync(); }
                if (CTA_SIZE >=  4) { if (tid <  2) buffer[tid] = val = op(val, buffer[tid +  2]); warpSync(); }
                if (CTA_SIZE >=  2) { if (tid <  1) buffer[tid] = val = op(val, buffer[tid +  1]); warpSync(); }
            }
        }
    };
}}}

//! @endcond

#endif /* __OPENCV_CUDA_DEVICE_BLOCK_HPP__ */
Generated on Tue Jul 12 2022 14:46:02 by
1.7.2
