Renesas GR-PEACH OpenCV Development / gr-peach-opencv-project-sd-card_update

Fork of gr-peach-opencv-project-sd-card.

emulation.hpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef OPENCV_CUDA_EMULATION_HPP_
#define OPENCV_CUDA_EMULATION_HPP_

#include "common.hpp"
#include "warp_reduce.hpp"

/** @file
 * @deprecated Use @ref cudev instead.
 */

//! @cond IGNORED

namespace cv { namespace cuda { namespace device
{
    struct Emulation
    {

        static __device__ __forceinline__ int syncthreadsOr(int pred)
        {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
                // just a compilation stub
                return 0;
#else
                return __syncthreads_or(pred);
#endif
        }

        template<int CTA_SIZE>
        static __forceinline__ __device__ int Ballot(int predicate)
        {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
            return __ballot(predicate);
#else
            // Emulate ballot via a per-thread bit written to shared memory,
            // then combined with a warp reduction.
            __shared__ volatile int cta_buffer[CTA_SIZE];

            int tid = threadIdx.x;
            cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
            return warp_reduce(cta_buffer);
#endif
        }

        struct smem
        {
            // Emulated shared-memory atomics for pre-compute-1.2 devices:
            // the upper 5 bits of a word hold the writing thread's tag, the
            // lower bits hold the value. Each thread rewrites the word until
            // its own tagged write is the one that sticks.
            enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U };

            template<typename T>
            static __device__ __forceinline__ T atomicInc(T* address, T val)
            {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
                // Note: unlike ::atomicInc, the emulated path does not wrap at val.
                T count;
                unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
                do
                {
                    count = *address & TAG_MASK;
                    count = tag | (count + 1);
                    *address = count;
                } while (*address != count);

                return (count & TAG_MASK) - 1;
#else
                return ::atomicInc(address, val);
#endif
            }

            template<typename T>
            static __device__ __forceinline__ T atomicAdd(T* address, T val)
            {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
                T count;
                unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
                do
                {
                    count = *address & TAG_MASK;
                    count = tag | (count + val);
                    *address = count;
                } while (*address != count);

                return (count & TAG_MASK) - val;
#else
                return ::atomicAdd(address, val);
#endif
            }

            template<typename T>
            static __device__ __forceinline__ T atomicMin(T* address, T val)
            {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
                T count = ::min(*address, val);
                do
                {
                    *address = count;
                } while (*address > count);

                return count;
#else
                return ::atomicMin(address, val);
#endif
            }
        }; // struct smem

        struct glob
        {
            static __device__ __forceinline__ int atomicAdd(int* address, int val)
            {
                return ::atomicAdd(address, val);
            }
            static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val)
            {
                return ::atomicAdd(address, val);
            }
            static __device__ __forceinline__ float atomicAdd(float* address, float val)
            {
            #if __CUDA_ARCH__ >= 200
                return ::atomicAdd(address, val);
            #else
                int* address_as_i = (int*) address;
                int old = *address_as_i, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_i, assumed,
                        __float_as_int(val + __int_as_float(assumed)));
                } while (assumed != old);
                return __int_as_float(old);
            #endif
            }
            static __device__ __forceinline__ double atomicAdd(double* address, double val)
            {
            #if __CUDA_ARCH__ >= 130
                unsigned long long int* address_as_ull = (unsigned long long int*) address;
                unsigned long long int old = *address_as_ull, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_ull, assumed,
                        __double_as_longlong(val + __longlong_as_double(assumed)));
                } while (assumed != old);
                return __longlong_as_double(old);
            #else
                (void) address;
                (void) val;
                return 0.0;
            #endif
            }

            static __device__ __forceinline__ int atomicMin(int* address, int val)
            {
                return ::atomicMin(address, val);
            }
            static __device__ __forceinline__ float atomicMin(float* address, float val)
            {
            #if __CUDA_ARCH__ >= 120
                int* address_as_i = (int*) address;
                int old = *address_as_i, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_i, assumed,
                        __float_as_int(::fminf(val, __int_as_float(assumed))));
                } while (assumed != old);
                return __int_as_float(old);
            #else
                (void) address;
                (void) val;
                return 0.0f;
            #endif
            }
            static __device__ __forceinline__ double atomicMin(double* address, double val)
            {
            #if __CUDA_ARCH__ >= 130
                unsigned long long int* address_as_ull = (unsigned long long int*) address;
                unsigned long long int old = *address_as_ull, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_ull, assumed,
                        __double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
                } while (assumed != old);
                return __longlong_as_double(old);
            #else
                (void) address;
                (void) val;
                return 0.0;
            #endif
            }

            static __device__ __forceinline__ int atomicMax(int* address, int val)
            {
                return ::atomicMax(address, val);
            }
            static __device__ __forceinline__ float atomicMax(float* address, float val)
            {
            #if __CUDA_ARCH__ >= 120
                int* address_as_i = (int*) address;
                int old = *address_as_i, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_i, assumed,
                        __float_as_int(::fmaxf(val, __int_as_float(assumed))));
                } while (assumed != old);
                return __int_as_float(old);
            #else
                (void) address;
                (void) val;
                return 0.0f;
            #endif
            }
            static __device__ __forceinline__ double atomicMax(double* address, double val)
            {
            #if __CUDA_ARCH__ >= 130
                unsigned long long int* address_as_ull = (unsigned long long int*) address;
                unsigned long long int old = *address_as_ull, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_ull, assumed,
                        __double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
                } while (assumed != old);
                return __longlong_as_double(old);
            #else
                (void) address;
                (void) val;
                return 0.0;
            #endif
            }
        };
    }; // struct Emulation
}}} // namespace cv { namespace cuda { namespace device

//! @endcond

#endif /* OPENCV_CUDA_EMULATION_HPP_ */
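For reference, here is a minimal usage sketch (not part of the original file, and not OpenCV API): a hypothetical kernel that counts elements above a threshold and accumulates their sum through the Emulation wrappers instead of raw atomics, so the same source builds both for architectures with native float atomics and for older ones that fall back to the CAS loop above. The kernel name, signature, and data layout are illustrative assumptions.

// Hypothetical example -- assumes emulation.hpp is on the include path.
#include "emulation.hpp"

using namespace cv::cuda::device;

__global__ void countAndSum(const float* data, int n, float thresh,
                            unsigned int* count, float* sum)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= n)
        return;

    if (data[i] > thresh)
    {
        // Wraps ::atomicAdd(unsigned int*, unsigned int) on all architectures.
        Emulation::glob::atomicAdd(count, 1u);
        // Uses native float atomicAdd on sm_20+, an atomicCAS loop otherwise.
        Emulation::glob::atomicAdd(sum, data[i]);
    }
}

The smem variants work the same way but target shared-memory words; on pre-compute-1.2 parts they rely on the 5-bit thread tag described in the header, so with that fallback only the low bits of each word are usable for the value.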