Renesas GR-PEACH OpenCV Development / gr-peach-opencv-project-sd-card_update

Fork of gr-peach-opencv-project-sd-card by the do

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers transform_detail.hpp Source File

transform_detail.hpp

00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
00015 // Third party copyrights are property of their respective owners.
00016 //
00017 // Redistribution and use in source and binary forms, with or without modification,
00018 // are permitted provided that the following conditions are met:
00019 //
00020 //   * Redistribution's of source code must retain the above copyright notice,
00021 //     this list of conditions and the following disclaimer.
00022 //
00023 //   * Redistribution's in binary form must reproduce the above copyright notice,
00024 //     this list of conditions and the following disclaimer in the documentation
00025 //     and/or other materials provided with the distribution.
00026 //
00027 //   * The name of the copyright holders may not be used to endorse or promote products
00028 //     derived from this software without specific prior written permission.
00029 //
00030 // This software is provided by the copyright holders and contributors "as is" and
00031 // any express or implied warranties, including, but not limited to, the implied
00032 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00033 // In no event shall the Intel Corporation or contributors be liable for any direct,
00034 // indirect, incidental, special, exemplary, or consequential damages
00035 // (including, but not limited to, procurement of substitute goods or services;
00036 // loss of use, data, or profits; or business interruption) however caused
00037 // and on any theory of liability, whether in contract, strict liability,
00038 // or tort (including negligence or otherwise) arising in any way out of
00039 // the use of this software, even if advised of the possibility of such damage.
00040 //
00041 //M*/
00042 
00043 #ifndef __OPENCV_CUDA_TRANSFORM_DETAIL_HPP__
00044 #define __OPENCV_CUDA_TRANSFORM_DETAIL_HPP__
00045 
00046 #include "../common.hpp"
00047 #include "../vec_traits.hpp"
00048 #include "../functional.hpp"
00049 
00050 //! @cond IGNORED
00051 
00052 namespace cv { namespace cuda { namespace device
00053 {
00054     namespace transform_detail
00055     {
00056         //! Read Write Traits
00057 
        //! Maps an element type pair (T -> D) and an unroll factor `shift` to the
        //! vector types used for one wide load and one wide store per thread
        //! (e.g. T=uchar, shift=4 -> read_type=uchar4).
        template <typename T, typename D, int shift> struct UnaryReadWriteTraits
        {
            typedef typename TypeVec<T, shift>::vec_type read_type;   // vectorized source load type
            typedef typename TypeVec<D, shift>::vec_type write_type;  // vectorized destination store type
        };
00063 
        //! Same as UnaryReadWriteTraits but for two-source (binary) transforms:
        //! vector types for the two wide loads and the single wide store.
        template <typename T1, typename T2, typename D, int shift> struct BinaryReadWriteTraits
        {
            typedef typename TypeVec<T1, shift>::vec_type read_type1;  // vectorized first-source load type
            typedef typename TypeVec<T2, shift>::vec_type read_type2;  // vectorized second-source load type
            typedef typename TypeVec<D, shift>::vec_type write_type;   // vectorized destination store type
        };
00070 
00071         //! Transform kernels
00072 
00073         template <int shift> struct OpUnroller;
00074         template <> struct OpUnroller<1>
00075         {
00076             template <typename T, typename D, typename UnOp, typename Mask>
00077             static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
00078             {
00079                 if (mask(y, x_shifted))
00080                     dst.x = op(src.x);
00081             }
00082 
00083             template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
00084             static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
00085             {
00086                 if (mask(y, x_shifted))
00087                     dst.x = op(src1.x, src2.x);
00088             }
00089         };
00090         template <> struct OpUnroller<2>
00091         {
00092             template <typename T, typename D, typename UnOp, typename Mask>
00093             static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
00094             {
00095                 if (mask(y, x_shifted))
00096                     dst.x = op(src.x);
00097                 if (mask(y, x_shifted + 1))
00098                     dst.y = op(src.y);
00099             }
00100 
00101             template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
00102             static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
00103             {
00104                 if (mask(y, x_shifted))
00105                     dst.x = op(src1.x, src2.x);
00106                 if (mask(y, x_shifted + 1))
00107                     dst.y = op(src1.y, src2.y);
00108             }
00109         };
00110         template <> struct OpUnroller<3>
00111         {
00112             template <typename T, typename D, typename UnOp, typename Mask>
00113             static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
00114             {
00115                 if (mask(y, x_shifted))
00116                     dst.x = op(src.x);
00117                 if (mask(y, x_shifted + 1))
00118                     dst.y = op(src.y);
00119                 if (mask(y, x_shifted + 2))
00120                     dst.z = op(src.z);
00121             }
00122 
00123             template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
00124             static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
00125             {
00126                 if (mask(y, x_shifted))
00127                     dst.x = op(src1.x, src2.x);
00128                 if (mask(y, x_shifted + 1))
00129                     dst.y = op(src1.y, src2.y);
00130                 if (mask(y, x_shifted + 2))
00131                     dst.z = op(src1.z, src2.z);
00132             }
00133         };
00134         template <> struct OpUnroller<4>
00135         {
00136             template <typename T, typename D, typename UnOp, typename Mask>
00137             static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
00138             {
00139                 if (mask(y, x_shifted))
00140                     dst.x = op(src.x);
00141                 if (mask(y, x_shifted + 1))
00142                     dst.y = op(src.y);
00143                 if (mask(y, x_shifted + 2))
00144                     dst.z = op(src.z);
00145                 if (mask(y, x_shifted + 3))
00146                     dst.w = op(src.w);
00147             }
00148 
00149             template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
00150             static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
00151             {
00152                 if (mask(y, x_shifted))
00153                     dst.x = op(src1.x, src2.x);
00154                 if (mask(y, x_shifted + 1))
00155                     dst.y = op(src1.y, src2.y);
00156                 if (mask(y, x_shifted + 2))
00157                     dst.z = op(src1.z, src2.z);
00158                 if (mask(y, x_shifted + 3))
00159                     dst.w = op(src1.w, src2.w);
00160             }
00161         };
00162         template <> struct OpUnroller<8>
00163         {
00164             template <typename T, typename D, typename UnOp, typename Mask>
00165             static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
00166             {
00167                 if (mask(y, x_shifted))
00168                     dst.a0 = op(src.a0);
00169                 if (mask(y, x_shifted + 1))
00170                     dst.a1 = op(src.a1);
00171                 if (mask(y, x_shifted + 2))
00172                     dst.a2 = op(src.a2);
00173                 if (mask(y, x_shifted + 3))
00174                     dst.a3 = op(src.a3);
00175                 if (mask(y, x_shifted + 4))
00176                     dst.a4 = op(src.a4);
00177                 if (mask(y, x_shifted + 5))
00178                     dst.a5 = op(src.a5);
00179                 if (mask(y, x_shifted + 6))
00180                     dst.a6 = op(src.a6);
00181                 if (mask(y, x_shifted + 7))
00182                     dst.a7 = op(src.a7);
00183             }
00184 
00185             template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
00186             static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
00187             {
00188                 if (mask(y, x_shifted))
00189                     dst.a0 = op(src1.a0, src2.a0);
00190                 if (mask(y, x_shifted + 1))
00191                     dst.a1 = op(src1.a1, src2.a1);
00192                 if (mask(y, x_shifted + 2))
00193                     dst.a2 = op(src1.a2, src2.a2);
00194                 if (mask(y, x_shifted + 3))
00195                     dst.a3 = op(src1.a3, src2.a3);
00196                 if (mask(y, x_shifted + 4))
00197                     dst.a4 = op(src1.a4, src2.a4);
00198                 if (mask(y, x_shifted + 5))
00199                     dst.a5 = op(src1.a5, src2.a5);
00200                 if (mask(y, x_shifted + 6))
00201                     dst.a6 = op(src1.a6, src2.a6);
00202                 if (mask(y, x_shifted + 7))
00203                     dst.a7 = op(src1.a7, src2.a7);
00204             }
00205         };
00206 
        //! Vectorized unary transform kernel: each thread handles ft::smart_shift
        //! consecutive pixels of one row. When the whole group fits inside the row
        //! it does one wide (vector-typed) load and one wide store; the row tail
        //! falls back to scalar per-pixel accesses.
        //! NOTE(review): the wide path reinterprets src/dst as vector pointers, so
        //! it relies on data/step being aligned to ft::smart_shift elements -- the
        //! host-side TransformDispatcher<true> checks this before launching.
        template <typename T, typename D, typename UnOp, typename Mask>
        static __global__ void transformSmart(const PtrStepSz<T> src_, PtrStep<D> dst_, const Mask mask, const UnOp op)
        {
            typedef TransformFunctorTraits<UnOp> ft;
            typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::read_type read_type;
            typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::write_type write_type;

            const int x = threadIdx.x + blockIdx.x * blockDim.x;
            const int y = threadIdx.y + blockIdx.y * blockDim.y;
            const int x_shifted = x * ft::smart_shift;  // first pixel column of this thread's group

            if (y < src_.rows)
            {
                const T* src = src_.ptr(y);
                D* dst = dst_.ptr(y);

                if (x_shifted + ft::smart_shift - 1 < src_.cols)
                {
                    // Whole group in-bounds: wide load of the source and of the
                    // current destination (so lanes rejected by the mask keep
                    // their existing values), then one wide store.
                    const read_type src_n_el = ((const read_type*)src)[x];
                    write_type dst_n_el = ((const write_type*)dst)[x];

                    OpUnroller<ft::smart_shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);

                    ((write_type*)dst)[x] = dst_n_el;
                }
                else
                {
                    // Row tail (fewer than smart_shift pixels left): scalar path.
                    for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
                    {
                        if (mask(y, real_x))
                            dst[real_x] = op(src[real_x]);
                    }
                }
            }
        }
00242 
00243         template <typename T, typename D, typename UnOp, typename Mask>
00244         __global__ static void transformSimple(const PtrStepSz<T> src, PtrStep<D> dst, const Mask mask, const UnOp op)
00245         {
00246             const int x = blockDim.x * blockIdx.x + threadIdx.x;
00247             const int y = blockDim.y * blockIdx.y + threadIdx.y;
00248 
00249             if (x < src.cols && y < src.rows && mask(y, x))
00250             {
00251                 dst.ptr(y)[x] = op(src.ptr(y)[x]);
00252             }
00253         }
00254 
        //! Vectorized binary transform kernel: like the unary transformSmart, each
        //! thread handles ft::smart_shift consecutive pixels with wide loads from
        //! both sources and one wide store, falling back to scalar accesses for
        //! the row tail. Bounds are taken from src1 (src2 is assumed to have the
        //! same geometry -- guaranteed by the caller, not checked here).
        //! NOTE(review): the wide path relies on src1/src2/dst data and step being
        //! aligned to ft::smart_shift elements -- checked by TransformDispatcher<true>.
        template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
        static __global__ void transformSmart(const PtrStepSz<T1> src1_, const PtrStep<T2> src2_, PtrStep<D> dst_,
            const Mask mask, const BinOp op)
        {
            typedef TransformFunctorTraits<BinOp> ft;
            typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type1 read_type1;
            typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type2 read_type2;
            typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::write_type write_type;

            const int x = threadIdx.x + blockIdx.x * blockDim.x;
            const int y = threadIdx.y + blockIdx.y * blockDim.y;
            const int x_shifted = x * ft::smart_shift;  // first pixel column of this thread's group

            if (y < src1_.rows)
            {
                const T1* src1 = src1_.ptr(y);
                const T2* src2 = src2_.ptr(y);
                D* dst = dst_.ptr(y);

                if (x_shifted + ft::smart_shift - 1 < src1_.cols)
                {
                    // Whole group in-bounds: wide loads of both sources and of the
                    // current destination (masked-out lanes keep their values),
                    // then one wide store.
                    const read_type1 src1_n_el = ((const read_type1*)src1)[x];
                    const read_type2 src2_n_el = ((const read_type2*)src2)[x];
                    write_type dst_n_el = ((const write_type*)dst)[x];

                    OpUnroller<ft::smart_shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);

                    ((write_type*)dst)[x] = dst_n_el;
                }
                else
                {
                    // Row tail: scalar path for the remaining pixels.
                    for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
                    {
                        if (mask(y, real_x))
                            dst[real_x] = op(src1[real_x], src2[real_x]);
                    }
                }
            }
        }
00294 
00295         template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
00296         static __global__ void transformSimple(const PtrStepSz<T1> src1, const PtrStep<T2> src2, PtrStep<D> dst,
00297             const Mask mask, const BinOp op)
00298         {
00299             const int x = blockDim.x * blockIdx.x + threadIdx.x;
00300             const int y = blockDim.y * blockIdx.y + threadIdx.y;
00301 
00302             if (x < src1.cols && y < src1.rows && mask(y, x))
00303             {
00304                 const T1 src1_data = src1.ptr(y)[x];
00305                 const T2 src2_data = src2.ptr(y)[x];
00306                 dst.ptr(y)[x] = op(src1_data, src2_data);
00307             }
00308         }
00309 
00310         template <bool UseSmart> struct TransformDispatcher;
00311         template<> struct TransformDispatcher<false>
00312         {
00313             template <typename T, typename D, typename UnOp, typename Mask>
00314             static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
00315             {
00316                 typedef TransformFunctorTraits<UnOp> ft;
00317 
00318                 const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
00319                 const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);
00320 
00321                 transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
00322                 cudaSafeCall( cudaGetLastError() );
00323 
00324                 if (stream == 0)
00325                     cudaSafeCall( cudaDeviceSynchronize() );
00326             }
00327 
00328             template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
00329             static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
00330             {
00331                 typedef TransformFunctorTraits<BinOp> ft;
00332 
00333                 const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
00334                 const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);
00335 
00336                 transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
00337                 cudaSafeCall( cudaGetLastError() );
00338 
00339                 if (stream == 0)
00340                     cudaSafeCall( cudaDeviceSynchronize() );
00341             }
00342         };
00343         template<> struct TransformDispatcher<true>
00344         {
00345             template <typename T, typename D, typename UnOp, typename Mask>
00346             static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
00347             {
00348                 typedef TransformFunctorTraits<UnOp> ft;
00349 
00350                 CV_StaticAssert(ft::smart_shift != 1, "");
00351 
00352                 if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) ||
00353                     !isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
00354                 {
00355                     TransformDispatcher<false>::call(src, dst, op, mask, stream);
00356                     return;
00357                 }
00358 
00359                 const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
00360                 const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);
00361 
00362                 transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
00363                 cudaSafeCall( cudaGetLastError() );
00364 
00365                 if (stream == 0)
00366                     cudaSafeCall( cudaDeviceSynchronize() );
00367             }
00368 
00369             template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
00370             static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
00371             {
00372                 typedef TransformFunctorTraits<BinOp> ft;
00373 
00374                 CV_StaticAssert(ft::smart_shift != 1, "");
00375 
00376                 if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
00377                     !isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) ||
00378                     !isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
00379                 {
00380                     TransformDispatcher<false>::call(src1, src2, dst, op, mask, stream);
00381                     return;
00382                 }
00383 
00384                 const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
00385                 const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);
00386 
00387                 transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
00388                 cudaSafeCall( cudaGetLastError() );
00389 
00390                 if (stream == 0)
00391                     cudaSafeCall( cudaDeviceSynchronize() );
00392             }
00393         };
00394     } // namespace transform_detail
00395 }}} // namespace cv { namespace cuda { namespace device
00396 
00397 //! @endcond
00398 
00399 #endif // __OPENCV_CUDA_TRANSFORM_DETAIL_HPP__
00400