Renesas GR-PEACH OpenCV Development / gr-peach-opencv-project-sd-card_update

Fork of gr-peach-opencv-project-sd-card by the do

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers reduce_key_val.hpp Source File

reduce_key_val.hpp

00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
00015 // Third party copyrights are property of their respective owners.
00016 //
00017 // Redistribution and use in source and binary forms, with or without modification,
00018 // are permitted provided that the following conditions are met:
00019 //
00020 //   * Redistribution's of source code must retain the above copyright notice,
00021 //     this list of conditions and the following disclaimer.
00022 //
00023 //   * Redistribution's in binary form must reproduce the above copyright notice,
00024 //     this list of conditions and the following disclaimer in the documentation
00025 //     and/or other materials provided with the distribution.
00026 //
00027 //   * The name of the copyright holders may not be used to endorse or promote products
00028 //     derived from this software without specific prior written permission.
00029 //
00030 // This software is provided by the copyright holders and contributors "as is" and
00031 // any express or implied warranties, including, but not limited to, the implied
00032 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00033 // In no event shall the Intel Corporation or contributors be liable for any direct,
00034 // indirect, incidental, special, exemplary, or consequential damages
00035 // (including, but not limited to, procurement of substitute goods or services;
00036 // loss of use, data, or profits; or business interruption) however caused
00037 // and on any theory of liability, whether in contract, strict liability,
00038 // or tort (including negligence or otherwise) arising in any way out of
00039 // the use of this software, even if advised of the possibility of such damage.
00040 //
00041 //M*/
00042 
00043 #ifndef __OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP__
00044 #define __OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP__
00045 
00046 #include <thrust/tuple.h>
00047 #include "../warp.hpp"
00048 #include "../warp_shuffle.hpp"
00049 
00050 //! @cond IGNORED
00051 
00052 namespace cv { namespace cuda { namespace device
00053 {
00054     namespace reduce_key_val_detail
00055     {
        // GetType<T>: strips one level of pointer/reference (and volatile on
        // the pointee) to recover the underlying element type.  Used below to
        // declare a register temporary matching a tuple element that is held
        // as a (volatile) shared-memory pointer or as a reference.
        template <typename T> struct GetType;
        template <typename T> struct GetType<T*>
        {
            typedef T type;
        };
        template <typename T> struct GetType<volatile T*>
        {
            typedef T type;
        };
        template <typename T> struct GetType<T&>
        {
            typedef T type;
        };
        // For<I, N>: compile-time loop over tuple elements [I, N).  Each static
        // member applies its element-wise operation to thrust::get<I>(...) and
        // recurses with I + 1; the For<N, N> specialization below terminates
        // the recursion with no-ops.
        template <unsigned int I, unsigned int N>
        struct For
        {
            // Store element I of the thread's value tuple into its slot of the
            // corresponding shared-memory array, then recurse.
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
            {
                thrust::get<I>(smem)[tid] = thrust::get<I>(data);

                For<I + 1, N>::loadToSmem(smem, data, tid);
            }
            // Inverse of loadToSmem: read element I back from shared memory.
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
            {
                thrust::get<I>(data) = thrust::get<I>(smem)[tid];

                For<I + 1, N>::loadFromSmem(smem, data, tid);
            }

            // Replace element I with the copy held `delta` lanes below in the
            // warp, via shfl_down (from ../warp_shuffle.hpp).
            template <class ReferenceTuple>
            static __device__ void copyShfl(const ReferenceTuple& val, unsigned int delta, int width)
            {
                thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);

                For<I + 1, N>::copyShfl(val, delta, width);
            }
            // Shared-memory variant: pull element I from the partner thread at
            // tid + delta, updating both the register copy and the smem slot.
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void copy(const PointerTuple& svals, const ReferenceTuple& val, unsigned int tid, unsigned int delta)
            {
                thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];

                For<I + 1, N>::copy(svals, val, tid, delta);
            }

            // Shuffle-based merge: fetch the partner lane's key for element I;
            // if it wins under the element's comparator, adopt that key and
            // shuffle in the partner's matching value.
            template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
            static __device__ void mergeShfl(const KeyReferenceTuple& key, const ValReferenceTuple& val, const CmpTuple& cmp, unsigned int delta, int width)
            {
                typename GetType<typename thrust::tuple_element<I, KeyReferenceTuple>::type>::type reg = shfl_down(thrust::get<I>(key), delta, width);

                if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
                {
                    thrust::get<I>(key) = reg;
                    thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
                }

                For<I + 1, N>::mergeShfl(key, val, cmp, delta, width);
            }
            // Shared-memory merge: compare against the key stored by thread
            // tid + delta; on a win, update both the register and smem copies
            // of the key and its associated value.
            template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
            static __device__ void merge(const KeyPointerTuple& skeys, const KeyReferenceTuple& key,
                                         const ValPointerTuple& svals, const ValReferenceTuple& val,
                                         const CmpTuple& cmp,
                                         unsigned int tid, unsigned int delta)
            {
                typename GetType<typename thrust::tuple_element<I, KeyPointerTuple>::type>::type reg = thrust::get<I>(skeys)[tid + delta];

                if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
                {
                    thrust::get<I>(skeys)[tid] = thrust::get<I>(key) = reg;
                    thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
                }

                For<I + 1, N>::merge(skeys, key, svals, val, cmp, tid, delta);
            }
        };
        // Recursion terminator: once I == N every operation is an empty no-op.
        template <unsigned int N>
        struct For<N, N>
        {
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void loadToSmem(const PointerTuple&, const ReferenceTuple&, unsigned int)
            {
            }
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void loadFromSmem(const PointerTuple&, const ReferenceTuple&, unsigned int)
            {
            }

            template <class ReferenceTuple>
            static __device__ void copyShfl(const ReferenceTuple&, unsigned int, int)
            {
            }
            template <class PointerTuple, class ReferenceTuple>
            static __device__ void copy(const PointerTuple&, const ReferenceTuple&, unsigned int, unsigned int)
            {
            }

            template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
            static __device__ void mergeShfl(const KeyReferenceTuple&, const ValReferenceTuple&, const CmpTuple&, unsigned int, int)
            {
            }
            template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
            static __device__ void merge(const KeyPointerTuple&, const KeyReferenceTuple&,
                                         const ValPointerTuple&, const ValReferenceTuple&,
                                         const CmpTuple&,
                                         unsigned int, unsigned int)
            {
            }
        };
00166 
00167         //////////////////////////////////////////////////////
00168         // loadToSmem
00169 
        // Store one scalar (key or value) element into shared memory.
        template <typename T>
        __device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, unsigned int tid)
        {
            smem[tid] = data;
        }
        // Read one scalar element back from shared memory.
        template <typename T>
        __device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, unsigned int tid)
        {
            data = smem[tid];
        }
        // Tuple overloads: forward each of the (up to 10) elements through the
        // For<> compile-time loop.  Unused slots are thrust::null_type entries,
        // so tuple_size gives the real element count.
        template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
        __device__ __forceinline__ void loadToSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
                                                   const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
                                                   unsigned int tid)
        {
            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadToSmem(smem, data, tid);
        }
        template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
        __device__ __forceinline__ void loadFromSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
                                                     const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
                                                     unsigned int tid)
        {
            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadFromSmem(smem, data, tid);
        }
00196 
00197         //////////////////////////////////////////////////////
00198         // copyVals
00199 
        // copyValsShfl / copyVals: overwrite this thread's value(s) with the
        // winning partner's, either via warp shuffle (Shfl variants) or through
        // shared memory (updating both the register and the smem slot).
        template <typename V>
        __device__ __forceinline__ void copyValsShfl(V& val, unsigned int delta, int width)
        {
            val = shfl_down(val, delta, width);
        }
        template <typename V>
        __device__ __forceinline__ void copyVals(volatile V* svals, V& val, unsigned int tid, unsigned int delta)
        {
            svals[tid] = val = svals[tid + delta];
        }
        // Tuple overloads: apply the same copy element-wise via For<>.
        template <typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
        __device__ __forceinline__ void copyValsShfl(const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                     unsigned int delta,
                                                     int width)
        {
            For<0, thrust::tuple_size<thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9> >::value>::copyShfl(val, delta, width);
        }
        template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
        __device__ __forceinline__ void copyVals(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                                 const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                 unsigned int tid, unsigned int delta)
        {
            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::copy(svals, val, tid, delta);
        }
00225 
00226         //////////////////////////////////////////////////////
00227         // merge
00228 
        // mergeShfl / merge: one pairwise reduction step.  The partner's key is
        // fetched (by shuffle or from shared memory at tid + delta); if
        // cmp(partner, mine) is true the partner's key wins and its value(s)
        // are copied over as well.  Overloads cover scalar key + scalar value,
        // scalar key + value tuple, and key tuple + value tuple (with one
        // comparator per key element).

        // Scalar key, scalar value — shuffle flavour.
        template <typename K, typename V, class Cmp>
        __device__ __forceinline__ void mergeShfl(K& key, V& val, const Cmp& cmp, unsigned int delta, int width)
        {
            K reg = shfl_down(key, delta, width);

            if (cmp(reg, key))
            {
                key = reg;
                copyValsShfl(val, delta, width);
            }
        }
        // Scalar key, scalar value — shared-memory flavour (updates smem too).
        template <typename K, typename V, class Cmp>
        __device__ __forceinline__ void merge(volatile K* skeys, K& key, volatile V* svals, V& val, const Cmp& cmp, unsigned int tid, unsigned int delta)
        {
            K reg = skeys[tid + delta];

            if (cmp(reg, key))
            {
                skeys[tid] = key = reg;
                copyVals(svals, val, tid, delta);
            }
        }
        // Scalar key, tuple of values — shuffle flavour.
        template <typename K,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
                  class Cmp>
        __device__ __forceinline__ void mergeShfl(K& key,
                                                  const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                  const Cmp& cmp,
                                                  unsigned int delta, int width)
        {
            K reg = shfl_down(key, delta, width);

            if (cmp(reg, key))
            {
                key = reg;
                copyValsShfl(val, delta, width);
            }
        }
        // Scalar key, tuple of values — shared-memory flavour.
        template <typename K,
                  typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
                  class Cmp>
        __device__ __forceinline__ void merge(volatile K* skeys, K& key,
                                              const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                              const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                              const Cmp& cmp, unsigned int tid, unsigned int delta)
        {
            K reg = skeys[tid + delta];

            if (cmp(reg, key))
            {
                skeys[tid] = key = reg;
                copyVals(svals, val, tid, delta);
            }
        }
        // Tuple of keys, tuple of values — shuffle flavour; each key element
        // is merged independently with its own comparator via For<>.
        template <typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
                  class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
        __device__ __forceinline__ void mergeShfl(const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
                                                  const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                                  const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
                                                  unsigned int delta, int width)
        {
            For<0, thrust::tuple_size<thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9> >::value>::mergeShfl(key, val, cmp, delta, width);
        }
        // Tuple of keys, tuple of values — shared-memory flavour.
        template <typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
                  typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
                  typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
                  typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
                  class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
        __device__ __forceinline__ void merge(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
                                              const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
                                              const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
                                              const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
                                              const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
                                              unsigned int tid, unsigned int delta)
        {
            For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::merge(skeys, key, svals, val, cmp, tid, delta);
        }
00308 
00309         //////////////////////////////////////////////////////
00310         // Generic
00311 
00312         template <unsigned int N> struct Generic
00313         {
00314             template <class KP, class KR, class VP, class VR, class Cmp>
00315             static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
00316             {
00317                 loadToSmem(skeys, key, tid);
00318                 loadValsToSmem(svals, val, tid);
00319                 if (N >= 32)
00320                     __syncthreads();
00321 
00322                 if (N >= 2048)
00323                 {
00324                     if (tid < 1024)
00325                         merge(skeys, key, svals, val, cmp, tid, 1024);
00326 
00327                     __syncthreads();
00328                 }
00329                 if (N >= 1024)
00330                 {
00331                     if (tid < 512)
00332                         merge(skeys, key, svals, val, cmp, tid, 512);
00333 
00334                     __syncthreads();
00335                 }
00336                 if (N >= 512)
00337                 {
00338                     if (tid < 256)
00339                         merge(skeys, key, svals, val, cmp, tid, 256);
00340 
00341                     __syncthreads();
00342                 }
00343                 if (N >= 256)
00344                 {
00345                     if (tid < 128)
00346                         merge(skeys, key, svals, val, cmp, tid, 128);
00347 
00348                     __syncthreads();
00349                 }
00350                 if (N >= 128)
00351                 {
00352                     if (tid < 64)
00353                         merge(skeys, key, svals, val, cmp, tid, 64);
00354 
00355                     __syncthreads();
00356                 }
00357                 if (N >= 64)
00358                 {
00359                     if (tid < 32)
00360                         merge(skeys, key, svals, val, cmp, tid, 32);
00361                 }
00362 
00363                 if (tid < 16)
00364                 {
00365                     merge(skeys, key, svals, val, cmp, tid, 16);
00366                     merge(skeys, key, svals, val, cmp, tid, 8);
00367                     merge(skeys, key, svals, val, cmp, tid, 4);
00368                     merge(skeys, key, svals, val, cmp, tid, 2);
00369                     merge(skeys, key, svals, val, cmp, tid, 1);
00370                 }
00371             }
00372         };
00373 
        // Unroll<I, ...>: fully unrolled halving loop — performs one merge at
        // stride I, then recurses with I / 2 until the Unroll<0, ...>
        // terminator below.
        template <unsigned int I, class KP, class KR, class VP, class VR, class Cmp>
        struct Unroll
        {
            // Warp-shuffle flavour (used by the __CUDA_ARCH__ >= 300 paths,
            // which are currently compiled out in this fork).
            static __device__ void loopShfl(KR key, VR val, Cmp cmp, unsigned int N)
            {
                mergeShfl(key, val, cmp, I, N);
                Unroll<I / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
            }
            // Shared-memory flavour.
            static __device__ void loop(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
            {
                merge(skeys, key, svals, val, cmp, tid, I);
                Unroll<I / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
            }
        };
        // Recursion terminator for the unrolled loop: stride 0 does nothing.
        template <class KP, class KR, class VP, class VR, class Cmp>
        struct Unroll<0, KP, KR, VP, VR, Cmp>
        {
            static __device__ void loopShfl(KR, VR, Cmp, unsigned int)
            {
            }
            static __device__ void loop(KP, KR, VP, VR, unsigned int, Cmp)
            {
            }
        };
00398 
        // WarpOptimized<N>: reduction for power-of-two N <= 32, i.e. at most a
        // single warp.  NOTE(review): the warp-shuffle path is compiled out
        // with `#if 0` in this fork (upstream OpenCV gates it on
        // __CUDA_ARCH__ >= 300), so only the shared-memory path is active.
        template <unsigned int N> struct WarpOptimized
        {
            template <class KP, class KR, class VP, class VR, class Cmp>
            static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
            {
            #if 0 // __CUDA_ARCH__ >= 300
                (void) skeys;
                (void) svals;
                (void) tid;

                Unroll<N / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
            #else
                loadToSmem(skeys, key, tid);
                loadToSmem(svals, val, tid);

                // No __syncthreads(): all N threads are in one warp, relying on
                // warp-synchronous execution plus the volatile smem pointers.
                if (tid < N / 2)
                    Unroll<N / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
            #endif
            }
        };
00419 
        // GenericOptimized32<N>: reduction for power-of-two N <= 1024.  Phase 1
        // reduces each 32-thread warp independently; each warp's winner is
        // parked in smem slot tid / 32; phase 2 has the first M = N/32 threads
        // reduce those partial winners.  As in WarpOptimized, the shuffle paths
        // are compiled out with `#if 0` in this fork (upstream gates them on
        // __CUDA_ARCH__ >= 300).
        template <unsigned int N> struct GenericOptimized32
        {
            enum { M = N / 32 };  // number of warps, and of phase-1 winners

            template <class KP, class KR, class VP, class VR, class Cmp>
            static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
            {
                const unsigned int laneId = Warp::laneId();

            #if 0 // __CUDA_ARCH__ >= 300
                Unroll<16, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, warpSize);

                if (laneId == 0)
                {
                    loadToSmem(skeys, key, tid / 32);
                    loadToSmem(svals, val, tid / 32);
                }
            #else
                loadToSmem(skeys, key, tid);
                loadToSmem(svals, val, tid);

                // Phase 1: intra-warp reduction (warp-synchronous, no barrier).
                if (laneId < 16)
                    Unroll<16, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);

                __syncthreads();

                // Lane 0 of each warp deposits its warp's winner at slot tid/32.
                if (laneId == 0)
                {
                    loadToSmem(skeys, key, tid / 32);
                    loadToSmem(svals, val, tid / 32);
                }
            #endif

                __syncthreads();

                loadFromSmem(skeys, key, tid);

                // Phase 2: first warp reduces the M per-warp winners; thread 0
                // ends up with the final result.
                if (tid < 32)
                {
                #if 0 // __CUDA_ARCH__ >= 300
                    loadFromSmem(svals, val, tid);

                    Unroll<M / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, M);
                #else
                    Unroll<M / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
                #endif
                }
            }
        };
00469 
        // StaticIf<cond, T1, T2>: compile-time type selection — yields T1 when
        // cond is true, T2 otherwise (pre-C++11 stand-in for std::conditional).
        template <bool val, class T1, class T2> struct StaticIf;
        template <class T1, class T2> struct StaticIf<true, T1, T2>
        {
            typedef T1 type;
        };
        template <class T1, class T2> struct StaticIf<false, T1, T2>
        {
            typedef T2 type;
        };
00479 
        // IsPowerOf2<N>: true iff N is a non-zero power of two
        // (standard N & (N - 1) trick).
        template <unsigned int N> struct IsPowerOf2
        {
            enum { value = ((N != 0) && !(N & (N - 1))) };
        };
00484 
        // Dispatcher<N>: selects the reduction strategy for N threads:
        //   - WarpOptimized<N>      for power-of-two N <= 32 (single warp)
        //   - GenericOptimized32<N> for power-of-two N <= 1024 (warp-then-block)
        //   - Generic<N>            for everything else (plain smem tree)
        template <unsigned int N> struct Dispatcher
        {
            typedef typename StaticIf<
                (N <= 32) && IsPowerOf2<N>::value,
                WarpOptimized<N>,
                typename StaticIf<
                    (N <= 1024) && IsPowerOf2<N>::value,
                    GenericOptimized32<N>,
                    Generic<N>
                >::type
            >::type reductor;
        };
00497     }
00498 }}}
00499 
00500 //! @endcond
00501 
00502 #endif // __OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP__
00503