Renesas GR-PEACH OpenCV Development / gr-peach-opencv-project-sd-card_update

Fork of gr-peach-opencv-project-sd-card by the do

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers split.cpp Source File

split.cpp

00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
00015 // Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
00016 // Third party copyrights are property of their respective owners.
00017 //
00018 // Redistribution and use in source and binary forms, with or without modification,
00019 // are permitted provided that the following conditions are met:
00020 //
00021 //   * Redistribution's of source code must retain the above copyright notice,
00022 //     this list of conditions and the following disclaimer.
00023 //
00024 //   * Redistribution's in binary form must reproduce the above copyright notice,
00025 //     this list of conditions and the following disclaimer in the documentation
00026 //     and/or other materials provided with the distribution.
00027 //
00028 //   * The name of the copyright holders may not be used to endorse or promote products
00029 //     derived from this software without specific prior written permission.
00030 //
00031 // This software is provided by the copyright holders and contributors "as is" and
00032 // any express or implied warranties, including, but not limited to, the implied
00033 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00034 // In no event shall the Intel Corporation or contributors be liable for any direct,
00035 // indirect, incidental, special, exemplary, or consequential damages
00036 // (including, but not limited to, procurement of substitute goods or services;
00037 // loss of use, data, or profits; or business interruption) however caused
00038 // and on any theory of liability, whether in contract, strict liability,
00039 // or tort (including negligence or otherwise) arising in any way out of
00040 // the use of this software, even if advised of the possibility of such damage.
00041 //
00042 //M*/
00043 
00044 #include "precomp.hpp"
00045 
00046 namespace cv { namespace hal {
00047 
00048 #if CV_NEON
// Primary templates of the NEON split kernels. They are declared only;
// usable definitions come from explicit specializations.
template <class T> struct VSplit2;
template <class T> struct VSplit3;
template <class T> struct VSplit4;
00052 
// Generates an explicit specialization `name<elem_t>` whose call operator
// performs one vector step of a two-channel split: a single `vload` from
// `src` yields a register structure, and its members val[0] / val[1] are
// written to `dst0` / `dst1` with `vstore`.
// `vload` is expected to be a de-interleaving NEON load (vld2*) and `vstore`
// the matching plain store (vst1*).
#define SPLIT2_KERNEL_TEMPLATE(name, elem_t, vec_t, vload, vstore)    \
    template<>                                                        \
    struct name<elem_t>                                               \
    {                                                                 \
        void operator()(const elem_t* src,                            \
                        elem_t* dst0, elem_t* dst1) const             \
        {                                                             \
            const vec_t planes = vload(src);                          \
            vstore(dst0, planes.val[0]);                              \
            vstore(dst1, planes.val[1]);                              \
        }                                                             \
    }
00065 
// Generates an explicit specialization `name<elem_t>` whose call operator
// performs one vector step of a three-channel split: a single `vload` from
// `src` yields a register structure, and its members val[0..2] are written
// to `dst0`, `dst1` and `dst2` with `vstore`.
// `vload` is expected to be a de-interleaving NEON load (vld3*) and `vstore`
// the matching plain store (vst1*).
#define SPLIT3_KERNEL_TEMPLATE(name, elem_t, vec_t, vload, vstore)    \
    template<>                                                        \
    struct name<elem_t>                                               \
    {                                                                 \
        void operator()(const elem_t* src,                            \
                        elem_t* dst0, elem_t* dst1,                   \
                        elem_t* dst2) const                           \
        {                                                             \
            const vec_t planes = vload(src);                          \
            vstore(dst0, planes.val[0]);                              \
            vstore(dst1, planes.val[1]);                              \
            vstore(dst2, planes.val[2]);                              \
        }                                                             \
    }
00079 
// Generates an explicit specialization `name<elem_t>` whose call operator
// performs one vector step of a four-channel split: a single `vload` from
// `src` yields a register structure, and its members val[0..3] are written
// to `dst0` .. `dst3` with `vstore`.
// `vload` is expected to be a de-interleaving NEON load (vld4*) and `vstore`
// the matching plain store (vst1*).
#define SPLIT4_KERNEL_TEMPLATE(name, elem_t, vec_t, vload, vstore)    \
    template<>                                                        \
    struct name<elem_t>                                               \
    {                                                                 \
        void operator()(const elem_t* src,                            \
                        elem_t* dst0, elem_t* dst1,                   \
                        elem_t* dst2, elem_t* dst3) const             \
        {                                                             \
            const vec_t planes = vload(src);                          \
            vstore(dst0, planes.val[0]);                              \
            vstore(dst1, planes.val[1]);                              \
            vstore(dst2, planes.val[2]);                              \
            vstore(dst3, planes.val[3]);                              \
        }                                                             \
    }
00094 
00095 SPLIT2_KERNEL_TEMPLATE(VSplit2, uchar ,  uint8x16x2_t, vld2q_u8 , vst1q_u8 );
00096 SPLIT2_KERNEL_TEMPLATE(VSplit2, ushort,  uint16x8x2_t, vld2q_u16, vst1q_u16);
00097 SPLIT2_KERNEL_TEMPLATE(VSplit2, int   ,   int32x4x2_t, vld2q_s32, vst1q_s32);
00098 SPLIT2_KERNEL_TEMPLATE(VSplit2, int64 ,   int64x1x2_t, vld2_s64 , vst1_s64 );
00099 
00100 SPLIT3_KERNEL_TEMPLATE(VSplit3, uchar ,  uint8x16x3_t, vld3q_u8 , vst1q_u8 );
00101 SPLIT3_KERNEL_TEMPLATE(VSplit3, ushort,  uint16x8x3_t, vld3q_u16, vst1q_u16);
00102 SPLIT3_KERNEL_TEMPLATE(VSplit3, int   ,   int32x4x3_t, vld3q_s32, vst1q_s32);
00103 SPLIT3_KERNEL_TEMPLATE(VSplit3, int64 ,   int64x1x3_t, vld3_s64 , vst1_s64 );
00104 
00105 SPLIT4_KERNEL_TEMPLATE(VSplit4, uchar ,  uint8x16x4_t, vld4q_u8 , vst1q_u8 );
00106 SPLIT4_KERNEL_TEMPLATE(VSplit4, ushort,  uint16x8x4_t, vld4q_u16, vst1q_u16);
00107 SPLIT4_KERNEL_TEMPLATE(VSplit4, int   ,   int32x4x4_t, vld4q_s32, vst1q_s32);
00108 SPLIT4_KERNEL_TEMPLATE(VSplit4, int64 ,   int64x1x4_t, vld4_s64 , vst1_s64 );
00109 
00110 #elif CV_SSE2
00111 
// Fallback two-channel kernel for element types without a vectorized
// specialization: it reports `support == false` and its call operator
// does nothing.
template <class T>
struct VSplit2
{
    bool support;

    VSplit2() : support(false) {}

    void operator()(const T* /*src*/, T* /*dst0*/, T* /*dst1*/) const {}
};
00120 
// Fallback three-channel kernel for element types without a vectorized
// specialization: it reports `support == false` and its call operator
// does nothing.
template <class T>
struct VSplit3
{
    bool support;

    VSplit3() : support(false) {}

    void operator()(const T* /*src*/, T* /*dst0*/, T* /*dst1*/, T* /*dst2*/) const {}
};
00129 
// Fallback four-channel kernel for element types without a vectorized
// specialization: it reports `support == false` and its call operator
// does nothing.
template <class T>
struct VSplit4
{
    bool support;

    VSplit4() : support(false) {}

    void operator()(const T* /*src*/, T* /*dst0*/, T* /*dst1*/,
                    T* /*dst2*/, T* /*dst3*/) const {}
};
00138 
// Generates the SSE2 specialization VSplit2<elem_t>.
//   elem_t         element type of the buffers
//   vec_t          register type held between load and store
//   ptr_t          pointee type expected by the load/store intrinsics
//   deinterleave   helper called on the four loaded registers (by name, so it
//                  presumably rearranges them in place -- see its definition)
//   suffix         pasted onto _mm_loadu_ / _mm_storeu_ to pick the intrinsic
// `support` is set from checkHardwareSupport(CV_CPU_SSE2) at construction.
// One call reads four unaligned 16-byte vectors from `src` and writes two
// vectors to each of `dst0` and `dst1`.
#define SPLIT2_KERNEL_TEMPLATE(elem_t, vec_t, ptr_t, deinterleave, suffix)                    \
template <>                                                                                   \
struct VSplit2<elem_t>                                                                        \
{                                                                                             \
    enum { ELEMS_IN_VEC = 16 / sizeof(elem_t) };                                              \
                                                                                              \
    bool support;                                                                             \
                                                                                              \
    VSplit2() : support(checkHardwareSupport(CV_CPU_SSE2)) { }                                \
                                                                                              \
    void operator()(const elem_t* src, elem_t* dst0, elem_t* dst1) const                      \
    {                                                                                         \
        vec_t r0 = _mm_loadu_##suffix(reinterpret_cast<ptr_t const *>(src));                  \
        vec_t r1 = _mm_loadu_##suffix(reinterpret_cast<ptr_t const *>(src + ELEMS_IN_VEC));   \
        vec_t r2 = _mm_loadu_##suffix(reinterpret_cast<ptr_t const *>(src + ELEMS_IN_VEC * 2)); \
        vec_t r3 = _mm_loadu_##suffix(reinterpret_cast<ptr_t const *>(src + ELEMS_IN_VEC * 3)); \
                                                                                              \
        deinterleave(r0, r1, r2, r3);                                                         \
                                                                                              \
        _mm_storeu_##suffix(reinterpret_cast<ptr_t *>(dst0), r0);                             \
        _mm_storeu_##suffix(reinterpret_cast<ptr_t *>(dst0 + ELEMS_IN_VEC), r1);              \
        _mm_storeu_##suffix(reinterpret_cast<ptr_t *>(dst1), r2);                             \
        _mm_storeu_##suffix(reinterpret_cast<ptr_t *>(dst1 + ELEMS_IN_VEC), r3);              \
    }                                                                                         \
}
00171 
// Generates the SSE2 specialization VSplit3<elem_t>.
//   elem_t         element type of the buffers
//   vec_t          register type held between load and store
//   ptr_t          pointee type expected by the load/store intrinsics
//   deinterleave   helper called on the six loaded registers (by name, so it
//                  presumably rearranges them in place -- see its definition)
//   suffix         pasted onto _mm_loadu_ / _mm_storeu_ to pick the intrinsic
// `support` is set from checkHardwareSupport(CV_CPU_SSE2) at construction.
// One call reads six unaligned 16-byte vectors from `src` and writes two
// vectors to each of `dst0`, `dst1` and `dst2`.
#define SPLIT3_KERNEL_TEMPLATE(elem_t, vec_t, ptr_t, deinterleave, suffix)                    \
template <>                                                                                   \
struct VSplit3<elem_t>                                                                        \
{                                                                                             \
    enum { ELEMS_IN_VEC = 16 / sizeof(elem_t) };                                              \
                                                                                              \
    bool support;                                                                             \
                                                                                              \
    VSplit3() : support(checkHardwareSupport(CV_CPU_SSE2)) { }                                \
                                                                                              \
    void operator()(const elem_t* src,                                                        \
                    elem_t* dst0, elem_t* dst1, elem_t* dst2) const                           \
    {                                                                                         \
        vec_t r0 = _mm_loadu_##suffix(reinterpret_cast<ptr_t const *>(src));                  \
        vec_t r1 = _mm_loadu_##suffix(reinterpret_cast<ptr_t const *>(src + ELEMS_IN_VEC));   \
        vec_t r2 = _mm_loadu_##suffix(reinterpret_cast<ptr_t const *>(src + ELEMS_IN_VEC * 2)); \
        vec_t r3 = _mm_loadu_##suffix(reinterpret_cast<ptr_t const *>(src + ELEMS_IN_VEC * 3)); \
        vec_t r4 = _mm_loadu_##suffix(reinterpret_cast<ptr_t const *>(src + ELEMS_IN_VEC * 4)); \
        vec_t r5 = _mm_loadu_##suffix(reinterpret_cast<ptr_t const *>(src + ELEMS_IN_VEC * 5)); \
                                                                                              \
        deinterleave(r0, r1, r2, r3, r4, r5);                                                 \
                                                                                              \
        _mm_storeu_##suffix(reinterpret_cast<ptr_t *>(dst0), r0);                             \
        _mm_storeu_##suffix(reinterpret_cast<ptr_t *>(dst0 + ELEMS_IN_VEC), r1);              \
        _mm_storeu_##suffix(reinterpret_cast<ptr_t *>(dst1), r2);                             \
        _mm_storeu_##suffix(reinterpret_cast<ptr_t *>(dst1 + ELEMS_IN_VEC), r3);              \
        _mm_storeu_##suffix(reinterpret_cast<ptr_t *>(dst2), r4);                             \
        _mm_storeu_##suffix(reinterpret_cast<ptr_t *>(dst2 + ELEMS_IN_VEC), r5);              \
    }                                                                                         \
}
00209 
// Generates the SSE2 specialization VSplit4<elem_t>.
//   elem_t         element type of the buffers
//   vec_t          register type held between load and store
//   ptr_t          pointee type expected by the load/store intrinsics
//   deinterleave   helper called on the eight loaded registers (by name, so it
//                  presumably rearranges them in place -- see its definition)
//   suffix         pasted onto _mm_loadu_ / _mm_storeu_ to pick the intrinsic
// `support` is set from checkHardwareSupport(CV_CPU_SSE2) at construction.
// One call reads eight unaligned 16-byte vectors from `src` and writes two
// vectors to each of `dst0` .. `dst3`.
#define SPLIT4_KERNEL_TEMPLATE(elem_t, vec_t, ptr_t, deinterleave, suffix)                    \
template <>                                                                                   \
struct VSplit4<elem_t>                                                                        \
{                                                                                             \
    enum { ELEMS_IN_VEC = 16 / sizeof(elem_t) };                                              \
                                                                                              \
    bool support;                                                                             \
                                                                                              \
    VSplit4() : support(checkHardwareSupport(CV_CPU_SSE2)) { }                                \
                                                                                              \
    void operator()(const elem_t* src, elem_t* dst0, elem_t* dst1,                            \
                    elem_t* dst2, elem_t* dst3) const                                         \
    {                                                                                         \
        vec_t r0 = _mm_loadu_##suffix(reinterpret_cast<ptr_t const *>(src));                  \
        vec_t r1 = _mm_loadu_##suffix(reinterpret_cast<ptr_t const *>(src + ELEMS_IN_VEC));   \
        vec_t r2 = _mm_loadu_##suffix(reinterpret_cast<ptr_t const *>(src + ELEMS_IN_VEC * 2)); \
        vec_t r3 = _mm_loadu_##suffix(reinterpret_cast<ptr_t const *>(src + ELEMS_IN_VEC * 3)); \
        vec_t r4 = _mm_loadu_##suffix(reinterpret_cast<ptr_t const *>(src + ELEMS_IN_VEC * 4)); \
        vec_t r5 = _mm_loadu_##suffix(reinterpret_cast<ptr_t const *>(src + ELEMS_IN_VEC * 5)); \
        vec_t r6 = _mm_loadu_##suffix(reinterpret_cast<ptr_t const *>(src + ELEMS_IN_VEC * 6)); \
        vec_t r7 = _mm_loadu_##suffix(reinterpret_cast<ptr_t const *>(src + ELEMS_IN_VEC * 7)); \
                                                                                              \
        deinterleave(r0, r1, r2, r3, r4, r5, r6, r7);                                         \
                                                                                              \
        _mm_storeu_##suffix(reinterpret_cast<ptr_t *>(dst0), r0);                             \
        _mm_storeu_##suffix(reinterpret_cast<ptr_t *>(dst0 + ELEMS_IN_VEC), r1);              \
        _mm_storeu_##suffix(reinterpret_cast<ptr_t *>(dst1), r2);                             \
        _mm_storeu_##suffix(reinterpret_cast<ptr_t *>(dst1 + ELEMS_IN_VEC), r3);              \
        _mm_storeu_##suffix(reinterpret_cast<ptr_t *>(dst2), r4);                             \
        _mm_storeu_##suffix(reinterpret_cast<ptr_t *>(dst2 + ELEMS_IN_VEC), r5);              \
        _mm_storeu_##suffix(reinterpret_cast<ptr_t *>(dst3), r6);                             \
        _mm_storeu_##suffix(reinterpret_cast<ptr_t *>(dst3 + ELEMS_IN_VEC), r7);              \
    }                                                                                         \
}
00251 
00252 SPLIT2_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_deinterleave_epi8, si128);
00253 SPLIT2_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_deinterleave_epi16, si128);
00254 SPLIT2_KERNEL_TEMPLATE(   int,  __m128,   float, _mm_deinterleave_ps, ps);
00255 
00256 SPLIT3_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_deinterleave_epi8, si128);
00257 SPLIT3_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_deinterleave_epi16, si128);
00258 SPLIT3_KERNEL_TEMPLATE(   int,  __m128,   float, _mm_deinterleave_ps, ps);
00259 
00260 SPLIT4_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_deinterleave_epi8, si128);
00261 SPLIT4_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_deinterleave_epi16, si128);
00262 SPLIT4_KERNEL_TEMPLATE(   int,  __m128,   float, _mm_deinterleave_ps, ps);
00263 
00264 #endif
00265 
/**
 * Splits an interleaved multi-channel buffer into one plane per channel.
 *
 * @param src  Interleaved input; element i of channel c is read from src[i*cn + c].
 * @param dst  Array of cn plane pointers; dst[c][i] receives element i of channel c.
 * @param len  Number of elements per channel. Nothing is done when len <= 0.
 * @param cn   Number of interleaved channels. dst[0] is always accessed, so
 *             cn >= 1 is expected.
 *
 * The work is done in two passes. The first pass handles the leading
 * (cn % 4, or 4 when cn is a multiple of 4) channels; when cn is exactly
 * 2, 3 or 4 it may use a NEON/SSE2 kernel for the bulk and a scalar loop for
 * the tail. The second pass copies the remaining channels four at a time.
 */
template<typename T> static void
split_( const T* src, T** dst, int len, int cn )
{
    // Nothing to copy. This also keeps a negative len from being converted
    // to a huge size_t in the memcpy below.
    if( len <= 0 )
        return;

    // Number of channels handled by the first pass.
    int k = cn % 4 ? cn % 4 : 4;
    int i, j;
    if( k == 1 )
    {
        T* dst0 = dst[0];

        if(cn == 1)
        {
            // Single channel: input and output layouts are identical.
            memcpy(dst0, src, len * sizeof(T));
        }
        else
        {
            for( i = 0, j = 0 ; i < len; i++, j += cn )
                dst0[i] = src[j];
        }
    }
    else if( k == 2 )
    {
        T *dst0 = dst[0], *dst1 = dst[1];
        i = j = 0;

#if CV_NEON
        if(cn == 2)
        {
            // 64-bit elements use the one-element (d register) kernels,
            // everything else a full 16-byte vector per channel.
            int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
            int inc_j = 2 * inc_i;

            VSplit2<T> vsplit;
            // `<=` so that a final full vector is still handled by the
            // kernel, consistent with the 3- and 4-channel loops.
            for( ; i <= len - inc_i; i += inc_i, j += inc_j)
                vsplit(src + j, dst0 + i, dst1 + i);
        }
#elif CV_SSE2
        if (cn == 2)
        {
            // Each SSE2 kernel call produces two 16-byte vectors per channel.
            int inc_i = 32/sizeof(T);
            int inc_j = 2 * inc_i;

            VSplit2<T> vsplit;
            if (vsplit.support)
            {
                for( ; i <= len - inc_i; i += inc_i, j += inc_j)
                    vsplit(src + j, dst0 + i, dst1 + i);
            }
        }
#endif
        // Scalar tail (or the whole range when no kernel ran).
        for( ; i < len; i++, j += cn )
        {
            dst0[i] = src[j];
            dst1[i] = src[j+1];
        }
    }
    else if( k == 3 )
    {
        T *dst0 = dst[0], *dst1 = dst[1], *dst2 = dst[2];
        i = j = 0;

#if CV_NEON
        if(cn == 3)
        {
            int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
            int inc_j = 3 * inc_i;

            VSplit3<T> vsplit;
            for( ; i <= len - inc_i; i += inc_i, j += inc_j)
                vsplit(src + j, dst0 + i, dst1 + i, dst2 + i);
        }
#elif CV_SSE2
        if (cn == 3)
        {
            int inc_i = 32/sizeof(T);
            int inc_j = 3 * inc_i;

            VSplit3<T> vsplit;

            if (vsplit.support)
            {
                for( ; i <= len - inc_i; i += inc_i, j += inc_j)
                    vsplit(src + j, dst0 + i, dst1 + i, dst2 + i);
            }
        }
#endif
        // Scalar tail (or the whole range when no kernel ran).
        for( ; i < len; i++, j += cn )
        {
            dst0[i] = src[j];
            dst1[i] = src[j+1];
            dst2[i] = src[j+2];
        }
    }
    else
    {
        T *dst0 = dst[0], *dst1 = dst[1], *dst2 = dst[2], *dst3 = dst[3];
        i = j = 0;

#if CV_NEON
        if(cn == 4)
        {
            int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
            int inc_j = 4 * inc_i;

            VSplit4<T> vsplit;
            for( ; i <= len - inc_i; i += inc_i, j += inc_j)
                vsplit(src + j, dst0 + i, dst1 + i, dst2 + i, dst3 + i);
        }
#elif CV_SSE2
        if (cn == 4)
        {
            int inc_i = 32/sizeof(T);
            int inc_j = 4 * inc_i;

            VSplit4<T> vsplit;
            if (vsplit.support)
            {
                for( ; i <= len - inc_i; i += inc_i, j += inc_j)
                    vsplit(src + j, dst0 + i, dst1 + i, dst2 + i, dst3 + i);
            }
        }
#endif
        // Scalar tail (or the whole range when no kernel ran).
        for( ; i < len; i++, j += cn )
        {
            dst0[i] = src[j]; dst1[i] = src[j+1];
            dst2[i] = src[j+2]; dst3[i] = src[j+3];
        }
    }

    // Second pass: the channels not covered above, in groups of four.
    for( ; k < cn; k += 4 )
    {
        T *dst0 = dst[k], *dst1 = dst[k+1], *dst2 = dst[k+2], *dst3 = dst[k+3];
        for( i = 0, j = k; i < len; i++, j += cn )
        {
            dst0[i] = src[j]; dst1[i] = src[j+1];
            dst2[i] = src[j+2]; dst3[i] = src[j+3];
        }
    }
}
00403 
00404 void split8u(const uchar* src, uchar** dst, int len, int cn )
00405 {
00406     CALL_HAL(split8u, cv_hal_split8u, src,dst, len, cn)
00407     split_(src, dst, len, cn);
00408 }
00409 
00410 void split16u(const ushort* src, ushort** dst, int len, int cn )
00411 {
00412     CALL_HAL(split16u, cv_hal_split16u, src,dst, len, cn)
00413     split_(src, dst, len, cn);
00414 }
00415 
00416 void split32s(const int* src, int** dst, int len, int cn )
00417 {
00418     CALL_HAL(split32s, cv_hal_split32s, src,dst, len, cn)
00419     split_(src, dst, len, cn);
00420 }
00421 
00422 void split64s(const int64* src, int64** dst, int len, int cn )
00423 {
00424     CALL_HAL(split64s, cv_hal_split64s, src,dst, len, cn)
00425     split_(src, dst, len, cn);
00426 }
00427 
00428 }}
00429