gr-peach-opencv-project-sd-card_update

Renesas GR-PEACH OpenCV Development » Code » Documentation
Renesas GR-PEACH OpenCV Development / gr-peach-opencv-project-sd-card_update
Fork of gr-peach-opencv-project-sd-card by the do
Embed: (wiki syntax)
Show/hide line numbers split.cpp Source File
00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
00015 // Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
00016 // Third party copyrights are property of their respective owners.
00017 //
00018 // Redistribution and use in source and binary forms, with or without modification,
00019 // are permitted provided that the following conditions are met:
00020 //
00021 //   * Redistribution's of source code must retain the above copyright notice,
00022 //     this list of conditions and the following disclaimer.
00023 //
00024 //   * Redistribution's in binary form must reproduce the above copyright notice,
00025 //     this list of conditions and the following disclaimer in the documentation
00026 //     and/or other materials provided with the distribution.
00027 //
00028 //   * The name of the copyright holders may not be used to endorse or promote products
00029 //     derived from this software without specific prior written permission.
00030 //
00031 // This software is provided by the copyright holders and contributors "as is" and
00032 // any express or implied warranties, including, but not limited to, the implied
00033 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00034 // In no event shall the Intel Corporation or contributors be liable for any direct,
00035 // indirect, incidental, special, exemplary, or consequential damages
00036 // (including, but not limited to, procurement of substitute goods or services;
00037 // loss of use, data, or profits; or business interruption) however caused
00038 // and on any theory of liability, whether in contract, strict liability,
00039 // or tort (including negligence or otherwise) arising in any way out of
00040 // the use of this software, even if advised of the possibility of such damage.
00041 //
00042 //M*/
00043 
00044 #include "precomp.hpp"
00045 
00046 namespace cv { namespace hal {
00047 
00048 #if CV_NEON
00049 template<typename T> struct VSplit2;
00050 template<typename T> struct VSplit3;
00051 template<typename T> struct VSplit4;
00052 
// Generates the NEON specialization name<data_type> for two-channel data.
//   reg_type   - multi-vector register type; its val[] array is indexed per channel
//   load_func  - intrinsic that fills reg_type from the interleaved source
//   store_func - intrinsic that writes one val[] entry to a destination plane
// Each call issues one load from src and one store to each of dst0 and dst1.
#define SPLIT2_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
    template<> \
    struct name<data_type> \
    { \
        void operator()(const data_type* src, \
                        data_type* dst0, data_type* dst1) const \
        { \
            const reg_type planes = load_func(src); \
            store_func(dst0, planes.val[0]); \
            store_func(dst1, planes.val[1]); \
        } \
    }
00065 
// Generates the NEON specialization name<data_type> for three-channel data.
//   reg_type   - multi-vector register type; its val[] array is indexed per channel
//   load_func  - intrinsic that fills reg_type from the interleaved source
//   store_func - intrinsic that writes one val[] entry to a destination plane
// Each call issues one load from src and one store to each of dst0, dst1, dst2.
#define SPLIT3_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
    template<> \
    struct name<data_type> \
    { \
        void operator()(const data_type* src, \
                        data_type* dst0, data_type* dst1, data_type* dst2) const \
        { \
            const reg_type planes = load_func(src); \
            store_func(dst0, planes.val[0]); \
            store_func(dst1, planes.val[1]); \
            store_func(dst2, planes.val[2]); \
        } \
    }
00079 
// Generates the NEON specialization name<data_type> for four-channel data.
//   reg_type   - multi-vector register type; its val[] array is indexed per channel
//   load_func  - intrinsic that fills reg_type from the interleaved source
//   store_func - intrinsic that writes one val[] entry to a destination plane
// Each call issues one load from src and one store to each of dst0..dst3.
#define SPLIT4_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
    template<> \
    struct name<data_type> \
    { \
        void operator()(const data_type* src, \
                        data_type* dst0, data_type* dst1, \
                        data_type* dst2, data_type* dst3) const \
        { \
            const reg_type planes = load_func(src); \
            store_func(dst0, planes.val[0]); \
            store_func(dst1, planes.val[1]); \
            store_func(dst2, planes.val[2]); \
            store_func(dst3, planes.val[3]); \
        } \
    }
00094 
00095 SPLIT2_KERNEL_TEMPLATE(VSplit2, uchar ,  uint8x16x2_t, vld2q_u8 , vst1q_u8 );
00096 SPLIT2_KERNEL_TEMPLATE(VSplit2, ushort,  uint16x8x2_t, vld2q_u16, vst1q_u16);
00097 SPLIT2_KERNEL_TEMPLATE(VSplit2, int   ,   int32x4x2_t, vld2q_s32, vst1q_s32);
00098 SPLIT2_KERNEL_TEMPLATE(VSplit2, int64 ,   int64x1x2_t, vld2_s64 , vst1_s64 );
00099 
00100 SPLIT3_KERNEL_TEMPLATE(VSplit3, uchar ,  uint8x16x3_t, vld3q_u8 , vst1q_u8 );
00101 SPLIT3_KERNEL_TEMPLATE(VSplit3, ushort,  uint16x8x3_t, vld3q_u16, vst1q_u16);
00102 SPLIT3_KERNEL_TEMPLATE(VSplit3, int   ,   int32x4x3_t, vld3q_s32, vst1q_s32);
00103 SPLIT3_KERNEL_TEMPLATE(VSplit3, int64 ,   int64x1x3_t, vld3_s64 , vst1_s64 );
00104 
00105 SPLIT4_KERNEL_TEMPLATE(VSplit4, uchar ,  uint8x16x4_t, vld4q_u8 , vst1q_u8 );
00106 SPLIT4_KERNEL_TEMPLATE(VSplit4, ushort,  uint16x8x4_t, vld4q_u16, vst1q_u16);
00107 SPLIT4_KERNEL_TEMPLATE(VSplit4, int   ,   int32x4x4_t, vld4q_s32, vst1q_s32);
00108 SPLIT4_KERNEL_TEMPLATE(VSplit4, int64 ,   int64x1x4_t, vld4_s64 , vst1_s64 );
00109 
00110 #elif CV_SSE2
00111 
// Generic two-channel kernel used for element types without an SSE2
// specialization: it reports support == false and its call operator is a no-op.
template <typename T>
struct VSplit2
{
    bool support;

    VSplit2()
        : support(false)
    {
    }

    void operator()(const T *, T *, T *) const
    {
    }
};
00120 
// Generic three-channel kernel used for element types without an SSE2
// specialization: it reports support == false and its call operator is a no-op.
template <typename T>
struct VSplit3
{
    bool support;

    VSplit3()
        : support(false)
    {
    }

    void operator()(const T *, T *, T *, T *) const
    {
    }
};
00129 
// Generic four-channel kernel used for element types without an SSE2
// specialization: it reports support == false and its call operator is a no-op.
template <typename T>
struct VSplit4
{
    bool support;

    VSplit4()
        : support(false)
    {
    }

    void operator()(const T *, T *, T *, T *, T *) const
    {
    }
};
00138 
// Generates the SSE2 specialization VSplit2<data_type>.
//   data_type        - element type of the specialization
//   reg_type         - register type holding one 16-byte vector
//   cast_type        - pointee type passed to the load/store intrinsics
//   _mm_deinterleave - in-place de-interleave helper (defined elsewhere)
//   flavor           - suffix selecting _mm_loadu_<flavor> / _mm_storeu_<flavor>
// One call reads four consecutive vectors (4 * ELEMS_IN_VEC elements) from src
// and writes two vectors (2 * ELEMS_IN_VEC elements) to each of dst0 and dst1.
// `support` holds the result of checkHardwareSupport(CV_CPU_SSE2).
#define SPLIT2_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_deinterleave, flavor) \
template <> \
struct VSplit2<data_type> \
{ \
    enum { ELEMS_IN_VEC = 16 / sizeof(data_type) }; \
    \
    bool support; \
    \
    VSplit2() : support(checkHardwareSupport(CV_CPU_SSE2)) { } \
    \
    void operator()(const data_type * src, \
                    data_type * dst0, data_type * dst1) const \
    { \
        reg_type r0 = _mm_loadu_##flavor((cast_type const *)(src)); \
        reg_type r1 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC)); \
        reg_type r2 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 2)); \
        reg_type r3 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 3)); \
        \
        _mm_deinterleave(r0, r1, r2, r3); \
        \
        _mm_storeu_##flavor((cast_type *)(dst0), r0); \
        _mm_storeu_##flavor((cast_type *)(dst0 + ELEMS_IN_VEC), r1); \
        _mm_storeu_##flavor((cast_type *)(dst1), r2); \
        _mm_storeu_##flavor((cast_type *)(dst1 + ELEMS_IN_VEC), r3); \
    } \
}
00171 
// Generates the SSE2 specialization VSplit3<data_type>.
//   data_type        - element type of the specialization
//   reg_type         - register type holding one 16-byte vector
//   cast_type        - pointee type passed to the load/store intrinsics
//   _mm_deinterleave - in-place de-interleave helper (defined elsewhere)
//   flavor           - suffix selecting _mm_loadu_<flavor> / _mm_storeu_<flavor>
// One call reads six consecutive vectors (6 * ELEMS_IN_VEC elements) from src
// and writes two vectors (2 * ELEMS_IN_VEC elements) to each of dst0..dst2.
// `support` holds the result of checkHardwareSupport(CV_CPU_SSE2).
#define SPLIT3_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_deinterleave, flavor) \
template <> \
struct VSplit3<data_type> \
{ \
    enum { ELEMS_IN_VEC = 16 / sizeof(data_type) }; \
    \
    bool support; \
    \
    VSplit3() : support(checkHardwareSupport(CV_CPU_SSE2)) { } \
    \
    void operator()(const data_type * src, \
                    data_type * dst0, data_type * dst1, data_type * dst2) const \
    { \
        reg_type r0 = _mm_loadu_##flavor((cast_type const *)(src)); \
        reg_type r1 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC)); \
        reg_type r2 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 2)); \
        reg_type r3 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 3)); \
        reg_type r4 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 4)); \
        reg_type r5 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 5)); \
        \
        _mm_deinterleave(r0, r1, r2, r3, r4, r5); \
        \
        _mm_storeu_##flavor((cast_type *)(dst0), r0); \
        _mm_storeu_##flavor((cast_type *)(dst0 + ELEMS_IN_VEC), r1); \
        _mm_storeu_##flavor((cast_type *)(dst1), r2); \
        _mm_storeu_##flavor((cast_type *)(dst1 + ELEMS_IN_VEC), r3); \
        _mm_storeu_##flavor((cast_type *)(dst2), r4); \
        _mm_storeu_##flavor((cast_type *)(dst2 + ELEMS_IN_VEC), r5); \
    } \
}
00209 
// Generates the SSE2 specialization VSplit4<data_type>.
//   data_type        - element type of the specialization
//   reg_type         - register type holding one 16-byte vector
//   cast_type        - pointee type passed to the load/store intrinsics
//   _mm_deinterleave - in-place de-interleave helper (defined elsewhere)
//   flavor           - suffix selecting _mm_loadu_<flavor> / _mm_storeu_<flavor>
// One call reads eight consecutive vectors (8 * ELEMS_IN_VEC elements) from src
// and writes two vectors (2 * ELEMS_IN_VEC elements) to each of dst0..dst3.
// `support` holds the result of checkHardwareSupport(CV_CPU_SSE2).
#define SPLIT4_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_deinterleave, flavor) \
template <> \
struct VSplit4<data_type> \
{ \
    enum { ELEMS_IN_VEC = 16 / sizeof(data_type) }; \
    \
    bool support; \
    \
    VSplit4() : support(checkHardwareSupport(CV_CPU_SSE2)) { } \
    \
    void operator()(const data_type * src, data_type * dst0, data_type * dst1, \
                    data_type * dst2, data_type * dst3) const \
    { \
        reg_type r0 = _mm_loadu_##flavor((cast_type const *)(src)); \
        reg_type r1 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC)); \
        reg_type r2 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 2)); \
        reg_type r3 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 3)); \
        reg_type r4 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 4)); \
        reg_type r5 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 5)); \
        reg_type r6 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 6)); \
        reg_type r7 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 7)); \
        \
        _mm_deinterleave(r0, r1, r2, r3, r4, r5, r6, r7); \
        \
        _mm_storeu_##flavor((cast_type *)(dst0), r0); \
        _mm_storeu_##flavor((cast_type *)(dst0 + ELEMS_IN_VEC), r1); \
        _mm_storeu_##flavor((cast_type *)(dst1), r2); \
        _mm_storeu_##flavor((cast_type *)(dst1 + ELEMS_IN_VEC), r3); \
        _mm_storeu_##flavor((cast_type *)(dst2), r4); \
        _mm_storeu_##flavor((cast_type *)(dst2 + ELEMS_IN_VEC), r5); \
        _mm_storeu_##flavor((cast_type *)(dst3), r6); \
        _mm_storeu_##flavor((cast_type *)(dst3 + ELEMS_IN_VEC), r7); \
    } \
}
00251 
00252 SPLIT2_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_deinterleave_epi8, si128);
00253 SPLIT2_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_deinterleave_epi16, si128);
00254 SPLIT2_KERNEL_TEMPLATE(   int,  __m128,   float, _mm_deinterleave_ps, ps);
00255 
00256 SPLIT3_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_deinterleave_epi8, si128);
00257 SPLIT3_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_deinterleave_epi16, si128);
00258 SPLIT3_KERNEL_TEMPLATE(   int,  __m128,   float, _mm_deinterleave_ps, ps);
00259 
00260 SPLIT4_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_deinterleave_epi8, si128);
00261 SPLIT4_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_deinterleave_epi16, si128);
00262 SPLIT4_KERNEL_TEMPLATE(   int,  __m128,   float, _mm_deinterleave_ps, ps);
00263 
00264 #endif
00265 
/**
 * Splits an interleaved multi-channel array into separate per-channel arrays.
 *
 * @param src  interleaved input; channel c of element i is read from src[i*cn + c].
 * @param dst  array of cn destination pointers; dst[c] receives len values.
 * @param len  number of elements written to each destination array.
 * @param cn   number of interleaved channels in src.
 *
 * The first k channels (k = cn % 4, or 4 when cn is a multiple of 4) are handled
 * by a dedicated branch that can use the NEON / SSE2 kernels when cn itself is
 * 2, 3 or 4. The remaining cn - k channels are then copied four at a time.
 * A non-positive len or cn is treated as "nothing to do".
 */
template<typename T> static void
split_( const T* src, T** dst, int len, int cn )
{
    // Nothing to copy. Returning here also keeps dst[] from being indexed when
    // cn is not a positive channel count, and keeps a negative len away from memcpy.
    if( len <= 0 || cn <= 0 )
        return;

    // Channels covered by the first pass, chosen so that cn - k is a multiple of 4.
    int k = cn % 4 ? cn % 4 : 4;
    int i, j;
    if( k == 1 )
    {
        T* dst0 = dst[0];

        if(cn == 1)
        {
            // Single channel: source and destination layouts are identical.
            memcpy(dst0, src, len * sizeof(T));
        }
        else
        {
            for( i = 0, j = 0 ; i < len; i++, j += cn )
                dst0[i] = src[j];
        }
    }
    else if( k == 2 )
    {
        T *dst0 = dst[0], *dst1 = dst[1];
        i = j = 0;

#if CV_NEON
        if(cn == 2)
        {
            int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
            int inc_j = 2 * inc_i;

            VSplit2<T> vsplit;
            // "<=" (as in the 3- and 4-channel loops) so the last full vector
            // block is also vectorized instead of falling to the scalar tail.
            for( ; i <= len - inc_i; i += inc_i, j += inc_j)
                vsplit(src + j, dst0 + i, dst1 + i);
        }
#elif CV_SSE2
        if (cn == 2)
        {
            int inc_i = 32/sizeof(T);
            int inc_j = 2 * inc_i;

            VSplit2<T> vsplit;
            if (vsplit.support)
            {
                for( ; i <= len - inc_i; i += inc_i, j += inc_j)
                    vsplit(src + j, dst0 + i, dst1 + i);
            }
        }
#endif
        // Scalar tail (or the whole range when no vector kernel ran).
        for( ; i < len; i++, j += cn )
        {
            dst0[i] = src[j];
            dst1[i] = src[j+1];
        }
    }
    else if( k == 3 )
    {
        T *dst0 = dst[0], *dst1 = dst[1], *dst2 = dst[2];
        i = j = 0;

#if CV_NEON
        if(cn == 3)
        {
            int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
            int inc_j = 3 * inc_i;

            VSplit3<T> vsplit;
            for( ; i <= len - inc_i; i += inc_i, j += inc_j)
                vsplit(src + j, dst0 + i, dst1 + i, dst2 + i);
        }
#elif CV_SSE2
        if (cn == 3)
        {
            int inc_i = 32/sizeof(T);
            int inc_j = 3 * inc_i;

            VSplit3<T> vsplit;

            if (vsplit.support)
            {
                for( ; i <= len - inc_i; i += inc_i, j += inc_j)
                    vsplit(src + j, dst0 + i, dst1 + i, dst2 + i);
            }
        }
#endif
        // Scalar tail (or the whole range when no vector kernel ran).
        for( ; i < len; i++, j += cn )
        {
            dst0[i] = src[j];
            dst1[i] = src[j+1];
            dst2[i] = src[j+2];
        }
    }
    else
    {
        T *dst0 = dst[0], *dst1 = dst[1], *dst2 = dst[2], *dst3 = dst[3];
        i = j = 0;

#if CV_NEON
        if(cn == 4)
        {
            int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
            int inc_j = 4 * inc_i;

            VSplit4<T> vsplit;
            for( ; i <= len - inc_i; i += inc_i, j += inc_j)
                vsplit(src + j, dst0 + i, dst1 + i, dst2 + i, dst3 + i);
        }
#elif CV_SSE2
        if (cn == 4)
        {
            int inc_i = 32/sizeof(T);
            int inc_j = 4 * inc_i;

            VSplit4<T> vsplit;
            if (vsplit.support)
            {
                for( ; i <= len - inc_i; i += inc_i, j += inc_j)
                    vsplit(src + j, dst0 + i, dst1 + i, dst2 + i, dst3 + i);
            }
        }
#endif
        // Scalar tail (or the whole range when no vector kernel ran).
        for( ; i < len; i++, j += cn )
        {
            dst0[i] = src[j]; dst1[i] = src[j+1];
            dst2[i] = src[j+2]; dst3[i] = src[j+3];
        }
    }

    // Remaining channels, four per pass, starting at channel k.
    for( ; k < cn; k += 4 )
    {
        T *dst0 = dst[k], *dst1 = dst[k+1], *dst2 = dst[k+2], *dst3 = dst[k+3];
        for( i = 0, j = k; i < len; i++, j += cn )
        {
            dst0[i] = src[j]; dst1[i] = src[j+1];
            dst2[i] = src[j+2]; dst3[i] = src[j+3];
        }
    }
}
00403 
00404 void split8u(const uchar* src, uchar** dst, int len, int cn )
00405 {
00406     CALL_HAL(split8u, cv_hal_split8u, src,dst, len, cn)
00407     split_(src, dst, len, cn);
00408 }
00409 
00410 void split16u(const ushort* src, ushort** dst, int len, int cn )
00411 {
00412     CALL_HAL(split16u, cv_hal_split16u, src,dst, len, cn)
00413     split_(src, dst, len, cn);
00414 }
00415 
00416 void split32s(const int* src, int** dst, int len, int cn )
00417 {
00418     CALL_HAL(split32s, cv_hal_split32s, src,dst, len, cn)
00419     split_(src, dst, len, cn);
00420 }
00421 
00422 void split64s(const int64* src, int64** dst, int len, int cn )
00423 {
00424     CALL_HAL(split64s, cv_hal_split64s, src,dst, len, cn)
00425     split_(src, dst, len, cn);
00426 }
00427 
00428 }}
00429
Repository toolbox

Repository details

Type:	Program
Created:	26 Jul 2017
Imports:	3
Forks:	0
Commits:	168
Dependents:	0
Dependencies:	0
Followers:	9
Important changes to repositories hosted on mbed.com

split.cpp

Repository toolbox

Repository details

Important Information for this Arm website

Important changes to repositories hosted on mbed.com

split.cpp

Repository toolbox

Repository details

Important Information for this Arm website

Access Warning