Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository's Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of gr-peach-opencv-project-sd-card.
split.cpp
00001 /*M/////////////////////////////////////////////////////////////////////////////////////// 00002 // 00003 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 00004 // 00005 // By downloading, copying, installing or using the software you agree to this license. 00006 // If you do not agree to this license, do not download, install, 00007 // copy or use the software. 00008 // 00009 // 00010 // License Agreement 00011 // For Open Source Computer Vision Library 00012 // 00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 00014 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved. 00015 // Copyright (C) 2014-2015, Itseez Inc., all rights reserved. 00016 // Third party copyrights are property of their respective owners. 00017 // 00018 // Redistribution and use in source and binary forms, with or without modification, 00019 // are permitted provided that the following conditions are met: 00020 // 00021 // * Redistribution's of source code must retain the above copyright notice, 00022 // this list of conditions and the following disclaimer. 00023 // 00024 // * Redistribution's in binary form must reproduce the above copyright notice, 00025 // this list of conditions and the following disclaimer in the documentation 00026 // and/or other materials provided with the distribution. 00027 // 00028 // * The name of the copyright holders may not be used to endorse or promote products 00029 // derived from this software without specific prior written permission. 00030 // 00031 // This software is provided by the copyright holders and contributors "as is" and 00032 // any express or implied warranties, including, but not limited to, the implied 00033 // warranties of merchantability and fitness for a particular purpose are disclaimed. 
00034 // In no event shall the Intel Corporation or contributors be liable for any direct, 00035 // indirect, incidental, special, exemplary, or consequential damages 00036 // (including, but not limited to, procurement of substitute goods or services; 00037 // loss of use, data, or profits; or business interruption) however caused 00038 // and on any theory of liability, whether in contract, strict liability, 00039 // or tort (including negligence or otherwise) arising in any way out of 00040 // the use of this software, even if advised of the possibility of such damage. 00041 // 00042 //M*/ 00043 00044 #include "precomp.hpp" 00045 00046 namespace cv { namespace hal { 00047 00048 #if CV_NEON 00049 template<typename T> struct VSplit2; 00050 template<typename T> struct VSplit3; 00051 template<typename T> struct VSplit4; 00052 00053 #define SPLIT2_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \ 00054 template<> \ 00055 struct name<data_type> \ 00056 { \ 00057 void operator()(const data_type* src, data_type* dst0, \ 00058 data_type* dst1) const \ 00059 { \ 00060 reg_type r = load_func(src); \ 00061 store_func(dst0, r.val[0]); \ 00062 store_func(dst1, r.val[1]); \ 00063 } \ 00064 } 00065 00066 #define SPLIT3_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \ 00067 template<> \ 00068 struct name<data_type> \ 00069 { \ 00070 void operator()(const data_type* src, data_type* dst0, data_type* dst1, \ 00071 data_type* dst2) const \ 00072 { \ 00073 reg_type r = load_func(src); \ 00074 store_func(dst0, r.val[0]); \ 00075 store_func(dst1, r.val[1]); \ 00076 store_func(dst2, r.val[2]); \ 00077 } \ 00078 } 00079 00080 #define SPLIT4_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \ 00081 template<> \ 00082 struct name<data_type> \ 00083 { \ 00084 void operator()(const data_type* src, data_type* dst0, data_type* dst1, \ 00085 data_type* dst2, data_type* dst3) const \ 00086 { \ 00087 reg_type r = load_func(src); \ 00088 
store_func(dst0, r.val[0]); \ 00089 store_func(dst1, r.val[1]); \ 00090 store_func(dst2, r.val[2]); \ 00091 store_func(dst3, r.val[3]); \ 00092 } \ 00093 } 00094 00095 SPLIT2_KERNEL_TEMPLATE(VSplit2, uchar , uint8x16x2_t, vld2q_u8 , vst1q_u8 ); 00096 SPLIT2_KERNEL_TEMPLATE(VSplit2, ushort, uint16x8x2_t, vld2q_u16, vst1q_u16); 00097 SPLIT2_KERNEL_TEMPLATE(VSplit2, int , int32x4x2_t, vld2q_s32, vst1q_s32); 00098 SPLIT2_KERNEL_TEMPLATE(VSplit2, int64 , int64x1x2_t, vld2_s64 , vst1_s64 ); 00099 00100 SPLIT3_KERNEL_TEMPLATE(VSplit3, uchar , uint8x16x3_t, vld3q_u8 , vst1q_u8 ); 00101 SPLIT3_KERNEL_TEMPLATE(VSplit3, ushort, uint16x8x3_t, vld3q_u16, vst1q_u16); 00102 SPLIT3_KERNEL_TEMPLATE(VSplit3, int , int32x4x3_t, vld3q_s32, vst1q_s32); 00103 SPLIT3_KERNEL_TEMPLATE(VSplit3, int64 , int64x1x3_t, vld3_s64 , vst1_s64 ); 00104 00105 SPLIT4_KERNEL_TEMPLATE(VSplit4, uchar , uint8x16x4_t, vld4q_u8 , vst1q_u8 ); 00106 SPLIT4_KERNEL_TEMPLATE(VSplit4, ushort, uint16x8x4_t, vld4q_u16, vst1q_u16); 00107 SPLIT4_KERNEL_TEMPLATE(VSplit4, int , int32x4x4_t, vld4q_s32, vst1q_s32); 00108 SPLIT4_KERNEL_TEMPLATE(VSplit4, int64 , int64x1x4_t, vld4_s64 , vst1_s64 ); 00109 00110 #elif CV_SSE2 00111 00112 template <typename T> 00113 struct VSplit2 00114 { 00115 VSplit2() : support(false) { } 00116 void operator()(const T *, T *, T *) const { } 00117 00118 bool support; 00119 }; 00120 00121 template <typename T> 00122 struct VSplit3 00123 { 00124 VSplit3() : support(false) { } 00125 void operator()(const T *, T *, T *, T *) const { } 00126 00127 bool support; 00128 }; 00129 00130 template <typename T> 00131 struct VSplit4 00132 { 00133 VSplit4() : support(false) { } 00134 void operator()(const T *, T *, T *, T *, T *) const { } 00135 00136 bool support; 00137 }; 00138 00139 #define SPLIT2_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_deinterleave, flavor) \ 00140 template <> \ 00141 struct VSplit2<data_type> \ 00142 { \ 00143 enum \ 00144 { \ 00145 ELEMS_IN_VEC = 16 / sizeof(data_type) \ 
00146 }; \ 00147 \ 00148 VSplit2() \ 00149 { \ 00150 support = checkHardwareSupport(CV_CPU_SSE2); \ 00151 } \ 00152 \ 00153 void operator()(const data_type * src, \ 00154 data_type * dst0, data_type * dst1) const \ 00155 { \ 00156 reg_type v_src0 = _mm_loadu_##flavor((cast_type const *)(src)); \ 00157 reg_type v_src1 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC)); \ 00158 reg_type v_src2 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 2)); \ 00159 reg_type v_src3 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 3)); \ 00160 \ 00161 _mm_deinterleave(v_src0, v_src1, v_src2, v_src3); \ 00162 \ 00163 _mm_storeu_##flavor((cast_type *)(dst0), v_src0); \ 00164 _mm_storeu_##flavor((cast_type *)(dst0 + ELEMS_IN_VEC), v_src1); \ 00165 _mm_storeu_##flavor((cast_type *)(dst1), v_src2); \ 00166 _mm_storeu_##flavor((cast_type *)(dst1 + ELEMS_IN_VEC), v_src3); \ 00167 } \ 00168 \ 00169 bool support; \ 00170 } 00171 00172 #define SPLIT3_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_deinterleave, flavor) \ 00173 template <> \ 00174 struct VSplit3<data_type> \ 00175 { \ 00176 enum \ 00177 { \ 00178 ELEMS_IN_VEC = 16 / sizeof(data_type) \ 00179 }; \ 00180 \ 00181 VSplit3() \ 00182 { \ 00183 support = checkHardwareSupport(CV_CPU_SSE2); \ 00184 } \ 00185 \ 00186 void operator()(const data_type * src, \ 00187 data_type * dst0, data_type * dst1, data_type * dst2) const \ 00188 { \ 00189 reg_type v_src0 = _mm_loadu_##flavor((cast_type const *)(src)); \ 00190 reg_type v_src1 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC)); \ 00191 reg_type v_src2 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 2)); \ 00192 reg_type v_src3 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 3)); \ 00193 reg_type v_src4 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 4)); \ 00194 reg_type v_src5 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 5)); \ 00195 \ 00196 _mm_deinterleave(v_src0, v_src1, 
v_src2, \ 00197 v_src3, v_src4, v_src5); \ 00198 \ 00199 _mm_storeu_##flavor((cast_type *)(dst0), v_src0); \ 00200 _mm_storeu_##flavor((cast_type *)(dst0 + ELEMS_IN_VEC), v_src1); \ 00201 _mm_storeu_##flavor((cast_type *)(dst1), v_src2); \ 00202 _mm_storeu_##flavor((cast_type *)(dst1 + ELEMS_IN_VEC), v_src3); \ 00203 _mm_storeu_##flavor((cast_type *)(dst2), v_src4); \ 00204 _mm_storeu_##flavor((cast_type *)(dst2 + ELEMS_IN_VEC), v_src5); \ 00205 } \ 00206 \ 00207 bool support; \ 00208 } 00209 00210 #define SPLIT4_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_deinterleave, flavor) \ 00211 template <> \ 00212 struct VSplit4<data_type> \ 00213 { \ 00214 enum \ 00215 { \ 00216 ELEMS_IN_VEC = 16 / sizeof(data_type) \ 00217 }; \ 00218 \ 00219 VSplit4() \ 00220 { \ 00221 support = checkHardwareSupport(CV_CPU_SSE2); \ 00222 } \ 00223 \ 00224 void operator()(const data_type * src, data_type * dst0, data_type * dst1, \ 00225 data_type * dst2, data_type * dst3) const \ 00226 { \ 00227 reg_type v_src0 = _mm_loadu_##flavor((cast_type const *)(src)); \ 00228 reg_type v_src1 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC)); \ 00229 reg_type v_src2 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 2)); \ 00230 reg_type v_src3 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 3)); \ 00231 reg_type v_src4 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 4)); \ 00232 reg_type v_src5 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 5)); \ 00233 reg_type v_src6 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 6)); \ 00234 reg_type v_src7 = _mm_loadu_##flavor((cast_type const *)(src + ELEMS_IN_VEC * 7)); \ 00235 \ 00236 _mm_deinterleave(v_src0, v_src1, v_src2, v_src3, \ 00237 v_src4, v_src5, v_src6, v_src7); \ 00238 \ 00239 _mm_storeu_##flavor((cast_type *)(dst0), v_src0); \ 00240 _mm_storeu_##flavor((cast_type *)(dst0 + ELEMS_IN_VEC), v_src1); \ 00241 _mm_storeu_##flavor((cast_type *)(dst1), v_src2); \ 
00242 _mm_storeu_##flavor((cast_type *)(dst1 + ELEMS_IN_VEC), v_src3); \ 00243 _mm_storeu_##flavor((cast_type *)(dst2), v_src4); \ 00244 _mm_storeu_##flavor((cast_type *)(dst2 + ELEMS_IN_VEC), v_src5); \ 00245 _mm_storeu_##flavor((cast_type *)(dst3), v_src6); \ 00246 _mm_storeu_##flavor((cast_type *)(dst3 + ELEMS_IN_VEC), v_src7); \ 00247 } \ 00248 \ 00249 bool support; \ 00250 } 00251 00252 SPLIT2_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_deinterleave_epi8, si128); 00253 SPLIT2_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_deinterleave_epi16, si128); 00254 SPLIT2_KERNEL_TEMPLATE( int, __m128, float, _mm_deinterleave_ps, ps); 00255 00256 SPLIT3_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_deinterleave_epi8, si128); 00257 SPLIT3_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_deinterleave_epi16, si128); 00258 SPLIT3_KERNEL_TEMPLATE( int, __m128, float, _mm_deinterleave_ps, ps); 00259 00260 SPLIT4_KERNEL_TEMPLATE( uchar, __m128i, __m128i, _mm_deinterleave_epi8, si128); 00261 SPLIT4_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_deinterleave_epi16, si128); 00262 SPLIT4_KERNEL_TEMPLATE( int, __m128, float, _mm_deinterleave_ps, ps); 00263 00264 #endif 00265 00266 template<typename T> static void 00267 split_( const T* src, T** dst, int len, int cn ) 00268 { 00269 int k = cn % 4 ? cn % 4 : 4; 00270 int i, j; 00271 if( k == 1 ) 00272 { 00273 T* dst0 = dst[0]; 00274 00275 if(cn == 1) 00276 { 00277 memcpy(dst0, src, len * sizeof(T)); 00278 } 00279 else 00280 { 00281 for( i = 0, j = 0 ; i < len; i++, j += cn ) 00282 dst0[i] = src[j]; 00283 } 00284 } 00285 else if( k == 2 ) 00286 { 00287 T *dst0 = dst[0], *dst1 = dst[1]; 00288 i = j = 0; 00289 00290 #if CV_NEON 00291 if(cn == 2) 00292 { 00293 int inc_i = (sizeof(T) == 8)? 
1: 16/sizeof(T); 00294 int inc_j = 2 * inc_i; 00295 00296 VSplit2<T> vsplit; 00297 for( ; i < len - inc_i; i += inc_i, j += inc_j) 00298 vsplit(src + j, dst0 + i, dst1 + i); 00299 } 00300 #elif CV_SSE2 00301 if (cn == 2) 00302 { 00303 int inc_i = 32/sizeof(T); 00304 int inc_j = 2 * inc_i; 00305 00306 VSplit2<T> vsplit; 00307 if (vsplit.support) 00308 { 00309 for( ; i <= len - inc_i; i += inc_i, j += inc_j) 00310 vsplit(src + j, dst0 + i, dst1 + i); 00311 } 00312 } 00313 #endif 00314 for( ; i < len; i++, j += cn ) 00315 { 00316 dst0[i] = src[j]; 00317 dst1[i] = src[j+1]; 00318 } 00319 } 00320 else if( k == 3 ) 00321 { 00322 T *dst0 = dst[0], *dst1 = dst[1], *dst2 = dst[2]; 00323 i = j = 0; 00324 00325 #if CV_NEON 00326 if(cn == 3) 00327 { 00328 int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T); 00329 int inc_j = 3 * inc_i; 00330 00331 VSplit3<T> vsplit; 00332 for( ; i <= len - inc_i; i += inc_i, j += inc_j) 00333 vsplit(src + j, dst0 + i, dst1 + i, dst2 + i); 00334 } 00335 #elif CV_SSE2 00336 if (cn == 3) 00337 { 00338 int inc_i = 32/sizeof(T); 00339 int inc_j = 3 * inc_i; 00340 00341 VSplit3<T> vsplit; 00342 00343 if (vsplit.support) 00344 { 00345 for( ; i <= len - inc_i; i += inc_i, j += inc_j) 00346 vsplit(src + j, dst0 + i, dst1 + i, dst2 + i); 00347 } 00348 } 00349 #endif 00350 for( ; i < len; i++, j += cn ) 00351 { 00352 dst0[i] = src[j]; 00353 dst1[i] = src[j+1]; 00354 dst2[i] = src[j+2]; 00355 } 00356 } 00357 else 00358 { 00359 T *dst0 = dst[0], *dst1 = dst[1], *dst2 = dst[2], *dst3 = dst[3]; 00360 i = j = 0; 00361 00362 #if CV_NEON 00363 if(cn == 4) 00364 { 00365 int inc_i = (sizeof(T) == 8)? 
1: 16/sizeof(T); 00366 int inc_j = 4 * inc_i; 00367 00368 VSplit4<T> vsplit; 00369 for( ; i <= len - inc_i; i += inc_i, j += inc_j) 00370 vsplit(src + j, dst0 + i, dst1 + i, dst2 + i, dst3 + i); 00371 } 00372 #elif CV_SSE2 00373 if (cn == 4) 00374 { 00375 int inc_i = 32/sizeof(T); 00376 int inc_j = 4 * inc_i; 00377 00378 VSplit4<T> vsplit; 00379 if (vsplit.support) 00380 { 00381 for( ; i <= len - inc_i; i += inc_i, j += inc_j) 00382 vsplit(src + j, dst0 + i, dst1 + i, dst2 + i, dst3 + i); 00383 } 00384 } 00385 #endif 00386 for( ; i < len; i++, j += cn ) 00387 { 00388 dst0[i] = src[j]; dst1[i] = src[j+1]; 00389 dst2[i] = src[j+2]; dst3[i] = src[j+3]; 00390 } 00391 } 00392 00393 for( ; k < cn; k += 4 ) 00394 { 00395 T *dst0 = dst[k], *dst1 = dst[k+1], *dst2 = dst[k+2], *dst3 = dst[k+3]; 00396 for( i = 0, j = k; i < len; i++, j += cn ) 00397 { 00398 dst0[i] = src[j]; dst1[i] = src[j+1]; 00399 dst2[i] = src[j+2]; dst3[i] = src[j+3]; 00400 } 00401 } 00402 } 00403 00404 void split8u(const uchar* src, uchar** dst, int len, int cn ) 00405 { 00406 CALL_HAL(split8u, cv_hal_split8u, src,dst, len, cn) 00407 split_(src, dst, len, cn); 00408 } 00409 00410 void split16u(const ushort* src, ushort** dst, int len, int cn ) 00411 { 00412 CALL_HAL(split16u, cv_hal_split16u, src,dst, len, cn) 00413 split_(src, dst, len, cn); 00414 } 00415 00416 void split32s(const int* src, int** dst, int len, int cn ) 00417 { 00418 CALL_HAL(split32s, cv_hal_split32s, src,dst, len, cn) 00419 split_(src, dst, len, cn); 00420 } 00421 00422 void split64s(const int64* src, int64** dst, int len, int cn ) 00423 { 00424 CALL_HAL(split64s, cv_hal_split64s, src,dst, len, cn) 00425 split_(src, dst, len, cn); 00426 } 00427 00428 }} 00429
Generated on Tue Jul 12 2022 14:47:37 by Doxygen 1.7.2
