Fork of gr-peach-opencv-project-sd-card by
merge.cpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

namespace cv { namespace hal {

#if CV_NEON
template<typename T> struct VMerge2;
template<typename T> struct VMerge3;
template<typename T> struct VMerge4;

#define MERGE2_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
    template<> \
    struct name<data_type>{ \
        void operator()(const data_type* src0, const data_type* src1, \
                        data_type* dst){ \
            reg_type r; \
            r.val[0] = load_func(src0); \
            r.val[1] = load_func(src1); \
            store_func(dst, r); \
        } \
    }

#define MERGE3_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
    template<> \
    struct name<data_type>{ \
        void operator()(const data_type* src0, const data_type* src1, \
                        const data_type* src2, data_type* dst){ \
            reg_type r; \
            r.val[0] = load_func(src0); \
            r.val[1] = load_func(src1); \
            r.val[2] = load_func(src2); \
            store_func(dst, r); \
        } \
    }

#define MERGE4_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
    template<> \
    struct name<data_type>{ \
        void operator()(const data_type* src0, const data_type* src1, \
                        const data_type* src2, const data_type* src3, \
                        data_type* dst){ \
            reg_type r; \
            r.val[0] = load_func(src0); \
            r.val[1] = load_func(src1); \
            r.val[2] = load_func(src2); \
            r.val[3] = load_func(src3); \
            store_func(dst, r); \
        } \
    }

MERGE2_KERNEL_TEMPLATE(VMerge2, uchar , uint8x16x2_t, vld1q_u8 , vst2q_u8 );
MERGE2_KERNEL_TEMPLATE(VMerge2, ushort, uint16x8x2_t, vld1q_u16, vst2q_u16);
MERGE2_KERNEL_TEMPLATE(VMerge2, int   ,  int32x4x2_t, vld1q_s32, vst2q_s32);
MERGE2_KERNEL_TEMPLATE(VMerge2, int64 ,  int64x1x2_t, vld1_s64 , vst2_s64 );

MERGE3_KERNEL_TEMPLATE(VMerge3, uchar , uint8x16x3_t, vld1q_u8 , vst3q_u8 );
MERGE3_KERNEL_TEMPLATE(VMerge3, ushort, uint16x8x3_t, vld1q_u16, vst3q_u16);
MERGE3_KERNEL_TEMPLATE(VMerge3, int   ,  int32x4x3_t, vld1q_s32, vst3q_s32);
MERGE3_KERNEL_TEMPLATE(VMerge3, int64 ,  int64x1x3_t, vld1_s64 , vst3_s64 );

MERGE4_KERNEL_TEMPLATE(VMerge4, uchar , uint8x16x4_t, vld1q_u8 , vst4q_u8 );
MERGE4_KERNEL_TEMPLATE(VMerge4, ushort, uint16x8x4_t, vld1q_u16, vst4q_u16);
MERGE4_KERNEL_TEMPLATE(VMerge4, int   ,  int32x4x4_t, vld1q_s32, vst4q_s32);
MERGE4_KERNEL_TEMPLATE(VMerge4, int64 ,  int64x1x4_t, vld1_s64 , vst4_s64 );

#elif CV_SSE2

template <typename T>
struct VMerge2
{
    VMerge2() : support(false) { }
    void operator()(const T *, const T *, T *) const { }

    bool support;
};

template <typename T>
struct VMerge3
{
    VMerge3() : support(false) { }
    void operator()(const T *, const T *, const T *, T *) const { }

    bool support;
};

template <typename T>
struct VMerge4
{
    VMerge4() : support(false) { }
    void operator()(const T *, const T *, const T *, const T *, T *) const { }

    bool support;
};

#define MERGE2_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_interleave, flavor, se) \
template <> \
struct VMerge2<data_type> \
{ \
    enum \
    { \
        ELEMS_IN_VEC = 16 / sizeof(data_type) \
    }; \
 \
    VMerge2() \
    { \
        support = checkHardwareSupport(se); \
    } \
 \
    void operator()(const data_type * src0, const data_type * src1, \
                    data_type * dst) const \
    { \
        reg_type v_src0 = _mm_loadu_##flavor((const cast_type *)(src0)); \
        reg_type v_src1 = _mm_loadu_##flavor((const cast_type *)(src0 + ELEMS_IN_VEC)); \
        reg_type v_src2 = _mm_loadu_##flavor((const cast_type *)(src1)); \
        reg_type v_src3 = _mm_loadu_##flavor((const cast_type *)(src1 + ELEMS_IN_VEC)); \
 \
        _mm_interleave(v_src0, v_src1, v_src2, v_src3); \
 \
        _mm_storeu_##flavor((cast_type *)(dst), v_src0); \
        _mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC), v_src1); \
        _mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 2), v_src2); \
        _mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 3), v_src3); \
    } \
 \
    bool support; \
}

#define MERGE3_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_interleave, flavor, se) \
template <> \
struct VMerge3<data_type> \
{ \
    enum \
    { \
        ELEMS_IN_VEC = 16 / sizeof(data_type) \
    }; \
 \
    VMerge3() \
    { \
        support = checkHardwareSupport(se); \
    } \
 \
    void operator()(const data_type * src0, const data_type * src1, const data_type * src2, \
                    data_type * dst) const \
    { \
        reg_type v_src0 = _mm_loadu_##flavor((const cast_type *)(src0)); \
        reg_type v_src1 = _mm_loadu_##flavor((const cast_type *)(src0 + ELEMS_IN_VEC)); \
        reg_type v_src2 = _mm_loadu_##flavor((const cast_type *)(src1)); \
        reg_type v_src3 = _mm_loadu_##flavor((const cast_type *)(src1 + ELEMS_IN_VEC)); \
        reg_type v_src4 = _mm_loadu_##flavor((const cast_type *)(src2)); \
        reg_type v_src5 = _mm_loadu_##flavor((const cast_type *)(src2 + ELEMS_IN_VEC)); \
 \
        _mm_interleave(v_src0, v_src1, v_src2, \
                       v_src3, v_src4, v_src5); \
 \
        _mm_storeu_##flavor((cast_type *)(dst), v_src0); \
        _mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC), v_src1); \
        _mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 2), v_src2); \
        _mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 3), v_src3); \
        _mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 4), v_src4); \
        _mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 5), v_src5); \
    } \
 \
    bool support; \
}

#define MERGE4_KERNEL_TEMPLATE(data_type, reg_type, cast_type, _mm_interleave, flavor, se) \
template <> \
struct VMerge4<data_type> \
{ \
    enum \
    { \
        ELEMS_IN_VEC = 16 / sizeof(data_type) \
    }; \
 \
    VMerge4() \
    { \
        support = checkHardwareSupport(se); \
    } \
 \
    void operator()(const data_type * src0, const data_type * src1, \
                    const data_type * src2, const data_type * src3, \
                    data_type * dst) const \
    { \
        reg_type v_src0 = _mm_loadu_##flavor((const cast_type *)(src0)); \
        reg_type v_src1 = _mm_loadu_##flavor((const cast_type *)(src0 + ELEMS_IN_VEC)); \
        reg_type v_src2 = _mm_loadu_##flavor((const cast_type *)(src1)); \
        reg_type v_src3 = _mm_loadu_##flavor((const cast_type *)(src1 + ELEMS_IN_VEC)); \
        reg_type v_src4 = _mm_loadu_##flavor((const cast_type *)(src2)); \
        reg_type v_src5 = _mm_loadu_##flavor((const cast_type *)(src2 + ELEMS_IN_VEC)); \
        reg_type v_src6 = _mm_loadu_##flavor((const cast_type *)(src3)); \
        reg_type v_src7 = _mm_loadu_##flavor((const cast_type *)(src3 + ELEMS_IN_VEC)); \
 \
        _mm_interleave(v_src0, v_src1, v_src2, v_src3, \
                       v_src4, v_src5, v_src6, v_src7); \
 \
        _mm_storeu_##flavor((cast_type *)(dst), v_src0); \
        _mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC), v_src1); \
        _mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 2), v_src2); \
        _mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 3), v_src3); \
        _mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 4), v_src4); \
        _mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 5), v_src5); \
        _mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 6), v_src6); \
        _mm_storeu_##flavor((cast_type *)(dst + ELEMS_IN_VEC * 7), v_src7); \
    } \
 \
    bool support; \
}

MERGE2_KERNEL_TEMPLATE( uchar, __m128i, __m128i,  _mm_interleave_epi8, si128, CV_CPU_SSE2);
MERGE3_KERNEL_TEMPLATE( uchar, __m128i, __m128i,  _mm_interleave_epi8, si128, CV_CPU_SSE2);
MERGE4_KERNEL_TEMPLATE( uchar, __m128i, __m128i,  _mm_interleave_epi8, si128, CV_CPU_SSE2);

#if CV_SSE4_1
MERGE2_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_interleave_epi16, si128, CV_CPU_SSE4_1);
MERGE3_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_interleave_epi16, si128, CV_CPU_SSE4_1);
MERGE4_KERNEL_TEMPLATE(ushort, __m128i, __m128i, _mm_interleave_epi16, si128, CV_CPU_SSE4_1);
#endif

MERGE2_KERNEL_TEMPLATE(   int,  __m128,   float,    _mm_interleave_ps,    ps, CV_CPU_SSE2);
MERGE3_KERNEL_TEMPLATE(   int,  __m128,   float,    _mm_interleave_ps,    ps, CV_CPU_SSE2);
MERGE4_KERNEL_TEMPLATE(   int,  __m128,   float,    _mm_interleave_ps,    ps, CV_CPU_SSE2);

#endif

template<typename T> static void
merge_( const T** src, T* dst, int len, int cn )
{
    int k = cn % 4 ? cn % 4 : 4;
    int i, j;
    if( k == 1 )
    {
        const T* src0 = src[0];
        for( i = j = 0; i < len; i++, j += cn )
            dst[j] = src0[i];
    }
    else if( k == 2 )
    {
        const T *src0 = src[0], *src1 = src[1];
        i = j = 0;
#if CV_NEON
        if(cn == 2)
        {
            int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
            int inc_j = 2 * inc_i;

            VMerge2<T> vmerge;
            for( ; i < len - inc_i; i += inc_i, j += inc_j)
                vmerge(src0 + i, src1 + i, dst + j);
        }
#elif CV_SSE2
        if(cn == 2)
        {
            int inc_i = 32/sizeof(T);
            int inc_j = 2 * inc_i;

            VMerge2<T> vmerge;
            if (vmerge.support)
                for( ; i < len - inc_i; i += inc_i, j += inc_j)
                    vmerge(src0 + i, src1 + i, dst + j);
        }
#endif
        for( ; i < len; i++, j += cn )
        {
            dst[j] = src0[i];
            dst[j+1] = src1[i];
        }
    }
    else if( k == 3 )
    {
        const T *src0 = src[0], *src1 = src[1], *src2 = src[2];
        i = j = 0;
#if CV_NEON
        if(cn == 3)
        {
            int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
            int inc_j = 3 * inc_i;

            VMerge3<T> vmerge;
            for( ; i < len - inc_i; i += inc_i, j += inc_j)
                vmerge(src0 + i, src1 + i, src2 + i, dst + j);
        }
#elif CV_SSE2
        if(cn == 3)
        {
            int inc_i = 32/sizeof(T);
            int inc_j = 3 * inc_i;

            VMerge3<T> vmerge;
            if (vmerge.support)
                for( ; i < len - inc_i; i += inc_i, j += inc_j)
                    vmerge(src0 + i, src1 + i, src2 + i, dst + j);
        }
#endif
        for( ; i < len; i++, j += cn )
        {
            dst[j] = src0[i];
            dst[j+1] = src1[i];
            dst[j+2] = src2[i];
        }
    }
    else
    {
        const T *src0 = src[0], *src1 = src[1], *src2 = src[2], *src3 = src[3];
        i = j = 0;
#if CV_NEON
        if(cn == 4)
        {
            int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
            int inc_j = 4 * inc_i;

            VMerge4<T> vmerge;
            for( ; i < len - inc_i; i += inc_i, j += inc_j)
                vmerge(src0 + i, src1 + i, src2 + i, src3 + i, dst + j);
        }
#elif CV_SSE2
        if(cn == 4)
        {
            int inc_i = 32/sizeof(T);
            int inc_j = 4 * inc_i;

            VMerge4<T> vmerge;
            if (vmerge.support)
                for( ; i < len - inc_i; i += inc_i, j += inc_j)
                    vmerge(src0 + i, src1 + i, src2 + i, src3 + i, dst + j);
        }
#endif
        for( ; i < len; i++, j += cn )
        {
            dst[j] = src0[i]; dst[j+1] = src1[i];
            dst[j+2] = src2[i]; dst[j+3] = src3[i];
        }
    }

    for( ; k < cn; k += 4 )
    {
        const T *src0 = src[k], *src1 = src[k+1], *src2 = src[k+2], *src3 = src[k+3];
        for( i = 0, j = k; i < len; i++, j += cn )
        {
            dst[j] = src0[i]; dst[j+1] = src1[i];
            dst[j+2] = src2[i]; dst[j+3] = src3[i];
        }
    }
}


void merge8u(const uchar** src, uchar* dst, int len, int cn )
{
    CALL_HAL(merge8u, cv_hal_merge8u, src, dst, len, cn)
    merge_(src, dst, len, cn);
}

void merge16u(const ushort** src, ushort* dst, int len, int cn )
{
    CALL_HAL(merge16u, cv_hal_merge16u, src, dst, len, cn)
    merge_(src, dst, len, cn);
}

void merge32s(const int** src, int* dst, int len, int cn )
{
    CALL_HAL(merge32s, cv_hal_merge32s, src, dst, len, cn)
    merge_(src, dst, len, cn);
}

void merge64s(const int64** src, int64* dst, int len, int cn )
{
    CALL_HAL(merge64s, cv_hal_merge64s, src, dst, len, cn)
    merge_(src, dst, len, cn);
}

}}
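For reference, the kernels in this file implement channel interleaving: merge8u(src, dst, len, cn) reads cn separate planes and writes one interleaved row, so that dst[i*cn + c] == src[c][i], with the NEON/SSE specializations handling whole vectors at a time and the trailing scalar loops finishing the remainder. Below is a minimal sketch of exercising the same functionality through the public cv::merge() API, which ends up in these HAL kernels; it assumes a standard OpenCV build, and the plane sizes and fill values are arbitrary example data.

#include <opencv2/core.hpp>
#include <cstdio>

int main()
{
    // Three single-channel 8-bit planes of identical size (e.g. B, G, R).
    cv::Mat planes[3];
    planes[0] = cv::Mat(2, 4, CV_8UC1, cv::Scalar(10));
    planes[1] = cv::Mat(2, 4, CV_8UC1, cv::Scalar(20));
    planes[2] = cv::Mat(2, 4, CV_8UC1, cv::Scalar(30));

    // cv::merge() interleaves the planes into one 3-channel image;
    // for 8-bit data this is serviced by hal::merge8u() above.
    cv::Mat interleaved;
    cv::merge(planes, 3, interleaved);

    // Every pixel of the result is now the triple (10, 20, 30).
    cv::Vec3b px = interleaved.at<cv::Vec3b>(0, 0);
    std::printf("%d %d %d\n", px[0], px[1], px[2]);  // prints: 10 20 30
    return 0;
}

Note that channel counts above 4 are handled by the tail loop of merge_(), which copies four planes per pass after the initial k-channel block, so the vector kernels only ever see the cn == 2, 3, or 4 cases.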
