pyramids.cpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include "opencl_kernels_imgproc.hpp"

namespace cv
{

template<typename T, int shift> struct FixPtCast
{
    typedef int type1;
    typedef T rtype;
    rtype operator ()(type1 arg) const { return (T)((arg + (1 << (shift-1))) >> shift); }
};

template<typename T, int shift> struct FltCast
{
    typedef T type1;
    typedef T rtype;
    rtype operator ()(type1 arg) const { return arg*(T)(1./(1 << shift)); }
};

template<typename T1, typename T2> struct PyrDownNoVec
{
    int operator()(T1**, T2*, int, int) const { return 0; }
};

template<typename T1, typename T2> struct PyrUpNoVec
{
    int operator()(T1**, T2**, int, int) const { return 0; }
};

#if CV_SSE2

struct PyrDownVec_32s8u
{
    int operator()(int** src, uchar* dst, int, int width) const
    {
        if( !checkHardwareSupport(CV_CPU_SSE2) )
            return 0;

        int x = 0;
        const int *row0 = src[0], *row1 = src[1], *row2 = src[2], *row3 = src[3], *row4 = src[4];
        __m128i delta = _mm_set1_epi16(128);

        for( ; x <= width - 16; x += 16 )
        {
            __m128i r0, r1, r2, r3, r4, t0, t1;
            r0 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row0 + x)),
                                 _mm_load_si128((const __m128i*)(row0 + x + 4)));
            r1 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row1 + x)),
                                 _mm_load_si128((const __m128i*)(row1 + x + 4)));
            r2 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row2 + x)),
                                 _mm_load_si128((const __m128i*)(row2 + x + 4)));
            r3 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row3 + x)),
                                 _mm_load_si128((const __m128i*)(row3 + x + 4)));
            r4 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row4 + x)),
                                 _mm_load_si128((const __m128i*)(row4 + x + 4)));
            r0 = _mm_add_epi16(r0, r4);
            r1 = _mm_add_epi16(_mm_add_epi16(r1, r3), r2);
            r0 = _mm_add_epi16(r0, _mm_add_epi16(r2, r2));
            t0 = _mm_add_epi16(r0, _mm_slli_epi16(r1, 2));
            r0 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row0 + x + 8)),
                                 _mm_load_si128((const __m128i*)(row0 + x + 12)));
            r1 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row1 + x + 8)),
                                 _mm_load_si128((const __m128i*)(row1 + x + 12)));
            r2 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row2 + x + 8)),
                                 _mm_load_si128((const __m128i*)(row2 + x + 12)));
            r3 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row3 + x + 8)),
                                 _mm_load_si128((const __m128i*)(row3 + x + 12)));
            r4 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row4 + x + 8)),
                                 _mm_load_si128((const __m128i*)(row4 + x + 12)));
            r0 = _mm_add_epi16(r0, r4);
            r1 = _mm_add_epi16(_mm_add_epi16(r1, r3), r2);
            r0 = _mm_add_epi16(r0, _mm_add_epi16(r2, r2));
            t1 = _mm_add_epi16(r0, _mm_slli_epi16(r1, 2));
            t0 = _mm_srli_epi16(_mm_add_epi16(t0, delta), 8);
            t1 = _mm_srli_epi16(_mm_add_epi16(t1, delta), 8);
            _mm_storeu_si128((__m128i*)(dst + x), _mm_packus_epi16(t0, t1));
        }

        for( ; x <= width - 4; x += 4 )
        {
            __m128i r0, r1, r2, r3, r4, z = _mm_setzero_si128();
            r0 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row0 + x)), z);
            r1 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row1 + x)), z);
            r2 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row2 + x)), z);
            r3 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row3 + x)), z);
            r4 = _mm_packs_epi32(_mm_load_si128((const __m128i*)(row4 + x)), z);
            r0 = _mm_add_epi16(r0, r4);
            r1 = _mm_add_epi16(_mm_add_epi16(r1, r3), r2);
            r0 = _mm_add_epi16(r0, _mm_add_epi16(r2, r2));
            r0 = _mm_add_epi16(r0, _mm_slli_epi16(r1, 2));
            r0 = _mm_srli_epi16(_mm_add_epi16(r0, delta), 8);
            *(int*)(dst + x) = _mm_cvtsi128_si32(_mm_packus_epi16(r0, r0));
        }

        return x;
    }
};

struct PyrDownVec_32f
{
    int operator()(float** src, float* dst, int, int width) const
    {
        if( !checkHardwareSupport(CV_CPU_SSE) )
            return 0;

        int x = 0;
        const float *row0 = src[0], *row1 = src[1], *row2 = src[2], *row3 = src[3], *row4 = src[4];
        __m128 _4 = _mm_set1_ps(4.f), _scale = _mm_set1_ps(1.f/256);
        for( ; x <= width - 8; x += 8 )
        {
            __m128 r0, r1, r2, r3, r4, t0, t1;
            r0 = _mm_load_ps(row0 + x);
            r1 = _mm_load_ps(row1 + x);
            r2 = _mm_load_ps(row2 + x);
            r3 = _mm_load_ps(row3 + x);
            r4 = _mm_load_ps(row4 + x);
            r0 = _mm_add_ps(r0, r4);
            r1 = _mm_add_ps(_mm_add_ps(r1, r3), r2);
            r0 = _mm_add_ps(r0, _mm_add_ps(r2, r2));
            t0 = _mm_add_ps(r0, _mm_mul_ps(r1, _4));

            r0 = _mm_load_ps(row0 + x + 4);
            r1 = _mm_load_ps(row1 + x + 4);
            r2 = _mm_load_ps(row2 + x + 4);
            r3 = _mm_load_ps(row3 + x + 4);
            r4 = _mm_load_ps(row4 + x + 4);
            r0 = _mm_add_ps(r0, r4);
            r1 = _mm_add_ps(_mm_add_ps(r1, r3), r2);
            r0 = _mm_add_ps(r0, _mm_add_ps(r2, r2));
            t1 = _mm_add_ps(r0, _mm_mul_ps(r1, _4));

            t0 = _mm_mul_ps(t0, _scale);
            t1 = _mm_mul_ps(t1, _scale);

            _mm_storeu_ps(dst + x, t0);
            _mm_storeu_ps(dst + x + 4, t1);
        }

        return x;
    }
};

#if CV_SSE4_1

struct PyrDownVec_32s16u
{
    PyrDownVec_32s16u()
    {
        haveSSE = checkHardwareSupport(CV_CPU_SSE4_1);
    }

    int operator()(int** src, ushort* dst, int, int width) const
    {
        int x = 0;

        if (!haveSSE)
            return x;

        const int *row0 = src[0], *row1 = src[1], *row2 = src[2], *row3 = src[3], *row4 = src[4];
        __m128i v_delta = _mm_set1_epi32(128);

        for( ; x <= width - 8; x += 8 )
        {
            __m128i v_r00 = _mm_loadu_si128((__m128i const *)(row0 + x)),
                    v_r01 = _mm_loadu_si128((__m128i const *)(row0 + x + 4));
            __m128i v_r10 = _mm_loadu_si128((__m128i const *)(row1 + x)),
                    v_r11 = _mm_loadu_si128((__m128i const *)(row1 + x + 4));
            __m128i v_r20 = _mm_loadu_si128((__m128i const *)(row2 + x)),
                    v_r21 = _mm_loadu_si128((__m128i const *)(row2 + x + 4));
            __m128i v_r30 = _mm_loadu_si128((__m128i const *)(row3 + x)),
                    v_r31 = _mm_loadu_si128((__m128i const *)(row3 + x + 4));
            __m128i v_r40 = _mm_loadu_si128((__m128i const *)(row4 + x)),
                    v_r41 = _mm_loadu_si128((__m128i const *)(row4 + x + 4));

            v_r00 = _mm_add_epi32(_mm_add_epi32(v_r00, v_r40), _mm_add_epi32(v_r20, v_r20));
            v_r10 = _mm_add_epi32(_mm_add_epi32(v_r10, v_r20), v_r30);

            v_r10 = _mm_slli_epi32(v_r10, 2);
            __m128i v_dst0 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(v_r00, v_r10), v_delta), 8);

            v_r01 = _mm_add_epi32(_mm_add_epi32(v_r01, v_r41), _mm_add_epi32(v_r21, v_r21));
            v_r11 = _mm_add_epi32(_mm_add_epi32(v_r11, v_r21), v_r31);
            v_r11 = _mm_slli_epi32(v_r11, 2);
            __m128i v_dst1 = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(v_r01, v_r11), v_delta), 8);

            _mm_storeu_si128((__m128i *)(dst + x), _mm_packus_epi32(v_dst0, v_dst1));
        }

        return x;
    }

    bool haveSSE;
};

#else

typedef PyrDownNoVec<int, ushort> PyrDownVec_32s16u;

#endif // CV_SSE4_1

struct PyrDownVec_32s16s
{
    PyrDownVec_32s16s()
    {
        haveSSE = checkHardwareSupport(CV_CPU_SSE2);
    }

    int operator()(int** src, short* dst, int, int width) const
    {
        int x = 0;

        if (!haveSSE)
            return x;

        const int *row0 = src[0], *row1 = src[1], *row2 = src[2], *row3 = src[3], *row4 = src[4];
        __m128i v_delta = _mm_set1_epi32(128);

        for( ; x <= width - 8; x += 8 )
        {
            __m128i v_r00 = _mm_loadu_si128((__m128i const *)(row0 + x)),
                    v_r01 = _mm_loadu_si128((__m128i const *)(row0 + x + 4));
            __m128i v_r10 = _mm_loadu_si128((__m128i const *)(row1 + x)),
                    v_r11 = _mm_loadu_si128((__m128i const *)(row1 + x + 4));
            __m128i v_r20 = _mm_loadu_si128((__m128i const *)(row2 + x)),
                    v_r21 = _mm_loadu_si128((__m128i const *)(row2 + x + 4));
            __m128i v_r30 = _mm_loadu_si128((__m128i const *)(row3 + x)),
                    v_r31 = _mm_loadu_si128((__m128i const *)(row3 + x + 4));
            __m128i v_r40 = _mm_loadu_si128((__m128i const *)(row4 + x)),
                    v_r41 = _mm_loadu_si128((__m128i const *)(row4 + x + 4));

            v_r00 = _mm_add_epi32(_mm_add_epi32(v_r00, v_r40), _mm_add_epi32(v_r20, v_r20));
            v_r10 = _mm_add_epi32(_mm_add_epi32(v_r10, v_r20), v_r30);

            v_r10 = _mm_slli_epi32(v_r10, 2);
            __m128i v_dst0 = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(v_r00, v_r10), v_delta), 8);

            v_r01 = _mm_add_epi32(_mm_add_epi32(v_r01, v_r41), _mm_add_epi32(v_r21, v_r21));
            v_r11 = _mm_add_epi32(_mm_add_epi32(v_r11, v_r21), v_r31);
            v_r11 = _mm_slli_epi32(v_r11, 2);
            __m128i v_dst1 = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(v_r01, v_r11), v_delta), 8);

            _mm_storeu_si128((__m128i *)(dst + x), _mm_packs_epi32(v_dst0, v_dst1));
        }

        return x;
    }

    bool haveSSE;
};

struct PyrUpVec_32s8u
{
    int operator()(int** src, uchar** dst, int, int width) const
    {
        int x = 0;

        if (!checkHardwareSupport(CV_CPU_SSE2))
            return x;

        uchar *dst0 = dst[0], *dst1 = dst[1];
        const uint *row0 = (uint *)src[0], *row1 = (uint *)src[1], *row2 = (uint *)src[2];
        __m128i v_delta = _mm_set1_epi16(32), v_zero = _mm_setzero_si128();

        for( ; x <= width - 16; x += 16 )
        {
            __m128i v_r0 = _mm_packs_epi32(_mm_loadu_si128((__m128i const *)(row0 + x)),
                                           _mm_loadu_si128((__m128i const *)(row0 + x + 4)));
            __m128i v_r1 = _mm_packs_epi32(_mm_loadu_si128((__m128i const *)(row1 + x)),
                                           _mm_loadu_si128((__m128i const *)(row1 + x + 4)));
            __m128i v_r2 = _mm_packs_epi32(_mm_loadu_si128((__m128i const *)(row2 + x)),
                                           _mm_loadu_si128((__m128i const *)(row2 + x + 4)));

            __m128i v_2r1 = _mm_adds_epu16(v_r1, v_r1), v_4r1 = _mm_adds_epu16(v_2r1, v_2r1);
            __m128i v_dst00 = _mm_adds_epu16(_mm_adds_epu16(v_r0, v_r2), _mm_adds_epu16(v_2r1, v_4r1));
            __m128i v_dst10 = _mm_slli_epi16(_mm_adds_epu16(v_r1, v_r2), 2);

            v_r0 = _mm_packs_epi32(_mm_loadu_si128((__m128i const *)(row0 + x + 8)),
                                   _mm_loadu_si128((__m128i const *)(row0 + x + 12)));
            v_r1 = _mm_packs_epi32(_mm_loadu_si128((__m128i const *)(row1 + x + 8)),
                                   _mm_loadu_si128((__m128i const *)(row1 + x + 12)));
            v_r2 = _mm_packs_epi32(_mm_loadu_si128((__m128i const *)(row2 + x + 8)),
                                   _mm_loadu_si128((__m128i const *)(row2 + x + 12)));

            v_2r1 = _mm_adds_epu16(v_r1, v_r1), v_4r1 = _mm_adds_epu16(v_2r1, v_2r1);
            __m128i v_dst01 = _mm_adds_epu16(_mm_adds_epu16(v_r0, v_r2), _mm_adds_epu16(v_2r1, v_4r1));
            __m128i v_dst11 = _mm_slli_epi16(_mm_adds_epu16(v_r1, v_r2), 2);

            _mm_storeu_si128((__m128i *)(dst0 + x), _mm_packus_epi16(_mm_srli_epi16(_mm_adds_epu16(v_dst00, v_delta), 6),
                                                                     _mm_srli_epi16(_mm_adds_epu16(v_dst01, v_delta), 6)));
            _mm_storeu_si128((__m128i *)(dst1 + x), _mm_packus_epi16(_mm_srli_epi16(_mm_adds_epu16(v_dst10, v_delta), 6),
                                                                     _mm_srli_epi16(_mm_adds_epu16(v_dst11, v_delta), 6)));
        }

        for( ; x <= width - 8; x += 8 )
        {
            __m128i v_r0 = _mm_packs_epi32(_mm_loadu_si128((__m128i const *)(row0 + x)),
                                           _mm_loadu_si128((__m128i const *)(row0 + x + 4)));
            __m128i v_r1 = _mm_packs_epi32(_mm_loadu_si128((__m128i const *)(row1 + x)),
                                           _mm_loadu_si128((__m128i const *)(row1 + x + 4)));
            __m128i v_r2 = _mm_packs_epi32(_mm_loadu_si128((__m128i const *)(row2 + x)),
                                           _mm_loadu_si128((__m128i const *)(row2 + x + 4)));

            __m128i v_2r1 = _mm_adds_epu16(v_r1, v_r1), v_4r1 = _mm_adds_epu16(v_2r1, v_2r1);
            __m128i v_dst0 = _mm_adds_epu16(_mm_adds_epu16(v_r0, v_r2), _mm_adds_epu16(v_2r1, v_4r1));
            __m128i v_dst1 = _mm_slli_epi16(_mm_adds_epu16(v_r1, v_r2), 2);

            _mm_storel_epi64((__m128i *)(dst0 + x), _mm_packus_epi16(_mm_srli_epi16(_mm_adds_epu16(v_dst0, v_delta), 6), v_zero));
            _mm_storel_epi64((__m128i *)(dst1 + x), _mm_packus_epi16(_mm_srli_epi16(_mm_adds_epu16(v_dst1, v_delta), 6), v_zero));
        }

        return x;
    }
};

struct PyrUpVec_32s16s
{
    int operator()(int** src, short** dst, int, int width) const
    {
        int x = 0;

        if (!checkHardwareSupport(CV_CPU_SSE2))
            return x;

        short *dst0 = dst[0], *dst1 = dst[1];
        const uint *row0 = (uint *)src[0], *row1 = (uint *)src[1], *row2 = (uint *)src[2];
        __m128i v_delta = _mm_set1_epi32(32), v_zero = _mm_setzero_si128();

        for( ; x <= width - 8; x += 8 )
        {
            __m128i v_r0 = _mm_loadu_si128((__m128i const *)(row0 + x)),
                    v_r1 = _mm_loadu_si128((__m128i const *)(row1 + x)),
                    v_r2 = _mm_loadu_si128((__m128i const *)(row2 + x));
            __m128i v_2r1 = _mm_slli_epi32(v_r1, 1), v_4r1 = _mm_slli_epi32(v_r1, 2);
            __m128i v_dst00 = _mm_add_epi32(_mm_add_epi32(v_r0, v_r2), _mm_add_epi32(v_2r1, v_4r1));
            __m128i v_dst10 = _mm_slli_epi32(_mm_add_epi32(v_r1, v_r2), 2);

            v_r0 = _mm_loadu_si128((__m128i const *)(row0 + x + 4));
            v_r1 = _mm_loadu_si128((__m128i const *)(row1 + x + 4));
            v_r2 = _mm_loadu_si128((__m128i const *)(row2 + x + 4));
            v_2r1 = _mm_slli_epi32(v_r1, 1);
            v_4r1 = _mm_slli_epi32(v_r1, 2);
            __m128i v_dst01 = _mm_add_epi32(_mm_add_epi32(v_r0, v_r2), _mm_add_epi32(v_2r1, v_4r1));
            __m128i v_dst11 = _mm_slli_epi32(_mm_add_epi32(v_r1, v_r2), 2);

            _mm_storeu_si128((__m128i *)(dst0 + x),
                             _mm_packs_epi32(_mm_srai_epi32(_mm_add_epi32(v_dst00, v_delta), 6),
                                             _mm_srai_epi32(_mm_add_epi32(v_dst01, v_delta), 6)));
            _mm_storeu_si128((__m128i *)(dst1 + x),
                             _mm_packs_epi32(_mm_srai_epi32(_mm_add_epi32(v_dst10, v_delta), 6),
                                             _mm_srai_epi32(_mm_add_epi32(v_dst11, v_delta), 6)));
        }

        for( ; x <= width - 4; x += 4 )
        {
            __m128i v_r0 = _mm_loadu_si128((__m128i const *)(row0 + x)),
                    v_r1 = _mm_loadu_si128((__m128i const *)(row1 + x)),
                    v_r2 = _mm_loadu_si128((__m128i const *)(row2 + x));
            __m128i v_2r1 = _mm_slli_epi32(v_r1, 1), v_4r1 = _mm_slli_epi32(v_r1, 2);

            __m128i v_dst0 = _mm_add_epi32(_mm_add_epi32(v_r0, v_r2), _mm_add_epi32(v_2r1, v_4r1));
            __m128i v_dst1 = _mm_slli_epi32(_mm_add_epi32(v_r1, v_r2), 2);

            _mm_storel_epi64((__m128i *)(dst0 + x),
                             _mm_packs_epi32(_mm_srai_epi32(_mm_add_epi32(v_dst0, v_delta), 6), v_zero));
            _mm_storel_epi64((__m128i *)(dst1 + x),
                             _mm_packs_epi32(_mm_srai_epi32(_mm_add_epi32(v_dst1, v_delta), 6), v_zero));
        }

        return x;
    }
};

#if CV_SSE4_1

struct PyrUpVec_32s16u
{
    int operator()(int** src, ushort** dst, int, int width) const
    {
        int x = 0;

        if (!checkHardwareSupport(CV_CPU_SSE4_1))
            return x;

        ushort *dst0 = dst[0], *dst1 = dst[1];
        const uint *row0 = (uint *)src[0], *row1 = (uint *)src[1], *row2 = (uint *)src[2];
        __m128i v_delta = _mm_set1_epi32(32), v_zero = _mm_setzero_si128();

        for( ; x <= width - 8; x += 8 )
        {
            __m128i v_r0 = _mm_loadu_si128((__m128i const *)(row0 + x)),
                    v_r1 = _mm_loadu_si128((__m128i const *)(row1 + x)),
                    v_r2 = _mm_loadu_si128((__m128i const *)(row2 + x));
            __m128i v_2r1 = _mm_slli_epi32(v_r1, 1), v_4r1 = _mm_slli_epi32(v_r1, 2);
            __m128i v_dst00 = _mm_add_epi32(_mm_add_epi32(v_r0, v_r2), _mm_add_epi32(v_2r1, v_4r1));
            __m128i v_dst10 = _mm_slli_epi32(_mm_add_epi32(v_r1, v_r2), 2);

            v_r0 = _mm_loadu_si128((__m128i const *)(row0 + x + 4));
            v_r1 = _mm_loadu_si128((__m128i const *)(row1 + x + 4));
            v_r2 = _mm_loadu_si128((__m128i const *)(row2 + x + 4));
            v_2r1 = _mm_slli_epi32(v_r1, 1);
            v_4r1 = _mm_slli_epi32(v_r1, 2);
            __m128i v_dst01 = _mm_add_epi32(_mm_add_epi32(v_r0, v_r2), _mm_add_epi32(v_2r1, v_4r1));
            __m128i v_dst11 = _mm_slli_epi32(_mm_add_epi32(v_r1, v_r2), 2);

            _mm_storeu_si128((__m128i *)(dst0 + x),
                             _mm_packus_epi32(_mm_srli_epi32(_mm_add_epi32(v_dst00, v_delta), 6),
                                              _mm_srli_epi32(_mm_add_epi32(v_dst01, v_delta), 6)));
            _mm_storeu_si128((__m128i *)(dst1 + x),
                             _mm_packus_epi32(_mm_srli_epi32(_mm_add_epi32(v_dst10, v_delta), 6),
                                              _mm_srli_epi32(_mm_add_epi32(v_dst11, v_delta), 6)));
        }

        for( ; x <= width - 4; x += 4 )
        {
            __m128i v_r0 = _mm_loadu_si128((__m128i const *)(row0 + x)),
                    v_r1 = _mm_loadu_si128((__m128i const *)(row1 + x)),
                    v_r2 = _mm_loadu_si128((__m128i const *)(row2 + x));
            __m128i v_2r1 = _mm_slli_epi32(v_r1, 1), v_4r1 = _mm_slli_epi32(v_r1, 2);

            __m128i v_dst0 = _mm_add_epi32(_mm_add_epi32(v_r0, v_r2), _mm_add_epi32(v_2r1, v_4r1));
            __m128i v_dst1 = _mm_slli_epi32(_mm_add_epi32(v_r1, v_r2), 2);

            _mm_storel_epi64((__m128i *)(dst0 + x),
                             _mm_packus_epi32(_mm_srli_epi32(_mm_add_epi32(v_dst0, v_delta), 6), v_zero));
            _mm_storel_epi64((__m128i *)(dst1 + x),
                             _mm_packus_epi32(_mm_srli_epi32(_mm_add_epi32(v_dst1, v_delta), 6), v_zero));
        }

        return x;
    }
};

#else

typedef PyrUpNoVec<int, ushort> PyrUpVec_32s16u;

#endif // CV_SSE4_1

struct PyrUpVec_32f
{
    int operator()(float** src, float** dst, int, int width) const
    {
        int x = 0;

        if (!checkHardwareSupport(CV_CPU_SSE2))
            return x;

        const float *row0 = src[0], *row1 = src[1], *row2 = src[2];
        float *dst0 = dst[0], *dst1 = dst[1];
        __m128 v_6 = _mm_set1_ps(6.0f), v_scale = _mm_set1_ps(1.f/64.0f),
               v_scale4 = _mm_mul_ps(v_scale, _mm_set1_ps(4.0f));

        for( ; x <= width - 8; x += 8 )
        {
            __m128 v_r0 = _mm_loadu_ps(row0 + x);
            __m128 v_r1 = _mm_loadu_ps(row1 + x);
            __m128 v_r2 = _mm_loadu_ps(row2 + x);

            _mm_storeu_ps(dst1 + x, _mm_mul_ps(v_scale4, _mm_add_ps(v_r1, v_r2)));
            _mm_storeu_ps(dst0 + x, _mm_mul_ps(v_scale, _mm_add_ps(_mm_add_ps(v_r0, _mm_mul_ps(v_6, v_r1)), v_r2)));

            v_r0 = _mm_loadu_ps(row0 + x + 4);
            v_r1 = _mm_loadu_ps(row1 + x + 4);
            v_r2 = _mm_loadu_ps(row2 + x + 4);

            _mm_storeu_ps(dst1 + x + 4, _mm_mul_ps(v_scale4, _mm_add_ps(v_r1, v_r2)));
            _mm_storeu_ps(dst0 + x + 4, _mm_mul_ps(v_scale, _mm_add_ps(_mm_add_ps(v_r0, _mm_mul_ps(v_6, v_r1)), v_r2)));
        }

        return x;
    }
};

#elif CV_NEON

struct PyrDownVec_32s8u
{
    int operator()(int** src, uchar* dst, int, int width) const
    {
        int x = 0;
        const unsigned int *row0 = (unsigned int*)src[0], *row1 = (unsigned int*)src[1],
                           *row2 = (unsigned int*)src[2], *row3 = (unsigned int*)src[3],
                           *row4 = (unsigned int*)src[4];
        uint16x8_t v_delta = vdupq_n_u16(128);

        for( ; x <= width - 16; x += 16 )
        {
            uint16x8_t v_r0 = vcombine_u16(vqmovn_u32(vld1q_u32(row0 + x)), vqmovn_u32(vld1q_u32(row0 + x + 4)));
            uint16x8_t v_r1 = vcombine_u16(vqmovn_u32(vld1q_u32(row1 + x)), vqmovn_u32(vld1q_u32(row1 + x + 4)));
            uint16x8_t v_r2 = vcombine_u16(vqmovn_u32(vld1q_u32(row2 + x)), vqmovn_u32(vld1q_u32(row2 + x + 4)));
            uint16x8_t v_r3 = vcombine_u16(vqmovn_u32(vld1q_u32(row3 + x)), vqmovn_u32(vld1q_u32(row3 + x + 4)));
            uint16x8_t v_r4 = vcombine_u16(vqmovn_u32(vld1q_u32(row4 + x)), vqmovn_u32(vld1q_u32(row4 + x + 4)));

            v_r0 = vaddq_u16(vaddq_u16(v_r0, v_r4), vaddq_u16(v_r2, v_r2));
            v_r1 = vaddq_u16(vaddq_u16(v_r1, v_r2), v_r3);
            uint16x8_t v_dst0 = vaddq_u16(v_r0, vshlq_n_u16(v_r1, 2));

            v_r0 = vcombine_u16(vqmovn_u32(vld1q_u32(row0 + x + 8)), vqmovn_u32(vld1q_u32(row0 + x + 12)));
            v_r1 = vcombine_u16(vqmovn_u32(vld1q_u32(row1 + x + 8)), vqmovn_u32(vld1q_u32(row1 + x + 12)));
            v_r2 = vcombine_u16(vqmovn_u32(vld1q_u32(row2 + x + 8)), vqmovn_u32(vld1q_u32(row2 + x + 12)));
            v_r3 = vcombine_u16(vqmovn_u32(vld1q_u32(row3 + x + 8)), vqmovn_u32(vld1q_u32(row3 + x + 12)));
            v_r4 = vcombine_u16(vqmovn_u32(vld1q_u32(row4 + x + 8)), vqmovn_u32(vld1q_u32(row4 + x + 12)));

            v_r0 = vaddq_u16(vaddq_u16(v_r0, v_r4), vaddq_u16(v_r2, v_r2));
            v_r1 = vaddq_u16(vaddq_u16(v_r1, v_r2), v_r3);
            uint16x8_t v_dst1 = vaddq_u16(v_r0, vshlq_n_u16(v_r1, 2));

            vst1q_u8(dst + x, vcombine_u8(vqmovn_u16(vshrq_n_u16(vaddq_u16(v_dst0, v_delta), 8)),
                                          vqmovn_u16(vshrq_n_u16(vaddq_u16(v_dst1, v_delta), 8))));
        }

        return x;
    }
};

struct PyrDownVec_32s16u
{
    int operator()(int** src, ushort* dst, int, int width) const
    {
        int x = 0;
        const int *row0 = src[0], *row1 = src[1], *row2 = src[2], *row3 = src[3], *row4 = src[4];
        int32x4_t v_delta = vdupq_n_s32(128);

        for( ; x <= width - 8; x += 8 )
        {
            int32x4_t v_r00 = vld1q_s32(row0 + x), v_r01 = vld1q_s32(row0 + x + 4);
            int32x4_t v_r10 = vld1q_s32(row1 + x), v_r11 = vld1q_s32(row1 + x + 4);
            int32x4_t v_r20 = vld1q_s32(row2 + x), v_r21 = vld1q_s32(row2 + x + 4);
            int32x4_t v_r30 = vld1q_s32(row3 + x), v_r31 = vld1q_s32(row3 + x + 4);
            int32x4_t v_r40 = vld1q_s32(row4 + x), v_r41 = vld1q_s32(row4 + x + 4);

            v_r00 = vaddq_s32(vaddq_s32(v_r00, v_r40), vaddq_s32(v_r20, v_r20));
            v_r10 = vaddq_s32(vaddq_s32(v_r10, v_r20), v_r30);

            v_r10 = vshlq_n_s32(v_r10, 2);
            int32x4_t v_dst0 = vshrq_n_s32(vaddq_s32(vaddq_s32(v_r00, v_r10), v_delta), 8);

            v_r01 = vaddq_s32(vaddq_s32(v_r01, v_r41), vaddq_s32(v_r21, v_r21));
            v_r11 = vaddq_s32(vaddq_s32(v_r11, v_r21), v_r31);
            v_r11 = vshlq_n_s32(v_r11, 2);
            int32x4_t v_dst1 = vshrq_n_s32(vaddq_s32(vaddq_s32(v_r01, v_r11), v_delta), 8);

            vst1q_u16(dst + x, vcombine_u16(vqmovun_s32(v_dst0), vqmovun_s32(v_dst1)));
        }

        return x;
    }
};

struct PyrDownVec_32s16s
{
    int operator()(int** src, short* dst, int, int width) const
    {
        int x = 0;
        const int *row0 = src[0], *row1 = src[1], *row2 = src[2], *row3 = src[3], *row4 = src[4];
        int32x4_t v_delta = vdupq_n_s32(128);

        for( ; x <= width - 8; x += 8 )
        {
            int32x4_t v_r00 = vld1q_s32(row0 + x), v_r01 = vld1q_s32(row0 + x + 4);
            int32x4_t v_r10 = vld1q_s32(row1 + x), v_r11 = vld1q_s32(row1 + x + 4);
            int32x4_t v_r20 = vld1q_s32(row2 + x), v_r21 = vld1q_s32(row2 + x + 4);
            int32x4_t v_r30 = vld1q_s32(row3 + x), v_r31 = vld1q_s32(row3 + x + 4);
            int32x4_t v_r40 = vld1q_s32(row4 + x), v_r41 = vld1q_s32(row4 + x + 4);

            v_r00 = vaddq_s32(vaddq_s32(v_r00, v_r40), vaddq_s32(v_r20, v_r20));
            v_r10 = vaddq_s32(vaddq_s32(v_r10, v_r20), v_r30);
            v_r10 = vshlq_n_s32(v_r10, 2);
            int32x4_t v_dst0 = vshrq_n_s32(vaddq_s32(vaddq_s32(v_r00, v_r10), v_delta), 8);

            v_r01 = vaddq_s32(vaddq_s32(v_r01, v_r41), vaddq_s32(v_r21, v_r21));
            v_r11 = vaddq_s32(vaddq_s32(v_r11, v_r21), v_r31);
            v_r11 = vshlq_n_s32(v_r11, 2);
            int32x4_t v_dst1 = vshrq_n_s32(vaddq_s32(vaddq_s32(v_r01, v_r11), v_delta), 8);

            vst1q_s16(dst + x, vcombine_s16(vqmovn_s32(v_dst0), vqmovn_s32(v_dst1)));
        }

        return x;
    }
};

struct PyrDownVec_32f
{
    int operator()(float** src, float* dst, int, int width) const
    {
        int x = 0;
        const float *row0 = src[0], *row1 = src[1], *row2 = src[2], *row3 = src[3], *row4 = src[4];
        float32x4_t v_4 = vdupq_n_f32(4.0f), v_scale = vdupq_n_f32(1.f/256.0f);

        for( ; x <= width - 8; x += 8 )
        {
            float32x4_t v_r0 = vld1q_f32(row0 + x);
            float32x4_t v_r1 = vld1q_f32(row1 + x);
            float32x4_t v_r2 = vld1q_f32(row2 + x);
            float32x4_t v_r3 = vld1q_f32(row3 + x);
            float32x4_t v_r4 = vld1q_f32(row4 + x);

            v_r0 = vaddq_f32(vaddq_f32(v_r0, v_r4), vaddq_f32(v_r2, v_r2));
            v_r1 = vaddq_f32(vaddq_f32(v_r1, v_r2), v_r3);
            vst1q_f32(dst + x, vmulq_f32(vmlaq_f32(v_r0, v_4, v_r1), v_scale));

            v_r0 = vld1q_f32(row0 + x + 4);
            v_r1 = vld1q_f32(row1 + x + 4);
            v_r2 = vld1q_f32(row2 + x + 4);
            v_r3 = vld1q_f32(row3 + x + 4);
            v_r4 = vld1q_f32(row4 + x + 4);

            v_r0 = vaddq_f32(vaddq_f32(v_r0, v_r4), vaddq_f32(v_r2, v_r2));
            v_r1 = vaddq_f32(vaddq_f32(v_r1, v_r2), v_r3);
            vst1q_f32(dst + x + 4, vmulq_f32(vmlaq_f32(v_r0, v_4, v_r1), v_scale));
        }

        return x;
    }
};

struct PyrUpVec_32s8u
{
    int operator()(int** src, uchar** dst, int, int width) const
    {
        int x = 0;
        uchar *dst0 = dst[0], *dst1 = dst[1];
        const uint *row0 = (uint *)src[0], *row1 = (uint *)src[1], *row2 = (uint *)src[2];
        uint16x8_t v_delta = vdupq_n_u16(32);

        for( ; x <= width - 16; x += 16 )
        {
            uint16x8_t v_r0 = vcombine_u16(vqmovn_u32(vld1q_u32(row0 + x)), vqmovn_u32(vld1q_u32(row0 + x + 4)));
            uint16x8_t v_r1 = vcombine_u16(vqmovn_u32(vld1q_u32(row1 + x)), vqmovn_u32(vld1q_u32(row1 + x + 4)));
            uint16x8_t v_r2 = vcombine_u16(vqmovn_u32(vld1q_u32(row2 + x)), vqmovn_u32(vld1q_u32(row2 + x + 4)));

            uint16x8_t v_2r1 = vaddq_u16(v_r1, v_r1), v_4r1 = vaddq_u16(v_2r1, v_2r1);
            uint16x8_t v_dst00 = vaddq_u16(vaddq_u16(v_r0, v_r2), vaddq_u16(v_2r1, v_4r1));
            uint16x8_t v_dst10 = vshlq_n_u16(vaddq_u16(v_r1, v_r2), 2);

            v_r0 = vcombine_u16(vqmovn_u32(vld1q_u32(row0 + x + 8)), vqmovn_u32(vld1q_u32(row0 + x + 12)));
            v_r1 = vcombine_u16(vqmovn_u32(vld1q_u32(row1 + x + 8)), vqmovn_u32(vld1q_u32(row1 + x + 12)));
            v_r2 = vcombine_u16(vqmovn_u32(vld1q_u32(row2 + x + 8)), vqmovn_u32(vld1q_u32(row2 + x + 12)));

            v_2r1 = vaddq_u16(v_r1, v_r1), v_4r1 = vaddq_u16(v_2r1, v_2r1);
            uint16x8_t v_dst01 = vaddq_u16(vaddq_u16(v_r0, v_r2), vaddq_u16(v_2r1, v_4r1));
            uint16x8_t v_dst11 = vshlq_n_u16(vaddq_u16(v_r1, v_r2), 2);

            vst1q_u8(dst0 + x, vcombine_u8(vqmovn_u16(vshrq_n_u16(vaddq_u16(v_dst00, v_delta), 6)),
                                           vqmovn_u16(vshrq_n_u16(vaddq_u16(v_dst01, v_delta), 6))));
            vst1q_u8(dst1 + x, vcombine_u8(vqmovn_u16(vshrq_n_u16(vaddq_u16(v_dst10, v_delta), 6)),
                                           vqmovn_u16(vshrq_n_u16(vaddq_u16(v_dst11, v_delta), 6))));
        }

        for( ; x <= width - 8; x += 8 )
        {
            uint16x8_t v_r0 = vcombine_u16(vqmovn_u32(vld1q_u32(row0 + x)), vqmovn_u32(vld1q_u32(row0 + x + 4)));
            uint16x8_t v_r1 = vcombine_u16(vqmovn_u32(vld1q_u32(row1 + x)), vqmovn_u32(vld1q_u32(row1 + x + 4)));
            uint16x8_t v_r2 = vcombine_u16(vqmovn_u32(vld1q_u32(row2 + x)), vqmovn_u32(vld1q_u32(row2 + x + 4)));

            uint16x8_t v_2r1 = vaddq_u16(v_r1, v_r1), v_4r1 = vaddq_u16(v_2r1, v_2r1);
            uint16x8_t v_dst0 = vaddq_u16(vaddq_u16(v_r0, v_r2), vaddq_u16(v_2r1, v_4r1));
            uint16x8_t v_dst1 = vshlq_n_u16(vaddq_u16(v_r1, v_r2), 2);

            vst1_u8(dst0 + x, vqmovn_u16(vshrq_n_u16(vaddq_u16(v_dst0, v_delta), 6)));
            vst1_u8(dst1 + x, vqmovn_u16(vshrq_n_u16(vaddq_u16(v_dst1, v_delta), 6)));
        }

        return x;
    }
};

struct PyrUpVec_32s16u
{
    int operator()(int** src, ushort** dst, int, int width) const
    {
        int x = 0;
        ushort *dst0 = dst[0], *dst1 = dst[1];
        const uint *row0 = (uint *)src[0], *row1 = (uint *)src[1], *row2 = (uint *)src[2];
        uint32x4_t v_delta = vdupq_n_u32(32);

        for( ; x <= width - 8; x += 8 )
        {
            uint32x4_t v_r0 = vld1q_u32(row0 + x), v_r1 = vld1q_u32(row1 + x), v_r2 = vld1q_u32(row2 + x);
            uint32x4_t v_2r1 = vshlq_n_u32(v_r1, 1), v_4r1 = vshlq_n_u32(v_r1, 2);
            uint32x4_t v_dst00 = vaddq_u32(vaddq_u32(v_r0, v_r2), vaddq_u32(v_2r1, v_4r1));
            uint32x4_t v_dst10 = vshlq_n_u32(vaddq_u32(v_r1, v_r2), 2);

            v_r0 = vld1q_u32(row0 + x + 4);
            v_r1 = vld1q_u32(row1 + x + 4);
            v_r2 = vld1q_u32(row2 + x + 4);
            v_2r1 = vshlq_n_u32(v_r1, 1);
            v_4r1 = vshlq_n_u32(v_r1, 2);
            uint32x4_t v_dst01 = vaddq_u32(vaddq_u32(v_r0, v_r2), vaddq_u32(v_2r1, v_4r1));
            uint32x4_t v_dst11 = vshlq_n_u32(vaddq_u32(v_r1, v_r2), 2);

            vst1q_u16(dst0 + x, vcombine_u16(vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst00, v_delta), 6)),
                                             vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst01, v_delta), 6))));
            vst1q_u16(dst1 + x, vcombine_u16(vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst10, v_delta), 6)),
                                             vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst11, v_delta), 6))));
        }

        for( ; x <= width - 4; x += 4 )
        {
            uint32x4_t v_r0 = vld1q_u32(row0 + x), v_r1 = vld1q_u32(row1 + x), v_r2 = vld1q_u32(row2 + x);
            uint32x4_t v_2r1 = vshlq_n_u32(v_r1, 1), v_4r1 = vshlq_n_u32(v_r1, 2);

            uint32x4_t v_dst0 = vaddq_u32(vaddq_u32(v_r0, v_r2), vaddq_u32(v_2r1, v_4r1));
            uint32x4_t v_dst1 = vshlq_n_u32(vaddq_u32(v_r1, v_r2), 2);

            vst1_u16(dst0 + x, vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst0, v_delta), 6)));
            vst1_u16(dst1 + x, vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst1, v_delta), 6)));
        }

        return x;
    }
};

struct PyrUpVec_32s16s
{
    int operator()(int** src, short** dst, int, int width) const
    {
        int x = 0;
        short *dst0 = dst[0], *dst1 = dst[1];
        const int *row0 = src[0], *row1 = src[1], *row2 = src[2];
        int32x4_t v_delta = vdupq_n_s32(32);

        for( ; x <= width - 8; x += 8 )
        {
            int32x4_t v_r0 = vld1q_s32(row0 + x), v_r1 = vld1q_s32(row1 + x), v_r2 = vld1q_s32(row2 + x);
            int32x4_t v_2r1 = vshlq_n_s32(v_r1, 1), v_4r1 = vshlq_n_s32(v_r1, 2);
            int32x4_t v_dst00 = vaddq_s32(vaddq_s32(v_r0, v_r2), vaddq_s32(v_2r1, v_4r1));
            int32x4_t v_dst10 = vshlq_n_s32(vaddq_s32(v_r1, v_r2), 2);

            v_r0 = vld1q_s32(row0 + x + 4);
            v_r1 = vld1q_s32(row1 + x + 4);
            v_r2 = vld1q_s32(row2 + x + 4);
            v_2r1 = vshlq_n_s32(v_r1, 1);
            v_4r1 = vshlq_n_s32(v_r1, 2);
            int32x4_t v_dst01 = vaddq_s32(vaddq_s32(v_r0, v_r2), vaddq_s32(v_2r1, v_4r1));
            int32x4_t v_dst11 = vshlq_n_s32(vaddq_s32(v_r1, v_r2), 2);

            vst1q_s16(dst0 + x, vcombine_s16(vqmovn_s32(vshrq_n_s32(vaddq_s32(v_dst00, v_delta), 6)),
                                             vqmovn_s32(vshrq_n_s32(vaddq_s32(v_dst01, v_delta), 6))));
            vst1q_s16(dst1 + x, vcombine_s16(vqmovn_s32(vshrq_n_s32(vaddq_s32(v_dst10, v_delta), 6)),
                                             vqmovn_s32(vshrq_n_s32(vaddq_s32(v_dst11, v_delta), 6))));
        }

        for( ; x <= width - 4; x += 4 )
        {
            int32x4_t v_r0 = vld1q_s32(row0 + x), v_r1 = vld1q_s32(row1 + x), v_r2 = vld1q_s32(row2 + x);
            int32x4_t v_2r1 = vshlq_n_s32(v_r1, 1), v_4r1 = vshlq_n_s32(v_r1, 2);

            int32x4_t v_dst0 = vaddq_s32(vaddq_s32(v_r0, v_r2), vaddq_s32(v_2r1, v_4r1));
            int32x4_t v_dst1 = vshlq_n_s32(vaddq_s32(v_r1, v_r2), 2);

            vst1_s16(dst0 + x, vqmovn_s32(vshrq_n_s32(vaddq_s32(v_dst0, v_delta), 6)));
            vst1_s16(dst1 + x, vqmovn_s32(vshrq_n_s32(vaddq_s32(v_dst1, v_delta), 6)));
        }

        return x;
    }
};

struct PyrUpVec_32f
{
    int operator()(float** src, float** dst, int, int width) const
    {
        int x = 0;
        const float *row0 = src[0], *row1 = src[1], *row2 = src[2];
        float *dst0 = dst[0], *dst1 = dst[1];
        float32x4_t v_6 = vdupq_n_f32(6.0f), v_scale = vdupq_n_f32(1.f/64.0f), v_scale4 = vmulq_n_f32(v_scale, 4.0f);

        for( ; x <= width - 8; x += 8 )
        {
            float32x4_t v_r0 = vld1q_f32(row0 + x);
            float32x4_t v_r1 = vld1q_f32(row1 + x);
            float32x4_t v_r2 = vld1q_f32(row2 + x);

            vst1q_f32(dst1 + x, vmulq_f32(v_scale4, vaddq_f32(v_r1, v_r2)));
            vst1q_f32(dst0 + x, vmulq_f32(v_scale, vaddq_f32(vmlaq_f32(v_r0, v_6, v_r1), v_r2)));

            v_r0 = vld1q_f32(row0 + x + 4);
            v_r1 = vld1q_f32(row1 + x + 4);
            v_r2 = vld1q_f32(row2 + x + 4);

            vst1q_f32(dst1 + x + 4, vmulq_f32(v_scale4, vaddq_f32(v_r1, v_r2)));
            vst1q_f32(dst0 + x + 4, vmulq_f32(v_scale, vaddq_f32(vmlaq_f32(v_r0, v_6, v_r1), v_r2)));
        }

        return x;
    }
};

#else

typedef PyrDownNoVec<int, uchar> PyrDownVec_32s8u;
typedef PyrDownNoVec<int, ushort> PyrDownVec_32s16u;
typedef PyrDownNoVec<int, short> PyrDownVec_32s16s;
typedef PyrDownNoVec<float, float> PyrDownVec_32f;

typedef PyrUpNoVec<int, uchar> PyrUpVec_32s8u;
typedef PyrUpNoVec<int, short> PyrUpVec_32s16s;
typedef PyrUpNoVec<int, ushort> PyrUpVec_32s16u;
typedef PyrUpNoVec<float, float> PyrUpVec_32f;

#endif

template<class CastOp, class VecOp> void
pyrDown_( const Mat& _src, Mat& _dst, int borderType )
{
    const int PD_SZ = 5;
    typedef typename CastOp::type1 WT;
    typedef typename CastOp::rtype T;

    CV_Assert( !_src.empty() );
    Size ssize = _src.size(), dsize = _dst.size();
    int cn = _src.channels();
    int bufstep = (int)alignSize(dsize.width*cn, 16);
    AutoBuffer<WT> _buf(bufstep*PD_SZ + 16);
    WT* buf = alignPtr((WT*)_buf, 16);
    int tabL[CV_CN_MAX*(PD_SZ+2)], tabR[CV_CN_MAX*(PD_SZ+2)];
    AutoBuffer<int> _tabM(dsize.width*cn);
    int* tabM = _tabM;
    WT* rows[PD_SZ];
    CastOp castOp;
    VecOp vecOp;

    CV_Assert( ssize.width > 0 && ssize.height > 0 &&
               std::abs(dsize.width*2 - ssize.width) <= 2 &&
               std::abs(dsize.height*2 - ssize.height) <= 2 );
    int k, x, sy0 = -PD_SZ/2, sy = sy0, width0 = std::min((ssize.width-PD_SZ/2-1)/2 + 1, dsize.width);

    for( x = 0; x <= PD_SZ+1; x++ )
    {
        int sx0 = borderInterpolate(x - PD_SZ/2, ssize.width, borderType)*cn;
        int sx1 = borderInterpolate(x + width0*2 - PD_SZ/2, ssize.width, borderType)*cn;
        for( k = 0; k < cn; k++ )
        {
            tabL[x*cn + k] = sx0 + k;
            tabR[x*cn + k] = sx1 + k;
        }
    }

    ssize.width *= cn;
    dsize.width *= cn;
    width0 *= cn;

    for( x = 0; x < dsize.width; x++ )
        tabM[x] = (x/cn)*2*cn + x % cn;

    for( int y = 0; y < dsize.height; y++ )
    {
        T* dst = _dst.ptr<T>(y);
        WT *row0, *row1, *row2, *row3, *row4;

        // fill the ring buffer (horizontal convolution and decimation)
        for( ; sy <= y*2 + 2; sy++ )
        {
            WT* row = buf + ((sy - sy0) % PD_SZ)*bufstep;
            int _sy = borderInterpolate(sy, ssize.height, borderType);
            const T* src = _src.ptr<T>(_sy);
            int limit = cn;
            const int* tab = tabL;

            for( x = 0;;)
            {
                for( ; x < limit; x++ )
                {
                    row[x] = src[tab[x+cn*2]]*6 + (src[tab[x+cn]] + src[tab[x+cn*3]])*4 +
                             src[tab[x]] + src[tab[x+cn*4]];
                }

                if( x == dsize.width )
                    break;

                if( cn == 1 )
                {
                    for( ; x < width0; x++ )
                        row[x] = src[x*2]*6 + (src[x*2 - 1] + src[x*2 + 1])*4 +
                                 src[x*2 - 2] + src[x*2 + 2];
                }
                else if( cn == 3 )
                {
                    for( ; x < width0; x += 3 )
                    {
                        const T* s = src + x*2;
                        WT t0 = s[0]*6 + (s[-3] + s[3])*4 + s[-6] + s[6];
                        WT t1 = s[1]*6 + (s[-2] + s[4])*4 + s[-5] + s[7];
                        WT t2 = s[2]*6 + (s[-1] + s[5])*4 + s[-4] + s[8];
                        row[x] = t0; row[x+1] = t1; row[x+2] = t2;
                    }
                }
                else if( cn == 4 )
                {
                    for( ; x < width0; x += 4 )
                    {
                        const T* s = src + x*2;
                        WT t0 = s[0]*6 + (s[-4] + s[4])*4 + s[-8] + s[8];
                        WT t1 = s[1]*6 + (s[-3] + s[5])*4 + s[-7] + s[9];
                        row[x] = t0; row[x+1] = t1;
                        t0 = s[2]*6 + (s[-2] + s[6])*4 + s[-6] + s[10];
                        t1 = s[3]*6 + (s[-1] + s[7])*4 + s[-5] + s[11];
                        row[x+2] = t0; row[x+3] = t1;
                    }
                }
                else
                {
                    for( ; x < width0; x++ )
                    {
                        int sx = tabM[x];
                        row[x] = src[sx]*6 + (src[sx - cn] + src[sx + cn])*4 +
                                 src[sx - cn*2] + src[sx + cn*2];
                    }
                }

                limit = dsize.width;
                tab = tabR - x;
            }
        }

        // do vertical convolution and decimation and write the result to the destination image
        for( k = 0; k < PD_SZ; k++ )
            rows[k] = buf + ((y*2 - PD_SZ/2 + k - sy0) % PD_SZ)*bufstep;
        row0 = rows[0]; row1 = rows[1]; row2 = rows[2]; row3 = rows[3]; row4 = rows[4];

        x = vecOp(rows, dst, (int)_dst.step, dsize.width);
        for( ; x < dsize.width; x++ )
            dst[x] = castOp(row2[x]*6 + (row1[x] + row3[x])*4 + row0[x] + row4[x]);
    }
}


template<class CastOp, class VecOp> void
pyrUp_( const Mat& _src, Mat& _dst, int)
{
    const int PU_SZ = 3;
    typedef typename CastOp::type1 WT;
    typedef typename CastOp::rtype T;

    Size ssize = _src.size(), dsize = _dst.size();
    int cn = _src.channels();
    int bufstep = (int)alignSize((dsize.width+1)*cn, 16);
    AutoBuffer<WT> _buf(bufstep*PU_SZ + 16);
    WT* buf = alignPtr((WT*)_buf, 16);
    AutoBuffer<int> _dtab(ssize.width*cn);
    int* dtab = _dtab;
    WT* rows[PU_SZ];
    T* dsts[2];
    CastOp castOp;
    VecOp vecOp;

    CV_Assert( std::abs(dsize.width - ssize.width*2) == dsize.width % 2 &&
               std::abs(dsize.height - ssize.height*2) == dsize.height % 2);
    int k, x, sy0 = -PU_SZ/2, sy = sy0;

    ssize.width *= cn;
    dsize.width *= cn;

    for( x = 0; x < ssize.width; x++ )
        dtab[x] = (x/cn)*2*cn + x % cn;

    for( int y = 0; y < ssize.height; y++ )
    {
        T* dst0 = _dst.ptr<T>(y*2);
        T* dst1 = _dst.ptr<T>(std::min(y*2+1, dsize.height-1));
        WT *row0, *row1, *row2;

        // fill the ring buffer (horizontal convolution and decimation)
        for( ; sy <= y + 1; sy++ )
        {
            WT* row = buf + ((sy - sy0) % PU_SZ)*bufstep;
            int _sy = borderInterpolate(sy*2, dsize.height, BORDER_REFLECT_101)/2;
            const T* src = _src.ptr<T>(_sy);

            if( ssize.width == cn )
            {
                for( x = 0; x < cn; x++ )
                    row[x] = row[x + cn] = src[x]*8;
                continue;
            }

            for( x = 0; x < cn; x++ )
            {
                int dx = dtab[x];
                WT t0 = src[x]*6 + src[x + cn]*2;
                WT t1 = (src[x] + src[x + cn])*4;
                row[dx] = t0; row[dx + cn] = t1;
                dx = dtab[ssize.width - cn + x];
                int sx = ssize.width - cn + x;
                t0 = src[sx - cn] + src[sx]*7;
                t1 = src[sx]*8;
                row[dx] = t0; row[dx + cn] = t1;
            }

            for( x = cn; x < ssize.width - cn; x++ )
            {
                int dx = dtab[x];
                WT t0 = src[x-cn] + src[x]*6 + src[x+cn];
                WT t1 = (src[x] + src[x+cn])*4;
                row[dx] = t0;
                row[dx+cn] = t1;
            }
        }

        // do vertical convolution and decimation and write the result to the destination image
        for( k = 0; k < PU_SZ; k++ )
            rows[k] = buf + ((y - PU_SZ/2 + k - sy0) % PU_SZ)*bufstep;
        row0 = rows[0]; row1 = rows[1]; row2 = rows[2];
        dsts[0] = dst0; dsts[1] = dst1;

        x = vecOp(rows, dsts, (int)_dst.step, dsize.width);
        for( ; x < dsize.width; x++ )
        {
            T t1 = castOp((row1[x] + row2[x])*4);
            T t0 = castOp(row0[x] + row1[x]*6 + row2[x]);
            dst1[x] = t1; dst0[x] = t0;
        }
    }
}

typedef void (*PyrFunc)(const Mat&, Mat&, int);

#ifdef HAVE_OPENCL

static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);

    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    if (cn > 4 || (depth == CV_64F && !doubleSupport))
        return false;

    Size ssize = _src.size();
    Size dsize = _dsz.area() == 0 ? Size((ssize.width + 1) / 2, (ssize.height + 1) / 2) : _dsz;
    if (dsize.height < 2 || dsize.width < 2)
        return false;

    CV_Assert( ssize.width > 0 && ssize.height > 0 &&
               std::abs(dsize.width*2 - ssize.width) <= 2 &&
               std::abs(dsize.height*2 - ssize.height) <= 2 );

    UMat src = _src.getUMat();
    _dst.create( dsize, src.type() );
    UMat dst = _dst.getUMat();

    int float_depth = depth == CV_64F ? CV_64F : CV_32F;
    const int local_size = 256;
    int kercn = 1;
    if (depth == CV_8U && float_depth == CV_32F && cn == 1 && ocl::Device::getDefault().isIntel())
        kercn = 4;
    const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
                                       "BORDER_REFLECT_101" };
    char cvt[2][50];
    String buildOptions = format(
            "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
            "-D T1=%s -D cn=%d -D kercn=%d -D fdepth=%d -D %s -D LOCAL_SIZE=%d",
            ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, cn)),
            ocl::convertTypeStr(float_depth, depth, cn, cvt[0]),
            ocl::convertTypeStr(depth, float_depth, cn, cvt[1]),
            doubleSupport ? " -D DOUBLE_SUPPORT" : "", ocl::typeToStr(depth),
            cn, kercn, float_depth, borderMap[borderType], local_size
    );
    ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc, buildOptions);
    if (k.empty())
        return false;

    k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));

    size_t localThreads[2] = { (size_t)local_size/kercn, 1 };
    size_t globalThreads[2] = { ((size_t)src.cols + (kercn-1))/kercn, ((size_t)dst.rows + 1) / 2 };
    return k.run(2, globalThreads, localThreads, false);
}

static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
{
    int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);

    if (channels > 4 || borderType != BORDER_DEFAULT)
        return false;

    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    if (depth == CV_64F && !doubleSupport)
        return false;

    Size ssize = _src.size();
    if ((_dsz.area() != 0) && (_dsz != Size(ssize.width * 2, ssize.height * 2)))
        return false;

    UMat src = _src.getUMat();
    Size dsize = Size(ssize.width * 2, ssize.height * 2);
    _dst.create( dsize, src.type() );
    UMat dst = _dst.getUMat();

    int float_depth = depth == CV_64F ? CV_64F : CV_32F;
    const int local_size = 16;
    char cvt[2][50];
    String buildOptions = format(
            "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s "
            "-D T1=%s -D cn=%d -D LOCAL_SIZE=%d",
            ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
            ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
            ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
            doubleSupport ? " -D DOUBLE_SUPPORT" : "",
            ocl::typeToStr(depth), channels, local_size
    );
    size_t globalThreads[2] = { (size_t)dst.cols, (size_t)dst.rows };
    size_t localThreads[2] = { (size_t)local_size, (size_t)local_size };
    ocl::Kernel k;
    if (ocl::Device::getDefault().isIntel() && channels == 1)
    {
        k.create("pyrUp_unrolled", ocl::imgproc::pyr_up_oclsrc, buildOptions);
        globalThreads[0] = dst.cols/2; globalThreads[1] = dst.rows/2;
    }
    else
        k.create("pyrUp", ocl::imgproc::pyr_up_oclsrc, buildOptions);

    if (k.empty())
        return false;

    k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));
    return k.run(2, globalThreads, localThreads, false);
}

#endif

}

#if defined(HAVE_IPP)
namespace cv
{
static bool ipp_pyrdown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType )
{
#if IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK
    Size dsz = _dsz.area() == 0 ? Size((_src.cols() + 1)/2, (_src.rows() + 1)/2) : _dsz;
    bool isolated = (borderType & BORDER_ISOLATED) != 0;
    int borderTypeNI = borderType & ~BORDER_ISOLATED;

    Mat src = _src.getMat();
    _dst.create( dsz, src.type() );
    Mat dst = _dst.getMat();
    int depth = src.depth();


    {
        bool isolated = (borderType & BORDER_ISOLATED) != 0;
        int borderTypeNI = borderType & ~BORDER_ISOLATED;
        if (borderTypeNI == BORDER_DEFAULT && (!src.isSubmatrix() || isolated) && dsz == Size(src.cols*2, src.rows*2))
        {
            typedef IppStatus (CV_STDCALL * ippiPyrUp)(const void* pSrc, int srcStep, void* pDst, int dstStep, IppiSize srcRoi, Ipp8u* buffer);
            int type = src.type();
            CV_SUPPRESS_DEPRECATED_START
            ippiPyrUp pyrUpFunc = type == CV_8UC1 ? (ippiPyrUp) ippiPyrUp_Gauss5x5_8u_C1R :
                                  type == CV_8UC3 ? (ippiPyrUp) ippiPyrUp_Gauss5x5_8u_C3R :
                                  type == CV_32FC1 ? (ippiPyrUp) ippiPyrUp_Gauss5x5_32f_C1R :
                                  type == CV_32FC3 ? (ippiPyrUp) ippiPyrUp_Gauss5x5_32f_C3R : 0;
            CV_SUPPRESS_DEPRECATED_END

            if (pyrUpFunc)
            {
                int bufferSize;
                IppiSize srcRoi = { src.cols, src.rows };
                IppDataType dataType = depth == CV_8U ? ipp8u : ipp32f;
                CV_SUPPRESS_DEPRECATED_START
                IppStatus ok = ippiPyrUpGetBufSize_Gauss5x5(srcRoi.width, dataType, src.channels(), &bufferSize);
                CV_SUPPRESS_DEPRECATED_END
                if (ok >= 0)
                {
                    Ipp8u* buffer = ippsMalloc_8u(bufferSize);
                    ok = pyrUpFunc(src.data, (int) src.step, dst.data, (int) dst.step, srcRoi, buffer);
                    ippsFree(buffer);

                    if (ok >= 0)
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP);
                        return true;
                    }
                }
            }
        }
    }
#else
    CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(_dsz); CV_UNUSED(borderType);
#endif
    return false;
}
}
#endif

void cv::pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType )
{
    CV_Assert(borderType != BORDER_CONSTANT);

#ifdef HAVE_OPENCL
    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
               ocl_pyrDown(_src, _dst, _dsz, borderType))
#endif

    Mat src = _src.getMat();
    Size dsz = _dsz.area() == 0 ? Size((src.cols + 1)/2, (src.rows + 1)/2) : _dsz;
    _dst.create( dsz, src.type() );
    Mat dst = _dst.getMat();
    int depth = src.depth();

#ifdef HAVE_TEGRA_OPTIMIZATION
    if(borderType == BORDER_DEFAULT && tegra::useTegra() && tegra::pyrDown(src, dst))
        return;
#endif

#ifdef HAVE_IPP
    bool isolated = (borderType & BORDER_ISOLATED) != 0;
    int borderTypeNI = borderType & ~BORDER_ISOLATED;
#endif
    CV_IPP_RUN(borderTypeNI == BORDER_DEFAULT && (!_src.isSubmatrix() || isolated) && dsz == Size((_src.cols() + 1)/2, (_src.rows() + 1)/2),
               ipp_pyrdown( _src, _dst, _dsz, borderType));


    PyrFunc func = 0;
    if( depth == CV_8U )
        func = pyrDown_<FixPtCast<uchar, 8>, PyrDownVec_32s8u>;
    else if( depth == CV_16S )
        func = pyrDown_<FixPtCast<short, 8>, PyrDownVec_32s16s >;
    else if( depth == CV_16U )
        func = pyrDown_<FixPtCast<ushort, 8>, PyrDownVec_32s16u >;
    else if( depth == CV_32F )
        func = pyrDown_<FltCast<float, 8>, PyrDownVec_32f>;
    else if( depth == CV_64F )
        func = pyrDown_<FltCast<double, 8>, PyrDownNoVec<double, double> >;
    else
        CV_Error( CV_StsUnsupportedFormat, "" );

    func( src, dst, borderType );
}


#if defined(HAVE_IPP)
namespace cv
{
static bool ipp_pyrup( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType )
{
#if IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK
    Size sz = _src.dims() <= 2 ? _src.size() : Size();
    Size dsz = _dsz.area() == 0 ? Size(_src.cols()*2, _src.rows()*2) : _dsz;

    Mat src = _src.getMat();
    _dst.create( dsz, src.type() );
    Mat dst = _dst.getMat();
    int depth = src.depth();

    {
        bool isolated = (borderType & BORDER_ISOLATED) != 0;
        int borderTypeNI = borderType & ~BORDER_ISOLATED;
        if (borderTypeNI == BORDER_DEFAULT && (!src.isSubmatrix() || isolated) && dsz == Size(src.cols*2, src.rows*2))
        {
            typedef IppStatus (CV_STDCALL * ippiPyrUp)(const void* pSrc, int srcStep, void* pDst, int dstStep, IppiSize srcRoi, Ipp8u* buffer);
            int type = src.type();
            CV_SUPPRESS_DEPRECATED_START
            ippiPyrUp pyrUpFunc = type == CV_8UC1 ? (ippiPyrUp) ippiPyrUp_Gauss5x5_8u_C1R :
                                  type == CV_8UC3 ? (ippiPyrUp) ippiPyrUp_Gauss5x5_8u_C3R :
                                  type == CV_32FC1 ? (ippiPyrUp) ippiPyrUp_Gauss5x5_32f_C1R :
                                  type == CV_32FC3 ? (ippiPyrUp) ippiPyrUp_Gauss5x5_32f_C3R : 0;
            CV_SUPPRESS_DEPRECATED_END

            if (pyrUpFunc)
            {
                int bufferSize;
                IppiSize srcRoi = { src.cols, src.rows };
                IppDataType dataType = depth == CV_8U ? ipp8u : ipp32f;
                CV_SUPPRESS_DEPRECATED_START
                IppStatus ok = ippiPyrUpGetBufSize_Gauss5x5(srcRoi.width, dataType, src.channels(), &bufferSize);
                CV_SUPPRESS_DEPRECATED_END
                if (ok >= 0)
                {
                    Ipp8u* buffer = ippsMalloc_8u(bufferSize);
                    ok = pyrUpFunc(src.data, (int) src.step, dst.data, (int) dst.step, srcRoi, buffer);
                    ippsFree(buffer);

                    if (ok >= 0)
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP);
                        return true;
                    }
                }
            }
        }
    }
#else
    CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(_dsz); CV_UNUSED(borderType);
#endif
    return false;
}
}
#endif

void cv::pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType )
{
    CV_Assert(borderType == BORDER_DEFAULT);

#ifdef HAVE_OPENCL
    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
               ocl_pyrUp(_src, _dst, _dsz, borderType))
#endif


    Mat src = _src.getMat();
    Size dsz = _dsz.area() == 0 ? Size(src.cols*2, src.rows*2) : _dsz;
    _dst.create( dsz, src.type() );
    Mat dst = _dst.getMat();
    int depth = src.depth();

#ifdef HAVE_TEGRA_OPTIMIZATION
    if(borderType == BORDER_DEFAULT && tegra::useTegra() && tegra::pyrUp(src, dst))
        return;
#endif

#ifdef HAVE_IPP
    bool isolated = (borderType & BORDER_ISOLATED) != 0;
    int borderTypeNI = borderType & ~BORDER_ISOLATED;
#endif
    CV_IPP_RUN(borderTypeNI == BORDER_DEFAULT && (!_src.isSubmatrix() || isolated) && dsz == Size(_src.cols()*2, _src.rows()*2),
               ipp_pyrup( _src, _dst, _dsz, borderType));


    PyrFunc func = 0;
    if( depth == CV_8U )
        func = pyrUp_<FixPtCast<uchar, 6>, PyrUpVec_32s8u >;
    else if( depth == CV_16S )
        func = pyrUp_<FixPtCast<short, 6>, PyrUpVec_32s16s >;
    else if( depth == CV_16U )
        func = pyrUp_<FixPtCast<ushort, 6>, PyrUpVec_32s16u >;
    else if( depth == CV_32F )
        func = pyrUp_<FltCast<float, 6>, PyrUpVec_32f >;
    else if( depth == CV_64F )
        func = pyrUp_<FltCast<double, 6>, PyrUpNoVec<double, double> >;
    else
        CV_Error( CV_StsUnsupportedFormat, "" );

    func( src, dst, borderType );
}


#ifdef HAVE_IPP
namespace cv
{
static bool ipp_buildpyramid( InputArray _src, OutputArrayOfArrays _dst, int maxlevel, int borderType )
{
#if IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK
    Mat src = _src.getMat();
    _dst.create( maxlevel + 1, 1, 0 );
    _dst.getMatRef(0) = src;

    int i=1;

    {
        bool isolated = (borderType & BORDER_ISOLATED) != 0;
        int borderTypeNI = borderType & ~BORDER_ISOLATED;
        if (borderTypeNI == BORDER_DEFAULT && (!src.isSubmatrix() || isolated))
        {
            typedef IppStatus (CV_STDCALL * ippiPyramidLayerDownInitAlloc)(void** ppState, IppiSize srcRoi, Ipp32f rate, void* pKernel, int kerSize, int mode);
            typedef IppStatus (CV_STDCALL * ippiPyramidLayerDown)(void* pSrc, int srcStep, IppiSize srcRoiSize, void* pDst, int dstStep, IppiSize dstRoiSize, void* pState);
            typedef IppStatus (CV_STDCALL * ippiPyramidLayerDownFree)(void* pState);

            int type = src.type();
            int depth = src.depth();
            ippiPyramidLayerDownInitAlloc pyrInitAllocFunc = 0;
            ippiPyramidLayerDown pyrDownFunc = 0;
            ippiPyramidLayerDownFree pyrFreeFunc = 0;

            if (type == CV_8UC1)
            {
                pyrInitAllocFunc = (ippiPyramidLayerDownInitAlloc) ippiPyramidLayerDownInitAlloc_8u_C1R;
                pyrDownFunc = (ippiPyramidLayerDown) ippiPyramidLayerDown_8u_C1R;
                pyrFreeFunc = (ippiPyramidLayerDownFree) ippiPyramidLayerDownFree_8u_C1R;
            }
            else if (type == CV_8UC3)
            {
                pyrInitAllocFunc = (ippiPyramidLayerDownInitAlloc) ippiPyramidLayerDownInitAlloc_8u_C3R;
                pyrDownFunc = (ippiPyramidLayerDown) ippiPyramidLayerDown_8u_C3R;
                pyrFreeFunc = (ippiPyramidLayerDownFree) ippiPyramidLayerDownFree_8u_C3R;
            }
            else if (type == CV_32FC1)
            {
                pyrInitAllocFunc = (ippiPyramidLayerDownInitAlloc) ippiPyramidLayerDownInitAlloc_32f_C1R;
                pyrDownFunc = (ippiPyramidLayerDown) ippiPyramidLayerDown_32f_C1R;
                pyrFreeFunc = (ippiPyramidLayerDownFree) ippiPyramidLayerDownFree_32f_C1R;
            }
            else if (type == CV_32FC3)
            {
                pyrInitAllocFunc = (ippiPyramidLayerDownInitAlloc) ippiPyramidLayerDownInitAlloc_32f_C3R;
                pyrDownFunc = (ippiPyramidLayerDown) ippiPyramidLayerDown_32f_C3R;
                pyrFreeFunc = (ippiPyramidLayerDownFree) ippiPyramidLayerDownFree_32f_C3R;
            }

            if (pyrInitAllocFunc && pyrDownFunc && pyrFreeFunc)
            {
                float rate = 2.f;
                IppiSize srcRoi = { src.cols, src.rows };
                IppiPyramid *gPyr;
                IppStatus ok = ippiPyramidInitAlloc(&gPyr, maxlevel + 1, srcRoi, rate);

                Ipp16s iKernel[5] = { 1, 4, 6, 4, 1 };
                Ipp32f fKernel[5] = { 1.f, 4.f, 6.f, 4.f, 1.f };
                void* kernel = depth >= CV_32F ? (void*) fKernel : (void*) iKernel;

                if (ok >= 0) ok = pyrInitAllocFunc((void**) &(gPyr->pState), srcRoi, rate, kernel, 5, IPPI_INTER_LINEAR);
                if (ok >= 0)
                {
                    gPyr->pImage[0] = src.data;
                    gPyr->pStep[0] = (int) src.step;
                    gPyr->pRoi[0] = srcRoi;
                    for( ; i <= maxlevel; i++ )
                    {
                        IppiSize dstRoi;
                        ok = ippiGetPyramidDownROI(gPyr->pRoi[i-1], &dstRoi, rate);
                        Mat& dst = _dst.getMatRef(i);
                        dst.create(Size(dstRoi.width, dstRoi.height), type);
                        gPyr->pImage[i] = dst.data;
                        gPyr->pStep[i] = (int) dst.step;
                        gPyr->pRoi[i] = dstRoi;

                        if (ok >= 0) ok = pyrDownFunc(gPyr->pImage[i-1], gPyr->pStep[i-1], gPyr->pRoi[i-1],
                                                      gPyr->pImage[i], gPyr->pStep[i], gPyr->pRoi[i], gPyr->pState);

                        if (ok < 0)
                        {
                            pyrFreeFunc(gPyr->pState);
                            return false;
                        }
                        else
                        {
                            CV_IMPL_ADD(CV_IMPL_IPP);
                        }
                    }
                    pyrFreeFunc(gPyr->pState);
                }
                else
                {
                    ippiPyramidFree(gPyr);
                    return false;
                }
                ippiPyramidFree(gPyr);
            }
            return true;
        }
        return false;
    }
#else
    CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(maxlevel); CV_UNUSED(borderType);
#endif
    return false;
}
}
#endif

void cv::buildPyramid( InputArray _src, OutputArrayOfArrays _dst, int maxlevel, int borderType )
{
    CV_Assert(borderType != BORDER_CONSTANT);

    if (_src.dims() <= 2 && _dst.isUMatVector())
    {
        UMat src = _src.getUMat();
        _dst.create( maxlevel + 1, 1, 0 );
        _dst.getUMatRef(0) = src;
        for( int i = 1; i <= maxlevel; i++ )
            pyrDown( _dst.getUMatRef(i-1), _dst.getUMatRef(i), Size(), borderType );
        return;
    }

    Mat src = _src.getMat();
    _dst.create( maxlevel + 1, 1, 0 );
    _dst.getMatRef(0) = src;

    int i=1;

    CV_IPP_RUN(((IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK) && ((borderType & ~BORDER_ISOLATED) == BORDER_DEFAULT && (!_src.isSubmatrix() ||
               ((borderType & BORDER_ISOLATED) != 0)))),
               ipp_buildpyramid( _src, _dst, maxlevel, borderType));

    for( ; i <= maxlevel; i++ )
        pyrDown( _dst.getMatRef(i-1), _dst.getMatRef(i), Size(), borderType );
}

CV_IMPL void cvPyrDown( const void* srcarr, void* dstarr, int _filter )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);

    CV_Assert( _filter == CV_GAUSSIAN_5x5 && src.type() == dst.type());
    cv::pyrDown( src, dst, dst.size() );
}

CV_IMPL void cvPyrUp( const void* srcarr, void* dstarr, int _filter )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);

    CV_Assert( _filter == CV_GAUSSIAN_5x5 && src.type() == dst.type());
    cv::pyrUp( src, dst, dst.size() );
}


CV_IMPL void
cvReleasePyramid( CvMat*** _pyramid, int extra_layers )
{
    if( !_pyramid )
        CV_Error( CV_StsNullPtr, "" );

    if( *_pyramid )
        for( int i = 0; i <= extra_layers; i++ )
            cvReleaseMat( &(*_pyramid)[i] );

    cvFree( _pyramid );
}


CV_IMPL CvMat**
cvCreatePyramid( const CvArr* srcarr, int extra_layers, double rate,
                 const CvSize* layer_sizes, CvArr* bufarr,
                 int calc, int filter )
{
    const float eps = 0.1f;
    uchar* ptr = 0;

    CvMat stub, *src = cvGetMat( srcarr, &stub );

    if( extra_layers < 0 )
        CV_Error( CV_StsOutOfRange, "The number of extra layers must be non negative" );

    int i, layer_step, elem_size = CV_ELEM_SIZE(src->type);
    CvSize layer_size, size = cvGetMatSize(src);

    if( bufarr )
    {
        CvMat bstub, *buf;
        int bufsize = 0;

        buf = cvGetMat( bufarr, &bstub );
        bufsize = buf->rows*buf->cols*CV_ELEM_SIZE(buf->type);
        layer_size = size;
        for( i = 1; i <= extra_layers; i++ )
        {
            if( !layer_sizes )
            {
                layer_size.width = cvRound(layer_size.width*rate+eps);
                layer_size.height = cvRound(layer_size.height*rate+eps);
            }
            else
                layer_size = layer_sizes[i-1];
            layer_step = layer_size.width*elem_size;
            bufsize -= layer_step*layer_size.height;
        }

        if( bufsize < 0 )
            CV_Error( CV_StsOutOfRange, "The buffer is too small to fit the pyramid" );
        ptr = buf->data.ptr;
    }

    CvMat** pyramid = (CvMat**)cvAlloc( (extra_layers+1)*sizeof(pyramid[0]) );
    memset( pyramid, 0, (extra_layers+1)*sizeof(pyramid[0]) );

    pyramid[0] = cvCreateMatHeader( size.height, size.width, src->type );
    cvSetData( pyramid[0], src->data.ptr, src->step );
    layer_size = size;

    for( i = 1; i <= extra_layers; i++ )
    {
        if( !layer_sizes )
        {
            layer_size.width = cvRound(layer_size.width*rate + eps);
            layer_size.height = cvRound(layer_size.height*rate + eps);
        }
        else
            layer_size = layer_sizes[i];

        if( bufarr )
        {
            pyramid[i] = cvCreateMatHeader( layer_size.height, layer_size.width, src->type );
            layer_step = layer_size.width*elem_size;
            cvSetData( pyramid[i], ptr, layer_step );
            ptr += layer_step*layer_size.height;
        }
        else
            pyramid[i] = cvCreateMat( layer_size.height, layer_size.width, src->type );

        if( calc )
            cvPyrDown( pyramid[i-1], pyramid[i], filter );
            //cvResize( pyramid[i-1], pyramid[i], CV_INTER_LINEAR );
    }

    return pyramid;
}

/* End of file. */
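The file above only implements the pyramid operators; it contains no usage notes. For reference, here is a minimal, hedged sketch of how the public entry points defined above (cv::pyrDown, cv::pyrUp, cv::buildPyramid) are typically called. The helper name pyramid_demo and the idea of an already-loaded 8-bit BGR image are illustrative assumptions, and the include paths assume a standard OpenCV 3.x layout rather than anything specific to this GR-PEACH port.

    #include <opencv2/core.hpp>
    #include <opencv2/imgproc.hpp>
    #include <vector>

    // Minimal usage sketch (not part of pyramids.cpp). "input" stands for any
    // hypothetical image already loaded into a cv::Mat (e.g. CV_8UC3).
    static void pyramid_demo(const cv::Mat& input)
    {
        cv::Mat half, restored;

        // One pyrDown blurs with the 5x5 Gaussian kernel implemented above and
        // drops every other row/column; the default output size is
        // ((cols + 1)/2, (rows + 1)/2).
        cv::pyrDown(input, half);

        // pyrUp doubles the size again (the result stays blurred). An explicit
        // destination size is accepted as long as it satisfies the 2x assertion
        // in pyrUp_ above, which input.size() does here.
        cv::pyrUp(half, restored, input.size());

        // buildPyramid simply calls pyrDown repeatedly: levels[0] is the input,
        // levels[i] is levels[i-1] downsampled once.
        std::vector<cv::Mat> levels;
        cv::buildPyramid(input, levels, 3);
    }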