accum.cpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include "opencl_kernels_imgproc.hpp"

namespace cv
{

// Generic SIMD kernels. The unspecialized versions process no elements and
// return 0, so the scalar loops in acc_/accSqr_/accProd_/accW_ below handle
// the whole row; the CV_NEON specializations further down override them.
template <typename T, typename AT>
struct Acc_SIMD
{
    int operator() (const T *, AT *, const uchar *, int, int) const
    {
        return 0;
    }
};

template <typename T, typename AT>
struct AccSqr_SIMD
{
    int operator() (const T *, AT *, const uchar *, int, int) const
    {
        return 0;
    }
};

template <typename T, typename AT>
struct AccProd_SIMD
{
    int operator() (const T *, const T *, AT *, const uchar *, int, int) const
    {
        return 0;
    }
};

template <typename T, typename AT>
struct AccW_SIMD
{
    int operator() (const T *, AT *, const uchar *, int, int, AT) const
    {
        return 0;
    }
};

#if CV_NEON

template <>
struct Acc_SIMD<uchar, float>
{
    int operator() (const uchar * src, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 16; x += 16)
            {
                uint8x16_t v_src = vld1q_u8(src + x);
                uint16x8_t v_src0 = vmovl_u8(vget_low_u8(v_src)), v_src1 = vmovl_u8(vget_high_u8(v_src));

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
                vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
                vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
            }
        }
        else if (cn == 1)
        {
            uint8x16_t v_255 = vdupq_n_u8(255), v_0 = vdupq_n_u8(0);

            for ( ; x <= len - 16; x += 16)
            {
                uint8x16_t v_src = vandq_u8(vld1q_u8(src + x), veorq_u8(v_255, vceqq_u8(vld1q_u8(mask + x), v_0)));
                uint16x8_t v_src0 = vmovl_u8(vget_low_u8(v_src)), v_src1 = vmovl_u8(vget_high_u8(v_src));

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
                vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
                vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
            }
        }

        return x;
    }
};

template <>
struct Acc_SIMD<ushort, float>
{
    int operator() (const ushort * src, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 8; x += 8)
            {
                uint16x8_t v_src = vld1q_u16(src + x);
                uint32x4_t v_src0 = vmovl_u16(vget_low_u16(v_src)), v_src1 = vmovl_u16(vget_high_u16(v_src));

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(v_src0)));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(v_src1)));
            }
        }

        return x;
    }
};

template <>
struct Acc_SIMD<float, float>
{
    int operator() (const float * src, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 8; x += 8)
            {
                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vld1q_f32(src + x)));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vld1q_f32(src + x + 4)));
            }
        }

        return x;
    }
};

template <>
struct AccSqr_SIMD<uchar, float>
{
    int operator() (const uchar * src, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 16; x += 16)
            {
                uint8x16_t v_src = vld1q_u8(src + x);
                uint8x8_t v_src_0 = vget_low_u8(v_src), v_src_1 = vget_high_u8(v_src);
                uint16x8_t v_src0 = vmull_u8(v_src_0, v_src_0), v_src1 = vmull_u8(v_src_1, v_src_1);

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
                vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
                vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
            }
        }
        else if (cn == 1)
        {
            uint8x16_t v_255 = vdupq_n_u8(255), v_0 = vdupq_n_u8(0);

            for ( ; x <= len - 16; x += 16)
            {
                uint8x16_t v_src = vandq_u8(vld1q_u8(src + x), veorq_u8(v_255, vceqq_u8(vld1q_u8(mask + x), v_0)));
                uint8x8_t v_src_0 = vget_low_u8(v_src), v_src_1 = vget_high_u8(v_src);
                uint16x8_t v_src0 = vmull_u8(v_src_0, v_src_0), v_src1 = vmull_u8(v_src_1, v_src_1);

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
                vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
                vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
            }
        }

        return x;
    }
};

template <>
struct AccSqr_SIMD<ushort, float>
{
    int operator() (const ushort * src, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 8; x += 8)
            {
                uint16x8_t v_src = vld1q_u16(src + x);
                uint16x4_t v_src_0 = vget_low_u16(v_src), v_src_1 = vget_high_u16(v_src);
                uint32x4_t v_src0 = vmull_u16(v_src_0, v_src_0), v_src1 = vmull_u16(v_src_1, v_src_1);

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(v_src0)));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(v_src1)));
            }
        }
        else if (cn == 1)
        {
            uint8x8_t v_255 = vdup_n_u8(255), v_0 = vdup_n_u8(0);

            for ( ; x <= len - 8; x += 8)
            {
                uint8x8_t v_mask_src = veor_u8(v_255, vceq_u8(vld1_u8(mask + x), v_0));
                uint8x8x2_t v_mask_zp = vzip_u8(v_mask_src, v_mask_src);
                uint16x8_t v_mask = vreinterpretq_u16_u8(vcombine_u8(v_mask_zp.val[0], v_mask_zp.val[1])),
                           v_src = vandq_u16(vld1q_u16(src + x), v_mask);

                uint16x4_t v_src_0 = vget_low_u16(v_src), v_src_1 = vget_high_u16(v_src);
                uint32x4_t v_src0 = vmull_u16(v_src_0, v_src_0), v_src1 = vmull_u16(v_src_1, v_src_1);

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(v_src0)));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(v_src1)));
            }
        }

        return x;
    }
};

template <>
struct AccSqr_SIMD<float, float>
{
    int operator() (const float * src, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 8; x += 8)
            {
                float32x4_t v_src = vld1q_f32(src + x);
                vst1q_f32(dst + x, vmlaq_f32(vld1q_f32(dst + x), v_src, v_src));

                v_src = vld1q_f32(src + x + 4);
                vst1q_f32(dst + x + 4, vmlaq_f32(vld1q_f32(dst + x + 4), v_src, v_src));
            }
        }

        return x;
    }
};

template <>
struct AccProd_SIMD<uchar, float>
{
    int operator() (const uchar * src1, const uchar * src2, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 16; x += 16)
            {
                uint8x16_t v_1src = vld1q_u8(src1 + x), v_2src = vld1q_u8(src2 + x);
                uint16x8_t v_src0 = vmull_u8(vget_low_u8(v_1src), vget_low_u8(v_2src)),
                           v_src1 = vmull_u8(vget_high_u8(v_1src), vget_high_u8(v_2src));

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
                vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
                vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
            }
        }
        else if (cn == 1)
        {
            uint8x16_t v_255 = vdupq_n_u8(255), v_0 = vdupq_n_u8(0);

            for ( ; x <= len - 16; x += 16)
            {
                uint8x16_t v_mask = veorq_u8(v_255, vceqq_u8(vld1q_u8(mask + x), v_0));
                uint8x16_t v_1src = vandq_u8(vld1q_u8(src1 + x), v_mask), v_2src = vandq_u8(vld1q_u8(src2 + x), v_mask);
                uint16x8_t v_src0 = vmull_u8(vget_low_u8(v_1src), vget_low_u8(v_2src)),
                           v_src1 = vmull_u8(vget_high_u8(v_1src), vget_high_u8(v_2src));

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
                vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
                vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
            }
        }

        return x;
    }
};

template <>
struct AccProd_SIMD<ushort, float>
{
    int operator() (const ushort * src1, const ushort * src2, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 8; x += 8)
            {
                uint16x8_t v_1src = vld1q_u16(src1 + x), v_2src = vld1q_u16(src2 + x);
                uint32x4_t v_src0 = vmull_u16(vget_low_u16(v_1src), vget_low_u16(v_2src)),
                           v_src1 = vmull_u16(vget_high_u16(v_1src), vget_high_u16(v_2src));

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(v_src0)));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(v_src1)));
            }
        }
        else if (cn == 1)
        {
            uint8x8_t v_255 = vdup_n_u8(255), v_0 = vdup_n_u8(0);

            for ( ; x <= len - 8; x += 8)
            {
                uint8x8_t v_mask_src = veor_u8(v_255, vceq_u8(vld1_u8(mask + x), v_0));
                uint8x8x2_t v_mask_zp = vzip_u8(v_mask_src, v_mask_src);
                uint16x8_t v_mask = vreinterpretq_u16_u8(vcombine_u8(v_mask_zp.val[0], v_mask_zp.val[1])),
                           v_1src = vandq_u16(vld1q_u16(src1 + x), v_mask),
                           v_2src = vandq_u16(vld1q_u16(src2 + x), v_mask);

                uint32x4_t v_src0 = vmull_u16(vget_low_u16(v_1src), vget_low_u16(v_2src)),
                           v_src1 = vmull_u16(vget_high_u16(v_1src), vget_high_u16(v_2src));

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(v_src0)));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(v_src1)));
            }
        }

        return x;
    }
};

template <>
struct AccProd_SIMD<float, float>
{
    int operator() (const float * src1, const float * src2, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 8; x += 8)
            {
                vst1q_f32(dst + x, vmlaq_f32(vld1q_f32(dst + x), vld1q_f32(src1 + x), vld1q_f32(src2 + x)));
                vst1q_f32(dst + x + 4, vmlaq_f32(vld1q_f32(dst + x + 4), vld1q_f32(src1 + x + 4), vld1q_f32(src2 + x + 4)));
            }
        }

        return x;
    }
};

template <>
struct AccW_SIMD<uchar, float>
{
    int operator() (const uchar * src, float * dst, const uchar * mask, int len, int cn, float alpha) const
    {
        int x = 0;
        float32x4_t v_alpha = vdupq_n_f32(alpha), v_beta = vdupq_n_f32(1.0f - alpha);

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 16; x += 16)
            {
                uint8x16_t v_src = vld1q_u8(src + x);
                uint16x8_t v_src0 = vmovl_u8(vget_low_u8(v_src)), v_src1 = vmovl_u8(vget_high_u8(v_src));

                vst1q_f32(dst + x, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x), v_beta),
                                             vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0))), v_alpha));
                vst1q_f32(dst + x + 4, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x + 4), v_beta),
                                                 vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0))), v_alpha));
                vst1q_f32(dst + x + 8, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x + 8), v_beta),
                                                 vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1))), v_alpha));
                vst1q_f32(dst + x + 12, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x + 12), v_beta),
                                                  vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1))), v_alpha));
            }
        }

        return x;
    }
};

template <>
struct AccW_SIMD<ushort, float>
{
    int operator() (const ushort * src, float * dst, const uchar * mask, int len, int cn, float alpha) const
    {
        int x = 0;
        float32x4_t v_alpha = vdupq_n_f32(alpha), v_beta = vdupq_n_f32(1.0f - alpha);

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 8; x += 8)
            {
                uint16x8_t v_src = vld1q_u16(src + x);
                uint32x4_t v_src0 = vmovl_u16(vget_low_u16(v_src)), v_src1 = vmovl_u16(vget_high_u16(v_src));

                vst1q_f32(dst + x, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x), v_beta), vcvtq_f32_u32(v_src0), v_alpha));
                vst1q_f32(dst + x + 4, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x + 4), v_beta), vcvtq_f32_u32(v_src1), v_alpha));
            }
        }

        return x;
    }
};

template <>
struct AccW_SIMD<float, float>
{
    int operator() (const float * src, float * dst, const uchar * mask, int len, int cn, float alpha) const
    {
        int x = 0;
        float32x4_t v_alpha = vdupq_n_f32(alpha), v_beta = vdupq_n_f32(1.0f - alpha);

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 8; x += 8)
            {
                vst1q_f32(dst + x, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x), v_beta), vld1q_f32(src + x), v_alpha));
                vst1q_f32(dst + x + 4, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x + 4), v_beta), vld1q_f32(src + x + 4), v_alpha));
            }
        }

        return x;
    }
};

#endif

// Scalar accumulation kernels. Each kernel first lets the matching SIMD
// functor process what it can, then finishes the remaining elements (and
// the masked / multi-channel cases) in plain C++.
template<typename T, typename AT> void
acc_( const T* src, AT* dst, const uchar* mask, int len, int cn )
{
    int i = Acc_SIMD<T, AT>()(src, dst, mask, len, cn);

    if( !mask )
    {
        len *= cn;
#if CV_ENABLE_UNROLLED
        for( ; i <= len - 4; i += 4 )
        {
            AT t0, t1;
            t0 = src[i] + dst[i];
            t1 = src[i+1] + dst[i+1];
            dst[i] = t0; dst[i+1] = t1;

            t0 = src[i+2] + dst[i+2];
            t1 = src[i+3] + dst[i+3];
            dst[i+2] = t0; dst[i+3] = t1;
        }
#endif
        for( ; i < len; i++ )
            dst[i] += src[i];
    }
    else if( cn == 1 )
    {
        for( ; i < len; i++ )
        {
            if( mask[i] )
                dst[i] += src[i];
        }
    }
    else if( cn == 3 )
    {
        for( ; i < len; i++, src += 3, dst += 3 )
        {
            if( mask[i] )
            {
                AT t0 = src[0] + dst[0];
                AT t1 = src[1] + dst[1];
                AT t2 = src[2] + dst[2];

                dst[0] = t0; dst[1] = t1; dst[2] = t2;
            }
        }
    }
    else
    {
        for( ; i < len; i++, src += cn, dst += cn )
            if( mask[i] )
            {
                for( int k = 0; k < cn; k++ )
                    dst[k] += src[k];
            }
    }
}


template<typename T, typename AT> void
accSqr_( const T* src, AT* dst, const uchar* mask, int len, int cn )
{
    int i = AccSqr_SIMD<T, AT>()(src, dst, mask, len, cn);

    if( !mask )
    {
        len *= cn;
#if CV_ENABLE_UNROLLED
        for( ; i <= len - 4; i += 4 )
        {
            AT t0, t1;
            t0 = (AT)src[i]*src[i] + dst[i];
            t1 = (AT)src[i+1]*src[i+1] + dst[i+1];
            dst[i] = t0; dst[i+1] = t1;

            t0 = (AT)src[i+2]*src[i+2] + dst[i+2];
            t1 = (AT)src[i+3]*src[i+3] + dst[i+3];
            dst[i+2] = t0; dst[i+3] = t1;
        }
#endif
        for( ; i < len; i++ )
            dst[i] += (AT)src[i]*src[i];
    }
    else if( cn == 1 )
    {
        for( ; i < len; i++ )
        {
            if( mask[i] )
                dst[i] += (AT)src[i]*src[i];
        }
    }
    else if( cn == 3 )
    {
        for( ; i < len; i++, src += 3, dst += 3 )
        {
            if( mask[i] )
            {
                AT t0 = (AT)src[0]*src[0] + dst[0];
                AT t1 = (AT)src[1]*src[1] + dst[1];
                AT t2 = (AT)src[2]*src[2] + dst[2];

                dst[0] = t0; dst[1] = t1; dst[2] = t2;
            }
        }
    }
    else
    {
        for( ; i < len; i++, src += cn, dst += cn )
            if( mask[i] )
            {
                for( int k = 0; k < cn; k++ )
                    dst[k] += (AT)src[k]*src[k];
            }
    }
}


template<typename T, typename AT> void
accProd_( const T* src1, const T* src2, AT* dst, const uchar* mask, int len, int cn )
{
    int i = AccProd_SIMD<T, AT>()(src1, src2, dst, mask, len, cn);

    if( !mask )
    {
        len *= cn;
#if CV_ENABLE_UNROLLED
        for( ; i <= len - 4; i += 4 )
        {
            AT t0, t1;
            t0 = (AT)src1[i]*src2[i] + dst[i];
            t1 = (AT)src1[i+1]*src2[i+1] + dst[i+1];
            dst[i] = t0; dst[i+1] = t1;

            t0 = (AT)src1[i+2]*src2[i+2] + dst[i+2];
            t1 = (AT)src1[i+3]*src2[i+3] + dst[i+3];
            dst[i+2] = t0; dst[i+3] = t1;
        }
#endif
        for( ; i < len; i++ )
            dst[i] += (AT)src1[i]*src2[i];
    }
    else if( cn == 1 )
    {
        for( ; i < len; i++ )
        {
            if( mask[i] )
                dst[i] += (AT)src1[i]*src2[i];
        }
    }
    else if( cn == 3 )
    {
        for( ; i < len; i++, src1 += 3, src2 += 3, dst += 3 )
        {
            if( mask[i] )
            {
                AT t0 = (AT)src1[0]*src2[0] + dst[0];
                AT t1 = (AT)src1[1]*src2[1] + dst[1];
                AT t2 = (AT)src1[2]*src2[2] + dst[2];

                dst[0] = t0; dst[1] = t1; dst[2] = t2;
            }
        }
    }
    else
    {
        for( ; i < len; i++, src1 += cn, src2 += cn, dst += cn )
            if( mask[i] )
            {
                for( int k = 0; k < cn; k++ )
                    dst[k] += (AT)src1[k]*src2[k];
            }
    }
}


template<typename T, typename AT> void
accW_( const T* src, AT* dst, const uchar* mask, int len, int cn, double alpha )
{
    AT a = (AT)alpha, b = 1 - a;
    int i = AccW_SIMD<T, AT>()(src, dst, mask, len, cn, a);

    if( !mask )
    {
        len *= cn;
#if CV_ENABLE_UNROLLED
        for( ; i <= len - 4; i += 4 )
        {
            AT t0, t1;
            t0 = src[i]*a + dst[i]*b;
            t1 = src[i+1]*a + dst[i+1]*b;
            dst[i] = t0; dst[i+1] = t1;

            t0 = src[i+2]*a + dst[i+2]*b;
            t1 = src[i+3]*a + dst[i+3]*b;
            dst[i+2] = t0; dst[i+3] = t1;
        }
#endif
        for( ; i < len; i++ )
            dst[i] = src[i]*a + dst[i]*b;
    }
    else if( cn == 1 )
    {
        for( ; i < len; i++ )
        {
            if( mask[i] )
                dst[i] = src[i]*a + dst[i]*b;
        }
    }
    else if( cn == 3 )
    {
        for( ; i < len; i++, src += 3, dst += 3 )
        {
            if( mask[i] )
            {
                AT t0 = src[0]*a + dst[0]*b;
                AT t1 = src[1]*a + dst[1]*b;
                AT t2 = src[2]*a + dst[2]*b;

                dst[0] = t0; dst[1] = t1; dst[2] = t2;
            }
        }
    }
    else
    {
        for( ; i < len; i++, src += cn, dst += cn )
            if( mask[i] )
            {
                for( int k = 0; k < cn; k++ )
                    dst[k] = src[k]*a + dst[k]*b;
            }
    }
}


#define DEF_ACC_FUNCS(suffix, type, acctype) \
static void acc_##suffix(const type* src, acctype* dst, \
                         const uchar* mask, int len, int cn) \
{ acc_(src, dst, mask, len, cn); } \
\
static void accSqr_##suffix(const type* src, acctype* dst, \
                            const uchar* mask, int len, int cn) \
{ accSqr_(src, dst, mask, len, cn); } \
\
static void accProd_##suffix(const type* src1, const type* src2, \
                             acctype* dst, const uchar* mask, int len, int cn) \
{ accProd_(src1, src2, dst, mask, len, cn); } \
\
static void accW_##suffix(const type* src, acctype* dst, \
                          const uchar* mask, int len, int cn, double alpha) \
{ accW_(src, dst, mask, len, cn, alpha); }


DEF_ACC_FUNCS(8u32f, uchar, float)
DEF_ACC_FUNCS(8u64f, uchar, double)
DEF_ACC_FUNCS(16u32f, ushort, float)
DEF_ACC_FUNCS(16u64f, ushort, double)
DEF_ACC_FUNCS(32f, float, float)
DEF_ACC_FUNCS(32f64f, float, double)
DEF_ACC_FUNCS(64f, double, double)


typedef void (*AccFunc)(const uchar*, uchar*, const uchar*, int, int);
typedef void (*AccProdFunc)(const uchar*, const uchar*, uchar*, const uchar*, int, int);
typedef void (*AccWFunc)(const uchar*, uchar*, const uchar*, int, int, double);

static AccFunc accTab[] =
{
    (AccFunc)acc_8u32f, (AccFunc)acc_8u64f,
    (AccFunc)acc_16u32f, (AccFunc)acc_16u64f,
    (AccFunc)acc_32f, (AccFunc)acc_32f64f,
    (AccFunc)acc_64f
};

static AccFunc accSqrTab[] =
{
    (AccFunc)accSqr_8u32f, (AccFunc)accSqr_8u64f,
    (AccFunc)accSqr_16u32f, (AccFunc)accSqr_16u64f,
    (AccFunc)accSqr_32f, (AccFunc)accSqr_32f64f,
    (AccFunc)accSqr_64f
};

static AccProdFunc accProdTab[] =
{
    (AccProdFunc)accProd_8u32f, (AccProdFunc)accProd_8u64f,
    (AccProdFunc)accProd_16u32f, (AccProdFunc)accProd_16u64f,
    (AccProdFunc)accProd_32f, (AccProdFunc)accProd_32f64f,
    (AccProdFunc)accProd_64f
};

static AccWFunc accWTab[] =
{
    (AccWFunc)accW_8u32f, (AccWFunc)accW_8u64f,
    (AccWFunc)accW_16u32f, (AccWFunc)accW_16u64f,
    (AccWFunc)accW_32f, (AccWFunc)accW_32f64f,
    (AccWFunc)accW_64f
};

// Map a (source depth, destination depth) pair to an index into the
// dispatch tables above; -1 means the combination is unsupported.
inline int getAccTabIdx(int sdepth, int ddepth)
{
    return sdepth == CV_8U && ddepth == CV_32F ? 0 :
           sdepth == CV_8U && ddepth == CV_64F ? 1 :
           sdepth == CV_16U && ddepth == CV_32F ? 2 :
           sdepth == CV_16U && ddepth == CV_64F ? 3 :
           sdepth == CV_32F && ddepth == CV_32F ? 4 :
           sdepth == CV_32F && ddepth == CV_64F ? 5 :
           sdepth == CV_64F && ddepth == CV_64F ? 6 : -1;
}

#ifdef HAVE_OPENCL

enum
{
    ACCUMULATE = 0,
    ACCUMULATE_SQUARE = 1,
    ACCUMULATE_PRODUCT = 2,
    ACCUMULATE_WEIGHTED = 3
};

static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray _dst, double alpha,
                            InputArray _mask, int op_type )
{
    CV_Assert(op_type == ACCUMULATE || op_type == ACCUMULATE_SQUARE ||
              op_type == ACCUMULATE_PRODUCT || op_type == ACCUMULATE_WEIGHTED);

    const ocl::Device & dev = ocl::Device::getDefault();
    bool haveMask = !_mask.empty(), doubleSupport = dev.doubleFPConfig() > 0;
    int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), ddepth = _dst.depth();
    int kercn = haveMask ? cn : ocl::predictOptimalVectorWidthMax(_src, _src2, _dst), rowsPerWI = dev.isIntel() ? 4 : 1;

    if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
        return false;

    const char * const opMap[4] = { "ACCUMULATE", "ACCUMULATE_SQUARE", "ACCUMULATE_PRODUCT",
                                    "ACCUMULATE_WEIGHTED" };

    char cvt[40];
    ocl::Kernel k("accumulate", ocl::imgproc::accumulate_oclsrc,
                  format("-D %s%s -D srcT1=%s -D cn=%d -D dstT1=%s%s -D rowsPerWI=%d -D convertToDT=%s",
                         opMap[op_type], haveMask ? " -D HAVE_MASK" : "",
                         ocl::typeToStr(sdepth), kercn, ocl::typeToStr(ddepth),
                         doubleSupport ? " -D DOUBLE_SUPPORT" : "", rowsPerWI,
                         ocl::convertTypeStr(sdepth, ddepth, 1, cvt)));
    if (k.empty())
        return false;

    UMat src = _src.getUMat(), src2 = _src2.getUMat(), dst = _dst.getUMat(), mask = _mask.getUMat();

    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
                   src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
                   dstarg = ocl::KernelArg::ReadWrite(dst, cn, kercn),
                   maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);

    int argidx = k.set(0, srcarg);
    if (op_type == ACCUMULATE_PRODUCT)
        argidx = k.set(argidx, src2arg);
    argidx = k.set(argidx, dstarg);
    if (op_type == ACCUMULATE_WEIGHTED)
    {
        if (ddepth == CV_32F)
            argidx = k.set(argidx, (float)alpha);
        else
            argidx = k.set(argidx, alpha);
    }
    if (haveMask)
        k.set(argidx, maskarg);

    size_t globalsize[2] = { (size_t)src.cols * cn / kercn, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
}

#endif

}

#if defined(HAVE_IPP)
namespace cv
{
static bool ipp_accumulate(InputArray _src, InputOutputArray _dst, InputArray _mask)
{
    int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
    int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype);

    Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();

    if (src.dims <= 2 || (src.isContinuous() && dst.isContinuous() && (mask.empty() || mask.isContinuous())))
    {
        typedef IppStatus (CV_STDCALL * ippiAdd)(const void * pSrc, int srcStep, Ipp32f * pSrcDst, int srcdstStep, IppiSize roiSize);
        typedef IppStatus (CV_STDCALL * ippiAddMask)(const void * pSrc, int srcStep, const Ipp8u * pMask, int maskStep, Ipp32f * pSrcDst,
                                                     int srcDstStep, IppiSize roiSize);
        ippiAdd ippFunc = 0;
        ippiAddMask ippFuncMask = 0;

        if (mask.empty())
        {
            CV_SUPPRESS_DEPRECATED_START
            ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAdd)ippiAdd_8u32f_C1IR :
                      sdepth == CV_16U && ddepth == CV_32F ? (ippiAdd)ippiAdd_16u32f_C1IR :
                      sdepth == CV_32F && ddepth == CV_32F ? (ippiAdd)ippiAdd_32f_C1IR : 0;
            CV_SUPPRESS_DEPRECATED_END
        }
        else if (scn == 1)
        {
            ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddMask)ippiAdd_8u32f_C1IMR :
                          sdepth == CV_16U && ddepth == CV_32F ? (ippiAddMask)ippiAdd_16u32f_C1IMR :
                          sdepth == CV_32F && ddepth == CV_32F ? (ippiAddMask)ippiAdd_32f_C1IMR : 0;
        }

        if (ippFunc || ippFuncMask)
        {
            IppStatus status = ippStsErr;

            Size size = src.size();
            int srcstep = (int)src.step, dststep = (int)dst.step, maskstep = (int)mask.step;
            if (src.isContinuous() && dst.isContinuous() && mask.isContinuous())
            {
                srcstep = static_cast<int>(src.total() * src.elemSize());
                dststep = static_cast<int>(dst.total() * dst.elemSize());
                maskstep = static_cast<int>(mask.total() * mask.elemSize());
                size.width = static_cast<int>(src.total());
                size.height = 1;
            }
            size.width *= scn;

            if (ippFunc)
                status = ippFunc(src.ptr(), srcstep, dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));
            else if (ippFuncMask)
                status = ippFuncMask(src.ptr(), srcstep, mask.ptr<Ipp8u>(), maskstep,
                                     dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));

            if (status >= 0)
                return true;
        }
    }
    return false;
}
}
#endif

void cv::accumulate( InputArray _src, InputOutputArray _dst, InputArray _mask )
{
    int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
    int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), dcn = CV_MAT_CN(dtype);

    CV_Assert( _src.sameSize(_dst) && dcn == scn );
    CV_Assert( _mask.empty() || (_src.sameSize(_mask) && _mask.type() == CV_8U) );

#ifdef HAVE_OPENCL
    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
               ocl_accumulate(_src, noArray(), _dst, 0.0, _mask, ACCUMULATE))
#endif

    CV_IPP_RUN((_src.dims() <= 2 || (_src.isContinuous() && _dst.isContinuous() && (_mask.empty() || _mask.isContinuous()))),
               ipp_accumulate(_src, _dst, _mask));

    Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();

    int fidx = getAccTabIdx(sdepth, ddepth);
    AccFunc func = fidx >= 0 ? accTab[fidx] : 0;
    CV_Assert( func != 0 );

    const Mat* arrays[] = {&src, &dst, &mask, 0};
    uchar* ptrs[3];
    NAryMatIterator it(arrays, ptrs);
    int len = (int)it.size;

    for( size_t i = 0; i < it.nplanes; i++, ++it )
        func(ptrs[0], ptrs[1], ptrs[2], len, scn);
}

#if defined(HAVE_IPP)
namespace cv
{
static bool ipp_accumulate_square(InputArray _src, InputOutputArray _dst, InputArray _mask)
{
    int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
    int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype);

    Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();

    if (src.dims <= 2 || (src.isContinuous() && dst.isContinuous() && (mask.empty() || mask.isContinuous())))
    {
        typedef IppStatus (CV_STDCALL * ippiAddSquare)(const void * pSrc, int srcStep, Ipp32f * pSrcDst, int srcdstStep, IppiSize roiSize);
        typedef IppStatus (CV_STDCALL * ippiAddSquareMask)(const void * pSrc, int srcStep, const Ipp8u * pMask, int maskStep, Ipp32f * pSrcDst,
                                                           int srcDstStep, IppiSize roiSize);
        ippiAddSquare ippFunc = 0;
        ippiAddSquareMask ippFuncMask = 0;

        if (mask.empty())
        {
            ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_8u32f_C1IR :
                      sdepth == CV_16U && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_16u32f_C1IR :
                      sdepth == CV_32F && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_32f_C1IR : 0;
        }
        else if (scn == 1)
        {
            ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_8u32f_C1IMR :
                          sdepth == CV_16U && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_16u32f_C1IMR :
                          sdepth == CV_32F && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_32f_C1IMR : 0;
        }

        if (ippFunc || ippFuncMask)
        {
            IppStatus status = ippStsErr;

            Size size = src.size();
            int srcstep = (int)src.step, dststep = (int)dst.step, maskstep = (int)mask.step;
            if (src.isContinuous() && dst.isContinuous() && mask.isContinuous())
            {
                srcstep = static_cast<int>(src.total() * src.elemSize());
                dststep = static_cast<int>(dst.total() * dst.elemSize());
                maskstep = static_cast<int>(mask.total() * mask.elemSize());
                size.width = static_cast<int>(src.total());
                size.height = 1;
            }
            size.width *= scn;

            if (ippFunc)
                status = ippFunc(src.ptr(), srcstep, dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));
            else if (ippFuncMask)
                status = ippFuncMask(src.ptr(), srcstep, mask.ptr<Ipp8u>(), maskstep,
                                     dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));

            if (status >= 0)
                return true;
        }
    }
    return false;
}
}
#endif

void cv::accumulateSquare( InputArray _src, InputOutputArray _dst, InputArray _mask )
{
    int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
    int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), dcn = CV_MAT_CN(dtype);

    CV_Assert( _src.sameSize(_dst) && dcn == scn );
    CV_Assert( _mask.empty() || (_src.sameSize(_mask) && _mask.type() == CV_8U) );

#ifdef HAVE_OPENCL
    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
               ocl_accumulate(_src, noArray(), _dst, 0.0, _mask, ACCUMULATE_SQUARE))
#endif

    CV_IPP_RUN((_src.dims() <= 2 || (_src.isContinuous() && _dst.isContinuous() && (_mask.empty() || _mask.isContinuous()))),
               ipp_accumulate_square(_src, _dst, _mask));

    Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();

    int fidx = getAccTabIdx(sdepth, ddepth);
    AccFunc func = fidx >= 0 ? accSqrTab[fidx] : 0;
    CV_Assert( func != 0 );

    const Mat* arrays[] = {&src, &dst, &mask, 0};
    uchar* ptrs[3];
    NAryMatIterator it(arrays, ptrs);
    int len = (int)it.size;

    for( size_t i = 0; i < it.nplanes; i++, ++it )
        func(ptrs[0], ptrs[1], ptrs[2], len, scn);
}

#if defined(HAVE_IPP)
namespace cv
{
static bool ipp_accumulate_product(InputArray _src1, InputArray _src2,
                                   InputOutputArray _dst, InputArray _mask)
{
    int stype = _src1.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
    int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype);

    Mat src1 = _src1.getMat(), src2 = _src2.getMat(), dst = _dst.getMat(), mask = _mask.getMat();

    if (src1.dims <= 2 || (src1.isContinuous() && src2.isContinuous() && dst.isContinuous()))
    {
        typedef IppStatus (CV_STDCALL * ippiAddProduct)(const void * pSrc1, int src1Step, const void * pSrc2,
                                                        int src2Step, Ipp32f * pSrcDst, int srcDstStep, IppiSize roiSize);
        typedef IppStatus (CV_STDCALL * ippiAddProductMask)(const void * pSrc1, int src1Step, const void * pSrc2, int src2Step,
                                                            const Ipp8u * pMask, int maskStep, Ipp32f * pSrcDst, int srcDstStep, IppiSize roiSize);
        ippiAddProduct ippFunc = 0;
        ippiAddProductMask ippFuncMask = 0;

        if (mask.empty())
        {
            ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_8u32f_C1IR :
                      sdepth == CV_16U && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_16u32f_C1IR :
                      sdepth == CV_32F && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_32f_C1IR : 0;
        }
        else if (scn == 1)
        {
            ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_8u32f_C1IMR :
                          sdepth == CV_16U && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_16u32f_C1IMR :
                          sdepth == CV_32F && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_32f_C1IMR : 0;
        }

        if (ippFunc || ippFuncMask)
        {
            IppStatus status = ippStsErr;

            Size size = src1.size();
            int src1step = (int)src1.step, src2step = (int)src2.step, dststep = (int)dst.step, maskstep = (int)mask.step;
            if (src1.isContinuous() && src2.isContinuous() && dst.isContinuous() && mask.isContinuous())
            {
                src1step = static_cast<int>(src1.total() * src1.elemSize());
                src2step = static_cast<int>(src2.total() * src2.elemSize());
                dststep = static_cast<int>(dst.total() * dst.elemSize());
                maskstep = static_cast<int>(mask.total() * mask.elemSize());
                size.width = static_cast<int>(src1.total());
                size.height = 1;
            }
            size.width *= scn;

            if (ippFunc)
                status = ippFunc(src1.ptr(), src1step, src2.ptr(), src2step, dst.ptr<Ipp32f>(),
                                 dststep, ippiSize(size.width, size.height));
            else if (ippFuncMask)
                status = ippFuncMask(src1.ptr(), src1step, src2.ptr(), src2step, mask.ptr<Ipp8u>(), maskstep,
                                     dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));

            if (status >= 0)
                return true;
        }
    }
    return false;
}
}
#endif


void cv::accumulateProduct( InputArray _src1, InputArray _src2,
                            InputOutputArray _dst, InputArray _mask )
{
    int stype = _src1.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
    int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), dcn = CV_MAT_CN(dtype);

    CV_Assert( _src1.sameSize(_src2) && stype == _src2.type() );
    CV_Assert( _src1.sameSize(_dst) && dcn == scn );
    CV_Assert( _mask.empty() || (_src1.sameSize(_mask) && _mask.type() == CV_8U) );

#ifdef HAVE_OPENCL
    CV_OCL_RUN(_src1.dims() <= 2 && _dst.isUMat(),
               ocl_accumulate(_src1, _src2, _dst, 0.0, _mask, ACCUMULATE_PRODUCT))
#endif

    CV_IPP_RUN( (_src1.dims() <= 2 || (_src1.isContinuous() && _src2.isContinuous() && _dst.isContinuous())),
                ipp_accumulate_product(_src1, _src2, _dst, _mask));

    Mat src1 = _src1.getMat(), src2 = _src2.getMat(), dst = _dst.getMat(), mask = _mask.getMat();

    int fidx = getAccTabIdx(sdepth, ddepth);
    AccProdFunc func = fidx >= 0 ? accProdTab[fidx] : 0;
    CV_Assert( func != 0 );

    const Mat* arrays[] = {&src1, &src2, &dst, &mask, 0};
    uchar* ptrs[4];
    NAryMatIterator it(arrays, ptrs);
    int len = (int)it.size;

    for( size_t i = 0; i < it.nplanes; i++, ++it )
        func(ptrs[0], ptrs[1], ptrs[2], ptrs[3], len, scn);
}

#if defined(HAVE_IPP)
namespace cv
{
static bool ipp_accumulate_weighted( InputArray _src, InputOutputArray _dst,
                                     double alpha, InputArray _mask )
{
    int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
    int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype);

    Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();

    if (src.dims <= 2 || (src.isContinuous() && dst.isContinuous() && mask.isContinuous()))
    {
        typedef IppStatus (CV_STDCALL * ippiAddWeighted)(const void * pSrc, int srcStep, Ipp32f * pSrcDst, int srcdstStep,
                                                         IppiSize roiSize, Ipp32f alpha);
        typedef IppStatus (CV_STDCALL * ippiAddWeightedMask)(const void * pSrc, int srcStep, const Ipp8u * pMask,
                                                             int maskStep, Ipp32f * pSrcDst,
                                                             int srcDstStep, IppiSize roiSize, Ipp32f alpha);
        ippiAddWeighted ippFunc = 0;
        ippiAddWeightedMask ippFuncMask = 0;

        if (mask.empty())
        {
            ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_8u32f_C1IR :
                      sdepth == CV_16U && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_16u32f_C1IR :
                      sdepth == CV_32F && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_32f_C1IR : 0;
        }
        else if (scn == 1)
        {
            ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_8u32f_C1IMR :
                          sdepth == CV_16U && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_16u32f_C1IMR :
                          sdepth == CV_32F && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_32f_C1IMR : 0;
        }

        if (ippFunc || ippFuncMask)
        {
            IppStatus status = ippStsErr;

            Size size = src.size();
            int srcstep = (int)src.step, dststep = (int)dst.step, maskstep = (int)mask.step;
            if (src.isContinuous() && dst.isContinuous() && mask.isContinuous())
            {
                srcstep = static_cast<int>(src.total() * src.elemSize());
                dststep = static_cast<int>(dst.total() * dst.elemSize());
                maskstep = static_cast<int>(mask.total() * mask.elemSize());
                size.width = static_cast<int>(src.total());
                size.height = 1;
            }
            size.width *= scn;

            if (ippFunc)
                status = ippFunc(src.ptr(), srcstep, dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height), (Ipp32f)alpha);
            else if (ippFuncMask)
                status = ippFuncMask(src.ptr(), srcstep, mask.ptr<Ipp8u>(), maskstep,
                                     dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height), (Ipp32f)alpha);

            if (status >= 0)
                return true;
        }
    }
    return false;
}
}
#endif

void cv::accumulateWeighted( InputArray _src, InputOutputArray _dst,
                             double alpha, InputArray _mask )
{
    int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
    int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), dcn = CV_MAT_CN(dtype);

    CV_Assert( _src.sameSize(_dst) && dcn == scn );
    CV_Assert( _mask.empty() || (_src.sameSize(_mask) && _mask.type() == CV_8U) );

#ifdef HAVE_OPENCL
    CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
               ocl_accumulate(_src, noArray(), _dst, alpha, _mask, ACCUMULATE_WEIGHTED))
#endif

    CV_IPP_RUN((_src.dims() <= 2 || (_src.isContinuous() && _dst.isContinuous() && _mask.isContinuous())),
               ipp_accumulate_weighted(_src, _dst, alpha, _mask));

    Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();

    int fidx = getAccTabIdx(sdepth, ddepth);
    AccWFunc func = fidx >= 0 ? accWTab[fidx] : 0;
    CV_Assert( func != 0 );

    const Mat* arrays[] = {&src, &dst, &mask, 0};
    uchar* ptrs[3];
    NAryMatIterator it(arrays, ptrs);
    int len = (int)it.size;

    for( size_t i = 0; i < it.nplanes; i++, ++it )
        func(ptrs[0], ptrs[1], ptrs[2], len, scn, alpha);
}


// Legacy C API wrappers: forward the old cv* entry points to the C++
// functions implemented above.
CV_IMPL void
cvAcc( const void* arr, void* sumarr, const void* maskarr )
{
    cv::Mat src = cv::cvarrToMat(arr), dst = cv::cvarrToMat(sumarr), mask;
    if( maskarr )
        mask = cv::cvarrToMat(maskarr);
    cv::accumulate( src, dst, mask );
}

CV_IMPL void
cvSquareAcc( const void* arr, void* sumarr, const void* maskarr )
{
    cv::Mat src = cv::cvarrToMat(arr), dst = cv::cvarrToMat(sumarr), mask;
    if( maskarr )
        mask = cv::cvarrToMat(maskarr);
    cv::accumulateSquare( src, dst, mask );
}

CV_IMPL void
cvMultiplyAcc( const void* arr1, const void* arr2,
               void* sumarr, const void* maskarr )
{
    cv::Mat src1 = cv::cvarrToMat(arr1), src2 = cv::cvarrToMat(arr2);
    cv::Mat dst = cv::cvarrToMat(sumarr), mask;
    if( maskarr )
        mask = cv::cvarrToMat(maskarr);
    cv::accumulateProduct( src1, src2, dst, mask );
}

CV_IMPL void
cvRunningAvg( const void* arr, void* sumarr, double alpha, const void* maskarr )
{
    cv::Mat src = cv::cvarrToMat(arr), dst = cv::cvarrToMat(sumarr), mask;
    if( maskarr )
        mask = cv::cvarrToMat(maskarr);
    cv::accumulateWeighted( src, dst, alpha, mask );
}

/* End of file. */
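Usage note: the public entry points implemented in this file (cv::accumulate, cv::accumulateSquare, cv::accumulateProduct, cv::accumulateWeighted) expect a floating-point accumulator with the same size and channel count as the input. Below is a minimal sketch of the common running-average case, which exercises the CV_8U-to-CV_32F path (table index 0 above); the camera index and the 0.05 learning rate are arbitrary illustration values, not anything prescribed by this file.

#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/videoio.hpp>

int main()
{
    cv::VideoCapture cap(0);  // hypothetical camera source for illustration
    cv::Mat frame, gray, acc;

    while (cap.read(frame))
    {
        cv::cvtColor(frame, gray, cv::COLOR_BGR2GRAY);
        if (acc.empty())
            gray.convertTo(acc, CV_32F);  // CV_8U input, CV_32F accumulator
        // acc = 0.05*gray + 0.95*acc, i.e. accW_ with alpha = 0.05
        cv::accumulateWeighted(gray, acc, 0.05);
    }
    return 0;
}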
