Renesas GR-PEACH OpenCV Development / gr-peach-opencv-project-sd-card_update

Fork of gr-peach-opencv-project-sd-card by the do

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers accum.cpp Source File

accum.cpp

00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
00015 // Copyright (C) 2014, Itseez Inc., all rights reserved.
00016 // Third party copyrights are property of their respective owners.
00017 //
00018 // Redistribution and use in source and binary forms, with or without modification,
00019 // are permitted provided that the following conditions are met:
00020 //
00021 //   * Redistribution's of source code must retain the above copyright notice,
00022 //     this list of conditions and the following disclaimer.
00023 //
00024 //   * Redistribution's in binary form must reproduce the above copyright notice,
00025 //     this list of conditions and the following disclaimer in the documentation
00026 //     and/or other materials provided with the distribution.
00027 //
00028 //   * The name of the copyright holders may not be used to endorse or promote products
00029 //     derived from this software without specific prior written permission.
00030 //
00031 // This software is provided by the copyright holders and contributors "as is" and
00032 // any express or implied warranties, including, but not limited to, the implied
00033 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00034 // In no event shall the Intel Corporation or contributors be liable for any direct,
00035 // indirect, incidental, special, exemplary, or consequential damages
00036 // (including, but not limited to, procurement of substitute goods or services;
00037 // loss of use, data, or profits; or business interruption) however caused
00038 // and on any theory of liability, whether in contract, strict liability,
00039 // or tort (including negligence or otherwise) arising in any way out of
00040 // the use of this software, even if advised of the possibility of such damage.
00041 //
00042 //M*/
00043 
00044 #include "precomp.hpp"
00045 #include "opencl_kernels_imgproc.hpp"
00046 
00047 namespace cv
00048 {
00049 
// Generic (no-SIMD) fallback for acc_(): does no work and returns 0,
// meaning "zero elements processed", so the scalar loop handles everything.
// Architecture-specific specializations below override this.
template <typename T, typename AT>
struct Acc_SIMD
{
    int operator() (const T *, AT *, const uchar *, int, int) const
    {
        return 0;
    }
};
00058 
// Generic (no-SIMD) fallback for accSqr_(): processes nothing (returns 0).
template <typename T, typename AT>
struct AccSqr_SIMD
{
    int operator() (const T *, AT *, const uchar *, int, int) const
    {
        return 0;
    }
};
00067 
// Generic (no-SIMD) fallback for accProd_(): processes nothing (returns 0).
// Note the extra source pointer: the product kernel reads two inputs.
template <typename T, typename AT>
struct AccProd_SIMD
{
    int operator() (const T *, const T *, AT *, const uchar *, int, int) const
    {
        return 0;
    }
};
00076 
// Generic (no-SIMD) fallback for accW_(): processes nothing (returns 0).
// The trailing AT parameter is the blending weight alpha.
template <typename T, typename AT>
struct AccW_SIMD
{
    int operator() (const T *, AT *, const uchar *, int, int, AT) const
    {
        return 0;
    }
};
00085 
00086 #if CV_NEON
00087 
// NEON acc(): dst<float> += src<uchar>, 16 input bytes per iteration.
// Covers the unmasked case (any cn) and the masked single-channel case;
// returns the number of elements consumed so the scalar tail can finish.
template <>
struct Acc_SIMD<uchar, float>
{
    int operator() (const uchar * src, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            // no mask: the interleaved channels are treated as one flat row
            len *= cn;
            for ( ; x <= len - 16; x += 16)
            {
                // widen u8 -> u16 in low/high halves ...
                uint8x16_t v_src = vld1q_u8(src + x);
                uint16x8_t v_src0 = vmovl_u8(vget_low_u8(v_src)), v_src1 = vmovl_u8(vget_high_u8(v_src));

                // ... then u16 -> u32 -> f32 and accumulate 4 floats at a time
                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
                vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
                vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
            }
        }
        else if (cn == 1)
        {
            uint8x16_t v_255 = vdupq_n_u8(255), v_0 = vdupq_n_u8(0);

            for ( ; x <= len - 16; x += 16)
            {
                // vceq(mask,0) gives 0xFF where the mask is zero; XOR with 0xFF
                // inverts it, so ANDing zeroes out the masked-off source pixels
                // (adding 0 leaves dst unchanged there).
                uint8x16_t v_src = vandq_u8(vld1q_u8(src + x), veorq_u8(v_255, vceqq_u8(vld1q_u8(mask + x), v_0)));
                uint16x8_t v_src0 = vmovl_u8(vget_low_u8(v_src)), v_src1 = vmovl_u8(vget_high_u8(v_src));

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
                vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
                vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
            }
        }

        return x;
    }
};
00128 
// NEON acc(): dst<float> += src<ushort>, 8 elements per iteration.
// Only the unmasked case is vectorized; masked rows fall through to scalar.
template <>
struct Acc_SIMD<ushort, float>
{
    int operator() (const ushort * src, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 8; x += 8)
            {
                // widen u16 -> u32, convert to f32, accumulate in two 4-lane halves
                uint16x8_t v_src = vld1q_u16(src + x);
                uint32x4_t v_src0 = vmovl_u16(vget_low_u16(v_src)), v_src1 = vmovl_u16(vget_high_u16(v_src));

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(v_src0)));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(v_src1)));
            }
        }

        return x;
    }
};
00152 
00153 template <>
00154 struct Acc_SIMD<float, float>
00155 {
00156     int operator() (const float * src, float * dst, const uchar * mask, int len, int cn) const
00157     {
00158         int x = 0;
00159 
00160         if (!mask)
00161         {
00162             len *= cn;
00163             for ( ; x <= len - 8; x += 8)
00164             {
00165                 vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vld1q_f32(src + x)));
00166                 vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vld1q_f32(src + x + 4)));
00167             }
00168         }
00169 
00170         return x;
00171     }
00172 };
00173 
// NEON accSqr(): dst<float> += src<uchar>^2, 16 input bytes per iteration.
// vmull_u8 squares with widening (u8*u8 -> u16), so no overflow occurs.
// Handles unmasked rows and the masked single-channel case.
template <>
struct AccSqr_SIMD<uchar, float>
{
    int operator() (const uchar * src, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 16; x += 16)
            {
                uint8x16_t v_src = vld1q_u8(src + x);
                uint8x8_t v_src_0 = vget_low_u8(v_src), v_src_1 = vget_high_u8(v_src);
                // widening square: each half of the 16 bytes becomes 8 u16 products
                uint16x8_t v_src0 = vmull_u8(v_src_0, v_src_0), v_src1 = vmull_u8(v_src_1, v_src_1);

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
                vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
                vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
            }
        }
        else if (cn == 1)
        {
            uint8x16_t v_255 = vdupq_n_u8(255), v_0 = vdupq_n_u8(0);

            for ( ; x <= len - 16; x += 16)
            {
                // zero out masked-off pixels before squaring (0^2 adds nothing)
                uint8x16_t v_src = vandq_u8(vld1q_u8(src + x), veorq_u8(v_255, vceqq_u8(vld1q_u8(mask + x), v_0)));
                uint8x8_t v_src_0 = vget_low_u8(v_src), v_src_1 = vget_high_u8(v_src);
                uint16x8_t v_src0 = vmull_u8(v_src_0, v_src_0), v_src1 = vmull_u8(v_src_1, v_src_1);

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
                vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
                vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
            }
        }

        return x;
    }
};
00216 
// NEON accSqr(): dst<float> += src<ushort>^2, 8 elements per iteration.
// vmull_u16 squares with widening (u16*u16 -> u32).
template <>
struct AccSqr_SIMD<ushort, float>
{
    int operator() (const ushort * src, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 8; x += 8)
            {
                uint16x8_t v_src = vld1q_u16(src + x);
                uint16x4_t v_src_0 = vget_low_u16(v_src), v_src_1 = vget_high_u16(v_src);
                uint32x4_t v_src0 = vmull_u16(v_src_0, v_src_0), v_src1 = vmull_u16(v_src_1, v_src_1);

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(v_src0)));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(v_src1)));
            }
        }
        else if (cn == 1)
        {
            uint8x8_t v_255 = vdup_n_u8(255), v_0 = vdup_n_u8(0);

            for ( ; x <= len - 8; x += 8)
            {
                // build an 8-byte 0xFF/0x00 mask, then zip it with itself to
                // duplicate each byte into a 16-bit lane mask for the u16 source
                uint8x8_t v_mask_src = veor_u8(v_255, vceq_u8(vld1_u8(mask + x), v_0));
                uint8x8x2_t v_mask_zp = vzip_u8(v_mask_src, v_mask_src);
                uint16x8_t v_mask = vreinterpretq_u16_u8(vcombine_u8(v_mask_zp.val[0], v_mask_zp.val[1])),
                           v_src = vandq_u16(vld1q_u16(src + x), v_mask);

                uint16x4_t v_src_0 = vget_low_u16(v_src), v_src_1 = vget_high_u16(v_src);
                uint32x4_t v_src0 = vmull_u16(v_src_0, v_src_0), v_src1 = vmull_u16(v_src_1, v_src_1);

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(v_src0)));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(v_src1)));
            }
        }

        return x;
    }
};
00259 
00260 template <>
00261 struct AccSqr_SIMD<float, float>
00262 {
00263     int operator() (const float * src, float * dst, const uchar * mask, int len, int cn) const
00264     {
00265         int x = 0;
00266 
00267         if (!mask)
00268         {
00269             len *= cn;
00270             for ( ; x <= len - 8; x += 8)
00271             {
00272                 float32x4_t v_src = vld1q_f32(src + x);
00273                 vst1q_f32(dst + x, vmlaq_f32(vld1q_f32(dst + x), v_src, v_src));
00274 
00275                 v_src = vld1q_f32(src + x + 4);
00276                 vst1q_f32(dst + x + 4, vmlaq_f32(vld1q_f32(dst + x + 4), v_src, v_src));
00277             }
00278         }
00279 
00280         return x;
00281     }
00282 };
00283 
// NEON accProd(): dst<float> += src1<uchar> * src2<uchar>, 16 bytes/iter.
// vmull_u8 multiplies with widening (u8*u8 -> u16), so no overflow occurs.
// Handles unmasked rows and the masked single-channel case.
template <>
struct AccProd_SIMD<uchar, float>
{
    int operator() (const uchar * src1, const uchar * src2, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 16; x += 16)
            {
                uint8x16_t v_1src = vld1q_u8(src1 + x), v_2src = vld1q_u8(src2 + x);
                uint16x8_t v_src0 = vmull_u8(vget_low_u8(v_1src), vget_low_u8(v_2src)),
                           v_src1 = vmull_u8(vget_high_u8(v_1src), vget_high_u8(v_2src));

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
                vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
                vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
            }
        }
        else if (cn == 1)
        {
            uint8x16_t v_255 = vdupq_n_u8(255), v_0 = vdupq_n_u8(0);

            for ( ; x <= len - 16; x += 16)
            {
                // zero both operands where the mask is zero; 0*anything adds nothing
                uint8x16_t v_mask = veorq_u8(v_255, vceqq_u8(vld1q_u8(mask + x), v_0));
                uint8x16_t v_1src = vandq_u8(vld1q_u8(src1 + x), v_mask), v_2src = vandq_u8(vld1q_u8(src2 + x), v_mask);
                uint16x8_t v_src0 = vmull_u8(vget_low_u8(v_1src), vget_low_u8(v_2src)),
                           v_src1 = vmull_u8(vget_high_u8(v_1src), vget_high_u8(v_2src));

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
                vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
                vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
            }
        }

        return x;
    }
};
00327 
// NEON accProd(): dst<float> += src1<ushort> * src2<ushort>, 8 elems/iter.
// vmull_u16 multiplies with widening (u16*u16 -> u32).
template <>
struct AccProd_SIMD<ushort, float>
{
    int operator() (const ushort * src1, const ushort * src2, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 8; x += 8)
            {
                uint16x8_t v_1src = vld1q_u16(src1 + x), v_2src = vld1q_u16(src2 + x);
                uint32x4_t v_src0 = vmull_u16(vget_low_u16(v_1src), vget_low_u16(v_2src)),
                           v_src1 = vmull_u16(vget_high_u16(v_1src), vget_high_u16(v_2src));

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(v_src0)));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(v_src1)));
            }
        }
        else if (cn == 1)
        {
            uint8x8_t v_255 = vdup_n_u8(255), v_0 = vdup_n_u8(0);

            for ( ; x <= len - 8; x += 8)
            {
                // expand the 8-byte mask to 16-bit lanes (zip duplicates each
                // byte), then zero both operands where the mask is zero
                uint8x8_t v_mask_src = veor_u8(v_255, vceq_u8(vld1_u8(mask + x), v_0));
                uint8x8x2_t v_mask_zp = vzip_u8(v_mask_src, v_mask_src);
                uint16x8_t v_mask = vreinterpretq_u16_u8(vcombine_u8(v_mask_zp.val[0], v_mask_zp.val[1])),
                           v_1src = vandq_u16(vld1q_u16(src1 + x), v_mask),
                           v_2src = vandq_u16(vld1q_u16(src2 + x), v_mask);

                uint32x4_t v_src0 = vmull_u16(vget_low_u16(v_1src), vget_low_u16(v_2src)),
                           v_src1 = vmull_u16(vget_high_u16(v_1src), vget_high_u16(v_2src));

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(v_src0)));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(v_src1)));
            }
        }

        return x;
    }
};
00371 
00372 template <>
00373 struct AccProd_SIMD<float, float>
00374 {
00375     int operator() (const float * src1, const float * src2, float * dst, const uchar * mask, int len, int cn) const
00376     {
00377         int x = 0;
00378 
00379         if (!mask)
00380         {
00381             len *= cn;
00382             for ( ; x <= len - 8; x += 8)
00383             {
00384                 vst1q_f32(dst + x, vmlaq_f32(vld1q_f32(dst + x), vld1q_f32(src1 + x), vld1q_f32(src2 + x)));
00385                 vst1q_f32(dst + x + 4, vmlaq_f32(vld1q_f32(dst + x + 4), vld1q_f32(src1 + x + 4), vld1q_f32(src2 + x + 4)));
00386             }
00387         }
00388 
00389         return x;
00390     }
00391 };
00392 
// NEON accW(): running weighted average dst = dst*(1-alpha) + src*alpha
// for uchar input / float accumulator, 16 bytes per iteration.
// Only the unmasked case is vectorized.
template <>
struct AccW_SIMD<uchar, float>
{
    int operator() (const uchar * src, float * dst, const uchar * mask, int len, int cn, float alpha) const
    {
        int x = 0;
        float32x4_t v_alpha = vdupq_n_f32(alpha), v_beta = vdupq_n_f32(1.0f - alpha);

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 16; x += 16)
            {
                // widen u8 -> u16; each u16 half is widened again to u32 below
                uint8x16_t v_src = vld1q_u8(src + x);
                uint16x8_t v_src0 = vmovl_u8(vget_low_u8(v_src)), v_src1 = vmovl_u8(vget_high_u8(v_src));

                // dst*beta + src*alpha, 4 floats at a time
                vst1q_f32(dst + x, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x), v_beta),
                                             vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0))), v_alpha));
                vst1q_f32(dst + x + 4, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x + 4), v_beta),
                                             vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0))), v_alpha));
                vst1q_f32(dst + x + 8, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x + 8), v_beta),
                                                 vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1))), v_alpha));
                vst1q_f32(dst + x + 12, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x + 12), v_beta),
                                                  vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1))), v_alpha));
            }
        }

        return x;
    }
};
00423 
// NEON accW(): dst = dst*(1-alpha) + src*alpha for ushort input /
// float accumulator, 8 elements per iteration (unmasked case only).
template <>
struct AccW_SIMD<ushort, float>
{
    int operator() (const ushort * src, float * dst, const uchar * mask, int len, int cn, float alpha) const
    {
        int x = 0;
        float32x4_t v_alpha = vdupq_n_f32(alpha), v_beta = vdupq_n_f32(1.0f - alpha);

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 8; x += 8)
            {
                uint16x8_t v_src = vld1q_u16(src + x);
                uint32x4_t v_src0 = vmovl_u16(vget_low_u16(v_src)), v_src1 = vmovl_u16(vget_high_u16(v_src));

                vst1q_f32(dst + x, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x), v_beta), vcvtq_f32_u32(v_src0), v_alpha));
                vst1q_f32(dst + x + 4, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x + 4), v_beta), vcvtq_f32_u32(v_src1), v_alpha));
            }
        }

        return x;
    }
};
00448 
00449 template <>
00450 struct AccW_SIMD<float, float>
00451 {
00452     int operator() (const float * src, float * dst, const uchar * mask, int len, int cn, float alpha) const
00453     {
00454         int x = 0;
00455         float32x4_t v_alpha = vdupq_n_f32(alpha), v_beta = vdupq_n_f32(1.0f - alpha);
00456 
00457         if (!mask)
00458         {
00459             len *= cn;
00460             for ( ; x <= len - 8; x += 8)
00461             {
00462                 vst1q_f32(dst + x, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x), v_beta), vld1q_f32(src + x), v_alpha));
00463                 vst1q_f32(dst + x + 4, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x + 4), v_beta), vld1q_f32(src + x + 4), v_alpha));
00464             }
00465         }
00466 
00467         return x;
00468     }
00469 };
00470 
00471 #endif
00472 
00473 template<typename T, typename AT> void
00474 acc_( const T* src, AT* dst, const uchar* mask, int len, int cn )
00475 {
00476     int i = Acc_SIMD<T, AT>()(src, dst, mask, len, cn);
00477 
00478     if( !mask )
00479     {
00480         len *= cn;
00481         #if CV_ENABLE_UNROLLED
00482         for( ; i <= len - 4; i += 4 )
00483         {
00484             AT t0, t1;
00485             t0 = src[i] + dst[i];
00486             t1 = src[i+1] + dst[i+1];
00487             dst[i] = t0; dst[i+1] = t1;
00488 
00489             t0 = src[i+2] + dst[i+2];
00490             t1 = src[i+3] + dst[i+3];
00491             dst[i+2] = t0; dst[i+3] = t1;
00492         }
00493         #endif
00494         for( ; i < len; i++ )
00495             dst[i] += src[i];
00496     }
00497     else if( cn == 1 )
00498     {
00499         for( ; i < len; i++ )
00500         {
00501             if( mask[i] )
00502                 dst[i] += src[i];
00503         }
00504     }
00505     else if( cn == 3 )
00506     {
00507         for( ; i < len; i++, src += 3, dst += 3 )
00508         {
00509             if( mask[i] )
00510             {
00511                 AT t0 = src[0] + dst[0];
00512                 AT t1 = src[1] + dst[1];
00513                 AT t2 = src[2] + dst[2];
00514 
00515                 dst[0] = t0; dst[1] = t1; dst[2] = t2;
00516             }
00517         }
00518     }
00519     else
00520     {
00521         for( ; i < len; i++, src += cn, dst += cn )
00522             if( mask[i] )
00523             {
00524                 for( int k = 0; k < cn; k++ )
00525                     dst[k] += src[k];
00526             }
00527     }
00528 }
00529 
00530 
00531 template<typename T, typename AT> void
00532 accSqr_( const T* src, AT* dst, const uchar* mask, int len, int cn )
00533 {
00534     int i = AccSqr_SIMD<T, AT>()(src, dst, mask, len, cn);
00535 
00536     if( !mask )
00537     {
00538         len *= cn;
00539          #if CV_ENABLE_UNROLLED
00540         for( ; i <= len - 4; i += 4 )
00541         {
00542             AT t0, t1;
00543             t0 = (AT)src[i]*src[i] + dst[i];
00544             t1 = (AT)src[i+1]*src[i+1] + dst[i+1];
00545             dst[i] = t0; dst[i+1] = t1;
00546 
00547             t0 = (AT)src[i+2]*src[i+2] + dst[i+2];
00548             t1 = (AT)src[i+3]*src[i+3] + dst[i+3];
00549             dst[i+2] = t0; dst[i+3] = t1;
00550         }
00551         #endif
00552         for( ; i < len; i++ )
00553             dst[i] += (AT)src[i]*src[i];
00554     }
00555     else if( cn == 1 )
00556     {
00557         for( ; i < len; i++ )
00558         {
00559             if( mask[i] )
00560                 dst[i] += (AT)src[i]*src[i];
00561         }
00562     }
00563     else if( cn == 3 )
00564     {
00565         for( ; i < len; i++, src += 3, dst += 3 )
00566         {
00567             if( mask[i] )
00568             {
00569                 AT t0 = (AT)src[0]*src[0] + dst[0];
00570                 AT t1 = (AT)src[1]*src[1] + dst[1];
00571                 AT t2 = (AT)src[2]*src[2] + dst[2];
00572 
00573                 dst[0] = t0; dst[1] = t1; dst[2] = t2;
00574             }
00575         }
00576     }
00577     else
00578     {
00579         for( ; i < len; i++, src += cn, dst += cn )
00580             if( mask[i] )
00581             {
00582                 for( int k = 0; k < cn; k++ )
00583                     dst[k] += (AT)src[k]*src[k];
00584             }
00585     }
00586 }
00587 
00588 
00589 template<typename T, typename AT> void
00590 accProd_( const T* src1, const T* src2, AT* dst, const uchar* mask, int len, int cn )
00591 {
00592     int i = AccProd_SIMD<T, AT>()(src1, src2, dst, mask, len, cn);
00593 
00594     if( !mask )
00595     {
00596         len *= cn;
00597         #if CV_ENABLE_UNROLLED
00598         for( ; i <= len - 4; i += 4 )
00599         {
00600             AT t0, t1;
00601             t0 = (AT)src1[i]*src2[i] + dst[i];
00602             t1 = (AT)src1[i+1]*src2[i+1] + dst[i+1];
00603             dst[i] = t0; dst[i+1] = t1;
00604 
00605             t0 = (AT)src1[i+2]*src2[i+2] + dst[i+2];
00606             t1 = (AT)src1[i+3]*src2[i+3] + dst[i+3];
00607             dst[i+2] = t0; dst[i+3] = t1;
00608         }
00609         #endif
00610         for( ; i < len; i++ )
00611             dst[i] += (AT)src1[i]*src2[i];
00612     }
00613     else if( cn == 1 )
00614     {
00615         for( ; i < len; i++ )
00616         {
00617             if( mask[i] )
00618                 dst[i] += (AT)src1[i]*src2[i];
00619         }
00620     }
00621     else if( cn == 3 )
00622     {
00623         for( ; i < len; i++, src1 += 3, src2 += 3, dst += 3 )
00624         {
00625             if( mask[i] )
00626             {
00627                 AT t0 = (AT)src1[0]*src2[0] + dst[0];
00628                 AT t1 = (AT)src1[1]*src2[1] + dst[1];
00629                 AT t2 = (AT)src1[2]*src2[2] + dst[2];
00630 
00631                 dst[0] = t0; dst[1] = t1; dst[2] = t2;
00632             }
00633         }
00634     }
00635     else
00636     {
00637         for( ; i < len; i++, src1 += cn, src2 += cn, dst += cn )
00638             if( mask[i] )
00639             {
00640                 for( int k = 0; k < cn; k++ )
00641                     dst[k] += (AT)src1[k]*src2[k];
00642             }
00643     }
00644 }
00645 
00646 
00647 template<typename T, typename AT> void
00648 accW_( const T* src, AT* dst, const uchar* mask, int len, int cn, double alpha )
00649 {
00650     AT a = (AT)alpha, b = 1 - a;
00651     int i = AccW_SIMD<T, AT>()(src, dst, mask, len, cn, a);
00652 
00653     if( !mask )
00654     {
00655         len *= cn;
00656         #if CV_ENABLE_UNROLLED
00657         for( ; i <= len - 4; i += 4 )
00658         {
00659             AT t0, t1;
00660             t0 = src[i]*a + dst[i]*b;
00661             t1 = src[i+1]*a + dst[i+1]*b;
00662             dst[i] = t0; dst[i+1] = t1;
00663 
00664             t0 = src[i+2]*a + dst[i+2]*b;
00665             t1 = src[i+3]*a + dst[i+3]*b;
00666             dst[i+2] = t0; dst[i+3] = t1;
00667         }
00668         #endif
00669         for( ; i < len; i++ )
00670             dst[i] = src[i]*a + dst[i]*b;
00671     }
00672     else if( cn == 1 )
00673     {
00674         for( ; i < len; i++ )
00675         {
00676             if( mask[i] )
00677                 dst[i] = src[i]*a + dst[i]*b;
00678         }
00679     }
00680     else if( cn == 3 )
00681     {
00682         for( ; i < len; i++, src += 3, dst += 3 )
00683         {
00684             if( mask[i] )
00685             {
00686                 AT t0 = src[0]*a + dst[0]*b;
00687                 AT t1 = src[1]*a + dst[1]*b;
00688                 AT t2 = src[2]*a + dst[2]*b;
00689 
00690                 dst[0] = t0; dst[1] = t1; dst[2] = t2;
00691             }
00692         }
00693     }
00694     else
00695     {
00696         for( ; i < len; i++, src += cn, dst += cn )
00697             if( mask[i] )
00698             {
00699                 for( int k = 0; k < cn; k++ )
00700                     dst[k] = src[k]*a + dst[k]*b;
00701             }
00702     }
00703 }
00704 
00705 
// DEF_ACC_FUNCS stamps out the four non-template wrappers (plain add,
// square, product, weighted) for one (source type, accumulator type) pair.
// The wrappers have uniform signatures so they can be stored in the
// type-erased function-pointer tables below. No comments are placed inside
// the macro body: a '//' before a trailing backslash would swallow the
// line continuation.
#define DEF_ACC_FUNCS(suffix, type, acctype) \
static void acc_##suffix(const type* src, acctype* dst, \
                         const uchar* mask, int len, int cn) \
{ acc_(src, dst, mask, len, cn); } \
\
static void accSqr_##suffix(const type* src, acctype* dst, \
                            const uchar* mask, int len, int cn) \
{ accSqr_(src, dst, mask, len, cn); } \
\
static void accProd_##suffix(const type* src1, const type* src2, \
                             acctype* dst, const uchar* mask, int len, int cn) \
{ accProd_(src1, src2, dst, mask, len, cn); } \
\
static void accW_##suffix(const type* src, acctype* dst, \
                          const uchar* mask, int len, int cn, double alpha) \
{ accW_(src, dst, mask, len, cn, alpha); }


// One instantiation per supported (source depth, accumulator depth) pair;
// the suffix encodes the pair, e.g. 8u32f = uchar source, float accumulator.
DEF_ACC_FUNCS(8u32f, uchar, float)
DEF_ACC_FUNCS(8u64f, uchar, double)
DEF_ACC_FUNCS(16u32f, ushort, float)
DEF_ACC_FUNCS(16u64f, ushort, double)
DEF_ACC_FUNCS(32f, float, float)
DEF_ACC_FUNCS(32f64f, float, double)
DEF_ACC_FUNCS(64f, double, double)
00731 
00732 
// Type-erased function-pointer signatures for the dispatch tables: element
// types are erased to uchar* and restored by the casts in the tables below.
typedef void (*AccFunc)(const uchar*, uchar*, const uchar*, int, int);
typedef void (*AccProdFunc)(const uchar*, const uchar*, uchar*, const uchar*, int, int);
typedef void (*AccWFunc)(const uchar*, uchar*, const uchar*, int, int, double);
00736 
// Dispatch tables for the four accumulation operations, indexed by the
// value returned from getAccTabIdx(sdepth, ddepth). Entry order must match
// the index mapping in getAccTabIdx.
static AccFunc accTab[] =
{
    (AccFunc)acc_8u32f, (AccFunc)acc_8u64f,
    (AccFunc)acc_16u32f, (AccFunc)acc_16u64f,
    (AccFunc)acc_32f, (AccFunc)acc_32f64f,
    (AccFunc)acc_64f
};

static AccFunc accSqrTab[] =
{
    (AccFunc)accSqr_8u32f, (AccFunc)accSqr_8u64f,
    (AccFunc)accSqr_16u32f, (AccFunc)accSqr_16u64f,
    (AccFunc)accSqr_32f, (AccFunc)accSqr_32f64f,
    (AccFunc)accSqr_64f
};

static AccProdFunc accProdTab[] =
{
    (AccProdFunc)accProd_8u32f, (AccProdFunc)accProd_8u64f,
    (AccProdFunc)accProd_16u32f, (AccProdFunc)accProd_16u64f,
    (AccProdFunc)accProd_32f, (AccProdFunc)accProd_32f64f,
    (AccProdFunc)accProd_64f
};

static AccWFunc accWTab[] =
{
    (AccWFunc)accW_8u32f, (AccWFunc)accW_8u64f,
    (AccWFunc)accW_16u32f, (AccWFunc)accW_16u64f,
    (AccWFunc)accW_32f, (AccWFunc)accW_32f64f,
    (AccWFunc)accW_64f
};
00768 
00769 inline int getAccTabIdx(int sdepth, int ddepth)
00770 {
00771     return sdepth == CV_8U && ddepth == CV_32F ? 0 :
00772            sdepth == CV_8U && ddepth == CV_64F ? 1 :
00773            sdepth == CV_16U && ddepth == CV_32F ? 2 :
00774            sdepth == CV_16U && ddepth == CV_64F ? 3 :
00775            sdepth == CV_32F && ddepth == CV_32F ? 4 :
00776            sdepth == CV_32F && ddepth == CV_64F ? 5 :
00777            sdepth == CV_64F && ddepth == CV_64F ? 6 : -1;
00778 }
00779 
00780 #ifdef HAVE_OPENCL
00781 
// Operation selector passed to ocl_accumulate(); the names are also
// forwarded to the OpenCL build options via opMap below.
enum
{
    ACCUMULATE = 0,
    ACCUMULATE_SQUARE = 1,
    ACCUMULATE_PRODUCT = 2,
    ACCUMULATE_WEIGHTED = 3
};
00789 
// OpenCL implementation of the four accumulate variants. Builds the
// "accumulate" kernel from opencl_kernels_imgproc with build options
// selecting the operation, mask handling, and element types, then runs it
// over the whole image. Returns false when OpenCL cannot be used (64F data
// without device double support, or the kernel failed to build) so the
// caller can fall back to the CPU path.
static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray _dst, double alpha,
                            InputArray _mask, int op_type )
{
    CV_Assert(op_type == ACCUMULATE || op_type == ACCUMULATE_SQUARE ||
              op_type == ACCUMULATE_PRODUCT || op_type == ACCUMULATE_WEIGHTED);

    const ocl::Device & dev = ocl::Device::getDefault();
    bool haveMask = !_mask.empty(), doubleSupport = dev.doubleFPConfig() > 0;
    int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), ddepth = _dst.depth();
    // kercn: how many elements one work-item handles; with a mask it must
    // stay per-pixel (== cn). rowsPerWI: rows per work-item, 4 on Intel GPUs.
    int kercn = haveMask ? cn : ocl::predictOptimalVectorWidthMax(_src, _src2, _dst), rowsPerWI = dev.isIntel() ? 4 : 1;

    if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
        return false;

    // must stay in sync with the op-code enum above
    const char * const opMap[4] = { "ACCUMULATE", "ACCUMULATE_SQUARE", "ACCUMULATE_PRODUCT",
                                   "ACCUMULATE_WEIGHTED" };

    char cvt[40];
    ocl::Kernel k("accumulate", ocl::imgproc::accumulate_oclsrc,
                  format("-D %s%s -D srcT1=%s -D cn=%d -D dstT1=%s%s -D rowsPerWI=%d -D convertToDT=%s",
                         opMap[op_type], haveMask ? " -D HAVE_MASK" : "",
                         ocl::typeToStr(sdepth), kercn, ocl::typeToStr(ddepth),
                         doubleSupport ? " -D DOUBLE_SUPPORT" : "", rowsPerWI,
                         ocl::convertTypeStr(sdepth, ddepth, 1, cvt)));
    if (k.empty())
        return false;

    UMat src = _src.getUMat(), src2 = _src2.getUMat(), dst = _dst.getUMat(), mask = _mask.getUMat();

    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
            src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
            dstarg = ocl::KernelArg::ReadWrite(dst, cn, kercn),
            maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);

    // argument order must match the kernel: src [, src2], dst [, alpha] [, mask]
    int argidx = k.set(0, srcarg);
    if (op_type == ACCUMULATE_PRODUCT)
        argidx = k.set(argidx, src2arg);
    argidx = k.set(argidx, dstarg);
    if (op_type == ACCUMULATE_WEIGHTED)
    {
        // pass alpha with the precision the kernel was compiled for
        if (ddepth == CV_32F)
            argidx = k.set(argidx, (float)alpha);
        else
            argidx = k.set(argidx, alpha);
    }
    if (haveMask)
        k.set(argidx, maskarg);

    // one work-item per kercn elements horizontally, per rowsPerWI rows vertically
    size_t globalsize[2] = { (size_t)src.cols * cn / kercn, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
}
00841 
00842 #endif
00843 
00844 }
00845 
00846 #if defined(HAVE_IPP)
00847 namespace cv
00848 {
// IPP-accelerated path for cv::accumulate. Handles 8u/16u/32f sources with a
// 32f destination; the masked variants exist only for single-channel data.
// Returns false when no IPP function matches or the IPP call fails, so the
// caller can fall back to the generic C++ implementation.
static bool ipp_accumulate(InputArray _src, InputOutputArray _dst, InputArray _mask)
{
    int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
    int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype);

    Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();

    // IPP operates on 2D ROIs; >2D data is acceptable only when everything is
    // continuous and can be collapsed into a single row below.
    if (src.dims <= 2 || (src.isContinuous() && dst.isContinuous() && (mask.empty() || mask.isContinuous())))
    {
        typedef IppStatus (CV_STDCALL * ippiAdd)(const void * pSrc, int srcStep, Ipp32f * pSrcDst, int srcdstStep, IppiSize roiSize);
        typedef IppStatus (CV_STDCALL * ippiAddMask)(const void * pSrc, int srcStep, const Ipp8u * pMask, int maskStep, Ipp32f * pSrcDst,
                                                    int srcDstStep, IppiSize roiSize);
        ippiAdd ippFunc = 0;
        ippiAddMask ippFuncMask = 0;

        if (mask.empty())
        {
            // Unmasked in-place add; the suppression brackets cover
            // deprecation warnings on these older ippiAdd entry points.
            CV_SUPPRESS_DEPRECATED_START
            ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAdd)ippiAdd_8u32f_C1IR :
                sdepth == CV_16U && ddepth == CV_32F ? (ippiAdd)ippiAdd_16u32f_C1IR :
                sdepth == CV_32F && ddepth == CV_32F ? (ippiAdd)ippiAdd_32f_C1IR : 0;
            CV_SUPPRESS_DEPRECATED_END
        }
        else if (scn == 1)
        {
            // Masked variants are only provided for single-channel images.
            ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddMask)ippiAdd_8u32f_C1IMR :
                sdepth == CV_16U && ddepth == CV_32F ? (ippiAddMask)ippiAdd_16u32f_C1IMR :
                sdepth == CV_32F && ddepth == CV_32F ? (ippiAddMask)ippiAdd_32f_C1IMR : 0;
        }

        if (ippFunc || ippFuncMask)
        {
            IppStatus status = ippStsErr;

            Size size = src.size();
            int srcstep = (int)src.step, dststep = (int)dst.step, maskstep = (int)mask.step;
            if (src.isContinuous() && dst.isContinuous() && mask.isContinuous())
            {
                // Collapse fully continuous data into one long row so a single
                // IPP call can process everything (also covers dims > 2).
                srcstep = static_cast<int>(src.total() * src.elemSize());
                dststep = static_cast<int>(dst.total() * dst.elemSize());
                maskstep = static_cast<int>(mask.total() * mask.elemSize());
                size.width = static_cast<int>(src.total());
                size.height = 1;
            }
            size.width *= scn; // C1 functions see extra channels as extra columns

            if (ippFunc)
                status = ippFunc(src.ptr(), srcstep, dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));
            else if(ippFuncMask)
                status = ippFuncMask(src.ptr(), srcstep, mask.ptr<Ipp8u>(), maskstep,
                                        dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));

            if (status >= 0)
                return true;
        }
    }
    return false;
}
00907 }
00908 #endif
00909 
00910 void cv::accumulate( InputArray _src, InputOutputArray _dst, InputArray _mask )
00911 {
00912     int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
00913     int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), dcn = CV_MAT_CN(dtype);
00914 
00915     CV_Assert( _src.sameSize(_dst) && dcn == scn );
00916     CV_Assert( _mask.empty() || (_src.sameSize(_mask) && _mask.type() == CV_8U) );
00917 
00918 #ifdef HAVE_OPENCL
00919     CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
00920                ocl_accumulate(_src, noArray(), _dst, 0.0, _mask, ACCUMULATE))
00921 #endif
00922 
00923     CV_IPP_RUN((_src.dims() <= 2 || (_src.isContinuous() && _dst.isContinuous() && (_mask.empty() || _mask.isContinuous()))),
00924         ipp_accumulate(_src, _dst, _mask));
00925 
00926     Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();
00927 
00928 
00929     int fidx = getAccTabIdx(sdepth, ddepth);
00930     AccFunc func = fidx >= 0 ? accTab[fidx] : 0;
00931     CV_Assert( func != 0 );
00932 
00933     const Mat* arrays[] = {&src, &dst, &mask, 0};
00934     uchar* ptrs[3];
00935     NAryMatIterator it(arrays, ptrs);
00936     int len = (int)it.size;
00937 
00938     for( size_t i = 0; i < it.nplanes; i++, ++it )
00939         func(ptrs[0], ptrs[1], ptrs[2], len, scn);
00940 }
00941 
00942 #if defined(HAVE_IPP)
00943 namespace cv
00944 {
// IPP-accelerated path for cv::accumulateSquare. Handles 8u/16u/32f sources
// with a 32f destination; masked variants exist only for single-channel data.
// Returns false when no IPP function matches or the call fails, so the caller
// can fall back to the generic C++ implementation.
static bool ipp_accumulate_square(InputArray _src, InputOutputArray _dst, InputArray _mask)
{
    int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
    int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype);

    Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();

    // IPP operates on 2D ROIs; >2D data is acceptable only when everything is
    // continuous and can be collapsed into a single row below.
    if (src.dims <= 2 || (src.isContinuous() && dst.isContinuous() && (mask.empty() || mask.isContinuous())))
    {
        typedef IppStatus (CV_STDCALL * ippiAddSquare)(const void * pSrc, int srcStep, Ipp32f * pSrcDst, int srcdstStep, IppiSize roiSize);
        typedef IppStatus (CV_STDCALL * ippiAddSquareMask)(const void * pSrc, int srcStep, const Ipp8u * pMask, int maskStep, Ipp32f * pSrcDst,
                                                            int srcDstStep, IppiSize roiSize);
        ippiAddSquare ippFunc = 0;
        ippiAddSquareMask ippFuncMask = 0;

        if (mask.empty())
        {
            ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_8u32f_C1IR :
                sdepth == CV_16U && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_16u32f_C1IR :
                sdepth == CV_32F && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_32f_C1IR : 0;
        }
        else if (scn == 1)
        {
            // Masked variants are only provided for single-channel images.
            ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_8u32f_C1IMR :
                sdepth == CV_16U && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_16u32f_C1IMR :
                sdepth == CV_32F && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_32f_C1IMR : 0;
        }

        if (ippFunc || ippFuncMask)
        {
            IppStatus status = ippStsErr;

            Size size = src.size();
            int srcstep = (int)src.step, dststep = (int)dst.step, maskstep = (int)mask.step;
            if (src.isContinuous() && dst.isContinuous() && mask.isContinuous())
            {
                // Collapse fully continuous data into one long row so a single
                // IPP call can process everything (also covers dims > 2).
                srcstep = static_cast<int>(src.total() * src.elemSize());
                dststep = static_cast<int>(dst.total() * dst.elemSize());
                maskstep = static_cast<int>(mask.total() * mask.elemSize());
                size.width = static_cast<int>(src.total());
                size.height = 1;
            }
            size.width *= scn; // C1 functions see extra channels as extra columns

            if (ippFunc)
                status = ippFunc(src.ptr(), srcstep, dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));
            else if(ippFuncMask)
                status = ippFuncMask(src.ptr(), srcstep, mask.ptr<Ipp8u>(), maskstep,
                                        dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));

            if (status >= 0)
                return true;
        }
    }
    return false;
}
01001 }
01002 #endif
01003 
01004 void cv::accumulateSquare( InputArray _src, InputOutputArray _dst, InputArray _mask )
01005 {
01006     int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
01007     int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), dcn = CV_MAT_CN(dtype);
01008 
01009     CV_Assert( _src.sameSize(_dst) && dcn == scn );
01010     CV_Assert( _mask.empty() || (_src.sameSize(_mask) && _mask.type() == CV_8U) );
01011 
01012 #ifdef HAVE_OPENCL
01013     CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
01014                ocl_accumulate(_src, noArray(), _dst, 0.0, _mask, ACCUMULATE_SQUARE))
01015 #endif
01016 
01017     CV_IPP_RUN((_src.dims() <= 2 || (_src.isContinuous() && _dst.isContinuous() && (_mask.empty() || _mask.isContinuous()))),
01018         ipp_accumulate_square(_src, _dst, _mask));
01019 
01020     Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();
01021 
01022     int fidx = getAccTabIdx(sdepth, ddepth);
01023     AccFunc func = fidx >= 0 ? accSqrTab[fidx] : 0;
01024     CV_Assert( func != 0 );
01025 
01026     const Mat* arrays[] = {&src, &dst, &mask, 0};
01027     uchar* ptrs[3];
01028     NAryMatIterator it(arrays, ptrs);
01029     int len = (int)it.size;
01030 
01031     for( size_t i = 0; i < it.nplanes; i++, ++it )
01032         func(ptrs[0], ptrs[1], ptrs[2], len, scn);
01033 }
01034 
01035 #if defined(HAVE_IPP)
01036 namespace cv
01037 {
// IPP-accelerated path for cv::accumulateProduct (dst += src1*src2 element-wise).
// Handles 8u/16u/32f sources with a 32f destination; masked variants exist only
// for single-channel data. Returns false when no IPP function matches or the
// call fails, so the caller can fall back to the generic C++ implementation.
static bool ipp_accumulate_product(InputArray _src1, InputArray _src2,
                            InputOutputArray _dst, InputArray _mask)
{
    int stype = _src1.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
    int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype);

    Mat src1 = _src1.getMat(), src2 = _src2.getMat(), dst = _dst.getMat(), mask = _mask.getMat();

    // IPP operates on 2D ROIs; >2D data must be continuous to be flattened below.
    // NOTE(review): unlike ipp_accumulate, this guard does not check mask
    // continuity; the inner flatten condition does — confirm this is intended.
    if (src1.dims <= 2 || (src1.isContinuous() && src2.isContinuous() && dst.isContinuous()))
    {
        typedef IppStatus (CV_STDCALL * ippiAddProduct)(const void * pSrc1, int src1Step, const void * pSrc2,
                                                        int src2Step, Ipp32f * pSrcDst, int srcDstStep, IppiSize roiSize);
        typedef IppStatus (CV_STDCALL * ippiAddProductMask)(const void * pSrc1, int src1Step, const void * pSrc2, int src2Step,
                                                            const Ipp8u * pMask, int maskStep, Ipp32f * pSrcDst, int srcDstStep, IppiSize roiSize);
        ippiAddProduct ippFunc = 0;
        ippiAddProductMask ippFuncMask = 0;

        if (mask.empty())
        {
            ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_8u32f_C1IR :
                sdepth == CV_16U && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_16u32f_C1IR :
                sdepth == CV_32F && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_32f_C1IR : 0;
        }
        else if (scn == 1)
        {
            // Masked variants are only provided for single-channel images.
            ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_8u32f_C1IMR :
                sdepth == CV_16U && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_16u32f_C1IMR :
                sdepth == CV_32F && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_32f_C1IMR : 0;
        }

        if (ippFunc || ippFuncMask)
        {
            IppStatus status = ippStsErr;

            Size size = src1.size();
            int src1step = (int)src1.step, src2step = (int)src2.step, dststep = (int)dst.step, maskstep = (int)mask.step;
            if (src1.isContinuous() && src2.isContinuous() && dst.isContinuous() && mask.isContinuous())
            {
                // Collapse fully continuous data into one long row so a single
                // IPP call can process everything (also covers dims > 2).
                src1step = static_cast<int>(src1.total() * src1.elemSize());
                src2step = static_cast<int>(src2.total() * src2.elemSize());
                dststep = static_cast<int>(dst.total() * dst.elemSize());
                maskstep = static_cast<int>(mask.total() * mask.elemSize());
                size.width = static_cast<int>(src1.total());
                size.height = 1;
            }
            size.width *= scn; // C1 functions see extra channels as extra columns

            if (ippFunc)
                status = ippFunc(src1.ptr(), src1step, src2.ptr(), src2step, dst.ptr<Ipp32f>(),
                                    dststep, ippiSize(size.width, size.height));
            else if(ippFuncMask)
                status = ippFuncMask(src1.ptr(), src1step, src2.ptr(), src2step, mask.ptr<Ipp8u>(), maskstep,
                                        dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));

            if (status >= 0)
                return true;
        }
    }
    return false;
}
01098 }
01099 #endif
01100 
01101 
01102 
01103 void cv::accumulateProduct( InputArray _src1, InputArray _src2,
01104                             InputOutputArray _dst, InputArray _mask )
01105 {
01106     int stype = _src1.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
01107     int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), dcn = CV_MAT_CN(dtype);
01108 
01109     CV_Assert( _src1.sameSize(_src2) && stype == _src2.type() );
01110     CV_Assert( _src1.sameSize(_dst) && dcn == scn );
01111     CV_Assert( _mask.empty() || (_src1.sameSize(_mask) && _mask.type() == CV_8U) );
01112 
01113 #ifdef HAVE_OPENCL
01114     CV_OCL_RUN(_src1.dims() <= 2 && _dst.isUMat(),
01115                ocl_accumulate(_src1, _src2, _dst, 0.0, _mask, ACCUMULATE_PRODUCT))
01116 #endif
01117 
01118     CV_IPP_RUN( (_src1.dims() <= 2 || (_src1.isContinuous() && _src2.isContinuous() && _dst.isContinuous())),
01119         ipp_accumulate_product(_src1, _src2, _dst, _mask));
01120 
01121     Mat src1 = _src1.getMat(), src2 = _src2.getMat(), dst = _dst.getMat(), mask = _mask.getMat();
01122 
01123     int fidx = getAccTabIdx(sdepth, ddepth);
01124     AccProdFunc func = fidx >= 0 ? accProdTab[fidx] : 0;
01125     CV_Assert( func != 0 );
01126 
01127     const Mat* arrays[] = {&src1, &src2, &dst, &mask, 0};
01128     uchar* ptrs[4];
01129     NAryMatIterator it(arrays, ptrs);
01130     int len = (int)it.size;
01131 
01132     for( size_t i = 0; i < it.nplanes; i++, ++it )
01133         func(ptrs[0], ptrs[1], ptrs[2], ptrs[3], len, scn);
01134 }
01135 
01136 #if defined(HAVE_IPP)
01137 namespace cv
01138 {
01139 static bool ipp_accumulate_weighted( InputArray _src, InputOutputArray _dst,
01140                              double alpha, InputArray _mask )
01141 {
01142     int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
01143     int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype);
01144 
01145     Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();
01146 
01147     if (src.dims <= 2 || (src.isContinuous() && dst.isContinuous() && mask.isContinuous()))
01148     {
01149         typedef IppStatus (CV_STDCALL * ippiAddWeighted)(const void * pSrc, int srcStep, Ipp32f * pSrcDst, int srcdstStep,
01150                                                             IppiSize roiSize, Ipp32f alpha);
01151         typedef IppStatus (CV_STDCALL * ippiAddWeightedMask)(const void * pSrc, int srcStep, const Ipp8u * pMask,
01152                                                                 int maskStep, Ipp32f * pSrcDst,
01153                                                                 int srcDstStep, IppiSize roiSize, Ipp32f alpha);
01154         ippiAddWeighted ippFunc = 0;
01155         ippiAddWeightedMask ippFuncMask = 0;
01156 
01157         if (mask.empty())
01158         {
01159             ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_8u32f_C1IR :
01160                 sdepth == CV_16U && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_16u32f_C1IR :
01161                 sdepth == CV_32F && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_32f_C1IR : 0;
01162         }
01163         else if (scn == 1)
01164         {
01165             ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_8u32f_C1IMR :
01166                 sdepth == CV_16U && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_16u32f_C1IMR :
01167                 sdepth == CV_32F && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_32f_C1IMR : 0;
01168         }
01169 
01170         if (ippFunc || ippFuncMask)
01171         {
01172             IppStatus status = ippStsErr;
01173 
01174             Size size = src.size();
01175             int srcstep = (int)src.step, dststep = (int)dst.step, maskstep = (int)mask.step;
01176             if (src.isContinuous() && dst.isContinuous() && mask.isContinuous())
01177             {
01178                 srcstep = static_cast<int>(src.total() * src.elemSize());
01179                 dststep = static_cast<int>(dst.total() * dst.elemSize());
01180                 maskstep = static_cast<int>(mask.total() * mask.elemSize());
01181                 size.width = static_cast<int>((int)src.total());
01182                 size.height = 1;
01183             }
01184             size.width *= scn;
01185 
01186             if (ippFunc)
01187                 status = ippFunc(src.ptr(), srcstep, dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height), (Ipp32f)alpha);
01188             else if(ippFuncMask)
01189                 status = ippFuncMask(src.ptr(), srcstep, mask.ptr<Ipp8u>(), maskstep,
01190                                         dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height), (Ipp32f)alpha);
01191 
01192             if (status >= 0)
01193                 return true;
01194         }
01195     }
01196     return false;
01197 }
01198 }
01199 #endif
01200 
01201 void cv::accumulateWeighted( InputArray _src, InputOutputArray _dst,
01202                              double alpha, InputArray _mask )
01203 {
01204     int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
01205     int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), dcn = CV_MAT_CN(dtype);
01206 
01207     CV_Assert( _src.sameSize(_dst) && dcn == scn );
01208     CV_Assert( _mask.empty() || (_src.sameSize(_mask) && _mask.type() == CV_8U) );
01209 
01210 #ifdef HAVE_OPENCL
01211     CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
01212                ocl_accumulate(_src, noArray(), _dst, alpha, _mask, ACCUMULATE_WEIGHTED))
01213 #endif
01214 
01215     CV_IPP_RUN((_src.dims() <= 2 || (_src.isContinuous() && _dst.isContinuous() && _mask.isContinuous())), ipp_accumulate_weighted(_src, _dst, alpha, _mask));
01216 
01217 
01218     Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();
01219 
01220 
01221     int fidx = getAccTabIdx(sdepth, ddepth);
01222     AccWFunc func = fidx >= 0 ? accWTab[fidx] : 0;
01223     CV_Assert( func != 0 );
01224 
01225     const Mat* arrays[] = {&src, &dst, &mask, 0};
01226     uchar* ptrs[3];
01227     NAryMatIterator it(arrays, ptrs);
01228     int len = (int)it.size;
01229 
01230     for( size_t i = 0; i < it.nplanes; i++, ++it )
01231         func(ptrs[0], ptrs[1], ptrs[2], len, scn, alpha);
01232 }
01233 
01234 
01235 CV_IMPL void
01236 cvAcc( const void* arr, void* sumarr, const void* maskarr )
01237 {
01238     cv::Mat src = cv::cvarrToMat(arr), dst = cv::cvarrToMat(sumarr), mask;
01239     if( maskarr )
01240         mask = cv::cvarrToMat(maskarr);
01241     cv::accumulate( src, dst, mask );
01242 }
01243 
01244 CV_IMPL void
01245 cvSquareAcc( const void* arr, void* sumarr, const void* maskarr )
01246 {
01247     cv::Mat src = cv::cvarrToMat(arr), dst = cv::cvarrToMat(sumarr), mask;
01248     if( maskarr )
01249         mask = cv::cvarrToMat(maskarr);
01250     cv::accumulateSquare( src, dst, mask );
01251 }
01252 
01253 CV_IMPL void
01254 cvMultiplyAcc( const void* arr1, const void* arr2,
01255                void* sumarr, const void* maskarr )
01256 {
01257     cv::Mat src1 = cv::cvarrToMat(arr1), src2 = cv::cvarrToMat(arr2);
01258     cv::Mat dst = cv::cvarrToMat(sumarr), mask;
01259     if( maskarr )
01260         mask = cv::cvarrToMat(maskarr);
01261     cv::accumulateProduct( src1, src2, dst, mask );
01262 }
01263 
01264 CV_IMPL void
01265 cvRunningAvg( const void* arr, void* sumarr, double alpha, const void* maskarr )
01266 {
01267     cv::Mat src = cv::cvarrToMat(arr), dst = cv::cvarrToMat(sumarr), mask;
01268     if( maskarr )
01269         mask = cv::cvarrToMat(maskarr);
01270     cv::accumulateWeighted( src, dst, alpha, mask );
01271 }
01272 
01273 /* End of file. */
01274