Renesas GR-PEACH OpenCV Development / gr-peach-opencv-project-sd-card_update

Fork of gr-peach-opencv-project-sd-card by the do

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers accum.cpp Source File

accum.cpp

00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
00015 // Copyright (C) 2014, Itseez Inc., all rights reserved.
00016 // Third party copyrights are property of their respective owners.
00017 //
00018 // Redistribution and use in source and binary forms, with or without modification,
00019 // are permitted provided that the following conditions are met:
00020 //
00021 //   * Redistribution's of source code must retain the above copyright notice,
00022 //     this list of conditions and the following disclaimer.
00023 //
00024 //   * Redistribution's in binary form must reproduce the above copyright notice,
00025 //     this list of conditions and the following disclaimer in the documentation
00026 //     and/or other materials provided with the distribution.
00027 //
00028 //   * The name of the copyright holders may not be used to endorse or promote products
00029 //     derived from this software without specific prior written permission.
00030 //
00031 // This software is provided by the copyright holders and contributors "as is" and
00032 // any express or implied warranties, including, but not limited to, the implied
00033 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00034 // In no event shall the Intel Corporation or contributors be liable for any direct,
00035 // indirect, incidental, special, exemplary, or consequential damages
00036 // (including, but not limited to, procurement of substitute goods or services;
00037 // loss of use, data, or profits; or business interruption) however caused
00038 // and on any theory of liability, whether in contract, strict liability,
00039 // or tort (including negligence or otherwise) arising in any way out of
00040 // the use of this software, even if advised of the possibility of such damage.
00041 //
00042 //M*/
00043 
00044 #include "precomp.hpp"
00045 #include "opencl_kernels_imgproc.hpp"
00046 
00047 namespace cv
00048 {
00049 
// Generic (no-SIMD) fallback for acc_(): does no work and returns 0,
// meaning "zero elements processed", so the scalar loop handles everything.
// Architecture-specific specializations below override this.
template <typename T, typename AT>
struct Acc_SIMD
{
    int operator() (const T *, AT *, const uchar *, int, int) const
    {
        return 0;
    }
};
00058 
// Generic (no-SIMD) fallback for accSqr_(): processes nothing (returns 0).
template <typename T, typename AT>
struct AccSqr_SIMD
{
    int operator() (const T *, AT *, const uchar *, int, int) const
    {
        return 0;
    }
};
00067 
// Generic (no-SIMD) fallback for accProd_(): processes nothing (returns 0).
// Note the extra source pointer: the product kernel reads two inputs.
template <typename T, typename AT>
struct AccProd_SIMD
{
    int operator() (const T *, const T *, AT *, const uchar *, int, int) const
    {
        return 0;
    }
};
00076 
// Generic (no-SIMD) fallback for accW_(): processes nothing (returns 0).
// The trailing AT parameter is the blending weight alpha.
template <typename T, typename AT>
struct AccW_SIMD
{
    int operator() (const T *, AT *, const uchar *, int, int, AT) const
    {
        return 0;
    }
};
00085 
00086 #if CV_NEON
00087 
// NEON acc(): dst<float> += src<uchar>, 16 input bytes per iteration.
// Covers the unmasked case (any cn) and the masked single-channel case;
// returns the number of elements consumed so the scalar tail can finish.
template <>
struct Acc_SIMD<uchar, float>
{
    int operator() (const uchar * src, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            // no mask: the interleaved channels are treated as one flat row
            len *= cn;
            for ( ; x <= len - 16; x += 16)
            {
                // widen u8 -> u16 in low/high halves ...
                uint8x16_t v_src = vld1q_u8(src + x);
                uint16x8_t v_src0 = vmovl_u8(vget_low_u8(v_src)), v_src1 = vmovl_u8(vget_high_u8(v_src));

                // ... then u16 -> u32 -> f32 and accumulate 4 floats at a time
                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
                vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
                vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
            }
        }
        else if (cn == 1)
        {
            uint8x16_t v_255 = vdupq_n_u8(255), v_0 = vdupq_n_u8(0);

            for ( ; x <= len - 16; x += 16)
            {
                // vceq(mask,0) gives 0xFF where the mask is zero; XOR with 0xFF
                // inverts it, so ANDing zeroes out the masked-off source pixels
                // (adding 0 leaves dst unchanged there).
                uint8x16_t v_src = vandq_u8(vld1q_u8(src + x), veorq_u8(v_255, vceqq_u8(vld1q_u8(mask + x), v_0)));
                uint16x8_t v_src0 = vmovl_u8(vget_low_u8(v_src)), v_src1 = vmovl_u8(vget_high_u8(v_src));

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
                vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
                vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
            }
        }

        return x;
    }
};
00128 
// NEON acc(): dst<float> += src<ushort>, 8 elements per iteration.
// Only the unmasked case is vectorized; masked rows fall through to scalar.
template <>
struct Acc_SIMD<ushort, float>
{
    int operator() (const ushort * src, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 8; x += 8)
            {
                // widen u16 -> u32, convert to f32, accumulate in two 4-lane halves
                uint16x8_t v_src = vld1q_u16(src + x);
                uint32x4_t v_src0 = vmovl_u16(vget_low_u16(v_src)), v_src1 = vmovl_u16(vget_high_u16(v_src));

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(v_src0)));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(v_src1)));
            }
        }

        return x;
    }
};
00152 
00153 template <>
00154 struct Acc_SIMD<float, float>
00155 {
00156     int operator() (const float * src, float * dst, const uchar * mask, int len, int cn) const
00157     {
00158         int x = 0;
00159 
00160         if (!mask)
00161         {
00162             len *= cn;
00163             for ( ; x <= len - 8; x += 8)
00164             {
00165                 vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vld1q_f32(src + x)));
00166                 vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vld1q_f32(src + x + 4)));
00167             }
00168         }
00169 
00170         return x;
00171     }
00172 };
00173 
// NEON accSqr(): dst<float> += src<uchar>^2, 16 input bytes per iteration.
// vmull_u8 squares with widening (u8*u8 -> u16), so no overflow occurs.
// Handles unmasked rows and the masked single-channel case.
template <>
struct AccSqr_SIMD<uchar, float>
{
    int operator() (const uchar * src, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 16; x += 16)
            {
                uint8x16_t v_src = vld1q_u8(src + x);
                uint8x8_t v_src_0 = vget_low_u8(v_src), v_src_1 = vget_high_u8(v_src);
                // widening square: each half of the 16 bytes becomes 8 u16 products
                uint16x8_t v_src0 = vmull_u8(v_src_0, v_src_0), v_src1 = vmull_u8(v_src_1, v_src_1);

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
                vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
                vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
            }
        }
        else if (cn == 1)
        {
            uint8x16_t v_255 = vdupq_n_u8(255), v_0 = vdupq_n_u8(0);

            for ( ; x <= len - 16; x += 16)
            {
                // zero out masked-off pixels before squaring (0^2 adds nothing)
                uint8x16_t v_src = vandq_u8(vld1q_u8(src + x), veorq_u8(v_255, vceqq_u8(vld1q_u8(mask + x), v_0)));
                uint8x8_t v_src_0 = vget_low_u8(v_src), v_src_1 = vget_high_u8(v_src);
                uint16x8_t v_src0 = vmull_u8(v_src_0, v_src_0), v_src1 = vmull_u8(v_src_1, v_src_1);

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
                vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
                vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
            }
        }

        return x;
    }
};
00216 
// NEON accSqr(): dst<float> += src<ushort>^2, 8 elements per iteration.
// vmull_u16 squares with widening (u16*u16 -> u32).
template <>
struct AccSqr_SIMD<ushort, float>
{
    int operator() (const ushort * src, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 8; x += 8)
            {
                uint16x8_t v_src = vld1q_u16(src + x);
                uint16x4_t v_src_0 = vget_low_u16(v_src), v_src_1 = vget_high_u16(v_src);
                uint32x4_t v_src0 = vmull_u16(v_src_0, v_src_0), v_src1 = vmull_u16(v_src_1, v_src_1);

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(v_src0)));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(v_src1)));
            }
        }
        else if (cn == 1)
        {
            uint8x8_t v_255 = vdup_n_u8(255), v_0 = vdup_n_u8(0);

            for ( ; x <= len - 8; x += 8)
            {
                // build an 8-byte 0xFF/0x00 mask, then zip it with itself to
                // duplicate each byte into a 16-bit lane mask for the u16 source
                uint8x8_t v_mask_src = veor_u8(v_255, vceq_u8(vld1_u8(mask + x), v_0));
                uint8x8x2_t v_mask_zp = vzip_u8(v_mask_src, v_mask_src);
                uint16x8_t v_mask = vreinterpretq_u16_u8(vcombine_u8(v_mask_zp.val[0], v_mask_zp.val[1])),
                           v_src = vandq_u16(vld1q_u16(src + x), v_mask);

                uint16x4_t v_src_0 = vget_low_u16(v_src), v_src_1 = vget_high_u16(v_src);
                uint32x4_t v_src0 = vmull_u16(v_src_0, v_src_0), v_src1 = vmull_u16(v_src_1, v_src_1);

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(v_src0)));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(v_src1)));
            }
        }

        return x;
    }
};
00259 
00260 template <>
00261 struct AccSqr_SIMD<float, float>
00262 {
00263     int operator() (const float * src, float * dst, const uchar * mask, int len, int cn) const
00264     {
00265         int x = 0;
00266 
00267         if (!mask)
00268         {
00269             len *= cn;
00270             for ( ; x <= len - 8; x += 8)
00271             {
00272                 float32x4_t v_src = vld1q_f32(src + x);
00273                 vst1q_f32(dst + x, vmlaq_f32(vld1q_f32(dst + x), v_src, v_src));
00274 
00275                 v_src = vld1q_f32(src + x + 4);
00276                 vst1q_f32(dst + x + 4, vmlaq_f32(vld1q_f32(dst + x + 4), v_src, v_src));
00277             }
00278         }
00279 
00280         return x;
00281     }
00282 };
00283 
// NEON accProd(): dst<float> += src1<uchar> * src2<uchar>, 16 bytes/iter.
// vmull_u8 multiplies with widening (u8*u8 -> u16), so no overflow occurs.
// Handles unmasked rows and the masked single-channel case.
template <>
struct AccProd_SIMD<uchar, float>
{
    int operator() (const uchar * src1, const uchar * src2, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 16; x += 16)
            {
                uint8x16_t v_1src = vld1q_u8(src1 + x), v_2src = vld1q_u8(src2 + x);
                uint16x8_t v_src0 = vmull_u8(vget_low_u8(v_1src), vget_low_u8(v_2src)),
                           v_src1 = vmull_u8(vget_high_u8(v_1src), vget_high_u8(v_2src));

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
                vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
                vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
            }
        }
        else if (cn == 1)
        {
            uint8x16_t v_255 = vdupq_n_u8(255), v_0 = vdupq_n_u8(0);

            for ( ; x <= len - 16; x += 16)
            {
                // zero both operands where the mask is zero; 0*anything adds nothing
                uint8x16_t v_mask = veorq_u8(v_255, vceqq_u8(vld1q_u8(mask + x), v_0));
                uint8x16_t v_1src = vandq_u8(vld1q_u8(src1 + x), v_mask), v_2src = vandq_u8(vld1q_u8(src2 + x), v_mask);
                uint16x8_t v_src0 = vmull_u8(vget_low_u8(v_1src), vget_low_u8(v_2src)),
                           v_src1 = vmull_u8(vget_high_u8(v_1src), vget_high_u8(v_2src));

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0)))));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0)))));
                vst1q_f32(dst + x + 8, vaddq_f32(vld1q_f32(dst + x + 8), vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1)))));
                vst1q_f32(dst + x + 12, vaddq_f32(vld1q_f32(dst + x + 12), vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1)))));
            }
        }

        return x;
    }
};
00327 
// NEON accProd(): dst<float> += src1<ushort> * src2<ushort>, 8 elems/iter.
// vmull_u16 multiplies with widening (u16*u16 -> u32).
template <>
struct AccProd_SIMD<ushort, float>
{
    int operator() (const ushort * src1, const ushort * src2, float * dst, const uchar * mask, int len, int cn) const
    {
        int x = 0;

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 8; x += 8)
            {
                uint16x8_t v_1src = vld1q_u16(src1 + x), v_2src = vld1q_u16(src2 + x);
                uint32x4_t v_src0 = vmull_u16(vget_low_u16(v_1src), vget_low_u16(v_2src)),
                           v_src1 = vmull_u16(vget_high_u16(v_1src), vget_high_u16(v_2src));

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(v_src0)));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(v_src1)));
            }
        }
        else if (cn == 1)
        {
            uint8x8_t v_255 = vdup_n_u8(255), v_0 = vdup_n_u8(0);

            for ( ; x <= len - 8; x += 8)
            {
                // expand the 8-byte mask to 16-bit lanes (zip duplicates each
                // byte), then zero both operands where the mask is zero
                uint8x8_t v_mask_src = veor_u8(v_255, vceq_u8(vld1_u8(mask + x), v_0));
                uint8x8x2_t v_mask_zp = vzip_u8(v_mask_src, v_mask_src);
                uint16x8_t v_mask = vreinterpretq_u16_u8(vcombine_u8(v_mask_zp.val[0], v_mask_zp.val[1])),
                           v_1src = vandq_u16(vld1q_u16(src1 + x), v_mask),
                           v_2src = vandq_u16(vld1q_u16(src2 + x), v_mask);

                uint32x4_t v_src0 = vmull_u16(vget_low_u16(v_1src), vget_low_u16(v_2src)),
                           v_src1 = vmull_u16(vget_high_u16(v_1src), vget_high_u16(v_2src));

                vst1q_f32(dst + x, vaddq_f32(vld1q_f32(dst + x), vcvtq_f32_u32(v_src0)));
                vst1q_f32(dst + x + 4, vaddq_f32(vld1q_f32(dst + x + 4), vcvtq_f32_u32(v_src1)));
            }
        }

        return x;
    }
};
00371 
00372 template <>
00373 struct AccProd_SIMD<float, float>
00374 {
00375     int operator() (const float * src1, const float * src2, float * dst, const uchar * mask, int len, int cn) const
00376     {
00377         int x = 0;
00378 
00379         if (!mask)
00380         {
00381             len *= cn;
00382             for ( ; x <= len - 8; x += 8)
00383             {
00384                 vst1q_f32(dst + x, vmlaq_f32(vld1q_f32(dst + x), vld1q_f32(src1 + x), vld1q_f32(src2 + x)));
00385                 vst1q_f32(dst + x + 4, vmlaq_f32(vld1q_f32(dst + x + 4), vld1q_f32(src1 + x + 4), vld1q_f32(src2 + x + 4)));
00386             }
00387         }
00388 
00389         return x;
00390     }
00391 };
00392 
// NEON accW(): running weighted average dst = dst*(1-alpha) + src*alpha
// for uchar input / float accumulator, 16 bytes per iteration.
// Only the unmasked case is vectorized.
template <>
struct AccW_SIMD<uchar, float>
{
    int operator() (const uchar * src, float * dst, const uchar * mask, int len, int cn, float alpha) const
    {
        int x = 0;
        float32x4_t v_alpha = vdupq_n_f32(alpha), v_beta = vdupq_n_f32(1.0f - alpha);

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 16; x += 16)
            {
                // widen u8 -> u16; each u16 half is widened again to u32 below
                uint8x16_t v_src = vld1q_u8(src + x);
                uint16x8_t v_src0 = vmovl_u8(vget_low_u8(v_src)), v_src1 = vmovl_u8(vget_high_u8(v_src));

                // dst*beta + src*alpha, 4 floats at a time
                vst1q_f32(dst + x, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x), v_beta),
                                             vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0))), v_alpha));
                vst1q_f32(dst + x + 4, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x + 4), v_beta),
                                             vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0))), v_alpha));
                vst1q_f32(dst + x + 8, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x + 8), v_beta),
                                                 vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1))), v_alpha));
                vst1q_f32(dst + x + 12, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x + 12), v_beta),
                                                  vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1))), v_alpha));
            }
        }

        return x;
    }
};
00423 
// NEON accW(): dst = dst*(1-alpha) + src*alpha for ushort input /
// float accumulator, 8 elements per iteration (unmasked case only).
template <>
struct AccW_SIMD<ushort, float>
{
    int operator() (const ushort * src, float * dst, const uchar * mask, int len, int cn, float alpha) const
    {
        int x = 0;
        float32x4_t v_alpha = vdupq_n_f32(alpha), v_beta = vdupq_n_f32(1.0f - alpha);

        if (!mask)
        {
            len *= cn;
            for ( ; x <= len - 8; x += 8)
            {
                uint16x8_t v_src = vld1q_u16(src + x);
                uint32x4_t v_src0 = vmovl_u16(vget_low_u16(v_src)), v_src1 = vmovl_u16(vget_high_u16(v_src));

                vst1q_f32(dst + x, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x), v_beta), vcvtq_f32_u32(v_src0), v_alpha));
                vst1q_f32(dst + x + 4, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x + 4), v_beta), vcvtq_f32_u32(v_src1), v_alpha));
            }
        }

        return x;
    }
};
00448 
00449 template <>
00450 struct AccW_SIMD<float, float>
00451 {
00452     int operator() (const float * src, float * dst, const uchar * mask, int len, int cn, float alpha) const
00453     {
00454         int x = 0;
00455         float32x4_t v_alpha = vdupq_n_f32(alpha), v_beta = vdupq_n_f32(1.0f - alpha);
00456 
00457         if (!mask)
00458         {
00459             len *= cn;
00460             for ( ; x <= len - 8; x += 8)
00461             {
00462                 vst1q_f32(dst + x, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x), v_beta), vld1q_f32(src + x), v_alpha));
00463                 vst1q_f32(dst + x + 4, vmlaq_f32(vmulq_f32(vld1q_f32(dst + x + 4), v_beta), vld1q_f32(src + x + 4), v_alpha));
00464             }
00465         }
00466 
00467         return x;
00468     }
00469 };
00470 
00471 #endif
00472 
00473 template<typename T, typename AT> void
00474 acc_( const T* src, AT* dst, const uchar* mask, int len, int cn )
00475 {
00476     int i = Acc_SIMD<T, AT>()(src, dst, mask, len, cn);
00477 
00478     if( !mask )
00479     {
00480         len *= cn;
00481         #if CV_ENABLE_UNROLLED
00482         for( ; i <= len - 4; i += 4 )
00483         {
00484             AT t0, t1;
00485             t0 = src[i] + dst[i];
00486             t1 = src[i+1] + dst[i+1];
00487             dst[i] = t0; dst[i+1] = t1;
00488 
00489             t0 = src[i+2] + dst[i+2];
00490             t1 = src[i+3] + dst[i+3];
00491             dst[i+2] = t0; dst[i+3] = t1;
00492         }
00493         #endif
00494         for( ; i < len; i++ )
00495             dst[i] += src[i];
00496     }
00497     else if( cn == 1 )
00498     {
00499         for( ; i < len; i++ )
00500         {
00501             if( mask[i] )
00502                 dst[i] += src[i];
00503         }
00504     }
00505     else if( cn == 3 )
00506     {
00507         for( ; i < len; i++, src += 3, dst += 3 )
00508         {
00509             if( mask[i] )
00510             {
00511                 AT t0 = src[0] + dst[0];
00512                 AT t1 = src[1] + dst[1];
00513                 AT t2 = src[2] + dst[2];
00514 
00515                 dst[0] = t0; dst[1] = t1; dst[2] = t2;
00516             }
00517         }
00518     }
00519     else
00520     {
00521         for( ; i < len; i++, src += cn, dst += cn )
00522             if( mask[i] )
00523             {
00524                 for( int k = 0; k < cn; k++ )
00525                     dst[k] += src[k];
00526             }
00527     }
00528 }
00529 
00530 
00531 template<typename T, typename AT> void
00532 accSqr_( const T* src, AT* dst, const uchar* mask, int len, int cn )
00533 {
00534     int i = AccSqr_SIMD<T, AT>()(src, dst, mask, len, cn);
00535 
00536     if( !mask )
00537     {
00538         len *= cn;
00539          #if CV_ENABLE_UNROLLED
00540         for( ; i <= len - 4; i += 4 )
00541         {
00542             AT t0, t1;
00543             t0 = (AT)src[i]*src[i] + dst[i];
00544             t1 = (AT)src[i+1]*src[i+1] + dst[i+1];
00545             dst[i] = t0; dst[i+1] = t1;
00546 
00547             t0 = (AT)src[i+2]*src[i+2] + dst[i+2];
00548             t1 = (AT)src[i+3]*src[i+3] + dst[i+3];
00549             dst[i+2] = t0; dst[i+3] = t1;
00550         }
00551         #endif
00552         for( ; i < len; i++ )
00553             dst[i] += (AT)src[i]*src[i];
00554     }
00555     else if( cn == 1 )
00556     {
00557         for( ; i < len; i++ )
00558         {
00559             if( mask[i] )
00560                 dst[i] += (AT)src[i]*src[i];
00561         }
00562     }
00563     else if( cn == 3 )
00564     {
00565         for( ; i < len; i++, src += 3, dst += 3 )
00566         {
00567             if( mask[i] )
00568             {
00569                 AT t0 = (AT)src[0]*src[0] + dst[0];
00570                 AT t1 = (AT)src[1]*src[1] + dst[1];
00571                 AT t2 = (AT)src[2]*src[2] + dst[2];
00572 
00573                 dst[0] = t0; dst[1] = t1; dst[2] = t2;
00574             }
00575         }
00576     }
00577     else
00578     {
00579         for( ; i < len; i++, src += cn, dst += cn )
00580             if( mask[i] )
00581             {
00582                 for( int k = 0; k < cn; k++ )
00583                     dst[k] += (AT)src[k]*src[k];
00584             }
00585     }
00586 }
00587 
00588 
00589 template<typename T, typename AT> void
00590 accProd_( const T* src1, const T* src2, AT* dst, const uchar* mask, int len, int cn )
00591 {
00592     int i = AccProd_SIMD<T, AT>()(src1, src2, dst, mask, len, cn);
00593 
00594     if( !mask )
00595     {
00596         len *= cn;
00597         #if CV_ENABLE_UNROLLED
00598         for( ; i <= len - 4; i += 4 )
00599         {
00600             AT t0, t1;
00601             t0 = (AT)src1[i]*src2[i] + dst[i];
00602             t1 = (AT)src1[i+1]*src2[i+1] + dst[i+1];
00603             dst[i] = t0; dst[i+1] = t1;
00604 
00605             t0 = (AT)src1[i+2]*src2[i+2] + dst[i+2];
00606             t1 = (AT)src1[i+3]*src2[i+3] + dst[i+3];
00607             dst[i+2] = t0; dst[i+3] = t1;
00608         }
00609         #endif
00610         for( ; i < len; i++ )
00611             dst[i] += (AT)src1[i]*src2[i];
00612     }
00613     else if( cn == 1 )
00614     {
00615         for( ; i < len; i++ )
00616         {
00617             if( mask[i] )
00618                 dst[i] += (AT)src1[i]*src2[i];
00619         }
00620     }
00621     else if( cn == 3 )
00622     {
00623         for( ; i < len; i++, src1 += 3, src2 += 3, dst += 3 )
00624         {
00625             if( mask[i] )
00626             {
00627                 AT t0 = (AT)src1[0]*src2[0] + dst[0];
00628                 AT t1 = (AT)src1[1]*src2[1] + dst[1];
00629                 AT t2 = (AT)src1[2]*src2[2] + dst[2];
00630 
00631                 dst[0] = t0; dst[1] = t1; dst[2] = t2;
00632             }
00633         }
00634     }
00635     else
00636     {
00637         for( ; i < len; i++, src1 += cn, src2 += cn, dst += cn )
00638             if( mask[i] )
00639             {
00640                 for( int k = 0; k < cn; k++ )
00641                     dst[k] += (AT)src1[k]*src2[k];
00642             }
00643     }
00644 }
00645 
00646 
00647 template<typename T, typename AT> void
00648 accW_( const T* src, AT* dst, const uchar* mask, int len, int cn, double alpha )
00649 {
00650     AT a = (AT)alpha, b = 1 - a;
00651     int i = AccW_SIMD<T, AT>()(src, dst, mask, len, cn, a);
00652 
00653     if( !mask )
00654     {
00655         len *= cn;
00656         #if CV_ENABLE_UNROLLED
00657         for( ; i <= len - 4; i += 4 )
00658         {
00659             AT t0, t1;
00660             t0 = src[i]*a + dst[i]*b;
00661             t1 = src[i+1]*a + dst[i+1]*b;
00662             dst[i] = t0; dst[i+1] = t1;
00663 
00664             t0 = src[i+2]*a + dst[i+2]*b;
00665             t1 = src[i+3]*a + dst[i+3]*b;
00666             dst[i+2] = t0; dst[i+3] = t1;
00667         }
00668         #endif
00669         for( ; i < len; i++ )
00670             dst[i] = src[i]*a + dst[i]*b;
00671     }
00672     else if( cn == 1 )
00673     {
00674         for( ; i < len; i++ )
00675         {
00676             if( mask[i] )
00677                 dst[i] = src[i]*a + dst[i]*b;
00678         }
00679     }
00680     else if( cn == 3 )
00681     {
00682         for( ; i < len; i++, src += 3, dst += 3 )
00683         {
00684             if( mask[i] )
00685             {
00686                 AT t0 = src[0]*a + dst[0]*b;
00687                 AT t1 = src[1]*a + dst[1]*b;
00688                 AT t2 = src[2]*a + dst[2]*b;
00689 
00690                 dst[0] = t0; dst[1] = t1; dst[2] = t2;
00691             }
00692         }
00693     }
00694     else
00695     {
00696         for( ; i < len; i++, src += cn, dst += cn )
00697             if( mask[i] )
00698             {
00699                 for( int k = 0; k < cn; k++ )
00700                     dst[k] = src[k]*a + dst[k]*b;
00701             }
00702     }
00703 }
00704 
00705 
// DEF_ACC_FUNCS stamps out the four non-template wrappers (plain add,
// square, product, weighted) for one (source type, accumulator type) pair.
// The wrappers have uniform signatures so they can be stored in the
// type-erased function-pointer tables below. No comments are placed inside
// the macro body: a '//' before a trailing backslash would swallow the
// line continuation.
#define DEF_ACC_FUNCS(suffix, type, acctype) \
static void acc_##suffix(const type* src, acctype* dst, \
                         const uchar* mask, int len, int cn) \
{ acc_(src, dst, mask, len, cn); } \
\
static void accSqr_##suffix(const type* src, acctype* dst, \
                            const uchar* mask, int len, int cn) \
{ accSqr_(src, dst, mask, len, cn); } \
\
static void accProd_##suffix(const type* src1, const type* src2, \
                             acctype* dst, const uchar* mask, int len, int cn) \
{ accProd_(src1, src2, dst, mask, len, cn); } \
\
static void accW_##suffix(const type* src, acctype* dst, \
                          const uchar* mask, int len, int cn, double alpha) \
{ accW_(src, dst, mask, len, cn, alpha); }


// One instantiation per supported (source depth, accumulator depth) pair;
// the suffix encodes the pair, e.g. 8u32f = uchar source, float accumulator.
DEF_ACC_FUNCS(8u32f, uchar, float)
DEF_ACC_FUNCS(8u64f, uchar, double)
DEF_ACC_FUNCS(16u32f, ushort, float)
DEF_ACC_FUNCS(16u64f, ushort, double)
DEF_ACC_FUNCS(32f, float, float)
DEF_ACC_FUNCS(32f64f, float, double)
DEF_ACC_FUNCS(64f, double, double)
00731 
00732 
// Type-erased function-pointer signatures for the dispatch tables: element
// types are erased to uchar* and restored by the casts in the tables below.
typedef void (*AccFunc)(const uchar*, uchar*, const uchar*, int, int);
typedef void (*AccProdFunc)(const uchar*, const uchar*, uchar*, const uchar*, int, int);
typedef void (*AccWFunc)(const uchar*, uchar*, const uchar*, int, int, double);
00736 
// Dispatch tables for the four accumulation operations, indexed by the
// value returned from getAccTabIdx(sdepth, ddepth). Entry order must match
// the index mapping in getAccTabIdx.
static AccFunc accTab[] =
{
    (AccFunc)acc_8u32f, (AccFunc)acc_8u64f,
    (AccFunc)acc_16u32f, (AccFunc)acc_16u64f,
    (AccFunc)acc_32f, (AccFunc)acc_32f64f,
    (AccFunc)acc_64f
};

static AccFunc accSqrTab[] =
{
    (AccFunc)accSqr_8u32f, (AccFunc)accSqr_8u64f,
    (AccFunc)accSqr_16u32f, (AccFunc)accSqr_16u64f,
    (AccFunc)accSqr_32f, (AccFunc)accSqr_32f64f,
    (AccFunc)accSqr_64f
};

static AccProdFunc accProdTab[] =
{
    (AccProdFunc)accProd_8u32f, (AccProdFunc)accProd_8u64f,
    (AccProdFunc)accProd_16u32f, (AccProdFunc)accProd_16u64f,
    (AccProdFunc)accProd_32f, (AccProdFunc)accProd_32f64f,
    (AccProdFunc)accProd_64f
};

static AccWFunc accWTab[] =
{
    (AccWFunc)accW_8u32f, (AccWFunc)accW_8u64f,
    (AccWFunc)accW_16u32f, (AccWFunc)accW_16u64f,
    (AccWFunc)accW_32f, (AccWFunc)accW_32f64f,
    (AccWFunc)accW_64f
};
00768 
00769 inline int getAccTabIdx(int sdepth, int ddepth)
00770 {
00771     return sdepth == CV_8U && ddepth == CV_32F ? 0 :
00772            sdepth == CV_8U && ddepth == CV_64F ? 1 :
00773            sdepth == CV_16U && ddepth == CV_32F ? 2 :
00774            sdepth == CV_16U && ddepth == CV_64F ? 3 :
00775            sdepth == CV_32F && ddepth == CV_32F ? 4 :
00776            sdepth == CV_32F && ddepth == CV_64F ? 5 :
00777            sdepth == CV_64F && ddepth == CV_64F ? 6 : -1;
00778 }
00779 
00780 #ifdef HAVE_OPENCL
00781 
// Operation selector passed to ocl_accumulate(); the names are also
// forwarded to the OpenCL build options via opMap below.
enum
{
    ACCUMULATE = 0,
    ACCUMULATE_SQUARE = 1,
    ACCUMULATE_PRODUCT = 2,
    ACCUMULATE_WEIGHTED = 3
};
00789 
// OpenCL implementation of the four accumulate variants. Builds the
// "accumulate" kernel from opencl_kernels_imgproc with build options
// selecting the operation, mask handling, and element types, then runs it
// over the whole image. Returns false when OpenCL cannot be used (64F data
// without device double support, or the kernel failed to build) so the
// caller can fall back to the CPU path.
static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray _dst, double alpha,
                            InputArray _mask, int op_type )
{
    CV_Assert(op_type == ACCUMULATE || op_type == ACCUMULATE_SQUARE ||
              op_type == ACCUMULATE_PRODUCT || op_type == ACCUMULATE_WEIGHTED);

    const ocl::Device & dev = ocl::Device::getDefault();
    bool haveMask = !_mask.empty(), doubleSupport = dev.doubleFPConfig() > 0;
    int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), ddepth = _dst.depth();
    // kercn: how many elements one work-item handles; with a mask it must
    // stay per-pixel (== cn). rowsPerWI: rows per work-item, 4 on Intel GPUs.
    int kercn = haveMask ? cn : ocl::predictOptimalVectorWidthMax(_src, _src2, _dst), rowsPerWI = dev.isIntel() ? 4 : 1;

    if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
        return false;

    // must stay in sync with the op-code enum above
    const char * const opMap[4] = { "ACCUMULATE", "ACCUMULATE_SQUARE", "ACCUMULATE_PRODUCT",
                                   "ACCUMULATE_WEIGHTED" };

    char cvt[40];
    ocl::Kernel k("accumulate", ocl::imgproc::accumulate_oclsrc,
                  format("-D %s%s -D srcT1=%s -D cn=%d -D dstT1=%s%s -D rowsPerWI=%d -D convertToDT=%s",
                         opMap[op_type], haveMask ? " -D HAVE_MASK" : "",
                         ocl::typeToStr(sdepth), kercn, ocl::typeToStr(ddepth),
                         doubleSupport ? " -D DOUBLE_SUPPORT" : "", rowsPerWI,
                         ocl::convertTypeStr(sdepth, ddepth, 1, cvt)));
    if (k.empty())
        return false;

    UMat src = _src.getUMat(), src2 = _src2.getUMat(), dst = _dst.getUMat(), mask = _mask.getUMat();

    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
            src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
            dstarg = ocl::KernelArg::ReadWrite(dst, cn, kercn),
            maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);

    // argument order must match the kernel: src [, src2], dst [, alpha] [, mask]
    int argidx = k.set(0, srcarg);
    if (op_type == ACCUMULATE_PRODUCT)
        argidx = k.set(argidx, src2arg);
    argidx = k.set(argidx, dstarg);
    if (op_type == ACCUMULATE_WEIGHTED)
    {
        // pass alpha with the precision the kernel was compiled for
        if (ddepth == CV_32F)
            argidx = k.set(argidx, (float)alpha);
        else
            argidx = k.set(argidx, alpha);
    }
    if (haveMask)
        k.set(argidx, maskarg);

    // one work-item per kercn elements horizontally, per rowsPerWI rows vertically
    size_t globalsize[2] = { (size_t)src.cols * cn / kercn, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
}
00841 
00842 #endif
00843 
00844 }
00845 
00846 #if defined(HAVE_IPP)
00847 namespace cv
00848 {
// IPP-accelerated path for cv::accumulate. Handles 8u/16u/32f sources with a
// 32f destination; the masked variants exist only for single-channel data.
// Returns false when no IPP function matches or the IPP call fails, so the
// caller can fall back to the generic C++ implementation.
static bool ipp_accumulate(InputArray _src, InputOutputArray _dst, InputArray _mask)
{
    int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
    int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype);

    Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();

    // IPP operates on 2D ROIs; >2D data is acceptable only when everything is
    // continuous and can be collapsed into a single row below.
    if (src.dims <= 2 || (src.isContinuous() && dst.isContinuous() && (mask.empty() || mask.isContinuous())))
    {
        typedef IppStatus (CV_STDCALL * ippiAdd)(const void * pSrc, int srcStep, Ipp32f * pSrcDst, int srcdstStep, IppiSize roiSize);
        typedef IppStatus (CV_STDCALL * ippiAddMask)(const void * pSrc, int srcStep, const Ipp8u * pMask, int maskStep, Ipp32f * pSrcDst,
                                                    int srcDstStep, IppiSize roiSize);
        ippiAdd ippFunc = 0;
        ippiAddMask ippFuncMask = 0;

        if (mask.empty())
        {
            // Unmasked in-place add; the suppression brackets cover
            // deprecation warnings on these older ippiAdd entry points.
            CV_SUPPRESS_DEPRECATED_START
            ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAdd)ippiAdd_8u32f_C1IR :
                sdepth == CV_16U && ddepth == CV_32F ? (ippiAdd)ippiAdd_16u32f_C1IR :
                sdepth == CV_32F && ddepth == CV_32F ? (ippiAdd)ippiAdd_32f_C1IR : 0;
            CV_SUPPRESS_DEPRECATED_END
        }
        else if (scn == 1)
        {
            // Masked variants are only provided for single-channel images.
            ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddMask)ippiAdd_8u32f_C1IMR :
                sdepth == CV_16U && ddepth == CV_32F ? (ippiAddMask)ippiAdd_16u32f_C1IMR :
                sdepth == CV_32F && ddepth == CV_32F ? (ippiAddMask)ippiAdd_32f_C1IMR : 0;
        }

        if (ippFunc || ippFuncMask)
        {
            IppStatus status = ippStsErr;

            Size size = src.size();
            int srcstep = (int)src.step, dststep = (int)dst.step, maskstep = (int)mask.step;
            if (src.isContinuous() && dst.isContinuous() && mask.isContinuous())
            {
                // Collapse fully continuous data into one long row so a single
                // IPP call can process everything (also covers dims > 2).
                srcstep = static_cast<int>(src.total() * src.elemSize());
                dststep = static_cast<int>(dst.total() * dst.elemSize());
                maskstep = static_cast<int>(mask.total() * mask.elemSize());
                size.width = static_cast<int>(src.total());
                size.height = 1;
            }
            size.width *= scn; // C1 functions see extra channels as extra columns

            if (ippFunc)
                status = ippFunc(src.ptr(), srcstep, dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));
            else if(ippFuncMask)
                status = ippFuncMask(src.ptr(), srcstep, mask.ptr<Ipp8u>(), maskstep,
                                        dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));

            if (status >= 0)
                return true;
        }
    }
    return false;
}
00907 }
00908 #endif
00909 
00910 void cv::accumulate( InputArray _src, InputOutputArray _dst, InputArray _mask )
00911 {
00912     int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
00913     int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), dcn = CV_MAT_CN(dtype);
00914 
00915     CV_Assert( _src.sameSize(_dst) && dcn == scn );
00916     CV_Assert( _mask.empty() || (_src.sameSize(_mask) && _mask.type() == CV_8U) );
00917 
00918 #ifdef HAVE_OPENCL
00919     CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
00920                ocl_accumulate(_src, noArray(), _dst, 0.0, _mask, ACCUMULATE))
00921 #endif
00922 
00923     CV_IPP_RUN((_src.dims() <= 2 || (_src.isContinuous() && _dst.isContinuous() && (_mask.empty() || _mask.isContinuous()))),
00924         ipp_accumulate(_src, _dst, _mask));
00925 
00926     Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();
00927 
00928 
00929     int fidx = getAccTabIdx(sdepth, ddepth);
00930     AccFunc func = fidx >= 0 ? accTab[fidx] : 0;
00931     CV_Assert( func != 0 );
00932 
00933     const Mat* arrays[] = {&src, &dst, &mask, 0};
00934     uchar* ptrs[3];
00935     NAryMatIterator it(arrays, ptrs);
00936     int len = (int)it.size;
00937 
00938     for( size_t i = 0; i < it.nplanes; i++, ++it )
00939         func(ptrs[0], ptrs[1], ptrs[2], len, scn);
00940 }
00941 
00942 #if defined(HAVE_IPP)
00943 namespace cv
00944 {
// IPP-accelerated path for cv::accumulateSquare. Handles 8u/16u/32f sources
// with a 32f destination; masked variants exist only for single-channel data.
// Returns false when no IPP function matches or the call fails, so the caller
// can fall back to the generic C++ implementation.
static bool ipp_accumulate_square(InputArray _src, InputOutputArray _dst, InputArray _mask)
{
    int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
    int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype);

    Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();

    // IPP operates on 2D ROIs; >2D data is acceptable only when everything is
    // continuous and can be collapsed into a single row below.
    if (src.dims <= 2 || (src.isContinuous() && dst.isContinuous() && (mask.empty() || mask.isContinuous())))
    {
        typedef IppStatus (CV_STDCALL * ippiAddSquare)(const void * pSrc, int srcStep, Ipp32f * pSrcDst, int srcdstStep, IppiSize roiSize);
        typedef IppStatus (CV_STDCALL * ippiAddSquareMask)(const void * pSrc, int srcStep, const Ipp8u * pMask, int maskStep, Ipp32f * pSrcDst,
                                                            int srcDstStep, IppiSize roiSize);
        ippiAddSquare ippFunc = 0;
        ippiAddSquareMask ippFuncMask = 0;

        if (mask.empty())
        {
            ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_8u32f_C1IR :
                sdepth == CV_16U && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_16u32f_C1IR :
                sdepth == CV_32F && ddepth == CV_32F ? (ippiAddSquare)ippiAddSquare_32f_C1IR : 0;
        }
        else if (scn == 1)
        {
            // Masked variants are only provided for single-channel images.
            ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_8u32f_C1IMR :
                sdepth == CV_16U && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_16u32f_C1IMR :
                sdepth == CV_32F && ddepth == CV_32F ? (ippiAddSquareMask)ippiAddSquare_32f_C1IMR : 0;
        }

        if (ippFunc || ippFuncMask)
        {
            IppStatus status = ippStsErr;

            Size size = src.size();
            int srcstep = (int)src.step, dststep = (int)dst.step, maskstep = (int)mask.step;
            if (src.isContinuous() && dst.isContinuous() && mask.isContinuous())
            {
                // Collapse fully continuous data into one long row so a single
                // IPP call can process everything (also covers dims > 2).
                srcstep = static_cast<int>(src.total() * src.elemSize());
                dststep = static_cast<int>(dst.total() * dst.elemSize());
                maskstep = static_cast<int>(mask.total() * mask.elemSize());
                size.width = static_cast<int>(src.total());
                size.height = 1;
            }
            size.width *= scn; // C1 functions see extra channels as extra columns

            if (ippFunc)
                status = ippFunc(src.ptr(), srcstep, dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));
            else if(ippFuncMask)
                status = ippFuncMask(src.ptr(), srcstep, mask.ptr<Ipp8u>(), maskstep,
                                        dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));

            if (status >= 0)
                return true;
        }
    }
    return false;
}
01001 }
01002 #endif
01003 
01004 void cv::accumulateSquare( InputArray _src, InputOutputArray _dst, InputArray _mask )
01005 {
01006     int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
01007     int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), dcn = CV_MAT_CN(dtype);
01008 
01009     CV_Assert( _src.sameSize(_dst) && dcn == scn );
01010     CV_Assert( _mask.empty() || (_src.sameSize(_mask) && _mask.type() == CV_8U) );
01011 
01012 #ifdef HAVE_OPENCL
01013     CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
01014                ocl_accumulate(_src, noArray(), _dst, 0.0, _mask, ACCUMULATE_SQUARE))
01015 #endif
01016 
01017     CV_IPP_RUN((_src.dims() <= 2 || (_src.isContinuous() && _dst.isContinuous() && (_mask.empty() || _mask.isContinuous()))),
01018         ipp_accumulate_square(_src, _dst, _mask));
01019 
01020     Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();
01021 
01022     int fidx = getAccTabIdx(sdepth, ddepth);
01023     AccFunc func = fidx >= 0 ? accSqrTab[fidx] : 0;
01024     CV_Assert( func != 0 );
01025 
01026     const Mat* arrays[] = {&src, &dst, &mask, 0};
01027     uchar* ptrs[3];
01028     NAryMatIterator it(arrays, ptrs);
01029     int len = (int)it.size;
01030 
01031     for( size_t i = 0; i < it.nplanes; i++, ++it )
01032         func(ptrs[0], ptrs[1], ptrs[2], len, scn);
01033 }
01034 
01035 #if defined(HAVE_IPP)
01036 namespace cv
01037 {
// IPP-accelerated path for cv::accumulateProduct (dst += src1*src2 element-wise).
// Handles 8u/16u/32f sources with a 32f destination; masked variants exist only
// for single-channel data. Returns false when no IPP function matches or the
// call fails, so the caller can fall back to the generic C++ implementation.
static bool ipp_accumulate_product(InputArray _src1, InputArray _src2,
                            InputOutputArray _dst, InputArray _mask)
{
    int stype = _src1.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
    int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype);

    Mat src1 = _src1.getMat(), src2 = _src2.getMat(), dst = _dst.getMat(), mask = _mask.getMat();

    // IPP operates on 2D ROIs; >2D data must be continuous to be flattened below.
    // NOTE(review): unlike ipp_accumulate, this guard does not check mask
    // continuity; the inner flatten condition does — confirm this is intended.
    if (src1.dims <= 2 || (src1.isContinuous() && src2.isContinuous() && dst.isContinuous()))
    {
        typedef IppStatus (CV_STDCALL * ippiAddProduct)(const void * pSrc1, int src1Step, const void * pSrc2,
                                                        int src2Step, Ipp32f * pSrcDst, int srcDstStep, IppiSize roiSize);
        typedef IppStatus (CV_STDCALL * ippiAddProductMask)(const void * pSrc1, int src1Step, const void * pSrc2, int src2Step,
                                                            const Ipp8u * pMask, int maskStep, Ipp32f * pSrcDst, int srcDstStep, IppiSize roiSize);
        ippiAddProduct ippFunc = 0;
        ippiAddProductMask ippFuncMask = 0;

        if (mask.empty())
        {
            ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_8u32f_C1IR :
                sdepth == CV_16U && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_16u32f_C1IR :
                sdepth == CV_32F && ddepth == CV_32F ? (ippiAddProduct)ippiAddProduct_32f_C1IR : 0;
        }
        else if (scn == 1)
        {
            // Masked variants are only provided for single-channel images.
            ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_8u32f_C1IMR :
                sdepth == CV_16U && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_16u32f_C1IMR :
                sdepth == CV_32F && ddepth == CV_32F ? (ippiAddProductMask)ippiAddProduct_32f_C1IMR : 0;
        }

        if (ippFunc || ippFuncMask)
        {
            IppStatus status = ippStsErr;

            Size size = src1.size();
            int src1step = (int)src1.step, src2step = (int)src2.step, dststep = (int)dst.step, maskstep = (int)mask.step;
            if (src1.isContinuous() && src2.isContinuous() && dst.isContinuous() && mask.isContinuous())
            {
                // Collapse fully continuous data into one long row so a single
                // IPP call can process everything (also covers dims > 2).
                src1step = static_cast<int>(src1.total() * src1.elemSize());
                src2step = static_cast<int>(src2.total() * src2.elemSize());
                dststep = static_cast<int>(dst.total() * dst.elemSize());
                maskstep = static_cast<int>(mask.total() * mask.elemSize());
                size.width = static_cast<int>(src1.total());
                size.height = 1;
            }
            size.width *= scn; // C1 functions see extra channels as extra columns

            if (ippFunc)
                status = ippFunc(src1.ptr(), src1step, src2.ptr(), src2step, dst.ptr<Ipp32f>(),
                                    dststep, ippiSize(size.width, size.height));
            else if(ippFuncMask)
                status = ippFuncMask(src1.ptr(), src1step, src2.ptr(), src2step, mask.ptr<Ipp8u>(), maskstep,
                                        dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));

            if (status >= 0)
                return true;
        }
    }
    return false;
}
01098 }
01099 #endif
01100 
01101 
01102 
01103 void cv::accumulateProduct( InputArray _src1, InputArray _src2,
01104                             InputOutputArray _dst, InputArray _mask )
01105 {
01106     int stype = _src1.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
01107     int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), dcn = CV_MAT_CN(dtype);
01108 
01109     CV_Assert( _src1.sameSize(_src2) && stype == _src2.type() );
01110     CV_Assert( _src1.sameSize(_dst) && dcn == scn );
01111     CV_Assert( _mask.empty() || (_src1.sameSize(_mask) && _mask.type() == CV_8U) );
01112 
01113 #ifdef HAVE_OPENCL
01114     CV_OCL_RUN(_src1.dims() <= 2 && _dst.isUMat(),
01115                ocl_accumulate(_src1, _src2, _dst, 0.0, _mask, ACCUMULATE_PRODUCT))
01116 #endif
01117 
01118     CV_IPP_RUN( (_src1.dims() <= 2 || (_src1.isContinuous() && _src2.isContinuous() && _dst.isContinuous())),
01119         ipp_accumulate_product(_src1, _src2, _dst, _mask));
01120 
01121     Mat src1 = _src1.getMat(), src2 = _src2.getMat(), dst = _dst.getMat(), mask = _mask.getMat();
01122 
01123     int fidx = getAccTabIdx(sdepth, ddepth);
01124     AccProdFunc func = fidx >= 0 ? accProdTab[fidx] : 0;
01125     CV_Assert( func != 0 );
01126 
01127     const Mat* arrays[] = {&src1, &src2, &dst, &mask, 0};
01128     uchar* ptrs[4];
01129     NAryMatIterator it(arrays, ptrs);
01130     int len = (int)it.size;
01131 
01132     for( size_t i = 0; i < it.nplanes; i++, ++it )
01133         func(ptrs[0], ptrs[1], ptrs[2], ptrs[3], len, scn);
01134 }
01135 
01136 #if defined(HAVE_IPP)
01137 namespace cv
01138 {
01139 static bool ipp_accumulate_weighted( InputArray _src, InputOutputArray _dst,
01140                              double alpha, InputArray _mask )
01141 {
01142     int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
01143     int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype);
01144 
01145     Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();
01146 
01147     if (src.dims <= 2 || (src.isContinuous() && dst.isContinuous() && mask.isContinuous()))
01148     {
01149         typedef IppStatus (CV_STDCALL * ippiAddWeighted)(const void * pSrc, int srcStep, Ipp32f * pSrcDst, int srcdstStep,
01150                                                             IppiSize roiSize, Ipp32f alpha);
01151         typedef IppStatus (CV_STDCALL * ippiAddWeightedMask)(const void * pSrc, int srcStep, const Ipp8u * pMask,
01152                                                                 int maskStep, Ipp32f * pSrcDst,
01153                                                                 int srcDstStep, IppiSize roiSize, Ipp32f alpha);
01154         ippiAddWeighted ippFunc = 0;
01155         ippiAddWeightedMask ippFuncMask = 0;
01156 
01157         if (mask.empty())
01158         {
01159             ippFunc = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_8u32f_C1IR :
01160                 sdepth == CV_16U && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_16u32f_C1IR :
01161                 sdepth == CV_32F && ddepth == CV_32F ? (ippiAddWeighted)ippiAddWeighted_32f_C1IR : 0;
01162         }
01163         else if (scn == 1)
01164         {
01165             ippFuncMask = sdepth == CV_8U && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_8u32f_C1IMR :
01166                 sdepth == CV_16U && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_16u32f_C1IMR :
01167                 sdepth == CV_32F && ddepth == CV_32F ? (ippiAddWeightedMask)ippiAddWeighted_32f_C1IMR : 0;
01168         }
01169 
01170         if (ippFunc || ippFuncMask)
01171         {
01172             IppStatus status = ippStsErr;
01173 
01174             Size size = src.size();
01175             int srcstep = (int)src.step, dststep = (int)dst.step, maskstep = (int)mask.step;
01176             if (src.isContinuous() && dst.isContinuous() && mask.isContinuous())
01177             {
01178                 srcstep = static_cast<int>(src.total() * src.elemSize());
01179                 dststep = static_cast<int>(dst.total() * dst.elemSize());
01180                 maskstep = static_cast<int>(mask.total() * mask.elemSize());
01181                 size.width = static_cast<int>((int)src.total());
01182                 size.height = 1;
01183             }
01184             size.width *= scn;
01185 
01186             if (ippFunc)
01187                 status = ippFunc(src.ptr(), srcstep, dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height), (Ipp32f)alpha);
01188             else if(ippFuncMask)
01189                 status = ippFuncMask(src.ptr(), srcstep, mask.ptr<Ipp8u>(), maskstep,
01190                                         dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height), (Ipp32f)alpha);
01191 
01192             if (status >= 0)
01193                 return true;
01194         }
01195     }
01196     return false;
01197 }
01198 }
01199 #endif
01200 
01201 void cv::accumulateWeighted( InputArray _src, InputOutputArray _dst,
01202                              double alpha, InputArray _mask )
01203 {
01204     int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), scn = CV_MAT_CN(stype);
01205     int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), dcn = CV_MAT_CN(dtype);
01206 
01207     CV_Assert( _src.sameSize(_dst) && dcn == scn );
01208     CV_Assert( _mask.empty() || (_src.sameSize(_mask) && _mask.type() == CV_8U) );
01209 
01210 #ifdef HAVE_OPENCL
01211     CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
01212                ocl_accumulate(_src, noArray(), _dst, alpha, _mask, ACCUMULATE_WEIGHTED))
01213 #endif
01214 
01215     CV_IPP_RUN((_src.dims() <= 2 || (_src.isContinuous() && _dst.isContinuous() && _mask.isContinuous())), ipp_accumulate_weighted(_src, _dst, alpha, _mask));
01216 
01217 
01218     Mat src = _src.getMat(), dst = _dst.getMat(), mask = _mask.getMat();
01219 
01220 
01221     int fidx = getAccTabIdx(sdepth, ddepth);
01222     AccWFunc func = fidx >= 0 ? accWTab[fidx] : 0;
01223     CV_Assert( func != 0 );
01224 
01225     const Mat* arrays[] = {&src, &dst, &mask, 0};
01226     uchar* ptrs[3];
01227     NAryMatIterator it(arrays, ptrs);
01228     int len = (int)it.size;
01229 
01230     for( size_t i = 0; i < it.nplanes; i++, ++it )
01231         func(ptrs[0], ptrs[1], ptrs[2], len, scn, alpha);
01232 }
01233 
01234 
01235 CV_IMPL void
01236 cvAcc( const void* arr, void* sumarr, const void* maskarr )
01237 {
01238     cv::Mat src = cv::cvarrToMat(arr), dst = cv::cvarrToMat(sumarr), mask;
01239     if( maskarr )
01240         mask = cv::cvarrToMat(maskarr);
01241     cv::accumulate( src, dst, mask );
01242 }
01243 
01244 CV_IMPL void
01245 cvSquareAcc( const void* arr, void* sumarr, const void* maskarr )
01246 {
01247     cv::Mat src = cv::cvarrToMat(arr), dst = cv::cvarrToMat(sumarr), mask;
01248     if( maskarr )
01249         mask = cv::cvarrToMat(maskarr);
01250     cv::accumulateSquare( src, dst, mask );
01251 }
01252 
01253 CV_IMPL void
01254 cvMultiplyAcc( const void* arr1, const void* arr2,
01255                void* sumarr, const void* maskarr )
01256 {
01257     cv::Mat src1 = cv::cvarrToMat(arr1), src2 = cv::cvarrToMat(arr2);
01258     cv::Mat dst = cv::cvarrToMat(sumarr), mask;
01259     if( maskarr )
01260         mask = cv::cvarrToMat(maskarr);
01261     cv::accumulateProduct( src1, src2, dst, mask );
01262 }
01263 
01264 CV_IMPL void
01265 cvRunningAvg( const void* arr, void* sumarr, double alpha, const void* maskarr )
01266 {
01267     cv::Mat src = cv::cvarrToMat(arr), dst = cv::cvarrToMat(sumarr), mask;
01268     if( maskarr )
01269         mask = cv::cvarrToMat(maskarr);
01270     cv::accumulateWeighted( src, dst, alpha, mask );
01271 }
01272 
01273 /* End of file. */
01274