OpenCV 3.1 project on the GR-PEACH board

Fork of gr-peach-opencv-project by the do




arithm.cpp
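
The listing below is OpenCV's core arithmetic source: it implements the element-wise add/subtract, multiply/divide, bitwise, min/max, absdiff, addWeighted, compare and inRange operations, with optional OpenCL and SIMD fast paths. As a minimal usage sketch of the public API implemented here (not part of the file itself; it assumes a standard OpenCV 3.1 build with the core module available):

    #include <opencv2/core.hpp>

    int main()
    {
        // two small single-channel test arrays
        cv::Mat a = (cv::Mat_<uchar>(1, 3) << 10, 20, 30);
        cv::Mat b = (cv::Mat_<uchar>(1, 3) <<  5, 25, 40);
        cv::Mat sum, diff, mask;

        cv::add(a, b, sum);                    // saturated element-wise sum
        cv::absdiff(a, b, diff);               // |a - b|
        cv::compare(a, b, mask, cv::CMP_GE);   // 255 where a >= b, else 0
        cv::addWeighted(a, 0.5, b, 0.5, 0.0, sum, CV_32F); // weighted blend, float result
        return 0;
    }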

00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
00015 // Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
00016 // Third party copyrights are property of their respective owners.
00017 //
00018 // Redistribution and use in source and binary forms, with or without modification,
00019 // are permitted provided that the following conditions are met:
00020 //
00021 //   * Redistribution's of source code must retain the above copyright notice,
00022 //     this list of conditions and the following disclaimer.
00023 //
00024 //   * Redistribution's in binary form must reproduce the above copyright notice,
00025 //     this list of conditions and the following disclaimer in the documentation
00026 //     and/or other materials provided with the distribution.
00027 //
00028 //   * The name of the copyright holders may not be used to endorse or promote products
00029 //     derived from this software without specific prior written permission.
00030 //
00031 // This software is provided by the copyright holders and contributors "as is" and
00032 // any express or implied warranties, including, but not limited to, the implied
00033 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00034 // In no event shall the Intel Corporation or contributors be liable for any direct,
00035 // indirect, incidental, special, exemplary, or consequential damages
00036 // (including, but not limited to, procurement of substitute goods or services;
00037 // loss of use, data, or profits; or business interruption) however caused
00038 // and on any theory of liability, whether in contract, strict liability,
00039 // or tort (including negligence or otherwise) arising in any way out of
00040 // the use of this software, even if advised of the possibility of such damage.
00041 //
00042 //M*/
00043 
00044 /* ////////////////////////////////////////////////////////////////////
00045 //
00046 //  Arithmetic and logical operations: +, -, *, /, &, |, ^, ~, abs ...
00047 //
00048 // */
00049 
00050 #include "precomp.hpp"
00051 #include "opencl_kernels_core.hpp"
00052 
00053 namespace cv
00054 {
00055 
00056 /****************************************************************************************\
00057 *                                   logical operations                                   *
00058 \****************************************************************************************/
00059 
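      // Helper used throughout this file: converts the scalar matrix 'sc' to 'buftype'
      // and replicates the converted value so that 'scbuf' holds 'blocksize' copies of it.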
00060 void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize )
00061 {
00062     int scn = (int)sc.total(), cn = CV_MAT_CN(buftype);
00063     size_t esz = CV_ELEM_SIZE(buftype);
00064     getConvertFunc(sc.depth(), buftype)(sc.ptr(), 1, 0, 1, scbuf, 1, Size(std::min(cn, scn), 1), 0);
00065     // unroll the scalar
00066     if( scn < cn )
00067     {
00068         CV_Assert( scn == 1 );
00069         size_t esz1 = CV_ELEM_SIZE1(buftype);
00070         for( size_t i = esz1; i < esz; i++ )
00071             scbuf[i] = scbuf[i - esz1];
00072     }
00073     for( size_t i = esz; i < blocksize*esz; i++ )
00074         scbuf[i] = scbuf[i - esz];
00075 }
00076 
00077 
00078 enum { OCL_OP_ADD=0, OCL_OP_SUB=1, OCL_OP_RSUB=2, OCL_OP_ABSDIFF=3, OCL_OP_MUL=4,
00079        OCL_OP_MUL_SCALE=5, OCL_OP_DIV_SCALE=6, OCL_OP_RECIP_SCALE=7, OCL_OP_ADDW=8,
00080        OCL_OP_AND=9, OCL_OP_OR=10, OCL_OP_XOR=11, OCL_OP_NOT=12, OCL_OP_MIN=13, OCL_OP_MAX=14,
00081        OCL_OP_RDIV_SCALE=15 };
00082 
00083 #ifdef HAVE_OPENCL
00084 
00085 static const char* oclop2str[] = { "OP_ADD", "OP_SUB", "OP_RSUB", "OP_ABSDIFF",
00086     "OP_MUL", "OP_MUL_SCALE", "OP_DIV_SCALE", "OP_RECIP_SCALE",
00087     "OP_ADDW", "OP_AND", "OP_OR", "OP_XOR", "OP_NOT", "OP_MIN", "OP_MAX", "OP_RDIV_SCALE", 0 };
00088 
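      // OpenCL path for the bitwise/min/max/absdiff family. Returns false (so the caller
      // falls back to the CPU code) when the kernel cannot be built or the layout is
      // unsupported, e.g. more than 4 channels with a mask or scalar, or CV_64F data
      // without device double support (for the non-bitwise ops).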
00089 static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
00090                           InputArray _mask, bool bitwise, int oclop, bool haveScalar )
00091 {
00092     bool haveMask = !_mask.empty();
00093     int srctype = _src1.type();
00094     int srcdepth = CV_MAT_DEPTH(srctype);
00095     int cn = CV_MAT_CN(srctype);
00096 
00097     const ocl::Device d = ocl::Device::getDefault();
00098     bool doubleSupport = d.doubleFPConfig() > 0;
00099     if( oclop < 0 || ((haveMask || haveScalar) && cn > 4) ||
00100             (!doubleSupport && srcdepth == CV_64F && !bitwise))
00101         return false;
00102 
00103     char opts[1024];
00104     int kercn = haveMask || haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
00105     int scalarcn = kercn == 3 ? 4 : kercn;
00106     int rowsPerWI = d.isIntel() ? 4 : 1;
00107 
00108     sprintf(opts, "-D %s%s -D %s -D dstT=%s%s -D dstT_C1=%s -D workST=%s -D cn=%d -D rowsPerWI=%d",
00109             haveMask ? "MASK_" : "", haveScalar ? "UNARY_OP" : "BINARY_OP", oclop2str[oclop],
00110             bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, kercn)) :
00111                 ocl::typeToStr(CV_MAKETYPE(srcdepth, kercn)), doubleSupport ? " -D DOUBLE_SUPPORT" : "",
00112             bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, 1)) :
00113                 ocl::typeToStr(CV_MAKETYPE(srcdepth, 1)),
00114             bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, scalarcn)) :
00115                 ocl::typeToStr(CV_MAKETYPE(srcdepth, scalarcn)),
00116             kercn, rowsPerWI);
00117 
00118     ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
00119     if (k.empty())
00120         return false;
00121 
00122     UMat src1 = _src1.getUMat(), src2;
00123     UMat dst = _dst.getUMat(), mask = _mask.getUMat();
00124 
00125     ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cn, kercn);
00126     ocl::KernelArg dstarg = haveMask ? ocl::KernelArg::ReadWrite(dst, cn, kercn) :
00127                                        ocl::KernelArg::WriteOnly(dst, cn, kercn);
00128     ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask, 1);
00129 
00130     if( haveScalar )
00131     {
00132         size_t esz = CV_ELEM_SIZE1(srctype)*scalarcn;
00133         double buf[4] = {0,0,0,0};
00134 
00135         if( oclop != OCL_OP_NOT )
00136         {
00137             Mat src2sc = _src2.getMat();
00138             convertAndUnrollScalar(src2sc, srctype, (uchar*)buf, 1);
00139         }
00140 
00141         ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, 0, buf, esz);
00142 
00143         if( !haveMask )
00144             k.args(src1arg, dstarg, scalararg);
00145         else
00146             k.args(src1arg, maskarg, dstarg, scalararg);
00147     }
00148     else
00149     {
00150         src2 = _src2.getUMat();
00151         ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cn, kercn);
00152 
00153         if( !haveMask )
00154             k.args(src1arg, src2arg, dstarg);
00155         else
00156             k.args(src1arg, src2arg, maskarg, dstarg);
00157     }
00158 
00159     size_t globalsize[] = { (size_t)src1.cols * cn / kercn, ((size_t)src1.rows + rowsPerWI - 1) / rowsPerWI };
00160     return k.run(2, globalsize, 0, false);
00161 }
00162 
00163 #endif
00164 
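      // Common dispatcher for the bitwise and min/max operations: handles the fast
      // continuous "array op array" case, scalar operands, masks, the OpenCL path and
      // block-wise processing of large or non-continuous arrays.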
00165 static void binary_op( InputArray _src1, InputArray _src2, OutputArray _dst,
00166                        InputArray _mask, const BinaryFuncC* tab,
00167                        bool bitwise, int oclop )
00168 {
00169     const _InputArray *psrc1 = &_src1, *psrc2 = &_src2;
00170     int kind1 = psrc1->kind(), kind2 = psrc2->kind();
00171     int type1 = psrc1->type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
00172     int type2 = psrc2->type(), depth2 = CV_MAT_DEPTH(type2), cn2 = CV_MAT_CN(type2);
00173     int dims1 = psrc1->dims(), dims2 = psrc2->dims();
00174     Size sz1 = dims1 <= 2 ? psrc1->size() : Size();
00175     Size sz2 = dims2 <= 2 ? psrc2->size() : Size();
00176 #ifdef HAVE_OPENCL
00177     bool use_opencl = (kind1 == _InputArray::UMAT || kind2 == _InputArray::UMAT) &&
00178             dims1 <= 2 && dims2 <= 2;
00179 #endif
00180     bool haveMask = !_mask.empty(), haveScalar = false;
00181     BinaryFuncC func;
00182 
00183     if( dims1 <= 2 && dims2 <= 2 && kind1 == kind2 && sz1 == sz2 && type1 == type2 && !haveMask )
00184     {
00185         _dst.create(sz1, type1);
00186 #ifdef HAVE_OPENCL
00187         CV_OCL_RUN(use_opencl,
00188                    ocl_binary_op(*psrc1, *psrc2, _dst, _mask, bitwise, oclop, false))
00189 #endif
00190 
00191         if( bitwise )
00192         {
00193             func = *tab;
00194             cn = (int)CV_ELEM_SIZE(type1);
00195         }
00196         else
00197             func = tab[depth1];
00198 
00199         Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat();
00200         Size sz = getContinuousSize(src1, src2, dst);
00201         size_t len = sz.width*(size_t)cn;
00202         if( len == (size_t)(int)len )
00203         {
00204             sz.width = (int)len;
00205             func(src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, 0);
00206             return;
00207         }
00208     }
00209 
00210     if( oclop == OCL_OP_NOT )
00211         haveScalar = true;
00212     else if( (kind1 == _InputArray::MATX) + (kind2 == _InputArray::MATX) == 1 ||
00213         !psrc1->sameSize(*psrc2) || type1 != type2 )
00214     {
00215         if( checkScalar(*psrc1, type2, kind1, kind2) )
00216         {
00217             // src1 is a scalar; swap it with src2
00218             swap(psrc1, psrc2);
00219             swap(type1, type2);
00220             swap(depth1, depth2);
00221             swap(cn, cn2);
00222             swap(sz1, sz2);
00223         }
00224         else if( !checkScalar(*psrc2, type1, kind2, kind1) )
00225             CV_Error( CV_StsUnmatchedSizes,
00226                       "The operation is neither 'array op array' (where arrays have the same size and type), "
00227                       "nor 'array op scalar', nor 'scalar op array'" );
00228         haveScalar = true;
00229     }
00230     else
00231     {
00232         CV_Assert( psrc1->sameSize(*psrc2) && type1 == type2 );
00233     }
00234 
00235     size_t esz = CV_ELEM_SIZE(type1);
00236     size_t blocksize0 = (BLOCK_SIZE + esz-1)/esz;
00237     BinaryFunc copymask = 0;
00238     bool reallocate = false;
00239 
00240     if( haveMask )
00241     {
00242         int mtype = _mask.type();
00243         CV_Assert( (mtype == CV_8U || mtype == CV_8S) && _mask.sameSize(*psrc1));
00244         copymask = getCopyMaskFunc(esz);
00245         reallocate = !_dst.sameSize(*psrc1) || _dst.type() != type1;
00246     }
00247 
00248     AutoBuffer<uchar> _buf;
00249     uchar *scbuf = 0, *maskbuf = 0;
00250 
00251     _dst.createSameSize(*psrc1, type1);
00252     // if this is a mask operation and dst has been reallocated,
00253     // we have to clear the destination
00254     if( haveMask && reallocate )
00255         _dst.setTo(0.);
00256 #ifdef HAVE_OPENCL
00257     CV_OCL_RUN(use_opencl,
00258                ocl_binary_op(*psrc1, *psrc2, _dst, _mask, bitwise, oclop, haveScalar))
00259 #endif
00260 
00261 
00262     Mat src1 = psrc1->getMat(), src2 = psrc2->getMat();
00263     Mat dst = _dst.getMat(), mask = _mask.getMat();
00264 
00265     if( bitwise )
00266     {
00267         func = *tab;
00268         cn = (int)esz;
00269     }
00270     else
00271         func = tab[depth1];
00272 
00273     if( !haveScalar )
00274     {
00275         const Mat* arrays[] = { &src1, &src2, &dst, &mask, 0 };
00276         uchar* ptrs[4];
00277 
00278         NAryMatIterator it(arrays, ptrs);
00279         size_t total = it.size, blocksize = total;
00280 
00281         if( blocksize*cn > INT_MAX )
00282             blocksize = INT_MAX/cn;
00283 
00284         if( haveMask )
00285         {
00286             blocksize = std::min(blocksize, blocksize0);
00287             _buf.allocate(blocksize*esz);
00288             maskbuf = _buf;
00289         }
00290 
00291         for( size_t i = 0; i < it.nplanes; i++, ++it )
00292         {
00293             for( size_t j = 0; j < total; j += blocksize )
00294             {
00295                 int bsz = (int)MIN(total - j, blocksize);
00296 
00297                 func( ptrs[0], 0, ptrs[1], 0, haveMask ? maskbuf : ptrs[2], 0, bsz*cn, 1, 0 );
00298                 if( haveMask )
00299                 {
00300                     copymask( maskbuf, 0, ptrs[3], 0, ptrs[2], 0, Size(bsz, 1), &esz );
00301                     ptrs[3] += bsz;
00302                 }
00303 
00304                 bsz *= (int)esz;
00305                 ptrs[0] += bsz; ptrs[1] += bsz; ptrs[2] += bsz;
00306             }
00307         }
00308     }
00309     else
00310     {
00311         const Mat* arrays[] = { &src1, &dst, &mask, 0 };
00312         uchar* ptrs[3];
00313 
00314         NAryMatIterator it(arrays, ptrs);
00315         size_t total = it.size, blocksize = std::min(total, blocksize0);
00316 
00317         _buf.allocate(blocksize*(haveMask ? 2 : 1)*esz + 32);
00318         scbuf = _buf;
00319         maskbuf = alignPtr(scbuf + blocksize*esz, 16);
00320 
00321         convertAndUnrollScalar( src2, src1.type(), scbuf, blocksize);
00322 
00323         for( size_t i = 0; i < it.nplanes; i++, ++it )
00324         {
00325             for( size_t j = 0; j < total; j += blocksize )
00326             {
00327                 int bsz = (int)MIN(total - j, blocksize);
00328 
00329                 func( ptrs[0], 0, scbuf, 0, haveMask ? maskbuf : ptrs[1], 0, bsz*cn, 1, 0 );
00330                 if( haveMask )
00331                 {
00332                     copymask( maskbuf, 0, ptrs[2], 0, ptrs[1], 0, Size(bsz, 1), &esz );
00333                     ptrs[2] += bsz;
00334                 }
00335 
00336                 bsz *= (int)esz;
00337                 ptrs[0] += bsz; ptrs[1] += bsz;
00338             }
00339         }
00340     }
00341 }
00342 
00343 static BinaryFuncC* getMaxTab()
00344 {
00345     static BinaryFuncC maxTab[] =
00346     {
00347         (BinaryFuncC)GET_OPTIMIZED(cv::hal::max8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::max8s),
00348         (BinaryFuncC)GET_OPTIMIZED(cv::hal::max16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::max16s),
00349         (BinaryFuncC)GET_OPTIMIZED(cv::hal::max32s),
00350         (BinaryFuncC)GET_OPTIMIZED(cv::hal::max32f), (BinaryFuncC)cv::hal::max64f,
00351         0
00352     };
00353 
00354     return maxTab;
00355 }
00356 
00357 static BinaryFuncC* getMinTab()
00358 {
00359     static BinaryFuncC minTab[] =
00360     {
00361         (BinaryFuncC)GET_OPTIMIZED(cv::hal::min8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::min8s),
00362         (BinaryFuncC)GET_OPTIMIZED(cv::hal::min16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::min16s),
00363         (BinaryFuncC)GET_OPTIMIZED(cv::hal::min32s),
00364         (BinaryFuncC)GET_OPTIMIZED(cv::hal::min32f), (BinaryFuncC)cv::hal::min64f,
00365         0
00366     };
00367 
00368     return minTab;
00369 }
00370 
00371 }
00372 
00373 void cv::bitwise_and(InputArray a, InputArray b, OutputArray c, InputArray mask)
00374 {
00375     BinaryFuncC f = (BinaryFuncC)GET_OPTIMIZED(cv::hal::and8u);
00376     binary_op(a, b, c, mask, &f, true, OCL_OP_AND);
00377 }
00378 
00379 void cv::bitwise_or(InputArray a, InputArray b, OutputArray c, InputArray mask)
00380 {
00381     BinaryFuncC f = (BinaryFuncC)GET_OPTIMIZED(cv::hal::or8u);
00382     binary_op(a, b, c, mask, &f, true, OCL_OP_OR);
00383 }
00384 
00385 void cv::bitwise_xor(InputArray a, InputArray b, OutputArray c, InputArray mask)
00386 {
00387     BinaryFuncC f = (BinaryFuncC)GET_OPTIMIZED(cv::hal::xor8u);
00388     binary_op(a, b, c, mask, &f, true, OCL_OP_XOR);
00389 }
00390 
00391 void cv::bitwise_not(InputArray a, OutputArray c, InputArray mask)
00392 {
00393     BinaryFuncC f = (BinaryFuncC)GET_OPTIMIZED(cv::hal::not8u);
00394     binary_op(a, a, c, mask, &f, true, OCL_OP_NOT);
00395 }
00396 
00397 void cv::max( InputArray src1, InputArray src2, OutputArray dst )
00398 {
00399     binary_op(src1, src2, dst, noArray(), getMaxTab(), false, OCL_OP_MAX );
00400 }
00401 
00402 void cv::min( InputArray src1, InputArray src2, OutputArray dst )
00403 {
00404     binary_op(src1, src2, dst, noArray(), getMinTab(), false, OCL_OP_MIN );
00405 }
00406 
00407 void cv::max(const Mat& src1, const Mat& src2, Mat& dst)
00408 {
00409     OutputArray _dst(dst);
00410     binary_op(src1, src2, _dst, noArray(), getMaxTab(), false, OCL_OP_MAX );
00411 }
00412 
00413 void cv::min(const Mat& src1, const Mat& src2, Mat& dst)
00414 {
00415     OutputArray _dst(dst);
00416     binary_op(src1, src2, _dst, noArray(), getMinTab(), false, OCL_OP_MIN );
00417 }
00418 
00419 void cv::max(const UMat & src1, const UMat & src2, UMat & dst)
00420 {
00421     OutputArray _dst(dst);
00422     binary_op(src1, src2, _dst, noArray(), getMaxTab(), false, OCL_OP_MAX );
00423 }
00424 
00425 void cv::min(const UMat & src1, const UMat & src2, UMat & dst)
00426 {
00427     OutputArray _dst(dst);
00428     binary_op(src1, src2, _dst, noArray(), getMinTab(), false, OCL_OP_MIN );
00429 }
00430 
00431 
00432 /****************************************************************************************\
00433 *                                      add/subtract                                      *
00434 \****************************************************************************************/
00435 
00436 namespace cv
00437 {
00438 
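      // Returns the smallest depth that can represent the scalar exactly
      // (CV_8U..CV_32S), or CV_64F if any component is non-integral.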
00439 static int actualScalarDepth(const double* data, int len)
00440 {
00441     int i = 0, minval = INT_MAX, maxval = INT_MIN;
00442     for(; i < len; ++i)
00443     {
00444         int ival = cvRound(data[i]);
00445         if( ival != data[i] )
00446             break;
00447         minval = MIN(minval, ival);
00448         maxval = MAX(maxval, ival);
00449     }
00450     return i < len ? CV_64F :
00451         minval >= 0 && maxval <= (int)UCHAR_MAX ? CV_8U :
00452         minval >= (int)SCHAR_MIN && maxval <= (int)SCHAR_MAX ? CV_8S :
00453         minval >= 0 && maxval <= (int)USHRT_MAX ? CV_16U :
00454         minval >= (int)SHRT_MIN && maxval <= (int)SHRT_MAX ? CV_16S :
00455         CV_32S;
00456 }
00457 
00458 #ifdef HAVE_OPENCL
00459 
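      // OpenCL path for the scaled arithmetic family (add/sub/absdiff/mul/div/addWeighted);
      // 'usrdata' carries the scale factor for the *_SCALE ops and the three addWeighted
      // coefficients for OCL_OP_ADDW.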
00460 static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
00461                           InputArray _mask, int wtype,
00462                           void* usrdata, int oclop,
00463                           bool haveScalar )
00464 {
00465     const ocl::Device d = ocl::Device::getDefault();
00466     bool doubleSupport = d.doubleFPConfig() > 0;
00467     int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
00468     bool haveMask = !_mask.empty();
00469 
00470     if ( (haveMask || haveScalar) && cn > 4 )
00471         return false;
00472 
00473     int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32S, CV_MAT_DEPTH(wtype));
00474     if (!doubleSupport)
00475         wdepth = std::min(wdepth, CV_32F);
00476 
00477     wtype = CV_MAKETYPE(wdepth, cn);
00478     int type2 = haveScalar ? wtype : _src2.type(), depth2 = CV_MAT_DEPTH(type2);
00479     if (!doubleSupport && (depth2 == CV_64F || depth1 == CV_64F))
00480         return false;
00481 
00482     int kercn = haveMask || haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
00483     int scalarcn = kercn == 3 ? 4 : kercn, rowsPerWI = d.isIntel() ? 4 : 1;
00484 
00485     char cvtstr[4][32], opts[1024];
00486     sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT1_C1=%s -D srcT2=%s -D srcT2_C1=%s "
00487             "-D dstT=%s -D dstT_C1=%s -D workT=%s -D workST=%s -D scaleT=%s -D wdepth=%d -D convertToWT1=%s "
00488             "-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d -D rowsPerWI=%d -D convertFromU=%s",
00489             (haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"),
00490             oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)),
00491             ocl::typeToStr(depth1), ocl::typeToStr(CV_MAKETYPE(depth2, kercn)),
00492             ocl::typeToStr(depth2), ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)),
00493             ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)),
00494             ocl::typeToStr(CV_MAKETYPE(wdepth, scalarcn)),
00495             ocl::typeToStr(wdepth), wdepth,
00496             ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]),
00497             ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]),
00498             ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2]),
00499             doubleSupport ? " -D DOUBLE_SUPPORT" : "", kercn, rowsPerWI,
00500             oclop == OCL_OP_ABSDIFF && wdepth == CV_32S && ddepth == wdepth ?
00501             ocl::convertTypeStr(CV_8U, ddepth, kercn, cvtstr[3]) : "noconvert");
00502 
00503     size_t usrdata_esz = CV_ELEM_SIZE(wdepth);
00504     const uchar* usrdata_p = (const uchar*)usrdata;
00505     const double* usrdata_d = (const double*)usrdata;
00506     float usrdata_f[3];
00507     int i, n = oclop == OCL_OP_MUL_SCALE || oclop == OCL_OP_DIV_SCALE ||
00508         oclop == OCL_OP_RDIV_SCALE || oclop == OCL_OP_RECIP_SCALE ? 1 : oclop == OCL_OP_ADDW ? 3 : 0;
00509     if( n > 0 && wdepth == CV_32F )
00510     {
00511         for( i = 0; i < n; i++ )
00512             usrdata_f[i] = (float)usrdata_d[i];
00513         usrdata_p = (const uchar*)usrdata_f;
00514     }
00515 
00516     ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
00517     if (k.empty())
00518         return false;
00519 
00520     UMat src1 = _src1.getUMat(), src2;
00521     UMat dst = _dst.getUMat(), mask = _mask.getUMat();
00522 
00523     ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cn, kercn);
00524     ocl::KernelArg dstarg = haveMask ? ocl::KernelArg::ReadWrite(dst, cn, kercn) :
00525                                        ocl::KernelArg::WriteOnly(dst, cn, kercn);
00526     ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask, 1);
00527 
00528     if( haveScalar )
00529     {
00530         size_t esz = CV_ELEM_SIZE1(wtype)*scalarcn;
00531         double buf[4]={0,0,0,0};
00532         Mat src2sc = _src2.getMat();
00533 
00534         if( !src2sc.empty() )
00535             convertAndUnrollScalar(src2sc, wtype, (uchar*)buf, 1);
00536         ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, 0, buf, esz);
00537 
00538         if( !haveMask )
00539         {
00540             if(n == 0)
00541                 k.args(src1arg, dstarg, scalararg);
00542             else if(n == 1)
00543                 k.args(src1arg, dstarg, scalararg,
00544                        ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz));
00545             else
00546                 CV_Error(Error::StsNotImplemented, "unsupported number of extra parameters");
00547         }
00548         else
00549             k.args(src1arg, maskarg, dstarg, scalararg);
00550     }
00551     else
00552     {
00553         src2 = _src2.getUMat();
00554         ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cn, kercn);
00555 
00556         if( !haveMask )
00557         {
00558             if (n == 0)
00559                 k.args(src1arg, src2arg, dstarg);
00560             else if (n == 1)
00561                 k.args(src1arg, src2arg, dstarg,
00562                        ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz));
00563             else if (n == 3)
00564                 k.args(src1arg, src2arg, dstarg,
00565                        ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz),
00566                        ocl::KernelArg(0, 0, 0, 0, usrdata_p + usrdata_esz, usrdata_esz),
00567                        ocl::KernelArg(0, 0, 0, 0, usrdata_p + usrdata_esz*2, usrdata_esz));
00568             else
00569                 CV_Error(Error::StsNotImplemented, "unsupported number of extra parameters");
00570         }
00571         else
00572             k.args(src1arg, src2arg, maskarg, dstarg);
00573     }
00574 
00575     size_t globalsize[] = { (size_t)src1.cols * cn / kercn, ((size_t)src1.rows + rowsPerWI - 1) / rowsPerWI };
00576     return k.run(2, globalsize, NULL, false);
00577 }
00578 
00579 #endif
00580 
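      // General dispatcher for add/subtract/multiply/divide/absdiff/addWeighted: picks a
      // working type 'wtype' wide enough to avoid overflow, converts the inputs as needed,
      // applies the per-depth function block by block and converts the result (with
      // saturation) to the requested destination type, honouring an optional mask.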
00581 static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
00582                       InputArray _mask, int dtype, BinaryFuncC* tab, bool muldiv=false,
00583                       void* usrdata=0, int oclop=-1 )
00584 {
00585     const _InputArray *psrc1 = &_src1, *psrc2 = &_src2;
00586     int kind1 = psrc1->kind(), kind2 = psrc2->kind();
00587     bool haveMask = !_mask.empty();
00588     bool reallocate = false;
00589     int type1 = psrc1->type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
00590     int type2 = psrc2->type(), depth2 = CV_MAT_DEPTH(type2), cn2 = CV_MAT_CN(type2);
00591     int wtype, dims1 = psrc1->dims(), dims2 = psrc2->dims();
00592     Size sz1 = dims1 <= 2 ? psrc1->size() : Size();
00593     Size sz2 = dims2 <= 2 ? psrc2->size() : Size();
00594 #ifdef HAVE_OPENCL
00595     bool use_opencl = OCL_PERFORMANCE_CHECK(_dst.isUMat()) && dims1 <= 2 && dims2 <= 2;
00596 #endif
00597     bool src1Scalar = checkScalar(*psrc1, type2, kind1, kind2);
00598     bool src2Scalar = checkScalar(*psrc2, type1, kind2, kind1);
00599 
00600     if( (kind1 == kind2 || cn == 1) && sz1 == sz2 && dims1 <= 2 && dims2 <= 2 && type1 == type2 &&
00601         !haveMask && ((!_dst.fixedType() && (dtype < 0 || CV_MAT_DEPTH(dtype) == depth1)) ||
00602                        (_dst.fixedType() && _dst.type() == type1)) &&
00603         ((src1Scalar && src2Scalar) || (!src1Scalar && !src2Scalar)) )
00604     {
00605         _dst.createSameSize(*psrc1, type1);
00606 #ifdef HAVE_OPENCL
00607         CV_OCL_RUN(use_opencl,
00608             ocl_arithm_op(*psrc1, *psrc2, _dst, _mask,
00609                           (!usrdata ? type1 : std::max(depth1, CV_32F)),
00610                           usrdata, oclop, false))
00611 #endif
00612 
00613         Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat();
00614         Size sz = getContinuousSize(src1, src2, dst, src1.channels());
00615         tab[depth1](src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, usrdata);
00616         return;
00617     }
00618 
00619     bool haveScalar = false, swapped12 = false;
00620 
00621     if( dims1 != dims2 || sz1 != sz2 || cn != cn2 ||
00622         (kind1 == _InputArray::MATX && (sz1 == Size(1,4) || sz1 == Size(1,1))) ||
00623         (kind2 == _InputArray::MATX && (sz2 == Size(1,4) || sz2 == Size(1,1))) )
00624     {
00625         if( checkScalar(*psrc1, type2, kind1, kind2) )
00626         {
00627             // src1 is a scalar; swap it with src2
00628             swap(psrc1, psrc2);
00629             swap(sz1, sz2);
00630             swap(type1, type2);
00631             swap(depth1, depth2);
00632             swap(cn, cn2);
00633             swap(dims1, dims2);
00634             swapped12 = true;
00635             if( oclop == OCL_OP_SUB )
00636                 oclop = OCL_OP_RSUB;
00637             if ( oclop == OCL_OP_DIV_SCALE )
00638                 oclop = OCL_OP_RDIV_SCALE;
00639         }
00640         else if( !checkScalar(*psrc2, type1, kind2, kind1) )
00641             CV_Error( CV_StsUnmatchedSizes,
00642                      "The operation is neither 'array op array' "
00643                      "(where arrays have the same size and the same number of channels), "
00644                      "nor 'array op scalar', nor 'scalar op array'" );
00645         haveScalar = true;
00646         CV_Assert(type2 == CV_64F && (sz2.height == 1 || sz2.height == 4));
00647 
00648         if (!muldiv)
00649         {
00650             Mat sc = psrc2->getMat();
00651             depth2 = actualScalarDepth(sc.ptr<double>(), cn);
00652             if( depth2 == CV_64F && (depth1 < CV_32S || depth1 == CV_32F) )
00653                 depth2 = CV_32F;
00654         }
00655         else
00656             depth2 = CV_64F;
00657     }
00658 
00659     if( dtype < 0 )
00660     {
00661         if( _dst.fixedType() )
00662             dtype = _dst.type();
00663         else
00664         {
00665             if( !haveScalar && type1 != type2 )
00666                 CV_Error(CV_StsBadArg,
00667                      "When the input arrays in add/subtract/multiply/divide functions have different types, "
00668                      "the output array type must be explicitly specified");
00669             dtype = type1;
00670         }
00671     }
00672     dtype = CV_MAT_DEPTH(dtype);
00673 
00674     if( depth1 == depth2 && dtype == depth1 )
00675         wtype = dtype;
00676     else if( !muldiv )
00677     {
00678         wtype = depth1 <= CV_8S && depth2 <= CV_8S ? CV_16S :
00679                 depth1 <= CV_32S && depth2 <= CV_32S ? CV_32S : std::max(depth1, depth2);
00680         wtype = std::max(wtype, dtype);
00681 
00682         // when the result of addition should be converted to an integer type,
00683         // and just one of the input arrays is floating-point, it makes sense to convert that input to integer type before the operation,
00684         // instead of converting the other input to floating-point and then converting the operation result back to integers.
00685         if( dtype < CV_32F && (depth1 < CV_32F || depth2 < CV_32F) )
00686             wtype = CV_32S;
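                  // e.g. adding a CV_8U array and a CV_32F array with a CV_16S destination
                  // uses CV_32S, not CV_32F, as the intermediate type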
00687     }
00688     else
00689     {
00690         wtype = std::max(depth1, std::max(depth2, CV_32F));
00691         wtype = std::max(wtype, dtype);
00692     }
00693 
00694     dtype = CV_MAKETYPE(dtype, cn);
00695     wtype = CV_MAKETYPE(wtype, cn);
00696 
00697     if( haveMask )
00698     {
00699         int mtype = _mask.type();
00700         CV_Assert( (mtype == CV_8UC1 || mtype == CV_8SC1) && _mask.sameSize(*psrc1) );
00701         reallocate = !_dst.sameSize(*psrc1) || _dst.type() != dtype;
00702     }
00703 
00704     _dst.createSameSize(*psrc1, dtype);
00705     if( reallocate )
00706         _dst.setTo(0.);
00707 
00708 #ifdef HAVE_OPENCL
00709     CV_OCL_RUN(use_opencl,
00710                ocl_arithm_op(*psrc1, *psrc2, _dst, _mask, wtype,
00711                usrdata, oclop, haveScalar))
00712 #endif
00713 
00714     BinaryFunc cvtsrc1 = type1 == wtype ? 0 : getConvertFunc(type1, wtype);
00715     BinaryFunc cvtsrc2 = type2 == type1 ? cvtsrc1 : type2 == wtype ? 0 : getConvertFunc(type2, wtype);
00716     BinaryFunc cvtdst = dtype == wtype ? 0 : getConvertFunc(wtype, dtype);
00717 
00718     size_t esz1 = CV_ELEM_SIZE(type1), esz2 = CV_ELEM_SIZE(type2);
00719     size_t dsz = CV_ELEM_SIZE(dtype), wsz = CV_ELEM_SIZE(wtype);
00720     size_t blocksize0 = (size_t)(BLOCK_SIZE + wsz-1)/wsz;
00721     BinaryFunc copymask = getCopyMaskFunc(dsz);
00722     Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat(), mask = _mask.getMat();
00723 
00724     AutoBuffer<uchar> _buf;
00725     uchar *buf, *maskbuf = 0, *buf1 = 0, *buf2 = 0, *wbuf = 0;
00726     size_t bufesz = (cvtsrc1 ? wsz : 0) +
00727                     (cvtsrc2 || haveScalar ? wsz : 0) +
00728                     (cvtdst ? wsz : 0) +
00729                     (haveMask ? dsz : 0);
00730     BinaryFuncC func = tab[CV_MAT_DEPTH(wtype)];
00731 
00732     if( !haveScalar )
00733     {
00734         const Mat* arrays[] = { &src1, &src2, &dst, &mask, 0 };
00735         uchar* ptrs[4];
00736 
00737         NAryMatIterator it(arrays, ptrs);
00738         size_t total = it.size, blocksize = total;
00739 
00740         if( haveMask || cvtsrc1 || cvtsrc2 || cvtdst )
00741             blocksize = std::min(blocksize, blocksize0);
00742 
00743         _buf.allocate(bufesz*blocksize + 64);
00744         buf = _buf;
00745         if( cvtsrc1 )
00746             buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
00747         if( cvtsrc2 )
00748             buf2 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
00749         wbuf = maskbuf = buf;
00750         if( cvtdst )
00751             buf = alignPtr(buf + blocksize*wsz, 16);
00752         if( haveMask )
00753             maskbuf = buf;
00754 
00755         for( size_t i = 0; i < it.nplanes; i++, ++it )
00756         {
00757             for( size_t j = 0; j < total; j += blocksize )
00758             {
00759                 int bsz = (int)MIN(total - j, blocksize);
00760                 Size bszn(bsz*cn, 1);
00761                 const uchar *sptr1 = ptrs[0], *sptr2 = ptrs[1];
00762                 uchar* dptr = ptrs[2];
00763                 if( cvtsrc1 )
00764                 {
00765                     cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 );
00766                     sptr1 = buf1;
00767                 }
00768                 if( ptrs[0] == ptrs[1] )
00769                     sptr2 = sptr1;
00770                 else if( cvtsrc2 )
00771                 {
00772                     cvtsrc2( sptr2, 1, 0, 1, buf2, 1, bszn, 0 );
00773                     sptr2 = buf2;
00774                 }
00775 
00776                 if( !haveMask && !cvtdst )
00777                     func( sptr1, 1, sptr2, 1, dptr, 1, bszn.width, bszn.height, usrdata );
00778                 else
00779                 {
00780                     func( sptr1, 1, sptr2, 1, wbuf, 0, bszn.width, bszn.height, usrdata );
00781                     if( !haveMask )
00782                         cvtdst( wbuf, 1, 0, 1, dptr, 1, bszn, 0 );
00783                     else if( !cvtdst )
00784                     {
00785                         copymask( wbuf, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz );
00786                         ptrs[3] += bsz;
00787                     }
00788                     else
00789                     {
00790                         cvtdst( wbuf, 1, 0, 1, maskbuf, 1, bszn, 0 );
00791                         copymask( maskbuf, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz );
00792                         ptrs[3] += bsz;
00793                     }
00794                 }
00795                 ptrs[0] += bsz*esz1; ptrs[1] += bsz*esz2; ptrs[2] += bsz*dsz;
00796             }
00797         }
00798     }
00799     else
00800     {
00801         const Mat* arrays[] = { &src1, &dst, &mask, 0 };
00802         uchar* ptrs[3];
00803 
00804         NAryMatIterator it(arrays, ptrs);
00805         size_t total = it.size, blocksize = std::min(total, blocksize0);
00806 
00807         _buf.allocate(bufesz*blocksize + 64);
00808         buf = _buf;
00809         if( cvtsrc1 )
00810             buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
00811         buf2 = buf; buf = alignPtr(buf + blocksize*wsz, 16);
00812         wbuf = maskbuf = buf;
00813         if( cvtdst )
00814             buf = alignPtr(buf + blocksize*wsz, 16);
00815         if( haveMask )
00816             maskbuf = buf;
00817 
00818         convertAndUnrollScalar( src2, wtype, buf2, blocksize);
00819 
00820         for( size_t i = 0; i < it.nplanes; i++, ++it )
00821         {
00822             for( size_t j = 0; j < total; j += blocksize )
00823             {
00824                 int bsz = (int)MIN(total - j, blocksize);
00825                 Size bszn(bsz*cn, 1);
00826                 const uchar *sptr1 = ptrs[0];
00827                 const uchar* sptr2 = buf2;
00828                 uchar* dptr = ptrs[1];
00829 
00830                 if( cvtsrc1 )
00831                 {
00832                     cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 );
00833                     sptr1 = buf1;
00834                 }
00835 
00836                 if( swapped12 )
00837                     std::swap(sptr1, sptr2);
00838 
00839                 if( !haveMask && !cvtdst )
00840                     func( sptr1, 1, sptr2, 1, dptr, 1, bszn.width, bszn.height, usrdata );
00841                 else
00842                 {
00843                     func( sptr1, 1, sptr2, 1, wbuf, 1, bszn.width, bszn.height, usrdata );
00844                     if( !haveMask )
00845                         cvtdst( wbuf, 1, 0, 1, dptr, 1, bszn, 0 );
00846                     else if( !cvtdst )
00847                     {
00848                         copymask( wbuf, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz );
00849                         ptrs[2] += bsz;
00850                     }
00851                     else
00852                     {
00853                         cvtdst( wbuf, 1, 0, 1, maskbuf, 1, bszn, 0 );
00854                         copymask( maskbuf, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz );
00855                         ptrs[2] += bsz;
00856                     }
00857                 }
00858                 ptrs[0] += bsz*esz1; ptrs[1] += bsz*dsz;
00859             }
00860         }
00861     }
00862 }
00863 
00864 static BinaryFuncC* getAddTab()
00865 {
00866     static BinaryFuncC addTab[] =
00867     {
00868         (BinaryFuncC)GET_OPTIMIZED(cv::hal::add8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::add8s),
00869         (BinaryFuncC)GET_OPTIMIZED(cv::hal::add16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::add16s),
00870         (BinaryFuncC)GET_OPTIMIZED(cv::hal::add32s),
00871         (BinaryFuncC)GET_OPTIMIZED(cv::hal::add32f), (BinaryFuncC)cv::hal::add64f,
00872         0
00873     };
00874 
00875     return addTab;
00876 }
00877 
00878 static BinaryFuncC* getSubTab()
00879 {
00880     static BinaryFuncC subTab[] =
00881     {
00882         (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub8s),
00883         (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub16s),
00884         (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub32s),
00885         (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub32f), (BinaryFuncC)cv::hal::sub64f,
00886         0
00887     };
00888 
00889     return subTab;
00890 }
00891 
00892 static BinaryFuncC* getAbsDiffTab()
00893 {
00894     static BinaryFuncC absDiffTab[] =
00895     {
00896         (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff8s),
00897         (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff16s),
00898         (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff32s),
00899         (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff32f), (BinaryFuncC)cv::hal::absdiff64f,
00900         0
00901     };
00902 
00903     return absDiffTab;
00904 }
00905 
00906 }
00907 
00908 void cv::add( InputArray src1, InputArray src2, OutputArray dst,
00909           InputArray mask, int dtype )
00910 {
00911     arithm_op(src1, src2, dst, mask, dtype, getAddTab(), false, 0, OCL_OP_ADD );
00912 }
00913 
00914 void cv::subtract( InputArray _src1, InputArray _src2, OutputArray _dst,
00915                InputArray mask, int dtype )
00916 {
00917 #ifdef HAVE_TEGRA_OPTIMIZATION
00918     if (tegra::useTegra())
00919     {
00920         int kind1 = _src1.kind(), kind2 = _src2.kind();
00921         Mat src1 = _src1.getMat(), src2 = _src2.getMat();
00922         bool src1Scalar = checkScalar(src1, _src2.type(), kind1, kind2);
00923         bool src2Scalar = checkScalar(src2, _src1.type(), kind2, kind1);
00924 
00925         if (!src1Scalar && !src2Scalar &&
00926             src1.depth() == CV_8U && src2.type() == src1.type() &&
00927             src1.dims == 2 && src2.size() == src1.size() &&
00928             mask.empty())
00929         {
00930             if (dtype < 0)
00931             {
00932                 if (_dst.fixedType())
00933                 {
00934                     dtype = _dst.depth();
00935                 }
00936                 else
00937                 {
00938                     dtype = src1.depth();
00939                 }
00940             }
00941 
00942             dtype = CV_MAT_DEPTH(dtype);
00943 
00944             if (!_dst.fixedType() || dtype == _dst.depth())
00945             {
00946                 _dst.create(src1.size(), CV_MAKE_TYPE(dtype, src1.channels()));
00947 
00948                 if (dtype == CV_16S)
00949                 {
00950                     Mat dst = _dst.getMat();
00951                     if(tegra::subtract_8u8u16s(src1, src2, dst))
00952                         return;
00953                 }
00954                 else if (dtype == CV_32F)
00955                 {
00956                     Mat dst = _dst.getMat();
00957                     if(tegra::subtract_8u8u32f(src1, src2, dst))
00958                         return;
00959                 }
00960                 else if (dtype == CV_8S)
00961                 {
00962                     Mat dst = _dst.getMat();
00963                     if(tegra::subtract_8u8u8s(src1, src2, dst))
00964                         return;
00965                 }
00966             }
00967         }
00968     }
00969 #endif
00970     arithm_op(_src1, _src2, _dst, mask, dtype, getSubTab(), false, 0, OCL_OP_SUB );
00971 }
00972 
00973 void cv::absdiff( InputArray src1, InputArray src2, OutputArray dst )
00974 {
00975     arithm_op(src1, src2, dst, noArray(), -1, getAbsDiffTab(), false, 0, OCL_OP_ABSDIFF);
00976 }
00977 
00978 /****************************************************************************************\
00979 *                                    multiply/divide                                     *
00980 \****************************************************************************************/
00981 
00982 namespace cv
00983 {
00984 
00985 static BinaryFuncC* getMulTab()
00986 {
00987     static BinaryFuncC mulTab[] =
00988     {
00989         (BinaryFuncC)cv::hal::mul8u, (BinaryFuncC)cv::hal::mul8s, (BinaryFuncC)cv::hal::mul16u,
00990         (BinaryFuncC)cv::hal::mul16s, (BinaryFuncC)cv::hal::mul32s, (BinaryFuncC)cv::hal::mul32f,
00991         (BinaryFuncC)cv::hal::mul64f, 0
00992     };
00993 
00994     return mulTab;
00995 }
00996 
00997 static BinaryFuncC* getDivTab()
00998 {
00999     static BinaryFuncC divTab[] =
01000     {
01001         (BinaryFuncC)cv::hal::div8u, (BinaryFuncC)cv::hal::div8s, (BinaryFuncC)cv::hal::div16u,
01002         (BinaryFuncC)cv::hal::div16s, (BinaryFuncC)cv::hal::div32s, (BinaryFuncC)cv::hal::div32f,
01003         (BinaryFuncC)cv::hal::div64f, 0
01004     };
01005 
01006     return divTab;
01007 }
01008 
01009 static BinaryFuncC* getRecipTab()
01010 {
01011     static BinaryFuncC recipTab[] =
01012     {
01013         (BinaryFuncC)cv::hal::recip8u, (BinaryFuncC)cv::hal::recip8s, (BinaryFuncC)cv::hal::recip16u,
01014         (BinaryFuncC)cv::hal::recip16s, (BinaryFuncC)cv::hal::recip32s, (BinaryFuncC)cv::hal::recip32f,
01015         (BinaryFuncC)cv::hal::recip64f, 0
01016     };
01017 
01018     return recipTab;
01019 }
01020 
01021 }
01022 
01023 void cv::multiply(InputArray src1, InputArray src2,
01024                   OutputArray dst, double scale, int dtype)
01025 {
01026     arithm_op(src1, src2, dst, noArray(), dtype, getMulTab(),
01027               true, &scale, std::abs(scale - 1.0) < DBL_EPSILON ? OCL_OP_MUL : OCL_OP_MUL_SCALE);
01028 }
01029 
01030 void cv::divide(InputArray src1, InputArray src2,
01031                 OutputArray dst, double scale, int dtype)
01032 {
01033     arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale, OCL_OP_DIV_SCALE);
01034 }
01035 
01036 void cv::divide(double scale, InputArray src2,
01037                 OutputArray dst, int dtype)
01038 {
01039     arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale, OCL_OP_RECIP_SCALE);
01040 }
01041 
01042 /****************************************************************************************\
01043 *                                      addWeighted                                       *
01044 \****************************************************************************************/
01045 
01046 namespace cv
01047 {
01048 
01049 static BinaryFuncC* getAddWeightedTab()
01050 {
01051     static BinaryFuncC addWeightedTab[] =
01052     {
01053         (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted8s), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted16u),
01054         (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted16s), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted32s), (BinaryFuncC)cv::hal::addWeighted32f,
01055         (BinaryFuncC)cv::hal::addWeighted64f, 0
01056     };
01057 
01058     return addWeightedTab;
01059 }
01060 
01061 }
01062 
01063 void cv::addWeighted( InputArray src1, double alpha, InputArray src2,
01064                       double beta, double gamma, OutputArray dst, int dtype )
01065 {
01066     double scalars[] = {alpha, beta, gamma};
01067     arithm_op(src1, src2, dst, noArray(), dtype, getAddWeightedTab(), true, scalars, OCL_OP_ADDW);
01068 }
01069 
01070 
01071 /****************************************************************************************\
01072 *                                          compare                                       *
01073 \****************************************************************************************/
01074 
01075 namespace cv
01076 {
01077 
01078 static BinaryFuncC getCmpFunc(int depth)
01079 {
01080     static BinaryFuncC cmpTab[] =
01081     {
01082         (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp8s),
01083         (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp16s),
01084         (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp32s),
01085         (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp32f), (BinaryFuncC)cv::hal::cmp64f,
01086         0
01087     };
01088 
01089     return cmpTab[depth];
01090 }
01091 
01092 static double getMinVal(int depth)
01093 {
01094     static const double tab[] = {0, -128, 0, -32768, INT_MIN, -FLT_MAX, -DBL_MAX, 0};
01095     return tab[depth];
01096 }
01097 
01098 static double getMaxVal(int depth)
01099 {
01100     static const double tab[] = {255, 127, 65535, 32767, INT_MAX, FLT_MAX, DBL_MAX, 0};
01101     return tab[depth];
01102 }
01103 
01104 #ifdef HAVE_OPENCL
01105 
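      // OpenCL path for cv::compare; a scalar operand that falls outside the representable
      // range of src1's depth is resolved immediately by filling dst with 0 or 255.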
01106 static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op, bool haveScalar)
01107 {
01108     const ocl::Device& dev = ocl::Device::getDefault();
01109     bool doubleSupport = dev.doubleFPConfig() > 0;
01110     int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1),
01111             type2 = _src2.type(), depth2 = CV_MAT_DEPTH(type2);
01112 
01113     if (!doubleSupport && depth1 == CV_64F)
01114         return false;
01115 
01116     if (!haveScalar && (!_src1.sameSize(_src2) || type1 != type2))
01117             return false;
01118 
01119     int kercn = haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst), rowsPerWI = dev.isIntel() ? 4 : 1;
01120     // Workaround for bug with "?:" operator in AMD OpenCL compiler
01121     if (depth1 >= CV_16U)
01122         kercn = 1;
01123 
01124     int scalarcn = kercn == 3 ? 4 : kercn;
01125     const char * const operationMap[] = { "==", ">", ">=", "<", "<=", "!=" };
01126     char cvt[40];
01127 
01128     String opts = format("-D %s -D srcT1=%s -D dstT=%s -D workT=srcT1 -D cn=%d"
01129                          " -D convertToDT=%s -D OP_CMP -D CMP_OPERATOR=%s -D srcT1_C1=%s"
01130                          " -D srcT2_C1=%s -D dstT_C1=%s -D workST=%s -D rowsPerWI=%d%s",
01131                          haveScalar ? "UNARY_OP" : "BINARY_OP",
01132                          ocl::typeToStr(CV_MAKE_TYPE(depth1, kercn)),
01133                          ocl::typeToStr(CV_8UC(kercn)), kercn,
01134                          ocl::convertTypeStr(depth1, CV_8U, kercn, cvt),
01135                          operationMap[op], ocl::typeToStr(depth1),
01136                          ocl::typeToStr(depth1), ocl::typeToStr(CV_8U),
01137                          ocl::typeToStr(CV_MAKE_TYPE(depth1, scalarcn)), rowsPerWI,
01138                          doubleSupport ? " -D DOUBLE_SUPPORT" : "");
01139 
01140     ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
01141     if (k.empty())
01142         return false;
01143 
01144     UMat src1 = _src1.getUMat();
01145     Size size = src1.size();
01146     _dst.create(size, CV_8UC(cn));
01147     UMat dst = _dst.getUMat();
01148 
01149     if (haveScalar)
01150     {
01151         size_t esz = CV_ELEM_SIZE1(type1) * scalarcn;
01152         double buf[4] = { 0, 0, 0, 0 };
01153         Mat src2 = _src2.getMat();
01154 
01155         if( depth1 > CV_32S )
01156             convertAndUnrollScalar( src2, depth1, (uchar *)buf, kercn );
01157         else
01158         {
01159             double fval = 0;
01160             getConvertFunc(depth2, CV_64F)(src2.ptr(), 1, 0, 1, (uchar *)&fval, 1, Size(1, 1), 0);
01161             if( fval < getMinVal(depth1) )
01162                 return dst.setTo(Scalar::all(op == CMP_GT || op == CMP_GE || op == CMP_NE ? 255 : 0)), true;
01163 
01164             if( fval > getMaxVal(depth1) )
01165                 return dst.setTo(Scalar::all(op == CMP_LT || op == CMP_LE || op == CMP_NE ? 255 : 0)), true;
01166 
01167             int ival = cvRound(fval);
01168             if( fval != ival )
01169             {
01170                 if( op == CMP_LT || op == CMP_GE )
01171                     ival = cvCeil(fval);
01172                 else if( op == CMP_LE || op == CMP_GT )
01173                     ival = cvFloor(fval);
01174                 else
01175                     return dst.setTo(Scalar::all(op == CMP_NE ? 255 : 0)), true;
01176             }
01177             convertAndUnrollScalar(Mat(1, 1, CV_32S, &ival), depth1, (uchar *)buf, kercn);
01178         }
01179 
01180         ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, 0, buf, esz);
01181 
01182         k.args(ocl::KernelArg::ReadOnlyNoSize(src1, cn, kercn),
01183                ocl::KernelArg::WriteOnly(dst, cn, kercn), scalararg);
01184     }
01185     else
01186     {
01187         UMat src2 = _src2.getUMat();
01188 
01189         k.args(ocl::KernelArg::ReadOnlyNoSize(src1),
01190                ocl::KernelArg::ReadOnlyNoSize(src2),
01191                ocl::KernelArg::WriteOnly(dst, cn, kercn));
01192     }
01193 
01194     size_t globalsize[2] = { (size_t)dst.cols * cn / kercn, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
01195     return k.run(2, globalsize, NULL, false);
01196 }
01197 
01198 #endif
01199 
01200 }
01201 
01202 void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op)
01203 {
01204     CV_Assert( op == CMP_LT || op == CMP_LE || op == CMP_EQ ||
01205                op == CMP_NE || op == CMP_GE || op == CMP_GT );
01206 
01207     bool haveScalar = false;
01208 
01209     if ((_src1.isMatx() + _src2.isMatx()) == 1
01210             || !_src1.sameSize(_src2)
01211             || _src1.type() != _src2.type())
01212     {
01213         if (checkScalar(_src1, _src2.type(), _src1.kind(), _src2.kind()))
01214         {
01215             op = op == CMP_LT ? CMP_GT : op == CMP_LE ? CMP_GE :
01216                 op == CMP_GE ? CMP_LE : op == CMP_GT ? CMP_LT : op;
01217             // src1 is a scalar; swap it with src2
01218             compare(_src2, _src1, _dst, op);
01219             return;
01220         }
01221         else if( !checkScalar(_src2, _src1.type(), _src2.kind(), _src1.kind()) )
01222             CV_Error( CV_StsUnmatchedSizes,
01223                      "The operation is neither 'array op array' (where arrays have the same size and the same type), "
01224                      "nor 'array op scalar', nor 'scalar op array'" );
01225         haveScalar = true;
01226     }
01227 
01228 #ifdef HAVE_OPENCL
01229     CV_OCL_RUN(_src1.dims() <= 2 && _src2.dims() <= 2 && OCL_PERFORMANCE_CHECK(_dst.isUMat()),
01230                ocl_compare(_src1, _src2, _dst, op, haveScalar))
01231 #endif
01232 
01233     int kind1 = _src1.kind(), kind2 = _src2.kind();
01234     Mat src1 = _src1.getMat(), src2 = _src2.getMat();
01235 
01236     if( kind1 == kind2 && src1.dims <= 2 && src2.dims <= 2 && src1.size() == src2.size() && src1.type() == src2.type() )
01237     {
01238         int cn = src1.channels();
01239         _dst.create(src1.size(), CV_8UC(cn));
01240         Mat dst = _dst.getMat();
01241         Size sz = getContinuousSize(src1, src2, dst, src1.channels());
01242         getCmpFunc(src1.depth())(src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, &op);
01243         return;
01244     }
01245 
01246     int cn = src1.channels(), depth1 = src1.depth(), depth2 = src2.depth();
01247 
01248     _dst.create(src1.dims, src1.size, CV_8UC(cn));
01249     src1 = src1.reshape(1); src2 = src2.reshape(1);
01250     Mat dst = _dst.getMat().reshape(1);
01251 
01252     size_t esz = src1.elemSize();
01253     size_t blocksize0 = (size_t)(BLOCK_SIZE + esz-1)/esz;
01254     BinaryFuncC func = getCmpFunc(depth1);
01255 
01256     if( !haveScalar )
01257     {
01258         const Mat* arrays[] = { &src1, &src2, &dst, 0 };
01259         uchar* ptrs[3];
01260 
01261         NAryMatIterator it(arrays, ptrs);
01262         size_t total = it.size;
01263 
01264         for( size_t i = 0; i < it.nplanes; i++, ++it )
01265             func( ptrs[0], 0, ptrs[1], 0, ptrs[2], 0, (int)total, 1, &op );
01266     }
01267     else
01268     {
01269         const Mat* arrays[] = { &src1, &dst, 0 };
01270         uchar* ptrs[2];
01271 
01272         NAryMatIterator it(arrays, ptrs);
01273         size_t total = it.size, blocksize = std::min(total, blocksize0);
01274 
01275         AutoBuffer<uchar>  _buf(blocksize*esz);
01276         uchar *buf = _buf;
01277 
01278         if( depth1 > CV_32S )
01279             convertAndUnrollScalar( src2, depth1, buf, blocksize );
01280         else
01281         {
01282             double fval=0;
01283             getConvertFunc(depth2, CV_64F)(src2.ptr(), 1, 0, 1, (uchar*)&fval, 1, Size(1,1), 0);
01284             if( fval < getMinVal(depth1) )
01285             {
01286                 dst = Scalar::all(op == CMP_GT || op == CMP_GE || op == CMP_NE ? 255 : 0);
01287                 return;
01288             }
01289 
01290             if( fval > getMaxVal(depth1) )
01291             {
01292                 dst = Scalar::all(op == CMP_LT || op == CMP_LE || op == CMP_NE ? 255 : 0);
01293                 return;
01294             }
01295 
01296             int ival = cvRound(fval);
01297             if( fval != ival )
01298             {
01299                 if( op == CMP_LT || op == CMP_GE )
01300                     ival = cvCeil(fval);
01301                 else if( op == CMP_LE || op == CMP_GT )
01302                     ival = cvFloor(fval);
01303                 else
01304                 {
01305                     dst = Scalar::all(op == CMP_NE ? 255 : 0);
01306                     return;
01307                 }
01308             }
01309             convertAndUnrollScalar(Mat(1, 1, CV_32S, &ival), depth1, buf, blocksize);
01310         }
01311 
01312         for( size_t i = 0; i < it.nplanes; i++, ++it )
01313         {
01314             for( size_t j = 0; j < total; j += blocksize )
01315             {
01316                 int bsz = (int)MIN(total - j, blocksize);
01317                 func( ptrs[0], 0, buf, 0, ptrs[1], 0, bsz, 1, &op);
01318                 ptrs[0] += bsz*esz;
01319                 ptrs[1] += bsz;
01320             }
01321         }
01322     }
01323 }
01324 
01325 /****************************************************************************************\
01326 *                                        inRange                                         *
01327 \****************************************************************************************/
01328 
01329 namespace cv
01330 {
01331 
01332 template <typename T>
01333 struct InRange_SIMD
01334 {
01335     int operator () (const T *, const T *, const T *, uchar *, int) const
01336     {
01337         return 0;
01338     }
01339 };
01340 
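      // The SSE2 specializations below vectorize the "lower <= src && src <= upper" test;
      // unsigned element types are first biased into the signed range so the signed
      // compare intrinsics can be used. The generic template above returns 0, leaving all
      // work to the scalar fallback in the caller.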
01341 #if CV_SSE2
01342 
01343 template <>
01344 struct InRange_SIMD<uchar>
01345 {
01346     int operator () (const uchar * src1, const uchar * src2, const uchar * src3,
01347                      uchar * dst, int len) const
01348     {
01349         int x = 0;
01350 
01351         if (USE_SSE2)
01352         {
01353             __m128i v_full = _mm_set1_epi8(-1), v_128 = _mm_set1_epi8(-128);
01354 
01355             for ( ; x <= len - 16; x += 16 )
01356             {
01357                 __m128i v_src = _mm_add_epi8(_mm_loadu_si128((const __m128i *)(src1 + x)), v_128);
01358                 __m128i v_mask1 = _mm_cmpgt_epi8(_mm_add_epi8(_mm_loadu_si128((const __m128i *)(src2 + x)), v_128), v_src);
01359                 __m128i v_mask2 = _mm_cmpgt_epi8(v_src, _mm_add_epi8(_mm_loadu_si128((const __m128i *)(src3 + x)), v_128));
01360                 _mm_storeu_si128((__m128i *)(dst + x), _mm_andnot_si128(_mm_or_si128(v_mask1, v_mask2), v_full));
01361             }
01362         }
01363 
01364         return x;
01365     }
01366 };
01367 
01368 template <>
01369 struct InRange_SIMD<schar>
01370 {
01371     int operator () (const schar * src1, const schar * src2, const schar * src3,
01372                      uchar * dst, int len) const
01373     {
01374         int x = 0;
01375 
01376         if (USE_SSE2)
01377         {
01378             __m128i v_full = _mm_set1_epi8(-1);
01379 
01380             for ( ; x <= len - 16; x += 16 )
01381             {
01382                 __m128i v_src = _mm_loadu_si128((const __m128i *)(src1 + x));
01383                 __m128i v_mask1 = _mm_cmpgt_epi8(_mm_loadu_si128((const __m128i *)(src2 + x)), v_src);
01384                 __m128i v_mask2 = _mm_cmpgt_epi8(v_src, _mm_loadu_si128((const __m128i *)(src3 + x)));
01385                 _mm_storeu_si128((__m128i *)(dst + x), _mm_andnot_si128(_mm_or_si128(v_mask1, v_mask2), v_full));
01386             }
01387         }
01388 
01389         return x;
01390     }
01391 };
01392 
01393 template <>
01394 struct InRange_SIMD<ushort>
01395 {
01396     int operator () (const ushort * src1, const ushort * src2, const ushort * src3,
01397                      uchar * dst, int len) const
01398     {
01399         int x = 0;
01400 
01401         if (USE_SSE2)
01402         {
01403             __m128i v_zero = _mm_setzero_si128(), v_full = _mm_set1_epi16(-1), v_32768 = _mm_set1_epi16(-32768);
01404 
01405             for ( ; x <= len - 8; x += 8 )
01406             {
01407                 __m128i v_src = _mm_add_epi16(_mm_loadu_si128((const __m128i *)(src1 + x)), v_32768);
01408                 __m128i v_mask1 = _mm_cmpgt_epi16(_mm_add_epi16(_mm_loadu_si128((const __m128i *)(src2 + x)), v_32768), v_src);
01409                 __m128i v_mask2 = _mm_cmpgt_epi16(v_src, _mm_add_epi16(_mm_loadu_si128((const __m128i *)(src3 + x)), v_32768));
01410                 __m128i v_res = _mm_andnot_si128(_mm_or_si128(v_mask1, v_mask2), v_full);
01411                 _mm_storel_epi64((__m128i *)(dst + x), _mm_packus_epi16(_mm_srli_epi16(v_res, 8), v_zero));
01412             }
01413         }
01414 
01415         return x;
01416     }
01417 };
01418 
01419 template <>
01420 struct InRange_SIMD<short>
01421 {
01422     int operator () (const short * src1, const short * src2, const short * src3,
01423                      uchar * dst, int len) const
01424     {
01425         int x = 0;
01426 
01427         if (USE_SSE2)
01428         {
01429             __m128i v_zero = _mm_setzero_si128(), v_full = _mm_set1_epi16(-1);
01430 
01431             for ( ; x <= len - 8; x += 8 )
01432             {
01433                 __m128i v_src = _mm_loadu_si128((const __m128i *)(src1 + x));
01434                 __m128i v_mask1 = _mm_cmpgt_epi16(_mm_loadu_si128((const __m128i *)(src2 + x)), v_src);
01435                 __m128i v_mask2 = _mm_cmpgt_epi16(v_src, _mm_loadu_si128((const __m128i *)(src3 + x)));
01436                 __m128i v_res = _mm_andnot_si128(_mm_or_si128(v_mask1, v_mask2), v_full);
01437                 _mm_storel_epi64((__m128i *)(dst + x), _mm_packus_epi16(_mm_srli_epi16(v_res, 8), v_zero));
01438             }
01439         }
01440 
01441         return x;
01442     }
01443 };
01444 
01445 template <>
01446 struct InRange_SIMD<int>
01447 {
01448     int operator () (const int * src1, const int * src2, const int * src3,
01449                      uchar * dst, int len) const
01450     {
01451         int x = 0;
01452 
01453         if (USE_SSE2)
01454         {
01455             __m128i v_zero = _mm_setzero_si128(), v_full = _mm_set1_epi32(-1);
01456 
01457             for ( ; x <= len - 8; x += 8 )
01458             {
01459                 __m128i v_src = _mm_loadu_si128((const __m128i *)(src1 + x));
01460                 __m128i v_res1 = _mm_or_si128(_mm_cmpgt_epi32(_mm_loadu_si128((const __m128i *)(src2 + x)), v_src),
01461                     _mm_cmpgt_epi32(v_src, _mm_loadu_si128((const __m128i *)(src3 + x))));
01462 
01463                 v_src = _mm_loadu_si128((const __m128i *)(src1 + x + 4));
01464                 __m128i v_res2 = _mm_or_si128(_mm_cmpgt_epi32(_mm_loadu_si128((const __m128i *)(src2 + x + 4)), v_src),
01465                     _mm_cmpgt_epi32(v_src, _mm_loadu_si128((const __m128i *)(src3 + x + 4))));
01466 
01467                 __m128i v_res = _mm_packs_epi32(_mm_srli_epi32(_mm_andnot_si128(v_res1, v_full), 16),
01468                                                 _mm_srli_epi32(_mm_andnot_si128(v_res2, v_full), 16));
01469                 _mm_storel_epi64((__m128i *)(dst + x), _mm_packus_epi16(v_res, v_zero));
01470             }
01471         }
01472 
01473         return x;
01474     }
01475 };
01476 
01477 template <>
01478 struct InRange_SIMD<float>
01479 {
01480     int operator () (const float * src1, const float * src2, const float * src3,
01481                      uchar * dst, int len) const
01482     {
01483         int x = 0;
01484 
01485         if (USE_SSE2)
01486         {
01487             __m128i v_zero = _mm_setzero_si128();
01488 
01489             for ( ; x <= len - 8; x += 8 )
01490             {
01491                 __m128 v_src = _mm_loadu_ps(src1 + x);
01492                 __m128 v_res1 = _mm_and_ps(_mm_cmple_ps(_mm_loadu_ps(src2 + x), v_src),
01493                     _mm_cmple_ps(v_src, _mm_loadu_ps(src3 + x)));
01494 
01495                 v_src = _mm_loadu_ps(src1 + x + 4);
01496                 __m128 v_res2 = _mm_and_ps(_mm_cmple_ps(_mm_loadu_ps(src2 + x + 4), v_src),
01497                     _mm_cmple_ps(v_src, _mm_loadu_ps(src3 + x + 4)));
01498 
01499                 __m128i v_res1i = _mm_cvtps_epi32(v_res1), v_res2i = _mm_cvtps_epi32(v_res2);
01500                 __m128i v_res = _mm_packs_epi32(_mm_srli_epi32(v_res1i, 16), _mm_srli_epi32(v_res2i, 16));
01501                 _mm_storel_epi64((__m128i *)(dst + x), _mm_packus_epi16(v_res, v_zero));
01502             }
01503         }
01504 
01505         return x;
01506     }
01507 };
01508 
01509 #elif CV_NEON
01510 
01511 template <>
01512 struct InRange_SIMD<uchar>
01513 {
01514     int operator () (const uchar * src1, const uchar * src2, const uchar * src3,
01515                      uchar * dst, int len) const
01516     {
01517         int x = 0;
01518 
01519         for ( ; x <= len - 16; x += 16 )
01520         {
01521             uint8x16_t values = vld1q_u8(src1 + x);
01522             uint8x16_t low = vld1q_u8(src2 + x);
01523             uint8x16_t high = vld1q_u8(src3 + x);
01524 
01525             vst1q_u8(dst + x, vandq_u8(vcgeq_u8(values, low), vcgeq_u8(high, values)));
01526         }
01527         return x;
01528     }
01529 };
01530 
01531 template <>
01532 struct InRange_SIMD<schar>
01533 {
01534     int operator () (const schar * src1, const schar * src2, const schar * src3,
01535                      uchar * dst, int len) const
01536     {
01537         int x = 0;
01538 
01539         for ( ; x <= len - 16; x += 16 )
01540         {
01541             int8x16_t values = vld1q_s8(src1 + x);
01542             int8x16_t low = vld1q_s8(src2 + x);
01543             int8x16_t high = vld1q_s8(src3 + x);
01544 
01545             vst1q_u8(dst + x, vandq_u8(vcgeq_s8(values, low), vcgeq_s8(high, values)));
01546         }
01547         return x;
01548     }
01549 };
01550 
01551 template <>
01552 struct InRange_SIMD<ushort>
01553 {
01554     int operator () (const ushort * src1, const ushort * src2, const ushort * src3,
01555                      uchar * dst, int len) const
01556     {
01557         int x = 0;
01558 
01559         for ( ; x <= len - 16; x += 16 )
01560         {
01561             uint16x8_t values = vld1q_u16((const uint16_t*)(src1 + x));
01562             uint16x8_t low = vld1q_u16((const uint16_t*)(src2 + x));
01563             uint16x8_t high = vld1q_u16((const uint16_t*)(src3 + x));
01564             uint8x8_t  r1 = vmovn_u16(vandq_u16(vcgeq_u16(values, low), vcgeq_u16(high, values)));
01565 
01566             values = vld1q_u16((const uint16_t*)(src1 + x + 8));
01567             low = vld1q_u16((const uint16_t*)(src2 + x + 8));
01568             high = vld1q_u16((const uint16_t*)(src3 + x + 8));
01569             uint8x8_t  r2 = vmovn_u16(vandq_u16(vcgeq_u16(values, low), vcgeq_u16(high, values)));
01570 
01571             vst1q_u8(dst + x, vcombine_u8(r1, r2));
01572         }
01573         return x;
01574     }
01575 };
01576 
01577 template <>
01578 struct InRange_SIMD<short>
01579 {
01580     int operator () (const short * src1, const short * src2, const short * src3,
01581                      uchar * dst, int len) const
01582     {
01583         int x = 0;
01584 
01585         for ( ; x <= len - 16; x += 16 )
01586         {
01587             int16x8_t values = vld1q_s16((const int16_t*)(src1 + x));
01588             int16x8_t low = vld1q_s16((const int16_t*)(src2 + x));
01589             int16x8_t high = vld1q_s16((const int16_t*)(src3 + x));
01590             uint8x8_t r1 = vmovn_u16(vandq_u16(vcgeq_s16(values, low), vcgeq_s16(high, values)));
01591 
01592             values = vld1q_s16((const int16_t*)(src1 + x + 8));
01593             low = vld1q_s16((const int16_t*)(src2 + x + 8));
01594             high = vld1q_s16((const int16_t*)(src3 + x + 8));
01595             uint8x8_t r2 = vmovn_u16(vandq_u16(vcgeq_s16(values, low), vcgeq_s16(high, values)));
01596 
01597             vst1q_u8(dst + x, vcombine_u8(r1, r2));
01598         }
01599         return x;
01600     }
01601 };
01602 
01603 template <>
01604 struct InRange_SIMD<int>
01605 {
01606     int operator () (const int * src1, const int * src2, const int * src3,
01607                      uchar * dst, int len) const
01608     {
01609         int x = 0;
01610 
01611         for ( ; x <= len - 8; x += 8 )
01612         {
01613             int32x4_t values = vld1q_s32((const int32_t*)(src1 + x));
01614             int32x4_t low = vld1q_s32((const int32_t*)(src2 + x));
01615             int32x4_t high = vld1q_s32((const int32_t*)(src3 + x));
01616 
01617             uint16x4_t r1 = vmovn_u32(vandq_u32(vcgeq_s32(values, low), vcgeq_s32(high, values)));
01618 
01619             values = vld1q_s32((const int32_t*)(src1 + x + 4));
01620             low = vld1q_s32((const int32_t*)(src2 + x + 4));
01621             high = vld1q_s32((const int32_t*)(src3 + x + 4));
01622 
01623             uint16x4_t r2 = vmovn_u32(vandq_u32(vcgeq_s32(values, low), vcgeq_s32(high, values)));
01624 
01625             uint16x8_t res_16 = vcombine_u16(r1, r2);
01626 
01627             vst1_u8(dst + x, vmovn_u16(res_16));
01628         }
01629         return x;
01630     }
01631 };
01632 
01633 template <>
01634 struct InRange_SIMD<float>
01635 {
01636     int operator () (const float * src1, const float * src2, const float * src3,
01637                      uchar * dst, int len) const
01638     {
01639         int x = 0;
01640 
01641         for ( ; x <= len - 8; x += 8 )
01642         {
01643             float32x4_t values = vld1q_f32((const float32_t*)(src1 + x));
01644             float32x4_t low = vld1q_f32((const float32_t*)(src2 + x));
01645             float32x4_t high = vld1q_f32((const float32_t*)(src3 + x));
01646 
01647             uint16x4_t r1 = vmovn_u32(vandq_u32(vcgeq_f32(values, low), vcgeq_f32(high, values)));
01648 
01649             values = vld1q_f32((const float32_t*)(src1 + x + 4));
01650             low = vld1q_f32((const float32_t*)(src2 + x + 4));
01651             high = vld1q_f32((const float32_t*)(src3 + x + 4));
01652 
01653             uint16x4_t r2 = vmovn_u32(vandq_u32(vcgeq_f32(values, low), vcgeq_f32(high, values)));
01654 
01655             uint16x8_t res_16 = vcombine_u16(r1, r2);
01656 
01657             vst1_u8(dst + x, vmovn_u16(res_16));
01658         }
01659         return x;
01660     }
01661 };
01662 
01663 #endif
01664 
01665 template <typename T>
01666 static void inRange_(const T* src1, size_t step1, const T* src2, size_t step2,
01667          const T* src3, size_t step3, uchar* dst, size_t step,
01668          Size size)
01669 {
01670     step1 /= sizeof(src1[0]);
01671     step2 /= sizeof(src2[0]);
01672     step3 /= sizeof(src3[0]);
01673 
01674     InRange_SIMD<T> vop;
01675 
01676     for( ; size.height--; src1 += step1, src2 += step2, src3 += step3, dst += step )
01677     {
01678         int x = vop(src1, src2, src3, dst, size.width);
01679         #if CV_ENABLE_UNROLLED
01680         for( ; x <= size.width - 4; x += 4 )
01681         {
01682             int t0, t1;
01683             t0 = src2[x] <= src1[x] && src1[x] <= src3[x];
01684             t1 = src2[x+1] <= src1[x+1] && src1[x+1] <= src3[x+1];
01685             dst[x] = (uchar)-t0; dst[x+1] = (uchar)-t1;
01686             t0 = src2[x+2] <= src1[x+2] && src1[x+2] <= src3[x+2];
01687             t1 = src2[x+3] <= src1[x+3] && src1[x+3] <= src3[x+3];
01688             dst[x+2] = (uchar)-t0; dst[x+3] = (uchar)-t1;
01689         }
01690         #endif
01691         for( ; x < size.width; x++ )
01692             dst[x] = (uchar)-(src2[x] <= src1[x] && src1[x] <= src3[x]);
01693     }
01694 }
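/* Note on the mask convention: the (uchar)-t idiom in the scalar loop above
   turns a boolean comparison result into the 0xFF / 0x00 mask used throughout
   this file: -(int)1 is -1, which truncates to 0xFF as an unsigned byte, while
   -(int)0 stays 0x00. The SIMD specializations earlier build the same
   all-ones / all-zeros lanes with compare plus bitwise-and/andnot instructions. */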
01695 
01696 
01697 static void inRange8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
01698                       const uchar* src3, size_t step3, uchar* dst, size_t step, Size size)
01699 {
01700     inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
01701 }
01702 
01703 static void inRange8s(const schar* src1, size_t step1, const schar* src2, size_t step2,
01704                       const schar* src3, size_t step3, uchar* dst, size_t step, Size size)
01705 {
01706     inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
01707 }
01708 
01709 static void inRange16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
01710                        const ushort* src3, size_t step3, uchar* dst, size_t step, Size size)
01711 {
01712     inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
01713 }
01714 
01715 static void inRange16s(const short* src1, size_t step1, const short* src2, size_t step2,
01716                        const short* src3, size_t step3, uchar* dst, size_t step, Size size)
01717 {
01718     inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
01719 }
01720 
01721 static void inRange32s(const int* src1, size_t step1, const int* src2, size_t step2,
01722                        const int* src3, size_t step3, uchar* dst, size_t step, Size size)
01723 {
01724     inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
01725 }
01726 
01727 static void inRange32f(const float* src1, size_t step1, const float* src2, size_t step2,
01728                        const float* src3, size_t step3, uchar* dst, size_t step, Size size)
01729 {
01730     inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
01731 }
01732 
01733 static void inRange64f(const double* src1, size_t step1, const double* src2, size_t step2,
01734                        const double* src3, size_t step3, uchar* dst, size_t step, Size size)
01735 {
01736     inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
01737 }
01738 
01739 static void inRangeReduce(const uchar* src, uchar* dst, size_t len, int cn)
01740 {
01741     int k = cn % 4 ? cn % 4 : 4;
01742     size_t i, j;
01743     if( k == 1 )
01744         for( i = j = 0; i < len; i++, j += cn )
01745             dst[i] = src[j];
01746     else if( k == 2 )
01747         for( i = j = 0; i < len; i++, j += cn )
01748             dst[i] = src[j] & src[j+1];
01749     else if( k == 3 )
01750         for( i = j = 0; i < len; i++, j += cn )
01751             dst[i] = src[j] & src[j+1] & src[j+2];
01752     else
01753         for( i = j = 0; i < len; i++, j += cn )
01754             dst[i] = src[j] & src[j+1] & src[j+2] & src[j+3];
01755 
01756     for( ; k < cn; k += 4 )
01757     {
01758         for( i = 0, j = k; i < len; i++, j += cn )
01759             dst[i] &= src[j] & src[j+1] & src[j+2] & src[j+3];
01760     }
01761 }
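/* For multi-channel input the per-channel masks produced by the inRange*
   kernels are AND-reduced here into one byte per pixel, so a pixel is reported
   as "in range" only if every channel lies inside its bounds. For a
   hypothetical 3-channel pixel whose per-channel masks are {255, 255, 0}, the
   reduced value is 255 & 255 & 0 == 0. */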
01762 
01763 typedef void (*InRangeFunc)( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
01764                              const uchar* src3, size_t step3, uchar* dst, size_t step, Size sz );
01765 
01766 static InRangeFunc getInRangeFunc(int depth)
01767 {
01768     static InRangeFunc inRangeTab[] =
01769     {
01770         (InRangeFunc)GET_OPTIMIZED(inRange8u), (InRangeFunc)GET_OPTIMIZED(inRange8s), (InRangeFunc)GET_OPTIMIZED(inRange16u),
01771         (InRangeFunc)GET_OPTIMIZED(inRange16s), (InRangeFunc)GET_OPTIMIZED(inRange32s), (InRangeFunc)GET_OPTIMIZED(inRange32f),
01772         (InRangeFunc)inRange64f, 0
01773     };
01774 
01775     return inRangeTab[depth];
01776 }
01777 
01778 #ifdef HAVE_OPENCL
01779 
01780 static bool ocl_inRange( InputArray _src, InputArray _lowerb,
01781                          InputArray _upperb, OutputArray _dst )
01782 {
01783     const ocl::Device & d = ocl::Device::getDefault();
01784     int skind = _src.kind(), lkind = _lowerb.kind(), ukind = _upperb.kind();
01785     Size ssize = _src.size(), lsize = _lowerb.size(), usize = _upperb.size();
01786     int stype = _src.type(), ltype = _lowerb.type(), utype = _upperb.type();
01787     int sdepth = CV_MAT_DEPTH(stype), ldepth = CV_MAT_DEPTH(ltype), udepth = CV_MAT_DEPTH(utype);
01788     int cn = CV_MAT_CN(stype), rowsPerWI = d.isIntel() ? 4 : 1;
01789     bool lbScalar = false, ubScalar = false;
01790 
01791     if( (lkind == _InputArray::MATX && skind != _InputArray::MATX) ||
01792         ssize != lsize || stype != ltype )
01793     {
01794         if( !checkScalar(_lowerb, stype, lkind, skind) )
01795             CV_Error( CV_StsUnmatchedSizes,
01796                      "The lower boundary is neither an array of the same size and same type as src, nor a scalar");
01797         lbScalar = true;
01798     }
01799 
01800     if( (ukind == _InputArray::MATX && skind != _InputArray::MATX) ||
01801         ssize != usize || stype != utype )
01802     {
01803         if( !checkScalar(_upperb, stype, ukind, skind) )
01804             CV_Error( CV_StsUnmatchedSizes,
01805                      "The upper boundary is neither an array of the same size and same type as src, nor a scalar");
01806         ubScalar = true;
01807     }
01808 
01809     if (lbScalar != ubScalar)
01810         return false;
01811 
01812     bool doubleSupport = d.doubleFPConfig() > 0,
01813             haveScalar = lbScalar && ubScalar;
01814 
01815     if ( (!doubleSupport && sdepth == CV_64F) ||
01816          (!haveScalar && (sdepth != ldepth || sdepth != udepth)) )
01817         return false;
01818 
01819     int kercn = haveScalar ? cn : std::max(std::min(ocl::predictOptimalVectorWidth(_src, _lowerb, _upperb, _dst), 4), cn);
01820     if (kercn % cn != 0)
01821         kercn = cn;
01822     int colsPerWI = kercn / cn;
01823     String opts = format("%s-D cn=%d -D srcT=%s -D srcT1=%s -D dstT=%s -D kercn=%d -D depth=%d%s -D colsPerWI=%d",
01824                            haveScalar ? "-D HAVE_SCALAR " : "", cn, ocl::typeToStr(CV_MAKE_TYPE(sdepth, kercn)),
01825                            ocl::typeToStr(sdepth), ocl::typeToStr(CV_8UC(colsPerWI)), kercn, sdepth,
01826                            doubleSupport ? " -D DOUBLE_SUPPORT" : "", colsPerWI);
01827 
01828     ocl::Kernel ker("inrange", ocl::core::inrange_oclsrc, opts);
01829     if (ker.empty())
01830         return false;
01831 
01832     _dst.create(ssize, CV_8UC1);
01833     UMat src = _src.getUMat(), dst = _dst.getUMat(), lscalaru, uscalaru;
01834     Mat lscalar, uscalar;
01835 
01836     if (lbScalar && ubScalar)
01837     {
01838         lscalar = _lowerb.getMat();
01839         uscalar = _upperb.getMat();
01840 
01841         size_t esz = src.elemSize();
01842         size_t blocksize = 36;
01843 
01844         AutoBuffer<uchar> _buf(blocksize*(((int)lbScalar + (int)ubScalar)*esz + cn) + 2*cn*sizeof(int) + 128);
01845         uchar *buf = alignPtr(_buf + blocksize*cn, 16);
01846 
01847         if( ldepth != sdepth && sdepth < CV_32S )
01848         {
01849             int* ilbuf = (int*)alignPtr(buf + blocksize*esz, 16);
01850             int* iubuf = ilbuf + cn;
01851 
01852             BinaryFunc sccvtfunc = getConvertFunc(ldepth, CV_32S);
01853             sccvtfunc(lscalar.ptr(), 1, 0, 1, (uchar*)ilbuf, 1, Size(cn, 1), 0);
01854             sccvtfunc(uscalar.ptr(), 1, 0, 1, (uchar*)iubuf, 1, Size(cn, 1), 0);
01855             int minval = cvRound(getMinVal(sdepth)), maxval = cvRound(getMaxVal(sdepth));
01856 
01857             for( int k = 0; k < cn; k++ )
01858             {
01859                 if( ilbuf[k] > iubuf[k] || ilbuf[k] > maxval || iubuf[k] < minval )
01860                     ilbuf[k] = minval+1, iubuf[k] = minval;
01861             }
01862             lscalar = Mat(cn, 1, CV_32S, ilbuf);
01863             uscalar = Mat(cn, 1, CV_32S, iubuf);
01864         }
01865 
01866         lscalar.convertTo(lscalar, stype);
01867         uscalar.convertTo(uscalar, stype);
01868     }
01869     else
01870     {
01871         lscalaru = _lowerb.getUMat();
01872         uscalaru = _upperb.getUMat();
01873     }
01874 
01875     ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
01876             dstarg = ocl::KernelArg::WriteOnly(dst, 1, colsPerWI);
01877 
01878     if (haveScalar)
01879     {
01880         lscalar.copyTo(lscalaru);
01881         uscalar.copyTo(uscalaru);
01882 
01883         ker.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(lscalaru),
01884                ocl::KernelArg::PtrReadOnly(uscalaru), rowsPerWI);
01885     }
01886     else
01887         ker.args(srcarg, dstarg, ocl::KernelArg::ReadOnlyNoSize(lscalaru),
01888                ocl::KernelArg::ReadOnlyNoSize(uscalaru), rowsPerWI);
01889 
01890     size_t globalsize[2] = { (size_t)ssize.width / colsPerWI, ((size_t)ssize.height + rowsPerWI - 1) / rowsPerWI };
01891     return ker.run(2, globalsize, NULL, false);
01892 }
01893 
01894 #endif
01895 
01896 }
01897 
01898 void cv::inRange(InputArray _src, InputArray _lowerb,
01899                  InputArray _upperb, OutputArray _dst)
01900 {
01901 #ifdef HAVE_OPENCL
01902     CV_OCL_RUN(_src.dims() <= 2 && _lowerb.dims() <= 2 &&
01903                _upperb.dims() <= 2 && OCL_PERFORMANCE_CHECK(_dst.isUMat()),
01904                ocl_inRange(_src, _lowerb, _upperb, _dst))
01905 #endif
01906 
01907     int skind = _src.kind(), lkind = _lowerb.kind(), ukind = _upperb.kind();
01908     Mat src = _src.getMat(), lb = _lowerb.getMat(), ub = _upperb.getMat();
01909 
01910     bool lbScalar = false, ubScalar = false;
01911 
01912     if( (lkind == _InputArray::MATX && skind != _InputArray::MATX) ||
01913         src.size != lb.size || src.type() != lb.type() )
01914     {
01915         if( !checkScalar(lb, src.type(), lkind, skind) )
01916             CV_Error( CV_StsUnmatchedSizes,
01917                      "The lower boundary is neither an array of the same size and same type as src, nor a scalar");
01918         lbScalar = true;
01919     }
01920 
01921     if( (ukind == _InputArray::MATX && skind != _InputArray::MATX) ||
01922         src.size != ub.size || src.type() != ub.type() )
01923     {
01924         if( !checkScalar(ub, src.type(), ukind, skind) )
01925             CV_Error( CV_StsUnmatchedSizes,
01926                      "The upper boundary is neither an array of the same size and same type as src, nor a scalar");
01927         ubScalar = true;
01928     }
01929 
01930     CV_Assert(lbScalar == ubScalar);
01931 
01932     int cn = src.channels(), depth = src.depth();
01933 
01934     size_t esz = src.elemSize();
01935     size_t blocksize0 = (size_t)(BLOCK_SIZE + esz-1)/esz;
01936 
01937     _dst.create(src.dims, src.size, CV_8UC1);
01938     Mat dst = _dst.getMat();
01939     InRangeFunc func = getInRangeFunc(depth);
01940 
01941     const Mat* arrays_sc[] = { &src, &dst, 0 };
01942     const Mat* arrays_nosc[] = { &src, &dst, &lb, &ub, 0 };
01943     uchar* ptrs[4];
01944 
01945     NAryMatIterator it(lbScalar && ubScalar ? arrays_sc : arrays_nosc, ptrs);
01946     size_t total = it.size, blocksize = std::min(total, blocksize0);
01947 
01948     AutoBuffer<uchar>  _buf(blocksize*(((int)lbScalar + (int)ubScalar)*esz + cn) + 2*cn*sizeof(int) + 128);
01949     uchar *buf = _buf, *mbuf = buf, *lbuf = 0, *ubuf = 0;
01950     buf = alignPtr(buf + blocksize*cn, 16);
01951 
01952     if( lbScalar && ubScalar )
01953     {
01954         lbuf = buf;
01955         ubuf = buf = alignPtr(buf + blocksize*esz, 16);
01956 
01957         CV_Assert( lb.type() == ub.type() );
01958         int scdepth = lb.depth();
01959 
01960         if( scdepth != depth && depth < CV_32S )
01961         {
01962             int* ilbuf = (int*)alignPtr(buf + blocksize*esz, 16);
01963             int* iubuf = ilbuf + cn;
01964 
01965             BinaryFunc sccvtfunc = getConvertFunc(scdepth, CV_32S);
01966             sccvtfunc(lb.ptr(), 1, 0, 1, (uchar*)ilbuf, 1, Size(cn, 1), 0);
01967             sccvtfunc(ub.ptr(), 1, 0, 1, (uchar*)iubuf, 1, Size(cn, 1), 0);
01968             int minval = cvRound(getMinVal(depth)), maxval = cvRound(getMaxVal(depth));
01969 
01970             for( int k = 0; k < cn; k++ )
01971             {
01972                 if( ilbuf[k] > iubuf[k] || ilbuf[k] > maxval || iubuf[k] < minval )
01973                     ilbuf[k] = minval+1, iubuf[k] = minval;
01974             }
01975             lb = Mat(cn, 1, CV_32S, ilbuf);
01976             ub = Mat(cn, 1, CV_32S, iubuf);
01977         }
01978 
01979         convertAndUnrollScalar( lb, src.type(), lbuf, blocksize );
01980         convertAndUnrollScalar( ub, src.type(), ubuf, blocksize );
01981     }
01982 
01983     for( size_t i = 0; i < it.nplanes; i++, ++it )
01984     {
01985         for( size_t j = 0; j < total; j += blocksize )
01986         {
01987             int bsz = (int)MIN(total - j, blocksize);
01988             size_t delta = bsz*esz;
01989             uchar *lptr = lbuf, *uptr = ubuf;
01990             if( !lbScalar )
01991             {
01992                 lptr = ptrs[2];
01993                 ptrs[2] += delta;
01994             }
01995             if( !ubScalar )
01996             {
01997                 int idx = !lbScalar ? 3 : 2;
01998                 uptr = ptrs[idx];
01999                 ptrs[idx] += delta;
02000             }
02001             func( ptrs[0], 0, lptr, 0, uptr, 0, cn == 1 ? ptrs[1] : mbuf, 0, Size(bsz*cn, 1));
02002             if( cn > 1 )
02003                 inRangeReduce(mbuf, ptrs[1], bsz, cn);
02004             ptrs[0] += delta;
02005             ptrs[1] += bsz;
02006         }
02007     }
02008 }
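/* A minimal usage sketch (assumed example; `hsv` and `mask` are illustrative
   names, and the bound values are arbitrary). Assuming `hsv` is an 8-bit
   3-channel image already converted to the HSV color space:

       cv::Mat mask;
       cv::inRange(hsv, cv::Scalar(35, 50, 50), cv::Scalar(85, 255, 255), mask);

   `mask` is created as CV_8UC1 with the same size as `hsv`; a pixel is 255 when
   every channel lies inside [lowerb, upperb] and 0 otherwise. The bounds may be
   scalars (as here) or arrays of the same size and type as the source, but both
   bounds must be of the same kind, as the assertion above enforces. */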
02009 
02010 /****************************************************************************************\
02011 *                                Earlier API: cvAdd etc.                                 *
02012 \****************************************************************************************/
02013 
02014 CV_IMPL void
02015 cvNot( const CvArr* srcarr, CvArr* dstarr )
02016 {
02017     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
02018     CV_Assert( src.size == dst.size && src.type() == dst.type() );
02019     cv::bitwise_not( src, dst );
02020 }
02021 
02022 
02023 CV_IMPL void
02024 cvAnd( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr )
02025 {
02026     cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
02027         dst = cv::cvarrToMat(dstarr), mask;
02028     CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
02029     if( maskarr )
02030         mask = cv::cvarrToMat(maskarr);
02031     cv::bitwise_and( src1, src2, dst, mask );
02032 }
02033 
02034 
02035 CV_IMPL void
02036 cvOr( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr )
02037 {
02038     cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
02039         dst = cv::cvarrToMat(dstarr), mask;
02040     CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
02041     if( maskarr )
02042         mask = cv::cvarrToMat(maskarr);
02043     cv::bitwise_or( src1, src2, dst, mask );
02044 }
02045 
02046 
02047 CV_IMPL void
02048 cvXor( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr )
02049 {
02050     cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
02051         dst = cv::cvarrToMat(dstarr), mask;
02052     CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
02053     if( maskarr )
02054         mask = cv::cvarrToMat(maskarr);
02055     cv::bitwise_xor( src1, src2, dst, mask );
02056 }
02057 
02058 
02059 CV_IMPL void
02060 cvAndS( const CvArr* srcarr, CvScalar  s, CvArr* dstarr, const CvArr* maskarr )
02061 {
02062     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), mask;
02063     CV_Assert( src.size == dst.size && src.type() == dst.type() );
02064     if( maskarr )
02065         mask = cv::cvarrToMat(maskarr);
02066     cv::bitwise_and( src, (const cv::Scalar &)s, dst, mask );
02067 }
02068 
02069 
02070 CV_IMPL void
02071 cvOrS( const CvArr* srcarr, CvScalar  s, CvArr* dstarr, const CvArr* maskarr )
02072 {
02073     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), mask;
02074     CV_Assert( src.size == dst.size && src.type() == dst.type() );
02075     if( maskarr )
02076         mask = cv::cvarrToMat(maskarr);
02077     cv::bitwise_or( src, (const cv::Scalar &)s, dst, mask );
02078 }
02079 
02080 
02081 CV_IMPL void
02082 cvXorS( const CvArr* srcarr, CvScalar  s, CvArr* dstarr, const CvArr* maskarr )
02083 {
02084     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), mask;
02085     CV_Assert( src.size == dst.size && src.type() == dst.type() );
02086     if( maskarr )
02087         mask = cv::cvarrToMat(maskarr);
02088     cv::bitwise_xor( src, (const cv::Scalar &)s, dst, mask );
02089 }
02090 
02091 
02092 CV_IMPL void cvAdd( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr )
02093 {
02094     cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
02095         dst = cv::cvarrToMat(dstarr), mask;
02096     CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
02097     if( maskarr )
02098         mask = cv::cvarrToMat(maskarr);
02099     cv::add( src1, src2, dst, mask, dst.type() );
02100 }
02101 
02102 
02103 CV_IMPL void cvSub( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr )
02104 {
02105     cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
02106         dst = cv::cvarrToMat(dstarr), mask;
02107     CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
02108     if( maskarr )
02109         mask = cv::cvarrToMat(maskarr);
02110     cv::subtract( src1, src2, dst, mask, dst.type() );
02111 }
02112 
02113 
02114 CV_IMPL void cvAddS( const CvArr* srcarr1, CvScalar  value, CvArr* dstarr, const CvArr* maskarr )
02115 {
02116     cv::Mat src1 = cv::cvarrToMat(srcarr1),
02117         dst = cv::cvarrToMat(dstarr), mask;
02118     CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
02119     if( maskarr )
02120         mask = cv::cvarrToMat(maskarr);
02121     cv::add( src1, (const cv::Scalar &)value, dst, mask, dst.type() );
02122 }
02123 
02124 
02125 CV_IMPL void cvSubRS( const CvArr* srcarr1, CvScalar  value, CvArr* dstarr, const CvArr* maskarr )
02126 {
02127     cv::Mat src1 = cv::cvarrToMat(srcarr1),
02128         dst = cv::cvarrToMat(dstarr), mask;
02129     CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
02130     if( maskarr )
02131         mask = cv::cvarrToMat(maskarr);
02132     cv::subtract( (const cv::Scalar &)value, src1, dst, mask, dst.type() );
02133 }
02134 
02135 
02136 CV_IMPL void cvMul( const CvArr* srcarr1, const CvArr* srcarr2,
02137                     CvArr* dstarr, double scale )
02138 {
02139     cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
02140         dst = cv::cvarrToMat(dstarr);
02141     CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
02142     cv::multiply( src1, src2, dst, scale, dst.type() );
02143 }
02144 
02145 
02146 CV_IMPL void cvDiv( const CvArr* srcarr1, const CvArr* srcarr2,
02147                     CvArr* dstarr, double scale )
02148 {
02149     cv::Mat src2 = cv::cvarrToMat(srcarr2),
02150         dst = cv::cvarrToMat(dstarr), mask;
02151     CV_Assert( src2.size == dst.size && src2.channels() == dst.channels() );
02152 
02153     if( srcarr1 )
02154         cv::divide( cv::cvarrToMat(srcarr1), src2, dst, scale, dst.type() );
02155     else
02156         cv::divide( scale, src2, dst, dst.type() );
02157 }
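/* As the branch above shows, the legacy cvDiv accepts a NULL first operand;
   in that case the result is scale / src2 element-wise (a reciprocal when
   scale == 1), i.e. the cv::divide(scale, src2, dst, type) overload rather
   than a two-array division. */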
02158 
02159 
02160 CV_IMPL void
02161 cvAddWeighted( const CvArr* srcarr1, double alpha,
02162                const CvArr* srcarr2, double beta,
02163                double gamma, CvArr* dstarr )
02164 {
02165     cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
02166         dst = cv::cvarrToMat(dstarr);
02167     CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
02168     cv::addWeighted( src1, alpha, src2, beta, gamma, dst, dst.type() );
02169 }
02170 
02171 
02172 CV_IMPL  void
02173 cvAbsDiff( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr )
02174 {
02175     cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
02176     CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
02177 
02178     cv::absdiff( src1, cv::cvarrToMat(srcarr2), dst );
02179 }
02180 
02181 
02182 CV_IMPL void
02183 cvAbsDiffS( const CvArr* srcarr1, CvArr* dstarr, CvScalar  scalar )
02184 {
02185     cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
02186     CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
02187 
02188     cv::absdiff( src1, (const cv::Scalar &)scalar, dst );
02189 }
02190 
02191 
02192 CV_IMPL void
02193 cvInRange( const void* srcarr1, const void* srcarr2,
02194            const void* srcarr3, void* dstarr )
02195 {
02196     cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
02197     CV_Assert( src1.size == dst.size && dst.type() == CV_8U );
02198 
02199     cv::inRange( src1, cv::cvarrToMat(srcarr2), cv::cvarrToMat(srcarr3), dst );
02200 }
02201 
02202 
02203 CV_IMPL void
02204 cvInRangeS( const void* srcarr1, CvScalar  lowerb, CvScalar  upperb, void* dstarr )
02205 {
02206     cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
02207     CV_Assert( src1.size == dst.size && dst.type() == CV_8U );
02208 
02209     cv::inRange( src1, (const cv::Scalar &)lowerb, (const cv::Scalar &)upperb, dst );
02210 }
02211 
02212 
02213 CV_IMPL void
02214 cvCmp( const void* srcarr1, const void* srcarr2, void* dstarr, int cmp_op )
02215 {
02216     cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
02217     CV_Assert( src1.size == dst.size && dst.type() == CV_8U );
02218 
02219     cv::compare( src1, cv::cvarrToMat(srcarr2), dst, cmp_op );
02220 }
02221 
02222 
02223 CV_IMPL void
02224 cvCmpS( const void* srcarr1, double value, void* dstarr, int cmp_op )
02225 {
02226     cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
02227     CV_Assert( src1.size == dst.size && dst.type() == CV_8U );
02228 
02229     cv::compare( src1, value, dst, cmp_op );
02230 }
02231 
02232 
02233 CV_IMPL void
02234 cvMin( const void* srcarr1, const void* srcarr2, void* dstarr )
02235 {
02236     cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
02237     CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
02238 
02239     cv::min( src1, cv::cvarrToMat(srcarr2), dst );
02240 }
02241 
02242 
02243 CV_IMPL void
02244 cvMax( const void* srcarr1, const void* srcarr2, void* dstarr )
02245 {
02246     cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
02247     CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
02248 
02249     cv::max( src1, cv::cvarrToMat(srcarr2), dst );
02250 }
02251 
02252 
02253 CV_IMPL void
02254 cvMinS( const void* srcarr1, double value, void* dstarr )
02255 {
02256     cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
02257     CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
02258 
02259     cv::min( src1, value, dst );
02260 }
02261 
02262 
02263 CV_IMPL void
02264 cvMaxS( const void* srcarr1, double value, void* dstarr )
02265 {
02266     cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
02267     CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
02268 
02269     cv::max( src1, value, dst );
02270 }
02271 
02272 
02273 
02274 namespace cv { namespace hal {
02275 
02276 //=======================================
02277 
02278 #if (ARITHM_USE_IPP == 1)
02279 static inline void fixSteps(int width, int height, size_t elemSize, size_t& step1, size_t& step2, size_t& step)
02280 {
02281     if( height == 1 )
02282         step1 = step2 = step = width*elemSize;
02283 }
02284 #define CALL_IPP_BIN_E_12(fun) \
02285     CV_IPP_CHECK() \
02286     { \
02287         fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
02288         if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0)) \
02289         { \
02290             CV_IMPL_ADD(CV_IMPL_IPP); \
02291             return; \
02292         } \
02293         setIppErrorStatus(); \
02294     }
02295 
02296 #define CALL_IPP_BIN_E_21(fun) \
02297     CV_IPP_CHECK() \
02298     { \
02299         fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
02300         if (0 <= fun(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0)) \
02301         { \
02302             CV_IMPL_ADD(CV_IMPL_IPP); \
02303             return; \
02304         } \
02305         setIppErrorStatus(); \
02306     }
02307 
02308 #define CALL_IPP_BIN_12(fun) \
02309     CV_IPP_CHECK() \
02310     { \
02311         fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
02312         if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height))) \
02313         { \
02314             CV_IMPL_ADD(CV_IMPL_IPP); \
02315             return; \
02316         } \
02317         setIppErrorStatus(); \
02318     }
02319 
02320 #define CALL_IPP_BIN_21(fun) \
02321     CV_IPP_CHECK() \
02322     { \
02323         fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
02324         if (0 <= fun(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height))) \
02325         { \
02326             CV_IMPL_ADD(CV_IMPL_IPP); \
02327             return; \
02328         } \
02329         setIppErrorStatus(); \
02330     }
02331 
02332 #else
02333 #define CALL_IPP_BIN_E_12(fun)
02334 #define CALL_IPP_BIN_E_21(fun)
02335 #define CALL_IPP_BIN_12(fun)
02336 #define CALL_IPP_BIN_21(fun)
02337 #endif
02338 
02339 
02340 //=======================================
02341 // Add
02342 //=======================================
02343 
02344 void add8u( const uchar* src1, size_t step1,
02345                    const uchar* src2, size_t step2,
02346                    uchar* dst, size_t step, int width, int height, void* )
02347 {
02348     CALL_HAL(add8u, cv_hal_add8u, src1, step1, src2, step2, dst, step, width, height)
02349     CALL_IPP_BIN_E_12(ippiAdd_8u_C1RSfs)
02350     (vBinOp<uchar, cv::OpAdd<uchar>, IF_SIMD(VAdd<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
02351 }
02352 
02353 void add8s( const schar* src1, size_t step1,
02354                    const schar* src2, size_t step2,
02355                    schar* dst, size_t step, int width, int height, void* )
02356 {
02357     CALL_HAL(add8s, cv_hal_add8s, src1, step1, src2, step2, dst, step, width, height)
02358     vBinOp<schar, cv::OpAdd<schar>, IF_SIMD(VAdd<schar>)>(src1, step1, src2, step2, dst, step, width, height);
02359 }
02360 
02361 void add16u( const ushort* src1, size_t step1,
02362                     const ushort* src2, size_t step2,
02363                     ushort* dst, size_t step, int width, int height, void* )
02364 {
02365     CALL_HAL(add16u, cv_hal_add16u, src1, step1, src2, step2, dst, step, width, height)
02366     CALL_IPP_BIN_E_12(ippiAdd_16u_C1RSfs)
02367     (vBinOp<ushort, cv::OpAdd<ushort>, IF_SIMD(VAdd<ushort>)>(src1, step1, src2, step2, dst, step, width, height));
02368 }
02369 
02370 void add16s( const short* src1, size_t step1,
02371                     const short* src2, size_t step2,
02372                     short* dst, size_t step, int width, int height, void* )
02373 {
02374     CALL_HAL(add16s, cv_hal_add16s, src1, step1, src2, step2, dst, step, width, height)
02375     CALL_IPP_BIN_E_12(ippiAdd_16s_C1RSfs)
02376     (vBinOp<short, cv::OpAdd<short>, IF_SIMD(VAdd<short>)>(src1, step1, src2, step2, dst, step, width, height));
02377 }
02378 
02379 void add32s( const int* src1, size_t step1,
02380                     const int* src2, size_t step2,
02381                     int* dst, size_t step, int width, int height, void* )
02382 {
02383     CALL_HAL(add32s, cv_hal_add32s, src1, step1, src2, step2, dst, step, width, height)
02384     vBinOp32<int, cv::OpAdd<int>, IF_SIMD(VAdd<int>)>(src1, step1, src2, step2, dst, step, width, height);
02385 }
02386 
02387 void add32f( const float* src1, size_t step1,
02388                     const float* src2, size_t step2,
02389                     float* dst, size_t step, int width, int height, void* )
02390 {
02391     CALL_HAL(add32f, cv_hal_add32f, src1, step1, src2, step2, dst, step, width, height)
02392     CALL_IPP_BIN_12(ippiAdd_32f_C1R)
02393     (vBinOp32<float, cv::OpAdd<float>, IF_SIMD(VAdd<float>)>(src1, step1, src2, step2, dst, step, width, height));
02394 }
02395 
02396 void add64f( const double* src1, size_t step1,
02397                     const double* src2, size_t step2,
02398                     double* dst, size_t step, int width, int height, void* )
02399 {
02400     CALL_HAL(add64f, cv_hal_add64f, src1, step1, src2, step2, dst, step, width, height)
02401     vBinOp64<double, cv::OpAdd<double>, IF_SIMD(VAdd<double>)>(src1, step1, src2, step2, dst, step, width, height);
02402 }
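/* Each add* kernel above layers three paths: a CALL_HAL hook for an external
   HAL replacement, an optional IPP primitive, and the vBinOp template
   fallback. Results saturate to the destination type for the integer
   variants; e.g. with 8-bit operands 250 + 10 stores 255, not 4, because
   OpAdd uses saturate_cast (and ippiAdd_8u_C1RSfs with a zero scale factor
   behaves the same way). */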
02403 
02404 //=======================================
02405 // Subtract
02406 //=======================================
02407 
02408 void sub8u( const uchar* src1, size_t step1,
02409                    const uchar* src2, size_t step2,
02410                    uchar* dst, size_t step, int width, int height, void* )
02411 {
02412     CALL_HAL(sub8u, cv_hal_sub8u, src1, step1, src2, step2, dst, step, width, height)
02413     CALL_IPP_BIN_E_21(ippiSub_8u_C1RSfs)
02414     (vBinOp<uchar, cv::OpSub<uchar>, IF_SIMD(VSub<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
02415 }
02416 
02417 void sub8s( const schar* src1, size_t step1,
02418                    const schar* src2, size_t step2,
02419                    schar* dst, size_t step, int width, int height, void* )
02420 {
02421     CALL_HAL(sub8s, cv_hal_sub8s, src1, step1, src2, step2, dst, step, width, height)
02422     vBinOp<schar, cv::OpSub<schar>, IF_SIMD(VSub<schar>)>(src1, step1, src2, step2, dst, step, width, height);
02423 }
02424 
02425 void sub16u( const ushort* src1, size_t step1,
02426                     const ushort* src2, size_t step2,
02427                     ushort* dst, size_t step, int width, int height, void* )
02428 {
02429     CALL_HAL(sub16u, cv_hal_sub16u, src1, step1, src2, step2, dst, step, width, height)
02430     CALL_IPP_BIN_E_21(ippiSub_16u_C1RSfs)
02431     (vBinOp<ushort, cv::OpSub<ushort>, IF_SIMD(VSub<ushort>)>(src1, step1, src2, step2, dst, step, width, height));
02432 }
02433 
02434 void sub16s( const short* src1, size_t step1,
02435                     const short* src2, size_t step2,
02436                     short* dst, size_t step, int width, int height, void* )
02437 {
02438     CALL_HAL(sub16s, cv_hal_sub16s, src1, step1, src2, step2, dst, step, width, height)
02439     CALL_IPP_BIN_E_21(ippiSub_16s_C1RSfs)
02440     (vBinOp<short, cv::OpSub<short>, IF_SIMD(VSub<short>)>(src1, step1, src2, step2, dst, step, width, height));
02441 }
02442 
02443 void sub32s( const int* src1, size_t step1,
02444                     const int* src2, size_t step2,
02445                     int* dst, size_t step, int width, int height, void* )
02446 {
02447     CALL_HAL(sub32s, cv_hal_sub32s, src1, step1, src2, step2, dst, step, width, height)
02448     vBinOp32<int, cv::OpSub<int>, IF_SIMD(VSub<int>)>(src1, step1, src2, step2, dst, step, width, height);
02449 }
02450 
02451 void sub32f( const float* src1, size_t step1,
02452                    const float* src2, size_t step2,
02453                    float* dst, size_t step, int width, int height, void* )
02454 {
02455     CALL_HAL(sub32f, cv_hal_sub32f, src1, step1, src2, step2, dst, step, width, height)
02456     CALL_IPP_BIN_21(ippiSub_32f_C1R)
02457     (vBinOp32<float, cv::OpSub<float>, IF_SIMD(VSub<float>)>(src1, step1, src2, step2, dst, step, width, height));
02458 }
02459 
02460 void sub64f( const double* src1, size_t step1,
02461                     const double* src2, size_t step2,
02462                     double* dst, size_t step, int width, int height, void* )
02463 {
02464     CALL_HAL(sub64f, cv_hal_sub64f, src1, step1, src2, step2, dst, step, width, height)
02465     vBinOp64<double, cv::OpSub<double>, IF_SIMD(VSub<double>)>(src1, step1, src2, step2, dst, step, width, height);
02466 }
02467 
02468 //=======================================
02469 
02470 #if (ARITHM_USE_IPP == 1)
02471 #define CALL_IPP_MIN_MAX(fun, type) \
02472     CV_IPP_CHECK() \
02473     { \
02474         type* s1 = (type*)src1; \
02475         type* s2 = (type*)src2; \
02476         type* d  = dst; \
02477         fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
02478         int i = 0; \
02479         for(; i < height; i++) \
02480         { \
02481             if (0 > fun(s1, s2, d, width)) \
02482                 break; \
02483             s1 = (type*)((uchar*)s1 + step1); \
02484             s2 = (type*)((uchar*)s2 + step2); \
02485             d  = (type*)((uchar*)d + step); \
02486         } \
02487         if (i == height) \
02488         { \
02489             CV_IMPL_ADD(CV_IMPL_IPP); \
02490             return; \
02491         } \
02492         setIppErrorStatus(); \
02493     }
02494 #else
02495 #define CALL_IPP_MIN_MAX(fun, type)
02496 #endif
02497 
02498 //=======================================
02499 // Max
02500 //=======================================
02501 
02502 void max8u( const uchar* src1, size_t step1,
02503                    const uchar* src2, size_t step2,
02504                    uchar* dst, size_t step, int width, int height, void* )
02505 {
02506     CALL_HAL(max8u, cv_hal_max8u, src1, step1, src2, step2, dst, step, width, height)
02507     CALL_IPP_MIN_MAX(ippsMaxEvery_8u, uchar)
02508     vBinOp<uchar, cv::OpMax<uchar>, IF_SIMD(VMax<uchar>)>(src1, step1, src2, step2, dst, step, width, height);
02509 }
02510 
02511 void max8s( const schar* src1, size_t step1,
02512                    const schar* src2, size_t step2,
02513                    schar* dst, size_t step, int width, int height, void* )
02514 {
02515     CALL_HAL(max8s, cv_hal_max8s, src1, step1, src2, step2, dst, step, width, height)
02516     vBinOp<schar, cv::OpMax<schar>, IF_SIMD(VMax<schar>)>(src1, step1, src2, step2, dst, step, width, height);
02517 }
02518 
02519 void max16u( const ushort* src1, size_t step1,
02520                     const ushort* src2, size_t step2,
02521                     ushort* dst, size_t step, int width, int height, void* )
02522 {
02523     CALL_HAL(max16u, cv_hal_max16u, src1, step1, src2, step2, dst, step, width, height)
02524     CALL_IPP_MIN_MAX(ippsMaxEvery_16u, ushort)
02525     vBinOp<ushort, cv::OpMax<ushort>, IF_SIMD(VMax<ushort>)>(src1, step1, src2, step2, dst, step, width, height);
02526 }
02527 
02528 void max16s( const short* src1, size_t step1,
02529                     const short* src2, size_t step2,
02530                     short* dst, size_t step, int width, int height, void* )
02531 {
02532     CALL_HAL(max16s, cv_hal_max16s, src1, step1, src2, step2, dst, step, width, height)
02533     vBinOp<short, cv::OpMax<short>, IF_SIMD(VMax<short>)>(src1, step1, src2, step2, dst, step, width, height);
02534 }
02535 
02536 void max32s( const int* src1, size_t step1,
02537                     const int* src2, size_t step2,
02538                     int* dst, size_t step, int width, int height, void* )
02539 {
02540     CALL_HAL(max32s, cv_hal_max32s, src1, step1, src2, step2, dst, step, width, height)
02541     vBinOp32<int, cv::OpMax<int>, IF_SIMD(VMax<int>)>(src1, step1, src2, step2, dst, step, width, height);
02542 }
02543 
02544 void max32f( const float* src1, size_t step1,
02545                     const float* src2, size_t step2,
02546                     float* dst, size_t step, int width, int height, void* )
02547 {
02548     CALL_HAL(max32f, cv_hal_max32f, src1, step1, src2, step2, dst, step, width, height)
02549     CALL_IPP_MIN_MAX(ippsMaxEvery_32f, float)
02550     vBinOp32<float, cv::OpMax<float>, IF_SIMD(VMax<float>)>(src1, step1, src2, step2, dst, step, width, height);
02551 }
02552 
02553 void max64f( const double* src1, size_t step1,
02554                     const double* src2, size_t step2,
02555                     double* dst, size_t step, int width, int height, void* )
02556 {
02557     CALL_HAL(max64f, cv_hal_max64f, src1, step1, src2, step2, dst, step, width, height)
02558     CALL_IPP_MIN_MAX(ippsMaxEvery_64f, double)
02559     vBinOp64<double, cv::OpMax<double>, IF_SIMD(VMax<double>)>(src1, step1, src2, step2, dst, step, width, height);
02560 }
02561 
02562 //=======================================
02563 // Min
02564 //=======================================
02565 
02566 void min8u( const uchar* src1, size_t step1,
02567                    const uchar* src2, size_t step2,
02568                    uchar* dst, size_t step, int width, int height, void* )
02569 {
02570     CALL_HAL(min8u, cv_hal_min8u, src1, step1, src2, step2, dst, step, width, height)
02571     CALL_IPP_MIN_MAX(ippsMinEvery_8u, uchar)
02572     vBinOp<uchar, cv::OpMin<uchar>, IF_SIMD(VMin<uchar>)>(src1, step1, src2, step2, dst, step, width, height);
02573 }
02574 
02575 void min8s( const schar* src1, size_t step1,
02576                    const schar* src2, size_t step2,
02577                    schar* dst, size_t step, int width, int height, void* )
02578 {
02579     CALL_HAL(min8s, cv_hal_min8s, src1, step1, src2, step2, dst, step, width, height)
02580     vBinOp<schar, cv::OpMin<schar>, IF_SIMD(VMin<schar>)>(src1, step1, src2, step2, dst, step, width, height);
02581 }
02582 
02583 void min16u( const ushort* src1, size_t step1,
02584                     const ushort* src2, size_t step2,
02585                     ushort* dst, size_t step, int width, int height, void* )
02586 {
02587     CALL_HAL(min16u, cv_hal_min16u, src1, step1, src2, step2, dst, step, width, height)
02588     CALL_IPP_MIN_MAX(ippsMinEvery_16u, ushort)
02589     vBinOp<ushort, cv::OpMin<ushort>, IF_SIMD(VMin<ushort>)>(src1, step1, src2, step2, dst, step, width, height);
02590 }
02591 
02592 void min16s( const short* src1, size_t step1,
02593                     const short* src2, size_t step2,
02594                     short* dst, size_t step, int width, int height, void* )
02595 {
02596     CALL_HAL(min16s, cv_hal_min16s, src1, step1, src2, step2, dst, step, width, height)
02597     vBinOp<short, cv::OpMin<short>, IF_SIMD(VMin<short>)>(src1, step1, src2, step2, dst, step, width, height);
02598 }
02599 
02600 void min32s( const int* src1, size_t step1,
02601                     const int* src2, size_t step2,
02602                     int* dst, size_t step, int width, int height, void* )
02603 {
02604     CALL_HAL(min32s, cv_hal_min32s, src1, step1, src2, step2, dst, step, width, height)
02605     vBinOp32<int, cv::OpMin<int>, IF_SIMD(VMin<int>)>(src1, step1, src2, step2, dst, step, width, height);
02606 }
02607 
02608 void min32f( const float* src1, size_t step1,
02609                     const float* src2, size_t step2,
02610                     float* dst, size_t step, int width, int height, void* )
02611 {
02612     CALL_HAL(min32f, cv_hal_min32f, src1, step1, src2, step2, dst, step, width, height)
02613     CALL_IPP_MIN_MAX(ippsMinEvery_32f, float)
02614     vBinOp32<float, cv::OpMin<float>, IF_SIMD(VMin<float>)>(src1, step1, src2, step2, dst, step, width, height);
02615 }
02616 
02617 void min64f( const double* src1, size_t step1,
02618                     const double* src2, size_t step2,
02619                     double* dst, size_t step, int width, int height, void* )
02620 {
02621     CALL_HAL(min64f, cv_hal_min64f, src1, step1, src2, step2, dst, step, width, height)
02622     CALL_IPP_MIN_MAX(ippsMinEvery_64f, double)
02623     vBinOp64<double, cv::OpMin<double>, IF_SIMD(VMin<double>)>(src1, step1, src2, step2, dst, step, width, height);
02624 }
02625 
02626 //=======================================
02627 // AbsDiff
02628 //=======================================
02629 
02630 void absdiff8u( const uchar* src1, size_t step1,
02631                        const uchar* src2, size_t step2,
02632                        uchar* dst, size_t step, int width, int height, void* )
02633 {
02634     CALL_HAL(absdiff8u, cv_hal_absdiff8u, src1, step1, src2, step2, dst, step, width, height)
02635     CALL_IPP_BIN_12(ippiAbsDiff_8u_C1R)
02636     (vBinOp<uchar, cv::OpAbsDiff<uchar>, IF_SIMD(VAbsDiff<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
02637 }
02638 
02639 void absdiff8s( const schar* src1, size_t step1,
02640                        const schar* src2, size_t step2,
02641                        schar* dst, size_t step, int width, int height, void* )
02642 {
02643     CALL_HAL(absdiff8s, cv_hal_absdiff8s, src1, step1, src2, step2, dst, step, width, height)
02644     vBinOp<schar, cv::OpAbsDiff<schar>, IF_SIMD(VAbsDiff<schar>)>(src1, step1, src2, step2, dst, step, width, height);
02645 }
02646 
02647 void absdiff16u( const ushort* src1, size_t step1,
02648                         const ushort* src2, size_t step2,
02649                         ushort* dst, size_t step, int width, int height, void* )
02650 {
02651     CALL_HAL(absdiff16u, cv_hal_absdiff16u, src1, step1, src2, step2, dst, step, width, height)
02652     CALL_IPP_BIN_12(ippiAbsDiff_16u_C1R)
02653     (vBinOp<ushort, cv::OpAbsDiff<ushort>, IF_SIMD(VAbsDiff<ushort>)>(src1, step1, src2, step2, dst, step, width, height));
02654 }
02655 
02656 void absdiff16s( const short* src1, size_t step1,
02657                         const short* src2, size_t step2,
02658                         short* dst, size_t step, int width, int height, void* )
02659 {
02660     CALL_HAL(absdiff16s, cv_hal_absdiff16s, src1, step1, src2, step2, dst, step, width, height)
02661     vBinOp<short, cv::OpAbsDiff<short>, IF_SIMD(VAbsDiff<short>)>(src1, step1, src2, step2, dst, step, width, height);
02662 }
02663 
02664 void absdiff32s( const int* src1, size_t step1,
02665                         const int* src2, size_t step2,
02666                         int* dst, size_t step, int width, int height, void* )
02667 {
02668     CALL_HAL(absdiff32s, cv_hal_absdiff32s, src1, step1, src2, step2, dst, step, width, height)
02669     vBinOp32<int, cv::OpAbsDiff<int>, IF_SIMD(VAbsDiff<int>)>(src1, step1, src2, step2, dst, step, width, height);
02670 }
02671 
02672 void absdiff32f( const float* src1, size_t step1,
02673                         const float* src2, size_t step2,
02674                         float* dst, size_t step, int width, int height, void* )
02675 {
02676     CALL_HAL(absdiff32f, cv_hal_absdiff32f, src1, step1, src2, step2, dst, step, width, height)
02677     CALL_IPP_BIN_12(ippiAbsDiff_32f_C1R)
02678     (vBinOp32<float, cv::OpAbsDiff<float>, IF_SIMD(VAbsDiff<float>)>(src1, step1, src2, step2, dst, step, width, height));
02679 }
02680 
02681 void absdiff64f( const double* src1, size_t step1,
02682                         const double* src2, size_t step2,
02683                         double* dst, size_t step, int width, int height, void* )
02684 {
02685     CALL_HAL(absdiff64f, cv_hal_absdiff64f, src1, step1, src2, step2, dst, step, width, height)
02686     vBinOp64<double, cv::OpAbsDiff<double>, IF_SIMD(VAbsDiff<double>)>(src1, step1, src2, step2, dst, step, width, height);
02687 }
02688 
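// Illustrative usage sketch: the absdiff* kernels compute |src1 - src2| with
// saturation to the element type, which is what cv::absdiff() exposes. A
// minimal example with arbitrary values; the helper name is hypothetical.
static void absdiffUsageSketch()
{
    cv::Mat a(2, 2, CV_8UC1, cv::Scalar(200));
    cv::Mat b(2, 2, CV_8UC1, cv::Scalar(55));
    cv::Mat d;
    cv::absdiff(a, b, d);                    // per-pixel |200 - 55| = 145
    CV_Assert(d.at<uchar>(0, 0) == 145);
}
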
02689 //=======================================
02690 // Logical
02691 //=======================================
02692 
02693 #if (ARITHM_USE_IPP == 1)
02694 #define CALL_IPP_UN(fun) \
02695     CV_IPP_CHECK() \
02696     { \
02697         fixSteps(width, height, sizeof(dst[0]), step1, step2, step); (void)src2; \
02698         if (0 <= fun(src1, (int)step1, dst, (int)step, ippiSize(width, height))) \
02699         { \
02700             CV_IMPL_ADD(CV_IMPL_IPP); \
02701             return; \
02702         } \
02703         setIppErrorStatus(); \
02704     }
02705 #else
02706 #define CALL_IPP_UN(fun)
02707 #endif
02708 
02709 void and8u( const uchar* src1, size_t step1,
02710                    const uchar* src2, size_t step2,
02711                    uchar* dst, size_t step, int width, int height, void* )
02712 {
02713     CALL_HAL(and8u, cv_hal_and8u, src1, step1, src2, step2, dst, step, width, height)
02714     CALL_IPP_BIN_12(ippiAnd_8u_C1R)
02715     (vBinOp<uchar, cv::OpAnd<uchar>, IF_SIMD(VAnd<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
02716 }
02717 
02718 void or8u( const uchar* src1, size_t step1,
02719                   const uchar* src2, size_t step2,
02720                   uchar* dst, size_t step, int width, int height, void* )
02721 {
02722     CALL_HAL(or8u, cv_hal_or8u, src1, step1, src2, step2, dst, step, width, height)
02723     CALL_IPP_BIN_12(ippiOr_8u_C1R)
02724     (vBinOp<uchar, cv::OpOr<uchar>, IF_SIMD(VOr<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
02725 }
02726 
02727 void xor8u( const uchar* src1, size_t step1,
02728                    const uchar* src2, size_t step2,
02729                    uchar* dst, size_t step, int width, int height, void* )
02730 {
02731     CALL_HAL(xor8u, cv_hal_xor8u, src1, step1, src2, step2, dst, step, width, height)
02732     CALL_IPP_BIN_12(ippiXor_8u_C1R)
02733     (vBinOp<uchar, cv::OpXor<uchar>, IF_SIMD(VXor<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
02734 }
02735 
02736 void not8u( const uchar* src1, size_t step1,
02737                    const uchar* src2, size_t step2,
02738                    uchar* dst, size_t step, int width, int height, void* )
02739 {
02740     CALL_HAL(not8u, cv_hal_not8u, src1, step1, dst, step, width, height)
02741     CALL_IPP_UN(ippiNot_8u_C1R)
02742     (vBinOp<uchar, cv::OpNot<uchar>, IF_SIMD(VNot<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
02743 }
02744 
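// Illustrative usage sketch: and8u/or8u/xor8u/not8u operate on raw bytes, so
// these 8u kernels serve cv::bitwise_and(), bitwise_or(), bitwise_xor() and
// bitwise_not() regardless of the element type. Arbitrary example values; the
// helper name is hypothetical.
static void bitwiseUsageSketch()
{
    cv::Mat a(1, 4, CV_8UC1, cv::Scalar(0xF0));
    cv::Mat b(1, 4, CV_8UC1, cv::Scalar(0x3C));
    cv::Mat r;
    cv::bitwise_and(a, b, r);                // 0xF0 & 0x3C = 0x30
    CV_Assert(r.at<uchar>(0, 0) == 0x30);
    cv::bitwise_not(a, r);                   // ~0xF0 = 0x0F
    CV_Assert(r.at<uchar>(0, 0) == 0x0F);
}
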
02745 //=======================================
02746 
02747 #if ARITHM_USE_IPP
02748 inline static IppCmpOp convert_cmp(int _cmpop)
02749 {
02750     return _cmpop == CMP_EQ ? ippCmpEq :
02751         _cmpop == CMP_GT ? ippCmpGreater :
02752         _cmpop == CMP_GE ? ippCmpGreaterEq :
02753         _cmpop == CMP_LT ? ippCmpLess :
02754         _cmpop == CMP_LE ? ippCmpLessEq :
02755         (IppCmpOp)-1;
02756 }
02757 #define CALL_IPP_CMP(fun) \
02758     CV_IPP_CHECK() \
02759     { \
02760         IppCmpOp op = convert_cmp(*(int *)_cmpop); \
02761         if( op  >= 0 ) \
02762         { \
02763             fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
02764             if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), op)) \
02765             { \
02766                 CV_IMPL_ADD(CV_IMPL_IPP); \
02767                 return; \
02768             } \
02769             setIppErrorStatus(); \
02770         } \
02771     }
02772 #else
02773 #define CALL_IPP_CMP(fun)
02774 #endif
02775 
02776 //=======================================
02777 // Compare
02778 //=======================================
02779 
02780 void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
02781                   uchar* dst, size_t step, int width, int height, void* _cmpop)
02782 {
02783     CALL_HAL(cmp8u, cv_hal_cmp8u, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
02784     CALL_IPP_CMP(ippiCompare_8u_C1R)
02785     // hand-optimized below; the generic fallback would be cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
02786     int code = *(int*)_cmpop;
02787     step1 /= sizeof(src1[0]);
02788     step2 /= sizeof(src2[0]);
02789     if( code == CMP_GE || code == CMP_LT )
02790     {
02791         std::swap(src1, src2);
02792         std::swap(step1, step2);
02793         code = code == CMP_GE ? CMP_LE : CMP_GT;
02794     }
02795 
02796     if( code == CMP_GT || code == CMP_LE )
02797     {
02798         int m = code == CMP_GT ? 0 : 255;
02799         for( ; height--; src1 += step1, src2 += step2, dst += step )
02800         {
02801             int x = 0;
02802             #if CV_SSE2
02803             if( USE_SSE2 )
02804             {
02805                 __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi8 (-1);
02806                 __m128i c128 = _mm_set1_epi8 (-128);
02807                 for( ; x <= width - 16; x += 16 )
02808                 {
02809                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
02810                     __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
02811                     // SSE2 compares signed bytes only, so bias both operands by 128 into the signed range and compare there
02812                     r00 = _mm_sub_epi8(r00,c128);
02813                     r10 = _mm_sub_epi8(r10,c128);
02814 
02815                     r00 =_mm_xor_si128(_mm_cmpgt_epi8(r00, r10), m128);
02816                     _mm_storeu_si128((__m128i*)(dst + x),r00);
02817 
02818                 }
02819             }
02820             #elif CV_NEON
02821             uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255);
02822 
02823             for( ; x <= width - 16; x += 16 )
02824             {
02825                 vst1q_u8(dst+x, veorq_u8(vcgtq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask));
02826             }
02827 
02828             #endif
02829 
02830             for( ; x < width; x++ ){
02831                 dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
02832             }
02833         }
02834     }
02835     else if( code == CMP_EQ || code == CMP_NE )
02836     {
02837         int m = code == CMP_EQ ? 0 : 255;
02838         for( ; height--; src1 += step1, src2 += step2, dst += step )
02839         {
02840             int x = 0;
02841             #if CV_SSE2
02842             if( USE_SSE2 )
02843             {
02844                 __m128i m128 =  code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi8 (-1);
02845                 for( ; x <= width - 16; x += 16 )
02846                 {
02847                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
02848                     __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
02849                     r00 = _mm_xor_si128 ( _mm_cmpeq_epi8 (r00, r10), m128);
02850                     _mm_storeu_si128((__m128i*)(dst + x), r00);
02851                 }
02852             }
02853             #elif CV_NEON
02854             uint8x16_t mask = code == CMP_EQ ? vdupq_n_u8(0) : vdupq_n_u8(255);
02855 
02856             for( ; x <= width - 16; x += 16 )
02857             {
02858                 vst1q_u8(dst+x, veorq_u8(vceqq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask));
02859             }
02860             #endif
02861             for( ; x < width; x++ )
02862                 dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
02863         }
02864     }
02865 }
02866 
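// Illustrative sketch of the bias trick used in the SSE2 branch of cmp8u():
// _mm_cmpgt_epi8 compares *signed* bytes, so both operands are shifted by 128
// into the signed range, where the ordering of the original unsigned values is
// preserved. A scalar demonstration with arbitrary values (the helper name is
// hypothetical):
static void cmp8uBiasTrickSketch()
{
    unsigned char a = 200, b = 55;                 // unsigned: a > b
    signed char sa = (signed char)(a - 128);       // 200 - 128 ->  72
    signed char sb = (signed char)(b - 128);       //  55 - 128 -> -73
    CV_Assert((a > b) == (sa > sb));               // ordering survives the bias
}
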
02867 void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2,
02868                   uchar* dst, size_t step, int width, int height, void* _cmpop)
02869 {
02870     CALL_HAL(cmp8s, cv_hal_cmp8s, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
02871     cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
02872 }
02873 
02874 void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
02875                   uchar* dst, size_t step, int width, int height, void* _cmpop)
02876 {
02877     CALL_HAL(cmp16u, cv_hal_cmp16u, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
02878     CALL_IPP_CMP(ippiCompare_16u_C1R)
02879     cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
02880 }
02881 
02882 void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2,
02883                   uchar* dst, size_t step, int width, int height, void* _cmpop)
02884 {
02885     CALL_HAL(cmp16s, cv_hal_cmp16s, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
02886     CALL_IPP_CMP(ippiCompare_16s_C1R)
02887     // hand-optimized below; the generic fallback would be cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
02888 
02889     int code = *(int*)_cmpop;
02890     step1 /= sizeof(src1[0]);
02891     step2 /= sizeof(src2[0]);
02892     if( code == CMP_GE || code == CMP_LT )
02893     {
02894         std::swap(src1, src2);
02895         std::swap(step1, step2);
02896         code = code == CMP_GE ? CMP_LE : CMP_GT;
02897     }
02898 
02899     if( code == CMP_GT || code == CMP_LE )
02900     {
02901         int m = code == CMP_GT ? 0 : 255;
02902         for( ; height--; src1 += step1, src2 += step2, dst += step )
02903         {
02904             int x = 0;
02905             #if CV_SSE2
02906             if( USE_SSE2 )
02907             {
02908                 __m128i m128 =  code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi16 (-1);
02909                 for( ; x <= width - 16; x += 16 )
02910                 {
02911                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
02912                     __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
02913                     r00 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r00, r10), m128);
02914                     __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8));
02915                     __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8));
02916                     r01 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r01, r11), m128);
02917                     r11 = _mm_packs_epi16(r00, r01);
02918                     _mm_storeu_si128((__m128i*)(dst + x), r11);
02919                 }
02920                 if( x <= width-8)
02921                 {
02922                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
02923                     __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
02924                     r00 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r00, r10), m128);
02925                     r10 = _mm_packs_epi16(r00, r00);
02926                     _mm_storel_epi64((__m128i*)(dst + x), r10);
02927 
02928                     x += 8;
02929                 }
02930             }
02931             #elif CV_NEON
02932             uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255);
02933 
02934             for( ; x <= width - 16; x += 16 )
02935             {
02936                 int16x8_t in1 = vld1q_s16(src1 + x);
02937                 int16x8_t in2 = vld1q_s16(src2 + x);
02938                 uint8x8_t t1 = vmovn_u16(vcgtq_s16(in1, in2));
02939 
02940                 in1 = vld1q_s16(src1 + x + 8);
02941                 in2 = vld1q_s16(src2 + x + 8);
02942                 uint8x8_t t2 = vmovn_u16(vcgtq_s16(in1, in2));
02943 
02944                 vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask));
02945             }
02946             #endif
02947 
02948             for( ; x < width; x++ ){
02949                  dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
02950             }
02951         }
02952     }
02953     else if( code == CMP_EQ || code == CMP_NE )
02954     {
02955         int m = code == CMP_EQ ? 0 : 255;
02956         for( ; height--; src1 += step1, src2 += step2, dst += step )
02957         {
02958             int x = 0;
02959             #if CV_SSE2
02960             if( USE_SSE2 )
02961             {
02962                 __m128i m128 =  code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi16 (-1);
02963                 for( ; x <= width - 16; x += 16 )
02964                 {
02965                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
02966                     __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
02967                     r00 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r00, r10), m128);
02968                     __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8));
02969                     __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8));
02970                     r01 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r01, r11), m128);
02971                     r11 = _mm_packs_epi16(r00, r01);
02972                     _mm_storeu_si128((__m128i*)(dst + x), r11);
02973                 }
02974                 if( x <= width - 8)
02975                 {
02976                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
02977                     __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
02978                     r00 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r00, r10), m128);
02979                     r10 = _mm_packs_epi16(r00, r00);
02980                     _mm_storel_epi64((__m128i*)(dst + x), r10);
02981 
02982                     x += 8;
02983                 }
02984             }
02985             #elif CV_NEON
02986             uint8x16_t mask = code == CMP_EQ ? vdupq_n_u8(0) : vdupq_n_u8(255);
02987 
02988             for( ; x <= width - 16; x += 16 )
02989             {
02990                 int16x8_t in1 = vld1q_s16(src1 + x);
02991                 int16x8_t in2 = vld1q_s16(src2 + x);
02992                 uint8x8_t t1 = vmovn_u16(vceqq_s16(in1, in2));
02993 
02994                 in1 = vld1q_s16(src1 + x + 8);
02995                 in2 = vld1q_s16(src2 + x + 8);
02996                 uint8x8_t t2 = vmovn_u16(vceqq_s16(in1, in2));
02997 
02998                 vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask));
02999             }
03000             #endif
03001             for( ; x < width; x++ )
03002                 dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
03003         }
03004     }
03005 }
03006 
03007 void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2,
03008                    uchar* dst, size_t step, int width, int height, void* _cmpop)
03009 {
03010     CALL_HAL(cmp32s, cv_hal_cmp32s, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
03011     cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
03012 }
03013 
03014 void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2,
03015                   uchar* dst, size_t step, int width, int height, void* _cmpop)
03016 {
03017     CALL_HAL(cmp32f, cv_hal_cmp32f, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
03018     CALL_IPP_CMP(ippiCompare_32f_C1R)
03019     cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
03020 }
03021 
03022 void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2,
03023                   uchar* dst, size_t step, int width, int height, void* _cmpop)
03024 {
03025     CALL_HAL(cmp64f, cv_hal_cmp64f, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
03026     cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
03027 }
03028 
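// Illustrative usage sketch: every cmp* kernel fills the destination with 255
// where the predicate holds and 0 elsewhere, the 8-bit mask convention exposed
// by cv::compare(). Arbitrary example values; the helper name is hypothetical.
static void compareUsageSketch()
{
    cv::Mat a(1, 3, CV_16SC1);
    a.at<short>(0, 0) = 5; a.at<short>(0, 1) = 10; a.at<short>(0, 2) = 20;
    cv::Mat b(1, 3, CV_16SC1, cv::Scalar(10));
    cv::Mat mask;
    cv::compare(a, b, mask, cv::CMP_GT);     // mask = {0, 0, 255}, type CV_8U
    CV_Assert(mask.type() == CV_8UC1 && mask.at<uchar>(0, 2) == 255);
}
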
03029 //=======================================
03030 
03031 #if defined HAVE_IPP
03032 #define CALL_IPP_MUL(fun) \
03033     CV_IPP_CHECK() \
03034     { \
03035         if (std::fabs(fscale - 1) <= FLT_EPSILON) \
03036         { \
03037             if (fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0) >= 0) \
03038             { \
03039                 CV_IMPL_ADD(CV_IMPL_IPP); \
03040                 return; \
03041             } \
03042             setIppErrorStatus(); \
03043         } \
03044     }
03045 
03046 #define CALL_IPP_MUL_2(fun) \
03047     CV_IPP_CHECK() \
03048     { \
03049         if (std::fabs(fscale - 1) <= FLT_EPSILON) \
03050         { \
03051             if (fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height)) >= 0) \
03052             { \
03053                 CV_IMPL_ADD(CV_IMPL_IPP); \
03054                 return; \
03055             } \
03056             setIppErrorStatus(); \
03057         } \
03058     }
03059 
03060 #else
03061 #define CALL_IPP_MUL(fun)
03062 #define CALL_IPP_MUL_2(fun)
03063 #endif
03064 
03065 //=======================================
03066 // Multiply
03067 //=======================================
03068 
03069 void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
03070                    uchar* dst, size_t step, int width, int height, void* scale)
03071 {
03072     CALL_HAL(mul8u, cv_hal_mul8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03073     float fscale = (float)*(const double*)scale;
03074     CALL_IPP_MUL(ippiMul_8u_C1RSfs)
03075     mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
03076 }
03077 
03078 void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
03079                    schar* dst, size_t step, int width, int height, void* scale)
03080 {
03081     CALL_HAL(mul8s, cv_hal_mul8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03082     mul_(src1, step1, src2, step2, dst, step, width, height, (float)*(const double*)scale);
03083 }
03084 
03085 void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
03086                     ushort* dst, size_t step, int width, int height, void* scale)
03087 {
03088     CALL_HAL(mul16u, cv_hal_mul16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03089     float fscale = (float)*(const double*)scale;
03090     CALL_IPP_MUL(ippiMul_16u_C1RSfs)
03091     mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
03092 }
03093 
03094 void mul16s( const short* src1, size_t step1, const short* src2, size_t step2,
03095                     short* dst, size_t step, int width, int height, void* scale)
03096 {
03097     CALL_HAL(mul16s, cv_hal_mul16s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03098     float fscale = (float)*(const double*)scale;
03099     CALL_IPP_MUL(ippiMul_16s_C1RSfs)
03100     mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
03101 }
03102 
03103 void mul32s( const int* src1, size_t step1, const int* src2, size_t step2,
03104                     int* dst, size_t step, int width, int height, void* scale)
03105 {
03106     CALL_HAL(mul32s, cv_hal_mul32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03107     mul_(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03108 }
03109 
03110 void mul32f( const float* src1, size_t step1, const float* src2, size_t step2,
03111                     float* dst, size_t step, int width, int height, void* scale)
03112 {
03113     CALL_HAL(mul32f, cv_hal_mul32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03114     float fscale = (float)*(const double*)scale;
03115     CALL_IPP_MUL_2(ippiMul_32f_C1R)
03116     mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
03117 }
03118 
03119 void mul64f( const double* src1, size_t step1, const double* src2, size_t step2,
03120                     double* dst, size_t step, int width, int height, void* scale)
03121 {
03122     CALL_HAL(mul64f, cv_hal_mul64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03123     mul_(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03124 }
03125 
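// Illustrative usage sketch: the mul* kernels receive the extra scale factor
// through the void* argument and compute dst = saturate(src1 * src2 * scale),
// which is cv::multiply() with its optional scale parameter. Arbitrary example
// values; the helper name is hypothetical.
static void multiplyUsageSketch()
{
    cv::Mat a(1, 2, CV_8UC1, cv::Scalar(100));
    cv::Mat b(1, 2, CV_8UC1, cv::Scalar(4));
    cv::Mat p;
    cv::multiply(a, b, p, 0.5);              // 100 * 4 * 0.5 = 200
    CV_Assert(p.at<uchar>(0, 0) == 200);
}
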
03126 //=======================================
03127 // Divide
03128 //=======================================
03129 
03130 void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
03131                    uchar* dst, size_t step, int width, int height, void* scale)
03132 {
03133     CALL_HAL(div8u, cv_hal_div8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03134     if( src1 )
03135         div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03136     else
03137         recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03138 }
03139 
03140 void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
03141                   schar* dst, size_t step, int width, int height, void* scale)
03142 {
03143     CALL_HAL(div8s, cv_hal_div8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03144     div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03145 }
03146 
03147 void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
03148                     ushort* dst, size_t step, int width, int height, void* scale)
03149 {
03150     CALL_HAL(div16u, cv_hal_div16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03151     div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03152 }
03153 
03154 void div16s( const short* src1, size_t step1, const short* src2, size_t step2,
03155                     short* dst, size_t step, int width, int height, void* scale)
03156 {
03157     CALL_HAL(div16s, cv_hal_div16s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03158     div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03159 }
03160 
03161 void div32s( const int* src1, size_t step1, const int* src2, size_t step2,
03162                     int* dst, size_t step, int width, int height, void* scale)
03163 {
03164     CALL_HAL(div32s, cv_hal_div32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03165     div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03166 }
03167 
03168 void div32f( const float* src1, size_t step1, const float* src2, size_t step2,
03169                     float* dst, size_t step, int width, int height, void* scale)
03170 {
03171     CALL_HAL(div32f, cv_hal_div32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03172     div_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03173 }
03174 
03175 void div64f( const double* src1, size_t step1, const double* src2, size_t step2,
03176                     double* dst, size_t step, int width, int height, void* scale)
03177 {
03178     CALL_HAL(div64f, cv_hal_div64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03179     div_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03180 }
03181 
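// Illustrative usage sketch: the div* kernels compute
// dst = saturate(src1 * scale / src2) and, per the cv::divide() documentation,
// write 0 wherever src2 is zero. Arbitrary example values; the helper name is
// hypothetical.
static void divideUsageSketch()
{
    cv::Mat a(1, 2, CV_32FC1, cv::Scalar(9.f));
    cv::Mat b(1, 2, CV_32FC1, cv::Scalar(3.f));
    cv::Mat q;
    cv::divide(a, b, q, 2.0);                // 9 * 2 / 3 = 6 per element
    CV_Assert(q.at<float>(0, 0) == 6.f);
}
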
03182 //=======================================
03183 // Reciprocal
03184 //=======================================
03185 
03186 void recip8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
03187                   uchar* dst, size_t step, int width, int height, void* scale)
03188 {
03189     CALL_HAL(recip8u, cv_hal_recip8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03190     recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03191 }
03192 
03193 void recip8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
03194                   schar* dst, size_t step, int width, int height, void* scale)
03195 {
03196     CALL_HAL(recip8s, cv_hal_recip8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03197     recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03198 }
03199 
03200 void recip16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
03201                    ushort* dst, size_t step, int width, int height, void* scale)
03202 {
03203     CALL_HAL(recip16u, cv_hal_recip16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03204     recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03205 }
03206 
03207 void recip16s( const short* src1, size_t step1, const short* src2, size_t step2,
03208                    short* dst, size_t step, int width, int height, void* scale)
03209 {
03210     CALL_HAL(recip16s, cv_hal_recip16s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03211     recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03212 }
03213 
03214 void recip32s( const int* src1, size_t step1, const int* src2, size_t step2,
03215                    int* dst, size_t step, int width, int height, void* scale)
03216 {
03217     CALL_HAL(recip32s, cv_hal_recip32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03218     recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03219 }
03220 
03221 void recip32f( const float* src1, size_t step1, const float* src2, size_t step2,
03222                    float* dst, size_t step, int width, int height, void* scale)
03223 {
03224     CALL_HAL(recip32f, cv_hal_recip32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03225     recip_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03226 }
03227 
03228 void recip64f( const double* src1, size_t step1, const double* src2, size_t step2,
03229                    double* dst, size_t step, int width, int height, void* scale)
03230 {
03231     CALL_HAL(recip64f, cv_hal_recip64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03232     recip_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03233 }
03234 
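// Illustrative usage sketch: the recip* kernels cover the "scalar / matrix"
// form of division, which the cv::divide(scale, src, dst) overload is routed
// to. Arbitrary example values; the helper name is hypothetical.
static void reciprocalUsageSketch()
{
    cv::Mat s(1, 2, CV_32FC1, cv::Scalar(4.f));
    cv::Mat r;
    cv::divide(8.0, s, r);                   // 8 / 4 = 2 per element
    CV_Assert(r.at<float>(0, 0) == 2.f);
}
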
03235 //=======================================
03236 // Add weighted
03237 //=======================================
03238 
03239 void
03240 addWeighted8u( const uchar* src1, size_t step1,
03241                const uchar* src2, size_t step2,
03242                uchar* dst, size_t step, int width, int height,
03243                void* scalars )
03244 {
03245     CALL_HAL(addWeighted8u, cv_hal_addWeighted8u, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
03246     const double* scalars_ = (const double*)scalars;
03247     float alpha = (float)scalars_[0], beta = (float)scalars_[1], gamma = (float)scalars_[2];
03248 
03249     for( ; height--; src1 += step1, src2 += step2, dst += step )
03250     {
03251         int x = 0;
03252 
03253 #if CV_SSE2
03254         if( USE_SSE2 )
03255         {
03256             __m128 a4 = _mm_set1_ps(alpha), b4 = _mm_set1_ps(beta), g4 = _mm_set1_ps(gamma);
03257             __m128i z = _mm_setzero_si128();
03258 
03259             for( ; x <= width - 8; x += 8 )
03260             {
03261                 __m128i u = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src1 + x)), z);
03262                 __m128i v = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src2 + x)), z);
03263 
03264                 __m128 u0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(u, z));
03265                 __m128 u1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(u, z));
03266                 __m128 v0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v, z));
03267                 __m128 v1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, z));
03268 
03269                 u0 = _mm_add_ps(_mm_mul_ps(u0, a4), _mm_mul_ps(v0, b4));
03270                 u1 = _mm_add_ps(_mm_mul_ps(u1, a4), _mm_mul_ps(v1, b4));
03271                 u0 = _mm_add_ps(u0, g4); u1 = _mm_add_ps(u1, g4);
03272 
03273                 u = _mm_packs_epi32(_mm_cvtps_epi32(u0), _mm_cvtps_epi32(u1));
03274                 u = _mm_packus_epi16(u, u);
03275 
03276                 _mm_storel_epi64((__m128i*)(dst + x), u);
03277             }
03278         }
03279 #elif CV_NEON
03280         float32x4_t g = vdupq_n_f32 (gamma);
03281 
03282         for( ; x <= width - 8; x += 8 )
03283         {
03284             uint8x8_t in1 = vld1_u8(src1+x);
03285             uint16x8_t in1_16 = vmovl_u8(in1);
03286             float32x4_t in1_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in1_16)));
03287             float32x4_t in1_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in1_16)));
03288 
03289             uint8x8_t in2 = vld1_u8(src2+x);
03290             uint16x8_t in2_16 = vmovl_u8(in2);
03291             float32x4_t in2_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in2_16)));
03292             float32x4_t in2_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in2_16)));
03293 
03294             float32x4_t out_f_l = vaddq_f32(vmulq_n_f32(in1_f_l, alpha), vmulq_n_f32(in2_f_l, beta));
03295             float32x4_t out_f_h = vaddq_f32(vmulq_n_f32(in1_f_h, alpha), vmulq_n_f32(in2_f_h, beta));
03296             out_f_l = vaddq_f32(out_f_l, g);
03297             out_f_h = vaddq_f32(out_f_h, g);
03298 
03299             uint16x4_t out_16_l = vqmovun_s32(cv_vrndq_s32_f32(out_f_l));
03300             uint16x4_t out_16_h = vqmovun_s32(cv_vrndq_s32_f32(out_f_h));
03301 
03302             uint16x8_t out_16 = vcombine_u16(out_16_l, out_16_h);
03303             uint8x8_t out = vqmovn_u16(out_16);
03304 
03305             vst1_u8(dst+x, out);
03306         }
03307 #endif
03308         #if CV_ENABLE_UNROLLED
03309         for( ; x <= width - 4; x += 4 )
03310         {
03311             float t0, t1;
03312             t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma;
03313             t1 = CV_8TO32F(src1[x+1])*alpha + CV_8TO32F(src2[x+1])*beta + gamma;
03314 
03315             dst[x] = saturate_cast<uchar>(t0);
03316             dst[x+1] = saturate_cast<uchar>(t1);
03317 
03318             t0 = CV_8TO32F(src1[x+2])*alpha + CV_8TO32F(src2[x+2])*beta + gamma;
03319             t1 = CV_8TO32F(src1[x+3])*alpha + CV_8TO32F(src2[x+3])*beta + gamma;
03320 
03321             dst[x+2] = saturate_cast<uchar>(t0);
03322             dst[x+3] = saturate_cast<uchar>(t1);
03323         }
03324         #endif
03325 
03326         for( ; x < width; x++ )
03327         {
03328             float t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma;
03329             dst[x] = saturate_cast<uchar>(t0);
03330         }
03331     }
03332 }
03333 
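// Illustrative usage sketch: addWeighted8u above, like the other addWeighted*
// kernels, evaluates dst = saturate(src1*alpha + src2*beta + gamma) per
// element, the blend exposed by cv::addWeighted(). Arbitrary example values;
// the helper name is hypothetical.
static void addWeightedUsageSketch()
{
    cv::Mat a(1, 2, CV_8UC1, cv::Scalar(100));
    cv::Mat b(1, 2, CV_8UC1, cv::Scalar(50));
    cv::Mat w;
    cv::addWeighted(a, 0.5, b, 2.0, 10.0, w);   // 100*0.5 + 50*2 + 10 = 160
    CV_Assert(w.at<uchar>(0, 0) == 160);
}
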
03334 void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
03335                            schar* dst, size_t step, int width, int height, void* scalars )
03336 {
03337     CALL_HAL(addWeighted8s, cv_hal_addWeighted8s, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
03338     addWeighted_<schar, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
03339 }
03340 
03341 void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
03342                             ushort* dst, size_t step, int width, int height, void* scalars )
03343 {
03344     CALL_HAL(addWeighted16u, cv_hal_addWeighted16u, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
03345     addWeighted_<ushort, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
03346 }
03347 
03348 void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2,
03349                             short* dst, size_t step, int width, int height, void* scalars )
03350 {
03351     CALL_HAL(addWeighted16s, cv_hal_addWeighted16s, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
03352     addWeighted_<short, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
03353 }
03354 
03355 void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2,
03356                             int* dst, size_t step, int width, int height, void* scalars )
03357 {
03358     CALL_HAL(addWeighted32s, cv_hal_addWeighted32s, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
03359     addWeighted_<int, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
03360 }
03361 
03362 void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2,
03363                             float* dst, size_t step, int width, int height, void* scalars )
03364 {
03365     CALL_HAL(addWeighted32f, cv_hal_addWeighted32f, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
03366     addWeighted_<float, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
03367 }
03368 
03369 void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2,
03370                             double* dst, size_t step, int width, int height, void* scalars )
03371 {
03372     CALL_HAL(addWeighted64f, cv_hal_addWeighted64f, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
03373     addWeighted_<double, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
03374 }
03375 
03376 }} // cv::hal::
03377 
03378 /* End of file. */
03379