gr-peach-opencv-project-sd-card_update

Renesas GR-PEACH OpenCV Development » Code » Documentation
Renesas GR-PEACH OpenCV Development / gr-peach-opencv-project-sd-card_update
Fork of gr-peach-opencv-project-sd-card by the do
Embed: (wiki syntax)
Show/hide line numbers arithm.cpp Source File
00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
00015 // Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
00016 // Third party copyrights are property of their respective owners.
00017 //
00018 // Redistribution and use in source and binary forms, with or without modification,
00019 // are permitted provided that the following conditions are met:
00020 //
00021 //   * Redistribution's of source code must retain the above copyright notice,
00022 //     this list of conditions and the following disclaimer.
00023 //
00024 //   * Redistribution's in binary form must reproduce the above copyright notice,
00025 //     this list of conditions and the following disclaimer in the documentation
00026 //     and/or other materials provided with the distribution.
00027 //
00028 //   * The name of the copyright holders may not be used to endorse or promote products
00029 //     derived from this software without specific prior written permission.
00030 //
00031 // This software is provided by the copyright holders and contributors "as is" and
00032 // any express or implied warranties, including, but not limited to, the implied
00033 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00034 // In no event shall the Intel Corporation or contributors be liable for any direct,
00035 // indirect, incidental, special, exemplary, or consequential damages
00036 // (including, but not limited to, procurement of substitute goods or services;
00037 // loss of use, data, or profits; or business interruption) however caused
00038 // and on any theory of liability, whether in contract, strict liability,
00039 // or tort (including negligence or otherwise) arising in any way out of
00040 // the use of this software, even if advised of the possibility of such damage.
00041 //
00042 //M*/
00043 
00044 /* ////////////////////////////////////////////////////////////////////
00045 //
00046 //  Arithmetic and logical operations: +, -, *, /, &, |, ^, ~, abs ...
00047 //
00048 // */
00049 
00050 #include "precomp.hpp"
00051 #include "opencl_kernels_core.hpp"
00052 
00053 namespace cv
00054 {
00055 
00056 /****************************************************************************************\
00057 *                                   logical operations                                   *
00058 \****************************************************************************************/
00059 
00060 void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize )
00061 {
00062     int scn = (int)sc.total(), cn = CV_MAT_CN(buftype);
00063     size_t esz = CV_ELEM_SIZE(buftype);
00064     getConvertFunc(sc.depth(), buftype)(sc.ptr(), 1, 0, 1, scbuf, 1, Size(std::min(cn, scn), 1), 0);
00065     // unroll the scalar
00066     if( scn < cn )
00067     {
00068         CV_Assert( scn == 1 );
00069         size_t esz1 = CV_ELEM_SIZE1(buftype);
00070         for( size_t i = esz1; i < esz; i++ )
00071             scbuf[i] = scbuf[i - esz1];
00072     }
00073     for( size_t i = esz; i < blocksize*esz; i++ )
00074         scbuf[i] = scbuf[i - esz];
00075 }
00076 
00077 
00078 enum { OCL_OP_ADD=0, OCL_OP_SUB=1, OCL_OP_RSUB=2, OCL_OP_ABSDIFF=3, OCL_OP_MUL=4,
00079        OCL_OP_MUL_SCALE=5, OCL_OP_DIV_SCALE=6, OCL_OP_RECIP_SCALE=7, OCL_OP_ADDW=8,
00080        OCL_OP_AND=9, OCL_OP_OR=10, OCL_OP_XOR=11, OCL_OP_NOT=12, OCL_OP_MIN=13, OCL_OP_MAX=14,
00081        OCL_OP_RDIV_SCALE=15 };
00082 
00083 #ifdef HAVE_OPENCL
00084 
00085 static const char* oclop2str[] = { "OP_ADD", "OP_SUB", "OP_RSUB", "OP_ABSDIFF",
00086     "OP_MUL", "OP_MUL_SCALE", "OP_DIV_SCALE", "OP_RECIP_SCALE",
00087     "OP_ADDW", "OP_AND", "OP_OR", "OP_XOR", "OP_NOT", "OP_MIN", "OP_MAX", "OP_RDIV_SCALE", 0 };
00088 
00089 static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
00090                           InputArray _mask, bool bitwise, int oclop, bool haveScalar )
00091 {
00092     bool haveMask = !_mask.empty();
00093     int srctype = _src1.type();
00094     int srcdepth = CV_MAT_DEPTH(srctype);
00095     int cn = CV_MAT_CN(srctype);
00096 
00097     const ocl::Device d = ocl::Device::getDefault();
00098     bool doubleSupport = d.doubleFPConfig() > 0;
00099     if( oclop < 0 || ((haveMask || haveScalar) && cn > 4) ||
00100             (!doubleSupport && srcdepth == CV_64F && !bitwise))
00101         return false;
00102 
00103     char opts[1024];
00104     int kercn = haveMask || haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
00105     int scalarcn = kercn == 3 ? 4 : kercn;
00106     int rowsPerWI = d.isIntel() ? 4 : 1;
00107 
00108     sprintf(opts, "-D %s%s -D %s -D dstT=%s%s -D dstT_C1=%s -D workST=%s -D cn=%d -D rowsPerWI=%d",
00109             haveMask ? "MASK_" : "", haveScalar ? "UNARY_OP" : "BINARY_OP", oclop2str[oclop],
00110             bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, kercn)) :
00111                 ocl::typeToStr(CV_MAKETYPE(srcdepth, kercn)), doubleSupport ? " -D DOUBLE_SUPPORT" : "",
00112             bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, 1)) :
00113                 ocl::typeToStr(CV_MAKETYPE(srcdepth, 1)),
00114             bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, scalarcn)) :
00115                 ocl::typeToStr(CV_MAKETYPE(srcdepth, scalarcn)),
00116             kercn, rowsPerWI);
00117 
00118     ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
00119     if (k.empty())
00120         return false;
00121 
00122     UMat src1 = _src1.getUMat(), src2;
00123     UMat dst = _dst.getUMat(), mask = _mask.getUMat();
00124 
00125     ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cn, kercn);
00126     ocl::KernelArg dstarg = haveMask ? ocl::KernelArg::ReadWrite(dst, cn, kercn) :
00127                                        ocl::KernelArg::WriteOnly(dst, cn, kercn);
00128     ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask, 1);
00129 
00130     if( haveScalar )
00131     {
00132         size_t esz = CV_ELEM_SIZE1(srctype)*scalarcn;
00133         double buf[4] = {0,0,0,0};
00134 
00135         if( oclop != OCL_OP_NOT )
00136         {
00137             Mat src2sc = _src2.getMat();
00138             convertAndUnrollScalar(src2sc, srctype, (uchar*)buf, 1);
00139         }
00140 
00141         ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, 0, buf, esz);
00142 
00143         if( !haveMask )
00144             k.args(src1arg, dstarg, scalararg);
00145         else
00146             k.args(src1arg, maskarg, dstarg, scalararg);
00147     }
00148     else
00149     {
00150         src2 = _src2.getUMat();
00151         ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cn, kercn);
00152 
00153         if( !haveMask )
00154             k.args(src1arg, src2arg, dstarg);
00155         else
00156             k.args(src1arg, src2arg, maskarg, dstarg);
00157     }
00158 
00159     size_t globalsize[] = { (size_t)src1.cols * cn / kercn, ((size_t)src1.rows + rowsPerWI - 1) / rowsPerWI };
00160     return k.run(2, globalsize, 0, false);
00161 }
00162 
00163 #endif
00164 
00165 static void binary_op( InputArray _src1, InputArray _src2, OutputArray _dst,
00166                        InputArray _mask, const BinaryFuncC* tab,
00167                        bool bitwise, int oclop )
00168 {
00169     const _InputArray *psrc1 = &_src1, *psrc2 = &_src2;
00170     int kind1 = psrc1->kind(), kind2 = psrc2->kind();
00171     int type1 = psrc1->type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
00172     int type2 = psrc2->type(), depth2 = CV_MAT_DEPTH(type2), cn2 = CV_MAT_CN(type2);
00173     int dims1 = psrc1->dims(), dims2 = psrc2->dims();
00174     Size sz1 = dims1 <= 2 ? psrc1->size() : Size();
00175     Size sz2 = dims2 <= 2 ? psrc2->size() : Size();
00176 #ifdef HAVE_OPENCL
00177     bool use_opencl = (kind1 == _InputArray::UMAT || kind2 == _InputArray::UMAT) &&
00178             dims1 <= 2 && dims2 <= 2;
00179 #endif
00180     bool haveMask = !_mask.empty(), haveScalar = false;
00181     BinaryFuncC func;
00182 
00183     if( dims1 <= 2 && dims2 <= 2 && kind1 == kind2 && sz1 == sz2 && type1 == type2 && !haveMask )
00184     {
00185         _dst.create(sz1, type1);
00186 #ifdef HAVE_OPENCL
00187         CV_OCL_RUN(use_opencl,
00188                    ocl_binary_op(*psrc1, *psrc2, _dst, _mask, bitwise, oclop, false))
00189 #endif
00190 
00191         if( bitwise )
00192         {
00193             func = *tab;
00194             cn = (int)CV_ELEM_SIZE(type1);
00195         }
00196         else
00197             func = tab[depth1];
00198 
00199         Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat();
00200         Size sz = getContinuousSize(src1, src2, dst);
00201         size_t len = sz.width*(size_t)cn;
00202         if( len == (size_t)(int)len )
00203         {
00204             sz.width = (int)len;
00205             func(src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, 0);
00206             return;
00207         }
00208     }
00209 
00210     if( oclop == OCL_OP_NOT )
00211         haveScalar = true;
00212     else if( (kind1 == _InputArray::MATX) + (kind2 == _InputArray::MATX) == 1 ||
00213         !psrc1->sameSize(*psrc2) || type1 != type2 )
00214     {
00215         if( checkScalar(*psrc1, type2, kind1, kind2) )
00216         {
00217             // src1 is a scalar; swap it with src2
00218             swap(psrc1, psrc2);
00219             swap(type1, type2);
00220             swap(depth1, depth2);
00221             swap(cn, cn2);
00222             swap(sz1, sz2);
00223         }
00224         else if( !checkScalar(*psrc2, type1, kind2, kind1) )
00225             CV_Error( CV_StsUnmatchedSizes,
00226                       "The operation is neither 'array op array' (where arrays have the same size and type), "
00227                       "nor 'array op scalar', nor 'scalar op array'" );
00228         haveScalar = true;
00229     }
00230     else
00231     {
00232         CV_Assert( psrc1->sameSize(*psrc2) && type1 == type2 );
00233     }
00234 
00235     size_t esz = CV_ELEM_SIZE(type1);
00236     size_t blocksize0 = (BLOCK_SIZE + esz-1)/esz;
00237     BinaryFunc copymask = 0;
00238     bool reallocate = false;
00239 
00240     if( haveMask )
00241     {
00242         int mtype = _mask.type();
00243         CV_Assert( (mtype == CV_8U || mtype == CV_8S) && _mask.sameSize(*psrc1));
00244         copymask = getCopyMaskFunc(esz);
00245         reallocate = !_dst.sameSize(*psrc1) || _dst.type() != type1;
00246     }
00247 
00248     AutoBuffer<uchar> _buf;
00249     uchar *scbuf = 0, *maskbuf = 0;
00250 
00251     _dst.createSameSize(*psrc1, type1);
00252     // if this is mask operation and dst has been reallocated,
00253     // we have to clear the destination
00254     if( haveMask && reallocate )
00255         _dst.setTo(0.);
00256 #ifdef HAVE_OPENCL
00257     CV_OCL_RUN(use_opencl,
00258                ocl_binary_op(*psrc1, *psrc2, _dst, _mask, bitwise, oclop, haveScalar))
00259 #endif
00260 
00261 
00262     Mat src1 = psrc1->getMat(), src2 = psrc2->getMat();
00263     Mat dst = _dst.getMat(), mask = _mask.getMat();
00264 
00265     if( bitwise )
00266     {
00267         func = *tab;
00268         cn = (int)esz;
00269     }
00270     else
00271         func = tab[depth1];
00272 
00273     if( !haveScalar )
00274     {
00275         const Mat* arrays[] = { &src1, &src2, &dst, &mask, 0 };
00276         uchar* ptrs[4];
00277 
00278         NAryMatIterator it(arrays, ptrs);
00279         size_t total = it.size, blocksize = total;
00280 
00281         if( blocksize*cn > INT_MAX )
00282             blocksize = INT_MAX/cn;
00283 
00284         if( haveMask )
00285         {
00286             blocksize = std::min(blocksize, blocksize0);
00287             _buf.allocate(blocksize*esz);
00288             maskbuf = _buf;
00289         }
00290 
00291         for( size_t i = 0; i < it.nplanes; i++, ++it )
00292         {
00293             for( size_t j = 0; j < total; j += blocksize )
00294             {
00295                 int bsz = (int)MIN(total - j, blocksize);
00296 
00297                 func( ptrs[0], 0, ptrs[1], 0, haveMask ? maskbuf : ptrs[2], 0, bsz*cn, 1, 0 );
00298                 if( haveMask )
00299                 {
00300                     copymask( maskbuf, 0, ptrs[3], 0, ptrs[2], 0, Size(bsz, 1), &esz );
00301                     ptrs[3] += bsz;
00302                 }
00303 
00304                 bsz *= (int)esz;
00305                 ptrs[0] += bsz; ptrs[1] += bsz; ptrs[2] += bsz;
00306             }
00307         }
00308     }
00309     else
00310     {
00311         const Mat* arrays[] = { &src1, &dst, &mask, 0 };
00312         uchar* ptrs[3];
00313 
00314         NAryMatIterator it(arrays, ptrs);
00315         size_t total = it.size, blocksize = std::min(total, blocksize0);
00316 
00317         _buf.allocate(blocksize*(haveMask ? 2 : 1)*esz + 32);
00318         scbuf = _buf;
00319         maskbuf = alignPtr(scbuf + blocksize*esz, 16);
00320 
00321         convertAndUnrollScalar( src2, src1.type(), scbuf, blocksize);
00322 
00323         for( size_t i = 0; i < it.nplanes; i++, ++it )
00324         {
00325             for( size_t j = 0; j < total; j += blocksize )
00326             {
00327                 int bsz = (int)MIN(total - j, blocksize);
00328 
00329                 func( ptrs[0], 0, scbuf, 0, haveMask ? maskbuf : ptrs[1], 0, bsz*cn, 1, 0 );
00330                 if( haveMask )
00331                 {
00332                     copymask( maskbuf, 0, ptrs[2], 0, ptrs[1], 0, Size(bsz, 1), &esz );
00333                     ptrs[2] += bsz;
00334                 }
00335 
00336                 bsz *= (int)esz;
00337                 ptrs[0] += bsz; ptrs[1] += bsz;
00338             }
00339         }
00340     }
00341 }
00342 
00343 static BinaryFuncC* getMaxTab()
00344 {
00345     static BinaryFuncC maxTab[] =
00346     {
00347         (BinaryFuncC)GET_OPTIMIZED(cv::hal::max8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::max8s),
00348         (BinaryFuncC)GET_OPTIMIZED(cv::hal::max16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::max16s),
00349         (BinaryFuncC)GET_OPTIMIZED(cv::hal::max32s),
00350         (BinaryFuncC)GET_OPTIMIZED(cv::hal::max32f), (BinaryFuncC)cv::hal::max64f,
00351         0
00352     };
00353 
00354     return maxTab;
00355 }
00356 
00357 static BinaryFuncC* getMinTab()
00358 {
00359     static BinaryFuncC minTab[] =
00360     {
00361         (BinaryFuncC)GET_OPTIMIZED(cv::hal::min8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::min8s),
00362         (BinaryFuncC)GET_OPTIMIZED(cv::hal::min16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::min16s),
00363         (BinaryFuncC)GET_OPTIMIZED(cv::hal::min32s),
00364         (BinaryFuncC)GET_OPTIMIZED(cv::hal::min32f), (BinaryFuncC)cv::hal::min64f,
00365         0
00366     };
00367 
00368     return minTab;
00369 }
00370 
00371 }
00372 
00373 void cv::bitwise_and(InputArray a, InputArray b, OutputArray c, InputArray mask)
00374 {
00375     BinaryFuncC f = (BinaryFuncC)GET_OPTIMIZED(cv::hal::and8u);
00376     binary_op(a, b, c, mask, &f, true, OCL_OP_AND);
00377 }
00378 
00379 void cv::bitwise_or(InputArray a, InputArray b, OutputArray c, InputArray mask)
00380 {
00381     BinaryFuncC f = (BinaryFuncC)GET_OPTIMIZED(cv::hal::or8u);
00382     binary_op(a, b, c, mask, &f, true, OCL_OP_OR);
00383 }
00384 
00385 void cv::bitwise_xor(InputArray a, InputArray b, OutputArray c, InputArray mask)
00386 {
00387     BinaryFuncC f = (BinaryFuncC)GET_OPTIMIZED(cv::hal::xor8u);
00388     binary_op(a, b, c, mask, &f, true, OCL_OP_XOR);
00389 }
00390 
00391 void cv::bitwise_not(InputArray a, OutputArray c, InputArray mask)
00392 {
00393     BinaryFuncC f = (BinaryFuncC)GET_OPTIMIZED(cv::hal::not8u);
00394     binary_op(a, a, c, mask, &f, true, OCL_OP_NOT);
00395 }
00396 
00397 void cv::max( InputArray src1, InputArray src2, OutputArray dst )
00398 {
00399     binary_op(src1, src2, dst, noArray(), getMaxTab(), false, OCL_OP_MAX );
00400 }
00401 
00402 void cv::min( InputArray src1, InputArray src2, OutputArray dst )
00403 {
00404     binary_op(src1, src2, dst, noArray(), getMinTab(), false, OCL_OP_MIN );
00405 }
00406 
00407 void cv::max(const Mat& src1, const Mat& src2, Mat& dst)
00408 {
00409     OutputArray _dst(dst);
00410     binary_op(src1, src2, _dst, noArray(), getMaxTab(), false, OCL_OP_MAX );
00411 }
00412 
00413 void cv::min(const Mat& src1, const Mat& src2, Mat& dst)
00414 {
00415     OutputArray _dst(dst);
00416     binary_op(src1, src2, _dst, noArray(), getMinTab(), false, OCL_OP_MIN );
00417 }
00418 
00419 void cv::max(const UMat & src1, const UMat & src2, UMat & dst)
00420 {
00421     OutputArray _dst(dst);
00422     binary_op(src1, src2, _dst, noArray(), getMaxTab(), false, OCL_OP_MAX );
00423 }
00424 
00425 void cv::min(const UMat & src1, const UMat & src2, UMat & dst)
00426 {
00427     OutputArray _dst(dst);
00428     binary_op(src1, src2, _dst, noArray(), getMinTab(), false, OCL_OP_MIN );
00429 }
00430 
00431 
00432 /****************************************************************************************\
00433 *                                      add/subtract                                      *
00434 \****************************************************************************************/
00435 
00436 namespace cv
00437 {
00438 
00439 static int actualScalarDepth(const double* data, int len)
00440 {
00441     int i = 0, minval = INT_MAX, maxval = INT_MIN;
00442     for(; i < len; ++i)
00443     {
00444         int ival = cvRound(data[i]);
00445         if( ival != data[i] )
00446             break;
00447         minval = MIN(minval, ival);
00448         maxval = MAX(maxval, ival);
00449     }
00450     return i < len ? CV_64F :
00451         minval >= 0 && maxval <= (int)UCHAR_MAX ? CV_8U :
00452         minval >= (int)SCHAR_MIN && maxval <= (int)SCHAR_MAX ? CV_8S :
00453         minval >= 0 && maxval <= (int)USHRT_MAX ? CV_16U :
00454         minval >= (int)SHRT_MIN && maxval <= (int)SHRT_MAX ? CV_16S :
00455         CV_32S;
00456 }
00457 
00458 #ifdef HAVE_OPENCL
00459 
00460 static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
00461                           InputArray _mask, int wtype,
00462                           void* usrdata, int oclop,
00463                           bool haveScalar )
00464 {
00465     const ocl::Device d = ocl::Device::getDefault();
00466     bool doubleSupport = d.doubleFPConfig() > 0;
00467     int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
00468     bool haveMask = !_mask.empty();
00469 
00470     if ( (haveMask || haveScalar) && cn > 4 )
00471         return false;
00472 
00473     int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32S, CV_MAT_DEPTH(wtype));
00474     if (!doubleSupport)
00475         wdepth = std::min(wdepth, CV_32F);
00476 
00477     wtype = CV_MAKETYPE(wdepth, cn);
00478     int type2 = haveScalar ? wtype : _src2.type(), depth2 = CV_MAT_DEPTH(type2);
00479     if (!doubleSupport && (depth2 == CV_64F || depth1 == CV_64F))
00480         return false;
00481 
00482     int kercn = haveMask || haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
00483     int scalarcn = kercn == 3 ? 4 : kercn, rowsPerWI = d.isIntel() ? 4 : 1;
00484 
00485     char cvtstr[4][32], opts[1024];
00486     sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT1_C1=%s -D srcT2=%s -D srcT2_C1=%s "
00487             "-D dstT=%s -D dstT_C1=%s -D workT=%s -D workST=%s -D scaleT=%s -D wdepth=%d -D convertToWT1=%s "
00488             "-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d -D rowsPerWI=%d -D convertFromU=%s",
00489             (haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"),
00490             oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)),
00491             ocl::typeToStr(depth1), ocl::typeToStr(CV_MAKETYPE(depth2, kercn)),
00492             ocl::typeToStr(depth2), ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)),
00493             ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)),
00494             ocl::typeToStr(CV_MAKETYPE(wdepth, scalarcn)),
00495             ocl::typeToStr(wdepth), wdepth,
00496             ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]),
00497             ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]),
00498             ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2]),
00499             doubleSupport ? " -D DOUBLE_SUPPORT" : "", kercn, rowsPerWI,
00500             oclop == OCL_OP_ABSDIFF && wdepth == CV_32S && ddepth == wdepth ?
00501             ocl::convertTypeStr(CV_8U, ddepth, kercn, cvtstr[3]) : "noconvert");
00502 
00503     size_t usrdata_esz = CV_ELEM_SIZE(wdepth);
00504     const uchar* usrdata_p = (const uchar*)usrdata;
00505     const double* usrdata_d = (const double*)usrdata;
00506     float usrdata_f[3];
00507     int i, n = oclop == OCL_OP_MUL_SCALE || oclop == OCL_OP_DIV_SCALE ||
00508         oclop == OCL_OP_RDIV_SCALE || oclop == OCL_OP_RECIP_SCALE ? 1 : oclop == OCL_OP_ADDW ? 3 : 0;
00509     if( n > 0 && wdepth == CV_32F )
00510     {
00511         for( i = 0; i < n; i++ )
00512             usrdata_f[i] = (float)usrdata_d[i];
00513         usrdata_p = (const uchar*)usrdata_f;
00514     }
00515 
00516     ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
00517     if (k.empty())
00518         return false;
00519 
00520     UMat src1 = _src1.getUMat(), src2;
00521     UMat dst = _dst.getUMat(), mask = _mask.getUMat();
00522 
00523     ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cn, kercn);
00524     ocl::KernelArg dstarg = haveMask ? ocl::KernelArg::ReadWrite(dst, cn, kercn) :
00525                                        ocl::KernelArg::WriteOnly(dst, cn, kercn);
00526     ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask, 1);
00527 
00528     if( haveScalar )
00529     {
00530         size_t esz = CV_ELEM_SIZE1(wtype)*scalarcn;
00531         double buf[4]={0,0,0,0};
00532         Mat src2sc = _src2.getMat();
00533 
00534         if( !src2sc.empty() )
00535             convertAndUnrollScalar(src2sc, wtype, (uchar*)buf, 1);
00536         ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, 0, buf, esz);
00537 
00538         if( !haveMask )
00539         {
00540             if(n == 0)
00541                 k.args(src1arg, dstarg, scalararg);
00542             else if(n == 1)
00543                 k.args(src1arg, dstarg, scalararg,
00544                        ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz));
00545             else
00546                 CV_Error(Error::StsNotImplemented, "unsupported number of extra parameters");
00547         }
00548         else
00549             k.args(src1arg, maskarg, dstarg, scalararg);
00550     }
00551     else
00552     {
00553         src2 = _src2.getUMat();
00554         ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cn, kercn);
00555 
00556         if( !haveMask )
00557         {
00558             if (n == 0)
00559                 k.args(src1arg, src2arg, dstarg);
00560             else if (n == 1)
00561                 k.args(src1arg, src2arg, dstarg,
00562                        ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz));
00563             else if (n == 3)
00564                 k.args(src1arg, src2arg, dstarg,
00565                        ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz),
00566                        ocl::KernelArg(0, 0, 0, 0, usrdata_p + usrdata_esz, usrdata_esz),
00567                        ocl::KernelArg(0, 0, 0, 0, usrdata_p + usrdata_esz*2, usrdata_esz));
00568             else
00569                 CV_Error(Error::StsNotImplemented, "unsupported number of extra parameters");
00570         }
00571         else
00572             k.args(src1arg, src2arg, maskarg, dstarg);
00573     }
00574 
00575     size_t globalsize[] = { (size_t)src1.cols * cn / kercn, ((size_t)src1.rows + rowsPerWI - 1) / rowsPerWI };
00576     return k.run(2, globalsize, NULL, false);
00577 }
00578 
00579 #endif
00580 
00581 static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
00582                       InputArray _mask, int dtype, BinaryFuncC* tab, bool muldiv=false,
00583                       void* usrdata=0, int oclop=-1 )
00584 {
00585     const _InputArray *psrc1 = &_src1, *psrc2 = &_src2;
00586     int kind1 = psrc1->kind(), kind2 = psrc2->kind();
00587     bool haveMask = !_mask.empty();
00588     bool reallocate = false;
00589     int type1 = psrc1->type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
00590     int type2 = psrc2->type(), depth2 = CV_MAT_DEPTH(type2), cn2 = CV_MAT_CN(type2);
00591     int wtype, dims1 = psrc1->dims(), dims2 = psrc2->dims();
00592     Size sz1 = dims1 <= 2 ? psrc1->size() : Size();
00593     Size sz2 = dims2 <= 2 ? psrc2->size() : Size();
00594 #ifdef HAVE_OPENCL
00595     bool use_opencl = OCL_PERFORMANCE_CHECK(_dst.isUMat()) && dims1 <= 2 && dims2 <= 2;
00596 #endif
00597     bool src1Scalar = checkScalar(*psrc1, type2, kind1, kind2);
00598     bool src2Scalar = checkScalar(*psrc2, type1, kind2, kind1);
00599 
00600     if( (kind1 == kind2 || cn == 1) && sz1 == sz2 && dims1 <= 2 && dims2 <= 2 && type1 == type2 &&
00601         !haveMask && ((!_dst.fixedType() && (dtype < 0 || CV_MAT_DEPTH(dtype) == depth1)) ||
00602                        (_dst.fixedType() && _dst.type() == type1)) &&
00603         ((src1Scalar && src2Scalar) || (!src1Scalar && !src2Scalar)) )
00604     {
00605         _dst.createSameSize(*psrc1, type1);
00606 #ifdef HAVE_OPENCL
00607         CV_OCL_RUN(use_opencl,
00608             ocl_arithm_op(*psrc1, *psrc2, _dst, _mask,
00609                           (!usrdata ? type1 : std::max(depth1, CV_32F)),
00610                           usrdata, oclop, false))
00611 #endif
00612 
00613         Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat();
00614         Size sz = getContinuousSize(src1, src2, dst, src1.channels());
00615         tab[depth1](src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, usrdata);
00616         return;
00617     }
00618 
00619     bool haveScalar = false, swapped12 = false;
00620 
00621     if( dims1 != dims2 || sz1 != sz2 || cn != cn2 ||
00622         (kind1 == _InputArray::MATX && (sz1 == Size(1,4) || sz1 == Size(1,1))) ||
00623         (kind2 == _InputArray::MATX && (sz2 == Size(1,4) || sz2 == Size(1,1))) )
00624     {
00625         if( checkScalar(*psrc1, type2, kind1, kind2) )
00626         {
00627             // src1 is a scalar; swap it with src2
00628             swap(psrc1, psrc2);
00629             swap(sz1, sz2);
00630             swap(type1, type2);
00631             swap(depth1, depth2);
00632             swap(cn, cn2);
00633             swap(dims1, dims2);
00634             swapped12 = true;
00635             if( oclop == OCL_OP_SUB )
00636                 oclop = OCL_OP_RSUB;
00637             if ( oclop == OCL_OP_DIV_SCALE )
00638                 oclop = OCL_OP_RDIV_SCALE;
00639         }
00640         else if( !checkScalar(*psrc2, type1, kind2, kind1) )
00641             CV_Error( CV_StsUnmatchedSizes,
00642                      "The operation is neither 'array op array' "
00643                      "(where arrays have the same size and the same number of channels), "
00644                      "nor 'array op scalar', nor 'scalar op array'" );
00645         haveScalar = true;
00646         CV_Assert(type2 == CV_64F && (sz2.height == 1 || sz2.height == 4));
00647 
00648         if (!muldiv)
00649         {
00650             Mat sc = psrc2->getMat();
00651             depth2 = actualScalarDepth(sc.ptr<double>(), cn);
00652             if( depth2 == CV_64F && (depth1 < CV_32S || depth1 == CV_32F) )
00653                 depth2 = CV_32F;
00654         }
00655         else
00656             depth2 = CV_64F;
00657     }
00658 
00659     if( dtype < 0 )
00660     {
00661         if( _dst.fixedType() )
00662             dtype = _dst.type();
00663         else
00664         {
00665             if( !haveScalar && type1 != type2 )
00666                 CV_Error(CV_StsBadArg,
00667                      "When the input arrays in add/subtract/multiply/divide functions have different types, "
00668                      "the output array type must be explicitly specified");
00669             dtype = type1;
00670         }
00671     }
00672     dtype = CV_MAT_DEPTH(dtype);
00673 
00674     if( depth1 == depth2 && dtype == depth1 )
00675         wtype = dtype;
00676     else if( !muldiv )
00677     {
00678         wtype = depth1 <= CV_8S && depth2 <= CV_8S ? CV_16S :
00679                 depth1 <= CV_32S && depth2 <= CV_32S ? CV_32S : std::max(depth1, depth2);
00680         wtype = std::max(wtype, dtype);
00681 
00682         // when the result of addition should be converted to an integer type,
00683         // and just one of the input arrays is floating-point, it makes sense to convert that input to integer type before the operation,
00684         // instead of converting the other input to floating-point and then converting the operation result back to integers.
00685         if( dtype < CV_32F && (depth1 < CV_32F || depth2 < CV_32F) )
00686             wtype = CV_32S;
00687     }
00688     else
00689     {
00690         wtype = std::max(depth1, std::max(depth2, CV_32F));
00691         wtype = std::max(wtype, dtype);
00692     }
00693 
00694     dtype = CV_MAKETYPE(dtype, cn);
00695     wtype = CV_MAKETYPE(wtype, cn);
00696 
00697     if( haveMask )
00698     {
00699         int mtype = _mask.type();
00700         CV_Assert( (mtype == CV_8UC1 || mtype == CV_8SC1) && _mask.sameSize(*psrc1) );
00701         reallocate = !_dst.sameSize(*psrc1) || _dst.type() != dtype;
00702     }
00703 
00704     _dst.createSameSize(*psrc1, dtype);
00705     if( reallocate )
00706         _dst.setTo(0.);
00707 
00708 #ifdef HAVE_OPENCL
00709     CV_OCL_RUN(use_opencl,
00710                ocl_arithm_op(*psrc1, *psrc2, _dst, _mask, wtype,
00711                usrdata, oclop, haveScalar))
00712 #endif
00713 
00714     BinaryFunc cvtsrc1 = type1 == wtype ? 0 : getConvertFunc(type1, wtype);
00715     BinaryFunc cvtsrc2 = type2 == type1 ? cvtsrc1 : type2 == wtype ? 0 : getConvertFunc(type2, wtype);
00716     BinaryFunc cvtdst = dtype == wtype ? 0 : getConvertFunc(wtype, dtype);
00717 
00718     size_t esz1 = CV_ELEM_SIZE(type1), esz2 = CV_ELEM_SIZE(type2);
00719     size_t dsz = CV_ELEM_SIZE(dtype), wsz = CV_ELEM_SIZE(wtype);
00720     size_t blocksize0 = (size_t)(BLOCK_SIZE + wsz-1)/wsz;
00721     BinaryFunc copymask = getCopyMaskFunc(dsz);
00722     Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat(), mask = _mask.getMat();
00723 
00724     AutoBuffer<uchar> _buf;
00725     uchar *buf, *maskbuf = 0, *buf1 = 0, *buf2 = 0, *wbuf = 0;
00726     size_t bufesz = (cvtsrc1 ? wsz : 0) +
00727                     (cvtsrc2 || haveScalar ? wsz : 0) +
00728                     (cvtdst ? wsz : 0) +
00729                     (haveMask ? dsz : 0);
00730     BinaryFuncC func = tab[CV_MAT_DEPTH(wtype)];
00731 
00732     if( !haveScalar )
00733     {
00734         const Mat* arrays[] = { &src1, &src2, &dst, &mask, 0 };
00735         uchar* ptrs[4];
00736 
00737         NAryMatIterator it(arrays, ptrs);
00738         size_t total = it.size, blocksize = total;
00739 
00740         if( haveMask || cvtsrc1 || cvtsrc2 || cvtdst )
00741             blocksize = std::min(blocksize, blocksize0);
00742 
00743         _buf.allocate(bufesz*blocksize + 64);
00744         buf = _buf;
00745         if( cvtsrc1 )
00746             buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
00747         if( cvtsrc2 )
00748             buf2 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
00749         wbuf = maskbuf = buf;
00750         if( cvtdst )
00751             buf = alignPtr(buf + blocksize*wsz, 16);
00752         if( haveMask )
00753             maskbuf = buf;
00754 
00755         for( size_t i = 0; i < it.nplanes; i++, ++it )
00756         {
00757             for( size_t j = 0; j < total; j += blocksize )
00758             {
00759                 int bsz = (int)MIN(total - j, blocksize);
00760                 Size bszn(bsz*cn, 1);
00761                 const uchar *sptr1 = ptrs[0], *sptr2 = ptrs[1];
00762                 uchar* dptr = ptrs[2];
00763                 if( cvtsrc1 )
00764                 {
00765                     cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 );
00766                     sptr1 = buf1;
00767                 }
00768                 if( ptrs[0] == ptrs[1] )
00769                     sptr2 = sptr1;
00770                 else if( cvtsrc2 )
00771                 {
00772                     cvtsrc2( sptr2, 1, 0, 1, buf2, 1, bszn, 0 );
00773                     sptr2 = buf2;
00774                 }
00775 
00776                 if( !haveMask && !cvtdst )
00777                     func( sptr1, 1, sptr2, 1, dptr, 1, bszn.width, bszn.height, usrdata );
00778                 else
00779                 {
00780                     func( sptr1, 1, sptr2, 1, wbuf, 0, bszn.width, bszn.height, usrdata );
00781                     if( !haveMask )
00782                         cvtdst( wbuf, 1, 0, 1, dptr, 1, bszn, 0 );
00783                     else if( !cvtdst )
00784                     {
00785                         copymask( wbuf, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz );
00786                         ptrs[3] += bsz;
00787                     }
00788                     else
00789                     {
00790                         cvtdst( wbuf, 1, 0, 1, maskbuf, 1, bszn, 0 );
00791                         copymask( maskbuf, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz );
00792                         ptrs[3] += bsz;
00793                     }
00794                 }
00795                 ptrs[0] += bsz*esz1; ptrs[1] += bsz*esz2; ptrs[2] += bsz*dsz;
00796             }
00797         }
00798     }
00799     else
00800     {
00801         const Mat* arrays[] = { &src1, &dst, &mask, 0 };
00802         uchar* ptrs[3];
00803 
00804         NAryMatIterator it(arrays, ptrs);
00805         size_t total = it.size, blocksize = std::min(total, blocksize0);
00806 
00807         _buf.allocate(bufesz*blocksize + 64);
00808         buf = _buf;
00809         if( cvtsrc1 )
00810             buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
00811         buf2 = buf; buf = alignPtr(buf + blocksize*wsz, 16);
00812         wbuf = maskbuf = buf;
00813         if( cvtdst )
00814             buf = alignPtr(buf + blocksize*wsz, 16);
00815         if( haveMask )
00816             maskbuf = buf;
00817 
00818         convertAndUnrollScalar( src2, wtype, buf2, blocksize);
00819 
00820         for( size_t i = 0; i < it.nplanes; i++, ++it )
00821         {
00822             for( size_t j = 0; j < total; j += blocksize )
00823             {
00824                 int bsz = (int)MIN(total - j, blocksize);
00825                 Size bszn(bsz*cn, 1);
00826                 const uchar *sptr1 = ptrs[0];
00827                 const uchar* sptr2 = buf2;
00828                 uchar* dptr = ptrs[1];
00829 
00830                 if( cvtsrc1 )
00831                 {
00832                     cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 );
00833                     sptr1 = buf1;
00834                 }
00835 
00836                 if( swapped12 )
00837                     std::swap(sptr1, sptr2);
00838 
00839                 if( !haveMask && !cvtdst )
00840                     func( sptr1, 1, sptr2, 1, dptr, 1, bszn.width, bszn.height, usrdata );
00841                 else
00842                 {
00843                     func( sptr1, 1, sptr2, 1, wbuf, 1, bszn.width, bszn.height, usrdata );
00844                     if( !haveMask )
00845                         cvtdst( wbuf, 1, 0, 1, dptr, 1, bszn, 0 );
00846                     else if( !cvtdst )
00847                     {
00848                         copymask( wbuf, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz );
00849                         ptrs[2] += bsz;
00850                     }
00851                     else
00852                     {
00853                         cvtdst( wbuf, 1, 0, 1, maskbuf, 1, bszn, 0 );
00854                         copymask( maskbuf, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz );
00855                         ptrs[2] += bsz;
00856                     }
00857                 }
00858                 ptrs[0] += bsz*esz1; ptrs[1] += bsz*dsz;
00859             }
00860         }
00861     }
00862 }
00863 
00864 static BinaryFuncC* getAddTab()
00865 {
00866     static BinaryFuncC addTab[] =
00867     {
00868         (BinaryFuncC)GET_OPTIMIZED(cv::hal::add8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::add8s),
00869         (BinaryFuncC)GET_OPTIMIZED(cv::hal::add16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::add16s),
00870         (BinaryFuncC)GET_OPTIMIZED(cv::hal::add32s),
00871         (BinaryFuncC)GET_OPTIMIZED(cv::hal::add32f), (BinaryFuncC)cv::hal::add64f,
00872         0
00873     };
00874 
00875     return addTab;
00876 }
00877 
00878 static BinaryFuncC* getSubTab()
00879 {
00880     static BinaryFuncC subTab[] =
00881     {
00882         (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub8s),
00883         (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub16s),
00884         (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub32s),
00885         (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub32f), (BinaryFuncC)cv::hal::sub64f,
00886         0
00887     };
00888 
00889     return subTab;
00890 }
00891 
00892 static BinaryFuncC* getAbsDiffTab()
00893 {
00894     static BinaryFuncC absDiffTab[] =
00895     {
00896         (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff8s),
00897         (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff16s),
00898         (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff32s),
00899         (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff32f), (BinaryFuncC)cv::hal::absdiff64f,
00900         0
00901     };
00902 
00903     return absDiffTab;
00904 }
00905 
00906 }
00907 
00908 void cv::add( InputArray src1, InputArray src2, OutputArray dst,
00909           InputArray mask, int dtype )
00910 {
00911     arithm_op(src1, src2, dst, mask, dtype, getAddTab(), false, 0, OCL_OP_ADD );
00912 }
00913 
00914 void cv::subtract( InputArray _src1, InputArray _src2, OutputArray _dst,
00915                InputArray mask, int dtype )
00916 {
00917 #ifdef HAVE_TEGRA_OPTIMIZATION
00918     if (tegra::useTegra())
00919     {
00920         int kind1 = _src1.kind(), kind2 = _src2.kind();
00921         Mat src1 = _src1.getMat(), src2 = _src2.getMat();
00922         bool src1Scalar = checkScalar(src1, _src2.type(), kind1, kind2);
00923         bool src2Scalar = checkScalar(src2, _src1.type(), kind2, kind1);
00924 
00925         if (!src1Scalar && !src2Scalar &&
00926             src1.depth() == CV_8U && src2.type() == src1.type() &&
00927             src1.dims == 2 && src2.size() == src1.size() &&
00928             mask.empty())
00929         {
00930             if (dtype < 0)
00931             {
00932                 if (_dst.fixedType())
00933                 {
00934                     dtype = _dst.depth();
00935                 }
00936                 else
00937                 {
00938                     dtype = src1.depth();
00939                 }
00940             }
00941 
00942             dtype = CV_MAT_DEPTH(dtype);
00943 
00944             if (!_dst.fixedType() || dtype == _dst.depth())
00945             {
00946                 _dst.create(src1.size(), CV_MAKE_TYPE(dtype, src1.channels()));
00947 
00948                 if (dtype == CV_16S)
00949                 {
00950                     Mat dst = _dst.getMat();
00951                     if(tegra::subtract_8u8u16s(src1, src2, dst))
00952                         return;
00953                 }
00954                 else if (dtype == CV_32F)
00955                 {
00956                     Mat dst = _dst.getMat();
00957                     if(tegra::subtract_8u8u32f(src1, src2, dst))
00958                         return;
00959                 }
00960                 else if (dtype == CV_8S)
00961                 {
00962                     Mat dst = _dst.getMat();
00963                     if(tegra::subtract_8u8u8s(src1, src2, dst))
00964                         return;
00965                 }
00966             }
00967         }
00968     }
00969 #endif
00970     arithm_op(_src1, _src2, _dst, mask, dtype, getSubTab(), false, 0, OCL_OP_SUB );
00971 }
00972 
00973 void cv::absdiff( InputArray src1, InputArray src2, OutputArray dst )
00974 {
00975     arithm_op(src1, src2, dst, noArray(), -1, getAbsDiffTab(), false, 0, OCL_OP_ABSDIFF);
00976 }
00977 
00978 /****************************************************************************************\
00979 *                                    multiply/divide                                     *
00980 \****************************************************************************************/
00981 
00982 namespace cv
00983 {
00984 
00985 static BinaryFuncC* getMulTab()
00986 {
00987     static BinaryFuncC mulTab[] =
00988     {
00989         (BinaryFuncC)cv::hal::mul8u, (BinaryFuncC)cv::hal::mul8s, (BinaryFuncC)cv::hal::mul16u,
00990         (BinaryFuncC)cv::hal::mul16s, (BinaryFuncC)cv::hal::mul32s, (BinaryFuncC)cv::hal::mul32f,
00991         (BinaryFuncC)cv::hal::mul64f, 0
00992     };
00993 
00994     return mulTab;
00995 }
00996 
00997 static BinaryFuncC* getDivTab()
00998 {
00999     static BinaryFuncC divTab[] =
01000     {
01001         (BinaryFuncC)cv::hal::div8u, (BinaryFuncC)cv::hal::div8s, (BinaryFuncC)cv::hal::div16u,
01002         (BinaryFuncC)cv::hal::div16s, (BinaryFuncC)cv::hal::div32s, (BinaryFuncC)cv::hal::div32f,
01003         (BinaryFuncC)cv::hal::div64f, 0
01004     };
01005 
01006     return divTab;
01007 }
01008 
01009 static BinaryFuncC* getRecipTab()
01010 {
01011     static BinaryFuncC recipTab[] =
01012     {
01013         (BinaryFuncC)cv::hal::recip8u, (BinaryFuncC)cv::hal::recip8s, (BinaryFuncC)cv::hal::recip16u,
01014         (BinaryFuncC)cv::hal::recip16s, (BinaryFuncC)cv::hal::recip32s, (BinaryFuncC)cv::hal::recip32f,
01015         (BinaryFuncC)cv::hal::recip64f, 0
01016     };
01017 
01018     return recipTab;
01019 }
01020 
01021 }
01022 
01023 void cv::multiply(InputArray src1, InputArray src2,
01024                   OutputArray dst, double scale, int dtype)
01025 {
01026     arithm_op(src1, src2, dst, noArray(), dtype, getMulTab(),
01027               true, &scale, std::abs(scale - 1.0) < DBL_EPSILON ? OCL_OP_MUL : OCL_OP_MUL_SCALE);
01028 }
01029 
01030 void cv::divide(InputArray src1, InputArray src2,
01031                 OutputArray dst, double scale, int dtype)
01032 {
01033     arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale, OCL_OP_DIV_SCALE);
01034 }
01035 
01036 void cv::divide(double scale, InputArray src2,
01037                 OutputArray dst, int dtype)
01038 {
01039     arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale, OCL_OP_RECIP_SCALE);
01040 }
01041 
01042 /****************************************************************************************\
01043 *                                      addWeighted                                       *
01044 \****************************************************************************************/
01045 
01046 namespace cv
01047 {
01048 
01049 static BinaryFuncC* getAddWeightedTab()
01050 {
01051     static BinaryFuncC addWeightedTab[] =
01052     {
01053         (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted8s), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted16u),
01054         (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted16s), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted32s), (BinaryFuncC)cv::hal::addWeighted32f,
01055         (BinaryFuncC)cv::hal::addWeighted64f, 0
01056     };
01057 
01058     return addWeightedTab;
01059 }
01060 
01061 }
01062 
01063 void cv::addWeighted( InputArray src1, double alpha, InputArray src2,
01064                       double beta, double gamma, OutputArray dst, int dtype )
01065 {
01066     double scalars[] = {alpha, beta, gamma};
01067     arithm_op(src1, src2, dst, noArray(), dtype, getAddWeightedTab(), true, scalars, OCL_OP_ADDW);
01068 }
01069 
01070 
01071 /****************************************************************************************\
01072 *                                          compare                                       *
01073 \****************************************************************************************/
01074 
01075 namespace cv
01076 {
01077 
01078 static BinaryFuncC getCmpFunc(int depth)
01079 {
01080     static BinaryFuncC cmpTab[] =
01081     {
01082         (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp8s),
01083         (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp16s),
01084         (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp32s),
01085         (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp32f), (BinaryFuncC)cv::hal::cmp64f,
01086         0
01087     };
01088 
01089     return cmpTab[depth];
01090 }
01091 
01092 static double getMinVal(int depth)
01093 {
01094     static const double tab[] = {0, -128, 0, -32768, INT_MIN, -FLT_MAX, -DBL_MAX, 0};
01095     return tab[depth];
01096 }
01097 
01098 static double getMaxVal(int depth)
01099 {
01100     static const double tab[] = {255, 127, 65535, 32767, INT_MAX, FLT_MAX, DBL_MAX, 0};
01101     return tab[depth];
01102 }
01103 
01104 #ifdef HAVE_OPENCL
01105 
01106 static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op, bool haveScalar)
01107 {
01108     const ocl::Device& dev = ocl::Device::getDefault();
01109     bool doubleSupport = dev.doubleFPConfig() > 0;
01110     int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1),
01111             type2 = _src2.type(), depth2 = CV_MAT_DEPTH(type2);
01112 
01113     if (!doubleSupport && depth1 == CV_64F)
01114         return false;
01115 
01116     if (!haveScalar && (!_src1.sameSize(_src2) || type1 != type2))
01117             return false;
01118 
01119     int kercn = haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst), rowsPerWI = dev.isIntel() ? 4 : 1;
01120     // Workaround for bug with "?:" operator in AMD OpenCL compiler
01121     if (depth1 >= CV_16U)
01122         kercn = 1;
01123 
01124     int scalarcn = kercn == 3 ? 4 : kercn;
01125     const char * const operationMap[] = { "==", ">", ">=", "<", "<=", "!=" };
01126     char cvt[40];
01127 
01128     String opts = format("-D %s -D srcT1=%s -D dstT=%s -D workT=srcT1 -D cn=%d"
01129                          " -D convertToDT=%s -D OP_CMP -D CMP_OPERATOR=%s -D srcT1_C1=%s"
01130                          " -D srcT2_C1=%s -D dstT_C1=%s -D workST=%s -D rowsPerWI=%d%s",
01131                          haveScalar ? "UNARY_OP" : "BINARY_OP",
01132                          ocl::typeToStr(CV_MAKE_TYPE(depth1, kercn)),
01133                          ocl::typeToStr(CV_8UC(kercn)), kercn,
01134                          ocl::convertTypeStr(depth1, CV_8U, kercn, cvt),
01135                          operationMap[op], ocl::typeToStr(depth1),
01136                          ocl::typeToStr(depth1), ocl::typeToStr(CV_8U),
01137                          ocl::typeToStr(CV_MAKE_TYPE(depth1, scalarcn)), rowsPerWI,
01138                          doubleSupport ? " -D DOUBLE_SUPPORT" : "");
01139 
01140     ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
01141     if (k.empty())
01142         return false;
01143 
01144     UMat src1 = _src1.getUMat();
01145     Size size = src1.size();
01146     _dst.create(size, CV_8UC(cn));
01147     UMat dst = _dst.getUMat();
01148 
01149     if (haveScalar)
01150     {
01151         size_t esz = CV_ELEM_SIZE1(type1) * scalarcn;
01152         double buf[4] = { 0, 0, 0, 0 };
01153         Mat src2 = _src2.getMat();
01154 
01155         if( depth1 > CV_32S )
01156             convertAndUnrollScalar( src2, depth1, (uchar *)buf, kercn );
01157         else
01158         {
01159             double fval = 0;
01160             getConvertFunc(depth2, CV_64F)(src2.ptr(), 1, 0, 1, (uchar *)&fval, 1, Size(1, 1), 0);
01161             if( fval < getMinVal(depth1) )
01162                 return dst.setTo(Scalar::all(op == CMP_GT || op == CMP_GE || op == CMP_NE ? 255 : 0)), true;
01163 
01164             if( fval > getMaxVal(depth1) )
01165                 return dst.setTo(Scalar::all(op == CMP_LT || op == CMP_LE || op == CMP_NE ? 255 : 0)), true;
01166 
01167             int ival = cvRound(fval);
01168             if( fval != ival )
01169             {
01170                 if( op == CMP_LT || op == CMP_GE )
01171                     ival = cvCeil(fval);
01172                 else if( op == CMP_LE || op == CMP_GT )
01173                     ival = cvFloor(fval);
01174                 else
01175                     return dst.setTo(Scalar::all(op == CMP_NE ? 255 : 0)), true;
01176             }
01177             convertAndUnrollScalar(Mat(1, 1, CV_32S, &ival), depth1, (uchar *)buf, kercn);
01178         }
01179 
01180         ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, 0, buf, esz);
01181 
01182         k.args(ocl::KernelArg::ReadOnlyNoSize(src1, cn, kercn),
01183                ocl::KernelArg::WriteOnly(dst, cn, kercn), scalararg);
01184     }
01185     else
01186     {
01187         UMat src2 = _src2.getUMat();
01188 
01189         k.args(ocl::KernelArg::ReadOnlyNoSize(src1),
01190                ocl::KernelArg::ReadOnlyNoSize(src2),
01191                ocl::KernelArg::WriteOnly(dst, cn, kercn));
01192     }
01193 
01194     size_t globalsize[2] = { (size_t)dst.cols * cn / kercn, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
01195     return k.run(2, globalsize, NULL, false);
01196 }
01197 
01198 #endif
01199 
01200 }
01201 
01202 void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op)
01203 {
01204     CV_Assert( op == CMP_LT || op == CMP_LE || op == CMP_EQ ||
01205                op == CMP_NE || op == CMP_GE || op == CMP_GT );
01206 
01207     bool haveScalar = false;
01208 
01209     if ((_src1.isMatx() + _src2.isMatx()) == 1
01210             || !_src1.sameSize(_src2)
01211             || _src1.type() != _src2.type())
01212     {
01213         if (checkScalar(_src1, _src2.type(), _src1.kind(), _src2.kind()))
01214         {
01215             op = op == CMP_LT ? CMP_GT : op == CMP_LE ? CMP_GE :
01216                 op == CMP_GE ? CMP_LE : op == CMP_GT ? CMP_LT : op;
01217             // src1 is a scalar; swap it with src2
01218             compare(_src2, _src1, _dst, op);
01219             return;
01220         }
01221         else if( !checkScalar(_src2, _src1.type(), _src2.kind(), _src1.kind()) )
01222             CV_Error( CV_StsUnmatchedSizes,
01223                      "The operation is neither 'array op array' (where arrays have the same size and the same type), "
01224                      "nor 'array op scalar', nor 'scalar op array'" );
01225         haveScalar = true;
01226     }
01227 
01228 #ifdef HAVE_OPENCL
01229     CV_OCL_RUN(_src1.dims() <= 2 && _src2.dims() <= 2 && OCL_PERFORMANCE_CHECK(_dst.isUMat()),
01230                ocl_compare(_src1, _src2, _dst, op, haveScalar))
01231 #endif
01232 
01233     int kind1 = _src1.kind(), kind2 = _src2.kind();
01234     Mat src1 = _src1.getMat(), src2 = _src2.getMat();
01235 
01236     if( kind1 == kind2 && src1.dims <= 2 && src2.dims <= 2 && src1.size() == src2.size() && src1.type() == src2.type() )
01237     {
01238         int cn = src1.channels();
01239         _dst.create(src1.size(), CV_8UC(cn));
01240         Mat dst = _dst.getMat();
01241         Size sz = getContinuousSize(src1, src2, dst, src1.channels());
01242         getCmpFunc(src1.depth())(src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, &op);
01243         return;
01244     }
01245 
01246     int cn = src1.channels(), depth1 = src1.depth(), depth2 = src2.depth();
01247 
01248     _dst.create(src1.dims, src1.size, CV_8UC(cn));
01249     src1 = src1.reshape(1); src2 = src2.reshape(1);
01250     Mat dst = _dst.getMat().reshape(1);
01251 
01252     size_t esz = src1.elemSize();
01253     size_t blocksize0 = (size_t)(BLOCK_SIZE + esz-1)/esz;
01254     BinaryFuncC func = getCmpFunc(depth1);
01255 
01256     if( !haveScalar )
01257     {
01258         const Mat* arrays[] = { &src1, &src2, &dst, 0 };
01259         uchar* ptrs[3];
01260 
01261         NAryMatIterator it(arrays, ptrs);
01262         size_t total = it.size;
01263 
01264         for( size_t i = 0; i < it.nplanes; i++, ++it )
01265             func( ptrs[0], 0, ptrs[1], 0, ptrs[2], 0, (int)total, 1, &op );
01266     }
01267     else
01268     {
01269         const Mat* arrays[] = { &src1, &dst, 0 };
01270         uchar* ptrs[2];
01271 
01272         NAryMatIterator it(arrays, ptrs);
01273         size_t total = it.size, blocksize = std::min(total, blocksize0);
01274 
01275         AutoBuffer<uchar>  _buf(blocksize*esz);
01276         uchar *buf = _buf;
01277 
01278         if( depth1 > CV_32S )
01279             convertAndUnrollScalar( src2, depth1, buf, blocksize );
01280         else
01281         {
01282             double fval=0;
01283             getConvertFunc(depth2, CV_64F)(src2.ptr(), 1, 0, 1, (uchar*)&fval, 1, Size(1,1), 0);
01284             if( fval < getMinVal(depth1) )
01285             {
01286                 dst = Scalar::all(op == CMP_GT || op == CMP_GE || op == CMP_NE ? 255 : 0);
01287                 return;
01288             }
01289 
01290             if( fval > getMaxVal(depth1) )
01291             {
01292                 dst = Scalar::all(op == CMP_LT || op == CMP_LE || op == CMP_NE ? 255 : 0);
01293                 return;
01294             }
01295 
01296             int ival = cvRound(fval);
01297             if( fval != ival )
01298             {
01299                 if( op == CMP_LT || op == CMP_GE )
01300                     ival = cvCeil(fval);
01301                 else if( op == CMP_LE || op == CMP_GT )
01302                     ival = cvFloor(fval);
01303                 else
01304                 {
01305                     dst = Scalar::all(op == CMP_NE ? 255 : 0);
01306                     return;
01307                 }
01308             }
01309             convertAndUnrollScalar(Mat(1, 1, CV_32S, &ival), depth1, buf, blocksize);
01310         }
01311 
01312         for( size_t i = 0; i < it.nplanes; i++, ++it )
01313         {
01314             for( size_t j = 0; j < total; j += blocksize )
01315             {
01316                 int bsz = (int)MIN(total - j, blocksize);
01317                 func( ptrs[0], 0, buf, 0, ptrs[1], 0, bsz, 1, &op);
01318                 ptrs[0] += bsz*esz;
01319                 ptrs[1] += bsz;
01320             }
01321         }
01322     }
01323 }
01324 
01325 /****************************************************************************************\
01326 *                                        inRange                                         *
01327 \****************************************************************************************/
01328 
01329 namespace cv
01330 {
01331 
01332 template <typename T>
01333 struct InRange_SIMD
01334 {
01335     int operator () (const T *, const T *, const T *, uchar *, int) const
01336     {
01337         return 0;
01338     }
01339 };
01340 
01341 #if CV_SSE2
01342 
01343 template <>
01344 struct InRange_SIMD<uchar>
01345 {
01346     int operator () (const uchar * src1, const uchar * src2, const uchar * src3,
01347                      uchar * dst, int len) const
01348     {
01349         int x = 0;
01350 
01351         if (USE_SSE2)
01352         {
01353             __m128i v_full = _mm_set1_epi8(-1), v_128 = _mm_set1_epi8(-128);
01354 
01355             for ( ; x <= len - 16; x += 16 )
01356             {
01357                 __m128i v_src = _mm_add_epi8(_mm_loadu_si128((const __m128i *)(src1 + x)), v_128);
01358                 __m128i v_mask1 = _mm_cmpgt_epi8(_mm_add_epi8(_mm_loadu_si128((const __m128i *)(src2 + x)), v_128), v_src);
01359                 __m128i v_mask2 = _mm_cmpgt_epi8(v_src, _mm_add_epi8(_mm_loadu_si128((const __m128i *)(src3 + x)), v_128));
01360                 _mm_storeu_si128((__m128i *)(dst + x), _mm_andnot_si128(_mm_or_si128(v_mask1, v_mask2), v_full));
01361             }
01362         }
01363 
01364         return x;
01365     }
01366 };
01367 
01368 template <>
01369 struct InRange_SIMD<schar>
01370 {
01371     int operator () (const schar * src1, const schar * src2, const schar * src3,
01372                      uchar * dst, int len) const
01373     {
01374         int x = 0;
01375 
01376         if (USE_SSE2)
01377         {
01378             __m128i v_full = _mm_set1_epi8(-1);
01379 
01380             for ( ; x <= len - 16; x += 16 )
01381             {
01382                 __m128i v_src = _mm_loadu_si128((const __m128i *)(src1 + x));
01383                 __m128i v_mask1 = _mm_cmpgt_epi8(_mm_loadu_si128((const __m128i *)(src2 + x)), v_src);
01384                 __m128i v_mask2 = _mm_cmpgt_epi8(v_src, _mm_loadu_si128((const __m128i *)(src3 + x)));
01385                 _mm_storeu_si128((__m128i *)(dst + x), _mm_andnot_si128(_mm_or_si128(v_mask1, v_mask2), v_full));
01386             }
01387         }
01388 
01389         return x;
01390     }
01391 };
01392 
01393 template <>
01394 struct InRange_SIMD<ushort>
01395 {
01396     int operator () (const ushort * src1, const ushort * src2, const ushort * src3,
01397                      uchar * dst, int len) const
01398     {
01399         int x = 0;
01400 
01401         if (USE_SSE2)
01402         {
01403             __m128i v_zero = _mm_setzero_si128(), v_full = _mm_set1_epi16(-1), v_32768 = _mm_set1_epi16(-32768);
01404 
01405             for ( ; x <= len - 8; x += 8 )
01406             {
01407                 __m128i v_src = _mm_add_epi16(_mm_loadu_si128((const __m128i *)(src1 + x)), v_32768);
01408                 __m128i v_mask1 = _mm_cmpgt_epi16(_mm_add_epi16(_mm_loadu_si128((const __m128i *)(src2 + x)), v_32768), v_src);
01409                 __m128i v_mask2 = _mm_cmpgt_epi16(v_src, _mm_add_epi16(_mm_loadu_si128((const __m128i *)(src3 + x)), v_32768));
01410                 __m128i v_res = _mm_andnot_si128(_mm_or_si128(v_mask1, v_mask2), v_full);
01411                 _mm_storel_epi64((__m128i *)(dst + x), _mm_packus_epi16(_mm_srli_epi16(v_res, 8), v_zero));
01412             }
01413         }
01414 
01415         return x;
01416     }
01417 };
01418 
01419 template <>
01420 struct InRange_SIMD<short>
01421 {
01422     int operator () (const short * src1, const short * src2, const short * src3,
01423                      uchar * dst, int len) const
01424     {
01425         int x = 0;
01426 
01427         if (USE_SSE2)
01428         {
01429             __m128i v_zero = _mm_setzero_si128(), v_full = _mm_set1_epi16(-1);
01430 
01431             for ( ; x <= len - 8; x += 8 )
01432             {
01433                 __m128i v_src = _mm_loadu_si128((const __m128i *)(src1 + x));
01434                 __m128i v_mask1 = _mm_cmpgt_epi16(_mm_loadu_si128((const __m128i *)(src2 + x)), v_src);
01435                 __m128i v_mask2 = _mm_cmpgt_epi16(v_src, _mm_loadu_si128((const __m128i *)(src3 + x)));
01436                 __m128i v_res = _mm_andnot_si128(_mm_or_si128(v_mask1, v_mask2), v_full);
01437                 _mm_storel_epi64((__m128i *)(dst + x), _mm_packus_epi16(_mm_srli_epi16(v_res, 8), v_zero));
01438             }
01439         }
01440 
01441         return x;
01442     }
01443 };
01444 
01445 template <>
01446 struct InRange_SIMD<int>
01447 {
01448     int operator () (const int * src1, const int * src2, const int * src3,
01449                      uchar * dst, int len) const
01450     {
01451         int x = 0;
01452 
01453         if (USE_SSE2)
01454         {
01455             __m128i v_zero = _mm_setzero_si128(), v_full = _mm_set1_epi32(-1);
01456 
01457             for ( ; x <= len - 8; x += 8 )
01458             {
01459                 __m128i v_src = _mm_loadu_si128((const __m128i *)(src1 + x));
01460                 __m128i v_res1 = _mm_or_si128(_mm_cmpgt_epi32(_mm_loadu_si128((const __m128i *)(src2 + x)), v_src),
01461                     _mm_cmpgt_epi32(v_src, _mm_loadu_si128((const __m128i *)(src3 + x))));
01462 
01463                 v_src = _mm_loadu_si128((const __m128i *)(src1 + x + 4));
01464                 __m128i v_res2 = _mm_or_si128(_mm_cmpgt_epi32(_mm_loadu_si128((const __m128i *)(src2 + x + 4)), v_src),
01465                     _mm_cmpgt_epi32(v_src, _mm_loadu_si128((const __m128i *)(src3 + x + 4))));
01466 
01467                 __m128i v_res = _mm_packs_epi32(_mm_srli_epi32(_mm_andnot_si128(v_res1, v_full), 16),
01468                                                 _mm_srli_epi32(_mm_andnot_si128(v_res2, v_full), 16));
01469                 _mm_storel_epi64((__m128i *)(dst + x), _mm_packus_epi16(v_res, v_zero));
01470             }
01471         }
01472 
01473         return x;
01474     }
01475 };
01476 
01477 template <>
01478 struct InRange_SIMD<float>
01479 {
01480     int operator () (const float * src1, const float * src2, const float * src3,
01481                      uchar * dst, int len) const
01482     {
01483         int x = 0;
01484 
01485         if (USE_SSE2)
01486         {
01487             __m128i v_zero = _mm_setzero_si128();
01488 
01489             for ( ; x <= len - 8; x += 8 )
01490             {
01491                 __m128 v_src = _mm_loadu_ps(src1 + x);
01492                 __m128 v_res1 = _mm_and_ps(_mm_cmple_ps(_mm_loadu_ps(src2 + x), v_src),
01493                     _mm_cmple_ps(v_src, _mm_loadu_ps(src3 + x)));
01494 
01495                 v_src = _mm_loadu_ps(src1 + x + 4);
01496                 __m128 v_res2 = _mm_and_ps(_mm_cmple_ps(_mm_loadu_ps(src2 + x + 4), v_src),
01497                     _mm_cmple_ps(v_src, _mm_loadu_ps(src3 + x + 4)));
01498 
01499                 __m128i v_res1i = _mm_cvtps_epi32(v_res1), v_res2i = _mm_cvtps_epi32(v_res2);
01500                 __m128i v_res = _mm_packs_epi32(_mm_srli_epi32(v_res1i, 16), _mm_srli_epi32(v_res2i, 16));
01501                 _mm_storel_epi64((__m128i *)(dst + x), _mm_packus_epi16(v_res, v_zero));
01502             }
01503         }
01504 
01505         return x;
01506     }
01507 };
01508 
01509 #elif CV_NEON
01510 
01511 template <>
01512 struct InRange_SIMD<uchar>
01513 {
01514     int operator () (const uchar * src1, const uchar * src2, const uchar * src3,
01515                      uchar * dst, int len) const
01516     {
01517         int x = 0;
01518 
01519         for ( ; x <= len - 16; x += 16 )
01520         {
01521             uint8x16_t values = vld1q_u8(src1 + x);
01522             uint8x16_t low = vld1q_u8(src2 + x);
01523             uint8x16_t high = vld1q_u8(src3 + x);
01524 
01525             vst1q_u8(dst + x, vandq_u8(vcgeq_u8(values, low), vcgeq_u8(high, values)));
01526         }
01527         return x;
01528     }
01529 };
01530 
01531 template <>
01532 struct InRange_SIMD<schar>
01533 {
01534     int operator () (const schar * src1, const schar * src2, const schar * src3,
01535                      uchar * dst, int len) const
01536     {
01537         int x = 0;
01538 
01539         for ( ; x <= len - 16; x += 16 )
01540         {
01541             int8x16_t values = vld1q_s8(src1 + x);
01542             int8x16_t low = vld1q_s8(src2 + x);
01543             int8x16_t high = vld1q_s8(src3 + x);
01544 
01545             vst1q_u8(dst + x, vandq_u8(vcgeq_s8(values, low), vcgeq_s8(high, values)));
01546         }
01547         return x;
01548     }
01549 };
01550 
01551 template <>
01552 struct InRange_SIMD<ushort>
01553 {
01554     int operator () (const ushort * src1, const ushort * src2, const ushort * src3,
01555                      uchar * dst, int len) const
01556     {
01557         int x = 0;
01558 
01559         for ( ; x <= len - 16; x += 16 )
01560         {
01561             uint16x8_t values = vld1q_u16((const uint16_t*)(src1 + x));
01562             uint16x8_t low = vld1q_u16((const uint16_t*)(src2 + x));
01563             uint16x8_t high = vld1q_u16((const uint16_t*)(src3 + x));
01564             uint8x8_t  r1 = vmovn_u16(vandq_u16(vcgeq_u16(values, low), vcgeq_u16(high, values)));
01565 
01566             values = vld1q_u16((const uint16_t*)(src1 + x + 8));
01567             low = vld1q_u16((const uint16_t*)(src2 + x + 8));
01568             high = vld1q_u16((const uint16_t*)(src3 + x + 8));
01569             uint8x8_t  r2 = vmovn_u16(vandq_u16(vcgeq_u16(values, low), vcgeq_u16(high, values)));
01570 
01571             vst1q_u8(dst + x, vcombine_u8(r1, r2));
01572         }
01573         return x;
01574     }
01575 };
01576 
01577 template <>
01578 struct InRange_SIMD<short>
01579 {
01580     int operator () (const short * src1, const short * src2, const short * src3,
01581                      uchar * dst, int len) const
01582     {
01583         int x = 0;
01584 
01585         for ( ; x <= len - 16; x += 16 )
01586         {
01587             int16x8_t values = vld1q_s16((const int16_t*)(src1 + x));
01588             int16x8_t low = vld1q_s16((const int16_t*)(src2 + x));
01589             int16x8_t high = vld1q_s16((const int16_t*)(src3 + x));
01590             uint8x8_t r1 = vmovn_u16(vandq_u16(vcgeq_s16(values, low), vcgeq_s16(high, values)));
01591 
01592             values = vld1q_s16((const int16_t*)(src1 + x + 8));
01593             low = vld1q_s16((const int16_t*)(src2 + x + 8));
01594             high = vld1q_s16((const int16_t*)(src3 + x + 8));
01595             uint8x8_t r2 = vmovn_u16(vandq_u16(vcgeq_s16(values, low), vcgeq_s16(high, values)));
01596 
01597             vst1q_u8(dst + x, vcombine_u8(r1, r2));
01598         }
01599         return x;
01600     }
01601 };
01602 
01603 template <>
01604 struct InRange_SIMD<int>
01605 {
01606     int operator () (const int * src1, const int * src2, const int * src3,
01607                      uchar * dst, int len) const
01608     {
01609         int x = 0;
01610 
01611         for ( ; x <= len - 8; x += 8 )
01612         {
01613             int32x4_t values = vld1q_s32((const int32_t*)(src1 + x));
01614             int32x4_t low = vld1q_s32((const int32_t*)(src2 + x));
01615             int32x4_t high = vld1q_s32((const int32_t*)(src3 + x));
01616 
01617             uint16x4_t r1 = vmovn_u32(vandq_u32(vcgeq_s32(values, low), vcgeq_s32(high, values)));
01618 
01619             values = vld1q_s32((const int32_t*)(src1 + x + 4));
01620             low = vld1q_s32((const int32_t*)(src2 + x + 4));
01621             high = vld1q_s32((const int32_t*)(src3 + x + 4));
01622 
01623             uint16x4_t r2 = vmovn_u32(vandq_u32(vcgeq_s32(values, low), vcgeq_s32(high, values)));
01624 
01625             uint16x8_t res_16 = vcombine_u16(r1, r2);
01626 
01627             vst1_u8(dst + x, vmovn_u16(res_16));
01628         }
01629         return x;
01630     }
01631 };
01632 
01633 template <>
01634 struct InRange_SIMD<float>
01635 {
01636     int operator () (const float * src1, const float * src2, const float * src3,
01637                      uchar * dst, int len) const
01638     {
01639         int x = 0;
01640 
01641         for ( ; x <= len - 8; x += 8 )
01642         {
01643             float32x4_t values = vld1q_f32((const float32_t*)(src1 + x));
01644             float32x4_t low = vld1q_f32((const float32_t*)(src2 + x));
01645             float32x4_t high = vld1q_f32((const float32_t*)(src3 + x));
01646 
01647             uint16x4_t r1 = vmovn_u32(vandq_u32(vcgeq_f32(values, low), vcgeq_f32(high, values)));
01648 
01649             values = vld1q_f32((const float32_t*)(src1 + x + 4));
01650             low = vld1q_f32((const float32_t*)(src2 + x + 4));
01651             high = vld1q_f32((const float32_t*)(src3 + x + 4));
01652 
01653             uint16x4_t r2 = vmovn_u32(vandq_u32(vcgeq_f32(values, low), vcgeq_f32(high, values)));
01654 
01655             uint16x8_t res_16 = vcombine_u16(r1, r2);
01656 
01657             vst1_u8(dst + x, vmovn_u16(res_16));
01658         }
01659         return x;
01660     }
01661 };
01662 
01663 #endif
01664 
01665 template <typename T>
01666 static void inRange_(const T* src1, size_t step1, const T* src2, size_t step2,
01667          const T* src3, size_t step3, uchar* dst, size_t step,
01668          Size size)
01669 {
01670     step1 /= sizeof(src1[0]);
01671     step2 /= sizeof(src2[0]);
01672     step3 /= sizeof(src3[0]);
01673 
01674     InRange_SIMD<T> vop;
01675 
01676     for( ; size.height--; src1 += step1, src2 += step2, src3 += step3, dst += step )
01677     {
01678         int x = vop(src1, src2, src3, dst, size.width);
01679         #if CV_ENABLE_UNROLLED
01680         for( ; x <= size.width - 4; x += 4 )
01681         {
01682             int t0, t1;
01683             t0 = src2[x] <= src1[x] && src1[x] <= src3[x];
01684             t1 = src2[x+1] <= src1[x+1] && src1[x+1] <= src3[x+1];
01685             dst[x] = (uchar)-t0; dst[x+1] = (uchar)-t1;
01686             t0 = src2[x+2] <= src1[x+2] && src1[x+2] <= src3[x+2];
01687             t1 = src2[x+3] <= src1[x+3] && src1[x+3] <= src3[x+3];
01688             dst[x+2] = (uchar)-t0; dst[x+3] = (uchar)-t1;
01689         }
01690         #endif
01691         for( ; x < size.width; x++ )
01692             dst[x] = (uchar)-(src2[x] <= src1[x] && src1[x] <= src3[x]);
01693     }
01694 }
01695 
01696 
01697 static void inRange8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
01698                       const uchar* src3, size_t step3, uchar* dst, size_t step, Size size)
01699 {
01700     inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
01701 }
01702 
01703 static void inRange8s(const schar* src1, size_t step1, const schar* src2, size_t step2,
01704                       const schar* src3, size_t step3, uchar* dst, size_t step, Size size)
01705 {
01706     inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
01707 }
01708 
01709 static void inRange16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
01710                        const ushort* src3, size_t step3, uchar* dst, size_t step, Size size)
01711 {
01712     inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
01713 }
01714 
01715 static void inRange16s(const short* src1, size_t step1, const short* src2, size_t step2,
01716                        const short* src3, size_t step3, uchar* dst, size_t step, Size size)
01717 {
01718     inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
01719 }
01720 
01721 static void inRange32s(const int* src1, size_t step1, const int* src2, size_t step2,
01722                        const int* src3, size_t step3, uchar* dst, size_t step, Size size)
01723 {
01724     inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
01725 }
01726 
01727 static void inRange32f(const float* src1, size_t step1, const float* src2, size_t step2,
01728                        const float* src3, size_t step3, uchar* dst, size_t step, Size size)
01729 {
01730     inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
01731 }
01732 
01733 static void inRange64f(const double* src1, size_t step1, const double* src2, size_t step2,
01734                        const double* src3, size_t step3, uchar* dst, size_t step, Size size)
01735 {
01736     inRange_(src1, step1, src2, step2, src3, step3, dst, step, size);
01737 }
01738 
01739 static void inRangeReduce(const uchar* src, uchar* dst, size_t len, int cn)
01740 {
01741     int k = cn % 4 ? cn % 4 : 4;
01742     size_t i, j;
01743     if( k == 1 )
01744         for( i = j = 0; i < len; i++, j += cn )
01745             dst[i] = src[j];
01746     else if( k == 2 )
01747         for( i = j = 0; i < len; i++, j += cn )
01748             dst[i] = src[j] & src[j+1];
01749     else if( k == 3 )
01750         for( i = j = 0; i < len; i++, j += cn )
01751             dst[i] = src[j] & src[j+1] & src[j+2];
01752     else
01753         for( i = j = 0; i < len; i++, j += cn )
01754             dst[i] = src[j] & src[j+1] & src[j+2] & src[j+3];
01755 
01756     for( ; k < cn; k += 4 )
01757     {
01758         for( i = 0, j = k; i < len; i++, j += cn )
01759             dst[i] &= src[j] & src[j+1] & src[j+2] & src[j+3];
01760     }
01761 }
01762 
01763 typedef void (*InRangeFunc)( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
01764                              const uchar* src3, size_t step3, uchar* dst, size_t step, Size sz );
01765 
01766 static InRangeFunc getInRangeFunc(int depth)
01767 {
01768     static InRangeFunc inRangeTab[] =
01769     {
01770         (InRangeFunc)GET_OPTIMIZED(inRange8u), (InRangeFunc)GET_OPTIMIZED(inRange8s), (InRangeFunc)GET_OPTIMIZED(inRange16u),
01771         (InRangeFunc)GET_OPTIMIZED(inRange16s), (InRangeFunc)GET_OPTIMIZED(inRange32s), (InRangeFunc)GET_OPTIMIZED(inRange32f),
01772         (InRangeFunc)inRange64f, 0
01773     };
01774 
01775     return inRangeTab[depth];
01776 }
01777 
01778 #ifdef HAVE_OPENCL
01779 
01780 static bool ocl_inRange( InputArray _src, InputArray _lowerb,
01781                          InputArray _upperb, OutputArray _dst )
01782 {
01783     const ocl::Device & d = ocl::Device::getDefault();
01784     int skind = _src.kind(), lkind = _lowerb.kind(), ukind = _upperb.kind();
01785     Size ssize = _src.size(), lsize = _lowerb.size(), usize = _upperb.size();
01786     int stype = _src.type(), ltype = _lowerb.type(), utype = _upperb.type();
01787     int sdepth = CV_MAT_DEPTH(stype), ldepth = CV_MAT_DEPTH(ltype), udepth = CV_MAT_DEPTH(utype);
01788     int cn = CV_MAT_CN(stype), rowsPerWI = d.isIntel() ? 4 : 1;
01789     bool lbScalar = false, ubScalar = false;
01790 
01791     if( (lkind == _InputArray::MATX && skind != _InputArray::MATX) ||
01792         ssize != lsize || stype != ltype )
01793     {
01794         if( !checkScalar(_lowerb, stype, lkind, skind) )
01795             CV_Error( CV_StsUnmatchedSizes,
01796                      "The lower bounary is neither an array of the same size and same type as src, nor a scalar");
01797         lbScalar = true;
01798     }
01799 
01800     if( (ukind == _InputArray::MATX && skind != _InputArray::MATX) ||
01801         ssize != usize || stype != utype )
01802     {
01803         if( !checkScalar(_upperb, stype, ukind, skind) )
01804             CV_Error( CV_StsUnmatchedSizes,
01805                      "The upper bounary is neither an array of the same size and same type as src, nor a scalar");
01806         ubScalar = true;
01807     }
01808 
01809     if (lbScalar != ubScalar)
01810         return false;
01811 
01812     bool doubleSupport = d.doubleFPConfig() > 0,
01813             haveScalar = lbScalar && ubScalar;
01814 
01815     if ( (!doubleSupport && sdepth == CV_64F) ||
01816          (!haveScalar && (sdepth != ldepth || sdepth != udepth)) )
01817         return false;
01818 
01819     int kercn = haveScalar ? cn : std::max(std::min(ocl::predictOptimalVectorWidth(_src, _lowerb, _upperb, _dst), 4), cn);
01820     if (kercn % cn != 0)
01821         kercn = cn;
01822     int colsPerWI = kercn / cn;
01823     String opts = format("%s-D cn=%d -D srcT=%s -D srcT1=%s -D dstT=%s -D kercn=%d -D depth=%d%s -D colsPerWI=%d",
01824                            haveScalar ? "-D HAVE_SCALAR " : "", cn, ocl::typeToStr(CV_MAKE_TYPE(sdepth, kercn)),
01825                            ocl::typeToStr(sdepth), ocl::typeToStr(CV_8UC(colsPerWI)), kercn, sdepth,
01826                            doubleSupport ? " -D DOUBLE_SUPPORT" : "", colsPerWI);
01827 
01828     ocl::Kernel ker("inrange", ocl::core::inrange_oclsrc, opts);
01829     if (ker.empty())
01830         return false;
01831 
01832     _dst.create(ssize, CV_8UC1);
01833     UMat src = _src.getUMat(), dst = _dst.getUMat(), lscalaru, uscalaru;
01834     Mat lscalar, uscalar;
01835 
01836     if (lbScalar && ubScalar)
01837     {
01838         lscalar = _lowerb.getMat();
01839         uscalar = _upperb.getMat();
01840 
01841         size_t esz = src.elemSize();
01842         size_t blocksize = 36;
01843 
01844         AutoBuffer<uchar> _buf(blocksize*(((int)lbScalar + (int)ubScalar)*esz + cn) + 2*cn*sizeof(int) + 128);
01845         uchar *buf = alignPtr(_buf + blocksize*cn, 16);
01846 
01847         if( ldepth != sdepth && sdepth < CV_32S )
01848         {
01849             int* ilbuf = (int*)alignPtr(buf + blocksize*esz, 16);
01850             int* iubuf = ilbuf + cn;
01851 
01852             BinaryFunc sccvtfunc = getConvertFunc(ldepth, CV_32S);
01853             sccvtfunc(lscalar.ptr(), 1, 0, 1, (uchar*)ilbuf, 1, Size(cn, 1), 0);
01854             sccvtfunc(uscalar.ptr(), 1, 0, 1, (uchar*)iubuf, 1, Size(cn, 1), 0);
01855             int minval = cvRound(getMinVal(sdepth)), maxval = cvRound(getMaxVal(sdepth));
01856 
01857             for( int k = 0; k < cn; k++ )
01858             {
01859                 if( ilbuf[k] > iubuf[k] || ilbuf[k] > maxval || iubuf[k] < minval )
01860                     ilbuf[k] = minval+1, iubuf[k] = minval;
01861             }
01862             lscalar = Mat(cn, 1, CV_32S, ilbuf);
01863             uscalar = Mat(cn, 1, CV_32S, iubuf);
01864         }
01865 
01866         lscalar.convertTo(lscalar, stype);
01867         uscalar.convertTo(uscalar, stype);
01868     }
01869     else
01870     {
01871         lscalaru = _lowerb.getUMat();
01872         uscalaru = _upperb.getUMat();
01873     }
01874 
01875     ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
01876             dstarg = ocl::KernelArg::WriteOnly(dst, 1, colsPerWI);
01877 
01878     if (haveScalar)
01879     {
01880         lscalar.copyTo(lscalaru);
01881         uscalar.copyTo(uscalaru);
01882 
01883         ker.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(lscalaru),
01884                ocl::KernelArg::PtrReadOnly(uscalaru), rowsPerWI);
01885     }
01886     else
01887         ker.args(srcarg, dstarg, ocl::KernelArg::ReadOnlyNoSize(lscalaru),
01888                ocl::KernelArg::ReadOnlyNoSize(uscalaru), rowsPerWI);
01889 
01890     size_t globalsize[2] = { (size_t)ssize.width / colsPerWI, ((size_t)ssize.height + rowsPerWI - 1) / rowsPerWI };
01891     return ker.run(2, globalsize, NULL, false);
01892 }
01893 
01894 #endif
01895 
01896 }
01897 
01898 void cv::inRange(InputArray _src, InputArray _lowerb,
01899                  InputArray _upperb, OutputArray _dst)
01900 {
01901 #ifdef HAVE_OPENCL
01902     CV_OCL_RUN(_src.dims() <= 2 && _lowerb.dims() <= 2 &&
01903                _upperb.dims() <= 2 && OCL_PERFORMANCE_CHECK(_dst.isUMat()),
01904                ocl_inRange(_src, _lowerb, _upperb, _dst))
01905 #endif
01906 
01907     int skind = _src.kind(), lkind = _lowerb.kind(), ukind = _upperb.kind();
01908     Mat src = _src.getMat(), lb = _lowerb.getMat(), ub = _upperb.getMat();
01909 
01910     bool lbScalar = false, ubScalar = false;
01911 
01912     if( (lkind == _InputArray::MATX && skind != _InputArray::MATX) ||
01913         src.size != lb.size || src.type() != lb.type() )
01914     {
01915         if( !checkScalar(lb, src.type(), lkind, skind) )
01916             CV_Error( CV_StsUnmatchedSizes,
01917                      "The lower bounary is neither an array of the same size and same type as src, nor a scalar");
01918         lbScalar = true;
01919     }
01920 
01921     if( (ukind == _InputArray::MATX && skind != _InputArray::MATX) ||
01922         src.size != ub.size || src.type() != ub.type() )
01923     {
01924         if( !checkScalar(ub, src.type(), ukind, skind) )
01925             CV_Error( CV_StsUnmatchedSizes,
01926                      "The upper bounary is neither an array of the same size and same type as src, nor a scalar");
01927         ubScalar = true;
01928     }
01929 
01930     CV_Assert(lbScalar == ubScalar);
01931 
01932     int cn = src.channels(), depth = src.depth();
01933 
01934     size_t esz = src.elemSize();
01935     size_t blocksize0 = (size_t)(BLOCK_SIZE + esz-1)/esz;
01936 
01937     _dst.create(src.dims, src.size, CV_8UC1);
01938     Mat dst = _dst.getMat();
01939     InRangeFunc func = getInRangeFunc(depth);
01940 
01941     const Mat* arrays_sc[] = { &src, &dst, 0 };
01942     const Mat* arrays_nosc[] = { &src, &dst, &lb, &ub, 0 };
01943     uchar* ptrs[4];
01944 
01945     NAryMatIterator it(lbScalar && ubScalar ? arrays_sc : arrays_nosc, ptrs);
01946     size_t total = it.size, blocksize = std::min(total, blocksize0);
01947 
01948     AutoBuffer<uchar>  _buf(blocksize*(((int)lbScalar + (int)ubScalar)*esz + cn) + 2*cn*sizeof(int) + 128);
01949     uchar *buf = _buf, *mbuf = buf, *lbuf = 0, *ubuf = 0;
01950     buf = alignPtr(buf + blocksize*cn, 16);
01951 
01952     if( lbScalar && ubScalar )
01953     {
01954         lbuf = buf;
01955         ubuf = buf = alignPtr(buf + blocksize*esz, 16);
01956 
01957         CV_Assert( lb.type() == ub.type() );
01958         int scdepth = lb.depth();
01959 
01960         if( scdepth != depth && depth < CV_32S )
01961         {
01962             int* ilbuf = (int*)alignPtr(buf + blocksize*esz, 16);
01963             int* iubuf = ilbuf + cn;
01964 
01965             BinaryFunc sccvtfunc = getConvertFunc(scdepth, CV_32S);
01966             sccvtfunc(lb.ptr(), 1, 0, 1, (uchar*)ilbuf, 1, Size(cn, 1), 0);
01967             sccvtfunc(ub.ptr(), 1, 0, 1, (uchar*)iubuf, 1, Size(cn, 1), 0);
01968             int minval = cvRound(getMinVal(depth)), maxval = cvRound(getMaxVal(depth));
01969 
01970             for( int k = 0; k < cn; k++ )
01971             {
01972                 if( ilbuf[k] > iubuf[k] || ilbuf[k] > maxval || iubuf[k] < minval )
01973                     ilbuf[k] = minval+1, iubuf[k] = minval;
01974             }
01975             lb = Mat(cn, 1, CV_32S, ilbuf);
01976             ub = Mat(cn, 1, CV_32S, iubuf);
01977         }
01978 
01979         convertAndUnrollScalar( lb, src.type(), lbuf, blocksize );
01980         convertAndUnrollScalar( ub, src.type(), ubuf, blocksize );
01981     }
01982 
01983     for( size_t i = 0; i < it.nplanes; i++, ++it )
01984     {
01985         for( size_t j = 0; j < total; j += blocksize )
01986         {
01987             int bsz = (int)MIN(total - j, blocksize);
01988             size_t delta = bsz*esz;
01989             uchar *lptr = lbuf, *uptr = ubuf;
01990             if( !lbScalar )
01991             {
01992                 lptr = ptrs[2];
01993                 ptrs[2] += delta;
01994             }
01995             if( !ubScalar )
01996             {
01997                 int idx = !lbScalar ? 3 : 2;
01998                 uptr = ptrs[idx];
01999                 ptrs[idx] += delta;
02000             }
02001             func( ptrs[0], 0, lptr, 0, uptr, 0, cn == 1 ? ptrs[1] : mbuf, 0, Size(bsz*cn, 1));
02002             if( cn > 1 )
02003                 inRangeReduce(mbuf, ptrs[1], bsz, cn);
02004             ptrs[0] += delta;
02005             ptrs[1] += bsz;
02006         }
02007     }
02008 }
02009 
02010 /****************************************************************************************\
02011 *                                Earlier API: cvAdd etc.                                 *
02012 \****************************************************************************************/
02013 
02014 CV_IMPL void
02015 cvNot( const CvArr* srcarr, CvArr* dstarr )
02016 {
02017     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
02018     CV_Assert( src.size == dst.size && src.type() == dst.type() );
02019     cv::bitwise_not( src, dst );
02020 }
02021 
02022 
02023 CV_IMPL void
02024 cvAnd( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr )
02025 {
02026     cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
02027         dst = cv::cvarrToMat(dstarr), mask;
02028     CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
02029     if( maskarr )
02030         mask = cv::cvarrToMat(maskarr);
02031     cv::bitwise_and( src1, src2, dst, mask );
02032 }
02033 
02034 
02035 CV_IMPL void
02036 cvOr( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr )
02037 {
02038     cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
02039         dst = cv::cvarrToMat(dstarr), mask;
02040     CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
02041     if( maskarr )
02042         mask = cv::cvarrToMat(maskarr);
02043     cv::bitwise_or( src1, src2, dst, mask );
02044 }
02045 
02046 
02047 CV_IMPL void
02048 cvXor( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr )
02049 {
02050     cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
02051         dst = cv::cvarrToMat(dstarr), mask;
02052     CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
02053     if( maskarr )
02054         mask = cv::cvarrToMat(maskarr);
02055     cv::bitwise_xor( src1, src2, dst, mask );
02056 }
02057 
02058 
02059 CV_IMPL void
02060 cvAndS( const CvArr* srcarr, CvScalar  s, CvArr* dstarr, const CvArr* maskarr )
02061 {
02062     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), mask;
02063     CV_Assert( src.size == dst.size && src.type() == dst.type() );
02064     if( maskarr )
02065         mask = cv::cvarrToMat(maskarr);
02066     cv::bitwise_and( src, (const cv::Scalar &)s, dst, mask );
02067 }
02068 
02069 
02070 CV_IMPL void
02071 cvOrS( const CvArr* srcarr, CvScalar  s, CvArr* dstarr, const CvArr* maskarr )
02072 {
02073     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), mask;
02074     CV_Assert( src.size == dst.size && src.type() == dst.type() );
02075     if( maskarr )
02076         mask = cv::cvarrToMat(maskarr);
02077     cv::bitwise_or( src, (const cv::Scalar &)s, dst, mask );
02078 }
02079 
02080 
02081 CV_IMPL void
02082 cvXorS( const CvArr* srcarr, CvScalar  s, CvArr* dstarr, const CvArr* maskarr )
02083 {
02084     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), mask;
02085     CV_Assert( src.size == dst.size && src.type() == dst.type() );
02086     if( maskarr )
02087         mask = cv::cvarrToMat(maskarr);
02088     cv::bitwise_xor( src, (const cv::Scalar &)s, dst, mask );
02089 }
02090 
02091 
02092 CV_IMPL void cvAdd( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr )
02093 {
02094     cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
02095         dst = cv::cvarrToMat(dstarr), mask;
02096     CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
02097     if( maskarr )
02098         mask = cv::cvarrToMat(maskarr);
02099     cv::add( src1, src2, dst, mask, dst.type() );
02100 }
02101 
02102 
02103 CV_IMPL void cvSub( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr )
02104 {
02105     cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
02106         dst = cv::cvarrToMat(dstarr), mask;
02107     CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
02108     if( maskarr )
02109         mask = cv::cvarrToMat(maskarr);
02110     cv::subtract( src1, src2, dst, mask, dst.type() );
02111 }
02112 
02113 
02114 CV_IMPL void cvAddS( const CvArr* srcarr1, CvScalar  value, CvArr* dstarr, const CvArr* maskarr )
02115 {
02116     cv::Mat src1 = cv::cvarrToMat(srcarr1),
02117         dst = cv::cvarrToMat(dstarr), mask;
02118     CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
02119     if( maskarr )
02120         mask = cv::cvarrToMat(maskarr);
02121     cv::add( src1, (const cv::Scalar &)value, dst, mask, dst.type() );
02122 }
02123 
02124 
02125 CV_IMPL void cvSubRS( const CvArr* srcarr1, CvScalar  value, CvArr* dstarr, const CvArr* maskarr )
02126 {
02127     cv::Mat src1 = cv::cvarrToMat(srcarr1),
02128         dst = cv::cvarrToMat(dstarr), mask;
02129     CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
02130     if( maskarr )
02131         mask = cv::cvarrToMat(maskarr);
02132     cv::subtract( (const cv::Scalar &)value, src1, dst, mask, dst.type() );
02133 }
02134 
02135 
02136 CV_IMPL void cvMul( const CvArr* srcarr1, const CvArr* srcarr2,
02137                     CvArr* dstarr, double scale )
02138 {
02139     cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
02140         dst = cv::cvarrToMat(dstarr);
02141     CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
02142     cv::multiply( src1, src2, dst, scale, dst.type() );
02143 }
02144 
02145 
02146 CV_IMPL void cvDiv( const CvArr* srcarr1, const CvArr* srcarr2,
02147                     CvArr* dstarr, double scale )
02148 {
02149     cv::Mat src2 = cv::cvarrToMat(srcarr2),
02150         dst = cv::cvarrToMat(dstarr), mask;
02151     CV_Assert( src2.size == dst.size && src2.channels() == dst.channels() );
02152 
02153     if( srcarr1 )
02154         cv::divide( cv::cvarrToMat(srcarr1), src2, dst, scale, dst.type() );
02155     else
02156         cv::divide( scale, src2, dst, dst.type() );
02157 }
02158 
02159 
02160 CV_IMPL void
02161 cvAddWeighted( const CvArr* srcarr1, double alpha,
02162                const CvArr* srcarr2, double beta,
02163                double gamma, CvArr* dstarr )
02164 {
02165     cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2),
02166         dst = cv::cvarrToMat(dstarr);
02167     CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() );
02168     cv::addWeighted( src1, alpha, src2, beta, gamma, dst, dst.type() );
02169 }
02170 
02171 
02172 CV_IMPL  void
02173 cvAbsDiff( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr )
02174 {
02175     cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
02176     CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
02177 
02178     cv::absdiff( src1, cv::cvarrToMat(srcarr2), dst );
02179 }
02180 
02181 
02182 CV_IMPL void
02183 cvAbsDiffS( const CvArr* srcarr1, CvArr* dstarr, CvScalar  scalar )
02184 {
02185     cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
02186     CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
02187 
02188     cv::absdiff( src1, (const cv::Scalar &)scalar, dst );
02189 }
02190 
02191 
02192 CV_IMPL void
02193 cvInRange( const void* srcarr1, const void* srcarr2,
02194            const void* srcarr3, void* dstarr )
02195 {
02196     cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
02197     CV_Assert( src1.size == dst.size && dst.type() == CV_8U );
02198 
02199     cv::inRange( src1, cv::cvarrToMat(srcarr2), cv::cvarrToMat(srcarr3), dst );
02200 }
02201 
02202 
02203 CV_IMPL void
02204 cvInRangeS( const void* srcarr1, CvScalar  lowerb, CvScalar  upperb, void* dstarr )
02205 {
02206     cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
02207     CV_Assert( src1.size == dst.size && dst.type() == CV_8U );
02208 
02209     cv::inRange( src1, (const cv::Scalar &)lowerb, (const cv::Scalar &)upperb, dst );
02210 }
02211 
02212 
02213 CV_IMPL void
02214 cvCmp( const void* srcarr1, const void* srcarr2, void* dstarr, int cmp_op )
02215 {
02216     cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
02217     CV_Assert( src1.size == dst.size && dst.type() == CV_8U );
02218 
02219     cv::compare( src1, cv::cvarrToMat(srcarr2), dst, cmp_op );
02220 }
02221 
02222 
02223 CV_IMPL void
02224 cvCmpS( const void* srcarr1, double value, void* dstarr, int cmp_op )
02225 {
02226     cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
02227     CV_Assert( src1.size == dst.size && dst.type() == CV_8U );
02228 
02229     cv::compare( src1, value, dst, cmp_op );
02230 }
02231 
02232 
02233 CV_IMPL void
02234 cvMin( const void* srcarr1, const void* srcarr2, void* dstarr )
02235 {
02236     cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
02237     CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
02238 
02239     cv::min( src1, cv::cvarrToMat(srcarr2), dst );
02240 }
02241 
02242 
02243 CV_IMPL void
02244 cvMax( const void* srcarr1, const void* srcarr2, void* dstarr )
02245 {
02246     cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
02247     CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
02248 
02249     cv::max( src1, cv::cvarrToMat(srcarr2), dst );
02250 }
02251 
02252 
02253 CV_IMPL void
02254 cvMinS( const void* srcarr1, double value, void* dstarr )
02255 {
02256     cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
02257     CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
02258 
02259     cv::min( src1, value, dst );
02260 }
02261 
02262 
02263 CV_IMPL void
02264 cvMaxS( const void* srcarr1, double value, void* dstarr )
02265 {
02266     cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr);
02267     CV_Assert( src1.size == dst.size && src1.type() == dst.type() );
02268 
02269     cv::max( src1, value, dst );
02270 }
02271 
02272 
02273 
02274 namespace cv { namespace hal {
02275 
02276 //=======================================
02277 
02278 #if (ARITHM_USE_IPP == 1)
02279 static inline void fixSteps(int width, int height, size_t elemSize, size_t& step1, size_t& step2, size_t& step)
02280 {
02281     if( height == 1 )
02282         step1 = step2 = step = width*elemSize;
02283 }
02284 #define CALL_IPP_BIN_E_12(fun) \
02285     CV_IPP_CHECK() \
02286     { \
02287         fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
02288         if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0)) \
02289         { \
02290             CV_IMPL_ADD(CV_IMPL_IPP); \
02291             return; \
02292         } \
02293         setIppErrorStatus(); \
02294     }
02295 
02296 #define CALL_IPP_BIN_E_21(fun) \
02297     CV_IPP_CHECK() \
02298     { \
02299         fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
02300         if (0 <= fun(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0)) \
02301         { \
02302             CV_IMPL_ADD(CV_IMPL_IPP); \
02303             return; \
02304         } \
02305         setIppErrorStatus(); \
02306     }
02307 
02308 #define CALL_IPP_BIN_12(fun) \
02309     CV_IPP_CHECK() \
02310     { \
02311         fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
02312         if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height))) \
02313         { \
02314             CV_IMPL_ADD(CV_IMPL_IPP); \
02315             return; \
02316         } \
02317         setIppErrorStatus(); \
02318     }
02319 
02320 #define CALL_IPP_BIN_21(fun) \
02321     CV_IPP_CHECK() \
02322     { \
02323         fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
02324         if (0 <= fun(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height))) \
02325         { \
02326             CV_IMPL_ADD(CV_IMPL_IPP); \
02327             return; \
02328         } \
02329         setIppErrorStatus(); \
02330     }
02331 
02332 #else
02333 #define CALL_IPP_BIN_E_12(fun)
02334 #define CALL_IPP_BIN_E_21(fun)
02335 #define CALL_IPP_BIN_12(fun)
02336 #define CALL_IPP_BIN_21(fun)
02337 #endif
02338 
02339 
02340 //=======================================
02341 // Add
02342 //=======================================
02343 
02344 void add8u( const uchar* src1, size_t step1,
02345                    const uchar* src2, size_t step2,
02346                    uchar* dst, size_t step, int width, int height, void* )
02347 {
02348     CALL_HAL(add8u, cv_hal_add8u, src1, step1, src2, step2, dst, step, width, height)
02349     CALL_IPP_BIN_E_12(ippiAdd_8u_C1RSfs)
02350     (vBinOp<uchar, cv::OpAdd<uchar>, IF_SIMD(VAdd<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
02351 }
02352 
02353 void add8s( const schar* src1, size_t step1,
02354                    const schar* src2, size_t step2,
02355                    schar* dst, size_t step, int width, int height, void* )
02356 {
02357     CALL_HAL(add8s, cv_hal_add8s, src1, step1, src2, step2, dst, step, width, height)
02358     vBinOp<schar, cv::OpAdd<schar>, IF_SIMD(VAdd<schar>)>(src1, step1, src2, step2, dst, step, width, height);
02359 }
02360 
02361 void add16u( const ushort* src1, size_t step1,
02362                     const ushort* src2, size_t step2,
02363                     ushort* dst, size_t step, int width, int height, void* )
02364 {
02365     CALL_HAL(add16u, cv_hal_add16u, src1, step1, src2, step2, dst, step, width, height)
02366     CALL_IPP_BIN_E_12(ippiAdd_16u_C1RSfs)
02367     (vBinOp<ushort, cv::OpAdd<ushort>, IF_SIMD(VAdd<ushort>)>(src1, step1, src2, step2, dst, step, width, height));
02368 }
02369 
02370 void add16s( const short* src1, size_t step1,
02371                     const short* src2, size_t step2,
02372                     short* dst, size_t step, int width, int height, void* )
02373 {
02374     CALL_HAL(add16s, cv_hal_add16s, src1, step1, src2, step2, dst, step, width, height)
02375     CALL_IPP_BIN_E_12(ippiAdd_16s_C1RSfs)
02376     (vBinOp<short, cv::OpAdd<short>, IF_SIMD(VAdd<short>)>(src1, step1, src2, step2, dst, step, width, height));
02377 }
02378 
02379 void add32s( const int* src1, size_t step1,
02380                     const int* src2, size_t step2,
02381                     int* dst, size_t step, int width, int height, void* )
02382 {
02383     CALL_HAL(add32s, cv_hal_add32s, src1, step1, src2, step2, dst, step, width, height)
02384     vBinOp32<int, cv::OpAdd<int>, IF_SIMD(VAdd<int>)>(src1, step1, src2, step2, dst, step, width, height);
02385 }
02386 
02387 void add32f( const float* src1, size_t step1,
02388                     const float* src2, size_t step2,
02389                     float* dst, size_t step, int width, int height, void* )
02390 {
02391     CALL_HAL(add32f, cv_hal_add32f, src1, step1, src2, step2, dst, step, width, height)
02392     CALL_IPP_BIN_12(ippiAdd_32f_C1R)
02393     (vBinOp32<float, cv::OpAdd<float>, IF_SIMD(VAdd<float>)>(src1, step1, src2, step2, dst, step, width, height));
02394 }
02395 
02396 void add64f( const double* src1, size_t step1,
02397                     const double* src2, size_t step2,
02398                     double* dst, size_t step, int width, int height, void* )
02399 {
02400     CALL_HAL(add64f, cv_hal_add64f, src1, step1, src2, step2, dst, step, width, height)
02401     vBinOp64<double, cv::OpAdd<double>, IF_SIMD(VAdd<double>)>(src1, step1, src2, step2, dst, step, width, height);
02402 }
02403 
02404 //=======================================
02405 // Subtract
02406 //=======================================
02407 
02408 void sub8u( const uchar* src1, size_t step1,
02409                    const uchar* src2, size_t step2,
02410                    uchar* dst, size_t step, int width, int height, void* )
02411 {
02412     CALL_HAL(sub8u, cv_hal_sub8u, src1, step1, src2, step2, dst, step, width, height)
02413     CALL_IPP_BIN_E_21(ippiSub_8u_C1RSfs)
02414     (vBinOp<uchar, cv::OpSub<uchar>, IF_SIMD(VSub<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
02415 }
02416 
02417 void sub8s( const schar* src1, size_t step1,
02418                    const schar* src2, size_t step2,
02419                    schar* dst, size_t step, int width, int height, void* )
02420 {
02421     CALL_HAL(sub8s, cv_hal_sub8s, src1, step1, src2, step2, dst, step, width, height)
02422     vBinOp<schar, cv::OpSub<schar>, IF_SIMD(VSub<schar>)>(src1, step1, src2, step2, dst, step, width, height);
02423 }
02424 
02425 void sub16u( const ushort* src1, size_t step1,
02426                     const ushort* src2, size_t step2,
02427                     ushort* dst, size_t step, int width, int height, void* )
02428 {
02429     CALL_HAL(sub16u, cv_hal_sub16u, src1, step1, src2, step2, dst, step, width, height)
02430     CALL_IPP_BIN_E_21(ippiSub_16u_C1RSfs)
02431     (vBinOp<ushort, cv::OpSub<ushort>, IF_SIMD(VSub<ushort>)>(src1, step1, src2, step2, dst, step, width, height));
02432 }
02433 
02434 void sub16s( const short* src1, size_t step1,
02435                     const short* src2, size_t step2,
02436                     short* dst, size_t step, int width, int height, void* )
02437 {
02438     CALL_HAL(sub16s, cv_hal_sub16s, src1, step1, src2, step2, dst, step, width, height)
02439     CALL_IPP_BIN_E_21(ippiSub_16s_C1RSfs)
02440     (vBinOp<short, cv::OpSub<short>, IF_SIMD(VSub<short>)>(src1, step1, src2, step2, dst, step, width, height));
02441 }
02442 
02443 void sub32s( const int* src1, size_t step1,
02444                     const int* src2, size_t step2,
02445                     int* dst, size_t step, int width, int height, void* )
02446 {
02447     CALL_HAL(sub32s, cv_hal_sub32s, src1, step1, src2, step2, dst, step, width, height)
02448     vBinOp32<int, cv::OpSub<int>, IF_SIMD(VSub<int>)>(src1, step1, src2, step2, dst, step, width, height);
02449 }
02450 
02451 void sub32f( const float* src1, size_t step1,
02452                    const float* src2, size_t step2,
02453                    float* dst, size_t step, int width, int height, void* )
02454 {
02455     CALL_HAL(sub32f, cv_hal_sub32f, src1, step1, src2, step2, dst, step, width, height)
02456     CALL_IPP_BIN_21(ippiSub_32f_C1R)
02457     (vBinOp32<float, cv::OpSub<float>, IF_SIMD(VSub<float>)>(src1, step1, src2, step2, dst, step, width, height));
02458 }
02459 
02460 void sub64f( const double* src1, size_t step1,
02461                     const double* src2, size_t step2,
02462                     double* dst, size_t step, int width, int height, void* )
02463 {
02464     CALL_HAL(sub64f, cv_hal_sub64f, src1, step1, src2, step2, dst, step, width, height)
02465     vBinOp64<double, cv::OpSub<double>, IF_SIMD(VSub<double>)>(src1, step1, src2, step2, dst, step, width, height);
02466 }
02467 
02468 //=======================================
02469 
02470 #if (ARITHM_USE_IPP == 1)
02471 #define CALL_IPP_MIN_MAX(fun, type) \
02472     CV_IPP_CHECK() \
02473     { \
02474         type* s1 = (type*)src1; \
02475         type* s2 = (type*)src2; \
02476         type* d  = dst; \
02477         fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
02478         int i = 0; \
02479         for(; i < height; i++) \
02480         { \
02481             if (0 > fun(s1, s2, d, width)) \
02482                 break; \
02483             s1 = (type*)((uchar*)s1 + step1); \
02484             s2 = (type*)((uchar*)s2 + step2); \
02485             d  = (type*)((uchar*)d + step); \
02486         } \
02487         if (i == height) \
02488         { \
02489             CV_IMPL_ADD(CV_IMPL_IPP); \
02490             return; \
02491         } \
02492         setIppErrorStatus(); \
02493     }
02494 #else
02495 #define CALL_IPP_MIN_MAX(fun, type)
02496 #endif
02497 
02498 //=======================================
02499 // Max
02500 //=======================================
02501 
02502 void max8u( const uchar* src1, size_t step1,
02503                    const uchar* src2, size_t step2,
02504                    uchar* dst, size_t step, int width, int height, void* )
02505 {
02506     CALL_HAL(max8u, cv_hal_max8u, src1, step1, src2, step2, dst, step, width, height)
02507     CALL_IPP_MIN_MAX(ippsMaxEvery_8u, uchar)
02508     vBinOp<uchar, cv::OpMax<uchar>, IF_SIMD(VMax<uchar>)>(src1, step1, src2, step2, dst, step, width, height);
02509 }
02510 
02511 void max8s( const schar* src1, size_t step1,
02512                    const schar* src2, size_t step2,
02513                    schar* dst, size_t step, int width, int height, void* )
02514 {
02515     CALL_HAL(max8s, cv_hal_max8s, src1, step1, src2, step2, dst, step, width, height)
02516     vBinOp<schar, cv::OpMax<schar>, IF_SIMD(VMax<schar>)>(src1, step1, src2, step2, dst, step, width, height);
02517 }
02518 
02519 void max16u( const ushort* src1, size_t step1,
02520                     const ushort* src2, size_t step2,
02521                     ushort* dst, size_t step, int width, int height, void* )
02522 {
02523     CALL_HAL(max16u, cv_hal_max16u, src1, step1, src2, step2, dst, step, width, height)
02524     CALL_IPP_MIN_MAX(ippsMaxEvery_16u, ushort)
02525     vBinOp<ushort, cv::OpMax<ushort>, IF_SIMD(VMax<ushort>)>(src1, step1, src2, step2, dst, step, width, height);
02526 }
02527 
02528 void max16s( const short* src1, size_t step1,
02529                     const short* src2, size_t step2,
02530                     short* dst, size_t step, int width, int height, void* )
02531 {
02532     CALL_HAL(max16s, cv_hal_max16s, src1, step1, src2, step2, dst, step, width, height)
02533     vBinOp<short, cv::OpMax<short>, IF_SIMD(VMax<short>)>(src1, step1, src2, step2, dst, step, width, height);
02534 }
02535 
02536 void max32s( const int* src1, size_t step1,
02537                     const int* src2, size_t step2,
02538                     int* dst, size_t step, int width, int height, void* )
02539 {
02540     CALL_HAL(max32s, cv_hal_max32s, src1, step1, src2, step2, dst, step, width, height)
02541     vBinOp32<int, cv::OpMax<int>, IF_SIMD(VMax<int>)>(src1, step1, src2, step2, dst, step, width, height);
02542 }
02543 
02544 void max32f( const float* src1, size_t step1,
02545                     const float* src2, size_t step2,
02546                     float* dst, size_t step, int width, int height, void* )
02547 {
02548     CALL_HAL(max32f, cv_hal_max32f, src1, step1, src2, step2, dst, step, width, height)
02549     CALL_IPP_MIN_MAX(ippsMaxEvery_32f, float)
02550     vBinOp32<float, cv::OpMax<float>, IF_SIMD(VMax<float>)>(src1, step1, src2, step2, dst, step, width, height);
02551 }
02552 
02553 void max64f( const double* src1, size_t step1,
02554                     const double* src2, size_t step2,
02555                     double* dst, size_t step, int width, int height, void* )
02556 {
02557     CALL_HAL(max64f, cv_hal_max64f, src1, step1, src2, step2, dst, step, width, height)
02558     CALL_IPP_MIN_MAX(ippsMaxEvery_64f, double)
02559     vBinOp64<double, cv::OpMax<double>, IF_SIMD(VMax<double>)>(src1, step1, src2, step2, dst, step, width, height);
02560 }
02561 
02562 //=======================================
02563 // Min
02564 //=======================================
02565 
02566 void min8u( const uchar* src1, size_t step1,
02567                    const uchar* src2, size_t step2,
02568                    uchar* dst, size_t step, int width, int height, void* )
02569 {
02570     CALL_HAL(min8u, cv_hal_min8u, src1, step1, src2, step2, dst, step, width, height)
02571     CALL_IPP_MIN_MAX(ippsMinEvery_8u, uchar)
02572     vBinOp<uchar, cv::OpMin<uchar>, IF_SIMD(VMin<uchar>)>(src1, step1, src2, step2, dst, step, width, height);
02573 }
02574 
02575 void min8s( const schar* src1, size_t step1,
02576                    const schar* src2, size_t step2,
02577                    schar* dst, size_t step, int width, int height, void* )
02578 {
02579     CALL_HAL(min8s, cv_hal_min8s, src1, step1, src2, step2, dst, step, width, height)
02580     vBinOp<schar, cv::OpMin<schar>, IF_SIMD(VMin<schar>)>(src1, step1, src2, step2, dst, step, width, height);
02581 }
02582 
02583 void min16u( const ushort* src1, size_t step1,
02584                     const ushort* src2, size_t step2,
02585                     ushort* dst, size_t step, int width, int height, void* )
02586 {
02587     CALL_HAL(min16u, cv_hal_min16u, src1, step1, src2, step2, dst, step, width, height)
02588     CALL_IPP_MIN_MAX(ippsMinEvery_16u, ushort)
02589     vBinOp<ushort, cv::OpMin<ushort>, IF_SIMD(VMin<ushort>)>(src1, step1, src2, step2, dst, step, width, height);
02590 }
02591 
02592 void min16s( const short* src1, size_t step1,
02593                     const short* src2, size_t step2,
02594                     short* dst, size_t step, int width, int height, void* )
02595 {
02596     CALL_HAL(min16s, cv_hal_min16s, src1, step1, src2, step2, dst, step, width, height)
02597     vBinOp<short, cv::OpMin<short>, IF_SIMD(VMin<short>)>(src1, step1, src2, step2, dst, step, width, height);
02598 }
02599 
02600 void min32s( const int* src1, size_t step1,
02601                     const int* src2, size_t step2,
02602                     int* dst, size_t step, int width, int height, void* )
02603 {
02604     CALL_HAL(min32s, cv_hal_min32s, src1, step1, src2, step2, dst, step, width, height)
02605     vBinOp32<int, cv::OpMin<int>, IF_SIMD(VMin<int>)>(src1, step1, src2, step2, dst, step, width, height);
02606 }
02607 
02608 void min32f( const float* src1, size_t step1,
02609                     const float* src2, size_t step2,
02610                     float* dst, size_t step, int width, int height, void* )
02611 {
02612     CALL_HAL(min32f, cv_hal_min32f, src1, step1, src2, step2, dst, step, width, height)
02613     CALL_IPP_MIN_MAX(ippsMinEvery_32f, float)
02614     vBinOp32<float, cv::OpMin<float>, IF_SIMD(VMin<float>)>(src1, step1, src2, step2, dst, step, width, height);
02615 }
02616 
02617 void min64f( const double* src1, size_t step1,
02618                     const double* src2, size_t step2,
02619                     double* dst, size_t step, int width, int height, void* )
02620 {
02621     CALL_HAL(min64f, cv_hal_min64f, src1, step1, src2, step2, dst, step, width, height)
02622     CALL_IPP_MIN_MAX(ippsMinEvery_64f, double)
02623     vBinOp64<double, cv::OpMin<double>, IF_SIMD(VMin<double>)>(src1, step1, src2, step2, dst, step, width, height);
02624 }
02625 
02626 //=======================================
02627 // AbsDiff
02628 //=======================================
02629 
02630 void absdiff8u( const uchar* src1, size_t step1,
02631                        const uchar* src2, size_t step2,
02632                        uchar* dst, size_t step, int width, int height, void* )
02633 {
02634     CALL_HAL(absdiff8u, cv_hal_absdiff8u, src1, step1, src2, step2, dst, step, width, height)
02635     CALL_IPP_BIN_12(ippiAbsDiff_8u_C1R)
02636     (vBinOp<uchar, cv::OpAbsDiff<uchar>, IF_SIMD(VAbsDiff<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
02637 }
02638 
02639 void absdiff8s( const schar* src1, size_t step1,
02640                        const schar* src2, size_t step2,
02641                        schar* dst, size_t step, int width, int height, void* )
02642 {
02643     CALL_HAL(absdiff8s, cv_hal_absdiff8s, src1, step1, src2, step2, dst, step, width, height)
02644     vBinOp<schar, cv::OpAbsDiff<schar>, IF_SIMD(VAbsDiff<schar>)>(src1, step1, src2, step2, dst, step, width, height);
02645 }
02646 
02647 void absdiff16u( const ushort* src1, size_t step1,
02648                         const ushort* src2, size_t step2,
02649                         ushort* dst, size_t step, int width, int height, void* )
02650 {
02651     CALL_HAL(absdiff16u, cv_hal_absdiff16u, src1, step1, src2, step2, dst, step, width, height)
02652     CALL_IPP_BIN_12(ippiAbsDiff_16u_C1R)
02653     (vBinOp<ushort, cv::OpAbsDiff<ushort>, IF_SIMD(VAbsDiff<ushort>)>(src1, step1, src2, step2, dst, step, width, height));
02654 }
02655 
02656 void absdiff16s( const short* src1, size_t step1,
02657                         const short* src2, size_t step2,
02658                         short* dst, size_t step, int width, int height, void* )
02659 {
02660     CALL_HAL(absdiff16s, cv_hal_absdiff16s, src1, step1, src2, step2, dst, step, width, height)
02661     vBinOp<short, cv::OpAbsDiff<short>, IF_SIMD(VAbsDiff<short>)>(src1, step1, src2, step2, dst, step, width, height);
02662 }
02663 
02664 void absdiff32s( const int* src1, size_t step1,
02665                         const int* src2, size_t step2,
02666                         int* dst, size_t step, int width, int height, void* )
02667 {
02668     CALL_HAL(absdiff32s, cv_hal_absdiff32s, src1, step1, src2, step2, dst, step, width, height)
02669     vBinOp32<int, cv::OpAbsDiff<int>, IF_SIMD(VAbsDiff<int>)>(src1, step1, src2, step2, dst, step, width, height);
02670 }
02671 
02672 void absdiff32f( const float* src1, size_t step1,
02673                         const float* src2, size_t step2,
02674                         float* dst, size_t step, int width, int height, void* )
02675 {
02676     CALL_HAL(absdiff32f, cv_hal_absdiff32f, src1, step1, src2, step2, dst, step, width, height)
02677     CALL_IPP_BIN_12(ippiAbsDiff_32f_C1R)
02678     (vBinOp32<float, cv::OpAbsDiff<float>, IF_SIMD(VAbsDiff<float>)>(src1, step1, src2, step2, dst, step, width, height));
02679 }
02680 
02681 void absdiff64f( const double* src1, size_t step1,
02682                         const double* src2, size_t step2,
02683                         double* dst, size_t step, int width, int height, void* )
02684 {
02685     CALL_HAL(absdiff64f, cv_hal_absdiff64f, src1, step1, src2, step2, dst, step, width, height)
02686     vBinOp64<double, cv::OpAbsDiff<double>, IF_SIMD(VAbsDiff<double>)>(src1, step1, src2, step2, dst, step, width, height);
02687 }
02688 
02689 //=======================================
02690 // Logical
02691 //=======================================
02692 
02693 #if (ARITHM_USE_IPP == 1)
02694 #define CALL_IPP_UN(fun) \
02695     CV_IPP_CHECK() \
02696     { \
02697         fixSteps(width, height, sizeof(dst[0]), step1, step2, step); (void)src2; \
02698         if (0 <= fun(src1, (int)step1, dst, (int)step, ippiSize(width, height))) \
02699         { \
02700             CV_IMPL_ADD(CV_IMPL_IPP); \
02701             return; \
02702         } \
02703         setIppErrorStatus(); \
02704     }
02705 #else
02706 #define CALL_IPP_UN(fun)
02707 #endif
02708 
02709 void and8u( const uchar* src1, size_t step1,
02710                    const uchar* src2, size_t step2,
02711                    uchar* dst, size_t step, int width, int height, void* )
02712 {
02713     CALL_HAL(and8u, cv_hal_and8u, src1, step1, src2, step2, dst, step, width, height)
02714     CALL_IPP_BIN_12(ippiAnd_8u_C1R)
02715     (vBinOp<uchar, cv::OpAnd<uchar>, IF_SIMD(VAnd<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
02716 }
02717 
02718 void or8u( const uchar* src1, size_t step1,
02719                   const uchar* src2, size_t step2,
02720                   uchar* dst, size_t step, int width, int height, void* )
02721 {
02722     CALL_HAL(or8u, cv_hal_or8u, src1, step1, src2, step2, dst, step, width, height)
02723     CALL_IPP_BIN_12(ippiOr_8u_C1R)
02724     (vBinOp<uchar, cv::OpOr<uchar>, IF_SIMD(VOr<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
02725 }
02726 
02727 void xor8u( const uchar* src1, size_t step1,
02728                    const uchar* src2, size_t step2,
02729                    uchar* dst, size_t step, int width, int height, void* )
02730 {
02731     CALL_HAL(xor8u, cv_hal_xor8u, src1, step1, src2, step2, dst, step, width, height)
02732     CALL_IPP_BIN_12(ippiXor_8u_C1R)
02733     (vBinOp<uchar, cv::OpXor<uchar>, IF_SIMD(VXor<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
02734 }
02735 
02736 void not8u( const uchar* src1, size_t step1,
02737                    const uchar* src2, size_t step2,
02738                    uchar* dst, size_t step, int width, int height, void* )
02739 {
02740     CALL_HAL(not8u, cv_hal_not8u, src1, step1, dst, step, width, height)
02741     CALL_IPP_UN(ippiNot_8u_C1R)
02742     (vBinOp<uchar, cv::OpNot<uchar>, IF_SIMD(VNot<uchar>)>(src1, step1, src2, step2, dst, step, width, height));
02743 }
02744 
02745 //=======================================
02746 
02747 #if ARITHM_USE_IPP
02748 inline static IppCmpOp convert_cmp(int _cmpop)
02749 {
02750     return _cmpop == CMP_EQ ? ippCmpEq :
02751         _cmpop == CMP_GT ? ippCmpGreater :
02752         _cmpop == CMP_GE ? ippCmpGreaterEq :
02753         _cmpop == CMP_LT ? ippCmpLess :
02754         _cmpop == CMP_LE ? ippCmpLessEq :
02755         (IppCmpOp)-1;
02756 }
02757 #define CALL_IPP_CMP(fun) \
02758     CV_IPP_CHECK() \
02759     { \
02760         IppCmpOp op = convert_cmp(*(int *)_cmpop); \
02761         if( op  >= 0 ) \
02762         { \
02763             fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \
02764             if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), op)) \
02765             { \
02766                 CV_IMPL_ADD(CV_IMPL_IPP); \
02767                 return; \
02768             } \
02769             setIppErrorStatus(); \
02770         } \
02771     }
02772 #else
02773 #define CALL_IPP_CMP(fun)
02774 #endif
02775 
02776 //=======================================
02777 // Compare
02778 //=======================================
02779 
02780 void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2,
02781                   uchar* dst, size_t step, int width, int height, void* _cmpop)
02782 {
02783     CALL_HAL(cmp8u, cv_hal_cmp8u, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
02784     CALL_IPP_CMP(ippiCompare_8u_C1R)
02785   //vz optimized  cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
02786     int code = *(int*)_cmpop;
02787     step1 /= sizeof(src1[0]);
02788     step2 /= sizeof(src2[0]);
02789     if( code == CMP_GE || code == CMP_LT )
02790     {
02791         std::swap(src1, src2);
02792         std::swap(step1, step2);
02793         code = code == CMP_GE ? CMP_LE : CMP_GT;
02794     }
02795 
02796     if( code == CMP_GT || code == CMP_LE )
02797     {
02798         int m = code == CMP_GT ? 0 : 255;
02799         for( ; height--; src1 += step1, src2 += step2, dst += step )
02800         {
02801             int x =0;
02802             #if CV_SSE2
02803             if( USE_SSE2 )
02804             {
02805                 __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi8 (-1);
02806                 __m128i c128 = _mm_set1_epi8 (-128);
02807                 for( ; x <= width - 16; x += 16 )
02808                 {
02809                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
02810                     __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
02811                     // no simd for 8u comparison, that's why we need the trick
02812                     r00 = _mm_sub_epi8(r00,c128);
02813                     r10 = _mm_sub_epi8(r10,c128);
02814 
02815                     r00 =_mm_xor_si128(_mm_cmpgt_epi8(r00, r10), m128);
02816                     _mm_storeu_si128((__m128i*)(dst + x),r00);
02817 
02818                 }
02819             }
02820             #elif CV_NEON
02821             uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255);
02822 
02823             for( ; x <= width - 16; x += 16 )
02824             {
02825                 vst1q_u8(dst+x, veorq_u8(vcgtq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask));
02826             }
02827 
02828            #endif
02829 
02830             for( ; x < width; x++ ){
02831                 dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
02832             }
02833         }
02834     }
02835     else if( code == CMP_EQ || code == CMP_NE )
02836     {
02837         int m = code == CMP_EQ ? 0 : 255;
02838         for( ; height--; src1 += step1, src2 += step2, dst += step )
02839         {
02840             int x = 0;
02841             #if CV_SSE2
02842             if( USE_SSE2 )
02843             {
02844                 __m128i m128 =  code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi8 (-1);
02845                 for( ; x <= width - 16; x += 16 )
02846                 {
02847                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
02848                     __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
02849                     r00 = _mm_xor_si128 ( _mm_cmpeq_epi8 (r00, r10), m128);
02850                     _mm_storeu_si128((__m128i*)(dst + x), r00);
02851                 }
02852             }
02853             #elif CV_NEON
02854             uint8x16_t mask = code == CMP_EQ ? vdupq_n_u8(0) : vdupq_n_u8(255);
02855 
02856             for( ; x <= width - 16; x += 16 )
02857             {
02858                 vst1q_u8(dst+x, veorq_u8(vceqq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask));
02859             }
02860            #endif
02861            for( ; x < width; x++ )
02862                 dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
02863         }
02864     }
02865 }
02866 
02867 void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2,
02868                   uchar* dst, size_t step, int width, int height, void* _cmpop)
02869 {
02870     CALL_HAL(cmp8s, cv_hal_cmp8s, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
02871     cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
02872 }
02873 
02874 void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2,
02875                   uchar* dst, size_t step, int width, int height, void* _cmpop)
02876 {
02877     CALL_HAL(cmp16u, cv_hal_cmp16u, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
02878     CALL_IPP_CMP(ippiCompare_16u_C1R)
02879     cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
02880 }
02881 
02882 void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2,
02883                   uchar* dst, size_t step, int width, int height, void* _cmpop)
02884 {
02885     CALL_HAL(cmp16s, cv_hal_cmp16s, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
02886     CALL_IPP_CMP(ippiCompare_16s_C1R)
02887    //vz optimized cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
02888 
02889     int code = *(int*)_cmpop;
02890     step1 /= sizeof(src1[0]);
02891     step2 /= sizeof(src2[0]);
02892     if( code == CMP_GE || code == CMP_LT )
02893     {
02894         std::swap(src1, src2);
02895         std::swap(step1, step2);
02896         code = code == CMP_GE ? CMP_LE : CMP_GT;
02897     }
02898 
02899     if( code == CMP_GT || code == CMP_LE )
02900     {
02901         int m = code == CMP_GT ? 0 : 255;
02902         for( ; height--; src1 += step1, src2 += step2, dst += step )
02903         {
02904             int x =0;
02905             #if CV_SSE2
02906             if( USE_SSE2)
02907             {
02908                 __m128i m128 =  code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi16 (-1);
02909                 for( ; x <= width - 16; x += 16 )
02910                 {
02911                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
02912                     __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
02913                     r00 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r00, r10), m128);
02914                     __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8));
02915                     __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8));
02916                     r01 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r01, r11), m128);
02917                     r11 = _mm_packs_epi16(r00, r01);
02918                     _mm_storeu_si128((__m128i*)(dst + x), r11);
02919                 }
02920                 if( x <= width-8)
02921                 {
02922                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
02923                     __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
02924                     r00 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r00, r10), m128);
02925                     r10 = _mm_packs_epi16(r00, r00);
02926                     _mm_storel_epi64((__m128i*)(dst + x), r10);
02927 
02928                     x += 8;
02929                 }
02930             }
02931             #elif CV_NEON
02932             uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255);
02933 
02934             for( ; x <= width - 16; x += 16 )
02935             {
02936                 int16x8_t in1 = vld1q_s16(src1 + x);
02937                 int16x8_t in2 = vld1q_s16(src2 + x);
02938                 uint8x8_t t1 = vmovn_u16(vcgtq_s16(in1, in2));
02939 
02940                 in1 = vld1q_s16(src1 + x + 8);
02941                 in2 = vld1q_s16(src2 + x + 8);
02942                 uint8x8_t t2 = vmovn_u16(vcgtq_s16(in1, in2));
02943 
02944                 vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask));
02945             }
02946             #endif
02947 
02948             for( ; x < width; x++ ){
02949                  dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m);
02950             }
02951         }
02952     }
02953     else if( code == CMP_EQ || code == CMP_NE )
02954     {
02955         int m = code == CMP_EQ ? 0 : 255;
02956         for( ; height--; src1 += step1, src2 += step2, dst += step )
02957         {
02958             int x = 0;
02959             #if CV_SSE2
02960             if( USE_SSE2 )
02961             {
02962                 __m128i m128 =  code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi16 (-1);
02963                 for( ; x <= width - 16; x += 16 )
02964                 {
02965                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
02966                     __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
02967                     r00 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r00, r10), m128);
02968                     __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8));
02969                     __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8));
02970                     r01 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r01, r11), m128);
02971                     r11 = _mm_packs_epi16(r00, r01);
02972                     _mm_storeu_si128((__m128i*)(dst + x), r11);
02973                 }
02974                 if( x <= width - 8)
02975                 {
02976                     __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x));
02977                     __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x));
02978                     r00 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r00, r10), m128);
02979                     r10 = _mm_packs_epi16(r00, r00);
02980                     _mm_storel_epi64((__m128i*)(dst + x), r10);
02981 
02982                     x += 8;
02983                 }
02984             }
02985             #elif CV_NEON
02986             uint8x16_t mask = code == CMP_EQ ? vdupq_n_u8(0) : vdupq_n_u8(255);
02987 
02988             for( ; x <= width - 16; x += 16 )
02989             {
02990                 int16x8_t in1 = vld1q_s16(src1 + x);
02991                 int16x8_t in2 = vld1q_s16(src2 + x);
02992                 uint8x8_t t1 = vmovn_u16(vceqq_s16(in1, in2));
02993 
02994                 in1 = vld1q_s16(src1 + x + 8);
02995                 in2 = vld1q_s16(src2 + x + 8);
02996                 uint8x8_t t2 = vmovn_u16(vceqq_s16(in1, in2));
02997 
02998                 vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask));
02999             }
03000             #endif
03001             for( ; x < width; x++ )
03002                 dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
03003         }
03004     }
03005 }
03006 
03007 void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2,
03008                    uchar* dst, size_t step, int width, int height, void* _cmpop)
03009 {
03010     CALL_HAL(cmp32s, cv_hal_cmp32s, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
03011     cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
03012 }
03013 
03014 void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2,
03015                   uchar* dst, size_t step, int width, int height, void* _cmpop)
03016 {
03017     CALL_HAL(cmp32f, cv_hal_cmp32f, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
03018     CALL_IPP_CMP(ippiCompare_32f_C1R)
03019     cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
03020 }
03021 
03022 void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2,
03023                   uchar* dst, size_t step, int width, int height, void* _cmpop)
03024 {
03025     CALL_HAL(cmp64f, cv_hal_cmp64f, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop)
03026     cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop);
03027 }
03028 
03029 //=======================================
03030 
03031 #if defined HAVE_IPP
03032 #define CALL_IPP_MUL(fun) \
03033     CV_IPP_CHECK() \
03034     { \
03035         if (std::fabs(fscale - 1) <= FLT_EPSILON) \
03036         { \
03037             if (fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0) >= 0) \
03038             { \
03039                 CV_IMPL_ADD(CV_IMPL_IPP); \
03040                 return; \
03041             } \
03042             setIppErrorStatus(); \
03043         } \
03044     }
03045 
03046 #define CALL_IPP_MUL_2(fun) \
03047     CV_IPP_CHECK() \
03048     { \
03049         if (std::fabs(fscale - 1) <= FLT_EPSILON) \
03050         { \
03051             if (fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height)) >= 0) \
03052             { \
03053                 CV_IMPL_ADD(CV_IMPL_IPP); \
03054                 return; \
03055             } \
03056             setIppErrorStatus(); \
03057         } \
03058     }
03059 
03060 #else
03061 #define CALL_IPP_MUL(fun)
03062 #define CALL_IPP_MUL_2(fun)
03063 #endif
03064 
03065 //=======================================
03066 // Multilpy
03067 //=======================================
03068 
03069 void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
03070                    uchar* dst, size_t step, int width, int height, void* scale)
03071 {
03072     CALL_HAL(mul8u, cv_hal_mul8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03073     float fscale = (float)*(const double*)scale;
03074     CALL_IPP_MUL(ippiMul_8u_C1RSfs)
03075     mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
03076 }
03077 
03078 void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
03079                    schar* dst, size_t step, int width, int height, void* scale)
03080 {
03081     CALL_HAL(mul8s, cv_hal_mul8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03082     mul_(src1, step1, src2, step2, dst, step, width, height, (float)*(const double*)scale);
03083 }
03084 
03085 void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
03086                     ushort* dst, size_t step, int width, int height, void* scale)
03087 {
03088     CALL_HAL(mul16u, cv_hal_mul16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03089     float fscale = (float)*(const double*)scale;
03090     CALL_IPP_MUL(ippiMul_16u_C1RSfs)
03091     mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
03092 }
03093 
03094 void mul16s( const short* src1, size_t step1, const short* src2, size_t step2,
03095                     short* dst, size_t step, int width, int height, void* scale)
03096 {
03097     CALL_HAL(mul16s, cv_hal_mul16s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03098     float fscale = (float)*(const double*)scale;
03099     CALL_IPP_MUL(ippiMul_16s_C1RSfs)
03100     mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
03101 }
03102 
03103 void mul32s( const int* src1, size_t step1, const int* src2, size_t step2,
03104                     int* dst, size_t step, int width, int height, void* scale)
03105 {
03106     CALL_HAL(mul32s, cv_hal_mul32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03107     mul_(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03108 }
03109 
03110 void mul32f( const float* src1, size_t step1, const float* src2, size_t step2,
03111                     float* dst, size_t step, int width, int height, void* scale)
03112 {
03113     CALL_HAL(mul32f, cv_hal_mul32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03114     float fscale = (float)*(const double*)scale;
03115     CALL_IPP_MUL_2(ippiMul_32f_C1R)
03116     mul_(src1, step1, src2, step2, dst, step, width, height, fscale);
03117 }
03118 
03119 void mul64f( const double* src1, size_t step1, const double* src2, size_t step2,
03120                     double* dst, size_t step, int width, int height, void* scale)
03121 {
03122     CALL_HAL(mul64f, cv_hal_mul64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03123     mul_(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03124 }
03125 
03126 //=======================================
03127 // Divide
03128 //=======================================
03129 
03130 void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
03131                    uchar* dst, size_t step, int width, int height, void* scale)
03132 {
03133     CALL_HAL(div8u, cv_hal_div8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03134     if( src1 )
03135         div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03136     else
03137         recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03138 }
03139 
03140 void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
03141                   schar* dst, size_t step, int width, int height, void* scale)
03142 {
03143     CALL_HAL(div8s, cv_hal_div8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03144     div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03145 }
03146 
03147 void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
03148                     ushort* dst, size_t step, int width, int height, void* scale)
03149 {
03150     CALL_HAL(div16u, cv_hal_div16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03151     div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03152 }
03153 
03154 void div16s( const short* src1, size_t step1, const short* src2, size_t step2,
03155                     short* dst, size_t step, int width, int height, void* scale)
03156 {
03157     CALL_HAL(div16s, cv_hal_div16s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03158     div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03159 }
03160 
03161 void div32s( const int* src1, size_t step1, const int* src2, size_t step2,
03162                     int* dst, size_t step, int width, int height, void* scale)
03163 {
03164     CALL_HAL(div32s, cv_hal_div32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03165     div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03166 }
03167 
03168 void div32f( const float* src1, size_t step1, const float* src2, size_t step2,
03169                     float* dst, size_t step, int width, int height, void* scale)
03170 {
03171     CALL_HAL(div32f, cv_hal_div32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03172     div_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03173 }
03174 
03175 void div64f( const double* src1, size_t step1, const double* src2, size_t step2,
03176                     double* dst, size_t step, int width, int height, void* scale)
03177 {
03178     CALL_HAL(div64f, cv_hal_div64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03179     div_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03180 }
03181 
03182 //=======================================
03183 // Reciprocial
03184 //=======================================
03185 
03186 void recip8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
03187                   uchar* dst, size_t step, int width, int height, void* scale)
03188 {
03189     CALL_HAL(recip8u, cv_hal_recip8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03190     recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03191 }
03192 
03193 void recip8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
03194                   schar* dst, size_t step, int width, int height, void* scale)
03195 {
03196     CALL_HAL(recip8s, cv_hal_recip8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03197     recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03198 }
03199 
03200 void recip16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
03201                    ushort* dst, size_t step, int width, int height, void* scale)
03202 {
03203     CALL_HAL(recip16u, cv_hal_recip16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03204     recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03205 }
03206 
03207 void recip16s( const short* src1, size_t step1, const short* src2, size_t step2,
03208                    short* dst, size_t step, int width, int height, void* scale)
03209 {
03210     CALL_HAL(recip16s, cv_hal_recip16s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03211     recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03212 }
03213 
03214 void recip32s( const int* src1, size_t step1, const int* src2, size_t step2,
03215                    int* dst, size_t step, int width, int height, void* scale)
03216 {
03217     CALL_HAL(recip32s, cv_hal_recip32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03218     recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03219 }
03220 
03221 void recip32f( const float* src1, size_t step1, const float* src2, size_t step2,
03222                    float* dst, size_t step, int width, int height, void* scale)
03223 {
03224     CALL_HAL(recip32f, cv_hal_recip32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03225     recip_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03226 }
03227 
03228 void recip64f( const double* src1, size_t step1, const double* src2, size_t step2,
03229                    double* dst, size_t step, int width, int height, void* scale)
03230 {
03231     CALL_HAL(recip64f, cv_hal_recip64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
03232     recip_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
03233 }
03234 
03235 //=======================================
03236 // Add weighted
03237 //=======================================
03238 
03239 void
03240 addWeighted8u( const uchar* src1, size_t step1,
03241                const uchar* src2, size_t step2,
03242                uchar* dst, size_t step, int width, int height,
03243                void* scalars )
03244 {
03245     CALL_HAL(addWeighted8u, cv_hal_addWeighted8u, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
03246     const double* scalars_ = (const double*)scalars;
03247     float alpha = (float)scalars_[0], beta = (float)scalars_[1], gamma = (float)scalars_[2];
03248 
03249     for( ; height--; src1 += step1, src2 += step2, dst += step )
03250     {
03251         int x = 0;
03252 
03253 #if CV_SSE2
03254         if( USE_SSE2 )
03255         {
03256             __m128 a4 = _mm_set1_ps(alpha), b4 = _mm_set1_ps(beta), g4 = _mm_set1_ps(gamma);
03257             __m128i z = _mm_setzero_si128();
03258 
03259             for( ; x <= width - 8; x += 8 )
03260             {
03261                 __m128i u = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src1 + x)), z);
03262                 __m128i v = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src2 + x)), z);
03263 
03264                 __m128 u0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(u, z));
03265                 __m128 u1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(u, z));
03266                 __m128 v0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v, z));
03267                 __m128 v1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, z));
03268 
03269                 u0 = _mm_add_ps(_mm_mul_ps(u0, a4), _mm_mul_ps(v0, b4));
03270                 u1 = _mm_add_ps(_mm_mul_ps(u1, a4), _mm_mul_ps(v1, b4));
03271                 u0 = _mm_add_ps(u0, g4); u1 = _mm_add_ps(u1, g4);
03272 
03273                 u = _mm_packs_epi32(_mm_cvtps_epi32(u0), _mm_cvtps_epi32(u1));
03274                 u = _mm_packus_epi16(u, u);
03275 
03276                 _mm_storel_epi64((__m128i*)(dst + x), u);
03277             }
03278         }
03279 #elif CV_NEON
03280         float32x4_t g = vdupq_n_f32 (gamma);
03281 
03282         for( ; x <= width - 8; x += 8 )
03283         {
03284             uint8x8_t in1 = vld1_u8(src1+x);
03285             uint16x8_t in1_16 = vmovl_u8(in1);
03286             float32x4_t in1_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in1_16)));
03287             float32x4_t in1_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in1_16)));
03288 
03289             uint8x8_t in2 = vld1_u8(src2+x);
03290             uint16x8_t in2_16 = vmovl_u8(in2);
03291             float32x4_t in2_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in2_16)));
03292             float32x4_t in2_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in2_16)));
03293 
03294             float32x4_t out_f_l = vaddq_f32(vmulq_n_f32(in1_f_l, alpha), vmulq_n_f32(in2_f_l, beta));
03295             float32x4_t out_f_h = vaddq_f32(vmulq_n_f32(in1_f_h, alpha), vmulq_n_f32(in2_f_h, beta));
03296             out_f_l = vaddq_f32(out_f_l, g);
03297             out_f_h = vaddq_f32(out_f_h, g);
03298 
03299             uint16x4_t out_16_l = vqmovun_s32(cv_vrndq_s32_f32(out_f_l));
03300             uint16x4_t out_16_h = vqmovun_s32(cv_vrndq_s32_f32(out_f_h));
03301 
03302             uint16x8_t out_16 = vcombine_u16(out_16_l, out_16_h);
03303             uint8x8_t out = vqmovn_u16(out_16);
03304 
03305             vst1_u8(dst+x, out);
03306         }
03307 #endif
03308         #if CV_ENABLE_UNROLLED
03309         for( ; x <= width - 4; x += 4 )
03310         {
03311             float t0, t1;
03312             t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma;
03313             t1 = CV_8TO32F(src1[x+1])*alpha + CV_8TO32F(src2[x+1])*beta + gamma;
03314 
03315             dst[x] = saturate_cast<uchar>(t0);
03316             dst[x+1] = saturate_cast<uchar>(t1);
03317 
03318             t0 = CV_8TO32F(src1[x+2])*alpha + CV_8TO32F(src2[x+2])*beta + gamma;
03319             t1 = CV_8TO32F(src1[x+3])*alpha + CV_8TO32F(src2[x+3])*beta + gamma;
03320 
03321             dst[x+2] = saturate_cast<uchar>(t0);
03322             dst[x+3] = saturate_cast<uchar>(t1);
03323         }
03324         #endif
03325 
03326         for( ; x < width; x++ )
03327         {
03328             float t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma;
03329             dst[x] = saturate_cast<uchar>(t0);
03330         }
03331     }
03332 }
03333 
03334 void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
03335                            schar* dst, size_t step, int width, int height, void* scalars )
03336 {
03337     CALL_HAL(addWeighted8s, cv_hal_addWeighted8s, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
03338     addWeighted_<schar, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
03339 }
03340 
03341 void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
03342                             ushort* dst, size_t step, int width, int height, void* scalars )
03343 {
03344     CALL_HAL(addWeighted16u, cv_hal_addWeighted16u, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
03345     addWeighted_<ushort, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
03346 }
03347 
03348 void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2,
03349                             short* dst, size_t step, int width, int height, void* scalars )
03350 {
03351     CALL_HAL(addWeighted16s, cv_hal_addWeighted16s, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
03352     addWeighted_<short, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
03353 }
03354 
03355 void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2,
03356                             int* dst, size_t step, int width, int height, void* scalars )
03357 {
03358     CALL_HAL(addWeighted32s, cv_hal_addWeighted32s, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
03359     addWeighted_<int, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
03360 }
03361 
03362 void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2,
03363                             float* dst, size_t step, int width, int height, void* scalars )
03364 {
03365     CALL_HAL(addWeighted32f, cv_hal_addWeighted32f, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
03366     addWeighted_<float, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
03367 }
03368 
03369 void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2,
03370                             double* dst, size_t step, int width, int height, void* scalars )
03371 {
03372     CALL_HAL(addWeighted64f, cv_hal_addWeighted64f, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
03373     addWeighted_<double, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
03374 }
03375 
03376 }} // cv::hal::
03377 
03378 /* End of file. */
03379
Repository toolbox

Repository details

Type:	Program
Created:	26 Jul 2017
Imports:	3
Forks:	0
Commits:	168
Dependents:	0
Dependencies:	0
Followers:	9
Important changes to repositories hosted on mbed.com

arithm.cpp

Repository toolbox

Repository details

Important Information for this Arm website

Important changes to repositories hosted on mbed.com

arithm.cpp

Repository toolbox

Repository details

Important Information for this Arm website

Access Warning