arithm.cpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

/* ////////////////////////////////////////////////////////////////////
//
//  Arithmetic and logical operations: +, -, *, /, &, |, ^, ~, abs ...
//
// */

#include "precomp.hpp"
#include "opencl_kernels_core.hpp"

namespace cv
{

/****************************************************************************************\
*                                   logical operations                                   *
\****************************************************************************************/

void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize )
{
    int scn = (int)sc.total(), cn = CV_MAT_CN(buftype);
    size_t esz = CV_ELEM_SIZE(buftype);
    getConvertFunc(sc.depth(), buftype)(sc.ptr(), 1, 0, 1, scbuf, 1, Size(std::min(cn, scn), 1), 0);
    // unroll the scalar
    if( scn < cn )
    {
        CV_Assert( scn == 1 );
        size_t esz1 = CV_ELEM_SIZE1(buftype);
        for( size_t i = esz1; i < esz; i++ )
            scbuf[i] = scbuf[i - esz1];
    }
    for( size_t i = esz; i < blocksize*esz; i++ )
        scbuf[i] = scbuf[i - esz];
}


enum { OCL_OP_ADD=0, OCL_OP_SUB=1, OCL_OP_RSUB=2, OCL_OP_ABSDIFF=3, OCL_OP_MUL=4,
       OCL_OP_MUL_SCALE=5, OCL_OP_DIV_SCALE=6, OCL_OP_RECIP_SCALE=7, OCL_OP_ADDW=8,
       OCL_OP_AND=9, OCL_OP_OR=10, OCL_OP_XOR=11, OCL_OP_NOT=12, OCL_OP_MIN=13, OCL_OP_MAX=14,
       OCL_OP_RDIV_SCALE=15 };

#ifdef HAVE_OPENCL

static const char* oclop2str[] = { "OP_ADD", "OP_SUB", "OP_RSUB", "OP_ABSDIFF",
    "OP_MUL", "OP_MUL_SCALE", "OP_DIV_SCALE", "OP_RECIP_SCALE",
    "OP_ADDW", "OP_AND", "OP_OR", "OP_XOR", "OP_NOT", "OP_MIN", "OP_MAX", "OP_RDIV_SCALE", 0 };

static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst,
                          InputArray _mask, bool bitwise, int oclop, bool haveScalar )
{
    bool haveMask = !_mask.empty();
    int srctype = _src1.type();
    int srcdepth = CV_MAT_DEPTH(srctype);
    int cn = CV_MAT_CN(srctype);

    const ocl::Device d = ocl::Device::getDefault();
    bool doubleSupport = d.doubleFPConfig() > 0;
    if( oclop < 0 || ((haveMask || haveScalar) && cn > 4) ||
        (!doubleSupport && srcdepth == CV_64F && !bitwise))
        return false;

    char opts[1024];
    int kercn = haveMask || haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
    int scalarcn = kercn == 3 ? 4 : kercn;
    int rowsPerWI = d.isIntel() ? 4 : 1;

    sprintf(opts, "-D %s%s -D %s -D dstT=%s%s -D dstT_C1=%s -D workST=%s -D cn=%d -D rowsPerWI=%d",
            haveMask ? "MASK_" : "", haveScalar ? "UNARY_OP" : "BINARY_OP", oclop2str[oclop],
            bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, kercn)) :
                ocl::typeToStr(CV_MAKETYPE(srcdepth, kercn)), doubleSupport ? " -D DOUBLE_SUPPORT" : "",
            bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, 1)) :
                ocl::typeToStr(CV_MAKETYPE(srcdepth, 1)),
            bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, scalarcn)) :
                ocl::typeToStr(CV_MAKETYPE(srcdepth, scalarcn)),
            kercn, rowsPerWI);

    ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
    if (k.empty())
        return false;

    UMat src1 = _src1.getUMat(), src2;
    UMat dst = _dst.getUMat(), mask = _mask.getUMat();

    ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cn, kercn);
    ocl::KernelArg dstarg = haveMask ? ocl::KernelArg::ReadWrite(dst, cn, kercn) :
                                       ocl::KernelArg::WriteOnly(dst, cn, kercn);
    ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask, 1);

    if( haveScalar )
    {
        size_t esz = CV_ELEM_SIZE1(srctype)*scalarcn;
        double buf[4] = {0,0,0,0};

        if( oclop != OCL_OP_NOT )
        {
            Mat src2sc = _src2.getMat();
            convertAndUnrollScalar(src2sc, srctype, (uchar*)buf, 1);
        }

        ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, 0, buf, esz);

        if( !haveMask )
            k.args(src1arg, dstarg, scalararg);
        else
            k.args(src1arg, maskarg, dstarg, scalararg);
    }
    else
    {
        src2 = _src2.getUMat();
        ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cn, kercn);

        if( !haveMask )
            k.args(src1arg, src2arg, dstarg);
        else
            k.args(src1arg, src2arg, maskarg, dstarg);
    }

    size_t globalsize[] = { (size_t)src1.cols * cn / kercn, ((size_t)src1.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, 0, false);
}

#endif

static void binary_op( InputArray _src1, InputArray _src2, OutputArray _dst,
                       InputArray _mask, const BinaryFuncC* tab,
                       bool bitwise, int oclop )
{
    const _InputArray *psrc1 = &_src1, *psrc2 = &_src2;
    int kind1 = psrc1->kind(), kind2 = psrc2->kind();
    int type1 = psrc1->type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
    int type2 = psrc2->type(), depth2 = CV_MAT_DEPTH(type2), cn2 = CV_MAT_CN(type2);
    int dims1 = psrc1->dims(), dims2 = psrc2->dims();
    Size sz1 = dims1 <= 2 ? psrc1->size() : Size();
    Size sz2 = dims2 <= 2 ? psrc2->size() : Size();
#ifdef HAVE_OPENCL
    bool use_opencl = (kind1 == _InputArray::UMAT || kind2 == _InputArray::UMAT) &&
            dims1 <= 2 && dims2 <= 2;
#endif
    bool haveMask = !_mask.empty(), haveScalar = false;
    BinaryFuncC func;

    if( dims1 <= 2 && dims2 <= 2 && kind1 == kind2 && sz1 == sz2 && type1 == type2 && !haveMask )
    {
        _dst.create(sz1, type1);
#ifdef HAVE_OPENCL
        CV_OCL_RUN(use_opencl,
                   ocl_binary_op(*psrc1, *psrc2, _dst, _mask, bitwise, oclop, false))
#endif

        if( bitwise )
        {
            func = *tab;
            cn = (int)CV_ELEM_SIZE(type1);
        }
        else
            func = tab[depth1];

        Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat();
        Size sz = getContinuousSize(src1, src2, dst);
        size_t len = sz.width*(size_t)cn;
        if( len == (size_t)(int)len )
        {
            sz.width = (int)len;
            func(src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, 0);
            return;
        }
    }

    if( oclop == OCL_OP_NOT )
        haveScalar = true;
    else if( (kind1 == _InputArray::MATX) + (kind2 == _InputArray::MATX) == 1 ||
             !psrc1->sameSize(*psrc2) || type1 != type2 )
    {
        if( checkScalar(*psrc1, type2, kind1, kind2) )
        {
            // src1 is a scalar; swap it with src2
            swap(psrc1, psrc2);
            swap(type1, type2);
            swap(depth1, depth2);
            swap(cn, cn2);
            swap(sz1, sz2);
        }
        else if( !checkScalar(*psrc2, type1, kind2, kind1) )
            CV_Error( CV_StsUnmatchedSizes,
                      "The operation is neither 'array op array' (where arrays have the same size and type), "
                      "nor 'array op scalar', nor 'scalar op array'" );
        haveScalar = true;
    }
    else
    {
        CV_Assert( psrc1->sameSize(*psrc2) && type1 == type2 );
    }

    size_t esz = CV_ELEM_SIZE(type1);
    size_t blocksize0 = (BLOCK_SIZE + esz-1)/esz;
    BinaryFunc copymask = 0;
    bool reallocate = false;

    if( haveMask )
    {
        int mtype = _mask.type();
        CV_Assert( (mtype == CV_8U || mtype == CV_8S) && _mask.sameSize(*psrc1));
        copymask = getCopyMaskFunc(esz);
        reallocate = !_dst.sameSize(*psrc1) || _dst.type() != type1;
    }

    AutoBuffer<uchar> _buf;
    uchar *scbuf = 0, *maskbuf = 0;

    _dst.createSameSize(*psrc1, type1);
    // if this is mask operation and dst has been reallocated,
    // we have to clear the destination
    if( haveMask && reallocate )
        _dst.setTo(0.);
#ifdef HAVE_OPENCL
    CV_OCL_RUN(use_opencl,
               ocl_binary_op(*psrc1, *psrc2, _dst, _mask, bitwise, oclop, haveScalar))
#endif


    Mat src1 = psrc1->getMat(), src2 = psrc2->getMat();
    Mat dst = _dst.getMat(), mask = _mask.getMat();

    if( bitwise )
    {
        func = *tab;
        cn = (int)esz;
    }
    else
        func = tab[depth1];

    if( !haveScalar )
    {
        const Mat* arrays[] = { &src1, &src2, &dst, &mask, 0 };
        uchar* ptrs[4];

        NAryMatIterator it(arrays, ptrs);
        size_t total = it.size, blocksize = total;

        if( blocksize*cn > INT_MAX )
            blocksize = INT_MAX/cn;

        if( haveMask )
        {
            blocksize = std::min(blocksize, blocksize0);
            _buf.allocate(blocksize*esz);
            maskbuf = _buf;
        }

        for( size_t i = 0; i < it.nplanes; i++, ++it )
        {
            for( size_t j = 0; j < total; j += blocksize )
            {
                int bsz = (int)MIN(total - j, blocksize);

                func( ptrs[0], 0, ptrs[1], 0, haveMask ? maskbuf : ptrs[2], 0, bsz*cn, 1, 0 );
                if( haveMask )
                {
                    copymask( maskbuf, 0, ptrs[3], 0, ptrs[2], 0, Size(bsz, 1), &esz );
                    ptrs[3] += bsz;
                }

                bsz *= (int)esz;
                ptrs[0] += bsz; ptrs[1] += bsz; ptrs[2] += bsz;
            }
        }
    }
    else
    {
        const Mat* arrays[] = { &src1, &dst, &mask, 0 };
        uchar* ptrs[3];

        NAryMatIterator it(arrays, ptrs);
        size_t total = it.size, blocksize = std::min(total, blocksize0);

        _buf.allocate(blocksize*(haveMask ? 2 : 1)*esz + 32);
        scbuf = _buf;
        maskbuf = alignPtr(scbuf + blocksize*esz, 16);

        convertAndUnrollScalar( src2, src1.type(), scbuf, blocksize);

        for( size_t i = 0; i < it.nplanes; i++, ++it )
        {
            for( size_t j = 0; j < total; j += blocksize )
            {
                int bsz = (int)MIN(total - j, blocksize);

                func( ptrs[0], 0, scbuf, 0, haveMask ? maskbuf : ptrs[1], 0, bsz*cn, 1, 0 );
                if( haveMask )
                {
                    copymask( maskbuf, 0, ptrs[2], 0, ptrs[1], 0, Size(bsz, 1), &esz );
                    ptrs[2] += bsz;
                }

                bsz *= (int)esz;
                ptrs[0] += bsz; ptrs[1] += bsz;
            }
        }
    }
}

static BinaryFuncC* getMaxTab()
{
    static BinaryFuncC maxTab[] =
    {
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::max8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::max8s),
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::max16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::max16s),
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::max32s),
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::max32f), (BinaryFuncC)cv::hal::max64f,
        0
    };

    return maxTab;
}

static BinaryFuncC* getMinTab()
{
    static BinaryFuncC minTab[] =
    {
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::min8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::min8s),
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::min16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::min16s),
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::min32s),
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::min32f), (BinaryFuncC)cv::hal::min64f,
        0
    };

    return minTab;
}

}

void cv::bitwise_and(InputArray a, InputArray b, OutputArray c, InputArray mask)
{
    BinaryFuncC f = (BinaryFuncC)GET_OPTIMIZED(cv::hal::and8u);
    binary_op(a, b, c, mask, &f, true, OCL_OP_AND);
}

void cv::bitwise_or(InputArray a, InputArray b, OutputArray c, InputArray mask)
{
    BinaryFuncC f = (BinaryFuncC)GET_OPTIMIZED(cv::hal::or8u);
    binary_op(a, b, c, mask, &f, true, OCL_OP_OR);
}

void cv::bitwise_xor(InputArray a, InputArray b, OutputArray c, InputArray mask)
{
    BinaryFuncC f = (BinaryFuncC)GET_OPTIMIZED(cv::hal::xor8u);
    binary_op(a, b, c, mask, &f, true, OCL_OP_XOR);
}

void cv::bitwise_not(InputArray a, OutputArray c, InputArray mask)
{
    BinaryFuncC f = (BinaryFuncC)GET_OPTIMIZED(cv::hal::not8u);
    binary_op(a, a, c, mask, &f, true, OCL_OP_NOT);
}

void cv::max( InputArray src1, InputArray src2, OutputArray dst )
{
    binary_op(src1, src2, dst, noArray(), getMaxTab(), false, OCL_OP_MAX );
}

void cv::min( InputArray src1, InputArray src2, OutputArray dst )
{
    binary_op(src1, src2, dst, noArray(), getMinTab(), false, OCL_OP_MIN );
}

void cv::max(const Mat& src1, const Mat& src2, Mat& dst)
{
    OutputArray _dst(dst);
    binary_op(src1, src2, _dst, noArray(), getMaxTab(), false, OCL_OP_MAX );
}

void cv::min(const Mat& src1, const Mat& src2, Mat& dst)
{
    OutputArray _dst(dst);
    binary_op(src1, src2, _dst, noArray(), getMinTab(), false, OCL_OP_MIN );
}

void cv::max(const UMat & src1, const UMat & src2, UMat & dst)
{
    OutputArray _dst(dst);
    binary_op(src1, src2, _dst, noArray(), getMaxTab(), false, OCL_OP_MAX );
}

void cv::min(const UMat & src1, const UMat & src2, UMat & dst)
{
    OutputArray _dst(dst);
    binary_op(src1, src2, _dst, noArray(), getMinTab(), false, OCL_OP_MIN );
}


/****************************************************************************************\
*                                     add/subtract                                      *
\****************************************************************************************/

namespace cv
{

static int actualScalarDepth(const double* data, int len)
{
    int i = 0, minval = INT_MAX, maxval = INT_MIN;
    for(; i < len; ++i)
    {
        int ival = cvRound(data[i]);
        if( ival != data[i] )
            break;
        minval = MIN(minval, ival);
        maxval = MAX(maxval, ival);
    }
    return i < len ? CV_64F :
        minval >= 0 && maxval <= (int)UCHAR_MAX ? CV_8U :
        minval >= (int)SCHAR_MIN && maxval <= (int)SCHAR_MAX ? CV_8S :
        minval >= 0 && maxval <= (int)USHRT_MAX ? CV_16U :
        minval >= (int)SHRT_MIN && maxval <= (int)SHRT_MAX ? CV_16S :
        CV_32S;
}

#ifdef HAVE_OPENCL

static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
                          InputArray _mask, int wtype,
                          void* usrdata, int oclop,
                          bool haveScalar )
{
    const ocl::Device d = ocl::Device::getDefault();
    bool doubleSupport = d.doubleFPConfig() > 0;
    int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
    bool haveMask = !_mask.empty();

    if ( (haveMask || haveScalar) && cn > 4 )
        return false;

    int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32S, CV_MAT_DEPTH(wtype));
    if (!doubleSupport)
        wdepth = std::min(wdepth, CV_32F);

    wtype = CV_MAKETYPE(wdepth, cn);
    int type2 = haveScalar ? wtype : _src2.type(), depth2 = CV_MAT_DEPTH(type2);
    if (!doubleSupport && (depth2 == CV_64F || depth1 == CV_64F))
        return false;

    int kercn = haveMask || haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
    int scalarcn = kercn == 3 ? 4 : kercn, rowsPerWI = d.isIntel() ? 4 : 1;

    char cvtstr[4][32], opts[1024];
    sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT1_C1=%s -D srcT2=%s -D srcT2_C1=%s "
            "-D dstT=%s -D dstT_C1=%s -D workT=%s -D workST=%s -D scaleT=%s -D wdepth=%d -D convertToWT1=%s "
            "-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d -D rowsPerWI=%d -D convertFromU=%s",
            (haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"),
            oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)),
            ocl::typeToStr(depth1), ocl::typeToStr(CV_MAKETYPE(depth2, kercn)),
            ocl::typeToStr(depth2), ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)),
            ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)),
            ocl::typeToStr(CV_MAKETYPE(wdepth, scalarcn)),
            ocl::typeToStr(wdepth), wdepth,
            ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]),
            ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]),
            ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2]),
            doubleSupport ? " -D DOUBLE_SUPPORT" : "", kercn, rowsPerWI,
            oclop == OCL_OP_ABSDIFF && wdepth == CV_32S && ddepth == wdepth ?
                ocl::convertTypeStr(CV_8U, ddepth, kercn, cvtstr[3]) : "noconvert");

    size_t usrdata_esz = CV_ELEM_SIZE(wdepth);
    const uchar* usrdata_p = (const uchar*)usrdata;
    const double* usrdata_d = (const double*)usrdata;
    float usrdata_f[3];
    int i, n = oclop == OCL_OP_MUL_SCALE || oclop == OCL_OP_DIV_SCALE ||
        oclop == OCL_OP_RDIV_SCALE || oclop == OCL_OP_RECIP_SCALE ? 1 : oclop == OCL_OP_ADDW ? 3 : 0;
    if( n > 0 && wdepth == CV_32F )
    {
        for( i = 0; i < n; i++ )
            usrdata_f[i] = (float)usrdata_d[i];
        usrdata_p = (const uchar*)usrdata_f;
    }

    ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
    if (k.empty())
        return false;

    UMat src1 = _src1.getUMat(), src2;
    UMat dst = _dst.getUMat(), mask = _mask.getUMat();

    ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cn, kercn);
    ocl::KernelArg dstarg = haveMask ? ocl::KernelArg::ReadWrite(dst, cn, kercn) :
                                       ocl::KernelArg::WriteOnly(dst, cn, kercn);
    ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask, 1);

    if( haveScalar )
    {
        size_t esz = CV_ELEM_SIZE1(wtype)*scalarcn;
        double buf[4]={0,0,0,0};
        Mat src2sc = _src2.getMat();

        if( !src2sc.empty() )
            convertAndUnrollScalar(src2sc, wtype, (uchar*)buf, 1);
        ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, 0, buf, esz);

        if( !haveMask )
        {
            if(n == 0)
                k.args(src1arg, dstarg, scalararg);
            else if(n == 1)
                k.args(src1arg, dstarg, scalararg,
                       ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz));
            else
                CV_Error(Error::StsNotImplemented, "unsupported number of extra parameters");
        }
        else
            k.args(src1arg, maskarg, dstarg, scalararg);
    }
    else
    {
        src2 = _src2.getUMat();
        ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cn, kercn);

        if( !haveMask )
        {
            if (n == 0)
                k.args(src1arg, src2arg, dstarg);
            else if (n == 1)
                k.args(src1arg, src2arg, dstarg,
                       ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz));
            else if (n == 3)
                k.args(src1arg, src2arg, dstarg,
                       ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz),
                       ocl::KernelArg(0, 0, 0, 0, usrdata_p + usrdata_esz, usrdata_esz),
                       ocl::KernelArg(0, 0, 0, 0, usrdata_p + usrdata_esz*2, usrdata_esz));
            else
                CV_Error(Error::StsNotImplemented, "unsupported number of extra parameters");
        }
        else
            k.args(src1arg, src2arg, maskarg, dstarg);
    }

    size_t globalsize[] = { (size_t)src1.cols * cn / kercn, ((size_t)src1.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
}

#endif

static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
                      InputArray _mask, int dtype, BinaryFuncC* tab, bool muldiv=false,
                      void* usrdata=0, int oclop=-1 )
{
    const _InputArray *psrc1 = &_src1, *psrc2 = &_src2;
    int kind1 = psrc1->kind(), kind2 = psrc2->kind();
    bool haveMask = !_mask.empty();
    bool reallocate = false;
    int type1 = psrc1->type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
    int type2 = psrc2->type(), depth2 = CV_MAT_DEPTH(type2), cn2 = CV_MAT_CN(type2);
    int wtype, dims1 = psrc1->dims(), dims2 = psrc2->dims();
    Size sz1 = dims1 <= 2 ? psrc1->size() : Size();
    Size sz2 = dims2 <= 2 ? psrc2->size() : Size();
#ifdef HAVE_OPENCL
    bool use_opencl = OCL_PERFORMANCE_CHECK(_dst.isUMat()) && dims1 <= 2 && dims2 <= 2;
#endif
    bool src1Scalar = checkScalar(*psrc1, type2, kind1, kind2);
    bool src2Scalar = checkScalar(*psrc2, type1, kind2, kind1);

    if( (kind1 == kind2 || cn == 1) && sz1 == sz2 && dims1 <= 2 && dims2 <= 2 && type1 == type2 &&
        !haveMask && ((!_dst.fixedType() && (dtype < 0 || CV_MAT_DEPTH(dtype) == depth1)) ||
                       (_dst.fixedType() && _dst.type() == type1)) &&
        ((src1Scalar && src2Scalar) || (!src1Scalar && !src2Scalar)) )
    {
        _dst.createSameSize(*psrc1, type1);
#ifdef HAVE_OPENCL
        CV_OCL_RUN(use_opencl,
                   ocl_arithm_op(*psrc1, *psrc2, _dst, _mask,
                                 (!usrdata ? type1 : std::max(depth1, CV_32F)),
                                 usrdata, oclop, false))
#endif

        Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat();
        Size sz = getContinuousSize(src1, src2, dst, src1.channels());
        tab[depth1](src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, usrdata);
        return;
    }

    bool haveScalar = false, swapped12 = false;

    if( dims1 != dims2 || sz1 != sz2 || cn != cn2 ||
        (kind1 == _InputArray::MATX && (sz1 == Size(1,4) || sz1 == Size(1,1))) ||
        (kind2 == _InputArray::MATX && (sz2 == Size(1,4) || sz2 == Size(1,1))) )
    {
        if( checkScalar(*psrc1, type2, kind1, kind2) )
        {
            // src1 is a scalar; swap it with src2
            swap(psrc1, psrc2);
            swap(sz1, sz2);
            swap(type1, type2);
            swap(depth1, depth2);
            swap(cn, cn2);
            swap(dims1, dims2);
            swapped12 = true;
            if( oclop == OCL_OP_SUB )
                oclop = OCL_OP_RSUB;
            if ( oclop == OCL_OP_DIV_SCALE )
                oclop = OCL_OP_RDIV_SCALE;
        }
        else if( !checkScalar(*psrc2, type1, kind2, kind1) )
            CV_Error( CV_StsUnmatchedSizes,
                      "The operation is neither 'array op array' "
                      "(where arrays have the same size and the same number of channels), "
                      "nor 'array op scalar', nor 'scalar op array'" );
        haveScalar = true;
        CV_Assert(type2 == CV_64F && (sz2.height == 1 || sz2.height == 4));

        if (!muldiv)
        {
            Mat sc = psrc2->getMat();
            depth2 = actualScalarDepth(sc.ptr<double>(), cn);
            if( depth2 == CV_64F && (depth1 < CV_32S || depth1 == CV_32F) )
                depth2 = CV_32F;
        }
        else
            depth2 = CV_64F;
    }

    if( dtype < 0 )
    {
        if( _dst.fixedType() )
            dtype = _dst.type();
        else
        {
            if( !haveScalar && type1 != type2 )
                CV_Error(CV_StsBadArg,
                         "When the input arrays in add/subtract/multiply/divide functions have different types, "
                         "the output array type must be explicitly specified");
            dtype = type1;
        }
    }
    dtype = CV_MAT_DEPTH(dtype);

    if( depth1 == depth2 && dtype == depth1 )
        wtype = dtype;
    else if( !muldiv )
    {
        wtype = depth1 <= CV_8S && depth2 <= CV_8S ? CV_16S :
                depth1 <= CV_32S && depth2 <= CV_32S ? CV_32S : std::max(depth1, depth2);
        wtype = std::max(wtype, dtype);

        // when the result of addition should be converted to an integer type,
        // and just one of the input arrays is floating-point, it makes sense to convert that input to integer type before the operation,
        // instead of converting the other input to floating-point and then converting the operation result back to integers.
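        // Added note (not in the original source): two rough examples of how this promotion
        // plays out, assuming the calls fall through to this general path rather than the
        // same-type fast path at the top of arithm_op():
        //   cv::add(a, b, dst, mask)                  with a, b CV_8U -> dtype CV_8U,  wtype CV_8U
        //   cv::add(a, b, dst, cv::noArray(), CV_16S) with a, b CV_8U -> dtype CV_16S, wtype CV_16S here,
        //                                             then forced to CV_32S by the check just below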
        if( dtype < CV_32F && (depth1 < CV_32F || depth2 < CV_32F) )
            wtype = CV_32S;
    }
    else
    {
        wtype = std::max(depth1, std::max(depth2, CV_32F));
        wtype = std::max(wtype, dtype);
    }

    dtype = CV_MAKETYPE(dtype, cn);
    wtype = CV_MAKETYPE(wtype, cn);

    if( haveMask )
    {
        int mtype = _mask.type();
        CV_Assert( (mtype == CV_8UC1 || mtype == CV_8SC1) && _mask.sameSize(*psrc1) );
        reallocate = !_dst.sameSize(*psrc1) || _dst.type() != dtype;
    }

    _dst.createSameSize(*psrc1, dtype);
    if( reallocate )
        _dst.setTo(0.);

#ifdef HAVE_OPENCL
    CV_OCL_RUN(use_opencl,
               ocl_arithm_op(*psrc1, *psrc2, _dst, _mask, wtype,
                             usrdata, oclop, haveScalar))
#endif

    BinaryFunc cvtsrc1 = type1 == wtype ? 0 : getConvertFunc(type1, wtype);
    BinaryFunc cvtsrc2 = type2 == type1 ? cvtsrc1 : type2 == wtype ? 0 : getConvertFunc(type2, wtype);
    BinaryFunc cvtdst = dtype == wtype ? 0 : getConvertFunc(wtype, dtype);

    size_t esz1 = CV_ELEM_SIZE(type1), esz2 = CV_ELEM_SIZE(type2);
    size_t dsz = CV_ELEM_SIZE(dtype), wsz = CV_ELEM_SIZE(wtype);
    size_t blocksize0 = (size_t)(BLOCK_SIZE + wsz-1)/wsz;
    BinaryFunc copymask = getCopyMaskFunc(dsz);
    Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat(), mask = _mask.getMat();

    AutoBuffer<uchar> _buf;
    uchar *buf, *maskbuf = 0, *buf1 = 0, *buf2 = 0, *wbuf = 0;
    size_t bufesz = (cvtsrc1 ? wsz : 0) +
                    (cvtsrc2 || haveScalar ? wsz : 0) +
                    (cvtdst ? wsz : 0) +
                    (haveMask ? dsz : 0);
    BinaryFuncC func = tab[CV_MAT_DEPTH(wtype)];

    if( !haveScalar )
    {
        const Mat* arrays[] = { &src1, &src2, &dst, &mask, 0 };
        uchar* ptrs[4];

        NAryMatIterator it(arrays, ptrs);
        size_t total = it.size, blocksize = total;

        if( haveMask || cvtsrc1 || cvtsrc2 || cvtdst )
            blocksize = std::min(blocksize, blocksize0);

        _buf.allocate(bufesz*blocksize + 64);
        buf = _buf;
        if( cvtsrc1 )
            buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
        if( cvtsrc2 )
            buf2 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
        wbuf = maskbuf = buf;
        if( cvtdst )
            buf = alignPtr(buf + blocksize*wsz, 16);
        if( haveMask )
            maskbuf = buf;

        for( size_t i = 0; i < it.nplanes; i++, ++it )
        {
            for( size_t j = 0; j < total; j += blocksize )
            {
                int bsz = (int)MIN(total - j, blocksize);
                Size bszn(bsz*cn, 1);
                const uchar *sptr1 = ptrs[0], *sptr2 = ptrs[1];
                uchar* dptr = ptrs[2];
                if( cvtsrc1 )
                {
                    cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 );
                    sptr1 = buf1;
                }
                if( ptrs[0] == ptrs[1] )
                    sptr2 = sptr1;
                else if( cvtsrc2 )
                {
                    cvtsrc2( sptr2, 1, 0, 1, buf2, 1, bszn, 0 );
                    sptr2 = buf2;
                }

                if( !haveMask && !cvtdst )
                    func( sptr1, 1, sptr2, 1, dptr, 1, bszn.width, bszn.height, usrdata );
                else
                {
                    func( sptr1, 1, sptr2, 1, wbuf, 0, bszn.width, bszn.height, usrdata );
                    if( !haveMask )
                        cvtdst( wbuf, 1, 0, 1, dptr, 1, bszn, 0 );
                    else if( !cvtdst )
                    {
                        copymask( wbuf, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz );
                        ptrs[3] += bsz;
                    }
                    else
                    {
                        cvtdst( wbuf, 1, 0, 1, maskbuf, 1, bszn, 0 );
                        copymask( maskbuf, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz );
                        ptrs[3] += bsz;
                    }
                }
                ptrs[0] += bsz*esz1; ptrs[1] += bsz*esz2; ptrs[2] += bsz*dsz;
            }
        }
    }
    else
    {
        const Mat* arrays[] = { &src1, &dst, &mask, 0 };
        uchar* ptrs[3];

        NAryMatIterator it(arrays, ptrs);
        size_t total = it.size, blocksize = std::min(total, blocksize0);

        _buf.allocate(bufesz*blocksize + 64);
        buf = _buf;
        if( cvtsrc1 )
            buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16);
        buf2 = buf; buf = alignPtr(buf + blocksize*wsz, 16);
        wbuf = maskbuf = buf;
        if( cvtdst )
            buf = alignPtr(buf + blocksize*wsz, 16);
        if( haveMask )
            maskbuf = buf;

        convertAndUnrollScalar( src2, wtype, buf2, blocksize);

        for( size_t i = 0; i < it.nplanes; i++, ++it )
        {
            for( size_t j = 0; j < total; j += blocksize )
            {
                int bsz = (int)MIN(total - j, blocksize);
                Size bszn(bsz*cn, 1);
                const uchar *sptr1 = ptrs[0];
                const uchar* sptr2 = buf2;
                uchar* dptr = ptrs[1];

                if( cvtsrc1 )
                {
                    cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 );
                    sptr1 = buf1;
                }

                if( swapped12 )
                    std::swap(sptr1, sptr2);

                if( !haveMask && !cvtdst )
                    func( sptr1, 1, sptr2, 1, dptr, 1, bszn.width, bszn.height, usrdata );
                else
                {
                    func( sptr1, 1, sptr2, 1, wbuf, 1, bszn.width, bszn.height, usrdata );
                    if( !haveMask )
                        cvtdst( wbuf, 1, 0, 1, dptr, 1, bszn, 0 );
                    else if( !cvtdst )
                    {
                        copymask( wbuf, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz );
                        ptrs[2] += bsz;
                    }
                    else
                    {
                        cvtdst( wbuf, 1, 0, 1, maskbuf, 1, bszn, 0 );
                        copymask( maskbuf, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz );
                        ptrs[2] += bsz;
                    }
                }
                ptrs[0] += bsz*esz1; ptrs[1] += bsz*dsz;
            }
        }
    }
}

static BinaryFuncC* getAddTab()
{
    static BinaryFuncC addTab[] =
    {
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::add8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::add8s),
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::add16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::add16s),
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::add32s),
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::add32f), (BinaryFuncC)cv::hal::add64f,
        0
    };

    return addTab;
}

static BinaryFuncC* getSubTab()
{
    static BinaryFuncC subTab[] =
    {
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub8s),
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub16s),
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub32s),
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub32f), (BinaryFuncC)cv::hal::sub64f,
        0
    };

    return subTab;
}

static BinaryFuncC* getAbsDiffTab()
{
    static BinaryFuncC absDiffTab[] =
    {
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff8s),
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff16s),
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff32s),
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff32f), (BinaryFuncC)cv::hal::absdiff64f,
        0
    };

    return absDiffTab;
}

}

void cv::add( InputArray src1, InputArray src2, OutputArray dst,
              InputArray mask, int dtype )
{
    arithm_op(src1, src2, dst, mask, dtype, getAddTab(), false, 0, OCL_OP_ADD );
}

void cv::subtract( InputArray _src1, InputArray _src2, OutputArray _dst,
                   InputArray mask, int dtype )
{
#ifdef HAVE_TEGRA_OPTIMIZATION
    if (tegra::useTegra())
    {
        int kind1 = _src1.kind(), kind2 = _src2.kind();
        Mat src1 = _src1.getMat(), src2 = _src2.getMat();
        bool src1Scalar = checkScalar(src1, _src2.type(), kind1, kind2);
        bool src2Scalar = checkScalar(src2, _src1.type(), kind2, kind1);

        if (!src1Scalar && !src2Scalar &&
            src1.depth() == CV_8U && src2.type() == src1.type() &&
            src1.dims == 2 && src2.size() == src1.size() &&
            mask.empty())
        {
            if (dtype < 0)
            {
                if (_dst.fixedType())
                {
                    dtype = _dst.depth();
                }
                else
                {
                    dtype = src1.depth();
                }
            }

            dtype = CV_MAT_DEPTH(dtype);

            if (!_dst.fixedType() || dtype == _dst.depth())
            {
                _dst.create(src1.size(), CV_MAKE_TYPE(dtype, src1.channels()));

                if (dtype == CV_16S)
                {
                    Mat dst = _dst.getMat();
                    if(tegra::subtract_8u8u16s(src1, src2, dst))
                        return;
                }
                else if (dtype == CV_32F)
                {
                    Mat dst = _dst.getMat();
                    if(tegra::subtract_8u8u32f(src1, src2, dst))
                        return;
                }
                else if (dtype == CV_8S)
                {
                    Mat dst = _dst.getMat();
                    if(tegra::subtract_8u8u8s(src1, src2, dst))
                        return;
                }
            }
        }
    }
#endif
    arithm_op(_src1, _src2, _dst, mask, dtype, getSubTab(), false, 0, OCL_OP_SUB );
}

void cv::absdiff( InputArray src1, InputArray src2, OutputArray dst )
{
    arithm_op(src1, src2, dst, noArray(), -1, getAbsDiffTab(), false, 0, OCL_OP_ABSDIFF);
}

/****************************************************************************************\
*                                    multiply/divide                                    *
\****************************************************************************************/

namespace cv
{

static BinaryFuncC* getMulTab()
{
    static BinaryFuncC mulTab[] =
    {
        (BinaryFuncC)cv::hal::mul8u, (BinaryFuncC)cv::hal::mul8s, (BinaryFuncC)cv::hal::mul16u,
        (BinaryFuncC)cv::hal::mul16s, (BinaryFuncC)cv::hal::mul32s, (BinaryFuncC)cv::hal::mul32f,
        (BinaryFuncC)cv::hal::mul64f, 0
    };

    return mulTab;
}

static BinaryFuncC* getDivTab()
{
    static BinaryFuncC divTab[] =
    {
        (BinaryFuncC)cv::hal::div8u, (BinaryFuncC)cv::hal::div8s, (BinaryFuncC)cv::hal::div16u,
        (BinaryFuncC)cv::hal::div16s, (BinaryFuncC)cv::hal::div32s, (BinaryFuncC)cv::hal::div32f,
        (BinaryFuncC)cv::hal::div64f, 0
    };

    return divTab;
}

static BinaryFuncC* getRecipTab()
{
    static BinaryFuncC recipTab[] =
    {
        (BinaryFuncC)cv::hal::recip8u, (BinaryFuncC)cv::hal::recip8s, (BinaryFuncC)cv::hal::recip16u,
        (BinaryFuncC)cv::hal::recip16s, (BinaryFuncC)cv::hal::recip32s, (BinaryFuncC)cv::hal::recip32f,
        (BinaryFuncC)cv::hal::recip64f, 0
    };

    return recipTab;
}

}

void cv::multiply(InputArray src1, InputArray src2,
                  OutputArray dst, double scale, int dtype)
{
    arithm_op(src1, src2, dst, noArray(), dtype, getMulTab(),
              true, &scale, std::abs(scale - 1.0) < DBL_EPSILON ? OCL_OP_MUL : OCL_OP_MUL_SCALE);
}

void cv::divide(InputArray src1, InputArray src2,
                OutputArray dst, double scale, int dtype)
{
    arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale, OCL_OP_DIV_SCALE);
}

void cv::divide(double scale, InputArray src2,
                OutputArray dst, int dtype)
{
    arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale, OCL_OP_RECIP_SCALE);
}

/****************************************************************************************\
*                                      addWeighted                                      *
\****************************************************************************************/

namespace cv
{

static BinaryFuncC* getAddWeightedTab()
{
    static BinaryFuncC addWeightedTab[] =
    {
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted8s), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted16u),
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted16s), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted32s), (BinaryFuncC)cv::hal::addWeighted32f,
        (BinaryFuncC)cv::hal::addWeighted64f, 0
    };

    return addWeightedTab;
}

}

void cv::addWeighted( InputArray src1, double alpha, InputArray src2,
                      double beta, double gamma, OutputArray dst, int dtype )
{
    double scalars[] = {alpha, beta, gamma};
    arithm_op(src1, src2, dst, noArray(), dtype, getAddWeightedTab(), true, scalars, OCL_OP_ADDW);
}


/****************************************************************************************\
*                                        compare                                        *
\****************************************************************************************/

namespace cv
{

static BinaryFuncC getCmpFunc(int depth)
{
    static BinaryFuncC cmpTab[] =
    {
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp8s),
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp16s),
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp32s),
        (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp32f), (BinaryFuncC)cv::hal::cmp64f,
        0
    };

    return cmpTab[depth];
}

static double getMinVal(int depth)
{
    static const double tab[] = {0, -128, 0, -32768, INT_MIN, -FLT_MAX, -DBL_MAX, 0};
    return tab[depth];
}

static double getMaxVal(int depth)
{
    static const double tab[] = {255, 127, 65535, 32767, INT_MAX, FLT_MAX, DBL_MAX, 0};
    return tab[depth];
}

#ifdef HAVE_OPENCL

static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op, bool haveScalar)
{
    const ocl::Device& dev = ocl::Device::getDefault();
    bool doubleSupport = dev.doubleFPConfig() > 0;
    int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1),
        type2 = _src2.type(), depth2 = CV_MAT_DEPTH(type2);

    if (!doubleSupport && depth1 == CV_64F)
        return false;

    if (!haveScalar && (!_src1.sameSize(_src2) || type1 != type2))
        return false;

    int kercn = haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst), rowsPerWI = dev.isIntel() ? 4 : 1;
    // Workaround for bug with "?:" operator in AMD OpenCL compiler
    if (depth1 >= CV_16U)
        kercn = 1;

    int scalarcn = kercn == 3 ? 4 : kercn;
    const char * const operationMap[] = { "==", ">", ">=", "<", "<=", "!=" };
    char cvt[40];

    String opts = format("-D %s -D srcT1=%s -D dstT=%s -D workT=srcT1 -D cn=%d"
                         " -D convertToDT=%s -D OP_CMP -D CMP_OPERATOR=%s -D srcT1_C1=%s"
                         " -D srcT2_C1=%s -D dstT_C1=%s -D workST=%s -D rowsPerWI=%d%s",
                         haveScalar ? "UNARY_OP" : "BINARY_OP",
                         ocl::typeToStr(CV_MAKE_TYPE(depth1, kercn)),
                         ocl::typeToStr(CV_8UC(kercn)), kercn,
                         ocl::convertTypeStr(depth1, CV_8U, kercn, cvt),
                         operationMap[op], ocl::typeToStr(depth1),
                         ocl::typeToStr(depth1), ocl::typeToStr(CV_8U),
                         ocl::typeToStr(CV_MAKE_TYPE(depth1, scalarcn)), rowsPerWI,
                         doubleSupport ? " -D DOUBLE_SUPPORT" : "");

    ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
    if (k.empty())
        return false;

    UMat src1 = _src1.getUMat();
    Size size = src1.size();
    _dst.create(size, CV_8UC(cn));
    UMat dst = _dst.getUMat();

    if (haveScalar)
    {
        size_t esz = CV_ELEM_SIZE1(type1) * scalarcn;
        double buf[4] = { 0, 0, 0, 0 };
        Mat src2 = _src2.getMat();

        if( depth1 > CV_32S )
            convertAndUnrollScalar( src2, depth1, (uchar *)buf, kercn );
        else
        {
            double fval = 0;
            getConvertFunc(depth2, CV_64F)(src2.ptr(), 1, 0, 1, (uchar *)&fval, 1, Size(1, 1), 0);
            if( fval < getMinVal(depth1) )
                return dst.setTo(Scalar::all(op == CMP_GT || op == CMP_GE || op == CMP_NE ? 255 : 0)), true;

            if( fval > getMaxVal(depth1) )
                return dst.setTo(Scalar::all(op == CMP_LT || op == CMP_LE || op == CMP_NE ? 255 : 0)), true;

            int ival = cvRound(fval);
            if( fval != ival )
            {
                if( op == CMP_LT || op == CMP_GE )
                    ival = cvCeil(fval);
                else if( op == CMP_LE || op == CMP_GT )
                    ival = cvFloor(fval);
                else
                    return dst.setTo(Scalar::all(op == CMP_NE ? 255 : 0)), true;
            }
            convertAndUnrollScalar(Mat(1, 1, CV_32S, &ival), depth1, (uchar *)buf, kercn);
        }

        ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, 0, buf, esz);

        k.args(ocl::KernelArg::ReadOnlyNoSize(src1, cn, kercn),
               ocl::KernelArg::WriteOnly(dst, cn, kercn), scalararg);
    }
    else
    {
        UMat src2 = _src2.getUMat();

        k.args(ocl::KernelArg::ReadOnlyNoSize(src1),
               ocl::KernelArg::ReadOnlyNoSize(src2),
               ocl::KernelArg::WriteOnly(dst, cn, kercn));
    }

    size_t globalsize[2] = { (size_t)dst.cols * cn / kercn, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI };
    return k.run(2, globalsize, NULL, false);
}

#endif

}

void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op)
{
    CV_Assert( op == CMP_LT || op == CMP_LE || op == CMP_EQ ||
               op == CMP_NE || op == CMP_GE || op == CMP_GT );

    bool haveScalar = false;

    if ((_src1.isMatx() + _src2.isMatx()) == 1
            || !_src1.sameSize(_src2)
            || _src1.type() != _src2.type())
    {
        if (checkScalar(_src1, _src2.type(), _src1.kind(), _src2.kind()))
        {
            op = op == CMP_LT ? CMP_GT : op == CMP_LE ? CMP_GE :
                 op == CMP_GE ? CMP_LE : op == CMP_GT ? CMP_LT : op;
            // src1 is a scalar; swap it with src2
            compare(_src2, _src1, _dst, op);
            return;
        }
        else if( !checkScalar(_src2, _src1.type(), _src2.kind(), _src1.kind()) )
            CV_Error( CV_StsUnmatchedSizes,
                      "The operation is neither 'array op array' (where arrays have the same size and the same type), "
                      "nor 'array op scalar', nor 'scalar op array'" );
        haveScalar = true;
    }

#ifdef HAVE_OPENCL
    CV_OCL_RUN(_src1.dims() <= 2 && _src2.dims() <= 2 && OCL_PERFORMANCE_CHECK(_dst.isUMat()),
               ocl_compare(_src1, _src2, _dst, op, haveScalar))
#endif

    int kind1 = _src1.kind(), kind2 = _src2.kind();
    Mat src1 = _src1.getMat(), src2 = _src2.getMat();

    if( kind1 == kind2 && src1.dims <= 2 && src2.dims <= 2 && src1.size() == src2.size() && src1.type() == src2.type() )
    {
        int cn = src1.channels();
        _dst.create(src1.size(), CV_8UC(cn));
        Mat dst = _dst.getMat();
        Size sz = getContinuousSize(src1, src2, dst, src1.channels());
        getCmpFunc(src1.depth())(src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, &op);
        return;
    }

    int cn = src1.channels(), depth1 = src1.depth(), depth2 = src2.depth();

    _dst.create(src1.dims, src1.size, CV_8UC(cn));
    src1 = src1.reshape(1); src2 = src2.reshape(1);
    Mat dst = _dst.getMat().reshape(1);

    size_t esz = src1.elemSize();
    size_t blocksize0 = (size_t)(BLOCK_SIZE + esz-1)/esz;
    BinaryFuncC func = getCmpFunc(depth1);

    if( !haveScalar )
    {
        const Mat* arrays[] = { &src1, &src2, &dst, 0 };
        uchar* ptrs[3];

        NAryMatIterator it(arrays, ptrs);
        size_t total = it.size;

        for( size_t i = 0; i < it.nplanes; i++, ++it )
            func( ptrs[0], 0, ptrs[1], 0, ptrs[2], 0, (int)total, 1, &op );
    }
    else
    {
        const Mat* arrays[] = { &src1, &dst, 0 };
        uchar* ptrs[2];

        NAryMatIterator it(arrays, ptrs);
        size_t total = it.size, blocksize = std::min(total, blocksize0);

        AutoBuffer<uchar> _buf(blocksize*esz);
        uchar *buf = _buf;

        if( depth1 > CV_32S )
            convertAndUnrollScalar( src2, depth1, buf, blocksize );
        else
        {
            double fval=0;
            getConvertFunc(depth2, CV_64F)(src2.ptr(), 1, 0, 1, (uchar*)&fval, 1, Size(1,1), 0);
            if( fval < getMinVal(depth1) )
            {
                dst = Scalar::all(op == CMP_GT || op == CMP_GE || op == CMP_NE ? 255 : 0);
                return;
            }

            if( fval > getMaxVal(depth1) )
            {
                dst = Scalar::all(op == CMP_LT || op == CMP_LE || op == CMP_NE ? 255 : 0);
                return;
            }

            int ival = cvRound(fval);
            if( fval != ival )
            {
                if( op == CMP_LT || op == CMP_GE )
                    ival = cvCeil(fval);
                else if( op == CMP_LE || op == CMP_GT )
                    ival = cvFloor(fval);
                else
                {
                    dst = Scalar::all(op == CMP_NE ? 255 : 0);
                    return;
                }
            }
            convertAndUnrollScalar(Mat(1, 1, CV_32S, &ival), depth1, buf, blocksize);
        }

        for( size_t i = 0; i < it.nplanes; i++, ++it )
        {
            for( size_t j = 0; j < total; j += blocksize )
            {
                int bsz = (int)MIN(total - j, blocksize);
                func( ptrs[0], 0, buf, 0, ptrs[1], 0, bsz, 1, &op);
                ptrs[0] += bsz*esz;
                ptrs[1] += bsz;
            }
        }
    }
}

/****************************************************************************************\
*                                        inRange                                        *
\****************************************************************************************/

namespace cv
{

template <typename T>
struct InRange_SIMD
{
    int operator () (const T *, const T *, const T *, uchar *, int) const
    {
        return 0;
    }
};

#if CV_SSE2

template <>
struct InRange_SIMD<uchar>
{
    int operator () (const uchar * src1, const uchar * src2, const uchar * src3,
                     uchar * dst, int len) const
    {
        int x = 0;

        if (USE_SSE2)
        {
            __m128i v_full = _mm_set1_epi8(-1), v_128 = _mm_set1_epi8(-128);

            for ( ; x <= len - 16; x += 16 )
            {
                __m128i v_src = _mm_add_epi8(_mm_loadu_si128((const __m128i *)(src1 + x)), v_128);
                __m128i v_mask1 = _mm_cmpgt_epi8(_mm_add_epi8(_mm_loadu_si128((const __m128i *)(src2 + x)), v_128), v_src);
                __m128i v_mask2 = _mm_cmpgt_epi8(v_src, _mm_add_epi8(_mm_loadu_si128((const __m128i *)(src3 + x)), v_128));
                _mm_storeu_si128((__m128i *)(dst + x), _mm_andnot_si128(_mm_or_si128(v_mask1, v_mask2), v_full));
            }
        }

        return x;
    }
};

template <>
struct InRange_SIMD<schar>
{
    int operator () (const schar * src1, const schar * src2, const schar * src3,
                     uchar * dst, int len) const
    {
        int x = 0;

        if (USE_SSE2)
        {
            __m128i v_full = _mm_set1_epi8(-1);

            for ( ; x <= len - 16; x += 16 )
            {
                __m128i v_src = _mm_loadu_si128((const __m128i *)(src1 + x));
                __m128i v_mask1 = _mm_cmpgt_epi8(_mm_loadu_si128((const __m128i *)(src2 + x)), v_src);
                __m128i v_mask2 = _mm_cmpgt_epi8(v_src, _mm_loadu_si128((const __m128i *)(src3 + x)));
                _mm_storeu_si128((__m128i *)(dst + x), _mm_andnot_si128(_mm_or_si128(v_mask1, v_mask2), v_full));
            }
        }

        return x;
    }
};

template <>
struct InRange_SIMD<ushort>
{
    int operator () (const ushort * src1, const ushort * src2, const ushort * src3,
                     uchar * dst, int len) const
    {
        int x = 0;

        if (USE_SSE2)
        {
            __m128i v_zero = _mm_setzero_si128(), v_full = _mm_set1_epi16(-1), v_32768 = _mm_set1_epi16(-32768);

            for ( ; x <= len - 8; x += 8 )
            {
                __m128i v_src = _mm_add_epi16(_mm_loadu_si128((const __m128i *)(src1 + x)), v_32768);
                __m128i v_mask1 = _mm_cmpgt_epi16(_mm_add_epi16(_mm_loadu_si128((const __m128i *)(src2 + x)), v_32768), v_src);
                __m128i v_mask2 = _mm_cmpgt_epi16(v_src, _mm_add_epi16(_mm_loadu_si128((const __m128i *)(src3 + x)), v_32768));
                __m128i v_res = _mm_andnot_si128(_mm_or_si128(v_mask1, v_mask2), v_full);
                _mm_storel_epi64((__m128i *)(dst + x), _mm_packus_epi16(_mm_srli_epi16(v_res, 8), v_zero));
            }
        }

        return x;
    }
};

template <>
struct InRange_SIMD<short>
{
    int operator () (const short * src1, const short * src2, const short * src3,
                     uchar * dst, int len) const
    {
        int x = 0;

        if (USE_SSE2)
        {
            __m128i v_zero = _mm_setzero_si128(), v_full = _mm_set1_epi16(-1);

            for ( ; x <= len - 8; x += 8 )
            {
                __m128i v_src = _mm_loadu_si128((const __m128i *)(src1 + x));
                __m128i v_mask1 = _mm_cmpgt_epi16(_mm_loadu_si128((const __m128i *)(src2 + x)), v_src);
                __m128i v_mask2 = _mm_cmpgt_epi16(v_src, _mm_loadu_si128((const __m128i *)(src3 + x)));
                __m128i v_res = _mm_andnot_si128(_mm_or_si128(v_mask1, v_mask2), v_full);
                _mm_storel_epi64((__m128i *)(dst + x), _mm_packus_epi16(_mm_srli_epi16(v_res, 8), v_zero));
            }
        }

        return x;
    }
};

template <>
struct InRange_SIMD<int>
{
    int operator () (const int * src1, const int * src2, const int * src3,
                     uchar * dst, int len) const
    {
        int x = 0;

        if (USE_SSE2)
        {
            __m128i v_zero = _mm_setzero_si128(), v_full = _mm_set1_epi32(-1);

            for ( ; x <= len - 8; x += 8 )
            {
                __m128i v_src = _mm_loadu_si128((const __m128i *)(src1 + x));
                __m128i v_res1 = _mm_or_si128(_mm_cmpgt_epi32(_mm_loadu_si128((const __m128i *)(src2 + x)), v_src),
                                              _mm_cmpgt_epi32(v_src, _mm_loadu_si128((const __m128i *)(src3 + x))));

                v_src = _mm_loadu_si128((const __m128i *)(src1 + x + 4));
                __m128i v_res2 = _mm_or_si128(_mm_cmpgt_epi32(_mm_loadu_si128((const __m128i *)(src2 + x + 4)), v_src),
                                              _mm_cmpgt_epi32(v_src, _mm_loadu_si128((const __m128i *)(src3 + x + 4))));

                __m128i v_res = _mm_packs_epi32(_mm_srli_epi32(_mm_andnot_si128(v_res1, v_full), 16),
                                                _mm_srli_epi32(_mm_andnot_si128(v_res2, v_full), 16));
                _mm_storel_epi64((__m128i *)(dst + x), _mm_packus_epi16(v_res, v_zero));
            }
        }

        return x;
    }
};

template <>
struct InRange_SIMD<float>
{
    int operator () (const float * src1, const float * src2, const float * src3,
                     uchar * dst, int len) const
    {
        int x = 0;

        if (USE_SSE2)
        {
            __m128i v_zero = _mm_setzero_si128();

            for ( ; x <= len - 8; x += 8 )
            {
                __m128 v_src = _mm_loadu_ps(src1 + x);
                __m128 v_res1 = _mm_and_ps(_mm_cmple_ps(_mm_loadu_ps(src2 + x), v_src),
                                           _mm_cmple_ps(v_src, _mm_loadu_ps(src3 + x)));

                v_src = _mm_loadu_ps(src1 + x + 4);
                __m128 v_res2 = _mm_and_ps(_mm_cmple_ps(_mm_loadu_ps(src2 + x + 4), v_src),
                                           _mm_cmple_ps(v_src, _mm_loadu_ps(src3 + x + 4)));

                __m128i v_res1i = _mm_cvtps_epi32(v_res1), v_res2i = _mm_cvtps_epi32(v_res2);
                __m128i v_res = _mm_packs_epi32(_mm_srli_epi32(v_res1i, 16), _mm_srli_epi32(v_res2i, 16));
                _mm_storel_epi64((__m128i *)(dst + x), _mm_packus_epi16(v_res, v_zero));
            }
        }

        return x;
    }
};

#elif CV_NEON

template <>
struct InRange_SIMD<uchar>
{
    int operator () (const uchar * src1, const uchar * src2, const uchar * src3,
                     uchar * dst, int len) const
    {
        int x = 0;

        for ( ; x <= len - 16; x += 16 )
        {
            uint8x16_t values = vld1q_u8(src1 + x);
            uint8x16_t low = vld1q_u8(src2 + x);
            uint8x16_t high = vld1q_u8(src3 + x);

            vst1q_u8(dst + x, vandq_u8(vcgeq_u8(values, low), vcgeq_u8(high, values)));
        }
        return x;
    }
};

template <>
struct InRange_SIMD<schar>
{
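    // Added note (not in the original source): as in the specializations above, each vcgeq_*
    // comparison sets a lane to all ones where the bound holds; AND-ing the "value >= low" and
    // "high >= value" masks yields the same 255/0 output that the scalar tail loop in inRange_()
    // produces.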
    int operator () (const schar * src1, const schar * src2, const schar * src3,
                     uchar * dst, int len) const
    {
        int x = 0;

        for ( ; x <= len - 16; x += 16 )
        {
            int8x16_t values = vld1q_s8(src1 + x);
            int8x16_t low = vld1q_s8(src2 + x);
            int8x16_t high = vld1q_s8(src3 + x);

            vst1q_u8(dst + x, vandq_u8(vcgeq_s8(values, low), vcgeq_s8(high, values)));
        }
        return x;
    }
};

template <>
struct InRange_SIMD<ushort>
{
    int operator () (const ushort * src1, const ushort * src2, const ushort * src3,
                     uchar * dst, int len) const
    {
        int x = 0;

        for ( ; x <= len - 16; x += 16 )
        {
            uint16x8_t values = vld1q_u16((const uint16_t*)(src1 + x));
            uint16x8_t low = vld1q_u16((const uint16_t*)(src2 + x));
            uint16x8_t high = vld1q_u16((const uint16_t*)(src3 + x));
            uint8x8_t r1 = vmovn_u16(vandq_u16(vcgeq_u16(values, low), vcgeq_u16(high, values)));

            values = vld1q_u16((const uint16_t*)(src1 + x + 8));
            low = vld1q_u16((const uint16_t*)(src2 + x + 8));
            high = vld1q_u16((const uint16_t*)(src3 + x + 8));
            uint8x8_t r2 = vmovn_u16(vandq_u16(vcgeq_u16(values, low), vcgeq_u16(high, values)));

            vst1q_u8(dst + x, vcombine_u8(r1, r2));
        }
        return x;
    }
};

template <>
struct InRange_SIMD<short>
{
    int operator () (const short * src1, const short * src2, const short * src3,
                     uchar * dst, int len) const
    {
        int x = 0;

        for ( ; x <= len - 16; x += 16 )
        {
            int16x8_t values = vld1q_s16((const int16_t*)(src1 + x));
            int16x8_t low = vld1q_s16((const int16_t*)(src2 + x));
            int16x8_t high = vld1q_s16((const int16_t*)(src3 + x));
            uint8x8_t r1 = vmovn_u16(vandq_u16(vcgeq_s16(values, low), vcgeq_s16(high, values)));

            values = vld1q_s16((const int16_t*)(src1 + x + 8));
            low = vld1q_s16((const int16_t*)(src2 + x + 8));
            high = vld1q_s16((const int16_t*)(src3 + x + 8));
            uint8x8_t r2 = vmovn_u16(vandq_u16(vcgeq_s16(values, low), vcgeq_s16(high, values)));

            vst1q_u8(dst + x, vcombine_u8(r1, r2));
        }
        return x;
    }
};

template <>
struct InRange_SIMD<int>
{
    int operator () (const int * src1, const int * src2, const int * src3,
                     uchar * dst, int len) const
    {
        int x = 0;

        for ( ; x <= len - 8; x += 8 )
        {
            int32x4_t values = vld1q_s32((const int32_t*)(src1 + x));
            int32x4_t low = vld1q_s32((const int32_t*)(src2 + x));
            int32x4_t high = vld1q_s32((const int32_t*)(src3 + x));

            uint16x4_t r1 = vmovn_u32(vandq_u32(vcgeq_s32(values, low), vcgeq_s32(high, values)));

            values = vld1q_s32((const int32_t*)(src1 + x + 4));
            low = vld1q_s32((const int32_t*)(src2 + x + 4));
            high = vld1q_s32((const int32_t*)(src3 + x + 4));

            uint16x4_t r2 = vmovn_u32(vandq_u32(vcgeq_s32(values, low), vcgeq_s32(high, values)));

            uint16x8_t res_16 = vcombine_u16(r1, r2);

            vst1_u8(dst + x, vmovn_u16(res_16));
        }
        return x;
    }
};

template <>
struct InRange_SIMD<float>
{
    int operator () (const float * src1, const float * src2, const float * src3,
                     uchar * dst, int len) const
    {
        int x = 0;

        for ( ; x <= len - 8; x += 8 )
        {
            float32x4_t values = vld1q_f32((const float32_t*)(src1 + x));
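            // Added note (not in the original source): the next loads fetch the per-lane lower and
            // upper bounds; each vcgeq_f32 yields an all-ones lane where the bound holds, the two
            // masks are AND-ed, then narrowed to 16 and finally 8 bits before being stored into dst
            // as the usual 255/0 in-range mask.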
float32x4_t low = vld1q_f32((const float32_t*)(src2 + x)); 01645 float32x4_t high = vld1q_f32((const float32_t*)(src3 + x)); 01646 01647 uint16x4_t r1 = vmovn_u32(vandq_u32(vcgeq_f32(values, low), vcgeq_f32(high, values))); 01648 01649 values = vld1q_f32((const float32_t*)(src1 + x + 4)); 01650 low = vld1q_f32((const float32_t*)(src2 + x + 4)); 01651 high = vld1q_f32((const float32_t*)(src3 + x + 4)); 01652 01653 uint16x4_t r2 = vmovn_u32(vandq_u32(vcgeq_f32(values, low), vcgeq_f32(high, values))); 01654 01655 uint16x8_t res_16 = vcombine_u16(r1, r2); 01656 01657 vst1_u8(dst + x, vmovn_u16(res_16)); 01658 } 01659 return x; 01660 } 01661 }; 01662 01663 #endif 01664 01665 template <typename T> 01666 static void inRange_(const T* src1, size_t step1, const T* src2, size_t step2, 01667 const T* src3, size_t step3, uchar* dst, size_t step, 01668 Size size) 01669 { 01670 step1 /= sizeof(src1[0]); 01671 step2 /= sizeof(src2[0]); 01672 step3 /= sizeof(src3[0]); 01673 01674 InRange_SIMD<T> vop; 01675 01676 for( ; size.height--; src1 += step1, src2 += step2, src3 += step3, dst += step ) 01677 { 01678 int x = vop(src1, src2, src3, dst, size.width); 01679 #if CV_ENABLE_UNROLLED 01680 for( ; x <= size.width - 4; x += 4 ) 01681 { 01682 int t0, t1; 01683 t0 = src2[x] <= src1[x] && src1[x] <= src3[x]; 01684 t1 = src2[x+1] <= src1[x+1] && src1[x+1] <= src3[x+1]; 01685 dst[x] = (uchar)-t0; dst[x+1] = (uchar)-t1; 01686 t0 = src2[x+2] <= src1[x+2] && src1[x+2] <= src3[x+2]; 01687 t1 = src2[x+3] <= src1[x+3] && src1[x+3] <= src3[x+3]; 01688 dst[x+2] = (uchar)-t0; dst[x+3] = (uchar)-t1; 01689 } 01690 #endif 01691 for( ; x < size.width; x++ ) 01692 dst[x] = (uchar)-(src2[x] <= src1[x] && src1[x] <= src3[x]); 01693 } 01694 } 01695 01696 01697 static void inRange8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, 01698 const uchar* src3, size_t step3, uchar* dst, size_t step, Size size) 01699 { 01700 inRange_(src1, step1, src2, step2, src3, step3, dst, step, size); 01701 } 01702 01703 static void inRange8s(const schar* src1, size_t step1, const schar* src2, size_t step2, 01704 const schar* src3, size_t step3, uchar* dst, size_t step, Size size) 01705 { 01706 inRange_(src1, step1, src2, step2, src3, step3, dst, step, size); 01707 } 01708 01709 static void inRange16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, 01710 const ushort* src3, size_t step3, uchar* dst, size_t step, Size size) 01711 { 01712 inRange_(src1, step1, src2, step2, src3, step3, dst, step, size); 01713 } 01714 01715 static void inRange16s(const short* src1, size_t step1, const short* src2, size_t step2, 01716 const short* src3, size_t step3, uchar* dst, size_t step, Size size) 01717 { 01718 inRange_(src1, step1, src2, step2, src3, step3, dst, step, size); 01719 } 01720 01721 static void inRange32s(const int* src1, size_t step1, const int* src2, size_t step2, 01722 const int* src3, size_t step3, uchar* dst, size_t step, Size size) 01723 { 01724 inRange_(src1, step1, src2, step2, src3, step3, dst, step, size); 01725 } 01726 01727 static void inRange32f(const float* src1, size_t step1, const float* src2, size_t step2, 01728 const float* src3, size_t step3, uchar* dst, size_t step, Size size) 01729 { 01730 inRange_(src1, step1, src2, step2, src3, step3, dst, step, size); 01731 } 01732 01733 static void inRange64f(const double* src1, size_t step1, const double* src2, size_t step2, 01734 const double* src3, size_t step3, uchar* dst, size_t step, Size size) 01735 { 01736 inRange_(src1, step1, src2, step2, src3, 
step3, dst, step, size); 01737 } 01738 01739 static void inRangeReduce(const uchar* src, uchar* dst, size_t len, int cn) 01740 { 01741 int k = cn % 4 ? cn % 4 : 4; 01742 size_t i, j; 01743 if( k == 1 ) 01744 for( i = j = 0; i < len; i++, j += cn ) 01745 dst[i] = src[j]; 01746 else if( k == 2 ) 01747 for( i = j = 0; i < len; i++, j += cn ) 01748 dst[i] = src[j] & src[j+1]; 01749 else if( k == 3 ) 01750 for( i = j = 0; i < len; i++, j += cn ) 01751 dst[i] = src[j] & src[j+1] & src[j+2]; 01752 else 01753 for( i = j = 0; i < len; i++, j += cn ) 01754 dst[i] = src[j] & src[j+1] & src[j+2] & src[j+3]; 01755 01756 for( ; k < cn; k += 4 ) 01757 { 01758 for( i = 0, j = k; i < len; i++, j += cn ) 01759 dst[i] &= src[j] & src[j+1] & src[j+2] & src[j+3]; 01760 } 01761 } 01762 01763 typedef void (*InRangeFunc)( const uchar* src1, size_t step1, const uchar* src2, size_t step2, 01764 const uchar* src3, size_t step3, uchar* dst, size_t step, Size sz ); 01765 01766 static InRangeFunc getInRangeFunc(int depth) 01767 { 01768 static InRangeFunc inRangeTab[] = 01769 { 01770 (InRangeFunc)GET_OPTIMIZED(inRange8u), (InRangeFunc)GET_OPTIMIZED(inRange8s), (InRangeFunc)GET_OPTIMIZED(inRange16u), 01771 (InRangeFunc)GET_OPTIMIZED(inRange16s), (InRangeFunc)GET_OPTIMIZED(inRange32s), (InRangeFunc)GET_OPTIMIZED(inRange32f), 01772 (InRangeFunc)inRange64f, 0 01773 }; 01774 01775 return inRangeTab[depth]; 01776 } 01777 01778 #ifdef HAVE_OPENCL 01779 01780 static bool ocl_inRange( InputArray _src, InputArray _lowerb, 01781 InputArray _upperb, OutputArray _dst ) 01782 { 01783 const ocl::Device & d = ocl::Device::getDefault(); 01784 int skind = _src.kind(), lkind = _lowerb.kind(), ukind = _upperb.kind(); 01785 Size ssize = _src.size(), lsize = _lowerb.size(), usize = _upperb.size(); 01786 int stype = _src.type(), ltype = _lowerb.type(), utype = _upperb.type(); 01787 int sdepth = CV_MAT_DEPTH(stype), ldepth = CV_MAT_DEPTH(ltype), udepth = CV_MAT_DEPTH(utype); 01788 int cn = CV_MAT_CN(stype), rowsPerWI = d.isIntel() ? 4 : 1; 01789 bool lbScalar = false, ubScalar = false; 01790 01791 if( (lkind == _InputArray::MATX && skind != _InputArray::MATX) || 01792 ssize != lsize || stype != ltype ) 01793 { 01794 if( !checkScalar(_lowerb, stype, lkind, skind) ) 01795 CV_Error( CV_StsUnmatchedSizes, 01796 "The lower bounary is neither an array of the same size and same type as src, nor a scalar"); 01797 lbScalar = true; 01798 } 01799 01800 if( (ukind == _InputArray::MATX && skind != _InputArray::MATX) || 01801 ssize != usize || stype != utype ) 01802 { 01803 if( !checkScalar(_upperb, stype, ukind, skind) ) 01804 CV_Error( CV_StsUnmatchedSizes, 01805 "The upper bounary is neither an array of the same size and same type as src, nor a scalar"); 01806 ubScalar = true; 01807 } 01808 01809 if (lbScalar != ubScalar) 01810 return false; 01811 01812 bool doubleSupport = d.doubleFPConfig() > 0, 01813 haveScalar = lbScalar && ubScalar; 01814 01815 if ( (!doubleSupport && sdepth == CV_64F) || 01816 (!haveScalar && (sdepth != ldepth || sdepth != udepth)) ) 01817 return false; 01818 01819 int kercn = haveScalar ? cn : std::max(std::min(ocl::predictOptimalVectorWidth(_src, _lowerb, _upperb, _dst), 4), cn); 01820 if (kercn % cn != 0) 01821 kercn = cn; 01822 int colsPerWI = kercn / cn; 01823 String opts = format("%s-D cn=%d -D srcT=%s -D srcT1=%s -D dstT=%s -D kercn=%d -D depth=%d%s -D colsPerWI=%d", 01824 haveScalar ? 
"-D HAVE_SCALAR " : "", cn, ocl::typeToStr(CV_MAKE_TYPE(sdepth, kercn)), 01825 ocl::typeToStr(sdepth), ocl::typeToStr(CV_8UC(colsPerWI)), kercn, sdepth, 01826 doubleSupport ? " -D DOUBLE_SUPPORT" : "", colsPerWI); 01827 01828 ocl::Kernel ker("inrange", ocl::core::inrange_oclsrc, opts); 01829 if (ker.empty()) 01830 return false; 01831 01832 _dst.create(ssize, CV_8UC1); 01833 UMat src = _src.getUMat(), dst = _dst.getUMat(), lscalaru, uscalaru; 01834 Mat lscalar, uscalar; 01835 01836 if (lbScalar && ubScalar) 01837 { 01838 lscalar = _lowerb.getMat(); 01839 uscalar = _upperb.getMat(); 01840 01841 size_t esz = src.elemSize(); 01842 size_t blocksize = 36; 01843 01844 AutoBuffer<uchar> _buf(blocksize*(((int)lbScalar + (int)ubScalar)*esz + cn) + 2*cn*sizeof(int) + 128); 01845 uchar *buf = alignPtr(_buf + blocksize*cn, 16); 01846 01847 if( ldepth != sdepth && sdepth < CV_32S ) 01848 { 01849 int* ilbuf = (int*)alignPtr(buf + blocksize*esz, 16); 01850 int* iubuf = ilbuf + cn; 01851 01852 BinaryFunc sccvtfunc = getConvertFunc(ldepth, CV_32S); 01853 sccvtfunc(lscalar.ptr(), 1, 0, 1, (uchar*)ilbuf, 1, Size(cn, 1), 0); 01854 sccvtfunc(uscalar.ptr(), 1, 0, 1, (uchar*)iubuf, 1, Size(cn, 1), 0); 01855 int minval = cvRound(getMinVal(sdepth)), maxval = cvRound(getMaxVal(sdepth)); 01856 01857 for( int k = 0; k < cn; k++ ) 01858 { 01859 if( ilbuf[k] > iubuf[k] || ilbuf[k] > maxval || iubuf[k] < minval ) 01860 ilbuf[k] = minval+1, iubuf[k] = minval; 01861 } 01862 lscalar = Mat(cn, 1, CV_32S, ilbuf); 01863 uscalar = Mat(cn, 1, CV_32S, iubuf); 01864 } 01865 01866 lscalar.convertTo(lscalar, stype); 01867 uscalar.convertTo(uscalar, stype); 01868 } 01869 else 01870 { 01871 lscalaru = _lowerb.getUMat(); 01872 uscalaru = _upperb.getUMat(); 01873 } 01874 01875 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), 01876 dstarg = ocl::KernelArg::WriteOnly(dst, 1, colsPerWI); 01877 01878 if (haveScalar) 01879 { 01880 lscalar.copyTo(lscalaru); 01881 uscalar.copyTo(uscalaru); 01882 01883 ker.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(lscalaru), 01884 ocl::KernelArg::PtrReadOnly(uscalaru), rowsPerWI); 01885 } 01886 else 01887 ker.args(srcarg, dstarg, ocl::KernelArg::ReadOnlyNoSize(lscalaru), 01888 ocl::KernelArg::ReadOnlyNoSize(uscalaru), rowsPerWI); 01889 01890 size_t globalsize[2] = { (size_t)ssize.width / colsPerWI, ((size_t)ssize.height + rowsPerWI - 1) / rowsPerWI }; 01891 return ker.run(2, globalsize, NULL, false); 01892 } 01893 01894 #endif 01895 01896 } 01897 01898 void cv::inRange(InputArray _src, InputArray _lowerb, 01899 InputArray _upperb, OutputArray _dst) 01900 { 01901 #ifdef HAVE_OPENCL 01902 CV_OCL_RUN(_src.dims() <= 2 && _lowerb.dims() <= 2 && 01903 _upperb.dims() <= 2 && OCL_PERFORMANCE_CHECK(_dst.isUMat()), 01904 ocl_inRange(_src, _lowerb, _upperb, _dst)) 01905 #endif 01906 01907 int skind = _src.kind(), lkind = _lowerb.kind(), ukind = _upperb.kind(); 01908 Mat src = _src.getMat(), lb = _lowerb.getMat(), ub = _upperb.getMat(); 01909 01910 bool lbScalar = false, ubScalar = false; 01911 01912 if( (lkind == _InputArray::MATX && skind != _InputArray::MATX) || 01913 src.size != lb.size || src.type() != lb.type() ) 01914 { 01915 if( !checkScalar(lb, src.type(), lkind, skind) ) 01916 CV_Error( CV_StsUnmatchedSizes, 01917 "The lower bounary is neither an array of the same size and same type as src, nor a scalar"); 01918 lbScalar = true; 01919 } 01920 01921 if( (ukind == _InputArray::MATX && skind != _InputArray::MATX) || 01922 src.size != ub.size || src.type() != ub.type() ) 01923 { 01924 if( 
!checkScalar(ub, src.type(), ukind, skind) ) 01925 CV_Error( CV_StsUnmatchedSizes, 01926 "The upper bounary is neither an array of the same size and same type as src, nor a scalar"); 01927 ubScalar = true; 01928 } 01929 01930 CV_Assert(lbScalar == ubScalar); 01931 01932 int cn = src.channels(), depth = src.depth(); 01933 01934 size_t esz = src.elemSize(); 01935 size_t blocksize0 = (size_t)(BLOCK_SIZE + esz-1)/esz; 01936 01937 _dst.create(src.dims, src.size, CV_8UC1); 01938 Mat dst = _dst.getMat(); 01939 InRangeFunc func = getInRangeFunc(depth); 01940 01941 const Mat* arrays_sc[] = { &src, &dst, 0 }; 01942 const Mat* arrays_nosc[] = { &src, &dst, &lb, &ub, 0 }; 01943 uchar* ptrs[4]; 01944 01945 NAryMatIterator it(lbScalar && ubScalar ? arrays_sc : arrays_nosc, ptrs); 01946 size_t total = it.size, blocksize = std::min(total, blocksize0); 01947 01948 AutoBuffer<uchar> _buf(blocksize*(((int)lbScalar + (int)ubScalar)*esz + cn) + 2*cn*sizeof(int) + 128); 01949 uchar *buf = _buf, *mbuf = buf, *lbuf = 0, *ubuf = 0; 01950 buf = alignPtr(buf + blocksize*cn, 16); 01951 01952 if( lbScalar && ubScalar ) 01953 { 01954 lbuf = buf; 01955 ubuf = buf = alignPtr(buf + blocksize*esz, 16); 01956 01957 CV_Assert( lb.type() == ub.type() ); 01958 int scdepth = lb.depth(); 01959 01960 if( scdepth != depth && depth < CV_32S ) 01961 { 01962 int* ilbuf = (int*)alignPtr(buf + blocksize*esz, 16); 01963 int* iubuf = ilbuf + cn; 01964 01965 BinaryFunc sccvtfunc = getConvertFunc(scdepth, CV_32S); 01966 sccvtfunc(lb.ptr(), 1, 0, 1, (uchar*)ilbuf, 1, Size(cn, 1), 0); 01967 sccvtfunc(ub.ptr(), 1, 0, 1, (uchar*)iubuf, 1, Size(cn, 1), 0); 01968 int minval = cvRound(getMinVal(depth)), maxval = cvRound(getMaxVal(depth)); 01969 01970 for( int k = 0; k < cn; k++ ) 01971 { 01972 if( ilbuf[k] > iubuf[k] || ilbuf[k] > maxval || iubuf[k] < minval ) 01973 ilbuf[k] = minval+1, iubuf[k] = minval; 01974 } 01975 lb = Mat(cn, 1, CV_32S, ilbuf); 01976 ub = Mat(cn, 1, CV_32S, iubuf); 01977 } 01978 01979 convertAndUnrollScalar( lb, src.type(), lbuf, blocksize ); 01980 convertAndUnrollScalar( ub, src.type(), ubuf, blocksize ); 01981 } 01982 01983 for( size_t i = 0; i < it.nplanes; i++, ++it ) 01984 { 01985 for( size_t j = 0; j < total; j += blocksize ) 01986 { 01987 int bsz = (int)MIN(total - j, blocksize); 01988 size_t delta = bsz*esz; 01989 uchar *lptr = lbuf, *uptr = ubuf; 01990 if( !lbScalar ) 01991 { 01992 lptr = ptrs[2]; 01993 ptrs[2] += delta; 01994 } 01995 if( !ubScalar ) 01996 { 01997 int idx = !lbScalar ? 3 : 2; 01998 uptr = ptrs[idx]; 01999 ptrs[idx] += delta; 02000 } 02001 func( ptrs[0], 0, lptr, 0, uptr, 0, cn == 1 ? ptrs[1] : mbuf, 0, Size(bsz*cn, 1)); 02002 if( cn > 1 ) 02003 inRangeReduce(mbuf, ptrs[1], bsz, cn); 02004 ptrs[0] += delta; 02005 ptrs[1] += bsz; 02006 } 02007 } 02008 } 02009 02010 /****************************************************************************************\ 02011 * Earlier API: cvAdd etc. 
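*  (The wrappers below convert their CvArr arguments to cv::Mat via cvarrToMat and       *
*   forward to the corresponding C++ functions: bitwise_not/and/or/xor, add, subtract,   *
*   multiply, divide, addWeighted, absdiff, inRange, compare, min and max.)              *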
* 02012 \****************************************************************************************/ 02013 02014 CV_IMPL void 02015 cvNot( const CvArr* srcarr, CvArr* dstarr ) 02016 { 02017 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr); 02018 CV_Assert( src.size == dst.size && src.type() == dst.type() ); 02019 cv::bitwise_not( src, dst ); 02020 } 02021 02022 02023 CV_IMPL void 02024 cvAnd( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr ) 02025 { 02026 cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2), 02027 dst = cv::cvarrToMat(dstarr), mask; 02028 CV_Assert( src1.size == dst.size && src1.type() == dst.type() ); 02029 if( maskarr ) 02030 mask = cv::cvarrToMat(maskarr); 02031 cv::bitwise_and( src1, src2, dst, mask ); 02032 } 02033 02034 02035 CV_IMPL void 02036 cvOr( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr ) 02037 { 02038 cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2), 02039 dst = cv::cvarrToMat(dstarr), mask; 02040 CV_Assert( src1.size == dst.size && src1.type() == dst.type() ); 02041 if( maskarr ) 02042 mask = cv::cvarrToMat(maskarr); 02043 cv::bitwise_or( src1, src2, dst, mask ); 02044 } 02045 02046 02047 CV_IMPL void 02048 cvXor( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr ) 02049 { 02050 cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2), 02051 dst = cv::cvarrToMat(dstarr), mask; 02052 CV_Assert( src1.size == dst.size && src1.type() == dst.type() ); 02053 if( maskarr ) 02054 mask = cv::cvarrToMat(maskarr); 02055 cv::bitwise_xor( src1, src2, dst, mask ); 02056 } 02057 02058 02059 CV_IMPL void 02060 cvAndS( const CvArr* srcarr, CvScalar s, CvArr* dstarr, const CvArr* maskarr ) 02061 { 02062 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), mask; 02063 CV_Assert( src.size == dst.size && src.type() == dst.type() ); 02064 if( maskarr ) 02065 mask = cv::cvarrToMat(maskarr); 02066 cv::bitwise_and( src, (const cv::Scalar &)s, dst, mask ); 02067 } 02068 02069 02070 CV_IMPL void 02071 cvOrS( const CvArr* srcarr, CvScalar s, CvArr* dstarr, const CvArr* maskarr ) 02072 { 02073 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), mask; 02074 CV_Assert( src.size == dst.size && src.type() == dst.type() ); 02075 if( maskarr ) 02076 mask = cv::cvarrToMat(maskarr); 02077 cv::bitwise_or( src, (const cv::Scalar &)s, dst, mask ); 02078 } 02079 02080 02081 CV_IMPL void 02082 cvXorS( const CvArr* srcarr, CvScalar s, CvArr* dstarr, const CvArr* maskarr ) 02083 { 02084 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), mask; 02085 CV_Assert( src.size == dst.size && src.type() == dst.type() ); 02086 if( maskarr ) 02087 mask = cv::cvarrToMat(maskarr); 02088 cv::bitwise_xor( src, (const cv::Scalar &)s, dst, mask ); 02089 } 02090 02091 02092 CV_IMPL void cvAdd( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr ) 02093 { 02094 cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2), 02095 dst = cv::cvarrToMat(dstarr), mask; 02096 CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() ); 02097 if( maskarr ) 02098 mask = cv::cvarrToMat(maskarr); 02099 cv::add( src1, src2, dst, mask, dst.type() ); 02100 } 02101 02102 02103 CV_IMPL void cvSub( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr ) 02104 { 02105 cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2), 02106 dst = 
cv::cvarrToMat(dstarr), mask; 02107 CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() ); 02108 if( maskarr ) 02109 mask = cv::cvarrToMat(maskarr); 02110 cv::subtract( src1, src2, dst, mask, dst.type() ); 02111 } 02112 02113 02114 CV_IMPL void cvAddS( const CvArr* srcarr1, CvScalar value, CvArr* dstarr, const CvArr* maskarr ) 02115 { 02116 cv::Mat src1 = cv::cvarrToMat(srcarr1), 02117 dst = cv::cvarrToMat(dstarr), mask; 02118 CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() ); 02119 if( maskarr ) 02120 mask = cv::cvarrToMat(maskarr); 02121 cv::add( src1, (const cv::Scalar &)value, dst, mask, dst.type() ); 02122 } 02123 02124 02125 CV_IMPL void cvSubRS( const CvArr* srcarr1, CvScalar value, CvArr* dstarr, const CvArr* maskarr ) 02126 { 02127 cv::Mat src1 = cv::cvarrToMat(srcarr1), 02128 dst = cv::cvarrToMat(dstarr), mask; 02129 CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() ); 02130 if( maskarr ) 02131 mask = cv::cvarrToMat(maskarr); 02132 cv::subtract( (const cv::Scalar &)value, src1, dst, mask, dst.type() ); 02133 } 02134 02135 02136 CV_IMPL void cvMul( const CvArr* srcarr1, const CvArr* srcarr2, 02137 CvArr* dstarr, double scale ) 02138 { 02139 cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2), 02140 dst = cv::cvarrToMat(dstarr); 02141 CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() ); 02142 cv::multiply( src1, src2, dst, scale, dst.type() ); 02143 } 02144 02145 02146 CV_IMPL void cvDiv( const CvArr* srcarr1, const CvArr* srcarr2, 02147 CvArr* dstarr, double scale ) 02148 { 02149 cv::Mat src2 = cv::cvarrToMat(srcarr2), 02150 dst = cv::cvarrToMat(dstarr), mask; 02151 CV_Assert( src2.size == dst.size && src2.channels() == dst.channels() ); 02152 02153 if( srcarr1 ) 02154 cv::divide( cv::cvarrToMat(srcarr1), src2, dst, scale, dst.type() ); 02155 else 02156 cv::divide( scale, src2, dst, dst.type() ); 02157 } 02158 02159 02160 CV_IMPL void 02161 cvAddWeighted( const CvArr* srcarr1, double alpha, 02162 const CvArr* srcarr2, double beta, 02163 double gamma, CvArr* dstarr ) 02164 { 02165 cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2), 02166 dst = cv::cvarrToMat(dstarr); 02167 CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() ); 02168 cv::addWeighted( src1, alpha, src2, beta, gamma, dst, dst.type() ); 02169 } 02170 02171 02172 CV_IMPL void 02173 cvAbsDiff( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr ) 02174 { 02175 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr); 02176 CV_Assert( src1.size == dst.size && src1.type() == dst.type() ); 02177 02178 cv::absdiff( src1, cv::cvarrToMat(srcarr2), dst ); 02179 } 02180 02181 02182 CV_IMPL void 02183 cvAbsDiffS( const CvArr* srcarr1, CvArr* dstarr, CvScalar scalar ) 02184 { 02185 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr); 02186 CV_Assert( src1.size == dst.size && src1.type() == dst.type() ); 02187 02188 cv::absdiff( src1, (const cv::Scalar &)scalar, dst ); 02189 } 02190 02191 02192 CV_IMPL void 02193 cvInRange( const void* srcarr1, const void* srcarr2, 02194 const void* srcarr3, void* dstarr ) 02195 { 02196 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr); 02197 CV_Assert( src1.size == dst.size && dst.type() == CV_8U ); 02198 02199 cv::inRange( src1, cv::cvarrToMat(srcarr2), cv::cvarrToMat(srcarr3), dst ); 02200 } 02201 02202 02203 CV_IMPL void 02204 cvInRangeS( const void* srcarr1, CvScalar lowerb, CvScalar upperb, void* dstarr ) 
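// cvInRangeS is the legacy counterpart of cv::inRange with scalar bounds.
// A minimal sketch of the equivalent modern call (image and variable names
// are illustrative, not from this file):
//     cv::Mat mask;
//     cv::inRange(bgr, cv::Scalar(0, 0, 100), cv::Scalar(50, 50, 255), mask);
// mask is created as CV_8UC1 and holds 255 where every channel lies inside the
// per-channel [lower, upper] range (bounds inclusive), 0 elsewhere.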
02205 { 02206 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr); 02207 CV_Assert( src1.size == dst.size && dst.type() == CV_8U ); 02208 02209 cv::inRange( src1, (const cv::Scalar &)lowerb, (const cv::Scalar &)upperb, dst ); 02210 } 02211 02212 02213 CV_IMPL void 02214 cvCmp( const void* srcarr1, const void* srcarr2, void* dstarr, int cmp_op ) 02215 { 02216 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr); 02217 CV_Assert( src1.size == dst.size && dst.type() == CV_8U ); 02218 02219 cv::compare( src1, cv::cvarrToMat(srcarr2), dst, cmp_op ); 02220 } 02221 02222 02223 CV_IMPL void 02224 cvCmpS( const void* srcarr1, double value, void* dstarr, int cmp_op ) 02225 { 02226 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr); 02227 CV_Assert( src1.size == dst.size && dst.type() == CV_8U ); 02228 02229 cv::compare( src1, value, dst, cmp_op ); 02230 } 02231 02232 02233 CV_IMPL void 02234 cvMin( const void* srcarr1, const void* srcarr2, void* dstarr ) 02235 { 02236 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr); 02237 CV_Assert( src1.size == dst.size && src1.type() == dst.type() ); 02238 02239 cv::min( src1, cv::cvarrToMat(srcarr2), dst ); 02240 } 02241 02242 02243 CV_IMPL void 02244 cvMax( const void* srcarr1, const void* srcarr2, void* dstarr ) 02245 { 02246 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr); 02247 CV_Assert( src1.size == dst.size && src1.type() == dst.type() ); 02248 02249 cv::max( src1, cv::cvarrToMat(srcarr2), dst ); 02250 } 02251 02252 02253 CV_IMPL void 02254 cvMinS( const void* srcarr1, double value, void* dstarr ) 02255 { 02256 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr); 02257 CV_Assert( src1.size == dst.size && src1.type() == dst.type() ); 02258 02259 cv::min( src1, value, dst ); 02260 } 02261 02262 02263 CV_IMPL void 02264 cvMaxS( const void* srcarr1, double value, void* dstarr ) 02265 { 02266 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr); 02267 CV_Assert( src1.size == dst.size && src1.type() == dst.type() ); 02268 02269 cv::max( src1, value, dst ); 02270 } 02271 02272 02273 02274 namespace cv { namespace hal { 02275 02276 //======================================= 02277 02278 #if (ARITHM_USE_IPP == 1) 02279 static inline void fixSteps(int width, int height, size_t elemSize, size_t& step1, size_t& step2, size_t& step) 02280 { 02281 if( height == 1 ) 02282 step1 = step2 = step = width*elemSize; 02283 } 02284 #define CALL_IPP_BIN_E_12(fun) \ 02285 CV_IPP_CHECK() \ 02286 { \ 02287 fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ 02288 if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0)) \ 02289 { \ 02290 CV_IMPL_ADD(CV_IMPL_IPP); \ 02291 return; \ 02292 } \ 02293 setIppErrorStatus(); \ 02294 } 02295 02296 #define CALL_IPP_BIN_E_21(fun) \ 02297 CV_IPP_CHECK() \ 02298 { \ 02299 fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ 02300 if (0 <= fun(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0)) \ 02301 { \ 02302 CV_IMPL_ADD(CV_IMPL_IPP); \ 02303 return; \ 02304 } \ 02305 setIppErrorStatus(); \ 02306 } 02307 02308 #define CALL_IPP_BIN_12(fun) \ 02309 CV_IPP_CHECK() \ 02310 { \ 02311 fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ 02312 if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height))) \ 02313 { \ 02314 CV_IMPL_ADD(CV_IMPL_IPP); \ 02315 return; \ 02316 } \ 02317 setIppErrorStatus(); \ 
02318 } 02319 02320 #define CALL_IPP_BIN_21(fun) \ 02321 CV_IPP_CHECK() \ 02322 { \ 02323 fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ 02324 if (0 <= fun(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height))) \ 02325 { \ 02326 CV_IMPL_ADD(CV_IMPL_IPP); \ 02327 return; \ 02328 } \ 02329 setIppErrorStatus(); \ 02330 } 02331 02332 #else 02333 #define CALL_IPP_BIN_E_12(fun) 02334 #define CALL_IPP_BIN_E_21(fun) 02335 #define CALL_IPP_BIN_12(fun) 02336 #define CALL_IPP_BIN_21(fun) 02337 #endif 02338 02339 02340 //======================================= 02341 // Add 02342 //======================================= 02343 02344 void add8u( const uchar* src1, size_t step1, 02345 const uchar* src2, size_t step2, 02346 uchar* dst, size_t step, int width, int height, void* ) 02347 { 02348 CALL_HAL(add8u, cv_hal_add8u, src1, step1, src2, step2, dst, step, width, height) 02349 CALL_IPP_BIN_E_12(ippiAdd_8u_C1RSfs) 02350 (vBinOp<uchar, cv::OpAdd<uchar>, IF_SIMD(VAdd<uchar>)>(src1, step1, src2, step2, dst, step, width, height)); 02351 } 02352 02353 void add8s( const schar* src1, size_t step1, 02354 const schar* src2, size_t step2, 02355 schar* dst, size_t step, int width, int height, void* ) 02356 { 02357 CALL_HAL(add8s, cv_hal_add8s, src1, step1, src2, step2, dst, step, width, height) 02358 vBinOp<schar, cv::OpAdd<schar>, IF_SIMD(VAdd<schar>)>(src1, step1, src2, step2, dst, step, width, height); 02359 } 02360 02361 void add16u( const ushort* src1, size_t step1, 02362 const ushort* src2, size_t step2, 02363 ushort* dst, size_t step, int width, int height, void* ) 02364 { 02365 CALL_HAL(add16u, cv_hal_add16u, src1, step1, src2, step2, dst, step, width, height) 02366 CALL_IPP_BIN_E_12(ippiAdd_16u_C1RSfs) 02367 (vBinOp<ushort, cv::OpAdd<ushort>, IF_SIMD(VAdd<ushort>)>(src1, step1, src2, step2, dst, step, width, height)); 02368 } 02369 02370 void add16s( const short* src1, size_t step1, 02371 const short* src2, size_t step2, 02372 short* dst, size_t step, int width, int height, void* ) 02373 { 02374 CALL_HAL(add16s, cv_hal_add16s, src1, step1, src2, step2, dst, step, width, height) 02375 CALL_IPP_BIN_E_12(ippiAdd_16s_C1RSfs) 02376 (vBinOp<short, cv::OpAdd<short>, IF_SIMD(VAdd<short>)>(src1, step1, src2, step2, dst, step, width, height)); 02377 } 02378 02379 void add32s( const int* src1, size_t step1, 02380 const int* src2, size_t step2, 02381 int* dst, size_t step, int width, int height, void* ) 02382 { 02383 CALL_HAL(add32s, cv_hal_add32s, src1, step1, src2, step2, dst, step, width, height) 02384 vBinOp32<int, cv::OpAdd<int>, IF_SIMD(VAdd<int>)>(src1, step1, src2, step2, dst, step, width, height); 02385 } 02386 02387 void add32f( const float* src1, size_t step1, 02388 const float* src2, size_t step2, 02389 float* dst, size_t step, int width, int height, void* ) 02390 { 02391 CALL_HAL(add32f, cv_hal_add32f, src1, step1, src2, step2, dst, step, width, height) 02392 CALL_IPP_BIN_12(ippiAdd_32f_C1R) 02393 (vBinOp32<float, cv::OpAdd<float>, IF_SIMD(VAdd<float>)>(src1, step1, src2, step2, dst, step, width, height)); 02394 } 02395 02396 void add64f( const double* src1, size_t step1, 02397 const double* src2, size_t step2, 02398 double* dst, size_t step, int width, int height, void* ) 02399 { 02400 CALL_HAL(add64f, cv_hal_add64f, src1, step1, src2, step2, dst, step, width, height) 02401 vBinOp64<double, cv::OpAdd<double>, IF_SIMD(VAdd<double>)>(src1, step1, src2, step2, dst, step, width, height); 02402 } 02403 02404 //======================================= 02405 // Subtract 
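// The element-wise kernels in these sections follow one pattern: try a custom
// HAL replacement first (CALL_HAL), then an IPP primitive where one is wired
// up (CALL_IPP_BIN_* / CALL_IPP_MIN_MAX), and finally fall back to the generic
// SIMD/scalar templates vBinOp, vBinOp32 and vBinOp64.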
02406 //======================================= 02407 02408 void sub8u( const uchar* src1, size_t step1, 02409 const uchar* src2, size_t step2, 02410 uchar* dst, size_t step, int width, int height, void* ) 02411 { 02412 CALL_HAL(sub8u, cv_hal_sub8u, src1, step1, src2, step2, dst, step, width, height) 02413 CALL_IPP_BIN_E_21(ippiSub_8u_C1RSfs) 02414 (vBinOp<uchar, cv::OpSub<uchar>, IF_SIMD(VSub<uchar>)>(src1, step1, src2, step2, dst, step, width, height)); 02415 } 02416 02417 void sub8s( const schar* src1, size_t step1, 02418 const schar* src2, size_t step2, 02419 schar* dst, size_t step, int width, int height, void* ) 02420 { 02421 CALL_HAL(sub8s, cv_hal_sub8s, src1, step1, src2, step2, dst, step, width, height) 02422 vBinOp<schar, cv::OpSub<schar>, IF_SIMD(VSub<schar>)>(src1, step1, src2, step2, dst, step, width, height); 02423 } 02424 02425 void sub16u( const ushort* src1, size_t step1, 02426 const ushort* src2, size_t step2, 02427 ushort* dst, size_t step, int width, int height, void* ) 02428 { 02429 CALL_HAL(sub16u, cv_hal_sub16u, src1, step1, src2, step2, dst, step, width, height) 02430 CALL_IPP_BIN_E_21(ippiSub_16u_C1RSfs) 02431 (vBinOp<ushort, cv::OpSub<ushort>, IF_SIMD(VSub<ushort>)>(src1, step1, src2, step2, dst, step, width, height)); 02432 } 02433 02434 void sub16s( const short* src1, size_t step1, 02435 const short* src2, size_t step2, 02436 short* dst, size_t step, int width, int height, void* ) 02437 { 02438 CALL_HAL(sub16s, cv_hal_sub16s, src1, step1, src2, step2, dst, step, width, height) 02439 CALL_IPP_BIN_E_21(ippiSub_16s_C1RSfs) 02440 (vBinOp<short, cv::OpSub<short>, IF_SIMD(VSub<short>)>(src1, step1, src2, step2, dst, step, width, height)); 02441 } 02442 02443 void sub32s( const int* src1, size_t step1, 02444 const int* src2, size_t step2, 02445 int* dst, size_t step, int width, int height, void* ) 02446 { 02447 CALL_HAL(sub32s, cv_hal_sub32s, src1, step1, src2, step2, dst, step, width, height) 02448 vBinOp32<int, cv::OpSub<int>, IF_SIMD(VSub<int>)>(src1, step1, src2, step2, dst, step, width, height); 02449 } 02450 02451 void sub32f( const float* src1, size_t step1, 02452 const float* src2, size_t step2, 02453 float* dst, size_t step, int width, int height, void* ) 02454 { 02455 CALL_HAL(sub32f, cv_hal_sub32f, src1, step1, src2, step2, dst, step, width, height) 02456 CALL_IPP_BIN_21(ippiSub_32f_C1R) 02457 (vBinOp32<float, cv::OpSub<float>, IF_SIMD(VSub<float>)>(src1, step1, src2, step2, dst, step, width, height)); 02458 } 02459 02460 void sub64f( const double* src1, size_t step1, 02461 const double* src2, size_t step2, 02462 double* dst, size_t step, int width, int height, void* ) 02463 { 02464 CALL_HAL(sub64f, cv_hal_sub64f, src1, step1, src2, step2, dst, step, width, height) 02465 vBinOp64<double, cv::OpSub<double>, IF_SIMD(VSub<double>)>(src1, step1, src2, step2, dst, step, width, height); 02466 } 02467 02468 //======================================= 02469 02470 #if (ARITHM_USE_IPP == 1) 02471 #define CALL_IPP_MIN_MAX(fun, type) \ 02472 CV_IPP_CHECK() \ 02473 { \ 02474 type* s1 = (type*)src1; \ 02475 type* s2 = (type*)src2; \ 02476 type* d = dst; \ 02477 fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ 02478 int i = 0; \ 02479 for(; i < height; i++) \ 02480 { \ 02481 if (0 > fun(s1, s2, d, width)) \ 02482 break; \ 02483 s1 = (type*)((uchar*)s1 + step1); \ 02484 s2 = (type*)((uchar*)s2 + step2); \ 02485 d = (type*)((uchar*)d + step); \ 02486 } \ 02487 if (i == height) \ 02488 { \ 02489 CV_IMPL_ADD(CV_IMPL_IPP); \ 02490 return; \ 02491 } \ 02492 
setIppErrorStatus(); \ 02493 } 02494 #else 02495 #define CALL_IPP_MIN_MAX(fun, type) 02496 #endif 02497 02498 //======================================= 02499 // Max 02500 //======================================= 02501 02502 void max8u( const uchar* src1, size_t step1, 02503 const uchar* src2, size_t step2, 02504 uchar* dst, size_t step, int width, int height, void* ) 02505 { 02506 CALL_HAL(max8u, cv_hal_max8u, src1, step1, src2, step2, dst, step, width, height) 02507 CALL_IPP_MIN_MAX(ippsMaxEvery_8u, uchar) 02508 vBinOp<uchar, cv::OpMax<uchar>, IF_SIMD(VMax<uchar>)>(src1, step1, src2, step2, dst, step, width, height); 02509 } 02510 02511 void max8s( const schar* src1, size_t step1, 02512 const schar* src2, size_t step2, 02513 schar* dst, size_t step, int width, int height, void* ) 02514 { 02515 CALL_HAL(max8s, cv_hal_max8s, src1, step1, src2, step2, dst, step, width, height) 02516 vBinOp<schar, cv::OpMax<schar>, IF_SIMD(VMax<schar>)>(src1, step1, src2, step2, dst, step, width, height); 02517 } 02518 02519 void max16u( const ushort* src1, size_t step1, 02520 const ushort* src2, size_t step2, 02521 ushort* dst, size_t step, int width, int height, void* ) 02522 { 02523 CALL_HAL(max16u, cv_hal_max16u, src1, step1, src2, step2, dst, step, width, height) 02524 CALL_IPP_MIN_MAX(ippsMaxEvery_16u, ushort) 02525 vBinOp<ushort, cv::OpMax<ushort>, IF_SIMD(VMax<ushort>)>(src1, step1, src2, step2, dst, step, width, height); 02526 } 02527 02528 void max16s( const short* src1, size_t step1, 02529 const short* src2, size_t step2, 02530 short* dst, size_t step, int width, int height, void* ) 02531 { 02532 CALL_HAL(max16s, cv_hal_max16s, src1, step1, src2, step2, dst, step, width, height) 02533 vBinOp<short, cv::OpMax<short>, IF_SIMD(VMax<short>)>(src1, step1, src2, step2, dst, step, width, height); 02534 } 02535 02536 void max32s( const int* src1, size_t step1, 02537 const int* src2, size_t step2, 02538 int* dst, size_t step, int width, int height, void* ) 02539 { 02540 CALL_HAL(max32s, cv_hal_max32s, src1, step1, src2, step2, dst, step, width, height) 02541 vBinOp32<int, cv::OpMax<int>, IF_SIMD(VMax<int>)>(src1, step1, src2, step2, dst, step, width, height); 02542 } 02543 02544 void max32f( const float* src1, size_t step1, 02545 const float* src2, size_t step2, 02546 float* dst, size_t step, int width, int height, void* ) 02547 { 02548 CALL_HAL(max32f, cv_hal_max32f, src1, step1, src2, step2, dst, step, width, height) 02549 CALL_IPP_MIN_MAX(ippsMaxEvery_32f, float) 02550 vBinOp32<float, cv::OpMax<float>, IF_SIMD(VMax<float>)>(src1, step1, src2, step2, dst, step, width, height); 02551 } 02552 02553 void max64f( const double* src1, size_t step1, 02554 const double* src2, size_t step2, 02555 double* dst, size_t step, int width, int height, void* ) 02556 { 02557 CALL_HAL(max64f, cv_hal_max64f, src1, step1, src2, step2, dst, step, width, height) 02558 CALL_IPP_MIN_MAX(ippsMaxEvery_64f, double) 02559 vBinOp64<double, cv::OpMax<double>, IF_SIMD(VMax<double>)>(src1, step1, src2, step2, dst, step, width, height); 02560 } 02561 02562 //======================================= 02563 // Min 02564 //======================================= 02565 02566 void min8u( const uchar* src1, size_t step1, 02567 const uchar* src2, size_t step2, 02568 uchar* dst, size_t step, int width, int height, void* ) 02569 { 02570 CALL_HAL(min8u, cv_hal_min8u, src1, step1, src2, step2, dst, step, width, height) 02571 CALL_IPP_MIN_MAX(ippsMinEvery_8u, uchar) 02572 vBinOp<uchar, cv::OpMin<uchar>, IF_SIMD(VMin<uchar>)>(src1, step1, src2, step2, 
dst, step, width, height); 02573 } 02574 02575 void min8s( const schar* src1, size_t step1, 02576 const schar* src2, size_t step2, 02577 schar* dst, size_t step, int width, int height, void* ) 02578 { 02579 CALL_HAL(min8s, cv_hal_min8s, src1, step1, src2, step2, dst, step, width, height) 02580 vBinOp<schar, cv::OpMin<schar>, IF_SIMD(VMin<schar>)>(src1, step1, src2, step2, dst, step, width, height); 02581 } 02582 02583 void min16u( const ushort* src1, size_t step1, 02584 const ushort* src2, size_t step2, 02585 ushort* dst, size_t step, int width, int height, void* ) 02586 { 02587 CALL_HAL(min16u, cv_hal_min16u, src1, step1, src2, step2, dst, step, width, height) 02588 CALL_IPP_MIN_MAX(ippsMinEvery_16u, ushort) 02589 vBinOp<ushort, cv::OpMin<ushort>, IF_SIMD(VMin<ushort>)>(src1, step1, src2, step2, dst, step, width, height); 02590 } 02591 02592 void min16s( const short* src1, size_t step1, 02593 const short* src2, size_t step2, 02594 short* dst, size_t step, int width, int height, void* ) 02595 { 02596 CALL_HAL(min16s, cv_hal_min16s, src1, step1, src2, step2, dst, step, width, height) 02597 vBinOp<short, cv::OpMin<short>, IF_SIMD(VMin<short>)>(src1, step1, src2, step2, dst, step, width, height); 02598 } 02599 02600 void min32s( const int* src1, size_t step1, 02601 const int* src2, size_t step2, 02602 int* dst, size_t step, int width, int height, void* ) 02603 { 02604 CALL_HAL(min32s, cv_hal_min32s, src1, step1, src2, step2, dst, step, width, height) 02605 vBinOp32<int, cv::OpMin<int>, IF_SIMD(VMin<int>)>(src1, step1, src2, step2, dst, step, width, height); 02606 } 02607 02608 void min32f( const float* src1, size_t step1, 02609 const float* src2, size_t step2, 02610 float* dst, size_t step, int width, int height, void* ) 02611 { 02612 CALL_HAL(min32f, cv_hal_min32f, src1, step1, src2, step2, dst, step, width, height) 02613 CALL_IPP_MIN_MAX(ippsMinEvery_32f, float) 02614 vBinOp32<float, cv::OpMin<float>, IF_SIMD(VMin<float>)>(src1, step1, src2, step2, dst, step, width, height); 02615 } 02616 02617 void min64f( const double* src1, size_t step1, 02618 const double* src2, size_t step2, 02619 double* dst, size_t step, int width, int height, void* ) 02620 { 02621 CALL_HAL(min64f, cv_hal_min64f, src1, step1, src2, step2, dst, step, width, height) 02622 CALL_IPP_MIN_MAX(ippsMinEvery_64f, double) 02623 vBinOp64<double, cv::OpMin<double>, IF_SIMD(VMin<double>)>(src1, step1, src2, step2, dst, step, width, height); 02624 } 02625 02626 //======================================= 02627 // AbsDiff 02628 //======================================= 02629 02630 void absdiff8u( const uchar* src1, size_t step1, 02631 const uchar* src2, size_t step2, 02632 uchar* dst, size_t step, int width, int height, void* ) 02633 { 02634 CALL_HAL(absdiff8u, cv_hal_absdiff8u, src1, step1, src2, step2, dst, step, width, height) 02635 CALL_IPP_BIN_12(ippiAbsDiff_8u_C1R) 02636 (vBinOp<uchar, cv::OpAbsDiff<uchar>, IF_SIMD(VAbsDiff<uchar>)>(src1, step1, src2, step2, dst, step, width, height)); 02637 } 02638 02639 void absdiff8s( const schar* src1, size_t step1, 02640 const schar* src2, size_t step2, 02641 schar* dst, size_t step, int width, int height, void* ) 02642 { 02643 CALL_HAL(absdiff8s, cv_hal_absdiff8s, src1, step1, src2, step2, dst, step, width, height) 02644 vBinOp<schar, cv::OpAbsDiff<schar>, IF_SIMD(VAbsDiff<schar>)>(src1, step1, src2, step2, dst, step, width, height); 02645 } 02646 02647 void absdiff16u( const ushort* src1, size_t step1, 02648 const ushort* src2, size_t step2, 02649 ushort* dst, size_t step, int width, int 
height, void* ) 02650 { 02651 CALL_HAL(absdiff16u, cv_hal_absdiff16u, src1, step1, src2, step2, dst, step, width, height) 02652 CALL_IPP_BIN_12(ippiAbsDiff_16u_C1R) 02653 (vBinOp<ushort, cv::OpAbsDiff<ushort>, IF_SIMD(VAbsDiff<ushort>)>(src1, step1, src2, step2, dst, step, width, height)); 02654 } 02655 02656 void absdiff16s( const short* src1, size_t step1, 02657 const short* src2, size_t step2, 02658 short* dst, size_t step, int width, int height, void* ) 02659 { 02660 CALL_HAL(absdiff16s, cv_hal_absdiff16s, src1, step1, src2, step2, dst, step, width, height) 02661 vBinOp<short, cv::OpAbsDiff<short>, IF_SIMD(VAbsDiff<short>)>(src1, step1, src2, step2, dst, step, width, height); 02662 } 02663 02664 void absdiff32s( const int* src1, size_t step1, 02665 const int* src2, size_t step2, 02666 int* dst, size_t step, int width, int height, void* ) 02667 { 02668 CALL_HAL(absdiff32s, cv_hal_absdiff32s, src1, step1, src2, step2, dst, step, width, height) 02669 vBinOp32<int, cv::OpAbsDiff<int>, IF_SIMD(VAbsDiff<int>)>(src1, step1, src2, step2, dst, step, width, height); 02670 } 02671 02672 void absdiff32f( const float* src1, size_t step1, 02673 const float* src2, size_t step2, 02674 float* dst, size_t step, int width, int height, void* ) 02675 { 02676 CALL_HAL(absdiff32f, cv_hal_absdiff32f, src1, step1, src2, step2, dst, step, width, height) 02677 CALL_IPP_BIN_12(ippiAbsDiff_32f_C1R) 02678 (vBinOp32<float, cv::OpAbsDiff<float>, IF_SIMD(VAbsDiff<float>)>(src1, step1, src2, step2, dst, step, width, height)); 02679 } 02680 02681 void absdiff64f( const double* src1, size_t step1, 02682 const double* src2, size_t step2, 02683 double* dst, size_t step, int width, int height, void* ) 02684 { 02685 CALL_HAL(absdiff64f, cv_hal_absdiff64f, src1, step1, src2, step2, dst, step, width, height) 02686 vBinOp64<double, cv::OpAbsDiff<double>, IF_SIMD(VAbsDiff<double>)>(src1, step1, src2, step2, dst, step, width, height); 02687 } 02688 02689 //======================================= 02690 // Logical 02691 //======================================= 02692 02693 #if (ARITHM_USE_IPP == 1) 02694 #define CALL_IPP_UN(fun) \ 02695 CV_IPP_CHECK() \ 02696 { \ 02697 fixSteps(width, height, sizeof(dst[0]), step1, step2, step); (void)src2; \ 02698 if (0 <= fun(src1, (int)step1, dst, (int)step, ippiSize(width, height))) \ 02699 { \ 02700 CV_IMPL_ADD(CV_IMPL_IPP); \ 02701 return; \ 02702 } \ 02703 setIppErrorStatus(); \ 02704 } 02705 #else 02706 #define CALL_IPP_UN(fun) 02707 #endif 02708 02709 void and8u( const uchar* src1, size_t step1, 02710 const uchar* src2, size_t step2, 02711 uchar* dst, size_t step, int width, int height, void* ) 02712 { 02713 CALL_HAL(and8u, cv_hal_and8u, src1, step1, src2, step2, dst, step, width, height) 02714 CALL_IPP_BIN_12(ippiAnd_8u_C1R) 02715 (vBinOp<uchar, cv::OpAnd<uchar>, IF_SIMD(VAnd<uchar>)>(src1, step1, src2, step2, dst, step, width, height)); 02716 } 02717 02718 void or8u( const uchar* src1, size_t step1, 02719 const uchar* src2, size_t step2, 02720 uchar* dst, size_t step, int width, int height, void* ) 02721 { 02722 CALL_HAL(or8u, cv_hal_or8u, src1, step1, src2, step2, dst, step, width, height) 02723 CALL_IPP_BIN_12(ippiOr_8u_C1R) 02724 (vBinOp<uchar, cv::OpOr<uchar>, IF_SIMD(VOr<uchar>)>(src1, step1, src2, step2, dst, step, width, height)); 02725 } 02726 02727 void xor8u( const uchar* src1, size_t step1, 02728 const uchar* src2, size_t step2, 02729 uchar* dst, size_t step, int width, int height, void* ) 02730 { 02731 CALL_HAL(xor8u, cv_hal_xor8u, src1, step1, src2, step2, dst, step, width, 
height) 02732 CALL_IPP_BIN_12(ippiXor_8u_C1R) 02733 (vBinOp<uchar, cv::OpXor<uchar>, IF_SIMD(VXor<uchar>)>(src1, step1, src2, step2, dst, step, width, height)); 02734 } 02735 02736 void not8u( const uchar* src1, size_t step1, 02737 const uchar* src2, size_t step2, 02738 uchar* dst, size_t step, int width, int height, void* ) 02739 { 02740 CALL_HAL(not8u, cv_hal_not8u, src1, step1, dst, step, width, height) 02741 CALL_IPP_UN(ippiNot_8u_C1R) 02742 (vBinOp<uchar, cv::OpNot<uchar>, IF_SIMD(VNot<uchar>)>(src1, step1, src2, step2, dst, step, width, height)); 02743 } 02744 02745 //======================================= 02746 02747 #if ARITHM_USE_IPP 02748 inline static IppCmpOp convert_cmp(int _cmpop) 02749 { 02750 return _cmpop == CMP_EQ ? ippCmpEq : 02751 _cmpop == CMP_GT ? ippCmpGreater : 02752 _cmpop == CMP_GE ? ippCmpGreaterEq : 02753 _cmpop == CMP_LT ? ippCmpLess : 02754 _cmpop == CMP_LE ? ippCmpLessEq : 02755 (IppCmpOp)-1; 02756 } 02757 #define CALL_IPP_CMP(fun) \ 02758 CV_IPP_CHECK() \ 02759 { \ 02760 IppCmpOp op = convert_cmp(*(int *)_cmpop); \ 02761 if( op >= 0 ) \ 02762 { \ 02763 fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ 02764 if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), op)) \ 02765 { \ 02766 CV_IMPL_ADD(CV_IMPL_IPP); \ 02767 return; \ 02768 } \ 02769 setIppErrorStatus(); \ 02770 } \ 02771 } 02772 #else 02773 #define CALL_IPP_CMP(fun) 02774 #endif 02775 02776 //======================================= 02777 // Compare 02778 //======================================= 02779 02780 void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, 02781 uchar* dst, size_t step, int width, int height, void* _cmpop) 02782 { 02783 CALL_HAL(cmp8u, cv_hal_cmp8u, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop) 02784 CALL_IPP_CMP(ippiCompare_8u_C1R) 02785 //vz optimized cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); 02786 int code = *(int*)_cmpop; 02787 step1 /= sizeof(src1[0]); 02788 step2 /= sizeof(src2[0]); 02789 if( code == CMP_GE || code == CMP_LT ) 02790 { 02791 std::swap(src1, src2); 02792 std::swap(step1, step2); 02793 code = code == CMP_GE ? CMP_LE : CMP_GT; 02794 } 02795 02796 if( code == CMP_GT || code == CMP_LE ) 02797 { 02798 int m = code == CMP_GT ? 0 : 255; 02799 for( ; height--; src1 += step1, src2 += step2, dst += step ) 02800 { 02801 int x =0; 02802 #if CV_SSE2 02803 if( USE_SSE2 ) 02804 { 02805 __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi8 (-1); 02806 __m128i c128 = _mm_set1_epi8 (-128); 02807 for( ; x <= width - 16; x += 16 ) 02808 { 02809 __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); 02810 __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); 02811 // no simd for 8u comparison, that's why we need the trick 02812 r00 = _mm_sub_epi8(r00,c128); 02813 r10 = _mm_sub_epi8(r10,c128); 02814 02815 r00 =_mm_xor_si128(_mm_cmpgt_epi8(r00, r10), m128); 02816 _mm_storeu_si128((__m128i*)(dst + x),r00); 02817 02818 } 02819 } 02820 #elif CV_NEON 02821 uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255); 02822 02823 for( ; x <= width - 16; x += 16 ) 02824 { 02825 vst1q_u8(dst+x, veorq_u8(vcgtq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask)); 02826 } 02827 02828 #endif 02829 02830 for( ; x < width; x++ ){ 02831 dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m); 02832 } 02833 } 02834 } 02835 else if( code == CMP_EQ || code == CMP_NE ) 02836 { 02837 int m = code == CMP_EQ ? 
0 : 255; 02838 for( ; height--; src1 += step1, src2 += step2, dst += step ) 02839 { 02840 int x = 0; 02841 #if CV_SSE2 02842 if( USE_SSE2 ) 02843 { 02844 __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi8 (-1); 02845 for( ; x <= width - 16; x += 16 ) 02846 { 02847 __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); 02848 __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); 02849 r00 = _mm_xor_si128 ( _mm_cmpeq_epi8 (r00, r10), m128); 02850 _mm_storeu_si128((__m128i*)(dst + x), r00); 02851 } 02852 } 02853 #elif CV_NEON 02854 uint8x16_t mask = code == CMP_EQ ? vdupq_n_u8(0) : vdupq_n_u8(255); 02855 02856 for( ; x <= width - 16; x += 16 ) 02857 { 02858 vst1q_u8(dst+x, veorq_u8(vceqq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask)); 02859 } 02860 #endif 02861 for( ; x < width; x++ ) 02862 dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m); 02863 } 02864 } 02865 } 02866 02867 void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2, 02868 uchar* dst, size_t step, int width, int height, void* _cmpop) 02869 { 02870 CALL_HAL(cmp8s, cv_hal_cmp8s, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop) 02871 cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); 02872 } 02873 02874 void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, 02875 uchar* dst, size_t step, int width, int height, void* _cmpop) 02876 { 02877 CALL_HAL(cmp16u, cv_hal_cmp16u, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop) 02878 CALL_IPP_CMP(ippiCompare_16u_C1R) 02879 cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); 02880 } 02881 02882 void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2, 02883 uchar* dst, size_t step, int width, int height, void* _cmpop) 02884 { 02885 CALL_HAL(cmp16s, cv_hal_cmp16s, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop) 02886 CALL_IPP_CMP(ippiCompare_16s_C1R) 02887 //vz optimized cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); 02888 02889 int code = *(int*)_cmpop; 02890 step1 /= sizeof(src1[0]); 02891 step2 /= sizeof(src2[0]); 02892 if( code == CMP_GE || code == CMP_LT ) 02893 { 02894 std::swap(src1, src2); 02895 std::swap(step1, step2); 02896 code = code == CMP_GE ? CMP_LE : CMP_GT; 02897 } 02898 02899 if( code == CMP_GT || code == CMP_LE ) 02900 { 02901 int m = code == CMP_GT ? 0 : 255; 02902 for( ; height--; src1 += step1, src2 += step2, dst += step ) 02903 { 02904 int x =0; 02905 #if CV_SSE2 02906 if( USE_SSE2) 02907 { 02908 __m128i m128 = code == CMP_GT ? 
_mm_setzero_si128() : _mm_set1_epi16 (-1); 02909 for( ; x <= width - 16; x += 16 ) 02910 { 02911 __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); 02912 __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); 02913 r00 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r00, r10), m128); 02914 __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8)); 02915 __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8)); 02916 r01 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r01, r11), m128); 02917 r11 = _mm_packs_epi16(r00, r01); 02918 _mm_storeu_si128((__m128i*)(dst + x), r11); 02919 } 02920 if( x <= width-8) 02921 { 02922 __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); 02923 __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); 02924 r00 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r00, r10), m128); 02925 r10 = _mm_packs_epi16(r00, r00); 02926 _mm_storel_epi64((__m128i*)(dst + x), r10); 02927 02928 x += 8; 02929 } 02930 } 02931 #elif CV_NEON 02932 uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255); 02933 02934 for( ; x <= width - 16; x += 16 ) 02935 { 02936 int16x8_t in1 = vld1q_s16(src1 + x); 02937 int16x8_t in2 = vld1q_s16(src2 + x); 02938 uint8x8_t t1 = vmovn_u16(vcgtq_s16(in1, in2)); 02939 02940 in1 = vld1q_s16(src1 + x + 8); 02941 in2 = vld1q_s16(src2 + x + 8); 02942 uint8x8_t t2 = vmovn_u16(vcgtq_s16(in1, in2)); 02943 02944 vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask)); 02945 } 02946 #endif 02947 02948 for( ; x < width; x++ ){ 02949 dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m); 02950 } 02951 } 02952 } 02953 else if( code == CMP_EQ || code == CMP_NE ) 02954 { 02955 int m = code == CMP_EQ ? 0 : 255; 02956 for( ; height--; src1 += step1, src2 += step2, dst += step ) 02957 { 02958 int x = 0; 02959 #if CV_SSE2 02960 if( USE_SSE2 ) 02961 { 02962 __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi16 (-1); 02963 for( ; x <= width - 16; x += 16 ) 02964 { 02965 __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); 02966 __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); 02967 r00 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r00, r10), m128); 02968 __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8)); 02969 __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8)); 02970 r01 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r01, r11), m128); 02971 r11 = _mm_packs_epi16(r00, r01); 02972 _mm_storeu_si128((__m128i*)(dst + x), r11); 02973 } 02974 if( x <= width - 8) 02975 { 02976 __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); 02977 __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); 02978 r00 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r00, r10), m128); 02979 r10 = _mm_packs_epi16(r00, r00); 02980 _mm_storel_epi64((__m128i*)(dst + x), r10); 02981 02982 x += 8; 02983 } 02984 } 02985 #elif CV_NEON 02986 uint8x16_t mask = code == CMP_EQ ? 
vdupq_n_u8(0) : vdupq_n_u8(255); 02987 02988 for( ; x <= width - 16; x += 16 ) 02989 { 02990 int16x8_t in1 = vld1q_s16(src1 + x); 02991 int16x8_t in2 = vld1q_s16(src2 + x); 02992 uint8x8_t t1 = vmovn_u16(vceqq_s16(in1, in2)); 02993 02994 in1 = vld1q_s16(src1 + x + 8); 02995 in2 = vld1q_s16(src2 + x + 8); 02996 uint8x8_t t2 = vmovn_u16(vceqq_s16(in1, in2)); 02997 02998 vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask)); 02999 } 03000 #endif 03001 for( ; x < width; x++ ) 03002 dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m); 03003 } 03004 } 03005 } 03006 03007 void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2, 03008 uchar* dst, size_t step, int width, int height, void* _cmpop) 03009 { 03010 CALL_HAL(cmp32s, cv_hal_cmp32s, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop) 03011 cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); 03012 } 03013 03014 void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2, 03015 uchar* dst, size_t step, int width, int height, void* _cmpop) 03016 { 03017 CALL_HAL(cmp32f, cv_hal_cmp32f, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop) 03018 CALL_IPP_CMP(ippiCompare_32f_C1R) 03019 cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); 03020 } 03021 03022 void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2, 03023 uchar* dst, size_t step, int width, int height, void* _cmpop) 03024 { 03025 CALL_HAL(cmp64f, cv_hal_cmp64f, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop) 03026 cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); 03027 } 03028 03029 //======================================= 03030 03031 #if defined HAVE_IPP 03032 #define CALL_IPP_MUL(fun) \ 03033 CV_IPP_CHECK() \ 03034 { \ 03035 if (std::fabs(fscale - 1) <= FLT_EPSILON) \ 03036 { \ 03037 if (fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0) >= 0) \ 03038 { \ 03039 CV_IMPL_ADD(CV_IMPL_IPP); \ 03040 return; \ 03041 } \ 03042 setIppErrorStatus(); \ 03043 } \ 03044 } 03045 03046 #define CALL_IPP_MUL_2(fun) \ 03047 CV_IPP_CHECK() \ 03048 { \ 03049 if (std::fabs(fscale - 1) <= FLT_EPSILON) \ 03050 { \ 03051 if (fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height)) >= 0) \ 03052 { \ 03053 CV_IMPL_ADD(CV_IMPL_IPP); \ 03054 return; \ 03055 } \ 03056 setIppErrorStatus(); \ 03057 } \ 03058 } 03059 03060 #else 03061 #define CALL_IPP_MUL(fun) 03062 #define CALL_IPP_MUL_2(fun) 03063 #endif 03064 03065 //======================================= 03066 // Multilpy 03067 //======================================= 03068 03069 void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, 03070 uchar* dst, size_t step, int width, int height, void* scale) 03071 { 03072 CALL_HAL(mul8u, cv_hal_mul8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03073 float fscale = (float)*(const double*)scale; 03074 CALL_IPP_MUL(ippiMul_8u_C1RSfs) 03075 mul_(src1, step1, src2, step2, dst, step, width, height, fscale); 03076 } 03077 03078 void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2, 03079 schar* dst, size_t step, int width, int height, void* scale) 03080 { 03081 CALL_HAL(mul8s, cv_hal_mul8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03082 mul_(src1, step1, src2, step2, dst, step, width, height, (float)*(const double*)scale); 03083 } 03084 03085 void mul16u( const ushort* src1, size_t step1, const 
ushort* src2, size_t step2, 03086 ushort* dst, size_t step, int width, int height, void* scale) 03087 { 03088 CALL_HAL(mul16u, cv_hal_mul16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03089 float fscale = (float)*(const double*)scale; 03090 CALL_IPP_MUL(ippiMul_16u_C1RSfs) 03091 mul_(src1, step1, src2, step2, dst, step, width, height, fscale); 03092 } 03093 03094 void mul16s( const short* src1, size_t step1, const short* src2, size_t step2, 03095 short* dst, size_t step, int width, int height, void* scale) 03096 { 03097 CALL_HAL(mul16s, cv_hal_mul16s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03098 float fscale = (float)*(const double*)scale; 03099 CALL_IPP_MUL(ippiMul_16s_C1RSfs) 03100 mul_(src1, step1, src2, step2, dst, step, width, height, fscale); 03101 } 03102 03103 void mul32s( const int* src1, size_t step1, const int* src2, size_t step2, 03104 int* dst, size_t step, int width, int height, void* scale) 03105 { 03106 CALL_HAL(mul32s, cv_hal_mul32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03107 mul_(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03108 } 03109 03110 void mul32f( const float* src1, size_t step1, const float* src2, size_t step2, 03111 float* dst, size_t step, int width, int height, void* scale) 03112 { 03113 CALL_HAL(mul32f, cv_hal_mul32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03114 float fscale = (float)*(const double*)scale; 03115 CALL_IPP_MUL_2(ippiMul_32f_C1R) 03116 mul_(src1, step1, src2, step2, dst, step, width, height, fscale); 03117 } 03118 03119 void mul64f( const double* src1, size_t step1, const double* src2, size_t step2, 03120 double* dst, size_t step, int width, int height, void* scale) 03121 { 03122 CALL_HAL(mul64f, cv_hal_mul64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03123 mul_(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03124 } 03125 03126 //======================================= 03127 // Divide 03128 //======================================= 03129 03130 void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, 03131 uchar* dst, size_t step, int width, int height, void* scale) 03132 { 03133 CALL_HAL(div8u, cv_hal_div8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03134 if( src1 ) 03135 div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03136 else 03137 recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03138 } 03139 03140 void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2, 03141 schar* dst, size_t step, int width, int height, void* scale) 03142 { 03143 CALL_HAL(div8s, cv_hal_div8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03144 div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03145 } 03146 03147 void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, 03148 ushort* dst, size_t step, int width, int height, void* scale) 03149 { 03150 CALL_HAL(div16u, cv_hal_div16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03151 div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03152 } 03153 03154 void div16s( const short* src1, size_t step1, const short* src2, size_t step2, 03155 short* dst, size_t step, int width, int height, void* scale) 03156 { 
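// As in the other divide kernels, the real work is done by div_i/div_f with the
// scale factor applied per element; a zero divisor yields 0 in dst, which is the
// documented cv::divide behaviour. Note that div8u above also accepts a null
// src1, in which case it degenerates to the reciprocal path recip_i.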
    CALL_HAL(div16s, cv_hal_div16s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void div32s( const int* src1, size_t step1, const int* src2, size_t step2,
             int* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(div32s, cv_hal_div32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void div32f( const float* src1, size_t step1, const float* src2, size_t step2,
             float* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(div32f, cv_hal_div32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    div_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void div64f( const double* src1, size_t step1, const double* src2, size_t step2,
             double* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(div64f, cv_hal_div64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    div_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

//=======================================
// Reciprocal
//=======================================

void recip8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2,
              uchar* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(recip8u, cv_hal_recip8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void recip8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
              schar* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(recip8s, cv_hal_recip8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void recip16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
               ushort* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(recip16u, cv_hal_recip16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void recip16s( const short* src1, size_t step1, const short* src2, size_t step2,
               short* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(recip16s, cv_hal_recip16s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void recip32s( const int* src1, size_t step1, const int* src2, size_t step2,
               int* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(recip32s, cv_hal_recip32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void recip32f( const float* src1, size_t step1, const float* src2, size_t step2,
               float* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(recip32f, cv_hal_recip32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    recip_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

void recip64f( const double* src1, size_t step1, const double* src2, size_t step2,
               double* dst, size_t step, int width, int height, void* scale)
{
    CALL_HAL(recip64f, cv_hal_recip64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale)
    recip_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale);
}

//=======================================
// Add weighted
//=======================================

void
addWeighted8u( const uchar* src1, size_t step1,
               const uchar* src2, size_t step2,
               uchar* dst, size_t step, int width, int height,
               void* scalars )
{
    CALL_HAL(addWeighted8u, cv_hal_addWeighted8u, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
    const double* scalars_ = (const double*)scalars;
    float alpha = (float)scalars_[0], beta = (float)scalars_[1], gamma = (float)scalars_[2];

    for( ; height--; src1 += step1, src2 += step2, dst += step )
    {
        int x = 0;

#if CV_SSE2
        if( USE_SSE2 )
        {
            __m128 a4 = _mm_set1_ps(alpha), b4 = _mm_set1_ps(beta), g4 = _mm_set1_ps(gamma);
            __m128i z = _mm_setzero_si128();

            for( ; x <= width - 8; x += 8 )
            {
                __m128i u = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src1 + x)), z);
                __m128i v = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src2 + x)), z);

                __m128 u0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(u, z));
                __m128 u1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(u, z));
                __m128 v0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v, z));
                __m128 v1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, z));

                u0 = _mm_add_ps(_mm_mul_ps(u0, a4), _mm_mul_ps(v0, b4));
                u1 = _mm_add_ps(_mm_mul_ps(u1, a4), _mm_mul_ps(v1, b4));
                u0 = _mm_add_ps(u0, g4); u1 = _mm_add_ps(u1, g4);

                u = _mm_packs_epi32(_mm_cvtps_epi32(u0), _mm_cvtps_epi32(u1));
                u = _mm_packus_epi16(u, u);

                _mm_storel_epi64((__m128i*)(dst + x), u);
            }
        }
#elif CV_NEON
        float32x4_t g = vdupq_n_f32 (gamma);

        for( ; x <= width - 8; x += 8 )
        {
            uint8x8_t in1 = vld1_u8(src1+x);
            uint16x8_t in1_16 = vmovl_u8(in1);
            float32x4_t in1_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in1_16)));
            float32x4_t in1_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in1_16)));

            uint8x8_t in2 = vld1_u8(src2+x);
            uint16x8_t in2_16 = vmovl_u8(in2);
            float32x4_t in2_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in2_16)));
            float32x4_t in2_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in2_16)));

            float32x4_t out_f_l = vaddq_f32(vmulq_n_f32(in1_f_l, alpha), vmulq_n_f32(in2_f_l, beta));
            float32x4_t out_f_h = vaddq_f32(vmulq_n_f32(in1_f_h, alpha), vmulq_n_f32(in2_f_h, beta));
            out_f_l = vaddq_f32(out_f_l, g);
            out_f_h = vaddq_f32(out_f_h, g);

            uint16x4_t out_16_l = vqmovun_s32(cv_vrndq_s32_f32(out_f_l));
            uint16x4_t out_16_h = vqmovun_s32(cv_vrndq_s32_f32(out_f_h));

            uint16x8_t out_16 = vcombine_u16(out_16_l, out_16_h);
            uint8x8_t out = vqmovn_u16(out_16);

            vst1_u8(dst+x, out);
        }
#endif
#if CV_ENABLE_UNROLLED
        for( ; x <= width - 4; x += 4 )
        {
            float t0, t1;
            t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma;
            t1 = CV_8TO32F(src1[x+1])*alpha + CV_8TO32F(src2[x+1])*beta + gamma;

            dst[x] = saturate_cast<uchar>(t0);
            dst[x+1] = saturate_cast<uchar>(t1);

            t0 = CV_8TO32F(src1[x+2])*alpha + CV_8TO32F(src2[x+2])*beta + gamma;
            t1 = CV_8TO32F(src1[x+3])*alpha + CV_8TO32F(src2[x+3])*beta + gamma;

            dst[x+2] = saturate_cast<uchar>(t0);
            dst[x+3] = saturate_cast<uchar>(t1);
        }
#endif

        for( ; x < width; x++ )
        {
            float t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma;
            dst[x] = saturate_cast<uchar>(t0);
        }
    }
}

void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2,
                    schar* dst, size_t step, int width, int height, void* scalars )
{
    CALL_HAL(addWeighted8s, cv_hal_addWeighted8s, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
    addWeighted_<schar, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
}

void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2,
                     ushort* dst, size_t step, int width, int height, void* scalars )
{
    CALL_HAL(addWeighted16u, cv_hal_addWeighted16u, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
    addWeighted_<ushort, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
}

void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2,
                     short* dst, size_t step, int width, int height, void* scalars )
{
    CALL_HAL(addWeighted16s, cv_hal_addWeighted16s, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
    addWeighted_<short, float>(src1, step1, src2, step2, dst, step, width, height, scalars);
}

void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2,
                     int* dst, size_t step, int width, int height, void* scalars )
{
    CALL_HAL(addWeighted32s, cv_hal_addWeighted32s, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
    addWeighted_<int, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
}

void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2,
                     float* dst, size_t step, int width, int height, void* scalars )
{
    CALL_HAL(addWeighted32f, cv_hal_addWeighted32f, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
    addWeighted_<float, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
}

void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2,
                     double* dst, size_t step, int width, int height, void* scalars )
{
    CALL_HAL(addWeighted64f, cv_hal_addWeighted64f, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars)
    addWeighted_<double, double>(src1, step1, src2, step2, dst, step, width, height, scalars);
}

}} // cv::hal::

/* End of file. */
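The per-type kernels in this listing are normally reached through OpenCV's public arithmetic API rather than called directly: for single-channel CV_8U inputs, cv::multiply, cv::divide and cv::addWeighted typically end up in the cv::hal::mul8u, div8u and addWeighted8u functions above, unless a platform HAL has claimed the call through the corresponding CALL_HAL hook (cv_hal_mul8u and friends). The following is a minimal usage sketch of that public API; the matrix contents are illustrative only, and the comments about which hal kernel is hit are assumptions based on the dispatch logic in this version of the library.

#include <opencv2/core.hpp>
#include <iostream>

int main()
{
    // Two small single-channel 8-bit matrices (values chosen arbitrarily).
    cv::Mat a = (cv::Mat_<uchar>(1, 4) << 10, 20, 30, 40);
    cv::Mat b = (cv::Mat_<uchar>(1, 4) <<  2,  4,  5,  8);

    cv::Mat prod, quot, blend;
    cv::multiply(a, b, prod);                      // expected to dispatch to cv::hal::mul8u
    cv::divide(a, b, quot);                        // expected to dispatch to cv::hal::div8u
    cv::addWeighted(a, 0.5, b, 0.5, 10.0, blend);  // expected to dispatch to cv::hal::addWeighted8u

    // Results are saturated to the 0..255 range of CV_8U by the kernels.
    std::cout << prod << "\n" << quot << "\n" << blend << "\n";
    return 0;
}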
