OpenCV 3.1 project on the GR-PEACH board.
Fork of the gr-peach-opencv-project repository.
arithm.cpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
00034 // In no event shall the Intel Corporation or contributors be liable for any direct, 00035 // indirect, incidental, special, exemplary, or consequential damages 00036 // (including, but not limited to, procurement of substitute goods or services; 00037 // loss of use, data, or profits; or business interruption) however caused 00038 // and on any theory of liability, whether in contract, strict liability, 00039 // or tort (including negligence or otherwise) arising in any way out of 00040 // the use of this software, even if advised of the possibility of such damage. 00041 // 00042 //M*/ 00043 00044 /* //////////////////////////////////////////////////////////////////// 00045 // 00046 // Arithmetic and logical operations: +, -, *, /, &, |, ^, ~, abs ... 00047 // 00048 // */ 00049 00050 #include "precomp.hpp" 00051 #include "opencl_kernels_core.hpp" 00052 00053 namespace cv 00054 { 00055 00056 /****************************************************************************************\ 00057 * logical operations * 00058 \****************************************************************************************/ 00059 00060 void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize ) 00061 { 00062 int scn = (int)sc.total(), cn = CV_MAT_CN(buftype); 00063 size_t esz = CV_ELEM_SIZE(buftype); 00064 getConvertFunc(sc.depth(), buftype)(sc.ptr(), 1, 0, 1, scbuf, 1, Size(std::min(cn, scn), 1), 0); 00065 // unroll the scalar 00066 if( scn < cn ) 00067 { 00068 CV_Assert( scn == 1 ); 00069 size_t esz1 = CV_ELEM_SIZE1(buftype); 00070 for( size_t i = esz1; i < esz; i++ ) 00071 scbuf[i] = scbuf[i - esz1]; 00072 } 00073 for( size_t i = esz; i < blocksize*esz; i++ ) 00074 scbuf[i] = scbuf[i - esz]; 00075 } 00076 00077 00078 enum { OCL_OP_ADD=0, OCL_OP_SUB=1, OCL_OP_RSUB=2, OCL_OP_ABSDIFF=3, OCL_OP_MUL=4, 00079 OCL_OP_MUL_SCALE=5, OCL_OP_DIV_SCALE=6, OCL_OP_RECIP_SCALE=7, OCL_OP_ADDW=8, 00080 OCL_OP_AND=9, OCL_OP_OR=10, OCL_OP_XOR=11, OCL_OP_NOT=12, 
OCL_OP_MIN=13, OCL_OP_MAX=14, 00081 OCL_OP_RDIV_SCALE=15 }; 00082 00083 #ifdef HAVE_OPENCL 00084 00085 static const char* oclop2str[] = { "OP_ADD", "OP_SUB", "OP_RSUB", "OP_ABSDIFF", 00086 "OP_MUL", "OP_MUL_SCALE", "OP_DIV_SCALE", "OP_RECIP_SCALE", 00087 "OP_ADDW", "OP_AND", "OP_OR", "OP_XOR", "OP_NOT", "OP_MIN", "OP_MAX", "OP_RDIV_SCALE", 0 }; 00088 00089 static bool ocl_binary_op(InputArray _src1, InputArray _src2, OutputArray _dst, 00090 InputArray _mask, bool bitwise, int oclop, bool haveScalar ) 00091 { 00092 bool haveMask = !_mask.empty(); 00093 int srctype = _src1.type(); 00094 int srcdepth = CV_MAT_DEPTH(srctype); 00095 int cn = CV_MAT_CN(srctype); 00096 00097 const ocl::Device d = ocl::Device::getDefault(); 00098 bool doubleSupport = d.doubleFPConfig() > 0; 00099 if( oclop < 0 || ((haveMask || haveScalar) && cn > 4) || 00100 (!doubleSupport && srcdepth == CV_64F && !bitwise)) 00101 return false; 00102 00103 char opts[1024]; 00104 int kercn = haveMask || haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst); 00105 int scalarcn = kercn == 3 ? 4 : kercn; 00106 int rowsPerWI = d.isIntel() ? 4 : 1; 00107 00108 sprintf(opts, "-D %s%s -D %s -D dstT=%s%s -D dstT_C1=%s -D workST=%s -D cn=%d -D rowsPerWI=%d", 00109 haveMask ? "MASK_" : "", haveScalar ? "UNARY_OP" : "BINARY_OP", oclop2str[oclop], 00110 bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, kercn)) : 00111 ocl::typeToStr(CV_MAKETYPE(srcdepth, kercn)), doubleSupport ? " -D DOUBLE_SUPPORT" : "", 00112 bitwise ? ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, 1)) : 00113 ocl::typeToStr(CV_MAKETYPE(srcdepth, 1)), 00114 bitwise ? 
ocl::memopTypeToStr(CV_MAKETYPE(srcdepth, scalarcn)) : 00115 ocl::typeToStr(CV_MAKETYPE(srcdepth, scalarcn)), 00116 kercn, rowsPerWI); 00117 00118 ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts); 00119 if (k.empty()) 00120 return false; 00121 00122 UMat src1 = _src1.getUMat(), src2; 00123 UMat dst = _dst.getUMat(), mask = _mask.getUMat(); 00124 00125 ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cn, kercn); 00126 ocl::KernelArg dstarg = haveMask ? ocl::KernelArg::ReadWrite(dst, cn, kercn) : 00127 ocl::KernelArg::WriteOnly(dst, cn, kercn); 00128 ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask, 1); 00129 00130 if( haveScalar ) 00131 { 00132 size_t esz = CV_ELEM_SIZE1(srctype)*scalarcn; 00133 double buf[4] = {0,0,0,0}; 00134 00135 if( oclop != OCL_OP_NOT ) 00136 { 00137 Mat src2sc = _src2.getMat(); 00138 convertAndUnrollScalar(src2sc, srctype, (uchar*)buf, 1); 00139 } 00140 00141 ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, 0, buf, esz); 00142 00143 if( !haveMask ) 00144 k.args(src1arg, dstarg, scalararg); 00145 else 00146 k.args(src1arg, maskarg, dstarg, scalararg); 00147 } 00148 else 00149 { 00150 src2 = _src2.getUMat(); 00151 ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cn, kercn); 00152 00153 if( !haveMask ) 00154 k.args(src1arg, src2arg, dstarg); 00155 else 00156 k.args(src1arg, src2arg, maskarg, dstarg); 00157 } 00158 00159 size_t globalsize[] = { (size_t)src1.cols * cn / kercn, ((size_t)src1.rows + rowsPerWI - 1) / rowsPerWI }; 00160 return k.run(2, globalsize, 0, false); 00161 } 00162 00163 #endif 00164 00165 static void binary_op( InputArray _src1, InputArray _src2, OutputArray _dst, 00166 InputArray _mask, const BinaryFuncC* tab, 00167 bool bitwise, int oclop ) 00168 { 00169 const _InputArray *psrc1 = &_src1, *psrc2 = &_src2; 00170 int kind1 = psrc1->kind(), kind2 = psrc2->kind(); 00171 int type1 = psrc1->type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1); 00172 int type2 = psrc2->type(), 
depth2 = CV_MAT_DEPTH(type2), cn2 = CV_MAT_CN(type2); 00173 int dims1 = psrc1->dims(), dims2 = psrc2->dims(); 00174 Size sz1 = dims1 <= 2 ? psrc1->size() : Size(); 00175 Size sz2 = dims2 <= 2 ? psrc2->size() : Size(); 00176 #ifdef HAVE_OPENCL 00177 bool use_opencl = (kind1 == _InputArray::UMAT || kind2 == _InputArray::UMAT) && 00178 dims1 <= 2 && dims2 <= 2; 00179 #endif 00180 bool haveMask = !_mask.empty(), haveScalar = false; 00181 BinaryFuncC func; 00182 00183 if( dims1 <= 2 && dims2 <= 2 && kind1 == kind2 && sz1 == sz2 && type1 == type2 && !haveMask ) 00184 { 00185 _dst.create(sz1, type1); 00186 #ifdef HAVE_OPENCL 00187 CV_OCL_RUN(use_opencl, 00188 ocl_binary_op(*psrc1, *psrc2, _dst, _mask, bitwise, oclop, false)) 00189 #endif 00190 00191 if( bitwise ) 00192 { 00193 func = *tab; 00194 cn = (int)CV_ELEM_SIZE(type1); 00195 } 00196 else 00197 func = tab[depth1]; 00198 00199 Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat(); 00200 Size sz = getContinuousSize(src1, src2, dst); 00201 size_t len = sz.width*(size_t)cn; 00202 if( len == (size_t)(int)len ) 00203 { 00204 sz.width = (int)len; 00205 func(src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, 0); 00206 return; 00207 } 00208 } 00209 00210 if( oclop == OCL_OP_NOT ) 00211 haveScalar = true; 00212 else if( (kind1 == _InputArray::MATX) + (kind2 == _InputArray::MATX) == 1 || 00213 !psrc1->sameSize(*psrc2) || type1 != type2 ) 00214 { 00215 if( checkScalar(*psrc1, type2, kind1, kind2) ) 00216 { 00217 // src1 is a scalar; swap it with src2 00218 swap(psrc1, psrc2); 00219 swap(type1, type2); 00220 swap(depth1, depth2); 00221 swap(cn, cn2); 00222 swap(sz1, sz2); 00223 } 00224 else if( !checkScalar(*psrc2, type1, kind2, kind1) ) 00225 CV_Error( CV_StsUnmatchedSizes, 00226 "The operation is neither 'array op array' (where arrays have the same size and type), " 00227 "nor 'array op scalar', nor 'scalar op array'" ); 00228 haveScalar = true; 00229 } 00230 else 00231 { 
00232 CV_Assert( psrc1->sameSize(*psrc2) && type1 == type2 ); 00233 } 00234 00235 size_t esz = CV_ELEM_SIZE(type1); 00236 size_t blocksize0 = (BLOCK_SIZE + esz-1)/esz; 00237 BinaryFunc copymask = 0; 00238 bool reallocate = false; 00239 00240 if( haveMask ) 00241 { 00242 int mtype = _mask.type(); 00243 CV_Assert( (mtype == CV_8U || mtype == CV_8S) && _mask.sameSize(*psrc1)); 00244 copymask = getCopyMaskFunc(esz); 00245 reallocate = !_dst.sameSize(*psrc1) || _dst.type() != type1; 00246 } 00247 00248 AutoBuffer<uchar> _buf; 00249 uchar *scbuf = 0, *maskbuf = 0; 00250 00251 _dst.createSameSize(*psrc1, type1); 00252 // if this is mask operation and dst has been reallocated, 00253 // we have to clear the destination 00254 if( haveMask && reallocate ) 00255 _dst.setTo(0.); 00256 #ifdef HAVE_OPENCL 00257 CV_OCL_RUN(use_opencl, 00258 ocl_binary_op(*psrc1, *psrc2, _dst, _mask, bitwise, oclop, haveScalar)) 00259 #endif 00260 00261 00262 Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(); 00263 Mat dst = _dst.getMat(), mask = _mask.getMat(); 00264 00265 if( bitwise ) 00266 { 00267 func = *tab; 00268 cn = (int)esz; 00269 } 00270 else 00271 func = tab[depth1]; 00272 00273 if( !haveScalar ) 00274 { 00275 const Mat* arrays[] = { &src1, &src2, &dst, &mask, 0 }; 00276 uchar* ptrs[4]; 00277 00278 NAryMatIterator it(arrays, ptrs); 00279 size_t total = it.size, blocksize = total; 00280 00281 if( blocksize*cn > INT_MAX ) 00282 blocksize = INT_MAX/cn; 00283 00284 if( haveMask ) 00285 { 00286 blocksize = std::min(blocksize, blocksize0); 00287 _buf.allocate(blocksize*esz); 00288 maskbuf = _buf; 00289 } 00290 00291 for( size_t i = 0; i < it.nplanes; i++, ++it ) 00292 { 00293 for( size_t j = 0; j < total; j += blocksize ) 00294 { 00295 int bsz = (int)MIN(total - j, blocksize); 00296 00297 func( ptrs[0], 0, ptrs[1], 0, haveMask ? 
maskbuf : ptrs[2], 0, bsz*cn, 1, 0 ); 00298 if( haveMask ) 00299 { 00300 copymask( maskbuf, 0, ptrs[3], 0, ptrs[2], 0, Size(bsz, 1), &esz ); 00301 ptrs[3] += bsz; 00302 } 00303 00304 bsz *= (int)esz; 00305 ptrs[0] += bsz; ptrs[1] += bsz; ptrs[2] += bsz; 00306 } 00307 } 00308 } 00309 else 00310 { 00311 const Mat* arrays[] = { &src1, &dst, &mask, 0 }; 00312 uchar* ptrs[3]; 00313 00314 NAryMatIterator it(arrays, ptrs); 00315 size_t total = it.size, blocksize = std::min(total, blocksize0); 00316 00317 _buf.allocate(blocksize*(haveMask ? 2 : 1)*esz + 32); 00318 scbuf = _buf; 00319 maskbuf = alignPtr(scbuf + blocksize*esz, 16); 00320 00321 convertAndUnrollScalar( src2, src1.type(), scbuf, blocksize); 00322 00323 for( size_t i = 0; i < it.nplanes; i++, ++it ) 00324 { 00325 for( size_t j = 0; j < total; j += blocksize ) 00326 { 00327 int bsz = (int)MIN(total - j, blocksize); 00328 00329 func( ptrs[0], 0, scbuf, 0, haveMask ? maskbuf : ptrs[1], 0, bsz*cn, 1, 0 ); 00330 if( haveMask ) 00331 { 00332 copymask( maskbuf, 0, ptrs[2], 0, ptrs[1], 0, Size(bsz, 1), &esz ); 00333 ptrs[2] += bsz; 00334 } 00335 00336 bsz *= (int)esz; 00337 ptrs[0] += bsz; ptrs[1] += bsz; 00338 } 00339 } 00340 } 00341 } 00342 00343 static BinaryFuncC* getMaxTab() 00344 { 00345 static BinaryFuncC maxTab[] = 00346 { 00347 (BinaryFuncC)GET_OPTIMIZED(cv::hal::max8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::max8s), 00348 (BinaryFuncC)GET_OPTIMIZED(cv::hal::max16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::max16s), 00349 (BinaryFuncC)GET_OPTIMIZED(cv::hal::max32s), 00350 (BinaryFuncC)GET_OPTIMIZED(cv::hal::max32f), (BinaryFuncC)cv::hal::max64f, 00351 0 00352 }; 00353 00354 return maxTab; 00355 } 00356 00357 static BinaryFuncC* getMinTab() 00358 { 00359 static BinaryFuncC minTab[] = 00360 { 00361 (BinaryFuncC)GET_OPTIMIZED(cv::hal::min8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::min8s), 00362 (BinaryFuncC)GET_OPTIMIZED(cv::hal::min16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::min16s), 00363 
(BinaryFuncC)GET_OPTIMIZED(cv::hal::min32s), 00364 (BinaryFuncC)GET_OPTIMIZED(cv::hal::min32f), (BinaryFuncC)cv::hal::min64f, 00365 0 00366 }; 00367 00368 return minTab; 00369 } 00370 00371 } 00372 00373 void cv::bitwise_and(InputArray a, InputArray b, OutputArray c, InputArray mask) 00374 { 00375 BinaryFuncC f = (BinaryFuncC)GET_OPTIMIZED(cv::hal::and8u); 00376 binary_op(a, b, c, mask, &f, true, OCL_OP_AND); 00377 } 00378 00379 void cv::bitwise_or(InputArray a, InputArray b, OutputArray c, InputArray mask) 00380 { 00381 BinaryFuncC f = (BinaryFuncC)GET_OPTIMIZED(cv::hal::or8u); 00382 binary_op(a, b, c, mask, &f, true, OCL_OP_OR); 00383 } 00384 00385 void cv::bitwise_xor(InputArray a, InputArray b, OutputArray c, InputArray mask) 00386 { 00387 BinaryFuncC f = (BinaryFuncC)GET_OPTIMIZED(cv::hal::xor8u); 00388 binary_op(a, b, c, mask, &f, true, OCL_OP_XOR); 00389 } 00390 00391 void cv::bitwise_not(InputArray a, OutputArray c, InputArray mask) 00392 { 00393 BinaryFuncC f = (BinaryFuncC)GET_OPTIMIZED(cv::hal::not8u); 00394 binary_op(a, a, c, mask, &f, true, OCL_OP_NOT); 00395 } 00396 00397 void cv::max( InputArray src1, InputArray src2, OutputArray dst ) 00398 { 00399 binary_op(src1, src2, dst, noArray(), getMaxTab(), false, OCL_OP_MAX ); 00400 } 00401 00402 void cv::min( InputArray src1, InputArray src2, OutputArray dst ) 00403 { 00404 binary_op(src1, src2, dst, noArray(), getMinTab(), false, OCL_OP_MIN ); 00405 } 00406 00407 void cv::max(const Mat& src1, const Mat& src2, Mat& dst) 00408 { 00409 OutputArray _dst(dst); 00410 binary_op(src1, src2, _dst, noArray(), getMaxTab(), false, OCL_OP_MAX ); 00411 } 00412 00413 void cv::min(const Mat& src1, const Mat& src2, Mat& dst) 00414 { 00415 OutputArray _dst(dst); 00416 binary_op(src1, src2, _dst, noArray(), getMinTab(), false, OCL_OP_MIN ); 00417 } 00418 00419 void cv::max(const UMat & src1, const UMat & src2, UMat & dst) 00420 { 00421 OutputArray _dst(dst); 00422 binary_op(src1, src2, _dst, noArray(), getMaxTab(), false, 
OCL_OP_MAX ); 00423 } 00424 00425 void cv::min(const UMat & src1, const UMat & src2, UMat & dst) 00426 { 00427 OutputArray _dst(dst); 00428 binary_op(src1, src2, _dst, noArray(), getMinTab(), false, OCL_OP_MIN ); 00429 } 00430 00431 00432 /****************************************************************************************\ 00433 * add/subtract * 00434 \****************************************************************************************/ 00435 00436 namespace cv 00437 { 00438 00439 static int actualScalarDepth(const double* data, int len) 00440 { 00441 int i = 0, minval = INT_MAX, maxval = INT_MIN; 00442 for(; i < len; ++i) 00443 { 00444 int ival = cvRound(data[i]); 00445 if( ival != data[i] ) 00446 break; 00447 minval = MIN(minval, ival); 00448 maxval = MAX(maxval, ival); 00449 } 00450 return i < len ? CV_64F : 00451 minval >= 0 && maxval <= (int)UCHAR_MAX ? CV_8U : 00452 minval >= (int)SCHAR_MIN && maxval <= (int)SCHAR_MAX ? CV_8S : 00453 minval >= 0 && maxval <= (int)USHRT_MAX ? CV_16U : 00454 minval >= (int)SHRT_MIN && maxval <= (int)SHRT_MAX ? CV_16S : 00455 CV_32S; 00456 } 00457 00458 #ifdef HAVE_OPENCL 00459 00460 static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, 00461 InputArray _mask, int wtype, 00462 void* usrdata, int oclop, 00463 bool haveScalar ) 00464 { 00465 const ocl::Device d = ocl::Device::getDefault(); 00466 bool doubleSupport = d.doubleFPConfig() > 0; 00467 int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1); 00468 bool haveMask = !_mask.empty(); 00469 00470 if ( (haveMask || haveScalar) && cn > 4 ) 00471 return false; 00472 00473 int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32S, CV_MAT_DEPTH(wtype)); 00474 if (!doubleSupport) 00475 wdepth = std::min(wdepth, CV_32F); 00476 00477 wtype = CV_MAKETYPE(wdepth, cn); 00478 int type2 = haveScalar ? 
wtype : _src2.type(), depth2 = CV_MAT_DEPTH(type2); 00479 if (!doubleSupport && (depth2 == CV_64F || depth1 == CV_64F)) 00480 return false; 00481 00482 int kercn = haveMask || haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst); 00483 int scalarcn = kercn == 3 ? 4 : kercn, rowsPerWI = d.isIntel() ? 4 : 1; 00484 00485 char cvtstr[4][32], opts[1024]; 00486 sprintf(opts, "-D %s%s -D %s -D srcT1=%s -D srcT1_C1=%s -D srcT2=%s -D srcT2_C1=%s " 00487 "-D dstT=%s -D dstT_C1=%s -D workT=%s -D workST=%s -D scaleT=%s -D wdepth=%d -D convertToWT1=%s " 00488 "-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d -D rowsPerWI=%d -D convertFromU=%s", 00489 (haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"), 00490 oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)), 00491 ocl::typeToStr(depth1), ocl::typeToStr(CV_MAKETYPE(depth2, kercn)), 00492 ocl::typeToStr(depth2), ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)), 00493 ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)), 00494 ocl::typeToStr(CV_MAKETYPE(wdepth, scalarcn)), 00495 ocl::typeToStr(wdepth), wdepth, 00496 ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]), 00497 ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]), 00498 ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2]), 00499 doubleSupport ? " -D DOUBLE_SUPPORT" : "", kercn, rowsPerWI, 00500 oclop == OCL_OP_ABSDIFF && wdepth == CV_32S && ddepth == wdepth ? 00501 ocl::convertTypeStr(CV_8U, ddepth, kercn, cvtstr[3]) : "noconvert"); 00502 00503 size_t usrdata_esz = CV_ELEM_SIZE(wdepth); 00504 const uchar* usrdata_p = (const uchar*)usrdata; 00505 const double* usrdata_d = (const double*)usrdata; 00506 float usrdata_f[3]; 00507 int i, n = oclop == OCL_OP_MUL_SCALE || oclop == OCL_OP_DIV_SCALE || 00508 oclop == OCL_OP_RDIV_SCALE || oclop == OCL_OP_RECIP_SCALE ? 1 : oclop == OCL_OP_ADDW ? 
3 : 0; 00509 if( n > 0 && wdepth == CV_32F ) 00510 { 00511 for( i = 0; i < n; i++ ) 00512 usrdata_f[i] = (float)usrdata_d[i]; 00513 usrdata_p = (const uchar*)usrdata_f; 00514 } 00515 00516 ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts); 00517 if (k.empty()) 00518 return false; 00519 00520 UMat src1 = _src1.getUMat(), src2; 00521 UMat dst = _dst.getUMat(), mask = _mask.getUMat(); 00522 00523 ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1, cn, kercn); 00524 ocl::KernelArg dstarg = haveMask ? ocl::KernelArg::ReadWrite(dst, cn, kercn) : 00525 ocl::KernelArg::WriteOnly(dst, cn, kercn); 00526 ocl::KernelArg maskarg = ocl::KernelArg::ReadOnlyNoSize(mask, 1); 00527 00528 if( haveScalar ) 00529 { 00530 size_t esz = CV_ELEM_SIZE1(wtype)*scalarcn; 00531 double buf[4]={0,0,0,0}; 00532 Mat src2sc = _src2.getMat(); 00533 00534 if( !src2sc.empty() ) 00535 convertAndUnrollScalar(src2sc, wtype, (uchar*)buf, 1); 00536 ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, 0, buf, esz); 00537 00538 if( !haveMask ) 00539 { 00540 if(n == 0) 00541 k.args(src1arg, dstarg, scalararg); 00542 else if(n == 1) 00543 k.args(src1arg, dstarg, scalararg, 00544 ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz)); 00545 else 00546 CV_Error(Error::StsNotImplemented, "unsupported number of extra parameters"); 00547 } 00548 else 00549 k.args(src1arg, maskarg, dstarg, scalararg); 00550 } 00551 else 00552 { 00553 src2 = _src2.getUMat(); 00554 ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2, cn, kercn); 00555 00556 if( !haveMask ) 00557 { 00558 if (n == 0) 00559 k.args(src1arg, src2arg, dstarg); 00560 else if (n == 1) 00561 k.args(src1arg, src2arg, dstarg, 00562 ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz)); 00563 else if (n == 3) 00564 k.args(src1arg, src2arg, dstarg, 00565 ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz), 00566 ocl::KernelArg(0, 0, 0, 0, usrdata_p + usrdata_esz, usrdata_esz), 00567 ocl::KernelArg(0, 0, 0, 0, usrdata_p + usrdata_esz*2, usrdata_esz)); 
00568 else 00569 CV_Error(Error::StsNotImplemented, "unsupported number of extra parameters"); 00570 } 00571 else 00572 k.args(src1arg, src2arg, maskarg, dstarg); 00573 } 00574 00575 size_t globalsize[] = { (size_t)src1.cols * cn / kercn, ((size_t)src1.rows + rowsPerWI - 1) / rowsPerWI }; 00576 return k.run(2, globalsize, NULL, false); 00577 } 00578 00579 #endif 00580 00581 static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, 00582 InputArray _mask, int dtype, BinaryFuncC* tab, bool muldiv=false, 00583 void* usrdata=0, int oclop=-1 ) 00584 { 00585 const _InputArray *psrc1 = &_src1, *psrc2 = &_src2; 00586 int kind1 = psrc1->kind(), kind2 = psrc2->kind(); 00587 bool haveMask = !_mask.empty(); 00588 bool reallocate = false; 00589 int type1 = psrc1->type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1); 00590 int type2 = psrc2->type(), depth2 = CV_MAT_DEPTH(type2), cn2 = CV_MAT_CN(type2); 00591 int wtype, dims1 = psrc1->dims(), dims2 = psrc2->dims(); 00592 Size sz1 = dims1 <= 2 ? psrc1->size() : Size(); 00593 Size sz2 = dims2 <= 2 ? psrc2->size() : Size(); 00594 #ifdef HAVE_OPENCL 00595 bool use_opencl = OCL_PERFORMANCE_CHECK(_dst.isUMat()) && dims1 <= 2 && dims2 <= 2; 00596 #endif 00597 bool src1Scalar = checkScalar(*psrc1, type2, kind1, kind2); 00598 bool src2Scalar = checkScalar(*psrc2, type1, kind2, kind1); 00599 00600 if( (kind1 == kind2 || cn == 1) && sz1 == sz2 && dims1 <= 2 && dims2 <= 2 && type1 == type2 && 00601 !haveMask && ((!_dst.fixedType() && (dtype < 0 || CV_MAT_DEPTH(dtype) == depth1)) || 00602 (_dst.fixedType() && _dst.type() == type1)) && 00603 ((src1Scalar && src2Scalar) || (!src1Scalar && !src2Scalar)) ) 00604 { 00605 _dst.createSameSize(*psrc1, type1); 00606 #ifdef HAVE_OPENCL 00607 CV_OCL_RUN(use_opencl, 00608 ocl_arithm_op(*psrc1, *psrc2, _dst, _mask, 00609 (!usrdata ? 
type1 : std::max(depth1, CV_32F)), 00610 usrdata, oclop, false)) 00611 #endif 00612 00613 Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat(); 00614 Size sz = getContinuousSize(src1, src2, dst, src1.channels()); 00615 tab[depth1](src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, usrdata); 00616 return; 00617 } 00618 00619 bool haveScalar = false, swapped12 = false; 00620 00621 if( dims1 != dims2 || sz1 != sz2 || cn != cn2 || 00622 (kind1 == _InputArray::MATX && (sz1 == Size(1,4) || sz1 == Size(1,1))) || 00623 (kind2 == _InputArray::MATX && (sz2 == Size(1,4) || sz2 == Size(1,1))) ) 00624 { 00625 if( checkScalar(*psrc1, type2, kind1, kind2) ) 00626 { 00627 // src1 is a scalar; swap it with src2 00628 swap(psrc1, psrc2); 00629 swap(sz1, sz2); 00630 swap(type1, type2); 00631 swap(depth1, depth2); 00632 swap(cn, cn2); 00633 swap(dims1, dims2); 00634 swapped12 = true; 00635 if( oclop == OCL_OP_SUB ) 00636 oclop = OCL_OP_RSUB; 00637 if ( oclop == OCL_OP_DIV_SCALE ) 00638 oclop = OCL_OP_RDIV_SCALE; 00639 } 00640 else if( !checkScalar(*psrc2, type1, kind2, kind1) ) 00641 CV_Error( CV_StsUnmatchedSizes, 00642 "The operation is neither 'array op array' " 00643 "(where arrays have the same size and the same number of channels), " 00644 "nor 'array op scalar', nor 'scalar op array'" ); 00645 haveScalar = true; 00646 CV_Assert(type2 == CV_64F && (sz2.height == 1 || sz2.height == 4)); 00647 00648 if (!muldiv) 00649 { 00650 Mat sc = psrc2->getMat(); 00651 depth2 = actualScalarDepth(sc.ptr<double>(), cn); 00652 if( depth2 == CV_64F && (depth1 < CV_32S || depth1 == CV_32F) ) 00653 depth2 = CV_32F; 00654 } 00655 else 00656 depth2 = CV_64F; 00657 } 00658 00659 if( dtype < 0 ) 00660 { 00661 if( _dst.fixedType() ) 00662 dtype = _dst.type(); 00663 else 00664 { 00665 if( !haveScalar && type1 != type2 ) 00666 CV_Error(CV_StsBadArg, 00667 "When the input arrays in add/subtract/multiply/divide functions have different types, " 00668 
"the output array type must be explicitly specified"); 00669 dtype = type1; 00670 } 00671 } 00672 dtype = CV_MAT_DEPTH(dtype); 00673 00674 if( depth1 == depth2 && dtype == depth1 ) 00675 wtype = dtype; 00676 else if( !muldiv ) 00677 { 00678 wtype = depth1 <= CV_8S && depth2 <= CV_8S ? CV_16S : 00679 depth1 <= CV_32S && depth2 <= CV_32S ? CV_32S : std::max(depth1, depth2); 00680 wtype = std::max(wtype, dtype); 00681 00682 // when the result of addition should be converted to an integer type, 00683 // and just one of the input arrays is floating-point, it makes sense to convert that input to integer type before the operation, 00684 // instead of converting the other input to floating-point and then converting the operation result back to integers. 00685 if( dtype < CV_32F && (depth1 < CV_32F || depth2 < CV_32F) ) 00686 wtype = CV_32S; 00687 } 00688 else 00689 { 00690 wtype = std::max(depth1, std::max(depth2, CV_32F)); 00691 wtype = std::max(wtype, dtype); 00692 } 00693 00694 dtype = CV_MAKETYPE(dtype, cn); 00695 wtype = CV_MAKETYPE(wtype, cn); 00696 00697 if( haveMask ) 00698 { 00699 int mtype = _mask.type(); 00700 CV_Assert( (mtype == CV_8UC1 || mtype == CV_8SC1) && _mask.sameSize(*psrc1) ); 00701 reallocate = !_dst.sameSize(*psrc1) || _dst.type() != dtype; 00702 } 00703 00704 _dst.createSameSize(*psrc1, dtype); 00705 if( reallocate ) 00706 _dst.setTo(0.); 00707 00708 #ifdef HAVE_OPENCL 00709 CV_OCL_RUN(use_opencl, 00710 ocl_arithm_op(*psrc1, *psrc2, _dst, _mask, wtype, 00711 usrdata, oclop, haveScalar)) 00712 #endif 00713 00714 BinaryFunc cvtsrc1 = type1 == wtype ? 0 : getConvertFunc(type1, wtype); 00715 BinaryFunc cvtsrc2 = type2 == type1 ? cvtsrc1 : type2 == wtype ? 0 : getConvertFunc(type2, wtype); 00716 BinaryFunc cvtdst = dtype == wtype ? 
0 : getConvertFunc(wtype, dtype); 00717 00718 size_t esz1 = CV_ELEM_SIZE(type1), esz2 = CV_ELEM_SIZE(type2); 00719 size_t dsz = CV_ELEM_SIZE(dtype), wsz = CV_ELEM_SIZE(wtype); 00720 size_t blocksize0 = (size_t)(BLOCK_SIZE + wsz-1)/wsz; 00721 BinaryFunc copymask = getCopyMaskFunc(dsz); 00722 Mat src1 = psrc1->getMat(), src2 = psrc2->getMat(), dst = _dst.getMat(), mask = _mask.getMat(); 00723 00724 AutoBuffer<uchar> _buf; 00725 uchar *buf, *maskbuf = 0, *buf1 = 0, *buf2 = 0, *wbuf = 0; 00726 size_t bufesz = (cvtsrc1 ? wsz : 0) + 00727 (cvtsrc2 || haveScalar ? wsz : 0) + 00728 (cvtdst ? wsz : 0) + 00729 (haveMask ? dsz : 0); 00730 BinaryFuncC func = tab[CV_MAT_DEPTH(wtype)]; 00731 00732 if( !haveScalar ) 00733 { 00734 const Mat* arrays[] = { &src1, &src2, &dst, &mask, 0 }; 00735 uchar* ptrs[4]; 00736 00737 NAryMatIterator it(arrays, ptrs); 00738 size_t total = it.size, blocksize = total; 00739 00740 if( haveMask || cvtsrc1 || cvtsrc2 || cvtdst ) 00741 blocksize = std::min(blocksize, blocksize0); 00742 00743 _buf.allocate(bufesz*blocksize + 64); 00744 buf = _buf; 00745 if( cvtsrc1 ) 00746 buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16); 00747 if( cvtsrc2 ) 00748 buf2 = buf, buf = alignPtr(buf + blocksize*wsz, 16); 00749 wbuf = maskbuf = buf; 00750 if( cvtdst ) 00751 buf = alignPtr(buf + blocksize*wsz, 16); 00752 if( haveMask ) 00753 maskbuf = buf; 00754 00755 for( size_t i = 0; i < it.nplanes; i++, ++it ) 00756 { 00757 for( size_t j = 0; j < total; j += blocksize ) 00758 { 00759 int bsz = (int)MIN(total - j, blocksize); 00760 Size bszn(bsz*cn, 1); 00761 const uchar *sptr1 = ptrs[0], *sptr2 = ptrs[1]; 00762 uchar* dptr = ptrs[2]; 00763 if( cvtsrc1 ) 00764 { 00765 cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 ); 00766 sptr1 = buf1; 00767 } 00768 if( ptrs[0] == ptrs[1] ) 00769 sptr2 = sptr1; 00770 else if( cvtsrc2 ) 00771 { 00772 cvtsrc2( sptr2, 1, 0, 1, buf2, 1, bszn, 0 ); 00773 sptr2 = buf2; 00774 } 00775 00776 if( !haveMask && !cvtdst ) 00777 func( sptr1, 1, sptr2, 1, 
dptr, 1, bszn.width, bszn.height, usrdata ); 00778 else 00779 { 00780 func( sptr1, 1, sptr2, 1, wbuf, 0, bszn.width, bszn.height, usrdata ); 00781 if( !haveMask ) 00782 cvtdst( wbuf, 1, 0, 1, dptr, 1, bszn, 0 ); 00783 else if( !cvtdst ) 00784 { 00785 copymask( wbuf, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz ); 00786 ptrs[3] += bsz; 00787 } 00788 else 00789 { 00790 cvtdst( wbuf, 1, 0, 1, maskbuf, 1, bszn, 0 ); 00791 copymask( maskbuf, 1, ptrs[3], 1, dptr, 1, Size(bsz, 1), &dsz ); 00792 ptrs[3] += bsz; 00793 } 00794 } 00795 ptrs[0] += bsz*esz1; ptrs[1] += bsz*esz2; ptrs[2] += bsz*dsz; 00796 } 00797 } 00798 } 00799 else 00800 { 00801 const Mat* arrays[] = { &src1, &dst, &mask, 0 }; 00802 uchar* ptrs[3]; 00803 00804 NAryMatIterator it(arrays, ptrs); 00805 size_t total = it.size, blocksize = std::min(total, blocksize0); 00806 00807 _buf.allocate(bufesz*blocksize + 64); 00808 buf = _buf; 00809 if( cvtsrc1 ) 00810 buf1 = buf, buf = alignPtr(buf + blocksize*wsz, 16); 00811 buf2 = buf; buf = alignPtr(buf + blocksize*wsz, 16); 00812 wbuf = maskbuf = buf; 00813 if( cvtdst ) 00814 buf = alignPtr(buf + blocksize*wsz, 16); 00815 if( haveMask ) 00816 maskbuf = buf; 00817 00818 convertAndUnrollScalar( src2, wtype, buf2, blocksize); 00819 00820 for( size_t i = 0; i < it.nplanes; i++, ++it ) 00821 { 00822 for( size_t j = 0; j < total; j += blocksize ) 00823 { 00824 int bsz = (int)MIN(total - j, blocksize); 00825 Size bszn(bsz*cn, 1); 00826 const uchar *sptr1 = ptrs[0]; 00827 const uchar* sptr2 = buf2; 00828 uchar* dptr = ptrs[1]; 00829 00830 if( cvtsrc1 ) 00831 { 00832 cvtsrc1( sptr1, 1, 0, 1, buf1, 1, bszn, 0 ); 00833 sptr1 = buf1; 00834 } 00835 00836 if( swapped12 ) 00837 std::swap(sptr1, sptr2); 00838 00839 if( !haveMask && !cvtdst ) 00840 func( sptr1, 1, sptr2, 1, dptr, 1, bszn.width, bszn.height, usrdata ); 00841 else 00842 { 00843 func( sptr1, 1, sptr2, 1, wbuf, 1, bszn.width, bszn.height, usrdata ); 00844 if( !haveMask ) 00845 cvtdst( wbuf, 1, 0, 1, dptr, 1, bszn, 0 ); 00846 
else if( !cvtdst ) 00847 { 00848 copymask( wbuf, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz ); 00849 ptrs[2] += bsz; 00850 } 00851 else 00852 { 00853 cvtdst( wbuf, 1, 0, 1, maskbuf, 1, bszn, 0 ); 00854 copymask( maskbuf, 1, ptrs[2], 1, dptr, 1, Size(bsz, 1), &dsz ); 00855 ptrs[2] += bsz; 00856 } 00857 } 00858 ptrs[0] += bsz*esz1; ptrs[1] += bsz*dsz; 00859 } 00860 } 00861 } 00862 } 00863 00864 static BinaryFuncC* getAddTab() 00865 { 00866 static BinaryFuncC addTab[] = 00867 { 00868 (BinaryFuncC)GET_OPTIMIZED(cv::hal::add8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::add8s), 00869 (BinaryFuncC)GET_OPTIMIZED(cv::hal::add16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::add16s), 00870 (BinaryFuncC)GET_OPTIMIZED(cv::hal::add32s), 00871 (BinaryFuncC)GET_OPTIMIZED(cv::hal::add32f), (BinaryFuncC)cv::hal::add64f, 00872 0 00873 }; 00874 00875 return addTab; 00876 } 00877 00878 static BinaryFuncC* getSubTab() 00879 { 00880 static BinaryFuncC subTab[] = 00881 { 00882 (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub8s), 00883 (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub16s), 00884 (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub32s), 00885 (BinaryFuncC)GET_OPTIMIZED(cv::hal::sub32f), (BinaryFuncC)cv::hal::sub64f, 00886 0 00887 }; 00888 00889 return subTab; 00890 } 00891 00892 static BinaryFuncC* getAbsDiffTab() 00893 { 00894 static BinaryFuncC absDiffTab[] = 00895 { 00896 (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff8s), 00897 (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff16s), 00898 (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff32s), 00899 (BinaryFuncC)GET_OPTIMIZED(cv::hal::absdiff32f), (BinaryFuncC)cv::hal::absdiff64f, 00900 0 00901 }; 00902 00903 return absDiffTab; 00904 } 00905 00906 } 00907 00908 void cv::add( InputArray src1, InputArray src2, OutputArray dst, 00909 InputArray mask, int dtype ) 00910 { 00911 arithm_op(src1, src2, 
dst, mask, dtype, getAddTab(), false, 0, OCL_OP_ADD ); 00912 } 00913 00914 void cv::subtract( InputArray _src1, InputArray _src2, OutputArray _dst, 00915 InputArray mask, int dtype ) 00916 { 00917 #ifdef HAVE_TEGRA_OPTIMIZATION 00918 if (tegra::useTegra()) 00919 { 00920 int kind1 = _src1.kind(), kind2 = _src2.kind(); 00921 Mat src1 = _src1.getMat(), src2 = _src2.getMat(); 00922 bool src1Scalar = checkScalar(src1, _src2.type(), kind1, kind2); 00923 bool src2Scalar = checkScalar(src2, _src1.type(), kind2, kind1); 00924 00925 if (!src1Scalar && !src2Scalar && 00926 src1.depth() == CV_8U && src2.type() == src1.type() && 00927 src1.dims == 2 && src2.size() == src1.size() && 00928 mask.empty()) 00929 { 00930 if (dtype < 0) 00931 { 00932 if (_dst.fixedType()) 00933 { 00934 dtype = _dst.depth(); 00935 } 00936 else 00937 { 00938 dtype = src1.depth(); 00939 } 00940 } 00941 00942 dtype = CV_MAT_DEPTH(dtype); 00943 00944 if (!_dst.fixedType() || dtype == _dst.depth()) 00945 { 00946 _dst.create(src1.size(), CV_MAKE_TYPE(dtype, src1.channels())); 00947 00948 if (dtype == CV_16S) 00949 { 00950 Mat dst = _dst.getMat(); 00951 if(tegra::subtract_8u8u16s(src1, src2, dst)) 00952 return; 00953 } 00954 else if (dtype == CV_32F) 00955 { 00956 Mat dst = _dst.getMat(); 00957 if(tegra::subtract_8u8u32f(src1, src2, dst)) 00958 return; 00959 } 00960 else if (dtype == CV_8S) 00961 { 00962 Mat dst = _dst.getMat(); 00963 if(tegra::subtract_8u8u8s(src1, src2, dst)) 00964 return; 00965 } 00966 } 00967 } 00968 } 00969 #endif 00970 arithm_op(_src1, _src2, _dst, mask, dtype, getSubTab(), false, 0, OCL_OP_SUB ); 00971 } 00972 00973 void cv::absdiff( InputArray src1, InputArray src2, OutputArray dst ) 00974 { 00975 arithm_op(src1, src2, dst, noArray(), -1, getAbsDiffTab(), false, 0, OCL_OP_ABSDIFF); 00976 } 00977 00978 /****************************************************************************************\ 00979 * multiply/divide * 00980 
\****************************************************************************************/ 00981 00982 namespace cv 00983 { 00984 00985 static BinaryFuncC* getMulTab() 00986 { 00987 static BinaryFuncC mulTab[] = 00988 { 00989 (BinaryFuncC)cv::hal::mul8u, (BinaryFuncC)cv::hal::mul8s, (BinaryFuncC)cv::hal::mul16u, 00990 (BinaryFuncC)cv::hal::mul16s, (BinaryFuncC)cv::hal::mul32s, (BinaryFuncC)cv::hal::mul32f, 00991 (BinaryFuncC)cv::hal::mul64f, 0 00992 }; 00993 00994 return mulTab; 00995 } 00996 00997 static BinaryFuncC* getDivTab() 00998 { 00999 static BinaryFuncC divTab[] = 01000 { 01001 (BinaryFuncC)cv::hal::div8u, (BinaryFuncC)cv::hal::div8s, (BinaryFuncC)cv::hal::div16u, 01002 (BinaryFuncC)cv::hal::div16s, (BinaryFuncC)cv::hal::div32s, (BinaryFuncC)cv::hal::div32f, 01003 (BinaryFuncC)cv::hal::div64f, 0 01004 }; 01005 01006 return divTab; 01007 } 01008 01009 static BinaryFuncC* getRecipTab() 01010 { 01011 static BinaryFuncC recipTab[] = 01012 { 01013 (BinaryFuncC)cv::hal::recip8u, (BinaryFuncC)cv::hal::recip8s, (BinaryFuncC)cv::hal::recip16u, 01014 (BinaryFuncC)cv::hal::recip16s, (BinaryFuncC)cv::hal::recip32s, (BinaryFuncC)cv::hal::recip32f, 01015 (BinaryFuncC)cv::hal::recip64f, 0 01016 }; 01017 01018 return recipTab; 01019 } 01020 01021 } 01022 01023 void cv::multiply(InputArray src1, InputArray src2, 01024 OutputArray dst, double scale, int dtype) 01025 { 01026 arithm_op(src1, src2, dst, noArray(), dtype, getMulTab(), 01027 true, &scale, std::abs(scale - 1.0) < DBL_EPSILON ? 
OCL_OP_MUL : OCL_OP_MUL_SCALE); 01028 } 01029 01030 void cv::divide(InputArray src1, InputArray src2, 01031 OutputArray dst, double scale, int dtype) 01032 { 01033 arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale, OCL_OP_DIV_SCALE); 01034 } 01035 01036 void cv::divide(double scale, InputArray src2, 01037 OutputArray dst, int dtype) 01038 { 01039 arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale, OCL_OP_RECIP_SCALE); 01040 } 01041 01042 /****************************************************************************************\ 01043 * addWeighted * 01044 \****************************************************************************************/ 01045 01046 namespace cv 01047 { 01048 01049 static BinaryFuncC* getAddWeightedTab() 01050 { 01051 static BinaryFuncC addWeightedTab[] = 01052 { 01053 (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted8u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted8s), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted16u), 01054 (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted16s), (BinaryFuncC)GET_OPTIMIZED(cv::hal::addWeighted32s), (BinaryFuncC)cv::hal::addWeighted32f, 01055 (BinaryFuncC)cv::hal::addWeighted64f, 0 01056 }; 01057 01058 return addWeightedTab; 01059 } 01060 01061 } 01062 01063 void cv::addWeighted( InputArray src1, double alpha, InputArray src2, 01064 double beta, double gamma, OutputArray dst, int dtype ) 01065 { 01066 double scalars[] = {alpha, beta, gamma}; 01067 arithm_op(src1, src2, dst, noArray(), dtype, getAddWeightedTab(), true, scalars, OCL_OP_ADDW); 01068 } 01069 01070 01071 /****************************************************************************************\ 01072 * compare * 01073 \****************************************************************************************/ 01074 01075 namespace cv 01076 { 01077 01078 static BinaryFuncC getCmpFunc(int depth) 01079 { 01080 static BinaryFuncC cmpTab[] = 01081 { 01082 (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp8u), 
(BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp8s), 01083 (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp16u), (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp16s), 01084 (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp32s), 01085 (BinaryFuncC)GET_OPTIMIZED(cv::hal::cmp32f), (BinaryFuncC)cv::hal::cmp64f, 01086 0 01087 }; 01088 01089 return cmpTab[depth]; 01090 } 01091 01092 static double getMinVal(int depth) 01093 { 01094 static const double tab[] = {0, -128, 0, -32768, INT_MIN, -FLT_MAX, -DBL_MAX, 0}; 01095 return tab[depth]; 01096 } 01097 01098 static double getMaxVal(int depth) 01099 { 01100 static const double tab[] = {255, 127, 65535, 32767, INT_MAX, FLT_MAX, DBL_MAX, 0}; 01101 return tab[depth]; 01102 } 01103 01104 #ifdef HAVE_OPENCL 01105 01106 static bool ocl_compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op, bool haveScalar) 01107 { 01108 const ocl::Device& dev = ocl::Device::getDefault(); 01109 bool doubleSupport = dev.doubleFPConfig() > 0; 01110 int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1), 01111 type2 = _src2.type(), depth2 = CV_MAT_DEPTH(type2); 01112 01113 if (!doubleSupport && depth1 == CV_64F) 01114 return false; 01115 01116 if (!haveScalar && (!_src1.sameSize(_src2) || type1 != type2)) 01117 return false; 01118 01119 int kercn = haveScalar ? cn : ocl::predictOptimalVectorWidth(_src1, _src2, _dst), rowsPerWI = dev.isIntel() ? 4 : 1; 01120 // Workaround for bug with "?:" operator in AMD OpenCL compiler 01121 if (depth1 >= CV_16U) 01122 kercn = 1; 01123 01124 int scalarcn = kercn == 3 ? 4 : kercn; 01125 const char * const operationMap[] = { "==", ">", ">=", "<", "<=", "!=" }; 01126 char cvt[40]; 01127 01128 String opts = format("-D %s -D srcT1=%s -D dstT=%s -D workT=srcT1 -D cn=%d" 01129 " -D convertToDT=%s -D OP_CMP -D CMP_OPERATOR=%s -D srcT1_C1=%s" 01130 " -D srcT2_C1=%s -D dstT_C1=%s -D workST=%s -D rowsPerWI=%d%s", 01131 haveScalar ? 
"UNARY_OP" : "BINARY_OP", 01132 ocl::typeToStr(CV_MAKE_TYPE(depth1, kercn)), 01133 ocl::typeToStr(CV_8UC(kercn)), kercn, 01134 ocl::convertTypeStr(depth1, CV_8U, kercn, cvt), 01135 operationMap[op], ocl::typeToStr(depth1), 01136 ocl::typeToStr(depth1), ocl::typeToStr(CV_8U), 01137 ocl::typeToStr(CV_MAKE_TYPE(depth1, scalarcn)), rowsPerWI, 01138 doubleSupport ? " -D DOUBLE_SUPPORT" : ""); 01139 01140 ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts); 01141 if (k.empty()) 01142 return false; 01143 01144 UMat src1 = _src1.getUMat(); 01145 Size size = src1.size(); 01146 _dst.create(size, CV_8UC(cn)); 01147 UMat dst = _dst.getUMat(); 01148 01149 if (haveScalar) 01150 { 01151 size_t esz = CV_ELEM_SIZE1(type1) * scalarcn; 01152 double buf[4] = { 0, 0, 0, 0 }; 01153 Mat src2 = _src2.getMat(); 01154 01155 if( depth1 > CV_32S ) 01156 convertAndUnrollScalar( src2, depth1, (uchar *)buf, kercn ); 01157 else 01158 { 01159 double fval = 0; 01160 getConvertFunc(depth2, CV_64F)(src2.ptr(), 1, 0, 1, (uchar *)&fval, 1, Size(1, 1), 0); 01161 if( fval < getMinVal(depth1) ) 01162 return dst.setTo(Scalar::all(op == CMP_GT || op == CMP_GE || op == CMP_NE ? 255 : 0)), true; 01163 01164 if( fval > getMaxVal(depth1) ) 01165 return dst.setTo(Scalar::all(op == CMP_LT || op == CMP_LE || op == CMP_NE ? 255 : 0)), true; 01166 01167 int ival = cvRound(fval); 01168 if( fval != ival ) 01169 { 01170 if( op == CMP_LT || op == CMP_GE ) 01171 ival = cvCeil(fval); 01172 else if( op == CMP_LE || op == CMP_GT ) 01173 ival = cvFloor(fval); 01174 else 01175 return dst.setTo(Scalar::all(op == CMP_NE ? 
255 : 0)), true; 01176 } 01177 convertAndUnrollScalar(Mat(1, 1, CV_32S, &ival), depth1, (uchar *)buf, kercn); 01178 } 01179 01180 ocl::KernelArg scalararg = ocl::KernelArg(0, 0, 0, 0, buf, esz); 01181 01182 k.args(ocl::KernelArg::ReadOnlyNoSize(src1, cn, kercn), 01183 ocl::KernelArg::WriteOnly(dst, cn, kercn), scalararg); 01184 } 01185 else 01186 { 01187 UMat src2 = _src2.getUMat(); 01188 01189 k.args(ocl::KernelArg::ReadOnlyNoSize(src1), 01190 ocl::KernelArg::ReadOnlyNoSize(src2), 01191 ocl::KernelArg::WriteOnly(dst, cn, kercn)); 01192 } 01193 01194 size_t globalsize[2] = { (size_t)dst.cols * cn / kercn, ((size_t)dst.rows + rowsPerWI - 1) / rowsPerWI }; 01195 return k.run(2, globalsize, NULL, false); 01196 } 01197 01198 #endif 01199 01200 } 01201 01202 void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op) 01203 { 01204 CV_Assert( op == CMP_LT || op == CMP_LE || op == CMP_EQ || 01205 op == CMP_NE || op == CMP_GE || op == CMP_GT ); 01206 01207 bool haveScalar = false; 01208 01209 if ((_src1.isMatx() + _src2.isMatx()) == 1 01210 || !_src1.sameSize(_src2) 01211 || _src1.type() != _src2.type()) 01212 { 01213 if (checkScalar(_src1, _src2.type(), _src1.kind(), _src2.kind())) 01214 { 01215 op = op == CMP_LT ? CMP_GT : op == CMP_LE ? CMP_GE : 01216 op == CMP_GE ? CMP_LE : op == CMP_GT ? 
CMP_LT : op; 01217 // src1 is a scalar; swap it with src2 01218 compare(_src2, _src1, _dst, op); 01219 return; 01220 } 01221 else if( !checkScalar(_src2, _src1.type(), _src2.kind(), _src1.kind()) ) 01222 CV_Error( CV_StsUnmatchedSizes, 01223 "The operation is neither 'array op array' (where arrays have the same size and the same type), " 01224 "nor 'array op scalar', nor 'scalar op array'" ); 01225 haveScalar = true; 01226 } 01227 01228 #ifdef HAVE_OPENCL 01229 CV_OCL_RUN(_src1.dims() <= 2 && _src2.dims() <= 2 && OCL_PERFORMANCE_CHECK(_dst.isUMat()), 01230 ocl_compare(_src1, _src2, _dst, op, haveScalar)) 01231 #endif 01232 01233 int kind1 = _src1.kind(), kind2 = _src2.kind(); 01234 Mat src1 = _src1.getMat(), src2 = _src2.getMat(); 01235 01236 if( kind1 == kind2 && src1.dims <= 2 && src2.dims <= 2 && src1.size() == src2.size() && src1.type() == src2.type() ) 01237 { 01238 int cn = src1.channels(); 01239 _dst.create(src1.size(), CV_8UC(cn)); 01240 Mat dst = _dst.getMat(); 01241 Size sz = getContinuousSize(src1, src2, dst, src1.channels()); 01242 getCmpFunc(src1.depth())(src1.ptr(), src1.step, src2.ptr(), src2.step, dst.ptr(), dst.step, sz.width, sz.height, &op); 01243 return; 01244 } 01245 01246 int cn = src1.channels(), depth1 = src1.depth(), depth2 = src2.depth(); 01247 01248 _dst.create(src1.dims, src1.size, CV_8UC(cn)); 01249 src1 = src1.reshape(1); src2 = src2.reshape(1); 01250 Mat dst = _dst.getMat().reshape(1); 01251 01252 size_t esz = src1.elemSize(); 01253 size_t blocksize0 = (size_t)(BLOCK_SIZE + esz-1)/esz; 01254 BinaryFuncC func = getCmpFunc(depth1); 01255 01256 if( !haveScalar ) 01257 { 01258 const Mat* arrays[] = { &src1, &src2, &dst, 0 }; 01259 uchar* ptrs[3]; 01260 01261 NAryMatIterator it(arrays, ptrs); 01262 size_t total = it.size; 01263 01264 for( size_t i = 0; i < it.nplanes; i++, ++it ) 01265 func( ptrs[0], 0, ptrs[1], 0, ptrs[2], 0, (int)total, 1, &op ); 01266 } 01267 else 01268 { 01269 const Mat* arrays[] = { &src1, &dst, 0 }; 01270 uchar* 
ptrs[2]; 01271 01272 NAryMatIterator it(arrays, ptrs); 01273 size_t total = it.size, blocksize = std::min(total, blocksize0); 01274 01275 AutoBuffer<uchar> _buf(blocksize*esz); 01276 uchar *buf = _buf; 01277 01278 if( depth1 > CV_32S ) 01279 convertAndUnrollScalar( src2, depth1, buf, blocksize ); 01280 else 01281 { 01282 double fval=0; 01283 getConvertFunc(depth2, CV_64F)(src2.ptr(), 1, 0, 1, (uchar*)&fval, 1, Size(1,1), 0); 01284 if( fval < getMinVal(depth1) ) 01285 { 01286 dst = Scalar::all(op == CMP_GT || op == CMP_GE || op == CMP_NE ? 255 : 0); 01287 return; 01288 } 01289 01290 if( fval > getMaxVal(depth1) ) 01291 { 01292 dst = Scalar::all(op == CMP_LT || op == CMP_LE || op == CMP_NE ? 255 : 0); 01293 return; 01294 } 01295 01296 int ival = cvRound(fval); 01297 if( fval != ival ) 01298 { 01299 if( op == CMP_LT || op == CMP_GE ) 01300 ival = cvCeil(fval); 01301 else if( op == CMP_LE || op == CMP_GT ) 01302 ival = cvFloor(fval); 01303 else 01304 { 01305 dst = Scalar::all(op == CMP_NE ? 
255 : 0); 01306 return; 01307 } 01308 } 01309 convertAndUnrollScalar(Mat(1, 1, CV_32S, &ival), depth1, buf, blocksize); 01310 } 01311 01312 for( size_t i = 0; i < it.nplanes; i++, ++it ) 01313 { 01314 for( size_t j = 0; j < total; j += blocksize ) 01315 { 01316 int bsz = (int)MIN(total - j, blocksize); 01317 func( ptrs[0], 0, buf, 0, ptrs[1], 0, bsz, 1, &op); 01318 ptrs[0] += bsz*esz; 01319 ptrs[1] += bsz; 01320 } 01321 } 01322 } 01323 } 01324 01325 /****************************************************************************************\ 01326 * inRange * 01327 \****************************************************************************************/ 01328 01329 namespace cv 01330 { 01331 01332 template <typename T> 01333 struct InRange_SIMD 01334 { 01335 int operator () (const T *, const T *, const T *, uchar *, int) const 01336 { 01337 return 0; 01338 } 01339 }; 01340 01341 #if CV_SSE2 01342 01343 template <> 01344 struct InRange_SIMD<uchar> 01345 { 01346 int operator () (const uchar * src1, const uchar * src2, const uchar * src3, 01347 uchar * dst, int len) const 01348 { 01349 int x = 0; 01350 01351 if (USE_SSE2) 01352 { 01353 __m128i v_full = _mm_set1_epi8(-1), v_128 = _mm_set1_epi8(-128); 01354 01355 for ( ; x <= len - 16; x += 16 ) 01356 { 01357 __m128i v_src = _mm_add_epi8(_mm_loadu_si128((const __m128i *)(src1 + x)), v_128); 01358 __m128i v_mask1 = _mm_cmpgt_epi8(_mm_add_epi8(_mm_loadu_si128((const __m128i *)(src2 + x)), v_128), v_src); 01359 __m128i v_mask2 = _mm_cmpgt_epi8(v_src, _mm_add_epi8(_mm_loadu_si128((const __m128i *)(src3 + x)), v_128)); 01360 _mm_storeu_si128((__m128i *)(dst + x), _mm_andnot_si128(_mm_or_si128(v_mask1, v_mask2), v_full)); 01361 } 01362 } 01363 01364 return x; 01365 } 01366 }; 01367 01368 template <> 01369 struct InRange_SIMD<schar> 01370 { 01371 int operator () (const schar * src1, const schar * src2, const schar * src3, 01372 uchar * dst, int len) const 01373 { 01374 int x = 0; 01375 01376 if (USE_SSE2) 01377 { 01378 __m128i 
v_full = _mm_set1_epi8(-1); 01379 01380 for ( ; x <= len - 16; x += 16 ) 01381 { 01382 __m128i v_src = _mm_loadu_si128((const __m128i *)(src1 + x)); 01383 __m128i v_mask1 = _mm_cmpgt_epi8(_mm_loadu_si128((const __m128i *)(src2 + x)), v_src); 01384 __m128i v_mask2 = _mm_cmpgt_epi8(v_src, _mm_loadu_si128((const __m128i *)(src3 + x))); 01385 _mm_storeu_si128((__m128i *)(dst + x), _mm_andnot_si128(_mm_or_si128(v_mask1, v_mask2), v_full)); 01386 } 01387 } 01388 01389 return x; 01390 } 01391 }; 01392 01393 template <> 01394 struct InRange_SIMD<ushort> 01395 { 01396 int operator () (const ushort * src1, const ushort * src2, const ushort * src3, 01397 uchar * dst, int len) const 01398 { 01399 int x = 0; 01400 01401 if (USE_SSE2) 01402 { 01403 __m128i v_zero = _mm_setzero_si128(), v_full = _mm_set1_epi16(-1), v_32768 = _mm_set1_epi16(-32768); 01404 01405 for ( ; x <= len - 8; x += 8 ) 01406 { 01407 __m128i v_src = _mm_add_epi16(_mm_loadu_si128((const __m128i *)(src1 + x)), v_32768); 01408 __m128i v_mask1 = _mm_cmpgt_epi16(_mm_add_epi16(_mm_loadu_si128((const __m128i *)(src2 + x)), v_32768), v_src); 01409 __m128i v_mask2 = _mm_cmpgt_epi16(v_src, _mm_add_epi16(_mm_loadu_si128((const __m128i *)(src3 + x)), v_32768)); 01410 __m128i v_res = _mm_andnot_si128(_mm_or_si128(v_mask1, v_mask2), v_full); 01411 _mm_storel_epi64((__m128i *)(dst + x), _mm_packus_epi16(_mm_srli_epi16(v_res, 8), v_zero)); 01412 } 01413 } 01414 01415 return x; 01416 } 01417 }; 01418 01419 template <> 01420 struct InRange_SIMD<short> 01421 { 01422 int operator () (const short * src1, const short * src2, const short * src3, 01423 uchar * dst, int len) const 01424 { 01425 int x = 0; 01426 01427 if (USE_SSE2) 01428 { 01429 __m128i v_zero = _mm_setzero_si128(), v_full = _mm_set1_epi16(-1); 01430 01431 for ( ; x <= len - 8; x += 8 ) 01432 { 01433 __m128i v_src = _mm_loadu_si128((const __m128i *)(src1 + x)); 01434 __m128i v_mask1 = _mm_cmpgt_epi16(_mm_loadu_si128((const __m128i *)(src2 + x)), v_src); 01435 __m128i 
v_mask2 = _mm_cmpgt_epi16(v_src, _mm_loadu_si128((const __m128i *)(src3 + x))); 01436 __m128i v_res = _mm_andnot_si128(_mm_or_si128(v_mask1, v_mask2), v_full); 01437 _mm_storel_epi64((__m128i *)(dst + x), _mm_packus_epi16(_mm_srli_epi16(v_res, 8), v_zero)); 01438 } 01439 } 01440 01441 return x; 01442 } 01443 }; 01444 01445 template <> 01446 struct InRange_SIMD<int> 01447 { 01448 int operator () (const int * src1, const int * src2, const int * src3, 01449 uchar * dst, int len) const 01450 { 01451 int x = 0; 01452 01453 if (USE_SSE2) 01454 { 01455 __m128i v_zero = _mm_setzero_si128(), v_full = _mm_set1_epi32(-1); 01456 01457 for ( ; x <= len - 8; x += 8 ) 01458 { 01459 __m128i v_src = _mm_loadu_si128((const __m128i *)(src1 + x)); 01460 __m128i v_res1 = _mm_or_si128(_mm_cmpgt_epi32(_mm_loadu_si128((const __m128i *)(src2 + x)), v_src), 01461 _mm_cmpgt_epi32(v_src, _mm_loadu_si128((const __m128i *)(src3 + x)))); 01462 01463 v_src = _mm_loadu_si128((const __m128i *)(src1 + x + 4)); 01464 __m128i v_res2 = _mm_or_si128(_mm_cmpgt_epi32(_mm_loadu_si128((const __m128i *)(src2 + x + 4)), v_src), 01465 _mm_cmpgt_epi32(v_src, _mm_loadu_si128((const __m128i *)(src3 + x + 4)))); 01466 01467 __m128i v_res = _mm_packs_epi32(_mm_srli_epi32(_mm_andnot_si128(v_res1, v_full), 16), 01468 _mm_srli_epi32(_mm_andnot_si128(v_res2, v_full), 16)); 01469 _mm_storel_epi64((__m128i *)(dst + x), _mm_packus_epi16(v_res, v_zero)); 01470 } 01471 } 01472 01473 return x; 01474 } 01475 }; 01476 01477 template <> 01478 struct InRange_SIMD<float> 01479 { 01480 int operator () (const float * src1, const float * src2, const float * src3, 01481 uchar * dst, int len) const 01482 { 01483 int x = 0; 01484 01485 if (USE_SSE2) 01486 { 01487 __m128i v_zero = _mm_setzero_si128(); 01488 01489 for ( ; x <= len - 8; x += 8 ) 01490 { 01491 __m128 v_src = _mm_loadu_ps(src1 + x); 01492 __m128 v_res1 = _mm_and_ps(_mm_cmple_ps(_mm_loadu_ps(src2 + x), v_src), 01493 _mm_cmple_ps(v_src, _mm_loadu_ps(src3 + x))); 01494 01495 
v_src = _mm_loadu_ps(src1 + x + 4); 01496 __m128 v_res2 = _mm_and_ps(_mm_cmple_ps(_mm_loadu_ps(src2 + x + 4), v_src), 01497 _mm_cmple_ps(v_src, _mm_loadu_ps(src3 + x + 4))); 01498 01499 __m128i v_res1i = _mm_cvtps_epi32(v_res1), v_res2i = _mm_cvtps_epi32(v_res2); 01500 __m128i v_res = _mm_packs_epi32(_mm_srli_epi32(v_res1i, 16), _mm_srli_epi32(v_res2i, 16)); 01501 _mm_storel_epi64((__m128i *)(dst + x), _mm_packus_epi16(v_res, v_zero)); 01502 } 01503 } 01504 01505 return x; 01506 } 01507 }; 01508 01509 #elif CV_NEON 01510 01511 template <> 01512 struct InRange_SIMD<uchar> 01513 { 01514 int operator () (const uchar * src1, const uchar * src2, const uchar * src3, 01515 uchar * dst, int len) const 01516 { 01517 int x = 0; 01518 01519 for ( ; x <= len - 16; x += 16 ) 01520 { 01521 uint8x16_t values = vld1q_u8(src1 + x); 01522 uint8x16_t low = vld1q_u8(src2 + x); 01523 uint8x16_t high = vld1q_u8(src3 + x); 01524 01525 vst1q_u8(dst + x, vandq_u8(vcgeq_u8(values, low), vcgeq_u8(high, values))); 01526 } 01527 return x; 01528 } 01529 }; 01530 01531 template <> 01532 struct InRange_SIMD<schar> 01533 { 01534 int operator () (const schar * src1, const schar * src2, const schar * src3, 01535 uchar * dst, int len) const 01536 { 01537 int x = 0; 01538 01539 for ( ; x <= len - 16; x += 16 ) 01540 { 01541 int8x16_t values = vld1q_s8(src1 + x); 01542 int8x16_t low = vld1q_s8(src2 + x); 01543 int8x16_t high = vld1q_s8(src3 + x); 01544 01545 vst1q_u8(dst + x, vandq_u8(vcgeq_s8(values, low), vcgeq_s8(high, values))); 01546 } 01547 return x; 01548 } 01549 }; 01550 01551 template <> 01552 struct InRange_SIMD<ushort> 01553 { 01554 int operator () (const ushort * src1, const ushort * src2, const ushort * src3, 01555 uchar * dst, int len) const 01556 { 01557 int x = 0; 01558 01559 for ( ; x <= len - 16; x += 16 ) 01560 { 01561 uint16x8_t values = vld1q_u16((const uint16_t*)(src1 + x)); 01562 uint16x8_t low = vld1q_u16((const uint16_t*)(src2 + x)); 01563 uint16x8_t high = vld1q_u16((const 
uint16_t*)(src3 + x)); 01564 uint8x8_t r1 = vmovn_u16(vandq_u16(vcgeq_u16(values, low), vcgeq_u16(high, values))); 01565 01566 values = vld1q_u16((const uint16_t*)(src1 + x + 8)); 01567 low = vld1q_u16((const uint16_t*)(src2 + x + 8)); 01568 high = vld1q_u16((const uint16_t*)(src3 + x + 8)); 01569 uint8x8_t r2 = vmovn_u16(vandq_u16(vcgeq_u16(values, low), vcgeq_u16(high, values))); 01570 01571 vst1q_u8(dst + x, vcombine_u8(r1, r2)); 01572 } 01573 return x; 01574 } 01575 }; 01576 01577 template <> 01578 struct InRange_SIMD<short> 01579 { 01580 int operator () (const short * src1, const short * src2, const short * src3, 01581 uchar * dst, int len) const 01582 { 01583 int x = 0; 01584 01585 for ( ; x <= len - 16; x += 16 ) 01586 { 01587 int16x8_t values = vld1q_s16((const int16_t*)(src1 + x)); 01588 int16x8_t low = vld1q_s16((const int16_t*)(src2 + x)); 01589 int16x8_t high = vld1q_s16((const int16_t*)(src3 + x)); 01590 uint8x8_t r1 = vmovn_u16(vandq_u16(vcgeq_s16(values, low), vcgeq_s16(high, values))); 01591 01592 values = vld1q_s16((const int16_t*)(src1 + x + 8)); 01593 low = vld1q_s16((const int16_t*)(src2 + x + 8)); 01594 high = vld1q_s16((const int16_t*)(src3 + x + 8)); 01595 uint8x8_t r2 = vmovn_u16(vandq_u16(vcgeq_s16(values, low), vcgeq_s16(high, values))); 01596 01597 vst1q_u8(dst + x, vcombine_u8(r1, r2)); 01598 } 01599 return x; 01600 } 01601 }; 01602 01603 template <> 01604 struct InRange_SIMD<int> 01605 { 01606 int operator () (const int * src1, const int * src2, const int * src3, 01607 uchar * dst, int len) const 01608 { 01609 int x = 0; 01610 01611 for ( ; x <= len - 8; x += 8 ) 01612 { 01613 int32x4_t values = vld1q_s32((const int32_t*)(src1 + x)); 01614 int32x4_t low = vld1q_s32((const int32_t*)(src2 + x)); 01615 int32x4_t high = vld1q_s32((const int32_t*)(src3 + x)); 01616 01617 uint16x4_t r1 = vmovn_u32(vandq_u32(vcgeq_s32(values, low), vcgeq_s32(high, values))); 01618 01619 values = vld1q_s32((const int32_t*)(src1 + x + 4)); 01620 low = 
vld1q_s32((const int32_t*)(src2 + x + 4)); 01621 high = vld1q_s32((const int32_t*)(src3 + x + 4)); 01622 01623 uint16x4_t r2 = vmovn_u32(vandq_u32(vcgeq_s32(values, low), vcgeq_s32(high, values))); 01624 01625 uint16x8_t res_16 = vcombine_u16(r1, r2); 01626 01627 vst1_u8(dst + x, vmovn_u16(res_16)); 01628 } 01629 return x; 01630 } 01631 }; 01632 01633 template <> 01634 struct InRange_SIMD<float> 01635 { 01636 int operator () (const float * src1, const float * src2, const float * src3, 01637 uchar * dst, int len) const 01638 { 01639 int x = 0; 01640 01641 for ( ; x <= len - 8; x += 8 ) 01642 { 01643 float32x4_t values = vld1q_f32((const float32_t*)(src1 + x)); 01644 float32x4_t low = vld1q_f32((const float32_t*)(src2 + x)); 01645 float32x4_t high = vld1q_f32((const float32_t*)(src3 + x)); 01646 01647 uint16x4_t r1 = vmovn_u32(vandq_u32(vcgeq_f32(values, low), vcgeq_f32(high, values))); 01648 01649 values = vld1q_f32((const float32_t*)(src1 + x + 4)); 01650 low = vld1q_f32((const float32_t*)(src2 + x + 4)); 01651 high = vld1q_f32((const float32_t*)(src3 + x + 4)); 01652 01653 uint16x4_t r2 = vmovn_u32(vandq_u32(vcgeq_f32(values, low), vcgeq_f32(high, values))); 01654 01655 uint16x8_t res_16 = vcombine_u16(r1, r2); 01656 01657 vst1_u8(dst + x, vmovn_u16(res_16)); 01658 } 01659 return x; 01660 } 01661 }; 01662 01663 #endif 01664 01665 template <typename T> 01666 static void inRange_(const T* src1, size_t step1, const T* src2, size_t step2, 01667 const T* src3, size_t step3, uchar* dst, size_t step, 01668 Size size) 01669 { 01670 step1 /= sizeof(src1[0]); 01671 step2 /= sizeof(src2[0]); 01672 step3 /= sizeof(src3[0]); 01673 01674 InRange_SIMD<T> vop; 01675 01676 for( ; size.height--; src1 += step1, src2 += step2, src3 += step3, dst += step ) 01677 { 01678 int x = vop(src1, src2, src3, dst, size.width); 01679 #if CV_ENABLE_UNROLLED 01680 for( ; x <= size.width - 4; x += 4 ) 01681 { 01682 int t0, t1; 01683 t0 = src2[x] <= src1[x] && src1[x] <= src3[x]; 01684 t1 = 
src2[x+1] <= src1[x+1] && src1[x+1] <= src3[x+1]; 01685 dst[x] = (uchar)-t0; dst[x+1] = (uchar)-t1; 01686 t0 = src2[x+2] <= src1[x+2] && src1[x+2] <= src3[x+2]; 01687 t1 = src2[x+3] <= src1[x+3] && src1[x+3] <= src3[x+3]; 01688 dst[x+2] = (uchar)-t0; dst[x+3] = (uchar)-t1; 01689 } 01690 #endif 01691 for( ; x < size.width; x++ ) 01692 dst[x] = (uchar)-(src2[x] <= src1[x] && src1[x] <= src3[x]); 01693 } 01694 } 01695 01696 01697 static void inRange8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, 01698 const uchar* src3, size_t step3, uchar* dst, size_t step, Size size) 01699 { 01700 inRange_(src1, step1, src2, step2, src3, step3, dst, step, size); 01701 } 01702 01703 static void inRange8s(const schar* src1, size_t step1, const schar* src2, size_t step2, 01704 const schar* src3, size_t step3, uchar* dst, size_t step, Size size) 01705 { 01706 inRange_(src1, step1, src2, step2, src3, step3, dst, step, size); 01707 } 01708 01709 static void inRange16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, 01710 const ushort* src3, size_t step3, uchar* dst, size_t step, Size size) 01711 { 01712 inRange_(src1, step1, src2, step2, src3, step3, dst, step, size); 01713 } 01714 01715 static void inRange16s(const short* src1, size_t step1, const short* src2, size_t step2, 01716 const short* src3, size_t step3, uchar* dst, size_t step, Size size) 01717 { 01718 inRange_(src1, step1, src2, step2, src3, step3, dst, step, size); 01719 } 01720 01721 static void inRange32s(const int* src1, size_t step1, const int* src2, size_t step2, 01722 const int* src3, size_t step3, uchar* dst, size_t step, Size size) 01723 { 01724 inRange_(src1, step1, src2, step2, src3, step3, dst, step, size); 01725 } 01726 01727 static void inRange32f(const float* src1, size_t step1, const float* src2, size_t step2, 01728 const float* src3, size_t step3, uchar* dst, size_t step, Size size) 01729 { 01730 inRange_(src1, step1, src2, step2, src3, step3, dst, step, size); 01731 } 
01732 01733 static void inRange64f(const double* src1, size_t step1, const double* src2, size_t step2, 01734 const double* src3, size_t step3, uchar* dst, size_t step, Size size) 01735 { 01736 inRange_(src1, step1, src2, step2, src3, step3, dst, step, size); 01737 } 01738 01739 static void inRangeReduce(const uchar* src, uchar* dst, size_t len, int cn) 01740 { 01741 int k = cn % 4 ? cn % 4 : 4; 01742 size_t i, j; 01743 if( k == 1 ) 01744 for( i = j = 0; i < len; i++, j += cn ) 01745 dst[i] = src[j]; 01746 else if( k == 2 ) 01747 for( i = j = 0; i < len; i++, j += cn ) 01748 dst[i] = src[j] & src[j+1]; 01749 else if( k == 3 ) 01750 for( i = j = 0; i < len; i++, j += cn ) 01751 dst[i] = src[j] & src[j+1] & src[j+2]; 01752 else 01753 for( i = j = 0; i < len; i++, j += cn ) 01754 dst[i] = src[j] & src[j+1] & src[j+2] & src[j+3]; 01755 01756 for( ; k < cn; k += 4 ) 01757 { 01758 for( i = 0, j = k; i < len; i++, j += cn ) 01759 dst[i] &= src[j] & src[j+1] & src[j+2] & src[j+3]; 01760 } 01761 } 01762 01763 typedef void (*InRangeFunc)( const uchar* src1, size_t step1, const uchar* src2, size_t step2, 01764 const uchar* src3, size_t step3, uchar* dst, size_t step, Size sz ); 01765 01766 static InRangeFunc getInRangeFunc(int depth) 01767 { 01768 static InRangeFunc inRangeTab[] = 01769 { 01770 (InRangeFunc)GET_OPTIMIZED(inRange8u), (InRangeFunc)GET_OPTIMIZED(inRange8s), (InRangeFunc)GET_OPTIMIZED(inRange16u), 01771 (InRangeFunc)GET_OPTIMIZED(inRange16s), (InRangeFunc)GET_OPTIMIZED(inRange32s), (InRangeFunc)GET_OPTIMIZED(inRange32f), 01772 (InRangeFunc)inRange64f, 0 01773 }; 01774 01775 return inRangeTab[depth]; 01776 } 01777 01778 #ifdef HAVE_OPENCL 01779 01780 static bool ocl_inRange( InputArray _src, InputArray _lowerb, 01781 InputArray _upperb, OutputArray _dst ) 01782 { 01783 const ocl::Device & d = ocl::Device::getDefault(); 01784 int skind = _src.kind(), lkind = _lowerb.kind(), ukind = _upperb.kind(); 01785 Size ssize = _src.size(), lsize = _lowerb.size(), usize = 
_upperb.size(); 01786 int stype = _src.type(), ltype = _lowerb.type(), utype = _upperb.type(); 01787 int sdepth = CV_MAT_DEPTH(stype), ldepth = CV_MAT_DEPTH(ltype), udepth = CV_MAT_DEPTH(utype); 01788 int cn = CV_MAT_CN(stype), rowsPerWI = d.isIntel() ? 4 : 1; 01789 bool lbScalar = false, ubScalar = false; 01790 01791 if( (lkind == _InputArray::MATX && skind != _InputArray::MATX) || 01792 ssize != lsize || stype != ltype ) 01793 { 01794 if( !checkScalar(_lowerb, stype, lkind, skind) ) 01795 CV_Error( CV_StsUnmatchedSizes, 01796 "The lower bounary is neither an array of the same size and same type as src, nor a scalar"); 01797 lbScalar = true; 01798 } 01799 01800 if( (ukind == _InputArray::MATX && skind != _InputArray::MATX) || 01801 ssize != usize || stype != utype ) 01802 { 01803 if( !checkScalar(_upperb, stype, ukind, skind) ) 01804 CV_Error( CV_StsUnmatchedSizes, 01805 "The upper bounary is neither an array of the same size and same type as src, nor a scalar"); 01806 ubScalar = true; 01807 } 01808 01809 if (lbScalar != ubScalar) 01810 return false; 01811 01812 bool doubleSupport = d.doubleFPConfig() > 0, 01813 haveScalar = lbScalar && ubScalar; 01814 01815 if ( (!doubleSupport && sdepth == CV_64F) || 01816 (!haveScalar && (sdepth != ldepth || sdepth != udepth)) ) 01817 return false; 01818 01819 int kercn = haveScalar ? cn : std::max(std::min(ocl::predictOptimalVectorWidth(_src, _lowerb, _upperb, _dst), 4), cn); 01820 if (kercn % cn != 0) 01821 kercn = cn; 01822 int colsPerWI = kercn / cn; 01823 String opts = format("%s-D cn=%d -D srcT=%s -D srcT1=%s -D dstT=%s -D kercn=%d -D depth=%d%s -D colsPerWI=%d", 01824 haveScalar ? "-D HAVE_SCALAR " : "", cn, ocl::typeToStr(CV_MAKE_TYPE(sdepth, kercn)), 01825 ocl::typeToStr(sdepth), ocl::typeToStr(CV_8UC(colsPerWI)), kercn, sdepth, 01826 doubleSupport ? 
" -D DOUBLE_SUPPORT" : "", colsPerWI); 01827 01828 ocl::Kernel ker("inrange", ocl::core::inrange_oclsrc, opts); 01829 if (ker.empty()) 01830 return false; 01831 01832 _dst.create(ssize, CV_8UC1); 01833 UMat src = _src.getUMat(), dst = _dst.getUMat(), lscalaru, uscalaru; 01834 Mat lscalar, uscalar; 01835 01836 if (lbScalar && ubScalar) 01837 { 01838 lscalar = _lowerb.getMat(); 01839 uscalar = _upperb.getMat(); 01840 01841 size_t esz = src.elemSize(); 01842 size_t blocksize = 36; 01843 01844 AutoBuffer<uchar> _buf(blocksize*(((int)lbScalar + (int)ubScalar)*esz + cn) + 2*cn*sizeof(int) + 128); 01845 uchar *buf = alignPtr(_buf + blocksize*cn, 16); 01846 01847 if( ldepth != sdepth && sdepth < CV_32S ) 01848 { 01849 int* ilbuf = (int*)alignPtr(buf + blocksize*esz, 16); 01850 int* iubuf = ilbuf + cn; 01851 01852 BinaryFunc sccvtfunc = getConvertFunc(ldepth, CV_32S); 01853 sccvtfunc(lscalar.ptr(), 1, 0, 1, (uchar*)ilbuf, 1, Size(cn, 1), 0); 01854 sccvtfunc(uscalar.ptr(), 1, 0, 1, (uchar*)iubuf, 1, Size(cn, 1), 0); 01855 int minval = cvRound(getMinVal(sdepth)), maxval = cvRound(getMaxVal(sdepth)); 01856 01857 for( int k = 0; k < cn; k++ ) 01858 { 01859 if( ilbuf[k] > iubuf[k] || ilbuf[k] > maxval || iubuf[k] < minval ) 01860 ilbuf[k] = minval+1, iubuf[k] = minval; 01861 } 01862 lscalar = Mat(cn, 1, CV_32S, ilbuf); 01863 uscalar = Mat(cn, 1, CV_32S, iubuf); 01864 } 01865 01866 lscalar.convertTo(lscalar, stype); 01867 uscalar.convertTo(uscalar, stype); 01868 } 01869 else 01870 { 01871 lscalaru = _lowerb.getUMat(); 01872 uscalaru = _upperb.getUMat(); 01873 } 01874 01875 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), 01876 dstarg = ocl::KernelArg::WriteOnly(dst, 1, colsPerWI); 01877 01878 if (haveScalar) 01879 { 01880 lscalar.copyTo(lscalaru); 01881 uscalar.copyTo(uscalaru); 01882 01883 ker.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(lscalaru), 01884 ocl::KernelArg::PtrReadOnly(uscalaru), rowsPerWI); 01885 } 01886 else 01887 ker.args(srcarg, dstarg, 
ocl::KernelArg::ReadOnlyNoSize(lscalaru), 01888 ocl::KernelArg::ReadOnlyNoSize(uscalaru), rowsPerWI); 01889 01890 size_t globalsize[2] = { (size_t)ssize.width / colsPerWI, ((size_t)ssize.height + rowsPerWI - 1) / rowsPerWI }; 01891 return ker.run(2, globalsize, NULL, false); 01892 } 01893 01894 #endif 01895 01896 } 01897 01898 void cv::inRange(InputArray _src, InputArray _lowerb, 01899 InputArray _upperb, OutputArray _dst) 01900 { 01901 #ifdef HAVE_OPENCL 01902 CV_OCL_RUN(_src.dims() <= 2 && _lowerb.dims() <= 2 && 01903 _upperb.dims() <= 2 && OCL_PERFORMANCE_CHECK(_dst.isUMat()), 01904 ocl_inRange(_src, _lowerb, _upperb, _dst)) 01905 #endif 01906 01907 int skind = _src.kind(), lkind = _lowerb.kind(), ukind = _upperb.kind(); 01908 Mat src = _src.getMat(), lb = _lowerb.getMat(), ub = _upperb.getMat(); 01909 01910 bool lbScalar = false, ubScalar = false; 01911 01912 if( (lkind == _InputArray::MATX && skind != _InputArray::MATX) || 01913 src.size != lb.size || src.type() != lb.type() ) 01914 { 01915 if( !checkScalar(lb, src.type(), lkind, skind) ) 01916 CV_Error( CV_StsUnmatchedSizes, 01917 "The lower bounary is neither an array of the same size and same type as src, nor a scalar"); 01918 lbScalar = true; 01919 } 01920 01921 if( (ukind == _InputArray::MATX && skind != _InputArray::MATX) || 01922 src.size != ub.size || src.type() != ub.type() ) 01923 { 01924 if( !checkScalar(ub, src.type(), ukind, skind) ) 01925 CV_Error( CV_StsUnmatchedSizes, 01926 "The upper bounary is neither an array of the same size and same type as src, nor a scalar"); 01927 ubScalar = true; 01928 } 01929 01930 CV_Assert(lbScalar == ubScalar); 01931 01932 int cn = src.channels(), depth = src.depth(); 01933 01934 size_t esz = src.elemSize(); 01935 size_t blocksize0 = (size_t)(BLOCK_SIZE + esz-1)/esz; 01936 01937 _dst.create(src.dims, src.size, CV_8UC1); 01938 Mat dst = _dst.getMat(); 01939 InRangeFunc func = getInRangeFunc(depth); 01940 01941 const Mat* arrays_sc[] = { &src, &dst, 0 }; 01942 const 
Mat* arrays_nosc[] = { &src, &dst, &lb, &ub, 0 }; 01943 uchar* ptrs[4]; 01944 01945 NAryMatIterator it(lbScalar && ubScalar ? arrays_sc : arrays_nosc, ptrs); 01946 size_t total = it.size, blocksize = std::min(total, blocksize0); 01947 01948 AutoBuffer<uchar> _buf(blocksize*(((int)lbScalar + (int)ubScalar)*esz + cn) + 2*cn*sizeof(int) + 128); 01949 uchar *buf = _buf, *mbuf = buf, *lbuf = 0, *ubuf = 0; 01950 buf = alignPtr(buf + blocksize*cn, 16); 01951 01952 if( lbScalar && ubScalar ) 01953 { 01954 lbuf = buf; 01955 ubuf = buf = alignPtr(buf + blocksize*esz, 16); 01956 01957 CV_Assert( lb.type() == ub.type() ); 01958 int scdepth = lb.depth(); 01959 01960 if( scdepth != depth && depth < CV_32S ) 01961 { 01962 int* ilbuf = (int*)alignPtr(buf + blocksize*esz, 16); 01963 int* iubuf = ilbuf + cn; 01964 01965 BinaryFunc sccvtfunc = getConvertFunc(scdepth, CV_32S); 01966 sccvtfunc(lb.ptr(), 1, 0, 1, (uchar*)ilbuf, 1, Size(cn, 1), 0); 01967 sccvtfunc(ub.ptr(), 1, 0, 1, (uchar*)iubuf, 1, Size(cn, 1), 0); 01968 int minval = cvRound(getMinVal(depth)), maxval = cvRound(getMaxVal(depth)); 01969 01970 for( int k = 0; k < cn; k++ ) 01971 { 01972 if( ilbuf[k] > iubuf[k] || ilbuf[k] > maxval || iubuf[k] < minval ) 01973 ilbuf[k] = minval+1, iubuf[k] = minval; 01974 } 01975 lb = Mat(cn, 1, CV_32S, ilbuf); 01976 ub = Mat(cn, 1, CV_32S, iubuf); 01977 } 01978 01979 convertAndUnrollScalar( lb, src.type(), lbuf, blocksize ); 01980 convertAndUnrollScalar( ub, src.type(), ubuf, blocksize ); 01981 } 01982 01983 for( size_t i = 0; i < it.nplanes; i++, ++it ) 01984 { 01985 for( size_t j = 0; j < total; j += blocksize ) 01986 { 01987 int bsz = (int)MIN(total - j, blocksize); 01988 size_t delta = bsz*esz; 01989 uchar *lptr = lbuf, *uptr = ubuf; 01990 if( !lbScalar ) 01991 { 01992 lptr = ptrs[2]; 01993 ptrs[2] += delta; 01994 } 01995 if( !ubScalar ) 01996 { 01997 int idx = !lbScalar ? 
3 : 2; 01998 uptr = ptrs[idx]; 01999 ptrs[idx] += delta; 02000 } 02001 func( ptrs[0], 0, lptr, 0, uptr, 0, cn == 1 ? ptrs[1] : mbuf, 0, Size(bsz*cn, 1)); 02002 if( cn > 1 ) 02003 inRangeReduce(mbuf, ptrs[1], bsz, cn); 02004 ptrs[0] += delta; 02005 ptrs[1] += bsz; 02006 } 02007 } 02008 } 02009 02010 /****************************************************************************************\ 02011 * Earlier API: cvAdd etc. * 02012 \****************************************************************************************/ 02013 02014 CV_IMPL void 02015 cvNot( const CvArr* srcarr, CvArr* dstarr ) 02016 { 02017 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr); 02018 CV_Assert( src.size == dst.size && src.type() == dst.type() ); 02019 cv::bitwise_not( src, dst ); 02020 } 02021 02022 02023 CV_IMPL void 02024 cvAnd( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr ) 02025 { 02026 cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2), 02027 dst = cv::cvarrToMat(dstarr), mask; 02028 CV_Assert( src1.size == dst.size && src1.type() == dst.type() ); 02029 if( maskarr ) 02030 mask = cv::cvarrToMat(maskarr); 02031 cv::bitwise_and( src1, src2, dst, mask ); 02032 } 02033 02034 02035 CV_IMPL void 02036 cvOr( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr ) 02037 { 02038 cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2), 02039 dst = cv::cvarrToMat(dstarr), mask; 02040 CV_Assert( src1.size == dst.size && src1.type() == dst.type() ); 02041 if( maskarr ) 02042 mask = cv::cvarrToMat(maskarr); 02043 cv::bitwise_or( src1, src2, dst, mask ); 02044 } 02045 02046 02047 CV_IMPL void 02048 cvXor( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr ) 02049 { 02050 cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2), 02051 dst = cv::cvarrToMat(dstarr), mask; 02052 CV_Assert( src1.size == dst.size && src1.type() == dst.type() ); 02053 
if( maskarr ) 02054 mask = cv::cvarrToMat(maskarr); 02055 cv::bitwise_xor( src1, src2, dst, mask ); 02056 } 02057 02058 02059 CV_IMPL void 02060 cvAndS( const CvArr* srcarr, CvScalar s, CvArr* dstarr, const CvArr* maskarr ) 02061 { 02062 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), mask; 02063 CV_Assert( src.size == dst.size && src.type() == dst.type() ); 02064 if( maskarr ) 02065 mask = cv::cvarrToMat(maskarr); 02066 cv::bitwise_and( src, (const cv::Scalar &)s, dst, mask ); 02067 } 02068 02069 02070 CV_IMPL void 02071 cvOrS( const CvArr* srcarr, CvScalar s, CvArr* dstarr, const CvArr* maskarr ) 02072 { 02073 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), mask; 02074 CV_Assert( src.size == dst.size && src.type() == dst.type() ); 02075 if( maskarr ) 02076 mask = cv::cvarrToMat(maskarr); 02077 cv::bitwise_or( src, (const cv::Scalar &)s, dst, mask ); 02078 } 02079 02080 02081 CV_IMPL void 02082 cvXorS( const CvArr* srcarr, CvScalar s, CvArr* dstarr, const CvArr* maskarr ) 02083 { 02084 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), mask; 02085 CV_Assert( src.size == dst.size && src.type() == dst.type() ); 02086 if( maskarr ) 02087 mask = cv::cvarrToMat(maskarr); 02088 cv::bitwise_xor( src, (const cv::Scalar &)s, dst, mask ); 02089 } 02090 02091 02092 CV_IMPL void cvAdd( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr ) 02093 { 02094 cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2), 02095 dst = cv::cvarrToMat(dstarr), mask; 02096 CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() ); 02097 if( maskarr ) 02098 mask = cv::cvarrToMat(maskarr); 02099 cv::add( src1, src2, dst, mask, dst.type() ); 02100 } 02101 02102 02103 CV_IMPL void cvSub( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr, const CvArr* maskarr ) 02104 { 02105 cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2), 02106 dst = 
cv::cvarrToMat(dstarr), mask; 02107 CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() ); 02108 if( maskarr ) 02109 mask = cv::cvarrToMat(maskarr); 02110 cv::subtract( src1, src2, dst, mask, dst.type() ); 02111 } 02112 02113 02114 CV_IMPL void cvAddS( const CvArr* srcarr1, CvScalar value, CvArr* dstarr, const CvArr* maskarr ) 02115 { 02116 cv::Mat src1 = cv::cvarrToMat(srcarr1), 02117 dst = cv::cvarrToMat(dstarr), mask; 02118 CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() ); 02119 if( maskarr ) 02120 mask = cv::cvarrToMat(maskarr); 02121 cv::add( src1, (const cv::Scalar &)value, dst, mask, dst.type() ); 02122 } 02123 02124 02125 CV_IMPL void cvSubRS( const CvArr* srcarr1, CvScalar value, CvArr* dstarr, const CvArr* maskarr ) 02126 { 02127 cv::Mat src1 = cv::cvarrToMat(srcarr1), 02128 dst = cv::cvarrToMat(dstarr), mask; 02129 CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() ); 02130 if( maskarr ) 02131 mask = cv::cvarrToMat(maskarr); 02132 cv::subtract( (const cv::Scalar &)value, src1, dst, mask, dst.type() ); 02133 } 02134 02135 02136 CV_IMPL void cvMul( const CvArr* srcarr1, const CvArr* srcarr2, 02137 CvArr* dstarr, double scale ) 02138 { 02139 cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2), 02140 dst = cv::cvarrToMat(dstarr); 02141 CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() ); 02142 cv::multiply( src1, src2, dst, scale, dst.type() ); 02143 } 02144 02145 02146 CV_IMPL void cvDiv( const CvArr* srcarr1, const CvArr* srcarr2, 02147 CvArr* dstarr, double scale ) 02148 { 02149 cv::Mat src2 = cv::cvarrToMat(srcarr2), 02150 dst = cv::cvarrToMat(dstarr), mask; 02151 CV_Assert( src2.size == dst.size && src2.channels() == dst.channels() ); 02152 02153 if( srcarr1 ) 02154 cv::divide( cv::cvarrToMat(srcarr1), src2, dst, scale, dst.type() ); 02155 else 02156 cv::divide( scale, src2, dst, dst.type() ); 02157 } 02158 02159 02160 CV_IMPL void 02161 cvAddWeighted( 
const CvArr* srcarr1, double alpha, 02162 const CvArr* srcarr2, double beta, 02163 double gamma, CvArr* dstarr ) 02164 { 02165 cv::Mat src1 = cv::cvarrToMat(srcarr1), src2 = cv::cvarrToMat(srcarr2), 02166 dst = cv::cvarrToMat(dstarr); 02167 CV_Assert( src1.size == dst.size && src1.channels() == dst.channels() ); 02168 cv::addWeighted( src1, alpha, src2, beta, gamma, dst, dst.type() ); 02169 } 02170 02171 02172 CV_IMPL void 02173 cvAbsDiff( const CvArr* srcarr1, const CvArr* srcarr2, CvArr* dstarr ) 02174 { 02175 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr); 02176 CV_Assert( src1.size == dst.size && src1.type() == dst.type() ); 02177 02178 cv::absdiff( src1, cv::cvarrToMat(srcarr2), dst ); 02179 } 02180 02181 02182 CV_IMPL void 02183 cvAbsDiffS( const CvArr* srcarr1, CvArr* dstarr, CvScalar scalar ) 02184 { 02185 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr); 02186 CV_Assert( src1.size == dst.size && src1.type() == dst.type() ); 02187 02188 cv::absdiff( src1, (const cv::Scalar &)scalar, dst ); 02189 } 02190 02191 02192 CV_IMPL void 02193 cvInRange( const void* srcarr1, const void* srcarr2, 02194 const void* srcarr3, void* dstarr ) 02195 { 02196 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr); 02197 CV_Assert( src1.size == dst.size && dst.type() == CV_8U ); 02198 02199 cv::inRange( src1, cv::cvarrToMat(srcarr2), cv::cvarrToMat(srcarr3), dst ); 02200 } 02201 02202 02203 CV_IMPL void 02204 cvInRangeS( const void* srcarr1, CvScalar lowerb, CvScalar upperb, void* dstarr ) 02205 { 02206 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr); 02207 CV_Assert( src1.size == dst.size && dst.type() == CV_8U ); 02208 02209 cv::inRange( src1, (const cv::Scalar &)lowerb, (const cv::Scalar &)upperb, dst ); 02210 } 02211 02212 02213 CV_IMPL void 02214 cvCmp( const void* srcarr1, const void* srcarr2, void* dstarr, int cmp_op ) 02215 { 02216 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = 
cv::cvarrToMat(dstarr); 02217 CV_Assert( src1.size == dst.size && dst.type() == CV_8U ); 02218 02219 cv::compare( src1, cv::cvarrToMat(srcarr2), dst, cmp_op ); 02220 } 02221 02222 02223 CV_IMPL void 02224 cvCmpS( const void* srcarr1, double value, void* dstarr, int cmp_op ) 02225 { 02226 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr); 02227 CV_Assert( src1.size == dst.size && dst.type() == CV_8U ); 02228 02229 cv::compare( src1, value, dst, cmp_op ); 02230 } 02231 02232 02233 CV_IMPL void 02234 cvMin( const void* srcarr1, const void* srcarr2, void* dstarr ) 02235 { 02236 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr); 02237 CV_Assert( src1.size == dst.size && src1.type() == dst.type() ); 02238 02239 cv::min( src1, cv::cvarrToMat(srcarr2), dst ); 02240 } 02241 02242 02243 CV_IMPL void 02244 cvMax( const void* srcarr1, const void* srcarr2, void* dstarr ) 02245 { 02246 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr); 02247 CV_Assert( src1.size == dst.size && src1.type() == dst.type() ); 02248 02249 cv::max( src1, cv::cvarrToMat(srcarr2), dst ); 02250 } 02251 02252 02253 CV_IMPL void 02254 cvMinS( const void* srcarr1, double value, void* dstarr ) 02255 { 02256 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr); 02257 CV_Assert( src1.size == dst.size && src1.type() == dst.type() ); 02258 02259 cv::min( src1, value, dst ); 02260 } 02261 02262 02263 CV_IMPL void 02264 cvMaxS( const void* srcarr1, double value, void* dstarr ) 02265 { 02266 cv::Mat src1 = cv::cvarrToMat(srcarr1), dst = cv::cvarrToMat(dstarr); 02267 CV_Assert( src1.size == dst.size && src1.type() == dst.type() ); 02268 02269 cv::max( src1, value, dst ); 02270 } 02271 02272 02273 02274 namespace cv { namespace hal { 02275 02276 //======================================= 02277 02278 #if (ARITHM_USE_IPP == 1) 02279 static inline void fixSteps(int width, int height, size_t elemSize, size_t& step1, size_t& step2, size_t& step) 02280 
{ 02281 if( height == 1 ) 02282 step1 = step2 = step = width*elemSize; 02283 } 02284 #define CALL_IPP_BIN_E_12(fun) \ 02285 CV_IPP_CHECK() \ 02286 { \ 02287 fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ 02288 if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0)) \ 02289 { \ 02290 CV_IMPL_ADD(CV_IMPL_IPP); \ 02291 return; \ 02292 } \ 02293 setIppErrorStatus(); \ 02294 } 02295 02296 #define CALL_IPP_BIN_E_21(fun) \ 02297 CV_IPP_CHECK() \ 02298 { \ 02299 fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ 02300 if (0 <= fun(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height), 0)) \ 02301 { \ 02302 CV_IMPL_ADD(CV_IMPL_IPP); \ 02303 return; \ 02304 } \ 02305 setIppErrorStatus(); \ 02306 } 02307 02308 #define CALL_IPP_BIN_12(fun) \ 02309 CV_IPP_CHECK() \ 02310 { \ 02311 fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ 02312 if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height))) \ 02313 { \ 02314 CV_IMPL_ADD(CV_IMPL_IPP); \ 02315 return; \ 02316 } \ 02317 setIppErrorStatus(); \ 02318 } 02319 02320 #define CALL_IPP_BIN_21(fun) \ 02321 CV_IPP_CHECK() \ 02322 { \ 02323 fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ 02324 if (0 <= fun(src2, (int)step2, src1, (int)step1, dst, (int)step, ippiSize(width, height))) \ 02325 { \ 02326 CV_IMPL_ADD(CV_IMPL_IPP); \ 02327 return; \ 02328 } \ 02329 setIppErrorStatus(); \ 02330 } 02331 02332 #else 02333 #define CALL_IPP_BIN_E_12(fun) 02334 #define CALL_IPP_BIN_E_21(fun) 02335 #define CALL_IPP_BIN_12(fun) 02336 #define CALL_IPP_BIN_21(fun) 02337 #endif 02338 02339 02340 //======================================= 02341 // Add 02342 //======================================= 02343 02344 void add8u( const uchar* src1, size_t step1, 02345 const uchar* src2, size_t step2, 02346 uchar* dst, size_t step, int width, int height, void* ) 02347 { 02348 CALL_HAL(add8u, cv_hal_add8u, src1, step1, 
src2, step2, dst, step, width, height) 02349 CALL_IPP_BIN_E_12(ippiAdd_8u_C1RSfs) 02350 (vBinOp<uchar, cv::OpAdd<uchar>, IF_SIMD(VAdd<uchar>)>(src1, step1, src2, step2, dst, step, width, height)); 02351 } 02352 02353 void add8s( const schar* src1, size_t step1, 02354 const schar* src2, size_t step2, 02355 schar* dst, size_t step, int width, int height, void* ) 02356 { 02357 CALL_HAL(add8s, cv_hal_add8s, src1, step1, src2, step2, dst, step, width, height) 02358 vBinOp<schar, cv::OpAdd<schar>, IF_SIMD(VAdd<schar>)>(src1, step1, src2, step2, dst, step, width, height); 02359 } 02360 02361 void add16u( const ushort* src1, size_t step1, 02362 const ushort* src2, size_t step2, 02363 ushort* dst, size_t step, int width, int height, void* ) 02364 { 02365 CALL_HAL(add16u, cv_hal_add16u, src1, step1, src2, step2, dst, step, width, height) 02366 CALL_IPP_BIN_E_12(ippiAdd_16u_C1RSfs) 02367 (vBinOp<ushort, cv::OpAdd<ushort>, IF_SIMD(VAdd<ushort>)>(src1, step1, src2, step2, dst, step, width, height)); 02368 } 02369 02370 void add16s( const short* src1, size_t step1, 02371 const short* src2, size_t step2, 02372 short* dst, size_t step, int width, int height, void* ) 02373 { 02374 CALL_HAL(add16s, cv_hal_add16s, src1, step1, src2, step2, dst, step, width, height) 02375 CALL_IPP_BIN_E_12(ippiAdd_16s_C1RSfs) 02376 (vBinOp<short, cv::OpAdd<short>, IF_SIMD(VAdd<short>)>(src1, step1, src2, step2, dst, step, width, height)); 02377 } 02378 02379 void add32s( const int* src1, size_t step1, 02380 const int* src2, size_t step2, 02381 int* dst, size_t step, int width, int height, void* ) 02382 { 02383 CALL_HAL(add32s, cv_hal_add32s, src1, step1, src2, step2, dst, step, width, height) 02384 vBinOp32<int, cv::OpAdd<int>, IF_SIMD(VAdd<int>)>(src1, step1, src2, step2, dst, step, width, height); 02385 } 02386 02387 void add32f( const float* src1, size_t step1, 02388 const float* src2, size_t step2, 02389 float* dst, size_t step, int width, int height, void* ) 02390 { 02391 CALL_HAL(add32f, 
cv_hal_add32f, src1, step1, src2, step2, dst, step, width, height) 02392 CALL_IPP_BIN_12(ippiAdd_32f_C1R) 02393 (vBinOp32<float, cv::OpAdd<float>, IF_SIMD(VAdd<float>)>(src1, step1, src2, step2, dst, step, width, height)); 02394 } 02395 02396 void add64f( const double* src1, size_t step1, 02397 const double* src2, size_t step2, 02398 double* dst, size_t step, int width, int height, void* ) 02399 { 02400 CALL_HAL(add64f, cv_hal_add64f, src1, step1, src2, step2, dst, step, width, height) 02401 vBinOp64<double, cv::OpAdd<double>, IF_SIMD(VAdd<double>)>(src1, step1, src2, step2, dst, step, width, height); 02402 } 02403 02404 //======================================= 02405 // Subtract 02406 //======================================= 02407 02408 void sub8u( const uchar* src1, size_t step1, 02409 const uchar* src2, size_t step2, 02410 uchar* dst, size_t step, int width, int height, void* ) 02411 { 02412 CALL_HAL(sub8u, cv_hal_sub8u, src1, step1, src2, step2, dst, step, width, height) 02413 CALL_IPP_BIN_E_21(ippiSub_8u_C1RSfs) 02414 (vBinOp<uchar, cv::OpSub<uchar>, IF_SIMD(VSub<uchar>)>(src1, step1, src2, step2, dst, step, width, height)); 02415 } 02416 02417 void sub8s( const schar* src1, size_t step1, 02418 const schar* src2, size_t step2, 02419 schar* dst, size_t step, int width, int height, void* ) 02420 { 02421 CALL_HAL(sub8s, cv_hal_sub8s, src1, step1, src2, step2, dst, step, width, height) 02422 vBinOp<schar, cv::OpSub<schar>, IF_SIMD(VSub<schar>)>(src1, step1, src2, step2, dst, step, width, height); 02423 } 02424 02425 void sub16u( const ushort* src1, size_t step1, 02426 const ushort* src2, size_t step2, 02427 ushort* dst, size_t step, int width, int height, void* ) 02428 { 02429 CALL_HAL(sub16u, cv_hal_sub16u, src1, step1, src2, step2, dst, step, width, height) 02430 CALL_IPP_BIN_E_21(ippiSub_16u_C1RSfs) 02431 (vBinOp<ushort, cv::OpSub<ushort>, IF_SIMD(VSub<ushort>)>(src1, step1, src2, step2, dst, step, width, height)); 02432 } 02433 02434 void sub16s( const short* 
src1, size_t step1, 02435 const short* src2, size_t step2, 02436 short* dst, size_t step, int width, int height, void* ) 02437 { 02438 CALL_HAL(sub16s, cv_hal_sub16s, src1, step1, src2, step2, dst, step, width, height) 02439 CALL_IPP_BIN_E_21(ippiSub_16s_C1RSfs) 02440 (vBinOp<short, cv::OpSub<short>, IF_SIMD(VSub<short>)>(src1, step1, src2, step2, dst, step, width, height)); 02441 } 02442 02443 void sub32s( const int* src1, size_t step1, 02444 const int* src2, size_t step2, 02445 int* dst, size_t step, int width, int height, void* ) 02446 { 02447 CALL_HAL(sub32s, cv_hal_sub32s, src1, step1, src2, step2, dst, step, width, height) 02448 vBinOp32<int, cv::OpSub<int>, IF_SIMD(VSub<int>)>(src1, step1, src2, step2, dst, step, width, height); 02449 } 02450 02451 void sub32f( const float* src1, size_t step1, 02452 const float* src2, size_t step2, 02453 float* dst, size_t step, int width, int height, void* ) 02454 { 02455 CALL_HAL(sub32f, cv_hal_sub32f, src1, step1, src2, step2, dst, step, width, height) 02456 CALL_IPP_BIN_21(ippiSub_32f_C1R) 02457 (vBinOp32<float, cv::OpSub<float>, IF_SIMD(VSub<float>)>(src1, step1, src2, step2, dst, step, width, height)); 02458 } 02459 02460 void sub64f( const double* src1, size_t step1, 02461 const double* src2, size_t step2, 02462 double* dst, size_t step, int width, int height, void* ) 02463 { 02464 CALL_HAL(sub64f, cv_hal_sub64f, src1, step1, src2, step2, dst, step, width, height) 02465 vBinOp64<double, cv::OpSub<double>, IF_SIMD(VSub<double>)>(src1, step1, src2, step2, dst, step, width, height); 02466 } 02467 02468 //======================================= 02469 02470 #if (ARITHM_USE_IPP == 1) 02471 #define CALL_IPP_MIN_MAX(fun, type) \ 02472 CV_IPP_CHECK() \ 02473 { \ 02474 type* s1 = (type*)src1; \ 02475 type* s2 = (type*)src2; \ 02476 type* d = dst; \ 02477 fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ 02478 int i = 0; \ 02479 for(; i < height; i++) \ 02480 { \ 02481 if (0 > fun(s1, s2, d, width)) \ 02482 break; \ 
02483 s1 = (type*)((uchar*)s1 + step1); \ 02484 s2 = (type*)((uchar*)s2 + step2); \ 02485 d = (type*)((uchar*)d + step); \ 02486 } \ 02487 if (i == height) \ 02488 { \ 02489 CV_IMPL_ADD(CV_IMPL_IPP); \ 02490 return; \ 02491 } \ 02492 setIppErrorStatus(); \ 02493 } 02494 #else 02495 #define CALL_IPP_MIN_MAX(fun, type) 02496 #endif 02497 02498 //======================================= 02499 // Max 02500 //======================================= 02501 02502 void max8u( const uchar* src1, size_t step1, 02503 const uchar* src2, size_t step2, 02504 uchar* dst, size_t step, int width, int height, void* ) 02505 { 02506 CALL_HAL(max8u, cv_hal_max8u, src1, step1, src2, step2, dst, step, width, height) 02507 CALL_IPP_MIN_MAX(ippsMaxEvery_8u, uchar) 02508 vBinOp<uchar, cv::OpMax<uchar>, IF_SIMD(VMax<uchar>)>(src1, step1, src2, step2, dst, step, width, height); 02509 } 02510 02511 void max8s( const schar* src1, size_t step1, 02512 const schar* src2, size_t step2, 02513 schar* dst, size_t step, int width, int height, void* ) 02514 { 02515 CALL_HAL(max8s, cv_hal_max8s, src1, step1, src2, step2, dst, step, width, height) 02516 vBinOp<schar, cv::OpMax<schar>, IF_SIMD(VMax<schar>)>(src1, step1, src2, step2, dst, step, width, height); 02517 } 02518 02519 void max16u( const ushort* src1, size_t step1, 02520 const ushort* src2, size_t step2, 02521 ushort* dst, size_t step, int width, int height, void* ) 02522 { 02523 CALL_HAL(max16u, cv_hal_max16u, src1, step1, src2, step2, dst, step, width, height) 02524 CALL_IPP_MIN_MAX(ippsMaxEvery_16u, ushort) 02525 vBinOp<ushort, cv::OpMax<ushort>, IF_SIMD(VMax<ushort>)>(src1, step1, src2, step2, dst, step, width, height); 02526 } 02527 02528 void max16s( const short* src1, size_t step1, 02529 const short* src2, size_t step2, 02530 short* dst, size_t step, int width, int height, void* ) 02531 { 02532 CALL_HAL(max16s, cv_hal_max16s, src1, step1, src2, step2, dst, step, width, height) 02533 vBinOp<short, cv::OpMax<short>, IF_SIMD(VMax<short>)>(src1, 
step1, src2, step2, dst, step, width, height); 02534 } 02535 02536 void max32s( const int* src1, size_t step1, 02537 const int* src2, size_t step2, 02538 int* dst, size_t step, int width, int height, void* ) 02539 { 02540 CALL_HAL(max32s, cv_hal_max32s, src1, step1, src2, step2, dst, step, width, height) 02541 vBinOp32<int, cv::OpMax<int>, IF_SIMD(VMax<int>)>(src1, step1, src2, step2, dst, step, width, height); 02542 } 02543 02544 void max32f( const float* src1, size_t step1, 02545 const float* src2, size_t step2, 02546 float* dst, size_t step, int width, int height, void* ) 02547 { 02548 CALL_HAL(max32f, cv_hal_max32f, src1, step1, src2, step2, dst, step, width, height) 02549 CALL_IPP_MIN_MAX(ippsMaxEvery_32f, float) 02550 vBinOp32<float, cv::OpMax<float>, IF_SIMD(VMax<float>)>(src1, step1, src2, step2, dst, step, width, height); 02551 } 02552 02553 void max64f( const double* src1, size_t step1, 02554 const double* src2, size_t step2, 02555 double* dst, size_t step, int width, int height, void* ) 02556 { 02557 CALL_HAL(max64f, cv_hal_max64f, src1, step1, src2, step2, dst, step, width, height) 02558 CALL_IPP_MIN_MAX(ippsMaxEvery_64f, double) 02559 vBinOp64<double, cv::OpMax<double>, IF_SIMD(VMax<double>)>(src1, step1, src2, step2, dst, step, width, height); 02560 } 02561 02562 //======================================= 02563 // Min 02564 //======================================= 02565 02566 void min8u( const uchar* src1, size_t step1, 02567 const uchar* src2, size_t step2, 02568 uchar* dst, size_t step, int width, int height, void* ) 02569 { 02570 CALL_HAL(min8u, cv_hal_min8u, src1, step1, src2, step2, dst, step, width, height) 02571 CALL_IPP_MIN_MAX(ippsMinEvery_8u, uchar) 02572 vBinOp<uchar, cv::OpMin<uchar>, IF_SIMD(VMin<uchar>)>(src1, step1, src2, step2, dst, step, width, height); 02573 } 02574 02575 void min8s( const schar* src1, size_t step1, 02576 const schar* src2, size_t step2, 02577 schar* dst, size_t step, int width, int height, void* ) 02578 { 02579 
CALL_HAL(min8s, cv_hal_min8s, src1, step1, src2, step2, dst, step, width, height) 02580 vBinOp<schar, cv::OpMin<schar>, IF_SIMD(VMin<schar>)>(src1, step1, src2, step2, dst, step, width, height); 02581 } 02582 02583 void min16u( const ushort* src1, size_t step1, 02584 const ushort* src2, size_t step2, 02585 ushort* dst, size_t step, int width, int height, void* ) 02586 { 02587 CALL_HAL(min16u, cv_hal_min16u, src1, step1, src2, step2, dst, step, width, height) 02588 CALL_IPP_MIN_MAX(ippsMinEvery_16u, ushort) 02589 vBinOp<ushort, cv::OpMin<ushort>, IF_SIMD(VMin<ushort>)>(src1, step1, src2, step2, dst, step, width, height); 02590 } 02591 02592 void min16s( const short* src1, size_t step1, 02593 const short* src2, size_t step2, 02594 short* dst, size_t step, int width, int height, void* ) 02595 { 02596 CALL_HAL(min16s, cv_hal_min16s, src1, step1, src2, step2, dst, step, width, height) 02597 vBinOp<short, cv::OpMin<short>, IF_SIMD(VMin<short>)>(src1, step1, src2, step2, dst, step, width, height); 02598 } 02599 02600 void min32s( const int* src1, size_t step1, 02601 const int* src2, size_t step2, 02602 int* dst, size_t step, int width, int height, void* ) 02603 { 02604 CALL_HAL(min32s, cv_hal_min32s, src1, step1, src2, step2, dst, step, width, height) 02605 vBinOp32<int, cv::OpMin<int>, IF_SIMD(VMin<int>)>(src1, step1, src2, step2, dst, step, width, height); 02606 } 02607 02608 void min32f( const float* src1, size_t step1, 02609 const float* src2, size_t step2, 02610 float* dst, size_t step, int width, int height, void* ) 02611 { 02612 CALL_HAL(min32f, cv_hal_min32f, src1, step1, src2, step2, dst, step, width, height) 02613 CALL_IPP_MIN_MAX(ippsMinEvery_32f, float) 02614 vBinOp32<float, cv::OpMin<float>, IF_SIMD(VMin<float>)>(src1, step1, src2, step2, dst, step, width, height); 02615 } 02616 02617 void min64f( const double* src1, size_t step1, 02618 const double* src2, size_t step2, 02619 double* dst, size_t step, int width, int height, void* ) 02620 { 02621 
CALL_HAL(min64f, cv_hal_min64f, src1, step1, src2, step2, dst, step, width, height) 02622 CALL_IPP_MIN_MAX(ippsMinEvery_64f, double) 02623 vBinOp64<double, cv::OpMin<double>, IF_SIMD(VMin<double>)>(src1, step1, src2, step2, dst, step, width, height); 02624 } 02625 02626 //======================================= 02627 // AbsDiff 02628 //======================================= 02629 02630 void absdiff8u( const uchar* src1, size_t step1, 02631 const uchar* src2, size_t step2, 02632 uchar* dst, size_t step, int width, int height, void* ) 02633 { 02634 CALL_HAL(absdiff8u, cv_hal_absdiff8u, src1, step1, src2, step2, dst, step, width, height) 02635 CALL_IPP_BIN_12(ippiAbsDiff_8u_C1R) 02636 (vBinOp<uchar, cv::OpAbsDiff<uchar>, IF_SIMD(VAbsDiff<uchar>)>(src1, step1, src2, step2, dst, step, width, height)); 02637 } 02638 02639 void absdiff8s( const schar* src1, size_t step1, 02640 const schar* src2, size_t step2, 02641 schar* dst, size_t step, int width, int height, void* ) 02642 { 02643 CALL_HAL(absdiff8s, cv_hal_absdiff8s, src1, step1, src2, step2, dst, step, width, height) 02644 vBinOp<schar, cv::OpAbsDiff<schar>, IF_SIMD(VAbsDiff<schar>)>(src1, step1, src2, step2, dst, step, width, height); 02645 } 02646 02647 void absdiff16u( const ushort* src1, size_t step1, 02648 const ushort* src2, size_t step2, 02649 ushort* dst, size_t step, int width, int height, void* ) 02650 { 02651 CALL_HAL(absdiff16u, cv_hal_absdiff16u, src1, step1, src2, step2, dst, step, width, height) 02652 CALL_IPP_BIN_12(ippiAbsDiff_16u_C1R) 02653 (vBinOp<ushort, cv::OpAbsDiff<ushort>, IF_SIMD(VAbsDiff<ushort>)>(src1, step1, src2, step2, dst, step, width, height)); 02654 } 02655 02656 void absdiff16s( const short* src1, size_t step1, 02657 const short* src2, size_t step2, 02658 short* dst, size_t step, int width, int height, void* ) 02659 { 02660 CALL_HAL(absdiff16s, cv_hal_absdiff16s, src1, step1, src2, step2, dst, step, width, height) 02661 vBinOp<short, cv::OpAbsDiff<short>, 
IF_SIMD(VAbsDiff<short>)>(src1, step1, src2, step2, dst, step, width, height); 02662 } 02663 02664 void absdiff32s( const int* src1, size_t step1, 02665 const int* src2, size_t step2, 02666 int* dst, size_t step, int width, int height, void* ) 02667 { 02668 CALL_HAL(absdiff32s, cv_hal_absdiff32s, src1, step1, src2, step2, dst, step, width, height) 02669 vBinOp32<int, cv::OpAbsDiff<int>, IF_SIMD(VAbsDiff<int>)>(src1, step1, src2, step2, dst, step, width, height); 02670 } 02671 02672 void absdiff32f( const float* src1, size_t step1, 02673 const float* src2, size_t step2, 02674 float* dst, size_t step, int width, int height, void* ) 02675 { 02676 CALL_HAL(absdiff32f, cv_hal_absdiff32f, src1, step1, src2, step2, dst, step, width, height) 02677 CALL_IPP_BIN_12(ippiAbsDiff_32f_C1R) 02678 (vBinOp32<float, cv::OpAbsDiff<float>, IF_SIMD(VAbsDiff<float>)>(src1, step1, src2, step2, dst, step, width, height)); 02679 } 02680 02681 void absdiff64f( const double* src1, size_t step1, 02682 const double* src2, size_t step2, 02683 double* dst, size_t step, int width, int height, void* ) 02684 { 02685 CALL_HAL(absdiff64f, cv_hal_absdiff64f, src1, step1, src2, step2, dst, step, width, height) 02686 vBinOp64<double, cv::OpAbsDiff<double>, IF_SIMD(VAbsDiff<double>)>(src1, step1, src2, step2, dst, step, width, height); 02687 } 02688 02689 //======================================= 02690 // Logical 02691 //======================================= 02692 02693 #if (ARITHM_USE_IPP == 1) 02694 #define CALL_IPP_UN(fun) \ 02695 CV_IPP_CHECK() \ 02696 { \ 02697 fixSteps(width, height, sizeof(dst[0]), step1, step2, step); (void)src2; \ 02698 if (0 <= fun(src1, (int)step1, dst, (int)step, ippiSize(width, height))) \ 02699 { \ 02700 CV_IMPL_ADD(CV_IMPL_IPP); \ 02701 return; \ 02702 } \ 02703 setIppErrorStatus(); \ 02704 } 02705 #else 02706 #define CALL_IPP_UN(fun) 02707 #endif 02708 02709 void and8u( const uchar* src1, size_t step1, 02710 const uchar* src2, size_t step2, 02711 uchar* dst, size_t 
step, int width, int height, void* ) 02712 { 02713 CALL_HAL(and8u, cv_hal_and8u, src1, step1, src2, step2, dst, step, width, height) 02714 CALL_IPP_BIN_12(ippiAnd_8u_C1R) 02715 (vBinOp<uchar, cv::OpAnd<uchar>, IF_SIMD(VAnd<uchar>)>(src1, step1, src2, step2, dst, step, width, height)); 02716 } 02717 02718 void or8u( const uchar* src1, size_t step1, 02719 const uchar* src2, size_t step2, 02720 uchar* dst, size_t step, int width, int height, void* ) 02721 { 02722 CALL_HAL(or8u, cv_hal_or8u, src1, step1, src2, step2, dst, step, width, height) 02723 CALL_IPP_BIN_12(ippiOr_8u_C1R) 02724 (vBinOp<uchar, cv::OpOr<uchar>, IF_SIMD(VOr<uchar>)>(src1, step1, src2, step2, dst, step, width, height)); 02725 } 02726 02727 void xor8u( const uchar* src1, size_t step1, 02728 const uchar* src2, size_t step2, 02729 uchar* dst, size_t step, int width, int height, void* ) 02730 { 02731 CALL_HAL(xor8u, cv_hal_xor8u, src1, step1, src2, step2, dst, step, width, height) 02732 CALL_IPP_BIN_12(ippiXor_8u_C1R) 02733 (vBinOp<uchar, cv::OpXor<uchar>, IF_SIMD(VXor<uchar>)>(src1, step1, src2, step2, dst, step, width, height)); 02734 } 02735 02736 void not8u( const uchar* src1, size_t step1, 02737 const uchar* src2, size_t step2, 02738 uchar* dst, size_t step, int width, int height, void* ) 02739 { 02740 CALL_HAL(not8u, cv_hal_not8u, src1, step1, dst, step, width, height) 02741 CALL_IPP_UN(ippiNot_8u_C1R) 02742 (vBinOp<uchar, cv::OpNot<uchar>, IF_SIMD(VNot<uchar>)>(src1, step1, src2, step2, dst, step, width, height)); 02743 } 02744 02745 //======================================= 02746 02747 #if ARITHM_USE_IPP 02748 inline static IppCmpOp convert_cmp(int _cmpop) 02749 { 02750 return _cmpop == CMP_EQ ? ippCmpEq : 02751 _cmpop == CMP_GT ? ippCmpGreater : 02752 _cmpop == CMP_GE ? ippCmpGreaterEq : 02753 _cmpop == CMP_LT ? ippCmpLess : 02754 _cmpop == CMP_LE ? 
ippCmpLessEq : 02755 (IppCmpOp)-1; 02756 } 02757 #define CALL_IPP_CMP(fun) \ 02758 CV_IPP_CHECK() \ 02759 { \ 02760 IppCmpOp op = convert_cmp(*(int *)_cmpop); \ 02761 if( op >= 0 ) \ 02762 { \ 02763 fixSteps(width, height, sizeof(dst[0]), step1, step2, step); \ 02764 if (0 <= fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), op)) \ 02765 { \ 02766 CV_IMPL_ADD(CV_IMPL_IPP); \ 02767 return; \ 02768 } \ 02769 setIppErrorStatus(); \ 02770 } \ 02771 } 02772 #else 02773 #define CALL_IPP_CMP(fun) 02774 #endif 02775 02776 //======================================= 02777 // Compare 02778 //======================================= 02779 02780 void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, 02781 uchar* dst, size_t step, int width, int height, void* _cmpop) 02782 { 02783 CALL_HAL(cmp8u, cv_hal_cmp8u, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop) 02784 CALL_IPP_CMP(ippiCompare_8u_C1R) 02785 //vz optimized cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); 02786 int code = *(int*)_cmpop; 02787 step1 /= sizeof(src1[0]); 02788 step2 /= sizeof(src2[0]); 02789 if( code == CMP_GE || code == CMP_LT ) 02790 { 02791 std::swap(src1, src2); 02792 std::swap(step1, step2); 02793 code = code == CMP_GE ? CMP_LE : CMP_GT; 02794 } 02795 02796 if( code == CMP_GT || code == CMP_LE ) 02797 { 02798 int m = code == CMP_GT ? 0 : 255; 02799 for( ; height--; src1 += step1, src2 += step2, dst += step ) 02800 { 02801 int x =0; 02802 #if CV_SSE2 02803 if( USE_SSE2 ) 02804 { 02805 __m128i m128 = code == CMP_GT ? 
_mm_setzero_si128() : _mm_set1_epi8 (-1); 02806 __m128i c128 = _mm_set1_epi8 (-128); 02807 for( ; x <= width - 16; x += 16 ) 02808 { 02809 __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); 02810 __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); 02811 // no simd for 8u comparison, that's why we need the trick 02812 r00 = _mm_sub_epi8(r00,c128); 02813 r10 = _mm_sub_epi8(r10,c128); 02814 02815 r00 =_mm_xor_si128(_mm_cmpgt_epi8(r00, r10), m128); 02816 _mm_storeu_si128((__m128i*)(dst + x),r00); 02817 02818 } 02819 } 02820 #elif CV_NEON 02821 uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255); 02822 02823 for( ; x <= width - 16; x += 16 ) 02824 { 02825 vst1q_u8(dst+x, veorq_u8(vcgtq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask)); 02826 } 02827 02828 #endif 02829 02830 for( ; x < width; x++ ){ 02831 dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m); 02832 } 02833 } 02834 } 02835 else if( code == CMP_EQ || code == CMP_NE ) 02836 { 02837 int m = code == CMP_EQ ? 0 : 255; 02838 for( ; height--; src1 += step1, src2 += step2, dst += step ) 02839 { 02840 int x = 0; 02841 #if CV_SSE2 02842 if( USE_SSE2 ) 02843 { 02844 __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi8 (-1); 02845 for( ; x <= width - 16; x += 16 ) 02846 { 02847 __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); 02848 __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); 02849 r00 = _mm_xor_si128 ( _mm_cmpeq_epi8 (r00, r10), m128); 02850 _mm_storeu_si128((__m128i*)(dst + x), r00); 02851 } 02852 } 02853 #elif CV_NEON 02854 uint8x16_t mask = code == CMP_EQ ? 
vdupq_n_u8(0) : vdupq_n_u8(255); 02855 02856 for( ; x <= width - 16; x += 16 ) 02857 { 02858 vst1q_u8(dst+x, veorq_u8(vceqq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask)); 02859 } 02860 #endif 02861 for( ; x < width; x++ ) 02862 dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m); 02863 } 02864 } 02865 } 02866 02867 void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2, 02868 uchar* dst, size_t step, int width, int height, void* _cmpop) 02869 { 02870 CALL_HAL(cmp8s, cv_hal_cmp8s, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop) 02871 cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); 02872 } 02873 02874 void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, 02875 uchar* dst, size_t step, int width, int height, void* _cmpop) 02876 { 02877 CALL_HAL(cmp16u, cv_hal_cmp16u, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop) 02878 CALL_IPP_CMP(ippiCompare_16u_C1R) 02879 cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); 02880 } 02881 02882 void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2, 02883 uchar* dst, size_t step, int width, int height, void* _cmpop) 02884 { 02885 CALL_HAL(cmp16s, cv_hal_cmp16s, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop) 02886 CALL_IPP_CMP(ippiCompare_16s_C1R) 02887 //vz optimized cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); 02888 02889 int code = *(int*)_cmpop; 02890 step1 /= sizeof(src1[0]); 02891 step2 /= sizeof(src2[0]); 02892 if( code == CMP_GE || code == CMP_LT ) 02893 { 02894 std::swap(src1, src2); 02895 std::swap(step1, step2); 02896 code = code == CMP_GE ? CMP_LE : CMP_GT; 02897 } 02898 02899 if( code == CMP_GT || code == CMP_LE ) 02900 { 02901 int m = code == CMP_GT ? 
0 : 255; 02902 for( ; height--; src1 += step1, src2 += step2, dst += step ) 02903 { 02904 int x =0; 02905 #if CV_SSE2 02906 if( USE_SSE2) 02907 { 02908 __m128i m128 = code == CMP_GT ? _mm_setzero_si128() : _mm_set1_epi16 (-1); 02909 for( ; x <= width - 16; x += 16 ) 02910 { 02911 __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); 02912 __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); 02913 r00 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r00, r10), m128); 02914 __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8)); 02915 __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8)); 02916 r01 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r01, r11), m128); 02917 r11 = _mm_packs_epi16(r00, r01); 02918 _mm_storeu_si128((__m128i*)(dst + x), r11); 02919 } 02920 if( x <= width-8) 02921 { 02922 __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); 02923 __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); 02924 r00 = _mm_xor_si128 ( _mm_cmpgt_epi16 (r00, r10), m128); 02925 r10 = _mm_packs_epi16(r00, r00); 02926 _mm_storel_epi64((__m128i*)(dst + x), r10); 02927 02928 x += 8; 02929 } 02930 } 02931 #elif CV_NEON 02932 uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255); 02933 02934 for( ; x <= width - 16; x += 16 ) 02935 { 02936 int16x8_t in1 = vld1q_s16(src1 + x); 02937 int16x8_t in2 = vld1q_s16(src2 + x); 02938 uint8x8_t t1 = vmovn_u16(vcgtq_s16(in1, in2)); 02939 02940 in1 = vld1q_s16(src1 + x + 8); 02941 in2 = vld1q_s16(src2 + x + 8); 02942 uint8x8_t t2 = vmovn_u16(vcgtq_s16(in1, in2)); 02943 02944 vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask)); 02945 } 02946 #endif 02947 02948 for( ; x < width; x++ ){ 02949 dst[x] = (uchar)(-(src1[x] > src2[x]) ^ m); 02950 } 02951 } 02952 } 02953 else if( code == CMP_EQ || code == CMP_NE ) 02954 { 02955 int m = code == CMP_EQ ? 
0 : 255; 02956 for( ; height--; src1 += step1, src2 += step2, dst += step ) 02957 { 02958 int x = 0; 02959 #if CV_SSE2 02960 if( USE_SSE2 ) 02961 { 02962 __m128i m128 = code == CMP_EQ ? _mm_setzero_si128() : _mm_set1_epi16 (-1); 02963 for( ; x <= width - 16; x += 16 ) 02964 { 02965 __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); 02966 __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); 02967 r00 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r00, r10), m128); 02968 __m128i r01 = _mm_loadu_si128((const __m128i*)(src1 + x + 8)); 02969 __m128i r11 = _mm_loadu_si128((const __m128i*)(src2 + x + 8)); 02970 r01 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r01, r11), m128); 02971 r11 = _mm_packs_epi16(r00, r01); 02972 _mm_storeu_si128((__m128i*)(dst + x), r11); 02973 } 02974 if( x <= width - 8) 02975 { 02976 __m128i r00 = _mm_loadu_si128((const __m128i*)(src1 + x)); 02977 __m128i r10 = _mm_loadu_si128((const __m128i*)(src2 + x)); 02978 r00 = _mm_xor_si128 ( _mm_cmpeq_epi16 (r00, r10), m128); 02979 r10 = _mm_packs_epi16(r00, r00); 02980 _mm_storel_epi64((__m128i*)(dst + x), r10); 02981 02982 x += 8; 02983 } 02984 } 02985 #elif CV_NEON 02986 uint8x16_t mask = code == CMP_EQ ? 
vdupq_n_u8(0) : vdupq_n_u8(255); 02987 02988 for( ; x <= width - 16; x += 16 ) 02989 { 02990 int16x8_t in1 = vld1q_s16(src1 + x); 02991 int16x8_t in2 = vld1q_s16(src2 + x); 02992 uint8x8_t t1 = vmovn_u16(vceqq_s16(in1, in2)); 02993 02994 in1 = vld1q_s16(src1 + x + 8); 02995 in2 = vld1q_s16(src2 + x + 8); 02996 uint8x8_t t2 = vmovn_u16(vceqq_s16(in1, in2)); 02997 02998 vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask)); 02999 } 03000 #endif 03001 for( ; x < width; x++ ) 03002 dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m); 03003 } 03004 } 03005 } 03006 03007 void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2, 03008 uchar* dst, size_t step, int width, int height, void* _cmpop) 03009 { 03010 CALL_HAL(cmp32s, cv_hal_cmp32s, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop) 03011 cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); 03012 } 03013 03014 void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2, 03015 uchar* dst, size_t step, int width, int height, void* _cmpop) 03016 { 03017 CALL_HAL(cmp32f, cv_hal_cmp32f, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop) 03018 CALL_IPP_CMP(ippiCompare_32f_C1R) 03019 cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); 03020 } 03021 03022 void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2, 03023 uchar* dst, size_t step, int width, int height, void* _cmpop) 03024 { 03025 CALL_HAL(cmp64f, cv_hal_cmp64f, src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop) 03026 cmp_(src1, step1, src2, step2, dst, step, width, height, *(int*)_cmpop); 03027 } 03028 03029 //======================================= 03030 03031 #if defined HAVE_IPP 03032 #define CALL_IPP_MUL(fun) \ 03033 CV_IPP_CHECK() \ 03034 { \ 03035 if (std::fabs(fscale - 1) <= FLT_EPSILON) \ 03036 { \ 03037 if (fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height), 0) >= 0) \ 03038 { \ 03039 
CV_IMPL_ADD(CV_IMPL_IPP); \ 03040 return; \ 03041 } \ 03042 setIppErrorStatus(); \ 03043 } \ 03044 } 03045 03046 #define CALL_IPP_MUL_2(fun) \ 03047 CV_IPP_CHECK() \ 03048 { \ 03049 if (std::fabs(fscale - 1) <= FLT_EPSILON) \ 03050 { \ 03051 if (fun(src1, (int)step1, src2, (int)step2, dst, (int)step, ippiSize(width, height)) >= 0) \ 03052 { \ 03053 CV_IMPL_ADD(CV_IMPL_IPP); \ 03054 return; \ 03055 } \ 03056 setIppErrorStatus(); \ 03057 } \ 03058 } 03059 03060 #else 03061 #define CALL_IPP_MUL(fun) 03062 #define CALL_IPP_MUL_2(fun) 03063 #endif 03064 03065 //======================================= 03066 // Multilpy 03067 //======================================= 03068 03069 void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, 03070 uchar* dst, size_t step, int width, int height, void* scale) 03071 { 03072 CALL_HAL(mul8u, cv_hal_mul8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03073 float fscale = (float)*(const double*)scale; 03074 CALL_IPP_MUL(ippiMul_8u_C1RSfs) 03075 mul_(src1, step1, src2, step2, dst, step, width, height, fscale); 03076 } 03077 03078 void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2, 03079 schar* dst, size_t step, int width, int height, void* scale) 03080 { 03081 CALL_HAL(mul8s, cv_hal_mul8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03082 mul_(src1, step1, src2, step2, dst, step, width, height, (float)*(const double*)scale); 03083 } 03084 03085 void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, 03086 ushort* dst, size_t step, int width, int height, void* scale) 03087 { 03088 CALL_HAL(mul16u, cv_hal_mul16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03089 float fscale = (float)*(const double*)scale; 03090 CALL_IPP_MUL(ippiMul_16u_C1RSfs) 03091 mul_(src1, step1, src2, step2, dst, step, width, height, fscale); 03092 } 03093 03094 void mul16s( const short* src1, size_t 
step1, const short* src2, size_t step2, 03095 short* dst, size_t step, int width, int height, void* scale) 03096 { 03097 CALL_HAL(mul16s, cv_hal_mul16s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03098 float fscale = (float)*(const double*)scale; 03099 CALL_IPP_MUL(ippiMul_16s_C1RSfs) 03100 mul_(src1, step1, src2, step2, dst, step, width, height, fscale); 03101 } 03102 03103 void mul32s( const int* src1, size_t step1, const int* src2, size_t step2, 03104 int* dst, size_t step, int width, int height, void* scale) 03105 { 03106 CALL_HAL(mul32s, cv_hal_mul32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03107 mul_(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03108 } 03109 03110 void mul32f( const float* src1, size_t step1, const float* src2, size_t step2, 03111 float* dst, size_t step, int width, int height, void* scale) 03112 { 03113 CALL_HAL(mul32f, cv_hal_mul32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03114 float fscale = (float)*(const double*)scale; 03115 CALL_IPP_MUL_2(ippiMul_32f_C1R) 03116 mul_(src1, step1, src2, step2, dst, step, width, height, fscale); 03117 } 03118 03119 void mul64f( const double* src1, size_t step1, const double* src2, size_t step2, 03120 double* dst, size_t step, int width, int height, void* scale) 03121 { 03122 CALL_HAL(mul64f, cv_hal_mul64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03123 mul_(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03124 } 03125 03126 //======================================= 03127 // Divide 03128 //======================================= 03129 03130 void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, 03131 uchar* dst, size_t step, int width, int height, void* scale) 03132 { 03133 CALL_HAL(div8u, cv_hal_div8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03134 if( src1 
) 03135 div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03136 else 03137 recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03138 } 03139 03140 void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2, 03141 schar* dst, size_t step, int width, int height, void* scale) 03142 { 03143 CALL_HAL(div8s, cv_hal_div8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03144 div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03145 } 03146 03147 void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, 03148 ushort* dst, size_t step, int width, int height, void* scale) 03149 { 03150 CALL_HAL(div16u, cv_hal_div16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03151 div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03152 } 03153 03154 void div16s( const short* src1, size_t step1, const short* src2, size_t step2, 03155 short* dst, size_t step, int width, int height, void* scale) 03156 { 03157 CALL_HAL(div16s, cv_hal_div16s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03158 div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03159 } 03160 03161 void div32s( const int* src1, size_t step1, const int* src2, size_t step2, 03162 int* dst, size_t step, int width, int height, void* scale) 03163 { 03164 CALL_HAL(div32s, cv_hal_div32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03165 div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03166 } 03167 03168 void div32f( const float* src1, size_t step1, const float* src2, size_t step2, 03169 float* dst, size_t step, int width, int height, void* scale) 03170 { 03171 CALL_HAL(div32f, cv_hal_div32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03172 div_f(src1, step1, 
src2, step2, dst, step, width, height, *(const double*)scale); 03173 } 03174 03175 void div64f( const double* src1, size_t step1, const double* src2, size_t step2, 03176 double* dst, size_t step, int width, int height, void* scale) 03177 { 03178 CALL_HAL(div64f, cv_hal_div64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03179 div_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03180 } 03181 03182 //======================================= 03183 // Reciprocial 03184 //======================================= 03185 03186 void recip8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, 03187 uchar* dst, size_t step, int width, int height, void* scale) 03188 { 03189 CALL_HAL(recip8u, cv_hal_recip8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03190 recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03191 } 03192 03193 void recip8s( const schar* src1, size_t step1, const schar* src2, size_t step2, 03194 schar* dst, size_t step, int width, int height, void* scale) 03195 { 03196 CALL_HAL(recip8s, cv_hal_recip8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03197 recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03198 } 03199 03200 void recip16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, 03201 ushort* dst, size_t step, int width, int height, void* scale) 03202 { 03203 CALL_HAL(recip16u, cv_hal_recip16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03204 recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03205 } 03206 03207 void recip16s( const short* src1, size_t step1, const short* src2, size_t step2, 03208 short* dst, size_t step, int width, int height, void* scale) 03209 { 03210 CALL_HAL(recip16s, cv_hal_recip16s, src1, step1, src2, step2, dst, step, width, height, *(const 
double*)scale) 03211 recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03212 } 03213 03214 void recip32s( const int* src1, size_t step1, const int* src2, size_t step2, 03215 int* dst, size_t step, int width, int height, void* scale) 03216 { 03217 CALL_HAL(recip32s, cv_hal_recip32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03218 recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03219 } 03220 03221 void recip32f( const float* src1, size_t step1, const float* src2, size_t step2, 03222 float* dst, size_t step, int width, int height, void* scale) 03223 { 03224 CALL_HAL(recip32f, cv_hal_recip32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03225 recip_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03226 } 03227 03228 void recip64f( const double* src1, size_t step1, const double* src2, size_t step2, 03229 double* dst, size_t step, int width, int height, void* scale) 03230 { 03231 CALL_HAL(recip64f, cv_hal_recip64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) 03232 recip_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); 03233 } 03234 03235 //======================================= 03236 // Add weighted 03237 //======================================= 03238 03239 void 03240 addWeighted8u( const uchar* src1, size_t step1, 03241 const uchar* src2, size_t step2, 03242 uchar* dst, size_t step, int width, int height, 03243 void* scalars ) 03244 { 03245 CALL_HAL(addWeighted8u, cv_hal_addWeighted8u, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars) 03246 const double* scalars_ = (const double*)scalars; 03247 float alpha = (float)scalars_[0], beta = (float)scalars_[1], gamma = (float)scalars_[2]; 03248 03249 for( ; height--; src1 += step1, src2 += step2, dst += step ) 03250 { 03251 int x = 0; 03252 03253 #if CV_SSE2 03254 if( USE_SSE2 ) 
03255 { 03256 __m128 a4 = _mm_set1_ps(alpha), b4 = _mm_set1_ps(beta), g4 = _mm_set1_ps(gamma); 03257 __m128i z = _mm_setzero_si128(); 03258 03259 for( ; x <= width - 8; x += 8 ) 03260 { 03261 __m128i u = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src1 + x)), z); 03262 __m128i v = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src2 + x)), z); 03263 03264 __m128 u0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(u, z)); 03265 __m128 u1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(u, z)); 03266 __m128 v0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v, z)); 03267 __m128 v1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v, z)); 03268 03269 u0 = _mm_add_ps(_mm_mul_ps(u0, a4), _mm_mul_ps(v0, b4)); 03270 u1 = _mm_add_ps(_mm_mul_ps(u1, a4), _mm_mul_ps(v1, b4)); 03271 u0 = _mm_add_ps(u0, g4); u1 = _mm_add_ps(u1, g4); 03272 03273 u = _mm_packs_epi32(_mm_cvtps_epi32(u0), _mm_cvtps_epi32(u1)); 03274 u = _mm_packus_epi16(u, u); 03275 03276 _mm_storel_epi64((__m128i*)(dst + x), u); 03277 } 03278 } 03279 #elif CV_NEON 03280 float32x4_t g = vdupq_n_f32 (gamma); 03281 03282 for( ; x <= width - 8; x += 8 ) 03283 { 03284 uint8x8_t in1 = vld1_u8(src1+x); 03285 uint16x8_t in1_16 = vmovl_u8(in1); 03286 float32x4_t in1_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in1_16))); 03287 float32x4_t in1_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in1_16))); 03288 03289 uint8x8_t in2 = vld1_u8(src2+x); 03290 uint16x8_t in2_16 = vmovl_u8(in2); 03291 float32x4_t in2_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in2_16))); 03292 float32x4_t in2_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in2_16))); 03293 03294 float32x4_t out_f_l = vaddq_f32(vmulq_n_f32(in1_f_l, alpha), vmulq_n_f32(in2_f_l, beta)); 03295 float32x4_t out_f_h = vaddq_f32(vmulq_n_f32(in1_f_h, alpha), vmulq_n_f32(in2_f_h, beta)); 03296 out_f_l = vaddq_f32(out_f_l, g); 03297 out_f_h = vaddq_f32(out_f_h, g); 03298 03299 uint16x4_t out_16_l = vqmovun_s32(cv_vrndq_s32_f32(out_f_l)); 03300 uint16x4_t out_16_h = vqmovun_s32(cv_vrndq_s32_f32(out_f_h)); 03301 03302 
uint16x8_t out_16 = vcombine_u16(out_16_l, out_16_h); 03303 uint8x8_t out = vqmovn_u16(out_16); 03304 03305 vst1_u8(dst+x, out); 03306 } 03307 #endif 03308 #if CV_ENABLE_UNROLLED 03309 for( ; x <= width - 4; x += 4 ) 03310 { 03311 float t0, t1; 03312 t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma; 03313 t1 = CV_8TO32F(src1[x+1])*alpha + CV_8TO32F(src2[x+1])*beta + gamma; 03314 03315 dst[x] = saturate_cast<uchar>(t0); 03316 dst[x+1] = saturate_cast<uchar>(t1); 03317 03318 t0 = CV_8TO32F(src1[x+2])*alpha + CV_8TO32F(src2[x+2])*beta + gamma; 03319 t1 = CV_8TO32F(src1[x+3])*alpha + CV_8TO32F(src2[x+3])*beta + gamma; 03320 03321 dst[x+2] = saturate_cast<uchar>(t0); 03322 dst[x+3] = saturate_cast<uchar>(t1); 03323 } 03324 #endif 03325 03326 for( ; x < width; x++ ) 03327 { 03328 float t0 = CV_8TO32F(src1[x])*alpha + CV_8TO32F(src2[x])*beta + gamma; 03329 dst[x] = saturate_cast<uchar>(t0); 03330 } 03331 } 03332 } 03333 03334 void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, 03335 schar* dst, size_t step, int width, int height, void* scalars ) 03336 { 03337 CALL_HAL(addWeighted8s, cv_hal_addWeighted8s, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars) 03338 addWeighted_<schar, float>(src1, step1, src2, step2, dst, step, width, height, scalars); 03339 } 03340 03341 void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, 03342 ushort* dst, size_t step, int width, int height, void* scalars ) 03343 { 03344 CALL_HAL(addWeighted16u, cv_hal_addWeighted16u, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars) 03345 addWeighted_<ushort, float>(src1, step1, src2, step2, dst, step, width, height, scalars); 03346 } 03347 03348 void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2, 03349 short* dst, size_t step, int width, int height, void* scalars ) 03350 { 03351 CALL_HAL(addWeighted16s, cv_hal_addWeighted16s, src1, 
step1, src2, step2, dst, step, width, height, (const double*)scalars) 03352 addWeighted_<short, float>(src1, step1, src2, step2, dst, step, width, height, scalars); 03353 } 03354 03355 void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2, 03356 int* dst, size_t step, int width, int height, void* scalars ) 03357 { 03358 CALL_HAL(addWeighted32s, cv_hal_addWeighted32s, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars) 03359 addWeighted_<int, double>(src1, step1, src2, step2, dst, step, width, height, scalars); 03360 } 03361 03362 void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, 03363 float* dst, size_t step, int width, int height, void* scalars ) 03364 { 03365 CALL_HAL(addWeighted32f, cv_hal_addWeighted32f, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars) 03366 addWeighted_<float, double>(src1, step1, src2, step2, dst, step, width, height, scalars); 03367 } 03368 03369 void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, 03370 double* dst, size_t step, int width, int height, void* scalars ) 03371 { 03372 CALL_HAL(addWeighted64f, cv_hal_addWeighted64f, src1, step1, src2, step2, dst, step, width, height, (const double*)scalars) 03373 addWeighted_<double, double>(src1, step1, src2, step2, dst, step, width, height, scalars); 03374 } 03375 03376 }} // cv::hal:: 03377 03378 /* End of file. */ 03379
Generated on Tue Jul 12 2022 15:17:18 by Doxygen 1.7.2