Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of the gr-peach-opencv-project repository.
color.cpp
00001 /*M/////////////////////////////////////////////////////////////////////////////////////// 00002 // 00003 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 00004 // 00005 // By downloading, copying, installing or using the software you agree to this license. 00006 // If you do not agree to this license, do not download, install, 00007 // copy or use the software. 00008 // 00009 // 00010 // License Agreement 00011 // For Open Source Computer Vision Library 00012 // 00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 00014 // Copyright (C) 2009-2010, Willow Garage Inc., all rights reserved. 00015 // Copyright (C) 2014-2015, Itseez Inc., all rights reserved. 00016 // Third party copyrights are property of their respective owners. 00017 // 00018 // Redistribution and use in source and binary forms, with or without modification, 00019 // are permitted provided that the following conditions are met: 00020 // 00021 // * Redistribution's of source code must retain the above copyright notice, 00022 // this list of conditions and the following disclaimer. 00023 // 00024 // * Redistribution's in binary form must reproduce the above copyright notice, 00025 // this list of conditions and the following disclaimer in the documentation 00026 // and/or other materials provided with the distribution. 00027 // 00028 // * The name of the copyright holders may not be used to endorse or promote products 00029 // derived from this software without specific prior written permission. 00030 // 00031 // This software is provided by the copyright holders and contributors "as is" and 00032 // any express or implied warranties, including, but not limited to, the implied 00033 // warranties of merchantability and fitness for a particular purpose are disclaimed. 
00034 // In no event shall the Intel Corporation or contributors be liable for any direct, 00035 // indirect, incidental, special, exemplary, or consequential damages 00036 // (including, but not limited to, procurement of substitute goods or services; 00037 // loss of use, data, or profits; or business interruption) however caused 00038 // and on any theory of liability, whether in contract, strict liability, 00039 // or tort (including negligence or otherwise) arising in any way out of 00040 // the use of this software, even if advised of the possibility of such damage. 00041 // 00042 //M*/ 00043 00044 /********************************* COPYRIGHT NOTICE *******************************\ 00045 The function for RGB to Lab conversion is based on the MATLAB script 00046 RGB2Lab.m translated by Mark Ruzon from C code by Yossi Rubner, 23 September 1997. 00047 See the page [http://vision.stanford.edu/~ruzon/software/rgblab.html] 00048 \**********************************************************************************/ 00049 00050 /********************************* COPYRIGHT NOTICE *******************************\ 00051 Original code for Bayer->BGR/RGB conversion is provided by Dirk Schaefer 00052 from MD-Mathematische Dienste GmbH. Below is the copyright notice: 00053 00054 IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 00055 By downloading, copying, installing or using the software you agree 00056 to this license. If you do not agree to this license, do not download, 00057 install, copy or use the software. 
00058 00059 Contributors License Agreement: 00060 00061 Copyright (c) 2002, 00062 MD-Mathematische Dienste GmbH 00063 Im Defdahl 5-10 00064 44141 Dortmund 00065 Germany 00066 www.md-it.de 00067 00068 Redistribution and use in source and binary forms, 00069 with or without modification, are permitted provided 00070 that the following conditions are met: 00071 00072 Redistributions of source code must retain 00073 the above copyright notice, this list of conditions and the following disclaimer. 00074 Redistributions in binary form must reproduce the above copyright notice, 00075 this list of conditions and the following disclaimer in the documentation 00076 and/or other materials provided with the distribution. 00077 The name of Contributor may not be used to endorse or promote products 00078 derived from this software without specific prior written permission. 00079 00080 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 00081 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00082 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00083 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE 00084 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 00085 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 00086 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 00087 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 00088 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 00089 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 00090 THE POSSIBILITY OF SUCH DAMAGE. 
\**********************************************************************************/

#include "precomp.hpp"
#include "opencl_kernels_imgproc.hpp"
#include <limits>

// Fixed-point "descale": rounding right-shift by n bits (adds half the divisor first).
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))

#if defined (HAVE_IPP) && (IPP_VERSION_X100 >= 700)
// Maximum channel values forwarded to the IPP channel-swap wrappers defined below.
#define MAX_IPP8u   255
#define MAX_IPP16u  65535
#define MAX_IPP32f  1.0
#endif

namespace cv
{

// computes cubic spline coefficients for a function: (xi=i, yi=f[i]), i=0..n
//
// f   - sample values; f[n] is read by the backward pass, so f must hold n+1 entries.
// tab - output table of 4*n coefficients; segment i is later evaluated as
//       tab[i*4] + tab[i*4+1]*t + tab[i*4+2]*t^2 + tab[i*4+3]*t^3 with t = x - i
//       (see splineInterpolate below).
// NOTE(review): the backward pass reads tab[(n-1)*4] and tab[(n-1)*4+1], which the
// forward pass (i < n-1) never writes; callers appear to rely on tab being
// zero-initialized - confirm before calling this with an uninitialized buffer.
template<typename _Tp> static void splineBuild(const _Tp* f, int n, _Tp* tab)
{
    _Tp cn = 0;
    int i;
    tab[0] = tab[1] = (_Tp)0;

    // Forward elimination of the tridiagonal system for the spline's
    // second-derivative unknowns; tab[i*4] temporarily holds the pivot factor
    // and tab[i*4+1] the transformed right-hand side.
    for(i = 1; i < n-1; i++)
    {
        _Tp t = 3*(f[i+1] - 2*f[i] + f[i-1]);
        _Tp l = 1/(4 - tab[(i-1)*4]);
        tab[i*4] = l; tab[i*4+1] = (t - tab[(i-1)*4+1])*l;
    }

    // Back substitution; rewrites tab in place with the per-segment cubic
    // coefficients (constant, linear, quadratic, cubic).
    for(i = n-1; i >= 0; i--)
    {
        _Tp c = tab[i*4+1] - tab[i*4]*cn;
        _Tp b = f[i+1] - f[i] - (cn + c*2)*(_Tp)0.3333333333333333;
        _Tp d = (cn - c)*(_Tp)0.3333333333333333;
        tab[i*4] = f[i]; tab[i*4+1] = b;
        tab[i*4+2] = c; tab[i*4+3] = d;
        cn = c;
    }
}

// interpolates value of a function at x, 0 <= x <= n using a cubic spline.
// Evaluates the cubic spline built by splineBuild at x (0 <= x <= n):
// picks segment ix = clamp(floor(x), 0, n-1) and evaluates its cubic
// polynomial at the fractional offset via Horner's scheme.
template<typename _Tp> static inline _Tp splineInterpolate(_Tp x, const _Tp* tab, int n)
{
    // don't touch this function without urgent need - some versions of gcc fail to inline it correctly
    int ix = std::min(std::max(int(x), 0), n-1);
    x -= ix;
    tab += ix*4;
    return ((tab[3]*x + tab[2])*x + tab[1])*x + tab[0];
}


// Per-depth channel traits: saturation maximum and "half" (gray midpoint).
template<typename _Tp> struct ColorChannel
{
    typedef float worktype_f;
    static _Tp max() { return std::numeric_limits<_Tp>::max(); }
    static _Tp half() { return (_Tp)(max()/2 + 1); }
};

// Float channels are normalized to [0, 1].
template<> struct ColorChannel<float>
{
    typedef float worktype_f;
    static float max() { return 1.f; }
    static float half() { return 0.5f; }
};

/*template<> struct ColorChannel<double>
{
    typedef double worktype_f;
    static double max() { return 1.; }
    static double half() { return 0.5; }
};*/


///////////////////////////// Top-level template function ////////////////////////////////

// Runs a row-wise color-conversion functor Cvt over a Range of rows; used by
// parallel_for_ in CvtColorLoop below.
template <typename Cvt>
class CvtColorLoop_Invoker : public ParallelLoopBody
{
    typedef typename Cvt::channel_type _Tp;
public:

    CvtColorLoop_Invoker(const Mat& _src, Mat& _dst, const Cvt& _cvt) :
        ParallelLoopBody(), src(_src), dst(_dst), cvt(_cvt)
    {
    }

    virtual void operator()(const Range& range) const
    {
        const uchar* yS = src.ptr<uchar>(range.start);
        uchar* yD = dst.ptr<uchar>(range.start);

        // Row stride is in bytes (Mat::step), while cvt works on typed pixels.
        for( int i = range.start; i < range.end; ++i, yS += src.step, yD += dst.step )
            cvt((const _Tp*)yS, (_Tp*)yD, src.cols);
    }

private:
    const Mat& src;
    Mat& dst;
    const Cvt& cvt;

    // non-assignable (holds references)
    const CvtColorLoop_Invoker& operator= (const CvtColorLoop_Invoker&);
};

// Applies cvt to every row of src, parallelized over rows; the last argument
// is the parallel_for_ "nstripes" hint derived from the image size.
template <typename Cvt>
void CvtColorLoop(const Mat& src, Mat& dst, const Cvt& cvt)
{
    parallel_for_(Range(0, src.rows), CvtColorLoop_Invoker<Cvt>(src, dst, cvt), src.total()/(double)(1<<16) );
}

#if defined (HAVE_IPP) && (IPP_VERSION_X100 >= 700)

// Signatures of the IPP primitives used below: reorder (channel swap),
// general (plain conversion), and color-to-gray (with coefficient array).
typedef IppStatus (CV_STDCALL* ippiReorderFunc)(const void *, int, void *, int, IppiSize, const int *);
typedef IppStatus (CV_STDCALL* ippiGeneralFunc)(const void *, int, void *, int, IppiSize);
typedef IppStatus (CV_STDCALL* ippiColor2GrayFunc)(const void *, int, void *, int, IppiSize, const Ipp32f *);

// Parallel driver for the IPP functors; reports failure through *ok.
template <typename Cvt>
class CvtColorIPPLoop_Invoker :
        public ParallelLoopBody
{
public:

    CvtColorIPPLoop_Invoker(const Mat& _src, Mat& _dst, const Cvt& _cvt, bool *_ok) :
        ParallelLoopBody(), src(_src), dst(_dst), cvt(_cvt), ok(_ok)
    {
        // *ok is initialized here (at construction), so it is defined even if
        // parallel_for_ ends up invoking no stripes.
        *ok = true;
    }

    virtual void operator()(const Range& range) const
    {
        const void *yS = src.ptr<uchar>(range.start);
        void *yD = dst.ptr<uchar>(range.start);
        // Workers only ever write `false`, so concurrent stores are benign.
        if( !cvt(yS, (int)src.step[0], yD, (int)dst.step[0], src.cols, range.end - range.start) )
            *ok = false;
        else
        {
            CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
        }
    }

private:
    const Mat& src;
    Mat& dst;
    const Cvt& cvt;
    bool *ok;

    // non-assignable (holds references)
    const CvtColorIPPLoop_Invoker& operator= (const CvtColorIPPLoop_Invoker&);
};

// Runs an IPP functor over all rows; returns false if any stripe failed,
// letting the caller fall back to the generic implementation.
template <typename Cvt>
bool CvtColorIPPLoop(const Mat& src, Mat& dst, const Cvt& cvt)
{
    bool ok;
    parallel_for_(Range(0, src.rows), CvtColorIPPLoop_Invoker<Cvt>(src, dst, cvt, &ok), src.total()/(double)(1<<16) );
    return ok;
}

// Same as CvtColorIPPLoop, but copies src aside first when src and dst share
// data (IPP primitives do not support in-place operation). Note the Mat
// assignment through `source` re-heads the caller's src at the copy.
template <typename Cvt>
bool CvtColorIPPLoopCopy(Mat& src, Mat& dst, const Cvt& cvt)
{
    Mat temp;
    Mat &source = src;
    if( src.data == dst.data )
    {
        src.copyTo(temp);
        source = temp;
    }
    bool ok;
    parallel_for_(Range(0, source.rows), CvtColorIPPLoop_Invoker<Cvt>(source, dst, cvt, &ok),
                  source.total()/(double)(1<<16) );
    return ok;
}

// Wrappers binding the per-depth maximum alpha value so the 4-argument
// SwapChannels C3C4R primitives fit the common ippiReorderFunc signature.
static IppStatus CV_STDCALL ippiSwapChannels_8u_C3C4Rf(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep,
         IppiSize roiSize, const int *dstOrder)
{
    return ippiSwapChannels_8u_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP8u);
}

static IppStatus CV_STDCALL ippiSwapChannels_16u_C3C4Rf(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep,
         IppiSize roiSize, const int *dstOrder)
{
    return ippiSwapChannels_16u_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP16u);
}

static IppStatus CV_STDCALL ippiSwapChannels_32f_C3C4Rf(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep,
         IppiSize roiSize, const int *dstOrder)
{
    return ippiSwapChannels_32f_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP32f);
}

// The tables below are indexed by Mat depth (8u at 0, 16u at 2, 32f at 5);
// a 0 entry means "no IPP primitive for this depth" and forces the fallback.
static ippiReorderFunc ippiSwapChannelsC3C4RTab[] =
{
    (ippiReorderFunc)ippiSwapChannels_8u_C3C4Rf, 0, (ippiReorderFunc)ippiSwapChannels_16u_C3C4Rf, 0,
    0, (ippiReorderFunc)ippiSwapChannels_32f_C3C4Rf, 0, 0
};

static ippiGeneralFunc ippiCopyAC4C3RTab[] =
{
    (ippiGeneralFunc)ippiCopy_8u_AC4C3R, 0, (ippiGeneralFunc)ippiCopy_16u_AC4C3R, 0,
    0, (ippiGeneralFunc)ippiCopy_32f_AC4C3R, 0, 0
};

static ippiReorderFunc ippiSwapChannelsC4C3RTab[] =
{
    (ippiReorderFunc)ippiSwapChannels_8u_C4C3R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4C3R, 0,
    0, (ippiReorderFunc)ippiSwapChannels_32f_C4C3R, 0, 0
};

static ippiReorderFunc ippiSwapChannelsC3RTab[] =
{
    (ippiReorderFunc)ippiSwapChannels_8u_C3R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C3R, 0,
    0, (ippiReorderFunc)ippiSwapChannels_32f_C3R, 0, 0
};

#if IPP_VERSION_X100 >= 810
static ippiReorderFunc ippiSwapChannelsC4RTab[] =
{
    (ippiReorderFunc)ippiSwapChannels_8u_C4R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4R, 0,
    0, (ippiReorderFunc)ippiSwapChannels_32f_C4R, 0, 0
};
#endif

static ippiColor2GrayFunc ippiColor2GrayC3Tab[] =
{
    (ippiColor2GrayFunc)ippiColorToGray_8u_C3C1R, 0, (ippiColor2GrayFunc)ippiColorToGray_16u_C3C1R, 0,
    0, (ippiColor2GrayFunc)ippiColorToGray_32f_C3C1R, 0, 0
};

static ippiColor2GrayFunc ippiColor2GrayC4Tab[] =
{
    (ippiColor2GrayFunc)ippiColorToGray_8u_AC4C1R, 0, (ippiColor2GrayFunc)ippiColorToGray_16u_AC4C1R, 0,
    0, (ippiColor2GrayFunc)ippiColorToGray_32f_AC4C1R, 0, 0
};

static ippiGeneralFunc ippiRGB2GrayC3Tab[] =
{
    (ippiGeneralFunc)ippiRGBToGray_8u_C3C1R, 0, (ippiGeneralFunc)ippiRGBToGray_16u_C3C1R, 0,
    0, (ippiGeneralFunc)ippiRGBToGray_32f_C3C1R, 0, 0
};

static ippiGeneralFunc ippiRGB2GrayC4Tab[] =
{
    (ippiGeneralFunc)ippiRGBToGray_8u_AC4C1R, 0, (ippiGeneralFunc)ippiRGBToGray_16u_AC4C1R, 0,
    0, (ippiGeneralFunc)ippiRGBToGray_32f_AC4C1R, 0, 0
};

static ippiGeneralFunc ippiCopyP3C3RTab[] =
{
    (ippiGeneralFunc)ippiCopy_8u_P3C3R, 0, (ippiGeneralFunc)ippiCopy_16u_P3C3R, 0,
    0, (ippiGeneralFunc)ippiCopy_32f_P3C3R, 0, 0
};

static ippiGeneralFunc ippiRGB2XYZTab[] =
{
    (ippiGeneralFunc)ippiRGBToXYZ_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToXYZ_16u_C3R, 0,
    0, (ippiGeneralFunc)ippiRGBToXYZ_32f_C3R, 0, 0
};

static ippiGeneralFunc ippiXYZ2RGBTab[] =
{
    (ippiGeneralFunc)ippiXYZToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiXYZToRGB_16u_C3R, 0,
    0, (ippiGeneralFunc)ippiXYZToRGB_32f_C3R, 0, 0
};

// HSV has no 32f IPP primitive, hence the 0 at the float slot.
static ippiGeneralFunc ippiRGB2HSVTab[] =
{
    (ippiGeneralFunc)ippiRGBToHSV_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToHSV_16u_C3R, 0,
    0, 0, 0, 0
};

static ippiGeneralFunc ippiHSV2RGBTab[] =
{
    (ippiGeneralFunc)ippiHSVToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiHSVToRGB_16u_C3R, 0,
    0, 0, 0, 0
};

static ippiGeneralFunc ippiRGB2HLSTab[] =
{
    (ippiGeneralFunc)ippiRGBToHLS_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToHLS_16u_C3R, 0,
    0, (ippiGeneralFunc)ippiRGBToHLS_32f_C3R, 0, 0
};

static ippiGeneralFunc ippiHLS2RGBTab[] =
{
    (ippiGeneralFunc)ippiHLSToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiHLSToRGB_16u_C3R, 0,
    0, (ippiGeneralFunc)ippiHLSToRGB_32f_C3R, 0, 0
};

#if !defined(HAVE_IPP_ICV_ONLY) && IPP_DISABLE_BLOCK
static ippiGeneralFunc ippiRGBToLUVTab[] =
{
    (ippiGeneralFunc)ippiRGBToLUV_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToLUV_16u_C3R, 0,
    0, (ippiGeneralFunc)ippiRGBToLUV_32f_C3R, 0, 0
};

static ippiGeneralFunc ippiLUVToRGBTab[] =
{
    (ippiGeneralFunc)ippiLUVToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiLUVToRGB_16u_C3R, 0,
    0, (ippiGeneralFunc)ippiLUVToRGB_32f_C3R, 0, 0
};
#endif

// Adapts a plain IPP conversion to the bool(src,...,rows) interface used by
// CvtColorIPPLoop; a null func (unsupported depth) reports failure.
struct IPPGeneralFunctor
{
    IPPGeneralFunctor(ippiGeneralFunc _func) : func(_func){}
    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
    {
        return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0 : false;
    }
private:
    ippiGeneralFunc func;
};

// Channel-reorder adapter; order[3] = 3 keeps the alpha channel in place.
struct IPPReorderFunctor
{
    IPPReorderFunctor(ippiReorderFunc _func, int _order0, int _order1, int _order2) : func(_func)
    {
        order[0] = _order0;
        order[1] = _order1;
        order[2] = _order2;
        order[3] = 3;
    }
    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
    {
        return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows), order) >= 0 : false;
    }
private:
    ippiReorderFunc func;
    int order[4];
};

// Color-to-gray adapter; coefficients are listed in B,G,R order to match
// OpenCV's default BGR channel layout.
struct IPPColor2GrayFunctor
{
    IPPColor2GrayFunctor(ippiColor2GrayFunc _func) :
        func(_func)
    {
        coeffs[0] = 0.114f;
        coeffs[1] = 0.587f;
        coeffs[2] = 0.299f;
    }
    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
    {
        return func ? func(src, srcStep, dst, dstStep, ippiSize(cols, rows), coeffs) >= 0 : false;
    }
private:
    ippiColor2GrayFunc func;
    Ipp32f coeffs[3];
};

// Gray -> BGR via a planar-to-packed copy fed the same plane three times.
struct IPPGray2BGRFunctor
{
    IPPGray2BGRFunctor(ippiGeneralFunc _func) :
        func(_func)
    {
    }

    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
    {
        if (func == 0)
            return false;

        const void* srcarray[3] = { src, src, src };
        return func(srcarray, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0;
    }
private:
    ippiGeneralFunc func;
};

// Gray -> BGRA in two IPP steps: replicate the plane to a temporary 3-channel
// image (func1), then expand to 4 channels with opaque alpha (func2).
struct IPPGray2BGRAFunctor
{
    IPPGray2BGRAFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _depth) :
        func1(_func1), func2(_func2), depth(_depth)
    {
    }

    bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const
    {
        if (func1 == 0 || func2 == 0)
            return false;

        const void* srcarray[3] = { src, src, src };
        Mat temp(rows, cols, CV_MAKETYPE(depth, 3));
        if(func1(srcarray, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows)) < 0)
            return false;
        int order[4] = {0, 1, 2, 3};
        return func2(temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0;
    }
private:
    ippiGeneralFunc func1;
    ippiReorderFunc func2;
    int depth;
};

struct IPPReorderGeneralFunctor 00487 { 00488 IPPReorderGeneralFunctor(ippiReorderFunc _func1, ippiGeneralFunc _func2, int _order0, int _order1, int _order2, int _depth) : 00489 func1(_func1), func2(_func2), depth(_depth) 00490 { 00491 order[0] = _order0; 00492 order[1] = _order1; 00493 order[2] = _order2; 00494 order[3] = 3; 00495 } 00496 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const 00497 { 00498 if (func1 == 0 || func2 == 0) 00499 return false; 00500 00501 Mat temp; 00502 temp.create(rows, cols, CV_MAKETYPE(depth, 3)); 00503 if(func1(src, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows), order) < 0) 00504 return false; 00505 return func2(temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows)) >= 0; 00506 } 00507 private: 00508 ippiReorderFunc func1; 00509 ippiGeneralFunc func2; 00510 int order[4]; 00511 int depth; 00512 }; 00513 00514 struct IPPGeneralReorderFunctor 00515 { 00516 IPPGeneralReorderFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _order0, int _order1, int _order2, int _depth) : 00517 func1(_func1), func2(_func2), depth(_depth) 00518 { 00519 order[0] = _order0; 00520 order[1] = _order1; 00521 order[2] = _order2; 00522 order[3] = 3; 00523 } 00524 bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const 00525 { 00526 if (func1 == 0 || func2 == 0) 00527 return false; 00528 00529 Mat temp; 00530 temp.create(rows, cols, CV_MAKETYPE(depth, 3)); 00531 if(func1(src, srcStep, temp.ptr(), (int)temp.step[0], ippiSize(cols, rows)) < 0) 00532 return false; 00533 return func2(temp.ptr(), (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0; 00534 } 00535 private: 00536 ippiGeneralFunc func1; 00537 ippiReorderFunc func2; 00538 int order[4]; 00539 int depth; 00540 }; 00541 00542 #endif 00543 00544 ////////////////// Various 3/4-channel to 3/4-channel RGB transformations ///////////////// 00545 00546 template<typename _Tp> struct 
RGB2RGB 00547 { 00548 typedef _Tp channel_type; 00549 00550 RGB2RGB(int _srccn, int _dstcn, int _blueIdx) : srccn(_srccn), dstcn(_dstcn), blueIdx(_blueIdx) {} 00551 void operator()(const _Tp* src, _Tp* dst, int n) const 00552 { 00553 int scn = srccn, dcn = dstcn, bidx = blueIdx; 00554 if( dcn == 3 ) 00555 { 00556 n *= 3; 00557 for( int i = 0; i < n; i += 3, src += scn ) 00558 { 00559 _Tp t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2]; 00560 dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2; 00561 } 00562 } 00563 else if( scn == 3 ) 00564 { 00565 n *= 3; 00566 _Tp alpha = ColorChannel<_Tp>::max(); 00567 for( int i = 0; i < n; i += 3, dst += 4 ) 00568 { 00569 _Tp t0 = src[i], t1 = src[i+1], t2 = src[i+2]; 00570 dst[bidx] = t0; dst[1] = t1; dst[bidx^2] = t2; dst[3] = alpha; 00571 } 00572 } 00573 else 00574 { 00575 n *= 4; 00576 for( int i = 0; i < n; i += 4 ) 00577 { 00578 _Tp t0 = src[i], t1 = src[i+1], t2 = src[i+2], t3 = src[i+3]; 00579 dst[i] = t2; dst[i+1] = t1; dst[i+2] = t0; dst[i+3] = t3; 00580 } 00581 } 00582 } 00583 00584 int srccn, dstcn, blueIdx; 00585 }; 00586 00587 #if CV_NEON 00588 00589 template<> struct RGB2RGB<uchar> 00590 { 00591 typedef uchar channel_type; 00592 00593 RGB2RGB(int _srccn, int _dstcn, int _blueIdx) : 00594 srccn(_srccn), dstcn(_dstcn), blueIdx(_blueIdx) 00595 { 00596 v_alpha = vdupq_n_u8(ColorChannel<uchar>::max()); 00597 v_alpha2 = vget_low_u8(v_alpha); 00598 } 00599 00600 void operator()(const uchar * src, uchar * dst, int n) const 00601 { 00602 int scn = srccn, dcn = dstcn, bidx = blueIdx, i = 0; 00603 if (dcn == 3) 00604 { 00605 n *= 3; 00606 if (scn == 3) 00607 { 00608 for ( ; i <= n - 48; i += 48, src += 48 ) 00609 { 00610 uint8x16x3_t v_src = vld3q_u8(src), v_dst; 00611 v_dst.val[0] = v_src.val[bidx]; 00612 v_dst.val[1] = v_src.val[1]; 00613 v_dst.val[2] = v_src.val[bidx ^ 2]; 00614 vst3q_u8(dst + i, v_dst); 00615 } 00616 for ( ; i <= n - 24; i += 24, src += 24 ) 00617 { 00618 uint8x8x3_t v_src = vld3_u8(src), v_dst; 00619 
v_dst.val[0] = v_src.val[bidx]; 00620 v_dst.val[1] = v_src.val[1]; 00621 v_dst.val[2] = v_src.val[bidx ^ 2]; 00622 vst3_u8(dst + i, v_dst); 00623 } 00624 for ( ; i < n; i += 3, src += 3 ) 00625 { 00626 uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2]; 00627 dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2; 00628 } 00629 } 00630 else 00631 { 00632 for ( ; i <= n - 48; i += 48, src += 64 ) 00633 { 00634 uint8x16x4_t v_src = vld4q_u8(src); 00635 uint8x16x3_t v_dst; 00636 v_dst.val[0] = v_src.val[bidx]; 00637 v_dst.val[1] = v_src.val[1]; 00638 v_dst.val[2] = v_src.val[bidx ^ 2]; 00639 vst3q_u8(dst + i, v_dst); 00640 } 00641 for ( ; i <= n - 24; i += 24, src += 32 ) 00642 { 00643 uint8x8x4_t v_src = vld4_u8(src); 00644 uint8x8x3_t v_dst; 00645 v_dst.val[0] = v_src.val[bidx]; 00646 v_dst.val[1] = v_src.val[1]; 00647 v_dst.val[2] = v_src.val[bidx ^ 2]; 00648 vst3_u8(dst + i, v_dst); 00649 } 00650 for ( ; i < n; i += 3, src += 4 ) 00651 { 00652 uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2]; 00653 dst[i] = t0; dst[i+1] = t1; dst[i+2] = t2; 00654 } 00655 } 00656 } 00657 else if (scn == 3) 00658 { 00659 n *= 3; 00660 for ( ; i <= n - 48; i += 48, dst += 64 ) 00661 { 00662 uint8x16x3_t v_src = vld3q_u8(src + i); 00663 uint8x16x4_t v_dst; 00664 v_dst.val[bidx] = v_src.val[0]; 00665 v_dst.val[1] = v_src.val[1]; 00666 v_dst.val[bidx ^ 2] = v_src.val[2]; 00667 v_dst.val[3] = v_alpha; 00668 vst4q_u8(dst, v_dst); 00669 } 00670 for ( ; i <= n - 24; i += 24, dst += 32 ) 00671 { 00672 uint8x8x3_t v_src = vld3_u8(src + i); 00673 uint8x8x4_t v_dst; 00674 v_dst.val[bidx] = v_src.val[0]; 00675 v_dst.val[1] = v_src.val[1]; 00676 v_dst.val[bidx ^ 2] = v_src.val[2]; 00677 v_dst.val[3] = v_alpha2; 00678 vst4_u8(dst, v_dst); 00679 } 00680 uchar alpha = ColorChannel<uchar>::max(); 00681 for (; i < n; i += 3, dst += 4 ) 00682 { 00683 uchar t0 = src[i], t1 = src[i+1], t2 = src[i+2]; 00684 dst[bidx] = t0; dst[1] = t1; dst[bidx^2] = t2; dst[3] = alpha; 00685 } 00686 } 00687 else 00688 { 00689 
n *= 4; 00690 for ( ; i <= n - 64; i += 64 ) 00691 { 00692 uint8x16x4_t v_src = vld4q_u8(src + i), v_dst; 00693 v_dst.val[0] = v_src.val[2]; 00694 v_dst.val[1] = v_src.val[1]; 00695 v_dst.val[2] = v_src.val[0]; 00696 v_dst.val[3] = v_src.val[3]; 00697 vst4q_u8(dst + i, v_dst); 00698 } 00699 for ( ; i <= n - 32; i += 32 ) 00700 { 00701 uint8x8x4_t v_src = vld4_u8(src + i), v_dst; 00702 v_dst.val[0] = v_src.val[2]; 00703 v_dst.val[1] = v_src.val[1]; 00704 v_dst.val[2] = v_src.val[0]; 00705 v_dst.val[3] = v_src.val[3]; 00706 vst4_u8(dst + i, v_dst); 00707 } 00708 for ( ; i < n; i += 4) 00709 { 00710 uchar t0 = src[i], t1 = src[i+1], t2 = src[i+2], t3 = src[i+3]; 00711 dst[i] = t2; dst[i+1] = t1; dst[i+2] = t0; dst[i+3] = t3; 00712 } 00713 } 00714 } 00715 00716 int srccn, dstcn, blueIdx; 00717 00718 uint8x16_t v_alpha; 00719 uint8x8_t v_alpha2; 00720 }; 00721 00722 #endif 00723 00724 /////////// Transforming 16-bit (565 or 555) RGB to/from 24/32-bit (888[8]) RGB ////////// 00725 00726 struct RGB5x52RGB 00727 { 00728 typedef uchar channel_type; 00729 00730 RGB5x52RGB(int _dstcn, int _blueIdx, int _greenBits) 00731 : dstcn(_dstcn), blueIdx(_blueIdx), greenBits(_greenBits) 00732 { 00733 #if CV_NEON 00734 v_n3 = vdupq_n_u16(~3); 00735 v_n7 = vdupq_n_u16(~7); 00736 v_255 = vdupq_n_u8(255); 00737 v_0 = vdupq_n_u8(0); 00738 v_mask = vdupq_n_u16(0x8000); 00739 #endif 00740 } 00741 00742 void operator()(const uchar* src, uchar* dst, int n) const 00743 { 00744 int dcn = dstcn, bidx = blueIdx, i = 0; 00745 if( greenBits == 6 ) 00746 { 00747 #if CV_NEON 00748 for ( ; i <= n - 16; i += 16, dst += dcn * 16) 00749 { 00750 uint16x8_t v_src0 = vld1q_u16((const ushort *)src + i), v_src1 = vld1q_u16((const ushort *)src + i + 8); 00751 uint8x16_t v_b = vcombine_u8(vmovn_u16(vshlq_n_u16(v_src0, 3)), vmovn_u16(vshlq_n_u16(v_src1, 3))); 00752 uint8x16_t v_g = vcombine_u8(vmovn_u16(vandq_u16(vshrq_n_u16(v_src0, 3), v_n3)), 00753 vmovn_u16(vandq_u16(vshrq_n_u16(v_src1, 3), v_n3))); 00754 
uint8x16_t v_r = vcombine_u8(vmovn_u16(vandq_u16(vshrq_n_u16(v_src0, 8), v_n7)), 00755 vmovn_u16(vandq_u16(vshrq_n_u16(v_src1, 8), v_n7))); 00756 if (dcn == 3) 00757 { 00758 uint8x16x3_t v_dst; 00759 v_dst.val[bidx] = v_b; 00760 v_dst.val[1] = v_g; 00761 v_dst.val[bidx^2] = v_r; 00762 vst3q_u8(dst, v_dst); 00763 } 00764 else 00765 { 00766 uint8x16x4_t v_dst; 00767 v_dst.val[bidx] = v_b; 00768 v_dst.val[1] = v_g; 00769 v_dst.val[bidx^2] = v_r; 00770 v_dst.val[3] = v_255; 00771 vst4q_u8(dst, v_dst); 00772 } 00773 } 00774 #endif 00775 for( ; i < n; i++, dst += dcn ) 00776 { 00777 unsigned t = ((const ushort*)src)[i]; 00778 dst[bidx] = (uchar)(t << 3); 00779 dst[1] = (uchar)((t >> 3) & ~3); 00780 dst[bidx ^ 2] = (uchar)((t >> 8) & ~7); 00781 if( dcn == 4 ) 00782 dst[3] = 255; 00783 } 00784 } 00785 else 00786 { 00787 #if CV_NEON 00788 for ( ; i <= n - 16; i += 16, dst += dcn * 16) 00789 { 00790 uint16x8_t v_src0 = vld1q_u16((const ushort *)src + i), v_src1 = vld1q_u16((const ushort *)src + i + 8); 00791 uint8x16_t v_b = vcombine_u8(vmovn_u16(vshlq_n_u16(v_src0, 3)), vmovn_u16(vshlq_n_u16(v_src1, 3))); 00792 uint8x16_t v_g = vcombine_u8(vmovn_u16(vandq_u16(vshrq_n_u16(v_src0, 2), v_n7)), 00793 vmovn_u16(vandq_u16(vshrq_n_u16(v_src1, 2), v_n7))); 00794 uint8x16_t v_r = vcombine_u8(vmovn_u16(vandq_u16(vshrq_n_u16(v_src0, 7), v_n7)), 00795 vmovn_u16(vandq_u16(vshrq_n_u16(v_src1, 7), v_n7))); 00796 if (dcn == 3) 00797 { 00798 uint8x16x3_t v_dst; 00799 v_dst.val[bidx] = v_b; 00800 v_dst.val[1] = v_g; 00801 v_dst.val[bidx^2] = v_r; 00802 vst3q_u8(dst, v_dst); 00803 } 00804 else 00805 { 00806 uint8x16x4_t v_dst; 00807 v_dst.val[bidx] = v_b; 00808 v_dst.val[1] = v_g; 00809 v_dst.val[bidx^2] = v_r; 00810 v_dst.val[3] = vbslq_u8(vcombine_u8(vqmovn_u16(vandq_u16(v_src0, v_mask)), 00811 vqmovn_u16(vandq_u16(v_src1, v_mask))), v_255, v_0); 00812 vst4q_u8(dst, v_dst); 00813 } 00814 } 00815 #endif 00816 for( ; i < n; i++, dst += dcn ) 00817 { 00818 unsigned t = ((const ushort*)src)[i]; 
00819 dst[bidx] = (uchar)(t << 3); 00820 dst[1] = (uchar)((t >> 2) & ~7); 00821 dst[bidx ^ 2] = (uchar)((t >> 7) & ~7); 00822 if( dcn == 4 ) 00823 dst[3] = t & 0x8000 ? 255 : 0; 00824 } 00825 } 00826 } 00827 00828 int dstcn, blueIdx, greenBits; 00829 #if CV_NEON 00830 uint16x8_t v_n3, v_n7, v_mask; 00831 uint8x16_t v_255, v_0; 00832 #endif 00833 }; 00834 00835 00836 struct RGB2RGB5x5 00837 { 00838 typedef uchar channel_type; 00839 00840 RGB2RGB5x5(int _srccn, int _blueIdx, int _greenBits) 00841 : srccn(_srccn), blueIdx(_blueIdx), greenBits(_greenBits) 00842 { 00843 #if CV_NEON 00844 v_n3 = vdup_n_u8(~3); 00845 v_n7 = vdup_n_u8(~7); 00846 v_mask = vdupq_n_u16(0x8000); 00847 v_0 = vdupq_n_u16(0); 00848 v_full = vdupq_n_u16(0xffff); 00849 #endif 00850 } 00851 00852 void operator()(const uchar* src, uchar* dst, int n) const 00853 { 00854 int scn = srccn, bidx = blueIdx, i = 0; 00855 if (greenBits == 6) 00856 { 00857 if (scn == 3) 00858 { 00859 #if CV_NEON 00860 for ( ; i <= n - 8; i += 8, src += 24 ) 00861 { 00862 uint8x8x3_t v_src = vld3_u8(src); 00863 uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3)); 00864 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n3)), 3)); 00865 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 8)); 00866 vst1q_u16((ushort *)dst + i, v_dst); 00867 } 00868 #endif 00869 for ( ; i < n; i++, src += 3 ) 00870 ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~3) << 3)|((src[bidx^2]&~7) << 8)); 00871 } 00872 else 00873 { 00874 #if CV_NEON 00875 for ( ; i <= n - 8; i += 8, src += 32 ) 00876 { 00877 uint8x8x4_t v_src = vld4_u8(src); 00878 uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3)); 00879 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n3)), 3)); 00880 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 8)); 00881 vst1q_u16((ushort *)dst + i, v_dst); 00882 } 00883 #endif 00884 for ( ; i < n; i++, src += 4 ) 00885 
((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~3) << 3)|((src[bidx^2]&~7) << 8)); 00886 } 00887 } 00888 else if (scn == 3) 00889 { 00890 #if CV_NEON 00891 for ( ; i <= n - 8; i += 8, src += 24 ) 00892 { 00893 uint8x8x3_t v_src = vld3_u8(src); 00894 uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3)); 00895 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n7)), 2)); 00896 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 7)); 00897 vst1q_u16((ushort *)dst + i, v_dst); 00898 } 00899 #endif 00900 for ( ; i < n; i++, src += 3 ) 00901 ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)|((src[bidx^2]&~7) << 7)); 00902 } 00903 else 00904 { 00905 #if CV_NEON 00906 for ( ; i <= n - 8; i += 8, src += 32 ) 00907 { 00908 uint8x8x4_t v_src = vld4_u8(src); 00909 uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src.val[bidx], 3)); 00910 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[1], v_n7)), 2)); 00911 v_dst = vorrq_u16(v_dst, vorrq_u16(vshlq_n_u16(vmovl_u8(vand_u8(v_src.val[bidx^2], v_n7)), 7), 00912 vbslq_u16(veorq_u16(vceqq_u16(vmovl_u8(v_src.val[3]), v_0), v_full), v_mask, v_0))); 00913 vst1q_u16((ushort *)dst + i, v_dst); 00914 } 00915 #endif 00916 for ( ; i < n; i++, src += 4 ) 00917 ((ushort*)dst)[i] = (ushort)((src[bidx] >> 3)|((src[1]&~7) << 2)| 00918 ((src[bidx^2]&~7) << 7)|(src[3] ? 
0x8000 : 0)); 00919 } 00920 } 00921 00922 int srccn, blueIdx, greenBits; 00923 #if CV_NEON 00924 uint8x8_t v_n3, v_n7; 00925 uint16x8_t v_mask, v_0, v_full; 00926 #endif 00927 }; 00928 00929 ///////////////////////////////// Color to/from Grayscale //////////////////////////////// 00930 00931 template<typename _Tp> 00932 struct Gray2RGB 00933 { 00934 typedef _Tp channel_type; 00935 00936 Gray2RGB(int _dstcn) : dstcn(_dstcn) {} 00937 void operator()(const _Tp* src, _Tp* dst, int n) const 00938 { 00939 if( dstcn == 3 ) 00940 for( int i = 0; i < n; i++, dst += 3 ) 00941 { 00942 dst[0] = dst[1] = dst[2] = src[i]; 00943 } 00944 else 00945 { 00946 _Tp alpha = ColorChannel<_Tp>::max(); 00947 for( int i = 0; i < n; i++, dst += 4 ) 00948 { 00949 dst[0] = dst[1] = dst[2] = src[i]; 00950 dst[3] = alpha; 00951 } 00952 } 00953 } 00954 00955 int dstcn; 00956 }; 00957 00958 00959 struct Gray2RGB5x5 00960 { 00961 typedef uchar channel_type; 00962 00963 Gray2RGB5x5(int _greenBits) : greenBits(_greenBits) 00964 { 00965 #if CV_NEON 00966 v_n7 = vdup_n_u8(~7); 00967 v_n3 = vdup_n_u8(~3); 00968 #elif CV_SSE2 00969 haveSIMD = checkHardwareSupport(CV_CPU_SSE2); 00970 v_n7 = _mm_set1_epi16(~7); 00971 v_n3 = _mm_set1_epi16(~3); 00972 v_zero = _mm_setzero_si128(); 00973 #endif 00974 } 00975 00976 void operator()(const uchar* src, uchar* dst, int n) const 00977 { 00978 int i = 0; 00979 if( greenBits == 6 ) 00980 { 00981 #if CV_NEON 00982 for ( ; i <= n - 8; i += 8 ) 00983 { 00984 uint8x8_t v_src = vld1_u8(src + i); 00985 uint16x8_t v_dst = vmovl_u8(vshr_n_u8(v_src, 3)); 00986 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src, v_n3)), 3)); 00987 v_dst = vorrq_u16(v_dst, vshlq_n_u16(vmovl_u8(vand_u8(v_src, v_n7)), 8)); 00988 vst1q_u16((ushort *)dst + i, v_dst); 00989 } 00990 #elif CV_SSE2 00991 if (haveSIMD) 00992 { 00993 for ( ; i <= n - 16; i += 16 ) 00994 { 00995 __m128i v_src = _mm_loadu_si128((__m128i const *)(src + i)); 00996 00997 __m128i v_src_p = _mm_unpacklo_epi8(v_src, 
v_zero); 00998 __m128i v_dst = _mm_or_si128(_mm_srli_epi16(v_src_p, 3), 00999 _mm_or_si128(_mm_slli_epi16(_mm_and_si128(v_src_p, v_n3), 3), 01000 _mm_slli_epi16(_mm_and_si128(v_src_p, v_n7), 8))); 01001 _mm_storeu_si128((__m128i *)((ushort *)dst + i), v_dst); 01002 01003 v_src_p = _mm_unpackhi_epi8(v_src, v_zero); 01004 v_dst = _mm_or_si128(_mm_srli_epi16(v_src_p, 3), 01005 _mm_or_si128(_mm_slli_epi16(_mm_and_si128(v_src_p, v_n3), 3), 01006 _mm_slli_epi16(_mm_and_si128(v_src_p, v_n7), 8))); 01007 _mm_storeu_si128((__m128i *)((ushort *)dst + i + 8), v_dst); 01008 } 01009 } 01010 #endif 01011 for ( ; i < n; i++ ) 01012 { 01013 int t = src[i]; 01014 ((ushort*)dst)[i] = (ushort)((t >> 3)|((t & ~3) << 3)|((t & ~7) << 8)); 01015 } 01016 } 01017 else 01018 { 01019 #if CV_NEON 01020 for ( ; i <= n - 8; i += 8 ) 01021 { 01022 uint16x8_t v_src = vmovl_u8(vshr_n_u8(vld1_u8(src + i), 3)); 01023 uint16x8_t v_dst = vorrq_u16(vorrq_u16(v_src, vshlq_n_u16(v_src, 5)), vshlq_n_u16(v_src, 10)); 01024 vst1q_u16((ushort *)dst + i, v_dst); 01025 } 01026 #elif CV_SSE2 01027 if (haveSIMD) 01028 { 01029 for ( ; i <= n - 16; i += 8 ) 01030 { 01031 __m128i v_src = _mm_loadu_si128((__m128i const *)(src + i)); 01032 01033 __m128i v_src_p = _mm_srli_epi16(_mm_unpacklo_epi8(v_src, v_zero), 3); 01034 __m128i v_dst = _mm_or_si128(v_src_p, 01035 _mm_or_si128(_mm_slli_epi32(v_src_p, 5), 01036 _mm_slli_epi16(v_src_p, 10))); 01037 _mm_storeu_si128((__m128i *)((ushort *)dst + i), v_dst); 01038 01039 v_src_p = _mm_srli_epi16(_mm_unpackhi_epi8(v_src, v_zero), 3); 01040 v_dst = _mm_or_si128(v_src_p, 01041 _mm_or_si128(_mm_slli_epi16(v_src_p, 5), 01042 _mm_slli_epi16(v_src_p, 10))); 01043 _mm_storeu_si128((__m128i *)((ushort *)dst + i + 8), v_dst); 01044 } 01045 } 01046 #endif 01047 for( ; i < n; i++ ) 01048 { 01049 int t = src[i] >> 3; 01050 ((ushort*)dst)[i] = (ushort)(t|(t << 5)|(t << 10)); 01051 } 01052 } 01053 } 01054 int greenBits; 01055 01056 #if CV_NEON 01057 uint8x8_t v_n7, v_n3; 01058 #elif 
CV_SSE2 01059 __m128i v_n7, v_n3, v_zero; 01060 bool haveSIMD; 01061 #endif 01062 }; 01063 01064 01065 #undef R2Y 01066 #undef G2Y 01067 #undef B2Y 01068 01069 enum 01070 { 01071 yuv_shift = 14, 01072 xyz_shift = 12, 01073 R2Y = 4899, 01074 G2Y = 9617, 01075 B2Y = 1868, 01076 BLOCK_SIZE = 256 01077 }; 01078 01079 01080 struct RGB5x52Gray 01081 { 01082 typedef uchar channel_type; 01083 01084 RGB5x52Gray(int _greenBits) : greenBits(_greenBits) 01085 { 01086 #if CV_NEON 01087 v_b2y = vdup_n_u16(B2Y); 01088 v_g2y = vdup_n_u16(G2Y); 01089 v_r2y = vdup_n_u16(R2Y); 01090 v_delta = vdupq_n_u32(1 << (yuv_shift - 1)); 01091 v_f8 = vdupq_n_u16(0xf8); 01092 v_fc = vdupq_n_u16(0xfc); 01093 #elif CV_SSE2 01094 haveSIMD = checkHardwareSupport(CV_CPU_SSE2); 01095 v_b2y = _mm_set1_epi16(B2Y); 01096 v_g2y = _mm_set1_epi16(G2Y); 01097 v_r2y = _mm_set1_epi16(R2Y); 01098 v_delta = _mm_set1_epi32(1 << (yuv_shift - 1)); 01099 v_f8 = _mm_set1_epi16(0xf8); 01100 v_fc = _mm_set1_epi16(0xfc); 01101 #endif 01102 } 01103 01104 void operator()(const uchar* src, uchar* dst, int n) const 01105 { 01106 int i = 0; 01107 if( greenBits == 6 ) 01108 { 01109 #if CV_NEON 01110 for ( ; i <= n - 8; i += 8) 01111 { 01112 uint16x8_t v_src = vld1q_u16((ushort *)src + i); 01113 uint16x8_t v_t0 = vandq_u16(vshlq_n_u16(v_src, 3), v_f8), 01114 v_t1 = vandq_u16(vshrq_n_u16(v_src, 3), v_fc), 01115 v_t2 = vandq_u16(vshrq_n_u16(v_src, 8), v_f8); 01116 01117 uint32x4_t v_dst0 = vmlal_u16(vmlal_u16(vmull_u16(vget_low_u16(v_t0), v_b2y), 01118 vget_low_u16(v_t1), v_g2y), vget_low_u16(v_t2), v_r2y); 01119 uint32x4_t v_dst1 = vmlal_u16(vmlal_u16(vmull_u16(vget_high_u16(v_t0), v_b2y), 01120 vget_high_u16(v_t1), v_g2y), vget_high_u16(v_t2), v_r2y); 01121 v_dst0 = vshrq_n_u32(vaddq_u32(v_dst0, v_delta), yuv_shift); 01122 v_dst1 = vshrq_n_u32(vaddq_u32(v_dst1, v_delta), yuv_shift); 01123 01124 vst1_u8(dst + i, vmovn_u16(vcombine_u16(vmovn_u32(v_dst0), vmovn_u32(v_dst1)))); 01125 } 01126 #elif CV_SSE2 01127 if (haveSIMD) 01128 
{ 01129 __m128i v_zero = _mm_setzero_si128(); 01130 01131 for ( ; i <= n - 8; i += 8) 01132 { 01133 __m128i v_src = _mm_loadu_si128((__m128i const *)((ushort *)src + i)); 01134 __m128i v_t0 = _mm_and_si128(_mm_slli_epi16(v_src, 3), v_f8), 01135 v_t1 = _mm_and_si128(_mm_srli_epi16(v_src, 3), v_fc), 01136 v_t2 = _mm_and_si128(_mm_srli_epi16(v_src, 8), v_f8); 01137 01138 __m128i v_mullo_b = _mm_mullo_epi16(v_t0, v_b2y); 01139 __m128i v_mullo_g = _mm_mullo_epi16(v_t1, v_g2y); 01140 __m128i v_mullo_r = _mm_mullo_epi16(v_t2, v_r2y); 01141 __m128i v_mulhi_b = _mm_mulhi_epi16(v_t0, v_b2y); 01142 __m128i v_mulhi_g = _mm_mulhi_epi16(v_t1, v_g2y); 01143 __m128i v_mulhi_r = _mm_mulhi_epi16(v_t2, v_r2y); 01144 01145 __m128i v_dst0 = _mm_add_epi32(_mm_unpacklo_epi16(v_mullo_b, v_mulhi_b), 01146 _mm_unpacklo_epi16(v_mullo_g, v_mulhi_g)); 01147 v_dst0 = _mm_add_epi32(_mm_add_epi32(v_dst0, v_delta), 01148 _mm_unpacklo_epi16(v_mullo_r, v_mulhi_r)); 01149 01150 __m128i v_dst1 = _mm_add_epi32(_mm_unpackhi_epi16(v_mullo_b, v_mulhi_b), 01151 _mm_unpackhi_epi16(v_mullo_g, v_mulhi_g)); 01152 v_dst1 = _mm_add_epi32(_mm_add_epi32(v_dst1, v_delta), 01153 _mm_unpackhi_epi16(v_mullo_r, v_mulhi_r)); 01154 01155 v_dst0 = _mm_srli_epi32(v_dst0, yuv_shift); 01156 v_dst1 = _mm_srli_epi32(v_dst1, yuv_shift); 01157 01158 __m128i v_dst = _mm_packs_epi32(v_dst0, v_dst1); 01159 _mm_storel_epi64((__m128i *)(dst + i), _mm_packus_epi16(v_dst, v_zero)); 01160 } 01161 } 01162 #endif 01163 for ( ; i < n; i++) 01164 { 01165 int t = ((ushort*)src)[i]; 01166 dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y + 01167 ((t >> 3) & 0xfc)*G2Y + 01168 ((t >> 8) & 0xf8)*R2Y, yuv_shift); 01169 } 01170 } 01171 else 01172 { 01173 #if CV_NEON 01174 for ( ; i <= n - 8; i += 8) 01175 { 01176 uint16x8_t v_src = vld1q_u16((ushort *)src + i); 01177 uint16x8_t v_t0 = vandq_u16(vshlq_n_u16(v_src, 3), v_f8), 01178 v_t1 = vandq_u16(vshrq_n_u16(v_src, 2), v_f8), 01179 v_t2 = vandq_u16(vshrq_n_u16(v_src, 7), v_f8); 01180 01181 
uint32x4_t v_dst0 = vmlal_u16(vmlal_u16(vmull_u16(vget_low_u16(v_t0), v_b2y), 01182 vget_low_u16(v_t1), v_g2y), vget_low_u16(v_t2), v_r2y); 01183 uint32x4_t v_dst1 = vmlal_u16(vmlal_u16(vmull_u16(vget_high_u16(v_t0), v_b2y), 01184 vget_high_u16(v_t1), v_g2y), vget_high_u16(v_t2), v_r2y); 01185 v_dst0 = vshrq_n_u32(vaddq_u32(v_dst0, v_delta), yuv_shift); 01186 v_dst1 = vshrq_n_u32(vaddq_u32(v_dst1, v_delta), yuv_shift); 01187 01188 vst1_u8(dst + i, vmovn_u16(vcombine_u16(vmovn_u32(v_dst0), vmovn_u32(v_dst1)))); 01189 } 01190 #elif CV_SSE2 01191 if (haveSIMD) 01192 { 01193 __m128i v_zero = _mm_setzero_si128(); 01194 01195 for ( ; i <= n - 8; i += 8) 01196 { 01197 __m128i v_src = _mm_loadu_si128((__m128i const *)((ushort *)src + i)); 01198 __m128i v_t0 = _mm_and_si128(_mm_slli_epi16(v_src, 3), v_f8), 01199 v_t1 = _mm_and_si128(_mm_srli_epi16(v_src, 2), v_f8), 01200 v_t2 = _mm_and_si128(_mm_srli_epi16(v_src, 7), v_f8); 01201 01202 __m128i v_mullo_b = _mm_mullo_epi16(v_t0, v_b2y); 01203 __m128i v_mullo_g = _mm_mullo_epi16(v_t1, v_g2y); 01204 __m128i v_mullo_r = _mm_mullo_epi16(v_t2, v_r2y); 01205 __m128i v_mulhi_b = _mm_mulhi_epi16(v_t0, v_b2y); 01206 __m128i v_mulhi_g = _mm_mulhi_epi16(v_t1, v_g2y); 01207 __m128i v_mulhi_r = _mm_mulhi_epi16(v_t2, v_r2y); 01208 01209 __m128i v_dst0 = _mm_add_epi32(_mm_unpacklo_epi16(v_mullo_b, v_mulhi_b), 01210 _mm_unpacklo_epi16(v_mullo_g, v_mulhi_g)); 01211 v_dst0 = _mm_add_epi32(_mm_add_epi32(v_dst0, v_delta), 01212 _mm_unpacklo_epi16(v_mullo_r, v_mulhi_r)); 01213 01214 __m128i v_dst1 = _mm_add_epi32(_mm_unpackhi_epi16(v_mullo_b, v_mulhi_b), 01215 _mm_unpackhi_epi16(v_mullo_g, v_mulhi_g)); 01216 v_dst1 = _mm_add_epi32(_mm_add_epi32(v_dst1, v_delta), 01217 _mm_unpackhi_epi16(v_mullo_r, v_mulhi_r)); 01218 01219 v_dst0 = _mm_srli_epi32(v_dst0, yuv_shift); 01220 v_dst1 = _mm_srli_epi32(v_dst1, yuv_shift); 01221 01222 __m128i v_dst = _mm_packs_epi32(v_dst0, v_dst1); 01223 _mm_storel_epi64((__m128i *)(dst + i), _mm_packus_epi16(v_dst, 
                                                                             v_zero));
                }
            }
#endif
            // scalar tail for RGB555: b=(t<<3)&0xf8, g=(t>>2)&0xf8, r=(t>>7)&0xf8
            for ( ; i < n; i++)
            {
                int t = ((ushort*)src)[i];
                dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
                                           ((t >> 2) & 0xf8)*G2Y +
                                           ((t >> 7) & 0xf8)*R2Y, yuv_shift);
            }
        }
    }
    int greenBits;

#if CV_NEON
    uint16x4_t v_b2y, v_g2y, v_r2y;
    uint32x4_t v_delta;
    uint16x8_t v_f8, v_fc;
#elif CV_SSE2
    bool haveSIMD;
    __m128i v_b2y, v_g2y, v_r2y;
    __m128i v_delta;
    __m128i v_f8, v_fc;
#endif
};


// Generic RGB(A) -> gray: weighted sum of the first three channels.
// Default weights are the BT.601-style luma coefficients used throughout this
// file; blueIdx == 0 means the buffer's first channel is blue, so the
// blue/red weights are swapped once in the constructor.
template<typename _Tp> struct RGB2Gray
{
    typedef _Tp channel_type;

    RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
        static const float coeffs0[] = { 0.299f, 0.587f, 0.114f };
        memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
        if(blueIdx == 0)
            std::swap(coeffs[0], coeffs[2]);
    }

    void operator()(const _Tp* src, _Tp* dst, int n) const
    {
        int scn = srccn;
        float cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];
        for(int i = 0; i < n; i++, src += scn)
            dst[i] = saturate_cast<_Tp>(src[0]*cb + src[1]*cg + src[2]*cr);
    }
    int srccn;
    float coeffs[3];
};

// uchar specialization: precomputes three 256-entry product tables
// (weight * value); the fixed-point rounding constant is folded into the
// third table's starting value, so each pixel is three lookups, two adds
// and a shift.
template<> struct RGB2Gray<uchar>
{
    typedef uchar channel_type;

    RGB2Gray(int _srccn, int blueIdx, const int* coeffs) : srccn(_srccn)
    {
        const int coeffs0[] = { R2Y, G2Y, B2Y };
        if(!coeffs) coeffs = coeffs0;

        // r starts at the rounding term 1 << (yuv_shift-1)
        int b = 0, g = 0, r = (1 << (yuv_shift-1));
        int db = coeffs[blueIdx^2], dg = coeffs[1], dr = coeffs[blueIdx];

        for( int i = 0; i < 256; i++, b += db, g += dg, r += dr )
        {
            tab[i] = b;
            tab[i+256] = g;
            tab[i+512] = r;
        }
    }
    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int scn = srccn;
        const int* _tab = tab;
        for(int i = 0; i < n; i++, src += scn)
            dst[i] =
                (uchar)((_tab[src[0]] + _tab[src[1]+256] + _tab[src[2]+512]) >> yuv_shift);
    }
    int srccn;
    int tab[256*3];
};

#if CV_NEON

// NEON RGB(A)16u -> gray16u using widening multiply-accumulate; processes
// 8 pixels per iteration, then 4, then a scalar tail.
template <>
struct RGB2Gray<ushort>
{
    typedef ushort channel_type;

    RGB2Gray(int _srccn, int blueIdx, const int* _coeffs) :
        srccn(_srccn)
    {
        static const int coeffs0[] = { R2Y, G2Y, B2Y };
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]));
        if( blueIdx == 0 )
            std::swap(coeffs[0], coeffs[2]);

        v_cb = vdup_n_u16(coeffs[0]);
        v_cg = vdup_n_u16(coeffs[1]);
        v_cr = vdup_n_u16(coeffs[2]);
        v_delta = vdupq_n_u32(1 << (yuv_shift - 1));
    }

    void operator()(const ushort* src, ushort* dst, int n) const
    {
        int scn = srccn, cb = coeffs[0], cg = coeffs[1], cr = coeffs[2], i = 0;

        for ( ; i <= n - 8; i += 8, src += scn * 8)
        {
            // NOTE(review): variables are named b/g/r but hold channels 0/1/2
            // of the source; coeffs were already swapped for blueIdx in the ctor.
            uint16x8_t v_b, v_r, v_g;
            if (scn == 3)
            {
                uint16x8x3_t v_src = vld3q_u16(src);
                v_b = v_src.val[0];
                v_g = v_src.val[1];
                v_r = v_src.val[2];
            }
            else
            {
                uint16x8x4_t v_src = vld4q_u16(src);
                v_b = v_src.val[0];
                v_g = v_src.val[1];
                v_r = v_src.val[2];
            }

            uint32x4_t v_dst0_ = vmlal_u16(vmlal_u16(
                                 vmull_u16(vget_low_u16(v_b), v_cb),
                                           vget_low_u16(v_g), v_cg),
                                           vget_low_u16(v_r), v_cr);
            uint32x4_t v_dst1_ = vmlal_u16(vmlal_u16(
                                 vmull_u16(vget_high_u16(v_b), v_cb),
                                           vget_high_u16(v_g), v_cg),
                                           vget_high_u16(v_r), v_cr);

            uint16x4_t v_dst0 = vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst0_, v_delta), yuv_shift));
            uint16x4_t v_dst1 = vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst1_, v_delta), yuv_shift));

            vst1q_u16(dst + i, vcombine_u16(v_dst0, v_dst1));
        }

        // 4-pixel-wide cleanup loop
        for ( ; i <= n - 4; i += 4, src += scn * 4)
        {
            uint16x4_t v_b, v_r, v_g;
            if (scn == 3)
            {
                uint16x4x3_t v_src =
                                   vld3_u16(src);
                v_b = v_src.val[0];
                v_g = v_src.val[1];
                v_r = v_src.val[2];
            }
            else
            {
                uint16x4x4_t v_src = vld4_u16(src);
                v_b = v_src.val[0];
                v_g = v_src.val[1];
                v_r = v_src.val[2];
            }

            uint32x4_t v_dst = vmlal_u16(vmlal_u16(
                               vmull_u16(v_b, v_cb),
                                         v_g, v_cg),
                                         v_r, v_cr);

            vst1_u16(dst + i, vmovn_u32(vshrq_n_u32(vaddq_u32(v_dst, v_delta), yuv_shift)));
        }

        // scalar tail (rounded fixed-point descale)
        for( ; i < n; i++, src += scn)
            dst[i] = (ushort)CV_DESCALE((unsigned)(src[0]*cb + src[1]*cg + src[2]*cr), yuv_shift);
    }

    int srccn, coeffs[3];
    uint16x4_t v_cb, v_cg, v_cr;
    uint32x4_t v_delta;
};

// NEON RGB(A)32f -> gray32f: fused multiply-accumulate over 8 then 4 pixels,
// scalar tail for the remainder.
template <>
struct RGB2Gray<float>
{
    typedef float channel_type;

    RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
        static const float coeffs0[] = { 0.299f, 0.587f, 0.114f };
        memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
        if(blueIdx == 0)
            std::swap(coeffs[0], coeffs[2]);

        v_cb = vdupq_n_f32(coeffs[0]);
        v_cg = vdupq_n_f32(coeffs[1]);
        v_cr = vdupq_n_f32(coeffs[2]);
    }

    void operator()(const float * src, float * dst, int n) const
    {
        int scn = srccn, i = 0;
        float cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];

        if (scn == 3)
        {
            for ( ; i <= n - 8; i += 8, src += scn * 8)
            {
                float32x4x3_t v_src = vld3q_f32(src);
                vst1q_f32(dst + i, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));

                v_src = vld3q_f32(src + scn * 4);
                vst1q_f32(dst + i + 4, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
            }

            for ( ; i <= n - 4; i += 4, src += scn * 4)
            {
                float32x4x3_t v_src = vld3q_f32(src);
                vst1q_f32(dst + i,
                          vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
            }
        }
        else
        {
            // 4-channel source: same math, alpha (val[3]) is ignored
            for ( ; i <= n - 8; i += 8, src += scn * 8)
            {
                float32x4x4_t v_src = vld4q_f32(src);
                vst1q_f32(dst + i, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));

                v_src = vld4q_f32(src + scn * 4);
                vst1q_f32(dst + i + 4, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
            }

            for ( ; i <= n - 4; i += 4, src += scn * 4)
            {
                float32x4x4_t v_src = vld4q_f32(src);
                vst1q_f32(dst + i, vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_cb), v_src.val[1], v_cg), v_src.val[2], v_cr));
            }
        }

        // scalar tail
        for ( ; i < n; i++, src += scn)
            dst[i] = src[0]*cb + src[1]*cg + src[2]*cr;
    }

    int srccn;
    float coeffs[3];
    float32x4_t v_cb, v_cg, v_cr;
};

#elif CV_SSE2

#if CV_SSE4_1

// SSE4.1 RGB(A)16u -> gray16u (needs _mm_packus_epi32 from SSE4.1);
// processes 16 pixels per iteration with a scalar tail.
template <>
struct RGB2Gray<ushort>
{
    typedef ushort channel_type;

    RGB2Gray(int _srccn, int blueIdx, const int* _coeffs) :
        srccn(_srccn)
    {
        static const int coeffs0[] = { R2Y, G2Y, B2Y };
        memcpy(coeffs, _coeffs ?
               _coeffs : coeffs0, 3*sizeof(coeffs[0]));
        if( blueIdx == 0 )
            std::swap(coeffs[0], coeffs[2]);

        v_cb = _mm_set1_epi16((short)coeffs[0]);
        v_cg = _mm_set1_epi16((short)coeffs[1]);
        v_cr = _mm_set1_epi16((short)coeffs[2]);
        v_delta = _mm_set1_epi32(1 << (yuv_shift - 1));

        haveSIMD = checkHardwareSupport(CV_CPU_SSE4_1);
    }

    // 16s x 8
    // Weighted sum of three 8-lane 16-bit channels; full 32-bit products are
    // rebuilt from mullo/mulhi(epu16) halves, rounded and narrowed back to u16.
    void process(__m128i v_b, __m128i v_g, __m128i v_r,
                 __m128i & v_gray) const
    {
        __m128i v_mullo_r = _mm_mullo_epi16(v_r, v_cr);
        __m128i v_mullo_g = _mm_mullo_epi16(v_g, v_cg);
        __m128i v_mullo_b = _mm_mullo_epi16(v_b, v_cb);
        __m128i v_mulhi_r = _mm_mulhi_epu16(v_r, v_cr);
        __m128i v_mulhi_g = _mm_mulhi_epu16(v_g, v_cg);
        __m128i v_mulhi_b = _mm_mulhi_epu16(v_b, v_cb);

        __m128i v_gray0 = _mm_add_epi32(_mm_unpacklo_epi16(v_mullo_r, v_mulhi_r),
                                        _mm_unpacklo_epi16(v_mullo_g, v_mulhi_g));
        v_gray0 = _mm_add_epi32(_mm_unpacklo_epi16(v_mullo_b, v_mulhi_b), v_gray0);
        v_gray0 = _mm_srli_epi32(_mm_add_epi32(v_gray0, v_delta), yuv_shift);

        __m128i v_gray1 = _mm_add_epi32(_mm_unpackhi_epi16(v_mullo_r, v_mulhi_r),
                                        _mm_unpackhi_epi16(v_mullo_g, v_mulhi_g));
        v_gray1 = _mm_add_epi32(_mm_unpackhi_epi16(v_mullo_b, v_mulhi_b), v_gray1);
        v_gray1 = _mm_srli_epi32(_mm_add_epi32(v_gray1, v_delta), yuv_shift);

        v_gray = _mm_packus_epi32(v_gray0, v_gray1);
    }

    void operator()(const ushort* src, ushort* dst, int n) const
    {
        int scn = srccn, cb = coeffs[0], cg = coeffs[1], cr = coeffs[2], i = 0;

        if (scn == 3 && haveSIMD)
        {
            for ( ; i <= n - 16; i += 16, src += scn * 16)
            {
                // NOTE(review): the r/g/b suffixes refer to register slots
                // before deinterleaving, not to actual channels; after
                // _mm_deinterleave_epi16, v_r* holds channel 0, v_g* channel 1,
                // v_b* channel 2, matching process()'s (b, g, r) parameters.
                __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src));
                __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + 8));
                __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + 16));
                __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + 24));
                __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + 32));
                __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + 40));

                _mm_deinterleave_epi16(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                __m128i v_gray0;
                process(v_r0, v_g0, v_b0,
                        v_gray0);

                __m128i v_gray1;
                process(v_r1, v_g1, v_b1,
                        v_gray1);

                _mm_storeu_si128((__m128i *)(dst + i), v_gray0);
                _mm_storeu_si128((__m128i *)(dst + i + 8), v_gray1);
            }
        }
        else if (scn == 4 && haveSIMD)
        {
            for ( ; i <= n - 16; i += 16, src += scn * 16)
            {
                __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src));
                __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + 8));
                __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + 16));
                __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + 24));
                __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + 32));
                __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + 40));
                __m128i v_a0 = _mm_loadu_si128((__m128i const *)(src + 48));
                __m128i v_a1 = _mm_loadu_si128((__m128i const *)(src + 56));

                _mm_deinterleave_epi16(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1, v_a0, v_a1);

                __m128i v_gray0;
                process(v_r0, v_g0, v_b0,
                        v_gray0);

                __m128i v_gray1;
                process(v_r1, v_g1, v_b1,
                        v_gray1);

                _mm_storeu_si128((__m128i *)(dst + i), v_gray0);
                _mm_storeu_si128((__m128i *)(dst + i + 8), v_gray1);
            }
        }

        // scalar tail (also the whole loop when SSE4.1 is unavailable)
        for( ; i < n; i++, src += scn)
            dst[i] = (ushort)CV_DESCALE((unsigned)(src[0]*cb + src[1]*cg + src[2]*cr), yuv_shift);
    }

    int srccn, coeffs[3];
    __m128i v_cb, v_cg, v_cr;
    __m128i v_delta;
    bool haveSIMD;
};

#endif // CV_SSE4_1

// SSE2 RGB(A)32f -> gray32f; 8 pixels per iteration plus scalar tail.
template <>
struct RGB2Gray<float>
{
    typedef float channel_type;

    RGB2Gray(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn)
    {
        static const float coeffs0[] = { 0.299f, 0.587f, 0.114f };
        memcpy( coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0]) );
        if(blueIdx == 0)
            std::swap(coeffs[0], coeffs[2]);

        v_cb = _mm_set1_ps(coeffs[0]);
        v_cg = _mm_set1_ps(coeffs[1]);
        v_cr = _mm_set1_ps(coeffs[2]);

        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
    }

    // Weighted sum of three 4-lane float channels.
    void process(__m128 v_b, __m128 v_g, __m128 v_r,
                 __m128 & v_gray) const
    {
        v_gray = _mm_mul_ps(v_r, v_cr);
        v_gray = _mm_add_ps(v_gray, _mm_mul_ps(v_g, v_cg));
        v_gray = _mm_add_ps(v_gray, _mm_mul_ps(v_b, v_cb));
    }

    void operator()(const float * src, float * dst, int n) const
    {
        int scn = srccn, i = 0;
        float cb = coeffs[0], cg = coeffs[1], cr = coeffs[2];

        if (scn == 3 && haveSIMD)
        {
            for ( ; i <= n - 8; i += 8, src += scn * 8)
            {
                // registers hold raw interleaved data until the deinterleave below
                __m128 v_r0 = _mm_loadu_ps(src);
                __m128 v_r1 = _mm_loadu_ps(src + 4);
                __m128 v_g0 = _mm_loadu_ps(src + 8);
                __m128 v_g1 = _mm_loadu_ps(src + 12);
                __m128 v_b0 = _mm_loadu_ps(src + 16);
                __m128 v_b1 = _mm_loadu_ps(src + 20);

                _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                __m128 v_gray0;
                process(v_r0, v_g0, v_b0,
                        v_gray0);

                __m128 v_gray1;
                process(v_r1, v_g1, v_b1,
                        v_gray1);

                _mm_storeu_ps(dst + i, v_gray0);
                _mm_storeu_ps(dst + i + 4, v_gray1);
            }
        }
        else if (scn == 4 && haveSIMD)
        {
            for ( ; i <= n - 8; i += 8, src += scn * 8)
            {
                __m128 v_r0 = _mm_loadu_ps(src);
                __m128 v_r1 = _mm_loadu_ps(src + 4);
                __m128 v_g0 = _mm_loadu_ps(src + 8);
                __m128 v_g1 = _mm_loadu_ps(src + 12);
                __m128 v_b0 = _mm_loadu_ps(src + 16);
                __m128 v_b1 = _mm_loadu_ps(src + 20);
                __m128 v_a0 = _mm_loadu_ps(src + 24);
                __m128 v_a1 = _mm_loadu_ps(src + 28);

                _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1,
v_b0, v_b1, v_a0, v_a1); 01652 01653 __m128 v_gray0; 01654 process(v_r0, v_g0, v_b0, 01655 v_gray0); 01656 01657 __m128 v_gray1; 01658 process(v_r1, v_g1, v_b1, 01659 v_gray1); 01660 01661 _mm_storeu_ps(dst + i, v_gray0); 01662 _mm_storeu_ps(dst + i + 4, v_gray1); 01663 } 01664 } 01665 01666 for ( ; i < n; i++, src += scn) 01667 dst[i] = src[0]*cb + src[1]*cg + src[2]*cr; 01668 } 01669 01670 int srccn; 01671 float coeffs[3]; 01672 __m128 v_cb, v_cg, v_cr; 01673 bool haveSIMD; 01674 }; 01675 01676 #endif // CV_SSE2 01677 01678 #if !CV_NEON && !CV_SSE4_1 01679 01680 template<> struct RGB2Gray<ushort> 01681 { 01682 typedef ushort channel_type; 01683 01684 RGB2Gray(int _srccn, int blueIdx, const int* _coeffs) : srccn(_srccn) 01685 { 01686 static const int coeffs0[] = { R2Y, G2Y, B2Y }; 01687 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 3*sizeof(coeffs[0])); 01688 if( blueIdx == 0 ) 01689 std::swap(coeffs[0], coeffs[2]); 01690 } 01691 01692 void operator()(const ushort* src, ushort* dst, int n) const 01693 { 01694 int scn = srccn, cb = coeffs[0], cg = coeffs[1], cr = coeffs[2]; 01695 for(int i = 0; i < n; i++, src += scn) 01696 dst[i] = (ushort)CV_DESCALE((unsigned)(src[0]*cb + src[1]*cg + src[2]*cr), yuv_shift); 01697 } 01698 int srccn; 01699 int coeffs[3]; 01700 }; 01701 01702 #endif // !CV_NEON && !CV_SSE4_1 01703 01704 ///////////////////////////////////// RGB <-> YCrCb ////////////////////////////////////// 01705 01706 template<typename _Tp> struct RGB2YCrCb_f 01707 { 01708 typedef _Tp channel_type; 01709 01710 RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) : srccn(_srccn), blueIdx(_blueIdx) 01711 { 01712 static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f}; 01713 memcpy(coeffs, _coeffs ? 
_coeffs : coeffs0, 5*sizeof(coeffs[0])); 01714 if(blueIdx==0) std::swap(coeffs[0], coeffs[2]); 01715 } 01716 01717 void operator()(const _Tp* src, _Tp* dst, int n) const 01718 { 01719 int scn = srccn, bidx = blueIdx; 01720 const _Tp delta = ColorChannel<_Tp>::half(); 01721 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4]; 01722 n *= 3; 01723 for(int i = 0; i < n; i += 3, src += scn) 01724 { 01725 _Tp Y = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2); 01726 _Tp Cr = saturate_cast<_Tp>((src[bidx^2] - Y)*C3 + delta); 01727 _Tp Cb = saturate_cast<_Tp>((src[bidx] - Y)*C4 + delta); 01728 dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb; 01729 } 01730 } 01731 int srccn, blueIdx; 01732 float coeffs[5]; 01733 }; 01734 01735 #if CV_NEON 01736 01737 template <> 01738 struct RGB2YCrCb_f<float> 01739 { 01740 typedef float channel_type; 01741 01742 RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) : 01743 srccn(_srccn), blueIdx(_blueIdx) 01744 { 01745 static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f}; 01746 memcpy(coeffs, _coeffs ? 
               _coeffs : coeffs0, 5*sizeof(coeffs[0]));
        if(blueIdx==0)
            std::swap(coeffs[0], coeffs[2]);

        v_c0 = vdupq_n_f32(coeffs[0]);
        v_c1 = vdupq_n_f32(coeffs[1]);
        v_c2 = vdupq_n_f32(coeffs[2]);
        v_c3 = vdupq_n_f32(coeffs[3]);
        v_c4 = vdupq_n_f32(coeffs[4]);
        v_delta = vdupq_n_f32(ColorChannel<float>::half());
    }

    // NEON RGB(A)32f -> YCrCb32f: 4 pixels per iteration via vmla, scalar tail.
    void operator()(const float * src, float * dst, int n) const
    {
        int scn = srccn, bidx = blueIdx, i = 0;
        const float delta = ColorChannel<float>::half();
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
        n *= 3;   // n now counts output elements (3 per pixel)

        if (scn == 3)
            for ( ; i <= n - 12; i += 12, src += 12)
            {
                float32x4x3_t v_src = vld3q_f32(src), v_dst;
                v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
                v_dst.val[1] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx^2], v_dst.val[0]), v_c3);
                v_dst.val[2] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx], v_dst.val[0]), v_c4);

                vst3q_f32(dst + i, v_dst);
            }
        else
            for ( ; i <= n - 12; i += 12, src += 16)
            {
                float32x4x4_t v_src = vld4q_f32(src);
                float32x4x3_t v_dst;
                v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2);
                v_dst.val[1] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx^2], v_dst.val[0]), v_c3);
                v_dst.val[2] = vmlaq_f32(v_delta, vsubq_f32(v_src.val[bidx], v_dst.val[0]), v_c4);

                vst3q_f32(dst + i, v_dst);
            }

        // scalar tail
        for ( ; i < n; i += 3, src += scn)
        {
            float Y = src[0]*C0 + src[1]*C1 + src[2]*C2;
            float Cr = (src[bidx^2] - Y)*C3 + delta;
            float Cb = (src[bidx] - Y)*C4 + delta;
            dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb;
        }
    }
    int srccn, blueIdx;
    float coeffs[5];
    float32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_delta;
};

#elif CV_SSE2

// SSE2 RGB(A)32f -> YCrCb32f: 8 pixels per iteration, scalar tail.
template <>
struct RGB2YCrCb_f<float>
{
    typedef float channel_type;

    RGB2YCrCb_f(int _srccn, int _blueIdx, const float* _coeffs) :
        srccn(_srccn), blueIdx(_blueIdx)
    {
        static const float coeffs0[] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0]));
        if (blueIdx==0)
            std::swap(coeffs[0], coeffs[2]);

        v_c0 = _mm_set1_ps(coeffs[0]);
        v_c1 = _mm_set1_ps(coeffs[1]);
        v_c2 = _mm_set1_ps(coeffs[2]);
        v_c3 = _mm_set1_ps(coeffs[3]);
        v_c4 = _mm_set1_ps(coeffs[4]);
        v_delta = _mm_set1_ps(ColorChannel<float>::half());

        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
    }

    // The blueIdx ternaries pick the red-like channel for Cr and the blue-like
    // channel for Cb, mirroring the scalar src[bidx^2]/src[bidx] indexing
    // (blueIdx is 0 or 2 here).
    void process(__m128 v_r, __m128 v_g, __m128 v_b,
                 __m128 & v_y, __m128 & v_cr, __m128 & v_cb) const
    {
        v_y = _mm_mul_ps(v_r, v_c0);
        v_y = _mm_add_ps(v_y, _mm_mul_ps(v_g, v_c1));
        v_y = _mm_add_ps(v_y, _mm_mul_ps(v_b, v_c2));

        v_cr = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(blueIdx == 0 ? v_b : v_r, v_y), v_c3), v_delta);
        v_cb = _mm_add_ps(_mm_mul_ps(_mm_sub_ps(blueIdx == 2 ? v_b : v_r, v_y), v_c4), v_delta);
    }

    void operator()(const float * src, float * dst, int n) const
    {
        int scn = srccn, bidx = blueIdx, i = 0;
        const float delta = ColorChannel<float>::half();
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
        n *= 3;   // n now counts output elements (3 per pixel)

        if (haveSIMD)
        {
            for ( ; i <= n - 24; i += 24, src += 8 * scn)
            {
                __m128 v_r0 = _mm_loadu_ps(src);
                __m128 v_r1 = _mm_loadu_ps(src + 4);
                __m128 v_g0 = _mm_loadu_ps(src + 8);
                __m128 v_g1 = _mm_loadu_ps(src + 12);
                __m128 v_b0 = _mm_loadu_ps(src + 16);
                __m128 v_b1 = _mm_loadu_ps(src + 20);

                if (scn == 4)
                {
                    __m128 v_a0 = _mm_loadu_ps(src + 24);
                    __m128 v_a1 = _mm_loadu_ps(src + 28);
                    _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1,
                                        v_b0, v_b1, v_a0, v_a1);
                }
                else
                    _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                __m128 v_y0, v_cr0, v_cb0;
                process(v_r0, v_g0, v_b0,
                        v_y0, v_cr0, v_cb0);

                __m128 v_y1, v_cr1, v_cb1;
                process(v_r1, v_g1, v_b1,
                        v_y1, v_cr1, v_cb1);

                _mm_interleave_ps(v_y0, v_y1, v_cr0, v_cr1, v_cb0, v_cb1);

                _mm_storeu_ps(dst + i, v_y0);
                _mm_storeu_ps(dst + i + 4, v_y1);
                _mm_storeu_ps(dst + i + 8, v_cr0);
                _mm_storeu_ps(dst + i + 12, v_cr1);
                _mm_storeu_ps(dst + i + 16, v_cb0);
                _mm_storeu_ps(dst + i + 20, v_cb1);
            }
        }

        // scalar tail
        for ( ; i < n; i += 3, src += scn)
        {
            float Y = src[0]*C0 + src[1]*C1 + src[2]*C2;
            float Cr = (src[bidx^2] - Y)*C3 + delta;
            float Cb = (src[bidx] - Y)*C4 + delta;
            dst[i] = Y; dst[i+1] = Cr; dst[i+2] = Cb;
        }
    }
    int srccn, blueIdx;
    float coeffs[5];
    __m128 v_c0, v_c1, v_c2, v_c3, v_c4, v_delta;
    bool haveSIMD;
};

#endif

// Fixed-point RGB(A) -> YCrCb for integer channel types.
template<typename _Tp> struct RGB2YCrCb_i
{
    typedef _Tp
channel_type; 01902 01903 RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs) 01904 : srccn(_srccn), blueIdx(_blueIdx) 01905 { 01906 static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241}; 01907 memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 5*sizeof(coeffs[0])); 01908 if(blueIdx==0) std::swap(coeffs[0], coeffs[2]); 01909 } 01910 void operator()(const _Tp* src, _Tp* dst, int n) const 01911 { 01912 int scn = srccn, bidx = blueIdx; 01913 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4]; 01914 int delta = ColorChannel<_Tp>::half()*(1 << yuv_shift); 01915 n *= 3; 01916 for(int i = 0; i < n; i += 3, src += scn) 01917 { 01918 int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift); 01919 int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift); 01920 int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift); 01921 dst[i] = saturate_cast<_Tp>(Y); 01922 dst[i+1] = saturate_cast<_Tp>(Cr); 01923 dst[i+2] = saturate_cast<_Tp>(Cb); 01924 } 01925 } 01926 int srccn, blueIdx; 01927 int coeffs[5]; 01928 }; 01929 01930 #if CV_NEON 01931 01932 template <> 01933 struct RGB2YCrCb_i<uchar> 01934 { 01935 typedef uchar channel_type; 01936 01937 RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs) 01938 : srccn(_srccn), blueIdx(_blueIdx) 01939 { 01940 static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241}; 01941 memcpy(coeffs, _coeffs ? 
               _coeffs : coeffs0, 5*sizeof(coeffs[0]));
        if (blueIdx==0)
            std::swap(coeffs[0], coeffs[2]);

        v_c0 = vdup_n_s16(coeffs[0]);
        v_c1 = vdup_n_s16(coeffs[1]);
        v_c2 = vdup_n_s16(coeffs[2]);
        v_c3 = vdupq_n_s32(coeffs[3]);
        v_c4 = vdupq_n_s32(coeffs[4]);
        v_delta = vdupq_n_s32(ColorChannel<uchar>::half()*(1 << yuv_shift));
        v_delta2 = vdupq_n_s32(1 << (yuv_shift - 1));
    }

    // NEON RGB(A)8u -> YCrCb8u: 8 pixels per iteration, widened to 16-bit
    // then 32-bit lanes, rounded with v_delta2 and saturated back to u8.
    void operator()(const uchar * src, uchar * dst, int n) const
    {
        int scn = srccn, bidx = blueIdx, i = 0;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
        int delta = ColorChannel<uchar>::half()*(1 << yuv_shift);
        n *= 3;   // n now counts output elements (3 per pixel)

        for ( ; i <= n - 24; i += 24, src += scn * 8)
        {
            uint8x8x3_t v_dst;
            int16x8x3_t v_src16;

            if (scn == 3)
            {
                uint8x8x3_t v_src = vld3_u8(src);
                v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0]));
                v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1]));
                v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));
            }
            else
            {
                uint8x8x4_t v_src = vld4_u8(src);
                v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0]));
                v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1]));
                v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));
            }

            // low 4 pixels
            int16x4x3_t v_src0;
            v_src0.val[0] = vget_low_s16(v_src16.val[0]);
            v_src0.val[1] = vget_low_s16(v_src16.val[1]);
            v_src0.val[2] = vget_low_s16(v_src16.val[2]);

            int32x4_t v_Y0 = vmlal_s16(vmlal_s16(vmull_s16(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
            v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta2), yuv_shift);
            int32x4_t v_Cr0 = vmlaq_s32(v_delta, vsubq_s32(vmovl_s16(v_src0.val[bidx^2]), v_Y0), v_c3);
            v_Cr0 = vshrq_n_s32(vaddq_s32(v_Cr0, v_delta2), yuv_shift);
            int32x4_t v_Cb0 = vmlaq_s32(v_delta,
                                        vsubq_s32(vmovl_s16(v_src0.val[bidx]), v_Y0), v_c4);
            v_Cb0 = vshrq_n_s32(vaddq_s32(v_Cb0, v_delta2), yuv_shift);

            // high 4 pixels (reuses v_src0 as scratch)
            v_src0.val[0] = vget_high_s16(v_src16.val[0]);
            v_src0.val[1] = vget_high_s16(v_src16.val[1]);
            v_src0.val[2] = vget_high_s16(v_src16.val[2]);

            int32x4_t v_Y1 = vmlal_s16(vmlal_s16(vmull_s16(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
            v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta2), yuv_shift);
            int32x4_t v_Cr1 = vmlaq_s32(v_delta, vsubq_s32(vmovl_s16(v_src0.val[bidx^2]), v_Y1), v_c3);
            v_Cr1 = vshrq_n_s32(vaddq_s32(v_Cr1, v_delta2), yuv_shift);
            int32x4_t v_Cb1 = vmlaq_s32(v_delta, vsubq_s32(vmovl_s16(v_src0.val[bidx]), v_Y1), v_c4);
            v_Cb1 = vshrq_n_s32(vaddq_s32(v_Cb1, v_delta2), yuv_shift);

            v_dst.val[0] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Y0), vqmovn_s32(v_Y1)));
            v_dst.val[1] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Cr0), vqmovn_s32(v_Cr1)));
            v_dst.val[2] = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Cb0), vqmovn_s32(v_Cb1)));

            vst3_u8(dst + i, v_dst);
        }

        // scalar tail
        for ( ; i < n; i += 3, src += scn)
        {
            int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
            int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
            int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
            dst[i] = saturate_cast<uchar>(Y);
            dst[i+1] = saturate_cast<uchar>(Cr);
            dst[i+2] = saturate_cast<uchar>(Cb);
        }
    }
    int srccn, blueIdx, coeffs[5];
    int16x4_t v_c0, v_c1, v_c2;
    int32x4_t v_c3, v_c4, v_delta, v_delta2;
};

template <>
struct RGB2YCrCb_i<ushort>
{
    typedef ushort channel_type;

    RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
        : srccn(_srccn), blueIdx(_blueIdx)
    {
        static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
        memcpy(coeffs, _coeffs ?
_coeffs : coeffs0, 5*sizeof(coeffs[0]));
        if (blueIdx==0)
            std::swap(coeffs[0], coeffs[2]);

        // All coefficients in 32-bit lanes here: 16-bit inputs are widened to
        // s32 before multiplying, unlike the uchar specialization.
        v_c0 = vdupq_n_s32(coeffs[0]);
        v_c1 = vdupq_n_s32(coeffs[1]);
        v_c2 = vdupq_n_s32(coeffs[2]);
        v_c3 = vdupq_n_s32(coeffs[3]);
        v_c4 = vdupq_n_s32(coeffs[4]);
        // chroma recentering offset (pre-scaled) and CV_DESCALE rounding bias
        v_delta = vdupq_n_s32(ColorChannel<ushort>::half()*(1 << yuv_shift));
        v_delta2 = vdupq_n_s32(1 << (yuv_shift - 1));
    }

    void operator()(const ushort * src, ushort * dst, int n) const
    {
        int scn = srccn, bidx = blueIdx, i = 0;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
        int delta = ColorChannel<ushort>::half()*(1 << yuv_shift);
        n *= 3;

        // Main vector loop: 8 pixels per iteration, split into two 4-lane halves.
        for ( ; i <= n - 24; i += 24, src += scn * 8)
        {
            uint16x8x3_t v_src, v_dst;
            int32x4x3_t v_src0;

            if (scn == 3)
                v_src = vld3q_u16(src);
            else
            {
                uint16x8x4_t v_src_ = vld4q_u16(src);  // drop alpha
                v_src.val[0] = v_src_.val[0];
                v_src.val[1] = v_src_.val[1];
                v_src.val[2] = v_src_.val[2];
            }

            // Low 4 pixels, widened to s32.
            v_src0.val[0] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[0])));
            v_src0.val[1] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1])));
            v_src0.val[2] = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2])));

            int32x4_t v_Y0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
            v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta2), yuv_shift);
            int32x4_t v_Cr0 = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx^2], v_Y0), v_c3);
            v_Cr0 = vshrq_n_s32(vaddq_s32(v_Cr0, v_delta2), yuv_shift);
            int32x4_t v_Cb0 = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx], v_Y0), v_c4);
            v_Cb0 = vshrq_n_s32(vaddq_s32(v_Cb0, v_delta2), yuv_shift);

            // High 4 pixels.
            v_src0.val[0] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[0])));
            v_src0.val[1] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1])));
            v_src0.val[2] = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2])));

            int32x4_t v_Y1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
            v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta2), yuv_shift);
            int32x4_t v_Cr1 = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx^2], v_Y1), v_c3);
            v_Cr1 = vshrq_n_s32(vaddq_s32(v_Cr1, v_delta2), yuv_shift);
            int32x4_t v_Cb1 = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx], v_Y1), v_c4);
            v_Cb1 = vshrq_n_s32(vaddq_s32(v_Cb1, v_delta2), yuv_shift);

            // Saturating narrow s32 -> u16 and interleaved store.
            v_dst.val[0] = vcombine_u16(vqmovun_s32(v_Y0), vqmovun_s32(v_Y1));
            v_dst.val[1] = vcombine_u16(vqmovun_s32(v_Cr0), vqmovun_s32(v_Cr1));
            v_dst.val[2] = vcombine_u16(vqmovun_s32(v_Cb0), vqmovun_s32(v_Cb1));

            vst3q_u16(dst + i, v_dst);
        }

        // Secondary vector loop: 4 pixels per iteration for the mid-sized tail.
        for ( ; i <= n - 12; i += 12, src += scn * 4)
        {
            uint16x4x3_t v_dst;
            int32x4x3_t v_src0;

            if (scn == 3)
            {
                uint16x4x3_t v_src = vld3_u16(src);
                v_src0.val[0] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0]));
                v_src0.val[1] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1]));
                v_src0.val[2] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2]));
            }
            else
            {
                uint16x4x4_t v_src = vld4_u16(src);  // drop alpha
                v_src0.val[0] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0]));
                v_src0.val[1] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1]));
                v_src0.val[2] = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2]));
            }

            int32x4_t v_Y = vmlaq_s32(vmlaq_s32(vmulq_s32(v_src0.val[0], v_c0), v_src0.val[1], v_c1), v_src0.val[2], v_c2);
            v_Y = vshrq_n_s32(vaddq_s32(v_Y, v_delta2), yuv_shift);
            int32x4_t v_Cr = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx^2], v_Y), v_c3);
            v_Cr = vshrq_n_s32(vaddq_s32(v_Cr, v_delta2), yuv_shift);
            int32x4_t v_Cb = vmlaq_s32(v_delta, vsubq_s32(v_src0.val[bidx], v_Y), v_c4);
            v_Cb = vshrq_n_s32(vaddq_s32(v_Cb, v_delta2), yuv_shift);

            v_dst.val[0] = vqmovun_s32(v_Y);
            v_dst.val[1] = vqmovun_s32(v_Cr);
            v_dst.val[2] = vqmovun_s32(v_Cb);

            vst3_u16(dst + i, v_dst);
        }

        // Scalar tail for the remaining (<4) pixels.
        for ( ; i < n; i += 3, src += scn)
        {
            int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
            int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
            int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
            dst[i] = saturate_cast<ushort>(Y);
            dst[i+1] = saturate_cast<ushort>(Cr);
            dst[i+2] = saturate_cast<ushort>(Cb);
        }
    }
    int srccn, blueIdx, coeffs[5];
    int32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_delta, v_delta2;
};

#elif CV_SSE4_1

// SSE4.1 specialization for 8-bit pixels; falls back to the scalar loop when
// the CPU lacks SSE4.1 support (checked at construction).
template <>
struct RGB2YCrCb_i<uchar>
{
    typedef uchar channel_type;

    RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
        : srccn(_srccn), blueIdx(_blueIdx)
    {
        static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
        memcpy(coeffs, _coeffs ?
_coeffs : coeffs0, 5*sizeof(coeffs[0]));
        if (blueIdx==0)
            std::swap(coeffs[0], coeffs[2]);

        // Broadcast coefficients into 32-bit SSE lanes.  Note v_delta folds
        // in the rounding bias (v_delta2) so the chroma path adds one vector.
        v_c0 = _mm_set1_epi32(coeffs[0]);
        v_c1 = _mm_set1_epi32(coeffs[1]);
        v_c2 = _mm_set1_epi32(coeffs[2]);
        v_c3 = _mm_set1_epi32(coeffs[3]);
        v_c4 = _mm_set1_epi32(coeffs[4]);
        v_delta2 = _mm_set1_epi32(1 << (yuv_shift - 1));
        v_delta = _mm_set1_epi32(ColorChannel<uchar>::half()*(1 << yuv_shift));
        v_delta = _mm_add_epi32(v_delta, v_delta2);
        v_zero = _mm_setzero_si128();

        // runtime dispatch: the SIMD path is used only when SSE4.1 is present
        haveSIMD = checkHardwareSupport(CV_CPU_SSE4_1);
    }

    // 16u x 8
    // Converts 8 pixels held as zero-extended 16-bit lanes into packed 16-bit
    // Y/Cr/Cb results; each half is widened to 32 bits for the multiplies.
    void process(__m128i v_r, __m128i v_g, __m128i v_b,
                 __m128i & v_y, __m128i & v_cr, __m128i & v_cb) const
    {
        __m128i v_r_p = _mm_unpacklo_epi16(v_r, v_zero);
        __m128i v_g_p = _mm_unpacklo_epi16(v_g, v_zero);
        __m128i v_b_p = _mm_unpacklo_epi16(v_b, v_zero);

        __m128i v_y0 = _mm_add_epi32(_mm_mullo_epi32(v_r_p, v_c0),
                       _mm_add_epi32(_mm_mullo_epi32(v_g_p, v_c1),
                                     _mm_mullo_epi32(v_b_p, v_c2)));
        v_y0 = _mm_srli_epi32(_mm_add_epi32(v_delta2, v_y0), yuv_shift);

        // blueIdx selects which channel drives Cr vs Cb (RGB vs BGR order)
        __m128i v_cr0 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 2 ? v_r_p : v_b_p, v_y0), v_c3);
        __m128i v_cb0 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 0 ? v_r_p : v_b_p, v_y0), v_c4);
        v_cr0 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cr0), yuv_shift);
        v_cb0 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cb0), yuv_shift);

        // Upper half of the 16-bit lanes.
        v_r_p = _mm_unpackhi_epi16(v_r, v_zero);
        v_g_p = _mm_unpackhi_epi16(v_g, v_zero);
        v_b_p = _mm_unpackhi_epi16(v_b, v_zero);

        __m128i v_y1 = _mm_add_epi32(_mm_mullo_epi32(v_r_p, v_c0),
                       _mm_add_epi32(_mm_mullo_epi32(v_g_p, v_c1),
                                     _mm_mullo_epi32(v_b_p, v_c2)));
        v_y1 = _mm_srli_epi32(_mm_add_epi32(v_delta2, v_y1), yuv_shift);

        __m128i v_cr1 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 2 ? v_r_p : v_b_p, v_y1), v_c3);
        __m128i v_cb1 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 0 ? v_r_p : v_b_p, v_y1), v_c4);
        v_cr1 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cr1), yuv_shift);
        v_cb1 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cb1), yuv_shift);

        v_y = _mm_packs_epi32(v_y0, v_y1);
        v_cr = _mm_packs_epi32(v_cr0, v_cr1);
        v_cb = _mm_packs_epi32(v_cb0, v_cb1);
    }

    void operator()(const uchar * src, uchar * dst, int n) const
    {
        int scn = srccn, bidx = blueIdx, i = 0;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
        int delta = ColorChannel<uchar>::half()*(1 << yuv_shift);
        n *= 3;

        if (haveSIMD)
        {
            // 32 pixels (96 output bytes) per iteration.
            for ( ; i <= n - 96; i += 96, src += scn * 32)
            {
                __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src));
                __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + 16));
                __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + 32));
                __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + 48));
                __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + 64));
                __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + 80));

                // De-interleave channel planes (alpha, if present, is discarded).
                if (scn == 4)
                {
                    __m128i v_a0 = _mm_loadu_si128((__m128i const *)(src + 96));
                    __m128i v_a1 = _mm_loadu_si128((__m128i const *)(src + 112));
                    _mm_deinterleave_epi8(v_r0, v_r1, v_g0, v_g1,
                                          v_b0, v_b1, v_a0, v_a1);
                }
                else
                    _mm_deinterleave_epi8(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                // Four process() calls cover the 32 pixels in groups of 8.
                __m128i v_y0 = v_zero, v_cr0 = v_zero, v_cb0 = v_zero;
                process(_mm_unpacklo_epi8(v_r0, v_zero),
                        _mm_unpacklo_epi8(v_g0, v_zero),
                        _mm_unpacklo_epi8(v_b0, v_zero),
                        v_y0, v_cr0, v_cb0);

                __m128i v_y1 = v_zero, v_cr1 = v_zero, v_cb1 = v_zero;
                process(_mm_unpackhi_epi8(v_r0, v_zero),
                        _mm_unpackhi_epi8(v_g0, v_zero),
                        _mm_unpackhi_epi8(v_b0, v_zero),
                        v_y1, v_cr1, v_cb1);

                __m128i v_y_0 = _mm_packus_epi16(v_y0, v_y1);
                __m128i v_cr_0 = _mm_packus_epi16(v_cr0, v_cr1);
                __m128i v_cb_0 = _mm_packus_epi16(v_cb0, v_cb1);

                process(_mm_unpacklo_epi8(v_r1, v_zero),
                        _mm_unpacklo_epi8(v_g1, v_zero),
                        _mm_unpacklo_epi8(v_b1, v_zero),
                        v_y0, v_cr0, v_cb0);

                process(_mm_unpackhi_epi8(v_r1, v_zero),
                        _mm_unpackhi_epi8(v_g1, v_zero),
                        _mm_unpackhi_epi8(v_b1, v_zero),
                        v_y1, v_cr1, v_cb1);

                __m128i v_y_1 = _mm_packus_epi16(v_y0, v_y1);
                __m128i v_cr_1 = _mm_packus_epi16(v_cr0, v_cr1);
                __m128i v_cb_1 = _mm_packus_epi16(v_cb0, v_cb1);

                // Re-interleave planes into packed YCrCb and store.
                _mm_interleave_epi8(v_y_0, v_y_1, v_cr_0, v_cr_1, v_cb_0, v_cb_1);

                _mm_storeu_si128((__m128i *)(dst + i), v_y_0);
                _mm_storeu_si128((__m128i *)(dst + i + 16), v_y_1);
                _mm_storeu_si128((__m128i *)(dst + i + 32), v_cr_0);
                _mm_storeu_si128((__m128i *)(dst + i + 48), v_cr_1);
                _mm_storeu_si128((__m128i *)(dst + i + 64), v_cb_0);
                _mm_storeu_si128((__m128i *)(dst + i + 80), v_cb_1);
            }
        }

        // Scalar tail (also the whole job when SSE4.1 is unavailable).
        for ( ; i < n; i += 3, src += scn)
        {
            int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
            int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
            int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
            dst[i] = saturate_cast<uchar>(Y);
            dst[i+1] = saturate_cast<uchar>(Cr);
            dst[i+2] = saturate_cast<uchar>(Cb);
        }
    }

    int srccn, blueIdx, coeffs[5];
    __m128i v_c0, v_c1, v_c2;
    __m128i v_c3, v_c4, v_delta, v_delta2;
    __m128i v_zero;
    bool haveSIMD;
};

// SSE4.1 specialization for 16-bit pixels.
template <>
struct RGB2YCrCb_i<ushort>
{
    typedef ushort channel_type;

    RGB2YCrCb_i(int _srccn, int _blueIdx, const int* _coeffs)
        : srccn(_srccn), blueIdx(_blueIdx)
    {
        static const int coeffs0[] = {R2Y, G2Y, B2Y, 11682, 9241};
        memcpy(coeffs, _coeffs ?
_coeffs : coeffs0, 5*sizeof(coeffs[0]));
        if (blueIdx==0)
            std::swap(coeffs[0], coeffs[2]);

        // Same register setup as the uchar SSE4.1 specialization; v_delta
        // again folds in the rounding bias v_delta2.
        v_c0 = _mm_set1_epi32(coeffs[0]);
        v_c1 = _mm_set1_epi32(coeffs[1]);
        v_c2 = _mm_set1_epi32(coeffs[2]);
        v_c3 = _mm_set1_epi32(coeffs[3]);
        v_c4 = _mm_set1_epi32(coeffs[4]);
        v_delta2 = _mm_set1_epi32(1 << (yuv_shift - 1));
        v_delta = _mm_set1_epi32(ColorChannel<ushort>::half()*(1 << yuv_shift));
        v_delta = _mm_add_epi32(v_delta, v_delta2);
        v_zero = _mm_setzero_si128();

        haveSIMD = checkHardwareSupport(CV_CPU_SSE4_1);
    }

    // 16u x 8
    // As in the uchar version, but the final pack is unsigned-saturating
    // 32->16 (_mm_packus_epi32) since the outputs are full-range ushort.
    void process(__m128i v_r, __m128i v_g, __m128i v_b,
                 __m128i & v_y, __m128i & v_cr, __m128i & v_cb) const
    {
        __m128i v_r_p = _mm_unpacklo_epi16(v_r, v_zero);
        __m128i v_g_p = _mm_unpacklo_epi16(v_g, v_zero);
        __m128i v_b_p = _mm_unpacklo_epi16(v_b, v_zero);

        __m128i v_y0 = _mm_add_epi32(_mm_mullo_epi32(v_r_p, v_c0),
                       _mm_add_epi32(_mm_mullo_epi32(v_g_p, v_c1),
                                     _mm_mullo_epi32(v_b_p, v_c2)));
        v_y0 = _mm_srli_epi32(_mm_add_epi32(v_delta2, v_y0), yuv_shift);

        __m128i v_cr0 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 2 ? v_r_p : v_b_p, v_y0), v_c3);
        __m128i v_cb0 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 0 ? v_r_p : v_b_p, v_y0), v_c4);
        v_cr0 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cr0), yuv_shift);
        v_cb0 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cb0), yuv_shift);

        v_r_p = _mm_unpackhi_epi16(v_r, v_zero);
        v_g_p = _mm_unpackhi_epi16(v_g, v_zero);
        v_b_p = _mm_unpackhi_epi16(v_b, v_zero);

        __m128i v_y1 = _mm_add_epi32(_mm_mullo_epi32(v_r_p, v_c0),
                       _mm_add_epi32(_mm_mullo_epi32(v_g_p, v_c1),
                                     _mm_mullo_epi32(v_b_p, v_c2)));
        v_y1 = _mm_srli_epi32(_mm_add_epi32(v_delta2, v_y1), yuv_shift);

        __m128i v_cr1 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 2 ? v_r_p : v_b_p, v_y1), v_c3);
        __m128i v_cb1 = _mm_mullo_epi32(_mm_sub_epi32(blueIdx == 0 ? v_r_p : v_b_p, v_y1), v_c4);
        v_cr1 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cr1), yuv_shift);
        v_cb1 = _mm_srai_epi32(_mm_add_epi32(v_delta, v_cb1), yuv_shift);

        v_y = _mm_packus_epi32(v_y0, v_y1);
        v_cr = _mm_packus_epi32(v_cr0, v_cr1);
        v_cb = _mm_packus_epi32(v_cb0, v_cb1);
    }

    void operator()(const ushort * src, ushort * dst, int n) const
    {
        int scn = srccn, bidx = blueIdx, i = 0;
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3], C4 = coeffs[4];
        int delta = ColorChannel<ushort>::half()*(1 << yuv_shift);
        n *= 3;

        if (haveSIMD)
        {
            // 16 pixels (48 output values) per iteration.
            for ( ; i <= n - 48; i += 48, src += scn * 16)
            {
                __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src));
                __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + 8));
                __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + 16));
                __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + 24));
                __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + 32));
                __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + 40));

                if (scn == 4)
                {
                    __m128i v_a0 = _mm_loadu_si128((__m128i const *)(src + 48));
                    __m128i v_a1 = _mm_loadu_si128((__m128i const *)(src + 56));

                    _mm_deinterleave_epi16(v_r0, v_r1, v_g0, v_g1,
                                           v_b0, v_b1, v_a0, v_a1);
                }
                else
                    _mm_deinterleave_epi16(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                __m128i v_y0 = v_zero, v_cr0 = v_zero, v_cb0 = v_zero;
                process(v_r0, v_g0, v_b0,
                        v_y0, v_cr0, v_cb0);

                __m128i v_y1 = v_zero, v_cr1 = v_zero, v_cb1 = v_zero;
                process(v_r1, v_g1, v_b1,
                        v_y1, v_cr1, v_cb1);

                _mm_interleave_epi16(v_y0, v_y1, v_cr0, v_cr1, v_cb0, v_cb1);

                _mm_storeu_si128((__m128i *)(dst + i), v_y0);
                _mm_storeu_si128((__m128i *)(dst + i + 8), v_y1);
                _mm_storeu_si128((__m128i *)(dst + i + 16), v_cr0);
                _mm_storeu_si128((__m128i *)(dst + i + 24), v_cr1);
                _mm_storeu_si128((__m128i *)(dst + i + 32), v_cb0);
                _mm_storeu_si128((__m128i *)(dst + i + 40), v_cb1);
            }
        }

        // Scalar tail (also the whole job when SSE4.1 is unavailable).
        for ( ; i < n; i += 3, src += scn)
        {
            int Y = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, yuv_shift);
            int Cr = CV_DESCALE((src[bidx^2] - Y)*C3 + delta, yuv_shift);
            int Cb = CV_DESCALE((src[bidx] - Y)*C4 + delta, yuv_shift);
            dst[i] = saturate_cast<ushort>(Y);
            dst[i+1] = saturate_cast<ushort>(Cr);
            dst[i+2] = saturate_cast<ushort>(Cb);
        }
    }

    int srccn, blueIdx, coeffs[5];
    __m128i v_c0, v_c1, v_c2;
    __m128i v_c3, v_c4, v_delta, v_delta2;
    __m128i v_zero;
    bool haveSIMD;
};

#endif // CV_SSE4_1

// Generic floating-point YCrCb -> RGB/BGR conversion functor.
template<typename _Tp> struct YCrCb2RGB_f
{
    typedef _Tp channel_type;

    YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        // default inverse-transform coefficients, overridable by the caller
        static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f};
        memcpy(coeffs, _coeffs ?
_coeffs : coeffs0, 4*sizeof(coeffs[0]));
    }

    // Converts n packed YCrCb triplets to dcn-channel output.  The chroma
    // channels are recentered by delta before applying the coefficients; a
    // 4th output channel, when requested, is filled with the max value (alpha).
    void operator()(const _Tp* src, _Tp* dst, int n) const
    {
        int dcn = dstcn, bidx = blueIdx;
        const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max();
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
        n *= 3;
        for(int i = 0; i < n; i += 3, dst += dcn)
        {
            _Tp Y = src[i];
            _Tp Cr = src[i+1];
            _Tp Cb = src[i+2];

            _Tp b = saturate_cast<_Tp>(Y + (Cb - delta)*C3);
            _Tp g = saturate_cast<_Tp>(Y + (Cb - delta)*C2 + (Cr - delta)*C1);
            _Tp r = saturate_cast<_Tp>(Y + (Cr - delta)*C0);

            // bidx routes blue/red to the right output slots (RGB vs BGR)
            dst[bidx] = b; dst[1] = g; dst[bidx^2] = r;
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    float coeffs[4];
};

#if CV_NEON

// NEON specialization for float pixels: 4 pixels per vector iteration.
template <>
struct YCrCb2RGB_f<float>
{
    typedef float channel_type;

    YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));

        // Broadcast coefficients, the chroma offset and the alpha fill value.
        v_c0 = vdupq_n_f32(coeffs[0]);
        v_c1 = vdupq_n_f32(coeffs[1]);
        v_c2 = vdupq_n_f32(coeffs[2]);
        v_c3 = vdupq_n_f32(coeffs[3]);
        v_delta = vdupq_n_f32(ColorChannel<float>::half());
        v_alpha = vdupq_n_f32(ColorChannel<float>::max());
    }

    void operator()(const float* src, float* dst, int n) const
    {
        int dcn = dstcn, bidx = blueIdx, i = 0;
        const float delta = ColorChannel<float>::half(), alpha = ColorChannel<float>::max();
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
        n *= 3;

        // Separate vector loops for 3- and 4-channel output.
        if (dcn == 3)
            for ( ; i <= n - 12; i += 12, dst += 12)
            {
                float32x4x3_t v_src = vld3q_f32(src + i), v_dst;
                float32x4_t v_Y = v_src.val[0], v_Cr = v_src.val[1], v_Cb = v_src.val[2];

                v_dst.val[bidx] = vmlaq_f32(v_Y, vsubq_f32(v_Cb, v_delta), v_c3);
                v_dst.val[1] = vaddq_f32(vmlaq_f32(vmulq_f32(vsubq_f32(v_Cb, v_delta), v_c2), vsubq_f32(v_Cr, v_delta), v_c1), v_Y);
                v_dst.val[bidx^2] = vmlaq_f32(v_Y, vsubq_f32(v_Cr, v_delta), v_c0);

                vst3q_f32(dst, v_dst);
            }
        else
            for ( ; i <= n - 12; i += 12, dst += 16)
            {
                float32x4x3_t v_src = vld3q_f32(src + i);
                float32x4x4_t v_dst;
                float32x4_t v_Y = v_src.val[0], v_Cr = v_src.val[1], v_Cb = v_src.val[2];

                v_dst.val[bidx] = vmlaq_f32(v_Y, vsubq_f32(v_Cb, v_delta), v_c3);
                v_dst.val[1] = vaddq_f32(vmlaq_f32(vmulq_f32(vsubq_f32(v_Cb, v_delta), v_c2), vsubq_f32(v_Cr, v_delta), v_c1), v_Y);
                v_dst.val[bidx^2] = vmlaq_f32(v_Y, vsubq_f32(v_Cr, v_delta), v_c0);
                v_dst.val[3] = v_alpha;   // alpha channel filled with max()

                vst4q_f32(dst, v_dst);
            }

        // Scalar tail for the remaining (<4) pixels.
        for ( ; i < n; i += 3, dst += dcn)
        {
            float Y = src[i], Cr = src[i+1], Cb = src[i+2];

            float b = Y + (Cb - delta)*C3;
            float g = Y + (Cb - delta)*C2 + (Cr - delta)*C1;
            float r = Y + (Cr - delta)*C0;

            dst[bidx] = b; dst[1] = g; dst[bidx^2] = r;
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    float coeffs[4];
    float32x4_t v_c0, v_c1, v_c2, v_c3, v_alpha, v_delta;
};

#elif CV_SSE2

// SSE2 specialization for float pixels; uses the scalar loop when SSE2 is
// not reported at runtime.
template <>
struct YCrCb2RGB_f<float>
{
    typedef float channel_type;

    YCrCb2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        static const float coeffs0[] = {1.403f, -0.714f, -0.344f, 1.773f};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));

        v_c0 = _mm_set1_ps(coeffs[0]);
        v_c1 = _mm_set1_ps(coeffs[1]);
        v_c2 = _mm_set1_ps(coeffs[2]);
        v_c3 = _mm_set1_ps(coeffs[3]);
        v_delta = _mm_set1_ps(ColorChannel<float>::half());
        v_alpha = _mm_set1_ps(ColorChannel<float>::max());

        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
    }

    // Converts 4 pixels; outputs r/g/b in that order, swapping b and r at the
    // end when blue is channel 0.
    void process(__m128 v_y, __m128 v_cr, __m128 v_cb,
                 __m128 & v_r, __m128 & v_g, __m128 & v_b) const
    {
        v_cb = _mm_sub_ps(v_cb, v_delta);
        v_cr = _mm_sub_ps(v_cr, v_delta);

        v_b = _mm_mul_ps(v_cb, v_c3);
        v_g = _mm_add_ps(_mm_mul_ps(v_cb, v_c2), _mm_mul_ps(v_cr, v_c1));
        v_r = _mm_mul_ps(v_cr, v_c0);

        v_b = _mm_add_ps(v_b, v_y);
        v_g = _mm_add_ps(v_g, v_y);
        v_r = _mm_add_ps(v_r, v_y);

        if (blueIdx == 0)
            std::swap(v_b, v_r);
    }

    void operator()(const float* src, float* dst, int n) const
    {
        int dcn = dstcn, bidx = blueIdx, i = 0;
        const float delta = ColorChannel<float>::half(), alpha = ColorChannel<float>::max();
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
        n *= 3;

        if (haveSIMD)
        {
            // 8 pixels (24 input floats) per iteration.
            for ( ; i <= n - 24; i += 24, dst += 8 * dcn)
            {
                __m128 v_y0 = _mm_loadu_ps(src + i);
                __m128 v_y1 = _mm_loadu_ps(src + i + 4);
                __m128 v_cr0 = _mm_loadu_ps(src + i + 8);
                __m128 v_cr1 = _mm_loadu_ps(src + i + 12);
                __m128 v_cb0 = _mm_loadu_ps(src + i + 16);
                __m128 v_cb1 = _mm_loadu_ps(src + i + 20);

                // Split interleaved YCrCb into planar registers.
                _mm_deinterleave_ps(v_y0, v_y1, v_cr0, v_cr1, v_cb0, v_cb1);

                __m128 v_r0, v_g0, v_b0;
                process(v_y0, v_cr0, v_cb0,
                        v_r0, v_g0, v_b0);

                __m128 v_r1, v_g1, v_b1;
                process(v_y1, v_cr1, v_cb1,
                        v_r1, v_g1, v_b1);

                __m128 v_a0 = v_alpha, v_a1 = v_alpha;

                // Re-interleave into 3 or 4 output channels.
                if (dcn == 3)
                    _mm_interleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);
                else
                    _mm_interleave_ps(v_r0, v_r1, v_g0, v_g1,
                                      v_b0, v_b1, v_a0, v_a1);

                _mm_storeu_ps(dst, v_r0);
                _mm_storeu_ps(dst + 4, v_r1);
                _mm_storeu_ps(dst + 8, v_g0);
                _mm_storeu_ps(dst + 12, v_g1);
                _mm_storeu_ps(dst + 16, v_b0);
                _mm_storeu_ps(dst + 20, v_b1);

                if (dcn == 4)
                {
                    _mm_storeu_ps(dst + 24, v_a0);
                    _mm_storeu_ps(dst + 28, v_a1);
                }
            }
        }

        // Scalar tail (also the whole job when SSE2 is unavailable).
        for ( ; i < n; i += 3, dst += dcn)
        {
            float Y = src[i], Cr = src[i+1], Cb = src[i+2];

            float b = Y + (Cb - delta)*C3;
            float g = Y + (Cb - delta)*C2 + (Cr - delta)*C1;
            float r = Y + (Cr - delta)*C0;

            dst[bidx] = b; dst[1] = g; dst[bidx^2] = r;
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    float coeffs[4];

    __m128 v_c0, v_c1, v_c2, v_c3, v_alpha, v_delta;
    bool haveSIMD;
};

#endif

// Generic fixed-point YCrCb -> RGB/BGR conversion functor.
template<typename _Tp> struct YCrCb2RGB_i
{
    typedef _Tp channel_type;

    YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        // default inverse-transform coefficients (fixed point), overridable
        static const int coeffs0[] = {22987, -11698, -5636, 29049};
        memcpy(coeffs, _coeffs ? _coeffs : coeffs0, 4*sizeof(coeffs[0]));
    }

    // Converts n packed YCrCb triplets to dcn-channel output; each chroma
    // contribution is descaled by yuv_shift with rounding (CV_DESCALE) before
    // being added to Y.  A 4th output channel, if any, is set to max (alpha).
    void operator()(const _Tp* src, _Tp* dst, int n) const
    {
        int dcn = dstcn, bidx = blueIdx;
        const _Tp delta = ColorChannel<_Tp>::half(), alpha = ColorChannel<_Tp>::max();
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
        n *= 3;
        for(int i = 0; i < n; i += 3, dst += dcn)
        {
            _Tp Y = src[i];
            _Tp Cr = src[i+1];
            _Tp Cb = src[i+2];

            int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
            int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
            int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift);

            dst[bidx] = saturate_cast<_Tp>(b);
            dst[1] = saturate_cast<_Tp>(g);
            dst[bidx^2] = saturate_cast<_Tp>(r);
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    int coeffs[4];
};

#if CV_NEON

// NEON specialization for 8-bit pixels: 8 pixels per vector iteration.
template <>
struct YCrCb2RGB_i<uchar>
{
    typedef uchar channel_type;

    YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        static const int coeffs0[] = {22987, -11698, -5636, 29049};
        memcpy(coeffs, _coeffs ?
_coeffs : coeffs0, 4*sizeof(coeffs[0]));

        // Coefficients in 32-bit lanes; the chroma offset stays 16-bit (it is
        // consumed by widening subtracts), alpha fill is an 8-bit vector.
        v_c0 = vdupq_n_s32(coeffs[0]);
        v_c1 = vdupq_n_s32(coeffs[1]);
        v_c2 = vdupq_n_s32(coeffs[2]);
        v_c3 = vdupq_n_s32(coeffs[3]);
        v_delta = vdup_n_s16(ColorChannel<uchar>::half());
        v_delta2 = vdupq_n_s32(1 << (yuv_shift - 1));  // CV_DESCALE rounding bias
        v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
    }

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int dcn = dstcn, bidx = blueIdx, i = 0;
        const uchar delta = ColorChannel<uchar>::half(), alpha = ColorChannel<uchar>::max();
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
        n *= 3;

        // Vector loop: 8 pixels (24 interleaved input bytes) per iteration.
        for ( ; i <= n - 24; i += 24, dst += dcn * 8)
        {
            uint8x8x3_t v_src = vld3_u8(src + i);
            int16x8x3_t v_src16;
            v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0]));
            v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1]));
            v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2]));

            // Low 4 pixels.
            int16x4_t v_Y = vget_low_s16(v_src16.val[0]),
                      v_Cr = vget_low_s16(v_src16.val[1]),
                      v_Cb = vget_low_s16(v_src16.val[2]);

            // ch = Y + round_shift(Ck * (chroma - delta))
            int32x4_t v_b0 = vmulq_s32(v_c3, vsubl_s16(v_Cb, v_delta));
            v_b0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_b0, v_delta2), yuv_shift), v_Y);
            int32x4_t v_g0 = vmlaq_s32(vmulq_s32(vsubl_s16(v_Cr, v_delta), v_c1), vsubl_s16(v_Cb, v_delta), v_c2);
            v_g0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_g0, v_delta2), yuv_shift), v_Y);
            int32x4_t v_r0 = vmulq_s32(v_c0, vsubl_s16(v_Cr, v_delta));
            v_r0 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_r0, v_delta2), yuv_shift), v_Y);

            // High 4 pixels.
            v_Y = vget_high_s16(v_src16.val[0]);
            v_Cr = vget_high_s16(v_src16.val[1]);
            v_Cb = vget_high_s16(v_src16.val[2]);

            int32x4_t v_b1 = vmulq_s32(v_c3, vsubl_s16(v_Cb, v_delta));
            v_b1 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_b1, v_delta2), yuv_shift), v_Y);
            int32x4_t v_g1 = vmlaq_s32(vmulq_s32(vsubl_s16(v_Cr, v_delta), v_c1), vsubl_s16(v_Cb, v_delta), v_c2);
            v_g1 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_g1, v_delta2), yuv_shift), v_Y);
            int32x4_t v_r1 = vmulq_s32(v_c0, vsubl_s16(v_Cr, v_delta));
            v_r1 = vaddw_s16(vshrq_n_s32(vaddq_s32(v_r1, v_delta2), yuv_shift), v_Y);

            // Narrow to u8 with saturation.
            uint8x8_t v_b = vqmovun_s16(vcombine_s16(vmovn_s32(v_b0), vmovn_s32(v_b1)));
            uint8x8_t v_g = vqmovun_s16(vcombine_s16(vmovn_s32(v_g0), vmovn_s32(v_g1)));
            uint8x8_t v_r = vqmovun_s16(vcombine_s16(vmovn_s32(v_r0), vmovn_s32(v_r1)));

            // Interleaved store, with alpha fill for 4-channel output.
            if (dcn == 3)
            {
                uint8x8x3_t v_dst;
                v_dst.val[bidx] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[bidx^2] = v_r;
                vst3_u8(dst, v_dst);
            }
            else
            {
                uint8x8x4_t v_dst;
                v_dst.val[bidx] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[bidx^2] = v_r;
                v_dst.val[3] = v_alpha;
                vst4_u8(dst, v_dst);
            }
        }

        // Scalar tail for the remaining (<8) pixels.
        for ( ; i < n; i += 3, dst += dcn)
        {
            uchar Y = src[i];
            uchar Cr = src[i+1];
            uchar Cb = src[i+2];

            int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
            int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
            int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift);

            dst[bidx] = saturate_cast<uchar>(b);
            dst[1] = saturate_cast<uchar>(g);
            dst[bidx^2] = saturate_cast<uchar>(r);
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    int coeffs[4];

    int32x4_t v_c0, v_c1, v_c2, v_c3, v_delta2;
    int16x4_t v_delta;
    uint8x8_t v_alpha;
};

// NEON specialization for 16-bit pixels.
template <>
struct YCrCb2RGB_i<ushort>
{
    typedef ushort channel_type;

    YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        static const int coeffs0[] = {22987, -11698, -5636, 29049};
        memcpy(coeffs, _coeffs ?
_coeffs : coeffs0, 4*sizeof(coeffs[0]));

        // Everything in 32-bit lanes (16-bit inputs are widened to s32);
        // alpha fill is kept in both 8-lane and 4-lane forms for the two loops.
        v_c0 = vdupq_n_s32(coeffs[0]);
        v_c1 = vdupq_n_s32(coeffs[1]);
        v_c2 = vdupq_n_s32(coeffs[2]);
        v_c3 = vdupq_n_s32(coeffs[3]);
        v_delta = vdupq_n_s32(ColorChannel<ushort>::half());
        v_delta2 = vdupq_n_s32(1 << (yuv_shift - 1));  // CV_DESCALE rounding bias
        v_alpha = vdupq_n_u16(ColorChannel<ushort>::max());
        v_alpha2 = vget_low_u16(v_alpha);
    }

    void operator()(const ushort* src, ushort* dst, int n) const
    {
        int dcn = dstcn, bidx = blueIdx, i = 0;
        const ushort delta = ColorChannel<ushort>::half(), alpha = ColorChannel<ushort>::max();
        int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3];
        n *= 3;

        // Main vector loop: 8 pixels per iteration, in two 4-lane halves.
        for ( ; i <= n - 24; i += 24, dst += dcn * 8)
        {
            uint16x8x3_t v_src = vld3q_u16(src + i);

            int32x4_t v_Y = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[0]))),
                      v_Cr = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1]))),
                      v_Cb = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2])));

            int32x4_t v_b0 = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
            v_b0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b0, v_delta2), yuv_shift), v_Y);
            int32x4_t v_g0 = vmlaq_s32(vmulq_s32(vsubq_s32(v_Cr, v_delta), v_c1), vsubq_s32(v_Cb, v_delta), v_c2);
            v_g0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_g0, v_delta2), yuv_shift), v_Y);
            int32x4_t v_r0 = vmulq_s32(v_c0, vsubq_s32(v_Cr, v_delta));
            v_r0 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_r0, v_delta2), yuv_shift), v_Y);

            // High 4 pixels (single comma-chained statement in the original).
            v_Y = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[0]))),
            v_Cr = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1]))),
            v_Cb = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2])));

            int32x4_t v_b1 = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
            v_b1 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b1, v_delta2), yuv_shift), v_Y);
            int32x4_t v_g1 = vmlaq_s32(vmulq_s32(vsubq_s32(v_Cr, v_delta), v_c1), vsubq_s32(v_Cb, v_delta), v_c2);
            v_g1 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_g1, v_delta2), yuv_shift), v_Y);
            int32x4_t v_r1 = vmulq_s32(v_c0, vsubq_s32(v_Cr, v_delta));
            v_r1 = vaddq_s32(vshrq_n_s32(vaddq_s32(v_r1, v_delta2), yuv_shift), v_Y);

            // Saturating narrow s32 -> u16.
            uint16x8_t v_b = vcombine_u16(vqmovun_s32(v_b0), vqmovun_s32(v_b1));
            uint16x8_t v_g = vcombine_u16(vqmovun_s32(v_g0), vqmovun_s32(v_g1));
            uint16x8_t v_r = vcombine_u16(vqmovun_s32(v_r0), vqmovun_s32(v_r1));

            if (dcn == 3)
            {
                uint16x8x3_t v_dst;
                v_dst.val[bidx] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[bidx^2] = v_r;
                vst3q_u16(dst, v_dst);
            }
            else
            {
                uint16x8x4_t v_dst;
                v_dst.val[bidx] = v_b;
                v_dst.val[1] = v_g;
                v_dst.val[bidx^2] = v_r;
                v_dst.val[3] = v_alpha;
                vst4q_u16(dst, v_dst);
            }
        }

        // Secondary vector loop: 4 pixels per iteration.
        for ( ; i <= n - 12; i += 12, dst += dcn * 4)
        {
            uint16x4x3_t v_src = vld3_u16(src + i);

            int32x4_t v_Y = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0])),
                      v_Cr = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1])),
                      v_Cb = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2]));

            int32x4_t v_b = vmulq_s32(v_c3, vsubq_s32(v_Cb, v_delta));
            v_b = vaddq_s32(vshrq_n_s32(vaddq_s32(v_b, v_delta2), yuv_shift), v_Y);
            int32x4_t v_g = vmlaq_s32(vmulq_s32(vsubq_s32(v_Cr, v_delta), v_c1), vsubq_s32(v_Cb, v_delta), v_c2);
            v_g = vaddq_s32(vshrq_n_s32(vaddq_s32(v_g, v_delta2), yuv_shift), v_Y);
            int32x4_t v_r = vmulq_s32(vsubq_s32(v_Cr, v_delta), v_c0);
            v_r = vaddq_s32(vshrq_n_s32(vaddq_s32(v_r, v_delta2), yuv_shift), v_Y);

            uint16x4_t v_bd = vqmovun_s32(v_b);
            uint16x4_t v_gd = vqmovun_s32(v_g);
            uint16x4_t v_rd = vqmovun_s32(v_r);

            if (dcn == 3)
            {
                uint16x4x3_t v_dst;
                v_dst.val[bidx] = v_bd;
                v_dst.val[1] = v_gd;
                v_dst.val[bidx^2] = v_rd;
                vst3_u16(dst, v_dst);
            }
            else
            {
                uint16x4x4_t v_dst;
                v_dst.val[bidx] = v_bd;
                v_dst.val[1] = v_gd;
                v_dst.val[bidx^2] = v_rd;
                v_dst.val[3] = v_alpha2;
                vst4_u16(dst, v_dst);
            }
        }

        // Scalar tail for the remaining (<4) pixels.
        for ( ; i < n; i += 3, dst += dcn)
        {
            ushort Y = src[i];
            ushort Cr = src[i+1];
            ushort Cb = src[i+2];

            int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift);
            int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift);
            int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift);

            dst[bidx] = saturate_cast<ushort>(b);
            dst[1] = saturate_cast<ushort>(g);
            dst[bidx^2] = saturate_cast<ushort>(r);
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }
    int dstcn, blueIdx;
    int coeffs[4];

    int32x4_t v_c0, v_c1, v_c2, v_c3, v_delta2, v_delta;
    uint16x8_t v_alpha;
    uint16x4_t v_alpha2;
};

#elif CV_SSE2

// SSE2 specialization for 8-bit pixels (continues beyond this chunk).
template <>
struct YCrCb2RGB_i<uchar>
{
    typedef uchar channel_type;

    YCrCb2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs)
        : dstcn(_dstcn), blueIdx(_blueIdx)
    {
        static const int coeffs0[] = {22987, -11698, -5636, 29049};
        memcpy(coeffs, _coeffs ?
_coeffs : coeffs0, 4*sizeof(coeffs[0])); 02949 02950 v_c0 = _mm_set1_epi16((short)coeffs[0]); 02951 v_c1 = _mm_set1_epi16((short)coeffs[1]); 02952 v_c2 = _mm_set1_epi16((short)coeffs[2]); 02953 v_c3 = _mm_set1_epi16((short)coeffs[3]); 02954 v_delta = _mm_set1_epi16(ColorChannel<uchar>::half()); 02955 v_delta2 = _mm_set1_epi32(1 << (yuv_shift - 1)); 02956 v_zero = _mm_setzero_si128(); 02957 02958 uchar alpha = ColorChannel<uchar>::max(); 02959 v_alpha = _mm_set1_epi8(*(char *)&alpha); 02960 02961 useSSE = coeffs[0] <= std::numeric_limits<short>::max(); 02962 haveSIMD = checkHardwareSupport(CV_CPU_SSE2); 02963 } 02964 02965 // 16s x 8 02966 void process(__m128i v_y, __m128i v_cr, __m128i v_cb, 02967 __m128i & v_r, __m128i & v_g, __m128i & v_b) const 02968 { 02969 v_cr = _mm_sub_epi16(v_cr, v_delta); 02970 v_cb = _mm_sub_epi16(v_cb, v_delta); 02971 02972 __m128i v_y_p = _mm_unpacklo_epi16(v_y, v_zero); 02973 02974 __m128i v_mullo_3 = _mm_mullo_epi16(v_cb, v_c3); 02975 __m128i v_mullo_2 = _mm_mullo_epi16(v_cb, v_c2); 02976 __m128i v_mullo_1 = _mm_mullo_epi16(v_cr, v_c1); 02977 __m128i v_mullo_0 = _mm_mullo_epi16(v_cr, v_c0); 02978 02979 __m128i v_mulhi_3 = _mm_mulhi_epi16(v_cb, v_c3); 02980 __m128i v_mulhi_2 = _mm_mulhi_epi16(v_cb, v_c2); 02981 __m128i v_mulhi_1 = _mm_mulhi_epi16(v_cr, v_c1); 02982 __m128i v_mulhi_0 = _mm_mulhi_epi16(v_cr, v_c0); 02983 02984 __m128i v_b0 = _mm_srai_epi32(_mm_add_epi32(_mm_unpacklo_epi16(v_mullo_3, v_mulhi_3), v_delta2), yuv_shift); 02985 __m128i v_g0 = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(_mm_unpacklo_epi16(v_mullo_2, v_mulhi_2), 02986 _mm_unpacklo_epi16(v_mullo_1, v_mulhi_1)), v_delta2), 02987 yuv_shift); 02988 __m128i v_r0 = _mm_srai_epi32(_mm_add_epi32(_mm_unpacklo_epi16(v_mullo_0, v_mulhi_0), v_delta2), yuv_shift); 02989 02990 v_r0 = _mm_add_epi32(v_r0, v_y_p); 02991 v_g0 = _mm_add_epi32(v_g0, v_y_p); 02992 v_b0 = _mm_add_epi32(v_b0, v_y_p); 02993 02994 v_y_p = _mm_unpackhi_epi16(v_y, v_zero); 02995 02996 __m128i v_b1 = 
_mm_srai_epi32(_mm_add_epi32(_mm_unpackhi_epi16(v_mullo_3, v_mulhi_3), v_delta2), yuv_shift); 02997 __m128i v_g1 = _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(_mm_unpackhi_epi16(v_mullo_2, v_mulhi_2), 02998 _mm_unpackhi_epi16(v_mullo_1, v_mulhi_1)), v_delta2), 02999 yuv_shift); 03000 __m128i v_r1 = _mm_srai_epi32(_mm_add_epi32(_mm_unpackhi_epi16(v_mullo_0, v_mulhi_0), v_delta2), yuv_shift); 03001 03002 v_r1 = _mm_add_epi32(v_r1, v_y_p); 03003 v_g1 = _mm_add_epi32(v_g1, v_y_p); 03004 v_b1 = _mm_add_epi32(v_b1, v_y_p); 03005 03006 v_r = _mm_packs_epi32(v_r0, v_r1); 03007 v_g = _mm_packs_epi32(v_g0, v_g1); 03008 v_b = _mm_packs_epi32(v_b0, v_b1); 03009 } 03010 03011 void operator()(const uchar* src, uchar* dst, int n) const 03012 { 03013 int dcn = dstcn, bidx = blueIdx, i = 0; 03014 const uchar delta = ColorChannel<uchar>::half(), alpha = ColorChannel<uchar>::max(); 03015 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], C3 = coeffs[3]; 03016 n *= 3; 03017 03018 if (haveSIMD && useSSE) 03019 { 03020 for ( ; i <= n - 96; i += 96, dst += dcn * 32) 03021 { 03022 __m128i v_y0 = _mm_loadu_si128((__m128i const *)(src + i)); 03023 __m128i v_y1 = _mm_loadu_si128((__m128i const *)(src + i + 16)); 03024 __m128i v_cr0 = _mm_loadu_si128((__m128i const *)(src + i + 32)); 03025 __m128i v_cr1 = _mm_loadu_si128((__m128i const *)(src + i + 48)); 03026 __m128i v_cb0 = _mm_loadu_si128((__m128i const *)(src + i + 64)); 03027 __m128i v_cb1 = _mm_loadu_si128((__m128i const *)(src + i + 80)); 03028 03029 _mm_deinterleave_epi8(v_y0, v_y1, v_cr0, v_cr1, v_cb0, v_cb1); 03030 03031 __m128i v_r_0 = v_zero, v_g_0 = v_zero, v_b_0 = v_zero; 03032 process(_mm_unpacklo_epi8(v_y0, v_zero), 03033 _mm_unpacklo_epi8(v_cr0, v_zero), 03034 _mm_unpacklo_epi8(v_cb0, v_zero), 03035 v_r_0, v_g_0, v_b_0); 03036 03037 __m128i v_r_1 = v_zero, v_g_1 = v_zero, v_b_1 = v_zero; 03038 process(_mm_unpackhi_epi8(v_y0, v_zero), 03039 _mm_unpackhi_epi8(v_cr0, v_zero), 03040 _mm_unpackhi_epi8(v_cb0, v_zero), 03041 v_r_1, 
v_g_1, v_b_1); 03042 03043 __m128i v_r0 = _mm_packus_epi16(v_r_0, v_r_1); 03044 __m128i v_g0 = _mm_packus_epi16(v_g_0, v_g_1); 03045 __m128i v_b0 = _mm_packus_epi16(v_b_0, v_b_1); 03046 03047 process(_mm_unpacklo_epi8(v_y1, v_zero), 03048 _mm_unpacklo_epi8(v_cr1, v_zero), 03049 _mm_unpacklo_epi8(v_cb1, v_zero), 03050 v_r_0, v_g_0, v_b_0); 03051 03052 process(_mm_unpackhi_epi8(v_y1, v_zero), 03053 _mm_unpackhi_epi8(v_cr1, v_zero), 03054 _mm_unpackhi_epi8(v_cb1, v_zero), 03055 v_r_1, v_g_1, v_b_1); 03056 03057 __m128i v_r1 = _mm_packus_epi16(v_r_0, v_r_1); 03058 __m128i v_g1 = _mm_packus_epi16(v_g_0, v_g_1); 03059 __m128i v_b1 = _mm_packus_epi16(v_b_0, v_b_1); 03060 03061 if (bidx == 0) 03062 { 03063 std::swap(v_r0, v_b0); 03064 std::swap(v_r1, v_b1); 03065 } 03066 03067 __m128i v_a0 = v_alpha, v_a1 = v_alpha; 03068 03069 if (dcn == 3) 03070 _mm_interleave_epi8(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1); 03071 else 03072 _mm_interleave_epi8(v_r0, v_r1, v_g0, v_g1, 03073 v_b0, v_b1, v_a0, v_a1); 03074 03075 _mm_storeu_si128((__m128i *)(dst), v_r0); 03076 _mm_storeu_si128((__m128i *)(dst + 16), v_r1); 03077 _mm_storeu_si128((__m128i *)(dst + 32), v_g0); 03078 _mm_storeu_si128((__m128i *)(dst + 48), v_g1); 03079 _mm_storeu_si128((__m128i *)(dst + 64), v_b0); 03080 _mm_storeu_si128((__m128i *)(dst + 80), v_b1); 03081 03082 if (dcn == 4) 03083 { 03084 _mm_storeu_si128((__m128i *)(dst + 96), v_a0); 03085 _mm_storeu_si128((__m128i *)(dst + 112), v_a1); 03086 } 03087 } 03088 } 03089 03090 for ( ; i < n; i += 3, dst += dcn) 03091 { 03092 uchar Y = src[i]; 03093 uchar Cr = src[i+1]; 03094 uchar Cb = src[i+2]; 03095 03096 int b = Y + CV_DESCALE((Cb - delta)*C3, yuv_shift); 03097 int g = Y + CV_DESCALE((Cb - delta)*C2 + (Cr - delta)*C1, yuv_shift); 03098 int r = Y + CV_DESCALE((Cr - delta)*C0, yuv_shift); 03099 03100 dst[bidx] = saturate_cast<uchar>(b); 03101 dst[1] = saturate_cast<uchar>(g); 03102 dst[bidx^2] = saturate_cast<uchar>(r); 03103 if( dcn == 4 ) 03104 dst[3] = alpha; 03105 
} 03106 } 03107 int dstcn, blueIdx; 03108 int coeffs[4]; 03109 bool useSSE, haveSIMD; 03110 03111 __m128i v_c0, v_c1, v_c2, v_c3, v_delta2; 03112 __m128i v_delta, v_alpha, v_zero; 03113 }; 03114 03115 #endif // CV_SSE2 03116 03117 ////////////////////////////////////// RGB <-> XYZ /////////////////////////////////////// 03118 03119 static const float sRGB2XYZ_D65[] = 03120 { 03121 0.412453f, 0.357580f, 0.180423f, 03122 0.212671f, 0.715160f, 0.072169f, 03123 0.019334f, 0.119193f, 0.950227f 03124 }; 03125 03126 static const float XYZ2sRGB_D65[] = 03127 { 03128 3.240479f, -1.53715f, -0.498535f, 03129 -0.969256f, 1.875991f, 0.041556f, 03130 0.055648f, -0.204043f, 1.057311f 03131 }; 03132 03133 template<typename _Tp> struct RGB2XYZ_f 03134 { 03135 typedef _Tp channel_type; 03136 03137 RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn) 03138 { 03139 memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0])); 03140 if(blueIdx == 0) 03141 { 03142 std::swap(coeffs[0], coeffs[2]); 03143 std::swap(coeffs[3], coeffs[5]); 03144 std::swap(coeffs[6], coeffs[8]); 03145 } 03146 } 03147 void operator()(const _Tp* src, _Tp* dst, int n) const 03148 { 03149 int scn = srccn; 03150 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], 03151 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], 03152 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; 03153 03154 n *= 3; 03155 for(int i = 0; i < n; i += 3, src += scn) 03156 { 03157 _Tp X = saturate_cast<_Tp>(src[0]*C0 + src[1]*C1 + src[2]*C2); 03158 _Tp Y = saturate_cast<_Tp>(src[0]*C3 + src[1]*C4 + src[2]*C5); 03159 _Tp Z = saturate_cast<_Tp>(src[0]*C6 + src[1]*C7 + src[2]*C8); 03160 dst[i] = X; dst[i+1] = Y; dst[i+2] = Z; 03161 } 03162 } 03163 int srccn; 03164 float coeffs[9]; 03165 }; 03166 03167 #if CV_NEON 03168 03169 template <> 03170 struct RGB2XYZ_f<float> 03171 { 03172 typedef float channel_type; 03173 03174 RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn) 03175 { 03176 
memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0])); 03177 if(blueIdx == 0) 03178 { 03179 std::swap(coeffs[0], coeffs[2]); 03180 std::swap(coeffs[3], coeffs[5]); 03181 std::swap(coeffs[6], coeffs[8]); 03182 } 03183 03184 v_c0 = vdupq_n_f32(coeffs[0]); 03185 v_c1 = vdupq_n_f32(coeffs[1]); 03186 v_c2 = vdupq_n_f32(coeffs[2]); 03187 v_c3 = vdupq_n_f32(coeffs[3]); 03188 v_c4 = vdupq_n_f32(coeffs[4]); 03189 v_c5 = vdupq_n_f32(coeffs[5]); 03190 v_c6 = vdupq_n_f32(coeffs[6]); 03191 v_c7 = vdupq_n_f32(coeffs[7]); 03192 v_c8 = vdupq_n_f32(coeffs[8]); 03193 } 03194 03195 void operator()(const float* src, float* dst, int n) const 03196 { 03197 int scn = srccn, i = 0; 03198 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], 03199 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], 03200 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; 03201 03202 n *= 3; 03203 03204 if (scn == 3) 03205 for ( ; i <= n - 12; i += 12, src += 12) 03206 { 03207 float32x4x3_t v_src = vld3q_f32(src), v_dst; 03208 v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2); 03209 v_dst.val[1] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c3), v_src.val[1], v_c4), v_src.val[2], v_c5); 03210 v_dst.val[2] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c6), v_src.val[1], v_c7), v_src.val[2], v_c8); 03211 vst3q_f32(dst + i, v_dst); 03212 } 03213 else 03214 for ( ; i <= n - 12; i += 12, src += 16) 03215 { 03216 float32x4x4_t v_src = vld4q_f32(src); 03217 float32x4x3_t v_dst; 03218 v_dst.val[0] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c0), v_src.val[1], v_c1), v_src.val[2], v_c2); 03219 v_dst.val[1] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c3), v_src.val[1], v_c4), v_src.val[2], v_c5); 03220 v_dst.val[2] = vmlaq_f32(vmlaq_f32(vmulq_f32(v_src.val[0], v_c6), v_src.val[1], v_c7), v_src.val[2], v_c8); 03221 vst3q_f32(dst + i, v_dst); 03222 } 03223 03224 for ( ; i < n; i += 3, src += scn) 03225 { 03226 float X = 
saturate_cast<float>(src[0]*C0 + src[1]*C1 + src[2]*C2); 03227 float Y = saturate_cast<float>(src[0]*C3 + src[1]*C4 + src[2]*C5); 03228 float Z = saturate_cast<float>(src[0]*C6 + src[1]*C7 + src[2]*C8); 03229 dst[i] = X; dst[i+1] = Y; dst[i+2] = Z; 03230 } 03231 } 03232 03233 int srccn; 03234 float coeffs[9]; 03235 float32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8; 03236 }; 03237 03238 #elif CV_SSE2 03239 03240 template <> 03241 struct RGB2XYZ_f<float> 03242 { 03243 typedef float channel_type; 03244 03245 RGB2XYZ_f(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn) 03246 { 03247 memcpy(coeffs, _coeffs ? _coeffs : sRGB2XYZ_D65, 9*sizeof(coeffs[0])); 03248 if(blueIdx == 0) 03249 { 03250 std::swap(coeffs[0], coeffs[2]); 03251 std::swap(coeffs[3], coeffs[5]); 03252 std::swap(coeffs[6], coeffs[8]); 03253 } 03254 03255 v_c0 = _mm_set1_ps(coeffs[0]); 03256 v_c1 = _mm_set1_ps(coeffs[1]); 03257 v_c2 = _mm_set1_ps(coeffs[2]); 03258 v_c3 = _mm_set1_ps(coeffs[3]); 03259 v_c4 = _mm_set1_ps(coeffs[4]); 03260 v_c5 = _mm_set1_ps(coeffs[5]); 03261 v_c6 = _mm_set1_ps(coeffs[6]); 03262 v_c7 = _mm_set1_ps(coeffs[7]); 03263 v_c8 = _mm_set1_ps(coeffs[8]); 03264 03265 haveSIMD = checkHardwareSupport(CV_CPU_SSE2); 03266 } 03267 03268 void process(__m128 v_r, __m128 v_g, __m128 v_b, 03269 __m128 & v_x, __m128 & v_y, __m128 & v_z) const 03270 { 03271 v_x = _mm_mul_ps(v_r, v_c0); 03272 v_x = _mm_add_ps(v_x, _mm_mul_ps(v_g, v_c1)); 03273 v_x = _mm_add_ps(v_x, _mm_mul_ps(v_b, v_c2)); 03274 03275 v_y = _mm_mul_ps(v_r, v_c3); 03276 v_y = _mm_add_ps(v_y, _mm_mul_ps(v_g, v_c4)); 03277 v_y = _mm_add_ps(v_y, _mm_mul_ps(v_b, v_c5)); 03278 03279 v_z = _mm_mul_ps(v_r, v_c6); 03280 v_z = _mm_add_ps(v_z, _mm_mul_ps(v_g, v_c7)); 03281 v_z = _mm_add_ps(v_z, _mm_mul_ps(v_b, v_c8)); 03282 } 03283 03284 void operator()(const float* src, float* dst, int n) const 03285 { 03286 int scn = srccn, i = 0; 03287 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], 03288 C3 = coeffs[3], C4 
= coeffs[4], C5 = coeffs[5], 03289 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; 03290 03291 n *= 3; 03292 03293 if (haveSIMD) 03294 { 03295 for ( ; i <= n - 24; i += 24, src += 8 * scn) 03296 { 03297 __m128 v_r0 = _mm_loadu_ps(src); 03298 __m128 v_r1 = _mm_loadu_ps(src + 4); 03299 __m128 v_g0 = _mm_loadu_ps(src + 8); 03300 __m128 v_g1 = _mm_loadu_ps(src + 12); 03301 __m128 v_b0 = _mm_loadu_ps(src + 16); 03302 __m128 v_b1 = _mm_loadu_ps(src + 20); 03303 03304 if (scn == 4) 03305 { 03306 __m128 v_a0 = _mm_loadu_ps(src + 24); 03307 __m128 v_a1 = _mm_loadu_ps(src + 28); 03308 03309 _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1, 03310 v_b0, v_b1, v_a0, v_a1); 03311 } 03312 else 03313 _mm_deinterleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1); 03314 03315 __m128 v_x0, v_y0, v_z0; 03316 process(v_r0, v_g0, v_b0, 03317 v_x0, v_y0, v_z0); 03318 03319 __m128 v_x1, v_y1, v_z1; 03320 process(v_r1, v_g1, v_b1, 03321 v_x1, v_y1, v_z1); 03322 03323 _mm_interleave_ps(v_x0, v_x1, v_y0, v_y1, v_z0, v_z1); 03324 03325 _mm_storeu_ps(dst + i, v_x0); 03326 _mm_storeu_ps(dst + i + 4, v_x1); 03327 _mm_storeu_ps(dst + i + 8, v_y0); 03328 _mm_storeu_ps(dst + i + 12, v_y1); 03329 _mm_storeu_ps(dst + i + 16, v_z0); 03330 _mm_storeu_ps(dst + i + 20, v_z1); 03331 } 03332 } 03333 03334 for ( ; i < n; i += 3, src += scn) 03335 { 03336 float X = saturate_cast<float>(src[0]*C0 + src[1]*C1 + src[2]*C2); 03337 float Y = saturate_cast<float>(src[0]*C3 + src[1]*C4 + src[2]*C5); 03338 float Z = saturate_cast<float>(src[0]*C6 + src[1]*C7 + src[2]*C8); 03339 dst[i] = X; dst[i+1] = Y; dst[i+2] = Z; 03340 } 03341 } 03342 03343 int srccn; 03344 float coeffs[9]; 03345 __m128 v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8; 03346 bool haveSIMD; 03347 }; 03348 03349 03350 #endif 03351 03352 template<typename _Tp> struct RGB2XYZ_i 03353 { 03354 typedef _Tp channel_type; 03355 03356 RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn) 03357 { 03358 static const int coeffs0[] = 03359 { 03360 
1689, 1465, 739, 03361 871, 2929, 296, 03362 79, 488, 3892 03363 }; 03364 for( int i = 0; i < 9; i++ ) 03365 coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i]; 03366 if(blueIdx == 0) 03367 { 03368 std::swap(coeffs[0], coeffs[2]); 03369 std::swap(coeffs[3], coeffs[5]); 03370 std::swap(coeffs[6], coeffs[8]); 03371 } 03372 } 03373 void operator()(const _Tp* src, _Tp* dst, int n) const 03374 { 03375 int scn = srccn; 03376 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], 03377 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], 03378 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; 03379 n *= 3; 03380 03381 for(int i = 0; i < n; i += 3, src += scn) 03382 { 03383 int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift); 03384 int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift); 03385 int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift); 03386 dst[i] = saturate_cast<_Tp>(X); dst[i+1] = saturate_cast<_Tp>(Y); 03387 dst[i+2] = saturate_cast<_Tp>(Z); 03388 } 03389 } 03390 int srccn; 03391 int coeffs[9]; 03392 }; 03393 03394 #if CV_NEON 03395 03396 template <> 03397 struct RGB2XYZ_i<uchar> 03398 { 03399 typedef uchar channel_type; 03400 03401 RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn) 03402 { 03403 static const int coeffs0[] = 03404 { 03405 1689, 1465, 739, 03406 871, 2929, 296, 03407 79, 488, 3892 03408 }; 03409 for( int i = 0; i < 9; i++ ) 03410 coeffs[i] = _coeffs ? 
cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i]; 03411 if(blueIdx == 0) 03412 { 03413 std::swap(coeffs[0], coeffs[2]); 03414 std::swap(coeffs[3], coeffs[5]); 03415 std::swap(coeffs[6], coeffs[8]); 03416 } 03417 03418 v_c0 = vdup_n_u16(coeffs[0]); 03419 v_c1 = vdup_n_u16(coeffs[1]); 03420 v_c2 = vdup_n_u16(coeffs[2]); 03421 v_c3 = vdup_n_u16(coeffs[3]); 03422 v_c4 = vdup_n_u16(coeffs[4]); 03423 v_c5 = vdup_n_u16(coeffs[5]); 03424 v_c6 = vdup_n_u16(coeffs[6]); 03425 v_c7 = vdup_n_u16(coeffs[7]); 03426 v_c8 = vdup_n_u16(coeffs[8]); 03427 v_delta = vdupq_n_u32(1 << (xyz_shift - 1)); 03428 } 03429 void operator()(const uchar * src, uchar * dst, int n) const 03430 { 03431 int scn = srccn, i = 0; 03432 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], 03433 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], 03434 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; 03435 n *= 3; 03436 03437 for ( ; i <= n - 24; i += 24, src += scn * 8) 03438 { 03439 uint8x8x3_t v_dst; 03440 uint16x8x3_t v_src16; 03441 03442 if (scn == 3) 03443 { 03444 uint8x8x3_t v_src = vld3_u8(src); 03445 v_src16.val[0] = vmovl_u8(v_src.val[0]); 03446 v_src16.val[1] = vmovl_u8(v_src.val[1]); 03447 v_src16.val[2] = vmovl_u8(v_src.val[2]); 03448 } 03449 else 03450 { 03451 uint8x8x4_t v_src = vld4_u8(src); 03452 v_src16.val[0] = vmovl_u8(v_src.val[0]); 03453 v_src16.val[1] = vmovl_u8(v_src.val[1]); 03454 v_src16.val[2] = vmovl_u8(v_src.val[2]); 03455 } 03456 03457 uint16x4_t v_s0 = vget_low_u16(v_src16.val[0]), 03458 v_s1 = vget_low_u16(v_src16.val[1]), 03459 v_s2 = vget_low_u16(v_src16.val[2]); 03460 03461 uint32x4_t v_X0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); 03462 uint32x4_t v_Y0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); 03463 uint32x4_t v_Z0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); 03464 v_X0 = vshrq_n_u32(vaddq_u32(v_X0, v_delta), xyz_shift); 03465 v_Y0 = vshrq_n_u32(vaddq_u32(v_Y0, v_delta), xyz_shift); 03466 v_Z0 
= vshrq_n_u32(vaddq_u32(v_Z0, v_delta), xyz_shift); 03467 03468 v_s0 = vget_high_u16(v_src16.val[0]), 03469 v_s1 = vget_high_u16(v_src16.val[1]), 03470 v_s2 = vget_high_u16(v_src16.val[2]); 03471 03472 uint32x4_t v_X1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); 03473 uint32x4_t v_Y1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); 03474 uint32x4_t v_Z1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); 03475 v_X1 = vshrq_n_u32(vaddq_u32(v_X1, v_delta), xyz_shift); 03476 v_Y1 = vshrq_n_u32(vaddq_u32(v_Y1, v_delta), xyz_shift); 03477 v_Z1 = vshrq_n_u32(vaddq_u32(v_Z1, v_delta), xyz_shift); 03478 03479 v_dst.val[0] = vqmovn_u16(vcombine_u16(vmovn_u32(v_X0), vmovn_u32(v_X1))); 03480 v_dst.val[1] = vqmovn_u16(vcombine_u16(vmovn_u32(v_Y0), vmovn_u32(v_Y1))); 03481 v_dst.val[2] = vqmovn_u16(vcombine_u16(vmovn_u32(v_Z0), vmovn_u32(v_Z1))); 03482 03483 vst3_u8(dst + i, v_dst); 03484 } 03485 03486 for ( ; i < n; i += 3, src += scn) 03487 { 03488 int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift); 03489 int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift); 03490 int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, xyz_shift); 03491 dst[i] = saturate_cast<uchar>(X); 03492 dst[i+1] = saturate_cast<uchar>(Y); 03493 dst[i+2] = saturate_cast<uchar>(Z); 03494 } 03495 } 03496 03497 int srccn, coeffs[9]; 03498 uint16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8; 03499 uint32x4_t v_delta; 03500 }; 03501 03502 template <> 03503 struct RGB2XYZ_i<ushort> 03504 { 03505 typedef ushort channel_type; 03506 03507 RGB2XYZ_i(int _srccn, int blueIdx, const float* _coeffs) : srccn(_srccn) 03508 { 03509 static const int coeffs0[] = 03510 { 03511 1689, 1465, 739, 03512 871, 2929, 296, 03513 79, 488, 3892 03514 }; 03515 for( int i = 0; i < 9; i++ ) 03516 coeffs[i] = _coeffs ? 
cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i]; 03517 if(blueIdx == 0) 03518 { 03519 std::swap(coeffs[0], coeffs[2]); 03520 std::swap(coeffs[3], coeffs[5]); 03521 std::swap(coeffs[6], coeffs[8]); 03522 } 03523 03524 v_c0 = vdup_n_u16(coeffs[0]); 03525 v_c1 = vdup_n_u16(coeffs[1]); 03526 v_c2 = vdup_n_u16(coeffs[2]); 03527 v_c3 = vdup_n_u16(coeffs[3]); 03528 v_c4 = vdup_n_u16(coeffs[4]); 03529 v_c5 = vdup_n_u16(coeffs[5]); 03530 v_c6 = vdup_n_u16(coeffs[6]); 03531 v_c7 = vdup_n_u16(coeffs[7]); 03532 v_c8 = vdup_n_u16(coeffs[8]); 03533 v_delta = vdupq_n_u32(1 << (xyz_shift - 1)); 03534 } 03535 03536 void operator()(const ushort * src, ushort * dst, int n) const 03537 { 03538 int scn = srccn, i = 0; 03539 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], 03540 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], 03541 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; 03542 n *= 3; 03543 03544 for ( ; i <= n - 24; i += 24, src += scn * 8) 03545 { 03546 uint16x8x3_t v_src, v_dst; 03547 03548 if (scn == 3) 03549 v_src = vld3q_u16(src); 03550 else 03551 { 03552 uint16x8x4_t v_src4 = vld4q_u16(src); 03553 v_src.val[0] = v_src4.val[0]; 03554 v_src.val[1] = v_src4.val[1]; 03555 v_src.val[2] = v_src4.val[2]; 03556 } 03557 03558 uint16x4_t v_s0 = vget_low_u16(v_src.val[0]), 03559 v_s1 = vget_low_u16(v_src.val[1]), 03560 v_s2 = vget_low_u16(v_src.val[2]); 03561 03562 uint32x4_t v_X0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); 03563 uint32x4_t v_Y0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); 03564 uint32x4_t v_Z0 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); 03565 v_X0 = vshrq_n_u32(vaddq_u32(v_X0, v_delta), xyz_shift); 03566 v_Y0 = vshrq_n_u32(vaddq_u32(v_Y0, v_delta), xyz_shift); 03567 v_Z0 = vshrq_n_u32(vaddq_u32(v_Z0, v_delta), xyz_shift); 03568 03569 v_s0 = vget_high_u16(v_src.val[0]), 03570 v_s1 = vget_high_u16(v_src.val[1]), 03571 v_s2 = vget_high_u16(v_src.val[2]); 03572 03573 uint32x4_t v_X1 = 
vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); 03574 uint32x4_t v_Y1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); 03575 uint32x4_t v_Z1 = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); 03576 v_X1 = vshrq_n_u32(vaddq_u32(v_X1, v_delta), xyz_shift); 03577 v_Y1 = vshrq_n_u32(vaddq_u32(v_Y1, v_delta), xyz_shift); 03578 v_Z1 = vshrq_n_u32(vaddq_u32(v_Z1, v_delta), xyz_shift); 03579 03580 v_dst.val[0] = vcombine_u16(vqmovn_u32(v_X0), vqmovn_u32(v_X1)); 03581 v_dst.val[1] = vcombine_u16(vqmovn_u32(v_Y0), vqmovn_u32(v_Y1)); 03582 v_dst.val[2] = vcombine_u16(vqmovn_u32(v_Z0), vqmovn_u32(v_Z1)); 03583 03584 vst3q_u16(dst + i, v_dst); 03585 } 03586 03587 for ( ; i <= n - 12; i += 12, src += scn * 4) 03588 { 03589 uint16x4x3_t v_dst; 03590 uint16x4_t v_s0, v_s1, v_s2; 03591 03592 if (scn == 3) 03593 { 03594 uint16x4x3_t v_src = vld3_u16(src); 03595 v_s0 = v_src.val[0]; 03596 v_s1 = v_src.val[1]; 03597 v_s2 = v_src.val[2]; 03598 } 03599 else 03600 { 03601 uint16x4x4_t v_src = vld4_u16(src); 03602 v_s0 = v_src.val[0]; 03603 v_s1 = v_src.val[1]; 03604 v_s2 = v_src.val[2]; 03605 } 03606 03607 uint32x4_t v_X = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); 03608 uint32x4_t v_Y = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); 03609 uint32x4_t v_Z = vmlal_u16(vmlal_u16(vmull_u16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); 03610 03611 v_dst.val[0] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_X, v_delta), xyz_shift)); 03612 v_dst.val[1] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_Y, v_delta), xyz_shift)); 03613 v_dst.val[2] = vqmovn_u32(vshrq_n_u32(vaddq_u32(v_Z, v_delta), xyz_shift)); 03614 03615 vst3_u16(dst + i, v_dst); 03616 } 03617 03618 for ( ; i < n; i += 3, src += scn) 03619 { 03620 int X = CV_DESCALE(src[0]*C0 + src[1]*C1 + src[2]*C2, xyz_shift); 03621 int Y = CV_DESCALE(src[0]*C3 + src[1]*C4 + src[2]*C5, xyz_shift); 03622 int Z = CV_DESCALE(src[0]*C6 + src[1]*C7 + src[2]*C8, 
xyz_shift); 03623 dst[i] = saturate_cast<ushort>(X); 03624 dst[i+1] = saturate_cast<ushort>(Y); 03625 dst[i+2] = saturate_cast<ushort>(Z); 03626 } 03627 } 03628 03629 int srccn, coeffs[9]; 03630 uint16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8; 03631 uint32x4_t v_delta; 03632 }; 03633 03634 #endif 03635 03636 template<typename _Tp> struct XYZ2RGB_f 03637 { 03638 typedef _Tp channel_type; 03639 03640 XYZ2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs) 03641 : dstcn(_dstcn), blueIdx(_blueIdx) 03642 { 03643 memcpy(coeffs, _coeffs ? _coeffs : XYZ2sRGB_D65, 9*sizeof(coeffs[0])); 03644 if(blueIdx == 0) 03645 { 03646 std::swap(coeffs[0], coeffs[6]); 03647 std::swap(coeffs[1], coeffs[7]); 03648 std::swap(coeffs[2], coeffs[8]); 03649 } 03650 } 03651 03652 void operator()(const _Tp* src, _Tp* dst, int n) const 03653 { 03654 int dcn = dstcn; 03655 _Tp alpha = ColorChannel<_Tp>::max(); 03656 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], 03657 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], 03658 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; 03659 n *= 3; 03660 for(int i = 0; i < n; i += 3, dst += dcn) 03661 { 03662 _Tp B = saturate_cast<_Tp>(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2); 03663 _Tp G = saturate_cast<_Tp>(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5); 03664 _Tp R = saturate_cast<_Tp>(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8); 03665 dst[0] = B; dst[1] = G; dst[2] = R; 03666 if( dcn == 4 ) 03667 dst[3] = alpha; 03668 } 03669 } 03670 int dstcn, blueIdx; 03671 float coeffs[9]; 03672 }; 03673 03674 #if CV_SSE2 03675 03676 template <> 03677 struct XYZ2RGB_f<float> 03678 { 03679 typedef float channel_type; 03680 03681 XYZ2RGB_f(int _dstcn, int _blueIdx, const float* _coeffs) 03682 : dstcn(_dstcn), blueIdx(_blueIdx) 03683 { 03684 memcpy(coeffs, _coeffs ? 
_coeffs : XYZ2sRGB_D65, 9*sizeof(coeffs[0])); 03685 if(blueIdx == 0) 03686 { 03687 std::swap(coeffs[0], coeffs[6]); 03688 std::swap(coeffs[1], coeffs[7]); 03689 std::swap(coeffs[2], coeffs[8]); 03690 } 03691 03692 v_c0 = _mm_set1_ps(coeffs[0]); 03693 v_c1 = _mm_set1_ps(coeffs[1]); 03694 v_c2 = _mm_set1_ps(coeffs[2]); 03695 v_c3 = _mm_set1_ps(coeffs[3]); 03696 v_c4 = _mm_set1_ps(coeffs[4]); 03697 v_c5 = _mm_set1_ps(coeffs[5]); 03698 v_c6 = _mm_set1_ps(coeffs[6]); 03699 v_c7 = _mm_set1_ps(coeffs[7]); 03700 v_c8 = _mm_set1_ps(coeffs[8]); 03701 03702 v_alpha = _mm_set1_ps(ColorChannel<float>::max()); 03703 03704 haveSIMD = checkHardwareSupport(CV_CPU_SSE2); 03705 } 03706 03707 void process(__m128 v_x, __m128 v_y, __m128 v_z, 03708 __m128 & v_r, __m128 & v_g, __m128 & v_b) const 03709 { 03710 v_b = _mm_mul_ps(v_x, v_c0); 03711 v_b = _mm_add_ps(v_b, _mm_mul_ps(v_y, v_c1)); 03712 v_b = _mm_add_ps(v_b, _mm_mul_ps(v_z, v_c2)); 03713 03714 v_g = _mm_mul_ps(v_x, v_c3); 03715 v_g = _mm_add_ps(v_g, _mm_mul_ps(v_y, v_c4)); 03716 v_g = _mm_add_ps(v_g, _mm_mul_ps(v_z, v_c5)); 03717 03718 v_r = _mm_mul_ps(v_x, v_c6); 03719 v_r = _mm_add_ps(v_r, _mm_mul_ps(v_y, v_c7)); 03720 v_r = _mm_add_ps(v_r, _mm_mul_ps(v_z, v_c8)); 03721 } 03722 03723 void operator()(const float* src, float* dst, int n) const 03724 { 03725 int dcn = dstcn; 03726 float alpha = ColorChannel<float>::max(); 03727 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], 03728 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], 03729 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; 03730 n *= 3; 03731 int i = 0; 03732 03733 if (haveSIMD) 03734 { 03735 for ( ; i <= n - 24; i += 24, dst += 8 * dcn) 03736 { 03737 __m128 v_x0 = _mm_loadu_ps(src + i); 03738 __m128 v_x1 = _mm_loadu_ps(src + i + 4); 03739 __m128 v_y0 = _mm_loadu_ps(src + i + 8); 03740 __m128 v_y1 = _mm_loadu_ps(src + i + 12); 03741 __m128 v_z0 = _mm_loadu_ps(src + i + 16); 03742 __m128 v_z1 = _mm_loadu_ps(src + i + 20); 03743 03744 _mm_deinterleave_ps(v_x0, v_x1, 
v_y0, v_y1, v_z0, v_z1); 03745 03746 __m128 v_r0, v_g0, v_b0; 03747 process(v_x0, v_y0, v_z0, 03748 v_r0, v_g0, v_b0); 03749 03750 __m128 v_r1, v_g1, v_b1; 03751 process(v_x1, v_y1, v_z1, 03752 v_r1, v_g1, v_b1); 03753 03754 __m128 v_a0 = v_alpha, v_a1 = v_alpha; 03755 03756 if (dcn == 4) 03757 _mm_interleave_ps(v_b0, v_b1, v_g0, v_g1, 03758 v_r0, v_r1, v_a0, v_a1); 03759 else 03760 _mm_interleave_ps(v_b0, v_b1, v_g0, v_g1, v_r0, v_r1); 03761 03762 _mm_storeu_ps(dst, v_b0); 03763 _mm_storeu_ps(dst + 4, v_b1); 03764 _mm_storeu_ps(dst + 8, v_g0); 03765 _mm_storeu_ps(dst + 12, v_g1); 03766 _mm_storeu_ps(dst + 16, v_r0); 03767 _mm_storeu_ps(dst + 20, v_r1); 03768 03769 if (dcn == 4) 03770 { 03771 _mm_storeu_ps(dst + 24, v_a0); 03772 _mm_storeu_ps(dst + 28, v_a1); 03773 } 03774 } 03775 03776 } 03777 03778 for( ; i < n; i += 3, dst += dcn) 03779 { 03780 float B = src[i]*C0 + src[i+1]*C1 + src[i+2]*C2; 03781 float G = src[i]*C3 + src[i+1]*C4 + src[i+2]*C5; 03782 float R = src[i]*C6 + src[i+1]*C7 + src[i+2]*C8; 03783 dst[0] = B; dst[1] = G; dst[2] = R; 03784 if( dcn == 4 ) 03785 dst[3] = alpha; 03786 } 03787 } 03788 int dstcn, blueIdx; 03789 float coeffs[9]; 03790 03791 __m128 v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8; 03792 __m128 v_alpha; 03793 bool haveSIMD; 03794 }; 03795 03796 #endif // CV_SSE2 03797 03798 03799 template<typename _Tp> struct XYZ2RGB_i 03800 { 03801 typedef _Tp channel_type; 03802 03803 XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs) 03804 : dstcn(_dstcn), blueIdx(_blueIdx) 03805 { 03806 static const int coeffs0[] = 03807 { 03808 13273, -6296, -2042, 03809 -3970, 7684, 170, 03810 228, -836, 4331 03811 }; 03812 for(int i = 0; i < 9; i++) 03813 coeffs[i] = _coeffs ? 
cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i]; 03814 03815 if(blueIdx == 0) 03816 { 03817 std::swap(coeffs[0], coeffs[6]); 03818 std::swap(coeffs[1], coeffs[7]); 03819 std::swap(coeffs[2], coeffs[8]); 03820 } 03821 } 03822 void operator()(const _Tp* src, _Tp* dst, int n) const 03823 { 03824 int dcn = dstcn; 03825 _Tp alpha = ColorChannel<_Tp>::max(); 03826 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], 03827 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], 03828 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; 03829 n *= 3; 03830 for(int i = 0; i < n; i += 3, dst += dcn) 03831 { 03832 int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift); 03833 int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift); 03834 int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift); 03835 dst[0] = saturate_cast<_Tp>(B); dst[1] = saturate_cast<_Tp>(G); 03836 dst[2] = saturate_cast<_Tp>(R); 03837 if( dcn == 4 ) 03838 dst[3] = alpha; 03839 } 03840 } 03841 int dstcn, blueIdx; 03842 int coeffs[9]; 03843 }; 03844 03845 #if CV_NEON 03846 03847 template <> 03848 struct XYZ2RGB_i<uchar> 03849 { 03850 typedef uchar channel_type; 03851 03852 XYZ2RGB_i(int _dstcn, int _blueIdx, const int* _coeffs) 03853 : dstcn(_dstcn), blueIdx(_blueIdx) 03854 { 03855 static const int coeffs0[] = 03856 { 03857 13273, -6296, -2042, 03858 -3970, 7684, 170, 03859 228, -836, 4331 03860 }; 03861 for(int i = 0; i < 9; i++) 03862 coeffs[i] = _coeffs ? 
cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i]; 03863 03864 if(blueIdx == 0) 03865 { 03866 std::swap(coeffs[0], coeffs[6]); 03867 std::swap(coeffs[1], coeffs[7]); 03868 std::swap(coeffs[2], coeffs[8]); 03869 } 03870 03871 v_c0 = vdup_n_s16(coeffs[0]); 03872 v_c1 = vdup_n_s16(coeffs[1]); 03873 v_c2 = vdup_n_s16(coeffs[2]); 03874 v_c3 = vdup_n_s16(coeffs[3]); 03875 v_c4 = vdup_n_s16(coeffs[4]); 03876 v_c5 = vdup_n_s16(coeffs[5]); 03877 v_c6 = vdup_n_s16(coeffs[6]); 03878 v_c7 = vdup_n_s16(coeffs[7]); 03879 v_c8 = vdup_n_s16(coeffs[8]); 03880 v_delta = vdupq_n_s32(1 << (xyz_shift - 1)); 03881 v_alpha = vmovn_u16(vdupq_n_u16(ColorChannel<uchar>::max())); 03882 } 03883 03884 void operator()(const uchar* src, uchar* dst, int n) const 03885 { 03886 int dcn = dstcn, i = 0; 03887 uchar alpha = ColorChannel<uchar>::max(); 03888 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], 03889 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], 03890 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; 03891 n *= 3; 03892 03893 for ( ; i <= n - 24; i += 24, dst += dcn * 8) 03894 { 03895 uint8x8x3_t v_src = vld3_u8(src + i); 03896 int16x8x3_t v_src16; 03897 v_src16.val[0] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[0])); 03898 v_src16.val[1] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[1])); 03899 v_src16.val[2] = vreinterpretq_s16_u16(vmovl_u8(v_src.val[2])); 03900 03901 int16x4_t v_s0 = vget_low_s16(v_src16.val[0]), 03902 v_s1 = vget_low_s16(v_src16.val[1]), 03903 v_s2 = vget_low_s16(v_src16.val[2]); 03904 03905 int32x4_t v_X0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); 03906 int32x4_t v_Y0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); 03907 int32x4_t v_Z0 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); 03908 v_X0 = vshrq_n_s32(vaddq_s32(v_X0, v_delta), xyz_shift); 03909 v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta), xyz_shift); 03910 v_Z0 = vshrq_n_s32(vaddq_s32(v_Z0, v_delta), xyz_shift); 03911 03912 v_s0 = 
vget_high_s16(v_src16.val[0]), 03913 v_s1 = vget_high_s16(v_src16.val[1]), 03914 v_s2 = vget_high_s16(v_src16.val[2]); 03915 03916 int32x4_t v_X1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); 03917 int32x4_t v_Y1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); 03918 int32x4_t v_Z1 = vmlal_s16(vmlal_s16(vmull_s16(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); 03919 v_X1 = vshrq_n_s32(vaddq_s32(v_X1, v_delta), xyz_shift); 03920 v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta), xyz_shift); 03921 v_Z1 = vshrq_n_s32(vaddq_s32(v_Z1, v_delta), xyz_shift); 03922 03923 uint8x8_t v_b = vqmovun_s16(vcombine_s16(vqmovn_s32(v_X0), vqmovn_s32(v_X1))); 03924 uint8x8_t v_g = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Y0), vqmovn_s32(v_Y1))); 03925 uint8x8_t v_r = vqmovun_s16(vcombine_s16(vqmovn_s32(v_Z0), vqmovn_s32(v_Z1))); 03926 03927 if (dcn == 3) 03928 { 03929 uint8x8x3_t v_dst; 03930 v_dst.val[0] = v_b; 03931 v_dst.val[1] = v_g; 03932 v_dst.val[2] = v_r; 03933 vst3_u8(dst, v_dst); 03934 } 03935 else 03936 { 03937 uint8x8x4_t v_dst; 03938 v_dst.val[0] = v_b; 03939 v_dst.val[1] = v_g; 03940 v_dst.val[2] = v_r; 03941 v_dst.val[3] = v_alpha; 03942 vst4_u8(dst, v_dst); 03943 } 03944 } 03945 03946 for ( ; i < n; i += 3, dst += dcn) 03947 { 03948 int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift); 03949 int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift); 03950 int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift); 03951 dst[0] = saturate_cast<uchar>(B); dst[1] = saturate_cast<uchar>(G); 03952 dst[2] = saturate_cast<uchar>(R); 03953 if( dcn == 4 ) 03954 dst[3] = alpha; 03955 } 03956 } 03957 int dstcn, blueIdx; 03958 int coeffs[9]; 03959 03960 int16x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8; 03961 uint8x8_t v_alpha; 03962 int32x4_t v_delta; 03963 }; 03964 03965 template <> 03966 struct XYZ2RGB_i<ushort> 03967 { 03968 typedef ushort channel_type; 03969 03970 XYZ2RGB_i(int _dstcn, int 
_blueIdx, const int* _coeffs) 03971 : dstcn(_dstcn), blueIdx(_blueIdx) 03972 { 03973 static const int coeffs0[] = 03974 { 03975 13273, -6296, -2042, 03976 -3970, 7684, 170, 03977 228, -836, 4331 03978 }; 03979 for(int i = 0; i < 9; i++) 03980 coeffs[i] = _coeffs ? cvRound(_coeffs[i]*(1 << xyz_shift)) : coeffs0[i]; 03981 03982 if(blueIdx == 0) 03983 { 03984 std::swap(coeffs[0], coeffs[6]); 03985 std::swap(coeffs[1], coeffs[7]); 03986 std::swap(coeffs[2], coeffs[8]); 03987 } 03988 03989 v_c0 = vdupq_n_s32(coeffs[0]); 03990 v_c1 = vdupq_n_s32(coeffs[1]); 03991 v_c2 = vdupq_n_s32(coeffs[2]); 03992 v_c3 = vdupq_n_s32(coeffs[3]); 03993 v_c4 = vdupq_n_s32(coeffs[4]); 03994 v_c5 = vdupq_n_s32(coeffs[5]); 03995 v_c6 = vdupq_n_s32(coeffs[6]); 03996 v_c7 = vdupq_n_s32(coeffs[7]); 03997 v_c8 = vdupq_n_s32(coeffs[8]); 03998 v_delta = vdupq_n_s32(1 << (xyz_shift - 1)); 03999 v_alpha = vdupq_n_u16(ColorChannel<ushort>::max()); 04000 v_alpha2 = vget_low_u16(v_alpha); 04001 } 04002 04003 void operator()(const ushort* src, ushort* dst, int n) const 04004 { 04005 int dcn = dstcn, i = 0; 04006 ushort alpha = ColorChannel<ushort>::max(); 04007 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], 04008 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], 04009 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; 04010 n *= 3; 04011 04012 for ( ; i <= n - 24; i += 24, dst += dcn * 8) 04013 { 04014 uint16x8x3_t v_src = vld3q_u16(src + i); 04015 int32x4_t v_s0 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[0]))), 04016 v_s1 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[1]))), 04017 v_s2 = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src.val[2]))); 04018 04019 int32x4_t v_X0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); 04020 int32x4_t v_Y0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); 04021 int32x4_t v_Z0 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); 04022 v_X0 = vshrq_n_s32(vaddq_s32(v_X0, v_delta), 
xyz_shift); 04023 v_Y0 = vshrq_n_s32(vaddq_s32(v_Y0, v_delta), xyz_shift); 04024 v_Z0 = vshrq_n_s32(vaddq_s32(v_Z0, v_delta), xyz_shift); 04025 04026 v_s0 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[0]))); 04027 v_s1 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[1]))); 04028 v_s2 = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src.val[2]))); 04029 04030 int32x4_t v_X1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); 04031 int32x4_t v_Y1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); 04032 int32x4_t v_Z1 = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c6), v_s1, v_c7), v_s2, v_c8); 04033 v_X1 = vshrq_n_s32(vaddq_s32(v_X1, v_delta), xyz_shift); 04034 v_Y1 = vshrq_n_s32(vaddq_s32(v_Y1, v_delta), xyz_shift); 04035 v_Z1 = vshrq_n_s32(vaddq_s32(v_Z1, v_delta), xyz_shift); 04036 04037 uint16x8_t v_b = vcombine_u16(vqmovun_s32(v_X0), vqmovun_s32(v_X1)); 04038 uint16x8_t v_g = vcombine_u16(vqmovun_s32(v_Y0), vqmovun_s32(v_Y1)); 04039 uint16x8_t v_r = vcombine_u16(vqmovun_s32(v_Z0), vqmovun_s32(v_Z1)); 04040 04041 if (dcn == 3) 04042 { 04043 uint16x8x3_t v_dst; 04044 v_dst.val[0] = v_b; 04045 v_dst.val[1] = v_g; 04046 v_dst.val[2] = v_r; 04047 vst3q_u16(dst, v_dst); 04048 } 04049 else 04050 { 04051 uint16x8x4_t v_dst; 04052 v_dst.val[0] = v_b; 04053 v_dst.val[1] = v_g; 04054 v_dst.val[2] = v_r; 04055 v_dst.val[3] = v_alpha; 04056 vst4q_u16(dst, v_dst); 04057 } 04058 } 04059 04060 for ( ; i <= n - 12; i += 12, dst += dcn * 4) 04061 { 04062 uint16x4x3_t v_src = vld3_u16(src + i); 04063 int32x4_t v_s0 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[0])), 04064 v_s1 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[1])), 04065 v_s2 = vreinterpretq_s32_u32(vmovl_u16(v_src.val[2])); 04066 04067 int32x4_t v_X = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c0), v_s1, v_c1), v_s2, v_c2); 04068 int32x4_t v_Y = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, v_c3), v_s1, v_c4), v_s2, v_c5); 04069 int32x4_t v_Z = vmlaq_s32(vmlaq_s32(vmulq_s32(v_s0, 
v_c6), v_s1, v_c7), v_s2, v_c8); 04070 v_X = vshrq_n_s32(vaddq_s32(v_X, v_delta), xyz_shift); 04071 v_Y = vshrq_n_s32(vaddq_s32(v_Y, v_delta), xyz_shift); 04072 v_Z = vshrq_n_s32(vaddq_s32(v_Z, v_delta), xyz_shift); 04073 04074 uint16x4_t v_b = vqmovun_s32(v_X); 04075 uint16x4_t v_g = vqmovun_s32(v_Y); 04076 uint16x4_t v_r = vqmovun_s32(v_Z); 04077 04078 if (dcn == 3) 04079 { 04080 uint16x4x3_t v_dst; 04081 v_dst.val[0] = v_b; 04082 v_dst.val[1] = v_g; 04083 v_dst.val[2] = v_r; 04084 vst3_u16(dst, v_dst); 04085 } 04086 else 04087 { 04088 uint16x4x4_t v_dst; 04089 v_dst.val[0] = v_b; 04090 v_dst.val[1] = v_g; 04091 v_dst.val[2] = v_r; 04092 v_dst.val[3] = v_alpha2; 04093 vst4_u16(dst, v_dst); 04094 } 04095 } 04096 04097 for ( ; i < n; i += 3, dst += dcn) 04098 { 04099 int B = CV_DESCALE(src[i]*C0 + src[i+1]*C1 + src[i+2]*C2, xyz_shift); 04100 int G = CV_DESCALE(src[i]*C3 + src[i+1]*C4 + src[i+2]*C5, xyz_shift); 04101 int R = CV_DESCALE(src[i]*C6 + src[i+1]*C7 + src[i+2]*C8, xyz_shift); 04102 dst[0] = saturate_cast<ushort>(B); dst[1] = saturate_cast<ushort>(G); 04103 dst[2] = saturate_cast<ushort>(R); 04104 if( dcn == 4 ) 04105 dst[3] = alpha; 04106 } 04107 } 04108 int dstcn, blueIdx; 04109 int coeffs[9]; 04110 04111 int32x4_t v_c0, v_c1, v_c2, v_c3, v_c4, v_c5, v_c6, v_c7, v_c8, v_delta; 04112 uint16x4_t v_alpha2; 04113 uint16x8_t v_alpha; 04114 }; 04115 04116 #endif 04117 04118 ////////////////////////////////////// RGB <-> HSV /////////////////////////////////////// 04119 04120 04121 struct RGB2HSV_b 04122 { 04123 typedef uchar channel_type; 04124 04125 RGB2HSV_b(int _srccn, int _blueIdx, int _hrange) 04126 : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange) 04127 { 04128 CV_Assert( hrange == 180 || hrange == 256 ); 04129 } 04130 04131 void operator()(const uchar* src, uchar* dst, int n) const 04132 { 04133 int i, bidx = blueIdx, scn = srccn; 04134 const int hsv_shift = 12; 04135 04136 static int sdiv_table[256]; 04137 static int hdiv_table180[256]; 04138 static 
int hdiv_table256[256]; 04139 static volatile bool initialized = false; 04140 04141 int hr = hrange; 04142 const int* hdiv_table = hr == 180 ? hdiv_table180 : hdiv_table256; 04143 n *= 3; 04144 04145 if( !initialized ) 04146 { 04147 sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0; 04148 for( i = 1; i < 256; i++ ) 04149 { 04150 sdiv_table[i] = saturate_cast<int>((255 << hsv_shift)/(1.*i)); 04151 hdiv_table180[i] = saturate_cast<int>((180 << hsv_shift)/(6.*i)); 04152 hdiv_table256[i] = saturate_cast<int>((256 << hsv_shift)/(6.*i)); 04153 } 04154 initialized = true; 04155 } 04156 04157 for( i = 0; i < n; i += 3, src += scn ) 04158 { 04159 int b = src[bidx], g = src[1], r = src[bidx^2]; 04160 int h, s, v = b; 04161 int vmin = b, diff; 04162 int vr, vg; 04163 04164 CV_CALC_MAX_8U( v, g ); 04165 CV_CALC_MAX_8U( v, r ); 04166 CV_CALC_MIN_8U( vmin, g ); 04167 CV_CALC_MIN_8U( vmin, r ); 04168 04169 diff = v - vmin; 04170 vr = v == r ? -1 : 0; 04171 vg = v == g ? -1 : 0; 04172 04173 s = (diff * sdiv_table[v] + (1 << (hsv_shift-1))) >> hsv_shift; 04174 h = (vr & (g - b)) + 04175 (~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff)))); 04176 h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift; 04177 h += h < 0 ? 
hr : 0; 04178 04179 dst[i] = saturate_cast<uchar>(h); 04180 dst[i+1] = (uchar)s; 04181 dst[i+2] = (uchar)v; 04182 } 04183 } 04184 04185 int srccn, blueIdx, hrange; 04186 }; 04187 04188 04189 struct RGB2HSV_f 04190 { 04191 typedef float channel_type; 04192 04193 RGB2HSV_f(int _srccn, int _blueIdx, float _hrange) 04194 : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange) {} 04195 04196 void operator()(const float* src, float* dst, int n) const 04197 { 04198 int i, bidx = blueIdx, scn = srccn; 04199 float hscale = hrange*(1.f/360.f); 04200 n *= 3; 04201 04202 for( i = 0; i < n; i += 3, src += scn ) 04203 { 04204 float b = src[bidx], g = src[1], r = src[bidx^2]; 04205 float h, s, v; 04206 04207 float vmin, diff; 04208 04209 v = vmin = r; 04210 if( v < g ) v = g; 04211 if( v < b ) v = b; 04212 if( vmin > g ) vmin = g; 04213 if( vmin > b ) vmin = b; 04214 04215 diff = v - vmin; 04216 s = diff/(float)(fabs(v) + FLT_EPSILON); 04217 diff = (float)(60./(diff + FLT_EPSILON)); 04218 if( v == r ) 04219 h = (g - b)*diff; 04220 else if( v == g ) 04221 h = (b - r)*diff + 120.f; 04222 else 04223 h = (r - g)*diff + 240.f; 04224 04225 if( h < 0 ) h += 360.f; 04226 04227 dst[i] = h*hscale; 04228 dst[i+1] = s; 04229 dst[i+2] = v; 04230 } 04231 } 04232 04233 int srccn, blueIdx; 04234 float hrange; 04235 }; 04236 04237 04238 struct HSV2RGB_f 04239 { 04240 typedef float channel_type; 04241 04242 HSV2RGB_f(int _dstcn, int _blueIdx, float _hrange) 04243 : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.f/_hrange) {} 04244 04245 void operator()(const float* src, float* dst, int n) const 04246 { 04247 int i, bidx = blueIdx, dcn = dstcn; 04248 float _hscale = hscale; 04249 float alpha = ColorChannel<float>::max(); 04250 n *= 3; 04251 04252 for( i = 0; i < n; i += 3, dst += dcn ) 04253 { 04254 float h = src[i], s = src[i+1], v = src[i+2]; 04255 float b, g, r; 04256 04257 if( s == 0 ) 04258 b = g = r = v; 04259 else 04260 { 04261 static const int sector_data[][3]= 04262 {{1,3,0}, {1,0,2}, {3,0,1}, 
{0,2,1}, {0,1,3}, {2,1,0}}; 04263 float tab[4]; 04264 int sector; 04265 h *= _hscale; 04266 if( h < 0 ) 04267 do h += 6; while( h < 0 ); 04268 else if( h >= 6 ) 04269 do h -= 6; while( h >= 6 ); 04270 sector = cvFloor(h); 04271 h -= sector; 04272 if( (unsigned)sector >= 6u ) 04273 { 04274 sector = 0; 04275 h = 0.f; 04276 } 04277 04278 tab[0] = v; 04279 tab[1] = v*(1.f - s); 04280 tab[2] = v*(1.f - s*h); 04281 tab[3] = v*(1.f - s*(1.f - h)); 04282 04283 b = tab[sector_data[sector][0]]; 04284 g = tab[sector_data[sector][1]]; 04285 r = tab[sector_data[sector][2]]; 04286 } 04287 04288 dst[bidx] = b; 04289 dst[1] = g; 04290 dst[bidx^2] = r; 04291 if( dcn == 4 ) 04292 dst[3] = alpha; 04293 } 04294 } 04295 04296 int dstcn, blueIdx; 04297 float hscale; 04298 }; 04299 04300 04301 struct HSV2RGB_b 04302 { 04303 typedef uchar channel_type; 04304 04305 HSV2RGB_b(int _dstcn, int _blueIdx, int _hrange) 04306 : dstcn(_dstcn), cvt(3, _blueIdx, (float)_hrange) 04307 { 04308 #if CV_NEON 04309 v_scale_inv = vdupq_n_f32(1.f/255.f); 04310 v_scale = vdupq_n_f32(255.f); 04311 v_alpha = vdup_n_u8(ColorChannel<uchar>::max()); 04312 #elif CV_SSE2 04313 v_scale_inv = _mm_set1_ps(1.f/255.f); 04314 v_scale = _mm_set1_ps(255.0f); 04315 v_zero = _mm_setzero_si128(); 04316 haveSIMD = checkHardwareSupport(CV_CPU_SSE2); 04317 #endif 04318 } 04319 04320 #if CV_SSE2 04321 // 16s x 8 04322 void process(__m128i v_r, __m128i v_g, __m128i v_b, 04323 float * buf) const 04324 { 04325 __m128 v_r0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_r, v_zero)); 04326 __m128 v_g0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_g, v_zero)); 04327 __m128 v_b0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_b, v_zero)); 04328 04329 __m128 v_r1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_r, v_zero)); 04330 __m128 v_g1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_g, v_zero)); 04331 __m128 v_b1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_b, v_zero)); 04332 04333 v_g0 = _mm_mul_ps(v_g0, v_scale_inv); 04334 v_b0 = _mm_mul_ps(v_b0, v_scale_inv); 04335 04336 v_g1 = 
_mm_mul_ps(v_g1, v_scale_inv); 04337 v_b1 = _mm_mul_ps(v_b1, v_scale_inv); 04338 04339 _mm_interleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1); 04340 04341 _mm_store_ps(buf, v_r0); 04342 _mm_store_ps(buf + 4, v_r1); 04343 _mm_store_ps(buf + 8, v_g0); 04344 _mm_store_ps(buf + 12, v_g1); 04345 _mm_store_ps(buf + 16, v_b0); 04346 _mm_store_ps(buf + 20, v_b1); 04347 } 04348 #endif 04349 04350 void operator()(const uchar* src, uchar* dst, int n) const 04351 { 04352 int i, j, dcn = dstcn; 04353 uchar alpha = ColorChannel<uchar>::max(); 04354 float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE]; 04355 04356 for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 ) 04357 { 04358 int dn = std::min(n - i, (int)BLOCK_SIZE); 04359 j = 0; 04360 04361 #if CV_NEON 04362 for ( ; j <= (dn - 8) * 3; j += 24) 04363 { 04364 uint8x8x3_t v_src = vld3_u8(src + j); 04365 uint16x8_t v_t0 = vmovl_u8(v_src.val[0]), 04366 v_t1 = vmovl_u8(v_src.val[1]), 04367 v_t2 = vmovl_u8(v_src.val[2]); 04368 04369 float32x4x3_t v_dst; 04370 v_dst.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))); 04371 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv); 04372 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv); 04373 vst3q_f32(buf + j, v_dst); 04374 04375 v_dst.val[0] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))); 04376 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv); 04377 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv); 04378 vst3q_f32(buf + j + 12, v_dst); 04379 } 04380 #elif CV_SSE2 04381 if (haveSIMD) 04382 { 04383 for ( ; j <= (dn - 32) * 3; j += 96) 04384 { 04385 __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src + j)); 04386 __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + j + 16)); 04387 __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + j + 32)); 04388 __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + j + 48)); 04389 __m128i v_b0 = 
_mm_loadu_si128((__m128i const *)(src + j + 64)); 04390 __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + j + 80)); 04391 04392 _mm_deinterleave_epi8(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1); 04393 04394 process(_mm_unpacklo_epi8(v_r0, v_zero), 04395 _mm_unpacklo_epi8(v_g0, v_zero), 04396 _mm_unpacklo_epi8(v_b0, v_zero), 04397 buf + j); 04398 04399 process(_mm_unpackhi_epi8(v_r0, v_zero), 04400 _mm_unpackhi_epi8(v_g0, v_zero), 04401 _mm_unpackhi_epi8(v_b0, v_zero), 04402 buf + j + 24); 04403 04404 process(_mm_unpacklo_epi8(v_r1, v_zero), 04405 _mm_unpacklo_epi8(v_g1, v_zero), 04406 _mm_unpacklo_epi8(v_b1, v_zero), 04407 buf + j + 48); 04408 04409 process(_mm_unpackhi_epi8(v_r1, v_zero), 04410 _mm_unpackhi_epi8(v_g1, v_zero), 04411 _mm_unpackhi_epi8(v_b1, v_zero), 04412 buf + j + 72); 04413 } 04414 } 04415 #endif 04416 04417 for( ; j < dn*3; j += 3 ) 04418 { 04419 buf[j] = src[j]; 04420 buf[j+1] = src[j+1]*(1.f/255.f); 04421 buf[j+2] = src[j+2]*(1.f/255.f); 04422 } 04423 cvt(buf, buf, dn); 04424 04425 j = 0; 04426 #if CV_NEON 04427 for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8) 04428 { 04429 float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12); 04430 uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))), 04431 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale))))); 04432 uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))), 04433 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale))))); 04434 uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))), 04435 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale))))); 04436 04437 if (dcn == 4) 04438 { 04439 uint8x8x4_t v_dst; 04440 v_dst.val[0] = v_dst0; 04441 v_dst.val[1] = v_dst1; 04442 v_dst.val[2] = v_dst2; 04443 v_dst.val[3] = v_alpha; 04444 vst4_u8(dst, v_dst); 04445 } 04446 else 04447 { 04448 uint8x8x3_t 
v_dst; 04449 v_dst.val[0] = v_dst0; 04450 v_dst.val[1] = v_dst1; 04451 v_dst.val[2] = v_dst2; 04452 vst3_u8(dst, v_dst); 04453 } 04454 } 04455 #elif CV_SSE2 04456 if (dcn == 3 && haveSIMD) 04457 { 04458 for ( ; j <= (dn * 3 - 16); j += 16, dst += 16) 04459 { 04460 __m128 v_src0 = _mm_mul_ps(_mm_load_ps(buf + j), v_scale); 04461 __m128 v_src1 = _mm_mul_ps(_mm_load_ps(buf + j + 4), v_scale); 04462 __m128 v_src2 = _mm_mul_ps(_mm_load_ps(buf + j + 8), v_scale); 04463 __m128 v_src3 = _mm_mul_ps(_mm_load_ps(buf + j + 12), v_scale); 04464 04465 __m128i v_dst0 = _mm_packs_epi32(_mm_cvtps_epi32(v_src0), 04466 _mm_cvtps_epi32(v_src1)); 04467 __m128i v_dst1 = _mm_packs_epi32(_mm_cvtps_epi32(v_src2), 04468 _mm_cvtps_epi32(v_src3)); 04469 04470 _mm_storeu_si128((__m128i *)dst, _mm_packus_epi16(v_dst0, v_dst1)); 04471 } 04472 04473 int jr = j % 3; 04474 if (jr) 04475 dst -= jr, j -= jr; 04476 } 04477 #endif 04478 04479 for( ; j < dn*3; j += 3, dst += dcn ) 04480 { 04481 dst[0] = saturate_cast<uchar>(buf[j]*255.f); 04482 dst[1] = saturate_cast<uchar>(buf[j+1]*255.f); 04483 dst[2] = saturate_cast<uchar>(buf[j+2]*255.f); 04484 if( dcn == 4 ) 04485 dst[3] = alpha; 04486 } 04487 } 04488 } 04489 04490 int dstcn; 04491 HSV2RGB_f cvt; 04492 #if CV_NEON 04493 float32x4_t v_scale, v_scale_inv; 04494 uint8x8_t v_alpha; 04495 #elif CV_SSE2 04496 __m128 v_scale_inv, v_scale; 04497 __m128i v_zero; 04498 bool haveSIMD; 04499 #endif 04500 }; 04501 04502 04503 ///////////////////////////////////// RGB <-> HLS //////////////////////////////////////// 04504 04505 struct RGB2HLS_f 04506 { 04507 typedef float channel_type; 04508 04509 RGB2HLS_f(int _srccn, int _blueIdx, float _hrange) 04510 : srccn(_srccn), blueIdx(_blueIdx), hrange(_hrange) {} 04511 04512 void operator()(const float* src, float* dst, int n) const 04513 { 04514 int i, bidx = blueIdx, scn = srccn; 04515 float hscale = hrange*(1.f/360.f); 04516 n *= 3; 04517 04518 for( i = 0; i < n; i += 3, src += scn ) 04519 { 04520 float b = 
src[bidx], g = src[1], r = src[bidx^2]; 04521 float h = 0.f, s = 0.f, l; 04522 float vmin, vmax, diff; 04523 04524 vmax = vmin = r; 04525 if( vmax < g ) vmax = g; 04526 if( vmax < b ) vmax = b; 04527 if( vmin > g ) vmin = g; 04528 if( vmin > b ) vmin = b; 04529 04530 diff = vmax - vmin; 04531 l = (vmax + vmin)*0.5f; 04532 04533 if( diff > FLT_EPSILON ) 04534 { 04535 s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin); 04536 diff = 60.f/diff; 04537 04538 if( vmax == r ) 04539 h = (g - b)*diff; 04540 else if( vmax == g ) 04541 h = (b - r)*diff + 120.f; 04542 else 04543 h = (r - g)*diff + 240.f; 04544 04545 if( h < 0.f ) h += 360.f; 04546 } 04547 04548 dst[i] = h*hscale; 04549 dst[i+1] = l; 04550 dst[i+2] = s; 04551 } 04552 } 04553 04554 int srccn, blueIdx; 04555 float hrange; 04556 }; 04557 04558 04559 struct RGB2HLS_b 04560 { 04561 typedef uchar channel_type; 04562 04563 RGB2HLS_b(int _srccn, int _blueIdx, int _hrange) 04564 : srccn(_srccn), cvt(3, _blueIdx, (float)_hrange) 04565 { 04566 #if CV_NEON 04567 v_scale_inv = vdupq_n_f32(1.f/255.f); 04568 v_scale = vdupq_n_f32(255.f); 04569 v_alpha = vdup_n_u8(ColorChannel<uchar>::max()); 04570 #elif CV_SSE2 04571 v_scale_inv = _mm_set1_ps(1.f/255.f); 04572 v_scale = _mm_set1_ps(255.f); 04573 v_zero = _mm_setzero_si128(); 04574 haveSIMD = checkHardwareSupport(CV_CPU_SSE2); 04575 #endif 04576 } 04577 04578 #if CV_SSE2 04579 void process(const float * buf, 04580 __m128i & v_h, __m128i & v_l, __m128i & v_s) const 04581 { 04582 __m128 v_h0f = _mm_load_ps(buf); 04583 __m128 v_h1f = _mm_load_ps(buf + 4); 04584 __m128 v_l0f = _mm_load_ps(buf + 8); 04585 __m128 v_l1f = _mm_load_ps(buf + 12); 04586 __m128 v_s0f = _mm_load_ps(buf + 16); 04587 __m128 v_s1f = _mm_load_ps(buf + 20); 04588 04589 _mm_deinterleave_ps(v_h0f, v_h1f, v_l0f, v_l1f, v_s0f, v_s1f); 04590 04591 v_l0f = _mm_mul_ps(v_l0f, v_scale); 04592 v_l1f = _mm_mul_ps(v_l1f, v_scale); 04593 v_s0f = _mm_mul_ps(v_s0f, v_scale); 04594 v_s1f = _mm_mul_ps(v_s1f, v_scale); 
04595 04596 v_h = _mm_packs_epi32(_mm_cvtps_epi32(v_h0f), _mm_cvtps_epi32(v_h1f)); 04597 v_l = _mm_packs_epi32(_mm_cvtps_epi32(v_l0f), _mm_cvtps_epi32(v_l1f)); 04598 v_s = _mm_packs_epi32(_mm_cvtps_epi32(v_s0f), _mm_cvtps_epi32(v_s1f)); 04599 } 04600 #endif 04601 04602 void operator()(const uchar* src, uchar* dst, int n) const 04603 { 04604 int i, j, scn = srccn; 04605 float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE]; 04606 04607 for( i = 0; i < n; i += BLOCK_SIZE, dst += BLOCK_SIZE*3 ) 04608 { 04609 int dn = std::min(n - i, (int)BLOCK_SIZE); 04610 j = 0; 04611 04612 #if CV_NEON 04613 for ( ; j <= (dn - 8) * 3; j += 24, src += 8 * scn) 04614 { 04615 uint16x8_t v_t0, v_t1, v_t2; 04616 04617 if (scn == 3) 04618 { 04619 uint8x8x3_t v_src = vld3_u8(src); 04620 v_t0 = vmovl_u8(v_src.val[0]); 04621 v_t1 = vmovl_u8(v_src.val[1]); 04622 v_t2 = vmovl_u8(v_src.val[2]); 04623 } 04624 else 04625 { 04626 uint8x8x4_t v_src = vld4_u8(src); 04627 v_t0 = vmovl_u8(v_src.val[0]); 04628 v_t1 = vmovl_u8(v_src.val[1]); 04629 v_t2 = vmovl_u8(v_src.val[2]); 04630 } 04631 04632 float32x4x3_t v_dst; 04633 v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv); 04634 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv); 04635 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv); 04636 vst3q_f32(buf + j, v_dst); 04637 04638 v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv); 04639 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv); 04640 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv); 04641 vst3q_f32(buf + j + 12, v_dst); 04642 } 04643 #elif CV_SSE2 04644 if (scn == 3 && haveSIMD) 04645 { 04646 for ( ; j <= (dn * 3 - 16); j += 16, src += 16) 04647 { 04648 __m128i v_src = _mm_loadu_si128((__m128i const *)src); 04649 04650 __m128i v_src_p = _mm_unpacklo_epi8(v_src, v_zero); 04651 _mm_store_ps(buf + j, 
_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src_p, v_zero)), v_scale_inv)); 04652 _mm_store_ps(buf + j + 4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src_p, v_zero)), v_scale_inv)); 04653 04654 v_src_p = _mm_unpackhi_epi8(v_src, v_zero); 04655 _mm_store_ps(buf + j + 8, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src_p, v_zero)), v_scale_inv)); 04656 _mm_store_ps(buf + j + 12, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src_p, v_zero)), v_scale_inv)); 04657 } 04658 04659 int jr = j % 3; 04660 if (jr) 04661 src -= jr, j -= jr; 04662 } 04663 #endif 04664 for( ; j < dn*3; j += 3, src += scn ) 04665 { 04666 buf[j] = src[0]*(1.f/255.f); 04667 buf[j+1] = src[1]*(1.f/255.f); 04668 buf[j+2] = src[2]*(1.f/255.f); 04669 } 04670 cvt(buf, buf, dn); 04671 04672 j = 0; 04673 #if CV_NEON 04674 for ( ; j <= (dn - 8) * 3; j += 24) 04675 { 04676 float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12); 04677 04678 uint8x8x3_t v_dst; 04679 v_dst.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(v_src0.val[0])), 04680 vqmovn_u32(cv_vrndq_u32_f32(v_src1.val[0])))); 04681 v_dst.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))), 04682 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale))))); 04683 v_dst.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))), 04684 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale))))); 04685 vst3_u8(dst + j, v_dst); 04686 } 04687 #elif CV_SSE2 04688 if (haveSIMD) 04689 { 04690 for ( ; j <= (dn - 32) * 3; j += 96) 04691 { 04692 __m128i v_h_0, v_l_0, v_s_0; 04693 process(buf + j, 04694 v_h_0, v_l_0, v_s_0); 04695 04696 __m128i v_h_1, v_l_1, v_s_1; 04697 process(buf + j + 24, 04698 v_h_1, v_l_1, v_s_1); 04699 04700 __m128i v_h0 = _mm_packus_epi16(v_h_0, v_h_1); 04701 __m128i v_l0 = _mm_packus_epi16(v_l_0, v_l_1); 04702 __m128i v_s0 = _mm_packus_epi16(v_s_0, v_s_1); 04703 04704 process(buf + j + 48, 04705 
                            v_h_0, v_l_0, v_s_0);

                    process(buf + j + 72,
                            v_h_1, v_l_1, v_s_1);

                    // pack 16-bit lanes back to 8-bit with unsigned saturation
                    __m128i v_h1 = _mm_packus_epi16(v_h_0, v_h_1);
                    __m128i v_l1 = _mm_packus_epi16(v_l_0, v_l_1);
                    __m128i v_s1 = _mm_packus_epi16(v_s_0, v_s_1);

                    // planar H/L/S -> interleaved HLS triplets
                    _mm_interleave_epi8(v_h0, v_h1, v_l0, v_l1, v_s0, v_s1);

                    _mm_storeu_si128((__m128i *)(dst + j), v_h0);
                    _mm_storeu_si128((__m128i *)(dst + j + 16), v_h1);
                    _mm_storeu_si128((__m128i *)(dst + j + 32), v_l0);
                    _mm_storeu_si128((__m128i *)(dst + j + 48), v_l1);
                    _mm_storeu_si128((__m128i *)(dst + j + 64), v_s0);
                    _mm_storeu_si128((__m128i *)(dst + j + 80), v_s1);
                }
            }
#endif
            // scalar tail: H is stored as-is, L and S are rescaled from [0,1] to [0,255]
            for( ; j < dn*3; j += 3 )
            {
                dst[j] = saturate_cast<uchar>(buf[j]);
                dst[j+1] = saturate_cast<uchar>(buf[j+1]*255.f);
                dst[j+2] = saturate_cast<uchar>(buf[j+2]*255.f);
            }
        }
    }

    int srccn;          // number of source channels (3 or 4)
    RGB2HLS_f cvt;      // float converter that does the actual RGB->HLS math
#if CV_NEON
    float32x4_t v_scale, v_scale_inv;
    uint8x8_t v_alpha;
#elif CV_SSE2
    __m128 v_scale, v_scale_inv;
    __m128i v_zero;
    bool haveSIMD;      // runtime SSE2 availability, checked once in the ctor
#endif
};


// Converts HLS (float) to RGB/BGR (float).
// h is expected in [0, hrange) (hrange passed to the ctor), l and s in [0,1];
// blueIdx selects BGR (0) vs RGB (2) output ordering.
struct HLS2RGB_f
{
    typedef float channel_type;

    HLS2RGB_f(int _dstcn, int _blueIdx, float _hrange)
    : dstcn(_dstcn), blueIdx(_blueIdx), hscale(6.f/_hrange) {}

    void operator()(const float* src, float* dst, int n) const
    {
        int i, bidx = blueIdx, dcn = dstcn;
        float _hscale = hscale;
        float alpha = ColorChannel<float>::max();
        n *= 3;

        for( i = 0; i < n; i += 3, dst += dcn )
        {
            float h = src[i], l = src[i+1], s = src[i+2];
            float b, g, r;

            if( s == 0 )
                b = g = r = l;   // zero saturation -> achromatic gray
            else
            {
                // which of tab[0..3] feeds b/g/r for each of the 6 hue sectors
                static const int sector_data[][3]=
                {{1,3,0}, {1,0,2}, {3,0,1}, {0,2,1}, {0,1,3}, {2,1,0}};
                float tab[4];
                int sector;

                // standard HLS->RGB intermediates (p2 = chroma max, p1 = chroma min)
                float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
                float p1 = 2*l - p2;

                h *= _hscale;
                // wrap hue into [0, 6)
                if( h < 0 )
                    do h += 6; while( h < 0 );
                else if( h >= 6 )
                    do h -= 6; while( h >= 6 );

                assert( 0 <= h && h < 6 );
                sector = cvFloor(h);
                h -= sector;        // fractional position within the sector

                tab[0] = p2;
                tab[1] = p1;
                tab[2] = p1 + (p2 - p1)*(1-h);
                tab[3] = p1 + (p2 - p1)*h;

                b = tab[sector_data[sector][0]];
                g = tab[sector_data[sector][1]];
                r = tab[sector_data[sector][2]];
            }

            dst[bidx] = b;
            dst[1] = g;
            dst[bidx^2] = r;
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }

    int dstcn, blueIdx;
    float hscale;       // 6/hrange: maps the input hue range onto 6 sectors
};


// Converts 8-bit HLS to 8-bit RGB/BGR by expanding blocks to float,
// delegating to HLS2RGB_f, then repacking (SIMD-accelerated where available).
struct HLS2RGB_b
{
    typedef uchar channel_type;

    HLS2RGB_b(int _dstcn, int _blueIdx, int _hrange)
    : dstcn(_dstcn), cvt(3, _blueIdx, (float)_hrange)
    {
#if CV_NEON
        v_scale_inv = vdupq_n_f32(1.f/255.f);
        v_scale = vdupq_n_f32(255.f);
        v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
#elif CV_SSE2
        v_scale_inv = _mm_set1_ps(1.f/255.f);
        v_scale = _mm_set1_ps(255.f);
        v_zero = _mm_setzero_si128();
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
    }

#if CV_SSE2
    // 16s x 8
    // Widens 8 H/L/S values (16-bit lanes) to float, rescales L and S to [0,1]
    // (H is kept in its byte range), and stores them interleaved into buf[0..23].
    void process(__m128i v_r, __m128i v_g, __m128i v_b,
                 float * buf) const
    {
        __m128 v_r0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_r, v_zero));
        __m128 v_g0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_g, v_zero));
        __m128 v_b0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_b, v_zero));

        __m128 v_r1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_r, v_zero));
        __m128 v_g1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_g, v_zero));
        __m128 v_b1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_b, v_zero));

        v_g0 = _mm_mul_ps(v_g0, v_scale_inv);
        v_b0 = _mm_mul_ps(v_b0, v_scale_inv);

        v_g1 = _mm_mul_ps(v_g1, v_scale_inv);
        v_b1 = _mm_mul_ps(v_b1, v_scale_inv);

        _mm_interleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

        _mm_store_ps(buf, v_r0);
        _mm_store_ps(buf + 4, v_r1);
        _mm_store_ps(buf + 8, v_g0);
        _mm_store_ps(buf + 12, v_g1);
        _mm_store_ps(buf + 16, v_b0);
        _mm_store_ps(buf + 20, v_b1);
    }
#endif

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int i, j, dcn = dstcn;
        uchar alpha = ColorChannel<uchar>::max();
        // scratch block: float HLS in, float RGB out (converted in place)
        float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];

        for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
        {
            int dn = std::min(n - i, (int)BLOCK_SIZE);
            j = 0;

#if CV_NEON
            // widen 8 HLS pixels at a time; L,S scaled to [0,1], H kept as-is
            for ( ; j <= (dn - 8) * 3; j += 24)
            {
                uint8x8x3_t v_src = vld3_u8(src + j);
                uint16x8_t v_t0 = vmovl_u8(v_src.val[0]),
                           v_t1 = vmovl_u8(v_src.val[1]),
                           v_t2 = vmovl_u8(v_src.val[2]);

                float32x4x3_t v_dst;
                v_dst.val[0] = vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0)));
                v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv);
                v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv);
                vst3q_f32(buf + j, v_dst);

                v_dst.val[0] = vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0)));
                v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv);
                v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv);
                vst3q_f32(buf + j + 12, v_dst);
            }
#elif CV_SSE2
            if (haveSIMD)
            {
                // 32 pixels (96 bytes) per iteration
                for ( ; j <= (dn - 32) * 3; j += 96)
                {
                    __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src + j));
                    __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + j + 16));
                    __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + j + 32));
                    __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + j + 48));
                    __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + j + 64));
                    __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + j + 80));

                    // interleaved HLS bytes -> planar registers
                    _mm_deinterleave_epi8(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                    process(_mm_unpacklo_epi8(v_r0, v_zero),
                            _mm_unpacklo_epi8(v_g0, v_zero),
                            _mm_unpacklo_epi8(v_b0, v_zero),
                            buf + j);

                    process(_mm_unpackhi_epi8(v_r0, v_zero),
                            _mm_unpackhi_epi8(v_g0, v_zero),
                            _mm_unpackhi_epi8(v_b0, v_zero),
                            buf + j + 24);

                    process(_mm_unpacklo_epi8(v_r1, v_zero),
                            _mm_unpacklo_epi8(v_g1, v_zero),
                            _mm_unpacklo_epi8(v_b1, v_zero),
                            buf + j + 48);

                    process(_mm_unpackhi_epi8(v_r1, v_zero),
                            _mm_unpackhi_epi8(v_g1, v_zero),
                            _mm_unpackhi_epi8(v_b1, v_zero),
                            buf + j + 72);
                }
            }
#endif
            // scalar tail of the widening pass
            for( ; j < dn*3; j += 3 )
            {
                buf[j] = src[j];
                buf[j+1] = src[j+1]*(1.f/255.f);
                buf[j+2] = src[j+2]*(1.f/255.f);
            }
            cvt(buf, buf, dn);   // float HLS -> float RGB, in place

            j = 0;
#if CV_NEON
            for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8)
            {
                float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
                uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
                uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
                uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));

                if (dcn == 4)
                {
                    uint8x8x4_t v_dst;
                    v_dst.val[0] = v_dst0;
                    v_dst.val[1] = v_dst1;
                    v_dst.val[2] = v_dst2;
                    v_dst.val[3] = v_alpha;
                    vst4_u8(dst, v_dst);
                }
                else
                {
                    uint8x8x3_t v_dst;
                    v_dst.val[0] = v_dst0;
                    v_dst.val[1] = v_dst1;
                    v_dst.val[2] = v_dst2;
                    vst3_u8(dst, v_dst);
                }
            }
#elif CV_SSE2
            if (dcn == 3 && haveSIMD)
            {
                for ( ; j <= (dn * 3 - 16); j += 16, dst += 16)
                {
                    __m128 v_src0 = _mm_mul_ps(_mm_load_ps(buf + j), v_scale);
                    __m128 v_src1 = _mm_mul_ps(_mm_load_ps(buf + j + 4), v_scale);
                    __m128 v_src2 = _mm_mul_ps(_mm_load_ps(buf + j + 8), v_scale);
                    __m128 v_src3 = _mm_mul_ps(_mm_load_ps(buf + j + 12), v_scale);

                    __m128i v_dst0 = _mm_packs_epi32(_mm_cvtps_epi32(v_src0),
                                                     _mm_cvtps_epi32(v_src1));
                    __m128i v_dst1 = _mm_packs_epi32(_mm_cvtps_epi32(v_src2),
                                                     _mm_cvtps_epi32(v_src3));

                    _mm_storeu_si128((__m128i *)dst, _mm_packus_epi16(v_dst0, v_dst1));
                }

                // the 16-at-a-time loop may stop mid-pixel; back up to a
                // 3-byte boundary so the scalar tail starts on a whole pixel
                int jr = j % 3;
                if (jr)
                    dst -= jr, j -= jr;
            }
#endif

            for( ; j < dn*3; j += 3, dst += dcn )
            {
                dst[0] = saturate_cast<uchar>(buf[j]*255.f);
                dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
                dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
                if( dcn == 4 )
                    dst[3] = alpha;
            }
        }
    }

    int dstcn;
    HLS2RGB_f cvt;
#if CV_NEON
    float32x4_t v_scale, v_scale_inv;
    uint8x8_t v_alpha;
#elif CV_SSE2
    __m128 v_scale, v_scale_inv;
    __m128i v_zero;
    bool haveSIMD;
#endif
};


///////////////////////////////////// RGB <-> L*a*b* /////////////////////////////////////

// D65 reference white point (X, Y, Z), Y normalized to 1
static const float D65[] = { 0.950456f, 1.f, 1.088754f };

enum { LAB_CBRT_TAB_SIZE = 1024, GAMMA_TAB_SIZE = 1024 };
static float LabCbrtTab[LAB_CBRT_TAB_SIZE*4];                 // spline table for cbrt used by Lab
static const float LabCbrtTabScale = LAB_CBRT_TAB_SIZE/1.5f;

static float sRGBGammaTab[GAMMA_TAB_SIZE*4], sRGBInvGammaTab[GAMMA_TAB_SIZE*4];
static const float GammaTabScale = (float)GAMMA_TAB_SIZE;

// 8-bit gamma tables: sRGB-linearized vs identity, pre-scaled by (1<<gamma_shift)
static ushort sRGBGammaTab_b[256], linearGammaTab_b[256];
#undef lab_shift
05025 #define lab_shift xyz_shift 05026 #define gamma_shift 3 05027 #define lab_shift2 (lab_shift + gamma_shift) 05028 #define LAB_CBRT_TAB_SIZE_B (256*3/2*(1<<gamma_shift)) 05029 static ushort LabCbrtTab_b[LAB_CBRT_TAB_SIZE_B]; 05030 05031 static void initLabTabs() 05032 { 05033 static bool initialized = false; 05034 if(!initialized) 05035 { 05036 float f[LAB_CBRT_TAB_SIZE+1], g[GAMMA_TAB_SIZE+1], ig[GAMMA_TAB_SIZE+1], scale = 1.f/LabCbrtTabScale; 05037 int i; 05038 for(i = 0; i <= LAB_CBRT_TAB_SIZE; i++) 05039 { 05040 float x = i*scale; 05041 f[i] = x < 0.008856f ? x*7.787f + 0.13793103448275862f : cvCbrt(x); 05042 } 05043 splineBuild(f, LAB_CBRT_TAB_SIZE, LabCbrtTab); 05044 05045 scale = 1.f/GammaTabScale; 05046 for(i = 0; i <= GAMMA_TAB_SIZE; i++) 05047 { 05048 float x = i*scale; 05049 g[i] = x <= 0.04045f ? x*(1.f/12.92f) : (float)std::pow((double)(x + 0.055)*(1./1.055), 2.4); 05050 ig[i] = x <= 0.0031308 ? x*12.92f : (float)(1.055*std::pow((double)x, 1./2.4) - 0.055); 05051 } 05052 splineBuild(g, GAMMA_TAB_SIZE, sRGBGammaTab); 05053 splineBuild(ig, GAMMA_TAB_SIZE, sRGBInvGammaTab); 05054 05055 for(i = 0; i < 256; i++) 05056 { 05057 float x = i*(1.f/255.f); 05058 sRGBGammaTab_b[i] = saturate_cast<ushort>(255.f*(1 << gamma_shift)*(x <= 0.04045f ? x*(1.f/12.92f) : (float)std::pow((double)(x + 0.055)*(1./1.055), 2.4))); 05059 linearGammaTab_b[i] = (ushort)(i*(1 << gamma_shift)); 05060 } 05061 05062 for(i = 0; i < LAB_CBRT_TAB_SIZE_B; i++) 05063 { 05064 float x = i*(1.f/(255.f*(1 << gamma_shift))); 05065 LabCbrtTab_b[i] = saturate_cast<ushort>((1 << lab_shift2)*(x < 0.008856f ? 
x*7.787f + 0.13793103448275862f : cvCbrt(x))); 05066 } 05067 initialized = true; 05068 } 05069 } 05070 05071 struct RGB2Lab_b 05072 { 05073 typedef uchar channel_type; 05074 05075 RGB2Lab_b(int _srccn, int blueIdx, const float* _coeffs, 05076 const float* _whitept, bool _srgb) 05077 : srccn(_srccn), srgb(_srgb) 05078 { 05079 static volatile int _3 = 3; 05080 initLabTabs(); 05081 05082 if (!_coeffs) 05083 _coeffs = sRGB2XYZ_D65; 05084 if (!_whitept) 05085 _whitept = D65; 05086 05087 float scale[] = 05088 { 05089 (1 << lab_shift)/_whitept[0], 05090 (float)(1 << lab_shift), 05091 (1 << lab_shift)/_whitept[2] 05092 }; 05093 05094 for( int i = 0; i < _3; i++ ) 05095 { 05096 coeffs[i*3+(blueIdx^2)] = cvRound(_coeffs[i*3]*scale[i]); 05097 coeffs[i*3+1] = cvRound(_coeffs[i*3+1]*scale[i]); 05098 coeffs[i*3+blueIdx] = cvRound(_coeffs[i*3+2]*scale[i]); 05099 05100 CV_Assert( coeffs[i] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 && 05101 coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 2*(1 << lab_shift) ); 05102 } 05103 } 05104 05105 void operator()(const uchar* src, uchar* dst, int n) const 05106 { 05107 const int Lscale = (116*255+50)/100; 05108 const int Lshift = -((16*255*(1 << lab_shift2) + 50)/100); 05109 const ushort* tab = srgb ? 
sRGBGammaTab_b : linearGammaTab_b; 05110 int i, scn = srccn; 05111 int C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], 05112 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], 05113 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; 05114 n *= 3; 05115 05116 for( i = 0; i < n; i += 3, src += scn ) 05117 { 05118 int R = tab[src[0]], G = tab[src[1]], B = tab[src[2]]; 05119 int fX = LabCbrtTab_b[CV_DESCALE(R*C0 + G*C1 + B*C2, lab_shift)]; 05120 int fY = LabCbrtTab_b[CV_DESCALE(R*C3 + G*C4 + B*C5, lab_shift)]; 05121 int fZ = LabCbrtTab_b[CV_DESCALE(R*C6 + G*C7 + B*C8, lab_shift)]; 05122 05123 int L = CV_DESCALE( Lscale*fY + Lshift, lab_shift2 ); 05124 int a = CV_DESCALE( 500*(fX - fY) + 128*(1 << lab_shift2), lab_shift2 ); 05125 int b = CV_DESCALE( 200*(fY - fZ) + 128*(1 << lab_shift2), lab_shift2 ); 05126 05127 dst[i] = saturate_cast<uchar>(L); 05128 dst[i+1] = saturate_cast<uchar>(a); 05129 dst[i+2] = saturate_cast<uchar>(b); 05130 } 05131 } 05132 05133 int srccn; 05134 int coeffs[9]; 05135 bool srgb; 05136 }; 05137 05138 05139 #define clip(value) \ 05140 value < 0.0f ? 0.0f : value > 1.0f ? 
1.0f : value; 05141 05142 struct RGB2Lab_f 05143 { 05144 typedef float channel_type; 05145 05146 RGB2Lab_f(int _srccn, int blueIdx, const float* _coeffs, 05147 const float* _whitept, bool _srgb) 05148 : srccn(_srccn), srgb(_srgb) 05149 { 05150 volatile int _3 = 3; 05151 initLabTabs(); 05152 05153 if (!_coeffs) 05154 _coeffs = sRGB2XYZ_D65; 05155 if (!_whitept) 05156 _whitept = D65; 05157 05158 float scale[] = { 1.0f / _whitept[0], 1.0f, 1.0f / _whitept[2] }; 05159 05160 for( int i = 0; i < _3; i++ ) 05161 { 05162 int j = i * 3; 05163 coeffs[j + (blueIdx ^ 2)] = _coeffs[j] * scale[i]; 05164 coeffs[j + 1] = _coeffs[j + 1] * scale[i]; 05165 coeffs[j + blueIdx] = _coeffs[j + 2] * scale[i]; 05166 05167 CV_Assert( coeffs[j] >= 0 && coeffs[j + 1] >= 0 && coeffs[j + 2] >= 0 && 05168 coeffs[j] + coeffs[j + 1] + coeffs[j + 2] < 1.5f*LabCbrtTabScale ); 05169 } 05170 } 05171 05172 void operator()(const float* src, float* dst, int n) const 05173 { 05174 int i, scn = srccn; 05175 float gscale = GammaTabScale; 05176 const float* gammaTab = srgb ? sRGBGammaTab : 0; 05177 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], 05178 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], 05179 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; 05180 n *= 3; 05181 05182 static const float _1_3 = 1.0f / 3.0f; 05183 static const float _a = 16.0f / 116.0f; 05184 for (i = 0; i < n; i += 3, src += scn ) 05185 { 05186 float R = clip(src[0]); 05187 float G = clip(src[1]); 05188 float B = clip(src[2]); 05189 05190 if (gammaTab) 05191 { 05192 R = splineInterpolate(R * gscale, gammaTab, GAMMA_TAB_SIZE); 05193 G = splineInterpolate(G * gscale, gammaTab, GAMMA_TAB_SIZE); 05194 B = splineInterpolate(B * gscale, gammaTab, GAMMA_TAB_SIZE); 05195 } 05196 float X = R*C0 + G*C1 + B*C2; 05197 float Y = R*C3 + G*C4 + B*C5; 05198 float Z = R*C6 + G*C7 + B*C8; 05199 05200 float FX = X > 0.008856f ? std::pow(X, _1_3) : (7.787f * X + _a); 05201 float FY = Y > 0.008856f ? 
std::pow(Y, _1_3) : (7.787f * Y + _a); 05202 float FZ = Z > 0.008856f ? std::pow(Z, _1_3) : (7.787f * Z + _a); 05203 05204 float L = Y > 0.008856f ? (116.f * FY - 16.f) : (903.3f * Y); 05205 float a = 500.f * (FX - FY); 05206 float b = 200.f * (FY - FZ); 05207 05208 dst[i] = L; 05209 dst[i + 1] = a; 05210 dst[i + 2] = b; 05211 } 05212 } 05213 05214 int srccn; 05215 float coeffs[9]; 05216 bool srgb; 05217 }; 05218 05219 struct Lab2RGB_f 05220 { 05221 typedef float channel_type; 05222 05223 Lab2RGB_f( int _dstcn, int blueIdx, const float* _coeffs, 05224 const float* _whitept, bool _srgb ) 05225 : dstcn(_dstcn), srgb(_srgb) 05226 { 05227 initLabTabs(); 05228 05229 if(!_coeffs) 05230 _coeffs = XYZ2sRGB_D65; 05231 if(!_whitept) 05232 _whitept = D65; 05233 05234 for( int i = 0; i < 3; i++ ) 05235 { 05236 coeffs[i+(blueIdx^2)*3] = _coeffs[i]*_whitept[i]; 05237 coeffs[i+3] = _coeffs[i+3]*_whitept[i]; 05238 coeffs[i+blueIdx*3] = _coeffs[i+6]*_whitept[i]; 05239 } 05240 } 05241 05242 void operator()(const float* src, float* dst, int n) const 05243 { 05244 int i, dcn = dstcn; 05245 const float* gammaTab = srgb ? 
sRGBInvGammaTab : 0;
        float gscale = GammaTabScale;
        float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2],
              C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5],
              C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8];
        float alpha = ColorChannel<float>::max();
        n *= 3;

        // thresholds separating the linear and cubic branches of the inverse f(t)
        static const float lThresh = 0.008856f * 903.3f;
        static const float fThresh = 7.787f * 0.008856f + 16.0f / 116.0f;
        for (i = 0; i < n; i += 3, dst += dcn)
        {
            float li = src[i];
            float ai = src[i + 1];
            float bi = src[i + 2];

            // recover Y and f(Y) from L
            float y, fy;
            if (li <= lThresh)
            {
                y = li / 903.3f;
                fy = 7.787f * y + 16.0f / 116.0f;
            }
            else
            {
                fy = (li + 16.0f) / 116.0f;
                y = fy * fy * fy;
            }

            // f(X) and f(Z) from a and b
            float fxz[] = { ai / 500.0f + fy, fy - bi / 200.0f };

            for (int j = 0; j < 2; j++)
                if (fxz[j] <= fThresh)
                    fxz[j] = (fxz[j] - 16.0f / 116.0f) / 7.787f;
                else
                    fxz[j] = fxz[j] * fxz[j] * fxz[j];


            float x = fxz[0], z = fxz[1];
            float ro = C0 * x + C1 * y + C2 * z;
            float go = C3 * x + C4 * y + C5 * z;
            float bo = C6 * x + C7 * y + C8 * z;
            ro = clip(ro);
            go = clip(go);
            bo = clip(bo);

            if (gammaTab)
            {
                ro = splineInterpolate(ro * gscale, gammaTab, GAMMA_TAB_SIZE);
                go = splineInterpolate(go * gscale, gammaTab, GAMMA_TAB_SIZE);
                bo = splineInterpolate(bo * gscale, gammaTab, GAMMA_TAB_SIZE);
            }

            dst[0] = ro, dst[1] = go, dst[2] = bo;
            if( dcn == 4 )
                dst[3] = alpha;
        }
    }

    int dstcn;
    float coeffs[9];
    bool srgb;
};

#undef clip

// 8-bit Lab -> 8-bit RGB/BGR: widens blocks to float (L rescaled to [0,100],
// a/b re-centered by -128), delegates to Lab2RGB_f, then repacks.
struct Lab2RGB_b
{
    typedef uchar channel_type;

    Lab2RGB_b( int _dstcn, int blueIdx, const float* _coeffs,
               const float* _whitept, bool _srgb )
    : dstcn(_dstcn), cvt(3, blueIdx, _coeffs, _whitept, _srgb )
    {
#if CV_NEON
        v_scale_inv = vdupq_n_f32(100.f/255.f);
        v_scale = vdupq_n_f32(255.f);
        v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
        v_128 = vdupq_n_f32(128.0f);
#elif CV_SSE2
        v_scale_inv = _mm_set1_ps(100.f/255.f);
        v_scale = _mm_set1_ps(255.f);
        v_128 = _mm_set1_ps(128.0f);
        v_zero = _mm_setzero_si128();
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
    }

#if CV_SSE2
    // 16s x 8
    // Widens 8 L/a/b values to float (L * 100/255, a/b - 128) and stores
    // them interleaved into buf[0..23].
    void process(__m128i v_r, __m128i v_g, __m128i v_b,
                 float * buf) const
    {
        __m128 v_r0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_r, v_zero));
        __m128 v_g0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_g, v_zero));
        __m128 v_b0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_b, v_zero));

        __m128 v_r1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_r, v_zero));
        __m128 v_g1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_g, v_zero));
        __m128 v_b1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_b, v_zero));

        v_r0 = _mm_mul_ps(v_r0, v_scale_inv);
        v_r1 = _mm_mul_ps(v_r1, v_scale_inv);

        v_g0 = _mm_sub_ps(v_g0, v_128);
        v_g1 = _mm_sub_ps(v_g1, v_128);
        v_b0 = _mm_sub_ps(v_b0, v_128);
        v_b1 = _mm_sub_ps(v_b1, v_128);

        _mm_interleave_ps(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

        _mm_store_ps(buf, v_r0);
        _mm_store_ps(buf + 4, v_r1);
        _mm_store_ps(buf + 8, v_g0);
        _mm_store_ps(buf + 12, v_g1);
        _mm_store_ps(buf + 16, v_b0);
        _mm_store_ps(buf + 20, v_b1);
    }
#endif

    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int i, j, dcn = dstcn;
        uchar alpha = ColorChannel<uchar>::max();
        float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];

        for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
        {
            int dn = std::min(n - i, (int)BLOCK_SIZE);
            j = 0;

#if CV_NEON
            for ( ; j <= (dn - 8) * 3; j += 24)
            {
                uint8x8x3_t v_src = vld3_u8(src + j);
                uint16x8_t v_t0 = vmovl_u8(v_src.val[0]),
                           v_t1 = vmovl_u8(v_src.val[1]),
                           v_t2 = vmovl_u8(v_src.val[2]);

                float32x4x3_t v_dst;
                v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv);
                v_dst.val[1] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_128);
                v_dst.val[2] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_128);
                vst3q_f32(buf + j, v_dst);

                v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv);
                v_dst.val[1] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_128);
                v_dst.val[2] = vsubq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_128);
                vst3q_f32(buf + j + 12, v_dst);
            }
#elif CV_SSE2
            if (haveSIMD)
            {
                // 32 pixels (96 bytes) per iteration
                for ( ; j <= (dn - 32) * 3; j += 96)
                {
                    __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src + j));
                    __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + j + 16));
                    __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + j + 32));
                    __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + j + 48));
                    __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + j + 64));
                    __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + j + 80));

                    _mm_deinterleave_epi8(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                    process(_mm_unpacklo_epi8(v_r0, v_zero),
                            _mm_unpacklo_epi8(v_g0, v_zero),
                            _mm_unpacklo_epi8(v_b0, v_zero),
                            buf + j);

                    process(_mm_unpackhi_epi8(v_r0, v_zero),
                            _mm_unpackhi_epi8(v_g0, v_zero),
                            _mm_unpackhi_epi8(v_b0, v_zero),
                            buf + j + 24);

                    process(_mm_unpacklo_epi8(v_r1, v_zero),
                            _mm_unpacklo_epi8(v_g1, v_zero),
                            _mm_unpacklo_epi8(v_b1, v_zero),
                            buf + j + 48);

                    process(_mm_unpackhi_epi8(v_r1, v_zero),
                            _mm_unpackhi_epi8(v_g1, v_zero),
                            _mm_unpackhi_epi8(v_b1, v_zero),
                            buf + j + 72);
                }
            }
#endif

            // scalar tail of the widening pass
            for( ; j < dn*3; j += 3 )
            {
                buf[j] = src[j]*(100.f/255.f);
                buf[j+1] = (float)(src[j+1] - 128);
                buf[j+2] = (float)(src[j+2] - 128);
            }
            cvt(buf, buf, dn);   // float Lab -> float RGB, in place
            j = 0;

#if CV_NEON
            for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8)
            {
                float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
                uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
                uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
                uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));

                if (dcn == 4)
                {
                    uint8x8x4_t v_dst;
                    v_dst.val[0] = v_dst0;
                    v_dst.val[1] = v_dst1;
                    v_dst.val[2] = v_dst2;
                    v_dst.val[3] = v_alpha;
                    vst4_u8(dst, v_dst);
                }
                else
                {
                    uint8x8x3_t v_dst;
                    v_dst.val[0] = v_dst0;
                    v_dst.val[1] = v_dst1;
                    v_dst.val[2] = v_dst2;
                    vst3_u8(dst, v_dst);
                }
            }
#elif CV_SSE2
            if (dcn == 3 && haveSIMD)
            {
                for ( ; j <= (dn * 3 - 16); j += 16, dst += 16)
                {
                    __m128 v_src0 = _mm_mul_ps(_mm_load_ps(buf + j), v_scale);
                    __m128 v_src1 = _mm_mul_ps(_mm_load_ps(buf + j + 4), v_scale);
                    __m128 v_src2 = _mm_mul_ps(_mm_load_ps(buf + j + 8), v_scale);
                    __m128 v_src3 = _mm_mul_ps(_mm_load_ps(buf + j + 12), v_scale);

                    __m128i v_dst0 = _mm_packs_epi32(_mm_cvtps_epi32(v_src0),
                                                     _mm_cvtps_epi32(v_src1));
                    __m128i v_dst1 = _mm_packs_epi32(_mm_cvtps_epi32(v_src2),
                                                     _mm_cvtps_epi32(v_src3));

                    _mm_storeu_si128((__m128i *)dst, _mm_packus_epi16(v_dst0, v_dst1));
                }

                // realign to a whole pixel before the scalar tail
                int jr = j % 3;
                if (jr)
                    dst -= jr, j -= jr;
            }
#endif

            for( ; j < dn*3; j += 3, dst += dcn )
            {
                dst[0] = saturate_cast<uchar>(buf[j]*255.f);
                dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
                dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
                if( dcn == 4 )
                    dst[3] = alpha;
            }
        }
    }

    int dstcn;
    Lab2RGB_f cvt;

#if CV_NEON
    float32x4_t v_scale, v_scale_inv, v_128;
    uint8x8_t v_alpha;
#elif CV_SSE2
    __m128 v_scale, v_scale_inv, v_128;
    __m128i v_zero;
    bool haveSIMD;
#endif
};


///////////////////////////////////// RGB <-> L*u*v* /////////////////////////////////////

// Float RGB/BGR -> L*u*v* converter.
// un/vn are the u',v' chromaticity coordinates of the white point.
struct RGB2Luv_f
{
    typedef float channel_type;

    RGB2Luv_f( int _srccn, int blueIdx, const float* _coeffs,
               const float* whitept, bool _srgb )
    : srccn(_srccn), srgb(_srgb)
    {
        volatile int i;   // volatile loop counter: compiler workaround kept from original
        initLabTabs();

        if(!_coeffs) _coeffs = sRGB2XYZ_D65;
        if(!whitept) whitept = D65;

        for( i = 0; i < 3; i++ )
        {
            coeffs[i*3] = _coeffs[i*3];
            coeffs[i*3+1] = _coeffs[i*3+1];
            coeffs[i*3+2] = _coeffs[i*3+2];
            if( blueIdx == 0 )
                std::swap(coeffs[i*3], coeffs[i*3+2]);   // BGR input: swap R/B columns
            CV_Assert( coeffs[i*3] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 &&
                      coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 1.5f );
        }

        // white point chromaticity: u'n = 4X/(X+15Y+3Z), v'n = 9Y/(X+15Y+3Z)
        float d = 1.f/(whitept[0] + whitept[1]*15 + whitept[2]*3);
        un = 4*whitept[0]*d;
        vn = 9*whitept[1]*d;

        CV_Assert(whitept[1] == 1.f);
    }

    void operator()(const float* src, float* dst, int n) const
    {
        int i, scn = srccn;
        float gscale = GammaTabScale;
        const float* gammaTab = srgb ?
sRGBGammaTab : 0; 05557 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], 05558 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], 05559 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; 05560 float _un = 13*un, _vn = 13*vn; 05561 n *= 3; 05562 05563 for( i = 0; i < n; i += 3, src += scn ) 05564 { 05565 float R = src[0], G = src[1], B = src[2]; 05566 if( gammaTab ) 05567 { 05568 R = splineInterpolate(R*gscale, gammaTab, GAMMA_TAB_SIZE); 05569 G = splineInterpolate(G*gscale, gammaTab, GAMMA_TAB_SIZE); 05570 B = splineInterpolate(B*gscale, gammaTab, GAMMA_TAB_SIZE); 05571 } 05572 05573 float X = R*C0 + G*C1 + B*C2; 05574 float Y = R*C3 + G*C4 + B*C5; 05575 float Z = R*C6 + G*C7 + B*C8; 05576 05577 float L = splineInterpolate(Y*LabCbrtTabScale, LabCbrtTab, LAB_CBRT_TAB_SIZE); 05578 L = 116.f*L - 16.f; 05579 05580 float d = (4*13) / std::max(X + 15 * Y + 3 * Z, FLT_EPSILON); 05581 float u = L*(X*d - _un); 05582 float v = L*((9*0.25f)*Y*d - _vn); 05583 05584 dst[i] = L; dst[i+1] = u; dst[i+2] = v; 05585 } 05586 } 05587 05588 int srccn; 05589 float coeffs[9], un, vn; 05590 bool srgb; 05591 }; 05592 05593 05594 struct Luv2RGB_f 05595 { 05596 typedef float channel_type; 05597 05598 Luv2RGB_f( int _dstcn, int blueIdx, const float* _coeffs, 05599 const float* whitept, bool _srgb ) 05600 : dstcn(_dstcn), srgb(_srgb) 05601 { 05602 initLabTabs(); 05603 05604 if(!_coeffs) _coeffs = XYZ2sRGB_D65; 05605 if(!whitept) whitept = D65; 05606 05607 for( int i = 0; i < 3; i++ ) 05608 { 05609 coeffs[i+(blueIdx^2)*3] = _coeffs[i]; 05610 coeffs[i+3] = _coeffs[i+3]; 05611 coeffs[i+blueIdx*3] = _coeffs[i+6]; 05612 } 05613 05614 float d = 1.f/(whitept[0] + whitept[1]*15 + whitept[2]*3); 05615 un = 4*whitept[0]*d; 05616 vn = 9*whitept[1]*d; 05617 05618 CV_Assert(whitept[1] == 1.f); 05619 } 05620 05621 void operator()(const float* src, float* dst, int n) const 05622 { 05623 int i, dcn = dstcn; 05624 const float* gammaTab = srgb ? 
sRGBInvGammaTab : 0; 05625 float gscale = GammaTabScale; 05626 float C0 = coeffs[0], C1 = coeffs[1], C2 = coeffs[2], 05627 C3 = coeffs[3], C4 = coeffs[4], C5 = coeffs[5], 05628 C6 = coeffs[6], C7 = coeffs[7], C8 = coeffs[8]; 05629 float alpha = ColorChannel<float>::max(); 05630 float _un = un, _vn = vn; 05631 n *= 3; 05632 05633 for( i = 0; i < n; i += 3, dst += dcn ) 05634 { 05635 float L = src[i], u = src[i+1], v = src[i+2], d, X, Y, Z; 05636 Y = (L + 16.f) * (1.f/116.f); 05637 Y = Y*Y*Y; 05638 d = (1.f/13.f)/L; 05639 u = u*d + _un; 05640 v = v*d + _vn; 05641 float iv = 1.f/v; 05642 X = 2.25f * u * Y * iv ; 05643 Z = (12 - 3 * u - 20 * v) * Y * 0.25f * iv; 05644 05645 float R = X*C0 + Y*C1 + Z*C2; 05646 float G = X*C3 + Y*C4 + Z*C5; 05647 float B = X*C6 + Y*C7 + Z*C8; 05648 05649 R = std::min(std::max(R, 0.f), 1.f); 05650 G = std::min(std::max(G, 0.f), 1.f); 05651 B = std::min(std::max(B, 0.f), 1.f); 05652 05653 if( gammaTab ) 05654 { 05655 R = splineInterpolate(R*gscale, gammaTab, GAMMA_TAB_SIZE); 05656 G = splineInterpolate(G*gscale, gammaTab, GAMMA_TAB_SIZE); 05657 B = splineInterpolate(B*gscale, gammaTab, GAMMA_TAB_SIZE); 05658 } 05659 05660 dst[0] = R; dst[1] = G; dst[2] = B; 05661 if( dcn == 4 ) 05662 dst[3] = alpha; 05663 } 05664 } 05665 05666 int dstcn; 05667 float coeffs[9], un, vn; 05668 bool srgb; 05669 }; 05670 05671 05672 struct RGB2Luv_b 05673 { 05674 typedef uchar channel_type; 05675 05676 RGB2Luv_b( int _srccn, int blueIdx, const float* _coeffs, 05677 const float* _whitept, bool _srgb ) 05678 : srccn(_srccn), cvt(3, blueIdx, _coeffs, _whitept, _srgb) 05679 { 05680 #if CV_NEON 05681 v_scale_inv = vdupq_n_f32(1.f/255.f); 05682 v_scale = vdupq_n_f32(2.55f); 05683 v_coeff1 = vdupq_n_f32(0.72033898305084743f); 05684 v_coeff2 = vdupq_n_f32(96.525423728813564f); 05685 v_coeff3 = vdupq_n_f32(0.9732824427480916f); 05686 v_coeff4 = vdupq_n_f32(136.259541984732824f); 05687 v_alpha = vdup_n_u8(ColorChannel<uchar>::max()); 05688 #elif CV_SSE2 05689 v_zero = 
_mm_setzero_si128(); 05690 v_scale_inv = _mm_set1_ps(1.f/255.f); 05691 v_scale = _mm_set1_ps(2.55f); 05692 v_coeff1 = _mm_set1_ps(0.72033898305084743f); 05693 v_coeff2 = _mm_set1_ps(96.525423728813564f); 05694 v_coeff3 = _mm_set1_ps(0.9732824427480916f); 05695 v_coeff4 = _mm_set1_ps(136.259541984732824f); 05696 haveSIMD = checkHardwareSupport(CV_CPU_SSE2); 05697 #endif 05698 } 05699 05700 #if CV_SSE2 05701 void process(const float * buf, 05702 __m128i & v_l, __m128i & v_u, __m128i & v_v) const 05703 { 05704 __m128 v_l0f = _mm_load_ps(buf); 05705 __m128 v_l1f = _mm_load_ps(buf + 4); 05706 __m128 v_u0f = _mm_load_ps(buf + 8); 05707 __m128 v_u1f = _mm_load_ps(buf + 12); 05708 __m128 v_v0f = _mm_load_ps(buf + 16); 05709 __m128 v_v1f = _mm_load_ps(buf + 20); 05710 05711 _mm_deinterleave_ps(v_l0f, v_l1f, v_u0f, v_u1f, v_v0f, v_v1f); 05712 05713 v_l0f = _mm_mul_ps(v_l0f, v_scale); 05714 v_l1f = _mm_mul_ps(v_l1f, v_scale); 05715 v_u0f = _mm_add_ps(_mm_mul_ps(v_u0f, v_coeff1), v_coeff2); 05716 v_u1f = _mm_add_ps(_mm_mul_ps(v_u1f, v_coeff1), v_coeff2); 05717 v_v0f = _mm_add_ps(_mm_mul_ps(v_v0f, v_coeff3), v_coeff4); 05718 v_v1f = _mm_add_ps(_mm_mul_ps(v_v1f, v_coeff3), v_coeff4); 05719 05720 v_l = _mm_packs_epi32(_mm_cvtps_epi32(v_l0f), _mm_cvtps_epi32(v_l1f)); 05721 v_u = _mm_packs_epi32(_mm_cvtps_epi32(v_u0f), _mm_cvtps_epi32(v_u1f)); 05722 v_v = _mm_packs_epi32(_mm_cvtps_epi32(v_v0f), _mm_cvtps_epi32(v_v1f)); 05723 } 05724 #endif 05725 05726 void operator()(const uchar* src, uchar* dst, int n) const 05727 { 05728 int i, j, scn = srccn; 05729 float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE]; 05730 05731 for( i = 0; i < n; i += BLOCK_SIZE, dst += BLOCK_SIZE*3 ) 05732 { 05733 int dn = std::min(n - i, (int)BLOCK_SIZE); 05734 j = 0; 05735 05736 #if CV_NEON 05737 for ( ; j <= (dn - 8) * 3; j += 24, src += 8 * scn) 05738 { 05739 uint16x8_t v_t0, v_t1, v_t2; 05740 05741 if (scn == 3) 05742 { 05743 uint8x8x3_t v_src = vld3_u8(src); 05744 v_t0 = vmovl_u8(v_src.val[0]); 05745 v_t1 = 
vmovl_u8(v_src.val[1]); 05746 v_t2 = vmovl_u8(v_src.val[2]); 05747 } 05748 else 05749 { 05750 uint8x8x4_t v_src = vld4_u8(src); 05751 v_t0 = vmovl_u8(v_src.val[0]); 05752 v_t1 = vmovl_u8(v_src.val[1]); 05753 v_t2 = vmovl_u8(v_src.val[2]); 05754 } 05755 05756 float32x4x3_t v_dst; 05757 v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv); 05758 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_scale_inv); 05759 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_scale_inv); 05760 vst3q_f32(buf + j, v_dst); 05761 05762 v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv); 05763 v_dst.val[1] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_scale_inv); 05764 v_dst.val[2] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_scale_inv); 05765 vst3q_f32(buf + j + 12, v_dst); 05766 } 05767 #elif CV_SSE2 05768 if (scn == 3 && haveSIMD) 05769 { 05770 for ( ; j <= (dn * 3 - 16); j += 16, src += 16) 05771 { 05772 __m128i v_src = _mm_loadu_si128((__m128i const *)src); 05773 05774 __m128i v_src_p = _mm_unpacklo_epi8(v_src, v_zero); 05775 _mm_store_ps(buf + j, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src_p, v_zero)), v_scale_inv)); 05776 _mm_store_ps(buf + j + 4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src_p, v_zero)), v_scale_inv)); 05777 05778 v_src_p = _mm_unpackhi_epi8(v_src, v_zero); 05779 _mm_store_ps(buf + j + 8, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(v_src_p, v_zero)), v_scale_inv)); 05780 _mm_store_ps(buf + j + 12, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(v_src_p, v_zero)), v_scale_inv)); 05781 } 05782 05783 int jr = j % 3; 05784 if (jr) 05785 src -= jr, j -= jr; 05786 } 05787 #endif 05788 for( ; j < dn*3; j += 3, src += scn ) 05789 { 05790 buf[j] = src[0]*(1.f/255.f); 05791 buf[j+1] = (float)(src[1]*(1.f/255.f)); 05792 buf[j+2] = (float)(src[2]*(1.f/255.f)); 05793 } 05794 cvt(buf, buf, dn); 05795 05796 j = 0; 05797 #if 
CV_NEON 05798 for ( ; j <= (dn - 8) * 3; j += 24) 05799 { 05800 float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12); 05801 05802 uint8x8x3_t v_dst; 05803 v_dst.val[0] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))), 05804 vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale))))); 05805 v_dst.val[1] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src0.val[1], v_coeff1), v_coeff2))), 05806 vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src1.val[1], v_coeff1), v_coeff2))))); 05807 v_dst.val[2] = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src0.val[2], v_coeff3), v_coeff4))), 05808 vqmovn_u32(cv_vrndq_u32_f32(vaddq_f32(vmulq_f32(v_src1.val[2], v_coeff3), v_coeff4))))); 05809 05810 vst3_u8(dst + j, v_dst); 05811 } 05812 #elif CV_SSE2 05813 if (haveSIMD) 05814 { 05815 for ( ; j <= (dn - 32) * 3; j += 96) 05816 { 05817 __m128i v_l_0, v_u_0, v_v_0; 05818 process(buf + j, 05819 v_l_0, v_u_0, v_v_0); 05820 05821 __m128i v_l_1, v_u_1, v_v_1; 05822 process(buf + j + 24, 05823 v_l_1, v_u_1, v_v_1); 05824 05825 __m128i v_l0 = _mm_packus_epi16(v_l_0, v_l_1); 05826 __m128i v_u0 = _mm_packus_epi16(v_u_0, v_u_1); 05827 __m128i v_v0 = _mm_packus_epi16(v_v_0, v_v_1); 05828 05829 process(buf + j + 48, 05830 v_l_0, v_u_0, v_v_0); 05831 05832 process(buf + j + 72, 05833 v_l_1, v_u_1, v_v_1); 05834 05835 __m128i v_l1 = _mm_packus_epi16(v_l_0, v_l_1); 05836 __m128i v_u1 = _mm_packus_epi16(v_u_0, v_u_1); 05837 __m128i v_v1 = _mm_packus_epi16(v_v_0, v_v_1); 05838 05839 _mm_interleave_epi8(v_l0, v_l1, v_u0, v_u1, v_v0, v_v1); 05840 05841 _mm_storeu_si128((__m128i *)(dst + j), v_l0); 05842 _mm_storeu_si128((__m128i *)(dst + j + 16), v_l1); 05843 _mm_storeu_si128((__m128i *)(dst + j + 32), v_u0); 05844 _mm_storeu_si128((__m128i *)(dst + j + 48), v_u1); 05845 _mm_storeu_si128((__m128i *)(dst + j + 64), v_v0); 05846 _mm_storeu_si128((__m128i *)(dst + j + 80), 
// 8-bit packed CIE L*u*v* -> 8-bit RGB/BGR converter.
// Undoes the 8u Luv encoding (L = l*100/255, u = u*1.388235... - 134,
// v = v*1.027450... - 140 -- see the scalar tail loop in operator()),
// runs the float converter Luv2RGB_f block-wise, then rescales by 255
// with saturation.  NEON / SSE2 paths vectorize both the decode and the
// final pack; the scalar loops handle the remainders.
struct Luv2RGB_b
{
    typedef uchar channel_type;

    Luv2RGB_b( int _dstcn, int blueIdx, const float* _coeffs,
               const float* _whitept, bool _srgb )
    : dstcn(_dstcn), cvt(3, blueIdx, _coeffs, _whitept, _srgb )
    {
#if CV_NEON
        // Vector constants mirroring the scalar decode/encode factors below.
        v_scale_inv = vdupq_n_f32(100.f/255.f);
        v_coeff1 = vdupq_n_f32(1.388235294117647f);
        v_coeff2 = vdupq_n_f32(1.027450980392157f);
        v_134 = vdupq_n_f32(134.f);
        v_140 = vdupq_n_f32(140.f);
        v_scale = vdupq_n_f32(255.f);
        v_alpha = vdup_n_u8(ColorChannel<uchar>::max());
#elif CV_SSE2
        v_scale_inv = _mm_set1_ps(100.f/255.f);
        v_coeff1 = _mm_set1_ps(1.388235294117647f);
        v_coeff2 = _mm_set1_ps(1.027450980392157f);
        v_134 = _mm_set1_ps(134.f);
        v_140 = _mm_set1_ps(140.f);
        v_scale = _mm_set1_ps(255.f);
        v_zero = _mm_setzero_si128();
        haveSIMD = checkHardwareSupport(CV_CPU_SSE2);
#endif
    }

#if CV_SSE2
    // 16s x 8
    // Widens 8 zero-extended 16-bit L/u/v lanes to float, applies the
    // 8u->float Luv decode, and stores 8 interleaved (L,u,v) triplets
    // (24 floats) to buf.
    void process(__m128i v_l, __m128i v_u, __m128i v_v,
                 float * buf) const
    {
        __m128 v_l0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_l, v_zero));
        __m128 v_u0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_u, v_zero));
        __m128 v_v0 = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v_v, v_zero));

        __m128 v_l1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_l, v_zero));
        __m128 v_u1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_u, v_zero));
        __m128 v_v1 = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v_v, v_zero));

        v_l0 = _mm_mul_ps(v_l0, v_scale_inv);
        v_l1 = _mm_mul_ps(v_l1, v_scale_inv);

        v_u0 = _mm_sub_ps(_mm_mul_ps(v_u0, v_coeff1), v_134);
        v_u1 = _mm_sub_ps(_mm_mul_ps(v_u1, v_coeff1), v_134);
        v_v0 = _mm_sub_ps(_mm_mul_ps(v_v0, v_coeff2), v_140);
        v_v1 = _mm_sub_ps(_mm_mul_ps(v_v1, v_coeff2), v_140);

        _mm_interleave_ps(v_l0, v_l1, v_u0, v_u1, v_v0, v_v1);

        _mm_store_ps(buf, v_l0);
        _mm_store_ps(buf + 4, v_l1);
        _mm_store_ps(buf + 8, v_u0);
        _mm_store_ps(buf + 12, v_u1);
        _mm_store_ps(buf + 16, v_v0);
        _mm_store_ps(buf + 20, v_v1);
    }
#endif

    // src: packed 8u Luv triplets; dst: packed 8u RGB/BGR(A); n: pixel count.
    // Processes at most BLOCK_SIZE pixels per iteration through the aligned
    // float staging buffer `buf`.
    void operator()(const uchar* src, uchar* dst, int n) const
    {
        int i, j, dcn = dstcn;
        uchar alpha = ColorChannel<uchar>::max();
        float CV_DECL_ALIGNED(16) buf[3*BLOCK_SIZE];

        for( i = 0; i < n; i += BLOCK_SIZE, src += BLOCK_SIZE*3 )
        {
            int dn = std::min(n - i, (int)BLOCK_SIZE);
            j = 0;

#if CV_NEON
            // Decode 8 Luv pixels per iteration into float triplets.
            for ( ; j <= (dn - 8) * 3; j += 24)
            {
                uint8x8x3_t v_src = vld3_u8(src + j);
                uint16x8_t v_t0 = vmovl_u8(v_src.val[0]),
                           v_t1 = vmovl_u8(v_src.val[1]),
                           v_t2 = vmovl_u8(v_src.val[2]);

                float32x4x3_t v_dst;
                v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t0))), v_scale_inv);
                v_dst.val[1] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t1))), v_coeff1), v_134);
                v_dst.val[2] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_t2))), v_coeff2), v_140);
                vst3q_f32(buf + j, v_dst);

                v_dst.val[0] = vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t0))), v_scale_inv);
                v_dst.val[1] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t1))), v_coeff1), v_134);
                v_dst.val[2] = vsubq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_t2))), v_coeff2), v_140);
                vst3q_f32(buf + j + 12, v_dst);
            }
#elif CV_SSE2
            if (haveSIMD)
            {
                // Decode 32 Luv pixels (96 bytes) per iteration.
                for ( ; j <= (dn - 32) * 3; j += 96)
                {
                    __m128i v_r0 = _mm_loadu_si128((__m128i const *)(src + j));
                    __m128i v_r1 = _mm_loadu_si128((__m128i const *)(src + j + 16));
                    __m128i v_g0 = _mm_loadu_si128((__m128i const *)(src + j + 32));
                    __m128i v_g1 = _mm_loadu_si128((__m128i const *)(src + j + 48));
                    __m128i v_b0 = _mm_loadu_si128((__m128i const *)(src + j + 64));
                    __m128i v_b1 = _mm_loadu_si128((__m128i const *)(src + j + 80));

                    _mm_deinterleave_epi8(v_r0, v_r1, v_g0, v_g1, v_b0, v_b1);

                    process(_mm_unpacklo_epi8(v_r0, v_zero),
                            _mm_unpacklo_epi8(v_g0, v_zero),
                            _mm_unpacklo_epi8(v_b0, v_zero),
                            buf + j);

                    process(_mm_unpackhi_epi8(v_r0, v_zero),
                            _mm_unpackhi_epi8(v_g0, v_zero),
                            _mm_unpackhi_epi8(v_b0, v_zero),
                            buf + j + 24);

                    process(_mm_unpacklo_epi8(v_r1, v_zero),
                            _mm_unpacklo_epi8(v_g1, v_zero),
                            _mm_unpacklo_epi8(v_b1, v_zero),
                            buf + j + 48);

                    process(_mm_unpackhi_epi8(v_r1, v_zero),
                            _mm_unpackhi_epi8(v_g1, v_zero),
                            _mm_unpackhi_epi8(v_b1, v_zero),
                            buf + j + 72);
                }
            }
#endif
            // Scalar remainder: the reference 8u -> float Luv decode.
            for( ; j < dn*3; j += 3 )
            {
                buf[j] = src[j]*(100.f/255.f);
                buf[j+1] = (float)(src[j+1]*1.388235294117647f - 134.f);
                buf[j+2] = (float)(src[j+2]*1.027450980392157f - 140.f);
            }
            cvt(buf, buf, dn);

            j = 0;
#if CV_NEON
            // Pack 8 float RGB pixels back to 8u, optional alpha for dcn==4.
            for ( ; j <= (dn - 8) * 3; j += 24, dst += dcn * 8)
            {
                float32x4x3_t v_src0 = vld3q_f32(buf + j), v_src1 = vld3q_f32(buf + j + 12);
                uint8x8_t v_dst0 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[0], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[0], v_scale)))));
                uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[1], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[1], v_scale)))));
                uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src0.val[2], v_scale))),
                                                           vqmovn_u32(cv_vrndq_u32_f32(vmulq_f32(v_src1.val[2], v_scale)))));

                if (dcn == 4)
                {
                    uint8x8x4_t v_dst;
                    v_dst.val[0] = v_dst0;
                    v_dst.val[1] = v_dst1;
                    v_dst.val[2] = v_dst2;
                    v_dst.val[3] = v_alpha;
                    vst4_u8(dst, v_dst);
                }
                else
                {
                    uint8x8x3_t v_dst;
                    v_dst.val[0] = v_dst0;
                    v_dst.val[1] = v_dst1;
                    v_dst.val[2] = v_dst2;
                    vst3_u8(dst, v_dst);
                }
            }
#elif CV_SSE2
            if (dcn == 3 && haveSIMD)
            {
                for ( ; j <= (dn * 3 - 16); j += 16, dst += 16)
                {
                    __m128 v_src0 = _mm_mul_ps(_mm_load_ps(buf + j), v_scale);
                    __m128 v_src1 = _mm_mul_ps(_mm_load_ps(buf + j + 4), v_scale);
                    __m128 v_src2 = _mm_mul_ps(_mm_load_ps(buf + j + 8), v_scale);
                    __m128 v_src3 = _mm_mul_ps(_mm_load_ps(buf + j + 12), v_scale);

                    __m128i v_dst0 = _mm_packs_epi32(_mm_cvtps_epi32(v_src0),
                                                     _mm_cvtps_epi32(v_src1));
                    __m128i v_dst1 = _mm_packs_epi32(_mm_cvtps_epi32(v_src2),
                                                     _mm_cvtps_epi32(v_src3));

                    _mm_storeu_si128((__m128i *)dst, _mm_packus_epi16(v_dst0, v_dst1));
                }

                // j advanced in 16-byte steps, not multiples of 3: back up to
                // the last whole pixel so the scalar tail restarts cleanly.
                int jr = j % 3;
                if (jr)
                    dst -= jr, j -= jr;
            }
#endif

            // Scalar remainder: float [0,1]-scaled RGB -> 8u with saturation.
            for( ; j < dn*3; j += 3, dst += dcn )
            {
                dst[0] = saturate_cast<uchar>(buf[j]*255.f);
                dst[1] = saturate_cast<uchar>(buf[j+1]*255.f);
                dst[2] = saturate_cast<uchar>(buf[j+2]*255.f);
                if( dcn == 4 )
                    dst[3] = alpha;
            }
        }
    }

    int dstcn;        // number of destination channels (3 or 4)
    Luv2RGB_f cvt;    // float Luv -> RGB worker

#if CV_NEON
    float32x4_t v_scale, v_scale_inv, v_coeff1, v_coeff2, v_134, v_140;
    uint8x8_t v_alpha;
#elif CV_SSE2
    __m128 v_scale, v_scale_inv, v_coeff1, v_coeff2, v_134, v_140;
    __m128i v_zero;
    bool haveSIMD;    // SSE2 confirmed at runtime
#endif
};
// Semi-planar YUV 4:2:0 (interleaved UV plane) -> 24-bit RGB/BGR using
// BT.601 fixed-point coefficients (ITUR_BT_601_*, 20-bit precision).
// bIdx: 0 writes BGR order, 2 writes RGB.  uIdx: byte offset of U within
// each interleaved UV pair (0 -> U first, 1 -> V first).
// Each Range unit covers two image rows, which share one chroma row.
template<int bIdx, int uIdx>
struct YUV420sp2RGB888Invoker : ParallelLoopBody
{
    Mat* dst;
    const uchar* my1, *muv;   // luma plane / interleaved chroma plane
    int width, stride;

    YUV420sp2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
        : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}

    void operator()(const Range& range) const
    {
        int rangeBegin = range.start * 2;
        int rangeEnd = range.end * 2;

        //R = 1.164(Y - 16) + 1.596(V - 128)
        //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
        //B = 1.164(Y - 16) + 2.018(U - 128)

        //R = (1220542(Y - 16) + 1673527(V - 128) + (1 << 19)) >> 20
        //G = (1220542(Y - 16) - 852492(V - 128) - 409993(U - 128) + (1 << 19)) >> 20
        //B = (1220542(Y - 16) + 2116026(U - 128) + (1 << 19)) >> 20

        const uchar* y1 = my1 + rangeBegin * stride, *uv = muv + rangeBegin * stride / 2;

#ifdef HAVE_TEGRA_OPTIMIZATION
        if(tegra::useTegra() && tegra::cvtYUV4202RGB(bIdx, uIdx, 3, y1, uv, stride, dst->ptr<uchar>(rangeBegin), dst->step, rangeEnd - rangeBegin, dst->cols))
            return;
#endif

        for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, uv += stride)
        {
            uchar* row1 = dst->ptr<uchar>(j);
            uchar* row2 = dst->ptr<uchar>(j + 1);
            const uchar* y2 = y1 + stride;

            for (int i = 0; i < width; i += 2, row1 += 6, row2 += 6)
            {
                int u = int(uv[i + 0 + uIdx]) - 128;
                int v = int(uv[i + 1 - uIdx]) - 128;

                // Chroma terms shared by the 2x2 luma block; the rounding
                // bias (1 << 19) is folded in once per term.
                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;

                int y00 = std::max(0, int(y1[i]) - 16) * ITUR_BT_601_CY;
                row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
                row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
                row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);

                int y01 = std::max(0, int(y1[i + 1]) - 16) * ITUR_BT_601_CY;
                row1[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
                row1[4] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
                row1[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);

                int y10 = std::max(0, int(y2[i]) - 16) * ITUR_BT_601_CY;
                row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
                row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
                row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);

                int y11 = std::max(0, int(y2[i + 1]) - 16) * ITUR_BT_601_CY;
                row2[5-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
                row2[4] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
                row2[3+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
            }
        }
    }
};
// Semi-planar YUV 4:2:0 -> 32-bit RGBA/BGRA.  Identical math to
// YUV420sp2RGB888Invoker above, but writes 4 bytes per pixel with the
// alpha channel forced to 0xff.
template<int bIdx, int uIdx>
struct YUV420sp2RGBA8888Invoker : ParallelLoopBody
{
    Mat* dst;
    const uchar* my1, *muv;   // luma plane / interleaved chroma plane
    int width, stride;

    YUV420sp2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
        : dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}

    void operator()(const Range& range) const
    {
        int rangeBegin = range.start * 2;
        int rangeEnd = range.end * 2;

        //R = 1.164(Y - 16) + 1.596(V - 128)
        //G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
        //B = 1.164(Y - 16) + 2.018(U - 128)

        //R = (1220542(Y - 16) + 1673527(V - 128) + (1 << 19)) >> 20
        //G = (1220542(Y - 16) - 852492(V - 128) - 409993(U - 128) + (1 << 19)) >> 20
        //B = (1220542(Y - 16) + 2116026(U - 128) + (1 << 19)) >> 20

        const uchar* y1 = my1 + rangeBegin * stride, *uv = muv + rangeBegin * stride / 2;

#ifdef HAVE_TEGRA_OPTIMIZATION
        if(tegra::useTegra() && tegra::cvtYUV4202RGB(bIdx, uIdx, 4, y1, uv, stride, dst->ptr<uchar>(rangeBegin), dst->step, rangeEnd - rangeBegin, dst->cols))
            return;
#endif

        for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, uv += stride)
        {
            uchar* row1 = dst->ptr<uchar>(j);
            uchar* row2 = dst->ptr<uchar>(j + 1);
            const uchar* y2 = y1 + stride;

            for (int i = 0; i < width; i += 2, row1 += 8, row2 += 8)
            {
                int u = int(uv[i + 0 + uIdx]) - 128;
                int v = int(uv[i + 1 - uIdx]) - 128;

                // Chroma terms shared by the 2x2 luma block, rounding bias
                // folded in.
                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;

                int y00 = std::max(0, int(y1[i]) - 16) * ITUR_BT_601_CY;
                row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
                row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
                row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
                row1[3] = uchar(0xff);

                int y01 = std::max(0, int(y1[i + 1]) - 16) * ITUR_BT_601_CY;
                row1[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
                row1[5] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
                row1[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
                row1[7] = uchar(0xff);

                int y10 = std::max(0, int(y2[i]) - 16) * ITUR_BT_601_CY;
                row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
                row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
                row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
                row2[3] = uchar(0xff);

                int y11 = std::max(0, int(y2[i + 1]) - 16) * ITUR_BT_601_CY;
                row2[6-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
                row2[5] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
                row2[4+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
                row2[7] = uchar(0xff);
            }
        }
    }
};
// Fully planar YUV 4:2:0 (separate U and V planes) -> 24-bit RGB/BGR.
// Same BT.601 fixed-point math as the semi-planar converters above.
// ustepIdx/vstepIdx select the starting phase of the alternating chroma
// row advance: each chroma row spans two half-width segments, so the
// pointer alternates between width/2 and stride - width/2 steps.
template<int bIdx>
struct YUV420p2RGB888Invoker : ParallelLoopBody
{
    Mat* dst;
    const uchar* my1, *mu, *mv;   // luma / U / V planes
    int width, stride;
    int ustepIdx, vstepIdx;

    YUV420p2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
        : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}

    void operator()(const Range& range) const
    {
        const int rangeBegin = range.start * 2;
        const int rangeEnd = range.end * 2;

        // Alternating chroma pointer advances (two half-rows per stride).
        int uvsteps[2] = {width/2, stride - width/2};
        int usIdx = ustepIdx, vsIdx = vstepIdx;

        const uchar* y1 = my1 + rangeBegin * stride;
        const uchar* u1 = mu + (range.start / 2) * stride;
        const uchar* v1 = mv + (range.start / 2) * stride;

        // Odd-numbered range units start on the second half of a chroma row.
        if(range.start % 2 == 1)
        {
            u1 += uvsteps[(usIdx++) & 1];
            v1 += uvsteps[(vsIdx++) & 1];
        }

        for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, u1 += uvsteps[(usIdx++) & 1], v1 += uvsteps[(vsIdx++) & 1])
        {
            uchar* row1 = dst->ptr<uchar>(j);
            uchar* row2 = dst->ptr<uchar>(j + 1);
            const uchar* y2 = y1 + stride;

            for (int i = 0; i < width / 2; i += 1, row1 += 6, row2 += 6)
            {
                int u = int(u1[i]) - 128;
                int v = int(v1[i]) - 128;

                // Chroma terms shared by the 2x2 luma block.
                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;

                int y00 = std::max(0, int(y1[2 * i]) - 16) * ITUR_BT_601_CY;
                row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
                row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
                row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);

                int y01 = std::max(0, int(y1[2 * i + 1]) - 16) * ITUR_BT_601_CY;
                row1[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
                row1[4] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
                row1[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);

                int y10 = std::max(0, int(y2[2 * i]) - 16) * ITUR_BT_601_CY;
                row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
                row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
                row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);

                int y11 = std::max(0, int(y2[2 * i + 1]) - 16) * ITUR_BT_601_CY;
                row2[5-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
                row2[4] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
                row2[3+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
            }
        }
    }
};
// Fully planar YUV 4:2:0 -> 32-bit RGBA/BGRA.  Identical math to
// YUV420p2RGB888Invoker above, but writes 4 bytes per pixel with the
// alpha channel forced to 0xff.
template<int bIdx>
struct YUV420p2RGBA8888Invoker : ParallelLoopBody
{
    Mat* dst;
    const uchar* my1, *mu, *mv;   // luma / U / V planes
    int width, stride;
    int ustepIdx, vstepIdx;

    YUV420p2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
        : dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}

    void operator()(const Range& range) const
    {
        int rangeBegin = range.start * 2;
        int rangeEnd = range.end * 2;

        // Alternating chroma pointer advances (two half-rows per stride).
        int uvsteps[2] = {width/2, stride - width/2};
        int usIdx = ustepIdx, vsIdx = vstepIdx;

        const uchar* y1 = my1 + rangeBegin * stride;
        const uchar* u1 = mu + (range.start / 2) * stride;
        const uchar* v1 = mv + (range.start / 2) * stride;

        // Odd-numbered range units start on the second half of a chroma row.
        if(range.start % 2 == 1)
        {
            u1 += uvsteps[(usIdx++) & 1];
            v1 += uvsteps[(vsIdx++) & 1];
        }

        for (int j = rangeBegin; j < rangeEnd; j += 2, y1 += stride * 2, u1 += uvsteps[(usIdx++) & 1], v1 += uvsteps[(vsIdx++) & 1])
        {
            uchar* row1 = dst->ptr<uchar>(j);
            uchar* row2 = dst->ptr<uchar>(j + 1);
            const uchar* y2 = y1 + stride;

            for (int i = 0; i < width / 2; i += 1, row1 += 8, row2 += 8)
            {
                int u = int(u1[i]) - 128;
                int v = int(v1[i]) - 128;

                // Chroma terms shared by the 2x2 luma block.
                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;

                int y00 = std::max(0, int(y1[2 * i]) - 16) * ITUR_BT_601_CY;
                row1[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
                row1[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
                row1[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
                row1[3] = uchar(0xff);

                int y01 = std::max(0, int(y1[2 * i + 1]) - 16) * ITUR_BT_601_CY;
                row1[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
                row1[5] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
                row1[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
                row1[7] = uchar(0xff);

                int y10 = std::max(0, int(y2[2 * i]) - 16) * ITUR_BT_601_CY;
                row2[2-bIdx] = saturate_cast<uchar>((y10 + ruv) >> ITUR_BT_601_SHIFT);
                row2[1] = saturate_cast<uchar>((y10 + guv) >> ITUR_BT_601_SHIFT);
                row2[bIdx] = saturate_cast<uchar>((y10 + buv) >> ITUR_BT_601_SHIFT);
                row2[3] = uchar(0xff);

                int y11 = std::max(0, int(y2[2 * i + 1]) - 16) * ITUR_BT_601_CY;
                row2[6-bIdx] = saturate_cast<uchar>((y11 + ruv) >> ITUR_BT_601_SHIFT);
                row2[5] = saturate_cast<uchar>((y11 + guv) >> ITUR_BT_601_SHIFT);
                row2[4+bIdx] = saturate_cast<uchar>((y11 + buv) >> ITUR_BT_601_SHIFT);
                row2[7] = uchar(0xff);
            }
        }
    }
};
_u, const uchar* _v, int ustepIdx, int vstepIdx) 06412 { 06413 YUV420p2RGB888Invoker<bIdx> converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx); 06414 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION) 06415 parallel_for_(Range(0, _dst.rows/2), converter); 06416 else 06417 converter(Range(0, _dst.rows/2)); 06418 } 06419 06420 template<int bIdx> 06421 inline void cvtYUV420p2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx) 06422 { 06423 YUV420p2RGBA8888Invoker<bIdx> converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx); 06424 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION) 06425 parallel_for_(Range(0, _dst.rows/2), converter); 06426 else 06427 converter(Range(0, _dst.rows/2)); 06428 } 06429 06430 ///////////////////////////////////// RGB -> YUV420p ///////////////////////////////////// 06431 06432 template<int bIdx> 06433 struct RGB888toYUV420pInvoker: public ParallelLoopBody 06434 { 06435 RGB888toYUV420pInvoker( const Mat& src, Mat* dst, const int uIdx ) 06436 : src_(src), 06437 dst_(dst), 06438 uIdx_(uIdx) { } 06439 06440 void operator()(const Range& rowRange) const 06441 { 06442 const int w = src_.cols; 06443 const int h = src_.rows; 06444 06445 const int cn = src_.channels(); 06446 for( int i = rowRange.start; i < rowRange.end; i++ ) 06447 { 06448 const uchar* row0 = src_.ptr<uchar>(2 * i); 06449 const uchar* row1 = src_.ptr<uchar>(2 * i + 1); 06450 06451 uchar* y = dst_->ptr<uchar>(2*i); 06452 uchar* u = dst_->ptr<uchar>(h + i/2) + (i % 2) * (w/2); 06453 uchar* v = dst_->ptr<uchar>(h + (i + h/2)/2) + ((i + h/2) % 2) * (w/2); 06454 if( uIdx_ == 2 ) std::swap(u, v); 06455 06456 for( int j = 0, k = 0; j < w * cn; j += 2 * cn, k++ ) 06457 { 06458 int r00 = row0[2-bIdx + j]; int g00 = row0[1 + j]; int b00 = row0[bIdx + j]; 06459 int r01 = row0[2-bIdx + cn + j]; int g01 = row0[1 + cn + j]; int b01 = row0[bIdx + cn + j]; 06460 int r10 = row1[2-bIdx + j]; int g10 = row1[1 + j]; 
int b10 = row1[bIdx + j]; 06461 int r11 = row1[2-bIdx + cn + j]; int g11 = row1[1 + cn + j]; int b11 = row1[bIdx + cn + j]; 06462 06463 const int shifted16 = (16 << ITUR_BT_601_SHIFT); 06464 const int halfShift = (1 << (ITUR_BT_601_SHIFT - 1)); 06465 int y00 = ITUR_BT_601_CRY * r00 + ITUR_BT_601_CGY * g00 + ITUR_BT_601_CBY * b00 + halfShift + shifted16; 06466 int y01 = ITUR_BT_601_CRY * r01 + ITUR_BT_601_CGY * g01 + ITUR_BT_601_CBY * b01 + halfShift + shifted16; 06467 int y10 = ITUR_BT_601_CRY * r10 + ITUR_BT_601_CGY * g10 + ITUR_BT_601_CBY * b10 + halfShift + shifted16; 06468 int y11 = ITUR_BT_601_CRY * r11 + ITUR_BT_601_CGY * g11 + ITUR_BT_601_CBY * b11 + halfShift + shifted16; 06469 06470 y[2*k + 0] = saturate_cast<uchar>(y00 >> ITUR_BT_601_SHIFT); 06471 y[2*k + 1] = saturate_cast<uchar>(y01 >> ITUR_BT_601_SHIFT); 06472 y[2*k + dst_->step + 0] = saturate_cast<uchar>(y10 >> ITUR_BT_601_SHIFT); 06473 y[2*k + dst_->step + 1] = saturate_cast<uchar>(y11 >> ITUR_BT_601_SHIFT); 06474 06475 const int shifted128 = (128 << ITUR_BT_601_SHIFT); 06476 int u00 = ITUR_BT_601_CRU * r00 + ITUR_BT_601_CGU * g00 + ITUR_BT_601_CBU * b00 + halfShift + shifted128; 06477 int v00 = ITUR_BT_601_CBU * r00 + ITUR_BT_601_CGV * g00 + ITUR_BT_601_CBV * b00 + halfShift + shifted128; 06478 06479 u[k] = saturate_cast<uchar>(u00 >> ITUR_BT_601_SHIFT); 06480 v[k] = saturate_cast<uchar>(v00 >> ITUR_BT_601_SHIFT); 06481 } 06482 } 06483 } 06484 06485 static bool isFit( const Mat& src ) 06486 { 06487 return (src.total() >= 320*240); 06488 } 06489 06490 private: 06491 RGB888toYUV420pInvoker& operator=(const RGB888toYUV420pInvoker&); 06492 06493 const Mat& src_; 06494 Mat* const dst_; 06495 const int uIdx_; 06496 }; 06497 06498 template<int bIdx, int uIdx> 06499 static void cvtRGBtoYUV420p(const Mat& src, Mat& dst) 06500 { 06501 RGB888toYUV420pInvoker<bIdx> colorConverter(src, &dst, uIdx); 06502 if( RGB888toYUV420pInvoker<bIdx>::isFit(src) ) 06503 parallel_for_(Range(0, src.rows/2), colorConverter); 
// Packed YUV 4:2:2 -> 24-bit RGB/BGR using the same BT.601 fixed-point
// coefficients as the YUV420 converters above.  Each 4-byte group holds
// two luma samples plus one U and one V.  yIdx is the byte offset of the
// first luma sample in the group; uIdx picks which chroma byte is U; bIdx
// selects BGR (0) vs RGB (2) output order.
template<int bIdx, int uIdx, int yIdx>
struct YUV422toRGB888Invoker : ParallelLoopBody
{
    Mat* dst;
    const uchar* src;
    int width, stride;

    YUV422toRGB888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
        : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}

    void operator()(const Range& range) const
    {
        int rangeBegin = range.start;
        int rangeEnd = range.end;

        // Byte positions of U and V within one 4-byte group.
        const int uidx = 1 - yIdx + uIdx * 2;
        const int vidx = (2 + uidx) % 4;
        const uchar* yuv_src = src + rangeBegin * stride;

        for (int j = rangeBegin; j < rangeEnd; j++, yuv_src += stride)
        {
            uchar* row = dst->ptr<uchar>(j);

            for (int i = 0; i < 2 * width; i += 4, row += 6)
            {
                int u = int(yuv_src[i + uidx]) - 128;
                int v = int(yuv_src[i + vidx]) - 128;

                // Chroma terms shared by the two luma samples of the group.
                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;

                int y00 = std::max(0, int(yuv_src[i + yIdx]) - 16) * ITUR_BT_601_CY;
                row[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
                row[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
                row[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);

                int y01 = std::max(0, int(yuv_src[i + yIdx + 2]) - 16) * ITUR_BT_601_CY;
                row[5-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
                row[4] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
                row[3+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
            }
        }
    }
};
// Packed YUV 4:2:2 -> 32-bit RGBA/BGRA.  Identical math to
// YUV422toRGB888Invoker above, but writes 4 bytes per pixel with the
// alpha channel forced to 0xff.
template<int bIdx, int uIdx, int yIdx>
struct YUV422toRGBA8888Invoker : ParallelLoopBody
{
    Mat* dst;
    const uchar* src;
    int width, stride;

    YUV422toRGBA8888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
        : dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}

    void operator()(const Range& range) const
    {
        int rangeBegin = range.start;
        int rangeEnd = range.end;

        // Byte positions of U and V within one 4-byte group.
        const int uidx = 1 - yIdx + uIdx * 2;
        const int vidx = (2 + uidx) % 4;
        const uchar* yuv_src = src + rangeBegin * stride;

        for (int j = rangeBegin; j < rangeEnd; j++, yuv_src += stride)
        {
            uchar* row = dst->ptr<uchar>(j);

            for (int i = 0; i < 2 * width; i += 4, row += 8)
            {
                int u = int(yuv_src[i + uidx]) - 128;
                int v = int(yuv_src[i + vidx]) - 128;

                // Chroma terms shared by the two luma samples of the group.
                int ruv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVR * v;
                int guv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CVG * v + ITUR_BT_601_CUG * u;
                int buv = (1 << (ITUR_BT_601_SHIFT - 1)) + ITUR_BT_601_CUB * u;

                int y00 = std::max(0, int(yuv_src[i + yIdx]) - 16) * ITUR_BT_601_CY;
                row[2-bIdx] = saturate_cast<uchar>((y00 + ruv) >> ITUR_BT_601_SHIFT);
                row[1] = saturate_cast<uchar>((y00 + guv) >> ITUR_BT_601_SHIFT);
                row[bIdx] = saturate_cast<uchar>((y00 + buv) >> ITUR_BT_601_SHIFT);
                row[3] = uchar(0xff);

                int y01 = std::max(0, int(yuv_src[i + yIdx + 2]) - 16) * ITUR_BT_601_CY;
                row[6-bIdx] = saturate_cast<uchar>((y01 + ruv) >> ITUR_BT_601_SHIFT);
                row[5] = saturate_cast<uchar>((y01 + guv) >> ITUR_BT_601_SHIFT);
                row[4+bIdx] = saturate_cast<uchar>((y01 + buv) >> ITUR_BT_601_SHIFT);
                row[7] = uchar(0xff);
            }
        }
    }
};
yIdx> converter(&_dst, _stride, _yuv); 06610 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION) 06611 parallel_for_(Range(0, _dst.rows), converter); 06612 else 06613 converter(Range(0, _dst.rows)); 06614 } 06615 06616 template<int bIdx, int uIdx, int yIdx> 06617 inline void cvtYUV422toRGBA(Mat& _dst, int _stride, const uchar* _yuv) 06618 { 06619 YUV422toRGBA8888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv); 06620 if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION) 06621 parallel_for_(Range(0, _dst.rows), converter); 06622 else 06623 converter(Range(0, _dst.rows)); 06624 } 06625 06626 /////////////////////////// RGBA <-> mRGBA (alpha premultiplied) ////////////// 06627 06628 template<typename _Tp> 06629 struct RGBA2mRGBA 06630 { 06631 typedef _Tp channel_type; 06632 06633 void operator()(const _Tp* src, _Tp* dst, int n) const 06634 { 06635 _Tp max_val = ColorChannel<_Tp>::max(); 06636 _Tp half_val = ColorChannel<_Tp>::half(); 06637 for( int i = 0; i < n; i++ ) 06638 { 06639 _Tp v0 = *src++; 06640 _Tp v1 = *src++; 06641 _Tp v2 = *src++; 06642 _Tp v3 = *src++; 06643 06644 *dst++ = (v0 * v3 + half_val) / max_val; 06645 *dst++ = (v1 * v3 + half_val) / max_val; 06646 *dst++ = (v2 * v3 + half_val) / max_val; 06647 *dst++ = v3; 06648 } 06649 } 06650 }; 06651 06652 06653 template<typename _Tp> 06654 struct mRGBA2RGBA 06655 { 06656 typedef _Tp channel_type; 06657 06658 void operator()(const _Tp* src, _Tp* dst, int n) const 06659 { 06660 _Tp max_val = ColorChannel<_Tp>::max(); 06661 for( int i = 0; i < n; i++ ) 06662 { 06663 _Tp v0 = *src++; 06664 _Tp v1 = *src++; 06665 _Tp v2 = *src++; 06666 _Tp v3 = *src++; 06667 _Tp v3_half = v3 / 2; 06668 06669 *dst++ = (v3==0)? 0 : (v0 * max_val + v3_half) / v3; 06670 *dst++ = (v3==0)? 0 : (v1 * max_val + v3_half) / v3; 06671 *dst++ = (v3==0)? 
0 : (v2 * max_val + v3_half) / v3; 06672 *dst++ = v3; 06673 } 06674 } 06675 }; 06676 06677 #ifdef HAVE_OPENCL 06678 06679 static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) 06680 { 06681 bool ok = false; 06682 UMat src = _src.getUMat(), dst; 06683 Size sz = src.size(), dstSz = sz; 06684 int scn = src.channels(), depth = src.depth(), bidx, uidx, yidx; 06685 int dims = 2, stripeSize = 1; 06686 ocl::Kernel k; 06687 06688 if (depth != CV_8U && depth != CV_16U && depth != CV_32F) 06689 return false; 06690 06691 ocl::Device dev = ocl::Device::getDefault(); 06692 int pxPerWIy = dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU) ? 4 : 1; 06693 int pxPerWIx = 1; 06694 06695 size_t globalsize[] = { (size_t)src.cols, ((size_t)src.rows + pxPerWIy - 1) / pxPerWIy }; 06696 cv::String opts = format("-D depth=%d -D scn=%d -D PIX_PER_WI_Y=%d ", 06697 depth, scn, pxPerWIy); 06698 06699 switch (code) 06700 { 06701 case COLOR_BGR2BGRA: case COLOR_RGB2BGRA: case COLOR_BGRA2BGR: 06702 case COLOR_RGBA2BGR: case COLOR_RGB2BGR: case COLOR_BGRA2RGBA: 06703 { 06704 CV_Assert(scn == 3 || scn == 4); 06705 dcn = code == COLOR_BGR2BGRA || code == COLOR_RGB2BGRA || code == COLOR_BGRA2RGBA ? 4 : 3; 06706 bool reverse = !(code == COLOR_BGR2BGRA || code == COLOR_BGRA2BGR); 06707 k.create("RGB", ocl::imgproc::cvtcolor_oclsrc, 06708 opts + format("-D dcn=%d -D bidx=0 -D %s", dcn, 06709 reverse ? "REVERSE" : "ORDER")); 06710 break; 06711 } 06712 case COLOR_BGR5652BGR: case COLOR_BGR5552BGR: case COLOR_BGR5652RGB: case COLOR_BGR5552RGB: 06713 case COLOR_BGR5652BGRA: case COLOR_BGR5552BGRA: case COLOR_BGR5652RGBA: case COLOR_BGR5552RGBA: 06714 { 06715 dcn = code == COLOR_BGR5652BGRA || code == COLOR_BGR5552BGRA || code == COLOR_BGR5652RGBA || code == COLOR_BGR5552RGBA ? 
4 : 3; 06716 CV_Assert((dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U); 06717 bidx = code == COLOR_BGR5652BGR || code == COLOR_BGR5552BGR || 06718 code == COLOR_BGR5652BGRA || code == COLOR_BGR5552BGRA ? 0 : 2; 06719 int greenbits = code == COLOR_BGR5652BGR || code == COLOR_BGR5652RGB || 06720 code == COLOR_BGR5652BGRA || code == COLOR_BGR5652RGBA ? 6 : 5; 06721 k.create("RGB5x52RGB", ocl::imgproc::cvtcolor_oclsrc, 06722 opts + format("-D dcn=%d -D bidx=%d -D greenbits=%d", dcn, bidx, greenbits)); 06723 break; 06724 } 06725 case COLOR_BGR2BGR565: case COLOR_BGR2BGR555: case COLOR_RGB2BGR565: case COLOR_RGB2BGR555: 06726 case COLOR_BGRA2BGR565: case COLOR_BGRA2BGR555: case COLOR_RGBA2BGR565: case COLOR_RGBA2BGR555: 06727 { 06728 CV_Assert((scn == 3 || scn == 4) && depth == CV_8U ); 06729 bidx = code == COLOR_BGR2BGR565 || code == COLOR_BGR2BGR555 || 06730 code == COLOR_BGRA2BGR565 || code == COLOR_BGRA2BGR555 ? 0 : 2; 06731 int greenbits = code == COLOR_BGR2BGR565 || code == COLOR_RGB2BGR565 || 06732 code == COLOR_BGRA2BGR565 || code == COLOR_RGBA2BGR565 ? 6 : 5; 06733 dcn = 2; 06734 k.create("RGB2RGB5x5", ocl::imgproc::cvtcolor_oclsrc, 06735 opts + format("-D dcn=2 -D bidx=%d -D greenbits=%d", bidx, greenbits)); 06736 break; 06737 } 06738 case COLOR_BGR5652GRAY: case COLOR_BGR5552GRAY: 06739 { 06740 CV_Assert(scn == 2 && depth == CV_8U); 06741 dcn = 1; 06742 int greenbits = code == COLOR_BGR5652GRAY ? 6 : 5; 06743 k.create("BGR5x52Gray", ocl::imgproc::cvtcolor_oclsrc, 06744 opts + format("-D dcn=1 -D bidx=0 -D greenbits=%d", greenbits)); 06745 break; 06746 } 06747 case COLOR_GRAY2BGR565: case COLOR_GRAY2BGR555: 06748 { 06749 CV_Assert(scn == 1 && depth == CV_8U); 06750 dcn = 2; 06751 int greenbits = code == COLOR_GRAY2BGR565 ? 
6 : 5; 06752 k.create("Gray2BGR5x5", ocl::imgproc::cvtcolor_oclsrc, 06753 opts + format("-D dcn=2 -D bidx=0 -D greenbits=%d", greenbits)); 06754 break; 06755 } 06756 case COLOR_BGR2GRAY: case COLOR_BGRA2GRAY: 06757 case COLOR_RGB2GRAY: case COLOR_RGBA2GRAY: 06758 { 06759 CV_Assert(scn == 3 || scn == 4); 06760 bidx = code == COLOR_BGR2GRAY || code == COLOR_BGRA2GRAY ? 0 : 2; 06761 dcn = 1; 06762 k.create("RGB2Gray", ocl::imgproc::cvtcolor_oclsrc, 06763 opts + format("-D dcn=1 -D bidx=%d -D STRIPE_SIZE=%d", 06764 bidx, stripeSize)); 06765 globalsize[0] = (src.cols + stripeSize-1)/stripeSize; 06766 break; 06767 } 06768 case COLOR_GRAY2BGR: 06769 case COLOR_GRAY2BGRA: 06770 { 06771 CV_Assert(scn == 1); 06772 dcn = code == COLOR_GRAY2BGRA ? 4 : 3; 06773 k.create("Gray2RGB", ocl::imgproc::cvtcolor_oclsrc, 06774 opts + format("-D bidx=0 -D dcn=%d", dcn)); 06775 break; 06776 } 06777 case COLOR_BGR2YUV: 06778 case COLOR_RGB2YUV: 06779 { 06780 CV_Assert(scn == 3 || scn == 4); 06781 bidx = code == COLOR_RGB2YUV ? 0 : 2; 06782 dcn = 3; 06783 k.create("RGB2YUV", ocl::imgproc::cvtcolor_oclsrc, 06784 opts + format("-D dcn=3 -D bidx=%d", bidx)); 06785 break; 06786 } 06787 case COLOR_YUV2BGR: 06788 case COLOR_YUV2RGB: 06789 { 06790 if(dcn < 0) dcn = 3; 06791 CV_Assert(dcn == 3 || dcn == 4); 06792 bidx = code == COLOR_YUV2RGB ? 0 : 2; 06793 k.create("YUV2RGB", ocl::imgproc::cvtcolor_oclsrc, 06794 opts + format("-D dcn=%d -D bidx=%d", dcn, bidx)); 06795 break; 06796 } 06797 case COLOR_YUV2RGB_NV12: case COLOR_YUV2BGR_NV12: case COLOR_YUV2RGB_NV21: case COLOR_YUV2BGR_NV21: 06798 case COLOR_YUV2RGBA_NV12: case COLOR_YUV2BGRA_NV12: case COLOR_YUV2RGBA_NV21: case COLOR_YUV2BGRA_NV21: 06799 { 06800 CV_Assert( scn == 1 ); 06801 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U ); 06802 dcn = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2RGBA_NV12 || 06803 code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2RGBA_NV21 ? 
4 : 3; 06804 bidx = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2BGR_NV12 || 06805 code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2BGR_NV21 ? 0 : 2; 06806 uidx = code == COLOR_YUV2RGBA_NV21 || code == COLOR_YUV2RGB_NV21 || 06807 code == COLOR_YUV2BGRA_NV21 || code == COLOR_YUV2BGR_NV21 ? 1 : 0; 06808 06809 dstSz = Size(sz.width, sz.height * 2 / 3); 06810 globalsize[0] = dstSz.width / 2; globalsize[1] = (dstSz.height/2 + pxPerWIy - 1) / pxPerWIy; 06811 k.create("YUV2RGB_NVx", ocl::imgproc::cvtcolor_oclsrc, 06812 opts + format("-D dcn=%d -D bidx=%d -D uidx=%d", dcn, bidx, uidx)); 06813 break; 06814 } 06815 case COLOR_YUV2BGR_YV12: case COLOR_YUV2RGB_YV12: case COLOR_YUV2BGRA_YV12: case COLOR_YUV2RGBA_YV12: 06816 case COLOR_YUV2BGR_IYUV: case COLOR_YUV2RGB_IYUV: case COLOR_YUV2BGRA_IYUV: case COLOR_YUV2RGBA_IYUV: 06817 { 06818 CV_Assert( scn == 1 ); 06819 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U ); 06820 dcn = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2RGBA_YV12 || 06821 code == COLOR_YUV2BGRA_IYUV || code == COLOR_YUV2RGBA_IYUV ? 4 : 3; 06822 bidx = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2BGR_YV12 || 06823 code == COLOR_YUV2BGRA_IYUV || code == COLOR_YUV2BGR_IYUV ? 0 : 2; 06824 uidx = code == COLOR_YUV2BGRA_YV12 || code == COLOR_YUV2BGR_YV12 || 06825 code == COLOR_YUV2RGBA_YV12 || code == COLOR_YUV2RGB_YV12 ? 1 : 0; 06826 06827 dstSz = Size(sz.width, sz.height * 2 / 3); 06828 globalsize[0] = dstSz.width / 2; globalsize[1] = (dstSz.height/2 + pxPerWIy - 1) / pxPerWIy; 06829 k.create("YUV2RGB_YV12_IYUV", ocl::imgproc::cvtcolor_oclsrc, 06830 opts + format("-D dcn=%d -D bidx=%d -D uidx=%d%s", dcn, bidx, uidx, 06831 src.isContinuous() ? 
" -D SRC_CONT" : "")); 06832 break; 06833 } 06834 case COLOR_YUV2GRAY_420: 06835 { 06836 if (dcn <= 0) dcn = 1; 06837 06838 CV_Assert( dcn == 1 ); 06839 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U ); 06840 06841 dstSz = Size(sz.width, sz.height * 2 / 3); 06842 _dst.create(dstSz, CV_MAKETYPE(depth, dcn)); 06843 dst = _dst.getUMat(); 06844 06845 src.rowRange(0, dstSz.height).copyTo(dst); 06846 return true; 06847 } 06848 case COLOR_RGB2YUV_YV12: case COLOR_BGR2YUV_YV12: case COLOR_RGBA2YUV_YV12: case COLOR_BGRA2YUV_YV12: 06849 case COLOR_RGB2YUV_IYUV: case COLOR_BGR2YUV_IYUV: case COLOR_RGBA2YUV_IYUV: case COLOR_BGRA2YUV_IYUV: 06850 { 06851 if (dcn <= 0) dcn = 1; 06852 bidx = code == COLOR_BGRA2YUV_YV12 || code == COLOR_BGR2YUV_YV12 || 06853 code == COLOR_BGRA2YUV_IYUV || code == COLOR_BGR2YUV_IYUV ? 0 : 2; 06854 uidx = code == COLOR_RGBA2YUV_YV12 || code == COLOR_RGB2YUV_YV12 || 06855 code == COLOR_BGRA2YUV_YV12 || code == COLOR_BGR2YUV_YV12 ? 1 : 0; 06856 06857 CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U ); 06858 CV_Assert( dcn == 1 ); 06859 CV_Assert( sz.width % 2 == 0 && sz.height % 2 == 0 ); 06860 06861 dstSz = Size(sz.width, sz.height / 2 * 3); 06862 _dst.create(dstSz, CV_MAKETYPE(depth, dcn)); 06863 dst = _dst.getUMat(); 06864 06865 if (dev.isIntel() && src.cols % 4 == 0 && src.step % 4 == 0 && src.offset % 4 == 0 && 06866 dst.step % 4 == 0 && dst.offset % 4 == 0) 06867 { 06868 pxPerWIx = 2; 06869 } 06870 globalsize[0] = dstSz.width / (2 * pxPerWIx); globalsize[1] = (dstSz.height/3 + pxPerWIy - 1) / pxPerWIy; 06871 06872 k.create("RGB2YUV_YV12_IYUV", ocl::imgproc::cvtcolor_oclsrc, 06873 opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D PIX_PER_WI_X=%d", dcn, bidx, uidx, pxPerWIx)); 06874 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst)); 06875 return k.run(2, globalsize, NULL, false); 06876 } 06877 case COLOR_YUV2RGB_UYVY: case COLOR_YUV2BGR_UYVY: case COLOR_YUV2RGBA_UYVY: case COLOR_YUV2BGRA_UYVY: 
06878 case COLOR_YUV2RGB_YUY2: case COLOR_YUV2BGR_YUY2: case COLOR_YUV2RGB_YVYU: case COLOR_YUV2BGR_YVYU: 06879 case COLOR_YUV2RGBA_YUY2: case COLOR_YUV2BGRA_YUY2: case COLOR_YUV2RGBA_YVYU: case COLOR_YUV2BGRA_YVYU: 06880 { 06881 if (dcn <= 0) 06882 dcn = (code==COLOR_YUV2RGBA_UYVY || code==COLOR_YUV2BGRA_UYVY || code==COLOR_YUV2RGBA_YUY2 || 06883 code==COLOR_YUV2BGRA_YUY2 || code==COLOR_YUV2RGBA_YVYU || code==COLOR_YUV2BGRA_YVYU) ? 4 : 3; 06884 06885 bidx = (code==COLOR_YUV2BGR_UYVY || code==COLOR_YUV2BGRA_UYVY || code==COLOR_YUV2BGRA_YUY2 || 06886 code==COLOR_YUV2BGR_YUY2 || code==COLOR_YUV2BGRA_YVYU || code==COLOR_YUV2BGR_YVYU) ? 0 : 2; 06887 yidx = (code==COLOR_YUV2RGB_UYVY || code==COLOR_YUV2RGBA_UYVY || code==COLOR_YUV2BGR_UYVY || code==COLOR_YUV2BGRA_UYVY) ? 1 : 0; 06888 uidx = (code==COLOR_YUV2RGB_YVYU || code==COLOR_YUV2RGBA_YVYU || 06889 code==COLOR_YUV2BGR_YVYU || code==COLOR_YUV2BGRA_YVYU) ? 2 : 0; 06890 uidx = 1 - yidx + uidx; 06891 06892 CV_Assert( dcn == 3 || dcn == 4 ); 06893 CV_Assert( scn == 2 && depth == CV_8U ); 06894 06895 k.create("YUV2RGB_422", ocl::imgproc::cvtcolor_oclsrc, 06896 opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D yidx=%d%s", dcn, bidx, uidx, yidx, 06897 src.offset % 4 == 0 && src.step % 4 == 0 ? " -D USE_OPTIMIZED_LOAD" : "")); 06898 break; 06899 } 06900 case COLOR_BGR2YCrCb: 06901 case COLOR_RGB2YCrCb: 06902 { 06903 CV_Assert(scn == 3 || scn == 4); 06904 bidx = code == COLOR_BGR2YCrCb ? 0 : 2; 06905 dcn = 3; 06906 k.create("RGB2YCrCb", ocl::imgproc::cvtcolor_oclsrc, 06907 opts + format("-D dcn=3 -D bidx=%d", bidx)); 06908 break; 06909 } 06910 case COLOR_YCrCb2BGR: 06911 case COLOR_YCrCb2RGB: 06912 { 06913 if( dcn <= 0 ) 06914 dcn = 3; 06915 CV_Assert(scn == 3 && (dcn == 3 || dcn == 4)); 06916 bidx = code == COLOR_YCrCb2BGR ? 
0 : 2; 06917 k.create("YCrCb2RGB", ocl::imgproc::cvtcolor_oclsrc, 06918 opts + format("-D dcn=%d -D bidx=%d", dcn, bidx)); 06919 break; 06920 } 06921 case COLOR_BGR2XYZ: case COLOR_RGB2XYZ: 06922 { 06923 CV_Assert(scn == 3 || scn == 4); 06924 bidx = code == COLOR_BGR2XYZ ? 0 : 2; 06925 06926 UMat c; 06927 if (depth == CV_32F) 06928 { 06929 float coeffs[] = 06930 { 06931 0.412453f, 0.357580f, 0.180423f, 06932 0.212671f, 0.715160f, 0.072169f, 06933 0.019334f, 0.119193f, 0.950227f 06934 }; 06935 if (bidx == 0) 06936 { 06937 std::swap(coeffs[0], coeffs[2]); 06938 std::swap(coeffs[3], coeffs[5]); 06939 std::swap(coeffs[6], coeffs[8]); 06940 } 06941 Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c); 06942 } 06943 else 06944 { 06945 int coeffs[] = 06946 { 06947 1689, 1465, 739, 06948 871, 2929, 296, 06949 79, 488, 3892 06950 }; 06951 if (bidx == 0) 06952 { 06953 std::swap(coeffs[0], coeffs[2]); 06954 std::swap(coeffs[3], coeffs[5]); 06955 std::swap(coeffs[6], coeffs[8]); 06956 } 06957 Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c); 06958 } 06959 06960 _dst.create(dstSz, CV_MAKETYPE(depth, 3)); 06961 dst = _dst.getUMat(); 06962 06963 k.create("RGB2XYZ", ocl::imgproc::cvtcolor_oclsrc, 06964 opts + format("-D dcn=3 -D bidx=%d", bidx)); 06965 if (k.empty()) 06966 return false; 06967 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(c)); 06968 return k.run(2, globalsize, 0, false); 06969 } 06970 case COLOR_XYZ2BGR: case COLOR_XYZ2RGB: 06971 { 06972 if (dcn <= 0) 06973 dcn = 3; 06974 CV_Assert(scn == 3 && (dcn == 3 || dcn == 4)); 06975 bidx = code == COLOR_XYZ2BGR ? 
0 : 2; 06976 06977 UMat c; 06978 if (depth == CV_32F) 06979 { 06980 float coeffs[] = 06981 { 06982 3.240479f, -1.53715f, -0.498535f, 06983 -0.969256f, 1.875991f, 0.041556f, 06984 0.055648f, -0.204043f, 1.057311f 06985 }; 06986 if (bidx == 0) 06987 { 06988 std::swap(coeffs[0], coeffs[6]); 06989 std::swap(coeffs[1], coeffs[7]); 06990 std::swap(coeffs[2], coeffs[8]); 06991 } 06992 Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c); 06993 } 06994 else 06995 { 06996 int coeffs[] = 06997 { 06998 13273, -6296, -2042, 06999 -3970, 7684, 170, 07000 228, -836, 4331 07001 }; 07002 if (bidx == 0) 07003 { 07004 std::swap(coeffs[0], coeffs[6]); 07005 std::swap(coeffs[1], coeffs[7]); 07006 std::swap(coeffs[2], coeffs[8]); 07007 } 07008 Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c); 07009 } 07010 07011 _dst.create(dstSz, CV_MAKETYPE(depth, dcn)); 07012 dst = _dst.getUMat(); 07013 07014 k.create("XYZ2RGB", ocl::imgproc::cvtcolor_oclsrc, 07015 opts + format("-D dcn=%d -D bidx=%d", dcn, bidx)); 07016 if (k.empty()) 07017 return false; 07018 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(c)); 07019 return k.run(2, globalsize, 0, false); 07020 } 07021 case COLOR_BGR2HSV: case COLOR_RGB2HSV: case COLOR_BGR2HSV_FULL: case COLOR_RGB2HSV_FULL: 07022 case COLOR_BGR2HLS: case COLOR_RGB2HLS: case COLOR_BGR2HLS_FULL: case COLOR_RGB2HLS_FULL: 07023 { 07024 CV_Assert((scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F)); 07025 bidx = code == COLOR_BGR2HSV || code == COLOR_BGR2HLS || 07026 code == COLOR_BGR2HSV_FULL || code == COLOR_BGR2HLS_FULL ? 0 : 2; 07027 int hrange = depth == CV_32F ? 360 : code == COLOR_BGR2HSV || code == COLOR_RGB2HSV || 07028 code == COLOR_BGR2HLS || code == COLOR_RGB2HLS ? 180 : 256; 07029 bool is_hsv = code == COLOR_BGR2HSV || code == COLOR_RGB2HSV || code == COLOR_BGR2HSV_FULL || code == COLOR_RGB2HSV_FULL; 07030 String kernelName = String("RGB2") + (is_hsv ? 
"HSV" : "HLS"); 07031 dcn = 3; 07032 07033 if (is_hsv && depth == CV_8U) 07034 { 07035 static UMat sdiv_data; 07036 static UMat hdiv_data180; 07037 static UMat hdiv_data256; 07038 static int sdiv_table[256]; 07039 static int hdiv_table180[256]; 07040 static int hdiv_table256[256]; 07041 static volatile bool initialized180 = false, initialized256 = false; 07042 volatile bool & initialized = hrange == 180 ? initialized180 : initialized256; 07043 07044 if (!initialized) 07045 { 07046 int * const hdiv_table = hrange == 180 ? hdiv_table180 : hdiv_table256, hsv_shift = 12; 07047 UMat & hdiv_data = hrange == 180 ? hdiv_data180 : hdiv_data256; 07048 07049 sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0; 07050 07051 int v = 255 << hsv_shift; 07052 if (!initialized180 && !initialized256) 07053 { 07054 for(int i = 1; i < 256; i++ ) 07055 sdiv_table[i] = saturate_cast<int>(v/(1.*i)); 07056 Mat(1, 256, CV_32SC1, sdiv_table).copyTo(sdiv_data); 07057 } 07058 07059 v = hrange << hsv_shift; 07060 for (int i = 1; i < 256; i++ ) 07061 hdiv_table[i] = saturate_cast<int>(v/(6.*i)); 07062 07063 Mat(1, 256, CV_32SC1, hdiv_table).copyTo(hdiv_data); 07064 initialized = true; 07065 } 07066 07067 _dst.create(dstSz, CV_8UC3); 07068 dst = _dst.getUMat(); 07069 07070 k.create("RGB2HSV", ocl::imgproc::cvtcolor_oclsrc, 07071 opts + format("-D hrange=%d -D bidx=%d -D dcn=3", 07072 hrange, bidx)); 07073 if (k.empty()) 07074 return false; 07075 07076 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), 07077 ocl::KernelArg::PtrReadOnly(sdiv_data), hrange == 256 ? 
ocl::KernelArg::PtrReadOnly(hdiv_data256) : 07078 ocl::KernelArg::PtrReadOnly(hdiv_data180)); 07079 07080 return k.run(2, globalsize, NULL, false); 07081 } 07082 else 07083 k.create(kernelName.c_str(), ocl::imgproc::cvtcolor_oclsrc, 07084 opts + format("-D hscale=%ff -D bidx=%d -D dcn=3", 07085 hrange*(1.f/360.f), bidx)); 07086 break; 07087 } 07088 case COLOR_HSV2BGR: case COLOR_HSV2RGB: case COLOR_HSV2BGR_FULL: case COLOR_HSV2RGB_FULL: 07089 case COLOR_HLS2BGR: case COLOR_HLS2RGB: case COLOR_HLS2BGR_FULL: case COLOR_HLS2RGB_FULL: 07090 { 07091 if (dcn <= 0) 07092 dcn = 3; 07093 CV_Assert(scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F)); 07094 bidx = code == COLOR_HSV2BGR || code == COLOR_HLS2BGR || 07095 code == COLOR_HSV2BGR_FULL || code == COLOR_HLS2BGR_FULL ? 0 : 2; 07096 int hrange = depth == CV_32F ? 360 : code == COLOR_HSV2BGR || code == COLOR_HSV2RGB || 07097 code == COLOR_HLS2BGR || code == COLOR_HLS2RGB ? 180 : 255; 07098 bool is_hsv = code == COLOR_HSV2BGR || code == COLOR_HSV2RGB || 07099 code == COLOR_HSV2BGR_FULL || code == COLOR_HSV2RGB_FULL; 07100 07101 String kernelName = String(is_hsv ? "HSV" : "HLS") + "2RGB"; 07102 k.create(kernelName.c_str(), ocl::imgproc::cvtcolor_oclsrc, 07103 opts + format("-D dcn=%d -D bidx=%d -D hrange=%d -D hscale=%ff", 07104 dcn, bidx, hrange, 6.f/hrange)); 07105 break; 07106 } 07107 case COLOR_RGBA2mRGBA: case COLOR_mRGBA2RGBA: 07108 { 07109 CV_Assert(scn == 4 && depth == CV_8U); 07110 dcn = 4; 07111 07112 k.create(code == COLOR_RGBA2mRGBA ? 
"RGBA2mRGBA" : "mRGBA2RGBA", ocl::imgproc::cvtcolor_oclsrc, 07113 opts + "-D dcn=4 -D bidx=3"); 07114 break; 07115 } 07116 case CV_BGR2Lab: case CV_RGB2Lab: case CV_LBGR2Lab: case CV_LRGB2Lab: 07117 case CV_BGR2Luv: case CV_RGB2Luv: case CV_LBGR2Luv: case CV_LRGB2Luv: 07118 { 07119 CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) ); 07120 07121 bidx = code == CV_BGR2Lab || code == CV_LBGR2Lab || code == CV_BGR2Luv || code == CV_LBGR2Luv ? 0 : 2; 07122 bool srgb = code == CV_BGR2Lab || code == CV_RGB2Lab || code == CV_RGB2Luv || code == CV_BGR2Luv; 07123 bool lab = code == CV_BGR2Lab || code == CV_RGB2Lab || code == CV_LBGR2Lab || code == CV_LRGB2Lab; 07124 float un, vn; 07125 dcn = 3; 07126 07127 k.create(format("BGR2%s", lab ? "Lab" : "Luv").c_str(), 07128 ocl::imgproc::cvtcolor_oclsrc, 07129 opts + format("-D dcn=%d -D bidx=%d%s", 07130 dcn, bidx, srgb ? " -D SRGB" : "")); 07131 if (k.empty()) 07132 return false; 07133 07134 initLabTabs(); 07135 07136 _dst.create(dstSz, CV_MAKETYPE(depth, dcn)); 07137 dst = _dst.getUMat(); 07138 07139 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), 07140 dstarg = ocl::KernelArg::WriteOnly(dst); 07141 07142 if (depth == CV_8U && lab) 07143 { 07144 static UMat usRGBGammaTab, ulinearGammaTab, uLabCbrtTab, ucoeffs; 07145 07146 if (srgb && usRGBGammaTab.empty()) 07147 Mat(1, 256, CV_16UC1, sRGBGammaTab_b).copyTo(usRGBGammaTab); 07148 else if (ulinearGammaTab.empty()) 07149 Mat(1, 256, CV_16UC1, linearGammaTab_b).copyTo(ulinearGammaTab); 07150 if (uLabCbrtTab.empty()) 07151 Mat(1, LAB_CBRT_TAB_SIZE_B, CV_16UC1, LabCbrtTab_b).copyTo(uLabCbrtTab); 07152 07153 { 07154 int coeffs[9]; 07155 const float * const _coeffs = sRGB2XYZ_D65, * const _whitept = D65; 07156 const float scale[] = 07157 { 07158 (1 << lab_shift)/_whitept[0], 07159 (float)(1 << lab_shift), 07160 (1 << lab_shift)/_whitept[2] 07161 }; 07162 07163 for (int i = 0; i < 3; i++ ) 07164 { 07165 coeffs[i*3+(bidx^2)] = 
cvRound(_coeffs[i*3]*scale[i]); 07166 coeffs[i*3+1] = cvRound(_coeffs[i*3+1]*scale[i]); 07167 coeffs[i*3+bidx] = cvRound(_coeffs[i*3+2]*scale[i]); 07168 07169 CV_Assert( coeffs[i] >= 0 && coeffs[i*3+1] >= 0 && coeffs[i*3+2] >= 0 && 07170 coeffs[i*3] + coeffs[i*3+1] + coeffs[i*3+2] < 2*(1 << lab_shift) ); 07171 } 07172 Mat(1, 9, CV_32SC1, coeffs).copyTo(ucoeffs); 07173 } 07174 07175 const int Lscale = (116*255+50)/100; 07176 const int Lshift = -((16*255*(1 << lab_shift2) + 50)/100); 07177 07178 k.args(srcarg, dstarg, 07179 ocl::KernelArg::PtrReadOnly(srgb ? usRGBGammaTab : ulinearGammaTab), 07180 ocl::KernelArg::PtrReadOnly(uLabCbrtTab), ocl::KernelArg::PtrReadOnly(ucoeffs), 07181 Lscale, Lshift); 07182 } 07183 else 07184 { 07185 static UMat usRGBGammaTab, ucoeffs, uLabCbrtTab; 07186 07187 if (srgb && usRGBGammaTab.empty()) 07188 Mat(1, GAMMA_TAB_SIZE * 4, CV_32FC1, sRGBGammaTab).copyTo(usRGBGammaTab); 07189 if (!lab && uLabCbrtTab.empty()) 07190 Mat(1, LAB_CBRT_TAB_SIZE * 4, CV_32FC1, LabCbrtTab).copyTo(uLabCbrtTab); 07191 07192 { 07193 float coeffs[9]; 07194 const float * const _coeffs = sRGB2XYZ_D65, * const _whitept = D65; 07195 float scale[] = { 1.0f / _whitept[0], 1.0f, 1.0f / _whitept[2] }; 07196 07197 for (int i = 0; i < 3; i++) 07198 { 07199 int j = i * 3; 07200 coeffs[j + (bidx ^ 2)] = _coeffs[j] * (lab ? scale[i] : 1); 07201 coeffs[j + 1] = _coeffs[j + 1] * (lab ? scale[i] : 1); 07202 coeffs[j + bidx] = _coeffs[j + 2] * (lab ? scale[i] : 1); 07203 07204 CV_Assert( coeffs[j] >= 0 && coeffs[j + 1] >= 0 && coeffs[j + 2] >= 0 && 07205 coeffs[j] + coeffs[j + 1] + coeffs[j + 2] < 1.5f*(lab ? 
LabCbrtTabScale : 1) ); 07206 } 07207 07208 float d = 1.f/(_whitept[0] + _whitept[1]*15 + _whitept[2]*3); 07209 un = 13*4*_whitept[0]*d; 07210 vn = 13*9*_whitept[1]*d; 07211 07212 Mat(1, 9, CV_32FC1, coeffs).copyTo(ucoeffs); 07213 } 07214 07215 float _1_3 = 1.0f / 3.0f, _a = 16.0f / 116.0f; 07216 ocl::KernelArg ucoeffsarg = ocl::KernelArg::PtrReadOnly(ucoeffs); 07217 07218 if (lab) 07219 { 07220 if (srgb) 07221 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBGammaTab), 07222 ucoeffsarg, _1_3, _a); 07223 else 07224 k.args(srcarg, dstarg, ucoeffsarg, _1_3, _a); 07225 } 07226 else 07227 { 07228 ocl::KernelArg LabCbrtTabarg = ocl::KernelArg::PtrReadOnly(uLabCbrtTab); 07229 if (srgb) 07230 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBGammaTab), 07231 LabCbrtTabarg, ucoeffsarg, un, vn); 07232 else 07233 k.args(srcarg, dstarg, LabCbrtTabarg, ucoeffsarg, un, vn); 07234 } 07235 } 07236 07237 return k.run(dims, globalsize, NULL, false); 07238 } 07239 case CV_Lab2BGR: case CV_Lab2RGB: case CV_Lab2LBGR: case CV_Lab2LRGB: 07240 case CV_Luv2BGR: case CV_Luv2RGB: case CV_Luv2LBGR: case CV_Luv2LRGB: 07241 { 07242 if( dcn <= 0 ) 07243 dcn = 3; 07244 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F) ); 07245 07246 bidx = code == CV_Lab2BGR || code == CV_Lab2LBGR || code == CV_Luv2BGR || code == CV_Luv2LBGR ? 0 : 2; 07247 bool srgb = code == CV_Lab2BGR || code == CV_Lab2RGB || code == CV_Luv2BGR || code == CV_Luv2RGB; 07248 bool lab = code == CV_Lab2BGR || code == CV_Lab2RGB || code == CV_Lab2LBGR || code == CV_Lab2LRGB; 07249 float un, vn; 07250 07251 k.create(format("%s2BGR", lab ? "Lab" : "Luv").c_str(), 07252 ocl::imgproc::cvtcolor_oclsrc, 07253 opts + format("-D dcn=%d -D bidx=%d%s", 07254 dcn, bidx, srgb ? 
" -D SRGB" : "")); 07255 if (k.empty()) 07256 return false; 07257 07258 initLabTabs(); 07259 static UMat ucoeffs, usRGBInvGammaTab; 07260 07261 if (srgb && usRGBInvGammaTab.empty()) 07262 Mat(1, GAMMA_TAB_SIZE*4, CV_32FC1, sRGBInvGammaTab).copyTo(usRGBInvGammaTab); 07263 07264 { 07265 float coeffs[9]; 07266 const float * const _coeffs = XYZ2sRGB_D65, * const _whitept = D65; 07267 07268 for( int i = 0; i < 3; i++ ) 07269 { 07270 coeffs[i+(bidx^2)*3] = _coeffs[i] * (lab ? _whitept[i] : 1); 07271 coeffs[i+3] = _coeffs[i+3] * (lab ? _whitept[i] : 1); 07272 coeffs[i+bidx*3] = _coeffs[i+6] * (lab ? _whitept[i] : 1); 07273 } 07274 07275 float d = 1.f/(_whitept[0] + _whitept[1]*15 + _whitept[2]*3); 07276 un = 4*_whitept[0]*d; 07277 vn = 9*_whitept[1]*d; 07278 07279 Mat(1, 9, CV_32FC1, coeffs).copyTo(ucoeffs); 07280 } 07281 07282 _dst.create(sz, CV_MAKETYPE(depth, dcn)); 07283 dst = _dst.getUMat(); 07284 07285 float lThresh = 0.008856f * 903.3f; 07286 float fThresh = 7.787f * 0.008856f + 16.0f / 116.0f; 07287 07288 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), 07289 dstarg = ocl::KernelArg::WriteOnly(dst), 07290 coeffsarg = ocl::KernelArg::PtrReadOnly(ucoeffs); 07291 07292 if (lab) 07293 { 07294 if (srgb) 07295 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBInvGammaTab), 07296 coeffsarg, lThresh, fThresh); 07297 else 07298 k.args(srcarg, dstarg, coeffsarg, lThresh, fThresh); 07299 } 07300 else 07301 { 07302 if (srgb) 07303 k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(usRGBInvGammaTab), 07304 coeffsarg, un, vn); 07305 else 07306 k.args(srcarg, dstarg, coeffsarg, un, vn); 07307 } 07308 07309 return k.run(dims, globalsize, NULL, false); 07310 } 07311 default: 07312 break; 07313 } 07314 07315 if( !k.empty() ) 07316 { 07317 _dst.create(dstSz, CV_MAKETYPE(depth, dcn)); 07318 dst = _dst.getUMat(); 07319 k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst)); 07320 ok = k.run(dims, globalsize, NULL, false); 07321 } 07322 return ok; 
07323 } 07324 07325 #endif 07326 07327 #ifdef HAVE_IPP 07328 static bool ipp_cvtColor( Mat &src, OutputArray _dst, int code, int dcn ) 07329 { 07330 int stype = src.type(); 07331 int scn = CV_MAT_CN(stype), depth = CV_MAT_DEPTH(stype); 07332 07333 Mat dst; 07334 Size sz = src.size(); 07335 07336 switch( code ) 07337 { 07338 #if IPP_VERSION_X100 >= 700 07339 case CV_BGR2BGRA: case CV_RGB2BGRA: case CV_BGRA2BGR: 07340 case CV_RGBA2BGR: case CV_RGB2BGR: case CV_BGRA2RGBA: 07341 CV_Assert( scn == 3 || scn == 4 ); 07342 dcn = code == CV_BGR2BGRA || code == CV_RGB2BGRA || code == CV_BGRA2RGBA ? 4 : 3; 07343 _dst.create( sz, CV_MAKETYPE(depth, dcn)); 07344 dst = _dst.getMat(); 07345 07346 if( code == CV_BGR2BGRA) 07347 { 07348 if ( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC3C4RTab[depth], 0, 1, 2)) ) 07349 return true; 07350 } 07351 else if( code == CV_BGRA2BGR ) 07352 { 07353 if ( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiCopyAC4C3RTab[depth])) ) 07354 return true; 07355 } 07356 else if( code == CV_BGR2RGBA ) 07357 { 07358 if( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC3C4RTab[depth], 2, 1, 0)) ) 07359 return true; 07360 } 07361 else if( code == CV_RGBA2BGR ) 07362 { 07363 if( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC4C3RTab[depth], 2, 1, 0)) ) 07364 return true; 07365 } 07366 else if( code == CV_RGB2BGR ) 07367 { 07368 if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC3RTab[depth], 2, 1, 0)) ) 07369 return true; 07370 } 07371 #if IPP_VERSION_X100 >= 810 07372 else if( code == CV_RGBA2BGRA ) 07373 { 07374 if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC4RTab[depth], 2, 1, 0)) ) 07375 return true; 07376 } 07377 #endif 07378 return false; 07379 #endif 07380 07381 #if IPP_DISABLE_BLOCK // breaks OCL accuracy tests 07382 case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555: 07383 case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case 
CV_RGBA2BGR555: 07384 CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U ); 07385 _dst.create(sz, CV_8UC2); 07386 dst = _dst.getMat(); 07387 07388 CV_SUPPRESS_DEPRECATED_START 07389 07390 if (code == CV_BGR2BGR565 && scn == 3) 07391 { 07392 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiBGRToBGR565_8u16u_C3R))) 07393 return true; 07394 } 07395 else if (code == CV_BGRA2BGR565 && scn == 4) 07396 { 07397 if (CvtColorIPPLoopCopy(src, dst, 07398 IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], 07399 (ippiGeneralFunc)ippiBGRToBGR565_8u16u_C3R, 0, 1, 2, depth))) 07400 return true; 07401 } 07402 else if (code == CV_RGB2BGR565 && scn == 3) 07403 { 07404 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], 07405 (ippiGeneralFunc)ippiBGRToBGR565_8u16u_C3R, 2, 1, 0, depth)) ) 07406 return true; 07407 } 07408 else if (code == CV_RGBA2BGR565 && scn == 4) 07409 { 07410 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], 07411 (ippiGeneralFunc)ippiBGRToBGR565_8u16u_C3R, 2, 1, 0, depth)) ) 07412 return true; 07413 } 07414 CV_SUPPRESS_DEPRECATED_END 07415 return false; 07416 #endif 07417 07418 #if IPP_VERSION_X100 < 900 07419 case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB: 07420 case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA: 07421 if(dcn <= 0) dcn = (code==CV_BGR5652BGRA || code==CV_BGR5552BGRA || code==CV_BGR5652RGBA || code==CV_BGR5552RGBA) ? 
4 : 3; 07422 CV_Assert( (dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U ); 07423 _dst.create(sz, CV_MAKETYPE(depth, dcn)); 07424 dst = _dst.getMat(); 07425 07426 CV_SUPPRESS_DEPRECATED_START 07427 if (code == CV_BGR5652BGR && dcn == 3) 07428 { 07429 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiBGR565ToBGR_16u8u_C3R))) 07430 return true; 07431 } 07432 else if (code == CV_BGR5652RGB && dcn == 3) 07433 { 07434 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiBGR565ToBGR_16u8u_C3R, 07435 ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth))) 07436 return true; 07437 } 07438 else if (code == CV_BGR5652BGRA && dcn == 4) 07439 { 07440 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiBGR565ToBGR_16u8u_C3R, 07441 ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth))) 07442 return true; 07443 } 07444 else if (code == CV_BGR5652RGBA && dcn == 4) 07445 { 07446 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiBGR565ToBGR_16u8u_C3R, 07447 ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth))) 07448 return true; 07449 } 07450 CV_SUPPRESS_DEPRECATED_END 07451 return false; 07452 #endif 07453 07454 #if IPP_VERSION_X100 >= 700 07455 case CV_BGR2GRAY: case CV_BGRA2GRAY: case CV_RGB2GRAY: case CV_RGBA2GRAY: 07456 CV_Assert( scn == 3 || scn == 4 ); 07457 _dst.create(sz, CV_MAKETYPE(depth, 1)); 07458 dst = _dst.getMat(); 07459 07460 if( code == CV_BGR2GRAY && depth == CV_32F ) 07461 { 07462 if( CvtColorIPPLoop(src, dst, IPPColor2GrayFunctor(ippiColor2GrayC3Tab[depth])) ) 07463 return true; 07464 } 07465 else if( code == CV_RGB2GRAY && depth == CV_32F ) 07466 { 07467 if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGB2GrayC3Tab[depth])) ) 07468 return true; 07469 } 07470 else if( code == CV_BGRA2GRAY && depth == CV_32F ) 07471 { 07472 if( CvtColorIPPLoop(src, dst, IPPColor2GrayFunctor(ippiColor2GrayC4Tab[depth])) ) 07473 return true; 07474 } 07475 else if( code == CV_RGBA2GRAY && depth == 
CV_32F ) 07476 { 07477 if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGB2GrayC4Tab[depth])) ) 07478 return true; 07479 } 07480 return false; 07481 07482 case CV_GRAY2BGR: case CV_GRAY2BGRA: 07483 if( dcn <= 0 ) dcn = (code==CV_GRAY2BGRA) ? 4 : 3; 07484 CV_Assert( scn == 1 && (dcn == 3 || dcn == 4)); 07485 _dst.create(sz, CV_MAKETYPE(depth, dcn)); 07486 dst = _dst.getMat(); 07487 07488 if( code == CV_GRAY2BGR ) 07489 { 07490 if( CvtColorIPPLoop(src, dst, IPPGray2BGRFunctor(ippiCopyP3C3RTab[depth])) ) 07491 return true; 07492 } 07493 else if( code == CV_GRAY2BGRA ) 07494 { 07495 if( CvtColorIPPLoop(src, dst, IPPGray2BGRAFunctor(ippiCopyP3C3RTab[depth], ippiSwapChannelsC3C4RTab[depth], depth)) ) 07496 return true; 07497 } 07498 return false; 07499 #endif 07500 07501 #if IPP_DISABLE_BLOCK 07502 case CV_BGR2YCrCb: case CV_RGB2YCrCb: 07503 case CV_BGR2YUV: case CV_RGB2YUV: 07504 { 07505 CV_Assert( scn == 3 || scn == 4 ); 07506 static const float yuv_f[] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f }; 07507 static const int yuv_i[] = { B2Y, G2Y, R2Y, 8061, 14369 }; 07508 const float* coeffs_f = code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? 0 : yuv_f; 07509 const int* coeffs_i = code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? 
0 : yuv_i; 07510 07511 _dst.create(sz, CV_MAKETYPE(depth, 3)); 07512 dst = _dst.getMat(); 07513 07514 if (code == CV_RGB2YUV && scn == 3 && depth == CV_8U) 07515 { 07516 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiRGBToYUV_8u_C3R))) 07517 return true; 07518 } 07519 else if (code == CV_BGR2YUV && scn == 3 && depth == CV_8U) 07520 { 07521 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], 07522 (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 2, 1, 0, depth))) 07523 return true; 07524 } 07525 else if (code == CV_RGB2YUV && scn == 4 && depth == CV_8U) 07526 { 07527 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], 07528 (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 0, 1, 2, depth))) 07529 return true; 07530 } 07531 else if (code == CV_BGR2YUV && scn == 4 && depth == CV_8U) 07532 { 07533 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], 07534 (ippiGeneralFunc)ippiRGBToYUV_8u_C3R, 2, 1, 0, depth))) 07535 return true; 07536 } 07537 return false; 07538 } 07539 #endif 07540 07541 #if IPP_DISABLE_BLOCK 07542 case CV_YCrCb2BGR: case CV_YCrCb2RGB: 07543 case CV_YUV2BGR: case CV_YUV2RGB: 07544 { 07545 if( dcn <= 0 ) dcn = 3; 07546 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) ); 07547 static const float yuv_f[] = { 2.032f, -0.395f, -0.581f, 1.140f }; 07548 static const int yuv_i[] = { 33292, -6472, -9519, 18678 }; 07549 const float* coeffs_f = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? 0 : yuv_f; 07550 const int* coeffs_i = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? 
0 : yuv_i; 07551 07552 _dst.create(sz, CV_MAKETYPE(depth, dcn)); 07553 dst = _dst.getMat(); 07554 07555 if (code == CV_YUV2RGB && dcn == 3 && depth == CV_8U) 07556 { 07557 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R))) 07558 return true; 07559 } 07560 else if (code == CV_YUV2BGR && dcn == 3 && depth == CV_8U) 07561 { 07562 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R, 07563 ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth))) 07564 return true; 07565 } 07566 else if (code == CV_YUV2RGB && dcn == 4 && depth == CV_8U) 07567 { 07568 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R, 07569 ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth))) 07570 return true; 07571 } 07572 else if (code == CV_YUV2BGR && dcn == 4 && depth == CV_8U) 07573 { 07574 if (CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiYUVToRGB_8u_C3R, 07575 ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth))) 07576 return true; 07577 } 07578 return false; 07579 } 07580 #endif 07581 07582 #if IPP_VERSION_X100 >= 700 07583 case CV_BGR2XYZ: case CV_RGB2XYZ: 07584 CV_Assert( scn == 3 || scn == 4 ); 07585 _dst.create(sz, CV_MAKETYPE(depth, 3)); 07586 dst = _dst.getMat(); 07587 07588 if( code == CV_BGR2XYZ && scn == 3 && depth != CV_32F ) 07589 { 07590 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2XYZTab[depth], 2, 1, 0, depth)) ) 07591 return true; 07592 } 07593 else if( code == CV_BGR2XYZ && scn == 4 && depth != CV_32F ) 07594 { 07595 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2XYZTab[depth], 2, 1, 0, depth)) ) 07596 return true; 07597 } 07598 else if( code == CV_RGB2XYZ && scn == 3 && depth != CV_32F ) 07599 { 07600 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2XYZTab[depth])) ) 07601 return true; 07602 } 07603 else if( code == CV_RGB2XYZ && scn == 4 
&& depth != CV_32F ) 07604 { 07605 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2XYZTab[depth], 0, 1, 2, depth)) ) 07606 return true; 07607 } 07608 return false; 07609 #endif 07610 07611 #if IPP_VERSION_X100 >= 700 07612 case CV_XYZ2BGR: case CV_XYZ2RGB: 07613 if( dcn <= 0 ) dcn = 3; 07614 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) ); 07615 07616 _dst.create(sz, CV_MAKETYPE(depth, dcn)); 07617 dst = _dst.getMat(); 07618 07619 if( code == CV_XYZ2BGR && dcn == 3 && depth != CV_32F ) 07620 { 07621 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) ) 07622 return true; 07623 } 07624 else if( code == CV_XYZ2BGR && dcn == 4 && depth != CV_32F ) 07625 { 07626 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) ) 07627 return true; 07628 } 07629 if( code == CV_XYZ2RGB && dcn == 3 && depth != CV_32F ) 07630 { 07631 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiXYZ2RGBTab[depth])) ) 07632 return true; 07633 } 07634 else if( code == CV_XYZ2RGB && dcn == 4 && depth != CV_32F ) 07635 { 07636 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) ) 07637 return true; 07638 } 07639 return false; 07640 #endif 07641 07642 #if IPP_VERSION_X100 >= 700 07643 case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL: 07644 case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL: 07645 { 07646 CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) ); 07647 _dst.create(sz, CV_MAKETYPE(depth, 3)); 07648 dst = _dst.getMat(); 07649 07650 if( depth == CV_8U || depth == CV_16U ) 07651 { 07652 #if IPP_DISABLE_BLOCK // breaks OCL accuracy tests 07653 if( code == CV_BGR2HSV_FULL && scn == 3 ) 07654 { 07655 if( CvtColorIPPLoopCopy(src, dst, 
IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2HSVTab[depth], 2, 1, 0, depth)) ) 07656 return true; 07657 } 07658 else if( code == CV_BGR2HSV_FULL && scn == 4 ) 07659 { 07660 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HSVTab[depth], 2, 1, 0, depth)) ) 07661 return true; 07662 } 07663 else if( code == CV_RGB2HSV_FULL && scn == 4 ) 07664 { 07665 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HSVTab[depth], 0, 1, 2, depth)) ) 07666 return true; 07667 } else 07668 #endif 07669 if( code == CV_RGB2HSV_FULL && scn == 3 && depth == CV_16U ) 07670 { 07671 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2HSVTab[depth])) ) 07672 return true; 07673 } 07674 else if( code == CV_BGR2HLS_FULL && scn == 3 ) 07675 { 07676 if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2HLSTab[depth], 2, 1, 0, depth)) ) 07677 return true; 07678 } 07679 else if( code == CV_BGR2HLS_FULL && scn == 4 ) 07680 { 07681 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HLSTab[depth], 2, 1, 0, depth)) ) 07682 return true; 07683 } 07684 else if( code == CV_RGB2HLS_FULL && scn == 3 ) 07685 { 07686 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2HLSTab[depth])) ) 07687 return true; 07688 } 07689 else if( code == CV_RGB2HLS_FULL && scn == 4 ) 07690 { 07691 if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HLSTab[depth], 0, 1, 2, depth)) ) 07692 return true; 07693 } 07694 } 07695 return false; 07696 } 07697 #endif 07698 07699 #if IPP_VERSION_X100 >= 700 07700 case CV_HSV2BGR: case CV_HSV2RGB: case CV_HSV2BGR_FULL: case CV_HSV2RGB_FULL: 07701 case CV_HLS2BGR: case CV_HLS2RGB: case CV_HLS2BGR_FULL: case CV_HLS2RGB_FULL: 07702 { 07703 if( dcn <= 0 ) dcn = 3; 07704 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth 
== CV_32F) ); 07705 _dst.create(sz, CV_MAKETYPE(depth, dcn)); 07706 dst = _dst.getMat(); 07707 07708 if( depth == CV_8U || depth == CV_16U ) 07709 { 07710 if( code == CV_HSV2BGR_FULL && dcn == 3 ) 07711 { 07712 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) ) 07713 return true; 07714 } 07715 else if( code == CV_HSV2BGR_FULL && dcn == 4 ) 07716 { 07717 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) ) 07718 return true; 07719 } 07720 else if( code == CV_HSV2RGB_FULL && dcn == 3 ) 07721 { 07722 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiHSV2RGBTab[depth])) ) 07723 return true; 07724 } 07725 else if( code == CV_HSV2RGB_FULL && dcn == 4 ) 07726 { 07727 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) ) 07728 return true; 07729 } 07730 else if( code == CV_HLS2BGR_FULL && dcn == 3 ) 07731 { 07732 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) ) 07733 return true; 07734 } 07735 else if( code == CV_HLS2BGR_FULL && dcn == 4 ) 07736 { 07737 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) ) 07738 return true; 07739 } 07740 else if( code == CV_HLS2RGB_FULL && dcn == 3 ) 07741 { 07742 if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiHLS2RGBTab[depth])) ) 07743 return true; 07744 } 07745 else if( code == CV_HLS2RGB_FULL && dcn == 4 ) 07746 { 07747 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) ) 07748 return true; 07749 } 07750 } 07751 return false; 07752 } 07753 #endif 07754 07755 #if IPP_DISABLE_BLOCK 07756 case CV_BGR2Lab: case CV_RGB2Lab: case CV_LBGR2Lab: case CV_LRGB2Lab: 
07757 case CV_BGR2Luv: case CV_RGB2Luv: case CV_LBGR2Luv: case CV_LRGB2Luv: 07758 { 07759 CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) ); 07760 bool srgb = code == CV_BGR2Lab || code == CV_RGB2Lab || 07761 code == CV_BGR2Luv || code == CV_RGB2Luv; 07762 07763 _dst.create(sz, CV_MAKETYPE(depth, 3)); 07764 dst = _dst.getMat(); 07765 07766 if (code == CV_LBGR2Lab && scn == 3 && depth == CV_8U) 07767 { 07768 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiBGRToLab_8u_C3R))) 07769 return true; 07770 } 07771 else if (code == CV_LBGR2Lab && scn == 4 && depth == CV_8U) 07772 { 07773 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], 07774 (ippiGeneralFunc)ippiBGRToLab_8u_C3R, 0, 1, 2, depth))) 07775 return true; 07776 } 07777 else 07778 if (code == CV_LRGB2Lab && scn == 3 && depth == CV_8U) // slower than OpenCV 07779 { 07780 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], 07781 (ippiGeneralFunc)ippiBGRToLab_8u_C3R, 2, 1, 0, depth))) 07782 return true; 07783 } 07784 else if (code == CV_LRGB2Lab && scn == 4 && depth == CV_8U) // slower than OpenCV 07785 { 07786 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], 07787 (ippiGeneralFunc)ippiBGRToLab_8u_C3R, 2, 1, 0, depth))) 07788 return true; 07789 } 07790 else if (code == CV_LRGB2Luv && scn == 3) 07791 { 07792 if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGBToLUVTab[depth]))) 07793 return true; 07794 } 07795 else if (code == CV_LRGB2Luv && scn == 4) 07796 { 07797 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], 07798 ippiRGBToLUVTab[depth], 0, 1, 2, depth))) 07799 return true; 07800 } 07801 else if (code == CV_LBGR2Luv && scn == 3) 07802 { 07803 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], 07804 ippiRGBToLUVTab[depth], 2, 1, 0, depth))) 07805 return true; 07806 } 07807 else if 
(code == CV_LBGR2Luv && scn == 4) 07808 { 07809 if (CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], 07810 ippiRGBToLUVTab[depth], 2, 1, 0, depth))) 07811 return true; 07812 } 07813 return false; 07814 } 07815 #endif 07816 07817 #if IPP_DISABLE_BLOCK 07818 case CV_Lab2BGR: case CV_Lab2RGB: case CV_Lab2LBGR: case CV_Lab2LRGB: 07819 case CV_Luv2BGR: case CV_Luv2RGB: case CV_Luv2LBGR: case CV_Luv2LRGB: 07820 { 07821 if( dcn <= 0 ) dcn = 3; 07822 CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F) ); 07823 bool srgb = code == CV_Lab2BGR || code == CV_Lab2RGB || 07824 code == CV_Luv2BGR || code == CV_Luv2RGB; 07825 07826 _dst.create(sz, CV_MAKETYPE(depth, dcn)); 07827 dst = _dst.getMat(); 07828 07829 if( code == CV_Lab2LBGR && dcn == 3 && depth == CV_8U) 07830 { 07831 if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiLabToBGR_8u_C3R)) ) 07832 return true; 07833 } 07834 else if( code == CV_Lab2LBGR && dcn == 4 && depth == CV_8U ) 07835 { 07836 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiLabToBGR_8u_C3R, 07837 ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) ) 07838 return true; 07839 } 07840 if( code == CV_Lab2LRGB && dcn == 3 && depth == CV_8U ) 07841 { 07842 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiLabToBGR_8u_C3R, 07843 ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) ) 07844 return true; 07845 } 07846 else if( code == CV_Lab2LRGB && dcn == 4 && depth == CV_8U ) 07847 { 07848 if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor((ippiGeneralFunc)ippiLabToBGR_8u_C3R, 07849 ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) ) 07850 return true; 07851 } 07852 if( code == CV_Luv2LRGB && dcn == 3 ) 07853 { 07854 if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiLUVToRGBTab[depth])) ) 07855 return true; 07856 } 07857 else if( code == CV_Luv2LRGB && dcn == 4 ) 07858 { 07859 if( CvtColorIPPLoop(src, dst, 
IPPGeneralReorderFunctor(ippiLUVToRGBTab[depth],
                                 ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) )
                return true;
        }
        if( code == CV_Luv2LBGR && dcn == 3 )
        {
            // Luv -> RGB via IPP, then reorder channels to BGR.
            if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiLUVToRGBTab[depth],
                                 ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) )
                return true;
        }
        else if( code == CV_Luv2LBGR && dcn == 4 )
        {
            // Same as above but expanding to a 4-channel destination.
            if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiLUVToRGBTab[depth],
                                 ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) )
                return true;
        }
        return false;
    }
#endif

    case CV_YUV2GRAY_420:
    {
        // Extracting gray from a 4:2:0 layout is just a copy of the Y plane
        // (the top 2/3 of the source buffer).
        if (dcn <= 0) dcn = 1;

        CV_Assert( dcn == 1 );
        CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );

        Size dstSz(sz.width, sz.height * 2 / 3);
        _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
        dst = _dst.getMat();

        if (ippStsNoErr == ippiCopy_8u_C1R(src.data, (int)src.step, dst.data, (int)dst.step,
                ippiSize(dstSz.width, dstSz.height)))
            return true;
        return false;
    }

    case CV_RGBA2mRGBA:
    {
        if (dcn <= 0) dcn = 4;
        CV_Assert( scn == 4 && dcn == 4 );

        _dst.create(sz, CV_MAKETYPE(depth, dcn));
        dst = _dst.getMat();

        // IPP alpha premultiplication is only available for 8-bit data here.
        if( depth == CV_8U )
        {
            if (CvtColorIPPLoop(src, dst, IPPGeneralFunctor((ippiGeneralFunc)ippiAlphaPremul_8u_AC4R)))
                return true;
            return false;
        }

        return false;
    }

    default:
        return false;  // conversion code not accelerated by IPP; caller falls back
    }
}
#endif
}

//////////////////////////////////////////////////////////////////////////////////////////
//                                   The main function                                  //
//////////////////////////////////////////////////////////////////////////////////////////

// Convert an image between color spaces according to `code` (a CV_* conversion
// constant). `dcn` is the requested number of destination channels; when <= 0
// each case below picks the natural default for its conversion. Depth must be
// 8U, 16U or 32F (asserted below); individual cases further restrict it.
// Dispatch order: OpenCL path (if enabled), then IPP (via ipp_cvtColor above),
// then the portable CvtColorLoop implementations.
void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
{
    int stype = _src.type();
    int scn = CV_MAT_CN(stype), depth = CV_MAT_DEPTH(stype), bidx;

#ifdef HAVE_OPENCL
    // Luv2BGR/RGB on 8U is excluded from the OCL path here.
    CV_OCL_RUN( _src.dims() <= 2 && _dst.isUMat() && !(depth == CV_8U && (code == CV_Luv2BGR || code == CV_Luv2RGB)),
                ocl_cvtColor(_src, _dst, code, dcn) )
#endif

    Mat src = _src.getMat(), dst;
    Size sz = src.size();
    CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32F );

    // Try the IPP-accelerated implementation first; it returns false for
    // unsupported combinations and we fall through to the generic code.
    CV_IPP_RUN(true, ipp_cvtColor(src, _dst, code, dcn));

    switch( code )
    {
        // --- channel reorder / alpha add-remove among BGR(A)/RGB(A) ---
        case CV_BGR2BGRA: case CV_RGB2BGRA: case CV_BGRA2BGR:
        case CV_RGBA2BGR: case CV_RGB2BGR: case CV_BGRA2RGBA:
            CV_Assert( scn == 3 || scn == 4 );
            dcn = code == CV_BGR2BGRA || code == CV_RGB2BGRA || code == CV_BGRA2RGBA ? 4 : 3;
            bidx = code == CV_BGR2BGRA || code == CV_BGRA2BGR ? 0 : 2;  // blue channel index

            _dst.create( sz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

            if( depth == CV_8U )
            {
#ifdef HAVE_TEGRA_OPTIMIZATION
                if(tegra::useTegra() && tegra::cvtBGR2RGB(src, dst, bidx))
                    break;
#endif
                CvtColorLoop(src, dst, RGB2RGB<uchar>(scn, dcn, bidx));
            }
            else if( depth == CV_16U )
                CvtColorLoop(src, dst, RGB2RGB<ushort>(scn, dcn, bidx));
            else
                CvtColorLoop(src, dst, RGB2RGB<float>(scn, dcn, bidx));
            break;

        // --- RGB(A)/BGR(A) -> packed 16-bit 565/555 ---
        case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555:
        case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555:
            CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U );
            _dst.create(sz, CV_8UC2);
            dst = _dst.getMat();

#ifdef HAVE_TEGRA_OPTIMIZATION
            if(code == CV_BGR2BGR565 || code == CV_BGRA2BGR565 || code == CV_RGB2BGR565 || code == CV_RGBA2BGR565)
                if(tegra::useTegra() && tegra::cvtRGB2RGB565(src, dst, code == CV_RGB2BGR565 || code == CV_RGBA2BGR565 ? 0 : 2))
                    break;
#endif

            CvtColorLoop(src, dst, RGB2RGB5x5(scn,
                      code == CV_BGR2BGR565 || code == CV_BGR2BGR555 ||
                      code == CV_BGRA2BGR565 || code == CV_BGRA2BGR555 ? 0 : 2, // blue idx
                      code == CV_BGR2BGR565 || code == CV_RGB2BGR565 ||
                      code == CV_BGRA2BGR565 || code == CV_RGBA2BGR565 ? 6 : 5 // green bits
                      ));
            break;

        // --- packed 565/555 -> RGB(A)/BGR(A) ---
        case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB:
        case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA:
            if(dcn <= 0) dcn = (code==CV_BGR5652BGRA || code==CV_BGR5552BGRA || code==CV_BGR5652RGBA || code==CV_BGR5552RGBA) ? 4 : 3;
            CV_Assert( (dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U );
            _dst.create(sz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

            CvtColorLoop(src, dst, RGB5x52RGB(dcn,
                      code == CV_BGR5652BGR || code == CV_BGR5552BGR ||
                      code == CV_BGR5652BGRA || code == CV_BGR5552BGRA ? 0 : 2, // blue idx
                      code == CV_BGR5652BGR || code == CV_BGR5652RGB ||
                      code == CV_BGR5652BGRA || code == CV_BGR5652RGBA ? 6 : 5 // green bits
                      ));
            break;

        // --- color -> single-channel gray ---
        case CV_BGR2GRAY: case CV_BGRA2GRAY: case CV_RGB2GRAY: case CV_RGBA2GRAY:
            CV_Assert( scn == 3 || scn == 4 );
            _dst.create(sz, CV_MAKETYPE(depth, 1));
            dst = _dst.getMat();

            bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;

            if( depth == CV_8U )
            {
#ifdef HAVE_TEGRA_OPTIMIZATION
                if(tegra::useTegra() && tegra::cvtRGB2Gray(src, dst, bidx))
                    break;
#endif
                CvtColorLoop(src, dst, RGB2Gray<uchar>(scn, bidx, 0));
            }
            else if( depth == CV_16U )
                CvtColorLoop(src, dst, RGB2Gray<ushort>(scn, bidx, 0));
            else
                CvtColorLoop(src, dst, RGB2Gray<float>(scn, bidx, 0));
            break;

        // --- packed 565/555 -> gray ---
        case CV_BGR5652GRAY: case CV_BGR5552GRAY:
            CV_Assert( scn == 2 && depth == CV_8U );
            _dst.create(sz, CV_8UC1);
            dst = _dst.getMat();

            CvtColorLoop(src, dst, RGB5x52Gray(code == CV_BGR5652GRAY ? 6 : 5));
            break;

        // --- gray -> 3/4-channel (channel replication) ---
        case CV_GRAY2BGR: case CV_GRAY2BGRA:
            if( dcn <= 0 ) dcn = (code==CV_GRAY2BGRA) ? 4 : 3;
            CV_Assert( scn == 1 && (dcn == 3 || dcn == 4));
            _dst.create(sz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

            if( depth == CV_8U )
            {
#ifdef HAVE_TEGRA_OPTIMIZATION
                if(tegra::useTegra() && tegra::cvtGray2RGB(src, dst))
                    break;
#endif
                CvtColorLoop(src, dst, Gray2RGB<uchar>(dcn));
            }
            else if( depth == CV_16U )
                CvtColorLoop(src, dst, Gray2RGB<ushort>(dcn));
            else
                CvtColorLoop(src, dst, Gray2RGB<float>(dcn));
            break;

        // --- gray -> packed 565/555 ---
        case CV_GRAY2BGR565: case CV_GRAY2BGR555:
            CV_Assert( scn == 1 && depth == CV_8U );
            _dst.create(sz, CV_8UC2);
            dst = _dst.getMat();

            CvtColorLoop(src, dst, Gray2RGB5x5(code == CV_GRAY2BGR565 ? 6 : 5));
            break;

        // --- RGB/BGR -> YCrCb or YUV ---
        case CV_BGR2YCrCb: case CV_RGB2YCrCb:
        case CV_BGR2YUV: case CV_RGB2YUV:
        {
            CV_Assert( scn == 3 || scn == 4 );
            bidx = code == CV_BGR2YCrCb || code == CV_BGR2YUV ? 0 : 2;
            // Coefficient tables for the YUV variant; a null pointer makes the
            // functor use its built-in YCrCb coefficients instead.
            static const float yuv_f[] = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
            static const int yuv_i[] = { B2Y, G2Y, R2Y, 8061, 14369 };
            const float* coeffs_f = code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? 0 : yuv_f;
            const int* coeffs_i = code == CV_BGR2YCrCb || code == CV_RGB2YCrCb ? 0 : yuv_i;

            _dst.create(sz, CV_MAKETYPE(depth, 3));
            dst = _dst.getMat();

            if( depth == CV_8U )
            {
#ifdef HAVE_TEGRA_OPTIMIZATION
                if((code == CV_RGB2YCrCb || code == CV_BGR2YCrCb) && tegra::useTegra() && tegra::cvtRGB2YCrCb(src, dst, bidx))
                    break;
#endif
                CvtColorLoop(src, dst, RGB2YCrCb_i<uchar>(scn, bidx, coeffs_i));
            }
            else if( depth == CV_16U )
                CvtColorLoop(src, dst, RGB2YCrCb_i<ushort>(scn, bidx, coeffs_i));
            else
                CvtColorLoop(src, dst, RGB2YCrCb_f<float>(scn, bidx, coeffs_f));
        }
        break;

        // --- YCrCb or YUV -> RGB/BGR ---
        case CV_YCrCb2BGR: case CV_YCrCb2RGB:
        case CV_YUV2BGR: case CV_YUV2RGB:
        {
            if( dcn <= 0 ) dcn = 3;
            CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );
            bidx = code == CV_YCrCb2BGR || code == CV_YUV2BGR ? 0 : 2;
            // Inverse-transform coefficients for YUV; null selects built-in YCrCb.
            static const float yuv_f[] = { 2.032f, -0.395f, -0.581f, 1.140f };
            static const int yuv_i[] = { 33292, -6472, -9519, 18678 };
            const float* coeffs_f = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? 0 : yuv_f;
            const int* coeffs_i = code == CV_YCrCb2BGR || code == CV_YCrCb2RGB ? 0 : yuv_i;

            _dst.create(sz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

            if( depth == CV_8U )
                CvtColorLoop(src, dst, YCrCb2RGB_i<uchar>(dcn, bidx, coeffs_i));
            else if( depth == CV_16U )
                CvtColorLoop(src, dst, YCrCb2RGB_i<ushort>(dcn, bidx, coeffs_i));
            else
                CvtColorLoop(src, dst, YCrCb2RGB_f<float>(dcn, bidx, coeffs_f));
        }
        break;

        // --- RGB/BGR -> CIE XYZ ---
        case CV_BGR2XYZ: case CV_RGB2XYZ:
            CV_Assert( scn == 3 || scn == 4 );
            bidx = code == CV_BGR2XYZ ? 0 : 2;

            _dst.create(sz, CV_MAKETYPE(depth, 3));
            dst = _dst.getMat();

            if( depth == CV_8U )
                CvtColorLoop(src, dst, RGB2XYZ_i<uchar>(scn, bidx, 0));
            else if( depth == CV_16U )
                CvtColorLoop(src, dst, RGB2XYZ_i<ushort>(scn, bidx, 0));
            else
                CvtColorLoop(src, dst, RGB2XYZ_f<float>(scn, bidx, 0));
            break;

        // --- CIE XYZ -> RGB/BGR ---
        case CV_XYZ2BGR: case CV_XYZ2RGB:
            if( dcn <= 0 ) dcn = 3;
            CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) );
            bidx = code == CV_XYZ2BGR ? 0 : 2;

            _dst.create(sz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

            if( depth == CV_8U )
                CvtColorLoop(src, dst, XYZ2RGB_i<uchar>(dcn, bidx, 0));
            else if( depth == CV_16U )
                CvtColorLoop(src, dst, XYZ2RGB_i<ushort>(dcn, bidx, 0));
            else
                CvtColorLoop(src, dst, XYZ2RGB_f<float>(dcn, bidx, 0));
            break;

        // --- RGB/BGR -> HSV or HLS (plain and _FULL hue-range variants) ---
        case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL:
        case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL:
        {
            CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) );
            bidx = code == CV_BGR2HSV || code == CV_BGR2HLS ||
                code == CV_BGR2HSV_FULL || code == CV_BGR2HLS_FULL ? 0 : 2;
            // hue range: 360 for float, 180 for 8-bit non-FULL, 256 for 8-bit FULL
            int hrange = depth == CV_32F ? 360 : code == CV_BGR2HSV || code == CV_RGB2HSV ||
                code == CV_BGR2HLS || code == CV_RGB2HLS ? 180 : 256;

            _dst.create(sz, CV_MAKETYPE(depth, 3));
            dst = _dst.getMat();

            if( code == CV_BGR2HSV || code == CV_RGB2HSV ||
                code == CV_BGR2HSV_FULL || code == CV_RGB2HSV_FULL )
            {
#ifdef HAVE_TEGRA_OPTIMIZATION
                if(tegra::useTegra() && tegra::cvtRGB2HSV(src, dst, bidx, hrange))
                    break;
#endif
                if( depth == CV_8U )
                    CvtColorLoop(src, dst, RGB2HSV_b(scn, bidx, hrange));
                else
                    CvtColorLoop(src, dst, RGB2HSV_f(scn, bidx, (float)hrange));
            }
            else
            {
                if( depth == CV_8U )
                    CvtColorLoop(src, dst, RGB2HLS_b(scn, bidx, hrange));
                else
                    CvtColorLoop(src, dst, RGB2HLS_f(scn, bidx, (float)hrange));
            }
        }
        break;

        // --- HSV or HLS -> RGB/BGR ---
        case CV_HSV2BGR: case CV_HSV2RGB: case CV_HSV2BGR_FULL: case CV_HSV2RGB_FULL:
        case CV_HLS2BGR: case CV_HLS2RGB: case CV_HLS2BGR_FULL: case CV_HLS2RGB_FULL:
        {
            if( dcn <= 0 ) dcn = 3;
            CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F) );
            bidx = code == CV_HSV2BGR || code == CV_HLS2BGR ||
                code == CV_HSV2BGR_FULL || code == CV_HLS2BGR_FULL ? 0 : 2;
            // NOTE(review): the non-float FULL range here is 255, whereas the
            // forward conversion above uses 256 — kept as in the original.
            int hrange = depth == CV_32F ? 360 : code == CV_HSV2BGR || code == CV_HSV2RGB ||
                code == CV_HLS2BGR || code == CV_HLS2RGB ? 180 : 255;

            _dst.create(sz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

            if( code == CV_HSV2BGR || code == CV_HSV2RGB ||
                code == CV_HSV2BGR_FULL || code == CV_HSV2RGB_FULL )
            {
                if( depth == CV_8U )
                    CvtColorLoop(src, dst, HSV2RGB_b(dcn, bidx, hrange));
                else
                    CvtColorLoop(src, dst, HSV2RGB_f(dcn, bidx, (float)hrange));
            }
            else
            {
                if( depth == CV_8U )
                    CvtColorLoop(src, dst, HLS2RGB_b(dcn, bidx, hrange));
                else
                    CvtColorLoop(src, dst, HLS2RGB_f(dcn, bidx, (float)hrange));
            }
        }
        break;

        // --- RGB/BGR -> Lab or Luv (L* prefix = linear RGB, no sRGB gamma) ---
        case CV_BGR2Lab: case CV_RGB2Lab: case CV_LBGR2Lab: case CV_LRGB2Lab:
        case CV_BGR2Luv: case CV_RGB2Luv: case CV_LBGR2Luv: case CV_LRGB2Luv:
        {
            CV_Assert( (scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F) );
            bidx = code == CV_BGR2Lab || code == CV_BGR2Luv ||
                   code == CV_LBGR2Lab || code == CV_LBGR2Luv ? 0 : 2;
            bool srgb = code == CV_BGR2Lab || code == CV_RGB2Lab ||
                        code == CV_BGR2Luv || code == CV_RGB2Luv;

            _dst.create(sz, CV_MAKETYPE(depth, 3));
            dst = _dst.getMat();

            if( code == CV_BGR2Lab || code == CV_RGB2Lab ||
                code == CV_LBGR2Lab || code == CV_LRGB2Lab )
            {
                if( depth == CV_8U )
                    CvtColorLoop(src, dst, RGB2Lab_b(scn, bidx, 0, 0, srgb));
                else
                    CvtColorLoop(src, dst, RGB2Lab_f(scn, bidx, 0, 0, srgb));
            }
            else
            {
                if( depth == CV_8U )
                    CvtColorLoop(src, dst, RGB2Luv_b(scn, bidx, 0, 0, srgb));
                else
                    CvtColorLoop(src, dst, RGB2Luv_f(scn, bidx, 0, 0, srgb));
            }
        }
        break;

        // --- Lab or Luv -> RGB/BGR ---
        case CV_Lab2BGR: case CV_Lab2RGB: case CV_Lab2LBGR: case CV_Lab2LRGB:
        case CV_Luv2BGR: case CV_Luv2RGB: case CV_Luv2LBGR: case CV_Luv2LRGB:
        {
            if( dcn <= 0 ) dcn = 3;
            CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F) );
            bidx = code == CV_Lab2BGR || code == CV_Luv2BGR ||
                   code == CV_Lab2LBGR || code == CV_Luv2LBGR ? 0 : 2;
            bool srgb = code == CV_Lab2BGR || code == CV_Lab2RGB ||
                        code == CV_Luv2BGR || code == CV_Luv2RGB;

            _dst.create(sz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

            if( code == CV_Lab2BGR || code == CV_Lab2RGB ||
                code == CV_Lab2LBGR || code == CV_Lab2LRGB )
            {
                if( depth == CV_8U )
                    CvtColorLoop(src, dst, Lab2RGB_b(dcn, bidx, 0, 0, srgb));
                else
                    CvtColorLoop(src, dst, Lab2RGB_f(dcn, bidx, 0, 0, srgb));
            }
            else
            {
                if( depth == CV_8U )
                    CvtColorLoop(src, dst, Luv2RGB_b(dcn, bidx, 0, 0, srgb));
                else
                    CvtColorLoop(src, dst, Luv2RGB_f(dcn, bidx, 0, 0, srgb));
            }
        }
        break;

        // --- Bayer demosaicing: delegated entirely to demosaicing() ---
        case CV_BayerBG2GRAY: case CV_BayerGB2GRAY: case CV_BayerRG2GRAY: case CV_BayerGR2GRAY:
        case CV_BayerBG2BGR: case CV_BayerGB2BGR: case CV_BayerRG2BGR: case CV_BayerGR2BGR:
        case CV_BayerBG2BGR_VNG: case CV_BayerGB2BGR_VNG: case CV_BayerRG2BGR_VNG: case CV_BayerGR2BGR_VNG:
        case CV_BayerBG2BGR_EA: case CV_BayerGB2BGR_EA: case CV_BayerRG2BGR_EA: case CV_BayerGR2BGR_EA:
            demosaicing(src, _dst, code, dcn);
            break;

        // --- semi-planar YUV 4:2:0 (NV21/NV12) -> RGB(A)/BGR(A) ---
        case CV_YUV2BGR_NV21: case CV_YUV2RGB_NV21: case CV_YUV2BGR_NV12: case CV_YUV2RGB_NV12:
        case CV_YUV2BGRA_NV21: case CV_YUV2RGBA_NV21: case CV_YUV2BGRA_NV12: case CV_YUV2RGBA_NV12:
        {
            // http://www.fourcc.org/yuv.php#NV21 == yuv420sp -> a plane of 8 bit Y samples followed by an interleaved V/U plane containing 8 bit 2x2 subsampled chroma samples
            // http://www.fourcc.org/yuv.php#NV12 -> a plane of 8 bit Y samples followed by an interleaved U/V plane containing 8 bit 2x2 subsampled colour difference samples

            if (dcn <= 0) dcn = (code==CV_YUV420sp2BGRA || code==CV_YUV420sp2RGBA || code==CV_YUV2BGRA_NV12 || code==CV_YUV2RGBA_NV12) ? 4 : 3;
            const int bIdx = (code==CV_YUV2BGR_NV21 || code==CV_YUV2BGRA_NV21 || code==CV_YUV2BGR_NV12 || code==CV_YUV2BGRA_NV12) ? 0 : 2;
            const int uIdx = (code==CV_YUV2BGR_NV21 || code==CV_YUV2BGRA_NV21 || code==CV_YUV2RGB_NV21 || code==CV_YUV2RGBA_NV21) ? 1 : 0;

            CV_Assert( dcn == 3 || dcn == 4 );
            // Source holds Y plane plus half-height chroma, hence height % 3 == 0.
            CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );

            Size dstSz(sz.width, sz.height * 2 / 3);
            _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

            int srcstep = (int)src.step;
            const uchar* y = src.ptr();
            const uchar* uv = y + srcstep * dstSz.height;  // chroma plane follows Y

            // Encode (dcn, bIdx, uIdx) into one key to pick the template instance.
            switch(dcn*100 + bIdx * 10 + uIdx)
            {
                case 300: cvtYUV420sp2RGB<0, 0> (dst, srcstep, y, uv); break;
                case 301: cvtYUV420sp2RGB<0, 1> (dst, srcstep, y, uv); break;
                case 320: cvtYUV420sp2RGB<2, 0> (dst, srcstep, y, uv); break;
                case 321: cvtYUV420sp2RGB<2, 1> (dst, srcstep, y, uv); break;
                case 400: cvtYUV420sp2RGBA<0, 0>(dst, srcstep, y, uv); break;
                case 401: cvtYUV420sp2RGBA<0, 1>(dst, srcstep, y, uv); break;
                case 420: cvtYUV420sp2RGBA<2, 0>(dst, srcstep, y, uv); break;
                case 421: cvtYUV420sp2RGBA<2, 1>(dst, srcstep, y, uv); break;
                default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
            };
        }
        break;

        // --- planar YUV 4:2:0 (YV12/IYUV) -> RGB(A)/BGR(A) ---
        case CV_YUV2BGR_YV12: case CV_YUV2RGB_YV12: case CV_YUV2BGRA_YV12: case CV_YUV2RGBA_YV12:
        case CV_YUV2BGR_IYUV: case CV_YUV2RGB_IYUV: case CV_YUV2BGRA_IYUV: case CV_YUV2RGBA_IYUV:
        {
            //http://www.fourcc.org/yuv.php#YV12 == yuv420p -> It comprises an NxM Y plane followed by (N/2)x(M/2) V and U planes.
            //http://www.fourcc.org/yuv.php#IYUV == I420 -> It comprises an NxN Y plane followed by (N/2)x(N/2) U and V planes

            if (dcn <= 0) dcn = (code==CV_YUV2BGRA_YV12 || code==CV_YUV2RGBA_YV12 || code==CV_YUV2RGBA_IYUV || code==CV_YUV2BGRA_IYUV) ? 4 : 3;
            const int bIdx = (code==CV_YUV2BGR_YV12 || code==CV_YUV2BGRA_YV12 || code==CV_YUV2BGR_IYUV || code==CV_YUV2BGRA_IYUV) ? 0 : 2;
            const int uIdx = (code==CV_YUV2BGR_YV12 || code==CV_YUV2RGB_YV12 || code==CV_YUV2BGRA_YV12 || code==CV_YUV2RGBA_YV12) ? 1 : 0;

            CV_Assert( dcn == 3 || dcn == 4 );
            CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );

            Size dstSz(sz.width, sz.height * 2 / 3);
            _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

            int srcstep = (int)src.step;
            const uchar* y = src.ptr();
            const uchar* u = y + srcstep * dstSz.height;
            // Second chroma plane; the extra term accounts for half-row packing
            // when the destination height is not a multiple of 4.
            const uchar* v = y + srcstep * (dstSz.height + dstSz.height/4) + (dstSz.width/2) * ((dstSz.height % 4)/2);

            int ustepIdx = 0;
            int vstepIdx = dstSz.height % 4 == 2 ? 1 : 0;

            // YV12 stores V before U: swap plane pointers (and their step parity).
            if(uIdx == 1) { std::swap(u ,v), std::swap(ustepIdx, vstepIdx); }

            switch(dcn*10 + bIdx)
            {
                case 30: cvtYUV420p2RGB<0>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
                case 32: cvtYUV420p2RGB<2>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
                case 40: cvtYUV420p2RGBA<0>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
                case 42: cvtYUV420p2RGBA<2>(dst, srcstep, y, u, v, ustepIdx, vstepIdx); break;
                default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
            };
        }
        break;

        // --- YUV 4:2:0 -> gray: just copy the Y plane ---
        case CV_YUV2GRAY_420:
        {
            if (dcn <= 0) dcn = 1;

            CV_Assert( dcn == 1 );
            CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );

            Size dstSz(sz.width, sz.height * 2 / 3);
            _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();
            src(Range(0, dstSz.height), Range::all()).copyTo(dst);
        }
        break;

        // --- RGB(A)/BGR(A) -> planar YUV 4:2:0 (YV12/IYUV) ---
        case CV_RGB2YUV_YV12: case CV_BGR2YUV_YV12: case CV_RGBA2YUV_YV12: case CV_BGRA2YUV_YV12:
        case CV_RGB2YUV_IYUV: case CV_BGR2YUV_IYUV: case CV_RGBA2YUV_IYUV: case CV_BGRA2YUV_IYUV:
        {
            if (dcn <= 0) dcn = 1;
            const int bIdx = (code == CV_BGR2YUV_IYUV || code == CV_BGRA2YUV_IYUV || code == CV_BGR2YUV_YV12 || code == CV_BGRA2YUV_YV12) ? 0 : 2;
            const int uIdx = (code == CV_BGR2YUV_IYUV || code == CV_BGRA2YUV_IYUV || code == CV_RGB2YUV_IYUV || code == CV_RGBA2YUV_IYUV) ? 1 : 2;

            CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U );
            CV_Assert( dcn == 1 );
            CV_Assert( sz.width % 2 == 0 && sz.height % 2 == 0 );

            // Destination packs Y plus half-height chroma: height * 3/2.
            Size dstSz(sz.width, sz.height / 2 * 3);
            _dst.create(dstSz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

            switch(bIdx + uIdx*10)
            {
                case 10: cvtRGBtoYUV420p<0, 1>(src, dst); break;
                case 12: cvtRGBtoYUV420p<2, 1>(src, dst); break;
                case 20: cvtRGBtoYUV420p<0, 2>(src, dst); break;
                case 22: cvtRGBtoYUV420p<2, 2>(src, dst); break;
                default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
            };
        }
        break;

        // --- interleaved YUV 4:2:2 (UYVY/YUY2/YVYU) -> RGB(A)/BGR(A) ---
        case CV_YUV2RGB_UYVY: case CV_YUV2BGR_UYVY: case CV_YUV2RGBA_UYVY: case CV_YUV2BGRA_UYVY:
        case CV_YUV2RGB_YUY2: case CV_YUV2BGR_YUY2: case CV_YUV2RGB_YVYU: case CV_YUV2BGR_YVYU:
        case CV_YUV2RGBA_YUY2: case CV_YUV2BGRA_YUY2: case CV_YUV2RGBA_YVYU: case CV_YUV2BGRA_YVYU:
        {
            //http://www.fourcc.org/yuv.php#UYVY
            //http://www.fourcc.org/yuv.php#YUY2
            //http://www.fourcc.org/yuv.php#YVYU

            if (dcn <= 0) dcn = (code==CV_YUV2RGBA_UYVY || code==CV_YUV2BGRA_UYVY || code==CV_YUV2RGBA_YUY2 || code==CV_YUV2BGRA_YUY2 || code==CV_YUV2RGBA_YVYU || code==CV_YUV2BGRA_YVYU) ? 4 : 3;
            const int bIdx = (code==CV_YUV2BGR_UYVY || code==CV_YUV2BGRA_UYVY || code==CV_YUV2BGR_YUY2 || code==CV_YUV2BGRA_YUY2 || code==CV_YUV2BGR_YVYU || code==CV_YUV2BGRA_YVYU) ? 0 : 2;
            const int ycn = (code==CV_YUV2RGB_UYVY || code==CV_YUV2BGR_UYVY || code==CV_YUV2RGBA_UYVY || code==CV_YUV2BGRA_UYVY) ? 1 : 0;  // Y offset within the 2-byte pair
            const int uIdx = (code==CV_YUV2RGB_YVYU || code==CV_YUV2BGR_YVYU || code==CV_YUV2RGBA_YVYU || code==CV_YUV2BGRA_YVYU) ? 1 : 0;

            CV_Assert( dcn == 3 || dcn == 4 );
            CV_Assert( scn == 2 && depth == CV_8U );

            _dst.create(sz, CV_8UC(dcn));
            dst = _dst.getMat();

            switch(dcn*1000 + bIdx*100 + uIdx*10 + ycn)
            {
                case 3000: cvtYUV422toRGB<0,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 3001: cvtYUV422toRGB<0,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 3010: cvtYUV422toRGB<0,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 3011: cvtYUV422toRGB<0,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 3200: cvtYUV422toRGB<2,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 3201: cvtYUV422toRGB<2,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 3210: cvtYUV422toRGB<2,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 3211: cvtYUV422toRGB<2,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 4000: cvtYUV422toRGBA<0,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 4001: cvtYUV422toRGBA<0,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 4010: cvtYUV422toRGBA<0,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 4011: cvtYUV422toRGBA<0,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 4200: cvtYUV422toRGBA<2,0,0>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 4201: cvtYUV422toRGBA<2,0,1>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 4210: cvtYUV422toRGBA<2,1,0>(dst, (int)src.step, src.ptr<uchar>()); break;
                case 4211: cvtYUV422toRGBA<2,1,1>(dst, (int)src.step, src.ptr<uchar>()); break;
                default: CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" ); break;
            };
        }
        break;

        // --- interleaved YUV 4:2:2 -> gray: extract the Y channel ---
        case CV_YUV2GRAY_UYVY: case CV_YUV2GRAY_YUY2:
        {
            if (dcn <= 0) dcn = 1;

            CV_Assert( dcn == 1 );
            CV_Assert( scn == 2 && depth == CV_8U );

            src.release(); // T-API datarace fixup
            extractChannel(_src, _dst, code == CV_YUV2GRAY_UYVY ? 1 : 0);
        }
        break;

        // --- RGBA -> premultiplied-alpha RGBA (8-bit only) ---
        case CV_RGBA2mRGBA:
        {
            if (dcn <= 0) dcn = 4;
            CV_Assert( scn == 4 && dcn == 4 );

            _dst.create(sz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

            if( depth == CV_8U )
            {
                CvtColorLoop(src, dst, RGBA2mRGBA<uchar>());
            }
            else
            {
                CV_Error( CV_StsBadArg, "Unsupported image depth" );
            }
        }
        break;

        // --- premultiplied-alpha RGBA -> RGBA (8-bit only) ---
        case CV_mRGBA2RGBA:
        {
            if (dcn <= 0) dcn = 4;
            CV_Assert( scn == 4 && dcn == 4 );

            _dst.create(sz, CV_MAKETYPE(depth, dcn));
            dst = _dst.getMat();

            if( depth == CV_8U )
                CvtColorLoop(src, dst, mRGBA2RGBA<uchar>());
            else
            {
                CV_Error( CV_StsBadArg, "Unsupported image depth" );
            }
        }
        break;

        default:
            CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" );
    }
}

// Legacy C-API wrapper: converts in place into the caller-provided dstarr.
// The final assert checks that cv::cvtColor did not reallocate the output
// (i.e. the caller supplied a destination of the correct size/type).
CV_IMPL void
cvCvtColor( const CvArr* srcarr, CvArr* dstarr, int code )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst0 = cv::cvarrToMat(dstarr), dst = dst0;
    CV_Assert( src.depth() == dst.depth() );

    cv::cvtColor(src, dst, code, dst.channels());
    CV_Assert( dst.data == dst0.data );
}


/* End of file. */
Generated on Tue Jul 12 2022 15:17:20 by Doxygen 1.7.2
