the do / gr-peach-opencv-project

Fork of gr-peach-opencv-project by the do

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers morph.cpp Source File

morph.cpp

00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
00015 // Third party copyrights are property of their respective owners.
00016 //
00017 // Redistribution and use in source and binary forms, with or without modification,
00018 // are permitted provided that the following conditions are met:
00019 //
00020 //   * Redistribution's of source code must retain the above copyright notice,
00021 //     this list of conditions and the following disclaimer.
00022 //
00023 //   * Redistribution's in binary form must reproduce the above copyright notice,
00024 //     this list of conditions and the following disclaimer in the documentation
00025 //     and/or other materials provided with the distribution.
00026 //
00027 //   * The name of the copyright holders may not be used to endorse or promote products
00028 //     derived from this software without specific prior written permission.
00029 //
00030 // This software is provided by the copyright holders and contributors "as is" and
00031 // any express or implied warranties, including, but not limited to, the implied
00032 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00033 // In no event shall the Intel Corporation or contributors be liable for any direct,
00034 // indirect, incidental, special, exemplary, or consequential damages
00035 // (including, but not limited to, procurement of substitute goods or services;
00036 // loss of use, data, or profits; or business interruption) however caused
00037 // and on any theory of liability, whether in contract, strict liability,
00038 // or tort (including negligence or otherwise) arising in any way out of
00039 // the use of this software, even if advised of the possibility of such damage.
00040 //
00041 //M*/
00042 
00043 #include "precomp.hpp"
00044 #include <limits.h>
00045 #include "opencl_kernels_imgproc.hpp"
00046 
00047 /****************************************************************************************\
00048                      Basic Morphological Operations: Erosion & Dilation
00049 \****************************************************************************************/
00050 
00051 namespace cv
00052 {
00053 
// Binary functor yielding the smaller of its two arguments. It is the
// per-element reduction step used by the erosion filters below.
template<typename T> struct MinOp
{
    typedef T type1;   // first operand type
    typedef T type2;   // second operand type
    typedef T rtype;   // result type

    // Same selection rule as std::min: `a` is kept unless `b` is strictly smaller.
    T operator ()(const T a, const T b) const
    {
        return b < a ? b : a;
    }
};
00061 
// Binary functor yielding the larger of its two arguments. It is the
// per-element reduction step used by the dilation filters below.
template<typename T> struct MaxOp
{
    typedef T type1;   // first operand type
    typedef T type2;   // second operand type
    typedef T rtype;   // result type

    // Same selection rule as std::max: `a` is kept unless it is strictly smaller than `b`.
    T operator ()(const T a, const T b) const
    {
        return a < b ? b : a;
    }
};
00069 
00070 #undef CV_MIN_8U
00071 #undef CV_MAX_8U
00072 #define CV_MIN_8U(a,b)       ((a) - CV_FAST_CAST_8U((a) - (b)))
00073 #define CV_MAX_8U(a,b)       ((a) + CV_FAST_CAST_8U((b) - (a)))
00074 
00075 template<> inline uchar MinOp<uchar>::operator ()(const uchar a, const uchar b) const { return CV_MIN_8U(a, b); }
00076 template<> inline uchar MaxOp<uchar>::operator ()(const uchar a, const uchar b) const { return CV_MAX_8U(a, b); }
00077 
00078 struct MorphRowNoVec
00079 {
00080     MorphRowNoVec(int, int) {}
00081     int operator()(const uchar*, uchar*, int, int) const { return 0; }
00082 };
00083 
00084 struct MorphColumnNoVec
00085 {
00086     MorphColumnNoVec(int, int) {}
00087     int operator()(const uchar**, uchar*, int, int, int) const { return 0; }
00088 };
00089 
00090 struct MorphNoVec
00091 {
00092     int operator()(uchar**, int, uchar*, int) const { return 0; }
00093 };
00094 
00095 #if CV_SSE2
00096 
00097 template<class VecUpdate> struct MorphRowIVec
00098 {
00099     enum { ESZ = VecUpdate::ESZ };
00100 
00101     MorphRowIVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {}
00102     int operator()(const uchar* src, uchar* dst, int width, int cn) const
00103     {
00104         if( !checkHardwareSupport(CV_CPU_SSE2) )
00105             return 0;
00106 
00107         cn *= ESZ;
00108         int i, k, _ksize = ksize*cn;
00109         width = (width & -4)*cn;
00110         VecUpdate updateOp;
00111 
00112         for( i = 0; i <= width - 16; i += 16 )
00113         {
00114             __m128i s = _mm_loadu_si128((const __m128i*)(src + i));
00115             for( k = cn; k < _ksize; k += cn )
00116             {
00117                 __m128i x = _mm_loadu_si128((const __m128i*)(src + i + k));
00118                 s = updateOp(s, x);
00119             }
00120             _mm_storeu_si128((__m128i*)(dst + i), s);
00121         }
00122 
00123         for( ; i < width; i += 4 )
00124         {
00125             __m128i s = _mm_cvtsi32_si128(*(const int*)(src + i));
00126             for( k = cn; k < _ksize; k += cn )
00127             {
00128                 __m128i x = _mm_cvtsi32_si128(*(const int*)(src + i + k));
00129                 s = updateOp(s, x);
00130             }
00131             *(int*)(dst + i) = _mm_cvtsi128_si32(s);
00132         }
00133 
00134         return i/ESZ;
00135     }
00136 
00137     int ksize, anchor;
00138 };
00139 
00140 
00141 template<class VecUpdate> struct MorphRowFVec
00142 {
00143     MorphRowFVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {}
00144     int operator()(const uchar* src, uchar* dst, int width, int cn) const
00145     {
00146         if( !checkHardwareSupport(CV_CPU_SSE) )
00147             return 0;
00148 
00149         int i, k, _ksize = ksize*cn;
00150         width = (width & -4)*cn;
00151         VecUpdate updateOp;
00152 
00153         for( i = 0; i < width; i += 4 )
00154         {
00155             __m128 s = _mm_loadu_ps((const float*)src + i);
00156             for( k = cn; k < _ksize; k += cn )
00157             {
00158                 __m128 x = _mm_loadu_ps((const float*)src + i + k);
00159                 s = updateOp(s, x);
00160             }
00161             _mm_storeu_ps((float*)dst + i, s);
00162         }
00163 
00164         return i;
00165     }
00166 
00167     int ksize, anchor;
00168 };
00169 
00170 
00171 template<class VecUpdate> struct MorphColumnIVec
00172 {
00173     enum { ESZ = VecUpdate::ESZ };
00174 
00175     MorphColumnIVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {}
00176     int operator()(const uchar** src, uchar* dst, int dststep, int count, int width) const
00177     {
00178         if( !checkHardwareSupport(CV_CPU_SSE2) )
00179             return 0;
00180 
00181         int i = 0, k, _ksize = ksize;
00182         width *= ESZ;
00183         VecUpdate updateOp;
00184 
00185         for( i = 0; i < count + ksize - 1; i++ )
00186             CV_Assert( ((size_t)src[i] & 15) == 0 );
00187 
00188         for( ; _ksize > 1 && count > 1; count -= 2, dst += dststep*2, src += 2 )
00189         {
00190             for( i = 0; i <= width - 32; i += 32 )
00191             {
00192                 const uchar* sptr = src[1] + i;
00193                 __m128i s0 = _mm_load_si128((const __m128i*)sptr);
00194                 __m128i s1 = _mm_load_si128((const __m128i*)(sptr + 16));
00195                 __m128i x0, x1;
00196 
00197                 for( k = 2; k < _ksize; k++ )
00198                 {
00199                     sptr = src[k] + i;
00200                     x0 = _mm_load_si128((const __m128i*)sptr);
00201                     x1 = _mm_load_si128((const __m128i*)(sptr + 16));
00202                     s0 = updateOp(s0, x0);
00203                     s1 = updateOp(s1, x1);
00204                 }
00205 
00206                 sptr = src[0] + i;
00207                 x0 = _mm_load_si128((const __m128i*)sptr);
00208                 x1 = _mm_load_si128((const __m128i*)(sptr + 16));
00209                 _mm_storeu_si128((__m128i*)(dst + i), updateOp(s0, x0));
00210                 _mm_storeu_si128((__m128i*)(dst + i + 16), updateOp(s1, x1));
00211 
00212                 sptr = src[k] + i;
00213                 x0 = _mm_load_si128((const __m128i*)sptr);
00214                 x1 = _mm_load_si128((const __m128i*)(sptr + 16));
00215                 _mm_storeu_si128((__m128i*)(dst + dststep + i), updateOp(s0, x0));
00216                 _mm_storeu_si128((__m128i*)(dst + dststep + i + 16), updateOp(s1, x1));
00217             }
00218 
00219             for( ; i <= width - 8; i += 8 )
00220             {
00221                 __m128i s0 = _mm_loadl_epi64((const __m128i*)(src[1] + i)), x0;
00222 
00223                 for( k = 2; k < _ksize; k++ )
00224                 {
00225                     x0 = _mm_loadl_epi64((const __m128i*)(src[k] + i));
00226                     s0 = updateOp(s0, x0);
00227                 }
00228 
00229                 x0 = _mm_loadl_epi64((const __m128i*)(src[0] + i));
00230                 _mm_storel_epi64((__m128i*)(dst + i), updateOp(s0, x0));
00231                 x0 = _mm_loadl_epi64((const __m128i*)(src[k] + i));
00232                 _mm_storel_epi64((__m128i*)(dst + dststep + i), updateOp(s0, x0));
00233             }
00234         }
00235 
00236         for( ; count > 0; count--, dst += dststep, src++ )
00237         {
00238             for( i = 0; i <= width - 32; i += 32 )
00239             {
00240                 const uchar* sptr = src[0] + i;
00241                 __m128i s0 = _mm_load_si128((const __m128i*)sptr);
00242                 __m128i s1 = _mm_load_si128((const __m128i*)(sptr + 16));
00243                 __m128i x0, x1;
00244 
00245                 for( k = 1; k < _ksize; k++ )
00246                 {
00247                     sptr = src[k] + i;
00248                     x0 = _mm_load_si128((const __m128i*)sptr);
00249                     x1 = _mm_load_si128((const __m128i*)(sptr + 16));
00250                     s0 = updateOp(s0, x0);
00251                     s1 = updateOp(s1, x1);
00252                 }
00253                 _mm_storeu_si128((__m128i*)(dst + i), s0);
00254                 _mm_storeu_si128((__m128i*)(dst + i + 16), s1);
00255             }
00256 
00257             for( ; i <= width - 8; i += 8 )
00258             {
00259                 __m128i s0 = _mm_loadl_epi64((const __m128i*)(src[0] + i)), x0;
00260 
00261                 for( k = 1; k < _ksize; k++ )
00262                 {
00263                     x0 = _mm_loadl_epi64((const __m128i*)(src[k] + i));
00264                     s0 = updateOp(s0, x0);
00265                 }
00266                 _mm_storel_epi64((__m128i*)(dst + i), s0);
00267             }
00268         }
00269 
00270         return i/ESZ;
00271     }
00272 
00273     int ksize, anchor;
00274 };
00275 
00276 
00277 template<class VecUpdate> struct MorphColumnFVec
00278 {
00279     MorphColumnFVec(int _ksize, int _anchor) : ksize(_ksize), anchor(_anchor) {}
00280     int operator()(const uchar** _src, uchar* _dst, int dststep, int count, int width) const
00281     {
00282         if( !checkHardwareSupport(CV_CPU_SSE) )
00283             return 0;
00284 
00285         int i = 0, k, _ksize = ksize;
00286         VecUpdate updateOp;
00287 
00288         for( i = 0; i < count + ksize - 1; i++ )
00289             CV_Assert( ((size_t)_src[i] & 15) == 0 );
00290 
00291         const float** src = (const float**)_src;
00292         float* dst = (float*)_dst;
00293         dststep /= sizeof(dst[0]);
00294 
00295         for( ; _ksize > 1 && count > 1; count -= 2, dst += dststep*2, src += 2 )
00296         {
00297             for( i = 0; i <= width - 16; i += 16 )
00298             {
00299                 const float* sptr = src[1] + i;
00300                 __m128 s0 = _mm_load_ps(sptr);
00301                 __m128 s1 = _mm_load_ps(sptr + 4);
00302                 __m128 s2 = _mm_load_ps(sptr + 8);
00303                 __m128 s3 = _mm_load_ps(sptr + 12);
00304                 __m128 x0, x1, x2, x3;
00305 
00306                 for( k = 2; k < _ksize; k++ )
00307                 {
00308                     sptr = src[k] + i;
00309                     x0 = _mm_load_ps(sptr);
00310                     x1 = _mm_load_ps(sptr + 4);
00311                     s0 = updateOp(s0, x0);
00312                     s1 = updateOp(s1, x1);
00313                     x2 = _mm_load_ps(sptr + 8);
00314                     x3 = _mm_load_ps(sptr + 12);
00315                     s2 = updateOp(s2, x2);
00316                     s3 = updateOp(s3, x3);
00317                 }
00318 
00319                 sptr = src[0] + i;
00320                 x0 = _mm_load_ps(sptr);
00321                 x1 = _mm_load_ps(sptr + 4);
00322                 x2 = _mm_load_ps(sptr + 8);
00323                 x3 = _mm_load_ps(sptr + 12);
00324                 _mm_storeu_ps(dst + i, updateOp(s0, x0));
00325                 _mm_storeu_ps(dst + i + 4, updateOp(s1, x1));
00326                 _mm_storeu_ps(dst + i + 8, updateOp(s2, x2));
00327                 _mm_storeu_ps(dst + i + 12, updateOp(s3, x3));
00328 
00329                 sptr = src[k] + i;
00330                 x0 = _mm_load_ps(sptr);
00331                 x1 = _mm_load_ps(sptr + 4);
00332                 x2 = _mm_load_ps(sptr + 8);
00333                 x3 = _mm_load_ps(sptr + 12);
00334                 _mm_storeu_ps(dst + dststep + i, updateOp(s0, x0));
00335                 _mm_storeu_ps(dst + dststep + i + 4, updateOp(s1, x1));
00336                 _mm_storeu_ps(dst + dststep + i + 8, updateOp(s2, x2));
00337                 _mm_storeu_ps(dst + dststep + i + 12, updateOp(s3, x3));
00338             }
00339 
00340             for( ; i <= width - 4; i += 4 )
00341             {
00342                 __m128 s0 = _mm_load_ps(src[1] + i), x0;
00343 
00344                 for( k = 2; k < _ksize; k++ )
00345                 {
00346                     x0 = _mm_load_ps(src[k] + i);
00347                     s0 = updateOp(s0, x0);
00348                 }
00349 
00350                 x0 = _mm_load_ps(src[0] + i);
00351                 _mm_storeu_ps(dst + i, updateOp(s0, x0));
00352                 x0 = _mm_load_ps(src[k] + i);
00353                 _mm_storeu_ps(dst + dststep + i, updateOp(s0, x0));
00354             }
00355         }
00356 
00357         for( ; count > 0; count--, dst += dststep, src++ )
00358         {
00359             for( i = 0; i <= width - 16; i += 16 )
00360             {
00361                 const float* sptr = src[0] + i;
00362                 __m128 s0 = _mm_load_ps(sptr);
00363                 __m128 s1 = _mm_load_ps(sptr + 4);
00364                 __m128 s2 = _mm_load_ps(sptr + 8);
00365                 __m128 s3 = _mm_load_ps(sptr + 12);
00366                 __m128 x0, x1, x2, x3;
00367 
00368                 for( k = 1; k < _ksize; k++ )
00369                 {
00370                     sptr = src[k] + i;
00371                     x0 = _mm_load_ps(sptr);
00372                     x1 = _mm_load_ps(sptr + 4);
00373                     s0 = updateOp(s0, x0);
00374                     s1 = updateOp(s1, x1);
00375                     x2 = _mm_load_ps(sptr + 8);
00376                     x3 = _mm_load_ps(sptr + 12);
00377                     s2 = updateOp(s2, x2);
00378                     s3 = updateOp(s3, x3);
00379                 }
00380                 _mm_storeu_ps(dst + i, s0);
00381                 _mm_storeu_ps(dst + i + 4, s1);
00382                 _mm_storeu_ps(dst + i + 8, s2);
00383                 _mm_storeu_ps(dst + i + 12, s3);
00384             }
00385 
00386             for( i = 0; i <= width - 4; i += 4 )
00387             {
00388                 __m128 s0 = _mm_load_ps(src[0] + i), x0;
00389                 for( k = 1; k < _ksize; k++ )
00390                 {
00391                     x0 = _mm_load_ps(src[k] + i);
00392                     s0 = updateOp(s0, x0);
00393                 }
00394                 _mm_storeu_ps(dst + i, s0);
00395             }
00396         }
00397 
00398         return i;
00399     }
00400 
00401     int ksize, anchor;
00402 };
00403 
00404 
00405 template<class VecUpdate> struct MorphIVec
00406 {
00407     enum { ESZ = VecUpdate::ESZ };
00408 
00409     int operator()(uchar** src, int nz, uchar* dst, int width) const
00410     {
00411         if( !checkHardwareSupport(CV_CPU_SSE2) )
00412             return 0;
00413 
00414         int i, k;
00415         width *= ESZ;
00416         VecUpdate updateOp;
00417 
00418         for( i = 0; i <= width - 32; i += 32 )
00419         {
00420             const uchar* sptr = src[0] + i;
00421             __m128i s0 = _mm_loadu_si128((const __m128i*)sptr);
00422             __m128i s1 = _mm_loadu_si128((const __m128i*)(sptr + 16));
00423             __m128i x0, x1;
00424 
00425             for( k = 1; k < nz; k++ )
00426             {
00427                 sptr = src[k] + i;
00428                 x0 = _mm_loadu_si128((const __m128i*)sptr);
00429                 x1 = _mm_loadu_si128((const __m128i*)(sptr + 16));
00430                 s0 = updateOp(s0, x0);
00431                 s1 = updateOp(s1, x1);
00432             }
00433             _mm_storeu_si128((__m128i*)(dst + i), s0);
00434             _mm_storeu_si128((__m128i*)(dst + i + 16), s1);
00435         }
00436 
00437         for( ; i <= width - 8; i += 8 )
00438         {
00439             __m128i s0 = _mm_loadl_epi64((const __m128i*)(src[0] + i)), x0;
00440 
00441             for( k = 1; k < nz; k++ )
00442             {
00443                 x0 = _mm_loadl_epi64((const __m128i*)(src[k] + i));
00444                 s0 = updateOp(s0, x0);
00445             }
00446             _mm_storel_epi64((__m128i*)(dst + i), s0);
00447         }
00448 
00449         return i/ESZ;
00450     }
00451 };
00452 
00453 
00454 template<class VecUpdate> struct MorphFVec
00455 {
00456     int operator()(uchar** _src, int nz, uchar* _dst, int width) const
00457     {
00458         if( !checkHardwareSupport(CV_CPU_SSE) )
00459             return 0;
00460 
00461         const float** src = (const float**)_src;
00462         float* dst = (float*)_dst;
00463         int i, k;
00464         VecUpdate updateOp;
00465 
00466         for( i = 0; i <= width - 16; i += 16 )
00467         {
00468             const float* sptr = src[0] + i;
00469             __m128 s0 = _mm_loadu_ps(sptr);
00470             __m128 s1 = _mm_loadu_ps(sptr + 4);
00471             __m128 s2 = _mm_loadu_ps(sptr + 8);
00472             __m128 s3 = _mm_loadu_ps(sptr + 12);
00473             __m128 x0, x1, x2, x3;
00474 
00475             for( k = 1; k < nz; k++ )
00476             {
00477                 sptr = src[k] + i;
00478                 x0 = _mm_loadu_ps(sptr);
00479                 x1 = _mm_loadu_ps(sptr + 4);
00480                 x2 = _mm_loadu_ps(sptr + 8);
00481                 x3 = _mm_loadu_ps(sptr + 12);
00482                 s0 = updateOp(s0, x0);
00483                 s1 = updateOp(s1, x1);
00484                 s2 = updateOp(s2, x2);
00485                 s3 = updateOp(s3, x3);
00486             }
00487             _mm_storeu_ps(dst + i, s0);
00488             _mm_storeu_ps(dst + i + 4, s1);
00489             _mm_storeu_ps(dst + i + 8, s2);
00490             _mm_storeu_ps(dst + i + 12, s3);
00491         }
00492 
00493         for( ; i <= width - 4; i += 4 )
00494         {
00495             __m128 s0 = _mm_loadu_ps(src[0] + i), x0;
00496 
00497             for( k = 1; k < nz; k++ )
00498             {
00499                 x0 = _mm_loadu_ps(src[k] + i);
00500                 s0 = updateOp(s0, x0);
00501             }
00502             _mm_storeu_ps(dst + i, s0);
00503         }
00504 
00505         for( ; i < width; i++ )
00506         {
00507             __m128 s0 = _mm_load_ss(src[0] + i), x0;
00508 
00509             for( k = 1; k < nz; k++ )
00510             {
00511                 x0 = _mm_load_ss(src[k] + i);
00512                 s0 = updateOp(s0, x0);
00513             }
00514             _mm_store_ss(dst + i, s0);
00515         }
00516 
00517         return i;
00518     }
00519 };
00520 
00521 struct VMin8u
00522 {
00523     enum { ESZ = 1 };
00524     __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_min_epu8(a,b); }
00525 };
00526 struct VMax8u
00527 {
00528     enum { ESZ = 1 };
00529     __m128i operator()(const __m128i& a, const __m128i& b) const { return _mm_max_epu8(a,b); }
00530 };
00531 struct VMin16u
00532 {
00533     enum { ESZ = 2 };
00534     __m128i operator()(const __m128i& a, const __m128i& b) const
00535     { return _mm_subs_epu16(a,_mm_subs_epu16(a,b)); }
00536 };
00537 struct VMax16u
00538 {
00539     enum { ESZ = 2 };
00540     __m128i operator()(const __m128i& a, const __m128i& b) const
00541     { return _mm_adds_epu16(_mm_subs_epu16(a,b), b); }
00542 };
00543 struct VMin16s
00544 {
00545     enum { ESZ = 2 };
00546     __m128i operator()(const __m128i& a, const __m128i& b) const
00547     { return _mm_min_epi16(a, b); }
00548 };
00549 struct VMax16s
00550 {
00551     enum { ESZ = 2 };
00552     __m128i operator()(const __m128i& a, const __m128i& b) const
00553     { return _mm_max_epi16(a, b); }
00554 };
00555 struct VMin32f { __m128 operator()(const __m128& a, const __m128& b) const { return _mm_min_ps(a,b); }};
00556 struct VMax32f { __m128 operator()(const __m128& a, const __m128& b) const { return _mm_max_ps(a,b); }};
00557 
00558 typedef MorphRowIVec<VMin8u> ErodeRowVec8u;
00559 typedef MorphRowIVec<VMax8u> DilateRowVec8u;
00560 typedef MorphRowIVec<VMin16u> ErodeRowVec16u;
00561 typedef MorphRowIVec<VMax16u> DilateRowVec16u;
00562 typedef MorphRowIVec<VMin16s> ErodeRowVec16s;
00563 typedef MorphRowIVec<VMax16s> DilateRowVec16s;
00564 typedef MorphRowFVec<VMin32f> ErodeRowVec32f;
00565 typedef MorphRowFVec<VMax32f> DilateRowVec32f;
00566 
00567 typedef MorphColumnIVec<VMin8u> ErodeColumnVec8u;
00568 typedef MorphColumnIVec<VMax8u> DilateColumnVec8u;
00569 typedef MorphColumnIVec<VMin16u> ErodeColumnVec16u;
00570 typedef MorphColumnIVec<VMax16u> DilateColumnVec16u;
00571 typedef MorphColumnIVec<VMin16s> ErodeColumnVec16s;
00572 typedef MorphColumnIVec<VMax16s> DilateColumnVec16s;
00573 typedef MorphColumnFVec<VMin32f> ErodeColumnVec32f;
00574 typedef MorphColumnFVec<VMax32f> DilateColumnVec32f;
00575 
00576 typedef MorphIVec<VMin8u> ErodeVec8u;
00577 typedef MorphIVec<VMax8u> DilateVec8u;
00578 typedef MorphIVec<VMin16u> ErodeVec16u;
00579 typedef MorphIVec<VMax16u> DilateVec16u;
00580 typedef MorphIVec<VMin16s> ErodeVec16s;
00581 typedef MorphIVec<VMax16s> DilateVec16s;
00582 typedef MorphFVec<VMin32f> ErodeVec32f;
00583 typedef MorphFVec<VMax32f> DilateVec32f;
00584 
00585 #else
00586 
00587 #ifdef HAVE_TEGRA_OPTIMIZATION
00588 using tegra::ErodeRowVec8u;
00589 using tegra::DilateRowVec8u;
00590 
00591 using tegra::ErodeColumnVec8u;
00592 using tegra::DilateColumnVec8u;
00593 #else
00594 typedef MorphRowNoVec ErodeRowVec8u;
00595 typedef MorphRowNoVec DilateRowVec8u;
00596 
00597 typedef MorphColumnNoVec ErodeColumnVec8u;
00598 typedef MorphColumnNoVec DilateColumnVec8u;
00599 #endif
00600 
00601 typedef MorphRowNoVec ErodeRowVec16u;
00602 typedef MorphRowNoVec DilateRowVec16u;
00603 typedef MorphRowNoVec ErodeRowVec16s;
00604 typedef MorphRowNoVec DilateRowVec16s;
00605 typedef MorphRowNoVec ErodeRowVec32f;
00606 typedef MorphRowNoVec DilateRowVec32f;
00607 
00608 typedef MorphColumnNoVec ErodeColumnVec16u;
00609 typedef MorphColumnNoVec DilateColumnVec16u;
00610 typedef MorphColumnNoVec ErodeColumnVec16s;
00611 typedef MorphColumnNoVec DilateColumnVec16s;
00612 typedef MorphColumnNoVec ErodeColumnVec32f;
00613 typedef MorphColumnNoVec DilateColumnVec32f;
00614 
00615 typedef MorphNoVec ErodeVec8u;
00616 typedef MorphNoVec DilateVec8u;
00617 typedef MorphNoVec ErodeVec16u;
00618 typedef MorphNoVec DilateVec16u;
00619 typedef MorphNoVec ErodeVec16s;
00620 typedef MorphNoVec DilateVec16s;
00621 typedef MorphNoVec ErodeVec32f;
00622 typedef MorphNoVec DilateVec32f;
00623 
00624 #endif
00625 
00626 typedef MorphRowNoVec ErodeRowVec64f;
00627 typedef MorphRowNoVec DilateRowVec64f;
00628 typedef MorphColumnNoVec ErodeColumnVec64f;
00629 typedef MorphColumnNoVec DilateColumnVec64f;
00630 typedef MorphNoVec ErodeVec64f;
00631 typedef MorphNoVec DilateVec64f;
00632 
00633 
00634 template<class Op, class VecOp> struct MorphRowFilter : public BaseRowFilter
00635 {
00636     typedef typename Op::rtype T;
00637 
00638     MorphRowFilter( int _ksize, int _anchor ) : vecOp(_ksize, _anchor)
00639     {
00640         ksize = _ksize;
00641         anchor = _anchor;
00642     }
00643 
00644     void operator()(const uchar* src, uchar* dst, int width, int cn)
00645     {
00646         int i, j, k, _ksize = ksize*cn;
00647         const T* S = (const T*)src;
00648         Op op;
00649         T* D = (T*)dst;
00650 
00651         if( _ksize == cn )
00652         {
00653             for( i = 0; i < width*cn; i++ )
00654                 D[i] = S[i];
00655             return;
00656         }
00657 
00658         int i0 = vecOp(src, dst, width, cn);
00659         width *= cn;
00660 
00661         for( k = 0; k < cn; k++, S++, D++ )
00662         {
00663             for( i = i0; i <= width - cn*2; i += cn*2 )
00664             {
00665                 const T* s = S + i;
00666                 T m = s[cn];
00667                 for( j = cn*2; j < _ksize; j += cn )
00668                     m = op(m, s[j]);
00669                 D[i] = op(m, s[0]);
00670                 D[i+cn] = op(m, s[j]);
00671             }
00672 
00673             for( ; i < width; i += cn )
00674             {
00675                 const T* s = S + i;
00676                 T m = s[0];
00677                 for( j = cn; j < _ksize; j += cn )
00678                     m = op(m, s[j]);
00679                 D[i] = m;
00680             }
00681         }
00682     }
00683 
00684     VecOp vecOp;
00685 };
00686 
00687 
00688 template<class Op, class VecOp> struct MorphColumnFilter : public BaseColumnFilter
00689 {
00690     typedef typename Op::rtype T;
00691 
00692     MorphColumnFilter( int _ksize, int _anchor ) : vecOp(_ksize, _anchor)
00693     {
00694         ksize = _ksize;
00695         anchor = _anchor;
00696     }
00697 
00698     void operator()(const uchar** _src, uchar* dst, int dststep, int count, int width)
00699     {
00700         int i, k, _ksize = ksize;
00701         const T** src = (const T**)_src;
00702         T* D = (T*)dst;
00703         Op op;
00704 
00705         int i0 = vecOp(_src, dst, dststep, count, width);
00706         dststep /= sizeof(D[0]);
00707 
00708         for( ; _ksize > 1 && count > 1; count -= 2, D += dststep*2, src += 2 )
00709         {
00710             i = i0;
00711             #if CV_ENABLE_UNROLLED
00712             for( ; i <= width - 4; i += 4 )
00713             {
00714                 const T* sptr = src[1] + i;
00715                 T s0 = sptr[0], s1 = sptr[1], s2 = sptr[2], s3 = sptr[3];
00716 
00717                 for( k = 2; k < _ksize; k++ )
00718                 {
00719                     sptr = src[k] + i;
00720                     s0 = op(s0, sptr[0]); s1 = op(s1, sptr[1]);
00721                     s2 = op(s2, sptr[2]); s3 = op(s3, sptr[3]);
00722                 }
00723 
00724                 sptr = src[0] + i;
00725                 D[i] = op(s0, sptr[0]);
00726                 D[i+1] = op(s1, sptr[1]);
00727                 D[i+2] = op(s2, sptr[2]);
00728                 D[i+3] = op(s3, sptr[3]);
00729 
00730                 sptr = src[k] + i;
00731                 D[i+dststep] = op(s0, sptr[0]);
00732                 D[i+dststep+1] = op(s1, sptr[1]);
00733                 D[i+dststep+2] = op(s2, sptr[2]);
00734                 D[i+dststep+3] = op(s3, sptr[3]);
00735             }
00736             #endif
00737             for( ; i < width; i++ )
00738             {
00739                 T s0 = src[1][i];
00740 
00741                 for( k = 2; k < _ksize; k++ )
00742                     s0 = op(s0, src[k][i]);
00743 
00744                 D[i] = op(s0, src[0][i]);
00745                 D[i+dststep] = op(s0, src[k][i]);
00746             }
00747         }
00748 
00749         for( ; count > 0; count--, D += dststep, src++ )
00750         {
00751             i = i0;
00752             #if CV_ENABLE_UNROLLED
00753             for( ; i <= width - 4; i += 4 )
00754             {
00755                 const T* sptr = src[0] + i;
00756                 T s0 = sptr[0], s1 = sptr[1], s2 = sptr[2], s3 = sptr[3];
00757 
00758                 for( k = 1; k < _ksize; k++ )
00759                 {
00760                     sptr = src[k] + i;
00761                     s0 = op(s0, sptr[0]); s1 = op(s1, sptr[1]);
00762                     s2 = op(s2, sptr[2]); s3 = op(s3, sptr[3]);
00763                 }
00764 
00765                 D[i] = s0; D[i+1] = s1;
00766                 D[i+2] = s2; D[i+3] = s3;
00767             }
00768             #endif
00769             for( ; i < width; i++ )
00770             {
00771                 T s0 = src[0][i];
00772                 for( k = 1; k < _ksize; k++ )
00773                     s0 = op(s0, src[k][i]);
00774                 D[i] = s0;
00775             }
00776         }
00777     }
00778 
00779     VecOp vecOp;
00780 };
00781 
00782 
00783 template<class Op, class VecOp> struct MorphFilter : BaseFilter
00784 {
00785     typedef typename Op::rtype T;
00786 
00787     MorphFilter( const Mat& _kernel, Point _anchor )
00788     {
00789         anchor = _anchor;
00790         ksize = _kernel.size();
00791         CV_Assert( _kernel.type() == CV_8U );
00792 
00793         std::vector<uchar> coeffs; // we do not really the values of non-zero
00794         // kernel elements, just their locations
00795         preprocess2DKernel( _kernel, coords, coeffs );
00796         ptrs.resize( coords.size() );
00797     }
00798 
00799     void operator()(const uchar** src, uchar* dst, int dststep, int count, int width, int cn)
00800     {
00801         const Point* pt = &coords[0];
00802         const T** kp = (const T**)&ptrs[0];
00803         int i, k, nz = (int)coords.size();
00804         Op op;
00805 
00806         width *= cn;
00807         for( ; count > 0; count--, dst += dststep, src++ )
00808         {
00809             T* D = (T*)dst;
00810 
00811             for( k = 0; k < nz; k++ )
00812                 kp[k] = (const T*)src[pt[k].y] + pt[k].x*cn;
00813 
00814             i = vecOp(&ptrs[0], nz, dst, width);
00815             #if CV_ENABLE_UNROLLED
00816             for( ; i <= width - 4; i += 4 )
00817             {
00818                 const T* sptr = kp[0] + i;
00819                 T s0 = sptr[0], s1 = sptr[1], s2 = sptr[2], s3 = sptr[3];
00820 
00821                 for( k = 1; k < nz; k++ )
00822                 {
00823                     sptr = kp[k] + i;
00824                     s0 = op(s0, sptr[0]); s1 = op(s1, sptr[1]);
00825                     s2 = op(s2, sptr[2]); s3 = op(s3, sptr[3]);
00826                 }
00827 
00828                 D[i] = s0; D[i+1] = s1;
00829                 D[i+2] = s2; D[i+3] = s3;
00830             }
00831             #endif
00832             for( ; i < width; i++ )
00833             {
00834                 T s0 = kp[0][i];
00835                 for( k = 1; k < nz; k++ )
00836                     s0 = op(s0, kp[k][i]);
00837                 D[i] = s0;
00838             }
00839         }
00840     }
00841 
00842     std::vector<Point> coords;
00843     std::vector<uchar*> ptrs;
00844     VecOp vecOp;
00845 };
00846 
00847 }
00848 
00849 /////////////////////////////////// External Interface /////////////////////////////////////
00850 
00851 cv::Ptr<cv::BaseRowFilter> cv::getMorphologyRowFilter(int op, int type, int ksize, int anchor)
00852 {
00853     int depth = CV_MAT_DEPTH(type);
00854     if( anchor < 0 )
00855         anchor = ksize/2;
00856     CV_Assert( op == MORPH_ERODE || op == MORPH_DILATE );
00857     if( op == MORPH_ERODE )
00858     {
00859         if( depth == CV_8U )
00860             return makePtr<MorphRowFilter<MinOp<uchar>,
00861                                       ErodeRowVec8u> >(ksize, anchor);
00862         if( depth == CV_16U )
00863             return makePtr<MorphRowFilter<MinOp<ushort>,
00864                                       ErodeRowVec16u> >(ksize, anchor);
00865         if( depth == CV_16S )
00866             return makePtr<MorphRowFilter<MinOp<short>,
00867                                       ErodeRowVec16s> >(ksize, anchor);
00868         if( depth == CV_32F )
00869             return makePtr<MorphRowFilter<MinOp<float>,
00870                                       ErodeRowVec32f> >(ksize, anchor);
00871         if( depth == CV_64F )
00872             return makePtr<MorphRowFilter<MinOp<double>,
00873                                       ErodeRowVec64f> >(ksize, anchor);
00874     }
00875     else
00876     {
00877         if( depth == CV_8U )
00878             return makePtr<MorphRowFilter<MaxOp<uchar>,
00879                                       DilateRowVec8u> >(ksize, anchor);
00880         if( depth == CV_16U )
00881             return makePtr<MorphRowFilter<MaxOp<ushort>,
00882                                       DilateRowVec16u> >(ksize, anchor);
00883         if( depth == CV_16S )
00884             return makePtr<MorphRowFilter<MaxOp<short>,
00885                                       DilateRowVec16s> >(ksize, anchor);
00886         if( depth == CV_32F )
00887             return makePtr<MorphRowFilter<MaxOp<float>,
00888                                       DilateRowVec32f> >(ksize, anchor);
00889         if( depth == CV_64F )
00890             return makePtr<MorphRowFilter<MaxOp<double>,
00891                                       DilateRowVec64f> >(ksize, anchor);
00892     }
00893 
00894     CV_Error_( CV_StsNotImplemented, ("Unsupported data type (=%d)", type));
00895     return Ptr<BaseRowFilter> ();
00896 }
00897 
00898 cv::Ptr<cv::BaseColumnFilter> cv::getMorphologyColumnFilter(int op, int type, int ksize, int anchor)
00899 {
00900     int depth = CV_MAT_DEPTH(type);
00901     if( anchor < 0 )
00902         anchor = ksize/2;
00903     CV_Assert( op == MORPH_ERODE || op == MORPH_DILATE );
00904     if( op == MORPH_ERODE )
00905     {
00906         if( depth == CV_8U )
00907             return makePtr<MorphColumnFilter<MinOp<uchar>,
00908                                          ErodeColumnVec8u> >(ksize, anchor);
00909         if( depth == CV_16U )
00910             return makePtr<MorphColumnFilter<MinOp<ushort>,
00911                                          ErodeColumnVec16u> >(ksize, anchor);
00912         if( depth == CV_16S )
00913             return makePtr<MorphColumnFilter<MinOp<short>,
00914                                          ErodeColumnVec16s> >(ksize, anchor);
00915         if( depth == CV_32F )
00916             return makePtr<MorphColumnFilter<MinOp<float>,
00917                                          ErodeColumnVec32f> >(ksize, anchor);
00918         if( depth == CV_64F )
00919             return makePtr<MorphColumnFilter<MinOp<double>,
00920                                          ErodeColumnVec64f> >(ksize, anchor);
00921     }
00922     else
00923     {
00924         if( depth == CV_8U )
00925             return makePtr<MorphColumnFilter<MaxOp<uchar>,
00926                                          DilateColumnVec8u> >(ksize, anchor);
00927         if( depth == CV_16U )
00928             return makePtr<MorphColumnFilter<MaxOp<ushort>,
00929                                          DilateColumnVec16u> >(ksize, anchor);
00930         if( depth == CV_16S )
00931             return makePtr<MorphColumnFilter<MaxOp<short>,
00932                                          DilateColumnVec16s> >(ksize, anchor);
00933         if( depth == CV_32F )
00934             return makePtr<MorphColumnFilter<MaxOp<float>,
00935                                          DilateColumnVec32f> >(ksize, anchor);
00936         if( depth == CV_64F )
00937             return makePtr<MorphColumnFilter<MaxOp<double>,
00938                                          DilateColumnVec64f> >(ksize, anchor);
00939     }
00940 
00941     CV_Error_( CV_StsNotImplemented, ("Unsupported data type (=%d)", type));
00942     return Ptr<BaseColumnFilter> ();
00943 }
00944 
00945 
00946 cv::Ptr<cv::BaseFilter> cv::getMorphologyFilter(int op, int type, InputArray _kernel, Point anchor)
00947 {
00948     Mat kernel = _kernel.getMat();
00949     int depth = CV_MAT_DEPTH(type);
00950     anchor = normalizeAnchor(anchor, kernel.size());
00951     CV_Assert( op == MORPH_ERODE || op == MORPH_DILATE );
00952     if( op == MORPH_ERODE )
00953     {
00954         if( depth == CV_8U )
00955             return makePtr<MorphFilter<MinOp<uchar>, ErodeVec8u> >(kernel, anchor);
00956         if( depth == CV_16U )
00957             return makePtr<MorphFilter<MinOp<ushort>, ErodeVec16u> >(kernel, anchor);
00958         if( depth == CV_16S )
00959             return makePtr<MorphFilter<MinOp<short>, ErodeVec16s> >(kernel, anchor);
00960         if( depth == CV_32F )
00961             return makePtr<MorphFilter<MinOp<float>, ErodeVec32f> >(kernel, anchor);
00962         if( depth == CV_64F )
00963             return makePtr<MorphFilter<MinOp<double>, ErodeVec64f> >(kernel, anchor);
00964     }
00965     else
00966     {
00967         if( depth == CV_8U )
00968             return makePtr<MorphFilter<MaxOp<uchar>, DilateVec8u> >(kernel, anchor);
00969         if( depth == CV_16U )
00970             return makePtr<MorphFilter<MaxOp<ushort>, DilateVec16u> >(kernel, anchor);
00971         if( depth == CV_16S )
00972             return makePtr<MorphFilter<MaxOp<short>, DilateVec16s> >(kernel, anchor);
00973         if( depth == CV_32F )
00974             return makePtr<MorphFilter<MaxOp<float>, DilateVec32f> >(kernel, anchor);
00975         if( depth == CV_64F )
00976             return makePtr<MorphFilter<MaxOp<double>, DilateVec64f> >(kernel, anchor);
00977     }
00978 
00979     CV_Error_( CV_StsNotImplemented, ("Unsupported data type (=%d)", type));
00980     return Ptr<BaseFilter> ();
00981 }
00982 
00983 
00984 cv::Ptr<cv::FilterEngine> cv::createMorphologyFilter( int op, int type, InputArray _kernel,
00985                                                       Point anchor, int _rowBorderType, int _columnBorderType,
00986                                                       const Scalar & _borderValue )
00987 {
00988     Mat kernel = _kernel.getMat();
00989     anchor = normalizeAnchor(anchor, kernel.size());
00990 
00991     Ptr<BaseRowFilter>  rowFilter;
00992     Ptr<BaseColumnFilter>  columnFilter;
00993     Ptr<BaseFilter>  filter2D;
00994 
00995     if( countNonZero(kernel) == kernel.rows*kernel.cols )
00996     {
00997         // rectangular structuring element
00998         rowFilter = getMorphologyRowFilter(op, type, kernel.cols, anchor.x);
00999         columnFilter = getMorphologyColumnFilter(op, type, kernel.rows, anchor.y);
01000     }
01001     else
01002         filter2D = getMorphologyFilter(op, type, kernel, anchor);
01003 
01004     Scalar  borderValue = _borderValue;
01005     if( (_rowBorderType == BORDER_CONSTANT || _columnBorderType == BORDER_CONSTANT) &&
01006             borderValue == morphologyDefaultBorderValue() )
01007     {
01008         int depth = CV_MAT_DEPTH(type);
01009         CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_16S ||
01010                    depth == CV_32F || depth == CV_64F );
01011         if( op == MORPH_ERODE )
01012             borderValue = Scalar::all( depth == CV_8U ? (double)UCHAR_MAX :
01013                                        depth == CV_16U ? (double)USHRT_MAX :
01014                                        depth == CV_16S ? (double)SHRT_MAX :
01015                                        depth == CV_32F ? (double)FLT_MAX : DBL_MAX);
01016         else
01017             borderValue = Scalar::all( depth == CV_8U || depth == CV_16U ?
01018                                            0. :
01019                                        depth == CV_16S ? (double)SHRT_MIN :
01020                                        depth == CV_32F ? (double)-FLT_MAX : -DBL_MAX);
01021     }
01022 
01023     return makePtr<FilterEngine>(filter2D, rowFilter, columnFilter,
01024                                  type, type, type, _rowBorderType, _columnBorderType, borderValue );
01025 }
01026 
01027 
01028 cv::Mat cv::getStructuringElement(int shape, Size ksize, Point anchor)
01029 {
01030     int i, j;
01031     int r = 0, c = 0;
01032     double inv_r2 = 0;
01033 
01034     CV_Assert( shape == MORPH_RECT || shape == MORPH_CROSS || shape == MORPH_ELLIPSE );
01035 
01036     anchor = normalizeAnchor(anchor, ksize);
01037 
01038     if( ksize == Size(1,1) )
01039         shape = MORPH_RECT;
01040 
01041     if( shape == MORPH_ELLIPSE )
01042     {
01043         r = ksize.height/2;
01044         c = ksize.width/2;
01045         inv_r2 = r ? 1./((double)r*r) : 0;
01046     }
01047 
01048     Mat elem(ksize, CV_8U);
01049 
01050     for( i = 0; i < ksize.height; i++ )
01051     {
01052         uchar* ptr = elem.ptr(i);
01053         int j1 = 0, j2 = 0;
01054 
01055         if( shape == MORPH_RECT || (shape == MORPH_CROSS && i == anchor.y) )
01056             j2 = ksize.width;
01057         else if( shape == MORPH_CROSS )
01058             j1 = anchor.x, j2 = j1 + 1;
01059         else
01060         {
01061             int dy = i - r;
01062             if( std::abs(dy) <= r )
01063             {
01064                 int dx = saturate_cast<int>(c*std::sqrt((r*r - dy*dy)*inv_r2));
01065                 j1 = std::max( c - dx, 0 );
01066                 j2 = std::min( c + dx + 1, ksize.width );
01067             }
01068         }
01069 
01070         for( j = 0; j < j1; j++ )
01071             ptr[j] = 0;
01072         for( ; j < j2; j++ )
01073             ptr[j] = 1;
01074         for( ; j < ksize.width; j++ )
01075             ptr[j] = 0;
01076     }
01077 
01078     return elem;
01079 }
01080 
01081 namespace cv
01082 {
01083 
01084 class MorphologyRunner : public ParallelLoopBody
01085 {
01086 public:
01087     MorphologyRunner(Mat _src, Mat _dst, int _nStripes, int _iterations,
01088                      int _op, Mat _kernel, Point _anchor,
01089                      int _rowBorderType, int _columnBorderType, const Scalar& _borderValue) :
01090         borderValue(_borderValue)
01091     {
01092         src = _src;
01093         dst = _dst;
01094 
01095         nStripes = _nStripes;
01096         iterations = _iterations;
01097 
01098         op = _op;
01099         kernel = _kernel;
01100         anchor = _anchor;
01101         rowBorderType = _rowBorderType;
01102         columnBorderType = _columnBorderType;
01103     }
01104 
01105     void operator () ( const Range& range ) const
01106     {
01107         int row0 = std::min(cvRound(range.start * src.rows / nStripes), src.rows);
01108         int row1 = std::min(cvRound(range.end * src.rows / nStripes), src.rows);
01109 
01110         /*if(0)
01111             printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n",
01112                    src.rows, src.cols, range.start, range.end, row0, row1);*/
01113 
01114         Mat srcStripe = src.rowRange(row0, row1);
01115         Mat dstStripe = dst.rowRange(row0, row1);
01116 
01117         Ptr<FilterEngine> f = createMorphologyFilter(op, src.type(), kernel, anchor,
01118                                                      rowBorderType, columnBorderType, borderValue );
01119 
01120         f->apply( srcStripe, dstStripe );
01121         for( int i = 1; i < iterations; i++ )
01122             f->apply( dstStripe, dstStripe );
01123     }
01124 
01125 private:
01126     Mat src;
01127     Mat dst;
01128     int nStripes;
01129     int iterations;
01130 
01131     int op;
01132     Mat kernel;
01133     Point anchor;
01134     int rowBorderType;
01135     int columnBorderType;
01136     Scalar borderValue;
01137 };
01138 
01139 #ifdef HAVE_IPP
// Attempts one erode/dilate pass through Intel IPP with replicated borders.
//
// Returns true only if an IPP processing call was issued and reported a
// non-negative status. Returns false for image types other than
// CV_8UC1/3/4 and CV_32FC1/3/4, for a non-centered anchor on the paths that
// require a centered one, for any IPP failure, and always when built against
// IPP older than 8.1 (IPP_VERSION_X100 < 810).
//
//   op         - MORPH_ERODE selects the erode/min call; any other value
//                selects the dilate/max call
//   src, dst   - input and output images; dst is written through dst.ptr()
//                and is not allocated here
//   kernel     - structuring element mask; only read when rectKernel is false
//   ksize      - kernel size handed to IPP
//   anchor     - kernel anchor
//   rectKernel - true: use the IPP min/max filters; false: use the mask-based
//                IPP morphology functions
01140 static bool ipp_MorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kernel,
01141                               const Size& ksize, const Point &anchor, bool rectKernel)
01142 {
01143 #if IPP_VERSION_X100 >= 810
01144     int type = src.type();
01145     const Mat* _src = &src;
01146     Mat temp;
          // In-place request (same data pointer): read from a private copy of the
          // input so the IPP call does not read the buffer it is writing.
01147     if (src.data == dst.data)
01148     {
01149         src.copyTo(temp);
01150         _src = &temp;
01151     }
01152 
01153     IppiSize roiSize = {src.cols, src.rows};
01154     IppiSize kernelSize = {ksize.width, ksize.height};
01155 
          // Arbitrary mask: mask-based morphology. Both IPP version branches below
          // bail out unless the anchor is the kernel center; they differ only in
          // whether GetSize/Init take the full ROI or just its width.
01156     if (!rectKernel)
01157     {
01158 #if IPP_VERSION_X100 >= 900
01159         if (((kernel.cols - 1) / 2 != anchor.x) || ((kernel.rows - 1) / 2 != anchor.y))
01160             return false;
01161         #define IPP_MORPH_CASE(cvtype, flavor, data_type) \
01162         case cvtype: \
01163             {\
01164                 int specSize = 0, bufferSize = 0;\
01165                 if (0 > ippiMorphologyBorderGetSize_##flavor(roiSize, kernelSize, &specSize, &bufferSize))\
01166                     return false;\
01167                 IppiMorphState *pSpec = (IppiMorphState*)ippMalloc(specSize);\
01168                 Ipp8u *pBuffer = (Ipp8u*)ippMalloc(bufferSize);\
01169                 if (0 > ippiMorphologyBorderInit_##flavor(roiSize, kernel.ptr(), kernelSize, pSpec, pBuffer))\
01170                 {\
01171                     ippFree(pBuffer);\
01172                     ippFree(pSpec);\
01173                     return false;\
01174                 }\
01175                 bool ok = false;\
01176                 if (op == MORPH_ERODE)\
01177                     ok = (0 <= ippiErodeBorder_##flavor(_src->ptr<Ipp##data_type>(), (int)_src->step[0], dst.ptr<Ipp##data_type>(), (int)dst.step[0],\
01178                                             roiSize, ippBorderRepl, 0, pSpec, pBuffer));\
01179                 else\
01180                     ok = (0 <= ippiDilateBorder_##flavor(_src->ptr<Ipp##data_type>(), (int)_src->step[0], dst.ptr<Ipp##data_type>(), (int)dst.step[0],\
01181                                             roiSize, ippBorderRepl, 0, pSpec, pBuffer));\
01182                 ippFree(pBuffer);\
01183                 ippFree(pSpec);\
01184                 return ok;\
01185             }\
01186             break;
01187 #else
01188         if (((kernel.cols - 1) / 2 != anchor.x) || ((kernel.rows - 1) / 2 != anchor.y))
01189             return false;
01190         #define IPP_MORPH_CASE(cvtype, flavor, data_type) \
01191         case cvtype: \
01192             {\
01193                 int specSize = 0, bufferSize = 0;\
01194                 if (0 > ippiMorphologyBorderGetSize_##flavor(roiSize.width, kernelSize, &specSize, &bufferSize))\
01195                     return false;\
01196                 IppiMorphState *pSpec = (IppiMorphState*)ippMalloc(specSize);\
01197                 Ipp8u *pBuffer = (Ipp8u*)ippMalloc(bufferSize);\
01198                 if (0 > ippiMorphologyBorderInit_##flavor(roiSize.width, kernel.ptr(), kernelSize, pSpec, pBuffer))\
01199                 {\
01200                     ippFree(pBuffer);\
01201                     ippFree(pSpec);\
01202                     return false;\
01203                 }\
01204                 bool ok = false;\
01205                 if (op == MORPH_ERODE)\
01206                     ok = (0 <= ippiErodeBorder_##flavor(_src->ptr<Ipp##data_type>(), (int)_src->step[0], dst.ptr<Ipp##data_type>(), (int)dst.step[0],\
01207                                             roiSize, ippBorderRepl, 0, pSpec, pBuffer));\
01208                 else\
01209                     ok = (0 <= ippiDilateBorder_##flavor(_src->ptr<Ipp##data_type>(), (int)_src->step[0], dst.ptr<Ipp##data_type>(), (int)dst.step[0],\
01210                                             roiSize, ippBorderRepl, 0, pSpec, pBuffer));\
01211                 ippFree(pBuffer);\
01212                 ippFree(pSpec);\
01213                 return ok;\
01214             }\
01215             break;
01216 #endif
              // Every supported case returns from inside the macro; unsupported
              // types fall through to the final "return false".
01217         CV_SUPPRESS_DEPRECATED_START
01218         switch (type)
01219         {
01220         IPP_MORPH_CASE(CV_8UC1, 8u_C1R, 8u);
01221         IPP_MORPH_CASE(CV_8UC3, 8u_C3R, 8u);
01222         IPP_MORPH_CASE(CV_8UC4, 8u_C4R, 8u);
01223         IPP_MORPH_CASE(CV_32FC1, 32f_C1R, 32f);
01224         IPP_MORPH_CASE(CV_32FC3, 32f_C3R, 32f);
01225         IPP_MORPH_CASE(CV_32FC4, 32f_C4R, 32f);
01226         default:
01227             ;
01228         }
01229         CV_SUPPRESS_DEPRECATED_END
01230         #undef IPP_MORPH_CASE
01231     }
01232     else
01233     {
          // Rectangular kernel: IPP min/max filters. The whole branch is compiled
          // out for IPP 9.0.0, in which case the function returns false.
01234 #if IPP_VERSION_X100 != 900 // Problems with accuracy in 9.0.0
01235 #if IPP_VERSION_X100 >= 900
01236         if (((kernelSize.width - 1) / 2 != anchor.x) || ((kernelSize.height - 1) / 2 != anchor.y)) // Arbitrary anchor is no longer supported since IPP 9.0.0
01237             return false;
01238 
01239         #define IPP_MORPH_CASE(cvtype, flavor, data_type, cn) \
01240         case cvtype: \
01241             {\
01242                 if (op == MORPH_ERODE)\
01243                 {\
01244                     int bufSize = 0;\
01245                     if (0 > ippiFilterMinBorderGetBufferSize(roiSize, kernelSize, ipp##data_type, cn, &bufSize))\
01246                         return false;\
01247                     AutoBuffer<uchar> buf(bufSize + 64);\
01248                     uchar* buffer = alignPtr((uchar*)buf, 32);\
01249                     return (0 <= ippiFilterMinBorder_##flavor(_src->ptr<Ipp##data_type>(), (int)_src->step[0], dst.ptr<Ipp##data_type>(), (int)dst.step[0], roiSize, kernelSize, ippBorderRepl, 0, buffer));\
01250                 }\
01251                 else\
01252                 {\
01253                     int bufSize = 0;\
01254                     if (0 > ippiFilterMaxBorderGetBufferSize(roiSize, kernelSize, ipp##data_type, cn, &bufSize))\
01255                         return false;\
01256                     AutoBuffer<uchar> buf(bufSize + 64);\
01257                     uchar* buffer = alignPtr((uchar*)buf, 32);\
01258                     return (0 <= ippiFilterMaxBorder_##flavor(_src->ptr<Ipp##data_type>(), (int)_src->step[0], dst.ptr<Ipp##data_type>(), (int)dst.step[0], roiSize, kernelSize, ippBorderRepl, 0, buffer));\
01259                 }\
01260             }\
01261             break;
01262 #else
              // Before IPP 9.0 the anchor is passed to the replicate-border filters
              // explicitly, so no centering check is made on this path.
01263         IppiPoint point = {anchor.x, anchor.y};
01264 
01265         #define IPP_MORPH_CASE(cvtype, flavor, data_type, cn) \
01266         case cvtype: \
01267             {\
01268                 int bufSize = 0;\
01269                 if (0 > ippiFilterMinGetBufferSize_##flavor(src.cols, kernelSize, &bufSize))\
01270                     return false;\
01271                 AutoBuffer<uchar> buf(bufSize + 64);\
01272                 uchar* buffer = alignPtr((uchar*)buf, 32);\
01273                 if (op == MORPH_ERODE)\
01274                     return (0 <= ippiFilterMinBorderReplicate_##flavor(_src->ptr<Ipp##data_type>(), (int)_src->step[0], dst.ptr<Ipp##data_type>(), (int)dst.step[0], roiSize, kernelSize, point, buffer));\
01275                 return (0 <= ippiFilterMaxBorderReplicate_##flavor(_src->ptr<Ipp##data_type>(), (int)_src->step[0], dst.ptr<Ipp##data_type>(), (int)dst.step[0], roiSize, kernelSize, point, buffer));\
01276             }\
01277             break;
01278 #endif
01279 
01280         CV_SUPPRESS_DEPRECATED_START
01281         switch (type)
01282         {
01283         IPP_MORPH_CASE(CV_8UC1, 8u_C1R, 8u, 1);
01284         IPP_MORPH_CASE(CV_8UC3, 8u_C3R, 8u, 3);
01285         IPP_MORPH_CASE(CV_8UC4, 8u_C4R, 8u, 4);
01286         IPP_MORPH_CASE(CV_32FC1, 32f_C1R, 32f, 1);
01287         IPP_MORPH_CASE(CV_32FC3, 32f_C3R, 32f, 3);
01288         IPP_MORPH_CASE(CV_32FC4, 32f_C4R, 32f, 4);
01289         default:
01290             ;
01291         }
01292         CV_SUPPRESS_DEPRECATED_END
01293         #undef IPP_MORPH_CASE
01294 #endif
01295     }
01296 #else
01297     CV_UNUSED(op); CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(kernel); CV_UNUSED(ksize); CV_UNUSED(anchor); CV_UNUSED(rectKernel);
01298 #endif
01299     return false;
01300 }
01301 
01302 static bool ipp_MorphOp(int op, InputArray _src, OutputArray _dst,
01303     const Mat& _kernel, Point anchor, int iterations,
01304     int borderType, const Scalar &borderValue)
01305 {
01306     Mat src = _src.getMat(), kernel = _kernel;
01307     int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
01308 
01309     if( !( depth == CV_8U || depth == CV_32F ) || !(cn == 1 || cn == 3 || cn == 4) ||
01310         !( borderType == cv::BORDER_REPLICATE || (borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue() &&
01311         kernel.size() == Size(3,3)) ) || !( op == MORPH_DILATE || op == MORPH_ERODE) || _src.isSubmatrix() )
01312         return false;
01313 
01314     // In case BORDER_CONSTANT, IPPMorphReplicate works correct with kernels of size 3*3 only
01315     if( borderType == cv::BORDER_CONSTANT && kernel.data )
01316     {
01317         int x, y;
01318         for( y = 0; y < kernel.rows; y++ )
01319         {
01320             if( kernel.at<uchar>(y, anchor.x) != 0 )
01321                 continue;
01322             for( x = 0; x < kernel.cols; x++ )
01323             {
01324                 if( kernel.at<uchar>(y,x) != 0 )
01325                     return false;
01326             }
01327         }
01328         for( x = 0; x < kernel.cols; x++ )
01329         {
01330             if( kernel.at<uchar>(anchor.y, x) != 0 )
01331                 continue;
01332             for( y = 0; y < kernel.rows; y++ )
01333             {
01334                 if( kernel.at<uchar>(y,x) != 0 )
01335                     return false;
01336             }
01337         }
01338 
01339     }
01340     Size ksize = !kernel.empty() ? kernel.size() : Size(3,3);
01341 
01342     _dst.create( src.size(), src.type() );
01343     Mat dst = _dst.getMat();
01344 
01345     if( iterations == 0 || kernel.rows*kernel.cols == 1 )
01346     {
01347         src.copyTo(dst);
01348         return true;
01349     }
01350 
01351     bool rectKernel = false;
01352     if( kernel.empty() )
01353     {
01354         ksize = Size(1+iterations*2,1+iterations*2);
01355         anchor = Point(iterations, iterations);
01356         rectKernel = true;
01357         iterations = 1;
01358     }
01359     else if( iterations >= 1 && countNonZero(kernel) == kernel.rows*kernel.cols )
01360     {
01361         ksize = Size(ksize.width + (iterations-1)*(ksize.width-1),
01362              ksize.height + (iterations-1)*(ksize.height-1)),
01363         anchor = Point(anchor.x*iterations, anchor.y*iterations);
01364         kernel = Mat();
01365         rectKernel = true;
01366         iterations = 1;
01367     }
01368 
01369     // TODO: implement the case of iterations > 1.
01370     if( iterations > 1 )
01371         return false;
01372 
01373     return ipp_MorphReplicate( op, src, dst, kernel, ksize, anchor, rectKernel );
01374 }
01375 #endif
01376 
01377 #ifdef HAVE_OPENCL
01378 
// Rounds sz up to the nearest multiple of n (for non-negative sz, positive n).
01379 #define ROUNDUP(sz, n)      ((sz) + (n) - 1 - (((sz) + (n) - 1) % (n)))
01380 
// Runs a morphology operation with the "filterSmall" OpenCL kernel.
//
//   _src, _dst - input and output arrays; _dst is created with the size and
//                type of _src
//   _kernel    - structuring element; every non-zero element contributes one
//                PROCESS(y,x) term to the generated kernel options
//   anchor     - kernel anchor; a negative component defaults to half the
//                kernel size
//   borderType - border mode, optionally combined with BORDER_ISOLATED; after
//                that flag is removed the value indexes borderMap
//   op         - operation, used as an index into op2str
//   actual_op  - negative means "same as op"; when it differs from op, a
//                second operation define is appended to the build options
//   _extraMat  - optional extra input; must be non-empty when actual_op > 3
//
// Returns false when: there are more than 4 channels; the depth is CV_64F and
// the device has no double support; the source offset or step is not a
// multiple of the element size; the image (the ROI if BORDER_ISOLATED is set,
// otherwise the whole parent matrix) is smaller than the kernel; or the OpenCL
// kernel could not be built. Otherwise returns the result of kernel.run().
01381 static bool ocl_morphSmall( InputArray _src, OutputArray _dst, InputArray _kernel, Point anchor, int borderType,
01382                             int op, int actual_op = -1, InputArray _extraMat = noArray())
01383 {
01384     const ocl::Device & dev = ocl::Device::getDefault();
01385     int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), esz = CV_ELEM_SIZE(type);
01386     bool doubleSupport = dev.doubleFPConfig() > 0;
01387 
01388     if (cn > 4 || (!doubleSupport && depth == CV_64F) ||
01389         _src.offset() % esz != 0 || _src.step() % esz != 0)
01390         return false;
01391 
01392     bool haveExtraMat = !_extraMat.empty();
01393     CV_Assert(actual_op <= 3 || haveExtraMat);
01394 
01395     Size ksize = _kernel.size();
01396     if (anchor.x < 0)
01397         anchor.x = ksize.width / 2;
01398     if (anchor.y < 0)
01399         anchor.y = ksize.height / 2;
01400 
01401     Size size = _src.size(), wholeSize;
01402     bool isolated = (borderType & BORDER_ISOLATED) != 0;
01403     borderType &= ~BORDER_ISOLATED;
          // Working type (WT): 8-bit data gets a 32-bit integer working type. It is
          // only put into the build options when an extra matrix is present.
01404     int wdepth = depth, wtype = type;
01405     if (depth == CV_8U)
01406     {
01407         wdepth = CV_32S;
01408         wtype = CV_MAKETYPE(wdepth, cn);
01409     }
01410     char cvt[2][40];
01411 
01412     const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE",
01413                                        "BORDER_REFLECT", 0, "BORDER_REFLECT_101" };
01414     size_t globalsize[2] = { (size_t)size.width, (size_t)size.height };
01415 
01416     UMat src = _src.getUMat();
01417     if (!isolated)
01418     {
01419         Point ofs;
01420         src.locateROI(wholeSize, ofs);
01421     }
01422 
01423     int h = isolated ? size.height : wholeSize.height;
01424     int w = isolated ? size.width : wholeSize.width;
01425     if (w < ksize.width || h < ksize.height)
01426         return false;
01427 
01428     // Figure out what vector size to use for loading the pixels.
01429     int pxLoadNumPixels = cn != 1 || size.width % 4 ? 1 : 4;
01430     int pxLoadVecSize = cn * pxLoadNumPixels;
01431 
01432     // Figure out how many pixels per work item to compute in X and Y
01433     // directions.  Too many and we run out of registers.
01434     int pxPerWorkItemX = 1, pxPerWorkItemY = 1;
01435     if (cn <= 2 && ksize.width <= 4 && ksize.height <= 4)
01436     {
01437         pxPerWorkItemX = size.width % 8 ? size.width % 4 ? size.width % 2 ? 1 : 2 : 4 : 8;
01438         pxPerWorkItemY = size.height % 2 ? 1 : 2;
01439     }
01440     else if (cn < 4 || (ksize.width <= 4 && ksize.height <= 4))
01441     {
01442         pxPerWorkItemX = size.width % 2 ? 1 : 2;
01443         pxPerWorkItemY = size.height % 2 ? 1 : 2;
01444     }
01445     globalsize[0] = size.width / pxPerWorkItemX;
01446     globalsize[1] = size.height / pxPerWorkItemY;
01447 
01448     // Need some padding in the private array for pixels
01449     int privDataWidth = ROUNDUP(pxPerWorkItemX + ksize.width - 1, pxLoadNumPixels);
01450 
01451     // Make the global size a nice round number so the runtime can pick
01452     // from reasonable choices for the workgroup size
01453     const int wgRound = 256;
01454     globalsize[0] = ROUNDUP(globalsize[0], wgRound);
01455 
01456     if (actual_op < 0)
01457         actual_op = op;
01458 
01459     // build processing
01460     String processing;
01461     Mat kernel8u;
01462     _kernel.getMat().convertTo(kernel8u, CV_8U);
01463     for (int y = 0; y < kernel8u.rows; ++y)
01464         for (int x = 0; x < kernel8u.cols; ++x)
01465             if (kernel8u.at<uchar>(y, x) != 0)
01466                 processing += format("PROCESS(%d,%d)", y, x);
01467 
01468 
          // Build options for the kernel. op and actual_op index op2str, and
          // borderType indexes borderMap; both tables contain null entries.
01469     static const char * const op2str[] = { "OP_ERODE", "OP_DILATE", NULL, NULL, "OP_GRADIENT", "OP_TOPHAT", "OP_BLACKHAT" };
01470     String opts = format("-D cn=%d "
01471             "-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d "
01472             "-D PX_LOAD_VEC_SIZE=%d -D PX_LOAD_NUM_PX=%d -D DEPTH_%d "
01473             "-D PX_PER_WI_X=%d -D PX_PER_WI_Y=%d -D PRIV_DATA_WIDTH=%d -D %s -D %s "
01474             "-D PX_LOAD_X_ITERATIONS=%d -D PX_LOAD_Y_ITERATIONS=%d "
01475             "-D srcT=%s -D srcT1=%s -D dstT=srcT -D dstT1=srcT1 -D WT=%s -D WT1=%s "
01476             "-D convertToWT=%s -D convertToDstT=%s -D PX_LOAD_FLOAT_VEC_CONV=convert_%s -D PROCESS_ELEM_=%s -D %s%s",
01477             cn, anchor.x, anchor.y, ksize.width, ksize.height,
01478             pxLoadVecSize, pxLoadNumPixels, depth,
01479             pxPerWorkItemX, pxPerWorkItemY, privDataWidth, borderMap[borderType],
01480             isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED",
01481             privDataWidth / pxLoadNumPixels, pxPerWorkItemY + ksize.height - 1,
01482             ocl::typeToStr(type), ocl::typeToStr(depth),
01483             haveExtraMat ? ocl::typeToStr(wtype):"srcT",//to prevent overflow - WT
01484             haveExtraMat ? ocl::typeToStr(wdepth):"srcT1",//to prevent overflow - WT1
01485             haveExtraMat ? ocl::convertTypeStr(depth, wdepth, cn, cvt[0]) : "noconvert",//to prevent overflow - src to WT
01486             haveExtraMat ? ocl::convertTypeStr(wdepth, depth, cn, cvt[1]) : "noconvert",//to prevent overflow - WT to dst
01487             ocl::typeToStr(CV_MAKE_TYPE(haveExtraMat ? wdepth : depth, pxLoadVecSize)), //PX_LOAD_FLOAT_VEC_CONV
01488             processing.c_str(), op2str[op],
01489             actual_op == op ? "" : cv::format(" -D %s", op2str[actual_op]).c_str());
01490 
01491     ocl::Kernel kernel("filterSmall", cv::ocl::imgproc::filterSmall_oclsrc, opts);
01492     if (kernel.empty())
01493         return false;
01494 
01495     _dst.create(size, type);
01496     UMat dst = _dst.getUMat();
01497 
          // If src and dst refer to the same underlying buffer (same UMatData), the
          // kernel must not read what it writes: widen src to its whole parent
          // matrix, copy that into a separate buffer, then shrink both views back
          // to the original ROI.
01498     UMat source;
01499     if(src.u != dst.u)
01500         source = src;
01501     else
01502     {
01503         Point ofs;
01504         int cols =  src.cols, rows = src.rows;
01505         src.locateROI(wholeSize, ofs);
01506         src.adjustROI(ofs.y, wholeSize.height - rows - ofs.y, ofs.x, wholeSize.width - cols - ofs.x);
01507         src.copyTo(source);
01508 
01509         src.adjustROI(-ofs.y, -wholeSize.height + rows + ofs.y, -ofs.x, -wholeSize.width + cols + ofs.x);
01510         source.adjustROI(-ofs.y, -wholeSize.height + rows + ofs.y, -ofs.x, -wholeSize.width + cols + ofs.x);
01511         source.locateROI(wholeSize, ofs);
01512     }
01513 
01514     UMat extraMat = _extraMat.getUMat();
01515 
          // Kernel arguments: source pointer and step, the ROI origin in elements
          // and rows, the end coordinates of the readable area, the destination,
          // and the extra matrix if there is one.
01516     int idxArg = kernel.set(0, ocl::KernelArg::PtrReadOnly(source));
01517     idxArg = kernel.set(idxArg, (int)source.step);
01518     int srcOffsetX = (int)((source.offset % source.step) / source.elemSize());
01519     int srcOffsetY = (int)(source.offset / source.step);
01520     int srcEndX = isolated ? srcOffsetX + size.width : wholeSize.width;
01521     int srcEndY = isolated ? srcOffsetY + size.height : wholeSize.height;
01522     idxArg = kernel.set(idxArg, srcOffsetX);
01523     idxArg = kernel.set(idxArg, srcOffsetY);
01524     idxArg = kernel.set(idxArg, srcEndX);
01525     idxArg = kernel.set(idxArg, srcEndY);
01526     idxArg = kernel.set(idxArg, ocl::KernelArg::WriteOnly(dst));
01527 
01528     if (haveExtraMat)
01529     {
01530         idxArg = kernel.set(idxArg, ocl::KernelArg::ReadOnlyNoSize(extraMat));
01531     }
01532 
01533     return kernel.run(2, globalsize, NULL, false);
01534 
01535 }
01536 
01537 static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel,
01538                         Point anchor, int iterations, int op, int borderType,
01539                         const Scalar &, int actual_op = -1, InputArray _extraMat = noArray())
01540 {
01541     const ocl::Device & dev = ocl::Device::getDefault();
01542     int type = _src.type(), depth = CV_MAT_DEPTH(type),
01543             cn = CV_MAT_CN(type), esz = CV_ELEM_SIZE(type);
01544     Mat kernel = _kernel.getMat();
01545     Size ksize = !kernel.empty() ? kernel.size() : Size(3, 3), ssize = _src.size();
01546 
01547     bool doubleSupport = dev.doubleFPConfig() > 0;
01548     if ((depth == CV_64F && !doubleSupport) || borderType != BORDER_CONSTANT)
01549         return false;
01550 
01551     bool haveExtraMat = !_extraMat.empty();
01552     CV_Assert(actual_op <= 3 || haveExtraMat);
01553 
01554     if (kernel.empty())
01555     {
01556         kernel = getStructuringElement(MORPH_RECT, Size(1+iterations*2,1+iterations*2));
01557         anchor = Point(iterations, iterations);
01558         iterations = 1;
01559     }
01560     else if( iterations > 1 && countNonZero(kernel) == kernel.rows*kernel.cols )
01561     {
01562         anchor = Point(anchor.x*iterations, anchor.y*iterations);
01563         kernel = getStructuringElement(MORPH_RECT,
01564                                        Size(ksize.width + (iterations-1)*(ksize.width-1),
01565                                             ksize.height + (iterations-1)*(ksize.height-1)),
01566                                        anchor);
01567         iterations = 1;
01568     }
01569 
01570     // try to use OpenCL kernel adopted for small morph kernel
01571     if (dev.isIntel() && !(dev.type() & ocl::Device::TYPE_CPU) &&
01572         ((ksize.width < 5 && ksize.height < 5 && esz <= 4) ||
01573          (ksize.width == 5 && ksize.height == 5 && cn == 1)) &&
01574          (iterations == 1)
01575 #if defined __APPLE__
01576          && cn == 1
01577 #endif
01578          )
01579     {
01580         if (ocl_morphSmall(_src, _dst, kernel, anchor, borderType, op, actual_op, _extraMat))
01581             return true;
01582     }
01583 
01584     if (iterations == 0 || kernel.rows*kernel.cols == 1)
01585     {
01586         _src.copyTo(_dst);
01587         return true;
01588     }
01589 
01590 #ifdef ANDROID
01591     size_t localThreads[2] = { 16, 8 };
01592 #else
01593     size_t localThreads[2] = { 16, 16 };
01594 #endif
01595     size_t globalThreads[2] = { (size_t)ssize.width, (size_t)ssize.height };
01596 
01597 #ifdef __APPLE__
01598     if( actual_op != MORPH_ERODE && actual_op != MORPH_DILATE )
01599         localThreads[0] = localThreads[1] = 4;
01600 #endif
01601 
01602     if (localThreads[0]*localThreads[1] * 2 < (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1))
01603         return false;
01604 
01605 #ifdef ANDROID
01606     if (dev.isNVidia())
01607         return false;
01608 #endif
01609 
01610     // build processing
01611     String processing;
01612     Mat kernel8u;
01613     kernel.convertTo(kernel8u, CV_8U);
01614     for (int y = 0; y < kernel8u.rows; ++y)
01615         for (int x = 0; x < kernel8u.cols; ++x)
01616             if (kernel8u.at<uchar>(y, x) != 0)
01617                 processing += format("PROCESS(%d,%d)", y, x);
01618 
01619     static const char * const op2str[] = { "OP_ERODE", "OP_DILATE", NULL, NULL, "OP_GRADIENT", "OP_TOPHAT", "OP_BLACKHAT" };
01620 
01621     char cvt[2][50];
01622     int wdepth = std::max(depth, CV_32F), scalarcn = cn == 3 ? 4 : cn;
01623 
01624     if (actual_op < 0)
01625         actual_op = op;
01626 
01627     std::vector<ocl::Kernel> kernels(iterations);
01628     for (int i = 0; i < iterations; i++)
01629     {
01630         int current_op = iterations == i + 1 ? actual_op : op;
01631         String buildOptions = format("-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D %s%s"
01632                                      " -D PROCESS_ELEMS=%s -D T=%s -D DEPTH_%d -D cn=%d -D T1=%s"
01633                                      " -D convertToWT=%s -D convertToT=%s -D ST=%s%s",
01634                                      anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1], op2str[op],
01635                                      doubleSupport ? " -D DOUBLE_SUPPORT" : "", processing.c_str(),
01636                                      ocl::typeToStr(type), depth, cn, ocl::typeToStr(depth),
01637                                      ocl::convertTypeStr(depth, wdepth, cn, cvt[0]),
01638                                      ocl::convertTypeStr(wdepth, depth, cn, cvt[1]),
01639                                      ocl::typeToStr(CV_MAKE_TYPE(depth, scalarcn)),
01640                                      current_op == op ? "" : cv::format(" -D %s", op2str[current_op]).c_str());
01641 
01642         kernels[i].create("morph", ocl::imgproc::morph_oclsrc, buildOptions);
01643         if (kernels[i].empty())
01644             return false;
01645     }
01646 
01647     UMat src = _src.getUMat(), extraMat = _extraMat.getUMat();
01648     _dst.create(src.size(), src.type());
01649     UMat dst = _dst.getUMat();
01650 
01651     if (iterations == 1 && src.u != dst.u)
01652     {
01653         Size wholesize;
01654         Point ofs;
01655         src.locateROI(wholesize, ofs);
01656         int wholecols = wholesize.width, wholerows = wholesize.height;
01657 
01658         if (haveExtraMat)
01659             kernels[0].args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnlyNoSize(dst),
01660                         ofs.x, ofs.y, src.cols, src.rows, wholecols, wholerows,
01661                         ocl::KernelArg::ReadOnlyNoSize(extraMat));
01662         else
01663             kernels[0].args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnlyNoSize(dst),
01664                         ofs.x, ofs.y, src.cols, src.rows, wholecols, wholerows);
01665 
01666         return kernels[0].run(2, globalThreads, localThreads, false);
01667     }
01668 
01669     for (int i = 0; i < iterations; i++)
01670     {
01671         UMat source;
01672         Size wholesize;
01673         Point ofs;
01674 
01675         if (i == 0)
01676         {
01677             int cols =  src.cols, rows = src.rows;
01678             src.locateROI(wholesize, ofs);
01679             src.adjustROI(ofs.y, wholesize.height - rows - ofs.y, ofs.x, wholesize.width - cols - ofs.x);
01680             if(src.u != dst.u)
01681                 source = src;
01682             else
01683                 src.copyTo(source);
01684 
01685             src.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
01686             source.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
01687         }
01688         else
01689         {
01690             int cols =  dst.cols, rows = dst.rows;
01691             dst.locateROI(wholesize, ofs);
01692             dst.adjustROI(ofs.y, wholesize.height - rows - ofs.y, ofs.x, wholesize.width - cols - ofs.x);
01693             dst.copyTo(source);
01694             dst.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
01695             source.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
01696         }
01697         source.locateROI(wholesize, ofs);
01698 
01699         if (haveExtraMat && iterations == i + 1)
01700             kernels[i].args(ocl::KernelArg::ReadOnlyNoSize(source), ocl::KernelArg::WriteOnlyNoSize(dst),
01701                 ofs.x, ofs.y, source.cols, source.rows, wholesize.width, wholesize.height,
01702                 ocl::KernelArg::ReadOnlyNoSize(extraMat));
01703         else
01704             kernels[i].args(ocl::KernelArg::ReadOnlyNoSize(source), ocl::KernelArg::WriteOnlyNoSize(dst),
01705                 ofs.x, ofs.y, source.cols, source.rows, wholesize.width, wholesize.height);
01706 
01707         if (!kernels[i].run(2, globalThreads, localThreads, false))
01708             return false;
01709     }
01710 
01711     return true;
01712 }
01713 
01714 #endif
01715 
01716 static void morphOp( int op, InputArray _src, OutputArray _dst,
01717                      InputArray _kernel,
01718                      Point anchor, int iterations,
01719                      int borderType, const Scalar& borderValue )
01720 {
01721     Mat kernel = _kernel.getMat();
01722     Size ksize = !kernel.empty() ? kernel.size() : Size(3,3);
01723     anchor = normalizeAnchor(anchor, ksize);
01724 
01725 #ifdef HAVE_OPENCL
01726     CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2 && _src.channels() <= 4 &&
01727                borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue() &&
01728                (op == MORPH_ERODE || op == MORPH_DILATE) &&
01729                anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 1,
01730                ocl_morphOp(_src, _dst, kernel, anchor, iterations, op, borderType, borderValue) )
01731 #endif
01732 
01733     if (iterations == 0 || kernel.rows*kernel.cols == 1)
01734     {
01735         _src.copyTo(_dst);
01736         return;
01737     }
01738 
01739     if (kernel.empty())
01740     {
01741         kernel = getStructuringElement(MORPH_RECT, Size(1+iterations*2,1+iterations*2));
01742         anchor = Point(iterations, iterations);
01743         iterations = 1;
01744     }
01745     else if( iterations > 1 && countNonZero(kernel) == kernel.rows*kernel.cols )
01746     {
01747         anchor = Point(anchor.x*iterations, anchor.y*iterations);
01748         kernel = getStructuringElement(MORPH_RECT,
01749                                        Size(ksize.width + (iterations-1)*(ksize.width-1),
01750                                             ksize.height + (iterations-1)*(ksize.height-1)),
01751                                        anchor);
01752         iterations = 1;
01753     }
01754 
01755     CV_IPP_RUN(IPP_VERSION_X100 >= 810, ipp_MorphOp(op, _src, _dst, kernel, anchor, iterations, borderType, borderValue))
01756 
01757     Mat src = _src.getMat();
01758     _dst.create( src.size(), src.type() );
01759     Mat dst = _dst.getMat();
01760 
01761     int nStripes = 1;
01762 #if defined HAVE_TEGRA_OPTIMIZATION
01763     if (src.data != dst.data && iterations == 1 &&  //NOTE: threads are not used for inplace processing
01764         (borderType & BORDER_ISOLATED) == 0 && //TODO: check border types
01765         src.rows >= 64 ) //NOTE: just heuristics
01766         nStripes = 4;
01767 #endif
01768 
01769     parallel_for_(Range(0, nStripes),
01770                   MorphologyRunner(src, dst, nStripes, iterations, op, kernel, anchor, borderType, borderType, borderValue));
01771 }
01772 
01773 }
01774 
01775 void cv::erode( InputArray src, OutputArray dst, InputArray kernel,
01776                 Point anchor, int iterations,
01777                 int borderType, const Scalar & borderValue )
01778 {
01779     morphOp( MORPH_ERODE, src, dst, kernel, anchor, iterations, borderType, borderValue );
01780 }
01781 
01782 
01783 void cv::dilate( InputArray src, OutputArray dst, InputArray kernel,
01784                  Point anchor, int iterations,
01785                  int borderType, const Scalar & borderValue )
01786 {
01787     morphOp( MORPH_DILATE, src, dst, kernel, anchor, iterations, borderType, borderValue );
01788 }
01789 
01790 #ifdef HAVE_OPENCL
01791 
01792 namespace cv {
01793 
01794 static bool ocl_morphologyEx(InputArray _src, OutputArray _dst, int op,
01795                              InputArray kernel, Point anchor, int iterations,
01796                              int borderType, const Scalar& borderValue)
01797 {
01798     _dst.createSameSize(_src, _src.type());
01799     bool submat = _dst.isSubmatrix();
01800     UMat temp;
01801     _OutputArray _temp = submat ? _dst : _OutputArray(temp);
01802 
01803     switch( op )
01804     {
01805     case MORPH_ERODE:
01806         if (!ocl_morphOp( _src, _dst, kernel, anchor, iterations, MORPH_ERODE, borderType, borderValue ))
01807             return false;
01808         break;
01809     case MORPH_DILATE:
01810         if (!ocl_morphOp( _src, _dst, kernel, anchor, iterations, MORPH_DILATE, borderType, borderValue ))
01811             return false;
01812         break;
01813     case MORPH_OPEN:
01814         if (!ocl_morphOp( _src, _temp, kernel, anchor, iterations, MORPH_ERODE, borderType, borderValue ))
01815             return false;
01816         if (!ocl_morphOp( _temp, _dst, kernel, anchor, iterations, MORPH_DILATE, borderType, borderValue ))
01817             return false;
01818         break;
01819     case MORPH_CLOSE:
01820         if (!ocl_morphOp( _src, _temp, kernel, anchor, iterations, MORPH_DILATE, borderType, borderValue ))
01821             return false;
01822         if (!ocl_morphOp( _temp, _dst, kernel, anchor, iterations, MORPH_ERODE, borderType, borderValue ))
01823             return false;
01824         break;
01825     case MORPH_GRADIENT:
01826         if (!ocl_morphOp( _src, temp, kernel, anchor, iterations, MORPH_ERODE, borderType, borderValue ))
01827             return false;
01828         if (!ocl_morphOp( _src, _dst, kernel, anchor, iterations, MORPH_DILATE, borderType, borderValue, MORPH_GRADIENT, temp ))
01829             return false;
01830         break;
01831     case MORPH_TOPHAT:
01832         if (!ocl_morphOp( _src, _temp, kernel, anchor, iterations, MORPH_ERODE, borderType, borderValue ))
01833             return false;
01834         if (!ocl_morphOp( _temp, _dst, kernel, anchor, iterations, MORPH_DILATE, borderType, borderValue, MORPH_TOPHAT, _src ))
01835             return false;
01836         break;
01837     case MORPH_BLACKHAT:
01838         if (!ocl_morphOp( _src, _temp, kernel, anchor, iterations, MORPH_DILATE, borderType, borderValue ))
01839             return false;
01840         if (!ocl_morphOp( _temp, _dst, kernel, anchor, iterations, MORPH_ERODE, borderType, borderValue, MORPH_BLACKHAT, _src ))
01841             return false;
01842         break;
01843     default:
01844         CV_Error( CV_StsBadArg, "unknown morphological operation" );
01845     }
01846 
01847     return true;
01848 }
01849 
01850 }
01851 
01852 #endif
01853 
01854 void cv::morphologyEx( InputArray _src, OutputArray _dst, int op,
01855                        InputArray _kernel, Point anchor, int iterations,
01856                        int borderType, const Scalar & borderValue )
01857 {
01858     Mat kernel = _kernel.getMat();
01859     if (kernel.empty())
01860     {
01861         kernel = getStructuringElement(MORPH_RECT, Size(3,3), Point(1,1));
01862     }
01863 #ifdef HAVE_OPENCL
01864     Size ksize = kernel.size();
01865     anchor = normalizeAnchor(anchor, ksize);
01866 
01867     CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2 && _src.channels() <= 4 &&
01868         anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 1 &&
01869         borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue(),
01870         ocl_morphologyEx(_src, _dst, op, kernel, anchor, iterations, borderType, borderValue))
01871 #endif
01872 
01873     Mat src = _src.getMat(), temp;
01874     _dst.create(src.size(), src.type());
01875     Mat dst = _dst.getMat();
01876 
01877     Mat k1, k2, e1, e2;     //only for hit and miss op
01878 
01879     switch( op )
01880     {
01881     case MORPH_ERODE:
01882         erode( src, dst, kernel, anchor, iterations, borderType, borderValue );
01883         break;
01884     case MORPH_DILATE:
01885         dilate( src, dst, kernel, anchor, iterations, borderType, borderValue );
01886         break;
01887     case MORPH_OPEN:
01888         erode( src, dst, kernel, anchor, iterations, borderType, borderValue );
01889         dilate( dst, dst, kernel, anchor, iterations, borderType, borderValue );
01890         break;
01891     case CV_MOP_CLOSE:
01892         dilate( src, dst, kernel, anchor, iterations, borderType, borderValue );
01893         erode( dst, dst, kernel, anchor, iterations, borderType, borderValue );
01894         break;
01895     case CV_MOP_GRADIENT:
01896         erode( src, temp, kernel, anchor, iterations, borderType, borderValue );
01897         dilate( src, dst, kernel, anchor, iterations, borderType, borderValue );
01898         dst -= temp;
01899         break;
01900     case CV_MOP_TOPHAT:
01901         if( src.data != dst.data )
01902             temp = dst;
01903         erode( src, temp, kernel, anchor, iterations, borderType, borderValue );
01904         dilate( temp, temp, kernel, anchor, iterations, borderType, borderValue );
01905         dst = src - temp;
01906         break;
01907     case CV_MOP_BLACKHAT:
01908         if( src.data != dst.data )
01909             temp = dst;
01910         dilate( src, temp, kernel, anchor, iterations, borderType, borderValue );
01911         erode( temp, temp, kernel, anchor, iterations, borderType, borderValue );
01912         dst = temp - src;
01913         break;
01914     case MORPH_HITMISS:
01915         CV_Assert(src.type() == CV_8UC1);
01916         k1 = (kernel == 1);
01917         k2 = (kernel == -1);
01918         if (countNonZero(k1) <= 0)
01919             e1 = src;
01920         else
01921             erode(src, e1, k1, anchor, iterations, borderType, borderValue);
01922         if (countNonZero(k2) <= 0)
01923             e2 = src;
01924         else
01925         {
01926             Mat src_complement;
01927             bitwise_not(src, src_complement);
01928             erode(src_complement, e2, k2, anchor, iterations, borderType, borderValue);
01929         }
01930         dst = e1 & e2;
01931         break;
01932     default:
01933         CV_Error( CV_StsBadArg, "unknown morphological operation" );
01934     }
01935 }
01936 
01937 CV_IMPL IplConvKernel *
01938 cvCreateStructuringElementEx( int cols, int rows,
01939                               int anchorX, int anchorY,
01940                               int shape, int *values )
01941 {
01942     cv::Size ksize = cv::Size(cols, rows);
01943     cv::Point  anchor = cv::Point (anchorX, anchorY);
01944     CV_Assert( cols > 0 && rows > 0 && anchor.inside(cv::Rect(0,0,cols,rows)) &&
01945                (shape != CV_SHAPE_CUSTOM || values != 0));
01946 
01947     int i, size = rows * cols;
01948     int element_size = sizeof(IplConvKernel) + size*sizeof(int);
01949     IplConvKernel *element = (IplConvKernel*)cvAlloc(element_size + 32);
01950 
01951     element->nCols = cols;
01952     element->nRows = rows;
01953     element->anchorX = anchorX;
01954     element->anchorY = anchorY;
01955     element->nShiftR = shape < CV_SHAPE_ELLIPSE ? shape : CV_SHAPE_CUSTOM;
01956     element->values = (int*)(element + 1);
01957 
01958     if( shape == CV_SHAPE_CUSTOM )
01959     {
01960         for( i = 0; i < size; i++ )
01961             element->values[i] = values[i];
01962     }
01963     else
01964     {
01965         cv::Mat elem = cv::getStructuringElement(shape, ksize, anchor);
01966         for( i = 0; i < size; i++ )
01967             element->values[i] = elem.ptr()[i];
01968     }
01969 
01970     return element;
01971 }
01972 
01973 
01974 CV_IMPL void
01975 cvReleaseStructuringElement( IplConvKernel ** element )
01976 {
01977     if( !element )
01978         CV_Error( CV_StsNullPtr, "" );
01979     cvFree( element );
01980 }
01981 
01982 
01983 static void convertConvKernel( const IplConvKernel* src, cv::Mat& dst, cv::Point & anchor )
01984 {
01985     if(!src)
01986     {
01987         anchor = cv::Point (1,1);
01988         dst.release();
01989         return;
01990     }
01991     anchor = cv::Point (src->anchorX, src->anchorY);
01992     dst.create(src->nRows, src->nCols, CV_8U);
01993 
01994     int i, size = src->nRows*src->nCols;
01995     for( i = 0; i < size; i++ )
01996         dst.ptr()[i] = (uchar)(src->values[i] != 0);
01997 }
01998 
01999 
02000 CV_IMPL void
02001 cvErode( const CvArr* srcarr, CvArr* dstarr, IplConvKernel* element, int iterations )
02002 {
02003     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), kernel;
02004     CV_Assert( src.size() == dst.size() && src.type() == dst.type() );
02005     cv::Point  anchor;
02006     convertConvKernel( element, kernel, anchor );
02007     cv::erode( src, dst, kernel, anchor, iterations, cv::BORDER_REPLICATE );
02008 }
02009 
02010 
02011 CV_IMPL void
02012 cvDilate( const CvArr* srcarr, CvArr* dstarr, IplConvKernel* element, int iterations )
02013 {
02014     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), kernel;
02015     CV_Assert( src.size() == dst.size() && src.type() == dst.type() );
02016     cv::Point  anchor;
02017     convertConvKernel( element, kernel, anchor );
02018     cv::dilate( src, dst, kernel, anchor, iterations, cv::BORDER_REPLICATE );
02019 }
02020 
02021 
02022 CV_IMPL void
02023 cvMorphologyEx( const void* srcarr, void* dstarr, void*,
02024                 IplConvKernel* element, int op, int iterations )
02025 {
02026     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), kernel;
02027     CV_Assert( src.size() == dst.size() && src.type() == dst.type() );
02028     cv::Point  anchor;
02029     IplConvKernel* temp_element = NULL;
02030     if (!element)
02031     {
02032         temp_element = cvCreateStructuringElementEx(3, 3, 1, 1, CV_SHAPE_RECT);
02033     } else {
02034         temp_element = element;
02035     }
02036     convertConvKernel( temp_element, kernel, anchor );
02037     if (!element)
02038     {
02039         cvReleaseStructuringElement(&temp_element);
02040     }
02041     cv::morphologyEx( src, dst, op, kernel, anchor, iterations, cv::BORDER_REPLICATE );
02042 }
02043 
02044 /* End of file. */
02045