Renesas GR-PEACH OpenCV Development / gr-peach-opencv-project-sd-card_update

Fork of gr-peach-opencv-project-sd-card by the do

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers canny.cpp Source File

canny.cpp

00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                        Intel License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2014, Itseez Inc., all rights reserved.
00015 // Third party copyrights are property of their respective owners.
00016 //
00017 // Redistribution and use in source and binary forms, with or without modification,
00018 // are permitted provided that the following conditions are met:
00019 //
00020 //   * Redistribution's of source code must retain the above copyright notice,
00021 //     this list of conditions and the following disclaimer.
00022 //
00023 //   * Redistribution's in binary form must reproduce the above copyright notice,
00024 //     this list of conditions and the following disclaimer in the documentation
00025 //     and/or other materials provided with the distribution.
00026 //
00027 //   * The name of Intel Corporation may not be used to endorse or promote products
00028 //     derived from this software without specific prior written permission.
00029 //
00030 // This software is provided by the copyright holders and contributors "as is" and
00031 // any express or implied warranties, including, but not limited to, the implied
00032 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00033 // In no event shall the Intel Corporation or contributors be liable for any direct,
00034 // indirect, incidental, special, exemplary, or consequential damages
00035 // (including, but not limited to, procurement of substitute goods or services;
00036 // loss of use, data, or profits; or business interruption) however caused
00037 // and on any theory of liability, whether in contract, strict liability,
00038 // or tort (including negligence or otherwise) arising in any way out of
00039 // the use of this software, even if advised of the possibility of such damage.
00040 //
00041 //M*/
00042 
00043 #include "precomp.hpp"
00044 #include "opencl_kernels_imgproc.hpp"
00045 
00046 
00047 #if defined (HAVE_IPP) && (IPP_VERSION_X100 >= 700)
00048 #define USE_IPP_CANNY 1
00049 #else
00050 #define USE_IPP_CANNY 0
00051 #endif
00052 
00053 
00054 namespace cv
00055 {
00056 #ifdef HAVE_IPP
00057 static bool ippCanny(const Mat& _src, Mat& _dst, float low,  float high)
00058 {
00059 #if USE_IPP_CANNY
00060     int size = 0, size1 = 0;
00061     IppiSize roi = { _src.cols, _src.rows };
00062 
00063 #if IPP_VERSION_X100 < 900
00064     if (ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size) < 0)
00065         return false;
00066     if (ippiFilterSobelHorizGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size1) < 0)
00067         return false;
00068 #else
00069     if (ippiFilterSobelNegVertBorderGetBufferSize(roi, ippMskSize3x3, ipp8u, ipp16s, 1, &size) < 0)
00070         return false;
00071     if (ippiFilterSobelHorizBorderGetBufferSize(roi, ippMskSize3x3, ipp8u, ipp16s, 1, &size1) < 0)
00072         return false;
00073 #endif
00074 
00075     size = std::max(size, size1);
00076 
00077     if (ippiCannyGetSize(roi, &size1) < 0)
00078         return false;
00079     size = std::max(size, size1);
00080 
00081     AutoBuffer<uchar> buf(size + 64);
00082     uchar* buffer = alignPtr((uchar*)buf, 32);
00083 
00084     Mat _dx(_src.rows, _src.cols, CV_16S);
00085     if( ippiFilterSobelNegVertBorder_8u16s_C1R(_src.ptr(), (int)_src.step,
00086                     _dx.ptr<short>(), (int)_dx.step, roi,
00087                     ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 )
00088         return false;
00089 
00090     Mat _dy(_src.rows, _src.cols, CV_16S);
00091     if( ippiFilterSobelHorizBorder_8u16s_C1R(_src.ptr(), (int)_src.step,
00092                     _dy.ptr<short>(), (int)_dy.step, roi,
00093                     ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 )
00094         return false;
00095 
00096     if( ippiCanny_16s8u_C1R(_dx.ptr<short>(), (int)_dx.step,
00097                                _dy.ptr<short>(), (int)_dy.step,
00098                               _dst.ptr(), (int)_dst.step, roi, low, high, buffer) < 0 )
00099         return false;
00100     return true;
00101 #else
00102     CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(low); CV_UNUSED(high);
00103     return false;
00104 #endif
00105 }
00106 #endif
00107 
00108 #ifdef HAVE_OPENCL
00109 
00110 static bool ocl_Canny(InputArray _src, OutputArray _dst, float low_thresh, float high_thresh,
00111                       int aperture_size, bool L2gradient, int cn, const Size & size)
00112 {
00113     UMat map;
00114 
00115     const ocl::Device &dev = ocl::Device::getDefault();
00116     int max_wg_size = (int)dev.maxWorkGroupSize();
00117 
00118     int lSizeX = 32;
00119     int lSizeY = max_wg_size / 32;
00120 
00121     if (lSizeY == 0)
00122     {
00123         lSizeX = 16;
00124         lSizeY = max_wg_size / 16;
00125     }
00126     if (lSizeY == 0)
00127     {
00128         lSizeY = 1;
00129     }
00130 
00131     if (L2gradient)
00132     {
00133         low_thresh = std::min(32767.0f, low_thresh);
00134         high_thresh = std::min(32767.0f, high_thresh);
00135 
00136         if (low_thresh > 0)
00137             low_thresh *= low_thresh;
00138         if (high_thresh > 0)
00139             high_thresh *= high_thresh;
00140     }
00141     int low = cvFloor(low_thresh), high = cvFloor(high_thresh);
00142 
00143     if (aperture_size == 3 && !_src.isSubmatrix())
00144     {
00145         /*
00146             stage1_with_sobel:
00147                 Sobel operator
00148                 Calc magnitudes
00149                 Non maxima suppression
00150                 Double thresholding
00151         */
00152         char cvt[40];
00153         ocl::Kernel with_sobel("stage1_with_sobel", ocl::imgproc::canny_oclsrc,
00154                                format("-D WITH_SOBEL -D cn=%d -D TYPE=%s -D convert_floatN=%s -D floatN=%s -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s",
00155                                       cn, ocl::memopTypeToStr(_src.depth()),
00156                                       ocl::convertTypeStr(_src.depth(), CV_32F, cn, cvt),
00157                                       ocl::typeToStr(CV_MAKE_TYPE(CV_32F, cn)),
00158                                       lSizeX, lSizeY,
00159                                       L2gradient ? " -D L2GRAD" : ""));
00160         if (with_sobel.empty())
00161             return false;
00162 
00163         UMat src = _src.getUMat();
00164         map.create(size, CV_32S);
00165         with_sobel.args(ocl::KernelArg::ReadOnly(src),
00166                         ocl::KernelArg::WriteOnlyNoSize(map),
00167                         (float) low, (float) high);
00168 
00169         size_t globalsize[2] = { (size_t)size.width, (size_t)size.height },
00170                 localsize[2] = { (size_t)lSizeX, (size_t)lSizeY };
00171 
00172         if (!with_sobel.run(2, globalsize, localsize, false))
00173             return false;
00174     }
00175     else
00176     {
00177         /*
00178             stage1_without_sobel:
00179                 Calc magnitudes
00180                 Non maxima suppression
00181                 Double thresholding
00182         */
00183         UMat dx, dy;
00184         Sobel(_src, dx, CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
00185         Sobel(_src, dy, CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
00186 
00187         ocl::Kernel without_sobel("stage1_without_sobel", ocl::imgproc::canny_oclsrc,
00188                                     format("-D WITHOUT_SOBEL -D cn=%d -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s",
00189                                            cn, lSizeX, lSizeY, L2gradient ? " -D L2GRAD" : ""));
00190         if (without_sobel.empty())
00191             return false;
00192 
00193         map.create(size, CV_32S);
00194         without_sobel.args(ocl::KernelArg::ReadOnlyNoSize(dx), ocl::KernelArg::ReadOnlyNoSize(dy),
00195                            ocl::KernelArg::WriteOnly(map),
00196                            low, high);
00197 
00198         size_t globalsize[2] = { (size_t)size.width, (size_t)size.height },
00199                 localsize[2] = { (size_t)lSizeX, (size_t)lSizeY };
00200 
00201         if (!without_sobel.run(2, globalsize, localsize, false))
00202             return false;
00203     }
00204 
00205     int PIX_PER_WI = 8;
00206     /*
00207         stage2:
00208             hysteresis (add weak edges if they are connected with strong edges)
00209     */
00210 
00211     int sizey = lSizeY / PIX_PER_WI;
00212     if (sizey == 0)
00213         sizey = 1;
00214 
00215     size_t globalsize[2] = { (size_t)size.width, ((size_t)size.height + PIX_PER_WI - 1) / PIX_PER_WI }, localsize[2] = { (size_t)lSizeX, (size_t)sizey };
00216 
00217     ocl::Kernel edgesHysteresis("stage2_hysteresis", ocl::imgproc::canny_oclsrc,
00218                                 format("-D STAGE2 -D PIX_PER_WI=%d -D LOCAL_X=%d -D LOCAL_Y=%d",
00219                                 PIX_PER_WI, lSizeX, sizey));
00220 
00221     if (edgesHysteresis.empty())
00222         return false;
00223 
00224     edgesHysteresis.args(ocl::KernelArg::ReadWrite(map));
00225     if (!edgesHysteresis.run(2, globalsize, localsize, false))
00226         return false;
00227 
00228     // get edges
00229 
00230     ocl::Kernel getEdgesKernel("getEdges", ocl::imgproc::canny_oclsrc,
00231                                 format("-D GET_EDGES -D PIX_PER_WI=%d", PIX_PER_WI));
00232     if (getEdgesKernel.empty())
00233         return false;
00234 
00235     _dst.create(size, CV_8UC1);
00236     UMat dst = _dst.getUMat();
00237 
00238     getEdgesKernel.args(ocl::KernelArg::ReadOnly(map), ocl::KernelArg::WriteOnlyNoSize(dst));
00239 
00240     return getEdgesKernel.run(2, globalsize, NULL, false);
00241 }
00242 
00243 #endif
00244 
00245 #ifdef HAVE_TBB
00246 
00247 // Queue with peaks that will processed serially.
00248 static tbb::concurrent_queue<uchar*> borderPeaks;
00249 
00250 class tbbCanny
00251 {
00252 public:
00253     tbbCanny(const Range _boundaries, const Mat& _src, uchar* _map, int _low,
00254             int _high, int _aperture_size, bool _L2gradient)
00255         : boundaries(_boundaries), src(_src), map(_map), low(_low), high(_high),
00256           aperture_size(_aperture_size), L2gradient(_L2gradient)
00257     {}
00258 
00259     // This parallel version of Canny algorithm splits the src image in threadsNumber horizontal slices.
00260     // The first row of each slice contains the last row of the previous slice and
00261     // the last row of each slice contains the first row of the next slice
00262     // so that each slice is independent and no mutexes are required.
00263     void operator()() const
00264     {
00265 #if CV_SSE2
00266         bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
00267 #endif
00268 
00269         const int type = src.type(), cn = CV_MAT_CN(type);
00270 
00271         Mat dx, dy;
00272 
00273         ptrdiff_t mapstep = src.cols + 2;
00274 
00275         // In sobel transform we calculate ksize2 extra lines for the first and last rows of each slice
00276         // because IPPDerivSobel expects only isolated ROIs, in contrast with the opencv version which
00277         // uses the pixels outside of the ROI to form a border.
00278         uchar ksize2 = aperture_size / 2;
00279 
00280         if (boundaries.start == 0 && boundaries.end == src.rows)
00281         {
00282             Mat tempdx(boundaries.end - boundaries.start + 2, src.cols, CV_16SC(cn));
00283             Mat tempdy(boundaries.end - boundaries.start + 2, src.cols, CV_16SC(cn));
00284 
00285             memset(tempdx.ptr<short>(0), 0, cn * src.cols*sizeof(short));
00286             memset(tempdy.ptr<short>(0), 0, cn * src.cols*sizeof(short));
00287             memset(tempdx.ptr<short>(tempdx.rows - 1), 0, cn * src.cols*sizeof(short));
00288             memset(tempdy.ptr<short>(tempdy.rows - 1), 0, cn * src.cols*sizeof(short));
00289 
00290             Sobel(src, tempdx.rowRange(1, tempdx.rows - 1), CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
00291             Sobel(src, tempdy.rowRange(1, tempdy.rows - 1), CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
00292 
00293             dx = tempdx;
00294             dy = tempdy;
00295         }
00296         else if (boundaries.start == 0)
00297         {
00298             Mat tempdx(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn));
00299             Mat tempdy(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn));
00300 
00301             memset(tempdx.ptr<short>(0), 0, cn * src.cols*sizeof(short));
00302             memset(tempdy.ptr<short>(0), 0, cn * src.cols*sizeof(short));
00303 
00304             Sobel(src.rowRange(boundaries.start, boundaries.end + 1 + ksize2), tempdx.rowRange(1, tempdx.rows),
00305                     CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
00306             Sobel(src.rowRange(boundaries.start, boundaries.end + 1 + ksize2), tempdy.rowRange(1, tempdy.rows),
00307                     CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
00308 
00309             dx = tempdx.rowRange(0, tempdx.rows - ksize2);
00310             dy = tempdy.rowRange(0, tempdy.rows - ksize2);
00311         }
00312         else if (boundaries.end == src.rows)
00313         {
00314             Mat tempdx(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn));
00315             Mat tempdy(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn));
00316 
00317             memset(tempdx.ptr<short>(tempdx.rows - 1), 0, cn * src.cols*sizeof(short));
00318             memset(tempdy.ptr<short>(tempdy.rows - 1), 0, cn * src.cols*sizeof(short));
00319 
00320             Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end), tempdx.rowRange(0, tempdx.rows - 1),
00321                     CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
00322             Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end), tempdy.rowRange(0, tempdy.rows - 1),
00323                     CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
00324 
00325             dx = tempdx.rowRange(ksize2, tempdx.rows);
00326             dy = tempdy.rowRange(ksize2, tempdy.rows);
00327         }
00328         else
00329         {
00330             Mat tempdx(boundaries.end - boundaries.start + 2 + 2*ksize2, src.cols, CV_16SC(cn));
00331             Mat tempdy(boundaries.end - boundaries.start + 2 + 2*ksize2, src.cols, CV_16SC(cn));
00332 
00333             Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end + 1 + ksize2), tempdx,
00334                     CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
00335             Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end + 1 + ksize2), tempdy,
00336                     CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
00337 
00338             dx = tempdx.rowRange(ksize2, tempdx.rows - ksize2);
00339             dy = tempdy.rowRange(ksize2, tempdy.rows - ksize2);
00340         }
00341 
00342         int maxsize = std::max(1 << 10, src.cols * (boundaries.end - boundaries.start) / 10);
00343         std::vector<uchar*> stack(maxsize);
00344         uchar **stack_top = &stack[0];
00345         uchar **stack_bottom = &stack[0];
00346 
00347         AutoBuffer<uchar> buffer(cn * mapstep * 3 * sizeof(int));
00348 
00349         int* mag_buf[3];
00350         mag_buf[0] = (int*)(uchar*)buffer;
00351         mag_buf[1] = mag_buf[0] + mapstep*cn;
00352         mag_buf[2] = mag_buf[1] + mapstep*cn;
00353 
00354         // calculate magnitude and angle of gradient, perform non-maxima suppression.
00355         // fill the map with one of the following values:
00356         //   0 - the pixel might belong to an edge
00357         //   1 - the pixel can not belong to an edge
00358         //   2 - the pixel does belong to an edge
00359         for (int i = boundaries.start - 1; i <= boundaries.end; i++)
00360         {
00361             int* _norm = mag_buf[(i > boundaries.start) - (i == boundaries.start - 1) + 1] + 1;
00362 
00363             short* _dx = dx.ptr<short>(i - boundaries.start + 1);
00364             short* _dy = dy.ptr<short>(i - boundaries.start + 1);
00365 
00366             if (!L2gradient)
00367             {
00368                 int j = 0, width = src.cols * cn;
00369 #if CV_SSE2
00370                 if (haveSSE2)
00371                 {
00372                     __m128i v_zero = _mm_setzero_si128();
00373                     for ( ; j <= width - 8; j += 8)
00374                     {
00375                         __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j));
00376                         __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j));
00377                         v_dx = _mm_max_epi16(v_dx, _mm_sub_epi16(v_zero, v_dx));
00378                         v_dy = _mm_max_epi16(v_dy, _mm_sub_epi16(v_zero, v_dy));
00379 
00380                         __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx, v_zero), _mm_unpacklo_epi16(v_dy, v_zero));
00381                         _mm_storeu_si128((__m128i *)(_norm + j), v_norm);
00382 
00383                         v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx, v_zero), _mm_unpackhi_epi16(v_dy, v_zero));
00384                         _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm);
00385                     }
00386                 }
00387 #elif CV_NEON
00388                 for ( ; j <= width - 8; j += 8)
00389                 {
00390                     int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
00391                     vst1q_s32(_norm + j, vaddq_s32(vabsq_s32(vmovl_s16(vget_low_s16(v_dx))),
00392                                                    vabsq_s32(vmovl_s16(vget_low_s16(v_dy)))));
00393                     vst1q_s32(_norm + j + 4, vaddq_s32(vabsq_s32(vmovl_s16(vget_high_s16(v_dx))),
00394                                                        vabsq_s32(vmovl_s16(vget_high_s16(v_dy)))));
00395                 }
00396 #endif
00397                 for ( ; j < width; ++j)
00398                     _norm[j] = std::abs(int(_dx[j])) + std::abs(int(_dy[j]));
00399             }
00400             else
00401             {
00402                 int j = 0, width = src.cols * cn;
00403 #if CV_SSE2
00404                 if (haveSSE2)
00405                 {
00406                     for ( ; j <= width - 8; j += 8)
00407                     {
00408                         __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j));
00409                         __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j));
00410 
00411                         __m128i v_dx_ml = _mm_mullo_epi16(v_dx, v_dx), v_dx_mh = _mm_mulhi_epi16(v_dx, v_dx);
00412                         __m128i v_dy_ml = _mm_mullo_epi16(v_dy, v_dy), v_dy_mh = _mm_mulhi_epi16(v_dy, v_dy);
00413 
00414                         __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx_ml, v_dx_mh), _mm_unpacklo_epi16(v_dy_ml, v_dy_mh));
00415                         _mm_storeu_si128((__m128i *)(_norm + j), v_norm);
00416 
00417                         v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx_ml, v_dx_mh), _mm_unpackhi_epi16(v_dy_ml, v_dy_mh));
00418                         _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm);
00419                     }
00420                 }
00421 #elif CV_NEON
00422                 for ( ; j <= width - 8; j += 8)
00423                 {
00424                     int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
00425                     int16x4_t v_dxp = vget_low_s16(v_dx), v_dyp = vget_low_s16(v_dy);
00426                     int32x4_t v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp);
00427                     vst1q_s32(_norm + j, v_dst);
00428 
00429                     v_dxp = vget_high_s16(v_dx), v_dyp = vget_high_s16(v_dy);
00430                     v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp);
00431                     vst1q_s32(_norm + j + 4, v_dst);
00432                 }
00433 #endif
00434                 for ( ; j < width; ++j)
00435                     _norm[j] = int(_dx[j])*_dx[j] + int(_dy[j])*_dy[j];
00436             }
00437 
00438             if (cn > 1)
00439             {
00440                 for(int j = 0, jn = 0; j < src.cols; ++j, jn += cn)
00441                 {
00442                     int maxIdx = jn;
00443                     for(int k = 1; k < cn; ++k)
00444                         if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k;
00445                     _norm[j] = _norm[maxIdx];
00446                     _dx[j] = _dx[maxIdx];
00447                     _dy[j] = _dy[maxIdx];
00448                 }
00449             }
00450             _norm[-1] = _norm[src.cols] = 0;
00451 
00452             // at the very beginning we do not have a complete ring
00453             // buffer of 3 magnitude rows for non-maxima suppression
00454             if (i <= boundaries.start)
00455                 continue;
00456 
00457             uchar* _map = map + mapstep*i + 1;
00458             _map[-1] = _map[src.cols] = 1;
00459 
00460             int* _mag = mag_buf[1] + 1; // take the central row
00461             ptrdiff_t magstep1 = mag_buf[2] - mag_buf[1];
00462             ptrdiff_t magstep2 = mag_buf[0] - mag_buf[1];
00463 
00464             const short* _x = dx.ptr<short>(i - boundaries.start);
00465             const short* _y = dy.ptr<short>(i - boundaries.start);
00466 
00467             if ((stack_top - stack_bottom) + src.cols > maxsize)
00468             {
00469                 int sz = (int)(stack_top - stack_bottom);
00470                 maxsize = std::max(maxsize * 3/2, sz + src.cols);
00471                 stack.resize(maxsize);
00472                 stack_bottom = &stack[0];
00473                 stack_top = stack_bottom + sz;
00474             }
00475 
00476 #define CANNY_PUSH(d)    *(d) = uchar(2), *stack_top++ = (d)
00477 #define CANNY_POP(d)     (d) = *--stack_top
00478 
00479             int prev_flag = 0;
00480             bool canny_push = false;
00481             for (int j = 0; j < src.cols; j++)
00482             {
00483                 #define CANNY_SHIFT 15
00484                 const int TG22 = (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
00485 
00486                 int m = _mag[j];
00487 
00488                 if (m > low)
00489                 {
00490                     int xs = _x[j];
00491                     int ys = _y[j];
00492                     int x = std::abs(xs);
00493                     int y = std::abs(ys) << CANNY_SHIFT;
00494 
00495                     int tg22x = x * TG22;
00496 
00497                     if (y < tg22x)
00498                     {
00499                         if (m > _mag[j-1] && m >= _mag[j+1]) canny_push = true;
00500                     }
00501                     else
00502                     {
00503                         int tg67x = tg22x + (x << (CANNY_SHIFT+1));
00504                         if (y > tg67x)
00505                         {
00506                             if (m > _mag[j+magstep2] && m >= _mag[j+magstep1]) canny_push = true;
00507                         }
00508                         else
00509                         {
00510                             int s = (xs ^ ys) < 0 ? -1 : 1;
00511                             if (m > _mag[j+magstep2-s] && m > _mag[j+magstep1+s]) canny_push = true;
00512                         }
00513                     }
00514                 }
00515                 if (!canny_push)
00516                 {
00517                     prev_flag = 0;
00518                     _map[j] = uchar(1);
00519                     continue;
00520                 }
00521                 else
00522                 {
00523                     // _map[j-mapstep] is short-circuited at the start because previous thread is
00524                     // responsible for initializing it.
00525                     if (!prev_flag && m > high && (i <= boundaries.start+1 || _map[j-mapstep] != 2) )
00526                     {
00527                         CANNY_PUSH(_map + j);
00528                         prev_flag = 1;
00529                     }
00530                     else
00531                         _map[j] = 0;
00532 
00533                     canny_push = false;
00534                 }
00535             }
00536 
00537             // scroll the ring buffer
00538             _mag = mag_buf[0];
00539             mag_buf[0] = mag_buf[1];
00540             mag_buf[1] = mag_buf[2];
00541             mag_buf[2] = _mag;
00542         }
00543 
00544         // now track the edges (hysteresis thresholding)
00545         while (stack_top > stack_bottom)
00546         {
00547             if ((stack_top - stack_bottom) + 8 > maxsize)
00548             {
00549                 int sz = (int)(stack_top - stack_bottom);
00550                 maxsize = maxsize * 3/2;
00551                 stack.resize(maxsize);
00552                 stack_bottom = &stack[0];
00553                 stack_top = stack_bottom + sz;
00554             }
00555 
00556             uchar* m;
00557             CANNY_POP(m);
00558 
00559             // Stops thresholding from expanding to other slices by sending pixels in the borders of each
00560             // slice in a queue to be serially processed later.
00561             if ( (m < map + (boundaries.start + 2) * mapstep) || (m >= map + boundaries.end * mapstep) )
00562             {
00563                 borderPeaks.push(m);
00564                 continue;
00565             }
00566 
00567             if (!m[-1])         CANNY_PUSH(m - 1);
00568             if (!m[1])          CANNY_PUSH(m + 1);
00569             if (!m[-mapstep-1]) CANNY_PUSH(m - mapstep - 1);
00570             if (!m[-mapstep])   CANNY_PUSH(m - mapstep);
00571             if (!m[-mapstep+1]) CANNY_PUSH(m - mapstep + 1);
00572             if (!m[mapstep-1])  CANNY_PUSH(m + mapstep - 1);
00573             if (!m[mapstep])    CANNY_PUSH(m + mapstep);
00574             if (!m[mapstep+1])  CANNY_PUSH(m + mapstep + 1);
00575         }
00576     }
00577 
00578 private:
00579     const Range boundaries;
00580     const Mat& src;
00581     uchar* map;
00582     int low;
00583     int high;
00584     int aperture_size;
00585     bool L2gradient;
00586 };
00587 
00588 #endif
00589 
00590 } // namespace cv
00591 
00592 void cv::Canny( InputArray _src, OutputArray _dst,
00593                 double low_thresh, double high_thresh,
00594                 int aperture_size, bool L2gradient )
00595 {
00596     const int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
00597     const Size size = _src.size();
00598 
00599     CV_Assert( depth == CV_8U );
00600     _dst.create(size, CV_8U);
00601 
00602     if (!L2gradient && (aperture_size & CV_CANNY_L2_GRADIENT) == CV_CANNY_L2_GRADIENT)
00603     {
00604         // backward compatibility
00605         aperture_size &= ~CV_CANNY_L2_GRADIENT;
00606         L2gradient = true;
00607     }
00608 
00609     if ((aperture_size & 1) == 0 || (aperture_size != -1 && (aperture_size < 3 || aperture_size > 7)))
00610         CV_Error(CV_StsBadFlag, "Aperture size should be odd");
00611 
00612     if (low_thresh > high_thresh)
00613         std::swap(low_thresh, high_thresh);
00614 
00615 #ifdef HAVE_OPENCL
00616     CV_OCL_RUN(_dst.isUMat() && (cn == 1 || cn == 3),
00617                ocl_Canny(_src, _dst, (float)low_thresh, (float)high_thresh, aperture_size, L2gradient, cn, size))
00618 #endif
00619 
00620     Mat src = _src.getMat(), dst = _dst.getMat();
00621 
00622 #ifdef HAVE_TEGRA_OPTIMIZATION
00623     if (tegra::useTegra() && tegra::canny(src, dst, low_thresh, high_thresh, aperture_size, L2gradient))
00624         return;
00625 #endif
00626 
00627     CV_IPP_RUN(USE_IPP_CANNY && (aperture_size == 3 && !L2gradient && 1 == cn), ippCanny(src, dst, (float)low_thresh, (float)high_thresh))
00628 
00629 #ifdef HAVE_TBB
00630 
00631 if (L2gradient)
00632 {
00633     low_thresh = std::min(32767.0, low_thresh);
00634     high_thresh = std::min(32767.0, high_thresh);
00635 
00636     if (low_thresh > 0) low_thresh *= low_thresh;
00637     if (high_thresh > 0) high_thresh *= high_thresh;
00638 }
00639 int low = cvFloor(low_thresh);
00640 int high = cvFloor(high_thresh);
00641 
00642 ptrdiff_t mapstep = src.cols + 2;
00643 AutoBuffer<uchar>  buffer((src.cols+2)*(src.rows+2));
00644 
00645 uchar* map = (uchar*)buffer;
00646 memset(map, 1, mapstep);
00647 memset(map + mapstep*(src.rows + 1), 1, mapstep);
00648 
00649 int threadsNumber = tbb::task_scheduler_init::default_num_threads();
00650 int grainSize = src.rows / threadsNumber;
00651 
00652 // Make a fallback for pictures with too few rows.
00653 uchar ksize2 = aperture_size / 2;
00654 int minGrainSize = 1 + ksize2;
00655 int maxGrainSize = src.rows - 2 - 2*ksize2;
00656 if ( !( minGrainSize <= grainSize && grainSize <= maxGrainSize ) )
00657 {
00658     threadsNumber = 1;
00659     grainSize = src.rows;
00660 }
00661 
00662 tbb::task_group g;
00663 
00664 for (int i = 0; i < threadsNumber; ++i)
00665 {
00666     if (i < threadsNumber - 1)
00667         g.run(tbbCanny(Range(i * grainSize, (i + 1) * grainSize), src, map, low, high, aperture_size, L2gradient));
00668     else
00669         g.run(tbbCanny(Range(i * grainSize, src.rows), src, map, low, high, aperture_size, L2gradient));
00670 }
00671 
00672 g.wait();
00673 
00674 #define CANNY_PUSH_SERIAL(d)    *(d) = uchar(2), borderPeaks.push(d)
00675 
00676 // now track the edges (hysteresis thresholding)
00677 uchar* m;
00678 while (borderPeaks.try_pop(m))
00679 {
00680     if (!m[-1])         CANNY_PUSH_SERIAL(m - 1);
00681     if (!m[1])          CANNY_PUSH_SERIAL(m + 1);
00682     if (!m[-mapstep-1]) CANNY_PUSH_SERIAL(m - mapstep - 1);
00683     if (!m[-mapstep])   CANNY_PUSH_SERIAL(m - mapstep);
00684     if (!m[-mapstep+1]) CANNY_PUSH_SERIAL(m - mapstep + 1);
00685     if (!m[mapstep-1])  CANNY_PUSH_SERIAL(m + mapstep - 1);
00686     if (!m[mapstep])    CANNY_PUSH_SERIAL(m + mapstep);
00687     if (!m[mapstep+1])  CANNY_PUSH_SERIAL(m + mapstep + 1);
00688 }
00689 
00690 #else
00691 
00692     Mat dx(src.rows, src.cols, CV_16SC(cn));
00693     Mat dy(src.rows, src.cols, CV_16SC(cn));
00694 
00695     Sobel(src, dx, CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
00696     Sobel(src, dy, CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
00697 
00698     if (L2gradient)
00699     {
00700         low_thresh = std::min(32767.0, low_thresh);
00701         high_thresh = std::min(32767.0, high_thresh);
00702 
00703         if (low_thresh > 0) low_thresh *= low_thresh;
00704         if (high_thresh > 0) high_thresh *= high_thresh;
00705     }
00706     int low = cvFloor(low_thresh);
00707     int high = cvFloor(high_thresh);
00708 
00709     ptrdiff_t mapstep = src.cols + 2;
00710     AutoBuffer<uchar>  buffer((src.cols+2)*(src.rows+2) + cn * mapstep * 3 * sizeof(int));
00711 
00712     int* mag_buf[3];
00713     mag_buf[0] = (int*)(uchar*)buffer;
00714     mag_buf[1] = mag_buf[0] + mapstep*cn;
00715     mag_buf[2] = mag_buf[1] + mapstep*cn;
00716     memset(mag_buf[0], 0, /* cn* */mapstep*sizeof(int));
00717 
00718     uchar* map = (uchar*)(mag_buf[2] + mapstep*cn);
00719     memset(map, 1, mapstep);
00720     memset(map + mapstep*(src.rows + 1), 1, mapstep);
00721 
00722     int maxsize = std::max(1 << 10, src.cols * src.rows / 10);
00723     std::vector<uchar*> stack(maxsize);
00724     uchar **stack_top = &stack[0];
00725     uchar **stack_bottom = &stack[0];
00726 
00727     /* sector numbers
00728        (Top-Left Origin)
00729 
00730         1   2   3
00731          *  *  *
00732           * * *
00733         0*******0
00734           * * *
00735          *  *  *
00736         3   2   1
00737     */
00738 
00739     #define CANNY_PUSH(d)    *(d) = uchar(2), *stack_top++ = (d)
00740     #define CANNY_POP(d)     (d) = *--stack_top
00741 
00742 #if CV_SSE2
00743     bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
00744 #endif
00745 
00746     // calculate magnitude and angle of gradient, perform non-maxima suppression.
00747     // fill the map with one of the following values:
00748     //   0 - the pixel might belong to an edge
00749     //   1 - the pixel can not belong to an edge
00750     //   2 - the pixel does belong to an edge
00751     for (int i = 0; i <= src.rows; i++)
00752     {
00753         int* _norm = mag_buf[(i > 0) + 1] + 1;
00754         if (i < src.rows)
00755         {
00756             short* _dx = dx.ptr<short>(i);
00757             short* _dy = dy.ptr<short>(i);
00758 
00759             if (!L2gradient)
00760             {
00761                 int j = 0, width = src.cols * cn;
00762 #if CV_SSE2
00763                 if (haveSSE2)
00764                 {
00765                     __m128i v_zero = _mm_setzero_si128();
00766                     for ( ; j <= width - 8; j += 8)
00767                     {
00768                         __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j));
00769                         __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j));
00770                         v_dx = _mm_max_epi16(v_dx, _mm_sub_epi16(v_zero, v_dx));
00771                         v_dy = _mm_max_epi16(v_dy, _mm_sub_epi16(v_zero, v_dy));
00772 
00773                         __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx, v_zero), _mm_unpacklo_epi16(v_dy, v_zero));
00774                         _mm_storeu_si128((__m128i *)(_norm + j), v_norm);
00775 
00776                         v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx, v_zero), _mm_unpackhi_epi16(v_dy, v_zero));
00777                         _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm);
00778                     }
00779                 }
00780 #elif CV_NEON
00781                 for ( ; j <= width - 8; j += 8)
00782                 {
00783                     int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
00784                     vst1q_s32(_norm + j, vaddq_s32(vabsq_s32(vmovl_s16(vget_low_s16(v_dx))),
00785                                                    vabsq_s32(vmovl_s16(vget_low_s16(v_dy)))));
00786                     vst1q_s32(_norm + j + 4, vaddq_s32(vabsq_s32(vmovl_s16(vget_high_s16(v_dx))),
00787                                                        vabsq_s32(vmovl_s16(vget_high_s16(v_dy)))));
00788                 }
00789 #endif
00790                 for ( ; j < width; ++j)
00791                     _norm[j] = std::abs(int(_dx[j])) + std::abs(int(_dy[j]));
00792             }
00793             else
00794             {
00795                 int j = 0, width = src.cols * cn;
00796 #if CV_SSE2
00797                 if (haveSSE2)
00798                 {
00799                     for ( ; j <= width - 8; j += 8)
00800                     {
00801                         __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j));
00802                         __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j));
00803 
00804                         __m128i v_dx_ml = _mm_mullo_epi16(v_dx, v_dx), v_dx_mh = _mm_mulhi_epi16(v_dx, v_dx);
00805                         __m128i v_dy_ml = _mm_mullo_epi16(v_dy, v_dy), v_dy_mh = _mm_mulhi_epi16(v_dy, v_dy);
00806 
00807                         __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx_ml, v_dx_mh), _mm_unpacklo_epi16(v_dy_ml, v_dy_mh));
00808                         _mm_storeu_si128((__m128i *)(_norm + j), v_norm);
00809 
00810                         v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx_ml, v_dx_mh), _mm_unpackhi_epi16(v_dy_ml, v_dy_mh));
00811                         _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm);
00812                     }
00813                 }
00814 #elif CV_NEON
00815                 for ( ; j <= width - 8; j += 8)
00816                 {
00817                     int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
00818                     int16x4_t v_dxp = vget_low_s16(v_dx), v_dyp = vget_low_s16(v_dy);
00819                     int32x4_t v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp);
00820                     vst1q_s32(_norm + j, v_dst);
00821 
00822                     v_dxp = vget_high_s16(v_dx), v_dyp = vget_high_s16(v_dy);
00823                     v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp);
00824                     vst1q_s32(_norm + j + 4, v_dst);
00825                 }
00826 #endif
00827                 for ( ; j < width; ++j)
00828                     _norm[j] = int(_dx[j])*_dx[j] + int(_dy[j])*_dy[j];
00829             }
00830 
00831             if (cn > 1)
00832             {
00833                 for(int j = 0, jn = 0; j < src.cols; ++j, jn += cn)
00834                 {
00835                     int maxIdx = jn;
00836                     for(int k = 1; k < cn; ++k)
00837                         if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k;
00838                     _norm[j] = _norm[maxIdx];
00839                     _dx[j] = _dx[maxIdx];
00840                     _dy[j] = _dy[maxIdx];
00841                 }
00842             }
00843             _norm[-1] = _norm[src.cols] = 0;
00844         }
00845         else
00846             memset(_norm-1, 0, /* cn* */mapstep*sizeof(int));
00847 
00848         // at the very beginning we do not have a complete ring
00849         // buffer of 3 magnitude rows for non-maxima suppression
00850         if (i == 0)
00851             continue;
00852 
00853         uchar* _map = map + mapstep*i + 1;
00854         _map[-1] = _map[src.cols] = 1;
00855 
00856         int* _mag = mag_buf[1] + 1; // take the central row
00857         ptrdiff_t magstep1 = mag_buf[2] - mag_buf[1];
00858         ptrdiff_t magstep2 = mag_buf[0] - mag_buf[1];
00859 
00860         const short* _x = dx.ptr<short>(i-1);
00861         const short* _y = dy.ptr<short>(i-1);
00862 
00863         if ((stack_top - stack_bottom) + src.cols > maxsize)
00864         {
00865             int sz = (int)(stack_top - stack_bottom);
00866             maxsize = std::max(maxsize * 3/2, sz + src.cols);
00867             stack.resize(maxsize);
00868             stack_bottom = &stack[0];
00869             stack_top = stack_bottom + sz;
00870         }
00871 
00872         int prev_flag = 0;
00873         for (int j = 0; j < src.cols; j++)
00874         {
00875             #define CANNY_SHIFT 15
00876             const int TG22 = (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
00877 
00878             int m = _mag[j];
00879 
00880             if (m > low)
00881             {
00882                 int xs = _x[j];
00883                 int ys = _y[j];
00884                 int x = std::abs(xs);
00885                 int y = std::abs(ys) << CANNY_SHIFT;
00886 
00887                 int tg22x = x * TG22;
00888 
00889                 if (y < tg22x)
00890                 {
00891                     if (m > _mag[j-1] && m >= _mag[j+1]) goto __ocv_canny_push;
00892                 }
00893                 else
00894                 {
00895                     int tg67x = tg22x + (x << (CANNY_SHIFT+1));
00896                     if (y > tg67x)
00897                     {
00898                         if (m > _mag[j+magstep2] && m >= _mag[j+magstep1]) goto __ocv_canny_push;
00899                     }
00900                     else
00901                     {
00902                         int s = (xs ^ ys) < 0 ? -1 : 1;
00903                         if (m > _mag[j+magstep2-s] && m > _mag[j+magstep1+s]) goto __ocv_canny_push;
00904                     }
00905                 }
00906             }
00907             prev_flag = 0;
00908             _map[j] = uchar(1);
00909             continue;
00910 __ocv_canny_push:
00911             if (!prev_flag && m > high && _map[j-mapstep] != 2)
00912             {
00913                 CANNY_PUSH(_map + j);
00914                 prev_flag = 1;
00915             }
00916             else
00917                 _map[j] = 0;
00918         }
00919 
00920         // scroll the ring buffer
00921         _mag = mag_buf[0];
00922         mag_buf[0] = mag_buf[1];
00923         mag_buf[1] = mag_buf[2];
00924         mag_buf[2] = _mag;
00925     }
00926 
00927     // now track the edges (hysteresis thresholding)
00928     while (stack_top > stack_bottom)
00929     {
00930         uchar* m;
00931         if ((stack_top - stack_bottom) + 8 > maxsize)
00932         {
00933             int sz = (int)(stack_top - stack_bottom);
00934             maxsize = maxsize * 3/2;
00935             stack.resize(maxsize);
00936             stack_bottom = &stack[0];
00937             stack_top = stack_bottom + sz;
00938         }
00939 
00940         CANNY_POP(m);
00941 
00942         if (!m[-1])         CANNY_PUSH(m - 1);
00943         if (!m[1])          CANNY_PUSH(m + 1);
00944         if (!m[-mapstep-1]) CANNY_PUSH(m - mapstep - 1);
00945         if (!m[-mapstep])   CANNY_PUSH(m - mapstep);
00946         if (!m[-mapstep+1]) CANNY_PUSH(m - mapstep + 1);
00947         if (!m[mapstep-1])  CANNY_PUSH(m + mapstep - 1);
00948         if (!m[mapstep])    CANNY_PUSH(m + mapstep);
00949         if (!m[mapstep+1])  CANNY_PUSH(m + mapstep + 1);
00950     }
00951 
00952 #endif
00953 
00954     // the final pass, form the final image
00955     const uchar* pmap = map + mapstep + 1;
00956     uchar* pdst = dst.ptr();
00957     for (int i = 0; i < src.rows; i++, pmap += mapstep, pdst += dst.step)
00958     {
00959         for (int j = 0; j < src.cols; j++)
00960             pdst[j] = (uchar)-(pmap[j] >> 1);
00961     }
00962 }
00963 
00964 void cvCanny( const CvArr* image, CvArr* edges, double threshold1,
00965               double threshold2, int aperture_size )
00966 {
00967     cv::Mat src = cv::cvarrToMat(image), dst = cv::cvarrToMat(edges);
00968     CV_Assert( src.size == dst.size && src.depth() == CV_8U && dst.type() == CV_8U );
00969 
00970     cv::Canny(src, dst, threshold1, threshold2, aperture_size & 255,
00971               (aperture_size & CV_CANNY_L2_GRADIENT) != 0);
00972 }
00973 
00974 /* End of file. */
00975