gr-peach-opencv-project-sd-card_update

Renesas GR-PEACH OpenCV Development » Code » Documentation
Renesas GR-PEACH OpenCV Development / gr-peach-opencv-project-sd-card_update
Fork of gr-peach-opencv-project-sd-card by the do
Embed: (wiki syntax)
Show/hide line numbers canny.cpp Source File
00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                        Intel License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2014, Itseez Inc., all rights reserved.
00015 // Third party copyrights are property of their respective owners.
00016 //
00017 // Redistribution and use in source and binary forms, with or without modification,
00018 // are permitted provided that the following conditions are met:
00019 //
00020 //   * Redistribution's of source code must retain the above copyright notice,
00021 //     this list of conditions and the following disclaimer.
00022 //
00023 //   * Redistribution's in binary form must reproduce the above copyright notice,
00024 //     this list of conditions and the following disclaimer in the documentation
00025 //     and/or other materials provided with the distribution.
00026 //
00027 //   * The name of Intel Corporation may not be used to endorse or promote products
00028 //     derived from this software without specific prior written permission.
00029 //
00030 // This software is provided by the copyright holders and contributors "as is" and
00031 // any express or implied warranties, including, but not limited to, the implied
00032 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00033 // In no event shall the Intel Corporation or contributors be liable for any direct,
00034 // indirect, incidental, special, exemplary, or consequential damages
00035 // (including, but not limited to, procurement of substitute goods or services;
00036 // loss of use, data, or profits; or business interruption) however caused
00037 // and on any theory of liability, whether in contract, strict liability,
00038 // or tort (including negligence or otherwise) arising in any way out of
00039 // the use of this software, even if advised of the possibility of such damage.
00040 //
00041 //M*/
00042 
00043 #include "precomp.hpp"
00044 #include "opencl_kernels_imgproc.hpp"
00045 
00046 
00047 #if defined (HAVE_IPP) && (IPP_VERSION_X100 >= 700)
00048 #define USE_IPP_CANNY 1
00049 #else
00050 #define USE_IPP_CANNY 0
00051 #endif
00052 
00053 
00054 namespace cv
00055 {
00056 #ifdef HAVE_IPP
00057 static bool ippCanny(const Mat& _src, Mat& _dst, float low,  float high)
00058 {
00059 #if USE_IPP_CANNY
00060     int size = 0, size1 = 0;
00061     IppiSize roi = { _src.cols, _src.rows };
00062 
00063 #if IPP_VERSION_X100 < 900
00064     if (ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size) < 0)
00065         return false;
00066     if (ippiFilterSobelHorizGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size1) < 0)
00067         return false;
00068 #else
00069     if (ippiFilterSobelNegVertBorderGetBufferSize(roi, ippMskSize3x3, ipp8u, ipp16s, 1, &size) < 0)
00070         return false;
00071     if (ippiFilterSobelHorizBorderGetBufferSize(roi, ippMskSize3x3, ipp8u, ipp16s, 1, &size1) < 0)
00072         return false;
00073 #endif
00074 
00075     size = std::max(size, size1);
00076 
00077     if (ippiCannyGetSize(roi, &size1) < 0)
00078         return false;
00079     size = std::max(size, size1);
00080 
00081     AutoBuffer<uchar> buf(size + 64);
00082     uchar* buffer = alignPtr((uchar*)buf, 32);
00083 
00084     Mat _dx(_src.rows, _src.cols, CV_16S);
00085     if( ippiFilterSobelNegVertBorder_8u16s_C1R(_src.ptr(), (int)_src.step,
00086                     _dx.ptr<short>(), (int)_dx.step, roi,
00087                     ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 )
00088         return false;
00089 
00090     Mat _dy(_src.rows, _src.cols, CV_16S);
00091     if( ippiFilterSobelHorizBorder_8u16s_C1R(_src.ptr(), (int)_src.step,
00092                     _dy.ptr<short>(), (int)_dy.step, roi,
00093                     ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 )
00094         return false;
00095 
00096     if( ippiCanny_16s8u_C1R(_dx.ptr<short>(), (int)_dx.step,
00097                                _dy.ptr<short>(), (int)_dy.step,
00098                               _dst.ptr(), (int)_dst.step, roi, low, high, buffer) < 0 )
00099         return false;
00100     return true;
00101 #else
00102     CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(low); CV_UNUSED(high);
00103     return false;
00104 #endif
00105 }
00106 #endif
00107 
00108 #ifdef HAVE_OPENCL
00109 
00110 static bool ocl_Canny(InputArray _src, OutputArray _dst, float low_thresh, float high_thresh,
00111                       int aperture_size, bool L2gradient, int cn, const Size & size)
00112 {
00113     UMat map;
00114 
00115     const ocl::Device &dev = ocl::Device::getDefault();
00116     int max_wg_size = (int)dev.maxWorkGroupSize();
00117 
00118     int lSizeX = 32;
00119     int lSizeY = max_wg_size / 32;
00120 
00121     if (lSizeY == 0)
00122     {
00123         lSizeX = 16;
00124         lSizeY = max_wg_size / 16;
00125     }
00126     if (lSizeY == 0)
00127     {
00128         lSizeY = 1;
00129     }
00130 
00131     if (L2gradient)
00132     {
00133         low_thresh = std::min(32767.0f, low_thresh);
00134         high_thresh = std::min(32767.0f, high_thresh);
00135 
00136         if (low_thresh > 0)
00137             low_thresh *= low_thresh;
00138         if (high_thresh > 0)
00139             high_thresh *= high_thresh;
00140     }
00141     int low = cvFloor(low_thresh), high = cvFloor(high_thresh);
00142 
00143     if (aperture_size == 3 && !_src.isSubmatrix())
00144     {
00145         /*
00146             stage1_with_sobel:
00147                 Sobel operator
00148                 Calc magnitudes
00149                 Non maxima suppression
00150                 Double thresholding
00151         */
00152         char cvt[40];
00153         ocl::Kernel with_sobel("stage1_with_sobel", ocl::imgproc::canny_oclsrc,
00154                                format("-D WITH_SOBEL -D cn=%d -D TYPE=%s -D convert_floatN=%s -D floatN=%s -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s",
00155                                       cn, ocl::memopTypeToStr(_src.depth()),
00156                                       ocl::convertTypeStr(_src.depth(), CV_32F, cn, cvt),
00157                                       ocl::typeToStr(CV_MAKE_TYPE(CV_32F, cn)),
00158                                       lSizeX, lSizeY,
00159                                       L2gradient ? " -D L2GRAD" : ""));
00160         if (with_sobel.empty())
00161             return false;
00162 
00163         UMat src = _src.getUMat();
00164         map.create(size, CV_32S);
00165         with_sobel.args(ocl::KernelArg::ReadOnly(src),
00166                         ocl::KernelArg::WriteOnlyNoSize(map),
00167                         (float) low, (float) high);
00168 
00169         size_t globalsize[2] = { (size_t)size.width, (size_t)size.height },
00170                 localsize[2] = { (size_t)lSizeX, (size_t)lSizeY };
00171 
00172         if (!with_sobel.run(2, globalsize, localsize, false))
00173             return false;
00174     }
00175     else
00176     {
00177         /*
00178             stage1_without_sobel:
00179                 Calc magnitudes
00180                 Non maxima suppression
00181                 Double thresholding
00182         */
00183         UMat dx, dy;
00184         Sobel(_src, dx, CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
00185         Sobel(_src, dy, CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
00186 
00187         ocl::Kernel without_sobel("stage1_without_sobel", ocl::imgproc::canny_oclsrc,
00188                                     format("-D WITHOUT_SOBEL -D cn=%d -D GRP_SIZEX=%d -D GRP_SIZEY=%d%s",
00189                                            cn, lSizeX, lSizeY, L2gradient ? " -D L2GRAD" : ""));
00190         if (without_sobel.empty())
00191             return false;
00192 
00193         map.create(size, CV_32S);
00194         without_sobel.args(ocl::KernelArg::ReadOnlyNoSize(dx), ocl::KernelArg::ReadOnlyNoSize(dy),
00195                            ocl::KernelArg::WriteOnly(map),
00196                            low, high);
00197 
00198         size_t globalsize[2] = { (size_t)size.width, (size_t)size.height },
00199                 localsize[2] = { (size_t)lSizeX, (size_t)lSizeY };
00200 
00201         if (!without_sobel.run(2, globalsize, localsize, false))
00202             return false;
00203     }
00204 
00205     int PIX_PER_WI = 8;
00206     /*
00207         stage2:
00208             hysteresis (add weak edges if they are connected with strong edges)
00209     */
00210 
00211     int sizey = lSizeY / PIX_PER_WI;
00212     if (sizey == 0)
00213         sizey = 1;
00214 
00215     size_t globalsize[2] = { (size_t)size.width, ((size_t)size.height + PIX_PER_WI - 1) / PIX_PER_WI }, localsize[2] = { (size_t)lSizeX, (size_t)sizey };
00216 
00217     ocl::Kernel edgesHysteresis("stage2_hysteresis", ocl::imgproc::canny_oclsrc,
00218                                 format("-D STAGE2 -D PIX_PER_WI=%d -D LOCAL_X=%d -D LOCAL_Y=%d",
00219                                 PIX_PER_WI, lSizeX, sizey));
00220 
00221     if (edgesHysteresis.empty())
00222         return false;
00223 
00224     edgesHysteresis.args(ocl::KernelArg::ReadWrite(map));
00225     if (!edgesHysteresis.run(2, globalsize, localsize, false))
00226         return false;
00227 
00228     // get edges
00229 
00230     ocl::Kernel getEdgesKernel("getEdges", ocl::imgproc::canny_oclsrc,
00231                                 format("-D GET_EDGES -D PIX_PER_WI=%d", PIX_PER_WI));
00232     if (getEdgesKernel.empty())
00233         return false;
00234 
00235     _dst.create(size, CV_8UC1);
00236     UMat dst = _dst.getUMat();
00237 
00238     getEdgesKernel.args(ocl::KernelArg::ReadOnly(map), ocl::KernelArg::WriteOnlyNoSize(dst));
00239 
00240     return getEdgesKernel.run(2, globalsize, NULL, false);
00241 }
00242 
00243 #endif
00244 
00245 #ifdef HAVE_TBB
00246 
00247 // Queue with peaks that will processed serially.
00248 static tbb::concurrent_queue<uchar*> borderPeaks;
00249 
00250 class tbbCanny
00251 {
00252 public:
00253     tbbCanny(const Range _boundaries, const Mat& _src, uchar* _map, int _low,
00254             int _high, int _aperture_size, bool _L2gradient)
00255         : boundaries(_boundaries), src(_src), map(_map), low(_low), high(_high),
00256           aperture_size(_aperture_size), L2gradient(_L2gradient)
00257     {}
00258 
00259     // This parallel version of Canny algorithm splits the src image in threadsNumber horizontal slices.
00260     // The first row of each slice contains the last row of the previous slice and
00261     // the last row of each slice contains the first row of the next slice
00262     // so that each slice is independent and no mutexes are required.
00263     void operator()() const
00264     {
00265 #if CV_SSE2
00266         bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
00267 #endif
00268 
00269         const int type = src.type(), cn = CV_MAT_CN(type);
00270 
00271         Mat dx, dy;
00272 
00273         ptrdiff_t mapstep = src.cols + 2;
00274 
00275         // In sobel transform we calculate ksize2 extra lines for the first and last rows of each slice
00276         // because IPPDerivSobel expects only isolated ROIs, in contrast with the opencv version which
00277         // uses the pixels outside of the ROI to form a border.
00278         uchar ksize2 = aperture_size / 2;
00279 
00280         if (boundaries.start == 0 && boundaries.end == src.rows)
00281         {
00282             Mat tempdx(boundaries.end - boundaries.start + 2, src.cols, CV_16SC(cn));
00283             Mat tempdy(boundaries.end - boundaries.start + 2, src.cols, CV_16SC(cn));
00284 
00285             memset(tempdx.ptr<short>(0), 0, cn * src.cols*sizeof(short));
00286             memset(tempdy.ptr<short>(0), 0, cn * src.cols*sizeof(short));
00287             memset(tempdx.ptr<short>(tempdx.rows - 1), 0, cn * src.cols*sizeof(short));
00288             memset(tempdy.ptr<short>(tempdy.rows - 1), 0, cn * src.cols*sizeof(short));
00289 
00290             Sobel(src, tempdx.rowRange(1, tempdx.rows - 1), CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
00291             Sobel(src, tempdy.rowRange(1, tempdy.rows - 1), CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
00292 
00293             dx = tempdx;
00294             dy = tempdy;
00295         }
00296         else if (boundaries.start == 0)
00297         {
00298             Mat tempdx(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn));
00299             Mat tempdy(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn));
00300 
00301             memset(tempdx.ptr<short>(0), 0, cn * src.cols*sizeof(short));
00302             memset(tempdy.ptr<short>(0), 0, cn * src.cols*sizeof(short));
00303 
00304             Sobel(src.rowRange(boundaries.start, boundaries.end + 1 + ksize2), tempdx.rowRange(1, tempdx.rows),
00305                     CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
00306             Sobel(src.rowRange(boundaries.start, boundaries.end + 1 + ksize2), tempdy.rowRange(1, tempdy.rows),
00307                     CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
00308 
00309             dx = tempdx.rowRange(0, tempdx.rows - ksize2);
00310             dy = tempdy.rowRange(0, tempdy.rows - ksize2);
00311         }
00312         else if (boundaries.end == src.rows)
00313         {
00314             Mat tempdx(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn));
00315             Mat tempdy(boundaries.end - boundaries.start + 2 + ksize2, src.cols, CV_16SC(cn));
00316 
00317             memset(tempdx.ptr<short>(tempdx.rows - 1), 0, cn * src.cols*sizeof(short));
00318             memset(tempdy.ptr<short>(tempdy.rows - 1), 0, cn * src.cols*sizeof(short));
00319 
00320             Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end), tempdx.rowRange(0, tempdx.rows - 1),
00321                     CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
00322             Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end), tempdy.rowRange(0, tempdy.rows - 1),
00323                     CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
00324 
00325             dx = tempdx.rowRange(ksize2, tempdx.rows);
00326             dy = tempdy.rowRange(ksize2, tempdy.rows);
00327         }
00328         else
00329         {
00330             Mat tempdx(boundaries.end - boundaries.start + 2 + 2*ksize2, src.cols, CV_16SC(cn));
00331             Mat tempdy(boundaries.end - boundaries.start + 2 + 2*ksize2, src.cols, CV_16SC(cn));
00332 
00333             Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end + 1 + ksize2), tempdx,
00334                     CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
00335             Sobel(src.rowRange(boundaries.start - 1 - ksize2, boundaries.end + 1 + ksize2), tempdy,
00336                     CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
00337 
00338             dx = tempdx.rowRange(ksize2, tempdx.rows - ksize2);
00339             dy = tempdy.rowRange(ksize2, tempdy.rows - ksize2);
00340         }
00341 
00342         int maxsize = std::max(1 << 10, src.cols * (boundaries.end - boundaries.start) / 10);
00343         std::vector<uchar*> stack(maxsize);
00344         uchar **stack_top = &stack[0];
00345         uchar **stack_bottom = &stack[0];
00346 
00347         AutoBuffer<uchar> buffer(cn * mapstep * 3 * sizeof(int));
00348 
00349         int* mag_buf[3];
00350         mag_buf[0] = (int*)(uchar*)buffer;
00351         mag_buf[1] = mag_buf[0] + mapstep*cn;
00352         mag_buf[2] = mag_buf[1] + mapstep*cn;
00353 
00354         // calculate magnitude and angle of gradient, perform non-maxima suppression.
00355         // fill the map with one of the following values:
00356         //   0 - the pixel might belong to an edge
00357         //   1 - the pixel can not belong to an edge
00358         //   2 - the pixel does belong to an edge
00359         for (int i = boundaries.start - 1; i <= boundaries.end; i++)
00360         {
00361             int* _norm = mag_buf[(i > boundaries.start) - (i == boundaries.start - 1) + 1] + 1;
00362 
00363             short* _dx = dx.ptr<short>(i - boundaries.start + 1);
00364             short* _dy = dy.ptr<short>(i - boundaries.start + 1);
00365 
00366             if (!L2gradient)
00367             {
00368                 int j = 0, width = src.cols * cn;
00369 #if CV_SSE2
00370                 if (haveSSE2)
00371                 {
00372                     __m128i v_zero = _mm_setzero_si128();
00373                     for ( ; j <= width - 8; j += 8)
00374                     {
00375                         __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j));
00376                         __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j));
00377                         v_dx = _mm_max_epi16(v_dx, _mm_sub_epi16(v_zero, v_dx));
00378                         v_dy = _mm_max_epi16(v_dy, _mm_sub_epi16(v_zero, v_dy));
00379 
00380                         __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx, v_zero), _mm_unpacklo_epi16(v_dy, v_zero));
00381                         _mm_storeu_si128((__m128i *)(_norm + j), v_norm);
00382 
00383                         v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx, v_zero), _mm_unpackhi_epi16(v_dy, v_zero));
00384                         _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm);
00385                     }
00386                 }
00387 #elif CV_NEON
00388                 for ( ; j <= width - 8; j += 8)
00389                 {
00390                     int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
00391                     vst1q_s32(_norm + j, vaddq_s32(vabsq_s32(vmovl_s16(vget_low_s16(v_dx))),
00392                                                    vabsq_s32(vmovl_s16(vget_low_s16(v_dy)))));
00393                     vst1q_s32(_norm + j + 4, vaddq_s32(vabsq_s32(vmovl_s16(vget_high_s16(v_dx))),
00394                                                        vabsq_s32(vmovl_s16(vget_high_s16(v_dy)))));
00395                 }
00396 #endif
00397                 for ( ; j < width; ++j)
00398                     _norm[j] = std::abs(int(_dx[j])) + std::abs(int(_dy[j]));
00399             }
00400             else
00401             {
00402                 int j = 0, width = src.cols * cn;
00403 #if CV_SSE2
00404                 if (haveSSE2)
00405                 {
00406                     for ( ; j <= width - 8; j += 8)
00407                     {
00408                         __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j));
00409                         __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j));
00410 
00411                         __m128i v_dx_ml = _mm_mullo_epi16(v_dx, v_dx), v_dx_mh = _mm_mulhi_epi16(v_dx, v_dx);
00412                         __m128i v_dy_ml = _mm_mullo_epi16(v_dy, v_dy), v_dy_mh = _mm_mulhi_epi16(v_dy, v_dy);
00413 
00414                         __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx_ml, v_dx_mh), _mm_unpacklo_epi16(v_dy_ml, v_dy_mh));
00415                         _mm_storeu_si128((__m128i *)(_norm + j), v_norm);
00416 
00417                         v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx_ml, v_dx_mh), _mm_unpackhi_epi16(v_dy_ml, v_dy_mh));
00418                         _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm);
00419                     }
00420                 }
00421 #elif CV_NEON
00422                 for ( ; j <= width - 8; j += 8)
00423                 {
00424                     int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
00425                     int16x4_t v_dxp = vget_low_s16(v_dx), v_dyp = vget_low_s16(v_dy);
00426                     int32x4_t v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp);
00427                     vst1q_s32(_norm + j, v_dst);
00428 
00429                     v_dxp = vget_high_s16(v_dx), v_dyp = vget_high_s16(v_dy);
00430                     v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp);
00431                     vst1q_s32(_norm + j + 4, v_dst);
00432                 }
00433 #endif
00434                 for ( ; j < width; ++j)
00435                     _norm[j] = int(_dx[j])*_dx[j] + int(_dy[j])*_dy[j];
00436             }
00437 
00438             if (cn > 1)
00439             {
00440                 for(int j = 0, jn = 0; j < src.cols; ++j, jn += cn)
00441                 {
00442                     int maxIdx = jn;
00443                     for(int k = 1; k < cn; ++k)
00444                         if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k;
00445                     _norm[j] = _norm[maxIdx];
00446                     _dx[j] = _dx[maxIdx];
00447                     _dy[j] = _dy[maxIdx];
00448                 }
00449             }
00450             _norm[-1] = _norm[src.cols] = 0;
00451 
00452             // at the very beginning we do not have a complete ring
00453             // buffer of 3 magnitude rows for non-maxima suppression
00454             if (i <= boundaries.start)
00455                 continue;
00456 
00457             uchar* _map = map + mapstep*i + 1;
00458             _map[-1] = _map[src.cols] = 1;
00459 
00460             int* _mag = mag_buf[1] + 1; // take the central row
00461             ptrdiff_t magstep1 = mag_buf[2] - mag_buf[1];
00462             ptrdiff_t magstep2 = mag_buf[0] - mag_buf[1];
00463 
00464             const short* _x = dx.ptr<short>(i - boundaries.start);
00465             const short* _y = dy.ptr<short>(i - boundaries.start);
00466 
00467             if ((stack_top - stack_bottom) + src.cols > maxsize)
00468             {
00469                 int sz = (int)(stack_top - stack_bottom);
00470                 maxsize = std::max(maxsize * 3/2, sz + src.cols);
00471                 stack.resize(maxsize);
00472                 stack_bottom = &stack[0];
00473                 stack_top = stack_bottom + sz;
00474             }
00475 
00476 #define CANNY_PUSH(d)    *(d) = uchar(2), *stack_top++ = (d)
00477 #define CANNY_POP(d)     (d) = *--stack_top
00478 
00479             int prev_flag = 0;
00480             bool canny_push = false;
00481             for (int j = 0; j < src.cols; j++)
00482             {
00483                 #define CANNY_SHIFT 15
00484                 const int TG22 = (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
00485 
00486                 int m = _mag[j];
00487 
00488                 if (m > low)
00489                 {
00490                     int xs = _x[j];
00491                     int ys = _y[j];
00492                     int x = std::abs(xs);
00493                     int y = std::abs(ys) << CANNY_SHIFT;
00494 
00495                     int tg22x = x * TG22;
00496 
00497                     if (y < tg22x)
00498                     {
00499                         if (m > _mag[j-1] && m >= _mag[j+1]) canny_push = true;
00500                     }
00501                     else
00502                     {
00503                         int tg67x = tg22x + (x << (CANNY_SHIFT+1));
00504                         if (y > tg67x)
00505                         {
00506                             if (m > _mag[j+magstep2] && m >= _mag[j+magstep1]) canny_push = true;
00507                         }
00508                         else
00509                         {
00510                             int s = (xs ^ ys) < 0 ? -1 : 1;
00511                             if (m > _mag[j+magstep2-s] && m > _mag[j+magstep1+s]) canny_push = true;
00512                         }
00513                     }
00514                 }
00515                 if (!canny_push)
00516                 {
00517                     prev_flag = 0;
00518                     _map[j] = uchar(1);
00519                     continue;
00520                 }
00521                 else
00522                 {
00523                     // _map[j-mapstep] is short-circuited at the start because previous thread is
00524                     // responsible for initializing it.
00525                     if (!prev_flag && m > high && (i <= boundaries.start+1 || _map[j-mapstep] != 2) )
00526                     {
00527                         CANNY_PUSH(_map + j);
00528                         prev_flag = 1;
00529                     }
00530                     else
00531                         _map[j] = 0;
00532 
00533                     canny_push = false;
00534                 }
00535             }
00536 
00537             // scroll the ring buffer
00538             _mag = mag_buf[0];
00539             mag_buf[0] = mag_buf[1];
00540             mag_buf[1] = mag_buf[2];
00541             mag_buf[2] = _mag;
00542         }
00543 
00544         // now track the edges (hysteresis thresholding)
00545         while (stack_top > stack_bottom)
00546         {
00547             if ((stack_top - stack_bottom) + 8 > maxsize)
00548             {
00549                 int sz = (int)(stack_top - stack_bottom);
00550                 maxsize = maxsize * 3/2;
00551                 stack.resize(maxsize);
00552                 stack_bottom = &stack[0];
00553                 stack_top = stack_bottom + sz;
00554             }
00555 
00556             uchar* m;
00557             CANNY_POP(m);
00558 
00559             // Stops thresholding from expanding to other slices by sending pixels in the borders of each
00560             // slice in a queue to be serially processed later.
00561             if ( (m < map + (boundaries.start + 2) * mapstep) || (m >= map + boundaries.end * mapstep) )
00562             {
00563                 borderPeaks.push(m);
00564                 continue;
00565             }
00566 
00567             if (!m[-1])         CANNY_PUSH(m - 1);
00568             if (!m[1])          CANNY_PUSH(m + 1);
00569             if (!m[-mapstep-1]) CANNY_PUSH(m - mapstep - 1);
00570             if (!m[-mapstep])   CANNY_PUSH(m - mapstep);
00571             if (!m[-mapstep+1]) CANNY_PUSH(m - mapstep + 1);
00572             if (!m[mapstep-1])  CANNY_PUSH(m + mapstep - 1);
00573             if (!m[mapstep])    CANNY_PUSH(m + mapstep);
00574             if (!m[mapstep+1])  CANNY_PUSH(m + mapstep + 1);
00575         }
00576     }
00577 
00578 private:
00579     const Range boundaries;
00580     const Mat& src;
00581     uchar* map;
00582     int low;
00583     int high;
00584     int aperture_size;
00585     bool L2gradient;
00586 };
00587 
00588 #endif
00589 
00590 } // namespace cv
00591 
00592 void cv::Canny( InputArray _src, OutputArray _dst,
00593                 double low_thresh, double high_thresh,
00594                 int aperture_size, bool L2gradient )
00595 {
00596     const int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
00597     const Size size = _src.size();
00598 
00599     CV_Assert( depth == CV_8U );
00600     _dst.create(size, CV_8U);
00601 
00602     if (!L2gradient && (aperture_size & CV_CANNY_L2_GRADIENT) == CV_CANNY_L2_GRADIENT)
00603     {
00604         // backward compatibility
00605         aperture_size &= ~CV_CANNY_L2_GRADIENT;
00606         L2gradient = true;
00607     }
00608 
00609     if ((aperture_size & 1) == 0 || (aperture_size != -1 && (aperture_size < 3 || aperture_size > 7)))
00610         CV_Error(CV_StsBadFlag, "Aperture size should be odd");
00611 
00612     if (low_thresh > high_thresh)
00613         std::swap(low_thresh, high_thresh);
00614 
00615 #ifdef HAVE_OPENCL
00616     CV_OCL_RUN(_dst.isUMat() && (cn == 1 || cn == 3),
00617                ocl_Canny(_src, _dst, (float)low_thresh, (float)high_thresh, aperture_size, L2gradient, cn, size))
00618 #endif
00619 
00620     Mat src = _src.getMat(), dst = _dst.getMat();
00621 
00622 #ifdef HAVE_TEGRA_OPTIMIZATION
00623     if (tegra::useTegra() && tegra::canny(src, dst, low_thresh, high_thresh, aperture_size, L2gradient))
00624         return;
00625 #endif
00626 
00627     CV_IPP_RUN(USE_IPP_CANNY && (aperture_size == 3 && !L2gradient && 1 == cn), ippCanny(src, dst, (float)low_thresh, (float)high_thresh))
00628 
00629 #ifdef HAVE_TBB
00630 
00631 if (L2gradient)
00632 {
00633     low_thresh = std::min(32767.0, low_thresh);
00634     high_thresh = std::min(32767.0, high_thresh);
00635 
00636     if (low_thresh > 0) low_thresh *= low_thresh;
00637     if (high_thresh > 0) high_thresh *= high_thresh;
00638 }
00639 int low = cvFloor(low_thresh);
00640 int high = cvFloor(high_thresh);
00641 
00642 ptrdiff_t mapstep = src.cols + 2;
00643 AutoBuffer<uchar>  buffer((src.cols+2)*(src.rows+2));
00644 
00645 uchar* map = (uchar*)buffer;
00646 memset(map, 1, mapstep);
00647 memset(map + mapstep*(src.rows + 1), 1, mapstep);
00648 
00649 int threadsNumber = tbb::task_scheduler_init::default_num_threads();
00650 int grainSize = src.rows / threadsNumber;
00651 
00652 // Make a fallback for pictures with too few rows.
00653 uchar ksize2 = aperture_size / 2;
00654 int minGrainSize = 1 + ksize2;
00655 int maxGrainSize = src.rows - 2 - 2*ksize2;
00656 if ( !( minGrainSize <= grainSize && grainSize <= maxGrainSize ) )
00657 {
00658     threadsNumber = 1;
00659     grainSize = src.rows;
00660 }
00661 
00662 tbb::task_group g;
00663 
00664 for (int i = 0; i < threadsNumber; ++i)
00665 {
00666     if (i < threadsNumber - 1)
00667         g.run(tbbCanny(Range(i * grainSize, (i + 1) * grainSize), src, map, low, high, aperture_size, L2gradient));
00668     else
00669         g.run(tbbCanny(Range(i * grainSize, src.rows), src, map, low, high, aperture_size, L2gradient));
00670 }
00671 
00672 g.wait();
00673 
00674 #define CANNY_PUSH_SERIAL(d)    *(d) = uchar(2), borderPeaks.push(d)
00675 
00676 // now track the edges (hysteresis thresholding)
00677 uchar* m;
00678 while (borderPeaks.try_pop(m))
00679 {
00680     if (!m[-1])         CANNY_PUSH_SERIAL(m - 1);
00681     if (!m[1])          CANNY_PUSH_SERIAL(m + 1);
00682     if (!m[-mapstep-1]) CANNY_PUSH_SERIAL(m - mapstep - 1);
00683     if (!m[-mapstep])   CANNY_PUSH_SERIAL(m - mapstep);
00684     if (!m[-mapstep+1]) CANNY_PUSH_SERIAL(m - mapstep + 1);
00685     if (!m[mapstep-1])  CANNY_PUSH_SERIAL(m + mapstep - 1);
00686     if (!m[mapstep])    CANNY_PUSH_SERIAL(m + mapstep);
00687     if (!m[mapstep+1])  CANNY_PUSH_SERIAL(m + mapstep + 1);
00688 }
00689 
00690 #else
00691 
00692     Mat dx(src.rows, src.cols, CV_16SC(cn));
00693     Mat dy(src.rows, src.cols, CV_16SC(cn));
00694 
00695     Sobel(src, dx, CV_16S, 1, 0, aperture_size, 1, 0, BORDER_REPLICATE);
00696     Sobel(src, dy, CV_16S, 0, 1, aperture_size, 1, 0, BORDER_REPLICATE);
00697 
00698     if (L2gradient)
00699     {
00700         low_thresh = std::min(32767.0, low_thresh);
00701         high_thresh = std::min(32767.0, high_thresh);
00702 
00703         if (low_thresh > 0) low_thresh *= low_thresh;
00704         if (high_thresh > 0) high_thresh *= high_thresh;
00705     }
00706     int low = cvFloor(low_thresh);
00707     int high = cvFloor(high_thresh);
00708 
00709     ptrdiff_t mapstep = src.cols + 2;
00710     AutoBuffer<uchar>  buffer((src.cols+2)*(src.rows+2) + cn * mapstep * 3 * sizeof(int));
00711 
00712     int* mag_buf[3];
00713     mag_buf[0] = (int*)(uchar*)buffer;
00714     mag_buf[1] = mag_buf[0] + mapstep*cn;
00715     mag_buf[2] = mag_buf[1] + mapstep*cn;
00716     memset(mag_buf[0], 0, /* cn* */mapstep*sizeof(int));
00717 
00718     uchar* map = (uchar*)(mag_buf[2] + mapstep*cn);
00719     memset(map, 1, mapstep);
00720     memset(map + mapstep*(src.rows + 1), 1, mapstep);
00721 
00722     int maxsize = std::max(1 << 10, src.cols * src.rows / 10);
00723     std::vector<uchar*> stack(maxsize);
00724     uchar **stack_top = &stack[0];
00725     uchar **stack_bottom = &stack[0];
00726 
00727     /* sector numbers
00728        (Top-Left Origin)
00729 
00730         1   2   3
00731          *  *  *
00732           * * *
00733         0*******0
00734           * * *
00735          *  *  *
00736         3   2   1
00737     */
00738 
00739     #define CANNY_PUSH(d)    *(d) = uchar(2), *stack_top++ = (d)
00740     #define CANNY_POP(d)     (d) = *--stack_top
00741 
00742 #if CV_SSE2
00743     bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
00744 #endif
00745 
00746     // calculate magnitude and angle of gradient, perform non-maxima suppression.
00747     // fill the map with one of the following values:
00748     //   0 - the pixel might belong to an edge
00749     //   1 - the pixel can not belong to an edge
00750     //   2 - the pixel does belong to an edge
00751     for (int i = 0; i <= src.rows; i++)
00752     {
00753         int* _norm = mag_buf[(i > 0) + 1] + 1;
00754         if (i < src.rows)
00755         {
00756             short* _dx = dx.ptr<short>(i);
00757             short* _dy = dy.ptr<short>(i);
00758 
00759             if (!L2gradient)
00760             {
00761                 int j = 0, width = src.cols * cn;
00762 #if CV_SSE2
00763                 if (haveSSE2)
00764                 {
00765                     __m128i v_zero = _mm_setzero_si128();
00766                     for ( ; j <= width - 8; j += 8)
00767                     {
00768                         __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j));
00769                         __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j));
00770                         v_dx = _mm_max_epi16(v_dx, _mm_sub_epi16(v_zero, v_dx));
00771                         v_dy = _mm_max_epi16(v_dy, _mm_sub_epi16(v_zero, v_dy));
00772 
00773                         __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx, v_zero), _mm_unpacklo_epi16(v_dy, v_zero));
00774                         _mm_storeu_si128((__m128i *)(_norm + j), v_norm);
00775 
00776                         v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx, v_zero), _mm_unpackhi_epi16(v_dy, v_zero));
00777                         _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm);
00778                     }
00779                 }
00780 #elif CV_NEON
00781                 for ( ; j <= width - 8; j += 8)
00782                 {
00783                     int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
00784                     vst1q_s32(_norm + j, vaddq_s32(vabsq_s32(vmovl_s16(vget_low_s16(v_dx))),
00785                                                    vabsq_s32(vmovl_s16(vget_low_s16(v_dy)))));
00786                     vst1q_s32(_norm + j + 4, vaddq_s32(vabsq_s32(vmovl_s16(vget_high_s16(v_dx))),
00787                                                        vabsq_s32(vmovl_s16(vget_high_s16(v_dy)))));
00788                 }
00789 #endif
00790                 for ( ; j < width; ++j)
00791                     _norm[j] = std::abs(int(_dx[j])) + std::abs(int(_dy[j]));
00792             }
00793             else
00794             {
00795                 int j = 0, width = src.cols * cn;
00796 #if CV_SSE2
00797                 if (haveSSE2)
00798                 {
00799                     for ( ; j <= width - 8; j += 8)
00800                     {
00801                         __m128i v_dx = _mm_loadu_si128((const __m128i *)(_dx + j));
00802                         __m128i v_dy = _mm_loadu_si128((const __m128i *)(_dy + j));
00803 
00804                         __m128i v_dx_ml = _mm_mullo_epi16(v_dx, v_dx), v_dx_mh = _mm_mulhi_epi16(v_dx, v_dx);
00805                         __m128i v_dy_ml = _mm_mullo_epi16(v_dy, v_dy), v_dy_mh = _mm_mulhi_epi16(v_dy, v_dy);
00806 
00807                         __m128i v_norm = _mm_add_epi32(_mm_unpacklo_epi16(v_dx_ml, v_dx_mh), _mm_unpacklo_epi16(v_dy_ml, v_dy_mh));
00808                         _mm_storeu_si128((__m128i *)(_norm + j), v_norm);
00809 
00810                         v_norm = _mm_add_epi32(_mm_unpackhi_epi16(v_dx_ml, v_dx_mh), _mm_unpackhi_epi16(v_dy_ml, v_dy_mh));
00811                         _mm_storeu_si128((__m128i *)(_norm + j + 4), v_norm);
00812                     }
00813                 }
00814 #elif CV_NEON
00815                 for ( ; j <= width - 8; j += 8)
00816                 {
00817                     int16x8_t v_dx = vld1q_s16(_dx + j), v_dy = vld1q_s16(_dy + j);
00818                     int16x4_t v_dxp = vget_low_s16(v_dx), v_dyp = vget_low_s16(v_dy);
00819                     int32x4_t v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp);
00820                     vst1q_s32(_norm + j, v_dst);
00821 
00822                     v_dxp = vget_high_s16(v_dx), v_dyp = vget_high_s16(v_dy);
00823                     v_dst = vmlal_s16(vmull_s16(v_dxp, v_dxp), v_dyp, v_dyp);
00824                     vst1q_s32(_norm + j + 4, v_dst);
00825                 }
00826 #endif
00827                 for ( ; j < width; ++j)
00828                     _norm[j] = int(_dx[j])*_dx[j] + int(_dy[j])*_dy[j];
00829             }
00830 
00831             if (cn > 1)
00832             {
00833                 for(int j = 0, jn = 0; j < src.cols; ++j, jn += cn)
00834                 {
00835                     int maxIdx = jn;
00836                     for(int k = 1; k < cn; ++k)
00837                         if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k;
00838                     _norm[j] = _norm[maxIdx];
00839                     _dx[j] = _dx[maxIdx];
00840                     _dy[j] = _dy[maxIdx];
00841                 }
00842             }
00843             _norm[-1] = _norm[src.cols] = 0;
00844         }
00845         else
00846             memset(_norm-1, 0, /* cn* */mapstep*sizeof(int));
00847 
00848         // at the very beginning we do not have a complete ring
00849         // buffer of 3 magnitude rows for non-maxima suppression
00850         if (i == 0)
00851             continue;
00852 
00853         uchar* _map = map + mapstep*i + 1;
00854         _map[-1] = _map[src.cols] = 1;
00855 
00856         int* _mag = mag_buf[1] + 1; // take the central row
00857         ptrdiff_t magstep1 = mag_buf[2] - mag_buf[1];
00858         ptrdiff_t magstep2 = mag_buf[0] - mag_buf[1];
00859 
00860         const short* _x = dx.ptr<short>(i-1);
00861         const short* _y = dy.ptr<short>(i-1);
00862 
00863         if ((stack_top - stack_bottom) + src.cols > maxsize)
00864         {
00865             int sz = (int)(stack_top - stack_bottom);
00866             maxsize = std::max(maxsize * 3/2, sz + src.cols);
00867             stack.resize(maxsize);
00868             stack_bottom = &stack[0];
00869             stack_top = stack_bottom + sz;
00870         }
00871 
00872         int prev_flag = 0;
00873         for (int j = 0; j < src.cols; j++)
00874         {
00875             #define CANNY_SHIFT 15
00876             const int TG22 = (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
00877 
00878             int m = _mag[j];
00879 
00880             if (m > low)
00881             {
00882                 int xs = _x[j];
00883                 int ys = _y[j];
00884                 int x = std::abs(xs);
00885                 int y = std::abs(ys) << CANNY_SHIFT;
00886 
00887                 int tg22x = x * TG22;
00888 
00889                 if (y < tg22x)
00890                 {
00891                     if (m > _mag[j-1] && m >= _mag[j+1]) goto __ocv_canny_push;
00892                 }
00893                 else
00894                 {
00895                     int tg67x = tg22x + (x << (CANNY_SHIFT+1));
00896                     if (y > tg67x)
00897                     {
00898                         if (m > _mag[j+magstep2] && m >= _mag[j+magstep1]) goto __ocv_canny_push;
00899                     }
00900                     else
00901                     {
00902                         int s = (xs ^ ys) < 0 ? -1 : 1;
00903                         if (m > _mag[j+magstep2-s] && m > _mag[j+magstep1+s]) goto __ocv_canny_push;
00904                     }
00905                 }
00906             }
00907             prev_flag = 0;
00908             _map[j] = uchar(1);
00909             continue;
00910 __ocv_canny_push:
00911             if (!prev_flag && m > high && _map[j-mapstep] != 2)
00912             {
00913                 CANNY_PUSH(_map + j);
00914                 prev_flag = 1;
00915             }
00916             else
00917                 _map[j] = 0;
00918         }
00919 
00920         // scroll the ring buffer
00921         _mag = mag_buf[0];
00922         mag_buf[0] = mag_buf[1];
00923         mag_buf[1] = mag_buf[2];
00924         mag_buf[2] = _mag;
00925     }
00926 
00927     // now track the edges (hysteresis thresholding)
00928     while (stack_top > stack_bottom)
00929     {
00930         uchar* m;
00931         if ((stack_top - stack_bottom) + 8 > maxsize)
00932         {
00933             int sz = (int)(stack_top - stack_bottom);
00934             maxsize = maxsize * 3/2;
00935             stack.resize(maxsize);
00936             stack_bottom = &stack[0];
00937             stack_top = stack_bottom + sz;
00938         }
00939 
00940         CANNY_POP(m);
00941 
00942         if (!m[-1])         CANNY_PUSH(m - 1);
00943         if (!m[1])          CANNY_PUSH(m + 1);
00944         if (!m[-mapstep-1]) CANNY_PUSH(m - mapstep - 1);
00945         if (!m[-mapstep])   CANNY_PUSH(m - mapstep);
00946         if (!m[-mapstep+1]) CANNY_PUSH(m - mapstep + 1);
00947         if (!m[mapstep-1])  CANNY_PUSH(m + mapstep - 1);
00948         if (!m[mapstep])    CANNY_PUSH(m + mapstep);
00949         if (!m[mapstep+1])  CANNY_PUSH(m + mapstep + 1);
00950     }
00951 
00952 #endif
00953 
00954     // the final pass, form the final image
00955     const uchar* pmap = map + mapstep + 1;
00956     uchar* pdst = dst.ptr();
00957     for (int i = 0; i < src.rows; i++, pmap += mapstep, pdst += dst.step)
00958     {
00959         for (int j = 0; j < src.cols; j++)
00960             pdst[j] = (uchar)-(pmap[j] >> 1);
00961     }
00962 }
00963 
00964 void cvCanny( const CvArr* image, CvArr* edges, double threshold1,
00965               double threshold2, int aperture_size )
00966 {
00967     cv::Mat src = cv::cvarrToMat(image), dst = cv::cvarrToMat(edges);
00968     CV_Assert( src.size == dst.size && src.depth() == CV_8U && dst.type() == CV_8U );
00969 
00970     cv::Canny(src, dst, threshold1, threshold2, aperture_size & 255,
00971               (aperture_size & CV_CANNY_L2_GRADIENT) != 0);
00972 }
00973 
00974 /* End of file. */
00975
Repository toolbox

Repository details

Type:	Program
Created:	26 Jul 2017
Imports:	3
Forks:	0
Commits:	168
Dependents:	0
Dependencies:	0
Followers:	9
Important changes to repositories hosted on mbed.com

canny.cpp

Repository toolbox

Repository details

Important Information for this Arm website

Important changes to repositories hosted on mbed.com

canny.cpp

Repository toolbox

Repository details

Important Information for this Arm website

Access Warning