Renesas GR-PEACH OpenCV Development / gr-peach-opencv-project-sd-card_update

Fork of gr-peach-opencv-project-sd-card by the do

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers corner.cpp Source File

corner.cpp

00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
00015 // Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
00016 // Third party copyrights are property of their respective owners.
00017 //
00018 // Redistribution and use in source and binary forms, with or without modification,
00019 // are permitted provided that the following conditions are met:
00020 //
00021 //   * Redistribution's of source code must retain the above copyright notice,
00022 //     this list of conditions and the following disclaimer.
00023 //
00024 //   * Redistribution's in binary form must reproduce the above copyright notice,
00025 //     this list of conditions and the following disclaimer in the documentation
00026 //     and/or other materials provided with the distribution.
00027 //
00028 //   * The name of the copyright holders may not be used to endorse or promote products
00029 //     derived from this software without specific prior written permission.
00030 //
00031 // This software is provided by the copyright holders and contributors "as is" and
00032 // any express or implied warranties, including, but not limited to, the implied
00033 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00034 // In no event shall the Intel Corporation or contributors be liable for any direct,
00035 // indirect, incidental, special, exemplary, or consequential damages
00036 // (including, but not limited to, procurement of substitute goods or services;
00037 // loss of use, data, or profits; or business interruption) however caused
00038 // and on any theory of liability, whether in contract, strict liability,
00039 // or tort (including negligence or otherwise) arising in any way out of
00040 // the use of this software, even if advised of the possibility of such damage.
00041 //
00042 //M*/
00043 
00044 #include "precomp.hpp"
00045 #include "opencl_kernels_imgproc.hpp"
00046 
00047 namespace cv
00048 {
00049 
00050 static void calcMinEigenVal( const Mat& _cov, Mat& _dst )
00051 {
00052     int i, j;
00053     Size size = _cov.size();
00054 #if CV_SSE
00055     volatile bool simd = checkHardwareSupport(CV_CPU_SSE);
00056 #endif
00057 
00058     if( _cov.isContinuous() && _dst.isContinuous() )
00059     {
00060         size.width *= size.height;
00061         size.height = 1;
00062     }
00063 
00064     for( i = 0; i < size.height; i++ )
00065     {
00066         const float* cov = _cov.ptr<float>(i);
00067         float* dst = _dst.ptr<float>(i);
00068         j = 0;
00069     #if CV_SSE
00070         if( simd )
00071         {
00072             __m128 half = _mm_set1_ps(0.5f);
00073             for( ; j <= size.width - 4; j += 4 )
00074             {
00075                 __m128 t0 = _mm_loadu_ps(cov + j*3); // a0 b0 c0 x
00076                 __m128 t1 = _mm_loadu_ps(cov + j*3 + 3); // a1 b1 c1 x
00077                 __m128 t2 = _mm_loadu_ps(cov + j*3 + 6); // a2 b2 c2 x
00078                 __m128 t3 = _mm_loadu_ps(cov + j*3 + 9); // a3 b3 c3 x
00079                 __m128 a, b, c, t;
00080                 t = _mm_unpacklo_ps(t0, t1); // a0 a1 b0 b1
00081                 c = _mm_unpackhi_ps(t0, t1); // c0 c1 x x
00082                 b = _mm_unpacklo_ps(t2, t3); // a2 a3 b2 b3
00083                 c = _mm_movelh_ps(c, _mm_unpackhi_ps(t2, t3)); // c0 c1 c2 c3
00084                 a = _mm_movelh_ps(t, b);
00085                 b = _mm_movehl_ps(b, t);
00086                 a = _mm_mul_ps(a, half);
00087                 c = _mm_mul_ps(c, half);
00088                 t = _mm_sub_ps(a, c);
00089                 t = _mm_add_ps(_mm_mul_ps(t, t), _mm_mul_ps(b,b));
00090                 a = _mm_sub_ps(_mm_add_ps(a, c), _mm_sqrt_ps(t));
00091                 _mm_storeu_ps(dst + j, a);
00092             }
00093         }
00094     #elif CV_NEON
00095         float32x4_t v_half = vdupq_n_f32(0.5f);
00096         for( ; j <= size.width - 4; j += 4 )
00097         {
00098             float32x4x3_t v_src = vld3q_f32(cov + j * 3);
00099             float32x4_t v_a = vmulq_f32(v_src.val[0], v_half);
00100             float32x4_t v_b = v_src.val[1];
00101             float32x4_t v_c = vmulq_f32(v_src.val[2], v_half);
00102 
00103             float32x4_t v_t = vsubq_f32(v_a, v_c);
00104             v_t = vmlaq_f32(vmulq_f32(v_t, v_t), v_b, v_b);
00105             vst1q_f32(dst + j, vsubq_f32(vaddq_f32(v_a, v_c), cv_vsqrtq_f32(v_t)));
00106         }
00107     #endif
00108         for( ; j < size.width; j++ )
00109         {
00110             float a = cov[j*3]*0.5f;
00111             float b = cov[j*3+1];
00112             float c = cov[j*3+2]*0.5f;
00113             dst[j] = (float)((a + c) - std::sqrt((a - c)*(a - c) + b*b));
00114         }
00115     }
00116 }
00117 
00118 
00119 static void calcHarris( const Mat& _cov, Mat& _dst, double k )
00120 {
00121     int i, j;
00122     Size size = _cov.size();
00123 #if CV_SSE
00124     volatile bool simd = checkHardwareSupport(CV_CPU_SSE);
00125 #endif
00126 
00127     if( _cov.isContinuous() && _dst.isContinuous() )
00128     {
00129         size.width *= size.height;
00130         size.height = 1;
00131     }
00132 
00133     for( i = 0; i < size.height; i++ )
00134     {
00135         const float* cov = _cov.ptr<float>(i);
00136         float* dst = _dst.ptr<float>(i);
00137         j = 0;
00138 
00139     #if CV_SSE
00140         if( simd )
00141         {
00142             __m128 k4 = _mm_set1_ps((float)k);
00143             for( ; j <= size.width - 4; j += 4 )
00144             {
00145                 __m128 t0 = _mm_loadu_ps(cov + j*3); // a0 b0 c0 x
00146                 __m128 t1 = _mm_loadu_ps(cov + j*3 + 3); // a1 b1 c1 x
00147                 __m128 t2 = _mm_loadu_ps(cov + j*3 + 6); // a2 b2 c2 x
00148                 __m128 t3 = _mm_loadu_ps(cov + j*3 + 9); // a3 b3 c3 x
00149                 __m128 a, b, c, t;
00150                 t = _mm_unpacklo_ps(t0, t1); // a0 a1 b0 b1
00151                 c = _mm_unpackhi_ps(t0, t1); // c0 c1 x x
00152                 b = _mm_unpacklo_ps(t2, t3); // a2 a3 b2 b3
00153                 c = _mm_movelh_ps(c, _mm_unpackhi_ps(t2, t3)); // c0 c1 c2 c3
00154                 a = _mm_movelh_ps(t, b);
00155                 b = _mm_movehl_ps(b, t);
00156                 t = _mm_add_ps(a, c);
00157                 a = _mm_sub_ps(_mm_mul_ps(a, c), _mm_mul_ps(b, b));
00158                 t = _mm_mul_ps(_mm_mul_ps(k4, t), t);
00159                 a = _mm_sub_ps(a, t);
00160                 _mm_storeu_ps(dst + j, a);
00161             }
00162         }
00163     #elif CV_NEON
00164         float32x4_t v_k = vdupq_n_f32((float)k);
00165 
00166         for( ; j <= size.width - 4; j += 4 )
00167         {
00168             float32x4x3_t v_src = vld3q_f32(cov + j * 3);
00169             float32x4_t v_a = v_src.val[0], v_b = v_src.val[1], v_c = v_src.val[2];
00170             float32x4_t v_ac_bb = vmlsq_f32(vmulq_f32(v_a, v_c), v_b, v_b);
00171             float32x4_t v_ac = vaddq_f32(v_a, v_c);
00172             vst1q_f32(dst + j, vmlsq_f32(v_ac_bb, v_k, vmulq_f32(v_ac, v_ac)));
00173         }
00174     #endif
00175 
00176         for( ; j < size.width; j++ )
00177         {
00178             float a = cov[j*3];
00179             float b = cov[j*3+1];
00180             float c = cov[j*3+2];
00181             dst[j] = (float)(a*c - b*b - k*(a + c)*(a + c));
00182         }
00183     }
00184 }
00185 
00186 
00187 static void eigen2x2( const float* cov, float* dst, int n )
00188 {
00189     for( int j = 0; j < n; j++ )
00190     {
00191         double a = cov[j*3];
00192         double b = cov[j*3+1];
00193         double c = cov[j*3+2];
00194 
00195         double u = (a + c)*0.5;
00196         double v = std::sqrt((a - c)*(a - c)*0.25 + b*b);
00197         double l1 = u + v;
00198         double l2 = u - v;
00199 
00200         double x = b;
00201         double y = l1 - a;
00202         double e = fabs(x);
00203 
00204         if( e + fabs(y) < 1e-4 )
00205         {
00206             y = b;
00207             x = l1 - c;
00208             e = fabs(x);
00209             if( e + fabs(y) < 1e-4 )
00210             {
00211                 e = 1./(e + fabs(y) + FLT_EPSILON);
00212                 x *= e, y *= e;
00213             }
00214         }
00215 
00216         double d = 1./std::sqrt(x*x + y*y + DBL_EPSILON);
00217         dst[6*j] = (float)l1;
00218         dst[6*j + 2] = (float)(x*d);
00219         dst[6*j + 3] = (float)(y*d);
00220 
00221         x = b;
00222         y = l2 - a;
00223         e = fabs(x);
00224 
00225         if( e + fabs(y) < 1e-4 )
00226         {
00227             y = b;
00228             x = l2 - c;
00229             e = fabs(x);
00230             if( e + fabs(y) < 1e-4 )
00231             {
00232                 e = 1./(e + fabs(y) + FLT_EPSILON);
00233                 x *= e, y *= e;
00234             }
00235         }
00236 
00237         d = 1./std::sqrt(x*x + y*y + DBL_EPSILON);
00238         dst[6*j + 1] = (float)l2;
00239         dst[6*j + 4] = (float)(x*d);
00240         dst[6*j + 5] = (float)(y*d);
00241     }
00242 }
00243 
00244 static void calcEigenValsVecs( const Mat& _cov, Mat& _dst )
00245 {
00246     Size size = _cov.size();
00247     if( _cov.isContinuous() && _dst.isContinuous() )
00248     {
00249         size.width *= size.height;
00250         size.height = 1;
00251     }
00252 
00253     for( int i = 0; i < size.height; i++ )
00254     {
00255         const float* cov = _cov.ptr<float>(i);
00256         float* dst = _dst.ptr<float>(i);
00257 
00258         eigen2x2(cov, dst, size.width);
00259     }
00260 }
00261 
00262 
00263 enum { MINEIGENVAL=0, HARRIS=1, EIGENVALSVECS=2 };
00264 
00265 
00266 static void
00267 cornerEigenValsVecs( const Mat& src, Mat& eigenv, int block_size,
00268                      int aperture_size, int op_type, double k=0.,
00269                      int borderType=BORDER_DEFAULT )
00270 {
00271 #ifdef HAVE_TEGRA_OPTIMIZATION
00272     if (tegra::useTegra() && tegra::cornerEigenValsVecs(src, eigenv, block_size, aperture_size, op_type, k, borderType))
00273         return;
00274 #endif
00275 #if CV_SSE2
00276     bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2);
00277 #endif
00278 
00279     int depth = src.depth();
00280     double scale = (double)(1 << ((aperture_size > 0 ? aperture_size : 3) - 1)) * block_size;
00281     if( aperture_size < 0 )
00282         scale *= 2.0;
00283     if( depth == CV_8U )
00284         scale *= 255.0;
00285     scale = 1.0/scale;
00286 
00287     CV_Assert( src.type() == CV_8UC1 || src.type() == CV_32FC1 );
00288 
00289     Mat Dx, Dy;
00290     if( aperture_size > 0 )
00291     {
00292         Sobel( src, Dx, CV_32F, 1, 0, aperture_size, scale, 0, borderType );
00293         Sobel( src, Dy, CV_32F, 0, 1, aperture_size, scale, 0, borderType );
00294     }
00295     else
00296     {
00297         Scharr( src, Dx, CV_32F, 1, 0, scale, 0, borderType );
00298         Scharr( src, Dy, CV_32F, 0, 1, scale, 0, borderType );
00299     }
00300 
00301     Size size = src.size();
00302     Mat cov( size, CV_32FC3 );
00303     int i, j;
00304 
00305     for( i = 0; i < size.height; i++ )
00306     {
00307         float* cov_data = cov.ptr<float>(i);
00308         const float* dxdata = Dx.ptr<float>(i);
00309         const float* dydata = Dy.ptr<float>(i);
00310         j = 0;
00311 
00312         #if CV_NEON
00313         for( ; j <= size.width - 4; j += 4 )
00314         {
00315             float32x4_t v_dx = vld1q_f32(dxdata + j);
00316             float32x4_t v_dy = vld1q_f32(dydata + j);
00317 
00318             float32x4x3_t v_dst;
00319             v_dst.val[0] = vmulq_f32(v_dx, v_dx);
00320             v_dst.val[1] = vmulq_f32(v_dx, v_dy);
00321             v_dst.val[2] = vmulq_f32(v_dy, v_dy);
00322 
00323             vst3q_f32(cov_data + j * 3, v_dst);
00324         }
00325         #elif CV_SSE2
00326         if (haveSSE2)
00327         {
00328             for( ; j <= size.width - 8; j += 8 )
00329             {
00330                 __m128 v_dx_0 = _mm_loadu_ps(dxdata + j);
00331                 __m128 v_dx_1 = _mm_loadu_ps(dxdata + j + 4);
00332                 __m128 v_dy_0 = _mm_loadu_ps(dydata + j);
00333                 __m128 v_dy_1 = _mm_loadu_ps(dydata + j + 4);
00334 
00335                 __m128 v_dx2_0 = _mm_mul_ps(v_dx_0, v_dx_0);
00336                 __m128 v_dxy_0 = _mm_mul_ps(v_dx_0, v_dy_0);
00337                 __m128 v_dy2_0 = _mm_mul_ps(v_dy_0, v_dy_0);
00338                 __m128 v_dx2_1 = _mm_mul_ps(v_dx_1, v_dx_1);
00339                 __m128 v_dxy_1 = _mm_mul_ps(v_dx_1, v_dy_1);
00340                 __m128 v_dy2_1 = _mm_mul_ps(v_dy_1, v_dy_1);
00341 
00342                 _mm_interleave_ps(v_dx2_0, v_dx2_1, v_dxy_0, v_dxy_1, v_dy2_0, v_dy2_1);
00343 
00344                 _mm_storeu_ps(cov_data + j * 3, v_dx2_0);
00345                 _mm_storeu_ps(cov_data + j * 3 + 4, v_dx2_1);
00346                 _mm_storeu_ps(cov_data + j * 3 + 8, v_dxy_0);
00347                 _mm_storeu_ps(cov_data + j * 3 + 12, v_dxy_1);
00348                 _mm_storeu_ps(cov_data + j * 3 + 16, v_dy2_0);
00349                 _mm_storeu_ps(cov_data + j * 3 + 20, v_dy2_1);
00350             }
00351         }
00352         #endif
00353 
00354         for( ; j < size.width; j++ )
00355         {
00356             float dx = dxdata[j];
00357             float dy = dydata[j];
00358 
00359             cov_data[j*3] = dx*dx;
00360             cov_data[j*3+1] = dx*dy;
00361             cov_data[j*3+2] = dy*dy;
00362         }
00363     }
00364 
00365     boxFilter(cov, cov, cov.depth(), Size(block_size, block_size),
00366         Point(-1,-1), false, borderType );
00367 
00368     if( op_type == MINEIGENVAL )
00369         calcMinEigenVal( cov, eigenv );
00370     else if( op_type == HARRIS )
00371         calcHarris( cov, eigenv, k );
00372     else if( op_type == EIGENVALSVECS )
00373         calcEigenValsVecs( cov, eigenv );
00374 }
00375 
00376 #ifdef HAVE_OPENCL
00377 
00378 static bool extractCovData(InputArray _src, UMat & Dx, UMat & Dy, int depth,
00379                            float scale, int aperture_size, int borderType)
00380 {
00381     UMat src = _src.getUMat();
00382 
00383     Size wholeSize;
00384     Point ofs;
00385     src.locateROI(wholeSize, ofs);
00386 
00387     const int sobel_lsz = 16;
00388     if ((aperture_size == 3 || aperture_size == 5 || aperture_size == 7 || aperture_size == -1) &&
00389         wholeSize.height > sobel_lsz + (aperture_size >> 1) &&
00390         wholeSize.width > sobel_lsz + (aperture_size >> 1))
00391     {
00392         CV_Assert(depth == CV_8U || depth == CV_32F);
00393 
00394         Dx.create(src.size(), CV_32FC1);
00395         Dy.create(src.size(), CV_32FC1);
00396 
00397         size_t localsize[2] = { (size_t)sobel_lsz, (size_t)sobel_lsz };
00398         size_t globalsize[2] = { localsize[0] * (1 + (src.cols - 1) / localsize[0]),
00399                                  localsize[1] * (1 + (src.rows - 1) / localsize[1]) };
00400 
00401         int src_offset_x = (int)((src.offset % src.step) / src.elemSize());
00402         int src_offset_y = (int)(src.offset / src.step);
00403 
00404         const char * const borderTypes[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT",
00405                                              "BORDER_WRAP", "BORDER_REFLECT101" };
00406 
00407         ocl::Kernel k(format("sobel%d", aperture_size).c_str(), ocl::imgproc::covardata_oclsrc,
00408                       cv::format("-D BLK_X=%d -D BLK_Y=%d -D %s -D SRCTYPE=%s%s",
00409                                  (int)localsize[0], (int)localsize[1], borderTypes[borderType], ocl::typeToStr(depth),
00410                                  aperture_size < 0 ? " -D SCHARR" : ""));
00411         if (k.empty())
00412             return false;
00413 
00414         k.args(ocl::KernelArg::PtrReadOnly(src), (int)src.step, src_offset_x, src_offset_y,
00415                ocl::KernelArg::WriteOnlyNoSize(Dx), ocl::KernelArg::WriteOnly(Dy),
00416                wholeSize.height, wholeSize.width, scale);
00417 
00418         return k.run(2, globalsize, localsize, false);
00419     }
00420     else
00421     {
00422         if (aperture_size > 0)
00423         {
00424             Sobel(_src, Dx, CV_32F, 1, 0, aperture_size, scale, 0, borderType);
00425             Sobel(_src, Dy, CV_32F, 0, 1, aperture_size, scale, 0, borderType);
00426         }
00427         else
00428         {
00429             Scharr(_src, Dx, CV_32F, 1, 0, scale, 0, borderType);
00430             Scharr(_src, Dy, CV_32F, 0, 1, scale, 0, borderType);
00431         }
00432     }
00433 
00434     return true;
00435 }
00436 
00437 static bool ocl_cornerMinEigenValVecs(InputArray _src, OutputArray _dst, int block_size,
00438                                       int aperture_size, double k, int borderType, int op_type)
00439 {
00440     CV_Assert(op_type == HARRIS || op_type == MINEIGENVAL);
00441 
00442     if ( !(borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE ||
00443            borderType == BORDER_REFLECT || borderType == BORDER_REFLECT_101) )
00444         return false;
00445 
00446     int type = _src.type(), depth = CV_MAT_DEPTH(type);
00447     if ( !(type == CV_8UC1 || type == CV_32FC1) )
00448         return false;
00449 
00450     const char * const borderTypes[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT",
00451                                          "BORDER_WRAP", "BORDER_REFLECT101" };
00452     const char * const cornerType[] = { "CORNER_MINEIGENVAL", "CORNER_HARRIS", 0 };
00453 
00454 
00455     double scale = (double)(1 << ((aperture_size > 0 ? aperture_size : 3) - 1)) * block_size;
00456     if (aperture_size < 0)
00457         scale *= 2.0;
00458     if (depth == CV_8U)
00459         scale *= 255.0;
00460     scale = 1.0 / scale;
00461 
00462     UMat Dx, Dy;
00463     if (!extractCovData(_src, Dx, Dy, depth, (float)scale, aperture_size, borderType))
00464         return false;
00465 
00466     ocl::Kernel cornelKernel("corner", ocl::imgproc::corner_oclsrc,
00467                              format("-D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s -D %s",
00468                                     block_size / 2, block_size / 2, block_size, block_size,
00469                                     borderTypes[borderType], cornerType[op_type]));
00470     if (cornelKernel.empty())
00471         return false;
00472 
00473     _dst.createSameSize(_src, CV_32FC1);
00474     UMat dst = _dst.getUMat();
00475 
00476     cornelKernel.args(ocl::KernelArg::ReadOnly(Dx), ocl::KernelArg::ReadOnly(Dy),
00477                       ocl::KernelArg::WriteOnly(dst), (float)k);
00478 
00479     size_t blockSizeX = 256, blockSizeY = 1;
00480     size_t gSize = blockSizeX - block_size / 2 * 2;
00481     size_t globalSizeX = (Dx.cols) % gSize == 0 ? Dx.cols / gSize * blockSizeX : (Dx.cols / gSize + 1) * blockSizeX;
00482     size_t rows_per_thread = 2;
00483     size_t globalSizeY = ((Dx.rows + rows_per_thread - 1) / rows_per_thread) % blockSizeY == 0 ?
00484                          ((Dx.rows + rows_per_thread - 1) / rows_per_thread) :
00485                          (((Dx.rows + rows_per_thread - 1) / rows_per_thread) / blockSizeY + 1) * blockSizeY;
00486 
00487     size_t globalsize[2] = { globalSizeX, globalSizeY }, localsize[2] = { blockSizeX, blockSizeY };
00488     return cornelKernel.run(2, globalsize, localsize, false);
00489 }
00490 
00491 static bool ocl_preCornerDetect( InputArray _src, OutputArray _dst, int ksize, int borderType, int depth )
00492 {
00493     UMat Dx, Dy, D2x, D2y, Dxy;
00494 
00495     if (!extractCovData(_src, Dx, Dy, depth, 1, ksize, borderType))
00496         return false;
00497 
00498     Sobel( _src, D2x, CV_32F, 2, 0, ksize, 1, 0, borderType );
00499     Sobel( _src, D2y, CV_32F, 0, 2, ksize, 1, 0, borderType );
00500     Sobel( _src, Dxy, CV_32F, 1, 1, ksize, 1, 0, borderType );
00501 
00502     _dst.create( _src.size(), CV_32FC1 );
00503     UMat dst = _dst.getUMat();
00504 
00505     double factor = 1 << (ksize - 1);
00506     if( depth == CV_8U )
00507         factor *= 255;
00508     factor = 1./(factor * factor * factor);
00509 
00510     ocl::Kernel k("preCornerDetect", ocl::imgproc::precornerdetect_oclsrc);
00511     if (k.empty())
00512         return false;
00513 
00514     k.args(ocl::KernelArg::ReadOnlyNoSize(Dx), ocl::KernelArg::ReadOnlyNoSize(Dy),
00515            ocl::KernelArg::ReadOnlyNoSize(D2x), ocl::KernelArg::ReadOnlyNoSize(D2y),
00516            ocl::KernelArg::ReadOnlyNoSize(Dxy), ocl::KernelArg::WriteOnly(dst), (float)factor);
00517 
00518     size_t globalsize[2] = { (size_t)dst.cols, (size_t)dst.rows };
00519     return k.run(2, globalsize, NULL, false);
00520 }
00521 
00522 #endif
00523 
00524 }
00525 
00526 #if defined(HAVE_IPP)
00527 namespace cv
00528 {
00529 static bool ipp_cornerMinEigenVal( InputArray _src, OutputArray _dst, int blockSize, int ksize, int borderType )
00530 {
00531 #if IPP_VERSION_X100 >= 800
00532     Mat src = _src.getMat();
00533     _dst.create( src.size(), CV_32FC1 );
00534     Mat dst = _dst.getMat();
00535 
00536     {
00537         typedef IppStatus (CV_STDCALL * ippiMinEigenValGetBufferSize)(IppiSize, int, int, int*);
00538         typedef IppStatus (CV_STDCALL * ippiMinEigenVal)(const void*, int, Ipp32f*, int, IppiSize, IppiKernelType, int, int, Ipp8u*);
00539         IppiKernelType kerType;
00540         int kerSize = ksize;
00541         if (ksize < 0)
00542         {
00543             kerType = ippKernelScharr;
00544             kerSize = 3;
00545         } else
00546         {
00547             kerType = ippKernelSobel;
00548         }
00549         bool isolated = (borderType & BORDER_ISOLATED) != 0;
00550         int borderTypeNI = borderType & ~BORDER_ISOLATED;
00551         if ((borderTypeNI == BORDER_REPLICATE && (!src.isSubmatrix() || isolated)) &&
00552             (kerSize == 3 || kerSize == 5) && (blockSize == 3 || blockSize == 5))
00553         {
00554             ippiMinEigenValGetBufferSize getBufferSizeFunc = 0;
00555             ippiMinEigenVal minEigenValFunc = 0;
00556             float norm_coef = 0.f;
00557 
00558             if (src.type() == CV_8UC1)
00559             {
00560                 getBufferSizeFunc = (ippiMinEigenValGetBufferSize) ippiMinEigenValGetBufferSize_8u32f_C1R;
00561                 minEigenValFunc = (ippiMinEigenVal) ippiMinEigenVal_8u32f_C1R;
00562                 norm_coef = 1.f / 255.f;
00563             } else if (src.type() == CV_32FC1)
00564             {
00565                 getBufferSizeFunc = (ippiMinEigenValGetBufferSize) ippiMinEigenValGetBufferSize_32f_C1R;
00566                 minEigenValFunc = (ippiMinEigenVal) ippiMinEigenVal_32f_C1R;
00567                 norm_coef = 255.f;
00568             }
00569             norm_coef = kerType == ippKernelSobel ? norm_coef : norm_coef / 2.45f;
00570 
00571             if (getBufferSizeFunc && minEigenValFunc)
00572             {
00573                 int bufferSize;
00574                 IppiSize srcRoi = { src.cols, src.rows };
00575                 IppStatus ok = getBufferSizeFunc(srcRoi, kerSize, blockSize, &bufferSize);
00576                 if (ok >= 0)
00577                 {
00578                     AutoBuffer<uchar> buffer(bufferSize);
00579                     ok = minEigenValFunc(src.ptr(), (int) src.step, dst.ptr<Ipp32f>(), (int) dst.step, srcRoi, kerType, kerSize, blockSize, buffer);
00580                     CV_SUPPRESS_DEPRECATED_START
00581                     if (ok >= 0) ok = ippiMulC_32f_C1IR(norm_coef, dst.ptr<Ipp32f>(), (int) dst.step, srcRoi);
00582                     CV_SUPPRESS_DEPRECATED_END
00583                     if (ok >= 0)
00584                     {
00585                         CV_IMPL_ADD(CV_IMPL_IPP);
00586                         return true;
00587                     }
00588                 }
00589             }
00590         }
00591     }
00592 #else
00593     CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(blockSize); CV_UNUSED(borderType);
00594 #endif
00595     return false;
00596 }
00597 }
00598 #endif
00599 
00600 void cv::cornerMinEigenVal( InputArray _src, OutputArray _dst, int blockSize, int ksize, int borderType )
00601 {
00602 #ifdef HAVE_OPENCL
00603     CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
00604                ocl_cornerMinEigenValVecs(_src, _dst, blockSize, ksize, 0.0, borderType, MINEIGENVAL))
00605 #endif
00606 
00607 #ifdef HAVE_IPP
00608     int kerSize = (ksize < 0)?3:ksize;
00609     bool isolated = (borderType & BORDER_ISOLATED) != 0;
00610     int borderTypeNI = borderType & ~BORDER_ISOLATED;
00611 #endif
00612     CV_IPP_RUN(((borderTypeNI == BORDER_REPLICATE && (!_src.isSubmatrix() || isolated)) &&
00613             (kerSize == 3 || kerSize == 5) && (blockSize == 3 || blockSize == 5)) && IPP_VERSION_X100 >= 800,
00614     ipp_cornerMinEigenVal( _src, _dst, blockSize, ksize, borderType ));
00615 
00616 
00617     Mat src = _src.getMat();
00618     _dst.create( src.size(), CV_32FC1 );
00619     Mat dst = _dst.getMat();
00620 
00621     cornerEigenValsVecs( src, dst, blockSize, ksize, MINEIGENVAL, 0, borderType );
00622 }
00623 
00624 
00625 #if defined(HAVE_IPP)
00626 namespace cv
00627 {
00628 static bool ipp_cornerHarris( InputArray _src, OutputArray _dst, int blockSize, int ksize, double k, int borderType )
00629 {
00630 #if IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK
00631     Mat src = _src.getMat();
00632     _dst.create( src.size(), CV_32FC1 );
00633     Mat dst = _dst.getMat();
00634 
00635     {
00636         int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
00637         int borderTypeNI = borderType & ~BORDER_ISOLATED;
00638         bool isolated = (borderType & BORDER_ISOLATED) != 0;
00639 
00640         if ( (ksize == 3 || ksize == 5) && (type == CV_8UC1 || type == CV_32FC1) &&
00641             (borderTypeNI == BORDER_CONSTANT || borderTypeNI == BORDER_REPLICATE) && cn == 1 && (!src.isSubmatrix() || isolated) )
00642         {
00643             IppiSize roisize = { src.cols, src.rows };
00644             IppiMaskSize masksize = ksize == 5 ? ippMskSize5x5 : ippMskSize3x3;
00645             IppDataType datatype = type == CV_8UC1 ? ipp8u : ipp32f;
00646             Ipp32s bufsize = 0;
00647 
00648             double scale = (double)(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize;
00649             if (ksize < 0)
00650                 scale *= 2.0;
00651             if (depth == CV_8U)
00652                 scale *= 255.0;
00653             scale = std::pow(scale, -4.0);
00654 
00655             if (ippiHarrisCornerGetBufferSize(roisize, masksize, blockSize, datatype, cn, &bufsize) >= 0)
00656             {
00657                 Ipp8u * buffer = ippsMalloc_8u(bufsize);
00658                 IppiDifferentialKernel filterType = ksize > 0 ? ippFilterSobel : ippFilterScharr;
00659                 IppiBorderType borderTypeIpp = borderTypeNI == BORDER_CONSTANT ? ippBorderConst : ippBorderRepl;
00660                 IppStatus status = (IppStatus)-1;
00661 
00662                 if (depth == CV_8U)
00663                     status = ippiHarrisCorner_8u32f_C1R((const Ipp8u *)src.data, (int)src.step, (Ipp32f *)dst.data, (int)dst.step, roisize,
00664                                                         filterType, masksize, blockSize, (Ipp32f)k, (Ipp32f)scale, borderTypeIpp, 0, buffer);
00665                 else if (depth == CV_32F)
00666                     status = ippiHarrisCorner_32f_C1R((const Ipp32f *)src.data, (int)src.step, (Ipp32f *)dst.data, (int)dst.step, roisize,
00667                                                       filterType, masksize, blockSize, (Ipp32f)k, (Ipp32f)scale, borderTypeIpp, 0, buffer);
00668                 ippsFree(buffer);
00669 
00670                 if (status >= 0)
00671                 {
00672                     CV_IMPL_ADD(CV_IMPL_IPP);
00673                     return true;
00674                 }
00675             }
00676         }
00677     }
00678 #else
00679     CV_UNUSED(_src); CV_UNUSED(_dst); CV_UNUSED(blockSize);  CV_UNUSED(ksize); CV_UNUSED(k); CV_UNUSED(borderType);
00680 #endif
00681     return false;
00682 }
00683 }
00684 #endif
00685 
00686 void cv::cornerHarris( InputArray _src, OutputArray _dst, int blockSize, int ksize, double k, int borderType )
00687 {
00688 #ifdef HAVE_OPENCL
00689     CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
00690                ocl_cornerMinEigenValVecs(_src, _dst, blockSize, ksize, k, borderType, HARRIS))
00691 #endif
00692 
00693 #ifdef HAVE_IPP
00694     int borderTypeNI = borderType & ~BORDER_ISOLATED;
00695     bool isolated = (borderType & BORDER_ISOLATED) != 0;
00696 #endif
00697     CV_IPP_RUN(((ksize == 3 || ksize == 5) && (_src.type() == CV_8UC1 || _src.type() == CV_32FC1) &&
00698         (borderTypeNI == BORDER_CONSTANT || borderTypeNI == BORDER_REPLICATE) && CV_MAT_CN(_src.type()) == 1 &&
00699         (!_src.isSubmatrix() || isolated)) && IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK, ipp_cornerHarris( _src, _dst, blockSize, ksize, k, borderType ));
00700 
00701 
00702     Mat src = _src.getMat();
00703     _dst.create( src.size(), CV_32FC1 );
00704     Mat dst = _dst.getMat();
00705 
00706 
00707     cornerEigenValsVecs( src, dst, blockSize, ksize, HARRIS, k, borderType );
00708 }
00709 
00710 
00711 void cv::cornerEigenValsAndVecs( InputArray _src, OutputArray _dst, int blockSize, int ksize, int borderType )
00712 {
00713     Mat src = _src.getMat();
00714     Size dsz = _dst.size();
00715     int dtype = _dst.type();
00716 
00717     if( dsz.height != src.rows || dsz.width*CV_MAT_CN(dtype) != src.cols*6 || CV_MAT_DEPTH(dtype) != CV_32F )
00718         _dst.create( src.size(), CV_32FC(6) );
00719     Mat dst = _dst.getMat();
00720     cornerEigenValsVecs( src, dst, blockSize, ksize, EIGENVALSVECS, 0, borderType );
00721 }
00722 
00723 
00724 void cv::preCornerDetect( InputArray _src, OutputArray _dst, int ksize, int borderType )
00725 {
00726     int type = _src.type();
00727     CV_Assert( type == CV_8UC1 || type == CV_32FC1 );
00728 
00729 #ifdef HAVE_OPENCL
00730     CV_OCL_RUN( _src.dims() <= 2 && _dst.isUMat(),
00731                 ocl_preCornerDetect(_src, _dst, ksize, borderType, CV_MAT_DEPTH(type)))
00732 #endif
00733 
00734     Mat Dx, Dy, D2x, D2y, Dxy, src = _src.getMat();
00735     _dst.create( src.size(), CV_32FC1 );
00736     Mat dst = _dst.getMat();
00737 
00738     Sobel( src, Dx, CV_32F, 1, 0, ksize, 1, 0, borderType );
00739     Sobel( src, Dy, CV_32F, 0, 1, ksize, 1, 0, borderType );
00740     Sobel( src, D2x, CV_32F, 2, 0, ksize, 1, 0, borderType );
00741     Sobel( src, D2y, CV_32F, 0, 2, ksize, 1, 0, borderType );
00742     Sobel( src, Dxy, CV_32F, 1, 1, ksize, 1, 0, borderType );
00743 
00744     double factor = 1 << (ksize - 1);
00745     if( src.depth() == CV_8U )
00746         factor *= 255;
00747     factor = 1./(factor * factor * factor);
00748 #if CV_NEON || CV_SSE2
00749     float factor_f = (float)factor;
00750 #endif
00751 
00752 #if CV_SSE2
00753     volatile bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2);
00754     __m128 v_factor = _mm_set1_ps(factor_f), v_m2 = _mm_set1_ps(-2.0f);
00755 #endif
00756 
00757     Size size = src.size();
00758     int i, j;
00759     for( i = 0; i < size.height; i++ )
00760     {
00761         float* dstdata = dst.ptr<float>(i);
00762         const float* dxdata = Dx.ptr<float>(i);
00763         const float* dydata = Dy.ptr<float>(i);
00764         const float* d2xdata = D2x.ptr<float>(i);
00765         const float* d2ydata = D2y.ptr<float>(i);
00766         const float* dxydata = Dxy.ptr<float>(i);
00767 
00768         j = 0;
00769 
00770 #if CV_SSE2
00771         if (haveSSE2)
00772         {
00773             for( ; j <= size.width - 4; j += 4 )
00774             {
00775                 __m128 v_dx = _mm_loadu_ps((const float *)(dxdata + j));
00776                 __m128 v_dy = _mm_loadu_ps((const float *)(dydata + j));
00777 
00778                 __m128 v_s1 = _mm_mul_ps(_mm_mul_ps(v_dx, v_dx), _mm_loadu_ps((const float *)(d2ydata + j)));
00779                 __m128 v_s2 = _mm_mul_ps(_mm_mul_ps(v_dy, v_dy), _mm_loadu_ps((const float *)(d2xdata + j)));
00780                 __m128 v_s3 = _mm_mul_ps(_mm_mul_ps(v_dx, v_dy), _mm_loadu_ps((const float *)(dxydata + j)));
00781                 v_s1 = _mm_mul_ps(v_factor, _mm_add_ps(v_s1, _mm_add_ps(v_s2, _mm_mul_ps(v_s3, v_m2))));
00782                 _mm_storeu_ps(dstdata + j, v_s1);
00783             }
00784         }
00785 #elif CV_NEON
00786         for( ; j <= size.width - 4; j += 4 )
00787         {
00788             float32x4_t v_dx = vld1q_f32(dxdata + j), v_dy = vld1q_f32(dydata + j);
00789             float32x4_t v_s = vmulq_f32(v_dx, vmulq_f32(v_dx, vld1q_f32(d2ydata + j)));
00790             v_s = vmlaq_f32(v_s, vld1q_f32(d2xdata + j), vmulq_f32(v_dy, v_dy));
00791             v_s = vmlaq_f32(v_s, vld1q_f32(dxydata + j), vmulq_n_f32(vmulq_f32(v_dy, v_dx), -2));
00792             vst1q_f32(dstdata + j, vmulq_n_f32(v_s, factor_f));
00793         }
00794 #endif
00795 
00796         for( ; j < size.width; j++ )
00797         {
00798             float dx = dxdata[j];
00799             float dy = dydata[j];
00800             dstdata[j] = (float)(factor*(dx*dx*d2ydata[j] + dy*dy*d2xdata[j] - 2*dx*dy*dxydata[j]));
00801         }
00802     }
00803 }
00804 
00805 CV_IMPL void
00806 cvCornerMinEigenVal( const CvArr* srcarr, CvArr* dstarr,
00807                      int block_size, int aperture_size )
00808 {
00809     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
00810 
00811     CV_Assert( src.size() == dst.size() && dst.type() == CV_32FC1 );
00812     cv::cornerMinEigenVal( src, dst, block_size, aperture_size, cv::BORDER_REPLICATE );
00813 }
00814 
00815 CV_IMPL void
00816 cvCornerHarris( const CvArr* srcarr, CvArr* dstarr,
00817                 int block_size, int aperture_size, double k )
00818 {
00819     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
00820 
00821     CV_Assert( src.size() == dst.size() && dst.type() == CV_32FC1 );
00822     cv::cornerHarris( src, dst, block_size, aperture_size, k, cv::BORDER_REPLICATE );
00823 }
00824 
00825 
00826 CV_IMPL void
00827 cvCornerEigenValsAndVecs( const void* srcarr, void* dstarr,
00828                           int block_size, int aperture_size )
00829 {
00830     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
00831 
00832     CV_Assert( src.rows == dst.rows && src.cols*6 == dst.cols*dst.channels() && dst.depth() == CV_32F );
00833     cv::cornerEigenValsAndVecs( src, dst, block_size, aperture_size, cv::BORDER_REPLICATE );
00834 }
00835 
00836 
00837 CV_IMPL void
00838 cvPreCornerDetect( const void* srcarr, void* dstarr, int aperture_size )
00839 {
00840     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
00841 
00842     CV_Assert( src.size() == dst.size() && dst.type() == CV_32FC1 );
00843     cv::preCornerDetect( src, dst, aperture_size, cv::BORDER_REPLICATE );
00844 }
00845 
00846 /* End of file */
00847