Fork of gr-peach-opencv-project-sd-card
lapack.cpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
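//
// This file implements the dense linear algebra layer behind cv::determinant,
// cv::invert, cv::solve, cv::eigen and cv::SVD: LU and Cholesky solvers forwarded
// to the HAL, a Jacobi eigenvalue solver for symmetric matrices, a one-sided
// Jacobi SVD with SSE2-accelerated helper kernels, SVD back-substitution, and the
// legacy C API wrappers (cvDet, cvInvert, cvSolve, cvEigenVV, cvSVD, cvSVBkSb).
//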
#include "precomp.hpp"
#include <limits>

#if defined _M_IX86 && defined _MSC_VER && _MSC_VER < 1700
#pragma float_control(precise, on)
#endif

namespace cv
{

int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n)
{
    return hal::LU32f(A, astep, m, b, bstep, n);
}

int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n)
{
    return hal::LU64f(A, astep, m, b, bstep, n);
}

bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n)
{
    return hal::Cholesky32f(A, astep, m, b, bstep, n);
}

bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n)
{
    return hal::Cholesky64f(A, astep, m, b, bstep, n);
}

template<typename _Tp> static inline _Tp hypot(_Tp a, _Tp b)
{
    a = std::abs(a);
    b = std::abs(b);
    if( a > b )
    {
        b /= a;
        return a*std::sqrt(1 + b*b);
    }
    if( b > 0 )
    {
        a /= b;
        return b*std::sqrt(1 + a*a);
    }
    return 0;
}

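// JacobiImpl_: cyclic Jacobi eigenvalue iteration for a symmetric n x n matrix A.
// indR[k] and indC[k] cache the position of the largest off-diagonal element in
// row k (right of the diagonal) and column k (above the diagonal), so the pivot
// with the largest magnitude can be found quickly in every iteration. Eigenvalues
// are returned in W in descending order; if V is non-NULL, the corresponding
// eigenvectors are accumulated in its rows.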
template<typename _Tp> bool
JacobiImpl_( _Tp* A, size_t astep, _Tp* W, _Tp* V, size_t vstep, int n, uchar* buf )
{
    const _Tp eps = std::numeric_limits<_Tp>::epsilon();
    int i, j, k, m;

    astep /= sizeof(A[0]);
    if( V )
    {
        vstep /= sizeof(V[0]);
        for( i = 0; i < n; i++ )
        {
            for( j = 0; j < n; j++ )
                V[i*vstep + j] = (_Tp)0;
            V[i*vstep + i] = (_Tp)1;
        }
    }

    int iters, maxIters = n*n*30;

    int* indR = (int*)alignPtr(buf, sizeof(int));
    int* indC = indR + n;
    _Tp mv = (_Tp)0;

    for( k = 0; k < n; k++ )
    {
        W[k] = A[(astep + 1)*k];
        if( k < n - 1 )
        {
            for( m = k+1, mv = std::abs(A[astep*k + m]), i = k+2; i < n; i++ )
            {
                _Tp val = std::abs(A[astep*k+i]);
                if( mv < val )
                    mv = val, m = i;
            }
            indR[k] = m;
        }
        if( k > 0 )
        {
            for( m = 0, mv = std::abs(A[k]), i = 1; i < k; i++ )
            {
                _Tp val = std::abs(A[astep*i+k]);
                if( mv < val )
                    mv = val, m = i;
            }
            indC[k] = m;
        }
    }

    if( n > 1 ) for( iters = 0; iters < maxIters; iters++ )
    {
        // find index (k,l) of pivot p
        for( k = 0, mv = std::abs(A[indR[0]]), i = 1; i < n-1; i++ )
        {
            _Tp val = std::abs(A[astep*i + indR[i]]);
            if( mv < val )
                mv = val, k = i;
        }
        int l = indR[k];
        for( i = 1; i < n; i++ )
        {
            _Tp val = std::abs(A[astep*indC[i] + i]);
            if( mv < val )
                mv = val, k = indC[i], l = i;
        }

        _Tp p = A[astep*k + l];
        if( std::abs(p) <= eps )
            break;
        _Tp y = (_Tp)((W[l] - W[k])*0.5);
        _Tp t = std::abs(y) + hypot(p, y);
        _Tp s = hypot(p, t);
        _Tp c = t/s;
        s = p/s; t = (p/t)*p;
        if( y < 0 )
            s = -s, t = -t;
        A[astep*k + l] = 0;

        W[k] -= t;
        W[l] += t;

        _Tp a0, b0;

#undef rotate
#define rotate(v0, v1) a0 = v0, b0 = v1, v0 = a0*c - b0*s, v1 = a0*s + b0*c

        // rotate rows and columns k and l
        for( i = 0; i < k; i++ )
            rotate(A[astep*i+k], A[astep*i+l]);
        for( i = k+1; i < l; i++ )
            rotate(A[astep*k+i], A[astep*i+l]);
        for( i = l+1; i < n; i++ )
            rotate(A[astep*k+i], A[astep*l+i]);

        // rotate eigenvectors
        if( V )
            for( i = 0; i < n; i++ )
                rotate(V[vstep*k+i], V[vstep*l+i]);

#undef rotate

        for( j = 0; j < 2; j++ )
        {
            int idx = j == 0 ? k : l;
            if( idx < n - 1 )
            {
                for( m = idx+1, mv = std::abs(A[astep*idx + m]), i = idx+2; i < n; i++ )
                {
                    _Tp val = std::abs(A[astep*idx+i]);
                    if( mv < val )
                        mv = val, m = i;
                }
                indR[idx] = m;
            }
            if( idx > 0 )
            {
                for( m = 0, mv = std::abs(A[idx]), i = 1; i < idx; i++ )
                {
                    _Tp val = std::abs(A[astep*i+idx]);
                    if( mv < val )
                        mv = val, m = i;
                }
                indC[idx] = m;
            }
        }
    }

    // sort eigenvalues & eigenvectors
    for( k = 0; k < n-1; k++ )
    {
        m = k;
        for( i = k+1; i < n; i++ )
        {
            if( W[m] < W[i] )
                m = i;
        }
        if( k != m )
        {
            std::swap(W[m], W[k]);
            if( V )
                for( i = 0; i < n; i++ )
                    std::swap(V[vstep*m + i], V[vstep*k + i]);
        }
    }

    return true;
}

static bool Jacobi( float* S, size_t sstep, float* e, float* E, size_t estep, int n, uchar* buf )
{
    return JacobiImpl_(S, sstep, e, E, estep, n, buf);
}

static bool Jacobi( double* S, size_t sstep, double* e, double* E, size_t estep, int n, uchar* buf )
{
    return JacobiImpl_(S, sstep, e, E, estep, n, buf);
}

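// VBLAS: small vectorized kernels (dot product and Givens rotations) used by the
// Jacobi SVD below. The generic implementations return 0 ("no elements processed"),
// so callers simply run their scalar loops. When CV_SSE2 is available, the
// float/double specializations process most of each row with aligned SSE2
// loads/stores and return the number of elements they handled.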
template<typename T> struct VBLAS
{
    int dot(const T*, const T*, int, T*) const { return 0; }
    int givens(T*, T*, int, T, T) const { return 0; }
    int givensx(T*, T*, int, T, T, T*, T*) const { return 0; }
};

#if CV_SSE2
template<> inline int VBLAS<float>::dot(const float* a, const float* b, int n, float* result) const
{
    if( n < 8 )
        return 0;
    int k = 0;
    __m128 s0 = _mm_setzero_ps(), s1 = _mm_setzero_ps();
    for( ; k <= n - 8; k += 8 )
    {
        __m128 a0 = _mm_load_ps(a + k), a1 = _mm_load_ps(a + k + 4);
        __m128 b0 = _mm_load_ps(b + k), b1 = _mm_load_ps(b + k + 4);

        s0 = _mm_add_ps(s0, _mm_mul_ps(a0, b0));
        s1 = _mm_add_ps(s1, _mm_mul_ps(a1, b1));
    }
    s0 = _mm_add_ps(s0, s1);
    float sbuf[4];
    _mm_storeu_ps(sbuf, s0);
    *result = sbuf[0] + sbuf[1] + sbuf[2] + sbuf[3];
    return k;
}


template<> inline int VBLAS<float>::givens(float* a, float* b, int n, float c, float s) const
{
    if( n < 4 )
        return 0;
    int k = 0;
    __m128 c4 = _mm_set1_ps(c), s4 = _mm_set1_ps(s);
    for( ; k <= n - 4; k += 4 )
    {
        __m128 a0 = _mm_load_ps(a + k);
        __m128 b0 = _mm_load_ps(b + k);
        __m128 t0 = _mm_add_ps(_mm_mul_ps(a0, c4), _mm_mul_ps(b0, s4));
        __m128 t1 = _mm_sub_ps(_mm_mul_ps(b0, c4), _mm_mul_ps(a0, s4));
        _mm_store_ps(a + k, t0);
        _mm_store_ps(b + k, t1);
    }
    return k;
}


template<> inline int VBLAS<float>::givensx(float* a, float* b, int n, float c, float s,
                                            float* anorm, float* bnorm) const
{
    if( n < 4 )
        return 0;
    int k = 0;
    __m128 c4 = _mm_set1_ps(c), s4 = _mm_set1_ps(s);
    __m128 sa = _mm_setzero_ps(), sb = _mm_setzero_ps();
    for( ; k <= n - 4; k += 4 )
    {
        __m128 a0 = _mm_load_ps(a + k);
        __m128 b0 = _mm_load_ps(b + k);
        __m128 t0 = _mm_add_ps(_mm_mul_ps(a0, c4), _mm_mul_ps(b0, s4));
        __m128 t1 = _mm_sub_ps(_mm_mul_ps(b0, c4), _mm_mul_ps(a0, s4));
        _mm_store_ps(a + k, t0);
        _mm_store_ps(b + k, t1);
        sa = _mm_add_ps(sa, _mm_mul_ps(t0, t0));
        sb = _mm_add_ps(sb, _mm_mul_ps(t1, t1));
    }
    float abuf[4], bbuf[4];
    _mm_storeu_ps(abuf, sa);
    _mm_storeu_ps(bbuf, sb);
    *anorm = abuf[0] + abuf[1] + abuf[2] + abuf[3];
    *bnorm = bbuf[0] + bbuf[1] + bbuf[2] + bbuf[3];
    return k;
}


template<> inline int VBLAS<double>::dot(const double* a, const double* b, int n, double* result) const
{
    if( n < 4 )
        return 0;
    int k = 0;
    __m128d s0 = _mm_setzero_pd(), s1 = _mm_setzero_pd();
    for( ; k <= n - 4; k += 4 )
    {
        __m128d a0 = _mm_load_pd(a + k), a1 = _mm_load_pd(a + k + 2);
        __m128d b0 = _mm_load_pd(b + k), b1 = _mm_load_pd(b + k + 2);

        s0 = _mm_add_pd(s0, _mm_mul_pd(a0, b0));
        s1 = _mm_add_pd(s1, _mm_mul_pd(a1, b1));
    }
    s0 = _mm_add_pd(s0, s1);
    double sbuf[2];
    _mm_storeu_pd(sbuf, s0);
    *result = sbuf[0] + sbuf[1];
    return k;
}


template<> inline int VBLAS<double>::givens(double* a, double* b, int n, double c, double s) const
{
    int k = 0;
    __m128d c2 = _mm_set1_pd(c), s2 = _mm_set1_pd(s);
    for( ; k <= n - 2; k += 2 )
    {
        __m128d a0 = _mm_load_pd(a + k);
        __m128d b0 = _mm_load_pd(b + k);
        __m128d t0 = _mm_add_pd(_mm_mul_pd(a0, c2), _mm_mul_pd(b0, s2));
        __m128d t1 = _mm_sub_pd(_mm_mul_pd(b0, c2), _mm_mul_pd(a0, s2));
        _mm_store_pd(a + k, t0);
        _mm_store_pd(b + k, t1);
    }
    return k;
}


template<> inline int VBLAS<double>::givensx(double* a, double* b, int n, double c, double s,
                                             double* anorm, double* bnorm) const
{
    int k = 0;
    __m128d c2 = _mm_set1_pd(c), s2 = _mm_set1_pd(s);
    __m128d sa = _mm_setzero_pd(), sb = _mm_setzero_pd();
    for( ; k <= n - 2; k += 2 )
    {
        __m128d a0 = _mm_load_pd(a + k);
        __m128d b0 = _mm_load_pd(b + k);
        __m128d t0 = _mm_add_pd(_mm_mul_pd(a0, c2), _mm_mul_pd(b0, s2));
        __m128d t1 = _mm_sub_pd(_mm_mul_pd(b0, c2), _mm_mul_pd(a0, s2));
        _mm_store_pd(a + k, t0);
        _mm_store_pd(b + k, t1);
        sa = _mm_add_pd(sa, _mm_mul_pd(t0, t0));
        sb = _mm_add_pd(sb, _mm_mul_pd(t1, t1));
    }
    double abuf[2], bbuf[2];
    _mm_storeu_pd(abuf, sa);
    _mm_storeu_pd(bbuf, sb);
    *anorm = abuf[0] + abuf[1];
    *bnorm = bbuf[0] + bbuf[1];
    return k;
}
#endif

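// JacobiSVDImpl_: one-sided Jacobi SVD. The input is passed transposed (At is n x m,
// one row per column of the original matrix). Pairs of rows are repeatedly
// orthogonalized with Givens rotations; W holds the squared row norms during the
// sweeps and the singular values (sorted in descending order) afterwards. The rows
// of At end up holding the left singular vectors and the rows of Vt the right ones;
// rows whose singular value is (near) zero are rebuilt from random vectors and
// re-orthonormalized, as described by the inline comment further below.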
template<typename _Tp> void
JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep,
               int m, int n, int n1, double minval, _Tp eps)
{
    VBLAS<_Tp> vblas;
    AutoBuffer<double> Wbuf(n);
    double* W = Wbuf;
    int i, j, k, iter, max_iter = std::max(m, 30);
    _Tp c, s;
    double sd;
    astep /= sizeof(At[0]);
    vstep /= sizeof(Vt[0]);

    for( i = 0; i < n; i++ )
    {
        for( k = 0, sd = 0; k < m; k++ )
        {
            _Tp t = At[i*astep + k];
            sd += (double)t*t;
        }
        W[i] = sd;

        if( Vt )
        {
            for( k = 0; k < n; k++ )
                Vt[i*vstep + k] = 0;
            Vt[i*vstep + i] = 1;
        }
    }

    for( iter = 0; iter < max_iter; iter++ )
    {
        bool changed = false;

        for( i = 0; i < n-1; i++ )
            for( j = i+1; j < n; j++ )
            {
                _Tp *Ai = At + i*astep, *Aj = At + j*astep;
                double a = W[i], p = 0, b = W[j];

                for( k = 0; k < m; k++ )
                    p += (double)Ai[k]*Aj[k];

                if( std::abs(p) <= eps*std::sqrt((double)a*b) )
                    continue;

                p *= 2;
                double beta = a - b, gamma = hypot((double)p, beta);
                if( beta < 0 )
                {
                    double delta = (gamma - beta)*0.5;
                    s = (_Tp)std::sqrt(delta/gamma);
                    c = (_Tp)(p/(gamma*s*2));
                }
                else
                {
                    c = (_Tp)std::sqrt((gamma + beta)/(gamma*2));
                    s = (_Tp)(p/(gamma*c*2));
                }

                a = b = 0;
                for( k = 0; k < m; k++ )
                {
                    _Tp t0 = c*Ai[k] + s*Aj[k];
                    _Tp t1 = -s*Ai[k] + c*Aj[k];
                    Ai[k] = t0; Aj[k] = t1;

                    a += (double)t0*t0; b += (double)t1*t1;
                }
                W[i] = a; W[j] = b;

                changed = true;

                if( Vt )
                {
                    _Tp *Vi = Vt + i*vstep, *Vj = Vt + j*vstep;
                    k = vblas.givens(Vi, Vj, n, c, s);

                    for( ; k < n; k++ )
                    {
                        _Tp t0 = c*Vi[k] + s*Vj[k];
                        _Tp t1 = -s*Vi[k] + c*Vj[k];
                        Vi[k] = t0; Vj[k] = t1;
                    }
                }
            }
        if( !changed )
            break;
    }

    for( i = 0; i < n; i++ )
    {
        for( k = 0, sd = 0; k < m; k++ )
        {
            _Tp t = At[i*astep + k];
            sd += (double)t*t;
        }
        W[i] = std::sqrt(sd);
    }

    for( i = 0; i < n-1; i++ )
    {
        j = i;
        for( k = i+1; k < n; k++ )
        {
            if( W[j] < W[k] )
                j = k;
        }
        if( i != j )
        {
            std::swap(W[i], W[j]);
            if( Vt )
            {
                for( k = 0; k < m; k++ )
                    std::swap(At[i*astep + k], At[j*astep + k]);

                for( k = 0; k < n; k++ )
                    std::swap(Vt[i*vstep + k], Vt[j*vstep + k]);
            }
        }
    }

    for( i = 0; i < n; i++ )
        _W[i] = (_Tp)W[i];

    if( !Vt )
        return;

    RNG rng(0x12345678);
    for( i = 0; i < n1; i++ )
    {
        sd = i < n ? W[i] : 0;

        for( int ii = 0; ii < 100 && sd <= minval; ii++ )
        {
            // if we got a zero singular value, then in order to get the corresponding left singular vector
            // we generate a random vector, project it to the previously computed left singular vectors,
            // subtract the projection and normalize the difference.
            const _Tp val0 = (_Tp)(1./m);
            for( k = 0; k < m; k++ )
            {
                _Tp val = (rng.next() & 256) != 0 ? val0 : -val0;
                At[i*astep + k] = val;
            }
            for( iter = 0; iter < 2; iter++ )
            {
                for( j = 0; j < i; j++ )
                {
                    sd = 0;
                    for( k = 0; k < m; k++ )
                        sd += At[i*astep + k]*At[j*astep + k];
                    _Tp asum = 0;
                    for( k = 0; k < m; k++ )
                    {
                        _Tp t = (_Tp)(At[i*astep + k] - sd*At[j*astep + k]);
                        At[i*astep + k] = t;
                        asum += std::abs(t);
                    }
                    asum = asum > eps*100 ? 1/asum : 0;
                    for( k = 0; k < m; k++ )
                        At[i*astep + k] *= asum;
                }
            }
            sd = 0;
            for( k = 0; k < m; k++ )
            {
                _Tp t = At[i*astep + k];
                sd += (double)t*t;
            }
            sd = std::sqrt(sd);
        }

        s = (_Tp)(sd > minval ? 1/sd : 0.);
        for( k = 0; k < m; k++ )
            At[i*astep + k] *= s;
    }
}


static void JacobiSVD(float* At, size_t astep, float* W, float* Vt, size_t vstep, int m, int n, int n1=-1)
{
    JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, FLT_MIN, FLT_EPSILON*2);
}

static void JacobiSVD(double* At, size_t astep, double* W, double* Vt, size_t vstep, int m, int n, int n1=-1)
{
    JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, DBL_MIN, DBL_EPSILON*10);
}

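// SVBkSbImpl_ performs the SVD back-substitution x = v * diag(1/w) * u' * b,
// treating singular values not exceeding threshold = eps * sum(w) as zero.
// MatrAXPY is the small axpy-style helper used to accumulate the products.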
/* y[0:m,0:n] += diag(a[0:1,0:m]) * x[0:m,0:n] */
template<typename T1, typename T2, typename T3> static void
MatrAXPY( int m, int n, const T1* x, int dx,
          const T2* a, int inca, T3* y, int dy )
{
    int i;
    for( i = 0; i < m; i++, x += dx, y += dy )
    {
        T2 s = a[i*inca];
        int j = 0;
#if CV_ENABLE_UNROLLED
        for(; j <= n - 4; j += 4 )
        {
            T3 t0 = (T3)(y[j] + s*x[j]);
            T3 t1 = (T3)(y[j+1] + s*x[j+1]);
            y[j] = t0;
            y[j+1] = t1;
            t0 = (T3)(y[j+2] + s*x[j+2]);
            t1 = (T3)(y[j+3] + s*x[j+3]);
            y[j+2] = t0;
            y[j+3] = t1;
        }
#endif
        for( ; j < n; j++ )
            y[j] = (T3)(y[j] + s*x[j]);
    }
}

template<typename T> static void
SVBkSbImpl_( int m, int n, const T* w, int incw,
             const T* u, int ldu, bool uT,
             const T* v, int ldv, bool vT,
             const T* b, int ldb, int nb,
             T* x, int ldx, double* buffer, T eps )
{
    double threshold = 0;
    int udelta0 = uT ? ldu : 1, udelta1 = uT ? 1 : ldu;
    int vdelta0 = vT ? ldv : 1, vdelta1 = vT ? 1 : ldv;
    int i, j, nm = std::min(m, n);

    if( !b )
        nb = m;

    for( i = 0; i < n; i++ )
        for( j = 0; j < nb; j++ )
            x[i*ldx + j] = 0;

    for( i = 0; i < nm; i++ )
        threshold += w[i*incw];
    threshold *= eps;

    // v * inv(w) * uT * b
    for( i = 0; i < nm; i++, u += udelta0, v += vdelta0 )
    {
        double wi = w[i*incw];
        if( (double)std::abs(wi) <= threshold )
            continue;
        wi = 1/wi;

        if( nb == 1 )
        {
            double s = 0;
            if( b )
                for( j = 0; j < m; j++ )
                    s += u[j*udelta1]*b[j*ldb];
            else
                s = u[0];
            s *= wi;

            for( j = 0; j < n; j++ )
                x[j*ldx] = (T)(x[j*ldx] + s*v[j*vdelta1]);
        }
        else
        {
            if( b )
            {
                for( j = 0; j < nb; j++ )
                    buffer[j] = 0;
                MatrAXPY( m, nb, b, ldb, u, udelta1, buffer, 0 );
                for( j = 0; j < nb; j++ )
                    buffer[j] *= wi;
            }
            else
            {
                for( j = 0; j < nb; j++ )
                    buffer[j] = u[j*udelta1]*wi;
            }
            MatrAXPY( n, nb, buffer, 0, v, vdelta1, x, ldx );
        }
    }
}

static void
SVBkSb( int m, int n, const float* w, size_t wstep,
        const float* u, size_t ustep, bool uT,
        const float* v, size_t vstep, bool vT,
        const float* b, size_t bstep, int nb,
        float* x, size_t xstep, uchar* buffer )
{
    SVBkSbImpl_(m, n, w, wstep ? (int)(wstep/sizeof(w[0])) : 1,
                u, (int)(ustep/sizeof(u[0])), uT,
                v, (int)(vstep/sizeof(v[0])), vT,
                b, (int)(bstep/sizeof(b[0])), nb,
                x, (int)(xstep/sizeof(x[0])),
                (double*)alignPtr(buffer, sizeof(double)), (float)(DBL_EPSILON*2) );
}

static void
SVBkSb( int m, int n, const double* w, size_t wstep,
        const double* u, size_t ustep, bool uT,
        const double* v, size_t vstep, bool vT,
        const double* b, size_t bstep, int nb,
        double* x, size_t xstep, uchar* buffer )
{
    SVBkSbImpl_(m, n, w, wstep ? (int)(wstep/sizeof(w[0])) : 1,
                u, (int)(ustep/sizeof(u[0])), uT,
                v, (int)(vstep/sizeof(v[0])), vT,
                b, (int)(bstep/sizeof(b[0])), nb,
                x, (int)(xstep/sizeof(x[0])),
                (double*)alignPtr(buffer, sizeof(double)), DBL_EPSILON*2 );
}

}

/****************************************************************************************\
*                             Determinant of the matrix                                  *
\****************************************************************************************/

#define det2(m)   ((double)m(0,0)*m(1,1) - (double)m(0,1)*m(1,0))
#define det3(m)   (m(0,0)*((double)m(1,1)*m(2,2) - (double)m(1,2)*m(2,1)) -  \
                   m(0,1)*((double)m(1,0)*m(2,2) - (double)m(1,2)*m(2,0)) +  \
                   m(0,2)*((double)m(1,0)*m(2,1) - (double)m(1,1)*m(2,0)))

double cv::determinant( InputArray _mat )
{
    Mat mat = _mat.getMat();
    double result = 0;
    int type = mat.type(), rows = mat.rows;
    size_t step = mat.step;
    const uchar* m = mat.ptr();

    CV_Assert( !mat.empty() );
    CV_Assert( mat.rows == mat.cols && (type == CV_32F || type == CV_64F));

#define Mf(y, x) ((float*)(m + y*step))[x]
#define Md(y, x) ((double*)(m + y*step))[x]

    if( type == CV_32F )
    {
        if( rows == 2 )
            result = det2(Mf);
        else if( rows == 3 )
            result = det3(Mf);
        else if( rows == 1 )
            result = Mf(0,0);
        else
        {
            size_t bufSize = rows*rows*sizeof(float);
            AutoBuffer<uchar> buffer(bufSize);
            Mat a(rows, rows, CV_32F, (uchar*)buffer);
            mat.copyTo(a);

            result = hal::LU32f(a.ptr<float>(), a.step, rows, 0, 0, 0);
            if( result )
            {
                for( int i = 0; i < rows; i++ )
                    result *= a.at<float>(i,i);
                result = 1./result;
            }
        }
    }
    else
    {
        if( rows == 2 )
            result = det2(Md);
        else if( rows == 3 )
            result = det3(Md);
        else if( rows == 1 )
            result = Md(0,0);
        else
        {
            size_t bufSize = rows*rows*sizeof(double);
            AutoBuffer<uchar> buffer(bufSize);
            Mat a(rows, rows, CV_64F, (uchar*)buffer);
            mat.copyTo(a);

            result = hal::LU64f(a.ptr<double>(), a.step, rows, 0, 0, 0);
            if( result )
            {
                for( int i = 0; i < rows; i++ )
                    result *= a.at<double>(i,i);
                result = 1./result;
            }
        }
    }

#undef Mf
#undef Md

    return result;
}

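// cv::invert: with DECOMP_LU or DECOMP_CHOLESKY, matrices of size n <= 3 are
// inverted with the closed-form cofactor expressions below, larger ones via
// hal::LU / hal::Cholesky on a working copy, and the return value is a 0/1 success
// flag. DECOMP_SVD computes the pseudo-inverse (the only method allowed for
// non-square input) and DECOMP_EIG uses the symmetric eigendecomposition; both
// return the ratio of the smallest to the largest singular value (0 if the matrix
// is singular).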
/****************************************************************************************\
*                          Inverse (or pseudo-inverse) of a matrix                       *
\****************************************************************************************/

#define Sf( y, x ) ((float*)(srcdata + y*srcstep))[x]
#define Sd( y, x ) ((double*)(srcdata + y*srcstep))[x]
#define Df( y, x ) ((float*)(dstdata + y*dststep))[x]
#define Dd( y, x ) ((double*)(dstdata + y*dststep))[x]

double cv::invert( InputArray _src, OutputArray _dst, int method )
{
    bool result = false;
    Mat src = _src.getMat();
    int type = src.type();

    CV_Assert(type == CV_32F || type == CV_64F);

    size_t esz = CV_ELEM_SIZE(type);
    int m = src.rows, n = src.cols;

    if( method == DECOMP_SVD )
    {
        int nm = std::min(m, n);

        AutoBuffer<uchar> _buf((m*nm + nm + nm*n)*esz + sizeof(double));
        uchar* buf = alignPtr((uchar*)_buf, (int)esz);
        Mat u(m, nm, type, buf);
        Mat w(nm, 1, type, u.ptr() + m*nm*esz);
        Mat vt(nm, n, type, w.ptr() + nm*esz);

        SVD::compute(src, w, u, vt);
        SVD::backSubst(w, u, vt, Mat(), _dst);
        return type == CV_32F ?
            (w.ptr<float>()[0] >= FLT_EPSILON ?
             w.ptr<float>()[n-1]/w.ptr<float>()[0] : 0) :
            (w.ptr<double>()[0] >= DBL_EPSILON ?
             w.ptr<double>()[n-1]/w.ptr<double>()[0] : 0);
    }

    CV_Assert( m == n );

    if( method == DECOMP_EIG )
    {
        AutoBuffer<uchar> _buf((n*n*2 + n)*esz + sizeof(double));
        uchar* buf = alignPtr((uchar*)_buf, (int)esz);
        Mat u(n, n, type, buf);
        Mat w(n, 1, type, u.ptr() + n*n*esz);
        Mat vt(n, n, type, w.ptr() + n*esz);

        eigen(src, w, vt);
        transpose(vt, u);
        SVD::backSubst(w, u, vt, Mat(), _dst);
        return type == CV_32F ?
            (w.ptr<float>()[0] >= FLT_EPSILON ?
             w.ptr<float>()[n-1]/w.ptr<float>()[0] : 0) :
            (w.ptr<double>()[0] >= DBL_EPSILON ?
             w.ptr<double>()[n-1]/w.ptr<double>()[0] : 0);
    }

    CV_Assert( method == DECOMP_LU || method == DECOMP_CHOLESKY );

    _dst.create( n, n, type );
    Mat dst = _dst.getMat();

    if( n <= 3 )
    {
        const uchar* srcdata = src.ptr();
        uchar* dstdata = dst.ptr();
        size_t srcstep = src.step;
        size_t dststep = dst.step;

        if( n == 2 )
        {
            if( type == CV_32FC1 )
            {
                double d = det2(Sf);
                if( d != 0. )
                {
                    result = true;
                    d = 1./d;

#if CV_SSE2
                    if(USE_SSE2)
                    {
                        __m128 zero = _mm_setzero_ps();
                        __m128 t0 = _mm_loadl_pi(zero, (const __m64*)srcdata); //t0 = sf(0,0) sf(0,1)
                        __m128 t1 = _mm_loadh_pi(zero, (const __m64*)(srcdata+srcstep)); //t1 = sf(1,0) sf(1,1)
                        __m128 s0 = _mm_or_ps(t0, t1);
                        __m128 det =_mm_set1_ps((float)d);
                        s0 = _mm_mul_ps(s0, det);
                        static const uchar CV_DECL_ALIGNED(16) inv[16] = {0,0,0,0,0,0,0,0x80,0,0,0,0x80,0,0,0,0};
                        __m128 pattern = _mm_load_ps((const float*)inv);
                        s0 = _mm_xor_ps(s0, pattern);//==-1*s0
                        s0 = _mm_shuffle_ps(s0, s0, _MM_SHUFFLE(0,2,1,3));
                        _mm_storel_pi((__m64*)dstdata, s0);
                        _mm_storeh_pi((__m64*)((float*)(dstdata+dststep)), s0);
                    }
                    else
#endif
                    {
                        double t0, t1;
                        t0 = Sf(0,0)*d;
                        t1 = Sf(1,1)*d;
                        Df(1,1) = (float)t0;
                        Df(0,0) = (float)t1;
                        t0 = -Sf(0,1)*d;
                        t1 = -Sf(1,0)*d;
                        Df(0,1) = (float)t0;
                        Df(1,0) = (float)t1;
                    }

                }
            }
            else
            {
                double d = det2(Sd);
                if( d != 0. )
                {
                    result = true;
                    d = 1./d;
#if CV_SSE2
                    if(USE_SSE2)
                    {
                        __m128d s0 = _mm_loadu_pd((const double*)srcdata); //s0 = sf(0,0) sf(0,1)
                        __m128d s1 = _mm_loadu_pd ((const double*)(srcdata+srcstep));//s1 = sf(1,0) sf(1,1)
                        __m128d sm = _mm_unpacklo_pd(s0, _mm_load_sd((const double*)(srcdata+srcstep)+1)); //sm = sf(0,0) sf(1,1) - main diagonal
                        __m128d ss = _mm_shuffle_pd(s0, s1, _MM_SHUFFLE2(0,1)); //ss = sf(0,1) sf(1,0) - secondary diagonal
                        __m128d det = _mm_load1_pd((const double*)&d);
                        sm = _mm_mul_pd(sm, det);

                        static const uchar CV_DECL_ALIGNED(16) inv[8] = {0,0,0,0,0,0,0,0x80};
                        __m128d pattern = _mm_load1_pd((double*)inv);
                        ss = _mm_mul_pd(ss, det);
                        ss = _mm_xor_pd(ss, pattern);//==-1*ss

                        s0 = _mm_shuffle_pd(sm, ss, _MM_SHUFFLE2(0,1));
                        s1 = _mm_shuffle_pd(ss, sm, _MM_SHUFFLE2(0,1));
                        _mm_storeu_pd((double*)dstdata, s0);
                        _mm_storeu_pd((double*)(dstdata+dststep), s1);
                    }
                    else
#endif
                    {
                        double t0, t1;
                        t0 = Sd(0,0)*d;
                        t1 = Sd(1,1)*d;
                        Dd(1,1) = t0;
                        Dd(0,0) = t1;
                        t0 = -Sd(0,1)*d;
                        t1 = -Sd(1,0)*d;
                        Dd(0,1) = t0;
                        Dd(1,0) = t1;
                    }
                }
            }
        }
        else if( n == 3 )
        {
            if( type == CV_32FC1 )
            {
                double d = det3(Sf);

                if( d != 0. )
                {
                    double t[12];

                    result = true;
                    d = 1./d;
                    t[0] = (((double)Sf(1,1) * Sf(2,2) - (double)Sf(1,2) * Sf(2,1)) * d);
                    t[1] = (((double)Sf(0,2) * Sf(2,1) - (double)Sf(0,1) * Sf(2,2)) * d);
                    t[2] = (((double)Sf(0,1) * Sf(1,2) - (double)Sf(0,2) * Sf(1,1)) * d);

                    t[3] = (((double)Sf(1,2) * Sf(2,0) - (double)Sf(1,0) * Sf(2,2)) * d);
                    t[4] = (((double)Sf(0,0) * Sf(2,2) - (double)Sf(0,2) * Sf(2,0)) * d);
                    t[5] = (((double)Sf(0,2) * Sf(1,0) - (double)Sf(0,0) * Sf(1,2)) * d);

                    t[6] = (((double)Sf(1,0) * Sf(2,1) - (double)Sf(1,1) * Sf(2,0)) * d);
                    t[7] = (((double)Sf(0,1) * Sf(2,0) - (double)Sf(0,0) * Sf(2,1)) * d);
                    t[8] = (((double)Sf(0,0) * Sf(1,1) - (double)Sf(0,1) * Sf(1,0)) * d);

                    Df(0,0) = (float)t[0]; Df(0,1) = (float)t[1]; Df(0,2) = (float)t[2];
                    Df(1,0) = (float)t[3]; Df(1,1) = (float)t[4]; Df(1,2) = (float)t[5];
                    Df(2,0) = (float)t[6]; Df(2,1) = (float)t[7]; Df(2,2) = (float)t[8];
                }
            }
            else
            {
                double d = det3(Sd);
                if( d != 0. )
                {
                    result = true;
                    d = 1./d;
                    double t[9];

                    t[0] = (Sd(1,1) * Sd(2,2) - Sd(1,2) * Sd(2,1)) * d;
                    t[1] = (Sd(0,2) * Sd(2,1) - Sd(0,1) * Sd(2,2)) * d;
                    t[2] = (Sd(0,1) * Sd(1,2) - Sd(0,2) * Sd(1,1)) * d;

                    t[3] = (Sd(1,2) * Sd(2,0) - Sd(1,0) * Sd(2,2)) * d;
                    t[4] = (Sd(0,0) * Sd(2,2) - Sd(0,2) * Sd(2,0)) * d;
                    t[5] = (Sd(0,2) * Sd(1,0) - Sd(0,0) * Sd(1,2)) * d;

                    t[6] = (Sd(1,0) * Sd(2,1) - Sd(1,1) * Sd(2,0)) * d;
                    t[7] = (Sd(0,1) * Sd(2,0) - Sd(0,0) * Sd(2,1)) * d;
                    t[8] = (Sd(0,0) * Sd(1,1) - Sd(0,1) * Sd(1,0)) * d;

                    Dd(0,0) = t[0]; Dd(0,1) = t[1]; Dd(0,2) = t[2];
                    Dd(1,0) = t[3]; Dd(1,1) = t[4]; Dd(1,2) = t[5];
                    Dd(2,0) = t[6]; Dd(2,1) = t[7]; Dd(2,2) = t[8];
                }
            }
        }
        else
        {
            assert( n == 1 );

            if( type == CV_32FC1 )
            {
                double d = Sf(0,0);
                if( d != 0. )
                {
                    result = true;
                    Df(0,0) = (float)(1./d);
                }
            }
            else
            {
                double d = Sd(0,0);
                if( d != 0. )
                {
                    result = true;
                    Dd(0,0) = 1./d;
                }
            }
        }
        if( !result )
            dst = Scalar(0);
        return result;
    }

    int elem_size = CV_ELEM_SIZE(type);
    AutoBuffer<uchar> buf(n*n*elem_size);
    Mat src1(n, n, type, (uchar*)buf);
    src.copyTo(src1);
    setIdentity(dst);

    if( method == DECOMP_LU && type == CV_32F )
        result = hal::LU32f(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n) != 0;
    else if( method == DECOMP_LU && type == CV_64F )
        result = hal::LU64f(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n) != 0;
    else if( method == DECOMP_CHOLESKY && type == CV_32F )
        result = hal::Cholesky32f(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n);
    else
        result = hal::Cholesky64f(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n);

    if( !result )
        dst = Scalar(0);

    return result;
}

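// cv::solve: square systems of size <= 3 with a single right-hand side are solved
// directly by Cramer's rule below. Larger systems go through LU, Cholesky,
// eigenvalue or SVD decomposition (DECOMP_QR is mapped onto the SVD path); when
// DECOMP_NORMAL is set, the normal equations A'A x = A'b are formed first.
// Under-determined systems (m < n) are rejected with an error.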
/****************************************************************************************\
*                              Solving a linear system                                   *
\****************************************************************************************/

bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int method )
{
    bool result = true;
    Mat src = _src.getMat(), _src2 = _src2arg.getMat();
    int type = src.type();
    bool is_normal = (method & DECOMP_NORMAL) != 0;

    CV_Assert( type == _src2.type() && (type == CV_32F || type == CV_64F) );

    method &= ~DECOMP_NORMAL;
    CV_Assert( (method != DECOMP_LU && method != DECOMP_CHOLESKY) ||
        is_normal || src.rows == src.cols );

    // check case of a single equation and small matrix
    if( (method == DECOMP_LU || method == DECOMP_CHOLESKY) && !is_normal &&
        src.rows <= 3 && src.rows == src.cols && _src2.cols == 1 )
    {
        _dst.create( src.cols, _src2.cols, src.type() );
        Mat dst = _dst.getMat();

        #define bf(y) ((float*)(bdata + y*src2step))[0]
        #define bd(y) ((double*)(bdata + y*src2step))[0]

        const uchar* srcdata = src.ptr();
        const uchar* bdata = _src2.ptr();
        uchar* dstdata = dst.ptr();
        size_t srcstep = src.step;
        size_t src2step = _src2.step;
        size_t dststep = dst.step;

        if( src.rows == 2 )
        {
            if( type == CV_32FC1 )
            {
                double d = det2(Sf);
                if( d != 0. )
                {
                    double t;
                    d = 1./d;
                    t = (float)(((double)bf(0)*Sf(1,1) - (double)bf(1)*Sf(0,1))*d);
                    Df(1,0) = (float)(((double)bf(1)*Sf(0,0) - (double)bf(0)*Sf(1,0))*d);
                    Df(0,0) = (float)t;
                }
                else
                    result = false;
            }
            else
            {
                double d = det2(Sd);
                if( d != 0. )
                {
                    double t;
                    d = 1./d;
                    t = (bd(0)*Sd(1,1) - bd(1)*Sd(0,1))*d;
                    Dd(1,0) = (bd(1)*Sd(0,0) - bd(0)*Sd(1,0))*d;
                    Dd(0,0) = t;
                }
                else
                    result = false;
            }
        }
        else if( src.rows == 3 )
        {
            if( type == CV_32FC1 )
            {
                double d = det3(Sf);
                if( d != 0. )
                {
                    float t[3];
                    d = 1./d;

                    t[0] = (float)(d*
                           (bf(0)*((double)Sf(1,1)*Sf(2,2) - (double)Sf(1,2)*Sf(2,1)) -
                            Sf(0,1)*((double)bf(1)*Sf(2,2) - (double)Sf(1,2)*bf(2)) +
                            Sf(0,2)*((double)bf(1)*Sf(2,1) - (double)Sf(1,1)*bf(2))));

                    t[1] = (float)(d*
                           (Sf(0,0)*(double)(bf(1)*Sf(2,2) - (double)Sf(1,2)*bf(2)) -
                            bf(0)*((double)Sf(1,0)*Sf(2,2) - (double)Sf(1,2)*Sf(2,0)) +
                            Sf(0,2)*((double)Sf(1,0)*bf(2) - (double)bf(1)*Sf(2,0))));

                    t[2] = (float)(d*
                           (Sf(0,0)*((double)Sf(1,1)*bf(2) - (double)bf(1)*Sf(2,1)) -
                            Sf(0,1)*((double)Sf(1,0)*bf(2) - (double)bf(1)*Sf(2,0)) +
                            bf(0)*((double)Sf(1,0)*Sf(2,1) - (double)Sf(1,1)*Sf(2,0))));

                    Df(0,0) = t[0];
                    Df(1,0) = t[1];
                    Df(2,0) = t[2];
                }
                else
                    result = false;
            }
            else
            {
                double d = det3(Sd);
                if( d != 0. )
                {
                    double t[9];

                    d = 1./d;

                    t[0] = ((Sd(1,1) * Sd(2,2) - Sd(1,2) * Sd(2,1))*bd(0) +
                            (Sd(0,2) * Sd(2,1) - Sd(0,1) * Sd(2,2))*bd(1) +
                            (Sd(0,1) * Sd(1,2) - Sd(0,2) * Sd(1,1))*bd(2))*d;

                    t[1] = ((Sd(1,2) * Sd(2,0) - Sd(1,0) * Sd(2,2))*bd(0) +
                            (Sd(0,0) * Sd(2,2) - Sd(0,2) * Sd(2,0))*bd(1) +
                            (Sd(0,2) * Sd(1,0) - Sd(0,0) * Sd(1,2))*bd(2))*d;

                    t[2] = ((Sd(1,0) * Sd(2,1) - Sd(1,1) * Sd(2,0))*bd(0) +
                            (Sd(0,1) * Sd(2,0) - Sd(0,0) * Sd(2,1))*bd(1) +
                            (Sd(0,0) * Sd(1,1) - Sd(0,1) * Sd(1,0))*bd(2))*d;

                    Dd(0,0) = t[0];
                    Dd(1,0) = t[1];
                    Dd(2,0) = t[2];
                }
                else
                    result = false;
            }
        }
        else
        {
            assert( src.rows == 1 );

            if( type == CV_32FC1 )
            {
                double d = Sf(0,0);
                if( d != 0. )
                    Df(0,0) = (float)(bf(0)/d);
                else
                    result = false;
            }
            else
            {
                double d = Sd(0,0);
                if( d != 0. )
                    Dd(0,0) = (bd(0)/d);
                else
                    result = false;
            }
        }
        return result;
    }

    if( method == DECOMP_QR )
        method = DECOMP_SVD;

    int m = src.rows, m_ = m, n = src.cols, nb = _src2.cols;
    size_t esz = CV_ELEM_SIZE(type), bufsize = 0;
    size_t vstep = alignSize(n*esz, 16);
    size_t astep = method == DECOMP_SVD && !is_normal ? alignSize(m*esz, 16) : vstep;
    AutoBuffer<uchar> buffer;

    Mat src2 = _src2;
    _dst.create( src.cols, src2.cols, src.type() );
    Mat dst = _dst.getMat();

    if( m < n )
        CV_Error(CV_StsBadArg, "The function can not solve under-determined linear systems" );

    if( m == n )
        is_normal = false;
    else if( is_normal )
    {
        m_ = n;
        if( method == DECOMP_SVD )
            method = DECOMP_EIG;
    }

    size_t asize = astep*(method == DECOMP_SVD || is_normal ? n : m);
    bufsize += asize + 32;

    if( is_normal )
        bufsize += n*nb*esz;

    if( method == DECOMP_SVD || method == DECOMP_EIG )
        bufsize += n*5*esz + n*vstep + nb*sizeof(double) + 32;

    buffer.allocate(bufsize);
    uchar* ptr = alignPtr((uchar*)buffer, 16);

    Mat a(m_, n, type, ptr, astep);

    if( is_normal )
        mulTransposed(src, a, true);
    else if( method != DECOMP_SVD )
        src.copyTo(a);
    else
    {
        a = Mat(n, m_, type, ptr, astep);
        transpose(src, a);
    }
    ptr += asize;

    if( !is_normal )
    {
        if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
            src2.copyTo(dst);
    }
    else
    {
        // a'*b
        if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
            gemm( src, src2, 1, Mat(), 0, dst, GEMM_1_T );
        else
        {
            Mat tmp(n, nb, type, ptr);
            ptr += n*nb*esz;
            gemm( src, src2, 1, Mat(), 0, tmp, GEMM_1_T );
            src2 = tmp;
        }
    }

    if( method == DECOMP_LU )
    {
        if( type == CV_32F )
            result = hal::LU32f(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb) != 0;
        else
            result = hal::LU64f(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb) != 0;
    }
    else if( method == DECOMP_CHOLESKY )
    {
        if( type == CV_32F )
            result = hal::Cholesky32f(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb);
        else
            result = hal::Cholesky64f(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb);
    }
    else
    {
        ptr = alignPtr(ptr, 16);
        Mat v(n, n, type, ptr, vstep), w(n, 1, type, ptr + vstep*n), u;
        ptr += n*(vstep + esz);

        if( method == DECOMP_EIG )
        {
            if( type == CV_32F )
                Jacobi(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, n, ptr);
            else
                Jacobi(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, n, ptr);
            u = v;
        }
        else
        {
            if( type == CV_32F )
                JacobiSVD(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, m_, n);
            else
                JacobiSVD(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, m_, n);
            u = a;
        }

        if( type == CV_32F )
        {
            SVBkSb(m_, n, w.ptr<float>(), 0, u.ptr<float>(), u.step, true,
                   v.ptr<float>(), v.step, true, src2.ptr<float>(),
                   src2.step, nb, dst.ptr<float>(), dst.step, ptr);
        }
        else
        {
            SVBkSb(m_, n, w.ptr<double>(), 0, u.ptr<double>(), u.step, true,
                   v.ptr<double>(), v.step, true, src2.ptr<double>(),
                   src2.step, nb, dst.ptr<double>(), dst.step, ptr);
        }
        result = true;
    }

    if( !result )
        dst = Scalar(0);

    return result;
}

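// cv::eigen: eigenvalues/eigenvectors of a symmetric CV_32F or CV_64F matrix via
// the Jacobi solver above. Eigenvalues are returned in descending order; when
// eigenvectors are requested they are stored as the rows of the output matrix.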
/////////////////// finding eigenvalues and eigenvectors of a symmetric matrix ///////////////

bool cv::eigen( InputArray _src, OutputArray _evals, OutputArray _evects )
{
    Mat src = _src.getMat();
    int type = src.type();
    int n = src.rows;

    CV_Assert( src.rows == src.cols );
    CV_Assert (type == CV_32F || type == CV_64F);

    Mat v;
    if( _evects.needed() )
    {
        _evects.create(n, n, type);
        v = _evects.getMat();
    }

    size_t elemSize = src.elemSize(), astep = alignSize(n*elemSize, 16);
    AutoBuffer<uchar> buf(n*astep + n*5*elemSize + 32);
    uchar* ptr = alignPtr((uchar*)buf, 16);
    Mat a(n, n, type, ptr, astep), w(n, 1, type, ptr + astep*n);
    ptr += astep*n + elemSize*n;
    src.copyTo(a);
    bool ok = type == CV_32F ?
        Jacobi(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, n, ptr) :
        Jacobi(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, n, ptr);

    w.copyTo(_evals);
    return ok;
}

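// _SVDcompute: common implementation behind cv::SVD. Inputs with fewer rows than
// columns are processed transposed and u/vt are swapped accordingly on output;
// SVD::FULL_UV requests a full square u, SVD::NO_UV skips computing u and vt.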
namespace cv
{

static void _SVDcompute( InputArray _aarr, OutputArray _w,
                         OutputArray _u, OutputArray _vt, int flags )
{
    Mat src = _aarr.getMat();
    int m = src.rows, n = src.cols;
    int type = src.type();
    bool compute_uv = _u.needed() || _vt.needed();
    bool full_uv = (flags & SVD::FULL_UV) != 0;

    CV_Assert( type == CV_32F || type == CV_64F );

    if( flags & SVD::NO_UV )
    {
        _u.release();
        _vt.release();
        compute_uv = full_uv = false;
    }

    bool at = false;
    if( m < n )
    {
        std::swap(m, n);
        at = true;
    }

    int urows = full_uv ? m : n;
    size_t esz = src.elemSize(), astep = alignSize(m*esz, 16), vstep = alignSize(n*esz, 16);
    AutoBuffer<uchar> _buf(urows*astep + n*vstep + n*esz + 32);
    uchar* buf = alignPtr((uchar*)_buf, 16);
    Mat temp_a(n, m, type, buf, astep);
    Mat temp_w(n, 1, type, buf + urows*astep);
    Mat temp_u(urows, m, type, buf, astep), temp_v;

    if( compute_uv )
        temp_v = Mat(n, n, type, alignPtr(buf + urows*astep + n*esz, 16), vstep);

    if( urows > n )
        temp_u = Scalar::all(0);

    if( !at )
        transpose(src, temp_a);
    else
        src.copyTo(temp_a);

    if( type == CV_32F )
    {
        JacobiSVD(temp_a.ptr<float>(), temp_u.step, temp_w.ptr<float>(),
                  temp_v.ptr<float>(), temp_v.step, m, n, compute_uv ? urows : 0);
    }
    else
    {
        JacobiSVD(temp_a.ptr<double>(), temp_u.step, temp_w.ptr<double>(),
                  temp_v.ptr<double>(), temp_v.step, m, n, compute_uv ? urows : 0);
    }
    temp_w.copyTo(_w);
    if( compute_uv )
    {
        if( !at )
        {
            if( _u.needed() )
                transpose(temp_u, _u);
            if( _vt.needed() )
                temp_v.copyTo(_vt);
        }
        else
        {
            if( _u.needed() )
                transpose(temp_v, _u);
            if( _vt.needed() )
                temp_u.copyTo(_vt);
        }
    }
}


void SVD::compute( InputArray a, OutputArray w, OutputArray u, OutputArray vt, int flags )
{
    _SVDcompute(a, w, u, vt, flags);
}

void SVD::compute( InputArray a, OutputArray w, int flags )
{
    _SVDcompute(a, w, noArray(), noArray(), flags);
}

void SVD::backSubst( InputArray _w, InputArray _u, InputArray _vt,
                     InputArray _rhs, OutputArray _dst )
{
    Mat w = _w.getMat(), u = _u.getMat(), vt = _vt.getMat(), rhs = _rhs.getMat();
    int type = w.type(), esz = (int)w.elemSize();
    int m = u.rows, n = vt.cols, nb = rhs.data ? rhs.cols : m, nm = std::min(m, n);
    size_t wstep = w.rows == 1 ? (size_t)esz : w.cols == 1 ? (size_t)w.step : (size_t)w.step + esz;
    AutoBuffer<uchar> buffer(nb*sizeof(double) + 16);
    CV_Assert( w.type() == u.type() && u.type() == vt.type() && u.data && vt.data && w.data );
    CV_Assert( u.cols >= nm && vt.rows >= nm &&
        (w.size() == Size(nm, 1) || w.size() == Size(1, nm) || w.size() == Size(vt.rows, u.cols)) );
    CV_Assert( rhs.data == 0 || (rhs.type() == type && rhs.rows == m) );

    _dst.create( n, nb, type );
    Mat dst = _dst.getMat();
    if( type == CV_32F )
        SVBkSb(m, n, w.ptr<float>(), wstep, u.ptr<float>(), u.step, false,
               vt.ptr<float>(), vt.step, true, rhs.ptr<float>(), rhs.step, nb,
               dst.ptr<float>(), dst.step, buffer);
    else if( type == CV_64F )
        SVBkSb(m, n, w.ptr<double>(), wstep, u.ptr<double>(), u.step, false,
               vt.ptr<double>(), vt.step, true, rhs.ptr<double>(), rhs.step, nb,
               dst.ptr<double>(), dst.step, buffer);
    else
        CV_Error( CV_StsUnsupportedFormat, "" );
}


SVD& SVD::operator ()(InputArray a, int flags)
{
    _SVDcompute(a, w, u, vt, flags);
    return *this;
}


void SVD::backSubst( InputArray rhs, OutputArray dst ) const
{
    backSubst( w, u, vt, rhs, dst );
}

}

void cv::SVDecomp(InputArray src, OutputArray w, OutputArray u, OutputArray vt, int flags)
{
    SVD::compute(src, w, u, vt, flags);
}

void cv::SVBackSubst(InputArray w, InputArray u, InputArray vt, InputArray rhs, OutputArray dst)
{
    SVD::backSubst(w, u, vt, rhs, dst);
}

CV_IMPL double
cvDet( const CvArr* arr )
{
    if( CV_IS_MAT(arr) && ((CvMat*)arr)->rows <= 3 )
    {
        CvMat* mat = (CvMat*)arr;
        int type = CV_MAT_TYPE(mat->type);
        int rows = mat->rows;
        uchar* m = mat->data.ptr;
        int step = mat->step;
        CV_Assert( rows == mat->cols );

#define Mf(y, x) ((float*)(m + y*step))[x]
#define Md(y, x) ((double*)(m + y*step))[x]

        if( type == CV_32F )
        {
            if( rows == 2 )
                return det2(Mf);
            if( rows == 3 )
                return det3(Mf);
        }
        else if( type == CV_64F )
        {
            if( rows == 2 )
                return det2(Md);
            if( rows == 3 )
                return det3(Md);
        }
    }
    return cv::determinant(cv::cvarrToMat(arr));
}


CV_IMPL double
cvInvert( const CvArr* srcarr, CvArr* dstarr, int method )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);

    CV_Assert( src.type() == dst.type() && src.rows == dst.cols && src.cols == dst.rows );
    return cv::invert( src, dst, method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY :
                       method == CV_SVD ? cv::DECOMP_SVD :
                       method == CV_SVD_SYM ? cv::DECOMP_EIG : cv::DECOMP_LU );
}

CV_IMPL int
cvSolve( const CvArr* Aarr, const CvArr* barr, CvArr* xarr, int method )
{
    cv::Mat A = cv::cvarrToMat(Aarr), b = cv::cvarrToMat(barr), x = cv::cvarrToMat(xarr);

    CV_Assert( A.type() == x.type() && A.cols == x.rows && x.cols == b.cols );
    bool is_normal = (method & CV_NORMAL) != 0;
    method &= ~CV_NORMAL;
    return cv::solve( A, b, x, (method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY :
                      method == CV_SVD ? cv::DECOMP_SVD :
                      method == CV_SVD_SYM ? cv::DECOMP_EIG :
                      A.rows > A.cols ? cv::DECOMP_QR : cv::DECOMP_LU) + (is_normal ? cv::DECOMP_NORMAL : 0) );
}


CV_IMPL void
cvEigenVV( CvArr* srcarr, CvArr* evectsarr, CvArr* evalsarr, double,
           int, int )
{
    cv::Mat src = cv::cvarrToMat(srcarr), evals0 = cv::cvarrToMat(evalsarr), evals = evals0;
    if( evectsarr )
    {
        cv::Mat evects0 = cv::cvarrToMat(evectsarr), evects = evects0;
        eigen(src, evals, evects);
        if( evects0.data != evects.data )
        {
            const uchar* p = evects0.ptr();
            evects.convertTo(evects0, evects0.type());
            CV_Assert( p == evects0.ptr() );
        }
    }
    else
        eigen(src, evals);
    if( evals0.data != evals.data )
    {
        const uchar* p = evals0.ptr();
        if( evals0.size() == evals.size() )
            evals.convertTo(evals0, evals0.type());
        else if( evals0.type() == evals.type() )
            cv::transpose(evals, evals0);
        else
            cv::Mat(evals.t()).convertTo(evals0, evals0.type());
        CV_Assert( p == evals0.ptr() );
    }
}

CV_IMPL void
cvSVD( CvArr* aarr, CvArr* warr, CvArr* uarr, CvArr* varr, int flags )
{
    cv::Mat a = cv::cvarrToMat(aarr), w = cv::cvarrToMat(warr), u, v;
    int m = a.rows, n = a.cols, type = a.type(), mn = std::max(m, n), nm = std::min(m, n);

    CV_Assert( w.type() == type &&
        (w.size() == cv::Size(nm,1) || w.size() == cv::Size(1, nm) ||
        w.size() == cv::Size(nm, nm) || w.size() == cv::Size(n, m)) );

    cv::SVD svd;

    if( w.size() == cv::Size(nm, 1) )
        svd.w = cv::Mat(nm, 1, type, w.ptr() );
    else if( w.isContinuous() )
        svd.w = w;

    if( uarr )
    {
        u = cv::cvarrToMat(uarr);
        CV_Assert( u.type() == type );
        svd.u = u;
    }

    if( varr )
    {
        v = cv::cvarrToMat(varr);
        CV_Assert( v.type() == type );
        svd.vt = v;
    }

    svd(a, ((flags & CV_SVD_MODIFY_A) ? cv::SVD::MODIFY_A : 0) |
        ((!svd.u.data && !svd.vt.data) ? cv::SVD::NO_UV : 0) |
        ((m != n && (svd.u.size() == cv::Size(mn, mn) ||
        svd.vt.size() == cv::Size(mn, mn))) ? cv::SVD::FULL_UV : 0));

    if( !u.empty() )
    {
        if( flags & CV_SVD_U_T )
            cv::transpose( svd.u, u );
        else if( u.data != svd.u.data )
        {
            CV_Assert( u.size() == svd.u.size() );
            svd.u.copyTo(u);
        }
    }

    if( !v.empty() )
    {
        if( !(flags & CV_SVD_V_T) )
            cv::transpose( svd.vt, v );
        else if( v.data != svd.vt.data )
        {
            CV_Assert( v.size() == svd.vt.size() );
            svd.vt.copyTo(v);
        }
    }

    if( w.data != svd.w.data )
    {
        if( w.size() == svd.w.size() )
            svd.w.copyTo(w);
        else
        {
            w = cv::Scalar(0);
            cv::Mat wd = w.diag();
            svd.w.copyTo(wd);
        }
    }
}


CV_IMPL void
cvSVBkSb( const CvArr* warr, const CvArr* uarr,
          const CvArr* varr, const CvArr* rhsarr,
          CvArr* dstarr, int flags )
{
    cv::Mat w = cv::cvarrToMat(warr), u = cv::cvarrToMat(uarr),
        v = cv::cvarrToMat(varr), rhs,
        dst = cv::cvarrToMat(dstarr), dst0 = dst;
    if( flags & CV_SVD_U_T )
    {
        cv::Mat tmp;
        transpose(u, tmp);
        u = tmp;
    }
    if( !(flags & CV_SVD_V_T) )
    {
        cv::Mat tmp;
        transpose(v, tmp);
        v = tmp;
    }
    if( rhsarr )
        rhs = cv::cvarrToMat(rhsarr);

    cv::SVD::backSubst(w, u, v, rhs, dst);
    CV_Assert( dst.data == dst0.data );
}
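The functions above make up the dense solver entry points of OpenCV's core module. The sketch below is illustrative only: the matrix values and the choice of decomposition flags are arbitrary, and it assumes a standard OpenCV (3.x or later) build of the core module with iostream available.

#include <opencv2/core.hpp>
#include <iostream>

int main()
{
    // A small symmetric positive-definite system A*x = b (values are arbitrary demo data).
    cv::Mat A = (cv::Mat_<double>(3, 3) << 4, 1, 0,
                                           1, 3, 1,
                                           0, 1, 2);
    cv::Mat b = (cv::Mat_<double>(3, 1) << 1, 2, 3);

    std::cout << "det(A) = " << cv::determinant(A) << std::endl;

    // 3x3 systems with a single right-hand side take the closed-form fast path in cv::solve.
    cv::Mat x;
    cv::solve(A, b, x, cv::DECOMP_CHOLESKY);
    std::cout << "x = " << x << std::endl;

    // Pseudo-inverse via SVD; the return value is the ratio of the smallest to the
    // largest singular value (0 when the matrix is singular).
    cv::Mat Ainv;
    double ratio = cv::invert(A, Ainv, cv::DECOMP_SVD);
    std::cout << "w_min/w_max = " << ratio << std::endl;

    // Full SVD through the JacobiSVD implementation in this file.
    cv::Mat w, u, vt;
    cv::SVD::compute(A, w, u, vt);
    std::cout << "singular values = " << w << std::endl;
    return 0;
}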
