Renesas GR-PEACH OpenCV Development / gr-peach-opencv-project-sd-card_update

Fork of gr-peach-opencv-project-sd-card by the do

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers lapack.cpp Source File

lapack.cpp

00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
00015 // Third party copyrights are property of their respective owners.
00016 //
00017 // Redistribution and use in source and binary forms, with or without modification,
00018 // are permitted provided that the following conditions are met:
00019 //
00020 //   * Redistribution's of source code must retain the above copyright notice,
00021 //     this list of conditions and the following disclaimer.
00022 //
00023 //   * Redistribution's in binary form must reproduce the above copyright notice,
00024 //     this list of conditions and the following disclaimer in the documentation
00025 //     and/or other materials provided with the distribution.
00026 //
00027 //   * The name of the copyright holders may not be used to endorse or promote products
00028 //     derived from this software without specific prior written permission.
00029 //
00030 // This software is provided by the copyright holders and contributors "as is" and
00031 // any express or implied warranties, including, but not limited to, the implied
00032 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00033 // In no event shall the Intel Corporation or contributors be liable for any direct,
00034 // indirect, incidental, special, exemplary, or consequential damages
00035 // (including, but not limited to, procurement of substitute goods or services;
00036 // loss of use, data, or profits; or business interruption) however caused
00037 // and on any theory of liability, whether in contract, strict liability,
00038 // or tort (including negligence or otherwise) arising in any way out of
00039 // the use of this software, even if advised of the possibility of such damage.
00040 //
00041 //M*/
00042 
00043 #include "precomp.hpp"
00044 #include <limits>
00045 
00046 #if defined _M_IX86 && defined _MSC_VER && _MSC_VER < 1700
00047 #pragma float_control(precise, on)
00048 #endif
00049 
00050 namespace cv
00051 {
00052 
00053 int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n)
00054 {
00055     return hal::LU32f(A, astep, m, b, bstep, n);
00056 }
00057 
00058 int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n)
00059 {
00060     return hal::LU64f(A, astep, m, b, bstep, n);
00061 }
00062 
00063 bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n)
00064 {
00065     return hal::Cholesky32f(A, astep, m, b, bstep, n);
00066 }
00067 
00068 bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n)
00069 {
00070     return hal::Cholesky64f(A, astep, m, b, bstep, n);
00071 }
00072 
00073 template<typename _Tp> static inline _Tp hypot(_Tp a, _Tp b)
00074 {
00075     a = std::abs(a);
00076     b = std::abs(b);
00077     if( a > b )
00078     {
00079         b /= a;
00080         return a*std::sqrt(1 + b*b);
00081     }
00082     if( b > 0 )
00083     {
00084         a /= b;
00085         return b*std::sqrt(1 + a*a);
00086     }
00087     return 0;
00088 }
00089 
00090 
00091 template<typename _Tp> bool
00092 JacobiImpl_( _Tp* A, size_t astep, _Tp* W, _Tp* V, size_t vstep, int n, uchar* buf )
00093 {
00094     const _Tp eps = std::numeric_limits<_Tp>::epsilon();
00095     int i, j, k, m;
00096 
00097     astep /= sizeof(A[0]);
00098     if( V )
00099     {
00100         vstep /= sizeof(V[0]);
00101         for( i = 0; i < n; i++ )
00102         {
00103             for( j = 0; j < n; j++ )
00104                 V[i*vstep + j] = (_Tp)0;
00105             V[i*vstep + i] = (_Tp)1;
00106         }
00107     }
00108 
00109     int iters, maxIters = n*n*30;
00110 
00111     int* indR = (int*)alignPtr(buf, sizeof(int));
00112     int* indC = indR + n;
00113     _Tp mv = (_Tp)0;
00114 
00115     for( k = 0; k < n; k++ )
00116     {
00117         W[k] = A[(astep + 1)*k];
00118         if( k < n - 1 )
00119         {
00120             for( m = k+1, mv = std::abs(A[astep*k + m]), i = k+2; i < n; i++ )
00121             {
00122                 _Tp val = std::abs(A[astep*k+i]);
00123                 if( mv < val )
00124                     mv = val, m = i;
00125             }
00126             indR[k] = m;
00127         }
00128         if( k > 0 )
00129         {
00130             for( m = 0, mv = std::abs(A[k]), i = 1; i < k; i++ )
00131             {
00132                 _Tp val = std::abs(A[astep*i+k]);
00133                 if( mv < val )
00134                     mv = val, m = i;
00135             }
00136             indC[k] = m;
00137         }
00138     }
00139 
00140     if( n > 1 ) for( iters = 0; iters < maxIters; iters++ )
00141     {
00142         // find index (k,l) of pivot p
00143         for( k = 0, mv = std::abs(A[indR[0]]), i = 1; i < n-1; i++ )
00144         {
00145             _Tp val = std::abs(A[astep*i + indR[i]]);
00146             if( mv < val )
00147                 mv = val, k = i;
00148         }
00149         int l = indR[k];
00150         for( i = 1; i < n; i++ )
00151         {
00152             _Tp val = std::abs(A[astep*indC[i] + i]);
00153             if( mv < val )
00154                 mv = val, k = indC[i], l = i;
00155         }
00156 
00157         _Tp p = A[astep*k + l];
00158         if( std::abs(p) <= eps )
00159             break;
00160         _Tp y = (_Tp)((W[l] - W[k])*0.5);
00161         _Tp t = std::abs(y) + hypot(p, y);
00162         _Tp s = hypot(p, t);
00163         _Tp c = t/s;
00164         s = p/s; t = (p/t)*p;
00165         if( y < 0 )
00166             s = -s, t = -t;
00167         A[astep*k + l] = 0;
00168 
00169         W[k] -= t;
00170         W[l] += t;
00171 
00172         _Tp a0, b0;
00173 
00174 #undef rotate
00175 #define rotate(v0, v1) a0 = v0, b0 = v1, v0 = a0*c - b0*s, v1 = a0*s + b0*c
00176 
00177         // rotate rows and columns k and l
00178         for( i = 0; i < k; i++ )
00179             rotate(A[astep*i+k], A[astep*i+l]);
00180         for( i = k+1; i < l; i++ )
00181             rotate(A[astep*k+i], A[astep*i+l]);
00182         for( i = l+1; i < n; i++ )
00183             rotate(A[astep*k+i], A[astep*l+i]);
00184 
00185         // rotate eigenvectors
00186         if( V )
00187             for( i = 0; i < n; i++ )
00188                 rotate(V[vstep*k+i], V[vstep*l+i]);
00189 
00190 #undef rotate
00191 
00192         for( j = 0; j < 2; j++ )
00193         {
00194             int idx = j == 0 ? k : l;
00195             if( idx < n - 1 )
00196             {
00197                 for( m = idx+1, mv = std::abs(A[astep*idx + m]), i = idx+2; i < n; i++ )
00198                 {
00199                     _Tp val = std::abs(A[astep*idx+i]);
00200                     if( mv < val )
00201                         mv = val, m = i;
00202                 }
00203                 indR[idx] = m;
00204             }
00205             if( idx > 0 )
00206             {
00207                 for( m = 0, mv = std::abs(A[idx]), i = 1; i < idx; i++ )
00208                 {
00209                     _Tp val = std::abs(A[astep*i+idx]);
00210                     if( mv < val )
00211                         mv = val, m = i;
00212                 }
00213                 indC[idx] = m;
00214             }
00215         }
00216     }
00217 
00218     // sort eigenvalues & eigenvectors
00219     for( k = 0; k < n-1; k++ )
00220     {
00221         m = k;
00222         for( i = k+1; i < n; i++ )
00223         {
00224             if( W[m] < W[i] )
00225                 m = i;
00226         }
00227         if( k != m )
00228         {
00229             std::swap(W[m], W[k]);
00230             if( V )
00231                 for( i = 0; i < n; i++ )
00232                     std::swap(V[vstep*m + i], V[vstep*k + i]);
00233         }
00234     }
00235 
00236     return true;
00237 }
00238 
00239 static bool Jacobi( float* S, size_t sstep, float* e, float* E, size_t estep, int n, uchar* buf )
00240 {
00241     return JacobiImpl_(S, sstep, e, E, estep, n, buf);
00242 }
00243 
00244 static bool Jacobi( double* S, size_t sstep, double* e, double* E, size_t estep, int n, uchar* buf )
00245 {
00246     return JacobiImpl_(S, sstep, e, E, estep, n, buf);
00247 }
00248 
00249 
00250 template<typename T> struct VBLAS
00251 {
00252     int dot(const T*, const T*, int, T*) const { return 0; }
00253     int givens(T*, T*, int, T, T) const { return 0; }
00254     int givensx(T*, T*, int, T, T, T*, T*) const { return 0; }
00255 };
00256 
00257 #if CV_SSE2
00258 template<> inline int VBLAS<float>::dot(const float* a, const float* b, int n, float* result) const
00259 {
00260     if( n < 8 )
00261         return 0;
00262     int k = 0;
00263     __m128 s0 = _mm_setzero_ps(), s1 = _mm_setzero_ps();
00264     for( ; k <= n - 8; k += 8 )
00265     {
00266         __m128 a0 = _mm_load_ps(a + k), a1 = _mm_load_ps(a + k + 4);
00267         __m128 b0 = _mm_load_ps(b + k), b1 = _mm_load_ps(b + k + 4);
00268 
00269         s0 = _mm_add_ps(s0, _mm_mul_ps(a0, b0));
00270         s1 = _mm_add_ps(s1, _mm_mul_ps(a1, b1));
00271     }
00272     s0 = _mm_add_ps(s0, s1);
00273     float sbuf[4];
00274     _mm_storeu_ps(sbuf, s0);
00275     *result = sbuf[0] + sbuf[1] + sbuf[2] + sbuf[3];
00276     return k;
00277 }
00278 
00279 
00280 template<> inline int VBLAS<float>::givens(float* a, float* b, int n, float c, float s) const
00281 {
00282     if( n < 4 )
00283         return 0;
00284     int k = 0;
00285     __m128 c4 = _mm_set1_ps(c), s4 = _mm_set1_ps(s);
00286     for( ; k <= n - 4; k += 4 )
00287     {
00288         __m128 a0 = _mm_load_ps(a + k);
00289         __m128 b0 = _mm_load_ps(b + k);
00290         __m128 t0 = _mm_add_ps(_mm_mul_ps(a0, c4), _mm_mul_ps(b0, s4));
00291         __m128 t1 = _mm_sub_ps(_mm_mul_ps(b0, c4), _mm_mul_ps(a0, s4));
00292         _mm_store_ps(a + k, t0);
00293         _mm_store_ps(b + k, t1);
00294     }
00295     return k;
00296 }
00297 
00298 
00299 template<> inline int VBLAS<float>::givensx(float* a, float* b, int n, float c, float s,
00300                                              float* anorm, float* bnorm) const
00301 {
00302     if( n < 4 )
00303         return 0;
00304     int k = 0;
00305     __m128 c4 = _mm_set1_ps(c), s4 = _mm_set1_ps(s);
00306     __m128 sa = _mm_setzero_ps(), sb = _mm_setzero_ps();
00307     for( ; k <= n - 4; k += 4 )
00308     {
00309         __m128 a0 = _mm_load_ps(a + k);
00310         __m128 b0 = _mm_load_ps(b + k);
00311         __m128 t0 = _mm_add_ps(_mm_mul_ps(a0, c4), _mm_mul_ps(b0, s4));
00312         __m128 t1 = _mm_sub_ps(_mm_mul_ps(b0, c4), _mm_mul_ps(a0, s4));
00313         _mm_store_ps(a + k, t0);
00314         _mm_store_ps(b + k, t1);
00315         sa = _mm_add_ps(sa, _mm_mul_ps(t0, t0));
00316         sb = _mm_add_ps(sb, _mm_mul_ps(t1, t1));
00317     }
00318     float abuf[4], bbuf[4];
00319     _mm_storeu_ps(abuf, sa);
00320     _mm_storeu_ps(bbuf, sb);
00321     *anorm = abuf[0] + abuf[1] + abuf[2] + abuf[3];
00322     *bnorm = bbuf[0] + bbuf[1] + bbuf[2] + bbuf[3];
00323     return k;
00324 }
00325 
00326 
00327 template<> inline int VBLAS<double>::dot(const double* a, const double* b, int n, double* result) const
00328 {
00329     if( n < 4 )
00330         return 0;
00331     int k = 0;
00332     __m128d s0 = _mm_setzero_pd(), s1 = _mm_setzero_pd();
00333     for( ; k <= n - 4; k += 4 )
00334     {
00335         __m128d a0 = _mm_load_pd(a + k), a1 = _mm_load_pd(a + k + 2);
00336         __m128d b0 = _mm_load_pd(b + k), b1 = _mm_load_pd(b + k + 2);
00337 
00338         s0 = _mm_add_pd(s0, _mm_mul_pd(a0, b0));
00339         s1 = _mm_add_pd(s1, _mm_mul_pd(a1, b1));
00340     }
00341     s0 = _mm_add_pd(s0, s1);
00342     double sbuf[2];
00343     _mm_storeu_pd(sbuf, s0);
00344     *result = sbuf[0] + sbuf[1];
00345     return k;
00346 }
00347 
00348 
00349 template<> inline int VBLAS<double>::givens(double* a, double* b, int n, double c, double s) const
00350 {
00351     int k = 0;
00352     __m128d c2 = _mm_set1_pd(c), s2 = _mm_set1_pd(s);
00353     for( ; k <= n - 2; k += 2 )
00354     {
00355         __m128d a0 = _mm_load_pd(a + k);
00356         __m128d b0 = _mm_load_pd(b + k);
00357         __m128d t0 = _mm_add_pd(_mm_mul_pd(a0, c2), _mm_mul_pd(b0, s2));
00358         __m128d t1 = _mm_sub_pd(_mm_mul_pd(b0, c2), _mm_mul_pd(a0, s2));
00359         _mm_store_pd(a + k, t0);
00360         _mm_store_pd(b + k, t1);
00361     }
00362     return k;
00363 }
00364 
00365 
00366 template<> inline int VBLAS<double>::givensx(double* a, double* b, int n, double c, double s,
00367                                               double* anorm, double* bnorm) const
00368 {
00369     int k = 0;
00370     __m128d c2 = _mm_set1_pd(c), s2 = _mm_set1_pd(s);
00371     __m128d sa = _mm_setzero_pd(), sb = _mm_setzero_pd();
00372     for( ; k <= n - 2; k += 2 )
00373     {
00374         __m128d a0 = _mm_load_pd(a + k);
00375         __m128d b0 = _mm_load_pd(b + k);
00376         __m128d t0 = _mm_add_pd(_mm_mul_pd(a0, c2), _mm_mul_pd(b0, s2));
00377         __m128d t1 = _mm_sub_pd(_mm_mul_pd(b0, c2), _mm_mul_pd(a0, s2));
00378         _mm_store_pd(a + k, t0);
00379         _mm_store_pd(b + k, t1);
00380         sa = _mm_add_pd(sa, _mm_mul_pd(t0, t0));
00381         sb = _mm_add_pd(sb, _mm_mul_pd(t1, t1));
00382     }
00383     double abuf[2], bbuf[2];
00384     _mm_storeu_pd(abuf, sa);
00385     _mm_storeu_pd(bbuf, sb);
00386     *anorm = abuf[0] + abuf[1];
00387     *bnorm = bbuf[0] + bbuf[1];
00388     return k;
00389 }
00390 #endif
00391 
00392 template<typename _Tp> void
00393 JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep,
00394                int m, int n, int n1, double minval, _Tp eps)
00395 {
00396     VBLAS<_Tp> vblas;
00397     AutoBuffer<double> Wbuf(n);
00398     double* W = Wbuf;
00399     int i, j, k, iter, max_iter = std::max(m, 30);
00400     _Tp c, s;
00401     double sd;
00402     astep /= sizeof(At[0]);
00403     vstep /= sizeof(Vt[0]);
00404 
00405     for( i = 0; i < n; i++ )
00406     {
00407         for( k = 0, sd = 0; k < m; k++ )
00408         {
00409             _Tp t = At[i*astep + k];
00410             sd += (double)t*t;
00411         }
00412         W[i] = sd;
00413 
00414         if( Vt )
00415         {
00416             for( k = 0; k < n; k++ )
00417                 Vt[i*vstep + k] = 0;
00418             Vt[i*vstep + i] = 1;
00419         }
00420     }
00421 
00422     for( iter = 0; iter < max_iter; iter++ )
00423     {
00424         bool changed = false;
00425 
00426         for( i = 0; i < n-1; i++ )
00427             for( j = i+1; j < n; j++ )
00428             {
00429                 _Tp *Ai = At + i*astep, *Aj = At + j*astep;
00430                 double a = W[i], p = 0, b = W[j];
00431 
00432                 for( k = 0; k < m; k++ )
00433                     p += (double)Ai[k]*Aj[k];
00434 
00435                 if( std::abs(p) <= eps*std::sqrt((double)a*b) )
00436                     continue;
00437 
00438                 p *= 2;
00439                 double beta = a - b, gamma = hypot((double)p, beta);
00440                 if( beta < 0 )
00441                 {
00442                     double delta = (gamma - beta)*0.5;
00443                     s = (_Tp)std::sqrt(delta/gamma);
00444                     c = (_Tp)(p/(gamma*s*2));
00445                 }
00446                 else
00447                 {
00448                     c = (_Tp)std::sqrt((gamma + beta)/(gamma*2));
00449                     s = (_Tp)(p/(gamma*c*2));
00450                 }
00451 
00452                 a = b = 0;
00453                 for( k = 0; k < m; k++ )
00454                 {
00455                     _Tp t0 = c*Ai[k] + s*Aj[k];
00456                     _Tp t1 = -s*Ai[k] + c*Aj[k];
00457                     Ai[k] = t0; Aj[k] = t1;
00458 
00459                     a += (double)t0*t0; b += (double)t1*t1;
00460                 }
00461                 W[i] = a; W[j] = b;
00462 
00463                 changed = true;
00464 
00465                 if( Vt )
00466                 {
00467                     _Tp *Vi = Vt + i*vstep, *Vj = Vt + j*vstep;
00468                     k = vblas.givens(Vi, Vj, n, c, s);
00469 
00470                     for( ; k < n; k++ )
00471                     {
00472                         _Tp t0 = c*Vi[k] + s*Vj[k];
00473                         _Tp t1 = -s*Vi[k] + c*Vj[k];
00474                         Vi[k] = t0; Vj[k] = t1;
00475                     }
00476                 }
00477             }
00478         if( !changed )
00479             break;
00480     }
00481 
00482     for( i = 0; i < n; i++ )
00483     {
00484         for( k = 0, sd = 0; k < m; k++ )
00485         {
00486             _Tp t = At[i*astep + k];
00487             sd += (double)t*t;
00488         }
00489         W[i] = std::sqrt(sd);
00490     }
00491 
00492     for( i = 0; i < n-1; i++ )
00493     {
00494         j = i;
00495         for( k = i+1; k < n; k++ )
00496         {
00497             if( W[j] < W[k] )
00498                 j = k;
00499         }
00500         if( i != j )
00501         {
00502             std::swap(W[i], W[j]);
00503             if( Vt )
00504             {
00505                 for( k = 0; k < m; k++ )
00506                     std::swap(At[i*astep + k], At[j*astep + k]);
00507 
00508                 for( k = 0; k < n; k++ )
00509                     std::swap(Vt[i*vstep + k], Vt[j*vstep + k]);
00510             }
00511         }
00512     }
00513 
00514     for( i = 0; i < n; i++ )
00515         _W[i] = (_Tp)W[i];
00516 
00517     if( !Vt )
00518         return;
00519 
00520     RNG rng(0x12345678);
00521     for( i = 0; i < n1; i++ )
00522     {
00523         sd = i < n ? W[i] : 0;
00524 
00525         for( int ii = 0; ii < 100 && sd <= minval; ii++ )
00526         {
00527             // if we got a zero singular value, then in order to get the corresponding left singular vector
00528             // we generate a random vector, project it to the previously computed left singular vectors,
00529             // subtract the projection and normalize the difference.
00530             const _Tp val0 = (_Tp)(1./m);
00531             for( k = 0; k < m; k++ )
00532             {
00533                 _Tp val = (rng.next() & 256) != 0 ? val0 : -val0;
00534                 At[i*astep + k] = val;
00535             }
00536             for( iter = 0; iter < 2; iter++ )
00537             {
00538                 for( j = 0; j < i; j++ )
00539                 {
00540                     sd = 0;
00541                     for( k = 0; k < m; k++ )
00542                         sd += At[i*astep + k]*At[j*astep + k];
00543                     _Tp asum = 0;
00544                     for( k = 0; k < m; k++ )
00545                     {
00546                         _Tp t = (_Tp)(At[i*astep + k] - sd*At[j*astep + k]);
00547                         At[i*astep + k] = t;
00548                         asum += std::abs(t);
00549                     }
00550                     asum = asum > eps*100 ? 1/asum : 0;
00551                     for( k = 0; k < m; k++ )
00552                         At[i*astep + k] *= asum;
00553                 }
00554             }
00555             sd = 0;
00556             for( k = 0; k < m; k++ )
00557             {
00558                 _Tp t = At[i*astep + k];
00559                 sd += (double)t*t;
00560             }
00561             sd = std::sqrt(sd);
00562         }
00563 
00564         s = (_Tp)(sd > minval ? 1/sd : 0.);
00565         for( k = 0; k < m; k++ )
00566             At[i*astep + k] *= s;
00567     }
00568 }
00569 
00570 
00571 static void JacobiSVD(float* At, size_t astep, float* W, float* Vt, size_t vstep, int m, int n, int n1=-1)
00572 {
00573     JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, FLT_MIN, FLT_EPSILON*2);
00574 }
00575 
00576 static void JacobiSVD(double* At, size_t astep, double* W, double* Vt, size_t vstep, int m, int n, int n1=-1)
00577 {
00578     JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, DBL_MIN, DBL_EPSILON*10);
00579 }
00580 
00581 /* y[0:m,0:n] += diag(a[0:1,0:m]) * x[0:m,0:n] */
00582 template<typename T1, typename T2, typename T3> static void
00583 MatrAXPY( int m, int n, const T1* x, int dx,
00584          const T2* a, int inca, T3* y, int dy )
00585 {
00586     int i;
00587     for( i = 0; i < m; i++, x += dx, y += dy )
00588     {
00589         T2 s = a[i*inca];
00590         int j = 0;
00591          #if CV_ENABLE_UNROLLED
00592         for(; j <= n - 4; j += 4 )
00593         {
00594             T3 t0 = (T3)(y[j]   + s*x[j]);
00595             T3 t1 = (T3)(y[j+1] + s*x[j+1]);
00596             y[j]   = t0;
00597             y[j+1] = t1;
00598             t0 = (T3)(y[j+2] + s*x[j+2]);
00599             t1 = (T3)(y[j+3] + s*x[j+3]);
00600             y[j+2] = t0;
00601             y[j+3] = t1;
00602         }
00603         #endif
00604         for( ; j < n; j++ )
00605             y[j] = (T3)(y[j] + s*x[j]);
00606     }
00607 }
00608 
00609 template<typename T> static void
00610 SVBkSbImpl_( int m, int n, const T* w, int incw,
00611        const T* u, int ldu, bool uT,
00612        const T* v, int ldv, bool vT,
00613        const T* b, int ldb, int nb,
00614        T* x, int ldx, double* buffer, T eps )
00615 {
00616     double threshold = 0;
00617     int udelta0 = uT ? ldu : 1, udelta1 = uT ? 1 : ldu;
00618     int vdelta0 = vT ? ldv : 1, vdelta1 = vT ? 1 : ldv;
00619     int i, j, nm = std::min(m, n);
00620 
00621     if( !b )
00622         nb = m;
00623 
00624     for( i = 0; i < n; i++ )
00625         for( j = 0; j < nb; j++ )
00626             x[i*ldx + j] = 0;
00627 
00628     for( i = 0; i < nm; i++ )
00629         threshold += w[i*incw];
00630     threshold *= eps;
00631 
00632     // v * inv(w) * uT * b
00633     for( i = 0; i < nm; i++, u += udelta0, v += vdelta0 )
00634     {
00635         double wi = w[i*incw];
00636         if( (double)std::abs(wi) <= threshold )
00637             continue;
00638         wi = 1/wi;
00639 
00640         if( nb == 1 )
00641         {
00642             double s = 0;
00643             if( b )
00644                 for( j = 0; j < m; j++ )
00645                     s += u[j*udelta1]*b[j*ldb];
00646             else
00647                 s = u[0];
00648             s *= wi;
00649 
00650             for( j = 0; j < n; j++ )
00651                 x[j*ldx] = (T)(x[j*ldx] + s*v[j*vdelta1]);
00652         }
00653         else
00654         {
00655             if( b )
00656             {
00657                 for( j = 0; j < nb; j++ )
00658                     buffer[j] = 0;
00659                 MatrAXPY( m, nb, b, ldb, u, udelta1, buffer, 0 );
00660                 for( j = 0; j < nb; j++ )
00661                     buffer[j] *= wi;
00662             }
00663             else
00664             {
00665                 for( j = 0; j < nb; j++ )
00666                     buffer[j] = u[j*udelta1]*wi;
00667             }
00668             MatrAXPY( n, nb, buffer, 0, v, vdelta1, x, ldx );
00669         }
00670     }
00671 }
00672 
00673 static void
00674 SVBkSb( int m, int n, const float* w, size_t wstep,
00675         const float* u, size_t ustep, bool uT,
00676         const float* v, size_t vstep, bool vT,
00677         const float* b, size_t bstep, int nb,
00678         float* x, size_t xstep, uchar* buffer )
00679 {
00680     SVBkSbImpl_(m, n, w, wstep ? (int)(wstep/sizeof(w[0])) : 1,
00681                 u, (int)(ustep/sizeof(u[0])), uT,
00682                 v, (int)(vstep/sizeof(v[0])), vT,
00683                 b, (int)(bstep/sizeof(b[0])), nb,
00684                 x, (int)(xstep/sizeof(x[0])),
00685                 (double*)alignPtr(buffer, sizeof(double)), (float)(DBL_EPSILON*2) );
00686 }
00687 
00688 static void
00689 SVBkSb( int m, int n, const double* w, size_t wstep,
00690        const double* u, size_t ustep, bool uT,
00691        const double* v, size_t vstep, bool vT,
00692        const double* b, size_t bstep, int nb,
00693        double* x, size_t xstep, uchar* buffer )
00694 {
00695     SVBkSbImpl_(m, n, w, wstep ? (int)(wstep/sizeof(w[0])) : 1,
00696                 u, (int)(ustep/sizeof(u[0])), uT,
00697                 v, (int)(vstep/sizeof(v[0])), vT,
00698                 b, (int)(bstep/sizeof(b[0])), nb,
00699                 x, (int)(xstep/sizeof(x[0])),
00700                 (double*)alignPtr(buffer, sizeof(double)), DBL_EPSILON*2 );
00701 }
00702 
00703 }
00704 
00705 /****************************************************************************************\
00706 *                                 Determinant of the matrix                              *
00707 \****************************************************************************************/
00708 
00709 #define det2(m)   ((double)m(0,0)*m(1,1) - (double)m(0,1)*m(1,0))
00710 #define det3(m)   (m(0,0)*((double)m(1,1)*m(2,2) - (double)m(1,2)*m(2,1)) -  \
00711                    m(0,1)*((double)m(1,0)*m(2,2) - (double)m(1,2)*m(2,0)) +  \
00712                    m(0,2)*((double)m(1,0)*m(2,1) - (double)m(1,1)*m(2,0)))
00713 
00714 double cv::determinant( InputArray _mat )
00715 {
00716     Mat mat = _mat.getMat();
00717     double result = 0;
00718     int type = mat.type(), rows = mat.rows;
00719     size_t step = mat.step;
00720     const uchar* m = mat.ptr();
00721 
00722     CV_Assert( !mat.empty() );
00723     CV_Assert( mat.rows == mat.cols && (type == CV_32F || type == CV_64F));
00724 
00725     #define Mf(y, x) ((float*)(m + y*step))[x]
00726     #define Md(y, x) ((double*)(m + y*step))[x]
00727 
00728     if( type == CV_32F )
00729     {
00730         if( rows == 2 )
00731             result = det2(Mf);
00732         else if( rows == 3 )
00733             result = det3(Mf);
00734         else if( rows == 1 )
00735             result = Mf(0,0);
00736         else
00737         {
00738             size_t bufSize = rows*rows*sizeof(float);
00739             AutoBuffer<uchar>  buffer(bufSize);
00740             Mat a(rows, rows, CV_32F, (uchar*)buffer);
00741             mat.copyTo(a);
00742 
00743             result = hal::LU32f(a.ptr<float>(), a.step, rows, 0, 0, 0);
00744             if( result )
00745             {
00746                 for( int i = 0; i < rows; i++ )
00747                     result *= a.at<float>(i,i);
00748                 result = 1./result;
00749             }
00750         }
00751     }
00752     else
00753     {
00754         if( rows == 2 )
00755             result = det2(Md);
00756         else if( rows == 3 )
00757             result = det3(Md);
00758         else if( rows == 1 )
00759             result = Md(0,0);
00760         else
00761         {
00762             size_t bufSize = rows*rows*sizeof(double);
00763             AutoBuffer<uchar>  buffer(bufSize);
00764             Mat a(rows, rows, CV_64F, (uchar*)buffer);
00765             mat.copyTo(a);
00766 
00767             result = hal::LU64f(a.ptr<double>(), a.step, rows, 0, 0, 0);
00768             if( result )
00769             {
00770                 for( int i = 0; i < rows; i++ )
00771                     result *= a.at<double>(i,i);
00772                 result = 1./result;
00773             }
00774         }
00775     }
00776 
00777     #undef Mf
00778     #undef Md
00779 
00780     return result;
00781 }
00782 
00783 /****************************************************************************************\
00784 *                          Inverse (or pseudo-inverse) of a matrix                       *
00785 \****************************************************************************************/
00786 
00787 #define Sf( y, x ) ((float*)(srcdata + y*srcstep))[x]
00788 #define Sd( y, x ) ((double*)(srcdata + y*srcstep))[x]
00789 #define Df( y, x ) ((float*)(dstdata + y*dststep))[x]
00790 #define Dd( y, x ) ((double*)(dstdata + y*dststep))[x]
00791 
00792 double cv::invert( InputArray _src, OutputArray _dst, int method )
00793 {
00794     bool result = false;
00795     Mat src = _src.getMat();
00796     int type = src.type();
00797 
00798     CV_Assert(type == CV_32F || type == CV_64F);
00799 
00800     size_t esz = CV_ELEM_SIZE(type);
00801     int m = src.rows, n = src.cols;
00802 
00803     if( method == DECOMP_SVD )
00804     {
00805         int nm = std::min(m, n);
00806 
00807         AutoBuffer<uchar>  _buf((m*nm + nm + nm*n)*esz + sizeof(double));
00808         uchar* buf = alignPtr((uchar*)_buf, (int)esz);
00809         Mat u(m, nm, type, buf);
00810         Mat w(nm, 1, type, u.ptr() + m*nm*esz);
00811         Mat vt(nm, n, type, w.ptr() + nm*esz);
00812 
00813         SVD::compute(src, w, u, vt);
00814         SVD::backSubst(w, u, vt, Mat(), _dst);
00815         return type == CV_32F ?
00816             (w.ptr<float>()[0] >= FLT_EPSILON ?
00817              w.ptr<float>()[n-1]/w.ptr<float>()[0] : 0) :
00818             (w.ptr<double>()[0] >= DBL_EPSILON ?
00819              w.ptr<double>()[n-1]/w.ptr<double>()[0] : 0);
00820     }
00821 
00822     CV_Assert( m == n );
00823 
00824     if( method == DECOMP_EIG )
00825     {
00826         AutoBuffer<uchar>  _buf((n*n*2 + n)*esz + sizeof(double));
00827         uchar* buf = alignPtr((uchar*)_buf, (int)esz);
00828         Mat u(n, n, type, buf);
00829         Mat w(n, 1, type, u.ptr() + n*n*esz);
00830         Mat vt(n, n, type, w.ptr() + n*esz);
00831 
00832         eigen(src, w, vt);
00833         transpose(vt, u);
00834         SVD::backSubst(w, u, vt, Mat(), _dst);
00835         return type == CV_32F ?
00836         (w.ptr<float>()[0] >= FLT_EPSILON ?
00837          w.ptr<float>()[n-1]/w.ptr<float>()[0] : 0) :
00838         (w.ptr<double>()[0] >= DBL_EPSILON ?
00839          w.ptr<double>()[n-1]/w.ptr<double>()[0] : 0);
00840     }
00841 
00842     CV_Assert( method == DECOMP_LU || method == DECOMP_CHOLESKY );
00843 
00844     _dst.create( n, n, type );
00845     Mat dst = _dst.getMat();
00846 
00847     if( n <= 3 )
00848     {
00849         const uchar* srcdata = src.ptr();
00850         uchar* dstdata = dst.ptr();
00851         size_t srcstep = src.step;
00852         size_t dststep = dst.step;
00853 
00854         if( n == 2 )
00855         {
00856             if( type == CV_32FC1 )
00857             {
00858                 double d = det2(Sf);
00859                 if( d != 0. )
00860                 {
00861                     result = true;
00862                     d = 1./d;
00863 
00864                     #if CV_SSE2
00865                         if(USE_SSE2)
00866                         {
00867                             __m128 zero = _mm_setzero_ps();
00868                             __m128 t0 = _mm_loadl_pi(zero, (const __m64*)srcdata); //t0 = sf(0,0) sf(0,1)
00869                             __m128 t1 = _mm_loadh_pi(zero, (const __m64*)(srcdata+srcstep)); //t1 = sf(1,0) sf(1,1)
00870                             __m128 s0 = _mm_or_ps(t0, t1);
00871                             __m128 det =_mm_set1_ps((float)d);
00872                             s0 =  _mm_mul_ps(s0, det);
00873                             static const uchar CV_DECL_ALIGNED(16) inv[16] = {0,0,0,0,0,0,0,0x80,0,0,0,0x80,0,0,0,0};
00874                             __m128 pattern = _mm_load_ps((const float*)inv);
00875                             s0 = _mm_xor_ps(s0, pattern);//==-1*s0
00876                             s0 = _mm_shuffle_ps(s0, s0, _MM_SHUFFLE(0,2,1,3));
00877                             _mm_storel_pi((__m64*)dstdata, s0);
00878                             _mm_storeh_pi((__m64*)((float*)(dstdata+dststep)), s0);
00879                         }
00880                         else
00881                     #endif
00882                         {
00883                         double t0, t1;
00884                         t0 = Sf(0,0)*d;
00885                         t1 = Sf(1,1)*d;
00886                         Df(1,1) = (float)t0;
00887                         Df(0,0) = (float)t1;
00888                         t0 = -Sf(0,1)*d;
00889                         t1 = -Sf(1,0)*d;
00890                         Df(0,1) = (float)t0;
00891                         Df(1,0) = (float)t1;
00892                         }
00893 
00894                 }
00895             }
00896             else
00897             {
00898                 double d = det2(Sd);
00899                 if( d != 0. )
00900                 {
00901                     result = true;
00902                     d = 1./d;
00903                     #if CV_SSE2
00904                         if(USE_SSE2)
00905                         {
00906                             __m128d s0 = _mm_loadu_pd((const double*)srcdata); //s0 = sf(0,0) sf(0,1)
00907                             __m128d s1 = _mm_loadu_pd ((const double*)(srcdata+srcstep));//s1 = sf(1,0) sf(1,1)
00908                             __m128d sm = _mm_unpacklo_pd(s0, _mm_load_sd((const double*)(srcdata+srcstep)+1)); //sm = sf(0,0) sf(1,1) - main diagonal
00909                             __m128d ss = _mm_shuffle_pd(s0, s1, _MM_SHUFFLE2(0,1)); //ss = sf(0,1) sf(1,0) - secondary diagonal
00910                             __m128d det = _mm_load1_pd((const double*)&d);
00911                             sm =  _mm_mul_pd(sm, det);
00912 
00913                             static const uchar CV_DECL_ALIGNED(16) inv[8] = {0,0,0,0,0,0,0,0x80};
00914                             __m128d pattern = _mm_load1_pd((double*)inv);
00915                             ss = _mm_mul_pd(ss, det);
00916                             ss = _mm_xor_pd(ss, pattern);//==-1*ss
00917 
00918                             s0 = _mm_shuffle_pd(sm, ss, _MM_SHUFFLE2(0,1));
00919                             s1 = _mm_shuffle_pd(ss, sm, _MM_SHUFFLE2(0,1));
00920                             _mm_storeu_pd((double*)dstdata, s0);
00921                             _mm_storeu_pd((double*)(dstdata+dststep), s1);
00922                         }
00923                         else
00924                     #endif
00925                         {
00926                             double t0, t1;
00927                             t0 = Sd(0,0)*d;
00928                             t1 = Sd(1,1)*d;
00929                             Dd(1,1) = t0;
00930                             Dd(0,0) = t1;
00931                             t0 = -Sd(0,1)*d;
00932                             t1 = -Sd(1,0)*d;
00933                             Dd(0,1) = t0;
00934                             Dd(1,0) = t1;
00935                         }
00936                 }
00937             }
00938         }
00939         else if( n == 3 )
00940         {
00941             if( type == CV_32FC1 )
00942             {
00943                 double d = det3(Sf);
00944 
00945                 if( d != 0. )
00946                 {
00947                     double t[12];
00948 
00949                     result = true;
00950                     d = 1./d;
00951                     t[0] = (((double)Sf(1,1) * Sf(2,2) - (double)Sf(1,2) * Sf(2,1)) * d);
00952                     t[1] = (((double)Sf(0,2) * Sf(2,1) - (double)Sf(0,1) * Sf(2,2)) * d);
00953                     t[2] = (((double)Sf(0,1) * Sf(1,2) - (double)Sf(0,2) * Sf(1,1)) * d);
00954 
00955                     t[3] = (((double)Sf(1,2) * Sf(2,0) - (double)Sf(1,0) * Sf(2,2)) * d);
00956                     t[4] = (((double)Sf(0,0) * Sf(2,2) - (double)Sf(0,2) * Sf(2,0)) * d);
00957                     t[5] = (((double)Sf(0,2) * Sf(1,0) - (double)Sf(0,0) * Sf(1,2)) * d);
00958 
00959                     t[6] = (((double)Sf(1,0) * Sf(2,1) - (double)Sf(1,1) * Sf(2,0)) * d);
00960                     t[7] = (((double)Sf(0,1) * Sf(2,0) - (double)Sf(0,0) * Sf(2,1)) * d);
00961                     t[8] = (((double)Sf(0,0) * Sf(1,1) - (double)Sf(0,1) * Sf(1,0)) * d);
00962 
00963                     Df(0,0) = (float)t[0]; Df(0,1) = (float)t[1]; Df(0,2) = (float)t[2];
00964                     Df(1,0) = (float)t[3]; Df(1,1) = (float)t[4]; Df(1,2) = (float)t[5];
00965                     Df(2,0) = (float)t[6]; Df(2,1) = (float)t[7]; Df(2,2) = (float)t[8];
00966                 }
00967             }
00968             else
00969             {
00970                 double d = det3(Sd);
00971                 if( d != 0. )
00972                 {
00973                     result = true;
00974                     d = 1./d;
00975                     double t[9];
00976 
00977                     t[0] = (Sd(1,1) * Sd(2,2) - Sd(1,2) * Sd(2,1)) * d;
00978                     t[1] = (Sd(0,2) * Sd(2,1) - Sd(0,1) * Sd(2,2)) * d;
00979                     t[2] = (Sd(0,1) * Sd(1,2) - Sd(0,2) * Sd(1,1)) * d;
00980 
00981                     t[3] = (Sd(1,2) * Sd(2,0) - Sd(1,0) * Sd(2,2)) * d;
00982                     t[4] = (Sd(0,0) * Sd(2,2) - Sd(0,2) * Sd(2,0)) * d;
00983                     t[5] = (Sd(0,2) * Sd(1,0) - Sd(0,0) * Sd(1,2)) * d;
00984 
00985                     t[6] = (Sd(1,0) * Sd(2,1) - Sd(1,1) * Sd(2,0)) * d;
00986                     t[7] = (Sd(0,1) * Sd(2,0) - Sd(0,0) * Sd(2,1)) * d;
00987                     t[8] = (Sd(0,0) * Sd(1,1) - Sd(0,1) * Sd(1,0)) * d;
00988 
00989                     Dd(0,0) = t[0]; Dd(0,1) = t[1]; Dd(0,2) = t[2];
00990                     Dd(1,0) = t[3]; Dd(1,1) = t[4]; Dd(1,2) = t[5];
00991                     Dd(2,0) = t[6]; Dd(2,1) = t[7]; Dd(2,2) = t[8];
00992                 }
00993             }
00994         }
00995         else
00996         {
00997             assert( n == 1 );
00998 
00999             if( type == CV_32FC1 )
01000             {
01001                 double d = Sf(0,0);
01002                 if( d != 0. )
01003                 {
01004                     result = true;
01005                     Df(0,0) = (float)(1./d);
01006                 }
01007             }
01008             else
01009             {
01010                 double d = Sd(0,0);
01011                 if( d != 0. )
01012                 {
01013                     result = true;
01014                     Dd(0,0) = 1./d;
01015                 }
01016             }
01017         }
01018         if( !result )
01019             dst = Scalar (0);
01020         return result;
01021     }
01022 
01023    int elem_size = CV_ELEM_SIZE(type);
01024     AutoBuffer<uchar>  buf(n*n*elem_size);
01025     Mat src1(n, n, type, (uchar*)buf);
01026     src.copyTo(src1);
01027     setIdentity(dst);
01028 
01029     if( method == DECOMP_LU && type == CV_32F )
01030         result = hal::LU32f(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n) != 0;
01031     else if( method == DECOMP_LU && type == CV_64F )
01032         result = hal::LU64f(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n) != 0;
01033     else if( method == DECOMP_CHOLESKY && type == CV_32F )
01034         result = hal::Cholesky32f(src1.ptr<float>(), src1.step, n, dst.ptr<float>(), dst.step, n);
01035     else
01036         result = hal::Cholesky64f(src1.ptr<double>(), src1.step, n, dst.ptr<double>(), dst.step, n);
01037 
01038     if( !result )
01039         dst = Scalar (0);
01040 
01041     return result;
01042 }
01043 
01044 
01045 
01046 /****************************************************************************************\
01047 *                              Solving a linear system                                   *
01048 \****************************************************************************************/
01049 
01050 bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int method )
01051 {
01052     bool result = true;
01053     Mat src = _src.getMat(), _src2 = _src2arg.getMat();
01054     int type = src.type();
01055     bool is_normal = (method & DECOMP_NORMAL) != 0;
01056 
01057     CV_Assert( type == _src2.type() && (type == CV_32F || type == CV_64F) );
01058 
01059     method &= ~DECOMP_NORMAL;
01060     CV_Assert( (method != DECOMP_LU && method != DECOMP_CHOLESKY) ||
01061         is_normal || src.rows == src.cols );
01062 
01063     // check case of a single equation and small matrix
01064     if( (method == DECOMP_LU || method == DECOMP_CHOLESKY) && !is_normal &&
01065         src.rows <= 3 && src.rows == src.cols && _src2.cols == 1 )
01066     {
01067         _dst.create( src.cols, _src2.cols, src.type() );
01068         Mat dst = _dst.getMat();
01069 
01070         #define bf(y) ((float*)(bdata + y*src2step))[0]
01071         #define bd(y) ((double*)(bdata + y*src2step))[0]
01072 
01073         const uchar* srcdata = src.ptr();
01074         const uchar* bdata = _src2.ptr();
01075         uchar* dstdata = dst.ptr();
01076         size_t srcstep = src.step;
01077         size_t src2step = _src2.step;
01078         size_t dststep = dst.step;
01079 
01080         if( src.rows == 2 )
01081         {
01082             if( type == CV_32FC1 )
01083             {
01084                 double d = det2(Sf);
01085                 if( d != 0. )
01086                 {
01087                     double t;
01088                     d = 1./d;
01089                     t = (float)(((double)bf(0)*Sf(1,1) - (double)bf(1)*Sf(0,1))*d);
01090                     Df(1,0) = (float)(((double)bf(1)*Sf(0,0) - (double)bf(0)*Sf(1,0))*d);
01091                     Df(0,0) = (float)t;
01092                 }
01093                 else
01094                     result = false;
01095             }
01096             else
01097             {
01098                 double d = det2(Sd);
01099                 if( d != 0. )
01100                 {
01101                     double t;
01102                     d = 1./d;
01103                     t = (bd(0)*Sd(1,1) - bd(1)*Sd(0,1))*d;
01104                     Dd(1,0) = (bd(1)*Sd(0,0) - bd(0)*Sd(1,0))*d;
01105                     Dd(0,0) = t;
01106                 }
01107                 else
01108                     result = false;
01109             }
01110         }
01111         else if( src.rows == 3 )
01112         {
01113             if( type == CV_32FC1 )
01114             {
01115                 double d = det3(Sf);
01116                 if( d != 0. )
01117                 {
01118                     float t[3];
01119                     d = 1./d;
01120 
01121                     t[0] = (float)(d*
01122                            (bf(0)*((double)Sf(1,1)*Sf(2,2) - (double)Sf(1,2)*Sf(2,1)) -
01123                             Sf(0,1)*((double)bf(1)*Sf(2,2) - (double)Sf(1,2)*bf(2)) +
01124                             Sf(0,2)*((double)bf(1)*Sf(2,1) - (double)Sf(1,1)*bf(2))));
01125 
01126                     t[1] = (float)(d*
01127                            (Sf(0,0)*(double)(bf(1)*Sf(2,2) - (double)Sf(1,2)*bf(2)) -
01128                             bf(0)*((double)Sf(1,0)*Sf(2,2) - (double)Sf(1,2)*Sf(2,0)) +
01129                             Sf(0,2)*((double)Sf(1,0)*bf(2) - (double)bf(1)*Sf(2,0))));
01130 
01131                     t[2] = (float)(d*
01132                            (Sf(0,0)*((double)Sf(1,1)*bf(2) - (double)bf(1)*Sf(2,1)) -
01133                             Sf(0,1)*((double)Sf(1,0)*bf(2) - (double)bf(1)*Sf(2,0)) +
01134                             bf(0)*((double)Sf(1,0)*Sf(2,1) - (double)Sf(1,1)*Sf(2,0))));
01135 
01136                     Df(0,0) = t[0];
01137                     Df(1,0) = t[1];
01138                     Df(2,0) = t[2];
01139                 }
01140                 else
01141                     result = false;
01142             }
01143             else
01144             {
01145                 double d = det3(Sd);
01146                 if( d != 0. )
01147                 {
01148                     double t[9];
01149 
01150                     d = 1./d;
01151 
01152                     t[0] = ((Sd(1,1) * Sd(2,2) - Sd(1,2) * Sd(2,1))*bd(0) +
01153                             (Sd(0,2) * Sd(2,1) - Sd(0,1) * Sd(2,2))*bd(1) +
01154                             (Sd(0,1) * Sd(1,2) - Sd(0,2) * Sd(1,1))*bd(2))*d;
01155 
01156                     t[1] = ((Sd(1,2) * Sd(2,0) - Sd(1,0) * Sd(2,2))*bd(0) +
01157                             (Sd(0,0) * Sd(2,2) - Sd(0,2) * Sd(2,0))*bd(1) +
01158                             (Sd(0,2) * Sd(1,0) - Sd(0,0) * Sd(1,2))*bd(2))*d;
01159 
01160                     t[2] = ((Sd(1,0) * Sd(2,1) - Sd(1,1) * Sd(2,0))*bd(0) +
01161                             (Sd(0,1) * Sd(2,0) - Sd(0,0) * Sd(2,1))*bd(1) +
01162                             (Sd(0,0) * Sd(1,1) - Sd(0,1) * Sd(1,0))*bd(2))*d;
01163 
01164                     Dd(0,0) = t[0];
01165                     Dd(1,0) = t[1];
01166                     Dd(2,0) = t[2];
01167                 }
01168                 else
01169                     result = false;
01170             }
01171         }
01172         else
01173         {
01174             assert( src.rows == 1 );
01175 
01176             if( type == CV_32FC1 )
01177             {
01178                 double d = Sf(0,0);
01179                 if( d != 0. )
01180                     Df(0,0) = (float)(bf(0)/d);
01181                 else
01182                     result = false;
01183             }
01184             else
01185             {
01186                 double d = Sd(0,0);
01187                 if( d != 0. )
01188                     Dd(0,0) = (bd(0)/d);
01189                 else
01190                     result = false;
01191             }
01192         }
01193         return result;
01194     }
01195 
01196     if( method == DECOMP_QR )
01197         method = DECOMP_SVD;
01198 
01199     int m = src.rows, m_ = m, n = src.cols, nb = _src2.cols;
01200     size_t esz = CV_ELEM_SIZE(type), bufsize = 0;
01201     size_t vstep = alignSize(n*esz, 16);
01202     size_t astep = method == DECOMP_SVD && !is_normal ? alignSize(m*esz, 16) : vstep;
01203     AutoBuffer<uchar>  buffer;
01204 
01205     Mat src2 = _src2;
01206     _dst.create( src.cols, src2.cols, src.type() );
01207     Mat dst = _dst.getMat();
01208 
01209     if( m < n )
01210         CV_Error(CV_StsBadArg, "The function can not solve under-determined linear systems" );
01211 
01212     if( m == n )
01213         is_normal = false;
01214     else if( is_normal )
01215     {
01216         m_ = n;
01217         if( method == DECOMP_SVD )
01218             method = DECOMP_EIG;
01219     }
01220 
01221     size_t asize = astep*(method == DECOMP_SVD || is_normal ? n : m);
01222     bufsize += asize + 32;
01223 
01224     if( is_normal )
01225         bufsize += n*nb*esz;
01226 
01227     if( method == DECOMP_SVD || method == DECOMP_EIG )
01228         bufsize += n*5*esz + n*vstep + nb*sizeof(double) + 32;
01229 
01230     buffer.allocate(bufsize);
01231     uchar* ptr = alignPtr((uchar*)buffer, 16);
01232 
01233     Mat a(m_, n, type, ptr, astep);
01234 
01235     if( is_normal )
01236         mulTransposed(src, a, true);
01237     else if( method != DECOMP_SVD )
01238         src.copyTo(a);
01239     else
01240     {
01241         a = Mat(n, m_, type, ptr, astep);
01242         transpose(src, a);
01243     }
01244     ptr += asize;
01245 
01246     if( !is_normal )
01247     {
01248         if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
01249             src2.copyTo(dst);
01250     }
01251     else
01252     {
01253         // a'*b
01254         if( method == DECOMP_LU || method == DECOMP_CHOLESKY )
01255             gemm( src, src2, 1, Mat(), 0, dst, GEMM_1_T );
01256         else
01257         {
01258             Mat tmp(n, nb, type, ptr);
01259             ptr += n*nb*esz;
01260             gemm( src, src2, 1, Mat(), 0, tmp, GEMM_1_T );
01261             src2 = tmp;
01262         }
01263     }
01264 
01265     if( method == DECOMP_LU )
01266     {
01267         if( type == CV_32F )
01268             result = hal::LU32f(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb) != 0;
01269         else
01270             result = hal::LU64f(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb) != 0;
01271     }
01272     else if( method == DECOMP_CHOLESKY )
01273     {
01274         if( type == CV_32F )
01275             result = hal::Cholesky32f(a.ptr<float>(), a.step, n, dst.ptr<float>(), dst.step, nb);
01276         else
01277             result = hal::Cholesky64f(a.ptr<double>(), a.step, n, dst.ptr<double>(), dst.step, nb);
01278     }
01279     else
01280     {
01281         ptr = alignPtr(ptr, 16);
01282         Mat v(n, n, type, ptr, vstep), w(n, 1, type, ptr + vstep*n), u;
01283         ptr += n*(vstep + esz);
01284 
01285         if( method == DECOMP_EIG )
01286         {
01287             if( type == CV_32F )
01288                 Jacobi(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, n, ptr);
01289             else
01290                 Jacobi(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, n, ptr);
01291             u = v;
01292         }
01293         else
01294         {
01295             if( type == CV_32F )
01296                 JacobiSVD(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, m_, n);
01297             else
01298                 JacobiSVD(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, m_, n);
01299             u = a;
01300         }
01301 
01302         if( type == CV_32F )
01303         {
01304             SVBkSb(m_, n, w.ptr<float>(), 0, u.ptr<float>(), u.step, true,
01305                    v.ptr<float>(), v.step, true, src2.ptr<float>(),
01306                    src2.step, nb, dst.ptr<float>(), dst.step, ptr);
01307         }
01308         else
01309         {
01310             SVBkSb(m_, n, w.ptr<double>(), 0, u.ptr<double>(), u.step, true,
01311                    v.ptr<double>(), v.step, true, src2.ptr<double>(),
01312                    src2.step, nb, dst.ptr<double>(), dst.step, ptr);
01313         }
01314         result = true;
01315     }
01316 
01317     if( !result )
01318         dst = Scalar (0);
01319 
01320     return result;
01321 }
01322 
01323 
01324 /////////////////// finding eigenvalues and eigenvectors of a symmetric matrix ///////////////
01325 
01326 bool cv::eigen( InputArray _src, OutputArray _evals, OutputArray _evects )
01327 {
01328     Mat src = _src.getMat();
01329     int type = src.type();
01330     int n = src.rows;
01331 
01332     CV_Assert( src.rows == src.cols );
01333     CV_Assert (type == CV_32F || type == CV_64F);
01334 
01335     Mat v;
01336     if( _evects.needed() )
01337     {
01338         _evects.create(n, n, type);
01339         v = _evects.getMat();
01340     }
01341 
01342     size_t elemSize = src.elemSize(), astep = alignSize(n*elemSize, 16);
01343     AutoBuffer<uchar>  buf(n*astep + n*5*elemSize + 32);
01344     uchar* ptr = alignPtr((uchar*)buf, 16);
01345     Mat a(n, n, type, ptr, astep), w(n, 1, type, ptr + astep*n);
01346     ptr += astep*n + elemSize*n;
01347     src.copyTo(a);
01348     bool ok = type == CV_32F ?
01349         Jacobi(a.ptr<float>(), a.step, w.ptr<float>(), v.ptr<float>(), v.step, n, ptr) :
01350         Jacobi(a.ptr<double>(), a.step, w.ptr<double>(), v.ptr<double>(), v.step, n, ptr);
01351 
01352     w.copyTo(_evals);
01353     return ok;
01354 }
01355 
01356 namespace cv
01357 {
01358 
01359 static void _SVDcompute( InputArray _aarr, OutputArray _w,
01360                          OutputArray _u, OutputArray _vt, int flags )
01361 {
01362     Mat src = _aarr.getMat();
01363     int m = src.rows, n = src.cols;
01364     int type = src.type();
01365     bool compute_uv = _u.needed() || _vt.needed();
01366     bool full_uv = (flags & SVD::FULL_UV) != 0;
01367 
01368     CV_Assert( type == CV_32F || type == CV_64F );
01369 
01370     if( flags & SVD::NO_UV )
01371     {
01372         _u.release();
01373         _vt.release();
01374         compute_uv = full_uv = false;
01375     }
01376 
01377     bool at = false;
01378     if( m < n )
01379     {
01380         std::swap(m, n);
01381         at = true;
01382     }
01383 
01384     int urows = full_uv ? m : n;
01385     size_t esz = src.elemSize(), astep = alignSize(m*esz, 16), vstep = alignSize(n*esz, 16);
01386     AutoBuffer<uchar> _buf(urows*astep + n*vstep + n*esz + 32);
01387     uchar* buf = alignPtr((uchar*)_buf, 16);
01388     Mat temp_a(n, m, type, buf, astep);
01389     Mat temp_w(n, 1, type, buf + urows*astep);
01390     Mat temp_u(urows, m, type, buf, astep), temp_v;
01391 
01392     if( compute_uv )
01393         temp_v = Mat(n, n, type, alignPtr(buf + urows*astep + n*esz, 16), vstep);
01394 
01395     if( urows > n )
01396         temp_u = Scalar::all(0);
01397 
01398     if( !at )
01399         transpose(src, temp_a);
01400     else
01401         src.copyTo(temp_a);
01402 
01403     if( type == CV_32F )
01404     {
01405         JacobiSVD(temp_a.ptr<float>(), temp_u.step, temp_w.ptr<float>(),
01406               temp_v.ptr<float>(), temp_v.step, m, n, compute_uv ? urows : 0);
01407     }
01408     else
01409     {
01410         JacobiSVD(temp_a.ptr<double>(), temp_u.step, temp_w.ptr<double>(),
01411               temp_v.ptr<double>(), temp_v.step, m, n, compute_uv ? urows : 0);
01412     }
01413     temp_w.copyTo(_w);
01414     if( compute_uv )
01415     {
01416         if( !at )
01417         {
01418             if( _u.needed() )
01419                 transpose(temp_u, _u);
01420             if( _vt.needed() )
01421                 temp_v.copyTo(_vt);
01422         }
01423         else
01424         {
01425             if( _u.needed() )
01426                 transpose(temp_v, _u);
01427             if( _vt.needed() )
01428                 temp_u.copyTo(_vt);
01429         }
01430     }
01431 }
01432 
01433 
01434 void SVD::compute( InputArray a, OutputArray w, OutputArray u, OutputArray vt, int flags )
01435 {
01436     _SVDcompute(a, w, u, vt, flags);
01437 }
01438 
01439 void SVD::compute( InputArray a, OutputArray w, int flags )
01440 {
01441     _SVDcompute(a, w, noArray(), noArray(), flags);
01442 }
01443 
01444 void SVD::backSubst( InputArray _w, InputArray _u, InputArray _vt,
01445                      InputArray _rhs, OutputArray _dst )
01446 {
01447     Mat w = _w.getMat(), u = _u.getMat(), vt = _vt.getMat(), rhs = _rhs.getMat();
01448     int type = w.type(), esz = (int)w.elemSize();
01449     int m = u.rows, n = vt.cols, nb = rhs.data ? rhs.cols : m, nm = std::min(m, n);
01450     size_t wstep = w.rows == 1 ? (size_t)esz : w.cols == 1 ? (size_t)w.step : (size_t)w.step + esz;
01451     AutoBuffer<uchar>  buffer(nb*sizeof(double) + 16);
01452     CV_Assert( w.type() == u.type() && u.type() == vt.type() && u.data && vt.data && w.data );
01453     CV_Assert( u.cols >= nm && vt.rows >= nm &&
01454               (w.size() == Size(nm, 1) || w.size() == Size(1, nm) || w.size() == Size(vt.rows, u.cols)) );
01455     CV_Assert( rhs.data == 0 || (rhs.type() == type && rhs.rows == m) );
01456 
01457     _dst.create( n, nb, type );
01458     Mat dst = _dst.getMat();
01459     if( type == CV_32F )
01460         SVBkSb(m, n, w.ptr<float>(), wstep, u.ptr<float>(), u.step, false,
01461                vt.ptr<float>(), vt.step, true, rhs.ptr<float>(), rhs.step, nb,
01462                dst.ptr<float>(), dst.step, buffer);
01463     else if( type == CV_64F )
01464         SVBkSb(m, n, w.ptr<double>(), wstep, u.ptr<double>(), u.step, false,
01465                vt.ptr<double>(), vt.step, true, rhs.ptr<double>(), rhs.step, nb,
01466                dst.ptr<double>(), dst.step, buffer);
01467     else
01468         CV_Error( CV_StsUnsupportedFormat, "" );
01469 }
01470 
01471 
01472 SVD& SVD::operator ()(InputArray a, int flags)
01473 {
01474     _SVDcompute(a, w, u, vt, flags);
01475     return *this;
01476 }
01477 
01478 
01479 void SVD::backSubst( InputArray rhs, OutputArray dst ) const
01480 {
01481     backSubst( w, u, vt, rhs, dst );
01482 }
01483 
01484 }
01485 
01486 
01487 void cv::SVDecomp(InputArray src, OutputArray w, OutputArray u, OutputArray vt, int flags)
01488 {
01489     SVD::compute(src, w, u, vt, flags);
01490 }
01491 
01492 void cv::SVBackSubst(InputArray w, InputArray u, InputArray vt, InputArray rhs, OutputArray dst)
01493 {
01494     SVD::backSubst(w, u, vt, rhs, dst);
01495 }
01496 
01497 
01498 CV_IMPL double
01499 cvDet( const CvArr* arr )
01500 {
01501     if( CV_IS_MAT(arr) && ((CvMat*)arr)->rows <= 3 )
01502     {
01503         CvMat* mat = (CvMat*)arr;
01504         int type = CV_MAT_TYPE(mat->type);
01505         int rows = mat->rows;
01506         uchar* m = mat->data.ptr;
01507         int step = mat->step;
01508         CV_Assert( rows == mat->cols );
01509 
01510         #define Mf(y, x) ((float*)(m + y*step))[x]
01511         #define Md(y, x) ((double*)(m + y*step))[x]
01512 
01513         if( type == CV_32F )
01514         {
01515             if( rows == 2 )
01516                 return det2(Mf);
01517             if( rows == 3 )
01518                 return det3(Mf);
01519         }
01520         else if( type == CV_64F )
01521         {
01522             if( rows == 2 )
01523                 return det2(Md);
01524             if( rows == 3 )
01525                 return det3(Md);
01526         }
01527     }
01528     return cv::determinant(cv::cvarrToMat(arr));
01529 }
01530 
01531 
01532 CV_IMPL double
01533 cvInvert( const CvArr* srcarr, CvArr* dstarr, int method )
01534 {
01535     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
01536 
01537     CV_Assert( src.type() == dst.type() && src.rows == dst.cols && src.cols == dst.rows );
01538     return cv::invert( src, dst, method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY :
01539                       method == CV_SVD ? cv::DECOMP_SVD :
01540                       method == CV_SVD_SYM ? cv::DECOMP_EIG : cv::DECOMP_LU );
01541 }
01542 
01543 
01544 CV_IMPL int
01545 cvSolve( const CvArr* Aarr, const CvArr* barr, CvArr* xarr, int method )
01546 {
01547     cv::Mat A = cv::cvarrToMat(Aarr), b = cv::cvarrToMat(barr), x = cv::cvarrToMat(xarr);
01548 
01549     CV_Assert( A.type() == x.type() && A.cols == x.rows && x.cols == b.cols );
01550     bool is_normal = (method & CV_NORMAL) != 0;
01551     method &= ~CV_NORMAL;
01552     return cv::solve( A, b, x, (method == CV_CHOLESKY ? cv::DECOMP_CHOLESKY :
01553                                 method == CV_SVD ? cv::DECOMP_SVD :
01554                                 method == CV_SVD_SYM ? cv::DECOMP_EIG :
01555         A.rows > A.cols ? cv::DECOMP_QR : cv::DECOMP_LU) + (is_normal ? cv::DECOMP_NORMAL : 0) );
01556 }
01557 
01558 
01559 CV_IMPL void
01560 cvEigenVV( CvArr* srcarr, CvArr* evectsarr, CvArr* evalsarr, double,
01561            int, int )
01562 {
01563     cv::Mat src = cv::cvarrToMat(srcarr), evals0 = cv::cvarrToMat(evalsarr), evals = evals0;
01564     if( evectsarr )
01565     {
01566         cv::Mat evects0 = cv::cvarrToMat(evectsarr), evects = evects0;
01567         eigen(src, evals, evects);
01568         if( evects0.data != evects.data )
01569         {
01570             const uchar* p = evects0.ptr();
01571             evects.convertTo(evects0, evects0.type());
01572             CV_Assert( p == evects0.ptr() );
01573         }
01574     }
01575     else
01576         eigen(src, evals);
01577     if( evals0.data != evals.data )
01578     {
01579         const uchar* p = evals0.ptr();
01580         if( evals0.size() == evals.size() )
01581             evals.convertTo(evals0, evals0.type());
01582         else if( evals0.type() == evals.type() )
01583             cv::transpose(evals, evals0);
01584         else
01585             cv::Mat(evals.t()).convertTo(evals0, evals0.type());
01586         CV_Assert( p == evals0.ptr() );
01587     }
01588 }
01589 
01590 
01591 CV_IMPL void
01592 cvSVD( CvArr* aarr, CvArr* warr, CvArr* uarr, CvArr* varr, int flags )
01593 {
01594     cv::Mat a = cv::cvarrToMat(aarr), w = cv::cvarrToMat(warr), u, v;
01595     int m = a.rows, n = a.cols, type = a.type(), mn = std::max(m, n), nm = std::min(m, n);
01596 
01597     CV_Assert( w.type() == type &&
01598         (w.size() == cv::Size(nm,1) || w.size() == cv::Size(1, nm) ||
01599         w.size() == cv::Size(nm, nm) || w.size() == cv::Size(n, m)) );
01600 
01601     cv::SVD svd;
01602 
01603     if( w.size() == cv::Size(nm, 1) )
01604         svd.w = cv::Mat(nm, 1, type, w.ptr() );
01605     else if( w.isContinuous() )
01606         svd.w = w;
01607 
01608     if( uarr )
01609     {
01610         u = cv::cvarrToMat(uarr);
01611         CV_Assert( u.type() == type );
01612         svd.u = u;
01613     }
01614 
01615     if( varr )
01616     {
01617         v = cv::cvarrToMat(varr);
01618         CV_Assert( v.type() == type );
01619         svd.vt = v;
01620     }
01621 
01622     svd(a, ((flags & CV_SVD_MODIFY_A) ? cv::SVD::MODIFY_A : 0) |
01623         ((!svd.u.data && !svd.vt.data) ? cv::SVD::NO_UV : 0) |
01624         ((m != n && (svd.u.size() == cv::Size(mn, mn) ||
01625         svd.vt.size() == cv::Size(mn, mn))) ? cv::SVD::FULL_UV : 0));
01626 
01627     if( !u.empty() )
01628     {
01629         if( flags & CV_SVD_U_T )
01630             cv::transpose( svd.u, u );
01631         else if( u.data != svd.u.data )
01632         {
01633             CV_Assert( u.size() == svd.u.size() );
01634             svd.u.copyTo(u);
01635         }
01636     }
01637 
01638     if( !v.empty() )
01639     {
01640         if( !(flags & CV_SVD_V_T) )
01641             cv::transpose( svd.vt, v );
01642         else if( v.data != svd.vt.data )
01643         {
01644             CV_Assert( v.size() == svd.vt.size() );
01645             svd.vt.copyTo(v);
01646         }
01647     }
01648 
01649     if( w.data != svd.w.data )
01650     {
01651         if( w.size() == svd.w.size() )
01652             svd.w.copyTo(w);
01653         else
01654         {
01655             w = cv::Scalar (0);
01656             cv::Mat wd = w.diag();
01657             svd.w.copyTo(wd);
01658         }
01659     }
01660 }
01661 
01662 
01663 CV_IMPL void
01664 cvSVBkSb( const CvArr* warr, const CvArr* uarr,
01665           const CvArr* varr, const CvArr* rhsarr,
01666           CvArr* dstarr, int flags )
01667 {
01668     cv::Mat w = cv::cvarrToMat(warr), u = cv::cvarrToMat(uarr),
01669         v = cv::cvarrToMat(varr), rhs,
01670         dst = cv::cvarrToMat(dstarr), dst0 = dst;
01671     if( flags & CV_SVD_U_T )
01672     {
01673         cv::Mat tmp;
01674         transpose(u, tmp);
01675         u = tmp;
01676     }
01677     if( !(flags & CV_SVD_V_T) )
01678     {
01679         cv::Mat tmp;
01680         transpose(v, tmp);
01681         v = tmp;
01682     }
01683     if( rhsarr )
01684         rhs = cv::cvarrToMat(rhsarr);
01685 
01686     cv::SVD::backSubst(w, u, v, rhs, dst);
01687     CV_Assert( dst.data == dst0.data );
01688 }
01689