Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
SelfadjointMatrixMatrix.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H

namespace Eigen {

namespace internal {

// pack a selfadjoint block diagonal for use with the gebp_kernel
//
// Packs a panel of a selfadjoint LHS into the contiguous blockA buffer
// expected by gebp_kernel. Only one triangle of `lhs` is read directly;
// the mirrored triangle is reconstructed on the fly via conjugation
// (numext::conj on the transposed accesses), and the diagonal is forced
// real via numext::real — as required for a Hermitian matrix.
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder>
struct symm_pack_lhs
{
  // Pack BlockRows consecutive rows starting at row `i`, advancing the
  // running output cursor `count`. The panel is split in three vertical
  // regions relative to the diagonal block at (i,i):
  //   columns k <  i            : plain copy (below the diagonal block)
  //   columns i <= k < i+BlockRows : the diagonal block itself (mixed copy)
  //   columns k >= i+BlockRows  : conjugate-transposed copy (mirror side)
  template<int BlockRows> inline
  void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
  {
    // normal copy
    for(Index k=0; k<i; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = lhs(i+w,k);           // normal
    // symmetric copy: inside the diagonal block, element (k, i+w) with w<h
    // lies in the un-stored triangle, so it is fetched from the mirrored
    // position and conjugated; the diagonal entry is taken as real.
    Index h = 0;
    for(Index k=i; k<i+BlockRows; k++)
    {
      for(Index w=0; w<h; w++)
        blockA[count++] = numext::conj(lhs(k, i+w)); // transposed

      blockA[count++] = numext::real(lhs(k,k));  // real (diagonal)

      for(Index w=h+1; w<BlockRows; w++)
        blockA[count++] = lhs(i+w, k);          // normal
      ++h;
    }
    // transposed copy
    for(Index k=i+BlockRows; k<cols; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
  }

  // Pack the whole rows x cols panel: first in register-friendly chunks of
  // Pack1 rows, then (at most once) a chunk of Pack2 rows, and finally the
  // leftover rows one at a time.
  void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
  {
    const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
    Index count = 0;
    Index peeled_mc = (rows/Pack1)*Pack1;
    for(Index i=0; i<peeled_mc; i+=Pack1)
    {
      pack<Pack1>(blockA, lhs, cols, i, count);
    }

    if(rows-peeled_mc>=Pack2)
    {
      pack<Pack2>(blockA, lhs, cols, peeled_mc, count);
      peeled_mc += Pack2;
    }

    // do the same with mr==1 (scalar tail: one row per pass)
    for(Index i=peeled_mc; i<rows; i++)
    {
      for(Index k=0; k<i; k++)
        blockA[count++] = lhs(i, k);              // normal

      blockA[count++] = numext::real(lhs(i, i));  // real (diagonal)

      for(Index k=i+1; k<cols; k++)
        blockA[count++] = numext::conj(lhs(k, i)); // transposed
    }
  }
};

// Packs a horizontal panel (rows k2..k2+rows of the logical matrix) of a
// selfadjoint RHS into blockB, nr columns at a time, mirroring the stored
// triangle with conjugation exactly as symm_pack_lhs does for the LHS.
template<typename Scalar, typename Index, int nr, int StorageOrder>
struct symm_pack_rhs
{
  enum { PacketSize = packet_traits<Scalar>::size };

  // blockB    : output buffer
  // rows      : height of the packed panel (K dimension of this sweep)
  // cols      : total number of columns of the rhs
  // k2        : first row of the panel within the full matrix
  void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
  {
    Index end_k = k2 + rows;
    Index count = 0;
    const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
    Index packet_cols = (cols/nr)*nr;

    // first part: normal case (columns entirely left of the diagonal block)
    for(Index j2=0; j2<k2; j2+=nr)
    {
      for(Index k=k2; k<end_k; k++)
      {
        blockB[count+0] = rhs(k,j2+0);
        blockB[count+1] = rhs(k,j2+1);
        if (nr==4)
        {
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
        }
        count += nr;
      }
    }

    // second part: diagonal block
    for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
    {
      // again we can split vertically in three different parts (transpose, symmetric, normal)
      // transpose
      for(Index k=k2; k<j2; k++)
      {
        blockB[count+0] = numext::conj(rhs(j2+0,k));
        blockB[count+1] = numext::conj(rhs(j2+1,k));
        if (nr==4)
        {
          blockB[count+2] = numext::conj(rhs(j2+2,k));
          blockB[count+3] = numext::conj(rhs(j2+3,k));
        }
        count += nr;
      }
      // symmetric: the nr x nr block straddling the diagonal; `h` tracks the
      // position of the diagonal entry within the current row of the block.
      Index h = 0;
      for(Index k=j2; k<j2+nr; k++)
      {
        // normal
        for (Index w=0 ; w<h; ++w)
          blockB[count+w] = rhs(k,j2+w);

        blockB[count+h] = numext::real(rhs(k,k)); // diagonal forced real

        // transpose
        for (Index w=h+1 ; w<nr; ++w)
          blockB[count+w] = numext::conj(rhs(j2+w,k));
        count += nr;
        ++h;
      }
      // normal
      for(Index k=j2+nr; k<end_k; k++)
      {
        blockB[count+0] = rhs(k,j2+0);
        blockB[count+1] = rhs(k,j2+1);
        if (nr==4)
        {
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
        }
        count += nr;
      }
    }

    // third part: transposed (columns entirely right of the diagonal block,
    // fetched from the mirrored triangle with conjugation)
    for(Index j2=k2+rows; j2<packet_cols; j2+=nr)
    {
      for(Index k=k2; k<end_k; k++)
      {
        blockB[count+0] = numext::conj(rhs(j2+0,k));
        blockB[count+1] = numext::conj(rhs(j2+1,k));
        if (nr==4)
        {
          blockB[count+2] = numext::conj(rhs(j2+2,k));
          blockB[count+3] = numext::conj(rhs(j2+3,k));
        }
        count += nr;
      }
    }

    // copy the remaining columns one at a time (=> the same with nr==1)
    for(Index j2=packet_cols; j2<cols; ++j2)
    {
      // transpose
      Index half = (std::min)(end_k,j2);
      for(Index k=k2; k<half; k++)
      {
        blockB[count] = numext::conj(rhs(j2,k));
        count += 1;
      }

      // if the diagonal entry (j2,j2) falls inside this panel, emit it as
      // real; otherwise back up `half` so the "normal" loop below resumes
      // at the correct row.
      if(half==j2 && half<k2+rows)
      {
        blockB[count] = numext::real(rhs(j2,j2));
        count += 1;
      }
      else
        half--;

      // normal
      for(Index k=half+1; k<k2+rows; k++)
      {
        blockB[count] = rhs(k,j2);
        count += 1;
      }
    }
  }
};

/* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of
 * the general matrix matrix product.
 */
// Primary template: declared only; the specializations below provide the
// implementations for the supported storage-order / side combinations.
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
          int ResStorageOrder>
struct product_selfadjoint_matrix;

// Row-major destination: reduce to the column-major case by computing the
// transposed product (rhs^T * lhs^T), i.e. swap lhs<->rhs and rows<->cols
// and flip storage orders / conjugation flags accordingly.
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
{

  static EIGEN_STRONG_INLINE void run(
    Index rows, Index cols,
    const Scalar* lhs, Index lhsStride,
    const Scalar* rhs, Index rhsStride,
    Scalar* res, Index resStride,
    const Scalar& alpha)
  {
    product_selfadjoint_matrix<Scalar, Index,
      EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
      EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
      ColMajor>
      ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha);
  }
};

// Case: selfadjoint LHS * general RHS, column-major destination.
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
{

  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res, Index resStride,
    const Scalar& alpha);
};

// res += alpha * lhs * rhs, with lhs selfadjoint (square, size == rows).
// Standard cache-blocked GEMM sweep, except the LHS panel is packed in
// three different ways depending on its position relative to the diagonal.
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res, Index resStride,
    const Scalar& alpha)
  {
    Index size = rows;

    const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
    const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);

    typedef gebp_traits<Scalar,Scalar> Traits;

    Index kc = size;  // cache block size along the K direction
    Index mc = rows;  // cache block size along the M direction
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
    // kc must be smaller than mc
    kc = (std::min)(kc,mc);

    // blockB is preceded by a sizeW workspace area within the same
    // stack-allocated buffer.
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
    Scalar* blockB = allocatedBlockB + sizeW;

    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
    symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
    gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
    gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;

    for(Index k2=0; k2<size; k2+=kc)
    {
      const Index actual_kc = (std::min)(k2+kc,size)-k2;

      // we have selected one row panel of rhs and one column panel of lhs
      // pack rhs's panel into a sequential chunk of memory
      // and expand each coeff to a constant packet for further reuse
      pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);

      // the selected lhs's panel has to be split in three different parts:
      //  1 - the transposed panel above the diagonal block => transposed packed copy
      //  2 - the diagonal block => special packed copy
      //  3 - the panel below the diagonal block => generic packed copy
      for(Index i2=0; i2<k2; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,k2)-i2;
        // transposed packed copy
        pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
      // the block diagonal (square: actual_mc == actual_kc here)
      {
        const Index actual_mc = (std::min)(k2+kc,size)-k2;
        // symmetric packed copy
        pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }

      // panel below the diagonal block: plain GEMM packing
      for(Index i2=k2+kc; i2<size; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,size)-i2;
        gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
          (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
  }

// matrix * selfadjoint product
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
{

  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res, Index resStride,
    const Scalar& alpha);
};

// res += alpha * lhs * rhs, with rhs selfadjoint (square, size == cols).
// Here only the RHS packing is special (symm_pack_rhs handles the mirrored
// triangle); the M-direction sweep is an ordinary GEPP.
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res, Index resStride,
    const Scalar& alpha)
  {
    Index size = cols;

    const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);

    typedef gebp_traits<Scalar,Scalar> Traits;

    Index kc = size;  // cache block size along the K direction
    Index mc = rows;  // cache block size along the M direction
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
    Scalar* blockB = allocatedBlockB + sizeW;

    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
    gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
    symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;

    for(Index k2=0; k2<size; k2+=kc)
    {
      const Index actual_kc = (std::min)(k2+kc,size)-k2;

      pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);

      // => GEPP
      for(Index i2=0; i2<rows; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,rows)-i2;
        pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
  }

} // end namespace internal

/***************************************************************************
* Wrapper to product_selfadjoint_matrix
***************************************************************************/

namespace internal {
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
  : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
{};
}

// Expression type dispatching a (possibly selfadjoint) lhs * rhs product
// to the internal::product_selfadjoint_matrix kernels above.
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
  : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs >
{
  EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)

  SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}

  enum {
    LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
    LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
    RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
    RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
  };

  // dst += alpha * lhs * rhs. Strips scalar factors and conjugation from
  // the operand expressions, then selects the kernel specialization from
  // the storage orders, triangle (upper/lower) and selfadjoint-ness flags.
  // Note the Upper/Lower choice is folded into the storage order and
  // conjugation template arguments via EIGEN_LOGICAL_XOR: an upper-triangle
  // operand is handled as the transposed/conjugated lower-triangle case.
  template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
  {
    eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());

    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);

    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
                               * RhsBlasTraits::extractScalarFactor(m_rhs);

    internal::product_selfadjoint_matrix<Scalar, Index,
      EIGEN_LOGICAL_XOR(LhsIsUpper,
                        internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
      EIGEN_LOGICAL_XOR(RhsIsUpper,
                        internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
      internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
      ::run(
        lhs.rows(), rhs.cols(),                 // sizes
        &lhs.coeffRef(0,0), lhs.outerStride(),  // lhs info
        &rhs.coeffRef(0,0), rhs.outerStride(),  // rhs info
        &dst.coeffRef(0,0), dst.outerStride(),  // result info
        actualAlpha                             // alpha
      );
  }
};

} // end namespace Eigen

#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
Generated on Thu Nov 17 2022 22:01:30 by Doxygen 1.7.2