Eigen | Mbed

Users » pmic » Code » Eigen » Documentation
Embed: (wiki syntax)
Show/hide line numbers SelfadjointMatrixVector.h Source File
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
00005 //
00006 // This Source Code Form is subject to the terms of the Mozilla
00007 // Public License v. 2.0. If a copy of the MPL was not distributed
00008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
00009 
00010 #ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H
00011 #define EIGEN_SELFADJOINT_MATRIX_VECTOR_H
00012 
00013 namespace Eigen { 
00014 
00015 namespace internal {
00016 
00017 /* Optimized selfadjoint matrix * vector product:
00018  * This algorithm processes 2 columns at onces that allows to both reduce
00019  * the number of load/stores of the result by a factor 2 and to reduce
00020  * the instruction dependency.
00021  */
00022 
00023 template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version=Specialized>
00024 struct selfadjoint_matrix_vector_product;
00025 
00026 template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
00027 struct selfadjoint_matrix_vector_product
00028 
00029 {
00030 static EIGEN_DONT_INLINE void run(
00031   Index size,
00032   const Scalar*  lhs, Index lhsStride,
00033   const Scalar* _rhs, Index rhsIncr,
00034   Scalar* res,
00035   Scalar alpha);
00036 };
00037 
00038 template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
00039 EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(
00040   Index size,
00041   const Scalar*  lhs, Index lhsStride,
00042   const Scalar* _rhs, Index rhsIncr,
00043   Scalar* res,
00044   Scalar alpha)
00045 {
00046   typedef typename packet_traits<Scalar>::type Packet;
00047   const Index PacketSize = sizeof(Packet)/sizeof(Scalar);
00048 
00049   enum {
00050     IsRowMajor = StorageOrder==RowMajor ? 1 : 0,
00051     IsLower = UpLo == Lower ? 1 : 0,
00052     FirstTriangular = IsRowMajor == IsLower
00053   };
00054 
00055   conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs,  IsRowMajor), ConjugateRhs> cj0;
00056   conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;
00057   conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex, ConjugateRhs> cjd;
00058 
00059   conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs,  IsRowMajor), ConjugateRhs> pcj0;
00060   conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1;
00061 
00062   Scalar cjAlpha = ConjugateRhs ? numext::conj(alpha) : alpha;
00063 
00064   // FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed.
00065   // if the rhs is not sequentially stored in memory we copy it to a temporary buffer,
00066   // this is because we need to extract packets
00067   ei_declare_aligned_stack_constructed_variable(Scalar,rhs,size,rhsIncr==1 ? const_cast<Scalar*>(_rhs) : 0);  
00068   if (rhsIncr!=1)
00069   {
00070     const Scalar* it = _rhs;
00071     for (Index i=0; i<size; ++i, it+=rhsIncr)
00072       rhs[i] = *it;
00073   }
00074 
00075   Index bound = (std::max)(Index(0),size-8) & 0xfffffffe;
00076   if (FirstTriangular)
00077     bound = size - bound;
00078 
00079   for (Index j=FirstTriangular ? bound : 0;
00080        j<(FirstTriangular ? size : bound);j+=2)
00081   {
00082     const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
00083     const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
00084 
00085     Scalar t0 = cjAlpha * rhs[j];
00086     Packet ptmp0 = pset1<Packet>(t0);
00087     Scalar t1 = cjAlpha * rhs[j+1];
00088     Packet ptmp1 = pset1<Packet>(t1);
00089 
00090     Scalar t2(0);
00091     Packet ptmp2 = pset1<Packet>(t2);
00092     Scalar t3(0);
00093     Packet ptmp3 = pset1<Packet>(t3);
00094 
00095     size_t starti = FirstTriangular ? 0 : j+2;
00096     size_t endi   = FirstTriangular ? j : size;
00097     size_t alignedStart = (starti) + internal::first_aligned(&res[starti], endi-starti);
00098     size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
00099 
00100     // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
00101     res[j]   += cjd.pmul(numext::real(A0[j]), t0);
00102     res[j+1] += cjd.pmul(numext::real(A1[j+1]), t1);
00103     if(FirstTriangular)
00104     {
00105       res[j]   += cj0.pmul(A1[j],   t1);
00106       t3       += cj1.pmul(A1[j],   rhs[j]);
00107     }
00108     else
00109     {
00110       res[j+1] += cj0.pmul(A0[j+1],t0);
00111       t2 += cj1.pmul(A0[j+1], rhs[j+1]);
00112     }
00113 
00114     for (size_t i=starti; i<alignedStart; ++i)
00115     {
00116       res[i] += t0 * A0[i] + t1 * A1[i];
00117       t2 += numext::conj(A0[i]) * rhs[i];
00118       t3 += numext::conj(A1[i]) * rhs[i];
00119     }
00120     // Yes this an optimization for gcc 4.3 and 4.4 (=> huge speed up)
00121     // gcc 4.2 does this optimization automatically.
00122     const Scalar* EIGEN_RESTRICT a0It  = A0  + alignedStart;
00123     const Scalar* EIGEN_RESTRICT a1It  = A1  + alignedStart;
00124     const Scalar* EIGEN_RESTRICT rhsIt = rhs + alignedStart;
00125           Scalar* EIGEN_RESTRICT resIt = res + alignedStart;
00126     for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize)
00127     {
00128       Packet A0i = ploadu<Packet>(a0It);  a0It  += PacketSize;
00129       Packet A1i = ploadu<Packet>(a1It);  a1It  += PacketSize;
00130       Packet Bi  = ploadu<Packet>(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases
00131       Packet Xi  = pload <Packet>(resIt);
00132 
00133       Xi    = pcj0.pmadd(A0i,ptmp0, pcj0.pmadd(A1i,ptmp1,Xi));
00134       ptmp2 = pcj1.pmadd(A0i,  Bi, ptmp2);
00135       ptmp3 = pcj1.pmadd(A1i,  Bi, ptmp3);
00136       pstore(resIt,Xi); resIt += PacketSize;
00137     }
00138     for (size_t i=alignedEnd; i<endi; i++)
00139     {
00140       res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1);
00141       t2 += cj1.pmul(A0[i], rhs[i]);
00142       t3 += cj1.pmul(A1[i], rhs[i]);
00143     }
00144 
00145     res[j]   += alpha * (t2 + predux(ptmp2));
00146     res[j+1] += alpha * (t3 + predux(ptmp3));
00147   }
00148   for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++)
00149   {
00150     const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
00151 
00152     Scalar t1 = cjAlpha * rhs[j];
00153     Scalar t2(0);
00154     // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
00155     res[j] += cjd.pmul(numext::real(A0[j]), t1);
00156     for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++)
00157     {
00158       res[i] += cj0.pmul(A0[i], t1);
00159       t2 += cj1.pmul(A0[i], rhs[i]);
00160     }
00161     res[j] += alpha * t2;
00162   }
00163 }
00164 
00165 } // end namespace internal 
00166 
00167 /***************************************************************************
00168 * Wrapper to product_selfadjoint_vector
00169 ***************************************************************************/
00170 
00171 namespace internal {
00172 template<typename Lhs, int LhsMode, typename Rhs>
00173 struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> >
00174   : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> >
00175 {};
00176 }
00177 
00178 template<typename Lhs, int LhsMode, typename Rhs>
00179 struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
00180   : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs >
00181 {
00182   EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
00183 
00184   enum {
00185     LhsUpLo = LhsMode&(Upper|Lower)
00186   };
00187 
00188   SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
00189 
00190   template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
00191   {
00192     typedef typename Dest::Scalar ResScalar;
00193     typedef typename Base::RhsScalar RhsScalar;
00194     typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
00195     
00196     eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols());
00197 
00198     typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
00199     typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
00200 
00201     Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
00202                                * RhsBlasTraits::extractScalarFactor(m_rhs);
00203 
00204     enum {
00205       EvalToDest = (Dest::InnerStrideAtCompileTime==1),
00206       UseRhs = (_ActualRhsType::InnerStrideAtCompileTime==1)
00207     };
00208     
00209     internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest;
00210     internal::gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!UseRhs> static_rhs;
00211 
00212     ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
00213                                                   EvalToDest ? dest.data() : static_dest.data());
00214                                                   
00215     ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,rhs.size(),
00216         UseRhs ? const_cast<RhsScalar*>(rhs.data()) : static_rhs.data());
00217     
00218     if(!EvalToDest)
00219     {
00220       #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
00221       int size = dest.size();
00222       EIGEN_DENSE_STORAGE_CTOR_PLUGIN
00223       #endif
00224       MappedDest(actualDestPtr, dest.size()) = dest;
00225     }
00226       
00227     if(!UseRhs)
00228     {
00229       #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
00230       int size = rhs.size();
00231       EIGEN_DENSE_STORAGE_CTOR_PLUGIN
00232       #endif
00233       Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
00234     }
00235       
00236       
00237     internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run
00238       (
00239         lhs.rows(),                             // size
00240         &lhs.coeffRef(0,0),  lhs.outerStride(), // lhs info
00241         actualRhsPtr, 1,                        // rhs info
00242         actualDestPtr,                          // result info
00243         actualAlpha                             // scale factor
00244       );
00245     
00246     if(!EvalToDest)
00247       dest = MappedDest(actualDestPtr, dest.size());
00248   }
00249 };
00250 
00251 namespace internal {
00252 template<typename Lhs, typename Rhs, int RhsMode>
00253 struct traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> >
00254   : traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> >
00255 {};
00256 }
00257 
00258 template<typename Lhs, typename Rhs, int RhsMode>
00259 struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
00260   : public ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs >
00261 {
00262   EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
00263 
00264   enum {
00265     RhsUpLo = RhsMode&(Upper|Lower)
00266   };
00267 
00268   SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
00269 
00270   template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
00271   {
00272     // let's simply transpose the product
00273     Transpose<Dest> destT(dest);
00274     SelfadjointProductMatrix<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false,
00275                              Transpose<const Lhs>, 0, true>(m_rhs.transpose(), m_lhs.transpose()).scaleAndAddTo(destT, alpha);
00276   }
00277 };
00278 
00279 } // end namespace Eigen
00280 
00281 #endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H
Repository toolbox

Repository details

Type:	Library
Created:	17 Nov 2022
Imports:	1
Forks:	0
Commits:	2
Dependents:	0
Dependencies:	0
Followers:	1
Important changes to repositories hosted on mbed.com

SelfadjointMatrixVector.h

Repository toolbox

Repository details

Important Information for this Arm website

Important changes to repositories hosted on mbed.com

SelfadjointMatrixVector.h

Repository toolbox

Repository details

Important Information for this Arm website

Access Warning