Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
SelfadjointMatrixVector.h
00001 // This file is part of Eigen, a lightweight C++ template library 00002 // for linear algebra. 00003 // 00004 // Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr> 00005 // 00006 // This Source Code Form is subject to the terms of the Mozilla 00007 // Public License v. 2.0. If a copy of the MPL was not distributed 00008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00009 00010 #ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H 00011 #define EIGEN_SELFADJOINT_MATRIX_VECTOR_H 00012 00013 namespace Eigen { 00014 00015 namespace internal { 00016 00017 /* Optimized selfadjoint matrix * vector product: 00018 * This algorithm processes 2 columns at onces that allows to both reduce 00019 * the number of load/stores of the result by a factor 2 and to reduce 00020 * the instruction dependency. 00021 */ 00022 00023 template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version=Specialized> 00024 struct selfadjoint_matrix_vector_product; 00025 00026 template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version> 00027 struct selfadjoint_matrix_vector_product 00028 00029 { 00030 static EIGEN_DONT_INLINE void run( 00031 Index size, 00032 const Scalar* lhs, Index lhsStride, 00033 const Scalar* _rhs, Index rhsIncr, 00034 Scalar* res, 00035 Scalar alpha); 00036 }; 00037 00038 template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version> 00039 EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run( 00040 Index size, 00041 const Scalar* lhs, Index lhsStride, 00042 const Scalar* _rhs, Index rhsIncr, 00043 Scalar* res, 00044 Scalar alpha) 00045 { 00046 typedef typename packet_traits<Scalar>::type Packet; 00047 const Index PacketSize = sizeof(Packet)/sizeof(Scalar); 00048 00049 enum { 00050 IsRowMajor = StorageOrder==RowMajor ? 1 : 0, 00051 IsLower = UpLo == Lower ? 1 : 0, 00052 FirstTriangular = IsRowMajor == IsLower 00053 }; 00054 00055 conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0; 00056 conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1; 00057 conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex, ConjugateRhs> cjd; 00058 00059 conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0; 00060 conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1; 00061 00062 Scalar cjAlpha = ConjugateRhs ? numext::conj(alpha) : alpha; 00063 00064 // FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed. 00065 // if the rhs is not sequentially stored in memory we copy it to a temporary buffer, 00066 // this is because we need to extract packets 00067 ei_declare_aligned_stack_constructed_variable(Scalar,rhs,size,rhsIncr==1 ? const_cast<Scalar*>(_rhs) : 0); 00068 if (rhsIncr!=1) 00069 { 00070 const Scalar* it = _rhs; 00071 for (Index i=0; i<size; ++i, it+=rhsIncr) 00072 rhs[i] = *it; 00073 } 00074 00075 Index bound = (std::max)(Index(0),size-8) & 0xfffffffe; 00076 if (FirstTriangular) 00077 bound = size - bound; 00078 00079 for (Index j=FirstTriangular ? bound : 0; 00080 j<(FirstTriangular ? size : bound);j+=2) 00081 { 00082 const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride; 00083 const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride; 00084 00085 Scalar t0 = cjAlpha * rhs[j]; 00086 Packet ptmp0 = pset1<Packet>(t0); 00087 Scalar t1 = cjAlpha * rhs[j+1]; 00088 Packet ptmp1 = pset1<Packet>(t1); 00089 00090 Scalar t2(0); 00091 Packet ptmp2 = pset1<Packet>(t2); 00092 Scalar t3(0); 00093 Packet ptmp3 = pset1<Packet>(t3); 00094 00095 size_t starti = FirstTriangular ? 0 : j+2; 00096 size_t endi = FirstTriangular ? j : size; 00097 size_t alignedStart = (starti) + internal::first_aligned(&res[starti], endi-starti); 00098 size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize); 00099 00100 // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed 00101 res[j] += cjd.pmul(numext::real(A0[j]), t0); 00102 res[j+1] += cjd.pmul(numext::real(A1[j+1]), t1); 00103 if(FirstTriangular) 00104 { 00105 res[j] += cj0.pmul(A1[j], t1); 00106 t3 += cj1.pmul(A1[j], rhs[j]); 00107 } 00108 else 00109 { 00110 res[j+1] += cj0.pmul(A0[j+1],t0); 00111 t2 += cj1.pmul(A0[j+1], rhs[j+1]); 00112 } 00113 00114 for (size_t i=starti; i<alignedStart; ++i) 00115 { 00116 res[i] += t0 * A0[i] + t1 * A1[i]; 00117 t2 += numext::conj(A0[i]) * rhs[i]; 00118 t3 += numext::conj(A1[i]) * rhs[i]; 00119 } 00120 // Yes this an optimization for gcc 4.3 and 4.4 (=> huge speed up) 00121 // gcc 4.2 does this optimization automatically. 00122 const Scalar* EIGEN_RESTRICT a0It = A0 + alignedStart; 00123 const Scalar* EIGEN_RESTRICT a1It = A1 + alignedStart; 00124 const Scalar* EIGEN_RESTRICT rhsIt = rhs + alignedStart; 00125 Scalar* EIGEN_RESTRICT resIt = res + alignedStart; 00126 for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize) 00127 { 00128 Packet A0i = ploadu<Packet>(a0It); a0It += PacketSize; 00129 Packet A1i = ploadu<Packet>(a1It); a1It += PacketSize; 00130 Packet Bi = ploadu<Packet>(rhsIt); rhsIt += PacketSize; // FIXME should be aligned in most cases 00131 Packet Xi = pload <Packet>(resIt); 00132 00133 Xi = pcj0.pmadd(A0i,ptmp0, pcj0.pmadd(A1i,ptmp1,Xi)); 00134 ptmp2 = pcj1.pmadd(A0i, Bi, ptmp2); 00135 ptmp3 = pcj1.pmadd(A1i, Bi, ptmp3); 00136 pstore(resIt,Xi); resIt += PacketSize; 00137 } 00138 for (size_t i=alignedEnd; i<endi; i++) 00139 { 00140 res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1); 00141 t2 += cj1.pmul(A0[i], rhs[i]); 00142 t3 += cj1.pmul(A1[i], rhs[i]); 00143 } 00144 00145 res[j] += alpha * (t2 + predux(ptmp2)); 00146 res[j+1] += alpha * (t3 + predux(ptmp3)); 00147 } 00148 for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++) 00149 { 00150 const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride; 00151 00152 Scalar t1 = cjAlpha * rhs[j]; 00153 Scalar t2(0); 00154 // TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed 00155 res[j] += cjd.pmul(numext::real(A0[j]), t1); 00156 for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++) 00157 { 00158 res[i] += cj0.pmul(A0[i], t1); 00159 t2 += cj1.pmul(A0[i], rhs[i]); 00160 } 00161 res[j] += alpha * t2; 00162 } 00163 } 00164 00165 } // end namespace internal 00166 00167 /*************************************************************************** 00168 * Wrapper to product_selfadjoint_vector 00169 ***************************************************************************/ 00170 00171 namespace internal { 00172 template<typename Lhs, int LhsMode, typename Rhs> 00173 struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> > 00174 : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> > 00175 {}; 00176 } 00177 00178 template<typename Lhs, int LhsMode, typename Rhs> 00179 struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> 00180 : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs > 00181 { 00182 EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix) 00183 00184 enum { 00185 LhsUpLo = LhsMode&(Upper|Lower) 00186 }; 00187 00188 SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} 00189 00190 template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const 00191 { 00192 typedef typename Dest::Scalar ResScalar; 00193 typedef typename Base::RhsScalar RhsScalar; 00194 typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest; 00195 00196 eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols()); 00197 00198 typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs); 00199 typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs); 00200 00201 Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs) 00202 * RhsBlasTraits::extractScalarFactor(m_rhs); 00203 00204 enum { 00205 EvalToDest = (Dest::InnerStrideAtCompileTime==1), 00206 UseRhs = (_ActualRhsType::InnerStrideAtCompileTime==1) 00207 }; 00208 00209 internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest; 00210 internal::gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!UseRhs> static_rhs; 00211 00212 ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), 00213 EvalToDest ? dest.data() : static_dest.data()); 00214 00215 ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,rhs.size(), 00216 UseRhs ? const_cast<RhsScalar*>(rhs.data()) : static_rhs.data()); 00217 00218 if(!EvalToDest) 00219 { 00220 #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN 00221 int size = dest.size(); 00222 EIGEN_DENSE_STORAGE_CTOR_PLUGIN 00223 #endif 00224 MappedDest(actualDestPtr, dest.size()) = dest; 00225 } 00226 00227 if(!UseRhs) 00228 { 00229 #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN 00230 int size = rhs.size(); 00231 EIGEN_DENSE_STORAGE_CTOR_PLUGIN 00232 #endif 00233 Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs; 00234 } 00235 00236 00237 internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run 00238 ( 00239 lhs.rows(), // size 00240 &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info 00241 actualRhsPtr, 1, // rhs info 00242 actualDestPtr, // result info 00243 actualAlpha // scale factor 00244 ); 00245 00246 if(!EvalToDest) 00247 dest = MappedDest(actualDestPtr, dest.size()); 00248 } 00249 }; 00250 00251 namespace internal { 00252 template<typename Lhs, typename Rhs, int RhsMode> 00253 struct traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> > 00254 : traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> > 00255 {}; 00256 } 00257 00258 template<typename Lhs, typename Rhs, int RhsMode> 00259 struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> 00260 : public ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs > 00261 { 00262 EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix) 00263 00264 enum { 00265 RhsUpLo = RhsMode&(Upper|Lower) 00266 }; 00267 00268 SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {} 00269 00270 template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const 00271 { 00272 // let's simply transpose the product 00273 Transpose<Dest> destT(dest); 00274 SelfadjointProductMatrix<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false, 00275 Transpose<const Lhs>, 0, true>(m_rhs.transpose(), m_lhs.transpose()).scaleAndAddTo(destT, alpha); 00276 } 00277 }; 00278 00279 } // end namespace Eigen 00280 00281 #endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H
Generated on Thu Nov 17 2022 22:01:30 by
1.7.2