Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
SelfadjointMatrixMatrix.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H

namespace Eigen {

namespace internal {

// pack a selfadjoint block diagonal for use with the gebp_kernel
//
// Packs a panel of a selfadjoint LHS into the contiguous blockA buffer
// expected by gebp_kernel. Only one triangle of `lhs` is read directly;
// the mirrored triangle is reconstructed on the fly via conjugation
// (numext::conj on the transposed accesses), and the diagonal is forced
// real via numext::real — as required for a Hermitian matrix.
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder>
struct symm_pack_lhs
{
  // Pack BlockRows consecutive rows starting at row `i`, advancing the
  // running output cursor `count`. The panel is split in three vertical
  // regions relative to the diagonal block at (i,i):
  //   columns k <  i            : plain copy (below the diagonal block)
  //   columns i <= k < i+BlockRows : the diagonal block itself (mixed copy)
  //   columns k >= i+BlockRows  : conjugate-transposed copy (mirror side)
  template<int BlockRows> inline
  void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
  {
    // normal copy
    for(Index k=0; k<i; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = lhs(i+w,k);           // normal
    // symmetric copy: inside the diagonal block, element (k, i+w) with w<h
    // lies in the un-stored triangle, so it is fetched from the mirrored
    // position and conjugated; the diagonal entry is taken as real.
    Index h = 0;
    for(Index k=i; k<i+BlockRows; k++)
    {
      for(Index w=0; w<h; w++)
        blockA[count++] = numext::conj(lhs(k, i+w)); // transposed

      blockA[count++] = numext::real(lhs(k,k));  // real (diagonal)

      for(Index w=h+1; w<BlockRows; w++)
        blockA[count++] = lhs(i+w, k);          // normal
      ++h;
    }
    // transposed copy
    for(Index k=i+BlockRows; k<cols; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
  }

  // Pack the whole rows x cols panel: first in register-friendly chunks of
  // Pack1 rows, then (at most once) a chunk of Pack2 rows, and finally the
  // leftover rows one at a time.
  void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
  {
    const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
    Index count = 0;
    Index peeled_mc = (rows/Pack1)*Pack1;
    for(Index i=0; i<peeled_mc; i+=Pack1)
    {
      pack<Pack1>(blockA, lhs, cols, i, count);
    }

    if(rows-peeled_mc>=Pack2)
    {
      pack<Pack2>(blockA, lhs, cols, peeled_mc, count);
      peeled_mc += Pack2;
    }

    // do the same with mr==1 (scalar tail: one row per pass)
    for(Index i=peeled_mc; i<rows; i++)
    {
      for(Index k=0; k<i; k++)
        blockA[count++] = lhs(i, k);              // normal

      blockA[count++] = numext::real(lhs(i, i));  // real (diagonal)

      for(Index k=i+1; k<cols; k++)
        blockA[count++] = numext::conj(lhs(k, i)); // transposed
    }
  }
};

// Packs a horizontal panel (rows k2..k2+rows of the logical matrix) of a
// selfadjoint RHS into blockB, nr columns at a time, mirroring the stored
// triangle with conjugation exactly as symm_pack_lhs does for the LHS.
template<typename Scalar, typename Index, int nr, int StorageOrder>
struct symm_pack_rhs
{
  enum { PacketSize = packet_traits<Scalar>::size };

  // blockB    : output buffer
  // rows      : height of the packed panel (K dimension of this sweep)
  // cols      : total number of columns of the rhs
  // k2        : first row of the panel within the full matrix
  void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
  {
    Index end_k = k2 + rows;
    Index count = 0;
    const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
    Index packet_cols = (cols/nr)*nr;

    // first part: normal case (columns entirely left of the diagonal block)
    for(Index j2=0; j2<k2; j2+=nr)
    {
      for(Index k=k2; k<end_k; k++)
      {
        blockB[count+0] = rhs(k,j2+0);
        blockB[count+1] = rhs(k,j2+1);
        if (nr==4)
        {
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
        }
        count += nr;
      }
    }

    // second part: diagonal block
    for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
    {
      // again we can split vertically in three different parts (transpose, symmetric, normal)
      // transpose
      for(Index k=k2; k<j2; k++)
      {
        blockB[count+0] = numext::conj(rhs(j2+0,k));
        blockB[count+1] = numext::conj(rhs(j2+1,k));
        if (nr==4)
        {
          blockB[count+2] = numext::conj(rhs(j2+2,k));
          blockB[count+3] = numext::conj(rhs(j2+3,k));
        }
        count += nr;
      }
      // symmetric: the nr x nr block straddling the diagonal; `h` tracks the
      // position of the diagonal entry within the current row of the block.
      Index h = 0;
      for(Index k=j2; k<j2+nr; k++)
      {
        // normal
        for (Index w=0 ; w<h; ++w)
          blockB[count+w] = rhs(k,j2+w);

        blockB[count+h] = numext::real(rhs(k,k)); // diagonal forced real

        // transpose
        for (Index w=h+1 ; w<nr; ++w)
          blockB[count+w] = numext::conj(rhs(j2+w,k));
        count += nr;
        ++h;
      }
      // normal
      for(Index k=j2+nr; k<end_k; k++)
      {
        blockB[count+0] = rhs(k,j2+0);
        blockB[count+1] = rhs(k,j2+1);
        if (nr==4)
        {
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
        }
        count += nr;
      }
    }

    // third part: transposed (columns entirely right of the diagonal block,
    // fetched from the mirrored triangle with conjugation)
    for(Index j2=k2+rows; j2<packet_cols; j2+=nr)
    {
      for(Index k=k2; k<end_k; k++)
      {
        blockB[count+0] = numext::conj(rhs(j2+0,k));
        blockB[count+1] = numext::conj(rhs(j2+1,k));
        if (nr==4)
        {
          blockB[count+2] = numext::conj(rhs(j2+2,k));
          blockB[count+3] = numext::conj(rhs(j2+3,k));
        }
        count += nr;
      }
    }

    // copy the remaining columns one at a time (=> the same with nr==1)
    for(Index j2=packet_cols; j2<cols; ++j2)
    {
      // transpose
      Index half = (std::min)(end_k,j2);
      for(Index k=k2; k<half; k++)
      {
        blockB[count] = numext::conj(rhs(j2,k));
        count += 1;
      }

      // if the diagonal entry (j2,j2) falls inside this panel, emit it as
      // real; otherwise back up `half` so the "normal" loop below resumes
      // at the correct row.
      if(half==j2 && half<k2+rows)
      {
        blockB[count] = numext::real(rhs(j2,j2));
        count += 1;
      }
      else
        half--;

      // normal
      for(Index k=half+1; k<k2+rows; k++)
      {
        blockB[count] = rhs(k,j2);
        count += 1;
      }
    }
  }
};

/* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of
 * the general matrix matrix product.
 */
// Primary template: declared only; the specializations below provide the
// implementations for the supported storage-order / side combinations.
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
          int ResStorageOrder>
struct product_selfadjoint_matrix;

// Row-major destination: reduce to the column-major case by computing the
// transposed product (rhs^T * lhs^T), i.e. swap lhs<->rhs and rows<->cols
// and flip storage orders / conjugation flags accordingly.
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
{

  static EIGEN_STRONG_INLINE void run(
    Index rows, Index cols,
    const Scalar* lhs, Index lhsStride,
    const Scalar* rhs, Index rhsStride,
    Scalar* res, Index resStride,
    const Scalar& alpha)
  {
    product_selfadjoint_matrix<Scalar, Index,
      EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
      EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
      ColMajor>
      ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha);
  }
};

// Case: selfadjoint LHS * general RHS, column-major destination.
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
{

  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res, Index resStride,
    const Scalar& alpha);
};

// res += alpha * lhs * rhs, with lhs selfadjoint (square, size == rows).
// Standard cache-blocked GEMM sweep, except the LHS panel is packed in
// three different ways depending on its position relative to the diagonal.
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res, Index resStride,
    const Scalar& alpha)
  {
    Index size = rows;

    const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
    const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);

    typedef gebp_traits<Scalar,Scalar> Traits;

    Index kc = size;  // cache block size along the K direction
    Index mc = rows;  // cache block size along the M direction
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
    // kc must be smaller than mc
    kc = (std::min)(kc,mc);

    // blockB is preceded by a sizeW workspace area within the same
    // stack-allocated buffer.
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
    Scalar* blockB = allocatedBlockB + sizeW;

    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
    symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
    gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
    gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;

    for(Index k2=0; k2<size; k2+=kc)
    {
      const Index actual_kc = (std::min)(k2+kc,size)-k2;

      // we have selected one row panel of rhs and one column panel of lhs
      // pack rhs's panel into a sequential chunk of memory
      // and expand each coeff to a constant packet for further reuse
      pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);

      // the selected lhs's panel has to be split in three different parts:
      //  1 - the transposed panel above the diagonal block => transposed packed copy
      //  2 - the diagonal block => special packed copy
      //  3 - the panel below the diagonal block => generic packed copy
      for(Index i2=0; i2<k2; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,k2)-i2;
        // transposed packed copy
        pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
      // the block diagonal (square: actual_mc == actual_kc here)
      {
        const Index actual_mc = (std::min)(k2+kc,size)-k2;
        // symmetric packed copy
        pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }

      // panel below the diagonal block: plain GEMM packing
      for(Index i2=k2+kc; i2<size; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,size)-i2;
        gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
          (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
  }

// matrix * selfadjoint product
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
{

  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res, Index resStride,
    const Scalar& alpha);
};

// res += alpha * lhs * rhs, with rhs selfadjoint (square, size == cols).
// Here only the RHS packing is special (symm_pack_rhs handles the mirrored
// triangle); the M-direction sweep is an ordinary GEPP.
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res, Index resStride,
    const Scalar& alpha)
  {
    Index size = cols;

    const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);

    typedef gebp_traits<Scalar,Scalar> Traits;

    Index kc = size;  // cache block size along the K direction
    Index mc = rows;  // cache block size along the M direction
    Index nc = cols;  // cache block size along the N direction
    computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
    std::size_t sizeW = kc*Traits::WorkSpaceFactor;
    std::size_t sizeB = sizeW + kc*cols;
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
    Scalar* blockB = allocatedBlockB + sizeW;

    gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
    gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
    symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;

    for(Index k2=0; k2<size; k2+=kc)
    {
      const Index actual_kc = (std::min)(k2+kc,size)-k2;

      pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);

      // => GEPP
      for(Index i2=0; i2<rows; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,rows)-i2;
        pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
  }

} // end namespace internal

/***************************************************************************
* Wrapper to product_selfadjoint_matrix
***************************************************************************/

namespace internal {
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
  : traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
{};
}

// Expression type dispatching a (possibly selfadjoint) lhs * rhs product
// to the internal::product_selfadjoint_matrix kernels above.
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
  : public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs >
{
  EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)

  SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}

  enum {
    LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
    LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
    RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
    RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
  };

  // dst += alpha * lhs * rhs. Strips scalar factors and conjugation from
  // the operand expressions, then selects the kernel specialization from
  // the storage orders, triangle (upper/lower) and selfadjoint-ness flags.
  // Note the Upper/Lower choice is folded into the storage order and
  // conjugation template arguments via EIGEN_LOGICAL_XOR: an upper-triangle
  // operand is handled as the transposed/conjugated lower-triangle case.
  template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
  {
    eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());

    typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
    typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);

    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
                               * RhsBlasTraits::extractScalarFactor(m_rhs);

    internal::product_selfadjoint_matrix<Scalar, Index,
      EIGEN_LOGICAL_XOR(LhsIsUpper,
                        internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
      EIGEN_LOGICAL_XOR(RhsIsUpper,
                        internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
      internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
      ::run(
        lhs.rows(), rhs.cols(),                 // sizes
        &lhs.coeffRef(0,0), lhs.outerStride(),  // lhs info
        &rhs.coeffRef(0,0), rhs.outerStride(),  // rhs info
        &dst.coeffRef(0,0), dst.outerStride(),  // result info
        actualAlpha                             // alpha
      );
  }
};

} // end namespace Eigen

#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
Generated on Thu Nov 17 2022 22:01:30 by Doxygen 1.7.2