Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
GeneralMatrixMatrixTriangular.h
00001 // This file is part of Eigen, a lightweight C++ template library 00002 // for linear algebra. 00003 // 00004 // Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr> 00005 // 00006 // This Source Code Form is subject to the terms of the Mozilla 00007 // Public License v. 2.0. If a copy of the MPL was not distributed 00008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00009 00010 #ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H 00011 #define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H 00012 00013 namespace Eigen { 00014 00015 template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs> 00016 struct selfadjoint_rank1_update; 00017 00018 namespace internal { 00019 00020 /********************************************************************** 00021 * This file implements a general A * B product while 00022 * evaluating only one triangular part of the product. 00023 * This is more general version of self adjoint product (C += A A^T) 00024 * as the level 3 SYRK Blas routine. 00025 **********************************************************************/ 00026 00027 // forward declarations (defined at the end of this file) 00028 template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo> 00029 struct tribb_kernel; 00030 00031 /* Optimized matrix-matrix product evaluating only one triangular half */ 00032 template <typename Index, 00033 typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, 00034 typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, 00035 int ResStorageOrder, int UpLo, int Version = Specialized> 00036 struct general_matrix_matrix_triangular_product; 00037 00038 // as usual if the result is row major => we transpose the product 00039 template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, 00040 typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version> 00041 struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo,Version> 00042 { 00043 typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; 00044 static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, 00045 const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha) 00046 { 00047 general_matrix_matrix_triangular_product<Index, 00048 RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs, 00049 LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs, 00050 ColMajor, UpLo==Lower?Upper:Lower> 00051 ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha); 00052 } 00053 }; 00054 00055 template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, 00056 typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version> 00057 struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Version> 00058 { 00059 typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; 00060 static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, 00061 const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha) 00062 { 00063 const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); 00064 const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); 00065 00066 typedef gebp_traits<LhsScalar,RhsScalar> Traits; 00067 00068 Index kc = depth; // cache block size along the K direction 00069 Index mc = size; // cache block size along the M direction 00070 Index nc = size; // cache block size along the N direction 00071 computeProductBlockingSizes<LhsScalar,RhsScalar>(kc, mc, nc); 00072 // !!! mc must be a multiple of nr: 00073 if(mc > Traits::nr) 00074 mc = (mc/Traits::nr)*Traits::nr; 00075 00076 std::size_t sizeW = kc*Traits::WorkSpaceFactor; 00077 std::size_t sizeB = sizeW + kc*size; 00078 ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0); 00079 ei_declare_aligned_stack_constructed_variable(RhsScalar, allocatedBlockB, sizeB, 0); 00080 RhsScalar* blockB = allocatedBlockB + sizeW; 00081 00082 gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; 00083 gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs; 00084 gebp_kernel <LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp; 00085 tribb_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs, UpLo> sybb; 00086 00087 for(Index k2=0; k2<depth; k2+=kc) 00088 { 00089 const Index actual_kc = (std::min)(k2+kc,depth)-k2; 00090 00091 // note that the actual rhs is the transpose/adjoint of mat 00092 pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, size); 00093 00094 for(Index i2=0; i2<size; i2+=mc) 00095 { 00096 const Index actual_mc = (std::min)(i2+mc,size)-i2; 00097 00098 pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc); 00099 00100 // the selected actual_mc * size panel of res is split into three different part: 00101 // 1 - before the diagonal => processed with gebp or skipped 00102 // 2 - the actual_mc x actual_mc symmetric block => processed with a special kernel 00103 // 3 - after the diagonal => processed with gebp or skipped 00104 if (UpLo==Lower) 00105 gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, (std::min)(size,i2), alpha, 00106 -1, -1, 0, 0, allocatedBlockB); 00107 00108 sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB); 00109 00110 if (UpLo==Upper) 00111 { 00112 Index j2 = i2+actual_mc; 00113 gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, (std::max)(Index(0), size-j2), alpha, 00114 -1, -1, 0, 0, allocatedBlockB); 00115 } 00116 } 00117 } 00118 } 00119 }; 00120 00121 // Optimized packed Block * packed Block product kernel evaluating only one given triangular part 00122 // This kernel is built on top of the gebp kernel: 00123 // - the current destination block is processed per panel of actual_mc x BlockSize 00124 // where BlockSize is set to the minimal value allowing gebp to be as fast as possible 00125 // - then, as usual, each panel is split into three parts along the diagonal, 00126 // the sub blocks above and below the diagonal are processed as usual, 00127 // while the triangular block overlapping the diagonal is evaluated into a 00128 // small temporary buffer which is then accumulated into the result using a 00129 // triangular traversal. 00130 template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo> 00131 struct tribb_kernel 00132 { 00133 typedef gebp_traits<LhsScalar,RhsScalar,ConjLhs,ConjRhs> Traits; 00134 typedef typename Traits::ResScalar ResScalar; 00135 00136 enum { 00137 BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr) 00138 }; 00139 void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha, RhsScalar* workspace) 00140 { 00141 gebp_kernel<LhsScalar, RhsScalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel; 00142 Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer; 00143 00144 // let's process the block per panel of actual_mc x BlockSize, 00145 // again, each is split into three parts, etc. 00146 for (Index j=0; j<size; j+=BlockSize) 00147 { 00148 Index actualBlockSize = std::min<Index>(BlockSize,size - j); 00149 const RhsScalar* actual_b = blockB+j*depth; 00150 00151 if(UpLo==Upper) 00152 gebp_kernel(res+j*resStride, resStride, blockA, actual_b, j, depth, actualBlockSize, alpha, 00153 -1, -1, 0, 0, workspace); 00154 00155 // selfadjoint micro block 00156 { 00157 Index i = j; 00158 buffer.setZero(); 00159 // 1 - apply the kernel on the temporary buffer 00160 gebp_kernel(buffer.data(), BlockSize, blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha, 00161 -1, -1, 0, 0, workspace); 00162 // 2 - triangular accumulation 00163 for(Index j1=0; j1<actualBlockSize; ++j1) 00164 { 00165 ResScalar* r = res + (j+j1)*resStride + i; 00166 for(Index i1=UpLo==Lower ? j1 : 0; 00167 UpLo==Lower ? i1<actualBlockSize : i1<=j1; ++i1) 00168 r[i1] += buffer(i1,j1); 00169 } 00170 } 00171 00172 if(UpLo==Lower) 00173 { 00174 Index i = j+actualBlockSize; 00175 gebp_kernel(res+j*resStride+i, resStride, blockA+depth*i, actual_b, size-i, depth, actualBlockSize, alpha, 00176 -1, -1, 0, 0, workspace); 00177 } 00178 } 00179 } 00180 }; 00181 00182 } // end namespace internal 00183 00184 // high level API 00185 00186 template<typename MatrixType, typename ProductType, int UpLo, bool IsOuterProduct> 00187 struct general_product_to_triangular_selector; 00188 00189 00190 template<typename MatrixType, typename ProductType, int UpLo> 00191 struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true> 00192 { 00193 static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha) 00194 { 00195 typedef typename MatrixType::Scalar Scalar; 00196 typedef typename MatrixType::Index Index; 00197 00198 typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs; 00199 typedef internal::blas_traits<Lhs> LhsBlasTraits; 00200 typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs; 00201 typedef typename internal::remove_all<ActualLhs>::type _ActualLhs; 00202 typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs()); 00203 00204 typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs; 00205 typedef internal::blas_traits<Rhs> RhsBlasTraits; 00206 typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs; 00207 typedef typename internal::remove_all<ActualRhs>::type _ActualRhs; 00208 typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs()); 00209 00210 Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); 00211 00212 enum { 00213 StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor, 00214 UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1, 00215 UseRhsDirectly = _ActualRhs::InnerStrideAtCompileTime==1 00216 }; 00217 00218 internal::gemv_static_vector_if<Scalar,Lhs::SizeAtCompileTime,Lhs::MaxSizeAtCompileTime,!UseLhsDirectly> static_lhs; 00219 ei_declare_aligned_stack_constructed_variable(Scalar, actualLhsPtr, actualLhs.size(), 00220 (UseLhsDirectly ? const_cast<Scalar*>(actualLhs.data()) : static_lhs.data())); 00221 if(!UseLhsDirectly) Map<typename _ActualLhs::PlainObject>(actualLhsPtr, actualLhs.size()) = actualLhs; 00222 00223 internal::gemv_static_vector_if<Scalar,Rhs::SizeAtCompileTime,Rhs::MaxSizeAtCompileTime,!UseRhsDirectly> static_rhs; 00224 ei_declare_aligned_stack_constructed_variable(Scalar, actualRhsPtr, actualRhs.size(), 00225 (UseRhsDirectly ? const_cast<Scalar*>(actualRhs.data()) : static_rhs.data())); 00226 if(!UseRhsDirectly) Map<typename _ActualRhs::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs; 00227 00228 00229 selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo, 00230 LhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex, 00231 RhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex> 00232 ::run(actualLhs.size(), mat.data(), mat.outerStride(), actualLhsPtr, actualRhsPtr, actualAlpha); 00233 } 00234 }; 00235 00236 template<typename MatrixType, typename ProductType, int UpLo> 00237 struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false> 00238 { 00239 static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha) 00240 { 00241 typedef typename MatrixType::Index Index; 00242 00243 typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs; 00244 typedef internal::blas_traits<Lhs> LhsBlasTraits; 00245 typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs; 00246 typedef typename internal::remove_all<ActualLhs>::type _ActualLhs; 00247 typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs()); 00248 00249 typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs; 00250 typedef internal::blas_traits<Rhs> RhsBlasTraits; 00251 typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs; 00252 typedef typename internal::remove_all<ActualRhs>::type _ActualRhs; 00253 typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs()); 00254 00255 typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); 00256 00257 internal::general_matrix_matrix_triangular_product<Index, 00258 typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, 00259 typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, 00260 MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo> 00261 ::run(mat.cols(), actualLhs.cols(), 00262 &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(), 00263 mat.data(), mat.outerStride(), actualAlpha); 00264 } 00265 }; 00266 00267 template<typename MatrixType, unsigned int UpLo> 00268 template<typename ProductDerived, typename _Lhs, typename _Rhs> 00269 TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, _Lhs,_Rhs>& prod, const Scalar& alpha) 00270 { 00271 general_product_to_triangular_selector<MatrixType, ProductDerived, UpLo, (_Lhs::ColsAtCompileTime==1) || (_Rhs::RowsAtCompileTime==1)>::run(m_matrix.const_cast_derived(), prod.derived(), alpha); 00272 00273 return *this; 00274 } 00275 00276 } // end namespace Eigen 00277 00278 #endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
Generated on Thu Nov 17 2022 22:01:28 by
1.7.2