Eigen | Mbed

Users » pmic » Code » Eigen » Documentation
Embed: (wiki syntax)
Show/hide line numbers GeneralMatrixMatrixTriangular.h Source File
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
00005 //
00006 // This Source Code Form is subject to the terms of the Mozilla
00007 // Public License v. 2.0. If a copy of the MPL was not distributed
00008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
00009 
00010 #ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
00011 #define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
00012 
00013 namespace Eigen { 
00014 
00015 template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>
00016 struct selfadjoint_rank1_update;
00017 
00018 namespace internal {
00019 
00020 /**********************************************************************
00021 * This file implements a general A * B product while
00022 * evaluating only one triangular part of the product.
00023 * This is more general version of self adjoint product (C += A A^T)
00024 * as the level 3 SYRK Blas routine.
00025 **********************************************************************/
00026 
00027 // forward declarations (defined at the end of this file)
00028 template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
00029 struct tribb_kernel;
00030   
00031 /* Optimized matrix-matrix product evaluating only one triangular half */
00032 template <typename Index,
00033           typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
00034           typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
00035                               int ResStorageOrder, int  UpLo, int Version = Specialized>
00036 struct general_matrix_matrix_triangular_product;
00037 
00038 // as usual if the result is row major => we transpose the product
00039 template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
00040                           typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int  UpLo, int Version>
00041 struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo,Version>
00042 {
00043   typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
00044   static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
00045                                       const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha)
00046   {
00047     general_matrix_matrix_triangular_product<Index,
00048         RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
00049         LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
00050         ColMajor, UpLo==Lower?Upper:Lower>
00051       ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha);
00052   }
00053 };
00054 
00055 template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
00056                           typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int  UpLo, int Version>
00057 struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Version>
00058 {
00059   typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
00060   static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
00061                                       const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha)
00062   {
00063     const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
00064     const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
00065 
00066     typedef gebp_traits<LhsScalar,RhsScalar> Traits;
00067 
00068     Index kc = depth; // cache block size along the K direction
00069     Index mc = size;  // cache block size along the M direction
00070     Index nc = size;  // cache block size along the N direction
00071     computeProductBlockingSizes<LhsScalar,RhsScalar>(kc, mc, nc);
00072     // !!! mc must be a multiple of nr:
00073     if(mc > Traits::nr)
00074       mc = (mc/Traits::nr)*Traits::nr;
00075 
00076     std::size_t sizeW = kc*Traits::WorkSpaceFactor;
00077     std::size_t sizeB = sizeW + kc*size;
00078     ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0);
00079     ei_declare_aligned_stack_constructed_variable(RhsScalar, allocatedBlockB, sizeB, 0);
00080     RhsScalar* blockB = allocatedBlockB + sizeW;
00081     
00082     gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
00083     gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs;
00084     gebp_kernel <LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp;
00085     tribb_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs, UpLo> sybb;
00086 
00087     for(Index k2=0; k2<depth; k2+=kc)
00088     {
00089       const Index actual_kc = (std::min)(k2+kc,depth)-k2;
00090 
00091       // note that the actual rhs is the transpose/adjoint of mat
00092       pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, size);
00093 
00094       for(Index i2=0; i2<size; i2+=mc)
00095       {
00096         const Index actual_mc = (std::min)(i2+mc,size)-i2;
00097 
00098         pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
00099 
00100         // the selected actual_mc * size panel of res is split into three different part:
00101         //  1 - before the diagonal => processed with gebp or skipped
00102         //  2 - the actual_mc x actual_mc symmetric block => processed with a special kernel
00103         //  3 - after the diagonal => processed with gebp or skipped
00104         if (UpLo==Lower)
00105           gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, (std::min)(size,i2), alpha,
00106                -1, -1, 0, 0, allocatedBlockB);
00107 
00108         sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB);
00109 
00110         if (UpLo==Upper)
00111         {
00112           Index j2 = i2+actual_mc;
00113           gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, (std::max)(Index(0), size-j2), alpha,
00114                -1, -1, 0, 0, allocatedBlockB);
00115         }
00116       }
00117     }
00118   }
00119 };
00120 
00121 // Optimized packed Block * packed Block product kernel evaluating only one given triangular part
00122 // This kernel is built on top of the gebp kernel:
00123 // - the current destination block is processed per panel of actual_mc x BlockSize
00124 //   where BlockSize is set to the minimal value allowing gebp to be as fast as possible
00125 // - then, as usual, each panel is split into three parts along the diagonal,
00126 //   the sub blocks above and below the diagonal are processed as usual,
00127 //   while the triangular block overlapping the diagonal is evaluated into a
00128 //   small temporary buffer which is then accumulated into the result using a
00129 //   triangular traversal.
00130 template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo>
00131 struct tribb_kernel
00132 {
00133   typedef gebp_traits<LhsScalar,RhsScalar,ConjLhs,ConjRhs> Traits;
00134   typedef typename Traits::ResScalar ResScalar;
00135   
00136   enum {
00137     BlockSize  = EIGEN_PLAIN_ENUM_MAX(mr,nr)
00138   };
00139   void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha, RhsScalar* workspace)
00140   {
00141     gebp_kernel<LhsScalar, RhsScalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
00142     Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer;
00143 
00144     // let's process the block per panel of actual_mc x BlockSize,
00145     // again, each is split into three parts, etc.
00146     for (Index j=0; j<size; j+=BlockSize)
00147     {
00148       Index actualBlockSize = std::min<Index>(BlockSize,size - j);
00149       const RhsScalar* actual_b = blockB+j*depth;
00150 
00151       if(UpLo==Upper)
00152         gebp_kernel(res+j*resStride, resStride, blockA, actual_b, j, depth, actualBlockSize, alpha,
00153                     -1, -1, 0, 0, workspace);
00154 
00155       // selfadjoint micro block
00156       {
00157         Index i = j;
00158         buffer.setZero();
00159         // 1 - apply the kernel on the temporary buffer
00160         gebp_kernel(buffer.data(), BlockSize, blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha,
00161                     -1, -1, 0, 0, workspace);
00162         // 2 - triangular accumulation
00163         for(Index j1=0; j1<actualBlockSize; ++j1)
00164         {
00165           ResScalar* r = res + (j+j1)*resStride + i;
00166           for(Index i1=UpLo==Lower ? j1 : 0;
00167               UpLo==Lower ? i1<actualBlockSize : i1<=j1; ++i1)
00168             r[i1] += buffer(i1,j1);
00169         }
00170       }
00171 
00172       if(UpLo==Lower)
00173       {
00174         Index i = j+actualBlockSize;
00175         gebp_kernel(res+j*resStride+i, resStride, blockA+depth*i, actual_b, size-i, depth, actualBlockSize, alpha,
00176                     -1, -1, 0, 0, workspace);
00177       }
00178     }
00179   }
00180 };
00181 
00182 } // end namespace internal
00183 
00184 // high level API
00185 
00186 template<typename MatrixType, typename ProductType, int UpLo, bool IsOuterProduct>
00187 struct general_product_to_triangular_selector;
00188 
00189 
00190 template<typename MatrixType, typename ProductType, int UpLo>
00191 struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true>
00192 {
00193   static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha)
00194   {
00195     typedef typename MatrixType::Scalar Scalar;
00196     typedef typename MatrixType::Index Index;
00197     
00198     typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
00199     typedef internal::blas_traits<Lhs> LhsBlasTraits;
00200     typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
00201     typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
00202     typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
00203     
00204     typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
00205     typedef internal::blas_traits<Rhs> RhsBlasTraits;
00206     typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
00207     typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
00208     typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
00209 
00210     Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
00211 
00212     enum {
00213       StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor,
00214       UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1,
00215       UseRhsDirectly = _ActualRhs::InnerStrideAtCompileTime==1
00216     };
00217     
00218     internal::gemv_static_vector_if<Scalar,Lhs::SizeAtCompileTime,Lhs::MaxSizeAtCompileTime,!UseLhsDirectly> static_lhs;
00219     ei_declare_aligned_stack_constructed_variable(Scalar, actualLhsPtr, actualLhs.size(),
00220       (UseLhsDirectly ? const_cast<Scalar*>(actualLhs.data()) : static_lhs.data()));
00221     if(!UseLhsDirectly) Map<typename _ActualLhs::PlainObject>(actualLhsPtr, actualLhs.size()) = actualLhs;
00222     
00223     internal::gemv_static_vector_if<Scalar,Rhs::SizeAtCompileTime,Rhs::MaxSizeAtCompileTime,!UseRhsDirectly> static_rhs;
00224     ei_declare_aligned_stack_constructed_variable(Scalar, actualRhsPtr, actualRhs.size(),
00225       (UseRhsDirectly ? const_cast<Scalar*>(actualRhs.data()) : static_rhs.data()));
00226     if(!UseRhsDirectly) Map<typename _ActualRhs::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
00227     
00228     
00229     selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
00230                               LhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
00231                               RhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex>
00232           ::run(actualLhs.size(), mat.data(), mat.outerStride(), actualLhsPtr, actualRhsPtr, actualAlpha);
00233   }
00234 };
00235 
00236 template<typename MatrixType, typename ProductType, int UpLo>
00237 struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
00238 {
00239   static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha)
00240   {
00241     typedef typename MatrixType::Index Index;
00242     
00243     typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
00244     typedef internal::blas_traits<Lhs> LhsBlasTraits;
00245     typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
00246     typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
00247     typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
00248     
00249     typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
00250     typedef internal::blas_traits<Rhs> RhsBlasTraits;
00251     typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
00252     typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
00253     typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
00254 
00255     typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
00256 
00257     internal::general_matrix_matrix_triangular_product<Index,
00258       typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
00259       typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
00260       MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
00261       ::run(mat.cols(), actualLhs.cols(),
00262             &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
00263             mat.data(), mat.outerStride(), actualAlpha);
00264   }
00265 };
00266 
00267 template<typename MatrixType, unsigned int UpLo>
00268 template<typename ProductDerived, typename _Lhs, typename _Rhs>
00269 TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, _Lhs,_Rhs>& prod, const Scalar& alpha)
00270 {
00271   general_product_to_triangular_selector<MatrixType, ProductDerived, UpLo, (_Lhs::ColsAtCompileTime==1) || (_Rhs::RowsAtCompileTime==1)>::run(m_matrix.const_cast_derived(), prod.derived(), alpha);
00272   
00273   return *this;
00274 }
00275 
00276 } // end namespace Eigen
00277 
00278 #endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
Repository toolbox

Repository details

Type:	Library
Created:	17 Nov 2022
Imports:	1
Forks:	0
Commits:	2
Dependents:	0
Dependencies:	0
Followers:	1
Important changes to repositories hosted on mbed.com

GeneralMatrixMatrixTriangular.h

Repository toolbox

Repository details

Important Information for this Arm website

Important changes to repositories hosted on mbed.com

GeneralMatrixMatrixTriangular.h

Repository toolbox

Repository details

Important Information for this Arm website

Access Warning