Eigne Matrix Class Library
Dependents: Eigen_test Odometry_test AttitudeEstimation_usingTicker MPU9250_Quaternion_Binary_Serial ... more
GeneralMatrixMatrixTriangular.h
00001 // This file is part of Eigen, a lightweight C++ template library 00002 // for linear algebra. 00003 // 00004 // Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr> 00005 // 00006 // This Source Code Form is subject to the terms of the Mozilla 00007 // Public License v. 2.0. If a copy of the MPL was not distributed 00008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00009 00010 #ifndef EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H 00011 #define EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H 00012 00013 namespace Eigen { 00014 00015 template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs> 00016 struct selfadjoint_rank1_update; 00017 00018 namespace internal { 00019 00020 /********************************************************************** 00021 * This file implements a general A * B product while 00022 * evaluating only one triangular part of the product. 00023 * This is more general version of self adjoint product (C += A A^T) 00024 * as the level 3 SYRK Blas routine. 00025 **********************************************************************/ 00026 00027 // forward declarations (defined at the end of this file) 00028 template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo> 00029 struct tribb_kernel; 00030 00031 /* Optimized matrix-matrix product evaluating only one triangular half */ 00032 template <typename Index, 00033 typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, 00034 typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, 00035 int ResStorageOrder, int UpLo, int Version = Specialized> 00036 struct general_matrix_matrix_triangular_product; 00037 00038 // as usual if the result is row major => we transpose the product 00039 template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, 00040 typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version> 00041 struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo,Version> 00042 { 00043 typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; 00044 static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, 00045 const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha) 00046 { 00047 general_matrix_matrix_triangular_product<Index, 00048 RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs, 00049 LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs, 00050 ColMajor, UpLo==Lower?Upper:Lower> 00051 ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha); 00052 } 00053 }; 00054 00055 template <typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, 00056 typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version> 00057 struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Version> 00058 { 00059 typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; 00060 static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, 00061 const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha) 00062 { 00063 const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride); 00064 const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride); 00065 00066 typedef gebp_traits<LhsScalar,RhsScalar> Traits; 00067 00068 Index kc = depth; // cache block size along the K direction 00069 Index mc = size; // cache block size along the M direction 00070 Index nc = size; // cache block size along the N direction 00071 computeProductBlockingSizes<LhsScalar,RhsScalar>(kc, mc, nc); 00072 // !!! mc must be a multiple of nr: 00073 if(mc > Traits::nr) 00074 mc = (mc/Traits::nr)*Traits::nr; 00075 00076 std::size_t sizeW = kc*Traits::WorkSpaceFactor; 00077 std::size_t sizeB = sizeW + kc*size; 00078 ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0); 00079 ei_declare_aligned_stack_constructed_variable(RhsScalar, allocatedBlockB, sizeB, 0); 00080 RhsScalar* blockB = allocatedBlockB + sizeW; 00081 00082 gemm_pack_lhs<LhsScalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; 00083 gemm_pack_rhs<RhsScalar, Index, Traits::nr, RhsStorageOrder> pack_rhs; 00084 gebp_kernel <LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp; 00085 tribb_kernel<LhsScalar, RhsScalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs, UpLo> sybb; 00086 00087 for(Index k2=0; k2<depth; k2+=kc) 00088 { 00089 const Index actual_kc = (std::min)(k2+kc,depth)-k2; 00090 00091 // note that the actual rhs is the transpose/adjoint of mat 00092 pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, size); 00093 00094 for(Index i2=0; i2<size; i2+=mc) 00095 { 00096 const Index actual_mc = (std::min)(i2+mc,size)-i2; 00097 00098 pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc); 00099 00100 // the selected actual_mc * size panel of res is split into three different part: 00101 // 1 - before the diagonal => processed with gebp or skipped 00102 // 2 - the actual_mc x actual_mc symmetric block => processed with a special kernel 00103 // 3 - after the diagonal => processed with gebp or skipped 00104 if (UpLo==Lower) 00105 gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, (std::min)(size,i2), alpha, 00106 -1, -1, 0, 0, allocatedBlockB); 00107 00108 sybb(res+resStride*i2 + i2, resStride, blockA, blockB + actual_kc*i2, actual_mc, actual_kc, alpha, allocatedBlockB); 00109 00110 if (UpLo==Upper) 00111 { 00112 Index j2 = i2+actual_mc; 00113 gebp(res+resStride*j2+i2, resStride, blockA, blockB+actual_kc*j2, actual_mc, actual_kc, (std::max)(Index(0), size-j2), alpha, 00114 -1, -1, 0, 0, allocatedBlockB); 00115 } 00116 } 00117 } 00118 } 00119 }; 00120 00121 // Optimized packed Block * packed Block product kernel evaluating only one given triangular part 00122 // This kernel is built on top of the gebp kernel: 00123 // - the current destination block is processed per panel of actual_mc x BlockSize 00124 // where BlockSize is set to the minimal value allowing gebp to be as fast as possible 00125 // - then, as usual, each panel is split into three parts along the diagonal, 00126 // the sub blocks above and below the diagonal are processed as usual, 00127 // while the triangular block overlapping the diagonal is evaluated into a 00128 // small temporary buffer which is then accumulated into the result using a 00129 // triangular traversal. 00130 template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjLhs, bool ConjRhs, int UpLo> 00131 struct tribb_kernel 00132 { 00133 typedef gebp_traits<LhsScalar,RhsScalar,ConjLhs,ConjRhs> Traits; 00134 typedef typename Traits::ResScalar ResScalar; 00135 00136 enum { 00137 BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr) 00138 }; 00139 void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha, RhsScalar* workspace) 00140 { 00141 gebp_kernel<LhsScalar, RhsScalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel; 00142 Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer; 00143 00144 // let's process the block per panel of actual_mc x BlockSize, 00145 // again, each is split into three parts, etc. 00146 for (Index j=0; j<size; j+=BlockSize) 00147 { 00148 Index actualBlockSize = std::min<Index>(BlockSize,size - j); 00149 const RhsScalar* actual_b = blockB+j*depth; 00150 00151 if(UpLo==Upper) 00152 gebp_kernel(res+j*resStride, resStride, blockA, actual_b, j, depth, actualBlockSize, alpha, 00153 -1, -1, 0, 0, workspace); 00154 00155 // selfadjoint micro block 00156 { 00157 Index i = j; 00158 buffer.setZero(); 00159 // 1 - apply the kernel on the temporary buffer 00160 gebp_kernel(buffer.data(), BlockSize, blockA+depth*i, actual_b, actualBlockSize, depth, actualBlockSize, alpha, 00161 -1, -1, 0, 0, workspace); 00162 // 2 - triangular accumulation 00163 for(Index j1=0; j1<actualBlockSize; ++j1) 00164 { 00165 ResScalar* r = res + (j+j1)*resStride + i; 00166 for(Index i1=UpLo==Lower ? j1 : 0; 00167 UpLo==Lower ? i1<actualBlockSize : i1<=j1; ++i1) 00168 r[i1] += buffer(i1,j1); 00169 } 00170 } 00171 00172 if(UpLo==Lower) 00173 { 00174 Index i = j+actualBlockSize; 00175 gebp_kernel(res+j*resStride+i, resStride, blockA+depth*i, actual_b, size-i, depth, actualBlockSize, alpha, 00176 -1, -1, 0, 0, workspace); 00177 } 00178 } 00179 } 00180 }; 00181 00182 } // end namespace internal 00183 00184 // high level API 00185 00186 template<typename MatrixType, typename ProductType, int UpLo, bool IsOuterProduct> 00187 struct general_product_to_triangular_selector; 00188 00189 00190 template<typename MatrixType, typename ProductType, int UpLo> 00191 struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true> 00192 { 00193 static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha) 00194 { 00195 typedef typename MatrixType::Scalar Scalar; 00196 typedef typename MatrixType::Index Index; 00197 00198 typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs; 00199 typedef internal::blas_traits<Lhs> LhsBlasTraits; 00200 typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs; 00201 typedef typename internal::remove_all<ActualLhs>::type _ActualLhs; 00202 typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs()); 00203 00204 typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs; 00205 typedef internal::blas_traits<Rhs> RhsBlasTraits; 00206 typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs; 00207 typedef typename internal::remove_all<ActualRhs>::type _ActualRhs; 00208 typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs()); 00209 00210 Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); 00211 00212 enum { 00213 StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor, 00214 UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1, 00215 UseRhsDirectly = _ActualRhs::InnerStrideAtCompileTime==1 00216 }; 00217 00218 internal::gemv_static_vector_if<Scalar,Lhs::SizeAtCompileTime,Lhs::MaxSizeAtCompileTime,!UseLhsDirectly> static_lhs; 00219 ei_declare_aligned_stack_constructed_variable(Scalar, actualLhsPtr, actualLhs.size(), 00220 (UseLhsDirectly ? const_cast<Scalar*>(actualLhs.data()) : static_lhs.data())); 00221 if(!UseLhsDirectly) Map<typename _ActualLhs::PlainObject>(actualLhsPtr, actualLhs.size()) = actualLhs; 00222 00223 internal::gemv_static_vector_if<Scalar,Rhs::SizeAtCompileTime,Rhs::MaxSizeAtCompileTime,!UseRhsDirectly> static_rhs; 00224 ei_declare_aligned_stack_constructed_variable(Scalar, actualRhsPtr, actualRhs.size(), 00225 (UseRhsDirectly ? const_cast<Scalar*>(actualRhs.data()) : static_rhs.data())); 00226 if(!UseRhsDirectly) Map<typename _ActualRhs::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs; 00227 00228 00229 selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo, 00230 LhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex, 00231 RhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex> 00232 ::run(actualLhs.size(), mat.data(), mat.outerStride(), actualLhsPtr, actualRhsPtr, actualAlpha); 00233 } 00234 }; 00235 00236 template<typename MatrixType, typename ProductType, int UpLo> 00237 struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false> 00238 { 00239 static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha) 00240 { 00241 typedef typename MatrixType::Index Index; 00242 00243 typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs; 00244 typedef internal::blas_traits<Lhs> LhsBlasTraits; 00245 typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs; 00246 typedef typename internal::remove_all<ActualLhs>::type _ActualLhs; 00247 typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs()); 00248 00249 typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs; 00250 typedef internal::blas_traits<Rhs> RhsBlasTraits; 00251 typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs; 00252 typedef typename internal::remove_all<ActualRhs>::type _ActualRhs; 00253 typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs()); 00254 00255 typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); 00256 00257 internal::general_matrix_matrix_triangular_product<Index, 00258 typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, 00259 typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, 00260 MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo> 00261 ::run(mat.cols(), actualLhs.cols(), 00262 &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(), 00263 mat.data(), mat.outerStride(), actualAlpha); 00264 } 00265 }; 00266 00267 template<typename MatrixType, unsigned int UpLo> 00268 template<typename ProductDerived, typename _Lhs, typename _Rhs> 00269 TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, _Lhs,_Rhs>& prod, const Scalar& alpha) 00270 { 00271 general_product_to_triangular_selector<MatrixType, ProductDerived, UpLo, (_Lhs::ColsAtCompileTime==1) || (_Rhs::RowsAtCompileTime==1)>::run(m_matrix.const_cast_derived(), prod.derived(), alpha); 00272 00273 return *this; 00274 } 00275 00276 } // end namespace Eigen 00277 00278 #endif // EIGEN_GENERAL_MATRIX_MATRIX_TRIANGULAR_H
Generated on Tue Jul 12 2022 17:46:54 by 1.7.2