Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
TriangularSolverMatrix.h
00001 // This file is part of Eigen, a lightweight C++ template library 00002 // for linear algebra. 00003 // 00004 // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> 00005 // 00006 // This Source Code Form is subject to the terms of the Mozilla 00007 // Public License v. 2.0. If a copy of the MPL was not distributed 00008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00009 00010 #ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_H 00011 #define EIGEN_TRIANGULAR_SOLVER_MATRIX_H 00012 00013 namespace Eigen { 00014 00015 namespace internal { 00016 00017 // if the rhs is row major, let's transpose the product 00018 template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder> 00019 struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor> 00020 { 00021 static void run( 00022 Index size, Index cols, 00023 const Scalar* tri, Index triStride, 00024 Scalar* _other, Index otherStride, 00025 level3_blocking<Scalar,Scalar>& blocking) 00026 { 00027 triangular_solve_matrix< 00028 Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft, 00029 (Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper), 00030 NumTraits<Scalar>::IsComplex && Conjugate, 00031 TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor> 00032 ::run(size, cols, tri, triStride, _other, otherStride, blocking); 00033 } 00034 }; 00035 00036 /* Optimized triangular solver with multiple right hand side and the triangular matrix on the left 00037 */ 00038 template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder> 00039 struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> 00040 { 00041 static EIGEN_DONT_INLINE void run( 00042 Index size, Index otherSize, 00043 const Scalar* _tri, Index triStride, 00044 Scalar* _other, Index otherStride, 00045 level3_blocking<Scalar,Scalar>& blocking); 00046 }; 00047 template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder> 00048 EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>::run( 00049 Index size, Index otherSize, 00050 const Scalar* _tri, Index triStride, 00051 Scalar* _other, Index otherStride, 00052 level3_blocking<Scalar,Scalar>& blocking) 00053 { 00054 Index cols = otherSize; 00055 const_blas_data_mapper<Scalar, Index, TriStorageOrder> tri(_tri,triStride); 00056 blas_data_mapper<Scalar, Index, ColMajor> other(_other,otherStride); 00057 00058 typedef gebp_traits<Scalar,Scalar> Traits; 00059 enum { 00060 SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), 00061 IsLower = (Mode&Lower) == Lower 00062 }; 00063 00064 Index kc = blocking.kc(); // cache block size along the K direction 00065 Index mc = (std::min)(size,blocking.mc()); // cache block size along the M direction 00066 00067 std::size_t sizeA = kc*mc; 00068 std::size_t sizeB = kc*cols; 00069 std::size_t sizeW = kc*Traits::WorkSpaceFactor; 00070 00071 ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); 00072 ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); 00073 ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW()); 00074 00075 conj_if<Conjugate> conj; 00076 gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel; 00077 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs; 00078 gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs; 00079 00080 // the goal here is to subdivise the Rhs panels such that we keep some cache 00081 // coherence when accessing the rhs elements 00082 std::ptrdiff_t l1, l2; 00083 manage_caching_sizes(GetAction, &l1, &l2); 00084 Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * otherStride) : 0; 00085 subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr); 00086 00087 for(Index k2=IsLower ? 0 : size; 00088 IsLower ? k2<size : k2>0; 00089 IsLower ? k2+=kc : k2-=kc) 00090 { 00091 const Index actual_kc = (std::min)(IsLower ? size-k2 : k2, kc); 00092 00093 // We have selected and packed a big horizontal panel R1 of rhs. Let B be the packed copy of this panel, 00094 // and R2 the remaining part of rhs. The corresponding vertical panel of lhs is split into 00095 // A11 (the triangular part) and A21 the remaining rectangular part. 00096 // Then the high level algorithm is: 00097 // - B = R1 => general block copy (done during the next step) 00098 // - R1 = A11^-1 B => tricky part 00099 // - update B from the new R1 => actually this has to be performed continuously during the above step 00100 // - R2 -= A21 * B => GEPP 00101 00102 // The tricky part: compute R1 = A11^-1 B while updating B from R1 00103 // The idea is to split A11 into multiple small vertical panels. 00104 // Each panel can be split into a small triangular part T1k which is processed without optimization, 00105 // and the remaining small part T2k which is processed using gebp with appropriate block strides 00106 for(Index j2=0; j2<cols; j2+=subcols) 00107 { 00108 Index actual_cols = (std::min)(cols-j2,subcols); 00109 // for each small vertical panels [T1k^T, T2k^T]^T of lhs 00110 for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth) 00111 { 00112 Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth); 00113 // tr solve 00114 for (Index k=0; k<actualPanelWidth; ++k) 00115 { 00116 // TODO write a small kernel handling this (can be shared with trsv) 00117 Index i = IsLower ? k2+k1+k : k2-k1-k-1; 00118 Index rs = actualPanelWidth - k - 1; // remaining size 00119 Index s = TriStorageOrder==RowMajor ? (IsLower ? k2+k1 : i+1) 00120 : IsLower ? i+1 : i-rs; 00121 00122 Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i)); 00123 for (Index j=j2; j<j2+actual_cols; ++j) 00124 { 00125 if (TriStorageOrder==RowMajor) 00126 { 00127 Scalar b(0); 00128 const Scalar* l = &tri(i,s); 00129 Scalar* r = &other(s,j); 00130 for (Index i3=0; i3<k; ++i3) 00131 b += conj(l[i3]) * r[i3]; 00132 00133 other(i,j) = (other(i,j) - b)*a; 00134 } 00135 else 00136 { 00137 Scalar b = (other(i,j) *= a); 00138 Scalar* r = &other(s,j); 00139 const Scalar* l = &tri(s,i); 00140 for (Index i3=0;i3<rs;++i3) 00141 r[i3] -= b * conj(l[i3]); 00142 } 00143 } 00144 } 00145 00146 Index lengthTarget = actual_kc-k1-actualPanelWidth; 00147 Index startBlock = IsLower ? k2+k1 : k2-k1-actualPanelWidth; 00148 Index blockBOffset = IsLower ? k1 : lengthTarget; 00149 00150 // update the respective rows of B from other 00151 pack_rhs(blockB+actual_kc*j2, &other(startBlock,j2), otherStride, actualPanelWidth, actual_cols, actual_kc, blockBOffset); 00152 00153 // GEBP 00154 if (lengthTarget>0) 00155 { 00156 Index startTarget = IsLower ? k2+k1+actualPanelWidth : k2-actual_kc; 00157 00158 pack_lhs(blockA, &tri(startTarget,startBlock), triStride, actualPanelWidth, lengthTarget); 00159 00160 gebp_kernel(&other(startTarget,j2), otherStride, blockA, blockB+actual_kc*j2, lengthTarget, actualPanelWidth, actual_cols, Scalar(-1), 00161 actualPanelWidth, actual_kc, 0, blockBOffset, blockW); 00162 } 00163 } 00164 } 00165 00166 // R2 -= A21 * B => GEPP 00167 { 00168 Index start = IsLower ? k2+kc : 0; 00169 Index end = IsLower ? size : k2-kc; 00170 for(Index i2=start; i2<end; i2+=mc) 00171 { 00172 const Index actual_mc = (std::min)(mc,end-i2); 00173 if (actual_mc>0) 00174 { 00175 pack_lhs(blockA, &tri(i2, IsLower ? k2 : k2-kc), triStride, actual_kc, actual_mc); 00176 00177 gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0, blockW); 00178 } 00179 } 00180 } 00181 } 00182 } 00183 00184 /* Optimized triangular solver with multiple left hand sides and the trinagular matrix on the right 00185 */ 00186 template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder> 00187 struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> 00188 { 00189 static EIGEN_DONT_INLINE void run( 00190 Index size, Index otherSize, 00191 const Scalar* _tri, Index triStride, 00192 Scalar* _other, Index otherStride, 00193 level3_blocking<Scalar,Scalar>& blocking); 00194 }; 00195 template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder> 00196 EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>::run( 00197 Index size, Index otherSize, 00198 const Scalar* _tri, Index triStride, 00199 Scalar* _other, Index otherStride, 00200 level3_blocking<Scalar,Scalar>& blocking) 00201 { 00202 Index rows = otherSize; 00203 const_blas_data_mapper<Scalar, Index, TriStorageOrder> rhs(_tri,triStride); 00204 blas_data_mapper<Scalar, Index, ColMajor> lhs(_other,otherStride); 00205 00206 typedef gebp_traits<Scalar,Scalar> Traits; 00207 enum { 00208 RhsStorageOrder = TriStorageOrder, 00209 SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr), 00210 IsLower = (Mode&Lower) == Lower 00211 }; 00212 00213 Index kc = blocking.kc(); // cache block size along the K direction 00214 Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction 00215 00216 std::size_t sizeA = kc*mc; 00217 std::size_t sizeB = kc*size; 00218 std::size_t sizeW = kc*Traits::WorkSpaceFactor; 00219 00220 ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); 00221 ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); 00222 ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW()); 00223 00224 conj_if<Conjugate> conj; 00225 gebp_kernel<Scalar,Scalar, Index, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel; 00226 gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs; 00227 gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel; 00228 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, ColMajor, false, true> pack_lhs_panel; 00229 00230 for(Index k2=IsLower ? size : 0; 00231 IsLower ? k2>0 : k2<size; 00232 IsLower ? k2-=kc : k2+=kc) 00233 { 00234 const Index actual_kc = (std::min)(IsLower ? k2 : size-k2, kc); 00235 Index actual_k2 = IsLower ? k2-actual_kc : k2 ; 00236 00237 Index startPanel = IsLower ? 0 : k2+actual_kc; 00238 Index rs = IsLower ? actual_k2 : size - actual_k2 - actual_kc; 00239 Scalar* geb = blockB+actual_kc*actual_kc; 00240 00241 if (rs>0) pack_rhs(geb, &rhs(actual_k2,startPanel), triStride, actual_kc, rs); 00242 00243 // triangular packing (we only pack the panels off the diagonal, 00244 // neglecting the blocks overlapping the diagonal 00245 { 00246 for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth) 00247 { 00248 Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth); 00249 Index actual_j2 = actual_k2 + j2; 00250 Index panelOffset = IsLower ? j2+actualPanelWidth : 0; 00251 Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2; 00252 00253 if (panelLength>0) 00254 pack_rhs_panel(blockB+j2*actual_kc, 00255 &rhs(actual_k2+panelOffset, actual_j2), triStride, 00256 panelLength, actualPanelWidth, 00257 actual_kc, panelOffset); 00258 } 00259 } 00260 00261 for(Index i2=0; i2<rows; i2+=mc) 00262 { 00263 const Index actual_mc = (std::min)(mc,rows-i2); 00264 00265 // triangular solver kernel 00266 { 00267 // for each small block of the diagonal (=> vertical panels of rhs) 00268 for (Index j2 = IsLower 00269 ? (actual_kc - ((actual_kc%SmallPanelWidth) ? Index(actual_kc%SmallPanelWidth) 00270 : Index(SmallPanelWidth))) 00271 : 0; 00272 IsLower ? j2>=0 : j2<actual_kc; 00273 IsLower ? j2-=SmallPanelWidth : j2+=SmallPanelWidth) 00274 { 00275 Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth); 00276 Index absolute_j2 = actual_k2 + j2; 00277 Index panelOffset = IsLower ? j2+actualPanelWidth : 0; 00278 Index panelLength = IsLower ? actual_kc - j2 - actualPanelWidth : j2; 00279 00280 // GEBP 00281 if(panelLength>0) 00282 { 00283 gebp_kernel(&lhs(i2,absolute_j2), otherStride, 00284 blockA, blockB+j2*actual_kc, 00285 actual_mc, panelLength, actualPanelWidth, 00286 Scalar(-1), 00287 actual_kc, actual_kc, // strides 00288 panelOffset, panelOffset, // offsets 00289 blockW); // workspace 00290 } 00291 00292 // unblocked triangular solve 00293 for (Index k=0; k<actualPanelWidth; ++k) 00294 { 00295 Index j = IsLower ? absolute_j2+actualPanelWidth-k-1 : absolute_j2+k; 00296 00297 Scalar* r = &lhs(i2,j); 00298 for (Index k3=0; k3<k; ++k3) 00299 { 00300 Scalar b = conj(rhs(IsLower ? j+1+k3 : absolute_j2+k3,j)); 00301 Scalar* a = &lhs(i2,IsLower ? j+1+k3 : absolute_j2+k3); 00302 for (Index i=0; i<actual_mc; ++i) 00303 r[i] -= a[i] * b; 00304 } 00305 if((Mode & UnitDiag)==0) 00306 { 00307 Scalar b = conj(rhs(j,j)); 00308 for (Index i=0; i<actual_mc; ++i) 00309 r[i] /= b; 00310 } 00311 } 00312 00313 // pack the just computed part of lhs to A 00314 pack_lhs_panel(blockA, _other+absolute_j2*otherStride+i2, otherStride, 00315 actualPanelWidth, actual_mc, 00316 actual_kc, j2); 00317 } 00318 } 00319 00320 if (rs>0) 00321 gebp_kernel(_other+i2+startPanel*otherStride, otherStride, blockA, geb, 00322 actual_mc, actual_kc, rs, Scalar(-1), 00323 -1, -1, 0, 0, blockW); 00324 } 00325 } 00326 } 00327 00328 } // end namespace internal 00329 00330 } // end namespace Eigen 00331 00332 #endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_H
Generated on Thu Nov 17 2022 22:01:30 by
1.7.2