Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Redux.h
00001 // This file is part of Eigen, a lightweight C++ template library 00002 // for linear algebra. 00003 // 00004 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr> 00005 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> 00006 // 00007 // This Source Code Form is subject to the terms of the Mozilla 00008 // Public License v. 2.0. If a copy of the MPL was not distributed 00009 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00010 00011 #ifndef EIGEN_REDUX_H 00012 #define EIGEN_REDUX_H 00013 00014 namespace Eigen { 00015 00016 namespace internal { 00017 00018 // TODO 00019 // * implement other kind of vectorization 00020 // * factorize code 00021 00022 /*************************************************************************** 00023 * Part 1 : the logic deciding a strategy for vectorization and unrolling 00024 ***************************************************************************/ 00025 00026 template<typename Func, typename Derived> 00027 struct redux_traits 00028 { 00029 public: 00030 enum { 00031 PacketSize = packet_traits<typename Derived::Scalar>::size, 00032 InnerMaxSize = int(Derived::IsRowMajor) 00033 ? Derived::MaxColsAtCompileTime 00034 : Derived::MaxRowsAtCompileTime 00035 }; 00036 00037 enum { 00038 MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit) 00039 && (functor_traits<Func>::PacketAccess), 00040 MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit), 00041 MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize 00042 }; 00043 00044 public: 00045 enum { 00046 Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal) 00047 : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) 00048 : int(DefaultTraversal) 00049 }; 00050 00051 public: 00052 enum { 00053 Cost = ( Derived::SizeAtCompileTime == Dynamic 00054 || Derived::CoeffReadCost == Dynamic 00055 || (Derived::SizeAtCompileTime!=1 && functor_traits<Func>::Cost == Dynamic) 00056 ) ? Dynamic 00057 : Derived::SizeAtCompileTime * Derived::CoeffReadCost 00058 + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost, 00059 UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) 00060 }; 00061 00062 public: 00063 enum { 00064 Unrolling = Cost != Dynamic && Cost <= UnrollingLimit 00065 ? CompleteUnrolling 00066 : NoUnrolling 00067 }; 00068 }; 00069 00070 /*************************************************************************** 00071 * Part 2 : unrollers 00072 ***************************************************************************/ 00073 00074 /*** no vectorization ***/ 00075 00076 template<typename Func, typename Derived, int Start, int Length> 00077 struct redux_novec_unroller 00078 { 00079 enum { 00080 HalfLength = Length/2 00081 }; 00082 00083 typedef typename Derived::Scalar Scalar; 00084 00085 static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) 00086 { 00087 return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func), 00088 redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func)); 00089 } 00090 }; 00091 00092 template<typename Func, typename Derived, int Start> 00093 struct redux_novec_unroller<Func, Derived, Start, 1> 00094 { 00095 enum { 00096 outer = Start / Derived::InnerSizeAtCompileTime, 00097 inner = Start % Derived::InnerSizeAtCompileTime 00098 }; 00099 00100 typedef typename Derived::Scalar Scalar; 00101 00102 static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&) 00103 { 00104 return mat.coeffByOuterInner(outer, inner); 00105 } 00106 }; 00107 00108 // This is actually dead code and will never be called. It is required 00109 // to prevent false warnings regarding failed inlining though 00110 // for 0 length run() will never be called at all. 00111 template<typename Func, typename Derived, int Start> 00112 struct redux_novec_unroller<Func, Derived, Start, 0> 00113 { 00114 typedef typename Derived::Scalar Scalar; 00115 static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); } 00116 }; 00117 00118 /*** vectorization ***/ 00119 00120 template<typename Func, typename Derived, int Start, int Length> 00121 struct redux_vec_unroller 00122 { 00123 enum { 00124 PacketSize = packet_traits<typename Derived::Scalar>::size, 00125 HalfLength = Length/2 00126 }; 00127 00128 typedef typename Derived::Scalar Scalar; 00129 typedef typename packet_traits<Scalar>::type PacketScalar; 00130 00131 static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func) 00132 { 00133 return func.packetOp( 00134 redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func), 00135 redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) ); 00136 } 00137 }; 00138 00139 template<typename Func, typename Derived, int Start> 00140 struct redux_vec_unroller<Func, Derived, Start, 1> 00141 { 00142 enum { 00143 index = Start * packet_traits<typename Derived::Scalar>::size, 00144 outer = index / int(Derived::InnerSizeAtCompileTime), 00145 inner = index % int(Derived::InnerSizeAtCompileTime), 00146 alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned 00147 }; 00148 00149 typedef typename Derived::Scalar Scalar; 00150 typedef typename packet_traits<Scalar>::type PacketScalar; 00151 00152 static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&) 00153 { 00154 return mat.template packetByOuterInner<alignment>(outer, inner); 00155 } 00156 }; 00157 00158 /*************************************************************************** 00159 * Part 3 : implementation of all cases 00160 ***************************************************************************/ 00161 00162 template<typename Func, typename Derived, 00163 int Traversal = redux_traits<Func, Derived>::Traversal, 00164 int Unrolling = redux_traits<Func, Derived>::Unrolling 00165 > 00166 struct redux_impl; 00167 00168 template<typename Func, typename Derived> 00169 struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling> 00170 { 00171 typedef typename Derived::Scalar Scalar; 00172 typedef typename Derived::Index Index; 00173 static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func) 00174 { 00175 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); 00176 Scalar res; 00177 res = mat.coeffByOuterInner(0, 0); 00178 for(Index i = 1; i < mat.innerSize(); ++i) 00179 res = func(res, mat.coeffByOuterInner(0, i)); 00180 for(Index i = 1; i < mat.outerSize(); ++i) 00181 for(Index j = 0; j < mat.innerSize(); ++j) 00182 res = func(res, mat.coeffByOuterInner(i, j)); 00183 return res; 00184 } 00185 }; 00186 00187 template<typename Func, typename Derived> 00188 struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling> 00189 : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime> 00190 {}; 00191 00192 template<typename Func, typename Derived> 00193 struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling> 00194 { 00195 typedef typename Derived::Scalar Scalar; 00196 typedef typename packet_traits<Scalar>::type PacketScalar; 00197 typedef typename Derived::Index Index; 00198 00199 static Scalar run(const Derived& mat, const Func& func) 00200 { 00201 const Index size = mat.size(); 00202 eigen_assert(size && "you are using an empty matrix"); 00203 const Index packetSize = packet_traits<Scalar>::size; 00204 const Index alignedStart = internal::first_aligned(mat); 00205 enum { 00206 alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit) 00207 ? Aligned : Unaligned 00208 }; 00209 const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize); 00210 const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize); 00211 const Index alignedEnd2 = alignedStart + alignedSize2; 00212 const Index alignedEnd = alignedStart + alignedSize; 00213 Scalar res; 00214 if(alignedSize) 00215 { 00216 PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart); 00217 if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop 00218 { 00219 PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize); 00220 for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize) 00221 { 00222 packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index)); 00223 packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize)); 00224 } 00225 00226 packet_res0 = func.packetOp(packet_res0,packet_res1); 00227 if(alignedEnd>alignedEnd2) 00228 packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2)); 00229 } 00230 res = func.predux(packet_res0); 00231 00232 for(Index index = 0; index < alignedStart; ++index) 00233 res = func(res,mat.coeff(index)); 00234 00235 for(Index index = alignedEnd; index < size; ++index) 00236 res = func(res,mat.coeff(index)); 00237 } 00238 else // too small to vectorize anything. 00239 // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. 00240 { 00241 res = mat.coeff(0); 00242 for(Index index = 1; index < size; ++index) 00243 res = func(res,mat.coeff(index)); 00244 } 00245 00246 return res; 00247 } 00248 }; 00249 00250 // NOTE: for SliceVectorizedTraversal we simply bypass unrolling 00251 template<typename Func, typename Derived, int Unrolling> 00252 struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling> 00253 { 00254 typedef typename Derived::Scalar Scalar; 00255 typedef typename packet_traits<Scalar>::type PacketScalar; 00256 typedef typename Derived::Index Index; 00257 00258 static Scalar run(const Derived& mat, const Func& func) 00259 { 00260 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); 00261 const Index innerSize = mat.innerSize(); 00262 const Index outerSize = mat.outerSize(); 00263 enum { 00264 packetSize = packet_traits<Scalar>::size 00265 }; 00266 const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize; 00267 Scalar res; 00268 if(packetedInnerSize) 00269 { 00270 PacketScalar packet_res = mat.template packet<Unaligned>(0,0); 00271 for(Index j=0; j<outerSize; ++j) 00272 for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize)) 00273 packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned>(j,i)); 00274 00275 res = func.predux(packet_res); 00276 for(Index j=0; j<outerSize; ++j) 00277 for(Index i=packetedInnerSize; i<innerSize; ++i) 00278 res = func(res, mat.coeffByOuterInner(j,i)); 00279 } 00280 else // too small to vectorize anything. 00281 // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. 00282 { 00283 res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func); 00284 } 00285 00286 return res; 00287 } 00288 }; 00289 00290 template<typename Func, typename Derived> 00291 struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling> 00292 { 00293 typedef typename Derived::Scalar Scalar; 00294 typedef typename packet_traits<Scalar>::type PacketScalar; 00295 enum { 00296 PacketSize = packet_traits<Scalar>::size, 00297 Size = Derived::SizeAtCompileTime, 00298 VectorizedSize = (Size / PacketSize) * PacketSize 00299 }; 00300 static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func) 00301 { 00302 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); 00303 Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func)); 00304 if (VectorizedSize != Size) 00305 res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func)); 00306 return res; 00307 } 00308 }; 00309 00310 } // end namespace internal 00311 00312 /*************************************************************************** 00313 * Part 4 : public API 00314 ***************************************************************************/ 00315 00316 00317 /** \returns the result of a full redux operation on the whole matrix or vector using \a func 00318 * 00319 * The template parameter \a BinaryOp is the type of the functor \a func which must be 00320 * an associative operator. Both current STL and TR1 functor styles are handled. 00321 * 00322 * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise() 00323 */ 00324 template<typename Derived> 00325 template<typename Func> 00326 EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type 00327 DenseBase<Derived>::redux(const Func& func) const 00328 { 00329 typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested; 00330 return internal::redux_impl<Func, ThisNested> 00331 ::run(derived(), func); 00332 } 00333 00334 /** \returns the minimum of all coefficients of \c *this. 00335 * \warning the result is undefined if \c *this contains NaN. 00336 */ 00337 template<typename Derived> 00338 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 00339 DenseBase<Derived>::minCoeff() const 00340 { 00341 return this->redux(Eigen::internal::scalar_min_op<Scalar>()); 00342 } 00343 00344 /** \returns the maximum of all coefficients of \c *this. 00345 * \warning the result is undefined if \c *this contains NaN. 00346 */ 00347 template<typename Derived> 00348 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 00349 DenseBase<Derived>::maxCoeff() const 00350 { 00351 return this->redux(Eigen::internal::scalar_max_op<Scalar>()); 00352 } 00353 00354 /** \returns the sum of all coefficients of *this 00355 * 00356 * \sa trace(), prod(), mean() 00357 */ 00358 template<typename Derived> 00359 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 00360 DenseBase<Derived>::sum() const 00361 { 00362 if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) 00363 return Scalar(0); 00364 return this->redux(Eigen::internal::scalar_sum_op<Scalar>()); 00365 } 00366 00367 /** \returns the mean of all coefficients of *this 00368 * 00369 * \sa trace(), prod(), sum() 00370 */ 00371 template<typename Derived> 00372 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 00373 DenseBase<Derived>::mean() const 00374 { 00375 return Scalar(this->redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size()); 00376 } 00377 00378 /** \returns the product of all coefficients of *this 00379 * 00380 * Example: \include MatrixBase_prod.cpp 00381 * Output: \verbinclude MatrixBase_prod.out 00382 * 00383 * \sa sum(), mean(), trace() 00384 */ 00385 template<typename Derived> 00386 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 00387 DenseBase<Derived>::prod() const 00388 { 00389 if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) 00390 return Scalar(1); 00391 return this->redux(Eigen::internal::scalar_product_op<Scalar>()); 00392 } 00393 00394 /** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal. 00395 * 00396 * \c *this can be any matrix, not necessarily square. 00397 * 00398 * \sa diagonal(), sum() 00399 */ 00400 template<typename Derived> 00401 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 00402 MatrixBase<Derived>::trace() const 00403 { 00404 return derived().diagonal ().sum(); 00405 } 00406 00407 } // end namespace Eigen 00408 00409 #endif // EIGEN_REDUX_H
Generated on Thu Nov 17 2022 22:01:30 by
1.7.2