Eigen | Mbed

Users » pmic » Code » Eigen » Documentation
Embed: (wiki syntax)
Show/hide line numbers Redux.h Source File
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
00005 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
00006 //
00007 // This Source Code Form is subject to the terms of the Mozilla
00008 // Public License v. 2.0. If a copy of the MPL was not distributed
00009 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
00010 
00011 #ifndef EIGEN_REDUX_H
00012 #define EIGEN_REDUX_H
00013 
00014 namespace Eigen { 
00015 
00016 namespace internal {
00017 
00018 // TODO
00019 //  * implement other kind of vectorization
00020 //  * factorize code
00021 
00022 /***************************************************************************
00023 * Part 1 : the logic deciding a strategy for vectorization and unrolling
00024 ***************************************************************************/
00025 
00026 template<typename Func, typename Derived>
00027 struct redux_traits
00028 {
00029 public:
00030   enum {
00031     PacketSize = packet_traits<typename Derived::Scalar>::size,
00032     InnerMaxSize = int(Derived::IsRowMajor)
00033                  ? Derived::MaxColsAtCompileTime
00034                  : Derived::MaxRowsAtCompileTime
00035   };
00036 
00037   enum {
00038     MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
00039                   && (functor_traits<Func>::PacketAccess),
00040     MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
00041     MaySliceVectorize  = MightVectorize && int(InnerMaxSize)>=3*PacketSize
00042   };
00043 
00044 public:
00045   enum {
00046     Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
00047               : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
00048                                         : int(DefaultTraversal)
00049   };
00050 
00051 public:
00052   enum {
00053     Cost = (  Derived::SizeAtCompileTime == Dynamic
00054            || Derived::CoeffReadCost == Dynamic
00055            || (Derived::SizeAtCompileTime!=1 && functor_traits<Func>::Cost == Dynamic)
00056            ) ? Dynamic
00057            : Derived::SizeAtCompileTime * Derived::CoeffReadCost
00058                + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost,
00059     UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize))
00060   };
00061 
00062 public:
00063   enum {
00064     Unrolling = Cost != Dynamic && Cost <= UnrollingLimit
00065               ? CompleteUnrolling
00066               : NoUnrolling
00067   };
00068 };
00069 
00070 /***************************************************************************
00071 * Part 2 : unrollers
00072 ***************************************************************************/
00073 
00074 /*** no vectorization ***/
00075 
00076 template<typename Func, typename Derived, int Start, int Length>
00077 struct redux_novec_unroller
00078 {
00079   enum {
00080     HalfLength = Length/2
00081   };
00082 
00083   typedef typename Derived::Scalar Scalar;
00084 
00085   static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
00086   {
00087     return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
00088                 redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func));
00089   }
00090 };
00091 
00092 template<typename Func, typename Derived, int Start>
00093 struct redux_novec_unroller<Func, Derived, Start, 1>
00094 {
00095   enum {
00096     outer = Start / Derived::InnerSizeAtCompileTime,
00097     inner = Start % Derived::InnerSizeAtCompileTime
00098   };
00099 
00100   typedef typename Derived::Scalar Scalar;
00101 
00102   static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
00103   {
00104     return mat.coeffByOuterInner(outer, inner);
00105   }
00106 };
00107 
00108 // This is actually dead code and will never be called. It is required
00109 // to prevent false warnings regarding failed inlining though
00110 // for 0 length run() will never be called at all.
00111 template<typename Func, typename Derived, int Start>
00112 struct redux_novec_unroller<Func, Derived, Start, 0>
00113 {
00114   typedef typename Derived::Scalar Scalar;
00115   static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
00116 };
00117 
00118 /*** vectorization ***/
00119 
00120 template<typename Func, typename Derived, int Start, int Length>
00121 struct redux_vec_unroller
00122 {
00123   enum {
00124     PacketSize = packet_traits<typename Derived::Scalar>::size,
00125     HalfLength = Length/2
00126   };
00127 
00128   typedef typename Derived::Scalar Scalar;
00129   typedef typename packet_traits<Scalar>::type PacketScalar;
00130 
00131   static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func)
00132   {
00133     return func.packetOp(
00134             redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
00135             redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) );
00136   }
00137 };
00138 
00139 template<typename Func, typename Derived, int Start>
00140 struct redux_vec_unroller<Func, Derived, Start, 1>
00141 {
00142   enum {
00143     index = Start * packet_traits<typename Derived::Scalar>::size,
00144     outer = index / int(Derived::InnerSizeAtCompileTime),
00145     inner = index % int(Derived::InnerSizeAtCompileTime),
00146     alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned
00147   };
00148 
00149   typedef typename Derived::Scalar Scalar;
00150   typedef typename packet_traits<Scalar>::type PacketScalar;
00151 
00152   static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&)
00153   {
00154     return mat.template packetByOuterInner<alignment>(outer, inner);
00155   }
00156 };
00157 
00158 /***************************************************************************
00159 * Part 3 : implementation of all cases
00160 ***************************************************************************/
00161 
00162 template<typename Func, typename Derived,
00163          int Traversal = redux_traits<Func, Derived>::Traversal,
00164          int Unrolling = redux_traits<Func, Derived>::Unrolling
00165 >
00166 struct redux_impl;
00167 
00168 template<typename Func, typename Derived>
00169 struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
00170 {
00171   typedef typename Derived::Scalar Scalar;
00172   typedef typename Derived::Index Index;
00173   static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
00174   {
00175     eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
00176     Scalar res;
00177     res = mat.coeffByOuterInner(0, 0);
00178     for(Index i = 1; i < mat.innerSize(); ++i)
00179       res = func(res, mat.coeffByOuterInner(0, i));
00180     for(Index i = 1; i < mat.outerSize(); ++i)
00181       for(Index j = 0; j < mat.innerSize(); ++j)
00182         res = func(res, mat.coeffByOuterInner(i, j));
00183     return res;
00184   }
00185 };
00186 
00187 template<typename Func, typename Derived>
00188 struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling>
00189   : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime>
00190 {};
00191 
00192 template<typename Func, typename Derived>
00193 struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
00194 {
00195   typedef typename Derived::Scalar Scalar;
00196   typedef typename packet_traits<Scalar>::type PacketScalar;
00197   typedef typename Derived::Index Index;
00198 
00199   static Scalar run(const Derived& mat, const Func& func)
00200   {
00201     const Index size = mat.size();
00202     eigen_assert(size && "you are using an empty matrix");
00203     const Index packetSize = packet_traits<Scalar>::size;
00204     const Index alignedStart = internal::first_aligned(mat);
00205     enum {
00206       alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit)
00207                 ? Aligned : Unaligned
00208     };
00209     const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize);
00210     const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize);
00211     const Index alignedEnd2 = alignedStart + alignedSize2;
00212     const Index alignedEnd  = alignedStart + alignedSize;
00213     Scalar res;
00214     if(alignedSize)
00215     {
00216       PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart);
00217       if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop
00218       {
00219         PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize);
00220         for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize)
00221         {
00222           packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index));
00223           packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize));
00224         }
00225 
00226         packet_res0 = func.packetOp(packet_res0,packet_res1);
00227         if(alignedEnd>alignedEnd2)
00228           packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2));
00229       }
00230       res = func.predux(packet_res0);
00231 
00232       for(Index index = 0; index < alignedStart; ++index)
00233         res = func(res,mat.coeff(index));
00234 
00235       for(Index index = alignedEnd; index < size; ++index)
00236         res = func(res,mat.coeff(index));
00237     }
00238     else // too small to vectorize anything.
00239          // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
00240     {
00241       res = mat.coeff(0);
00242       for(Index index = 1; index < size; ++index)
00243         res = func(res,mat.coeff(index));
00244     }
00245 
00246     return res;
00247   }
00248 };
00249 
00250 // NOTE: for SliceVectorizedTraversal we simply bypass unrolling
00251 template<typename Func, typename Derived, int Unrolling>
00252 struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
00253 {
00254   typedef typename Derived::Scalar Scalar;
00255   typedef typename packet_traits<Scalar>::type PacketScalar;
00256   typedef typename Derived::Index Index;
00257 
00258   static Scalar run(const Derived& mat, const Func& func)
00259   {
00260     eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
00261     const Index innerSize = mat.innerSize();
00262     const Index outerSize = mat.outerSize();
00263     enum {
00264       packetSize = packet_traits<Scalar>::size
00265     };
00266     const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize;
00267     Scalar res;
00268     if(packetedInnerSize)
00269     {
00270       PacketScalar packet_res = mat.template packet<Unaligned>(0,0);
00271       for(Index j=0; j<outerSize; ++j)
00272         for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize))
00273           packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned>(j,i));
00274 
00275       res = func.predux(packet_res);
00276       for(Index j=0; j<outerSize; ++j)
00277         for(Index i=packetedInnerSize; i<innerSize; ++i)
00278           res = func(res, mat.coeffByOuterInner(j,i));
00279     }
00280     else // too small to vectorize anything.
00281          // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
00282     {
00283       res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func);
00284     }
00285 
00286     return res;
00287   }
00288 };
00289 
00290 template<typename Func, typename Derived>
00291 struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling>
00292 {
00293   typedef typename Derived::Scalar Scalar;
00294   typedef typename packet_traits<Scalar>::type PacketScalar;
00295   enum {
00296     PacketSize = packet_traits<Scalar>::size,
00297     Size = Derived::SizeAtCompileTime,
00298     VectorizedSize = (Size / PacketSize) * PacketSize
00299   };
00300   static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
00301   {
00302     eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
00303     Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func));
00304     if (VectorizedSize != Size)
00305       res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func));
00306     return res;
00307   }
00308 };
00309 
00310 } // end namespace internal
00311 
00312 /***************************************************************************
00313 * Part 4 : public API
00314 ***************************************************************************/
00315 
00316 
00317 /** \returns the result of a full redux operation on the whole matrix or vector using \a func
00318   *
00319   * The template parameter \a BinaryOp is the type of the functor \a func which must be
00320   * an associative operator. Both current STL and TR1 functor styles are handled.
00321   *
00322   * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise()
00323   */
00324 template<typename Derived>
00325 template<typename Func>
00326 EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type
00327 DenseBase<Derived>::redux(const Func& func) const
00328 {
00329   typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested;
00330   return internal::redux_impl<Func, ThisNested>
00331             ::run(derived(), func);
00332 }
00333 
00334 /** \returns the minimum of all coefficients of \c *this.
00335   * \warning the result is undefined if \c *this contains NaN.
00336   */
00337 template<typename Derived>
00338 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
00339 DenseBase<Derived>::minCoeff() const
00340 {
00341   return this->redux(Eigen::internal::scalar_min_op<Scalar>());
00342 }
00343 
00344 /** \returns the maximum of all coefficients of \c *this.
00345   * \warning the result is undefined if \c *this contains NaN.
00346   */
00347 template<typename Derived>
00348 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
00349 DenseBase<Derived>::maxCoeff() const
00350 {
00351   return this->redux(Eigen::internal::scalar_max_op<Scalar>());
00352 }
00353 
00354 /** \returns the sum of all coefficients of *this
00355   *
00356   * \sa trace(), prod(), mean()
00357   */
00358 template<typename Derived>
00359 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
00360 DenseBase<Derived>::sum() const
00361 {
00362   if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
00363     return Scalar(0);
00364   return this->redux(Eigen::internal::scalar_sum_op<Scalar>());
00365 }
00366 
00367 /** \returns the mean of all coefficients of *this
00368 *
00369 * \sa trace(), prod(), sum()
00370 */
00371 template<typename Derived>
00372 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
00373 DenseBase<Derived>::mean() const
00374 {
00375   return Scalar(this->redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
00376 }
00377 
00378 /** \returns the product of all coefficients of *this
00379   *
00380   * Example: \include MatrixBase_prod.cpp
00381   * Output: \verbinclude MatrixBase_prod.out
00382   *
00383   * \sa sum(), mean(), trace()
00384   */
00385 template<typename Derived>
00386 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
00387 DenseBase<Derived>::prod() const
00388 {
00389   if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
00390     return Scalar(1);
00391   return this->redux(Eigen::internal::scalar_product_op<Scalar>());
00392 }
00393 
00394 /** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal.
00395   *
00396   * \c *this can be any matrix, not necessarily square.
00397   *
00398   * \sa diagonal(), sum()
00399   */
00400 template<typename Derived>
00401 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
00402 MatrixBase<Derived>::trace() const
00403 {
00404   return derived().diagonal ().sum();
00405 }
00406 
00407 } // end namespace Eigen
00408 
00409 #endif // EIGEN_REDUX_H
Repository toolbox

Repository details

Type:	Library
Created:	17 Nov 2022
Imports:	1
Forks:	0
Commits:	2
Dependents:	0
Dependencies:	0
Followers:	1
Important changes to repositories hosted on mbed.com

Redux.h

Repository toolbox

Repository details

Important Information for this Arm website

Important changes to repositories hosted on mbed.com

Redux.h

Repository toolbox

Repository details

Important Information for this Arm website

Access Warning