Eigne Matrix Class Library
Dependents: MPC_current_control HydraulicControlBoard_SW AHRS Test_ekf ... more
src/Core/Redux.h@0:13a5d365ba16, 2016-10-13 (annotated)
- Committer:
- ykuroda
- Date:
- Thu Oct 13 04:07:23 2016 +0000
- Revision:
- 0:13a5d365ba16
First commint, Eigne Matrix Class Library
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
ykuroda | 0:13a5d365ba16 | 1 | // This file is part of Eigen, a lightweight C++ template library |
ykuroda | 0:13a5d365ba16 | 2 | // for linear algebra. |
ykuroda | 0:13a5d365ba16 | 3 | // |
ykuroda | 0:13a5d365ba16 | 4 | // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr> |
ykuroda | 0:13a5d365ba16 | 5 | // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> |
ykuroda | 0:13a5d365ba16 | 6 | // |
ykuroda | 0:13a5d365ba16 | 7 | // This Source Code Form is subject to the terms of the Mozilla |
ykuroda | 0:13a5d365ba16 | 8 | // Public License v. 2.0. If a copy of the MPL was not distributed |
ykuroda | 0:13a5d365ba16 | 9 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. |
ykuroda | 0:13a5d365ba16 | 10 | |
ykuroda | 0:13a5d365ba16 | 11 | #ifndef EIGEN_REDUX_H |
ykuroda | 0:13a5d365ba16 | 12 | #define EIGEN_REDUX_H |
ykuroda | 0:13a5d365ba16 | 13 | |
ykuroda | 0:13a5d365ba16 | 14 | namespace Eigen { |
ykuroda | 0:13a5d365ba16 | 15 | |
ykuroda | 0:13a5d365ba16 | 16 | namespace internal { |
ykuroda | 0:13a5d365ba16 | 17 | |
ykuroda | 0:13a5d365ba16 | 18 | // TODO |
ykuroda | 0:13a5d365ba16 | 19 | // * implement other kind of vectorization |
ykuroda | 0:13a5d365ba16 | 20 | // * factorize code |
ykuroda | 0:13a5d365ba16 | 21 | |
ykuroda | 0:13a5d365ba16 | 22 | /*************************************************************************** |
ykuroda | 0:13a5d365ba16 | 23 | * Part 1 : the logic deciding a strategy for vectorization and unrolling |
ykuroda | 0:13a5d365ba16 | 24 | ***************************************************************************/ |
ykuroda | 0:13a5d365ba16 | 25 | |
ykuroda | 0:13a5d365ba16 | 26 | template<typename Func, typename Derived> |
ykuroda | 0:13a5d365ba16 | 27 | struct redux_traits |
ykuroda | 0:13a5d365ba16 | 28 | { |
ykuroda | 0:13a5d365ba16 | 29 | public: |
ykuroda | 0:13a5d365ba16 | 30 | enum { |
ykuroda | 0:13a5d365ba16 | 31 | PacketSize = packet_traits<typename Derived::Scalar>::size, |
ykuroda | 0:13a5d365ba16 | 32 | InnerMaxSize = int(Derived::IsRowMajor) |
ykuroda | 0:13a5d365ba16 | 33 | ? Derived::MaxColsAtCompileTime |
ykuroda | 0:13a5d365ba16 | 34 | : Derived::MaxRowsAtCompileTime |
ykuroda | 0:13a5d365ba16 | 35 | }; |
ykuroda | 0:13a5d365ba16 | 36 | |
ykuroda | 0:13a5d365ba16 | 37 | enum { |
ykuroda | 0:13a5d365ba16 | 38 | MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit) |
ykuroda | 0:13a5d365ba16 | 39 | && (functor_traits<Func>::PacketAccess), |
ykuroda | 0:13a5d365ba16 | 40 | MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit), |
ykuroda | 0:13a5d365ba16 | 41 | MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize |
ykuroda | 0:13a5d365ba16 | 42 | }; |
ykuroda | 0:13a5d365ba16 | 43 | |
ykuroda | 0:13a5d365ba16 | 44 | public: |
ykuroda | 0:13a5d365ba16 | 45 | enum { |
ykuroda | 0:13a5d365ba16 | 46 | Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal) |
ykuroda | 0:13a5d365ba16 | 47 | : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) |
ykuroda | 0:13a5d365ba16 | 48 | : int(DefaultTraversal) |
ykuroda | 0:13a5d365ba16 | 49 | }; |
ykuroda | 0:13a5d365ba16 | 50 | |
ykuroda | 0:13a5d365ba16 | 51 | public: |
ykuroda | 0:13a5d365ba16 | 52 | enum { |
ykuroda | 0:13a5d365ba16 | 53 | Cost = ( Derived::SizeAtCompileTime == Dynamic |
ykuroda | 0:13a5d365ba16 | 54 | || Derived::CoeffReadCost == Dynamic |
ykuroda | 0:13a5d365ba16 | 55 | || (Derived::SizeAtCompileTime!=1 && functor_traits<Func>::Cost == Dynamic) |
ykuroda | 0:13a5d365ba16 | 56 | ) ? Dynamic |
ykuroda | 0:13a5d365ba16 | 57 | : Derived::SizeAtCompileTime * Derived::CoeffReadCost |
ykuroda | 0:13a5d365ba16 | 58 | + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost, |
ykuroda | 0:13a5d365ba16 | 59 | UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) |
ykuroda | 0:13a5d365ba16 | 60 | }; |
ykuroda | 0:13a5d365ba16 | 61 | |
ykuroda | 0:13a5d365ba16 | 62 | public: |
ykuroda | 0:13a5d365ba16 | 63 | enum { |
ykuroda | 0:13a5d365ba16 | 64 | Unrolling = Cost != Dynamic && Cost <= UnrollingLimit |
ykuroda | 0:13a5d365ba16 | 65 | ? CompleteUnrolling |
ykuroda | 0:13a5d365ba16 | 66 | : NoUnrolling |
ykuroda | 0:13a5d365ba16 | 67 | }; |
ykuroda | 0:13a5d365ba16 | 68 | }; |
ykuroda | 0:13a5d365ba16 | 69 | |
ykuroda | 0:13a5d365ba16 | 70 | /*************************************************************************** |
ykuroda | 0:13a5d365ba16 | 71 | * Part 2 : unrollers |
ykuroda | 0:13a5d365ba16 | 72 | ***************************************************************************/ |
ykuroda | 0:13a5d365ba16 | 73 | |
ykuroda | 0:13a5d365ba16 | 74 | /*** no vectorization ***/ |
ykuroda | 0:13a5d365ba16 | 75 | |
ykuroda | 0:13a5d365ba16 | 76 | template<typename Func, typename Derived, int Start, int Length> |
ykuroda | 0:13a5d365ba16 | 77 | struct redux_novec_unroller |
ykuroda | 0:13a5d365ba16 | 78 | { |
ykuroda | 0:13a5d365ba16 | 79 | enum { |
ykuroda | 0:13a5d365ba16 | 80 | HalfLength = Length/2 |
ykuroda | 0:13a5d365ba16 | 81 | }; |
ykuroda | 0:13a5d365ba16 | 82 | |
ykuroda | 0:13a5d365ba16 | 83 | typedef typename Derived::Scalar Scalar; |
ykuroda | 0:13a5d365ba16 | 84 | |
ykuroda | 0:13a5d365ba16 | 85 | static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) |
ykuroda | 0:13a5d365ba16 | 86 | { |
ykuroda | 0:13a5d365ba16 | 87 | return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func), |
ykuroda | 0:13a5d365ba16 | 88 | redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func)); |
ykuroda | 0:13a5d365ba16 | 89 | } |
ykuroda | 0:13a5d365ba16 | 90 | }; |
ykuroda | 0:13a5d365ba16 | 91 | |
ykuroda | 0:13a5d365ba16 | 92 | template<typename Func, typename Derived, int Start> |
ykuroda | 0:13a5d365ba16 | 93 | struct redux_novec_unroller<Func, Derived, Start, 1> |
ykuroda | 0:13a5d365ba16 | 94 | { |
ykuroda | 0:13a5d365ba16 | 95 | enum { |
ykuroda | 0:13a5d365ba16 | 96 | outer = Start / Derived::InnerSizeAtCompileTime, |
ykuroda | 0:13a5d365ba16 | 97 | inner = Start % Derived::InnerSizeAtCompileTime |
ykuroda | 0:13a5d365ba16 | 98 | }; |
ykuroda | 0:13a5d365ba16 | 99 | |
ykuroda | 0:13a5d365ba16 | 100 | typedef typename Derived::Scalar Scalar; |
ykuroda | 0:13a5d365ba16 | 101 | |
ykuroda | 0:13a5d365ba16 | 102 | static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&) |
ykuroda | 0:13a5d365ba16 | 103 | { |
ykuroda | 0:13a5d365ba16 | 104 | return mat.coeffByOuterInner(outer, inner); |
ykuroda | 0:13a5d365ba16 | 105 | } |
ykuroda | 0:13a5d365ba16 | 106 | }; |
ykuroda | 0:13a5d365ba16 | 107 | |
ykuroda | 0:13a5d365ba16 | 108 | // This is actually dead code and will never be called. It is required |
ykuroda | 0:13a5d365ba16 | 109 | // to prevent false warnings regarding failed inlining though |
ykuroda | 0:13a5d365ba16 | 110 | // for 0 length run() will never be called at all. |
ykuroda | 0:13a5d365ba16 | 111 | template<typename Func, typename Derived, int Start> |
ykuroda | 0:13a5d365ba16 | 112 | struct redux_novec_unroller<Func, Derived, Start, 0> |
ykuroda | 0:13a5d365ba16 | 113 | { |
ykuroda | 0:13a5d365ba16 | 114 | typedef typename Derived::Scalar Scalar; |
ykuroda | 0:13a5d365ba16 | 115 | static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); } |
ykuroda | 0:13a5d365ba16 | 116 | }; |
ykuroda | 0:13a5d365ba16 | 117 | |
ykuroda | 0:13a5d365ba16 | 118 | /*** vectorization ***/ |
ykuroda | 0:13a5d365ba16 | 119 | |
ykuroda | 0:13a5d365ba16 | 120 | template<typename Func, typename Derived, int Start, int Length> |
ykuroda | 0:13a5d365ba16 | 121 | struct redux_vec_unroller |
ykuroda | 0:13a5d365ba16 | 122 | { |
ykuroda | 0:13a5d365ba16 | 123 | enum { |
ykuroda | 0:13a5d365ba16 | 124 | PacketSize = packet_traits<typename Derived::Scalar>::size, |
ykuroda | 0:13a5d365ba16 | 125 | HalfLength = Length/2 |
ykuroda | 0:13a5d365ba16 | 126 | }; |
ykuroda | 0:13a5d365ba16 | 127 | |
ykuroda | 0:13a5d365ba16 | 128 | typedef typename Derived::Scalar Scalar; |
ykuroda | 0:13a5d365ba16 | 129 | typedef typename packet_traits<Scalar>::type PacketScalar; |
ykuroda | 0:13a5d365ba16 | 130 | |
ykuroda | 0:13a5d365ba16 | 131 | static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func) |
ykuroda | 0:13a5d365ba16 | 132 | { |
ykuroda | 0:13a5d365ba16 | 133 | return func.packetOp( |
ykuroda | 0:13a5d365ba16 | 134 | redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func), |
ykuroda | 0:13a5d365ba16 | 135 | redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) ); |
ykuroda | 0:13a5d365ba16 | 136 | } |
ykuroda | 0:13a5d365ba16 | 137 | }; |
ykuroda | 0:13a5d365ba16 | 138 | |
ykuroda | 0:13a5d365ba16 | 139 | template<typename Func, typename Derived, int Start> |
ykuroda | 0:13a5d365ba16 | 140 | struct redux_vec_unroller<Func, Derived, Start, 1> |
ykuroda | 0:13a5d365ba16 | 141 | { |
ykuroda | 0:13a5d365ba16 | 142 | enum { |
ykuroda | 0:13a5d365ba16 | 143 | index = Start * packet_traits<typename Derived::Scalar>::size, |
ykuroda | 0:13a5d365ba16 | 144 | outer = index / int(Derived::InnerSizeAtCompileTime), |
ykuroda | 0:13a5d365ba16 | 145 | inner = index % int(Derived::InnerSizeAtCompileTime), |
ykuroda | 0:13a5d365ba16 | 146 | alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned |
ykuroda | 0:13a5d365ba16 | 147 | }; |
ykuroda | 0:13a5d365ba16 | 148 | |
ykuroda | 0:13a5d365ba16 | 149 | typedef typename Derived::Scalar Scalar; |
ykuroda | 0:13a5d365ba16 | 150 | typedef typename packet_traits<Scalar>::type PacketScalar; |
ykuroda | 0:13a5d365ba16 | 151 | |
ykuroda | 0:13a5d365ba16 | 152 | static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&) |
ykuroda | 0:13a5d365ba16 | 153 | { |
ykuroda | 0:13a5d365ba16 | 154 | return mat.template packetByOuterInner<alignment>(outer, inner); |
ykuroda | 0:13a5d365ba16 | 155 | } |
ykuroda | 0:13a5d365ba16 | 156 | }; |
ykuroda | 0:13a5d365ba16 | 157 | |
ykuroda | 0:13a5d365ba16 | 158 | /*************************************************************************** |
ykuroda | 0:13a5d365ba16 | 159 | * Part 3 : implementation of all cases |
ykuroda | 0:13a5d365ba16 | 160 | ***************************************************************************/ |
ykuroda | 0:13a5d365ba16 | 161 | |
ykuroda | 0:13a5d365ba16 | 162 | template<typename Func, typename Derived, |
ykuroda | 0:13a5d365ba16 | 163 | int Traversal = redux_traits<Func, Derived>::Traversal, |
ykuroda | 0:13a5d365ba16 | 164 | int Unrolling = redux_traits<Func, Derived>::Unrolling |
ykuroda | 0:13a5d365ba16 | 165 | > |
ykuroda | 0:13a5d365ba16 | 166 | struct redux_impl; |
ykuroda | 0:13a5d365ba16 | 167 | |
ykuroda | 0:13a5d365ba16 | 168 | template<typename Func, typename Derived> |
ykuroda | 0:13a5d365ba16 | 169 | struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling> |
ykuroda | 0:13a5d365ba16 | 170 | { |
ykuroda | 0:13a5d365ba16 | 171 | typedef typename Derived::Scalar Scalar; |
ykuroda | 0:13a5d365ba16 | 172 | typedef typename Derived::Index Index; |
ykuroda | 0:13a5d365ba16 | 173 | static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func) |
ykuroda | 0:13a5d365ba16 | 174 | { |
ykuroda | 0:13a5d365ba16 | 175 | eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); |
ykuroda | 0:13a5d365ba16 | 176 | Scalar res; |
ykuroda | 0:13a5d365ba16 | 177 | res = mat.coeffByOuterInner(0, 0); |
ykuroda | 0:13a5d365ba16 | 178 | for(Index i = 1; i < mat.innerSize(); ++i) |
ykuroda | 0:13a5d365ba16 | 179 | res = func(res, mat.coeffByOuterInner(0, i)); |
ykuroda | 0:13a5d365ba16 | 180 | for(Index i = 1; i < mat.outerSize(); ++i) |
ykuroda | 0:13a5d365ba16 | 181 | for(Index j = 0; j < mat.innerSize(); ++j) |
ykuroda | 0:13a5d365ba16 | 182 | res = func(res, mat.coeffByOuterInner(i, j)); |
ykuroda | 0:13a5d365ba16 | 183 | return res; |
ykuroda | 0:13a5d365ba16 | 184 | } |
ykuroda | 0:13a5d365ba16 | 185 | }; |
ykuroda | 0:13a5d365ba16 | 186 | |
ykuroda | 0:13a5d365ba16 | 187 | template<typename Func, typename Derived> |
ykuroda | 0:13a5d365ba16 | 188 | struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling> |
ykuroda | 0:13a5d365ba16 | 189 | : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime> |
ykuroda | 0:13a5d365ba16 | 190 | {}; |
ykuroda | 0:13a5d365ba16 | 191 | |
ykuroda | 0:13a5d365ba16 | 192 | template<typename Func, typename Derived> |
ykuroda | 0:13a5d365ba16 | 193 | struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling> |
ykuroda | 0:13a5d365ba16 | 194 | { |
ykuroda | 0:13a5d365ba16 | 195 | typedef typename Derived::Scalar Scalar; |
ykuroda | 0:13a5d365ba16 | 196 | typedef typename packet_traits<Scalar>::type PacketScalar; |
ykuroda | 0:13a5d365ba16 | 197 | typedef typename Derived::Index Index; |
ykuroda | 0:13a5d365ba16 | 198 | |
ykuroda | 0:13a5d365ba16 | 199 | static Scalar run(const Derived& mat, const Func& func) |
ykuroda | 0:13a5d365ba16 | 200 | { |
ykuroda | 0:13a5d365ba16 | 201 | const Index size = mat.size(); |
ykuroda | 0:13a5d365ba16 | 202 | eigen_assert(size && "you are using an empty matrix"); |
ykuroda | 0:13a5d365ba16 | 203 | const Index packetSize = packet_traits<Scalar>::size; |
ykuroda | 0:13a5d365ba16 | 204 | const Index alignedStart = internal::first_aligned(mat); |
ykuroda | 0:13a5d365ba16 | 205 | enum { |
ykuroda | 0:13a5d365ba16 | 206 | alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit) |
ykuroda | 0:13a5d365ba16 | 207 | ? Aligned : Unaligned |
ykuroda | 0:13a5d365ba16 | 208 | }; |
ykuroda | 0:13a5d365ba16 | 209 | const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize); |
ykuroda | 0:13a5d365ba16 | 210 | const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize); |
ykuroda | 0:13a5d365ba16 | 211 | const Index alignedEnd2 = alignedStart + alignedSize2; |
ykuroda | 0:13a5d365ba16 | 212 | const Index alignedEnd = alignedStart + alignedSize; |
ykuroda | 0:13a5d365ba16 | 213 | Scalar res; |
ykuroda | 0:13a5d365ba16 | 214 | if(alignedSize) |
ykuroda | 0:13a5d365ba16 | 215 | { |
ykuroda | 0:13a5d365ba16 | 216 | PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart); |
ykuroda | 0:13a5d365ba16 | 217 | if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop |
ykuroda | 0:13a5d365ba16 | 218 | { |
ykuroda | 0:13a5d365ba16 | 219 | PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize); |
ykuroda | 0:13a5d365ba16 | 220 | for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize) |
ykuroda | 0:13a5d365ba16 | 221 | { |
ykuroda | 0:13a5d365ba16 | 222 | packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index)); |
ykuroda | 0:13a5d365ba16 | 223 | packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize)); |
ykuroda | 0:13a5d365ba16 | 224 | } |
ykuroda | 0:13a5d365ba16 | 225 | |
ykuroda | 0:13a5d365ba16 | 226 | packet_res0 = func.packetOp(packet_res0,packet_res1); |
ykuroda | 0:13a5d365ba16 | 227 | if(alignedEnd>alignedEnd2) |
ykuroda | 0:13a5d365ba16 | 228 | packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2)); |
ykuroda | 0:13a5d365ba16 | 229 | } |
ykuroda | 0:13a5d365ba16 | 230 | res = func.predux(packet_res0); |
ykuroda | 0:13a5d365ba16 | 231 | |
ykuroda | 0:13a5d365ba16 | 232 | for(Index index = 0; index < alignedStart; ++index) |
ykuroda | 0:13a5d365ba16 | 233 | res = func(res,mat.coeff(index)); |
ykuroda | 0:13a5d365ba16 | 234 | |
ykuroda | 0:13a5d365ba16 | 235 | for(Index index = alignedEnd; index < size; ++index) |
ykuroda | 0:13a5d365ba16 | 236 | res = func(res,mat.coeff(index)); |
ykuroda | 0:13a5d365ba16 | 237 | } |
ykuroda | 0:13a5d365ba16 | 238 | else // too small to vectorize anything. |
ykuroda | 0:13a5d365ba16 | 239 | // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. |
ykuroda | 0:13a5d365ba16 | 240 | { |
ykuroda | 0:13a5d365ba16 | 241 | res = mat.coeff(0); |
ykuroda | 0:13a5d365ba16 | 242 | for(Index index = 1; index < size; ++index) |
ykuroda | 0:13a5d365ba16 | 243 | res = func(res,mat.coeff(index)); |
ykuroda | 0:13a5d365ba16 | 244 | } |
ykuroda | 0:13a5d365ba16 | 245 | |
ykuroda | 0:13a5d365ba16 | 246 | return res; |
ykuroda | 0:13a5d365ba16 | 247 | } |
ykuroda | 0:13a5d365ba16 | 248 | }; |
ykuroda | 0:13a5d365ba16 | 249 | |
ykuroda | 0:13a5d365ba16 | 250 | // NOTE: for SliceVectorizedTraversal we simply bypass unrolling |
ykuroda | 0:13a5d365ba16 | 251 | template<typename Func, typename Derived, int Unrolling> |
ykuroda | 0:13a5d365ba16 | 252 | struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling> |
ykuroda | 0:13a5d365ba16 | 253 | { |
ykuroda | 0:13a5d365ba16 | 254 | typedef typename Derived::Scalar Scalar; |
ykuroda | 0:13a5d365ba16 | 255 | typedef typename packet_traits<Scalar>::type PacketScalar; |
ykuroda | 0:13a5d365ba16 | 256 | typedef typename Derived::Index Index; |
ykuroda | 0:13a5d365ba16 | 257 | |
ykuroda | 0:13a5d365ba16 | 258 | static Scalar run(const Derived& mat, const Func& func) |
ykuroda | 0:13a5d365ba16 | 259 | { |
ykuroda | 0:13a5d365ba16 | 260 | eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); |
ykuroda | 0:13a5d365ba16 | 261 | const Index innerSize = mat.innerSize(); |
ykuroda | 0:13a5d365ba16 | 262 | const Index outerSize = mat.outerSize(); |
ykuroda | 0:13a5d365ba16 | 263 | enum { |
ykuroda | 0:13a5d365ba16 | 264 | packetSize = packet_traits<Scalar>::size |
ykuroda | 0:13a5d365ba16 | 265 | }; |
ykuroda | 0:13a5d365ba16 | 266 | const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize; |
ykuroda | 0:13a5d365ba16 | 267 | Scalar res; |
ykuroda | 0:13a5d365ba16 | 268 | if(packetedInnerSize) |
ykuroda | 0:13a5d365ba16 | 269 | { |
ykuroda | 0:13a5d365ba16 | 270 | PacketScalar packet_res = mat.template packet<Unaligned>(0,0); |
ykuroda | 0:13a5d365ba16 | 271 | for(Index j=0; j<outerSize; ++j) |
ykuroda | 0:13a5d365ba16 | 272 | for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize)) |
ykuroda | 0:13a5d365ba16 | 273 | packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned>(j,i)); |
ykuroda | 0:13a5d365ba16 | 274 | |
ykuroda | 0:13a5d365ba16 | 275 | res = func.predux(packet_res); |
ykuroda | 0:13a5d365ba16 | 276 | for(Index j=0; j<outerSize; ++j) |
ykuroda | 0:13a5d365ba16 | 277 | for(Index i=packetedInnerSize; i<innerSize; ++i) |
ykuroda | 0:13a5d365ba16 | 278 | res = func(res, mat.coeffByOuterInner(j,i)); |
ykuroda | 0:13a5d365ba16 | 279 | } |
ykuroda | 0:13a5d365ba16 | 280 | else // too small to vectorize anything. |
ykuroda | 0:13a5d365ba16 | 281 | // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. |
ykuroda | 0:13a5d365ba16 | 282 | { |
ykuroda | 0:13a5d365ba16 | 283 | res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func); |
ykuroda | 0:13a5d365ba16 | 284 | } |
ykuroda | 0:13a5d365ba16 | 285 | |
ykuroda | 0:13a5d365ba16 | 286 | return res; |
ykuroda | 0:13a5d365ba16 | 287 | } |
ykuroda | 0:13a5d365ba16 | 288 | }; |
ykuroda | 0:13a5d365ba16 | 289 | |
ykuroda | 0:13a5d365ba16 | 290 | template<typename Func, typename Derived> |
ykuroda | 0:13a5d365ba16 | 291 | struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling> |
ykuroda | 0:13a5d365ba16 | 292 | { |
ykuroda | 0:13a5d365ba16 | 293 | typedef typename Derived::Scalar Scalar; |
ykuroda | 0:13a5d365ba16 | 294 | typedef typename packet_traits<Scalar>::type PacketScalar; |
ykuroda | 0:13a5d365ba16 | 295 | enum { |
ykuroda | 0:13a5d365ba16 | 296 | PacketSize = packet_traits<Scalar>::size, |
ykuroda | 0:13a5d365ba16 | 297 | Size = Derived::SizeAtCompileTime, |
ykuroda | 0:13a5d365ba16 | 298 | VectorizedSize = (Size / PacketSize) * PacketSize |
ykuroda | 0:13a5d365ba16 | 299 | }; |
ykuroda | 0:13a5d365ba16 | 300 | static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func) |
ykuroda | 0:13a5d365ba16 | 301 | { |
ykuroda | 0:13a5d365ba16 | 302 | eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); |
ykuroda | 0:13a5d365ba16 | 303 | Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func)); |
ykuroda | 0:13a5d365ba16 | 304 | if (VectorizedSize != Size) |
ykuroda | 0:13a5d365ba16 | 305 | res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func)); |
ykuroda | 0:13a5d365ba16 | 306 | return res; |
ykuroda | 0:13a5d365ba16 | 307 | } |
ykuroda | 0:13a5d365ba16 | 308 | }; |
ykuroda | 0:13a5d365ba16 | 309 | |
ykuroda | 0:13a5d365ba16 | 310 | } // end namespace internal |
ykuroda | 0:13a5d365ba16 | 311 | |
ykuroda | 0:13a5d365ba16 | 312 | /*************************************************************************** |
ykuroda | 0:13a5d365ba16 | 313 | * Part 4 : public API |
ykuroda | 0:13a5d365ba16 | 314 | ***************************************************************************/ |
ykuroda | 0:13a5d365ba16 | 315 | |
ykuroda | 0:13a5d365ba16 | 316 | |
ykuroda | 0:13a5d365ba16 | 317 | /** \returns the result of a full redux operation on the whole matrix or vector using \a func |
ykuroda | 0:13a5d365ba16 | 318 | * |
ykuroda | 0:13a5d365ba16 | 319 | * The template parameter \a BinaryOp is the type of the functor \a func which must be |
ykuroda | 0:13a5d365ba16 | 320 | * an associative operator. Both current STL and TR1 functor styles are handled. |
ykuroda | 0:13a5d365ba16 | 321 | * |
ykuroda | 0:13a5d365ba16 | 322 | * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise() |
ykuroda | 0:13a5d365ba16 | 323 | */ |
ykuroda | 0:13a5d365ba16 | 324 | template<typename Derived> |
ykuroda | 0:13a5d365ba16 | 325 | template<typename Func> |
ykuroda | 0:13a5d365ba16 | 326 | EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type |
ykuroda | 0:13a5d365ba16 | 327 | DenseBase<Derived>::redux(const Func& func) const |
ykuroda | 0:13a5d365ba16 | 328 | { |
ykuroda | 0:13a5d365ba16 | 329 | typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested; |
ykuroda | 0:13a5d365ba16 | 330 | return internal::redux_impl<Func, ThisNested> |
ykuroda | 0:13a5d365ba16 | 331 | ::run(derived(), func); |
ykuroda | 0:13a5d365ba16 | 332 | } |
ykuroda | 0:13a5d365ba16 | 333 | |
ykuroda | 0:13a5d365ba16 | 334 | /** \returns the minimum of all coefficients of \c *this. |
ykuroda | 0:13a5d365ba16 | 335 | * \warning the result is undefined if \c *this contains NaN. |
ykuroda | 0:13a5d365ba16 | 336 | */ |
ykuroda | 0:13a5d365ba16 | 337 | template<typename Derived> |
ykuroda | 0:13a5d365ba16 | 338 | EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar |
ykuroda | 0:13a5d365ba16 | 339 | DenseBase<Derived>::minCoeff() const |
ykuroda | 0:13a5d365ba16 | 340 | { |
ykuroda | 0:13a5d365ba16 | 341 | return this->redux(Eigen::internal::scalar_min_op<Scalar>()); |
ykuroda | 0:13a5d365ba16 | 342 | } |
ykuroda | 0:13a5d365ba16 | 343 | |
ykuroda | 0:13a5d365ba16 | 344 | /** \returns the maximum of all coefficients of \c *this. |
ykuroda | 0:13a5d365ba16 | 345 | * \warning the result is undefined if \c *this contains NaN. |
ykuroda | 0:13a5d365ba16 | 346 | */ |
ykuroda | 0:13a5d365ba16 | 347 | template<typename Derived> |
ykuroda | 0:13a5d365ba16 | 348 | EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar |
ykuroda | 0:13a5d365ba16 | 349 | DenseBase<Derived>::maxCoeff() const |
ykuroda | 0:13a5d365ba16 | 350 | { |
ykuroda | 0:13a5d365ba16 | 351 | return this->redux(Eigen::internal::scalar_max_op<Scalar>()); |
ykuroda | 0:13a5d365ba16 | 352 | } |
ykuroda | 0:13a5d365ba16 | 353 | |
ykuroda | 0:13a5d365ba16 | 354 | /** \returns the sum of all coefficients of *this |
ykuroda | 0:13a5d365ba16 | 355 | * |
ykuroda | 0:13a5d365ba16 | 356 | * \sa trace(), prod(), mean() |
ykuroda | 0:13a5d365ba16 | 357 | */ |
ykuroda | 0:13a5d365ba16 | 358 | template<typename Derived> |
ykuroda | 0:13a5d365ba16 | 359 | EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar |
ykuroda | 0:13a5d365ba16 | 360 | DenseBase<Derived>::sum() const |
ykuroda | 0:13a5d365ba16 | 361 | { |
ykuroda | 0:13a5d365ba16 | 362 | if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) |
ykuroda | 0:13a5d365ba16 | 363 | return Scalar(0); |
ykuroda | 0:13a5d365ba16 | 364 | return this->redux(Eigen::internal::scalar_sum_op<Scalar>()); |
ykuroda | 0:13a5d365ba16 | 365 | } |
ykuroda | 0:13a5d365ba16 | 366 | |
ykuroda | 0:13a5d365ba16 | 367 | /** \returns the mean of all coefficients of *this |
ykuroda | 0:13a5d365ba16 | 368 | * |
ykuroda | 0:13a5d365ba16 | 369 | * \sa trace(), prod(), sum() |
ykuroda | 0:13a5d365ba16 | 370 | */ |
ykuroda | 0:13a5d365ba16 | 371 | template<typename Derived> |
ykuroda | 0:13a5d365ba16 | 372 | EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar |
ykuroda | 0:13a5d365ba16 | 373 | DenseBase<Derived>::mean() const |
ykuroda | 0:13a5d365ba16 | 374 | { |
ykuroda | 0:13a5d365ba16 | 375 | return Scalar(this->redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size()); |
ykuroda | 0:13a5d365ba16 | 376 | } |
ykuroda | 0:13a5d365ba16 | 377 | |
ykuroda | 0:13a5d365ba16 | 378 | /** \returns the product of all coefficients of *this |
ykuroda | 0:13a5d365ba16 | 379 | * |
ykuroda | 0:13a5d365ba16 | 380 | * Example: \include MatrixBase_prod.cpp |
ykuroda | 0:13a5d365ba16 | 381 | * Output: \verbinclude MatrixBase_prod.out |
ykuroda | 0:13a5d365ba16 | 382 | * |
ykuroda | 0:13a5d365ba16 | 383 | * \sa sum(), mean(), trace() |
ykuroda | 0:13a5d365ba16 | 384 | */ |
ykuroda | 0:13a5d365ba16 | 385 | template<typename Derived> |
ykuroda | 0:13a5d365ba16 | 386 | EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar |
ykuroda | 0:13a5d365ba16 | 387 | DenseBase<Derived>::prod() const |
ykuroda | 0:13a5d365ba16 | 388 | { |
ykuroda | 0:13a5d365ba16 | 389 | if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) |
ykuroda | 0:13a5d365ba16 | 390 | return Scalar(1); |
ykuroda | 0:13a5d365ba16 | 391 | return this->redux(Eigen::internal::scalar_product_op<Scalar>()); |
ykuroda | 0:13a5d365ba16 | 392 | } |
ykuroda | 0:13a5d365ba16 | 393 | |
ykuroda | 0:13a5d365ba16 | 394 | /** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal. |
ykuroda | 0:13a5d365ba16 | 395 | * |
ykuroda | 0:13a5d365ba16 | 396 | * \c *this can be any matrix, not necessarily square. |
ykuroda | 0:13a5d365ba16 | 397 | * |
ykuroda | 0:13a5d365ba16 | 398 | * \sa diagonal(), sum() |
ykuroda | 0:13a5d365ba16 | 399 | */ |
ykuroda | 0:13a5d365ba16 | 400 | template<typename Derived> |
ykuroda | 0:13a5d365ba16 | 401 | EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar |
ykuroda | 0:13a5d365ba16 | 402 | MatrixBase<Derived>::trace() const |
ykuroda | 0:13a5d365ba16 | 403 | { |
ykuroda | 0:13a5d365ba16 | 404 | return derived().diagonal().sum(); |
ykuroda | 0:13a5d365ba16 | 405 | } |
ykuroda | 0:13a5d365ba16 | 406 | |
ykuroda | 0:13a5d365ba16 | 407 | } // end namespace Eigen |
ykuroda | 0:13a5d365ba16 | 408 | |
ykuroda | 0:13a5d365ba16 | 409 | #endif // EIGEN_REDUX_H |