Eigen library for mbed
src/Core/StableNorm.h@1:3b8049da21b8, 2019-09-24 (annotated)
- Committer:
- jsoh91
- Date:
- Tue Sep 24 00:18:23 2019 +0000
- Revision:
- 1:3b8049da21b8
- Parent:
- 0:13a5d365ba16
ignore and revise some of error parts
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
ykuroda | 0:13a5d365ba16 | 1 | // This file is part of Eigen, a lightweight C++ template library |
ykuroda | 0:13a5d365ba16 | 2 | // for linear algebra. |
ykuroda | 0:13a5d365ba16 | 3 | // |
ykuroda | 0:13a5d365ba16 | 4 | // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr> |
ykuroda | 0:13a5d365ba16 | 5 | // |
ykuroda | 0:13a5d365ba16 | 6 | // This Source Code Form is subject to the terms of the Mozilla |
ykuroda | 0:13a5d365ba16 | 7 | // Public License v. 2.0. If a copy of the MPL was not distributed |
ykuroda | 0:13a5d365ba16 | 8 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. |
ykuroda | 0:13a5d365ba16 | 9 | |
ykuroda | 0:13a5d365ba16 | 10 | #ifndef EIGEN_STABLENORM_H |
ykuroda | 0:13a5d365ba16 | 11 | #define EIGEN_STABLENORM_H |
ykuroda | 0:13a5d365ba16 | 12 | |
ykuroda | 0:13a5d365ba16 | 13 | namespace Eigen { |
ykuroda | 0:13a5d365ba16 | 14 | |
ykuroda | 0:13a5d365ba16 | 15 | namespace internal { |
ykuroda | 0:13a5d365ba16 | 16 | |
ykuroda | 0:13a5d365ba16 | 17 | template<typename ExpressionType, typename Scalar> |
ykuroda | 0:13a5d365ba16 | 18 | inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale) |
ykuroda | 0:13a5d365ba16 | 19 | { |
ykuroda | 0:13a5d365ba16 | 20 | using std::max; |
ykuroda | 0:13a5d365ba16 | 21 | Scalar maxCoeff = bl.cwiseAbs().maxCoeff(); |
ykuroda | 0:13a5d365ba16 | 22 | |
ykuroda | 0:13a5d365ba16 | 23 | if (maxCoeff>scale) |
ykuroda | 0:13a5d365ba16 | 24 | { |
ykuroda | 0:13a5d365ba16 | 25 | ssq = ssq * numext::abs2(scale/maxCoeff); |
ykuroda | 0:13a5d365ba16 | 26 | Scalar tmp = Scalar(1)/maxCoeff; |
ykuroda | 0:13a5d365ba16 | 27 | if(tmp > NumTraits<Scalar>::highest()) |
ykuroda | 0:13a5d365ba16 | 28 | { |
ykuroda | 0:13a5d365ba16 | 29 | invScale = NumTraits<Scalar>::highest(); |
ykuroda | 0:13a5d365ba16 | 30 | scale = Scalar(1)/invScale; |
ykuroda | 0:13a5d365ba16 | 31 | } |
ykuroda | 0:13a5d365ba16 | 32 | else |
ykuroda | 0:13a5d365ba16 | 33 | { |
ykuroda | 0:13a5d365ba16 | 34 | scale = maxCoeff; |
ykuroda | 0:13a5d365ba16 | 35 | invScale = tmp; |
ykuroda | 0:13a5d365ba16 | 36 | } |
ykuroda | 0:13a5d365ba16 | 37 | } |
ykuroda | 0:13a5d365ba16 | 38 | |
ykuroda | 0:13a5d365ba16 | 39 | // TODO if the maxCoeff is much much smaller than the current scale, |
ykuroda | 0:13a5d365ba16 | 40 | // then we can neglect this sub vector |
ykuroda | 0:13a5d365ba16 | 41 | if(scale>Scalar(0)) // if scale==0, then bl is 0 |
ykuroda | 0:13a5d365ba16 | 42 | ssq += (bl*invScale).squaredNorm(); |
ykuroda | 0:13a5d365ba16 | 43 | } |
ykuroda | 0:13a5d365ba16 | 44 | |
ykuroda | 0:13a5d365ba16 | 45 | template<typename Derived> |
ykuroda | 0:13a5d365ba16 | 46 | inline typename NumTraits<typename traits<Derived>::Scalar>::Real |
ykuroda | 0:13a5d365ba16 | 47 | blueNorm_impl(const EigenBase<Derived>& _vec) |
ykuroda | 0:13a5d365ba16 | 48 | { |
ykuroda | 0:13a5d365ba16 | 49 | typedef typename Derived::RealScalar RealScalar; |
ykuroda | 0:13a5d365ba16 | 50 | typedef typename Derived::Index Index; |
ykuroda | 0:13a5d365ba16 | 51 | using std::pow; |
ykuroda | 0:13a5d365ba16 | 52 | using std::min; |
ykuroda | 0:13a5d365ba16 | 53 | using std::max; |
ykuroda | 0:13a5d365ba16 | 54 | using std::sqrt; |
ykuroda | 0:13a5d365ba16 | 55 | using std::abs; |
ykuroda | 0:13a5d365ba16 | 56 | const Derived& vec(_vec.derived()); |
ykuroda | 0:13a5d365ba16 | 57 | static bool initialized = false; |
ykuroda | 0:13a5d365ba16 | 58 | static RealScalar b1, b2, s1m, s2m, overfl, rbig, relerr; |
ykuroda | 0:13a5d365ba16 | 59 | if(!initialized) |
ykuroda | 0:13a5d365ba16 | 60 | { |
ykuroda | 0:13a5d365ba16 | 61 | int ibeta, it, iemin, iemax, iexp; |
ykuroda | 0:13a5d365ba16 | 62 | RealScalar eps; |
ykuroda | 0:13a5d365ba16 | 63 | // This program calculates the machine-dependent constants |
ykuroda | 0:13a5d365ba16 | 64 | // bl, b2, slm, s2m, relerr overfl |
ykuroda | 0:13a5d365ba16 | 65 | // from the "basic" machine-dependent numbers |
ykuroda | 0:13a5d365ba16 | 66 | // nbig, ibeta, it, iemin, iemax, rbig. |
ykuroda | 0:13a5d365ba16 | 67 | // The following define the basic machine-dependent constants. |
ykuroda | 0:13a5d365ba16 | 68 | // For portability, the PORT subprograms "ilmaeh" and "rlmach" |
ykuroda | 0:13a5d365ba16 | 69 | // are used. For any specific computer, each of the assignment |
ykuroda | 0:13a5d365ba16 | 70 | // statements can be replaced |
ykuroda | 0:13a5d365ba16 | 71 | ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers |
ykuroda | 0:13a5d365ba16 | 72 | it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa |
ykuroda | 0:13a5d365ba16 | 73 | iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent |
ykuroda | 0:13a5d365ba16 | 74 | iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent |
ykuroda | 0:13a5d365ba16 | 75 | rbig = (std::numeric_limits<RealScalar>::max)(); // largest floating-point number |
ykuroda | 0:13a5d365ba16 | 76 | |
ykuroda | 0:13a5d365ba16 | 77 | iexp = -((1-iemin)/2); |
ykuroda | 0:13a5d365ba16 | 78 | b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange |
ykuroda | 0:13a5d365ba16 | 79 | iexp = (iemax + 1 - it)/2; |
ykuroda | 0:13a5d365ba16 | 80 | b2 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange |
ykuroda | 0:13a5d365ba16 | 81 | |
ykuroda | 0:13a5d365ba16 | 82 | iexp = (2-iemin)/2; |
ykuroda | 0:13a5d365ba16 | 83 | s1m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range |
ykuroda | 0:13a5d365ba16 | 84 | iexp = - ((iemax+it)/2); |
ykuroda | 0:13a5d365ba16 | 85 | s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range |
ykuroda | 0:13a5d365ba16 | 86 | |
ykuroda | 0:13a5d365ba16 | 87 | overfl = rbig*s2m; // overflow boundary for abig |
ykuroda | 0:13a5d365ba16 | 88 | eps = RealScalar(pow(double(ibeta), 1-it)); |
ykuroda | 0:13a5d365ba16 | 89 | relerr = sqrt(eps); // tolerance for neglecting asml |
ykuroda | 0:13a5d365ba16 | 90 | initialized = true; |
ykuroda | 0:13a5d365ba16 | 91 | } |
ykuroda | 0:13a5d365ba16 | 92 | Index n = vec.size(); |
ykuroda | 0:13a5d365ba16 | 93 | RealScalar ab2 = b2 / RealScalar(n); |
ykuroda | 0:13a5d365ba16 | 94 | RealScalar asml = RealScalar(0); |
ykuroda | 0:13a5d365ba16 | 95 | RealScalar amed = RealScalar(0); |
ykuroda | 0:13a5d365ba16 | 96 | RealScalar abig = RealScalar(0); |
ykuroda | 0:13a5d365ba16 | 97 | for(typename Derived::InnerIterator it(vec, 0); it; ++it) |
ykuroda | 0:13a5d365ba16 | 98 | { |
ykuroda | 0:13a5d365ba16 | 99 | RealScalar ax = abs(it.value()); |
ykuroda | 0:13a5d365ba16 | 100 | if(ax > ab2) abig += numext::abs2(ax*s2m); |
ykuroda | 0:13a5d365ba16 | 101 | else if(ax < b1) asml += numext::abs2(ax*s1m); |
ykuroda | 0:13a5d365ba16 | 102 | else amed += numext::abs2(ax); |
ykuroda | 0:13a5d365ba16 | 103 | } |
ykuroda | 0:13a5d365ba16 | 104 | if(abig > RealScalar(0)) |
ykuroda | 0:13a5d365ba16 | 105 | { |
ykuroda | 0:13a5d365ba16 | 106 | abig = sqrt(abig); |
ykuroda | 0:13a5d365ba16 | 107 | if(abig > overfl) |
ykuroda | 0:13a5d365ba16 | 108 | { |
ykuroda | 0:13a5d365ba16 | 109 | return rbig; |
ykuroda | 0:13a5d365ba16 | 110 | } |
ykuroda | 0:13a5d365ba16 | 111 | if(amed > RealScalar(0)) |
ykuroda | 0:13a5d365ba16 | 112 | { |
ykuroda | 0:13a5d365ba16 | 113 | abig = abig/s2m; |
ykuroda | 0:13a5d365ba16 | 114 | amed = sqrt(amed); |
ykuroda | 0:13a5d365ba16 | 115 | } |
ykuroda | 0:13a5d365ba16 | 116 | else |
ykuroda | 0:13a5d365ba16 | 117 | return abig/s2m; |
ykuroda | 0:13a5d365ba16 | 118 | } |
ykuroda | 0:13a5d365ba16 | 119 | else if(asml > RealScalar(0)) |
ykuroda | 0:13a5d365ba16 | 120 | { |
ykuroda | 0:13a5d365ba16 | 121 | if (amed > RealScalar(0)) |
ykuroda | 0:13a5d365ba16 | 122 | { |
ykuroda | 0:13a5d365ba16 | 123 | abig = sqrt(amed); |
ykuroda | 0:13a5d365ba16 | 124 | amed = sqrt(asml) / s1m; |
ykuroda | 0:13a5d365ba16 | 125 | } |
ykuroda | 0:13a5d365ba16 | 126 | else |
ykuroda | 0:13a5d365ba16 | 127 | return sqrt(asml)/s1m; |
ykuroda | 0:13a5d365ba16 | 128 | } |
ykuroda | 0:13a5d365ba16 | 129 | else |
ykuroda | 0:13a5d365ba16 | 130 | return sqrt(amed); |
ykuroda | 0:13a5d365ba16 | 131 | asml = (min)(abig, amed); |
ykuroda | 0:13a5d365ba16 | 132 | abig = (max)(abig, amed); |
ykuroda | 0:13a5d365ba16 | 133 | if(asml <= abig*relerr) |
ykuroda | 0:13a5d365ba16 | 134 | return abig; |
ykuroda | 0:13a5d365ba16 | 135 | else |
ykuroda | 0:13a5d365ba16 | 136 | return abig * sqrt(RealScalar(1) + numext::abs2(asml/abig)); |
ykuroda | 0:13a5d365ba16 | 137 | } |
ykuroda | 0:13a5d365ba16 | 138 | |
ykuroda | 0:13a5d365ba16 | 139 | } // end namespace internal |
ykuroda | 0:13a5d365ba16 | 140 | |
ykuroda | 0:13a5d365ba16 | 141 | /** \returns the \em l2 norm of \c *this avoiding underflow and overflow. |
ykuroda | 0:13a5d365ba16 | 142 | * This version use a blockwise two passes algorithm: |
ykuroda | 0:13a5d365ba16 | 143 | * 1 - find the absolute largest coefficient \c s |
ykuroda | 0:13a5d365ba16 | 144 | * 2 - compute \f$ s \Vert \frac{*this}{s} \Vert \f$ in a standard way |
ykuroda | 0:13a5d365ba16 | 145 | * |
ykuroda | 0:13a5d365ba16 | 146 | * For architecture/scalar types supporting vectorization, this version |
ykuroda | 0:13a5d365ba16 | 147 | * is faster than blueNorm(). Otherwise the blueNorm() is much faster. |
ykuroda | 0:13a5d365ba16 | 148 | * |
ykuroda | 0:13a5d365ba16 | 149 | * \sa norm(), blueNorm(), hypotNorm() |
ykuroda | 0:13a5d365ba16 | 150 | */ |
ykuroda | 0:13a5d365ba16 | 151 | template<typename Derived> |
ykuroda | 0:13a5d365ba16 | 152 | inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real |
ykuroda | 0:13a5d365ba16 | 153 | MatrixBase<Derived>::stableNorm() const |
ykuroda | 0:13a5d365ba16 | 154 | { |
ykuroda | 0:13a5d365ba16 | 155 | using std::min; |
ykuroda | 0:13a5d365ba16 | 156 | using std::sqrt; |
ykuroda | 0:13a5d365ba16 | 157 | const Index blockSize = 4096; |
ykuroda | 0:13a5d365ba16 | 158 | RealScalar scale(0); |
ykuroda | 0:13a5d365ba16 | 159 | RealScalar invScale(1); |
ykuroda | 0:13a5d365ba16 | 160 | RealScalar ssq(0); // sum of square |
ykuroda | 0:13a5d365ba16 | 161 | enum { |
ykuroda | 0:13a5d365ba16 | 162 | Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0 |
ykuroda | 0:13a5d365ba16 | 163 | }; |
ykuroda | 0:13a5d365ba16 | 164 | Index n = size(); |
ykuroda | 0:13a5d365ba16 | 165 | Index bi = internal::first_aligned(derived()); |
ykuroda | 0:13a5d365ba16 | 166 | if (bi>0) |
ykuroda | 0:13a5d365ba16 | 167 | internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale); |
ykuroda | 0:13a5d365ba16 | 168 | for (; bi<n; bi+=blockSize) |
ykuroda | 0:13a5d365ba16 | 169 | internal::stable_norm_kernel(this->segment(bi,(min)(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale); |
ykuroda | 0:13a5d365ba16 | 170 | return scale * sqrt(ssq); |
ykuroda | 0:13a5d365ba16 | 171 | } |
ykuroda | 0:13a5d365ba16 | 172 | |
ykuroda | 0:13a5d365ba16 | 173 | /** \returns the \em l2 norm of \c *this using the Blue's algorithm. |
ykuroda | 0:13a5d365ba16 | 174 | * A Portable Fortran Program to Find the Euclidean Norm of a Vector, |
ykuroda | 0:13a5d365ba16 | 175 | * ACM TOMS, Vol 4, Issue 1, 1978. |
ykuroda | 0:13a5d365ba16 | 176 | * |
ykuroda | 0:13a5d365ba16 | 177 | * For architecture/scalar types without vectorization, this version |
ykuroda | 0:13a5d365ba16 | 178 | * is much faster than stableNorm(). Otherwise the stableNorm() is faster. |
ykuroda | 0:13a5d365ba16 | 179 | * |
ykuroda | 0:13a5d365ba16 | 180 | * \sa norm(), stableNorm(), hypotNorm() |
ykuroda | 0:13a5d365ba16 | 181 | */ |
ykuroda | 0:13a5d365ba16 | 182 | template<typename Derived> |
ykuroda | 0:13a5d365ba16 | 183 | inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real |
ykuroda | 0:13a5d365ba16 | 184 | MatrixBase<Derived>::blueNorm() const |
ykuroda | 0:13a5d365ba16 | 185 | { |
ykuroda | 0:13a5d365ba16 | 186 | return internal::blueNorm_impl(*this); |
ykuroda | 0:13a5d365ba16 | 187 | } |
ykuroda | 0:13a5d365ba16 | 188 | |
ykuroda | 0:13a5d365ba16 | 189 | /** \returns the \em l2 norm of \c *this avoiding undeflow and overflow. |
ykuroda | 0:13a5d365ba16 | 190 | * This version use a concatenation of hypot() calls, and it is very slow. |
ykuroda | 0:13a5d365ba16 | 191 | * |
ykuroda | 0:13a5d365ba16 | 192 | * \sa norm(), stableNorm() |
ykuroda | 0:13a5d365ba16 | 193 | */ |
ykuroda | 0:13a5d365ba16 | 194 | template<typename Derived> |
ykuroda | 0:13a5d365ba16 | 195 | inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real |
ykuroda | 0:13a5d365ba16 | 196 | MatrixBase<Derived>::hypotNorm() const |
ykuroda | 0:13a5d365ba16 | 197 | { |
ykuroda | 0:13a5d365ba16 | 198 | return this->cwiseAbs().redux(internal::scalar_hypot_op<RealScalar>()); |
ykuroda | 0:13a5d365ba16 | 199 | } |
ykuroda | 0:13a5d365ba16 | 200 | |
ykuroda | 0:13a5d365ba16 | 201 | } // end namespace Eigen |
ykuroda | 0:13a5d365ba16 | 202 | |
ykuroda | 0:13a5d365ba16 | 203 | #endif // EIGEN_STABLENORM_H |