User | Revision | Line number | New contents of line |
ykuroda |
0:13a5d365ba16
|
1
|
// This file is part of Eigen, a lightweight C++ template library
|
ykuroda |
0:13a5d365ba16
|
2
|
// for linear algebra.
|
ykuroda |
0:13a5d365ba16
|
3
|
//
|
ykuroda |
0:13a5d365ba16
|
4
|
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
ykuroda |
0:13a5d365ba16
|
5
|
//
|
ykuroda |
0:13a5d365ba16
|
6
|
// This Source Code Form is subject to the terms of the Mozilla
|
ykuroda |
0:13a5d365ba16
|
7
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
ykuroda |
0:13a5d365ba16
|
8
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
ykuroda |
0:13a5d365ba16
|
9
|
|
ykuroda |
0:13a5d365ba16
|
10
|
#ifndef EIGEN_STABLENORM_H
|
ykuroda |
0:13a5d365ba16
|
11
|
#define EIGEN_STABLENORM_H
|
ykuroda |
0:13a5d365ba16
|
12
|
|
ykuroda |
0:13a5d365ba16
|
13
|
namespace Eigen {
|
ykuroda |
0:13a5d365ba16
|
14
|
|
ykuroda |
0:13a5d365ba16
|
15
|
namespace internal {
|
ykuroda |
0:13a5d365ba16
|
16
|
|
ykuroda |
0:13a5d365ba16
|
17
|
template<typename ExpressionType, typename Scalar>
|
ykuroda |
0:13a5d365ba16
|
18
|
inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
|
ykuroda |
0:13a5d365ba16
|
19
|
{
|
ykuroda |
0:13a5d365ba16
|
20
|
using std::max;
|
ykuroda |
0:13a5d365ba16
|
21
|
Scalar maxCoeff = bl.cwiseAbs().maxCoeff();
|
ykuroda |
0:13a5d365ba16
|
22
|
|
ykuroda |
0:13a5d365ba16
|
23
|
if (maxCoeff>scale)
|
ykuroda |
0:13a5d365ba16
|
24
|
{
|
ykuroda |
0:13a5d365ba16
|
25
|
ssq = ssq * numext::abs2(scale/maxCoeff);
|
ykuroda |
0:13a5d365ba16
|
26
|
Scalar tmp = Scalar(1)/maxCoeff;
|
ykuroda |
0:13a5d365ba16
|
27
|
if(tmp > NumTraits<Scalar>::highest())
|
ykuroda |
0:13a5d365ba16
|
28
|
{
|
ykuroda |
0:13a5d365ba16
|
29
|
invScale = NumTraits<Scalar>::highest();
|
ykuroda |
0:13a5d365ba16
|
30
|
scale = Scalar(1)/invScale;
|
ykuroda |
0:13a5d365ba16
|
31
|
}
|
ykuroda |
0:13a5d365ba16
|
32
|
else
|
ykuroda |
0:13a5d365ba16
|
33
|
{
|
ykuroda |
0:13a5d365ba16
|
34
|
scale = maxCoeff;
|
ykuroda |
0:13a5d365ba16
|
35
|
invScale = tmp;
|
ykuroda |
0:13a5d365ba16
|
36
|
}
|
ykuroda |
0:13a5d365ba16
|
37
|
}
|
ykuroda |
0:13a5d365ba16
|
38
|
|
ykuroda |
0:13a5d365ba16
|
39
|
// TODO if the maxCoeff is much much smaller than the current scale,
|
ykuroda |
0:13a5d365ba16
|
40
|
// then we can neglect this sub vector
|
ykuroda |
0:13a5d365ba16
|
41
|
if(scale>Scalar(0)) // if scale==0, then bl is 0
|
ykuroda |
0:13a5d365ba16
|
42
|
ssq += (bl*invScale).squaredNorm();
|
ykuroda |
0:13a5d365ba16
|
43
|
}
|
ykuroda |
0:13a5d365ba16
|
44
|
|
ykuroda |
0:13a5d365ba16
|
45
|
template<typename Derived>
|
ykuroda |
0:13a5d365ba16
|
46
|
inline typename NumTraits<typename traits<Derived>::Scalar>::Real
|
ykuroda |
0:13a5d365ba16
|
47
|
blueNorm_impl(const EigenBase<Derived>& _vec)
|
ykuroda |
0:13a5d365ba16
|
48
|
{
|
ykuroda |
0:13a5d365ba16
|
49
|
typedef typename Derived::RealScalar RealScalar;
|
ykuroda |
0:13a5d365ba16
|
50
|
typedef typename Derived::Index Index;
|
ykuroda |
0:13a5d365ba16
|
51
|
using std::pow;
|
ykuroda |
0:13a5d365ba16
|
52
|
using std::min;
|
ykuroda |
0:13a5d365ba16
|
53
|
using std::max;
|
ykuroda |
0:13a5d365ba16
|
54
|
using std::sqrt;
|
ykuroda |
0:13a5d365ba16
|
55
|
using std::abs;
|
ykuroda |
0:13a5d365ba16
|
56
|
const Derived& vec(_vec.derived());
|
ykuroda |
0:13a5d365ba16
|
57
|
static bool initialized = false;
|
ykuroda |
0:13a5d365ba16
|
58
|
static RealScalar b1, b2, s1m, s2m, overfl, rbig, relerr;
|
ykuroda |
0:13a5d365ba16
|
59
|
if(!initialized)
|
ykuroda |
0:13a5d365ba16
|
60
|
{
|
ykuroda |
0:13a5d365ba16
|
61
|
int ibeta, it, iemin, iemax, iexp;
|
ykuroda |
0:13a5d365ba16
|
62
|
RealScalar eps;
|
ykuroda |
0:13a5d365ba16
|
63
|
// This program calculates the machine-dependent constants
|
ykuroda |
0:13a5d365ba16
|
64
|
// bl, b2, slm, s2m, relerr overfl
|
ykuroda |
0:13a5d365ba16
|
65
|
// from the "basic" machine-dependent numbers
|
ykuroda |
0:13a5d365ba16
|
66
|
// nbig, ibeta, it, iemin, iemax, rbig.
|
ykuroda |
0:13a5d365ba16
|
67
|
// The following define the basic machine-dependent constants.
|
ykuroda |
0:13a5d365ba16
|
68
|
// For portability, the PORT subprograms "ilmaeh" and "rlmach"
|
ykuroda |
0:13a5d365ba16
|
69
|
// are used. For any specific computer, each of the assignment
|
ykuroda |
0:13a5d365ba16
|
70
|
// statements can be replaced
|
ykuroda |
0:13a5d365ba16
|
71
|
ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
|
ykuroda |
0:13a5d365ba16
|
72
|
it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa
|
ykuroda |
0:13a5d365ba16
|
73
|
iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
|
ykuroda |
0:13a5d365ba16
|
74
|
iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
|
ykuroda |
0:13a5d365ba16
|
75
|
rbig = (std::numeric_limits<RealScalar>::max)(); // largest floating-point number
|
ykuroda |
0:13a5d365ba16
|
76
|
|
ykuroda |
0:13a5d365ba16
|
77
|
iexp = -((1-iemin)/2);
|
ykuroda |
0:13a5d365ba16
|
78
|
b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange
|
ykuroda |
0:13a5d365ba16
|
79
|
iexp = (iemax + 1 - it)/2;
|
ykuroda |
0:13a5d365ba16
|
80
|
b2 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange
|
ykuroda |
0:13a5d365ba16
|
81
|
|
ykuroda |
0:13a5d365ba16
|
82
|
iexp = (2-iemin)/2;
|
ykuroda |
0:13a5d365ba16
|
83
|
s1m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range
|
ykuroda |
0:13a5d365ba16
|
84
|
iexp = - ((iemax+it)/2);
|
ykuroda |
0:13a5d365ba16
|
85
|
s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
|
ykuroda |
0:13a5d365ba16
|
86
|
|
ykuroda |
0:13a5d365ba16
|
87
|
overfl = rbig*s2m; // overflow boundary for abig
|
ykuroda |
0:13a5d365ba16
|
88
|
eps = RealScalar(pow(double(ibeta), 1-it));
|
ykuroda |
0:13a5d365ba16
|
89
|
relerr = sqrt(eps); // tolerance for neglecting asml
|
ykuroda |
0:13a5d365ba16
|
90
|
initialized = true;
|
ykuroda |
0:13a5d365ba16
|
91
|
}
|
ykuroda |
0:13a5d365ba16
|
92
|
Index n = vec.size();
|
ykuroda |
0:13a5d365ba16
|
93
|
RealScalar ab2 = b2 / RealScalar(n);
|
ykuroda |
0:13a5d365ba16
|
94
|
RealScalar asml = RealScalar(0);
|
ykuroda |
0:13a5d365ba16
|
95
|
RealScalar amed = RealScalar(0);
|
ykuroda |
0:13a5d365ba16
|
96
|
RealScalar abig = RealScalar(0);
|
ykuroda |
0:13a5d365ba16
|
97
|
for(typename Derived::InnerIterator it(vec, 0); it; ++it)
|
ykuroda |
0:13a5d365ba16
|
98
|
{
|
ykuroda |
0:13a5d365ba16
|
99
|
RealScalar ax = abs(it.value());
|
ykuroda |
0:13a5d365ba16
|
100
|
if(ax > ab2) abig += numext::abs2(ax*s2m);
|
ykuroda |
0:13a5d365ba16
|
101
|
else if(ax < b1) asml += numext::abs2(ax*s1m);
|
ykuroda |
0:13a5d365ba16
|
102
|
else amed += numext::abs2(ax);
|
ykuroda |
0:13a5d365ba16
|
103
|
}
|
ykuroda |
0:13a5d365ba16
|
104
|
if(abig > RealScalar(0))
|
ykuroda |
0:13a5d365ba16
|
105
|
{
|
ykuroda |
0:13a5d365ba16
|
106
|
abig = sqrt(abig);
|
ykuroda |
0:13a5d365ba16
|
107
|
if(abig > overfl)
|
ykuroda |
0:13a5d365ba16
|
108
|
{
|
ykuroda |
0:13a5d365ba16
|
109
|
return rbig;
|
ykuroda |
0:13a5d365ba16
|
110
|
}
|
ykuroda |
0:13a5d365ba16
|
111
|
if(amed > RealScalar(0))
|
ykuroda |
0:13a5d365ba16
|
112
|
{
|
ykuroda |
0:13a5d365ba16
|
113
|
abig = abig/s2m;
|
ykuroda |
0:13a5d365ba16
|
114
|
amed = sqrt(amed);
|
ykuroda |
0:13a5d365ba16
|
115
|
}
|
ykuroda |
0:13a5d365ba16
|
116
|
else
|
ykuroda |
0:13a5d365ba16
|
117
|
return abig/s2m;
|
ykuroda |
0:13a5d365ba16
|
118
|
}
|
ykuroda |
0:13a5d365ba16
|
119
|
else if(asml > RealScalar(0))
|
ykuroda |
0:13a5d365ba16
|
120
|
{
|
ykuroda |
0:13a5d365ba16
|
121
|
if (amed > RealScalar(0))
|
ykuroda |
0:13a5d365ba16
|
122
|
{
|
ykuroda |
0:13a5d365ba16
|
123
|
abig = sqrt(amed);
|
ykuroda |
0:13a5d365ba16
|
124
|
amed = sqrt(asml) / s1m;
|
ykuroda |
0:13a5d365ba16
|
125
|
}
|
ykuroda |
0:13a5d365ba16
|
126
|
else
|
ykuroda |
0:13a5d365ba16
|
127
|
return sqrt(asml)/s1m;
|
ykuroda |
0:13a5d365ba16
|
128
|
}
|
ykuroda |
0:13a5d365ba16
|
129
|
else
|
ykuroda |
0:13a5d365ba16
|
130
|
return sqrt(amed);
|
ykuroda |
0:13a5d365ba16
|
131
|
asml = (min)(abig, amed);
|
ykuroda |
0:13a5d365ba16
|
132
|
abig = (max)(abig, amed);
|
ykuroda |
0:13a5d365ba16
|
133
|
if(asml <= abig*relerr)
|
ykuroda |
0:13a5d365ba16
|
134
|
return abig;
|
ykuroda |
0:13a5d365ba16
|
135
|
else
|
ykuroda |
0:13a5d365ba16
|
136
|
return abig * sqrt(RealScalar(1) + numext::abs2(asml/abig));
|
ykuroda |
0:13a5d365ba16
|
137
|
}
|
ykuroda |
0:13a5d365ba16
|
138
|
|
ykuroda |
0:13a5d365ba16
|
139
|
} // end namespace internal
|
ykuroda |
0:13a5d365ba16
|
140
|
|
ykuroda |
0:13a5d365ba16
|
141
|
/** \returns the \em l2 norm of \c *this avoiding underflow and overflow.
|
ykuroda |
0:13a5d365ba16
|
142
|
* This version use a blockwise two passes algorithm:
|
ykuroda |
0:13a5d365ba16
|
143
|
* 1 - find the absolute largest coefficient \c s
|
ykuroda |
0:13a5d365ba16
|
144
|
* 2 - compute \f$ s \Vert \frac{*this}{s} \Vert \f$ in a standard way
|
ykuroda |
0:13a5d365ba16
|
145
|
*
|
ykuroda |
0:13a5d365ba16
|
146
|
* For architecture/scalar types supporting vectorization, this version
|
ykuroda |
0:13a5d365ba16
|
147
|
* is faster than blueNorm(). Otherwise the blueNorm() is much faster.
|
ykuroda |
0:13a5d365ba16
|
148
|
*
|
ykuroda |
0:13a5d365ba16
|
149
|
* \sa norm(), blueNorm(), hypotNorm()
|
ykuroda |
0:13a5d365ba16
|
150
|
*/
|
ykuroda |
0:13a5d365ba16
|
151
|
template<typename Derived>
|
ykuroda |
0:13a5d365ba16
|
152
|
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
|
ykuroda |
0:13a5d365ba16
|
153
|
MatrixBase<Derived>::stableNorm() const
|
ykuroda |
0:13a5d365ba16
|
154
|
{
|
ykuroda |
0:13a5d365ba16
|
155
|
using std::min;
|
ykuroda |
0:13a5d365ba16
|
156
|
using std::sqrt;
|
ykuroda |
0:13a5d365ba16
|
157
|
const Index blockSize = 4096;
|
ykuroda |
0:13a5d365ba16
|
158
|
RealScalar scale(0);
|
ykuroda |
0:13a5d365ba16
|
159
|
RealScalar invScale(1);
|
ykuroda |
0:13a5d365ba16
|
160
|
RealScalar ssq(0); // sum of square
|
ykuroda |
0:13a5d365ba16
|
161
|
enum {
|
ykuroda |
0:13a5d365ba16
|
162
|
Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0
|
ykuroda |
0:13a5d365ba16
|
163
|
};
|
ykuroda |
0:13a5d365ba16
|
164
|
Index n = size();
|
ykuroda |
0:13a5d365ba16
|
165
|
Index bi = internal::first_aligned(derived());
|
ykuroda |
0:13a5d365ba16
|
166
|
if (bi>0)
|
ykuroda |
0:13a5d365ba16
|
167
|
internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale);
|
ykuroda |
0:13a5d365ba16
|
168
|
for (; bi<n; bi+=blockSize)
|
ykuroda |
0:13a5d365ba16
|
169
|
internal::stable_norm_kernel(this->segment(bi,(min)(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
|
ykuroda |
0:13a5d365ba16
|
170
|
return scale * sqrt(ssq);
|
ykuroda |
0:13a5d365ba16
|
171
|
}
|
ykuroda |
0:13a5d365ba16
|
172
|
|
ykuroda |
0:13a5d365ba16
|
173
|
/** \returns the \em l2 norm of \c *this using the Blue's algorithm.
|
ykuroda |
0:13a5d365ba16
|
174
|
* A Portable Fortran Program to Find the Euclidean Norm of a Vector,
|
ykuroda |
0:13a5d365ba16
|
175
|
* ACM TOMS, Vol 4, Issue 1, 1978.
|
ykuroda |
0:13a5d365ba16
|
176
|
*
|
ykuroda |
0:13a5d365ba16
|
177
|
* For architecture/scalar types without vectorization, this version
|
ykuroda |
0:13a5d365ba16
|
178
|
* is much faster than stableNorm(). Otherwise the stableNorm() is faster.
|
ykuroda |
0:13a5d365ba16
|
179
|
*
|
ykuroda |
0:13a5d365ba16
|
180
|
* \sa norm(), stableNorm(), hypotNorm()
|
ykuroda |
0:13a5d365ba16
|
181
|
*/
|
ykuroda |
0:13a5d365ba16
|
182
|
template<typename Derived>
|
ykuroda |
0:13a5d365ba16
|
183
|
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
|
ykuroda |
0:13a5d365ba16
|
184
|
MatrixBase<Derived>::blueNorm() const
|
ykuroda |
0:13a5d365ba16
|
185
|
{
|
ykuroda |
0:13a5d365ba16
|
186
|
return internal::blueNorm_impl(*this);
|
ykuroda |
0:13a5d365ba16
|
187
|
}
|
ykuroda |
0:13a5d365ba16
|
188
|
|
ykuroda |
0:13a5d365ba16
|
189
|
/** \returns the \em l2 norm of \c *this avoiding undeflow and overflow.
|
ykuroda |
0:13a5d365ba16
|
190
|
* This version use a concatenation of hypot() calls, and it is very slow.
|
ykuroda |
0:13a5d365ba16
|
191
|
*
|
ykuroda |
0:13a5d365ba16
|
192
|
* \sa norm(), stableNorm()
|
ykuroda |
0:13a5d365ba16
|
193
|
*/
|
ykuroda |
0:13a5d365ba16
|
194
|
template<typename Derived>
|
ykuroda |
0:13a5d365ba16
|
195
|
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
|
ykuroda |
0:13a5d365ba16
|
196
|
MatrixBase<Derived>::hypotNorm() const
|
ykuroda |
0:13a5d365ba16
|
197
|
{
|
ykuroda |
0:13a5d365ba16
|
198
|
return this->cwiseAbs().redux(internal::scalar_hypot_op<RealScalar>());
|
ykuroda |
0:13a5d365ba16
|
199
|
}
|
ykuroda |
0:13a5d365ba16
|
200
|
|
ykuroda |
0:13a5d365ba16
|
201
|
} // end namespace Eigen
|
ykuroda |
0:13a5d365ba16
|
202
|
|
ykuroda |
0:13a5d365ba16
|
203
|
#endif // EIGEN_STABLENORM_H |