Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
GenericPacketMath.h
00001 // This file is part of Eigen, a lightweight C++ template library 00002 // for linear algebra. 00003 // 00004 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr> 00005 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> 00006 // 00007 // This Source Code Form is subject to the terms of the Mozilla 00008 // Public License v. 2.0. If a copy of the MPL was not distributed 00009 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00010 00011 #ifndef EIGEN_GENERIC_PACKET_MATH_H 00012 #define EIGEN_GENERIC_PACKET_MATH_H 00013 00014 namespace Eigen { 00015 00016 namespace internal { 00017 00018 /** \internal 00019 * \file GenericPacketMath.h 00020 * 00021 * Default implementation for types not supported by the vectorization. 00022 * In practice these functions are provided to make easier the writing 00023 * of generic vectorized code. 00024 */ 00025 00026 #ifndef EIGEN_DEBUG_ALIGNED_LOAD 00027 #define EIGEN_DEBUG_ALIGNED_LOAD 00028 #endif 00029 00030 #ifndef EIGEN_DEBUG_UNALIGNED_LOAD 00031 #define EIGEN_DEBUG_UNALIGNED_LOAD 00032 #endif 00033 00034 #ifndef EIGEN_DEBUG_ALIGNED_STORE 00035 #define EIGEN_DEBUG_ALIGNED_STORE 00036 #endif 00037 00038 #ifndef EIGEN_DEBUG_UNALIGNED_STORE 00039 #define EIGEN_DEBUG_UNALIGNED_STORE 00040 #endif 00041 00042 struct default_packet_traits 00043 { 00044 enum { 00045 HasAdd = 1, 00046 HasSub = 1, 00047 HasMul = 1, 00048 HasNegate = 1, 00049 HasAbs = 1, 00050 HasAbs2 = 1, 00051 HasMin = 1, 00052 HasMax = 1, 00053 HasConj = 1, 00054 HasSetLinear = 1, 00055 00056 HasDiv = 0, 00057 HasSqrt = 0, 00058 HasExp = 0, 00059 HasLog = 0, 00060 HasPow = 0, 00061 00062 HasSin = 0, 00063 HasCos = 0, 00064 HasTan = 0, 00065 HasASin = 0, 00066 HasACos = 0, 00067 HasATan = 0 00068 }; 00069 }; 00070 00071 template<typename T> struct packet_traits : default_packet_traits 00072 { 00073 typedef T type; 00074 enum { 00075 Vectorizable = 0, 00076 size = 1, 00077 AlignedOnScalar = 0 00078 }; 00079 enum { 00080 
HasAdd = 0, 00081 HasSub = 0, 00082 HasMul = 0, 00083 HasNegate = 0, 00084 HasAbs = 0, 00085 HasAbs2 = 0, 00086 HasMin = 0, 00087 HasMax = 0, 00088 HasConj = 0, 00089 HasSetLinear = 0 00090 }; 00091 }; 00092 00093 /** \internal \returns a + b (coeff-wise) */ 00094 template<typename Packet> inline Packet 00095 padd(const Packet& a, 00096 const Packet& b) { return a+b; } 00097 00098 /** \internal \returns a - b (coeff-wise) */ 00099 template<typename Packet> inline Packet 00100 psub(const Packet& a, 00101 const Packet& b) { return a-b; } 00102 00103 /** \internal \returns -a (coeff-wise) */ 00104 template<typename Packet> inline Packet 00105 pnegate(const Packet& a) { return -a; } 00106 00107 /** \internal \returns conj(a) (coeff-wise) */ 00108 template<typename Packet> inline Packet 00109 pconj(const Packet& a) { return numext::conj(a); } 00110 00111 /** \internal \returns a * b (coeff-wise) */ 00112 template<typename Packet> inline Packet 00113 pmul(const Packet& a, 00114 const Packet& b) { return a*b; } 00115 00116 /** \internal \returns a / b (coeff-wise) */ 00117 template<typename Packet> inline Packet 00118 pdiv(const Packet& a, 00119 const Packet& b) { return a/b; } 00120 00121 /** \internal \returns the min of \a a and \a b (coeff-wise) */ 00122 template<typename Packet> inline Packet 00123 pmin(const Packet& a, 00124 const Packet& b) { using std::min; return (min)(a, b); } 00125 00126 /** \internal \returns the max of \a a and \a b (coeff-wise) */ 00127 template<typename Packet> inline Packet 00128 pmax(const Packet& a, 00129 const Packet& b) { using std::max; return (max)(a, b); } 00130 00131 /** \internal \returns the absolute value of \a a */ 00132 template<typename Packet> inline Packet 00133 pabs(const Packet& a) { using std::abs; return abs(a); } 00134 00135 /** \internal \returns the bitwise and of \a a and \a b */ 00136 template<typename Packet> inline Packet 00137 pand(const Packet& a, const Packet& b) { return a & b; } 00138 00139 /** \internal 
\returns the bitwise or of \a a and \a b */ 00140 template<typename Packet> inline Packet 00141 por(const Packet& a, const Packet& b) { return a | b; } 00142 00143 /** \internal \returns the bitwise xor of \a a and \a b */ 00144 template<typename Packet> inline Packet 00145 pxor(const Packet& a, const Packet& b) { return a ^ b; } 00146 00147 /** \internal \returns the bitwise andnot of \a a and \a b */ 00148 template<typename Packet> inline Packet 00149 pandnot(const Packet& a, const Packet& b) { return a & (!b); } 00150 00151 /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */ 00152 template<typename Packet> inline Packet 00153 pload(const typename unpacket_traits<Packet>::type* from) { return *from; } 00154 00155 /** \internal \returns a packet version of \a *from, (un-aligned load) */ 00156 template<typename Packet> inline Packet 00157 ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; } 00158 00159 /** \internal \returns a packet with elements of \a *from duplicated. 00160 * For instance, for a packet of 8 elements, 4 scalar will be read from \a *from and 00161 * duplicated to form: {from[0],from[0],from[1],from[1],,from[2],from[2],,from[3],from[3]} 00162 * Currently, this function is only used for scalar * complex products. 00163 */ 00164 template<typename Packet> inline Packet 00165 ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; } 00166 00167 /** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */ 00168 template<typename Packet> inline Packet 00169 pset1(const typename unpacket_traits<Packet>::type& a) { return a; } 00170 00171 /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). 
*/ 00172 template<typename Scalar> inline typename packet_traits<Scalar>::type 00173 plset(const Scalar& a) { return a; } 00174 00175 /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */ 00176 template<typename Scalar, typename Packet> inline void pstore(Scalar* to, const Packet& from) 00177 { (*to) = from; } 00178 00179 /** \internal copy the packet \a from to \a *to, (un-aligned store) */ 00180 template<typename Scalar, typename Packet> inline void pstoreu(Scalar* to, const Packet& from) 00181 { (*to) = from; } 00182 00183 /** \internal tries to do cache prefetching of \a addr */ 00184 template<typename Scalar> inline void prefetch(const Scalar* addr) 00185 { 00186 #if !defined(_MSC_VER) 00187 __builtin_prefetch(addr); 00188 #endif 00189 } 00190 00191 /** \internal \returns the first element of a packet */ 00192 template<typename Packet> inline typename unpacket_traits<Packet>::type pfirst(const Packet& a) 00193 { return a; } 00194 00195 /** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */ 00196 template<typename Packet> inline Packet 00197 preduxp(const Packet* vecs) { return vecs[0]; } 00198 00199 /** \internal \returns the sum of the elements of \a a*/ 00200 template<typename Packet> inline typename unpacket_traits<Packet>::type predux(const Packet& a) 00201 { return a; } 00202 00203 /** \internal \returns the product of the elements of \a a*/ 00204 template<typename Packet> inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a) 00205 { return a; } 00206 00207 /** \internal \returns the min of the elements of \a a*/ 00208 template<typename Packet> inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) 00209 { return a; } 00210 00211 /** \internal \returns the max of the elements of \a a*/ 00212 template<typename Packet> inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) 00213 { return a; } 00214 00215 /** \internal \returns 
the reversed elements of \a a*/ 00216 template<typename Packet> inline Packet preverse(const Packet& a) 00217 { return a; } 00218 00219 00220 /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */ 00221 template<typename Packet> inline Packet pcplxflip(const Packet& a) 00222 { 00223 // FIXME: uncomment the following in case we drop the internal imag and real functions. 00224 // using std::imag; 00225 // using std::real; 00226 return Packet(imag(a),real(a)); 00227 } 00228 00229 /************************** 00230 * Special math functions 00231 ***************************/ 00232 00233 /** \internal \returns the sine of \a a (coeff-wise) */ 00234 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00235 Packet psin(const Packet& a) { using std::sin; return sin(a); } 00236 00237 /** \internal \returns the cosine of \a a (coeff-wise) */ 00238 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00239 Packet pcos(const Packet& a) { using std::cos; return cos(a); } 00240 00241 /** \internal \returns the tan of \a a (coeff-wise) */ 00242 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00243 Packet ptan(const Packet& a) { using std::tan; return tan(a); } 00244 00245 /** \internal \returns the arc sine of \a a (coeff-wise) */ 00246 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00247 Packet pasin(const Packet& a) { using std::asin; return asin(a); } 00248 00249 /** \internal \returns the arc cosine of \a a (coeff-wise) */ 00250 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00251 Packet pacos(const Packet& a) { using std::acos; return acos(a); } 00252 00253 /** \internal \returns the exp of \a a (coeff-wise) */ 00254 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00255 Packet pexp(const Packet& a) { using std::exp; return exp(a); } 00256 00257 /** \internal 
\returns the log of \a a (coeff-wise) */ 00258 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00259 Packet plog(const Packet& a) { using std::log; return log(a); } 00260 00261 /** \internal \returns the square-root of \a a (coeff-wise) */ 00262 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS 00263 Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); } 00264 00265 /*************************************************************************** 00266 * The following functions might not have to be overwritten for vectorized types 00267 ***************************************************************************/ 00268 00269 /** \internal copy a packet with constant coeficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */ 00270 // NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type) 00271 template<typename Packet> 00272 inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a) 00273 { 00274 pstore(to, pset1<Packet>(a)); 00275 } 00276 00277 /** \internal \returns a * b + c (coeff-wise) */ 00278 template<typename Packet> inline Packet 00279 pmadd(const Packet& a, 00280 const Packet& b, 00281 const Packet& c) 00282 { return padd(pmul(a, b),c); } 00283 00284 /** \internal \returns a packet version of \a *from. 00285 * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */ 00286 template<typename Packet, int LoadMode> 00287 inline Packet ploadt(const typename unpacket_traits<Packet>::type* from) 00288 { 00289 if(LoadMode == Aligned) 00290 return pload<Packet>(from); 00291 else 00292 return ploadu<Packet>(from); 00293 } 00294 00295 /** \internal copy the packet \a from to \a *to. 
00296 * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */ 00297 template<typename Scalar, typename Packet, int LoadMode> 00298 inline void pstoret(Scalar* to, const Packet& from) 00299 { 00300 if(LoadMode == Aligned) 00301 pstore(to, from); 00302 else 00303 pstoreu(to, from); 00304 } 00305 00306 /** \internal default implementation of palign() allowing partial specialization */ 00307 template<int Offset,typename PacketType> 00308 struct palign_impl 00309 { 00310 // by default data are aligned, so there is nothing to be done :) 00311 static inline void run(PacketType&, const PacketType&) {} 00312 }; 00313 00314 /** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements 00315 * of \a first and \a Offset first elements of \a second. 00316 * 00317 * This function is currently only used to optimize matrix-vector products on unligned matrices. 00318 * It takes 2 packets that represent a contiguous memory array, and returns a packet starting 00319 * at the position \a Offset. 
For instance, for packets of 4 elements, we have: 00320 * Input: 00321 * - first = {f0,f1,f2,f3} 00322 * - second = {s0,s1,s2,s3} 00323 * Output: 00324 * - if Offset==0 then {f0,f1,f2,f3} 00325 * - if Offset==1 then {f1,f2,f3,s0} 00326 * - if Offset==2 then {f2,f3,s0,s1} 00327 * - if Offset==3 then {f3,s0,s1,s3} 00328 */ 00329 template<int Offset,typename PacketType> 00330 inline void palign(PacketType& first, const PacketType& second) 00331 { 00332 palign_impl<Offset,PacketType>::run(first,second); 00333 } 00334 00335 /*************************************************************************** 00336 * Fast complex products (GCC generates a function call which is very slow) 00337 ***************************************************************************/ 00338 00339 template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b) 00340 { return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } 00341 00342 template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b) 00343 { return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } 00344 00345 } // end namespace internal 00346 00347 } // end namespace Eigen 00348 00349 #endif // EIGEN_GENERIC_PACKET_MATH_H
Generated on Thu Nov 17 2022 22:01:28 by
1.7.2