Eigen | Mbed

Users » pmic » Code » Eigen » Documentation
Embed: (wiki syntax)
Show/hide line numbers GenericPacketMath.h Source File
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
00005 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
00006 //
00007 // This Source Code Form is subject to the terms of the Mozilla
00008 // Public License v. 2.0. If a copy of the MPL was not distributed
00009 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
00010 
00011 #ifndef EIGEN_GENERIC_PACKET_MATH_H
00012 #define EIGEN_GENERIC_PACKET_MATH_H
00013 
00014 namespace Eigen {
00015 
00016 namespace internal {
00017 
/** \internal
  * \file GenericPacketMath.h
  *
  * Default implementation for types not supported by the vectorization.
  * In practice, these functions are provided to make it easier to write
  * generic vectorized code.
  */
00025 
// Default definitions of the load/store debug hooks.
// Each macro expands to nothing unless it was already defined before this
// point, in which case the earlier definition is kept.
// NOTE(review): presumably expanded inside the architecture-specific
// load/store implementations -- those are not visible in this file.
#if !defined(EIGEN_DEBUG_ALIGNED_LOAD)
#  define EIGEN_DEBUG_ALIGNED_LOAD
#endif

#if !defined(EIGEN_DEBUG_UNALIGNED_LOAD)
#  define EIGEN_DEBUG_UNALIGNED_LOAD
#endif

#if !defined(EIGEN_DEBUG_ALIGNED_STORE)
#  define EIGEN_DEBUG_ALIGNED_STORE
#endif

#if !defined(EIGEN_DEBUG_UNALIGNED_STORE)
#  define EIGEN_DEBUG_UNALIGNED_STORE
#endif
00041 
/** \internal
  * Baseline set of capability flags inherited by packet_traits.
  *
  * A flag equal to 1 marks the operation as enabled by default, 0 as disabled.
  * NOTE(review): presumably each flag advertises whether a packet-level
  * implementation of the operation exists -- confirm against the users of
  * these traits.
  */
struct default_packet_traits
{
  enum {
    // Operations enabled by default.
    HasAdd       = 1,
    HasSub       = 1,
    HasMul       = 1,
    HasNegate    = 1,
    HasAbs       = 1,
    HasAbs2      = 1,
    HasMin       = 1,
    HasMax       = 1,
    HasConj      = 1,
    HasSetLinear = 1,

    // Division and elementary math functions: disabled by default.
    HasDiv       = 0,
    HasSqrt      = 0,
    HasExp       = 0,
    HasLog       = 0,
    HasPow       = 0,

    // Trigonometric functions: disabled by default.
    HasSin       = 0,
    HasCos       = 0,
    HasTan       = 0,
    HasASin      = 0,
    HasACos      = 0,
    HasATan      = 0
  };
};
00070 
00071 template<typename T> struct packet_traits : default_packet_traits
00072 {
00073   typedef T type;
00074   enum {
00075     Vectorizable = 0,
00076     size = 1,
00077     AlignedOnScalar = 0
00078   };
00079   enum {
00080     HasAdd    = 0,
00081     HasSub    = 0,
00082     HasMul    = 0,
00083     HasNegate = 0,
00084     HasAbs    = 0,
00085     HasAbs2   = 0,
00086     HasMin    = 0,
00087     HasMax    = 0,
00088     HasConj   = 0,
00089     HasSetLinear = 0
00090   };
00091 };
00092 
/** \internal
  * Generic fallback for addition.
  * \returns \a a + \a b (coeff-wise)
  */
template<typename Packet>
inline Packet padd(const Packet& a, const Packet& b)
{
  return a + b;
}
00097 
/** \internal
  * Generic fallback for subtraction.
  * \returns \a a - \a b (coeff-wise)
  */
template<typename Packet>
inline Packet psub(const Packet& a, const Packet& b)
{
  return a - b;
}
00102 
/** \internal
  * Generic fallback for negation.
  * \returns -\a a (coeff-wise)
  */
template<typename Packet>
inline Packet pnegate(const Packet& a)
{
  return -a;
}
00106 
00107 /** \internal \returns conj(a) (coeff-wise) */
00108 template<typename Packet> inline Packet
00109 pconj(const Packet& a) { return numext::conj(a); }
00110 
/** \internal
  * Generic fallback for multiplication.
  * \returns \a a * \a b (coeff-wise)
  */
template<typename Packet>
inline Packet pmul(const Packet& a, const Packet& b)
{
  return a * b;
}
00115 
/** \internal
  * Generic fallback for division.
  * \returns \a a / \a b (coeff-wise)
  */
template<typename Packet>
inline Packet pdiv(const Packet& a, const Packet& b)
{
  return a / b;
}
00120 
/** \internal
  * Generic fallback for the minimum, implemented with std::min.
  * \returns the min of \a a and \a b (coeff-wise)
  */
template<typename Packet>
inline Packet pmin(const Packet& a, const Packet& b)
{
  using std::min;
  // The parentheses keep a function-like min() macro from being expanded here.
  return (min)(a, b);
}
00125 
/** \internal
  * Generic fallback for the maximum, implemented with std::max.
  * \returns the max of \a a and \a b (coeff-wise)
  */
template<typename Packet>
inline Packet pmax(const Packet& a, const Packet& b)
{
  using std::max;
  // The parentheses keep a function-like max() macro from being expanded here.
  return (max)(a, b);
}
00130 
/** \internal
  * Generic fallback for the absolute value. std::abs is brought into scope
  * and the call is left unqualified.
  * \returns the absolute value of \a a
  */
template<typename Packet>
inline Packet pabs(const Packet& a)
{
  using std::abs;
  return abs(a);
}
00134 
/** \internal
  * Generic fallback for bitwise AND.
  * \returns the bitwise and of \a a and \a b
  */
template<typename Packet>
inline Packet pand(const Packet& a, const Packet& b)
{
  return a & b;
}
00138 
/** \internal
  * Generic fallback for bitwise OR.
  * \returns the bitwise or of \a a and \a b
  */
template<typename Packet>
inline Packet por(const Packet& a, const Packet& b)
{
  return a | b;
}
00142 
/** \internal
  * Generic fallback for bitwise XOR.
  * \returns the bitwise xor of \a a and \a b
  */
template<typename Packet>
inline Packet pxor(const Packet& a, const Packet& b)
{
  return a ^ b;
}
00146 
/** \internal
  * Generic fallback for bitwise AND-NOT.
  * \returns the bitwise andnot of \a a and \a b, i.e. \a a with every bit
  *          that is set in \a b cleared: a & ~b
  */
template<typename Packet>
inline Packet pandnot(const Packet& a, const Packet& b)
{
  // Use the bitwise complement (~), not the logical negation (!): the latter
  // collapses b to 0 or 1 and would wipe out all of a for any non-zero b.
  return a & (~b);
}
00150 
00151 /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
00152 template<typename Packet> inline Packet
00153 pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
00154 
00155 /** \internal \returns a packet version of \a *from, (un-aligned load) */
00156 template<typename Packet> inline Packet
00157 ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
00158 
00159 /** \internal \returns a packet with elements of \a *from duplicated.
00160   * For instance, for a packet of 8 elements, 4 scalar will be read from \a *from and
00161   * duplicated to form: {from[0],from[0],from[1],from[1],,from[2],from[2],,from[3],from[3]}
00162   * Currently, this function is only used for scalar * complex products.
00163  */
00164 template<typename Packet> inline Packet
00165 ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
00166 
00167 /** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
00168 template<typename Packet> inline Packet
00169 pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
00170 
00171 /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
00172 template<typename Scalar> inline typename packet_traits<Scalar>::type
00173 plset(const Scalar& a) { return a; }
00174 
/** \internal
  * Aligned store: copy the packet \a from to \a *to, \a to must be 16 bytes
  * aligned. This generic version is a plain assignment through the pointer.
  */
template<typename Scalar, typename Packet>
inline void pstore(Scalar* to, const Packet& from)
{
  *to = from;
}
00178 
/** \internal
  * Unaligned store: copy the packet \a from to \a *to.
  * This generic version is a plain assignment through the pointer.
  */
template<typename Scalar, typename Packet>
inline void pstoreu(Scalar* to, const Packet& from)
{
  *to = from;
}
00182 
/** \internal
  * Tries to do cache prefetching of \a addr.
  *
  * On every compiler except MSVC this forwards to __builtin_prefetch.
  * On MSVC the function is a no-op.
  */
template<typename Scalar> inline void prefetch(const Scalar* addr)
{
#if !defined(_MSC_VER)
  __builtin_prefetch(addr);
#else
  // Nothing is prefetched on MSVC; reference the argument so the build stays
  // free of unused-parameter warnings.
  (void)addr;
#endif
}
00190 
00191 /** \internal \returns the first element of a packet */
00192 template<typename Packet> inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
00193 { return a; }
00194 
/** \internal
  * \returns a packet where the element i contains the sum of the packet
  *          \a vecs[i]
  *
  * This generic version returns \a vecs[0].
  */
template<typename Packet>
inline Packet preduxp(const Packet* vecs)
{
  return vecs[0];
}
00198 
00199 /** \internal \returns the sum of the elements of \a a*/
00200 template<typename Packet> inline typename unpacket_traits<Packet>::type predux(const Packet& a)
00201 { return a; }
00202 
00203 /** \internal \returns the product of the elements of \a a*/
00204 template<typename Packet> inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
00205 { return a; }
00206 
00207 /** \internal \returns the min of the elements of \a a*/
00208 template<typename Packet> inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
00209 { return a; }
00210 
00211 /** \internal \returns the max of the elements of \a a*/
00212 template<typename Packet> inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
00213 { return a; }
00214 
/** \internal
  * This generic version returns \a a unchanged.
  * \returns the reversed elements of \a a
  */
template<typename Packet>
inline Packet preverse(const Packet& a)
{
  return a;
}
00218 
00219 
/** \internal
  * \returns \a a with real and imaginary part flipped (for complex type only)
  *
  * The result is built as Packet(imag(a), real(a)); imag and real are called
  * unqualified.
  */
template<typename Packet>
inline Packet pcplxflip(const Packet& a)
{
  // FIXME: uncomment the following in case we drop the internal imag and real functions.
  // using std::imag;
  // using std::real;
  return Packet(imag(a),
                real(a));
}
00228 
00229 /**************************
00230 * Special math functions
00231 ***************************/
00232 
00233 /** \internal \returns the sine of \a a (coeff-wise) */
00234 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00235 Packet psin(const Packet& a) { using std::sin; return sin(a); }
00236 
00237 /** \internal \returns the cosine of \a a (coeff-wise) */
00238 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00239 Packet pcos(const Packet& a) { using std::cos; return cos(a); }
00240 
00241 /** \internal \returns the tan of \a a (coeff-wise) */
00242 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00243 Packet ptan(const Packet& a) { using std::tan; return tan(a); }
00244 
00245 /** \internal \returns the arc sine of \a a (coeff-wise) */
00246 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00247 Packet pasin(const Packet& a) { using std::asin; return asin(a); }
00248 
00249 /** \internal \returns the arc cosine of \a a (coeff-wise) */
00250 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00251 Packet pacos(const Packet& a) { using std::acos; return acos(a); }
00252 
00253 /** \internal \returns the exp of \a a (coeff-wise) */
00254 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00255 Packet pexp(const Packet& a) { using std::exp; return exp(a); }
00256 
00257 /** \internal \returns the log of \a a (coeff-wise) */
00258 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00259 Packet plog(const Packet& a) { using std::log; return log(a); }
00260 
00261 /** \internal \returns the square-root of \a a (coeff-wise) */
00262 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
00263 Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
00264 
00265 /***************************************************************************
00266 * The following functions might not have to be overwritten for vectorized types
00267 ***************************************************************************/
00268 
00269 /** \internal copy a packet with constant coeficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
00270 // NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
00271 template<typename Packet>
00272 inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
00273 {
00274   pstore(to, pset1<Packet>(a));
00275 }
00276 
/** \internal
  * Multiply-add built from pmul followed by padd.
  * \returns \a a * \a b + \a c (coeff-wise)
  */
template<typename Packet>
inline Packet pmadd(const Packet& a, const Packet& b, const Packet& c)
{
  const Packet product = pmul(a, b);
  return padd(product, c);
}
00283 
00284 /** \internal \returns a packet version of \a *from.
00285   * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */
00286 template<typename Packet, int LoadMode>
00287 inline Packet ploadt(const typename unpacket_traits<Packet>::type* from)
00288 {
00289   if(LoadMode == Aligned)
00290     return pload<Packet>(from);
00291   else
00292     return ploadu<Packet>(from);
00293 }
00294 
00295 /** \internal copy the packet \a from to \a *to.
00296   * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */
00297 template<typename Scalar, typename Packet, int LoadMode>
00298 inline void pstoret(Scalar* to, const Packet& from)
00299 {
00300   if(LoadMode == Aligned)
00301     pstore(to, from);
00302   else
00303     pstoreu(to, from);
00304 }
00305 
/** \internal
  * Default implementation of palign(), written as a struct so that it can be
  * partially specialized. The default run() leaves both arguments untouched.
  */
template<int Offset, typename PacketType>
struct palign_impl
{
  // by default data are aligned, so there is nothing to be done :)
  static inline void run(PacketType& /*first*/, const PacketType& /*second*/)
  {
  }
};
00313 
00314 /** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
00315   * of \a first and \a Offset first elements of \a second.
00316   * 
00317   * This function is currently only used to optimize matrix-vector products on unligned matrices.
00318   * It takes 2 packets that represent a contiguous memory array, and returns a packet starting
00319   * at the position \a Offset. For instance, for packets of 4 elements, we have:
00320   *  Input:
00321   *  - first = {f0,f1,f2,f3}
00322   *  - second = {s0,s1,s2,s3}
00323   * Output: 
00324   *   - if Offset==0 then {f0,f1,f2,f3}
00325   *   - if Offset==1 then {f1,f2,f3,s0}
00326   *   - if Offset==2 then {f2,f3,s0,s1}
00327   *   - if Offset==3 then {f3,s0,s1,s3}
00328   */
00329 template<int Offset,typename PacketType>
00330 inline void palign(PacketType& first, const PacketType& second)
00331 {
00332   palign_impl<Offset,PacketType>::run(first,second);
00333 }
00334 
00335 /***************************************************************************
00336 * Fast complex products (GCC generates a function call which is very slow)
00337 ***************************************************************************/
00338 
00339 template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
00340 { return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
00341 
00342 template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
00343 { return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
00344 
00345 } // end namespace internal
00346 
00347 } // end namespace Eigen
00348 
00349 #endif // EIGEN_GENERIC_PACKET_MATH_H
Repository toolbox

Repository details

Type:	Library
Created:	17 Nov 2022
Imports:	1
Forks:	0
Commits:	2
Dependents:	0
Dependencies:	0
Followers:	1
Important changes to repositories hosted on mbed.com

GenericPacketMath.h

Repository toolbox

Repository details

Important Information for this Arm website

Important changes to repositories hosted on mbed.com

GenericPacketMath.h

Repository toolbox

Repository details

Important Information for this Arm website

Access Warning