2014 Eurobot fork
Dependencies: mbed-rtos mbed QEI
tvmet/loop/Gemm.h@92:4a1225fbb146, 2013-10-15 (annotated)
- Committer:
- rsavitski
- Date:
- Tue Oct 15 12:19:32 2013 +0000
- Revision:
- 92:4a1225fbb146
- Parent:
- 15:9c5aaeda36dc
touch: ripped out 2013-specific bits. Need to address "2014" comments. Rewrite AI layer and other deleted parts.
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
madcowswe | 15:9c5aaeda36dc | 1 | /* |
madcowswe | 15:9c5aaeda36dc | 2 | * Tiny Vector Matrix Library |
madcowswe | 15:9c5aaeda36dc | 3 | * Dense Vector Matrix Library of Tiny size using Expression Templates |
madcowswe | 15:9c5aaeda36dc | 4 | * |
madcowswe | 15:9c5aaeda36dc | 5 | * Copyright (C) 2001 - 2007 Olaf Petzold <opetzold@users.sourceforge.net> |
madcowswe | 15:9c5aaeda36dc | 6 | * |
madcowswe | 15:9c5aaeda36dc | 7 | * This library is free software; you can redistribute it and/or |
madcowswe | 15:9c5aaeda36dc | 8 | * modify it under the terms of the GNU Lesser General Public |
madcowswe | 15:9c5aaeda36dc | 9 | * License as published by the Free Software Foundation; either |
madcowswe | 15:9c5aaeda36dc | 10 | * version 2.1 of the License, or (at your option) any later version. |
madcowswe | 15:9c5aaeda36dc | 11 | * |
madcowswe | 15:9c5aaeda36dc | 12 | * This library is distributed in the hope that it will be useful, |
madcowswe | 15:9c5aaeda36dc | 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
madcowswe | 15:9c5aaeda36dc | 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
madcowswe | 15:9c5aaeda36dc | 15 | * Lesser General Public License for more details. |
madcowswe | 15:9c5aaeda36dc | 16 | * |
madcowswe | 15:9c5aaeda36dc | 17 | * You should have received a copy of the GNU Lesser General Public |
madcowswe | 15:9c5aaeda36dc | 18 | * License along with this library; if not, write to the Free Software |
madcowswe | 15:9c5aaeda36dc | 19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
madcowswe | 15:9c5aaeda36dc | 20 | * |
madcowswe | 15:9c5aaeda36dc | 21 | * $Id: Gemm.h,v 1.12 2007-06-23 15:58:59 opetzold Exp $ |
madcowswe | 15:9c5aaeda36dc | 22 | */ |
madcowswe | 15:9c5aaeda36dc | 23 | |
madcowswe | 15:9c5aaeda36dc | 24 | #ifndef TVMET_LOOP_GEMM_H |
madcowswe | 15:9c5aaeda36dc | 25 | #define TVMET_LOOP_GEMM_H |
madcowswe | 15:9c5aaeda36dc | 26 | |
madcowswe | 15:9c5aaeda36dc | 27 | namespace tvmet { |
madcowswe | 15:9c5aaeda36dc | 28 | |
madcowswe | 15:9c5aaeda36dc | 29 | namespace loop { |
madcowswe | 15:9c5aaeda36dc | 30 | |
madcowswe | 15:9c5aaeda36dc | 31 | |
madcowswe | 15:9c5aaeda36dc | 32 | /** |
madcowswe | 15:9c5aaeda36dc | 33 | * \class gemm Gemm.h "tvmet/loop/Gemm.h" |
madcowswe | 15:9c5aaeda36dc | 34 | * \brief Computes one element of a matrix-matrix product with an eight-fold unrolled inner loop. |
madcowswe | 15:9c5aaeda36dc | 35 | * The product follows the formula |
madcowswe | 15:9c5aaeda36dc | 36 | * \f[ |
madcowswe | 15:9c5aaeda36dc | 37 | * M_1\,M_2 |
madcowswe | 15:9c5aaeda36dc | 38 | * \f] |
madcowswe | 15:9c5aaeda36dc | 39 | * \par Example: |
madcowswe | 15:9c5aaeda36dc | 40 | * \code |
madcowswe | 15:9c5aaeda36dc | 41 | * template<class T, std::size_t Rows1, std::size_t Cols1, std::size_t Cols2> |
madcowswe | 15:9c5aaeda36dc | 42 | * inline |
madcowswe | 15:9c5aaeda36dc | 43 | * void |
madcowswe | 15:9c5aaeda36dc | 44 | * prod(const Matrix<T, Rows1, Cols1>& lhs, const Matrix<T, Cols1, Cols2>& rhs, |
madcowswe | 15:9c5aaeda36dc | 45 | * Matrix<T, Rows1, Cols2>& dest) |
madcowswe | 15:9c5aaeda36dc | 46 | * { |
madcowswe | 15:9c5aaeda36dc | 47 | * for (std::size_t i = 0; i != Rows1; ++i) { |
madcowswe | 15:9c5aaeda36dc | 48 | * for (std::size_t j = 0; j != Cols2; ++j) { |
madcowswe | 15:9c5aaeda36dc | 49 | * dest(i, j) = tvmet::loop::gemm<Rows1, Cols1, Cols2>().prod(lhs, rhs, i, j); |
madcowswe | 15:9c5aaeda36dc | 50 | * } |
madcowswe | 15:9c5aaeda36dc | 51 | * } |
madcowswe | 15:9c5aaeda36dc | 52 | * } |
madcowswe | 15:9c5aaeda36dc | 53 | * \endcode |
madcowswe | 15:9c5aaeda36dc | 54 | * \note The number of rows of the rhs matrix has to be equal to the number of columns of the lhs matrix. |
madcowswe | 15:9c5aaeda36dc | 55 | * The result is a (Rows1 x Cols2) matrix. |
madcowswe | 15:9c5aaeda36dc | 56 | */ |
madcowswe | 15:9c5aaeda36dc | 57 | template<std::size_t Rows1, std::size_t Cols1, |
madcowswe | 15:9c5aaeda36dc | 58 | std::size_t Cols2> |
madcowswe | 15:9c5aaeda36dc | 59 | class gemm |
madcowswe | 15:9c5aaeda36dc | 60 | { |
madcowswe | 15:9c5aaeda36dc | 61 | gemm(const gemm&); /* declared without a body in the private section: copying is disabled */ |
madcowswe | 15:9c5aaeda36dc | 62 | gemm& operator=(const gemm&); /* likewise for copy assignment */ |
madcowswe | 15:9c5aaeda36dc | 63 | |
madcowswe | 15:9c5aaeda36dc | 64 | private: |
madcowswe | 15:9c5aaeda36dc | 65 | enum { |
madcowswe | 15:9c5aaeda36dc | 66 | count = Cols1, /* number of lhs(i, k) * rhs(k, j) terms per result element */ |
madcowswe | 15:9c5aaeda36dc | 67 | N = (count+7)/8 /* passes through the unrolled loop: count / 8 rounded up */ |
madcowswe | 15:9c5aaeda36dc | 68 | }; |
madcowswe | 15:9c5aaeda36dc | 69 | |
madcowswe | 15:9c5aaeda36dc | 70 | public: |
madcowswe | 15:9c5aaeda36dc | 71 | gemm() { } |
madcowswe | 15:9c5aaeda36dc | 72 | /* prod(): returns the sum over k of lhs(i, k) * rhs(k, j), accumulated in the PromoteTraits type of both operands' value_type */ |
madcowswe | 15:9c5aaeda36dc | 73 | public: |
madcowswe | 15:9c5aaeda36dc | 74 | template<class E1, class E2> |
madcowswe | 15:9c5aaeda36dc | 75 | static inline |
madcowswe | 15:9c5aaeda36dc | 76 | typename PromoteTraits< |
madcowswe | 15:9c5aaeda36dc | 77 | typename E1::value_type, |
madcowswe | 15:9c5aaeda36dc | 78 | typename E2::value_type |
madcowswe | 15:9c5aaeda36dc | 79 | >::value_type |
madcowswe | 15:9c5aaeda36dc | 80 | prod(const E1& lhs, const E2& rhs, std::size_t i, std::size_t j) { |
madcowswe | 15:9c5aaeda36dc | 81 | typename PromoteTraits< |
madcowswe | 15:9c5aaeda36dc | 82 | typename E1::value_type, |
madcowswe | 15:9c5aaeda36dc | 83 | typename E2::value_type |
madcowswe | 15:9c5aaeda36dc | 84 | >::value_type sum(0); /* running total, starts at zero */ |
madcowswe | 15:9c5aaeda36dc | 85 | std::size_t k(0); /* index along the lhs columns / rhs rows */ |
madcowswe | 15:9c5aaeda36dc | 86 | std::size_t n(N); /* passes through the do-while still to run */ |
madcowswe | 15:9c5aaeda36dc | 87 | /* NOTE(review): with Cols1 == 0, N is 0 yet case 0 is still entered and --n wraps around; confirm zero-sized matrices never reach this code */ |
madcowswe | 15:9c5aaeda36dc | 88 | // Duff's device: the switch jumps into the loop body so the first pass handles the count % 8 leftover terms |
madcowswe | 15:9c5aaeda36dc | 89 | switch(count % 8) { |
madcowswe | 15:9c5aaeda36dc | 90 | case 0: do { sum += lhs(i, k) * rhs(k, j); ++k; |
madcowswe | 15:9c5aaeda36dc | 91 | case 7: sum += lhs(i, k) * rhs(k, j); ++k; |
madcowswe | 15:9c5aaeda36dc | 92 | case 6: sum += lhs(i, k) * rhs(k, j); ++k; |
madcowswe | 15:9c5aaeda36dc | 93 | case 5: sum += lhs(i, k) * rhs(k, j); ++k; |
madcowswe | 15:9c5aaeda36dc | 94 | case 4: sum += lhs(i, k) * rhs(k, j); ++k; |
madcowswe | 15:9c5aaeda36dc | 95 | case 3: sum += lhs(i, k) * rhs(k, j); ++k; |
madcowswe | 15:9c5aaeda36dc | 96 | case 2: sum += lhs(i, k) * rhs(k, j); ++k; |
madcowswe | 15:9c5aaeda36dc | 97 | case 1: sum += lhs(i, k) * rhs(k, j); ++k; |
madcowswe | 15:9c5aaeda36dc | 98 | } while(--n != 0); /* every later pass restarts at the top and runs all eight statements */ |
madcowswe | 15:9c5aaeda36dc | 99 | } |
madcowswe | 15:9c5aaeda36dc | 100 | |
madcowswe | 15:9c5aaeda36dc | 101 | return sum; |
madcowswe | 15:9c5aaeda36dc | 102 | } |
madcowswe | 15:9c5aaeda36dc | 103 | }; |
madcowswe | 15:9c5aaeda36dc | 104 | |
madcowswe | 15:9c5aaeda36dc | 105 | |
madcowswe | 15:9c5aaeda36dc | 106 | } // namespace loop |
madcowswe | 15:9c5aaeda36dc | 107 | |
madcowswe | 15:9c5aaeda36dc | 108 | } // namespace tvmet |
madcowswe | 15:9c5aaeda36dc | 109 | |
madcowswe | 15:9c5aaeda36dc | 110 | #endif /* TVMET_LOOP_GEMM_H */ |
madcowswe | 15:9c5aaeda36dc | 111 | |
madcowswe | 15:9c5aaeda36dc | 112 | // Local Variables: |
madcowswe | 15:9c5aaeda36dc | 113 | // mode:C++ |
madcowswe | 15:9c5aaeda36dc | 114 | // tab-width:8 |
madcowswe | 15:9c5aaeda36dc | 115 | // End: |