Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of Eurobot_2012_Secondary by
tvmet/loop/Gemm.h@1:cc2a9eb0bd55, 2012-10-17 (annotated)
- Committer:
- narshu
- Date:
- Wed Oct 17 22:25:31 2012 +0000
- Revision:
- 1:cc2a9eb0bd55
Commit before publishing
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
narshu | 1:cc2a9eb0bd55 | 1 | /* |
narshu | 1:cc2a9eb0bd55 | 2 | * Tiny Vector Matrix Library |
narshu | 1:cc2a9eb0bd55 | 3 | * Dense Vector Matrix Library of Tiny size using Expression Templates |
narshu | 1:cc2a9eb0bd55 | 4 | * |
narshu | 1:cc2a9eb0bd55 | 5 | * Copyright (C) 2001 - 2007 Olaf Petzold <opetzold@users.sourceforge.net> |
narshu | 1:cc2a9eb0bd55 | 6 | * |
narshu | 1:cc2a9eb0bd55 | 7 | * This library is free software; you can redistribute it and/or |
narshu | 1:cc2a9eb0bd55 | 8 | * modify it under the terms of the GNU Lesser General Public |
narshu | 1:cc2a9eb0bd55 | 9 | * License as published by the Free Software Foundation; either |
narshu | 1:cc2a9eb0bd55 | 10 | * version 2.1 of the License, or (at your option) any later version. |
narshu | 1:cc2a9eb0bd55 | 11 | * |
narshu | 1:cc2a9eb0bd55 | 12 | * This library is distributed in the hope that it will be useful, |
narshu | 1:cc2a9eb0bd55 | 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
narshu | 1:cc2a9eb0bd55 | 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
narshu | 1:cc2a9eb0bd55 | 15 | * Lesser General Public License for more details. |
narshu | 1:cc2a9eb0bd55 | 16 | * |
narshu | 1:cc2a9eb0bd55 | 17 | * You should have received a copy of the GNU Lesser General Public |
narshu | 1:cc2a9eb0bd55 | 18 | * License along with this library; if not, write to the Free Software |
narshu | 1:cc2a9eb0bd55 | 19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
narshu | 1:cc2a9eb0bd55 | 20 | * |
narshu | 1:cc2a9eb0bd55 | 21 | * $Id: Gemm.h,v 1.12 2007-06-23 15:58:59 opetzold Exp $ |
narshu | 1:cc2a9eb0bd55 | 22 | */ |
narshu | 1:cc2a9eb0bd55 | 23 | |
narshu | 1:cc2a9eb0bd55 | 24 | #ifndef TVMET_LOOP_GEMM_H |
narshu | 1:cc2a9eb0bd55 | 25 | #define TVMET_LOOP_GEMM_H |
narshu | 1:cc2a9eb0bd55 | 26 | |
narshu | 1:cc2a9eb0bd55 | 27 | namespace tvmet { |
narshu | 1:cc2a9eb0bd55 | 28 | |
narshu | 1:cc2a9eb0bd55 | 29 | namespace loop { |
narshu | 1:cc2a9eb0bd55 | 30 | |
narshu | 1:cc2a9eb0bd55 | 31 | |
narshu | 1:cc2a9eb0bd55 | 32 | /** |
narshu | 1:cc2a9eb0bd55 | 33 | * \class gemm Gemm.h "tvmet/loop/Gemm.h" |
narshu | 1:cc2a9eb0bd55 | 34 | * \brief class for matrix-matrix product using loop unrolling. |
narshu | 1:cc2a9eb0bd55 | 35 | * using formula |
narshu | 1:cc2a9eb0bd55 | 36 | * \f[ |
narshu | 1:cc2a9eb0bd55 | 37 | * M_1\,M_2 |
narshu | 1:cc2a9eb0bd55 | 38 | * \f] |
narshu | 1:cc2a9eb0bd55 | 39 | * \par Example: |
narshu | 1:cc2a9eb0bd55 | 40 | * \code |
narshu | 1:cc2a9eb0bd55 | 41 | * template<class T, std::size_t Rows1, std::size_t Cols1, std::size_t Cols2> |
narshu | 1:cc2a9eb0bd55 | 42 | * inline |
narshu | 1:cc2a9eb0bd55 | 43 | * void |
narshu | 1:cc2a9eb0bd55 | 44 | * prod(const Matrix<T, Rows1, Cols1>& lhs, const Matrix<T, Cols1, Cols2>& rhs, |
narshu | 1:cc2a9eb0bd55 | 45 | * Matrix<T, Rows1, Cols2>& dest) |
narshu | 1:cc2a9eb0bd55 | 46 | * { |
narshu | 1:cc2a9eb0bd55 | 47 | * for (std::size_t i = 0; i != Rows1; ++i) { |
narshu | 1:cc2a9eb0bd55 | 48 | * for (std::size_t j = 0; j != Cols2; ++j) { |
narshu | 1:cc2a9eb0bd55 | 49 | * dest(i, j) = tvmet::loop::gemm<Rows1, Cols1, Cols2>().prod(lhs, rhs, i, j); |
narshu | 1:cc2a9eb0bd55 | 50 | * } |
narshu | 1:cc2a9eb0bd55 | 51 | * } |
narshu | 1:cc2a9eb0bd55 | 52 | * } |
narshu | 1:cc2a9eb0bd55 | 53 | * \endcode |
narshu | 1:cc2a9eb0bd55 | 54 | * \note The number of rows of the rhs matrix has to be equal to the number of columns of the lhs matrix. |
narshu | 1:cc2a9eb0bd55 | 55 | * The result is a (Rows1 x Cols2) matrix. |
narshu | 1:cc2a9eb0bd55 | 56 | */ |
narshu | 1:cc2a9eb0bd55 | 57 | template<std::size_t Rows1, std::size_t Cols1, |
narshu | 1:cc2a9eb0bd55 | 58 | std::size_t Cols2> |
narshu | 1:cc2a9eb0bd55 | 59 | class gemm |
narshu | 1:cc2a9eb0bd55 | 60 | { |
narshu | 1:cc2a9eb0bd55 | 61 | gemm(const gemm&); |
narshu | 1:cc2a9eb0bd55 | 62 | gemm& operator=(const gemm&); |
narshu | 1:cc2a9eb0bd55 | 63 | |
narshu | 1:cc2a9eb0bd55 | 64 | private: |
narshu | 1:cc2a9eb0bd55 | 65 | enum { |
narshu | 1:cc2a9eb0bd55 | 66 | count = Cols1, |
narshu | 1:cc2a9eb0bd55 | 67 | N = (count+7)/8 |
narshu | 1:cc2a9eb0bd55 | 68 | }; |
narshu | 1:cc2a9eb0bd55 | 69 | |
narshu | 1:cc2a9eb0bd55 | 70 | public: |
narshu | 1:cc2a9eb0bd55 | 71 | gemm() { } |
narshu | 1:cc2a9eb0bd55 | 72 | |
narshu | 1:cc2a9eb0bd55 | 73 | public: |
narshu | 1:cc2a9eb0bd55 | 74 | template<class E1, class E2> |
narshu | 1:cc2a9eb0bd55 | 75 | static inline |
narshu | 1:cc2a9eb0bd55 | 76 | typename PromoteTraits< |
narshu | 1:cc2a9eb0bd55 | 77 | typename E1::value_type, |
narshu | 1:cc2a9eb0bd55 | 78 | typename E2::value_type |
narshu | 1:cc2a9eb0bd55 | 79 | >::value_type |
narshu | 1:cc2a9eb0bd55 | 80 | prod(const E1& lhs, const E2& rhs, std::size_t i, std::size_t j) { |
narshu | 1:cc2a9eb0bd55 | 81 | typename PromoteTraits< |
narshu | 1:cc2a9eb0bd55 | 82 | typename E1::value_type, |
narshu | 1:cc2a9eb0bd55 | 83 | typename E2::value_type |
narshu | 1:cc2a9eb0bd55 | 84 | >::value_type sum(0); |
narshu | 1:cc2a9eb0bd55 | 85 | std::size_t k(0); |
narshu | 1:cc2a9eb0bd55 | 86 | std::size_t n(N); |
narshu | 1:cc2a9eb0bd55 | 87 | |
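narshu | 1:cc2a9eb0bd55 | 88 | // Duff's device: jump into the 8-way unrolled loop at the remainder (count % 8), then repeat full passes until N passes are done |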
narshu | 1:cc2a9eb0bd55 | 89 | switch(count % 8) { |
narshu | 1:cc2a9eb0bd55 | 90 | case 0: do { sum += lhs(i, k) * rhs(k, j); ++k; |
narshu | 1:cc2a9eb0bd55 | 91 | case 7: sum += lhs(i, k) * rhs(k, j); ++k; |
narshu | 1:cc2a9eb0bd55 | 92 | case 6: sum += lhs(i, k) * rhs(k, j); ++k; |
narshu | 1:cc2a9eb0bd55 | 93 | case 5: sum += lhs(i, k) * rhs(k, j); ++k; |
narshu | 1:cc2a9eb0bd55 | 94 | case 4: sum += lhs(i, k) * rhs(k, j); ++k; |
narshu | 1:cc2a9eb0bd55 | 95 | case 3: sum += lhs(i, k) * rhs(k, j); ++k; |
narshu | 1:cc2a9eb0bd55 | 96 | case 2: sum += lhs(i, k) * rhs(k, j); ++k; |
narshu | 1:cc2a9eb0bd55 | 97 | case 1: sum += lhs(i, k) * rhs(k, j); ++k; |
narshu | 1:cc2a9eb0bd55 | 98 | } while(--n != 0); |
narshu | 1:cc2a9eb0bd55 | 99 | } |
narshu | 1:cc2a9eb0bd55 | 100 | |
narshu | 1:cc2a9eb0bd55 | 101 | return sum; |
narshu | 1:cc2a9eb0bd55 | 102 | } |
narshu | 1:cc2a9eb0bd55 | 103 | }; |
narshu | 1:cc2a9eb0bd55 | 104 | |
narshu | 1:cc2a9eb0bd55 | 105 | |
narshu | 1:cc2a9eb0bd55 | 106 | } // namespace loop |
narshu | 1:cc2a9eb0bd55 | 107 | |
narshu | 1:cc2a9eb0bd55 | 108 | } // namespace tvmet |
narshu | 1:cc2a9eb0bd55 | 109 | |
narshu | 1:cc2a9eb0bd55 | 110 | #endif /* TVMET_LOOP_GEMM_H */ |
narshu | 1:cc2a9eb0bd55 | 111 | |
narshu | 1:cc2a9eb0bd55 | 112 | // Local Variables: |
narshu | 1:cc2a9eb0bd55 | 113 | // mode:C++ |
narshu | 1:cc2a9eb0bd55 | 114 | // tab-width:8 |
narshu | 1:cc2a9eb0bd55 | 115 | // End: |