CMSIS DSP library

Dependents:   performance_timer Surfboard_ gps2rtty Capstone ... more

Legacy Warning

This is an mbed 2 library. To learn more about mbed OS 5, visit the docs.

Committer:
mbed_official
Date:
Fri Nov 20 08:45:18 2015 +0000
Revision:
5:3762170b6d4d
Synchronized with git revision 2eb940b9a73af188d3004a2575fdfbb05febe62b

Full URL: https://github.com/mbedmicro/mbed/commit/2eb940b9a73af188d3004a2575fdfbb05febe62b/

Added option to build rpc library. closes #1426

Who changed what in which revision?

User | Revision | Line number | New contents of line
mbed_official 5:3762170b6d4d 1 /* ----------------------------------------------------------------------
mbed_official 5:3762170b6d4d 2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
mbed_official 5:3762170b6d4d 3 *
mbed_official 5:3762170b6d4d 4 * $Date: 19. March 2015
mbed_official 5:3762170b6d4d 5 * $Revision: V.1.4.5
mbed_official 5:3762170b6d4d 6 *
mbed_official 5:3762170b6d4d 7 * Project: CMSIS DSP Library
mbed_official 5:3762170b6d4d 8 * Title: arm_mat_cmplx_mult_q31.c
mbed_official 5:3762170b6d4d 9 *
mbed_official 5:3762170b6d4d 10 * Description: Q31 complex matrix multiplication.
mbed_official 5:3762170b6d4d 11 *
mbed_official 5:3762170b6d4d 12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
mbed_official 5:3762170b6d4d 13 *
mbed_official 5:3762170b6d4d 14 * Redistribution and use in source and binary forms, with or without
mbed_official 5:3762170b6d4d 15 * modification, are permitted provided that the following conditions
mbed_official 5:3762170b6d4d 16 * are met:
mbed_official 5:3762170b6d4d 17 * - Redistributions of source code must retain the above copyright
mbed_official 5:3762170b6d4d 18 * notice, this list of conditions and the following disclaimer.
mbed_official 5:3762170b6d4d 19 * - Redistributions in binary form must reproduce the above copyright
mbed_official 5:3762170b6d4d 20 * notice, this list of conditions and the following disclaimer in
mbed_official 5:3762170b6d4d 21 * the documentation and/or other materials provided with the
mbed_official 5:3762170b6d4d 22 * distribution.
mbed_official 5:3762170b6d4d 23 * - Neither the name of ARM LIMITED nor the names of its contributors
mbed_official 5:3762170b6d4d 24 * may be used to endorse or promote products derived from this
mbed_official 5:3762170b6d4d 25 * software without specific prior written permission.
mbed_official 5:3762170b6d4d 26 *
mbed_official 5:3762170b6d4d 27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
mbed_official 5:3762170b6d4d 28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
mbed_official 5:3762170b6d4d 29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
mbed_official 5:3762170b6d4d 30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
mbed_official 5:3762170b6d4d 31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
mbed_official 5:3762170b6d4d 32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
mbed_official 5:3762170b6d4d 33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
mbed_official 5:3762170b6d4d 34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
mbed_official 5:3762170b6d4d 35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
mbed_official 5:3762170b6d4d 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
mbed_official 5:3762170b6d4d 37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
mbed_official 5:3762170b6d4d 38 * POSSIBILITY OF SUCH DAMAGE.
mbed_official 5:3762170b6d4d 39 * -------------------------------------------------------------------- */
mbed_official 5:3762170b6d4d 40 #include "arm_math.h"
mbed_official 5:3762170b6d4d 41
mbed_official 5:3762170b6d4d 42 /**
mbed_official 5:3762170b6d4d 43 * @ingroup groupMatrix
mbed_official 5:3762170b6d4d 44 */
mbed_official 5:3762170b6d4d 45
mbed_official 5:3762170b6d4d 46 /**
mbed_official 5:3762170b6d4d 47 * @addtogroup CmplxMatrixMult
mbed_official 5:3762170b6d4d 48 * @{
mbed_official 5:3762170b6d4d 49 */
mbed_official 5:3762170b6d4d 50
mbed_official 5:3762170b6d4d 51 /**
mbed_official 5:3762170b6d4d 52 * @brief Q31 Complex matrix multiplication
mbed_official 5:3762170b6d4d 53 * @param[in] *pSrcA points to the first input complex matrix structure
mbed_official 5:3762170b6d4d 54 * @param[in] *pSrcB points to the second input complex matrix structure
mbed_official 5:3762170b6d4d 55 * @param[out] *pDst points to output complex matrix structure
mbed_official 5:3762170b6d4d 56 * @return The function returns either
mbed_official 5:3762170b6d4d 57 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
mbed_official 5:3762170b6d4d 58 *
mbed_official 5:3762170b6d4d 59 * @details
mbed_official 5:3762170b6d4d 60 * <b>Scaling and Overflow Behavior:</b>
mbed_official 5:3762170b6d4d 61 *
mbed_official 5:3762170b6d4d 62 * \par
mbed_official 5:3762170b6d4d 63 * The function is implemented using an internal 64-bit accumulator.
mbed_official 5:3762170b6d4d 64 * The accumulator has a 2.62 format and maintains full precision of the intermediate
mbed_official 5:3762170b6d4d 65 * multiplication results but provides only a single guard bit. There is no saturation
mbed_official 5:3762170b6d4d 66 * on intermediate additions. Thus, if the accumulator overflows it wraps around and
mbed_official 5:3762170b6d4d 67 * distorts the result. The input signals should be scaled down to avoid intermediate
mbed_official 5:3762170b6d4d 68 * overflows. The function does not scale its inputs; the caller should scale them down by log2(numColsA) bits
mbed_official 5:3762170b6d4d 69 * to avoid overflows, as a total of numColsA additions are performed internally.
mbed_official 5:3762170b6d4d 70 * The 2.62 accumulator is right shifted by 31 bits and saturated to 1.31 format to yield the final result.
mbed_official 5:3762170b6d4d 71 *
mbed_official 5:3762170b6d4d 72 *
mbed_official 5:3762170b6d4d 73 */
mbed_official 5:3762170b6d4d 74
mbed_official 5:3762170b6d4d 75 arm_status arm_mat_cmplx_mult_q31(
mbed_official 5:3762170b6d4d 76 const arm_matrix_instance_q31 * pSrcA,
mbed_official 5:3762170b6d4d 77 const arm_matrix_instance_q31 * pSrcB,
mbed_official 5:3762170b6d4d 78 arm_matrix_instance_q31 * pDst)
mbed_official 5:3762170b6d4d 79 {
mbed_official 5:3762170b6d4d 80 q31_t *pIn1 = pSrcA->pData; /* read cursor into A; reset to pInA (current row start) for every output element */
mbed_official 5:3762170b6d4d 81 q31_t *pIn2 = pSrcB->pData; /* read cursor into B; steps down one column of B */
mbed_official 5:3762170b6d4d 82 q31_t *pInA = pSrcA->pData; /* start of the row of A currently being processed */
mbed_official 5:3762170b6d4d 83 q31_t *pOut = pDst->pData; /* base of the output data buffer */
mbed_official 5:3762170b6d4d 84 q31_t *px; /* write cursor into the output buffer */
mbed_official 5:3762170b6d4d 85 uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */
mbed_official 5:3762170b6d4d 86 uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */
mbed_official 5:3762170b6d4d 87 uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */
mbed_official 5:3762170b6d4d 88 q63_t sumReal1, sumImag1; /* accumulators for the real and imaginary parts of one output element */
mbed_official 5:3762170b6d4d 89 q31_t a0, b0, c0, d0; /* a/b = real/imag of an A element, c/d = real/imag of a B element */
mbed_official 5:3762170b6d4d 90 q31_t a1, b1, c1, d1; /* second set of the same four values, used by alternating unrolled steps */
mbed_official 5:3762170b6d4d 91
mbed_official 5:3762170b6d4d 92
mbed_official 5:3762170b6d4d 93 /* No per-core conditional compilation is visible in this function; the code below is its only implementation */
mbed_official 5:3762170b6d4d 94
mbed_official 5:3762170b6d4d 95 uint16_t col, i = 0u, j, row = numRowsA, colCnt; /* loop counters; i = complex-element offset of the current output row, j = current column of B */
mbed_official 5:3762170b6d4d 96 arm_status status; /* status of matrix multiplication */
mbed_official 5:3762170b6d4d 97
mbed_official 5:3762170b6d4d 98 #ifdef ARM_MATH_MATRIX_CHECK
mbed_official 5:3762170b6d4d 99
mbed_official 5:3762170b6d4d 100
mbed_official 5:3762170b6d4d 101 /* Dimension check: cols(A) must equal rows(B), and pDst must be rows(A) x cols(B) */
mbed_official 5:3762170b6d4d 102 if((pSrcA->numCols != pSrcB->numRows) ||
mbed_official 5:3762170b6d4d 103 (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols))
mbed_official 5:3762170b6d4d 104 {
mbed_official 5:3762170b6d4d 105
mbed_official 5:3762170b6d4d 106 /* Set status as ARM_MATH_SIZE_MISMATCH; no output is written in this case */
mbed_official 5:3762170b6d4d 107 status = ARM_MATH_SIZE_MISMATCH;
mbed_official 5:3762170b6d4d 108 }
mbed_official 5:3762170b6d4d 109 else
mbed_official 5:3762170b6d4d 110 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
mbed_official 5:3762170b6d4d 111
mbed_official 5:3762170b6d4d 112 {
mbed_official 5:3762170b6d4d 113 /* The following loop performs the complex dot-product of each row in pSrcA with each column in pSrcB */
mbed_official 5:3762170b6d4d 114 /* row loop -- NOTE(review): do-while with a uint16_t counter, so numRowsA == 0 would still run the body and wrap the counter; confirm callers never pass empty matrices */
mbed_official 5:3762170b6d4d 115 do
mbed_official 5:3762170b6d4d 116 {
mbed_official 5:3762170b6d4d 117 /* Output pointer is set to the start of the output row being processed (two q31_t values per complex element) */
mbed_official 5:3762170b6d4d 118 px = pOut + 2 * i;
mbed_official 5:3762170b6d4d 119
mbed_official 5:3762170b6d4d 120 /* Restart the column counter for this row */
mbed_official 5:3762170b6d4d 121 col = numColsB;
mbed_official 5:3762170b6d4d 122
mbed_official 5:3762170b6d4d 123 /* For each row of A, rewind pIn2
mbed_official 5:3762170b6d4d 124 ** to the first element of the pSrcB data */
mbed_official 5:3762170b6d4d 125 pIn2 = pSrcB->pData;
mbed_official 5:3762170b6d4d 126
mbed_official 5:3762170b6d4d 127 j = 0u;
mbed_official 5:3762170b6d4d 128
mbed_official 5:3762170b6d4d 129 /* column loop (same do-while shape as the row loop, counting down from numColsB) */
mbed_official 5:3762170b6d4d 130 do
mbed_official 5:3762170b6d4d 131 {
mbed_official 5:3762170b6d4d 132 /* Clear the real and imaginary accumulators (written as 0.0 although the variables are q63_t) */
mbed_official 5:3762170b6d4d 133 sumReal1 = 0.0;
mbed_official 5:3762170b6d4d 134 sumImag1 = 0.0;
mbed_official 5:3762170b6d4d 135
mbed_official 5:3762170b6d4d 136 /* Point pIn1 at the start of the current row of A */
mbed_official 5:3762170b6d4d 137 pIn1 = pInA;
mbed_official 5:3762170b6d4d 138
mbed_official 5:3762170b6d4d 139 /* Loop unrolled by 4: each pass below consumes four complex elements of the row */
mbed_official 5:3762170b6d4d 140 colCnt = numColsA >> 2;
mbed_official 5:3762170b6d4d 141
mbed_official 5:3762170b6d4d 142 /* matrix multiplication */
mbed_official 5:3762170b6d4d 143 while(colCnt > 0u)
mbed_official 5:3762170b6d4d 144 {
mbed_official 5:3762170b6d4d 145
mbed_official 5:3762170b6d4d 146 /* Reading real part of complex matrix A */
mbed_official 5:3762170b6d4d 147 a0 = *pIn1;
mbed_official 5:3762170b6d4d 148
mbed_official 5:3762170b6d4d 149 /* Reading real part of complex matrix B */
mbed_official 5:3762170b6d4d 150 c0 = *pIn2;
mbed_official 5:3762170b6d4d 151
mbed_official 5:3762170b6d4d 152 /* Reading imaginary part of complex matrix A */
mbed_official 5:3762170b6d4d 153 b0 = *(pIn1 + 1u);
mbed_official 5:3762170b6d4d 154
mbed_official 5:3762170b6d4d 155 /* Reading imaginary part of complex matrix B */
mbed_official 5:3762170b6d4d 156 d0 = *(pIn2 + 1u);
mbed_official 5:3762170b6d4d 157
mbed_official 5:3762170b6d4d 158 /* Multiply-accumulate with the real part of B: real += a*c, imag += b*c */
mbed_official 5:3762170b6d4d 159 sumReal1 += (q63_t) a0 *c0;
mbed_official 5:3762170b6d4d 160 sumImag1 += (q63_t) b0 *c0;
mbed_official 5:3762170b6d4d 161
mbed_official 5:3762170b6d4d 162 /* Advance A by one complex element and B by one full row (next element of the same column) */
mbed_official 5:3762170b6d4d 163 pIn1 += 2u;
mbed_official 5:3762170b6d4d 164 pIn2 += 2 * numColsB;
mbed_official 5:3762170b6d4d 165
mbed_official 5:3762170b6d4d 166 /* Multiply-accumulate with the imaginary part of B: real -= b*d, imag += a*d */
mbed_official 5:3762170b6d4d 167 sumReal1 -= (q63_t) b0 *d0;
mbed_official 5:3762170b6d4d 168 sumImag1 += (q63_t) a0 *d0;
mbed_official 5:3762170b6d4d 169
mbed_official 5:3762170b6d4d 170 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
mbed_official 5:3762170b6d4d 171
mbed_official 5:3762170b6d4d 172 /* Second unrolled step: read real and imag values from pSrcA and pSrcB buffer */
mbed_official 5:3762170b6d4d 173 a1 = *pIn1;
mbed_official 5:3762170b6d4d 174 c1 = *pIn2;
mbed_official 5:3762170b6d4d 175 b1 = *(pIn1 + 1u);
mbed_official 5:3762170b6d4d 176 d1 = *(pIn2 + 1u);
mbed_official 5:3762170b6d4d 177
mbed_official 5:3762170b6d4d 178 /* Multiply-accumulate with the real part of B: real += a*c, imag += b*c */
mbed_official 5:3762170b6d4d 179 sumReal1 += (q63_t) a1 *c1;
mbed_official 5:3762170b6d4d 180 sumImag1 += (q63_t) b1 *c1;
mbed_official 5:3762170b6d4d 181
mbed_official 5:3762170b6d4d 182 /* Advance A by one complex element and B by one full row */
mbed_official 5:3762170b6d4d 183 pIn1 += 2u;
mbed_official 5:3762170b6d4d 184 pIn2 += 2 * numColsB;
mbed_official 5:3762170b6d4d 185
mbed_official 5:3762170b6d4d 186 /* Multiply-accumulate with the imaginary part of B: real -= b*d, imag += a*d */
mbed_official 5:3762170b6d4d 187 sumReal1 -= (q63_t) b1 *d1;
mbed_official 5:3762170b6d4d 188 sumImag1 += (q63_t) a1 *d1;
mbed_official 5:3762170b6d4d 189
mbed_official 5:3762170b6d4d 190 a0 = *pIn1;
mbed_official 5:3762170b6d4d 191 c0 = *pIn2;
mbed_official 5:3762170b6d4d 192
mbed_official 5:3762170b6d4d 193 b0 = *(pIn1 + 1u);
mbed_official 5:3762170b6d4d 194 d0 = *(pIn2 + 1u);
mbed_official 5:3762170b6d4d 195
mbed_official 5:3762170b6d4d 196 /* Third unrolled step: real += a*c, imag += b*c */
mbed_official 5:3762170b6d4d 197 sumReal1 += (q63_t) a0 *c0;
mbed_official 5:3762170b6d4d 198 sumImag1 += (q63_t) b0 *c0;
mbed_official 5:3762170b6d4d 199
mbed_official 5:3762170b6d4d 200 /* Advance A by one complex element and B by one full row */
mbed_official 5:3762170b6d4d 201 pIn1 += 2u;
mbed_official 5:3762170b6d4d 202 pIn2 += 2 * numColsB;
mbed_official 5:3762170b6d4d 203
mbed_official 5:3762170b6d4d 204 /* Multiply-accumulate with the imaginary part of B: real -= b*d, imag += a*d */
mbed_official 5:3762170b6d4d 205 sumReal1 -= (q63_t) b0 *d0;
mbed_official 5:3762170b6d4d 206 sumImag1 += (q63_t) a0 *d0;
mbed_official 5:3762170b6d4d 207
mbed_official 5:3762170b6d4d 208 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
mbed_official 5:3762170b6d4d 209
mbed_official 5:3762170b6d4d 210 a1 = *pIn1;
mbed_official 5:3762170b6d4d 211 c1 = *pIn2;
mbed_official 5:3762170b6d4d 212
mbed_official 5:3762170b6d4d 213 b1 = *(pIn1 + 1u);
mbed_official 5:3762170b6d4d 214 d1 = *(pIn2 + 1u);
mbed_official 5:3762170b6d4d 215
mbed_official 5:3762170b6d4d 216 /* Fourth unrolled step: real += a*c, imag += b*c */
mbed_official 5:3762170b6d4d 217 sumReal1 += (q63_t) a1 *c1;
mbed_official 5:3762170b6d4d 218 sumImag1 += (q63_t) b1 *c1;
mbed_official 5:3762170b6d4d 219
mbed_official 5:3762170b6d4d 220 /* Advance A by one complex element and B by one full row */
mbed_official 5:3762170b6d4d 221 pIn1 += 2u;
mbed_official 5:3762170b6d4d 222 pIn2 += 2 * numColsB;
mbed_official 5:3762170b6d4d 223
mbed_official 5:3762170b6d4d 224 /* Multiply-accumulate with the imaginary part of B: real -= b*d, imag += a*d */
mbed_official 5:3762170b6d4d 225 sumReal1 -= (q63_t) b1 *d1;
mbed_official 5:3762170b6d4d 226 sumImag1 += (q63_t) a1 *d1;
mbed_official 5:3762170b6d4d 227
mbed_official 5:3762170b6d4d 228 /* Decrement the loop count */
mbed_official 5:3762170b6d4d 229 colCnt--;
mbed_official 5:3762170b6d4d 230 }
mbed_official 5:3762170b6d4d 231
mbed_official 5:3762170b6d4d 232 /* If the number of columns of pSrcA is not a multiple of 4, compute the remaining MACs here.
mbed_official 5:3762170b6d4d 233 ** No loop unrolling is used. */
mbed_official 5:3762170b6d4d 234 colCnt = numColsA % 0x4u;
mbed_official 5:3762170b6d4d 235
mbed_official 5:3762170b6d4d 236 while(colCnt > 0u)
mbed_official 5:3762170b6d4d 237 {
mbed_official 5:3762170b6d4d 238 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
mbed_official 5:3762170b6d4d 239 a1 = *pIn1;
mbed_official 5:3762170b6d4d 240 c1 = *pIn2;
mbed_official 5:3762170b6d4d 241
mbed_official 5:3762170b6d4d 242 b1 = *(pIn1 + 1u);
mbed_official 5:3762170b6d4d 243 d1 = *(pIn2 + 1u);
mbed_official 5:3762170b6d4d 244
mbed_official 5:3762170b6d4d 245 /* Multiply-accumulate with the real part of B: real += a*c, imag += b*c */
mbed_official 5:3762170b6d4d 246 sumReal1 += (q63_t) a1 *c1;
mbed_official 5:3762170b6d4d 247 sumImag1 += (q63_t) b1 *c1;
mbed_official 5:3762170b6d4d 248
mbed_official 5:3762170b6d4d 249 /* Advance A by one complex element and B by one full row */
mbed_official 5:3762170b6d4d 250 pIn1 += 2u;
mbed_official 5:3762170b6d4d 251 pIn2 += 2 * numColsB;
mbed_official 5:3762170b6d4d 252
mbed_official 5:3762170b6d4d 253 /* Multiply-accumulate with the imaginary part of B: real -= b*d, imag += a*d */
mbed_official 5:3762170b6d4d 254 sumReal1 -= (q63_t) b1 *d1;
mbed_official 5:3762170b6d4d 255 sumImag1 += (q63_t) a1 *d1;
mbed_official 5:3762170b6d4d 256
mbed_official 5:3762170b6d4d 257 /* Decrement the loop counter */
mbed_official 5:3762170b6d4d 258 colCnt--;
mbed_official 5:3762170b6d4d 259 }
mbed_official 5:3762170b6d4d 260
mbed_official 5:3762170b6d4d 261 /* Shift each accumulator right by 31 bits, pass it through clip_q63_to_q31, and store real then imaginary */
mbed_official 5:3762170b6d4d 262 *px++ = (q31_t) clip_q63_to_q31(sumReal1 >> 31);
mbed_official 5:3762170b6d4d 263 *px++ = (q31_t) clip_q63_to_q31(sumImag1 >> 31);
mbed_official 5:3762170b6d4d 264
mbed_official 5:3762170b6d4d 265 /* Update the pointer pIn2 to point to the top of the next column of B (two q31_t values per complex element) */
mbed_official 5:3762170b6d4d 266 j++;
mbed_official 5:3762170b6d4d 267 pIn2 = pSrcB->pData + 2u * j;
mbed_official 5:3762170b6d4d 268
mbed_official 5:3762170b6d4d 269 /* Decrement the column loop counter */
mbed_official 5:3762170b6d4d 270 col--;
mbed_official 5:3762170b6d4d 271
mbed_official 5:3762170b6d4d 272 } while(col > 0u);
mbed_official 5:3762170b6d4d 273
mbed_official 5:3762170b6d4d 274 /* Advance the output row offset and move pInA to the next row of A -- NOTE(review): i is uint16_t, so the offset wraps once it exceeds 65535 complex elements */
mbed_official 5:3762170b6d4d 275 i = i + numColsB;
mbed_official 5:3762170b6d4d 276 pInA = pInA + 2 * numColsA;
mbed_official 5:3762170b6d4d 277
mbed_official 5:3762170b6d4d 278 /* Decrement the row loop counter */
mbed_official 5:3762170b6d4d 279 row--;
mbed_official 5:3762170b6d4d 280
mbed_official 5:3762170b6d4d 281 } while(row > 0u);
mbed_official 5:3762170b6d4d 282
mbed_official 5:3762170b6d4d 283 /* Set status as ARM_MATH_SUCCESS */
mbed_official 5:3762170b6d4d 284 status = ARM_MATH_SUCCESS;
mbed_official 5:3762170b6d4d 285 }
mbed_official 5:3762170b6d4d 286
mbed_official 5:3762170b6d4d 287 /* Return to application */
mbed_official 5:3762170b6d4d 288 return (status);
mbed_official 5:3762170b6d4d 289 }
mbed_official 5:3762170b6d4d 290
mbed_official 5:3762170b6d4d 291 /**
mbed_official 5:3762170b6d4d 292 * @} end of CmplxMatrixMult group
mbed_official 5:3762170b6d4d 293 */