Added CMSIS5 DSP and NN folder. Needs some work.

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_mat_cmplx_mult_q31.c Source File

arm_mat_cmplx_mult_q31.c

00001 /* ----------------------------------------------------------------------
00002  * Project:      CMSIS DSP Library
00003  * Title:        arm_mat_cmplx_mult_q31.c
00004  * Description:  Q31 complex matrix multiplication
00005  *
00006  * $Date:        27. January 2017
00007  * $Revision:    V.1.5.1
00008  *
00009  * Target Processor: Cortex-M cores
00010  * -------------------------------------------------------------------- */
00011 /*
00012  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
00013  *
00014  * SPDX-License-Identifier: Apache-2.0
00015  *
00016  * Licensed under the Apache License, Version 2.0 (the License); you may
00017  * not use this file except in compliance with the License.
00018  * You may obtain a copy of the License at
00019  *
00020  * www.apache.org/licenses/LICENSE-2.0
00021  *
00022  * Unless required by applicable law or agreed to in writing, software
00023  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
00024  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00025  * See the License for the specific language governing permissions and
00026  * limitations under the License.
00027  */
00028 
00029 #include "arm_math.h"
00030 
00031 /**
00032  * @ingroup groupMatrix
00033  */
00034 
00035 /**
00036  * @addtogroup CmplxMatrixMult
00037  * @{
00038  */
00039 
00040 /**
00041  * @brief Q31 Complex matrix multiplication
00042  * @param[in]       *pSrcA points to the first input complex matrix structure
00043  * @param[in]       *pSrcB points to the second input complex matrix structure
00044  * @param[out]      *pDst points to output complex matrix structure
00045  * @return          The function returns either
00046  * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
00047  *
00048  * @details
00049  * <b>Scaling and Overflow Behavior:</b>
00050  *
00051  * \par
00052  * The function is implemented using an internal 64-bit accumulator.
00053  * The accumulator has a 2.62 format and maintains full precision of the intermediate
00054  * multiplication results but provides only a single guard bit. There is no saturation
00055  * on intermediate additions. Thus, if the accumulator overflows it wraps around and
00056  * distorts the result. The input signals should be scaled down to avoid intermediate
00057  * overflows. The input is thus scaled down by log2(numColsA) bits
00058  * to avoid overflows, as a total of numColsA additions are performed internally.
00059  * The 2.62 accumulator is right shifted by 31 bits and saturated to 1.31 format to yield the final result.
00060  *
00061  *
00062  */
00063 
00064 arm_status arm_mat_cmplx_mult_q31(
00065   const arm_matrix_instance_q31 * pSrcA,
00066   const arm_matrix_instance_q31 * pSrcB,
00067   arm_matrix_instance_q31 * pDst)
00068 {
00069   q31_t *pIn1 = pSrcA->pData;                    /* input data matrix pointer A */
00070   q31_t *pIn2 = pSrcB->pData;                    /* input data matrix pointer B */
00071   q31_t *pInA = pSrcA->pData;                    /* input data matrix pointer A  */
00072   q31_t *pOut = pDst->pData;                     /* output data matrix pointer */
00073   q31_t *px;                                     /* Temporary output data matrix pointer */
00074   uint16_t numRowsA = pSrcA->numRows;            /* number of rows of input matrix A */
00075   uint16_t numColsB = pSrcB->numCols;            /* number of columns of input matrix B */
00076   uint16_t numColsA = pSrcA->numCols;            /* number of columns of input matrix A */
00077   q63_t sumReal1, sumImag1;                      /* accumulator */
00078   q31_t a0, b0, c0, d0;
00079   q31_t a1, b1, c1, d1;
00080 
00081 
00082   /* Run the below code for Cortex-M4 and Cortex-M3 */
00083 
00084   uint16_t col, i = 0U, j, row = numRowsA, colCnt;      /* loop counters */
00085   arm_status status;                             /* status of matrix multiplication */
00086 
00087 #ifdef ARM_MATH_MATRIX_CHECK
00088 
00089 
00090   /* Check for matrix mismatch condition */
00091   if ((pSrcA->numCols != pSrcB->numRows) ||
00092      (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols))
00093   {
00094 
00095     /* Set status as ARM_MATH_SIZE_MISMATCH */
00096     status = ARM_MATH_SIZE_MISMATCH;
00097   }
00098   else
00099 #endif /*      #ifdef ARM_MATH_MATRIX_CHECK    */
00100 
00101   {
00102     /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */
00103     /* row loop */
00104     do
00105     {
00106       /* Output pointer is set to starting address of the row being processed */
00107       px = pOut + 2 * i;
00108 
00109       /* For every row wise process, the column loop counter is to be initiated */
00110       col = numColsB;
00111 
00112       /* For every row wise process, the pIn2 pointer is set
00113        ** to the starting address of the pSrcB data */
00114       pIn2 = pSrcB->pData;
00115 
00116       j = 0U;
00117 
00118       /* column loop */
00119       do
00120       {
00121         /* Set the variable sum, that acts as accumulator, to zero */
00122         sumReal1 = 0.0;
00123         sumImag1 = 0.0;
00124 
00125         /* Initiate the pointer pIn1 to point to the starting address of the column being processed */
00126         pIn1 = pInA;
00127 
00128         /* Apply loop unrolling and compute 4 MACs simultaneously. */
00129         colCnt = numColsA >> 2;
00130 
00131         /* matrix multiplication        */
00132         while (colCnt > 0U)
00133         {
00134 
00135           /* Reading real part of complex matrix A */
00136           a0 = *pIn1;
00137 
00138           /* Reading real part of complex matrix B */
00139           c0 = *pIn2;
00140 
00141           /* Reading imaginary part of complex matrix A */
00142           b0 = *(pIn1 + 1U);
00143 
00144           /* Reading imaginary part of complex matrix B */
00145           d0 = *(pIn2 + 1U);
00146 
00147           /* Multiply and Accumlates */
00148           sumReal1 += (q63_t) a0 *c0;
00149           sumImag1 += (q63_t) b0 *c0;
00150 
00151           /* update pointers */
00152           pIn1 += 2U;
00153           pIn2 += 2 * numColsB;
00154 
00155           /* Multiply and Accumlates */
00156           sumReal1 -= (q63_t) b0 *d0;
00157           sumImag1 += (q63_t) a0 *d0;
00158 
00159           /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
00160 
00161           /* read real and imag values from pSrcA and pSrcB buffer */
00162           a1 = *pIn1;
00163           c1 = *pIn2;
00164           b1 = *(pIn1 + 1U);
00165           d1 = *(pIn2 + 1U);
00166 
00167           /* Multiply and Accumlates */
00168           sumReal1 += (q63_t) a1 *c1;
00169           sumImag1 += (q63_t) b1 *c1;
00170 
00171           /* update pointers */
00172           pIn1 += 2U;
00173           pIn2 += 2 * numColsB;
00174 
00175           /* Multiply and Accumlates */
00176           sumReal1 -= (q63_t) b1 *d1;
00177           sumImag1 += (q63_t) a1 *d1;
00178 
00179           a0 = *pIn1;
00180           c0 = *pIn2;
00181 
00182           b0 = *(pIn1 + 1U);
00183           d0 = *(pIn2 + 1U);
00184 
00185           /* Multiply and Accumlates */
00186           sumReal1 += (q63_t) a0 *c0;
00187           sumImag1 += (q63_t) b0 *c0;
00188 
00189           /* update pointers */
00190           pIn1 += 2U;
00191           pIn2 += 2 * numColsB;
00192 
00193           /* Multiply and Accumlates */
00194           sumReal1 -= (q63_t) b0 *d0;
00195           sumImag1 += (q63_t) a0 *d0;
00196 
00197           /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
00198 
00199           a1 = *pIn1;
00200           c1 = *pIn2;
00201 
00202           b1 = *(pIn1 + 1U);
00203           d1 = *(pIn2 + 1U);
00204 
00205           /* Multiply and Accumlates */
00206           sumReal1 += (q63_t) a1 *c1;
00207           sumImag1 += (q63_t) b1 *c1;
00208 
00209           /* update pointers */
00210           pIn1 += 2U;
00211           pIn2 += 2 * numColsB;
00212 
00213           /* Multiply and Accumlates */
00214           sumReal1 -= (q63_t) b1 *d1;
00215           sumImag1 += (q63_t) a1 *d1;
00216 
00217           /* Decrement the loop count */
00218           colCnt--;
00219         }
00220 
00221         /* If the columns of pSrcA is not a multiple of 4, compute any remaining MACs here.
00222          ** No loop unrolling is used. */
00223         colCnt = numColsA % 0x4U;
00224 
00225         while (colCnt > 0U)
00226         {
00227           /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
00228           a1 = *pIn1;
00229           c1 = *pIn2;
00230 
00231           b1 = *(pIn1 + 1U);
00232           d1 = *(pIn2 + 1U);
00233 
00234           /* Multiply and Accumlates */
00235           sumReal1 += (q63_t) a1 *c1;
00236           sumImag1 += (q63_t) b1 *c1;
00237 
00238           /* update pointers */
00239           pIn1 += 2U;
00240           pIn2 += 2 * numColsB;
00241 
00242           /* Multiply and Accumlates */
00243           sumReal1 -= (q63_t) b1 *d1;
00244           sumImag1 += (q63_t) a1 *d1;
00245 
00246           /* Decrement the loop counter */
00247           colCnt--;
00248         }
00249 
00250         /* Store the result in the destination buffer */
00251         *px++ = (q31_t) clip_q63_to_q31(sumReal1 >> 31);
00252         *px++ = (q31_t) clip_q63_to_q31(sumImag1 >> 31);
00253 
00254         /* Update the pointer pIn2 to point to the  starting address of the next column */
00255         j++;
00256         pIn2 = pSrcB->pData + 2U * j;
00257 
00258         /* Decrement the column loop counter */
00259         col--;
00260 
00261       } while (col > 0U);
00262 
00263       /* Update the pointer pInA to point to the  starting address of the next row */
00264       i = i + numColsB;
00265       pInA = pInA + 2 * numColsA;
00266 
00267       /* Decrement the row loop counter */
00268       row--;
00269 
00270     } while (row > 0U);
00271 
00272     /* Set status as ARM_MATH_SUCCESS */
00273     status = ARM_MATH_SUCCESS;
00274   }
00275 
00276   /* Return to application */
00277   return (status);
00278 }
00279 
00280 /**
00281  * @} end of CmplxMatrixMult group
00282  */
00283