Added CMSIS5 DSP and NN folder. Needs some work.

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_mat_cmplx_mult_f32.c Source File

arm_mat_cmplx_mult_f32.c

00001 /* ----------------------------------------------------------------------
00002  * Project:      CMSIS DSP Library
00003  * Title:        arm_mat_cmplx_mult_f32.c
00004  * Description:  Floating-point complex matrix multiplication
00005  *
00006  * $Date:        27. January 2017
00007  * $Revision:    V.1.5.1
00008  *
00009  * Target Processor: Cortex-M cores
00010  * -------------------------------------------------------------------- */
00011 /*
00012  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
00013  *
00014  * SPDX-License-Identifier: Apache-2.0
00015  *
00016  * Licensed under the Apache License, Version 2.0 (the License); you may
00017  * not use this file except in compliance with the License.
00018  * You may obtain a copy of the License at
00019  *
00020  * www.apache.org/licenses/LICENSE-2.0
00021  *
00022  * Unless required by applicable law or agreed to in writing, software
00023  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
00024  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00025  * See the License for the specific language governing permissions and
00026  * limitations under the License.
00027  */
00028 
00029 #include "arm_math.h"
00030 
00031 /**
00032  * @ingroup groupMatrix
00033  */
00034 
00035 /**
00036  * @defgroup CmplxMatrixMult  Complex Matrix Multiplication
00037  *
00038  * Complex Matrix multiplication is only defined if the number of columns of the
00039  * first matrix equals the number of rows of the second matrix.
00040  * Multiplying an <code>M x N</code> matrix with an <code>N x P</code> matrix results
00041  * in an <code>M x P</code> matrix.
00042  * When matrix size checking is enabled, the functions check: (1) that the inner dimensions of
00043  * <code>pSrcA</code> and <code>pSrcB</code> are equal; and (2) that the size of the output
00044  * matrix equals the outer dimensions of <code>pSrcA</code> and <code>pSrcB</code>.
00045  */
00046 
00047 
00048 /**
00049  * @addtogroup CmplxMatrixMult
00050  * @{
00051  */
00052 
00053 /**
00054  * @brief Floating-point Complex matrix multiplication.
00055  * @param[in]       *pSrcA points to the first input complex matrix structure
00056  * @param[in]       *pSrcB points to the second input complex matrix structure
00057  * @param[out]      *pDst points to output complex matrix structure
00058  * @return          The function returns either
00059  * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
00060  */
00061 
00062 arm_status arm_mat_cmplx_mult_f32(
00063   const arm_matrix_instance_f32 * pSrcA,
00064   const arm_matrix_instance_f32 * pSrcB,
00065   arm_matrix_instance_f32 * pDst)
00066 {
00067   float32_t *pIn1 = pSrcA->pData;                /* input data matrix pointer A */
00068   float32_t *pIn2 = pSrcB->pData;                /* input data matrix pointer B */
00069   float32_t *pInA = pSrcA->pData;                /* input data matrix pointer A  */
00070   float32_t *pOut = pDst->pData;                 /* output data matrix pointer */
00071   float32_t *px;                                 /* Temporary output data matrix pointer */
00072   uint16_t numRowsA = pSrcA->numRows;            /* number of rows of input matrix A */
00073   uint16_t numColsB = pSrcB->numCols;            /* number of columns of input matrix B */
00074   uint16_t numColsA = pSrcA->numCols;            /* number of columns of input matrix A */
00075   float32_t sumReal1, sumImag1;                  /* accumulator */
00076   float32_t a0, b0, c0, d0;
00077   float32_t a1, b1, c1, d1;
00078   float32_t sumReal2, sumImag2;                  /* accumulator */
00079 
00080 
00081   /* Run the below code for Cortex-M4 and Cortex-M3 */
00082 
00083   uint16_t col, i = 0U, j, row = numRowsA, colCnt;      /* loop counters */
00084   arm_status status;                             /* status of matrix multiplication */
00085 
00086 #ifdef ARM_MATH_MATRIX_CHECK
00087 
00088 
00089   /* Check for matrix mismatch condition */
00090   if ((pSrcA->numCols != pSrcB->numRows) ||
00091      (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols))
00092   {
00093 
00094     /* Set status as ARM_MATH_SIZE_MISMATCH */
00095     status = ARM_MATH_SIZE_MISMATCH;
00096   }
00097   else
00098 #endif /*      #ifdef ARM_MATH_MATRIX_CHECK    */
00099 
00100   {
00101     /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */
00102     /* row loop */
00103     do
00104     {
00105       /* Output pointer is set to starting address of the row being processed */
00106       px = pOut + 2 * i;
00107 
00108       /* For every row wise process, the column loop counter is to be initiated */
00109       col = numColsB;
00110 
00111       /* For every row wise process, the pIn2 pointer is set
00112        ** to the starting address of the pSrcB data */
00113       pIn2 = pSrcB->pData;
00114 
00115       j = 0U;
00116 
00117       /* column loop */
00118       do
00119       {
00120         /* Set the variable sum, that acts as accumulator, to zero */
00121         sumReal1 = 0.0f;
00122         sumImag1 = 0.0f;
00123 
00124         sumReal2 = 0.0f;
00125         sumImag2 = 0.0f;
00126 
00127         /* Initiate the pointer pIn1 to point to the starting address of the column being processed */
00128         pIn1 = pInA;
00129 
00130         /* Apply loop unrolling and compute 4 MACs simultaneously. */
00131         colCnt = numColsA >> 2;
00132 
00133         /* matrix multiplication        */
00134         while (colCnt > 0U)
00135         {
00136 
00137           /* Reading real part of complex matrix A */
00138           a0 = *pIn1;
00139 
00140           /* Reading real part of complex matrix B */
00141           c0 = *pIn2;
00142 
00143           /* Reading imaginary part of complex matrix A */
00144           b0 = *(pIn1 + 1U);
00145 
00146           /* Reading imaginary part of complex matrix B */
00147           d0 = *(pIn2 + 1U);
00148 
00149           sumReal1 += a0 * c0;
00150           sumImag1 += b0 * c0;
00151 
00152           pIn1 += 2U;
00153           pIn2 += 2 * numColsB;
00154 
00155           sumReal2 -= b0 * d0;
00156           sumImag2 += a0 * d0;
00157 
00158           /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
00159 
00160           a1 = *pIn1;
00161           c1 = *pIn2;
00162 
00163           b1 = *(pIn1 + 1U);
00164           d1 = *(pIn2 + 1U);
00165 
00166           sumReal1 += a1 * c1;
00167           sumImag1 += b1 * c1;
00168 
00169           pIn1 += 2U;
00170           pIn2 += 2 * numColsB;
00171 
00172           sumReal2 -= b1 * d1;
00173           sumImag2 += a1 * d1;
00174 
00175           a0 = *pIn1;
00176           c0 = *pIn2;
00177 
00178           b0 = *(pIn1 + 1U);
00179           d0 = *(pIn2 + 1U);
00180 
00181           sumReal1 += a0 * c0;
00182           sumImag1 += b0 * c0;
00183 
00184           pIn1 += 2U;
00185           pIn2 += 2 * numColsB;
00186 
00187           sumReal2 -= b0 * d0;
00188           sumImag2 += a0 * d0;
00189 
00190           /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
00191 
00192           a1 = *pIn1;
00193           c1 = *pIn2;
00194 
00195           b1 = *(pIn1 + 1U);
00196           d1 = *(pIn2 + 1U);
00197 
00198           sumReal1 += a1 * c1;
00199           sumImag1 += b1 * c1;
00200 
00201           pIn1 += 2U;
00202           pIn2 += 2 * numColsB;
00203 
00204           sumReal2 -= b1 * d1;
00205           sumImag2 += a1 * d1;
00206 
00207           /* Decrement the loop count */
00208           colCnt--;
00209         }
00210 
00211         /* If the columns of pSrcA is not a multiple of 4, compute any remaining MACs here.
00212          ** No loop unrolling is used. */
00213         colCnt = numColsA % 0x4U;
00214 
00215         while (colCnt > 0U)
00216         {
00217           /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */
00218           a1 = *pIn1;
00219           c1 = *pIn2;
00220 
00221           b1 = *(pIn1 + 1U);
00222           d1 = *(pIn2 + 1U);
00223 
00224           sumReal1 += a1 * c1;
00225           sumImag1 += b1 * c1;
00226 
00227           pIn1 += 2U;
00228           pIn2 += 2 * numColsB;
00229 
00230           sumReal2 -= b1 * d1;
00231           sumImag2 += a1 * d1;
00232 
00233           /* Decrement the loop counter */
00234           colCnt--;
00235         }
00236 
00237         sumReal1 += sumReal2;
00238         sumImag1 += sumImag2;
00239 
00240         /* Store the result in the destination buffer */
00241         *px++ = sumReal1;
00242         *px++ = sumImag1;
00243 
00244         /* Update the pointer pIn2 to point to the  starting address of the next column */
00245         j++;
00246         pIn2 = pSrcB->pData + 2U * j;
00247 
00248         /* Decrement the column loop counter */
00249         col--;
00250 
00251       } while (col > 0U);
00252 
00253       /* Update the pointer pInA to point to the  starting address of the next row */
00254       i = i + numColsB;
00255       pInA = pInA + 2 * numColsA;
00256 
00257       /* Decrement the row loop counter */
00258       row--;
00259 
00260     } while (row > 0U);
00261 
00262     /* Set status as ARM_MATH_SUCCESS */
00263     status = ARM_MATH_SUCCESS;
00264   }
00265 
00266   /* Return to application */
00267   return (status);
00268 }
00269 
00270 /**
00271  * @} end of CmplxMatrixMult group
00272  */
00273