Daniel Konegen / MNIST_example

Dependencies:   mbed-os

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_cmplx_mag_squared_q10p6.c Source File

arm_cmplx_mag_squared_q10p6.c

00001 /* This file is a modification of the ARM CMSIS library file arm_cmplx_mag_squared_q15.c
00002  * We have retained the original copyright and header information, in
00003  * accordance with the Apache 2.0 license terms.
00004  */
00005 
00006 /* ----------------------------------------------------------------------
00007  * Project:      CMSIS DSP Library
00008  * Title:        arm_cmplx_mag_squared_q15.c
00009  * Description:  Q15 complex magnitude squared
00010  *
00011  * $Date:        27. January 2017
00012  * $Revision:    V.1.5.1
00013  *
00014  * Target Processor: Cortex-M cores
00015  * -------------------------------------------------------------------- */
00016 /*
00017  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
00018  *
00019  * SPDX-License-Identifier: Apache-2.0
00020  *
00021  * Licensed under the Apache License, Version 2.0 (the License); you may
00022  * not use this file except in compliance with the License.
00023  * You may obtain a copy of the License at
00024  *
00025  * www.apache.org/licenses/LICENSE-2.0
00026  *
00027  * Unless required by applicable law or agreed to in writing, software
00028  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
00029  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00030  * See the License for the specific language governing permissions and
00031  * limitations under the License.
00032  */
00033 
00034 #include "arm_math.h"
00035 
00036 /**
00037  * @ingroup groupCmplxMath
00038  */
00039 
00040 /**
00041  * @addtogroup cmplx_mag_squared
00042  * @{
00043  */
00044 
00045 /**
00046  * @brief  Q15 complex magnitude squared
00047  * @param  *pSrc points to the complex input vector
00048  * @param  *pDst points to the real output vector
00049  * @param  numSamples number of complex samples in the input vector
00050  * @return none.
00051  *
00052  * <b>Scaling and Overflow Behavior:</b>
00053  * \par
00054  * The function implements 1.15 by 1.15 multiplications and finally output is converted into 3.13 format.
00055  */
00056 
00057 void arm_cmplx_mag_squared_q10p6(
00058   q15_t * pSrc,
00059   q15_t * pDst,
00060   uint32_t numSamples)
00061 {
00062   q31_t acc0, acc1;                              /* Accumulators */
00063 
00064 #if defined (ARM_MATH_DSP)
00065 
00066   /* Run the below code for Cortex-M4 and Cortex-M3 */
00067   uint32_t blkCnt;                               /* loop counter */
00068   q31_t in1, in2, in3, in4;
00069   q31_t acc2, acc3;
00070 
00071   /*loop Unrolling */
00072   blkCnt = numSamples >> 2U;
00073 
00074   /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
00075    ** a second loop below computes the remaining 1 to 3 samples. */
00076   while (blkCnt > 0U)
00077   {
00078     /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
00079     in1 = *__SIMD32(pSrc)++;
00080     in2 = *__SIMD32(pSrc)++;
00081     in3 = *__SIMD32(pSrc)++;
00082     in4 = *__SIMD32(pSrc)++;
00083 
00084     acc0 = __SMUAD(in1, in1);
00085     acc1 = __SMUAD(in2, in2);
00086     acc2 = __SMUAD(in3, in3);
00087     acc3 = __SMUAD(in4, in4);
00088 
00089     /* store the result in 3.13 format in the destination buffer. */
00090     *pDst++ = (q15_t) (acc0 >> 6);
00091     *pDst++ = (q15_t) (acc1 >> 6);
00092     *pDst++ = (q15_t) (acc2 >> 6);
00093     *pDst++ = (q15_t) (acc3 >> 6);
00094 
00095     /* Decrement the loop counter */
00096     blkCnt--;
00097   }
00098 
00099   /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
00100    ** No loop unrolling is used. */
00101   blkCnt = numSamples % 0x4U;
00102 
00103   while (blkCnt > 0U)
00104   {
00105     /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
00106     in1 = *__SIMD32(pSrc)++;
00107     acc0 = __SMUAD(in1, in1);
00108 
00109     /* store the result in 3.13 format in the destination buffer. */
00110     *pDst++ = (q15_t) (acc0 >> 6);
00111 
00112     /* Decrement the loop counter */
00113     blkCnt--;
00114   }
00115 
00116 #else
00117 
00118   /* Run the below code for Cortex-M0 */
00119   q15_t real, imag;                              /* Temporary variables to store real and imaginary values */
00120 
00121   while (numSamples > 0U)
00122   {
00123     /* out = ((real * real) + (imag * imag)) */
00124     real = *pSrc++;
00125     imag = *pSrc++;
00126     acc0 = (real * real);
00127     acc1 = (imag * imag);
00128     /* store the result in 3.13 format in the destination buffer. */
00129     *pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 6);
00130 
00131     /* Decrement the loop counter */
00132     numSamples--;
00133   }
00134 
00135 #endif /* #if defined (ARM_MATH_DSP) */
00136 
00137 }
00138 
00139 /**
00140  * @} end of cmplx_mag_squared group
00141  */