Robert Lopez / CMSIS5
Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_cmplx_mult_cmplx_q31.c Source File

arm_cmplx_mult_cmplx_q31.c

00001 /* ----------------------------------------------------------------------
00002  * Project:      CMSIS DSP Library
00003  * Title:        arm_cmplx_mult_cmplx_q31.c
00004  * Description:  Q31 complex-by-complex multiplication
00005  *
00006  * $Date:        27. January 2017
00007  * $Revision:    V.1.5.1
00008  *
00009  * Target Processor: Cortex-M cores
00010  * -------------------------------------------------------------------- */
00011 /*
00012  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
00013  *
00014  * SPDX-License-Identifier: Apache-2.0
00015  *
00016  * Licensed under the Apache License, Version 2.0 (the License); you may
00017  * not use this file except in compliance with the License.
00018  * You may obtain a copy of the License at
00019  *
00020  * www.apache.org/licenses/LICENSE-2.0
00021  *
00022  * Unless required by applicable law or agreed to in writing, software
00023  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
00024  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00025  * See the License for the specific language governing permissions and
00026  * limitations under the License.
00027  */
00028 
00029 #include "arm_math.h"
00030 
00031 /**
00032  * @ingroup groupCmplxMath
00033  */
00034 
00035 /**
00036  * @addtogroup CmplxByCmplxMult
00037  * @{
00038  */
00039 
00040 
00041 /**
00042  * @brief  Q31 complex-by-complex multiplication
00043  * @param[in]  *pSrcA points to the first input vector
00044  * @param[in]  *pSrcB points to the second input vector
00045  * @param[out]  *pDst  points to the output vector
00046  * @param[in]  numSamples number of complex samples in each vector
00047  * @return none.
00048  *
00049  * <b>Scaling and Overflow Behavior:</b>
00050  * \par
00051  * The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.
00052  * Input down scaling is not required.
00053  */
00054 
00055 void arm_cmplx_mult_cmplx_q31(
00056   q31_t * pSrcA,
00057   q31_t * pSrcB,
00058   q31_t * pDst,
00059   uint32_t numSamples)
00060 {
00061   q31_t a, b, c, d;                              /* Temporary variables to store real and imaginary values */
00062   uint32_t blkCnt;                               /* loop counters */
00063   q31_t mul1, mul2, mul3, mul4;
00064   q31_t out1, out2;
00065 
00066 #if defined (ARM_MATH_DSP)
00067 
00068   /* Run the below code for Cortex-M4 and Cortex-M3 */
00069 
00070   /* loop Unrolling */
00071   blkCnt = numSamples >> 2U;
00072 
00073   /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
00074    ** a second loop below computes the remaining 1 to 3 samples. */
00075   while (blkCnt > 0U)
00076   {
00077     /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
00078     /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
00079     a = *pSrcA++;
00080     b = *pSrcA++;
00081     c = *pSrcB++;
00082     d = *pSrcB++;
00083 
00084     mul1 = (q31_t) (((q63_t) a * c) >> 32);
00085     mul2 = (q31_t) (((q63_t) b * d) >> 32);
00086     mul3 = (q31_t) (((q63_t) a * d) >> 32);
00087     mul4 = (q31_t) (((q63_t) b * c) >> 32);
00088 
00089     mul1 = (mul1 >> 1);
00090     mul2 = (mul2 >> 1);
00091     mul3 = (mul3 >> 1);
00092     mul4 = (mul4 >> 1);
00093 
00094     out1 = mul1 - mul2;
00095     out2 = mul3 + mul4;
00096 
00097     /* store the real result in 3.29 format in the destination buffer. */
00098     *pDst++ = out1;
00099     /* store the imag result in 3.29 format in the destination buffer. */
00100     *pDst++ = out2;
00101 
00102     a = *pSrcA++;
00103     b = *pSrcA++;
00104     c = *pSrcB++;
00105     d = *pSrcB++;
00106 
00107     mul1 = (q31_t) (((q63_t) a * c) >> 32);
00108     mul2 = (q31_t) (((q63_t) b * d) >> 32);
00109     mul3 = (q31_t) (((q63_t) a * d) >> 32);
00110     mul4 = (q31_t) (((q63_t) b * c) >> 32);
00111 
00112     mul1 = (mul1 >> 1);
00113     mul2 = (mul2 >> 1);
00114     mul3 = (mul3 >> 1);
00115     mul4 = (mul4 >> 1);
00116 
00117     out1 = mul1 - mul2;
00118     out2 = mul3 + mul4;
00119 
00120     /* store the real result in 3.29 format in the destination buffer. */
00121     *pDst++ = out1;
00122     /* store the imag result in 3.29 format in the destination buffer. */
00123     *pDst++ = out2;
00124 
00125     a = *pSrcA++;
00126     b = *pSrcA++;
00127     c = *pSrcB++;
00128     d = *pSrcB++;
00129 
00130     mul1 = (q31_t) (((q63_t) a * c) >> 32);
00131     mul2 = (q31_t) (((q63_t) b * d) >> 32);
00132     mul3 = (q31_t) (((q63_t) a * d) >> 32);
00133     mul4 = (q31_t) (((q63_t) b * c) >> 32);
00134 
00135     mul1 = (mul1 >> 1);
00136     mul2 = (mul2 >> 1);
00137     mul3 = (mul3 >> 1);
00138     mul4 = (mul4 >> 1);
00139 
00140     out1 = mul1 - mul2;
00141     out2 = mul3 + mul4;
00142 
00143     /* store the real result in 3.29 format in the destination buffer. */
00144     *pDst++ = out1;
00145     /* store the imag result in 3.29 format in the destination buffer. */
00146     *pDst++ = out2;
00147 
00148     a = *pSrcA++;
00149     b = *pSrcA++;
00150     c = *pSrcB++;
00151     d = *pSrcB++;
00152 
00153     mul1 = (q31_t) (((q63_t) a * c) >> 32);
00154     mul2 = (q31_t) (((q63_t) b * d) >> 32);
00155     mul3 = (q31_t) (((q63_t) a * d) >> 32);
00156     mul4 = (q31_t) (((q63_t) b * c) >> 32);
00157 
00158     mul1 = (mul1 >> 1);
00159     mul2 = (mul2 >> 1);
00160     mul3 = (mul3 >> 1);
00161     mul4 = (mul4 >> 1);
00162 
00163     out1 = mul1 - mul2;
00164     out2 = mul3 + mul4;
00165 
00166     /* store the real result in 3.29 format in the destination buffer. */
00167     *pDst++ = out1;
00168     /* store the imag result in 3.29 format in the destination buffer. */
00169     *pDst++ = out2;
00170 
00171     /* Decrement the blockSize loop counter */
00172     blkCnt--;
00173   }
00174 
00175   /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
00176    ** No loop unrolling is used. */
00177   blkCnt = numSamples % 0x4U;
00178 
00179   while (blkCnt > 0U)
00180   {
00181     /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
00182     /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
00183     a = *pSrcA++;
00184     b = *pSrcA++;
00185     c = *pSrcB++;
00186     d = *pSrcB++;
00187 
00188     mul1 = (q31_t) (((q63_t) a * c) >> 32);
00189     mul2 = (q31_t) (((q63_t) b * d) >> 32);
00190     mul3 = (q31_t) (((q63_t) a * d) >> 32);
00191     mul4 = (q31_t) (((q63_t) b * c) >> 32);
00192 
00193     mul1 = (mul1 >> 1);
00194     mul2 = (mul2 >> 1);
00195     mul3 = (mul3 >> 1);
00196     mul4 = (mul4 >> 1);
00197 
00198     out1 = mul1 - mul2;
00199     out2 = mul3 + mul4;
00200 
00201     /* store the real result in 3.29 format in the destination buffer. */
00202     *pDst++ = out1;
00203     /* store the imag result in 3.29 format in the destination buffer. */
00204     *pDst++ = out2;
00205 
00206     /* Decrement the blockSize loop counter */
00207     blkCnt--;
00208   }
00209 
00210 #else
00211 
00212   /* Run the below code for Cortex-M0 */
00213 
00214   /* loop Unrolling */
00215   blkCnt = numSamples >> 1U;
00216 
00217   /* First part of the processing with loop unrolling.  Compute 2 outputs at a time.
00218    ** a second loop below computes the remaining 1 sample. */
00219   while (blkCnt > 0U)
00220   {
00221     /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
00222     /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
00223     a = *pSrcA++;
00224     b = *pSrcA++;
00225     c = *pSrcB++;
00226     d = *pSrcB++;
00227 
00228     mul1 = (q31_t) (((q63_t) a * c) >> 32);
00229     mul2 = (q31_t) (((q63_t) b * d) >> 32);
00230     mul3 = (q31_t) (((q63_t) a * d) >> 32);
00231     mul4 = (q31_t) (((q63_t) b * c) >> 32);
00232 
00233     mul1 = (mul1 >> 1);
00234     mul2 = (mul2 >> 1);
00235     mul3 = (mul3 >> 1);
00236     mul4 = (mul4 >> 1);
00237 
00238     out1 = mul1 - mul2;
00239     out2 = mul3 + mul4;
00240 
00241     /* store the real result in 3.29 format in the destination buffer. */
00242     *pDst++ = out1;
00243     /* store the imag result in 3.29 format in the destination buffer. */
00244     *pDst++ = out2;
00245 
00246     a = *pSrcA++;
00247     b = *pSrcA++;
00248     c = *pSrcB++;
00249     d = *pSrcB++;
00250 
00251     mul1 = (q31_t) (((q63_t) a * c) >> 32);
00252     mul2 = (q31_t) (((q63_t) b * d) >> 32);
00253     mul3 = (q31_t) (((q63_t) a * d) >> 32);
00254     mul4 = (q31_t) (((q63_t) b * c) >> 32);
00255 
00256     mul1 = (mul1 >> 1);
00257     mul2 = (mul2 >> 1);
00258     mul3 = (mul3 >> 1);
00259     mul4 = (mul4 >> 1);
00260 
00261     out1 = mul1 - mul2;
00262     out2 = mul3 + mul4;
00263 
00264     /* store the real result in 3.29 format in the destination buffer. */
00265     *pDst++ = out1;
00266     /* store the imag result in 3.29 format in the destination buffer. */
00267     *pDst++ = out2;
00268 
00269     /* Decrement the blockSize loop counter */
00270     blkCnt--;
00271   }
00272 
00273   /* If the blockSize is not a multiple of 2, compute any remaining output samples here.
00274    ** No loop unrolling is used. */
00275   blkCnt = numSamples % 0x2U;
00276 
00277   while (blkCnt > 0U)
00278   {
00279     /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
00280     /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
00281     a = *pSrcA++;
00282     b = *pSrcA++;
00283     c = *pSrcB++;
00284     d = *pSrcB++;
00285 
00286     mul1 = (q31_t) (((q63_t) a * c) >> 32);
00287     mul2 = (q31_t) (((q63_t) b * d) >> 32);
00288     mul3 = (q31_t) (((q63_t) a * d) >> 32);
00289     mul4 = (q31_t) (((q63_t) b * c) >> 32);
00290 
00291     mul1 = (mul1 >> 1);
00292     mul2 = (mul2 >> 1);
00293     mul3 = (mul3 >> 1);
00294     mul4 = (mul4 >> 1);
00295 
00296     out1 = mul1 - mul2;
00297     out2 = mul3 + mul4;
00298 
00299     /* store the real result in 3.29 format in the destination buffer. */
00300     *pDst++ = out1;
00301     /* store the imag result in 3.29 format in the destination buffer. */
00302     *pDst++ = out2;
00303 
00304     /* Decrement the blockSize loop counter */
00305     blkCnt--;
00306   }
00307 
00308 #endif /* #if defined (ARM_MATH_DSP) */
00309 
00310 }
00311 
00312 /**
00313  * @} end of CmplxByCmplxMult group
00314  */
00315