Laxmi Kant Tiwari / mbed-dsp

Fork of mbed-dsp by mbed official

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_cmplx_mult_cmplx_q31.c Source File

arm_cmplx_mult_cmplx_q31.c

00001 /* ----------------------------------------------------------------------    
00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
00003 *    
00004 * $Date:        17. January 2013
00005 * $Revision:    V1.4.1
00006 *    
00007 * Project:      CMSIS DSP Library    
00008 * Title:        arm_cmplx_mult_cmplx_q31.c    
00009 *    
00010 * Description:  Q31 complex-by-complex multiplication    
00011 *    
00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00013 *  
00014 * Redistribution and use in source and binary forms, with or without 
00015 * modification, are permitted provided that the following conditions
00016 * are met:
00017 *   - Redistributions of source code must retain the above copyright
00018 *     notice, this list of conditions and the following disclaimer.
00019 *   - Redistributions in binary form must reproduce the above copyright
00020 *     notice, this list of conditions and the following disclaimer in
00021 *     the documentation and/or other materials provided with the 
00022 *     distribution.
00023 *   - Neither the name of ARM LIMITED nor the names of its contributors
00024 *     may be used to endorse or promote products derived from this
00025 *     software without specific prior written permission.
00026 *
00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00038 * POSSIBILITY OF SUCH DAMAGE. 
00039 * -------------------------------------------------------------------- */
00040 
00041 #include "arm_math.h"
00042 
00043 /**    
00044  * @ingroup groupCmplxMath    
00045  */
00046 
00047 /**    
00048  * @addtogroup CmplxByCmplxMult    
00049  * @{    
00050  */
00051 
00052 
00053 /**    
00054  * @brief  Q31 complex-by-complex multiplication    
00055  * @param[in]  *pSrcA points to the first input vector    
00056  * @param[in]  *pSrcB points to the second input vector    
00057  * @param[out]  *pDst  points to the output vector    
00058  * @param[in]  numSamples number of complex samples in each vector    
00059  * @return none.    
00060  *    
00061  * <b>Scaling and Overflow Behavior:</b>    
00062  * \par    
00063  * The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.    
00064  * Input down scaling is not required.    
00065  */
00066 
00067 void arm_cmplx_mult_cmplx_q31(
00068   q31_t * pSrcA,
00069   q31_t * pSrcB,
00070   q31_t * pDst,
00071   uint32_t numSamples)
00072 {
00073   q31_t a, b, c, d;                              /* Temporary variables to store real and imaginary values */
00074   uint32_t blkCnt;                               /* loop counters */
00075   q31_t mul1, mul2, mul3, mul4;
00076   q31_t out1, out2;
00077 
00078 #ifndef ARM_MATH_CM0_FAMILY
00079 
00080   /* Run the below code for Cortex-M4 and Cortex-M3 */
00081 
00082   /* loop Unrolling */
00083   blkCnt = numSamples >> 2u;
00084 
00085   /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
00086    ** a second loop below computes the remaining 1 to 3 samples. */
00087   while(blkCnt > 0u)
00088   {
00089     /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
00090     /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
00091     a = *pSrcA++;
00092     b = *pSrcA++;
00093     c = *pSrcB++;
00094     d = *pSrcB++;
00095 
00096     mul1 = (q31_t) (((q63_t) a * c) >> 32);
00097     mul2 = (q31_t) (((q63_t) b * d) >> 32);
00098     mul3 = (q31_t) (((q63_t) a * d) >> 32);
00099     mul4 = (q31_t) (((q63_t) b * c) >> 32);
00100 
00101     mul1 = (mul1 >> 1);
00102     mul2 = (mul2 >> 1);
00103     mul3 = (mul3 >> 1);
00104     mul4 = (mul4 >> 1);
00105 
00106     out1 = mul1 - mul2;
00107     out2 = mul3 + mul4;
00108 
00109     /* store the real result in 3.29 format in the destination buffer. */
00110     *pDst++ = out1;
00111     /* store the imag result in 3.29 format in the destination buffer. */
00112     *pDst++ = out2;
00113 
00114     a = *pSrcA++;
00115     b = *pSrcA++;
00116     c = *pSrcB++;
00117     d = *pSrcB++;
00118 
00119     mul1 = (q31_t) (((q63_t) a * c) >> 32);
00120     mul2 = (q31_t) (((q63_t) b * d) >> 32);
00121     mul3 = (q31_t) (((q63_t) a * d) >> 32);
00122     mul4 = (q31_t) (((q63_t) b * c) >> 32);
00123 
00124     mul1 = (mul1 >> 1);
00125     mul2 = (mul2 >> 1);
00126     mul3 = (mul3 >> 1);
00127     mul4 = (mul4 >> 1);
00128 
00129     out1 = mul1 - mul2;
00130     out2 = mul3 + mul4;
00131 
00132     /* store the real result in 3.29 format in the destination buffer. */
00133     *pDst++ = out1;
00134     /* store the imag result in 3.29 format in the destination buffer. */
00135     *pDst++ = out2;
00136 
00137     a = *pSrcA++;
00138     b = *pSrcA++;
00139     c = *pSrcB++;
00140     d = *pSrcB++;
00141 
00142     mul1 = (q31_t) (((q63_t) a * c) >> 32);
00143     mul2 = (q31_t) (((q63_t) b * d) >> 32);
00144     mul3 = (q31_t) (((q63_t) a * d) >> 32);
00145     mul4 = (q31_t) (((q63_t) b * c) >> 32);
00146 
00147     mul1 = (mul1 >> 1);
00148     mul2 = (mul2 >> 1);
00149     mul3 = (mul3 >> 1);
00150     mul4 = (mul4 >> 1);
00151 
00152     out1 = mul1 - mul2;
00153     out2 = mul3 + mul4;
00154 
00155     /* store the real result in 3.29 format in the destination buffer. */
00156     *pDst++ = out1;
00157     /* store the imag result in 3.29 format in the destination buffer. */
00158     *pDst++ = out2;
00159 
00160     a = *pSrcA++;
00161     b = *pSrcA++;
00162     c = *pSrcB++;
00163     d = *pSrcB++;
00164 
00165     mul1 = (q31_t) (((q63_t) a * c) >> 32);
00166     mul2 = (q31_t) (((q63_t) b * d) >> 32);
00167     mul3 = (q31_t) (((q63_t) a * d) >> 32);
00168     mul4 = (q31_t) (((q63_t) b * c) >> 32);
00169 
00170     mul1 = (mul1 >> 1);
00171     mul2 = (mul2 >> 1);
00172     mul3 = (mul3 >> 1);
00173     mul4 = (mul4 >> 1);
00174 
00175     out1 = mul1 - mul2;
00176     out2 = mul3 + mul4;
00177 
00178     /* store the real result in 3.29 format in the destination buffer. */
00179     *pDst++ = out1;
00180     /* store the imag result in 3.29 format in the destination buffer. */
00181     *pDst++ = out2;
00182 
00183     /* Decrement the blockSize loop counter */
00184     blkCnt--;
00185   }
00186 
00187   /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
00188    ** No loop unrolling is used. */
00189   blkCnt = numSamples % 0x4u;
00190 
00191   while(blkCnt > 0u)
00192   {
00193     /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
00194     /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
00195     a = *pSrcA++;
00196     b = *pSrcA++;
00197     c = *pSrcB++;
00198     d = *pSrcB++;
00199 
00200     mul1 = (q31_t) (((q63_t) a * c) >> 32);
00201     mul2 = (q31_t) (((q63_t) b * d) >> 32);
00202     mul3 = (q31_t) (((q63_t) a * d) >> 32);
00203     mul4 = (q31_t) (((q63_t) b * c) >> 32);
00204 
00205     mul1 = (mul1 >> 1);
00206     mul2 = (mul2 >> 1);
00207     mul3 = (mul3 >> 1);
00208     mul4 = (mul4 >> 1);
00209 
00210     out1 = mul1 - mul2;
00211     out2 = mul3 + mul4;
00212 
00213     /* store the real result in 3.29 format in the destination buffer. */
00214     *pDst++ = out1;
00215     /* store the imag result in 3.29 format in the destination buffer. */
00216     *pDst++ = out2;
00217 
00218     /* Decrement the blockSize loop counter */
00219     blkCnt--;
00220   }
00221 
00222 #else
00223 
00224   /* Run the below code for Cortex-M0 */
00225 
00226   /* loop Unrolling */
00227   blkCnt = numSamples >> 1u;
00228 
00229   /* First part of the processing with loop unrolling.  Compute 2 outputs at a time.     
00230    ** a second loop below computes the remaining 1 sample. */
00231   while(blkCnt > 0u)
00232   {
00233     /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
00234     /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
00235     a = *pSrcA++;
00236     b = *pSrcA++;
00237     c = *pSrcB++;
00238     d = *pSrcB++;
00239 
00240     mul1 = (q31_t) (((q63_t) a * c) >> 32);
00241     mul2 = (q31_t) (((q63_t) b * d) >> 32);
00242     mul3 = (q31_t) (((q63_t) a * d) >> 32);
00243     mul4 = (q31_t) (((q63_t) b * c) >> 32);
00244 
00245     mul1 = (mul1 >> 1);
00246     mul2 = (mul2 >> 1);
00247     mul3 = (mul3 >> 1);
00248     mul4 = (mul4 >> 1);
00249 
00250     out1 = mul1 - mul2;
00251     out2 = mul3 + mul4;
00252 
00253     /* store the real result in 3.29 format in the destination buffer. */
00254     *pDst++ = out1;
00255     /* store the imag result in 3.29 format in the destination buffer. */
00256     *pDst++ = out2;
00257 
00258     a = *pSrcA++;
00259     b = *pSrcA++;
00260     c = *pSrcB++;
00261     d = *pSrcB++;
00262 
00263     mul1 = (q31_t) (((q63_t) a * c) >> 32);
00264     mul2 = (q31_t) (((q63_t) b * d) >> 32);
00265     mul3 = (q31_t) (((q63_t) a * d) >> 32);
00266     mul4 = (q31_t) (((q63_t) b * c) >> 32);
00267 
00268     mul1 = (mul1 >> 1);
00269     mul2 = (mul2 >> 1);
00270     mul3 = (mul3 >> 1);
00271     mul4 = (mul4 >> 1);
00272 
00273     out1 = mul1 - mul2;
00274     out2 = mul3 + mul4;
00275 
00276     /* store the real result in 3.29 format in the destination buffer. */
00277     *pDst++ = out1;
00278     /* store the imag result in 3.29 format in the destination buffer. */
00279     *pDst++ = out2;
00280 
00281     /* Decrement the blockSize loop counter */
00282     blkCnt--;
00283   }
00284 
00285   /* If the blockSize is not a multiple of 2, compute any remaining output samples here.     
00286    ** No loop unrolling is used. */
00287   blkCnt = numSamples % 0x2u;
00288 
00289   while(blkCnt > 0u)
00290   {
00291     /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1].  */
00292     /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i].  */
00293     a = *pSrcA++;
00294     b = *pSrcA++;
00295     c = *pSrcB++;
00296     d = *pSrcB++;
00297 
00298     mul1 = (q31_t) (((q63_t) a * c) >> 32);
00299     mul2 = (q31_t) (((q63_t) b * d) >> 32);
00300     mul3 = (q31_t) (((q63_t) a * d) >> 32);
00301     mul4 = (q31_t) (((q63_t) b * c) >> 32);
00302 
00303     mul1 = (mul1 >> 1);
00304     mul2 = (mul2 >> 1);
00305     mul3 = (mul3 >> 1);
00306     mul4 = (mul4 >> 1);
00307 
00308     out1 = mul1 - mul2;
00309     out2 = mul3 + mul4;
00310 
00311     /* store the real result in 3.29 format in the destination buffer. */
00312     *pDst++ = out1;
00313     /* store the imag result in 3.29 format in the destination buffer. */
00314     *pDst++ = out2;
00315 
00316     /* Decrement the blockSize loop counter */
00317     blkCnt--;
00318   }
00319 
00320 #endif /* #ifndef ARM_MATH_CM0_FAMILY */
00321 
00322 }
00323 
00324 /**    
00325  * @} end of CmplxByCmplxMult group    
00326  */