Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-os by
arm_cmplx_mult_cmplx_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_cmplx_mult_cmplx_q31.c 00009 * 00010 * Description: Q31 complex-by-complex multiplication 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 00041 #include "arm_math.h" 00042 00043 /** 00044 * @ingroup groupCmplxMath 00045 */ 00046 00047 /** 00048 * @addtogroup CmplxByCmplxMult 00049 * @{ 00050 */ 00051 00052 00053 /** 00054 * @brief Q31 complex-by-complex multiplication 00055 * @param[in] *pSrcA points to the first input vector 00056 * @param[in] *pSrcB points to the second input vector 00057 * @param[out] *pDst points to the output vector 00058 * @param[in] numSamples number of complex samples in each vector 00059 * @return none. 00060 * 00061 * <b>Scaling and Overflow Behavior:</b> 00062 * \par 00063 * The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format. 00064 * Input down scaling is not required. 00065 */ 00066 00067 void arm_cmplx_mult_cmplx_q31( 00068 q31_t * pSrcA, 00069 q31_t * pSrcB, 00070 q31_t * pDst, 00071 uint32_t numSamples) 00072 { 00073 q31_t a, b, c, d; /* Temporary variables to store real and imaginary values */ 00074 uint32_t blkCnt; /* loop counters */ 00075 q31_t mul1, mul2, mul3, mul4; 00076 q31_t out1, out2; 00077 00078 #ifndef ARM_MATH_CM0_FAMILY 00079 00080 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00081 00082 /* loop Unrolling */ 00083 blkCnt = numSamples >> 2u; 00084 00085 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 
00086 ** a second loop below computes the remaining 1 to 3 samples. */ 00087 while(blkCnt > 0u) 00088 { 00089 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ 00090 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */ 00091 a = *pSrcA++; 00092 b = *pSrcA++; 00093 c = *pSrcB++; 00094 d = *pSrcB++; 00095 00096 mul1 = (q31_t) (((q63_t) a * c) >> 32); 00097 mul2 = (q31_t) (((q63_t) b * d) >> 32); 00098 mul3 = (q31_t) (((q63_t) a * d) >> 32); 00099 mul4 = (q31_t) (((q63_t) b * c) >> 32); 00100 00101 mul1 = (mul1 >> 1); 00102 mul2 = (mul2 >> 1); 00103 mul3 = (mul3 >> 1); 00104 mul4 = (mul4 >> 1); 00105 00106 out1 = mul1 - mul2; 00107 out2 = mul3 + mul4; 00108 00109 /* store the real result in 3.29 format in the destination buffer. */ 00110 *pDst++ = out1; 00111 /* store the imag result in 3.29 format in the destination buffer. */ 00112 *pDst++ = out2; 00113 00114 a = *pSrcA++; 00115 b = *pSrcA++; 00116 c = *pSrcB++; 00117 d = *pSrcB++; 00118 00119 mul1 = (q31_t) (((q63_t) a * c) >> 32); 00120 mul2 = (q31_t) (((q63_t) b * d) >> 32); 00121 mul3 = (q31_t) (((q63_t) a * d) >> 32); 00122 mul4 = (q31_t) (((q63_t) b * c) >> 32); 00123 00124 mul1 = (mul1 >> 1); 00125 mul2 = (mul2 >> 1); 00126 mul3 = (mul3 >> 1); 00127 mul4 = (mul4 >> 1); 00128 00129 out1 = mul1 - mul2; 00130 out2 = mul3 + mul4; 00131 00132 /* store the real result in 3.29 format in the destination buffer. */ 00133 *pDst++ = out1; 00134 /* store the imag result in 3.29 format in the destination buffer. 
*/ 00135 *pDst++ = out2; 00136 00137 a = *pSrcA++; 00138 b = *pSrcA++; 00139 c = *pSrcB++; 00140 d = *pSrcB++; 00141 00142 mul1 = (q31_t) (((q63_t) a * c) >> 32); 00143 mul2 = (q31_t) (((q63_t) b * d) >> 32); 00144 mul3 = (q31_t) (((q63_t) a * d) >> 32); 00145 mul4 = (q31_t) (((q63_t) b * c) >> 32); 00146 00147 mul1 = (mul1 >> 1); 00148 mul2 = (mul2 >> 1); 00149 mul3 = (mul3 >> 1); 00150 mul4 = (mul4 >> 1); 00151 00152 out1 = mul1 - mul2; 00153 out2 = mul3 + mul4; 00154 00155 /* store the real result in 3.29 format in the destination buffer. */ 00156 *pDst++ = out1; 00157 /* store the imag result in 3.29 format in the destination buffer. */ 00158 *pDst++ = out2; 00159 00160 a = *pSrcA++; 00161 b = *pSrcA++; 00162 c = *pSrcB++; 00163 d = *pSrcB++; 00164 00165 mul1 = (q31_t) (((q63_t) a * c) >> 32); 00166 mul2 = (q31_t) (((q63_t) b * d) >> 32); 00167 mul3 = (q31_t) (((q63_t) a * d) >> 32); 00168 mul4 = (q31_t) (((q63_t) b * c) >> 32); 00169 00170 mul1 = (mul1 >> 1); 00171 mul2 = (mul2 >> 1); 00172 mul3 = (mul3 >> 1); 00173 mul4 = (mul4 >> 1); 00174 00175 out1 = mul1 - mul2; 00176 out2 = mul3 + mul4; 00177 00178 /* store the real result in 3.29 format in the destination buffer. */ 00179 *pDst++ = out1; 00180 /* store the imag result in 3.29 format in the destination buffer. */ 00181 *pDst++ = out2; 00182 00183 /* Decrement the blockSize loop counter */ 00184 blkCnt--; 00185 } 00186 00187 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00188 ** No loop unrolling is used. */ 00189 blkCnt = numSamples % 0x4u; 00190 00191 while(blkCnt > 0u) 00192 { 00193 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ 00194 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. 
*/ 00195 a = *pSrcA++; 00196 b = *pSrcA++; 00197 c = *pSrcB++; 00198 d = *pSrcB++; 00199 00200 mul1 = (q31_t) (((q63_t) a * c) >> 32); 00201 mul2 = (q31_t) (((q63_t) b * d) >> 32); 00202 mul3 = (q31_t) (((q63_t) a * d) >> 32); 00203 mul4 = (q31_t) (((q63_t) b * c) >> 32); 00204 00205 mul1 = (mul1 >> 1); 00206 mul2 = (mul2 >> 1); 00207 mul3 = (mul3 >> 1); 00208 mul4 = (mul4 >> 1); 00209 00210 out1 = mul1 - mul2; 00211 out2 = mul3 + mul4; 00212 00213 /* store the real result in 3.29 format in the destination buffer. */ 00214 *pDst++ = out1; 00215 /* store the imag result in 3.29 format in the destination buffer. */ 00216 *pDst++ = out2; 00217 00218 /* Decrement the blockSize loop counter */ 00219 blkCnt--; 00220 } 00221 00222 #else 00223 00224 /* Run the below code for Cortex-M0 */ 00225 00226 /* loop Unrolling */ 00227 blkCnt = numSamples >> 1u; 00228 00229 /* First part of the processing with loop unrolling. Compute 2 outputs at a time. 00230 ** a second loop below computes the remaining 1 sample. */ 00231 while(blkCnt > 0u) 00232 { 00233 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ 00234 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */ 00235 a = *pSrcA++; 00236 b = *pSrcA++; 00237 c = *pSrcB++; 00238 d = *pSrcB++; 00239 00240 mul1 = (q31_t) (((q63_t) a * c) >> 32); 00241 mul2 = (q31_t) (((q63_t) b * d) >> 32); 00242 mul3 = (q31_t) (((q63_t) a * d) >> 32); 00243 mul4 = (q31_t) (((q63_t) b * c) >> 32); 00244 00245 mul1 = (mul1 >> 1); 00246 mul2 = (mul2 >> 1); 00247 mul3 = (mul3 >> 1); 00248 mul4 = (mul4 >> 1); 00249 00250 out1 = mul1 - mul2; 00251 out2 = mul3 + mul4; 00252 00253 /* store the real result in 3.29 format in the destination buffer. */ 00254 *pDst++ = out1; 00255 /* store the imag result in 3.29 format in the destination buffer. 
*/ 00256 *pDst++ = out2; 00257 00258 a = *pSrcA++; 00259 b = *pSrcA++; 00260 c = *pSrcB++; 00261 d = *pSrcB++; 00262 00263 mul1 = (q31_t) (((q63_t) a * c) >> 32); 00264 mul2 = (q31_t) (((q63_t) b * d) >> 32); 00265 mul3 = (q31_t) (((q63_t) a * d) >> 32); 00266 mul4 = (q31_t) (((q63_t) b * c) >> 32); 00267 00268 mul1 = (mul1 >> 1); 00269 mul2 = (mul2 >> 1); 00270 mul3 = (mul3 >> 1); 00271 mul4 = (mul4 >> 1); 00272 00273 out1 = mul1 - mul2; 00274 out2 = mul3 + mul4; 00275 00276 /* store the real result in 3.29 format in the destination buffer. */ 00277 *pDst++ = out1; 00278 /* store the imag result in 3.29 format in the destination buffer. */ 00279 *pDst++ = out2; 00280 00281 /* Decrement the blockSize loop counter */ 00282 blkCnt--; 00283 } 00284 00285 /* If the blockSize is not a multiple of 2, compute any remaining output samples here. 00286 ** No loop unrolling is used. */ 00287 blkCnt = numSamples % 0x2u; 00288 00289 while(blkCnt > 0u) 00290 { 00291 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ 00292 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */ 00293 a = *pSrcA++; 00294 b = *pSrcA++; 00295 c = *pSrcB++; 00296 d = *pSrcB++; 00297 00298 mul1 = (q31_t) (((q63_t) a * c) >> 32); 00299 mul2 = (q31_t) (((q63_t) b * d) >> 32); 00300 mul3 = (q31_t) (((q63_t) a * d) >> 32); 00301 mul4 = (q31_t) (((q63_t) b * c) >> 32); 00302 00303 mul1 = (mul1 >> 1); 00304 mul2 = (mul2 >> 1); 00305 mul3 = (mul3 >> 1); 00306 mul4 = (mul4 >> 1); 00307 00308 out1 = mul1 - mul2; 00309 out2 = mul3 + mul4; 00310 00311 /* store the real result in 3.29 format in the destination buffer. */ 00312 *pDst++ = out1; 00313 /* store the imag result in 3.29 format in the destination buffer. */ 00314 *pDst++ = out2; 00315 00316 /* Decrement the blockSize loop counter */ 00317 blkCnt--; 00318 } 00319 00320 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00321 00322 } 00323 00324 /** 00325 * @} end of CmplxByCmplxMult group 00326 */
Generated on Tue Jul 12 2022 13:15:21 by
