Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
arm_cmplx_mult_cmplx_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Project: CMSIS DSP Library 00003 * Title: arm_cmplx_mult_cmplx_q31.c 00004 * Description: Q31 complex-by-complex multiplication 00005 * 00006 * $Date: 27. January 2017 00007 * $Revision: V.1.5.1 00008 * 00009 * Target Processor: Cortex-M cores 00010 * -------------------------------------------------------------------- */ 00011 /* 00012 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. 00013 * 00014 * SPDX-License-Identifier: Apache-2.0 00015 * 00016 * Licensed under the Apache License, Version 2.0 (the License); you may 00017 * not use this file except in compliance with the License. 00018 * You may obtain a copy of the License at 00019 * 00020 * www.apache.org/licenses/LICENSE-2.0 00021 * 00022 * Unless required by applicable law or agreed to in writing, software 00023 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 00024 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00025 * See the License for the specific language governing permissions and 00026 * limitations under the License. 00027 */ 00028 00029 #include "arm_math.h" 00030 00031 /** 00032 * @ingroup groupCmplxMath 00033 */ 00034 00035 /** 00036 * @addtogroup CmplxByCmplxMult 00037 * @{ 00038 */ 00039 00040 00041 /** 00042 * @brief Q31 complex-by-complex multiplication 00043 * @param[in] *pSrcA points to the first input vector 00044 * @param[in] *pSrcB points to the second input vector 00045 * @param[out] *pDst points to the output vector 00046 * @param[in] numSamples number of complex samples in each vector 00047 * @return none. 00048 * 00049 * <b>Scaling and Overflow Behavior:</b> 00050 * \par 00051 * The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format. 00052 * Input down scaling is not required. 
00053 */ 00054 00055 void arm_cmplx_mult_cmplx_q31( 00056 q31_t * pSrcA, 00057 q31_t * pSrcB, 00058 q31_t * pDst, 00059 uint32_t numSamples) 00060 { 00061 q31_t a, b, c, d; /* Temporary variables to store real and imaginary values */ 00062 uint32_t blkCnt; /* loop counters */ 00063 q31_t mul1, mul2, mul3, mul4; 00064 q31_t out1, out2; 00065 00066 #if defined (ARM_MATH_DSP) 00067 00068 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00069 00070 /* loop Unrolling */ 00071 blkCnt = numSamples >> 2U; 00072 00073 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00074 ** a second loop below computes the remaining 1 to 3 samples. */ 00075 while (blkCnt > 0U) 00076 { 00077 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ 00078 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */ 00079 a = *pSrcA++; 00080 b = *pSrcA++; 00081 c = *pSrcB++; 00082 d = *pSrcB++; 00083 00084 mul1 = (q31_t) (((q63_t) a * c) >> 32); 00085 mul2 = (q31_t) (((q63_t) b * d) >> 32); 00086 mul3 = (q31_t) (((q63_t) a * d) >> 32); 00087 mul4 = (q31_t) (((q63_t) b * c) >> 32); 00088 00089 mul1 = (mul1 >> 1); 00090 mul2 = (mul2 >> 1); 00091 mul3 = (mul3 >> 1); 00092 mul4 = (mul4 >> 1); 00093 00094 out1 = mul1 - mul2; 00095 out2 = mul3 + mul4; 00096 00097 /* store the real result in 3.29 format in the destination buffer. */ 00098 *pDst++ = out1; 00099 /* store the imag result in 3.29 format in the destination buffer. 
*/ 00100 *pDst++ = out2; 00101 00102 a = *pSrcA++; 00103 b = *pSrcA++; 00104 c = *pSrcB++; 00105 d = *pSrcB++; 00106 00107 mul1 = (q31_t) (((q63_t) a * c) >> 32); 00108 mul2 = (q31_t) (((q63_t) b * d) >> 32); 00109 mul3 = (q31_t) (((q63_t) a * d) >> 32); 00110 mul4 = (q31_t) (((q63_t) b * c) >> 32); 00111 00112 mul1 = (mul1 >> 1); 00113 mul2 = (mul2 >> 1); 00114 mul3 = (mul3 >> 1); 00115 mul4 = (mul4 >> 1); 00116 00117 out1 = mul1 - mul2; 00118 out2 = mul3 + mul4; 00119 00120 /* store the real result in 3.29 format in the destination buffer. */ 00121 *pDst++ = out1; 00122 /* store the imag result in 3.29 format in the destination buffer. */ 00123 *pDst++ = out2; 00124 00125 a = *pSrcA++; 00126 b = *pSrcA++; 00127 c = *pSrcB++; 00128 d = *pSrcB++; 00129 00130 mul1 = (q31_t) (((q63_t) a * c) >> 32); 00131 mul2 = (q31_t) (((q63_t) b * d) >> 32); 00132 mul3 = (q31_t) (((q63_t) a * d) >> 32); 00133 mul4 = (q31_t) (((q63_t) b * c) >> 32); 00134 00135 mul1 = (mul1 >> 1); 00136 mul2 = (mul2 >> 1); 00137 mul3 = (mul3 >> 1); 00138 mul4 = (mul4 >> 1); 00139 00140 out1 = mul1 - mul2; 00141 out2 = mul3 + mul4; 00142 00143 /* store the real result in 3.29 format in the destination buffer. */ 00144 *pDst++ = out1; 00145 /* store the imag result in 3.29 format in the destination buffer. */ 00146 *pDst++ = out2; 00147 00148 a = *pSrcA++; 00149 b = *pSrcA++; 00150 c = *pSrcB++; 00151 d = *pSrcB++; 00152 00153 mul1 = (q31_t) (((q63_t) a * c) >> 32); 00154 mul2 = (q31_t) (((q63_t) b * d) >> 32); 00155 mul3 = (q31_t) (((q63_t) a * d) >> 32); 00156 mul4 = (q31_t) (((q63_t) b * c) >> 32); 00157 00158 mul1 = (mul1 >> 1); 00159 mul2 = (mul2 >> 1); 00160 mul3 = (mul3 >> 1); 00161 mul4 = (mul4 >> 1); 00162 00163 out1 = mul1 - mul2; 00164 out2 = mul3 + mul4; 00165 00166 /* store the real result in 3.29 format in the destination buffer. */ 00167 *pDst++ = out1; 00168 /* store the imag result in 3.29 format in the destination buffer. 
*/ 00169 *pDst++ = out2; 00170 00171 /* Decrement the blockSize loop counter */ 00172 blkCnt--; 00173 } 00174 00175 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00176 ** No loop unrolling is used. */ 00177 blkCnt = numSamples % 0x4U; 00178 00179 while (blkCnt > 0U) 00180 { 00181 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ 00182 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */ 00183 a = *pSrcA++; 00184 b = *pSrcA++; 00185 c = *pSrcB++; 00186 d = *pSrcB++; 00187 00188 mul1 = (q31_t) (((q63_t) a * c) >> 32); 00189 mul2 = (q31_t) (((q63_t) b * d) >> 32); 00190 mul3 = (q31_t) (((q63_t) a * d) >> 32); 00191 mul4 = (q31_t) (((q63_t) b * c) >> 32); 00192 00193 mul1 = (mul1 >> 1); 00194 mul2 = (mul2 >> 1); 00195 mul3 = (mul3 >> 1); 00196 mul4 = (mul4 >> 1); 00197 00198 out1 = mul1 - mul2; 00199 out2 = mul3 + mul4; 00200 00201 /* store the real result in 3.29 format in the destination buffer. */ 00202 *pDst++ = out1; 00203 /* store the imag result in 3.29 format in the destination buffer. */ 00204 *pDst++ = out2; 00205 00206 /* Decrement the blockSize loop counter */ 00207 blkCnt--; 00208 } 00209 00210 #else 00211 00212 /* Run the below code for Cortex-M0 */ 00213 00214 /* loop Unrolling */ 00215 blkCnt = numSamples >> 1U; 00216 00217 /* First part of the processing with loop unrolling. Compute 2 outputs at a time. 00218 ** a second loop below computes the remaining 1 sample. */ 00219 while (blkCnt > 0U) 00220 { 00221 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ 00222 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. 
*/ 00223 a = *pSrcA++; 00224 b = *pSrcA++; 00225 c = *pSrcB++; 00226 d = *pSrcB++; 00227 00228 mul1 = (q31_t) (((q63_t) a * c) >> 32); 00229 mul2 = (q31_t) (((q63_t) b * d) >> 32); 00230 mul3 = (q31_t) (((q63_t) a * d) >> 32); 00231 mul4 = (q31_t) (((q63_t) b * c) >> 32); 00232 00233 mul1 = (mul1 >> 1); 00234 mul2 = (mul2 >> 1); 00235 mul3 = (mul3 >> 1); 00236 mul4 = (mul4 >> 1); 00237 00238 out1 = mul1 - mul2; 00239 out2 = mul3 + mul4; 00240 00241 /* store the real result in 3.29 format in the destination buffer. */ 00242 *pDst++ = out1; 00243 /* store the imag result in 3.29 format in the destination buffer. */ 00244 *pDst++ = out2; 00245 00246 a = *pSrcA++; 00247 b = *pSrcA++; 00248 c = *pSrcB++; 00249 d = *pSrcB++; 00250 00251 mul1 = (q31_t) (((q63_t) a * c) >> 32); 00252 mul2 = (q31_t) (((q63_t) b * d) >> 32); 00253 mul3 = (q31_t) (((q63_t) a * d) >> 32); 00254 mul4 = (q31_t) (((q63_t) b * c) >> 32); 00255 00256 mul1 = (mul1 >> 1); 00257 mul2 = (mul2 >> 1); 00258 mul3 = (mul3 >> 1); 00259 mul4 = (mul4 >> 1); 00260 00261 out1 = mul1 - mul2; 00262 out2 = mul3 + mul4; 00263 00264 /* store the real result in 3.29 format in the destination buffer. */ 00265 *pDst++ = out1; 00266 /* store the imag result in 3.29 format in the destination buffer. */ 00267 *pDst++ = out2; 00268 00269 /* Decrement the blockSize loop counter */ 00270 blkCnt--; 00271 } 00272 00273 /* If the blockSize is not a multiple of 2, compute any remaining output samples here. 00274 ** No loop unrolling is used. */ 00275 blkCnt = numSamples % 0x2U; 00276 00277 while (blkCnt > 0U) 00278 { 00279 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ 00280 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. 
*/ 00281 a = *pSrcA++; 00282 b = *pSrcA++; 00283 c = *pSrcB++; 00284 d = *pSrcB++; 00285 00286 mul1 = (q31_t) (((q63_t) a * c) >> 32); 00287 mul2 = (q31_t) (((q63_t) b * d) >> 32); 00288 mul3 = (q31_t) (((q63_t) a * d) >> 32); 00289 mul4 = (q31_t) (((q63_t) b * c) >> 32); 00290 00291 mul1 = (mul1 >> 1); 00292 mul2 = (mul2 >> 1); 00293 mul3 = (mul3 >> 1); 00294 mul4 = (mul4 >> 1); 00295 00296 out1 = mul1 - mul2; 00297 out2 = mul3 + mul4; 00298 00299 /* store the real result in 3.29 format in the destination buffer. */ 00300 *pDst++ = out1; 00301 /* store the imag result in 3.29 format in the destination buffer. */ 00302 *pDst++ = out2; 00303 00304 /* Decrement the blockSize loop counter */ 00305 blkCnt--; 00306 } 00307 00308 #endif /* #if defined (ARM_MATH_DSP) */ 00309 00310 } 00311 00312 /** 00313 * @} end of CmplxByCmplxMult group 00314 */ 00315
Generated on Tue Jul 12 2022 16:46:23 by
1.7.2