Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-os by
arm_dct4_q31.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_dct4_q31.c 00009 * 00010 * Description: Processing function of DCT4 & IDCT4 Q31. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 00041 #include "arm_math.h" 00042 00043 /** 00044 * @addtogroup DCT4_IDCT4 00045 * @{ 00046 */ 00047 00048 /** 00049 * @brief Processing function for the Q31 DCT4/IDCT4. 00050 * @param[in] *S points to an instance of the Q31 DCT4 structure. 00051 * @param[in] *pState points to state buffer. 00052 * @param[in,out] *pInlineBuffer points to the in-place input and output buffer. 00053 * @return none. 00054 * \par Input an output formats: 00055 * Input samples need to be downscaled by 1 bit to avoid saturations in the Q31 DCT process, 00056 * as the conversion from DCT2 to DCT4 involves one subtraction. 00057 * Internally inputs are downscaled in the RFFT process function to avoid overflows. 00058 * Number of bits downscaled, depends on the size of the transform. 00059 * The input and output formats for different DCT sizes and number of bits to upscale are mentioned in the table below: 00060 * 00061 * \image html dct4FormatsQ31Table.gif 00062 */ 00063 00064 void arm_dct4_q31( 00065 const arm_dct4_instance_q31 * S, 00066 q31_t * pState, 00067 q31_t * pInlineBuffer) 00068 { 00069 uint16_t i; /* Loop counter */ 00070 q31_t *weights = S->pTwiddle; /* Pointer to the Weights table */ 00071 q31_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */ 00072 q31_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */ 00073 q31_t in; /* Temporary variable */ 00074 00075 00076 /* DCT4 computation involves DCT2 (which is calculated using RFFT) 00077 * along with some pre-processing and post-processing. 00078 * Computational procedure is explained as follows: 00079 * (a) Pre-processing involves multiplying input with cos factor, 00080 * r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n)) 00081 * where, 00082 * r(n) -- output of preprocessing 00083 * u(n) -- input to preprocessing(actual Source buffer) 00084 * (b) Calculation of DCT2 using FFT is divided into three steps: 00085 * Step1: Re-ordering of even and odd elements of input. 00086 * Step2: Calculating FFT of the re-ordered input. 00087 * Step3: Taking the real part of the product of FFT output and weights. 00088 * (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation: 00089 * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0) 00090 * where, 00091 * Y4 -- DCT4 output, Y2 -- DCT2 output 00092 * (d) Multiplying the output with the normalizing factor sqrt(2/N). 00093 */ 00094 00095 /*-------- Pre-processing ------------*/ 00096 /* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */ 00097 arm_mult_q31(pInlineBuffer, cosFact, pInlineBuffer, S->N); 00098 arm_shift_q31(pInlineBuffer, 1, pInlineBuffer, S->N); 00099 00100 /* ---------------------------------------------------------------- 00101 * Step1: Re-ordering of even and odd elements as 00102 * pState[i] = pInlineBuffer[2*i] and 00103 * pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2 00104 ---------------------------------------------------------------------*/ 00105 00106 /* pS1 initialized to pState */ 00107 pS1 = pState; 00108 00109 /* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */ 00110 pS2 = pState + (S->N - 1u); 00111 00112 /* pbuff initialized to input buffer */ 00113 pbuff = pInlineBuffer; 00114 00115 #ifndef ARM_MATH_CM0_FAMILY 00116 00117 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00118 00119 /* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */ 00120 i = S->Nby2 >> 2u; 00121 00122 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00123 ** a second loop below computes the remaining 1 to 3 samples. */ 00124 do 00125 { 00126 /* Re-ordering of even and odd elements */ 00127 /* pState[i] = pInlineBuffer[2*i] */ 00128 *pS1++ = *pbuff++; 00129 /* pState[N-i-1] = pInlineBuffer[2*i+1] */ 00130 *pS2-- = *pbuff++; 00131 00132 *pS1++ = *pbuff++; 00133 *pS2-- = *pbuff++; 00134 00135 *pS1++ = *pbuff++; 00136 *pS2-- = *pbuff++; 00137 00138 *pS1++ = *pbuff++; 00139 *pS2-- = *pbuff++; 00140 00141 /* Decrement the loop counter */ 00142 i--; 00143 } while(i > 0u); 00144 00145 /* pbuff initialized to input buffer */ 00146 pbuff = pInlineBuffer; 00147 00148 /* pS1 initialized to pState */ 00149 pS1 = pState; 00150 00151 /* Initializing the loop counter to N/4 instead of N for loop unrolling */ 00152 i = S->N >> 2u; 00153 00154 /* Processing with loop unrolling 4 times as N is always multiple of 4. 00155 * Compute 4 outputs at a time */ 00156 do 00157 { 00158 /* Writing the re-ordered output back to inplace input buffer */ 00159 *pbuff++ = *pS1++; 00160 *pbuff++ = *pS1++; 00161 *pbuff++ = *pS1++; 00162 *pbuff++ = *pS1++; 00163 00164 /* Decrement the loop counter */ 00165 i--; 00166 } while(i > 0u); 00167 00168 00169 /* --------------------------------------------------------- 00170 * Step2: Calculate RFFT for N-point input 00171 * ---------------------------------------------------------- */ 00172 /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */ 00173 arm_rfft_q31(S->pRfft, pInlineBuffer, pState); 00174 00175 /*---------------------------------------------------------------------- 00176 * Step3: Multiply the FFT output with the weights. 00177 *----------------------------------------------------------------------*/ 00178 arm_cmplx_mult_cmplx_q31(pState, weights, pState, S->N); 00179 00180 /* The output of complex multiplication is in 3.29 format. 00181 * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.31 format by shifting left by 2 bits. */ 00182 arm_shift_q31(pState, 2, pState, S->N * 2); 00183 00184 /* ----------- Post-processing ---------- */ 00185 /* DCT-IV can be obtained from DCT-II by the equation, 00186 * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0) 00187 * Hence, Y4(0) = Y2(0)/2 */ 00188 /* Getting only real part from the output and Converting to DCT-IV */ 00189 00190 /* Initializing the loop counter to N >> 2 for loop unrolling by 4 */ 00191 i = (S->N - 1u) >> 2u; 00192 00193 /* pbuff initialized to input buffer. */ 00194 pbuff = pInlineBuffer; 00195 00196 /* pS1 initialized to pState */ 00197 pS1 = pState; 00198 00199 /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */ 00200 in = *pS1++ >> 1u; 00201 /* input buffer acts as inplace, so output values are stored in the input itself. */ 00202 *pbuff++ = in; 00203 00204 /* pState pointer is incremented twice as the real values are located alternatively in the array */ 00205 pS1++; 00206 00207 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00208 ** a second loop below computes the remaining 1 to 3 samples. */ 00209 do 00210 { 00211 /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */ 00212 /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */ 00213 in = *pS1++ - in; 00214 *pbuff++ = in; 00215 /* points to the next real value */ 00216 pS1++; 00217 00218 in = *pS1++ - in; 00219 *pbuff++ = in; 00220 pS1++; 00221 00222 in = *pS1++ - in; 00223 *pbuff++ = in; 00224 pS1++; 00225 00226 in = *pS1++ - in; 00227 *pbuff++ = in; 00228 pS1++; 00229 00230 /* Decrement the loop counter */ 00231 i--; 00232 } while(i > 0u); 00233 00234 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00235 ** No loop unrolling is used. */ 00236 i = (S->N - 1u) % 0x4u; 00237 00238 while(i > 0u) 00239 { 00240 /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */ 00241 /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */ 00242 in = *pS1++ - in; 00243 *pbuff++ = in; 00244 /* points to the next real value */ 00245 pS1++; 00246 00247 /* Decrement the loop counter */ 00248 i--; 00249 } 00250 00251 00252 /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/ 00253 00254 /* Initializing the loop counter to N/4 instead of N for loop unrolling */ 00255 i = S->N >> 2u; 00256 00257 /* pbuff initialized to the pInlineBuffer(now contains the output values) */ 00258 pbuff = pInlineBuffer; 00259 00260 /* Processing with loop unrolling 4 times as N is always multiple of 4. Compute 4 outputs at a time */ 00261 do 00262 { 00263 /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */ 00264 in = *pbuff; 00265 *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31)); 00266 00267 in = *pbuff; 00268 *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31)); 00269 00270 in = *pbuff; 00271 *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31)); 00272 00273 in = *pbuff; 00274 *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31)); 00275 00276 /* Decrement the loop counter */ 00277 i--; 00278 } while(i > 0u); 00279 00280 00281 #else 00282 00283 /* Run the below code for Cortex-M0 */ 00284 00285 /* Initializing the loop counter to N/2 */ 00286 i = S->Nby2; 00287 00288 do 00289 { 00290 /* Re-ordering of even and odd elements */ 00291 /* pState[i] = pInlineBuffer[2*i] */ 00292 *pS1++ = *pbuff++; 00293 /* pState[N-i-1] = pInlineBuffer[2*i+1] */ 00294 *pS2-- = *pbuff++; 00295 00296 /* Decrement the loop counter */ 00297 i--; 00298 } while(i > 0u); 00299 00300 /* pbuff initialized to input buffer */ 00301 pbuff = pInlineBuffer; 00302 00303 /* pS1 initialized to pState */ 00304 pS1 = pState; 00305 00306 /* Initializing the loop counter */ 00307 i = S->N; 00308 00309 do 00310 { 00311 /* Writing the re-ordered output back to inplace input buffer */ 00312 *pbuff++ = *pS1++; 00313 00314 /* Decrement the loop counter */ 00315 i--; 00316 } while(i > 0u); 00317 00318 00319 /* --------------------------------------------------------- 00320 * Step2: Calculate RFFT for N-point input 00321 * ---------------------------------------------------------- */ 00322 /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */ 00323 arm_rfft_q31(S->pRfft, pInlineBuffer, pState); 00324 00325 /*---------------------------------------------------------------------- 00326 * Step3: Multiply the FFT output with the weights. 00327 *----------------------------------------------------------------------*/ 00328 arm_cmplx_mult_cmplx_q31(pState, weights, pState, S->N); 00329 00330 /* The output of complex multiplication is in 3.29 format. 00331 * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.31 format by shifting left by 2 bits. */ 00332 arm_shift_q31(pState, 2, pState, S->N * 2); 00333 00334 /* ----------- Post-processing ---------- */ 00335 /* DCT-IV can be obtained from DCT-II by the equation, 00336 * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0) 00337 * Hence, Y4(0) = Y2(0)/2 */ 00338 /* Getting only real part from the output and Converting to DCT-IV */ 00339 00340 /* pbuff initialized to input buffer. */ 00341 pbuff = pInlineBuffer; 00342 00343 /* pS1 initialized to pState */ 00344 pS1 = pState; 00345 00346 /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */ 00347 in = *pS1++ >> 1u; 00348 /* input buffer acts as inplace, so output values are stored in the input itself. */ 00349 *pbuff++ = in; 00350 00351 /* pState pointer is incremented twice as the real values are located alternatively in the array */ 00352 pS1++; 00353 00354 /* Initializing the loop counter */ 00355 i = (S->N - 1u); 00356 00357 while(i > 0u) 00358 { 00359 /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */ 00360 /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */ 00361 in = *pS1++ - in; 00362 *pbuff++ = in; 00363 /* points to the next real value */ 00364 pS1++; 00365 00366 /* Decrement the loop counter */ 00367 i--; 00368 } 00369 00370 00371 /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/ 00372 00373 /* Initializing the loop counter */ 00374 i = S->N; 00375 00376 /* pbuff initialized to the pInlineBuffer(now contains the output values) */ 00377 pbuff = pInlineBuffer; 00378 00379 do 00380 { 00381 /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */ 00382 in = *pbuff; 00383 *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31)); 00384 00385 /* Decrement the loop counter */ 00386 i--; 00387 } while(i > 0u); 00388 00389 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00390 00391 } 00392 00393 /** 00394 * @} end of DCT4_IDCT4 group 00395 */
Generated on Tue Jul 12 2022 13:15:24 by
