Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-os by
arm_dct4_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2014 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 19. March 2015 00005 * $Revision: V.1.4.5 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_dct4_q15.c 00009 * 00010 * Description: Processing function of DCT4 & IDCT4 Q15. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 00041 #include "arm_math.h" 00042 00043 /** 00044 * @addtogroup DCT4_IDCT4 00045 * @{ 00046 */ 00047 00048 /** 00049 * @brief Processing function for the Q15 DCT4/IDCT4. 00050 * @param[in] *S points to an instance of the Q15 DCT4 structure. 00051 * @param[in] *pState points to state buffer. 00052 * @param[in,out] *pInlineBuffer points to the in-place input and output buffer. 00053 * @return none. 00054 * 00055 * \par Input an output formats: 00056 * Internally inputs are downscaled in the RFFT process function to avoid overflows. 00057 * Number of bits downscaled, depends on the size of the transform. 
00058 * The input and output formats for different DCT sizes and number of bits to upscale are mentioned in the table below: 00059 * 00060 * \image html dct4FormatsQ15Table.gif 00061 */ 00062 00063 void arm_dct4_q15( 00064 const arm_dct4_instance_q15 * S, 00065 q15_t * pState, 00066 q15_t * pInlineBuffer) 00067 { 00068 uint32_t i; /* Loop counter */ 00069 q15_t *weights = S->pTwiddle; /* Pointer to the Weights table */ 00070 q15_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */ 00071 q15_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */ 00072 q15_t in; /* Temporary variable */ 00073 00074 00075 /* DCT4 computation involves DCT2 (which is calculated using RFFT) 00076 * along with some pre-processing and post-processing. 00077 * Computational procedure is explained as follows: 00078 * (a) Pre-processing involves multiplying input with cos factor, 00079 * r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n)) 00080 * where, 00081 * r(n) -- output of preprocessing 00082 * u(n) -- input to preprocessing(actual Source buffer) 00083 * (b) Calculation of DCT2 using FFT is divided into three steps: 00084 * Step1: Re-ordering of even and odd elements of input. 00085 * Step2: Calculating FFT of the re-ordered input. 00086 * Step3: Taking the real part of the product of FFT output and weights. 00087 * (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation: 00088 * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0) 00089 * where, 00090 * Y4 -- DCT4 output, Y2 -- DCT2 output 00091 * (d) Multiplying the output with the normalizing factor sqrt(2/N). 00092 */ 00093 00094 /*-------- Pre-processing ------------*/ 00095 /* Multiplying input with cos factor i.e. 
r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */ 00096 arm_mult_q15(pInlineBuffer, cosFact, pInlineBuffer, S->N); 00097 arm_shift_q15(pInlineBuffer, 1, pInlineBuffer, S->N); 00098 00099 /* ---------------------------------------------------------------- 00100 * Step1: Re-ordering of even and odd elements as 00101 * pState[i] = pInlineBuffer[2*i] and 00102 * pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2 00103 ---------------------------------------------------------------------*/ 00104 00105 /* pS1 initialized to pState */ 00106 pS1 = pState; 00107 00108 /* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */ 00109 pS2 = pState + (S->N - 1u); 00110 00111 /* pbuff initialized to input buffer */ 00112 pbuff = pInlineBuffer; 00113 00114 00115 #ifndef ARM_MATH_CM0_FAMILY 00116 00117 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00118 00119 /* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */ 00120 i = (uint32_t) S->Nby2 >> 2u; 00121 00122 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00123 ** a second loop below computes the remaining 1 to 3 samples. */ 00124 do 00125 { 00126 /* Re-ordering of even and odd elements */ 00127 /* pState[i] = pInlineBuffer[2*i] */ 00128 *pS1++ = *pbuff++; 00129 /* pState[N-i-1] = pInlineBuffer[2*i+1] */ 00130 *pS2-- = *pbuff++; 00131 00132 *pS1++ = *pbuff++; 00133 *pS2-- = *pbuff++; 00134 00135 *pS1++ = *pbuff++; 00136 *pS2-- = *pbuff++; 00137 00138 *pS1++ = *pbuff++; 00139 *pS2-- = *pbuff++; 00140 00141 /* Decrement the loop counter */ 00142 i--; 00143 } while(i > 0u); 00144 00145 /* pbuff initialized to input buffer */ 00146 pbuff = pInlineBuffer; 00147 00148 /* pS1 initialized to pState */ 00149 pS1 = pState; 00150 00151 /* Initializing the loop counter to N/4 instead of N for loop unrolling */ 00152 i = (uint32_t) S->N >> 2u; 00153 00154 /* Processing with loop unrolling 4 times as N is always multiple of 4. 
00155 * Compute 4 outputs at a time */ 00156 do 00157 { 00158 /* Writing the re-ordered output back to inplace input buffer */ 00159 *pbuff++ = *pS1++; 00160 *pbuff++ = *pS1++; 00161 *pbuff++ = *pS1++; 00162 *pbuff++ = *pS1++; 00163 00164 /* Decrement the loop counter */ 00165 i--; 00166 } while(i > 0u); 00167 00168 00169 /* --------------------------------------------------------- 00170 * Step2: Calculate RFFT for N-point input 00171 * ---------------------------------------------------------- */ 00172 /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */ 00173 arm_rfft_q15(S->pRfft, pInlineBuffer, pState); 00174 00175 /*---------------------------------------------------------------------- 00176 * Step3: Multiply the FFT output with the weights. 00177 *----------------------------------------------------------------------*/ 00178 arm_cmplx_mult_cmplx_q15(pState, weights, pState, S->N); 00179 00180 /* The output of complex multiplication is in 3.13 format. 00181 * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */ 00182 arm_shift_q15(pState, 2, pState, S->N * 2); 00183 00184 /* ----------- Post-processing ---------- */ 00185 /* DCT-IV can be obtained from DCT-II by the equation, 00186 * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0) 00187 * Hence, Y4(0) = Y2(0)/2 */ 00188 /* Getting only real part from the output and Converting to DCT-IV */ 00189 00190 /* Initializing the loop counter to N >> 2 for loop unrolling by 4 */ 00191 i = ((uint32_t) S->N - 1u) >> 2u; 00192 00193 /* pbuff initialized to input buffer. */ 00194 pbuff = pInlineBuffer; 00195 00196 /* pS1 initialized to pState */ 00197 pS1 = pState; 00198 00199 /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */ 00200 in = *pS1++ >> 1u; 00201 /* input buffer acts as inplace, so output values are stored in the input itself. 
*/ 00202 *pbuff++ = in; 00203 00204 /* pState pointer is incremented twice as the real values are located alternatively in the array */ 00205 pS1++; 00206 00207 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00208 ** a second loop below computes the remaining 1 to 3 samples. */ 00209 do 00210 { 00211 /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */ 00212 /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */ 00213 in = *pS1++ - in; 00214 *pbuff++ = in; 00215 /* points to the next real value */ 00216 pS1++; 00217 00218 in = *pS1++ - in; 00219 *pbuff++ = in; 00220 pS1++; 00221 00222 in = *pS1++ - in; 00223 *pbuff++ = in; 00224 pS1++; 00225 00226 in = *pS1++ - in; 00227 *pbuff++ = in; 00228 pS1++; 00229 00230 /* Decrement the loop counter */ 00231 i--; 00232 } while(i > 0u); 00233 00234 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00235 ** No loop unrolling is used. */ 00236 i = ((uint32_t) S->N - 1u) % 0x4u; 00237 00238 while(i > 0u) 00239 { 00240 /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */ 00241 /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */ 00242 in = *pS1++ - in; 00243 *pbuff++ = in; 00244 /* points to the next real value */ 00245 pS1++; 00246 00247 /* Decrement the loop counter */ 00248 i--; 00249 } 00250 00251 00252 /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/ 00253 00254 /* Initializing the loop counter to N/4 instead of N for loop unrolling */ 00255 i = (uint32_t) S->N >> 2u; 00256 00257 /* pbuff initialized to the pInlineBuffer(now contains the output values) */ 00258 pbuff = pInlineBuffer; 00259 00260 /* Processing with loop unrolling 4 times as N is always multiple of 4. 
Compute 4 outputs at a time */ 00261 do 00262 { 00263 /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */ 00264 in = *pbuff; 00265 *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15)); 00266 00267 in = *pbuff; 00268 *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15)); 00269 00270 in = *pbuff; 00271 *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15)); 00272 00273 in = *pbuff; 00274 *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15)); 00275 00276 /* Decrement the loop counter */ 00277 i--; 00278 } while(i > 0u); 00279 00280 00281 #else 00282 00283 /* Run the below code for Cortex-M0 */ 00284 00285 /* Initializing the loop counter to N/2 */ 00286 i = (uint32_t) S->Nby2; 00287 00288 do 00289 { 00290 /* Re-ordering of even and odd elements */ 00291 /* pState[i] = pInlineBuffer[2*i] */ 00292 *pS1++ = *pbuff++; 00293 /* pState[N-i-1] = pInlineBuffer[2*i+1] */ 00294 *pS2-- = *pbuff++; 00295 00296 /* Decrement the loop counter */ 00297 i--; 00298 } while(i > 0u); 00299 00300 /* pbuff initialized to input buffer */ 00301 pbuff = pInlineBuffer; 00302 00303 /* pS1 initialized to pState */ 00304 pS1 = pState; 00305 00306 /* Initializing the loop counter */ 00307 i = (uint32_t) S->N; 00308 00309 do 00310 { 00311 /* Writing the re-ordered output back to inplace input buffer */ 00312 *pbuff++ = *pS1++; 00313 00314 /* Decrement the loop counter */ 00315 i--; 00316 } while(i > 0u); 00317 00318 00319 /* --------------------------------------------------------- 00320 * Step2: Calculate RFFT for N-point input 00321 * ---------------------------------------------------------- */ 00322 /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */ 00323 arm_rfft_q15(S->pRfft, pInlineBuffer, pState); 00324 00325 /*---------------------------------------------------------------------- 00326 * Step3: Multiply the FFT output with the weights. 
00327 *----------------------------------------------------------------------*/ 00328 arm_cmplx_mult_cmplx_q15(pState, weights, pState, S->N); 00329 00330 /* The output of complex multiplication is in 3.13 format. 00331 * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */ 00332 arm_shift_q15(pState, 2, pState, S->N * 2); 00333 00334 /* ----------- Post-processing ---------- */ 00335 /* DCT-IV can be obtained from DCT-II by the equation, 00336 * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0) 00337 * Hence, Y4(0) = Y2(0)/2 */ 00338 /* Getting only real part from the output and Converting to DCT-IV */ 00339 00340 /* Initializing the loop counter */ 00341 i = ((uint32_t) S->N - 1u); 00342 00343 /* pbuff initialized to input buffer. */ 00344 pbuff = pInlineBuffer; 00345 00346 /* pS1 initialized to pState */ 00347 pS1 = pState; 00348 00349 /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */ 00350 in = *pS1++ >> 1u; 00351 /* input buffer acts as inplace, so output values are stored in the input itself. 
*/ 00352 *pbuff++ = in; 00353 00354 /* pState pointer is incremented twice as the real values are located alternatively in the array */ 00355 pS1++; 00356 00357 do 00358 { 00359 /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */ 00360 /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */ 00361 in = *pS1++ - in; 00362 *pbuff++ = in; 00363 /* points to the next real value */ 00364 pS1++; 00365 00366 /* Decrement the loop counter */ 00367 i--; 00368 } while(i > 0u); 00369 00370 /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/ 00371 00372 /* Initializing the loop counter */ 00373 i = (uint32_t) S->N; 00374 00375 /* pbuff initialized to the pInlineBuffer(now contains the output values) */ 00376 pbuff = pInlineBuffer; 00377 00378 do 00379 { 00380 /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */ 00381 in = *pbuff; 00382 *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15)); 00383 00384 /* Decrement the loop counter */ 00385 i--; 00386 } while(i > 0u); 00387 00388 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00389 00390 } 00391 00392 /** 00393 * @} end of DCT4_IDCT4 group 00394 */
Generated on Tue Jul 12 2022 13:15:24 by
