The CMSIS DSP 5 library

Dependents:   Nucleo-Heart-Rate ejercicioVrms2 PROYECTOFINAL ejercicioVrms ... more

Committer:
xorjoep
Date:
Wed Jun 20 11:21:31 2018 +0000
Revision:
1:24714b45cd1b
The newest version of the CMSIS library

Who changed what in which revision?

User | Revision | Line number | New contents of line
xorjoep 1:24714b45cd1b 1 /* ----------------------------------------------------------------------
xorjoep 1:24714b45cd1b 2 * Project: CMSIS DSP Library
xorjoep 1:24714b45cd1b 3 * Title: arm_dct4_q15.c
xorjoep 1:24714b45cd1b 4 * Description: Processing function of DCT4 & IDCT4 Q15
xorjoep 1:24714b45cd1b 5 *
xorjoep 1:24714b45cd1b 6 * $Date: 27. January 2017
xorjoep 1:24714b45cd1b 7 * $Revision: V.1.5.1
xorjoep 1:24714b45cd1b 8 *
xorjoep 1:24714b45cd1b 9 * Target Processor: Cortex-M cores
xorjoep 1:24714b45cd1b 10 * -------------------------------------------------------------------- */
xorjoep 1:24714b45cd1b 11 /*
xorjoep 1:24714b45cd1b 12 * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
xorjoep 1:24714b45cd1b 13 *
xorjoep 1:24714b45cd1b 14 * SPDX-License-Identifier: Apache-2.0
xorjoep 1:24714b45cd1b 15 *
xorjoep 1:24714b45cd1b 16 * Licensed under the Apache License, Version 2.0 (the License); you may
xorjoep 1:24714b45cd1b 17 * not use this file except in compliance with the License.
xorjoep 1:24714b45cd1b 18 * You may obtain a copy of the License at
xorjoep 1:24714b45cd1b 19 *
xorjoep 1:24714b45cd1b 20 * www.apache.org/licenses/LICENSE-2.0
xorjoep 1:24714b45cd1b 21 *
xorjoep 1:24714b45cd1b 22 * Unless required by applicable law or agreed to in writing, software
xorjoep 1:24714b45cd1b 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
xorjoep 1:24714b45cd1b 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
xorjoep 1:24714b45cd1b 25 * See the License for the specific language governing permissions and
xorjoep 1:24714b45cd1b 26 * limitations under the License.
xorjoep 1:24714b45cd1b 27 */
xorjoep 1:24714b45cd1b 28
xorjoep 1:24714b45cd1b 29 #include "arm_math.h"
xorjoep 1:24714b45cd1b 30
xorjoep 1:24714b45cd1b 31 /**
xorjoep 1:24714b45cd1b 32 * @addtogroup DCT4_IDCT4
xorjoep 1:24714b45cd1b 33 * @{
xorjoep 1:24714b45cd1b 34 */
xorjoep 1:24714b45cd1b 35
xorjoep 1:24714b45cd1b 36 /**
xorjoep 1:24714b45cd1b 37 * @brief Processing function for the Q15 DCT4/IDCT4.
xorjoep 1:24714b45cd1b 38 * @param[in] *S points to an instance of the Q15 DCT4 structure.
xorjoep 1:24714b45cd1b 39 * @param[in] *pState points to state buffer; it is used as scratch and S->N * 2 elements of it are rewritten after the RFFT.
xorjoep 1:24714b45cd1b 40 * @param[in,out] *pInlineBuffer points to the in-place input and output buffer (S->N elements are processed).
xorjoep 1:24714b45cd1b 41 * @return none.
xorjoep 1:24714b45cd1b 42 *
xorjoep 1:24714b45cd1b 43 * \par Input and output formats:
xorjoep 1:24714b45cd1b 44 * Internally inputs are downscaled in the RFFT process function to avoid overflows.
xorjoep 1:24714b45cd1b 45 * The number of bits downscaled depends on the size of the transform.
xorjoep 1:24714b45cd1b 46 * The input and output formats for different DCT sizes and number of bits to upscale are mentioned in the table below:
xorjoep 1:24714b45cd1b 47 *
xorjoep 1:24714b45cd1b 48 * \image html dct4FormatsQ15Table.gif
xorjoep 1:24714b45cd1b 49 */
xorjoep 1:24714b45cd1b 50
xorjoep 1:24714b45cd1b 51 void arm_dct4_q15(
xorjoep 1:24714b45cd1b 52 const arm_dct4_instance_q15 * S,
xorjoep 1:24714b45cd1b 53 q15_t * pState,
xorjoep 1:24714b45cd1b 54 q15_t * pInlineBuffer)
xorjoep 1:24714b45cd1b 55 {
xorjoep 1:24714b45cd1b 56 uint32_t i; /* Loop counter */
xorjoep 1:24714b45cd1b 57 q15_t *weights = S->pTwiddle; /* Pointer to the weights table (multiplied with the RFFT output in Step3) */
xorjoep 1:24714b45cd1b 58 q15_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table (multiplied with the input in pre-processing) */
xorjoep 1:24714b45cd1b 59 q15_t *pS1, *pS2, *pbuff; /* Temporary pointers: pS1/pS2 walk the pState buffer, pbuff walks the in-place buffer */
xorjoep 1:24714b45cd1b 60 q15_t in; /* Temporary variable: previous Y4 output during post-processing, current sample during normalization */
xorjoep 1:24714b45cd1b 61
xorjoep 1:24714b45cd1b 62
xorjoep 1:24714b45cd1b 63 /* DCT4 computation involves DCT2 (which is calculated using RFFT)
xorjoep 1:24714b45cd1b 64 * along with some pre-processing and post-processing.
xorjoep 1:24714b45cd1b 65 * Computational procedure is explained as follows:
xorjoep 1:24714b45cd1b 66 * (a) Pre-processing involves multiplying input with cos factor,
xorjoep 1:24714b45cd1b 67 * r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*N))
xorjoep 1:24714b45cd1b 68 * where,
xorjoep 1:24714b45cd1b 69 * r(n) -- output of preprocessing
xorjoep 1:24714b45cd1b 70 * u(n) -- input to preprocessing(actual Source buffer); N -- transform length S->N (NOTE(review): this text previously read 4*n; the table itself comes from S->pCosFactor - confirm)
xorjoep 1:24714b45cd1b 71 * (b) Calculation of DCT2 using FFT is divided into three steps:
xorjoep 1:24714b45cd1b 72 * Step1: Re-ordering of even and odd elements of input.
xorjoep 1:24714b45cd1b 73 * Step2: Calculating FFT of the re-ordered input.
xorjoep 1:24714b45cd1b 74 * Step3: Taking the real part of the product of FFT output and weights.
xorjoep 1:24714b45cd1b 75 * (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:
xorjoep 1:24714b45cd1b 76 * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
xorjoep 1:24714b45cd1b 77 * where,
xorjoep 1:24714b45cd1b 78 * Y4 -- DCT4 output, Y2 -- DCT2 output
xorjoep 1:24714b45cd1b 79 * (d) Multiplying the output with the normalizing factor sqrt(2/N).
xorjoep 1:24714b45cd1b 80 */
xorjoep 1:24714b45cd1b 81
xorjoep 1:24714b45cd1b 82 /*-------- Pre-processing ------------*/
xorjoep 1:24714b45cd1b 83 /* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*N)); the factor 2 is applied by the shift of 1 bit that follows the multiply */
xorjoep 1:24714b45cd1b 84 arm_mult_q15(pInlineBuffer, cosFact, pInlineBuffer, S->N);
xorjoep 1:24714b45cd1b 85 arm_shift_q15(pInlineBuffer, 1, pInlineBuffer, S->N);
xorjoep 1:24714b45cd1b 86
xorjoep 1:24714b45cd1b 87 /* ----------------------------------------------------------------
xorjoep 1:24714b45cd1b 88 * Step1: Re-ordering of even and odd elements as
xorjoep 1:24714b45cd1b 89 * pState[i] = pInlineBuffer[2*i] and
xorjoep 1:24714b45cd1b 90 * pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to S->Nby2 - 1
xorjoep 1:24714b45cd1b 91 ---------------------------------------------------------------------*/
xorjoep 1:24714b45cd1b 92
xorjoep 1:24714b45cd1b 93 /* pS1 initialized to pState */
xorjoep 1:24714b45cd1b 94 pS1 = pState;
xorjoep 1:24714b45cd1b 95
xorjoep 1:24714b45cd1b 96 /* pS2 initialized to pState+N-1, so that it points to element N-1 of the state buffer and is walked backwards */
xorjoep 1:24714b45cd1b 97 pS2 = pState + (S->N - 1U);
xorjoep 1:24714b45cd1b 98
xorjoep 1:24714b45cd1b 99 /* pbuff initialized to input buffer */
xorjoep 1:24714b45cd1b 100 pbuff = pInlineBuffer;
xorjoep 1:24714b45cd1b 101
xorjoep 1:24714b45cd1b 102
xorjoep 1:24714b45cd1b 103 #if defined (ARM_MATH_DSP)
xorjoep 1:24714b45cd1b 104
xorjoep 1:24714b45cd1b 105 /* Run the below code when ARM_MATH_DSP is defined: same steps as the #else branch, with loops unrolled by 4 */
xorjoep 1:24714b45cd1b 106
xorjoep 1:24714b45cd1b 107 /* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
xorjoep 1:24714b45cd1b 108 i = (uint32_t) S->Nby2 >> 2U;
xorjoep 1:24714b45cd1b 109
xorjoep 1:24714b45cd1b 110 /* Loop unrolled by 4: each pass moves 8 input samples (4 to the front of pState, 4 to the back).
xorjoep 1:24714b45cd1b 111 ** There is no tail loop for this step and the do-while body runs at least once, so Nby2 is assumed to be a non-zero multiple of 4. */
xorjoep 1:24714b45cd1b 112 do
xorjoep 1:24714b45cd1b 113 {
xorjoep 1:24714b45cd1b 114 /* Re-ordering of even and odd elements */
xorjoep 1:24714b45cd1b 115 /* pState[i] = pInlineBuffer[2*i] */
xorjoep 1:24714b45cd1b 116 *pS1++ = *pbuff++;
xorjoep 1:24714b45cd1b 117 /* pState[N-i-1] = pInlineBuffer[2*i+1] */
xorjoep 1:24714b45cd1b 118 *pS2-- = *pbuff++;
xorjoep 1:24714b45cd1b 119
xorjoep 1:24714b45cd1b 120 *pS1++ = *pbuff++;
xorjoep 1:24714b45cd1b 121 *pS2-- = *pbuff++;
xorjoep 1:24714b45cd1b 122
xorjoep 1:24714b45cd1b 123 *pS1++ = *pbuff++;
xorjoep 1:24714b45cd1b 124 *pS2-- = *pbuff++;
xorjoep 1:24714b45cd1b 125
xorjoep 1:24714b45cd1b 126 *pS1++ = *pbuff++;
xorjoep 1:24714b45cd1b 127 *pS2-- = *pbuff++;
xorjoep 1:24714b45cd1b 128
xorjoep 1:24714b45cd1b 129 /* Decrement the loop counter */
xorjoep 1:24714b45cd1b 130 i--;
xorjoep 1:24714b45cd1b 131 } while (i > 0U);
xorjoep 1:24714b45cd1b 132
xorjoep 1:24714b45cd1b 133 /* pbuff initialized to input buffer */
xorjoep 1:24714b45cd1b 134 pbuff = pInlineBuffer;
xorjoep 1:24714b45cd1b 135
xorjoep 1:24714b45cd1b 136 /* pS1 initialized to pState */
xorjoep 1:24714b45cd1b 137 pS1 = pState;
xorjoep 1:24714b45cd1b 138
xorjoep 1:24714b45cd1b 139 /* Initializing the loop counter to N/4 instead of N for loop unrolling */
xorjoep 1:24714b45cd1b 140 i = (uint32_t) S->N >> 2U;
xorjoep 1:24714b45cd1b 141
xorjoep 1:24714b45cd1b 142 /* Processing with loop unrolling 4 times; there is no tail loop, so N is assumed to be a non-zero multiple of 4.
xorjoep 1:24714b45cd1b 143 * Compute 4 outputs at a time */
xorjoep 1:24714b45cd1b 144 do
xorjoep 1:24714b45cd1b 145 {
xorjoep 1:24714b45cd1b 146 /* Writing the re-ordered output back to inplace input buffer */
xorjoep 1:24714b45cd1b 147 *pbuff++ = *pS1++;
xorjoep 1:24714b45cd1b 148 *pbuff++ = *pS1++;
xorjoep 1:24714b45cd1b 149 *pbuff++ = *pS1++;
xorjoep 1:24714b45cd1b 150 *pbuff++ = *pS1++;
xorjoep 1:24714b45cd1b 151
xorjoep 1:24714b45cd1b 152 /* Decrement the loop counter */
xorjoep 1:24714b45cd1b 153 i--;
xorjoep 1:24714b45cd1b 154 } while (i > 0U);
xorjoep 1:24714b45cd1b 155
xorjoep 1:24714b45cd1b 156
xorjoep 1:24714b45cd1b 157 /* ---------------------------------------------------------
xorjoep 1:24714b45cd1b 158 * Step2: Calculate RFFT for N-point input
xorjoep 1:24714b45cd1b 159 * ---------------------------------------------------------- */
xorjoep 1:24714b45cd1b 160 /* pInlineBuffer is real input of length N, pState is the complex output of length 2N */
xorjoep 1:24714b45cd1b 161 arm_rfft_q15(S->pRfft, pInlineBuffer, pState);
xorjoep 1:24714b45cd1b 162
xorjoep 1:24714b45cd1b 163 /*----------------------------------------------------------------------
xorjoep 1:24714b45cd1b 164 * Step3: Multiply the FFT output with the weights.
xorjoep 1:24714b45cd1b 165 *----------------------------------------------------------------------*/
xorjoep 1:24714b45cd1b 166 arm_cmplx_mult_cmplx_q15(pState, weights, pState, S->N);
xorjoep 1:24714b45cd1b 167
xorjoep 1:24714b45cd1b 168 /* The output of complex multiplication is in 3.13 format.
xorjoep 1:24714b45cd1b 169 * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */
xorjoep 1:24714b45cd1b 170 arm_shift_q15(pState, 2, pState, S->N * 2);
xorjoep 1:24714b45cd1b 171
xorjoep 1:24714b45cd1b 172 /* ----------- Post-processing ---------- */
xorjoep 1:24714b45cd1b 173 /* DCT-IV can be obtained from DCT-II by the equation,
xorjoep 1:24714b45cd1b 174 * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
xorjoep 1:24714b45cd1b 175 * Hence, Y4(0) = Y2(0)/2 */
xorjoep 1:24714b45cd1b 176 /* Getting only real part from the output and Converting to DCT-IV */
xorjoep 1:24714b45cd1b 177
xorjoep 1:24714b45cd1b 178 /* Initializing the loop counter to (N - 1) >> 2 for loop unrolling by 4: Y4(0) is written separately below, leaving N - 1 outputs */
xorjoep 1:24714b45cd1b 179 i = ((uint32_t) S->N - 1U) >> 2U;
xorjoep 1:24714b45cd1b 180
xorjoep 1:24714b45cd1b 181 /* pbuff initialized to input buffer. */
xorjoep 1:24714b45cd1b 182 pbuff = pInlineBuffer;
xorjoep 1:24714b45cd1b 183
xorjoep 1:24714b45cd1b 184 /* pS1 initialized to pState */
xorjoep 1:24714b45cd1b 185 pS1 = pState;
xorjoep 1:24714b45cd1b 186
xorjoep 1:24714b45cd1b 187 /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 (the halving is done with a right shift by 1) */
xorjoep 1:24714b45cd1b 188 in = *pS1++ >> 1U;
xorjoep 1:24714b45cd1b 189 /* input buffer acts as inplace, so output values are stored in the input itself. */
xorjoep 1:24714b45cd1b 190 *pbuff++ = in;
xorjoep 1:24714b45cd1b 191
xorjoep 1:24714b45cd1b 192 /* pState pointer is incremented twice as the real values are located alternatively in the array */
xorjoep 1:24714b45cd1b 193 pS1++;
xorjoep 1:24714b45cd1b 194
xorjoep 1:24714b45cd1b 195 /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
xorjoep 1:24714b45cd1b 196 ** a second loop below computes the remaining 1 to 3 samples. The do-while body runs at least once, so at least 4 outputs are produced here. */
xorjoep 1:24714b45cd1b 197 do
xorjoep 1:24714b45cd1b 198 {
xorjoep 1:24714b45cd1b 199 /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1); the difference is stored back into the q15_t variable 'in' with no explicit saturation */
xorjoep 1:24714b45cd1b 200 /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
xorjoep 1:24714b45cd1b 201 in = *pS1++ - in;
xorjoep 1:24714b45cd1b 202 *pbuff++ = in;
xorjoep 1:24714b45cd1b 203 /* points to the next real value */
xorjoep 1:24714b45cd1b 204 pS1++;
xorjoep 1:24714b45cd1b 205
xorjoep 1:24714b45cd1b 206 in = *pS1++ - in;
xorjoep 1:24714b45cd1b 207 *pbuff++ = in;
xorjoep 1:24714b45cd1b 208 pS1++;
xorjoep 1:24714b45cd1b 209
xorjoep 1:24714b45cd1b 210 in = *pS1++ - in;
xorjoep 1:24714b45cd1b 211 *pbuff++ = in;
xorjoep 1:24714b45cd1b 212 pS1++;
xorjoep 1:24714b45cd1b 213
xorjoep 1:24714b45cd1b 214 in = *pS1++ - in;
xorjoep 1:24714b45cd1b 215 *pbuff++ = in;
xorjoep 1:24714b45cd1b 216 pS1++;
xorjoep 1:24714b45cd1b 217
xorjoep 1:24714b45cd1b 218 /* Decrement the loop counter */
xorjoep 1:24714b45cd1b 219 i--;
xorjoep 1:24714b45cd1b 220 } while (i > 0U);
xorjoep 1:24714b45cd1b 221
xorjoep 1:24714b45cd1b 222 /* If (N - 1) is not a multiple of 4, compute the remaining (N - 1) % 4 output samples here.
xorjoep 1:24714b45cd1b 223 ** No loop unrolling is used. */
xorjoep 1:24714b45cd1b 224 i = ((uint32_t) S->N - 1U) % 0x4U;
xorjoep 1:24714b45cd1b 225
xorjoep 1:24714b45cd1b 226 while (i > 0U)
xorjoep 1:24714b45cd1b 227 {
xorjoep 1:24714b45cd1b 228 /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
xorjoep 1:24714b45cd1b 229 /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
xorjoep 1:24714b45cd1b 230 in = *pS1++ - in;
xorjoep 1:24714b45cd1b 231 *pbuff++ = in;
xorjoep 1:24714b45cd1b 232 /* points to the next real value */
xorjoep 1:24714b45cd1b 233 pS1++;
xorjoep 1:24714b45cd1b 234
xorjoep 1:24714b45cd1b 235 /* Decrement the loop counter */
xorjoep 1:24714b45cd1b 236 i--;
xorjoep 1:24714b45cd1b 237 }
xorjoep 1:24714b45cd1b 238
xorjoep 1:24714b45cd1b 239
xorjoep 1:24714b45cd1b 240 /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
xorjoep 1:24714b45cd1b 241
xorjoep 1:24714b45cd1b 242 /* Initializing the loop counter to N/4 instead of N for loop unrolling */
xorjoep 1:24714b45cd1b 243 i = (uint32_t) S->N >> 2U;
xorjoep 1:24714b45cd1b 244
xorjoep 1:24714b45cd1b 245 /* pbuff initialized to the pInlineBuffer(now contains the output values) */
xorjoep 1:24714b45cd1b 246 pbuff = pInlineBuffer;
xorjoep 1:24714b45cd1b 247
xorjoep 1:24714b45cd1b 248 /* Processing with loop unrolling 4 times; no tail loop follows, so N is assumed to be a non-zero multiple of 4. Compute 4 outputs at a time */
xorjoep 1:24714b45cd1b 249 do
xorjoep 1:24714b45cd1b 250 {
xorjoep 1:24714b45cd1b 251 /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N): the product is formed in q31_t, shifted right by 15 and narrowed back to q15_t */
xorjoep 1:24714b45cd1b 252 in = *pbuff;
xorjoep 1:24714b45cd1b 253 *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
xorjoep 1:24714b45cd1b 254
xorjoep 1:24714b45cd1b 255 in = *pbuff;
xorjoep 1:24714b45cd1b 256 *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
xorjoep 1:24714b45cd1b 257
xorjoep 1:24714b45cd1b 258 in = *pbuff;
xorjoep 1:24714b45cd1b 259 *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
xorjoep 1:24714b45cd1b 260
xorjoep 1:24714b45cd1b 261 in = *pbuff;
xorjoep 1:24714b45cd1b 262 *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
xorjoep 1:24714b45cd1b 263
xorjoep 1:24714b45cd1b 264 /* Decrement the loop counter */
xorjoep 1:24714b45cd1b 265 i--;
xorjoep 1:24714b45cd1b 266 } while (i > 0U);
xorjoep 1:24714b45cd1b 267
xorjoep 1:24714b45cd1b 268
xorjoep 1:24714b45cd1b 269 #else
xorjoep 1:24714b45cd1b 270
xorjoep 1:24714b45cd1b 271 /* Run the below code when ARM_MATH_DSP is not defined: same steps as above, one element per loop pass */
xorjoep 1:24714b45cd1b 272
xorjoep 1:24714b45cd1b 273 /* Initializing the loop counter to N/2 */
xorjoep 1:24714b45cd1b 274 i = (uint32_t) S->Nby2;
xorjoep 1:24714b45cd1b 275
xorjoep 1:24714b45cd1b 276 do
xorjoep 1:24714b45cd1b 277 {
xorjoep 1:24714b45cd1b 278 /* Re-ordering of even and odd elements */
xorjoep 1:24714b45cd1b 279 /* pState[i] = pInlineBuffer[2*i] */
xorjoep 1:24714b45cd1b 280 *pS1++ = *pbuff++;
xorjoep 1:24714b45cd1b 281 /* pState[N-i-1] = pInlineBuffer[2*i+1] */
xorjoep 1:24714b45cd1b 282 *pS2-- = *pbuff++;
xorjoep 1:24714b45cd1b 283
xorjoep 1:24714b45cd1b 284 /* Decrement the loop counter */
xorjoep 1:24714b45cd1b 285 i--;
xorjoep 1:24714b45cd1b 286 } while (i > 0U);
xorjoep 1:24714b45cd1b 287
xorjoep 1:24714b45cd1b 288 /* pbuff initialized to input buffer */
xorjoep 1:24714b45cd1b 289 pbuff = pInlineBuffer;
xorjoep 1:24714b45cd1b 290
xorjoep 1:24714b45cd1b 291 /* pS1 initialized to pState */
xorjoep 1:24714b45cd1b 292 pS1 = pState;
xorjoep 1:24714b45cd1b 293
xorjoep 1:24714b45cd1b 294 /* Initializing the loop counter to N */
xorjoep 1:24714b45cd1b 295 i = (uint32_t) S->N;
xorjoep 1:24714b45cd1b 296
xorjoep 1:24714b45cd1b 297 do
xorjoep 1:24714b45cd1b 298 {
xorjoep 1:24714b45cd1b 299 /* Writing the re-ordered output back to inplace input buffer */
xorjoep 1:24714b45cd1b 300 *pbuff++ = *pS1++;
xorjoep 1:24714b45cd1b 301
xorjoep 1:24714b45cd1b 302 /* Decrement the loop counter */
xorjoep 1:24714b45cd1b 303 i--;
xorjoep 1:24714b45cd1b 304 } while (i > 0U);
xorjoep 1:24714b45cd1b 305
xorjoep 1:24714b45cd1b 306
xorjoep 1:24714b45cd1b 307 /* ---------------------------------------------------------
xorjoep 1:24714b45cd1b 308 * Step2: Calculate RFFT for N-point input
xorjoep 1:24714b45cd1b 309 * ---------------------------------------------------------- */
xorjoep 1:24714b45cd1b 310 /* pInlineBuffer is real input of length N, pState is the complex output of length 2N */
xorjoep 1:24714b45cd1b 311 arm_rfft_q15(S->pRfft, pInlineBuffer, pState);
xorjoep 1:24714b45cd1b 312
xorjoep 1:24714b45cd1b 313 /*----------------------------------------------------------------------
xorjoep 1:24714b45cd1b 314 * Step3: Multiply the FFT output with the weights.
xorjoep 1:24714b45cd1b 315 *----------------------------------------------------------------------*/
xorjoep 1:24714b45cd1b 316 arm_cmplx_mult_cmplx_q15(pState, weights, pState, S->N);
xorjoep 1:24714b45cd1b 317
xorjoep 1:24714b45cd1b 318 /* The output of complex multiplication is in 3.13 format.
xorjoep 1:24714b45cd1b 319 * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */
xorjoep 1:24714b45cd1b 320 arm_shift_q15(pState, 2, pState, S->N * 2);
xorjoep 1:24714b45cd1b 321
xorjoep 1:24714b45cd1b 322 /* ----------- Post-processing ---------- */
xorjoep 1:24714b45cd1b 323 /* DCT-IV can be obtained from DCT-II by the equation,
xorjoep 1:24714b45cd1b 324 * Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
xorjoep 1:24714b45cd1b 325 * Hence, Y4(0) = Y2(0)/2 */
xorjoep 1:24714b45cd1b 326 /* Getting only real part from the output and Converting to DCT-IV */
xorjoep 1:24714b45cd1b 327
xorjoep 1:24714b45cd1b 328 /* Initializing the loop counter to N - 1: Y4(0) is written separately below */
xorjoep 1:24714b45cd1b 329 i = ((uint32_t) S->N - 1U);
xorjoep 1:24714b45cd1b 330
xorjoep 1:24714b45cd1b 331 /* pbuff initialized to input buffer. */
xorjoep 1:24714b45cd1b 332 pbuff = pInlineBuffer;
xorjoep 1:24714b45cd1b 333
xorjoep 1:24714b45cd1b 334 /* pS1 initialized to pState */
xorjoep 1:24714b45cd1b 335 pS1 = pState;
xorjoep 1:24714b45cd1b 336
xorjoep 1:24714b45cd1b 337 /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 (the halving is done with a right shift by 1) */
xorjoep 1:24714b45cd1b 338 in = *pS1++ >> 1U;
xorjoep 1:24714b45cd1b 339 /* input buffer acts as inplace, so output values are stored in the input itself. */
xorjoep 1:24714b45cd1b 340 *pbuff++ = in;
xorjoep 1:24714b45cd1b 341
xorjoep 1:24714b45cd1b 342 /* pState pointer is incremented twice as the real values are located alternatively in the array */
xorjoep 1:24714b45cd1b 343 pS1++;
xorjoep 1:24714b45cd1b 344
xorjoep 1:24714b45cd1b 345 do
xorjoep 1:24714b45cd1b 346 {
xorjoep 1:24714b45cd1b 347 /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1); the difference is stored back into the q15_t variable 'in' with no explicit saturation */
xorjoep 1:24714b45cd1b 348 /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
xorjoep 1:24714b45cd1b 349 in = *pS1++ - in;
xorjoep 1:24714b45cd1b 350 *pbuff++ = in;
xorjoep 1:24714b45cd1b 351 /* points to the next real value */
xorjoep 1:24714b45cd1b 352 pS1++;
xorjoep 1:24714b45cd1b 353
xorjoep 1:24714b45cd1b 354 /* Decrement the loop counter */
xorjoep 1:24714b45cd1b 355 i--;
xorjoep 1:24714b45cd1b 356 } while (i > 0U);
xorjoep 1:24714b45cd1b 357
xorjoep 1:24714b45cd1b 358 /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
xorjoep 1:24714b45cd1b 359
xorjoep 1:24714b45cd1b 360 /* Initializing the loop counter to N */
xorjoep 1:24714b45cd1b 361 i = (uint32_t) S->N;
xorjoep 1:24714b45cd1b 362
xorjoep 1:24714b45cd1b 363 /* pbuff initialized to the pInlineBuffer(now contains the output values) */
xorjoep 1:24714b45cd1b 364 pbuff = pInlineBuffer;
xorjoep 1:24714b45cd1b 365
xorjoep 1:24714b45cd1b 366 do
xorjoep 1:24714b45cd1b 367 {
xorjoep 1:24714b45cd1b 368 /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N): the product is formed in q31_t, shifted right by 15 and narrowed back to q15_t */
xorjoep 1:24714b45cd1b 369 in = *pbuff;
xorjoep 1:24714b45cd1b 370 *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
xorjoep 1:24714b45cd1b 371
xorjoep 1:24714b45cd1b 372 /* Decrement the loop counter */
xorjoep 1:24714b45cd1b 373 i--;
xorjoep 1:24714b45cd1b 374 } while (i > 0U);
xorjoep 1:24714b45cd1b 375
xorjoep 1:24714b45cd1b 376 #endif /* #if defined (ARM_MATH_DSP) */
xorjoep 1:24714b45cd1b 377
xorjoep 1:24714b45cd1b 378 }
xorjoep 1:24714b45cd1b 379
xorjoep 1:24714b45cd1b 380 /**
xorjoep 1:24714b45cd1b 381 * @} end of DCT4_IDCT4 group
xorjoep 1:24714b45cd1b 382 */