Added CMSIS5 DSP and NN folder. Needs some work

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_shift_q7.c Source File

arm_shift_q7.c

00001 /* ----------------------------------------------------------------------
00002  * Project:      CMSIS DSP Library
00003  * Title:        arm_shift_q7.c
00004  * Description:  Processing function for the Q7 Shifting
00005  *
00006  * $Date:        27. January 2017
00007  * $Revision:    V.1.5.1
00008  *
00009  * Target Processor: Cortex-M cores
00010  * -------------------------------------------------------------------- */
00011 /*
00012  * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
00013  *
00014  * SPDX-License-Identifier: Apache-2.0
00015  *
00016  * Licensed under the Apache License, Version 2.0 (the License); you may
00017  * not use this file except in compliance with the License.
00018  * You may obtain a copy of the License at
00019  *
00020  * www.apache.org/licenses/LICENSE-2.0
00021  *
00022  * Unless required by applicable law or agreed to in writing, software
00023  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
00024  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00025  * See the License for the specific language governing permissions and
00026  * limitations under the License.
00027  */
00028 
00029 #include "arm_math.h"
00030 
00031 /**
00032  * @ingroup groupMath
00033  */
00034 
00035 /**
00036  * @addtogroup shift
00037  * @{
00038  */
00039 
00040 
00041 /**
00042  * @brief  Shifts the elements of a Q7 vector a specified number of bits.
00043  * @param[in]  *pSrc points to the input vector
00044  * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
00045  * @param[out]  *pDst points to the output vector
00046  * @param[in]  blockSize number of samples in the vector
00047  * @return none.
00048  *
00049  * \par Conditions for optimum performance
00050  *  Input and output buffers should be aligned by 32-bit
00051  *
00052  *
00053  * <b>Scaling and Overflow Behavior:</b>
00054  * \par
00055  * The function uses saturating arithmetic.
00056  * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
00057  */
00058 
00059 void arm_shift_q7(
00060   q7_t * pSrc,                                   /* input vector; only read here */
00061   int8_t shiftBits,                              /* >= 0: left shift count; < 0: right shift by -shiftBits */
00062   q7_t * pDst,                                   /* output vector; one result written per input sample */
00063   uint32_t blockSize)                            /* number of samples to process */
00064 {
00065   uint32_t blkCnt;                               /* loop counter */
00066   uint8_t sign;                                  /* Bit 7 of shiftBits; nonzero when shiftBits is negative */
00067   /* Left shifts are saturated to 8 bits with __SSAT; right shifts are stored without saturation. */
00068 #if defined (ARM_MATH_DSP)
00069 
00070 /* Code path compiled when ARM_MATH_DSP is defined: handles 4 samples per loop iteration */
00071   q7_t in1;                                      /* Input value1 */
00072   q7_t in2;                                      /* Input value2 */
00073   q7_t in3;                                      /* Input value3 */
00074   q7_t in4;                                      /* Input value4 */
00075 
00076 
00077   /* Loop unrolling: number of complete groups of 4 samples */
00078   blkCnt = blockSize >> 2U;
00079 
00080   /* Isolate bit 7 of shiftBits: nonzero means shiftBits is negative */
00081   sign = (shiftBits & 0x80);
00082 
00083   /* If the shift value is non-negative then do a saturating left shift, else a right shift */
00084   if (sign == 0U)
00085   {
00086     /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
00087      ** a second loop below computes the remaining 1 to 3 samples. */
00088     while (blkCnt > 0U)
00089     {
00090       /* C = A << shiftBits
00090          NOTE(review): left-shifting a negative sample is undefined in ISO C; this
00090          presumably relies on the target toolchain's behavior - confirm. */
00091       /* Read 4 inputs */
00092       in1 = *pSrc;
00093       in2 = *(pSrc + 1);
00094       in3 = *(pSrc + 2);
00095       in4 = *(pSrc + 3);
00096 
00097       /* Saturate each shifted value to 8 bits, pack the four results with __PACKq7 and store them through __SIMD32(pDst)++ (presumably one 32-bit write that advances pDst by 4 samples - see arm_math.h) */
00098       *__SIMD32(pDst)++ = __PACKq7(__SSAT((in1 << shiftBits), 8),
00099                                    __SSAT((in2 << shiftBits), 8),
00100                                    __SSAT((in3 << shiftBits), 8),
00101                                    __SSAT((in4 << shiftBits), 8));
00102       /* Update source pointer to process next samples */
00103       pSrc += 4U;
00104 
00105       /* Decrement the loop counter */
00106       blkCnt--;
00107     }
00108 
00109     /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
00110      ** No loop unrolling is used. */
00111     blkCnt = blockSize % 0x4U;
00112 
00113     while (blkCnt > 0U)
00114     {
00115       /* C = A << shiftBits */
00116       /* Shift the input left, saturate to 8 bits and store the result in the destination buffer. */
00117       *pDst++ = (q7_t) __SSAT((*pSrc++ << shiftBits), 8);
00118 
00119       /* Decrement the loop counter */
00120       blkCnt--;
00121     }
00122   }
00123   else
00124   {
00125     shiftBits = -shiftBits;                      /* right-shift count; NOTE(review): for shiftBits == -128 the negated value does not fit in int8_t - confirm callers avoid it */
00126     /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
00127      ** a second loop below computes the remaining 1 to 3 samples. */
00128     while (blkCnt > 0U)
00129     {
00130       /* C = A >> shiftBits */
00131       /* Read 4 inputs */
00132       in1 = *pSrc;
00133       in2 = *(pSrc + 1);
00134       in3 = *(pSrc + 2);
00135       in4 = *(pSrc + 3);
00136 
00137       /* Pack the four right-shifted values with __PACKq7 and store them through __SIMD32(pDst)++; no saturation is applied on this path */
00138       *__SIMD32(pDst)++ = __PACKq7((in1 >> shiftBits), (in2 >> shiftBits),
00139                                    (in3 >> shiftBits), (in4 >> shiftBits));
00140 
00141       /* Update source pointer to process next samples */
00142       pSrc += 4U;
00143 
00144       /* Decrement the loop counter */
00145       blkCnt--;
00146     }
00147 
00148     /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
00149      ** No loop unrolling is used. */
00150     blkCnt = blockSize % 0x4U;
00151 
00152     while (blkCnt > 0U)
00153     {
00154       /* C = A >> shiftBits */
00155       /* Shift the input right and then store the result in the destination buffer. */
00156       in1 = *pSrc++;
00157       *pDst++ = (in1 >> shiftBits);
00158 
00159       /* Decrement the loop counter */
00160       blkCnt--;
00161     }
00162   }
00163 
00164 #else
00165 
00166   /* Code path compiled when ARM_MATH_DSP is not defined: one sample per loop iteration */
00167 
00168   /* Isolate bit 7 of shiftBits: nonzero means shiftBits is negative */
00169   sign = (shiftBits & 0x80);
00170 
00171   /* If the shift value is non-negative then do a saturating left shift, else a right shift */
00172   if (sign == 0U)
00173   {
00174     /* Initialize blkCnt with number of samples */
00175     blkCnt = blockSize;
00176 
00177     while (blkCnt > 0U)
00178     {
00179       /* C = A << shiftBits */
00180       /* Cast the input to q15_t, shift it left, saturate to 8 bits and store the result in the destination buffer. */
00181       *pDst++ = (q7_t) __SSAT(((q15_t) * pSrc++ << shiftBits), 8);
00182 
00183       /* Decrement the loop counter */
00184       blkCnt--;
00185     }
00186   }
00187   else
00188   {
00189     /* Initialize blkCnt with number of samples */
00190     blkCnt = blockSize;
00191 
00192     while (blkCnt > 0U)
00193     {
00194       /* C = A >> -shiftBits (shiftBits is negative here, so the count is its magnitude) */
00195       /* Shift the input right and store the result; NOTE(review): the count can reach 128, and counts >= the width of int are undefined in ISO C - confirm callers keep it small. */
00196       *pDst++ = (*pSrc++ >> -shiftBits);
00197 
00198       /* Decrement the loop counter */
00199       blkCnt--;
00200     }
00201   }
00202 
00203 #endif /* #if defined (ARM_MATH_DSP) */
00204 }
00205 
00206 /**
00207  * @} end of shift group
00208  */
00209