CMSIS DSP Library from CMSIS 2.0. See http://www.onarm.com/cmsis/ for full details

Dependents:   K22F_DSP_Matrix_least_square BNO055-ELEC3810 1BNO055 ECE4180Project--Slave2 ... more

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers arm_shift_q7.c Source File

arm_shift_q7.c

00001 /* ----------------------------------------------------------------------  
00002 * Copyright (C) 2010 ARM Limited. All rights reserved.  
00003 *  
00004 * $Date:        29. November 2010  
00005 * $Revision:    V1.0.3  
00006 *  
00007 * Project:      CMSIS DSP Library  
00008 * Title:        arm_shift_q7.c  
00009 *  
00010 * Description:  Processing function for the Q7 Shifting  
00011 *  
00012 * Target Processor: Cortex-M4/Cortex-M3
00013 *  
00014 * Version 1.0.3 2010/11/29 
00015 *    Re-organized the CMSIS folders and updated documentation.  
00016 *   
00017 * Version 1.0.2 2010/11/11  
00018 *    Documentation updated.   
00019 *  
00020 * Version 1.0.1 2010/10/05   
00021 *    Production release and review comments incorporated.  
00022 *  
00023 * Version 1.0.0 2010/09/20   
00024 *    Production release and review comments incorporated.  
00025 *  
00026 * Version 0.0.7  2010/06/10   
00027 *    Misra-C changes done  
00028 * -------------------------------------------------------------------- */ 
00029  
00030 #include "arm_math.h" 
00031  
00032 /**  
00033  * @ingroup groupMath  
00034  */ 
00035  
00036 /**  
00037  * @addtogroup shift  
00038  * @{  
00039  */ 
00040  
00041  
00042 /**  
00043  * @brief  Shifts the elements of a Q7 vector a specified number of bits.  
00044  * @param  *pSrc points to the input vector  
00045  * @param  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.  
00046  * @param  *pDst points to the output vector  
00047  * @param  blockSize number of samples in the vector  
00048  * @return none.  
00049  *  
00050  * <b>Scaling and Overflow Behavior:</b>  
00051  * \par  
00052  * The function uses saturating arithmetic.  
00053  * Results outside of the allowable Q7 range [0x8 0x7F] will be saturated.  
00054  */ 
00055  
00056 void arm_shift_q7( 
00057   q7_t * pSrc, 
00058   int8_t shiftBits, 
00059   q7_t * pDst, 
00060   uint32_t blockSize) 
00061 { 
00062   uint32_t blkCnt;                               /* loop counter */ 
00063   uint8_t sign;                                  /* Sign of shiftBits */ 
00064   q7_t in1;                                      /* Input value1 */ 
00065   q7_t in2;                                      /* Input value2 */ 
00066   q7_t in3;                                      /* Input value3 */ 
00067   q7_t in4;                                      /* Input value4 */ 
00068  
00069  
00070   /*loop Unrolling */ 
00071   blkCnt = blockSize >> 2u; 
00072  
00073   /* Getting the sign of shiftBits */ 
00074   sign = (shiftBits & 0x80); 
00075  
00076   /* If the shift value is positive then do right shift else left shift */ 
00077   if(sign == 0u) 
00078   { 
00079     /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
00080      ** a second loop below computes the remaining 1 to 3 samples. */ 
00081     while(blkCnt > 0u) 
00082     { 
00083       /* C = A << shiftBits */ 
00084       /* Read 4 inputs */ 
00085       in1 = *pSrc++; 
00086       in2 = *pSrc++; 
00087       in3 = *pSrc++; 
00088       in4 = *pSrc++; 
00089  
00090       /* Store the Shifted result in the destination buffer in single cycle by packing the outputs */ 
00091       *__SIMD32(pDst)++ = __PACKq7(__SSAT((in1 << shiftBits), 8), 
00092                                    __SSAT((in2 << shiftBits), 8), 
00093                                    __SSAT((in3 << shiftBits), 8), 
00094                                    __SSAT((in4 << shiftBits), 8)); 
00095  
00096       /* Decrement the loop counter */ 
00097       blkCnt--; 
00098     } 
00099  
00100     /* If the blockSize is not a multiple of 4, compute any remaining output samples here.  
00101      ** No loop unrolling is used. */ 
00102     blkCnt = blockSize % 0x4u; 
00103  
00104     while(blkCnt > 0u) 
00105     { 
00106       /* C = A << shiftBits */ 
00107       /* Shift the input and then store the result in the destination buffer. */ 
00108       *pDst++ = (q7_t) __SSAT((*pSrc++ << shiftBits), 8); 
00109  
00110       /* Decrement the loop counter */ 
00111       blkCnt--; 
00112     } 
00113   } 
00114   else 
00115   { 
00116     /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.  
00117      ** a second loop below computes the remaining 1 to 3 samples. */ 
00118     while(blkCnt > 0u) 
00119     { 
00120       /* C = A >> shiftBits */ 
00121       /* Read 4 inputs */ 
00122       in1 = *pSrc++; 
00123       in2 = *pSrc++; 
00124       in3 = *pSrc++; 
00125       in4 = *pSrc++; 
00126  
00127       /* Store the Shifted result in the destination buffer in single cycle by packing the outputs */ 
00128       *__SIMD32(pDst)++ = __PACKq7((in1 >> -shiftBits), (in2 >> -shiftBits), 
00129                                    (in3 >> -shiftBits), (in4 >> -shiftBits)); 
00130  
00131       /* Decrement the loop counter */ 
00132       blkCnt--; 
00133     } 
00134  
00135     /* If the blockSize is not a multiple of 4, compute any remaining output samples here.  
00136      ** No loop unrolling is used. */ 
00137     blkCnt = blockSize % 0x4u; 
00138  
00139     while(blkCnt > 0u) 
00140     { 
00141       /* C = A >> shiftBits */ 
00142       /* Shift the input and then store the result in the destination buffer. */ 
00143       *pDst++ = (*pSrc++ >> -shiftBits); 
00144  
00145       /* Decrement the loop counter */ 
00146       blkCnt--; 
00147     } 
00148   } 
00149 } 
00150  
00151 /**  
00152  * @} end of shift group  
00153  */