Added CMSIS5 DSP and NN folder. Needs some work.
Embed:
(wiki syntax)
Show/hide line numbers
arm_q7_to_q15_reordered_no_shift.c
00001 /* 00002 * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. 00003 * 00004 * SPDX-License-Identifier: Apache-2.0 00005 * 00006 * Licensed under the Apache License, Version 2.0 (the License); you may 00007 * not use this file except in compliance with the License. 00008 * You may obtain a copy of the License at 00009 * 00010 * www.apache.org/licenses/LICENSE-2.0 00011 * 00012 * Unless required by applicable law or agreed to in writing, software 00013 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 00014 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 * See the License for the specific language governing permissions and 00016 * limitations under the License. 00017 */ 00018 00019 /* ---------------------------------------------------------------------- 00020 * Project: CMSIS NN Library 00021 * Title: arm_q7_to_q15_reordered_no_shift.c 00022 * Description: Converts the elements of the Q7 vector to reordered Q15 vector without left-shift 00023 * 00024 * $Date: 17. January 2018 00025 * $Revision: V.1.0.0 00026 * 00027 * Target Processor: Cortex-M cores 00028 * 00029 * -------------------------------------------------------------------- */ 00030 00031 #include "arm_nnsupportfunctions.h" 00032 00033 /** 00034 * @ingroup groupSupport 00035 */ 00036 00037 /** 00038 * @addtogroup nndata_convert 00039 * @{ 00040 */ 00041 00042 /** 00043 * @brief Converts the elements of the Q7 vector to reordered Q15 vector without left-shift 00044 * @param[in] *pSrc points to the Q7 input vector 00045 * @param[out] *pDst points to the Q15 output vector 00046 * @param[in] blockSize length of the input vector 00047 * @return none. 
00048 * 00049 * @details 00050 * 00051 * This function does the q7 to q15 expansion with re-ordering 00052 * 00053 * <pre> 00054 * | A1 | A2 | A3 | A4 | 00055 * 00056 * 0 7 8 15 16 23 24 31 00057 * </pre> 00058 * 00059 * is converted into: 00060 * 00061 * <pre> 00062 * | A1 | A3 | and | A2 | A4 | 00063 * 00064 * 0 15 16 31 0 15 16 31 00065 * </pre> 00066 * 00067 * 00068 * This looks strange but is natural considering how sign-extension is done at 00069 * assembly level. 00070 * 00071 * The expansion of other other oprand will follow the same rule so that the end 00072 * results are the same. 00073 * 00074 * The tail (i.e., last (N % 4) elements) will still be in original order. 00075 * 00076 */ 00077 00078 void arm_q7_to_q15_reordered_no_shift(const q7_t * pSrc, q15_t * pDst, uint32_t blockSize) 00079 { 00080 const q7_t *pIn = pSrc; /* Src pointer */ 00081 uint32_t blkCnt; /* loop counter */ 00082 00083 #ifndef ARM_MATH_CM0_FAMILY 00084 q31_t in; 00085 q31_t in1, in2; 00086 00087 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00088 00089 /*loop Unrolling */ 00090 blkCnt = blockSize >> 2u; 00091 00092 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00093 ** a second loop below computes the remaining 1 to 3 samples. 
*/ 00094 while (blkCnt > 0u) 00095 { 00096 /* C = (q15_t) A << 8 */ 00097 /* convert from q7 to q15 and then store the results in the destination buffer */ 00098 in = *__SIMD32(pIn)++; 00099 00100 /* rotatate in by 8 and extend two q7_t values to q15_t values */ 00101 in1 = __SXTB16(__ROR(in, 8)); 00102 00103 /* extend remainig two q7_t values to q15_t values */ 00104 in2 = __SXTB16(in); 00105 00106 #ifndef ARM_MATH_BIG_ENDIAN 00107 *__SIMD32(pDst)++ = in2; 00108 *__SIMD32(pDst)++ = in1; 00109 #else 00110 *__SIMD32(pDst)++ = in1; 00111 *__SIMD32(pDst)++ = in2; 00112 #endif 00113 00114 /* Decrement the loop counter */ 00115 blkCnt--; 00116 } 00117 00118 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00119 ** No loop unrolling is used. */ 00120 blkCnt = blockSize % 0x4u; 00121 00122 #else 00123 00124 /* Run the below code for Cortex-M0 */ 00125 00126 /* Loop over blockSize number of values */ 00127 blkCnt = blockSize; 00128 00129 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00130 00131 while (blkCnt > 0u) 00132 { 00133 /* C = (q15_t) A << 8 */ 00134 /* convert from q7 to q15 and then store the results in the destination buffer */ 00135 *pDst++ = (q15_t) * pIn++; 00136 00137 /* Decrement the loop counter */ 00138 blkCnt--; 00139 } 00140 00141 } 00142 00143 /** 00144 * @} end of q7_to_x group 00145 */ 00146
Generated on Tue Jul 12 2022 16:47:27 by 1.7.2