Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository Zip archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-dsp by
arm_mat_scale_q15.c
00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010-2013 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 17. January 2013 00005 * $Revision: V1.4.1 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_mat_scale_q15.c 00009 * 00010 * Description: Multiplies a Q15 matrix by a scalar. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Redistribution and use in source and binary forms, with or without 00015 * modification, are permitted provided that the following conditions 00016 * are met: 00017 * - Redistributions of source code must retain the above copyright 00018 * notice, this list of conditions and the following disclaimer. 00019 * - Redistributions in binary form must reproduce the above copyright 00020 * notice, this list of conditions and the following disclaimer in 00021 * the documentation and/or other materials provided with the 00022 * distribution. 00023 * - Neither the name of ARM LIMITED nor the names of its contributors 00024 * may be used to endorse or promote products derived from this 00025 * software without specific prior written permission. 00026 * 00027 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00028 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00029 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 00030 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 00031 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 00032 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 00033 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00034 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00035 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00036 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 00037 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00038 * POSSIBILITY OF SUCH DAMAGE. 00039 * -------------------------------------------------------------------- */ 00040 00041 #include "arm_math.h" 00042 00043 /** 00044 * @ingroup groupMatrix 00045 */ 00046 00047 /** 00048 * @addtogroup MatrixScale 00049 * @{ 00050 */ 00051 00052 /** 00053 * @brief Q15 matrix scaling. 00054 * @param[in] *pSrc points to input matrix 00055 * @param[in] scaleFract fractional portion of the scale factor 00056 * @param[in] shift number of bits to shift the result by 00057 * @param[out] *pDst points to output matrix structure 00058 * @return The function returns either 00059 * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking. 00060 * 00061 * @details 00062 * <b>Scaling and Overflow Behavior:</b> 00063 * \par 00064 * The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.15 format. 00065 * These are multiplied to yield a 2.30 intermediate result and this is shifted with saturation to 1.15 format. 
00066 */ 00067 00068 arm_status arm_mat_scale_q15( 00069 const arm_matrix_instance_q15 * pSrc, 00070 q15_t scaleFract, 00071 int32_t shift, 00072 arm_matrix_instance_q15 * pDst) 00073 { 00074 q15_t *pIn = pSrc->pData; /* input data matrix pointer */ 00075 q15_t *pOut = pDst->pData; /* output data matrix pointer */ 00076 uint32_t numSamples; /* total number of elements in the matrix */ 00077 int32_t totShift = 15 - shift; /* total shift to apply after scaling */ 00078 uint32_t blkCnt; /* loop counters */ 00079 arm_status status; /* status of matrix scaling */ 00080 00081 #ifndef ARM_MATH_CM0_FAMILY 00082 00083 q15_t in1, in2, in3, in4; 00084 q31_t out1, out2, out3, out4; 00085 q31_t inA1, inA2; 00086 00087 #endif // #ifndef ARM_MATH_CM0_FAMILY 00088 00089 #ifdef ARM_MATH_MATRIX_CHECK 00090 /* Check for matrix mismatch */ 00091 if((pSrc->numRows != pDst->numRows) || (pSrc->numCols != pDst->numCols)) 00092 { 00093 /* Set status as ARM_MATH_SIZE_MISMATCH */ 00094 status = ARM_MATH_SIZE_MISMATCH; 00095 } 00096 else 00097 #endif // #ifdef ARM_MATH_MATRIX_CHECK 00098 { 00099 /* Total number of samples in the input matrix */ 00100 numSamples = (uint32_t) pSrc->numRows * pSrc->numCols; 00101 00102 #ifndef ARM_MATH_CM0_FAMILY 00103 00104 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00105 /* Loop Unrolling */ 00106 blkCnt = numSamples >> 2; 00107 00108 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00109 ** a second loop below computes the remaining 1 to 3 samples. */ 00110 while(blkCnt > 0u) 00111 { 00112 /* C(m,n) = A(m,n) * k */ 00113 /* Scale, saturate and then store the results in the destination buffer. 
*/ 00114 /* Reading 2 inputs from memory */ 00115 inA1 = _SIMD32_OFFSET(pIn); 00116 inA2 = _SIMD32_OFFSET(pIn + 2); 00117 00118 /* C = A * scale */ 00119 /* Scale the inputs and then store the 2 results in the destination buffer 00120 * in single cycle by packing the outputs */ 00121 out1 = (q31_t) ((q15_t) (inA1 >> 16) * scaleFract); 00122 out2 = (q31_t) ((q15_t) inA1 * scaleFract); 00123 out3 = (q31_t) ((q15_t) (inA2 >> 16) * scaleFract); 00124 out4 = (q31_t) ((q15_t) inA2 * scaleFract); 00125 00126 out1 = out1 >> totShift; 00127 inA1 = _SIMD32_OFFSET(pIn + 4); 00128 out2 = out2 >> totShift; 00129 inA2 = _SIMD32_OFFSET(pIn + 6); 00130 out3 = out3 >> totShift; 00131 out4 = out4 >> totShift; 00132 00133 in1 = (q15_t) (__SSAT(out1, 16)); 00134 in2 = (q15_t) (__SSAT(out2, 16)); 00135 in3 = (q15_t) (__SSAT(out3, 16)); 00136 in4 = (q15_t) (__SSAT(out4, 16)); 00137 00138 _SIMD32_OFFSET(pOut) = __PKHBT(in2, in1, 16); 00139 _SIMD32_OFFSET(pOut + 2) = __PKHBT(in4, in3, 16); 00140 00141 /* update pointers to process next sampels */ 00142 pIn += 4u; 00143 pOut += 4u; 00144 00145 00146 /* Decrement the numSamples loop counter */ 00147 blkCnt--; 00148 } 00149 00150 /* If the numSamples is not a multiple of 4, compute any remaining output samples here. 00151 ** No loop unrolling is used. */ 00152 blkCnt = numSamples % 0x4u; 00153 00154 #else 00155 00156 /* Run the below code for Cortex-M0 */ 00157 00158 /* Initialize blkCnt with number of samples */ 00159 blkCnt = numSamples; 00160 00161 #endif /* #ifndef ARM_MATH_CM0_FAMILY */ 00162 00163 while(blkCnt > 0u) 00164 { 00165 /* C(m,n) = A(m,n) * k */ 00166 /* Scale, saturate and then store the results in the destination buffer. 
*/ 00167 *pOut++ = 00168 (q15_t) (__SSAT(((q31_t) (*pIn++) * scaleFract) >> totShift, 16)); 00169 00170 /* Decrement the numSamples loop counter */ 00171 blkCnt--; 00172 } 00173 /* Set status as ARM_MATH_SUCCESS */ 00174 status = ARM_MATH_SUCCESS; 00175 } 00176 00177 /* Return to application */ 00178 return (status); 00179 } 00180 00181 /** 00182 * @} end of MatrixScale group 00183 */
Generated on Tue Jul 12 2022 18:44:09 by
1.7.2
