Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of mbed-dev by
Diff: cmsis/arm_math.h
- Revision:
- 167:e84263d55307
- Parent:
- 149:156823d33999
--- a/cmsis/arm_math.h	Thu Jun 08 15:02:37 2017 +0100
+++ b/cmsis/arm_math.h	Wed Jun 21 17:46:44 2017 +0100
@@ -1,42 +1,30 @@
 /* ----------------------------------------------------------------------
-* Copyright (C) 2010-2015 ARM Limited. All rights reserved.
-*
-* $Date:        19. March 2015
-* $Revision: 	V.1.4.5
-*
-* Project: 	    CMSIS DSP Library
-* Title:	    arm_math.h
-*
-* Description:	Public header file for CMSIS DSP Library
-*
-* Target Processor: Cortex-M7/Cortex-M4/Cortex-M3/Cortex-M0
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following conditions
-* are met:
-*   - Redistributions of source code must retain the above copyright
-*     notice, this list of conditions and the following disclaimer.
-*   - Redistributions in binary form must reproduce the above copyright
-*     notice, this list of conditions and the following disclaimer in
-*     the documentation and/or other materials provided with the
-*     distribution.
-*   - Neither the name of ARM LIMITED nor the names of its contributors
-*     may be used to endorse or promote products derived from this
-*     software without specific prior written permission.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-* POSSIBILITY OF SUCH DAMAGE.
+ * Project:      CMSIS DSP Library
+ * Title:        arm_math.h
+ * Description:  Public header file for CMSIS DSP Library
+ *
+ * $Date:        27. January 2017
+ * $Revision:    V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
  * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 
 /**
    \mainpage CMSIS DSP Software Library
@@ -66,26 +54,34 @@
    * ------------
    *
    * The library installer contains prebuilt versions of the libraries in the <code>Lib</code> folder.
-   * - arm_cortexM7lfdp_math.lib (Little endian and Double Precision Floating Point Unit on Cortex-M7)
-   * - arm_cortexM7bfdp_math.lib (Big endian and Double Precision Floating Point Unit on Cortex-M7)
-   * - arm_cortexM7lfsp_math.lib (Little endian and Single Precision Floating Point Unit on Cortex-M7)
-   * - arm_cortexM7bfsp_math.lib (Big endian and Single Precision Floating Point Unit on Cortex-M7)
-   * - arm_cortexM7l_math.lib (Little endian on Cortex-M7)
-   * - arm_cortexM7b_math.lib (Big endian on Cortex-M7)
-   * - arm_cortexM4lf_math.lib (Little endian and Floating Point Unit on Cortex-M4)
-   * - arm_cortexM4bf_math.lib (Big endian and Floating Point Unit on Cortex-M4)
-   * - arm_cortexM4l_math.lib (Little endian on Cortex-M4)
-   * - arm_cortexM4b_math.lib (Big endian on Cortex-M4)
-   * - arm_cortexM3l_math.lib (Little endian on Cortex-M3)
-   * - arm_cortexM3b_math.lib (Big endian on Cortex-M3)
-   * - arm_cortexM0l_math.lib (Little endian on Cortex-M0 / CortexM0+)
-   * - arm_cortexM0b_math.lib (Big endian on Cortex-M0 / CortexM0+)
+   * - arm_cortexM7lfdp_math.lib (Cortex-M7, Little endian, Double Precision Floating Point Unit)
+   * - arm_cortexM7bfdp_math.lib (Cortex-M7, Big endian, Double Precision Floating Point Unit)
+   * - arm_cortexM7lfsp_math.lib (Cortex-M7, Little endian, Single Precision Floating Point Unit)
+   * - arm_cortexM7bfsp_math.lib (Cortex-M7, Big endian and Single Precision Floating Point Unit on)
+   * - arm_cortexM7l_math.lib (Cortex-M7, Little endian)
+   * - arm_cortexM7b_math.lib (Cortex-M7, Big endian)
+   * - arm_cortexM4lf_math.lib (Cortex-M4, Little endian, Floating Point Unit)
+   * - arm_cortexM4bf_math.lib (Cortex-M4, Big endian, Floating Point Unit)
+   * - arm_cortexM4l_math.lib (Cortex-M4, Little endian)
+   * - arm_cortexM4b_math.lib (Cortex-M4, Big endian)
+   * - arm_cortexM3l_math.lib (Cortex-M3, Little endian)
+   * - arm_cortexM3b_math.lib (Cortex-M3, Big endian)
+   * - arm_cortexM0l_math.lib (Cortex-M0 / Cortex-M0+, Little endian)
+   * - arm_cortexM0b_math.lib (Cortex-M0 / Cortex-M0+, Big endian)
+   * - arm_ARMv8MBLl_math.lib (ARMv8M Baseline, Little endian)
+   * - arm_ARMv8MMLl_math.lib (ARMv8M Mainline, Little endian)
+   * - arm_ARMv8MMLlfsp_math.lib (ARMv8M Mainline, Little endian, Single Precision Floating Point Unit)
+   * - arm_ARMv8MMLld_math.lib (ARMv8M Mainline, Little endian, DSP instructions)
+   * - arm_ARMv8MMLldfsp_math.lib (ARMv8M Mainline, Little endian, DSP instructions, Single Precision Floating Point Unit)
    *
    * The library functions are declared in the public file <code>arm_math.h</code> which is placed in the <code>Include</code> folder.
    * Simply include this file and link the appropriate library in the application and begin calling the library functions. The Library supports single
-   * public header file <code> arm_math.h</code> for Cortex-M7/M4/M3/M0/M0+ with little endian and big endian. Same header file will be used for floating point unit(FPU) variants.
+   * public header file <code> arm_math.h</code> for Cortex-M cores with little endian and big endian. Same header file will be used for floating point unit(FPU) variants.
    * Define the appropriate pre processor MACRO ARM_MATH_CM7 or ARM_MATH_CM4 or  ARM_MATH_CM3 or
    * ARM_MATH_CM0 or ARM_MATH_CM0PLUS depending on the target processor in the application.
+   * For ARMv8M cores define pre processor MACRO ARM_MATH_ARMV8MBL or ARM_MATH_ARMV8MML.
+   * Set Pre processor MACRO __DSP_PRESENT if ARMv8M Mainline core supports DSP instructions.
+   * 
    *
    * Examples
    * --------
@@ -134,14 +130,23 @@
    * and ARM_MATH_CM0 for building library on Cortex-M0 target, ARM_MATH_CM0PLUS for building library on Cortex-M0+ target, and
    * ARM_MATH_CM7 for building the library on cortex-M7.
    *
+   * - ARM_MATH_ARMV8MxL:
+   *
+   * Define macro ARM_MATH_ARMV8MBL for building the library on ARMv8M Baseline target, ARM_MATH_ARMV8MBL for building library
+   * on ARMv8M Mainline target.
+   *
    * - __FPU_PRESENT:
    *
-   * Initialize macro __FPU_PRESENT = 1 when building on FPU supported Targets. Enable this macro for M4bf and M4lf libraries
+   * Initialize macro __FPU_PRESENT = 1 when building on FPU supported Targets. Enable this macro for floating point libraries.
+   *
+   * - __DSP_PRESENT:
+   *
+   * Initialize macro __DSP_PRESENT = 1 when ARMv8M Mainline core supports DSP instructions.
    *
    * <hr>
    * CMSIS-DSP in ARM::CMSIS Pack
    * -----------------------------
-   * 
+   *
    * The following files relevant to CMSIS-DSP are present in the <b>ARM::CMSIS</b> Pack directories:
    * |File/Folder                   |Content                                                                 |
    * |------------------------------|------------------------------------------------------------------------|
@@ -149,7 +154,7 @@
    * |\b CMSIS\\DSP_Lib             | Software license agreement (license.txt)                               |
    * |\b CMSIS\\DSP_Lib\\Examples   | Example projects demonstrating the usage of the library functions      |
    * |\b CMSIS\\DSP_Lib\\Source     | Source files for rebuilding the library                                |
-   * 
+   *
    * <hr>
    * Revision History of CMSIS-DSP
    * ------------
@@ -288,28 +293,62 @@
 #ifndef _ARM_MATH_H
 #define _ARM_MATH_H
 
+/* Compiler specific diagnostic adjustment */
+#if   defined ( __CC_ARM )
+
+#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 )
+
+#elif defined ( __GNUC__ )
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wsign-conversion"
+#pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic ignored "-Wunused-parameter"
+
+#elif defined ( __ICCARM__ )
+
+#elif defined ( __TI_ARM__ )
+
+#elif defined ( __CSMC__ )
+
+#elif defined ( __TASKING__ )
+
+#else
+  #error Unknown compiler
+#endif
+
+
 #define __CMSIS_GENERIC         /* disable NVIC and Systick functions */
 
 #if defined(ARM_MATH_CM7)
   #include "core_cm7.h"
+  #define ARM_MATH_DSP
 #elif defined (ARM_MATH_CM4)
   #include "core_cm4.h"
+  #define ARM_MATH_DSP
 #elif defined (ARM_MATH_CM3)
   #include "core_cm3.h"
 #elif defined (ARM_MATH_CM0)
   #include "core_cm0.h"
-#define ARM_MATH_CM0_FAMILY
-  #elif defined (ARM_MATH_CM0PLUS)
-#include "core_cm0plus.h"
+  #define ARM_MATH_CM0_FAMILY
+#elif defined (ARM_MATH_CM0PLUS)
+  #include "core_cm0plus.h"
+  #define ARM_MATH_CM0_FAMILY
+#elif defined (ARM_MATH_ARMV8MBL)
+  #include "core_armv8mbl.h"
   #define ARM_MATH_CM0_FAMILY
+#elif defined (ARM_MATH_ARMV8MML)
+  #include "core_armv8mml.h"
+  #if (defined (__DSP_PRESENT) && (__DSP_PRESENT == 1))
+    #define ARM_MATH_DSP
+  #endif
 #else
-  #error "Define according the used Cortex core ARM_MATH_CM7, ARM_MATH_CM4, ARM_MATH_CM3, ARM_MATH_CM0PLUS or ARM_MATH_CM0"
+  #error "Define according the used Cortex core ARM_MATH_CM7, ARM_MATH_CM4, ARM_MATH_CM3, ARM_MATH_CM0PLUS, ARM_MATH_CM0, ARM_MATH_ARMV8MBL, ARM_MATH_ARMV8MML"
 #endif
 
 #undef  __CMSIS_GENERIC         /* enable NVIC and Systick functions */
 #include "string.h"
 #include "math.h"
-#ifdef	__cplusplus
+#ifdef   __cplusplus
 extern "C"
 {
 #endif
@@ -319,11 +358,11 @@
    * @brief Macros required for reciprocal calculation in Normalized LMS
    */
 
-#define DELTA_Q31 			(0x100)
-#define DELTA_Q15 			0x5
-#define INDEX_MASK 			0x0000003F
+#define DELTA_Q31          (0x100)
+#define DELTA_Q15          0x5
+#define INDEX_MASK         0x0000003F
 #ifndef PI
-#define PI					3.14159265358979f
+  #define PI               3.14159265358979f
 #endif
 
   /**
@@ -334,16 +373,15 @@
 #define FAST_MATH_Q31_SHIFT   (32 - 10)
 #define FAST_MATH_Q15_SHIFT   (16 - 10)
 #define CONTROLLER_Q31_SHIFT  (32 - 9)
-#define TABLE_SIZE  256
-#define TABLE_SPACING_Q31	   0x400000
-#define TABLE_SPACING_Q15	   0x80
+#define TABLE_SPACING_Q31     0x400000
+#define TABLE_SPACING_Q15     0x80
 
   /**
    * @brief Macros required for SINE and COSINE Controller functions
    */
   /* 1.31(q31) Fixed value of 2/360 */
   /* -1 to +1 is divided into 360 values so total spacing is (2/360) */
-#define INPUT_SPACING			0xB60B61
+#define INPUT_SPACING         0xB60B61
 
   /**
    * @brief Macro for Unaligned Support
@@ -356,7 +394,7 @@
   #else
     #define ALIGN4 __align(4)
   #endif
-#endif	/*	#ifndef UNALIGNED_SUPPORT_DISABLE	*/
+#endif   /* #ifndef UNALIGNED_SUPPORT_DISABLE */
 
   /**
    * @brief Error status returned by some functions in the library.
@@ -406,59 +444,78 @@
   /**
    * @brief definition to read/write two 16 bit values.
    */
-#if defined __CC_ARM
+#if   defined ( __CC_ARM )
   #define __SIMD32_TYPE int32_t __packed
   #define CMSIS_UNUSED __attribute__((unused))
-#elif defined __ICCARM__
+  #define CMSIS_INLINE __attribute__((always_inline))
+
+#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 )
+  #define __SIMD32_TYPE int32_t
+  #define CMSIS_UNUSED __attribute__((unused))
+  #define CMSIS_INLINE __attribute__((always_inline))
+
+#elif defined ( __GNUC__ )
+  #define __SIMD32_TYPE int32_t
+  #define CMSIS_UNUSED __attribute__((unused))
+  #define CMSIS_INLINE __attribute__((always_inline))
+
+#elif defined ( __ICCARM__ )
   #define __SIMD32_TYPE int32_t __packed
   #define CMSIS_UNUSED
-#elif defined __GNUC__
+  #define CMSIS_INLINE
+
+#elif defined ( __TI_ARM__ )
   #define __SIMD32_TYPE int32_t
   #define CMSIS_UNUSED __attribute__((unused))
-#elif defined __CSMC__			/* Cosmic */
+  #define CMSIS_INLINE
+
+#elif defined ( __CSMC__ )
   #define __SIMD32_TYPE int32_t
   #define CMSIS_UNUSED
-#elif defined __TASKING__
+  #define CMSIS_INLINE
+
+#elif defined ( __TASKING__ )
   #define __SIMD32_TYPE __unaligned int32_t
   #define CMSIS_UNUSED
+  #define CMSIS_INLINE
+
 #else
   #error Unknown compiler
 #endif
 
-#define __SIMD32(addr)  (*(__SIMD32_TYPE **) & (addr))
+#define __SIMD32(addr)        (*(__SIMD32_TYPE **) & (addr))
 #define __SIMD32_CONST(addr)  ((__SIMD32_TYPE *)(addr))
-
 #define _SIMD32_OFFSET(addr)  (*(__SIMD32_TYPE *)  (addr))
-
-#define __SIMD64(addr)  (*(int64_t **) & (addr))
-
-#if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY)
+#define __SIMD64(addr)        (*(int64_t **) & (addr))
+
+/* #if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY) */
+#if !defined (ARM_MATH_DSP)
   /**
    * @brief definition to pack two 16 bit values.
    */
-#define __PKHBT(ARG1, ARG2, ARG3)      ( (((int32_t)(ARG1) <<  0) & (int32_t)0x0000FFFF) | \
-                                         (((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000)  )
-#define __PKHTB(ARG1, ARG2, ARG3)      ( (((int32_t)(ARG1) <<  0) & (int32_t)0xFFFF0000) | \
-                                         (((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF)  )
-
-#endif
-
+#define __PKHBT(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) <<    0) & (int32_t)0x0000FFFF) | \
+                                    (((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000)  )
+#define __PKHTB(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) <<    0) & (int32_t)0xFFFF0000) | \
+                                    (((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF)  )
+
+/* #endif // defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY) */
+#endif /* !defined (ARM_MATH_DSP) */
 
    /**
    * @brief definition to pack four 8 bit values.
    */
 #ifndef ARM_MATH_BIG_ENDIAN
 
-#define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) <<  0) & (int32_t)0x000000FF) |	\
-                                (((int32_t)(v1) <<  8) & (int32_t)0x0000FF00) |	\
-							    (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) |	\
-							    (((int32_t)(v3) << 24) & (int32_t)0xFF000000)  )
+#define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) <<  0) & (int32_t)0x000000FF) | \
+                                (((int32_t)(v1) <<  8) & (int32_t)0x0000FF00) | \
+                                (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \
+                                (((int32_t)(v3) << 24) & (int32_t)0xFF000000)  )
 #else
 
-#define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) <<  0) & (int32_t)0x000000FF) |	\
-                                (((int32_t)(v2) <<  8) & (int32_t)0x0000FF00) |	\
-							    (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) |	\
-							    (((int32_t)(v0) << 24) & (int32_t)0xFF000000)  )
+#define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) <<  0) & (int32_t)0x000000FF) | \
+                                (((int32_t)(v2) <<  8) & (int32_t)0x0000FF00) | \
+                                (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) | \
+                                (((int32_t)(v0) << 24) & (int32_t)0xFF000000)  )
 
 #endif
 
@@ -466,7 +523,7 @@
   /**
    * @brief Clips Q63 to Q31 values.
    */
-  static __INLINE q31_t clip_q63_to_q31(
+  CMSIS_INLINE __STATIC_INLINE q31_t clip_q63_to_q31(
   q63_t x)
   {
     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
@@ -476,7 +533,7 @@
   /**
    * @brief Clips Q63 to Q15 values.
    */
-  static __INLINE q15_t clip_q63_to_q15(
+  CMSIS_INLINE __STATIC_INLINE q15_t clip_q63_to_q15(
   q63_t x)
   {
     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
@@ -486,7 +543,7 @@
   /**
    * @brief Clips Q31 to Q7 values.
    */
-  static __INLINE q7_t clip_q31_to_q7(
+  CMSIS_INLINE __STATIC_INLINE q7_t clip_q31_to_q7(
   q31_t x)
   {
     return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ?
@@ -496,7 +553,7 @@
   /**
    * @brief Clips Q31 to Q15 values.
    */
-  static __INLINE q15_t clip_q31_to_q15(
+  CMSIS_INLINE __STATIC_INLINE q15_t clip_q31_to_q15(
   q31_t x)
   {
     return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ?
@@ -507,7 +564,7 @@
    * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format.
    */
 
-  static __INLINE q63_t mult32x64(
+  CMSIS_INLINE __STATIC_INLINE q63_t mult32x64(
   q63_t x,
   q31_t y)
   {
@@ -515,64 +572,60 @@
             (((q63_t) (x >> 32) * y)));
   }
 
-
-//#if defined (ARM_MATH_CM0_FAMILY) && defined ( __CC_ARM   )
-//#define __CLZ __clz
-//#endif
-
-//note: function can be removed when all toolchain support __CLZ for Cortex-M0
+/*
+  #if defined (ARM_MATH_CM0_FAMILY) && defined ( __CC_ARM   )
+  #define __CLZ __clz
+  #endif
+ */
+/* note: function can be removed when all toolchain support __CLZ for Cortex-M0 */
 #if defined (ARM_MATH_CM0_FAMILY) && ((defined (__ICCARM__))  )
-
-  static __INLINE uint32_t __CLZ(
+  CMSIS_INLINE __STATIC_INLINE uint32_t __CLZ(
   q31_t data);
 
-
-  static __INLINE uint32_t __CLZ(
+  CMSIS_INLINE __STATIC_INLINE uint32_t __CLZ(
   q31_t data)
   {
     uint32_t count = 0;
     uint32_t mask = 0x80000000;
 
-    while((data & mask) == 0)
+    while ((data & mask) == 0)
     {
       count += 1u;
       mask = mask >> 1u;
     }
 
     return (count);
-
   }
-
 #endif
 
   /**
    * @brief Function to Calculates 1/in (reciprocal) value of Q31 Data type.
    */
 
-  static __INLINE uint32_t arm_recip_q31(
+  CMSIS_INLINE __STATIC_INLINE uint32_t arm_recip_q31(
   q31_t in,
   q31_t * dst,
   q31_t * pRecipTable)
   {
-
-    uint32_t out, tempVal;
+    q31_t out;
+    uint32_t tempVal;
     uint32_t index, i;
     uint32_t signBits;
 
-    if(in > 0)
+    if (in > 0)
     {
-      signBits = __CLZ(in) - 1;
+      signBits = ((uint32_t) (__CLZ( in) - 1));
     }
     else
     {
-      signBits = __CLZ(-in) - 1;
+      signBits = ((uint32_t) (__CLZ(-in) - 1));
     }
 
     /* Convert input sample to 1.31 format */
-    in = in << signBits;
+    in = (in << signBits);
 
     /* calculation of index for initial approximated Val */
-    index = (uint32_t) (in >> 24u);
+    index = (uint32_t)(in >> 24);
     index = (index & INDEX_MASK);
 
     /* 1.31 with exp 1 */
@@ -582,11 +635,11 @@
     /* running approximation for two iterations */
     for (i = 0u; i < 2u; i++)
     {
-      tempVal = (q31_t) (((q63_t) in * out) >> 31u);
-      tempVal = 0x7FFFFFFF - tempVal;
+      tempVal = (uint32_t) (((q63_t) in * out) >> 31);
+      tempVal = 0x7FFFFFFFu - tempVal;
       /*      1.31 with exp 1 */
-      //out = (q31_t) (((q63_t) out * tempVal) >> 30u);
-      out = (q31_t) clip_q63_to_q31(((q63_t) out * tempVal) >> 30u);
+      /* out = (q31_t) (((q63_t) out * tempVal) >> 30); */
+      out = clip_q63_to_q31(((q63_t) out * tempVal) >> 30);
     }
 
     /* write output */
@@ -594,36 +647,36 @@
 
     /* return num of signbits of out = 1/in value */
     return (signBits + 1u);
-
   }
 
+
   /**
    * @brief Function to Calculates 1/in (reciprocal) value of Q15 Data type.
    */
-  static __INLINE uint32_t arm_recip_q15(
+  CMSIS_INLINE __STATIC_INLINE uint32_t arm_recip_q15(
   q15_t in,
   q15_t * dst,
   q15_t * pRecipTable)
   {
-
-    uint32_t out = 0, tempVal = 0;
+    q15_t out = 0;
+    uint32_t tempVal = 0;
     uint32_t index = 0, i = 0;
     uint32_t signBits = 0;
 
-    if(in > 0)
+    if (in > 0)
     {
-      signBits = __CLZ(in) - 17;
+      signBits = ((uint32_t)(__CLZ( in) - 17));
     }
     else
     {
-      signBits = __CLZ(-in) - 17;
+      signBits = ((uint32_t)(__CLZ(-in) - 17));
     }
 
     /* Convert input sample to 1.15 format */
-    in = in << signBits;
+    in = (in << signBits);
 
     /* calculation of index for initial approximated Val */
-    index = in >> 8;
+    index = (uint32_t)(in >>  8);
     index = (index & INDEX_MASK);
 
     /*      1.15 with exp 1  */
@@ -631,12 +684,13 @@
 
     /* calculation of reciprocal value */
     /* running approximation for two iterations */
-    for (i = 0; i < 2; i++)
+    for (i = 0u; i < 2u; i++)
     {
-      tempVal = (q15_t) (((q31_t) in * out) >> 15);
-      tempVal = 0x7FFF - tempVal;
+      tempVal = (uint32_t) (((q31_t) in * out) >> 15);
+      tempVal = 0x7FFFu - tempVal;
       /*      1.15 with exp 1 */
       out = (q15_t) (((q31_t) out * tempVal) >> 14);
+      /* out = clip_q31_to_q15(((q31_t) out * tempVal) >> 14); */
     }
 
     /* write output */
@@ -644,7 +698,6 @@
 
     /* return num of signbits of out = 1/in value */
     return (signBits + 1);
-
   }
 
 
@@ -652,8 +705,7 @@
    * @brief C custom defined intrinisic function for only M0 processors
    */
 #if defined(ARM_MATH_CM0_FAMILY)
-
-  static __INLINE q31_t __SSAT(
+  CMSIS_INLINE __STATIC_INLINE q31_t __SSAT(
   q31_t x,
   uint32_t y)
   {
@@ -666,11 +718,11 @@
       posMax = posMax * 2;
     }
 
-    if(x > 0)
+    if (x > 0)
     {
       posMax = (posMax - 1);
 
-      if(x > posMax)
+      if (x > posMax)
       {
         x = posMax;
       }
@@ -679,400 +731,377 @@
     {
       negMin = -posMax;
 
-      if(x < negMin)
+      if (x < negMin)
       {
         x = negMin;
       }
     }
     return (x);
-
-
   }
-
 #endif /* end of ARM_MATH_CM0_FAMILY */
 
 
-
   /*
    * @brief C custom defined intrinsic function for M3 and M0 processors
    */
-#if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY)
+/* #if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY) */
+#if !defined (ARM_MATH_DSP)
 
   /*
    * @brief C custom defined QADD8 for M3 and M0 processors
    */
-  static __INLINE q31_t __QADD8(
-  q31_t x,
-  q31_t y)
+  CMSIS_INLINE __STATIC_INLINE uint32_t __QADD8(
+  uint32_t x,
+  uint32_t y)
   {
-
-    q31_t sum;
-    q7_t r, s, t, u;
-
-    r = (q7_t) x;
-    s = (q7_t) y;
-
-    r = __SSAT((q31_t) (r + s), 8);
-    s = __SSAT(((q31_t) (((x << 16) >> 24) + ((y << 16) >> 24))), 8);
-    t = __SSAT(((q31_t) (((x << 8) >> 24) + ((y << 8) >> 24))), 8);
-    u = __SSAT(((q31_t) ((x >> 24) + (y >> 24))), 8);
-
-    sum =
-      (((q31_t) u << 24) & 0xFF000000) | (((q31_t) t << 16) & 0x00FF0000) |
-      (((q31_t) s << 8) & 0x0000FF00) | (r & 0x000000FF);
-
-    return sum;
-
+    q31_t r, s, t, u;
+
+    r = __SSAT(((((q31_t)x << 24) >> 24) + (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
+    s = __SSAT(((((q31_t)x << 16) >> 24) + (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
+    t = __SSAT(((((q31_t)x <<  8) >> 24) + (((q31_t)y <<  8) >> 24)), 8) & (int32_t)0x000000FF;
+    u = __SSAT(((((q31_t)x      ) >> 24) + (((q31_t)y      ) >> 24)), 8) & (int32_t)0x000000FF;
+
+    return ((uint32_t)((u << 24) | (t << 16) | (s <<  8) | (r      )));
   }
 
+
   /*
    * @brief C custom defined QSUB8 for M3 and M0 processors
    */
-  static __INLINE q31_t __QSUB8(
-  q31_t x,
-  q31_t y)
+  CMSIS_INLINE __STATIC_INLINE uint32_t __QSUB8(
+  uint32_t x,
+  uint32_t y)
   {
-
-    q31_t sum;
     q31_t r, s, t, u;
 
-    r = (q7_t) x;
-    s = (q7_t) y;
-
-    r = __SSAT((r - s), 8);
-    s = __SSAT(((q31_t) (((x << 16) >> 24) - ((y << 16) >> 24))), 8) << 8;
-    t = __SSAT(((q31_t) (((x << 8) >> 24) - ((y << 8) >> 24))), 8) << 16;
-    u = __SSAT(((q31_t) ((x >> 24) - (y >> 24))), 8) << 24;
-
-    sum =
-      (u & 0xFF000000) | (t & 0x00FF0000) | (s & 0x0000FF00) | (r &
-                                                                0x000000FF);
-
-    return sum;
+    r = __SSAT(((((q31_t)x << 24) >> 24) - (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF;
+    s = __SSAT(((((q31_t)x << 16) >> 24) - (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF;
+    t = __SSAT(((((q31_t)x <<  8) >> 24) - (((q31_t)y <<  8) >> 24)), 8) & (int32_t)0x000000FF;
+    u = __SSAT(((((q31_t)x      ) >> 24) - (((q31_t)y      ) >> 24)), 8) & (int32_t)0x000000FF;
+
+    return ((uint32_t)((u << 24) | (t << 16) | (s <<  8) | (r      )));
   }
 
+
   /*
    * @brief C custom defined QADD16 for M3 and M0 processors
    */
-
-  /*
-   * @brief C custom defined QADD16 for M3 and M0 processors
-   */
-  static __INLINE q31_t __QADD16(
-  q31_t x,
-  q31_t y)
+  CMSIS_INLINE __STATIC_INLINE uint32_t __QADD16(
+  uint32_t x,
+  uint32_t y)
   {
-
-    q31_t sum;
-    q31_t r, s;
-
-    r = (q15_t) x;
-    s = (q15_t) y;
-
-    r = __SSAT(r + s, 16);
-    s = __SSAT(((q31_t) ((x >> 16) + (y >> 16))), 16) << 16;
-
-    sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
-
-    return sum;
-
+/*  q31_t r,     s;  without initialisation 'arm_offset_q15 test' fails  but 'intrinsic' tests pass! for armCC */
+    q31_t r = 0, s = 0;
+
+    r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
+    s = __SSAT(((((q31_t)x      ) >> 16) + (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
+
+    return ((uint32_t)((s << 16) | (r      )));
   }
 
+
   /*
    * @brief C custom defined SHADD16 for M3 and M0 processors
    */
-  static __INLINE q31_t __SHADD16(
-  q31_t x,
-  q31_t y)
+  CMSIS_INLINE __STATIC_INLINE uint32_t __SHADD16(
+  uint32_t x,
+  uint32_t y)
   {
-
-    q31_t sum;
     q31_t r, s;
 
-    r = (q15_t) x;
-    s = (q15_t) y;
-
-    r = ((r >> 1) + (s >> 1));
-    s = ((q31_t) ((x >> 17) + (y >> 17))) << 16;
-
-    sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
-
-    return sum;
-
+    r = (((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
+    s = (((((q31_t)x      ) >> 16) + (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
+
+    return ((uint32_t)((s << 16) | (r      )));
   }
 
+
   /*
    * @brief C custom defined QSUB16 for M3 and M0 processors
    */
-  static __INLINE q31_t __QSUB16(
-  q31_t x,
-  q31_t y)
+  CMSIS_INLINE __STATIC_INLINE uint32_t __QSUB16(
+  uint32_t x,
+  uint32_t y)
   {
-
-    q31_t sum;
     q31_t r, s;
 
-    r = (q15_t) x;
-    s = (q15_t) y;
-
-    r = __SSAT(r - s, 16);
-    s = __SSAT(((q31_t) ((x >> 16) - (y >> 16))), 16) << 16;
-
-    sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
-
-    return sum;
+    r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
+    s = __SSAT(((((q31_t)x      ) >> 16) - (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
+
+    return ((uint32_t)((s << 16) | (r      )));
   }
 
+
   /*
    * @brief C custom defined SHSUB16 for M3 and M0 processors
    */
-  static __INLINE q31_t __SHSUB16(
-  q31_t x,
-  q31_t y)
+  CMSIS_INLINE __STATIC_INLINE uint32_t __SHSUB16(
+  uint32_t x,
+  uint32_t y)
   {
-
-    q31_t diff;
     q31_t r, s;
 
-    r = (q15_t) x;
-    s = (q15_t) y;
-
-    r = ((r >> 1) - (s >> 1));
-    s = (((x >> 17) - (y >> 17)) << 16);
-
-    diff = (s & 0xFFFF0000) | (r & 0x0000FFFF);
-
-    return diff;
+    r = (((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
+    s = (((((q31_t)x      ) >> 16) - (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
+
+    return ((uint32_t)((s << 16) | (r      )));
   }
 
+
   /*
    * @brief C custom defined QASX for M3 and M0 processors
    */
-  static __INLINE q31_t __QASX(
-  q31_t x,
-  q31_t y)
+  CMSIS_INLINE __STATIC_INLINE uint32_t __QASX(
+  uint32_t x,
+  uint32_t y)
   {
-
-    q31_t sum = 0;
-
-    sum =
-      ((sum +
-        clip_q31_to_q15((q31_t) ((q15_t) (x >> 16) + (q15_t) y))) << 16) +
-      clip_q31_to_q15((q31_t) ((q15_t) x - (q15_t) (y >> 16)));
-
-    return sum;
+    q31_t r, s;
+
+    r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
+    s = __SSAT(((((q31_t)x      ) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
+
+    return ((uint32_t)((s << 16) | (r      )));
   }
 
+
   /*
    * @brief C custom defined SHASX for M3 and M0 processors
    */
-  static __INLINE q31_t __SHASX(
-  q31_t x,
-  q31_t y)
+  CMSIS_INLINE __STATIC_INLINE uint32_t __SHASX(
+  uint32_t x,
+  uint32_t y)
   {
-
-    q31_t sum;
     q31_t r, s;
 
-    r = (q15_t) x;
-    s = (q15_t) y;
-
-    r = ((r >> 1) - (y >> 17));
-    s = (((x >> 17) + (s >> 1)) << 16);
-
-    sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
-
-    return sum;
+    r = (((((q31_t)x << 16) >> 16) - (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
+    s = (((((q31_t)x      ) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
+
+    return ((uint32_t)((s << 16) | (r      )));
   }
 
 
   /*
    * @brief C custom defined QSAX for M3 and M0 processors
    */
-  static __INLINE q31_t __QSAX(
-  q31_t x,
-  q31_t y)
+  CMSIS_INLINE __STATIC_INLINE uint32_t __QSAX(
+  uint32_t x,
+  uint32_t y)
   {
-
-    q31_t sum = 0;
-
-    sum =
-      ((sum +
-        clip_q31_to_q15((q31_t) ((q15_t) (x >> 16) - (q15_t) y))) << 16) +
-      clip_q31_to_q15((q31_t) ((q15_t) x + (q15_t) (y >> 16)));
-
-    return sum;
+    q31_t r, s;
+
+    r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y      ) >> 16)), 16) & (int32_t)0x0000FFFF;
+    s = __SSAT(((((q31_t)x      ) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF;
+
+    return ((uint32_t)((s << 16) | (r      )));
   }
 
+
   /*
    * @brief C custom defined SHSAX for M3 and M0 processors
    */
-  static __INLINE q31_t __SHSAX(
-  q31_t x,
-  q31_t y)
+  CMSIS_INLINE __STATIC_INLINE uint32_t __SHSAX(
+  uint32_t x,
+  uint32_t y)
   {
-
-    q31_t sum;
     q31_t r, s;
 
-    r = (q15_t) x;
-    s = (q15_t) y;
-
-    r = ((r >> 1) + (y >> 17));
-    s = (((x >> 17) - (s >> 1)) << 16);
-
-    sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
-
-    return sum;
+    r = (((((q31_t)x << 16) >> 16) + (((q31_t)y      ) >> 16)) >> 1) & (int32_t)0x0000FFFF;
+    s = (((((q31_t)x      ) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF;
+
+    return ((uint32_t)((s << 16) | (r      )));
   }
 
+
   /*
    * @brief C custom defined SMUSDX for M3 and M0 processors
    */
-  static __INLINE q31_t __SMUSDX(
-  q31_t x,
-  q31_t y)
+  CMSIS_INLINE __STATIC_INLINE uint32_t __SMUSDX(
+  uint32_t x,
+  uint32_t y)
   {
-
-    return ((q31_t) (((q15_t) x * (q15_t) (y >> 16)) -
-                     ((q15_t) (x >> 16) * (q15_t) y)));
+    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) -
+                       ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16))   ));
   }
 
   /*
    * @brief C custom defined SMUADX for M3 and M0 processors
    */
-  static __INLINE q31_t __SMUADX(
-  q31_t x,
-  q31_t y)
+  CMSIS_INLINE __STATIC_INLINE uint32_t __SMUADX(
+  uint32_t x,
+  uint32_t y)
   {
-
-    return ((q31_t) (((q15_t) x * (q15_t) (y >> 16)) +
-                     ((q15_t) (x >> 16) * (q15_t) y)));
+    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) +
+                       ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16))   ));
   }
 
+
   /*
    * @brief C custom defined QADD for M3 and M0 processors
    */
-  static __INLINE q31_t __QADD(
-  q31_t x,
-  q31_t y)
+  CMSIS_INLINE __STATIC_INLINE int32_t __QADD(
+  int32_t x,
+  int32_t y)
   {
-    return clip_q63_to_q31((q63_t) x + y);
+    return ((int32_t)(clip_q63_to_q31((q63_t)x + (q31_t)y)));
   }
 
+
   /*
    * @brief C custom defined QSUB for M3 and M0 processors
    */
-  static __INLINE q31_t __QSUB(
-  q31_t x,
-  q31_t y)
+  CMSIS_INLINE __STATIC_INLINE int32_t __QSUB(
+  int32_t x,
+  int32_t y)
   {
-    return clip_q63_to_q31((q63_t) x - y);
+    return ((int32_t)(clip_q63_to_q31((q63_t)x - (q31_t)y)));
   }
 
+
   /*
    * @brief C custom defined SMLAD for M3 and M0 processors
    */
-  static __INLINE q31_t __SMLAD(
-  q31_t x,
-  q31_t y,
-  q31_t sum)
+  CMSIS_INLINE __STATIC_INLINE uint32_t __SMLAD(
+  uint32_t x,
+  uint32_t y,
+  uint32_t sum)
   {
-
-    return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) +
-            ((q15_t) x * (q15_t) y));
+    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
+                       ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16)) +
+                       ( ((q31_t)sum    )                                  )   ));
   }
 
+
   /*
    * @brief C custom defined SMLADX for M3 and M0 processors
    */
-  static __INLINE q31_t __SMLADX(
-  q31_t x,
-  q31_t y,
-  q31_t sum)
+  CMSIS_INLINE __STATIC_INLINE uint32_t __SMLADX(
+  uint32_t x,
+  uint32_t y,
+  uint32_t sum)
   {
-
-    return (sum + ((q15_t) (x >> 16) * (q15_t) (y)) +
-            ((q15_t) x * (q15_t) (y >> 16)));
+    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) +
+                       ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16)) +
+                       ( ((q31_t)sum    )                                  )   ));
   }
 
+
   /*
    * @brief C custom defined SMLSDX for M3 and M0 processors
    */
-  static __INLINE q31_t __SMLSDX(
-  q31_t x,
-  q31_t y,
-  q31_t sum)
+  CMSIS_INLINE __STATIC_INLINE uint32_t __SMLSDX(
+  uint32_t x,
+  uint32_t y,
+  uint32_t sum)
   {
-
-    return (sum - ((q15_t) (x >> 16) * (q15_t) (y)) +
-            ((q15_t) x * (q15_t) (y >> 16)));
+    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) -
+                       ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16)) +
+                       ( ((q31_t)sum    )                                  )   ));
   }
 
+
   /*
    * @brief C custom defined SMLALD for M3 and M0 processors
    */
-  static __INLINE q63_t __SMLALD(
-  q31_t x,
-  q31_t y,
-  q63_t sum)
+  CMSIS_INLINE __STATIC_INLINE uint64_t __SMLALD(
+  uint32_t x,
+  uint32_t y,
+  uint64_t sum)
   {
-
-    return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) +
-            ((q15_t) x * (q15_t) y));
+/*  return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) + ((q15_t) x * (q15_t) y)); */
+    return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
+                       ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16)) +
+                       ( ((q63_t)sum    )                                  )   ));
   }
 
+
   /*
    * @brief C custom defined SMLALDX for M3 and M0 processors
    */
-  static __INLINE q63_t __SMLALDX(
-  q31_t x,
-  q31_t y,
-  q63_t sum)
+  CMSIS_INLINE __STATIC_INLINE uint64_t __SMLALDX(
+  uint32_t x,
+  uint32_t y,
+  uint64_t sum)
   {
-
-    return (sum + ((q15_t) (x >> 16) * (q15_t) y)) +
-      ((q15_t) x * (q15_t) (y >> 16));
+/*  return (sum + ((q15_t) (x >> 16) * (q15_t) y)) + ((q15_t) x * (q15_t) (y >> 16)); */
+    return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y      ) >> 16)) +
+                       ((((q31_t)x      ) >> 16) * (((q31_t)y << 16) >> 16)) +
+                       ( ((q63_t)sum    )                                  )   ));
   }
 
+
   /*
    * @brief C custom defined SMUAD for M3 and M0 processors
    */
-  static __INLINE q31_t __SMUAD(
-  q31_t x,
-  q31_t y)
+  CMSIS_INLINE __STATIC_INLINE uint32_t __SMUAD(
+  uint32_t x,
+  uint32_t y)
   {
-
-    return (((x >> 16) * (y >> 16)) +
-            (((x << 16) >> 16) * ((y << 16) >> 16)));
+    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) +
+                       ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16))   ));
   }
 
+
   /*
    * @brief C custom defined SMUSD for M3 and M0 processors
    */
-  static __INLINE q31_t __SMUSD(
-  q31_t x,
-  q31_t y)
+  CMSIS_INLINE __STATIC_INLINE uint32_t __SMUSD(
+  uint32_t x,
+  uint32_t y)
   {
-
-    return (-((x >> 16) * (y >> 16)) +
-            (((x << 16) >> 16) * ((y << 16) >> 16)));
+    return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) -
+                       ((((q31_t)x      ) >> 16) * (((q31_t)y      ) >> 16))   ));
   }
 
 
   /*
    * @brief C custom defined SXTB16 for M3 and M0 processors
    */
-  static __INLINE q31_t __SXTB16(
-  q31_t x)
+  CMSIS_INLINE __STATIC_INLINE uint32_t __SXTB16(
+  uint32_t x)
+  {
+    return ((uint32_t)(((((q31_t)x << 24) >> 24) & (q31_t)0x0000FFFF) |
+                       ((((q31_t)x <<  8) >>  8) & (q31_t)0xFFFF0000)  ));
+  }
+
+  /*
+   * @brief C custom defined SMMLA for M3 and M0 processors
+   */
+  CMSIS_INLINE __STATIC_INLINE int32_t __SMMLA(
+  int32_t x,
+  int32_t y,
+  int32_t sum)
   {
-
-    return ((((x << 24) >> 24) & 0x0000FFFF) |
-            (((x << 8) >> 8) & 0xFFFF0000));
+    return (sum + (int32_t) (((int64_t) x * y) >> 32));
+  }
+
+#if 0
+  /*
+   * @brief C custom defined PKHBT for unavailable DSP extension
+   */
+  CMSIS_INLINE __STATIC_INLINE uint32_t __PKHBT(
+  uint32_t x,
+  uint32_t y,
+  uint32_t leftshift)
+  {
+    return ( ((x             ) & 0x0000FFFFUL) |
+             ((y << leftshift) & 0xFFFF0000UL)  );
   }
 
-
-#endif /* defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY) */
+  /*
+   * @brief C custom defined PKHTB for unavailable DSP extension
+   */
+  CMSIS_INLINE __STATIC_INLINE uint32_t __PKHTB(
+  uint32_t x,
+  uint32_t y,
+  uint32_t rightshift)
+  {
+    return ( ((x              ) & 0xFFFF0000UL) |
+             ((y >> rightshift) & 0x0000FFFFUL)  );
+  }
+#endif
+
+/* #endif // defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY) */
+#endif /* !defined (ARM_MATH_DSP) */
 
 
   /**
@@ -1118,11 +1147,10 @@
 
   /**
    * @brief Processing function for the Q7 FIR filter.
-   * @param[in] *S points to an instance of the Q7 FIR filter structure.
-   * @param[in] *pSrc points to the block of input data.
-   * @param[out] *pDst points to the block of output data.
-   * @param[in] blockSize number of samples to process.
-   * @return none.
+   * @param[in]  S          points to an instance of the Q7 FIR filter structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
    */
   void arm_fir_q7(
   const arm_fir_instance_q7 * S,
@@ -1133,12 +1161,11 @@
 
   /**
    * @brief  Initialization function for the Q7 FIR filter.
-   * @param[in,out] *S points to an instance of the Q7 FIR structure.
-   * @param[in] numTaps  Number of filter coefficients in the filter.
-   * @param[in] *pCoeffs points to the filter coefficients.
-   * @param[in] *pState points to the state buffer.
-   * @param[in] blockSize number of samples that are processed.
-   * @return none
+   * @param[in,out] S          points to an instance of the Q7 FIR structure.
+   * @param[in]     numTaps    Number of filter coefficients in the filter.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   * @param[in]     blockSize  number of samples that are processed.
    */
   void arm_fir_init_q7(
   arm_fir_instance_q7 * S,
@@ -1150,11 +1177,10 @@
 
   /**
    * @brief Processing function for the Q15 FIR filter.
-   * @param[in] *S points to an instance of the Q15 FIR structure.
-   * @param[in] *pSrc points to the block of input data.
-   * @param[out] *pDst points to the block of output data.
-   * @param[in] blockSize number of samples to process.
-   * @return none.
+   * @param[in]  S          points to an instance of the Q15 FIR structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
    */
   void arm_fir_q15(
   const arm_fir_instance_q15 * S,
@@ -1162,13 +1188,13 @@
   q15_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Processing function for the fast Q15 FIR filter for Cortex-M3 and Cortex-M4.
-   * @param[in] *S points to an instance of the Q15 FIR filter structure.
-   * @param[in] *pSrc points to the block of input data.
-   * @param[out] *pDst points to the block of output data.
-   * @param[in] blockSize number of samples to process.
-   * @return none.
+   * @param[in]  S          points to an instance of the Q15 FIR filter structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
    */
   void arm_fir_fast_q15(
   const arm_fir_instance_q15 * S,
@@ -1176,17 +1202,17 @@
   q15_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Initialization function for the Q15 FIR filter.
-   * @param[in,out] *S points to an instance of the Q15 FIR filter structure.
-   * @param[in] numTaps  Number of filter coefficients in the filter. Must be even and greater than or equal to 4.
-   * @param[in] *pCoeffs points to the filter coefficients.
-   * @param[in] *pState points to the state buffer.
-   * @param[in] blockSize number of samples that are processed at a time.
+   * @param[in,out] S          points to an instance of the Q15 FIR filter structure.
+   * @param[in]     numTaps    Number of filter coefficients in the filter. Must be even and greater than or equal to 4.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   * @param[in]     blockSize  number of samples that are processed at a time.
    * @return The function returns ARM_MATH_SUCCESS if initialization was successful or ARM_MATH_ARGUMENT_ERROR if
    * <code>numTaps</code> is not a supported value.
    */
-
   arm_status arm_fir_init_q15(
   arm_fir_instance_q15 * S,
   uint16_t numTaps,
@@ -1194,13 +1220,13 @@
   q15_t * pState,
   uint32_t blockSize);
 
+
   /**
    * @brief Processing function for the Q31 FIR filter.
-   * @param[in] *S points to an instance of the Q31 FIR filter structure.
-   * @param[in] *pSrc points to the block of input data.
-   * @param[out] *pDst points to the block of output data.
-   * @param[in] blockSize number of samples to process.
-   * @return none.
+   * @param[in]  S          points to an instance of the Q31 FIR filter structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
    */
   void arm_fir_q31(
   const arm_fir_instance_q31 * S,
@@ -1208,13 +1234,13 @@
   q31_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Processing function for the fast Q31 FIR filter for Cortex-M3 and Cortex-M4.
-   * @param[in] *S points to an instance of the Q31 FIR structure.
-   * @param[in] *pSrc points to the block of input data.
-   * @param[out] *pDst points to the block of output data.
-   * @param[in] blockSize number of samples to process.
-   * @return none.
+   * @param[in]  S          points to an instance of the Q31 FIR structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
    */
   void arm_fir_fast_q31(
   const arm_fir_instance_q31 * S,
@@ -1222,14 +1248,14 @@
   q31_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Initialization function for the Q31 FIR filter.
-   * @param[in,out] *S points to an instance of the Q31 FIR structure.
-   * @param[in] 	numTaps  Number of filter coefficients in the filter.
-   * @param[in] 	*pCoeffs points to the filter coefficients.
-   * @param[in] 	*pState points to the state buffer.
-   * @param[in] 	blockSize number of samples that are processed at a time.
-   * @return 		none.
+   * @param[in,out] S          points to an instance of the Q31 FIR structure.
+   * @param[in]     numTaps    Number of filter coefficients in the filter.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   * @param[in]     blockSize  number of samples that are processed at a time.
    */
   void arm_fir_init_q31(
   arm_fir_instance_q31 * S,
@@ -1238,13 +1264,13 @@
   q31_t * pState,
   uint32_t blockSize);
 
+
   /**
    * @brief Processing function for the floating-point FIR filter.
-   * @param[in] *S points to an instance of the floating-point FIR structure.
-   * @param[in] *pSrc points to the block of input data.
-   * @param[out] *pDst points to the block of output data.
-   * @param[in] blockSize number of samples to process.
-   * @return none.
+   * @param[in]  S          points to an instance of the floating-point FIR structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
    */
   void arm_fir_f32(
   const arm_fir_instance_f32 * S,
@@ -1252,14 +1278,14 @@
   float32_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Initialization function for the floating-point FIR filter.
-   * @param[in,out] *S points to an instance of the floating-point FIR filter structure.
-   * @param[in] 	numTaps  Number of filter coefficients in the filter.
-   * @param[in] 	*pCoeffs points to the filter coefficients.
-   * @param[in] 	*pState points to the state buffer.
-   * @param[in] 	blockSize number of samples that are processed at a time.
-   * @return    	none.
+   * @param[in,out] S          points to an instance of the floating-point FIR filter structure.
+   * @param[in]     numTaps    Number of filter coefficients in the filter.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   * @param[in]     blockSize  number of samples that are processed at a time.
    */
   void arm_fir_init_f32(
   arm_fir_instance_f32 * S,
@@ -1274,14 +1300,12 @@
    */
   typedef struct
   {
-    int8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
-    q15_t *pState;            /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
-    q15_t *pCoeffs;           /**< Points to the array of coefficients.  The array is of length 5*numStages. */
-    int8_t postShift;         /**< Additional shift, in bits, applied to each output sample. */
-
+    int8_t numStages;        /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+    q15_t *pState;           /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
+    q15_t *pCoeffs;          /**< Points to the array of coefficients.  The array is of length 5*numStages. */
+    int8_t postShift;        /**< Additional shift, in bits, applied to each output sample. */
   } arm_biquad_casd_df1_inst_q15;
 
-
   /**
    * @brief Instance structure for the Q31 Biquad cascade filter.
    */
@@ -1291,7 +1315,6 @@
     q31_t *pState;           /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
     q31_t *pCoeffs;          /**< Points to the array of coefficients.  The array is of length 5*numStages. */
     uint8_t postShift;       /**< Additional shift, in bits, applied to each output sample. */
-
   } arm_biquad_casd_df1_inst_q31;
 
   /**
@@ -1299,40 +1322,34 @@
    */
   typedef struct
   {
-    uint32_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
-    float32_t *pState;          /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
-    float32_t *pCoeffs;         /**< Points to the array of coefficients.  The array is of length 5*numStages. */
-
-
+    uint32_t numStages;      /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
+    float32_t *pState;       /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
+    float32_t *pCoeffs;      /**< Points to the array of coefficients.  The array is of length 5*numStages. */
   } arm_biquad_casd_df1_inst_f32;
 
 
-
   /**
    * @brief Processing function for the Q15 Biquad cascade filter.
-   * @param[in]  *S points to an instance of the Q15 Biquad cascade structure.
-   * @param[in]  *pSrc points to the block of input data.
-   * @param[out] *pDst points to the block of output data.
-   * @param[in]  blockSize number of samples to process.
-   * @return     none.
-   */
-
+   * @param[in]  S          points to an instance of the Q15 Biquad cascade structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
+   */
   void arm_biquad_cascade_df1_q15(
   const arm_biquad_casd_df1_inst_q15 * S,
   q15_t * pSrc,
   q15_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Initialization function for the Q15 Biquad cascade filter.
-   * @param[in,out] *S           points to an instance of the Q15 Biquad cascade structure.
-   * @param[in]     numStages    number of 2nd order stages in the filter.
-   * @param[in]     *pCoeffs     points to the filter coefficients.
-   * @param[in]     *pState      points to the state buffer.
-   * @param[in]     postShift    Shift to be applied to the output. Varies according to the coefficients format
-   * @return        none
-   */
-
+   * @param[in,out] S          points to an instance of the Q15 Biquad cascade structure.
+   * @param[in]     numStages  number of 2nd order stages in the filter.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   * @param[in]     postShift  Shift to be applied to the output. Varies according to the coefficients format
+   */
   void arm_biquad_cascade_df1_init_q15(
   arm_biquad_casd_df1_inst_q15 * S,
   uint8_t numStages,
@@ -1343,13 +1360,11 @@
 
   /**
    * @brief Fast but less precise processing function for the Q15 Biquad cascade filter for Cortex-M3 and Cortex-M4.
-   * @param[in]  *S points to an instance of the Q15 Biquad cascade structure.
-   * @param[in]  *pSrc points to the block of input data.
-   * @param[out] *pDst points to the block of output data.
-   * @param[in]  blockSize number of samples to process.
-   * @return     none.
-   */
-
+   * @param[in]  S          points to an instance of the Q15 Biquad cascade structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
+   */
   void arm_biquad_cascade_df1_fast_q15(
   const arm_biquad_casd_df1_inst_q15 * S,
   q15_t * pSrc,
@@ -1359,44 +1374,40 @@
 
   /**
    * @brief Processing function for the Q31 Biquad cascade filter
-   * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.
-   * @param[in]  *pSrc      points to the block of input data.
-   * @param[out] *pDst      points to the block of output data.
+   * @param[in]  S          points to an instance of the Q31 Biquad cascade structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
-   * @return     none.
-   */
-
+   */
   void arm_biquad_cascade_df1_q31(
   const arm_biquad_casd_df1_inst_q31 * S,
   q31_t * pSrc,
   q31_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Fast but less precise processing function for the Q31 Biquad cascade filter for Cortex-M3 and Cortex-M4.
-   * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.
-   * @param[in]  *pSrc      points to the block of input data.
-   * @param[out] *pDst      points to the block of output data.
+   * @param[in]  S          points to an instance of the Q31 Biquad cascade structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
-   * @return     none.
-   */
-
+   */
   void arm_biquad_cascade_df1_fast_q31(
   const arm_biquad_casd_df1_inst_q31 * S,
   q31_t * pSrc,
   q31_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Initialization function for the Q31 Biquad cascade filter.
-   * @param[in,out] *S           points to an instance of the Q31 Biquad cascade structure.
-   * @param[in]     numStages      number of 2nd order stages in the filter.
-   * @param[in]     *pCoeffs     points to the filter coefficients.
-   * @param[in]     *pState      points to the state buffer.
-   * @param[in]     postShift    Shift to be applied to the output. Varies according to the coefficients format
-   * @return        none
-   */
-
+   * @param[in,out] S          points to an instance of the Q31 Biquad cascade structure.
+   * @param[in]     numStages  number of 2nd order stages in the filter.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   * @param[in]     postShift  Shift to be applied to the output. Varies according to the coefficients format
+   */
   void arm_biquad_cascade_df1_init_q31(
   arm_biquad_casd_df1_inst_q31 * S,
   uint8_t numStages,
@@ -1404,30 +1415,28 @@
   q31_t * pState,
   int8_t postShift);
 
+
   /**
    * @brief Processing function for the floating-point Biquad cascade filter.
-   * @param[in]  *S         points to an instance of the floating-point Biquad cascade structure.
-   * @param[in]  *pSrc      points to the block of input data.
-   * @param[out] *pDst      points to the block of output data.
+   * @param[in]  S          points to an instance of the floating-point Biquad cascade structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
    * @param[in]  blockSize  number of samples to process.
-   * @return     none.
-   */
-
+   */
   void arm_biquad_cascade_df1_f32(
   const arm_biquad_casd_df1_inst_f32 * S,
   float32_t * pSrc,
   float32_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Initialization function for the floating-point Biquad cascade filter.
-   * @param[in,out] *S           points to an instance of the floating-point Biquad cascade structure.
-   * @param[in]     numStages    number of 2nd order stages in the filter.
-   * @param[in]     *pCoeffs     points to the filter coefficients.
-   * @param[in]     *pState      points to the state buffer.
-   * @return        none
-   */
-
+   * @param[in,out] S          points to an instance of the floating-point Biquad cascade structure.
+   * @param[in]     numStages  number of 2nd order stages in the filter.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   */
   void arm_biquad_cascade_df1_init_f32(
   arm_biquad_casd_df1_inst_f32 * S,
   uint8_t numStages,
@@ -1438,7 +1447,6 @@
   /**
    * @brief Instance structure for the floating-point matrix structure.
    */
-
   typedef struct
   {
     uint16_t numRows;     /**< number of rows of the matrix.     */
@@ -1450,7 +1458,6 @@
   /**
    * @brief Instance structure for the floating-point matrix structure.
    */
-
   typedef struct
   {
     uint16_t numRows;     /**< number of rows of the matrix.     */
@@ -1461,109 +1468,103 @@
   /**
    * @brief Instance structure for the Q15 matrix structure.
    */
-
   typedef struct
   {
     uint16_t numRows;     /**< number of rows of the matrix.     */
     uint16_t numCols;     /**< number of columns of the matrix.  */
     q15_t *pData;         /**< points to the data of the matrix. */
-
   } arm_matrix_instance_q15;
 
   /**
    * @brief Instance structure for the Q31 matrix structure.
    */
-
   typedef struct
   {
     uint16_t numRows;     /**< number of rows of the matrix.     */
     uint16_t numCols;     /**< number of columns of the matrix.  */
     q31_t *pData;         /**< points to the data of the matrix. */
-
   } arm_matrix_instance_q31;
 
 
-
   /**
    * @brief Floating-point matrix addition.
-   * @param[in]       *pSrcA points to the first input matrix structure
-   * @param[in]       *pSrcB points to the second input matrix structure
-   * @param[out]      *pDst points to output matrix structure
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-
   arm_status arm_mat_add_f32(
   const arm_matrix_instance_f32 * pSrcA,
   const arm_matrix_instance_f32 * pSrcB,
   arm_matrix_instance_f32 * pDst);
 
+
   /**
    * @brief Q15 matrix addition.
-   * @param[in]       *pSrcA points to the first input matrix structure
-   * @param[in]       *pSrcB points to the second input matrix structure
-   * @param[out]      *pDst points to output matrix structure
+   * @param[in]   pSrcA  points to the first input matrix structure
+   * @param[in]   pSrcB  points to the second input matrix structure
+   * @param[out]  pDst   points to output matrix structure
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-
   arm_status arm_mat_add_q15(
   const arm_matrix_instance_q15 * pSrcA,
   const arm_matrix_instance_q15 * pSrcB,
   arm_matrix_instance_q15 * pDst);
 
+
   /**
    * @brief Q31 matrix addition.
-   * @param[in]       *pSrcA points to the first input matrix structure
-   * @param[in]       *pSrcB points to the second input matrix structure
-   * @param[out]      *pDst points to output matrix structure
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-
   arm_status arm_mat_add_q31(
   const arm_matrix_instance_q31 * pSrcA,
   const arm_matrix_instance_q31 * pSrcB,
   arm_matrix_instance_q31 * pDst);
 
+
   /**
    * @brief Floating-point, complex, matrix multiplication.
-   * @param[in]       *pSrcA points to the first input matrix structure
-   * @param[in]       *pSrcB points to the second input matrix structure
-   * @param[out]      *pDst points to output matrix structure
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-
   arm_status arm_mat_cmplx_mult_f32(
   const arm_matrix_instance_f32 * pSrcA,
   const arm_matrix_instance_f32 * pSrcB,
   arm_matrix_instance_f32 * pDst);
 
+
   /**
    * @brief Q15, complex,  matrix multiplication.
-   * @param[in]       *pSrcA points to the first input matrix structure
-   * @param[in]       *pSrcB points to the second input matrix structure
-   * @param[out]      *pDst points to output matrix structure
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-
   arm_status arm_mat_cmplx_mult_q15(
   const arm_matrix_instance_q15 * pSrcA,
   const arm_matrix_instance_q15 * pSrcB,
   arm_matrix_instance_q15 * pDst,
   q15_t * pScratch);
 
+
   /**
    * @brief Q31, complex, matrix multiplication.
-   * @param[in]       *pSrcA points to the first input matrix structure
-   * @param[in]       *pSrcB points to the second input matrix structure
-   * @param[out]      *pDst points to output matrix structure
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-
   arm_status arm_mat_cmplx_mult_q31(
   const arm_matrix_instance_q31 * pSrcA,
   const arm_matrix_instance_q31 * pSrcB,
@@ -1572,12 +1573,11 @@
 
   /**
    * @brief Floating-point matrix transpose.
-   * @param[in]  *pSrc points to the input matrix
-   * @param[out] *pDst points to the output matrix
-   * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
+   * @param[in]  pSrc  points to the input matrix
+   * @param[out] pDst  points to the output matrix
+   * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-
   arm_status arm_mat_trans_f32(
   const arm_matrix_instance_f32 * pSrc,
   arm_matrix_instance_f32 * pDst);
@@ -1585,24 +1585,23 @@
 
   /**
    * @brief Q15 matrix transpose.
-   * @param[in]  *pSrc points to the input matrix
-   * @param[out] *pDst points to the output matrix
-   * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
+   * @param[in]  pSrc  points to the input matrix
+   * @param[out] pDst  points to the output matrix
+   * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-
   arm_status arm_mat_trans_q15(
   const arm_matrix_instance_q15 * pSrc,
   arm_matrix_instance_q15 * pDst);
 
+
   /**
    * @brief Q31 matrix transpose.
-   * @param[in]  *pSrc points to the input matrix
-   * @param[out] *pDst points to the output matrix
-   * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
+   * @param[in]  pSrc  points to the input matrix
+   * @param[out] pDst  points to the output matrix
+   * @return    The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-
   arm_status arm_mat_trans_q31(
   const arm_matrix_instance_q31 * pSrc,
   arm_matrix_instance_q31 * pDst);
@@ -1610,73 +1609,72 @@
 
   /**
    * @brief Floating-point matrix multiplication
-   * @param[in]       *pSrcA points to the first input matrix structure
-   * @param[in]       *pSrcB points to the second input matrix structure
-   * @param[out]      *pDst points to output matrix structure
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-
   arm_status arm_mat_mult_f32(
   const arm_matrix_instance_f32 * pSrcA,
   const arm_matrix_instance_f32 * pSrcB,
   arm_matrix_instance_f32 * pDst);
 
+
   /**
    * @brief Q15 matrix multiplication
-   * @param[in]       *pSrcA points to the first input matrix structure
-   * @param[in]       *pSrcB points to the second input matrix structure
-   * @param[out]      *pDst points to output matrix structure
-   * @param[in]		 *pState points to the array for storing intermediate results
+   * @param[in]  pSrcA   points to the first input matrix structure
+   * @param[in]  pSrcB   points to the second input matrix structure
+   * @param[out] pDst    points to output matrix structure
+   * @param[in]  pState  points to the array for storing intermediate results
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-
   arm_status arm_mat_mult_q15(
   const arm_matrix_instance_q15 * pSrcA,
   const arm_matrix_instance_q15 * pSrcB,
   arm_matrix_instance_q15 * pDst,
   q15_t * pState);
 
+
   /**
    * @brief Q15 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
-   * @param[in]       *pSrcA  points to the first input matrix structure
-   * @param[in]       *pSrcB  points to the second input matrix structure
-   * @param[out]      *pDst   points to output matrix structure
-   * @param[in]		  *pState points to the array for storing intermediate results
+   * @param[in]  pSrcA   points to the first input matrix structure
+   * @param[in]  pSrcB   points to the second input matrix structure
+   * @param[out] pDst    points to output matrix structure
+   * @param[in]  pState  points to the array for storing intermediate results
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-
   arm_status arm_mat_mult_fast_q15(
   const arm_matrix_instance_q15 * pSrcA,
   const arm_matrix_instance_q15 * pSrcB,
   arm_matrix_instance_q15 * pDst,
   q15_t * pState);
 
+
   /**
    * @brief Q31 matrix multiplication
-   * @param[in]       *pSrcA points to the first input matrix structure
-   * @param[in]       *pSrcB points to the second input matrix structure
-   * @param[out]      *pDst points to output matrix structure
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-
   arm_status arm_mat_mult_q31(
   const arm_matrix_instance_q31 * pSrcA,
   const arm_matrix_instance_q31 * pSrcB,
   arm_matrix_instance_q31 * pDst);
 
+
   /**
    * @brief Q31 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
-   * @param[in]       *pSrcA points to the first input matrix structure
-   * @param[in]       *pSrcB points to the second input matrix structure
-   * @param[out]      *pDst points to output matrix structure
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-
   arm_status arm_mat_mult_fast_q31(
   const arm_matrix_instance_q31 * pSrcA,
   const arm_matrix_instance_q31 * pSrcB,
@@ -1685,86 +1683,85 @@
 
   /**
    * @brief Floating-point matrix subtraction
-   * @param[in]       *pSrcA points to the first input matrix structure
-   * @param[in]       *pSrcB points to the second input matrix structure
-   * @param[out]      *pDst points to output matrix structure
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-
   arm_status arm_mat_sub_f32(
   const arm_matrix_instance_f32 * pSrcA,
   const arm_matrix_instance_f32 * pSrcB,
   arm_matrix_instance_f32 * pDst);
 
+
   /**
    * @brief Q15 matrix subtraction
-   * @param[in]       *pSrcA points to the first input matrix structure
-   * @param[in]       *pSrcB points to the second input matrix structure
-   * @param[out]      *pDst points to output matrix structure
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-
   arm_status arm_mat_sub_q15(
   const arm_matrix_instance_q15 * pSrcA,
   const arm_matrix_instance_q15 * pSrcB,
   arm_matrix_instance_q15 * pDst);
 
+
   /**
    * @brief Q31 matrix subtraction
-   * @param[in]       *pSrcA points to the first input matrix structure
-   * @param[in]       *pSrcB points to the second input matrix structure
-   * @param[out]      *pDst points to output matrix structure
+   * @param[in]  pSrcA  points to the first input matrix structure
+   * @param[in]  pSrcB  points to the second input matrix structure
+   * @param[out] pDst   points to output matrix structure
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-
   arm_status arm_mat_sub_q31(
   const arm_matrix_instance_q31 * pSrcA,
   const arm_matrix_instance_q31 * pSrcB,
   arm_matrix_instance_q31 * pDst);
 
+
   /**
    * @brief Floating-point matrix scaling.
-   * @param[in]  *pSrc points to the input matrix
-   * @param[in]  scale scale factor
-   * @param[out] *pDst points to the output matrix
+   * @param[in]  pSrc   points to the input matrix
+   * @param[in]  scale  scale factor
+   * @param[out] pDst   points to the output matrix
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-
   arm_status arm_mat_scale_f32(
   const arm_matrix_instance_f32 * pSrc,
   float32_t scale,
   arm_matrix_instance_f32 * pDst);
 
+
   /**
    * @brief Q15 matrix scaling.
-   * @param[in]       *pSrc points to input matrix
-   * @param[in]       scaleFract fractional portion of the scale factor
-   * @param[in]       shift number of bits to shift the result by
-   * @param[out]      *pDst points to output matrix
+   * @param[in]  pSrc        points to input matrix
+   * @param[in]  scaleFract  fractional portion of the scale factor
+   * @param[in]  shift       number of bits to shift the result by
+   * @param[out] pDst        points to output matrix
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-
   arm_status arm_mat_scale_q15(
   const arm_matrix_instance_q15 * pSrc,
   q15_t scaleFract,
   int32_t shift,
   arm_matrix_instance_q15 * pDst);
 
+
   /**
    * @brief Q31 matrix scaling.
-   * @param[in]       *pSrc points to input matrix
-   * @param[in]       scaleFract fractional portion of the scale factor
-   * @param[in]       shift number of bits to shift the result by
-   * @param[out]      *pDst points to output matrix structure
+   * @param[in]  pSrc        points to input matrix
+   * @param[in]  scaleFract  fractional portion of the scale factor
+   * @param[in]  shift       number of bits to shift the result by
+   * @param[out] pDst        points to output matrix structure
    * @return     The function returns either
    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
    */
-
   arm_status arm_mat_scale_q31(
   const arm_matrix_instance_q31 * pSrc,
   q31_t scaleFract,
@@ -1774,43 +1771,39 @@
 
   /**
    * @brief  Q31 matrix initialization.
-   * @param[in,out] *S             points to an instance of the floating-point matrix structure.
-   * @param[in]     nRows          number of rows in the matrix.
-   * @param[in]     nColumns       number of columns in the matrix.
-   * @param[in]     *pData	       points to the matrix data array.
-   * @return        none
-   */
-
+   * @param[in,out] S         points to an instance of the floating-point matrix structure.
+   * @param[in]     nRows     number of rows in the matrix.
+   * @param[in]     nColumns  number of columns in the matrix.
+   * @param[in]     pData     points to the matrix data array.
+   */
   void arm_mat_init_q31(
   arm_matrix_instance_q31 * S,
   uint16_t nRows,
   uint16_t nColumns,
   q31_t * pData);
 
+
   /**
    * @brief  Q15 matrix initialization.
-   * @param[in,out] *S             points to an instance of the floating-point matrix structure.
-   * @param[in]     nRows          number of rows in the matrix.
-   * @param[in]     nColumns       number of columns in the matrix.
-   * @param[in]     *pData	       points to the matrix data array.
-   * @return        none
-   */
-
+   * @param[in,out] S         points to an instance of the floating-point matrix structure.
+   * @param[in]     nRows     number of rows in the matrix.
+   * @param[in]     nColumns  number of columns in the matrix.
+   * @param[in]     pData     points to the matrix data array.
+   */
   void arm_mat_init_q15(
   arm_matrix_instance_q15 * S,
   uint16_t nRows,
   uint16_t nColumns,
   q15_t * pData);
 
+
   /**
    * @brief  Floating-point matrix initialization.
-   * @param[in,out] *S             points to an instance of the floating-point matrix structure.
-   * @param[in]     nRows          number of rows in the matrix.
-   * @param[in]     nColumns       number of columns in the matrix.
-   * @param[in]     *pData	       points to the matrix data array.
-   * @return        none
-   */
-
+   * @param[in,out] S         points to an instance of the floating-point matrix structure.
+   * @param[in]     nRows     number of rows in the matrix.
+   * @param[in]     nColumns  number of columns in the matrix.
+   * @param[in]     pData     points to the matrix data array.
+   */
   void arm_mat_init_f32(
   arm_matrix_instance_f32 * S,
   uint16_t nRows,
@@ -1824,14 +1817,14 @@
    */
   typedef struct
   {
-    q15_t A0;    /**< The derived gain, A0 = Kp + Ki + Kd . */
-#ifdef ARM_MATH_CM0_FAMILY
+    q15_t A0;           /**< The derived gain, A0 = Kp + Ki + Kd . */
+#if !defined (ARM_MATH_DSP)
     q15_t A1;
     q15_t A2;
 #else
     q31_t A1;           /**< The derived gain A1 = -Kp - 2Kd | Kd.*/
 #endif
-    q15_t state[3];       /**< The state array of length 3. */
+    q15_t state[3];     /**< The state array of length 3. */
     q15_t Kp;           /**< The proportional gain. */
     q15_t Ki;           /**< The integral gain. */
     q15_t Kd;           /**< The derivative gain. */
@@ -1849,7 +1842,6 @@
     q31_t Kp;            /**< The proportional gain. */
     q31_t Ki;            /**< The integral gain. */
     q31_t Kd;            /**< The derivative gain. */
-
   } arm_pid_instance_q31;
 
   /**
@@ -1861,27 +1853,26 @@
     float32_t A1;          /**< The derived gain, A1 = -Kp - 2Kd. */
     float32_t A2;          /**< The derived gain, A2 = Kd . */
     float32_t state[3];    /**< The state array of length 3. */
-    float32_t Kp;               /**< The proportional gain. */
-    float32_t Ki;               /**< The integral gain. */
-    float32_t Kd;               /**< The derivative gain. */
+    float32_t Kp;          /**< The proportional gain. */
+    float32_t Ki;          /**< The integral gain. */
+    float32_t Kd;          /**< The derivative gain. */
   } arm_pid_instance_f32;
 
 
 
   /**
    * @brief  Initialization function for the floating-point PID Control.
-   * @param[in,out] *S      points to an instance of the PID structure.
+   * @param[in,out] S               points to an instance of the PID structure.
    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
-   * @return none.
    */
   void arm_pid_init_f32(
   arm_pid_instance_f32 * S,
   int32_t resetStateFlag);
 
+
   /**
    * @brief  Reset function for the floating-point PID Control.
-   * @param[in,out] *S is an instance of the floating-point PID Control structure
-   * @return none
+   * @param[in,out] S  is an instance of the floating-point PID Control structure
    */
   void arm_pid_reset_f32(
   arm_pid_instance_f32 * S);
@@ -1889,9 +1880,8 @@
 
   /**
    * @brief  Initialization function for the Q31 PID Control.
-   * @param[in,out] *S points to an instance of the Q15 PID structure.
+   * @param[in,out] S               points to an instance of the Q15 PID structure.
    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
-   * @return none.
    */
   void arm_pid_init_q31(
   arm_pid_instance_q31 * S,
@@ -1900,27 +1890,26 @@
 
   /**
    * @brief  Reset function for the Q31 PID Control.
-   * @param[in,out] *S points to an instance of the Q31 PID Control structure
-   * @return none
+   * @param[in,out] S   points to an instance of the Q31 PID Control structure
    */
 
   void arm_pid_reset_q31(
   arm_pid_instance_q31 * S);
 
+
   /**
    * @brief  Initialization function for the Q15 PID Control.
-   * @param[in,out] *S points to an instance of the Q15 PID structure.
-   * @param[in] resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
-   * @return none.
+   * @param[in,out] S               points to an instance of the Q15 PID structure.
+   * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
    */
   void arm_pid_init_q15(
   arm_pid_instance_q15 * S,
   int32_t resetStateFlag);
 
+
   /**
    * @brief  Reset function for the Q15 PID Control.
-   * @param[in,out] *S points to an instance of the q15 PID Control structure
-   * @return none
+   * @param[in,out] S  points to an instance of the q15 PID Control structure
    */
   void arm_pid_reset_q15(
   arm_pid_instance_q15 * S);
@@ -1940,7 +1929,6 @@
   /**
    * @brief Instance structure for the floating-point bilinear interpolation function.
    */
-
   typedef struct
   {
     uint16_t numRows;   /**< number of rows in the data table. */
@@ -1951,7 +1939,6 @@
    /**
    * @brief Instance structure for the Q31 bilinear interpolation function.
    */
-
   typedef struct
   {
     uint16_t numRows;   /**< number of rows in the data table. */
@@ -1962,7 +1949,6 @@
    /**
    * @brief Instance structure for the Q15 bilinear interpolation function.
    */
-
   typedef struct
   {
     uint16_t numRows;   /**< number of rows in the data table. */
@@ -1973,69 +1959,63 @@
    /**
    * @brief Instance structure for the Q15 bilinear interpolation function.
    */
-
   typedef struct
   {
     uint16_t numRows;   /**< number of rows in the data table. */
     uint16_t numCols;   /**< number of columns in the data table. */
-    q7_t *pData;                /**< points to the data table. */
+    q7_t *pData;        /**< points to the data table. */
   } arm_bilinear_interp_instance_q7;
 
 
   /**
    * @brief Q7 vector multiplication.
-   * @param[in]       *pSrcA points to the first input vector
-   * @param[in]       *pSrcB points to the second input vector
-   * @param[out]      *pDst  points to the output vector
-   * @param[in]       blockSize number of samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in each vector
+   */
   void arm_mult_q7(
   q7_t * pSrcA,
   q7_t * pSrcB,
   q7_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Q15 vector multiplication.
-   * @param[in]       *pSrcA points to the first input vector
-   * @param[in]       *pSrcB points to the second input vector
-   * @param[out]      *pDst  points to the output vector
-   * @param[in]       blockSize number of samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in each vector
+   */
   void arm_mult_q15(
   q15_t * pSrcA,
   q15_t * pSrcB,
   q15_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Q31 vector multiplication.
-   * @param[in]       *pSrcA points to the first input vector
-   * @param[in]       *pSrcB points to the second input vector
-   * @param[out]      *pDst points to the output vector
-   * @param[in]       blockSize number of samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in each vector
+   */
   void arm_mult_q31(
   q31_t * pSrcA,
   q31_t * pSrcB,
   q31_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Floating-point vector multiplication.
-   * @param[in]       *pSrcA points to the first input vector
-   * @param[in]       *pSrcB points to the second input vector
-   * @param[out]      *pDst points to the output vector
-   * @param[in]       blockSize number of samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in each vector
+   */
   void arm_mult_f32(
   float32_t * pSrcA,
   float32_t * pSrcB,
@@ -2043,20 +2023,15 @@
   uint32_t blockSize);
 
 
-
-
-
-
   /**
    * @brief Instance structure for the Q15 CFFT/CIFFT function.
    */
-
   typedef struct
   {
     uint16_t fftLen;                 /**< length of the FFT. */
     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
-    q15_t *pTwiddle;                     /**< points to the Sin twiddle factor table. */
+    q15_t *pTwiddle;                 /**< points to the Sin twiddle factor table. */
     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
@@ -2075,11 +2050,9 @@
   q15_t * pSrc);
 
 
-
   /**
    * @brief Instance structure for the Q15 CFFT/CIFFT function.
    */
-
   typedef struct
   {
     uint16_t fftLen;                 /**< length of the FFT. */
@@ -2106,13 +2079,12 @@
   /**
    * @brief Instance structure for the Radix-2 Q31 CFFT/CIFFT function.
    */
-
   typedef struct
   {
     uint16_t fftLen;                 /**< length of the FFT. */
     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
-    q31_t *pTwiddle;                     /**< points to the Twiddle factor table. */
+    q31_t *pTwiddle;                 /**< points to the Twiddle factor table. */
     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
@@ -2133,7 +2105,6 @@
   /**
    * @brief Instance structure for the Q31 CFFT/CIFFT function.
    */
-
   typedef struct
   {
     uint16_t fftLen;                 /**< length of the FFT. */
@@ -2160,7 +2131,6 @@
   /**
    * @brief Instance structure for the floating-point CFFT/CIFFT function.
    */
-
   typedef struct
   {
     uint16_t fftLen;                   /**< length of the FFT. */
@@ -2170,7 +2140,7 @@
     uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
     uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
     uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
-    float32_t onebyfftLen;                 /**< value of 1/fftLen. */
+    float32_t onebyfftLen;             /**< value of 1/fftLen. */
   } arm_cfft_radix2_instance_f32;
 
 /* Deprecated */
@@ -2188,7 +2158,6 @@
   /**
    * @brief Instance structure for the floating-point CFFT/CIFFT function.
    */
-
   typedef struct
   {
     uint16_t fftLen;                   /**< length of the FFT. */
@@ -2198,7 +2167,7 @@
     uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
     uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
     uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
-    float32_t onebyfftLen;                 /**< value of 1/fftLen. */
+    float32_t onebyfftLen;             /**< value of 1/fftLen. */
   } arm_cfft_radix4_instance_f32;
 
 /* Deprecated */
@@ -2216,7 +2185,6 @@
   /**
    * @brief Instance structure for the fixed-point CFFT/CIFFT function.
    */
-
   typedef struct
   {
     uint16_t fftLen;                   /**< length of the FFT. */
@@ -2225,16 +2193,15 @@
     uint16_t bitRevLength;             /**< bit reversal table length. */
   } arm_cfft_instance_q15;
 
-void arm_cfft_q15( 
-    const arm_cfft_instance_q15 * S, 
+void arm_cfft_q15(
+    const arm_cfft_instance_q15 * S,
     q15_t * p1,
     uint8_t ifftFlag,
-    uint8_t bitReverseFlag);  
+    uint8_t bitReverseFlag);
 
   /**
    * @brief Instance structure for the fixed-point CFFT/CIFFT function.
    */
-
   typedef struct
   {
     uint16_t fftLen;                   /**< length of the FFT. */
@@ -2243,16 +2210,15 @@
     uint16_t bitRevLength;             /**< bit reversal table length. */
   } arm_cfft_instance_q31;
 
-void arm_cfft_q31( 
-    const arm_cfft_instance_q31 * S, 
+void arm_cfft_q31(
+    const arm_cfft_instance_q31 * S,
     q31_t * p1,
     uint8_t ifftFlag,
-    uint8_t bitReverseFlag);  
-  
+    uint8_t bitReverseFlag);
+
   /**
    * @brief Instance structure for the floating-point CFFT/CIFFT function.
    */
-
   typedef struct
   {
     uint16_t fftLen;                   /**< length of the FFT. */
@@ -2270,7 +2236,6 @@
   /**
    * @brief Instance structure for the Q15 RFFT/RIFFT function.
    */
-
   typedef struct
   {
     uint32_t fftLenReal;                      /**< length of the real FFT. */
@@ -2296,7 +2261,6 @@
   /**
    * @brief Instance structure for the Q31 RFFT/RIFFT function.
    */
-
   typedef struct
   {
     uint32_t fftLenReal;                        /**< length of the real FFT. */
@@ -2322,7 +2286,6 @@
   /**
    * @brief Instance structure for the floating-point RFFT/RIFFT function.
    */
-
   typedef struct
   {
     uint32_t fftLenReal;                        /**< length of the real FFT. */
@@ -2350,17 +2313,16 @@
   /**
    * @brief Instance structure for the floating-point RFFT/RIFFT function.
    */
-
 typedef struct
   {
     arm_cfft_instance_f32 Sint;      /**< Internal CFFT structure. */
-    uint16_t fftLenRFFT;                        /**< length of the real sequence */
-	float32_t * pTwiddleRFFT;					/**< Twiddle factors real stage  */
+    uint16_t fftLenRFFT;             /**< length of the real sequence */
+    float32_t * pTwiddleRFFT;        /**< Twiddle factors real stage  */
   } arm_rfft_fast_instance_f32 ;
 
 arm_status arm_rfft_fast_init_f32 (
-	arm_rfft_fast_instance_f32 * S,
-	uint16_t fftLen);
+   arm_rfft_fast_instance_f32 * S,
+   uint16_t fftLen);
 
 void arm_rfft_fast_f32(
   arm_rfft_fast_instance_f32 * S,
@@ -2370,29 +2332,28 @@
   /**
    * @brief Instance structure for the floating-point DCT4/IDCT4 function.
    */
-
   typedef struct
   {
-    uint16_t N;                         /**< length of the DCT4. */
-    uint16_t Nby2;                      /**< half of the length of the DCT4. */
-    float32_t normalize;                /**< normalizing factor. */
-    float32_t *pTwiddle;                /**< points to the twiddle factor table. */
-    float32_t *pCosFactor;              /**< points to the cosFactor table. */
+    uint16_t N;                          /**< length of the DCT4. */
+    uint16_t Nby2;                       /**< half of the length of the DCT4. */
+    float32_t normalize;                 /**< normalizing factor. */
+    float32_t *pTwiddle;                 /**< points to the twiddle factor table. */
+    float32_t *pCosFactor;               /**< points to the cosFactor table. */
     arm_rfft_instance_f32 *pRfft;        /**< points to the real FFT instance. */
     arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
   } arm_dct4_instance_f32;
 
+
   /**
    * @brief  Initialization function for the floating-point DCT4/IDCT4.
-   * @param[in,out] *S         points to an instance of floating-point DCT4/IDCT4 structure.
-   * @param[in]     *S_RFFT    points to an instance of floating-point RFFT/RIFFT structure.
-   * @param[in]     *S_CFFT    points to an instance of floating-point CFFT/CIFFT structure.
+   * @param[in,out] S          points to an instance of floating-point DCT4/IDCT4 structure.
+   * @param[in]     S_RFFT     points to an instance of floating-point RFFT/RIFFT structure.
+   * @param[in]     S_CFFT     points to an instance of floating-point CFFT/CIFFT structure.
    * @param[in]     N          length of the DCT4.
    * @param[in]     Nby2       half of the length of the DCT4.
    * @param[in]     normalize  normalizing factor.
-   * @return		arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLenReal</code> is not a supported transform length.
-   */
-
+   * @return      arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLenReal</code> is not a supported transform length.
+   */
   arm_status arm_dct4_init_f32(
   arm_dct4_instance_f32 * S,
   arm_rfft_instance_f32 * S_RFFT,
@@ -2401,45 +2362,44 @@
   uint16_t Nby2,
   float32_t normalize);
 
+
   /**
    * @brief Processing function for the floating-point DCT4/IDCT4.
-   * @param[in]       *S             points to an instance of the floating-point DCT4/IDCT4 structure.
-   * @param[in]       *pState        points to state buffer.
-   * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
-   * @return none.
-   */
-
+   * @param[in]     S              points to an instance of the floating-point DCT4/IDCT4 structure.
+   * @param[in]     pState         points to state buffer.
+   * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
+   */
   void arm_dct4_f32(
   const arm_dct4_instance_f32 * S,
   float32_t * pState,
   float32_t * pInlineBuffer);
 
+
   /**
    * @brief Instance structure for the Q31 DCT4/IDCT4 function.
    */
-
   typedef struct
   {
-    uint16_t N;                         /**< length of the DCT4. */
-    uint16_t Nby2;                      /**< half of the length of the DCT4. */
-    q31_t normalize;                    /**< normalizing factor. */
-    q31_t *pTwiddle;                    /**< points to the twiddle factor table. */
-    q31_t *pCosFactor;                  /**< points to the cosFactor table. */
+    uint16_t N;                          /**< length of the DCT4. */
+    uint16_t Nby2;                       /**< half of the length of the DCT4. */
+    q31_t normalize;                     /**< normalizing factor. */
+    q31_t *pTwiddle;                     /**< points to the twiddle factor table. */
+    q31_t *pCosFactor;                   /**< points to the cosFactor table. */
     arm_rfft_instance_q31 *pRfft;        /**< points to the real FFT instance. */
     arm_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */
   } arm_dct4_instance_q31;
 
+
   /**
    * @brief  Initialization function for the Q31 DCT4/IDCT4.
-   * @param[in,out] *S         points to an instance of Q31 DCT4/IDCT4 structure.
-   * @param[in]     *S_RFFT    points to an instance of Q31 RFFT/RIFFT structure
-   * @param[in]     *S_CFFT    points to an instance of Q31 CFFT/CIFFT structure
+   * @param[in,out] S          points to an instance of Q31 DCT4/IDCT4 structure.
+   * @param[in]     S_RFFT     points to an instance of Q31 RFFT/RIFFT structure
+   * @param[in]     S_CFFT     points to an instance of Q31 CFFT/CIFFT structure
    * @param[in]     N          length of the DCT4.
    * @param[in]     Nby2       half of the length of the DCT4.
    * @param[in]     normalize  normalizing factor.
-   * @return		arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
-   */
-
+   * @return      arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
+   */
   arm_status arm_dct4_init_q31(
   arm_dct4_instance_q31 * S,
   arm_rfft_instance_q31 * S_RFFT,
@@ -2448,45 +2408,44 @@
   uint16_t Nby2,
   q31_t normalize);
 
+
   /**
    * @brief Processing function for the Q31 DCT4/IDCT4.
-   * @param[in]       *S             points to an instance of the Q31 DCT4 structure.
-   * @param[in]       *pState        points to state buffer.
-   * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
-   * @return none.
-   */
-
+   * @param[in]     S              points to an instance of the Q31 DCT4 structure.
+   * @param[in]     pState         points to state buffer.
+   * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
+   */
   void arm_dct4_q31(
   const arm_dct4_instance_q31 * S,
   q31_t * pState,
   q31_t * pInlineBuffer);
 
+
   /**
    * @brief Instance structure for the Q15 DCT4/IDCT4 function.
    */
-
   typedef struct
   {
-    uint16_t N;                         /**< length of the DCT4. */
-    uint16_t Nby2;                      /**< half of the length of the DCT4. */
-    q15_t normalize;                    /**< normalizing factor. */
-    q15_t *pTwiddle;                    /**< points to the twiddle factor table. */
-    q15_t *pCosFactor;                  /**< points to the cosFactor table. */
+    uint16_t N;                          /**< length of the DCT4. */
+    uint16_t Nby2;                       /**< half of the length of the DCT4. */
+    q15_t normalize;                     /**< normalizing factor. */
+    q15_t *pTwiddle;                     /**< points to the twiddle factor table. */
+    q15_t *pCosFactor;                   /**< points to the cosFactor table. */
     arm_rfft_instance_q15 *pRfft;        /**< points to the real FFT instance. */
     arm_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */
   } arm_dct4_instance_q15;
 
+
   /**
    * @brief  Initialization function for the Q15 DCT4/IDCT4.
-   * @param[in,out] *S         points to an instance of Q15 DCT4/IDCT4 structure.
-   * @param[in]     *S_RFFT    points to an instance of Q15 RFFT/RIFFT structure.
-   * @param[in]     *S_CFFT    points to an instance of Q15 CFFT/CIFFT structure.
+   * @param[in,out] S          points to an instance of Q15 DCT4/IDCT4 structure.
+   * @param[in]     S_RFFT     points to an instance of Q15 RFFT/RIFFT structure.
+   * @param[in]     S_CFFT     points to an instance of Q15 CFFT/CIFFT structure.
    * @param[in]     N          length of the DCT4.
    * @param[in]     Nby2       half of the length of the DCT4.
    * @param[in]     normalize  normalizing factor.
-   * @return		arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
-   */
-
+   * @return      arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
+   */
   arm_status arm_dct4_init_q15(
   arm_dct4_instance_q15 * S,
   arm_rfft_instance_q15 * S_RFFT,
@@ -2495,164 +2454,153 @@
   uint16_t Nby2,
   q15_t normalize);
 
+
   /**
    * @brief Processing function for the Q15 DCT4/IDCT4.
-   * @param[in]       *S             points to an instance of the Q15 DCT4 structure.
-   * @param[in]       *pState        points to state buffer.
-   * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
-   * @return none.
-   */
-
+   * @param[in]     S              points to an instance of the Q15 DCT4 structure.
+   * @param[in]     pState         points to state buffer.
+   * @param[in,out] pInlineBuffer  points to the in-place input and output buffer.
+   */
   void arm_dct4_q15(
   const arm_dct4_instance_q15 * S,
   q15_t * pState,
   q15_t * pInlineBuffer);
 
+
   /**
    * @brief Floating-point vector addition.
-   * @param[in]       *pSrcA points to the first input vector
-   * @param[in]       *pSrcB points to the second input vector
-   * @param[out]      *pDst points to the output vector
-   * @param[in]       blockSize number of samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in each vector
+   */
   void arm_add_f32(
   float32_t * pSrcA,
   float32_t * pSrcB,
   float32_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Q7 vector addition.
-   * @param[in]       *pSrcA points to the first input vector
-   * @param[in]       *pSrcB points to the second input vector
-   * @param[out]      *pDst points to the output vector
-   * @param[in]       blockSize number of samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in each vector
+   */
   void arm_add_q7(
   q7_t * pSrcA,
   q7_t * pSrcB,
   q7_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Q15 vector addition.
-   * @param[in]       *pSrcA points to the first input vector
-   * @param[in]       *pSrcB points to the second input vector
-   * @param[out]      *pDst points to the output vector
-   * @param[in]       blockSize number of samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in each vector
+   */
   void arm_add_q15(
   q15_t * pSrcA,
   q15_t * pSrcB,
   q15_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Q31 vector addition.
-   * @param[in]       *pSrcA points to the first input vector
-   * @param[in]       *pSrcB points to the second input vector
-   * @param[out]      *pDst points to the output vector
-   * @param[in]       blockSize number of samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in each vector
+   */
   void arm_add_q31(
   q31_t * pSrcA,
   q31_t * pSrcB,
   q31_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Floating-point vector subtraction.
-   * @param[in]       *pSrcA points to the first input vector
-   * @param[in]       *pSrcB points to the second input vector
-   * @param[out]      *pDst points to the output vector
-   * @param[in]       blockSize number of samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in each vector
+   */
   void arm_sub_f32(
   float32_t * pSrcA,
   float32_t * pSrcB,
   float32_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Q7 vector subtraction.
-   * @param[in]       *pSrcA points to the first input vector
-   * @param[in]       *pSrcB points to the second input vector
-   * @param[out]      *pDst points to the output vector
-   * @param[in]       blockSize number of samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in each vector
+   */
   void arm_sub_q7(
   q7_t * pSrcA,
   q7_t * pSrcB,
   q7_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Q15 vector subtraction.
-   * @param[in]       *pSrcA points to the first input vector
-   * @param[in]       *pSrcB points to the second input vector
-   * @param[out]      *pDst points to the output vector
-   * @param[in]       blockSize number of samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in each vector
+   */
   void arm_sub_q15(
   q15_t * pSrcA,
   q15_t * pSrcB,
   q15_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Q31 vector subtraction.
-   * @param[in]       *pSrcA points to the first input vector
-   * @param[in]       *pSrcB points to the second input vector
-   * @param[out]      *pDst points to the output vector
-   * @param[in]       blockSize number of samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in each vector
+   */
   void arm_sub_q31(
   q31_t * pSrcA,
   q31_t * pSrcB,
   q31_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Multiplies a floating-point vector by a scalar.
-   * @param[in]       *pSrc points to the input vector
-   * @param[in]       scale scale factor to be applied
-   * @param[out]      *pDst points to the output vector
-   * @param[in]       blockSize number of samples in the vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc       points to the input vector
+   * @param[in]  scale      scale factor to be applied
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in the vector
+   */
   void arm_scale_f32(
   float32_t * pSrc,
   float32_t scale,
   float32_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Multiplies a Q7 vector by a scalar.
-   * @param[in]       *pSrc points to the input vector
-   * @param[in]       scaleFract fractional portion of the scale value
-   * @param[in]       shift number of bits to shift the result by
-   * @param[out]      *pDst points to the output vector
-   * @param[in]       blockSize number of samples in the vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc        points to the input vector
+   * @param[in]  scaleFract  fractional portion of the scale value
+   * @param[in]  shift       number of bits to shift the result by
+   * @param[out] pDst        points to the output vector
+   * @param[in]  blockSize   number of samples in the vector
+   */
   void arm_scale_q7(
   q7_t * pSrc,
   q7_t scaleFract,
@@ -2660,16 +2608,15 @@
   q7_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Multiplies a Q15 vector by a scalar.
-   * @param[in]       *pSrc points to the input vector
-   * @param[in]       scaleFract fractional portion of the scale value
-   * @param[in]       shift number of bits to shift the result by
-   * @param[out]      *pDst points to the output vector
-   * @param[in]       blockSize number of samples in the vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc        points to the input vector
+   * @param[in]  scaleFract  fractional portion of the scale value
+   * @param[in]  shift       number of bits to shift the result by
+   * @param[out] pDst        points to the output vector
+   * @param[in]  blockSize   number of samples in the vector
+   */
   void arm_scale_q15(
   q15_t * pSrc,
   q15_t scaleFract,
@@ -2677,16 +2624,15 @@
   q15_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Multiplies a Q31 vector by a scalar.
-   * @param[in]       *pSrc points to the input vector
-   * @param[in]       scaleFract fractional portion of the scale value
-   * @param[in]       shift number of bits to shift the result by
-   * @param[out]      *pDst points to the output vector
-   * @param[in]       blockSize number of samples in the vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc        points to the input vector
+   * @param[in]  scaleFract  fractional portion of the scale value
+   * @param[in]  shift       number of bits to shift the result by
+   * @param[out] pDst        points to the output vector
+   * @param[in]  blockSize   number of samples in the vector
+   */
   void arm_scale_q31(
   q31_t * pSrc,
   q31_t scaleFract,
@@ -2694,379 +2640,361 @@
   q31_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Q7 vector absolute value.
-   * @param[in]       *pSrc points to the input buffer
-   * @param[out]      *pDst points to the output buffer
-   * @param[in]       blockSize number of samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc       points to the input buffer
+   * @param[out] pDst       points to the output buffer
+   * @param[in]  blockSize  number of samples in each vector
+   */
   void arm_abs_q7(
   q7_t * pSrc,
   q7_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Floating-point vector absolute value.
-   * @param[in]       *pSrc points to the input buffer
-   * @param[out]      *pDst points to the output buffer
-   * @param[in]       blockSize number of samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc       points to the input buffer
+   * @param[out] pDst       points to the output buffer
+   * @param[in]  blockSize  number of samples in each vector
+   */
   void arm_abs_f32(
   float32_t * pSrc,
   float32_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Q15 vector absolute value.
-   * @param[in]       *pSrc points to the input buffer
-   * @param[out]      *pDst points to the output buffer
-   * @param[in]       blockSize number of samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc       points to the input buffer
+   * @param[out] pDst       points to the output buffer
+   * @param[in]  blockSize  number of samples in each vector
+   */
   void arm_abs_q15(
   q15_t * pSrc,
   q15_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Q31 vector absolute value.
-   * @param[in]       *pSrc points to the input buffer
-   * @param[out]      *pDst points to the output buffer
-   * @param[in]       blockSize number of samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc       points to the input buffer
+   * @param[out] pDst       points to the output buffer
+   * @param[in]  blockSize  number of samples in each vector
+   */
   void arm_abs_q31(
   q31_t * pSrc,
   q31_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Dot product of floating-point vectors.
-   * @param[in]       *pSrcA points to the first input vector
-   * @param[in]       *pSrcB points to the second input vector
-   * @param[in]       blockSize number of samples in each vector
-   * @param[out]      *result output result returned here
-   * @return none.
-   */
-
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[in]  blockSize  number of samples in each vector
+   * @param[out] result     output result returned here
+   */
   void arm_dot_prod_f32(
   float32_t * pSrcA,
   float32_t * pSrcB,
   uint32_t blockSize,
   float32_t * result);
 
+
   /**
    * @brief Dot product of Q7 vectors.
-   * @param[in]       *pSrcA points to the first input vector
-   * @param[in]       *pSrcB points to the second input vector
-   * @param[in]       blockSize number of samples in each vector
-   * @param[out]      *result output result returned here
-   * @return none.
-   */
-
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[in]  blockSize  number of samples in each vector
+   * @param[out] result     output result returned here
+   */
   void arm_dot_prod_q7(
   q7_t * pSrcA,
   q7_t * pSrcB,
   uint32_t blockSize,
   q31_t * result);
 
+
   /**
    * @brief Dot product of Q15 vectors.
-   * @param[in]       *pSrcA points to the first input vector
-   * @param[in]       *pSrcB points to the second input vector
-   * @param[in]       blockSize number of samples in each vector
-   * @param[out]      *result output result returned here
-   * @return none.
-   */
-
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[in]  blockSize  number of samples in each vector
+   * @param[out] result     output result returned here
+   */
   void arm_dot_prod_q15(
   q15_t * pSrcA,
   q15_t * pSrcB,
   uint32_t blockSize,
   q63_t * result);
 
+
   /**
    * @brief Dot product of Q31 vectors.
-   * @param[in]       *pSrcA points to the first input vector
-   * @param[in]       *pSrcB points to the second input vector
-   * @param[in]       blockSize number of samples in each vector
-   * @param[out]      *result output result returned here
-   * @return none.
-   */
-
+   * @param[in]  pSrcA      points to the first input vector
+   * @param[in]  pSrcB      points to the second input vector
+   * @param[in]  blockSize  number of samples in each vector
+   * @param[out] result     output result returned here
+   */
   void arm_dot_prod_q31(
   q31_t * pSrcA,
   q31_t * pSrcB,
   uint32_t blockSize,
   q63_t * result);
 
+
   /**
    * @brief  Shifts the elements of a Q7 vector a specified number of bits.
-   * @param[in]  *pSrc points to the input vector
-   * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
-   * @param[out]  *pDst points to the output vector
-   * @param[in]  blockSize number of samples in the vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc       points to the input vector
+   * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in the vector
+   */
   void arm_shift_q7(
   q7_t * pSrc,
   int8_t shiftBits,
   q7_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Shifts the elements of a Q15 vector a specified number of bits.
-   * @param[in]  *pSrc points to the input vector
-   * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
-   * @param[out]  *pDst points to the output vector
-   * @param[in]  blockSize number of samples in the vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc       points to the input vector
+   * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in the vector
+   */
   void arm_shift_q15(
   q15_t * pSrc,
   int8_t shiftBits,
   q15_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Shifts the elements of a Q31 vector a specified number of bits.
-   * @param[in]  *pSrc points to the input vector
-   * @param[in]  shiftBits number of bits to shift.  A positive value shifts left; a negative value shifts right.
-   * @param[out]  *pDst points to the output vector
-   * @param[in]  blockSize number of samples in the vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc       points to the input vector
+   * @param[in]  shiftBits  number of bits to shift.  A positive value shifts left; a negative value shifts right.
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in the vector
+   */
   void arm_shift_q31(
   q31_t * pSrc,
   int8_t shiftBits,
   q31_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Adds a constant offset to a floating-point vector.
-   * @param[in]  *pSrc points to the input vector
-   * @param[in]  offset is the offset to be added
-   * @param[out]  *pDst points to the output vector
-   * @param[in]  blockSize number of samples in the vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc       points to the input vector
+   * @param[in]  offset     is the offset to be added
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in the vector
+   */
   void arm_offset_f32(
   float32_t * pSrc,
   float32_t offset,
   float32_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Adds a constant offset to a Q7 vector.
-   * @param[in]  *pSrc points to the input vector
-   * @param[in]  offset is the offset to be added
-   * @param[out]  *pDst points to the output vector
-   * @param[in]  blockSize number of samples in the vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc       points to the input vector
+   * @param[in]  offset     is the offset to be added
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in the vector
+   */
   void arm_offset_q7(
   q7_t * pSrc,
   q7_t offset,
   q7_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Adds a constant offset to a Q15 vector.
-   * @param[in]  *pSrc points to the input vector
-   * @param[in]  offset is the offset to be added
-   * @param[out]  *pDst points to the output vector
-   * @param[in]  blockSize number of samples in the vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc       points to the input vector
+   * @param[in]  offset     is the offset to be added
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in the vector
+   */
   void arm_offset_q15(
   q15_t * pSrc,
   q15_t offset,
   q15_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Adds a constant offset to a Q31 vector.
-   * @param[in]  *pSrc points to the input vector
-   * @param[in]  offset is the offset to be added
-   * @param[out]  *pDst points to the output vector
-   * @param[in]  blockSize number of samples in the vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc       points to the input vector
+   * @param[in]  offset     is the offset to be added
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in the vector
+   */
   void arm_offset_q31(
   q31_t * pSrc,
   q31_t offset,
   q31_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Negates the elements of a floating-point vector.
-   * @param[in]  *pSrc points to the input vector
-   * @param[out]  *pDst points to the output vector
-   * @param[in]  blockSize number of samples in the vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc       points to the input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in the vector
+   */
   void arm_negate_f32(
   float32_t * pSrc,
   float32_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Negates the elements of a Q7 vector.
-   * @param[in]  *pSrc points to the input vector
-   * @param[out]  *pDst points to the output vector
-   * @param[in]  blockSize number of samples in the vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc       points to the input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in the vector
+   */
   void arm_negate_q7(
   q7_t * pSrc,
   q7_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Negates the elements of a Q15 vector.
-   * @param[in]  *pSrc points to the input vector
-   * @param[out]  *pDst points to the output vector
-   * @param[in]  blockSize number of samples in the vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc       points to the input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in the vector
+   */
   void arm_negate_q15(
   q15_t * pSrc,
   q15_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Negates the elements of a Q31 vector.
-   * @param[in]  *pSrc points to the input vector
-   * @param[out]  *pDst points to the output vector
-   * @param[in]  blockSize number of samples in the vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc       points to the input vector
+   * @param[out] pDst       points to the output vector
+   * @param[in]  blockSize  number of samples in the vector
+   */
   void arm_negate_q31(
   q31_t * pSrc,
   q31_t * pDst,
   uint32_t blockSize);
+
+
   /**
    * @brief  Copies the elements of a floating-point vector.
-   * @param[in]  *pSrc input pointer
-   * @param[out]  *pDst output pointer
-   * @param[in]  blockSize number of samples to process
-   * @return none.
+   * @param[in]  pSrc       input pointer
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
    */
   void arm_copy_f32(
   float32_t * pSrc,
   float32_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Copies the elements of a Q7 vector.
-   * @param[in]  *pSrc input pointer
-   * @param[out]  *pDst output pointer
-   * @param[in]  blockSize number of samples to process
-   * @return none.
+   * @param[in]  pSrc       input pointer
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
    */
   void arm_copy_q7(
   q7_t * pSrc,
   q7_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Copies the elements of a Q15 vector.
-   * @param[in]  *pSrc input pointer
-   * @param[out]  *pDst output pointer
-   * @param[in]  blockSize number of samples to process
-   * @return none.
+   * @param[in]  pSrc       input pointer
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
    */
   void arm_copy_q15(
   q15_t * pSrc,
   q15_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Copies the elements of a Q31 vector.
-   * @param[in]  *pSrc input pointer
-   * @param[out]  *pDst output pointer
-   * @param[in]  blockSize number of samples to process
-   * @return none.
+   * @param[in]  pSrc       input pointer
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
    */
   void arm_copy_q31(
   q31_t * pSrc,
   q31_t * pDst,
   uint32_t blockSize);
+
+
   /**
    * @brief  Fills a constant value into a floating-point vector.
-   * @param[in]  value input value to be filled
-   * @param[out]  *pDst output pointer
-   * @param[in]  blockSize number of samples to process
-   * @return none.
+   * @param[in]  value      input value to be filled
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
    */
   void arm_fill_f32(
   float32_t value,
   float32_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Fills a constant value into a Q7 vector.
-   * @param[in]  value input value to be filled
-   * @param[out]  *pDst output pointer
-   * @param[in]  blockSize number of samples to process
-   * @return none.
+   * @param[in]  value      input value to be filled
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
    */
   void arm_fill_q7(
   q7_t value,
   q7_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Fills a constant value into a Q15 vector.
-   * @param[in]  value input value to be filled
-   * @param[out]  *pDst output pointer
-   * @param[in]  blockSize number of samples to process
-   * @return none.
+   * @param[in]  value      input value to be filled
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
    */
   void arm_fill_q15(
   q15_t value,
   q15_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Fills a constant value into a Q31 vector.
-   * @param[in]  value input value to be filled
-   * @param[out]  *pDst output pointer
-   * @param[in]  blockSize number of samples to process
-   * @return none.
+   * @param[in]  value      input value to be filled
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
    */
   void arm_fill_q31(
   q31_t value,
   q31_t * pDst,
   uint32_t blockSize);
 
+
 /**
  * @brief Convolution of floating-point sequences.
- * @param[in] *pSrcA points to the first input sequence.
- * @param[in] srcALen length of the first input sequence.
- * @param[in] *pSrcB points to the second input sequence.
- * @param[in] srcBLen length of the second input sequence.
- * @param[out] *pDst points to the location where the output result is written.  Length srcALen+srcBLen-1.
- * @return none.
+ * @param[in]  pSrcA    points to the first input sequence.
+ * @param[in]  srcALen  length of the first input sequence.
+ * @param[in]  pSrcB    points to the second input sequence.
+ * @param[in]  srcBLen  length of the second input sequence.
+ * @param[out] pDst     points to the location where the output result is written.  Length srcALen+srcBLen-1.
  */
-
   void arm_conv_f32(
   float32_t * pSrcA,
   uint32_t srcALen,
@@ -3077,17 +3005,14 @@
 
   /**
    * @brief Convolution of Q15 sequences.
-   * @param[in] *pSrcA points to the first input sequence.
-   * @param[in] srcALen length of the first input sequence.
-   * @param[in] *pSrcB points to the second input sequence.
-   * @param[in] srcBLen length of the second input sequence.
-   * @param[out] *pDst points to the block of output data  Length srcALen+srcBLen-1.
-   * @param[in]  *pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
-   * @param[in]  *pScratch2 points to scratch buffer of size min(srcALen, srcBLen).
-   * @return none.
-   */
-
-
+   * @param[in]  pSrcA      points to the first input sequence.
+   * @param[in]  srcALen    length of the first input sequence.
+   * @param[in]  pSrcB      points to the second input sequence.
+   * @param[in]  srcBLen    length of the second input sequence.
+   * @param[out] pDst       points to the block of output data  Length srcALen+srcBLen-1.
+   * @param[in]  pScratch1  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
+   * @param[in]  pScratch2  points to scratch buffer of size min(srcALen, srcBLen).
+   */
   void arm_conv_opt_q15(
   q15_t * pSrcA,
   uint32_t srcALen,
@@ -3100,14 +3025,12 @@
 
 /**
  * @brief Convolution of Q15 sequences.
- * @param[in] *pSrcA points to the first input sequence.
- * @param[in] srcALen length of the first input sequence.
- * @param[in] *pSrcB points to the second input sequence.
- * @param[in] srcBLen length of the second input sequence.
- * @param[out] *pDst points to the location where the output result is written.  Length srcALen+srcBLen-1.
- * @return none.
+ * @param[in]  pSrcA    points to the first input sequence.
+ * @param[in]  srcALen  length of the first input sequence.
+ * @param[in]  pSrcB    points to the second input sequence.
+ * @param[in]  srcBLen  length of the second input sequence.
+ * @param[out] pDst     points to the location where the output result is written.  Length srcALen+srcBLen-1.
  */
-
   void arm_conv_q15(
   q15_t * pSrcA,
   uint32_t srcALen,
@@ -3115,35 +3038,33 @@
   uint32_t srcBLen,
   q15_t * pDst);
 
-  /**
-   * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4
-   * @param[in] *pSrcA points to the first input sequence.
-   * @param[in] srcALen length of the first input sequence.
-   * @param[in] *pSrcB points to the second input sequence.
-   * @param[in] srcBLen length of the second input sequence.
-   * @param[out] *pDst points to the block of output data  Length srcALen+srcBLen-1.
-   * @return none.
-   */
-
-  void arm_conv_fast_q15(
-			  q15_t * pSrcA,
-			 uint32_t srcALen,
-			  q15_t * pSrcB,
-			 uint32_t srcBLen,
-			 q15_t * pDst);
 
   /**
    * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4
-   * @param[in] *pSrcA points to the first input sequence.
-   * @param[in] srcALen length of the first input sequence.
-   * @param[in] *pSrcB points to the second input sequence.
-   * @param[in] srcBLen length of the second input sequence.
-   * @param[out] *pDst points to the block of output data  Length srcALen+srcBLen-1.
-   * @param[in]  *pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
-   * @param[in]  *pScratch2 points to scratch buffer of size min(srcALen, srcBLen).
-   * @return none.
-   */
-
+   * @param[in]  pSrcA    points to the first input sequence.
+   * @param[in]  srcALen  length of the first input sequence.
+   * @param[in]  pSrcB    points to the second input sequence.
+   * @param[in]  srcBLen  length of the second input sequence.
+   * @param[out] pDst     points to the block of output data  Length srcALen+srcBLen-1.
+   */
+  void arm_conv_fast_q15(
+          q15_t * pSrcA,
+          uint32_t srcALen,
+          q15_t * pSrcB,
+          uint32_t srcBLen,
+          q15_t * pDst);
+
+
+  /**
+   * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4
+   * @param[in]  pSrcA      points to the first input sequence.
+   * @param[in]  srcALen    length of the first input sequence.
+   * @param[in]  pSrcB      points to the second input sequence.
+   * @param[in]  srcBLen    length of the second input sequence.
+   * @param[out] pDst       points to the block of output data  Length srcALen+srcBLen-1.
+   * @param[in]  pScratch1  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
+   * @param[in]  pScratch2  points to scratch buffer of size min(srcALen, srcBLen).
+   */
   void arm_conv_fast_opt_q15(
   q15_t * pSrcA,
   uint32_t srcALen,
@@ -3154,17 +3075,14 @@
   q15_t * pScratch2);
 
 
-
   /**
    * @brief Convolution of Q31 sequences.
-   * @param[in] *pSrcA points to the first input sequence.
-   * @param[in] srcALen length of the first input sequence.
-   * @param[in] *pSrcB points to the second input sequence.
-   * @param[in] srcBLen length of the second input sequence.
-   * @param[out] *pDst points to the block of output data  Length srcALen+srcBLen-1.
-   * @return none.
-   */
-
+   * @param[in]  pSrcA    points to the first input sequence.
+   * @param[in]  srcALen  length of the first input sequence.
+   * @param[in]  pSrcB    points to the second input sequence.
+   * @param[in]  srcBLen  length of the second input sequence.
+   * @param[out] pDst     points to the block of output data  Length srcALen+srcBLen-1.
+   */
   void arm_conv_q31(
   q31_t * pSrcA,
   uint32_t srcALen,
@@ -3172,16 +3090,15 @@
   uint32_t srcBLen,
   q31_t * pDst);
 
+
   /**
    * @brief Convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4
-   * @param[in] *pSrcA points to the first input sequence.
-   * @param[in] srcALen length of the first input sequence.
-   * @param[in] *pSrcB points to the second input sequence.
-   * @param[in] srcBLen length of the second input sequence.
-   * @param[out] *pDst points to the block of output data  Length srcALen+srcBLen-1.
-   * @return none.
-   */
-
+   * @param[in]  pSrcA    points to the first input sequence.
+   * @param[in]  srcALen  length of the first input sequence.
+   * @param[in]  pSrcB    points to the second input sequence.
+   * @param[in]  srcBLen  length of the second input sequence.
+   * @param[out] pDst     points to the block of output data  Length srcALen+srcBLen-1.
+   */
   void arm_conv_fast_q31(
   q31_t * pSrcA,
   uint32_t srcALen,
@@ -3192,16 +3109,14 @@
 
     /**
    * @brief Convolution of Q7 sequences.
-   * @param[in] *pSrcA points to the first input sequence.
-   * @param[in] srcALen length of the first input sequence.
-   * @param[in] *pSrcB points to the second input sequence.
-   * @param[in] srcBLen length of the second input sequence.
-   * @param[out] *pDst points to the block of output data  Length srcALen+srcBLen-1.
-   * @param[in]  *pScratch1 points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
-   * @param[in]  *pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
-   * @return none.
-   */
-
+   * @param[in]  pSrcA      points to the first input sequence.
+   * @param[in]  srcALen    length of the first input sequence.
+   * @param[in]  pSrcB      points to the second input sequence.
+   * @param[in]  srcBLen    length of the second input sequence.
+   * @param[out] pDst       points to the block of output data  Length srcALen+srcBLen-1.
+   * @param[in]  pScratch1  points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
+   * @param[in]  pScratch2  points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
+   */
   void arm_conv_opt_q7(
   q7_t * pSrcA,
   uint32_t srcALen,
@@ -3212,17 +3127,14 @@
   q15_t * pScratch2);
 
 
-
   /**
    * @brief Convolution of Q7 sequences.
-   * @param[in] *pSrcA points to the first input sequence.
-   * @param[in] srcALen length of the first input sequence.
-   * @param[in] *pSrcB points to the second input sequence.
-   * @param[in] srcBLen length of the second input sequence.
-   * @param[out] *pDst points to the block of output data  Length srcALen+srcBLen-1.
-   * @return none.
-   */
-
+   * @param[in]  pSrcA    points to the first input sequence.
+   * @param[in]  srcALen  length of the first input sequence.
+   * @param[in]  pSrcB    points to the second input sequence.
+   * @param[in]  srcBLen  length of the second input sequence.
+   * @param[out] pDst     points to the block of output data  Length srcALen+srcBLen-1.
+   */
   void arm_conv_q7(
   q7_t * pSrcA,
   uint32_t srcALen,
@@ -3233,16 +3145,15 @@
 
   /**
    * @brief Partial convolution of floating-point sequences.
-   * @param[in]       *pSrcA points to the first input sequence.
-   * @param[in]       srcALen length of the first input sequence.
-   * @param[in]       *pSrcB points to the second input sequence.
-   * @param[in]       srcBLen length of the second input sequence.
-   * @param[out]      *pDst points to the block of output data
-   * @param[in]       firstIndex is the first output sample to start with.
-   * @param[in]       numPoints is the number of output points to be computed.
+   * @param[in]  pSrcA       points to the first input sequence.
+   * @param[in]  srcALen     length of the first input sequence.
+   * @param[in]  pSrcB       points to the second input sequence.
+   * @param[in]  srcBLen     length of the second input sequence.
+   * @param[out] pDst        points to the block of output data
+   * @param[in]  firstIndex  is the first output sample to start with.
+   * @param[in]  numPoints   is the number of output points to be computed.
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
-
   arm_status arm_conv_partial_f32(
   float32_t * pSrcA,
   uint32_t srcALen,
@@ -3252,20 +3163,20 @@
   uint32_t firstIndex,
   uint32_t numPoints);
 
-    /**
+
+  /**
    * @brief Partial convolution of Q15 sequences.
-   * @param[in]       *pSrcA points to the first input sequence.
-   * @param[in]       srcALen length of the first input sequence.
-   * @param[in]       *pSrcB points to the second input sequence.
-   * @param[in]       srcBLen length of the second input sequence.
-   * @param[out]      *pDst points to the block of output data
-   * @param[in]       firstIndex is the first output sample to start with.
-   * @param[in]       numPoints is the number of output points to be computed.
-   * @param[in]       * pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
-   * @param[in]       * pScratch2 points to scratch buffer of size min(srcALen, srcBLen).
+   * @param[in]  pSrcA       points to the first input sequence.
+   * @param[in]  srcALen     length of the first input sequence.
+   * @param[in]  pSrcB       points to the second input sequence.
+   * @param[in]  srcBLen     length of the second input sequence.
+   * @param[out] pDst        points to the block of output data
+   * @param[in]  firstIndex  is the first output sample to start with.
+   * @param[in]  numPoints   is the number of output points to be computed.
+   * @param[in]  pScratch1   points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
+   * @param[in]  pScratch2   points to scratch buffer of size min(srcALen, srcBLen).
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
-
   arm_status arm_conv_partial_opt_q15(
   q15_t * pSrcA,
   uint32_t srcALen,
@@ -3278,18 +3189,17 @@
   q15_t * pScratch2);
 
 
-/**
+  /**
    * @brief Partial convolution of Q15 sequences.
-   * @param[in]       *pSrcA points to the first input sequence.
-   * @param[in]       srcALen length of the first input sequence.
-   * @param[in]       *pSrcB points to the second input sequence.
-   * @param[in]       srcBLen length of the second input sequence.
-   * @param[out]      *pDst points to the block of output data
-   * @param[in]       firstIndex is the first output sample to start with.
-   * @param[in]       numPoints is the number of output points to be computed.
+   * @param[in]  pSrcA       points to the first input sequence.
+   * @param[in]  srcALen     length of the first input sequence.
+   * @param[in]  pSrcB       points to the second input sequence.
+   * @param[in]  srcBLen     length of the second input sequence.
+   * @param[out] pDst        points to the block of output data
+   * @param[in]  firstIndex  is the first output sample to start with.
+   * @param[in]  numPoints   is the number of output points to be computed.
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
-
   arm_status arm_conv_partial_q15(
   q15_t * pSrcA,
   uint32_t srcALen,
@@ -3299,42 +3209,41 @@
   uint32_t firstIndex,
   uint32_t numPoints);
 
+
   /**
    * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4
-   * @param[in]       *pSrcA points to the first input sequence.
-   * @param[in]       srcALen length of the first input sequence.
-   * @param[in]       *pSrcB points to the second input sequence.
-   * @param[in]       srcBLen length of the second input sequence.
-   * @param[out]      *pDst points to the block of output data
-   * @param[in]       firstIndex is the first output sample to start with.
-   * @param[in]       numPoints is the number of output points to be computed.
+   * @param[in]  pSrcA       points to the first input sequence.
+   * @param[in]  srcALen     length of the first input sequence.
+   * @param[in]  pSrcB       points to the second input sequence.
+   * @param[in]  srcBLen     length of the second input sequence.
+   * @param[out] pDst        points to the block of output data
+   * @param[in]  firstIndex  is the first output sample to start with.
+   * @param[in]  numPoints   is the number of output points to be computed.
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
-
   arm_status arm_conv_partial_fast_q15(
-				        q15_t * pSrcA,
-				       uint32_t srcALen,
-				        q15_t * pSrcB,
-				       uint32_t srcBLen,
-				       q15_t * pDst,
-				       uint32_t firstIndex,
-				       uint32_t numPoints);
+  q15_t * pSrcA,
+  uint32_t srcALen,
+  q15_t * pSrcB,
+  uint32_t srcBLen,
+  q15_t * pDst,
+  uint32_t firstIndex,
+  uint32_t numPoints);
 
 
   /**
    * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4
-   * @param[in]       *pSrcA points to the first input sequence.
-   * @param[in]       srcALen length of the first input sequence.
-   * @param[in]       *pSrcB points to the second input sequence.
-   * @param[in]       srcBLen length of the second input sequence.
-   * @param[out]      *pDst points to the block of output data
-   * @param[in]       firstIndex is the first output sample to start with.
-   * @param[in]       numPoints is the number of output points to be computed.
-   * @param[in]       * pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
-   * @param[in]       * pScratch2 points to scratch buffer of size min(srcALen, srcBLen).
+   * @param[in]  pSrcA       points to the first input sequence.
+   * @param[in]  srcALen     length of the first input sequence.
+   * @param[in]  pSrcB       points to the second input sequence.
+   * @param[in]  srcBLen     length of the second input sequence.
+   * @param[out] pDst        points to the block of output data
+   * @param[in]  firstIndex  is the first output sample to start with.
+   * @param[in]  numPoints   is the number of output points to be computed.
+   * @param[in]  pScratch1   points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
+   * @param[in]  pScratch2   points to scratch buffer of size min(srcALen, srcBLen).
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
-
   arm_status arm_conv_partial_fast_opt_q15(
   q15_t * pSrcA,
   uint32_t srcALen,
@@ -3349,16 +3258,15 @@
 
   /**
    * @brief Partial convolution of Q31 sequences.
-   * @param[in]       *pSrcA points to the first input sequence.
-   * @param[in]       srcALen length of the first input sequence.
-   * @param[in]       *pSrcB points to the second input sequence.
-   * @param[in]       srcBLen length of the second input sequence.
-   * @param[out]      *pDst points to the block of output data
-   * @param[in]       firstIndex is the first output sample to start with.
-   * @param[in]       numPoints is the number of output points to be computed.
+   * @param[in]  pSrcA       points to the first input sequence.
+   * @param[in]  srcALen     length of the first input sequence.
+   * @param[in]  pSrcB       points to the second input sequence.
+   * @param[in]  srcBLen     length of the second input sequence.
+   * @param[out] pDst        points to the block of output data
+   * @param[in]  firstIndex  is the first output sample to start with.
+   * @param[in]  numPoints   is the number of output points to be computed.
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
-
   arm_status arm_conv_partial_q31(
   q31_t * pSrcA,
   uint32_t srcALen,
@@ -3371,16 +3279,15 @@
 
   /**
    * @brief Partial convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4
-   * @param[in]       *pSrcA points to the first input sequence.
-   * @param[in]       srcALen length of the first input sequence.
-   * @param[in]       *pSrcB points to the second input sequence.
-   * @param[in]       srcBLen length of the second input sequence.
-   * @param[out]      *pDst points to the block of output data
-   * @param[in]       firstIndex is the first output sample to start with.
-   * @param[in]       numPoints is the number of output points to be computed.
+   * @param[in]  pSrcA       points to the first input sequence.
+   * @param[in]  srcALen     length of the first input sequence.
+   * @param[in]  pSrcB       points to the second input sequence.
+   * @param[in]  srcBLen     length of the second input sequence.
+   * @param[out] pDst        points to the block of output data
+   * @param[in]  firstIndex  is the first output sample to start with.
+   * @param[in]  numPoints   is the number of output points to be computed.
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
-
   arm_status arm_conv_partial_fast_q31(
   q31_t * pSrcA,
   uint32_t srcALen,
@@ -3393,18 +3300,17 @@
 
   /**
    * @brief Partial convolution of Q7 sequences
-   * @param[in]       *pSrcA points to the first input sequence.
-   * @param[in]       srcALen length of the first input sequence.
-   * @param[in]       *pSrcB points to the second input sequence.
-   * @param[in]       srcBLen length of the second input sequence.
-   * @param[out]      *pDst points to the block of output data
-   * @param[in]       firstIndex is the first output sample to start with.
-   * @param[in]       numPoints is the number of output points to be computed.
-   * @param[in]  *pScratch1 points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
-   * @param[in]  *pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
+   * @param[in]  pSrcA       points to the first input sequence.
+   * @param[in]  srcALen     length of the first input sequence.
+   * @param[in]  pSrcB       points to the second input sequence.
+   * @param[in]  srcBLen     length of the second input sequence.
+   * @param[out] pDst        points to the block of output data
+   * @param[in]  firstIndex  is the first output sample to start with.
+   * @param[in]  numPoints   is the number of output points to be computed.
+   * @param[in]  pScratch1   points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
+   * @param[in]  pScratch2   points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
-
   arm_status arm_conv_partial_opt_q7(
   q7_t * pSrcA,
   uint32_t srcALen,
@@ -3419,16 +3325,15 @@
 
 /**
    * @brief Partial convolution of Q7 sequences.
-   * @param[in]       *pSrcA points to the first input sequence.
-   * @param[in]       srcALen length of the first input sequence.
-   * @param[in]       *pSrcB points to the second input sequence.
-   * @param[in]       srcBLen length of the second input sequence.
-   * @param[out]      *pDst points to the block of output data
-   * @param[in]       firstIndex is the first output sample to start with.
-   * @param[in]       numPoints is the number of output points to be computed.
+   * @param[in]  pSrcA       points to the first input sequence.
+   * @param[in]  srcALen     length of the first input sequence.
+   * @param[in]  pSrcB       points to the second input sequence.
+   * @param[in]  srcBLen     length of the second input sequence.
+   * @param[out] pDst        points to the block of output data
+   * @param[in]  firstIndex  is the first output sample to start with.
+   * @param[in]  numPoints   is the number of output points to be computed.
    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0 srcALen+srcBLen-2].
    */
-
   arm_status arm_conv_partial_q7(
   q7_t * pSrcA,
   uint32_t srcALen,
@@ -3439,56 +3344,47 @@
   uint32_t numPoints);
 
 
-
   /**
    * @brief Instance structure for the Q15 FIR decimator.
    */
-
   typedef struct
   {
-    uint8_t M;                      /**< decimation factor. */
-    uint16_t numTaps;               /**< number of coefficients in the filter. */
-    q15_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numTaps.*/
-    q15_t *pState;                   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
+    uint8_t M;                  /**< decimation factor. */
+    uint16_t numTaps;           /**< number of coefficients in the filter. */
+    q15_t *pCoeffs;             /**< points to the coefficient array. The array is of length numTaps.*/
+    q15_t *pState;              /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
   } arm_fir_decimate_instance_q15;
 
   /**
    * @brief Instance structure for the Q31 FIR decimator.
    */
-
   typedef struct
   {
     uint8_t M;                  /**< decimation factor. */
     uint16_t numTaps;           /**< number of coefficients in the filter. */
-    q31_t *pCoeffs;              /**< points to the coefficient array. The array is of length numTaps.*/
-    q31_t *pState;               /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-
+    q31_t *pCoeffs;             /**< points to the coefficient array. The array is of length numTaps.*/
+    q31_t *pState;              /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
   } arm_fir_decimate_instance_q31;
 
   /**
    * @brief Instance structure for the floating-point FIR decimator.
    */
-
   typedef struct
   {
-    uint8_t M;                          /**< decimation factor. */
-    uint16_t numTaps;                   /**< number of coefficients in the filter. */
-    float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numTaps.*/
-    float32_t *pState;                   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
-
+    uint8_t M;                  /**< decimation factor. */
+    uint16_t numTaps;           /**< number of coefficients in the filter. */
+    float32_t *pCoeffs;         /**< points to the coefficient array. The array is of length numTaps.*/
+    float32_t *pState;          /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
   } arm_fir_decimate_instance_f32;
 
 
-
   /**
    * @brief Processing function for the floating-point FIR decimator.
-   * @param[in] *S points to an instance of the floating-point FIR decimator structure.
-   * @param[in] *pSrc points to the block of input data.
-   * @param[out] *pDst points to the block of output data
-   * @param[in] blockSize number of input samples to process per call.
-   * @return none
-   */
-
+   * @param[in]  S          points to an instance of the floating-point FIR decimator structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data
+   * @param[in]  blockSize  number of input samples to process per call.
+   */
   void arm_fir_decimate_f32(
   const arm_fir_decimate_instance_f32 * S,
   float32_t * pSrc,
@@ -3498,16 +3394,15 @@
 
   /**
    * @brief  Initialization function for the floating-point FIR decimator.
-   * @param[in,out] *S points to an instance of the floating-point FIR decimator structure.
-   * @param[in] numTaps  number of coefficients in the filter.
-   * @param[in] M  decimation factor.
-   * @param[in] *pCoeffs points to the filter coefficients.
-   * @param[in] *pState points to the state buffer.
-   * @param[in] blockSize number of input samples to process per call.
+   * @param[in,out] S          points to an instance of the floating-point FIR decimator structure.
+   * @param[in]     numTaps    number of coefficients in the filter.
+   * @param[in]     M          decimation factor.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   * @param[in]     blockSize  number of input samples to process per call.
    * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
    * <code>blockSize</code> is not a multiple of <code>M</code>.
    */
-
   arm_status arm_fir_decimate_init_f32(
   arm_fir_decimate_instance_f32 * S,
   uint16_t numTaps,
@@ -3516,30 +3411,28 @@
   float32_t * pState,
   uint32_t blockSize);
 
+
   /**
    * @brief Processing function for the Q15 FIR decimator.
-   * @param[in] *S points to an instance of the Q15 FIR decimator structure.
-   * @param[in] *pSrc points to the block of input data.
-   * @param[out] *pDst points to the block of output data
-   * @param[in] blockSize number of input samples to process per call.
-   * @return none
-   */
-
+   * @param[in]  S          points to an instance of the Q15 FIR decimator structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data
+   * @param[in]  blockSize  number of input samples to process per call.
+   */
   void arm_fir_decimate_q15(
   const arm_fir_decimate_instance_q15 * S,
   q15_t * pSrc,
   q15_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Processing function for the Q15 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
-   * @param[in] *S points to an instance of the Q15 FIR decimator structure.
-   * @param[in] *pSrc points to the block of input data.
-   * @param[out] *pDst points to the block of output data
-   * @param[in] blockSize number of input samples to process per call.
-   * @return none
-   */
-
+   * @param[in]  S          points to an instance of the Q15 FIR decimator structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data
+   * @param[in]  blockSize  number of input samples to process per call.
+   */
   void arm_fir_decimate_fast_q15(
   const arm_fir_decimate_instance_q15 * S,
   q15_t * pSrc,
@@ -3547,19 +3440,17 @@
   uint32_t blockSize);
 
 
-
   /**
    * @brief  Initialization function for the Q15 FIR decimator.
-   * @param[in,out] *S points to an instance of the Q15 FIR decimator structure.
-   * @param[in] numTaps  number of coefficients in the filter.
-   * @param[in] M  decimation factor.
-   * @param[in] *pCoeffs points to the filter coefficients.
-   * @param[in] *pState points to the state buffer.
-   * @param[in] blockSize number of input samples to process per call.
+   * @param[in,out] S          points to an instance of the Q15 FIR decimator structure.
+   * @param[in]     numTaps    number of coefficients in the filter.
+   * @param[in]     M          decimation factor.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   * @param[in]     blockSize  number of input samples to process per call.
    * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
    * <code>blockSize</code> is not a multiple of <code>M</code>.
    */
-
   arm_status arm_fir_decimate_init_q15(
   arm_fir_decimate_instance_q15 * S,
   uint16_t numTaps,
@@ -3568,15 +3459,14 @@
   q15_t * pState,
   uint32_t blockSize);
 
+
   /**
    * @brief Processing function for the Q31 FIR decimator.
-   * @param[in] *S points to an instance of the Q31 FIR decimator structure.
-   * @param[in] *pSrc points to the block of input data.
-   * @param[out] *pDst points to the block of output data
+   * @param[in]  S     points to an instance of the Q31 FIR decimator structure.
+   * @param[in]  pSrc  points to the block of input data.
+   * @param[out] pDst  points to the block of output data
    * @param[in] blockSize number of input samples to process per call.
-   * @return none
-   */
-
+   */
   void arm_fir_decimate_q31(
   const arm_fir_decimate_instance_q31 * S,
   q31_t * pSrc,
@@ -3585,13 +3475,11 @@
 
   /**
    * @brief Processing function for the Q31 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
-   * @param[in] *S points to an instance of the Q31 FIR decimator structure.
-   * @param[in] *pSrc points to the block of input data.
-   * @param[out] *pDst points to the block of output data
-   * @param[in] blockSize number of input samples to process per call.
-   * @return none
-   */
-
+   * @param[in]  S          points to an instance of the Q31 FIR decimator structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data
+   * @param[in]  blockSize  number of input samples to process per call.
+   */
   void arm_fir_decimate_fast_q31(
   arm_fir_decimate_instance_q31 * S,
   q31_t * pSrc,
@@ -3601,16 +3489,15 @@
 
   /**
    * @brief  Initialization function for the Q31 FIR decimator.
-   * @param[in,out] *S points to an instance of the Q31 FIR decimator structure.
-   * @param[in] numTaps  number of coefficients in the filter.
-   * @param[in] M  decimation factor.
-   * @param[in] *pCoeffs points to the filter coefficients.
-   * @param[in] *pState points to the state buffer.
-   * @param[in] blockSize number of input samples to process per call.
+   * @param[in,out] S          points to an instance of the Q31 FIR decimator structure.
+   * @param[in]     numTaps    number of coefficients in the filter.
+   * @param[in]     M          decimation factor.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   * @param[in]     blockSize  number of input samples to process per call.
    * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
    * <code>blockSize</code> is not a multiple of <code>M</code>.
    */
-
   arm_status arm_fir_decimate_init_q31(
   arm_fir_decimate_instance_q31 * S,
   uint16_t numTaps,
@@ -3620,11 +3507,9 @@
   uint32_t blockSize);
 
 
-
   /**
    * @brief Instance structure for the Q15 FIR interpolator.
    */
-
   typedef struct
   {
     uint8_t L;                      /**< upsample factor. */
@@ -3636,37 +3521,33 @@
   /**
    * @brief Instance structure for the Q31 FIR interpolator.
    */
-
   typedef struct
   {
     uint8_t L;                      /**< upsample factor. */
     uint16_t phaseLength;           /**< length of each polyphase filter component. */
-    q31_t *pCoeffs;                  /**< points to the coefficient array. The array is of length L*phaseLength. */
-    q31_t *pState;                   /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
+    q31_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
+    q31_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
   } arm_fir_interpolate_instance_q31;
 
   /**
    * @brief Instance structure for the floating-point FIR interpolator.
    */
-
   typedef struct
   {
     uint8_t L;                     /**< upsample factor. */
     uint16_t phaseLength;          /**< length of each polyphase filter component. */
-    float32_t *pCoeffs;             /**< points to the coefficient array. The array is of length L*phaseLength. */
-    float32_t *pState;              /**< points to the state variable array. The array is of length phaseLength+numTaps-1. */
+    float32_t *pCoeffs;            /**< points to the coefficient array. The array is of length L*phaseLength. */
+    float32_t *pState;             /**< points to the state variable array. The array is of length phaseLength+numTaps-1. */
   } arm_fir_interpolate_instance_f32;
 
 
   /**
    * @brief Processing function for the Q15 FIR interpolator.
-   * @param[in] *S        points to an instance of the Q15 FIR interpolator structure.
-   * @param[in] *pSrc     points to the block of input data.
-   * @param[out] *pDst    points to the block of output data.
-   * @param[in] blockSize number of input samples to process per call.
-   * @return none.
-   */
-
+   * @param[in]  S          points to an instance of the Q15 FIR interpolator structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of input samples to process per call.
+   */
   void arm_fir_interpolate_q15(
   const arm_fir_interpolate_instance_q15 * S,
   q15_t * pSrc,
@@ -3676,16 +3557,15 @@
 
   /**
    * @brief  Initialization function for the Q15 FIR interpolator.
-   * @param[in,out] *S        points to an instance of the Q15 FIR interpolator structure.
-   * @param[in]     L         upsample factor.
-   * @param[in]     numTaps   number of filter coefficients in the filter.
-   * @param[in]     *pCoeffs  points to the filter coefficient buffer.
-   * @param[in]     *pState   points to the state buffer.
-   * @param[in]     blockSize number of input samples to process per call.
+   * @param[in,out] S          points to an instance of the Q15 FIR interpolator structure.
+   * @param[in]     L          upsample factor.
+   * @param[in]     numTaps    number of filter coefficients in the filter.
+   * @param[in]     pCoeffs    points to the filter coefficient buffer.
+   * @param[in]     pState     points to the state buffer.
+   * @param[in]     blockSize  number of input samples to process per call.
    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
    */
-
   arm_status arm_fir_interpolate_init_q15(
   arm_fir_interpolate_instance_q15 * S,
   uint8_t L,
@@ -3694,33 +3574,32 @@
   q15_t * pState,
   uint32_t blockSize);
 
+
   /**
    * @brief Processing function for the Q31 FIR interpolator.
-   * @param[in] *S        points to an instance of the Q15 FIR interpolator structure.
-   * @param[in] *pSrc     points to the block of input data.
-   * @param[out] *pDst    points to the block of output data.
-   * @param[in] blockSize number of input samples to process per call.
-   * @return none.
-   */
-
+   * @param[in]  S          points to an instance of the Q15 FIR interpolator structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of input samples to process per call.
+   */
   void arm_fir_interpolate_q31(
   const arm_fir_interpolate_instance_q31 * S,
   q31_t * pSrc,
   q31_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Initialization function for the Q31 FIR interpolator.
-   * @param[in,out] *S        points to an instance of the Q31 FIR interpolator structure.
-   * @param[in]     L         upsample factor.
-   * @param[in]     numTaps   number of filter coefficients in the filter.
-   * @param[in]     *pCoeffs  points to the filter coefficient buffer.
-   * @param[in]     *pState   points to the state buffer.
-   * @param[in]     blockSize number of input samples to process per call.
+   * @param[in,out] S          points to an instance of the Q31 FIR interpolator structure.
+   * @param[in]     L          upsample factor.
+   * @param[in]     numTaps    number of filter coefficients in the filter.
+   * @param[in]     pCoeffs    points to the filter coefficient buffer.
+   * @param[in]     pState     points to the state buffer.
+   * @param[in]     blockSize  number of input samples to process per call.
    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
    */
-
   arm_status arm_fir_interpolate_init_q31(
   arm_fir_interpolate_instance_q31 * S,
   uint8_t L,
@@ -3732,31 +3611,29 @@
 
   /**
    * @brief Processing function for the floating-point FIR interpolator.
-   * @param[in] *S        points to an instance of the floating-point FIR interpolator structure.
-   * @param[in] *pSrc     points to the block of input data.
-   * @param[out] *pDst    points to the block of output data.
-   * @param[in] blockSize number of input samples to process per call.
-   * @return none.
-   */
-
+   * @param[in]  S          points to an instance of the floating-point FIR interpolator structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of input samples to process per call.
+   */
   void arm_fir_interpolate_f32(
   const arm_fir_interpolate_instance_f32 * S,
   float32_t * pSrc,
   float32_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Initialization function for the floating-point FIR interpolator.
-   * @param[in,out] *S        points to an instance of the floating-point FIR interpolator structure.
-   * @param[in]     L         upsample factor.
-   * @param[in]     numTaps   number of filter coefficients in the filter.
-   * @param[in]     *pCoeffs  points to the filter coefficient buffer.
-   * @param[in]     *pState   points to the state buffer.
-   * @param[in]     blockSize number of input samples to process per call.
+   * @param[in,out] S          points to an instance of the floating-point FIR interpolator structure.
+   * @param[in]     L          upsample factor.
+   * @param[in]     numTaps    number of filter coefficients in the filter.
+   * @param[in]     pCoeffs    points to the filter coefficient buffer.
+   * @param[in]     pState     points to the state buffer.
+   * @param[in]     blockSize  number of input samples to process per call.
    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
    */
-
   arm_status arm_fir_interpolate_init_f32(
   arm_fir_interpolate_instance_f32 * S,
   uint8_t L,
@@ -3765,28 +3642,25 @@
   float32_t * pState,
   uint32_t blockSize);
 
+
   /**
    * @brief Instance structure for the high precision Q31 Biquad cascade filter.
    */
-
   typedef struct
   {
     uint8_t numStages;       /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
     q63_t *pState;           /**< points to the array of state coefficients.  The array is of length 4*numStages. */
     q31_t *pCoeffs;          /**< points to the array of coefficients.  The array is of length 5*numStages. */
     uint8_t postShift;       /**< additional shift, in bits, applied to each output sample. */
-
   } arm_biquad_cas_df1_32x64_ins_q31;
 
 
   /**
-   * @param[in]  *S        points to an instance of the high precision Q31 Biquad cascade filter structure.
-   * @param[in]  *pSrc     points to the block of input data.
-   * @param[out] *pDst     points to the block of output data
-   * @param[in]  blockSize number of samples to process.
-   * @return none.
-   */
-
+   * @param[in]  S          points to an instance of the high precision Q31 Biquad cascade filter structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data
+   * @param[in]  blockSize  number of samples to process.
+   */
   void arm_biquad_cas_df1_32x64_q31(
   const arm_biquad_cas_df1_32x64_ins_q31 * S,
   q31_t * pSrc,
@@ -3795,14 +3669,12 @@
 
 
   /**
-   * @param[in,out] *S           points to an instance of the high precision Q31 Biquad cascade filter structure.
-   * @param[in]     numStages    number of 2nd order stages in the filter.
-   * @param[in]     *pCoeffs     points to the filter coefficients.
-   * @param[in]     *pState      points to the state buffer.
-   * @param[in]     postShift    shift to be applied to the output. Varies according to the coefficients format
-   * @return        none
-   */
-
+   * @param[in,out] S          points to an instance of the high precision Q31 Biquad cascade filter structure.
+   * @param[in]     numStages  number of 2nd order stages in the filter.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   * @param[in]     postShift  shift to be applied to the output. Varies according to the coefficients format
+   */
   void arm_biquad_cas_df1_32x64_init_q31(
   arm_biquad_cas_df1_32x64_ins_q31 * S,
   uint8_t numStages,
@@ -3811,11 +3683,9 @@
   uint8_t postShift);
 
 
-
   /**
    * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter.
    */
-
   typedef struct
   {
     uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
@@ -3823,12 +3693,9 @@
     float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
   } arm_biquad_cascade_df2T_instance_f32;
 
-
-
   /**
    * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter.
    */
-
   typedef struct
   {
     uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
@@ -3836,12 +3703,9 @@
     float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
   } arm_biquad_cascade_stereo_df2T_instance_f32;
 
-
-
   /**
    * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter.
    */
-
   typedef struct
   {
     uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
@@ -3852,13 +3716,11 @@
 
   /**
    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
-   * @param[in]  *S        points to an instance of the filter data structure.
-   * @param[in]  *pSrc     points to the block of input data.
-   * @param[out] *pDst     points to the block of output data
-   * @param[in]  blockSize number of samples to process.
-   * @return none.
-   */
-
+   * @param[in]  S          points to an instance of the filter data structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data
+   * @param[in]  blockSize  number of samples to process.
+   */
   void arm_biquad_cascade_df2T_f32(
   const arm_biquad_cascade_df2T_instance_f32 * S,
   float32_t * pSrc,
@@ -3868,28 +3730,25 @@
 
   /**
    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter. 2 channels
-   * @param[in]  *S        points to an instance of the filter data structure.
-   * @param[in]  *pSrc     points to the block of input data.
-   * @param[out] *pDst     points to the block of output data
-   * @param[in]  blockSize number of samples to process.
-   * @return none.
-   */
-
+   * @param[in]  S          points to an instance of the filter data structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data
+   * @param[in]  blockSize  number of samples to process.
+   */
   void arm_biquad_cascade_stereo_df2T_f32(
   const arm_biquad_cascade_stereo_df2T_instance_f32 * S,
   float32_t * pSrc,
   float32_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
-   * @param[in]  *S        points to an instance of the filter data structure.
-   * @param[in]  *pSrc     points to the block of input data.
-   * @param[out] *pDst     points to the block of output data
-   * @param[in]  blockSize number of samples to process.
-   * @return none.
-   */
-
+   * @param[in]  S          points to an instance of the filter data structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data
+   * @param[in]  blockSize  number of samples to process.
+   */
   void arm_biquad_cascade_df2T_f64(
   const arm_biquad_cascade_df2T_instance_f64 * S,
   float64_t * pSrc,
@@ -3899,13 +3758,11 @@
 
   /**
    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
-   * @param[in,out] *S           points to an instance of the filter data structure.
-   * @param[in]     numStages    number of 2nd order stages in the filter.
-   * @param[in]     *pCoeffs     points to the filter coefficients.
-   * @param[in]     *pState      points to the state buffer.
-   * @return        none
-   */
-
+   * @param[in,out] S          points to an instance of the filter data structure.
+   * @param[in]     numStages  number of 2nd order stages in the filter.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   */
   void arm_biquad_cascade_df2T_init_f32(
   arm_biquad_cascade_df2T_instance_f32 * S,
   uint8_t numStages,
@@ -3915,13 +3772,11 @@
 
   /**
    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
-   * @param[in,out] *S           points to an instance of the filter data structure.
-   * @param[in]     numStages    number of 2nd order stages in the filter.
-   * @param[in]     *pCoeffs     points to the filter coefficients.
-   * @param[in]     *pState      points to the state buffer.
-   * @return        none
-   */
-
+   * @param[in,out] S          points to an instance of the filter data structure.
+   * @param[in]     numStages  number of 2nd order stages in the filter.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   */
   void arm_biquad_cascade_stereo_df2T_init_f32(
   arm_biquad_cascade_stereo_df2T_instance_f32 * S,
   uint8_t numStages,
@@ -3931,13 +3786,11 @@
 
   /**
    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
-   * @param[in,out] *S           points to an instance of the filter data structure.
-   * @param[in]     numStages    number of 2nd order stages in the filter.
-   * @param[in]     *pCoeffs     points to the filter coefficients.
-   * @param[in]     *pState      points to the state buffer.
-   * @return        none
-   */
-
+   * @param[in,out] S          points to an instance of the filter data structure.
+   * @param[in]     numStages  number of 2nd order stages in the filter.
+   * @param[in]     pCoeffs    points to the filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   */
   void arm_biquad_cascade_df2T_init_f64(
   arm_biquad_cascade_df2T_instance_f64 * S,
   uint8_t numStages,
@@ -3945,33 +3798,29 @@
   float64_t * pState);
 
 
-
   /**
    * @brief Instance structure for the Q15 FIR lattice filter.
    */
-
   typedef struct
   {
-    uint16_t numStages;                          /**< number of filter stages. */
-    q15_t *pState;                               /**< points to the state variable array. The array is of length numStages. */
-    q15_t *pCoeffs;                              /**< points to the coefficient array. The array is of length numStages. */
+    uint16_t numStages;                  /**< number of filter stages. */
+    q15_t *pState;                       /**< points to the state variable array. The array is of length numStages. */
+    q15_t *pCoeffs;                      /**< points to the coefficient array. The array is of length numStages. */
   } arm_fir_lattice_instance_q15;
 
   /**
    * @brief Instance structure for the Q31 FIR lattice filter.
    */
-
   typedef struct
   {
-    uint16_t numStages;                          /**< number of filter stages. */
-    q31_t *pState;                               /**< points to the state variable array. The array is of length numStages. */
-    q31_t *pCoeffs;                              /**< points to the coefficient array. The array is of length numStages. */
+    uint16_t numStages;                  /**< number of filter stages. */
+    q31_t *pState;                       /**< points to the state variable array. The array is of length numStages. */
+    q31_t *pCoeffs;                      /**< points to the coefficient array. The array is of length numStages. */
   } arm_fir_lattice_instance_q31;
 
   /**
    * @brief Instance structure for the floating-point FIR lattice filter.
    */
-
   typedef struct
   {
     uint16_t numStages;                  /**< number of filter stages. */
@@ -3979,15 +3828,14 @@
     float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numStages. */
   } arm_fir_lattice_instance_f32;
 
+
   /**
    * @brief Initialization function for the Q15 FIR lattice filter.
-   * @param[in] *S points to an instance of the Q15 FIR lattice structure.
+   * @param[in] S          points to an instance of the Q15 FIR lattice structure.
    * @param[in] numStages  number of filter stages.
-   * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
-   * @param[in] *pState points to the state buffer.  The array is of length numStages.
-   * @return none.
-   */
-
+   * @param[in] pCoeffs    points to the coefficient buffer.  The array is of length numStages.
+   * @param[in] pState     points to the state buffer.  The array is of length numStages.
+   */
   void arm_fir_lattice_init_q15(
   arm_fir_lattice_instance_q15 * S,
   uint16_t numStages,
@@ -3997,11 +3845,10 @@
 
   /**
    * @brief Processing function for the Q15 FIR lattice filter.
-   * @param[in] *S points to an instance of the Q15 FIR lattice structure.
-   * @param[in] *pSrc points to the block of input data.
-   * @param[out] *pDst points to the block of output data.
-   * @param[in] blockSize number of samples to process.
-   * @return none.
+   * @param[in]  S          points to an instance of the Q15 FIR lattice structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
    */
   void arm_fir_lattice_q15(
   const arm_fir_lattice_instance_q15 * S,
@@ -4009,15 +3856,14 @@
   q15_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Initialization function for the Q31 FIR lattice filter.
-   * @param[in] *S points to an instance of the Q31 FIR lattice structure.
+   * @param[in] S          points to an instance of the Q31 FIR lattice structure.
    * @param[in] numStages  number of filter stages.
-   * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
-   * @param[in] *pState points to the state buffer.   The array is of length numStages.
-   * @return none.
-   */
-
+   * @param[in] pCoeffs    points to the coefficient buffer.  The array is of length numStages.
+   * @param[in] pState     points to the state buffer.   The array is of length numStages.
+   */
   void arm_fir_lattice_init_q31(
   arm_fir_lattice_instance_q31 * S,
   uint16_t numStages,
@@ -4027,58 +3873,55 @@
 
   /**
    * @brief Processing function for the Q31 FIR lattice filter.
-   * @param[in]  *S        points to an instance of the Q31 FIR lattice structure.
-   * @param[in]  *pSrc     points to the block of input data.
-   * @param[out] *pDst     points to the block of output data
-   * @param[in]  blockSize number of samples to process.
-   * @return none.
-   */
-
+   * @param[in]  S          points to an instance of the Q31 FIR lattice structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data
+   * @param[in]  blockSize  number of samples to process.
+   */
   void arm_fir_lattice_q31(
   const arm_fir_lattice_instance_q31 * S,
   q31_t * pSrc,
   q31_t * pDst,
   uint32_t blockSize);
 
+
 /**
  * @brief Initialization function for the floating-point FIR lattice filter.
- * @param[in] *S points to an instance of the floating-point FIR lattice structure.
+ * @param[in] S          points to an instance of the floating-point FIR lattice structure.
  * @param[in] numStages  number of filter stages.
- * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
- * @param[in] *pState points to the state buffer.  The array is of length numStages.
- * @return none.
+ * @param[in] pCoeffs    points to the coefficient buffer.  The array is of length numStages.
+ * @param[in] pState     points to the state buffer.  The array is of length numStages.
  */
-
   void arm_fir_lattice_init_f32(
   arm_fir_lattice_instance_f32 * S,
   uint16_t numStages,
   float32_t * pCoeffs,
   float32_t * pState);
 
+
   /**
    * @brief Processing function for the floating-point FIR lattice filter.
-   * @param[in]  *S        points to an instance of the floating-point FIR lattice structure.
-   * @param[in]  *pSrc     points to the block of input data.
-   * @param[out] *pDst     points to the block of output data
-   * @param[in]  blockSize number of samples to process.
-   * @return none.
-   */
-
+   * @param[in]  S          points to an instance of the floating-point FIR lattice structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data
+   * @param[in]  blockSize  number of samples to process.
+   */
   void arm_fir_lattice_f32(
   const arm_fir_lattice_instance_f32 * S,
   float32_t * pSrc,
   float32_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Instance structure for the Q15 IIR lattice filter.
    */
   typedef struct
   {
-    uint16_t numStages;                         /**< number of stages in the filter. */
-    q15_t *pState;                              /**< points to the state variable array. The array is of length numStages+blockSize. */
-    q15_t *pkCoeffs;                            /**< points to the reflection coefficient array. The array is of length numStages. */
-    q15_t *pvCoeffs;                            /**< points to the ladder coefficient array. The array is of length numStages+1. */
+    uint16_t numStages;                  /**< number of stages in the filter. */
+    q15_t *pState;                       /**< points to the state variable array. The array is of length numStages+blockSize. */
+    q15_t *pkCoeffs;                     /**< points to the reflection coefficient array. The array is of length numStages. */
+    q15_t *pvCoeffs;                     /**< points to the ladder coefficient array. The array is of length numStages+1. */
   } arm_iir_lattice_instance_q15;
 
   /**
@@ -4086,10 +3929,10 @@
    */
   typedef struct
   {
-    uint16_t numStages;                         /**< number of stages in the filter. */
-    q31_t *pState;                              /**< points to the state variable array. The array is of length numStages+blockSize. */
-    q31_t *pkCoeffs;                            /**< points to the reflection coefficient array. The array is of length numStages. */
-    q31_t *pvCoeffs;                            /**< points to the ladder coefficient array. The array is of length numStages+1. */
+    uint16_t numStages;                  /**< number of stages in the filter. */
+    q31_t *pState;                       /**< points to the state variable array. The array is of length numStages+blockSize. */
+    q31_t *pkCoeffs;                     /**< points to the reflection coefficient array. The array is of length numStages. */
+    q31_t *pvCoeffs;                     /**< points to the ladder coefficient array. The array is of length numStages+1. */
   } arm_iir_lattice_instance_q31;
 
   /**
@@ -4097,38 +3940,36 @@
    */
   typedef struct
   {
-    uint16_t numStages;                         /**< number of stages in the filter. */
-    float32_t *pState;                          /**< points to the state variable array. The array is of length numStages+blockSize. */
-    float32_t *pkCoeffs;                        /**< points to the reflection coefficient array. The array is of length numStages. */
-    float32_t *pvCoeffs;                        /**< points to the ladder coefficient array. The array is of length numStages+1. */
+    uint16_t numStages;                  /**< number of stages in the filter. */
+    float32_t *pState;                   /**< points to the state variable array. The array is of length numStages+blockSize. */
+    float32_t *pkCoeffs;                 /**< points to the reflection coefficient array. The array is of length numStages. */
+    float32_t *pvCoeffs;                 /**< points to the ladder coefficient array. The array is of length numStages+1. */
   } arm_iir_lattice_instance_f32;
 
+
   /**
    * @brief Processing function for the floating-point IIR lattice filter.
-   * @param[in] *S points to an instance of the floating-point IIR lattice structure.
-   * @param[in] *pSrc points to the block of input data.
-   * @param[out] *pDst points to the block of output data.
-   * @param[in] blockSize number of samples to process.
-   * @return none.
-   */
-
+   * @param[in]  S          points to an instance of the floating-point IIR lattice structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
+   */
   void arm_iir_lattice_f32(
   const arm_iir_lattice_instance_f32 * S,
   float32_t * pSrc,
   float32_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Initialization function for the floating-point IIR lattice filter.
-   * @param[in] *S points to an instance of the floating-point IIR lattice structure.
-   * @param[in] numStages number of stages in the filter.
-   * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
-   * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
-   * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize-1.
-   * @param[in] blockSize number of samples to process.
-   * @return none.
-   */
-
+   * @param[in] S          points to an instance of the floating-point IIR lattice structure.
+   * @param[in] numStages  number of stages in the filter.
+   * @param[in] pkCoeffs   points to the reflection coefficient buffer.  The array is of length numStages.
+   * @param[in] pvCoeffs   points to the ladder coefficient buffer.  The array is of length numStages+1.
+   * @param[in] pState     points to the state buffer.  The array is of length numStages+blockSize-1.
+   * @param[in] blockSize  number of samples to process.
+   */
   void arm_iir_lattice_init_f32(
   arm_iir_lattice_instance_f32 * S,
   uint16_t numStages,
@@ -4140,13 +3981,11 @@
 
   /**
    * @brief Processing function for the Q31 IIR lattice filter.
-   * @param[in] *S points to an instance of the Q31 IIR lattice structure.
-   * @param[in] *pSrc points to the block of input data.
-   * @param[out] *pDst points to the block of output data.
-   * @param[in] blockSize number of samples to process.
-   * @return none.
-   */
-
+   * @param[in]  S          points to an instance of the Q31 IIR lattice structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
+   */
   void arm_iir_lattice_q31(
   const arm_iir_lattice_instance_q31 * S,
   q31_t * pSrc,
@@ -4156,15 +3995,13 @@
 
   /**
    * @brief Initialization function for the Q31 IIR lattice filter.
-   * @param[in] *S points to an instance of the Q31 IIR lattice structure.
-   * @param[in] numStages number of stages in the filter.
-   * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
-   * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
-   * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize.
-   * @param[in] blockSize number of samples to process.
-   * @return none.
-   */
-
+   * @param[in] S          points to an instance of the Q31 IIR lattice structure.
+   * @param[in] numStages  number of stages in the filter.
+   * @param[in] pkCoeffs   points to the reflection coefficient buffer.  The array is of length numStages.
+   * @param[in] pvCoeffs   points to the ladder coefficient buffer.  The array is of length numStages+1.
+   * @param[in] pState     points to the state buffer.  The array is of length numStages+blockSize.
+   * @param[in] blockSize  number of samples to process.
+   */
   void arm_iir_lattice_init_q31(
   arm_iir_lattice_instance_q31 * S,
   uint16_t numStages,
@@ -4176,13 +4013,11 @@
 
   /**
    * @brief Processing function for the Q15 IIR lattice filter.
-   * @param[in] *S points to an instance of the Q15 IIR lattice structure.
-   * @param[in] *pSrc points to the block of input data.
-   * @param[out] *pDst points to the block of output data.
-   * @param[in] blockSize number of samples to process.
-   * @return none.
-   */
-
+   * @param[in]  S          points to an instance of the Q15 IIR lattice structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[out] pDst       points to the block of output data.
+   * @param[in]  blockSize  number of samples to process.
+   */
   void arm_iir_lattice_q15(
   const arm_iir_lattice_instance_q15 * S,
   q15_t * pSrc,
@@ -4192,15 +4027,13 @@
 
 /**
  * @brief Initialization function for the Q15 IIR lattice filter.
- * @param[in] *S points to an instance of the fixed-point Q15 IIR lattice structure.
+ * @param[in] S          points to an instance of the fixed-point Q15 IIR lattice structure.
  * @param[in] numStages  number of stages in the filter.
- * @param[in] *pkCoeffs points to reflection coefficient buffer.  The array is of length numStages.
- * @param[in] *pvCoeffs points to ladder coefficient buffer.  The array is of length numStages+1.
- * @param[in] *pState points to state buffer.  The array is of length numStages+blockSize.
- * @param[in] blockSize number of samples to process per call.
- * @return none.
+ * @param[in] pkCoeffs   points to reflection coefficient buffer.  The array is of length numStages.
+ * @param[in] pvCoeffs   points to ladder coefficient buffer.  The array is of length numStages+1.
+ * @param[in] pState     points to state buffer.  The array is of length numStages+blockSize.
+ * @param[in] blockSize  number of samples to process per call.
  */
-
   void arm_iir_lattice_init_q15(
   arm_iir_lattice_instance_q15 * S,
   uint16_t numStages,
@@ -4209,10 +4042,10 @@
   q15_t * pState,
   uint32_t blockSize);
 
+
   /**
    * @brief Instance structure for the floating-point LMS filter.
    */
-
   typedef struct
   {
     uint16_t numTaps;    /**< number of coefficients in the filter. */
@@ -4221,17 +4054,16 @@
     float32_t mu;        /**< step size that controls filter coefficient updates. */
   } arm_lms_instance_f32;
 
+
   /**
    * @brief Processing function for floating-point LMS filter.
-   * @param[in]  *S points to an instance of the floating-point LMS filter structure.
-   * @param[in]  *pSrc points to the block of input data.
-   * @param[in]  *pRef points to the block of reference data.
-   * @param[out] *pOut points to the block of output data.
-   * @param[out] *pErr points to the block of error data.
-   * @param[in]  blockSize number of samples to process.
-   * @return     none.
-   */
-
+   * @param[in]  S          points to an instance of the floating-point LMS filter structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[in]  pRef       points to the block of reference data.
+   * @param[out] pOut       points to the block of output data.
+   * @param[out] pErr       points to the block of error data.
+   * @param[in]  blockSize  number of samples to process.
+   */
   void arm_lms_f32(
   const arm_lms_instance_f32 * S,
   float32_t * pSrc,
@@ -4240,17 +4072,16 @@
   float32_t * pErr,
   uint32_t blockSize);
 
+
   /**
    * @brief Initialization function for floating-point LMS filter.
-   * @param[in] *S points to an instance of the floating-point LMS filter structure.
-   * @param[in] numTaps  number of filter coefficients.
-   * @param[in] *pCoeffs points to the coefficient buffer.
-   * @param[in] *pState points to state buffer.
-   * @param[in] mu step size that controls filter coefficient updates.
-   * @param[in] blockSize number of samples to process.
-   * @return none.
-   */
-
+   * @param[in] S          points to an instance of the floating-point LMS filter structure.
+   * @param[in] numTaps    number of filter coefficients.
+   * @param[in] pCoeffs    points to the coefficient buffer.
+   * @param[in] pState     points to state buffer.
+   * @param[in] mu         step size that controls filter coefficient updates.
+   * @param[in] blockSize  number of samples to process.
+   */
   void arm_lms_init_f32(
   arm_lms_instance_f32 * S,
   uint16_t numTaps,
@@ -4259,10 +4090,10 @@
   float32_t mu,
   uint32_t blockSize);
 
+
   /**
    * @brief Instance structure for the Q15 LMS filter.
    */
-
   typedef struct
   {
     uint16_t numTaps;    /**< number of coefficients in the filter. */
@@ -4275,16 +4106,14 @@
 
   /**
    * @brief Initialization function for the Q15 LMS filter.
-   * @param[in] *S points to an instance of the Q15 LMS filter structure.
-   * @param[in] numTaps  number of filter coefficients.
-   * @param[in] *pCoeffs points to the coefficient buffer.
-   * @param[in] *pState points to the state buffer.
-   * @param[in] mu step size that controls filter coefficient updates.
-   * @param[in] blockSize number of samples to process.
-   * @param[in] postShift bit shift applied to coefficients.
-   * @return    none.
-   */
-
+   * @param[in] S          points to an instance of the Q15 LMS filter structure.
+   * @param[in] numTaps    number of filter coefficients.
+   * @param[in] pCoeffs    points to the coefficient buffer.
+   * @param[in] pState     points to the state buffer.
+   * @param[in] mu         step size that controls filter coefficient updates.
+   * @param[in] blockSize  number of samples to process.
+   * @param[in] postShift  bit shift applied to coefficients.
+   */
   void arm_lms_init_q15(
   arm_lms_instance_q15 * S,
   uint16_t numTaps,
@@ -4294,17 +4123,16 @@
   uint32_t blockSize,
   uint32_t postShift);
 
+
   /**
    * @brief Processing function for Q15 LMS filter.
-   * @param[in] *S points to an instance of the Q15 LMS filter structure.
-   * @param[in] *pSrc points to the block of input data.
-   * @param[in] *pRef points to the block of reference data.
-   * @param[out] *pOut points to the block of output data.
-   * @param[out] *pErr points to the block of error data.
-   * @param[in] blockSize number of samples to process.
-   * @return none.
-   */
-
+   * @param[in]  S          points to an instance of the Q15 LMS filter structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[in]  pRef       points to the block of reference data.
+   * @param[out] pOut       points to the block of output data.
+   * @param[out] pErr       points to the block of error data.
+   * @param[in]  blockSize  number of samples to process.
+   */
   void arm_lms_q15(
   const arm_lms_instance_q15 * S,
   q15_t * pSrc,
@@ -4317,7 +4145,6 @@
   /**
    * @brief Instance structure for the Q31 LMS filter.
    */
-
   typedef struct
   {
     uint16_t numTaps;    /**< number of coefficients in the filter. */
@@ -4325,20 +4152,18 @@
     q31_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
     q31_t mu;            /**< step size that controls filter coefficient updates. */
     uint32_t postShift;  /**< bit shift applied to coefficients. */
-
   } arm_lms_instance_q31;
 
+
   /**
    * @brief Processing function for Q31 LMS filter.
-   * @param[in]  *S points to an instance of the Q15 LMS filter structure.
-   * @param[in]  *pSrc points to the block of input data.
-   * @param[in]  *pRef points to the block of reference data.
-   * @param[out] *pOut points to the block of output data.
-   * @param[out] *pErr points to the block of error data.
-   * @param[in]  blockSize number of samples to process.
-   * @return     none.
-   */
-
+   * @param[in]  S          points to an instance of the Q15 LMS filter structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[in]  pRef       points to the block of reference data.
+   * @param[out] pOut       points to the block of output data.
+   * @param[out] pErr       points to the block of error data.
+   * @param[in]  blockSize  number of samples to process.
+   */
   void arm_lms_q31(
   const arm_lms_instance_q31 * S,
   q31_t * pSrc,
@@ -4347,18 +4172,17 @@
   q31_t * pErr,
   uint32_t blockSize);
 
+
   /**
    * @brief Initialization function for Q31 LMS filter.
-   * @param[in] *S points to an instance of the Q31 LMS filter structure.
-   * @param[in] numTaps  number of filter coefficients.
-   * @param[in] *pCoeffs points to coefficient buffer.
-   * @param[in] *pState points to state buffer.
-   * @param[in] mu step size that controls filter coefficient updates.
-   * @param[in] blockSize number of samples to process.
-   * @param[in] postShift bit shift applied to coefficients.
-   * @return none.
-   */
-
+   * @param[in] S          points to an instance of the Q31 LMS filter structure.
+   * @param[in] numTaps    number of filter coefficients.
+   * @param[in] pCoeffs    points to coefficient buffer.
+   * @param[in] pState     points to state buffer.
+   * @param[in] mu         step size that controls filter coefficient updates.
+   * @param[in] blockSize  number of samples to process.
+   * @param[in] postShift  bit shift applied to coefficients.
+   */
   void arm_lms_init_q31(
   arm_lms_instance_q31 * S,
   uint16_t numTaps,
@@ -4368,31 +4192,30 @@
   uint32_t blockSize,
   uint32_t postShift);
 
+
   /**
    * @brief Instance structure for the floating-point normalized LMS filter.
    */
-
   typedef struct
   {
     uint16_t numTaps;     /**< number of coefficients in the filter. */
     float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
     float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
-    float32_t mu;        /**< step size that control filter coefficient updates. */
-    float32_t energy;    /**< saves previous frame energy. */
-    float32_t x0;        /**< saves previous input sample. */
+    float32_t mu;         /**< step size that control filter coefficient updates. */
+    float32_t energy;     /**< saves previous frame energy. */
+    float32_t x0;         /**< saves previous input sample. */
   } arm_lms_norm_instance_f32;
 
+
   /**
    * @brief Processing function for floating-point normalized LMS filter.
-   * @param[in] *S points to an instance of the floating-point normalized LMS filter structure.
-   * @param[in] *pSrc points to the block of input data.
-   * @param[in] *pRef points to the block of reference data.
-   * @param[out] *pOut points to the block of output data.
-   * @param[out] *pErr points to the block of error data.
-   * @param[in] blockSize number of samples to process.
-   * @return none.
-   */
-
+   * @param[in]  S          points to an instance of the floating-point normalized LMS filter structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[in]  pRef       points to the block of reference data.
+   * @param[out] pOut       points to the block of output data.
+   * @param[out] pErr       points to the block of error data.
+   * @param[in]  blockSize  number of samples to process.
+   */
   void arm_lms_norm_f32(
   arm_lms_norm_instance_f32 * S,
   float32_t * pSrc,
@@ -4401,17 +4224,16 @@
   float32_t * pErr,
   uint32_t blockSize);
 
+
   /**
    * @brief Initialization function for floating-point normalized LMS filter.
-   * @param[in] *S points to an instance of the floating-point LMS filter structure.
-   * @param[in] numTaps  number of filter coefficients.
-   * @param[in] *pCoeffs points to coefficient buffer.
-   * @param[in] *pState points to state buffer.
-   * @param[in] mu step size that controls filter coefficient updates.
-   * @param[in] blockSize number of samples to process.
-   * @return none.
-   */
-
+   * @param[in] S          points to an instance of the floating-point LMS filter structure.
+   * @param[in] numTaps    number of filter coefficients.
+   * @param[in] pCoeffs    points to coefficient buffer.
+   * @param[in] pState     points to state buffer.
+   * @param[in] mu         step size that controls filter coefficient updates.
+   * @param[in] blockSize  number of samples to process.
+   */
   void arm_lms_norm_init_f32(
   arm_lms_norm_instance_f32 * S,
   uint16_t numTaps,
@@ -4436,17 +4258,16 @@
     q31_t x0;             /**< saves previous input sample. */
   } arm_lms_norm_instance_q31;
 
+
   /**
    * @brief Processing function for Q31 normalized LMS filter.
-   * @param[in] *S points to an instance of the Q31 normalized LMS filter structure.
-   * @param[in] *pSrc points to the block of input data.
-   * @param[in] *pRef points to the block of reference data.
-   * @param[out] *pOut points to the block of output data.
-   * @param[out] *pErr points to the block of error data.
-   * @param[in] blockSize number of samples to process.
-   * @return none.
-   */
-
+   * @param[in]  S          points to an instance of the Q31 normalized LMS filter structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[in]  pRef       points to the block of reference data.
+   * @param[out] pOut       points to the block of output data.
+   * @param[out] pErr       points to the block of error data.
+   * @param[in]  blockSize  number of samples to process.
+   */
   void arm_lms_norm_q31(
   arm_lms_norm_instance_q31 * S,
   q31_t * pSrc,
@@ -4455,18 +4276,17 @@
   q31_t * pErr,
   uint32_t blockSize);
 
+
   /**
    * @brief Initialization function for Q31 normalized LMS filter.
-   * @param[in] *S points to an instance of the Q31 normalized LMS filter structure.
-   * @param[in] numTaps  number of filter coefficients.
-   * @param[in] *pCoeffs points to coefficient buffer.
-   * @param[in] *pState points to state buffer.
-   * @param[in] mu step size that controls filter coefficient updates.
-   * @param[in] blockSize number of samples to process.
-   * @param[in] postShift bit shift applied to coefficients.
-   * @return none.
-   */
-
+   * @param[in] S          points to an instance of the Q31 normalized LMS filter structure.
+   * @param[in] numTaps    number of filter coefficients.
+   * @param[in] pCoeffs    points to coefficient buffer.
+   * @param[in] pState     points to state buffer.
+   * @param[in] mu         step size that controls filter coefficient updates.
+   * @param[in] blockSize  number of samples to process.
+   * @param[in] postShift  bit shift applied to coefficients.
+   */
   void arm_lms_norm_init_q31(
   arm_lms_norm_instance_q31 * S,
   uint16_t numTaps,
@@ -4476,33 +4296,32 @@
   uint32_t blockSize,
   uint8_t postShift);
 
+
   /**
    * @brief Instance structure for the Q15 normalized LMS filter.
    */
-
   typedef struct
   {
-    uint16_t numTaps;    /**< Number of coefficients in the filter. */
+    uint16_t numTaps;     /**< Number of coefficients in the filter. */
     q15_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
     q15_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
-    q15_t mu;            /**< step size that controls filter coefficient updates. */
-    uint8_t postShift;   /**< bit shift applied to coefficients. */
-    q15_t *recipTable;   /**< Points to the reciprocal initial value table. */
-    q15_t energy;        /**< saves previous frame energy. */
-    q15_t x0;            /**< saves previous input sample. */
+    q15_t mu;             /**< step size that controls filter coefficient updates. */
+    uint8_t postShift;    /**< bit shift applied to coefficients. */
+    q15_t *recipTable;    /**< Points to the reciprocal initial value table. */
+    q15_t energy;         /**< saves previous frame energy. */
+    q15_t x0;             /**< saves previous input sample. */
   } arm_lms_norm_instance_q15;
 
+
   /**
    * @brief Processing function for Q15 normalized LMS filter.
-   * @param[in] *S points to an instance of the Q15 normalized LMS filter structure.
-   * @param[in] *pSrc points to the block of input data.
-   * @param[in] *pRef points to the block of reference data.
-   * @param[out] *pOut points to the block of output data.
-   * @param[out] *pErr points to the block of error data.
-   * @param[in] blockSize number of samples to process.
-   * @return none.
-   */
-
+   * @param[in]  S          points to an instance of the Q15 normalized LMS filter structure.
+   * @param[in]  pSrc       points to the block of input data.
+   * @param[in]  pRef       points to the block of reference data.
+   * @param[out] pOut       points to the block of output data.
+   * @param[out] pErr       points to the block of error data.
+   * @param[in]  blockSize  number of samples to process.
+   */
   void arm_lms_norm_q15(
   arm_lms_norm_instance_q15 * S,
   q15_t * pSrc,
@@ -4514,16 +4333,14 @@
 
   /**
    * @brief Initialization function for Q15 normalized LMS filter.
-   * @param[in] *S points to an instance of the Q15 normalized LMS filter structure.
-   * @param[in] numTaps  number of filter coefficients.
-   * @param[in] *pCoeffs points to coefficient buffer.
-   * @param[in] *pState points to state buffer.
-   * @param[in] mu step size that controls filter coefficient updates.
-   * @param[in] blockSize number of samples to process.
-   * @param[in] postShift bit shift applied to coefficients.
-   * @return none.
-   */
-
+   * @param[in] S          points to an instance of the Q15 normalized LMS filter structure.
+   * @param[in] numTaps    number of filter coefficients.
+   * @param[in] pCoeffs    points to coefficient buffer.
+   * @param[in] pState     points to state buffer.
+   * @param[in] mu         step size that controls filter coefficient updates.
+   * @param[in] blockSize  number of samples to process.
+   * @param[in] postShift  bit shift applied to coefficients.
+   */
   void arm_lms_norm_init_q15(
   arm_lms_norm_instance_q15 * S,
   uint16_t numTaps,
@@ -4533,16 +4350,15 @@
   uint32_t blockSize,
   uint8_t postShift);
 
+
   /**
    * @brief Correlation of floating-point sequences.
-   * @param[in] *pSrcA points to the first input sequence.
-   * @param[in] srcALen length of the first input sequence.
-   * @param[in] *pSrcB points to the second input sequence.
-   * @param[in] srcBLen length of the second input sequence.
-   * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
-   * @return none.
-   */
-
+   * @param[in]  pSrcA    points to the first input sequence.
+   * @param[in]  srcALen  length of the first input sequence.
+   * @param[in]  pSrcB    points to the second input sequence.
+   * @param[in]  srcBLen  length of the second input sequence.
+   * @param[out] pDst     points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
+   */
   void arm_correlate_f32(
   float32_t * pSrcA,
   uint32_t srcALen,
@@ -4553,13 +4369,12 @@
 
    /**
    * @brief Correlation of Q15 sequences
-   * @param[in] *pSrcA points to the first input sequence.
-   * @param[in] srcALen length of the first input sequence.
-   * @param[in] *pSrcB points to the second input sequence.
-   * @param[in] srcBLen length of the second input sequence.
-   * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
-   * @param[in]  *pScratch points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
-   * @return none.
+   * @param[in]  pSrcA     points to the first input sequence.
+   * @param[in]  srcALen   length of the first input sequence.
+   * @param[in]  pSrcB     points to the second input sequence.
+   * @param[in]  srcBLen   length of the second input sequence.
+   * @param[out] pDst      points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
+   * @param[in]  pScratch  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
    */
   void arm_correlate_opt_q15(
   q15_t * pSrcA,
@@ -4572,12 +4387,11 @@
 
   /**
    * @brief Correlation of Q15 sequences.
-   * @param[in] *pSrcA points to the first input sequence.
-   * @param[in] srcALen length of the first input sequence.
-   * @param[in] *pSrcB points to the second input sequence.
-   * @param[in] srcBLen length of the second input sequence.
-   * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
-   * @return none.
+   * @param[in]  pSrcA    points to the first input sequence.
+   * @param[in]  srcALen  length of the first input sequence.
+   * @param[in]  pSrcB    points to the second input sequence.
+   * @param[in]  srcBLen  length of the second input sequence.
+   * @param[out] pDst     points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
    */
 
   void arm_correlate_q15(
@@ -4587,36 +4401,33 @@
   uint32_t srcBLen,
   q15_t * pDst);
 
+
   /**
    * @brief Correlation of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
-   * @param[in] *pSrcA points to the first input sequence.
-   * @param[in] srcALen length of the first input sequence.
-   * @param[in] *pSrcB points to the second input sequence.
-   * @param[in] srcBLen length of the second input sequence.
-   * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
-   * @return none.
+   * @param[in]  pSrcA    points to the first input sequence.
+   * @param[in]  srcALen  length of the first input sequence.
+   * @param[in]  pSrcB    points to the second input sequence.
+   * @param[in]  srcBLen  length of the second input sequence.
+   * @param[out] pDst     points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
    */
 
   void arm_correlate_fast_q15(
-			       q15_t * pSrcA,
-			      uint32_t srcALen,
-			       q15_t * pSrcB,
-			      uint32_t srcBLen,
-			      q15_t * pDst);
-
+  q15_t * pSrcA,
+  uint32_t srcALen,
+  q15_t * pSrcB,
+  uint32_t srcBLen,
+  q15_t * pDst);
 
 
   /**
    * @brief Correlation of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
-   * @param[in] *pSrcA points to the first input sequence.
-   * @param[in] srcALen length of the first input sequence.
-   * @param[in] *pSrcB points to the second input sequence.
-   * @param[in] srcBLen length of the second input sequence.
-   * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
-   * @param[in]  *pScratch points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
-   * @return none.
-   */
-
+   * @param[in]  pSrcA     points to the first input sequence.
+   * @param[in]  srcALen   length of the first input sequence.
+   * @param[in]  pSrcB     points to the second input sequence.
+   * @param[in]  srcBLen   length of the second input sequence.
+   * @param[out] pDst      points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
+   * @param[in]  pScratch  points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
+   */
   void arm_correlate_fast_opt_q15(
   q15_t * pSrcA,
   uint32_t srcALen,
@@ -4625,16 +4436,15 @@
   q15_t * pDst,
   q15_t * pScratch);
 
+
   /**
    * @brief Correlation of Q31 sequences.
-   * @param[in] *pSrcA points to the first input sequence.
-   * @param[in] srcALen length of the first input sequence.
-   * @param[in] *pSrcB points to the second input sequence.
-   * @param[in] srcBLen length of the second input sequence.
-   * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
-   * @return none.
-   */
-
+   * @param[in]  pSrcA    points to the first input sequence.
+   * @param[in]  srcALen  length of the first input sequence.
+   * @param[in]  pSrcB    points to the second input sequence.
+   * @param[in]  srcBLen  length of the second input sequence.
+   * @param[out] pDst     points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
+   */
   void arm_correlate_q31(
   q31_t * pSrcA,
   uint32_t srcALen,
@@ -4642,16 +4452,15 @@
   uint32_t srcBLen,
   q31_t * pDst);
 
+
   /**
    * @brief Correlation of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4
-   * @param[in] *pSrcA points to the first input sequence.
-   * @param[in] srcALen length of the first input sequence.
-   * @param[in] *pSrcB points to the second input sequence.
-   * @param[in] srcBLen length of the second input sequence.
-   * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
-   * @return none.
-   */
-
+   * @param[in]  pSrcA    points to the first input sequence.
+   * @param[in]  srcALen  length of the first input sequence.
+   * @param[in]  pSrcB    points to the second input sequence.
+   * @param[in]  srcBLen  length of the second input sequence.
+   * @param[out] pDst     points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
+   */
   void arm_correlate_fast_q31(
   q31_t * pSrcA,
   uint32_t srcALen,
@@ -4660,19 +4469,16 @@
   q31_t * pDst);
 
 
-
  /**
    * @brief Correlation of Q7 sequences.
-   * @param[in] *pSrcA points to the first input sequence.
-   * @param[in] srcALen length of the first input sequence.
-   * @param[in] *pSrcB points to the second input sequence.
-   * @param[in] srcBLen length of the second input sequence.
-   * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
-   * @param[in]  *pScratch1 points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
-   * @param[in]  *pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
-   * @return none.
-   */
-
+   * @param[in]  pSrcA      points to the first input sequence.
+   * @param[in]  srcALen    length of the first input sequence.
+   * @param[in]  pSrcB      points to the second input sequence.
+   * @param[in]  srcBLen    length of the second input sequence.
+   * @param[out] pDst       points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
+   * @param[in]  pScratch1  points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
+   * @param[in]  pScratch2  points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
+   */
   void arm_correlate_opt_q7(
   q7_t * pSrcA,
   uint32_t srcALen,
@@ -4685,14 +4491,12 @@
 
   /**
    * @brief Correlation of Q7 sequences.
-   * @param[in] *pSrcA points to the first input sequence.
-   * @param[in] srcALen length of the first input sequence.
-   * @param[in] *pSrcB points to the second input sequence.
-   * @param[in] srcBLen length of the second input sequence.
-   * @param[out] *pDst points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
-   * @return none.
-   */
-
+   * @param[in]  pSrcA    points to the first input sequence.
+   * @param[in]  srcALen  length of the first input sequence.
+   * @param[in]  pSrcB    points to the second input sequence.
+   * @param[in]  srcBLen  length of the second input sequence.
+   * @param[out] pDst     points to the block of output data  Length 2 * max(srcALen, srcBLen) - 1.
+   */
   void arm_correlate_q7(
   q7_t * pSrcA,
   uint32_t srcALen,
@@ -4717,7 +4521,6 @@
   /**
    * @brief Instance structure for the Q31 sparse FIR filter.
    */
-
   typedef struct
   {
     uint16_t numTaps;             /**< number of coefficients in the filter. */
@@ -4731,7 +4534,6 @@
   /**
    * @brief Instance structure for the Q15 sparse FIR filter.
    */
-
   typedef struct
   {
     uint16_t numTaps;             /**< number of coefficients in the filter. */
@@ -4745,7 +4547,6 @@
   /**
    * @brief Instance structure for the Q7 sparse FIR filter.
    */
-
   typedef struct
   {
     uint16_t numTaps;             /**< number of coefficients in the filter. */
@@ -4756,16 +4557,15 @@
     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
   } arm_fir_sparse_instance_q7;
 
+
   /**
    * @brief Processing function for the floating-point sparse FIR filter.
-   * @param[in]  *S          points to an instance of the floating-point sparse FIR structure.
-   * @param[in]  *pSrc       points to the block of input data.
-   * @param[out] *pDst       points to the block of output data
-   * @param[in]  *pScratchIn points to a temporary buffer of size blockSize.
+   * @param[in]  S           points to an instance of the floating-point sparse FIR structure.
+   * @param[in]  pSrc        points to the block of input data.
+   * @param[out] pDst        points to the block of output data
+   * @param[in]  pScratchIn  points to a temporary buffer of size blockSize.
    * @param[in]  blockSize   number of input samples to process per call.
-   * @return none.
-   */
-
+   */
   void arm_fir_sparse_f32(
   arm_fir_sparse_instance_f32 * S,
   float32_t * pSrc,
@@ -4773,18 +4573,17 @@
   float32_t * pScratchIn,
   uint32_t blockSize);
 
+
   /**
    * @brief  Initialization function for the floating-point sparse FIR filter.
-   * @param[in,out] *S         points to an instance of the floating-point sparse FIR structure.
+   * @param[in,out] S          points to an instance of the floating-point sparse FIR structure.
    * @param[in]     numTaps    number of nonzero coefficients in the filter.
-   * @param[in]     *pCoeffs   points to the array of filter coefficients.
-   * @param[in]     *pState    points to the state buffer.
-   * @param[in]     *pTapDelay points to the array of offset times.
+   * @param[in]     pCoeffs    points to the array of filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   * @param[in]     pTapDelay  points to the array of offset times.
    * @param[in]     maxDelay   maximum offset time supported.
    * @param[in]     blockSize  number of samples that will be processed per block.
-   * @return none
-   */
-
+   */
   void arm_fir_sparse_init_f32(
   arm_fir_sparse_instance_f32 * S,
   uint16_t numTaps,
@@ -4794,16 +4593,15 @@
   uint16_t maxDelay,
   uint32_t blockSize);
 
+
   /**
    * @brief Processing function for the Q31 sparse FIR filter.
-   * @param[in]  *S          points to an instance of the Q31 sparse FIR structure.
-   * @param[in]  *pSrc       points to the block of input data.
-   * @param[out] *pDst       points to the block of output data
-   * @param[in]  *pScratchIn points to a temporary buffer of size blockSize.
+   * @param[in]  S           points to an instance of the Q31 sparse FIR structure.
+   * @param[in]  pSrc        points to the block of input data.
+   * @param[out] pDst        points to the block of output data
+   * @param[in]  pScratchIn  points to a temporary buffer of size blockSize.
    * @param[in]  blockSize   number of input samples to process per call.
-   * @return none.
-   */
-
+   */
   void arm_fir_sparse_q31(
   arm_fir_sparse_instance_q31 * S,
   q31_t * pSrc,
@@ -4811,18 +4609,17 @@
   q31_t * pScratchIn,
   uint32_t blockSize);
 
+
   /**
    * @brief  Initialization function for the Q31 sparse FIR filter.
-   * @param[in,out] *S         points to an instance of the Q31 sparse FIR structure.
+   * @param[in,out] S          points to an instance of the Q31 sparse FIR structure.
    * @param[in]     numTaps    number of nonzero coefficients in the filter.
-   * @param[in]     *pCoeffs   points to the array of filter coefficients.
-   * @param[in]     *pState    points to the state buffer.
-   * @param[in]     *pTapDelay points to the array of offset times.
+   * @param[in]     pCoeffs    points to the array of filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   * @param[in]     pTapDelay  points to the array of offset times.
    * @param[in]     maxDelay   maximum offset time supported.
    * @param[in]     blockSize  number of samples that will be processed per block.
-   * @return none
-   */
-
+   */
   void arm_fir_sparse_init_q31(
   arm_fir_sparse_instance_q31 * S,
   uint16_t numTaps,
@@ -4832,17 +4629,16 @@
   uint16_t maxDelay,
   uint32_t blockSize);
 
+
   /**
    * @brief Processing function for the Q15 sparse FIR filter.
-   * @param[in]  *S           points to an instance of the Q15 sparse FIR structure.
-   * @param[in]  *pSrc        points to the block of input data.
-   * @param[out] *pDst        points to the block of output data
-   * @param[in]  *pScratchIn  points to a temporary buffer of size blockSize.
-   * @param[in]  *pScratchOut points to a temporary buffer of size blockSize.
+   * @param[in]  S            points to an instance of the Q15 sparse FIR structure.
+   * @param[in]  pSrc         points to the block of input data.
+   * @param[out] pDst         points to the block of output data
+   * @param[in]  pScratchIn   points to a temporary buffer of size blockSize.
+   * @param[in]  pScratchOut  points to a temporary buffer of size blockSize.
    * @param[in]  blockSize    number of input samples to process per call.
-   * @return none.
-   */
-
+   */
   void arm_fir_sparse_q15(
   arm_fir_sparse_instance_q15 * S,
   q15_t * pSrc,
@@ -4854,16 +4650,14 @@
 
   /**
    * @brief  Initialization function for the Q15 sparse FIR filter.
-   * @param[in,out] *S         points to an instance of the Q15 sparse FIR structure.
+   * @param[in,out] S          points to an instance of the Q15 sparse FIR structure.
    * @param[in]     numTaps    number of nonzero coefficients in the filter.
-   * @param[in]     *pCoeffs   points to the array of filter coefficients.
-   * @param[in]     *pState    points to the state buffer.
-   * @param[in]     *pTapDelay points to the array of offset times.
+   * @param[in]     pCoeffs    points to the array of filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   * @param[in]     pTapDelay  points to the array of offset times.
    * @param[in]     maxDelay   maximum offset time supported.
    * @param[in]     blockSize  number of samples that will be processed per block.
-   * @return none
-   */
-
+   */
   void arm_fir_sparse_init_q15(
   arm_fir_sparse_instance_q15 * S,
   uint16_t numTaps,
@@ -4873,17 +4667,16 @@
   uint16_t maxDelay,
   uint32_t blockSize);
 
+
   /**
    * @brief Processing function for the Q7 sparse FIR filter.
-   * @param[in]  *S           points to an instance of the Q7 sparse FIR structure.
-   * @param[in]  *pSrc        points to the block of input data.
-   * @param[out] *pDst        points to the block of output data
-   * @param[in]  *pScratchIn  points to a temporary buffer of size blockSize.
-   * @param[in]  *pScratchOut points to a temporary buffer of size blockSize.
+   * @param[in]  S            points to an instance of the Q7 sparse FIR structure.
+   * @param[in]  pSrc         points to the block of input data.
+   * @param[out] pDst         points to the block of output data
+   * @param[in]  pScratchIn   points to a temporary buffer of size blockSize.
+   * @param[in]  pScratchOut  points to a temporary buffer of size blockSize.
    * @param[in]  blockSize    number of input samples to process per call.
-   * @return none.
-   */
-
+   */
   void arm_fir_sparse_q7(
   arm_fir_sparse_instance_q7 * S,
   q7_t * pSrc,
@@ -4892,18 +4685,17 @@
   q31_t * pScratchOut,
   uint32_t blockSize);
 
+
   /**
    * @brief  Initialization function for the Q7 sparse FIR filter.
-   * @param[in,out] *S         points to an instance of the Q7 sparse FIR structure.
+   * @param[in,out] S          points to an instance of the Q7 sparse FIR structure.
    * @param[in]     numTaps    number of nonzero coefficients in the filter.
-   * @param[in]     *pCoeffs   points to the array of filter coefficients.
-   * @param[in]     *pState    points to the state buffer.
-   * @param[in]     *pTapDelay points to the array of offset times.
+   * @param[in]     pCoeffs    points to the array of filter coefficients.
+   * @param[in]     pState     points to the state buffer.
+   * @param[in]     pTapDelay  points to the array of offset times.
    * @param[in]     maxDelay   maximum offset time supported.
    * @param[in]     blockSize  number of samples that will be processed per block.
-   * @return none
-   */
-
+   */
   void arm_fir_sparse_init_q7(
   arm_fir_sparse_instance_q7 * S,
   uint16_t numTaps,
@@ -4914,27 +4706,24 @@
   uint32_t blockSize);
 
 
-  /*
+  /**
    * @brief  Floating-point sin_cos function.
-   * @param[in]  theta    input value in degrees
-   * @param[out] *pSinVal points to the processed sine output.
-   * @param[out] *pCosVal points to the processed cos output.
-   * @return none.
-   */
-
+   * @param[in]  theta   input value in degrees
+   * @param[out] pSinVal  points to the processed sine output.
+   * @param[out] pCosVal  points to the processed cos output.
+   */
   void arm_sin_cos_f32(
   float32_t theta,
   float32_t * pSinVal,
-  float32_t * pCcosVal);
-
-  /*
+  float32_t * pCosVal);
+
+
+  /**
    * @brief  Q31 sin_cos function.
    * @param[in]  theta    scaled input value in degrees
-   * @param[out] *pSinVal points to the processed sine output.
-   * @param[out] *pCosVal points to the processed cosine output.
-   * @return none.
-   */
-
+   * @param[out] pSinVal  points to the processed sine output.
+   * @param[out] pCosVal  points to the processed cosine output.
+   */
   void arm_sin_cos_q31(
   q31_t theta,
   q31_t * pSinVal,
@@ -4943,12 +4732,10 @@
 
   /**
    * @brief  Floating-point complex conjugate.
-   * @param[in]  *pSrc points to the input vector
-   * @param[out]  *pDst points to the output vector
-   * @param[in]  numSamples number of complex samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc        points to the input vector
+   * @param[out] pDst        points to the output vector
+   * @param[in]  numSamples  number of complex samples in each vector
+   */
   void arm_cmplx_conj_f32(
   float32_t * pSrc,
   float32_t * pDst,
@@ -4956,66 +4743,58 @@
 
   /**
    * @brief  Q31 complex conjugate.
-   * @param[in]  *pSrc points to the input vector
-   * @param[out]  *pDst points to the output vector
-   * @param[in]  numSamples number of complex samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc        points to the input vector
+   * @param[out] pDst        points to the output vector
+   * @param[in]  numSamples  number of complex samples in each vector
+   */
   void arm_cmplx_conj_q31(
   q31_t * pSrc,
   q31_t * pDst,
   uint32_t numSamples);
 
+
   /**
    * @brief  Q15 complex conjugate.
-   * @param[in]  *pSrc points to the input vector
-   * @param[out]  *pDst points to the output vector
-   * @param[in]  numSamples number of complex samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc        points to the input vector
+   * @param[out] pDst        points to the output vector
+   * @param[in]  numSamples  number of complex samples in each vector
+   */
   void arm_cmplx_conj_q15(
   q15_t * pSrc,
   q15_t * pDst,
   uint32_t numSamples);
 
 
-
   /**
    * @brief  Floating-point complex magnitude squared
-   * @param[in]  *pSrc points to the complex input vector
-   * @param[out]  *pDst points to the real output vector
-   * @param[in]  numSamples number of complex samples in the input vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc        points to the complex input vector
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector
+   */
   void arm_cmplx_mag_squared_f32(
   float32_t * pSrc,
   float32_t * pDst,
   uint32_t numSamples);
 
+
   /**
    * @brief  Q31 complex magnitude squared
-   * @param[in]  *pSrc points to the complex input vector
-   * @param[out]  *pDst points to the real output vector
-   * @param[in]  numSamples number of complex samples in the input vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc        points to the complex input vector
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector
+   */
   void arm_cmplx_mag_squared_q31(
   q31_t * pSrc,
   q31_t * pDst,
   uint32_t numSamples);
 
+
   /**
    * @brief  Q15 complex magnitude squared
-   * @param[in]  *pSrc points to the complex input vector
-   * @param[out]  *pDst points to the real output vector
-   * @param[in]  numSamples number of complex samples in the input vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc        points to the complex input vector
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector
+   */
   void arm_cmplx_mag_squared_q15(
   q15_t * pSrc,
   q15_t * pDst,
@@ -5090,13 +4869,11 @@
 
   /**
    * @brief  Process function for the floating-point PID Control.
-   * @param[in,out] *S is an instance of the floating-point PID Control structure
-   * @param[in] in input sample to process
+   * @param[in,out] S   is an instance of the floating-point PID Control structure
+   * @param[in]     in  input sample to process
    * @return out processed output sample.
    */
-
-
-  static __INLINE float32_t arm_pid_f32(
+  CMSIS_INLINE __STATIC_INLINE float32_t arm_pid_f32(
   arm_pid_instance_f32 * S,
   float32_t in)
   {
@@ -5118,8 +4895,8 @@
 
   /**
    * @brief  Process function for the Q31 PID Control.
-   * @param[in,out] *S points to an instance of the Q31 PID Control structure
-   * @param[in] in input sample to process
+   * @param[in,out] S  points to an instance of the Q31 PID Control structure
+   * @param[in]     in  input sample to process
    * @return out processed output sample.
    *
    * <b>Scaling and Overflow Behavior:</b>
@@ -5130,8 +4907,7 @@
    * In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions.
    * After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.
    */
-
-  static __INLINE q31_t arm_pid_q31(
+  CMSIS_INLINE __STATIC_INLINE q31_t arm_pid_q31(
   arm_pid_instance_q31 * S,
   q31_t in)
   {
@@ -5160,13 +4936,13 @@
 
     /* return to application */
     return (out);
-
   }
 
+
   /**
    * @brief  Process function for the Q15 PID Control.
-   * @param[in,out] *S points to an instance of the Q15 PID Control structure
-   * @param[in] in input sample to process
+   * @param[in,out] S   points to an instance of the Q15 PID Control structure
+   * @param[in]     in  input sample to process
    * @return out processed output sample.
    *
    * <b>Scaling and Overflow Behavior:</b>
@@ -5178,26 +4954,24 @@
    * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
    * Lastly, the accumulator is saturated to yield a result in 1.15 format.
    */
-
-  static __INLINE q15_t arm_pid_q15(
+  CMSIS_INLINE __STATIC_INLINE q15_t arm_pid_q15(
   arm_pid_instance_q15 * S,
   q15_t in)
   {
     q63_t acc;
     q15_t out;
 
-#ifndef ARM_MATH_CM0_FAMILY
+#if defined (ARM_MATH_DSP)
     __SIMD32_TYPE *vstate;
 
     /* Implementation of PID controller */
 
     /* acc = A0 * x[n]  */
-    acc = (q31_t) __SMUAD(S->A0, in);
+    acc = (q31_t) __SMUAD((uint32_t)S->A0, (uint32_t)in);
 
     /* acc += A1 * x[n-1] + A2 * x[n-2]  */
     vstate = __SIMD32_CONST(S->state);
-    acc = __SMLALD(S->A1, (q31_t) *vstate, acc);
-
+    acc = (q63_t)__SMLALD((uint32_t)S->A1, (uint32_t)*vstate, (uint64_t)acc);
 #else
     /* acc = A0 * x[n]  */
     acc = ((q31_t) S->A0) * in;
@@ -5205,7 +4979,6 @@
     /* acc += A1 * x[n-1] + A2 * x[n-2]  */
     acc += (q31_t) S->A1 * S->state[0];
     acc += (q31_t) S->A2 * S->state[1];
-
 #endif
 
     /* acc += y[n-1] */
@@ -5221,7 +4994,6 @@
 
     /* return to application */
     return (out);
-
   }
 
   /**
@@ -5231,12 +5003,11 @@
 
   /**
    * @brief Floating-point matrix inverse.
-   * @param[in]  *src points to the instance of the input floating-point matrix structure.
-   * @param[out] *dst points to the instance of the output floating-point matrix structure.
+   * @param[in]  src   points to the instance of the input floating-point matrix structure.
+   * @param[out] dst   points to the instance of the output floating-point matrix structure.
    * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match.
    * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR.
    */
-
   arm_status arm_mat_inverse_f32(
   const arm_matrix_instance_f32 * src,
   arm_matrix_instance_f32 * dst);
@@ -5244,12 +5015,11 @@
 
   /**
    * @brief Floating-point matrix inverse.
-   * @param[in]  *src points to the instance of the input floating-point matrix structure.
-   * @param[out] *dst points to the instance of the output floating-point matrix structure.
+   * @param[in]  src   points to the instance of the input floating-point matrix structure.
+   * @param[out] dst   points to the instance of the output floating-point matrix structure.
    * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match.
    * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR.
    */
-
   arm_status arm_mat_inverse_f64(
   const arm_matrix_instance_f64 * src,
   arm_matrix_instance_f64 * dst);
@@ -5260,7 +5030,6 @@
    * @ingroup groupController
    */
 
-
   /**
    * @defgroup clarke Vector Clarke Transform
    * Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector.
@@ -5291,14 +5060,12 @@
   /**
    *
    * @brief  Floating-point Clarke transform
-   * @param[in]       Ia       input three-phase coordinate <code>a</code>
-   * @param[in]       Ib       input three-phase coordinate <code>b</code>
-   * @param[out]      *pIalpha points to output two-phase orthogonal vector axis alpha
-   * @param[out]      *pIbeta  points to output two-phase orthogonal vector axis beta
-   * @return none.
-   */
-
-  static __INLINE void arm_clarke_f32(
+   * @param[in]  Ia       input three-phase coordinate <code>a</code>
+   * @param[in]  Ib       input three-phase coordinate <code>b</code>
+   * @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
+   * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
+   */
+  CMSIS_INLINE __STATIC_INLINE void arm_clarke_f32(
   float32_t Ia,
   float32_t Ib,
   float32_t * pIalpha,
@@ -5308,18 +5075,16 @@
     *pIalpha = Ia;
 
     /* Calculate pIbeta using the equation, pIbeta = (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib */
-    *pIbeta =
-      ((float32_t) 0.57735026919 * Ia + (float32_t) 1.15470053838 * Ib);
-
+    *pIbeta = ((float32_t) 0.57735026919 * Ia + (float32_t) 1.15470053838 * Ib);
   }
 
+
   /**
    * @brief  Clarke transform for Q31 version
-   * @param[in]       Ia       input three-phase coordinate <code>a</code>
-   * @param[in]       Ib       input three-phase coordinate <code>b</code>
-   * @param[out]      *pIalpha points to output two-phase orthogonal vector axis alpha
-   * @param[out]      *pIbeta  points to output two-phase orthogonal vector axis beta
-   * @return none.
+   * @param[in]  Ia       input three-phase coordinate <code>a</code>
+   * @param[in]  Ib       input three-phase coordinate <code>b</code>
+   * @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
+   * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
    *
    * <b>Scaling and Overflow Behavior:</b>
    * \par
@@ -5327,8 +5092,7 @@
    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
    * There is saturation on the addition, hence there is no risk of overflow.
    */
-
-  static __INLINE void arm_clarke_q31(
+  CMSIS_INLINE __STATIC_INLINE void arm_clarke_q31(
   q31_t Ia,
   q31_t Ib,
   q31_t * pIalpha,
@@ -5355,10 +5119,9 @@
 
   /**
    * @brief  Converts the elements of the Q7 vector to Q31 vector.
-   * @param[in]  *pSrc     input pointer
-   * @param[out]  *pDst    output pointer
-   * @param[in]  blockSize number of samples to process
-   * @return none.
+   * @param[in]  pSrc       input pointer
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
    */
   void arm_q7_to_q31(
   q7_t * pSrc,
@@ -5367,7 +5130,6 @@
 
 
 
-
   /**
    * @ingroup groupController
    */
@@ -5395,15 +5157,12 @@
 
    /**
    * @brief  Floating-point Inverse Clarke transform
-   * @param[in]       Ialpha  input two-phase orthogonal vector axis alpha
-   * @param[in]       Ibeta   input two-phase orthogonal vector axis beta
-   * @param[out]      *pIa    points to output three-phase coordinate <code>a</code>
-   * @param[out]      *pIb    points to output three-phase coordinate <code>b</code>
-   * @return none.
-   */
-
-
-  static __INLINE void arm_inv_clarke_f32(
+   * @param[in]  Ialpha  input two-phase orthogonal vector axis alpha
+   * @param[in]  Ibeta   input two-phase orthogonal vector axis beta
+   * @param[out] pIa     points to output three-phase coordinate <code>a</code>
+   * @param[out] pIb     points to output three-phase coordinate <code>b</code>
+   */
+  CMSIS_INLINE __STATIC_INLINE void arm_inv_clarke_f32(
   float32_t Ialpha,
   float32_t Ibeta,
   float32_t * pIa,
@@ -5413,17 +5172,16 @@
     *pIa = Ialpha;
 
     /* Calculating pIb from Ialpha and Ibeta by equation pIb = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta */
-    *pIb = -0.5 * Ialpha + (float32_t) 0.8660254039 *Ibeta;
-
+    *pIb = -0.5f * Ialpha + 0.8660254039f * Ibeta;
   }
 
+
   /**
    * @brief  Inverse Clarke transform for Q31 version
-   * @param[in]       Ialpha  input two-phase orthogonal vector axis alpha
-   * @param[in]       Ibeta   input two-phase orthogonal vector axis beta
-   * @param[out]      *pIa    points to output three-phase coordinate <code>a</code>
-   * @param[out]      *pIb    points to output three-phase coordinate <code>b</code>
-   * @return none.
+   * @param[in]  Ialpha  input two-phase orthogonal vector axis alpha
+   * @param[in]  Ibeta   input two-phase orthogonal vector axis beta
+   * @param[out] pIa     points to output three-phase coordinate <code>a</code>
+   * @param[out] pIb     points to output three-phase coordinate <code>b</code>
    *
    * <b>Scaling and Overflow Behavior:</b>
    * \par
@@ -5431,8 +5189,7 @@
    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
    * There is saturation on the subtraction, hence there is no risk of overflow.
    */
-
-  static __INLINE void arm_inv_clarke_q31(
+  CMSIS_INLINE __STATIC_INLINE void arm_inv_clarke_q31(
   q31_t Ialpha,
   q31_t Ibeta,
   q31_t * pIa,
@@ -5451,7 +5208,6 @@
 
     /* pIb is calculated by subtracting the products */
     *pIb = __QSUB(product2, product1);
-
   }
 
   /**
@@ -5460,10 +5216,9 @@
 
   /**
    * @brief  Converts the elements of the Q7 vector to Q15 vector.
-   * @param[in]  *pSrc     input pointer
-   * @param[out] *pDst     output pointer
-   * @param[in]  blockSize number of samples to process
-   * @return none.
+   * @param[in]  pSrc       input pointer
+   * @param[out] pDst       output pointer
+   * @param[in]  blockSize  number of samples to process
    */
   void arm_q7_to_q15(
   q7_t * pSrc,
@@ -5507,19 +5262,17 @@
 
   /**
    * @brief Floating-point Park transform
-   * @param[in]       Ialpha input two-phase vector coordinate alpha
-   * @param[in]       Ibeta  input two-phase vector coordinate beta
-   * @param[out]      *pId   points to output	rotor reference frame d
-   * @param[out]      *pIq   points to output	rotor reference frame q
-   * @param[in]       sinVal sine value of rotation angle theta
-   * @param[in]       cosVal cosine value of rotation angle theta
-   * @return none.
+   * @param[in]  Ialpha  input two-phase vector coordinate alpha
+   * @param[in]  Ibeta   input two-phase vector coordinate beta
+   * @param[out] pId     points to output   rotor reference frame d
+   * @param[out] pIq     points to output   rotor reference frame q
+   * @param[in]  sinVal  sine value of rotation angle theta
+   * @param[in]  cosVal  cosine value of rotation angle theta
    *
    * The function implements the forward Park transform.
    *
    */
-
-  static __INLINE void arm_park_f32(
+  CMSIS_INLINE __STATIC_INLINE void arm_park_f32(
   float32_t Ialpha,
   float32_t Ibeta,
   float32_t * pId,
@@ -5532,18 +5285,17 @@
 
     /* Calculate pIq using the equation, pIq = - Ialpha * sinVal + Ibeta * cosVal */
     *pIq = -Ialpha * sinVal + Ibeta * cosVal;
-
   }
 
+
   /**
    * @brief  Park transform for Q31 version
-   * @param[in]       Ialpha input two-phase vector coordinate alpha
-   * @param[in]       Ibeta  input two-phase vector coordinate beta
-   * @param[out]      *pId   points to output rotor reference frame d
-   * @param[out]      *pIq   points to output rotor reference frame q
-   * @param[in]       sinVal sine value of rotation angle theta
-   * @param[in]       cosVal cosine value of rotation angle theta
-   * @return none.
+   * @param[in]  Ialpha  input two-phase vector coordinate alpha
+   * @param[in]  Ibeta   input two-phase vector coordinate beta
+   * @param[out] pId     points to output rotor reference frame d
+   * @param[out] pIq     points to output rotor reference frame q
+   * @param[in]  sinVal  sine value of rotation angle theta
+   * @param[in]  cosVal  cosine value of rotation angle theta
    *
    * <b>Scaling and Overflow Behavior:</b>
    * \par
@@ -5551,9 +5303,7 @@
    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
    * There is saturation on the addition and subtraction, hence there is no risk of overflow.
    */
-
-
-  static __INLINE void arm_park_q31(
+  CMSIS_INLINE __STATIC_INLINE void arm_park_q31(
   q31_t Ialpha,
   q31_t Ibeta,
   q31_t * pId,
@@ -5590,10 +5340,9 @@
 
   /**
    * @brief  Converts the elements of the Q7 vector to floating-point vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[out]  *pDst is output pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @return none.
+   * @param[in]  pSrc       is input pointer
+   * @param[out] pDst       is output pointer
+   * @param[in]  blockSize  is the number of samples to process
    */
   void arm_q7_to_float(
   q7_t * pSrc,
@@ -5629,16 +5378,14 @@
 
    /**
    * @brief  Floating-point Inverse Park transform
-   * @param[in]       Id        input coordinate of rotor reference frame d
-   * @param[in]       Iq        input coordinate of rotor reference frame q
-   * @param[out]      *pIalpha  points to output two-phase orthogonal vector axis alpha
-   * @param[out]      *pIbeta   points to output two-phase orthogonal vector axis beta
-   * @param[in]       sinVal    sine value of rotation angle theta
-   * @param[in]       cosVal    cosine value of rotation angle theta
-   * @return none.
-   */
-
-  static __INLINE void arm_inv_park_f32(
+   * @param[in]  Id       input coordinate of rotor reference frame d
+   * @param[in]  Iq       input coordinate of rotor reference frame q
+   * @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
+   * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
+   * @param[in]  sinVal   sine value of rotation angle theta
+   * @param[in]  cosVal   cosine value of rotation angle theta
+   */
+  CMSIS_INLINE __STATIC_INLINE void arm_inv_park_f32(
   float32_t Id,
   float32_t Iq,
   float32_t * pIalpha,
@@ -5651,19 +5398,17 @@
 
     /* Calculate pIbeta using the equation, pIbeta = Id * sinVal + Iq * cosVal */
     *pIbeta = Id * sinVal + Iq * cosVal;
-
   }
 
 
   /**
-   * @brief  Inverse Park transform for	Q31 version
-   * @param[in]       Id        input coordinate of rotor reference frame d
-   * @param[in]       Iq        input coordinate of rotor reference frame q
-   * @param[out]      *pIalpha  points to output two-phase orthogonal vector axis alpha
-   * @param[out]      *pIbeta   points to output two-phase orthogonal vector axis beta
-   * @param[in]       sinVal    sine value of rotation angle theta
-   * @param[in]       cosVal    cosine value of rotation angle theta
-   * @return none.
+   * @brief  Inverse Park transform for   Q31 version
+   * @param[in]  Id       input coordinate of rotor reference frame d
+   * @param[in]  Iq       input coordinate of rotor reference frame q
+   * @param[out] pIalpha  points to output two-phase orthogonal vector axis alpha
+   * @param[out] pIbeta   points to output two-phase orthogonal vector axis beta
+   * @param[in]  sinVal   sine value of rotation angle theta
+   * @param[in]  cosVal   cosine value of rotation angle theta
    *
    * <b>Scaling and Overflow Behavior:</b>
    * \par
@@ -5671,9 +5416,7 @@
    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
    * There is saturation on the addition, hence there is no risk of overflow.
    */
-
-
-  static __INLINE void arm_inv_park_q31(
+  CMSIS_INLINE __STATIC_INLINE void arm_inv_park_q31(
   q31_t Id,
   q31_t Iq,
   q31_t * pIalpha,
@@ -5702,7 +5445,6 @@
 
     /* Calculate pIbeta by using the two intermediate products 3 and 4 */
     *pIbeta = __QADD(product4, product3);
-
   }
 
   /**
@@ -5712,10 +5454,9 @@
 
   /**
    * @brief  Converts the elements of the Q31 vector to floating-point vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[out]  *pDst is output pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @return none.
+   * @param[in]  pSrc       is input pointer
+   * @param[out] pDst       is output pointer
+   * @param[in]  blockSize  is the number of samples to process
    */
   void arm_q31_to_float(
   q31_t * pSrc,
@@ -5765,17 +5506,15 @@
 
   /**
    * @brief  Process function for the floating-point Linear Interpolation Function.
-   * @param[in,out] *S is an instance of the floating-point Linear Interpolation structure
-   * @param[in] x input sample to process
+   * @param[in,out] S  is an instance of the floating-point Linear Interpolation structure
+   * @param[in]     x  input sample to process
    * @return y processed output sample.
    *
    */
-
-  static __INLINE float32_t arm_linear_interp_f32(
+  CMSIS_INLINE __STATIC_INLINE float32_t arm_linear_interp_f32(
   arm_linear_interp_instance_f32 * S,
   float32_t x)
   {
-
     float32_t y;
     float32_t x0, x1;                            /* Nearest input values */
     float32_t y0, y1;                            /* Nearest output values */
@@ -5786,12 +5525,12 @@
     /* Calculation of index */
     i = (int32_t) ((x - S->x1) / xSpacing);
 
-    if(i < 0)
+    if (i < 0)
     {
       /* Iniatilize output for below specified range as least output value of table */
       y = pYData[0];
     }
-    else if((uint32_t)i >= S->nValues)
+    else if ((uint32_t)i >= S->nValues)
     {
       /* Iniatilize output for above specified range as last output value of table */
       y = pYData[S->nValues - 1];
@@ -5799,7 +5538,7 @@
     else
     {
       /* Calculation of nearest input values */
-      x0 = S->x1 + i * xSpacing;
+      x0 = S->x1 +  i      * xSpacing;
       x1 = S->x1 + (i + 1) * xSpacing;
 
       /* Read of nearest output values */
@@ -5815,12 +5554,13 @@
     return (y);
   }
 
+
    /**
    *
    * @brief  Process function for the Q31 Linear Interpolation Function.
-   * @param[in] *pYData  pointer to Q31 Linear Interpolation table
-   * @param[in] x input sample to process
-   * @param[in] nValues number of table values
+   * @param[in] pYData   pointer to Q31 Linear Interpolation table
+   * @param[in] x        input sample to process
+   * @param[in] nValues  number of table values
    * @return y processed output sample.
    *
    * \par
@@ -5828,9 +5568,7 @@
    * This function can support maximum of table size 2^12.
    *
    */
-
-
-  static __INLINE q31_t arm_linear_interp_q31(
+  CMSIS_INLINE __STATIC_INLINE q31_t arm_linear_interp_q31(
   q31_t * pYData,
   q31_t x,
   uint32_t nValues)
@@ -5843,26 +5581,25 @@
     /* Input is in 12.20 format */
     /* 12 bits for the table index */
     /* Index value calculation */
-    index = ((x & 0xFFF00000) >> 20);
-
-    if(index >= (int32_t)(nValues - 1))
+    index = ((x & (q31_t)0xFFF00000) >> 20);
+
+    if (index >= (int32_t)(nValues - 1))
     {
       return (pYData[nValues - 1]);
     }
-    else if(index < 0)
+    else if (index < 0)
     {
       return (pYData[0]);
     }
     else
     {
-
       /* 20 bits for the fractional part */
       /* shift left by 11 to keep fract in 1.31 format */
       fract = (x & 0x000FFFFF) << 11;
 
       /* Read two nearest output values from the index in 1.31(q31) format */
       y0 = pYData[index];
-      y1 = pYData[index + 1u];
+      y1 = pYData[index + 1];
 
       /* Calculation of y0 * (1-fract) and y is in 2.30 format */
       y = ((q31_t) ((q63_t) y0 * (0x7FFFFFFF - fract) >> 32));
@@ -5872,17 +5609,16 @@
 
       /* Convert y to 1.31 format */
       return (y << 1u);
-
     }
-
   }
 
+
   /**
    *
    * @brief  Process function for the Q15 Linear Interpolation Function.
-   * @param[in] *pYData  pointer to Q15 Linear Interpolation table
-   * @param[in] x input sample to process
-   * @param[in] nValues number of table values
+   * @param[in] pYData   pointer to Q15 Linear Interpolation table
+   * @param[in] x        input sample to process
+   * @param[in] nValues  number of table values
    * @return y processed output sample.
    *
    * \par
@@ -5890,9 +5626,7 @@
    * This function can support maximum of table size 2^12.
    *
    */
-
-
-  static __INLINE q15_t arm_linear_interp_q15(
+  CMSIS_INLINE __STATIC_INLINE q15_t arm_linear_interp_q15(
   q15_t * pYData,
   q31_t x,
   uint32_t nValues)
@@ -5905,13 +5639,13 @@
     /* Input is in 12.20 format */
     /* 12 bits for the table index */
     /* Index value calculation */
-    index = ((x & 0xFFF00000) >> 20u);
-
-    if(index >= (int32_t)(nValues - 1))
+    index = ((x & (int32_t)0xFFF00000) >> 20);
+
+    if (index >= (int32_t)(nValues - 1))
     {
       return (pYData[nValues - 1]);
     }
-    else if(index < 0)
+    else if (index < 0)
     {
       return (pYData[0]);
     }
@@ -5923,7 +5657,7 @@
 
       /* Read two nearest output values from the index */
       y0 = pYData[index];
-      y1 = pYData[index + 1u];
+      y1 = pYData[index + 1];
 
       /* Calculation of y0 * (1-fract) and y is in 13.35 format */
       y = ((q63_t) y0 * (0xFFFFF - fract));
@@ -5932,27 +5666,24 @@
       y += ((q63_t) y1 * (fract));
 
       /* convert y to 1.15 format */
-      return (y >> 20);
+      return (q15_t) (y >> 20);
     }
-
-
   }
 
+
   /**
    *
    * @brief  Process function for the Q7 Linear Interpolation Function.
-   * @param[in] *pYData  pointer to Q7 Linear Interpolation table
-   * @param[in] x input sample to process
-   * @param[in] nValues number of table values
+   * @param[in] pYData   pointer to Q7 Linear Interpolation table
+   * @param[in] x        input sample to process
+   * @param[in] nValues  number of table values
    * @return y processed output sample.
    *
    * \par
    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
    * This function can support maximum of table size 2^12.
    */
-
-
-  static __INLINE q7_t arm_linear_interp_q7(
+  CMSIS_INLINE __STATIC_INLINE q7_t arm_linear_interp_q7(
   q7_t * pYData,
   q31_t x,
   uint32_t nValues)
@@ -5971,21 +5702,19 @@
     }
     index = (x >> 20) & 0xfff;
 
-
-    if(index >= (nValues - 1))
+    if (index >= (nValues - 1))
     {
       return (pYData[nValues - 1]);
     }
     else
     {
-
       /* 20 bits for the fractional part */
       /* fract is in 12.20 format */
       fract = (x & 0x000FFFFF);
 
       /* Read two nearest output values from the index and are in 1.7(q7) format */
       y0 = pYData[index];
-      y1 = pYData[index + 1u];
+      y1 = pYData[index + 1];
 
       /* Calculation of y0 * (1-fract ) and y is in 13.27(q27) format */
       y = ((y0 * (0xFFFFF - fract)));
@@ -5994,66 +5723,64 @@
       y += (y1 * fract);
 
       /* convert y to 1.7(q7) format */
-      return (y >> 20u);
-
-    }
-
+      return (q7_t) (y >> 20);
+     }
   }
+
   /**
    * @} end of LinearInterpolate group
    */
 
   /**
    * @brief  Fast approximation to the trigonometric sine function for floating-point data.
-   * @param[in] x input value in radians.
+   * @param[in] x  input value in radians.
    * @return  sin(x).
    */
-
   float32_t arm_sin_f32(
   float32_t x);
 
+
   /**
    * @brief  Fast approximation to the trigonometric sine function for Q31 data.
-   * @param[in] x Scaled input value in radians.
+   * @param[in] x  Scaled input value in radians.
    * @return  sin(x).
    */
-
   q31_t arm_sin_q31(
   q31_t x);
 
+
   /**
    * @brief  Fast approximation to the trigonometric sine function for Q15 data.
-   * @param[in] x Scaled input value in radians.
+   * @param[in] x  Scaled input value in radians.
    * @return  sin(x).
    */
-
   q15_t arm_sin_q15(
   q15_t x);
 
+
   /**
    * @brief  Fast approximation to the trigonometric cosine function for floating-point data.
-   * @param[in] x input value in radians.
+   * @param[in] x  input value in radians.
    * @return  cos(x).
    */
-
   float32_t arm_cos_f32(
   float32_t x);
 
+
   /**
    * @brief Fast approximation to the trigonometric cosine function for Q31 data.
-   * @param[in] x Scaled input value in radians.
+   * @param[in] x  Scaled input value in radians.
    * @return  cos(x).
    */
-
   q31_t arm_cos_q31(
   q31_t x);
 
+
   /**
    * @brief  Fast approximation to the trigonometric cosine function for Q15 data.
-   * @param[in] x Scaled input value in radians.
+   * @param[in] x  Scaled input value in radians.
    * @return  cos(x).
    */
-
   q15_t arm_cos_q15(
   q15_t x);
 
@@ -6091,22 +5818,26 @@
 
   /**
    * @brief  Floating-point square root function.
-   * @param[in]  in     input value.
-   * @param[out] *pOut  square root of input value.
+   * @param[in]  in    input value.
+   * @param[out] pOut  square root of input value.
    * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
    * <code>in</code> is negative value and returns zero output for negative values.
    */
-
-  static __INLINE arm_status arm_sqrt_f32(
+  CMSIS_INLINE __STATIC_INLINE arm_status arm_sqrt_f32(
   float32_t in,
   float32_t * pOut)
   {
-    if(in >= 0.0f)
+    if (in >= 0.0f)
     {
 
-//      #if __FPU_USED
-#if (__FPU_USED == 1) && defined ( __CC_ARM   )
+#if   (__FPU_USED == 1) && defined ( __CC_ARM   )
       *pOut = __sqrtf(in);
+#elif (__FPU_USED == 1) && (defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050))
+      *pOut = __builtin_sqrtf(in);
+#elif (__FPU_USED == 1) && defined(__GNUC__)
+      *pOut = __builtin_sqrtf(in);
+#elif (__FPU_USED == 1) && defined ( __ICCARM__ ) && (__VER__ >= 6040000)
+      __ASM("VSQRT.F32 %0,%1" : "=t"(*pOut) : "t"(in));
 #else
       *pOut = sqrtf(in);
 #endif
@@ -6118,14 +5849,13 @@
       *pOut = 0.0f;
       return (ARM_MATH_ARGUMENT_ERROR);
     }
-
   }
 
 
   /**
    * @brief Q31 square root function.
-   * @param[in]   in    input value.  The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF.
-   * @param[out]  *pOut square root of input value.
+   * @param[in]  in    input value.  The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF.
+   * @param[out] pOut  square root of input value.
    * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
    * <code>in</code> is negative value and returns zero output for negative values.
    */
@@ -6133,10 +5863,11 @@
   q31_t in,
   q31_t * pOut);
 
+
   /**
    * @brief  Q15 square root function.
-   * @param[in]   in     input value.  The range of the input value is [0 +1) or 0x0000 to 0x7FFF.
-   * @param[out]  *pOut  square root of input value.
+   * @param[in]  in    input value.  The range of the input value is [0 +1) or 0x0000 to 0x7FFF.
+   * @param[out] pOut  square root of input value.
    * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
    * <code>in</code> is negative value and returns zero output for negative values.
    */
@@ -6149,15 +5880,10 @@
    */
 
 
-
-
-
-
   /**
    * @brief floating-point Circular write function.
    */
-
-  static __INLINE void arm_circularWrite_f32(
+  CMSIS_INLINE __STATIC_INLINE void arm_circularWrite_f32(
   int32_t * circBuffer,
   int32_t L,
   uint16_t * writeOffset,
@@ -6176,7 +5902,7 @@
     /* Loop over the blockSize */
     i = blockSize;
 
-    while(i > 0u)
+    while (i > 0u)
     {
       /* copy the input sample to the circular buffer */
       circBuffer[wOffset] = *src;
@@ -6186,7 +5912,7 @@
 
       /* Circularly update wOffset.  Watch out for positive and negative value */
       wOffset += bufferInc;
-      if(wOffset >= L)
+      if (wOffset >= L)
         wOffset -= L;
 
       /* Decrement the loop counter */
@@ -6194,7 +5920,7 @@
     }
 
     /* Update the index pointer */
-    *writeOffset = wOffset;
+    *writeOffset = (uint16_t)wOffset;
   }
 
 
@@ -6202,7 +5928,7 @@
   /**
    * @brief floating-point Circular Read function.
    */
-  static __INLINE void arm_circularRead_f32(
+  CMSIS_INLINE __STATIC_INLINE void arm_circularRead_f32(
   int32_t * circBuffer,
   int32_t L,
   int32_t * readOffset,
@@ -6224,7 +5950,7 @@
     /* Loop over the blockSize */
     i = blockSize;
 
-    while(i > 0u)
+    while (i > 0u)
     {
       /* copy the sample from the circular buffer to the destination buffer */
       *dst = circBuffer[rOffset];
@@ -6232,7 +5958,7 @@
       /* Update the input pointer */
       dst += dstInc;
 
-      if(dst == (int32_t *) dst_end)
+      if (dst == (int32_t *) dst_end)
       {
         dst = dst_base;
       }
@@ -6240,7 +5966,7 @@
       /* Circularly update rOffset.  Watch out for positive and negative value  */
       rOffset += bufferInc;
 
-      if(rOffset >= L)
+      if (rOffset >= L)
       {
         rOffset -= L;
       }
@@ -6253,11 +5979,11 @@
     *readOffset = rOffset;
   }
 
+
   /**
    * @brief Q15 Circular write function.
    */
-
-  static __INLINE void arm_circularWrite_q15(
+  CMSIS_INLINE __STATIC_INLINE void arm_circularWrite_q15(
   q15_t * circBuffer,
   int32_t L,
   uint16_t * writeOffset,
@@ -6276,7 +6002,7 @@
     /* Loop over the blockSize */
     i = blockSize;
 
-    while(i > 0u)
+    while (i > 0u)
     {
       /* copy the input sample to the circular buffer */
       circBuffer[wOffset] = *src;
@@ -6286,7 +6012,7 @@
 
       /* Circularly update wOffset.  Watch out for positive and negative value */
       wOffset += bufferInc;
-      if(wOffset >= L)
+      if (wOffset >= L)
         wOffset -= L;
 
       /* Decrement the loop counter */
@@ -6294,15 +6020,14 @@
     }
 
     /* Update the index pointer */
-    *writeOffset = wOffset;
+    *writeOffset = (uint16_t)wOffset;
   }
 
 
-
   /**
    * @brief Q15 Circular Read function.
    */
-  static __INLINE void arm_circularRead_q15(
+  CMSIS_INLINE __STATIC_INLINE void arm_circularRead_q15(
   q15_t * circBuffer,
   int32_t L,
   int32_t * readOffset,
@@ -6325,7 +6050,7 @@
     /* Loop over the blockSize */
     i = blockSize;
 
-    while(i > 0u)
+    while (i > 0u)
     {
       /* copy the sample from the circular buffer to the destination buffer */
       *dst = circBuffer[rOffset];
@@ -6333,7 +6058,7 @@
       /* Update the input pointer */
       dst += dstInc;
 
-      if(dst == (q15_t *) dst_end)
+      if (dst == (q15_t *) dst_end)
       {
         dst = dst_base;
       }
@@ -6341,7 +6066,7 @@
       /* Circularly update wOffset.  Watch out for positive and negative value */
       rOffset += bufferInc;
 
-      if(rOffset >= L)
+      if (rOffset >= L)
       {
         rOffset -= L;
       }
@@ -6358,8 +6083,7 @@
   /**
    * @brief Q7 Circular write function.
    */
-
-  static __INLINE void arm_circularWrite_q7(
+  CMSIS_INLINE __STATIC_INLINE void arm_circularWrite_q7(
   q7_t * circBuffer,
   int32_t L,
   uint16_t * writeOffset,
@@ -6378,7 +6102,7 @@
     /* Loop over the blockSize */
     i = blockSize;
 
-    while(i > 0u)
+    while (i > 0u)
     {
       /* copy the input sample to the circular buffer */
       circBuffer[wOffset] = *src;
@@ -6388,7 +6112,7 @@
 
       /* Circularly update wOffset.  Watch out for positive and negative value */
       wOffset += bufferInc;
-      if(wOffset >= L)
+      if (wOffset >= L)
         wOffset -= L;
 
       /* Decrement the loop counter */
@@ -6396,15 +6120,14 @@
     }
 
     /* Update the index pointer */
-    *writeOffset = wOffset;
+    *writeOffset = (uint16_t)wOffset;
   }
 
 
-
   /**
    * @brief Q7 Circular Read function.
    */
-  static __INLINE void arm_circularRead_q7(
+  CMSIS_INLINE __STATIC_INLINE void arm_circularRead_q7(
   q7_t * circBuffer,
   int32_t L,
   int32_t * readOffset,
@@ -6427,7 +6150,7 @@
     /* Loop over the blockSize */
     i = blockSize;
 
-    while(i > 0u)
+    while (i > 0u)
     {
       /* copy the sample from the circular buffer to the destination buffer */
       *dst = circBuffer[rOffset];
@@ -6435,7 +6158,7 @@
       /* Update the input pointer */
       dst += dstInc;
 
-      if(dst == (q7_t *) dst_end)
+      if (dst == (q7_t *) dst_end)
       {
         dst = dst_base;
       }
@@ -6443,7 +6166,7 @@
       /* Circularly update rOffset.  Watch out for positive and negative value */
       rOffset += bufferInc;
 
-      if(rOffset >= L)
+      if (rOffset >= L)
       {
         rOffset -= L;
       }
@@ -6459,271 +6182,252 @@
 
   /**
    * @brief  Sum of the squares of the elements of a Q31 vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *pResult is output value.
-   * @return none.
-   */
-
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
   void arm_power_q31(
   q31_t * pSrc,
   uint32_t blockSize,
   q63_t * pResult);
 
+
   /**
    * @brief  Sum of the squares of the elements of a floating-point vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *pResult is output value.
-   * @return none.
-   */
-
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
   void arm_power_f32(
   float32_t * pSrc,
   uint32_t blockSize,
   float32_t * pResult);
 
+
   /**
    * @brief  Sum of the squares of the elements of a Q15 vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *pResult is output value.
-   * @return none.
-   */
-
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
   void arm_power_q15(
   q15_t * pSrc,
   uint32_t blockSize,
   q63_t * pResult);
 
+
   /**
    * @brief  Sum of the squares of the elements of a Q7 vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *pResult is output value.
-   * @return none.
-   */
-
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
   void arm_power_q7(
   q7_t * pSrc,
   uint32_t blockSize,
   q31_t * pResult);
 
+
   /**
    * @brief  Mean value of a Q7 vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *pResult is output value.
-   * @return none.
-   */
-
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
   void arm_mean_q7(
   q7_t * pSrc,
   uint32_t blockSize,
   q7_t * pResult);
 
+
   /**
    * @brief  Mean value of a Q15 vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *pResult is output value.
-   * @return none.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
    */
   void arm_mean_q15(
   q15_t * pSrc,
   uint32_t blockSize,
   q15_t * pResult);
 
+
   /**
    * @brief  Mean value of a Q31 vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *pResult is output value.
-   * @return none.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
    */
   void arm_mean_q31(
   q31_t * pSrc,
   uint32_t blockSize,
   q31_t * pResult);
 
+
   /**
    * @brief  Mean value of a floating-point vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *pResult is output value.
-   * @return none.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
    */
   void arm_mean_f32(
   float32_t * pSrc,
   uint32_t blockSize,
   float32_t * pResult);
 
+
   /**
    * @brief  Variance of the elements of a floating-point vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *pResult is output value.
-   * @return none.
-   */
-
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
   void arm_var_f32(
   float32_t * pSrc,
   uint32_t blockSize,
   float32_t * pResult);
 
+
   /**
    * @brief  Variance of the elements of a Q31 vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *pResult is output value.
-   * @return none.
-   */
-
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
   void arm_var_q31(
   q31_t * pSrc,
   uint32_t blockSize,
   q31_t * pResult);
 
+
   /**
    * @brief  Variance of the elements of a Q15 vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *pResult is output value.
-   * @return none.
-   */
-
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
   void arm_var_q15(
   q15_t * pSrc,
   uint32_t blockSize,
   q15_t * pResult);
 
+
   /**
    * @brief  Root Mean Square of the elements of a floating-point vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *pResult is output value.
-   * @return none.
-   */
-
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
   void arm_rms_f32(
   float32_t * pSrc,
   uint32_t blockSize,
   float32_t * pResult);
 
+
   /**
    * @brief  Root Mean Square of the elements of a Q31 vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *pResult is output value.
-   * @return none.
-   */
-
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
   void arm_rms_q31(
   q31_t * pSrc,
   uint32_t blockSize,
   q31_t * pResult);
 
+
   /**
    * @brief  Root Mean Square of the elements of a Q15 vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *pResult is output value.
-   * @return none.
-   */
-
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
   void arm_rms_q15(
   q15_t * pSrc,
   uint32_t blockSize,
   q15_t * pResult);
 
+
   /**
    * @brief  Standard deviation of the elements of a floating-point vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *pResult is output value.
-   * @return none.
-   */
-
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
   void arm_std_f32(
   float32_t * pSrc,
   uint32_t blockSize,
   float32_t * pResult);
 
+
   /**
    * @brief  Standard deviation of the elements of a Q31 vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *pResult is output value.
-   * @return none.
-   */
-
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
   void arm_std_q31(
   q31_t * pSrc,
   uint32_t blockSize,
   q31_t * pResult);
 
+
   /**
    * @brief  Standard deviation of the elements of a Q15 vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *pResult is output value.
-   * @return none.
-   */
-
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output value.
+   */
   void arm_std_q15(
   q15_t * pSrc,
   uint32_t blockSize,
   q15_t * pResult);
 
+
   /**
    * @brief  Floating-point complex magnitude
-   * @param[in]  *pSrc points to the complex input vector
-   * @param[out]  *pDst points to the real output vector
-   * @param[in]  numSamples number of complex samples in the input vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc        points to the complex input vector
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector
+   */
   void arm_cmplx_mag_f32(
   float32_t * pSrc,
   float32_t * pDst,
   uint32_t numSamples);
 
+
   /**
    * @brief  Q31 complex magnitude
-   * @param[in]  *pSrc points to the complex input vector
-   * @param[out]  *pDst points to the real output vector
-   * @param[in]  numSamples number of complex samples in the input vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc        points to the complex input vector
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector
+   */
   void arm_cmplx_mag_q31(
   q31_t * pSrc,
   q31_t * pDst,
   uint32_t numSamples);
 
+
   /**
    * @brief  Q15 complex magnitude
-   * @param[in]  *pSrc points to the complex input vector
-   * @param[out]  *pDst points to the real output vector
-   * @param[in]  numSamples number of complex samples in the input vector
-   * @return none.
-   */
-
+   * @param[in]  pSrc        points to the complex input vector
+   * @param[out] pDst        points to the real output vector
+   * @param[in]  numSamples  number of complex samples in the input vector
+   */
   void arm_cmplx_mag_q15(
   q15_t * pSrc,
   q15_t * pDst,
   uint32_t numSamples);
 
+
   /**
    * @brief  Q15 complex dot product
-   * @param[in]  *pSrcA points to the first input vector
-   * @param[in]  *pSrcB points to the second input vector
-   * @param[in]  numSamples number of complex samples in each vector
-   * @param[out]  *realResult real part of the result returned here
-   * @param[out]  *imagResult imaginary part of the result returned here
-   * @return none.
-   */
-
+   * @param[in]  pSrcA       points to the first input vector
+   * @param[in]  pSrcB       points to the second input vector
+   * @param[in]  numSamples  number of complex samples in each vector
+   * @param[out] realResult  real part of the result returned here
+   * @param[out] imagResult  imaginary part of the result returned here
+   */
   void arm_cmplx_dot_prod_q15(
   q15_t * pSrcA,
   q15_t * pSrcB,
@@ -6731,16 +6435,15 @@
   q31_t * realResult,
   q31_t * imagResult);
 
+
   /**
    * @brief  Q31 complex dot product
-   * @param[in]  *pSrcA points to the first input vector
-   * @param[in]  *pSrcB points to the second input vector
-   * @param[in]  numSamples number of complex samples in each vector
-   * @param[out]  *realResult real part of the result returned here
-   * @param[out]  *imagResult imaginary part of the result returned here
-   * @return none.
-   */
-
+   * @param[in]  pSrcA       points to the first input vector
+   * @param[in]  pSrcB       points to the second input vector
+   * @param[in]  numSamples  number of complex samples in each vector
+   * @param[out] realResult  real part of the result returned here
+   * @param[out] imagResult  imaginary part of the result returned here
+   */
   void arm_cmplx_dot_prod_q31(
   q31_t * pSrcA,
   q31_t * pSrcB,
@@ -6748,16 +6451,15 @@
   q63_t * realResult,
   q63_t * imagResult);
 
+
   /**
    * @brief  Floating-point complex dot product
-   * @param[in]  *pSrcA points to the first input vector
-   * @param[in]  *pSrcB points to the second input vector
-   * @param[in]  numSamples number of complex samples in each vector
-   * @param[out]  *realResult real part of the result returned here
-   * @param[out]  *imagResult imaginary part of the result returned here
-   * @return none.
-   */
-
+   * @param[in]  pSrcA       points to the first input vector
+   * @param[in]  pSrcB       points to the second input vector
+   * @param[in]  numSamples  number of complex samples in each vector
+   * @param[out] realResult  real part of the result returned here
+   * @param[out] imagResult  imaginary part of the result returned here
+   */
   void arm_cmplx_dot_prod_f32(
   float32_t * pSrcA,
   float32_t * pSrcB,
@@ -6765,88 +6467,83 @@
   float32_t * realResult,
   float32_t * imagResult);
 
+
   /**
    * @brief  Q15 complex-by-real multiplication
-   * @param[in]  *pSrcCmplx points to the complex input vector
-   * @param[in]  *pSrcReal points to the real input vector
-   * @param[out]  *pCmplxDst points to the complex output vector
-   * @param[in]  numSamples number of samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrcCmplx   points to the complex input vector
+   * @param[in]  pSrcReal    points to the real input vector
+   * @param[out] pCmplxDst   points to the complex output vector
+   * @param[in]  numSamples  number of samples in each vector
+   */
   void arm_cmplx_mult_real_q15(
   q15_t * pSrcCmplx,
   q15_t * pSrcReal,
   q15_t * pCmplxDst,
   uint32_t numSamples);
 
+
   /**
    * @brief  Q31 complex-by-real multiplication
-   * @param[in]  *pSrcCmplx points to the complex input vector
-   * @param[in]  *pSrcReal points to the real input vector
-   * @param[out]  *pCmplxDst points to the complex output vector
-   * @param[in]  numSamples number of samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrcCmplx   points to the complex input vector
+   * @param[in]  pSrcReal    points to the real input vector
+   * @param[out] pCmplxDst   points to the complex output vector
+   * @param[in]  numSamples  number of samples in each vector
+   */
   void arm_cmplx_mult_real_q31(
   q31_t * pSrcCmplx,
   q31_t * pSrcReal,
   q31_t * pCmplxDst,
   uint32_t numSamples);
 
+
   /**
    * @brief  Floating-point complex-by-real multiplication
-   * @param[in]  *pSrcCmplx points to the complex input vector
-   * @param[in]  *pSrcReal points to the real input vector
-   * @param[out]  *pCmplxDst points to the complex output vector
-   * @param[in]  numSamples number of samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrcCmplx   points to the complex input vector
+   * @param[in]  pSrcReal    points to the real input vector
+   * @param[out] pCmplxDst   points to the complex output vector
+   * @param[in]  numSamples  number of samples in each vector
+   */
   void arm_cmplx_mult_real_f32(
   float32_t * pSrcCmplx,
   float32_t * pSrcReal,
   float32_t * pCmplxDst,
   uint32_t numSamples);
 
+
   /**
    * @brief  Minimum value of a Q7 vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *result is output pointer
-   * @param[in]  index is the array index of the minimum value in the input buffer.
-   * @return none.
-   */
-
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] result     is output pointer
+   * @param[in]  index      is the array index of the minimum value in the input buffer.
+   */
   void arm_min_q7(
   q7_t * pSrc,
   uint32_t blockSize,
   q7_t * result,
   uint32_t * index);
 
+
   /**
    * @brief  Minimum value of a Q15 vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *pResult is output pointer
-   * @param[in]  *pIndex is the array index of the minimum value in the input buffer.
-   * @return none.
-   */
-
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   * @param[in]  pIndex     is the array index of the minimum value in the input buffer.
+   */
   void arm_min_q15(
   q15_t * pSrc,
   uint32_t blockSize,
   q15_t * pResult,
   uint32_t * pIndex);
 
+
   /**
    * @brief  Minimum value of a Q31 vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *pResult is output pointer
-   * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
-   * @return none.
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   * @param[out] pIndex     is the array index of the minimum value in the input buffer.
    */
   void arm_min_q31(
   q31_t * pSrc,
@@ -6854,156 +6551,148 @@
   q31_t * pResult,
   uint32_t * pIndex);
 
+
   /**
    * @brief  Minimum value of a floating-point vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @param[out]  *pResult is output pointer
-   * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
-   * @return none.
-   */
-
+   * @param[in]  pSrc       is input pointer
+   * @param[in]  blockSize  is the number of samples to process
+   * @param[out] pResult    is output pointer
+   * @param[out] pIndex     is the array index of the minimum value in the input buffer.
+   */
   void arm_min_f32(
   float32_t * pSrc,
   uint32_t blockSize,
   float32_t * pResult,
   uint32_t * pIndex);
 
+
 /**
  * @brief Maximum value of a Q7 vector.
- * @param[in]       *pSrc points to the input buffer
- * @param[in]       blockSize length of the input vector
- * @param[out]      *pResult maximum value returned here
- * @param[out]      *pIndex index of maximum value returned here
- * @return none.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
  */
-
   void arm_max_q7(
   q7_t * pSrc,
   uint32_t blockSize,
   q7_t * pResult,
   uint32_t * pIndex);
 
+
 /**
  * @brief Maximum value of a Q15 vector.
- * @param[in]       *pSrc points to the input buffer
- * @param[in]       blockSize length of the input vector
- * @param[out]      *pResult maximum value returned here
- * @param[out]      *pIndex index of maximum value returned here
- * @return none.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
  */
-
   void arm_max_q15(
   q15_t * pSrc,
   uint32_t blockSize,
   q15_t * pResult,
   uint32_t * pIndex);
 
+
 /**
  * @brief Maximum value of a Q31 vector.
- * @param[in]       *pSrc points to the input buffer
- * @param[in]       blockSize length of the input vector
- * @param[out]      *pResult maximum value returned here
- * @param[out]      *pIndex index of maximum value returned here
- * @return none.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
  */
-
   void arm_max_q31(
   q31_t * pSrc,
   uint32_t blockSize,
   q31_t * pResult,
   uint32_t * pIndex);
 
+
 /**
  * @brief Maximum value of a floating-point vector.
- * @param[in]       *pSrc points to the input buffer
- * @param[in]       blockSize length of the input vector
- * @param[out]      *pResult maximum value returned here
- * @param[out]      *pIndex index of maximum value returned here
- * @return none.
+ * @param[in]  pSrc       points to the input buffer
+ * @param[in]  blockSize  length of the input vector
+ * @param[out] pResult    maximum value returned here
+ * @param[out] pIndex     index of maximum value returned here
  */
-
   void arm_max_f32(
   float32_t * pSrc,
   uint32_t blockSize,
   float32_t * pResult,
   uint32_t * pIndex);
 
+
   /**
    * @brief  Q15 complex-by-complex multiplication
-   * @param[in]  *pSrcA points to the first input vector
-   * @param[in]  *pSrcB points to the second input vector
-   * @param[out]  *pDst  points to the output vector
-   * @param[in]  numSamples number of complex samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrcA       points to the first input vector
+   * @param[in]  pSrcB       points to the second input vector
+   * @param[out] pDst        points to the output vector
+   * @param[in]  numSamples  number of complex samples in each vector
+   */
   void arm_cmplx_mult_cmplx_q15(
   q15_t * pSrcA,
   q15_t * pSrcB,
   q15_t * pDst,
   uint32_t numSamples);
 
+
   /**
    * @brief  Q31 complex-by-complex multiplication
-   * @param[in]  *pSrcA points to the first input vector
-   * @param[in]  *pSrcB points to the second input vector
-   * @param[out]  *pDst  points to the output vector
-   * @param[in]  numSamples number of complex samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrcA       points to the first input vector
+   * @param[in]  pSrcB       points to the second input vector
+   * @param[out] pDst        points to the output vector
+   * @param[in]  numSamples  number of complex samples in each vector
+   */
   void arm_cmplx_mult_cmplx_q31(
   q31_t * pSrcA,
   q31_t * pSrcB,
   q31_t * pDst,
   uint32_t numSamples);
 
+
   /**
    * @brief  Floating-point complex-by-complex multiplication
-   * @param[in]  *pSrcA points to the first input vector
-   * @param[in]  *pSrcB points to the second input vector
-   * @param[out]  *pDst  points to the output vector
-   * @param[in]  numSamples number of complex samples in each vector
-   * @return none.
-   */
-
+   * @param[in]  pSrcA       points to the first input vector
+   * @param[in]  pSrcB       points to the second input vector
+   * @param[out] pDst        points to the output vector
+   * @param[in]  numSamples  number of complex samples in each vector
+   */
   void arm_cmplx_mult_cmplx_f32(
   float32_t * pSrcA,
   float32_t * pSrcB,
   float32_t * pDst,
   uint32_t numSamples);
 
+
   /**
    * @brief Converts the elements of the floating-point vector to Q31 vector.
-   * @param[in]       *pSrc points to the floating-point input vector
-   * @param[out]      *pDst points to the Q31 output vector
-   * @param[in]       blockSize length of the input vector
-   * @return none.
+   * @param[in]  pSrc       points to the floating-point input vector
+   * @param[out] pDst       points to the Q31 output vector
+   * @param[in]  blockSize  length of the input vector
    */
   void arm_float_to_q31(
   float32_t * pSrc,
   q31_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Converts the elements of the floating-point vector to Q15 vector.
-   * @param[in]       *pSrc points to the floating-point input vector
-   * @param[out]      *pDst points to the Q15 output vector
-   * @param[in]       blockSize length of the input vector
-   * @return          none
+   * @param[in]  pSrc       points to the floating-point input vector
+   * @param[out] pDst       points to the Q15 output vector
+   * @param[in]  blockSize  length of the input vector
    */
   void arm_float_to_q15(
   float32_t * pSrc,
   q15_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief Converts the elements of the floating-point vector to Q7 vector.
-   * @param[in]       *pSrc points to the floating-point input vector
-   * @param[out]      *pDst points to the Q7 output vector
-   * @param[in]       blockSize length of the input vector
-   * @return          none
+   * @param[in]  pSrc       points to the floating-point input vector
+   * @param[out] pDst       points to the Q7 output vector
+   * @param[in]  blockSize  length of the input vector
    */
   void arm_float_to_q7(
   float32_t * pSrc,
@@ -7013,34 +6702,33 @@
 
   /**
    * @brief  Converts the elements of the Q31 vector to Q15 vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[out]  *pDst is output pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @return none.
+   * @param[in]  pSrc       is input pointer
+   * @param[out] pDst       is output pointer
+   * @param[in]  blockSize  is the number of samples to process
    */
   void arm_q31_to_q15(
   q31_t * pSrc,
   q15_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Converts the elements of the Q31 vector to Q7 vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[out]  *pDst is output pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @return none.
+   * @param[in]  pSrc       is input pointer
+   * @param[out] pDst       is output pointer
+   * @param[in]  blockSize  is the number of samples to process
    */
   void arm_q31_to_q7(
   q31_t * pSrc,
   q7_t * pDst,
   uint32_t blockSize);
 
+
   /**
    * @brief  Converts the elements of the Q15 vector to floating-point vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[out]  *pDst is output pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @return none.
+   * @param[in]  pSrc       is input pointer
+   * @param[out] pDst       is output pointer
+   * @param[in]  blockSize  is the number of samples to process
    */
   void arm_q15_to_float(
   q15_t * pSrc,
@@ -7050,10 +6738,9 @@
 
   /**
    * @brief  Converts the elements of the Q15 vector to Q31 vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[out]  *pDst is output pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @return none.
+   * @param[in]  pSrc       is input pointer
+   * @param[out] pDst       is output pointer
+   * @param[in]  blockSize  is the number of samples to process
    */
   void arm_q15_to_q31(
   q15_t * pSrc,
@@ -7063,10 +6750,9 @@
 
   /**
    * @brief  Converts the elements of the Q15 vector to Q7 vector.
-   * @param[in]  *pSrc is input pointer
-   * @param[out]  *pDst is output pointer
-   * @param[in]  blockSize is the number of samples to process
-   * @return none.
+   * @param[in]  pSrc       is input pointer
+   * @param[out] pDst       is output pointer
+   * @param[in]  blockSize  is the number of samples to process
    */
   void arm_q15_to_q7(
   q15_t * pSrc,
@@ -7135,17 +6821,16 @@
    * @{
    */
 
+
   /**
   *
   * @brief  Floating-point bilinear interpolation.
-  * @param[in,out] *S points to an instance of the interpolation structure.
-  * @param[in] X interpolation coordinate.
-  * @param[in] Y interpolation coordinate.
+  * @param[in,out] S  points to an instance of the interpolation structure.
+  * @param[in]     X  interpolation coordinate.
+  * @param[in]     Y  interpolation coordinate.
   * @return out interpolated value.
   */
-
-
-  static __INLINE float32_t arm_bilinear_interp_f32(
+  CMSIS_INLINE __STATIC_INLINE float32_t arm_bilinear_interp_f32(
   const arm_bilinear_interp_instance_f32 * S,
   float32_t X,
   float32_t Y)
@@ -7162,8 +6847,7 @@
 
     /* Care taken for table outside boundary */
     /* Returns zero output when values are outside table boundary */
-    if(xIndex < 0 || xIndex > (S->numRows - 1) || yIndex < 0
-       || yIndex > (S->numCols - 1))
+    if (xIndex < 0 || xIndex > (S->numRows - 1) || yIndex < 0 || yIndex > (S->numCols - 1))
     {
       return (0);
     }
@@ -7201,19 +6885,18 @@
 
     /* return to application */
     return (out);
-
   }
 
+
   /**
   *
   * @brief  Q31 bilinear interpolation.
-  * @param[in,out] *S points to an instance of the interpolation structure.
-  * @param[in] X interpolation coordinate in 12.20 format.
-  * @param[in] Y interpolation coordinate in 12.20 format.
+  * @param[in,out] S  points to an instance of the interpolation structure.
+  * @param[in]     X  interpolation coordinate in 12.20 format.
+  * @param[in]     Y  interpolation coordinate in 12.20 format.
   * @return out interpolated value.
   */
-
-  static __INLINE q31_t arm_bilinear_interp_q31(
+  CMSIS_INLINE __STATIC_INLINE q31_t arm_bilinear_interp_q31(
   arm_bilinear_interp_instance_q31 * S,
   q31_t X,
   q31_t Y)
@@ -7226,20 +6909,19 @@
     q31_t *pYData = S->pData;                    /* pointer to output table values */
     uint32_t nCols = S->numCols;                 /* num of rows */
 
+    /* Input is in 12.20 format */
+    /* 12 bits for the table index */
+    /* Index value calculation */
+    rI = ((X & (q31_t)0xFFF00000) >> 20);
 
     /* Input is in 12.20 format */
     /* 12 bits for the table index */
     /* Index value calculation */
-    rI = ((X & 0xFFF00000) >> 20u);
-
-    /* Input is in 12.20 format */
-    /* 12 bits for the table index */
-    /* Index value calculation */
-    cI = ((Y & 0xFFF00000) >> 20u);
+    cI = ((Y & (q31_t)0xFFF00000) >> 20);
 
     /* Care taken for table outside boundary */
     /* Returns zero output when values are outside table boundary */
-    if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
+    if (rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
     {
       return (0);
     }
@@ -7249,19 +6931,19 @@
     xfract = (X & 0x000FFFFF) << 11u;
 
     /* Read two nearest output values from the index */
-    x1 = pYData[(rI) + nCols * (cI)];
-    x2 = pYData[(rI) + nCols * (cI) + 1u];
+    x1 = pYData[(rI) + (int32_t)nCols * (cI)    ];
+    x2 = pYData[(rI) + (int32_t)nCols * (cI) + 1];
 
     /* 20 bits for the fractional part */
     /* shift left yfract by 11 to keep 1.31 format */
     yfract = (Y & 0x000FFFFF) << 11u;
 
     /* Read two nearest output values from the index */
-    y1 = pYData[(rI) + nCols * (cI + 1)];
-    y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
+    y1 = pYData[(rI) + (int32_t)nCols * (cI + 1)    ];
+    y2 = pYData[(rI) + (int32_t)nCols * (cI + 1) + 1];
 
     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 3.29(q29) format */
-    out = ((q31_t) (((q63_t) x1 * (0x7FFFFFFF - xfract)) >> 32));
+    out = ((q31_t) (((q63_t) x1  * (0x7FFFFFFF - xfract)) >> 32));
     acc = ((q31_t) (((q63_t) out * (0x7FFFFFFF - yfract)) >> 32));
 
     /* x2 * (xfract) * (1-yfract)  in 3.29(q29) and adding to acc */
@@ -7277,19 +6959,18 @@
     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
 
     /* Convert acc to 1.31(q31) format */
-    return (acc << 2u);
-
+    return ((q31_t)(acc << 2));
   }
 
+
   /**
   * @brief  Q15 bilinear interpolation.
-  * @param[in,out] *S points to an instance of the interpolation structure.
-  * @param[in] X interpolation coordinate in 12.20 format.
-  * @param[in] Y interpolation coordinate in 12.20 format.
+  * @param[in,out] S  points to an instance of the interpolation structure.
+  * @param[in]     X  interpolation coordinate in 12.20 format.
+  * @param[in]     Y  interpolation coordinate in 12.20 format.
   * @return out interpolated value.
   */
-
-  static __INLINE q15_t arm_bilinear_interp_q15(
+  CMSIS_INLINE __STATIC_INLINE q15_t arm_bilinear_interp_q15(
   arm_bilinear_interp_instance_q15 * S,
   q31_t X,
   q31_t Y)
@@ -7305,16 +6986,16 @@
     /* Input is in 12.20 format */
     /* 12 bits for the table index */
     /* Index value calculation */
-    rI = ((X & 0xFFF00000) >> 20);
+    rI = ((X & (q31_t)0xFFF00000) >> 20);
 
     /* Input is in 12.20 format */
     /* 12 bits for the table index */
     /* Index value calculation */
-    cI = ((Y & 0xFFF00000) >> 20);
+    cI = ((Y & (q31_t)0xFFF00000) >> 20);
 
     /* Care taken for table outside boundary */
     /* Returns zero output when values are outside table boundary */
-    if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
+    if (rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
     {
       return (0);
     }
@@ -7324,17 +7005,16 @@
     xfract = (X & 0x000FFFFF);
 
     /* Read two nearest output values from the index */
-    x1 = pYData[(rI) + nCols * (cI)];
-    x2 = pYData[(rI) + nCols * (cI) + 1u];
-
+    x1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI)    ];
+    x2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) + 1];
 
     /* 20 bits for the fractional part */
     /* yfract should be in 12.20 format */
     yfract = (Y & 0x000FFFFF);
 
     /* Read two nearest output values from the index */
-    y1 = pYData[(rI) + nCols * (cI + 1)];
-    y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
+    y1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1)    ];
+    y2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) + 1];
 
     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 13.51 format */
 
@@ -7357,19 +7037,18 @@
 
     /* acc is in 13.51 format and down shift acc by 36 times */
     /* Convert out to 1.15 format */
-    return (acc >> 36);
-
+    return ((q15_t)(acc >> 36));
   }
 
+
   /**
   * @brief  Q7 bilinear interpolation.
-  * @param[in,out] *S points to an instance of the interpolation structure.
-  * @param[in] X interpolation coordinate in 12.20 format.
-  * @param[in] Y interpolation coordinate in 12.20 format.
+  * @param[in,out] S  points to an instance of the interpolation structure.
+  * @param[in]     X  interpolation coordinate in 12.20 format.
+  * @param[in]     Y  interpolation coordinate in 12.20 format.
   * @return out interpolated value.
   */
-
-  static __INLINE q7_t arm_bilinear_interp_q7(
+  CMSIS_INLINE __STATIC_INLINE q7_t arm_bilinear_interp_q7(
   arm_bilinear_interp_instance_q7 * S,
   q31_t X,
   q31_t Y)
@@ -7385,36 +7064,35 @@
     /* Input is in 12.20 format */
     /* 12 bits for the table index */
     /* Index value calculation */
-    rI = ((X & 0xFFF00000) >> 20);
+    rI = ((X & (q31_t)0xFFF00000) >> 20);
 
     /* Input is in 12.20 format */
     /* 12 bits for the table index */
     /* Index value calculation */
-    cI = ((Y & 0xFFF00000) >> 20);
+    cI = ((Y & (q31_t)0xFFF00000) >> 20);
 
     /* Care taken for table outside boundary */
     /* Returns zero output when values are outside table boundary */
-    if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
+    if (rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
     {
       return (0);
     }
 
     /* 20 bits for the fractional part */
     /* xfract should be in 12.20 format */
-    xfract = (X & 0x000FFFFF);
+    xfract = (X & (q31_t)0x000FFFFF);
 
     /* Read two nearest output values from the index */
-    x1 = pYData[(rI) + nCols * (cI)];
-    x2 = pYData[(rI) + nCols * (cI) + 1u];
-
+    x1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI)    ];
+    x2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI) + 1];
 
     /* 20 bits for the fractional part */
     /* yfract should be in 12.20 format */
-    yfract = (Y & 0x000FFFFF);
+    yfract = (Y & (q31_t)0x000FFFFF);
 
     /* Read two nearest output values from the index */
-    y1 = pYData[(rI) + nCols * (cI + 1)];
-    y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
+    y1 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1)    ];
+    y2 = pYData[((uint32_t)rI) + nCols * ((uint32_t)cI + 1) + 1];
 
     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 16.47 format */
     out = ((x1 * (0xFFFFF - xfract)));
@@ -7433,120 +7111,143 @@
     acc += (((q63_t) out * (xfract)));
 
     /* acc in 16.47 format and down shift by 40 to convert to 1.7 format */
-    return (acc >> 40);
-
+    return ((q7_t)(acc >> 40));
   }
 
   /**
    * @} end of BilinearInterpolate group
    */
-   
-
-//SMMLAR
+
+
+/* SMMLAR */
 #define multAcc_32x32_keep32_R(a, x, y) \
     a = (q31_t) (((((q63_t) a) << 32) + ((q63_t) x * y) + 0x80000000LL ) >> 32)
 
-//SMMLSR
+/* SMMLSR */
 #define multSub_32x32_keep32_R(a, x, y) \
     a = (q31_t) (((((q63_t) a) << 32) - ((q63_t) x * y) + 0x80000000LL ) >> 32)
 
-//SMMULR
+/* SMMULR */
 #define mult_32x32_keep32_R(a, x, y) \
     a = (q31_t) (((q63_t) x * y + 0x80000000LL ) >> 32)
 
-//SMMLA
+/* SMMLA */
 #define multAcc_32x32_keep32(a, x, y) \
     a += (q31_t) (((q63_t) x * y) >> 32)
 
-//SMMLS
+/* SMMLS */
 #define multSub_32x32_keep32(a, x, y) \
     a -= (q31_t) (((q63_t) x * y) >> 32)
 
-//SMMUL
+/* SMMUL */
 #define mult_32x32_keep32(a, x, y) \
     a = (q31_t) (((q63_t) x * y ) >> 32)
 
 
-#if defined ( __CC_ARM ) //Keil
-
-//Enter low optimization region - place directly above function definition
-    #ifdef ARM_MATH_CM4
-      #define LOW_OPTIMIZATION_ENTER \
-         _Pragma ("push")         \
-         _Pragma ("O1")
-    #else
-      #define LOW_OPTIMIZATION_ENTER 
-    #endif
-
-//Exit low optimization region - place directly after end of function definition
-    #ifdef ARM_MATH_CM4
-      #define LOW_OPTIMIZATION_EXIT \
-         _Pragma ("pop")
-    #else
-      #define LOW_OPTIMIZATION_EXIT  
-    #endif
-
-//Enter low optimization region - place directly above function definition
+#if   defined ( __CC_ARM )
+  /* Enter low optimization region - place directly above function definition */
+  #if defined( ARM_MATH_CM4 ) || defined( ARM_MATH_CM7)
+    #define LOW_OPTIMIZATION_ENTER \
+       _Pragma ("push")         \
+       _Pragma ("O1")
+  #else
+    #define LOW_OPTIMIZATION_ENTER
+  #endif
+
+  /* Exit low optimization region - place directly after end of function definition */
+  #if defined ( ARM_MATH_CM4 ) || defined ( ARM_MATH_CM7 )
+    #define LOW_OPTIMIZATION_EXIT \
+       _Pragma ("pop")
+  #else
+    #define LOW_OPTIMIZATION_EXIT
+  #endif
+
+  /* Enter low optimization region - place directly above function definition */
   #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
 
-//Exit low optimization region - place directly after end of function definition
+  /* Exit low optimization region - place directly after end of function definition */
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+
+#elif defined (__ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 )
+  #define LOW_OPTIMIZATION_ENTER
+  #define LOW_OPTIMIZATION_EXIT
+  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+
+#elif defined ( __GNUC__ )
+  #define LOW_OPTIMIZATION_ENTER \
+       __attribute__(( optimize("-O1") ))
+  #define LOW_OPTIMIZATION_EXIT
+  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
   #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
 
-#elif defined(__ICCARM__) //IAR
-
-//Enter low optimization region - place directly above function definition
-    #ifdef ARM_MATH_CM4
-      #define LOW_OPTIMIZATION_ENTER \
-         _Pragma ("optimize=low")
-    #else
-      #define LOW_OPTIMIZATION_ENTER   
-    #endif
-
-//Exit low optimization region - place directly after end of function definition
+#elif defined ( __ICCARM__ )
+  /* Enter low optimization region - place directly above function definition */
+  #if defined ( ARM_MATH_CM4 ) || defined ( ARM_MATH_CM7 )
+    #define LOW_OPTIMIZATION_ENTER \
+       _Pragma ("optimize=low")
+  #else
+    #define LOW_OPTIMIZATION_ENTER
+  #endif
+
+  /* Exit low optimization region - place directly after end of function definition */
   #define LOW_OPTIMIZATION_EXIT
 
-//Enter low optimization region - place directly above function definition
-    #ifdef ARM_MATH_CM4
-      #define IAR_ONLY_LOW_OPTIMIZATION_ENTER \
-         _Pragma ("optimize=low")
-    #else
-      #define IAR_ONLY_LOW_OPTIMIZATION_ENTER   
-    #endif
-
-//Exit low optimization region - place directly after end of function definition
+  /* Enter low optimization region - place directly above function definition */
+  #if defined ( ARM_MATH_CM4 ) || defined ( ARM_MATH_CM7 )
+    #define IAR_ONLY_LOW_OPTIMIZATION_ENTER \
+       _Pragma ("optimize=low")
+  #else
+    #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+  #endif
+
+  /* Exit low optimization region - place directly after end of function definition */
   #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
 
-#elif defined(__GNUC__)
-
-  #define LOW_OPTIMIZATION_ENTER __attribute__(( optimize("-O1") ))
-
+#elif defined ( __TI_ARM__ )
+  #define LOW_OPTIMIZATION_ENTER
   #define LOW_OPTIMIZATION_EXIT
-
   #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
-
   #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
 
-#elif defined(__CSMC__)		// Cosmic
-
-#define LOW_OPTIMIZATION_ENTER
-#define LOW_OPTIMIZATION_EXIT
-#define IAR_ONLY_LOW_OPTIMIZATION_ENTER
-#define IAR_ONLY_LOW_OPTIMIZATION_EXIT
-
-#elif defined(__TASKING__)		// TASKING
-
-#define LOW_OPTIMIZATION_ENTER
-#define LOW_OPTIMIZATION_EXIT
-#define IAR_ONLY_LOW_OPTIMIZATION_ENTER
-#define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+#elif defined ( __CSMC__ )
+  #define LOW_OPTIMIZATION_ENTER
+  #define LOW_OPTIMIZATION_EXIT
+  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
+
+#elif defined ( __TASKING__ )
+  #define LOW_OPTIMIZATION_ENTER
+  #define LOW_OPTIMIZATION_EXIT
+  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
+  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT
 
 #endif
 
 
-#ifdef	__cplusplus
+#ifdef   __cplusplus
 }
 #endif
 
+/* Compiler specific diagnostic adjustment */
+#if   defined ( __CC_ARM )
+
+#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 )
+
+#elif defined ( __GNUC__ )
+#pragma GCC diagnostic pop
+
+#elif defined ( __ICCARM__ )
+
+#elif defined ( __TI_ARM__ )
+
+#elif defined ( __CSMC__ )
+
+#elif defined ( __TASKING__ )
+
+#else
+  #error Unknown compiler
+#endif
 
 #endif /* _ARM_MATH_H */
 
    