ex
Fork of mbed-os-example-mbed5-blinky by
Embed:
(wiki syntax)
Show/hide line numbers
filters_sse.h
Go to the documentation of this file.
/* Copyright (C) 2002 Jean-Marc Valin */
/**
   @file filters_sse.h
   @brief Various analysis/synthesis filters (SSE version)
*/
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <xmmintrin.h>

/*
   All filters in this file implement the same per-sample recurrence on a
   state vector mem[0..ord-1]:

      y[i]       = x[i] + mem[0]
      mem[j]     = mem[j+1] + num[j]*x[i] - den[j]*y[i]     (0 <= j < ord-1)
      mem[ord-1] =            num[ord-1]*x[i] - den[ord-1]*y[i]

   The "iir" variants drop the num terms and the "fir" variants drop the den
   terms.  x[i] is always read before y[i] is written, so x and y may point to
   the same buffer.
*/

/**
   Scalar implementation of the recurrence above, used by the dispatchers for
   filter orders that have no SSE kernel.

   @param x    Input signal, N samples
   @param num  Numerator coefficients (ord values), or a null pointer to skip
               the numerator terms
   @param den  Denominator coefficients (ord values), or a null pointer to
               skip the denominator terms
   @param y    Output signal, N samples
   @param N    Number of samples to process
   @param ord  Filter order, must be >= 1
   @param mem  Filter state (ord values), updated in place
*/
static void filters_sse_scalar_fallback(const float *x, const float *num, const float *den, float *y, int N, int ord, float *mem)
{
   int i, j;

   for (i=0;i<N;i++)
   {
      const float xi = x[i];
      const float yi = xi + mem[0];
      for (j=0;j<ord;j++)
      {
         /* The last state element has no successor: a zero is shifted in */
         float next = (j+1 < ord) ? mem[j+1] : 0.f;
         if (num)
            next += num[j]*xi;
         if (den)
            next -= den[j]*yi;
         mem[j] = next;
      }
      y[i] = yi;
   }
}

/**
   Order-10 filter with numerator and denominator (SSE kernel).

   @param x     Input signal, N samples
   @param _num  Numerator coefficients, 10 values are read
   @param _den  Denominator coefficients, 10 values are read
   @param y     Output signal, N samples
   @param N     Number of samples to process
   @param ord   Unused, the order is fixed to 10
   @param _mem  Filter state, 10 values, read on entry and written on exit
*/
void filter_mem16_10(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem)
{
   const __m128 zero = _mm_setzero_ps();
   __m128 num[3], den[3], mem[3];
   int i;

   (void)ord;

   /* Copy numerator, denominator and memory to xmm registers.
      Elements 8 and 9 go in the low half of a third register, padded with 0 */
   for (i=0;i<2;i++)
   {
      mem[i] = _mm_loadu_ps(_mem+4*i);
      num[i] = _mm_loadu_ps(_num+4*i);
      den[i] = _mm_loadu_ps(_den+4*i);
   }
   mem[2] = _mm_setr_ps(_mem[8], _mem[9], 0, 0);
   num[2] = _mm_setr_ps(_num[8], _num[9], 0, 0);
   den[2] = _mm_setr_ps(_den[8], _den[9], 0, 0);

   for (i=0;i<N;i++)
   {
      __m128 xx;
      __m128 yy;
      /* Compute next filter result */
      xx = _mm_load_ps1(x+i);
      yy = _mm_add_ss(xx, mem[0]);
      _mm_store_ss(y+i, yy);
      yy = _mm_shuffle_ps(yy, yy, 0);

      /* Update memory: shift every register down by one element (0x39
         rotates, after the low lane was replaced by the next register's
         low lane), then accumulate the new input/output contributions */
      mem[0] = _mm_move_ss(mem[0], mem[1]);
      mem[0] = _mm_shuffle_ps(mem[0], mem[0], 0x39);

      mem[0] = _mm_add_ps(mem[0], _mm_mul_ps(xx, num[0]));
      mem[0] = _mm_sub_ps(mem[0], _mm_mul_ps(yy, den[0]));

      mem[1] = _mm_move_ss(mem[1], mem[2]);
      mem[1] = _mm_shuffle_ps(mem[1], mem[1], 0x39);

      mem[1] = _mm_add_ps(mem[1], _mm_mul_ps(xx, num[1]));
      mem[1] = _mm_sub_ps(mem[1], _mm_mul_ps(yy, den[1]));

      /* Move element 9 into slot 8 and clear the rest with real zeros.
         The padding lanes are not reused because they hold Inf*0 = NaN
         after a non-finite sample. */
      mem[2] = _mm_move_ss(zero, _mm_shuffle_ps(mem[2], mem[2], 0x55));

      mem[2] = _mm_add_ps(mem[2], _mm_mul_ps(xx, num[2]));
      mem[2] = _mm_sub_ps(mem[2], _mm_mul_ps(yy, den[2]));
   }
   /* Put memory back in its place */
   _mm_storeu_ps(_mem, mem[0]);
   _mm_storeu_ps(_mem+4, mem[1]);
   _mm_store_ss(_mem+8, mem[2]);
   mem[2] = _mm_shuffle_ps(mem[2], mem[2], 0x55);
   _mm_store_ss(_mem+9, mem[2]);
}

/**
   Order-8 filter with numerator and denominator (SSE kernel).

   @param x     Input signal, N samples
   @param _num  Numerator coefficients, 8 values are read
   @param _den  Denominator coefficients, 8 values are read
   @param y     Output signal, N samples
   @param N     Number of samples to process
   @param ord   Unused, the order is fixed to 8
   @param _mem  Filter state, 8 values, read on entry and written on exit
*/
void filter_mem16_8(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem)
{
   const __m128 zero = _mm_setzero_ps();
   __m128 num[2], den[2], mem[2];
   int i;

   (void)ord;

   /* Copy numerator, denominator and memory to xmm registers */
   for (i=0;i<2;i++)
   {
      mem[i] = _mm_loadu_ps(_mem+4*i);
      num[i] = _mm_loadu_ps(_num+4*i);
      den[i] = _mm_loadu_ps(_den+4*i);
   }

   for (i=0;i<N;i++)
   {
      __m128 xx;
      __m128 yy;
      /* Compute next filter result */
      xx = _mm_load_ps1(x+i);
      yy = _mm_add_ss(xx, mem[0]);
      _mm_store_ss(y+i, yy);
      yy = _mm_shuffle_ps(yy, yy, 0);

      /* Update memory */
      mem[0] = _mm_move_ss(mem[0], mem[1]);
      mem[0] = _mm_shuffle_ps(mem[0], mem[0], 0x39);

      mem[0] = _mm_add_ps(mem[0], _mm_mul_ps(xx, num[0]));
      mem[0] = _mm_sub_ps(mem[0], _mm_mul_ps(yy, den[0]));

      /* Shift a real zero into the top of the state.  Computing it as
         "v - v" would give NaN when v is Inf or NaN. */
      mem[1] = _mm_move_ss(mem[1], zero);
      mem[1] = _mm_shuffle_ps(mem[1], mem[1], 0x39);

      mem[1] = _mm_add_ps(mem[1], _mm_mul_ps(xx, num[1]));
      mem[1] = _mm_sub_ps(mem[1], _mm_mul_ps(yy, den[1]));
   }
   /* Put memory back in its place */
   _mm_storeu_ps(_mem, mem[0]);
   _mm_storeu_ps(_mem+4, mem[1]);
}


#define OVERRIDE_FILTER_MEM16
/**
   Filter with numerator and denominator.  Orders 10 and 8 use the SSE
   kernels, any other positive order uses the scalar fallback, and ord <= 0
   does nothing.

   @param x      Input signal, N samples
   @param _num   Numerator coefficients, ord values
   @param _den   Denominator coefficients, ord values
   @param y      Output signal, N samples
   @param N      Number of samples to process
   @param ord    Filter order
   @param _mem   Filter state, ord values, updated in place
   @param stack  Unused
*/
void filter_mem16(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem, char *stack)
{
   (void)stack;

   if(ord==10)
      filter_mem16_10(x, _num, _den, y, N, ord, _mem);
   else if (ord==8)
      filter_mem16_8(x, _num, _den, y, N, ord, _mem);
   else if (ord>0)
      filters_sse_scalar_fallback(x, _num, _den, y, N, ord, _mem);
}



/**
   Order-10 denominator-only filter (SSE kernel).

   @param x     Input signal, N samples
   @param _den  Denominator coefficients, 10 values are read
   @param y     Output signal, N samples
   @param N     Number of samples to process
   @param ord   Unused, the order is fixed to 10
   @param _mem  Filter state, 10 values, read on entry and written on exit
*/
void iir_mem16_10(const float *x, const float *_den, float *y, int N, int ord, float *_mem)
{
   const __m128 zero = _mm_setzero_ps();
   __m128 den[3], mem[3];
   int i;

   (void)ord;

   /* Copy denominator and memory to xmm registers */
   for (i=0;i<2;i++)
   {
      mem[i] = _mm_loadu_ps(_mem+4*i);
      den[i] = _mm_loadu_ps(_den+4*i);
   }
   mem[2] = _mm_setr_ps(_mem[8], _mem[9], 0, 0);
   den[2] = _mm_setr_ps(_den[8], _den[9], 0, 0);

   for (i=0;i<N;i++)
   {
      __m128 xx;
      __m128 yy;
      /* Compute next filter result */
      xx = _mm_load_ps1(x+i);
      yy = _mm_add_ss(xx, mem[0]);
      _mm_store_ss(y+i, yy);
      yy = _mm_shuffle_ps(yy, yy, 0);

      /* Update memory */
      mem[0] = _mm_move_ss(mem[0], mem[1]);
      mem[0] = _mm_shuffle_ps(mem[0], mem[0], 0x39);

      mem[0] = _mm_sub_ps(mem[0], _mm_mul_ps(yy, den[0]));

      mem[1] = _mm_move_ss(mem[1], mem[2]);
      mem[1] = _mm_shuffle_ps(mem[1], mem[1], 0x39);

      mem[1] = _mm_sub_ps(mem[1], _mm_mul_ps(yy, den[1]));

      /* Move element 9 into slot 8, clear the rest with real zeros */
      mem[2] = _mm_move_ss(zero, _mm_shuffle_ps(mem[2], mem[2], 0x55));

      mem[2] = _mm_sub_ps(mem[2], _mm_mul_ps(yy, den[2]));
   }
   /* Put memory back in its place */
   _mm_storeu_ps(_mem, mem[0]);
   _mm_storeu_ps(_mem+4, mem[1]);
   _mm_store_ss(_mem+8, mem[2]);
   mem[2] = _mm_shuffle_ps(mem[2], mem[2], 0x55);
   _mm_store_ss(_mem+9, mem[2]);
}


/**
   Order-8 denominator-only filter (SSE kernel).

   @param x     Input signal, N samples
   @param _den  Denominator coefficients, 8 values are read
   @param y     Output signal, N samples
   @param N     Number of samples to process
   @param ord   Unused, the order is fixed to 8
   @param _mem  Filter state, 8 values, read on entry and written on exit
*/
void iir_mem16_8(const float *x, const float *_den, float *y, int N, int ord, float *_mem)
{
   const __m128 zero = _mm_setzero_ps();
   __m128 den[2], mem[2];
   int i;

   (void)ord;

   /* Copy denominator and memory to xmm registers */
   for (i=0;i<2;i++)
   {
      mem[i] = _mm_loadu_ps(_mem+4*i);
      den[i] = _mm_loadu_ps(_den+4*i);
   }

   for (i=0;i<N;i++)
   {
      __m128 xx;
      __m128 yy;
      /* Compute next filter result */
      xx = _mm_load_ps1(x+i);
      yy = _mm_add_ss(xx, mem[0]);
      _mm_store_ss(y+i, yy);
      yy = _mm_shuffle_ps(yy, yy, 0);

      /* Update memory */
      mem[0] = _mm_move_ss(mem[0], mem[1]);
      mem[0] = _mm_shuffle_ps(mem[0], mem[0], 0x39);

      mem[0] = _mm_sub_ps(mem[0], _mm_mul_ps(yy, den[0]));

      /* Shift a real zero into the top of the state */
      mem[1] = _mm_move_ss(mem[1], zero);
      mem[1] = _mm_shuffle_ps(mem[1], mem[1], 0x39);

      mem[1] = _mm_sub_ps(mem[1], _mm_mul_ps(yy, den[1]));
   }
   /* Put memory back in its place */
   _mm_storeu_ps(_mem, mem[0]);
   _mm_storeu_ps(_mem+4, mem[1]);
}

#define OVERRIDE_IIR_MEM16
/**
   Denominator-only filter.  Orders 10 and 8 use the SSE kernels, any other
   positive order uses the scalar fallback, and ord <= 0 does nothing.

   @param x      Input signal, N samples
   @param _den   Denominator coefficients, ord values
   @param y      Output signal, N samples
   @param N      Number of samples to process
   @param ord    Filter order
   @param _mem   Filter state, ord values, updated in place
   @param stack  Unused
*/
void iir_mem16(const float *x, const float *_den, float *y, int N, int ord, float *_mem, char *stack)
{
   (void)stack;

   if(ord==10)
      iir_mem16_10(x, _den, y, N, ord, _mem);
   else if (ord==8)
      iir_mem16_8(x, _den, y, N, ord, _mem);
   else if (ord>0)
      filters_sse_scalar_fallback(x, (const float *)0, _den, y, N, ord, _mem);
}


/**
   Order-10 numerator-only filter (SSE kernel).

   @param x     Input signal, N samples
   @param _num  Numerator coefficients, 10 values are read
   @param y     Output signal, N samples
   @param N     Number of samples to process
   @param ord   Unused, the order is fixed to 10
   @param _mem  Filter state, 10 values, read on entry and written on exit
*/
void fir_mem16_10(const float *x, const float *_num, float *y, int N, int ord, float *_mem)
{
   const __m128 zero = _mm_setzero_ps();
   __m128 num[3], mem[3];
   int i;

   (void)ord;

   /* Copy numerator and memory to xmm registers */
   for (i=0;i<2;i++)
   {
      mem[i] = _mm_loadu_ps(_mem+4*i);
      num[i] = _mm_loadu_ps(_num+4*i);
   }
   mem[2] = _mm_setr_ps(_mem[8], _mem[9], 0, 0);
   num[2] = _mm_setr_ps(_num[8], _num[9], 0, 0);

   for (i=0;i<N;i++)
   {
      __m128 xx;
      __m128 yy;
      /* Compute next filter result */
      xx = _mm_load_ps1(x+i);
      yy = _mm_add_ss(xx, mem[0]);
      _mm_store_ss(y+i, yy);
      yy = _mm_shuffle_ps(yy, yy, 0);

      /* Update memory */
      mem[0] = _mm_move_ss(mem[0], mem[1]);
      mem[0] = _mm_shuffle_ps(mem[0], mem[0], 0x39);

      mem[0] = _mm_add_ps(mem[0], _mm_mul_ps(xx, num[0]));

      mem[1] = _mm_move_ss(mem[1], mem[2]);
      mem[1] = _mm_shuffle_ps(mem[1], mem[1], 0x39);

      mem[1] = _mm_add_ps(mem[1], _mm_mul_ps(xx, num[1]));

      /* Move element 9 into slot 8 and clear the rest with real zeros, so
         that a non-finite sample leaves the state after 10 samples */
      mem[2] = _mm_move_ss(zero, _mm_shuffle_ps(mem[2], mem[2], 0x55));

      mem[2] = _mm_add_ps(mem[2], _mm_mul_ps(xx, num[2]));
   }
   /* Put memory back in its place */
   _mm_storeu_ps(_mem, mem[0]);
   _mm_storeu_ps(_mem+4, mem[1]);
   _mm_store_ss(_mem+8, mem[2]);
   mem[2] = _mm_shuffle_ps(mem[2], mem[2], 0x55);
   _mm_store_ss(_mem+9, mem[2]);
}

/**
   Order-8 numerator-only filter (SSE kernel).

   @param x     Input signal, N samples
   @param _num  Numerator coefficients, 8 values are read
   @param y     Output signal, N samples
   @param N     Number of samples to process
   @param ord   Unused, the order is fixed to 8
   @param _mem  Filter state, 8 values, read on entry and written on exit
*/
void fir_mem16_8(const float *x, const float *_num, float *y, int N, int ord, float *_mem)
{
   const __m128 zero = _mm_setzero_ps();
   __m128 num[2], mem[2];
   int i;

   (void)ord;

   /* Copy numerator and memory to xmm registers */
   for (i=0;i<2;i++)
   {
      mem[i] = _mm_loadu_ps(_mem+4*i);
      num[i] = _mm_loadu_ps(_num+4*i);
   }

   for (i=0;i<N;i++)
   {
      __m128 xx;
      __m128 yy;
      /* Compute next filter result */
      xx = _mm_load_ps1(x+i);
      yy = _mm_add_ss(xx, mem[0]);
      _mm_store_ss(y+i, yy);
      yy = _mm_shuffle_ps(yy, yy, 0);

      /* Update memory */
      mem[0] = _mm_move_ss(mem[0], mem[1]);
      mem[0] = _mm_shuffle_ps(mem[0], mem[0], 0x39);

      mem[0] = _mm_add_ps(mem[0], _mm_mul_ps(xx, num[0]));

      /* Shift a real zero into the top of the state, so that a non-finite
         sample leaves the state after 8 samples */
      mem[1] = _mm_move_ss(mem[1], zero);
      mem[1] = _mm_shuffle_ps(mem[1], mem[1], 0x39);

      mem[1] = _mm_add_ps(mem[1], _mm_mul_ps(xx, num[1]));
   }
   /* Put memory back in its place */
   _mm_storeu_ps(_mem, mem[0]);
   _mm_storeu_ps(_mem+4, mem[1]);
}

#define OVERRIDE_FIR_MEM16
/**
   Numerator-only filter.  Orders 10 and 8 use the SSE kernels, any other
   positive order uses the scalar fallback, and ord <= 0 does nothing.

   @param x      Input signal, N samples
   @param _num   Numerator coefficients, ord values
   @param y      Output signal, N samples
   @param N      Number of samples to process
   @param ord    Filter order
   @param _mem   Filter state, ord values, updated in place
   @param stack  Unused
*/
void fir_mem16(const float *x, const float *_num, float *y, int N, int ord, float *_mem, char *stack)
{
   (void)stack;

   if(ord==10)
      fir_mem16_10(x, _num, y, N, ord, _mem);
   else if (ord==8)
      fir_mem16_8(x, _num, y, N, ord, _mem);
   else if (ord>0)
      filters_sse_scalar_fallback(x, _num, (const float *)0, y, N, ord, _mem);
}
Generated on Tue Jul 12 2022 16:28:53 by
1.7.2
