ex

Fork of mbed-os-example-mbed5-blinky by mbed-os-examples

Committer:
TMBOY
Date:
Tue Jul 18 16:54:45 2017 +0800
Revision:
47:9e361da97763
?

Who changed what in which revision?

User | Revision | Line number | New contents of line
TMBOY 47:9e361da97763 1 /* Copyright (C) 2002 Jean-Marc Valin */
TMBOY 47:9e361da97763 2 /**
TMBOY 47:9e361da97763 3 @file filters_sse.h
TMBOY 47:9e361da97763 4 @brief Various analysis/synthesis filters (SSE version)
TMBOY 47:9e361da97763 5 */
TMBOY 47:9e361da97763 6 /*
TMBOY 47:9e361da97763 7 Redistribution and use in source and binary forms, with or without
TMBOY 47:9e361da97763 8 modification, are permitted provided that the following conditions
TMBOY 47:9e361da97763 9 are met:
TMBOY 47:9e361da97763 10
TMBOY 47:9e361da97763 11 - Redistributions of source code must retain the above copyright
TMBOY 47:9e361da97763 12 notice, this list of conditions and the following disclaimer.
TMBOY 47:9e361da97763 13
TMBOY 47:9e361da97763 14 - Redistributions in binary form must reproduce the above copyright
TMBOY 47:9e361da97763 15 notice, this list of conditions and the following disclaimer in the
TMBOY 47:9e361da97763 16 documentation and/or other materials provided with the distribution.
TMBOY 47:9e361da97763 17
TMBOY 47:9e361da97763 18 - Neither the name of the Xiph.org Foundation nor the names of its
TMBOY 47:9e361da97763 19 contributors may be used to endorse or promote products derived from
TMBOY 47:9e361da97763 20 this software without specific prior written permission.
TMBOY 47:9e361da97763 21
TMBOY 47:9e361da97763 22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
TMBOY 47:9e361da97763 23 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
TMBOY 47:9e361da97763 24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
TMBOY 47:9e361da97763 25 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
TMBOY 47:9e361da97763 26 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
TMBOY 47:9e361da97763 27 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
TMBOY 47:9e361da97763 28 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
TMBOY 47:9e361da97763 29 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
TMBOY 47:9e361da97763 30 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
TMBOY 47:9e361da97763 31 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
TMBOY 47:9e361da97763 32 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
TMBOY 47:9e361da97763 33 */
TMBOY 47:9e361da97763 34
TMBOY 47:9e361da97763 35 #include <xmmintrin.h>
TMBOY 47:9e361da97763 36
/**
 * Order-10 pole-zero filter, SSE implementation.
 *
 * For every sample the code computes
 *    y[i]    = x[i] + _mem[0]
 *    _mem[j] = _mem[j+1] + _num[j]*x[i] - _den[j]*y[i]   (j = 0..8)
 *    _mem[9] =             _num[9]*x[i] - _den[9]*y[i]
 *
 * @param x     input samples, N values are read
 * @param _num  numerator coefficients, indices 0..9 are read
 * @param _den  denominator coefficients, indices 0..9 are read
 * @param y     output samples, N values are written
 * @param N     number of samples to process
 * @param ord   not used by this routine (the order is fixed at 10)
 * @param _mem  filter state, indices 0..9 are read and written back
 */
void filter_mem16_10(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem)
{
   /* Ten values per array are split over three vectors: 0-3, 4-7 and 8-9
      (the two upper lanes of the last vector are zero). Unaligned loads
      are used, so the caller's arrays need no particular alignment. */
   const __m128 b_lo  = _mm_loadu_ps(_num);
   const __m128 b_mid = _mm_loadu_ps(_num + 4);
   const __m128 b_hi  = _mm_setr_ps(_num[8], _num[9], 0, 0);
   const __m128 a_lo  = _mm_loadu_ps(_den);
   const __m128 a_mid = _mm_loadu_ps(_den + 4);
   const __m128 a_hi  = _mm_setr_ps(_den[8], _den[9], 0, 0);
   __m128 s_lo  = _mm_loadu_ps(_mem);
   __m128 s_mid = _mm_loadu_ps(_mem + 4);
   __m128 s_hi  = _mm_setr_ps(_mem[8], _mem[9], 0, 0);
   int n;

   (void)ord;

   for (n = 0; n < N; n++)
   {
      /* Input sample replicated in all four lanes */
      const __m128 in = _mm_set1_ps(x[n]);
      /* Lane 0 holds the new output: x[n] + state[0] */
      __m128 out = _mm_add_ss(in, s_lo);

      _mm_store_ss(y + n, out);
      /* Replicate the output so it can scale the denominator vectors */
      out = _mm_shuffle_ps(out, out, _MM_SHUFFLE(0, 0, 0, 0));

      /* State 0..3: pull state[4] into lane 0, rotate down by one lane,
         then accumulate the numerator and denominator contributions */
      s_lo = _mm_move_ss(s_lo, s_mid);
      s_lo = _mm_shuffle_ps(s_lo, s_lo, _MM_SHUFFLE(0, 3, 2, 1));
      s_lo = _mm_sub_ps(_mm_add_ps(s_lo, _mm_mul_ps(in, b_lo)),
                        _mm_mul_ps(out, a_lo));

      /* State 4..7: same scheme, pulling state[8] from the last vector */
      s_mid = _mm_move_ss(s_mid, s_hi);
      s_mid = _mm_shuffle_ps(s_mid, s_mid, _MM_SHUFFLE(0, 3, 2, 1));
      s_mid = _mm_sub_ps(_mm_add_ps(s_mid, _mm_mul_ps(in, b_mid)),
                         _mm_mul_ps(out, a_mid));

      /* State 8..9: lane 1 moves to lane 0, the other lanes take lane 3 */
      s_hi = _mm_shuffle_ps(s_hi, s_hi, _MM_SHUFFLE(3, 3, 3, 1));
      s_hi = _mm_sub_ps(_mm_add_ps(s_hi, _mm_mul_ps(in, b_hi)),
                        _mm_mul_ps(out, a_hi));
   }

   /* Write the ten state values back to the caller's array */
   _mm_storeu_ps(_mem, s_lo);
   _mm_storeu_ps(_mem + 4, s_mid);
   _mm_store_ss(_mem + 8, s_hi);
   s_hi = _mm_shuffle_ps(s_hi, s_hi, _MM_SHUFFLE(1, 1, 1, 1));
   _mm_store_ss(_mem + 9, s_hi);
}
TMBOY 47:9e361da97763 89
/**
 * Order-8 pole-zero filter, SSE implementation.
 *
 * For every sample the code computes
 *    y[i]    = x[i] + _mem[0]
 *    _mem[j] = _mem[j+1] + _num[j]*x[i] - _den[j]*y[i]   (j = 0..6)
 *    _mem[7] =             _num[7]*x[i] - _den[7]*y[i]
 *
 * @param x     input samples, N values are read
 * @param _num  numerator coefficients, indices 0..7 are read
 * @param _den  denominator coefficients, indices 0..7 are read
 * @param y     output samples, N values are written
 * @param N     number of samples to process
 * @param ord   not used by this routine (the order is fixed at 8)
 * @param _mem  filter state, indices 0..7 are read and written back
 */
void filter_mem16_8(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem)
{
   /* Eight values per array fit in two vectors; unaligned loads are used */
   const __m128 b_lo = _mm_loadu_ps(_num);
   const __m128 b_hi = _mm_loadu_ps(_num + 4);
   const __m128 a_lo = _mm_loadu_ps(_den);
   const __m128 a_hi = _mm_loadu_ps(_den + 4);
   __m128 s_lo = _mm_loadu_ps(_mem);
   __m128 s_hi = _mm_loadu_ps(_mem + 4);
   int n;

   (void)ord;

   for (n = 0; n < N; n++)
   {
      /* Input sample replicated in all four lanes */
      const __m128 in = _mm_set1_ps(x[n]);
      /* Lane 0 holds the new output: x[n] + state[0] */
      __m128 out = _mm_add_ss(in, s_lo);

      _mm_store_ss(y + n, out);
      out = _mm_shuffle_ps(out, out, _MM_SHUFFLE(0, 0, 0, 0));

      /* State 0..3: pull state[4] into lane 0 and rotate down one lane */
      s_lo = _mm_move_ss(s_lo, s_hi);
      s_lo = _mm_shuffle_ps(s_lo, s_lo, _MM_SHUFFLE(0, 3, 2, 1));
      s_lo = _mm_sub_ps(_mm_add_ps(s_lo, _mm_mul_ps(in, b_lo)),
                        _mm_mul_ps(out, a_lo));

      /* State 4..7: lane 0 is cleared by subtracting it from itself, so
         the rotation feeds that cleared value into the top lane */
      s_hi = _mm_sub_ss(s_hi, s_hi);
      s_hi = _mm_shuffle_ps(s_hi, s_hi, _MM_SHUFFLE(0, 3, 2, 1));
      s_hi = _mm_sub_ps(_mm_add_ps(s_hi, _mm_mul_ps(in, b_hi)),
                        _mm_mul_ps(out, a_hi));
   }

   /* Write the eight state values back to the caller's array */
   _mm_storeu_ps(_mem, s_lo);
   _mm_storeu_ps(_mem + 4, s_hi);
}
TMBOY 47:9e361da97763 131
TMBOY 47:9e361da97763 132
#define OVERRIDE_FILTER_MEM16
/**
 * Pole-zero filter entry point.
 *
 * Orders 10 and 8 are handed to the SSE routines. Any other order is
 * processed by a scalar loop applying the same recurrence, so y and _mem
 * are always updated instead of being silently left untouched:
 *    y[i]        = x[i] + _mem[0]
 *    _mem[j]     = _mem[j+1] + _num[j]*x[i] - _den[j]*y[i]   (j < ord-1)
 *    _mem[ord-1] =             _num[ord-1]*x[i] - _den[ord-1]*y[i]
 *
 * @param x     input samples, N values are read
 * @param _num  numerator coefficients, ord values are read
 * @param _den  denominator coefficients, ord values are read
 * @param y     output samples, N values are written
 * @param N     number of samples to process
 * @param ord   filter order
 * @param _mem  filter state, ord values, updated in place
 * @param stack not used by this implementation
 */
void filter_mem16(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem, char *stack)
{
   (void)stack;

   if (ord == 10)
   {
      filter_mem16_10(x, _num, _den, y, N, ord, _mem);
   }
   else if (ord == 8)
   {
      filter_mem16_8(x, _num, _den, y, N, ord, _mem);
   }
   else
   {
      /* Scalar path for orders without a dedicated SSE routine */
      int i, j;

      for (i = 0; i < N; i++)
      {
         const float xi = x[i];
         /* With no state (ord <= 0) the output is just the input */
         const float yi = (ord > 0) ? xi + _mem[0] : xi;

         for (j = 0; j + 1 < ord; j++)
            _mem[j] = (_mem[j + 1] + _num[j] * xi) - _den[j] * yi;
         if (ord > 0)
            _mem[ord - 1] = _num[ord - 1] * xi - _den[ord - 1] * yi;
         y[i] = yi;
      }
   }
}
TMBOY 47:9e361da97763 141
TMBOY 47:9e361da97763 142
TMBOY 47:9e361da97763 143
/**
 * Order-10 all-pole filter, SSE implementation.
 *
 * For every sample the code computes
 *    y[i]    = x[i] + _mem[0]
 *    _mem[j] = _mem[j+1] - _den[j]*y[i]   (j = 0..8)
 *    _mem[9] =           - _den[9]*y[i]
 *
 * @param x     input samples, N values are read
 * @param _den  denominator coefficients, indices 0..9 are read
 * @param y     output samples, N values are written
 * @param N     number of samples to process
 * @param ord   not used by this routine (the order is fixed at 10)
 * @param _mem  filter state, indices 0..9 are read and written back
 */
void iir_mem16_10(const float *x, const float *_den, float *y, int N, int ord, float *_mem)
{
   /* Denominator and state, ten values each, held in three vectors:
      0-3, 4-7 and 8-9 (upper two lanes of the last one are zero) */
   const __m128 a_lo  = _mm_loadu_ps(_den);
   const __m128 a_mid = _mm_loadu_ps(_den + 4);
   const __m128 a_hi  = _mm_setr_ps(_den[8], _den[9], 0, 0);
   __m128 s_lo  = _mm_loadu_ps(_mem);
   __m128 s_mid = _mm_loadu_ps(_mem + 4);
   __m128 s_hi  = _mm_setr_ps(_mem[8], _mem[9], 0, 0);
   int n = 0;

   (void)ord;

   while (n < N)
   {
      /* Lane 0 of out is the new output: x[n] + state[0] */
      const __m128 in = _mm_set1_ps(x[n]);
      __m128 out = _mm_add_ss(in, s_lo);

      _mm_store_ss(y + n, out);
      /* Replicate the output across all lanes for the feedback products */
      out = _mm_shuffle_ps(out, out, _MM_SHUFFLE(0, 0, 0, 0));

      /* State 0..3: take state[4] into lane 0, rotate down, apply feedback */
      s_lo = _mm_move_ss(s_lo, s_mid);
      s_lo = _mm_shuffle_ps(s_lo, s_lo, _MM_SHUFFLE(0, 3, 2, 1));
      s_lo = _mm_sub_ps(s_lo, _mm_mul_ps(out, a_lo));

      /* State 4..7: take state[8] into lane 0, rotate down, apply feedback */
      s_mid = _mm_move_ss(s_mid, s_hi);
      s_mid = _mm_shuffle_ps(s_mid, s_mid, _MM_SHUFFLE(0, 3, 2, 1));
      s_mid = _mm_sub_ps(s_mid, _mm_mul_ps(out, a_mid));

      /* State 8..9: lane 1 moves to lane 0, the other lanes take lane 3 */
      s_hi = _mm_shuffle_ps(s_hi, s_hi, _MM_SHUFFLE(3, 3, 3, 1));
      s_hi = _mm_sub_ps(s_hi, _mm_mul_ps(out, a_hi));

      n++;
   }

   /* Write the ten state values back to the caller's array */
   _mm_storeu_ps(_mem, s_lo);
   _mm_storeu_ps(_mem + 4, s_mid);
   _mm_store_ss(_mem + 8, s_hi);
   s_hi = _mm_shuffle_ps(s_hi, s_hi, _MM_SHUFFLE(1, 1, 1, 1));
   _mm_store_ss(_mem + 9, s_hi);
}
TMBOY 47:9e361da97763 191
TMBOY 47:9e361da97763 192
/**
 * Order-8 all-pole filter, SSE implementation.
 *
 * For every sample the code computes
 *    y[i]    = x[i] + _mem[0]
 *    _mem[j] = _mem[j+1] - _den[j]*y[i]   (j = 0..6)
 *    _mem[7] =           - _den[7]*y[i]
 *
 * @param x     input samples, N values are read
 * @param _den  denominator coefficients, indices 0..7 are read
 * @param y     output samples, N values are written
 * @param N     number of samples to process
 * @param ord   not used by this routine (the order is fixed at 8)
 * @param _mem  filter state, indices 0..7 are read and written back
 */
void iir_mem16_8(const float *x, const float *_den, float *y, int N, int ord, float *_mem)
{
   /* Denominator and state, eight values each, held in two vectors */
   const __m128 a_lo = _mm_loadu_ps(_den);
   const __m128 a_hi = _mm_loadu_ps(_den + 4);
   __m128 s_lo = _mm_loadu_ps(_mem);
   __m128 s_hi = _mm_loadu_ps(_mem + 4);
   int n = 0;

   (void)ord;

   while (n < N)
   {
      /* Lane 0 of out is the new output: x[n] + state[0] */
      const __m128 in = _mm_set1_ps(x[n]);
      __m128 out = _mm_add_ss(in, s_lo);

      _mm_store_ss(y + n, out);
      out = _mm_shuffle_ps(out, out, _MM_SHUFFLE(0, 0, 0, 0));

      /* State 0..3: take state[4] into lane 0, rotate down, apply feedback */
      s_lo = _mm_move_ss(s_lo, s_hi);
      s_lo = _mm_shuffle_ps(s_lo, s_lo, _MM_SHUFFLE(0, 3, 2, 1));
      s_lo = _mm_sub_ps(s_lo, _mm_mul_ps(out, a_lo));

      /* State 4..7: lane 0 is cleared by subtracting it from itself, so
         the rotation feeds that cleared value into the top lane */
      s_hi = _mm_sub_ss(s_hi, s_hi);
      s_hi = _mm_shuffle_ps(s_hi, s_hi, _MM_SHUFFLE(0, 3, 2, 1));
      s_hi = _mm_sub_ps(s_hi, _mm_mul_ps(out, a_hi));

      n++;
   }

   /* Write the eight state values back to the caller's array */
   _mm_storeu_ps(_mem, s_lo);
   _mm_storeu_ps(_mem + 4, s_hi);
}
TMBOY 47:9e361da97763 231
#define OVERRIDE_IIR_MEM16
/**
 * All-pole filter entry point.
 *
 * Orders 10 and 8 are handed to the SSE routines. Any other order is
 * processed by a scalar loop applying the same recurrence, so y and _mem
 * are always updated instead of being silently left untouched:
 *    y[i]        = x[i] + _mem[0]
 *    _mem[j]     = _mem[j+1] - _den[j]*y[i]   (j < ord-1)
 *    _mem[ord-1] =           - _den[ord-1]*y[i]
 *
 * @param x     input samples, N values are read
 * @param _den  denominator coefficients, ord values are read
 * @param y     output samples, N values are written
 * @param N     number of samples to process
 * @param ord   filter order
 * @param _mem  filter state, ord values, updated in place
 * @param stack not used by this implementation
 */
void iir_mem16(const float *x, const float *_den, float *y, int N, int ord, float *_mem, char *stack)
{
   (void)stack;

   if (ord == 10)
   {
      iir_mem16_10(x, _den, y, N, ord, _mem);
   }
   else if (ord == 8)
   {
      iir_mem16_8(x, _den, y, N, ord, _mem);
   }
   else
   {
      /* Scalar path for orders without a dedicated SSE routine */
      int i, j;

      for (i = 0; i < N; i++)
      {
         /* With no state (ord <= 0) the output is just the input */
         const float yi = (ord > 0) ? x[i] + _mem[0] : x[i];

         for (j = 0; j + 1 < ord; j++)
            _mem[j] = _mem[j + 1] - _den[j] * yi;
         if (ord > 0)
            _mem[ord - 1] = 0.0f - _den[ord - 1] * yi;
         y[i] = yi;
      }
   }
}
TMBOY 47:9e361da97763 240
TMBOY 47:9e361da97763 241
/**
 * Order-10 all-zero filter, SSE implementation.
 *
 * For every sample the code computes
 *    y[i]    = x[i] + _mem[0]
 *    _mem[j] = _mem[j+1] + _num[j]*x[i]   (j = 0..8)
 *    _mem[9] =             _num[9]*x[i]
 *
 * @param x     input samples, N values are read
 * @param _num  numerator coefficients, indices 0..9 are read
 * @param y     output samples, N values are written
 * @param N     number of samples to process
 * @param ord   not used by this routine (the order is fixed at 10)
 * @param _mem  filter state, indices 0..9 are read and written back
 */
void fir_mem16_10(const float *x, const float *_num, float *y, int N, int ord, float *_mem)
{
   __m128 num[3], mem[3];
   int i;

   (void)ord;

   /* Copy numerator and memory into xmm vectors (there is no denominator).
      Values 8 and 9 go in the low lanes of the third vector, with the
      upper lanes zero. */
   for (i = 0; i < 2; i++)
   {
      mem[i] = _mm_loadu_ps(_mem + 4 * i);
      num[i] = _mm_loadu_ps(_num + 4 * i);
   }
   mem[2] = _mm_setr_ps(_mem[8], _mem[9], 0, 0);
   num[2] = _mm_setr_ps(_num[8], _num[9], 0, 0);

   for (i = 0; i < N; i++)
   {
      /* Input sample replicated in all four lanes */
      const __m128 xx = _mm_load_ps1(x + i);

      /* Output is lane 0 of x[i] + mem[0]. It is only stored: nothing
         feeds back into the state, so it is not replicated across lanes. */
      _mm_store_ss(y + i, _mm_add_ss(xx, mem[0]));

      /* State 0..3: pull state[4] into lane 0, rotate down one lane */
      mem[0] = _mm_move_ss(mem[0], mem[1]);
      mem[0] = _mm_shuffle_ps(mem[0], mem[0], 0x39);
      mem[0] = _mm_add_ps(mem[0], _mm_mul_ps(xx, num[0]));

      /* State 4..7: pull state[8] into lane 0, rotate down one lane */
      mem[1] = _mm_move_ss(mem[1], mem[2]);
      mem[1] = _mm_shuffle_ps(mem[1], mem[1], 0x39);
      mem[1] = _mm_add_ps(mem[1], _mm_mul_ps(xx, num[1]));

      /* State 8..9: lane 1 moves to lane 0, the other lanes take lane 3 */
      mem[2] = _mm_shuffle_ps(mem[2], mem[2], 0xfd);
      mem[2] = _mm_add_ps(mem[2], _mm_mul_ps(xx, num[2]));
   }

   /* Put memory back in its place */
   _mm_storeu_ps(_mem, mem[0]);
   _mm_storeu_ps(_mem + 4, mem[1]);
   _mm_store_ss(_mem + 8, mem[2]);
   mem[2] = _mm_shuffle_ps(mem[2], mem[2], 0x55);
   _mm_store_ss(_mem + 9, mem[2]);
}
TMBOY 47:9e361da97763 289
/**
 * Order-8 all-zero filter, SSE implementation.
 *
 * For every sample the code computes
 *    y[i]    = x[i] + _mem[0]
 *    _mem[j] = _mem[j+1] + _num[j]*x[i]   (j = 0..6)
 *    _mem[7] =             _num[7]*x[i]
 *
 * @param x     input samples, N values are read
 * @param _num  numerator coefficients, indices 0..7 are read
 * @param y     output samples, N values are written
 * @param N     number of samples to process
 * @param ord   not used by this routine (the order is fixed at 8)
 * @param _mem  filter state, indices 0..7 are read and written back
 */
void fir_mem16_8(const float *x, const float *_num, float *y, int N, int ord, float *_mem)
{
   __m128 num[2], mem[2];
   int i;

   (void)ord;

   /* Copy numerator and memory into xmm vectors (there is no denominator) */
   for (i = 0; i < 2; i++)
   {
      mem[i] = _mm_loadu_ps(_mem + 4 * i);
      num[i] = _mm_loadu_ps(_num + 4 * i);
   }

   for (i = 0; i < N; i++)
   {
      /* Input sample replicated in all four lanes */
      const __m128 xx = _mm_load_ps1(x + i);

      /* Output is lane 0 of x[i] + mem[0]. It is only stored: nothing
         feeds back into the state, so it is not replicated across lanes. */
      _mm_store_ss(y + i, _mm_add_ss(xx, mem[0]));

      /* State 0..3: pull state[4] into lane 0, rotate down one lane */
      mem[0] = _mm_move_ss(mem[0], mem[1]);
      mem[0] = _mm_shuffle_ps(mem[0], mem[0], 0x39);
      mem[0] = _mm_add_ps(mem[0], _mm_mul_ps(xx, num[0]));

      /* State 4..7: lane 0 is cleared by subtracting it from itself, so
         the rotation feeds that cleared value into the top lane */
      mem[1] = _mm_sub_ss(mem[1], mem[1]);
      mem[1] = _mm_shuffle_ps(mem[1], mem[1], 0x39);
      mem[1] = _mm_add_ps(mem[1], _mm_mul_ps(xx, num[1]));
   }

   /* Put memory back in its place */
   _mm_storeu_ps(_mem, mem[0]);
   _mm_storeu_ps(_mem + 4, mem[1]);
}
TMBOY 47:9e361da97763 328
#define OVERRIDE_FIR_MEM16
/**
 * All-zero filter entry point.
 *
 * Orders 10 and 8 are handed to the SSE routines. Any other order is
 * processed by a scalar loop applying the same recurrence, so y and _mem
 * are always updated instead of being silently left untouched:
 *    y[i]        = x[i] + _mem[0]
 *    _mem[j]     = _mem[j+1] + _num[j]*x[i]   (j < ord-1)
 *    _mem[ord-1] =             _num[ord-1]*x[i]
 *
 * @param x     input samples, N values are read
 * @param _num  numerator coefficients, ord values are read
 * @param y     output samples, N values are written
 * @param N     number of samples to process
 * @param ord   filter order
 * @param _mem  filter state, ord values, updated in place
 * @param stack not used by this implementation
 */
void fir_mem16(const float *x, const float *_num, float *y, int N, int ord, float *_mem, char *stack)
{
   (void)stack;

   if (ord == 10)
   {
      fir_mem16_10(x, _num, y, N, ord, _mem);
   }
   else if (ord == 8)
   {
      fir_mem16_8(x, _num, y, N, ord, _mem);
   }
   else
   {
      /* Scalar path for orders without a dedicated SSE routine */
      int i, j;

      for (i = 0; i < N; i++)
      {
         const float xi = x[i];
         /* With no state (ord <= 0) the output is just the input */
         const float yi = (ord > 0) ? xi + _mem[0] : xi;

         for (j = 0; j + 1 < ord; j++)
            _mem[j] = _mem[j + 1] + _num[j] * xi;
         if (ord > 0)
            _mem[ord - 1] = _num[ord - 1] * xi;
         y[i] = yi;
      }
   }
}