ex

Fork of mbed-os-example-mbed5-blinky by mbed-os-examples

Committer:
TMBOY
Date:
Tue Jul 18 16:54:45 2017 +0800
Revision:
47:9e361da97763
?

Who changed what in which revision?

User | Revision | Line number | New contents of line
TMBOY 47:9e361da97763 1 /* Copyright (C) 2002 Jean-Marc Valin */
TMBOY 47:9e361da97763 2 /**
TMBOY 47:9e361da97763 3 @file ltp_sse.h
TMBOY 47:9e361da97763 4 @brief Long-Term Prediction functions (SSE version)
TMBOY 47:9e361da97763 5 */
TMBOY 47:9e361da97763 6 /*
TMBOY 47:9e361da97763 7 Redistribution and use in source and binary forms, with or without
TMBOY 47:9e361da97763 8 modification, are permitted provided that the following conditions
TMBOY 47:9e361da97763 9 are met:
TMBOY 47:9e361da97763 10
TMBOY 47:9e361da97763 11 - Redistributions of source code must retain the above copyright
TMBOY 47:9e361da97763 12 notice, this list of conditions and the following disclaimer.
TMBOY 47:9e361da97763 13
TMBOY 47:9e361da97763 14 - Redistributions in binary form must reproduce the above copyright
TMBOY 47:9e361da97763 15 notice, this list of conditions and the following disclaimer in the
TMBOY 47:9e361da97763 16 documentation and/or other materials provided with the distribution.
TMBOY 47:9e361da97763 17
TMBOY 47:9e361da97763 18 - Neither the name of the Xiph.org Foundation nor the names of its
TMBOY 47:9e361da97763 19 contributors may be used to endorse or promote products derived from
TMBOY 47:9e361da97763 20 this software without specific prior written permission.
TMBOY 47:9e361da97763 21
TMBOY 47:9e361da97763 22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
TMBOY 47:9e361da97763 23 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
TMBOY 47:9e361da97763 24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
TMBOY 47:9e361da97763 25 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
TMBOY 47:9e361da97763 26 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
TMBOY 47:9e361da97763 27 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
TMBOY 47:9e361da97763 28 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
TMBOY 47:9e361da97763 29 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
TMBOY 47:9e361da97763 30 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
TMBOY 47:9e361da97763 31 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
TMBOY 47:9e361da97763 32 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
TMBOY 47:9e361da97763 33 */
TMBOY 47:9e361da97763 34
TMBOY 47:9e361da97763 35 #include <xmmintrin.h>
TMBOY 47:9e361da97763 36
TMBOY 47:9e361da97763 37 #define OVERRIDE_INNER_PROD
/**
 * Inner product of two float vectors (SSE version).
 *
 * @param a   First input vector (no alignment requirement; unaligned loads are used).
 * @param b   Second input vector (no alignment requirement).
 * @param len Number of elements to multiply-accumulate. Values <= 0 yield 0.
 * @return    Sum of a[k]*b[k] for k in [0, len).
 *
 * The bulk of the work is done 8 floats at a time. Lengths that are not a
 * multiple of 8 are finished with one optional 4-float step followed by a
 * scalar tail, so the function never reads beyond a[len-1] / b[len-1] and
 * never drops trailing elements. For lengths that are a multiple of 8 the
 * operations (and their order) are the same as the 8-wide-only loop.
 */
float inner_prod(const float *a, const float *b, int len)
{
   int i;
   float ret;
   __m128 sum = _mm_setzero_ps();

   /* Main loop: two 4-float multiply-accumulates per iteration. */
   for (i=0;i+8<=len;i+=8)
   {
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4)));
   }

   /* At most one complete group of 4 floats can remain. */
   if (i+4<=len)
   {
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
      i += 4;
   }

   /* Horizontal add: fold the upper pair onto the lower pair, then lane 1 onto lane 0. */
   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
   _mm_store_ss(&ret, sum);

   /* Scalar tail for the last 0..3 elements. */
   for (;i<len;i++)
      ret += a[i]*b[i];

   return ret;
}
TMBOY 47:9e361da97763 55
TMBOY 47:9e361da97763 56 #define OVERRIDE_PITCH_XCORR
TMBOY 47:9e361da97763 57 void pitch_xcorr(const float *_x, const float *_y, float *corr, int len, int nb_pitch, char *stack)
TMBOY 47:9e361da97763 58 {
TMBOY 47:9e361da97763 59 int i, offset;
TMBOY 47:9e361da97763 60 VARDECL(__m128 *x);
TMBOY 47:9e361da97763 61 VARDECL(__m128 *y);
TMBOY 47:9e361da97763 62 int N, L;
TMBOY 47:9e361da97763 63 N = len>>2;
TMBOY 47:9e361da97763 64 L = nb_pitch>>2;
TMBOY 47:9e361da97763 65 ALLOC(x, N, __m128);
TMBOY 47:9e361da97763 66 ALLOC(y, N+L, __m128);
TMBOY 47:9e361da97763 67 for (i=0;i<N;i++)
TMBOY 47:9e361da97763 68 x[i] = _mm_loadu_ps(_x+(i<<2));
TMBOY 47:9e361da97763 69 for (offset=0;offset<4;offset++)
TMBOY 47:9e361da97763 70 {
TMBOY 47:9e361da97763 71 for (i=0;i<N+L;i++)
TMBOY 47:9e361da97763 72 y[i] = _mm_loadu_ps(_y+(i<<2)+offset);
TMBOY 47:9e361da97763 73 for (i=0;i<L;i++)
TMBOY 47:9e361da97763 74 {
TMBOY 47:9e361da97763 75 int j;
TMBOY 47:9e361da97763 76 __m128 sum, *xx, *yy;
TMBOY 47:9e361da97763 77 sum = _mm_setzero_ps();
TMBOY 47:9e361da97763 78 yy = y+i;
TMBOY 47:9e361da97763 79 xx = x;
TMBOY 47:9e361da97763 80 for (j=0;j<N;j+=2)
TMBOY 47:9e361da97763 81 {
TMBOY 47:9e361da97763 82 sum = _mm_add_ps(sum, _mm_mul_ps(xx[0], yy[0]));
TMBOY 47:9e361da97763 83 sum = _mm_add_ps(sum, _mm_mul_ps(xx[1], yy[1]));
TMBOY 47:9e361da97763 84 xx += 2;
TMBOY 47:9e361da97763 85 yy += 2;
TMBOY 47:9e361da97763 86 }
TMBOY 47:9e361da97763 87 sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
TMBOY 47:9e361da97763 88 sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
TMBOY 47:9e361da97763 89 _mm_store_ss(corr+nb_pitch-1-(i<<2)-offset, sum);
TMBOY 47:9e361da97763 90 }
TMBOY 47:9e361da97763 91 }
TMBOY 47:9e361da97763 92 }