bowen liu
/
mbed-os-example-blinky
ex
Fork of mbed-os-example-mbed5-blinky by
Embed:
(wiki syntax)
Show/hide line numbers
ltp_sse.h
Go to the documentation of this file.
00001 /* Copyright (C) 2002 Jean-Marc Valin */ 00002 /** 00003 @file ltp_sse.h 00004 @brief Long-Term Prediction functions (SSE version) 00005 */ 00006 /* 00007 Redistribution and use in source and binary forms, with or without 00008 modification, are permitted provided that the following conditions 00009 are met: 00010 00011 - Redistributions of source code must retain the above copyright 00012 notice, this list of conditions and the following disclaimer. 00013 00014 - Redistributions in binary form must reproduce the above copyright 00015 notice, this list of conditions and the following disclaimer in the 00016 documentation and/or other materials provided with the distribution. 00017 00018 - Neither the name of the Xiph.org Foundation nor the names of its 00019 contributors may be used to endorse or promote products derived from 00020 this software without specific prior written permission. 00021 00022 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00023 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00024 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 00025 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR 00026 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00027 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00028 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00029 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00030 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00031 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00032 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00033 */ 00034 00035 #include <xmmintrin.h> 00036 00037 #define OVERRIDE_INNER_PROD 00038 float inner_prod(const float *a, const float *b, int len) 00039 { 00040 int i; 00041 float ret; 00042 __m128 sum = _mm_setzero_ps(); 00043 for (i=0;i<(len>>2);i+=2) 00044 { 00045 sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+0), _mm_loadu_ps(b+0))); 00046 sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+4), _mm_loadu_ps(b+4))); 00047 a += 8; 00048 b += 8; 00049 } 00050 sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum)); 00051 sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55)); 00052 _mm_store_ss(&ret, sum); 00053 return ret; 00054 } 00055 00056 #define OVERRIDE_PITCH_XCORR 00057 void pitch_xcorr(const float *_x, const float *_y, float *corr, int len, int nb_pitch, char *stack) 00058 { 00059 int i, offset; 00060 VARDECL(__m128 *x); 00061 VARDECL(__m128 *y); 00062 int N, L; 00063 N = len>>2; 00064 L = nb_pitch>>2; 00065 ALLOC(x, N, __m128); 00066 ALLOC(y, N+L, __m128); 00067 for (i=0;i<N;i++) 00068 x[i] = _mm_loadu_ps(_x+(i<<2)); 00069 for (offset=0;offset<4;offset++) 00070 { 00071 for (i=0;i<N+L;i++) 00072 y[i] = _mm_loadu_ps(_y+(i<<2)+offset); 00073 for (i=0;i<L;i++) 00074 { 00075 int j; 00076 __m128 sum, *xx, *yy; 00077 sum = _mm_setzero_ps(); 00078 yy = y+i; 00079 xx = x; 00080 for (j=0;j<N;j+=2) 00081 { 00082 sum = _mm_add_ps(sum, _mm_mul_ps(xx[0], yy[0])); 00083 sum = _mm_add_ps(sum, _mm_mul_ps(xx[1], yy[1])); 00084 xx += 2; 00085 yy += 2; 00086 } 00087 sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum)); 00088 sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55)); 00089 _mm_store_ss(corr+nb_pitch-1-(i<<2)-offset, sum); 00090 } 00091 } 00092 }
Generated on Tue Jul 12 2022 16:28:53 by 1.7.2