ex

Fork of mbed-os-example-mbed5-blinky by mbed-os-examples

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers ltp_sse.h Source File

ltp_sse.h

Go to the documentation of this file.
00001 /* Copyright (C) 2002 Jean-Marc Valin */
00002 /**
00003    @file ltp_sse.h
00004    @brief Long-Term Prediction functions (SSE version)
00005 */
00006 /*
00007    Redistribution and use in source and binary forms, with or without
00008    modification, are permitted provided that the following conditions
00009    are met:
00010    
00011    - Redistributions of source code must retain the above copyright
00012    notice, this list of conditions and the following disclaimer.
00013    
00014    - Redistributions in binary form must reproduce the above copyright
00015    notice, this list of conditions and the following disclaimer in the
00016    documentation and/or other materials provided with the distribution.
00017    
00018    - Neither the name of the Xiph.org Foundation nor the names of its
00019    contributors may be used to endorse or promote products derived from
00020    this software without specific prior written permission.
00021    
00022    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00023    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00024    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00025    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
00026    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00027    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00028    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00029    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00030    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00031    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00032    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033 */
00034 
00035 #include <xmmintrin.h>
00036 
#define OVERRIDE_INNER_PROD
/**
 * Inner (dot) product of two float vectors, SSE version.
 *
 * Neither pointer needs to be 16-byte aligned (unaligned loads are used).
 * Exactly `len` elements of each array are read: full groups of four go
 * through the SSE accumulator, any remaining 1-3 elements are added with
 * scalar arithmetic afterwards.
 *
 * @param a    First input vector (at least len floats)
 * @param b    Second input vector (at least len floats)
 * @param len  Number of elements to multiply-accumulate
 * @return     Sum of a[i]*b[i] for 0 <= i < len; 0 when len <= 0
 */
float inner_prod(const float *a, const float *b, int len)
{
   int i;
   int quads;
   float ret;
   __m128 sum = _mm_setzero_ps();

   if (len <= 0)
      return 0.f;

   /* Number of complete 4-float groups */
   quads = len>>2;

   /* Two groups (8 floats) per pass, only while two full groups remain,
      so an odd group count never reads past the requested range */
   for (i=0;i+1<quads;i+=2)
   {
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+0), _mm_loadu_ps(b+0)));
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+4), _mm_loadu_ps(b+4)));
      a += 8;
      b += 8;
   }
   /* Left-over single group when the group count is odd */
   if (i<quads)
   {
      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a), _mm_loadu_ps(b)));
      a += 4;
      b += 4;
   }

   /* Horizontal add: fold the upper pair onto the lower pair, then add
      lane 1 into lane 0 so that lane 0 holds the sum of all four lanes */
   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
   _mm_store_ss(&ret, sum);

   /* Scalar tail for the last (len & 3) elements */
   for (i=quads<<2;i<len;i++)
      ret += (*a++) * (*b++);

   return ret;
}
00055 
00056 #define OVERRIDE_PITCH_XCORR
00057 void pitch_xcorr(const float *_x, const float *_y, float *corr, int len, int nb_pitch, char *stack)
00058 {
00059    int i, offset;
00060    VARDECL(__m128 *x);
00061    VARDECL(__m128 *y);
00062    int N, L;
00063    N = len>>2;
00064    L = nb_pitch>>2;
00065    ALLOC(x, N, __m128);
00066    ALLOC(y, N+L, __m128);
00067    for (i=0;i<N;i++)
00068       x[i] = _mm_loadu_ps(_x+(i<<2));
00069    for (offset=0;offset<4;offset++)
00070    {
00071       for (i=0;i<N+L;i++)
00072          y[i] = _mm_loadu_ps(_y+(i<<2)+offset);
00073       for (i=0;i<L;i++)
00074       {
00075          int j;
00076          __m128 sum, *xx, *yy;
00077          sum = _mm_setzero_ps();
00078          yy = y+i;
00079          xx = x;
00080          for (j=0;j<N;j+=2)
00081          {
00082             sum = _mm_add_ps(sum, _mm_mul_ps(xx[0], yy[0]));
00083             sum = _mm_add_ps(sum, _mm_mul_ps(xx[1], yy[1]));
00084             xx += 2;
00085             yy += 2;
00086          }
00087          sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
00088          sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
00089          _mm_store_ss(corr+nb_pitch-1-(i<<2)-offset, sum);
00090       }
00091    }
00092 }