ex

Fork of mbed-os-example-mbed5-blinky by mbed-os-examples

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers vq_sse.h Source File

vq_sse.h

Go to the documentation of this file.
00001 /* Copyright (C) 2004 Jean-Marc Valin */
00002 /**
00003    @file vq_sse.h
00004    @brief SSE-optimized vq routine
00005 */
00006 /*
00007    Redistribution and use in source and binary forms, with or without
00008    modification, are permitted provided that the following conditions
00009    are met:
00010    
00011    - Redistributions of source code must retain the above copyright
00012    notice, this list of conditions and the following disclaimer.
00013    
00014    - Redistributions in binary form must reproduce the above copyright
00015    notice, this list of conditions and the following disclaimer in the
00016    documentation and/or other materials provided with the distribution.
00017    
00018    - Neither the name of the Xiph.org Foundation nor the names of its
00019    contributors may be used to endorse or promote products derived from
00020    this software without specific prior written permission.
00021    
00022    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00023    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00024    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00025    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
00026    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00027    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00028    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00029    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00030    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00031    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00032    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033 */
00034 
00035 #define OVERRIDE_VQ_NBEST
00036 void vq_nbest(spx_word16_t *_in, const __m128 *codebook, int len, int entries, __m128 *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
00037 {
00038    int i,j,k,used;
00039    VARDECL(float *dist);
00040    VARDECL(__m128 *in);
00041    __m128 half;
00042    used = 0;
00043    ALLOC(dist, entries, float);
00044    half = _mm_set_ps1(.5f);
00045    ALLOC(in, len, __m128);
00046    for (i=0;i<len;i++)
00047       in[i] = _mm_set_ps1(_in[i]);
00048    for (i=0;i<entries>>2;i++)
00049    {
00050       __m128 d = _mm_mul_ps(E[i], half);
00051       for (j=0;j<len;j++)
00052          d = _mm_sub_ps(d, _mm_mul_ps(in[j], *codebook++));
00053       _mm_storeu_ps(dist+4*i, d);
00054    }
00055    for (i=0;i<entries;i++)
00056    {
00057       if (i<N || dist[i]<best_dist[N-1])
00058       {
00059          for (k=N-1; (k >= 1) && (k > used || dist[i] < best_dist[k-1]); k--)
00060          {
00061             best_dist[k]=best_dist[k-1];
00062             nbest[k] = nbest[k-1];
00063          }
00064          best_dist[k]=dist[i];
00065          nbest[k]=i;
00066          used++;
00067       }
00068    }
00069 }
00070 
00071 
00072 
00073 
00074 #define OVERRIDE_VQ_NBEST_SIGN
00075 void vq_nbest_sign(spx_word16_t *_in, const __m128 *codebook, int len, int entries, __m128 *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
00076 {
00077    int i,j,k,used;
00078    VARDECL(float *dist);
00079    VARDECL(__m128 *in);
00080    __m128 half;
00081    used = 0;
00082    ALLOC(dist, entries, float);
00083    half = _mm_set_ps1(.5f);
00084    ALLOC(in, len, __m128);
00085    for (i=0;i<len;i++)
00086       in[i] = _mm_set_ps1(_in[i]);
00087    for (i=0;i<entries>>2;i++)
00088    {
00089       __m128 d = _mm_setzero_ps();
00090       for (j=0;j<len;j++)
00091          d = _mm_add_ps(d, _mm_mul_ps(in[j], *codebook++));
00092       _mm_storeu_ps(dist+4*i, d);
00093    }
00094    for (i=0;i<entries;i++)
00095    {
00096       int sign;
00097       if (dist[i]>0)
00098       {
00099          sign=0;
00100          dist[i]=-dist[i];
00101       } else
00102       {
00103          sign=1;
00104       }
00105       dist[i] += .5f*((float*)E)[i];
00106       if (i<N || dist[i]<best_dist[N-1])
00107       {
00108          for (k=N-1; (k >= 1) && (k > used || dist[i] < best_dist[k-1]); k--)
00109          {
00110             best_dist[k]=best_dist[k-1];
00111             nbest[k] = nbest[k-1];
00112          }
00113          best_dist[k]=dist[i];
00114          nbest[k]=i;
00115          used++;
00116          if (sign)
00117             nbest[k]+=entries;
00118       }
00119    }
00120 }