bowen liu
/
mbed-os-example-blinky
ex
Fork of mbed-os-example-mbed5-blinky by
Embed:
(wiki syntax)
Show/hide line numbers
vq_sse.h
Go to the documentation of this file.
/* Copyright (C) 2004 Jean-Marc Valin */
/**
   @file vq_sse.h
   @brief SSE-optimized vq routine
*/
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   - Neither the name of the Xiph.org Foundation nor the names of its
   contributors may be used to endorse or promote products derived from
   this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033 */ 00034 00035 #define OVERRIDE_VQ_NBEST 00036 void vq_nbest(spx_word16_t *_in, const __m128 *codebook, int len, int entries, __m128 *E, int N, int *nbest, spx_word32_t *best_dist, char *stack) 00037 { 00038 int i,j,k,used; 00039 VARDECL(float *dist); 00040 VARDECL(__m128 *in); 00041 __m128 half; 00042 used = 0; 00043 ALLOC(dist, entries, float); 00044 half = _mm_set_ps1(.5f); 00045 ALLOC(in, len, __m128); 00046 for (i=0;i<len;i++) 00047 in[i] = _mm_set_ps1(_in[i]); 00048 for (i=0;i<entries>>2;i++) 00049 { 00050 __m128 d = _mm_mul_ps(E[i], half); 00051 for (j=0;j<len;j++) 00052 d = _mm_sub_ps(d, _mm_mul_ps(in[j], *codebook++)); 00053 _mm_storeu_ps(dist+4*i, d); 00054 } 00055 for (i=0;i<entries;i++) 00056 { 00057 if (i<N || dist[i]<best_dist[N-1]) 00058 { 00059 for (k=N-1; (k >= 1) && (k > used || dist[i] < best_dist[k-1]); k--) 00060 { 00061 best_dist[k]=best_dist[k-1]; 00062 nbest[k] = nbest[k-1]; 00063 } 00064 best_dist[k]=dist[i]; 00065 nbest[k]=i; 00066 used++; 00067 } 00068 } 00069 } 00070 00071 00072 00073 00074 #define OVERRIDE_VQ_NBEST_SIGN 00075 void vq_nbest_sign(spx_word16_t *_in, const __m128 *codebook, int len, int entries, __m128 *E, int N, int *nbest, spx_word32_t *best_dist, char *stack) 00076 { 00077 int i,j,k,used; 00078 VARDECL(float *dist); 00079 VARDECL(__m128 *in); 00080 __m128 half; 00081 used = 0; 00082 ALLOC(dist, entries, float); 00083 half = _mm_set_ps1(.5f); 00084 ALLOC(in, len, __m128); 00085 for (i=0;i<len;i++) 00086 in[i] = _mm_set_ps1(_in[i]); 00087 for (i=0;i<entries>>2;i++) 00088 { 00089 __m128 d = _mm_setzero_ps(); 00090 for (j=0;j<len;j++) 00091 d = _mm_add_ps(d, _mm_mul_ps(in[j], *codebook++)); 00092 _mm_storeu_ps(dist+4*i, d); 00093 } 00094 for (i=0;i<entries;i++) 00095 { 00096 int sign; 00097 if (dist[i]>0) 00098 { 00099 sign=0; 00100 dist[i]=-dist[i]; 00101 } else 00102 { 00103 sign=1; 00104 } 00105 dist[i] += .5f*((float*)E)[i]; 00106 if (i<N || dist[i]<best_dist[N-1]) 00107 { 00108 for (k=N-1; (k >= 1) && (k > used || 
dist[i] < best_dist[k-1]); k--) 00109 { 00110 best_dist[k]=best_dist[k-1]; 00111 nbest[k] = nbest[k-1]; 00112 } 00113 best_dist[k]=dist[i]; 00114 nbest[k]=i; 00115 used++; 00116 if (sign) 00117 nbest[k]+=entries; 00118 } 00119 } 00120 }
Generated on Tue Jul 12 2022 16:28:54 by 1.7.2