bowen liu
/
mbed-os-example-blinky
ex
Fork of mbed-os-example-mbed5-blinky by
Embed:
(wiki syntax)
Show/hide line numbers
ltp_bfin.h
Go to the documentation of this file.
00001 /* Copyright (C) 2005 Analog Devices */ 00002 /** 00003 @file ltp_bfin.h 00004 @author Jean-Marc Valin 00005 @brief Long-Term Prediction functions (Blackfin version) 00006 */ 00007 /* 00008 Redistribution and use in source and binary forms, with or without 00009 modification, are permitted provided that the following conditions 00010 are met: 00011 00012 - Redistributions of source code must retain the above copyright 00013 notice, this list of conditions and the following disclaimer. 00014 00015 - Redistributions in binary form must reproduce the above copyright 00016 notice, this list of conditions and the following disclaimer in the 00017 documentation and/or other materials provided with the distribution. 00018 00019 - Neither the name of the Xiph.org Foundation nor the names of its 00020 contributors may be used to endorse or promote products derived from 00021 this software without specific prior written permission. 00022 00023 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00024 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00025 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 00026 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR 00027 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00028 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00029 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00030 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00031 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00032 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00033 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00034 */ 00035 00036 #define OVERRIDE_INNER_PROD 00037 spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len) 00038 { 00039 spx_word32_t sum=0; 00040 __asm__ __volatile__ ( 00041 "P0 = %3;\n\t" 00042 "P1 = %1;\n\t" 00043 "P2 = %2;\n\t" 00044 "I0 = P1;\n\t" 00045 "I1 = P2;\n\t" 00046 "L0 = 0;\n\t" 00047 "L1 = 0;\n\t" 00048 "A0 = 0;\n\t" 00049 "R0.L = W[I0++] || R1.L = W[I1++];\n\t" 00050 "LOOP inner%= LC0 = P0;\n\t" 00051 "LOOP_BEGIN inner%=;\n\t" 00052 "A0 += R0.L*R1.L (IS) || R0.L = W[I0++] || R1.L = W[I1++];\n\t" 00053 "LOOP_END inner%=;\n\t" 00054 "A0 += R0.L*R1.L (IS);\n\t" 00055 "A0 = A0 >>> 6;\n\t" 00056 "R0 = A0;\n\t" 00057 "%0 = R0;\n\t" 00058 : "=m" (sum) 00059 : "m" (x), "m" (y), "d" (len-1) 00060 : "P0", "P1", "P2", "R0", "R1", "A0", "I0", "I1", "L0", "L1", "R3" 00061 ); 00062 return sum; 00063 } 00064 00065 #define OVERRIDE_PITCH_XCORR 00066 void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack) 00067 { 00068 corr += nb_pitch - 1; 00069 __asm__ __volatile__ ( 00070 "P2 = %0;\n\t" 00071 "I0 = P2;\n\t" /* x in I0 */ 00072 "B0 = P2;\n\t" /* x in B0 */ 00073 "R0 = %3;\n\t" /* len in R0 */ 00074 "P3 = %3;\n\t" 00075 "P3 += -2;\n\t" /* len in R0 */ 00076 "P4 = %4;\n\t" /* nb_pitch in R0 */ 00077 "R1 = R0 << 1;\n\t" /* number of bytes in x */ 00078 "L0 = R1;\n\t" 00079 "P0 = %1;\n\t" 00080 00081 "P1 = %2;\n\t" 00082 "B1 = P1;\n\t" 00083 "L1 = 0;\n\t" /*Disable looping on I1*/ 00084 00085 "r0 = [I0++];\n\t" 00086 "LOOP pitch%= LC0 = P4 >> 1;\n\t" 00087 "LOOP_BEGIN pitch%=;\n\t" 00088 "I1 = P0;\n\t" 00089 "A1 = A0 = 0;\n\t" 00090 "R1 = [I1++];\n\t" 00091 "LOOP inner_prod%= LC1 = P3 >> 1;\n\t" 00092 "LOOP_BEGIN inner_prod%=;\n\t" 00093 "A1 += R0.L*R1.H, A0 += R0.L*R1.L (IS) || R1.L = W[I1++];\n\t" 00094 "A1 += R0.H*R1.L, A0 += R0.H*R1.H (IS) || R1.H = W[I1++] || R0 = [I0++];\n\t" 00095 "LOOP_END inner_prod%=;\n\t" 00096 "A1 += R0.L*R1.H, A0 += R0.L*R1.L (IS) || R1.L = W[I1++];\n\t" 00097 "A1 += R0.H*R1.L, A0 += R0.H*R1.H (IS) || R0 = [I0++];\n\t" 00098 "A0 = A0 >>> 6;\n\t" 00099 "A1 = A1 >>> 6;\n\t" 00100 "R2 = A0, R3 = A1;\n\t" 00101 "[P1--] = r2;\n\t" 00102 "[P1--] = r3;\n\t" 00103 "P0 += 4;\n\t" 00104 "LOOP_END pitch%=;\n\t" 00105 "L0 = 0;\n\t" 00106 : : "m" (_x), "m" (_y), "m" (corr), "m" (len), "m" (nb_pitch) 00107 : "A0", "A1", "P0", "P1", "P2", "P3", "P4", "R0", "R1", "R2", "R3", "I0", "I1", "L0", "L1", "B0", "B1", "memory" 00108 ); 00109 } 00110 00111 #define OVERRIDE_COMPUTE_PITCH_ERROR 00112 static inline spx_word32_t compute_pitch_error(spx_word16_t *C, spx_word16_t *g, spx_word16_t pitch_control) 00113 { 00114 spx_word32_t sum; 00115 __asm__ __volatile__ 00116 ( 00117 "A0 = 0;\n\t" 00118 00119 "R0 = W[%1++];\n\t" 00120 "R1.L = %2.L*%5.L (IS);\n\t" 00121 "A0 += R1.L*R0.L (IS) || R0 = W[%1++];\n\t" 00122 00123 "R1.L = %3.L*%5.L (IS);\n\t" 00124 "A0 += R1.L*R0.L (IS) || R0 = W[%1++];\n\t" 00125 00126 "R1.L = %4.L*%5.L (IS);\n\t" 00127 "A0 += R1.L*R0.L (IS) || R0 = W[%1++];\n\t" 00128 00129 "R1.L = %2.L*%3.L (IS);\n\t" 00130 "A0 -= R1.L*R0.L (IS) || R0 = W[%1++];\n\t" 00131 00132 "R1.L = %4.L*%3.L (IS);\n\t" 00133 "A0 -= R1.L*R0.L (IS) || R0 = W[%1++];\n\t" 00134 00135 "R1.L = %4.L*%2.L (IS);\n\t" 00136 "A0 -= R1.L*R0.L (IS) || R0 = W[%1++];\n\t" 00137 00138 "R1.L = %2.L*%2.L (IS);\n\t" 00139 "A0 -= R1.L*R0.L (IS) || R0 = W[%1++];\n\t" 00140 00141 "R1.L = %3.L*%3.L (IS);\n\t" 00142 "A0 -= R1.L*R0.L (IS) || R0 = W[%1++];\n\t" 00143 00144 "R1.L = %4.L*%4.L (IS);\n\t" 00145 "A0 -= R1.L*R0.L (IS);\n\t" 00146 00147 "%0 = A0;\n\t" 00148 : "=&D" (sum), "=a" (C) 00149 : "d" (g[0]), "d" (g[1]), "d" (g[2]), "d" (pitch_control), "1" (C) 00150 : "R0", "R1", "R2", "A0" 00151 ); 00152 return sum; 00153 } 00154 00155 #define OVERRIDE_OPEN_LOOP_NBEST_PITCH 00156 #ifdef OVERRIDE_OPEN_LOOP_NBEST_PITCH 00157 void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack) 00158 { 00159 int i,j,k; 00160 VARDECL(spx_word32_t *best_score); 00161 VARDECL(spx_word32_t *best_ener); 00162 spx_word32_t e0; 00163 VARDECL(spx_word32_t *corr); 00164 VARDECL(spx_word32_t *energy); 00165 00166 ALLOC(best_score, N, spx_word32_t); 00167 ALLOC(best_ener, N, spx_word32_t); 00168 ALLOC(corr, end-start+1, spx_word32_t); 00169 ALLOC(energy, end-start+2, spx_word32_t); 00170 00171 for (i=0;i<N;i++) 00172 { 00173 best_score[i]=-1; 00174 best_ener[i]=0; 00175 pitch[i]=start; 00176 } 00177 00178 energy[0]=inner_prod(sw-start, sw-start, len); 00179 e0=inner_prod(sw, sw, len); 00180 00181 /* energy update -------------------------------------*/ 00182 00183 __asm__ __volatile__ 00184 ( 00185 " P0 = %0;\n\t" 00186 " I1 = %1;\n\t" 00187 " L1 = 0;\n\t" 00188 " I2 = %2;\n\t" 00189 " L2 = 0;\n\t" 00190 " R2 = [P0++];\n\t" 00191 " R3 = 0;\n\t" 00192 " LSETUP (eu1, eu2) LC1 = %3;\n\t" 00193 "eu1: R1.L = W [I1--] || R0.L = W [I2--] ;\n\t" 00194 " R1 = R1.L * R1.L (IS);\n\t" 00195 " R0 = R0.L * R0.L (IS);\n\t" 00196 " R1 >>>= 6;\n\t" 00197 " R1 = R1 + R2;\n\t" 00198 " R0 >>>= 6;\n\t" 00199 " R1 = R1 - R0;\n\t" 00200 " R2 = MAX(R1,R3);\n\t" 00201 "eu2: [P0++] = R2;\n\t" 00202 : : "d" (energy), "d" (&sw[-start-1]), "d" (&sw[-start+len-1]), 00203 "a" (end-start) 00204 : "P0", "I1", "I2", "R0", "R1", "R2", "R3" 00205 #if (__GNUC__ == 4) 00206 , "LC1" 00207 #endif 00208 ); 00209 00210 pitch_xcorr(sw, sw-end, corr, len, end-start+1, stack); 00211 00212 /* FIXME: Fixed-point and floating-point code should be merged */ 00213 { 00214 VARDECL(spx_word16_t *corr16); 00215 VARDECL(spx_word16_t *ener16); 00216 ALLOC(corr16, end-start+1, spx_word16_t); 00217 ALLOC(ener16, end-start+1, spx_word16_t); 00218 /* Normalize to 180 so we can square it and it still fits in 16 bits */ 00219 normalize16(corr, corr16, 180, end-start+1); 00220 normalize16(energy, ener16, 180, end-start+1); 00221 00222 if (N == 1) { 00223 /* optimised asm to handle N==1 case */ 00224 __asm__ __volatile__ 00225 ( 00226 " I0 = %1;\n\t" /* I0: corr16[] */ 00227 " L0 = 0;\n\t" 00228 " I1 = %2;\n\t" /* I1: energy */ 00229 " L1 = 0;\n\t" 00230 " R2 = -1;\n\t" /* R2: best score */ 00231 " R3 = 0;\n\t" /* R3: best energy */ 00232 " P0 = %4;\n\t" /* P0: best pitch */ 00233 " P1 = %4;\n\t" /* P1: counter */ 00234 " LSETUP (sl1, sl2) LC1 = %3;\n\t" 00235 "sl1: R0.L = W [I0++] || R1.L = W [I1++];\n\t" 00236 " R0 = R0.L * R0.L (IS);\n\t" 00237 " R1 += 1;\n\t" 00238 " R4 = R0.L * R3.L;\n\t" 00239 " R5 = R2.L * R1.L;\n\t" 00240 " cc = R5 < R4;\n\t" 00241 " if cc R2 = R0;\n\t" 00242 " if cc R3 = R1;\n\t" 00243 " if cc P0 = P1;\n\t" 00244 "sl2: P1 += 1;\n\t" 00245 " %0 = P0;\n\t" 00246 : "=&d" (pitch[0]) 00247 : "a" (corr16), "a" (ener16), "a" (end+1-start), "d" (start) 00248 : "P0", "P1", "I0", "I1", "R0", "R1", "R2", "R3", "R4", "R5" 00249 #if (__GNUC__ == 4) 00250 , "LC1" 00251 #endif 00252 ); 00253 00254 } 00255 else { 00256 for (i=start;i<=end;i++) 00257 { 00258 spx_word16_t tmp = MULT16_16_16(corr16[i-start],corr16[i-start]); 00259 /* Instead of dividing the tmp by the energy, we multiply on the other side */ 00260 if (MULT16_16(tmp,best_ener[N-1])>MULT16_16(best_score[N-1],ADD16(1,ener16[i-start]))) 00261 { 00262 /* We can safely put it last and then check */ 00263 best_score[N-1]=tmp; 00264 best_ener[N-1]=ener16[i-start]+1; 00265 pitch[N-1]=i; 00266 /* Check if it comes in front of others */ 00267 for (j=0;j<N-1;j++) 00268 { 00269 if (MULT16_16(tmp,best_ener[j])>MULT16_16(best_score[j],ADD16(1,ener16[i-start]))) 00270 { 00271 for (k=N-1;k>j;k--) 00272 { 00273 best_score[k]=best_score[k-1]; 00274 best_ener[k]=best_ener[k-1]; 00275 pitch[k]=pitch[k-1]; 00276 } 00277 best_score[j]=tmp; 00278 best_ener[j]=ener16[i-start]+1; 00279 pitch[j]=i; 00280 break; 00281 } 00282 } 00283 } 00284 } 00285 } 00286 } 00287 00288 /* Compute open-loop gain */ 00289 if (gain) 00290 { 00291 for (j=0;j<N;j++) 00292 { 00293 spx_word16_t g; 00294 i=pitch[j]; 00295 g = DIV32(corr[i-start], 10+SHR32(MULT16_16(spx_sqrt(e0),spx_sqrt(energy[i-start])),6)); 00296 /* FIXME: g = max(g,corr/energy) */ 00297 if (g<0) 00298 g = 0; 00299 gain[j]=g; 00300 } 00301 } 00302 } 00303 #endif 00304 00305 #define OVERRIDE_PITCH_GAIN_SEARCH_3TAP_VQ 00306 #ifdef OVERRIDE_PITCH_GAIN_SEARCH_3TAP_VQ 00307 static int pitch_gain_search_3tap_vq( 00308 const signed char *gain_cdbk, 00309 int gain_cdbk_size, 00310 spx_word16_t *C16, 00311 spx_word16_t max_gain 00312 ) 00313 { 00314 const signed char *ptr=gain_cdbk; 00315 int best_cdbk=0; 00316 spx_word32_t best_sum=-VERY_LARGE32; 00317 spx_word32_t sum=0; 00318 spx_word16_t g[3]; 00319 spx_word16_t pitch_control=64; 00320 spx_word16_t gain_sum; 00321 int i; 00322 00323 /* fast asm version of VQ codebook search */ 00324 00325 __asm__ __volatile__ 00326 ( 00327 00328 " P0 = %2;\n\t" /* P0: ptr to gain_cdbk */ 00329 " L1 = 0;\n\t" /* no circ addr for L1 */ 00330 " %0 = 0;\n\t" /* %0: best_sum */ 00331 " %1 = 0;\n\t" /* %1: best_cbdk */ 00332 " P1 = 0;\n\t" /* P1: loop counter */ 00333 00334 " LSETUP (pgs1, pgs2) LC1 = %4;\n\t" 00335 "pgs1: R2 = B [P0++] (X);\n\t" /* R2: g[0] */ 00336 " R3 = B [P0++] (X);\n\t" /* R3: g[1] */ 00337 " R4 = B [P0++] (X);\n\t" /* R4: g[2] */ 00338 " R2 += 32;\n\t" 00339 " R3 += 32;\n\t" 00340 " R4 += 32;\n\t" 00341 " R4.H = 64;\n\t" /* R4.H: pitch_control */ 00342 00343 " R0 = B [P0++] (X);\n\t" 00344 " B0 = R0;\n\t" /* BO: gain_sum */ 00345 00346 /* compute_pitch_error() -------------------------------*/ 00347 00348 " I1 = %3;\n\t" /* I1: ptr to C */ 00349 " A0 = 0;\n\t" 00350 00351 " R0.L = W[I1++];\n\t" 00352 " R1.L = R2.L*R4.H (IS);\n\t" 00353 " A0 += R1.L*R0.L (IS) || R0.L = W[I1++];\n\t" 00354 00355 " R1.L = R3.L*R4.H (IS);\n\t" 00356 " A0 += R1.L*R0.L (IS) || R0.L = W[I1++];\n\t" 00357 00358 " R1.L = R4.L*R4.H (IS);\n\t" 00359 " A0 += R1.L*R0.L (IS) || R0.L = W[I1++];\n\t" 00360 00361 " R1.L = R2.L*R3.L (IS);\n\t" 00362 " A0 -= R1.L*R0.L (IS) || R0.L = W[I1++];\n\t" 00363 00364 " R1.L = R4.L*R3.L (IS);\n\t" 00365 " A0 -= R1.L*R0.L (IS) || R0.L = W[I1++];\n\t" 00366 00367 " R1.L = R4.L*R2.L (IS);\n\t" 00368 " A0 -= R1.L*R0.L (IS) || R0.L = W[I1++];\n\t" 00369 00370 " R1.L = R2.L*R2.L (IS);\n\t" 00371 " A0 -= R1.L*R0.L (IS) || R0.L = W[I1++];\n\t" 00372 00373 " R1.L = R3.L*R3.L (IS);\n\t" 00374 " A0 -= R1.L*R0.L (IS) || R0.L = W[I1++];\n\t" 00375 00376 " R1.L = R4.L*R4.L (IS);\n\t" 00377 " R0 = (A0 -= R1.L*R0.L) (IS);\n\t" 00378 00379 /* 00380 Re-arrange the if-then to code efficiently on the Blackfin: 00381 00382 if (sum>best_sum && gain_sum<=max_gain) ------ (1) 00383 00384 if (sum>best_sum && !(gain_sum>max_gain)) ------ (2) 00385 00386 if (max_gain<=gain_sum) { ------ (3) 00387 sum = -VERY_LARGE32; 00388 } 00389 if (best_sum<=sum) 00390 00391 The blackin cc instructions are all of the form: 00392 00393 cc = x < y (or cc = x <= y) 00394 */ 00395 " R1 = B0\n\t" 00396 " R2 = %5\n\t" 00397 " R3 = %6\n\t" 00398 " cc = R2 <= R1;\n\t" 00399 " if cc R0 = R3;\n\t" 00400 " cc = %0 <= R0;\n\t" 00401 " if cc %0 = R0;\n\t" 00402 " if cc %1 = P1;\n\t" 00403 00404 "pgs2: P1 += 1;\n\t" 00405 00406 : "=&d" (best_sum), "=&d" (best_cdbk) 00407 : "a" (gain_cdbk), "a" (C16), "a" (gain_cdbk_size), "a" (max_gain), 00408 "b" (-VERY_LARGE32) 00409 : "R0", "R1", "R2", "R3", "R4", "P0", 00410 "P1", "I1", "L1", "A0", "B0" 00411 #if (__GNUC__ == 4) 00412 , "LC1" 00413 #endif 00414 ); 00415 00416 return best_cdbk; 00417 } 00418 #endif 00419
Generated on Tue Jul 12 2022 16:28:53 by 1.7.2