MP3 player. You can change the fwd/rev speed and skip. See: http://mbed.org/users/okini3939/notebook/lpc4088_madplayer/
Dependencies: I2SSlave SDFileSystem TLV320 mbed
madplayer/fixed.h@0:8ba6230eefbd, 2014-02-18 (annotated)
- Committer:
- okini3939
- Date:
- Tue Feb 18 00:22:50 2014 +0000
- Revision:
- 0:8ba6230eefbd
1st build
Who changed what in which revision?
User | Revision | Line number | New contents of line |
---|---|---|---|
okini3939 | 0:8ba6230eefbd | 1 | /* |
okini3939 | 0:8ba6230eefbd | 2 | * libmad - MPEG audio decoder library |
okini3939 | 0:8ba6230eefbd | 3 | * Copyright (C) 2000-2004 Underbit Technologies, Inc. |
okini3939 | 0:8ba6230eefbd | 4 | * |
okini3939 | 0:8ba6230eefbd | 5 | * This program is free software; you can redistribute it and/or modify |
okini3939 | 0:8ba6230eefbd | 6 | * it under the terms of the GNU General Public License as published by |
okini3939 | 0:8ba6230eefbd | 7 | * the Free Software Foundation; either version 2 of the License, or |
okini3939 | 0:8ba6230eefbd | 8 | * (at your option) any later version. |
okini3939 | 0:8ba6230eefbd | 9 | * |
okini3939 | 0:8ba6230eefbd | 10 | * This program is distributed in the hope that it will be useful, |
okini3939 | 0:8ba6230eefbd | 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
okini3939 | 0:8ba6230eefbd | 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
okini3939 | 0:8ba6230eefbd | 13 | * GNU General Public License for more details. |
okini3939 | 0:8ba6230eefbd | 14 | * |
okini3939 | 0:8ba6230eefbd | 15 | * You should have received a copy of the GNU General Public License |
okini3939 | 0:8ba6230eefbd | 16 | * along with this program; if not, write to the Free Software |
okini3939 | 0:8ba6230eefbd | 17 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
okini3939 | 0:8ba6230eefbd | 18 | * |
okini3939 | 0:8ba6230eefbd | 19 | * $Id: fixed.h,v 1.1 2010/11/23 20:12:57 andy Exp $ |
okini3939 | 0:8ba6230eefbd | 20 | */ |
okini3939 | 0:8ba6230eefbd | 21 | |
okini3939 | 0:8ba6230eefbd | 22 | # ifndef LIBMAD_FIXED_H |
okini3939 | 0:8ba6230eefbd | 23 | # define LIBMAD_FIXED_H |
okini3939 | 0:8ba6230eefbd | 24 | |
okini3939 | 0:8ba6230eefbd | 25 | #include "config.h" |
okini3939 | 0:8ba6230eefbd | 26 | |
okini3939 | 0:8ba6230eefbd | 27 | # if SIZEOF_INT >= 4 /* SIZEOF_INT is not defined in this header; presumably supplied by config.h */ |
okini3939 | 0:8ba6230eefbd | 28 | typedef signed int mad_fixed_t; /* the fixed-point value type used by every macro below */ |
okini3939 | 0:8ba6230eefbd | 29 | |
okini3939 | 0:8ba6230eefbd | 30 | typedef signed int mad_fixed64hi_t; /* signed high word of a (hi, lo) double-word product */ |
okini3939 | 0:8ba6230eefbd | 31 | typedef unsigned int mad_fixed64lo_t; /* unsigned low word of a (hi, lo) double-word product */ |
okini3939 | 0:8ba6230eefbd | 32 | # else /* SIZEOF_INT is below 4 (or undefined, which evaluates as 0): use long instead */ |
okini3939 | 0:8ba6230eefbd | 33 | typedef signed long mad_fixed_t; |
okini3939 | 0:8ba6230eefbd | 34 | |
okini3939 | 0:8ba6230eefbd | 35 | typedef signed long mad_fixed64hi_t; |
okini3939 | 0:8ba6230eefbd | 36 | typedef unsigned long mad_fixed64lo_t; |
okini3939 | 0:8ba6230eefbd | 37 | # endif |
okini3939 | 0:8ba6230eefbd | 38 | |
okini3939 | 0:8ba6230eefbd | 39 | # if defined(_MSC_VER) /* 64-bit intermediate type: MSVC spelling */ |
okini3939 | 0:8ba6230eefbd | 40 | # define mad_fixed64_t signed __int64 |
okini3939 | 0:8ba6230eefbd | 41 | # elif 1 || defined(__GNUC__) /* the leading constant 1 makes this branch unconditional for every non-MSVC compiler */ |
okini3939 | 0:8ba6230eefbd | 42 | # define mad_fixed64_t signed long long /* a macro, not a typedef */ |
okini3939 | 0:8ba6230eefbd | 43 | # endif |
okini3939 | 0:8ba6230eefbd | 44 | |
okini3939 | 0:8ba6230eefbd | 45 | # if defined(FPM_FLOAT) /* note: FPM_FLOAT is rejected with #error further down in this header */ |
okini3939 | 0:8ba6230eefbd | 46 | typedef double mad_sample_t; |
okini3939 | 0:8ba6230eefbd | 47 | # else |
okini3939 | 0:8ba6230eefbd | 48 | typedef mad_fixed_t mad_sample_t; /* samples are plain fixed-point values in every supported build */ |
okini3939 | 0:8ba6230eefbd | 49 | # endif |
okini3939 | 0:8ba6230eefbd | 50 | |
okini3939 | 0:8ba6230eefbd | 51 | /* |
okini3939 | 0:8ba6230eefbd | 52 | * Fixed-point format: 0xABBBBBBB |
okini3939 | 0:8ba6230eefbd | 53 | * A == whole part (sign + 3 bits) |
okini3939 | 0:8ba6230eefbd | 54 | * B == fractional part (28 bits) |
okini3939 | 0:8ba6230eefbd | 55 | * |
okini3939 | 0:8ba6230eefbd | 56 | * Values are signed two's complement, so the effective range is: |
okini3939 | 0:8ba6230eefbd | 57 | * 0x80000000 to 0x7fffffff |
okini3939 | 0:8ba6230eefbd | 58 | * -8.0 to +7.9999999962747097015380859375 |
okini3939 | 0:8ba6230eefbd | 59 | * |
okini3939 | 0:8ba6230eefbd | 60 | * The smallest positive representable value is: |
okini3939 | 0:8ba6230eefbd | 61 | * 0x00000001 == 0.0000000037252902984619140625 (i.e. about 3.725e-9) |
okini3939 | 0:8ba6230eefbd | 62 | * |
okini3939 | 0:8ba6230eefbd | 63 | * 28 bits of fractional accuracy represent about |
okini3939 | 0:8ba6230eefbd | 64 | * 8.4 digits of decimal accuracy. |
okini3939 | 0:8ba6230eefbd | 65 | * |
okini3939 | 0:8ba6230eefbd | 66 | * Fixed-point numbers can be added or subtracted as normal |
okini3939 | 0:8ba6230eefbd | 67 | * integers, but multiplication requires shifting the 64-bit result |
okini3939 | 0:8ba6230eefbd | 68 | * from 56 fractional bits back to 28 (and rounding.) |
okini3939 | 0:8ba6230eefbd | 69 | * |
okini3939 | 0:8ba6230eefbd | 70 | * Changing the definition of MAD_F_FRACBITS is only partially |
okini3939 | 0:8ba6230eefbd | 71 | * supported, and must be done with care. |
okini3939 | 0:8ba6230eefbd | 72 | */ |
okini3939 | 0:8ba6230eefbd | 73 | |
okini3939 | 0:8ba6230eefbd | 74 | # define MAD_F_FRACBITS 28 /* number of fractional bits in a mad_fixed_t */ |
okini3939 | 0:8ba6230eefbd | 75 | |
okini3939 | 0:8ba6230eefbd | 76 | # if MAD_F_FRACBITS == 28 /* MAD_F(x): x is a constant written with 28 fractional bits */ |
okini3939 | 0:8ba6230eefbd | 77 | # define MAD_F(x) ((mad_fixed_t) (x##L)) /* no rescaling needed: only the L suffix and the cast are applied */ |
okini3939 | 0:8ba6230eefbd | 78 | # else |
okini3939 | 0:8ba6230eefbd | 79 | # if MAD_F_FRACBITS < 28 |
okini3939 | 0:8ba6230eefbd | 80 | # warning "MAD_F_FRACBITS < 28" |
okini3939 | 0:8ba6230eefbd | 81 | # define MAD_F(x) ((mad_fixed_t) \ |
okini3939 | 0:8ba6230eefbd | 82 | (((x##L) + \ |
okini3939 | 0:8ba6230eefbd | 83 | (1L << (28 - MAD_F_FRACBITS - 1))) >> \ |
okini3939 | 0:8ba6230eefbd | 84 | (28 - MAD_F_FRACBITS))) /* adds half of the dropped range, then shifts the excess fractional bits away */ |
okini3939 | 0:8ba6230eefbd | 85 | # elif MAD_F_FRACBITS > 28 |
okini3939 | 0:8ba6230eefbd | 86 | # error "MAD_F_FRACBITS > 28 not currently supported" |
okini3939 | 0:8ba6230eefbd | 87 | # define MAD_F(x) ((mad_fixed_t) \ |
okini3939 | 0:8ba6230eefbd | 88 | ((x##L) << (MAD_F_FRACBITS - 28))) /* shifts left by the additional fractional bits */ |
okini3939 | 0:8ba6230eefbd | 89 | # endif |
okini3939 | 0:8ba6230eefbd | 90 | # endif |
okini3939 | 0:8ba6230eefbd | 91 | |
okini3939 | 0:8ba6230eefbd | 92 | # define MAD_F_MIN ((mad_fixed_t) (-0x7fffffffL - 1)) /* most negative value; written as -MAX - 1 because 0x80000000L has an unsigned type when long is 32 bits */ |
okini3939 | 0:8ba6230eefbd | 93 | # define MAD_F_MAX ((mad_fixed_t) +0x7fffffffL) /* most positive value */ |
okini3939 | 0:8ba6230eefbd | 94 | |
okini3939 | 0:8ba6230eefbd | 95 | # define MAD_F_ONE MAD_F(0x10000000) /* 1.0: bit 28 set in the 28-fractional-bit notation that MAD_F() accepts */ |
okini3939 | 0:8ba6230eefbd | 96 | |
okini3939 | 0:8ba6230eefbd | 97 | # define mad_f_tofixed(x) ((mad_fixed_t) \ |
okini3939 | 0:8ba6230eefbd | 98 | ((x) * (double) (1L << MAD_F_FRACBITS) + 0.5)) /* double -> fixed; NOTE(review): adding +0.5 before a truncating cast rounds to nearest only for non-negative x */ |
okini3939 | 0:8ba6230eefbd | 99 | # define mad_f_todouble(x) ((double) \ |
okini3939 | 0:8ba6230eefbd | 100 | ((x) / (double) (1L << MAD_F_FRACBITS))) /* fixed -> double: divides by 2^MAD_F_FRACBITS */ |
okini3939 | 0:8ba6230eefbd | 101 | |
okini3939 | 0:8ba6230eefbd | 102 | # define mad_f_intpart(x) ((x) >> MAD_F_FRACBITS) /* whole part: shifts the fractional bits out */ |
okini3939 | 0:8ba6230eefbd | 103 | # define mad_f_fracpart(x) ((x) & ((1L << MAD_F_FRACBITS) - 1)) /* masks the low MAD_F_FRACBITS bits */ |
okini3939 | 0:8ba6230eefbd | 104 | /* (x should be positive) */ |
okini3939 | 0:8ba6230eefbd | 105 | |
okini3939 | 0:8ba6230eefbd | 106 | # define mad_f_fromint(x) ((x) << MAD_F_FRACBITS) |
okini3939 | 0:8ba6230eefbd | 107 | |
okini3939 | 0:8ba6230eefbd | 108 | # define mad_f_add(x, y) ((x) + (y)) /* plain integer addition; no overflow check */ |
okini3939 | 0:8ba6230eefbd | 109 | # define mad_f_sub(x, y) ((x) - (y)) /* plain integer subtraction; no overflow check */ |
okini3939 | 0:8ba6230eefbd | 110 | |
okini3939 | 0:8ba6230eefbd | 111 | # if defined(FPM_FLOAT) /* floating-point build: rejected by the #error on the next line */ |
okini3939 | 0:8ba6230eefbd | 112 | # error "FPM_FLOAT not yet supported" |
okini3939 | 0:8ba6230eefbd | 113 | |
okini3939 | 0:8ba6230eefbd | 114 | # undef MAD_F /* replaces the integer MAD_F defined earlier in this header */ |
okini3939 | 0:8ba6230eefbd | 115 | # define MAD_F(x) mad_f_todouble(x) |
okini3939 | 0:8ba6230eefbd | 116 | |
okini3939 | 0:8ba6230eefbd | 117 | # define mad_f_mul(x, y) ((x) * (y)) |
okini3939 | 0:8ba6230eefbd | 118 | # define mad_f_scale64 /* defined empty, which also suppresses the default mad_f_scale64 at the end of the header */ |
okini3939 | 0:8ba6230eefbd | 119 | |
okini3939 | 0:8ba6230eefbd | 120 | # undef ASO_ZEROCHECK |
okini3939 | 0:8ba6230eefbd | 121 | |
okini3939 | 0:8ba6230eefbd | 122 | # elif defined(FPM_64BIT) |
okini3939 | 0:8ba6230eefbd | 123 | |
okini3939 | 0:8ba6230eefbd | 124 | /* |
okini3939 | 0:8ba6230eefbd | 125 | * This version should be the most accurate if 64-bit types are supported by |
okini3939 | 0:8ba6230eefbd | 126 | * the compiler, although it may not be the most efficient. |
okini3939 | 0:8ba6230eefbd | 127 | */ |
okini3939 | 0:8ba6230eefbd | 128 | # if defined(OPT_ACCURACY) /* rounding variant */ |
okini3939 | 0:8ba6230eefbd | 129 | # define mad_f_mul(x, y) \ |
okini3939 | 0:8ba6230eefbd | 130 | ((mad_fixed_t) \ |
okini3939 | 0:8ba6230eefbd | 131 | ((((mad_fixed64_t) (x) * (y)) + \ |
okini3939 | 0:8ba6230eefbd | 132 | (1L << (MAD_F_SCALEBITS - 1))) >> MAD_F_SCALEBITS)) /* 64-bit product plus half an LSB, then scaled back down */ |
okini3939 | 0:8ba6230eefbd | 133 | # else /* truncating variant */ |
okini3939 | 0:8ba6230eefbd | 134 | # define mad_f_mul(x, y) \ |
okini3939 | 0:8ba6230eefbd | 135 | ((mad_fixed_t) (((mad_fixed64_t) (x) * (y)) >> MAD_F_SCALEBITS)) /* 64-bit product shifted down with no rounding term */ |
okini3939 | 0:8ba6230eefbd | 136 | # endif |
okini3939 | 0:8ba6230eefbd | 137 | |
okini3939 | 0:8ba6230eefbd | 138 | # define MAD_F_SCALEBITS MAD_F_FRACBITS /* defined after mad_f_mul; fine because macro bodies are expanded where they are used */ |
okini3939 | 0:8ba6230eefbd | 139 | |
okini3939 | 0:8ba6230eefbd | 140 | /* --- Intel --------------------------------------------------------------- */ |
okini3939 | 0:8ba6230eefbd | 141 | |
okini3939 | 0:8ba6230eefbd | 142 | # elif defined(FPM_INTEL) |
okini3939 | 0:8ba6230eefbd | 143 | |
okini3939 | 0:8ba6230eefbd | 144 | # if defined(_MSC_VER) |
okini3939 | 0:8ba6230eefbd | 145 | # pragma warning(push) |
okini3939 | 0:8ba6230eefbd | 146 | # pragma warning(disable: 4035) /* no return value */ |
okini3939 | 0:8ba6230eefbd | 147 | static __forceinline /* MSVC-only fixed-point multiply; truncates, no rounding term */ |
okini3939 | 0:8ba6230eefbd | 148 | mad_fixed_t mad_f_mul_inline(mad_fixed_t x, mad_fixed_t y) |
okini3939 | 0:8ba6230eefbd | 149 | { |
okini3939 | 0:8ba6230eefbd | 150 | enum { /* presumably an enum so the inline assembler sees a named compile-time constant */ |
okini3939 | 0:8ba6230eefbd | 151 | fracbits = MAD_F_FRACBITS |
okini3939 | 0:8ba6230eefbd | 152 | }; |
okini3939 | 0:8ba6230eefbd | 153 | |
okini3939 | 0:8ba6230eefbd | 154 | __asm { |
okini3939 | 0:8ba6230eefbd | 155 | mov eax, x |
okini3939 | 0:8ba6230eefbd | 156 | imul y /* edx:eax = eax * y, signed 64-bit product */ |
okini3939 | 0:8ba6230eefbd | 157 | shrd eax, edx, fracbits /* eax = low 32 bits of the product shifted right by fracbits */ |
okini3939 | 0:8ba6230eefbd | 158 | } |
okini3939 | 0:8ba6230eefbd | 159 | |
okini3939 | 0:8ba6230eefbd | 160 | /* implicit return of eax */ |
okini3939 | 0:8ba6230eefbd | 161 | } |
okini3939 | 0:8ba6230eefbd | 162 | # pragma warning(pop) |
okini3939 | 0:8ba6230eefbd | 163 | |
okini3939 | 0:8ba6230eefbd | 164 | # define mad_f_mul mad_f_mul_inline |
okini3939 | 0:8ba6230eefbd | 165 | # define mad_f_scale64 |
okini3939 | 0:8ba6230eefbd | 166 | # else |
okini3939 | 0:8ba6230eefbd | 167 | /* |
okini3939 | 0:8ba6230eefbd | 168 | * This Intel version is fast and accurate; the disposition of the least |
okini3939 | 0:8ba6230eefbd | 169 | * significant bit depends on OPT_ACCURACY via mad_f_scale64(). |
okini3939 | 0:8ba6230eefbd | 170 | */ |
okini3939 | 0:8ba6230eefbd | 171 | # define MAD_F_MLX(hi, lo, x, y) \ |
okini3939 | 0:8ba6230eefbd | 172 | asm ("imull %3" \ |
okini3939 | 0:8ba6230eefbd | 173 | : "=a" (lo), "=d" (hi) \ |
okini3939 | 0:8ba6230eefbd | 174 | : "%a" (x), "rm" (y) \ |
okini3939 | 0:8ba6230eefbd | 175 | : "cc") /* hi:lo = x * y; the a and d constraints bind lo to eax and hi to edx */ |
okini3939 | 0:8ba6230eefbd | 176 | |
okini3939 | 0:8ba6230eefbd | 177 | # if defined(OPT_ACCURACY) |
okini3939 | 0:8ba6230eefbd | 178 | /* |
okini3939 | 0:8ba6230eefbd | 179 | * This gives best accuracy but is not very fast. |
okini3939 | 0:8ba6230eefbd | 180 | */ |
okini3939 | 0:8ba6230eefbd | 181 | # define MAD_F_MLA(hi, lo, x, y) \ |
okini3939 | 0:8ba6230eefbd | 182 | ({ mad_fixed64hi_t __hi; \ |
okini3939 | 0:8ba6230eefbd | 183 | mad_fixed64lo_t __lo; \ |
okini3939 | 0:8ba6230eefbd | 184 | MAD_F_MLX(__hi, __lo, (x), (y)); \ |
okini3939 | 0:8ba6230eefbd | 185 | asm ("addl %2,%0\n\t" \ |
okini3939 | 0:8ba6230eefbd | 186 | "adcl %3,%1" \ |
okini3939 | 0:8ba6230eefbd | 187 | : "=rm" (lo), "=rm" (hi) \ |
okini3939 | 0:8ba6230eefbd | 188 | : "r" (__lo), "r" (__hi), "0" (lo), "1" (hi) \ |
okini3939 | 0:8ba6230eefbd | 189 | : "cc"); \ |
okini3939 | 0:8ba6230eefbd | 190 | }) |
okini3939 | 0:8ba6230eefbd | 191 | # endif /* OPT_ACCURACY */ |
okini3939 | 0:8ba6230eefbd | 192 | |
okini3939 | 0:8ba6230eefbd | 193 | # if defined(OPT_ACCURACY) |
okini3939 | 0:8ba6230eefbd | 194 | /* |
okini3939 | 0:8ba6230eefbd | 195 | * Surprisingly, this is faster than SHRD followed by ADC. |
okini3939 | 0:8ba6230eefbd | 196 | */ |
okini3939 | 0:8ba6230eefbd | 197 | # define mad_f_scale64(hi, lo) \ |
okini3939 | 0:8ba6230eefbd | 198 | ({ mad_fixed64hi_t __hi_; \ |
okini3939 | 0:8ba6230eefbd | 199 | mad_fixed64lo_t __lo_; \ |
okini3939 | 0:8ba6230eefbd | 200 | mad_fixed_t __result; \ |
okini3939 | 0:8ba6230eefbd | 201 | asm ("addl %4,%2\n\t" \ |
okini3939 | 0:8ba6230eefbd | 202 | "adcl %5,%3" \ |
okini3939 | 0:8ba6230eefbd | 203 | : "=rm" (__lo_), "=rm" (__hi_) \ |
okini3939 | 0:8ba6230eefbd | 204 | : "0" (lo), "1" (hi), \ |
okini3939 | 0:8ba6230eefbd | 205 | "ir" (1L << (MAD_F_SCALEBITS - 1)), "ir" (0) \ |
okini3939 | 0:8ba6230eefbd | 206 | : "cc"); \ |
okini3939 | 0:8ba6230eefbd | 207 | asm ("shrdl %3,%2,%1" \ |
okini3939 | 0:8ba6230eefbd | 208 | : "=rm" (__result) \ |
okini3939 | 0:8ba6230eefbd | 209 | : "0" (__lo_), "r" (__hi_), "I" (MAD_F_SCALEBITS) \ |
okini3939 | 0:8ba6230eefbd | 210 | : "cc"); \ |
okini3939 | 0:8ba6230eefbd | 211 | __result; \ |
okini3939 | 0:8ba6230eefbd | 212 | }) |
okini3939 | 0:8ba6230eefbd | 213 | # elif defined(OPT_INTEL) |
okini3939 | 0:8ba6230eefbd | 214 | /* |
okini3939 | 0:8ba6230eefbd | 215 | * Alternate Intel scaling that may or may not perform better. |
okini3939 | 0:8ba6230eefbd | 216 | */ |
okini3939 | 0:8ba6230eefbd | 217 | # define mad_f_scale64(hi, lo) \ |
okini3939 | 0:8ba6230eefbd | 218 | ({ mad_fixed_t __result; \ |
okini3939 | 0:8ba6230eefbd | 219 | asm ("shrl %3,%1\n\t" \ |
okini3939 | 0:8ba6230eefbd | 220 | "shll %4,%2\n\t" \ |
okini3939 | 0:8ba6230eefbd | 221 | "orl %2,%1" \ |
okini3939 | 0:8ba6230eefbd | 222 | : "=rm" (__result) \ |
okini3939 | 0:8ba6230eefbd | 223 | : "0" (lo), "r" (hi), \ |
okini3939 | 0:8ba6230eefbd | 224 | "I" (MAD_F_SCALEBITS), "I" (32 - MAD_F_SCALEBITS) \ |
okini3939 | 0:8ba6230eefbd | 225 | : "cc"); \ |
okini3939 | 0:8ba6230eefbd | 226 | __result; \ |
okini3939 | 0:8ba6230eefbd | 227 | }) |
okini3939 | 0:8ba6230eefbd | 228 | # else |
okini3939 | 0:8ba6230eefbd | 229 | # define mad_f_scale64(hi, lo) \ |
okini3939 | 0:8ba6230eefbd | 230 | ({ mad_fixed_t __result; \ |
okini3939 | 0:8ba6230eefbd | 231 | asm ("shrdl %3,%2,%1" \ |
okini3939 | 0:8ba6230eefbd | 232 | : "=rm" (__result) \ |
okini3939 | 0:8ba6230eefbd | 233 | : "0" (lo), "r" (hi), "I" (MAD_F_SCALEBITS) \ |
okini3939 | 0:8ba6230eefbd | 234 | : "cc"); \ |
okini3939 | 0:8ba6230eefbd | 235 | __result; \ |
okini3939 | 0:8ba6230eefbd | 236 | }) /* truncating scale: shifts lo right by MAD_F_SCALEBITS, filling from hi; no rounding term */ |
okini3939 | 0:8ba6230eefbd | 237 | # endif /* OPT_ACCURACY */ |
okini3939 | 0:8ba6230eefbd | 238 | |
okini3939 | 0:8ba6230eefbd | 239 | # define MAD_F_SCALEBITS MAD_F_FRACBITS |
okini3939 | 0:8ba6230eefbd | 240 | # endif |
okini3939 | 0:8ba6230eefbd | 241 | |
okini3939 | 0:8ba6230eefbd | 242 | /* --- ARM ----------------------------------------------------------------- */ |
okini3939 | 0:8ba6230eefbd | 243 | |
okini3939 | 0:8ba6230eefbd | 244 | # elif defined(FPM_ARM) |
okini3939 | 0:8ba6230eefbd | 245 | |
okini3939 | 0:8ba6230eefbd | 246 | /* |
okini3939 | 0:8ba6230eefbd | 247 | * This ARM V4 version is as accurate as FPM_64BIT but much faster. The |
okini3939 | 0:8ba6230eefbd | 248 | * least significant bit is properly rounded at no CPU cycle cost! |
okini3939 | 0:8ba6230eefbd | 249 | */ |
okini3939 | 0:8ba6230eefbd | 250 | # if 1 |
okini3939 | 0:8ba6230eefbd | 251 | /* |
okini3939 | 0:8ba6230eefbd | 252 | * This is faster than the default implementation via MAD_F_MLX() and |
okini3939 | 0:8ba6230eefbd | 253 | * mad_f_scale64(). |
okini3939 | 0:8ba6230eefbd | 254 | */ |
okini3939 | 0:8ba6230eefbd | 255 | # define mad_f_mul(x, y) \ |
okini3939 | 0:8ba6230eefbd | 256 | ({ mad_fixed64hi_t __hi; \ |
okini3939 | 0:8ba6230eefbd | 257 | mad_fixed64lo_t __lo; \ |
okini3939 | 0:8ba6230eefbd | 258 | mad_fixed_t __result; \ |
okini3939 | 0:8ba6230eefbd | 259 | asm ("smull %0, %1, %3, %4\n\t" \ |
okini3939 | 0:8ba6230eefbd | 260 | "movs %0, %0, lsr %5\n\t" \ |
okini3939 | 0:8ba6230eefbd | 261 | "adc %2, %0, %1, lsl %6" \ |
okini3939 | 0:8ba6230eefbd | 262 | : "=&r" (__lo), "=&r" (__hi), "=r" (__result) \ |
okini3939 | 0:8ba6230eefbd | 263 | : "%r" (x), "r" (y), \ |
okini3939 | 0:8ba6230eefbd | 264 | "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS) \ |
okini3939 | 0:8ba6230eefbd | 265 | : "cc"); \ |
okini3939 | 0:8ba6230eefbd | 266 | __result; \ |
okini3939 | 0:8ba6230eefbd | 267 | }) /* smull forms the 64-bit product; movs leaves the last bit shifted out of lo in the carry flag; adc adds that carry as the rounding bit */ |
okini3939 | 0:8ba6230eefbd | 268 | # endif |
okini3939 | 0:8ba6230eefbd | 269 | |
okini3939 | 0:8ba6230eefbd | 270 | # define MAD_F_MLX(hi, lo, x, y) \ |
okini3939 | 0:8ba6230eefbd | 271 | asm ("smull %0, %1, %2, %3" \ |
okini3939 | 0:8ba6230eefbd | 272 | : "=&r" (lo), "=&r" (hi) \ |
okini3939 | 0:8ba6230eefbd | 273 | : "%r" (x), "r" (y)) /* hi:lo = x * y (signed 64-bit product) */ |
okini3939 | 0:8ba6230eefbd | 274 | |
okini3939 | 0:8ba6230eefbd | 275 | # define MAD_F_MLA(hi, lo, x, y) \ |
okini3939 | 0:8ba6230eefbd | 276 | asm ("smlal %0, %1, %2, %3" \ |
okini3939 | 0:8ba6230eefbd | 277 | : "+r" (lo), "+r" (hi) \ |
okini3939 | 0:8ba6230eefbd | 278 | : "%r" (x), "r" (y)) /* hi:lo += x * y; the + constraints mark lo and hi as read-write */ |
okini3939 | 0:8ba6230eefbd | 279 | |
okini3939 | 0:8ba6230eefbd | 280 | # define MAD_F_MLN(hi, lo) \ |
okini3939 | 0:8ba6230eefbd | 281 | asm ("rsbs %0, %2, #0\n\t" \ |
okini3939 | 0:8ba6230eefbd | 282 | "rsc %1, %3, #0" \ |
okini3939 | 0:8ba6230eefbd | 283 | : "=r" (lo), "=r" (hi) \ |
okini3939 | 0:8ba6230eefbd | 284 | : "0" (lo), "1" (hi) \ |
okini3939 | 0:8ba6230eefbd | 285 | : "cc") /* negates the 64-bit pair hi:lo in place; NOTE(review): RSC is not available in the Thumb instruction sets - confirm FPM_ARM is never selected for a Thumb-only (Cortex-M) build */ |
okini3939 | 0:8ba6230eefbd | 286 | |
okini3939 | 0:8ba6230eefbd | 287 | # define mad_f_scale64(hi, lo) \ |
okini3939 | 0:8ba6230eefbd | 288 | ({ mad_fixed_t __result; \ |
okini3939 | 0:8ba6230eefbd | 289 | asm ("movs %0, %1, lsr %3\n\t" \ |
okini3939 | 0:8ba6230eefbd | 290 | "adc %0, %0, %2, lsl %4" \ |
okini3939 | 0:8ba6230eefbd | 291 | : "=&r" (__result) \ |
okini3939 | 0:8ba6230eefbd | 292 | : "r" (lo), "r" (hi), \ |
okini3939 | 0:8ba6230eefbd | 293 | "M" (MAD_F_SCALEBITS), "M" (32 - MAD_F_SCALEBITS) \ |
okini3939 | 0:8ba6230eefbd | 294 | : "cc"); \ |
okini3939 | 0:8ba6230eefbd | 295 | __result; \ |
okini3939 | 0:8ba6230eefbd | 296 | }) /* same shift-and-add-carry sequence as the ARM mad_f_mul, applied to an existing hi:lo pair */ |
okini3939 | 0:8ba6230eefbd | 297 | |
okini3939 | 0:8ba6230eefbd | 298 | # define MAD_F_SCALEBITS MAD_F_FRACBITS /* shift count used by the ARM macros in this section */ |
okini3939 | 0:8ba6230eefbd | 299 | |
okini3939 | 0:8ba6230eefbd | 300 | /* --- MIPS ---------------------------------------------------------------- */ |
okini3939 | 0:8ba6230eefbd | 301 | |
okini3939 | 0:8ba6230eefbd | 302 | # elif defined(FPM_MIPS) |
okini3939 | 0:8ba6230eefbd | 303 | |
okini3939 | 0:8ba6230eefbd | 304 | /* |
okini3939 | 0:8ba6230eefbd | 305 | * This MIPS version is fast and accurate; the disposition of the least |
okini3939 | 0:8ba6230eefbd | 306 | * significant bit depends on OPT_ACCURACY via mad_f_scale64(). |
okini3939 | 0:8ba6230eefbd | 307 | */ |
okini3939 | 0:8ba6230eefbd | 308 | # define MAD_F_MLX(hi, lo, x, y) \ |
okini3939 | 0:8ba6230eefbd | 309 | asm ("mult %2,%3" \ |
okini3939 | 0:8ba6230eefbd | 310 | : "=l" (lo), "=h" (hi) \ |
okini3939 | 0:8ba6230eefbd | 311 | : "%r" (x), "r" (y)) /* hi:lo = x * y; the l and h constraints name the MIPS LO and HI result registers */ |
okini3939 | 0:8ba6230eefbd | 312 | |
okini3939 | 0:8ba6230eefbd | 313 | # if defined(HAVE_MADD_ASM) |
okini3939 | 0:8ba6230eefbd | 314 | # define MAD_F_MLA(hi, lo, x, y) \ |
okini3939 | 0:8ba6230eefbd | 315 | asm ("madd %2,%3" \ |
okini3939 | 0:8ba6230eefbd | 316 | : "+l" (lo), "+h" (hi) \ |
okini3939 | 0:8ba6230eefbd | 317 | : "%r" (x), "r" (y)) |
okini3939 | 0:8ba6230eefbd | 318 | # elif defined(HAVE_MADD16_ASM) |
okini3939 | 0:8ba6230eefbd | 319 | /* |
okini3939 | 0:8ba6230eefbd | 320 | * This loses significant accuracy due to the 16-bit integer limit in the |
okini3939 | 0:8ba6230eefbd | 321 | * multiply/accumulate instruction. |
okini3939 | 0:8ba6230eefbd | 322 | */ |
okini3939 | 0:8ba6230eefbd | 323 | # define MAD_F_ML0(hi, lo, x, y) \ |
okini3939 | 0:8ba6230eefbd | 324 | asm ("mult %2,%3" \ |
okini3939 | 0:8ba6230eefbd | 325 | : "=l" (lo), "=h" (hi) \ |
okini3939 | 0:8ba6230eefbd | 326 | : "%r" ((x) >> 12), "r" ((y) >> 16)) |
okini3939 | 0:8ba6230eefbd | 327 | # define MAD_F_MLA(hi, lo, x, y) \ |
okini3939 | 0:8ba6230eefbd | 328 | asm ("madd16 %2,%3" \ |
okini3939 | 0:8ba6230eefbd | 329 | : "+l" (lo), "+h" (hi) \ |
okini3939 | 0:8ba6230eefbd | 330 | : "%r" ((x) >> 12), "r" ((y) >> 16)) |
okini3939 | 0:8ba6230eefbd | 331 | # define MAD_F_MLZ(hi, lo) ((mad_fixed_t) (lo)) |
okini3939 | 0:8ba6230eefbd | 332 | # endif |
okini3939 | 0:8ba6230eefbd | 333 | |
okini3939 | 0:8ba6230eefbd | 334 | # if defined(OPT_SPEED) |
okini3939 | 0:8ba6230eefbd | 335 | # define mad_f_scale64(hi, lo) \ |
okini3939 | 0:8ba6230eefbd | 336 | ((mad_fixed_t) ((hi) << (32 - MAD_F_SCALEBITS))) |
okini3939 | 0:8ba6230eefbd | 337 | # define MAD_F_SCALEBITS MAD_F_FRACBITS |
okini3939 | 0:8ba6230eefbd | 338 | # endif |
okini3939 | 0:8ba6230eefbd | 339 | |
okini3939 | 0:8ba6230eefbd | 340 | /* --- SPARC --------------------------------------------------------------- */ |
okini3939 | 0:8ba6230eefbd | 341 | |
okini3939 | 0:8ba6230eefbd | 342 | # elif defined(FPM_SPARC) |
okini3939 | 0:8ba6230eefbd | 343 | |
okini3939 | 0:8ba6230eefbd | 344 | /* |
okini3939 | 0:8ba6230eefbd | 345 | * This SPARC V8 version is fast and accurate; the disposition of the least |
okini3939 | 0:8ba6230eefbd | 346 | * significant bit depends on OPT_ACCURACY via mad_f_scale64(). |
okini3939 | 0:8ba6230eefbd | 347 | */ |
okini3939 | 0:8ba6230eefbd | 348 | # define MAD_F_MLX(hi, lo, x, y) \ |
okini3939 | 0:8ba6230eefbd | 349 | asm ("smul %2, %3, %0\n\t" \ |
okini3939 | 0:8ba6230eefbd | 350 | "rd %%y, %1" \ |
okini3939 | 0:8ba6230eefbd | 351 | : "=r" (lo), "=r" (hi) \ |
okini3939 | 0:8ba6230eefbd | 352 | : "%r" (x), "rI" (y)) /* smul writes the low word to lo; the high word is then read back from the Y register into hi */ |
okini3939 | 0:8ba6230eefbd | 353 | |
okini3939 | 0:8ba6230eefbd | 354 | /* --- PowerPC ------------------------------------------------------------- */ |
okini3939 | 0:8ba6230eefbd | 355 | |
okini3939 | 0:8ba6230eefbd | 356 | # elif defined(FPM_PPC) |
okini3939 | 0:8ba6230eefbd | 357 | |
okini3939 | 0:8ba6230eefbd | 358 | /* |
okini3939 | 0:8ba6230eefbd | 359 | * This PowerPC version is fast and accurate; the disposition of the least |
okini3939 | 0:8ba6230eefbd | 360 | * significant bit depends on OPT_ACCURACY via mad_f_scale64(). |
okini3939 | 0:8ba6230eefbd | 361 | */ |
okini3939 | 0:8ba6230eefbd | 362 | # define MAD_F_MLX(hi, lo, x, y) \ |
okini3939 | 0:8ba6230eefbd | 363 | do { \ |
okini3939 | 0:8ba6230eefbd | 364 | asm ("mullw %0,%1,%2" \ |
okini3939 | 0:8ba6230eefbd | 365 | : "=r" (lo) \ |
okini3939 | 0:8ba6230eefbd | 366 | : "%r" (x), "r" (y)); \ |
okini3939 | 0:8ba6230eefbd | 367 | asm ("mulhw %0,%1,%2" \ |
okini3939 | 0:8ba6230eefbd | 368 | : "=r" (hi) \ |
okini3939 | 0:8ba6230eefbd | 369 | : "%r" (x), "r" (y)); \ |
okini3939 | 0:8ba6230eefbd | 370 | } \ |
okini3939 | 0:8ba6230eefbd | 371 | while (0) /* two separate multiplies: mullw yields the low word, mulhw the high word; x and y are each expanded twice */ |
okini3939 | 0:8ba6230eefbd | 372 | |
okini3939 | 0:8ba6230eefbd | 373 | # if defined(OPT_ACCURACY) |
okini3939 | 0:8ba6230eefbd | 374 | /* |
okini3939 | 0:8ba6230eefbd | 375 | * This gives best accuracy but is not very fast. |
okini3939 | 0:8ba6230eefbd | 376 | */ |
okini3939 | 0:8ba6230eefbd | 377 | # define MAD_F_MLA(hi, lo, x, y) \ |
okini3939 | 0:8ba6230eefbd | 378 | ({ mad_fixed64hi_t __hi; \ |
okini3939 | 0:8ba6230eefbd | 379 | mad_fixed64lo_t __lo; \ |
okini3939 | 0:8ba6230eefbd | 380 | MAD_F_MLX(__hi, __lo, (x), (y)); \ |
okini3939 | 0:8ba6230eefbd | 381 | asm ("addc %0,%2,%3\n\t" \ |
okini3939 | 0:8ba6230eefbd | 382 | "adde %1,%4,%5" \ |
okini3939 | 0:8ba6230eefbd | 383 | : "=r" (lo), "=r" (hi) \ |
okini3939 | 0:8ba6230eefbd | 384 | : "%r" (lo), "r" (__lo), \ |
okini3939 | 0:8ba6230eefbd | 385 | "%r" (hi), "r" (__hi) \ |
okini3939 | 0:8ba6230eefbd | 386 | : "xer"); \ |
okini3939 | 0:8ba6230eefbd | 387 | }) |
okini3939 | 0:8ba6230eefbd | 388 | # endif |
okini3939 | 0:8ba6230eefbd | 389 | |
okini3939 | 0:8ba6230eefbd | 390 | # if defined(OPT_ACCURACY) |
okini3939 | 0:8ba6230eefbd | 391 | /* |
okini3939 | 0:8ba6230eefbd | 392 | * This is slower than the truncating version below it. |
okini3939 | 0:8ba6230eefbd | 393 | */ |
okini3939 | 0:8ba6230eefbd | 394 | # define mad_f_scale64(hi, lo) \ |
okini3939 | 0:8ba6230eefbd | 395 | ({ mad_fixed_t __result, __round; \ |
okini3939 | 0:8ba6230eefbd | 396 | asm ("rotrwi %0,%1,%2" \ |
okini3939 | 0:8ba6230eefbd | 397 | : "=r" (__result) \ |
okini3939 | 0:8ba6230eefbd | 398 | : "r" (lo), "i" (MAD_F_SCALEBITS)); \ |
okini3939 | 0:8ba6230eefbd | 399 | asm ("extrwi %0,%1,1,0" \ |
okini3939 | 0:8ba6230eefbd | 400 | : "=r" (__round) \ |
okini3939 | 0:8ba6230eefbd | 401 | : "r" (__result)); \ |
okini3939 | 0:8ba6230eefbd | 402 | asm ("insrwi %0,%1,%2,0" \ |
okini3939 | 0:8ba6230eefbd | 403 | : "+r" (__result) \ |
okini3939 | 0:8ba6230eefbd | 404 | : "r" (hi), "i" (MAD_F_SCALEBITS)); \ |
okini3939 | 0:8ba6230eefbd | 405 | asm ("add %0,%1,%2" \ |
okini3939 | 0:8ba6230eefbd | 406 | : "=r" (__result) \ |
okini3939 | 0:8ba6230eefbd | 407 | : "%r" (__result), "r" (__round)); \ |
okini3939 | 0:8ba6230eefbd | 408 | __result; \ |
okini3939 | 0:8ba6230eefbd | 409 | }) |
okini3939 | 0:8ba6230eefbd | 410 | # else |
okini3939 | 0:8ba6230eefbd | 411 | # define mad_f_scale64(hi, lo) \ |
okini3939 | 0:8ba6230eefbd | 412 | ({ mad_fixed_t __result; \ |
okini3939 | 0:8ba6230eefbd | 413 | asm ("rotrwi %0,%1,%2" \ |
okini3939 | 0:8ba6230eefbd | 414 | : "=r" (__result) \ |
okini3939 | 0:8ba6230eefbd | 415 | : "r" (lo), "i" (MAD_F_SCALEBITS)); \ |
okini3939 | 0:8ba6230eefbd | 416 | asm ("insrwi %0,%1,%2,0" \ |
okini3939 | 0:8ba6230eefbd | 417 | : "+r" (__result) \ |
okini3939 | 0:8ba6230eefbd | 418 | : "r" (hi), "i" (MAD_F_SCALEBITS)); \ |
okini3939 | 0:8ba6230eefbd | 419 | __result; \ |
okini3939 | 0:8ba6230eefbd | 420 | }) |
okini3939 | 0:8ba6230eefbd | 421 | # endif |
okini3939 | 0:8ba6230eefbd | 422 | |
okini3939 | 0:8ba6230eefbd | 423 | # define MAD_F_SCALEBITS MAD_F_FRACBITS |
okini3939 | 0:8ba6230eefbd | 424 | |
okini3939 | 0:8ba6230eefbd | 425 | /* --- Default ------------------------------------------------------------- */ |
okini3939 | 0:8ba6230eefbd | 426 | |
okini3939 | 0:8ba6230eefbd | 427 | # elif defined(FPM_DEFAULT) |
okini3939 | 0:8ba6230eefbd | 428 | |
okini3939 | 0:8ba6230eefbd | 429 | /* |
okini3939 | 0:8ba6230eefbd | 430 | * This version is the most portable but it loses significant accuracy. |
okini3939 | 0:8ba6230eefbd | 431 | * Furthermore, accuracy is biased against the second argument, so care |
okini3939 | 0:8ba6230eefbd | 432 | * should be taken when ordering operands. |
okini3939 | 0:8ba6230eefbd | 433 | * |
okini3939 | 0:8ba6230eefbd | 434 | * The scale factors are constant as this is not used with SSO. |
okini3939 | 0:8ba6230eefbd | 435 | * |
okini3939 | 0:8ba6230eefbd | 436 | * Pre-rounding is required to stay within the limits of compliance. |
okini3939 | 0:8ba6230eefbd | 437 | */ |
okini3939 | 0:8ba6230eefbd | 438 | # if defined(OPT_SPEED) /* truncating pre-shift */ |
okini3939 | 0:8ba6230eefbd | 439 | # define mad_f_mul(x, y) (((x) >> 12) * ((y) >> 16)) /* drops 12 + 16 = 28 bits before multiplying; y loses more precision than x */ |
okini3939 | 0:8ba6230eefbd | 440 | # else /* pre-rounding variant */ |
okini3939 | 0:8ba6230eefbd | 441 | # define mad_f_mul(x, y) ((((x) + (1L << 11)) >> 12) * \ |
okini3939 | 0:8ba6230eefbd | 442 | (((y) + (1L << 15)) >> 16)) /* adds half of each dropped range before the shifts */ |
okini3939 | 0:8ba6230eefbd | 443 | # endif |
okini3939 | 0:8ba6230eefbd | 444 | |
okini3939 | 0:8ba6230eefbd | 445 | /* ------------------------------------------------------------------------- */ |
okini3939 | 0:8ba6230eefbd | 446 | |
okini3939 | 0:8ba6230eefbd | 447 | # else |
okini3939 | 0:8ba6230eefbd | 448 | # error "no FPM selected" |
okini3939 | 0:8ba6230eefbd | 449 | # endif |
okini3939 | 0:8ba6230eefbd | 450 | |
okini3939 | 0:8ba6230eefbd | 451 | /* default implementations */ |
okini3939 | 0:8ba6230eefbd | 452 | |
okini3939 | 0:8ba6230eefbd | 453 | # if !defined(mad_f_mul) /* only when the selected FPM section did not supply its own mad_f_mul */ |
okini3939 | 0:8ba6230eefbd | 454 | # define mad_f_mul(x, y) \ |
okini3939 | 0:8ba6230eefbd | 455 | ({ register mad_fixed64hi_t __hi; \ |
okini3939 | 0:8ba6230eefbd | 456 | register mad_fixed64lo_t __lo; \ |
okini3939 | 0:8ba6230eefbd | 457 | MAD_F_MLX(__hi, __lo, (x), (y)); \ |
okini3939 | 0:8ba6230eefbd | 458 | mad_f_scale64(__hi, __lo); \ |
okini3939 | 0:8ba6230eefbd | 459 | }) /* statement expression (GNU C extension): full product via MAD_F_MLX, then scaled by mad_f_scale64 */ |
okini3939 | 0:8ba6230eefbd | 460 | # endif |
okini3939 | 0:8ba6230eefbd | 461 | |
okini3939 | 0:8ba6230eefbd | 462 | # if !defined(MAD_F_MLA) /* no double-word accumulate available: accumulate already-scaled products in lo only */ |
okini3939 | 0:8ba6230eefbd | 463 | # define MAD_F_ML0(hi, lo, x, y) ((lo) = mad_f_mul((x), (y))) /* start a sum; hi is unused */ |
okini3939 | 0:8ba6230eefbd | 464 | # define MAD_F_MLA(hi, lo, x, y) ((lo) += mad_f_mul((x), (y))) /* add one product to the sum */ |
okini3939 | 0:8ba6230eefbd | 465 | # define MAD_F_MLN(hi, lo) ((lo) = -(lo)) /* negate the sum */ |
okini3939 | 0:8ba6230eefbd | 466 | # define MAD_F_MLZ(hi, lo) ((void) (hi), (mad_fixed_t) (lo)) /* result is lo as-is; hi is evaluated and discarded */ |
okini3939 | 0:8ba6230eefbd | 467 | # endif |
okini3939 | 0:8ba6230eefbd | 468 | |
okini3939 | 0:8ba6230eefbd | 469 | # if !defined(MAD_F_ML0) /* starting a sum defaults to a plain full-width multiply */ |
okini3939 | 0:8ba6230eefbd | 470 | # define MAD_F_ML0(hi, lo, x, y) MAD_F_MLX((hi), (lo), (x), (y)) |
okini3939 | 0:8ba6230eefbd | 471 | # endif |
okini3939 | 0:8ba6230eefbd | 472 | |
okini3939 | 0:8ba6230eefbd | 473 | # if !defined(MAD_F_MLN) /* portable negation of the double-word pair hi:lo */ |
okini3939 | 0:8ba6230eefbd | 474 | # define MAD_F_MLN(hi, lo) ((hi) = ((lo) = -(lo)) ? ~(hi) : -(hi)) /* lo is negated first; hi becomes ~hi when the new lo is nonzero, -hi when it is zero */ |
okini3939 | 0:8ba6230eefbd | 475 | # endif |
okini3939 | 0:8ba6230eefbd | 476 | |
okini3939 | 0:8ba6230eefbd | 477 | # if !defined(MAD_F_MLZ) /* finishing a sum defaults to scaling the pair back to mad_fixed_t */ |
okini3939 | 0:8ba6230eefbd | 478 | # define MAD_F_MLZ(hi, lo) mad_f_scale64((hi), (lo)) |
okini3939 | 0:8ba6230eefbd | 479 | # endif |
okini3939 | 0:8ba6230eefbd | 480 | |
okini3939 | 0:8ba6230eefbd | 481 | # if !defined(mad_f_scale64) /* portable C fallback; also the only place MAD_F_SCALEBITS is defined for sections that omit it */ |
okini3939 | 0:8ba6230eefbd | 482 | # if defined(OPT_ACCURACY) |
okini3939 | 0:8ba6230eefbd | 483 | # define mad_f_scale64(hi, lo) \ |
okini3939 | 0:8ba6230eefbd | 484 | ((((mad_fixed_t) \ |
okini3939 | 0:8ba6230eefbd | 485 | (((hi) << (32 - (MAD_F_SCALEBITS - 1))) | \ |
okini3939 | 0:8ba6230eefbd | 486 | ((lo) >> (MAD_F_SCALEBITS - 1)))) + 1) >> 1) /* keeps one extra low bit, adds 1, then drops that bit to round */ |
okini3939 | 0:8ba6230eefbd | 487 | # else |
okini3939 | 0:8ba6230eefbd | 488 | # define mad_f_scale64(hi, lo) \ |
okini3939 | 0:8ba6230eefbd | 489 | ((mad_fixed_t) \ |
okini3939 | 0:8ba6230eefbd | 490 | (((hi) << (32 - MAD_F_SCALEBITS)) | \ |
okini3939 | 0:8ba6230eefbd | 491 | ((lo) >> MAD_F_SCALEBITS))) /* truncating: joins the low bits of hi with the high bits of lo */ |
okini3939 | 0:8ba6230eefbd | 492 | # endif |
okini3939 | 0:8ba6230eefbd | 493 | # define MAD_F_SCALEBITS MAD_F_FRACBITS |
okini3939 | 0:8ba6230eefbd | 494 | # endif |
okini3939 | 0:8ba6230eefbd | 495 | |
okini3939 | 0:8ba6230eefbd | 496 | /* C routines */ |
okini3939 | 0:8ba6230eefbd | 497 | |
okini3939 | 0:8ba6230eefbd | 498 | mad_fixed_t mad_f_abs(mad_fixed_t); /* declared only; defined outside this header - presumably the absolute value */ |
okini3939 | 0:8ba6230eefbd | 499 | mad_fixed_t mad_f_div(mad_fixed_t, mad_fixed_t); /* declared only; defined outside this header - presumably a fixed-point quotient */ |
okini3939 | 0:8ba6230eefbd | 500 | |
okini3939 | 0:8ba6230eefbd | 501 | # endif |