ese 519
Diff: fhtdit.cc
- Revision:
- 0:2076b4d80327
diff -r 000000000000 -r 2076b4d80327 fhtdit.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fhtdit.cc Tue Apr 07 21:09:22 2015 +0000 @@ -0,0 +1,262 @@ +// This file is part of the FXT library. +// Copyright (C) 2010, 2012 Joerg Arndt +// License: GNU General Public License version 3 or later, +// see the file COPYING.txt in the main directory. + +#include "fxttypes.h" +#include "cmult.h" +#include "sumdiff.h" +#include "constants.h" // COS_1_PI_8, SIN_1_PI_8 +#include "sincos.h" +#include "revbinpermute.h" + +#include <cmath> // M_PI, M_SQRT1_2 + +// tuning parameter: +// define to use trig recurrence: +// (and possibly lose some precision, see below) +//#define USE_TRIG_REC +// with type 'long double' slight speed loss on my machine, +// with type 'double' little speed gain. + +#if defined USE_TRIG_REC +//#warning 'FYI: fht(double *, ulong) uses trig recursion' +#endif + +// tuning parameter: +#define INITIAL_RADIX_16 1 // 0 or 1 (default) +// +#if ( INITIAL_RADIX_16==1 ) +//#warning 'FYI: INITIAL_RADIX_16 set in fht_dit(double *, ulong)' +#else +#warning 'INITIAL_RADIX_16 is NOT SET in fht_dit(double *, ulong)' +#endif + + +void +fht_dit_core(double *f, ulong ldn) +// Decimation in time (DIT) FHT. +// Input data must be in revbin_permuted order. +// ldn := base-2 logarithm of the array length. +{ + if ( ldn<=2 ) + { + if ( ldn==1 ) // two point fht + { + sumdiff(f[0], f[1]); + } + else if ( ldn==2 ) // four point fht + { + double f0, f1, f2, f3; + sumdiff(f[0], f[1], f0, f1); + sumdiff(f[2], f[3], f2, f3); + sumdiff(f0, f2, f[0], f[2]); + sumdiff(f1, f3, f[1], f[3]); + } + return; + } + + const ulong n = (1UL<<ldn); + const double *fn = f + n; + ulong ldk = ldn & 1; + + if ( ldk==0 ) // ldn is multiple of 2 => n is a power of 4 + { +#if ( INITIAL_RADIX_16==1 ) + for (double *fi=f; fi<fn; fi+=16) // radix-16 step + { + double f0, f1, f2, f3; + sumdiff(fi[0], fi[1], f0, f1); + sumdiff(fi[2], fi[3], f2, f3); + sumdiff(f0, f2, fi[0], fi[2]); + sumdiff(f1, f3, fi[1], fi[3]); + + sumdiff(fi[4], fi[5], f0, f1); + sumdiff(fi[6], fi[7], f2, f3); + sumdiff(f0, f2, fi[4], fi[6]); + sumdiff(f1, f3, fi[5], fi[7]); + + sumdiff(fi[8], fi[9], f0, f1); + sumdiff(fi[10], fi[11], f2, f3); + sumdiff(f0, f2, fi[8], fi[10]); + sumdiff(f1, f3, fi[9], fi[11]); + + sumdiff(fi[12], fi[13], f0, f1); + sumdiff(fi[14], fi[15], f2, f3); + sumdiff(f0, f2, fi[12], fi[14]); + sumdiff(f1, f3, fi[13], fi[15]); + + sumdiff(fi[0], fi[4], f0, f1); + sumdiff(fi[8], fi[12], f2, f3); + sumdiff(f0, f2, fi[0], fi[8]); + sumdiff(f1, f3, fi[4], fi[12]); + + sumdiff(fi[2], fi[6], f0, f1); + f3 = M_SQRT2 * fi[14]; + f2 = M_SQRT2 * fi[10]; + sumdiff(f0, f2, fi[2], fi[10]); + sumdiff(f1, f3, fi[6], fi[14]); + + double a, b, g0, g1, g2, g3; + sumdiff(fi[5], fi[7], a, b); + a *= M_SQRT1_2; + b *= M_SQRT1_2; + sumdiff(fi[1], a, f0, f1); + sumdiff(fi[3], b, g0, g1); + sumdiff(fi[13], fi[15], a, b); + a *= M_SQRT1_2; + b *= M_SQRT1_2; + sumdiff(fi[9], a, f2, f3); + sumdiff(fi[11], b, g2, g3); + double c1 = COS_1_PI_8; // jjkeep + double s1 = SIN_1_PI_8; // jjkeep + cmult(s1, c1, f2, g3, b, a); + sumdiff(f0, a, fi[1], fi[9]); + sumdiff(g1, b, fi[7], fi[15]); + cmult(c1, s1, g2, f3, b, a); + sumdiff(g0, a, fi[3], fi[11]); + sumdiff(f1, b, fi[5], fi[13]); + } + ldk = 4; +#else // INITIAL_RADIX_16 + for (double *fi=f; fi<fn; fi+=4) // radix-4 step + { + double f0, f1, f2, f3; + sumdiff(fi[0], fi[1], f0, f1); + sumdiff(fi[2], fi[3], f2, f3); + sumdiff(f0, f2, fi[0], fi[2]); + sumdiff(f1, f3, fi[1], fi[3]); + } + ldk = 2; +#endif // INITIAL_RADIX_16 + } + else // ldk==1, n is no power of 4 + { + for (double *fi=f; fi<fn; fi+=8) // radix-8 step + { + double g0, f0, f1, g1; + sumdiff(fi[0], fi[1], f0, g0); + sumdiff(fi[2], fi[3], f1, g1); + + sumdiff(f0, f1); + sumdiff(g0, g1); + + double s1, c1, s2, c2; + sumdiff(fi[4], fi[5], s1, c1); + sumdiff(fi[6], fi[7], s2, c2); + + sumdiff(s1, s2); + + sumdiff(f0, s1, fi[0], fi[4]); + sumdiff(f1, s2, fi[2], fi[6]); + + c1 *= M_SQRT2; + c2 *= M_SQRT2; + + sumdiff(g0, c1, fi[1], fi[5]); + sumdiff(g1, c2, fi[3], fi[7]); + } + ldk = 3; + } + + + for ( ; ldk<ldn; ldk+=2) + { + ulong k = 1UL<<ldk; + ulong kh = k >> 1; + ulong k2 = k << 1; + ulong k3 = k2 + k; + ulong k4 = k2 << 1; + + for (double *fi=f, *gi=f+kh; fi<fn; fi+=k4, gi+=k4) + { + double f0, f1, f2, f3; + sumdiff(fi[0], fi[k], f0, f1); + sumdiff(fi[k2], fi[k3], f2, f3); + sumdiff(f0, f2, fi[0], fi[k2]); + sumdiff(f1, f3, fi[k], fi[k3]); + + sumdiff(gi[0], gi[k], f0, f1); + f3 = M_SQRT2 * gi[k3]; + f2 = M_SQRT2 * gi[k2]; + sumdiff(f0, f2, gi[0], gi[k2]); + sumdiff(f1, f3, gi[k], gi[k3]); + } + + double tt = M_PI_4/(double)kh; // jjkeep +#if defined USE_TRIG_REC + double s1 = 0.0, c1 = 1.0; // jjkeep + double al = sin(0.5*tt); // jjkeep + al *= (2.0*al); + double be = sin(tt); // jjkeep +#endif // USE_TRIG_REC + + for (ulong i=1; i<kh; i++) + { +#if defined USE_TRIG_REC + { + double t1 = c1; // jjkeep + c1 -= (al*t1+be*s1); + s1 -= (al*s1-be*t1); + } +#else + double s1, c1; // jjkeep + SinCos(tt*(double)i, &s1, &c1); // jjkeep +#endif // USE_TRIG_REC + + double c2, s2; // jjkeep + csqr(c1, s1, c2, s2); + + for (double *fi=f+i, *gi=f+k-i; fi<fn; fi+=k4, gi+=k4) + { + double a, b, g0, f0, f1, g1, f2, g2, f3, g3; + cmult(s2, c2, fi[k], gi[k], b, a); + sumdiff(fi[0], a, f0, f1); + sumdiff(gi[0], b, g0, g1); + + cmult(s2, c2, fi[k3], gi[k3], b, a); + sumdiff(fi[k2], a, f2, f3); + sumdiff(gi[k2], b, g2, g3); + + cmult(s1, c1, f2, g3, b, a); + sumdiff(f0, a, fi[0], fi[k2]); + sumdiff(g1, b, gi[k], gi[k3]); + + cmult(c1, s1, g2, f3, b, a); + sumdiff(g0, a, gi[0], gi[k2]); + sumdiff(f1, b, fi[k], fi[k3]); + } + } + } +} +// ------------------------- + + +void +fht_dit(double *f, ulong ldn) +// Fast Hartley Transform. +// Split-radix decimation in time (DIT) algorithm. +// ldn := base-2 logarithm of the array length. +{ + if ( ldn<=2 ) + { + if ( ldn==1 ) // two point fht + { + sumdiff(f[0], f[1]); + } + else if ( ldn==2 ) // four point fht + { + double f0, f1, f2, f3; + sumdiff(f[0], f[2], f0, f1); + sumdiff(f[1], f[3], f2, f3); + sumdiff(f0, f2, f[0], f[2]); + sumdiff(f1, f3, f[1], f[3]); + } + + return; + } + + revbin_permute(f, 1UL<<ldn); + fht_dit_core(f, ldn); +} +// -------------------------