widely known free FFT library.
Dependents: Peach_AudioChannelDividerAndCompensator
Revision 0:d7215946c769, committed 2015-10-11
- Comitter:
- dokunewon
- Date:
- Sun Oct 11 07:20:35 2015 +0000
- Commit message:
- First version.
Changed in this revision
diff -r 000000000000 -r d7215946c769 bluestein.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bluestein.c Sun Oct 11 07:20:35 2015 +0000 @@ -0,0 +1,190 @@ +/* + * This file is part of libfftpack. + * + * libfftpack is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * libfftpack is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with libfftpack; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik + * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt + * (DLR). + */ + +/* + * Copyright (C) 2005, 2006, 2007, 2008 Max-Planck-Society + * \author Martin Reinecke + */ + +#include <math.h> +#include <stdlib.h> +#include "mytype.h" +#include "fftpack.h" +#include "bluestein.h" + +/* returns the sum of all prime factors of n */ +size_t prime_factor_sum (size_t n) + { + size_t result=0,x,limit,tmp; + while (((tmp=(n>>1))<<1)==n) + { result+=2; n=tmp; } + + limit=(size_t)sqrt(n+0.01); + for (x=3; x<=limit; x+=2) + while ((tmp=(n/x))*x==n) + { + result+=x; + n=tmp; + limit=(size_t)sqrt(n+0.01); + } + if (n>1) result+=n; + + return result; + } + +/* returns the smallest composite of 2, 3 and 5 which is >= n */ +static size_t good_size(size_t n) + { + size_t maxfactors=1, i, j, k, f2=1, f3, f5, bestfac, guessfac; + while ((n>>maxfactors)>0) + ++maxfactors; + bestfac=1<<maxfactors; + + for (i=0; i<maxfactors; ++i) + { + f3=1; + for (j=0; j<maxfactors-i; ++j) + { + if (f2*f3>bestfac) break; + f5=1; + for (k=0; k<maxfactors-i-j; ++k) + { + guessfac = f2*f3*f5; + if (guessfac>=bestfac) break; + if ((guessfac>=n) && (guessfac<bestfac)) + bestfac=guessfac; + f5*=5; + } + f3*=3; + } + f2*=2; + } + return bestfac; + } + +void bluestein_i (size_t n, FLOAT **tstorage) + { + static const FLOAT pi=3.14159265358979323846; + size_t n2=good_size(n*2-1); + size_t m, coeff; + FLOAT angle, xn2; + FLOAT *bk, *bkf, *work; + FLOAT pibyn=pi/n; + *tstorage = RALLOC(FLOAT,2+2*n+8*n2+16); + ((size_t *)(*tstorage))[0]=n2; + bk = *tstorage+2; + bkf = *tstorage+2+2*n; + work= *tstorage+2+2*(n+n2); + +/* initialize b_k */ + bk[0] = 1; + bk[1] = 0; + + coeff=0; + for (m=1; m<n; ++m) + { + coeff+=2*m-1; + if (coeff>=2*n) coeff-=2*n; + angle = pibyn*coeff; + bk[2*m] = cos(angle); + bk[2*m+1] = sin(angle); + } + +/* initialize the zero-padded, Fourier transformed b_k. Add normalisation. */ + xn2 = 1./n2; + bkf[0] = bk[0]*xn2; + bkf[1] = bk[1]*xn2; + for (m=2; m<2*n; m+=2) + { + bkf[m] = bkf[2*n2-m] = bk[m] *xn2; + bkf[m+1] = bkf[2*n2-m+1] = bk[m+1] *xn2; + } + for (m=2*n;m<=(2*n2-2*n+1);++m) + bkf[m]=0.; + cffti (n2,work); + cfftf (n2,bkf,work); + } + +void bluestein (size_t n, FLOAT *data, FLOAT *tstorage, int isign) + { + size_t n2=*((size_t *)tstorage); + size_t m; + FLOAT *bk, *bkf, *akf, *work; + bk = tstorage+2; + bkf = tstorage+2+2*n; + work= tstorage+2+2*(n+n2); + akf = tstorage+2+2*n+6*n2+16; + +/* initialize a_k and FFT it */ + if (isign>0) + for (m=0; m<2*n; m+=2) + { + akf[m] = data[m]*bk[m] - data[m+1]*bk[m+1]; + akf[m+1] = data[m]*bk[m+1] + data[m+1]*bk[m]; + } + else + for (m=0; m<2*n; m+=2) + { + akf[m] = data[m]*bk[m] + data[m+1]*bk[m+1]; + akf[m+1] =-data[m]*bk[m+1] + data[m+1]*bk[m]; + } + for (m=2*n; m<2*n2; ++m) + akf[m]=0; + + cfftf (n2,akf,work); + +/* do the convolution */ + if (isign>0) + for (m=0; m<2*n2; m+=2) + { + FLOAT im = -akf[m]*bkf[m+1] + akf[m+1]*bkf[m]; + akf[m ] = akf[m]*bkf[m] + akf[m+1]*bkf[m+1]; + akf[m+1] = im; + } + else + for (m=0; m<2*n2; m+=2) + { + FLOAT im = akf[m]*bkf[m+1] + akf[m+1]*bkf[m]; + akf[m ] = akf[m]*bkf[m] - akf[m+1]*bkf[m+1]; + akf[m+1] = im; + } + + +/* inverse FFT */ + cfftb (n2,akf,work); + +/* multiply by b_k* */ + if (isign>0) + for (m=0; m<2*n; m+=2) + { + data[m] = bk[m] *akf[m] - bk[m+1]*akf[m+1]; + data[m+1] = bk[m+1]*akf[m] + bk[m] *akf[m+1]; + } + else + for (m=0; m<2*n; m+=2) + { + data[m] = bk[m] *akf[m] + bk[m+1]*akf[m+1]; + data[m+1] =-bk[m+1]*akf[m] + bk[m] *akf[m+1]; + } + }
diff -r 000000000000 -r d7215946c769 bluestein.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bluestein.h Sun Oct 11 07:20:35 2015 +0000 @@ -0,0 +1,48 @@ +/* + * This file is part of libfftpack. + * + * libfftpack is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * libfftpack is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with libfftpack; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik + * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt + * (DLR). + */ + +/* + * Copyright (C) 2005 Max-Planck-Society + * \author Martin Reinecke + */ + +#ifndef PLANCK_BLUESTEIN_H +#define PLANCK_BLUESTEIN_H + +#include "c_utils.h" + +#ifdef __cplusplus +extern "C" { +#endif + +size_t prime_factor_sum (size_t n); + +void bluestein_i (size_t n, FLOAT **tstorage); +void bluestein (size_t n, FLOAT *data, FLOAT *tstorage, int isign); + +#ifdef __cplusplus +} +#endif + +#endif
diff -r 000000000000 -r d7215946c769 c_utils.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/c_utils.c Sun Oct 11 07:20:35 2015 +0000 @@ -0,0 +1,84 @@ +/* + * This file is part of libc_utils. + * + * libc_utils is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * libc_utils is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with libc_utils; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik + * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt + * (DLR). + */ + +/* + * Convenience functions + * + * Copyright (C) 2008, 2009, 2010 Max-Planck-Society + * Author: Martin Reinecke + */ + +#include <stdio.h> +#include <stdlib.h> +#include "mytype.h" +#include "c_utils.h" + +void util_fail_ (const char *file, int line, const char *func, const char *msg) + { + fprintf(stderr,"%s, %i (%s):\n%s\n",file,line,func,msg); + exit(1); + } +void util_warn_ (const char *file, int line, const char *func, const char *msg) + { + fprintf(stderr,"%s, %i (%s):\n%s\n",file,line,func,msg); + exit(1); + } + +/* This function tries to avoid allocations with a total size close to a high + power of two (called the "critical stride" here), by adding a few more bytes + if necssary. This lowers the probability that two arrays differ by a multiple + of the critical stride in their starting address, which in turn lowers the + risk of cache line contention. */ +static size_t manipsize(size_t sz) + { + const size_t critical_stride=4096, cacheline=64, overhead=32; + if (sz < (critical_stride/2)) return sz; + if (((sz+overhead)%critical_stride)>(2*cacheline)) return sz; + return sz+2*cacheline; + } + +#ifdef __SSE__ +#include <xmmintrin.h> +void *util_malloc_ (size_t sz) + { + void *res; + if (sz==0) return NULL; + res = _mm_malloc(manipsize(sz),16); + UTIL_ASSERT(res,"_mm_malloc() failed"); + return res; + } +void util_free_ (void *ptr) + { if ((ptr)!=NULL) _mm_free(ptr); } +#else +void *util_malloc_ (size_t sz) + { + void *res; + if (sz==0) return NULL; + res = malloc(manipsize(sz)); + UTIL_ASSERT(res,"malloc() failed"); + return res; + } +void util_free_ (void *ptr) + { if ((ptr)!=NULL) free(ptr); } +#endif
diff -r 000000000000 -r d7215946c769 c_utils.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/c_utils.h Sun Oct 11 07:20:35 2015 +0000 @@ -0,0 +1,125 @@ +/* + * This file is part of libc_utils. + * + * libc_utils is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * libc_utils is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with libc_utils; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik + * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt + * (DLR). + */ + +/*! \file c_utils.h + * Convenience functions + * + * Copyright (C) 2008, 2009, 2010 Max-Planck-Society + * \author Martin Reinecke + * \note This file should only be included from .c files, NOT from .h files. + */ + +#ifndef PLANCK_C_UTILS_H +#define PLANCK_C_UTILS_H + +#include <math.h> +#include <stdlib.h> +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + +void util_fail_ (const char *file, int line, const char *func, const char *msg); +void util_warn_ (const char *file, int line, const char *func, const char *msg); +void *util_malloc_ (size_t sz); +void util_free_ (void *ptr); + +#if defined (__GNUC__) +#define UTIL_FUNC_NAME__ __func__ +#else +#define UTIL_FUNC_NAME__ "unknown" +#endif + +#define UTIL_ASSERT(cond,msg) \ + if(!(cond)) util_fail_(__FILE__,__LINE__,UTIL_FUNC_NAME__,msg) +#define UTIL_WARN(cond,msg) \ + if(!(cond)) util_warn_(__FILE__,__LINE__,UTIL_FUNC_NAME__,msg) +#define UTIL_FAIL(msg) \ + util_fail_(__FILE__,__LINE__,UTIL_FUNC_NAME__,msg) + +#define ALLOC(ptr,type,num) \ + do { (ptr)=(type *)util_malloc_((num)*sizeof(type)); } while (0) +#define RALLOC(type,num) \ + ((type *)util_malloc_((num)*sizeof(type))) +#define DEALLOC(ptr) \ + do { util_free_(ptr); (ptr)=NULL; } while(0) +#define RESIZE(ptr,type,num) \ + do { util_free_(ptr); ALLOC(ptr,type,num); } while(0) +#define REALLOC(ptr,type,num) \ + do { \ + ptr = (type *)realloc(ptr,(num)*sizeof(type)); \ + UTIL_ASSERT(ptr,"realloc() failed"); \ + } while(0) +#define GROW(ptr,type,sz_old,sz_new) \ + do { \ + if ((sz_new)>(sz_old)) \ + { RESIZE(ptr,type,2*(sz_new));sz_old=2*(sz_new); } \ + } while(0) +#define SET_ARRAY(ptr,i1,i2,val) \ + do { \ + ptrdiff_t cnt_; \ + for (cnt_=(i1);cnt_<(i2);++cnt_) (ptr)[cnt_]=(val); \ + } while(0) +#define COPY_ARRAY(src,dest,i1,i2) \ + do { \ + ptrdiff_t cnt_; \ + for (cnt_=(i1);cnt_<(i2);++cnt_) (dest)[cnt_]=(src)[cnt_]; \ + } while(0) + +#define ALLOC2D(ptr,type,num1,num2) \ + do { \ + size_t cnt_, num1_=(num1), num2_=(num2); \ + ALLOC(ptr,type *,num1_); \ + ALLOC(ptr[0],type,num1_*num2_); \ + for (cnt_=1; cnt_<num1_; ++cnt_) \ + ptr[cnt_]=ptr[cnt_-1]+num2_; \ + } while(0) +#define DEALLOC2D(ptr) \ + do { if(ptr) DEALLOC((ptr)[0]); DEALLOC(ptr); } while(0) + +#define FAPPROX(a,b,eps) \ + (fabs((a)-(b))<((eps)*fabs(b))) +#define ABSAPPROX(a,b,eps) \ + (fabs((a)-(b))<(eps)) +#define IMAX(a,b) \ + (((a)>(b)) ? (a) : (b)) +#define IMIN(a,b) \ + (((a)<(b)) ? (a) : (b)) + +#define SWAP(a,b,type) \ + do { type tmp_=(a); (a)=(b); (b)=tmp_; } while(0) + +#define CHECK_STACK_ALIGN(align) \ + do { \ + FLOAT foo; \ + UTIL_WARN((((size_t)(&foo))&(align-1))==0, \ + "WARNING: stack not sufficiently aligned!"); \ + } while(0) + +#ifdef __cplusplus +} +#endif + +#endif
diff -r 000000000000 -r d7215946c769 fftpack.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fftpack.c Sun Oct 11 07:20:35 2015 +0000 @@ -0,0 +1,834 @@ +/* + * This file is part of libfftpack. + * + * libfftpack is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * libfftpack is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with libfftpack; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik + * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt + * (DLR). + */ + +/* + fftpack.c : A set of FFT routines in C. + Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber + (Version 4, 1985). + + C port by Martin Reinecke (2010) + */ + +#include <math.h> +#include <stdlib.h> +#include <string.h> +#include "mytype.h" +#include "fftpack.h" + +#define WA(x,i) wa[(i)+(x)*ido] +#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))] +#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))] +#define PM(a,b,c,d) { a=c+d; b=c-d; } +#define PMC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; } +#define ADDC(a,b,c) { a.r=b.r+c.r; a.i=b.i+c.i; } +#define SCALEC(a,b) { a.r*=b; a.i*=b; } +#define CONJFLIPC(a) { FLOAT tmp_=a.r; a.r=-a.i; a.i=tmp_; } +/* (a+ib) = conj(c+id) * (e+if) */ +#define MULPM(a,b,c,d,e,f) { a=c*e+d*f; b=c*f-d*e; } + +typedef struct { + FLOAT r,i; +} cmplx; + +#define CONCAT(a,b) a ## b + +#define X(arg) CONCAT(passb,arg) +#define BACKWARD +#include "fftpack_inc.inc" +#undef BACKWARD +#undef X + +#define X(arg) CONCAT(passf,arg) +#include "fftpack_inc.inc" +#undef X + +#undef CC +#undef CH +#define CC(a,b,c) cc[(a)+ido*((b)+l1*(c))] +#define CH(a,b,c) ch[(a)+ido*((b)+cdim*(c))] + +static void radf2 (size_t ido, size_t l1, const FLOAT *cc, FLOAT *ch, + const FLOAT *wa) + { + const size_t cdim=2; + size_t i, k, ic; + FLOAT ti2, tr2; + + for (k=0; k<l1; k++) + PM (CH(0,0,k),CH(ido-1,1,k),CC(0,k,0),CC(0,k,1)) + if ((ido&1)==0) + for (k=0; k<l1; k++) + { + CH( 0,1,k) = -CC(ido-1,k,1); + CH(ido-1,0,k) = CC(ido-1,k,0); + } + if (ido<=2) return; + for (k=0; k<l1; k++) + for (i=2; i<ido; i+=2) + { + ic=ido-i; + MULPM (tr2,ti2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1)) + PM (CH(i-1,0,k),CH(ic-1,1,k),CC(i-1,k,0),tr2) + PM (CH(i ,0,k),CH(ic ,1,k),ti2,CC(i ,k,0)) + } + } + +static void radf3(size_t ido, size_t l1, const FLOAT *cc, FLOAT *ch, + const FLOAT *wa) + { + const size_t cdim=3; + static const FLOAT taur=-0.5, taui=0.86602540378443864676; + size_t i, k, ic; + FLOAT ci2, di2, di3, cr2, dr2, dr3, ti2, ti3, tr2, tr3; + + for (k=0; k<l1; k++) + { + cr2=CC(0,k,1)+CC(0,k,2); + CH(0,0,k) = CC(0,k,0)+cr2; + CH(0,2,k) = taui*(CC(0,k,2)-CC(0,k,1)); + CH(ido-1,1,k) = CC(0,k,0)+taur*cr2; + } + if (ido==1) return; + for (k=0; k<l1; k++) + for (i=2; i<ido; i+=2) + { + ic=ido-i; + MULPM (dr2,di2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1)) + MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2)) + cr2=dr2+dr3; + ci2=di2+di3; + CH(i-1,0,k) = CC(i-1,k,0)+cr2; + CH(i ,0,k) = CC(i ,k,0)+ci2; + tr2 = CC(i-1,k,0)+taur*cr2; + ti2 = CC(i ,k,0)+taur*ci2; + tr3 = taui*(di2-di3); + ti3 = taui*(dr3-dr2); + PM(CH(i-1,2,k),CH(ic-1,1,k),tr2,tr3) + PM(CH(i ,2,k),CH(ic ,1,k),ti3,ti2) + } + } + +static void radf4(size_t ido, size_t l1, const FLOAT *cc, FLOAT *ch, + const FLOAT *wa) + { + const size_t cdim=4; + static const FLOAT hsqt2=0.70710678118654752440; + size_t i, k, ic; + FLOAT ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4; + + for (k=0; k<l1; k++) + { + PM (tr1,CH(0,2,k),CC(0,k,3),CC(0,k,1)) + PM (tr2,CH(ido-1,1,k),CC(0,k,0),CC(0,k,2)) + PM (CH(0,0,k),CH(ido-1,3,k),tr2,tr1) + } + if ((ido&1)==0) + for (k=0; k<l1; k++) + { + ti1=-hsqt2*(CC(ido-1,k,1)+CC(ido-1,k,3)); + tr1= hsqt2*(CC(ido-1,k,1)-CC(ido-1,k,3)); + PM (CH(ido-1,0,k),CH(ido-1,2,k),CC(ido-1,k,0),tr1) + PM (CH( 0,3,k),CH( 0,1,k),ti1,CC(ido-1,k,2)) + } + if (ido<=2) return; + for (k=0; k<l1; k++) + for (i=2; i<ido; i+=2) + { + ic=ido-i; + MULPM(cr2,ci2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1)) + MULPM(cr3,ci3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2)) + MULPM(cr4,ci4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3)) + PM(tr1,tr4,cr4,cr2) + PM(ti1,ti4,ci2,ci4) + PM(tr2,tr3,CC(i-1,k,0),cr3) + PM(ti2,ti3,CC(i ,k,0),ci3) + PM(CH(i-1,0,k),CH(ic-1,3,k),tr2,tr1) + PM(CH(i ,0,k),CH(ic ,3,k),ti1,ti2) + PM(CH(i-1,2,k),CH(ic-1,1,k),tr3,ti4) + PM(CH(i ,2,k),CH(ic ,1,k),tr4,ti3) + } + } + +static void radf5(size_t ido, size_t l1, const FLOAT *cc, FLOAT *ch, + const FLOAT *wa) + { + const size_t cdim=5; + static const FLOAT tr11= 0.3090169943749474241, ti11=0.95105651629515357212, + tr12=-0.8090169943749474241, ti12=0.58778525229247312917; + size_t i, k, ic; + FLOAT ci2, di2, ci4, ci5, di3, di4, di5, ci3, cr2, cr3, dr2, dr3, + dr4, dr5, cr5, cr4, ti2, ti3, ti5, ti4, tr2, tr3, tr4, tr5; + + for (k=0; k<l1; k++) + { + PM (cr2,ci5,CC(0,k,4),CC(0,k,1)) + PM (cr3,ci4,CC(0,k,3),CC(0,k,2)) + CH(0,0,k)=CC(0,k,0)+cr2+cr3; + CH(ido-1,1,k)=CC(0,k,0)+tr11*cr2+tr12*cr3; + CH(0,2,k)=ti11*ci5+ti12*ci4; + CH(ido-1,3,k)=CC(0,k,0)+tr12*cr2+tr11*cr3; + CH(0,4,k)=ti12*ci5-ti11*ci4; + } + if (ido==1) return; + for (k=0; k<l1;++k) + for (i=2; i<ido; i+=2) + { + ic=ido-i; + MULPM (dr2,di2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1)) + MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2)) + MULPM (dr4,di4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3)) + MULPM (dr5,di5,WA(3,i-2),WA(3,i-1),CC(i-1,k,4),CC(i,k,4)) + PM(cr2,ci5,dr5,dr2) + PM(ci2,cr5,di2,di5) + PM(cr3,ci4,dr4,dr3) + PM(ci3,cr4,di3,di4) + CH(i-1,0,k)=CC(i-1,k,0)+cr2+cr3; + CH(i ,0,k)=CC(i ,k,0)+ci2+ci3; + tr2=CC(i-1,k,0)+tr11*cr2+tr12*cr3; + ti2=CC(i ,k,0)+tr11*ci2+tr12*ci3; + tr3=CC(i-1,k,0)+tr12*cr2+tr11*cr3; + ti3=CC(i ,k,0)+tr12*ci2+tr11*ci3; + MULPM(tr5,tr4,cr5,cr4,ti11,ti12) + MULPM(ti5,ti4,ci5,ci4,ti11,ti12) + PM(CH(i-1,2,k),CH(ic-1,1,k),tr2,tr5) + PM(CH(i ,2,k),CH(ic ,1,k),ti5,ti2) + PM(CH(i-1,4,k),CH(ic-1,3,k),tr3,tr4) + PM(CH(i ,4,k),CH(ic ,3,k),ti4,ti3) + } + } + +#undef CH +#undef CC +#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))] +#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))] +#define C1(a,b,c) cc[(a)+ido*((b)+l1*(c))] +#define C2(a,b) cc[(a)+idl1*(b)] +#define CH2(a,b) ch[(a)+idl1*(b)] +static void radfg(size_t ido, size_t ip, size_t l1, size_t idl1, + FLOAT *cc, FLOAT *ch, const FLOAT *wa) + { + const size_t cdim=ip; + static const FLOAT twopi=6.28318530717958647692; + size_t idij, ipph, i, j, k, l, j2, ic, jc, lc, ik; + FLOAT ai1, ai2, ar1, ar2, arg; + FLOAT *csarr; + size_t aidx; + + ipph=(ip+1)/ 2; + if(ido!=1) + { + memcpy(ch,cc,idl1*sizeof(FLOAT)); + + for(j=1; j<ip; j++) + for(k=0; k<l1; k++) + { + CH(0,k,j)=C1(0,k,j); + idij=(j-1)*ido+1; + for(i=2; i<ido; i+=2,idij+=2) + MULPM(CH(i-1,k,j),CH(i,k,j),wa[idij-1],wa[idij],C1(i-1,k,j),C1(i,k,j)) + } + + for(j=1,jc=ip-1; j<ipph; j++,jc--) + for(k=0; k<l1; k++) + for(i=2; i<ido; i+=2) + { + PM(C1(i-1,k,j),C1(i ,k,jc),CH(i-1,k,jc),CH(i-1,k,j )) + PM(C1(i ,k,j),C1(i-1,k,jc),CH(i ,k,j ),CH(i ,k,jc)) + } + } + else + memcpy(cc,ch,idl1*sizeof(FLOAT)); + + for(j=1,jc=ip-1; j<ipph; j++,jc--) + for(k=0; k<l1; k++) + PM(C1(0,k,j),C1(0,k,jc),CH(0,k,jc),CH(0,k,j)) + + csarr=RALLOC(FLOAT,2*ip); + arg=twopi / ip; + csarr[0]=1.; + csarr[1]=0.; + csarr[2]=csarr[2*ip-2]=cos(arg); + csarr[3]=sin(arg); csarr[2*ip-1]=-csarr[3]; + for (i=2; i<=ip/2; ++i) + { + csarr[2*i]=csarr[2*ip-2*i]=cos(i*arg); + csarr[2*i+1]=sin(i*arg); + csarr[2*ip-2*i+1]=-csarr[2*i+1]; + } + for(l=1,lc=ip-1; l<ipph; l++,lc--) + { + ar1=csarr[2*l]; + ai1=csarr[2*l+1]; + for(ik=0; ik<idl1; ik++) + { + CH2(ik,l)=C2(ik,0)+ar1*C2(ik,1); + CH2(ik,lc)=ai1*C2(ik,ip-1); + } + aidx=2*l; + for(j=2,jc=ip-2; j<ipph; j++,jc--) + { + aidx+=2*l; + if (aidx>=2*ip) aidx-=2*ip; + ar2=csarr[aidx]; + ai2=csarr[aidx+1]; + for(ik=0; ik<idl1; ik++) + { + CH2(ik,l )+=ar2*C2(ik,j ); + CH2(ik,lc)+=ai2*C2(ik,jc); + } + } + } + DEALLOC(csarr); + + for(j=1; j<ipph; j++) + for(ik=0; ik<idl1; ik++) + CH2(ik,0)+=C2(ik,j); + + for(k=0; k<l1; k++) + memcpy(&CC(0,0,k),&CH(0,k,0),ido*sizeof(FLOAT)); + for(j=1; j<ipph; j++) + { + jc=ip-j; + j2=2*j; + for(k=0; k<l1; k++) + { + CC(ido-1,j2-1,k) = CH(0,k,j ); + CC(0 ,j2 ,k) = CH(0,k,jc); + } + } + if(ido==1) return; + + for(j=1; j<ipph; j++) + { + jc=ip-j; + j2=2*j; + for(k=0; k<l1; k++) + for(i=2; i<ido; i+=2) + { + ic=ido-i; + PM (CC(i-1,j2,k),CC(ic-1,j2-1,k),CH(i-1,k,j ),CH(i-1,k,jc)) + PM (CC(i ,j2,k),CC(ic ,j2-1,k),CH(i ,k,jc),CH(i ,k,j )) + } + } + } + +#undef CC +#undef CH +#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))] +#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))] + +static void radb2(size_t ido, size_t l1, const FLOAT *cc, FLOAT *ch, + const FLOAT *wa) + { + const size_t cdim=2; + size_t i, k, ic; + FLOAT ti2, tr2; + + for (k=0; k<l1; k++) + PM (CH(0,k,0),CH(0,k,1),CC(0,0,k),CC(ido-1,1,k)) + if ((ido&1)==0) + for (k=0; k<l1; k++) + { + CH(ido-1,k,0) = 2*CC(ido-1,0,k); + CH(ido-1,k,1) = -2*CC(0 ,1,k); + } + if (ido<=2) return; + for (k=0; k<l1;++k) + for (i=2; i<ido; i+=2) + { + ic=ido-i; + PM (CH(i-1,k,0),tr2,CC(i-1,0,k),CC(ic-1,1,k)) + PM (ti2,CH(i ,k,0),CC(i ,0,k),CC(ic ,1,k)) + MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ti2,tr2) + } + } + +static void radb3(size_t ido, size_t l1, const FLOAT *cc, FLOAT *ch, + const FLOAT *wa) + { + const size_t cdim=3; + static const FLOAT taur=-0.5, taui=0.86602540378443864676; + size_t i, k, ic; + FLOAT ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2; + + for (k=0; k<l1; k++) + { + tr2=2*CC(ido-1,1,k); + cr2=CC(0,0,k)+taur*tr2; + CH(0,k,0)=CC(0,0,k)+tr2; + ci3=2*taui*CC(0,2,k); + PM (CH(0,k,2),CH(0,k,1),cr2,ci3); + } + if (ido==1) return; + for (k=0; k<l1; k++) + for (i=2; i<ido; i+=2) + { + ic=ido-i; + tr2=CC(i-1,2,k)+CC(ic-1,1,k); + ti2=CC(i ,2,k)-CC(ic ,1,k); + cr2=CC(i-1,0,k)+taur*tr2; + ci2=CC(i ,0,k)+taur*ti2; + CH(i-1,k,0)=CC(i-1,0,k)+tr2; + CH(i ,k,0)=CC(i ,0,k)+ti2; + cr3=taui*(CC(i-1,2,k)-CC(ic-1,1,k)); + ci3=taui*(CC(i ,2,k)+CC(ic ,1,k)); + PM(dr3,dr2,cr2,ci3) + PM(di2,di3,ci2,cr3) + MULPM(CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),di2,dr2) + MULPM(CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),di3,dr3) + } + } + +static void radb4(size_t ido, size_t l1, const FLOAT *cc, FLOAT *ch, + const FLOAT *wa) + { + const size_t cdim=4; + static const FLOAT sqrt2=1.41421356237309504880; + size_t i, k, ic; + FLOAT ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4; + + for (k=0; k<l1; k++) + { + PM (tr2,tr1,CC(0,0,k),CC(ido-1,3,k)) + tr3=2*CC(ido-1,1,k); + tr4=2*CC(0,2,k); + PM (CH(0,k,0),CH(0,k,2),tr2,tr3) + PM (CH(0,k,3),CH(0,k,1),tr1,tr4) + } + if ((ido&1)==0) + for (k=0; k<l1; k++) + { + PM (ti1,ti2,CC(0 ,3,k),CC(0 ,1,k)) + PM (tr2,tr1,CC(ido-1,0,k),CC(ido-1,2,k)) + CH(ido-1,k,0)=tr2+tr2; + CH(ido-1,k,1)=sqrt2*(tr1-ti1); + CH(ido-1,k,2)=ti2+ti2; + CH(ido-1,k,3)=-sqrt2*(tr1+ti1); + } + if (ido<=2) return; + for (k=0; k<l1;++k) + for (i=2; i<ido; i+=2) + { + ic=ido-i; + PM (tr2,tr1,CC(i-1,0,k),CC(ic-1,3,k)) + PM (ti1,ti2,CC(i ,0,k),CC(ic ,3,k)) + PM (tr4,ti3,CC(i ,2,k),CC(ic ,1,k)) + PM (tr3,ti4,CC(i-1,2,k),CC(ic-1,1,k)) + PM (CH(i-1,k,0),cr3,tr2,tr3) + PM (CH(i ,k,0),ci3,ti2,ti3) + PM (cr4,cr2,tr1,tr4) + PM (ci2,ci4,ti1,ti4) + MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ci2,cr2) + MULPM (CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),ci3,cr3) + MULPM (CH(i,k,3),CH(i-1,k,3),WA(2,i-2),WA(2,i-1),ci4,cr4) + } + } + +static void radb5(size_t ido, size_t l1, const FLOAT *cc, FLOAT *ch, + const FLOAT *wa) + { + const size_t cdim=5; + static const FLOAT tr11= 0.3090169943749474241, ti11=0.95105651629515357212, + tr12=-0.8090169943749474241, ti12=0.58778525229247312917; + size_t i, k, ic; + FLOAT ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4, + ti2, ti3, ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5; + + for (k=0; k<l1; k++) + { + ti5=2*CC(0,2,k); + ti4=2*CC(0,4,k); + tr2=2*CC(ido-1,1,k); + tr3=2*CC(ido-1,3,k); + CH(0,k,0)=CC(0,0,k)+tr2+tr3; + cr2=CC(0,0,k)+tr11*tr2+tr12*tr3; + cr3=CC(0,0,k)+tr12*tr2+tr11*tr3; + MULPM(ci5,ci4,ti5,ti4,ti11,ti12) + PM(CH(0,k,4),CH(0,k,1),cr2,ci5) + PM(CH(0,k,3),CH(0,k,2),cr3,ci4) + } + if (ido==1) return; + for (k=0; k<l1;++k) + for (i=2; i<ido; i+=2) + { + ic=ido-i; + PM(tr2,tr5,CC(i-1,2,k),CC(ic-1,1,k)) + PM(ti5,ti2,CC(i ,2,k),CC(ic ,1,k)) + PM(tr3,tr4,CC(i-1,4,k),CC(ic-1,3,k)) + PM(ti4,ti3,CC(i ,4,k),CC(ic ,3,k)) + CH(i-1,k,0)=CC(i-1,0,k)+tr2+tr3; + CH(i ,k,0)=CC(i ,0,k)+ti2+ti3; + cr2=CC(i-1,0,k)+tr11*tr2+tr12*tr3; + ci2=CC(i ,0,k)+tr11*ti2+tr12*ti3; + cr3=CC(i-1,0,k)+tr12*tr2+tr11*tr3; + ci3=CC(i ,0,k)+tr12*ti2+tr11*ti3; + MULPM(cr5,cr4,tr5,tr4,ti11,ti12) + MULPM(ci5,ci4,ti5,ti4,ti11,ti12) + PM(dr4,dr3,cr3,ci4) + PM(di3,di4,ci3,cr4) + PM(dr5,dr2,cr2,ci5) + PM(di2,di5,ci2,cr5) + MULPM(CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),di2,dr2) + MULPM(CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),di3,dr3) + MULPM(CH(i,k,3),CH(i-1,k,3),WA(2,i-2),WA(2,i-1),di4,dr4) + MULPM(CH(i,k,4),CH(i-1,k,4),WA(3,i-2),WA(3,i-1),di5,dr5) + } + } + +static void radbg(size_t ido, size_t ip, size_t l1, size_t idl1, + FLOAT *cc, FLOAT *ch, const FLOAT *wa) + { + const size_t cdim=ip; + static const FLOAT twopi=6.28318530717958647692; + size_t idij, ipph, i, j, k, l, j2, ic, jc, lc, ik; + FLOAT ai1, ai2, ar1, ar2, arg; + FLOAT *csarr; + size_t aidx; + + ipph=(ip+1)/ 2; + for(k=0; k<l1; k++) + memcpy(&CH(0,k,0),&CC(0,0,k),ido*sizeof(FLOAT)); + for(j=1; j<ipph; j++) + { + jc=ip-j; + j2=2*j; + for(k=0; k<l1; k++) + { + CH(0,k,j )=2*CC(ido-1,j2-1,k); + CH(0,k,jc)=2*CC(0 ,j2 ,k); + } + } + + if(ido!=1) + for(j=1,jc=ip-1; j<ipph; j++,jc--) + for(k=0; k<l1; k++) + for(i=2; i<ido; i+=2) + { + ic=ido-i; + PM (CH(i-1,k,j ),CH(i-1,k,jc),CC(i-1,2*j,k),CC(ic-1,2*j-1,k)) + PM (CH(i ,k,jc),CH(i ,k,j ),CC(i ,2*j,k),CC(ic ,2*j-1,k)) + } + + csarr=RALLOC(FLOAT,2*ip); + arg=twopi/ip; + csarr[0]=1.; + csarr[1]=0.; + csarr[2]=csarr[2*ip-2]=cos(arg); + csarr[3]=sin(arg); csarr[2*ip-1]=-csarr[3]; + for (i=2; i<=ip/2; ++i) + { + csarr[2*i]=csarr[2*ip-2*i]=cos(i*arg); + csarr[2*i+1]=sin(i*arg); + csarr[2*ip-2*i+1]=-csarr[2*i+1]; + } + for(l=1; l<ipph; l++) + { + lc=ip-l; + ar1=csarr[2*l]; + ai1=csarr[2*l+1]; + for(ik=0; ik<idl1; ik++) + { + C2(ik,l)=CH2(ik,0)+ar1*CH2(ik,1); + C2(ik,lc)=ai1*CH2(ik,ip-1); + } + aidx=2*l; + for(j=2; j<ipph; j++) + { + jc=ip-j; + aidx+=2*l; + if (aidx>=2*ip) aidx-=2*ip; + ar2=csarr[aidx]; + ai2=csarr[aidx+1]; + for(ik=0; ik<idl1; ik++) + { + C2(ik,l )+=ar2*CH2(ik,j ); + C2(ik,lc)+=ai2*CH2(ik,jc); + } + } + } + DEALLOC(csarr); + + for(j=1; j<ipph; j++) + for(ik=0; ik<idl1; ik++) + CH2(ik,0)+=CH2(ik,j); + + for(j=1,jc=ip-1; j<ipph; j++,jc--) + for(k=0; k<l1; k++) + PM (CH(0,k,jc),CH(0,k,j),C1(0,k,j),C1(0,k,jc)) + + if(ido==1) + return; + for(j=1,jc=ip-1; j<ipph; j++,jc--) + for(k=0; k<l1; k++) + for(i=2; i<ido; i+=2) + { + PM (CH(i-1,k,jc),CH(i-1,k,j ),C1(i-1,k,j),C1(i ,k,jc)) + PM (CH(i ,k,j ),CH(i ,k,jc),C1(i ,k,j),C1(i-1,k,jc)) + } + memcpy(cc,ch,idl1*sizeof(FLOAT)); + + for(j=1; j<ip; j++) + for(k=0; k<l1; k++) + { + C1(0,k,j)=CH(0,k,j); + idij=(j-1)*ido+1; + for(i=2; i<ido; i+=2,idij+=2) + MULPM (C1(i,k,j),C1(i-1,k,j),wa[idij-1],wa[idij],CH(i,k,j),CH(i-1,k,j)) + } + } + +#undef CC +#undef CH +#undef PM +#undef MULPM + + +/*---------------------------------------------------------------------- + cfftf1, cfftb1, cfftf, cfftb, cffti1, cffti. Complex FFTs. + ----------------------------------------------------------------------*/ + +static void cfft1(size_t n, cmplx c[], cmplx ch[], const cmplx wa[], + const size_t ifac[], int isign) + { + size_t k1, l1=1, nf=ifac[1], iw=0; + cmplx *p1=c, *p2=ch; + + for(k1=0; k1<nf; k1++) + { + size_t ip=ifac[k1+2]; + size_t l2=ip*l1; + size_t ido = n/l2; + if(ip==4) + (isign>0) ? passb4(ido, l1, p1, p2, wa+iw) + : passf4(ido, l1, p1, p2, wa+iw); + else if(ip==2) + (isign>0) ? passb2(ido, l1, p1, p2, wa+iw) + : passf2(ido, l1, p1, p2, wa+iw); + else if(ip==3) + (isign>0) ? passb3(ido, l1, p1, p2, wa+iw) + : passf3(ido, l1, p1, p2, wa+iw); + else if(ip==5) + (isign>0) ? passb5(ido, l1, p1, p2, wa+iw) + : passf5(ido, l1, p1, p2, wa+iw); + else if(ip==6) + (isign>0) ? passb6(ido, l1, p1, p2, wa+iw) + : passf6(ido, l1, p1, p2, wa+iw); + else + (isign>0) ? passbg(ido, ip, l1, p1, p2, wa+iw) + : passfg(ido, ip, l1, p1, p2, wa+iw); + SWAP(p1,p2,cmplx *); + l1=l2; + iw+=(ip-1)*ido; + } + if (p1!=c) + memcpy (c,p1,n*sizeof(cmplx)); + } + +void cfftf(size_t n, FLOAT c[], FLOAT wsave[]) + { + if (n!=1) + cfft1(n, (cmplx*)c, (cmplx*)wsave, (cmplx*)(wsave+2*n), + (size_t*)(wsave+4*n),-1); + } + +void cfftb(size_t n, FLOAT c[], FLOAT wsave[]) + { + if (n!=1) + cfft1(n, (cmplx*)c, (cmplx*)wsave, (cmplx*)(wsave+2*n), + (size_t*)(wsave+4*n),+1); + } + +static void factorize (size_t n, const size_t *pf, size_t npf, size_t *ifac) + { + size_t nl=n, nf=0, ntry=0, j=0, i; + +startloop: + j++; + ntry = (j<=npf) ? pf[j-1] : ntry+2; + do + { + size_t nq=nl / ntry; + size_t nr=nl-ntry*nq; + if (nr!=0) + goto startloop; + nf++; + ifac[nf+1]=ntry; + nl=nq; + if ((ntry==2) && (nf!=1)) + { + for (i=nf+1; i>2; --i) + ifac[i]=ifac[i-1]; + ifac[2]=2; + } + } + while(nl!=1); + ifac[0]=n; + ifac[1]=nf; + } + +static void cffti1(size_t n, FLOAT wa[], size_t ifac[]) + { + static const size_t ntryh[5]={4,6,3,2,5}; + static const FLOAT twopi=6.28318530717958647692; + size_t j, k, fi; + + FLOAT argh=twopi/n; + size_t i=0, l1=1; + factorize (n,ntryh,5,ifac); + for(k=1; k<=ifac[1]; k++) + { + size_t ip=ifac[k+1]; + size_t ido=n/(l1*ip); + for(j=1; j<ip; j++) + { + size_t is = i; + FLOAT argld=j*l1*argh; + wa[i ]=1; + wa[i+1]=0; + for(fi=1; fi<=ido; fi++) + { + FLOAT arg=fi*argld; + i+=2; + wa[i ]=cos(arg); + wa[i+1]=sin(arg); + } + if(ip>6) + { + wa[is ]=wa[i ]; + wa[is+1]=wa[i+1]; + } + } + l1*=ip; + } + } + +void cffti(size_t n, FLOAT wsave[]) + { if (n!=1) cffti1(n, wsave+2*n,(size_t*)(wsave+4*n)); } + + +/*---------------------------------------------------------------------- + rfftf1, rfftb1, rfftf, rfftb, rffti1, rffti. Real FFTs. + ----------------------------------------------------------------------*/ + +static void rfftf1(size_t n, FLOAT c[], FLOAT ch[], const FLOAT wa[], + const size_t ifac[]) + { + size_t k1, l1=n, nf=ifac[1], iw=n-1; + FLOAT *p1=ch, *p2=c; + + for(k1=1; k1<=nf;++k1) + { + size_t ip=ifac[nf-k1+2]; + size_t ido=n / l1; + l1 /= ip; + iw-=(ip-1)*ido; + SWAP (p1,p2,FLOAT *); + if(ip==4) + radf4(ido, l1, p1, p2, wa+iw); + else if(ip==2) + radf2(ido, l1, p1, p2, wa+iw); + else if(ip==3) + radf3(ido, l1, p1, p2, wa+iw); + else if(ip==5) + radf5(ido, l1, p1, p2, wa+iw); + else + { + if (ido==1) + SWAP (p1,p2,FLOAT *); + radfg(ido, ip, l1, ido*l1, p1, p2, wa+iw); + SWAP (p1,p2,FLOAT *); + } + } + if (p1==c) + memcpy (c,ch,n*sizeof(FLOAT)); + } + +static void rfftb1(size_t n, FLOAT c[], FLOAT ch[], const FLOAT wa[], + const size_t ifac[]) + { + size_t k1, l1=1, nf=ifac[1], iw=0; + FLOAT *p1=c, *p2=ch; + + for(k1=1; k1<=nf; k1++) + { + size_t ip = ifac[k1+1], + ido= n/(ip*l1); + if(ip==4) + radb4(ido, l1, p1, p2, wa+iw); + else if(ip==2) + radb2(ido, l1, p1, p2, wa+iw); + else if(ip==3) + radb3(ido, l1, p1, p2, wa+iw); + else if(ip==5) + radb5(ido, l1, p1, p2, wa+iw); + else + { + radbg(ido, ip, l1, ido*l1, p1, p2, wa+iw); + if (ido!=1) + SWAP (p1,p2,FLOAT *); + } + SWAP (p1,p2,FLOAT *); + l1*=ip; + iw+=(ip-1)*ido; + } + if (p1!=c) + memcpy (c,ch,n*sizeof(FLOAT)); + } + +void rfftf(size_t n, FLOAT r[], FLOAT wsave[]) + { if(n!=1) rfftf1(n, r, wsave, wsave+n,(size_t*)(wsave+2*n)); } + +void rfftb(size_t n, FLOAT r[], FLOAT wsave[]) + { if(n!=1) rfftb1(n, r, wsave, wsave+n,(size_t*)(wsave+2*n)); } + +static void rffti1(size_t n, FLOAT wa[], size_t ifac[]) + { + static const size_t ntryh[4]={4,2,3,5}; + static const FLOAT twopi=6.28318530717958647692; + size_t i, j, k, fi; + + FLOAT argh=twopi/n; + size_t is=0, l1=1; + factorize (n,ntryh,4,ifac); + for (k=1; k<ifac[1]; k++) + { + size_t ip=ifac[k+1], + ido=n/(l1*ip); + for (j=1; j<ip; ++j) + { + FLOAT argld=j*l1*argh; + for(i=is,fi=1; i<=ido+is-3; i+=2,++fi) + { + FLOAT arg=fi*argld; + wa[i ]=cos(arg); + wa[i+1]=sin(arg); + } + is+=ido; + } + l1*=ip; + } + } + +void rffti(size_t n, FLOAT wsave[]) + { if (n!=1) rffti1(n, wsave+n,(size_t*)(wsave+2*n)); }
diff -r 000000000000 -r d7215946c769 fftpack.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fftpack.h Sun Oct 11 07:20:35 2015 +0000 @@ -0,0 +1,64 @@ +/* + * This file is part of libfftpack. + * + * libfftpack is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * libfftpack is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with libfftpack; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik + * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt + * (DLR). + */ + +/* + fftpack.h : function declarations for fftpack.c + Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber + (Version 4, 1985). + + Pekka Janhunen 23.2.1995 + + (reformatted by joerg arndt) + + reformatted and slightly enhanced by Martin Reinecke (2004) + */ + +#ifndef PLANCK_FFTPACK_H +#define PLANCK_FFTPACK_H + +#include "c_utils.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/*! forward complex transform */ +void cfftf(size_t N, FLOAT complex_data[], FLOAT wrk[]); +/*! backward complex transform */ +void cfftb(size_t N, FLOAT complex_data[], FLOAT wrk[]); +/*! initializer for complex transforms */ +void cffti(size_t N, FLOAT wrk[]); + +/*! forward real transform */ +void rfftf(size_t N, FLOAT data[], FLOAT wrk[]); +/*! backward real transform */ +void rfftb(size_t N, FLOAT data[], FLOAT wrk[]); +/*! initializer for real transforms */ +void rffti(size_t N, FLOAT wrk[]); + +#ifdef __cplusplus +} +#endif + +#endif
diff -r 000000000000 -r d7215946c769 fftpack_inc.inc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fftpack_inc.inc Sun Oct 11 07:20:35 2015 +0000 @@ -0,0 +1,305 @@ +/* + * This file is part of libfftpack. + * + * libfftpack is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * libfftpack is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with libfftpack; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik + * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt + * (DLR). + */ + +/* + fftpack.c : A set of FFT routines in C. + Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber + (Version 4, 1985). + + C port by Martin Reinecke (2010) + */ + +#ifdef BACKWARD +#define PSIGN + +#define PMSIGNC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; } +/* a = b*c */ +#define MULPMSIGNC(a,b,c) { a.r=b.r*c.r-b.i*c.i; a.i=b.r*c.i+b.i*c.r; } +#else +#define PSIGN - +#define PMSIGNC(a,b,c,d) { a.r=c.r-d.r; a.i=c.i-d.i; b.r=c.r+d.r; b.i=c.i+d.i; } +/* a = conj(b)*c */ +#define MULPMSIGNC(a,b,c) { a.r=b.r*c.r+b.i*c.i; a.i=b.r*c.i-b.i*c.r; } +#endif + +static void X(2) (size_t ido,size_t l1,const cmplx *cc,cmplx *ch,const cmplx *wa) + { + const size_t cdim=2; + size_t k,i; + cmplx t; + if (ido==1) + for (k=0;k<l1;++k) + PMC (CH(0,k,0),CH(0,k,1),CC(0,0,k),CC(0,1,k)) + else + for (k=0;k<l1;++k) + for (i=0;i<ido;++i) + { + PMC (CH(i,k,0),t,CC(i,0,k),CC(i,1,k)) + MULPMSIGNC (CH(i,k,1),WA(0,i),t) + } + } + +static void X(3)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch, + const cmplx *wa) + { + const size_t cdim=3; + static const FLOAT taur=-0.5, taui= PSIGN 0.86602540378443864676; + size_t i, k; + cmplx c2, c3, d2, d3, t2; + + if (ido==1) + for (k=0; k<l1; ++k) + { + PMC (t2,c3,CC(0,1,k),CC(0,2,k)) + ADDC (CH(0,k,0),t2,CC(0,0,k)) + SCALEC(t2,taur) + ADDC(c2,CC(0,0,k),t2) + SCALEC(c3,taui) + CONJFLIPC(c3) + PMC(CH(0,k,1),CH(0,k,2),c2,c3) + } + else + for (k=0; k<l1; ++k) + for (i=0; i<ido; ++i) + { + PMC (t2,c3,CC(i,1,k),CC(i,2,k)) + ADDC (CH(i,k,0),t2,CC(i,0,k)) + SCALEC(t2,taur) + ADDC(c2,CC(i,0,k),t2) + SCALEC(c3,taui) + CONJFLIPC(c3) + PMC(d2,d3,c2,c3) + MULPMSIGNC(CH(i,k,1),WA(0,i),d2) + MULPMSIGNC(CH(i,k,2),WA(1,i),d3) + } + } + +static void X(4)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch, + const cmplx *wa) + { + const size_t cdim=4; + size_t i, k; + cmplx c2, c3, c4, t1, t2, t3, t4; + + if (ido==1) + for (k=0; k<l1; ++k) + { + PMC(t2,t1,CC(0,0,k),CC(0,2,k)) + PMC(t3,t4,CC(0,1,k),CC(0,3,k)) + CONJFLIPC(t4) + PMC(CH(0,k,0),CH(0,k,2),t2,t3) + PMSIGNC (CH(0,k,1),CH(0,k,3),t1,t4) + } + else + for (k=0; k<l1; ++k) + for (i=0; i<ido; ++i) + { + PMC(t2,t1,CC(i,0,k),CC(i,2,k)) + PMC(t3,t4,CC(i,1,k),CC(i,3,k)) + CONJFLIPC(t4) + PMC(CH(i,k,0),c3,t2,t3) + PMSIGNC (c2,c4,t1,t4) + MULPMSIGNC (CH(i,k,1),WA(0,i),c2) + MULPMSIGNC (CH(i,k,2),WA(1,i),c3) + MULPMSIGNC (CH(i,k,3),WA(2,i),c4) + } + } + +static void X(5)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch, + const cmplx *wa) + { + const size_t cdim=5; + static const FLOAT tr11= 0.3090169943749474241, + ti11= PSIGN 0.95105651629515357212, + tr12=-0.8090169943749474241, + ti12= PSIGN 0.58778525229247312917; + size_t i, k; + cmplx c2, c3, c4, c5, d2, d3, d4, d5, t2, t3, t4, t5; + + if (ido==1) + for (k=0; k<l1; ++k) + { + PMC (t2,t5,CC(0,1,k),CC(0,4,k)) + PMC (t3,t4,CC(0,2,k),CC(0,3,k)) + CH(0,k,0).r=CC(0,0,k).r+t2.r+t3.r; + CH(0,k,0).i=CC(0,0,k).i+t2.i+t3.i; + c2.r=CC(0,0,k).r+tr11*t2.r+tr12*t3.r; + c2.i=CC(0,0,k).i+tr11*t2.i+tr12*t3.i; + c3.r=CC(0,0,k).r+tr12*t2.r+tr11*t3.r; + c3.i=CC(0,0,k).i+tr12*t2.i+tr11*t3.i; + c5.r=ti11*t5.r+ti12*t4.r; + c5.i=ti11*t5.i+ti12*t4.i; + c4.r=ti12*t5.r-ti11*t4.r; + c4.i=ti12*t5.i-ti11*t4.i; + CONJFLIPC(c5) + PMC(CH(0,k,1),CH(0,k,4),c2,c5) + CONJFLIPC(c4) + PMC(CH(0,k,2),CH(0,k,3),c3,c4) + } + else + for (k=0; k<l1; ++k) + for (i=0; i<ido; ++i) + { + PMC (t2,t5,CC(i,1,k),CC(i,4,k)) + PMC (t3,t4,CC(i,2,k),CC(i,3,k)) + CH(i,k,0).r=CC(i,0,k).r+t2.r+t3.r; + CH(i,k,0).i=CC(i,0,k).i+t2.i+t3.i; + c2.r=CC(i,0,k).r+tr11*t2.r+tr12*t3.r; + c2.i=CC(i,0,k).i+tr11*t2.i+tr12*t3.i; + c3.r=CC(i,0,k).r+tr12*t2.r+tr11*t3.r; + c3.i=CC(i,0,k).i+tr12*t2.i+tr11*t3.i; + c5.r=ti11*t5.r+ti12*t4.r; + c5.i=ti11*t5.i+ti12*t4.i; + c4.r=ti12*t5.r-ti11*t4.r; + c4.i=ti12*t5.i-ti11*t4.i; + CONJFLIPC(c5) + PMC(d2,d5,c2,c5) + CONJFLIPC(c4) + PMC(d3,d4,c3,c4) + MULPMSIGNC (CH(i,k,1),WA(0,i),d2) + MULPMSIGNC (CH(i,k,2),WA(1,i),d3) + MULPMSIGNC (CH(i,k,3),WA(2,i),d4) + MULPMSIGNC (CH(i,k,4),WA(3,i),d5) + } + } + +static void X(6)(size_t ido, size_t l1, const cmplx *cc, cmplx *ch, + const cmplx *wa) + { + const size_t cdim=6; + static const FLOAT taui= PSIGN 0.86602540378443864676; + cmplx ta1,ta2,ta3,a0,a1,a2,tb1,tb2,tb3,b0,b1,b2,d1,d2,d3,d4,d5; + size_t i, k; + + if (ido==1) + for (k=0; k<l1; ++k) + { + PMC(ta1,ta3,CC(0,2,k),CC(0,4,k)) + ta2.r = CC(0,0,k).r - .5*ta1.r; + ta2.i = CC(0,0,k).i - .5*ta1.i; + SCALEC(ta3,taui) + ADDC(a0,CC(0,0,k),ta1) + CONJFLIPC(ta3) + PMC(a1,a2,ta2,ta3) + PMC(tb1,tb3,CC(0,5,k),CC(0,1,k)) + tb2.r = CC(0,3,k).r - .5*tb1.r; + tb2.i = CC(0,3,k).i - .5*tb1.i; + SCALEC(tb3,taui) + ADDC(b0,CC(0,3,k),tb1) + CONJFLIPC(tb3) + PMC(b1,b2,tb2,tb3) + PMC(CH(0,k,0),CH(0,k,3),a0,b0) + PMC(CH(0,k,4),CH(0,k,1),a1,b1) + PMC(CH(0,k,2),CH(0,k,5),a2,b2) + } + else + for (k=0; k<l1; ++k) + for (i=0; i<ido; ++i) + { + PMC(ta1,ta3,CC(i,2,k),CC(i,4,k)) + ta2.r = CC(i,0,k).r - .5*ta1.r; + ta2.i = CC(i,0,k).i - .5*ta1.i; + SCALEC(ta3,taui) + ADDC(a0,CC(i,0,k),ta1) + CONJFLIPC(ta3) + PMC(a1,a2,ta2,ta3) + PMC(tb1,tb3,CC(i,5,k),CC(i,1,k)) + tb2.r = CC(i,3,k).r - .5*tb1.r; + tb2.i = CC(i,3,k).i - .5*tb1.i; + SCALEC(tb3,taui) + ADDC(b0,CC(i,3,k),tb1) + CONJFLIPC(tb3) + PMC(b1,b2,tb2,tb3) + PMC(CH(i,k,0),d3,a0,b0) + PMC(d4,d1,a1,b1) + PMC(d2,d5,a2,b2) + MULPMSIGNC (CH(i,k,1),WA(0,i),d1) + MULPMSIGNC (CH(i,k,2),WA(1,i),d2) + MULPMSIGNC (CH(i,k,3),WA(2,i),d3) + MULPMSIGNC (CH(i,k,4),WA(3,i),d4) + MULPMSIGNC (CH(i,k,5),WA(4,i),d5) + } + } + +static void X(g)(size_t ido, size_t ip, size_t l1, const cmplx *cc, cmplx *ch, + const cmplx *wa) + { + const size_t cdim=ip; + cmplx *tarr=RALLOC(cmplx,2*ip); + cmplx *ccl=tarr, *wal=tarr+ip; + size_t i,j,k,l,jc,lc; + size_t ipph = (ip+1)/2; + + for (i=1; i<ip; ++i) + wal[i]=wa[ido*(i-1)]; + for (k=0; k<l1; ++k) + for (i=0; i<ido; ++i) + { + cmplx s=CC(i,0,k); + ccl[0] = CC(i,0,k); + for(j=1,jc=ip-1; j<ipph; ++j,--jc) + { + PMC (ccl[j],ccl[jc],CC(i,j,k),CC(i,jc,k)) + ADDC (s,s,ccl[j]) + } + CH(i,k,0) = s; + for (j=1, jc=ip-1; j<=ipph; ++j,--jc) + { + cmplx abr=ccl[0], abi={0.,0.}; + size_t iang=0; + for (l=1,lc=ip-1; l<ipph; ++l,--lc) + { + iang+=j; + if (iang>ip) iang-=ip; + abr.r += ccl[l ].r*wal[iang].r; + abr.i += ccl[l ].i*wal[iang].r; + abi.r += ccl[lc].r*wal[iang].i; + abi.i += ccl[lc].i*wal[iang].i; + } +#ifndef BACKWARD + { abi.i=-abi.i; abi.r=-abi.r; } +#endif + CONJFLIPC(abi) + PMC(CH(i,k,j),CH(i,k,jc),abr,abi) + } + } + + DEALLOC(tarr); + + if (ido==1) return; + + for (j=1; j<ip; ++j) + for (k=0; k<l1; ++k) + { + size_t idij=(j-1)*ido+1; + for(i=1; i<ido; ++i, ++idij) + { + cmplx t=CH(i,k,j); + MULPMSIGNC (CH(i,k,j),wa[idij],t) + } + } + } + +#undef PSIGN +#undef PMSIGNC +#undef MULPMSIGNC
diff -r 000000000000 -r d7215946c769 mytype.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mytype.h Sun Oct 11 07:20:35 2015 +0000 @@ -0,0 +1,2 @@ + +#define FLOAT float