static void transform(ComplexMatrix &inout, const std::vector<int> &loop_dims_select, unsigned fftw_flags, bool forward) { if (os::disable_SSE_for_FFTW()) { fftw_flags |= FFTW_UNALIGNED; // see os.h } int num_dims, num_loop_dims; fftw_iodim dims[16], loop_dims[16]; make_iodims(inout.getShape(), loop_dims_select, num_dims, dims, num_loop_dims, loop_dims); ComplexMatrix::accessor inout_acc(inout); double *re = inout_acc.ptr_real(); double *im = inout_acc.ptr_imag(); if (!forward) { std::swap(re, im); } fftw_plan plan = fftw_plan_guru_split_dft( num_dims, dims, num_loop_dims, loop_dims, re, im, // in re, im, // out fftw_flags ); assert(plan); fftw_execute_split_dft(plan, re, im, re, im); fftw_destroy_plan(plan); }
void InvFST_semi_memo(double *rcoeffs, double *icoeffs, double *rdata, double *idata, int bw, double **transpose_seminaive_naive_table, double *workspace, int dataformat, int cutoff, fftw_plan *idctPlan, fftw_plan *ifftPlan ) { int size, m, i, n; double *rdataptr, *idataptr; double *rfourdata, *ifourdata; double *rinvfltres, *iminvfltres, *scratchpad; double *sin_values, *eval_pts; double tmpA ; size = 2*bw ; rfourdata = workspace; /* needs (size * size) */ ifourdata = rfourdata + (size * size); /* needs (size * size) */ rinvfltres = ifourdata + (size * size); /* needs (2 * bw) */ iminvfltres = rinvfltres + (2 * bw); /* needs (2 * bw) */ sin_values = iminvfltres + (2 * bw); /* needs (2 * bw) */ eval_pts = sin_values + (2 * bw); /* needs (2 * bw) */ scratchpad = eval_pts + (2 * bw); /* needs (2 * bw) */ /* total workspace = (8 * bw^2) + (10 * bw) */ /* load up the sin_values array */ n = 2*bw; ArcCosEvalPts(n, eval_pts); for (i=0; i<n; i++) sin_values[i] = sin(eval_pts[i]); /* Now do all of the inverse Legendre transforms */ rdataptr = rcoeffs; idataptr = icoeffs; for (m=0; m<bw; m++) { /* fprintf(stderr,"m = %d\n",m); */ if(m < cutoff) { /* do real part first */ InvSemiNaiveReduced(rdataptr, bw, m, rinvfltres, transpose_seminaive_naive_table[m], sin_values, scratchpad, idctPlan ); /* now do imaginary part */ InvSemiNaiveReduced(idataptr, bw, m, iminvfltres, transpose_seminaive_naive_table[m], sin_values, scratchpad, idctPlan); /* will store normal, then tranpose before doing inverse fft */ memcpy(rfourdata+(m*size), rinvfltres, sizeof(double) * size); memcpy(ifourdata+(m*size), iminvfltres, sizeof(double) * size); /* move to next set of coeffs */ rdataptr += bw-m; idataptr += bw-m; } else { /* first do the real part */ Naive_SynthesizeX(rdataptr, bw, m, rinvfltres, transpose_seminaive_naive_table[m]); /* now do the imaginary */ Naive_SynthesizeX(idataptr, bw, m, iminvfltres, transpose_seminaive_naive_table[m]); /* will store normal, then tranpose before doing inverse fft */ memcpy(rfourdata+(m*size), rinvfltres, sizeof(double) * size); memcpy(ifourdata+(m*size), iminvfltres, sizeof(double) * size); /* move to next set of coeffs */ rdataptr += bw-m; idataptr += bw-m; } } /* closes m loop */ /* now fill in zero values where m = bw (from problem definition) */ memset(rfourdata + (bw * size), 0, sizeof(double) * size); memset(ifourdata + (bw * size), 0, sizeof(double) * size); /* now if the data is real, we don't have to compute the coefficients whose order is less than 0, i.e. since the data is real, we know that invf-hat(l,-m) = conjugate(invf-hat(l,m)), so use that to get the rest of the real data dataformat =0 -> samples are complex, =1 -> samples real */ if(dataformat == 0){ /* now do negative m values */ for (m=bw+1; m<size; m++) { /* fprintf(stderr,"m = %d\n",-(size-m)); */ if ( (size-m) < cutoff ) { /* do real part first */ InvSemiNaiveReduced(rdataptr, bw, size - m, rinvfltres, transpose_seminaive_naive_table[size - m], sin_values, scratchpad, idctPlan); /* now do imaginary part */ InvSemiNaiveReduced(idataptr, bw, size - m, iminvfltres, transpose_seminaive_naive_table[size - m], sin_values, scratchpad, idctPlan ); /* will store normal, then tranpose before doing inverse fft */ if ((m % 2) != 0) for(i=0; i< size; i++){ rinvfltres[i] = -rinvfltres[i]; iminvfltres[i] = -iminvfltres[i]; } memcpy(rfourdata + (m*size), rinvfltres, sizeof(double) * size); memcpy(ifourdata + (m*size), iminvfltres, sizeof(double) * size); /* move to next set of coeffs */ rdataptr += bw-(size-m); idataptr += bw-(size-m); } else { /* first do the real part */ Naive_SynthesizeX(rdataptr, bw, size-m, rinvfltres, transpose_seminaive_naive_table[size-m]); /* now do the imaginary */ Naive_SynthesizeX(idataptr, bw, size-m, iminvfltres, transpose_seminaive_naive_table[size-m]); /* will store normal, then tranpose before doing inverse fft */ if ((m % 2) != 0) for(i=0; i< size; i++){ rinvfltres[i] = -rinvfltres[i]; iminvfltres[i] = -iminvfltres[i]; } memcpy(rfourdata + (m*size), rinvfltres, sizeof(double) * size); memcpy(ifourdata + (m*size), iminvfltres, sizeof(double) * size); /* move to next set of coeffs */ rdataptr += bw-(size-m); idataptr += bw-(size-m); } } /* closes m loop */ } else { for(m = bw + 1; m < size; m++){ memcpy(rfourdata+(m*size), rfourdata+((size-m)*size), sizeof(double) * size); memcpy(ifourdata+(m*size), ifourdata+((size-m)*size), sizeof(double) * size); for(i = 0; i < size; i++) ifourdata[(m*size)+i] *= -1.0; } } /* normalize */ tmpA = 1./(sqrt(2.*M_PI) ); for(i=0;i<4*bw*bw;i++) { rfourdata[i] *= tmpA ; ifourdata[i] *= tmpA ; } fftw_execute_split_dft( *ifftPlan, ifourdata, rfourdata, idata, rdata ); /* amscray */ }
void FST_semi_memo(double *rdata, double *idata, double *rcoeffs, double *icoeffs, int bw, double **seminaive_naive_table, double *workspace, int dataformat, int cutoff, fftw_plan *dctPlan, fftw_plan *fftPlan, double *weights ) { int size, m, i, j; int dummy0, dummy1 ; double *rres, *ires; double *rdataptr, *idataptr; double *fltres, *scratchpad; double *eval_pts; double pow_one; double tmpA, tmpSize ; size = 2*bw ; tmpSize = 1./ ((double ) size); tmpA = sqrt( 2. * M_PI ) ; rres = workspace; /* needs (size * size) = (4 * bw^2) */ ires = rres + (size * size); /* needs (size * size) = (4 * bw^2) */ fltres = ires + (size * size) ; /* needs bw */ eval_pts = fltres + bw; /* needs (2*bw) */ scratchpad = eval_pts + (2*bw); /* needs (4 * bw) */ /* total workspace is (8 * bw^2) + (7 * bw) */ /* do the FFTs along phi */ fftw_execute_split_dft( *fftPlan, rdata, idata, rres, ires ) ; /* normalize note that I'm getting the sqrt(2pi) in there at this point ... to account for the fact that the spherical harmonics are of norm 1: I need to account for the fact that the associated Legendres are of norm 1 */ tmpSize *= tmpA ; for( j = 0 ; j < size*size ; j ++ ) { rres[j] *= tmpSize ; ires[j] *= tmpSize ; } /* point to start of output data buffers */ rdataptr = rcoeffs; idataptr = icoeffs; for (m=0; m<bw; m++) { /* fprintf(stderr,"m = %d\n",m); */ /*** test to see if before cutoff or after ***/ if (m < cutoff){ /* do the real part */ SemiNaiveReduced(rres+(m*size), bw, m, fltres, scratchpad, seminaive_naive_table[m], weights, dctPlan); /* now load real part of coefficients into output space */ memcpy(rdataptr, fltres, sizeof(double) * (bw - m)); rdataptr += bw-m; /* do imaginary part */ SemiNaiveReduced(ires+(m*size), bw, m, fltres, scratchpad, seminaive_naive_table[m], weights, dctPlan); /* now load imaginary part of coefficients into output space */ memcpy(idataptr, fltres, sizeof(double) * (bw - m)); idataptr += bw-m; } else{ /* do real part */ Naive_AnalysisX(rres+(m*size), bw, m, weights, fltres, seminaive_naive_table[m], scratchpad ); memcpy(rdataptr, fltres, sizeof(double) * (bw - m)); rdataptr += bw-m; /* do imaginary part */ Naive_AnalysisX(ires+(m*size), bw, m, weights, fltres, seminaive_naive_table[m], scratchpad ); memcpy(idataptr, fltres, sizeof(double) * (bw - m)); idataptr += bw-m; } } /*** now do upper coefficients ****/ /* now if the data is real, we don't have to compute the coefficients whose order is less than 0, i.e. since the data is real, we know that f-hat(l,-m) = (-1)^m * conjugate(f-hat(l,m)), so use that to get the rest of the coefficients dataformat =0 -> samples are complex, =1 -> samples real */ if( dataformat == 0 ){ /* note that m is greater than bw here, but this is for purposes of indexing the input data arrays. The "true" value of m as a parameter for Pml is size - m */ for (m=bw+1; m<size; m++) { /* fprintf(stderr,"m = %d\n",-(size-m)); */ if ( (size-m) < cutoff ) { /* do real part */ SemiNaiveReduced(rres+(m*size), bw, size-m, fltres, scratchpad, seminaive_naive_table[size-m], weights, dctPlan ); /* now load real part of coefficients into output space */ if ((m % 2) != 0) { for (i=0; i<m-bw; i++) rdataptr[i] = -fltres[i]; } else { memcpy(rdataptr, fltres, sizeof(double) * (m - bw)); } rdataptr += m-bw; /* do imaginary part */ SemiNaiveReduced(ires+(m*size), bw, size-m, fltres, scratchpad, seminaive_naive_table[size-m], weights, dctPlan); /* now load imag part of coefficients into output space */ if ((m % 2) != 0) { for (i=0; i<m-bw; i++) idataptr[i] = -fltres[i]; } else { memcpy(idataptr, fltres, sizeof(double) * (m - bw)); } idataptr += m-bw; } else { Naive_AnalysisX(rres+(m*size), bw, size-m, weights, fltres, seminaive_naive_table[size-m], scratchpad); /* now load real part of coefficients into output space */ if ((m % 2) != 0) { for (i=0; i<m-bw; i++) rdataptr[i] = -fltres[i]; } else { memcpy(rdataptr, fltres, sizeof(double) * (m - bw)); } rdataptr += m-bw; /* do imaginary part */ Naive_AnalysisX(ires+(m*size), bw, size-m, weights, fltres, seminaive_naive_table[size-m], scratchpad); /* now load imag part of coefficients into output space */ if ((m % 2) != 0) { for (i=0; i<m-bw; i++) idataptr[i] = -fltres[i]; } else { memcpy(idataptr, fltres, sizeof(double) * (m - bw)); } idataptr += m-bw; } } } else /**** if the data is real ****/ { pow_one = 1.0; for(i = 1; i < bw; i++){ pow_one *= -1.0; for( j = i; j < bw; j++){ /* SEANINDEXP(dummy0,i,j,bw); SEANINDEXN(dummy1,-i,j,bw); */ dummy0 = seanindex(i, j, bw); dummy1 = seanindex(-i, j, bw); rcoeffs[dummy1] = pow_one * rcoeffs[dummy0]; icoeffs[dummy1] = -pow_one * icoeffs[dummy0]; } } } }