/**
 * Runs an in-place FFTW split-complex (separate real / imaginary arrays)
 * transform on the storage of `inout`.
 *
 * @param inout             Matrix transformed in place; its real and imaginary
 *                          buffers are used as both input and output.
 * @param loop_dims_select  Forwarded to make_iodims() together with the matrix
 *                          shape to build the transform and loop dimension lists.
 * @param fftw_flags        FFTW planner flags. FFTW_UNALIGNED is added when
 *                          os::disable_SSE_for_FFTW() returns true (see os.h).
 * @param forward           true: plan/execute with (real, imag);
 *                          false: plan/execute with the two pointers exchanged.
 *
 * The split guru interface has no sign argument; exchanging the real and
 * imaginary pointers is how the opposite transform direction is requested.
 * No scaling is applied in this function.
 *
 * NOTE(review): a failed plan is only caught by assert(), which is compiled
 * out when NDEBUG is defined.
 */
static void transform(ComplexMatrix &inout, const std::vector<int> &loop_dims_select, unsigned fftw_flags, bool forward)
{
	// see os.h
	const unsigned plan_flags =
		os::disable_SSE_for_FFTW() ? (fftw_flags | FFTW_UNALIGNED) : fftw_flags;

	// Dimension descriptors filled in by make_iodims(); at most 16 of each.
	fftw_iodim transform_dims[16];
	fftw_iodim batch_dims[16];
	int transform_rank, batch_rank;
	make_iodims(inout.getShape(), loop_dims_select,
	            transform_rank, transform_dims,
	            batch_rank, batch_dims);

	ComplexMatrix::accessor acc(inout);
	double *const real_part = acc.ptr_real();
	double *const imag_part = acc.ptr_imag();

	// Backward direction: hand the buffers to FFTW in exchanged roles.
	double *const first  = forward ? real_part : imag_part;
	double *const second = forward ? imag_part : real_part;

	fftw_plan plan = fftw_plan_guru_split_dft(
		transform_rank, transform_dims,
		batch_rank, batch_dims,
		first, second, // in
		first, second, // out (same buffers: in-place)
		plan_flags
	);
	assert(plan);

	fftw_execute_split_dft(plan, first, second, first, second);
	fftw_destroy_plan(plan);
}
// Calling convention:
//  comp_filterbank(f,g,a);
//
// MEX gateway that dispatches every filter in the cell array g to one of
// three MATLAB-callable routines and collects the results in an M-by-1 cell:
//   * filter struct has field "h"                      -> comp_filterbank_td
//   * field "H", a has one effective column and
//     numel(H) equals the number of rows of f          -> comp_filterbank_fft
//   * any other filter with field "H"                  -> comp_filterbank_fftbl
// Filters with neither field (and unpopulated cells) are skipped and their
// output cell stays empty.
//
// Inputs (prhs):
//   f  L-by-W input data, double or single (real or complex)
//   g  cell array of M filter structs ("h"+"offset", or "H" [+"foff","realonly"])
//   a  double matrix with M rows; a second column is read when present
// Output (plhs[0]): M-by-1 cell array.
//
// The FFT of f is kept between calls in the file-level mxF together with the
// FFTW plan in p_double / p_float; both are rebuilt when the size or class of
// f changes.
void mexFunction( int UNUSED(nlhs), mxArray *plhs[],
                  int nrhs, const mxArray *prhs[] )
{
    static int atExitRegistered = 0;

    if (!atExitRegistered)
    {
        atExitRegistered = 1;
        mexAtExit(filterbankAtExit);
    }

    // prhs[0..2] are read unconditionally below.
    if (nrhs < 3)
    {
        mexErrMsgIdAndTxt("comp_filterbank:nrhs",
                          "Three input arguments are required: f, g, a.");
    }

    const mxArray* mxf = prhs[0];
    const mxArray* mxg = prhs[1];
    const mxArray* mxa = prhs[2];

    // mxGetCell is only defined for cell arrays.
    if (!mxIsCell(mxg))
    {
        mexErrMsgIdAndTxt("comp_filterbank:g",
                          "g must be a cell array of filter structs.");
    }

    // input data length
    const mwSize L = mxGetM(mxf);
    const mwSize W = mxGetN(mxf);

    // filter number
    const mwSize M = mxGetNumberOfElements(mxg);

    // a col count
    mwSize acols = mxGetN(mxa);
    const mwSize arows = mxGetM(mxa);

    // a is indexed as a[m] and a[M + m]; make sure those reads stay inside
    // the array and that the data really is double.
    if (M > 0 && (!mxIsDouble(mxa) || acols < 1 || arows < M ||
                  (acols > 1 && arows != M)))
    {
        mexErrMsgIdAndTxt("comp_filterbank:a",
                          "a must be a double matrix with one row per filter.");
    }

    // pointer to a
    double *a = (double*) mxGetData(mxa);

    // A second column consisting only of ones is treated as if it was absent.
    if (acols > 1)
    {
        int isOnes = 1;
        for (mwIndex m = 0; m < M; m++)
        {
            isOnes = isOnes && a[M + m] == 1;
        }

        if (isOnes)
        {
            acols = 1;
        }
    }

    // Cell output
    plhs[0] = mxCreateCellMatrix(M, 1);

    // Stuff for sorting the filters.
    // One MATLAB-managed buffer holds the three index lists; this replaces
    // stack arrays whose length depended on M (zero length is not allowed
    // for those, and a large M could exhaust the stack).
    mwSize tdCount = 0;
    mwSize fftCount = 0;
    mwSize fftblCount = 0;
    const mwSize idxLen = (M > 0) ? M : 1;
    mwIndex *idxBuf = (mwIndex*) mxMalloc(3 * idxLen * sizeof(mwIndex));
    mwIndex *tdArgsIdx = idxBuf;
    mwIndex *fftArgsIdx = idxBuf + idxLen;
    mwIndex *fftblArgsIdx = idxBuf + 2 * idxLen;

    // WALK the filters to determine what has to be done.
    // All per-filter validation happens here, before any cell array holding
    // borrowed pointers exists, so that raising an error is safe.
    for (mwIndex m = 0; m < M; m++)
    {
        mxArray * gEl = mxGetCell(mxg, m);

        if (gEl == NULL)
        {
            // Unpopulated cell: nothing to classify.
            continue;
        }

        if (mxGetField(gEl, 0, "h") != NULL)
        {
            if (mxGetField(gEl, 0, "offset") == NULL)
            {
                mexErrMsgIdAndTxt("comp_filterbank:offset",
                                  "A filter with field h is missing the field offset.");
            }
            tdArgsIdx[tdCount++] = m;
            continue;
        }

        mxArray * mxH = mxGetField(gEl, 0, "H");
        if (mxH != NULL)
        {
            if (acols == 1 && L == mxGetNumberOfElements(mxH))
            {
                fftArgsIdx[fftCount++] = m;
            }
            else
            {
                if (mxGetField(gEl, 0, "foff") == NULL)
                {
                    mexErrMsgIdAndTxt("comp_filterbank:foff",
                                      "A filter with field H is missing the field foff.");
                }
                fftblArgsIdx[fftblCount++] = m;
            }
        }
    }

    // The frequency-domain paths only know how to build the FFT of f for
    // double and single input; anything else would leave mxF unset.
    if ((fftCount > 0 || fftblCount > 0) && !mxIsDouble(mxf) && !mxIsSingle(mxf))
    {
        mexErrMsgIdAndTxt("comp_filterbank:f",
                          "f must be double or single when frequency-domain filters are used.");
    }

    if (tdCount > 0)
    {
        /*
           Here, we have to reformat the inputs and pick up results to comply with:
           c=comp_filterbank_td(f,g,a,offset,ext);
           BEWARE OF THE AUTOMATIC DEALLOCATION by the Matlab engine!
           Arrays can very easily be freed twice, causing segfaults.
           This happens particularly when using mxCreateCell*, which stores
           pointers to other mxArray structs. Setting all such pointers to
           NULL after they are used seems to solve it.
           NOTE(review): if the called function raises an error, control does
           not come back here and the borrowed pointers are not cleared.
        */
        mxArray* plhs_td[1];
        mxArray* prhs_td[5];
        prhs_td[0] = (mxArray*) mxf;
        prhs_td[1] = mxCreateCellMatrix(tdCount, 1);
        prhs_td[2] = mxCreateDoubleMatrix(tdCount, 1, mxREAL);
        double* aPtr = (double*) mxGetData(prhs_td[2]);
        prhs_td[3] = mxCreateDoubleMatrix(tdCount, 1, mxREAL);
        double* offsetPtr = (double*) mxGetData(prhs_td[3]);
        prhs_td[4] = mxCreateString("per");

        for (mwIndex m = 0; m < tdCount; m++)
        {
            mxArray * gEl = mxGetCell(mxg, tdArgsIdx[m]);
            // Borrow the filter array instead of duplicating it (mxDuplicateArray
            // would avoid the sharing but has overhead).
            mxSetCell(prhs_td[1], m, mxGetField(gEl, 0, "h"));
            aPtr[m] = a[tdArgsIdx[m]];
            offsetPtr[m] = mxGetScalar(mxGetField(gEl, 0, "offset"));
        }

        // Finally call it! (Going through mexCallMATLAB has overhead compared
        // with calling the td gateway directly.)
        mexCallMATLAB(1, plhs_td, 5, prhs_td, "comp_filterbank_td");

        // Copy pointers to a proper index in the output + unset all duplicate cell elements
        for (mwIndex m = 0; m < tdCount; m++)
        {
            mxSetCell(plhs[0], tdArgsIdx[m], mxGetCell(plhs_td[0], m));
            mxSetCell(plhs_td[0], m, NULL);
            mxSetCell(prhs_td[1], m, NULL);
        }

        mxDestroyArray(plhs_td[0]);
        mxDestroyArray(prhs_td[1]);
        mxDestroyArray(prhs_td[2]);
        mxDestroyArray(prhs_td[3]);
        mxDestroyArray(prhs_td[4]);
    }

    if (fftCount > 0 || fftblCount > 0)
    {
        // Need to do FFT of mxf
        mwIndex ndim = 2;
        const mwSize dims[] = {L, W};

        // (Re)build the cached array and plan when there is none yet or when
        // the size or class of f differs from the cached one.
        if (mxF == NULL || mxGetM(mxF) != L || mxGetN(mxF) != W
            || mxGetClassID(mxF) != mxGetClassID(mxf))
        {
            if (mxF != NULL)
            {
                mxDestroyArray(mxF);
                mxF = NULL;
            }

            if (mxIsDouble(mxf))
            {
                mxF = mxCreateNumericArray(ndim, dims, mxDOUBLE_CLASS, mxCOMPLEX);
                // Make the cache persistent right away: if a later call into
                // MATLAB errors out, a non-persistent array would be freed
                // automatically and the file-level pointer would dangle.
                mexMakeArrayPersistent(mxF);

                fftw_iodim fftw_dims[1];
                fftw_iodim howmanydims[1];

                // One length-L transform per column, W columns, unit stride.
                fftw_dims[0].n = L;
                fftw_dims[0].is = 1;
                fftw_dims[0].os = 1;

                howmanydims[0].n = W;
                howmanydims[0].is = L;
                howmanydims[0].os = L;

                if (p_double == NULL)
                {
                    p_double = (fftw_plan*) malloc(sizeof(fftw_plan));
                    if (p_double == NULL)
                    {
                        mexErrMsgIdAndTxt("comp_filterbank:memory",
                                          "Out of memory while allocating the FFT plan.");
                    }
                }
                else
                {
                    fftw_destroy_plan(*p_double);
                }

                // FFTW_MEASURE sometimes hangs here
                *p_double = fftw_plan_guru_split_dft(
                                1, fftw_dims,
                                1, howmanydims,
                                (double*) mxGetData(mxF), (double*) mxGetImagData(mxF),
                                (double*) mxGetData(mxF), (double*) mxGetImagData(mxF),
                                FFTW_ESTIMATE);
            }
            else if (mxIsSingle(mxf))
            {
                mxF = mxCreateNumericArray(ndim, dims, mxSINGLE_CLASS, mxCOMPLEX);
                // Same reasoning as in the double branch.
                mexMakeArrayPersistent(mxF);

                fftwf_iodim fftw_dims[1];
                fftwf_iodim howmanydims[1];

                fftw_dims[0].n = L;
                fftw_dims[0].is = 1;
                fftw_dims[0].os = 1;

                howmanydims[0].n = W;
                howmanydims[0].is = L;
                howmanydims[0].os = L;

                if (p_float == NULL)
                {
                    p_float = (fftwf_plan*) malloc(sizeof(fftwf_plan));
                    if (p_float == NULL)
                    {
                        mexErrMsgIdAndTxt("comp_filterbank:memory",
                                          "Out of memory while allocating the FFT plan.");
                    }
                }
                else
                {
                    fftwf_destroy_plan(*p_float);
                }

                *p_float = fftwf_plan_guru_split_dft(
                               1, fftw_dims,
                               1, howmanydims,
                               (float*) mxGetData(mxF), (float*) mxGetImagData(mxF),
                               (float*) mxGetData(mxF), (float*) mxGetImagData(mxF),
                               FFTW_ESTIMATE);
            }
        }

        // Load f into the cache (imaginary part zeroed for real input) and
        // transform it in place with the stored plan.
        if (mxIsDouble(mxf))
        {
            memcpy(mxGetPr(mxF), mxGetPr(mxf), L * W * sizeof(double));
            memset(mxGetPi(mxF), 0, L * W * sizeof(double));
            if (mxIsComplex(mxf))
                memcpy(mxGetPi(mxF), mxGetPi(mxf), L * W * sizeof(double));

            fftw_execute(*p_double);
        }
        else if (mxIsSingle(mxf))
        {
            memcpy(mxGetPr(mxF), mxGetPr(mxf), L * W * sizeof(float));
            memset(mxGetPi(mxF), 0, L * W * sizeof(float));
            if (mxIsComplex(mxf))
                memcpy(mxGetPi(mxF), mxGetPi(mxf), L * W * sizeof(float));

            fftwf_execute(*p_float);
        }
    }

    if (fftCount > 0)
    {
        mxArray* plhs_fft[1];
        mxArray* prhs_fft[3];
        prhs_fft[0] = mxF;
        prhs_fft[1] = mxCreateCellMatrix(fftCount, 1);
        prhs_fft[2] = mxCreateDoubleMatrix(fftCount, 1, mxREAL);
        double* aPtr = (double*) mxGetData(prhs_fft[2]);

        for (mwIndex m = 0; m < fftCount; m++)
        {
            mxArray * gEl = mxGetCell(mxg, fftArgsIdx[m]);
            // Borrowed pointer, see the warning in the time-domain section.
            mxSetCell(prhs_fft[1], m, mxGetField(gEl, 0, "H"));
            aPtr[m] = a[fftArgsIdx[m]];
        }

        mexCallMATLAB(1, plhs_fft, 3, prhs_fft, "comp_filterbank_fft");

        // Move the results to their output slots and clear every shared pointer.
        for (mwIndex m = 0; m < fftCount; m++)
        {
            mxSetCell(plhs[0], fftArgsIdx[m], mxGetCell(plhs_fft[0], m));
            mxSetCell(plhs_fft[0], m, NULL);
            mxSetCell(prhs_fft[1], m, NULL);
        }

        mxDestroyArray(plhs_fft[0]);
        mxDestroyArray(prhs_fft[1]);
        mxDestroyArray(prhs_fft[2]);
    }

    if (fftblCount > 0)
    {
        mxArray* plhs_fftbl[1];
        mxArray* prhs_fftbl[5];
        prhs_fftbl[0] = mxF;
        prhs_fftbl[1] = mxCreateCellMatrix(fftblCount, 1);
        prhs_fftbl[2] = mxCreateDoubleMatrix(fftblCount, 1, mxREAL);
        prhs_fftbl[3] = mxCreateDoubleMatrix(fftblCount, 2, mxREAL);
        prhs_fftbl[4] = mxCreateDoubleMatrix(fftblCount, 1, mxREAL);
        double* foffPtr = (double*) mxGetData(prhs_fftbl[2]);
        double* aPtr = (double*) mxGetData(prhs_fftbl[3]);
        double* realonlyPtr = (double*) mxGetData(prhs_fftbl[4]);

        // Set all realonly flags to zero
        memset(realonlyPtr, 0, fftblCount * sizeof * realonlyPtr);

        for (mwIndex m = 0; m < fftblCount; m++)
        {
            mxArray * gEl = mxGetCell(mxg, fftblArgsIdx[m]);
            // Borrowed pointer, see the warning in the time-domain section.
            mxSetCell(prhs_fftbl[1], m, mxGetField(gEl, 0, "H"));
            foffPtr[m] = mxGetScalar(mxGetField(gEl, 0, "foff"));
            aPtr[m] = a[fftblArgsIdx[m]];

            // Second column of the fftblCount-by-2 matrix: taken from the
            // second column of a when there is one, otherwise 1.
            if (acols > 1)
                aPtr[m + fftblCount] = a[fftblArgsIdx[m] + M];
            else
                aPtr[m + fftblCount] = 1;

            // Only if realonly is specified
            mxArray* mxrealonly;
            if ((mxrealonly = mxGetField(gEl, 0, "realonly")))
                realonlyPtr[m] = mxGetScalar(mxrealonly);
        }

        mexCallMATLAB(1, plhs_fftbl, 5, prhs_fftbl, "comp_filterbank_fftbl");

        // Move the results to their output slots and clear every shared pointer.
        for (mwIndex m = 0; m < fftblCount; m++)
        {
            mxSetCell(plhs[0], fftblArgsIdx[m], mxGetCell(plhs_fftbl[0], m));
            mxSetCell(plhs_fftbl[0], m, NULL);
            mxSetCell(prhs_fftbl[1], m, NULL);
        }

        mxDestroyArray(plhs_fftbl[0]);
        mxDestroyArray(prhs_fftbl[1]);
        mxDestroyArray(prhs_fftbl[2]);
        mxDestroyArray(prhs_fftbl[3]);
        mxDestroyArray(prhs_fftbl[4]);
    }

    // Do not keep a cache larger than MAXARRAYLEN elements between calls.
    if (L * W > MAXARRAYLEN && mxF != NULL)
    {
        mxDestroyArray(mxF);
        mxF = NULL;
    }

    mxFree(idxBuf);
}
/* Here's the big banana Convolves two functions defined on the 2-sphere. Uses seminaive algorithms for spherical harmonic transforms size = 2*bw Inputs: rdata, idata - (size * size) arrays containing real and imaginary parts of sampled function. rfilter, ifilter - (size * size) arrays containing real and imaginary parts of sampled filter function. rres, ires - (size * size) arrays containing real and imaginary parts of result function. Suggestion - if you want to do multiple convolutions, don't keep allocating and freeing space with every call, or keep recomputing the spharmonic_pml tables. Allocate workspace once before you call this function, then just set up pointers as first step of this procedure rather than mallocing. And do the same with the FST, FZT, and InvFST functions. ASSUMPTIONS: 1. data is strictly REAL 2. will do semi-naive algorithm for ALL orders -> change the cutoff value if you want it to be different Memory requirements for Conv2Sphere Need space for spharmonic tables and local workspace and scratchpad space for FST_semi Let legendreSize = Reduced_Naive_TableSize(bw,cutoff) + Reduced_SpharmonicTableSize(bw,cutoff) Then the workspace needs to be this large: 2 * legendreSize + 8 * (bw*bw) + 10*bw + 4 * (bw*bw) + 2*bw for a total of 2 * legendreSize + 12 * (bw*bw) + 12*bw ; */ void Conv2Sphere_semi_memo(double *rdata, double *idata, double *rfilter, double *ifilter, double *rres, double *ires, int bw, double *workspace) { int size, spharmonic_bound ; int legendreSize, cutoff ; double *frres, *fires, *filtrres, *filtires, *trres, *tires; double **spharmonic_pml_table, **transpose_spharmonic_pml_table; double *spharmonic_result_space, *transpose_spharmonic_result_space; double *scratchpad; /* fftw */ int rank, howmany_rank ; fftw_iodim dims[1], howmany_dims[1]; /* forward transform stuff */ fftw_plan dctPlan, fftPlan ; double *weights ; /* inverse transform stuff */ fftw_plan idctPlan, ifftPlan ; size =2*bw ; cutoff = bw ; legendreSize = 
Reduced_Naive_TableSize(bw,cutoff) + Reduced_SpharmonicTableSize(bw,cutoff) ; /* assign space */ spharmonic_bound = legendreSize ; spharmonic_result_space = workspace; /* needs legendreSize */ transpose_spharmonic_result_space = spharmonic_result_space + legendreSize ; /* needs legendreSize */ frres = transpose_spharmonic_result_space + legendreSize ; /* needs (bw*bw) */ fires = frres + (bw*bw); /* needs (bw*bw) */ trres = fires + (bw*bw); /* needs (bw*bw) */ tires = trres + (bw*bw); /* needs (bw*bw) */ filtrres = tires + (bw*bw); /* needs bw */ filtires = filtrres + bw; /* needs bw */ scratchpad = filtires + bw; /* needs (8*bw^2)+(10*bw) */ /* allocate space, and compute, the weights for this bandwidth */ weights = (double *) malloc(sizeof(double) * 4 * bw); makeweights( bw, weights ); /* make the fftw plans */ /* make DCT plans -> note that I will be using the GURU interface to execute these plans within the routines*/ /* forward DCT */ dctPlan = fftw_plan_r2r_1d( 2*bw, weights, rdata, FFTW_REDFT10, FFTW_ESTIMATE ) ; /* inverse DCT */ idctPlan = fftw_plan_r2r_1d( 2*bw, weights, rdata, FFTW_REDFT01, FFTW_ESTIMATE ); /* fft "preamble" ; note that this plan places the output in a transposed array */ rank = 1 ; dims[0].n = 2*bw ; dims[0].is = 1 ; dims[0].os = 2*bw ; howmany_rank = 1 ; howmany_dims[0].n = 2*bw ; howmany_dims[0].is = 2*bw ; howmany_dims[0].os = 1 ; /* forward fft */ fftPlan = fftw_plan_guru_split_dft( rank, dims, howmany_rank, howmany_dims, rdata, idata, workspace, workspace+(4*bw*bw), FFTW_ESTIMATE ); /* now plan for inverse fft - note that this plans assumes that I'm working with a transposed array, e.g. 
the inputs for a length 2*bw transform are placed every 2*bw apart, the output will be consecutive entries in the array */ rank = 1 ; dims[0].n = 2*bw ; dims[0].is = 2*bw ; dims[0].os = 1 ; howmany_rank = 1 ; howmany_dims[0].n = 2*bw ; howmany_dims[0].is = 1 ; howmany_dims[0].os = 2*bw ; /* inverse fft */ ifftPlan = fftw_plan_guru_split_dft( rank, dims, howmany_rank, howmany_dims, rdata, idata, workspace, workspace+(4*bw*bw), FFTW_ESTIMATE ); /* precompute the associated Legendre fcts */ spharmonic_pml_table = Spharmonic_Pml_Table(bw, spharmonic_result_space, scratchpad); transpose_spharmonic_pml_table = Transpose_Spharmonic_Pml_Table(spharmonic_pml_table, bw, transpose_spharmonic_result_space, scratchpad); FST_semi_memo(rdata, idata, frres, fires, bw, spharmonic_pml_table, scratchpad, 1, bw, &dctPlan, &fftPlan, weights ); FZT_semi_memo(rfilter, ifilter, filtrres, filtires, bw, spharmonic_pml_table[0], scratchpad, 1, &dctPlan, weights ); TransMult(frres, fires, filtrres, filtires, trres, tires, bw); InvFST_semi_memo(trres, tires, rres, ires, bw, transpose_spharmonic_pml_table, scratchpad, 1, bw, &idctPlan, &ifftPlan ); free( weights ) ; /*** have to free the memory that was allocated in Spharmonic_Pml_Table() and Transpose_Spharmonic_Pml_Table() ***/ free(spharmonic_pml_table); free(transpose_spharmonic_pml_table); /* destroy plans */ fftw_destroy_plan( ifftPlan ) ; fftw_destroy_plan( fftPlan ) ; fftw_destroy_plan( idctPlan ) ; fftw_destroy_plan( dctPlan ) ; }
int main(int argc, char **argv) { FILE *errorsfp; int i, j, bw, size, loops; int l, m, dummy, cutoff ; int rank, howmany_rank ; double *rcoeffs, *icoeffs, *rdata, *idata, *rresult, *iresult; double *workspace, *weights; double dumx, dumy ; double *relerror, *curmax, granderror, grandrelerror; double realtmp, imagtmp,origmag, tmpmag; double ave_error, ave_relerror, stddev_error, stddev_relerror; double total_time, for_time, inv_time; double tstart, tstop; time_t seed; fftw_plan dctPlan, idctPlan ; fftw_plan fftPlan, ifftPlan ; fftw_iodim dims[1], howmany_dims[1]; if (argc < 3) { fprintf(stdout,"Usage: test_s2_semi_fly bw loops [error_file]\n"); exit(0); } bw = atoi(argv[1]); loops = atoi(argv[2]); /*** ASSUMING WILL SEMINAIVE ALL ORDERS ***/ cutoff = bw ; size = 2*bw; total_time = 0.0; for_time = 0.0; inv_time = 0.0; granderror = 0.0; grandrelerror = 0.0; /* allocate memory */ rcoeffs = (double *) malloc(sizeof(double) * (bw * bw)); icoeffs = (double *) malloc(sizeof(double) * (bw * bw)); rdata = (double *) malloc(sizeof(double) * (size * size)); idata = (double *) malloc(sizeof(double) * (size * size)); rresult = (double *) malloc(sizeof(double) * (bw * bw)); iresult = (double *) malloc(sizeof(double) * (bw * bw)); workspace = (double *) malloc(sizeof(double) * ((10 * (bw*bw)) + (24 * bw))); /** space for errors **/ relerror = (double *) malloc(sizeof(double) * loops); curmax = (double *) malloc(sizeof(double) * loops); /* make array for weights */ weights = (double *) malloc(sizeof(double) * 4 * bw); /**** At this point, check to see if all the memory has been allocated. If it has not, there's no point in going further. 
****/ if ( (rdata == NULL) || (idata == NULL) || (rresult == NULL) || (iresult == NULL) || (rcoeffs == NULL) || (icoeffs == NULL) || (workspace == NULL) || (weights == NULL) ) { perror("Error in allocating memory"); exit( 1 ) ; } /*** generate a seed, needed to generate random data ***/ time(&seed); srand48( seed ); /* construct fftw plans */ /* make DCT plans -> note that I will be using the GURU interface to execute these plans within the routines*/ /* forward DCT */ dctPlan = fftw_plan_r2r_1d( 2*bw, weights, rdata, FFTW_REDFT10, FFTW_ESTIMATE ) ; /* inverse DCT */ idctPlan = fftw_plan_r2r_1d( 2*bw, weights, rdata, FFTW_REDFT01, FFTW_ESTIMATE ); /* fftw "preamble" ; note that this plan places the output in a transposed array */ rank = 1 ; dims[0].n = 2*bw ; dims[0].is = 1 ; dims[0].os = 2*bw ; howmany_rank = 1 ; howmany_dims[0].n = 2*bw ; howmany_dims[0].is = 2*bw ; howmany_dims[0].os = 1 ; /* forward fft */ fftPlan = fftw_plan_guru_split_dft( rank, dims, howmany_rank, howmany_dims, rdata, idata, workspace, workspace+(4*bw*bw), FFTW_ESTIMATE ); /* now plan for inverse fft - note that this plans assumes that I'm working with a transposed array, e.g. 
the inputs for a length 2*bw transform are placed every 2*bw apart, the output will be consecutive entries in the array */ rank = 1 ; dims[0].n = 2*bw ; dims[0].is = 2*bw ; dims[0].os = 1 ; howmany_rank = 1 ; howmany_dims[0].n = 2*bw ; howmany_dims[0].is = 1 ; howmany_dims[0].os = 2*bw ; /* inverse fft */ ifftPlan = fftw_plan_guru_split_dft( rank, dims, howmany_rank, howmany_dims, rdata, idata, workspace, workspace+(4*bw*bw), FFTW_ESTIMATE ); /* now make the weights */ makeweights( bw, weights ); /* now start the looping */ fprintf(stdout,"about to enter loop\n\n"); for(i=0; i<loops; i++){ /**** loop to generate spherical harmonic coefficients of a real-valued function *****/ for(m=0;m<bw;m++) for(l=m;l<bw;l++){ dumx = 2.0 * (drand48()-0.5); dumy = 2.0 * (drand48()-0.5); dummy = seanindex(m,l,bw); rcoeffs[dummy] = dumx; icoeffs[dummy] = dumy; dummy = seanindex(-m,l,bw); rcoeffs[dummy] = ((double) pow(-1.0, (double) m)) * dumx; icoeffs[dummy] = ((double) pow(-1.0, (double) (m + 1))) * dumy; } /* have to zero out the m=0 coefficients, since those are real */ for(m=0;m<bw;m++) icoeffs[m] = 0.0; /* do the inverse spherical transform */ tstart = csecond(); InvFST_semi_fly(rcoeffs,icoeffs, rdata, idata, bw, workspace, 1, cutoff, &idctPlan, &ifftPlan ); tstop = csecond(); inv_time += (tstop - tstart); fprintf(stdout,"inv time \t = %.4e\n", tstop - tstart); /* now do the forward spherical transform */ tstart = csecond(); FST_semi_fly(rdata, idata, rresult, iresult, bw, workspace, 1, cutoff, &dctPlan, &fftPlan, weights ) ; tstop = csecond(); for_time += (tstop - tstart); fprintf(stdout,"forward time \t = %.4e\n", tstop - tstart); /* now to compute the error */ relerror[i] = 0.0; curmax[i] = 0.0; for(j=0;j<(bw*bw);j++){ realtmp = rresult[j]-rcoeffs[j]; imagtmp = iresult[j]-icoeffs[j]; origmag = sqrt((rcoeffs[j]*rcoeffs[j]) + (icoeffs[j]*icoeffs[j])); tmpmag = sqrt((realtmp*realtmp) + (imagtmp*imagtmp)); relerror[i] = max(relerror[i],tmpmag/(origmag + pow(10.0, -50.0))); 
curmax[i] = max(curmax[i],tmpmag); } fprintf(stdout,"r-o error\t = %.12f\n", curmax[i]); fprintf(stdout,"(r-o)/o error\t = %.12f\n\n", relerror[i]); granderror += curmax[i]; grandrelerror += relerror[i]; } total_time = inv_time + for_time; ave_error = granderror / ( (double) loops ); ave_relerror = grandrelerror / ( (double) loops ); stddev_error = 0.0 ; stddev_relerror = 0.0; for( i = 0 ; i < loops ; i ++ ) { stddev_error += pow( ave_error - curmax[i] , 2.0 ); stddev_relerror += pow( ave_relerror - relerror[i] , 2.0 ); } /*** this won't work if loops == 1 ***/ if( loops != 1 ) { stddev_error = sqrt(stddev_error / ( (double) (loops - 1) ) ); stddev_relerror = sqrt(stddev_relerror / ( (double) (loops - 1) ) ); } fprintf(stdout,"Program: test_s2_semi_fly\n"); fprintf(stdout,"Bandwidth = %d\n", bw); #ifndef WALLCLOCK fprintf(stdout,"Total elapsed cpu time :\t\t %.4e seconds.\n", total_time); fprintf(stdout,"Average cpu forward per iteration:\t %.4e seconds.\n", for_time/((double) loops)); fprintf(stdout,"Average cpu inverse per iteration:\t %.4e seconds.\n", inv_time/((double) loops)); #else fprintf(stdout,"Total elapsed wall time :\t\t %.4e seconds.\n", total_time); fprintf(stdout,"Average wall forward per iteration:\t %.4e seconds.\n", for_time/((double) loops)); fprintf(stdout,"Average wall inverse per iteration:\t %.4e seconds.\n", inv_time/((double) loops)); #endif fprintf(stdout,"Average r-o error:\t\t %.4e\t", granderror/((double) loops)); fprintf(stdout,"std dev: %.4e\n",stddev_error); fprintf(stdout,"Average (r-o)/o error:\t\t %.4e\t", grandrelerror/((double) loops)); fprintf(stdout,"std dev: %.4e\n\n",stddev_relerror); if (argc == 4) { errorsfp = fopen(argv[3],"w"); for(m = 0 ; m < bw ; m++ ) { for(l = m ; l< bw ; l++ ) { dummy = seanindex(m,l,bw); fprintf(errorsfp, "dummy = %d\t m = %d\tl = %d\t%.10f %.10f\n", dummy, m, l, fabs(rcoeffs[dummy] - rresult[dummy]), fabs(icoeffs[dummy] - iresult[dummy])); dummy = seanindex(-m,l,bw); fprintf(errorsfp, "dummy = 
%d\t m = %d\tl = %d\t%.10f %.10f\n", dummy, -m, l, fabs(rcoeffs[dummy] - rresult[dummy]), fabs(icoeffs[dummy] - iresult[dummy])); } } fclose(errorsfp); } /* destroy fftw plans */ fftw_destroy_plan( ifftPlan ); fftw_destroy_plan( fftPlan ); fftw_destroy_plan( idctPlan ); fftw_destroy_plan( dctPlan ); /* free memory */ free( weights ); free(curmax); free(relerror); free(workspace); free(iresult); free(rresult); free(idata); free(rdata); free(icoeffs); free(rcoeffs); return 0 ; }