void so3CombineCoef_fftw( int bwIn, int bwOut, int degLim, REAL *sigCoefR, REAL *sigCoefI, REAL *patCoefR, REAL *patCoefI, fftw_complex *so3Coef ) { int l, m1, m2 ; int fudge, dummy ; REAL tmpSigCoefR, tmpSigCoefI ; REAL tmpPatCoefR, tmpPatCoefI ; /* for sanity, set all so3coefs to 0 */ memset( so3Coef, 0, sizeof(fftw_complex) * totalCoeffs_so3( bwOut ) ); for( l = 0 ; l <= degLim ; l ++ ) { for( m1 = -l ; m1 <= l ; m1 ++ ) { /* grab signal coefficient, at this degree and order */ dummy = seanindex(-m1, l, bwIn) ; tmpSigCoefR = sigCoefR[ dummy ]; tmpSigCoefI = sigCoefI[ dummy ]; /* need to reset the -1 fudge factor */ if ( (m1 + l) % 2 ) fudge = -1 ; else fudge = 1 ; for( m2 = -l ; m2 <= l ; m2 ++ ) { dummy = seanindex( -m2, l, bwIn ); /* first take the CONJUGATE of the pattern coef and multiply it by the fudge factor */ tmpPatCoefR = fudge * patCoefR[ dummy ]; tmpPatCoefI = fudge * (-patCoefI[ dummy ]); /* now multiply the signal coef by the pattern coef, and save it in the so3 coefficient array */ dummy = so3CoefLoc( m1, m2, l, bwOut ) ; so3Coef[ dummy ][0] = tmpSigCoefR*tmpPatCoefR - tmpSigCoefI*tmpPatCoefI ; so3Coef[ dummy ][1] = tmpSigCoefR*tmpPatCoefI + tmpSigCoefI*tmpPatCoefR ; /* multiply fudge factor by -1 */ fudge *= -1 ; } } } }
void so3CombineCoef( int bwIn, int bwOut, int degLim, double *sigCoefR, double *sigCoefI, double *patCoefR, double *patCoefI, double *so3CoefR, double *so3CoefI ) { int l, m1, m2 ; int fudge, dummy ; double tmpSigCoefR, tmpSigCoefI ; double tmpPatCoefR, tmpPatCoefI ; double wigNorm ; /* for sanity, set all so3coefs to 0 */ memset( so3CoefR, 0, sizeof(double) * totalCoeffs_so3( bwOut ) ); memset( so3CoefI, 0, sizeof(double) * totalCoeffs_so3( bwOut ) ); for( l = 0 ; l <= degLim ; l ++ ) { wigNorm = 2.*M_PI*sqrt(2./(2.*l+1)) ; for( m1 = -l ; m1 <= l ; m1 ++ ) { /* grab signal coefficient, at this degree and order */ dummy = seanindex(-m1, l, bwIn) ; tmpSigCoefR = sigCoefR[ dummy ]; tmpSigCoefI = sigCoefI[ dummy ]; /* need to reset the -1 fudge factor */ if ( (m1 + l) % 2 ) fudge = -1 ; else fudge = 1 ; for( m2 = -l ; m2 <= l ; m2 ++ ) { dummy = seanindex( -m2, l, bwIn ); /* first take the CONJUGATE of the pattern coef and multiply it by the fudge factor */ tmpPatCoefR = fudge * patCoefR[ dummy ]; tmpPatCoefI = fudge * (-patCoefI[ dummy ]); /* now multiply the signal coef by the pattern coef, and save it in the so3 coefficient array */ dummy = so3CoefLoc( m1, m2, l, bwOut ) ; so3CoefR[ dummy ] = wigNorm * ( tmpSigCoefR*tmpPatCoefR - tmpSigCoefI*tmpPatCoefI ) ; so3CoefI[ dummy ] = wigNorm * ( tmpSigCoefR*tmpPatCoefI + tmpSigCoefI*tmpPatCoefR ) ; /* multiply fudge factor by -1 */ fudge *= -1 ; } } } }
void FST_semi_memo(double *rdata, double *idata, double *rcoeffs, double *icoeffs, int bw, double **seminaive_naive_table, double *workspace, int dataformat, int cutoff, fftw_plan *dctPlan, fftw_plan *fftPlan, double *weights ) { int size, m, i, j; int dummy0, dummy1 ; double *rres, *ires; double *rdataptr, *idataptr; double *fltres, *scratchpad; double *eval_pts; double pow_one; double tmpA, tmpSize ; size = 2*bw ; tmpSize = 1./ ((double ) size); tmpA = sqrt( 2. * M_PI ) ; rres = workspace; /* needs (size * size) = (4 * bw^2) */ ires = rres + (size * size); /* needs (size * size) = (4 * bw^2) */ fltres = ires + (size * size) ; /* needs bw */ eval_pts = fltres + bw; /* needs (2*bw) */ scratchpad = eval_pts + (2*bw); /* needs (4 * bw) */ /* total workspace is (8 * bw^2) + (7 * bw) */ /* do the FFTs along phi */ fftw_execute_split_dft( *fftPlan, rdata, idata, rres, ires ) ; /* normalize note that I'm getting the sqrt(2pi) in there at this point ... to account for the fact that the spherical harmonics are of norm 1: I need to account for the fact that the associated Legendres are of norm 1 */ tmpSize *= tmpA ; for( j = 0 ; j < size*size ; j ++ ) { rres[j] *= tmpSize ; ires[j] *= tmpSize ; } /* point to start of output data buffers */ rdataptr = rcoeffs; idataptr = icoeffs; for (m=0; m<bw; m++) { /* fprintf(stderr,"m = %d\n",m); */ /*** test to see if before cutoff or after ***/ if (m < cutoff){ /* do the real part */ SemiNaiveReduced(rres+(m*size), bw, m, fltres, scratchpad, seminaive_naive_table[m], weights, dctPlan); /* now load real part of coefficients into output space */ memcpy(rdataptr, fltres, sizeof(double) * (bw - m)); rdataptr += bw-m; /* do imaginary part */ SemiNaiveReduced(ires+(m*size), bw, m, fltres, scratchpad, seminaive_naive_table[m], weights, dctPlan); /* now load imaginary part of coefficients into output space */ memcpy(idataptr, fltres, sizeof(double) * (bw - m)); idataptr += bw-m; } else{ /* do real part */ Naive_AnalysisX(rres+(m*size), bw, m, weights, fltres, seminaive_naive_table[m], scratchpad ); memcpy(rdataptr, fltres, sizeof(double) * (bw - m)); rdataptr += bw-m; /* do imaginary part */ Naive_AnalysisX(ires+(m*size), bw, m, weights, fltres, seminaive_naive_table[m], scratchpad ); memcpy(idataptr, fltres, sizeof(double) * (bw - m)); idataptr += bw-m; } } /*** now do upper coefficients ****/ /* now if the data is real, we don't have to compute the coefficients whose order is less than 0, i.e. since the data is real, we know that f-hat(l,-m) = (-1)^m * conjugate(f-hat(l,m)), so use that to get the rest of the coefficients dataformat =0 -> samples are complex, =1 -> samples real */ if( dataformat == 0 ){ /* note that m is greater than bw here, but this is for purposes of indexing the input data arrays. The "true" value of m as a parameter for Pml is size - m */ for (m=bw+1; m<size; m++) { /* fprintf(stderr,"m = %d\n",-(size-m)); */ if ( (size-m) < cutoff ) { /* do real part */ SemiNaiveReduced(rres+(m*size), bw, size-m, fltres, scratchpad, seminaive_naive_table[size-m], weights, dctPlan ); /* now load real part of coefficients into output space */ if ((m % 2) != 0) { for (i=0; i<m-bw; i++) rdataptr[i] = -fltres[i]; } else { memcpy(rdataptr, fltres, sizeof(double) * (m - bw)); } rdataptr += m-bw; /* do imaginary part */ SemiNaiveReduced(ires+(m*size), bw, size-m, fltres, scratchpad, seminaive_naive_table[size-m], weights, dctPlan); /* now load imag part of coefficients into output space */ if ((m % 2) != 0) { for (i=0; i<m-bw; i++) idataptr[i] = -fltres[i]; } else { memcpy(idataptr, fltres, sizeof(double) * (m - bw)); } idataptr += m-bw; } else { Naive_AnalysisX(rres+(m*size), bw, size-m, weights, fltres, seminaive_naive_table[size-m], scratchpad); /* now load real part of coefficients into output space */ if ((m % 2) != 0) { for (i=0; i<m-bw; i++) rdataptr[i] = -fltres[i]; } else { memcpy(rdataptr, fltres, sizeof(double) * (m - bw)); } rdataptr += m-bw; /* do imaginary part */ Naive_AnalysisX(ires+(m*size), bw, size-m, weights, fltres, seminaive_naive_table[size-m], scratchpad); /* now load imag part of coefficients into output space */ if ((m % 2) != 0) { for (i=0; i<m-bw; i++) idataptr[i] = -fltres[i]; } else { memcpy(idataptr, fltres, sizeof(double) * (m - bw)); } idataptr += m-bw; } } } else /**** if the data is real ****/ { pow_one = 1.0; for(i = 1; i < bw; i++){ pow_one *= -1.0; for( j = i; j < bw; j++){ /* SEANINDEXP(dummy0,i,j,bw); SEANINDEXN(dummy1,-i,j,bw); */ dummy0 = seanindex(i, j, bw); dummy1 = seanindex(-i, j, bw); rcoeffs[dummy1] = pow_one * rcoeffs[dummy0]; icoeffs[dummy1] = -pow_one * icoeffs[dummy0]; } } } }
int main(int argc, char **argv) { FILE *errorsfp; int i, j, bw, size, loops; int l, m, dummy, cutoff ; int rank, howmany_rank ; double *rcoeffs, *icoeffs, *rdata, *idata, *rresult, *iresult; double *workspace, *weights; double dumx, dumy ; double *relerror, *curmax, granderror, grandrelerror; double realtmp, imagtmp,origmag, tmpmag; double ave_error, ave_relerror, stddev_error, stddev_relerror; double total_time, for_time, inv_time; double tstart, tstop; time_t seed; fftw_plan dctPlan, idctPlan ; fftw_plan fftPlan, ifftPlan ; fftw_iodim dims[1], howmany_dims[1]; if (argc < 3) { fprintf(stdout,"Usage: test_s2_semi_fly bw loops [error_file]\n"); exit(0); } bw = atoi(argv[1]); loops = atoi(argv[2]); /*** ASSUMING WILL SEMINAIVE ALL ORDERS ***/ cutoff = bw ; size = 2*bw; total_time = 0.0; for_time = 0.0; inv_time = 0.0; granderror = 0.0; grandrelerror = 0.0; /* allocate memory */ rcoeffs = (double *) malloc(sizeof(double) * (bw * bw)); icoeffs = (double *) malloc(sizeof(double) * (bw * bw)); rdata = (double *) malloc(sizeof(double) * (size * size)); idata = (double *) malloc(sizeof(double) * (size * size)); rresult = (double *) malloc(sizeof(double) * (bw * bw)); iresult = (double *) malloc(sizeof(double) * (bw * bw)); workspace = (double *) malloc(sizeof(double) * ((10 * (bw*bw)) + (24 * bw))); /** space for errors **/ relerror = (double *) malloc(sizeof(double) * loops); curmax = (double *) malloc(sizeof(double) * loops); /* make array for weights */ weights = (double *) malloc(sizeof(double) * 4 * bw); /**** At this point, check to see if all the memory has been allocated. If it has not, there's no point in going further. ****/ if ( (rdata == NULL) || (idata == NULL) || (rresult == NULL) || (iresult == NULL) || (rcoeffs == NULL) || (icoeffs == NULL) || (workspace == NULL) || (weights == NULL) ) { perror("Error in allocating memory"); exit( 1 ) ; } /*** generate a seed, needed to generate random data ***/ time(&seed); srand48( seed ); /* construct fftw plans */ /* make DCT plans -> note that I will be using the GURU interface to execute these plans within the routines*/ /* forward DCT */ dctPlan = fftw_plan_r2r_1d( 2*bw, weights, rdata, FFTW_REDFT10, FFTW_ESTIMATE ) ; /* inverse DCT */ idctPlan = fftw_plan_r2r_1d( 2*bw, weights, rdata, FFTW_REDFT01, FFTW_ESTIMATE ); /* fftw "preamble" ; note that this plan places the output in a transposed array */ rank = 1 ; dims[0].n = 2*bw ; dims[0].is = 1 ; dims[0].os = 2*bw ; howmany_rank = 1 ; howmany_dims[0].n = 2*bw ; howmany_dims[0].is = 2*bw ; howmany_dims[0].os = 1 ; /* forward fft */ fftPlan = fftw_plan_guru_split_dft( rank, dims, howmany_rank, howmany_dims, rdata, idata, workspace, workspace+(4*bw*bw), FFTW_ESTIMATE ); /* now plan for inverse fft - note that this plans assumes that I'm working with a transposed array, e.g. the inputs for a length 2*bw transform are placed every 2*bw apart, the output will be consecutive entries in the array */ rank = 1 ; dims[0].n = 2*bw ; dims[0].is = 2*bw ; dims[0].os = 1 ; howmany_rank = 1 ; howmany_dims[0].n = 2*bw ; howmany_dims[0].is = 1 ; howmany_dims[0].os = 2*bw ; /* inverse fft */ ifftPlan = fftw_plan_guru_split_dft( rank, dims, howmany_rank, howmany_dims, rdata, idata, workspace, workspace+(4*bw*bw), FFTW_ESTIMATE ); /* now make the weights */ makeweights( bw, weights ); /* now start the looping */ fprintf(stdout,"about to enter loop\n\n"); for(i=0; i<loops; i++){ /**** loop to generate spherical harmonic coefficients of a real-valued function *****/ for(m=0;m<bw;m++) for(l=m;l<bw;l++){ dumx = 2.0 * (drand48()-0.5); dumy = 2.0 * (drand48()-0.5); dummy = seanindex(m,l,bw); rcoeffs[dummy] = dumx; icoeffs[dummy] = dumy; dummy = seanindex(-m,l,bw); rcoeffs[dummy] = ((double) pow(-1.0, (double) m)) * dumx; icoeffs[dummy] = ((double) pow(-1.0, (double) (m + 1))) * dumy; } /* have to zero out the m=0 coefficients, since those are real */ for(m=0;m<bw;m++) icoeffs[m] = 0.0; /* do the inverse spherical transform */ tstart = csecond(); InvFST_semi_fly(rcoeffs,icoeffs, rdata, idata, bw, workspace, 1, cutoff, &idctPlan, &ifftPlan ); tstop = csecond(); inv_time += (tstop - tstart); fprintf(stdout,"inv time \t = %.4e\n", tstop - tstart); /* now do the forward spherical transform */ tstart = csecond(); FST_semi_fly(rdata, idata, rresult, iresult, bw, workspace, 1, cutoff, &dctPlan, &fftPlan, weights ) ; tstop = csecond(); for_time += (tstop - tstart); fprintf(stdout,"forward time \t = %.4e\n", tstop - tstart); /* now to compute the error */ relerror[i] = 0.0; curmax[i] = 0.0; for(j=0;j<(bw*bw);j++){ realtmp = rresult[j]-rcoeffs[j]; imagtmp = iresult[j]-icoeffs[j]; origmag = sqrt((rcoeffs[j]*rcoeffs[j]) + (icoeffs[j]*icoeffs[j])); tmpmag = sqrt((realtmp*realtmp) + (imagtmp*imagtmp)); relerror[i] = max(relerror[i],tmpmag/(origmag + pow(10.0, -50.0))); curmax[i] = max(curmax[i],tmpmag); } fprintf(stdout,"r-o error\t = %.12f\n", curmax[i]); fprintf(stdout,"(r-o)/o error\t = %.12f\n\n", relerror[i]); granderror += curmax[i]; grandrelerror += relerror[i]; } total_time = inv_time + for_time; ave_error = granderror / ( (double) loops ); ave_relerror = grandrelerror / ( (double) loops ); stddev_error = 0.0 ; stddev_relerror = 0.0; for( i = 0 ; i < loops ; i ++ ) { stddev_error += pow( ave_error - curmax[i] , 2.0 ); stddev_relerror += pow( ave_relerror - relerror[i] , 2.0 ); } /*** this won't work if loops == 1 ***/ if( loops != 1 ) { stddev_error = sqrt(stddev_error / ( (double) (loops - 1) ) ); stddev_relerror = sqrt(stddev_relerror / ( (double) (loops - 1) ) ); } fprintf(stdout,"Program: test_s2_semi_fly\n"); fprintf(stdout,"Bandwidth = %d\n", bw); #ifndef WALLCLOCK fprintf(stdout,"Total elapsed cpu time :\t\t %.4e seconds.\n", total_time); fprintf(stdout,"Average cpu forward per iteration:\t %.4e seconds.\n", for_time/((double) loops)); fprintf(stdout,"Average cpu inverse per iteration:\t %.4e seconds.\n", inv_time/((double) loops)); #else fprintf(stdout,"Total elapsed wall time :\t\t %.4e seconds.\n", total_time); fprintf(stdout,"Average wall forward per iteration:\t %.4e seconds.\n", for_time/((double) loops)); fprintf(stdout,"Average wall inverse per iteration:\t %.4e seconds.\n", inv_time/((double) loops)); #endif fprintf(stdout,"Average r-o error:\t\t %.4e\t", granderror/((double) loops)); fprintf(stdout,"std dev: %.4e\n",stddev_error); fprintf(stdout,"Average (r-o)/o error:\t\t %.4e\t", grandrelerror/((double) loops)); fprintf(stdout,"std dev: %.4e\n\n",stddev_relerror); if (argc == 4) { errorsfp = fopen(argv[3],"w"); for(m = 0 ; m < bw ; m++ ) { for(l = m ; l< bw ; l++ ) { dummy = seanindex(m,l,bw); fprintf(errorsfp, "dummy = %d\t m = %d\tl = %d\t%.10f %.10f\n", dummy, m, l, fabs(rcoeffs[dummy] - rresult[dummy]), fabs(icoeffs[dummy] - iresult[dummy])); dummy = seanindex(-m,l,bw); fprintf(errorsfp, "dummy = %d\t m = %d\tl = %d\t%.10f %.10f\n", dummy, -m, l, fabs(rcoeffs[dummy] - rresult[dummy]), fabs(icoeffs[dummy] - iresult[dummy])); } } fclose(errorsfp); } /* destroy fftw plans */ fftw_destroy_plan( ifftPlan ); fftw_destroy_plan( fftPlan ); fftw_destroy_plan( idctPlan ); fftw_destroy_plan( dctPlan ); /* free memory */ free( weights ); free(curmax); free(relerror); free(workspace); free(iresult); free(rresult); free(idata); free(rdata); free(icoeffs); free(rcoeffs); return 0 ; }