/*
 * getinput -- reads the input data.
 *
 * Runs the three input stages back to back: inputoptions(), then
 * restdist_inputdata(), then makeweights().  Takes no arguments and
 * returns nothing; everything it touches is reached through the callees.
 */
void getinput()
{
  inputoptions();
  restdist_inputdata();
  makeweights();
}  /* getinput */
void getinput() { /* reads the input data */ inputoptions(); if ((!freqsfrom) && !logdet && !similarity) { if (kimura || jukes) { freqa = 0.25; freqc = 0.25; freqg = 0.25; freqt = 0.25; } getbasefreqs(freqa, freqc, freqg, freqt, &freqr, &freqy, &freqar, &freqcy, &freqgr, &freqty, &ttratio, &xi, &xv, &fracchange, freqsfrom, printdata); if (freqa < 0.00000001) { freqa = 0.000001; freqc = 0.999999*freqc; freqg = 0.999999*freqg; freqt = 0.999999*freqt; } if (freqc < 0.00000001) { freqa = 0.999999*freqa; freqc = 0.000001; freqg = 0.999999*freqg; freqt = 0.999999*freqt; } if (freqg < 0.00000001) { freqa = 0.999999*freqa; freqc = 0.999999*freqc; freqg = 0.000001; freqt = 0.999999*freqt; } if (freqt < 0.00000001) { freqa = 0.999999*freqa; freqc = 0.999999*freqc; freqg = 0.999999*freqg; freqt = 0.000001; } } if (!justwts || firstset) inputdata(sites); makeweights(); dnadist_makevalues(); if (freqsfrom) { dnadist_empiricalfreqs(); getbasefreqs(freqa, freqc, freqg, freqt, &freqr, &freqy, &freqar, &freqcy, &freqgr, &freqty, &ttratio, &xi, &xv, &fracchange, freqsfrom, printdata); } } /* getinput */
/*
 * reallocsites -- resize the per-site storage for the current value of
 * `sites`.
 *
 * Each of the spp rows of y is released and replaced by a fresh buffer of
 * `sites` Chars.  weight, alias and aliasweight are released and replaced
 * by arrays of sites+1 longs.  Old contents are discarded, not copied;
 * makeweights() is called once the new arrays are in place.
 */
void reallocsites()
{
  long row = 0;

  /* sequence rows: one buffer of `sites` characters per species */
  while (row < spp) {
    free(y[row]);
    y[row] = (Char *)Malloc(sites*sizeof(Char));
    row++;
  }

  /* drop the three old per-site arrays ... */
  free(weight);
  free(alias);
  free(aliasweight);

  /* ... and allocate replacements holding sites+1 longs each */
  weight      = (steptr)Malloc((sites+1)*sizeof(long));
  alias       = (steptr)Malloc((sites+1)*sizeof(long));
  aliasweight = (steptr)Malloc((sites+1)*sizeof(long));

  makeweights();
}
/*
 * psht_make_ecp_geom_info_2 -- build a psht_geom_info for an equidistant
 * ring grid with caller-supplied strides.
 *
 *   nrings      number of rings; must be even (asserted)
 *   nphi        value stored as the pixel count of every ring
 *   phi0        value stored as the phi0 of every ring
 *   stride_lon  stride stored for every ring
 *   stride_lat  ring m gets offset m*stride_lat
 *   geom_info   forwarded unchanged to psht_make_geom_info()
 *
 * Ring m is assigned theta = (m+0.5)*pi/nrings.  The ring weights come
 * from makeweights(nrings/2, ...) and are then multiplied by 2*pi/nphi.
 * All temporary arrays are released before returning.
 */
void psht_make_ecp_geom_info_2 (int nrings, int nphi, double phi0,
  int stride_lon, int stride_lat, psht_geom_info **geom_info)
  {
  const double pi=3.141592653589793238462643383279502884197;

  double *ring_theta     = RALLOC(double,nrings);
  double *ring_weight    = RALLOC(double,nrings);
  int *ring_nphi         = RALLOC(int,nrings);
  double *ring_phi0      = RALLOC(double,nrings);
  ptrdiff_t *ring_offset = RALLOC(ptrdiff_t,nrings);
  int *ring_stride       = RALLOC(int,nrings);
  int ring;

  UTIL_ASSERT((nrings&1)==0,
    "Even number of rings needed for equidistant grid!");

  /* NOTE(review): makeweights() is handed nrings/2 together with an
     nrings-long array; presumably it fills all nrings entries, since the
     loop below scales every one of them -- confirm against makeweights() */
  makeweights(nrings/2,ring_weight);

  ring = 0;
  while (ring<nrings)
    {
    ring_theta[ring]  = (ring+0.5)*pi/nrings;
    ring_nphi[ring]   = nphi;
    ring_phi0[ring]   = phi0;
    ring_offset[ring] = (ptrdiff_t)ring*stride_lat;
    ring_stride[ring] = stride_lon;
    ring_weight[ring] *= 2*pi/nphi;
    ++ring;
    }

  psht_make_geom_info (nrings, ring_nphi, ring_offset, ring_stride,
    ring_phi0, ring_theta, ring_weight, geom_info);

  DEALLOC(ring_theta);
  DEALLOC(ring_weight);
  DEALLOC(ring_nphi);
  DEALLOC(ring_phi0);
  DEALLOC(ring_offset);
  DEALLOC(ring_stride);
  }
/* Here's the big banana Convolves two functions defined on the 2-sphere. Uses seminaive algorithms for spherical harmonic transforms size = 2*bw Inputs: rdata, idata - (size * size) arrays containing real and imaginary parts of sampled function. rfilter, ifilter - (size * size) arrays containing real and imaginary parts of sampled filter function. rres, ires - (size * size) arrays containing real and imaginary parts of result function. Suggestion - if you want to do multiple convolutions, don't keep allocating and freeing space with every call, or keep recomputing the spharmonic_pml tables. Allocate workspace once before you call this function, then just set up pointers as first step of this procedure rather than mallocing. And do the same with the FST, FZT, and InvFST functions. ASSUMPTIONS: 1. data is strictly REAL 2. will do semi-naive algorithm for ALL orders -> change the cutoff value if you want it to be different Memory requirements for Conv2Sphere Need space for spharmonic tables and local workspace and scratchpad space for FST_semi Let legendreSize = Reduced_Naive_TableSize(bw,cutoff) + Reduced_SpharmonicTableSize(bw,cutoff) Then the workspace needs to be this large: 2 * legendreSize + 8 * (bw*bw) + 10*bw + 4 * (bw*bw) + 2*bw for a total of 2 * legendreSize + 12 * (bw*bw) + 12*bw ; */ void Conv2Sphere_semi_memo(double *rdata, double *idata, double *rfilter, double *ifilter, double *rres, double *ires, int bw, double *workspace) { int size, spharmonic_bound ; int legendreSize, cutoff ; double *frres, *fires, *filtrres, *filtires, *trres, *tires; double **spharmonic_pml_table, **transpose_spharmonic_pml_table; double *spharmonic_result_space, *transpose_spharmonic_result_space; double *scratchpad; /* fftw */ int rank, howmany_rank ; fftw_iodim dims[1], howmany_dims[1]; /* forward transform stuff */ fftw_plan dctPlan, fftPlan ; double *weights ; /* inverse transform stuff */ fftw_plan idctPlan, ifftPlan ; size =2*bw ; cutoff = bw ; legendreSize = 
Reduced_Naive_TableSize(bw,cutoff) + Reduced_SpharmonicTableSize(bw,cutoff) ; /* assign space */ spharmonic_bound = legendreSize ; spharmonic_result_space = workspace; /* needs legendreSize */ transpose_spharmonic_result_space = spharmonic_result_space + legendreSize ; /* needs legendreSize */ frres = transpose_spharmonic_result_space + legendreSize ; /* needs (bw*bw) */ fires = frres + (bw*bw); /* needs (bw*bw) */ trres = fires + (bw*bw); /* needs (bw*bw) */ tires = trres + (bw*bw); /* needs (bw*bw) */ filtrres = tires + (bw*bw); /* needs bw */ filtires = filtrres + bw; /* needs bw */ scratchpad = filtires + bw; /* needs (8*bw^2)+(10*bw) */ /* allocate space, and compute, the weights for this bandwidth */ weights = (double *) malloc(sizeof(double) * 4 * bw); makeweights( bw, weights ); /* make the fftw plans */ /* make DCT plans -> note that I will be using the GURU interface to execute these plans within the routines*/ /* forward DCT */ dctPlan = fftw_plan_r2r_1d( 2*bw, weights, rdata, FFTW_REDFT10, FFTW_ESTIMATE ) ; /* inverse DCT */ idctPlan = fftw_plan_r2r_1d( 2*bw, weights, rdata, FFTW_REDFT01, FFTW_ESTIMATE ); /* fft "preamble" ; note that this plan places the output in a transposed array */ rank = 1 ; dims[0].n = 2*bw ; dims[0].is = 1 ; dims[0].os = 2*bw ; howmany_rank = 1 ; howmany_dims[0].n = 2*bw ; howmany_dims[0].is = 2*bw ; howmany_dims[0].os = 1 ; /* forward fft */ fftPlan = fftw_plan_guru_split_dft( rank, dims, howmany_rank, howmany_dims, rdata, idata, workspace, workspace+(4*bw*bw), FFTW_ESTIMATE ); /* now plan for inverse fft - note that this plans assumes that I'm working with a transposed array, e.g. 
the inputs for a length 2*bw transform are placed every 2*bw apart, the output will be consecutive entries in the array */ rank = 1 ; dims[0].n = 2*bw ; dims[0].is = 2*bw ; dims[0].os = 1 ; howmany_rank = 1 ; howmany_dims[0].n = 2*bw ; howmany_dims[0].is = 1 ; howmany_dims[0].os = 2*bw ; /* inverse fft */ ifftPlan = fftw_plan_guru_split_dft( rank, dims, howmany_rank, howmany_dims, rdata, idata, workspace, workspace+(4*bw*bw), FFTW_ESTIMATE ); /* precompute the associated Legendre fcts */ spharmonic_pml_table = Spharmonic_Pml_Table(bw, spharmonic_result_space, scratchpad); transpose_spharmonic_pml_table = Transpose_Spharmonic_Pml_Table(spharmonic_pml_table, bw, transpose_spharmonic_result_space, scratchpad); FST_semi_memo(rdata, idata, frres, fires, bw, spharmonic_pml_table, scratchpad, 1, bw, &dctPlan, &fftPlan, weights ); FZT_semi_memo(rfilter, ifilter, filtrres, filtires, bw, spharmonic_pml_table[0], scratchpad, 1, &dctPlan, weights ); TransMult(frres, fires, filtrres, filtires, trres, tires, bw); InvFST_semi_memo(trres, tires, rres, ires, bw, transpose_spharmonic_pml_table, scratchpad, 1, bw, &idctPlan, &ifftPlan ); free( weights ) ; /*** have to free the memory that was allocated in Spharmonic_Pml_Table() and Transpose_Spharmonic_Pml_Table() ***/ free(spharmonic_pml_table); free(transpose_spharmonic_pml_table); /* destroy plans */ fftw_destroy_plan( ifftPlan ) ; fftw_destroy_plan( fftPlan ) ; fftw_destroy_plan( idctPlan ) ; fftw_destroy_plan( dctPlan ) ; }
int main(int argc, char **argv) { FILE *errorsfp; int i, j, bw, size, loops; int l, m, dummy, cutoff ; int rank, howmany_rank ; double *rcoeffs, *icoeffs, *rdata, *idata, *rresult, *iresult; double *workspace, *weights; double dumx, dumy ; double *relerror, *curmax, granderror, grandrelerror; double realtmp, imagtmp,origmag, tmpmag; double ave_error, ave_relerror, stddev_error, stddev_relerror; double total_time, for_time, inv_time; double tstart, tstop; time_t seed; fftw_plan dctPlan, idctPlan ; fftw_plan fftPlan, ifftPlan ; fftw_iodim dims[1], howmany_dims[1]; if (argc < 3) { fprintf(stdout,"Usage: test_s2_semi_fly bw loops [error_file]\n"); exit(0); } bw = atoi(argv[1]); loops = atoi(argv[2]); /*** ASSUMING WILL SEMINAIVE ALL ORDERS ***/ cutoff = bw ; size = 2*bw; total_time = 0.0; for_time = 0.0; inv_time = 0.0; granderror = 0.0; grandrelerror = 0.0; /* allocate memory */ rcoeffs = (double *) malloc(sizeof(double) * (bw * bw)); icoeffs = (double *) malloc(sizeof(double) * (bw * bw)); rdata = (double *) malloc(sizeof(double) * (size * size)); idata = (double *) malloc(sizeof(double) * (size * size)); rresult = (double *) malloc(sizeof(double) * (bw * bw)); iresult = (double *) malloc(sizeof(double) * (bw * bw)); workspace = (double *) malloc(sizeof(double) * ((10 * (bw*bw)) + (24 * bw))); /** space for errors **/ relerror = (double *) malloc(sizeof(double) * loops); curmax = (double *) malloc(sizeof(double) * loops); /* make array for weights */ weights = (double *) malloc(sizeof(double) * 4 * bw); /**** At this point, check to see if all the memory has been allocated. If it has not, there's no point in going further. 
****/ if ( (rdata == NULL) || (idata == NULL) || (rresult == NULL) || (iresult == NULL) || (rcoeffs == NULL) || (icoeffs == NULL) || (workspace == NULL) || (weights == NULL) ) { perror("Error in allocating memory"); exit( 1 ) ; } /*** generate a seed, needed to generate random data ***/ time(&seed); srand48( seed ); /* construct fftw plans */ /* make DCT plans -> note that I will be using the GURU interface to execute these plans within the routines*/ /* forward DCT */ dctPlan = fftw_plan_r2r_1d( 2*bw, weights, rdata, FFTW_REDFT10, FFTW_ESTIMATE ) ; /* inverse DCT */ idctPlan = fftw_plan_r2r_1d( 2*bw, weights, rdata, FFTW_REDFT01, FFTW_ESTIMATE ); /* fftw "preamble" ; note that this plan places the output in a transposed array */ rank = 1 ; dims[0].n = 2*bw ; dims[0].is = 1 ; dims[0].os = 2*bw ; howmany_rank = 1 ; howmany_dims[0].n = 2*bw ; howmany_dims[0].is = 2*bw ; howmany_dims[0].os = 1 ; /* forward fft */ fftPlan = fftw_plan_guru_split_dft( rank, dims, howmany_rank, howmany_dims, rdata, idata, workspace, workspace+(4*bw*bw), FFTW_ESTIMATE ); /* now plan for inverse fft - note that this plans assumes that I'm working with a transposed array, e.g. 
the inputs for a length 2*bw transform are placed every 2*bw apart, the output will be consecutive entries in the array */ rank = 1 ; dims[0].n = 2*bw ; dims[0].is = 2*bw ; dims[0].os = 1 ; howmany_rank = 1 ; howmany_dims[0].n = 2*bw ; howmany_dims[0].is = 1 ; howmany_dims[0].os = 2*bw ; /* inverse fft */ ifftPlan = fftw_plan_guru_split_dft( rank, dims, howmany_rank, howmany_dims, rdata, idata, workspace, workspace+(4*bw*bw), FFTW_ESTIMATE ); /* now make the weights */ makeweights( bw, weights ); /* now start the looping */ fprintf(stdout,"about to enter loop\n\n"); for(i=0; i<loops; i++){ /**** loop to generate spherical harmonic coefficients of a real-valued function *****/ for(m=0;m<bw;m++) for(l=m;l<bw;l++){ dumx = 2.0 * (drand48()-0.5); dumy = 2.0 * (drand48()-0.5); dummy = seanindex(m,l,bw); rcoeffs[dummy] = dumx; icoeffs[dummy] = dumy; dummy = seanindex(-m,l,bw); rcoeffs[dummy] = ((double) pow(-1.0, (double) m)) * dumx; icoeffs[dummy] = ((double) pow(-1.0, (double) (m + 1))) * dumy; } /* have to zero out the m=0 coefficients, since those are real */ for(m=0;m<bw;m++) icoeffs[m] = 0.0; /* do the inverse spherical transform */ tstart = csecond(); InvFST_semi_fly(rcoeffs,icoeffs, rdata, idata, bw, workspace, 1, cutoff, &idctPlan, &ifftPlan ); tstop = csecond(); inv_time += (tstop - tstart); fprintf(stdout,"inv time \t = %.4e\n", tstop - tstart); /* now do the forward spherical transform */ tstart = csecond(); FST_semi_fly(rdata, idata, rresult, iresult, bw, workspace, 1, cutoff, &dctPlan, &fftPlan, weights ) ; tstop = csecond(); for_time += (tstop - tstart); fprintf(stdout,"forward time \t = %.4e\n", tstop - tstart); /* now to compute the error */ relerror[i] = 0.0; curmax[i] = 0.0; for(j=0;j<(bw*bw);j++){ realtmp = rresult[j]-rcoeffs[j]; imagtmp = iresult[j]-icoeffs[j]; origmag = sqrt((rcoeffs[j]*rcoeffs[j]) + (icoeffs[j]*icoeffs[j])); tmpmag = sqrt((realtmp*realtmp) + (imagtmp*imagtmp)); relerror[i] = max(relerror[i],tmpmag/(origmag + pow(10.0, -50.0))); 
curmax[i] = max(curmax[i],tmpmag); } fprintf(stdout,"r-o error\t = %.12f\n", curmax[i]); fprintf(stdout,"(r-o)/o error\t = %.12f\n\n", relerror[i]); granderror += curmax[i]; grandrelerror += relerror[i]; } total_time = inv_time + for_time; ave_error = granderror / ( (double) loops ); ave_relerror = grandrelerror / ( (double) loops ); stddev_error = 0.0 ; stddev_relerror = 0.0; for( i = 0 ; i < loops ; i ++ ) { stddev_error += pow( ave_error - curmax[i] , 2.0 ); stddev_relerror += pow( ave_relerror - relerror[i] , 2.0 ); } /*** this won't work if loops == 1 ***/ if( loops != 1 ) { stddev_error = sqrt(stddev_error / ( (double) (loops - 1) ) ); stddev_relerror = sqrt(stddev_relerror / ( (double) (loops - 1) ) ); } fprintf(stdout,"Program: test_s2_semi_fly\n"); fprintf(stdout,"Bandwidth = %d\n", bw); #ifndef WALLCLOCK fprintf(stdout,"Total elapsed cpu time :\t\t %.4e seconds.\n", total_time); fprintf(stdout,"Average cpu forward per iteration:\t %.4e seconds.\n", for_time/((double) loops)); fprintf(stdout,"Average cpu inverse per iteration:\t %.4e seconds.\n", inv_time/((double) loops)); #else fprintf(stdout,"Total elapsed wall time :\t\t %.4e seconds.\n", total_time); fprintf(stdout,"Average wall forward per iteration:\t %.4e seconds.\n", for_time/((double) loops)); fprintf(stdout,"Average wall inverse per iteration:\t %.4e seconds.\n", inv_time/((double) loops)); #endif fprintf(stdout,"Average r-o error:\t\t %.4e\t", granderror/((double) loops)); fprintf(stdout,"std dev: %.4e\n",stddev_error); fprintf(stdout,"Average (r-o)/o error:\t\t %.4e\t", grandrelerror/((double) loops)); fprintf(stdout,"std dev: %.4e\n\n",stddev_relerror); if (argc == 4) { errorsfp = fopen(argv[3],"w"); for(m = 0 ; m < bw ; m++ ) { for(l = m ; l< bw ; l++ ) { dummy = seanindex(m,l,bw); fprintf(errorsfp, "dummy = %d\t m = %d\tl = %d\t%.10f %.10f\n", dummy, m, l, fabs(rcoeffs[dummy] - rresult[dummy]), fabs(icoeffs[dummy] - iresult[dummy])); dummy = seanindex(-m,l,bw); fprintf(errorsfp, "dummy = 
%d\t m = %d\tl = %d\t%.10f %.10f\n", dummy, -m, l, fabs(rcoeffs[dummy] - rresult[dummy]), fabs(icoeffs[dummy] - iresult[dummy])); } } fclose(errorsfp); } /* destroy fftw plans */ fftw_destroy_plan( ifftPlan ); fftw_destroy_plan( fftPlan ); fftw_destroy_plan( idctPlan ); fftw_destroy_plan( dctPlan ); /* free memory */ free( weights ); free(curmax); free(relerror); free(workspace); free(iresult); free(rresult); free(idata); free(rdata); free(icoeffs); free(rcoeffs); return 0 ; }