/*
 * Multi-pass dedispersion built from the single-pass kernel, with the
 * independent blocks of each pass spread across OpenMP threads.
 *
 * inin, outout: arrays of nchan row pointers used as ping-pong buffers; the
 *               roles are swapped after every pass.
 * nchan:        number of rows (channels); also the initial block size.
 * ndat:         sample count forwarded to the kernel.
 *
 * The number of passes comes from get_npass(nchan).  The block size starts at
 * nchan and is halved after each pass.  After the last pass the most recent
 * result is copied into the other buffer, so both buffers end up holding it.
 *
 * NOTE(review): the final memcpy treats each buffer as one contiguous run of
 * nchan*ndat floats starting at row 0 -- confirm against the allocator.
 */
void dedisperse_single(float **inin, float **outout, int nchan, int ndat)
{
    int npass = get_npass(nchan);
    int bs = nchan;
    float **in = inin;
    float **out = outout;

    for (int i = 0; i < npass; i++) {
        /* Blocks within one pass touch disjoint row ranges [j, j+bs). */
        #pragma omp parallel for
        for (int j = 0; j < nchan; j += bs) {
            dedisperse_kernel(in + j, out + j, bs, ndat);
        }
        bs /= 2;

        /* This pass's output becomes the next pass's input. */
        float **tmp = in;
        in = out;
        out = tmp;
    }

    /*
     * After the final swap `in` holds the latest result.  Widen to size_t
     * before multiplying: nchan*ndat evaluated in int overflows (undefined
     * behavior) for large problem sizes.
     */
    memcpy(out[0], in[0], (size_t)nchan * (size_t)ndat * sizeof(float));
}
/*
 * Multi-pass dedispersion that consumes two passes per step with the blocked
 * two-pass kernel, falling back to the single-pass kernel for one final step
 * when the total pass count is odd.  Blocks within a step are spread across
 * OpenMP threads.
 *
 * inin, outout: arrays of nchan row pointers used as ping-pong buffers; the
 *               roles are swapped after every step.
 * nchan:        number of rows (channels); also the initial block size.
 * ndat:         sample count forwarded to the kernels.
 *
 * After the last step the most recent result is copied into the other
 * buffer, so both buffers end up holding it.
 *
 * NOTE(review): the final memcpy treats each buffer as one contiguous run of
 * nchan*ndat floats starting at row 0 -- confirm against the allocator.
 */
void dedisperse_dual(float **inin, float **outout, int nchan, int ndat)
{
    int npass = get_npass(nchan);
    int bs = nchan;
    float **in = inin;
    float **out = outout;

    /*
     * The npass-1 bound stops in time to hand the final pass to the
     * single-step kernel in the event of an odd depth.
     */
    for (int i = 0; i < npass - 1; i += 2) {
        #pragma omp parallel for
        for (int j = 0; j < nchan; j += bs) {
            dedisperse_block_kernel_2pass((const float **)(in + j), out + j, bs, ndat);
        }
        /* Two passes were consumed, so the block size shrinks by four. */
        bs /= 4;

        float **tmp = in;
        in = out;
        out = tmp;
    }

    if (npass % 2 == 1) {
        /* Odd depth: one remaining pass, done with the single-step kernel. */
        #pragma omp parallel for
        for (int j = 0; j < nchan; j += bs) {
            dedisperse_kernel(in + j, out + j, bs, ndat);
        }

        float **tmp = in;
        in = out;
        out = tmp;
    }

    /*
     * After the final swap `in` holds the latest result.  Widen to size_t
     * before multiplying: nchan*ndat evaluated in int overflows (undefined
     * behavior) for large problem sizes.
     */
    memcpy(out[0], in[0], (size_t)nchan * (size_t)ndat * sizeof(float));
}
/*
 * Serial multi-pass dedispersion built from the single-pass kernel.
 *
 * inin, outout: arrays of nchan row pointers used as ping-pong buffers; the
 *               roles are swapped after every pass.
 * nchan:        number of rows (channels); also the initial block size.
 * ndat:         sample count forwarded to the kernel.
 *
 * The number of passes comes from get_npass(nchan).  The block size starts at
 * nchan and is halved after each pass.  After the last pass the most recent
 * result is copied into the other buffer, so both buffers end up holding it.
 *
 * NOTE(review): the final memcpy treats each buffer as one contiguous run of
 * nchan*ndat floats starting at row 0 -- confirm against the allocator.
 */
void dedisperse(float **inin, float **outout, int nchan, int ndat)
{
    int npass = get_npass(nchan);
    int bs = nchan;
    float **in = inin;
    float **out = outout;

    for (int i = 0; i < npass; i++) {
        /* Deliberately serial: this variant runs the blocks one by one. */
        for (int j = 0; j < nchan; j += bs) {
            dedisperse_kernel(in + j, out + j, bs, ndat);
        }
        bs /= 2;

        /* This pass's output becomes the next pass's input. */
        float **tmp = in;
        in = out;
        out = tmp;
    }

    /*
     * After the final swap `in` holds the latest result.  Widen to size_t
     * before multiplying: nchan*ndat evaluated in int overflows (undefined
     * behavior) for large problem sizes.
     */
    memcpy(out[0], in[0], (size_t)nchan * (size_t)ndat * sizeof(float));
}
/*--------------------------------------------------------------------------------*/
/*
 * Apply two levels of dedisperse_kernel by hand: one call over all nchan rows
 * from dat into dat2, then one call per half of the rows from dat2 back into
 * dat.  The second-level calls each cover nchan/2 rows, the second starting
 * at row offset nchan/2.
 */
void dedisperse_2pass(float **dat, float **dat2, int nchan, int ndat)
{
    const int half = nchan / 2;

    /* First level: whole array, dat -> dat2. */
    dedisperse_kernel(dat, dat2, nchan, ndat);

    /* Second level: lower half, then upper half, dat2 -> dat. */
    for (int part = 0; part < 2; part++) {
        const int offset = part * half;
        dedisperse_kernel(dat2 + offset, dat + offset, half, ndat);
    }
}
/*
 * Benchmark driver.
 *
 * Usage: prog [nchan [ndat [nrep]]]
 *   nchan  number of rows/channels          (default 1024)
 *   ndat   samples handed to the kernels    (default 327680)
 *   nrep   dedispersion calls per timing    (default 1)
 *
 * Allocates two nchan x (ndat+nchan) matrices, writes one unit sample into
 * each row of the first, times a single dedisperse_blocked_cached() call and
 * a find_peak() call, then prints ten timings of nrep repeated calls each.
 *
 * Returns 0 on success, 1 on invalid arguments or allocation failure.
 */
int main(int argc, char *argv[])
{
    int nchan = 1024;
    int ndat = 327680;
    int nrep = 1;

    if (argc > 1)
        nchan = atoi(argv[1]);
    if (argc > 2)
        ndat = atoi(argv[2]);
    if (argc > 3)
        nrep = atoi(argv[3]);

    /*
     * atoi() yields 0 for non-numeric text and passes negatives straight
     * through; either would reach the allocator and the kernels, so reject
     * them up front.
     */
    if (nchan <= 0 || ndat <= 0 || nrep <= 0) {
        fprintf(stderr, "usage: %s [nchan>0 [ndat>0 [nrep>0]]]\n",
                argc > 0 ? argv[0] : "benchmark");
        return 1;
    }

    const int ncol = ndat + nchan;
    float **dat = matrix(nchan, ncol);
    float **dat2 = matrix(nchan, ncol);
    if (dat == NULL || dat2 == NULL) {
        fprintf(stderr, "failed to allocate %d x %d matrices\n", nchan, ncol);
        return 1;
    }

    /*
     * Inject a test signal: either a pulse whose position drifts linearly
     * with the row index, or (toggle set to 0) a pulse at ndat/2 in every row.
     * NOTE(review): the bounds guard assumes matrix(nchan, ncol) provides
     * rows of ncol floats -- confirm against matrix().
     */
    const int use_sloped_pulse = 1;
    for (int i = 0; i < nchan; i++) {
        int idx = use_sloped_pulse ? (int)(0.8317 * i + 160.2) : ndat / 2;
        if (idx < ncol)
            dat[i][idx] = 1;
    }

    /* Disabled debugging aid: dump intermediate stages to disk. */
#if 0
    write_mat(dat, nchan, ndat, "dat_starting.dat");
    dedisperse_kernel(dat, dat2, nchan, ndat);
    write_mat(dat2, nchan, ndat, "dat_1pass.dat");
    dedisperse_2pass(dat, dat2, nchan, ndat);
    write_mat(dat, nchan, ndat, "dat_2pass.dat");
#endif

    /* One timed call (dedisperse() or dedisperse_blocked() can be swapped in). */
    double t1 = omp_get_wtime();
    dedisperse_blocked_cached(dat, dat2, nchan, ndat);
    double t2 = omp_get_wtime();
    printf("took %12.4f seconds.\n", t2 - t1);

    int ichan, idat;
    find_peak(dat, nchan, ndat, &ichan, &idat);
    t1 = omp_get_wtime();
    printf("took %12.4f seconds to find peak.\n", t1 - t2);

    /* Ten timing samples of nrep back-to-back calls each. */
    for (int i = 0; i < 10; i++) {
        t1 = omp_get_wtime();
        for (int j = 0; j < nrep; j++) {
            dedisperse_blocked_cached(dat, dat2, nchan, ndat);
        }
        t2 = omp_get_wtime();

        /* Work estimate: passes * rows * samples * repetitions, in doubles. */
        double nops = get_npass(nchan) * (nchan + 0.0) * (ndat + 0.0) * (nrep + 0.0);
        printf("took %12.6f seconds at rate %12.6f.\n", t2 - t1, nops / (t2 - t1) / 1024 / 1024);
    }

    /*
     * The matrices are left for process exit to reclaim; the deallocator
     * matching matrix() is not visible from here.
     */
    return 0;
}