/**
 * Set up recon->HXF, the cell array of operators from each reconstruction
 * layer grid (recon->xloc) to recon->floc along the DM fitting directions.
 *
 * When parms->load.HXF names an existing file the operator is read from it.
 * Otherwise one mkh() operator is built per (fit direction, layer) pair, and
 * the result is written to "HXF" if parms->save.setup is set.
 */
static void setup_recon_HXF(RECON_T *recon, const PARMS_T *parms){
    /* Reuse a previously saved operator when one is available. */
    if(parms->load.HXF && zfexist(parms->load.HXF)){
        warning("Loading saved HXF\n");
        recon->HXF=dspcellread("%s",parms->load.HXF);
        return;
    }

    info2("Generating HXF");
    TIC;tic;
    const int ndir=parms->fit.nfit;
    const int nlayer=recon->npsr;
    recon->HXF=cellnew(ndir, nlayer);
    PDSPCELL(recon->HXF,HXF);
    for(int idir=0; idir<ndir; idir++){
        const double hs=parms->fit.hs->p[idir];
        for(int ilayer=0; ilayer<nlayer; ilayer++){
            const double ht=recon->ht->p[ilayer];
            /* Offsets grow linearly with the layer height ht. */
            const double shiftx=parms->fit.thetax->p[idir]*ht;
            const double shifty=parms->fit.thetay->p[idir]*ht;
            /* Scale factor for a source at height hs. */
            const double scale=1.-ht/hs;
            HXF[ilayer][idir]=mkh(recon->xloc->p[ilayer], recon->floc,
                                  shiftx, shifty, scale);
        }
    }
    if(parms->save.setup){
        writebin(recon->HXF,"HXF");
    }
    toc2(" ");
}
/**
 * Prepare the DM fitting stage of the reconstructor.
 *
 * Builds HXF (unless parms->sim.idealfit is set), copies the fit weights into
 * recon->fitwt, optionally assembles the fit matrices, and installs FitR/FitL
 * as function-form operators for whichever of FR.M / FL.M is still NULL.
 * Finally copies the solver settings into recon->FL.
 */
void setup_recon_fit(RECON_T *recon, const PARMS_T *parms){
    TIC;tic;
    /* In idealfit, xloc has high sampling, so HXF is not built. */
    if(!parms->sim.idealfit){
        setup_recon_HXF(recon,parms);
    }

    /* The weights are needed as a dmat, so copy them over. */
    recon->fitwt=dnew(parms->fit.nfit,1);
    dcp(&recon->fitwt,parms->fit.wt);

    /* Assemble the fit matrix when requested; faster if many directions. */
    if(parms->fit.assemble){
        setup_recon_fit_matrix(recon,parms);
    }
    toc2("Generating fit matrix ");

    /* Fall back to the function form when FR.M is NULL (!HXF<-idealfit). */
    if(!recon->FR.M){
        recon->FR.Mdata=recon;
        recon->FR.Mfun=FitR;
    }
    /* Fall back to the function form when FL.M is NULL. */
    if(!recon->FL.M){
        recon->FL.Mdata=recon;
        recon->FL.Mfun=FitL;
    }

    /* Solver configuration for the left-hand-side operator. */
    recon->FL.maxit=parms->fit.maxit;
    recon->FL.warm=parms->recon.warm_restart;
    recon->FL.bgs=parms->fit.bgs;
    recon->FL.alg=parms->fit.alg;
}
/* static int test_fft_speed_small(){ int nis=128; int *is=mycalloc(nis,int); dmat *tim=dnew(nis,1); for(int ii=0; ii<nis; ii++){ is[ii]=ii+1; } ccell *ac=cellnew(nis,1); rand_t stat; seed_rand(&stat,1); for(int ii=0; ii<nis; ii++){ ac->p[ii]=cnew(is[ii],is[ii]); //cfft2plan(ac->p[ii],-1); crandn(ac->p[ii],20,&stat); } TIC; for(int ii=0; ii<nis; ii++){ info2("size %4d: ",is[ii]); tic; for(int i=0; i<1000; i++){ cfft2(ac->p[ii],-1); } toc2("fft"); tim->p[ii]=toc3; } writebin(tim,"fft_timing"); } static void test_fft_speed(){ int nis=2048; int *is=mycalloc(nis,int); dmat *tim=dnew(nis,1); for(int ii=0; ii<nis; ii++){ is[ii]=(ii+1)*2; } ccell *ac=cellnew(nis,1); rand_t stat; seed_rand(&stat,1); TIC; for(int ii=0; ii<nis; ii++){ info2("size %4d: ",is[ii]); tic; ac->p[ii]=cnew(is[ii],is[ii]); //cfft2plan(ac->p[ii],-1); crandn(ac->p[ii],20,&stat); toc("plan"); } toc("plan"); for(int ii=0; ii<nis; ii++){ info2("size %4d: ",is[ii]); tic; int nrepeat; if(is[ii]<300) nrepeat=100; else if(is[ii]<1000) nrepeat=10; else nrepeat=1; for(int i=0; i<nrepeat; i++){ cfft2(ac->p[ii],-1); } toc2("fft"); tim->p[ii]=toc3/nrepeat; } writebin(tim,"fft_timing"); }*/ static void test_fft_special(){ int nis=2; int *is=mycalloc(nis,int); dmat *tim=dnew(nis,1); is[0]=3824; is[1]=4096; ccell *ac=ccellnew(nis,1); rand_t rstat; seed_rand(&rstat,1); TIC; for(int ii=0; ii<nis; ii++){ info2("size %4d: ",is[ii]); tic; ac->p[ii]=cnew(is[ii],is[ii]); //cfft2plan(ac->p[ii],-1); //cfft2partialplan(ac->p[ii],512,-1); crandn(ac->p[ii],20,&rstat); toc("plan"); } for(int ii=0; ii<nis; ii++){ info2("size %4d: ",is[ii]); tic; int nrepeat; if(is[ii]<300) nrepeat=100; else if(is[ii]<1000) nrepeat=10; else nrepeat=4; for(int i=0; i<nrepeat; i++){ cfft2(ac->p[ii],-1); } toc2("fft"); for(int i=0; i<nrepeat; i++){ cfft2partial(ac->p[ii],512,-1); } toc2("fft2partial"); tim->p[ii]=toc3/nrepeat; } writebin(tim,"fft_timing"); }
int main(int argc, char** argv) { if(argc != 2) { printf("You should use the following format for running this program: %s <Number of Iterations>\n", argv[0]); exit(1); } int N = atoi(argv[1]); int rng = 42; srand(rng); array_number_t vec1 = vector_fill(DIM, 0.0); array_number_t vec2 = vector_fill(DIM, 0.0); array_number_t vec3 = vector_fill(DIM, 0.0); for(int i=0; i<DIM; i++) { vec1->arr[i] = dist(rng); vec2->arr[i] = dist(rng); vec3->arr[i] = dist(rng); } #ifdef HOIST storage_t s = storage_alloc(VECTOR_ALL_BYTES(DIM)); #endif timer_t t = tic(); double total = 0; for (int count = 0; count < N; ++count) { vec1->arr[0] += 1.0 / (2.0 + vec1->arr[0]); vec2->arr[10] += 1.0 / (2.0 + vec2->arr[10]); #ifdef DPS #ifndef HOIST storage_t s = storage_alloc(VECTOR_ALL_BYTES(DIM)); #endif #endif #ifdef ADD3 #ifdef DPS total += vectorSum(TOP_LEVEL_linalg_vectorAdd3_dps(s, vec1, vec2, vec3, DIM, DIM, DIM)); #else total += vectorSum(TOP_LEVEL_linalg_vectorAdd3(vec1, vec2, vec3)); #endif #elif DOT #ifdef DPS total += TOP_LEVEL_linalg_dot_prod_dps(s, vec1, vec2, DIM, DIM); #else total += TOP_LEVEL_linalg_dot_prod(vec1, vec2); #endif #elif CROSS #ifdef DPS total += vectorSum(TOP_LEVEL_linalg_cross_dps(s, vec1, vec2, DIM, DIM)); #else total += vectorSum(TOP_LEVEL_linalg_cross(vec1, vec2)); #endif #endif #ifdef DPS #ifndef HOIST storage_free(s, VECTOR_ALL_BYTES(DIM)); #endif #endif } float elapsed = toc2(t); printf("total =%f, time per call = %f ms\n", total, elapsed / (double)(N)); return 0; }
int main(int argc, char** argv) { int rng = 42; srand(rng); // std::mt19937 rng(42); // std::uniform_real_distribution<Real> dist(0, 1); // Problem size size_t n = 100000; size_t d = GMM_D; size_t K = GMM_K; #ifdef DPS size_t td = TOP_LEVEL_usecases_gmm_tri_dps(empty_storage, d, 0); #else size_t td = TOP_LEVEL_usecases_gmm_tri(d); #endif // Declare and fill GMM coeffs // Vector alphas{ K }; // Vec<VectorD> means{ K, VectorD{ d } }; // Vec<VectorD> qs{ K, VectorD{ d } }; // Vector l0{ size_t(tri(d)) }; // Vec<Vector> ls{ K, l0 }; array_number_t alphas = vector_fill(K, 0); array_array_number_t means = matrix_fill(K, d, 0); array_array_number_t qs = matrix_fill(K, d, 0); array_array_number_t ls = matrix_fill(K, td, 0); for (int k = 0; k < K; ++k) { alphas->arr[k] = dist(rng); for (int j = 0; j < d; ++j) { means->arr[k]->arr[j] = dist(rng) - 0.5; qs->arr[k]->arr[j] = 10.0*dist(rng) - 5.0; } for (int j = 0; j < ls->arr[k]->length; ++j) { ls->arr[k]->arr[j] = dist(rng) - 0.5; if(j >= ls->arr[k]->length - d) ls->arr[k]->arr[j] = 0; } } // Declare and fill xs // Vec<VectorD> xs{ n, Vector{ d } }; array_array_number_t xs = matrix_fill(n, d, 0); for (int i = 0; i < n; ++i) for (int j = 0; j < d; ++j) xs->arr[i]->arr[j] = dist(rng); // TOP_LEVEL_usecases_gmm_Qtimesv_test(0); // boost::timer::auto_cpu_timer t; timer_t t = tic(); // Debug 150s // Release 1s double total = 0; int N = 100; #ifdef _DEBUG N = N / 10; // Debug is roughly this much slower than release -- multiply timings. 
#endif double wishart_m = 2.0; for (int count = 0; count < N; ++count) { alphas->arr[0] += 1; double wishart_gamma = 1.0 / (1.0 + count); #ifdef DPS total += TOP_LEVEL_usecases_gmm_gmm_objective_dps(empty_storage, xs, alphas, means, qs, ls, wishart_gamma, wishart_m, matrix_shape(xs), vector_shape(alphas), matrix_shape(means), matrix_shape(qs), matrix_shape(ls), 0, 0); #else total += TOP_LEVEL_usecases_gmm_gmm_objective_d(xs, alphas, means, qs, ls, wishart_gamma, wishart_m, xs, alphas, means, qs, ls, wishart_gamma, wishart_m); #endif } // std::cout << "total =" << total << ", time per call = " << t.elapsed().wall / double(N) / 1000.0 << "us" << std::endl; double elapsed = toc2(t); printf("total =%f, time per call = %f ms\n", total, elapsed / (double)(N)); return 0; }
/**
 * Read the ztilt (or phygrad) time series and gradient offsets for each star,
 * interpolating between the four surrounding points of the NGS grid.
 *
 * For every star and every WFS type, the star position (converted to arcsec)
 * is located on a grid of spacing parms->maos.ngsgrid. Each of the four
 * surrounding grid points gets a bilinear weight; points with weight below
 * 0.01, or whose data file does not exist, are skipped, and the remaining
 * weights are renormalized by their sum. The weighted sum of the data is
 * accumulated into stari->ztiltout->p[ipowfs], and the weighted gradient
 * offsets into stari->goff->p[ipowfs] when the offset file exists.
 *
 * @param star   Array of nstar stars; ztiltout, goff and nstep are updated.
 * @param nstar  Number of entries in star.
 * @param parms  Parameters (grid spacing, WFS layout, usephygrad, verbose).
 * @param seed   Seed number used to build the data file names.
 * @return The step count (header.nx) of the last cell-format file read, or 0
 *         if no such file was read.
 *
 * error() is called when the total usable weight for a WFS type is below 0.01,
 * or when files for the same star disagree on the number of steps.
 *
 * File-name buffers are fixed-size locals scoped to one WFS iteration, so
 * stack use does not grow with the number of stars.
 */
long setup_star_read_ztilt(STAR_S *star, int nstar, const PARMS_S *parms, int seed){
    const double ngsgrid=parms->maos.ngsgrid;
    long nstep=0;
    TIC;tic;
    for(int istar=0; istar<nstar; istar++){
        STAR_S *stari=&star[istar];
        int npowfs=parms->maos.npowfs;
        stari->ztiltout=dcellnew(npowfs, 1);
        const double thetax=stari->thetax*206265;/*in as */
        const double thetay=stari->thetay*206265;

        /* Position in units of the grid spacing, split into the lower grid
           index and the fractional offset from it. */
        double thxnorm=thetax/ngsgrid;
        double thynorm=thetay/ngsgrid;
        long thxl=(long)floor(thxnorm);/*Used to be double, but -0 appears. */
        long thyl=(long)floor(thynorm);
        double wtx=thxnorm-thxl;
        double wty=thynorm-thyl;
        for(int ipowfs=0; ipowfs<npowfs; ipowfs++){
            const int msa=parms->maos.msa[ipowfs];
            const int nsa=parms->maos.nsa[ipowfs];
            const int ng=nsa*2;
            /* Backing storage for the file names, reused on every iteration. */
            char ztilt_buf[2][2][PATH_MAX];
            char goff_buf[2][2][PATH_MAX];
            /* A NULL entry means "this grid point is not used". */
            char *fnztilt[2][2]={{NULL,NULL},{NULL,NULL}};
            char *fngoff[2][2]={{NULL, NULL}, {NULL, NULL}};
            double wtsum=0;
            /* First pass: build file names and total up the usable weight. */
            for(int ix=0; ix<2; ix++){
                double thx=(thxl+ix)*ngsgrid;
                for(int iy=0; iy<2; iy++){
                    double thy=(thyl+iy)*ngsgrid;
                    double wtxi=fabs(((1-ix)-wtx)*((1-iy)-wty));

                    if(wtxi<0.01){
                        /*info("skipping ix=%d,iy=%d because wt=%g\n",ix,iy,wtxi); */
                        continue;
                    }
                    fnztilt[iy][ix]=ztilt_buf[iy][ix];
                    if(parms->skyc.usephygrad){
                        warning_once("Using phygrad\n");
                        snprintf(fnztilt[iy][ix],PATH_MAX,"%s/phygrad/phygrad_seed%d_sa%d_x%g_y%g",
                                 dirstart,seed,msa,thx,thy);
                    }else{
                        snprintf(fnztilt[iy][ix],PATH_MAX,"%s/ztiltout/ztiltout_seed%d_sa%d_x%g_y%g",
                                 dirstart,seed,msa,thx,thy);
                    }
                    fngoff[iy][ix]=goff_buf[iy][ix];
                    snprintf(fngoff[iy][ix],PATH_MAX,"%s/gradoff/gradoff_sa%d_x%g_y%g",
                             dirstart,msa,thx,thy);
                    if(!zfexist(fnztilt[iy][ix])){
                        /* Missing data file: drop this grid point silently. */
                        fnztilt[iy][ix]=fngoff[iy][ix]=NULL;
                    }else{
                        wtsum+=wtxi;
                    }
                }
            }
            if(wtsum<0.01){
                error("PSF is not available for (%g,%g). wtsum=%g\n",thetax,thetay, wtsum);
            }
            /*Now do the actual reading */
            for(int ix=0; ix<2; ix++){
                for(int iy=0; iy<2; iy++){
                    /* Renormalize so the weights of the usable points sum to 1. */
                    double wtxi=fabs(((1-ix)-wtx)*((1-iy)-wty))/wtsum;
                    if(fnztilt[iy][ix]){
                        file_t *fp_ztilt=zfopen(fnztilt[iy][ix],"rb");
                        header_t header={0,0,0,0};
                        read_header(&header, fp_ztilt);

                        if(iscell(&header.magic)){
                            /* Cell file: header.nx entries, one per time step. */
                            nstep=header.nx;
                            free(header.str);
                            if(stari->nstep==0){
                                stari->nstep=nstep;
                            }else{
                                if(stari->nstep!=nstep){
                                    error("Different type has different steps\n");
                                }
                            }

                            if(!stari->ztiltout->p[ipowfs]){
                                stari->ztiltout->p[ipowfs]=dnew(ng, nstep);
                            }
                            dmat *ztiltout=stari->ztiltout->p[ipowfs];
                            for(long istep=0; istep<nstep; istep++){
                                dmat *ztilti=dreaddata(fp_ztilt, 0);
                                for(int ig=0; ig<ng; ig++){
                                    ztiltout->p[ig+istep*ng]+=ztilti->p[ig]*wtxi;
                                }
                                dfree(ztilti);
                            }
                        }else{
                            /* Not a cell: read as a single matrix and add it
                               with weight wtxi. */
                            dmat *tmp=dreaddata(fp_ztilt, &header);
                            dadd(&stari->ztiltout->p[ipowfs], 1, tmp, wtxi );
                            dfree(tmp);
                        }
                        zfclose(fp_ztilt);
                    }/* if(fnztilt) */
                    if(fngoff[iy][ix] && zfexist(fngoff[iy][ix])){
                        if(!stari->goff){
                            stari->goff=dcellnew(npowfs, 1);
                        }
                        dmat *tmp=dread("%s", fngoff[iy][ix]);
                        dadd(&stari->goff->p[ipowfs], 1, tmp, wtxi);
                        dfree(tmp);
                    }
                }/*iy */
            }/*ix */
        }/*ipowfs */
    }/*istar */
    if(parms->skyc.verbose){
        toc2("Reading PSF");
    }
    return nstep;
}