示例#1
0
/**
   Build HXF, the ray tracing operator from every reconstruction layer grid
   (recon->xloc) to the fitting grid (recon->floc) along each DM fitting
   direction. When parms->load.HXF names an existing file the operator is read
   from that file instead of being generated.
*/
static void
setup_recon_HXF(RECON_T *recon, const PARMS_T *parms){
    if(parms->load.HXF && zfexist(parms->load.HXF)){
	warning("Loading saved HXF\n");
	recon->HXF=dspcellread("%s",parms->load.HXF);
	return;
    }
    info2("Generating HXF");TIC;tic;
    const int ndir=parms->fit.nfit;
    const int nlayer=recon->npsr;
    recon->HXF=cellnew(ndir, nlayer);
    PDSPCELL(recon->HXF,hxf);
    for(int idir=0; idir<ndir; idir++){
	const double hs=parms->fit.hs->p[idir];
	const double thetax=parms->fit.thetax->p[idir];
	const double thetay=parms->fit.thetay->p[idir];
	for(int ilayer=0; ilayer<nlayer; ilayer++){
	    const double ht=recon->ht->p[ilayer];
	    /*The beam is shifted by angle*height and scaled by 1-ht/hs at
	      this layer.*/
	    const double shiftx=thetax*ht;
	    const double shifty=thetay*ht;
	    hxf[ilayer][idir]=mkh(recon->xloc->p[ilayer], recon->floc,
				  shiftx, shifty, 1.-ht/hs);
	}
    }
    if(parms->save.setup){
	writebin(recon->HXF,"HXF");
    }
    toc2(" ");
}
示例#2
0
/**
   Set up the DM fitting operators FR (right hand side) and FL (left hand
   side) in recon. The assembled matrices are built only when
   parms->fit.assemble is set; any operator whose matrix M is still NULL
   afterwards falls back to the function form (FitR / FitL).
*/
void setup_recon_fit(RECON_T *recon, const PARMS_T *parms){
    TIC;tic;
    /*With idealfit xloc is finely sampled and HXF is not used. */
    if(!parms->sim.idealfit){
	setup_recon_HXF(recon,parms);
    }
    /*The fit weights are needed as a dmat, so keep a copy in recon. */
    recon->fitwt=dnew(parms->fit.nfit,1);
    dcp(&recon->fitwt,parms->fit.wt);

    /*An assembled fit matrix is faster when there are many directions. */
    if(parms->fit.assemble){
	setup_recon_fit_matrix(recon,parms);
    }
    toc2("Generating fit matrix ");

    /*Solver configuration of the left hand side operator. */
    recon->FL.alg = parms->fit.alg;
    recon->FL.bgs = parms->fit.bgs;
    recon->FL.warm  = parms->recon.warm_restart;
    recon->FL.maxit = parms->fit.maxit;

    /*Use the function form when FL.M has not been assembled. */
    if(!recon->FL.M){
	recon->FL.Mfun  = FitL;
	recon->FL.Mdata = recon;
    }
    /*Same for FR.M (it is NULL when HXF was skipped because of idealfit). */
    if(!recon->FR.M){
	recon->FR.Mfun  = FitR;
	recon->FR.Mdata = recon;
    }
}
示例#3
0
/*
static int test_fft_speed_small(){
    int nis=128;
    int *is=mycalloc(nis,int);
    dmat *tim=dnew(nis,1);
    for(int ii=0; ii<nis; ii++){
	is[ii]=ii+1;
    }
    ccell *ac=cellnew(nis,1);
    rand_t stat;
    seed_rand(&stat,1);
    for(int ii=0; ii<nis; ii++){
	ac->p[ii]=cnew(is[ii],is[ii]);
	//cfft2plan(ac->p[ii],-1);
	crandn(ac->p[ii],20,&stat);
    }
    TIC;
    for(int ii=0; ii<nis; ii++){
	info2("size %4d: ",is[ii]);
	tic;
	for(int i=0; i<1000; i++){
	    cfft2(ac->p[ii],-1);
	}
	toc2("fft");
	tim->p[ii]=toc3;
    }
    writebin(tim,"fft_timing");
}

static void test_fft_speed(){
    int nis=2048;
    int *is=mycalloc(nis,int);
    dmat *tim=dnew(nis,1);
    for(int ii=0; ii<nis; ii++){
	is[ii]=(ii+1)*2;
    }
    ccell *ac=cellnew(nis,1);
    rand_t stat;
    seed_rand(&stat,1);
    TIC;
    for(int ii=0; ii<nis; ii++){
	info2("size %4d: ",is[ii]);
	tic;
	ac->p[ii]=cnew(is[ii],is[ii]);
	//cfft2plan(ac->p[ii],-1);
	crandn(ac->p[ii],20,&stat);
	toc("plan");
    }
    toc("plan");
    for(int ii=0; ii<nis; ii++){
	info2("size %4d: ",is[ii]);
	tic;
	int nrepeat;
	if(is[ii]<300)
	    nrepeat=100;
	else if(is[ii]<1000)
	    nrepeat=10;
	else
	    nrepeat=1;

	for(int i=0; i<nrepeat; i++){
	    cfft2(ac->p[ii],-1);
	}
	toc2("fft");
	tim->p[ii]=toc3/nrepeat;
    }
    writebin(tim,"fft_timing");
    }*/
static void test_fft_special(){
    int nis=2;
    int *is=mycalloc(nis,int);
    dmat *tim=dnew(nis,1);
    is[0]=3824;
    is[1]=4096;
    ccell *ac=ccellnew(nis,1);
    rand_t rstat;
    seed_rand(&rstat,1);
    TIC;
    for(int ii=0; ii<nis; ii++){
	info2("size %4d: ",is[ii]);
	tic;
	ac->p[ii]=cnew(is[ii],is[ii]);
	//cfft2plan(ac->p[ii],-1);
	//cfft2partialplan(ac->p[ii],512,-1);
	crandn(ac->p[ii],20,&rstat);
	toc("plan");
    }

    for(int ii=0; ii<nis; ii++){
	info2("size %4d: ",is[ii]);
	tic;
	int nrepeat;
	if(is[ii]<300)
	    nrepeat=100;
	else if(is[ii]<1000)
	    nrepeat=10;
	else
	    nrepeat=4;

	for(int i=0; i<nrepeat; i++){
	    cfft2(ac->p[ii],-1);
	}
	toc2("fft");
	for(int i=0; i<nrepeat; i++){
	    cfft2partial(ac->p[ii],512,-1);
	}
	toc2("fft2partial");
	tim->p[ii]=toc3/nrepeat;
    }
    writebin(tim,"fft_timing");

}
示例#4
0
文件: micro.c 项目: awf/Coconut
/**
 * Micro-benchmark driver for the generated linalg kernels.
 *
 * Usage: <program> <Number of Iterations>
 *
 * Fills three vectors of length DIM using dist(rng), then calls the kernel
 * selected at compile time (ADD3, DOT or CROSS; DPS selects the
 * destination-passing variant, HOIST allocates its storage once outside the
 * loop) N times and prints the accumulated total and the elapsed time
 * divided by N.
 *
 * Returns 0 on success; exits with status 1 on a missing or invalid
 * iteration count.
 */
int main(int argc, char** argv)
{
    if(argc != 2) {
        printf("You should use the following format for running this program: %s <Number of Iterations>\n", argv[0]);
        exit(1);
    }
    /* strtol rather than atoi: atoi cannot report malformed input, and an
       iteration count of 0 would make the per-call time a division by zero. */
    char *end = NULL;
    long parsed = strtol(argv[1], &end, 10);
    int N = (int)parsed;
    if(end == argv[1] || *end != '\0' || parsed <= 0 || (long)N != parsed) {
        fprintf(stderr, "Number of iterations must be a positive integer, got '%s'\n", argv[1]);
        exit(1);
    }
    int rng = 42;
    srand(rng);

    array_number_t vec1 = vector_fill(DIM, 0.0);
    array_number_t vec2 = vector_fill(DIM, 0.0);
    array_number_t vec3 = vector_fill(DIM, 0.0);
    for(int i=0; i<DIM; i++) {
        vec1->arr[i] = dist(rng);
        vec2->arr[i] = dist(rng);
        vec3->arr[i] = dist(rng);
    }

#ifdef HOIST
    /* Storage shared by all iterations. */
    storage_t s = storage_alloc(VECTOR_ALL_BYTES(DIM));
#endif

    timer_t t = tic();

    double total = 0;
    for (int count = 0; count < N; ++count) {
        /* Change the inputs on every iteration so that the calls differ. */
        vec1->arr[0] += 1.0 / (2.0 + vec1->arr[0]);
        vec2->arr[10] += 1.0 / (2.0 + vec2->arr[10]);
#ifdef DPS
#ifndef HOIST
        /* Without HOIST the storage is allocated and freed per iteration. */
        storage_t s = storage_alloc(VECTOR_ALL_BYTES(DIM));
#endif
#endif
#ifdef ADD3
    #ifdef DPS
        total += vectorSum(TOP_LEVEL_linalg_vectorAdd3_dps(s, vec1, vec2, vec3, DIM, DIM, DIM));
    #else
        total += vectorSum(TOP_LEVEL_linalg_vectorAdd3(vec1, vec2, vec3));
    #endif
#elif DOT
    #ifdef DPS
        total += TOP_LEVEL_linalg_dot_prod_dps(s, vec1, vec2, DIM, DIM);
    #else
        total += TOP_LEVEL_linalg_dot_prod(vec1, vec2);
    #endif
#elif CROSS
    #ifdef DPS
        total += vectorSum(TOP_LEVEL_linalg_cross_dps(s, vec1, vec2, DIM, DIM));
    #else
        total += vectorSum(TOP_LEVEL_linalg_cross(vec1, vec2));
    #endif
#endif
#ifdef DPS
#ifndef HOIST
        storage_free(s, VECTOR_ALL_BYTES(DIM));
#endif
#endif
    }
    double elapsed = toc2(t);
    printf("total =%f, time per call = %f ms\n", total, elapsed / (double)(N));
    return 0;
}
示例#5
0
文件: gmm_d.c 项目: awf/Coconut
/**
 * Benchmark driver for the GMM objective.
 *
 * Builds a problem with n = 100000 points of dimension GMM_D and GMM_K
 * components, fills every input using dist(rng), then evaluates the
 * objective N times (N is divided by 10 when _DEBUG is defined) and prints
 * the accumulated total and the elapsed time divided by N.
 */
int main(int argc, char** argv)
{
  int rng = 42;
  srand(rng);

  /* Problem size. */
  size_t n = 100000;
  size_t d = GMM_D;
  size_t K = GMM_K;
#ifdef DPS
  size_t td = TOP_LEVEL_usecases_gmm_tri_dps(empty_storage, d, 0);
#else
  size_t td = TOP_LEVEL_usecases_gmm_tri(d);
#endif

  /* GMM coefficients: K weights, K x d means, K x d qs and K x td ls. */
  array_number_t alphas = vector_fill(K, 0);
  array_array_number_t means = matrix_fill(K, d, 0);
  array_array_number_t qs = matrix_fill(K, d, 0);
  array_array_number_t ls = matrix_fill(K, td, 0);
  for (int comp = 0; comp < K; ++comp) {
    alphas->arr[comp] = dist(rng);

    array_number_t mean_row = means->arr[comp];
    array_number_t q_row = qs->arr[comp];
    for (int col = 0; col < d; ++col) {
      mean_row->arr[col] = dist(rng) - 0.5;
      q_row->arr[col] = 10.0*dist(rng) - 5.0;
    }

    array_number_t l_row = ls->arr[comp];
    for (int idx = 0; idx < l_row->length; ++idx) {
      /* dist() is called for every entry, then the last d entries are zeroed. */
      l_row->arr[idx] = dist(rng) - 0.5;
      if(idx >= l_row->length - d) {
        l_row->arr[idx] = 0;
      }
    }
  }

  /* Data points: n x d. */
  array_array_number_t xs = matrix_fill(n, d, 0);
  for (int row = 0; row < n; ++row) {
    array_number_t x_row = xs->arr[row];
    for (int col = 0; col < d; ++col) {
      x_row->arr[col] = dist(rng);
    }
  }

  timer_t t = tic();

  double total = 0;
  int N = 100;
#ifdef _DEBUG
  /* Debug builds are much slower, so run fewer iterations. */
  N /= 10;
#endif
  double wishart_m = 2.0;
  for (int iter = 0; iter < N; ++iter) {
    /* Change the inputs on every iteration. */
    alphas->arr[0] += 1;
    double wishart_gamma = 1.0 / (1.0 + iter);
#ifdef DPS
    total += TOP_LEVEL_usecases_gmm_gmm_objective_dps(empty_storage, xs, alphas, means, qs, ls, wishart_gamma, wishart_m,
        matrix_shape(xs), vector_shape(alphas), matrix_shape(means), matrix_shape(qs), matrix_shape(ls), 0, 0);
#else
    total += TOP_LEVEL_usecases_gmm_gmm_objective_d(xs, alphas, means, qs, ls, wishart_gamma, wishart_m, xs, alphas, means, qs, ls, wishart_gamma, wishart_m);
#endif
  }

  double elapsed = toc2(t);
  printf("total =%f, time per call = %f ms\n", total, elapsed / (double)(N));

  return 0;
}
示例#6
0
/**
   Read the ztilt (or phygrad, when parms->skyc.usephygrad is set) time series
   and the gradient offsets for every star and every powfs.

   The files are stored on a grid with spacing parms->maos.ngsgrid. For each
   star the up to four grid points surrounding its direction are combined with
   bilinear weights. Grid points whose weight is below 0.01 or whose ztilt
   file does not exist are dropped, and the remaining weights are renormalized
   by their sum.

   Results are stored in star[istar].ztiltout (one dmat per powfs) and
   star[istar].goff (allocated only when a gradoff file is found).

   \param star   Array of nstar stars, updated in place.
   \param nstar  Number of entries in star.
   \param parms  Parameters (grid spacing, powfs layout, file selection).
   \param seed   Seed that appears in the ztilt/phygrad file names.
   \return nx of the last cell-format header read, i.e. the number of time
           steps; 0 if no cell-format file was read.
*/
long setup_star_read_ztilt(STAR_S *star, int nstar, const PARMS_S *parms, int seed){
    const double ngsgrid=parms->maos.ngsgrid;
    long nstep=0;
    TIC;tic;
    for(int istar=0; istar<nstar; istar++){
	STAR_S *stari=&star[istar];
	int npowfs=parms->maos.npowfs;
	stari->ztiltout=dcellnew(npowfs, 1);
	const double thetax=stari->thetax*206265;/*in as */
	const double thetay=stari->thetay*206265;

	/*Position in units of the grid spacing: integer cell index plus the
	  fractional offset inside the cell. */
	double thxnorm=thetax/ngsgrid;
	double thynorm=thetay/ngsgrid;
	long thxl=(long)floor(thxnorm);/*Used to be double, but -0 appears. */
	long thyl=(long)floor(thynorm);
	double wtx=thxnorm-thxl;
	double wty=thynorm-thyl;
	for(int ipowfs=0; ipowfs<npowfs; ipowfs++){
	    const int msa=parms->maos.msa[ipowfs];
	    const int nsa=parms->maos.nsa[ipowfs];
	    const int ng=nsa*2;
	    /*Fixed buffers reused for every powfs. Allocating the names with
	      myalloca inside the loops would (if it is alloca-backed) keep
	      every buffer alive until the function returns, so stack use
	      would grow with nstar*npowfs. */
	    char fnztilt_buf[2][2][PATH_MAX];
	    char fngoff_buf[2][2][PATH_MAX];
	    /*A NULL entry marks a grid point that is not used. */
	    char *fnztilt[2][2]={{NULL,NULL},{NULL,NULL}};
	    char *fngoff[2][2]={{NULL, NULL}, {NULL, NULL}};
	    double wtsum=0;
	    /*First pass: build the file names and sum the usable weights. */
	    for(int ix=0; ix<2; ix++){
		double thx=(thxl+ix)*ngsgrid;
		for(int iy=0; iy<2; iy++){
		    double thy=(thyl+iy)*ngsgrid;
		    /*Bilinear weight of this corner. */
		    double wtxi=fabs(((1-ix)-wtx)*((1-iy)-wty));

		    if(wtxi<0.01){
			/*Negligible contribution, skip this corner. */
			continue;
		    }
		    fnztilt[iy][ix]=fnztilt_buf[iy][ix];
		    if(parms->skyc.usephygrad){
			warning_once("Using phygrad\n");
			snprintf(fnztilt[iy][ix],PATH_MAX,"%s/phygrad/phygrad_seed%d_sa%d_x%g_y%g",
				 dirstart,seed,msa,thx,thy);
		    }else{
			snprintf(fnztilt[iy][ix],PATH_MAX,"%s/ztiltout/ztiltout_seed%d_sa%d_x%g_y%g",
				 dirstart,seed,msa,thx,thy);
		    }
		    fngoff[iy][ix]=fngoff_buf[iy][ix];
		    snprintf(fngoff[iy][ix],PATH_MAX,"%s/gradoff/gradoff_sa%d_x%g_y%g",
			     dirstart,msa,thx,thy);
		    if(!zfexist(fnztilt[iy][ix])){
			/*No ztilt file: drop both the ztilt and gradoff entry. */
			fnztilt[iy][ix]=fngoff[iy][ix]=NULL;
		    }else{
			wtsum+=wtxi;
		    }
		}
	    }
	    if(wtsum<0.01){
		error("PSF is not available for (%g,%g). wtsum=%g\n",thetax,thetay, wtsum);
	    }
	    /*Now do the actual reading */
	    for(int ix=0; ix<2; ix++){
		for(int iy=0; iy<2; iy++){
		    /*Weight renormalized over the corners that are available. */
		    double wtxi=fabs(((1-ix)-wtx)*((1-iy)-wty))/wtsum;
		    if(fnztilt[iy][ix]){
			file_t *fp_ztilt=zfopen(fnztilt[iy][ix],"rb");
			header_t header={0,0,0,0};
			read_header(&header, fp_ztilt);

			if(iscell(&header.magic)){
			    /*Cell format: header.nx entries, one per time
			      step, each read as a dmat with ng values. */
			    nstep=header.nx;
			    free(header.str);
			    if(stari->nstep==0){
				stari->nstep=nstep;
			    }else{
				if(stari->nstep!=nstep){
				    error("Different type has different steps\n");
				}
			    }
			    if(!stari->ztiltout->p[ipowfs]){
				stari->ztiltout->p[ipowfs]=dnew(ng, nstep);
			    }
			    dmat  *ztiltout=stari->ztiltout->p[ipowfs];
			    for(long istep=0; istep<nstep; istep++){
				dmat *ztilti=dreaddata(fp_ztilt, 0);
				for(int ig=0; ig<ng; ig++){
				    ztiltout->p[ig+istep*ng]+=ztilti->p[ig]*wtxi;
				}
				dfree(ztilti);
			    }
			}else{
			    /*Not a cell: read the data that follows the
			      header as a single matrix and accumulate it. */
			    dmat *tmp=dreaddata(fp_ztilt, &header);
			    dadd(&stari->ztiltout->p[ipowfs], 1, tmp, wtxi );
			    dfree(tmp);
			}
			zfclose(fp_ztilt);
		    }/* if(fnztilt) */
		    if(fngoff[iy][ix] && zfexist(fngoff[iy][ix])){
			if(!stari->goff){
			    stari->goff=dcellnew(npowfs, 1);
			}
			dmat *tmp=dread("%s", fngoff[iy][ix]);
			dadd(&stari->goff->p[ipowfs], 1, tmp, wtxi);
			dfree(tmp);
		    }
		}/*iy */
	    }/*ix */
	}/*ipowfs */
    }/*istar */
    if(parms->skyc.verbose){
	toc2("Reading PSF");
    }
    return nstep;
}