Beispiel #1
0
/**
   The main(). It parses the command line, setup the parms, ask the scheduler
   for signal to proceed, and then starts skysim to do sky coverage.
 */
int main(int argc, const char *argv[]){
    dirstart=mygetcwd();
    char *scmd=argv2str(argc, argv, " ");
    ARG_S* arg=parse_args(argc,argv);
    /*In detach mode send to background and disable drawing*/
    if(arg->detach){
	daemonize();
    }else{
	redirect();
    }
    info2("%s\n", scmd);
    info2("Output folder is '%s'. %d threads\n",arg->dirout, arg->nthread);
    skyc_version();
    /*register signal handler */
    register_signal_handler(skyc_signal_handler);
    /*
      Ask job scheduler for permission to proceed. If no CPUs are available,
      will block until ones are available.  if arg->force==1, will run
      immediately.
    */
    scheduler_start(scmd,arg->nthread,0,!arg->force);
    /*setting up parameters before asking scheduler to check for any errors. */
    dirsetup=stradd("setup",NULL);
    PARMS_S * parms=setup_parms(arg);
    if(parms->skyc.dbg){
	mymkdir("%s",dirsetup);
    }
    if(!arg->force){
	info2("Waiting start signal from the scheduler ...\n");
	/*Failed to wait. fall back to own checking.*/
	int count=0;
	while(scheduler_wait()&& count<60){
	    warning_time("failed to get reply from scheduler. retry\n");
	    sleep(10);
	    count++;
	    scheduler_start(scmd,arg->nthread,0,!arg->force);
	}
	if(count>=60){
	    warning_time("fall back to own checker\n");
	    wait_cpu(arg->nthread);
	}
    }
    info2("Simulation started at %s in %s.\n",myasctime(),myhostname());
    free(scmd);
    free(arg->dirout);
    free(arg);
    THREAD_POOL_INIT(parms->skyc.nthread);
    /*Loads the main software*/
    OMPTASK_SINGLE skysim(parms);
    free_parms(parms);
    free(dirsetup);
    free(dirstart);
    rename_file(0);
    scheduler_finish(0);
    info2("End:\t%.2f MiB\n",get_job_mem()/1024.);
    info2("Simulation finished at %s in %s.\n",myasctime(),myhostname());
    return 0;
}
Beispiel #2
0
void maos_isim(int isim){
    const PARMS_T *parms=global->parms;
    RECON_T *recon=global->recon;
    SIM_T   *simu =global->simu;
    int iseed=global->iseed;
    int simstart=parms->sim.start;
    int simend=parms->sim.end;
    if(isim==simstart+1){//skip slow first step.
	tk_atm=myclockd();
    }
    if(isim+2+parms->sim.dtrat_hi>=simend){
	draw_single=0;
    }
    double ck_0=myclockd();
    simu->isim=isim;
    simu->status->isim=isim;
    sim_update_etf(simu);
    if(parms->atm.frozenflow){
#if USE_CUDA
	if(parms->gpu.evl || parms->gpu.wfs){
	    /*may need to copy another part */
	    gpu_atm2gpu(simu->atm, simu->atmscale, parms, iseed, isim);
	}
#endif
    }else{
	//Do not put this one inside parallel 
	genatm(simu);
	/*re-seed the atmosphere in case atm is loaded from shm/file */
	seed_rand(simu->atm_rand, lrand(simu->init_rand));
    }
    OMPTASK_SINGLE{
	if(parms->sim.dmproj){
	    /* teporarily disable FR.M so that Mfun is used.*/
	    cell *FRM=recon->FR.M; recon->FR.M=NULL; 
	    muv_solve(&simu->dmproj, &recon->FL, &recon->FR, NULL);
	    recon->FR.M=FRM;/*set FR.M back*/
	    if(parms->save.dm){
		zfarr_dcell(simu->save->dmproj, simu->isim, simu->dmproj);
	    }
	    if(!parms->fit.square){
		/* Embed DM commands to a square array for fast ray tracing */
		for(int idm=0; idm<parms->ndm; idm++){
		    loc_embed(simu->dmprojsq->p[idm], recon->aloc->p[idm], simu->dmproj->p[idm]->p);
		}
	    }
#if USE_CUDA
	    if(parms->gpu.evl || parms->gpu.wfs){
		gpu_dmproj2gpu(simu->dmprojsq);
	    }
#endif
	}
	save_dmreal(simu);
	extern int NO_RECON, NO_WFS, NO_EVL;
	if(PARALLEL){
	    /*
	      We do the big loop in parallel to make better use the
	      CPUs. Notice that the reconstructor is working on grad from
	      last time step so that there is no confliction in data access.
	    */
	    /*when we want to apply idealngs correction, wfsgrad need to wait for perfevl. */
	    long group=0;
	    if(parms->gpu.evl && !NO_EVL){
		//Queue tasks on GPU, no stream sync is done
		QUEUE_THREAD(group, simu->perf_evl_pre, 0);
	    }
	    if(!parms->tomo.ahst_idealngs && parms->gpu.wfs && !NO_WFS){
		//task for each wfs
		QUEUE_THREAD(group, simu->wfs_grad_pre, 0);
	    }
	    if(!NO_RECON){
		//don't put this first. It has cpu overhead in computing gradol
		QUEUE(group, reconstruct, simu, 1, 0);
	    }
	    if(!NO_EVL){
		if(parms->gpu.evl){
		    //wait for GPU tasks to be queued before calling sync
		    WAIT(group);
		}
		QUEUE(group, perfevl, simu, 1, 0);
	    }
	    if(!NO_WFS){
		if(parms->tomo.ahst_idealngs || (parms->gpu.wfs && !parms->gpu.evl)){
		    //in ahst_idealngs mode, weight for perfevl to finish.
		    //otherwise, wait for GPU tasks to be queued before calling sync
		    WAIT(group);
		}
		QUEUE(group, wfsgrad, simu, 1, 0);
	    }
	    if(!NO_RECON){
		//wait for all tasks to finish before modifying dmreal
		WAIT(group);
		shift_grad(simu);/*before filter() */
		filter_dm(simu);/*updates dmreal, so has to be after prefevl/wfsgrad is done. */
	    }
	    WAIT(group);
	}else{/*do the big loop in serial mode. */
	    if(parms->sim.closeloop){
		if(!NO_EVL) perfevl(simu);/*before wfsgrad so we can apply ideal NGS modes */
		if(!NO_WFS) wfsgrad(simu);/*output grads to gradcl, gradol */
		if(!NO_RECON) {
		    reconstruct(simu);/*uses grads from gradlast cl, gradlast ol. */
		    shift_grad(simu);
		    filter_dm(simu);
		}
	    }else{/*in OL mode,  */
		if(!NO_WFS) wfsgrad(simu);
		if(!NO_RECON) {
		    shift_grad(simu);
		    reconstruct(simu);
		    filter_dm(simu);
		}
		if(!NO_EVL) perfevl(simu);
	    }
	}
    }
    double ck_end=myclockd();
    long steps_done=iseed*(simend-simstart)+(isim+1-simstart);
    long steps_rest=parms->sim.nseed*(simend-simstart)-steps_done;
    if(isim!=simstart){
	simu->status->rest=(long)((ck_end-tk_0-(tk_atm-tk_1)*(iseed+1))/steps_done*steps_rest
				  +(tk_atm-tk_1)*(parms->sim.nseed-iseed-1));
	simu->status->mean=(ck_end-tk_atm)/(double)(isim-simstart);
    }
    simu->status->laps=(long)(ck_end-tk_0);
    simu->status->tot  =ck_end-ck_0;
    simu->status->wfs  =simu->tk_wfs;
    simu->status->recon=simu->tk_recon;
    simu->status->other=simu->tk_cache;
    simu->status->eval =simu->tk_eval;
    simu->status->scale=1;
    if(simu->timing){
	simu->timing->p[isim*simu->timing->nx]=get_job_mem();
	simu->timing->p[isim*simu->timing->nx+1]=simu->status->tot;
	simu->timing->p[isim*simu->timing->nx+2]=simu->status->wfs;
	simu->timing->p[isim*simu->timing->nx+3]=simu->status->recon;
	simu->timing->p[isim*simu->timing->nx+4]=simu->status->eval;
    }
    double this_time=myclockd();
    if(this_time>simu->last_report_time+1 || isim+1==simend || parms->sim.pause){
	/*we don't print out or report too frequently. */
	simu->last_report_time=this_time;
#if defined(__linux__) || defined(__APPLE__)
	scheduler_report(simu->status);
#endif
	print_progress(simu);
    }
}
Beispiel #3
0
/**
   This is the standard entrance routine to the program.  It first calls
   setup_parms() to setup the simulation parameters and check for possible
   errors. It then waits for starting signal from the scheduler if in batch
   mode. Finally it hands the control to maos() to start the actual simulation.

   Call maos with overriding *.conf files or embed the overriding parameters in
   the command line to override the default parameters, e.g.

   <p><code>maos base.conf save.setup=1 'powfs.phystep=[0 100 100]'</code><p>

   Any duplicate parameters will override the pervious specified value. The
   configure file nfiraos.conf will be loaded as the master .conf unless a -c
   switch is used with another .conf file. For scao simulations, call maos with
   -c switch and the right base .conf file.
   
   <p><code>maos -c scao_ngs.conf override.conf</code><p>

   for scao NGS simulations 

   <p><code>maos -c scao_lgs.conf override.conf</code><p>

   for scao LGS simulations.  With -c switch, nfiraos.conf will not be read,
   instead scao_ngs.conf or scao_lgs.conf are read as the master config file.
   Do not specify any parameter that are not understood by the code, otherwise
   maos will complain and exit to prevent accidental mistakes.
       
   Generally you link the maos executable into a folder that is in your PATH
   evironment or into the folder where you run simulations.

   Other optional parameters:
   \verbatim
   -d          do detach from console and not exit when logged out
   -s 2 -s 4   set seeds to [2 4]
   -n 4        launch 4 threads.
   -f          To disable job scheduler and force proceed
   \endverbatim
   In detached mode, drawing is automatically disabled.
   \callgraph
*/
int main(int argc, const char *argv[]){
    char *scmd=argv2str(argc,argv," ");
    ARG_T* arg=parse_args(argc,argv);/*does chdir */
    if(arg->detach){
	daemonize();
    }else{
	redirect();
    }
    /*Launch the scheduler if it is not running and report about our process */
    int ngpu;
#if USE_CUDA
    ngpu=arg->ngpu;
    if(!ngpu) ngpu=0xFFFFFF;
#else
    ngpu=0;
#endif
    scheduler_start(scmd,NTHREAD,ngpu,!arg->force);
    info2("%s\n", scmd);
    info2("Output folder is '%s'. %d threads\n",arg->dirout, NTHREAD);
    maos_version();
    /*setting up parameters before asking scheduler to check for any errors. */
    PARMS_T *parms=setup_parms(arg->conf, arg->confcmd, arg->override);
    free(arg->conf); arg->conf=0;
    if(arg->confcmd){
	remove(arg->confcmd); free(arg->confcmd); arg->confcmd=0;
    }
    info2("After setup_parms:\t %.2f MiB\n",get_job_mem()/1024.);
    /*register signal handler */
    register_signal_handler(maos_signal_handler);
 
    if(!arg->force){
	/*
	  Ask job scheduler for permission to proceed. If no CPUs are
	  available, will block until ones are available.
	  if arg->force==1, will run immediately.
	*/
	info2("Waiting start signal from the scheduler ...\n");
	int count=0;
	while(scheduler_wait()&& count<60){
	    /*Failed to wait. fall back to own checking.*/
	    warning_time("failed to get reply from scheduler. retry\n");
	    sleep(10);
	    count++;
	    scheduler_start(scmd,NTHREAD,ngpu,!arg->force);
	}
	if(count>=60){
	    warning_time("fall back to own checker\n");
	    wait_cpu(NTHREAD);
	}
    }
    thread_new((thread_fun)scheduler_listen, maos_daemon);
    setup_parms_gpu(parms, arg->gpus, arg->ngpu);
    if(arg->server){
	while(maos_server_fd<0){
	    warning("Waiting for fd\n");
	    sleep(1);
	}
	maos_server(parms);
	EXIT;
    }
    free(scmd);
    free(arg->dirout);
    free(arg->gpus);
    free(arg);

    /*do not use prallel single in maos(). It causes blas to run single threaded
     * during preparation. Selective enable parallel for certain setup functions
     * that doesn't use blas*/
    maos(parms);
    rename_file(0);
    scheduler_finish(0);
    return 0;
}
Beispiel #4
0
/**
   Assemble the DM fitting matrix

   The fitting is done by minimizing \f$||H_X x - H_A a||^2_W\f$ where \f$H_X,
   H_A\f$ are ray tracing operator from tomography grid xloc, and deformable
   mirror grid aloc to pupil grid ploc. The norm is weighted using bilinear
   influence functions within the telescope aperture. We have
   
   \f$a=\left[H_A^T(W_0-W_1 W_1^T)H_A\right]^{-1} H_A^T (W_0-W_1) H_X x\f$

   For details see www.opticsinfobase.org/abstract.cfm?URI=josaa-19-9-1803 
*/
static void
setup_recon_fit_matrix(RECON_T *recon, const PARMS_T *parms){
    const int nfit=parms->fit.nfit;
    const int ndm=parms->ndm;
    if(ndm==0) return;
    dspcell *HATc=dspcelltrans(recon->HA);
    PDSPCELL(HATc, HAT);
    PDSPCELL(recon->HA,HA);

    info2("Before assembling fit matrix:\t%.2f MiB\n",get_job_mem()/1024.);
    /*Assemble Fit matrix. */
    int npsr=recon->npsr;
    if(parms->load.fit){
	if(!(zfexist("FRM") && zfexist("FRU") && zfexist("FRV"))){
	    error("FRM, FRU, FRV (.bin) not all exist\n");
	}
	warning("Loading saved recon->FR\n");
	recon->FR.M=readbin("FRM");
	recon->FR.U=dcellread("FRU");
	recon->FR.V=dcellread("FRV");
    }else{
	if(recon->HXF){
	    info2("Building recon->FR\n");
	    recon->FR.M=cellnew(ndm, npsr);
	    PDSPCELL(recon->FR.M, FRM);
	    PDSPCELL(recon->HXF, HXF);

	    for(int ips=0; ips<npsr; ips++){
		for(int ifit=0; ifit<nfit; ifit++){
		    if(fabs(recon->fitwt->p[ifit])<1.e-12) continue;
		    dsp *tmp=dspmulsp(recon->W0, HXF[ips][ifit],"nn");
		    for(int idm=0; idm<ndm; idm++){
			dspmulsp2(&FRM[ips][idm],HAT[ifit][idm], tmp, "nn",
				  recon->fitwt->p[ifit]);
		    }
		    dspfree(tmp);
		}
	    }
	    recon->FR.V=cellnew(npsr, 1);
	    dmat **FRV=recon->FR.V->p;  
	
	    for(int ips=0; ips<npsr; ips++){
		int nloc=recon->xloc->p[ips]->nloc;
		FRV[ips]=dnew(nloc,nfit);
		for(int ifit=0; ifit<nfit; ifit++){
		    /*notice the sqrt. */
		    if(fabs(recon->fitwt->p[ifit])<1.e-12) continue;
		    dspmulvec(FRV[ips]->p+ifit*nloc, 
			      HXF[ips][ifit], recon->W1->p, 't',
			      sqrt(recon->fitwt->p[ifit]));
		}
	    }
	    if(parms->save.recon){
		writebin(recon->FR.M,"FRM");
		writebin(recon->FR.V,"FRV");
	    }
	}else{
	    info("Avoid building recon->FR.M\n");
	    recon->FR.M=NULL;
	    recon->FR.V=NULL;
	}
	/*Always need FR.U as it is used to do FL.U, FL.V */
	recon->FR.U=cellnew(ndm, 1);
	dmat **FRU=recon->FR.U->p;
	
	for(int idm=0; idm<ndm; idm++){    
	    int nloc=recon->aloc->p[idm]->nloc;
	    FRU[idm]=dnew(nloc, nfit);
	    for(int ifit=0; ifit<nfit; ifit++){
		/*notice the sart. */
		if(fabs(recon->fitwt->p[ifit])<1.e-12) continue;
		dspmulvec(FRU[idm]->p+ifit*nloc, 
			  HA[idm][ifit], recon->W1->p,'t',
			  sqrt(recon->fitwt->p[ifit]));
	    }
	}
	if(parms->save.recon){
	    writebin(recon->FR.U,"FRU");
	}
    }

    if(parms->load.fit){
	if(!(zfexist("FLM") && zfexist("FLU") && zfexist("FLV"))){
	    error("FLM, FLU, FLV (.bin) not all exist\n");
	}
	warning("Loading saved recon->FL\n");
	recon->FL.M=readbin("FLM");
	recon->FL.U=dcellread("FLU");
	recon->FL.V=dcellread("FLV");
    }else{
	info2("Building recon->FL\n");
	recon->FL.M=cellnew(ndm, ndm);
	dsp *(*FLM)[ndm]=(dsp*(*)[ndm])recon->FL.M->p;
	for(int idm=0; idm<ndm; idm++){
	    for(int ifit=0; ifit<nfit; ifit++){
		if(fabs(recon->fitwt->p[ifit])<1.e-12) continue;
		dsp *tmp=dspmulsp(recon->W0, HA[idm][ifit],"nn");
		for(int jdm=0; jdm<ndm; jdm++){
		    dspmulsp2(&FLM[idm][jdm],HAT[ifit][jdm], tmp,"nn",
			      recon->fitwt->p[ifit]);
		}
		dspfree(tmp);
	    }
	}
	dspcellfree(HATc);
	if(fabs(parms->fit.tikcr)>1.e-15){
	    double tikcr=parms->fit.tikcr;
	    /*Estimated from the formula.  1/nloc is due to W0, the other
	      scaling is due to ray tracing between different sampling freq.*/
	    int nact=0;
	    for(int idm=0; idm<parms->ndm; idm++){
		nact+=recon->aloc->p[idm]->nloc;
	    }
	    double maxeig=4./nact;
	    info2("Adding tikhonov constraint of %g to FLM\n", tikcr);
	    info2("The maximum eigen value is estimated to be around %e\n", maxeig);
	    dcelladdI(recon->FL.M,tikcr*maxeig);
	}

	{/*Low rank terms. */
	    recon->FL.U=dcellcat_each(recon->FR.U, recon->fitNW, 2);
	    dcell *tmp=NULL;/*negative NW. */
	    dcelladd(&tmp, 1, recon->fitNW, -1);
	    recon->FL.V=dcellcat_each(recon->FR.U, tmp, 2);
	    dcellfree(tmp);
	}
	if(recon->actslave){
	    dcelladd(&recon->FL.M, 1, recon->actslave, 1);
	}
	/*dspcellsym(recon->FL.M); */
	info2("DM Fit number of Low rank terms: %ld in RHS, %ld in LHS\n",
	      recon->FR.U->p[0]->ny, recon->FL.U->p[0]->ny);
	if(parms->save.recon){
	    writebin(recon->FL.M,"FLM.bin");
	    writebin(recon->FL.U,"FLU");
	    writebin(recon->FL.V,"FLV");
	}
    }
    if((parms->fit.alg==0 || parms->fit.alg==2) && parms->fit.bgs){
	muv_direct_diag_prep(&(recon->FL),(parms->fit.alg==2)*parms->fit.svdthres);
    }
    if((parms->fit.alg==0 || parms->fit.alg==2) && !parms->fit.bgs){
	if(fabs(parms->fit.tikcr)<1.e-14){
	    warning("tickcr=%g is too small, chol may fail.\n", parms->fit.tikcr);
	}
	muv_direct_prep(&(recon->FL),(parms->fit.alg==2)*parms->fit.svdthres);
	info2("After cholesky/svd on matrix:\t%.2f MiB\n",get_job_mem()/1024.);
    }
 
    if(parms->save.recon){
       	if(recon->FL.C){
	    chol_convert(recon->FL.C, 1);
	    chol_save(recon->FL.C,"FLC.bin");
	}
	if(recon->FL.MI)
	    writebin(recon->FL.MI,"FLMI");
	if(recon->FL.Up)
	    writebin(recon->FL.Up, "FLUp");
	if(recon->FL.Vp)
	    writebin(recon->FL.Vp, "FLVp");
	if(recon->FL.CB){
	    for(int ib=0; ib<recon->FL.nb; ib++){
		chol_save(recon->FL.CB[ib],"FLCB_%d.bin", ib);
	    }
	}
	if(recon->FL.MIB){
	    writebin(recon->FL.MIB,"FLMIB");
	}
    }
    info2("After assemble fit matrix:\t%.2f MiB\n",get_job_mem()/1024.);
}