/** The main(). It parses the command line, setup the parms, ask the scheduler for signal to proceed, and then starts skysim to do sky coverage. */ int main(int argc, const char *argv[]){ dirstart=mygetcwd(); char *scmd=argv2str(argc, argv, " "); ARG_S* arg=parse_args(argc,argv); /*In detach mode send to background and disable drawing*/ if(arg->detach){ daemonize(); }else{ redirect(); } info2("%s\n", scmd); info2("Output folder is '%s'. %d threads\n",arg->dirout, arg->nthread); skyc_version(); /*register signal handler */ register_signal_handler(skyc_signal_handler); /* Ask job scheduler for permission to proceed. If no CPUs are available, will block until ones are available. if arg->force==1, will run immediately. */ scheduler_start(scmd,arg->nthread,0,!arg->force); /*setting up parameters before asking scheduler to check for any errors. */ dirsetup=stradd("setup",NULL); PARMS_S * parms=setup_parms(arg); if(parms->skyc.dbg){ mymkdir("%s",dirsetup); } if(!arg->force){ info2("Waiting start signal from the scheduler ...\n"); /*Failed to wait. fall back to own checking.*/ int count=0; while(scheduler_wait()&& count<60){ warning_time("failed to get reply from scheduler. retry\n"); sleep(10); count++; scheduler_start(scmd,arg->nthread,0,!arg->force); } if(count>=60){ warning_time("fall back to own checker\n"); wait_cpu(arg->nthread); } } info2("Simulation started at %s in %s.\n",myasctime(),myhostname()); free(scmd); free(arg->dirout); free(arg); THREAD_POOL_INIT(parms->skyc.nthread); /*Loads the main software*/ OMPTASK_SINGLE skysim(parms); free_parms(parms); free(dirsetup); free(dirstart); rename_file(0); scheduler_finish(0); info2("End:\t%.2f MiB\n",get_job_mem()/1024.); info2("Simulation finished at %s in %s.\n",myasctime(),myhostname()); return 0; }
void maos_isim(int isim){ const PARMS_T *parms=global->parms; RECON_T *recon=global->recon; SIM_T *simu =global->simu; int iseed=global->iseed; int simstart=parms->sim.start; int simend=parms->sim.end; if(isim==simstart+1){//skip slow first step. tk_atm=myclockd(); } if(isim+2+parms->sim.dtrat_hi>=simend){ draw_single=0; } double ck_0=myclockd(); simu->isim=isim; simu->status->isim=isim; sim_update_etf(simu); if(parms->atm.frozenflow){ #if USE_CUDA if(parms->gpu.evl || parms->gpu.wfs){ /*may need to copy another part */ gpu_atm2gpu(simu->atm, simu->atmscale, parms, iseed, isim); } #endif }else{ //Do not put this one inside parallel genatm(simu); /*re-seed the atmosphere in case atm is loaded from shm/file */ seed_rand(simu->atm_rand, lrand(simu->init_rand)); } OMPTASK_SINGLE{ if(parms->sim.dmproj){ /* teporarily disable FR.M so that Mfun is used.*/ cell *FRM=recon->FR.M; recon->FR.M=NULL; muv_solve(&simu->dmproj, &recon->FL, &recon->FR, NULL); recon->FR.M=FRM;/*set FR.M back*/ if(parms->save.dm){ zfarr_dcell(simu->save->dmproj, simu->isim, simu->dmproj); } if(!parms->fit.square){ /* Embed DM commands to a square array for fast ray tracing */ for(int idm=0; idm<parms->ndm; idm++){ loc_embed(simu->dmprojsq->p[idm], recon->aloc->p[idm], simu->dmproj->p[idm]->p); } } #if USE_CUDA if(parms->gpu.evl || parms->gpu.wfs){ gpu_dmproj2gpu(simu->dmprojsq); } #endif } save_dmreal(simu); extern int NO_RECON, NO_WFS, NO_EVL; if(PARALLEL){ /* We do the big loop in parallel to make better use the CPUs. Notice that the reconstructor is working on grad from last time step so that there is no confliction in data access. */ /*when we want to apply idealngs correction, wfsgrad need to wait for perfevl. 
*/ long group=0; if(parms->gpu.evl && !NO_EVL){ //Queue tasks on GPU, no stream sync is done QUEUE_THREAD(group, simu->perf_evl_pre, 0); } if(!parms->tomo.ahst_idealngs && parms->gpu.wfs && !NO_WFS){ //task for each wfs QUEUE_THREAD(group, simu->wfs_grad_pre, 0); } if(!NO_RECON){ //don't put this first. It has cpu overhead in computing gradol QUEUE(group, reconstruct, simu, 1, 0); } if(!NO_EVL){ if(parms->gpu.evl){ //wait for GPU tasks to be queued before calling sync WAIT(group); } QUEUE(group, perfevl, simu, 1, 0); } if(!NO_WFS){ if(parms->tomo.ahst_idealngs || (parms->gpu.wfs && !parms->gpu.evl)){ //in ahst_idealngs mode, weight for perfevl to finish. //otherwise, wait for GPU tasks to be queued before calling sync WAIT(group); } QUEUE(group, wfsgrad, simu, 1, 0); } if(!NO_RECON){ //wait for all tasks to finish before modifying dmreal WAIT(group); shift_grad(simu);/*before filter() */ filter_dm(simu);/*updates dmreal, so has to be after prefevl/wfsgrad is done. */ } WAIT(group); }else{/*do the big loop in serial mode. */ if(parms->sim.closeloop){ if(!NO_EVL) perfevl(simu);/*before wfsgrad so we can apply ideal NGS modes */ if(!NO_WFS) wfsgrad(simu);/*output grads to gradcl, gradol */ if(!NO_RECON) { reconstruct(simu);/*uses grads from gradlast cl, gradlast ol. 
*/ shift_grad(simu); filter_dm(simu); } }else{/*in OL mode, */ if(!NO_WFS) wfsgrad(simu); if(!NO_RECON) { shift_grad(simu); reconstruct(simu); filter_dm(simu); } if(!NO_EVL) perfevl(simu); } } } double ck_end=myclockd(); long steps_done=iseed*(simend-simstart)+(isim+1-simstart); long steps_rest=parms->sim.nseed*(simend-simstart)-steps_done; if(isim!=simstart){ simu->status->rest=(long)((ck_end-tk_0-(tk_atm-tk_1)*(iseed+1))/steps_done*steps_rest +(tk_atm-tk_1)*(parms->sim.nseed-iseed-1)); simu->status->mean=(ck_end-tk_atm)/(double)(isim-simstart); } simu->status->laps=(long)(ck_end-tk_0); simu->status->tot =ck_end-ck_0; simu->status->wfs =simu->tk_wfs; simu->status->recon=simu->tk_recon; simu->status->other=simu->tk_cache; simu->status->eval =simu->tk_eval; simu->status->scale=1; if(simu->timing){ simu->timing->p[isim*simu->timing->nx]=get_job_mem(); simu->timing->p[isim*simu->timing->nx+1]=simu->status->tot; simu->timing->p[isim*simu->timing->nx+2]=simu->status->wfs; simu->timing->p[isim*simu->timing->nx+3]=simu->status->recon; simu->timing->p[isim*simu->timing->nx+4]=simu->status->eval; } double this_time=myclockd(); if(this_time>simu->last_report_time+1 || isim+1==simend || parms->sim.pause){ /*we don't print out or report too frequently. */ simu->last_report_time=this_time; #if defined(__linux__) || defined(__APPLE__) scheduler_report(simu->status); #endif print_progress(simu); } }
/** This is the standard entrance routine to the program. It first calls setup_parms() to setup the simulation parameters and check for possible errors. It then waits for starting signal from the scheduler if in batch mode. Finally it hands the control to maos() to start the actual simulation. Call maos with overriding *.conf files or embed the overriding parameters in the command line to override the default parameters, e.g. <p><code>maos base.conf save.setup=1 'powfs.phystep=[0 100 100]'</code><p> Any duplicate parameters will override the pervious specified value. The configure file nfiraos.conf will be loaded as the master .conf unless a -c switch is used with another .conf file. For scao simulations, call maos with -c switch and the right base .conf file. <p><code>maos -c scao_ngs.conf override.conf</code><p> for scao NGS simulations <p><code>maos -c scao_lgs.conf override.conf</code><p> for scao LGS simulations. With -c switch, nfiraos.conf will not be read, instead scao_ngs.conf or scao_lgs.conf are read as the master config file. Do not specify any parameter that are not understood by the code, otherwise maos will complain and exit to prevent accidental mistakes. Generally you link the maos executable into a folder that is in your PATH evironment or into the folder where you run simulations. Other optional parameters: \verbatim -d do detach from console and not exit when logged out -s 2 -s 4 set seeds to [2 4] -n 4 launch 4 threads. -f To disable job scheduler and force proceed \endverbatim In detached mode, drawing is automatically disabled. 
\callgraph */ int main(int argc, const char *argv[]){ char *scmd=argv2str(argc,argv," "); ARG_T* arg=parse_args(argc,argv);/*does chdir */ if(arg->detach){ daemonize(); }else{ redirect(); } /*Launch the scheduler if it is not running and report about our process */ int ngpu; #if USE_CUDA ngpu=arg->ngpu; if(!ngpu) ngpu=0xFFFFFF; #else ngpu=0; #endif scheduler_start(scmd,NTHREAD,ngpu,!arg->force); info2("%s\n", scmd); info2("Output folder is '%s'. %d threads\n",arg->dirout, NTHREAD); maos_version(); /*setting up parameters before asking scheduler to check for any errors. */ PARMS_T *parms=setup_parms(arg->conf, arg->confcmd, arg->override); free(arg->conf); arg->conf=0; if(arg->confcmd){ remove(arg->confcmd); free(arg->confcmd); arg->confcmd=0; } info2("After setup_parms:\t %.2f MiB\n",get_job_mem()/1024.); /*register signal handler */ register_signal_handler(maos_signal_handler); if(!arg->force){ /* Ask job scheduler for permission to proceed. If no CPUs are available, will block until ones are available. if arg->force==1, will run immediately. */ info2("Waiting start signal from the scheduler ...\n"); int count=0; while(scheduler_wait()&& count<60){ /*Failed to wait. fall back to own checking.*/ warning_time("failed to get reply from scheduler. retry\n"); sleep(10); count++; scheduler_start(scmd,NTHREAD,ngpu,!arg->force); } if(count>=60){ warning_time("fall back to own checker\n"); wait_cpu(NTHREAD); } } thread_new((thread_fun)scheduler_listen, maos_daemon); setup_parms_gpu(parms, arg->gpus, arg->ngpu); if(arg->server){ while(maos_server_fd<0){ warning("Waiting for fd\n"); sleep(1); } maos_server(parms); EXIT; } free(scmd); free(arg->dirout); free(arg->gpus); free(arg); /*do not use prallel single in maos(). It causes blas to run single threaded * during preparation. Selective enable parallel for certain setup functions * that doesn't use blas*/ maos(parms); rename_file(0); scheduler_finish(0); return 0; }
/**
   Assemble the DM fitting matrix.

   The fitting is done by minimizing \f$||H_X x - H_A a||^2_W\f$ where \f$H_X,
   H_A\f$ are ray tracing operator from tomography grid xloc, and deformable
   mirror grid aloc to pupil grid ploc. The norm is weighted using bilinear
   influence functions within the telescope aperture. We have

   \f$a=\left[H_A^T(W_0-W_1 W_1^T)H_A\right]^{-1} H_A^T (W_0-W_1 W_1^T) H_X x\f$

   For details see www.opticsinfobase.org/abstract.cfm?URI=josaa-19-9-1803

   What is stored in recon:
   - FR.M: sum over fit directions of fitwt * HA^T * W0 * HXF (only when
     recon->HXF is available); FR.U and FR.V: the W1 terms applied through HA
     and HXF respectively, scaled by sqrt(fitwt).
   - FL.M: sum over fit directions of fitwt * HA^T * W0 * HA, plus an optional
     Tikhonov term (parms->fit.tikcr) and recon->actslave when present;
     FL.U and FL.V: FR.U concatenated with +fitNW and -fitNW respectively.
   - With parms->load.fit set, FR and FL are read from file instead of being
     built; with parms->save.recon set, the results are written out.
   - For parms->fit.alg 0 or 2 the matrix is additionally prepared for a
     direct solve.

   Fit directions whose weight is below 1e-12 in magnitude are skipped.

   \param recon  Reconstructor data; reads HA, HXF, W0, W1, fitwt, fitNW,
                 actslave, aloc, xloc and npsr, and fills in FR and FL.
   \param parms  Simulation parameters (fit, load, save and ndm are used).
*/
static void setup_recon_fit_matrix(RECON_T *recon, const PARMS_T *parms){
    const int nfit=parms->fit.nfit;
    const int ndm=parms->ndm;
    if(ndm==0) return;
    /*Transpose of HA. Only needed while FR.M and FL.M are assembled; it is
      released on both the load and the build path below.*/
    dspcell *HATc=dspcelltrans(recon->HA);
    PDSPCELL(HATc, HAT);
    PDSPCELL(recon->HA, HA);

    info2("Before assembling fit matrix:\t%.2f MiB\n", get_job_mem()/1024.);
    /*Assemble Fit matrix. */
    int npsr=recon->npsr;
    if(parms->load.fit){
        if(!(zfexist("FRM") && zfexist("FRU") && zfexist("FRV"))){
            error("FRM, FRU, FRV (.bin) not all exist\n");
        }
        warning("Loading saved recon->FR\n");
        recon->FR.M=readbin("FRM");
        recon->FR.U=dcellread("FRU");
        recon->FR.V=dcellread("FRV");
    }else{
        if(recon->HXF){
            info2("Building recon->FR\n");
            recon->FR.M=cellnew(ndm, npsr);
            PDSPCELL(recon->FR.M, FRM);
            PDSPCELL(recon->HXF, HXF);

            for(int ips=0; ips<npsr; ips++){
                for(int ifit=0; ifit<nfit; ifit++){
                    if(fabs(recon->fitwt->p[ifit])<1.e-12) continue;
                    /*tmp = W0 * HXF, shared by all DMs for this layer/direction.*/
                    dsp *tmp=dspmulsp(recon->W0, HXF[ips][ifit], "nn");
                    for(int idm=0; idm<ndm; idm++){
                        dspmulsp2(&FRM[ips][idm], HAT[ifit][idm], tmp, "nn",
                                  recon->fitwt->p[ifit]);
                    }
                    dspfree(tmp);
                }
            }
            recon->FR.V=cellnew(npsr, 1);
            dmat **FRV=recon->FR.V->p;

            for(int ips=0; ips<npsr; ips++){
                int nloc=recon->xloc->p[ips]->nloc;
                FRV[ips]=dnew(nloc, nfit);
                for(int ifit=0; ifit<nfit; ifit++){
                    /*notice the sqrt. */
                    if(fabs(recon->fitwt->p[ifit])<1.e-12) continue;
                    dspmulvec(FRV[ips]->p+ifit*nloc,
                              HXF[ips][ifit], recon->W1->p, 't',
                              sqrt(recon->fitwt->p[ifit]));
                }
            }
            if(parms->save.recon){
                writebin(recon->FR.M, "FRM");
                writebin(recon->FR.V, "FRV");
            }
        }else{
            info("Avoid building recon->FR.M\n");
            recon->FR.M=NULL;
            recon->FR.V=NULL;
        }
        /*Always need FR.U as it is used to do FL.U, FL.V */
        recon->FR.U=cellnew(ndm, 1);
        dmat **FRU=recon->FR.U->p;

        for(int idm=0; idm<ndm; idm++){
            int nloc=recon->aloc->p[idm]->nloc;
            FRU[idm]=dnew(nloc, nfit);
            for(int ifit=0; ifit<nfit; ifit++){
                /*notice the sqrt. */
                if(fabs(recon->fitwt->p[ifit])<1.e-12) continue;
                dspmulvec(FRU[idm]->p+ifit*nloc,
                          HA[idm][ifit], recon->W1->p, 't',
                          sqrt(recon->fitwt->p[ifit]));
            }
        }
        if(parms->save.recon){
            writebin(recon->FR.U, "FRU");
        }
    }

    if(parms->load.fit){
        if(!(zfexist("FLM") && zfexist("FLU") && zfexist("FLV"))){
            error("FLM, FLU, FLV (.bin) not all exist\n");
        }
        warning("Loading saved recon->FL\n");
        recon->FL.M=readbin("FLM");
        recon->FL.U=dcellread("FLU");
        recon->FL.V=dcellread("FLV");
        /*Nothing was assembled on this path, but the transpose was still
          created above; release it here so that it is not leaked.*/
        dspcellfree(HATc);
    }else{
        info2("Building recon->FL\n");
        recon->FL.M=cellnew(ndm, ndm);
        dsp *(*FLM)[ndm]=(dsp*(*)[ndm])recon->FL.M->p;
        for(int idm=0; idm<ndm; idm++){
            for(int ifit=0; ifit<nfit; ifit++){
                if(fabs(recon->fitwt->p[ifit])<1.e-12) continue;
                /*tmp = W0 * HA, shared by all DMs jdm for this (idm, ifit).*/
                dsp *tmp=dspmulsp(recon->W0, HA[idm][ifit], "nn");
                for(int jdm=0; jdm<ndm; jdm++){
                    dspmulsp2(&FLM[idm][jdm], HAT[ifit][jdm], tmp, "nn",
                              recon->fitwt->p[ifit]);
                }
                dspfree(tmp);
            }
        }
        /*Last use of the transpose was in the loop above.*/
        dspcellfree(HATc);
        if(fabs(parms->fit.tikcr)>1.e-15){
            double tikcr=parms->fit.tikcr;
            /*Estimated from the formula. 1/nloc is due to W0, the other
              scaling is due to ray tracing between different sampling freq.*/
            int nact=0;
            for(int idm=0; idm<parms->ndm; idm++){
                nact+=recon->aloc->p[idm]->nloc;
            }
            double maxeig=4./nact;
            info2("Adding tikhonov constraint of %g to FLM\n", tikcr);
            info2("The maximum eigen value is estimated to be around %e\n", maxeig);
            dcelladdI(recon->FL.M, tikcr*maxeig);
        }

        {/*Low rank terms. */
            recon->FL.U=dcellcat_each(recon->FR.U, recon->fitNW, 2);
            dcell *tmp=NULL;/*negative NW. */
            dcelladd(&tmp, 1, recon->fitNW, -1);
            recon->FL.V=dcellcat_each(recon->FR.U, tmp, 2);
            dcellfree(tmp);
        }
        if(recon->actslave){
            dcelladd(&recon->FL.M, 1, recon->actslave, 1);
        }
        /*dspcellsym(recon->FL.M); */
        info2("DM Fit number of Low rank terms: %ld in RHS, %ld in LHS\n",
              recon->FR.U->p[0]->ny, recon->FL.U->p[0]->ny);
        if(parms->save.recon){
            writebin(recon->FL.M, "FLM.bin");
            writebin(recon->FL.U, "FLU");
            writebin(recon->FL.V, "FLV");
        }
    }
    /*Prepare a direct solver for alg 0 and 2. The svd threshold is only
      passed on for alg==2; otherwise the argument is 0.*/
    if((parms->fit.alg==0 || parms->fit.alg==2) && parms->fit.bgs){
        muv_direct_diag_prep(&(recon->FL), (parms->fit.alg==2)*parms->fit.svdthres);
    }
    if((parms->fit.alg==0 || parms->fit.alg==2) && !parms->fit.bgs){
        if(fabs(parms->fit.tikcr)<1.e-14){
            warning("tickcr=%g is too small, chol may fail.\n", parms->fit.tikcr);
        }
        muv_direct_prep(&(recon->FL), (parms->fit.alg==2)*parms->fit.svdthres);
        info2("After cholesky/svd on matrix:\t%.2f MiB\n", get_job_mem()/1024.);
    }
    if(parms->save.recon){
        /*Save whichever solver products exist.*/
        if(recon->FL.C){
            /*NOTE(review): chol_convert() presumably puts the factor in a form
              suitable for saving -- confirm against its definition.*/
            chol_convert(recon->FL.C, 1);
            chol_save(recon->FL.C, "FLC.bin");
        }
        if(recon->FL.MI)
            writebin(recon->FL.MI, "FLMI");
        if(recon->FL.Up)
            writebin(recon->FL.Up, "FLUp");
        if(recon->FL.Vp)
            writebin(recon->FL.Vp, "FLVp");
        if(recon->FL.CB){
            for(int ib=0; ib<recon->FL.nb; ib++){
                chol_save(recon->FL.CB[ib], "FLCB_%d.bin", ib);
            }
        }
        if(recon->FL.MIB){
            writebin(recon->FL.MIB, "FLMIB");
        }
    }
    info2("After assemble fit matrix:\t%.2f MiB\n", get_job_mem()/1024.);
}