/**
   Refresh the cached DM data used for ray tracing.

   When simu->parms->sim.cachedm is set, the cache of every DM in
   simu->cachedm is zeroed and the matching multi-threaded ray tracing job
   (simu->cachedm_prop[idm]) is queued; the function then waits for all queued
   jobs before returning. The time spent, as measured by myclockd(), is always
   recorded in simu->tk_cache, even when caching is disabled.
*/
void calc_cachedm(SIM_T *simu){
    const double t_begin=myclockd();
    if(simu->parms->sim.cachedm){
        long group=0;
        int idm=0;
        while(idm<simu->parms->ndm){
            /* Zero out the cached data of this DM first ... */
            dzero((dmat*)simu->cachedm->p[idm]);
            /* ... then queue its multi-threaded ray tracing. */
            QUEUE_THREAD(group,(simu->cachedm_prop[idm]), 1);
            idm++;
        }
        /* Do not return before every queued job has completed. */
        WAIT_THREAD(group);
    }
    simu->tk_cache=myclockd()-t_begin;
}
void maos_isim(int isim){ const PARMS_T *parms=global->parms; RECON_T *recon=global->recon; SIM_T *simu =global->simu; int iseed=global->iseed; int simstart=parms->sim.start; int simend=parms->sim.end; if(isim==simstart+1){//skip slow first step. tk_atm=myclockd(); } if(isim+2+parms->sim.dtrat_hi>=simend){ draw_single=0; } double ck_0=myclockd(); simu->isim=isim; simu->status->isim=isim; sim_update_etf(simu); if(parms->atm.frozenflow){ #if USE_CUDA if(parms->gpu.evl || parms->gpu.wfs){ /*may need to copy another part */ gpu_atm2gpu(simu->atm, simu->atmscale, parms, iseed, isim); } #endif }else{ //Do not put this one inside parallel genatm(simu); /*re-seed the atmosphere in case atm is loaded from shm/file */ seed_rand(simu->atm_rand, lrand(simu->init_rand)); } OMPTASK_SINGLE{ if(parms->sim.dmproj){ /* teporarily disable FR.M so that Mfun is used.*/ cell *FRM=recon->FR.M; recon->FR.M=NULL; muv_solve(&simu->dmproj, &recon->FL, &recon->FR, NULL); recon->FR.M=FRM;/*set FR.M back*/ if(parms->save.dm){ zfarr_dcell(simu->save->dmproj, simu->isim, simu->dmproj); } if(!parms->fit.square){ /* Embed DM commands to a square array for fast ray tracing */ for(int idm=0; idm<parms->ndm; idm++){ loc_embed(simu->dmprojsq->p[idm], recon->aloc->p[idm], simu->dmproj->p[idm]->p); } } #if USE_CUDA if(parms->gpu.evl || parms->gpu.wfs){ gpu_dmproj2gpu(simu->dmprojsq); } #endif } save_dmreal(simu); extern int NO_RECON, NO_WFS, NO_EVL; if(PARALLEL){ /* We do the big loop in parallel to make better use the CPUs. Notice that the reconstructor is working on grad from last time step so that there is no confliction in data access. */ /*when we want to apply idealngs correction, wfsgrad need to wait for perfevl. 
*/ long group=0; if(parms->gpu.evl && !NO_EVL){ //Queue tasks on GPU, no stream sync is done QUEUE_THREAD(group, simu->perf_evl_pre, 0); } if(!parms->tomo.ahst_idealngs && parms->gpu.wfs && !NO_WFS){ //task for each wfs QUEUE_THREAD(group, simu->wfs_grad_pre, 0); } if(!NO_RECON){ //don't put this first. It has cpu overhead in computing gradol QUEUE(group, reconstruct, simu, 1, 0); } if(!NO_EVL){ if(parms->gpu.evl){ //wait for GPU tasks to be queued before calling sync WAIT(group); } QUEUE(group, perfevl, simu, 1, 0); } if(!NO_WFS){ if(parms->tomo.ahst_idealngs || (parms->gpu.wfs && !parms->gpu.evl)){ //in ahst_idealngs mode, weight for perfevl to finish. //otherwise, wait for GPU tasks to be queued before calling sync WAIT(group); } QUEUE(group, wfsgrad, simu, 1, 0); } if(!NO_RECON){ //wait for all tasks to finish before modifying dmreal WAIT(group); shift_grad(simu);/*before filter() */ filter_dm(simu);/*updates dmreal, so has to be after prefevl/wfsgrad is done. */ } WAIT(group); }else{/*do the big loop in serial mode. */ if(parms->sim.closeloop){ if(!NO_EVL) perfevl(simu);/*before wfsgrad so we can apply ideal NGS modes */ if(!NO_WFS) wfsgrad(simu);/*output grads to gradcl, gradol */ if(!NO_RECON) { reconstruct(simu);/*uses grads from gradlast cl, gradlast ol. 
*/ shift_grad(simu); filter_dm(simu); } }else{/*in OL mode, */ if(!NO_WFS) wfsgrad(simu); if(!NO_RECON) { shift_grad(simu); reconstruct(simu); filter_dm(simu); } if(!NO_EVL) perfevl(simu); } } } double ck_end=myclockd(); long steps_done=iseed*(simend-simstart)+(isim+1-simstart); long steps_rest=parms->sim.nseed*(simend-simstart)-steps_done; if(isim!=simstart){ simu->status->rest=(long)((ck_end-tk_0-(tk_atm-tk_1)*(iseed+1))/steps_done*steps_rest +(tk_atm-tk_1)*(parms->sim.nseed-iseed-1)); simu->status->mean=(ck_end-tk_atm)/(double)(isim-simstart); } simu->status->laps=(long)(ck_end-tk_0); simu->status->tot =ck_end-ck_0; simu->status->wfs =simu->tk_wfs; simu->status->recon=simu->tk_recon; simu->status->other=simu->tk_cache; simu->status->eval =simu->tk_eval; simu->status->scale=1; if(simu->timing){ simu->timing->p[isim*simu->timing->nx]=get_job_mem(); simu->timing->p[isim*simu->timing->nx+1]=simu->status->tot; simu->timing->p[isim*simu->timing->nx+2]=simu->status->wfs; simu->timing->p[isim*simu->timing->nx+3]=simu->status->recon; simu->timing->p[isim*simu->timing->nx+4]=simu->status->eval; } double this_time=myclockd(); if(this_time>simu->last_report_time+1 || isim+1==simend || parms->sim.pause){ /*we don't print out or report too frequently. */ simu->last_report_time=this_time; #if defined(__linux__) || defined(__APPLE__) scheduler_report(simu->status); #endif print_progress(simu); } }