/**
   Client side of the latency test. In TCP mode (type==1), repeatedly exchange
   messages with the server and log the per-call timing. Uses the file-scope
   buffer temp and count N defined elsewhere in this file.
*/
int client(const char *hostname, int port, int type){
    int sock;
    int nretry=10;
    if(type==1){
        sock=connect_port(hostname, port, 0, 0);
        if(sock<0) exit(1);
        for(long iN=1; iN<=N; iN++){
            for(int i=0; i<nretry; i++){
                double tim1=myclockd();
                WRITE(sock, &iN, sizeof(long));
                double tim2=myclockd();
                READ(sock, temp, sizeof(double));
                double tim3=myclockd();
                WRITE(sock, temp, sizeof(double)*iN);
                double tim4=myclockd();
                READ(sock, temp, sizeof(double));
                double tim5=myclockd();
                dbg("N=%ld, send %5.1f, read %5.1f, send2 %5.1f read2 %5.1f\n",
                    iN, (tim2-tim1)*1e6, (tim3-tim2)*1e6, (tim4-tim3)*1e6, (tim5-tim4)*1e6);
            }
        }
    }else{
        sock=socket(AF_INET, SOCK_DGRAM, 0);//UDP mode (not exercised further here)
    }
    return 0;
}
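/*
  The READ/WRITE macros used above are not defined in this excerpt. A minimal
  sketch of the full-transfer wrappers they presumably expand to follows; the
  names dowrite/doread are hypothetical. A single read()/write() on a socket
  may transfer fewer bytes than requested, so both loop until done.
*/
#include <unistd.h>
#include <errno.h>
static int dowrite(int fd, const void *buf, size_t n){
    const char *p=(const char*)buf;
    while(n>0){
        ssize_t k=write(fd, p, n);
        if(k<0){
            if(errno==EINTR) continue;/*interrupted: retry*/
            return -1;
        }
        p+=k; n-=k;
    }
    return 0;
}
static int doread(int fd, void *buf, size_t n){
    char *p=(char*)buf;
    while(n>0){
        ssize_t k=read(fd, p, n);
        if(k<0){
            if(errno==EINTR) continue;
            return -1;
        }
        if(k==0) return -1;/*EOF before n bytes arrived*/
        p+=k; n-=k;
    }
    return 0;
}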
/**
   Partition the ray tracing by DM/destination combination, as well as by
   segments within each combination, for maximum efficiency.
*/
void calc_cachedm(SIM_T *simu){
    double tk_start=myclockd();
    if(simu->parms->sim.cachedm){
        long group=0;
        /*zero out the data. */
        for(int idm=0; idm<simu->parms->ndm; idm++){
            dzero((dmat*)simu->cachedm->p[idm]);
            /*do the multi-threaded ray tracing */
            QUEUE_THREAD(group, (simu->cachedm_prop[idm]), 1);
        }
        WAIT_THREAD(group);
    }
    simu->tk_cache=myclockd()-tk_start;
}
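/*
  QUEUE_THREAD()/WAIT_THREAD() above implement a fork-join pattern: each DM's
  ray-tracing task is queued to MAOS's thread pool under a group counter, and
  WAIT_THREAD() blocks until every task in the group has completed. A rough
  illustration of the same fork-join idea with bare pthreads follows; it is a
  sketch only (task_t and run_tasks are hypothetical), not the MAOS pool.
*/
#include <pthread.h>
#include <stdlib.h>
typedef struct{
    void (*fun)(void*);/*task entry point*/
    void *arg;         /*task argument*/
}task_t;
static void *task_runner(void *p){
    task_t *t=(task_t*)p;
    t->fun(t->arg);
    return NULL;
}
/*Fork one thread per task, then join them all (the WAIT_THREAD step).*/
static void run_tasks(task_t *tasks, int ntask){
    pthread_t *tid=(pthread_t*)malloc(ntask*sizeof(pthread_t));
    for(int i=0; i<ntask; i++){
        pthread_create(&tid[i], NULL, task_runner, &tasks[i]);
    }
    for(int i=0; i<ntask; i++){
        pthread_join(tid[i], NULL);
    }
    free(tid);
}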
int server(int sock){
    double tim1=myclockd();
    long nr;
    READ(sock, &nr, sizeof(long));
    double tim2=myclockd();
    WRITE(sock, temp, sizeof(double));
    double tim3=myclockd();
    READ(sock, temp, sizeof(double)*nr);
    double tim4=myclockd();
    WRITE(sock, temp, sizeof(double));
    double tim5=myclockd();
    //dbg("read %5.1f, send %5.1f, read2 %5.1f send2 %5.1f\n", (tim2-tim1)*1e6, (tim3-tim2)*1e6, (tim4-tim3)*1e6, (tim5-tim4)*1e6);
    return 0;
}
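/*
  myclockd() is used for all the timing in this file; from its usage it
  returns wall-clock time in seconds as a double (differences are scaled by
  1e6 to report microseconds). A minimal sketch assuming POSIX
  clock_gettime(); the real implementation may differ:
*/
#include <time.h>
static double myclockd_sketch(void){
    struct timespec ts;
    clock_gettime(CLOCK_REALTIME, &ts);
    return (double)ts.tv_sec+(double)ts.tv_nsec*1e-9;
}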
/**
   Prepare a simulation run for seed iseed: check the per-seed file lock,
   initialize the simulation structure, and generate the atmosphere.
*/
SIM_T *maos_iseed(int iseed){
    if(iseed==0) tk_0=myclockd();
    tk_1=myclockd();
    const PARMS_T *parms=global->parms;
    POWFS_T *powfs=global->powfs;
    APER_T *aper =global->aper;
    RECON_T *recon=global->recon;
    if(parms->fdlock && parms->fdlock->p[iseed]<0){
        warning("Another MAOS is already running. Skip seed %ld\n", parms->sim.seeds->p[iseed]);
        return 0;
    }
    if(!parms->sim.pause){
        draw_single=1;//Only draw the active frame.
    }else{
        draw_single=0;
    }
    global->iseed=iseed;
    SIM_T *simu=init_simu(parms, powfs, aper, recon, iseed);
    global->simu=simu;
    if(parms->atm.frozenflow){
        genatm(simu);/*Generate atmospheric screen(s) for frozen flow.*/
        if(parms->tomo.predict){
            if(recon->HXWtomo){
                setup_recon_HXW_predict(simu);
            }
            if(parms->tomo.precond==1){
                fdpcg_free(recon->fdpcg);
                recon->fdpcg=fdpcg_prepare(parms, recon, powfs, parms->tomo.predict?simu->atm:NULL);
            }
        }
    }
#if USE_CUDA
    if(parms->gpu.evl || parms->gpu.wfs){
        /*put here to avoid messing up timing due to transferring. */
        gpu_atm2gpu(simu->atm, simu->atmscale, parms, iseed, parms->sim.start);/*takes 0.4s for NFIRAOS. */
        if(parms->tomo.predict){
            gpu_update_recon_cn2(parms, recon);
        }
    }
#endif
    return simu;
}
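/*
  parms->fdlock holds one lock file descriptor per seed; a negative value
  means another MAOS instance already owns that seed. The locking code itself
  is not in this excerpt; a minimal sketch of per-seed locking with POSIX
  flock() (lock_seed is a hypothetical helper) might look like:
*/
#include <fcntl.h>
#include <sys/file.h>
#include <unistd.h>
static int lock_seed(const char *fn){
    int fd=open(fn, O_RDWR|O_CREAT, 0644);
    if(fd<0) return -1;
    if(flock(fd, LOCK_EX|LOCK_NB)){/*non-blocking: fails if already held*/
        close(fd);
        return -1;/*another process owns this seed*/
    }
    return fd;/*keep the fd open to hold the lock*/
}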
/**
   Client side of the bandwidth test: send messages of increasing length and
   record the round-trip timing for each repetition.
*/
int client(const char *hostname, int port, int nmin, int nmax, int nstep, int nrep){
    int sock=connect_port(hostname, port, 0, 1);
    if(sock<0
       || stwriteint(sock, nstep)
       || stwriteint(sock, nmin)
       || stwriteint(sock, nmax)
       || stwriteint(sock, nrep)){
        warning("Unable to connect to %s\n", hostname);
        if(sock>=0) close(sock);
        return 1;
    }
    buf1=(char*)malloc(nmax*nstep);
    for(int i=0; i<10; i++){//warm up
        stwrite(sock, buf1, nmax);
        stread(sock, buf1, 64);
        usleep(500);
    }
    double tim1, tim2, tim3;
    int nlen=(nmax-nmin+nstep)/nstep;
    dmat *timing=dnew(nrep, nlen);
    dmat *timing2=dnew(nrep, nlen);
    int ilen=-1;
    for(int len=nmin; len<=nmax; len+=nstep){
        ilen++;
        info("len=%d\n", len);
        for(int irep=0; irep<nrep; irep++){
            if(irep%800==0){
                info("irep=%d of %d\n", irep, nrep);
            }
            usleep(500);
            tim1=myclockd();
            stwrite(sock, buf1, len);
            tim2=myclockd();
            stread(sock, buf1, 64);
            tim3=myclockd();
            timing->p[irep+ilen*nrep]=tim3-tim1;
            timing2->p[irep+ilen*nrep]=tim2-tim1;
        }
    }
    close(sock);
    writebin(timing, "pix_timing_%s_%d_%d_%d", HOST, nmin, nmax, nstep);
    writebin(timing2, "pix_timing2_%s_%d_%d_%d", HOST, nmin, nmax, nstep);
    dbg("done\n");
    return 0;
}
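/*
  The saved dmat stores nrep round-trip samples per message length in
  column-major order, matching timing->p[irep+ilen*nrep] above. A small
  illustrative helper (not part of the original test) to reduce it to a mean
  round trip per length:
*/
static void timing_means(const dmat *timing, int nrep, int nlen){
    for(int ilen=0; ilen<nlen; ilen++){
        double sum=0;
        for(int irep=0; irep<nrep; irep++){
            sum+=timing->p[irep+ilen*nrep];
        }
        info("length index %d: mean round trip %.1f us\n", ilen, sum/nrep*1e6);
    }
}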
/* Record the host after the connection is established. */
static void host_added(int ihost, int sock){
    htime[ihost]=myclockd();
    proc_remove_all(ihost);/*remove all entries. */
    LOCK(mhost);
    nhostup++;
    hsock[ihost]=sock;
    FD_SET(sock, &active_fd_set);
    UNLOCK(mhost);
    add_host_wrap(-1);//wakes up listen_host().
    info("connected to %s\n", hosts[ihost]);
    gdk_threads_add_idle(host_up, GINT_TO_POINTER(ihost));
}
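/*
  add_host_wrap(-1) wakes up listen_host() by writing a command to
  sock_main[1]; sock_main[0] is a member of active_fd_set, so select()
  returns. A minimal sketch of this self-pipe pattern, assuming sock_main is
  created with socketpair() (the names below are hypothetical):
*/
#include <sys/socket.h>
#include <unistd.h>
static int sock_main_sketch[2];
static void wakeup_init_sketch(void){
    socketpair(AF_UNIX, SOCK_STREAM, 0, sock_main_sketch);
}
static void wakeup_sketch(int cmd){
    /*the select() loop reads this int from sock_main_sketch[0]*/
    if(write(sock_main_sketch[1], &cmd, sizeof(int))!=sizeof(int)){
        warning("wakeup write failed\n");
    }
}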
/**
   listen_host() lives in a separate thread. It has the following
   responsibilities:
   1) listen for commands from the main thread to initiate connections to servers;
   2) listen to connected servers for maos status events and update the display;
   3) monitor connected servers for activity, and disable pages when a server
   is disconnected.

   A write to sock_main[1] is caught by select() in listen_host(), which
   wakes it up.
*/
void listen_host(){
    htime=calloc(nhost, sizeof(double));
    FD_ZERO(&active_fd_set);
    FD_SET(sock_main[0], &active_fd_set);
    int keep_listen=1;
    while(keep_listen){
        fd_set read_fd_set=active_fd_set;
        if(select(FD_SETSIZE, &read_fd_set, NULL, NULL, NULL)<0){
            perror("select");
            continue;
        }
        for(int i=0; i<FD_SETSIZE; i++){
            if(FD_ISSET(i, &read_fd_set)){
                int res=respond(i);
                if(res==-2){//quit
                    keep_listen=0;
                    break;
                }else if(res==-1){//remove host
                    host_removed(i);
                }
            }
        }
        double ntime=myclockd();
        for(int ihost=0; ihost<nhost; ihost++){
            if(hsock[ihost]>-1 && htime[ihost]+10<ntime){
                //10 second grace period
                info2("No response for 10 seconds. Disconnecting.\n");
                host_removed(hsock[ihost]);
            }
        }
    }
    for(int i=0; i<FD_SETSIZE; i++){
        if(FD_ISSET(i, &active_fd_set)){
            close(i);
            FD_CLR(i, &active_fd_set);
        }
    }
}
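/*
  Because select() above is called with a NULL timeout, the 10-second
  staleness check only runs when some descriptor becomes readable. If the
  check should also fire while all hosts are idle, a timeout can be passed
  instead; a sketch of that variation (this is a suggestion, not the original
  code):
*/
#include <sys/select.h>
static int select_with_timeout(fd_set *read_fd_set){
    struct timeval tv={.tv_sec=10, .tv_usec=0};/*reset each call: select() may modify it*/
    return select(FD_SETSIZE, read_fd_set, NULL, NULL, &tv);/*returns 0 on timeout*/
}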
/**
   Advance the simulation by one time step.
*/
void maos_isim(int isim){
    const PARMS_T *parms=global->parms;
    RECON_T *recon=global->recon;
    SIM_T *simu =global->simu;
    int iseed=global->iseed;
    int simstart=parms->sim.start;
    int simend=parms->sim.end;
    if(isim==simstart+1){//skip the slow first step.
        tk_atm=myclockd();
    }
    if(isim+2+parms->sim.dtrat_hi>=simend){
        draw_single=0;
    }
    double ck_0=myclockd();
    simu->isim=isim;
    simu->status->isim=isim;
    sim_update_etf(simu);
    if(parms->atm.frozenflow){
#if USE_CUDA
        if(parms->gpu.evl || parms->gpu.wfs){
            /*may need to copy another part */
            gpu_atm2gpu(simu->atm, simu->atmscale, parms, iseed, isim);
        }
#endif
    }else{
        //Do not put this one inside parallel
        genatm(simu);
        /*re-seed the atmosphere in case atm is loaded from shm/file */
        seed_rand(simu->atm_rand, lrand(simu->init_rand));
    }
    OMPTASK_SINGLE{
        if(parms->sim.dmproj){
            /* temporarily disable FR.M so that Mfun is used.*/
            cell *FRM=recon->FR.M;
            recon->FR.M=NULL;
            muv_solve(&simu->dmproj, &recon->FL, &recon->FR, NULL);
            recon->FR.M=FRM;/*set FR.M back*/
            if(parms->save.dm){
                zfarr_dcell(simu->save->dmproj, simu->isim, simu->dmproj);
            }
            if(!parms->fit.square){
                /* Embed DM commands in a square array for fast ray tracing */
                for(int idm=0; idm<parms->ndm; idm++){
                    loc_embed(simu->dmprojsq->p[idm], recon->aloc->p[idm], simu->dmproj->p[idm]->p);
                }
            }
#if USE_CUDA
            if(parms->gpu.evl || parms->gpu.wfs){
                gpu_dmproj2gpu(simu->dmprojsq);
            }
#endif
        }
        save_dmreal(simu);
        extern int NO_RECON, NO_WFS, NO_EVL;
        if(PARALLEL){
            /*
              We do the big loop in parallel to make better use of the CPUs.
              Notice that the reconstructor works on grads from the last time
              step, so there is no conflict in data access.
              When we want to apply the idealngs correction, wfsgrad needs to
              wait for perfevl.
            */
            long group=0;
            if(parms->gpu.evl && !NO_EVL){
                //Queue tasks on the GPU; no stream sync is done
                QUEUE_THREAD(group, simu->perf_evl_pre, 0);
            }
            if(!parms->tomo.ahst_idealngs && parms->gpu.wfs && !NO_WFS){
                //one task for each wfs
                QUEUE_THREAD(group, simu->wfs_grad_pre, 0);
            }
            if(!NO_RECON){
                //don't put this first. It has cpu overhead in computing gradol
                QUEUE(group, reconstruct, simu, 1, 0);
            }
            if(!NO_EVL){
                if(parms->gpu.evl){
                    //wait for GPU tasks to be queued before calling sync
                    WAIT(group);
                }
                QUEUE(group, perfevl, simu, 1, 0);
            }
            if(!NO_WFS){
                if(parms->tomo.ahst_idealngs || (parms->gpu.wfs && !parms->gpu.evl)){
                    //in ahst_idealngs mode, wait for perfevl to finish.
                    //otherwise, wait for GPU tasks to be queued before calling sync
                    WAIT(group);
                }
                QUEUE(group, wfsgrad, simu, 1, 0);
            }
            if(!NO_RECON){
                //wait for all tasks to finish before modifying dmreal
                WAIT(group);
                shift_grad(simu);/*before filter_dm() */
                filter_dm(simu);/*updates dmreal, so it has to run after perfevl/wfsgrad are done. */
            }
            WAIT(group);
        }else{/*do the big loop in serial mode. */
            if(parms->sim.closeloop){
                if(!NO_EVL) perfevl(simu);/*before wfsgrad so we can apply ideal NGS modes */
                if(!NO_WFS) wfsgrad(simu);/*output grads to gradcl, gradol */
                if(!NO_RECON){
                    reconstruct(simu);/*uses grads from gradlastcl, gradlastol. */
                    shift_grad(simu);
                    filter_dm(simu);
                }
            }else{/*in OL mode, */
                if(!NO_WFS) wfsgrad(simu);
                if(!NO_RECON){
                    shift_grad(simu);
                    reconstruct(simu);
                    filter_dm(simu);
                }
                if(!NO_EVL) perfevl(simu);
            }
        }
    }
    double ck_end=myclockd();
    long steps_done=iseed*(simend-simstart)+(isim+1-simstart);
    long steps_rest=parms->sim.nseed*(simend-simstart)-steps_done;
    if(isim!=simstart){
        /*(tk_atm-tk_1) is the per-seed setup time; exclude it from the
          per-step rate, then add it back for the seeds not yet started.*/
        simu->status->rest=(long)((ck_end-tk_0-(tk_atm-tk_1)*(iseed+1))/steps_done*steps_rest
                                  +(tk_atm-tk_1)*(parms->sim.nseed-iseed-1));
        simu->status->mean=(ck_end-tk_atm)/(double)(isim-simstart);
    }
    simu->status->laps=(long)(ck_end-tk_0);
    simu->status->tot =ck_end-ck_0;
    simu->status->wfs =simu->tk_wfs;
    simu->status->recon=simu->tk_recon;
    simu->status->other=simu->tk_cache;
    simu->status->eval =simu->tk_eval;
    simu->status->scale=1;
    if(simu->timing){
        simu->timing->p[isim*simu->timing->nx]  =get_job_mem();
        simu->timing->p[isim*simu->timing->nx+1]=simu->status->tot;
        simu->timing->p[isim*simu->timing->nx+2]=simu->status->wfs;
        simu->timing->p[isim*simu->timing->nx+3]=simu->status->recon;
        simu->timing->p[isim*simu->timing->nx+4]=simu->status->eval;
    }
    double this_time=myclockd();
    if(this_time>simu->last_report_time+1 || isim+1==simend || parms->sim.pause){
        /*we don't print out or report too frequently. */
        simu->last_report_time=this_time;
#if defined(__linux__) || defined(__APPLE__)
        scheduler_report(simu->status);
#endif
        print_progress(simu);
    }
}
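/*
  The remaining-time estimate above treats (tk_atm-tk_1) as the per-seed setup
  cost (init_simu plus atmosphere generation): elapsed time minus the setup of
  the seeds processed so far, divided by the steps completed, gives the
  per-step rate; setup is then added back once per seed not yet started. A
  standalone restatement (eta_sketch is illustrative only):
*/
static long eta_sketch(double elapsed, double setup_per_seed,
                       long steps_done, long steps_rest, int iseed, int nseed){
    double per_step=(elapsed-setup_per_seed*(iseed+1))/steps_done;
    return (long)(per_step*steps_rest+setup_per_seed*(nseed-iseed-1));
}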
//respond to a message from the scheduler
static int respond(int sock){
    int cmd[3];
    //read the fixed length header first.
    if(streadintarr(sock, cmd, 3)){
        return -1;//failed
    }
    int ihost=host_from_sock(sock);
    if(ihost>=0){
        htime[ihost]=myclockd();
    }
    int pid=cmd[2];
    switch(cmd[0]){
    case -1:{//server requests shutdown
        info("disconnect from %s\n", hosts[ihost]);
        return -1;
    }
        break;
    case MON_VERSION:
        break;
    case MON_STATUS:
        {
            if(ihost<0){
                warning("Host not found\n");
                return -1;
            }
            PROC_T *p=proc_get(ihost, pid);
            if(!p){
                p=proc_add(ihost, pid);
            }
            if(stread(sock, &p->status, sizeof(STATUS_T))){
                return -1;
            }
            if(p->status.info==S_REMOVE){
                proc_remove(ihost, pid);
            }else{
                if(cmd[1]!=ihost && cmd[1]!=cmd[2]){
                    /*A new means of replacing the ID of a job.*/
                    p->pid=cmd[1];
                }
                gdk_threads_add_idle((GSourceFunc)refresh, p);
            }
        }
        break;
    case MON_PATH:
        {
            if(ihost<0){
                warning("Host not found\n");
                return -1;
            }
            PROC_T *p=proc_get(ihost, pid);
            if(!p){
                p=proc_add(ihost, pid);
            }
            if(streadstr(sock, &p->path)){
                return -1;
            }
            char *tmp=NULL;
            while((tmp=strchr(p->path, '\n'))){
                tmp[0]=' ';
            }
        }
        break;
    case MON_LOAD:
        {
            if(ihost<0){
                warning("Host not found\n");
                return -1;
            }
            /*cmd[2] packs CPU usage (high 16 bits) and memory usage (low 16
              bits) as integer percentages.*/
            usage_cpu[ihost]=(double)((pid>>16) & 0xFFFF)/100.;
            usage_mem[ihost]=(double)(pid & 0xFFFF)/100.;
            usage_cpu[ihost]=MAX(MIN(1, usage_cpu[ihost]), 0);
            usage_mem[ihost]=MAX(MIN(1, usage_mem[ihost]), 0);
            gdk_threads_add_idle((GSourceFunc)update_progress, GINT_TO_POINTER(ihost));
        }
        break;
    case MON_ADDHOST:
        if(cmd[1]>-1 && cmd[1]<nhost){
            pthread_t tmp;
            pthread_create(&tmp, NULL, (void*(*)(void*))add_host, GINT_TO_POINTER(cmd[1]));
        }else if(cmd[1]==-2){
            return -2;
        }
        break;
    default:
        warning_time("Invalid cmd %d\n", cmd[0]);
        return -1;
    }
    return 0;
}
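/*
  The MON_LOAD decoding above implies the scheduler packs both loads into one
  int: CPU percentage in the high 16 bits, memory percentage in the low 16
  bits. A sketch of the matching sender-side packing (the scheduler code is
  not in this excerpt; pack_load is hypothetical):
*/
static int pack_load(double cpu_frac, double mem_frac){
    int cpu=(int)(cpu_frac*100.+0.5);/*0..100, rounded*/
    int mem=(int)(mem_frac*100.+0.5);
    return (cpu<<16)|(mem & 0xFFFF);
}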
//server for mvmfull_real
int mvm_server(int sock){
    int cmd[7];
    if(streadintarr(sock, cmd, 7)){
        return -1;
    }
    int nact=cmd[0];
    int nsa=cmd[1];
    int sastep=cmd[2];
    int totpix=cmd[3];
    int pixpsa=totpix;
    int nstep=cmd[4];
    int nstep0=cmd[5];
    int type=cmd[6];
    dbg("type=%d, nact=%d, nsa=%d, sastep=%d, %s=%d, nstep=%d\n",
        type, nact, nsa, sastep, type==1?"pixpsa":"totpix", totpix, nstep);
    int *saind=NULL;
    if(type==1){//mvmfull_iwfs
        totpix=pixpsa*nsa;
    }else{//mvmfull_real
        saind=mymalloc((nsa+1), int);
        if(streadintarr(sock, saind, nsa+1)){
            return -1;
        }
    }
    short *pix=mymalloc(totpix, short);
    if(type==1){
        rand_t rseed;
        seed_rand(&rseed, 1);
        for(int i=0; i<totpix; i++){
            pix[i]=(short)randu(&rseed);
        }
    }else{
        if(stread(sock, pix, totpix*sizeof(short))){
            return -1;
        }
    }
    smat *dmres=snew(nact, 1);
    int ready;
    streadint(sock, &ready);//wait for the client to be ready.
#if __linux__
    struct timespec ct;
    clock_gettime(CLOCK_MONOTONIC, &ct);
    int readtime_ns=500000;  //500 microsecond read out time.
    int frametime_ns=1250000;//frame time.
    int nsend=((nsa+sastep-1)/sastep);   //number of segments sent during read out
    int int1_ns=readtime_ns/nsend;       //interval between segments
    int int2_ns=frametime_ns-readtime_ns;//interval after the last segment.
#endif
    TIC;tic;
    for(int istep=-nstep0; istep<nstep; istep++){
        //info("\rSend trigger ");
#if __linux__
        //scheduled start time of the frame.
        double tk0=(double)ct.tv_sec+(double)ct.tv_nsec*1.e-9;
#else
        tic;
#endif
        for(int isa=0; isa<nsa; isa+=sastep){
#if __linux__
            if(clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &ct, NULL)){
                warning("clock_nanosleep is interrupted\n");
            }
            if(isa==0){
                tic;
            }
#endif
            int nleft;
            if(type==1){
                nleft=((nsa-isa)<sastep?(nsa-isa):sastep)*pixpsa;
            }else{
                if(nsa<isa+sastep){//last segment
                    nleft=totpix-saind[isa];
                }else{
                    nleft=saind[isa+sastep]-saind[isa];
                }
            }
            if(stwrite(sock, pix+(type==1?pixpsa*isa:saind[isa]), 2*nleft)){//2 byte data.
                warning("failed: %s\n", strerror(errno));
                return -1;
            }
#if __linux__
            ct.tv_nsec+=int1_ns;
            while(ct.tv_nsec>=1000000000){
                ct.tv_nsec-=1000000000;
                ct.tv_sec++;
            }
#endif
        }
        if(stread(sock, dmres->p, sizeof(float)*nact)){
            warning("read dmres failed: %s\n", strerror(errno));
            return -1;
        }
        ready=(int)(toc3*1e6);//mvm is finished.
#if __linux__
        if(nstep<100){
            dbg("tk=%.6f tic=%.6f, toc=%.6f, ready=%.6f\n", tk0, tk, myclockd(), ready*1e-6);
        }
#endif
        if(stwriteint(sock, ready)){
            warning("write ready failed: %s\n", strerror(errno));
            return -1;
        }
        //set the next frame start time.
#if __linux__
        ct.tv_nsec+=int2_ns;
        while(ct.tv_nsec>=1000000000){
            ct.tv_nsec-=1000000000;
            ct.tv_sec++;
        }
#endif
        if((istep & 0xFF)==0xFF){
            info("%d %d us.\n", istep, ready);
        }
    }
    info("\n");
    return -1;
}
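/*
  The pacing above advances an absolute CLOCK_MONOTONIC deadline and sleeps
  with TIMER_ABSTIME, which avoids cumulative drift from scheduling jitter.
  The nanosecond carry is normalized twice in the loop; a small helper could
  factor it out (deadline_add_ns is illustrative, not in the original):
*/
#include <time.h>
static void deadline_add_ns(struct timespec *ct, long ns){
    ct->tv_nsec+=ns;
    while(ct->tv_nsec>=1000000000L){/*carry nanoseconds into seconds*/
        ct->tv_nsec-=1000000000L;
        ct->tv_sec++;
    }
}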