void Context::clear() { TimerStart("clear"); #ifdef _CTXH ObjectMap::Entry *entry = mVars.first(); while (entry) { if (entry->second) { if (entry->second->refCount() <= 0) { std::ostringstream oss; oss << "*** Object in context has already been deleted"; throw std::runtime_error(oss.str()); } entry->second->decRef(); } entry = mVars.next(); } #else ObjectMap::iterator it = mVars.begin(); while (it != mVars.end()) { if (it->second) { if (it->second->refCount() <= 0) { std::ostringstream oss; oss << "*** Object \"" << it->first << "\" in context has already been deleted"; throw std::runtime_error(oss.str()); } it->second->decRef(); } ++it; } #endif mVars.clear(); TimerEnd("clear"); }
/*
 * SystemDestruct - run the application's shutdown sequence.
 *
 * Calls the teardown routine of each subsystem in a fixed order. The order is
 * kept exactly as written; the routines are defined elsewhere and may depend
 * on one another.
 *
 * Returns: there is no explicit return statement in this body.
 * NOTE(review): ReturnOK() is presumably a macro that expands to a
 * "return <success code>;" - confirm against its definition.
 */
int SystemDestruct(void)
{
    save_paper();             //By zjh
    SetIntSign();             /* original note: "cli" - disable message creation */
    TimerEnd();
    FontEnd();
    CloseCache();
    PageFinish();
    MouseDestruct();
    UnlockMouseMemory();
    WindowEnd();
    ItemFinish();
    ChineseLibDone();
    GraphFinish();
    HandleFinish();
    WriteDefaultScreenMode();
    ReturnOK();
}
/*! This function is the driver routine for the calculation of hydrodynamical
 *  force and rate of change of entropy due to shock heating for all active
 *  particles.
 *
 *  Outline of what the body does:
 *   - counts the local gas particles whose Ti_endstep equals All.Ti_Current
 *     and sums that count over all tasks (ntot);
 *   - repeats, while particles are left (ntotleft > 0):
 *       1. evaluates local particles, either one by one with
 *          hydro_evaluate(i, 0) or in one batch through the *_gx (CUDA) path,
 *          and appends an entry to HydroDataIn for every (particle, task)
 *          pair flagged for export;
 *       2. sorts the export list and exchanges the per-task send counts;
 *       3. exchanges export data pairwise (partner = ThisTask ^ ngrp),
 *          evaluates the imported particles with hydro_evaluate(j, 1),
 *          sends the partial results back and accumulates them into SphP;
 *   - rescales DtEntropy for the active particles;
 *   - reduces the timing counters onto task 0 and adds them to All.CPU_Hyd*.
 *
 *  Blocks fenced by the "GX" comment bars are the GPU/instrumentation
 *  additions to the original routine.
 */
void hydro_force(void)
{
  TimerBeg(90);                 /* timer 90 spans the whole routine */
  long long ntot, ntotleft;
  int i, j, k, n, ngrp, maxfill, source, ndone;
  int *nbuffer, *noffset, *nsend_local, *nsend, *numlist, *ndonelist;
  int level, sendTask, recvTask, nexport, place;
  double soundspeed_i;
  double tstart, tend, sumt, sumcomm;
  double timecomp = 0, timecommsumm = 0, timeimbalance = 0, sumimbalance;
  MPI_Status status;

#ifdef PERIODIC
  /* cache the box dimensions (optionally stretched per axis) in file-level variables */
  boxSize = All.BoxSize;
  boxHalf = 0.5 * All.BoxSize;
#ifdef LONG_X
  boxHalf_X = boxHalf * LONG_X;
  boxSize_X = boxSize * LONG_X;
#endif
#ifdef LONG_Y
  boxHalf_Y = boxHalf * LONG_Y;
  boxSize_Y = boxSize * LONG_Y;
#endif
#ifdef LONG_Z
  boxHalf_Z = boxHalf * LONG_Z;
  boxSize_Z = boxSize * LONG_Z;
#endif
#endif

  if(All.ComovingIntegrationOn)
    {
      /* Factors for comoving integration of hydro */
      hubble_a = All.Omega0 / (All.Time * All.Time * All.Time)
        + (1 - All.Omega0 - All.OmegaLambda) / (All.Time * All.Time) + All.OmegaLambda;

      hubble_a = All.Hubble * sqrt(hubble_a);
      hubble_a2 = All.Time * All.Time * hubble_a;

      fac_mu = pow(All.Time, 3 * (GAMMA - 1) / 2) / All.Time;

      fac_egy = pow(All.Time, 3 * (GAMMA - 1));

      fac_vsic_fix = hubble_a * pow(All.Time, 3 * GAMMA_MINUS1);

      a3inv = 1 / (All.Time * All.Time * All.Time);
      atime = All.Time;
    }
  else
    /* non-comoving run: every conversion factor is unity */
    hubble_a = hubble_a2 = atime = fac_mu = fac_vsic_fix = a3inv = fac_egy = 1.0;

  /* `NumSphUpdate' gives the number of particles on this processor that want a force update */
  for(n = 0, NumSphUpdate = 0; n < N_gas; n++)
    {
      if(P[n].Ti_endstep == All.Ti_Current)
        NumSphUpdate++;
    }

  /* NOTE(review): the buffer holds NTask*NTask ints although the gather below
     only stores one int per task - over-allocation, not an overflow. */
  numlist = malloc(NTask * sizeof(int) * NTask);
  MPI_Allgather(&NumSphUpdate, 1, MPI_INT, numlist, 1, MPI_INT, MPI_COMM_WORLD);
  for(i = 0, ntot = 0; i < NTask; i++)
    ntot += numlist[i];
  free(numlist);

  /* NOTE(review): none of the malloc() results in this routine is checked. */
  noffset = malloc(sizeof(int) * NTask);	/* offsets of bunches in common list */
  nbuffer = malloc(sizeof(int) * NTask);
  nsend_local = malloc(sizeof(int) * NTask);
  nsend = malloc(sizeof(int) * NTask * NTask);
  ndonelist = malloc(sizeof(int) * NTask);

  i = 0;			/* first particle for this task */
  ntotleft = ntot;		/* particles left for all tasks together */

  ///////////////// GX //////////////////////
  FUN_MESSAGE(2,"hydro_force()");
#ifdef CUDA_GX_NO_SPH_SUPPORT
  /* force the CPU path for this call; the saved mode is restored before returning */
  int oldcudamode=s_gx.cudamode;
  s_gx.cudamode=0;
#endif
  double starttime,subtime=-1,cpytime=-1;   /* -1 marks "not measured" for the values passed to PrintInfoFinalize */
  const int Np=PrintInfoInitialize(N_gas,s_gx.cudamode,1);
  int iter=0;                               /* number of passes through the while loop below */
  ///////////////// GX //////////////////////

  while(ntotleft > 0)
    {
      ///////////////// GX //////////////////////
      /* i != 0 means an earlier pass stopped early (export buffer full); the CUDA mode rejects that */
      if (s_gx.cudamode!=0 && i!=0) ERROR("cuda mode does not support iterations in hydro calc, try to increasing the 'BufferSize' in the parameter file to surcomevent this problem");
      iter++;
      ///////////////// GX //////////////////////

      for(j = 0; j < NTask; j++)
        nsend_local[j] = 0;

      /* do local particles and prepare export list */
      TimerBeg(91);
      TimerBeg(93);
      starttime=GetTime();
      tstart = second();

      /* CPU path: taken when CUDA is off, or when Np differs from N_gas, or
         when Np is below the GPU threshold */
      if (s_gx.cudamode==0 || (Np!=N_gas || Np<MIN_SPH_PARTICLES_FOR_GPU_GX)) {
        //if (s_gx.cudamode==0 || Np<MIN_SPH_PARTICLES_FOR_GPU_GX) {
#ifdef CUDA_GX_CHUNCK_MANAGER_SPH
        ReLaunchChunkManager();
#endif
        /* stop early once fewer than NTask free export slots remain, since one
           particle can add up to NTask entries */
        for(nexport = 0, ndone = 0; i < N_gas && nexport < All.BunchSizeHydro - NTask; i++)
          if(P[i].Ti_endstep == All.Ti_Current)
            {
              ndone++;

              for(j = 0; j < NTask; j++)
                Exportflag[j] = 0;

              /* presumably sets Exportflag[] for tasks that also need this particle - confirm in hydro_evaluate */
              hydro_evaluate(i, 0);
              TimerUpdateCounter(91,1);

              for(j = 0; j < NTask; j++)
                {
                  if(Exportflag[j])
                    {
                      for(k = 0; k < 3; k++)
                        {
                          HydroDataIn[nexport].Pos[k] = P[i].Pos[k];
                          HydroDataIn[nexport].Vel[k] = SphP[i].VelPred[k];
                        }
                      HydroDataIn[nexport].Hsml = SphP[i].Hsml;
                      HydroDataIn[nexport].Mass = P[i].Mass;
                      HydroDataIn[nexport].DhsmlDensityFactor = SphP[i].DhsmlDensityFactor;
                      HydroDataIn[nexport].Density = SphP[i].Density;
                      HydroDataIn[nexport].Pressure = SphP[i].Pressure;
                      HydroDataIn[nexport].Timestep = P[i].Ti_endstep - P[i].Ti_begstep;

                      /* calculation of F1 */
                      soundspeed_i = sqrt(GAMMA * SphP[i].Pressure / SphP[i].Density);
                      HydroDataIn[nexport].F1 = fabs(SphP[i].DivVel) /
                        (fabs(SphP[i].DivVel) + SphP[i].CurlVel +
                         0.0001 * soundspeed_i / SphP[i].Hsml / fac_mu);

                      HydroDataIn[nexport].Index = i;   /* local index, used later to add the returned result */
                      HydroDataIn[nexport].Task = j;    /* destination task */
                      nexport++;
                      nsend_local[j]++;
                    }
                }
            }
#ifdef CUDA_GX_CHUNCK_MANAGER_SPH
        ManageChuncks(1);
#endif
      } else {
        ///////////////// GX //////////////////////
        /* GPU path: one batched evaluation, then results and export flags are read back per particle */
        cpytime=GetTime();
        ASSERT_GX(s_gx.cudamode>0);
        if (i!=0) ERROR("cuda mode does not support iterations in hydro calc, try to increasing the 'BufferSize' in the parameter file to surcomevent this problem");

        const int Np2=InitializeHydraCalculation_gx(NumPart,P,SphP,N_gas,hubble_a2, fac_mu, fac_vsic_fix
#ifdef PERIODIC
          ,boxSize,boxHalf
#endif
          );
        if (Np2==0) WARNING("no sph particles participate in this timestep");
        ASSERT_GX( Np2==Np );

        cpytime = GetTime()-cpytime;   /* time spent in the initialisation call above */
        subtime=GetTime();

        hydro_evaluate_range_cuda_gx(0,N_gas,s_gx,p_gx,h_gx);
        subtime=GetTime()-subtime;     /* time spent in the batched evaluation call */

        for(nexport = 0, ndone = 0; i < N_gas && nexport < All.BunchSizeHydro - NTask; i++)
          if(P[i].Ti_endstep == All.Ti_Current)
            {
              ndone++;

              /* NOTE(review): Exportflag[] is cleared here but this branch reads the
                 export decision from GetExportflag_gx() below instead. */
              for(j = 0; j < NTask; j++)
                Exportflag[j] = 0;

              ASSERT_GX( P[i].Type==0 );
              //hydro_evaluate_cuda_gx(i, 0,&s_gx,&p_gx);
              TimerUpdateCounter(91,1);

              /* copy the precomputed result for particle i into SphP */
              ASSERT_GX(i<s_gx.sz_result_hydro);
              const struct result_hydro_gx r=s_gx.result_hydro[i];
              ASSERT_GX( isResultHydraDataOK(r,__FILE__,__LINE__) );

              for(k = 0; k < 3; k++)
                SphP[i].HydroAccel[k] = r.Acc[k];
              SphP[i].DtEntropy = r.DtEntropy;
              SphP[i].MaxSignalVel = r.MaxSignalVel;

              /* export entries are only built when s_gx.NTask > 1 */
              if (s_gx.NTask>1){
                for(j = 0; j < NTask; j++)
                  {
                    const char export_this=GetExportflag_gx(&s_gx,i,NTask,j);
                    if(export_this)
                      {
                        for(k = 0; k < 3; k++)
                          {
                            HydroDataIn[nexport].Pos[k] = P[i].Pos[k];
                            HydroDataIn[nexport].Vel[k] = SphP[i].VelPred[k];
                          }
                        HydroDataIn[nexport].Hsml = SphP[i].Hsml;
                        HydroDataIn[nexport].Mass = P[i].Mass;
                        HydroDataIn[nexport].DhsmlDensityFactor = SphP[i].DhsmlDensityFactor;
                        HydroDataIn[nexport].Density = SphP[i].Density;
                        HydroDataIn[nexport].Pressure = SphP[i].Pressure;
                        HydroDataIn[nexport].Timestep = P[i].Ti_endstep - P[i].Ti_begstep;

                        // calculation of F1 (same expression as in the CPU branch)
                        soundspeed_i = sqrt(GAMMA * SphP[i].Pressure / SphP[i].Density);
                        HydroDataIn[nexport].F1 = fabs(SphP[i].DivVel) /
                          (fabs(SphP[i].DivVel) + SphP[i].CurlVel +
                           0.0001 * soundspeed_i / SphP[i].Hsml / fac_mu);

                        HydroDataIn[nexport].Index = i;
                        HydroDataIn[nexport].Task = j;
                        nexport++;
                        nsend_local[j]++;
                      }
                  }
              }
            }
        ///////////////// GX //////////////////////
      }
      TimerEnd(93);
      tend = second();
      timecomp += timediff(tstart, tend);

      ///////////////// GX //////////////////////
      PrintInfoFinalize(s_gx,ndone,Np,starttime,cpytime,subtime,1,iter,-1,0,0,nexport,0,0,0);
      subtime=-1;
      ///////////////// GX //////////////////////

      /* order the export list with hydro_compare_key; the offsets computed next
         assume the entries for each destination task are contiguous */
      qsort(HydroDataIn, nexport, sizeof(struct hydrodata_in), hydro_compare_key);

      for(j = 1, noffset[0] = 0; j < NTask; j++)
        noffset[j] = noffset[j - 1] + nsend_local[j - 1];

      tstart = second();

      /* after this call nsend[a * NTask + b] is the number of entries task a sends to task b */
      MPI_Allgather(nsend_local, NTask, MPI_INT, nsend, NTask, MPI_INT, MPI_COMM_WORLD);

      tend = second();
      timeimbalance += timediff(tstart, tend);
      TimerEnd(91);
      TimerBeg(92);

      /* now do the particles that need to be exported */

      for(level = 1; level < (1 << PTask); level++)
        {
          tstart = second();
          for(j = 0; j < NTask; j++)
            nbuffer[j] = 0;

          /* take as many partner groups (ngrp) as fit into the receive buffer of every task */
          for(ngrp = level; ngrp < (1 << PTask); ngrp++)
            {
              maxfill = 0;
              for(j = 0; j < NTask; j++)
                {
                  if((j ^ ngrp) < NTask)
                    if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
                      maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
                }
              if(maxfill >= All.BunchSizeHydro)
                break;

              sendTask = ThisTask;   /* assigned but not read anywhere in this routine */
              recvTask = ThisTask ^ ngrp;

              if(recvTask < NTask)
                {
                  if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0)
                    {
                      /* get the particles */
                      MPI_Sendrecv(&HydroDataIn[noffset[recvTask]],
                                   nsend_local[recvTask] * sizeof(struct hydrodata_in), MPI_BYTE,
                                   recvTask, TAG_HYDRO_A,
                                   &HydroDataGet[nbuffer[ThisTask]],
                                   nsend[recvTask * NTask + ThisTask] * sizeof(struct hydrodata_in), MPI_BYTE,
                                   recvTask, TAG_HYDRO_A, MPI_COMM_WORLD, &status);
                    }
                }

              for(j = 0; j < NTask; j++)
                if((j ^ ngrp) < NTask)
                  nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
            }
          tend = second();
          timecommsumm += timediff(tstart, tend);

          /* now do the imported particles */
          tstart = second();

          ///////////////// GX //////////////////////
          // Do exported particles on the CPU/GPU
          TimerBeg(94);
          {
            AssertsOnhasGadgetDataBeenModified_gx(1,1,0);
#if CUDA_DEBUG_GX>1
            /* NOTE(review): this debug message passes GravDataGet (a gravity buffer
               name) inside the hydro routine - confirm it is intended. */
            MESSAGE("INFO: DistRMSGrav=%g",DistRMSGravdata(nbuffer[ThisTask],GravDataGet));
#endif

            starttime=GetTime();
            const int N=nbuffer[ThisTask];   /* number of imported entries received above */

            if (N>0){
              // YYY NOTE: disable GPU exportmode for now!!!
              /* the leading "1 ||" makes this condition always true, so the else branch is never executed */
              if (1 || s_gx.cudamode==0 || N<MIN_SPH_PARTICLES_FOR_GPU_GX || Np<MIN_SPH_PARTICLES_FOR_GPU_GX) {
                for(j = 0; j < nbuffer[ThisTask]; j++)
                  hydro_evaluate(j, 1);
              } else {
                cpytime=GetTime();
                InitializeHydraExportCalculation_gx(N,HydroDataGet);

                subtime=GetTime();
                hydro_evaluate_range_cuda_gx(1,N,s_gx,p_gx,h_gx);
                subtime=GetTime()-subtime;

                FinalizeHydraExportCalculation_gx(N);
                cpytime=GetTime()-cpytime-subtime;   /* elapsed time of this branch minus the evaluation time */
              }
              PrintInfoFinalize(s_gx,0,N,starttime,cpytime,subtime,3,iter,level,0,0,nexport,0,0,0);
              subtime=-1;
            }
          }
          TimerEnd(94);
          ///////////////// GX //////////////////////

          tend = second();
          timecomp += timediff(tstart, tend);

          /* do a block to measure imbalance */
          TimerBeg(95);
          tstart = second();
          MPI_Barrier(MPI_COMM_WORLD);
          tend = second();
          timeimbalance += timediff(tstart, tend);
          TimerEnd(95);

          /* get the result */
          tstart = second();
          for(j = 0; j < NTask; j++)
            nbuffer[j] = 0;

          /* same group walk as in the send phase above, with the transfer direction reversed */
          for(ngrp = level; ngrp < (1 << PTask); ngrp++)
            {
              maxfill = 0;
              for(j = 0; j < NTask; j++)
                {
                  if((j ^ ngrp) < NTask)
                    if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j])
                      maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j];
                }
              if(maxfill >= All.BunchSizeHydro)
                break;

              sendTask = ThisTask;
              recvTask = ThisTask ^ ngrp;

              if(recvTask < NTask)
                {
                  if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0)
                    {
                      /* send the results */
                      MPI_Sendrecv(&HydroDataResult[nbuffer[ThisTask]],
                                   nsend[recvTask * NTask + ThisTask] * sizeof(struct hydrodata_out), MPI_BYTE,
                                   recvTask, TAG_HYDRO_B,
                                   &HydroDataPartialResult[noffset[recvTask]],
                                   nsend_local[recvTask] * sizeof(struct hydrodata_out), MPI_BYTE,
                                   recvTask, TAG_HYDRO_B, MPI_COMM_WORLD, &status);

                      /* add the result to the particles */
                      for(j = 0; j < nsend_local[recvTask]; j++)
                        {
                          source = j + noffset[recvTask];
                          place = HydroDataIn[source].Index;

                          for(k = 0; k < 3; k++)
                            SphP[place].HydroAccel[k] += HydroDataPartialResult[source].Acc[k];

                          SphP[place].DtEntropy += HydroDataPartialResult[source].DtEntropy;

                          /* keep the larger signal velocity of the local and remote contribution */
                          if(SphP[place].MaxSignalVel < HydroDataPartialResult[source].MaxSignalVel)
                            SphP[place].MaxSignalVel = HydroDataPartialResult[source].MaxSignalVel;
                        }
                    }
                }

              for(j = 0; j < NTask; j++)
                if((j ^ ngrp) < NTask)
                  nbuffer[j] += nsend[(j ^ ngrp) * NTask + j];
            }
          tend = second();
          timecommsumm += timediff(tstart, tend);

          /* groups level..ngrp-1 were handled in this pass; the loop's level++ resumes at ngrp */
          level = ngrp - 1;
        }
      TimerEnd(92);

      /* subtract what every task finished in this pass from the global remainder */
      MPI_Allgather(&ndone, 1, MPI_INT, ndonelist, 1, MPI_INT, MPI_COMM_WORLD);
      for(j = 0; j < NTask; j++)
        ntotleft -= ndonelist[j];
    }

  free(ndonelist);
  free(nsend);
  free(nsend_local);
  free(nbuffer);
  free(noffset);

  /* do final operations on results */
  tstart = second();

  for(i = 0; i < N_gas; i++)
    if(P[i].Ti_endstep == All.Ti_Current)
      {
        SphP[i].DtEntropy *= GAMMA_MINUS1 / (hubble_a2 * pow(SphP[i].Density, GAMMA_MINUS1));
#ifdef SPH_BND_PARTICLES
        /* particles with ID 0 get zero entropy change and zero hydro acceleration */
        if(P[i].ID == 0)
          {
            SphP[i].DtEntropy = 0;
            for(k = 0; k < 3; k++)
              SphP[i].HydroAccel[k] = 0;
          }
#endif
      }

  tend = second();
  timecomp += timediff(tstart, tend);

  /* collect some timing information */

  MPI_Reduce(&timecomp, &sumt, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  MPI_Reduce(&timecommsumm, &sumcomm, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  MPI_Reduce(&timeimbalance, &sumimbalance, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

  /* the sums are only reduced onto task 0, which stores the per-task averages */
  if(ThisTask == 0)
    {
      All.CPU_HydCompWalk += sumt / NTask;
      All.CPU_HydCommSumm += sumcomm / NTask;
      All.CPU_HydImbalance += sumimbalance / NTask;
    }
  TimerEnd(90);

#ifdef RESULT_FILE_DUMP_GX
  /* Debug dump of the per-particle results. The first call only opens the
     file; each later call writes one dump, and the process exits with code
     -42 right after the third dump.
     NOTE(review): the fopen() result is not checked before it is written to. */
  static FILE* resultfile=NULL;
  if (resultfile==NULL) {
    char filename[256];
    sprintf(filename,"resultfile.%d.%d.txt",s_gx.cudamode,ThisTask);
    resultfile=fopen(filename,"w");
  }
  else{
    MESSAGE("Dumping result...");
    static int MM=0;   /* number of dumps written so far */
    int j;

    fprintf(resultfile,"Dumping result...N_gas=%d\n",N_gas);
    for(j=0;j<N_gas;++j)
      if(P[j].Ti_endstep == All.Ti_Current){
        static int NN=0;   /* running line counter, kept across calls */
        const int target=j;
        fprintf(resultfile,"m=0, NN=%6d, t=%6d, e=%.4g, v=%.4g, acc={%.2g,%.2g,%.2g}\n",NN++,target,SphP[target].DtEntropy,SphP[target].MaxSignalVel,SphP[target].HydroAccel[0],SphP[target].HydroAccel[1],SphP[target].HydroAccel[2]);
        fflush(resultfile);
      }
    if (++MM>2) exit(-42);
  }
#endif

#ifdef CUDA_GX_NO_SPH_SUPPORT
  s_gx.cudamode=oldcudamode;   /* undo the temporary CPU-only override from the top of the routine */
#endif

  /* disabled timer summaries (per-call and accumulated), kept for reference */
  //MESSAGE("%6.2f, %6.2f, %6.2f, %6.2f, %6.2f - %5.1f, %5.1f, %5.1f, %5.1f %c sph timers d 90,93,94,95,net",TimerGet(90),TimerGet(93),TimerGet(94),TimerGet(95),TimerGet(90)-TimerGet(93)-TimerGet(94),100.0*TimerGet(93)/TimerGet(90),100.0*TimerGet(94)/TimerGet(90),100.0*TimerGet(95)/TimerGet(90),100.0*(TimerGet(90)-TimerGet(93)-TimerGet(94))/TimerGet(90),'%');
  //MESSAGE("%6.2f, %6.2f, %6.2f, %6.2f, %6.2f - %5.1f, %5.1f, %5.1f, %5.1f %c sph timers a 90,93,94,95,net",TimerGetAccumulated(90),TimerGetAccumulated(93),TimerGetAccumulated(94),TimerGetAccumulated(95),TimerGetAccumulated(90)-TimerGetAccumulated(93)-TimerGetAccumulated(94),100.0*TimerGetAccumulated(93)/TimerGetAccumulated(90),100.0*TimerGetAccumulated(94)/TimerGetAccumulated(90),100.0*TimerGetAccumulated(95)/TimerGetAccumulated(90),100.0*(TimerGetAccumulated(90)-TimerGetAccumulated(93)-TimerGetAccumulated(94))/TimerGetAccumulated(90),'%');
}
Object* Context::getVar(const Symbol &name, bool inherit) const { TimerStart("getVar"); #ifdef _CTXH Object *o = 0; if (mVars.getValue(name, o)) { if (o) { o->incRef(); } TimerEnd("getVar"); return o; #else ObjectMap::const_iterator it = mVars.find(name); if (it != mVars.end()) { Object *o = it->second; if (o) { o->incRef(); } TimerEnd("getVar"); return o; #endif } else { if (mParent && inherit) { Object *o = mParent->getVar(name); TimerEnd("getVar"); return o; } else { TimerEnd("getVar"); return 0; } } } Callable* Context::getCallable(const Symbol &name, bool inherit) const { TimerStart("getCallable"); #ifdef _CTXH Object *o = 0; if (mVars.getValue(name, o) && o && o->isCallable()) { o->incRef(); TimerEnd("getCallable"); return (Callable*) o; #else ObjectMap::const_iterator it = mVars.find(name); if (it != mVars.end() && it->second && it->second->isCallable()) { Object *o = it->second; o->incRef(); TimerEnd("getCallable"); return (Callable*) o; #endif } else { if (mParent && inherit) { Callable *c = mParent->getCallable(name); TimerEnd("getCallable"); return c; } else { TimerEnd("getCallable"); return 0; } } } void Context::toStream(std::ostream &os, const std::string &indent) const { TimerStart("toStream"); #ifdef _CTXH ObjectMap::KeyValueVector kv; size_t n = mVars.getPairs(kv); for (size_t i=0; i<n; ++i) { os << indent << "\"" << kv[i].first << "\" = "; kv[i].second->toStream(os); os << std::endl; } #else ObjectMap::const_iterator it = mVars.begin(); while (it != mVars.end()) { os << indent << "\"" << it->first << "\" = "; it->second->toStream(os); os << std::endl; ++it; } #endif if (mParent != 0) { os << indent << "From parent context:" << std::endl; mParent->toStream(os, indent+" "); } TimerEnd("toStream"); }
bool Context::hasVar(const Symbol &name, bool inherit) const { TimerStart("hasVar"); #ifdef _CTXH if (mVars.hasKey(name)) { #else if (mVars.find(name) != mVars.end()) { #endif return true; } else if (mParent && inherit) { return mParent->hasVar(name, true); } else { return false; } TimerEnd("hasVar"); } void Context::setVar(const Symbol &name, Object *v, bool inherit) { TimerStart("setVar"); #ifdef _CTXH ObjectMap::Entry *e = mVars.find(name); if (e) { if (e->second != v) { if (e->second) { e->second->decRef(); } e->second = v; } else { TimerEnd("setVar"); return; } } else { if (mParent && inherit && mParent->hasVar(name, true)) { mParent->setVar(name, v, true); TimerEnd("setVar"); return; } mVars.insert(name, v); } #else ObjectMap::iterator it = mVars.find(name); if (it != mVars.end()) { if (it->second != v) { if (it->second) { it->second->decRef(); } it->second = v; } else { TimerEnd("setVar"); return; } } else { if (mParent && inherit && mParent->hasVar(name, true)) { mParent->setVar(name, v, true); TimerEnd("setVar"); return; } mVars[name] = v; } #endif /* if (mParent && inherit && mParent->hasVar(name, true)) { mParent->setVar(name, v, true); TimerEnd("setVar"); return; } #ifdef _CTXH ObjectMap::Entry *e = mVars.find(name); if (e) { if (e->second != v) { if (e->second) { e->second->decRef(); } e->second = v; } else { TimerEnd("setVar"); return; } } else { mVars.insert(name, v); } #else ObjectMap::iterator it = mVars.find(name); if (it != mVars.end()) { if (it->second != v) { if (it->second) { it->second->decRef(); } it->second = v; } else { TimerEnd("setVar"); return; } } else { mVars[name] = v; } #endif */ if (v) { v->incRef(); } TimerEnd("setVar"); }
void GLWindow_Mainloop(void) { USE_HIGH_PERFORMANCE_TIMER = EmulatorConfig.highperformancetimer; TimerInit(); SCREEN_TEXTURE = calloc(256*224,4); //max possible size int scr_texture_loaded = 0; GLuint scr_texture; //do { GBA_RunFor(1); } while(GBA_MemoryReadFast16(CPU.R[R_PC]) != 0xDF05); //swi 0x05 //CPU.R[R_PC] = 0x00000000; //do { GBA_RunFor(1); } while(CPU.R[R_PC] != 0x080002B0); GLWindow_GBACreateDissasembler(); while(1) { if(GLWindow_HandleEvents()) break; if(GLWindow_Active && (PAUSED == 0)) { if(RUNNING == RUN_GBA) { GLWindow_GBADisassemblerStartAddressSetDefault(); GLWindow_GBAHandleInput(); GBA_CheckKeypadInterrupt(); GBA_RunFor(280896); //clocksperframe = 280896 if(Keys_Down[VK_SPACE]) { GBA_SetFrameskip(10); GBA_SoundResetBufferPointers(); } else GBA_SetFrameskip(FRAMESKIP); if(GBA_HasToSkipFrame()==0) { GBA_ConvertScreenBufferTo32RGB(SCREEN_TEXTURE); glClear(GL_COLOR_BUFFER_BIT); //Clear screen if(scr_texture_loaded) glDeleteTextures(1,&scr_texture); scr_texture_loaded = 1; glGenTextures(1,&scr_texture); glBindTexture(GL_TEXTURE_2D,scr_texture); if(EmulatorConfig.oglfilter) { glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,GL_LINEAR); glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,GL_LINEAR); } else { glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,GL_NEAREST); glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,GL_NEAREST); } glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_WRAP_S,GL_CLAMP); glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_WRAP_T,GL_CLAMP); glTexImage2D(GL_TEXTURE_2D,0,4,240,160, 0,GL_RGBA,GL_UNSIGNED_BYTE,SCREEN_TEXTURE); glBindTexture(GL_TEXTURE_2D,scr_texture); glColor3f(1.0,1.0,1.0); glBegin( GL_QUADS ); glTexCoord2f(0,0); // Top-left vertex glVertex3f(0,0,0); glTexCoord2f(1,0); // Bottom-left vertex glVertex3f(240,0,0); glTexCoord2f(1,1); // Bottom-right vertex glVertex3f(240,160,0); glTexCoord2f(0,1); // Top-right vertex glVertex3f(0,160,0); glEnd(); GLWindow_SwapBuffers(); } GBA_UpdateFrameskip(); //GLWindow_MemViewerUpdate(); 
//GLWindow_IOViewerUpdate(); //GLWindow_DisassemblerUpdate(); TimerWait(Keys_Down[VK_SPACE] == 0); } else { glClear(GL_COLOR_BUFFER_BIT); //Clear screen GLWindow_SwapBuffers(); Sleep(100); } } else { Sleep(1); // Allow the CPU to rest a bit :P } /* if(RUNNING == RUN_GBA) { if(Keys_Down['Q']) { int k = 10; while(k--) GLWindow_GBADisassemblerStep(); GLWindow_GBADisassemblerUpdate(); } if(Keys_Down['W']) { int k = 100; while(k--) GLWindow_GBADisassemblerStep(); GLWindow_GBADisassemblerUpdate(); } if(Keys_Down['R']) { int k = 1000; while(k--) GLWindow_GBADisassemblerStep(); GLWindow_GBADisassemblerUpdate(); } } */ } GLWindow_UnloadRom(1); if(scr_texture_loaded) glDeleteTextures(1,&scr_texture); TimerEnd(); }
void main() { // char c[]="Hello World"; // Queue* q = InitQueue(); int i=0; // for (i=0;i<5;i++) {QueuePush(q,&(c[i]));} // for (i=0;i<5;i++) printf("%c",*(char*)QueuePop(q)); // for (i=5;i<11;i++) {QueuePush(q,&(c[i]));} // for (i=0;i<5;i++) printf("%c",*(char*)QueuePop(q)); // // printf("%d\n",q->size); // DeleteQueue(q); IMG_Init(0); SDL_Surface* sf = IMG_Load("1.jpg"); IMG_Quit(); SDL_Init(SDL_INIT_EVERYTHING); SDL_Surface* screen = SDL_SetVideoMode( FRAME_WIDTH*2, FRAME_HEIGHT, 24, SDL_SWSURFACE ); SDL_Rect r = {0,0,FRAME_WIDTH,FRAME_HEIGHT}; SDL_Event event; char buf[256]; Pixmap* px = PixmapFromSdlSurface(sf); #if USE_BITFIELD printf("Using bitfield optimization ...\n"); Pixmap* px2 = PixmapThresholding(px,0.2); #else Pixmap* px2 = PixmapThresholdingSimple(px,0.2); SavePixmap(px2,"1.bmp"); #endif Pixmap* dup = PixmapCopy(px2); Timer* timer = TimerStart(); for (i=0;i<1;i++) { #if USE_BITFIELD PixmapErosion(px2,1); #else PixmapErosionSimple(px2,1); #endif } SavePixmap(px2,"2.bmp"); printf("time taken: %fs\n",TimerEnd(timer)); // SDL_BlitSurface(sf,&r,screen,NULL); SDL_Surface* res = SdlSurfaceFromPixmap(dup); SDL_BlitSurface(res,&r,screen,NULL); r.x=FRAME_WIDTH; res = SdlSurfaceFromPixmap(px2); SDL_BlitSurface(res,NULL,screen,&r); SDL_Flip(screen); while (1) { if (SDL_PollEvent(&event)) { if (event.type==SDL_QUIT) break; } } printf("Memory used: %s\n",GetMemoryRepr(buf,MemoryInfo())); DeletePixmap(px); DeletePixmap(px2); // SDL_FreeSurface(res); // SDL_FreeSurface(sf); // SDL_FreeSurface(screen); SDL_Quit(); printf("Memory used: %s\n",GetMemoryRepr(buf,MemoryInfo())); }
/*! This function computes the gravitational forces for all active * particles. If needed, a new tree is constructed, otherwise the * dynamically updated tree is used. Particles are only exported to other * processors when really needed, thereby allowing a good use of the * communication buffer. */ void gravity_tree(void) { int tim=20; // GX mod, timer to profile calls TimerBeg(29); TimerBeg(tim); long long ntot; int numnodes, nexportsum = 0; int i, j, iter = 0; int *numnodeslist, maxnumnodes, nexport, *numlist, *nrecv, *ndonelist; double tstart, tend, timetree = 0, timecommsumm = 0, timeimbalance = 0, sumimbalance; double ewaldcount; double costtotal, ewaldtot, *costtreelist, *ewaldlist; double maxt, sumt, *timetreelist, *timecommlist; double fac, plb, plb_max, sumcomm; #ifndef NOGRAVITY int *noffset, *nbuffer, *nsend, *nsend_local; long long ntotleft; int ndone,maxfill, ngrp; int k, place; int level, sendTask, recvTask; double ax, ay, az; MPI_Status status; #endif ///////////////// GX ////////////////////// int totdone=0; #if CUDA_DEBUG_GX>0 int not_timestepped_gx=0; int exporthash_gx=0; int count_exported_gx=0; #endif ///////////////// GX ////////////////////// /* set new softening lengths */ if(All.ComovingIntegrationOn) set_softenings(); /* contruct tree if needed */ tstart = second(); if(TreeReconstructFlag) { if(ThisTask == 0) printf("Tree construction.\n"); force_treebuild(NumPart); TreeReconstructFlag = 0; if(ThisTask == 0) printf("Tree construction done.\n"); } tend = second(); All.CPU_TreeConstruction += timediff(tstart, tend); costtotal = ewaldcount = 0; /* Note: 'NumForceUpdate' has already been determined in find_next_sync_point_and_drift() */ numlist = malloc(NTask * sizeof(int) * NTask); MPI_Allgather(&NumForceUpdate, 1, MPI_INT, numlist, 1, MPI_INT, MPI_COMM_WORLD); for(i = 0, ntot = 0; i < NTask; i++) ntot += numlist[i]; free(numlist); #ifndef NOGRAVITY if(ThisTask == 0) printf("Begin tree force.\n"); #ifdef SELECTIVE_NO_GRAVITY for(i = 0; i < 
NumPart; i++) if(((1 << P[i].Type) & (SELECTIVE_NO_GRAVITY))) P[i].Ti_endstep = -P[i].Ti_endstep - 1; #endif noffset = malloc(sizeof(int) * NTask); /* offsets of bunches in common list */ nbuffer = malloc(sizeof(int) * NTask); nsend_local = malloc(sizeof(int) * NTask); nsend = malloc(sizeof(int) * NTask * NTask); ndonelist = malloc(sizeof(int) * NTask); i = 0; /* begin with this index */ ntotleft = ntot; /* particles left for all tasks together */ TimerEnd(tim++); ///////////////// GX ////////////////////// // if (s_gx.cudamode>0 && All.MaxPart>1400000) TimersSleep(10); // GPU card runs hot on large sims, this is around N_p=1404928 // if (s_gx.cudamode>0) TimersSleep(10); TimerBeg(tim); double starttime,subtime=-1,cpytime=-1; int Np=-1; int buffered=0; if(s_gx.cudamode>0) { FUN_MESSAGE(2,"gravity_tree()"); TimerBeg(50); cpytime=GetTime(); Np=InitializeProlog_gx(NumPart); TimerEnd(50); cpytime=GetTime()-cpytime; } ///////////////// GX ////////////////////// while(ntotleft > 0) { TimerBeg(31); starttime=GetTime(); iter++; for(j = 0; j < NTask; j++) nsend_local[j] = 0; /* do local particles and prepare export list */ tstart = second(); if (s_gx.cudamode==0 || Np<MIN_FORCE_PARTICLES_FOR_GPU_GX) { ASSERT_GX( !buffered ); ReLaunchChunkManager(); for(nexport = 0, ndone = 0; i < NumPart && nexport < All.BunchSizeForce - NTask; i++) { if(P[i].Ti_endstep == All.Ti_Current) { ndone++; for(j = 0; j < NTask; j++) Exportflag[j] = 0; TimerUpdateCounter(31,1); #ifndef PMGRID costtotal += force_treeevaluate(i, 0, &ewaldcount); #else costtotal += force_treeevaluate_shortrange(i, 0 ); #endif #if CUDA_DEBUG_GX>0 int flagexported_gx=0; #endif for(j = 0; j < NTask; j++) { if(Exportflag[j]) { ASSERT_GX( NTask>1 ); #if CUDA_DEBUG_GX>0 flagexported_gx=1; exporthash_gx += (i-j)*(j+ThisTask+1); #endif for(k = 0; k < 3; k++) GravDataGet[nexport].u.Pos[k] = P[i].Pos[k]; #ifdef UNEQUALSOFTENINGS GravDataGet[nexport].Type = P[i].Type; #ifdef ADAPTIVE_GRAVSOFT_FORGAS if(P[i].Type == 0) 
GravDataGet[nexport].Soft = SphP[i].Hsml; #endif #endif GravDataGet[nexport].w.OldAcc = P[i].OldAcc; GravDataIndexTable[nexport].Task = j; GravDataIndexTable[nexport].Index = i; GravDataIndexTable[nexport].SortIndex = nexport; nexport++; nexportsum++; nsend_local[j]++; } } #if CUDA_DEBUG_GX>0 if (flagexported_gx) ++count_exported_gx; #endif } #if CUDA_DEBUG_GX>0 else ++not_timestepped_gx; #endif } ManageChuncks(0); } else { ///////////////// GX ////////////////////// // cudamode>0 ///////////////// GX ////////////////////// #ifndef PMGRID // WARNING Attemping to run in tree-only mode, examine results carefully // ERROR cannot run in non PMGRID mode #endif if (iter==1){ const double tx=GetTime(); TimerBeg(51); ASSERT_GX(NumPart>=i); ASSERT_GX(!buffered); if (iter!=1) ERROR("cuda mode does not support iterations in gravtree calc, try to increasing the 'BufferSize' in the parameter file to surcomevent this problem"); const int Np2=InitializeCalculation_gx(NumPart,P,0); ASSERT_GX( Np2==Np ); if (Np2==0) WARNING("no particles participate in this timestep"); TimerEnd(51); cpytime += GetTime() - tx; subtime=GetTime(); TimerBeg(52); force_treeevaluate_shortrange_range_gx(0, Np); buffered=1; TimerUpdateCounter(31,NumPart-i); TimerEnd(52); subtime = GetTime() - subtime; } else { cpytime=-1; subtime=-1; ASSERT_GX(buffered); } for(nexport = 0, ndone = 0; i < NumPart && nexport < All.BunchSizeForce - NTask; i++) { if(P[i].Ti_endstep == All.Ti_Current) { ndone++; ASSERT_GX( i<NumPart ); ASSERT_GX( buffered ); const struct result_gx r=GetTarget(totdone++,i); // s_gx.result[target]; P[i].GravAccel[0] = r.acc_x; P[i].GravAccel[1] = r.acc_y; P[i].GravAccel[2] = r.acc_z; P[i].GravCost = r.ninteractions; costtotal += r.ninteractions; if (s_gx.NTask>1) { #if CUDA_DEBUG_GX>0 int flagexported_gx=0; #endif for(j = 0; j < NTask; j++) { if (GetExportflag_gx(&s_gx,i,NTask,j)){ ASSERT_GX( NTask>1 ); #if CUDA_DEBUG_GX>0 flagexported_gx=1; exporthash_gx += (i-j)*(j+ThisTask+1); #endif for(k = 
0; k < 3; k++) GravDataGet[nexport].u.Pos[k] = P[i].Pos[k]; #ifdef UNEQUALSOFTENINGS GravDataGet[nexport].Type = P[i].Type; #ifdef ADAPTIVE_GRAVSOFT_FORGAS if(P[i].Type == 0) GravDataGet[nexport].Soft = SphP[i].Hsml; #endif #endif GravDataGet[nexport].w.OldAcc = P[i].OldAcc; GravDataIndexTable[nexport].Task = j; GravDataIndexTable[nexport].Index = i; GravDataIndexTable[nexport].SortIndex = nexport; nexport++; nexportsum++; nsend_local[j]++; } } #if CUDA_DEBUG_GX>0 if (flagexported_gx) ++count_exported_gx; #endif } } #if CUDA_DEBUG_GX>0 else ++not_timestepped_gx; #endif } AssertsOnhasGadgetDataBeenModified_gx(0,1,0); } TimerEnd(31); ///////////////// GX ////////////////////// if (iter==1 || !buffered){ PrintInfoFinalize(s_gx,ndone,Np,starttime,cpytime,subtime,0,iter,-1 #if CUDA_DEBUG_GX>0 ,not_timestepped_gx,count_exported_gx,nexport,nexportsum,exporthash_gx,costtotal #else ,0,0,0,0,0,0 #endif ); subtime=-1; } TimerBeg(39); ///////////////// GX ////////////////////// tend = second(); timetree += timediff(tstart, tend); qsort(GravDataIndexTable, nexport, sizeof(struct gravdata_index), grav_tree_compare_key); for(j = 0; j < nexport; j++) GravDataIn[j] = GravDataGet[GravDataIndexTable[j].SortIndex]; for(j = 1, noffset[0] = 0; j < NTask; j++) noffset[j] = noffset[j - 1] + nsend_local[j - 1]; tstart = second(); MPI_Allgather(nsend_local, NTask, MPI_INT, nsend, NTask, MPI_INT, MPI_COMM_WORLD); tend = second(); timeimbalance += timediff(tstart, tend); /* now do the particles that need to be exported */ for(level = 1; level < (1 << PTask); level++) { tstart = second(); for(j = 0; j < NTask; j++) nbuffer[j] = 0; for(ngrp = level; ngrp < (1 << PTask); ngrp++) { maxfill = 0; for(j = 0; j < NTask; j++) { if((j ^ ngrp) < NTask) if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j]) maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j]; } if(maxfill >= All.BunchSizeForce) break; sendTask = ThisTask; recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(nsend[ThisTask * NTask + 
recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0) { /* get the particles */ MPI_Sendrecv(&GravDataIn[noffset[recvTask]], nsend_local[recvTask] * sizeof(struct gravdata_in), MPI_BYTE, recvTask, TAG_GRAV_A, &GravDataGet[nbuffer[ThisTask]], nsend[recvTask * NTask + ThisTask] * sizeof(struct gravdata_in), MPI_BYTE, recvTask, TAG_GRAV_A, MPI_COMM_WORLD, &status); } } for(j = 0; j < NTask; j++) if((j ^ ngrp) < NTask) nbuffer[j] += nsend[(j ^ ngrp) * NTask + j]; } tend = second(); timecommsumm += timediff(tstart, tend); TimerBeg(30); TimerUpdateCounter(30,nbuffer[ThisTask]); tstart = second(); ///////////////// GX ////////////////////// // Do exported particles on the CPU/GPU { AssertsOnhasGadgetDataBeenModified_gx(1,1,0); #if CUDA_DEBUG_GX>1 MESSAGE("INFO: DistRMSGrav=%g",DistRMSGravdata(nbuffer[ThisTask],GravDataGet)); #endif starttime=GetTime(); const int N=nbuffer[ThisTask]; if (N>0){ if (s_gx.cudamode==0 || N<MIN_FORCE_PARTICLES_FOR_GPU_GX || Np<MIN_FORCE_PARTICLES_FOR_GPU_GX) { ReLaunchChunkManager(); for(j = 0; j<N ; j++) { #ifndef PMGRID costtotal += force_treeevaluate(j, 1, &ewaldcount); #else costtotal += force_treeevaluate_shortrange(j, 1); #endif } ManageChuncks(0); } else { ASSERT_GX( buffered ); cpytime=GetTime(); InitializeExportCalculation_gx(N,P[0].Type); ASSERT_GX( N==s_gx.Np ); subtime=GetTime(); force_treeevaluate_shortrange_range_gx(1, N); subtime=GetTime()-subtime; costtotal += FinalizeExportCalculation_gx(N); cpytime=GetTime()-cpytime-subtime; ASSERT_GX( N==s_gx.Np ); } PrintInfoFinalize(s_gx,0,N,starttime,cpytime,subtime,2,iter,level,0,0,nexport,0,0,0); subtime=-1; } else { ReLaunchChunkManager(); ManageChuncks(0); } } ///////////////// GX ////////////////////// if (nbuffer[ThisTask]>0) TimerUpdateCounter(30,-1); TimerEnd(30); tend = second(); timetree += timediff(tstart, tend); TimerBeg(33); tstart = second(); MPI_Barrier(MPI_COMM_WORLD); tend = second(); timeimbalance += timediff(tstart, tend); TimerEnd(33); /* get the result */ tstart = 
second(); for(j = 0; j < NTask; j++) nbuffer[j] = 0; for(ngrp = level; ngrp < (1 << PTask); ngrp++) { maxfill = 0; for(j = 0; j < NTask; j++) { if((j ^ ngrp) < NTask) if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j]) maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j]; } if(maxfill >= All.BunchSizeForce) break; sendTask = ThisTask; recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0) { /* send the results */ MPI_Sendrecv(&GravDataResult[nbuffer[ThisTask]], nsend[recvTask * NTask + ThisTask] * sizeof(struct gravdata_in), MPI_BYTE, recvTask, TAG_GRAV_B, &GravDataOut[noffset[recvTask]], nsend_local[recvTask] * sizeof(struct gravdata_in), MPI_BYTE, recvTask, TAG_GRAV_B, MPI_COMM_WORLD, &status); /* add the result to the particles */ for(j = 0; j < nsend_local[recvTask]; j++) { place = GravDataIndexTable[noffset[recvTask] + j].Index; // comment out in order to disable export forces for debugging for(k = 0; k < 3; k++) P[place].GravAccel[k] += GravDataOut[j + noffset[recvTask]].u.Acc[k]; P[place].GravCost += GravDataOut[j + noffset[recvTask]].w.Ninteractions; } } } for(j = 0; j < NTask; j++) if((j ^ ngrp) < NTask) nbuffer[j] += nsend[(j ^ ngrp) * NTask + j]; } tend = second(); timecommsumm += timediff(tstart, tend); level = ngrp - 1; } MPI_Allgather(&ndone, 1, MPI_INT, ndonelist, 1, MPI_INT, MPI_COMM_WORLD); for(j = 0; j < NTask; j++) ntotleft -= ndonelist[j]; TimerEnd(39); } TimerEnd(tim++); TimerBeg(tim); free(ndonelist); free(nsend); free(nsend_local); free(nbuffer); free(noffset); /* now add things for comoving integration */ #ifndef PERIODIC #ifndef PMGRID if(All.ComovingIntegrationOn) { fac = 0.5 * All.Hubble * All.Hubble * All.Omega0 / All.G; for(i = 0; i < NumPart; i++) if(P[i].Ti_endstep == All.Ti_Current) for(j = 0; j < 3; j++) P[i].GravAccel[j] += fac * P[i].Pos[j]; } #endif #endif for(i = 0; i < NumPart; i++) if(P[i].Ti_endstep == All.Ti_Current) { #ifdef PMGRID ax = 
P[i].GravAccel[0] + P[i].GravPM[0] / All.G; ay = P[i].GravAccel[1] + P[i].GravPM[1] / All.G; az = P[i].GravAccel[2] + P[i].GravPM[2] / All.G; #else ax = P[i].GravAccel[0]; ay = P[i].GravAccel[1]; az = P[i].GravAccel[2]; #endif P[i].OldAcc = sqrt(ax * ax + ay * ay + az * az); } if(All.TypeOfOpeningCriterion == 1) All.ErrTolTheta = 0; /* This will switch to the relative opening criterion for the following force computations */ /* muliply by G */ for(i = 0; i < NumPart; i++) if(P[i].Ti_endstep == All.Ti_Current) for(j = 0; j < 3; j++) P[i].GravAccel[j] *= All.G; /* Finally, the following factor allows a computation of a cosmological simulation with vacuum energy in physical coordinates */ #ifndef PERIODIC #ifndef PMGRID if(All.ComovingIntegrationOn == 0) { fac = All.OmegaLambda * All.Hubble * All.Hubble; for(i = 0; i < NumPart; i++) if(P[i].Ti_endstep == All.Ti_Current) for(j = 0; j < 3; j++) P[i].GravAccel[j] += fac * P[i].Pos[j]; } #endif #endif #ifdef SELECTIVE_NO_GRAVITY for(i = 0; i < NumPart; i++) if(P[i].Ti_endstep < 0) P[i].Ti_endstep = -P[i].Ti_endstep - 1; #endif if(ThisTask == 0) printf("tree is done.\n"); #else /* gravity is switched off */ for(i = 0; i < NumPart; i++) if(P[i].Ti_endstep == All.Ti_Current) for(j = 0; j < 3; j++) P[i].GravAccel[j] = 0; #endif /* Now the force computation is finished */ /* gather some diagnostic information */ timetreelist = malloc(sizeof(double) * NTask); timecommlist = malloc(sizeof(double) * NTask); costtreelist = malloc(sizeof(double) * NTask); numnodeslist = malloc(sizeof(int) * NTask); ewaldlist = malloc(sizeof(double) * NTask); nrecv = malloc(sizeof(int) * NTask); numnodes = Numnodestree; MPI_Gather(&costtotal, 1, MPI_DOUBLE, costtreelist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Gather(&numnodes, 1, MPI_INT, numnodeslist, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Gather(&timetree, 1, MPI_DOUBLE, timetreelist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Gather(&timecommsumm, 1, MPI_DOUBLE, timecommlist, 1, MPI_DOUBLE, 0, 
MPI_COMM_WORLD); MPI_Gather(&NumPart, 1, MPI_INT, nrecv, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Gather(&ewaldcount, 1, MPI_DOUBLE, ewaldlist, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Reduce(&nexportsum, &nexport, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(&timeimbalance, &sumimbalance, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if(ThisTask == 0) { All.TotNumOfForces += ntot; fprintf(FdTimings, "Step= %d t= %g dt= %g \n", All.NumCurrentTiStep, All.Time, All.TimeStep); fprintf(FdTimings, "Nf= %d%09d total-Nf= %d%09d ex-frac= %g iter= %d\n", (int) (ntot / 1000000000), (int) (ntot % 1000000000), (int) (All.TotNumOfForces / 1000000000), (int) (All.TotNumOfForces % 1000000000), nexport / ((double) ntot), iter); /* note: on Linux, the 8-byte integer could be printed with the format identifier "%qd", but doesn't work on AIX */ fac = NTask / ((double) All.TotNumPart); for(i = 0, maxt = timetreelist[0], sumt = 0, plb_max = 0, maxnumnodes = 0, costtotal = 0, sumcomm = 0, ewaldtot = 0; i < NTask; i++) { costtotal += costtreelist[i]; sumcomm += timecommlist[i]; if(maxt < timetreelist[i]) maxt = timetreelist[i]; sumt += timetreelist[i]; plb = nrecv[i] * fac; if(plb > plb_max) plb_max = plb; if(numnodeslist[i] > maxnumnodes) maxnumnodes = numnodeslist[i]; ewaldtot += ewaldlist[i]; } fprintf(FdTimings, "work-load balance: %g max=%g avg=%g PE0=%g\n", maxt / (sumt / NTask), maxt, sumt / NTask, timetreelist[0]); fprintf(FdTimings, "particle-load balance: %g\n", plb_max); fprintf(FdTimings, "max. 
nodes: %d, filled: %g\n", maxnumnodes, maxnumnodes / (All.TreeAllocFactor * All.MaxPart)); fprintf(FdTimings, "part/sec=%g | %g ia/part=%g (%g)\n", ntot / (sumt + 1.0e-20), ntot / (maxt * NTask), ((double) (costtotal)) / ntot, ((double) ewaldtot) / ntot); fprintf(FdTimings, "\n"); fflush(FdTimings); All.CPU_TreeWalk += sumt / NTask; All.CPU_Imbalance += sumimbalance / NTask; All.CPU_CommSum += sumcomm / NTask; } free(nrecv); free(ewaldlist); free(numnodeslist); free(costtreelist); free(timecommlist); free(timetreelist); ASSERT_GX( tim==22 ); TimerEnd(tim++); TimerEnd(29); //MESSAGE("%6.2f, %6.2f, %6.2f, %6.2f, %6.2f - %5.1f, %5.1f, %5.1f, %5.1f %c force timers d 29,31,30,33,net",TimerGet(29),TimerGet(31),TimerGet(30),TimerGet(33),TimerGet(29)-TimerGet(31)-TimerGet(30),100.0*TimerGet(31)/TimerGet(29),100.0*TimerGet(30)/TimerGet(29),100.0*TimerGet(33)/TimerGet(29),100.0*(TimerGet(29)-TimerGet(31)-TimerGet(30))/TimerGet(29),'%'); //MESSAGE("%6.2f, %6.2f, %6.2f, %6.2f, %6.2f - %5.1f, %5.1f, %5.1f, %5.1f %c force timers a 29,31,30,33,net",TimerGetAccumulated(29),TimerGetAccumulated(31),TimerGetAccumulated(30),TimerGetAccumulated(33),TimerGetAccumulated(29)-TimerGetAccumulated(31)-TimerGetAccumulated(30),100.0*TimerGetAccumulated(31)/TimerGetAccumulated(29),100.0*TimerGetAccumulated(30)/TimerGetAccumulated(29),100.0*TimerGetAccumulated(33)/TimerGetAccumulated(29),100.0*(TimerGetAccumulated(29)-TimerGetAccumulated(31)-TimerGetAccumulated(30))/TimerGetAccumulated(29),'%'); }