/*! This function is the driver routine for the calculation of hydrodynamical * force and rate of change of entropy due to shock heating for all active * particles . */ void hydro_force(void) { long long ntot, ntotleft; int i, j, k, n, ngrp, maxfill, source, ndone; int *nbuffer, *noffset, *nsend_local, *nsend, *numlist, *ndonelist; int level, sendTask, recvTask, nexport, place; double soundspeed_i; double tstart, tend, sumt, sumcomm; double timecomp = 0, timecommsumm = 0, timeimbalance = 0, sumimbalance; #ifndef NOMPI MPI_Status status; #endif #ifdef PERIODIC boxSize = All.BoxSize; boxHalf = 0.5 * All.BoxSize; #ifdef LONG_X boxHalf_X = boxHalf * LONG_X; boxSize_X = boxSize * LONG_X; #endif #ifdef LONG_Y boxHalf_Y = boxHalf * LONG_Y; boxSize_Y = boxSize * LONG_Y; #endif #ifdef LONG_Z boxHalf_Z = boxHalf * LONG_Z; boxSize_Z = boxSize * LONG_Z; #endif #endif if(All.ComovingIntegrationOn) { /* Factors for comoving integration of hydro */ hubble_a = All.Omega0 / (All.Time * All.Time * All.Time) + (1 - All.Omega0 - All.OmegaLambda) / (All.Time * All.Time) + All.OmegaLambda; hubble_a = All.Hubble * sqrt(hubble_a); hubble_a2 = All.Time * All.Time * hubble_a; fac_mu = pow(All.Time, 3 * (GAMMA - 1) / 2) / All.Time; fac_egy = pow(All.Time, 3 * (GAMMA - 1)); fac_vsic_fix = hubble_a * pow(All.Time, 3 * GAMMA_MINUS1); a3inv = 1 / (All.Time * All.Time * All.Time); atime = All.Time; } else hubble_a = hubble_a2 = atime = fac_mu = fac_vsic_fix = a3inv = fac_egy = 1.0; /* `NumSphUpdate' gives the number of particles on this processor that want a force update */ for(n = 0, NumSphUpdate = 0; n < N_gas; n++) { if(P[n].Ti_endstep == All.Ti_Current) NumSphUpdate++; } numlist = malloc(NTask * sizeof(int) * NTask); #ifndef NOMPI MPI_Allgather(&NumSphUpdate, 1, MPI_INT, numlist, 1, MPI_INT, GADGET_WORLD); #else numlist[0] = NumSphUpdate; #endif for(i = 0, ntot = 0; i < NTask; i++) ntot += numlist[i]; free(numlist); noffset = malloc(sizeof(int) * NTask); /* offsets of bunches in common list */ nbuffer = malloc(sizeof(int) * NTask); nsend_local = malloc(sizeof(int) * NTask); nsend = malloc(sizeof(int) * NTask * NTask); ndonelist = malloc(sizeof(int) * NTask); i = 0; /* first particle for this task */ ntotleft = ntot; /* particles left for all tasks together */ while(ntotleft > 0) { for(j = 0; j < NTask; j++) nsend_local[j] = 0; /* do local particles and prepare export list */ tstart = second(); for(nexport = 0, ndone = 0; i < N_gas && nexport < All.BunchSizeHydro - NTask; i++) if(P[i].Ti_endstep == All.Ti_Current) { ndone++; for(j = 0; j < NTask; j++) Exportflag[j] = 0; hydro_evaluate(i, 0); for(j = 0; j < NTask; j++) { if(Exportflag[j]) { for(k = 0; k < 3; k++) { HydroDataIn[nexport].Pos[k] = P[i].Pos[k]; HydroDataIn[nexport].Vel[k] = SphP[i].VelPred[k]; } HydroDataIn[nexport].Hsml = SphP[i].Hsml; HydroDataIn[nexport].Mass = P[i].Mass; HydroDataIn[nexport].DhsmlDensityFactor = SphP[i].DhsmlDensityFactor; HydroDataIn[nexport].Density = SphP[i].Density; HydroDataIn[nexport].Pressure = SphP[i].Pressure; HydroDataIn[nexport].Timestep = P[i].Ti_endstep - P[i].Ti_begstep; /* calculation of F1 */ soundspeed_i = sqrt(GAMMA * SphP[i].Pressure / SphP[i].Density); HydroDataIn[nexport].F1 = fabs(SphP[i].DivVel) / (fabs(SphP[i].DivVel) + SphP[i].CurlVel + 0.0001 * soundspeed_i / SphP[i].Hsml / fac_mu); HydroDataIn[nexport].Index = i; HydroDataIn[nexport].Task = j; #ifdef MORRIS97VISC HydroDataIn[nexport].Alpha = SphP[i].Alpha; #endif nexport++; nsend_local[j]++; } } } tend = second(); timecomp += timediff(tstart, tend); qsort(HydroDataIn, nexport, sizeof(struct hydrodata_in), hydro_compare_key); for(j = 1, noffset[0] = 0; j < NTask; j++) noffset[j] = noffset[j - 1] + nsend_local[j - 1]; tstart = second(); #ifndef NOMPI MPI_Allgather(nsend_local, NTask, MPI_INT, nsend, NTask, MPI_INT, GADGET_WORLD); #else nsend[0] = nsend_local[0]; #endif tend = second(); timeimbalance += timediff(tstart, tend); /* now do the particles that need to be exported */ for(level = 1; level < (1 << PTask); level++) { tstart = second(); for(j = 0; j < NTask; j++) nbuffer[j] = 0; for(ngrp = level; ngrp < (1 << PTask); ngrp++) { maxfill = 0; for(j = 0; j < NTask; j++) { if((j ^ ngrp) < NTask) if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j]) maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j]; } if(maxfill >= All.BunchSizeHydro) break; sendTask = ThisTask; recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0) { #ifndef NOMPI /* get the particles */ MPI_Sendrecv(&HydroDataIn[noffset[recvTask]], nsend_local[recvTask] * sizeof(struct hydrodata_in), MPI_BYTE, recvTask, TAG_HYDRO_A, &HydroDataGet[nbuffer[ThisTask]], nsend[recvTask * NTask + ThisTask] * sizeof(struct hydrodata_in), MPI_BYTE, recvTask, TAG_HYDRO_A, GADGET_WORLD, &status); #else fprintf(stderr, "NOT SUPPORTED"); exit(1); #endif } } for(j = 0; j < NTask; j++) if((j ^ ngrp) < NTask) nbuffer[j] += nsend[(j ^ ngrp) * NTask + j]; } tend = second(); timecommsumm += timediff(tstart, tend); /* now do the imported particles */ tstart = second(); for(j = 0; j < nbuffer[ThisTask]; j++) hydro_evaluate(j, 1); tend = second(); timecomp += timediff(tstart, tend); /* do a block to measure imbalance */ tstart = second(); #ifndef NOMPI MPI_Barrier(GADGET_WORLD); #endif tend = second(); timeimbalance += timediff(tstart, tend); /* get the result */ tstart = second(); for(j = 0; j < NTask; j++) nbuffer[j] = 0; for(ngrp = level; ngrp < (1 << PTask); ngrp++) { maxfill = 0; for(j = 0; j < NTask; j++) { if((j ^ ngrp) < NTask) if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j]) maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j]; } if(maxfill >= All.BunchSizeHydro) break; sendTask = ThisTask; recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0) { #ifndef NOMPI /* send the results */ MPI_Sendrecv(&HydroDataResult[nbuffer[ThisTask]], nsend[recvTask * NTask + ThisTask] * sizeof(struct hydrodata_out), MPI_BYTE, recvTask, TAG_HYDRO_B, &HydroDataPartialResult[noffset[recvTask]], nsend_local[recvTask] * sizeof(struct hydrodata_out), MPI_BYTE, recvTask, TAG_HYDRO_B, GADGET_WORLD, &status); #else fprintf(stderr, "NOT SUPPORTED"); exit(1); #endif /* add the result to the particles */ for(j = 0; j < nsend_local[recvTask]; j++) { source = j + noffset[recvTask]; place = HydroDataIn[source].Index; for(k = 0; k < 3; k++) SphP[place].HydroAccel[k] += HydroDataPartialResult[source].Acc[k]; SphP[place].DtEntropy += HydroDataPartialResult[source].DtEntropy; if(SphP[place].MaxSignalVel < HydroDataPartialResult[source].MaxSignalVel) SphP[place].MaxSignalVel = HydroDataPartialResult[source].MaxSignalVel; } } } for(j = 0; j < NTask; j++) if((j ^ ngrp) < NTask) nbuffer[j] += nsend[(j ^ ngrp) * NTask + j]; } tend = second(); timecommsumm += timediff(tstart, tend); level = ngrp - 1; } #ifndef NOMPI MPI_Allgather(&ndone, 1, MPI_INT, ndonelist, 1, MPI_INT, GADGET_WORLD); #else ndonelist[0] = ndone; #endif for(j = 0; j < NTask; j++) ntotleft -= ndonelist[j]; } free(ndonelist); free(nsend); free(nsend_local); free(nbuffer); free(noffset); /* do final operations on results */ tstart = second(); for(i = 0; i < N_gas; i++) if(P[i].Ti_endstep == All.Ti_Current) { SphP[i].DtEntropy *= GAMMA_MINUS1 / (hubble_a2 * pow(SphP[i].Density, GAMMA_MINUS1)); #ifdef SPH_BND_PARTICLES if(P[i].ID == 0) { SphP[i].DtEntropy = 0; for(k = 0; k < 3; k++) SphP[i].HydroAccel[k] = 0; } #endif } tend = second(); timecomp += timediff(tstart, tend); /* collect some timing information */ #ifndef NOMPI MPI_Reduce(&timecomp, &sumt, 1, MPI_DOUBLE, MPI_SUM, 0, GADGET_WORLD); MPI_Reduce(&timecommsumm, &sumcomm, 1, MPI_DOUBLE, MPI_SUM, 0, GADGET_WORLD); MPI_Reduce(&timeimbalance, &sumimbalance, 1, MPI_DOUBLE, MPI_SUM, 0, GADGET_WORLD); #else sumt = timecomp; sumcomm = timecommsumm; sumimbalance = timeimbalance; #endif if(ThisTask == 0) { All.CPU_HydCompWalk += sumt / NTask; All.CPU_HydCommSumm += sumcomm / NTask; All.CPU_HydImbalance += sumimbalance / NTask; } }
/*! This function is the driver routine for the calculation of hydrodynamical * force and rate of change of entropy due to shock heating for all active * particles . */ void hydro_force(void) { TimerBeg(90); long long ntot, ntotleft; int i, j, k, n, ngrp, maxfill, source, ndone; int *nbuffer, *noffset, *nsend_local, *nsend, *numlist, *ndonelist; int level, sendTask, recvTask, nexport, place; double soundspeed_i; double tstart, tend, sumt, sumcomm; double timecomp = 0, timecommsumm = 0, timeimbalance = 0, sumimbalance; MPI_Status status; #ifdef PERIODIC boxSize = All.BoxSize; boxHalf = 0.5 * All.BoxSize; #ifdef LONG_X boxHalf_X = boxHalf * LONG_X; boxSize_X = boxSize * LONG_X; #endif #ifdef LONG_Y boxHalf_Y = boxHalf * LONG_Y; boxSize_Y = boxSize * LONG_Y; #endif #ifdef LONG_Z boxHalf_Z = boxHalf * LONG_Z; boxSize_Z = boxSize * LONG_Z; #endif #endif if(All.ComovingIntegrationOn) { /* Factors for comoving integration of hydro */ hubble_a = All.Omega0 / (All.Time * All.Time * All.Time) + (1 - All.Omega0 - All.OmegaLambda) / (All.Time * All.Time) + All.OmegaLambda; hubble_a = All.Hubble * sqrt(hubble_a); hubble_a2 = All.Time * All.Time * hubble_a; fac_mu = pow(All.Time, 3 * (GAMMA - 1) / 2) / All.Time; fac_egy = pow(All.Time, 3 * (GAMMA - 1)); fac_vsic_fix = hubble_a * pow(All.Time, 3 * GAMMA_MINUS1); a3inv = 1 / (All.Time * All.Time * All.Time); atime = All.Time; } else hubble_a = hubble_a2 = atime = fac_mu = fac_vsic_fix = a3inv = fac_egy = 1.0; /* `NumSphUpdate' gives the number of particles on this processor that want a force update */ for(n = 0, NumSphUpdate = 0; n < N_gas; n++) { if(P[n].Ti_endstep == All.Ti_Current) NumSphUpdate++; } numlist = malloc(NTask * sizeof(int) * NTask); MPI_Allgather(&NumSphUpdate, 1, MPI_INT, numlist, 1, MPI_INT, MPI_COMM_WORLD); for(i = 0, ntot = 0; i < NTask; i++) ntot += numlist[i]; free(numlist); noffset = malloc(sizeof(int) * NTask); /* offsets of bunches in common list */ nbuffer = malloc(sizeof(int) * NTask); nsend_local = malloc(sizeof(int) * NTask); nsend = malloc(sizeof(int) * NTask * NTask); ndonelist = malloc(sizeof(int) * NTask); i = 0; /* first particle for this task */ ntotleft = ntot; /* particles left for all tasks together */ ///////////////// GX ////////////////////// FUN_MESSAGE(2,"hydro_force()"); #ifdef CUDA_GX_NO_SPH_SUPPORT int oldcudamode=s_gx.cudamode; s_gx.cudamode=0; #endif double starttime,subtime=-1,cpytime=-1; const int Np=PrintInfoInitialize(N_gas,s_gx.cudamode,1); int iter=0; ///////////////// GX ////////////////////// while(ntotleft > 0) { ///////////////// GX ////////////////////// if (s_gx.cudamode!=0 && i!=0) ERROR("cuda mode does not support iterations in hydro calc, try to increasing the 'BufferSize' in the parameter file to surcomevent this problem"); iter++; ///////////////// GX ////////////////////// for(j = 0; j < NTask; j++) nsend_local[j] = 0; /* do local particles and prepare export list */ TimerBeg(91); TimerBeg(93); starttime=GetTime(); tstart = second(); if (s_gx.cudamode==0 || (Np!=N_gas || Np<MIN_SPH_PARTICLES_FOR_GPU_GX)) { //if (s_gx.cudamode==0 || Np<MIN_SPH_PARTICLES_FOR_GPU_GX) { #ifdef CUDA_GX_CHUNCK_MANAGER_SPH ReLaunchChunkManager(); #endif for(nexport = 0, ndone = 0; i < N_gas && nexport < All.BunchSizeHydro - NTask; i++) if(P[i].Ti_endstep == All.Ti_Current) { ndone++; for(j = 0; j < NTask; j++) Exportflag[j] = 0; hydro_evaluate(i, 0); TimerUpdateCounter(91,1); for(j = 0; j < NTask; j++) { if(Exportflag[j]) { for(k = 0; k < 3; k++) { HydroDataIn[nexport].Pos[k] = P[i].Pos[k]; HydroDataIn[nexport].Vel[k] = SphP[i].VelPred[k]; } HydroDataIn[nexport].Hsml = SphP[i].Hsml; HydroDataIn[nexport].Mass = P[i].Mass; HydroDataIn[nexport].DhsmlDensityFactor = SphP[i].DhsmlDensityFactor; HydroDataIn[nexport].Density = SphP[i].Density; HydroDataIn[nexport].Pressure = SphP[i].Pressure; HydroDataIn[nexport].Timestep = P[i].Ti_endstep - P[i].Ti_begstep; /* calculation of F1 */ soundspeed_i = sqrt(GAMMA * SphP[i].Pressure / SphP[i].Density); HydroDataIn[nexport].F1 = fabs(SphP[i].DivVel) / (fabs(SphP[i].DivVel) + SphP[i].CurlVel + 0.0001 * soundspeed_i / SphP[i].Hsml / fac_mu); HydroDataIn[nexport].Index = i; HydroDataIn[nexport].Task = j; nexport++; nsend_local[j]++; } } } #ifdef CUDA_GX_CHUNCK_MANAGER_SPH ManageChuncks(1); #endif } else { ///////////////// GX ////////////////////// cpytime=GetTime(); ASSERT_GX(s_gx.cudamode>0); if (i!=0) ERROR("cuda mode does not support iterations in hydro calc, try to increasing the 'BufferSize' in the parameter file to surcomevent this problem"); const int Np2=InitializeHydraCalculation_gx(NumPart,P,SphP,N_gas,hubble_a2, fac_mu, fac_vsic_fix #ifdef PERIODIC ,boxSize,boxHalf #endif ); if (Np2==0) WARNING("no sph particles participate in this timestep"); ASSERT_GX( Np2==Np ); cpytime = GetTime()-cpytime; subtime=GetTime(); hydro_evaluate_range_cuda_gx(0,N_gas,s_gx,p_gx,h_gx); subtime=GetTime()-subtime; for(nexport = 0, ndone = 0; i < N_gas && nexport < All.BunchSizeHydro - NTask; i++) if(P[i].Ti_endstep == All.Ti_Current) { ndone++; for(j = 0; j < NTask; j++) Exportflag[j] = 0; ASSERT_GX( P[i].Type==0 ); //hydro_evaluate_cuda_gx(i, 0,&s_gx,&p_gx); TimerUpdateCounter(91,1); ASSERT_GX(i<s_gx.sz_result_hydro); const struct result_hydro_gx r=s_gx.result_hydro[i]; ASSERT_GX( isResultHydraDataOK(r,__FILE__,__LINE__) ); for(k = 0; k < 3; k++) SphP[i].HydroAccel[k] = r.Acc[k]; SphP[i].DtEntropy = r.DtEntropy; SphP[i].MaxSignalVel = r.MaxSignalVel; if (s_gx.NTask>1){ for(j = 0; j < NTask; j++) { const char export_this=GetExportflag_gx(&s_gx,i,NTask,j); if(export_this) { for(k = 0; k < 3; k++) { HydroDataIn[nexport].Pos[k] = P[i].Pos[k]; HydroDataIn[nexport].Vel[k] = SphP[i].VelPred[k]; } HydroDataIn[nexport].Hsml = SphP[i].Hsml; HydroDataIn[nexport].Mass = P[i].Mass; HydroDataIn[nexport].DhsmlDensityFactor = SphP[i].DhsmlDensityFactor; HydroDataIn[nexport].Density = SphP[i].Density; HydroDataIn[nexport].Pressure = SphP[i].Pressure; HydroDataIn[nexport].Timestep = P[i].Ti_endstep - P[i].Ti_begstep; // calculation of F1 soundspeed_i = sqrt(GAMMA * SphP[i].Pressure / SphP[i].Density); HydroDataIn[nexport].F1 = fabs(SphP[i].DivVel) / (fabs(SphP[i].DivVel) + SphP[i].CurlVel + 0.0001 * soundspeed_i / SphP[i].Hsml / fac_mu); HydroDataIn[nexport].Index = i; HydroDataIn[nexport].Task = j; nexport++; nsend_local[j]++; } } } } ///////////////// GX ////////////////////// } TimerEnd(93); tend = second(); timecomp += timediff(tstart, tend); ///////////////// GX ////////////////////// PrintInfoFinalize(s_gx,ndone,Np,starttime,cpytime,subtime,1,iter,-1,0,0,nexport,0,0,0); subtime=-1; ///////////////// GX ////////////////////// qsort(HydroDataIn, nexport, sizeof(struct hydrodata_in), hydro_compare_key); for(j = 1, noffset[0] = 0; j < NTask; j++) noffset[j] = noffset[j - 1] + nsend_local[j - 1]; tstart = second(); MPI_Allgather(nsend_local, NTask, MPI_INT, nsend, NTask, MPI_INT, MPI_COMM_WORLD); tend = second(); timeimbalance += timediff(tstart, tend); TimerEnd(91); TimerBeg(92); /* now do the particles that need to be exported */ for(level = 1; level < (1 << PTask); level++) { tstart = second(); for(j = 0; j < NTask; j++) nbuffer[j] = 0; for(ngrp = level; ngrp < (1 << PTask); ngrp++) { maxfill = 0; for(j = 0; j < NTask; j++) { if((j ^ ngrp) < NTask) if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j]) maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j]; } if(maxfill >= All.BunchSizeHydro) break; sendTask = ThisTask; recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0) { /* get the particles */ MPI_Sendrecv(&HydroDataIn[noffset[recvTask]], nsend_local[recvTask] * sizeof(struct hydrodata_in), MPI_BYTE, recvTask, TAG_HYDRO_A, &HydroDataGet[nbuffer[ThisTask]], nsend[recvTask * NTask + ThisTask] * sizeof(struct hydrodata_in), MPI_BYTE, recvTask, TAG_HYDRO_A, MPI_COMM_WORLD, &status); } } for(j = 0; j < NTask; j++) if((j ^ ngrp) < NTask) nbuffer[j] += nsend[(j ^ ngrp) * NTask + j]; } tend = second(); timecommsumm += timediff(tstart, tend); /* now do the imported particles */ tstart = second(); ///////////////// GX ////////////////////// // Do exported particles on the CPU/GPU TimerBeg(94); { AssertsOnhasGadgetDataBeenModified_gx(1,1,0); #if CUDA_DEBUG_GX>1 MESSAGE("INFO: DistRMSGrav=%g",DistRMSGravdata(nbuffer[ThisTask],GravDataGet)); #endif starttime=GetTime(); const int N=nbuffer[ThisTask]; if (N>0){ // YYY NOTE: disable GPU exportmode for now!!! if (1 || s_gx.cudamode==0 || N<MIN_SPH_PARTICLES_FOR_GPU_GX || Np<MIN_SPH_PARTICLES_FOR_GPU_GX) { for(j = 0; j < nbuffer[ThisTask]; j++) hydro_evaluate(j, 1); } else { cpytime=GetTime(); InitializeHydraExportCalculation_gx(N,HydroDataGet); subtime=GetTime(); hydro_evaluate_range_cuda_gx(1,N,s_gx,p_gx,h_gx); subtime=GetTime()-subtime; FinalizeHydraExportCalculation_gx(N); cpytime=GetTime()-cpytime-subtime; } PrintInfoFinalize(s_gx,0,N,starttime,cpytime,subtime,3,iter,level,0,0,nexport,0,0,0); subtime=-1; } } TimerEnd(94); ///////////////// GX ////////////////////// tend = second(); timecomp += timediff(tstart, tend); /* do a block to measure imbalance */ TimerBeg(95); tstart = second(); MPI_Barrier(MPI_COMM_WORLD); tend = second(); timeimbalance += timediff(tstart, tend); TimerEnd(95); /* get the result */ tstart = second(); for(j = 0; j < NTask; j++) nbuffer[j] = 0; for(ngrp = level; ngrp < (1 << PTask); ngrp++) { maxfill = 0; for(j = 0; j < NTask; j++) { if((j ^ ngrp) < NTask) if(maxfill < nbuffer[j] + nsend[(j ^ ngrp) * NTask + j]) maxfill = nbuffer[j] + nsend[(j ^ ngrp) * NTask + j]; } if(maxfill >= All.BunchSizeHydro) break; sendTask = ThisTask; recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(nsend[ThisTask * NTask + recvTask] > 0 || nsend[recvTask * NTask + ThisTask] > 0) { /* send the results */ MPI_Sendrecv(&HydroDataResult[nbuffer[ThisTask]], nsend[recvTask * NTask + ThisTask] * sizeof(struct hydrodata_out), MPI_BYTE, recvTask, TAG_HYDRO_B, &HydroDataPartialResult[noffset[recvTask]], nsend_local[recvTask] * sizeof(struct hydrodata_out), MPI_BYTE, recvTask, TAG_HYDRO_B, MPI_COMM_WORLD, &status); /* add the result to the particles */ for(j = 0; j < nsend_local[recvTask]; j++) { source = j + noffset[recvTask]; place = HydroDataIn[source].Index; for(k = 0; k < 3; k++) SphP[place].HydroAccel[k] += HydroDataPartialResult[source].Acc[k]; SphP[place].DtEntropy += HydroDataPartialResult[source].DtEntropy; if(SphP[place].MaxSignalVel < HydroDataPartialResult[source].MaxSignalVel) SphP[place].MaxSignalVel = HydroDataPartialResult[source].MaxSignalVel; } } } for(j = 0; j < NTask; j++) if((j ^ ngrp) < NTask) nbuffer[j] += nsend[(j ^ ngrp) * NTask + j]; } tend = second(); timecommsumm += timediff(tstart, tend); level = ngrp - 1; } TimerEnd(92); MPI_Allgather(&ndone, 1, MPI_INT, ndonelist, 1, MPI_INT, MPI_COMM_WORLD); for(j = 0; j < NTask; j++) ntotleft -= ndonelist[j]; } free(ndonelist); free(nsend); free(nsend_local); free(nbuffer); free(noffset); /* do final operations on results */ tstart = second(); for(i = 0; i < N_gas; i++) if(P[i].Ti_endstep == All.Ti_Current) { SphP[i].DtEntropy *= GAMMA_MINUS1 / (hubble_a2 * pow(SphP[i].Density, GAMMA_MINUS1)); #ifdef SPH_BND_PARTICLES if(P[i].ID == 0) { SphP[i].DtEntropy = 0; for(k = 0; k < 3; k++) SphP[i].HydroAccel[k] = 0; } #endif } tend = second(); timecomp += timediff(tstart, tend); /* collect some timing information */ MPI_Reduce(&timecomp, &sumt, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(&timecommsumm, &sumcomm, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(&timeimbalance, &sumimbalance, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if(ThisTask == 0) { All.CPU_HydCompWalk += sumt / NTask; All.CPU_HydCommSumm += sumcomm / NTask; All.CPU_HydImbalance += sumimbalance / NTask; } TimerEnd(90); #ifdef RESULT_FILE_DUMP_GX static FILE* resultfile=NULL; if (resultfile==NULL) { char filename[256]; sprintf(filename,"resultfile.%d.%d.txt",s_gx.cudamode,ThisTask); resultfile=fopen(filename,"w"); } else{ MESSAGE("Dumping result..."); static int MM=0; int j; fprintf(resultfile,"Dumping result...N_gas=%d\n",N_gas); for(j=0;j<N_gas;++j) if(P[j].Ti_endstep == All.Ti_Current){ static int NN=0; const int target=j; fprintf(resultfile,"m=0, NN=%6d, t=%6d, e=%.4g, v=%.4g, acc={%.2g,%.2g,%.2g}\n",NN++,target,SphP[target].DtEntropy,SphP[target].MaxSignalVel,SphP[target].HydroAccel[0],SphP[target].HydroAccel[1],SphP[target].HydroAccel[2]); fflush(resultfile); } if (++MM>2) exit(-42); } #endif #ifdef CUDA_GX_NO_SPH_SUPPORT s_gx.cudamode=oldcudamode; #endif //MESSAGE("%6.2f, %6.2f, %6.2f, %6.2f, %6.2f - %5.1f, %5.1f, %5.1f, %5.1f %c sph timers d 90,93,94,95,net",TimerGet(90),TimerGet(93),TimerGet(94),TimerGet(95),TimerGet(90)-TimerGet(93)-TimerGet(94),100.0*TimerGet(93)/TimerGet(90),100.0*TimerGet(94)/TimerGet(90),100.0*TimerGet(95)/TimerGet(90),100.0*(TimerGet(90)-TimerGet(93)-TimerGet(94))/TimerGet(90),'%'); //MESSAGE("%6.2f, %6.2f, %6.2f, %6.2f, %6.2f - %5.1f, %5.1f, %5.1f, %5.1f %c sph timers a 90,93,94,95,net",TimerGetAccumulated(90),TimerGetAccumulated(93),TimerGetAccumulated(94),TimerGetAccumulated(95),TimerGetAccumulated(90)-TimerGetAccumulated(93)-TimerGetAccumulated(94),100.0*TimerGetAccumulated(93)/TimerGetAccumulated(90),100.0*TimerGetAccumulated(94)/TimerGetAccumulated(90),100.0*TimerGetAccumulated(95)/TimerGetAccumulated(90),100.0*(TimerGetAccumulated(90)-TimerGetAccumulated(93)-TimerGetAccumulated(94))/TimerGetAccumulated(90),'%'); }