/*! This function computes the gravitational potential for ALL the particles. * First, the (short-range) tree potential is computed, and then, if needed, * the long range PM potential is added. */ void compute_potential(void) { int i; #ifndef NOGRAVITY int j, k, ret, sendTask, recvTask; int ndone, ndone_flag, dummy; int ngrp, place, nexport, nimport; double fac; MPI_Status status; double r2; if(All.ComovingIntegrationOn) set_softenings(); if(ThisTask == 0) { printf("Start computation of potential for all particles...\n"); fflush(stdout); } CPU_Step[CPU_MISC] += measure_time(); if(TreeReconstructFlag) { if(ThisTask == 0) printf("Tree construction.\n"); CPU_Step[CPU_MISC] += measure_time(); #if defined(SFR) || defined(BLACK_HOLES) rearrange_particle_sequence(); #endif force_treebuild(NumPart, NULL); CPU_Step[CPU_TREEBUILD] += measure_time(); TreeReconstructFlag = 0; if(ThisTask == 0) printf("Tree construction done.\n"); } /* allocate buffers to arrange communication */ All.BunchSize = (int) ((All.BufferSize * 1024 * 1024) / (sizeof(struct data_index) + sizeof(struct data_nodelist) + sizeof(struct gravdata_in) + sizeof(struct potdata_out) + sizemax(sizeof(struct gravdata_in), sizeof(struct potdata_out)))); DataIndexTable = (struct data_index *) mymalloc(All.BunchSize * sizeof(struct data_index)); DataNodeList = (struct data_nodelist *) mymalloc(All.BunchSize * sizeof(struct data_nodelist)); for(i = 0; i < NumPart; i++) if(P[i].Ti_current != All.Ti_Current) drift_particle(i, All.Ti_Current); i = 0; /* beginn with this index */ do { for(j = 0; j < NTask; j++) { Send_count[j] = 0; Exportflag[j] = -1; } /* do local particles and prepare export list */ for(nexport = 0; i < NumPart; i++) { #ifndef PMGRID ret = force_treeevaluate_potential(i, 0, &nexport, Send_count); #else ret = force_treeevaluate_potential_shortrange(i, 0, &nexport, Send_count); #endif if(ret < 0) break; /* export buffer has filled up */ } #ifdef MYSORT mysort_dataindex(DataIndexTable, nexport, sizeof(struct data_index), data_index_compare); #else qsort(DataIndexTable, nexport, sizeof(struct data_index), data_index_compare); #endif MPI_Allgather(Send_count, NTask, MPI_INT, Sendcount_matrix, NTask, MPI_INT, MPI_COMM_WORLD); for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) { Recv_count[j] = Sendcount_matrix[j * NTask + ThisTask]; nimport += Recv_count[j]; if(j > 0) { Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; } } GravDataGet = (struct gravdata_in *) mymalloc(nimport * sizeof(struct gravdata_in)); GravDataIn = (struct gravdata_in *) mymalloc(nexport * sizeof(struct gravdata_in)); /* prepare particle data for export */ for(j = 0; j < nexport; j++) { place = DataIndexTable[j].Index; for(k = 0; k < 3; k++) GravDataIn[j].Pos[k] = P[place].Pos[k]; #ifdef UNEQUALSOFTENINGS GravDataIn[j].Type = P[place].Type; #ifdef ADAPTIVE_GRAVSOFT_FORGAS if(P[place].Type == 0) GravDataIn[j].Soft = SphP[place].Hsml; #endif #endif GravDataIn[j].OldAcc = P[place].OldAcc; for(k = 0; k < NODELISTLENGTH; k++) GravDataIn[j].NodeList[k] = DataNodeList[DataIndexTable[j].IndexGet].NodeList[k]; } /* exchange particle data */ for(ngrp = 1; ngrp < (1 << PTask); ngrp++) { sendTask = ThisTask; recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) { /* get the particles */ MPI_Sendrecv(&GravDataIn[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct gravdata_in), MPI_BYTE, recvTask, TAG_POTENTIAL_A, &GravDataGet[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct gravdata_in), MPI_BYTE, recvTask, TAG_POTENTIAL_A, MPI_COMM_WORLD, &status); } } } myfree(GravDataIn); PotDataResult = (struct potdata_out *) mymalloc(nimport * sizeof(struct potdata_out)); PotDataOut = (struct potdata_out *) mymalloc(nexport * sizeof(struct potdata_out)); /* now do the particles that were sent to us */ for(j = 0; j < nimport; j++) { #ifndef PMGRID force_treeevaluate_potential(j, 1, &dummy, &dummy); #else force_treeevaluate_potential_shortrange(j, 1, &dummy, &dummy); #endif } if(i >= NumPart) ndone_flag = 1; else ndone_flag = 0; MPI_Allreduce(&ndone_flag, &ndone, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); /* get the result */ for(ngrp = 1; ngrp < (1 << PTask); ngrp++) { sendTask = ThisTask; recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) { /* send the results */ MPI_Sendrecv(&PotDataResult[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct potdata_out), MPI_BYTE, recvTask, TAG_POTENTIAL_B, &PotDataOut[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct potdata_out), MPI_BYTE, recvTask, TAG_POTENTIAL_B, MPI_COMM_WORLD, &status); } } } /* add the results to the local particles */ for(j = 0; j < nexport; j++) { place = DataIndexTable[j].Index; P[place].p.dPotential += PotDataOut[j].Potential; } myfree(PotDataOut); myfree(PotDataResult); myfree(GravDataGet); } while(ndone < NTask); myfree(DataNodeList); myfree(DataIndexTable); /* add correction to exclude self-potential */ for(i = 0; i < NumPart; i++) { #ifdef FLTROUNDOFFREDUCTION P[i].p.Potential = FLT(P[i].p.dPotential); #endif /* remove self-potential */ P[i].p.Potential += P[i].Mass / All.SofteningTable[P[i].Type]; if(All.ComovingIntegrationOn) if(All.PeriodicBoundariesOn) P[i].p.Potential -= 2.8372975 * pow(P[i].Mass, 2.0 / 3) * pow(All.Omega0 * 3 * All.Hubble * All.Hubble / (8 * M_PI * All.G), 1.0 / 3); } /* multiply with the gravitational constant */ for(i = 0; i < NumPart; i++) P[i].p.Potential *= All.G; #ifdef PMGRID #ifdef PERIODIC pmpotential_periodic(); #ifdef PLACEHIGHRESREGION i = pmpotential_nonperiodic(1); if(i == 1) /* this is returned if a particle lied outside allowed range */ { pm_init_regionsize(); pm_setup_nonperiodic_kernel(); i = pmpotential_nonperiodic(1); /* try again */ } if(i == 1) endrun(88686); #endif #else i = pmpotential_nonperiodic(0); if(i == 1) /* this is returned if a particle lied outside allowed range */ { pm_init_regionsize(); pm_setup_nonperiodic_kernel(); i = pmpotential_nonperiodic(0); /* try again */ } if(i == 1) endrun(88687); #ifdef PLACEHIGHRESREGION i = pmpotential_nonperiodic(1); if(i == 1) /* this is returned if a particle lied outside allowed range */ { pm_init_regionsize(); i = pmpotential_nonperiodic(1); } if(i != 0) endrun(88688); #endif #endif #endif if(All.ComovingIntegrationOn) { #ifndef PERIODIC fac = -0.5 * All.Omega0 * All.Hubble * All.Hubble; for(i = 0; i < NumPart; i++) { for(k = 0, r2 = 0; k < 3; k++) r2 += P[i].Pos[k] * P[i].Pos[k]; P[i].p.Potential += fac * r2; } #endif } else { fac = -0.5 * All.OmegaLambda * All.Hubble * All.Hubble; if(fac != 0) { for(i = 0; i < NumPart; i++) { for(k = 0, r2 = 0; k < 3; k++) r2 += P[i].Pos[k] * P[i].Pos[k]; P[i].p.Potential += fac * r2; } } } if(ThisTask == 0) { printf("potential done.\n"); fflush(stdout); } #else for(i = 0; i < NumPart; i++) P[i].Potential = 0; #endif CPU_Step[CPU_POTENTIAL] += measure_time(); }
void cosmic_ray_diffusion_matrix_multiply(double *cr_E0_in, double *cr_E0_out, double *cr_n0_in, double *cr_n0_out, int CRpop) { int i, j, k, ngrp, ndone, ndone_flag, dummy; int sendTask, recvTask, nexport, nimport, place; double *cr_E0_sum, *cr_n0_sum; /* allocate buffers to arrange communication */ Ngblist = (int *) mymalloc("Ngblist", NumPart * sizeof(int)); All.BunchSize = (int) ((All.BufferSize * 1024 * 1024) / (sizeof(struct data_index) + sizeof(struct data_nodelist) + sizeof(struct crdiffusiondata_in) + sizeof(struct crdiffusiondata_out) + sizemax(sizeof(struct crdiffusiondata_in), sizeof(struct crdiffusiondata_out)))); DataIndexTable = (struct data_index *) mymalloc("DataIndexTable", All.BunchSize * sizeof(struct data_index)); DataNodeList = (struct data_nodelist *) mymalloc("DataNodeList", All.BunchSize * sizeof(struct data_nodelist)); cr_E0_sum = (double *) mymalloc("cr_E0_sum", N_gas * sizeof(double)); cr_n0_sum = (double *) mymalloc("cr_n0_sum", N_gas * sizeof(double)); i = 0; /* need to go over all gas particles */ do { for(j = 0; j < NTask; j++) { Send_count[j] = 0; Exportflag[j] = -1; } /* do local particles and prepare export list */ for(nexport = 0; i < N_gas; i++) if(P[i].Type == 0) { if(cosmic_ray_diffusion_evaluate(i, 0, cr_E0_in, cr_E0_out, cr_E0_sum, cr_n0_in, cr_n0_out, cr_n0_sum, &nexport, Send_count, CRpop) < 0) break; } #ifdef MYSORT mysort_dataindex(DataIndexTable, nexport, sizeof(struct data_index), data_index_compare); #else qsort(DataIndexTable, nexport, sizeof(struct data_index), data_index_compare); #endif MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) { nimport += Recv_count[j]; if(j > 0) { Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; } } CR_DiffusionDataGet = (struct crdiffusiondata_in *) mymalloc(nimport * sizeof(struct crdiffusiondata_in)); CR_DiffusionDataIn = (struct crdiffusiondata_in *) mymalloc(nexport * sizeof(struct crdiffusiondata_in)); /* prepare particle data for export */ for(j = 0; j < nexport; j++) { place = DataIndexTable[j].Index; for(k = 0; k < 3; k++) CR_DiffusionDataIn[j].Pos[k] = P[place].Pos[k]; CR_DiffusionDataIn[j].Hsml = PPP[place].Hsml; CR_DiffusionDataIn[j].Density = SphP[place].d.Density; CR_DiffusionDataIn[j].CR_E0_Kappa[CRpop] = CR_E0_Kappa[CRpop][place]; CR_DiffusionDataIn[j].CR_n0_Kappa[CRpop] = CR_n0_Kappa[CRpop][place]; memcpy(CR_DiffusionDataIn[j].NodeList, DataNodeList[DataIndexTable[j].IndexGet].NodeList, NODELISTLENGTH * sizeof(int)); } /* exchange particle data */ for(ngrp = 1; ngrp < (1 << PTask); ngrp++) { sendTask = ThisTask; recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) { /* get the particles */ MPI_Sendrecv(&CR_DiffusionDataIn[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct crdiffusiondata_in), MPI_BYTE, recvTask, TAG_HYDRO_A, &CR_DiffusionDataGet[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct crdiffusiondata_in), MPI_BYTE, recvTask, TAG_HYDRO_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } } myfree(CR_DiffusionDataIn); CR_DiffusionDataResult = (struct crdiffusiondata_out *) mymalloc(nimport * sizeof(struct crdiffusiondata_out)); CR_DiffusionDataOut = (struct crdiffusiondata_out *) mymalloc(nexport * sizeof(struct crdiffusiondata_out)); /* now do the particles that were sent to us */ for(j = 0; j < nimport; j++) cosmic_ray_diffusion_evaluate(j, 1, cr_E0_in, cr_E0_out, cr_E0_sum, cr_n0_in, cr_n0_out, cr_n0_sum, &dummy, &dummy, CRpop); if(i >= N_gas) ndone_flag = 1; else ndone_flag = 0; MPI_Allreduce(&ndone_flag, &ndone, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); /* get the result */ for(ngrp = 1; ngrp < (1 << PTask); ngrp++) { sendTask = ThisTask; recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) { /* send the results */ MPI_Sendrecv(&CR_DiffusionDataResult[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct crdiffusiondata_out), MPI_BYTE, recvTask, TAG_HYDRO_B, &CR_DiffusionDataOut[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct crdiffusiondata_out), MPI_BYTE, recvTask, TAG_HYDRO_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } } /* add the result to the local particles */ for(j = 0; j < nexport; j++) { place = DataIndexTable[j].Index; cr_E0_out[place] += CR_DiffusionDataOut[j].CR_E0_Out; cr_E0_sum[place] += CR_DiffusionDataOut[j].CR_E0_Sum; cr_n0_out[place] += CR_DiffusionDataOut[j].CR_n0_Out; cr_n0_sum[place] += CR_DiffusionDataOut[j].CR_n0_Sum; } myfree(CR_DiffusionDataOut); myfree(CR_DiffusionDataResult); myfree(CR_DiffusionDataGet); } while(ndone < NTask); /* do final operations */ for(i = 0; i < N_gas; i++) if(P[i].Type == 0) { cr_E0_out[i] += cr_E0_in[i] * (1 + cr_E0_sum[i]); cr_n0_out[i] += cr_n0_in[i] * (1 + cr_n0_sum[i]); } myfree(cr_n0_sum); myfree(cr_E0_sum); myfree(DataNodeList); myfree(DataIndexTable); myfree(Ngblist); }
/* This function updates the weights for SN before exploding. This is * necessary due to the fact that gas particles neighbours of a given star * could have been transformed into stars and they need to be taken off the * neighbour list for the exploding star. */ void cs_update_weights(void) { MyFloat *Left, *Right; int i, j, ndone, ndone_flag, npleft, dummy, iter = 0; int ngrp, sendTask, recvTask, place, nexport, nimport; long long ntot; double dmax1, dmax2; double desnumngb; if(ThisTask == 0) { printf("... start update weights phase = %d ...\n", Flag_phase); fflush(stdout); } Left = (MyFloat *) mymalloc(NumPart * sizeof(MyFloat)); Right = (MyFloat *) mymalloc(NumPart * sizeof(MyFloat)); for(i = FirstActiveParticle; i >= 0; i = NextActiveParticle[i]) { if(P[i].Type == 6 || P[i].Type == 7) { Left[i] = Right[i] = 0; } } /* allocate buffers to arrange communication */ Ngblist = (int *) mymalloc(NumPart * sizeof(int)); R2ngblist = (double *) mymalloc(NumPart * sizeof(double)); All.BunchSize = (int) ((All.BufferSize * 1024 * 1024) / (sizeof(struct data_index) + sizeof(struct data_nodelist) + sizeof(struct updateweight_in) + sizeof(struct updateweight_out) + sizemax(sizeof(struct updateweight_in), sizeof(struct updateweight_out)))); DataIndexTable = (struct data_index *) mymalloc(All.BunchSize * sizeof(struct data_index)); DataNodeList = (struct data_nodelist *) mymalloc(All.BunchSize * sizeof(struct data_nodelist)); desnumngb = All.DesNumNgb; /* we will repeat the whole thing for those particles where we didn't find enough neighbours */ do { i = FirstActiveParticle; /* begin with this index */ do { for(j = 0; j < NTask; j++) { Send_count[j] = 0; Exportflag[j] = -1; } /* do local particles and prepare export list */ for(nexport = 0; i >= 0; i = NextActiveParticle[i]) { if((P[i].Type == 6 || P[i].Type == 7) && P[i].TimeBin >= 0) { if(cs_update_weight_evaluate(i, 0, &nexport, Send_count) < 0) break; } } #ifdef MYSORT mysort_dataindex(DataIndexTable, nexport, sizeof(struct data_index), data_index_compare); #else qsort(DataIndexTable, nexport, sizeof(struct data_index), data_index_compare); #endif MPI_Allgather(Send_count, NTask, MPI_INT, Sendcount_matrix, NTask, MPI_INT, MPI_COMM_WORLD); for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) { Recv_count[j] = Sendcount_matrix[j * NTask + ThisTask]; nimport += Recv_count[j]; if(j > 0) { Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; } } UpdateweightGet = (struct updateweight_in *) mymalloc(nimport * sizeof(struct updateweight_in)); UpdateweightIn = (struct updateweight_in *) mymalloc(nexport * sizeof(struct updateweight_in)); /* prepare particle data for export */ for(j = 0; j < nexport; j++) { place = DataIndexTable[j].Index; UpdateweightIn[j].Pos[0] = P[place].Pos[0]; UpdateweightIn[j].Pos[1] = P[place].Pos[1]; UpdateweightIn[j].Pos[2] = P[place].Pos[2]; UpdateweightIn[j].Hsml = PPP[place].Hsml; memcpy(UpdateweightIn[j].NodeList, DataNodeList[DataIndexTable[j].IndexGet].NodeList, NODELISTLENGTH * sizeof(int)); } /* exchange particle data */ for(ngrp = 1; ngrp < (1 << PTask); ngrp++) { sendTask = ThisTask; recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) { /* get the particles */ MPI_Sendrecv(&UpdateweightIn[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct updateweight_in), MPI_BYTE, recvTask, TAG_DENS_A, &UpdateweightGet[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct updateweight_in), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } } myfree(UpdateweightIn); UpdateweightResult = (struct updateweight_out *) mymalloc(nimport * sizeof(struct updateweight_out)); UpdateweightOut = (struct updateweight_out *) mymalloc(nexport * sizeof(struct updateweight_out)); /* now do the particles that were sent to us */ for(j = 0; j < nimport; j++) cs_update_weight_evaluate(j, 1, &dummy, &dummy); if(i < 0) ndone_flag = 1; else ndone_flag = 0; MPI_Allreduce(&ndone_flag, &ndone, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); /* get the result */ for(ngrp = 1; ngrp < (1 << PTask); ngrp++) { sendTask = ThisTask; recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) { /* send the results */ MPI_Sendrecv(&UpdateweightResult[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct updateweight_out), MPI_BYTE, recvTask, TAG_DENS_B, &UpdateweightOut[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct updateweight_out), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } } /* add the result to the local particles */ for(j = 0; j < nexport; j++) { place = DataIndexTable[j].Index; PPP[place].n.dNumNgb += UpdateweightOut[j].Ngb; } myfree(UpdateweightOut); myfree(UpdateweightResult); myfree(UpdateweightGet); } while(ndone < NTask); /* do final operations on results */ for(i = FirstActiveParticle, npleft = 0; i >= 0; i = NextActiveParticle[i]) { if(P[i].Type == 6 || P[i].Type == 7) { #ifdef FLTROUNDOFFREDUCTION PPP[i].n.NumNgb = FLT(PPP[i].n.dNumNgb); #endif /* now check whether we had enough neighbours */ if(PPP[i].n.NumNgb < (desnumngb - All.MaxNumNgbDeviation) || (PPP[i].n.NumNgb > (desnumngb + All.MaxNumNgbDeviation) && PPP[i].Hsml > (1.01 * All.MinGasHsml))) { /* need to redo this particle */ npleft++; if(Left[i] > 0 && Right[i] > 0) if((Right[i] - Left[i]) < 1.0e-3 * Left[i]) { /* this one should be ok */ npleft--; P[i].TimeBin = -P[i].TimeBin - 1; /* Mark as inactive */ continue; } if(PPP[i].n.NumNgb < (desnumngb - All.MaxNumNgbDeviation)) Left[i] = DMAX(PPP[i].Hsml, Left[i]); else { if(Right[i] != 0) { if(PPP[i].Hsml < Right[i]) Right[i] = PPP[i].Hsml; } else Right[i] = PPP[i].Hsml; } if(iter >= MAXITER - 10) { printf ("i=%d task=%d ID=%d Hsml=%g Left=%g Right=%g Ngbs=%g Right-Left=%g\n pos=(%g|%g|%g)\n", i, ThisTask, (int) P[i].ID, PPP[i].Hsml, Left[i], Right[i], (float) PPP[i].n.NumNgb, Right[i] - Left[i], P[i].Pos[0], P[i].Pos[1], P[i].Pos[2]); fflush(stdout); } if(Right[i] > 0 && Left[i] > 0) PPP[i].Hsml = pow(0.5 * (pow(Left[i], 3) + pow(Right[i], 3)), 1.0 / 3); else { if(Right[i] == 0 && Left[i] == 0) endrun(8188); /* can't occur */ if(Right[i] == 0 && Left[i] > 0) { PPP[i].Hsml *= 1.26; } if(Right[i] > 0 && Left[i] == 0) { PPP[i].Hsml /= 1.26; } } if(PPP[i].Hsml < All.MinGasHsml) PPP[i].Hsml = All.MinGasHsml; } else P[i].TimeBin = -P[i].TimeBin - 1; /* Mark as inactive */ /* CECILIA */ if(iter == MAXITER) { int old_type; old_type = P[i].Type; P[i].Type = 4; /* no SN mark any more */ PPP[i].n.NumNgb = 0; printf("part=%d of type=%d was assigned NumNgb=%g and type=%d\n", i, old_type, PPP[i].n.NumNgb, P[i].Type); } } } sumup_large_ints(1, &npleft, &ntot); if(ntot > 0) { iter++; if(iter > 0 && ThisTask == 0) { printf("ngb iteration %d: need to repeat for %d%09d particles.\n", iter, (int) (ntot / 1000000000), (int) (ntot % 1000000000)); fflush(stdout); } if(iter > MAXITER) { #ifndef CS_FEEDBACK printf("failed to converge in neighbour iteration in update_weights \n"); fflush(stdout); endrun(1155); #else /* CECILIA */ if(Flag_phase == 2) /* HOT */ { printf("Not enough hot neighbours for energy/metal distribution part=%d Type=%d\n", i, P[i].Type); fflush(stdout); break; /* endrun(1156); */ } else { printf("Not enough cold neighbours for energy/metal distribution part=%d Type=%d\n", i, P[i].Type); fflush(stdout); break; } #endif } } } while(ntot > 0); myfree(DataNodeList); myfree(DataIndexTable); myfree(R2ngblist); myfree(Ngblist); myfree(Right); myfree(Left); /* mark as active again */ for(i = FirstActiveParticle; i >= 0; i = NextActiveParticle[i]) if(P[i].TimeBin < 0) P[i].TimeBin = -P[i].TimeBin - 1; /* collect some timing information */ if(ThisTask == 0) { printf("... update weights phase = %d done...\n", Flag_phase); fflush(stdout); } }
/* this function computes the vector b(out) given the vector x(in) such as Ax = b, where A is a matrix */ void radtransfer_matrix_multiply(double *in, double *out, double *sum) { int i, j, k, ngrp, dummy, ndone, ndone_flag; int sendTask, recvTask, nexport, nimport, place; double a; /* allocate buffers to arrange communication */ Ngblist = (int *) mymalloc(NumPart * sizeof(int)); All.BunchSize = (int) ((All.BufferSize * 1024 * 1024) / (sizeof(struct data_index) + sizeof(struct data_nodelist) + sizeof(struct radtransferdata_in) + sizeof(struct radtransferdata_out) + sizemax(sizeof(struct radtransferdata_in), sizeof(struct radtransferdata_out)))); DataIndexTable = (struct data_index *) mymalloc(All.BunchSize * sizeof(struct data_index)); DataNodeList = (struct data_nodelist *) mymalloc(All.BunchSize * sizeof(struct data_nodelist)); if(All.ComovingIntegrationOn) a = All.Time; else a = 1.0; i = 0; do /* communication loop */ { for(j = 0; j < NTask; j++) { Send_count[j] = 0; Exportflag[j] = -1; } /* do local particles and prepare export list */ for(nexport = 0; i < N_gas; i++) { if(P[i].Type == 0) #ifdef RT_VAR_DT if(SphP[i].rt_flag == 1) #endif if(radtransfer_evaluate(i, 0, in, out, sum, &nexport, Send_count) < 0) break; } #ifdef MYSORT mysort_dataindex(DataIndexTable, nexport, sizeof(struct data_index), data_index_compare); #else qsort(DataIndexTable, nexport, sizeof(struct data_index), data_index_compare); #endif MPI_Allgather(Send_count, NTask, MPI_INT, Sendcount_matrix, NTask, MPI_INT, MPI_COMM_WORLD); for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) { Recv_count[j] = Sendcount_matrix[j * NTask + ThisTask]; nimport += Recv_count[j]; if(j > 0) { Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; } } RadTransferDataGet = (struct radtransferdata_in *) mymalloc(nimport * sizeof(struct radtransferdata_in)); RadTransferDataIn = (struct radtransferdata_in *) mymalloc(nexport * sizeof(struct radtransferdata_in)); /* prepare particle data for export */ for(j = 0; j < nexport; j++) { place = DataIndexTable[j].Index; for(k = 0; k < 3; k++) { RadTransferDataIn[j].Pos[k] = P[place].Pos[k]; RadTransferDataIn[j].ET[k] = SphP[place].ET[k]; RadTransferDataIn[j].ET[k + 3] = SphP[place].ET[k + 3]; } RadTransferDataIn[j].Hsml = PPP[place].Hsml; RadTransferDataIn[j].Kappa = Kappa[place]; RadTransferDataIn[j].Lambda = Lambda[place]; RadTransferDataIn[j].Mass = P[place].Mass; RadTransferDataIn[j].Density = SphP[place].d.Density; memcpy(RadTransferDataIn[j].NodeList, DataNodeList[DataIndexTable[j].IndexGet].NodeList, NODELISTLENGTH * sizeof(int)); } /* exchange particle data */ for(ngrp = 1; ngrp < (1 << PTask); ngrp++) { sendTask = ThisTask; recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) { /* get the particles */ MPI_Sendrecv(&RadTransferDataIn[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct radtransferdata_in), MPI_BYTE, recvTask, TAG_RT_A, &RadTransferDataGet[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct radtransferdata_in), MPI_BYTE, recvTask, TAG_RT_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } } myfree(RadTransferDataIn); RadTransferDataResult = (struct radtransferdata_out *) mymalloc(nimport * sizeof(struct radtransferdata_out)); RadTransferDataOut = (struct radtransferdata_out *) mymalloc(nexport * sizeof(struct radtransferdata_out)); /* now do the particles that were sent to us */ for(j = 0; j < nimport; j++) radtransfer_evaluate(j, 1, in, out, sum, &dummy, &dummy); if(i < N_gas) ndone_flag = 0; else ndone_flag = 1; MPI_Allreduce(&ndone_flag, &ndone, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); /* get the result */ for(ngrp = 1; ngrp < (1 << PTask); ngrp++) { sendTask = ThisTask; recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) { /* send the results */ MPI_Sendrecv(&RadTransferDataResult[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct radtransferdata_out), MPI_BYTE, recvTask, TAG_RT_B, &RadTransferDataOut[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct radtransferdata_out), MPI_BYTE, recvTask, TAG_RT_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } } /* add the result to the local particles */ for(j = 0; j < nexport; j++) { place = DataIndexTable[j].Index; out[place] += RadTransferDataOut[j].Out; sum[place] += RadTransferDataOut[j].Sum; } myfree(RadTransferDataOut); myfree(RadTransferDataResult); myfree(RadTransferDataGet); } while(ndone < NTask); /* do final operations on results */ for(i = 0; i < N_gas; i++) if(P[i].Type == 0) { #ifdef RT_VAR_DT if(SphP[i].rt_flag == 0) { sum[i] = 0.0; out[i] = 0.0; } else #endif { /* divide c_light by a to get comoving speed of light (because kappa is comoving) */ if((1 + dt * (c_light / a) * Kappa[i] + sum[i]) < 0) { printf("1 + sum + rate= %g sum=%g rate=%g i =%d\n", 1 + dt * (c_light / a) * Kappa[i] + sum[i], sum[i], dt * (c_light / a) * Kappa[i], i); endrun(123); } sum[i] += 1.0 + dt * (c_light / a) * Kappa[i]; out[i] += in[i] * sum[i]; } } myfree(DataNodeList); myfree(DataIndexTable); myfree(Ngblist); }
void subfind_potential_compute(int num, struct unbind_data *d, int phase, double weakly_bound_limit) { int i, j, k, sendTask, recvTask; int ndone, ndone_flag, dummy; int ngrp, place, nexport, nimport; double atime; /* allocate buffers to arrange communication */ All.BunchSize = (int) ((All.BufferSize * 1024 * 1024) / (sizeof(struct data_index) + sizeof(struct data_nodelist) + sizeof(struct gravdata_in) + sizeof(struct potdata_out) + sizemax(sizeof(struct gravdata_in), sizeof(struct potdata_out)))); DataIndexTable = (struct data_index *) mymalloc(All.BunchSize * sizeof(struct data_index)); DataNodeList = (struct data_nodelist *) mymalloc(All.BunchSize * sizeof(struct data_nodelist)); i = 0; /* beginn with this index */ do { for(j = 0; j < NTask; j++) { Send_count[j] = 0; Exportflag[j] = -1; } /* do local particles and prepare export list */ for(nexport = 0; i < num; i++) { if(phase == 1) if(P[d[i].index].v.DM_BindingEnergy <= weakly_bound_limit) continue; if(subfind_force_treeevaluate_potential(d[i].index, 0, &nexport, Send_count) < 0) break; } qsort(DataIndexTable, nexport, sizeof(struct data_index), data_index_compare); MPI_Allgather(Send_count, NTask, MPI_INT, Sendcount_matrix, NTask, MPI_INT, MPI_COMM_WORLD); for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) { Recv_count[j] = Sendcount_matrix[j * NTask + ThisTask]; nimport += Recv_count[j]; if(j > 0) { Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; } } GravDataGet = (struct gravdata_in *) mymalloc(nimport * sizeof(struct gravdata_in)); GravDataIn = (struct gravdata_in *) mymalloc(nexport * sizeof(struct gravdata_in)); /* prepare particle data for export */ for(j = 0; j < nexport; j++) { place = DataIndexTable[j].Index; for(k = 0; k < 3; k++) GravDataIn[j].Pos[k] = P[place].Pos[k]; for(k = 0; k < NODELISTLENGTH; k++) GravDataIn[j].NodeList[k] = DataNodeList[DataIndexTable[j].IndexGet].NodeList[k]; #ifdef UNEQUALSOFTENINGS GravDataIn[j].Type = P[place].Type; #ifdef ADAPTIVE_GRAVSOFT_FORGAS if(P[place].Type == 0) GravDataIn[j].Soft = SphP[place].Hsml; #endif #endif } /* exchange particle data */ for(ngrp = 1; ngrp < (1 << PTask); ngrp++) { sendTask = ThisTask; recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) { /* get the particles */ MPI_Sendrecv(&GravDataIn[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct gravdata_in), MPI_BYTE, recvTask, TAG_POTENTIAL_A, &GravDataGet[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct gravdata_in), MPI_BYTE, recvTask, TAG_POTENTIAL_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } } myfree(GravDataIn); PotDataResult = (struct potdata_out *) mymalloc(nimport * sizeof(struct potdata_out)); PotDataOut = (struct potdata_out *) mymalloc(nexport * sizeof(struct potdata_out)); /* now do the particles that were sent to us */ for(j = 0; j < nimport; j++) subfind_force_treeevaluate_potential(j, 1, &dummy, &dummy); if(i >= num) ndone_flag = 1; else ndone_flag = 0; MPI_Allreduce(&ndone_flag, &ndone, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); /* get the result */ for(ngrp = 1; ngrp < (1 << PTask); ngrp++) { sendTask = ThisTask; recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) { /* send the results */ MPI_Sendrecv(&PotDataResult[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct potdata_out), MPI_BYTE, recvTask, TAG_POTENTIAL_B, &PotDataOut[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct potdata_out), MPI_BYTE, recvTask, TAG_POTENTIAL_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } } /* add the results to the local particles */ for(j = 0; j < nexport; j++) { place = DataIndexTable[j].Index; P[place].u.DM_Potential += PotDataOut[j].Potential; } myfree(PotDataOut); myfree(PotDataResult); myfree(GravDataGet); } while(ndone < NTask); if(All.ComovingIntegrationOn) atime = All.Time; else atime = 1; for(i = 0; i < num; i++) { if(phase == 1) if(P[d[i].index].v.DM_BindingEnergy <= weakly_bound_limit) continue; P[d[i].index].u.DM_Potential += P[d[i].index].Mass / All.SofteningTable[P[d[i].index].Type]; P[d[i].index].u.DM_Potential *= All.G / atime; if(All.TotN_gas > 0 && (FOF_PRIMARY_LINK_TYPES & 1) == 0 && All.OmegaBaryon > 0) P[d[i].index].u.DM_Potential *= All.Omega0 / (All.Omega0 - All.OmegaBaryon); } myfree(DataNodeList); myfree(DataIndexTable); }
void subfind_find_linkngb(void) { long long ntot; int i, j, ndone, ndone_flag, npleft, dummy, iter = 0, save_DesNumNgb; MyFloat *Left, *Right; char *Todo; int ngrp, recvTask, place, nexport, nimport; double t0, t1; if(ThisTask == 0) printf("Start find_linkngb (%d particles on task=%d)\n", NumPartGroup, ThisTask); save_DesNumNgb = All.DesNumNgb; All.DesNumNgb = All.DesLinkNgb; /* for simplicity, reset this value */ /* allocate buffers to arrange communication */ Ngblist = (int *) mymalloc("Ngblist", NumPartGroup * sizeof(int)); Dist2list = (double *) mymalloc("Dist2list", NumPartGroup * sizeof(double)); All.BunchSize = (int) ((All.BufferSize * 1024 * 1024) / (sizeof(struct data_index) + sizeof(struct data_nodelist) + sizeof(struct linkngbdata_in) + sizeof(struct linkngbdata_out) + sizemax(sizeof(struct linkngbdata_in), sizeof(struct linkngbdata_out)))); DataIndexTable = (struct data_index *) mymalloc("DataIndexTable", All.BunchSize * sizeof(struct data_index)); DataNodeList = (struct data_nodelist *) mymalloc("DataNodeList", All.BunchSize * sizeof(struct data_nodelist)); Left = mymalloc("Left", sizeof(MyFloat) * NumPartGroup); Right = mymalloc("Right", sizeof(MyFloat) * NumPartGroup); Todo = mymalloc("Todo", sizeof(char) * NumPartGroup); for(i = 0; i < NumPartGroup; i++) { Left[i] = Right[i] = 0; Todo[i] = 1; } /* we will repeat the whole thing for those particles where we didn't find enough neighbours */ do { t0 = second(); i = 0; /* begin with this index */ do { for(j = 0; j < NTask; j++) { Send_count[j] = 0; Exportflag[j] = -1; } /* do local particles and prepare export list */ for(nexport = 0; i < NumPartGroup; i++) { if(Todo[i]) { if(subfind_linkngb_evaluate(i, 0, &nexport, Send_count) < 0) break; } } #ifdef OMP_SORT omp_qsort(DataIndexTable, nexport, sizeof(struct data_index), data_index_compare); #else qsort(DataIndexTable, nexport, sizeof(struct data_index), data_index_compare); #endif MPI_Alltoall(Send_count, 1, MPI_INT, Recv_count, 1, MPI_INT, MPI_COMM_WORLD); for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) { nimport += Recv_count[j]; if(j > 0) { Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; } } LinkngbDataGet = (struct linkngbdata_in *) mymalloc(" LinkngbDataGet", nimport * sizeof(struct linkngbdata_in)); LinkngbDataIn = (struct linkngbdata_in *) mymalloc(" LinkngbDataIn", nexport * sizeof(struct linkngbdata_in)); /* prepare particle data for export */ for(j = 0; j < nexport; j++) { place = DataIndexTable[j].Index; LinkngbDataIn[j].Pos[0] = P[place].Pos[0]; LinkngbDataIn[j].Pos[1] = P[place].Pos[1]; LinkngbDataIn[j].Pos[2] = P[place].Pos[2]; LinkngbDataIn[j].DM_Hsml = P[place].DM_Hsml; memcpy(LinkngbDataIn[j].NodeList, DataNodeList[DataIndexTable[j].IndexGet].NodeList, NODELISTLENGTH * sizeof(int)); } /* exchange particle data */ for(ngrp = 1; ngrp < (1 << PTask); ngrp++) { recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) { /* get the particles */ MPI_Sendrecv(&LinkngbDataIn[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct linkngbdata_in), MPI_BYTE, recvTask, TAG_DENS_A, &LinkngbDataGet[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct linkngbdata_in), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } } myfree(LinkngbDataIn); LinkngbDataResult = (struct linkngbdata_out *) mymalloc(" LinkngbDataResult", nimport * sizeof(struct linkngbdata_out)); LinkngbDataOut = (struct linkngbdata_out *) mymalloc(" LinkngbDataOut", nexport * sizeof(struct linkngbdata_out)); /* now do the particles that were sent to us */ for(j = 0; j < nimport; j++) subfind_linkngb_evaluate(j, 1, &dummy, &dummy); if(i >= NumPartGroup) ndone_flag = 1; else ndone_flag = 0; MPI_Allreduce(&ndone_flag, &ndone, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); /* get the result */ for(ngrp = 1; ngrp < (1 << PTask); ngrp++) { recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) { /* send the results */ MPI_Sendrecv(&LinkngbDataResult[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct linkngbdata_out), MPI_BYTE, recvTask, TAG_DENS_B, &LinkngbDataOut[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct linkngbdata_out), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } } /* add the result to the local particles */ for(j = 0; j < nexport; j++) { place = DataIndexTable[j].Index; P[place].DM_NumNgb += LinkngbDataOut[j].Ngb; } myfree(LinkngbDataOut); myfree(LinkngbDataResult); myfree(LinkngbDataGet); } while(ndone < NTask); /* do final operations on results */ for(i = 0, npleft = 0; i < NumPartGroup; i++) { /* now check whether we had enough neighbours */ if(Todo[i]) { if(P[i].DM_NumNgb != All.DesLinkNgb && ((Right[i] - Left[i]) > 1.0e-3 * Left[i] || Left[i] == 0 || Right[i] == 0)) { /* need to redo this particle */ npleft++; if(P[i].DM_NumNgb < All.DesLinkNgb) Left[i] = DMAX(P[i].DM_Hsml, Left[i]); else { if(Right[i] != 0) { if(P[i].DM_Hsml < Right[i]) Right[i] = P[i].DM_Hsml; } else Right[i] = P[i].DM_Hsml; } if(iter >= MAXITER - 10) { printf ("i=%d task=%d ID=%d DM_Hsml=%g Left=%g Right=%g Ngbs=%g Right-Left=%g\n pos=(%g|%g|%g)\n", i, ThisTask, (int) P[i].ID, P[i].DM_Hsml, Left[i], Right[i], (double) P[i].DM_NumNgb, Right[i] - Left[i], P[i].Pos[0], P[i].Pos[1], P[i].Pos[2]); fflush(stdout); } if(Right[i] > 0 && Left[i] > 0) P[i].DM_Hsml = pow(0.5 * (pow(Left[i], 3) + pow(Right[i], 3)), 1.0 / 3); else { if(Right[i] == 0 && Left[i] == 0) endrun(8189); /* can't occur */ if(Right[i] == 0 && Left[i] > 0) P[i].DM_Hsml *= 1.26; if(Right[i] > 0 && Left[i] == 0) P[i].DM_Hsml /= 1.26; } } else Todo[i] = 0; } } sumup_large_ints(1, &npleft, &ntot); t1 = second(); if(ntot > 0) { iter++; if(iter > 0 && ThisTask == 0) { printf("find linkngb iteration %d: need to repeat for %d%09d particles. (took %g sec)\n", iter, (int) (ntot / 1000000000), (int) (ntot % 1000000000), timediff(t0, t1)); fflush(stdout); } if(iter > MAXITER) { printf("failed to converge in neighbour iteration in density()\n"); fflush(stdout); endrun(1155); } } } while(ntot > 0); myfree(Todo); myfree(Right); myfree(Left); myfree(DataNodeList); myfree(DataIndexTable); myfree(Dist2list); myfree(Ngblist); All.DesNumNgb = save_DesNumNgb; /* restore it */ }
void cs_find_hot_neighbours(void) { MyFloat *Left, *Right; int nimport; int i, j, n, ndone_flag, dummy; int ndone, ntot, npleft; int iter = 0; int ngrp, sendTask, recvTask; int place, nexport; double dmax1, dmax2; double xhyd, yhel, ne, mu, energy, temp; double a3inv; if(All.ComovingIntegrationOn) a3inv = 1 / (All.Time * All.Time * All.Time); else a3inv = 1; /* allocate buffers to arrange communication */ Left = (MyFloat *) mymalloc(NumPart * sizeof(MyFloat)); Right = (MyFloat *) mymalloc(NumPart * sizeof(MyFloat)); Ngblist = (int *) mymalloc(NumPart * sizeof(int)); All.BunchSize = (int) ((All.BufferSize * 1024 * 1024) / (sizeof(struct data_index) + sizeof(struct data_nodelist) + sizeof(struct hotngbs_in) + sizeof(struct hotngbs_out) + sizemax(sizeof(struct hotngbs_in), sizeof(struct hotngbs_out)))); DataIndexTable = (struct data_index *) mymalloc(All.BunchSize * sizeof(struct data_index)); DataNodeList = (struct data_nodelist *) mymalloc(All.BunchSize * sizeof(struct data_nodelist)); CPU_Step[CPU_MISC] += measure_time(); for(n = FirstActiveParticle; n >= 0; n = NextActiveParticle[n]) { if(P[n].Type == 0) { /* select reservoir and cold phase particles */ if(P[n].EnergySN > 0 && SphP[n].d.Density * a3inv > All.PhysDensThresh * All.DensFrac_Phase) { xhyd = P[n].Zm[6] / P[n].Mass; yhel = (1 - xhyd) / (4. * xhyd); ne = SphP[n].Ne; mu = (1 + 4 * yhel) / (1 + yhel + ne); energy = SphP[n].Entropy * P[n].Mass / GAMMA_MINUS1 * pow(SphP[n].d.Density * a3inv, GAMMA_MINUS1); /* Total Energys */ temp = GAMMA_MINUS1 / BOLTZMANN * energy / P[n].Mass * PROTONMASS * mu; temp *= All.UnitEnergy_in_cgs / All.UnitMass_in_g; /* Temperature in Kelvin */ if(temp < All.Tcrit_Phase) { Left[n] = Right[n] = 0; if(!(SphP[n].HotHsml > 0.)) SphP[n].HotHsml = All.InitialHotHsmlFactor * PPP[n].Hsml; /* Estimation of HotHsml : ONLY first step */ P[n].Type = 10; /* temporarily mark particles of interest with this number */ } } } } /* we will repeat the whole thing for those particles where we didn't find enough neighbours */ do { i = FirstActiveParticle; /* beginn with this index */ do { for(j = 0; j < NTask; j++) { Send_count[j] = 0; Exportflag[j] = -1; } /* do local particles and prepare export list */ for(nexport = 0; i >= 0; i = NextActiveParticle[i]) if(P[i].Type == 10 && P[i].TimeBin >= 0) { if(cs_hotngbs_evaluate(i, 0, &nexport, Send_count) < 0) break; } #ifdef MYSORT mysort_dataindex(DataIndexTable, nexport, sizeof(struct data_index), data_index_compare); #else qsort(DataIndexTable, nexport, sizeof(struct data_index), data_index_compare); #endif MPI_Allgather(Send_count, NTask, MPI_INT, Sendcount_matrix, NTask, MPI_INT, MPI_COMM_WORLD); for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) { Recv_count[j] = Sendcount_matrix[j * NTask + ThisTask]; nimport += Recv_count[j]; if(j > 0) { Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; } } HotNgbsGet = (struct hotngbs_in *) mymalloc(nimport * sizeof(struct hotngbs_in)); HotNgbsIn = (struct hotngbs_in *) mymalloc(nexport * sizeof(struct hotngbs_in)); /* prepare particle data for export */ for(j = 0; j < nexport; j++) { place = DataIndexTable[j].Index; HotNgbsIn[j].Pos[0] = P[place].Pos[0]; HotNgbsIn[j].Pos[1] = P[place].Pos[1]; HotNgbsIn[j].Pos[2] = P[place].Pos[2]; HotNgbsIn[j].HotHsml = SphP[place].HotHsml; HotNgbsIn[j].Entropy = SphP[place].Entropy; memcpy(HotNgbsIn[j].NodeList, DataNodeList[DataIndexTable[j].IndexGet].NodeList, NODELISTLENGTH * sizeof(int)); } for(ngrp = 1; ngrp < (1 << PTask); ngrp++) { sendTask = ThisTask; recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) { /* get the particles */ MPI_Sendrecv(&HotNgbsIn[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct hotngbs_in), MPI_BYTE, recvTask, TAG_DENS_A, &HotNgbsGet[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct hotngbs_in), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } } myfree(HotNgbsIn); HotNgbsResult = (struct hotngbs_out *) mymalloc(nimport * sizeof(struct hotngbs_out)); HotNgbsOut = (struct hotngbs_out *) mymalloc(nexport * sizeof(struct hotngbs_out)); /* now do the particles that need to be exported */ for(j = 0; j < nimport; j++) cs_hotngbs_evaluate(j, 1, &dummy, &dummy); if(i < 0) ndone_flag = 1; else ndone_flag = 0; MPI_Allreduce(&ndone_flag, &ndone, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); /* get the result */ for(ngrp = 1; ngrp < (1 << PTask); ngrp++) { sendTask = ThisTask; recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) { /* send the results */ MPI_Sendrecv(&HotNgbsResult[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct hotngbs_out), MPI_BYTE, recvTask, TAG_DENS_B, &HotNgbsOut[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct hotngbs_out), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } } /* add the result to the local particles */ for(j = 0; j < nexport; j++) { place = DataIndexTable[j].Index; SphP[place].da.dDensityAvg += HotNgbsOut[j].DensitySum; SphP[place].ea.dEntropyAvg += HotNgbsOut[j].EntropySum; SphP[place].HotNgbNum += HotNgbsOut[j].HotNgbNum; } myfree(HotNgbsOut); myfree(HotNgbsResult); myfree(HotNgbsGet); } while(ndone < NTask); /* do final operations on results */ for(i = FirstActiveParticle, npleft = 0; i >= 0; i = NextActiveParticle[i]) { if(P[i].Type == 10 && P[i].TimeBin >= 0) { #ifdef FLTROUNDOFFREDUCTION SphP[i].da.DensityAvg = FLT(SphP[i].da.dDensityAvg); SphP[i].ea.EntropyAvg = FLT(SphP[i].ea.dEntropyAvg); #endif if(SphP[i].HotNgbNum > 0) { SphP[i].da.DensityAvg /= SphP[i].HotNgbNum; SphP[i].ea.EntropyAvg /= SphP[i].HotNgbNum; } else { SphP[i].da.DensityAvg = 0; SphP[i].ea.EntropyAvg = 0; } /* now check whether we had enough neighbours */ if(SphP[i].HotNgbNum < (All.DesNumNgb - All.MaxNumHotNgbDeviation) || (SphP[i].HotNgbNum > (All.DesNumNgb + All.MaxNumHotNgbDeviation))) { /* need to redo this particle */ npleft++; if(Left[i] > 0 && Right[i] > 0) if((Right[i] - Left[i]) < 1.0e-3 * Left[i]) { /* this one should be ok */ npleft--; P[i].TimeBin = -P[i].TimeBin - 1; /* Mark as inactive */ continue; } if(SphP[i].HotNgbNum < (All.DesNumNgb - All.MaxNumHotNgbDeviation)) Left[i] = DMAX(SphP[i].HotHsml, Left[i]); else { if(Right[i] != 0) { if(SphP[i].HotHsml < Right[i]) Right[i] = SphP[i].HotHsml; } else Right[i] = SphP[i].HotHsml; } if(Left[i] > All.MaxHotHsmlParam * PPP[i].Hsml) /* prevent us from searching too far */ { npleft--; P[i].TimeBin = -P[i].TimeBin - 1; /* Mark as inactive */ /* Ad-hoc definition of SAvg and RhoAvg when there are no hot neighbours */ /* Note that a minimum nunmber of hot neighbours are required for promotion, see c_enrichment.c */ if(SphP[i].HotNgbNum == 0) { SphP[i].da.DensityAvg = SphP[i].d.Density / 100; SphP[i].ea.EntropyAvg = SphP[i].Entropy * 1000; printf("WARNING: Used ad-hoc values for SAvg and RhoAvg, No hot neighbours\n"); } continue; } if(iter >= MAXITER_HOT - 10) { printf ("i=%d task=%d ID=%d Hsml=%g Left=%g Right=%g Ngbs=%g Right-Left=%g\n pos=(%g|%g|%g)\n", i, ThisTask, P[i].ID, SphP[i].HotHsml, Left[i], Right[i], (float) SphP[i].HotNgbNum, Right[i] - Left[i], P[i].Pos[0], P[i].Pos[1], P[i].Pos[2]); fflush(stdout); } if(Right[i] > 0 && Left[i] > 0) SphP[i].HotHsml = pow(0.5 * (pow(Left[i], 3) + pow(Right[i], 3)), 1.0 / 3); else { if(Right[i] == 0 && Left[i] == 0) endrun(8188); /* can't occur */ if(Right[i] == 0 && Left[i] > 0) SphP[i].HotHsml *= 1.26; if(Right[i] > 0 && Left[i] == 0) SphP[i].HotHsml /= 1.26; } } else P[i].TimeBin = -P[i].TimeBin - 1; /* Mark as inactive */ } } MPI_Allreduce(&npleft, &ntot, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); if(ntot > 0) { iter++; if(iter > 0 && ThisTask == 0) { printf("hotngb iteration %d: need to repeat for %d particles.\n", iter, ntot); fflush(stdout); } if(iter > MAXITER_HOT) { printf("failed to converge in hot-neighbour iteration\n"); fflush(stdout); endrun(1155); } } } while(ntot > 0); myfree(DataNodeList); myfree(DataIndexTable); myfree(Ngblist); myfree(Right); myfree(Left); for(i = FirstActiveParticle; i >= 0; i = NextActiveParticle[i]) if(P[i].Type == 10) { P[i].Type = 0; /* mark as active again */ if(P[i].TimeBin < 0) P[i].TimeBin = -P[i].TimeBin - 1; } CPU_Step[CPU_HOTNGBS] += measure_time(); }
void subfind_contamination(void) { int i, j, ndone, ndone_flag, dummy, count; int ngrp, sendTask, recvTask, place, nexport, nimport; struct unbind_data *d; d = (struct unbind_data *) mymalloc(NumPart * sizeof(struct unbind_data)); for(i = 0, count = 0; i < NumPart; i++) #ifdef DENSITY_SPLIT_BY_TYPE if(!((1 << P[i].Type) & (DENSITY_SPLIT_BY_TYPE))) #else if(!((1 << P[i].Type) & (FOF_PRIMARY_LINK_TYPES))) #endif d[count++].index = i; force_treebuild(count, d); /* construct tree only with boundary particles */ myfree(d); /* allocate buffers to arrange communication */ All.BunchSize = (int) ((All.BufferSize * 1024 * 1024) / (sizeof(struct data_index) + sizeof(struct data_nodelist) + sizeof(struct contamdata_in) + sizeof(struct contamdata_out) + sizemax(sizeof(struct contamdata_in), sizeof(struct contamdata_out)))); DataIndexTable = (struct data_index *) mymalloc(All.BunchSize * sizeof(struct data_index)); DataNodeList = (struct data_nodelist *) mymalloc(All.BunchSize * sizeof(struct data_nodelist)); /* we will repeat the whole thing for those groups where we didn't converge to a SO radius yet */ i = 0; /* begin with this index */ do { for(j = 0; j < NTask; j++) { Send_count[j] = 0; Exportflag[j] = -1; } /* do local particles and prepare export list */ for(nexport = 0; i < Ngroups; i++) { if(Group[i].R_Mean200 > 0) { if(subfind_contamination_evaluate(i, 0, &nexport, Send_count) < 0) break; } else { Group[i].ContaminationLen = 0; Group[i].ContaminationMass = 0; } } qsort(DataIndexTable, nexport, sizeof(struct data_index), data_index_compare); MPI_Allgather(Send_count, NTask, MPI_INT, Sendcount_matrix, NTask, MPI_INT, MPI_COMM_WORLD); for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++) { Recv_count[j] = Sendcount_matrix[j * NTask + ThisTask]; nimport += Recv_count[j]; if(j > 0) { Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1]; Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1]; } } ContamGet = (struct contamdata_in *) mymalloc(nimport * sizeof(struct contamdata_in)); ContamIn = (struct contamdata_in *) mymalloc(nexport * sizeof(struct contamdata_in)); /* prepare particle data for export */ for(j = 0; j < nexport; j++) { place = DataIndexTable[j].Index; ContamIn[j].Pos[0] = Group[place].Pos[0]; ContamIn[j].Pos[1] = Group[place].Pos[1]; ContamIn[j].Pos[2] = Group[place].Pos[2]; ContamIn[j].R200 = Group[place].R_Mean200; memcpy(ContamIn[j].NodeList, DataNodeList[DataIndexTable[j].IndexGet].NodeList, NODELISTLENGTH * sizeof(int)); } /* exchange data */ for(ngrp = 1; ngrp < (1 << PTask); ngrp++) { sendTask = ThisTask; recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) { /* get the data */ MPI_Sendrecv(&ContamIn[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct contamdata_in), MPI_BYTE, recvTask, TAG_DENS_A, &ContamGet[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct contamdata_in), MPI_BYTE, recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } } myfree(ContamIn); ContamResult = (struct contamdata_out *) mymalloc(nimport * sizeof(struct contamdata_out)); ContamOut = (struct contamdata_out *) mymalloc(nexport * sizeof(struct contamdata_out)); /* now do the locations that were sent to us */ for(j = 0; j < nimport; j++) subfind_contamination_evaluate(j, 1, &dummy, &dummy); if(i >= Ngroups) ndone_flag = 1; else ndone_flag = 0; MPI_Allreduce(&ndone_flag, &ndone, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); /* get the result */ for(ngrp = 1; ngrp < (1 << PTask); ngrp++) { sendTask = ThisTask; recvTask = ThisTask ^ ngrp; if(recvTask < NTask) { if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0) { /* send the results */ MPI_Sendrecv(&ContamResult[Recv_offset[recvTask]], Recv_count[recvTask] * sizeof(struct contamdata_out), MPI_BYTE, recvTask, TAG_DENS_B, &ContamOut[Send_offset[recvTask]], Send_count[recvTask] * sizeof(struct contamdata_out), MPI_BYTE, recvTask, TAG_DENS_B, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } } /* add the result to the local particles */ for(j = 0; j < nexport; j++) { place = DataIndexTable[j].Index; Group[place].ContaminationLen += ContamOut[j].ContaminationLen; Group[place].ContaminationMass += ContamOut[j].ContaminationMass; } myfree(ContamOut); myfree(ContamResult); myfree(ContamGet); } while(ndone < NTask); myfree(DataNodeList); myfree(DataIndexTable); }