void writefile(AtomVec& atoms, OriginBox& obox){ // The .xyz format is simple: // Line 1: [Number of atoms] // Line 2: [Comment line, whatever you want, left blank here] // Line 3: [element type, here C for carbon]\t[x]\t[y]\t[z] // Line 4: [element type, here C for carbon]\t[x]\t[y]\t[z] // ... // Line N+1: [element type, here C for carbon]\t[x]\t[y]\t[z] // Line N+2: [element type, here C for carbon]\t[x]\t[y]\t[z] // Line N+3: [Number of atoms] // Line N+4: [Comment line, whatever you want, left blank here] // Line N+5: [element type, here C for carbon]\t[x]\t[y]\t[z] // ... // And so on. each set of N atoms/coordinates corresponds to a "frame", // which then make a movie. There must be the same N in each frame for VMD. // Note that if this is compiled as a 2D simulation, it will leave out // the z coordinate, and VMD can't handle that. ofstream outf; outf.open("packing.xyz", ios::out); outf.precision(24); outf << atoms.size() << endl; outf << "L=" << obox.L() << endl; // blank line for comment for(uint i=0; i<atoms.size(); i++){ if(i < Ns){ outf << "C"; } else { outf << "O"; } Vec normloc = obox.diff(Vec::Zero(), atoms[i].x); for(uint j=0; j<NDIM; j++){ outf << "\t" << normloc[j]; } outf << endl; }; // Unnecessary extra: // Write a "tcl" file with the box boundaries // the "tcl" file is made specifically for VMD ofstream pbcfile; pbcfile.open("packing.tcl", ios::out); pbcfile << "set cell [pbc set {"; for(uint j=0; j<NDIM; j++){ pbcfile << obox.box_shape()[j] << " "; } pbcfile << "} -all];\n"; pbcfile << "pbc box -toggle -center origin -color red;\n"; pbcfile << "set natoms [atomselect 0 \"name C\";];\n" << "$natoms set radius " << (sigma/2.0) << ";\n" << "set natoms [atomselect 0 \"name O\";];\n" << "$natoms set radius " << (sigmal/2.0) << ";\n"; // Now you should be able to run "vmd -e LJatoms-pbc.tcl LJatoms.xyz" // and it will show you the movie and also the bounding box // if you have .vmdrc in that same directory, you should also be able // to toggle the 
box with the "b" button };
// Append one .xyz frame for `atoms` to the already-open stream `outf`.
//
//   outf  : destination stream; the caller owns it and keeps it open so that
//           successive calls build up a multi-frame movie file
//   atoms : particles to write, every one labelled "C"
//   bx    : box used to map each position relative to the origin (bx.diff)
//
// Frame layout:
//   Line 1      : number of atoms
//   Line 2      : comment line, left blank here
//   Line 3..N+2 : C\t[x]\t[y]\t[z]   (NDIM coordinates per atom)
// Each set of N atoms/coordinates corresponds to a "frame"; there must be the
// same N in each frame for VMD. If this is compiled as a 2D simulation the z
// coordinate is left out, and VMD can't handle that.
void writefile(ofstream& outf, AtomVec& atoms, Box& bx){
    outf << atoms.size() << '\n';
    outf << '\n'; // blank line for comment
    for(uint i=0; i<atoms.size(); i++){
        outf << "C";
        Vec normloc = bx.diff(Vec::Zero(), atoms[i].x);
        for(uint j=0; j<NDIM; j++){
            outf << "\t" << normloc[j];
        }
        outf << '\n';
    }
    // Flush once per frame (instead of once per atom, as endl did) so that a
    // completed frame is still pushed out to the file right away.
    outf.flush();
}
void CommBrick::reverse_comm() { int n; MPI_Request request; AtomVec *avec = atom->avec; double **f = atom->f; double *buf; // exchange data with another proc // if other proc is self, just copy // if comm_f_only set, exchange or copy directly from f, don't pack for (int iswap = nswap-1; iswap >= 0; iswap--) { if (sendproc[iswap] != me) { if (comm_f_only) { if (size_reverse_recv[iswap]) MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE, sendproc[iswap],0,world,&request); if (size_reverse_send[iswap]) { if (size_reverse_send[iswap]) buf = f[firstrecv[iswap]]; else buf = NULL; MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE, recvproc[iswap],0,world); } if (size_reverse_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); } else { if (size_reverse_recv[iswap]) MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE, sendproc[iswap],0,world,&request); n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send); if (n) MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap],0,world); if (size_reverse_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); } avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_recv); } else { if (comm_f_only) { if (sendnum[iswap]) avec->unpack_reverse(sendnum[iswap],sendlist[iswap], f[firstrecv[iswap]]); } else { avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send); avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_send); } } } }
void FixPour::pre_exchange() { int i; // just return if should not be called on this timestep if (next_reneighbor != update->ntimestep || lastexec == update->ntimestep) return; lastexec = update->ntimestep; // nnew = # to insert this timestep int nnew = nper; if (ninserted + nnew > ninsert) nnew = ninsert - ninserted; // lo/hi current = z (or y) bounds of insertion region this timestep if (domain->dimension == 3) { lo_current = zlo + (update->ntimestep - nfirst) * update->dt * rate; hi_current = zhi + (update->ntimestep - nfirst) * update->dt * rate; } else { lo_current = ylo + (update->ntimestep - nfirst) * update->dt * rate; hi_current = yhi + (update->ntimestep - nfirst) * update->dt * rate; } // ncount = # of my atoms that overlap the insertion region // nprevious = total of ncount across all procs int ncount = 0; for (i = 0; i < atom->nlocal; i++) if (overlap(i)) ncount++; int nprevious; MPI_Allreduce(&ncount,&nprevious,1,MPI_INT,MPI_SUM,world); // xmine is for my atoms // xnear is for atoms from all procs + atoms to be inserted double **xmine = memory->create_2d_double_array(ncount,5,"fix_pour:xmine"); double **xnear = memory->create_2d_double_array(nprevious+nnew*particles_per_insertion(),5,"fix_pour:xnear"); int nnear = nprevious; // setup for allgatherv int n = 5*ncount; MPI_Allgather(&n,1,MPI_INT,recvcounts,1,MPI_INT,world); displs[0] = 0; for (int iproc = 1; iproc < nprocs; iproc++) displs[iproc] = displs[iproc-1] + recvcounts[iproc-1]; // load up xmine array double **x = atom->x; double *radius = atom->radius; ncount = 0; for (i = 0; i < atom->nlocal; i++) if (overlap(i)) { xmine[ncount][0] = x[i][0]; xmine[ncount][1] = x[i][1]; xmine[ncount][2] = x[i][2]; xmine[ncount][3] = radius[i]; ncount++; } // perform allgatherv to acquire list of nearby particles on all procs double *ptr = NULL; if (ncount) ptr = xmine[0]; MPI_Allgatherv(ptr,5*ncount,MPI_DOUBLE, xnear[0],recvcounts,displs,MPI_DOUBLE,world); // insert new atoms into xnear list, one by one // 
check against all nearby atoms and previously inserted ones // if there is an overlap then try again at other z (3d) or y (2d) coord // else insert by adding to xnear list // max = maximum # of insertion attempts for all particles // h = height, biased to give uniform distribution in time of insertion int success; double coord[3],radtmp,rn,h; int attempt = 0; int max = nnew * maxattempt; int ntotal = nprevious+nnew; while (nnear < ntotal) { radtmp = rand_pour(radius_lo,radius_hi,radius_ran_style); success = 0; while (attempt < max) { rn = random->uniform(); h = (hi_current-shift_randompos(radtmp)) - rn * (hi_current-lo_current-2.*shift_randompos(radtmp)); attempt++; xyz_random(h,coord,radtmp); for (i = 0; i < nnear; i++) { if(overlaps_xnear_i(coord,radtmp,xnear,i)) break; } if (i == nnear) { success = 1; break; } } if (success) { nnear=insert_in_xnear(xnear,nnear,coord,radtmp); } else break; } // warn if not all insertions were performed ninserted += nnear-nprevious; if (nnear - nprevious < nnew && me == 0) error->warning("Less insertions than requested"); // check if new atom is in my sub-box or above it if I'm highest proc // if so, add to my list via create_atom() // initialize info about the atom // type, diameter, density set from fix parameters // group mask set to "all" plus fix group // z velocity set to what velocity would be if particle // had fallen from top of insertion region // this gives continuous stream of atoms // set npartner for new atom to 0 (assume not touching any others) AtomVec *avec = atom->avec; int j,m,flag; double denstmp,vxtmp,vytmp,vztmp; double g = grav; //double g = 1.; //originally double *sublo = domain->sublo; double *subhi = domain->subhi; int b_id; int nfix = modify->nfix; Fix **fix = modify->fix; for (i = nprevious; i < nnear; i++) { coord[0] = xnear[i][0]; coord[1] = xnear[i][1]; coord[2] = xnear[i][2]; radtmp = xnear[i][3]; b_id = static_cast<int>(xnear[i][4]); denstmp = 
rand_pour(density_lo,density_hi,density_ran_style)*density_scaling(); calc_insert_velocities(i,g,xnear,vxtmp,vytmp,vztmp); flag = 0; if (coord[0] >= sublo[0] && coord[0] < subhi[0] && coord[1] >= sublo[1] && coord[1] < subhi[1] && coord[2] >= sublo[2] && coord[2] < subhi[2]) flag = 1; else if (domain->dimension == 3 && coord[2] >= domain->boxhi[2] && comm->myloc[2] == comm->procgrid[2]-1 && coord[0] >= sublo[0] && coord[0] < subhi[0] && coord[1] >= sublo[1] && coord[1] < subhi[1]) flag = 1; else if (domain->dimension == 2 && coord[1] >= domain->boxhi[1] && comm->myloc[1] == comm->procgrid[1]-1 && coord[0] >= sublo[0] && coord[0] < subhi[0]) flag = 1; if (flag) { avec->create_atom(ntype,coord); m = atom->nlocal - 1; atom->type[m] = ntype; atom->radius[m] = radtmp; atom->density[m] = denstmp; atom->rmass[m] = 4.0*PI/3.0 * radtmp*radtmp*radtmp * denstmp; atom->mask[m] = 1 | groupbit; atom->v[m][0] = vxtmp; atom->v[m][1] = vytmp; atom->v[m][2] = vztmp; for (j = 0; j < nfix; j++) if (fix[j]->create_attribute) fix[j]->set_arrays(m); set_body_props(i,m,coord,denstmp,radtmp,vxtmp,vytmp,vztmp,b_id); } } // set tag # of new particles beyond all previous atoms // reset global natoms // if global map exists, reset it now instead of waiting for comm // since deleting atoms messes up ghosts if (atom->tag_enable) { atom->tag_extend(); atom->natoms += nnear - nprevious; if (atom->map_style) { atom->nghost = 0; atom->map_init(); atom->map_set(); } } finalize_insertion(); // free local memory memory->destroy_2d_double_array(xmine); memory->destroy_2d_double_array(xnear); // next timestep to insert if (ninserted < ninsert) next_reneighbor += nfreq; else next_reneighbor = 0; }
void WriteRestart::write(char *file) { // special case where reneighboring is not done in integrator // on timestep restart file is written (due to build_once being set) // if box is changing, must be reset, else restart file will have // wrong box size and atoms will be lost when restart file is read // other calls to pbc and domain and comm are not made, // b/c they only make sense if reneighboring is actually performed if (neighbor->build_once) domain->reset_box(); // natoms = sum of nlocal = value to write into restart file // if unequal and thermo lostflag is "error", don't write restart file bigint nblocal = atom->nlocal; if(region) { nblocal = 0.; for (int i = 0; i < atom->nlocal; i++) if(region->match(atom->x[i][0],atom->x[i][1],atom->x[i][2])) nblocal += 1; } MPI_Allreduce(&nblocal,&natoms,1,MPI_LMP_BIGINT,MPI_SUM,world); if (natoms != atom->natoms && output->thermo->lostflag == ERROR && !region) error->all(FLERR,"Atom count is inconsistent, cannot write restart file"); // check if filename contains "%" int multiproc; if (strchr(file,'%')) multiproc = 1; else multiproc = 0; // open single restart file or base file for multiproc case if (me == 0) { char *hfile; if (multiproc) { hfile = new char[strlen(file) + 16]; char *ptr = strchr(file,'%'); *ptr = '\0'; sprintf(hfile,"%s%s%s",file,"base",ptr+1); *ptr = '%'; } else hfile = file; fp = fopen(hfile,"wb"); if (fp == NULL) { char str[512]; sprintf(str,"Cannot open restart file %s",hfile); error->one(FLERR,str); } if (multiproc) delete [] hfile; } // proc 0 writes header, groups, ntype-length arrays, force field // all procs write fix info if (me == 0) { header(); group->write_restart(fp); type_arrays(); force_fields(); } modify->write_restart(fp); // communication buffer for all my atom's info // max_size = largest buffer needed by any proc int max_size; int send_size = atom->avec->size_restart(); MPI_Allreduce(&send_size,&max_size,1,MPI_INT,MPI_MAX,world); double *buf; if (me == 0) 
memory->create(buf,max_size,"write_restart:buf"); else memory->create(buf,send_size,"write_restart:buf"); // pack my atom data into buf AtomVec *avec = atom->avec; int n = 0; if(!region) { for (int i = 0; i < atom->nlocal; i++) n += avec->pack_restart(i,&buf[n]); } else { for (int i = 0; i < atom->nlocal; i++) if(region->match(atom->x[i][0],atom->x[i][1],atom->x[i][2])) n += avec->pack_restart(i,&buf[n]); send_size = n; } // if any fix requires it, remap each atom's coords via PBC // is because fix changes atom coords (excepting an integrate fix) // just remap in buffer, not actual atoms if (modify->restart_pbc_any) { int triclinic = domain->triclinic; double *lo,*hi,*period; if (triclinic == 0) { lo = domain->boxlo; hi = domain->boxhi; period = domain->prd; } else { lo = domain->boxlo_lamda; hi = domain->boxhi_lamda; period = domain->prd_lamda; } int xperiodic = domain->xperiodic; int yperiodic = domain->yperiodic; int zperiodic = domain->zperiodic; double *x; if(region) error->all(FLERR,"have to implement more here"); int m = 0; for (int i = 0; i < atom->nlocal; i++) { x = &buf[m+1]; if (triclinic) domain->x2lamda(x,x); if (xperiodic) { if (x[0] < lo[0]) x[0] += period[0]; if (x[0] >= hi[0]) x[0] -= period[0]; x[0] = MAX(x[0],lo[0]); } if (yperiodic) { if (x[1] < lo[1]) x[1] += period[1]; if (x[1] >= hi[1]) x[1] -= period[1]; x[1] = MAX(x[1],lo[1]); } if (zperiodic) { if (x[2] < lo[2]) x[2] += period[2]; if (x[2] >= hi[2]) x[2] -= period[2]; x[2] = MAX(x[2],lo[2]); } if (triclinic) domain->lamda2x(x,x); m += static_cast<int> (buf[m]); } } // if single file: // write one chunk of atoms per proc to file // proc 0 pings each proc, receives its chunk, writes to file // all other procs wait for ping, send their chunk to proc 0 // else if one file per proc: // each proc opens its own file and writes its chunk directly if (multiproc == 0) { int tmp,recv_size; MPI_Status status; MPI_Request request; if (me == 0) { for (int iproc = 0; iproc < nprocs; iproc++) { if (iproc) 
{ MPI_Irecv(buf,max_size,MPI_DOUBLE,iproc,0,world,&request); MPI_Send(&tmp,0,MPI_INT,iproc,0,world); MPI_Wait(&request,&status); MPI_Get_count(&status,MPI_DOUBLE,&recv_size); } else recv_size = send_size; fwrite(&recv_size,sizeof(int),1,fp); fwrite(buf,sizeof(double),recv_size,fp); } fclose(fp); } else { MPI_Recv(&tmp,0,MPI_INT,0,0,world,&status); MPI_Rsend(buf,send_size,MPI_DOUBLE,0,0,world); } } else { if (me == 0) fclose(fp); char *perproc = new char[strlen(file) + 16]; char *ptr = strchr(file,'%'); *ptr = '\0'; sprintf(perproc,"%s%d%s",file,me,ptr+1); *ptr = '%'; fp = fopen(perproc,"wb"); if (fp == NULL) { char str[512]; sprintf(str,"Cannot open restart file %s",perproc); error->one(FLERR,str); } delete [] perproc; fwrite(&send_size,sizeof(int),1,fp); fwrite(buf,sizeof(double),send_size,fp); fclose(fp); } memory->destroy(buf); // invoke any fixes that write their own restart file for (int ifix = 0; ifix < modify->nfix; ifix++) if (modify->fix[ifix]->restart_file) modify->fix[ifix]->write_restart_file(file); }
void DeleteAtoms::command(int narg, char **arg) { if (domain->box_exist == 0) error->all(FLERR,"Delete_atoms command before simulation box is defined"); if (narg < 1) error->all(FLERR,"Illegal delete_atoms command"); if (atom->tag_enable == 0) error->all(FLERR,"Cannot use delete_atoms unless atoms have IDs"); // store state before delete bigint natoms_previous = atom->natoms; if(modify->n_fixes_style_strict("contacthistory") > 0) modify->find_fix_style_strict("contacthistory",0)->pre_exchange(); if(modify->n_fixes_style_strict("bond/propagate/gran") > 0) modify->find_fix_style_strict("bond/propagate/gran",0)->pre_exchange(); // delete the atoms if (strcmp(arg[0],"group") == 0) delete_group(narg,arg); else if (strcmp(arg[0],"region") == 0) delete_region(narg,arg); else if (strcmp(arg[0],"overlap") == 0) delete_overlap(narg,arg); else if (strcmp(arg[0],"porosity") == 0) delete_porosity(narg,arg); else error->all(FLERR,"Illegal delete_atoms command"); // delete local atoms flagged in dlist // reset nlocal AtomVec *avec = atom->avec; int nlocal = atom->nlocal; int i = 0; while (i < nlocal) { if (dlist[i]) { avec->copy(nlocal-1,i,1); dlist[i] = dlist[nlocal-1]; nlocal--; } else i++; } atom->nlocal = nlocal; memory->destroy(dlist); // if non-molecular system and compress flag set, // reset atom tags to be contiguous // set all atom IDs to 0, call tag_extend() if (atom->molecular == 0 && compress_flag) { int *tag = atom->tag; for (i = 0; i < nlocal; i++) tag[i] = 0; atom->tag_extend(); } // reset atom->natoms // reset atom->map if it exists // set nghost to 0 so old ghosts of deleted atoms won't be mapped bigint nblocal = atom->nlocal; MPI_Allreduce(&nblocal,&atom->natoms,1,MPI_LMP_BIGINT,MPI_SUM,world); if (atom->map_style) { atom->nghost = 0; atom->map_init(); atom->map_set(); } // print before and after atom count bigint ndelete = natoms_previous - atom->natoms; if (comm->me == 0) { if (screen) fprintf(screen,"Deleted " BIGINT_FORMAT " atoms, new total = " 
BIGINT_FORMAT "\n", ndelete,atom->natoms); if (logfile) fprintf(logfile,"Deleted " BIGINT_FORMAT " atoms, new total = " BIGINT_FORMAT "\n", ndelete,atom->natoms); } }
void DeleteAtoms::command(int narg, char **arg) { if (domain->box_exist == 0) error->all(FLERR,"Delete_atoms command before simulation box is defined"); if (narg < 1) error->all(FLERR,"Illegal delete_atoms command"); if (atom->tag_enable == 0) error->all(FLERR,"Cannot use delete_atoms unless atoms have IDs"); // store state before delete bigint natoms_previous = atom->natoms; bigint nbonds_previous = atom->nbonds; bigint nangles_previous = atom->nangles; bigint ndihedrals_previous = atom->ndihedrals; bigint nimpropers_previous = atom->nimpropers; // delete the atoms if (strcmp(arg[0],"group") == 0) delete_group(narg,arg); else if (strcmp(arg[0],"region") == 0) delete_region(narg,arg); else if (strcmp(arg[0],"overlap") == 0) delete_overlap(narg,arg); else if (strcmp(arg[0],"porosity") == 0) delete_porosity(narg,arg); else error->all(FLERR,"Illegal delete_atoms command"); // optionally delete additional bonds or atoms in molecules if (bond_flag) delete_bond(); if (mol_flag) delete_molecule(); // delete local atoms flagged in dlist // reset nlocal AtomVec *avec = atom->avec; int nlocal = atom->nlocal; int i = 0; while (i < nlocal) { if (dlist[i]) { avec->copy(nlocal-1,i,1); dlist[i] = dlist[nlocal-1]; nlocal--; } else i++; } atom->nlocal = nlocal; memory->destroy(dlist); // if non-molecular system and compress flag set, // reset atom tags to be contiguous // set all atom IDs to 0, call tag_extend() if (atom->molecular == 0 && compress_flag) { tagint *tag = atom->tag; for (i = 0; i < nlocal; i++) tag[i] = 0; atom->tag_extend(); } // reset atom->natoms and also topology counts // reset atom->map if it exists // set nghost to 0 so old ghosts of deleted atoms won't be mapped bigint nblocal = atom->nlocal; MPI_Allreduce(&nblocal,&atom->natoms,1,MPI_LMP_BIGINT,MPI_SUM,world); if (atom->map_style) { atom->nghost = 0; atom->map_init(); atom->map_set(); } recount_topology(); // print before and after atom and topology counts bigint ndelete = natoms_previous - atom->natoms; 
bigint ndelete_bonds = nbonds_previous - atom->nbonds; bigint ndelete_angles = nangles_previous - atom->nangles; bigint ndelete_dihedrals = ndihedrals_previous - atom->ndihedrals; bigint ndelete_impropers = nimpropers_previous - atom->nimpropers; if (comm->me == 0) { if (screen) { fprintf(screen,"Deleted " BIGINT_FORMAT " atoms, new total = " BIGINT_FORMAT "\n", ndelete,atom->natoms); if (bond_flag || mol_flag) { if (nbonds_previous) fprintf(screen,"Deleted " BIGINT_FORMAT " bonds, new total = " BIGINT_FORMAT "\n", ndelete_bonds,atom->nbonds); if (nangles_previous) fprintf(screen,"Deleted " BIGINT_FORMAT " angles, new total = " BIGINT_FORMAT "\n", ndelete_angles,atom->nangles); if (ndihedrals_previous) fprintf(screen,"Deleted " BIGINT_FORMAT " dihedrals, new total = " BIGINT_FORMAT "\n", ndelete_dihedrals,atom->ndihedrals); if (nimpropers_previous) fprintf(screen,"Deleted " BIGINT_FORMAT " impropers, new total = " BIGINT_FORMAT "\n", ndelete_impropers,atom->nimpropers); } } if (logfile) { fprintf(logfile,"Deleted " BIGINT_FORMAT " atoms, new total = " BIGINT_FORMAT "\n", ndelete,atom->natoms); if (bond_flag || mol_flag) { if (nbonds_previous) fprintf(logfile,"Deleted " BIGINT_FORMAT " bonds, new total = " BIGINT_FORMAT "\n", ndelete_bonds,atom->nbonds); if (nangles_previous) fprintf(logfile,"Deleted " BIGINT_FORMAT " angles, new total = " BIGINT_FORMAT "\n", ndelete_angles,atom->nangles); if (ndihedrals_previous) fprintf(logfile,"Deleted " BIGINT_FORMAT " dihedrals, new total = " BIGINT_FORMAT "\n", ndelete_dihedrals,atom->ndihedrals); if (nimpropers_previous) fprintf(logfile,"Deleted " BIGINT_FORMAT " impropers, new total = " BIGINT_FORMAT "\n", ndelete_impropers,atom->nimpropers); } } } }
void FixSRP::post_run() { // all bond particles are removed after each run // useful for write_data and write_restart commands // since those commands occur between runs bigint natoms_previous = atom->natoms; int nlocal = atom->nlocal; int* dlist; memory->create(dlist,nlocal,"fix_srp:dlist"); for (int i = 0; i < nlocal; i++){ if(atom->type[i] == bptype) dlist[i] = 1; else dlist[i] = 0; } // delete local atoms flagged in dlist // reset nlocal AtomVec *avec = atom->avec; int i = 0; while (i < nlocal) { if (dlist[i]) { avec->copy(nlocal-1,i,1); dlist[i] = dlist[nlocal-1]; nlocal--; } else i++; } atom->nlocal = nlocal; memory->destroy(dlist); // reset atom->natoms // reset atom->map if it exists // set nghost to 0 so old ghosts won't be mapped bigint nblocal = atom->nlocal; MPI_Allreduce(&nblocal,&atom->natoms,1,MPI_LMP_BIGINT,MPI_SUM,world); if (atom->map_style) { atom->nghost = 0; atom->map_init(); atom->map_set(); } // print before and after atom count bigint ndelete = natoms_previous - atom->natoms; if (comm->me == 0) { if (screen) fprintf(screen,"Deleted " BIGINT_FORMAT " atoms, new total = " BIGINT_FORMAT "\n", ndelete,atom->natoms); if (logfile) fprintf(logfile,"Deleted " BIGINT_FORMAT " atoms, new total = " BIGINT_FORMAT "\n", ndelete,atom->natoms); } // verlet calls box_too_small_check() in post_run // this check maps all bond partners // therefore need ghosts // need to convert to lambda coords before apply pbc if (domain->triclinic) domain->x2lamda(atom->nlocal); domain->pbc(); comm->setup(); comm->exchange(); if (atom->sortfreq > 0) atom->sort(); comm->borders(); // change back to box coordinates if (domain->triclinic) domain->lamda2x(atom->nlocal+atom->nghost); }
void CommCuda::forward_comm_pack_cuda() { static int count=0; static double kerneltime=0.0; static double copytime=0.0; timespec time1,time2,time3; int n; // initialize comm buffers & exchange memory MPI_Request request; MPI_Status status; AtomVec *avec = atom->avec; double **x = atom->x; cuda->shared_data.domain.xy=domain->xy; cuda->shared_data.domain.xz=domain->xz; cuda->shared_data.domain.yz=domain->yz; cuda->shared_data.domain.prd[0]=domain->prd[0]; cuda->shared_data.domain.prd[1]=domain->prd[1]; cuda->shared_data.domain.prd[2]=domain->prd[2]; cuda->shared_data.domain.triclinic=domain->triclinic; if(not comm_x_only && not avec->cudable) cuda->downloadAll(); //if not comm_x_only the communication routine of the atom_vec style class is used // exchange data with another proc // if other proc is self, just copy // if comm_x_only set, exchange or copy directly to x, don't unpack for (int iswap = 0; iswap < nswap; iswap++) { if (sendproc[iswap] != me) { if (comm_x_only) { clock_gettime(CLOCK_REALTIME,&time1); // n = Cuda_CommCuda_PackComm(&cuda->shared_data,sendnum[iswap],iswap,(void*) cuda->shared_data.comm.buf_send[iswap],pbc[iswap],pbc_flag[iswap]); n = Cuda_CommCuda_PackComm(&cuda->shared_data,sendnum[iswap],iswap,(void*)buf_send,pbc[iswap],pbc_flag[iswap]); clock_gettime(CLOCK_REALTIME,&time2); if((sizeof(X_FLOAT)!=sizeof(double)) && n) //some complicated way to safe some transfer size if single precision is used n=(n+1)*sizeof(X_FLOAT)/sizeof(double); cuda->shared_data.comm.send_size[iswap]=n; } else if (ghost_velocity) { clock_gettime(CLOCK_REALTIME,&time1); // n = Cuda_CommCuda_PackComm_Vel(&cuda->shared_data,sendnum[iswap],iswap,(void*) &buf_send[iswap*maxsend],pbc[iswap],pbc_flag[iswap]); clock_gettime(CLOCK_REALTIME,&time2); if((sizeof(X_FLOAT)!=sizeof(double)) && n) //some complicated way to safe some transfer size if single precision is used n=(n+1)*sizeof(X_FLOAT)/sizeof(double); cuda->shared_data.comm.send_size[iswap]=n; } else { 
MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); if(avec->cudable) n = avec->pack_comm(sendnum[iswap],&iswap, cuda->shared_data.comm.buf_send[iswap],pbc_flag[iswap],pbc[iswap]); else n = avec->pack_comm(sendnum[iswap],sendlist[iswap], cuda->shared_data.comm.buf_send[iswap],pbc_flag[iswap],pbc[iswap]); MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); MPI_Wait(&request,&status); avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_recv); } } else //sendproc == me { if (comm_x_only) { if (sendnum[iswap]) { n = Cuda_CommCuda_PackComm_Self(&cuda->shared_data,sendnum[iswap],iswap,firstrecv[iswap],pbc[iswap],pbc_flag[iswap]); if(n<0) error->all(FLERR," # CUDA ERRROR on PackComm_Self"); if((sizeof(X_FLOAT)!=sizeof(double)) && n) n=(n+1)*sizeof(X_FLOAT)/sizeof(double); } } else if (ghost_velocity) { n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap], buf_send,pbc_flag[iswap],pbc[iswap]); avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_send); } else { n = avec->pack_comm(sendnum[iswap],sendlist[iswap], buf_send,pbc_flag[iswap],pbc[iswap]); avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_send); } } } if(not comm_x_only && not avec->cudable) cuda->uploadAll(); }
void FixPour::pre_exchange() { int i; // just return if should not be called on this timestep if (next_reneighbor != update->ntimestep) return; // nnew = # to insert this timestep int nnew = nper; if (ninserted + nnew > ninsert) nnew = ninsert - ninserted; // lo/hi current = z (or y) bounds of insertion region this timestep if (domain->dimension == 3) { lo_current = zlo + (update->ntimestep - nfirst) * update->dt * rate; hi_current = zhi + (update->ntimestep - nfirst) * update->dt * rate; } else { lo_current = ylo + (update->ntimestep - nfirst) * update->dt * rate; hi_current = yhi + (update->ntimestep - nfirst) * update->dt * rate; } // ncount = # of my atoms that overlap the insertion region // nprevious = total of ncount across all procs int ncount = 0; for (i = 0; i < atom->nlocal; i++) if (overlap(i)) ncount++; int nprevious; MPI_Allreduce(&ncount,&nprevious,1,MPI_INT,MPI_SUM,world); // xmine is for my atoms // xnear is for atoms from all procs + atoms to be inserted double **xmine = memory->create_2d_double_array(ncount,4,"fix_pour:xmine"); double **xnear = memory->create_2d_double_array(nprevious+nnew,4,"fix_pour:xnear"); int nnear = nprevious; // setup for allgatherv int n = 4*ncount; MPI_Allgather(&n,1,MPI_INT,recvcounts,1,MPI_INT,world); displs[0] = 0; for (int iproc = 1; iproc < nprocs; iproc++) displs[iproc] = displs[iproc-1] + recvcounts[iproc-1]; // load up xmine array double **x = atom->x; double *radius = atom->radius; ncount = 0; for (i = 0; i < atom->nlocal; i++) if (overlap(i)) { xmine[ncount][0] = x[i][0]; xmine[ncount][1] = x[i][1]; xmine[ncount][2] = x[i][2]; xmine[ncount][3] = radius[i]; ncount++; } // perform allgatherv to acquire list of nearby particles on all procs double *ptr = NULL; if (ncount) ptr = xmine[0]; MPI_Allgatherv(ptr,4*ncount,MPI_DOUBLE, xnear[0],recvcounts,displs,MPI_DOUBLE,world); // insert new atoms into xnear list, one by one // check against all nearby atoms and previously inserted ones // if there is an overlap then 
try again at same z (3d) or y (2d) coord // else insert by adding to xnear list // max = maximum # of insertion attempts for all particles // h = height, biased to give uniform distribution in time of insertion int success; double coord[3],radtmp,delx,dely,delz,rsq,radsum,rn,h; int attempt = 0; int max = nnew * maxattempt; int ntotal = nprevious+nnew; while (nnear < ntotal) { rn = random->uniform(); h = hi_current - rn*rn * (hi_current-lo_current); radtmp = radius_lo + random->uniform() * (radius_hi-radius_lo); success = 0; while (attempt < max) { attempt++; xyz_random(h,coord); for (i = 0; i < nnear; i++) { delx = coord[0] - xnear[i][0]; dely = coord[1] - xnear[i][1]; delz = coord[2] - xnear[i][2]; rsq = delx*delx + dely*dely + delz*delz; radsum = radtmp + xnear[i][3]; if (rsq <= radsum*radsum) break; } if (i == nnear) { success = 1; break; } } if (success) { xnear[nnear][0] = coord[0]; xnear[nnear][1] = coord[1]; xnear[nnear][2] = coord[2]; xnear[nnear][3] = radtmp; nnear++; } else break; } // warn if not all insertions were performed ninserted += nnear-nprevious; if (nnear - nprevious < nnew && me == 0) error->warning("Less insertions than requested"); // check if new atom is in my sub-box or above it if I'm highest proc // if so, add to my list via create_atom() // initialize info about the atom // type, diameter, density set from fix parameters // group mask set to "all" plus fix group // z velocity set to what velocity would be if particle // had fallen from top of insertion region // this gives continuous stream of atoms // set npartner for new atom to 0 (assume not touching any others) AtomVec *avec = atom->avec; int m,flag; double denstmp,vxtmp,vytmp,vztmp; double g = 1.0; double *sublo = domain->sublo; double *subhi = domain->subhi; for (i = nprevious; i < nnear; i++) { coord[0] = xnear[i][0]; coord[1] = xnear[i][1]; coord[2] = xnear[i][2]; radtmp = xnear[i][3]; denstmp = density_lo + random->uniform() * (density_hi-density_lo); if (domain->dimension == 
3) { vxtmp = vxlo + random->uniform() * (vxhi-vxlo); vytmp = vylo + random->uniform() * (vyhi-vylo); vztmp = vz - sqrt(2.0*g*(hi_current-coord[2])); } else { vxtmp = vxlo + random->uniform() * (vxhi-vxlo); vytmp = vy - sqrt(2.0*g*(hi_current-coord[1])); vztmp = 0.0; } flag = 0; if (coord[0] >= sublo[0] && coord[0] < subhi[0] && coord[1] >= sublo[1] && coord[1] < subhi[1] && coord[2] >= sublo[2] && coord[2] < subhi[2]) flag = 1; else if (domain->dimension == 3 && coord[2] >= domain->boxhi[2] && comm->myloc[2] == comm->procgrid[2]-1 && coord[0] >= sublo[0] && coord[0] < subhi[0] && coord[1] >= sublo[1] && coord[1] < subhi[1]) flag = 1; else if (domain->dimension == 2 && coord[1] >= domain->boxhi[1] && comm->myloc[1] == comm->procgrid[1]-1 && coord[0] >= sublo[0] && coord[0] < subhi[0]) flag = 1; if (flag) { avec->create_atom(ntype,coord); m = atom->nlocal - 1; atom->type[m] = ntype; atom->radius[m] = radtmp; atom->density[m] = denstmp; if (domain->dimension == 3) atom->rmass[m] = 4.0*PI/3.0 * radtmp*radtmp*radtmp * denstmp; else atom->rmass[m] = PI * radtmp*radtmp * denstmp; atom->mask[m] = 1 | groupbit; atom->v[m][0] = vxtmp; atom->v[m][1] = vytmp; atom->v[m][2] = vztmp; if (fix_history) fix_history->npartner[m] = 0; } } // set tag # of new particles beyond all previous atoms // reset global natoms // if global map exists, reset it if (atom->tag_enable) { atom->tag_extend(); atom->natoms += nnear - nprevious; if (atom->map_style) { atom->map_init(); atom->map_set(); } } // free local memory memory->destroy_2d_double_array(xmine); memory->destroy_2d_double_array(xnear); // next timestep to insert if (ninserted < ninsert) next_reneighbor += nfreq; else next_reneighbor = 0; }
void Replicate::command(int narg, char **arg) { int i,j,m,n; if (domain->box_exist == 0) error->all("Replicate command before simulation box is defined"); if (narg != 3) error->all("Illegal replicate command"); int me = comm->me; int nprocs = comm->nprocs; if (me == 0 && screen) fprintf(screen,"Replicating atoms ...\n"); // nrep = total # of replications int nx = atoi(arg[0]); int ny = atoi(arg[1]); int nz = atoi(arg[2]); int nrep = nx*ny*nz; // error and warning checks if (nx <= 0 || ny <= 0 || nz <= 0) error->all("Illegal replicate command"); if (domain->dimension == 2 && nz != 1) error->all("Cannot replicate 2d simulation in z dimension"); if ((nx > 1 && domain->xperiodic == 0) || (ny > 1 && domain->yperiodic == 0) || (nz > 1 && domain->zperiodic == 0)) error->warning("Replicating in a non-periodic dimension"); if (atom->nextra_grow || atom->nextra_restart || atom->nextra_store) error->all("Cannot replicate with fixes that store atom quantities"); // maxtag = largest atom tag across all existing atoms int maxtag = 0; for (i = 0; i < atom->nlocal; i++) maxtag = MAX(atom->tag[i],maxtag); int maxtag_all; MPI_Allreduce(&maxtag,&maxtag_all,1,MPI_INT,MPI_MAX,world); maxtag = maxtag_all; // maxmol = largest molecule tag across all existing atoms int maxmol = 0; if (atom->molecular) { for (i = 0; i < atom->nlocal; i++) maxmol = MAX(atom->molecule[i],maxmol); int maxmol_all; MPI_Allreduce(&maxmol,&maxmol_all,1,MPI_INT,MPI_MAX,world); maxmol = maxmol_all; } // unmap existing atoms via image flags for (i = 0; i < atom->nlocal; i++) domain->unmap(atom->x[i],atom->image[i]); // communication buffer for all my atom's info // max_size = largest buffer needed by any proc // must do before new Atom class created, // since size_restart() uses atom->nlocal int max_size; int send_size = atom->avec->size_restart(); MPI_Allreduce(&send_size,&max_size,1,MPI_INT,MPI_MAX,world); double *buf = (double *) memory->smalloc(max_size*sizeof(double),"replicate:buf"); // old = original atom 
class // atom = new replicated atom class // if old atom style was hybrid, pass sub-style names to create_avec Atom *old = atom; atom = new Atom(lmp); int nstyles = 0; char **keywords = NULL; if (strcmp(old->atom_style,"hybrid") == 0) { AtomVecHybrid *avec_hybrid = (AtomVecHybrid *) old->avec; nstyles = avec_hybrid->nstyles; keywords = avec_hybrid->keywords; } atom->create_avec(old->atom_style,nstyles,keywords); // check that new problem size will not be too large // if N > 2^31, turn off tags // if molecular, N/Nbonds/etc cannot be > 2^31 else tags/counts invalid double rep = nrep; if (rep*old->natoms > MAXATOMS) atom->tag_enable = 0; if (atom->molecular) { if (rep*old->natoms > MAXATOMS || rep*old->nbonds > MAXATOMS || rep*old->nangles > MAXATOMS || rep*old->ndihedrals > MAXATOMS || rep*old->nimpropers > MAXATOMS) error->all("Too big a problem to replicate with molecular atom style"); } // assign atom and topology counts in new class from old one atom->natoms = old->natoms * nrep; atom->nbonds = old->nbonds * nrep; atom->nangles = old->nangles * nrep; atom->ndihedrals = old->ndihedrals * nrep; atom->nimpropers = old->nimpropers * nrep; atom->ntypes = old->ntypes; atom->nbondtypes = old->nbondtypes; atom->nangletypes = old->nangletypes; atom->ndihedraltypes = old->ndihedraltypes; atom->nimpropertypes = old->nimpropertypes; atom->bond_per_atom = old->bond_per_atom; atom->angle_per_atom = old->angle_per_atom; atom->dihedral_per_atom = old->dihedral_per_atom; atom->improper_per_atom = old->improper_per_atom; // store old simulation box int triclinic = domain->triclinic; double old_xprd = domain->xprd; double old_yprd = domain->yprd; double old_zprd = domain->zprd; double old_xy = domain->xy; double old_xz = domain->xz; double old_yz = domain->yz; // setup new simulation box domain->boxhi[0] = domain->boxlo[0] + nx*old_xprd; domain->boxhi[1] = domain->boxlo[1] + ny*old_yprd; domain->boxhi[2] = domain->boxlo[2] + nz*old_zprd; if (triclinic) { domain->xy *= ny; 
domain->xz *= nz; domain->yz *= nz; } // new problem setup using new box boundaries if (nprocs == 1) n = static_cast<int> (atom->natoms); else n = static_cast<int> (LB_FACTOR * atom->natoms / nprocs); atom->allocate_type_arrays(); atom->avec->grow(n); n = atom->nmax; domain->print_box(" "); domain->set_initial_box(); domain->set_global_box(); comm->set_procs(); domain->set_local_box(); // copy type arrays to new atom class if (atom->mass) { for (int itype = 1; itype <= atom->ntypes; itype++) { atom->mass_setflag[itype] = old->mass_setflag[itype]; if (atom->mass_setflag[itype]) atom->mass[itype] = old->mass[itype]; } } if (atom->dipole) { for (int itype = 1; itype <= atom->ntypes; itype++) { atom->dipole_setflag[itype] = old->dipole_setflag[itype]; if (atom->dipole_setflag[itype]) atom->dipole[itype] = old->dipole[itype]; } } // set bounds for my proc // if periodic and I am lo/hi proc, adjust bounds by EPSILON // insures all replicated atoms will be owned even with round-off double sublo[3],subhi[3]; if (triclinic == 0) { sublo[0] = domain->sublo[0]; subhi[0] = domain->subhi[0]; sublo[1] = domain->sublo[1]; subhi[1] = domain->subhi[1]; sublo[2] = domain->sublo[2]; subhi[2] = domain->subhi[2]; } else { sublo[0] = domain->sublo_lamda[0]; subhi[0] = domain->subhi_lamda[0]; sublo[1] = domain->sublo_lamda[1]; subhi[1] = domain->subhi_lamda[1]; sublo[2] = domain->sublo_lamda[2]; subhi[2] = domain->subhi_lamda[2]; } if (domain->xperiodic) { if (comm->myloc[0] == 0) sublo[0] -= EPSILON; if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += EPSILON; } if (domain->yperiodic) { if (comm->myloc[1] == 0) sublo[1] -= EPSILON; if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += EPSILON; } if (domain->zperiodic) { if (comm->myloc[2] == 0) sublo[2] -= EPSILON; if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += EPSILON; } // loop over all procs // if this iteration of loop is me: // pack my unmapped atom data into buf // bcast it to all other procs // performs 3d replicate 
loop with while loop over atoms in buf // x = new replicated position, remapped into simulation box // unpack atom into new atom class from buf if I own it // adjust tag, mol #, coord, topology info as needed AtomVec *old_avec = old->avec; AtomVec *avec = atom->avec; int ix,iy,iz,image,atom_offset,mol_offset; double x[3],lamda[3]; double *coord; int tag_enable = atom->tag_enable; for (int iproc = 0; iproc < nprocs; iproc++) { if (me == iproc) { n = 0; for (i = 0; i < old->nlocal; i++) n += old_avec->pack_restart(i,&buf[n]); } MPI_Bcast(&n,1,MPI_INT,iproc,world); MPI_Bcast(buf,n,MPI_DOUBLE,iproc,world); for (ix = 0; ix < nx; ix++) { for (iy = 0; iy < ny; iy++) { for (iz = 0; iz < nz; iz++) { // while loop over one proc's atom list m = 0; while (m < n) { image = (512 << 20) | (512 << 10) | 512; if (triclinic == 0) { x[0] = buf[m+1] + ix*old_xprd; x[1] = buf[m+2] + iy*old_yprd; x[2] = buf[m+3] + iz*old_zprd; } else { x[0] = buf[m+1] + ix*old_xprd + iy*old_xy + iz*old_xz; x[1] = buf[m+2] + iy*old_yprd + iz*old_yz; x[2] = buf[m+3] + iz*old_zprd; } domain->remap(x,image); if (triclinic) { domain->x2lamda(x,lamda); coord = lamda; } else coord = x; if (coord[0] >= sublo[0] && coord[0] < subhi[0] && coord[1] >= sublo[1] && coord[1] < subhi[1] && coord[2] >= sublo[2] && coord[2] < subhi[2]) { m += avec->unpack_restart(&buf[m]); i = atom->nlocal - 1; if (tag_enable) atom_offset = iz*ny*nx*maxtag + iy*nx*maxtag + ix*maxtag; else atom_offset = 0; mol_offset = iz*ny*nx*maxmol + iy*nx*maxmol + ix*maxmol; atom->x[i][0] = x[0]; atom->x[i][1] = x[1]; atom->x[i][2] = x[2]; atom->tag[i] += atom_offset; atom->image[i] = image; if (atom->molecular) { if (atom->molecule[i] > 0) atom->molecule[i] += mol_offset; if (atom->avec->bonds_allow) for (j = 0; j < atom->num_bond[i]; j++) atom->bond_atom[i][j] += atom_offset; if (atom->avec->angles_allow) for (j = 0; j < atom->num_angle[i]; j++) { atom->angle_atom1[i][j] += atom_offset; atom->angle_atom2[i][j] += atom_offset; 
atom->angle_atom3[i][j] += atom_offset; } if (atom->avec->dihedrals_allow) for (j = 0; j < atom->num_dihedral[i]; j++) { atom->dihedral_atom1[i][j] += atom_offset; atom->dihedral_atom2[i][j] += atom_offset; atom->dihedral_atom3[i][j] += atom_offset; atom->dihedral_atom4[i][j] += atom_offset; } if (atom->avec->impropers_allow) for (j = 0; j < atom->num_improper[i]; j++) { atom->improper_atom1[i][j] += atom_offset; atom->improper_atom2[i][j] += atom_offset; atom->improper_atom3[i][j] += atom_offset; atom->improper_atom4[i][j] += atom_offset; } } } else m += static_cast<int> (buf[m]); } } } } } // end of proc loop // free communication buffer and old atom class memory->sfree(buf); delete old; // check that all atoms were assigned to procs double natoms; double rlocal = atom->nlocal; MPI_Allreduce(&rlocal,&natoms,1,MPI_DOUBLE,MPI_SUM,world); if (me == 0) { if (screen) fprintf(screen," %.15g atoms\n",natoms); if (logfile) fprintf(logfile," %.15g atoms\n",natoms); } if (natoms != atom->natoms) error->all("Replicate did not assign all atoms correctly"); if (me == 0) { if (atom->nbonds) { if (screen) fprintf(screen," %d bonds\n",atom->nbonds); if (logfile) fprintf(logfile," %d bonds\n",atom->nbonds); } if (atom->nangles) { if (screen) fprintf(screen," %d angles\n",atom->nangles); if (logfile) fprintf(logfile," %d angles\n",atom->nangles); } if (atom->ndihedrals) { if (screen) fprintf(screen," %d dihedrals\n",atom->ndihedrals); if (logfile) fprintf(logfile," %d dihedrals\n",atom->ndihedrals); } if (atom->nimpropers) { if (screen) fprintf(screen," %d impropers\n",atom->nimpropers); if (logfile) fprintf(logfile," %d impropers\n",atom->nimpropers); } } // create global mapping and bond topology now that system is defined if (atom->map_style) { atom->map_init(); atom->map_set(); } if (atom->molecular) { Special special(lmp); special.build(); } }
void CommBrick::borders() { int i,n,itype,iswap,dim,ineed,twoneed; int nsend,nrecv,sendflag,nfirst,nlast,ngroup; double lo,hi; int *type; double **x; double *buf,*mlo,*mhi; MPI_Request request; AtomVec *avec = atom->avec; // do swaps over all 3 dimensions iswap = 0; smax = rmax = 0; for (dim = 0; dim < 3; dim++) { nlast = 0; twoneed = 2*maxneed[dim]; for (ineed = 0; ineed < twoneed; ineed++) { // find atoms within slab boundaries lo/hi using <= and >= // check atoms between nfirst and nlast // for first swaps in a dim, check owned and ghost // for later swaps in a dim, only check newly arrived ghosts // store sent atom indices in sendlist for use in future timesteps x = atom->x; if (mode == SINGLE) { lo = slablo[iswap]; hi = slabhi[iswap]; } else { type = atom->type; mlo = multilo[iswap]; mhi = multihi[iswap]; } if (ineed % 2 == 0) { nfirst = nlast; nlast = atom->nlocal + atom->nghost; } nsend = 0; // sendflag = 0 if I do not send on this swap // sendneed test indicates receiver no longer requires data // e.g. 
due to non-PBC or non-uniform sub-domains if (ineed/2 >= sendneed[dim][ineed % 2]) sendflag = 0; else sendflag = 1; // find send atoms according to SINGLE vs MULTI // all atoms eligible versus only atoms in bordergroup // can only limit loop to bordergroup for first sends (ineed < 2) // on these sends, break loop in two: owned (in group) and ghost if (sendflag) { if (!bordergroup || ineed >= 2) { if (mode == SINGLE) { for (i = nfirst; i < nlast; i++) if (x[i][dim] >= lo && x[i][dim] <= hi) { if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend); sendlist[iswap][nsend++] = i; } } else { for (i = nfirst; i < nlast; i++) { itype = type[i]; if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) { if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend); sendlist[iswap][nsend++] = i; } } } } else { if (mode == SINGLE) { ngroup = atom->nfirst; for (i = 0; i < ngroup; i++) if (x[i][dim] >= lo && x[i][dim] <= hi) { if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend); sendlist[iswap][nsend++] = i; } for (i = atom->nlocal; i < nlast; i++) if (x[i][dim] >= lo && x[i][dim] <= hi) { if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend); sendlist[iswap][nsend++] = i; } } else { ngroup = atom->nfirst; for (i = 0; i < ngroup; i++) { itype = type[i]; if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) { if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend); sendlist[iswap][nsend++] = i; } } for (i = atom->nlocal; i < nlast; i++) { itype = type[i]; if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) { if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend); sendlist[iswap][nsend++] = i; } } } } } // pack up list of border atoms if (nsend*size_border > maxsend) grow_send(nsend*size_border,0); if (ghost_velocity) n = avec->pack_border_vel(nsend,sendlist[iswap],buf_send, pbc_flag[iswap],pbc[iswap]); else n = avec->pack_border(nsend,sendlist[iswap],buf_send, pbc_flag[iswap],pbc[iswap]); // swap atoms with other proc // no MPI calls except SendRecv if nsend/nrecv = 0 // 
put incoming ghosts at end of my atom arrays // if swapping with self, simply copy, no messages if (sendproc[iswap] != me) { MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0, &nrecv,1,MPI_INT,recvproc[iswap],0,world,MPI_STATUS_IGNORE); if (nrecv*size_border > maxrecv) grow_recv(nrecv*size_border); if (nrecv) MPI_Irecv(buf_recv,nrecv*size_border,MPI_DOUBLE, recvproc[iswap],0,world,&request); if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); if (nrecv) MPI_Wait(&request,MPI_STATUS_IGNORE); buf = buf_recv; } else { nrecv = nsend; buf = buf_send; } // unpack buffer if (ghost_velocity) avec->unpack_border_vel(nrecv,atom->nlocal+atom->nghost,buf); else avec->unpack_border(nrecv,atom->nlocal+atom->nghost,buf); // set all pointers & counters smax = MAX(smax,nsend); rmax = MAX(rmax,nrecv); sendnum[iswap] = nsend; recvnum[iswap] = nrecv; size_forward_recv[iswap] = nrecv*size_forward; size_reverse_send[iswap] = nrecv*size_reverse; size_reverse_recv[iswap] = nsend*size_reverse; firstrecv[iswap] = atom->nlocal + atom->nghost; atom->nghost += nrecv; iswap++; } } // insure send/recv buffers are long enough for all forward & reverse comm int max = MAX(maxforward*smax,maxreverse*rmax); if (max > maxsend) grow_send(max,0); max = MAX(maxforward*rmax,maxreverse*smax); if (max > maxrecv) grow_recv(max); // reset global->local map if (map_style) atom->map_set(); }
void CommBrick::exchange() { int i,m,nsend,nrecv,nrecv1,nrecv2,nlocal; double lo,hi,value; double **x; double *sublo,*subhi; MPI_Request request; AtomVec *avec = atom->avec; // clear global->local map for owned and ghost atoms // b/c atoms migrate to new procs in exchange() and // new ghosts are created in borders() // map_set() is done at end of borders() // clear ghost count and any ghost bonus data internal to AtomVec if (map_style) atom->map_clear(); atom->nghost = 0; atom->avec->clear_bonus(); // insure send buf is large enough for single atom // bufextra = max size of one atom = allowed overflow of sendbuf // fixes can change per-atom size requirement on-the-fly int bufextra_old = bufextra; maxexchange = maxexchange_atom + maxexchange_fix; bufextra = maxexchange + BUFEXTRA; if (bufextra > bufextra_old) memory->grow(buf_send,maxsend+bufextra,"comm:buf_send"); // subbox bounds for orthogonal or triclinic if (triclinic == 0) { sublo = domain->sublo; subhi = domain->subhi; } else { sublo = domain->sublo_lamda; subhi = domain->subhi_lamda; } // loop over dimensions int dimension = domain->dimension; for (int dim = 0; dim < dimension; dim++) { // fill buffer with atoms leaving my box, using < and >= // when atom is deleted, fill it in with last atom x = atom->x; lo = sublo[dim]; hi = subhi[dim]; nlocal = atom->nlocal; i = nsend = 0; while (i < nlocal) { if (x[i][dim] < lo || x[i][dim] >= hi) { if (nsend > maxsend) grow_send(nsend,1); nsend += avec->pack_exchange(i,&buf_send[nsend]); avec->copy(nlocal-1,i,1); nlocal--; } else i++; } atom->nlocal = nlocal; // send/recv atoms in both directions // send size of message first so receiver can realloc buf_recv if needed // if 1 proc in dimension, no send/recv // set nrecv = 0 so buf_send atoms will be lost // if 2 procs in dimension, single send/recv // if more than 2 procs in dimension, send/recv to both neighbors if (procgrid[dim] == 1) nrecv = 0; else { MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0, 
&nrecv1,1,MPI_INT,procneigh[dim][1],0,world,MPI_STATUS_IGNORE); nrecv = nrecv1; if (procgrid[dim] > 2) { MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0, &nrecv2,1,MPI_INT,procneigh[dim][0],0,world,MPI_STATUS_IGNORE); nrecv += nrecv2; } if (nrecv > maxrecv) grow_recv(nrecv); MPI_Irecv(buf_recv,nrecv1,MPI_DOUBLE,procneigh[dim][1],0, world,&request); MPI_Send(buf_send,nsend,MPI_DOUBLE,procneigh[dim][0],0,world); MPI_Wait(&request,MPI_STATUS_IGNORE); if (procgrid[dim] > 2) { MPI_Irecv(&buf_recv[nrecv1],nrecv2,MPI_DOUBLE,procneigh[dim][0],0, world,&request); MPI_Send(buf_send,nsend,MPI_DOUBLE,procneigh[dim][1],0,world); MPI_Wait(&request,MPI_STATUS_IGNORE); } } // check incoming atoms to see if they are in my box // if so, add to my list // box check is only for this dimension, // atom may be passed to another proc in later dims m = 0; while (m < nrecv) { value = buf_recv[m+dim+1]; if (value >= lo && value < hi) m += avec->unpack_exchange(&buf_recv[m]); else m += static_cast<int> (buf_recv[m]); } } if (atom->firstgroupname) atom->first_reorder(); }
void CommBrick::forward_comm(int dummy) { int n; MPI_Request request; AtomVec *avec = atom->avec; double **x = atom->x; double *buf; // exchange data with another proc // if other proc is self, just copy // if comm_x_only set, exchange or copy directly to x, don't unpack for (int iswap = 0; iswap < nswap; iswap++) { if (sendproc[iswap] != me) { if (comm_x_only) { if (size_forward_recv[iswap]) { if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]]; else buf = NULL; MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); } n = avec->pack_comm(sendnum[iswap],sendlist[iswap], buf_send,pbc_flag[iswap],pbc[iswap]); if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); } else if (ghost_velocity) { if (size_forward_recv[iswap]) MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap], buf_send,pbc_flag[iswap],pbc[iswap]); if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_recv); } else { if (size_forward_recv[iswap]) MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); n = avec->pack_comm(sendnum[iswap],sendlist[iswap], buf_send,pbc_flag[iswap],pbc[iswap]); if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_recv); } } else { if (comm_x_only) { if (sendnum[iswap]) avec->pack_comm(sendnum[iswap],sendlist[iswap], x[firstrecv[iswap]],pbc_flag[iswap],pbc[iswap]); } else if (ghost_velocity) { avec->pack_comm_vel(sendnum[iswap],sendlist[iswap], buf_send,pbc_flag[iswap],pbc[iswap]); avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_send); } else { 
avec->pack_comm(sendnum[iswap],sendlist[iswap], buf_send,pbc_flag[iswap],pbc[iswap]); avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_send); } } } }
void Irregular::migrate_atoms() { // clear global->local map since atoms move to new procs // clear old ghosts so map_set() at end will operate only on local atoms // exchange() doesn't need to clear ghosts b/c borders() // is called right after and it clears ghosts and calls map_set() if (map_style) atom->map_clear(); atom->nghost = 0; atom->avec->clear_bonus(); // subbox bounds for orthogonal or triclinic box // other comm/domain data used by coord2proc() double *sublo,*subhi; if (triclinic == 0) { sublo = domain->sublo; subhi = domain->subhi; } else { sublo = domain->sublo_lamda; subhi = domain->subhi_lamda; } uniform = comm->uniform; xsplit = comm->xsplit; ysplit = comm->ysplit; zsplit = comm->zsplit; boxlo = domain->boxlo; prd = domain->prd; // loop over atoms, flag any that are not in my sub-box // fill buffer with atoms leaving my box, using < and >= // assign which proc it belongs to via coord2proc() // if coord2proc() returns me, due to round-off // in triclinic x2lamda(), then keep atom and don't send // when atom is deleted, fill it in with last atom AtomVec *avec = atom->avec; double **x = atom->x; int nlocal = atom->nlocal; int nsend = 0; int nsendatom = 0; int *sizes = new int[nlocal]; int *proclist = new int[nlocal]; int igx,igy,igz; int i = 0; while (i < nlocal) { if (x[i][0] < sublo[0] || x[i][0] >= subhi[0] || x[i][1] < sublo[1] || x[i][1] >= subhi[1] || x[i][2] < sublo[2] || x[i][2] >= subhi[2]) { proclist[nsendatom] = coord2proc(x[i],igx,igy,igz); if (proclist[nsendatom] != me) { if (nsend > maxsend) grow_send(nsend,1); sizes[nsendatom] = avec->pack_exchange(i,&buf_send[nsend]); nsend += sizes[nsendatom]; nsendatom++; avec->copy(nlocal-1,i,1); nlocal--; } else i++; } else i++; } atom->nlocal = nlocal; // create irregular communication plan, perform comm, destroy plan // returned nrecv = size of buffer needed for incoming atoms int nrecv = create_atom(nsendatom,sizes,proclist); if (nrecv > maxrecv) grow_recv(nrecv); 
exchange_atom(buf_send,sizes,buf_recv); destroy_atom(); delete [] sizes; delete [] proclist; // add received atoms to my list int m = 0; while (m < nrecv) m += avec->unpack_exchange(&buf_recv[m]); // reset global->local map if (map_style) atom->map_set(); }
void ReadRestart::command(int narg, char **arg) { if (narg != 1) error->all(FLERR,"Illegal read_restart command"); if (domain->box_exist) error->all(FLERR,"Cannot read_restart after simulation box is defined"); MPI_Comm_rank(world,&me); MPI_Comm_size(world,&nprocs); // if filename contains "*", search dir for latest restart file char *file = new char[strlen(arg[0]) + 16]; if (strchr(arg[0],'*')) { int n; if (me == 0) { file_search(arg[0],file); n = strlen(file) + 1; } MPI_Bcast(&n,1,MPI_INT,0,world); MPI_Bcast(file,n,MPI_CHAR,0,world); } else strcpy(file,arg[0]); // check if filename contains "%" int multiproc; if (strchr(file,'%')) multiproc = 1; else multiproc = 0; // open single restart file or base file for multiproc case // auto-detect whether byte swapping needs to be done as file is read if (me == 0) { if (screen) fprintf(screen,"Reading restart file ...\n"); char *hfile; if (multiproc) { hfile = new char[strlen(file) + 16]; char *ptr = strchr(file,'%'); *ptr = '\0'; sprintf(hfile,"%s%s%s",file,"base",ptr+1); *ptr = '%'; } else hfile = file; fp = fopen(hfile,"rb"); if (fp == NULL) { char str[128]; sprintf(str,"Cannot open restart file %s",hfile); error->one(FLERR,str); } swapflag = autodetect(&fp,hfile); if (multiproc) delete [] hfile; } MPI_Bcast(&swapflag,1,MPI_INT,0,world); // read header info and create atom style and simulation box header(); domain->box_exist = 1; // problem setup using info from header int n; if (nprocs == 1) n = static_cast<int> (atom->natoms); else n = static_cast<int> (LB_FACTOR * atom->natoms / nprocs); atom->allocate_type_arrays(); atom->avec->grow(n); n = atom->nmax; domain->print_box(" "); domain->set_initial_box(); domain->set_global_box(); comm->set_proc_grid(); domain->set_local_box(); // read groups, ntype-length arrays, force field, fix info from file // nextra = max # of extra quantities stored with each atom group->read_restart(fp); type_arrays(); force_fields(); int nextra = modify->read_restart(fp); atom->nextra_store = 
nextra; memory->create(atom->extra,n,nextra,"atom:extra"); // single file: // nprocs_file = # of chunks in file // proc 0 reads chunks one at a time and bcasts it to other procs // each proc unpacks the atoms, saving ones in it's sub-domain // check for atom in sub-domain differs for orthogonal vs triclinic box // close restart file when done AtomVec *avec = atom->avec; int maxbuf = 0; double *buf = NULL; int m; if (multiproc == 0) { int triclinic = domain->triclinic; double *x,lamda[3]; double *coord,*sublo,*subhi; if (triclinic == 0) { sublo = domain->sublo; subhi = domain->subhi; } else { sublo = domain->sublo_lamda; subhi = domain->subhi_lamda; } for (int iproc = 0; iproc < nprocs_file; iproc++) { n = read_int(); if (n > maxbuf) { maxbuf = n; memory->destroy(buf); memory->create(buf,maxbuf,"read_restart:buf"); } if (n > 0) { if (me == 0) nread_double(buf,n,fp); MPI_Bcast(buf,n,MPI_DOUBLE,0,world); } m = 0; while (m < n) { x = &buf[m+1]; if (triclinic) { domain->x2lamda(x,lamda); coord = lamda; } else coord = x; if (coord[0] >= sublo[0] && coord[0] < subhi[0] && coord[1] >= sublo[1] && coord[1] < subhi[1] && coord[2] >= sublo[2] && coord[2] < subhi[2]) { m += avec->unpack_restart(&buf[m]); } else m += static_cast<int> (buf[m]); } } if (me == 0) fclose(fp); // one file per proc: // nprocs_file = # of files // each proc reads 1/P fraction of files, keeping all atoms in the files // perform irregular comm to migrate atoms to correct procs // close restart file when done } else { if (me == 0) fclose(fp); char *perproc = new char[strlen(file) + 16]; char *ptr = strchr(file,'%'); for (int iproc = me; iproc < nprocs_file; iproc += nprocs) { *ptr = '\0'; sprintf(perproc,"%s%d%s",file,iproc,ptr+1); *ptr = '%'; fp = fopen(perproc,"rb"); if (fp == NULL) { char str[128]; sprintf(str,"Cannot open restart file %s",perproc); error->one(FLERR,str); } nread_int(&n,1,fp); if (n > maxbuf) { maxbuf = n; memory->destroy(buf); memory->create(buf,maxbuf,"read_restart:buf"); } if (n > 
0) nread_double(buf,n,fp); m = 0; while (m < n) m += avec->unpack_restart(&buf[m]); fclose(fp); } delete [] perproc; // create a temporary fix to hold and migrate extra atom info // necessary b/c irregular will migrate atoms if (nextra) { char cextra[8],fixextra[8]; sprintf(cextra,"%d",nextra); sprintf(fixextra,"%d",modify->nfix_restart_peratom); char **newarg = new char*[5]; newarg[0] = (char *) "_read_restart"; newarg[1] = (char *) "all"; newarg[2] = (char *) "READ_RESTART"; newarg[3] = cextra; newarg[4] = fixextra; modify->add_fix(5,newarg); delete [] newarg; } // move atoms to new processors via irregular() // in case read by different proc than wrote restart file // first do map_init() since irregular->migrate_atoms() will do map_clear() if (atom->map_style) atom->map_init(); if (domain->triclinic) domain->x2lamda(atom->nlocal); Irregular *irregular = new Irregular(lmp); irregular->migrate_atoms(); delete irregular; if (domain->triclinic) domain->lamda2x(atom->nlocal); // put extra atom info held by fix back into atom->extra // destroy temporary fix if (nextra) { memory->destroy(atom->extra); memory->create(atom->extra,atom->nmax,nextra,"atom:extra"); int ifix = modify->find_fix("_read_restart"); FixReadRestart *fix = (FixReadRestart *) modify->fix[ifix]; int *count = fix->count; double **extra = fix->extra; double **atom_extra = atom->extra; int nlocal = atom->nlocal; for (int i = 0; i < nlocal; i++) for (int j = 0; j < count[i]; j++) atom_extra[i][j] = extra[i][j]; modify->delete_fix("_read_restart"); } } // clean-up memory delete [] file; memory->destroy(buf); // check that all atoms were assigned to procs bigint natoms; bigint nblocal = atom->nlocal; MPI_Allreduce(&nblocal,&natoms,1,MPI_LMP_BIGINT,MPI_SUM,world); if (me == 0) { if (screen) fprintf(screen," " BIGINT_FORMAT " atoms\n",natoms); if (logfile) fprintf(logfile," " BIGINT_FORMAT " atoms\n",natoms); } if (natoms != atom->natoms) error->all(FLERR,"Did not assign all atoms correctly"); if (me == 0) { 
if (atom->nbonds) { if (screen) fprintf(screen," " BIGINT_FORMAT " bonds\n",atom->nbonds); if (logfile) fprintf(logfile," " BIGINT_FORMAT " bonds\n",atom->nbonds); } if (atom->nangles) { if (screen) fprintf(screen," " BIGINT_FORMAT " angles\n", atom->nangles); if (logfile) fprintf(logfile," " BIGINT_FORMAT " angles\n", atom->nangles); } if (atom->ndihedrals) { if (screen) fprintf(screen," " BIGINT_FORMAT " dihedrals\n", atom->ndihedrals); if (logfile) fprintf(logfile," " BIGINT_FORMAT " dihedrals\n", atom->ndihedrals); } if (atom->nimpropers) { if (screen) fprintf(screen," " BIGINT_FORMAT " impropers\n", atom->nimpropers); if (logfile) fprintf(logfile," " BIGINT_FORMAT " impropers\n", atom->nimpropers); } } // check if tags are being used // create global mapping and bond topology now that system is defined int flag = 0; for (int i = 0; i < atom->nlocal; i++) if (atom->tag[i] > 0) flag = 1; int flag_all; MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_MAX,world); if (atom->natoms > 0 && flag_all == 0) atom->tag_enable = 0; if (atom->map_style) { atom->map_init(); atom->map_set(); } if (atom->molecular) { Special special(lmp); special.build(); } }
void FixEvaporate::pre_exchange() { int i,j,m,iwhichglobal,iwhichlocal; int ndel,ndeltopo[4]; if (update->ntimestep != next_reneighbor) return; // grow list and mark arrays if necessary if (atom->nmax > nmax) { memory->destroy(list); memory->destroy(mark); nmax = atom->nmax; memory->create(list,nmax,"evaporate:list"); memory->create(mark,nmax,"evaporate:mark"); } // ncount = # of deletable atoms in region that I own // nall = # on all procs // nbefore = # on procs before me // list[ncount] = list of local indices of atoms I can delete Region *region = domain->regions[iregion]; region->prematch(); double **x = atom->x; int *mask = atom->mask; tagint *tag = atom->tag; int nlocal = atom->nlocal; int ncount = 0; for (i = 0; i < nlocal; i++) if (mask[i] & groupbit) if (region->match(x[i][0],x[i][1],x[i][2])) list[ncount++] = i; int nall,nbefore; MPI_Allreduce(&ncount,&nall,1,MPI_INT,MPI_SUM,world); MPI_Scan(&ncount,&nbefore,1,MPI_INT,MPI_SUM,world); nbefore -= ncount; // ndel = total # of atom deletions, in or out of region // ndeltopo[1,2,3,4] = ditto for bonds, angles, dihedrals, impropers // mark[] = 1 if deleted ndel = 0; for (i = 0; i < nlocal; i++) mark[i] = 0; // atomic deletions // choose atoms randomly across all procs and mark them for deletion // shrink eligible list as my atoms get marked // keep ndel,ncount,nall,nbefore current after each atom deletion if (molflag == 0) { while (nall && ndel < nflux) { iwhichglobal = static_cast<int> (nall*random->uniform()); if (iwhichglobal < nbefore) nbefore--; else if (iwhichglobal < nbefore + ncount) { iwhichlocal = iwhichglobal - nbefore; mark[list[iwhichlocal]] = 1; list[iwhichlocal] = list[ncount-1]; ncount--; } ndel++; nall--; } // molecule deletions // choose one atom in one molecule randomly across all procs // bcast mol ID and delete all atoms in that molecule on any proc // update deletion count by total # of atoms in molecule // shrink list of eligible candidates as any of my atoms get marked // keep 
ndel,ndeltopo,ncount,nall,nbefore current after each mol deletion } else { int me,proc,iatom,ndelone,ndelall,index; tagint imolecule; tagint *molecule = atom->molecule; int *molindex = atom->molindex; int *molatom = atom->molatom; int molecular = atom->molecular; Molecule **onemols = atom->avec->onemols; ndeltopo[0] = ndeltopo[1] = ndeltopo[2] = ndeltopo[3] = 0; while (nall && ndel < nflux) { // pick an iatom,imolecule on proc me to delete iwhichglobal = static_cast<int> (nall*random->uniform()); if (iwhichglobal >= nbefore && iwhichglobal < nbefore + ncount) { iwhichlocal = iwhichglobal - nbefore; iatom = list[iwhichlocal]; imolecule = molecule[iatom]; me = comm->me; } else me = -1; // bcast mol ID to delete all atoms from // if mol ID > 0, delete any atom in molecule and decrement counters // if mol ID == 0, delete single iatom // logic with ndeltopo is to count # of deleted bonds,angles,etc // for atom->molecular = 1, do this for each deleted atom in molecule // for atom->molecular = 2, use Molecule counts for just 1st atom in mol MPI_Allreduce(&me,&proc,1,MPI_INT,MPI_MAX,world); MPI_Bcast(&imolecule,1,MPI_LMP_TAGINT,proc,world); ndelone = 0; for (i = 0; i < nlocal; i++) { if (imolecule && molecule[i] == imolecule) { mark[i] = 1; ndelone++; if (molecular == 1) { if (atom->avec->bonds_allow) { if (force->newton_bond) ndeltopo[0] += atom->num_bond[i]; else { for (j = 0; j < atom->num_bond[i]; j++) { if (tag[i] < atom->bond_atom[i][j]) ndeltopo[0]++; } } } if (atom->avec->angles_allow) { if (force->newton_bond) ndeltopo[1] += atom->num_angle[i]; else { for (j = 0; j < atom->num_angle[i]; j++) { m = atom->map(atom->angle_atom2[i][j]); if (m >= 0 && m < nlocal) ndeltopo[1]++; } } } if (atom->avec->dihedrals_allow) { if (force->newton_bond) ndeltopo[2] += atom->num_dihedral[i]; else { for (j = 0; j < atom->num_dihedral[i]; j++) { m = atom->map(atom->dihedral_atom2[i][j]); if (m >= 0 && m < nlocal) ndeltopo[2]++; } } } if (atom->avec->impropers_allow) { if 
(force->newton_bond) ndeltopo[3] += atom->num_improper[i]; else { for (j = 0; j < atom->num_improper[i]; j++) { m = atom->map(atom->improper_atom2[i][j]); if (m >= 0 && m < nlocal) ndeltopo[3]++; } } } } else if (molecular == 2) { if (molatom[i] == 0) { index = molindex[i]; ndeltopo[0] += onemols[index]->nbonds; ndeltopo[1] += onemols[index]->nangles; ndeltopo[2] += onemols[index]->ndihedrals; ndeltopo[3] += onemols[index]->nimpropers; } } } else if (me == proc && i == iatom) { mark[i] = 1; ndelone++; } } // remove any atoms marked for deletion from my eligible list i = 0; while (i < ncount) { if (mark[list[i]]) { list[i] = list[ncount-1]; ncount--; } else i++; } // update ndel,ncount,nall,nbefore // ndelall is total atoms deleted on this iteration // ncount is already correct, so resum to get nall and nbefore MPI_Allreduce(&ndelone,&ndelall,1,MPI_INT,MPI_SUM,world); ndel += ndelall; MPI_Allreduce(&ncount,&nall,1,MPI_INT,MPI_SUM,world); MPI_Scan(&ncount,&nbefore,1,MPI_INT,MPI_SUM,world); nbefore -= ncount; } } // delete my marked atoms // loop in reverse order to avoid copying marked atoms AtomVec *avec = atom->avec; for (i = nlocal-1; i >= 0; i--) { if (mark[i]) { avec->copy(atom->nlocal-1,i,1); atom->nlocal--; } } // reset global natoms and bonds, angles, etc // if global map exists, reset it now instead of waiting for comm // since deleting atoms messes up ghosts atom->natoms -= ndel; if (molflag) { int all[4]; MPI_Allreduce(ndeltopo,all,4,MPI_INT,MPI_SUM,world); atom->nbonds -= all[0]; atom->nangles -= all[1]; atom->ndihedrals -= all[2]; atom->nimpropers -= all[3]; } if (ndel && atom->map_style) { atom->nghost = 0; atom->map_init(); atom->map_set(); } // statistics ndeleted += ndel; next_reneighbor = update->ntimestep + nevery; }
void ReadRestart::command(int narg, char **arg) { if (narg != 1) error->all(FLERR,"Illegal read_restart command"); if (domain->box_exist) error->all(FLERR,"Cannot read_restart after simulation box is defined"); MPI_Comm_rank(world,&me); MPI_Comm_size(world,&nprocs); // if filename contains "*", search dir for latest restart file char *file = new char[strlen(arg[0]) + 16]; if (strchr(arg[0],'*')) { int n; if (me == 0) { file_search(arg[0],file); n = strlen(file) + 1; } MPI_Bcast(&n,1,MPI_INT,0,world); MPI_Bcast(file,n,MPI_CHAR,0,world); } else strcpy(file,arg[0]); // check for multiproc files and an MPI-IO filename if (strchr(arg[0],'%')) multiproc = 1; else multiproc = 0; if (strstr(arg[0],".mpi")) mpiioflag = 1; else mpiioflag = 0; if (multiproc && mpiioflag) error->all(FLERR, "Read restart MPI-IO output not allowed with '%' in filename"); if (mpiioflag) { mpiio = new RestartMPIIO(lmp); if (!mpiio->mpiio_exists) error->all(FLERR,"Reading from MPI-IO filename when " "MPIIO package is not installed"); } // open single restart file or base file for multiproc case if (me == 0) { if (screen) fprintf(screen,"Reading restart file ...\n"); char *hfile; if (multiproc) { hfile = new char[strlen(file) + 16]; char *ptr = strchr(file,'%'); *ptr = '\0'; sprintf(hfile,"%s%s%s",file,"base",ptr+1); *ptr = '%'; } else hfile = file; fp = fopen(hfile,"rb"); if (fp == NULL) { char str[128]; sprintf(str,"Cannot open restart file %s",hfile); error->one(FLERR,str); } if (multiproc) delete [] hfile; } // read magic string, endian flag, numeric version magic_string(); endian(); int incompatible = version_numeric(); // read header info which creates simulation box header(incompatible); domain->box_exist = 1; // problem setup using info from header int n; if (nprocs == 1) n = static_cast<int> (atom->natoms); else n = static_cast<int> (LB_FACTOR * atom->natoms / nprocs); atom->allocate_type_arrays(); atom->avec->grow(n); n = atom->nmax; domain->print_box(" "); domain->set_initial_box(); 
domain->set_global_box(); comm->set_proc_grid(); domain->set_local_box(); // read groups, ntype-length arrays, force field, fix info from file // nextra = max # of extra quantities stored with each atom group->read_restart(fp); type_arrays(); force_fields(); int nextra = modify->read_restart(fp); atom->nextra_store = nextra; memory->create(atom->extra,n,nextra,"atom:extra"); // read file layout info file_layout(); // close header file if in multiproc mode if (multiproc && me == 0) fclose(fp); // read per-proc info AtomVec *avec = atom->avec; int maxbuf = 0; double *buf = NULL; int m,flag; // MPI-IO input from single file if (mpiioflag) { // add calls to RestartMPIIO class // reopen header file // perform reads // allow for different # of procs reading than wrote the file // mpiio->open(file); // mpiio->read(); // mpiio->close(); // then process atom info as //m = 0; //while (m < n) m += avec->unpack_restart(&buf[m]); } // input of single native file // nprocs_file = # of chunks in file // proc 0 reads a chunk and bcasts it to other procs // each proc unpacks the atoms, saving ones in it's sub-domain // check for atom in sub-domain differs for orthogonal vs triclinic box else if (multiproc == 0) { int triclinic = domain->triclinic; double *x,lamda[3]; double *coord,*sublo,*subhi; if (triclinic == 0) { sublo = domain->sublo; subhi = domain->subhi; } else { sublo = domain->sublo_lamda; subhi = domain->subhi_lamda; } for (int iproc = 0; iproc < nprocs_file; iproc++) { if (read_int() != PERPROC) error->all(FLERR,"Invalid flag in peratom section of restart file"); n = read_int(); if (n > maxbuf) { maxbuf = n; memory->destroy(buf); memory->create(buf,maxbuf,"read_restart:buf"); } read_double_vec(n,buf); m = 0; while (m < n) { x = &buf[m+1]; if (triclinic) { domain->x2lamda(x,lamda); coord = lamda; } else coord = x; if (coord[0] >= sublo[0] && coord[0] < subhi[0] && coord[1] >= sublo[1] && coord[1] < subhi[1] && coord[2] >= sublo[2] && coord[2] < subhi[2]) { m += 
avec->unpack_restart(&buf[m]); } else m += static_cast<int> (buf[m]); } } if (me == 0) fclose(fp); } // input of multiple native files with procs <= files // # of files = multiproc_file // each proc reads a subset of files, striding by nprocs // each proc keeps all atoms in all perproc chunks in its files else if (nprocs <= multiproc_file) { char *procfile = new char[strlen(file) + 16]; char *ptr = strchr(file,'%'); for (int iproc = me; iproc < multiproc_file; iproc += nprocs) { *ptr = '\0'; sprintf(procfile,"%s%d%s",file,iproc,ptr+1); *ptr = '%'; fp = fopen(procfile,"rb"); if (fp == NULL) { char str[128]; sprintf(str,"Cannot open restart file %s",procfile); error->one(FLERR,str); } fread(&flag,sizeof(int),1,fp); if (flag != PROCSPERFILE) error->one(FLERR,"Invalid flag in peratom section of restart file"); int procsperfile; fread(&procsperfile,sizeof(int),1,fp); for (int i = 0; i < procsperfile; i++) { fread(&flag,sizeof(int),1,fp); if (flag != PERPROC) error->one(FLERR,"Invalid flag in peratom section of restart file"); fread(&n,sizeof(int),1,fp); if (n > maxbuf) { maxbuf = n; memory->destroy(buf); memory->create(buf,maxbuf,"read_restart:buf"); } fread(buf,sizeof(double),n,fp); m = 0; while (m < n) m += avec->unpack_restart(&buf[m]); } fclose(fp); } delete [] procfile; } // input of multiple native files with procs > files // # of files = multiproc_file // cluster procs based on # of files // 1st proc in each cluster reads per-proc chunks from file // sends chunks round-robin to other procs in its cluster // each proc keeps all atoms in its perproc chunks in file else { // nclusterprocs = # of procs in my cluster that read from one file // filewriter = 1 if this proc reads file, else 0 // fileproc = ID of proc in my cluster who reads from file // clustercomm = MPI communicator within my cluster of procs int nfile = multiproc_file; int icluster = static_cast<int> ((bigint) me * nfile/nprocs); int fileproc = static_cast<int> ((bigint) icluster * nprocs/nfile); int 
fcluster = static_cast<int> ((bigint) fileproc * nfile/nprocs); if (fcluster < icluster) fileproc++; int fileprocnext = static_cast<int> ((bigint) (icluster+1) * nprocs/nfile); fcluster = static_cast<int> ((bigint) fileprocnext * nfile/nprocs); if (fcluster < icluster+1) fileprocnext++; int nclusterprocs = fileprocnext - fileproc; int filereader = 0; if (me == fileproc) filereader = 1; MPI_Comm clustercomm; MPI_Comm_split(world,icluster,0,&clustercomm); if (filereader) { char *procfile = new char[strlen(file) + 16]; char *ptr = strchr(file,'%'); *ptr = '\0'; sprintf(procfile,"%s%d%s",file,icluster,ptr+1); *ptr = '%'; fp = fopen(procfile,"rb"); if (fp == NULL) { char str[128]; sprintf(str,"Cannot open restart file %s",procfile); error->one(FLERR,str); } delete [] procfile; } int flag,procsperfile; if (filereader) { fread(&flag,sizeof(int),1,fp); if (flag != PROCSPERFILE) error->one(FLERR,"Invalid flag in peratom section of restart file"); fread(&procsperfile,sizeof(int),1,fp); } MPI_Bcast(&procsperfile,1,MPI_INT,0,clustercomm); int tmp,iproc; MPI_Status status; MPI_Request request; for (int i = 0; i < procsperfile; i++) { if (filereader) { fread(&flag,sizeof(int),1,fp); if (flag != PERPROC) error->one(FLERR,"Invalid flag in peratom section of restart file"); fread(&n,sizeof(int),1,fp); if (n > maxbuf) { maxbuf = n; memory->destroy(buf); memory->create(buf,maxbuf,"read_restart:buf"); } fread(buf,sizeof(double),n,fp); if (i % nclusterprocs) { iproc = me + (i % nclusterprocs); MPI_Send(&n,1,MPI_INT,iproc,0,world); MPI_Recv(&tmp,0,MPI_INT,iproc,0,world,&status); MPI_Rsend(buf,n,MPI_DOUBLE,iproc,0,world); } } else if (i % nclusterprocs == me - fileproc) { MPI_Recv(&n,1,MPI_INT,fileproc,0,world,&status); if (n > maxbuf) { maxbuf = n; memory->destroy(buf); memory->create(buf,maxbuf,"read_restart:buf"); } MPI_Irecv(buf,n,MPI_DOUBLE,fileproc,0,world,&request); MPI_Send(&tmp,0,MPI_INT,fileproc,0,world); MPI_Wait(&request,&status); } if (i % nclusterprocs == me - fileproc) { m 
= 0; while (m < n) m += avec->unpack_restart(&buf[m]); } } if (filereader) fclose(fp); MPI_Comm_free(&clustercomm); } // clean-up memory delete [] file; memory->destroy(buf); // for multiproc or MPI-IO files: // perform irregular comm to migrate atoms to correct procs if (multiproc || mpiioflag) { // create a temporary fix to hold and migrate extra atom info // necessary b/c irregular will migrate atoms if (nextra) { char cextra[8],fixextra[8]; sprintf(cextra,"%d",nextra); sprintf(fixextra,"%d",modify->nfix_restart_peratom); char **newarg = new char*[5]; newarg[0] = (char *) "_read_restart"; newarg[1] = (char *) "all"; newarg[2] = (char *) "READ_RESTART"; newarg[3] = cextra; newarg[4] = fixextra; modify->add_fix(5,newarg); delete [] newarg; } // move atoms to new processors via irregular() // in case read by different proc than wrote restart file // first do map_init() since irregular->migrate_atoms() will do map_clear() if (atom->map_style) atom->map_init(); if (domain->triclinic) domain->x2lamda(atom->nlocal); Irregular *irregular = new Irregular(lmp); irregular->migrate_atoms(); delete irregular; if (domain->triclinic) domain->lamda2x(atom->nlocal); // put extra atom info held by fix back into atom->extra // destroy temporary fix if (nextra) { memory->destroy(atom->extra); memory->create(atom->extra,atom->nmax,nextra,"atom:extra"); int ifix = modify->find_fix("_read_restart"); FixReadRestart *fix = (FixReadRestart *) modify->fix[ifix]; int *count = fix->count; double **extra = fix->extra; double **atom_extra = atom->extra; int nlocal = atom->nlocal; for (int i = 0; i < nlocal; i++) for (int j = 0; j < count[i]; j++) atom_extra[i][j] = extra[i][j]; modify->delete_fix("_read_restart"); } } // check that all atoms were assigned to procs bigint natoms; bigint nblocal = atom->nlocal; MPI_Allreduce(&nblocal,&natoms,1,MPI_LMP_BIGINT,MPI_SUM,world); if (me == 0) { if (screen) fprintf(screen," " BIGINT_FORMAT " atoms\n",natoms); if (logfile) fprintf(logfile," " 
BIGINT_FORMAT " atoms\n",natoms); } if (natoms != atom->natoms) error->all(FLERR,"Did not assign all atoms correctly"); if (me == 0) { if (atom->nbonds) { if (screen) fprintf(screen," " BIGINT_FORMAT " bonds\n",atom->nbonds); if (logfile) fprintf(logfile," " BIGINT_FORMAT " bonds\n",atom->nbonds); } if (atom->nangles) { if (screen) fprintf(screen," " BIGINT_FORMAT " angles\n", atom->nangles); if (logfile) fprintf(logfile," " BIGINT_FORMAT " angles\n", atom->nangles); } if (atom->ndihedrals) { if (screen) fprintf(screen," " BIGINT_FORMAT " dihedrals\n", atom->ndihedrals); if (logfile) fprintf(logfile," " BIGINT_FORMAT " dihedrals\n", atom->ndihedrals); } if (atom->nimpropers) { if (screen) fprintf(screen," " BIGINT_FORMAT " impropers\n", atom->nimpropers); if (logfile) fprintf(logfile," " BIGINT_FORMAT " impropers\n", atom->nimpropers); } } // check if tags are being used // create global mapping and bond topology now that system is defined flag = 0; for (int i = 0; i < atom->nlocal; i++) if (atom->tag[i] > 0) flag = 1; int flag_all; MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_MAX,world); if (flag_all == 0) atom->tag_enable = 0; if (atom->map_style) { atom->map_init(); atom->map_set(); } if (atom->molecular) { Special special(lmp); special.build(); } }
void CommCuda::forward_comm_unpack_cuda() { static int count=0; static double kerneltime=0.0; static double copytime=0.0; timespec time1,time2,time3; int n; MPI_Request request; MPI_Status status; AtomVec *avec = atom->avec; double **x = atom->x; cuda->shared_data.domain.xy=domain->xy; cuda->shared_data.domain.xz=domain->xz; cuda->shared_data.domain.yz=domain->yz; cuda->shared_data.domain.prd[0]=domain->prd[0]; cuda->shared_data.domain.prd[1]=domain->prd[1]; cuda->shared_data.domain.prd[2]=domain->prd[2]; cuda->shared_data.domain.triclinic=domain->triclinic; if(not comm_x_only && not avec->cudable) cuda->downloadAll(); //if not comm_x_only the communication routine of the atom_vec style class is used // exchange data with another proc // if other proc is self, just copy // if comm_x_only set, exchange or copy directly to x, don't unpack for (int iswap = 0; iswap < nswap; iswap++) { if (sendproc[iswap] != me) { if (comm_x_only) { //Cuda_CommCuda_UnpackComm(&cuda->shared_data,recvnum[iswap],firstrecv[iswap],cuda->shared_data.comm.buf_recv[iswap],iswap); //Unpack for cpu exchange happens implicitely since buf==x[firstrecv] Cuda_CommCuda_UnpackComm(&cuda->shared_data,recvnum[iswap],firstrecv[iswap],buf_recv,iswap); //Unpack for cpu exchange happens implicitely since buf==x[firstrecv] } else if (ghost_velocity) { //Cuda_CommCuda_UnpackComm_Vel(&cuda->shared_data,recvnum[iswap],firstrecv[iswap],(void*)&buf_recv[iswap*maxrecv]); //Unpack for cpu exchange happens implicitely since buf==x[firstrecv] } else { MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); if(avec->cudable) n = avec->pack_comm(sendnum[iswap],&iswap, buf_send,pbc_flag[iswap],pbc[iswap]); else n = avec->pack_comm(sendnum[iswap],sendlist[iswap], buf_send,pbc_flag[iswap],pbc[iswap]); MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); MPI_Wait(&request,&status); avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_recv); } } else //sendproc == me { if 
(comm_x_only) { if (sendnum[iswap]) { } } else if (ghost_velocity) { } else { n = avec->pack_comm(sendnum[iswap],sendlist[iswap], buf_send,pbc_flag[iswap],pbc[iswap]); avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_send); } } } if(not comm_x_only && not avec->cudable) cuda->uploadAll(); }
void CommCuda::forward_comm_cuda() { static int count=0; static double kerneltime=0.0; static double copytime=0.0; timespec time1,time2,time3; int n; MPI_Request request; MPI_Status status; AtomVec *avec = atom->avec; double **x = atom->x; cuda->shared_data.domain.xy=domain->xy; cuda->shared_data.domain.xz=domain->xz; cuda->shared_data.domain.yz=domain->yz; cuda->shared_data.domain.prd[0]=domain->prd[0]; cuda->shared_data.domain.prd[1]=domain->prd[1]; cuda->shared_data.domain.prd[2]=domain->prd[2]; cuda->shared_data.domain.triclinic=domain->triclinic; if(not comm_x_only && not avec->cudable) { cuda->downloadAll(); Comm::forward_comm(); cuda->uploadAll(); return; } // exchange data with another proc // if other proc is self, just copy // if comm_x_only set, exchange or copy directly to x, don't unpack for (int iswap = 0; iswap < nswap; iswap++) { if (sendproc[iswap] != me) { if (comm_x_only) { int size_forward_recv_now=0; if((sizeof(X_FLOAT)!=sizeof(double)) && size_forward_recv[iswap]) //some complicated way to safe some transfer size if single precision is used size_forward_recv_now=(size_forward_recv[iswap]+1)*sizeof(X_FLOAT)/sizeof(double); else size_forward_recv_now=size_forward_recv[iswap]; clock_gettime(CLOCK_REALTIME,&time1); MPI_Irecv(buf_recv,size_forward_recv_now,MPI_DOUBLE, recvproc[iswap],0,world,&request); n = Cuda_CommCuda_PackComm(&cuda->shared_data,sendnum[iswap],iswap,(void*) buf_send,pbc[iswap],pbc_flag[iswap]); clock_gettime(CLOCK_REALTIME,&time2); if((sizeof(X_FLOAT)!=sizeof(double)) && n) //some complicated way to safe some transfer size if single precision is used n=(n+1)*sizeof(X_FLOAT)/sizeof(double); //printf("RecvSize: %i SendSize: %i\n",size_forward_recv_now,n); MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); MPI_Wait(&request,&status); clock_gettime(CLOCK_REALTIME,&time3); cuda->shared_data.cuda_timings.comm_forward_mpi_upper+= time3.tv_sec-time1.tv_sec+1.0*(time3.tv_nsec-time1.tv_nsec)/1000000000; 
cuda->shared_data.cuda_timings.comm_forward_mpi_lower+= time3.tv_sec-time2.tv_sec+1.0*(time3.tv_nsec-time2.tv_nsec)/1000000000; Cuda_CommCuda_UnpackComm(&cuda->shared_data,recvnum[iswap],firstrecv[iswap],(void*)buf_recv,iswap); //Unpack for cpu exchange happens implicitely since buf==x[firstrecv] } else if (ghost_velocity) { MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); if(avec->cudable) n = avec->pack_comm_vel(sendnum[iswap],&iswap, buf_send,pbc_flag[iswap],pbc[iswap]); else n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap], buf_send,pbc_flag[iswap],pbc[iswap]); MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); MPI_Wait(&request,&status); avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_recv); } else { MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); if(avec->cudable) n = avec->pack_comm(sendnum[iswap],&iswap, buf_send,pbc_flag[iswap],pbc[iswap]); else n = avec->pack_comm(sendnum[iswap],sendlist[iswap], buf_send,pbc_flag[iswap],pbc[iswap]); MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); MPI_Wait(&request,&status); avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_recv); } } else //sendproc == me { cuda->self_comm=1; if (comm_x_only) { if (sendnum[iswap]) { n = Cuda_CommCuda_PackComm_Self(&cuda->shared_data,sendnum[iswap],iswap,firstrecv[iswap],pbc[iswap],pbc_flag[iswap]); if(n<0) error->all(FLERR," # CUDA ERRROR on PackComm_Self"); if((sizeof(X_FLOAT)!=sizeof(double)) && n) n=(n+1)*sizeof(X_FLOAT)/sizeof(double); } } else if (ghost_velocity) { n = avec->pack_comm_vel(sendnum[iswap],&iswap, (double*) firstrecv,pbc_flag[iswap],pbc[iswap]); //avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],(double*) firstrecv); } else { n = avec->pack_comm(sendnum[iswap],&iswap, (double*) firstrecv,pbc_flag[iswap],pbc[iswap]); //avec->unpack_comm(recvnum[iswap],firstrecv[iswap],(double*) firstrecv); } cuda->self_comm=0; } } }
void CommCuda::exchange_cuda() { int i,m,nsend,nrecv,nrecv1,nrecv2,nlocal; double lo,hi,value; double **x; double *sublo,*subhi,*buf; MPI_Request request; MPI_Status status; AtomVec *avec = atom->avec; timespec time1,time2,time3; // clear global->local map for owned and ghost atoms // b/c atoms migrate to new procs in exchange() and // new ghosts are created in borders() // map_set() is done at end of borders() if(map_style) cuda->cu_tag->download(); if (map_style) atom->map_clear(); // subbox bounds for orthogonal or triclinic if (triclinic == 0) { sublo = domain->sublo; subhi = domain->subhi; } else { sublo = domain->sublo_lamda; subhi = domain->subhi_lamda; } // loop over dimensions for (int dim = 0; dim < 3; dim++) { // fill buffer with atoms leaving my box, using < and >= // when atom is deleted, fill it in with last atom cuda->shared_data.exchange_dim=dim; nlocal = atom->nlocal; avec->maxsend=&maxsend; nsend=avec->pack_exchange(dim,(double*) &buf_send); nlocal = atom->nlocal; atom->nlocal = nlocal; // send/recv atoms in both directions // if 1 proc in dimension, no send/recv, set recv buf to send buf // if 2 procs in dimension, single send/recv // if more than 2 procs in dimension, send/recv to both neighbors clock_gettime(CLOCK_REALTIME,&time1); if (procgrid[dim] == 1) { nrecv = nsend; buf = buf_send; } else { MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0, &nrecv1,1,MPI_INT,procneigh[dim][1],0,world,&status); nrecv = nrecv1; if (procgrid[dim] > 2) { MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0, &nrecv2,1,MPI_INT,procneigh[dim][0],0,world,&status); nrecv += nrecv2; } if (nrecv+1 > maxrecv) grow_recv(nrecv+1); MPI_Irecv(buf_recv,nrecv1,MPI_DOUBLE,procneigh[dim][1],0, world,&request); MPI_Send(buf_send,nsend,MPI_DOUBLE,procneigh[dim][0],0,world); MPI_Wait(&request,&status); if (procgrid[dim] > 2) { MPI_Irecv(&buf_recv[nrecv1],nrecv2,MPI_DOUBLE,procneigh[dim][0],0, world,&request); MPI_Send(buf_send,nsend,MPI_DOUBLE,procneigh[dim][1],0,world); 
MPI_Wait(&request,&status); if((nrecv1==0)||(nrecv2==0)) buf_recv[nrecv]=0; } buf = buf_recv; } //printf("nsend: %i nrecv: %i\n",nsend,nrecv); // check incoming atoms to see if they are in my box // if so, add to my list clock_gettime(CLOCK_REALTIME,&time2); cuda->shared_data.cuda_timings.comm_exchange_mpi+= time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000; if(nrecv) { avec->maxsend=&maxsend; avec->unpack_exchange(buf); } } if(atom->firstgroupname) cuda->downloadAll(); if(atom->firstgroupname) atom->first_reorder(); if(atom->firstgroupname) cuda->uploadAll(); }
void CommCuda::forward_comm_transfer_cuda() { static int count=0; static double kerneltime=0.0; static double copytime=0.0; timespec time1,time2,time3; int n; MPI_Request request; MPI_Status status; AtomVec *avec = atom->avec; double **x = atom->x; cuda->shared_data.domain.xy=domain->xy; cuda->shared_data.domain.xz=domain->xz; cuda->shared_data.domain.yz=domain->yz; cuda->shared_data.domain.prd[0]=domain->prd[0]; cuda->shared_data.domain.prd[1]=domain->prd[1]; cuda->shared_data.domain.prd[2]=domain->prd[2]; cuda->shared_data.domain.triclinic=domain->triclinic; if(not comm_x_only && not avec->cudable) cuda->downloadAll(); //if not comm_x_only the communication routine of the atom_vec style class is used //printf("A\n"); // exchange data with another proc // if other proc is self, just copy // if comm_x_only set, exchange or copy directly to x, don't unpack for (int iswap = 0; iswap < nswap; iswap++) { if (sendproc[iswap] != me) { if (comm_x_only) { int size_forward_recv_now=0; if((sizeof(X_FLOAT)!=sizeof(double)) && size_forward_recv[iswap]) //some complicated way to safe some transfer size if single precision is used size_forward_recv_now=(size_forward_recv[iswap]+1)*sizeof(X_FLOAT)/sizeof(double); else size_forward_recv_now=size_forward_recv[iswap]; //printf("A: %i \n",size_forward_recv_now/1024*4); //MPI_Irecv(cuda->shared_data.comm.buf_recv[iswap],size_forward_recv_now,MPI_DOUBLE, // recvproc[iswap],0,world,&request); MPI_Irecv(buf_recv,size_forward_recv_now,MPI_DOUBLE, recvproc[iswap],0,world,&request); //printf("%p %p %i\n",buf_send, cuda->shared_data.comm.buf_send_dev[iswap], cuda->shared_data.comm.send_size[iswap]*sizeof(double)); //memcpy(buf_send,cuda->shared_data.comm.buf_send[iswap],cuda->shared_data.comm.send_size[iswap]*sizeof(double)); // CudaWrapper_SyncStream(1); //printf("B: %i \n",cuda->shared_data.comm.send_size[iswap]/1024*4); CudaWrapper_DownloadCudaDataAsync((void*) buf_send, cuda->shared_data.comm.buf_send_dev[iswap], 
cuda->shared_data.comm.send_size[iswap]*sizeof(double),2); //MPI_Send(cuda->shared_data.comm.buf_send[iswap],cuda->shared_data.comm.send_size[iswap],MPI_DOUBLE,sendproc[iswap],0,world); clock_gettime(CLOCK_REALTIME,&time1); CudaWrapper_SyncStream(2); //printf("C: %i \n",cuda->shared_data.comm.send_size[iswap]/1024*4); clock_gettime(CLOCK_REALTIME,&time2); cuda->shared_data.cuda_timings.comm_forward_download+= time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000; MPI_Send(buf_send,cuda->shared_data.comm.send_size[iswap],MPI_DOUBLE,sendproc[iswap],0,world); MPI_Wait(&request,&status); //printf("D: %i \n",cuda->shared_data.comm.send_size[iswap]/1024*4); CudaWrapper_UploadCudaDataAsync((void*) buf_recv,cuda->shared_data.comm.buf_recv_dev[iswap], size_forward_recv_now*sizeof(double),2); clock_gettime(CLOCK_REALTIME,&time1); CudaWrapper_SyncStream(2); //printf("E: %i \n",cuda->shared_data.comm.send_size[iswap]/1024*4); //memcpy(cuda->shared_data.comm.buf_recv[iswap],buf_recv,size_forward_recv_now*sizeof(double)); //printf("RecvSize: %i SendSize: %i\n",size_forward_recv_now*sizeof(double),cuda->shared_data.comm.send_size[iswap]*sizeof(double)); clock_gettime(CLOCK_REALTIME,&time3); cuda->shared_data.cuda_timings.comm_forward_upload+= time3.tv_sec-time1.tv_sec+1.0*(time3.tv_nsec-time1.tv_nsec)/1000000000; cuda->shared_data.cuda_timings.comm_forward_mpi_lower+= time3.tv_sec-time2.tv_sec+1.0*(time3.tv_nsec-time2.tv_nsec)/1000000000; clock_gettime(CLOCK_REALTIME,&time3); cuda->shared_data.cuda_timings.comm_forward_mpi_upper+= time3.tv_sec-time1.tv_sec+1.0*(time3.tv_nsec-time1.tv_nsec)/1000000000; } else if (ghost_velocity) { /* int size_forward_recv_now=0; if((sizeof(X_FLOAT)!=sizeof(double)) && size_forward_recv[iswap]) //some complicated way to safe some transfer size if single precision is used size_forward_recv_now=(size_forward_recv[iswap]+1)*sizeof(X_FLOAT)/sizeof(double); else size_forward_recv_now=size_forward_recv[iswap]; 
clock_gettime(CLOCK_REALTIME,&time1); MPI_Irecv(cuda->shared_data.comm.buf_recv[iswap],size_forward_recv_now,MPI_DOUBLE, recvproc[iswap],0,world,&request); clock_gettime(CLOCK_REALTIME,&time2); MPI_Send(cuda->shared_data.comm.buf_send[iswap],cuda->shared_data.comm.send_size[iswap],MPI_DOUBLE,sendproc[iswap],0,world); MPI_Wait(&request,&status); clock_gettime(CLOCK_REALTIME,&time3); cuda->shared_data.cuda_timings.comm_forward_mpi_upper+= time3.tv_sec-time1.tv_sec+1.0*(time3.tv_nsec-time1.tv_nsec)/1000000000; cuda->shared_data.cuda_timings.comm_forward_mpi_lower+= time3.tv_sec-time2.tv_sec+1.0*(time3.tv_nsec-time2.tv_nsec)/1000000000;*/ } else { MPI_Irecv(buf_recv,size_forward_recv[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); if(avec->cudable) n = avec->pack_comm(sendnum[iswap],&iswap, buf_send,pbc_flag[iswap],pbc[iswap]); else n = avec->pack_comm(sendnum[iswap],sendlist[iswap], buf_send,pbc_flag[iswap],pbc[iswap]); MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); MPI_Wait(&request,&status); avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_recv); } } else //sendproc == me { if (comm_x_only) { if (sendnum[iswap]) { } } else if (ghost_velocity) { } else { n = avec->pack_comm(sendnum[iswap],sendlist[iswap], buf_send,pbc_flag[iswap],pbc[iswap]); avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_send); } } } if(not comm_x_only && not avec->cudable) cuda->uploadAll(); }
void FixSRP::setup_pre_force(int /*zz*/) { double **x = atom->x; double **xold; tagint *tag = atom->tag; tagint *tagold; int *type = atom->type; int* dlist; AtomVec *avec = atom->avec; int **bondlist = neighbor->bondlist; int nlocal, nlocal_old; nlocal = nlocal_old = atom->nlocal; bigint nall = atom->nlocal + atom->nghost; int nbondlist = neighbor->nbondlist; int i,j,n; // make a copy of all coordinates and tags // that is consistent with the bond list as // atom->x will be affected by creating/deleting atoms. // also compile list of local atoms to be deleted. memory->create(xold,nall,3,"fix_srp:xold"); memory->create(tagold,nall,"fix_srp:tagold"); memory->create(dlist,nall,"fix_srp:dlist"); for (i = 0; i < nall; i++){ xold[i][0] = x[i][0]; xold[i][1] = x[i][1]; xold[i][2] = x[i][2]; tagold[i]=tag[i]; dlist[i] = (type[i] == bptype) ? 1 : 0; for (n = 0; n < 2; n++) array[i][n] = 0.0; } // delete local atoms flagged in dlist i = 0; int ndel = 0; while (i < nlocal) { if (dlist[i]) { avec->copy(nlocal-1,i,1); dlist[i] = dlist[nlocal-1]; nlocal--; ndel++; } else i++; } atom->nlocal = nlocal; memory->destroy(dlist); int nadd = 0; double rsqold = 0.0; double delx, dely, delz, rmax, rsq, rsqmax; double xone[3]; for (n = 0; n < nbondlist; n++) { // consider only the user defined bond type // btype of zero considers all bonds if(btype > 0 && bondlist[n][2] != btype) continue; i = bondlist[n][0]; j = bondlist[n][1]; // position of bond i xone[0] = (xold[i][0] + xold[j][0])*0.5; xone[1] = (xold[i][1] + xold[j][1])*0.5; xone[2] = (xold[i][2] + xold[j][2])*0.5; // record longest bond // this used to set ghost cutoff delx = xold[j][0] - xold[i][0]; dely = xold[j][1] - xold[i][1]; delz = xold[j][2] - xold[i][2]; rsq = delx*delx + dely*dely + delz*delz; if(rsq > rsqold) rsqold = rsq; // make one particle for each bond // i is local // if newton bond, always make particle // if j is local, always make particle // if j is ghost, decide from tag if ((force->newton_bond) || (j < 
nlocal_old) || (tagold[i] > tagold[j])) { atom->natoms++; avec->create_atom(bptype,xone); // pack tag i/j into buffer for comm array[atom->nlocal-1][0] = static_cast<double>(tagold[i]); array[atom->nlocal-1][1] = static_cast<double>(tagold[j]); nadd++; } } bigint nblocal = atom->nlocal; MPI_Allreduce(&nblocal,&atom->natoms,1,MPI_LMP_BIGINT,MPI_SUM,world); // free temporary storage memory->destroy(xold); memory->destroy(tagold); char str[128]; int nadd_all = 0, ndel_all = 0; MPI_Allreduce(&ndel,&ndel_all,1,MPI_INT,MPI_SUM,world); MPI_Allreduce(&nadd,&nadd_all,1,MPI_INT,MPI_SUM,world); if(comm->me == 0){ sprintf(str, "Removed/inserted %d/%d bond particles.", ndel_all,nadd_all); error->message(FLERR,str); } // check ghost comm distances // warn and change if shorter from estimate // ghost atoms must be present for bonds on edge of neighbor cutoff // extend cutghost slightly more than half of the longest bond MPI_Allreduce(&rsqold,&rsqmax,1,MPI_DOUBLE,MPI_MAX,world); rmax = sqrt(rsqmax); double cutneighmax_srp = neighbor->cutneighmax + 0.51*rmax; double length0,length1,length2; if (domain->triclinic) { double *h_inv = domain->h_inv; length0 = sqrt(h_inv[0]*h_inv[0] + h_inv[5]*h_inv[5] + h_inv[4]*h_inv[4]); length1 = sqrt(h_inv[1]*h_inv[1] + h_inv[3]*h_inv[3]); length2 = h_inv[2]; } else length0 = length1 = length2 = 1.0; // find smallest cutghost. // comm->cutghost is stored in fractional coordinates for triclinic double cutghostmin = comm->cutghost[0]/length0; if (cutghostmin > comm->cutghost[1]/length1) cutghostmin = comm->cutghost[1]/length1; if (cutghostmin > comm->cutghost[2]/length2) cutghostmin = comm->cutghost[2]/length2; // stop if cutghost is insufficient if (cutneighmax_srp > cutghostmin){ sprintf(str, "Communication cutoff too small for fix srp. 
" "Need %f, current %f.", cutneighmax_srp, cutghostmin); error->all(FLERR,str); } // assign tags for new atoms, update map atom->tag_extend(); if (atom->map_style) { atom->nghost = 0; atom->map_init(); atom->map_set(); } // put new particles in the box before exchange // move owned to new procs // get ghosts // build neigh lists again // if triclinic, lambda coords needed for pbc, exchange, borders if (domain->triclinic) domain->x2lamda(atom->nlocal); domain->pbc(); comm->setup(); if (neighbor->style) neighbor->setup_bins(); comm->exchange(); if (atom->sortfreq > 0) atom->sort(); comm->borders(); // back to box coords if (domain->triclinic) domain->lamda2x(atom->nlocal+atom->nghost); domain->image_check(); domain->box_too_small_check(); modify->setup_pre_neighbor(); neighbor->build(1); neighbor->ncalls = 0; // new atom counts nlocal = atom->nlocal; nall = atom->nlocal + atom->nghost; // zero all forces for(i = 0; i < nall; i++) atom->f[i][0] = atom->f[i][1] = atom->f[i][2] = 0.0; // do not include bond particles in thermo output // remove them from all groups. set their velocity to zero. for(i=0; i< nlocal; i++) if(atom->type[i] == bptype) { atom->mask[i] = 0; atom->v[i][0] = atom->v[i][1] = atom->v[i][2] = 0.0; } }
void CommCuda::reverse_comm() { int n; MPI_Request request; MPI_Status status; AtomVec *avec = atom->avec; double **f = atom->f; double *buf; if(not comm_f_only && not avec->cudable) cuda->downloadAll(); //not yet implemented in CUDA but only needed for non standard atom styles // exchange data with another proc // if other proc is self, just copy // if comm_f_only set, exchange or copy directly from f, don't pack for (int iswap = nswap-1; iswap >= 0; iswap--) { if (sendproc[iswap] != me) { if (comm_f_only) { int size_recv_now=size_reverse_recv[iswap]; if((sizeof(F_FLOAT)!=sizeof(double))&& size_reverse_recv[iswap]) size_recv_now=(size_recv_now+1)*sizeof(F_FLOAT)/sizeof(double); MPI_Irecv(buf_recv,size_recv_now,MPI_DOUBLE, sendproc[iswap],0,world,&request); buf=buf_send; if (size_reverse_send[iswap]) { Cuda_CommCuda_PackReverse(&cuda->shared_data,size_reverse_send[iswap]/3,firstrecv[iswap],buf); } else buf=NULL; int size_reverse_send_now=size_reverse_send[iswap]; if((sizeof(F_FLOAT)!=sizeof(double))&& size_reverse_send[iswap]) size_reverse_send_now=(size_reverse_send_now+1)*sizeof(F_FLOAT)/sizeof(double); MPI_Send(buf,size_reverse_send_now,MPI_DOUBLE, recvproc[iswap],0,world); MPI_Wait(&request,&status); Cuda_CommCuda_UnpackReverse(&cuda->shared_data,sendnum[iswap],iswap,buf_recv); } else { MPI_Irecv(buf_recv,size_reverse_recv[iswap],MPI_DOUBLE, sendproc[iswap],0,world,&request); n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send); MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap],0,world); MPI_Wait(&request,&status); avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_recv); } } else { if (comm_f_only) { if (sendnum[iswap]) Cuda_CommCuda_UnpackReverse_Self(&cuda->shared_data,sendnum[iswap],iswap,firstrecv[iswap]); } else { n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send); avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_send); } } } if(not comm_f_only && not avec->cudable) cuda->uploadAll(); //not yet implemented in CUDA but 
only needed for non standard atom styles }
void Replicate::command(int narg, char **arg) { int i,j,m,n; if (domain->box_exist == 0) error->all(FLERR,"Replicate command before simulation box is defined"); if (narg != 3) error->all(FLERR,"Illegal replicate command"); int me = comm->me; int nprocs = comm->nprocs; if (me == 0 && screen) fprintf(screen,"Replicating atoms ...\n"); // nrep = total # of replications int nx = force->inumeric(FLERR,arg[0]); int ny = force->inumeric(FLERR,arg[1]); int nz = force->inumeric(FLERR,arg[2]); int nrep = nx*ny*nz; // error and warning checks if (nx <= 0 || ny <= 0 || nz <= 0) error->all(FLERR,"Illegal replicate command"); if (domain->dimension == 2 && nz != 1) error->all(FLERR,"Cannot replicate 2d simulation in z dimension"); if ((nx > 1 && domain->xperiodic == 0) || (ny > 1 && domain->yperiodic == 0) || (nz > 1 && domain->zperiodic == 0)) { if (comm->me == 0) error->warning(FLERR,"Replicating in a non-periodic dimension"); } if (atom->nextra_grow || atom->nextra_restart || atom->nextra_store) error->all(FLERR,"Cannot replicate with fixes that store atom quantities"); // maxtag = largest atom tag across all existing atoms tagint maxtag = 0; if (atom->tag_enable) { for (i = 0; i < atom->nlocal; i++) maxtag = MAX(atom->tag[i],maxtag); tagint maxtag_all; MPI_Allreduce(&maxtag,&maxtag_all,1,MPI_LMP_TAGINT,MPI_MAX,world); maxtag = maxtag_all; } // maxmol = largest molecule tag across all existing atoms tagint maxmol = 0; if (atom->molecule_flag) { for (i = 0; i < atom->nlocal; i++) maxmol = MAX(atom->molecule[i],maxmol); tagint maxmol_all; MPI_Allreduce(&maxmol,&maxmol_all,1,MPI_LMP_TAGINT,MPI_MAX,world); maxmol = maxmol_all; } // unmap existing atoms via image flags for (i = 0; i < atom->nlocal; i++) domain->unmap(atom->x[i],atom->image[i]); // communication buffer for all my atom's info // max_size = largest buffer needed by any proc // must do before new Atom class created, // since size_restart() uses atom->nlocal int max_size; int send_size = atom->avec->size_restart(); 
MPI_Allreduce(&send_size,&max_size,1,MPI_INT,MPI_MAX,world); double *buf; memory->create(buf,max_size,"replicate:buf"); // old = original atom class // atom = new replicated atom class // also set atomKK for Kokkos version of Atom class Atom *old = atom; if (lmp->kokkos) atom = new AtomKokkos(lmp); else atom = new Atom(lmp); atomKK = (AtomKokkos*) atom; atom->settings(old); atom->create_avec(old->atom_style,old->avec->nargcopy,old->avec->argcopy,0); // check that new system will not be too large // new tags cannot exceed MAXTAGINT // new system sizes cannot exceed MAXBIGINT if (atom->tag_enable) { bigint maxnewtag = maxtag + (nrep-1)*old->natoms; if (maxnewtag < 0 || maxnewtag >= MAXTAGINT) error->all(FLERR,"Replicated system atom IDs are too big"); } if (nrep*old->natoms < 0 || nrep*old->natoms >= MAXBIGINT || nrep*old->nbonds < 0 || nrep*old->nbonds >= MAXBIGINT || nrep*old->nangles < 0 || nrep*old->nangles >= MAXBIGINT || nrep*old->ndihedrals < 0 || nrep*old->ndihedrals >= MAXBIGINT || nrep*old->nimpropers < 0 || nrep*old->nimpropers >= MAXBIGINT) error->all(FLERR,"Replicated system is too big"); // assign atom and topology counts in new class from old one atom->natoms = old->natoms * nrep; atom->nbonds = old->nbonds * nrep; atom->nangles = old->nangles * nrep; atom->ndihedrals = old->ndihedrals * nrep; atom->nimpropers = old->nimpropers * nrep; atom->ntypes = old->ntypes; atom->nbondtypes = old->nbondtypes; atom->nangletypes = old->nangletypes; atom->ndihedraltypes = old->ndihedraltypes; atom->nimpropertypes = old->nimpropertypes; atom->bond_per_atom = old->bond_per_atom; atom->angle_per_atom = old->angle_per_atom; atom->dihedral_per_atom = old->dihedral_per_atom; atom->improper_per_atom = old->improper_per_atom; // store old simulation box int triclinic = domain->triclinic; double old_xprd = domain->xprd; double old_yprd = domain->yprd; double old_zprd = domain->zprd; double old_xy = domain->xy; double old_xz = domain->xz; double old_yz = domain->yz; // setup 
new simulation box domain->boxhi[0] = domain->boxlo[0] + nx*old_xprd; domain->boxhi[1] = domain->boxlo[1] + ny*old_yprd; domain->boxhi[2] = domain->boxlo[2] + nz*old_zprd; if (triclinic) { domain->xy *= ny; domain->xz *= nz; domain->yz *= nz; } // new problem setup using new box boundaries if (nprocs == 1) n = static_cast<int> (atom->natoms); else n = static_cast<int> (LB_FACTOR * atom->natoms / nprocs); atom->allocate_type_arrays(); atom->avec->grow(n); n = atom->nmax; domain->print_box(" "); domain->set_initial_box(); domain->set_global_box(); comm->set_proc_grid(); domain->set_local_box(); // copy type arrays to new atom class if (atom->mass) { for (int itype = 1; itype <= atom->ntypes; itype++) { atom->mass_setflag[itype] = old->mass_setflag[itype]; if (atom->mass_setflag[itype]) atom->mass[itype] = old->mass[itype]; } } // set bounds for my proc // if periodic and I am lo/hi proc, adjust bounds by EPSILON // insures all replicated atoms will be owned even with round-off double epsilon[3]; if (triclinic) epsilon[0] = epsilon[1] = epsilon[2] = EPSILON; else { epsilon[0] = domain->prd[0] * EPSILON; epsilon[1] = domain->prd[1] * EPSILON; epsilon[2] = domain->prd[2] * EPSILON; } double sublo[3],subhi[3]; if (triclinic == 0) { sublo[0] = domain->sublo[0]; subhi[0] = domain->subhi[0]; sublo[1] = domain->sublo[1]; subhi[1] = domain->subhi[1]; sublo[2] = domain->sublo[2]; subhi[2] = domain->subhi[2]; } else { sublo[0] = domain->sublo_lamda[0]; subhi[0] = domain->subhi_lamda[0]; sublo[1] = domain->sublo_lamda[1]; subhi[1] = domain->subhi_lamda[1]; sublo[2] = domain->sublo_lamda[2]; subhi[2] = domain->subhi_lamda[2]; } if (comm->layout != LAYOUT_TILED) { if (domain->xperiodic) { if (comm->myloc[0] == 0) sublo[0] -= epsilon[0]; if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0]; } if (domain->yperiodic) { if (comm->myloc[1] == 0) sublo[1] -= epsilon[1]; if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1]; } if (domain->zperiodic) { if 
(comm->myloc[2] == 0) sublo[2] -= epsilon[2]; if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2]; } } else { if (domain->xperiodic) { if (comm->mysplit[0][0] == 0.0) sublo[0] -= epsilon[0]; if (comm->mysplit[0][1] == 1.0) subhi[0] += epsilon[0]; } if (domain->yperiodic) { if (comm->mysplit[1][0] == 0.0) sublo[1] -= epsilon[1]; if (comm->mysplit[1][1] == 1.0) subhi[1] += epsilon[1]; } if (domain->zperiodic) { if (comm->mysplit[2][0] == 0.0) sublo[2] -= epsilon[2]; if (comm->mysplit[2][1] == 1.0) subhi[2] += epsilon[2]; } } // loop over all procs // if this iteration of loop is me: // pack my unmapped atom data into buf // bcast it to all other procs // performs 3d replicate loop with while loop over atoms in buf // x = new replicated position, remapped into simulation box // unpack atom into new atom class from buf if I own it // adjust tag, mol #, coord, topology info as needed AtomVec *old_avec = old->avec; AtomVec *avec = atom->avec; int ix,iy,iz; tagint atom_offset,mol_offset; imageint image; double x[3],lamda[3]; double *coord; int tag_enable = atom->tag_enable; for (int iproc = 0; iproc < nprocs; iproc++) { if (me == iproc) { n = 0; for (i = 0; i < old->nlocal; i++) n += old_avec->pack_restart(i,&buf[n]); } MPI_Bcast(&n,1,MPI_INT,iproc,world); MPI_Bcast(buf,n,MPI_DOUBLE,iproc,world); for (ix = 0; ix < nx; ix++) { for (iy = 0; iy < ny; iy++) { for (iz = 0; iz < nz; iz++) { // while loop over one proc's atom list m = 0; while (m < n) { image = ((imageint) IMGMAX << IMG2BITS) | ((imageint) IMGMAX << IMGBITS) | IMGMAX; if (triclinic == 0) { x[0] = buf[m+1] + ix*old_xprd; x[1] = buf[m+2] + iy*old_yprd; x[2] = buf[m+3] + iz*old_zprd; } else { x[0] = buf[m+1] + ix*old_xprd + iy*old_xy + iz*old_xz; x[1] = buf[m+2] + iy*old_yprd + iz*old_yz; x[2] = buf[m+3] + iz*old_zprd; } domain->remap(x,image); if (triclinic) { domain->x2lamda(x,lamda); coord = lamda; } else coord = x; if (coord[0] >= sublo[0] && coord[0] < subhi[0] && coord[1] >= sublo[1] && coord[1] 
< subhi[1] && coord[2] >= sublo[2] && coord[2] < subhi[2]) { m += avec->unpack_restart(&buf[m]); i = atom->nlocal - 1; if (tag_enable) atom_offset = iz*ny*nx*maxtag + iy*nx*maxtag + ix*maxtag; else atom_offset = 0; mol_offset = iz*ny*nx*maxmol + iy*nx*maxmol + ix*maxmol; atom->x[i][0] = x[0]; atom->x[i][1] = x[1]; atom->x[i][2] = x[2]; atom->tag[i] += atom_offset; atom->image[i] = image; if (atom->molecular) { if (atom->molecule[i] > 0) atom->molecule[i] += mol_offset; if (atom->molecular == 1) { if (atom->avec->bonds_allow) for (j = 0; j < atom->num_bond[i]; j++) atom->bond_atom[i][j] += atom_offset; if (atom->avec->angles_allow) for (j = 0; j < atom->num_angle[i]; j++) { atom->angle_atom1[i][j] += atom_offset; atom->angle_atom2[i][j] += atom_offset; atom->angle_atom3[i][j] += atom_offset; } if (atom->avec->dihedrals_allow) for (j = 0; j < atom->num_dihedral[i]; j++) { atom->dihedral_atom1[i][j] += atom_offset; atom->dihedral_atom2[i][j] += atom_offset; atom->dihedral_atom3[i][j] += atom_offset; atom->dihedral_atom4[i][j] += atom_offset; } if (atom->avec->impropers_allow) for (j = 0; j < atom->num_improper[i]; j++) { atom->improper_atom1[i][j] += atom_offset; atom->improper_atom2[i][j] += atom_offset; atom->improper_atom3[i][j] += atom_offset; atom->improper_atom4[i][j] += atom_offset; } } } } else m += static_cast<int> (buf[m]); } } } } } // free communication buffer and old atom class memory->destroy(buf); delete old; // check that all atoms were assigned to procs bigint natoms; bigint nblocal = atom->nlocal; MPI_Allreduce(&nblocal,&natoms,1,MPI_LMP_BIGINT,MPI_SUM,world); if (me == 0) { if (screen) fprintf(screen," " BIGINT_FORMAT " atoms\n",natoms); if (logfile) fprintf(logfile," " BIGINT_FORMAT " atoms\n",natoms); } if (natoms != atom->natoms) error->all(FLERR,"Replicate did not assign all atoms correctly"); if (me == 0) { if (atom->nbonds) { if (screen) fprintf(screen," " BIGINT_FORMAT " bonds\n",atom->nbonds); if (logfile) fprintf(logfile," " BIGINT_FORMAT " 
bonds\n",atom->nbonds); } if (atom->nangles) { if (screen) fprintf(screen," " BIGINT_FORMAT " angles\n", atom->nangles); if (logfile) fprintf(logfile," " BIGINT_FORMAT " angles\n", atom->nangles); } if (atom->ndihedrals) { if (screen) fprintf(screen," " BIGINT_FORMAT " dihedrals\n", atom->ndihedrals); if (logfile) fprintf(logfile," " BIGINT_FORMAT " dihedrals\n", atom->ndihedrals); } if (atom->nimpropers) { if (screen) fprintf(screen," " BIGINT_FORMAT " impropers\n", atom->nimpropers); if (logfile) fprintf(logfile," " BIGINT_FORMAT " impropers\n", atom->nimpropers); } } // check that atom IDs are valid atom->tag_check(); // create global mapping of atoms if (atom->map_style) { atom->map_init(); atom->map_set(); } // create special bond lists for molecular systems if (atom->molecular == 1) { Special special(lmp); special.build(); } }
void CommCuda::borders_cuda() { int i,n,itype,iswap,dim,ineed,maxneed,smax,rmax; int nsend,nrecv,nfirst,nlast,ngroup; double lo,hi; int *type; double **x; double *buf,*mlo,*mhi; MPI_Request request; MPI_Status status; AtomVec *avec = atom->avec; timespec time1,time2,time3; // clear old ghosts atom->nghost = 0; // do swaps over all 3 dimensions iswap = 0; smax = rmax = 0; cuda->shared_data.comm.nsend=0; for (dim = 0; dim < 3; dim++) { nlast = 0; maxneed = 2*need[dim]; for (ineed = 0; ineed < maxneed; ineed++) { // find atoms within slab boundaries lo/hi using <= and >= // check atoms between nfirst and nlast // for first swaps in a dim, check owned and ghost // for later swaps in a dim, only check newly arrived ghosts // store sent atom indices in list for use in future timesteps x = atom->x; if (style == SINGLE) { lo = slablo[iswap]; hi = slabhi[iswap]; } else { type = atom->type; mlo = multilo[iswap]; mhi = multihi[iswap]; } if (ineed % 2 == 0) { nfirst = nlast; nlast = atom->nlocal + atom->nghost; } nsend = 0; // find send atoms according to SINGLE vs MULTI // all atoms eligible versus atoms in bordergroup // only need to limit loop to bordergroup for first sends (ineed < 2) // on these sends, break loop in two: owned (in group) and ghost do { if(nsend>=maxsendlist[iswap]) grow_list(iswap,static_cast <int> (nsend*1.05)); nsend=Cuda_CommCuda_BuildSendlist(&cuda->shared_data,bordergroup,ineed,style==SINGLE?1:0,atom->nfirst,nfirst,nlast,dim,iswap); }while(nsend>=maxsendlist[iswap]); // pack up list of border atoms if (nsend*size_border > maxsend) grow_send(nsend*size_border,0); if (ghost_velocity) n = avec->pack_border_vel(nsend,&iswap,buf_send, pbc_flag[iswap],pbc[iswap]); else n = avec->pack_border(nsend,&iswap,buf_send, pbc_flag[iswap],pbc[iswap]); // swap atoms with other proc // put incoming ghosts at end of my atom arrays // if swapping with self, simply copy, no messages clock_gettime(CLOCK_REALTIME,&time1); if (sendproc[iswap] != me) { 
MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0, &nrecv,1,MPI_INT,recvproc[iswap],0,world,&status); if (nrecv*size_border > maxrecv) grow_recv(nrecv*size_border); MPI_Irecv(buf_recv,nrecv*size_border,MPI_DOUBLE, recvproc[iswap],0,world,&request); MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); MPI_Wait(&request,&status); buf = buf_recv; } else { nrecv = nsend; buf = buf_send; } clock_gettime(CLOCK_REALTIME,&time2); cuda->shared_data.cuda_timings.comm_border_mpi+= time2.tv_sec-time1.tv_sec+1.0*(time2.tv_nsec-time1.tv_nsec)/1000000000; // unpack buffer if (ghost_velocity) avec->unpack_border_vel(nrecv,atom->nlocal+atom->nghost,buf); else avec->unpack_border(nrecv,atom->nlocal+atom->nghost,buf); // set all pointers & counters smax = MAX(smax,nsend); rmax = MAX(rmax,nrecv); sendnum[iswap] = nsend; recvnum[iswap] = nrecv; size_forward_recv[iswap] = nrecv*size_forward; size_reverse_send[iswap] = nrecv*size_reverse; size_reverse_recv[iswap] = nsend*size_reverse; firstrecv[iswap] = atom->nlocal + atom->nghost; atom->nghost += nrecv; iswap++; } } // insure send/recv buffers are long enough for all forward & reverse comm int max = MAX(maxforward*smax,maxreverse*rmax); if (max > maxsend) grow_send(max,0); max = MAX(maxforward*rmax,maxreverse*smax); if (max > maxrecv) grow_recv(max); // reset global->local map if(map_style) { cuda->cu_tag->download(); atom->map_set(); } cuda->setSystemParams(); cuda->shared_data.atom.nghost+=n; }
void WriteRestart::write(char *file) { // special case where reneighboring is not done in integrator // on timestep restart file is written (due to build_once being set) // if box is changing, must be reset, else restart file will have // wrong box size and atoms will be lost when restart file is read // other calls to pbc and domain and comm are not made, // b/c they only make sense if reneighboring is actually performed if (neighbor->build_once) domain->reset_box(); // natoms = sum of nlocal = value to write into restart file // if unequal and thermo lostflag is "error", don't write restart file bigint nblocal = atom->nlocal; MPI_Allreduce(&nblocal,&natoms,1,MPI_LMP_BIGINT,MPI_SUM,world); if (natoms != atom->natoms && output->thermo->lostflag == ERROR) error->all(FLERR,"Atom count is inconsistent, cannot write restart file"); // open single restart file or base file for multiproc case if (me == 0) { char *hfile; if (multiproc) { hfile = new char[strlen(file) + 16]; char *ptr = strchr(file,'%'); *ptr = '\0'; sprintf(hfile,"%s%s%s",file,"base",ptr+1); *ptr = '%'; } else hfile = file; fp = fopen(hfile,"wb"); if (fp == NULL) { char str[128]; sprintf(str,"Cannot open restart file %s",hfile); error->one(FLERR,str); } if (multiproc) delete [] hfile; } // proc 0 writes magic string, endian flag, numeric version if (me == 0) { magic_string(); endian(); version_numeric(); } // proc 0 writes header, groups, pertype info, force field info if (me == 0) { header(); group->write_restart(fp); type_arrays(); force_fields(); } // all procs write fix info modify->write_restart(fp); // communication buffer for my atom info // max_size = largest buffer needed by any proc int max_size; int send_size = atom->avec->size_restart(); MPI_Allreduce(&send_size,&max_size,1,MPI_INT,MPI_MAX,world); double *buf; memory->create(buf,max_size,"write_restart:buf"); // all procs write file layout info which may include per-proc sizes file_layout(send_size); // header info is complete // if multiproc 
output: // close header file, open multiname file on each writing proc, // write PROCSPERFILE into new file if (multiproc) { if (me == 0) fclose(fp); char *multiname = new char[strlen(file) + 16]; char *ptr = strchr(file,'%'); *ptr = '\0'; sprintf(multiname,"%s%d%s",file,icluster,ptr+1); *ptr = '%'; if (filewriter) { fp = fopen(multiname,"wb"); if (fp == NULL) { char str[128]; sprintf(str,"Cannot open restart file %s",multiname); error->one(FLERR,str); } write_int(PROCSPERFILE,nclusterprocs); } delete [] multiname; } // pack my atom data into buf AtomVec *avec = atom->avec; int n = 0; for (int i = 0; i < atom->nlocal; i++) n += avec->pack_restart(i,&buf[n]); // if any fix requires it, remap each atom's coords via PBC // is because fix changes atom coords (excepting an integrate fix) // just remap in buffer, not actual atoms if (modify->restart_pbc_any) { int triclinic = domain->triclinic; double *lo,*hi,*period; if (triclinic == 0) { lo = domain->boxlo; hi = domain->boxhi; period = domain->prd; } else { lo = domain->boxlo_lamda; hi = domain->boxhi_lamda; period = domain->prd_lamda; } int xperiodic = domain->xperiodic; int yperiodic = domain->yperiodic; int zperiodic = domain->zperiodic; double *x; int m = 0; for (int i = 0; i < atom->nlocal; i++) { x = &buf[m+1]; if (triclinic) domain->x2lamda(x,x); if (xperiodic) { if (x[0] < lo[0]) x[0] += period[0]; if (x[0] >= hi[0]) x[0] -= period[0]; x[0] = MAX(x[0],lo[0]); } if (yperiodic) { if (x[1] < lo[1]) x[1] += period[1]; if (x[1] >= hi[1]) x[1] -= period[1]; x[1] = MAX(x[1],lo[1]); } if (zperiodic) { if (x[2] < lo[2]) x[2] += period[2]; if (x[2] >= hi[2]) x[2] -= period[2]; x[2] = MAX(x[2],lo[2]); } if (triclinic) domain->lamda2x(x,x); m += static_cast<int> (buf[m]); } } // MPI-IO output to single file if (mpiioflag) { if (me == 0) fclose(fp); mpiio->openForWrite(file); mpiio->write(headerOffset,send_size,buf); mpiio->close(); } // output of one or more native files // filewriter = 1 = this proc writes to file // ping 
each proc in my cluster, receive its data, write data to file // else wait for ping from fileproc, send my data to fileproc else { int tmp,recv_size; MPI_Status status; MPI_Request request; if (filewriter) { for (int iproc = 0; iproc < nclusterprocs; iproc++) { if (iproc) { MPI_Irecv(buf,max_size,MPI_DOUBLE,me+iproc,0,world,&request); MPI_Send(&tmp,0,MPI_INT,me+iproc,0,world); MPI_Wait(&request,&status); MPI_Get_count(&status,MPI_DOUBLE,&recv_size); } else recv_size = send_size; write_double_vec(PERPROC,recv_size,buf); } fclose(fp); } else { MPI_Recv(&tmp,0,MPI_INT,fileproc,0,world,&status); MPI_Rsend(buf,send_size,MPI_DOUBLE,fileproc,0,world); } } // clean up memory->destroy(buf); // invoke any fixes that write their own restart file for (int ifix = 0; ifix < modify->nfix; ifix++) if (modify->fix[ifix]->restart_file) modify->fix[ifix]->write_restart_file(file); }