void Teuchos::updateParametersFromXmlFileAndBroadcast( const std::string &xmlFileName, const Ptr<ParameterList> ¶mList, const Comm<int> &comm ) { if (comm.getSize()==1) updateParametersFromXmlFile(xmlFileName, paramList); else { if (comm.getRank()==0) { XMLParameterListReader xmlPLReader; xmlPLReader.setAllowsDuplicateSublists( false ); FileInputSource xmlFile(xmlFileName); XMLObject xmlParams = xmlFile.getObject(); std::string xmlString = toString(xmlParams); int strsize = xmlString.size(); broadcast<int, int>(comm, 0, &strsize); broadcast<int, char>(comm, 0, strsize, &xmlString[0]); updateParametersFromXmlString(xmlString, paramList); } else { int strsize; broadcast<int, int>(comm, 0, &strsize); std::string xmlString; xmlString.resize(strsize); broadcast<int, char>(comm, 0, strsize, &xmlString[0]); updateParametersFromXmlString(xmlString, paramList); } } }
void PotJMEAMSpline::fast_compute_densities(const Comm& comm, Vector<double> *density_ptr) { // Untrap procs if necessary if (is_trapped_) { int flag = 4; comm.bcast(&flag, 1, MPI_INT, comm.get_root()); } // Initialize potential on all procs initialize_pot(comm); // Setup local variables for potential function std::vector<T *> rho_fns; for (Basis*& fn : pot_fns_[1].fns) rho_fns.push_back(static_cast<T *>(fn)); std::vector<T *> f_fns; for (Basis*& fn : pot_fns_[3].fns) f_fns.push_back(static_cast<T *>(fn)); std::vector<T *> g_fns; for (Basis*& fn : pot_fns_[4].fns) g_fns.push_back(static_cast<T *>(fn)); // Make list of densities for each atom int natoms = mmz->config->total_natoms; Vector<double> densities(natoms, 0.0); // Loop over all atoms in atomvec for (Atom*& atom_i_ptr : mmz->atomvec->atoms) { AtomJMEAMSpline &atom_i = *(static_cast<AtomJMEAMSpline *>(atom_i_ptr)); // tmp atom for (Pair*& pair_ij_ptr : atom_i.pairs) { PairJMEAMSpline &pair_ij = *(static_cast<PairJMEAMSpline *>(pair_ij_ptr)); // tmp pair if (pair_ij.rho_knot != -1) // pair distance is inside density function densities[atom_i.global_idx] += rho_fns[pair_ij.rho_idx]->T::splint(pair_ij.rho_knot, pair_ij.rho_shift); if (pair_ij.f_knot != -1) // radial distance inside f-potential pair_ij.f = f_fns[pair_ij.f_idx]->T::splint(pair_ij.f_knot, pair_ij.f_shift); else pair_ij.f = 0.0; } // END LOOP OVER PAIRS for (Triplet*& triplet_ijk_ptr : atom_i.triplets) { TripletJMEAMSpline &triplet_ijk = *(static_cast<TripletJMEAMSpline *>(triplet_ijk_ptr)); // tmp triplet PairJMEAMSpline &pair_ij = *(static_cast<PairJMEAMSpline *>(triplet_ijk.pair_ij)); // tmp pairs PairJMEAMSpline &pair_ik = *(static_cast<PairJMEAMSpline *>(triplet_ijk.pair_ik)); double g_val = g_fns[triplet_ijk.g_idx]->T::splint(triplet_ijk.g_knot, triplet_ijk.g_shift); densities[atom_i.global_idx] += pair_ij.f * pair_ik.f * g_val; } // END LOOP OVER TRIPLETS } // END LOOP OVER ATOMS // Gather up densities from all procs std::vector<double> 
densities_final(natoms, 0.0); comm.reduce(&densities[0], &densities_final[0], natoms, MPI_DOUBLE, MPI_SUM, comm.get_root()); if (comm.is_root()) density_ptr->swap(densities_final); }
/**
 * Send a payload through a mailbox and block until the communication is done.
 *
 * @param chan           Mailbox the communication is posted on.
 * @param payload        Pointer stored as the source data of the communication.
 * @param simulatedSize  Value stored as the remaining amount of the communication.
 */
void s4u::Actor::send(Mailbox &chan, void *payload, size_t simulatedSize)
{
	Comm outgoing = Comm::send_init(this, chan);

	outgoing.setRemains(simulatedSize);
	outgoing.setSrcData(payload);

	// An explicit start() is optional here; wait() is called directly.
	outgoing.wait();
}
/// Solve the linear system with the overlapping parallel preconditioner/solver.
///
/// When built with HAVE_MPI and \c parallelInformation_ holds a
/// ParallelISTLInformation, its values are copied into the index set and
/// remote indices of \p comm and constructPreconditionerAndSolve() is invoked
/// for the overlapping solver category. In every other case (no MPI build, or
/// a different type stored in \c parallelInformation_) a std::logic_error is
/// raised through OPM_THROW. On the successful path the result is finally
/// passed to checkConvergence().
///
/// \param opA   Linear operator; its matrix row count sizes the index copy.
/// \param x     Solution vector, forwarded to constructPreconditionerAndSolve().
/// \param b     Right-hand side, forwarded to constructPreconditionerAndSolve().
/// \param comm  Parallel communication object that receives the index data.
void solve(Operator& opA, Vector& x, Vector& b, Comm& comm) const
{
    Dune::InverseOperatorResult result;

#if HAVE_MPI
    if (parallelInformation_.type() == typeid(ParallelISTLInformation))
    {
        const size_t size = opA.getmat().N();
        const ParallelISTLInformation& info =
            boost::any_cast<const ParallelISTLInformation&>( parallelInformation_);

        // As we use a dune-istl with block size np the number of components
        // per parallel is only one.
        info.copyValuesTo(comm.indexSet(), comm.remoteIndices(),
                          size, 1);
        // Construct operator, scalar product and vectors needed.
        constructPreconditionerAndSolve<Dune::SolverCategory::overlapping>(opA, x, b, comm, result);
    }
    else
#endif
    {
        OPM_THROW(std::logic_error,"this method is for parallel solve only");
    }

    checkConvergence( result );
}
/**
 * Receive a pointer-sized payload from a mailbox, blocking until it arrived.
 *
 * The communication is given the address of a local pointer as destination
 * buffer (sized as one pointer); that pointer is returned after wait().
 *
 * @param chan  Mailbox the receive is posted on.
 * @return The pointer written into the destination buffer; NULL if the
 *         communication left it untouched.
 */
void *s4u::Actor::recv(Mailbox &chan)
{
	void *received = NULL;

	Comm incoming = Comm::recv_init(this, chan);
	incoming.setDstData(&received, sizeof(received));
	incoming.wait();

	return received;
}
/**
 * Run every registered Comm and report whether any of them returned true.
 *
 * All entries are always run; a true result from one entry does not stop
 * the remaining ones from being called.
 *
 * @param messages  Passed through to each Comm::run call.
 * @param feedback  Passed through to each Comm::run call.
 * @return true if at least one entry's run() returned true, false otherwise
 *         (including when there are no entries).
 */
bool CommConfigurable::run(Messages & messages, Feedback & feedback)
{
    bool anySucceeded = false;

    std::vector<Comm *>::iterator it = comm.begin();
    while (it != comm.end())
    {
        Comm *current = *it;
        if (current->run(messages, feedback))
        {
            anySucceeded = true;
        }
        ++it;
    }

    return anySucceeded;
}
void mergeCounterNames (const Comm<int>& comm, const Array<std::string>& localNames, Array<std::string>& globalNames, const ECounterSetOp setOp) { const int myRank = comm.getRank(); const int left = 0; const int right = comm.getSize() - 1; Array<std::string> theGlobalNames; mergeCounterNamesHelper (comm, myRank, left, right, localNames, theGlobalNames, setOp); // Proc 0 has the list of counter names. Now broadcast it back to // all the procs. broadcastStrings (comm, theGlobalNames); // "Transactional" semantics ensure strong exception safety for // output. globalNames.swap (theGlobalNames); }
MachineRepresentation(const Comm<int> &comm): networkDim(0), numProcs(comm.getSize()), myRank(comm.getRank()), procCoords(NULL) { // WIll need this constructor to be specific to RAAMP (MD). // Will need a default constructor using, e.g., GeometricGenerator // or nothing at all, for when RAAMP is not available as TPL. // // (AG) In addition, need to be able to run without special // privileges in system (e.g., on hopper). // Notes: For now, all cores connected to same NIC will get the // same coordinates; later, we could add extra coordinate dimensions // to represent nodes or dies (using hwloc info through RAAMP // data object). // (MD) will modify mapping test to use machine representation // #ifdef HAVE_ZOLTAN2_OVIS // Call initializer for RAAMP data object (AG) //get network dimension. //TODO change. // Call RAAMP Data Object to get the network dimension (AG) networkDim = 3; //allocate memory for processor coordinates. procCoords = new nCoord_t *[networkDim]; for (int i = 0; i < networkDim; ++i){ procCoords[i] = new nCoord_t [numProcs]; memset (procCoords[i], 0, sizeof(nCoord_t) * numProcs); } //obtain the coordinate of the processor. this->getMyCoordinate(/*nCoord_t &xyz[networkDim]*/); // copy xyz into appropriate spot in procCoords. (MD) // KDD I agree with this //reduceAll the coordinates of each processor. this->gatherMachineCoordinates(); }
int main() { controls.setPC(comm.getPC()); controls.setup(); // comm.printPosition(); comm.printGains(); controlsInterrupt.attach_us(&controls, &Controls::loop, 1000); while(1) { controls.updateIMUS(); comm.check(); if (serialCounter++>100) { // comm.printPosition(); // comm.getPC()->printf("%f\n", controls.getTheta1()); // comm.getPC()->printf("%f", controls.motor.getPWM()); serialCounter = 0; // float z[4] = {1,2,0,0}; // comm.getPC()->printf("%f\n",controls.target.getTheta2ForTarget(z)); } } }
/**
 * Initialize the simulator: seed the random generators, set up the shared
 * critical section, register the remote event callback and start the three
 * worker threads (update_state, update_controller, update_stick).
 *
 * @return Always 0.
 */
int init_simulator()
{
	srand(time(NULL));

	// Discard a random number (0..255) of rand32() values.
	// The bound is drawn once and masked explicitly: the previous condition
	// `i<rand()&0xff` parsed as `(i<rand()) & 0xff` because `<` binds tighter
	// than `&`, so the 0xff mask never limited the count and rand() was
	// re-drawn on every iteration.
	const int warmup_draws = rand() & 0xff;
	for(int i=0; i<warmup_draws; i++)
		rand32();

	InitializeCriticalSection(&cs);
	comm.add_callback(remote_OnEvent);

	// dwStackSize and dwCreationFlags are integers: pass 0 (default stack
	// size, run immediately) rather than the pointer constant NULL.
	// NOTE(review): the returned thread handles are discarded and never closed.
	CreateThread(NULL, 0, update_state, NULL, 0, NULL);
	CreateThread(NULL, 0, update_controller, NULL, 0, NULL);
	CreateThread(NULL, 0, update_stick, NULL, 0, NULL);

	return 0;
}
int PotGMEAM::rescale_3body(const Comm& comm, std::ostream *out, int flag) { if (!flag) return 0; // Don't run rescaling if flag is 0 int ntypes = mmz->potlist->get_ntypes(); int f_idx = 0; for (Basis*& f_fn : pot_fns_[3].fns) { double max_f_mag = f_fn->get_max_y_mag(); double b = 1.0/max_f_mag; // Scale f-pot *f_fn *= b; // Scale g-pot std::vector<Basis *> g_fns = pot_fns_[4].fns; for (int i=0; i<ntypes; ++i) { for (int j=0; j<ntypes; ++j) { for (int k=j; k<ntypes; ++k) { int ij_idx = pot_fns_[3].get_2body_alloy_idx(i, j); int ik_idx = pot_fns_[3].get_2body_alloy_idx(i, k); int ijk_idx = pot_fns_[4].get_3body_alloy_idx(i, j, k); if (f_idx == ij_idx) *g_fns[ijk_idx] /= b; if (f_idx == ik_idx) *g_fns[ijk_idx] /= b; } } } // Output scaling factor to screen if (comm.is_root() && out) *out << "MEAM potential scaling factor (b_" << f_idx << ") " << std::fixed << b << std::endl; ++f_idx; } return 1; }
void prepareSolver(Operator& wellOpA, Comm& comm) { Vector& istlb = *(this->rhs_); comm.copyOwnerToAll(istlb, istlb); const double relax = this->parameters_.ilu_relaxation_; const MILU_VARIANT ilu_milu = this->parameters_.ilu_milu_; // TODO: revise choice of parameters // int coarsenTarget = 4000; int coarsenTarget = 1200; Criterion criterion(15, coarsenTarget); criterion.setDebugLevel( this->parameters_.cpr_solver_verbose_ ); // no debug information, 1 for printing hierarchy information criterion.setDefaultValuesIsotropic(2); criterion.setNoPostSmoothSteps( 1 ); criterion.setNoPreSmoothSteps( 1 ); //new guesses by hmbn //criterion.setAlpha(0.01); // criterion for connection strong 1/3 is default //criterion.setMaxLevel(2); // //criterion.setGamma(1); // //1 V cycle 2 WW // Since DUNE 2.2 we also need to pass the smoother args instead of steps directly using AmgType = typename std::conditional<std::is_same<Comm, Dune::Amg::SequentialInformation>::value, BlackoilAmgType, ParallelBlackoilAmgType>::type; using SpType = typename std::conditional<std::is_same<Comm, Dune::Amg::SequentialInformation>::value, Dune::SeqScalarProduct<Vector>, ParallelScalarProduct >::type; using OperatorType = typename std::conditional<std::is_same<Comm, Dune::Amg::SequentialInformation>::value, MatrixAdapter, ParallelMatrixAdapter>::type; typedef typename AmgType::Smoother Smoother; typedef typename Dune::Amg::SmootherTraits<Smoother>::Arguments SmootherArgs; SmootherArgs smootherArgs; smootherArgs.iterations = 1; smootherArgs.relaxationFactor = relax; const Opm::CPRParameter& params(this->parameters_); // strange conversion ISTLUtility::setILUParameters(smootherArgs, ilu_milu); auto& opARef = reinterpret_cast<OperatorType&>(*opA_); int newton_iteration = this->simulator_.model().newtonMethod().numIterations(); bool update_preconditioner = false; if (this->parameters_.cpr_reuse_setup_ < 1) { update_preconditioner = true; } if (this->parameters_.cpr_reuse_setup_ < 2) { if (newton_iteration < 
1) { update_preconditioner = true; } } if (this->parameters_.cpr_reuse_setup_ < 3) { if (this->iterations() > 10) { update_preconditioner = true; } } if ( update_preconditioner or (amg_== 0) ) { amg_.reset( new AmgType( params, this->weights_, opARef, criterion, smootherArgs, comm ) ); } else { if (this->parameters_.cpr_solver_verbose_) { std::cout << " Only update amg solver " << std::endl; } reinterpret_cast<AmgType*>(amg_.get())->updatePreconditioner(opARef, smootherArgs, comm); } // Solve. //SuperClass::solve(linearOperator, x, istlb, *sp, *amg, result); //references seems to do something els than refering int verbosity_linsolve = 0; if (comm.communicator().rank() == 0) { verbosity_linsolve = this->parameters_.linear_solver_verbosity_; } linsolve_.reset(new Dune::BiCGSTABSolver<Vector>(wellOpA, reinterpret_cast<SpType&>(*sp_), reinterpret_cast<AmgType&>(*amg_), this->parameters_.linear_solver_reduction_, this->parameters_.linear_solver_maxiter_, verbosity_linsolve)); }
int main(int argc, char** argv) { In in; in.datafile = NULL; int me = 0; //local MPI rank int nprocs = 1; //number of MPI ranks int num_threads = 1; //number of OpenMP threads int num_steps = -1; //number of timesteps (if -1 use value from lj.in) int system_size = -1; //size of the system (if -1 use value from lj.in) int nx = -1; int ny = -1; int nz = -1; int check_safeexchange = 0; //if 1 complain if atom moves further than 1 subdomain length between exchanges int do_safeexchange = 0; //if 1 use safe exchange mode [allows exchange over multiple subdomains] int use_sse = 0; //setting for SSE variant of miniMD only int screen_yaml = 0; //print yaml output to screen also int yaml_output = 0; //print yaml output int halfneigh = 1; //1: use half neighborlist; 0: use full neighborlist; -1: use original miniMD version half neighborlist force int teams = 1; int device = 0; int neighbor_size = -1; char* input_file = NULL; int ghost_newton = 1; int sort = -1; for(int i = 0; i < argc; i++) { if((strcmp(argv[i], "-i") == 0) || (strcmp(argv[i], "--input_file") == 0)) { input_file = argv[++i]; continue; } } MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &me); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); int error = 0; if(input_file == NULL) error = input(in, "in.lj.miniMD"); else error = input(in, input_file); if(error) { MPI_Finalize(); exit(0); } for(int i = 0; i < argc; i++) { if((strcmp(argv[i], "-t") == 0) || (strcmp(argv[i], "--num_threads") == 0)) { num_threads = atoi(argv[++i]); continue; } if((strcmp(argv[i], "--teams") == 0)) { teams = atoi(argv[++i]); continue; } if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0)) { num_steps = atoi(argv[++i]); continue; } if((strcmp(argv[i], "-s") == 0) || (strcmp(argv[i], "--size") == 0)) { system_size = atoi(argv[++i]); continue; } if((strcmp(argv[i], "-nx") == 0)) { nx = atoi(argv[++i]); continue; } if((strcmp(argv[i], "-ny") == 0)) { ny = atoi(argv[++i]); continue; } if((strcmp(argv[i], "-nz") == 0)) { nz = 
atoi(argv[++i]); continue; } if((strcmp(argv[i], "-b") == 0) || (strcmp(argv[i], "--neigh_bins") == 0)) { neighbor_size = atoi(argv[++i]); continue; } if((strcmp(argv[i], "--half_neigh") == 0)) { halfneigh = atoi(argv[++i]); continue; } if((strcmp(argv[i], "-sse") == 0)) { use_sse = atoi(argv[++i]); continue; } if((strcmp(argv[i], "--check_exchange") == 0)) { check_safeexchange = 1; continue; } if((strcmp(argv[i], "--sort") == 0)) { sort = atoi(argv[++i]); continue; } if((strcmp(argv[i], "-o") == 0) || (strcmp(argv[i], "--yaml_output") == 0)) { yaml_output = atoi(argv[++i]); continue; } if((strcmp(argv[i], "--yaml_screen") == 0)) { screen_yaml = 1; continue; } if((strcmp(argv[i], "-f") == 0) || (strcmp(argv[i], "--data_file") == 0)) { if(in.datafile == NULL) in.datafile = new char[1000]; strcpy(in.datafile, argv[++i]); continue; } if((strcmp(argv[i], "-u") == 0) || (strcmp(argv[i], "--units") == 0)) { in.units = strcmp(argv[++i], "metal") == 0 ? 1 : 0; continue; } if((strcmp(argv[i], "-p") == 0) || (strcmp(argv[i], "--force") == 0)) { in.forcetype = strcmp(argv[++i], "eam") == 0 ? 
FORCEEAM : FORCELJ; continue; } if((strcmp(argv[i], "-gn") == 0) || (strcmp(argv[i], "--ghost_newton") == 0)) { ghost_newton = atoi(argv[++i]); continue; } if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) { printf("\n-----------------------------------------------------------------------------------------------------------\n"); printf("-------------" VARIANT_STRING "--------------------\n"); printf("-------------------------------------------------------------------------------------------------------------\n\n"); printf("miniMD is a simple, parallel molecular dynamics (MD) code,\n" "which is part of the Mantevo project at Sandia National\n" "Laboratories ( http://www.mantevo.org ).\n" "The original authors of miniMD are Steve Plimpton ([email protected]) ,\n" "Paul Crozier ([email protected]) with current\n" "versions written by Christian Trott ([email protected]).\n\n"); printf("Commandline Options:\n"); printf("\n Execution configuration:\n"); printf("\t--teams <nteams>: set number of thread-teams used per MPI rank (default 1)\n"); printf("\t-t / --num_threads <threads>: set number of threads per thread-team (default 1)\n"); printf("\t--half_neigh <int>: use half neighborlists (default 1)\n" "\t 0: full neighborlist\n" "\t 1: half neighborlist\n" "\t -1: original miniMD half neighborlist force (not OpenMP safe)\n"); printf("\t-d / --device <int>: choose device to use (only applicable for GPU execution)\n"); printf("\t-dm / --device_map: map devices to MPI ranks\n"); printf("\t-ng / --num_gpus <int>: give number of GPUs per Node (used in conjuction with -dm\n" "\t to determine device id: 'id=mpi_rank%%ng' (default 2)\n"); printf("\t--skip_gpu <int>: skip the specified gpu when assigning devices to MPI ranks\n" "\t used in conjunction with -dm (but must come first in arg list)\n"); printf("\t-sse <sse_version>: use explicit sse intrinsics (use miniMD-SSE variant)\n"); printf("\t-gn / --ghost_newton <int>: set usage of newtons third law for 
ghost atoms\n" "\t (only applicable with half neighborlists)\n"); printf("\n Simulation setup:\n"); printf("\t-i / --input_file <string>: set input file to be used (default: in.lj.miniMD)\n"); printf("\t-n / --nsteps <int>: set number of timesteps for simulation\n"); printf("\t-s / --size <int>: set linear dimension of systembox\n"); printf("\t-nx/-ny/-nz <int>: set linear dimension of systembox in x/y/z direction\n"); printf("\t-b / --neigh_bins <int>: set linear dimension of neighbor bin grid\n"); printf("\t-u / --units <string>: set units (lj or metal), see LAMMPS documentation\n"); printf("\t-p / --force <string>: set interaction model (lj or eam)\n"); printf("\t-f / --data_file <string>: read configuration from LAMMPS data file\n"); printf("\n Miscelaneous:\n"); printf("\t--check_exchange: check whether atoms moved further than subdomain width\n"); printf("\t--safe_exchange: perform exchange communication with all MPI processes\n" "\t within rcut_neighbor (outer force cutoff)\n"); printf("\t--sort <n>: resort atoms (simple bins) every <n> steps (default: use reneigh frequency; never=0)"); printf("\t-o / --yaml_output <int>: level of yaml output (default 1)\n"); printf("\t--yaml_screen: write yaml output also to screen\n"); printf("\t-h / --help: display this help message\n\n"); printf("---------------------------------------------------------\n\n"); exit(0); } } Atom atom; Neighbor neighbor; Integrate integrate; Thermo thermo; Comm comm; Timer timer; ThreadData threads; Force* force; if(in.forcetype == FORCEEAM) { force = (Force*) new ForceEAM(); if(ghost_newton == 1) { if(me == 0) printf("# EAM currently requires '--ghost_newton 0'; Changing setting now.\n"); ghost_newton = 0; } } if(in.forcetype == FORCELJ) force = (Force*) new ForceLJ(); threads.mpi_me = me; threads.mpi_num_threads = nprocs; threads.omp_me = 0; threads.omp_num_threads = num_threads; atom.threads = &threads; comm.threads = &threads; force->threads = &threads; integrate.threads = &threads; 
neighbor.threads = &threads; thermo.threads = &threads; force->epsilon = in.epsilon; force->sigma = in.sigma; force->sigma6 = in.sigma*in.sigma*in.sigma*in.sigma*in.sigma*in.sigma; neighbor.ghost_newton = ghost_newton; omp_set_num_threads(num_threads); neighbor.timer = &timer; force->timer = &timer; comm.check_safeexchange = check_safeexchange; comm.do_safeexchange = do_safeexchange; force->use_sse = use_sse; neighbor.halfneigh = halfneigh; if(halfneigh < 0) force->use_oldcompute = 1; if(use_sse) { #ifdef VARIANT_REFERENCE if(me == 0) printf("ERROR: Trying to run with -sse with miniMD reference version. Use SSE variant instead. Exiting.\n"); MPI_Finalize(); exit(0); #endif } if(num_steps > 0) in.ntimes = num_steps; if(system_size > 0) { in.nx = system_size; in.ny = system_size; in.nz = system_size; } if(nx > 0) { in.nx = nx; if(ny > 0) in.ny = ny; else if(system_size < 0) in.ny = nx; if(nz > 0) in.nz = nz; else if(system_size < 0) in.nz = nx; } if(neighbor_size > 0) { neighbor.nbinx = neighbor_size; neighbor.nbiny = neighbor_size; neighbor.nbinz = neighbor_size; } if(neighbor_size < 0 && in.datafile == NULL) { MMD_float neighscale = 5.0 / 6.0; neighbor.nbinx = neighscale * in.nx; neighbor.nbiny = neighscale * in.ny; neighbor.nbinz = neighscale * in.nz; } if(neighbor_size < 0 && in.datafile) neighbor.nbinx = -1; if(neighbor.nbinx == 0) neighbor.nbinx = 1; if(neighbor.nbiny == 0) neighbor.nbiny = 1; if(neighbor.nbinz == 0) neighbor.nbinz = 1; integrate.ntimes = in.ntimes; integrate.dt = in.dt; integrate.sort_every = sort>0?sort:(sort<0?in.neigh_every:0); neighbor.every = in.neigh_every; neighbor.cutneigh = in.neigh_cut; force->cutforce = in.force_cut; thermo.nstat = in.thermo_nstat; if(me == 0) printf("# Create System:\n"); if(in.datafile) { read_lammps_data(atom, comm, neighbor, integrate, thermo, in.datafile, in.units); MMD_float volume = atom.box.xprd * atom.box.yprd * atom.box.zprd; in.rho = 1.0 * atom.natoms / volume; force->setup(); if(in.forcetype == FORCEEAM) 
atom.mass = force->mass; } else { create_box(atom, in.nx, in.ny, in.nz, in.rho); comm.setup(neighbor.cutneigh, atom); neighbor.setup(atom); integrate.setup(); force->setup(); if(in.forcetype == FORCEEAM) atom.mass = force->mass; create_atoms(atom, in.nx, in.ny, in.nz, in.rho); thermo.setup(in.rho, integrate, atom, in.units); create_velocity(in.t_request, atom, thermo); } if(me == 0) printf("# Done .... \n"); if(me == 0) { fprintf(stdout, "# " VARIANT_STRING " output ...\n"); fprintf(stdout, "# Run Settings: \n"); fprintf(stdout, "\t# MPI processes: %i\n", neighbor.threads->mpi_num_threads); fprintf(stdout, "\t# OpenMP threads: %i\n", neighbor.threads->omp_num_threads); fprintf(stdout, "\t# Inputfile: %s\n", input_file == 0 ? "in.lj.miniMD" : input_file); fprintf(stdout, "\t# Datafile: %s\n", in.datafile ? in.datafile : "None"); fprintf(stdout, "# Physics Settings: \n"); fprintf(stdout, "\t# ForceStyle: %s\n", in.forcetype == FORCELJ ? "LJ" : "EAM"); fprintf(stdout, "\t# Force Parameters: %2.2lf %2.2lf\n",in.epsilon,in.sigma); fprintf(stdout, "\t# Units: %s\n", in.units == 0 ? 
"LJ" : "METAL"); fprintf(stdout, "\t# Atoms: %i\n", atom.natoms); fprintf(stdout, "\t# System size: %2.2lf %2.2lf %2.2lf (unit cells: %i %i %i)\n", atom.box.xprd, atom.box.yprd, atom.box.zprd, in.nx, in.ny, in.nz); fprintf(stdout, "\t# Density: %lf\n", in.rho); fprintf(stdout, "\t# Force cutoff: %lf\n", force->cutforce); fprintf(stdout, "\t# Timestep size: %lf\n", integrate.dt); fprintf(stdout, "# Technical Settings: \n"); fprintf(stdout, "\t# Neigh cutoff: %lf\n", neighbor.cutneigh); fprintf(stdout, "\t# Half neighborlists: %i\n", neighbor.halfneigh); fprintf(stdout, "\t# Neighbor bins: %i %i %i\n", neighbor.nbinx, neighbor.nbiny, neighbor.nbinz); fprintf(stdout, "\t# Neighbor frequency: %i\n", neighbor.every); fprintf(stdout, "\t# Sorting frequency: %i\n", integrate.sort_every); fprintf(stdout, "\t# Thermo frequency: %i\n", thermo.nstat); fprintf(stdout, "\t# Ghost Newton: %i\n", ghost_newton); fprintf(stdout, "\t# Use intrinsics: %i\n", force->use_sse); fprintf(stdout, "\t# Do safe exchange: %i\n", comm.do_safeexchange); fprintf(stdout, "\t# Size of float: %i\n\n", (int) sizeof(MMD_float)); } comm.exchange(atom); if(sort>0) atom.sort(neighbor); comm.borders(atom); force->evflag = 1; #pragma omp parallel { neighbor.build(atom); force->compute(atom, neighbor, comm, me); } if(neighbor.halfneigh && neighbor.ghost_newton) comm.reverse_communicate(atom); if(me == 0) printf("# Starting dynamics ...\n"); if(me == 0) printf("# Timestep T U P Time\n"); #pragma omp parallel { thermo.compute(0, atom, neighbor, force, timer, comm); } timer.barrier_start(TIME_TOTAL); integrate.run(atom, force, neighbor, comm, thermo, timer); timer.barrier_stop(TIME_TOTAL); int natoms; MPI_Allreduce(&atom.nlocal, &natoms, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); force->evflag = 1; force->compute(atom, neighbor, comm, me); if(neighbor.halfneigh && neighbor.ghost_newton) comm.reverse_communicate(atom); thermo.compute(-1, atom, neighbor, force, timer, comm); if(me == 0) { double time_other = 
timer.array[TIME_TOTAL] - timer.array[TIME_FORCE] - timer.array[TIME_NEIGH] - timer.array[TIME_COMM]; printf("\n\n"); printf("# Performance Summary:\n"); printf("# MPI_proc OMP_threads nsteps natoms t_total t_force t_neigh t_comm t_other performance perf/thread grep_string t_extra\n"); printf("%i %i %i %i %lf %lf %lf %lf %lf %lf %lf PERF_SUMMARY %lf\n\n\n", nprocs, num_threads, integrate.ntimes, natoms, timer.array[TIME_TOTAL], timer.array[TIME_FORCE], timer.array[TIME_NEIGH], timer.array[TIME_COMM], time_other, 1.0 * natoms * integrate.ntimes / timer.array[TIME_TOTAL], 1.0 * natoms * integrate.ntimes / timer.array[TIME_TOTAL] / nprocs / num_threads, timer.array[TIME_TEST]); } if(yaml_output) output(in, atom, force, neighbor, comm, thermo, integrate, timer, screen_yaml); delete force; MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); return 0; }
/**
 * Evaluate the potential for every atom in the local atom vector: accumulate
 * cell energies, atomic forces and cell stresses, add the constraint and
 * normalization penalties, and return the accumulated error sum.
 *
 * Per atom the work is done in four passes:
 *  1. pairs    - phi energy/force/stress; rho accumulation; f, p (and their
 *                derivatives) cached on each pair;
 *  2. triplets - g, q (and derivatives) cached on each triplet; the
 *                f_ij*f_ik*g term is added to rho and the p_ij*p_ik*q term
 *                to the cell energy;
 *  3. pairs    - embedding force/stress from drho * dF;
 *  4. triplets - three-body force/stress.
 * Between passes 2 and 3 the density is checked against the embedding
 * function's [min_rcut, max_rcut] range and the embedding energy is added.
 *
 * @param comm       Communicator used for the untrap broadcast, the
 *                   initialization calls and accumulate_error().
 * @param error_vec  Optional error vector. Its presence selects the untrap
 *                   flag (3 instead of 2), it is forwarded to
 *                   initialize_pot() and accumulate_error(), and on the root
 *                   process the two normalization penalties are appended to it.
 * @return The value of error_sum_ after all contributions were added.
 */
double PotJMEAMSpline::fast_compute(const Comm& comm, ErrorVec *error_vec)
{
  // Untrap procs if necessary; the broadcast flag is 3 when an error vector
  // was supplied and 2 otherwise
  if (is_trapped_) {
    if (error_vec) {
      int flag = 3;
      comm.bcast(&flag, 1, MPI_INT, comm.get_root());
    } else {
      int flag = 2;
      comm.bcast(&flag, 1, MPI_INT, comm.get_root());
    }
  }

  // Initialize potential on all procs
  initialize_pot(comm, error_vec);

  // Initialize potential by resetting forces
  initialize_compute(comm);

  // Setup local variables for potential functions: one typed list per entry
  // of pot_fns_ (0: phi, 1: rho, 2: F, 3: f, 4: g, 5: p, 6: q)
  std::vector<T *> phi_fns;
  for (Basis*& fn : pot_fns_[0].fns)
    phi_fns.push_back(static_cast<T *>(fn));
  std::vector<T *> rho_fns;
  for (Basis*& fn : pot_fns_[1].fns)
    rho_fns.push_back(static_cast<T *>(fn));
  std::vector<T *> F_fns;
  for (Basis*& fn : pot_fns_[2].fns)
    F_fns.push_back(static_cast<T *>(fn));
  std::vector<T *> f_fns;
  for (Basis*& fn : pot_fns_[3].fns)
    f_fns.push_back(static_cast<T *>(fn));
  std::vector<T *> g_fns;
  for (Basis*& fn : pot_fns_[4].fns)
    g_fns.push_back(static_cast<T *>(fn));
  std::vector<T *> p_fns;
  for (Basis*& fn : pot_fns_[5].fns)
    p_fns.push_back(static_cast<T *>(fn));
  std::vector<T *> q_fns;
  for (Basis*& fn : pot_fns_[6].fns)
    q_fns.push_back(static_cast<T *>(fn));

  // Set up constraint error (error from density going out of bounds of embedding function);
  // one entry per cell, indexed by atom_i.cell_idx
  Vector<double> constraint_err(mmz->config->ncells,0.0);

  // Loop over all atoms in atomvec
  for (Atom*& atom_i_ptr : mmz->atomvec->atoms) {
    // Make temporary atom and cell
    AtomJMEAMSpline &atom_i = *(static_cast<AtomJMEAMSpline *>(atom_i_ptr));
    Cell &cell = *mmz->config->cells[atom_i.cell_idx];

    double rho_val = 0.0; // initialize density for this atom
    double dF = 0.0;      // initialize gradient of embedding fn for this atom

    // Loop over pairs for this atom
    for (Pair*& pair_ij_ptr : atom_i.pairs) {
      PairJMEAMSpline &pair_ij = *(static_cast<PairJMEAMSpline *>(pair_ij_ptr));  // tmp pair

      // Check that neighbor length lies in pair potential radius
      // (a knot index of -1 marks a pair that is skipped for this function)
      if (pair_ij.phi_knot != -1) {
        AtomJMEAMSpline &atom_j = *(static_cast<AtomJMEAMSpline *>(pair_ij.neigh));  // tmp atom

        // Compute phi(r_ij) and its gradient in one step
        double phigrad;
        double phival = 0.5 * phi_fns[pair_ij.phi_idx]->T::splint_comb(pair_ij.phi_knot, pair_ij.phi_shift, &phigrad);

        phigrad *= 0.5; // only half of the gradient/energy contributes to the force/energy since we are double counting
        cell.energy += phival;  // add in piece contributed by neighbor to energy

        Vect tmp_force = pair_ij.dist * phigrad;  // compute tmp force values
        atom_i.force += tmp_force;  // add in force on atom i from atom j
        atom_j.force -= tmp_force;  // subtract off force on atom j from atom i (Newton's law: action = -reaction)

        // Compute stress on cell
        // NOTE(review): `&` on Vect presumably forms the stress contribution
        // from the two vectors - confirm against the Vect operator definition
        tmp_force *= pair_ij.r;
        cell.stress -= pair_ij.dist & tmp_force;
      } // END IF STMNT: PAIR LIES INSIDE CUTOFF FOR PAIR POTENTIAL

      // Check that neighbor length lies in rho potential (density function) radius
      if (pair_ij.rho_knot != -1) {
        // Compute density and its gradient in one step; the gradient is
        // cached on the pair for the embedding-force pass below
        rho_val += rho_fns[pair_ij.rho_idx]->T::splint_comb(pair_ij.rho_knot, pair_ij.rho_shift, &pair_ij.drho);
      } else {
        pair_ij.drho = 0.0;
      } // END IF STMNT: PAIR LIES INSIDE CUTOFF FOR RHO POTENTIAL

      // Check that neighbor length lies in f- potential radius;
      // f and df are cached on the pair for the triplet passes
      if (pair_ij.f_knot != -1) {
        pair_ij.f = f_fns[pair_ij.f_idx]->T::splint_comb(pair_ij.f_knot, pair_ij.f_shift, &pair_ij.df);
      } else {
        pair_ij.f = 0.0;
        pair_ij.df = 0.0;
      } // END IF STMNT: PAIR LIES INSIDE CUTOFF FOR f- POTENTIAL

      // Check that neighbor length lies in p- potential radius;
      // p and dp are cached on the pair for the triplet passes
      if (pair_ij.p_knot != -1) {
        pair_ij.p = p_fns[pair_ij.p_idx]->T::splint_comb(pair_ij.p_knot, pair_ij.p_shift, &pair_ij.dp);
      } else {
        pair_ij.p = 0.0;
        pair_ij.dp = 0.0;
      } // END IF STMNT: PAIR LIES INSIDE CUTOFF FOR p- POTENTIAL
    } // END LOOP OVER PAIRS

    // Loop over every angle formed by pairs called triplets
    for (Triplet*& triplet_ijk_ptr : atom_i.triplets) {
      TripletJMEAMSpline &triplet_ijk = *(static_cast<TripletJMEAMSpline *>(triplet_ijk_ptr));  // tmp triplet
      PairJMEAMSpline &pair_ij = *(static_cast<PairJMEAMSpline *>(triplet_ijk.pair_ij));  // tmp pairs
      PairJMEAMSpline &pair_ik = *(static_cast<PairJMEAMSpline *>(triplet_ijk.pair_ik));

      // The cos(theta) should always lie inside -1 ... 1
      // So store the g and g' without checking bounds
      // (likewise q and q'); values are cached on the triplet for the force pass
      triplet_ijk.g = g_fns[triplet_ijk.g_idx]->T::splint_comb(triplet_ijk.g_knot, triplet_ijk.g_shift, &triplet_ijk.dg);
      triplet_ijk.q = q_fns[triplet_ijk.q_idx]->T::splint_comb(triplet_ijk.q_knot, triplet_ijk.q_shift, &triplet_ijk.dq);

      // Sum up rho piece for atom i caused by j and k
      // f_ij * f_ik * g_ijk
      rho_val += pair_ij.f * pair_ik.f * triplet_ijk.g;

      // Energy piece for the cell: p_ij * p_ik * q_ijk
      cell.energy += pair_ij.p * pair_ik.p * triplet_ijk.q;
    } // END LOOP OVER TRIPLETS

    // Compute energy, gradient for embedding function F
    // Punish this potential for having rho lie outside of F:
    // a quadratic penalty in the distance to the violated bound is added to
    // this atom's cell entry of constraint_err
    if ( rho_val < F_fns[atom_i.F_idx]->get_min_rcut() ) {
      double rho_i = F_fns[atom_i.F_idx]->get_min_rcut();
      constraint_err[atom_i.cell_idx] += cell.weight * DUMMY_WEIGHT * 10. * (rho_i - rho_val) * (rho_i - rho_val);
      if (!embed_extrap_)
        rho_val = rho_i;  // set the density to the inner cutoff if we don't extrapolate embedding fn later
    } else if ( rho_val > F_fns[atom_i.F_idx]->get_max_rcut() ) {
      double rho_f = F_fns[atom_i.F_idx]->get_max_rcut();
      constraint_err[atom_i.cell_idx] += cell.weight * DUMMY_WEIGHT * 10. * (rho_val - rho_f) * (rho_val - rho_f);
      if (!embed_extrap_)
        rho_val = rho_f;  // set the density to the outer cutoff if we don't extrapolate embedding fn later
    }

    // Add energy contribution from embedding function and get gradient in one step
    cell.energy += F_fns[atom_i.F_idx]->T::splint_comb(rho_val, &dF);

    // Loop over pairs for this atom to compute EAM force
    // (pairs outside the rho cutoff had drho set to 0.0 above, so they add a zero force)
    for (Pair*& pair_ij_ptr : atom_i.pairs) {
      PairJMEAMSpline &pair_ij = *(static_cast<PairJMEAMSpline *>(pair_ij_ptr));  // tmp pair
      AtomJMEAMSpline &atom_j = *(static_cast<AtomJMEAMSpline *>(pair_ij.neigh));  // tmp atom

      Vect tmp_force = pair_ij.dist * pair_ij.drho * dF;  // compute tmp force values
      atom_i.force += tmp_force;  // add in force on atom i from atom j
      atom_j.force -= tmp_force;  // subtract off force on atom j from atom i (Newton's law: action = -reaction)

      // Compute stress on cell
      tmp_force *= pair_ij.r;
      cell.stress -= pair_ij.dist & tmp_force;
    } // END 2nd LOOP OVER PAIRS

    // Loop over every angle formed by pairs called triplets
    for (Triplet*& triplet_ijk_ptr : atom_i.triplets) {
      TripletJMEAMSpline &triplet_ijk = *(static_cast<TripletJMEAMSpline *>(triplet_ijk_ptr));  // tmp triplet
      PairJMEAMSpline &pair_ij = *(static_cast<PairJMEAMSpline *>(triplet_ijk.pair_ij));  // tmp pairs
      PairJMEAMSpline &pair_ik = *(static_cast<PairJMEAMSpline *>(triplet_ijk.pair_ik));
      AtomJMEAMSpline &atom_j = *(static_cast<AtomJMEAMSpline *>(pair_ij.neigh));  // tmp atoms
      AtomJMEAMSpline &atom_k = *(static_cast<AtomJMEAMSpline *>(pair_ik.neigh));

      // Some tmp variables to clean up force fn below.
      // dV3j / dV3k use the radial derivatives (df, dp) of the ij / ik pair;
      // V3 uses the triplet derivatives (dg, dq). The g-terms carry the
      // embedding gradient dF, the q-terms do not.
      double dV3j = triplet_ijk.g * pair_ij.df * pair_ik.f * dF + triplet_ijk.q * pair_ij.dp * pair_ik.p;
      double dV3k = triplet_ijk.g * pair_ij.f * pair_ik.df * dF + triplet_ijk.q * pair_ij.p * pair_ik.dp;
      double V3 = triplet_ijk.dg * pair_ij.f * pair_ik.f * dF + triplet_ijk.dq * pair_ij.p * pair_ik.p;

      double vlj = V3 * pair_ij.invr;
      double vlk = V3 * pair_ik.invr;
      double vv3j = dV3j - vlj * triplet_ijk.cos;
      double vv3k = dV3k - vlk * triplet_ijk.cos;

      Vect dfj = pair_ij.dist * vv3j + pair_ik.dist * vlj;
      Vect dfk = pair_ik.dist * vv3k + pair_ij.dist * vlk;

      atom_i.force += dfj + dfk;  // force on atom i from j and k
      atom_j.force -= dfj;  // reaction force on atom j from i and k
      atom_k.force -= dfk;  // reaction force on atom k from i and j

      // Compute stress on cell
      dfj *= pair_ij.r;
      dfk *= pair_ik.r;
      cell.stress -= pair_ij.dist & dfj;
      cell.stress -= pair_ik.dist & dfk;
    } // END LOOP OVER TRIPLETS
  } // END 1st LOOP OVER ATOMS

  // Hand the per-cell constraint errors to the error accumulation
  accumulate_error(comm, error_vec, constraint_err);

  // Punishment for f-pot y-max magnitude not being 1.0
  // (the squared penalty is added to error_sum_ on every process; the
  // unsquared penalty is appended to error_vec on the root only)
  double max_f_mag = std::abs(pot_fns_[3].get_max_y_mag());
  double f_pot_error = DUMMY_WEIGHT * 25. * (1.0 - max_f_mag) * (1.0 - max_f_mag);
  error_sum_ += f_pot_error * f_pot_error;
  if (error_vec && comm.is_root()) error_vec->push_back(f_pot_error);

  // Punishment for p-pot y-max magnitude not being 1.0
  double max_p_mag = std::abs(pot_fns_[5].get_max_y_mag());
  double p_pot_error = DUMMY_WEIGHT * 25. * (1.0 - max_p_mag) * (1.0 - max_p_mag);
  error_sum_ += p_pot_error * p_pot_error;
  if (error_vec && comm.is_root()) error_vec->push_back(p_pot_error);

  ++ncalls_;  // keep track of the number of times this function is called

  return error_sum_;
}
int main(int argc, char **argv) { //Common miniMD settings In in; in.datafile = NULL; int me=0; //local MPI rank int nprocs=1; //number of MPI ranks int num_threads=32; //number of Threads per Block threads int num_steps=-1; //number of timesteps (if -1 use value from lj.in) int system_size=-1; //size of the system (if -1 use value from lj.in) int check_safeexchange=0; //if 1 complain if atom moves further than 1 subdomain length between exchanges int do_safeexchange=0; //if 1 use safe exchange mode [allows exchange over multiple subdomains] int use_sse=0; //setting for SSE variant of miniMD only int screen_yaml=0; //print yaml output to screen also int yaml_output=0; //print yaml output int halfneigh=0; //1: use half neighborlist; 0: use full neighborlist; -1: use original miniMD version half neighborlist force char* input_file = NULL; int ghost_newton = 0; int skip_gpu = 999; int neighbor_size = -1; //OpenCL specific int platform = 0; int device = 0; int subdevice = -1; int ppn = 2; int use_tex = 0; int threads_per_atom = 1; int map_device=0; for(int i = 0; i < argc; i++) { if((strcmp(argv[i], "-i") == 0) || (strcmp(argv[i], "--input_file") == 0)) { input_file = argv[++i]; continue; } if((strcmp(argv[i],"-p")==0)||(strcmp(argv[i],"--platform")==0)) {platform=atoi(argv[++i]); continue;} if((strcmp(argv[i],"-d")==0)||(strcmp(argv[i],"--device")==0)) {device=atoi(argv[++i]); continue;} if((strcmp(argv[i],"-sd")==0)||(strcmp(argv[i],"--subdevice")==0)) {subdevice=atoi(argv[++i]); continue;} if((strcmp(argv[i],"-sd_map")==0)||(strcmp(argv[i],"--subdevice_mapping")==0)) {subdevice=1-me%ppn; continue;} if((strcmp(argv[i],"-ng")==0)||(strcmp(argv[i],"--num_gpus")==0)) {ppn=atoi(argv[++i]); continue;} if((strcmp(argv[i],"-dm")==0)||(strcmp(argv[i],"--device_map")==0)) {map_device=1; continue;} } MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &me); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); if(map_device) {device = me%ppn; if(device>=skip_gpu) device++;} OpenCLWrapper* 
opencl = new OpenCLWrapper; if( me == 0) printf("# Platforms: %i\n",opencl->num_platforms); printf("# Proc: %i using device %i\n",me,device); opencl->Init(argc,argv,device,device+1,NULL,platform,subdevice); int error = 0; if(input_file == NULL) error = input(in, "in.lj.miniMD"); else error = input(in, input_file); if (error) { MPI_Finalize(); exit(0); } for(int i=0;i<argc;i++) { if((strcmp(argv[i],"-t")==0)||(strcmp(argv[i],"--num_threads")==0)) {num_threads=atoi(argv[++i]); continue;} if((strcmp(argv[i],"-n")==0)||(strcmp(argv[i],"--nsteps")==0)) {num_steps=atoi(argv[++i]); continue;} if((strcmp(argv[i],"-s")==0)||(strcmp(argv[i],"--size")==0)) {system_size=atoi(argv[++i]); continue;} if((strcmp(argv[i],"--half_neigh")==0)) {halfneigh=atoi(argv[++i]); continue;} if((strcmp(argv[i],"-sse")==0)) {use_sse=atoi(argv[++i]); continue;} if((strcmp(argv[i],"--check_exchange")==0)) {check_safeexchange=1; continue;} if((strcmp(argv[i],"-o")==0)||(strcmp(argv[i],"--yaml_output")==0)) {yaml_output=atoi(argv[++i]); continue;} if((strcmp(argv[i],"--yaml_screen")==0)) {screen_yaml=1; continue;} if((strcmp(argv[i], "-f") == 0) || (strcmp(argv[i], "--data_file") == 0)) { if(in.datafile == NULL) in.datafile = new char[1000]; strcpy(in.datafile, argv[++i]); continue; } if((strcmp(argv[i], "-u") == 0) || (strcmp(argv[i], "--units") == 0)) { in.units = strcmp(argv[++i], "metal") == 0 ? 1 : 0; continue; } if((strcmp(argv[i], "-p") == 0) || (strcmp(argv[i], "--force") == 0)) { in.forcetype = strcmp(argv[++i], "eam") == 0 ? 
FORCEEAM : FORCELJ; continue; } if((strcmp(argv[i], "-gn") == 0) || (strcmp(argv[i], "--ghost_newton") == 0)) { ghost_newton = atoi(argv[++i]); continue; } if((strcmp(argv[i], "--skip_gpu") == 0)) { skip_gpu = atoi(argv[++i]); continue; } if((strcmp(argv[i], "-b") == 0) || (strcmp(argv[i], "--neigh_bins") == 0)) { neighbor_size = atoi(argv[++i]); continue; } if((strcmp(argv[i],"-tex")==0)||(strcmp(argv[i],"--texture")==0)) {use_tex=atoi(argv[++i]); continue;} if((strcmp(argv[i],"-tpa")==0)) {threads_per_atom=atoi(argv[++i]); continue;} if((strcmp(argv[i],"-h")==0)||(strcmp(argv[i],"--help")==0)) { printf("\n---------------------------------------------------------\n"); printf("-------------" VARIANT_STRING "------------\n"); printf("---------------------------------------------------------\n\n"); printf("miniMD is a simple, parallel molecular dynamics (MD) code,\n" "which is part of the Mantevo project at Sandia National\n" "Laboratories ( http://www.mantevo.org ).\n" "The original authors of MPI based miniMD are Steve Plimpton ([email protected]) ,\n" "Paul Crozier ([email protected]) with current versions \n" "written by Christian Trott ([email protected]).\n\n"); printf("Commandline Options:\n"); printf("\n Execution configuration:\n"); printf("\t-t / --num_threads <threads>: set number of threads per block (default 32)\n"); printf("\t--half_neigh <int>: use half neighborlists (default 0)\n" "\t 0: full neighborlist\n" "\t 1: half neighborlist (not supported in OpenCL variant)\n" "\t -1: original miniMD half neighborlist force \n" "\t (not supported in OpenCL variant)\n"); printf("\t-d / --device <int>: select device (default 0)\n"); printf("\t-dm / --device_map: map devices to MPI ranks\n"); printf("\t-ng / --num_gpus <int>: give number of GPUs per Node (used in conjuction with -dm\n" "\t to determine device id: 'id=mpi_rank%%ng' (default 2)\n"); printf("\t--skip_gpu <int>: skip the specified gpu when assigning devices to MPI ranks\n" "\t used in conjunction 
with -dm (but must come first in arg list)\n"); printf("\t-p / --platform <int>: select platform (default 0)\n"); printf("\t-sse <sse_version>: use explicit sse intrinsics (use miniMD-SSE variant)\n"); printf("\t-gn / --ghost_newton <int>: set usage of newtons third law for ghost atoms\n" "\t (only applicable with half neighborlists)\n"); printf("\n Simulation setup:\n"); printf("\t-i / --input_file <string>: set input file to be used (default: in.lj.miniMD)\n"); printf("\t-n / --nsteps <nsteps>: set number of timesteps for simulation\n"); printf("\t-s / --size <size>: set linear dimension of systembox and neighbor bins\n"); printf("\t-b / --neigh_bins <int>: set linear dimension of neighbor bin grid\n"); printf("\t-u / --units <string>: set units (lj or metal), see LAMMPS documentation\n"); printf("\t-p / --force <string>: set interaction model (lj or eam)\n"); printf("\t-f / --data_file <string>: read configuration from LAMMPS data file\n"); printf("\n Miscelaneous:\n"); printf("\t--check_exchange: check whether atoms moved further than subdomain width\n"); printf("\t--safe_exchange: perform exchange communication with all MPI processes\n" "\t within rcut_neighbor (outer force cutoff)\n"); printf("\t--yaml_output <int>: level of yaml output (default 0)\n"); printf("\t--yaml_screen: write yaml output also to screen\n"); printf("\t-tex / --texture <int>: use texture cache in force kernel (default 0)\n"); printf("\t-h / --help: display this help message\n\n"); printf("---------------------------------------------------------\n\n"); exit(0); } } Atom atom; Force force; Neighbor neighbor; Integrate integrate; Thermo thermo; Comm comm; Timer timer; ThreadData threads; if(in.forcetype == FORCEEAM) { printf("ERROR: " VARIANT_STRING " does not yet support EAM simulations. Exiting.\n"); MPI_Finalize(); exit(0); } if(ghost_newton!=0) { if(me ==0 ) printf("ERROR: -ghost_newton %i is not supported in " VARIANT_STRING ". 
Exiting.\n",ghost_newton); MPI_Finalize(); exit(0); } if(halfneigh!=0) { if(me ==0 ) printf("ERROR: -half_neigh %i is not supported in " VARIANT_STRING ". Exiting.\n",halfneigh); MPI_Finalize(); exit(0); } if(halfneigh!=0) { if(me ==0 ) printf("ERROR: -half_neigh %i is not supported in " VARIANT_STRING ". Exiting.\n",halfneigh); MPI_Finalize(); exit(0); } if(use_tex!=0) { if(me ==0 ) printf("ERROR: -tex %i is currently broken. Exiting.\n",use_tex); MPI_Finalize(); exit(0); } if(use_sse) { #ifndef VARIANT_SSE if(me ==0 ) printf("ERROR: Trying to run with -sse with miniMD reference version. Use SSE variant instead. Exiting.\n"); MPI_Finalize(); exit(0); #endif } threads.mpi_me=me; threads.mpi_num_threads=nprocs; threads.omp_me=0; threads.omp_num_threads=num_threads; atom.threads = &threads; comm.threads = &threads; force.threads = &threads; integrate.threads = &threads; neighbor.threads = &threads; thermo.threads = &threads; opencl->blockdim = num_threads; atom.threads_per_atom = threads_per_atom; atom.use_tex = use_tex; comm.do_safeexchange=do_safeexchange; force.use_sse=use_sse; neighbor.halfneigh=halfneigh; compile_kernels(opencl); integrate.opencl = opencl; force.opencl = opencl; neighbor.opencl = opencl; atom.opencl = opencl; comm.opencl = opencl; if(num_steps > 0) in.ntimes = num_steps; if(system_size > 0) { in.nx = system_size; in.ny = system_size; in.nz = system_size; } if(neighbor_size > 0) { neighbor.nbinx = neighbor_size; neighbor.nbiny = neighbor_size; neighbor.nbinz = neighbor_size; } if(neighbor_size < 0 && in.datafile == NULL) { MMD_float neighscale = 5.0 / 6.0; neighbor.nbinx = neighscale * in.nx; neighbor.nbiny = neighscale * in.ny; neighbor.nbinz = neighscale * in.ny; } if(neighbor_size < 0 && in.datafile) neighbor.nbinx = -1; if(neighbor.nbinx == 0) neighbor.nbinx = 1; if(neighbor.nbiny == 0) neighbor.nbiny = 1; if(neighbor.nbinz == 0) neighbor.nbinz = 1; integrate.ntimes = in.ntimes; integrate.dt = in.dt; neighbor.every = in.neigh_every; 
neighbor.cutneigh = in.neigh_cut; force.cutforce = in.force_cut; thermo.nstat = in.thermo_nstat; if(me == 0) printf("# Create System:\n"); if(in.datafile) { read_lammps_data(atom, comm, neighbor, integrate, thermo, in.datafile, in.units); MMD_float volume = atom.box.xprd * atom.box.yprd * atom.box.zprd; in.rho = 1.0 * atom.natoms / volume; force.setup(); } else { create_box(atom, in.nx, in.ny, in.nz, in.rho); comm.setup(neighbor.cutneigh, atom); neighbor.setup(atom); integrate.setup(); force.setup(); create_atoms(atom, in.nx, in.ny, in.nz, in.rho); thermo.setup(in.rho, integrate, atom, in.units); create_velocity(in.t_request, atom, thermo); } if(me == 0) printf("# Done .... \n"); if(me == 0) { fprintf(stdout, "# " VARIANT_STRING " output ...\n"); fprintf(stdout, "# Systemparameters: \n"); fprintf(stdout, "\t# MPI processes: %i\n", neighbor.threads->mpi_num_threads); fprintf(stdout, "\t# OpenMP threads: %i\n", neighbor.threads->omp_num_threads); fprintf(stdout, "\t# Inputfile: %s\n", input_file == 0 ? "in.lj.miniMD" : input_file); fprintf(stdout, "\t# Datafile: %s\n", in.datafile ? in.datafile : "None"); fprintf(stdout, "\t# ForceStyle: %s\n", in.forcetype == FORCELJ ? "LJ" : "EAM"); fprintf(stdout, "\t# Units: %s\n", in.units == 0 ? 
"LJ" : "METAL"); fprintf(stdout, "\t# Atoms: %i\n", atom.natoms); fprintf(stdout, "\t# System size: %2.2lf %2.2lf %2.2lf (unit cells: %i %i %i)\n", atom.box.xprd, atom.box.yprd, atom.box.zprd, in.nx, in.ny, in.nz); fprintf(stdout, "\t# Density: %lf\n", in.rho); fprintf(stdout, "\t# Force cutoff: %lf\n", force.cutforce); fprintf(stdout, "\t# Neigh cutoff: %lf\n", neighbor.cutneigh); fprintf(stdout, "\t# Half neighborlists: %i\n", neighbor.halfneigh); fprintf(stdout, "\t# Neighbor bins: %i %i %i\n", neighbor.nbinx, neighbor.nbiny, neighbor.nbinz); fprintf(stdout, "\t# Neighbor frequency: %i\n", neighbor.every); fprintf(stdout, "\t# Timestep size: %lf\n", integrate.dt); fprintf(stdout, "\t# Thermo frequency: %i\n", thermo.nstat); fprintf(stdout, "\t# Ghost Newton: %i\n", ghost_newton); fprintf(stdout, "\t# Use SSE intrinsics: %i\n", force.use_sse); fprintf(stdout, "\t# Do safe exchange: %i\n", comm.do_safeexchange); fprintf(stdout, "\t# Size of float: %i\n\n",sizeof(MMD_float)); } comm.exchange(atom); comm.borders(atom); atom.d_x->upload(); atom.d_v->upload(); //atom.d_vold->upload(); neighbor.build(atom); if (me == 0) printf("# Starting dynamics ...\n"); if (me == 0) printf("# Timestep T U P Time\n"); thermo.compute(0,atom,neighbor,force,timer,comm); force.compute(atom,neighbor,comm.me); timer.barrier_start(TIME_TOTAL); integrate.run(atom,force,neighbor,comm,thermo,timer); timer.barrier_stop(TIME_TOTAL); int natoms; MPI_Allreduce(&atom.nlocal,&natoms,1,MPI_INT,MPI_SUM,MPI_COMM_WORLD); thermo.compute(-1,atom,neighbor,force,timer,comm); if(me == 0) { double time_other=timer.array[TIME_TOTAL]-timer.array[TIME_FORCE]-timer.array[TIME_NEIGH]-timer.array[TIME_COMM]; printf("\n\n"); printf("# Performance Summary:\n"); printf("# MPI_proc OMP_threads nsteps natoms t_total t_force t_neigh t_comm t_other performance perf/thread grep_string t_extra\n"); printf("%i %i %i %i %lf %lf %lf %lf %lf %lf %lf PERF_SUMMARY %lf\n\n\n", nprocs,num_threads,integrate.ntimes,natoms, 
timer.array[TIME_TOTAL],timer.array[TIME_FORCE],timer.array[TIME_NEIGH],timer.array[TIME_COMM],time_other, 1.0*natoms*integrate.ntimes/timer.array[TIME_TOTAL],1.0*natoms*integrate.ntimes/timer.array[TIME_TOTAL]/nprocs/num_threads,timer.array[TIME_TEST]); } if(yaml_output) output(in,atom,force,neighbor,comm,thermo,integrate,timer,screen_yaml); MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); delete opencl; return 0; }
// Free-function adapter: hands both pointers unchanged to comm.setTargetingD().
void setTargetingDWrapper(Arguments *input, Reply *output)
{
  comm.setTargetingD(input, output);
}
// Free-function adapter around comm.printGains(); neither argument is used.
void printGainsWrapper(Arguments *input, Reply *output)
{
  (void)input;
  (void)output;
  comm.printGains();
}
// Free-function adapter: hands both pointers unchanged to comm.setDesiredThetaP().
void setDesiredThetaPWrapper(Arguments *input, Reply *output)
{
  comm.setDesiredThetaP(input, output);
}
// Free-function adapter: hands both pointers unchanged to comm.setSwingUpD().
void setSwingUpDWrapper(Arguments *input, Reply *output)
{
  comm.setSwingUpD(input, output);
}
int main (int argc, char **argv) { CGMTimers *timers = new CGMTimers (); Comm *comm = Comm::getComm (&argc, &argv, timers); int tag = 0; int p = comm->getNumberOfProcessors (); //printf("Number of processors %d\n",p); int id = comm->getMyId (); //printf("ID%d\n",id); int sendTo = (id + 1) % p; int receiveFrom = (id - 1 + p) % p; int actualSource = -1; vector<int> values; if (argc > 1) { char *size=strtok(argv[1],","); while (size!=NULL) { values.push_back(atoi(size)); size=strtok(NULL,","); } } else { printf("Falt argumento com os tamanhos das matrices: ex. 1,3,4,5,6\n"); } // printf("size %d\n",values.size()); // int bloco=(values.size()-1)/p; //printf("Bloco %d\n",bloco); SimpleCommObject<int> sample(0); int matrix_size=values.size()-1; int **total_matrix=new int*[matrix_size]; for (int i=0;i<matrix_size;i++) total_matrix[i]=new int[matrix_size]; for (int row=0;row<matrix_size;row++) for (int col=0;col<matrix_size;col++) { if (row==col) total_matrix[row][row]=0; else total_matrix[row][col]=-1; } int *blocos=new int[p]; int q=(values.size()-1)/p; if (q<1) q=1; int r=(values.size()-1)%p; if (r<1) r=0; for (int i=0;i<p;i++) { if (i<r) blocos[i]=q+1; else blocos[i]=q; } //printf("ID: %d Bloco: %d\n",id,blocos[id]); int bloco_offset=0; for (int i=0;i<id;i++) bloco_offset+=blocos[i]; //int row_start=blocos[id]*id; int row_start=bloco_offset; int row_end=row_start+blocos[id]-1; //printf("ID %d, row_start %d row_end %d\n",id,row_start, row_end); for (int rodada=0;rodada<=p-id-1;rodada++) { //printf("ID %d RODADA %d\n",id,rodada); //int col_start=blocos[id]*(rodada+id); int col_start=bloco_offset+(blocos[id]*rodada); //int col_end=blocos[id]*(rodada+id+1)-1; int col_end=col_start+blocos[id]-1; if (col_start>values.size()-2) break; // printf("ID %d, col_start %d, col_end %d RODADA: %d\n",id,col_start, col_end,rodada); CommObjectList data_to_send(&sample); workOnSubMatrix(&total_matrix, &values, row_start,row_end,col_start,col_end, rodada, blocos[id], id); //printf("ID: %d 
SAIU RODADA: %d\n",id,rodada); int **submatrix=new int*[row_end-row_start+1]; for (int i=0;i<row_end-row_start+1;i++) submatrix[i]=new int[col_end-col_start+1]; /*if (id==1) { for (int row=0;row<matrix_size;row++) { for (int col=0;col<matrix_size;col++) { printf("R>%d %d,%d=%d ",rodada,row,col,total_matrix[row][col]); } printf("\n"); } }*/ convertMatrixToList(&total_matrix,row_start, col_end, col_start, col_end, &data_to_send,id,rodada); //printf("ID: %d, COPIOU SUBM RODADA: %d\n",id,rodada); CommObjectList data_to_receive(&sample); if (id!=0) { //printf("ID: %d IS SENDING RODADA: %d\n",id,rodada); comm -> send(id-1,data_to_send,rodada); // printf("MANDOU DE ID: %d a ID: %d TAG_ROUND:%d\n",id,id-1,rodada); } if (id!=p-1) { if (col_end>=values.size()-2) { // printf("ID: %d SAINDO\n",id); continue; }else { int num_receives=blocos[id]/blocos[id+1]; // printf("ID: %d ESPERA RECEBER %d DE ID: %d RODADA: %d\n",id,num_receives,id+1,rodada); for (int i=0;i<num_receives;i++) { comm -> receive(id+1,data_to_receive,rodada+i,&actualSource); copyFromSubMatrix(&total_matrix,&data_to_receive,id+1,id,rodada+i); // printf("ID: %d RECEBEU DE ID: %d TAG_ROUND: %d\n",id,id+1, rodada+i); } } } } // printf("ID %d terminou\n",id); if (id==0) { printf("Custo total da matriz %d\n",total_matrix[0][values.size()-2]); } comm -> dispose(); }
double PotEAMSpline::fast_compute(const Comm& comm, ErrorVec *error_vec) { // Untrap procs if necessary if (is_trapped_) { if (error_vec) { int flag = 3; comm.bcast(&flag, 1, MPI_INT, comm.get_root()); } else { int flag = 2; comm.bcast(&flag, 1, MPI_INT, comm.get_root()); } } // Initialize potential on all procs initialize_pot(comm, error_vec); // Initialize potential by resetting forces initialize_compute(comm); // Setup local variables for potential functions std::vector<T *> phi_fns; for (Basis*& fn : pot_fns_[0].fns) phi_fns.push_back(static_cast<T *>(fn)); std::vector<T *> rho_fns; for (Basis*& fn : pot_fns_[1].fns) rho_fns.push_back(static_cast<T *>(fn)); std::vector<Basis *> F_fns; for (Basis*& fn : pot_fns_[2].fns) // allow embedding fn to be any basis F_fns.push_back(fn); // Set up constraint error (error from density going out of bounds of embedding function) Vector<double> constraint_err(mmz->config->ncells,0.0); // Loop over all atoms in atomvec for (Atom*& atom_i_ptr : mmz->atomvec->atoms) { // Make temporary atom and cell AtomEAMSpline &atom_i = *(static_cast<AtomEAMSpline *>(atom_i_ptr)); Cell &cell = *mmz->config->cells[atom_i.cell_idx]; double rho_val = 0.0; // initialize density for this atom double dF = 0.0; // initialize gradient of embedding fn for this atom // Loop over pairs for this atom for (Pair*& pair_ij_ptr : atom_i.pairs) { PairEAMSpline &pair_ij = *(static_cast<PairEAMSpline *>(pair_ij_ptr)); // tmp pair // Check that neighbor length lies in pair potential radius if (pair_ij.phi_knot != -1) { AtomEAMSpline &atom_j = *(static_cast<AtomEAMSpline *>(pair_ij.neigh)); // tmp atom // Compute phi(r_ij) and its gradient in one step double phigrad; double phival = 0.5 * phi_fns[pair_ij.phi_idx]->T::splint_comb(pair_ij.phi_knot, pair_ij.phi_shift, &phigrad); phigrad *= 0.5; // only half of the gradient/energy contributes to the force/energy since we are double counting cell.energy += phival; // add in piece contributed by neighbor to energy Vect 
tmp_force = pair_ij.dist * phigrad; // compute tmp force values atom_i.force += tmp_force; // add in force on atom i from atom j atom_j.force -= tmp_force; // subtract off force on atom j from atom i (Newton's law: action = -reaction) // Compute stress on cell tmp_force *= pair_ij.r; cell.stress -= pair_ij.dist & tmp_force; } // END IF STMNT: PAIR LIES INSIDE CUTOFF FOR PAIR POTENTIAL // Check that neighbor length lies in rho potential (density function) radius if (pair_ij.rho_knot != -1) { // Compute density and its gradient in one step rho_val += rho_fns[pair_ij.rho_idx]->T::splint_comb(pair_ij.rho_knot, pair_ij.rho_shift, &pair_ij.drho); } else { pair_ij.drho = 0.0; } // END IF STMNT: PAIR LIES INSIDE CUTOFF FOR RHO POTENTIAL } // END LOOP OVER PAIRS // Compute energy, gradient for embedding function F // Punish this potential for having rho lie outside of F if ( rho_val < F_fns[atom_i.F_idx]->get_min_rcut() ) { double rho_i = F_fns[atom_i.F_idx]->get_min_rcut(); constraint_err[atom_i.cell_idx] += cell.weight * DUMMY_WEIGHT * 10. * (rho_i - rho_val) * (rho_i - rho_val); if (!embed_extrap_) rho_val = rho_i; // set the density to the inner cutoff if we don't extrapolate embedding fn later } else if ( rho_val > F_fns[atom_i.F_idx]->get_max_rcut() ) { double rho_f = F_fns[atom_i.F_idx]->get_max_rcut(); constraint_err[atom_i.cell_idx] += cell.weight * DUMMY_WEIGHT * 10. 
* (rho_val - rho_f) * (rho_val - rho_f); if (!embed_extrap_) rho_val = rho_f; // set the density to the outer cutoff if we don't extrapolate embedding fn later } // Add energy contribution from embedding function and get gradient in one step cell.energy += F_fns[atom_i.F_idx]->eval_comb(rho_val, &dF); // Loop over pairs for this atom to compute EAM force for (Pair*& pair_ij_ptr : atom_i.pairs) { PairEAMSpline &pair_ij = *(static_cast<PairEAMSpline *>(pair_ij_ptr)); // tmp pair AtomEAMSpline &atom_j = *(static_cast<AtomEAMSpline *>(pair_ij.neigh)); // tmp atom Vect tmp_force = pair_ij.dist * pair_ij.drho * dF; // compute tmp force values atom_i.force += tmp_force; // add in force on atom i from atom j atom_j.force -= tmp_force; // subtract off force on atom j from atom i (Newton's law: action = -reaction) // Compute stress on cell tmp_force *= pair_ij.r; cell.stress -= pair_ij.dist & tmp_force; } // END 2nd LOOP OVER PAIRS } // END 1st LOOP OVER ATOMS accumulate_error(comm, error_vec, constraint_err); // Punishment for U'(n_mean) != 0 for (int i=0; i<mmz->potlist->get_ntypes(); ++i) { double rho_i = F_fns[i]->get_min_rcut(); double rho_f = F_fns[i]->get_max_rcut(); double eam_error = DUMMY_WEIGHT * F_fns[i]->eval_grad(0.5 * (rho_i + rho_f)); error_sum_ += eam_error * eam_error; if (error_vec && comm.is_root()) error_vec->push_back(eam_error); } ++ncalls_; // keep track of the number of times this function is called return error_sum_; }
// Free-function adapter around comm.printPosition(); neither argument is used.
void printPositionWrapper(Arguments *input, Reply *output)
{
  (void)input;
  (void)output;
  comm.printPosition();
}
// Free-function adapter: hands both pointers unchanged to comm.setSoftLimitsP().
void setSoftLimitsPWrapper(Arguments *input, Reply *output)
{
  comm.setSoftLimitsP(input, output);
}
// Free-function adapter: hands both pointers unchanged to comm.openGripper2().
void openGripper2Wrapper(Arguments *input, Reply *output)
{
  comm.openGripper2(input, output);
}
// Free-function adapter: hands both pointers unchanged to comm.setTheta().
void setThetaWrapper(Arguments *input, Reply *output)
{
  comm.setTheta(input, output);
}
void Integrate::run(Atom &atom, Force* force, Neighbor &neighbor, Comm &comm, Thermo &thermo, Timer &timer) { int i, n; comm.timer = &timer; timer.array[TIME_TEST] = 0.0; int check_safeexchange = comm.check_safeexchange; mass = atom.mass; dtforce = dtforce / mass; //Use OpenMP threads only within the following loop containing the main loop. //Do not use OpenMP for setup and postprocessing. #pragma omp parallel private(i,n) { for(n = 0; n < ntimes; n++) { #pragma omp barrier x = &atom.x[0][0]; v = &atom.v[0][0]; f = &atom.f[0][0]; xold = &atom.xold[0][0]; nlocal = atom.nlocal; initialIntegrate(); #pragma omp barrier #pragma omp master timer.stamp(); if((n + 1) % neighbor.every) { #pragma omp barrier comm.communicate(atom); #pragma omp master timer.stamp(TIME_COMM); #pragma omp barrier } else { //these routines are not yet ported to OpenMP { if(check_safeexchange) { #pragma omp master { double d_max = 0; for(i = 0; i < atom.nlocal; i++) { double dx = (x[3 * i + 0] - xold[3 * i + 0]); if(dx > atom.box.xprd) dx -= atom.box.xprd; if(dx < -atom.box.xprd) dx += atom.box.xprd; double dy = (x[3 * i + 1] - xold[3 * i + 1]); if(dy > atom.box.yprd) dy -= atom.box.yprd; if(dy < -atom.box.yprd) dy += atom.box.yprd; double dz = (x[3 * i + 2] - xold[3 * i + 2]); if(dz > atom.box.zprd) dz -= atom.box.zprd; if(dz < -atom.box.zprd) dz += atom.box.zprd; double d = dx * dx + dy * dy + dz * dz; if(d > d_max) d_max = d; } d_max = sqrt(d_max); if((d_max > atom.box.xhi - atom.box.xlo) || (d_max > atom.box.yhi - atom.box.ylo) || (d_max > atom.box.zhi - atom.box.zlo)) printf("Warning: Atoms move further than your subdomain size, which will eventually cause lost atoms.\n" "Increase reneighboring frequency or choose a different processor grid\n" "Maximum move distance: %lf; Subdomain dimensions: %lf %lf %lf\n", d_max, atom.box.xhi - atom.box.xlo, atom.box.yhi - atom.box.ylo, atom.box.zhi - atom.box.zlo); } } //int tid = omp_get_thread_num(); //printf("Check B: %i %i %i\n",comm.me,tid,n); 
#pragma omp master timer.stamp_extra_start(); comm.exchange(atom); comm.borders(atom); #pragma omp master { timer.stamp_extra_stop(TIME_TEST); timer.stamp(TIME_COMM); } if(check_safeexchange) for(int i = 0; i < 3 * atom.nlocal; i++) atom.xold[i] = atom.x[i]; } #pragma omp barrier neighbor.build(atom); #pragma omp barrier #pragma omp master timer.stamp(TIME_NEIGH); } force->evflag = (n + 1) % thermo.nstat == 0; force->compute(atom, neighbor, comm, comm.me); #pragma omp master timer.stamp(TIME_FORCE); if(neighbor.halfneigh && neighbor.ghost_newton) { comm.reverse_communicate(atom); #pragma omp master timer.stamp(TIME_COMM); } v = &atom.v[0][0]; f = &atom.f[0][0]; nlocal = atom.nlocal; #pragma omp barrier finalIntegrate(); #pragma omp barrier if(thermo.nstat) thermo.compute(n + 1, atom, neighbor, force, timer, comm); } } //end OpenMP parallel }
// Free-function adapter: hands both pointers unchanged to comm.closeGripper2().
void closeGripper2Wrapper(Arguments *input, Reply *output)
{
  comm.closeGripper2(input, output);
}