Example #1
void Teuchos::updateParametersFromXmlFileAndBroadcast(
  const std::string &xmlFileName,
  const Ptr<ParameterList> &paramList,
  const Comm<int> &comm
  )
{
  if (comm.getSize()==1)
    updateParametersFromXmlFile(xmlFileName, paramList);
  else {
    if (comm.getRank()==0) {
      XMLParameterListReader xmlPLReader;
      xmlPLReader.setAllowsDuplicateSublists( false );
      FileInputSource xmlFile(xmlFileName);
      XMLObject xmlParams = xmlFile.getObject();
      std::string xmlString = toString(xmlParams);
      int strsize = xmlString.size();
      broadcast<int, int>(comm, 0, &strsize);
      broadcast<int, char>(comm, 0, strsize, &xmlString[0]);
      updateParametersFromXmlString(xmlString, paramList);
    }
    else {
      int strsize;
      broadcast<int, int>(comm, 0, &strsize);
      std::string xmlString;
      xmlString.resize(strsize);
      broadcast<int, char>(comm, 0, strsize, &xmlString[0]);
      updateParametersFromXmlString(xmlString, paramList);
    }
  }
}
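This helper parses the XML file only on rank 0 and broadcasts the serialized string, so every rank must call it collectively. A minimal usage sketch, assuming a Trilinos build (the file name is a placeholder):

#include "Teuchos_XMLParameterListHelpers.hpp"
#include "Teuchos_DefaultComm.hpp"
#include "Teuchos_ParameterList.hpp"

void readParamsEverywhere()
{
  // Every rank obtains the same communicator and makes the same collective call.
  Teuchos::RCP<const Teuchos::Comm<int> > comm =
    Teuchos::DefaultComm<int>::getComm();
  Teuchos::ParameterList params;
  Teuchos::updateParametersFromXmlFileAndBroadcast(
    "my_params.xml", Teuchos::ptrFromRef(params), *comm);
  // params now holds the file's contents on all ranks, not just rank 0.
}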
Example #2
void PotJMEAMSpline::fast_compute_densities(const Comm& comm, Vector<double> *density_ptr)
{
  // Untrap procs if necessary
  if (is_trapped_) {
    int flag = 4;
    comm.bcast(&flag, 1, MPI_INT, comm.get_root());
  }

  // Initialize potential on all procs
  initialize_pot(comm);

  // Setup local variables for potential function
  std::vector<T *> rho_fns;
  for (Basis*& fn : pot_fns_[1].fns)
    rho_fns.push_back(static_cast<T *>(fn));
  std::vector<T *> f_fns;
  for (Basis*& fn : pot_fns_[3].fns)
    f_fns.push_back(static_cast<T *>(fn));
  std::vector<T *> g_fns;
  for (Basis*& fn : pot_fns_[4].fns)
    g_fns.push_back(static_cast<T *>(fn));

  // Make list of densities for each atom
  int natoms = mmz->config->total_natoms;
  Vector<double> densities(natoms, 0.0);

  // Loop over all atoms in atomvec
  for (Atom*& atom_i_ptr : mmz->atomvec->atoms) {
    AtomJMEAMSpline &atom_i = *(static_cast<AtomJMEAMSpline *>(atom_i_ptr));  // tmp atom

    for (Pair*& pair_ij_ptr : atom_i.pairs) {
      PairJMEAMSpline &pair_ij = *(static_cast<PairJMEAMSpline *>(pair_ij_ptr));  // tmp pair

      if (pair_ij.rho_knot != -1)  // pair distance is inside density function
        densities[atom_i.global_idx] += rho_fns[pair_ij.rho_idx]->T::splint(pair_ij.rho_knot, pair_ij.rho_shift);

      if (pair_ij.f_knot != -1)  // radial distance inside f-potential
        pair_ij.f = f_fns[pair_ij.f_idx]->T::splint(pair_ij.f_knot, pair_ij.f_shift);
      else
        pair_ij.f = 0.0;
    } // END LOOP OVER PAIRS

    for (Triplet*& triplet_ijk_ptr : atom_i.triplets) {
      TripletJMEAMSpline &triplet_ijk = *(static_cast<TripletJMEAMSpline *>(triplet_ijk_ptr));  // tmp triplet
      PairJMEAMSpline &pair_ij = *(static_cast<PairJMEAMSpline *>(triplet_ijk.pair_ij));  // tmp pairs
      PairJMEAMSpline &pair_ik = *(static_cast<PairJMEAMSpline *>(triplet_ijk.pair_ik));

      double g_val = g_fns[triplet_ijk.g_idx]->T::splint(triplet_ijk.g_knot, triplet_ijk.g_shift);

      densities[atom_i.global_idx] += pair_ij.f * pair_ik.f * g_val;
    } // END LOOP OVER TRIPLETS
  } // END LOOP OVER ATOMS


  // Gather up densities from all procs
  std::vector<double> densities_final(natoms, 0.0);
  comm.reduce(&densities[0], &densities_final[0], natoms, MPI_DOUBLE, MPI_SUM, comm.get_root());

  if (comm.is_root()) density_ptr->swap(densities_final);
}
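The density gather above is a plain partial-sum-plus-reduction: each rank fills its slice of a natoms-long vector and the root sums the slices. A stripped-down sketch of the same pattern in raw MPI (buffer names are illustrative):

#include <mpi.h>
#include <vector>

// Sum per-rank partial densities into a vector that is complete only on the root.
void reducePartialDensities(const std::vector<double> &local, int root, MPI_Comm comm)
{
  int rank = 0;
  MPI_Comm_rank(comm, &rank);
  std::vector<double> global(rank == root ? local.size() : 0);
  MPI_Reduce(local.data(), rank == root ? global.data() : NULL,
             (int) local.size(), MPI_DOUBLE, MPI_SUM, root, comm);
  // Only the root's "global" is meaningful afterwards, mirroring the
  // comm.is_root() swap at the end of fast_compute_densities above.
}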
Example #3
void s4u::Actor::send(Mailbox &chan, void *payload, size_t simulatedSize) {
  Comm c = Comm::send_init(this,chan);
  c.setRemains(simulatedSize);
  c.setSrcData(payload);
  // c.start() is optional.
  c.wait();
}
Example #4
        void solve(Operator& opA, Vector& x, Vector& b, Comm& comm) const
        {
            Dune::InverseOperatorResult result;
            // Parallel version is deactivated until we figure out how to do it properly.
#if HAVE_MPI
            if (parallelInformation_.type() == typeid(ParallelISTLInformation))
            {
                const size_t size = opA.getmat().N();
                const ParallelISTLInformation& info =
                    boost::any_cast<const ParallelISTLInformation&>( parallelInformation_);

                // As we use dune-istl with block size np, the number of
                // components per parallel index is only one.
                info.copyValuesTo(comm.indexSet(), comm.remoteIndices(),
                                  size, 1);
                // Construct operator, scalar product and vectors needed.
                constructPreconditionerAndSolve<Dune::SolverCategory::overlapping>(opA, x, b, comm, result);
            }
            else
#endif
            {
                OPM_THROW(std::logic_error, "this method is for parallel solve only");
            }

            checkConvergence( result );
        }
Example #5
void *s4u::Actor::recv(Mailbox &chan) {
  void *res=NULL;

  Comm c = Comm::recv_init(this, chan);
  c.setDstData(&res,sizeof(res));
  c.wait();

  return res;
}
Example #6
bool CommConfigurable::run(Messages & messages, Feedback & feedback) {
  // Run all the subclasses
  bool result = false;
  std::vector<Comm *>::iterator iter;
  for (iter = comm.begin(); iter != comm.end(); iter++) {
    Comm *subclass = *iter;
    if (subclass->run(messages, feedback)) {
      result = true;
    }
  }

  return result;
}
Example #7
void
mergeCounterNames (const Comm<int>& comm,
                   const Array<std::string>& localNames,
                   Array<std::string>& globalNames,
                   const ECounterSetOp setOp)
{
    const int myRank = comm.getRank();
    const int left = 0;
    const int right = comm.getSize() - 1;
    Array<std::string> theGlobalNames;
    mergeCounterNamesHelper (comm, myRank, left, right,
                             localNames, theGlobalNames, setOp);

    // Proc 0 has the list of counter names.  Now broadcast it back to
    // all the procs.
    broadcastStrings (comm, theGlobalNames);

    // "Transactional" semantics ensure strong exception safety for
    // output.
    globalNames.swap (theGlobalNames);
}
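mergeCounterNames is collective: every rank passes its local counter names and receives one agreed-upon global list. A hedged usage sketch, assuming the declaration ships alongside Teuchos::TimeMonitor (set-union semantics; the names are illustrative):

#include "Teuchos_TimeMonitor.hpp"
#include "Teuchos_DefaultComm.hpp"
#include "Teuchos_Array.hpp"

void mergeTimerNames()
{
  Teuchos::RCP<const Teuchos::Comm<int> > comm =
    Teuchos::DefaultComm<int>::getComm();
  Teuchos::Array<std::string> localNames;   // counters created on this rank
  localNames.push_back("Assembly");
  Teuchos::Array<std::string> globalNames;  // filled collectively
  Teuchos::mergeCounterNames(*comm, localNames, globalNames, Teuchos::Union);
  // Afterwards every rank holds the identical, merged globalNames list.
}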
Example #8
  MachineRepresentation(const Comm<int> &comm):
    networkDim(0), numProcs(comm.getSize()), myRank(comm.getRank()),
    procCoords(NULL)
  {
    // Will need this constructor to be specific to RAAMP (MD).
    // Will need a default constructor using, e.g., GeometricGenerator
    // or nothing at all, for when RAAMP is not available as TPL.
    //
    // (AG) In addition, need to be able to run without special
    // privileges in system (e.g., on hopper).
    // Notes:  For now, all cores connected to same NIC will get the
    // same coordinates; later, we could add extra coordinate dimensions
    // to represent nodes or dies (using hwloc info through RAAMP
    // data object).

    // (MD) will modify mapping test to use machine representation
    // #ifdef HAVE_ZOLTAN2_OVIS

    // Call initializer for RAAMP data object (AG)

    //get network dimension.
    //TODO change.
    // Call RAAMP Data Object to get the network dimension (AG)
    networkDim = 3;

    //allocate memory for processor coordinates.
    procCoords = new nCoord_t *[networkDim];
    for (int i = 0; i < networkDim; ++i){
      procCoords[i] = new nCoord_t [numProcs];
      memset (procCoords[i], 0, sizeof(nCoord_t) * numProcs);
    }
    //obtain the coordinate of the processor.
    this->getMyCoordinate(/*nCoord_t &xyz[networkDim]*/);
    // copy xyz into appropriate spot in procCoords. (MD)  // KDD I agree with this

    //reduceAll the coordinates of each processor.
    this->gatherMachineCoordinates();
  }
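Because the constructor allocates networkDim rows of numProcs coordinates with raw new[], the class also needs a matching destructor. A minimal sketch of the implied cleanup, assuming the same procCoords and networkDim members as above:

  ~MachineRepresentation()
  {
    if (procCoords != NULL) {
      for (int i = 0; i < networkDim; ++i)
        delete [] procCoords[i];  // free each coordinate row first
      delete [] procCoords;       // then the array of row pointers
    }
  }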
Example #9
int main() {
    controls.setPC(comm.getPC());
    controls.setup();
//    comm.printPosition();
    comm.printGains();

    controlsInterrupt.attach_us(&controls, &Controls::loop, 1000);

    while(1) {
        controls.updateIMUS();
        comm.check();

        if (serialCounter++>100) {
//            comm.printPosition();
//            comm.getPC()->printf("%f\n", controls.getTheta1());
//            comm.getPC()->printf("%f", controls.motor.getPWM());
            serialCounter = 0;
           // float z[4] = {1,2,0,0};
//            comm.getPC()->printf("%f\n",controls.target.getTheta2ForTarget(z));
        }
    }
}
Example #10
int init_simulator()
{
	srand(time(NULL));
	int warmup = rand() & 0xff;  // '<' binds tighter than '&', so compute the bound first
	for(int i=0; i<warmup; i++)
		rand32();

	InitializeCriticalSection(&cs);

	comm.add_callback(remote_OnEvent);
	CreateThread(NULL, NULL, update_state, NULL, NULL, NULL);
	CreateThread(NULL, NULL, update_controller, NULL, NULL, NULL);
	CreateThread(NULL, NULL, update_stick, NULL, NULL, NULL);

	return 0;
}
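The warm-up loop computes its bound up front because a condition written as i<rand()&0xff parses as (i < rand()) & 0xff, since '<' binds tighter than '&'. A self-contained check of that precedence rule:

#include <cassert>

int main()
{
    assert((1 < 3 & 2) == 0);    // parsed as ((1 < 3) & 2) == (1 & 2) == 0
    assert((1 < (3 & 2)) == 1);  // the intended reading: 1 < 2
    return 0;
}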
Example #11
int PotGMEAM::rescale_3body(const Comm& comm, std::ostream *out, int flag)
{
  if (!flag) return 0; // Don't run rescaling if flag is 0

  int ntypes = mmz->potlist->get_ntypes();

  int f_idx = 0;
  for (Basis*& f_fn : pot_fns_[3].fns) {
    double max_f_mag = f_fn->get_max_y_mag();
    double b = 1.0/max_f_mag;

    // Scale f-pot
    *f_fn *= b;

    // Scale g-pot
    std::vector<Basis *> &g_fns = pot_fns_[4].fns;
    for (int i=0; i<ntypes; ++i) {
      for (int j=0; j<ntypes; ++j) {
        for (int k=j; k<ntypes; ++k) {
          int ij_idx = pot_fns_[3].get_2body_alloy_idx(i, j);
          int ik_idx = pot_fns_[3].get_2body_alloy_idx(i, k);
          int ijk_idx = pot_fns_[4].get_3body_alloy_idx(i, j, k);

          if (f_idx == ij_idx)
            *g_fns[ijk_idx] /= b;
          if (f_idx == ik_idx)
            *g_fns[ijk_idx] /= b;
        }
      }
    }

    // Output scaling factor to screen
    if (comm.is_root() && out)
      *out << "MEAM potential scaling factor (b_" << f_idx << ") " << std::fixed << b << std::endl;

    ++f_idx;
  }

  return 1;
}
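The rescaling preserves the three-body energy: each f is multiplied by b = 1/max|f|, and every g sharing that f through its ij or ik slot is divided by b once per slot, so the product f_ij * f_ik * g_ijk is unchanged. A tiny numeric check of the invariant (illustrative values):

#include <cassert>
#include <cmath>

int main()
{
  double f_ij = 2.0, f_ik = 2.0, g_ijk = 0.5;
  double before = f_ij * f_ik * g_ijk;
  double b = 1.0 / 2.0;        // b = 1 / max|f|
  f_ij *= b;                   // scale the f-potential ...
  f_ik *= b;
  g_ijk /= b * b;              // ... and divide g once per matching slot
  assert(std::fabs(f_ij * f_ik * g_ijk - before) < 1e-12);
  return 0;
}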
Example #12
        void prepareSolver(Operator& wellOpA, Comm& comm)
        {

            Vector& istlb = *(this->rhs_);
            comm.copyOwnerToAll(istlb, istlb);

            const double relax = this->parameters_.ilu_relaxation_;
            const MILU_VARIANT ilu_milu  = this->parameters_.ilu_milu_;

            // TODO: revise choice of parameters
            // int coarsenTarget = 4000;
            int coarsenTarget = 1200;
            Criterion criterion(15, coarsenTarget);
            criterion.setDebugLevel( this->parameters_.cpr_solver_verbose_ ); // 0 for no debug information, 1 for printing hierarchy information
            criterion.setDefaultValuesIsotropic(2);
            criterion.setNoPostSmoothSteps( 1 );
            criterion.setNoPreSmoothSteps( 1 );
            //new guesses by hmbn
            //criterion.setAlpha(0.01); // criterion for connection strong 1/3 is default
            //criterion.setMaxLevel(2); //
            //criterion.setGamma(1); //  //1 V cycle 2 WW

            // Since DUNE 2.2 we also need to pass the smoother args instead of steps directly
            using AmgType           = typename std::conditional<std::is_same<Comm, Dune::Amg::SequentialInformation>::value,
                                                                BlackoilAmgType, ParallelBlackoilAmgType>::type;
            using SpType            = typename std::conditional<std::is_same<Comm, Dune::Amg::SequentialInformation>::value,
                                                                Dune::SeqScalarProduct<Vector>,
                                                                ParallelScalarProduct >::type;
            using OperatorType      = typename std::conditional<std::is_same<Comm, Dune::Amg::SequentialInformation>::value,
                                                                MatrixAdapter, ParallelMatrixAdapter>::type;
            typedef typename AmgType::Smoother Smoother;
            typedef typename Dune::Amg::SmootherTraits<Smoother>::Arguments  SmootherArgs;
            SmootherArgs  smootherArgs;
            smootherArgs.iterations = 1;
            smootherArgs.relaxationFactor = relax;
            const Opm::CPRParameter& params(this->parameters_); // strange conversion
            ISTLUtility::setILUParameters(smootherArgs, ilu_milu);

            auto& opARef = reinterpret_cast<OperatorType&>(*opA_);
            int newton_iteration = this->simulator_.model().newtonMethod().numIterations();
            bool update_preconditioner = false;

            if (this->parameters_.cpr_reuse_setup_ < 1) {
                update_preconditioner = true;
            }
            if (this->parameters_.cpr_reuse_setup_ < 2) {
                if (newton_iteration < 1) {
                    update_preconditioner = true;
                }
            }
            if (this->parameters_.cpr_reuse_setup_ < 3) {
                if (this->iterations() > 10) {
                    update_preconditioner = true;
                }
            }

            if ( update_preconditioner or (amg_== 0) ) {
                amg_.reset( new AmgType( params, this->weights_, opARef, criterion, smootherArgs, comm ) );
            } else {
                if (this->parameters_.cpr_solver_verbose_) {
                    std::cout << " Only update amg solver " << std::endl;
                }
                reinterpret_cast<AmgType*>(amg_.get())->updatePreconditioner(opARef, smootherArgs, comm);
            }
            // Solve.
            //SuperClass::solve(linearOperator, x, istlb, *sp, *amg, result);
            //references seem to do something else than referring

            int verbosity_linsolve = 0;
            if (comm.communicator().rank() == 0) {
                verbosity_linsolve = this->parameters_.linear_solver_verbosity_;
            }

            linsolve_.reset(new Dune::BiCGSTABSolver<Vector>(wellOpA, reinterpret_cast<SpType&>(*sp_), reinterpret_cast<AmgType&>(*amg_),
                                                             this->parameters_.linear_solver_reduction_,
                                                             this->parameters_.linear_solver_maxiter_,
                                                             verbosity_linsolve));
        }
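The cpr_reuse_setup_ checks are cumulative: values below 1 rebuild the AMG hierarchy on every call, values below 2 additionally rebuild at the first Newton iteration, values below 3 additionally rebuild after a solve that took more than 10 iterations, and 3 or higher never rebuilds. Factored out as a hypothetical helper (a sketch only; names mirror the members used above):

// Sketch: mirrors the update_preconditioner logic in prepareSolver.
bool shouldRebuildAmg(int reuseSetup, int newtonIteration, int lastLinearIterations)
{
    if (reuseSetup < 1) return true;                              // always rebuild
    if (reuseSetup < 2 && newtonIteration < 1) return true;       // rebuild at Newton start
    if (reuseSetup < 3 && lastLinearIterations > 10) return true; // rebuild after a slow solve
    return false;                                                 // otherwise reuse
}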
Example #13
int main(int argc, char** argv)
{
  In in;
  in.datafile = NULL;
  int me = 0;                   //local MPI rank
  int nprocs = 1;               //number of MPI ranks
  int num_threads = 1;		//number of OpenMP threads
  int num_steps = -1;           //number of timesteps (if -1 use value from lj.in)
  int system_size = -1;         //size of the system (if -1 use value from lj.in)
  int nx = -1;
  int ny = -1;
  int nz = -1;
  int check_safeexchange = 0;   //if 1 complain if atom moves further than 1 subdomain length between exchanges
  int do_safeexchange = 0;      //if 1 use safe exchange mode [allows exchange over multiple subdomains]
  int use_sse = 0;              //setting for SSE variant of miniMD only
  int screen_yaml = 0;          //print yaml output to screen also
  int yaml_output = 0;          //print yaml output
  int halfneigh = 1;            //1: use half neighborlist; 0: use full neighborlist; -1: use original miniMD version half neighborlist force
  int teams = 1;
  int device = 0;
  int neighbor_size = -1;
  char* input_file = NULL;
  int ghost_newton = 1;
  int sort = -1;

  for(int i = 0; i < argc; i++) {
    if((strcmp(argv[i], "-i") == 0) || (strcmp(argv[i], "--input_file") == 0)) {
      input_file = argv[++i];
      continue;
    }
  }

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &me);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  int error = 0;

  if(input_file == NULL)
    error = input(in, "in.lj.miniMD");
  else
    error = input(in, input_file);

  if(error) {
    MPI_Finalize();
    exit(0);
  }

  for(int i = 0; i < argc; i++) {
    if((strcmp(argv[i], "-t") == 0) || (strcmp(argv[i], "--num_threads") == 0)) {
      num_threads = atoi(argv[++i]);
      continue;
    }

    if((strcmp(argv[i], "--teams") == 0)) {
      teams = atoi(argv[++i]);
      continue;
    }

    if((strcmp(argv[i], "-n") == 0) || (strcmp(argv[i], "--nsteps") == 0))  {
      num_steps = atoi(argv[++i]);
      continue;
    }

    if((strcmp(argv[i], "-s") == 0) || (strcmp(argv[i], "--size") == 0)) {
      system_size = atoi(argv[++i]);
      continue;
    }

    if((strcmp(argv[i], "-nx") == 0)) {
      nx = atoi(argv[++i]);
      continue;
    }

    if((strcmp(argv[i], "-ny") == 0)) {
      ny = atoi(argv[++i]);
      continue;
    }

    if((strcmp(argv[i], "-nz") == 0)) {
      nz = atoi(argv[++i]);
      continue;
    }

    if((strcmp(argv[i], "-b") == 0) || (strcmp(argv[i], "--neigh_bins") == 0))  {
      neighbor_size = atoi(argv[++i]);
      continue;
    }

    if((strcmp(argv[i], "--half_neigh") == 0))  {
      halfneigh = atoi(argv[++i]);
      continue;
    }

    if((strcmp(argv[i], "-sse") == 0))  {
      use_sse = atoi(argv[++i]);
      continue;
    }

    if((strcmp(argv[i], "--check_exchange") == 0))  {
      check_safeexchange = 1;
      continue;
    }

    if((strcmp(argv[i], "--sort") == 0))  {
      sort = atoi(argv[++i]);
      continue;
    }

    if((strcmp(argv[i], "-o") == 0) || (strcmp(argv[i], "--yaml_output") == 0))  {
      yaml_output = atoi(argv[++i]);
      continue;
    }

    if((strcmp(argv[i], "--yaml_screen") == 0))  {
      screen_yaml = 1;
      continue;
    }

    if((strcmp(argv[i], "-f") == 0) || (strcmp(argv[i], "--data_file") == 0)) {
      if(in.datafile == NULL) in.datafile = new char[1000];

      strcpy(in.datafile, argv[++i]);
      continue;
    }

    if((strcmp(argv[i], "-u") == 0) || (strcmp(argv[i], "--units") == 0)) {
      in.units = strcmp(argv[++i], "metal") == 0 ? 1 : 0;
      continue;
    }

    if((strcmp(argv[i], "-p") == 0) || (strcmp(argv[i], "--force") == 0)) {
      in.forcetype = strcmp(argv[++i], "eam") == 0 ? FORCEEAM : FORCELJ;
      continue;
    }

    if((strcmp(argv[i], "-gn") == 0) || (strcmp(argv[i], "--ghost_newton") == 0)) {
      ghost_newton = atoi(argv[++i]);
      continue;
    }

    if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
      printf("\n-----------------------------------------------------------------------------------------------------------\n");
      printf("-------------" VARIANT_STRING "--------------------\n");
      printf("-------------------------------------------------------------------------------------------------------------\n\n");

      printf("miniMD is a simple, parallel molecular dynamics (MD) code,\n"
             "which is part of the Mantevo project at Sandia National\n"
             "Laboratories ( http://www.mantevo.org ).\n"
             "The original authors of miniMD are Steve Plimpton ([email protected]) ,\n"
             "Paul Crozier ([email protected]) with current\n"
             "versions written by Christian Trott ([email protected]).\n\n");
      printf("Commandline Options:\n");
      printf("\n  Execution configuration:\n");
      printf("\t--teams <nteams>:             set number of thread-teams used per MPI rank (default 1)\n");
      printf("\t-t / --num_threads <threads>: set number of threads per thread-team (default 1)\n");
      printf("\t--half_neigh <int>:           use half neighborlists (default 1)\n"
             "\t                                0: full neighborlist\n"
             "\t                                1: half neighborlist\n"
             "\t                               -1: original miniMD half neighborlist force (not OpenMP safe)\n");
      printf("\t-d / --device <int>:          choose device to use (only applicable for GPU execution)\n");
      printf("\t-dm / --device_map:           map devices to MPI ranks\n");
      printf("\t-ng / --num_gpus <int>:       give number of GPUs per Node (used in conjuction with -dm\n"
             "\t                              to determine device id: 'id=mpi_rank%%ng' (default 2)\n");
      printf("\t--skip_gpu <int>:             skip the specified gpu when assigning devices to MPI ranks\n"
             "\t                              used in conjunction with -dm (but must come first in arg list)\n");
      printf("\t-sse <sse_version>:           use explicit sse intrinsics (use miniMD-SSE variant)\n");
      printf("\t-gn / --ghost_newton <int>:   set usage of newtons third law for ghost atoms\n"
             "\t                                (only applicable with half neighborlists)\n");
      printf("\n  Simulation setup:\n");
      printf("\t-i / --input_file <string>:   set input file to be used (default: in.lj.miniMD)\n");
      printf("\t-n / --nsteps <int>:          set number of timesteps for simulation\n");
      printf("\t-s / --size <int>:            set linear dimension of systembox\n");
      printf("\t-nx/-ny/-nz <int>:            set linear dimension of systembox in x/y/z direction\n");
      printf("\t-b / --neigh_bins <int>:      set linear dimension of neighbor bin grid\n");
      printf("\t-u / --units <string>:        set units (lj or metal), see LAMMPS documentation\n");
      printf("\t-p / --force <string>:        set interaction model (lj or eam)\n");
      printf("\t-f / --data_file <string>:    read configuration from LAMMPS data file\n");

      printf("\n  Miscelaneous:\n");
      printf("\t--check_exchange:             check whether atoms moved further than subdomain width\n");
      printf("\t--safe_exchange:              perform exchange communication with all MPI processes\n"
             "\t                                within rcut_neighbor (outer force cutoff)\n");
      printf("\t--sort <n>:                   resort atoms (simple bins) every <n> steps (default: use reneigh frequency; never=0)");
      printf("\t-o / --yaml_output <int>:     level of yaml output (default 1)\n");
      printf("\t--yaml_screen:                write yaml output also to screen\n");
      printf("\t-h / --help:                  display this help message\n\n");
      printf("---------------------------------------------------------\n\n");

      exit(0);
    }
  }


  Atom atom;
  Neighbor neighbor;
  Integrate integrate;
  Thermo thermo;
  Comm comm;
  Timer timer;
  ThreadData threads;

  Force* force;

  if(in.forcetype == FORCEEAM) {
    force = (Force*) new ForceEAM();

    if(ghost_newton == 1) {
      if(me == 0)
        printf("# EAM currently requires '--ghost_newton 0'; Changing setting now.\n");

      ghost_newton = 0;
    }
  }

  if(in.forcetype == FORCELJ) force = (Force*) new ForceLJ();

  threads.mpi_me = me;
  threads.mpi_num_threads = nprocs;
  threads.omp_me = 0;
  threads.omp_num_threads = num_threads;

  atom.threads = &threads;
  comm.threads = &threads;
  force->threads = &threads;
  integrate.threads = &threads;
  neighbor.threads = &threads;
  thermo.threads = &threads;

  force->epsilon = in.epsilon;
  force->sigma = in.sigma;
  force->sigma6 = in.sigma*in.sigma*in.sigma*in.sigma*in.sigma*in.sigma;

  neighbor.ghost_newton = ghost_newton;

  omp_set_num_threads(num_threads);

  neighbor.timer = &timer;
  force->timer = &timer;
  comm.check_safeexchange = check_safeexchange;
  comm.do_safeexchange = do_safeexchange;
  force->use_sse = use_sse;
  neighbor.halfneigh = halfneigh;

  if(halfneigh < 0) force->use_oldcompute = 1;

  if(use_sse) {
#ifdef VARIANT_REFERENCE

    if(me == 0) printf("ERROR: Trying to run with -sse with miniMD reference version. Use SSE variant instead. Exiting.\n");

    MPI_Finalize();
    exit(0);
#endif
  }

  if(num_steps > 0) in.ntimes = num_steps;

  if(system_size > 0) {
    in.nx = system_size;
    in.ny = system_size;
    in.nz = system_size;
  }

  if(nx > 0) {
    in.nx = nx;
    if(ny > 0)
      in.ny = ny;
    else if(system_size < 0)
      in.ny = nx;

    if(nz > 0)
      in.nz = nz;
    else if(system_size < 0)
      in.nz = nx;
  }

  if(neighbor_size > 0) {
    neighbor.nbinx = neighbor_size;
    neighbor.nbiny = neighbor_size;
    neighbor.nbinz = neighbor_size;
  }

  if(neighbor_size < 0 && in.datafile == NULL) {
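    // heuristic default: roughly 5 neighbor bins per 6 unit cells in each direction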
    MMD_float neighscale = 5.0 / 6.0;
    neighbor.nbinx = neighscale * in.nx;
    neighbor.nbiny = neighscale * in.ny;
    neighbor.nbinz = neighscale * in.nz;
  }

  if(neighbor_size < 0 && in.datafile)
    neighbor.nbinx = -1;

  if(neighbor.nbinx == 0) neighbor.nbinx = 1;

  if(neighbor.nbiny == 0) neighbor.nbiny = 1;

  if(neighbor.nbinz == 0) neighbor.nbinz = 1;

  integrate.ntimes = in.ntimes;
  integrate.dt = in.dt;
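  // sort period: the given value if sort>0, the reneighboring frequency if unset (sort<0), disabled if sort==0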
  integrate.sort_every = sort>0?sort:(sort<0?in.neigh_every:0);
  neighbor.every = in.neigh_every;
  neighbor.cutneigh = in.neigh_cut;
  force->cutforce = in.force_cut;
  thermo.nstat = in.thermo_nstat;


  if(me == 0)
    printf("# Create System:\n");

  if(in.datafile) {
    read_lammps_data(atom, comm, neighbor, integrate, thermo, in.datafile, in.units);
    MMD_float volume = atom.box.xprd * atom.box.yprd * atom.box.zprd;
    in.rho = 1.0 * atom.natoms / volume;
    force->setup();

    if(in.forcetype == FORCEEAM) atom.mass = force->mass;
  } else {
    create_box(atom, in.nx, in.ny, in.nz, in.rho);

    comm.setup(neighbor.cutneigh, atom);

    neighbor.setup(atom);

    integrate.setup();

    force->setup();

    if(in.forcetype == FORCEEAM) atom.mass = force->mass;

    create_atoms(atom, in.nx, in.ny, in.nz, in.rho);
    thermo.setup(in.rho, integrate, atom, in.units);

    create_velocity(in.t_request, atom, thermo);

  }

  if(me == 0)
    printf("# Done .... \n");

  if(me == 0) {
    fprintf(stdout, "# " VARIANT_STRING " output ...\n");
    fprintf(stdout, "# Run Settings: \n");
    fprintf(stdout, "\t# MPI processes: %i\n", neighbor.threads->mpi_num_threads);
    fprintf(stdout, "\t# OpenMP threads: %i\n", neighbor.threads->omp_num_threads);
    fprintf(stdout, "\t# Inputfile: %s\n", input_file == 0 ? "in.lj.miniMD" : input_file);
    fprintf(stdout, "\t# Datafile: %s\n", in.datafile ? in.datafile : "None");
    fprintf(stdout, "# Physics Settings: \n");
    fprintf(stdout, "\t# ForceStyle: %s\n", in.forcetype == FORCELJ ? "LJ" : "EAM");
    fprintf(stdout, "\t# Force Parameters: %2.2lf %2.2lf\n",in.epsilon,in.sigma);
    fprintf(stdout, "\t# Units: %s\n", in.units == 0 ? "LJ" : "METAL");
    fprintf(stdout, "\t# Atoms: %i\n", atom.natoms);
    fprintf(stdout, "\t# System size: %2.2lf %2.2lf %2.2lf (unit cells: %i %i %i)\n", atom.box.xprd, atom.box.yprd, atom.box.zprd, in.nx, in.ny, in.nz);
    fprintf(stdout, "\t# Density: %lf\n", in.rho);
    fprintf(stdout, "\t# Force cutoff: %lf\n", force->cutforce);
    fprintf(stdout, "\t# Timestep size: %lf\n", integrate.dt);
    fprintf(stdout, "# Technical Settings: \n");
    fprintf(stdout, "\t# Neigh cutoff: %lf\n", neighbor.cutneigh);
    fprintf(stdout, "\t# Half neighborlists: %i\n", neighbor.halfneigh);
    fprintf(stdout, "\t# Neighbor bins: %i %i %i\n", neighbor.nbinx, neighbor.nbiny, neighbor.nbinz);
    fprintf(stdout, "\t# Neighbor frequency: %i\n", neighbor.every);
    fprintf(stdout, "\t# Sorting frequency: %i\n", integrate.sort_every);
    fprintf(stdout, "\t# Thermo frequency: %i\n", thermo.nstat);
    fprintf(stdout, "\t# Ghost Newton: %i\n", ghost_newton);
    fprintf(stdout, "\t# Use intrinsics: %i\n", force->use_sse);
    fprintf(stdout, "\t# Do safe exchange: %i\n", comm.do_safeexchange);
    fprintf(stdout, "\t# Size of float: %i\n\n", (int) sizeof(MMD_float));
  }

  comm.exchange(atom);
  if(sort>0)
    atom.sort(neighbor);
  comm.borders(atom);

  force->evflag = 1;
  #pragma omp parallel
  {
    neighbor.build(atom);
  
    force->compute(atom, neighbor, comm, me);
  }

  if(neighbor.halfneigh && neighbor.ghost_newton)
    comm.reverse_communicate(atom);

  if(me == 0) printf("# Starting dynamics ...\n");

  if(me == 0) printf("# Timestep T U P Time\n");

  #pragma omp parallel
  {
    thermo.compute(0, atom, neighbor, force, timer, comm);
  }

  timer.barrier_start(TIME_TOTAL);
  integrate.run(atom, force, neighbor, comm, thermo, timer);
  timer.barrier_stop(TIME_TOTAL);

  int natoms;
  MPI_Allreduce(&atom.nlocal, &natoms, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

  force->evflag = 1;
  force->compute(atom, neighbor, comm, me);

  if(neighbor.halfneigh && neighbor.ghost_newton)
    comm.reverse_communicate(atom);

  thermo.compute(-1, atom, neighbor, force, timer, comm);

  if(me == 0) {
    double time_other = timer.array[TIME_TOTAL] - timer.array[TIME_FORCE] - timer.array[TIME_NEIGH] - timer.array[TIME_COMM];
    printf("\n\n");
    printf("# Performance Summary:\n");
    printf("# MPI_proc OMP_threads nsteps natoms t_total t_force t_neigh t_comm t_other performance perf/thread grep_string t_extra\n");
    printf("%i %i %i %i %lf %lf %lf %lf %lf %lf %lf PERF_SUMMARY %lf\n\n\n",
           nprocs, num_threads, integrate.ntimes, natoms,
           timer.array[TIME_TOTAL], timer.array[TIME_FORCE], timer.array[TIME_NEIGH], timer.array[TIME_COMM], time_other,
           1.0 * natoms * integrate.ntimes / timer.array[TIME_TOTAL], 1.0 * natoms * integrate.ntimes / timer.array[TIME_TOTAL] / nprocs / num_threads, timer.array[TIME_TEST]);

  }

  if(yaml_output)
    output(in, atom, force, neighbor, comm, thermo, integrate, timer, screen_yaml);

  delete force;
  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Finalize();
  return 0;
}
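The PERF_SUMMARY line reports throughput as atom-steps per second, natoms * nsteps / t_total, and the per-thread figure divides that by nprocs * num_threads. A tiny self-contained check with illustrative values:

#include <cassert>

int main()
{
    int natoms = 32000, nsteps = 100, nprocs = 2, nthreads = 4;
    double t_total = 8.0;                            // seconds
    double perf = 1.0 * natoms * nsteps / t_total;   // atom-steps per second
    double perf_per_thread = perf / nprocs / nthreads;
    assert(perf == 400000.0);
    assert(perf_per_thread == 50000.0);
    return 0;
}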
Example #14
double PotJMEAMSpline::fast_compute(const Comm& comm, ErrorVec *error_vec)
{
  // Untrap procs if necessary
  if (is_trapped_) {
    if (error_vec) {
      int flag = 3;
      comm.bcast(&flag, 1, MPI_INT, comm.get_root());
    } else {
      int flag = 2;
      comm.bcast(&flag, 1, MPI_INT, comm.get_root());
    }
  }

  // Initialize potential on all procs
  initialize_pot(comm, error_vec);

  // Initialize potential by resetting forces
  initialize_compute(comm);

  // Setup local variables for potential functions
  std::vector<T *> phi_fns;
  for (Basis*& fn : pot_fns_[0].fns)
    phi_fns.push_back(static_cast<T *>(fn));
  std::vector<T *> rho_fns;
  for (Basis*& fn : pot_fns_[1].fns)
    rho_fns.push_back(static_cast<T *>(fn));
  std::vector<T *> F_fns;
  for (Basis*& fn : pot_fns_[2].fns)
    F_fns.push_back(static_cast<T *>(fn));
  std::vector<T *> f_fns;
  for (Basis*& fn : pot_fns_[3].fns)
    f_fns.push_back(static_cast<T *>(fn));
  std::vector<T *> g_fns;
  for (Basis*& fn : pot_fns_[4].fns)
    g_fns.push_back(static_cast<T *>(fn));
  std::vector<T *> p_fns;
  for (Basis*& fn : pot_fns_[5].fns)
    p_fns.push_back(static_cast<T *>(fn));
  std::vector<T *> q_fns;
  for (Basis*& fn : pot_fns_[6].fns)
    q_fns.push_back(static_cast<T *>(fn));

  // Set up constraint error (error from density going out of bounds of embedding function)
  Vector<double> constraint_err(mmz->config->ncells,0.0);

  // Loop over all atoms in atomvec
  for (Atom*& atom_i_ptr : mmz->atomvec->atoms) {
    // Make temporary atom and cell
    AtomJMEAMSpline &atom_i = *(static_cast<AtomJMEAMSpline *>(atom_i_ptr));
    Cell &cell = *mmz->config->cells[atom_i.cell_idx];

    double rho_val = 0.0; // initialize density for this atom
    double dF = 0.0;      // initialize gradient of embedding fn for this atom

    // Loop over pairs for this atom
    for (Pair*& pair_ij_ptr : atom_i.pairs) {
      PairJMEAMSpline &pair_ij = *(static_cast<PairJMEAMSpline *>(pair_ij_ptr));  // tmp pair

      // Check that neighbor length lies in pair potential radius
      if (pair_ij.phi_knot != -1) {
        AtomJMEAMSpline &atom_j = *(static_cast<AtomJMEAMSpline *>(pair_ij.neigh));  // tmp atom

        // Compute phi(r_ij) and its gradient in one step
        double phigrad;
        double phival = 0.5 * phi_fns[pair_ij.phi_idx]->T::splint_comb(pair_ij.phi_knot, pair_ij.phi_shift, &phigrad);

        phigrad *= 0.5; // only half of the gradient/energy contributes to the force/energy since we are double counting

        cell.energy += phival;  // add in piece contributed by neighbor to energy

        Vect tmp_force = pair_ij.dist * phigrad;  // compute tmp force values
        atom_i.force += tmp_force;  // add in force on atom i from atom j
        atom_j.force -= tmp_force;  // subtract off force on atom j from atom i (Newton's law: action = -reaction)

        // Compute stress on cell
        tmp_force *= pair_ij.r;
        cell.stress -= pair_ij.dist & tmp_force;
      } // END IF STMNT: PAIR LIES INSIDE CUTOFF FOR PAIR POTENTIAL

      // Check that neighbor length lies in rho potential (density function) radius
      if (pair_ij.rho_knot != -1) {
        // Compute density and its gradient in one step
        rho_val += rho_fns[pair_ij.rho_idx]->T::splint_comb(pair_ij.rho_knot, pair_ij.rho_shift, &pair_ij.drho);
      } else {
        pair_ij.drho = 0.0;
      } // END IF STMNT: PAIR LIES INSIDE CUTOFF FOR RHO POTENTIAL

      // Check that neighbor length lies in f- potential radius
      if (pair_ij.f_knot != -1) {
        pair_ij.f = f_fns[pair_ij.f_idx]->T::splint_comb(pair_ij.f_knot, pair_ij.f_shift, &pair_ij.df);
      } else {
        pair_ij.f = 0.0;
        pair_ij.df = 0.0;
      } // END IF STMNT: PAIR LIES INSIDE CUTOFF FOR f- POTENTIAL

      // Check that neighbor length lies in p- potential radius
      if (pair_ij.p_knot != -1) {
        pair_ij.p = p_fns[pair_ij.p_idx]->T::splint_comb(pair_ij.p_knot, pair_ij.p_shift, &pair_ij.dp);
      } else {
        pair_ij.p = 0.0;
        pair_ij.dp = 0.0;
      } // END IF STMNT: PAIR LIES INSIDE CUTOFF FOR p- POTENTIAL
    } // END LOOP OVER PAIRS

    // Loop over every angle formed by pairs called triplets
    for (Triplet*& triplet_ijk_ptr : atom_i.triplets) {
      TripletJMEAMSpline &triplet_ijk = *(static_cast<TripletJMEAMSpline *>(triplet_ijk_ptr));  // tmp triplet
      PairJMEAMSpline &pair_ij = *(static_cast<PairJMEAMSpline *>(triplet_ijk.pair_ij));  // tmp pairs
      PairJMEAMSpline &pair_ik = *(static_cast<PairJMEAMSpline *>(triplet_ijk.pair_ik));

      // The cos(theta) should always lie inside -1 ... 1
      // So store the g and g' without checking bounds
      triplet_ijk.g = g_fns[triplet_ijk.g_idx]->T::splint_comb(triplet_ijk.g_knot, triplet_ijk.g_shift, &triplet_ijk.dg);
      triplet_ijk.q = q_fns[triplet_ijk.q_idx]->T::splint_comb(triplet_ijk.q_knot, triplet_ijk.q_shift, &triplet_ijk.dq);

      // Sum up rho piece for atom i caused by j and k
      // f_ij * f_ik * g_ijk
      rho_val     += pair_ij.f * pair_ik.f * triplet_ijk.g;
      cell.energy += pair_ij.p * pair_ik.p * triplet_ijk.q;
    } // END LOOP OVER TRIPLETS

    // Compute energy, gradient for embedding function F
    // Punish this potential for having rho lie outside of F
    if ( rho_val < F_fns[atom_i.F_idx]->get_min_rcut() ) {
      double rho_i = F_fns[atom_i.F_idx]->get_min_rcut();
      constraint_err[atom_i.cell_idx] += cell.weight * DUMMY_WEIGHT * 10. * (rho_i - rho_val) * (rho_i - rho_val);
      if (!embed_extrap_)
        rho_val = rho_i;  // set the density to the inner cutoff if we don't extrapolate embedding fn later
    } else if ( rho_val > F_fns[atom_i.F_idx]->get_max_rcut() ) {
      double rho_f = F_fns[atom_i.F_idx]->get_max_rcut();
      constraint_err[atom_i.cell_idx] += cell.weight * DUMMY_WEIGHT * 10. * (rho_val - rho_f) * (rho_val - rho_f);
      if (!embed_extrap_)
        rho_val = rho_f;  // set the density to the outer cutoff if we don't extrapolate embedding fn later
    }

    // Add energy contribution from embedding function and get gradient in one step
    cell.energy += F_fns[atom_i.F_idx]->T::splint_comb(rho_val, &dF);

    // Loop over pairs for this atom to compute EAM force
    for (Pair*& pair_ij_ptr : atom_i.pairs) {
      PairJMEAMSpline &pair_ij = *(static_cast<PairJMEAMSpline *>(pair_ij_ptr));  // tmp pair
      AtomJMEAMSpline &atom_j = *(static_cast<AtomJMEAMSpline *>(pair_ij.neigh));  // tmp atom

      Vect tmp_force = pair_ij.dist * pair_ij.drho * dF;  // compute tmp force values
      atom_i.force += tmp_force;  // add in force on atom i from atom j
      atom_j.force -= tmp_force;  // subtract off force on atom j from atom i (Newton's law: action = -reaction)

      // Compute stress on cell
      tmp_force *= pair_ij.r;
      cell.stress -= pair_ij.dist & tmp_force;
    } // END 2nd LOOP OVER PAIRS

    // Loop over every angle formed by pairs called triplets
    for (Triplet*& triplet_ijk_ptr : atom_i.triplets) {
      TripletJMEAMSpline &triplet_ijk = *(static_cast<TripletJMEAMSpline *>(triplet_ijk_ptr));  // tmp triplet
      PairJMEAMSpline &pair_ij = *(static_cast<PairJMEAMSpline *>(triplet_ijk.pair_ij));  // tmp pairs
      PairJMEAMSpline &pair_ik = *(static_cast<PairJMEAMSpline *>(triplet_ijk.pair_ik));
      AtomJMEAMSpline &atom_j = *(static_cast<AtomJMEAMSpline *>(pair_ij.neigh));  // tmp atoms
      AtomJMEAMSpline &atom_k = *(static_cast<AtomJMEAMSpline *>(pair_ik.neigh));

      // Some tmp variables to clean up force fn below
      double dV3j = triplet_ijk.g  * pair_ij.df * pair_ik.f  * dF + triplet_ijk.q  * pair_ij.dp * pair_ik.p;
      double dV3k = triplet_ijk.g  * pair_ij.f  * pair_ik.df * dF + triplet_ijk.q  * pair_ij.p  * pair_ik.dp;
      double V3   = triplet_ijk.dg * pair_ij.f  * pair_ik.f  * dF + triplet_ijk.dq * pair_ij.p  * pair_ik.p;

      double vlj  = V3 * pair_ij.invr;
      double vlk  = V3 * pair_ik.invr;
      double vv3j = dV3j - vlj * triplet_ijk.cos;
      double vv3k = dV3k - vlk * triplet_ijk.cos;

      Vect dfj = pair_ij.dist * vv3j + pair_ik.dist * vlj;
      Vect dfk = pair_ik.dist * vv3k + pair_ij.dist * vlk;

      atom_i.force += dfj + dfk;  // force on atom i from j and k
      atom_j.force -= dfj;  // reaction force on atom j from i and k
      atom_k.force -= dfk;  // reaction force on atom k from i and j

      // Compute stress on cell
      dfj *= pair_ij.r;
      dfk *= pair_ik.r;
      cell.stress -= pair_ij.dist & dfj;
      cell.stress -= pair_ik.dist & dfk;
    } // END LOOP OVER TRIPLETS
  } // END 1st LOOP OVER ATOMS

  accumulate_error(comm, error_vec, constraint_err);

  // Punishment for f-pot y-max magnitude not being 1.0
  double max_f_mag = std::abs(pot_fns_[3].get_max_y_mag());
  double f_pot_error = DUMMY_WEIGHT * 25. * (1.0 - max_f_mag) * (1.0 - max_f_mag);
  error_sum_ += f_pot_error * f_pot_error;
  if (error_vec && comm.is_root()) error_vec->push_back(f_pot_error);

  // Punishment for p-pot y-max magnitude not being 1.0
  double max_p_mag = std::abs(pot_fns_[5].get_max_y_mag());
  double p_pot_error = DUMMY_WEIGHT * 25. * (1.0 - max_p_mag) * (1.0 - max_p_mag);
  error_sum_ += p_pot_error * p_pot_error;
  if (error_vec && comm.is_root()) error_vec->push_back(p_pot_error);

  ++ncalls_;  // keep track of the number of times this function is called

  return error_sum_;
}
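Both punishment terms at the end share one pattern: a quadratic penalty on how far a y-max magnitude deviates from 1.0, squared once more when accumulated into error_sum_. Written out as a hypothetical helper (same DUMMY_WEIGHT * 25 factor as the source):

#include <cmath>

// Sketch: per-term penalty; the caller adds its square to error_sum_.
double magnitudePenalty(double maxYMag, double dummyWeight)
{
  double dev = 1.0 - std::abs(maxYMag);
  return dummyWeight * 25. * dev * dev;
}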
Example #15
int main(int argc, char **argv)
{
  //Common miniMD settings
  In in;
  in.datafile = NULL;
  int me=0;                     //local MPI rank
  int nprocs=1;                 //number of MPI ranks
  int num_threads=32;		    //number of threads per block
  int num_steps=-1;             //number of timesteps (if -1 use value from lj.in)
  int system_size=-1;           //size of the system (if -1 use value from lj.in)
  int check_safeexchange=0;     //if 1 complain if atom moves further than 1 subdomain length between exchanges
  int do_safeexchange=0;        //if 1 use safe exchange mode [allows exchange over multiple subdomains]
  int use_sse=0;                //setting for SSE variant of miniMD only
  int screen_yaml=0;            //print yaml output to screen also
  int yaml_output=0;            //print yaml output
  int halfneigh=0;              //1: use half neighborlist; 0: use full neighborlist; -1: use original miniMD version half neighborlist force
  char* input_file = NULL;
  int ghost_newton = 0;
  int skip_gpu = 999;
  int neighbor_size = -1;

  //OpenCL specific
  int platform = 0;
  int device = 0;
  int subdevice = -1;
  int ppn = 2;
  int use_tex = 0;
  int threads_per_atom = 1;
  int map_device=0;

  for(int i = 0; i < argc; i++) {
    if((strcmp(argv[i], "-i") == 0) || (strcmp(argv[i], "--input_file") == 0)) {
      input_file = argv[++i];
      continue;
    }
    if((strcmp(argv[i],"-p")==0)||(strcmp(argv[i],"--platform")==0)) {platform=atoi(argv[++i]); continue;}
    if((strcmp(argv[i],"-d")==0)||(strcmp(argv[i],"--device")==0)) {device=atoi(argv[++i]); continue;}
	if((strcmp(argv[i],"-sd")==0)||(strcmp(argv[i],"--subdevice")==0)) {subdevice=atoi(argv[++i]); continue;}
	if((strcmp(argv[i],"-sd_map")==0)||(strcmp(argv[i],"--subdevice_mapping")==0)) {subdevice=1-me%ppn; continue;}
	if((strcmp(argv[i],"-ng")==0)||(strcmp(argv[i],"--num_gpus")==0)) {ppn=atoi(argv[++i]); continue;}
	if((strcmp(argv[i],"-dm")==0)||(strcmp(argv[i],"--device_map")==0)) {map_device=1; continue;}
  }

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &me);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  if(map_device) {device = me%ppn; if(device>=skip_gpu) device++;}

  OpenCLWrapper* opencl = new OpenCLWrapper;
  if(me == 0)
    printf("# Platforms: %i\n",opencl->num_platforms);
  printf("# Proc: %i using device %i\n",me,device);
  opencl->Init(argc,argv,device,device+1,NULL,platform,subdevice);

  int error = 0;
  if(input_file == NULL)
    error = input(in, "in.lj.miniMD");
  else
    error = input(in, input_file);

  if (error) {
    MPI_Finalize();
    exit(0);
  }

  for(int i=0;i<argc;i++) {
     if((strcmp(argv[i],"-t")==0)||(strcmp(argv[i],"--num_threads")==0)) {num_threads=atoi(argv[++i]); continue;}
     if((strcmp(argv[i],"-n")==0)||(strcmp(argv[i],"--nsteps")==0))  {num_steps=atoi(argv[++i]); continue;}
     if((strcmp(argv[i],"-s")==0)||(strcmp(argv[i],"--size")==0))  {system_size=atoi(argv[++i]); continue;}
     if((strcmp(argv[i],"--half_neigh")==0))  {halfneigh=atoi(argv[++i]); continue;}
     if((strcmp(argv[i],"-sse")==0))  {use_sse=atoi(argv[++i]); continue;}
     if((strcmp(argv[i],"--check_exchange")==0))  {check_safeexchange=1; continue;}
     if((strcmp(argv[i],"-o")==0)||(strcmp(argv[i],"--yaml_output")==0))  {yaml_output=atoi(argv[++i]); continue;}
     if((strcmp(argv[i],"--yaml_screen")==0))  {screen_yaml=1; continue;}
     if((strcmp(argv[i], "-f") == 0) || (strcmp(argv[i], "--data_file") == 0)) {
       if(in.datafile == NULL) in.datafile = new char[1000];

       strcpy(in.datafile, argv[++i]);
       continue;
     }
     if((strcmp(argv[i], "-u") == 0) || (strcmp(argv[i], "--units") == 0)) {
       in.units = strcmp(argv[++i], "metal") == 0 ? 1 : 0;
       continue;
     }

     if((strcmp(argv[i], "-p") == 0) || (strcmp(argv[i], "--force") == 0)) {
       in.forcetype = strcmp(argv[++i], "eam") == 0 ? FORCEEAM : FORCELJ;
       continue;
     }
     if((strcmp(argv[i], "-gn") == 0) || (strcmp(argv[i], "--ghost_newton") == 0)) {
       ghost_newton = atoi(argv[++i]);
       continue;
     }
     if((strcmp(argv[i], "--skip_gpu") == 0)) {
       skip_gpu = atoi(argv[++i]);
       continue;
     }
     if((strcmp(argv[i], "-b") == 0) || (strcmp(argv[i], "--neigh_bins") == 0))  {
       neighbor_size = atoi(argv[++i]);
       continue;
     }
	 if((strcmp(argv[i],"-tex")==0)||(strcmp(argv[i],"--texture")==0)) {use_tex=atoi(argv[++i]); continue;}
     if((strcmp(argv[i],"-tpa")==0)) {threads_per_atom=atoi(argv[++i]); continue;}
     if((strcmp(argv[i],"-h")==0)||(strcmp(argv[i],"--help")==0))
     {
        printf("\n---------------------------------------------------------\n");
        printf("-------------" VARIANT_STRING "------------\n");
        printf("---------------------------------------------------------\n\n");

        printf("miniMD is a simple, parallel molecular dynamics (MD) code,\n"
               "which is part of the Mantevo project at Sandia National\n"
   	           "Laboratories ( http://www.mantevo.org ).\n"
	           "The original authors of MPI based miniMD are Steve Plimpton ([email protected]) ,\n"
               "Paul Crozier ([email protected]) with current versions \n"
               "written by Christian Trott ([email protected]).\n\n");
        printf("Commandline Options:\n");
        printf("\n  Execution configuration:\n");
        printf("\t-t / --num_threads <threads>: set number of threads per block (default 32)\n");
        printf("\t--half_neigh <int>:           use half neighborlists (default 0)\n"
               "\t                                0: full neighborlist\n"
               "\t                                1: half neighborlist (not supported in OpenCL variant)\n"
               "\t                               -1: original miniMD half neighborlist force \n"
               "\t                                   (not supported in OpenCL variant)\n");
        printf("\t-d / --device <int>:          select device (default 0)\n");
        printf("\t-dm / --device_map:           map devices to MPI ranks\n");
        printf("\t-ng / --num_gpus <int>:       give number of GPUs per Node (used in conjuction with -dm\n"
        	   "\t                              to determine device id: 'id=mpi_rank%%ng' (default 2)\n");
        printf("\t--skip_gpu <int>:             skip the specified gpu when assigning devices to MPI ranks\n"
        	   "\t                              used in conjunction with -dm (but must come first in arg list)\n");
        printf("\t-p / --platform <int>:        select platform (default 0)\n");
        printf("\t-sse <sse_version>:           use explicit sse intrinsics (use miniMD-SSE variant)\n");
        printf("\t-gn / --ghost_newton <int>:   set usage of newtons third law for ghost atoms\n"
               "\t                              (only applicable with half neighborlists)\n");
        printf("\n  Simulation setup:\n");
        printf("\t-i / --input_file <string>:   set input file to be used (default: in.lj.miniMD)\n");
        printf("\t-n / --nsteps <nsteps>:       set number of timesteps for simulation\n");
        printf("\t-s / --size <size>:           set linear dimension of systembox and neighbor bins\n");
        printf("\t-b / --neigh_bins <int>:      set linear dimension of neighbor bin grid\n");
        printf("\t-u / --units <string>:        set units (lj or metal), see LAMMPS documentation\n");
        printf("\t-p / --force <string>:        set interaction model (lj or eam)\n");
        printf("\t-f / --data_file <string>:    read configuration from LAMMPS data file\n");

        printf("\n  Miscelaneous:\n");
        printf("\t--check_exchange:             check whether atoms moved further than subdomain width\n");
        printf("\t--safe_exchange:              perform exchange communication with all MPI processes\n"
	           "\t                              within rcut_neighbor (outer force cutoff)\n");
        printf("\t--yaml_output <int>:          level of yaml output (default 0)\n");
        printf("\t--yaml_screen:                write yaml output also to screen\n");
        printf("\t-tex / --texture <int>:       use texture cache in force kernel (default 0)\n");
        printf("\t-h / --help:                  display this help message\n\n");
        printf("---------------------------------------------------------\n\n");

        exit(0);
     }
  }

  Atom atom;
  Force force;
  Neighbor neighbor;
  Integrate integrate;
  Thermo thermo;
  Comm comm;
  Timer timer;
  ThreadData threads;

  if(in.forcetype == FORCEEAM) {
    printf("ERROR: " VARIANT_STRING " does not yet support EAM simulations. Exiting.\n");
    MPI_Finalize();
    exit(0);
  }
  if(ghost_newton!=0)
  {
    if(me ==0 ) printf("ERROR: -ghost_newton %i is not supported in " VARIANT_STRING ". Exiting.\n",ghost_newton);
    MPI_Finalize();
    exit(0);
  }
  if(halfneigh!=0)
  {
    if(me == 0) printf("ERROR: -half_neigh %i is not supported in " VARIANT_STRING ". Exiting.\n",halfneigh);
    MPI_Finalize();
    exit(0);
  }
  if(use_tex!=0)
  {
    if(me ==0 ) printf("ERROR: -tex %i is currently broken. Exiting.\n",use_tex);
    MPI_Finalize();
    exit(0);
  }
  if(use_sse)
  {
    #ifndef VARIANT_SSE
    if(me ==0 ) printf("ERROR: Trying to run with -sse with miniMD reference version. Use SSE variant instead. Exiting.\n");
    MPI_Finalize();
    exit(0);
    #endif
  }

  threads.mpi_me=me;
  threads.mpi_num_threads=nprocs;
  threads.omp_me=0;
  threads.omp_num_threads=num_threads;

  atom.threads = &threads;
  comm.threads = &threads;
  force.threads = &threads;
  integrate.threads = &threads;
  neighbor.threads = &threads;
  thermo.threads = &threads;


  opencl->blockdim = num_threads;
  atom.threads_per_atom = threads_per_atom;
  atom.use_tex = use_tex;

  comm.do_safeexchange=do_safeexchange;
  force.use_sse=use_sse;
  neighbor.halfneigh=halfneigh;


  compile_kernels(opencl);

  integrate.opencl = opencl;
  force.opencl = opencl;
  neighbor.opencl = opencl;
  atom.opencl = opencl;
  comm.opencl = opencl;

  if(num_steps > 0) in.ntimes = num_steps;

  if(system_size > 0) {
    in.nx = system_size;
    in.ny = system_size;
    in.nz = system_size;
  }

  if(neighbor_size > 0) {
    neighbor.nbinx = neighbor_size;
    neighbor.nbiny = neighbor_size;
    neighbor.nbinz = neighbor_size;
  }

  if(neighbor_size < 0 && in.datafile == NULL) {
    MMD_float neighscale = 5.0 / 6.0;
    neighbor.nbinx = neighscale * in.nx;
    neighbor.nbiny = neighscale * in.ny;
    neighbor.nbinz = neighscale * in.nz;
  }

  if(neighbor_size < 0 && in.datafile)
    neighbor.nbinx = -1;

  if(neighbor.nbinx == 0) neighbor.nbinx = 1;

  if(neighbor.nbiny == 0) neighbor.nbiny = 1;

  if(neighbor.nbinz == 0) neighbor.nbinz = 1;

  integrate.ntimes = in.ntimes;
  integrate.dt = in.dt;
  neighbor.every = in.neigh_every;
  neighbor.cutneigh = in.neigh_cut;
  force.cutforce = in.force_cut;
  thermo.nstat = in.thermo_nstat;


  if(me == 0)
    printf("# Create System:\n");

  if(in.datafile) {
    read_lammps_data(atom, comm, neighbor, integrate, thermo, in.datafile, in.units);
    MMD_float volume = atom.box.xprd * atom.box.yprd * atom.box.zprd;
    in.rho = 1.0 * atom.natoms / volume;
    force.setup();

  } else {
    create_box(atom, in.nx, in.ny, in.nz, in.rho);

    comm.setup(neighbor.cutneigh, atom);

    neighbor.setup(atom);

    integrate.setup();

    force.setup();


    create_atoms(atom, in.nx, in.ny, in.nz, in.rho);
    thermo.setup(in.rho, integrate, atom, in.units);

    create_velocity(in.t_request, atom, thermo);

  }

  if(me == 0)
    printf("# Done .... \n");

  if(me == 0) {
    fprintf(stdout, "# " VARIANT_STRING " output ...\n");
    fprintf(stdout, "# Systemparameters: \n");
    fprintf(stdout, "\t# MPI processes: %i\n", neighbor.threads->mpi_num_threads);
    fprintf(stdout, "\t# OpenMP threads: %i\n", neighbor.threads->omp_num_threads);
    fprintf(stdout, "\t# Inputfile: %s\n", input_file == 0 ? "in.lj.miniMD" : input_file);
    fprintf(stdout, "\t# Datafile: %s\n", in.datafile ? in.datafile : "None");
    fprintf(stdout, "\t# ForceStyle: %s\n", in.forcetype == FORCELJ ? "LJ" : "EAM");
    fprintf(stdout, "\t# Units: %s\n", in.units == 0 ? "LJ" : "METAL");
    fprintf(stdout, "\t# Atoms: %i\n", atom.natoms);
    fprintf(stdout, "\t# System size: %2.2lf %2.2lf %2.2lf (unit cells: %i %i %i)\n", atom.box.xprd, atom.box.yprd, atom.box.zprd, in.nx, in.ny, in.nz);
    fprintf(stdout, "\t# Density: %lf\n", in.rho);
    fprintf(stdout, "\t# Force cutoff: %lf\n", force.cutforce);
    fprintf(stdout, "\t# Neigh cutoff: %lf\n", neighbor.cutneigh);
    fprintf(stdout, "\t# Half neighborlists: %i\n", neighbor.halfneigh);
    fprintf(stdout, "\t# Neighbor bins: %i %i %i\n", neighbor.nbinx, neighbor.nbiny, neighbor.nbinz);
    fprintf(stdout, "\t# Neighbor frequency: %i\n", neighbor.every);
    fprintf(stdout, "\t# Timestep size: %lf\n", integrate.dt);
    fprintf(stdout, "\t# Thermo frequency: %i\n", thermo.nstat);
    fprintf(stdout, "\t# Ghost Newton: %i\n", ghost_newton);
    fprintf(stdout, "\t# Use SSE intrinsics: %i\n", force.use_sse);
    fprintf(stdout, "\t# Do safe exchange: %i\n", comm.do_safeexchange);
    fprintf(stdout, "\t# Size of float: %i\n\n",sizeof(MMD_float));
  }

  comm.exchange(atom);
  comm.borders(atom);

  atom.d_x->upload();
  atom.d_v->upload();
  //atom.d_vold->upload();
  neighbor.build(atom);

  if (me == 0) printf("# Starting dynamics ...\n");
  if (me == 0) printf("# Timestep T U P Time\n");
  thermo.compute(0,atom,neighbor,force,timer,comm);
  force.compute(atom,neighbor,comm.me);
  timer.barrier_start(TIME_TOTAL);
  integrate.run(atom,force,neighbor,comm,thermo,timer);
  timer.barrier_stop(TIME_TOTAL);

  int natoms;
  MPI_Allreduce(&atom.nlocal,&natoms,1,MPI_INT,MPI_SUM,MPI_COMM_WORLD);
  thermo.compute(-1,atom,neighbor,force,timer,comm);

  if(me == 0) {
    double time_other=timer.array[TIME_TOTAL]-timer.array[TIME_FORCE]-timer.array[TIME_NEIGH]-timer.array[TIME_COMM];
    printf("\n\n");
    printf("# Performance Summary:\n");
    printf("# MPI_proc OMP_threads nsteps natoms t_total t_force t_neigh t_comm t_other performance perf/thread grep_string t_extra\n");
    printf("%i %i %i %i %lf %lf %lf %lf %lf %lf %lf PERF_SUMMARY %lf\n\n\n",
       nprocs,num_threads,integrate.ntimes,natoms,
       timer.array[TIME_TOTAL],timer.array[TIME_FORCE],timer.array[TIME_NEIGH],timer.array[TIME_COMM],time_other,
       1.0*natoms*integrate.ntimes/timer.array[TIME_TOTAL],1.0*natoms*integrate.ntimes/timer.array[TIME_TOTAL]/nprocs/num_threads,timer.array[TIME_TEST]);

  }

  if(yaml_output)
  output(in,atom,force,neighbor,comm,thermo,integrate,timer,screen_yaml);

  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Finalize();
  delete opencl;
  return 0;
}
Example #16
void setTargetingDWrapper(Arguments * input, Reply * output){
    comm.setTargetingD(input, output);
};
Example #17
void printGainsWrapper(Arguments * input, Reply * output){
    comm.printGains();
};
Example #18
void setDesiredThetaPWrapper(Arguments * input, Reply * output){
    comm.setDesiredThetaP(input, output);
};
Example #19
void setSwingUpDWrapper(Arguments * input, Reply * output){
    comm.setSwingUpD(input, output);
};
Example #20
int main (int argc, char **argv)
{
  CGMTimers *timers = new CGMTimers ();
  Comm *comm = Comm::getComm (&argc, &argv, timers);
  int tag = 0;
  int p = comm->getNumberOfProcessors ();
  //printf("Number of processors %d\n",p);
  int id = comm->getMyId ();
  //printf("ID%d\n",id);
  int sendTo = (id + 1) % p;
  int receiveFrom = (id - 1 + p) % p;
  int actualSource = -1;
  vector<int> values;
  if (argc > 1)
  {
    char *size=strtok(argv[1],",");
    while (size!=NULL)
    {
      values.push_back(atoi(size));
      size=strtok(NULL,",");
    }
  }
  else
  {
    printf("Missing argument with the matrix sizes: e.g. 1,3,4,5,6\n");
  }
  //printf("size %d\n",values.size());
  //int bloco=(values.size()-1)/p;
  //printf("Bloco %d\n",bloco);
  SimpleCommObject<int> sample(0);
  int matrix_size=values.size()-1;
  int **total_matrix=new int*[matrix_size];
  for (int i=0;i<matrix_size;i++)
    total_matrix[i]=new int[matrix_size];
  for (int row=0;row<matrix_size;row++)
    for (int col=0;col<matrix_size;col++)
    {
      if (row==col)
        total_matrix[row][row]=0;
      else
        total_matrix[row][col]=-1;
    }
  int *blocos=new int[p];
  int q=(values.size()-1)/p;
  if (q<1) q=1;
  int r=(values.size()-1)%p;
  if (r<1) r=0;
  for (int i=0;i<p;i++)
  {
    if (i<r)
      blocos[i]=q+1;
    else
      blocos[i]=q;
  }
	//printf("ID: %d Bloco: %d\n",id,blocos[id]);
	int bloco_offset=0;
	for (int i=0;i<id;i++)
		bloco_offset+=blocos[i];
	//int row_start=blocos[id]*id;
	int row_start=bloco_offset;
	int row_end=row_start+blocos[id]-1;
	//printf("ID %d, row_start %d row_end %d\n",id,row_start, row_end);
	for (int rodada=0;rodada<=p-id-1;rodada++)
	{
		//printf("ID %d RODADA %d\n",id,rodada);
		//int col_start=blocos[id]*(rodada+id);
		int col_start=bloco_offset+(blocos[id]*rodada);
		//int col_end=blocos[id]*(rodada+id+1)-1;
		int col_end=col_start+blocos[id]-1;
		if (col_start>values.size()-2)
			break;
	//	printf("ID %d, col_start %d, col_end %d RODADA: %d\n",id,col_start, col_end,rodada);
		CommObjectList data_to_send(&sample);

		workOnSubMatrix(&total_matrix, &values, row_start,row_end,col_start,col_end, rodada, blocos[id], id);
		//printf("ID: %d SAIU RODADA: %d\n",id,rodada);
		int **submatrix=new int*[row_end-row_start+1];
		for (int i=0;i<row_end-row_start+1;i++)
			submatrix[i]=new int[col_end-col_start+1];
/*if (id==1)
{		
	for (int row=0;row<matrix_size;row++)
	{
		for (int col=0;col<matrix_size;col++)
			{
				printf("R>%d %d,%d=%d ",rodada,row,col,total_matrix[row][col]);
			}
		printf("\n");
	}
}*/

		convertMatrixToList(&total_matrix,row_start, col_end, col_start, col_end, &data_to_send,id,rodada);
		//printf("ID: %d, COPIOU SUBM RODADA: %d\n",id,rodada);

		CommObjectList data_to_receive(&sample);


		if (id!=0)
		{
			//printf("ID: %d IS SENDING ROUND: %d\n",id,rodada);
			comm -> send(id-1,data_to_send,rodada);
			//printf("SENT FROM ID: %d TO ID: %d TAG_ROUND: %d\n",id,id-1,rodada);
		}
		if (id!=p-1)
		{
			if (col_end>=(int)values.size()-2)
			{
				//printf("ID: %d LEAVING\n",id);
				continue;
			}
			else
			{
				int num_receives=blocos[id]/blocos[id+1];
				//printf("ID: %d EXPECTS TO RECEIVE %d FROM ID: %d ROUND: %d\n",id,num_receives,id+1,rodada);
				for (int i=0;i<num_receives;i++)
				{
					comm -> receive(id+1,data_to_receive,rodada+i,&actualSource);
					copyFromSubMatrix(&total_matrix,&data_to_receive,id+1,id,rodada+i);
					//printf("ID: %d RECEIVED FROM ID: %d TAG_ROUND: %d\n",id,id+1,rodada+i);
				}
			}
		}
	}
	//printf("ID %d finished\n",id);

	if (id==0)
	{
		printf("Total matrix cost %d\n",total_matrix[0][values.size()-2]);
	}
	for (int i=0;i<matrix_size;i++)
		delete[] total_matrix[i];
	delete[] total_matrix;
	delete[] blocos;
	comm -> dispose();
}
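// A minimal stand-alone sketch of the block partition computed above: each of
// the p ranks gets q = n/p rows, and the remainder r = n%p is spread over the
// first r ranks; the q<1 guard mirrors the code above. partition_rows is an
// illustrative name, not part of the original program.
#include <vector>

std::vector<int> partition_rows(int nrows, int p)
{
  int q = nrows / p;
  if (q < 1) q = 1;          // degenerate case: more ranks than rows
  int r = nrows % p;
  std::vector<int> blocks(p);
  for (int i = 0; i < p; ++i)
    blocks[i] = (i < r) ? q + 1 : q;  // first r ranks take one extra row
  return blocks;
}
// e.g. partition_rows(10, 4) yields {3, 3, 2, 2}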
Beispiel #21
0
double PotEAMSpline::fast_compute(const Comm& comm, ErrorVec *error_vec)
{
  // Untrap procs if necessary
  if (is_trapped_) {
    if (error_vec) {
      int flag = 3;
      comm.bcast(&flag, 1, MPI_INT, comm.get_root());
    } else {
      int flag = 2;
      comm.bcast(&flag, 1, MPI_INT, comm.get_root());
    }
  }

  // Initialize potential on all procs
  initialize_pot(comm, error_vec);

  // Initialize potential by resetting forces
  initialize_compute(comm);

  // Setup local variables for potential functions
  std::vector<T *> phi_fns;
  for (Basis*& fn : pot_fns_[0].fns)
    phi_fns.push_back(static_cast<T *>(fn));
  std::vector<T *> rho_fns;
  for (Basis*& fn : pot_fns_[1].fns)
    rho_fns.push_back(static_cast<T *>(fn));
  std::vector<Basis *> F_fns;
  for (Basis*& fn : pot_fns_[2].fns) // allow embedding fn to be any basis
    F_fns.push_back(fn);

  // Set up constraint error (error from density going out of bounds of embedding function)
  Vector<double> constraint_err(mmz->config->ncells,0.0);

  // Loop over all atoms in atomvec
  for (Atom*& atom_i_ptr : mmz->atomvec->atoms) {
    // Make temporary atom and cell
    AtomEAMSpline &atom_i = *(static_cast<AtomEAMSpline *>(atom_i_ptr));
    Cell &cell = *mmz->config->cells[atom_i.cell_idx];

    double rho_val = 0.0; // initialize density for this atom
    double dF = 0.0;      // initialize gradient of embedding fn for this atom

    // Loop over pairs for this atom
    for (Pair*& pair_ij_ptr : atom_i.pairs) {
      PairEAMSpline &pair_ij = *(static_cast<PairEAMSpline *>(pair_ij_ptr));  // tmp pair

      // Check that neighbor length lies in pair potential radius
      if (pair_ij.phi_knot != -1) {
        AtomEAMSpline &atom_j = *(static_cast<AtomEAMSpline *>(pair_ij.neigh));  // tmp atom

        // Compute phi(r_ij) and its gradient in one step
        double phigrad;
        double phival = 0.5 * phi_fns[pair_ij.phi_idx]->T::splint_comb(pair_ij.phi_knot, pair_ij.phi_shift, &phigrad);

        phigrad *= 0.5; // only half of the gradient/energy contributes to the force/energy since we are double counting

        cell.energy += phival;  // add in piece contributed by neighbor to energy

        Vect tmp_force = pair_ij.dist * phigrad;  // compute tmp force values
        atom_i.force += tmp_force;  // add in force on atom i from atom j
        atom_j.force -= tmp_force;  // subtract off force on atom j from atom i (Newton's law: action = -reaction)

        // Compute stress on cell
        tmp_force *= pair_ij.r;
        cell.stress -= pair_ij.dist & tmp_force;
      } // END IF STMNT: PAIR LIES INSIDE CUTOFF FOR PAIR POTENTIAL

      // Check that neighbor length lies in rho potential (density function) radius
      if (pair_ij.rho_knot != -1) {
        // Compute density and its gradient in one step
        rho_val += rho_fns[pair_ij.rho_idx]->T::splint_comb(pair_ij.rho_knot, pair_ij.rho_shift, &pair_ij.drho);
      } else {
        pair_ij.drho = 0.0;
      } // END IF STMNT: PAIR LIES INSIDE CUTOFF FOR RHO POTENTIAL
    } // END LOOP OVER PAIRS

    // Compute energy, gradient for embedding function F
    // Punish this potential for having rho lie outside of F
    if ( rho_val < F_fns[atom_i.F_idx]->get_min_rcut() ) {
      double rho_i = F_fns[atom_i.F_idx]->get_min_rcut();
      constraint_err[atom_i.cell_idx] += cell.weight * DUMMY_WEIGHT * 10. * (rho_i - rho_val) * (rho_i - rho_val);
      if (!embed_extrap_)
        rho_val = rho_i;  // set the density to the inner cutoff if we don't extrapolate embedding fn later
    } else if ( rho_val > F_fns[atom_i.F_idx]->get_max_rcut() ) {
      double rho_f = F_fns[atom_i.F_idx]->get_max_rcut();
      constraint_err[atom_i.cell_idx] += cell.weight * DUMMY_WEIGHT * 10. * (rho_val - rho_f) * (rho_val - rho_f);
      if (!embed_extrap_)
        rho_val = rho_f;  // set the density to the outer cutoff if we don't extrapolate embedding fn later
    }
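    // Both branches add a quadratic penalty, cell.weight * DUMMY_WEIGHT * 10 *
    // (distance outside [rho_min, rho_max])^2, so densities straying farther
    // outside the embedding function's domain are punished harder.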

    // Add energy contribution from embedding function and get gradient in one step
    cell.energy += F_fns[atom_i.F_idx]->eval_comb(rho_val, &dF);

    // Loop over pairs for this atom to compute EAM force
    for (Pair*& pair_ij_ptr : atom_i.pairs) {
      PairEAMSpline &pair_ij = *(static_cast<PairEAMSpline *>(pair_ij_ptr));  // tmp pair
      AtomEAMSpline &atom_j = *(static_cast<AtomEAMSpline *>(pair_ij.neigh));  // tmp atom

      Vect tmp_force = pair_ij.dist * pair_ij.drho * dF;  // compute tmp force values
      atom_i.force += tmp_force;  // add in force on atom i from atom j
      atom_j.force -= tmp_force;  // subtract off force on atom j from atom i (Newton's law: action = -reaction)

      // Compute stress on cell
      tmp_force *= pair_ij.r;
      cell.stress -= pair_ij.dist & tmp_force;
    } // END 2nd LOOP OVER PAIRS
  } // END 1st LOOP OVER ATOMS

  accumulate_error(comm, error_vec, constraint_err);

  // Punishment for U'(n_mean) != 0
  for (int i=0; i<mmz->potlist->get_ntypes(); ++i) {
    double rho_i = F_fns[i]->get_min_rcut();
    double rho_f = F_fns[i]->get_max_rcut();
    double eam_error = DUMMY_WEIGHT * F_fns[i]->eval_grad(0.5 * (rho_i + rho_f));
    error_sum_ += eam_error * eam_error;
    if (error_vec && comm.is_root()) error_vec->push_back(eam_error);
  }

  ++ncalls_;  // keep track of the number of times this function is called

  return error_sum_;
}
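// Both pair loops above apply the same action-reaction pattern; this is a
// minimal stand-alone illustration of it. Vect3 is a simplified stand-in for
// the library's Vect type, and grad stands for the spline gradient (times any
// prefactor such as pair_ij.drho * dF).
struct Vect3 {
  double x, y, z;
  Vect3 operator*(double s) const { return {x * s, y * s, z * s}; }
  Vect3 &operator+=(const Vect3 &o) { x += o.x; y += o.y; z += o.z; return *this; }
  Vect3 &operator-=(const Vect3 &o) { x -= o.x; y -= o.y; z -= o.z; return *this; }
};

void add_pair_force(Vect3 &f_i, Vect3 &f_j, const Vect3 &dist, double grad)
{
  Vect3 tmp = dist * grad;  // force contribution along the pair vector
  f_i += tmp;               // force on atom i from atom j
  f_j -= tmp;               // equal and opposite force on atom j
}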
Beispiel #22
0
void printPositionWrapper(Arguments * input, Reply * output){
    comm.printPosition();
}
Beispiel #23
0
void setSoftLimitsPWrapper(Arguments * input, Reply * output){
    comm.setSoftLimitsP(input, output);
}
Beispiel #24
0
void openGripper2Wrapper(Arguments * input, Reply * output){
    comm.openGripper2(input, output);
}
Beispiel #25
0
void setThetaWrapper(Arguments * input, Reply * output){
    comm.setTheta(input, output);
}
Beispiel #26
0
void Integrate::run(Atom &atom, Force* force, Neighbor &neighbor,
                    Comm &comm, Thermo &thermo, Timer &timer)
{
  int i, n;

  comm.timer = &timer;
  timer.array[TIME_TEST] = 0.0;

  int check_safeexchange = comm.check_safeexchange;

  mass = atom.mass;
  dtforce = dtforce / mass;
  //Use OpenMP threads only within the following block, which contains the main loop.
  //Setup and postprocessing stay serial.
  #pragma omp parallel private(i,n)
  {

    for(n = 0; n < ntimes; n++) {

      #pragma omp barrier

      x = &atom.x[0][0];
      v = &atom.v[0][0];
      f = &atom.f[0][0];
      xold = &atom.xold[0][0];
      nlocal = atom.nlocal;

      initialIntegrate();
      #pragma omp barrier

      #pragma omp master
      timer.stamp();

      if((n + 1) % neighbor.every) {
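        // nonzero remainder: not a reneighboring step, only ghost-atom communication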

        #pragma omp barrier
        comm.communicate(atom);
        #pragma omp master
        timer.stamp(TIME_COMM);
        #pragma omp barrier

      } else {
        //these routines are not yet ported to OpenMP
        {
          if(check_safeexchange) {
            #pragma omp master
            {
              double d_max = 0;

              for(i = 0; i < atom.nlocal; i++) {
                double dx = (x[3 * i + 0] - xold[3 * i + 0]);

                if(dx > atom.box.xprd) dx -= atom.box.xprd;

                if(dx < -atom.box.xprd) dx += atom.box.xprd;

                double dy = (x[3 * i + 1] - xold[3 * i + 1]);

                if(dy > atom.box.yprd) dy -= atom.box.yprd;

                if(dy < -atom.box.yprd) dy += atom.box.yprd;

                double dz = (x[3 * i + 2] - xold[3 * i + 2]);

                if(dz > atom.box.zprd) dz -= atom.box.zprd;

                if(dz < -atom.box.zprd) dz += atom.box.zprd;

                double d = dx * dx + dy * dy + dz * dz;

                if(d > d_max) d_max = d;
              }

              d_max = sqrt(d_max);

              if((d_max > atom.box.xhi - atom.box.xlo) || (d_max > atom.box.yhi - atom.box.ylo) || (d_max > atom.box.zhi - atom.box.zlo))
                printf("Warning: Atoms move further than your subdomain size, which will eventually cause lost atoms.\n"
                "Increase reneighboring frequency or choose a different processor grid\n"
                "Maximum move distance: %lf; Subdomain dimensions: %lf %lf %lf\n",
                d_max, atom.box.xhi - atom.box.xlo, atom.box.yhi - atom.box.ylo, atom.box.zhi - atom.box.zlo);

            }

          }


          //int tid = omp_get_thread_num();
          //printf("Check B: %i %i %i\n",comm.me,tid,n);
          #pragma omp master
          timer.stamp_extra_start();
          comm.exchange(atom);
          comm.borders(atom);
          #pragma omp master
          {
            timer.stamp_extra_stop(TIME_TEST);
            timer.stamp(TIME_COMM);
          }

          if(check_safeexchange)
            for(int i = 0; i < 3 * atom.nlocal; i++) atom.xold[i] = atom.x[i];
        }

        #pragma omp barrier

        neighbor.build(atom);
        #pragma omp barrier

        #pragma omp master
        timer.stamp(TIME_NEIGH);
      }

      force->evflag = (n + 1) % thermo.nstat == 0;
      force->compute(atom, neighbor, comm, comm.me);


      #pragma omp master
      timer.stamp(TIME_FORCE);

      if(neighbor.halfneigh && neighbor.ghost_newton) {
        comm.reverse_communicate(atom);

        #pragma omp master
        timer.stamp(TIME_COMM);
      }

      v = &atom.v[0][0];
      f = &atom.f[0][0];
      nlocal = atom.nlocal;

      #pragma omp barrier

      finalIntegrate();

      #pragma omp barrier

      if(thermo.nstat) thermo.compute(n + 1, atom, neighbor, force, timer, comm);

    }
  } //end OpenMP parallel
}
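// The master/barrier timing idiom used throughout run(), reduced to a
// self-contained example; omp_get_wtime() stands in for the Timer class.
#include <omp.h>
#include <cstdio>

int main()
{
  double elapsed = 0.0;
  #pragma omp parallel
  {
    #pragma omp barrier          // all threads enter the timed phase together
    #pragma omp master
    elapsed -= omp_get_wtime();  // a single thread opens the clock
    // ... work executed by all threads would go here ...
    #pragma omp barrier          // all threads finish before the clock closes
    #pragma omp master
    elapsed += omp_get_wtime();
  }
  printf("timed phase: %f s\n", elapsed);
  return 0;
}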
Beispiel #27
0
void closeGripper2Wrapper(Arguments * input, Reply * output){
    comm.closeGripper2(input, output);
}