Example #1
void PairDPDOMP::compute(int eflag, int vflag)
{
  ev_init(eflag,vflag);

  const int nall = atom->nlocal + atom->nghost;
  const int inum = list->inum;

  // number of threads has changed. reallocate pool of pRNGs
  if (nthreads != comm->nthreads) {
    if (random_thr) {
      for (int i=1; i < nthreads; ++i)
        delete random_thr[i];

      delete[] random_thr;
    }

    nthreads = comm->nthreads;
    random_thr = new RanMars*[nthreads];
    for (int i=1; i < nthreads; ++i)
      random_thr[i] = NULL;

    // to ensure full compatibility with the serial DPD style
    // we use the serial random number generator instance for thread 0
    random_thr[0] = random;
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    // generate a random number generator instance for
    // all threads != 0. make sure we use unique seeds.
    if ((tid > 0) && (random_thr[tid] == NULL))
      random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me
                                    + comm->nprocs*tid);

    if (evflag) {
      if (eflag) {
        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
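The if/else ladder above picks one of several template instantiations of eval() based on runtime flags, so the hot loop is compiled with EVFLAG/EFLAG/NEWTON as constants and carries no per-iteration branching. A minimal self-contained sketch of that dispatch idiom (names here are illustrative, not from the LAMMPS sources):

// Sketch: compile-time specialization selected once from runtime flags.
template <int EVFLAG, int EFLAG, int NEWTON>
static void eval_demo(int from, int to)
{
  for (int i = from; i < to; ++i) {
    // force kernel; branches on EVFLAG/EFLAG/NEWTON fold away at
    // compile time inside this loop
  }
}

static void dispatch(bool evflag, bool eflag, bool newton, int from, int to)
{
  if (evflag) {
    if (eflag) {
      if (newton) eval_demo<1,1,1>(from,to); else eval_demo<1,1,0>(from,to);
    } else {
      if (newton) eval_demo<1,0,1>(from,to); else eval_demo<1,0,0>(from,to);
    }
  } else {
    if (newton) eval_demo<0,0,1>(from,to); else eval_demo<0,0,0>(from,to);
  }
}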
Example #2
void AngleDipoleOMP::compute(int eflag, int vflag)
{
  ev_init(eflag,vflag);

  if (!force->newton_bond)
    error->all(FLERR,"'newton' flag for bonded interactions must be 'on'");

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = neighbor->nanglelist;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (inum > 0) {
      if (evflag)
        eval<1>(ifrom, ito, thr);
      else
        eval<0>(ifrom, ito, thr);
    }
    thr->timer(Timer::BOND);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region

}
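Every example here calls loop_setup_thr(ifrom, ito, tid, inum, nthreads) to hand each thread a contiguous slice [ifrom, ito) of the inum list entries (the real helper also reports the calling thread's id). A plausible static block partition under that assumption, offered as a sketch rather than the actual OpenMP-package helper:

#include <algorithm>

static void block_partition(int &ifrom, int &ito, int tid,
                            int inum, int nthreads)
{
  const int chunk = (inum + nthreads - 1) / nthreads;  // ceil(inum/nthreads)
  ifrom = std::min(tid * chunk, inum);
  ito   = std::min(ifrom + chunk, inum);
}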
Example #3
File: finish.cpp  Project: albapa/lammps
void omp_times(FixOMP *fix, const char *label, enum Timer::ttype which,
                      const int nthreads,FILE *scr, FILE *log)
{
  const char fmt[] = "%-8s|%- 12.5g|%- 12.5g|%- 12.5g|%6.1f |%6.2f\n";
  double time_min, time_max, time_avg, time_total, time_std;

  time_min =  1.0e100;
  time_max = -1.0e100;
  time_total = time_avg = time_std = 0.0;

  for (int i=0; i < nthreads; ++i) {
    ThrData *thr = fix->get_thr(i);
    double tmp=thr->get_time(which);
    time_min = MIN(time_min,tmp);
    time_max = MAX(time_max,tmp);
    time_avg += tmp;
    time_std += tmp*tmp;
    time_total += thr->get_time(Timer::ALL);
  }

  time_avg /= nthreads;
  time_std /= nthreads;
  time_total /= nthreads;

  if (time_avg > 1.0e-10)
    time_std = sqrt(time_std/time_avg - time_avg)*100.0;
  else
    time_std = 0.0;

  if (scr) fprintf(scr,fmt,label,time_min,time_avg,time_max,time_std,
                   time_avg/time_total*100.0);
  if (log) fprintf(log,fmt,label,time_min,time_avg,time_max,time_std,
                   time_avg/time_total*100.0);
}
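In the loop above, time_avg accumulates the sum of the per-thread times and time_std the sum of their squares, so after the divisions the printed spread is 100*sqrt(E[t^2]/mean - mean) = 100*sqrt(variance/mean). For comparison, a conventional coefficient of variation computed from the same two accumulators would look like this (a sketch, not what finish.cpp prints in the %varavg column):

#include <algorithm>
#include <cmath>

// Coefficient of variation in percent, given the mean of the samples and
// the mean of the squared samples (both already divided by the count).
static double cv_percent(double mean, double mean_sq)
{
  const double var = std::max(mean_sq - mean * mean, 0.0);
  return (mean > 0.0) ? std::sqrt(var) / mean * 100.0 : 0.0;
}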
Example #4
void PairEDIPOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = vflag_atom = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (evflag) {
      if (eflag) {
        if (vflag_atom) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (vflag_atom) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else eval<0,0,0>(ifrom, ito, thr);

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
Example #5
void PairLJCutTIP4PLongSoftOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) ev_setup(eflag,vflag);
  else evflag = vflag_fdotr = 0;

  const int nlocal = atom->nlocal;
  const int nall = nlocal + atom->nghost;

  // reallocate hneigh_thr & newsite_thr if necessary
  // initialize hneigh_thr[0] to -1 on steps when reneighboring occurred
  // initialize hneigh_thr[2] to 0 every step

  if (atom->nmax > nmax) {
    nmax = atom->nmax;
    memory->destroy(hneigh_thr);
    memory->create(hneigh_thr,nmax,"pair:hneigh_thr");
    memory->destroy(newsite_thr);
    memory->create(newsite_thr,nmax,"pair:newsite_thr");
  }

  int i;
  // tag entire list as completely invalid after a neighbor
  // list update, since that can change the order of atoms.
  if (neighbor->ago == 0)
    for (i = 0; i < nall; i++) hneigh_thr[i].a = -1;

  // indicate that the coordinates for the M point need to
  // be updated. this needs to be done in every step.
  for (i = 0; i < nall; i++) hneigh_thr[i].t = 0;

  const int nthreads = comm->nthreads;
  const int inum = list->inum;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (evflag) {
      if (eflag) {
        if (vflag) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (vflag) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else eval<0,0,0>(ifrom, ito, thr);

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
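The accesses hneigh_thr[i].a and hneigh_thr[i].t above imply a small per-atom cache record. A hedged reconstruction of the two fields visible in this excerpt (the actual struct may carry more members):

struct hneigh_cache {
  int a;  // set to -1 after reneighboring, which can reorder atoms and
          // invalidate all cached hydrogen-atom indices
  int t;  // cleared to 0 every step so the TIP4P M-site coordinates are
          // recomputed once per timestep
};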
Example #6
void PairHbondDreidingMorseOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

  if (!hbcount_thr) {
    hbcount_thr = new double[nthreads];
    hbeng_thr = new double[nthreads];
  }

  for (int i=0; i < nthreads; ++i) {
    hbcount_thr[i] = 0.0;
    hbeng_thr[i] = 0.0;
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (evflag) {
      if (eflag) {
        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region

  // reduce per thread hbond data
  if (eflag_global) {
    pvector[0] = 0.0;
    pvector[1] = 0.0;
    for (int i=0; i < nthreads; ++i) {
      pvector[0] += hbcount_thr[i];
      pvector[1] += hbeng_thr[i];
    }
  }
}
Example #7
void PairADPOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;

  const int nlocal = atom->nlocal;
  const int nall = nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

  // grow energy and fp arrays if necessary
  // need to be atom->nmax in length

  if (atom->nmax > nmax) {
    memory->destroy(rho);
    memory->destroy(fp);
    memory->destroy(mu);
    memory->destroy(lambda);
    nmax = atom->nmax;
    memory->create(rho,nthreads*nmax,"pair:rho");
    memory->create(fp,nmax,"pair:fp");
    memory->create(mu,nthreads*nmax,3,"pair:mu");
    memory->create(lambda,nthreads*nmax,6,"pair:lambda");
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
    
    if (force->newton_pair)
      thr->init_adp(nall, rho, mu, lambda);
    else
      thr->init_adp(nlocal, rho, mu, lambda);

    if (evflag) {
      if (eflag) {
	if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
	else eval<1,1,0>(ifrom, ito, thr);
      } else {
	if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
	else eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    }

    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
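rho, mu, and lambda above are sized with a factor of nthreads so that each thread can accumulate density terms into a private region that is reduced afterwards; presumably thr->init_adp() points the thread at its slab. A hedged sketch of that layout (an assumption, not taken from ThrData):

#include <cstddef>

// Start of thread tid's private region inside an [nthreads*nmax] array.
static inline double *thread_slab(double *base, int tid, int nmax)
{
  return base + (std::size_t) tid * (std::size_t) nmax;
}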
Example #8
void PairDPDTstatOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

  if (!random_thr) {
    random_thr = new RanMars*[nthreads];
    for (int i=1; i < nthreads; ++i)
      random_thr[i] = NULL;
  }

  // to ensure full compatibility with the serial DPD style
  // we use the serial random number generator instance for thread 0
  random_thr[0] = random;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    // generate a random number generator instance for
    // all threads != 0. make sure we use unique seeds.
    if ((tid > 0) && (random_thr[tid] == NULL))
      random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me
                                    + comm->nprocs*tid);

    if (evflag) {
      if (eflag) {
        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
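The seed expression seed + comm->me + comm->nprocs*tid gives every (MPI rank, thread) pair its own stream: since 0 <= me < nprocs, the offset me + nprocs*tid is distinct for each pair. The mapping isolated as a one-liner:

// Distinct for every (me, tid) pair because 0 <= me < nprocs.
static inline int rng_seed(int seed, int me, int nprocs, int tid)
{
  return seed + me + nprocs * tid;
}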
Example #9
PPPMOMP::~PPPMOMP()
{
#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
#if defined(_OPENMP)
    const int tid = omp_get_thread_num();
#else
    const int tid = 0;
#endif
    ThrData *thr = fix->get_thr(tid);
    thr->init_pppm(-order,memory);
  }
}
Example #10
void PairPeriLPSOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

  // grow bond forces array if necessary

  if (atom->nmax > nmax) {
    memory->destroy(s0_new);
    memory->destroy(theta);
    nmax = atom->nmax;
    memory->create(s0_new,nmax,"pair:s0_new");
    memory->create(theta,nmax,"pair:theta");
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (evflag) {
      if (eflag) {
        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
Example #11
void PairMEAMSplineOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = eflag_global = vflag_global = eflag_atom = vflag_atom = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = listfull->inum;

  if (listhalf->inum != inum)
    error->warning(FLERR,"inconsistent half and full neighborlist");

  // Grow per-atom array if necessary.

  if (atom->nmax > nmax) {
    memory->destroy(Uprime_values);
    nmax = atom->nmax;
    memory->create(Uprime_values,nmax*nthreads,"pair:Uprime");
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    thr->init_eam(nall,Uprime_values);

    if (evflag) {
      if (eflag) {
        eval<1,1>(ifrom, ito, thr);
      } else {
        eval<1,0>(ifrom, ito, thr);
      }
    } else {
      eval<0,0>(ifrom, ito, thr);
    }

    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
Example #12
void PPPMCGOMP::compute(int eflag, int vflag)
{

  PPPMCG::compute(eflag,vflag);

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
#if defined(_OPENMP)
    const int tid = omp_get_thread_num();
#else
    const int tid = 0;
#endif
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
Example #13
void PairLJCutCoulMSMOMP::compute(int eflag, int vflag)
{
  if (force->kspace->scalar_pressure_flag)
    error->all(FLERR,"Must use 'kspace_modify pressure/scalar no' "
      "with OMP MSM Pair styles");

  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (evflag) {
      if (eflag) {
        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
Example #14
void PairGaussOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;
  double occ = 0.0;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag) reduction(+:occ)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (evflag) {
      if (eflag) {
        if (force->newton_pair) occ = eval<1,1,1>(ifrom, ito, thr);
        else occ = eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) occ = eval<1,0,1>(ifrom, ito, thr);
        else occ = eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (force->newton_pair) occ = eval<0,0,1>(ifrom, ito, thr);
      else occ = eval<0,0,0>(ifrom, ito, thr);
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region

  if (eflag_global) pvector[0] = occ;
}
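This is the one style in the collection whose eval() returns a value, folded across threads with reduction(+:occ); the clause gives each thread a private copy of occ and sums the copies at the join. A self-contained toy showing the same clause (illustrative only):

#include <cstdio>

int main()
{
  double occ = 0.0;
#if defined(_OPENMP)
#pragma omp parallel reduction(+:occ)
#endif
  {
    occ += 1.0;  // stand-in for a per-thread eval() result
  }
  // prints the thread count when built with OpenMP, 1 otherwise
  std::printf("reduced occ = %g\n", occ);
  return 0;
}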
Example #15
void DihedralHarmonicOMP::compute(int eflag, int vflag)
{

  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = neighbor->ndihedrallist;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (inum > 0) {
      if (evflag) {
        if (eflag) {
          if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
          else eval<1,1,0>(ifrom, ito, thr);
        } else {
          if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
          else eval<1,0,0>(ifrom, ito, thr);
        }
      } else {
        if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
        else eval<0,0,0>(ifrom, ito, thr);
      }
    }
    thr->timer(Timer::BOND);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
Example #16
File: finish.cpp  Project: albapa/lammps
void Finish::end(int flag)
{
  int i,m,nneigh,nneighfull;
  int histo[10];
  int minflag,prdflag,tadflag,timeflag,fftflag,histoflag,neighflag;
  double time,tmp,ave,max,min;
  double time_loop,time_other,cpu_loop;

  int me,nprocs;
  MPI_Comm_rank(world,&me);
  MPI_Comm_size(world,&nprocs);

  const int nthreads = comm->nthreads;

  // recompute natoms in case atoms have been lost

  bigint nblocal = atom->nlocal;
  MPI_Allreduce(&nblocal,&atom->natoms,1,MPI_LMP_BIGINT,MPI_SUM,world);

  // choose flavors of statistical output
  // flag determines caller
  // flag = 0 = just loop summary
  // flag = 1 = dynamics or minimization
  // flag = 2 = PRD
  // flag = 3 = TAD
  // turn off neighflag for Kspace partition of verlet/split integrator

  minflag = prdflag = tadflag = timeflag = fftflag = histoflag = neighflag = 0;
  time_loop = cpu_loop = time_other = 0.0;

  if (flag == 1) {
    if (update->whichflag == 2) minflag = 1;
    timeflag = histoflag = 1;
    neighflag = 1;
    if (update->whichflag == 1 &&
        strncmp(update->integrate_style,"verlet/split",12) == 0 &&
        universe->iworld == 1) neighflag = 0;
    if (force->kspace && force->kspace_match("pppm",0)
        && force->kspace->fftbench) fftflag = 1;
  }
  if (flag == 2) prdflag = timeflag = histoflag = neighflag = 1;
  if (flag == 3) tadflag = histoflag = neighflag = 1;

  // loop stats

  if (timer->has_loop()) {

    // overall loop time

    time_loop = timer->get_wall(Timer::TOTAL);
    cpu_loop = timer->get_cpu(Timer::TOTAL);
    MPI_Allreduce(&time_loop,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time_loop = tmp/nprocs;
    MPI_Allreduce(&cpu_loop,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    cpu_loop = tmp/nprocs;
    if (time_loop > 0.0) cpu_loop = cpu_loop/time_loop*100.0;

    if (me == 0) {
      int ntasks = nprocs * nthreads;
      const char fmt1[] = "Loop time of %g on %d procs "
        "for %d steps with " BIGINT_FORMAT " atoms\n\n";
      if (screen) fprintf(screen,fmt1,time_loop,ntasks,update->nsteps,
                          atom->natoms);
      if (logfile) fprintf(logfile,fmt1,time_loop,ntasks,update->nsteps,
                           atom->natoms);

      // Gromacs/NAMD-style performance metric for suitable unit settings

      if ( timeflag && !minflag && !prdflag && !tadflag &&
           (update->nsteps > 0) && (update->dt != 0.0) &&
           ((strcmp(update->unit_style,"lj") == 0) ||
            (strcmp(update->unit_style,"metal") == 0) ||
            (strcmp(update->unit_style,"micro") == 0) ||
            (strcmp(update->unit_style,"nano") == 0) ||
            (strcmp(update->unit_style,"electron") == 0) ||
            (strcmp(update->unit_style,"real") == 0)) ) {
        double one_fs = force->femtosecond;
        double t_step = ((double) time_loop) / ((double) update->nsteps);
        double step_t = 1.0/t_step;

        if (strcmp(update->unit_style,"lj") == 0) {
          double tau_day = 24.0*3600.0 / t_step * update->dt / one_fs;
          const char perf[] = "Performance: %.3f tau/day, %.3f timesteps/s\n";
          if (screen) fprintf(screen,perf,tau_day,step_t);
          if (logfile) fprintf(logfile,perf,tau_day,step_t);
        } else {
          double hrs_ns = t_step / update->dt * 1000000.0 * one_fs / 3600.0;
          double ns_day = 24.0*3600.0 / t_step * update->dt / one_fs/1000000.0;
          const char perf[] =
            "Performance: %.3f ns/day, %.3f hours/ns, %.3f timesteps/s\n";
          if (screen) fprintf(screen,perf,ns_day,hrs_ns,step_t);
          if (logfile) fprintf(logfile,perf,ns_day,hrs_ns,step_t);
        }
      }

      // CPU use on MPI tasks and OpenMP threads

#ifdef LMP_USER_OMP
      const char fmt2[] =
        "%.1f%% CPU use with %d MPI tasks x %d OpenMP threads\n";
      if (screen) fprintf(screen,fmt2,cpu_loop,nprocs,nthreads);
      if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs,nthreads);
#else
      if (lmp->kokkos) {
        const char fmt2[] =
          "%.1f%% CPU use with %d MPI tasks x %d OpenMP threads\n";
        if (screen) fprintf(screen,fmt2,cpu_loop,nprocs,lmp->kokkos->num_threads);
        if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs,lmp->kokkos->num_threads);
      } else {
        const char fmt2[] =
          "%.1f%% CPU use with %d MPI tasks x no OpenMP threads\n";
        if (screen) fprintf(screen,fmt2,cpu_loop,nprocs);
        if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs);
      }
#endif

    }
  }

  // avoid division by zero for very short runs

  if (time_loop == 0.0) time_loop = 1.0;
  if (cpu_loop == 0.0) cpu_loop = 100.0;

  // get "Other" wall time for later use

  if (timer->has_normal())
    time_other = timer->get_wall(Timer::TOTAL) - timer->get_wall(Timer::ALL);

  // minimization stats

  if (minflag) {
    if (me == 0) {
      if (screen) fprintf(screen,"\n");
      if (logfile) fprintf(logfile,"\n");
    }

    if (me == 0) {
      if (screen) {
        fprintf(screen,"Minimization stats:\n");
        fprintf(screen,"  Stopping criterion = %s\n",
                update->minimize->stopstr);
        fprintf(screen,"  Energy initial, next-to-last, final = \n"
                "    %18.12g %18.12g %18.12g\n",
                update->minimize->einitial,update->minimize->eprevious,
                update->minimize->efinal);
        fprintf(screen,"  Force two-norm initial, final = %g %g\n",
                update->minimize->fnorm2_init,update->minimize->fnorm2_final);
        fprintf(screen,"  Force max component initial, final = %g %g\n",
                update->minimize->fnorminf_init,
                update->minimize->fnorminf_final);
        fprintf(screen,"  Final line search alpha, max atom move = %g %g\n",
                update->minimize->alpha_final,
                update->minimize->alpha_final*
                update->minimize->fnorminf_final);
        fprintf(screen,"  Iterations, force evaluations = %d %d\n",
                update->minimize->niter,update->minimize->neval);
      }
      if (logfile) {
        fprintf(logfile,"Minimization stats:\n");
        fprintf(logfile,"  Stopping criterion = %s\n",
                update->minimize->stopstr);
        fprintf(logfile,"  Energy initial, next-to-last, final = \n"
                "    %18.12g %18.12g %18.12g\n",
                update->minimize->einitial,update->minimize->eprevious,
                update->minimize->efinal);
        fprintf(logfile,"  Force two-norm initial, final = %g %g\n",
                update->minimize->fnorm2_init,update->minimize->fnorm2_final);
        fprintf(logfile,"  Force max component initial, final = %g %g\n",
                update->minimize->fnorminf_init,
                update->minimize->fnorminf_final);
        fprintf(logfile,"  Final line search alpha, max atom move = %g %g\n",
                update->minimize->alpha_final,
                update->minimize->alpha_final*
                update->minimize->fnorminf_final);
        fprintf(logfile,"  Iterations, force evaluations = %d %d\n",
                update->minimize->niter,update->minimize->neval);
      }
    }
  }

  // PRD stats using PAIR,BOND,KSPACE for dephase,dynamics,quench

  if (prdflag) {
    if (me == 0) {
      if (screen) fprintf(screen,"\n");
      if (logfile) fprintf(logfile,"\n");
    }

    if (screen) fprintf(screen,"PRD stats:\n");
    if (logfile) fprintf(logfile,"PRD stats:\n");

    time = timer->get_wall(Timer::DEPHASE);
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen)
        fprintf(screen,"  Dephase  time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
      if (logfile)
        fprintf(logfile,"  Dephase  time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
    }

    time = timer->get_wall(Timer::DYNAMICS);
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen)
        fprintf(screen,"  Dynamics time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
      if (logfile)
        fprintf(logfile,"  Dynamics time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
    }

    time = timer->get_wall(Timer::QUENCH);
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen)
        fprintf(screen,"  Quench   time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
      if (logfile)
        fprintf(logfile,"  Quench   time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
    }

    time = timer->get_wall(Timer::REPCOMM);
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen)
        fprintf(screen,"  Comm     time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
      if (logfile)
        fprintf(logfile,"  Comm     time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
    }


    time = timer->get_wall(Timer::REPOUT);
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen)
        fprintf(screen,"  Output   time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
      if (logfile)
        fprintf(logfile,"  Output   time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
    }

    time = time_other;
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) { // XXXX: replica comm, replica output
      if (screen)
        fprintf(screen,"  Other    time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
      if (logfile)
        fprintf(logfile,"  Other    time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
    }
  }

  // TAD stats using PAIR,BOND,KSPACE for neb,dynamics,quench

  if (tadflag) {
    if (me == 0) {
      if (screen) fprintf(screen,"\n");
      if (logfile) fprintf(logfile,"\n");
    }

    if (screen) fprintf(screen,"TAD stats:\n");
    if (logfile) fprintf(logfile,"TAD stats:\n");

    time = timer->get_wall(Timer::NEB);
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen)
        fprintf(screen,"  NEB      time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
      if (logfile)
        fprintf(logfile,"  NEB      time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
    }

    time = timer->get_wall(Timer::DYNAMICS);
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen)
        fprintf(screen,"  Dynamics time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
      if (logfile)
        fprintf(logfile,"  Dynamics time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
    }

    time = timer->get_wall(Timer::QUENCH);
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen)
        fprintf(screen,"  Quench   time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
      if (logfile)
        fprintf(logfile,"  Quench   time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
    }


    time = timer->get_wall(Timer::REPCOMM);
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen)
        fprintf(screen,"  Comm     time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
      if (logfile)
        fprintf(logfile,"  Comm     time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
    }


    time = timer->get_wall(Timer::REPOUT);
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen)
        fprintf(screen,"  Output   time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
      if (logfile)
        fprintf(logfile,"  Output   time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
    }

    time = time_other;
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;
    if (me == 0) {
      if (screen)
        fprintf(screen,"  Other    time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
      if (logfile)
        fprintf(logfile,"  Other    time (%%) = %g (%g)\n",
                time,time/time_loop*100.0);
    }
  }

  if (timeflag && timer->has_normal()) {

    if (timer->has_full()) {
      const char hdr[] = "\nMPI task timing breakdown:\n"
        "Section |  min time  |  avg time  |  max time  |%varavg|  %CPU | %total\n"
        "-----------------------------------------------------------------------\n";
      if (me == 0) {
        if (screen)  fputs(hdr,screen);
        if (logfile) fputs(hdr,logfile);
      }
    } else {
      const char hdr[] = "\nMPI task timing breakdown:\n"
        "Section |  min time  |  avg time  |  max time  |%varavg| %total\n"
        "---------------------------------------------------------------\n";
      if (me == 0) {
        if (screen)  fputs(hdr,screen);
        if (logfile) fputs(hdr,logfile);
      }
    }

    mpi_timings("Pair",timer,Timer::PAIR, world,nprocs,
                nthreads,me,time_loop,screen,logfile);

    if (atom->molecular)
      mpi_timings("Bond",timer,Timer::BOND,world,nprocs,
                  nthreads,me,time_loop,screen,logfile);

    if (force->kspace)
      mpi_timings("Kspace",timer,Timer::KSPACE,world,nprocs,
                  nthreads,me,time_loop,screen,logfile);

    mpi_timings("Neigh",timer,Timer::NEIGH,world,nprocs,
                nthreads,me,time_loop,screen,logfile);
    mpi_timings("Comm",timer,Timer::COMM,world,nprocs,
                nthreads,me,time_loop,screen,logfile);
    mpi_timings("Output",timer,Timer::OUTPUT,world,nprocs,
                nthreads,me,time_loop,screen,logfile);
    mpi_timings("Modify",timer,Timer::MODIFY,world,nprocs,
                nthreads,me,time_loop,screen,logfile);
    if (timer->has_sync())
      mpi_timings("Sync",timer,Timer::SYNC,world,nprocs,
                  nthreads,me,time_loop,screen,logfile);

    time = time_other;
    MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time = tmp/nprocs;

    const char *fmt;
    if (timer->has_full())
      fmt = "Other   |            |%- 12.4g|            |       |       |%6.2f\n";
    else
      fmt = "Other   |            |%- 12.4g|            |       |%6.2f\n";

    if (me == 0) {
      if (screen) fprintf(screen,fmt,time,time/time_loop*100.0);
      if (logfile) fprintf(logfile,fmt,time,time/time_loop*100.0);
    }
  }

#ifdef LMP_USER_OMP
  const char thr_hdr_fmt[] =
    "\nThread timing breakdown (MPI rank %d):\nTotal threaded time %.4g / %.1f%%\n";
  const char thr_header[] =
    "Section |  min time  |  avg time  |  max time  |%varavg| %total\n"
    "---------------------------------------------------------------\n";

  int ifix = modify->find_fix("package_omp");

  // print thread breakdown only with full timer detail

  if ((ifix >= 0) && timer->has_full() && me == 0) {
    double thr_total = 0.0;
    ThrData *td;
    FixOMP *fixomp = static_cast<FixOMP *>(lmp->modify->fix[ifix]);
    for (i=0; i < nthreads; ++i) {
      td = fixomp->get_thr(i);
      thr_total += td->get_time(Timer::ALL);
    }
    thr_total /= (double) nthreads;

    if (thr_total > 0.0) {
      if (screen) {
        fprintf(screen,thr_hdr_fmt,me,thr_total,thr_total/time_loop*100.0);
        fputs(thr_header,screen);
      }
      if (logfile) {
        fprintf(logfile,thr_hdr_fmt,me,thr_total,thr_total/time_loop*100.0);
        fputs(thr_header,logfile);
      }

      omp_times(fixomp,"Pair",Timer::PAIR,nthreads,screen,logfile);

      if (atom->molecular)
        omp_times(fixomp,"Bond",Timer::BOND,nthreads,screen,logfile);

      if (force->kspace)
        omp_times(fixomp,"Kspace",Timer::KSPACE,nthreads,screen,logfile);

      omp_times(fixomp,"Neigh",Timer::NEIGH,nthreads,screen,logfile);
      omp_times(fixomp,"Reduce",Timer::COMM,nthreads,screen,logfile);
    }
  }
#endif

  if (lmp->kokkos && lmp->kokkos->ngpu > 0)
    if (const char* env_clb = getenv("CUDA_LAUNCH_BLOCKING"))
      if (!(strcmp(env_clb,"1") == 0)) {
        error->warning(FLERR,"Timing breakdown may not be accurate since GPU/CPU overlap is enabled. "
          "Using 'export CUDA_LAUNCH_BLOCKING=1' will give an accurate timing breakdown but will reduce performance");
      }

  // FFT timing statistics
  // time3d,time1d = total time during run for 3d and 1d FFTs
  // loop on timing() until nsample FFTs require at least 1.0 CPU sec
  // time_kspace may be 0.0 if another partition is doing Kspace

  if (fftflag) {
    if (me == 0) {
      if (screen) fprintf(screen,"\n");
      if (logfile) fprintf(logfile,"\n");
    }

    int nsteps = update->nsteps;

    double time3d;
    int nsample = 1;
    int nfft = force->kspace->timing_3d(nsample,time3d);
    while (time3d < 1.0) {
      nsample *= 2;
      nfft = force->kspace->timing_3d(nsample,time3d);
    }

    time3d = nsteps * time3d / nsample;
    MPI_Allreduce(&time3d,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time3d = tmp/nprocs;

    double time1d;
    nsample = 1;
    nfft = force->kspace->timing_1d(nsample,time1d);
    while (time1d < 1.0) {
      nsample *= 2;
      nfft = force->kspace->timing_1d(nsample,time1d);
    }

    time1d = nsteps * time1d / nsample;
    MPI_Allreduce(&time1d,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time1d = tmp/nprocs;

    double time_kspace = timer->get_wall(Timer::KSPACE);
    MPI_Allreduce(&time_kspace,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
    time_kspace = tmp/nprocs;

    double ntotal = 1.0 * force->kspace->nx_pppm *
      force->kspace->ny_pppm * force->kspace->nz_pppm;
    double nflops = 5.0 * ntotal * log(ntotal);

    double fraction,flop3,flop1;
    if (nsteps) {
      if (time_kspace) fraction = time3d/time_kspace*100.0;
      else fraction = 0.0;
      flop3 = nfft*nflops/1.0e9/(time3d/nsteps);
      flop1 = nfft*nflops/1.0e9/(time1d/nsteps);
    } else fraction = flop3 = flop1 = 0.0;

    if (me == 0) {
      if (screen) {
        fprintf(screen,"FFT time (%% of Kspce) = %g (%g)\n",time3d,fraction);
        fprintf(screen,"FFT Gflps 3d (1d only) = %g %g\n",flop3,flop1);
      }
      if (logfile) {
        fprintf(logfile,"FFT time (%% of Kspce) = %g (%g)\n",time3d,fraction);
        fprintf(logfile,"FFT Gflps 3d (1d only) = %g %g\n",flop3,flop1);
      }
    }
  }

  if (histoflag) {
    if (me == 0) {
      if (screen) fprintf(screen,"\n");
      if (logfile) fprintf(logfile,"\n");
    }

    tmp = atom->nlocal;
    stats(1,&tmp,&ave,&max,&min,10,histo);
    if (me == 0) {
      if (screen) {
        fprintf(screen,"Nlocal:    %g ave %g max %g min\n",ave,max,min);
        fprintf(screen,"Histogram:");
        for (i = 0; i < 10; i++) fprintf(screen," %d",histo[i]);
        fprintf(screen,"\n");
      }
      if (logfile) {
        fprintf(logfile,"Nlocal:    %g ave %g max %g min\n",ave,max,min);
        fprintf(logfile,"Histogram:");
        for (i = 0; i < 10; i++) fprintf(logfile," %d",histo[i]);
        fprintf(logfile,"\n");
      }
    }

    tmp = atom->nghost;
    stats(1,&tmp,&ave,&max,&min,10,histo);
    if (me == 0) {
      if (screen) {
        fprintf(screen,"Nghost:    %g ave %g max %g min\n",ave,max,min);
        fprintf(screen,"Histogram:");
        for (i = 0; i < 10; i++) fprintf(screen," %d",histo[i]);
        fprintf(screen,"\n");
      }
      if (logfile) {
        fprintf(logfile,"Nghost:    %g ave %g max %g min\n",ave,max,min);
        fprintf(logfile,"Histogram:");
        for (i = 0; i < 10; i++) fprintf(logfile," %d",histo[i]);
        fprintf(logfile,"\n");
      }
    }

    // find a non-skip neighbor list containing half pairwise interactions
    // count neighbors in that list for stats purposes
    // allow it to be Kokkos neigh list as well

    for (m = 0; m < neighbor->old_nrequest; m++) {
      if ((neighbor->old_requests[m]->half ||
           neighbor->old_requests[m]->gran ||
           neighbor->old_requests[m]->respaouter ||
           neighbor->old_requests[m]->half_from_full) &&
          neighbor->old_requests[m]->skip == 0 &&
          neighbor->lists[m] && neighbor->lists[m]->numneigh) {
        if (!neighbor->lists[m] && lmp->kokkos &&
            lmp->kokkos->neigh_list_kokkos(m)) break;
        else break;
      }
    }

    nneigh = 0;
    if (m < neighbor->old_nrequest) {
      if (neighbor->lists[m]) {
        int inum = neighbor->lists[m]->inum;
        int *ilist = neighbor->lists[m]->ilist;
        int *numneigh = neighbor->lists[m]->numneigh;
        for (i = 0; i < inum; i++)
          nneigh += numneigh[ilist[i]];
      } else if (lmp->kokkos) nneigh = lmp->kokkos->neigh_count(m);
    }

    tmp = nneigh;
    stats(1,&tmp,&ave,&max,&min,10,histo);
    if (me == 0) {
      if (screen) {
        fprintf(screen,"Neighs:    %g ave %g max %g min\n",ave,max,min);
        fprintf(screen,"Histogram:");
        for (i = 0; i < 10; i++) fprintf(screen," %d",histo[i]);
        fprintf(screen,"\n");
      }
      if (logfile) {
        fprintf(logfile,"Neighs:    %g ave %g max %g min\n",ave,max,min);
        fprintf(logfile,"Histogram:");
        for (i = 0; i < 10; i++) fprintf(logfile," %d",histo[i]);
        fprintf(logfile,"\n");
      }
    }

    // find a non-skip neighbor list containing full pairwise interactions
    // count neighbors in that list for stats purposes
    // allow it to be Kokkos neigh list as well

    for (m = 0; m < neighbor->old_nrequest; m++) {
      if (neighbor->old_requests[m]->full &&
          neighbor->old_requests[m]->skip == 0) {
        if (lmp->kokkos && lmp->kokkos->neigh_list_kokkos(m)) break;
        else break;
      }
    }

    nneighfull = 0;
    if (m < neighbor->old_nrequest) {
      if (neighbor->lists[m] && neighbor->lists[m]->numneigh) {
        int inum = neighbor->lists[m]->inum;
        int *ilist = neighbor->lists[m]->ilist;
        int *numneigh = neighbor->lists[m]->numneigh;
        for (i = 0; i < inum; i++)
          nneighfull += numneigh[ilist[i]];
      } else if (!neighbor->lists[m] && lmp->kokkos)
          nneighfull = lmp->kokkos->neigh_count(m);

      tmp = nneighfull;
      stats(1,&tmp,&ave,&max,&min,10,histo);
      if (me == 0) {
        if (screen) {
          fprintf(screen,"FullNghs:  %g ave %g max %g min\n",ave,max,min);
          fprintf(screen,"Histogram:");
          for (i = 0; i < 10; i++) fprintf(screen," %d",histo[i]);
          fprintf(screen,"\n");
        }
        if (logfile) {
          fprintf(logfile,"FullNghs:  %g ave %g max %g min\n",ave,max,min);
          fprintf(logfile,"Histogram:");
          for (i = 0; i < 10; i++) fprintf(logfile," %d",histo[i]);
          fprintf(logfile,"\n");
        }
      }
    }
  }

  if (neighflag) {
    if (me == 0) {
      if (screen) fprintf(screen,"\n");
      if (logfile) fprintf(logfile,"\n");
    }

    tmp = MAX(nneigh,nneighfull);
    double nall;
    MPI_Allreduce(&tmp,&nall,1,MPI_DOUBLE,MPI_SUM,world);

    int nspec;
    double nspec_all = 0;
    if (atom->molecular == 1) {
      int **nspecial = atom->nspecial;
      int nlocal = atom->nlocal;
      nspec = 0;
      for (i = 0; i < nlocal; i++) nspec += nspecial[i][2];
      tmp = nspec;
      MPI_Allreduce(&tmp,&nspec_all,1,MPI_DOUBLE,MPI_SUM,world);
    } else if (atom->molecular == 2) {
      Molecule **onemols = atom->avec->onemols;
      int *molindex = atom->molindex;
      int *molatom = atom->molatom;
      int nlocal = atom->nlocal;
      int imol,iatom;
      nspec = 0;
      for (i = 0; i < nlocal; i++) {
        if (molindex[i] < 0) continue;
        imol = molindex[i];
        iatom = molatom[i];
        nspec += onemols[imol]->nspecial[iatom][2];
      }
      tmp = nspec;
      MPI_Allreduce(&tmp,&nspec_all,1,MPI_DOUBLE,MPI_SUM,world);
    }

    if (me == 0) {
      if (screen) {
        if (nall < 2.0e9)
          fprintf(screen,
                  "Total # of neighbors = %d\n",static_cast<int> (nall));
        else fprintf(screen,"Total # of neighbors = %g\n",nall);
        if (atom->natoms > 0)
          fprintf(screen,"Ave neighs/atom = %g\n",nall/atom->natoms);
        if (atom->molecular && atom->natoms > 0)
          fprintf(screen,"Ave special neighs/atom = %g\n",
                  nspec_all/atom->natoms);
        fprintf(screen,"Neighbor list builds = " BIGINT_FORMAT "\n",
                neighbor->ncalls);
        if (neighbor->dist_check)
          fprintf(screen,"Dangerous builds = " BIGINT_FORMAT "\n",
                  neighbor->ndanger);
        else fprintf(screen,"Dangerous builds not checked\n");
      }
      if (logfile) {
        if (nall < 2.0e9)
          fprintf(logfile,
                  "Total # of neighbors = %d\n",static_cast<int> (nall));
        else fprintf(logfile,"Total # of neighbors = %g\n",nall);
        if (atom->natoms > 0)
          fprintf(logfile,"Ave neighs/atom = %g\n",nall/atom->natoms);
        if (atom->molecular && atom->natoms > 0)
          fprintf(logfile,"Ave special neighs/atom = %g\n",
                  nspec_all/atom->natoms);
        fprintf(logfile,"Neighbor list builds = " BIGINT_FORMAT "\n",
                neighbor->ncalls);
        if (neighbor->dist_check)
          fprintf(logfile,"Dangerous builds = " BIGINT_FORMAT "\n",
                  neighbor->ndanger);
        else fprintf(logfile,"Dangerous builds not checked\n");
      }
    }
  }

  if (logfile) fflush(logfile);
}
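The performance block in Finish::end() converts wall seconds per step into ns/day and hours/ns, with update->dt expressed in force->femtosecond units. A standalone worked version of the same arithmetic (time_loop, nsteps, and dt_fs below are made-up illustration values, with one_fs taken as 1 as in "real" units):

#include <cstdio>

int main()
{
  const double time_loop = 120.0;   // hypothetical wall seconds for the run
  const int    nsteps    = 10000;   // hypothetical number of timesteps
  const double dt_fs     = 1.0;     // hypothetical timestep in femtoseconds

  const double t_step = time_loop / nsteps;                      // s per step
  const double ns_day = 24.0*3600.0 / t_step * dt_fs / 1.0e6;    // ns simulated per day
  const double hrs_ns = t_step / dt_fs * 1.0e6 / 3600.0;         // wall hours per ns
  std::printf("%.3f ns/day, %.3f hours/ns\n", ns_day, hrs_ns);   // 7.200, 3.333
  return 0;
}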
Example #17
void PPPMCGOMP::compute_gf_ad()
{

  const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;

  const double xprd = prd[0];
  const double yprd = prd[1];
  const double zprd = prd[2];
  const double zprd_slab = zprd*slab_volfactor;
  const double unitkx = (MY_2PI/xprd);
  const double unitky = (MY_2PI/yprd);
  const double unitkz = (MY_2PI/zprd_slab);

  const int numk = nxhi_fft - nxlo_fft + 1;
  const int numl = nyhi_fft - nylo_fft + 1;

  const int twoorder = 2*order;
  double sf0=0.0,sf1=0.0,sf2=0.0,sf3=0.0,sf4=0.0,sf5=0.0;

#if defined(_OPENMP)
#pragma omp parallel default(none) reduction(+:sf0,sf1,sf2,sf3,sf4,sf5)
#endif
  {
    double snx,sny,snz,sqk;
    double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
    double numerator,denominator;
    int k,l,m,kper,lper,mper,n,nfrom,nto,tid;

    loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);

    for (n = nfrom; n < nto; ++n) {

      m = n / (numl*numk);
      l = (n - m*numl*numk) / numk;
      k = n - m*numl*numk - l*numk;
      m += nzlo_fft;
      l += nylo_fft;
      k += nxlo_fft;

      mper = m - nz_pppm*(2*m/nz_pppm);
      qz = unitkz*mper;
      snz = square(sin(0.5*qz*zprd_slab/nz_pppm));
      sz = exp(-0.25*square(qz/g_ewald));
      argz = 0.5*qz*zprd_slab/nz_pppm;
      wz = powsinxx(argz,twoorder);

      lper = l - ny_pppm*(2*l/ny_pppm);
      qy = unitky*lper;
      sny = square(sin(0.5*qy*yprd/ny_pppm));
      sy = exp(-0.25*square(qy/g_ewald));
      argy = 0.5*qy*yprd/ny_pppm;
      wy = powsinxx(argy,twoorder);

      kper = k - nx_pppm*(2*k/nx_pppm);
      qx = unitkx*kper;
      snx = square(sin(0.5*qx*xprd/nx_pppm));
      sx = exp(-0.25*square(qx/g_ewald));
      argx = 0.5*qx*xprd/nx_pppm;
      wx = powsinxx(argx,twoorder);

      sqk = qx*qx + qy*qy + qz*qz;

      if (sqk != 0.0) {
	numerator = MY_4PI/sqk;
	denominator = gf_denom(snx,sny,snz);
	greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator;
	sf0 += sf_precoeff1[n]*greensfn[n];
	sf1 += sf_precoeff2[n]*greensfn[n];
	sf2 += sf_precoeff3[n]*greensfn[n];
	sf3 += sf_precoeff4[n]*greensfn[n];
	sf4 += sf_precoeff5[n]*greensfn[n];
	sf5 += sf_precoeff6[n]*greensfn[n];
      } else {
	greensfn[n] = 0.0;
	sf0 += sf_precoeff1[n]*greensfn[n];
	sf1 += sf_precoeff2[n]*greensfn[n];
	sf2 += sf_precoeff3[n]*greensfn[n];
	sf3 += sf_precoeff4[n]*greensfn[n];
	sf4 += sf_precoeff5[n]*greensfn[n];
	sf5 += sf_precoeff6[n]*greensfn[n];
      }
    }
    thr->timer(Timer::KSPACE);
  } // end of parallel region

  // compute the coefficients for the self-force correction

  double prex, prey, prez, tmp[6];
  prex = prey = prez = MY_PI/volume;
  prex *= nx_pppm/xprd;
  prey *= ny_pppm/yprd;
  prez *= nz_pppm/zprd_slab;
  tmp[0] = sf0 * prex;
  tmp[1] = sf1 * prex*2;
  tmp[2] = sf2 * prey;
  tmp[3] = sf3 * prey*2;
  tmp[4] = sf4 * prez;
  tmp[5] = sf5 * prez*2;

  // communicate values with other procs

  MPI_Allreduce(tmp,sf_coeff,6,MPI_DOUBLE,MPI_SUM,world);
}
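Both compute_gf_ad() and compute_gf_ik() recover (k,l,m) grid coordinates from the flat loop index n, with k varying fastest. The decomposition isolated as a sketch:

// Inverse of n = (m*numl + l)*numk + k, where k is the fastest index.
struct KLM { int k, l, m; };

static KLM unflatten(int n, int numk, int numl)
{
  KLM r;
  r.m = n / (numl * numk);
  r.l = (n - r.m * numl * numk) / numk;
  r.k = n - r.m * numl * numk - r.l * numk;
  return r;  // callers then add the nxlo_fft/nylo_fft/nzlo_fft offsets
}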
Example #18
void PPPMCGOMP::compute_gf_ik()
{
  const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;

  const double xprd = prd[0];
  const double yprd = prd[1];
  const double zprd = prd[2];
  const double zprd_slab = zprd*slab_volfactor;
  const double unitkx = (MY_2PI/xprd);
  const double unitky = (MY_2PI/yprd);
  const double unitkz = (MY_2PI/zprd_slab);

  const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
                                    pow(-log(EPS_HOC),0.25));
  const int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
                                    pow(-log(EPS_HOC),0.25));
  const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
                                    pow(-log(EPS_HOC),0.25));
  const int numk = nxhi_fft - nxlo_fft + 1;
  const int numl = nyhi_fft - nylo_fft + 1;

  const int twoorder = 2*order;

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
    double snx,sny,snz;
    double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
    double sum1,dot1,dot2;
    double numerator,denominator;
    double sqk;

    int k,l,m,nx,ny,nz,kper,lper,mper,n,nfrom,nto,tid;

    loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);

    for (n = nfrom; n < nto; ++n) {
      m = n / (numl*numk);
      l = (n - m*numl*numk) / numk;
      k = n - m*numl*numk - l*numk;
      m += nzlo_fft;
      l += nylo_fft;
      k += nxlo_fft;

      mper = m - nz_pppm*(2*m/nz_pppm);
      snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));

      lper = l - ny_pppm*(2*l/ny_pppm);
      sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));

      kper = k - nx_pppm*(2*k/nx_pppm);
      snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));

      sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);

      if (sqk != 0.0) {
	numerator = 12.5663706/sqk;
	denominator = gf_denom(snx,sny,snz);
	sum1 = 0.0;

	for (nx = -nbx; nx <= nbx; nx++) {
	  qx = unitkx*(kper+nx_pppm*nx);
	  sx = exp(-0.25*square(qx/g_ewald));
	  argx = 0.5*qx*xprd/nx_pppm;
	  wx = powsinxx(argx,twoorder);

	  for (ny = -nby; ny <= nby; ny++) {
	    qy = unitky*(lper+ny_pppm*ny);
	    sy = exp(-0.25*square(qy/g_ewald));
	    argy = 0.5*qy*yprd/ny_pppm;
	    wy = powsinxx(argy,twoorder);

	    for (nz = -nbz; nz <= nbz; nz++) {
	      qz = unitkz*(mper+nz_pppm*nz);
	      sz = exp(-0.25*square(qz/g_ewald));
	      argz = 0.5*qz*zprd_slab/nz_pppm;
	      wz = powsinxx(argz,twoorder);

	      dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
	      dot2 = qx*qx+qy*qy+qz*qz;
	      sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
	    }
	  }
	}
	greensfn[n] = numerator*sum1/denominator;
      } else greensfn[n] = 0.0;
    }
    thr->timer(Timer::KSPACE);
  } // end of parallel region
}
Example #19
void PairCDEAMOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

  // grow energy and fp arrays if necessary
  // need to be atom->nmax in length

  if (atom->nmax > nmax) {
    memory->destroy(rho);
    memory->destroy(rhoB);
    memory->destroy(D_values);
    memory->destroy(fp);
    nmax = atom->nmax;
    memory->create(rho,nthreads*nmax,"pair:rho");
    memory->create(rhoB,nthreads*nmax,"pair:rhoB");
    memory->create(D_values,nthreads*nmax,"pair:D_values");
    memory->create(fp,nmax,"pair:fp");
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (force->newton_pair)
      thr->init_cdeam(nall, rho, rhoB, D_values);
    else
      thr->init_cdeam(atom->nlocal, rho, rhoB, D_values);

    switch (cdeamVersion) {

    case 1:

      if (evflag) {
        if (eflag) {
          if (force->newton_pair) eval<1,1,1,1>(ifrom, ito, thr);
          else eval<1,1,0,1>(ifrom, ito, thr);
        } else {
          if (force->newton_pair) eval<1,0,1,1>(ifrom, ito, thr);
          else eval<1,0,0,1>(ifrom, ito, thr);
        }
      } else {
        if (force->newton_pair) eval<0,0,1,1>(ifrom, ito, thr);
        else eval<0,0,0,1>(ifrom, ito, thr);
      }
      break;

    case 2:

      if (evflag) {
        if (eflag) {
          if (force->newton_pair) eval<1,1,1,2>(ifrom, ito, thr);
          else eval<1,1,0,2>(ifrom, ito, thr);
        } else {
          if (force->newton_pair) eval<1,0,1,2>(ifrom, ito, thr);
          else eval<1,0,0,2>(ifrom, ito, thr);
        }
      } else {
        if (force->newton_pair) eval<0,0,1,2>(ifrom, ito, thr);
        else eval<0,0,0,2>(ifrom, ito, thr);
      }
      break;

    default:
      {
#if defined(_OPENMP)
#pragma omp master
#endif
        error->all(FLERR,"unsupported eam/cd pair style variant");
      }
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
Example #20
void PairGranHertzHistoryOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

  const int shearupdate = (update->setupflag) ? 0 : 1;

  // update rigid body info for owned & ghost atoms if using FixRigid masses
  // body[i] = which body atom I is in, -1 if none
  // mass_body = mass of each rigid body

  if (fix_rigid && neighbor->ago == 0) {
    int tmp;
    int *body = (int *) fix_rigid->extract("body",tmp);
    double *mass_body = (double *) fix_rigid->extract("masstotal",tmp);
    if (atom->nmax > nmax) {
      memory->destroy(mass_rigid);
      nmax = atom->nmax;
      memory->create(mass_rigid,nmax,"pair:mass_rigid");
    }
    int nlocal = atom->nlocal;
    for (int i = 0; i < nlocal; i++)
      if (body[i] >= 0) mass_rigid[i] = mass_body[body[i]];
      else mass_rigid[i] = 0.0;
    comm->forward_comm_pair(this);
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (evflag) {
      if (shearupdate) {
        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (shearupdate) {
        if (force->newton_pair) eval<0,1,1>(ifrom, ito, thr);
        else eval<0,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
        else eval<0,0,0>(ifrom, ito, thr);
      }
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
Example #21
void PairBrownianPolyOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int inum = list->inum;

  // This section of code adjusts R0/RT0/RS0 if necessary due to changes
  // in the volume fraction as a result of fix deform or moving walls

  double dims[3], wallcoord;
  if (flagVF) // Flag for volume fraction corrections
    if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
      if (flagdeform && !flagwall)
        for (int j = 0; j < 3; j++)
          dims[j] = domain->prd[j];
      else if (flagwall == 2 || (flagdeform && flagwall == 1)){
        double wallhi[3], walllo[3];
        for (int j = 0; j < 3; j++){
          wallhi[j] = domain->prd[j];
          walllo[j] = 0;
        }
        for (int m = 0; m < wallfix->nwall; m++){
          int dim = wallfix->wallwhich[m] / 2;
          int side = wallfix->wallwhich[m] % 2;
          if (wallfix->xstyle[m] == VARIABLE){
            wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
          }
          else wallcoord = wallfix->coord0[m];
          if (side == 0) walllo[dim] = wallcoord;
          else wallhi[dim] = wallcoord;
        }
        for (int j = 0; j < 3; j++)
          dims[j] = wallhi[j] - walllo[j];
      }
      double vol_T = dims[0]*dims[1]*dims[2];
      double vol_f = vol_P/vol_T;
      if (flaglog == 0) {
        R0  = 6*MY_PI*mu*rad*(1.0 + 2.16*vol_f);
        RT0 = 8*MY_PI*mu*cube(rad);
        //RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.33*vol_f + 2.80*vol_f*vol_f);
      } else {
        R0  = 6*MY_PI*mu*rad*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f);
        RT0 = 8*MY_PI*mu*cube(rad)*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f);
        //RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.64*vol_f - 6.95*vol_f*vol_f);
      }
    }


  // number of threads has changed. reallocate pool of pRNGs
  if (nthreads != comm->nthreads) {
    if (random_thr) {
      for (int i=1; i < nthreads; ++i)
        delete random_thr[i];

      delete[] random_thr;
    }

    nthreads = comm->nthreads;
    random_thr = new RanMars*[nthreads];
    for (int i=1; i < nthreads; ++i)
      random_thr[i] = NULL;

    // to ensure full compatibility with the serial BrownianPoly style
    // we use the serial random number generator instance for thread 0
    random_thr[0] = random;
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    // generate a random number generator instance for
    // all threads != 0. make sure we use unique seeds.
    if ((tid > 0) && (random_thr[tid] == NULL))
      random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me
                                    + comm->nprocs*tid);

    if (flaglog) {
      if (evflag)
        eval<1,1>(ifrom, ito, thr);
      else
        eval<1,0>(ifrom, ito, thr);
    } else {
      if (evflag)
        eval<0,1>(ifrom, ito, thr);
      else eval<0,0>(ifrom, ito, thr);
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
Example #22
void MSMCGOMP::compute(int eflag, int vflag)
{
  if (scalar_pressure_flag)
    error->all(FLERR,"Must use 'kspace_modify pressure/scalar no' "
      "with kspace_style msm/cg/omp");

  const double * const q = atom->q;
  const int nlocal = atom->nlocal;
  int i,j,n;

  // set energy/virial flags

  if (eflag || vflag) ev_setup(eflag,vflag);
  else evflag = evflag_atom = eflag_global = vflag_global =
    eflag_atom = vflag_atom = eflag_either = vflag_either = 0;

  // invoke allocate_peratom() if needed for first time

  if (vflag_atom && !peratom_allocate_flag) {
    allocate_peratom();
    cg_peratom_all->ghost_notify();
    cg_peratom_all->setup();
    for (int n=0; n<levels; n++) {
      if (!active_flag[n]) continue;
      cg_peratom[n]->ghost_notify();
      cg_peratom[n]->setup();
    }
    peratom_allocate_flag = 1;
  }

  // extend size of per-atom arrays if necessary

  if (atom->nmax > nmax) {
    memory->destroy(part2grid);
    memory->destroy(is_charged);
    nmax = atom->nmax;
    memory->create(part2grid,nmax,3,"msm:part2grid");
    memory->create(is_charged,nmax,"msm/cg:is_charged");
  }

  // one-time setup message

  if (num_charged < 0) {
    bigint charged_all, charged_num;
    double charged_frac, charged_fmax, charged_fmin;

    num_charged=0;
    for (i=0; i < nlocal; ++i)
      if (fabs(q[i]) > smallq)
        ++num_charged;

    // get fraction of charged particles per domain

    if (nlocal > 0)
      charged_frac = static_cast<double>(num_charged) * 100.0
                   / static_cast<double>(nlocal);
    else
      charged_frac = 0.0;

    MPI_Reduce(&charged_frac,&charged_fmax,1,MPI_DOUBLE,MPI_MAX,0,world);
    MPI_Reduce(&charged_frac,&charged_fmin,1,MPI_DOUBLE,MPI_MIN,0,world);

    // get fraction of charged particles overall

    charged_num = num_charged;
    MPI_Reduce(&charged_num,&charged_all,1,MPI_LMP_BIGINT,MPI_SUM,0,world);
    charged_frac = static_cast<double>(charged_all) * 100.0
                   / static_cast<double>(atom->natoms);

    if (me == 0) {
      if (screen)
        fprintf(screen,
                "  MSM/cg optimization cutoff: %g\n"
                "  Total charged atoms: %.1f%%\n"
                "  Min/max charged atoms/proc: %.1f%% %.1f%%\n",
                smallq,charged_frac,charged_fmin,charged_fmax);
      if (logfile)
        fprintf(logfile,
                "  MSM/cg optimization cutoff: %g\n"
                "  Total charged atoms: %.1f%%\n"
                "  Min/max charged atoms/proc: %.1f%% %.1f%%\n",
                smallq,charged_frac,charged_fmin,charged_fmax);
    }
  }

  // only need to rebuild this list after a neighbor list update
  if (neighbor->ago == 0) {
    num_charged = 0;
    for (i = 0; i < nlocal; ++i) {
      if (fabs(q[i]) > smallq) {
        is_charged[num_charged] = i;
        ++num_charged;
      }
    }
  }

  // find grid points for all my particles
  // map my particle charge onto my local 3d density grid (interpolation)

  particle_map();
  make_rho();

  // all procs reverse communicate charge density values from their ghost grid points
  //   to fully sum contributions in their 3d grid

  current_level = 0;
  cg_all->reverse_comm(this,REVERSE_RHO);

  // forward communicate charge density values to fill ghost grid points
  // compute direct sum interaction and then restrict to coarser grid

  for (int n=0; n<=levels-2; n++) {
    if (!active_flag[n]) continue;
    current_level = n;
    cg[n]->forward_comm(this,FORWARD_RHO);

    direct(n);
    restriction(n);
  }


  // compute direct interaction for top grid level for nonperiodic
  //   and for second from top grid level for periodic

  if (active_flag[levels-1]) {
    if (domain->nonperiodic) {
      current_level = levels-1;
      cg[levels-1]->forward_comm(this,FORWARD_RHO);
      direct_top(levels-1);
      cg[levels-1]->reverse_comm(this,REVERSE_AD);
      if (vflag_atom)
        cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
    } else {
      // Here using MPI_Allreduce is cheaper than using commgrid
      grid_swap_forward(levels-1,qgrid[levels-1]);
      direct(levels-1);
      grid_swap_reverse(levels-1,egrid[levels-1]);
      current_level = levels-1;
      if (vflag_atom)
        cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
    }
  }

  // prolongate energy/virial from coarser grid to finer grid
  // reverse communicate from ghost grid points to get full sum

  for (int n=levels-2; n>=0; n--) {
    if (!active_flag[n]) continue;
    prolongation(n);

    current_level = n;
    cg[n]->reverse_comm(this,REVERSE_AD);

    // extra per-atom virial communication

    if (vflag_atom)
      cg_peratom[n]->reverse_comm(this,REVERSE_AD_PERATOM);
  }

  // all procs communicate E-field values
  // to fill ghost cells surrounding their 3d bricks

  current_level = 0;
  cg_all->forward_comm(this,FORWARD_AD);

  // extra per-atom energy/virial communication

  if (vflag_atom)
    cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM);

  // calculate the force on my particles (interpolation)

  fieldforce();

  // calculate the per-atom energy/virial for my particles

  if (evflag_atom) fieldforce_peratom();

  // update qsum and qsqsum, if atom count has changed and energy needed

  if ((eflag_global || eflag_atom) && atom->natoms != natoms_original) {
    qsum_qsq();
    natoms_original = atom->natoms;
  }

  // sum global energy across procs and add in self-energy term

  const double qscale = force->qqrd2e * scale;

  if (eflag_global) {
    double energy_all;
    MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
    energy = energy_all;

    double e_self = qsqsum*gamma(0.0)/cutoff;
    energy -= e_self;
    energy *= 0.5*qscale;
  }

  // total long-range virial

  if (vflag_global) {
    double virial_all[6];
    MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
    for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*virial_all[i];
  }

  // per-atom energy/virial
  // energy includes self-energy correction

  if (evflag_atom) {
    const double qs = 0.5*qscale;

    if (eflag_atom) {
      const double sf = gamma(0.0)/cutoff;
      for (j = 0; j < num_charged; j++) {
        i = is_charged[j];
        eatom[i] -= q[i]*q[i]*sf;
        eatom[i] *= qs;
      }
    }

    if (vflag_atom) {
      for (n = 0; n < num_charged; n++) {
        i = is_charged[n];
        for (j = 0; j < 6; j++)
          vatom[i][j] *= qs;
      }
    }
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
#if defined(_OPENMP)
    const int tid = omp_get_thread_num();
#else
    const int tid = 0;
#endif
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
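
The is_charged compression above is the core msm/cg optimization: the index list of atoms with |q| > smallq is rebuilt only when the neighbor list is rebuilt, and the per-atom energy/virial loops then touch only those indices. A minimal standalone sketch of the same idea, with made-up charges and cutoff:

#include <cmath>
#include <cstdio>
#include <vector>

int main()
{
  const double smallq = 1.0e-5;      // ignore charges below this magnitude (assumed value)
  const std::vector<double> q = {0.0, 0.42, -0.84, 0.0, 0.42};

  // build the compressed index list once per "rebuild"
  std::vector<int> is_charged;
  for (int i = 0; i < (int)q.size(); ++i)
    if (std::fabs(q[i]) > smallq) is_charged.push_back(i);

  // downstream loops iterate over num_charged entries instead of nlocal
  for (int j = 0; j < (int)is_charged.size(); ++j)
    printf("charged atom index: %d\n", is_charged[j]);
  return 0;
}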
Example #23
void PPPMTIP4POMP::make_rho()
{
  const double * const q = atom->q;
  const double * const * const x = atom->x;
  const int * const type = atom->type;
  const int nthreads = comm->nthreads;
  const int nlocal = atom->nlocal;

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {  
#if defined(_OPENMP)
    // each thread works on a fixed chunk of atoms.
    const int tid = omp_get_thread_num();
    const int inum = nlocal;
    const int idelta = 1 + inum/nthreads;
    const int ifrom = tid*idelta;
    const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
    const int tid = 0;
    const int ifrom = 0;
    const int ito = nlocal;
#endif

    int l,m,n,nx,ny,nz,mx,my,mz,iH1,iH2;
    FFT_SCALAR dx,dy,dz,x0,y0,z0;
    double xM[3];

    // set up and clear the per-thread 3d density array
    const int nzoffs = (nzhi_out-nzlo_out+1)*tid;
    FFT_SCALAR * const * const * const db = &(density_brick[nzoffs]);
    memset(&(db[nzlo_out][nylo_out][nxlo_out]),0,ngrid*sizeof(FFT_SCALAR));

    ThrData *thr = fix->get_thr(tid);
    FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d());

    // loop over my charges, add their contribution to nearby grid points
    // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
    // (dx,dy,dz) = distance to "lower left" grid pt
    // (mx,my,mz) = global coords of moving stencil pt
    
    // this if protects against having more threads than local atoms
    if (ifrom < nlocal) { 
      for (int i = ifrom; i < ito; i++) {

	if (type[i] == typeO) {
	  find_M(i,iH1,iH2,xM);      
	} else {
	  xM[0] = x[i][0];
	  xM[1] = x[i][1];
	  xM[2] = x[i][2];
	}

	nx = part2grid[i][0];
	ny = part2grid[i][1];
	nz = part2grid[i][2];
	dx = nx+shiftone - (xM[0]-boxlo[0])*delxinv;
	dy = ny+shiftone - (xM[1]-boxlo[1])*delyinv;
	dz = nz+shiftone - (xM[2]-boxlo[2])*delzinv;

	compute_rho1d_thr(r1d,dx,dy,dz);

	z0 = delvolinv * q[i];
	for (n = nlower; n <= nupper; n++) {
	  mz = n+nz;
	  y0 = z0*r1d[2][n];
	  for (m = nlower; m <= nupper; m++) {
	    my = m+ny;
	    x0 = y0*r1d[1][m];
	    for (l = nlower; l <= nupper; l++) {
	      mx = l+nx;
	      db[mz][my][mx] += x0*r1d[0][l];
	    }
	  }
	}
      }
    }
#if defined(_OPENMP)
    // reduce 3d density array
    if (nthreads > 1) {
      data_reduce_fft(&(density_brick[nzlo_out][nylo_out][nxlo_out]),ngrid,nthreads,1,tid);
    }
#endif
  }
}
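
The fixed-chunk decomposition above, idelta = 1 + inum/nthreads, is a ceiling division: nthreads chunks of size idelta always cover all inum atoms, the last chunk is clamped to inum, and the guard `if (ifrom < nlocal)` handles more threads than atoms. A standalone sketch of the same arithmetic (chunk() is a stand-in helper, not a LAMMPS function):

#include <cstdio>

static void chunk(int inum, int nthreads, int tid, int &ifrom, int &ito)
{
  const int idelta = 1 + inum/nthreads;            // ceiling division
  ifrom = tid*idelta;
  ito = (ifrom + idelta > inum) ? inum : ifrom + idelta;
}

int main()
{
  int ifrom, ito;
  for (int tid = 0; tid < 4; ++tid) {
    chunk(10, 4, tid, ifrom, ito);                 // e.g. 10 atoms, 4 threads
    printf("thread %d: atoms [%d,%d)\n", tid, ifrom, ito);
  }
  return 0;
}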
Example #24
void PPPMTIP4POMP::fieldforce()
{
  // loop over my charges, interpolate electric field from nearby grid points
  // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
  // (dx,dy,dz) = distance to "lower left" grid pt
  // (mx,my,mz) = global coords of moving stencil pt
  // ek = 3 components of E-field on particle

  const double * const q = atom->q;
  const double * const * const x = atom->x;
  const int * const type = atom->type;
  const int nthreads = comm->nthreads;
  const int nlocal = atom->nlocal;
  const double qqrd2e = force->qqrd2e;

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {  
#if defined(_OPENMP)
    // each thread works on a fixed chunk of atoms.
    const int tid = omp_get_thread_num();
    const int inum = nlocal;
    const int idelta = 1 + inum/nthreads;
    const int ifrom = tid*idelta;
    const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta;
#else
    const int ifrom = 0;
    const int ito = nlocal;
    const int tid = 0;
#endif
    ThrData *thr = fix->get_thr(tid);
    double * const * const f = thr->get_f();
    FFT_SCALAR * const * const r1d =  static_cast<FFT_SCALAR **>(thr->get_rho1d());
    
    int l,m,n,nx,ny,nz,mx,my,mz;
    FFT_SCALAR dx,dy,dz,x0,y0,z0;
    FFT_SCALAR ekx,eky,ekz;
    int iH1,iH2;
    double xM[3], fx,fy,fz;
    double ddotf, rOMx, rOMy, rOMz, f1x, f1y, f1z;

    // this if protects against having more threads than local atoms
    if (ifrom < nlocal) { 
      for (int i = ifrom; i < ito; i++) {

	if (type[i] == typeO) {
	  find_M(i,iH1,iH2,xM);      
	} else {
	  xM[0] = x[i][0];
	  xM[1] = x[i][1];
	  xM[2] = x[i][2];
	}

	nx = part2grid[i][0];
	ny = part2grid[i][1];
	nz = part2grid[i][2];
	dx = nx+shiftone - (xM[0]-boxlo[0])*delxinv;
	dy = ny+shiftone - (xM[1]-boxlo[1])*delyinv;
	dz = nz+shiftone - (xM[2]-boxlo[2])*delzinv;

	compute_rho1d_thr(r1d,dx,dy,dz);

	ekx = eky = ekz = ZEROF;
	for (n = nlower; n <= nupper; n++) {
	  mz = n+nz;
	  z0 = r1d[2][n];
	  for (m = nlower; m <= nupper; m++) {
	    my = m+ny;
	    y0 = z0*r1d[1][m];
	    for (l = nlower; l <= nupper; l++) {
	      mx = l+nx;
	      x0 = y0*r1d[0][l];
	      ekx -= x0*vdx_brick[mz][my][mx];
	      eky -= x0*vdy_brick[mz][my][mx];
	      ekz -= x0*vdz_brick[mz][my][mx];
	    }
	  }
	}

	// convert E-field to force
	const double qfactor = qqrd2e*scale*q[i];

	if (type[i] != typeO) {
	  f[i][0] += qfactor*ekx;
	  f[i][1] += qfactor*eky;
	  f[i][2] += qfactor*ekz;

	} else {
	  fx = qfactor * ekx;
	  fy = qfactor * eky;
	  fz = qfactor * ekz;
	  find_M(i,iH1,iH2,xM);

	  rOMx = xM[0] - x[i][0];
	  rOMy = xM[1] - x[i][1];
	  rOMz = xM[2] - x[i][2];
  
	  ddotf = (rOMx * fx + rOMy * fy + rOMz * fz) / (qdist * qdist);

	  f1x = ddotf * rOMx;
	  f1y = ddotf * rOMy;
	  f1z = ddotf * rOMz;

	  f[i][0] += fx - alpha * (fx - f1x);
	  f[i][1] += fy - alpha * (fy - f1y);
	  f[i][2] += fz - alpha * (fz - f1z);

	  f[iH1][0] += 0.5*alpha*(fx - f1x);
	  f[iH1][1] += 0.5*alpha*(fy - f1y);
	  f[iH1][2] += 0.5*alpha*(fz - f1z);

	  f[iH2][0] += 0.5*alpha*(fx - f1x);
	  f[iH2][1] += 0.5*alpha*(fy - f1y);
	  f[iH2][2] += 0.5*alpha*(fz - f1z);
	}
      }
    }
  }
}
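
In the TIP4P branch above, the force computed at the massless M site is redistributed to the O and H atoms; the split f_O = f - alpha*(f - f1), f_H = 0.5*alpha*(f - f1) per hydrogen conserves the total force by construction. A standalone numerical check, with illustrative alpha and O-M geometry (both assumed, not taken from the code):

#include <cstdio>

int main()
{
  const double alpha = 0.7;                 // illustrative value (assumed)
  const double qdist = 0.15;                // O-to-M distance (assumed)
  const double f[3]   = {1.0, -2.0, 0.5};   // force computed at the M site
  const double rOM[3] = {0.0, 0.0, qdist};  // vector from O to M

  const double ddotf = (rOM[0]*f[0] + rOM[1]*f[1] + rOM[2]*f[2])
                       / (qdist*qdist);

  for (int k = 0; k < 3; ++k) {
    const double f1 = ddotf*rOM[k];              // projection onto rOM
    const double fO = f[k] - alpha*(f[k] - f1);  // share given to O
    const double fH = 0.5*alpha*(f[k] - f1);     // share given to each H
    printf("component %d: f=%g  fO+2*fH=%g\n", k, f[k], fO + 2.0*fH);
  }
  return 0;
}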
Example #25
void PairTriLJOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;
  const int * const tri = atom->tri;
  const int * const type = atom->type;
  AtomVecTri::Bonus * const bonus = avec->bonus;

  // grow discrete list if necessary and initialize

  if (nall > nmax) {
    nmax = nall;
    memory->destroy(dnum);
    memory->destroy(dfirst);
    memory->create(dnum,nall,"pair:dnum");
    memory->create(dfirst,nall,"pair:dfirst");
  }
  memset(dnum,0,nall*sizeof(int));
  ndiscrete = 0;

  // need to discretize the system ahead of time
  // until we find a good way to multi-thread it.
  for (int i = 0; i < nall; ++i) {
    double dc1[3],dc2[3],dc3[3],p[3][3];

    if (tri[i] >= 0) {
      if (dnum[i] == 0) {
        MathExtra::quat_to_mat(bonus[tri[i]].quat,p);
        MathExtra::matvec(p,bonus[tri[i]].c1,dc1);
        MathExtra::matvec(p,bonus[tri[i]].c2,dc2);
        MathExtra::matvec(p,bonus[tri[i]].c3,dc3);
        dfirst[i] = ndiscrete;
        discretize(i,sigma[type[i]][type[i]],dc1,dc2,dc3);
        dnum[i] = ndiscrete - dfirst[i];
      }
    }
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (evflag) {
      if (eflag) {
        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
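
All of these OMP styles share the same dispatch idiom: the run-time flags (evflag, eflag, newton_pair) select one fully specialized eval<...>() instantiation, so the hot loop itself is compiled without flag branches. A minimal standalone sketch of the pattern (eval_sketch is a stand-in, not a LAMMPS function):

#include <cstdio>

template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
static void eval_sketch(int ifrom, int ito)
{
  for (int i = ifrom; i < ito; ++i) {
    // ... force kernel ...
    if (EVFLAG) { /* tally virial; dead code when EVFLAG == 0 */ }
    if (EFLAG)  { /* tally energy; dead code when EFLAG == 0 */ }
  }
  printf("ran eval<%d,%d,%d> over [%d,%d)\n",
         EVFLAG, EFLAG, NEWTON_PAIR, ifrom, ito);
}

int main()
{
  const bool evflag = true, eflag = false, newton_pair = true;

  // run-time flags collapse into one compile-time specialization
  if (evflag) {
    if (eflag) {
      if (newton_pair) eval_sketch<1,1,1>(0, 8);
      else eval_sketch<1,1,0>(0, 8);
    } else {
      if (newton_pair) eval_sketch<1,0,1>(0, 8);
      else eval_sketch<1,0,0>(0, 8);
    }
  } else {
    if (newton_pair) eval_sketch<0,0,1>(0, 8);
    else eval_sketch<0,0,0>(0, 8);
  }
  return 0;
}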