void PairLJCutCoulDSFOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

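    // dispatch to the templated force kernel; the three compile-time flags
    // mirror evflag, eflag and force->newton_pair so these tests are
    // resolved at compile time rather than inside the inner loop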
    if (evflag) {
      if (eflag) {
        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
Example #2
void AngleSDKOMP::compute(int eflag, int vflag)
{
  ev_init(eflag,vflag);

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = neighbor->nanglelist;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (inum > 0) {
      if (evflag) {
        if (eflag) {
          if (force->newton_bond) eval<1,1,1>(ifrom, ito, thr);
          else eval<1,1,0>(ifrom, ito, thr);
        } else {
          if (force->newton_bond) eval<1,0,1>(ifrom, ito, thr);
          else eval<1,0,0>(ifrom, ito, thr);
        }
      } else {
        if (force->newton_bond) eval<0,0,1>(ifrom, ito, thr);
        else eval<0,0,0>(ifrom, ito, thr);
      }
    }
    thr->timer(Timer::BOND);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
Example #3
void PPPMCGOMP::compute(int eflag, int vflag)
{

  PPPMCG::compute(eflag,vflag);

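  // the long-range solve itself is done by the base class call above;
  // the parallel region below only merges the per-thread accumulators
  // (forces, energies, virials) gathered by the threaded kernels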
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
#if defined(_OPENMP)
    const int tid = omp_get_thread_num();
#else
    const int tid = 0;
#endif
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
Example #4
void PPPMCGOMP::compute_gf_ad()
{

  const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;

  const double xprd = prd[0];
  const double yprd = prd[1];
  const double zprd = prd[2];
  const double zprd_slab = zprd*slab_volfactor;
  const double unitkx = (MY_2PI/xprd);
  const double unitky = (MY_2PI/yprd);
  const double unitkz = (MY_2PI/zprd_slab);

  const int numk = nxhi_fft - nxlo_fft + 1;
  const int numl = nyhi_fft - nylo_fft + 1;

  const int twoorder = 2*order;
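  // per-thread partial sums used to build the self-force correction
  // coefficients, combined across threads via the OpenMP reduction clause below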
  double sf0=0.0,sf1=0.0,sf2=0.0,sf3=0.0,sf4=0.0,sf5=0.0;

#if defined(_OPENMP)
#pragma omp parallel default(none) reduction(+:sf0,sf1,sf2,sf3,sf4,sf5)
#endif
  {
    double snx,sny,snz,sqk;
    double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
    double numerator,denominator;
    int k,l,m,kper,lper,mper,n,nfrom,nto,tid;

    loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);

    for (n = nfrom; n < nto; ++n) {

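      // unpack the flat FFT index n into 3d k-space indices (k,l,m);
      // k varies fastest: n = (m*numl + l)*numk + k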
      m = n / (numl*numk);
      l = (n - m*numl*numk) / numk;
      k = n - m*numl*numk - l*numk;
      m += nzlo_fft;
      l += nylo_fft;
      k += nxlo_fft;

      mper = m - nz_pppm*(2*m/nz_pppm);
      qz = unitkz*mper;
      snz = square(sin(0.5*qz*zprd_slab/nz_pppm));
      sz = exp(-0.25*square(qz/g_ewald));
      argz = 0.5*qz*zprd_slab/nz_pppm;
      wz = powsinxx(argz,twoorder);

      lper = l - ny_pppm*(2*l/ny_pppm);
      qy = unitky*lper;
      sny = square(sin(0.5*qy*yprd/ny_pppm));
      sy = exp(-0.25*square(qy/g_ewald));
      argy = 0.5*qy*yprd/ny_pppm;
      wy = powsinxx(argy,twoorder);

      kper = k - nx_pppm*(2*k/nx_pppm);
      qx = unitkx*kper;
      snx = square(sin(0.5*qx*xprd/nx_pppm));
      sx = exp(-0.25*square(qx/g_ewald));
      argx = 0.5*qx*xprd/nx_pppm;
      wx = powsinxx(argx,twoorder);

      sqk = qx*qx + qy*qy + qz*qz;

      if (sqk != 0.0) {
        numerator = MY_4PI/sqk;
        denominator = gf_denom(snx,sny,snz);
        greensfn[n] = numerator*sx*sy*sz*wx*wy*wz/denominator;
        sf0 += sf_precoeff1[n]*greensfn[n];
        sf1 += sf_precoeff2[n]*greensfn[n];
        sf2 += sf_precoeff3[n]*greensfn[n];
        sf3 += sf_precoeff4[n]*greensfn[n];
        sf4 += sf_precoeff5[n]*greensfn[n];
        sf5 += sf_precoeff6[n]*greensfn[n];
      } else {
        greensfn[n] = 0.0;
        sf0 += sf_precoeff1[n]*greensfn[n];
        sf1 += sf_precoeff2[n]*greensfn[n];
        sf2 += sf_precoeff3[n]*greensfn[n];
        sf3 += sf_precoeff4[n]*greensfn[n];
        sf4 += sf_precoeff5[n]*greensfn[n];
        sf5 += sf_precoeff6[n]*greensfn[n];
      }
    }
    thr->timer(Timer::KSPACE);
  } // end of parallel region

  // compute the coefficients for the self-force correction

  double prex, prey, prez, tmp[6];
  prex = prey = prez = MY_PI/volume;
  prex *= nx_pppm/xprd;
  prey *= ny_pppm/yprd;
  prez *= nz_pppm/zprd_slab;
  tmp[0] = sf0 * prex;
  tmp[1] = sf1 * prex*2;
  tmp[2] = sf2 * prey;
  tmp[3] = sf3 * prey*2;
  tmp[4] = sf4 * prez;
  tmp[5] = sf5 * prez*2;

  // communicate values with other procs

  MPI_Allreduce(tmp,sf_coeff,6,MPI_DOUBLE,MPI_SUM,world);
}
Example #5
void PPPMCGOMP::compute_gf_ik()
{
  const double * const prd = (triclinic==0) ? domain->prd : domain->prd_lamda;

  const double xprd = prd[0];
  const double yprd = prd[1];
  const double zprd = prd[2];
  const double zprd_slab = zprd*slab_volfactor;
  const double unitkx = (MY_2PI/xprd);
  const double unitky = (MY_2PI/yprd);
  const double unitkz = (MY_2PI/zprd_slab);

  const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
                                    pow(-log(EPS_HOC),0.25));
  const int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
                                    pow(-log(EPS_HOC),0.25));
  const int nbz = static_cast<int> ((g_ewald*zprd_slab/(MY_PI*nz_pppm)) *
                                    pow(-log(EPS_HOC),0.25));
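  // nbx/nby/nbz bound the sums over aliasing images of each k-vector in
  // the nested loops below; the -log(EPS_HOC) factor ties the cutoff of
  // the image sums to the tolerance EPS_HOC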
  const int numk = nxhi_fft - nxlo_fft + 1;
  const int numl = nyhi_fft - nylo_fft + 1;

  const int twoorder = 2*order;

#if defined(_OPENMP)
#pragma omp parallel default(none)
#endif
  {
    double snx,sny,snz;
    double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
    double sum1,dot1,dot2;
    double numerator,denominator;
    double sqk;

    int k,l,m,nx,ny,nz,kper,lper,mper,n,nfrom,nto,tid;

    loop_setup_thr(nfrom, nto, tid, nfft, comm->nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);

    for (n = nfrom; n < nto; ++n) {
      m = n / (numl*numk);
      l = (n - m*numl*numk) / numk;
      k = n - m*numl*numk - l*numk;
      m += nzlo_fft;
      l += nylo_fft;
      k += nxlo_fft;

      mper = m - nz_pppm*(2*m/nz_pppm);
      snz = square(sin(0.5*unitkz*mper*zprd_slab/nz_pppm));

      lper = l - ny_pppm*(2*l/ny_pppm);
      sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));

      kper = k - nx_pppm*(2*k/nx_pppm);
      snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));

      sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);

      if (sqk != 0.0) {
        numerator = MY_4PI/sqk;
        denominator = gf_denom(snx,sny,snz);
        sum1 = 0.0;

        for (nx = -nbx; nx <= nbx; nx++) {
          qx = unitkx*(kper+nx_pppm*nx);
          sx = exp(-0.25*square(qx/g_ewald));
          argx = 0.5*qx*xprd/nx_pppm;
          wx = powsinxx(argx,twoorder);

          for (ny = -nby; ny <= nby; ny++) {
            qy = unitky*(lper+ny_pppm*ny);
            sy = exp(-0.25*square(qy/g_ewald));
            argy = 0.5*qy*yprd/ny_pppm;
            wy = powsinxx(argy,twoorder);

            for (nz = -nbz; nz <= nbz; nz++) {
              qz = unitkz*(mper+nz_pppm*nz);
              sz = exp(-0.25*square(qz/g_ewald));
              argz = 0.5*qz*zprd_slab/nz_pppm;
              wz = powsinxx(argz,twoorder);

              dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
              dot2 = qx*qx+qy*qy+qz*qz;
              sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
            }
          }
        }
        greensfn[n] = numerator*sum1/denominator;
      } else greensfn[n] = 0.0;
    }
    thr->timer(Timer::KSPACE);
  } // end of parallel region
}
Example #6
void PairCDEAMOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = eflag_global = eflag_atom = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

  // grow the per-thread density arrays and the fp array if necessary
  // rho, rhoB and D_values hold one atom->nmax slice per thread; fp is atom->nmax long

  if (atom->nmax > nmax) {
    memory->destroy(rho);
    memory->destroy(rhoB);
    memory->destroy(D_values);
    memory->destroy(fp);
    nmax = atom->nmax;
    memory->create(rho,nthreads*nmax,"pair:rho");
    memory->create(rhoB,nthreads*nmax,"pair:rhoB");
    memory->create(D_values,nthreads*nmax,"pair:D_values");
    memory->create(fp,nmax,"pair:fp");
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

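    // with newton_pair on, partial densities are also accumulated on ghost
    // atoms (and communicated back later), so the per-thread work arrays
    // are cleared over nall rather than just the local atoms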
    if (force->newton_pair)
      thr->init_cdeam(nall, rho, rhoB, D_values);
    else
      thr->init_cdeam(atom->nlocal, rho, rhoB, D_values);

    switch (cdeamVersion) {

    case 1:

      if (evflag) {
        if (eflag) {
          if (force->newton_pair) eval<1,1,1,1>(ifrom, ito, thr);
          else eval<1,1,0,1>(ifrom, ito, thr);
        } else {
          if (force->newton_pair) eval<1,0,1,1>(ifrom, ito, thr);
          else eval<1,0,0,1>(ifrom, ito, thr);
        }
      } else {
        if (force->newton_pair) eval<0,0,1,1>(ifrom, ito, thr);
        else eval<0,0,0,1>(ifrom, ito, thr);
      }
      break;

    case 2:

      if (evflag) {
        if (eflag) {
          if (force->newton_pair) eval<1,1,1,2>(ifrom, ito, thr);
          else eval<1,1,0,2>(ifrom, ito, thr);
        } else {
          if (force->newton_pair) eval<1,0,1,2>(ifrom, ito, thr);
          else eval<1,0,0,2>(ifrom, ito, thr);
        }
      } else {
        if (force->newton_pair) eval<0,0,1,2>(ifrom, ito, thr);
        else eval<0,0,0,2>(ifrom, ito, thr);
      }
      break;

    default:
      {
#if defined(_OPENMP)
#pragma omp master
#endif
        error->all(FLERR,"unsupported eam/cd pair style variant");
      }
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
Example #7
void PairGranHertzHistoryOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

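  // update->setupflag is set while a run is being set up, so shear
  // (contact history) data is not advanced by setup-time force calls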
  const int shearupdate = (update->setupflag) ? 0 : 1;

  // update rigid body info for owned & ghost atoms if using FixRigid masses
  // body[i] = which body atom I is in, -1 if none
  // mass_body = mass of each rigid body

  if (fix_rigid && neighbor->ago == 0) {
    int tmp;
    int *body = (int *) fix_rigid->extract("body",tmp);
    double *mass_body = (double *) fix_rigid->extract("masstotal",tmp);
    if (atom->nmax > nmax) {
      memory->destroy(mass_rigid);
      nmax = atom->nmax;
      memory->create(mass_rigid,nmax,"pair:mass_rigid");
    }
    int nlocal = atom->nlocal;
    for (int i = 0; i < nlocal; i++)
      if (body[i] >= 0) mass_rigid[i] = mass_body[body[i]];
      else mass_rigid[i] = 0.0;
    comm->forward_comm_pair(this);
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (evflag) {
      if (shearupdate) {
        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (shearupdate) {
        if (force->newton_pair) eval<0,1,1>(ifrom, ito, thr);
        else eval<0,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
        else eval<0,0,0>(ifrom, ito, thr);
      }
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
Example #8
void PairBrownianPolyOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int inum = list->inum;

  // This section of code adjusts R0/RT0/RS0 if necessary due to changes
  // in the volume fraction as a result of fix deform or moving walls

  double dims[3], wallcoord;
  if (flagVF) // Flag for volume fraction corrections
    if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
      if (flagdeform && !flagwall)
        for (int j = 0; j < 3; j++)
          dims[j] = domain->prd[j];
      else if (flagwall == 2 || (flagdeform && flagwall == 1)){
        double wallhi[3], walllo[3];
        for (int j = 0; j < 3; j++){
          wallhi[j] = domain->prd[j];
          walllo[j] = 0;
        }
        for (int m = 0; m < wallfix->nwall; m++){
          int dim = wallfix->wallwhich[m] / 2;
          int side = wallfix->wallwhich[m] % 2;
          if (wallfix->xstyle[m] == VARIABLE){
            wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
          }
          else wallcoord = wallfix->coord0[m];
          if (side == 0) walllo[dim] = wallcoord;
          else wallhi[dim] = wallcoord;
        }
        for (int j = 0; j < 3; j++)
          dims[j] = wallhi[j] - walllo[j];
      }
      double vol_T = dims[0]*dims[1]*dims[2];
      double vol_f = vol_P/vol_T;
      if (flaglog == 0) {
        R0  = 6*MY_PI*mu*rad*(1.0 + 2.16*vol_f);
        RT0 = 8*MY_PI*mu*cube(rad);
        //RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.33*vol_f + 2.80*vol_f*vol_f);
      } else {
        R0  = 6*MY_PI*mu*rad*(1.0 + 2.725*vol_f - 6.583*vol_f*vol_f);
        RT0 = 8*MY_PI*mu*cube(rad)*(1.0 + 0.749*vol_f - 2.469*vol_f*vol_f);
        //RS0 = 20.0/3.0*MY_PI*mu*pow(rad,3)*(1.0 + 3.64*vol_f - 6.95*vol_f*vol_f);
      }
    }


  // number of threads has changed. reallocate pool of pRNGs
  if (nthreads != comm->nthreads) {
    if (random_thr) {
      for (int i=1; i < nthreads; ++i)
        delete random_thr[i];

      delete[] random_thr;
    }

    nthreads = comm->nthreads;
    random_thr = new RanMars*[nthreads];
    for (int i=1; i < nthreads; ++i)
      random_thr[i] = NULL;

    // to ensure full compatibility with the serial BrownianPoly style
    // we use the serial random number generator instance for thread 0
    random_thr[0] = random;
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    // generate a random number generator instance for
    // all threads != 0. make sure we use unique seeds.
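    // seed + comm->me + comm->nprocs*tid is different for every
    // (MPI rank, thread) combination, so each generator gets its own seed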
    if ((tid > 0) && (random_thr[tid] == NULL))
      random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me
                                    + comm->nprocs*tid);

    if (flaglog) {
      if (evflag)
        eval<1,1>(ifrom, ito, thr);
      else
        eval<1,0>(ifrom, ito, thr);
    } else {
      if (evflag)
        eval<0,1>(ifrom, ito, thr);
      else eval<0,0>(ifrom, ito, thr);
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
Example #9
void MSMCGOMP::compute(int eflag, int vflag)
{
  if (scalar_pressure_flag)
    error->all(FLERR,"Must use 'kspace_modify pressure/scalar no' "
      "with kspace_style msm/cg/omp");

  const double * const q = atom->q;
  const int nlocal = atom->nlocal;
  int i,j,n;

  // set energy/virial flags

  if (eflag || vflag) ev_setup(eflag,vflag);
  else evflag = evflag_atom = eflag_global = vflag_global =
    eflag_atom = vflag_atom = eflag_either = vflag_either = 0;

  // invoke allocate_peratom() if needed for first time

  if (vflag_atom && !peratom_allocate_flag) {
    allocate_peratom();
    cg_peratom_all->ghost_notify();
    cg_peratom_all->setup();
    for (int n=0; n<levels; n++) {
      if (!active_flag[n]) continue;
      cg_peratom[n]->ghost_notify();
      cg_peratom[n]->setup();
    }
    peratom_allocate_flag = 1;
  }

  // extend size of per-atom arrays if necessary

  if (atom->nmax > nmax) {
    memory->destroy(part2grid);
    memory->destroy(is_charged);
    nmax = atom->nmax;
    memory->create(part2grid,nmax,3,"msm:part2grid");
    memory->create(is_charged,nmax,"msm/cg:is_charged");
  }

  // one time setup message

  if (num_charged < 0) {
    bigint charged_all, charged_num;
    double charged_frac, charged_fmax, charged_fmin;

    num_charged=0;
    for (i=0; i < nlocal; ++i)
      if (fabs(q[i]) > smallq)
        ++num_charged;

    // get fraction of charged particles per domain

    if (nlocal > 0)
      charged_frac = static_cast<double>(num_charged) * 100.0
                   / static_cast<double>(nlocal);
    else
      charged_frac = 0.0;

    MPI_Reduce(&charged_frac,&charged_fmax,1,MPI_DOUBLE,MPI_MAX,0,world);
    MPI_Reduce(&charged_frac,&charged_fmin,1,MPI_DOUBLE,MPI_MIN,0,world);

    // get fraction of charged particles overall

    charged_num = num_charged;
    MPI_Reduce(&charged_num,&charged_all,1,MPI_LMP_BIGINT,MPI_SUM,0,world);
    charged_frac = static_cast<double>(charged_all) * 100.0
                   / static_cast<double>(atom->natoms);

    if (me == 0) {
      if (screen)
        fprintf(screen,
                "  MSM/cg optimization cutoff: %g\n"
                "  Total charged atoms: %.1f%%\n"
                "  Min/max charged atoms/proc: %.1f%% %.1f%%\n",
                smallq,charged_frac,charged_fmin,charged_fmax);
      if (logfile)
        fprintf(logfile,
                "  MSM/cg optimization cutoff: %g\n"
                "  Total charged atoms: %.1f%%\n"
                "  Min/max charged atoms/proc: %.1f%% %.1f%%\n",
                smallq,charged_frac,charged_fmin,charged_fmax);
    }
  }

  // rebuild the list of charged local atoms (|q| > smallq); only needed after a neighbor list update
  if (neighbor->ago == 0) {
    num_charged = 0;
    for (i = 0; i < nlocal; ++i) {
      if (fabs(q[i]) > smallq) {
        is_charged[num_charged] = i;
        ++num_charged;
      }
    }
  }

  // find grid points for all my particles
  // map my particle charge onto my local 3d density grid (an interpolation)

  particle_map();
  make_rho();

  // all procs reverse communicate charge density values from their ghost grid points
  //   to fully sum the contributions on their 3d grid

  current_level = 0;
  cg_all->reverse_comm(this,REVERSE_RHO);

  // forward communicate charge density values to fill ghost grid points
  // compute direct sum interaction and then restrict to coarser grid

  for (int n=0; n<=levels-2; n++) {
    if (!active_flag[n]) continue;
    current_level = n;
    cg[n]->forward_comm(this,FORWARD_RHO);

    direct(n);
    restriction(n);
  }


  // compute direct interaction for top grid level for nonperiodic
  //   and for second from top grid level for periodic

  if (active_flag[levels-1]) {
    if (domain->nonperiodic) {
      current_level = levels-1;
      cg[levels-1]->forward_comm(this,FORWARD_RHO);
      direct_top(levels-1);
      cg[levels-1]->reverse_comm(this,REVERSE_AD);
      if (vflag_atom)
        cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
    } else {
      // Here using MPI_Allreduce is cheaper than using commgrid
      grid_swap_forward(levels-1,qgrid[levels-1]);
      direct(levels-1);
      grid_swap_reverse(levels-1,egrid[levels-1]);
      current_level = levels-1;
      if (vflag_atom)
        cg_peratom[levels-1]->reverse_comm(this,REVERSE_AD_PERATOM);
    }
  }

  // prolongate energy/virial from coarser grid to finer grid
  // reverse communicate from ghost grid points to get full sum

  for (int n=levels-2; n>=0; n--) {
    if (!active_flag[n]) continue;
    prolongation(n);

    current_level = n;
    cg[n]->reverse_comm(this,REVERSE_AD);

    // extra per-atom virial communication

    if (vflag_atom)
      cg_peratom[n]->reverse_comm(this,REVERSE_AD_PERATOM);
  }

  // all procs communicate E-field values
  // to fill ghost cells surrounding their 3d bricks

  current_level = 0;
  cg_all->forward_comm(this,FORWARD_AD);

  // extra per-atom energy/virial communication

  if (vflag_atom)
    cg_peratom_all->forward_comm(this,FORWARD_AD_PERATOM);

  // calculate the force on my particles (interpolation)

  fieldforce();

  // calculate the per-atom energy/virial for my particles

  if (evflag_atom) fieldforce_peratom();

  // update qsum and qsqsum, if atom count has changed and energy needed

  if ((eflag_global || eflag_atom) && atom->natoms != natoms_original) {
    qsum_qsq();
    natoms_original = atom->natoms;
  }

  // sum global energy across procs and subtract the self-energy term

  const double qscale = force->qqrd2e * scale;

  if (eflag_global) {
    double energy_all;
    MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
    energy = energy_all;

    double e_self = qsqsum*gamma(0.0)/cutoff;
    energy -= e_self;
    energy *= 0.5*qscale;
  }

  // total long-range virial

  if (vflag_global) {
    double virial_all[6];
    MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
    for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*virial_all[i];
  }

  // per-atom energy/virial
  // energy includes self-energy correction

  if (evflag_atom) {
    const double qs = 0.5*qscale;

    if (eflag_atom) {
      const double sf = gamma(0.0)/cutoff;
      for (j = 0; j < num_charged; j++) {
        i = is_charged[j];
        eatom[i] -= q[i]*q[i]*sf;
        eatom[i] *= qs;
      }
    }

    if (vflag_atom) {
      for (n = 0; n < num_charged; n++) {
        i = is_charged[n];
        for (j = 0; j < 6; j++)
          vatom[i][j] *= qs;
      }
    }
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
#if defined(_OPENMP)
    const int tid = omp_get_thread_num();
#else
    const int tid = 0;
#endif
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
Example #10
void PairTriLJOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;
  const int * const tri = atom->tri;
  const int * const type = atom->type;
  AtomVecTri::Bonus * const bonus = avec->bonus;

  // grow discrete list if necessary and initialize

  if (nall > nmax) {
    nmax = nall;
    memory->destroy(dnum);
    memory->destroy(dfirst);
    memory->create(dnum,nall,"pair:dnum");
    memory->create(dfirst,nall,"pair:dfirst");
  }
  memset(dnum,0,nall*sizeof(int));
  ndiscrete = 0;

  // need to discretize the system ahead of time
  // until we find a good way to multi-thread it.
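  // dfirst[i] holds the index of atom i's first discrete element in the
  // global list and dnum[i] the number of elements generated for it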
  for (int i = 0; i < nall; ++i) {
    double dc1[3],dc2[3],dc3[3],p[3][3];

    if (tri[i] >= 0) {
      if (dnum[i] == 0) {
        MathExtra::quat_to_mat(bonus[tri[i]].quat,p);
        MathExtra::matvec(p,bonus[tri[i]].c1,dc1);
        MathExtra::matvec(p,bonus[tri[i]].c2,dc2);
        MathExtra::matvec(p,bonus[tri[i]].c3,dc3);
        dfirst[i] = ndiscrete;
        discretize(i,sigma[type[i]][type[i]],dc1,dc2,dc3);
        dnum[i] = ndiscrete - dfirst[i];
      }
    }
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    if (evflag) {
      if (eflag) {
        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
Example #11
void PairDPDOMP::compute(int eflag, int vflag)
{
  if (eflag || vflag) {
    ev_setup(eflag,vflag);
  } else evflag = vflag_fdotr = 0;

  const int nall = atom->nlocal + atom->nghost;
  const int inum = list->inum;

  // number of threads has changed. reallocate pool of pRNGs
  if (nthreads != comm->nthreads) {
    if (random_thr) {
      for (int i=1; i < nthreads; ++i)
        delete random_thr[i];

      delete[] random_thr;
    }

    nthreads = comm->nthreads;
    random_thr = new RanMars*[nthreads];
    for (int i=1; i < nthreads; ++i)
      random_thr[i] = NULL;

    // to ensure full compatibility with the serial DPD style
    // we use the serial random number generator instance for thread 0
    random_thr[0] = random;
  }

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    thr->timer(Timer::START);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    // generate a random number generator instance for
    // all threads != 0. make sure we use unique seeds.
    if ((tid > 0) && (random_thr[tid] == NULL))
      random_thr[tid] = new RanMars(Pair::lmp, seed + comm->me
                                    + comm->nprocs*tid);

    if (evflag) {
      if (eflag) {
        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
        else eval<1,1,0>(ifrom, ito, thr);
      } else {
        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
        else eval<1,0,0>(ifrom, ito, thr);
      }
    } else {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    }

    thr->timer(Timer::PAIR);
    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}