Example #1
0
/// Evaluate the EAM potential energy and the force on every local atom.
///
/// The computation is organised as three sweeps:
///
///   -# For each atom of a local box, visit every atom of the 27
///   neighbor boxes; accumulate the pair (phi) force, half of the pair
///   energy and the electron density (rhobar) of the local atom
///   -# For each atom of a local box, interpolate the embedding function
///   at rhobar, add the embedding energy to the total and store the
///   derivative in pot->dfEmbed
///   -# After a halo exchange of dfEmbed, visit the same pairs again and
///   add the embedding contribution to the force on the local atom
///
/// Inside the pair sweeps only the atom indexed by iOff is written;
/// the partner atom (jOff) is read-only.
///
/// \param [in,out] s  Simulation state.  s->atoms->f is zeroed and
///                    refilled, s->ePotential receives the summed energy.
/// \return Always 0.
int eamForce(SimFlat* s)
{
   EamPotential* pot = (EamPotential*) s->pot;
   assert(pot);

   // First call only: allocate the per-atom scratch arrays and build
   // the halo exchange that communicates dfEmbed.
   if (pot->forceExchange == NULL)
   {
      int nSlotsMax = MAXATOMS*s->boxes->nTotalBoxes;
      pot->dfEmbed = comdMalloc(nSlotsMax*sizeof(real_t));
      pot->rhobar  = comdMalloc(nSlotsMax*sizeof(real_t));
      pot->forceExchange = initForceHaloExchange(s->domain, s->boxes);
      pot->forceExchangeData = comdMalloc(sizeof(ForceExchangeData));
      pot->forceExchangeData->dfEmbed = pot->dfEmbed;
      pot->forceExchangeData->boxes = s->boxes;
   }

   const real_t rCut2 = pot->cutoff*pot->cutoff;
   real_t etot = 0.;

   // Reset force, dfEmbed and rhobar for every atom slot (local and halo).
   const int nSlots = s->boxes->nTotalBoxes*MAXATOMS;
   for (int slot=0; slot<nSlots; slot++)
   {
      zeroReal3(s->atoms->f[slot]);
      pot->dfEmbed[slot] = 0.;
      pot->rhobar[slot] = 0.;
   }

   const int nNbrBoxes = 27;

   // Sweep 1: pair force, pair energy and electron density.
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      const int iBegin = MAXATOMS*iBox;
      const int iEnd   = iBegin + s->boxes->nAtoms[iBox];

      // neighbor boxes of iBox (some may be halo boxes)
      for (int nbr=0; nbr<nNbrBoxes; nbr++)
      {
         const int jBox   = s->boxes->nbrBoxes[iBox][nbr];
         const int jBegin = MAXATOMS*jBox;
         const int jEnd   = jBegin + s->boxes->nAtoms[jBox];

         for (int iOff=iBegin; iOff<iEnd; iOff++)
         {
            for (int jOff=jBegin; jOff<jEnd; jOff++)
            {
               real3 dr;
               real_t r2 = 0.0;
               for (int k=0; k<3; k++)
               {
                  dr[k] = s->atoms->r[iOff][k]-s->atoms->r[jOff][k];
                  r2 += dr[k]*dr[k];
               }

               // keep only pairs inside the cutoff with non-zero separation
               if (!(r2 <= rCut2 && r2 > 0.0))
                  continue;

               const real_t r = sqrt(r2);
               real_t phiTmp, dPhi, rhoTmp, dRho;
               interpolate(pot->phi, r, &phiTmp, &dPhi);
               interpolate(pot->rho, r, &rhoTmp, &dRho);

               for (int k=0; k<3; k++)
               {
                  s->atoms->f[iOff][k] -= dPhi*dr[k]/r;
               }

               // each visit credits half of the pair energy
               etot += 0.5*phiTmp;

               // electron density seen by atom iOff
               pot->rhobar[iOff] += rhoTmp;
            }
         }
      }
   }

   // Sweep 2: embedding energy and its derivative for local atoms.
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      const int iBegin = MAXATOMS*iBox;
      const int iEnd   = iBegin + s->boxes->nAtoms[iBox];

      for (int iOff=iBegin; iOff<iEnd; iOff++)
      {
         real_t fEmbed, dfEmbed;
         interpolate(pot->f, pot->rhobar[iOff], &fEmbed, &dfEmbed);
         pot->dfEmbed[iOff] = dfEmbed; // kept for the halo exchange below
         etot += fEmbed;
      }
   }

   // Exchange the derivative of the embedding energy with respect to rhobar.
   startTimer(eamHaloTimer);
   haloExchange(pot->forceExchange, pot->forceExchangeData);
   stopTimer(eamHaloTimer);

   // Sweep 3: embedding contribution to the force on local atoms.
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      const int iBegin = MAXATOMS*iBox;
      const int iEnd   = iBegin + s->boxes->nAtoms[iBox];

      // neighbor boxes of iBox (some may be halo boxes)
      for (int nbr=0; nbr<nNbrBoxes; nbr++)
      {
         const int jBox   = s->boxes->nbrBoxes[iBox][nbr];
         const int jBegin = MAXATOMS*jBox;
         const int jEnd   = jBegin + s->boxes->nAtoms[jBox];

         for (int iOff=iBegin; iOff<iEnd; iOff++)
         {
            for (int jOff=jBegin; jOff<jEnd; jOff++)
            {
               real_t r2 = 0.0;
               real3 dr;
               for (int k=0; k<3; k++)
               {
                  dr[k] = s->atoms->r[iOff][k]-s->atoms->r[jOff][k];
                  r2 += dr[k]*dr[k];
               }

               // same pair selection as in sweep 1
               if (!(r2 <= rCut2 && r2 > 0.0))
                  continue;

               const real_t r = sqrt(r2);

               real_t rhoTmp, dRho;
               interpolate(pot->rho, r, &rhoTmp, &dRho);

               // (dF_i + dF_j) * rho'(r), shared by the three components
               const real_t embedScale = (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho;
               for (int k=0; k<3; k++)
               {
                  s->atoms->f[iOff][k] -= embedScale*dr[k]/r;
               }
            }
         }
      }
   }

   s->ePotential = (real_t) etot;

   return 0;
}
Example #2
0
/// Calculate potential energy, forces and the virial stress tensor for
/// the EAM potential.
///
/// Three steps are required:
///
///   -# Loop over all atoms and their neighbors, compute the two-body
///   interaction and the electron density at each atom
///   -# Loop over all atoms, compute the embedding energy and its
///   derivative for each atom
///   -# Loop over all atoms and their neighbors, compute the embedding
///   energy contribution to the force and add to the two-body force
///
/// Each pair is visited once (boxes with jBox < iBox are skipped, and
/// within a box only ij > ii is kept); both atoms of the pair are
/// updated with opposite signs.
///
/// The nine stress components (index m = 3*i + j) receive the pair
/// terms in steps 1 and 3, have the p_i*p_j/mass term subtracted in
/// step 2, and are divided by s->defInfo->globalVolume at the end.
///
/// \param [in,out] s  Simulation state.  Overwrites s->atoms->f,
///                    s->atoms->U, s->defInfo->stress and s->ePotential.
/// \return Always 0.
int eamForce(SimFlat* s)
{
   EamPotential* pot = (EamPotential*) s->pot;
   assert(pot);

   // set up halo exchange and internal storage on first call to forces.
   if (pot->forceExchange == NULL)
   {
      int maxTotalAtoms = MAXATOMS*s->boxes->nTotalBoxes;
      pot->dfEmbed = comdMalloc(maxTotalAtoms*sizeof(real_t));
      pot->rhobar  = comdMalloc(maxTotalAtoms*sizeof(real_t));
      pot->forceExchange = initForceHaloExchange(s->domain, s->boxes);
      pot->forceExchangeData = comdMalloc(sizeof(ForceExchangeData));
      pot->forceExchangeData->dfEmbed = pot->dfEmbed;
      pot->forceExchangeData->boxes = s->boxes;
   }
   
   real_t rCut2 = pot->cutoff*pot->cutoff;

   // zero forces / energy / rho / rhoprime
   real_t etot = 0.0;
   memset(s->atoms->f,  0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real3));
   memset(s->atoms->U,  0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real_t));
   memset(pot->dfEmbed, 0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real_t));
   memset(pot->rhobar,  0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real_t));

   // reset the virial stress accumulator
   for (int m = 0;m<9;m++) 
   {
      s->defInfo->stress[m] = 0.0;
   }

   int nbrBoxes[27];
   // loop over local boxes
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      int nIBox = s->boxes->nAtoms[iBox];
      int nNbrBoxes = getNeighborBoxes(s->boxes, iBox, nbrBoxes);
      // loop over neighbor boxes of iBox (some may be halo boxes)
      for (int jTmp=0; jTmp<nNbrBoxes; jTmp++)
      {
         int jBox = nbrBoxes[jTmp];
         // the (iBox, jBox) box pair is handled when the smaller index is iBox
         if (jBox < iBox ) continue;

         int nJBox = s->boxes->nAtoms[jBox];
         // loop over atoms in iBox
         for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
         {
            // loop over atoms in jBox
            for (int jOff=MAXATOMS*jBox,ij=0; ij<nJBox; ij++,jOff++)
            {
               // within one box keep only ij > ii: no self pair, no double count
               if ( (iBox==jBox) &&(ij <= ii) ) continue;

               double r2 = 0.0;
               real3 dr;
               for (int k=0; k<3; k++)
               {
                  dr[k]=s->atoms->r[iOff][k]-s->atoms->r[jOff][k];
                  r2+=dr[k]*dr[k];
               }
               // NOTE(review): this pass keeps r2 == rCut2, the third pass
               // drops it (uses >=) -- confirm which boundary is intended.
               if(r2>rCut2) continue;

               double r = sqrt(r2);

               real_t phiTmp, dPhi, rhoTmp, dRho;
               interpolate(pot->phi, r, &phiTmp, &dPhi);
               interpolate(pot->rho, r, &rhoTmp, &dRho);

               // equal and opposite pair force on the two atoms
               for (int k=0; k<3; k++)
               {
                  s->atoms->f[iOff][k] -= dPhi*dr[k]/r;
                  s->atoms->f[jOff][k] += dPhi*dr[k]/r;
               }

               // pair contribution to the stress tensor
               for (int i=0; i<3; i++)
               {
                  for (int j=0; j<3; j++)
                  {
                     int m = 3*i + j;
                     s->defInfo->stress[m] += 1.0*dPhi*dr[i]*dr[j]/r;
                  }
               }

               // update energy terms
               // calculate energy contribution based on whether
               // the neighbor box is local or remote
               if (jBox < s->boxes->nLocalBoxes)
                  etot += phiTmp;
               else
                  etot += 0.5*phiTmp;

               s->atoms->U[iOff] += 0.5*phiTmp;
               s->atoms->U[jOff] += 0.5*phiTmp;

               // accumulate rhobar for each atom
               pot->rhobar[iOff] += rhoTmp;
               pot->rhobar[jOff] += rhoTmp;

            } // loop over atoms in jBox
         } // loop over atoms in iBox
      } // loop over neighbor boxes
   } // loop over local boxes

   // Compute Embedding Energy
   // loop over all local boxes
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      int nIBox =  s->boxes->nAtoms[iBox];

      // loop over atoms in iBox
      for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
      {
         real_t fEmbed, dfEmbed;
         interpolate(pot->f, pot->rhobar[iOff], &fEmbed, &dfEmbed);
         pot->dfEmbed[iOff] = dfEmbed; // save derivative for halo exchange
         etot += fEmbed; 
         s->atoms->U[iOff] += fEmbed;

         // subtract the p_i*p_j/mass term of this atom from the stress
         int iSpecies = s->atoms->iSpecies[iOff];
         real_t invMass = 1.0/s->species[iSpecies].mass;
         for (int i=0; i<3; i++)
         {
            for (int j=0; j<3; j++)
            {
               int m = 3*i + j;
               s->defInfo->stress[m] -= s->atoms->p[iOff][i]*s->atoms->p[iOff][j]*invMass;
            }
         }
      }
   }

   // exchange derivative of the embedding energy with respect to rhobar
   startTimer(eamHaloTimer);
   haloExchange(pot->forceExchange, pot->forceExchangeData);
   stopTimer(eamHaloTimer);

   // third pass
   // loop over local boxes
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      int nIBox =  s->boxes->nAtoms[iBox];
      int nNbrBoxes = getNeighborBoxes(s->boxes, iBox, nbrBoxes);
      // loop over neighbor boxes of iBox (some may be halo boxes)
      for (int jTmp=0; jTmp<nNbrBoxes; jTmp++)
      {
         int jBox = nbrBoxes[jTmp];
         if(jBox < iBox) continue;

         int nJBox = s->boxes->nAtoms[jBox];
         // loop over atoms in iBox
         for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
         {
            // loop over atoms in jBox
            for (int jOff=MAXATOMS*jBox,ij=0; ij<nJBox; ij++,jOff++)
            { 
               if ((iBox==jBox) && (ij <= ii))  continue;

               double r2 = 0.0;
               real3 dr;
               for (int k=0; k<3; k++)
               {
                  dr[k]=s->atoms->r[iOff][k]-s->atoms->r[jOff][k];
                  r2+=dr[k]*dr[k];
               }
               // NOTE(review): >= here versus > in the first pass.
               if(r2>=rCut2) continue;

               real_t r = sqrt(r2);

               real_t rhoTmp, dRho;
               interpolate(pot->rho, r, &rhoTmp, &dRho);

               // embedding force: (dF_i + dF_j) * rho'(r) along dr
               for (int k=0; k<3; k++)
               {
                  s->atoms->f[iOff][k] -= (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[k]/r;
                  s->atoms->f[jOff][k] += (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[k]/r;
               }

               // embedding contribution to the stress tensor
               for (int i=0; i<3; i++)
               {
                  for (int j=0; j<3; j++)
                  {
                     int m = 3*i + j;
                     s->defInfo->stress[m] += 1.0*(pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[i]*dr[j]/r;
                  }
               }

            } // loop over atoms in jBox
         } // loop over atoms in iBox
      } // loop over neighbor boxes
   } // loop over local boxes

   s->ePotential = (real_t) etot;

   // normalise the accumulated stress by the global volume
   for (int m = 0;m<9;m++) 
   {
      s->defInfo->stress[m] = s->defInfo->stress[m]/s->defInfo->globalVolume;
   }

   return 0;
}
Example #3
0
/// Calculate potential energy and forces for the EAM potential.
///
/// Three steps are required:
///
///   -# Loop over all atoms and their neighbors, compute the two-body
///   interaction and the electron density at each atom
///   -# Loop over all atoms, compute the embedding energy and its
///   derivative for each atom
///   -# Loop over all atoms and their neighbors, compute the embedding
///   energy contribution to the force and add to the two-body force
///
/// Each pair is visited once (boxes with jBox < iBox are skipped, and
/// within a box only ij > ii is kept); both atoms of the pair are
/// updated with opposite signs.  The three Cartesian components are
/// written out by hand instead of looping over k.
///
/// \param [in,out] s  Simulation state.  Overwrites s->atoms->f,
///                    s->atoms->U and s->ePotential.
/// \return Always 0.
int eamForce(SimFlat* s)
{
   // loop-invariant references, read once up front
   Atoms* atoms = s->atoms;
   LinkCell* boxes = s->boxes;
   int nLocalBoxes = boxes->nLocalBoxes;
   int nTotalBoxes = boxes->nTotalBoxes;
   int* nAtoms = boxes->nAtoms;
   real3* atoms_r = atoms->r;
   real3* atoms_f = atoms->f;
   real_t* atoms_U = atoms->U;

   EamPotential* pot = (EamPotential*) s->pot;
   assert(pot);

   // set up halo exchange and internal storage on first call to forces.
   if (pot->forceExchange == NULL)
   {
      int maxTotalAtoms = MAXATOMS*s->boxes->nTotalBoxes;
      pot->dfEmbed = comdMalloc(maxTotalAtoms*sizeof(real_t));
      pot->rhobar  = comdMalloc(maxTotalAtoms*sizeof(real_t));
      pot->forceExchange = initForceHaloExchange(s->domain, s->boxes);
      pot->forceExchangeData = comdMalloc(sizeof(ForceExchangeData));
      pot->forceExchangeData->dfEmbed = pot->dfEmbed;
      pot->forceExchangeData->boxes = s->boxes;
   }
   
   real_t rCut2 = pot->cutoff*pot->cutoff;

   // zero forces / energy / rho / rhoprime
   real_t etot = 0.0;
   memset(atoms_f,  0, nTotalBoxes*MAXATOMS*sizeof(real3));
   memset(atoms_U,  0, nTotalBoxes*MAXATOMS*sizeof(real_t));
   memset(pot->dfEmbed, 0, nTotalBoxes*MAXATOMS*sizeof(real_t));
   memset(pot->rhobar,  0, nTotalBoxes*MAXATOMS*sizeof(real_t));

   int nbrBoxes[27];
   // loop over local boxes
   for (int iBox=0; iBox<nLocalBoxes; iBox++)
   {
      int nIBox = nAtoms[iBox];
      int nNbrBoxes = getNeighborBoxes(boxes, iBox, nbrBoxes);
      // loop over neighbor boxes of iBox (some may be halo boxes)
      for (int jTmp=0; jTmp<nNbrBoxes; jTmp++)
      {
         int jBox = nbrBoxes[jTmp];
         // the (iBox, jBox) box pair is handled when the smaller index is iBox
         if (jBox < iBox ) continue;

         int nJBox = nAtoms[jBox];
         // loop over atoms in iBox
         for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
         {
            // loop over atoms in jBox
            for (int jOff=MAXATOMS*jBox,ij=0; ij<nJBox; ij++,jOff++)
            {
               // within one box keep only ij > ii: no self pair, no double count
               if ( (iBox==jBox) &&(ij <= ii) ) continue;

               // separation vector and squared distance, component by component
               double r2 = 0.0;
               double dr0 = atoms_r[iOff][0]-atoms_r[jOff][0];
               r2+=dr0*dr0;
               double dr1 = atoms_r[iOff][1]-atoms_r[jOff][1];
               r2+=dr1*dr1;
               double dr2 = atoms_r[iOff][2]-atoms_r[jOff][2];
               r2+=dr2*dr2;

               // NOTE(review): this pass keeps r2 == rCut2, the third pass
               // drops it (uses >=) -- confirm which boundary is intended.
               if(r2>rCut2) continue;

               double r = sqrt(r2);

               real_t phiTmp, dPhi, rhoTmp, dRho;
               interpolate(pot->phi, r, &phiTmp, &dPhi);
               interpolate(pot->rho, r, &rhoTmp, &dRho);

               // equal and opposite pair force on the two atoms
               real_t cal = dPhi*dr0/r;
               atoms_f[iOff][0] -= cal;
               atoms_f[jOff][0] += cal;
               cal = dPhi*dr1/r;
               atoms_f[iOff][1] -= cal;
               atoms_f[jOff][1] += cal;
               cal = dPhi*dr2/r;
               atoms_f[iOff][2] -= cal;
               atoms_f[jOff][2] += cal;

               // update energy terms
               // calculate energy contribution based on whether
               // the neighbor box is local or remote
               if (jBox < nLocalBoxes)
                  etot += phiTmp;
               else
                  etot += 0.5*phiTmp;

               atoms_U[iOff] += 0.5*phiTmp;
               atoms_U[jOff] += 0.5*phiTmp;

               // accumulate rhobar for each atom
               pot->rhobar[iOff] += rhoTmp;
               pot->rhobar[jOff] += rhoTmp;

            } // loop over atoms in jBox
         } // loop over atoms in iBox
      } // loop over neighbor boxes
   } // loop over local boxes

   // Compute Embedding Energy
   // loop over all local boxes
   for (int iBox=0; iBox<nLocalBoxes; iBox++)
   {
      int nIBox = nAtoms[iBox];

      // loop over atoms in iBox
      for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
      {
         real_t fEmbed, dfEmbed;
         interpolate(pot->f, pot->rhobar[iOff], &fEmbed, &dfEmbed);
         pot->dfEmbed[iOff] = dfEmbed; // save derivative for halo exchange
         etot += fEmbed; 
         atoms_U[iOff] += fEmbed;
      }
   }

   // exchange derivative of the embedding energy with respect to rhobar
   startTimer(eamHaloTimer);
   haloExchange(pot->forceExchange, pot->forceExchangeData);
   stopTimer(eamHaloTimer);

   // third pass
   // loop over local boxes
   for (int iBox=0; iBox<nLocalBoxes; iBox++)
   {
      int nIBox = nAtoms[iBox];
      int nNbrBoxes = getNeighborBoxes(boxes, iBox, nbrBoxes);
      // loop over neighbor boxes of iBox (some may be halo boxes)
      for (int jTmp=0; jTmp<nNbrBoxes; jTmp++)
      {
         int jBox = nbrBoxes[jTmp];
         if(jBox < iBox) continue;

         int nJBox = nAtoms[jBox];
         // loop over atoms in iBox
         for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
         {
            // loop over atoms in jBox
            for (int jOff=MAXATOMS*jBox,ij=0; ij<nJBox; ij++,jOff++)
            { 
               if ((iBox==jBox) && (ij <= ii))  continue;

               double r2 = 0.0;
               real_t dr0 = atoms_r[iOff][0]-atoms_r[jOff][0];
               r2 += dr0*dr0;
               real_t dr1 = atoms_r[iOff][1]-atoms_r[jOff][1];
               r2 += dr1*dr1;
               real_t dr2 = atoms_r[iOff][2]-atoms_r[jOff][2];
               r2 += dr2*dr2;

               // NOTE(review): >= here versus > in the first pass.
               if(r2>=rCut2) continue;

               real_t r = sqrt(r2);

               real_t rhoTmp, dRho;
               interpolate(pot->rho, r, &rhoTmp, &dRho);

               // (dF_i + dF_j) * rho'(r) is common to all three components
               real_t embedScale = (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho;
               real_t cal = embedScale*dr0/r;
               atoms_f[iOff][0] -= cal;
               atoms_f[jOff][0] += cal;
               cal = embedScale*dr1/r;
               atoms_f[iOff][1] -= cal;
               atoms_f[jOff][1] += cal;
               cal = embedScale*dr2/r;
               atoms_f[iOff][2] -= cal;
               atoms_f[jOff][2] += cal;
            } // loop over atoms in jBox
         } // loop over atoms in iBox
      } // loop over neighbor boxes
   } // loop over local boxes

   s->ePotential = (real_t) etot;

   return 0;
}
Example #4
0
/// Evaluate the EAM potential energy and forces by walking the
/// per-atom neighbor list stored in s->atoms->neighborList.
///
/// The computation is organised as three sweeps:
///
///   -# For each local atom, visit its neighbor-list entries; apply the
///   pair (phi) force to both atoms, accumulate the pair energy and the
///   electron density (rhobar) of both atoms
///   -# For each local atom, interpolate the embedding function at
///   rhobar, add the embedding energy and store the derivative in
///   pot->dfEmbed
///   -# After a halo exchange of dfEmbed, visit the neighbor list again
///   and apply the embedding contribution to the force on both atoms
///
/// NOTE(review): both atoms of every entry are updated, so the list
/// presumably holds each pair only once -- confirm against the
/// neighbor-list builder.
///
/// \param [in,out] s  Simulation state.  Overwrites s->atoms->f,
///                    s->atoms->U and s->ePotential.
/// \return Always 0.
int eamForceCpuNL(SimFlat* s)
{
   EamPotential* pot = (EamPotential*) s->pot;
   assert(pot);

   // First call only: allocate the per-atom scratch arrays and build
   // the halo exchange that communicates dfEmbed.
   if (pot->forceExchange == NULL)
   {
      int nSlotsMax = MAXATOMS*s->boxes->nTotalBoxes;
      pot->dfEmbed = comdMalloc(nSlotsMax*sizeof(real_t));
      pot->rhobar  = comdMalloc(nSlotsMax*sizeof(real_t));
      pot->forceExchange = initForceHaloExchange(s->domain, s->boxes,s->method<CPU_NL);
      pot->forceExchangeData = comdMalloc(sizeof(ForceExchangeData));
      pot->forceExchangeData->dfEmbed = pot->dfEmbed;
      pot->forceExchangeData->boxes = s->boxes;
   }

   real_t rCut2 = pot->cutoff*pot->cutoff;

   // Reset forces, per-atom energy, dfEmbed and rhobar.
   real_t etot = 0.0;
   zeroVecAll(&(s->atoms->f),s->boxes->nTotalBoxes*MAXATOMS);
   memset(s->atoms->U,  0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real_t));
   memset(pot->dfEmbed, 0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real_t));
   memset(pot->rhobar,  0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real_t));

   NeighborList* neighborList = s->atoms->neighborList;

   // Sweep 1: pair force, pair energy and electron density.
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      int nIBox = s->boxes->nAtoms[iBox];
      for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
      {
         // the list row of an atom is addressed by its lid
         int iLid = s->atoms->lid[iOff];
         assert(iLid < neighborList->nMaxLocal);
         int* row = neighborList->list + neighborList->maxNeighbors * iLid;
         const int rowLen = neighborList->nNeighbors[iLid];

         for (int n=0; n<rowLen; n++)
         {
            int jOff = row[n];

            real3_old dr;
            dr[0] = s->atoms->r.x[iOff] - s->atoms->r.x[jOff];
            dr[1] = s->atoms->r.y[iOff] - s->atoms->r.y[jOff];
            dr[2] = s->atoms->r.z[iOff] - s->atoms->r.z[jOff];
            const double r2 = dr[0]*dr[0] + dr[1]*dr[1] + dr[2]*dr[2];
            if (r2>rCut2) continue;

            double r = sqrt(r2);

            real_t phiTmp, dPhi, rhoTmp, dRho;
            interpolate(pot->phi, r, &phiTmp, &dPhi);
            interpolate(pot->rho, r, &rhoTmp, &dRho);

            // equal and opposite pair force on the two atoms
            const double fx = dPhi*dr[0]/r;
            const double fy = dPhi*dr[1]/r;
            const double fz = dPhi*dr[2]/r;
            s->atoms->f.x[iOff] -= fx;
            s->atoms->f.y[iOff] -= fy;
            s->atoms->f.z[iOff] -= fz;
            s->atoms->f.x[jOff] += fx;
            s->atoms->f.y[jOff] += fy;
            s->atoms->f.z[jOff] += fz;

            // full pair energy when the j atom sits in a local box,
            // half of it otherwise
            if (jOff / MAXATOMS < s->boxes->nLocalBoxes)
               etot += phiTmp;
            else
               etot += 0.5*phiTmp;

            s->atoms->U[iOff] += 0.5*phiTmp;
            s->atoms->U[jOff] += 0.5*phiTmp;

            // electron density seen by both atoms
            pot->rhobar[iOff] += rhoTmp;
            pot->rhobar[jOff] += rhoTmp;
         }
      }
   }

   // Sweep 2: embedding energy and its derivative for local atoms.
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      int nIBox =  s->boxes->nAtoms[iBox];
      for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
      {
         real_t fEmbed, dfEmbed;
         interpolate(pot->f, pot->rhobar[iOff], &fEmbed, &dfEmbed);
         pot->dfEmbed[iOff] = dfEmbed; // kept for the halo exchange below
         etot += fEmbed;
         s->atoms->U[iOff] += fEmbed;
      }
   }

   // Exchange the derivative of the embedding energy with respect to rhobar.
   startTimer(eamHaloTimer);
   haloExchange(pot->forceExchange, pot->forceExchangeData);
   stopTimer(eamHaloTimer);

   // Sweep 3: embedding contribution to the force.
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      int nIBox =  s->boxes->nAtoms[iBox];
      for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
      {
         int iLid = s->atoms->lid[iOff];
         assert(iLid < neighborList->nMaxLocal);
         int* row = neighborList->list + neighborList->maxNeighbors * iLid;
         int rowLen = neighborList->nNeighbors[iLid];

         for (int n=0; n<rowLen; n++)
         {
            int jOff = row[n];

            real3_old dr;
            dr[0] = s->atoms->r.x[iOff] - s->atoms->r.x[jOff];
            dr[1] = s->atoms->r.y[iOff] - s->atoms->r.y[jOff];
            dr[2] = s->atoms->r.z[iOff] - s->atoms->r.z[jOff];
            const double r2 = dr[0]*dr[0] + dr[1]*dr[1] + dr[2]*dr[2];

            // this sweep uses >= where sweep 1 uses >
            if (r2>=rCut2) continue;

            real_t r = sqrt(r2);

            real_t rhoTmp, dRho;
            interpolate(pot->rho, r, &rhoTmp, &dRho);

            // (dF_i + dF_j) * rho'(r), shared by the six updates below
            const real_t embedScale = (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho;
            s->atoms->f.x[iOff] -= embedScale*dr[0]/r;
            s->atoms->f.y[iOff] -= embedScale*dr[1]/r;
            s->atoms->f.z[iOff] -= embedScale*dr[2]/r;
            s->atoms->f.x[jOff] += embedScale*dr[0]/r;
            s->atoms->f.y[jOff] += embedScale*dr[1]/r;
            s->atoms->f.z[jOff] += embedScale*dr[2]/r;
         }
      }
   }

   s->ePotential = (real_t) etot;

   return 0;
}