/// \details /// This function provides one-stop shopping for the sequence of events /// that must occur for a proper exchange of halo atoms after the atom /// positions have been updated by the integrator. /// /// - updateLinkCells: Since atoms have moved, some may be in the wrong /// link cells. /// - haloExchange (atom version): Sends atom data to remote tasks. /// - sort: Sort the atoms. /// /// \see updateLinkCells /// \see initAtomHaloExchange /// \see sortAtomsInCell void redistributeAtoms(SimFlat* sim) { updateLinkCells(sim->boxes, sim->atoms); startTimer(atomHaloTimer); haloExchange(sim->atomExchange, sim); stopTimer(atomHaloTimer); #pragma omp parallel for for (int ii=0; ii<sim->boxes->nTotalBoxes; ++ii) sortAtomsInCell(sim->atoms, sim->boxes, ii); }
/// Calculate potential energy and forces for the EAM potential. /// /// Three steps are required: /// /// -# Loop over all atoms and their neighbors, compute the two-body /// interaction and the electron density at each atom /// -# Loop over all atoms, compute the embedding energy and its /// derivative for each atom /// -# Loop over all atoms and their neighbors, compute the embedding /// energy contribution to the force and add to the two-body force /// int eamForce(SimFlat* s) { EamPotential* pot = (EamPotential*) s->pot; assert(pot); // set up halo exchange and internal storage on first call to forces. if (pot->forceExchange == NULL) { int maxTotalAtoms = MAXATOMS*s->boxes->nTotalBoxes; pot->dfEmbed = comdMalloc(maxTotalAtoms*sizeof(real_t)); pot->rhobar = comdMalloc(maxTotalAtoms*sizeof(real_t)); pot->forceExchange = initForceHaloExchange(s->domain, s->boxes); pot->forceExchangeData = comdMalloc(sizeof(ForceExchangeData)); pot->forceExchangeData->dfEmbed = pot->dfEmbed; pot->forceExchangeData->boxes = s->boxes; } real_t rCut2 = pot->cutoff*pot->cutoff; real_t etot = 0.; // zero forces / energy / rho /rhoprime int fsize = s->boxes->nTotalBoxes*MAXATOMS; //#pragma omp parallel for for (int ii=0; ii<fsize; ii++) { zeroReal3(s->atoms->f[ii]); //s->atoms->U[ii] = 0.;//never used pot->dfEmbed[ii] = 0.; pot->rhobar[ii] = 0.; } int nNbrBoxes = 27; // loop over local boxes //#pragma omp parallel for reduction(+:etot) for(int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++){ int nIBox = s->boxes->nAtoms[iBox]; // loop over neighbor boxes of iBox (some may be halo boxes) for(int jTmp=0; jTmp<nNbrBoxes; jTmp++) { int jBox = s->boxes->nbrBoxes[iBox][jTmp]; int nJBox = s->boxes->nAtoms[jBox]; // loop over atoms in iBox for(int iOff=MAXATOMS*iBox; iOff<(iBox*MAXATOMS+nIBox); iOff++) { // loop over atoms in jBox for(int jOff=MAXATOMS*jBox; jOff<(jBox*MAXATOMS+nJBox); jOff++) { real3 dr; real_t r2 = 0.0; for(int k=0; k<3; k++) { dr[k]=s->atoms->r[iOff][k]-s->atoms->r[jOff][k]; 
r2+=dr[k]*dr[k]; } if(r2 <= rCut2 && r2 > 0.0) { real_t r = sqrt(r2); real_t phiTmp, dPhi, rhoTmp, dRho; interpolate(pot->phi, r, &phiTmp, &dPhi); interpolate(pot->rho, r, &rhoTmp, &dRho); for(int k=0; k<3; k++) { s->atoms->f[iOff][k] -= dPhi*dr[k]/r; } // Calculate energy contribution //s->atoms->U[iOff] += 0.5*phiTmp;//never used etot += 0.5*phiTmp; // accumulate rhobar for each atom pot->rhobar[iOff] += rhoTmp; } } // loop over atoms in jBox } // loop over atoms in iBox } // loop over neighbor boxes } // loop over local boxes // Compute Embedding Energy // loop over all local boxes //#pragma omp parallel for reduction(+:etot) for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++) { int nIBox = s->boxes->nAtoms[iBox]; // loop over atoms in iBox for (int iOff=MAXATOMS*iBox; iOff<(MAXATOMS*iBox+nIBox); iOff++) { real_t fEmbed, dfEmbed; interpolate(pot->f, pot->rhobar[iOff], &fEmbed, &dfEmbed); pot->dfEmbed[iOff] = dfEmbed; // save derivative for halo exchange //s->atoms->U[iOff] += fEmbed;//never used etot += fEmbed; } } // exchange derivative of the embedding energy with repsect to rhobar startTimer(eamHaloTimer); haloExchange(pot->forceExchange, pot->forceExchangeData); stopTimer(eamHaloTimer); // third pass // loop over local boxes //#pragma omp parallel for for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++) { int nIBox = s->boxes->nAtoms[iBox]; // loop over neighbor boxes of iBox (some may be halo boxes) for (int jTmp=0; jTmp<nNbrBoxes; jTmp++) { int jBox = s->boxes->nbrBoxes[iBox][jTmp]; int nJBox = s->boxes->nAtoms[jBox]; // loop over atoms in iBox for (int iOff=MAXATOMS*iBox; iOff<(MAXATOMS*iBox+nIBox); iOff++) { // loop over atoms in jBox for (int jOff=MAXATOMS*jBox; jOff<(MAXATOMS*jBox+nJBox); jOff++) { real_t r2 = 0.0; real3 dr; for (int k=0; k<3; k++) { dr[k]=s->atoms->r[iOff][k]-s->atoms->r[jOff][k]; r2+=dr[k]*dr[k]; } if(r2 <= rCut2 && r2 > 0.0) { real_t r = sqrt(r2); real_t rhoTmp, dRho; interpolate(pot->rho, r, &rhoTmp, &dRho); for (int k=0; k<3; 
k++) { s->atoms->f[iOff][k] -= (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[k]/r; } } } // loop over atoms in jBox } // loop over atoms in iBox } // loop over neighbor boxes } // loop over local boxes s->ePotential = (real_t) etot; return 0; }
/// Calculate potential energy and forces for the EAM potential. /// /// Three steps are required: /// /// -# Loop over all atoms and their neighbors, compute the two-body /// interaction and the electron density at each atom /// -# Loop over all atoms, compute the embedding energy and its /// derivative for each atom /// -# Loop over all atoms and their neighbors, compute the embedding /// energy contribution to the force and add to the two-body force /// int eamForce(SimFlat* s) { //OPT: loop invariant references Atoms* atoms = s->atoms; LinkCell* boxes = s->boxes; int nLocalBoxes = boxes->nLocalBoxes; int nTotalBoxes = boxes->nTotalBoxes; int* nAtoms = boxes->nAtoms; real3* atoms_r = atoms->r; real3* atoms_f = atoms->f; real_t* atoms_U = atoms->U; EamPotential* pot = (EamPotential*) s->pot; assert(pot); // set up halo exchange and internal storage on first call to forces. if (pot->forceExchange == NULL) { int maxTotalAtoms = MAXATOMS*s->boxes->nTotalBoxes; pot->dfEmbed = comdMalloc(maxTotalAtoms*sizeof(real_t)); pot->rhobar = comdMalloc(maxTotalAtoms*sizeof(real_t)); pot->forceExchange = initForceHaloExchange(s->domain, s->boxes); pot->forceExchangeData = comdMalloc(sizeof(ForceExchangeData)); pot->forceExchangeData->dfEmbed = pot->dfEmbed; pot->forceExchangeData->boxes = s->boxes; } real_t rCut2 = pot->cutoff*pot->cutoff; // zero forces / energy / rho /rhoprime real_t etot = 0.0; memset(atoms_f, 0, nTotalBoxes*MAXATOMS*sizeof(real3)); memset(atoms_U, 0, nTotalBoxes*MAXATOMS*sizeof(real_t)); memset(pot->dfEmbed, 0, nTotalBoxes*MAXATOMS*sizeof(real_t)); memset(pot->rhobar, 0, nTotalBoxes*MAXATOMS*sizeof(real_t)); int nbrBoxes[27]; // loop over local boxes for (int iBox=0; iBox<nLocalBoxes; iBox++) { int nIBox = nAtoms[iBox]; int nNbrBoxes = getNeighborBoxes(boxes, iBox, nbrBoxes); // loop over neighbor boxes of iBox (some may be halo boxes) for (int jTmp=0; jTmp<nNbrBoxes; jTmp++) { int jBox = nbrBoxes[jTmp]; if (jBox < iBox ) continue; int nJBox = nAtoms[jBox]; 
// loop over atoms in iBox for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++) { // loop over atoms in jBox for (int jOff=MAXATOMS*jBox,ij=0; ij<nJBox; ij++,jOff++) { if ( (iBox==jBox) &&(ij <= ii) ) continue; double r2 = 0.0; real3 dr; //OPT: loop unrolling // for (int k=0; k<3; k++) // { // dr[k]=atoms_r[iOff][k]-atoms_r[jOff][k]; // r2+=dr[k]*dr[k]; // } double dr0 = atoms_r[iOff][0]-atoms_r[jOff][0]; r2+=dr0*dr0; double dr1 = atoms_r[iOff][1]-atoms_r[jOff][1]; r2+=dr1*dr1; double dr2 = atoms_r[iOff][2]-atoms_r[jOff][2]; r2+=dr2*dr2; //End of OPT: loop unrolling if(r2>rCut2) continue; double r = sqrt(r2); real_t phiTmp, dPhi, rhoTmp, dRho; interpolate(pot->phi, r, &phiTmp, &dPhi); interpolate(pot->rho, r, &rhoTmp, &dRho); //OPT: loop unrolling // for (int k=0; k<3; k++) // { // atoms_f[iOff][k] -= dPhi*dr[k]/r; // atoms_f[jOff][k] += dPhi*dr[k]/r; // } real_t cal = dPhi*dr0/r; atoms_f[iOff][0] -= cal; atoms_f[jOff][0] += cal; cal = dPhi*dr1/r; atoms_f[iOff][1] -= cal; atoms_f[jOff][1] += cal; cal = dPhi*dr2/r; atoms_f[iOff][2] -= cal; atoms_f[jOff][2] += cal; //End of OPT: loop unrolling // update energy terms // calculate energy contribution based on whether // the neighbor box is local or remote if (jBox < nLocalBoxes) etot += phiTmp; else etot += 0.5*phiTmp; atoms_U[iOff] += 0.5*phiTmp; atoms_U[jOff] += 0.5*phiTmp; // accumulate rhobar for each atom pot->rhobar[iOff] += rhoTmp; pot->rhobar[jOff] += rhoTmp; } // loop over atoms in jBox } // loop over atoms in iBox } // loop over neighbor boxes } // loop over local boxes // Compute Embedding Energy // loop over all local boxes for (int iBox=0; iBox<nLocalBoxes; iBox++) { int iOff; int nIBox = nAtoms[iBox]; // loop over atoms in iBox for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++) { real_t fEmbed, dfEmbed; interpolate(pot->f, pot->rhobar[iOff], &fEmbed, &dfEmbed); pot->dfEmbed[iOff] = dfEmbed; // save derivative for halo exchange etot += fEmbed; atoms_U[iOff] += fEmbed; } } // exchange derivative 
of the embedding energy with repsect to rhobar startTimer(eamHaloTimer); haloExchange(pot->forceExchange, pot->forceExchangeData); stopTimer(eamHaloTimer); // third pass // loop over local boxes for (int iBox=0; iBox<nLocalBoxes; iBox++) { int nIBox = nAtoms[iBox]; int nNbrBoxes = getNeighborBoxes(boxes, iBox, nbrBoxes); // loop over neighbor boxes of iBox (some may be halo boxes) for (int jTmp=0; jTmp<nNbrBoxes; jTmp++) { int jBox = nbrBoxes[jTmp]; if(jBox < iBox) continue; int nJBox = nAtoms[jBox]; // loop over atoms in iBox for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++) { // loop over atoms in jBox for (int jOff=MAXATOMS*jBox,ij=0; ij<nJBox; ij++,jOff++) { if ((iBox==jBox) && (ij <= ii)) continue; double r2 = 0.0; real3 dr; //OPT: loop unrolling // for (int k=0; k<3; k++) // { // dr[k]=atoms_r[iOff][k]-atoms_r[jOff][k]; // r2+=dr[k]*dr[k]; // } real_t dr0 = atoms_r[iOff][0]-atoms_r[jOff][0]; r2 += dr0*dr0; real_t dr1 = atoms_r[iOff][1]-atoms_r[jOff][1]; r2 += dr1*dr1; real_t dr2 = atoms_r[iOff][2]-atoms_r[jOff][2]; r2 += dr2*dr2; //End of OPT: loop unrolling if(r2>=rCut2) continue; real_t r = sqrt(r2); real_t rhoTmp, dRho; interpolate(pot->rho, r, &rhoTmp, &dRho); //OPT: loop unrolling // for (int k=0; k<3; k++) // { // atoms_f[iOff][k] -= (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[k]/r; // atoms_f[jOff][k] += (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[k]/r; // } real_t cal = (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr0/r; atoms_f[iOff][0] -= cal; atoms_f[jOff][0] += cal; cal = (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr1/r; atoms_f[iOff][1] -= cal; atoms_f[jOff][1] += cal; cal = (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr2/r; atoms_f[iOff][2] -= cal; atoms_f[jOff][2] += cal; //End of OPT: loop unrolling } // loop over atoms in jBox } // loop over atoms in iBox } // loop over neighbor boxes } // loop over local boxes s->ePotential = (real_t) etot; return 0; }
/// Calculate potential energy and forces for the EAM potential. /// /// Three steps are required: /// /// -# Loop over all atoms and their neighbors, compute the two-body /// interaction and the electron density at each atom /// -# Loop over all atoms, compute the embedding energy and its /// derivative for each atom /// -# Loop over all atoms and their neighbors, compute the embedding /// energy contribution to the force and add to the two-body force /// int eamForce(SimFlat* s) { EamPotential* pot = (EamPotential*) s->pot; assert(pot); // set up halo exchange and internal storage on first call to forces. if (pot->forceExchange == NULL) { int maxTotalAtoms = MAXATOMS*s->boxes->nTotalBoxes; pot->dfEmbed = comdMalloc(maxTotalAtoms*sizeof(real_t)); pot->rhobar = comdMalloc(maxTotalAtoms*sizeof(real_t)); pot->forceExchange = initForceHaloExchange(s->domain, s->boxes); pot->forceExchangeData = comdMalloc(sizeof(ForceExchangeData)); pot->forceExchangeData->dfEmbed = pot->dfEmbed; pot->forceExchangeData->boxes = s->boxes; } real_t rCut2 = pot->cutoff*pot->cutoff; // zero forces / energy / rho /rhoprime real_t etot = 0.0; memset(s->atoms->f, 0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real3)); memset(s->atoms->U, 0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real_t)); memset(pot->dfEmbed, 0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real_t)); memset(pot->rhobar, 0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real_t)); // virial stress computation added here for (int m = 0;m<9;m++) { s->defInfo->stress[m] = 0.0; } int nbrBoxes[27]; // loop over local boxes for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++) { int nIBox = s->boxes->nAtoms[iBox]; int nNbrBoxes = getNeighborBoxes(s->boxes, iBox, nbrBoxes); // loop over neighbor boxes of iBox (some may be halo boxes) for (int jTmp=0; jTmp<nNbrBoxes; jTmp++) { int jBox = nbrBoxes[jTmp]; if (jBox < iBox ) continue; int nJBox = s->boxes->nAtoms[jBox]; // loop over atoms in iBox for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++) { // loop 
over atoms in jBox for (int jOff=MAXATOMS*jBox,ij=0; ij<nJBox; ij++,jOff++) { if ( (iBox==jBox) &&(ij <= ii) ) continue; double r2 = 0.0; real3 dr; for (int k=0; k<3; k++) { dr[k]=s->atoms->r[iOff][k]-s->atoms->r[jOff][k]; r2+=dr[k]*dr[k]; } if(r2>rCut2) continue; double r = sqrt(r2); real_t phiTmp, dPhi, rhoTmp, dRho; interpolate(pot->phi, r, &phiTmp, &dPhi); interpolate(pot->rho, r, &rhoTmp, &dRho); for (int k=0; k<3; k++) { s->atoms->f[iOff][k] -= dPhi*dr[k]/r; s->atoms->f[jOff][k] += dPhi*dr[k]/r; } for (int i=0; i<3; i++) { for (int j=0; j<3; j++) { int m = 3*i + j; s->defInfo->stress[m] += 1.0*dPhi*dr[i]*dr[j]/r; } } // update energy terms // calculate energy contribution based on whether // the neighbor box is local or remote if (jBox < s->boxes->nLocalBoxes) etot += phiTmp; else etot += 0.5*phiTmp; s->atoms->U[iOff] += 0.5*phiTmp; s->atoms->U[jOff] += 0.5*phiTmp; // accumulate rhobar for each atom pot->rhobar[iOff] += rhoTmp; pot->rhobar[jOff] += rhoTmp; } // loop over atoms in jBox } // loop over atoms in iBox } // loop over neighbor boxes } // loop over local boxes // Compute Embedding Energy // loop over all local boxes for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++) { int iOff; int nIBox = s->boxes->nAtoms[iBox]; // loop over atoms in iBox for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++) { real_t fEmbed, dfEmbed; interpolate(pot->f, pot->rhobar[iOff], &fEmbed, &dfEmbed); pot->dfEmbed[iOff] = dfEmbed; // save derivative for halo exchange etot += fEmbed; s->atoms->U[iOff] += fEmbed; int iSpecies = s->atoms->iSpecies[iOff]; real_t invMass = 1.0/s->species[iSpecies].mass; for (int i=0; i<3; i++) { for (int j=0; j<3; j++) { int m = 3*i + j; s->defInfo->stress[m] -= s->atoms->p[iOff][i]*s->atoms->p[iOff][j]*invMass; } } } } // exchange derivative of the embedding energy with repsect to rhobar startTimer(eamHaloTimer); haloExchange(pot->forceExchange, pot->forceExchangeData); stopTimer(eamHaloTimer); // third pass // loop over local boxes for (int 
iBox=0; iBox<s->boxes->nLocalBoxes; iBox++) { int nIBox = s->boxes->nAtoms[iBox]; int nNbrBoxes = getNeighborBoxes(s->boxes, iBox, nbrBoxes); // loop over neighbor boxes of iBox (some may be halo boxes) for (int jTmp=0; jTmp<nNbrBoxes; jTmp++) { int jBox = nbrBoxes[jTmp]; if(jBox < iBox) continue; int nJBox = s->boxes->nAtoms[jBox]; // loop over atoms in iBox for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++) { // loop over atoms in jBox for (int jOff=MAXATOMS*jBox,ij=0; ij<nJBox; ij++,jOff++) { if ((iBox==jBox) && (ij <= ii)) continue; double r2 = 0.0; real3 dr; for (int k=0; k<3; k++) { dr[k]=s->atoms->r[iOff][k]-s->atoms->r[jOff][k]; r2+=dr[k]*dr[k]; } if(r2>=rCut2) continue; real_t r = sqrt(r2); real_t rhoTmp, dRho; interpolate(pot->rho, r, &rhoTmp, &dRho); for (int k=0; k<3; k++) { s->atoms->f[iOff][k] -= (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[k]/r; s->atoms->f[jOff][k] += (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[k]/r; } for (int i=0; i<3; i++) { for (int j=0; j<3; j++) { int m = 3*i + j; s->defInfo->stress[m] += 1.0*(pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[i]*dr[j]/r; } } } // loop over atoms in jBox } // loop over atoms in iBox } // loop over neighbor boxes } // loop over local boxes s->ePotential = (real_t) etot; for (int m = 0;m<9;m++) { s->defInfo->stress[m] = s->defInfo->stress[m]/s->defInfo->globalVolume; } return 0; }
/// Calculate potential energy and forces for the EAM potential on the
/// CPU, taking each atom's interaction partners from the neighbor list
/// (s->atoms->neighborList) instead of scanning neighbor boxes.
///
/// Three steps are required:
///
///   -# Loop over all local atoms and their listed neighbors, compute
///   the two-body interaction and the electron density at each atom
///   -# Loop over all local atoms, compute the embedding energy and its
///   derivative for each atom
///   -# After the halo exchange of the embedding derivative, loop over
///   the neighbor lists again and add the embedding contribution to the
///   force
///
/// Every listed pair updates both atoms with equal and opposite
/// contributions (presumably each pair is stored once in the list --
/// confirm against the neighbor-list builder).
///
/// Steps 1 and 3 must select exactly the same set of pairs: a pair that
/// contributes electron density in step 1 must also receive the
/// matching embedding force in step 3. Both steps therefore skip a pair
/// only when r2 > rCut2.
///
/// \param [in,out] s  Simulation state. Forces, per-atom energies and
///                    s->ePotential are overwritten.
/// \return Always 0.
int eamForceCpuNL(SimFlat* s)
{
   EamPotential* pot = (EamPotential*) s->pot;
   assert(pot);

   // set up halo exchange and internal storage on first call to forces.
   if (pot->forceExchange == NULL)
   {
      int maxTotalAtoms = MAXATOMS*s->boxes->nTotalBoxes;
      pot->dfEmbed = comdMalloc(maxTotalAtoms*sizeof(real_t));
      pot->rhobar  = comdMalloc(maxTotalAtoms*sizeof(real_t));
      pot->forceExchange = initForceHaloExchange(s->domain, s->boxes,s->method<CPU_NL);
      pot->forceExchangeData = comdMalloc(sizeof(ForceExchangeData));
      pot->forceExchangeData->dfEmbed = pot->dfEmbed;
      pot->forceExchangeData->boxes = s->boxes;
   }

   real_t rCut2 = pot->cutoff*pot->cutoff;

   // zero forces / energy / rho /rhoprime
   real_t etot = 0.0;
   zeroVecAll(&(s->atoms->f),s->boxes->nTotalBoxes*MAXATOMS);
   memset(s->atoms->U,  0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real_t));
   memset(pot->dfEmbed, 0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real_t));
   memset(pot->rhobar,  0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real_t));

   NeighborList* neighborList = s->atoms->neighborList;

   // ---- Step 1: two-body interaction and electron density ----
   // loop over local boxes
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      int nIBox = s->boxes->nAtoms[iBox];

      // loop over atoms in iBox
      for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
      {
         // the atom's local id selects its row in the neighbor list
         int iLid = s->atoms->lid[iOff];
         assert(iLid < neighborList->nMaxLocal);
         int* iNeighborList = &(neighborList->list[neighborList->maxNeighbors * iLid]);
         const int nNeighbors = neighborList->nNeighbors[iLid];

         // loop over atoms in neighborlist
         for (int ij=0; ij<nNeighbors; ij++)
         {
            int jOff = iNeighborList[ij];

            double r2 = 0.0;
            real3_old dr;
            dr[0] = s->atoms->r.x[iOff] - s->atoms->r.x[jOff];
            dr[1] = s->atoms->r.y[iOff] - s->atoms->r.y[jOff];
            dr[2] = s->atoms->r.z[iOff] - s->atoms->r.z[jOff];
            r2 += dr[0]*dr[0] + dr[1]*dr[1] + dr[2]*dr[2];

            if (r2 > rCut2) continue;

            double r = sqrt(r2);

            real_t phiTmp, dPhi, rhoTmp, dRho;
            interpolate(pot->phi, r, &phiTmp, &dPhi);
            interpolate(pot->rho, r, &rhoTmp, &dRho);

            // equal and opposite pair force
            s->atoms->f.x[iOff] -= dPhi*dr[0]/r;
            s->atoms->f.y[iOff] -= dPhi*dr[1]/r;
            s->atoms->f.z[iOff] -= dPhi*dr[2]/r;
            s->atoms->f.x[jOff] += dPhi*dr[0]/r;
            s->atoms->f.y[jOff] += dPhi*dr[1]/r;
            s->atoms->f.z[jOff] += dPhi*dr[2]/r;

            // update energy terms
            // calculate energy contribution based on whether
            // the neighbor box is local or remote
            // (jOff / MAXATOMS is the index of the box holding jOff)
            if (jOff / MAXATOMS < s->boxes->nLocalBoxes)
               etot += phiTmp;
            else
               etot += 0.5*phiTmp;

            s->atoms->U[iOff] += 0.5*phiTmp;
            s->atoms->U[jOff] += 0.5*phiTmp;

            // accumulate rhobar for each atom
            pot->rhobar[iOff] += rhoTmp;
            pot->rhobar[jOff] += rhoTmp;

         } // loop over atoms in neighborlist
      } // loop over atoms in iBox
   } // loop over local boxes

   // ---- Step 2: embedding energy ----
   // loop over all local boxes
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      int nIBox = s->boxes->nAtoms[iBox];

      // loop over atoms in iBox
      for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
      {
         real_t fEmbed, dfEmbed;
         interpolate(pot->f, pot->rhobar[iOff], &fEmbed, &dfEmbed);
         pot->dfEmbed[iOff] = dfEmbed; // save derivative for halo exchange
         etot += fEmbed;
         s->atoms->U[iOff] += fEmbed;
      }
   }

   // exchange derivative of the embedding energy with respect to rhobar
   startTimer(eamHaloTimer);
   haloExchange(pot->forceExchange, pot->forceExchangeData);
   stopTimer(eamHaloTimer);

   // ---- Step 3: embedding contribution to the force ----
   // loop over local boxes
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      int nIBox = s->boxes->nAtoms[iBox];

      // loop over atoms in iBox
      for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
      {
         int iLid = s->atoms->lid[iOff];
         assert(iLid < neighborList->nMaxLocal);
         int* iNeighborList = &(neighborList->list[neighborList->maxNeighbors * iLid]);
         int nNeighbors = neighborList->nNeighbors[iLid];

         // loop over atoms in neighborlist
         for (int ij=0; ij<nNeighbors; ij++)
         {
            int jOff = iNeighborList[ij];

            double r2 = 0.0;
            real3_old dr;
            dr[0] = s->atoms->r.x[iOff] - s->atoms->r.x[jOff];
            dr[1] = s->atoms->r.y[iOff] - s->atoms->r.y[jOff];
            dr[2] = s->atoms->r.z[iOff] - s->atoms->r.z[jOff];
            r2 += dr[0]*dr[0] + dr[1]*dr[1] + dr[2]*dr[2];

            // Must match the step 1 test exactly (pairs sitting on the
            // cutoff were previously dropped here with >=).
            if (r2 > rCut2) continue;

            real_t r = sqrt(r2);

            real_t rhoTmp, dRho;
            interpolate(pot->rho, r, &rhoTmp, &dRho);

            // sum of the embedding derivatives of both atoms; the same
            // for all six force updates
            real_t dfSum = pot->dfEmbed[iOff]+pot->dfEmbed[jOff];

            s->atoms->f.x[iOff] -= dfSum*dRho*dr[0]/r;
            s->atoms->f.y[iOff] -= dfSum*dRho*dr[1]/r;
            s->atoms->f.z[iOff] -= dfSum*dRho*dr[2]/r;
            s->atoms->f.x[jOff] += dfSum*dRho*dr[0]/r;
            s->atoms->f.y[jOff] += dfSum*dRho*dr[1]/r;
            s->atoms->f.z[jOff] += dfSum*dRho*dr[2]/r;

         } // loop over atoms in neighborlist
      } // loop over atoms in iBox
   } // loop over local boxes

   s->ePotential = (real_t) etot;

   return 0;
}
/// Calculate potential energy and forces for the EAM potential. /// /// Three steps are required: /// /// -# Loop over all atoms and their neighbors, compute the two-body /// interaction and the electron density at each atom /// -# Loop over all atoms, compute the embedding energy and its /// derivative for each atom /// -# Loop over all atoms and their neighbors, compute the embedding /// energy contribution to the force and add to the two-body force /// int eamForceGpu(SimFlat* s) { EamPotential* pot = (EamPotential*) s->pot; assert(pot); if (s->gpuAsync) { // only update neighbors list when method != 0 if (s->method == WARP_ATOM || s->method == CTA_CELL) updateNeighborsGpuAsync(s->gpu, s->flags, s->n_boundary_cells, s->boundary_cells, s->boundary_stream); // interior stream already launched eamForce1GpuAsync(s->gpu, s->gpu.b_list, s->n_boundary_cells, s->boundary_cells, s->method, s->boundary_stream, s->spline); eamForce2GpuAsync(s->gpu, s->gpu.b_list, s->n_boundary_cells, s->boundary_cells, s->method, s->boundary_stream, s->spline); // we need boundary data before halo exchange cudaStreamSynchronize(s->boundary_stream); // now we can start step 3 on the interior int n_interior_cells = s->gpu.boxes.nLocalBoxes - s->n_boundary_cells; eamForce3GpuAsync(s->gpu, s->gpu.i_list, n_interior_cells, s->interior_cells, s->method, s->interior_stream, s->spline); } else { // only update neighbors list when method != 0 if (s->method == WARP_ATOM || s->method == CTA_CELL) updateNeighborsGpu(s->gpu, s->flags); //TODO make the async force exchange work // int n_interior_cells = s->gpu.boxes.nLocalBoxes - s->n_boundary_cells; // eamForce1GpuAsync(s->gpu, s->gpu.i_list, n_interior_cells, s->interior_cells, s->method, s->boundary_stream); // // eamForce1GpuAsync(s->gpu, s->gpu.b_list, s->n_boundary_cells, s->boundary_cells, s->method, s->boundary_stream); // eamForce2GpuAsync(s->gpu, s->gpu.i_list, n_interior_cells, s->interior_cells, s->method, s->boundary_stream); // 
eamForce2GpuAsync(s->gpu, s->gpu.b_list, s->n_boundary_cells, s->boundary_cells, s->method, s->boundary_stream); // cudaStreamSynchronize(s->boundary_stream); eamForce1Gpu(s->gpu,s->method, s->spline); if (!s->gpuProfile) eamForce2Gpu(s->gpu,s->method, s->spline); } if (!s->gpuProfile) { // exchange derivative of the embedding energy with repsect to rhobar startTimer(eamHaloTimer); haloExchange(pot->forceExchange, s); stopTimer(eamHaloTimer); //TODO make the async force exchange work // int n_interior_cells = s->gpu.boxes.nLocalBoxes - s->n_boundary_cells; // eamForce3GpuAsync(s->gpu, s->gpu.i_list, n_interior_cells, s->interior_cells, s->method, s->boundary_stream); if (s->gpuAsync) { // we need updated interior data before 3rd step cudaStreamSynchronize(s->boundary_stream); } if (s->gpuAsync) { // interior stream already launched eamForce3GpuAsync(s->gpu, s->gpu.b_list, s->n_boundary_cells, s->boundary_cells, s->method, s->boundary_stream, s->spline); cudaDeviceSynchronize(); } else { eamForce3Gpu(s->gpu,s->method, s->spline); } } return 0; }