/// Broadcasts an InterpolationObject from rank 0 to all other ranks. /// /// It is commonly the case that the data needed to create the /// interpolation table is available on only one task (for example, only /// one task has read the data from a file). Broadcasting the table /// eliminates the need to put broadcast code in multiple table readers. /// /// \see eamBcastPotential void bcastInterpolationObject(InterpolationObject** table) { struct { int n; real_t x0, invDx; } buf; if (getMyRank() == 0) { buf.n = (*table)->n; buf.x0 = (*table)->x0; buf.invDx = (*table)->invDx; } bcastParallel(&buf, sizeof(buf), 0); if (getMyRank() != 0) { assert(*table == NULL); *table = comdMalloc(sizeof(InterpolationObject)); (*table)->n = buf.n; (*table)->x0 = buf.x0; (*table)->invDx = buf.invDx; (*table)->values = comdMalloc(sizeof(real_t) * (buf.n+3) ); (*table)->values++; } int valuesSize = sizeof(real_t) * ((*table)->n+3); bcastParallel((*table)->values-1, valuesSize, 0); }
/// This is the function that does the heavy lifting for the /// communication of halo data. It is called once for each axis and /// sends and receives two message. Loading and unloading of the /// buffers is in the hands of the sub-class virtual functions. /// /// \param [in] iAxis Axis index. /// \param [in, out] data Pointer to data that will be passed to the load and /// unload functions void exchangeData(HaloExchange* haloExchange, void* data, int iAxis) { enum HaloFaceOrder faceM = 2*iAxis; enum HaloFaceOrder faceP = faceM+1; char* sendBufM = comdMalloc(haloExchange->bufCapacity); char* sendBufP = comdMalloc(haloExchange->bufCapacity); char* recvBufM = comdMalloc(haloExchange->bufCapacity); char* recvBufP = comdMalloc(haloExchange->bufCapacity); int nSendM = haloExchange->loadBuffer(haloExchange->parms, data, faceM, sendBufM); int nSendP = haloExchange->loadBuffer(haloExchange->parms, data, faceP, sendBufP); int nbrRankM = haloExchange->nbrRank[faceM]; int nbrRankP = haloExchange->nbrRank[faceP]; int nRecvM, nRecvP; startTimer(commHaloTimer); nRecvP = sendReceiveParallel(sendBufM, nSendM, nbrRankM, recvBufP, haloExchange->bufCapacity, nbrRankP); nRecvM = sendReceiveParallel(sendBufP, nSendP, nbrRankP, recvBufM, haloExchange->bufCapacity, nbrRankM); stopTimer(commHaloTimer); haloExchange->unloadBuffer(haloExchange->parms, data, faceM, nRecvM, recvBufM); haloExchange->unloadBuffer(haloExchange->parms, data, faceP, nRecvP, recvBufP); comdFree(recvBufP); comdFree(recvBufM); comdFree(sendBufP); comdFree(sendBufM); }
HashTable* initHashTable(int nMaxEntries) { HashTable *hashTable = (HashTable *) comdMalloc(sizeof(HashTable)); hashTable->nMaxEntries = nMaxEntries; hashTable->nEntriesPut = 0; //allocates a 5MB hashtable. This number is prime. hashTable->nEntriesGet = 0; //allocates a 5MB hashtable. This number is prime. hashTable->offset = (int*) comdMalloc(sizeof(int) * hashTable->nMaxEntries); emptyHashTable(hashTable); return hashTable; }
/// Allocate and initialize the EAM potential data structure. /// /// \param [in] dir The directory in which potential table files are found. /// \param [in] file The name of the potential table file. /// \param [in] type The file format of the potential file (setfl or funcfl). BasePotential* initEamPot(const char* dir, const char* file, const char* type) { EamPotential* pot = comdMalloc(sizeof(EamPotential)); assert(pot); pot->force = eamForce; pot->print = eamPrint; pot->destroy = eamDestroy; pot->phi = NULL; pot->rho = NULL; pot->f = NULL; // Initialization of the next three items requires information about // the parallel decomposition and link cells that isn't available // with the potential is initialized. Hence, we defer their // initialization until the first time we call the force routine. pot->dfEmbed = NULL; pot->rhobar = NULL; pot->forceExchange = NULL; if (getMyRank() == 0) { if (strcmp(type, "setfl" ) == 0) eamReadSetfl(pot, dir, file); else if (strcmp(type,"funcfl") == 0) eamReadFuncfl(pot, dir, file); else typeNotSupported("initEamPot", type); } eamBcastPotential(pot); return (BasePotential*) pot; }
/// The force exchange is considerably simpler than the atom exchange. /// In the force case we only need to exchange data that is needed to /// complete the force calculation. Since the atoms have not moved we /// only need to send data from local link cells and we are guaranteed /// that the same atoms exist in the same order in corresponding halo /// cells on remote tasks. The only tricky part is the size of the /// plane of local cells that needs to be sent grows in each direction. /// This is because the y-axis send must send some of the data that was /// received from the x-axis send, and the z-axis must send some data /// from the y-axis send. This accumulation of data to send is /// responsible for data reaching neighbor cells that share only edges /// or corners. /// /// \see eam.c for an explanation of the requirement to exchange /// force data. HaloExchange* initForceHaloExchange(Domain* domain, LinkCell* boxes) { HaloExchange* hh = initHaloExchange(domain); hh->loadBuffer = loadForceBuffer; hh->unloadBuffer = unloadForceBuffer; hh->destroy = destroyForceExchange; int size0 = (boxes->gridSize[1])*(boxes->gridSize[2]); int size1 = (boxes->gridSize[0]+2)*(boxes->gridSize[2]); int size2 = (boxes->gridSize[0]+2)*(boxes->gridSize[1]+2); int maxSize = MAX(size0, size1); maxSize = MAX(size1, size2); hh->bufCapacity = (maxSize)*MAXATOMS*sizeof(ForceMsg); ForceExchangeParms* parms = comdMalloc(sizeof(ForceExchangeParms)); parms->nCells[HALO_X_MINUS] = (boxes->gridSize[1] )*(boxes->gridSize[2] ); parms->nCells[HALO_Y_MINUS] = (boxes->gridSize[0]+2)*(boxes->gridSize[2] ); parms->nCells[HALO_Z_MINUS] = (boxes->gridSize[0]+2)*(boxes->gridSize[1]+2); parms->nCells[HALO_X_PLUS] = parms->nCells[HALO_X_MINUS]; parms->nCells[HALO_Y_PLUS] = parms->nCells[HALO_Y_MINUS]; parms->nCells[HALO_Z_PLUS] = parms->nCells[HALO_Z_MINUS]; for (int ii=0; ii<6; ++ii) { parms->sendCells[ii] = mkForceSendCellList(boxes, ii, parms->nCells[ii]); parms->recvCells[ii] = 
mkForceRecvCellList(boxes, ii, parms->nCells[ii]); } hh->parms = parms; return hh; }
/// Make a list of link cells that need to be sent across the specified /// face. For each face, the list must include all cells, local and /// halo, in the first two planes of link cells. Halo cells must be /// included in the list of link cells to send since local atoms may /// have moved from local cells into halo cells on this time step. /// (Actual remote atoms should have been deleted, so the halo cells /// should contain only these few atoms that have just crossed.) /// Sending these atoms will allow them to be reassigned to the task /// that covers the spatial domain they have moved into. /// /// Note that link cell grid coordinates range from -1 to gridSize[iAxis]. /// \see initLinkCells for an explanation link cell grid coordinates. /// /// \param [in] boxes Link cell information. /// \param [in] iFace Index of the face data will be sent across. /// \param [in] nCells Number of cells to send. This is used for a /// consistency check. /// \return The list of cells to send. Caller is responsible to free /// the list. int* mkAtomCellList(LinkCell* boxes, enum HaloFaceOrder iFace, const int nCells) { int* list = comdMalloc(nCells*sizeof(int)); int xBegin = -1; int xEnd = boxes->gridSize[0]+1; int yBegin = -1; int yEnd = boxes->gridSize[1]+1; int zBegin = -1; int zEnd = boxes->gridSize[2]+1; if (iFace == HALO_X_MINUS) xEnd = xBegin+2; if (iFace == HALO_X_PLUS) xBegin = xEnd-2; if (iFace == HALO_Y_MINUS) yEnd = yBegin+2; if (iFace == HALO_Y_PLUS) yBegin = yEnd-2; if (iFace == HALO_Z_MINUS) zEnd = zBegin+2; if (iFace == HALO_Z_PLUS) zBegin = zEnd-2; int count = 0; for (int ix=xBegin; ix<xEnd; ++ix) for (int iy=yBegin; iy<yEnd; ++iy) for (int iz=zBegin; iz<zEnd; ++iz) list[count++] = getBoxFromTuple(boxes, ix, iy, iz); assert(count == nCells); return list; }
/// \param [in] xproc x-size of domain decomposition grid. /// \param [in] yproc y-size of domain decomposition grid. /// \param [in] zproc z-size of domain decomposition grid. /// \param [in] globalExtent Size of the simulation domain (in Angstroms). Domain* initDecomposition(int xproc, int yproc, int zproc, real3 globalExtent) { assert( xproc * yproc * zproc == getNRanks()); Domain* dd = comdMalloc(sizeof(Domain)); dd->procGrid[0] = xproc; dd->procGrid[1] = yproc; dd->procGrid[2] = zproc; // calculate grid coordinates i,j,k for this processor int myRank = getMyRank(); dd->procCoord[0] = myRank % dd->procGrid[0]; myRank /= dd->procGrid[0]; dd->procCoord[1] = myRank % dd->procGrid[1]; dd->procCoord[2] = myRank / dd->procGrid[1]; // initialialize global bounds for (int i = 0; i < 3; i++) { dd->globalMin[i] = 0; dd->globalMax[i] = globalExtent[i]; dd->globalExtent[i] = dd->globalMax[i] - dd->globalMin[i]; } // initialize local bounds on this processor for (int i = 0; i < 3; i++) { dd->localExtent[i] = dd->globalExtent[i] / dd->procGrid[i]; dd->localMin[i] = dd->globalMin[i] + dd->procCoord[i] * dd->localExtent[i]; dd->localMax[i] = dd->globalMin[i] + (dd->procCoord[i]+1) * dd->localExtent[i]; } return dd; }
/// \details /// When called in proper sequence by redistributeAtoms, the atom halo /// exchange helps serve three purposes: /// - Send ghost atom data to neighbor tasks. /// - Shift atom coordinates by the global simulation size when they cross /// periodic boundaries. This shift is performed in loadAtomsBuffer. /// - Transfer ownership of atoms between tasks as the atoms move across /// spatial domain boundaries. This transfer of ownership occurs in /// two places. The former owner gives up ownership when /// updateLinkCells moves a formerly local atom into a halo link cell. /// The new owner accepts ownership when unloadAtomsBuffer calls /// putAtomInBox to place a received atom into a local link cell. /// /// This constructor does the following: /// /// - Sets the bufCapacity to hold the largest possible number of atoms /// that can be sent across a face. /// - Initialize function pointers to the atom-specific versions /// - Sets the number of link cells to send across each face. /// - Builds the list of link cells to send across each face. As /// explained in the comments for mkAtomCellList, this list must /// include any link cell, local or halo, that could possibly contain /// an atom that needs to be sent across the face. Atoms that need to /// be sent include "ghost atoms" that are located in local link /// cells that correspond to halo link cells on receiving tasks as well as /// formerly local atoms that have just moved into halo link cells and /// need to be sent to the rank that owns the spatial domain the atom /// has moved into. /// - Sets a coordinate shift factor for each face to account for /// periodic boundary conditions. For most faces the factor is zero. /// For faces on the +x, +y, or +z face of the simulation domain /// the factor is -1.0 (to shift the coordinates by -1 times the /// simulation domain size). For -x, -y, and -z faces of the /// simulation domain, the factor is +1.0. 
/// /// \see redistributeAtoms HaloExchange* initAtomHaloExchange(Domain* domain, LinkCell* boxes) { HaloExchange* hh = initHaloExchange(domain); int size0 = (boxes->gridSize[1]+2)*(boxes->gridSize[2]+2); int size1 = (boxes->gridSize[0]+2)*(boxes->gridSize[2]+2); int size2 = (boxes->gridSize[0]+2)*(boxes->gridSize[1]+2); int maxSize = MAX(size0, size1); maxSize = MAX(size1, size2); hh->bufCapacity = maxSize*2*MAXATOMS*sizeof(AtomMsg); hh->loadBuffer = loadAtomsBuffer; hh->unloadBuffer = unloadAtomsBuffer; hh->destroy = destroyAtomsExchange; AtomExchangeParms* parms = comdMalloc(sizeof(AtomExchangeParms)); parms->nCells[HALO_X_MINUS] = 2*(boxes->gridSize[1]+2)*(boxes->gridSize[2]+2); parms->nCells[HALO_Y_MINUS] = 2*(boxes->gridSize[0]+2)*(boxes->gridSize[2]+2); parms->nCells[HALO_Z_MINUS] = 2*(boxes->gridSize[0]+2)*(boxes->gridSize[1]+2); parms->nCells[HALO_X_PLUS] = parms->nCells[HALO_X_MINUS]; parms->nCells[HALO_Y_PLUS] = parms->nCells[HALO_Y_MINUS]; parms->nCells[HALO_Z_PLUS] = parms->nCells[HALO_Z_MINUS]; for (int ii=0; ii<6; ++ii) parms->cellList[ii] = mkAtomCellList(boxes, ii, parms->nCells[ii]); for (int ii=0; ii<6; ++ii) { parms->pbcFactor[ii] = comdMalloc(3*sizeof(real_t)); for (int jj=0; jj<3; ++jj) parms->pbcFactor[ii][jj] = 0.0; } int* procCoord = domain->procCoord; //alias int* procGrid = domain->procGrid; //alias if (procCoord[HALO_X_AXIS] == 0) parms->pbcFactor[HALO_X_MINUS][HALO_X_AXIS] = +1.0; if (procCoord[HALO_X_AXIS] == procGrid[HALO_X_AXIS]-1) parms->pbcFactor[HALO_X_PLUS][HALO_X_AXIS] = -1.0; if (procCoord[HALO_Y_AXIS] == 0) parms->pbcFactor[HALO_Y_MINUS][HALO_Y_AXIS] = +1.0; if (procCoord[HALO_Y_AXIS] == procGrid[HALO_Y_AXIS]-1) parms->pbcFactor[HALO_Y_PLUS][HALO_Y_AXIS] = -1.0; if (procCoord[HALO_Z_AXIS] == 0) parms->pbcFactor[HALO_Z_MINUS][HALO_Z_AXIS] = +1.0; if (procCoord[HALO_Z_AXIS] == procGrid[HALO_Z_AXIS]-1) parms->pbcFactor[HALO_Z_PLUS][HALO_Z_AXIS] = -1.0; hh->parms = parms; return hh; }
/// \details /// Call functions such as createFccLattice and setTemperature to set up /// initial atom positions and momenta. Atoms* initAtoms(LinkCell* boxes) { Atoms* atoms = comdMalloc(sizeof(Atoms)); int maxTotalAtoms = MAXATOMS*boxes->nTotalBoxes; atoms->gid = (int*) comdMalloc(maxTotalAtoms*sizeof(int)); atoms->iSpecies = (int*) comdMalloc(maxTotalAtoms*sizeof(int)); atoms->r = (real3*) comdMalloc(maxTotalAtoms*sizeof(real3)); atoms->p = (real3*) comdMalloc(maxTotalAtoms*sizeof(real3)); atoms->f = (real3*) comdMalloc(maxTotalAtoms*sizeof(real3)); atoms->U = (real_t*)comdMalloc(maxTotalAtoms*sizeof(real_t)); atoms->nLocal = 0; atoms->nGlobal = 0; for (int iOff = 0; iOff < maxTotalAtoms; iOff++) { atoms->gid[iOff] = 0; atoms->iSpecies[iOff] = 0; zeroReal3(atoms->r[iOff]); zeroReal3(atoms->p[iOff]); zeroReal3(atoms->f[iOff]); atoms->U[iOff] = 0.; } return atoms; }
/// Creates a SpeciesData record whose name, atomic number and mass are
/// copied from the given potential.
///
/// \param [in] pot Potential supplying the species properties.
/// \return The newly allocated species record.
SpeciesData* initSpecies(BasePotential* pot)
{
   SpeciesData* sp = comdMalloc(sizeof(SpeciesData));

   strcpy(sp->name, pot->name);
   sp->atomicNo = pot->atomicNo;
   sp->mass     = pot->mass;

   return sp;
}
/// Initialized the main CoMD data stucture, SimFlat, based on command /// line input from the user. Also performs certain sanity checks on /// the input to screen out certain non-sensical inputs. /// /// Simple data members such as the time step dt are initialized /// directly, substructures such as the potential, the link cells, the /// atoms, etc., are initialized by calling additional initialization /// functions (initPotential(), initLinkCells(), initAtoms(), etc.). /// Initialization order is set by the natural dependencies of the /// substructure such as the atoms need the link cells so the link cells /// must be initialized before the atoms. SimFlat* initSimulation(Command cmd) { SimFlat* sim = comdMalloc(sizeof(SimFlat)); sim->nSteps = cmd.nSteps; sim->printRate = cmd.printRate; sim->dt = cmd.dt; sim->domain = NULL; sim->boxes = NULL; sim->atoms = NULL; sim->ePotential = 0.0; sim->eKinetic = 0.0; sim->atomExchange = NULL; sim->pot = initPotential(cmd.doeam, cmd.potDir, cmd.potName, cmd.potType); real_t latticeConstant = cmd.lat; if (cmd.lat < 0.0) latticeConstant = sim->pot->lat; // ensure input parameters make sense. sanityChecks(cmd, sim->pot->cutoff, latticeConstant, sim->pot->latticeType); sim->species = initSpecies(sim->pot); real3 globalExtent; globalExtent[0] = cmd.nx * latticeConstant; globalExtent[1] = cmd.ny * latticeConstant; globalExtent[2] = cmd.nz * latticeConstant; sim->domain = initDecomposition( cmd.xproc, cmd.yproc, cmd.zproc, globalExtent); sim->boxes = initLinkCells(sim->domain, sim->pot->cutoff); sim->atoms = initAtoms(sim->boxes); // create lattice with desired temperature and displacement. createFccLattice(cmd.nx, cmd.ny, cmd.nz, latticeConstant, sim); setTemperature(sim, cmd.temperature); randomDisplacements(sim, cmd.initialDelta); sim->atomExchange = initAtomHaloExchange(sim->domain, sim->boxes); // Forces must be computed before we call the time stepper. 
startTimer(redistributeTimer); redistributeAtoms(sim); stopTimer(redistributeTimer); startTimer(computeForceTimer); computeForce(sim); stopTimer(computeForceTimer); kineticEnergy(sim); return sim; }
/// In CoMD 1.1, atoms are stored in link cells. Link cells are widely /// used in classical MD to avoid an O(N^2) search for atoms that /// interact. Link cells are formed by subdividing the local spatial /// domain with a Cartesian grid where the grid spacing in each /// direction is at least as big as he potential's cutoff distance. /// Because atoms don't interact beyond the potential cutoff, for an /// atom iAtom in any given link cell, we can be certain that all atoms /// that interact with iAtom are contained in the same link cell, or one /// of the 26 neighboring link cells. /// /// CoMD chooses the link cell size (boxSize) on each axis to be the /// shortest possible distance, longer than cutoff, such that the local /// domain size divided by boxSize is an integer. I.e., the link cells /// are commensurate with with the local domain size. While this does /// not result in the smallest possible link cells, it does allow us to /// keep a strict separation between the link cells that are entirely /// inside the local domain and those that represent halo regions. /// /// The number of local link cells in each direction is stored in /// gridSize. Local link cells have 3D grid coordinates (ix, iy, iz) /// where ix, iy, and iz can range from 0 to gridSize[iAxis]-1, /// whiere iAxis is 0 for x, 1 for y and 2 for the z direction. The /// number of local link cells is thus nLocalBoxes = /// gridSize[0]*gridSize[1]*gridSize[2]. /// /// The local link cells are surrounded by one complete shell of halo /// link cells. The halo cells provide temporary storage for halo or /// "ghost" atoms that belong to other tasks, but whose coordinates are /// needed locally to complete the force calculation. Halo link cells /// have at least one coordinate with a value of either -1 or /// gridSize[iAxis]. /// /// Because CoMD stores data in ordinary 1D C arrays, a mapping is /// needed from the 3D grid coords to a 1D array index. 
For the local /// cells we use the conventional mapping ix + iy*nx + iz*nx*ny. This /// keeps all of the local cells in a contiguous region of memory /// starting from the beginning of any relevant array and makes it easy /// to iterate the local cells in a single loop. Halo cells are mapped /// differently. After the local cells, the two planes of link cells /// that are face neighbors with local cells across the -x or +x axis /// are next. These are followed by face neighbors across the -y and +y /// axis (including cells that are y-face neighbors with an x-plane of /// halo cells), followed by all remaining cells in the -z and +z planes /// of halo cells. The total number of link cells (on each rank) is /// nTotalBoxes. /// /// Data storage arrays that are used in association with link cells /// should be allocated to store nTotalBoxes*MAXATOMS items. Data for /// the first atom in linkCell iBox is stored at index iBox*MAXATOMS. /// Data for subsequent atoms in the same link cell are stored /// sequentially, and the number of atoms in link cell iBox is /// nAtoms[iBox]. /// /// \see getBoxFromTuple is the 3D->1D mapping for link cell indices. /// \see getTuple is the 1D->3D mapping /// /// \param [in] cutoff The cutoff distance of the potential. 
LinkCell* initLinkCells(const Domain* domain, real_t cutoff) { assert(domain); LinkCell* ll = comdMalloc(sizeof(LinkCell)); for (int i = 0; i < 3; i++) { ll->localMin[i] = domain->localMin[i]; ll->localMax[i] = domain->localMax[i]; ll->gridSize[i] = domain->localExtent[i] / cutoff; // local number of boxes ll->boxSize[i] = domain->localExtent[i] / ((real_t) ll->gridSize[i]); ll->invBoxSize[i] = 1.0/ll->boxSize[i]; } ll->nInnerBoxes = (ll->gridSize[0]-2) * (ll->gridSize[1]-2) * (ll->gridSize[2]-2); ll->nLocalBoxes = ll->gridSize[0] * ll->gridSize[1] * ll->gridSize[2]; ll->nHaloBoxes = 2 * ((ll->gridSize[0] + 2) * (ll->gridSize[1] + ll->gridSize[2] + 2) + (ll->gridSize[1] * ll->gridSize[2])); printf ("Number of boxes: %d, %d, %d\n", ll->nInnerBoxes, ll->nLocalBoxes - ll->nInnerBoxes, ll->nHaloBoxes); ll->nTotalBoxes = ll->nLocalBoxes + ll->nHaloBoxes; ll->nAtoms = comdMalloc(ll->nTotalBoxes*sizeof(int)); for (int iBox=0; iBox<ll->nTotalBoxes; ++iBox) ll->nAtoms[iBox] = 0; assert ( (ll->gridSize[0] >= 2) && (ll->gridSize[1] >= 2) && (ll->gridSize[2] >= 2) ); // debug test for box allocation for (int iBox = 0; iBox < ll->nTotalBoxes; iBox++) { int ix, iy, iz; getTuple(ll, iBox, &ix, &iy, &iz); //printf("Box %d is located at [%d, %d, %d]\n", iBox, ix, iy, iz); } return ll; }
/// Builds the link cell structure for the local domain and precomputes,
/// for every local box, its list of neighbor boxes.
///
/// The number of boxes per axis is the local extent divided by the
/// cutoff (truncated); boxSize is the local extent divided by that
/// count.  nAtoms is allocated and zeroed for the local boxes only, and
/// each local box gets a 27-entry neighbor array that is filled by
/// getLocalNeighborBoxes.
///
/// NOTE(review): nHaloBoxes and nTotalBoxes are still computed here,
/// but nAtoms covers only nLocalBoxes entries -- confirm that no caller
/// indexes nAtoms with a halo box.
///
/// \param [in] domain The domain decomposition; supplies the local bounds.
/// \param [in] cutoff The cutoff distance of the potential.
LinkCell* initLinkCells(const Domain* domain, real_t cutoff)
{
   assert(domain);
   LinkCell* cells = (LinkCell*)comdMalloc(sizeof(LinkCell));

   for (int axis = 0; axis < 3; axis++)
   {
      cells->localMin[axis] = domain->localMin[axis];
      cells->localMax[axis] = domain->localMax[axis];
      // local number of boxes (the quotient is truncated on assignment)
      cells->gridSize[axis] = domain->localExtent[axis] / cutoff;
      cells->boxSize[axis] = domain->localExtent[axis] / ((real_t) cells->gridSize[axis]);
      cells->invBoxSize[axis] = 1.0/cells->boxSize[axis];
   }

   const int nx = cells->gridSize[0];
   const int ny = cells->gridSize[1];
   const int nz = cells->gridSize[2];

   cells->nLocalBoxes = nx * ny * nz;
   cells->nHaloBoxes = 2 * ((nx + 2) * (ny + nz + 2) + (ny * nz));
   cells->nTotalBoxes = cells->nLocalBoxes + cells->nHaloBoxes;

   cells->nAtoms = (int*)comdMalloc(cells->nLocalBoxes*sizeof(int));
   for (int iBox=0; iBox<cells->nLocalBoxes; ++iBox)
      cells->nAtoms[iBox] = 0;

   assert ( (nx >= 2) && (ny >= 2) && (nz >= 2) );

   // Allocate every neighbor array first, then fill them.
   cells->nbrBoxes = (int**)comdMalloc(cells->nLocalBoxes*sizeof(int*));
   for (int iBox=0; iBox<cells->nLocalBoxes; ++iBox)
      cells->nbrBoxes[iBox] = (int*)comdMalloc(27*sizeof(int));

   for (int iBox=0; iBox<cells->nLocalBoxes; ++iBox)
      getLocalNeighborBoxes(cells, iBox, cells->nbrBoxes[iBox]);

   return cells;
}
/// Base class constructor. HaloExchange* initHaloExchange(Domain* domain) { HaloExchange* hh = comdMalloc(sizeof(HaloExchange)); // Rank of neighbor task for each face. hh->nbrRank[HALO_X_MINUS] = processorNum(domain, -1, 0, 0); hh->nbrRank[HALO_X_PLUS] = processorNum(domain, +1, 0, 0); hh->nbrRank[HALO_Y_MINUS] = processorNum(domain, 0, -1, 0); hh->nbrRank[HALO_Y_PLUS] = processorNum(domain, 0, +1, 0); hh->nbrRank[HALO_Z_MINUS] = processorNum(domain, 0, 0, -1); hh->nbrRank[HALO_Z_PLUS] = processorNum(domain, 0, 0, +1); hh->bufCapacity = 0; // will be set by sub-class. return hh; }
/// Records the reference values for later validation: the initial total
/// energy per atom and the initial global atom count.  sumAtoms is
/// called first; the values are then read from sim.  The rank for
/// which printRank() is true prints the values to screenOut.
///
/// \param [in] sim The simulation state.
/// \return The newly allocated Validate record.
Validate* initValidate(SimFlat* sim)
{
   sumAtoms(sim);

   Validate* ref = comdMalloc(sizeof(Validate));
   ref->eTot0   = (sim->ePotential + sim->eKinetic) / sim->atoms->nGlobal;
   ref->nAtoms0 = sim->atoms->nGlobal;

   if (!printRank())
   {
      return ref;
   }

   fprintf(screenOut, "\n");
   printSeparator(screenOut);
   fprintf(screenOut, "Initial energy : %14.12f, atom count : %d \n",
           ref->eTot0, ref->nAtoms0);
   fprintf(screenOut, "\n");

   return ref;
}
/// Initialize an Lennard Jones potential for Copper. BasePotential* initLjPot(void) { LjPotential *pot = (LjPotential*)comdMalloc(sizeof(LjPotential)); pot->force = ljForce; pot->print = ljPrint; pot->destroy = ljDestroy; pot->sigma = 2.315; // Angstrom pot->epsilon = 0.167; // eV pot->mass = 63.55 * amuToInternalMass; // Atomic Mass Units (amu) pot->lat = 3.615; // Equilibrium lattice const in Angs strcpy(pot->latticeType, "FCC"); // lattice type, i.e. FCC, BCC, etc. pot->cutoff = 2.5*pot->sigma; // Potential cutoff in Angs strcpy(pot->name, "Cu"); pot->atomicNo = 29; return (BasePotential*) pot; }
/// Make a list of link cells that need to receive data across the /// specified face. Note that this list must be compatible with the /// corresponding send list to ensure that the data goes to the correct /// atoms. /// /// \see initLinkCells for information about the conventions for grid /// coordinates of link cells. int* mkForceRecvCellList(LinkCell* boxes, int face, int nCells) { int* list = comdMalloc(nCells*sizeof(int)); int xBegin, xEnd, yBegin, yEnd, zBegin, zEnd; int nx = boxes->gridSize[0]; int ny = boxes->gridSize[1]; int nz = boxes->gridSize[2]; switch(face) { case HALO_X_MINUS: xBegin=-1; xEnd=0; yBegin=0; yEnd=ny; zBegin=0; zEnd=nz; break; case HALO_X_PLUS: xBegin=nx; xEnd=nx+1; yBegin=0; yEnd=ny; zBegin=0; zEnd=nz; break; case HALO_Y_MINUS: xBegin=-1; xEnd=nx+1; yBegin=-1; yEnd=0; zBegin=0; zEnd=nz; break; case HALO_Y_PLUS: xBegin=-1; xEnd=nx+1; yBegin=ny; yEnd=ny+1; zBegin=0; zEnd=nz; break; case HALO_Z_MINUS: xBegin=-1; xEnd=nx+1; yBegin=-1; yEnd=ny+1; zBegin=-1; zEnd=0; break; case HALO_Z_PLUS: xBegin=-1; xEnd=nx+1; yBegin=-1; yEnd=ny+1; zBegin=nz; zEnd=nz+1; break; default: assert(1==0); } int count = 0; for (int ix=xBegin; ix<xEnd; ++ix) for (int iy=yBegin; iy<yEnd; ++iy) for (int iz=zBegin; iz<zEnd; ++iz) list[count++] = getBoxFromTuple(boxes, ix, iy, iz); assert(count == nCells); return list; }
/// Builds a structure to store interpolation data for a tabular /// function. Interpolation must be supported on the range /// \f$[x_0, x_n]\f$, where \f$x_n = n*dx\f$. /// /// \see interpolate /// \see bcastInterpolationObject /// \see destroyInterpolationObject /// /// \param [in] n number of values in the table. /// \param [in] x0 minimum ordinate value of the table. /// \param [in] dx spacing of the ordinate values. /// \param [in] data abscissa values. An array of size n. InterpolationObject* initInterpolationObject( int n, real_t x0, real_t dx, real_t* data) { InterpolationObject* table = (InterpolationObject *)comdMalloc(sizeof(InterpolationObject)) ; assert(table); table->values = (real_t*)comdCalloc(1, (n+3)*sizeof(real_t)); assert(table->values); table->values++; table->n = n; table->invDx = 1.0/dx; table->x0 = x0; for (int ii=0; ii<n; ++ii) table->values[ii] = data[ii]; table->values[-1] = table->values[0]; table->values[n+1] = table->values[n] = table->values[n-1]; return table; }
/// Calculate potential energy and forces for the EAM potential.
///
/// Three steps are required:
///
///   -# Loop over all atoms and their neighbors, compute the two-body
///   interaction and the electron density at each atom
///   -# Loop over all atoms, compute the embedding energy and its
///   derivative for each atom
///   -# Loop over all atoms and their neighbors, compute the embedding
///   energy contribution to the force and add to the two-body force
///
/// In addition to forces and energies, this version accumulates a
/// 3x3 stress tensor (row-major, 9 entries) in s->defInfo->stress and
/// divides it by s->defInfo->globalVolume before returning.
///
/// \param [in, out] s Simulation state.  Reads positions, momenta,
///                    species and link cells; overwrites atoms->f,
///                    atoms->U, ePotential and defInfo->stress.
/// \return Always 0.
int eamForce(SimFlat* s)
{
   EamPotential* pot = (EamPotential*) s->pot;
   assert(pot);

   // set up halo exchange and internal storage on first call to forces.
   // (forceExchange is left NULL by the constructor for this purpose.)
   if (pot->forceExchange == NULL)
   {
      int maxTotalAtoms = MAXATOMS*s->boxes->nTotalBoxes;
      pot->dfEmbed = comdMalloc(maxTotalAtoms*sizeof(real_t));
      pot->rhobar  = comdMalloc(maxTotalAtoms*sizeof(real_t));
      pot->forceExchange = initForceHaloExchange(s->domain, s->boxes);
      pot->forceExchangeData = comdMalloc(sizeof(ForceExchangeData));
      pot->forceExchangeData->dfEmbed = pot->dfEmbed;
      pot->forceExchangeData->boxes = s->boxes;
   }

   real_t rCut2 = pot->cutoff*pot->cutoff;

   // zero forces, per-atom energy, dfEmbed and rhobar for every slot
   // in every box (local and halo).
   real_t etot = 0.0;
   memset(s->atoms->f, 0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real3));
   memset(s->atoms->U, 0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real_t));
   memset(pot->dfEmbed, 0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real_t));
   memset(pot->rhobar, 0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real_t));

   // virial stress computation: reset the accumulator
   for (int m = 0;m<9;m++)
   {
      s->defInfo->stress[m] = 0.0;
   }

   int nbrBoxes[27];
   // First pass: pair potential, pair force and electron density.
   // loop over local boxes
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      int nIBox = s->boxes->nAtoms[iBox];
      int nNbrBoxes = getNeighborBoxes(s->boxes, iBox, nbrBoxes);
      // loop over neighbor boxes of iBox (some may be halo boxes)
      for (int jTmp=0; jTmp<nNbrBoxes; jTmp++)
      {
         int jBox = nbrBoxes[jTmp];
         // Each pair of boxes is handled once, from the lower-index box.
         if (jBox < iBox ) continue;

         int nJBox = s->boxes->nAtoms[jBox];
         // loop over atoms in iBox
         for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
         {
            // loop over atoms in jBox
            for (int jOff=MAXATOMS*jBox,ij=0; ij<nJBox; ij++,jOff++)
            {
               // Within a single box, visit each pair once and skip
               // self-interaction.
               if ( (iBox==jBox) &&(ij <= ii) ) continue;

               double r2 = 0.0;
               real3 dr;
               for (int k=0; k<3; k++)
               {
                  dr[k]=s->atoms->r[iOff][k]-s->atoms->r[jOff][k];
                  r2+=dr[k]*dr[k];
               }
               // NOTE(review): this pass keeps pairs with r2 == rCut2,
               // while the third pass below skips them (>=) -- confirm.
               if(r2>rCut2) continue;

               // NOTE(review): r is used as a divisor below; two atoms
               // at the same position would give r == 0.
               double r = sqrt(r2);

               real_t phiTmp, dPhi, rhoTmp, dRho;
               interpolate(pot->phi, r, &phiTmp, &dPhi);
               interpolate(pot->rho, r, &rhoTmp, &dRho);

               // Equal and opposite pair force on the two atoms.
               for (int k=0; k<3; k++)
               {
                  s->atoms->f[iOff][k] -= dPhi*dr[k]/r;
                  s->atoms->f[jOff][k] += dPhi*dr[k]/r;
               }

               // Pair contribution to the stress tensor.
               for (int i=0; i<3; i++)
               {
                  for (int j=0; j<3; j++)
                  {
                     int m = 3*i + j;
                     s->defInfo->stress[m] += 1.0*dPhi*dr[i]*dr[j]/r;
                  }
               }

               // update energy terms
               // calculate energy contribution based on whether
               // the neighbor box is local or remote: a pair with a
               // halo box adds only half of phi to the local total.
               if (jBox < s->boxes->nLocalBoxes)
                  etot += phiTmp;
               else
                  etot += 0.5*phiTmp;

               s->atoms->U[iOff] += 0.5*phiTmp;
               s->atoms->U[jOff] += 0.5*phiTmp;

               // accumulate rhobar for each atom
               pot->rhobar[iOff] += rhoTmp;
               pot->rhobar[jOff] += rhoTmp;

            } // loop over atoms in jBox
         } // loop over atoms in iBox
      } // loop over neighbor boxes
   } // loop over local boxes

   // Second pass: compute Embedding Energy
   // loop over all local boxes
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      // NOTE(review): this iOff is never used; the loop below declares
      // its own iOff that shadows it.
      int iOff;
      int nIBox = s->boxes->nAtoms[iBox];

      // loop over atoms in iBox
      for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
      {
         real_t fEmbed, dfEmbed;
         interpolate(pot->f, pot->rhobar[iOff], &fEmbed, &dfEmbed);
         pot->dfEmbed[iOff] = dfEmbed; // save derivative for halo exchange
         etot += fEmbed;
         s->atoms->U[iOff] += fEmbed;

         // Subtract the p_i*p_j/mass term of this atom from the stress.
         int iSpecies = s->atoms->iSpecies[iOff];
         real_t invMass = 1.0/s->species[iSpecies].mass;
         for (int i=0; i<3; i++)
         {
            for (int j=0; j<3; j++)
            {
               int m = 3*i + j;
               s->defInfo->stress[m] -= s->atoms->p[iOff][i]*s->atoms->p[iOff][j]*invMass;
            }
         }
      }
   }

   // exchange derivative of the embedding energy with respect to rhobar
   startTimer(eamHaloTimer);
   haloExchange(pot->forceExchange, pot->forceExchangeData);
   stopTimer(eamHaloTimer);

   // third pass: embedding contribution to the forces and stress
   // loop over local boxes
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      int nIBox = s->boxes->nAtoms[iBox];
      int nNbrBoxes = getNeighborBoxes(s->boxes, iBox, nbrBoxes);
      // loop over neighbor boxes of iBox (some may be halo boxes)
      for (int jTmp=0; jTmp<nNbrBoxes; jTmp++)
      {
         int jBox = nbrBoxes[jTmp];
         if(jBox < iBox) continue;

         int nJBox = s->boxes->nAtoms[jBox];
         // loop over atoms in iBox
         for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
         {
            // loop over atoms in jBox
            for (int jOff=MAXATOMS*jBox,ij=0; ij<nJBox; ij++,jOff++)
            {
               if ((iBox==jBox) && (ij <= ii)) continue;

               double r2 = 0.0;
               real3 dr;
               for (int k=0; k<3; k++)
               {
                  dr[k]=s->atoms->r[iOff][k]-s->atoms->r[jOff][k];
                  r2+=dr[k]*dr[k];
               }
               if(r2>=rCut2) continue;

               real_t r = sqrt(r2);

               real_t rhoTmp, dRho;
               interpolate(pot->rho, r, &rhoTmp, &dRho);

               // Force from the density dependence of both atoms'
               // embedding energies.
               for (int k=0; k<3; k++)
               {
                  s->atoms->f[iOff][k] -= (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[k]/r;
                  s->atoms->f[jOff][k] += (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[k]/r;
               }

               for (int i=0; i<3; i++)
               {
                  for (int j=0; j<3; j++)
                  {
                     int m = 3*i + j;
                     s->defInfo->stress[m] += 1.0*(pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[i]*dr[j]/r;
                  }
               }

            } // loop over atoms in jBox
         } // loop over atoms in iBox
      } // loop over neighbor boxes
   } // loop over local boxes

   s->ePotential = (real_t) etot;

   // Normalize the accumulated stress by the global volume.
   for (int m = 0;m<9;m++)
   {
      s->defInfo->stress[m] = s->defInfo->stress[m]/s->defInfo->globalVolume;
   }

   return 0;
}
/// Reads potential data from a funcfl file and populates /// corresponding members and InterpolationObjects in an EamPotential. /// /// funcfl is a file format for tabulated potential functions used by /// the original EAM code DYNAMO. A funcfl file contains an EAM /// potential for a single element. /// /// The contents of a funcfl file are: /// /// | Line Num | Description /// | :------: | :---------- /// | 1 | comments /// | 2 | elem amass latConstant latType /// | 3 | nrho drho nr dr rcutoff /// | 4 | embedding function values F(rhobar) starting at rhobar=0 /// | ... | (nrho values. Multiple values per line allowed.) /// | x' | electrostatic interation Z(r) starting at r=0 /// | ... | (nr values. Multiple values per line allowed.) /// | y' | electron density values rho(r) starting at r=0 /// | ... | (nr values. Multiple values per line allowed.) /// /// Where: /// - elem : atomic number for this element /// - amass : atomic mass for this element in AMU /// - latConstant : lattice constant for this elemnent in Angstroms /// - lattticeType : lattice type for this element (e.g. FCC) /// - nrho : number of values for the embedding function, F(rhobar) /// - drho : table spacing for rhobar /// - nr : number of values for Z(r) and rho(r) /// - dr : table spacing for r in Angstroms /// - rcutoff : potential cut-off distance in Angstroms /// /// funcfl format stores the "electrostatic interation" Z(r). This needs to /// be converted to the pair potential phi(r). /// using the formula /// \f[phi = Z(r) * Z(r) / r\f] /// NB: phi is not defined for r = 0 /// /// Z(r) is in atomic units (i.e., sqrt[Hartree * bohr]) so it is /// necesary to convert to eV. /// /// F(rhobar) is in eV. 
/// void eamReadFuncfl(EamPotential* pot, const char* dir, const char* potName) { char tmp[4096]; sprintf(tmp, "%s/%s", dir, potName); FILE* potFile = fopen(tmp, "r"); if (potFile == NULL) fileNotFound("eamReadFuncfl", tmp); // line 1 fgets(tmp, sizeof(tmp), potFile); char name[3]; sscanf(tmp, "%s", name); strcpy(pot->name, name); // line 2 int nAtomic; double mass, lat; char latticeType[8]; fgets(tmp,sizeof(tmp),potFile); sscanf(tmp, "%d %le %le %s", &nAtomic, &mass, &lat, latticeType); pot->atomicNo = nAtomic; pot->lat = lat; pot->mass = mass*amuToInternalMass; // file has mass in AMU. strcpy(pot->latticeType, latticeType); // line 3 int nRho, nR; double dRho, dR, cutoff; fgets(tmp,sizeof(tmp),potFile); sscanf(tmp, "%d %le %d %le %le", &nRho, &dRho, &nR, &dR, &cutoff); pot->cutoff = cutoff; real_t x0 = 0.0; // tables start at zero. // allocate read buffer int bufSize = MAX(nRho, nR); real_t* buf = comdMalloc(bufSize * sizeof(real_t)); // read embedding energy for (int ii=0; ii<nRho; ++ii) fscanf(potFile, FMT1, buf+ii); pot->f = initInterpolationObject(nRho, x0, dRho, buf); // read Z(r) and convert to phi(r) for (int ii=0; ii<nR; ++ii) fscanf(potFile, FMT1, buf+ii); for (int ii=1; ii<nR; ++ii) { real_t r = x0 + ii*dR; buf[ii] *= buf[ii] / r; buf[ii] *= hartreeToEv * bohrToAngs; // convert to eV } buf[0] = buf[1] + (buf[1] - buf[2]); // linear interpolation to get phi[0]. pot->phi = initInterpolationObject(nR, x0, dR, buf); // read electron density rho for (int ii=0; ii<nR; ++ii) fscanf(potFile, FMT1, buf+ii); pot->rho = initInterpolationObject(nR, x0, dR, buf); comdFree(buf); /* printPot(pot->f, "funcflDataF.txt"); */ /* printPot(pot->rho, "funcflDataRho.txt"); */ /* printPot(pot->phi, "funcflDataPhi.txt"); */ }
/// Reads potential data from a setfl file and populates /// corresponding members and InterpolationObjects in an EamPotential. /// /// setfl is a file format for tabulated potential functions used by /// the original EAM code DYNAMO. A setfl file contains EAM /// potentials for multiple elements. /// /// The contents of a setfl file are: /// /// | Line Num | Description /// | :------: | :---------- /// | 1 - 3 | comments /// | 4 | ntypes type1 type2 ... typen /// | 5 | nrho drho nr dr rcutoff /// | F, rho | Following line 5 there is a block for each atom type with F, and rho. /// | b1 | ielem(i) amass(i) latConst(i) latType(i) /// | b2 | embedding function values F(rhobar) starting at rhobar=0 /// | ... | (nrho values. Multiple values per line allowed.) /// | bn | electron density, starting at r=0 /// | ... | (nr values. Multiple values per line allowed.) /// | repeat | Return to b1 for each atom type. /// | phi | phi_ij for (1,1), (2,1), (2,2), (3,1), (3,2), (3,3), (4,1), ..., /// | p1 | pair potential between type i and type j, starting at r=0 /// | ... | (nr values. Multiple values per line allowed.) /// | repeat | Return to p1 for each phi_ij /// /// Where: /// - ntypes : number of element types in the potential /// - nrho : number of points the embedding energy F(rhobar) /// - drho : table spacing for rhobar /// - nr : number of points for rho(r) and phi(r) /// - dr : table spacing for r in Angstroms /// - rcutoff : cut-off distance in Angstroms /// - ielem(i) : atomic number for element(i) /// - amass(i) : atomic mass for element(i) in AMU /// - latConst(i) : lattice constant for element(i) in Angstroms /// - latType(i) : lattice type for element(i) /// /// setfl format stores r*phi(r), so we need to converted to the pair /// potential phi(r). In the file, phi(r)*r is in eV*Angstroms. /// NB: phi is not defined for r = 0 /// /// F(rhobar) is in eV. 
/// void eamReadSetfl(EamPotential* pot, const char* dir, const char* potName) { char tmp[4096]; sprintf(tmp, "%s/%s", dir, potName); FILE* potFile = fopen(tmp, "r"); if (potFile == NULL) fileNotFound("eamReadSetfl", tmp); // read the first 3 lines (comments) fgets(tmp, sizeof(tmp), potFile); fgets(tmp, sizeof(tmp), potFile); fgets(tmp, sizeof(tmp), potFile); // line 4 fgets(tmp, sizeof(tmp), potFile); int nElems; sscanf(tmp, "%d", &nElems); if( nElems != 1 ) notAlloyReady("eamReadSetfl"); //line 5 int nRho, nR; double dRho, dR, cutoff; // The same cutoff is used by all alloys, NB: cutoff = nR * dR is redundant fgets(tmp, sizeof(tmp), potFile); sscanf(tmp, "%d %le %d %le %le", &nRho, &dRho, &nR, &dR, &cutoff); pot->cutoff = cutoff; // **** THIS CODE IS RESTRICTED TO ONE ELEMENT // Per-atom header fgets(tmp, sizeof(tmp), potFile); int nAtomic; double mass, lat; char latticeType[8]; sscanf(tmp, "%d %le %le %s", &nAtomic, &mass, &lat, latticeType); pot->atomicNo = nAtomic; pot->lat = lat; pot->mass = mass * amuToInternalMass; // file has mass in AMU. strcpy(pot->latticeType, latticeType); // allocate read buffer int bufSize = MAX(nRho, nR); real_t* buf = comdMalloc(bufSize * sizeof(real_t)); real_t x0 = 0.0; // Read embedding energy F(rhobar) for (int ii=0; ii<nRho; ++ii) fscanf(potFile, FMT1, buf+ii); pot->f = initInterpolationObject(nRho, x0, dRho, buf); // Read electron density rho(r) for (int ii=0; ii<nR; ++ii) fscanf(potFile, FMT1, buf+ii); pot->rho = initInterpolationObject(nR, x0, dR, buf); // Read phi(r)*r and convert to phi(r) for (int ii=0; ii<nR; ++ii) fscanf(potFile, FMT1, buf+ii); for (int ii=1; ii<nR; ++ii) { real_t r = x0 + ii*dR; buf[ii] /= r; } buf[0] = buf[1] + (buf[1] - buf[2]); // Linear interpolation to get phi[0]. 
pot->phi = initInterpolationObject(nR, x0, dR, buf); comdFree(buf); // write to text file for comparison, currently commented out /* printPot(pot->f, "SetflDataF.txt"); */ /* printPot(pot->rho, "SetflDataRho.txt"); */ /* printPot(pot->phi, "SetflDataPhi.txt"); */ }
/// \details /// When called in proper sequence by redistributeAtoms, the atom halo /// exchange helps serve three purposes: /// - Send ghost atom data to neighbor tasks. /// - Shift atom coordinates by the global simulation size when they cross /// periodic boundaries. This shift is performed in loadAtomsBuffer. /// - Transfer ownership of atoms between tasks as the atoms move across /// spatial domain boundaries. This transfer of ownership occurs in /// two places. The former owner gives up ownership when /// updateLinkCells moves a formerly local atom into a halo link cell. /// The new owner accepts ownership when unloadAtomsBuffer calls /// putAtomInBox to place a received atom into a local link cell. /// /// This constructor does the following: /// /// - Sets the bufCapacity to hold the largest possible number of atoms /// that can be sent across a face. /// - Initialize function pointers to the atom-specific versions /// - Sets the number of link cells to send across each face. /// - Builds the list of link cells to send across each face. As /// explained in the comments for mkAtomCellList, this list must /// include any link cell, local or halo, that could possibly contain /// an atom that needs to be sent across the face. Atoms that need to /// be sent include "ghost atoms" that are located in local link /// cells that correspond to halo link cells on receiving tasks as well as /// formerly local atoms that have just moved into halo link cells and /// need to be sent to the rank that owns the spatial domain the atom /// has moved into. /// - Sets a coordinate shift factor for each face to account for /// periodic boundary conditions. For most faces the factor is zero. /// For faces on the +x, +y, or +z face of the simulation domain /// the factor is -1.0 (to shift the coordinates by -1 times the /// simulation domain size). For -x, -y, and -z faces of the /// simulation domain, the factor is +1.0. 
/// /// \see redistributeAtoms HaloExchange* initAtomHaloExchange(Domain* domain, LinkCell* boxes) { HaloExchange* hh = initHaloExchange(domain); int size0 = (boxes->gridSize[1]+2)*(boxes->gridSize[2]+2); int size1 = (boxes->gridSize[0]+2)*(boxes->gridSize[2]+2); int size2 = (boxes->gridSize[0]+2)*(boxes->gridSize[1]+2); int maxSize = MAX(size0, size1); maxSize = MAX(size1, size2); hh->bufCapacity = maxSize*2*MAXATOMS*sizeof(AtomMsg); hh->sendBufM = (char*)comdMalloc(hh->bufCapacity); hh->sendBufP = (char*)comdMalloc(hh->bufCapacity); hh->recvBufP = (char*)comdMalloc(hh->bufCapacity); hh->recvBufM = (char*)comdMalloc(hh->bufCapacity); // pin memory cudaHostRegister(hh->sendBufM, hh->bufCapacity, 0); cudaHostRegister(hh->sendBufP, hh->bufCapacity, 0); cudaHostRegister(hh->recvBufP, hh->bufCapacity, 0); cudaHostRegister(hh->recvBufM, hh->bufCapacity, 0); hh->loadBuffer = loadAtomsBuffer; hh->unloadBuffer = unloadAtomsBuffer; hh->destroy = destroyAtomsExchange; hh->hashTable = initHashTable((boxes->nTotalBoxes - boxes->nLocalBoxes) * MAXATOMS * 2); AtomExchangeParms* parms = (AtomExchangeParms*)comdMalloc(sizeof(AtomExchangeParms)); parms->nCells[HALO_X_MINUS] = 2*(boxes->gridSize[1]+2)*(boxes->gridSize[2]+2); parms->nCells[HALO_Y_MINUS] = 2*(boxes->gridSize[0]+2)*(boxes->gridSize[2]+2); parms->nCells[HALO_Z_MINUS] = 2*(boxes->gridSize[0]+2)*(boxes->gridSize[1]+2); parms->nCells[HALO_X_PLUS] = parms->nCells[HALO_X_MINUS]; parms->nCells[HALO_Y_PLUS] = parms->nCells[HALO_Y_MINUS]; parms->nCells[HALO_Z_PLUS] = parms->nCells[HALO_Z_MINUS]; for (int ii=0; ii<6; ++ii) { parms->cellList[ii] = mkAtomCellList(boxes, (enum HaloFaceOrder)ii, parms->nCells[ii]); // copy cell list to gpu cudaMalloc((void**)&parms->cellListGpu[ii], parms->nCells[ii] * sizeof(int)); cudaMemcpy(parms->cellListGpu[ii], parms->cellList[ii], parms->nCells[ii] * sizeof(int), cudaMemcpyHostToDevice); } // allocate scan buf int size = boxes->nLocalBoxes+1; if (size % 256 != 0) size = ((size + 
255)/256)*256; int partial_size = size/256 + 1; if (partial_size % 256 != 0) partial_size = ((partial_size + 255)/256)*256; cudaMalloc((void**)&parms->d_natoms_buf, size * sizeof(int)); parms->h_natoms_buf = (int*) malloc( size * sizeof(int)); cudaMalloc((void**)&parms->d_partial_sums, partial_size * sizeof(int)); for (int ii=0; ii<6; ++ii) { parms->pbcFactor[ii] = (real_t*)comdMalloc(3*sizeof(real_t)); for (int jj=0; jj<3; ++jj) parms->pbcFactor[ii][jj] = 0.0; } int* procCoord = domain->procCoord; //alias int* procGrid = domain->procGrid; //alias if (procCoord[HALO_X_AXIS] == 0) parms->pbcFactor[HALO_X_MINUS][HALO_X_AXIS] = +1.0; if (procCoord[HALO_X_AXIS] == procGrid[HALO_X_AXIS]-1) parms->pbcFactor[HALO_X_PLUS][HALO_X_AXIS] = -1.0; if (procCoord[HALO_Y_AXIS] == 0) parms->pbcFactor[HALO_Y_MINUS][HALO_Y_AXIS] = +1.0; if (procCoord[HALO_Y_AXIS] == procGrid[HALO_Y_AXIS]-1) parms->pbcFactor[HALO_Y_PLUS][HALO_Y_AXIS] = -1.0; if (procCoord[HALO_Z_AXIS] == 0) parms->pbcFactor[HALO_Z_MINUS][HALO_Z_AXIS] = +1.0; if (procCoord[HALO_Z_AXIS] == procGrid[HALO_Z_AXIS]-1) parms->pbcFactor[HALO_Z_PLUS][HALO_Z_AXIS] = -1.0; hh->type = 0; hh->parms = parms; return hh; }
/// Calculate potential energy and forces for the EAM potential. /// /// Three steps are required: /// /// -# Loop over all atoms and their neighbors, compute the two-body /// interaction and the electron density at each atom /// -# Loop over all atoms, compute the embedding energy and its /// derivative for each atom /// -# Loop over all atoms and their neighbors, compute the embedding /// energy contribution to the force and add to the two-body force /// int eamForce(SimFlat* s) { EamPotential* pot = (EamPotential*) s->pot; assert(pot); // set up halo exchange and internal storage on first call to forces. if (pot->forceExchange == NULL) { int maxTotalAtoms = MAXATOMS*s->boxes->nTotalBoxes; pot->dfEmbed = comdMalloc(maxTotalAtoms*sizeof(real_t)); pot->rhobar = comdMalloc(maxTotalAtoms*sizeof(real_t)); pot->forceExchange = initForceHaloExchange(s->domain, s->boxes); pot->forceExchangeData = comdMalloc(sizeof(ForceExchangeData)); pot->forceExchangeData->dfEmbed = pot->dfEmbed; pot->forceExchangeData->boxes = s->boxes; } real_t rCut2 = pot->cutoff*pot->cutoff; real_t etot = 0.; // zero forces / energy / rho /rhoprime int fsize = s->boxes->nTotalBoxes*MAXATOMS; //#pragma omp parallel for for (int ii=0; ii<fsize; ii++) { zeroReal3(s->atoms->f[ii]); //s->atoms->U[ii] = 0.;//never used pot->dfEmbed[ii] = 0.; pot->rhobar[ii] = 0.; } int nNbrBoxes = 27; // loop over local boxes //#pragma omp parallel for reduction(+:etot) for(int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++){ int nIBox = s->boxes->nAtoms[iBox]; // loop over neighbor boxes of iBox (some may be halo boxes) for(int jTmp=0; jTmp<nNbrBoxes; jTmp++) { int jBox = s->boxes->nbrBoxes[iBox][jTmp]; int nJBox = s->boxes->nAtoms[jBox]; // loop over atoms in iBox for(int iOff=MAXATOMS*iBox; iOff<(iBox*MAXATOMS+nIBox); iOff++) { // loop over atoms in jBox for(int jOff=MAXATOMS*jBox; jOff<(jBox*MAXATOMS+nJBox); jOff++) { real3 dr; real_t r2 = 0.0; for(int k=0; k<3; k++) { dr[k]=s->atoms->r[iOff][k]-s->atoms->r[jOff][k]; 
r2+=dr[k]*dr[k]; } if(r2 <= rCut2 && r2 > 0.0) { real_t r = sqrt(r2); real_t phiTmp, dPhi, rhoTmp, dRho; interpolate(pot->phi, r, &phiTmp, &dPhi); interpolate(pot->rho, r, &rhoTmp, &dRho); for(int k=0; k<3; k++) { s->atoms->f[iOff][k] -= dPhi*dr[k]/r; } // Calculate energy contribution //s->atoms->U[iOff] += 0.5*phiTmp;//never used etot += 0.5*phiTmp; // accumulate rhobar for each atom pot->rhobar[iOff] += rhoTmp; } } // loop over atoms in jBox } // loop over atoms in iBox } // loop over neighbor boxes } // loop over local boxes // Compute Embedding Energy // loop over all local boxes //#pragma omp parallel for reduction(+:etot) for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++) { int nIBox = s->boxes->nAtoms[iBox]; // loop over atoms in iBox for (int iOff=MAXATOMS*iBox; iOff<(MAXATOMS*iBox+nIBox); iOff++) { real_t fEmbed, dfEmbed; interpolate(pot->f, pot->rhobar[iOff], &fEmbed, &dfEmbed); pot->dfEmbed[iOff] = dfEmbed; // save derivative for halo exchange //s->atoms->U[iOff] += fEmbed;//never used etot += fEmbed; } } // exchange derivative of the embedding energy with repsect to rhobar startTimer(eamHaloTimer); haloExchange(pot->forceExchange, pot->forceExchangeData); stopTimer(eamHaloTimer); // third pass // loop over local boxes //#pragma omp parallel for for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++) { int nIBox = s->boxes->nAtoms[iBox]; // loop over neighbor boxes of iBox (some may be halo boxes) for (int jTmp=0; jTmp<nNbrBoxes; jTmp++) { int jBox = s->boxes->nbrBoxes[iBox][jTmp]; int nJBox = s->boxes->nAtoms[jBox]; // loop over atoms in iBox for (int iOff=MAXATOMS*iBox; iOff<(MAXATOMS*iBox+nIBox); iOff++) { // loop over atoms in jBox for (int jOff=MAXATOMS*jBox; jOff<(MAXATOMS*jBox+nJBox); jOff++) { real_t r2 = 0.0; real3 dr; for (int k=0; k<3; k++) { dr[k]=s->atoms->r[iOff][k]-s->atoms->r[jOff][k]; r2+=dr[k]*dr[k]; } if(r2 <= rCut2 && r2 > 0.0) { real_t r = sqrt(r2); real_t rhoTmp, dRho; interpolate(pot->rho, r, &rhoTmp, &dRho); for (int k=0; k<3; 
k++) { s->atoms->f[iOff][k] -= (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[k]/r; } } } // loop over atoms in jBox } // loop over atoms in iBox } // loop over neighbor boxes } // loop over local boxes s->ePotential = (real_t) etot; return 0; }
/// The force exchange is considerably simpler than the atom exchange. /// In the force case we only need to exchange data that is needed to /// complete the force calculation. Since the atoms have not moved we /// only need to send data from local link cells and we are guaranteed /// that the same atoms exist in the same order in corresponding halo /// cells on remote tasks. The only tricky part is the size of the /// plane of local cells that needs to be sent grows in each direction. /// This is because the y-axis send must send some of the data that was /// received from the x-axis send, and the z-axis must send some data /// from the y-axis send. This accumulation of data to send is /// responsible for data reaching neighbor cells that share only edges /// or corners. /// /// \see eam.c for an explanation of the requirement to exchange /// force data. HaloExchange* initForceHaloExchange(Domain* domain, LinkCell* boxes, int useGPU) { HaloExchange* hh = initHaloExchange(domain); if(useGPU){ hh->loadBuffer = loadForceBuffer; hh->unloadBuffer = unloadForceBuffer; }else{ hh->loadBuffer = loadForceBufferCpu; hh->unloadBuffer = unloadForceBufferCpu; } hh->destroy = destroyForceExchange; int size0 = (boxes->gridSize[1])*(boxes->gridSize[2]); int size1 = (boxes->gridSize[0]+2)*(boxes->gridSize[2]); int size2 = (boxes->gridSize[0]+2)*(boxes->gridSize[1]+2); int maxSize = MAX(size0, size1); maxSize = MAX(size1, size2); hh->bufCapacity = (maxSize)*MAXATOMS*sizeof(ForceMsg); hh->sendBufM = (char*)comdMalloc(hh->bufCapacity); hh->sendBufP = (char*)comdMalloc(hh->bufCapacity); hh->recvBufP = (char*)comdMalloc(hh->bufCapacity); hh->recvBufM = (char*)comdMalloc(hh->bufCapacity); // pin memory cudaHostRegister(hh->sendBufM, hh->bufCapacity, 0); cudaHostRegister(hh->sendBufP, hh->bufCapacity, 0); cudaHostRegister(hh->recvBufP, hh->bufCapacity, 0); cudaHostRegister(hh->recvBufM, hh->bufCapacity, 0); ForceExchangeParms* parms = 
(ForceExchangeParms*)comdMalloc(sizeof(ForceExchangeParms)); parms->nCells[HALO_X_MINUS] = (boxes->gridSize[1] )*(boxes->gridSize[2] ); parms->nCells[HALO_Y_MINUS] = (boxes->gridSize[0]+2)*(boxes->gridSize[2] ); parms->nCells[HALO_Z_MINUS] = (boxes->gridSize[0]+2)*(boxes->gridSize[1]+2); parms->nCells[HALO_X_PLUS] = parms->nCells[HALO_X_MINUS]; parms->nCells[HALO_Y_PLUS] = parms->nCells[HALO_Y_MINUS]; parms->nCells[HALO_Z_PLUS] = parms->nCells[HALO_Z_MINUS]; for (int ii=0; ii<6; ++ii) { parms->sendCells[ii] = mkForceSendCellList(boxes, ii, parms->nCells[ii]); parms->recvCells[ii] = mkForceRecvCellList(boxes, ii, parms->nCells[ii]); // copy cell list to gpu cudaMalloc((void**)&parms->sendCellsGpu[ii], parms->nCells[ii] * sizeof(int)); cudaMalloc((void**)&parms->recvCellsGpu[ii], parms->nCells[ii] * sizeof(int)); cudaMemcpy(parms->sendCellsGpu[ii], parms->sendCells[ii], parms->nCells[ii] * sizeof(int), cudaMemcpyHostToDevice); cudaMemcpy(parms->recvCellsGpu[ii], parms->recvCells[ii], parms->nCells[ii] * sizeof(int), cudaMemcpyHostToDevice); // allocate temp buf int size = parms->nCells[ii]+1; if (size % 256 != 0) size = ((size + 255)/256)*256; cudaMalloc((void**)&parms->natoms_buf[ii], size * sizeof(int)); cudaMalloc((void**)&parms->partial_sums[ii], (size/256 + 1) * sizeof(int)); } hh->hashTable = NULL; hh->type = 1; hh->parms = parms; return hh; }
/// Build a ready-to-run simulation from the parsed command line.
///
/// The steps are, in order: load the potential, run the input sanity
/// checks, set up species, domain decomposition, link cells, atoms and
/// deformation info, create an FCC lattice at zero temperature with
/// random displacements, build the atom halo exchange, apply the
/// forward deformation, redistribute atoms, compute forces, and finally
/// set the temperature from the requested energy density.
///
/// \param [in] cmd Parsed command line options.
/// \return The newly allocated simulation.
SimFlat* initSimulation(Command cmd)
{
   SimFlat* sim = comdMalloc(sizeof(SimFlat));

   // Run parameters taken directly from the command line.
   sim->nSteps    = cmd.nSteps;
   sim->printRate = cmd.printRate;
   sim->dt        = cmd.dt;

   // Nothing has been built yet.
   sim->atomExchange = NULL;
   sim->atoms        = NULL;
   sim->boxes        = NULL;
   sim->domain       = NULL;
   sim->eKinetic     = 0.0;
   sim->ePotential   = 0.0;

   sim->pot = initPotential(cmd.doeam, cmd.potDir, cmd.potName, cmd.potType);

   // A negative lattice constant on the command line selects the value
   // stored with the potential.
   real_t a0;
   if (cmd.lat < 0.0)
      a0 = sim->pot->lat;
   else
      a0 = cmd.lat;

   // ensure input parameters make sense.
   sanityChecks(cmd, sim->pot->cutoff, a0, sim->pot->latticeType);

   sim->species = initSpecies(sim->pot);

   real3 extent;
   extent[0] = cmd.nx * a0;
   extent[1] = cmd.ny * a0;
   extent[2] = cmd.nz * a0;

   sim->domain = initDecomposition(cmd.xproc, cmd.yproc, cmd.zproc, extent);

   sim->boxes = initLinkCells(sim->domain, sim->pot->cutoff);
   sim->atoms = initAtoms(sim->boxes);

   sim->defInfo = initDeformation(sim, cmd.defGrad);

   // Lattice starts at zero temperature, then gets random displacements.
   createFccLattice(cmd.nx, cmd.ny, cmd.nz, a0, sim);
   setTemperature(sim, 0.0);
   randomDisplacements(sim, cmd.initialDelta);

   sim->atomExchange = initAtomHaloExchange(sim->domain, sim->boxes);

   forwardDeformation(sim);

   // Forces must be computed before we call the time stepper; they also
   // provide the potential energy used for the temperature below.
   startTimer(redistributeTimer);
   redistributeAtoms(sim);
   stopTimer(redistributeTimer);

   startTimer(computeForceTimer);
   computeForce(sim);
   stopTimer(computeForceTimer);

   // Energy-density hand-off from the macro solver: the energy left over
   // after subtracting the potential energy is divided per atom and by
   // 1.5*kB_eV to give the temperature.
   double targetEnergy = cmd.energy*sim->defInfo->globalVolume;
   double tempFromEnergy = ((targetEnergy-sim->ePotential)/sim->atoms->nGlobal)/(kB_eV*1.5);
   setTemperature(sim, tempFromEnergy);
   // Alternative: set the temperature directly from the command line.
   //setTemperature(sim,cmd.temperature);

   kineticEnergy(sim);
   return sim;
}
/// Evaluate EAM energy and forces on the CPU using per-atom neighbor lists.
///
/// Three passes are made:
///
/// -# For every local atom, walk its neighbor list and accumulate the
///    pair force, the pair energy and the electron density rhobar.
///    Every contribution is applied to both atoms of the pair.
/// -# For every local atom, look up the embedding energy and its
///    derivative at the atom's rhobar.
/// -# After the embedding derivatives have been halo-exchanged, walk the
///    neighbor lists again and add the embedding contribution to the
///    force on both atoms of each pair.
///
/// NOTE(review): because every pair updates both atoms, the neighbor
/// list presumably stores each pair only once -- confirm against the
/// neighbor list builder.
///
/// \param [in, out] s Simulation state.  Forces and per-atom energies are
///                    rebuilt from zero and ePotential is overwritten.
/// \return Always 0.
int eamForceCpuNL(SimFlat* s)
{
   EamPotential* pot = (EamPotential*) s->pot;
   assert(pot);

   // First call only: allocate the per-atom work arrays and build the
   // halo exchange that carries the embedding derivative.
   if (pot->forceExchange == NULL)
   {
      int nSlots = MAXATOMS*s->boxes->nTotalBoxes;
      pot->dfEmbed = comdMalloc(nSlots*sizeof(real_t));
      pot->rhobar  = comdMalloc(nSlots*sizeof(real_t));
      pot->forceExchange = initForceHaloExchange(s->domain, s->boxes, s->method<CPU_NL);
      pot->forceExchangeData = comdMalloc(sizeof(ForceExchangeData));
      pot->forceExchangeData->dfEmbed = pot->dfEmbed;
      pot->forceExchangeData->boxes = s->boxes;
   }

   real_t cutoffSq = pot->cutoff*pot->cutoff;

   // Reset forces, per-atom energy, embedding derivative and density.
   real_t eSum = 0.0;
   zeroVecAll(&(s->atoms->f), s->boxes->nTotalBoxes*MAXATOMS);
   size_t scalarBytes = s->boxes->nTotalBoxes*MAXATOMS*sizeof(real_t);
   memset(s->atoms->U,  0, scalarBytes);
   memset(pot->dfEmbed, 0, scalarBytes);
   memset(pot->rhobar,  0, scalarBytes);

   NeighborList* nl = s->atoms->neighborList;

   // ---- Pass 1: pair term and electron density -------------------------
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      int nInBox = s->boxes->nAtoms[iBox];

      for (int iOff=MAXATOMS*iBox, a=0; a<nInBox; a++, iOff++)
      {
         int iLid = s->atoms->lid[iOff];
         assert(iLid < nl->nMaxLocal);

         // Neighbors of atom iLid occupy one fixed-width row of the list.
         int* nbrs = &(nl->list[nl->maxNeighbors * iLid]);
         const int nNbrs = nl->nNeighbors[iLid];

         for (int n=0; n<nNbrs; n++)
         {
            int jOff = nbrs[n];

            real3_old dr;
            dr[0] = s->atoms->r.x[iOff] - s->atoms->r.x[jOff];
            dr[1] = s->atoms->r.y[iOff] - s->atoms->r.y[jOff];
            dr[2] = s->atoms->r.z[iOff] - s->atoms->r.z[jOff];
            double r2 = dr[0]*dr[0] + dr[1]*dr[1] + dr[2]*dr[2];

            if (r2 > cutoffSq)
               continue;

            double r = sqrt(r2);

            real_t phiVal, dPhi, rhoVal, dRho;
            interpolate(pot->phi, r, &phiVal, &dPhi);
            interpolate(pot->rho, r, &rhoVal, &dRho);

            // Equal and opposite pair force on the two atoms.
            const double fx = dPhi*dr[0]/r;
            const double fy = dPhi*dr[1]/r;
            const double fz = dPhi*dr[2]/r;
            s->atoms->f.x[iOff] -= fx;
            s->atoms->f.y[iOff] -= fy;
            s->atoms->f.z[iOff] -= fz;
            s->atoms->f.x[jOff] += fx;
            s->atoms->f.y[jOff] += fy;
            s->atoms->f.z[jOff] += fz;

            // The full pair energy is counted when the neighbor sits in a
            // local box, only half of it when it sits in a halo box.
            const double halfPhi = 0.5*phiVal;
            if (jOff / MAXATOMS < s->boxes->nLocalBoxes)
               eSum += phiVal;
            else
               eSum += halfPhi;

            s->atoms->U[iOff] += halfPhi;
            s->atoms->U[jOff] += halfPhi;

            // electron density at both atoms
            pot->rhobar[iOff] += rhoVal;
            pot->rhobar[jOff] += rhoVal;
         }
      }
   }

   // ---- Pass 2: embedding energy and its derivative --------------------
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      int nInBox = s->boxes->nAtoms[iBox];

      for (int iOff=MAXATOMS*iBox, a=0; a<nInBox; a++, iOff++)
      {
         real_t fEmbed, dfEmbed;
         interpolate(pot->f, pot->rhobar[iOff], &fEmbed, &dfEmbed);
         pot->dfEmbed[iOff] = dfEmbed; // kept for the halo exchange below
         eSum += fEmbed;
         s->atoms->U[iOff] += fEmbed;
      }
   }

   // Exchange the derivative of the embedding energy with respect to
   // rhobar so that halo atoms carry a value as well.
   startTimer(eamHaloTimer);
   haloExchange(pot->forceExchange, pot->forceExchangeData);
   stopTimer(eamHaloTimer);

   // ---- Pass 3: embedding contribution to the force --------------------
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      int nInBox = s->boxes->nAtoms[iBox];

      for (int iOff=MAXATOMS*iBox, a=0; a<nInBox; a++, iOff++)
      {
         int iLid = s->atoms->lid[iOff];
         assert(iLid < nl->nMaxLocal);

         int* nbrs = &(nl->list[nl->maxNeighbors * iLid]);
         int nNbrs = nl->nNeighbors[iLid];

         for (int n=0; n<nNbrs; n++)
         {
            int jOff = nbrs[n];

            real3_old dr;
            dr[0] = s->atoms->r.x[iOff] - s->atoms->r.x[jOff];
            dr[1] = s->atoms->r.y[iOff] - s->atoms->r.y[jOff];
            dr[2] = s->atoms->r.z[iOff] - s->atoms->r.z[jOff];
            double r2 = dr[0]*dr[0] + dr[1]*dr[1] + dr[2]*dr[2];

            // NOTE: this pass excludes r2 == cutoff^2, pass 1 includes it.
            if (r2 >= cutoffSq)
               continue;

            real_t r = sqrt(r2);

            real_t rhoVal, dRho;
            interpolate(pot->rho, r, &rhoVal, &dRho);

            // Equal and opposite embedding force on the two atoms.
            const real_t fx = (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[0]/r;
            const real_t fy = (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[1]/r;
            const real_t fz = (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[2]/r;
            s->atoms->f.x[iOff] -= fx;
            s->atoms->f.y[iOff] -= fy;
            s->atoms->f.z[iOff] -= fz;
            s->atoms->f.x[jOff] += fx;
            s->atoms->f.y[jOff] += fy;
            s->atoms->f.z[jOff] += fz;
         }
      }
   }

   s->ePotential = (real_t) eSum;

   return 0;
}
/// Calculate potential energy and forces for the EAM potential. /// /// Three steps are required: /// /// -# Loop over all atoms and their neighbors, compute the two-body /// interaction and the electron density at each atom /// -# Loop over all atoms, compute the embedding energy and its /// derivative for each atom /// -# Loop over all atoms and their neighbors, compute the embedding /// energy contribution to the force and add to the two-body force /// int eamForce(SimFlat* s) { //OPT: loop invariant references Atoms* atoms = s->atoms; LinkCell* boxes = s->boxes; int nLocalBoxes = boxes->nLocalBoxes; int nTotalBoxes = boxes->nTotalBoxes; int* nAtoms = boxes->nAtoms; real3* atoms_r = atoms->r; real3* atoms_f = atoms->f; real_t* atoms_U = atoms->U; EamPotential* pot = (EamPotential*) s->pot; assert(pot); // set up halo exchange and internal storage on first call to forces. if (pot->forceExchange == NULL) { int maxTotalAtoms = MAXATOMS*s->boxes->nTotalBoxes; pot->dfEmbed = comdMalloc(maxTotalAtoms*sizeof(real_t)); pot->rhobar = comdMalloc(maxTotalAtoms*sizeof(real_t)); pot->forceExchange = initForceHaloExchange(s->domain, s->boxes); pot->forceExchangeData = comdMalloc(sizeof(ForceExchangeData)); pot->forceExchangeData->dfEmbed = pot->dfEmbed; pot->forceExchangeData->boxes = s->boxes; } real_t rCut2 = pot->cutoff*pot->cutoff; // zero forces / energy / rho /rhoprime real_t etot = 0.0; memset(atoms_f, 0, nTotalBoxes*MAXATOMS*sizeof(real3)); memset(atoms_U, 0, nTotalBoxes*MAXATOMS*sizeof(real_t)); memset(pot->dfEmbed, 0, nTotalBoxes*MAXATOMS*sizeof(real_t)); memset(pot->rhobar, 0, nTotalBoxes*MAXATOMS*sizeof(real_t)); int nbrBoxes[27]; // loop over local boxes for (int iBox=0; iBox<nLocalBoxes; iBox++) { int nIBox = nAtoms[iBox]; int nNbrBoxes = getNeighborBoxes(boxes, iBox, nbrBoxes); // loop over neighbor boxes of iBox (some may be halo boxes) for (int jTmp=0; jTmp<nNbrBoxes; jTmp++) { int jBox = nbrBoxes[jTmp]; if (jBox < iBox ) continue; int nJBox = nAtoms[jBox]; 
// loop over atoms in iBox for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++) { // loop over atoms in jBox for (int jOff=MAXATOMS*jBox,ij=0; ij<nJBox; ij++,jOff++) { if ( (iBox==jBox) &&(ij <= ii) ) continue; double r2 = 0.0; real3 dr; //OPT: loop unrolling // for (int k=0; k<3; k++) // { // dr[k]=atoms_r[iOff][k]-atoms_r[jOff][k]; // r2+=dr[k]*dr[k]; // } double dr0 = atoms_r[iOff][0]-atoms_r[jOff][0]; r2+=dr0*dr0; double dr1 = atoms_r[iOff][1]-atoms_r[jOff][1]; r2+=dr1*dr1; double dr2 = atoms_r[iOff][2]-atoms_r[jOff][2]; r2+=dr2*dr2; //End of OPT: loop unrolling if(r2>rCut2) continue; double r = sqrt(r2); real_t phiTmp, dPhi, rhoTmp, dRho; interpolate(pot->phi, r, &phiTmp, &dPhi); interpolate(pot->rho, r, &rhoTmp, &dRho); //OPT: loop unrolling // for (int k=0; k<3; k++) // { // atoms_f[iOff][k] -= dPhi*dr[k]/r; // atoms_f[jOff][k] += dPhi*dr[k]/r; // } real_t cal = dPhi*dr0/r; atoms_f[iOff][0] -= cal; atoms_f[jOff][0] += cal; cal = dPhi*dr1/r; atoms_f[iOff][1] -= cal; atoms_f[jOff][1] += cal; cal = dPhi*dr2/r; atoms_f[iOff][2] -= cal; atoms_f[jOff][2] += cal; //End of OPT: loop unrolling // update energy terms // calculate energy contribution based on whether // the neighbor box is local or remote if (jBox < nLocalBoxes) etot += phiTmp; else etot += 0.5*phiTmp; atoms_U[iOff] += 0.5*phiTmp; atoms_U[jOff] += 0.5*phiTmp; // accumulate rhobar for each atom pot->rhobar[iOff] += rhoTmp; pot->rhobar[jOff] += rhoTmp; } // loop over atoms in jBox } // loop over atoms in iBox } // loop over neighbor boxes } // loop over local boxes // Compute Embedding Energy // loop over all local boxes for (int iBox=0; iBox<nLocalBoxes; iBox++) { int iOff; int nIBox = nAtoms[iBox]; // loop over atoms in iBox for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++) { real_t fEmbed, dfEmbed; interpolate(pot->f, pot->rhobar[iOff], &fEmbed, &dfEmbed); pot->dfEmbed[iOff] = dfEmbed; // save derivative for halo exchange etot += fEmbed; atoms_U[iOff] += fEmbed; } } // exchange derivative 
of the embedding energy with repsect to rhobar startTimer(eamHaloTimer); haloExchange(pot->forceExchange, pot->forceExchangeData); stopTimer(eamHaloTimer); // third pass // loop over local boxes for (int iBox=0; iBox<nLocalBoxes; iBox++) { int nIBox = nAtoms[iBox]; int nNbrBoxes = getNeighborBoxes(boxes, iBox, nbrBoxes); // loop over neighbor boxes of iBox (some may be halo boxes) for (int jTmp=0; jTmp<nNbrBoxes; jTmp++) { int jBox = nbrBoxes[jTmp]; if(jBox < iBox) continue; int nJBox = nAtoms[jBox]; // loop over atoms in iBox for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++) { // loop over atoms in jBox for (int jOff=MAXATOMS*jBox,ij=0; ij<nJBox; ij++,jOff++) { if ((iBox==jBox) && (ij <= ii)) continue; double r2 = 0.0; real3 dr; //OPT: loop unrolling // for (int k=0; k<3; k++) // { // dr[k]=atoms_r[iOff][k]-atoms_r[jOff][k]; // r2+=dr[k]*dr[k]; // } real_t dr0 = atoms_r[iOff][0]-atoms_r[jOff][0]; r2 += dr0*dr0; real_t dr1 = atoms_r[iOff][1]-atoms_r[jOff][1]; r2 += dr1*dr1; real_t dr2 = atoms_r[iOff][2]-atoms_r[jOff][2]; r2 += dr2*dr2; //End of OPT: loop unrolling if(r2>=rCut2) continue; real_t r = sqrt(r2); real_t rhoTmp, dRho; interpolate(pot->rho, r, &rhoTmp, &dRho); //OPT: loop unrolling // for (int k=0; k<3; k++) // { // atoms_f[iOff][k] -= (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[k]/r; // atoms_f[jOff][k] += (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[k]/r; // } real_t cal = (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr0/r; atoms_f[iOff][0] -= cal; atoms_f[jOff][0] += cal; cal = (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr1/r; atoms_f[iOff][1] -= cal; atoms_f[jOff][1] += cal; cal = (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr2/r; atoms_f[iOff][2] -= cal; atoms_f[jOff][2] += cal; //End of OPT: loop unrolling } // loop over atoms in jBox } // loop over atoms in iBox } // loop over neighbor boxes } // loop over local boxes s->ePotential = (real_t) etot; return 0; }