示例#1
0
文件: eam.c 项目: kempj/CoMD-tasking
/// Replicates the InterpolationObject held by rank 0 on all other ranks.
///
/// The data needed to build an interpolation table is often present on
/// a single task only (for example, the one task that read the data
/// from a file).  Broadcasting the finished table here keeps broadcast
/// code out of the individual table readers.
///
/// On rank 0, *table must point to an existing table.  On every other
/// rank *table must be NULL on entry; a table is allocated and stored
/// in *table before the values are received.
///
/// \param [in, out] table Address of the table pointer (see above).
///
/// \see eamBcastPotential
void bcastInterpolationObject(InterpolationObject** table)
{
   // Scalar members of the table; sent ahead of the values array so
   // that receivers know how much storage to allocate.
   struct
   {
      int n;
      real_t x0, invDx;
   } header;

   const int isRoot = (getMyRank() == 0);

   if (isRoot)
   {
      const InterpolationObject* src = *table;
      header.n     = src->n;
      header.x0    = src->x0;
      header.invDx = src->invDx;
   }
   bcastParallel(&header, sizeof(header), 0);

   if (!isRoot)
   {
      assert(*table == NULL);
      InterpolationObject* dst = comdMalloc(sizeof(InterpolationObject));
      dst->n     = header.n;
      dst->x0    = header.x0;
      dst->invDx = header.invDx;
      // The storage holds n+3 entries and the values pointer is set one
      // entry past its start, so that values[-1] is addressable.
      real_t* storage = comdMalloc(sizeof(real_t) * (header.n+3));
      dst->values = storage + 1;
      *table = dst;
   }

   // Broadcast the whole underlying storage, starting one entry before
   // the values pointer.
   int valuesSize = sizeof(real_t) * ((*table)->n+3);
   bcastParallel((*table)->values-1, valuesSize, 0);
}
示例#2
0
/// Carries out the halo communication for a single axis.  It is called
/// once per axis and, on each call, sends two messages and receives two
/// messages (one per face of that axis).  Filling and draining of the
/// message buffers is left to the loadBuffer and unloadBuffer callbacks
/// stored in the HaloExchange (the sub-class "virtual functions").
///
/// \param [in] haloExchange Exchange object supplying the callbacks,
///                          their parms, the neighbor ranks and the
///                          buffer capacity.
/// \param [in, out] data    Pointer to data that will be passed to the
///                          load and unload functions.
/// \param [in] iAxis        Axis index.
void exchangeData(HaloExchange* haloExchange, void* data, int iAxis)
{
   // The two faces of axis iAxis are numbered 2*iAxis and 2*iAxis+1.
   enum HaloFaceOrder faceLo = 2*iAxis;
   enum HaloFaceOrder faceHi = faceLo+1;

   // Scratch buffers, each of the full capacity; released before return.
   char* outLo = comdMalloc(haloExchange->bufCapacity);
   char* outHi = comdMalloc(haloExchange->bufCapacity);
   char* inLo  = comdMalloc(haloExchange->bufCapacity);
   char* inHi  = comdMalloc(haloExchange->bufCapacity);

   // Pack the outgoing data; the callbacks return the message sizes.
   int nOutLo = haloExchange->loadBuffer(haloExchange->parms, data, faceLo, outLo);
   int nOutHi = haloExchange->loadBuffer(haloExchange->parms, data, faceHi, outHi);

   int rankLo = haloExchange->nbrRank[faceLo];
   int rankHi = haloExchange->nbrRank[faceHi];

   int nInLo;
   int nInHi;

   startTimer(commHaloTimer);
   // Send toward the low neighbor while receiving from the high one,
   // then the reverse.
   nInHi = sendReceiveParallel(outLo, nOutLo, rankLo,
                               inHi, haloExchange->bufCapacity, rankHi);
   nInLo = sendReceiveParallel(outHi, nOutHi, rankHi,
                               inLo, haloExchange->bufCapacity, rankLo);
   stopTimer(commHaloTimer);

   // Unpack what arrived across each face.
   haloExchange->unloadBuffer(haloExchange->parms, data, faceLo, nInLo, inLo);
   haloExchange->unloadBuffer(haloExchange->parms, data, faceHi, nInHi, inHi);

   comdFree(inHi);
   comdFree(inLo);
   comdFree(outHi);
   comdFree(outLo);
}
示例#3
0
HashTable* initHashTable(int nMaxEntries)
{

   HashTable *hashTable = (HashTable *) comdMalloc(sizeof(HashTable));

   hashTable->nMaxEntries = nMaxEntries; 
   hashTable->nEntriesPut = 0; //allocates a 5MB hashtable. This number is prime.
   hashTable->nEntriesGet = 0; //allocates a 5MB hashtable. This number is prime.

   hashTable->offset = (int*) comdMalloc(sizeof(int) * hashTable->nMaxEntries);

   emptyHashTable(hashTable);

   return hashTable;
}
示例#4
0
文件: eam.c 项目: kempj/CoMD-tasking
/// Creates and initializes the EAM potential data structure.
///
/// Only rank 0 reads the potential file; eamBcastPotential is then
/// called on every rank.
///
/// \param [in] dir   The directory in which potential table files are found.
/// \param [in] file  The name of the potential table file.
/// \param [in] type  The file format of the potential file (setfl or funcfl).
/// \return The new potential, as a BasePotential pointer.
BasePotential* initEamPot(const char* dir, const char* file, const char* type)
{
   EamPotential* eam = comdMalloc(sizeof(EamPotential));
   assert(eam);

   // Function pointers for the EAM-specific operations.
   eam->destroy = eamDestroy;
   eam->print   = eamPrint;
   eam->force   = eamForce;

   // Interpolation tables start out unset.
   eam->f   = NULL;
   eam->rho = NULL;
   eam->phi = NULL;

   // The next three members depend on the parallel decomposition and
   // on the link cells, neither of which is available when the
   // potential is initialized.  Their setup is therefore postponed
   // until the first call to the force routine.
   eam->forceExchange = NULL;
   eam->rhobar        = NULL;
   eam->dfEmbed       = NULL;

   if (getMyRank() == 0)
   {
      if (!strcmp(type, "setfl"))
      {
         eamReadSetfl(eam, dir, file);
      }
      else if (!strcmp(type, "funcfl"))
      {
         eamReadFuncfl(eam, dir, file);
      }
      else
      {
         typeNotSupported("initEamPot", type);
      }
   }
   eamBcastPotential(eam);

   return (BasePotential*) eam;
}
示例#5
0
/// Creates the halo exchange used to communicate force data.
///
/// The force exchange is considerably simpler than the atom exchange.
/// In the force case we only need to exchange data that is needed to
/// complete the force calculation.  Since the atoms have not moved we
/// only need to send data from local link cells and we are guaranteed
/// that the same atoms exist in the same order in corresponding halo
/// cells on remote tasks.  The only tricky part is that the size of the
/// plane of local cells that needs to be sent grows in each direction.
/// This is because the y-axis send must send some of the data that was
/// received from the x-axis send, and the z-axis must send some data
/// from the y-axis send.  This accumulation of data to send is
/// responsible for data reaching neighbor cells that share only edges
/// or corners.
///
/// The message buffer capacity is sized for the largest of the three
/// per-axis planes.
///
/// \param [in] domain Domain decomposition, passed to initHaloExchange.
/// \param [in] boxes  Link cell information (gridSize is used to size
///                    the planes and to build the cell lists).
/// \return The new exchange object with force-specific callbacks,
///         buffer capacity and parms set.
///
/// \see eam.c for an explanation of the requirement to exchange
/// force data.
HaloExchange* initForceHaloExchange(Domain* domain, LinkCell* boxes)
{
   HaloExchange* hh = initHaloExchange(domain);

   hh->loadBuffer = loadForceBuffer;
   hh->unloadBuffer = unloadForceBuffer;
   hh->destroy = destroyForceExchange;

   // Number of link cells in the plane sent across the x, y and z
   // faces respectively.
   int size0 = (boxes->gridSize[1]  )*(boxes->gridSize[2]  );
   int size1 = (boxes->gridSize[0]+2)*(boxes->gridSize[2]  );
   int size2 = (boxes->gridSize[0]+2)*(boxes->gridSize[1]+2);

   // The buffer must hold the largest of all three planes.  Fold the
   // running maximum into the second MAX so that size0 is not dropped
   // (size0 can exceed both others when gridSize[0] is small).
   int maxSize = MAX(size0, size1);
   maxSize = MAX(maxSize, size2);
   hh->bufCapacity = (maxSize)*MAXATOMS*sizeof(ForceMsg);

   ForceExchangeParms* parms = comdMalloc(sizeof(ForceExchangeParms));

   // Both faces of an axis exchange the same number of cells.
   parms->nCells[HALO_X_MINUS] = size0;
   parms->nCells[HALO_Y_MINUS] = size1;
   parms->nCells[HALO_Z_MINUS] = size2;
   parms->nCells[HALO_X_PLUS]  = parms->nCells[HALO_X_MINUS];
   parms->nCells[HALO_Y_PLUS]  = parms->nCells[HALO_Y_MINUS];
   parms->nCells[HALO_Z_PLUS]  = parms->nCells[HALO_Z_MINUS];

   // Build the matching send and receive cell lists for all six faces.
   for (int ii=0; ii<6; ++ii)
   {
      parms->sendCells[ii] = mkForceSendCellList(boxes, ii, parms->nCells[ii]);
      parms->recvCells[ii] = mkForceRecvCellList(boxes, ii, parms->nCells[ii]);
   }

   hh->parms = parms;
   return hh;
}
示例#6
0
/// Make a list of link cells that need to be sent across the specified
/// face.  For each face, the list must include all cells, local and
/// halo, in the first two planes of link cells.  Halo cells must be
/// included in the list of link cells to send since local atoms may
/// have moved from local cells into halo cells on this time step.
/// (Actual remote atoms should have been deleted, so the halo cells
/// should contain only these few atoms that have just crossed.)
/// Sending these atoms will allow them to be reassigned to the task
/// that covers the spatial domain they have moved into.
///
/// Note that link cell grid coordinates range from -1 to gridSize[iAxis].
/// \see initLinkCells for an explanation of link cell grid coordinates.
///
/// \param [in] boxes  Link cell information.
/// \param [in] iFace  Index of the face data will be sent across.
/// \param [in] nCells Number of cells to send.  This is used for a
///                    consistency check.
/// \return The list of cells to send.  Caller is responsible to free
/// the list.
int* mkAtomCellList(LinkCell* boxes, enum HaloFaceOrder iFace, const int nCells)
{
   // Begin with the complete grid on every axis, i.e. the half-open
   // coordinate range [-1, gridSize+1).
   int lo[3] = {-1, -1, -1};
   int hi[3];
   for (int axis=0; axis<3; ++axis)
      hi[axis] = boxes->gridSize[axis]+1;

   // Shrink the axis belonging to the requested face to the two planes
   // at that end of the range.
   switch (iFace)
   {
      case HALO_X_MINUS: hi[0] = lo[0]+2; break;
      case HALO_X_PLUS:  lo[0] = hi[0]-2; break;
      case HALO_Y_MINUS: hi[1] = lo[1]+2; break;
      case HALO_Y_PLUS:  lo[1] = hi[1]-2; break;
      case HALO_Z_MINUS: hi[2] = lo[2]+2; break;
      case HALO_Z_PLUS:  lo[2] = hi[2]-2; break;
      default: break; // any other value leaves the full range in place
   }

   int* cells = comdMalloc(nCells*sizeof(int));
   int nFound = 0;
   for (int ix=lo[0]; ix<hi[0]; ++ix)
   {
      for (int iy=lo[1]; iy<hi[1]; ++iy)
      {
         for (int iz=lo[2]; iz<hi[2]; ++iz)
         {
            cells[nFound] = getBoxFromTuple(boxes, ix, iy, iz);
            ++nFound;
         }
      }
   }

   // Consistency check against the caller's expected cell count.
   assert(nFound == nCells);
   return cells;
}
示例#7
0
/// Builds the Domain that records the process grid, the grid
/// coordinates of the calling rank, and the global and local spatial
/// bounds.
///
/// \param [in] xproc x-size of domain decomposition grid.
/// \param [in] yproc y-size of domain decomposition grid.
/// \param [in] zproc z-size of domain decomposition grid.
/// \param [in] globalExtent Size of the simulation domain (in Angstroms).
/// \return The newly allocated Domain.
Domain* initDecomposition(int xproc, int yproc, int zproc, real3 globalExtent)
{
   // The process grid must account for every rank.
   assert( xproc * yproc * zproc == getNRanks());

   Domain* dd = comdMalloc(sizeof(Domain));
   dd->procGrid[0] = xproc;
   dd->procGrid[1] = yproc;
   dd->procGrid[2] = zproc;

   // Decode the rank into grid coordinates (i,j,k) with i varying
   // fastest: rank = i + procGrid[0]*(j + procGrid[1]*k).
   const int rank   = getMyRank();
   const int rankJK = rank / dd->procGrid[0];
   dd->procCoord[0] = rank   % dd->procGrid[0];
   dd->procCoord[1] = rankJK % dd->procGrid[1];
   dd->procCoord[2] = rankJK / dd->procGrid[1];

   for (int axis = 0; axis < 3; axis++)
   {
      // Global bounds: the simulation domain starts at zero.
      dd->globalMin[axis] = 0;
      dd->globalMax[axis] = globalExtent[axis];
      dd->globalExtent[axis] = dd->globalMax[axis] - dd->globalMin[axis];

      // Local bounds: an equal share of the global extent, positioned
      // by this rank's grid coordinate.
      dd->localExtent[axis] = dd->globalExtent[axis] / dd->procGrid[axis];
      dd->localMin[axis] = dd->globalMin[axis] +  dd->procCoord[axis]    * dd->localExtent[axis];
      dd->localMax[axis] = dd->globalMin[axis] + (dd->procCoord[axis]+1) * dd->localExtent[axis];
   }

   return dd;
}
示例#8
0
/// \details
/// When called in proper sequence by redistributeAtoms, the atom halo
/// exchange helps serve three purposes:
/// - Send ghost atom data to neighbor tasks.
/// - Shift atom coordinates by the global simulation size when they cross
///   periodic boundaries.  This shift is performed in loadAtomsBuffer.
/// - Transfer ownership of atoms between tasks as the atoms move across
///   spatial domain boundaries.  This transfer of ownership occurs in
///   two places.  The former owner gives up ownership when
///   updateLinkCells moves a formerly local atom into a halo link cell.
///   The new owner accepts ownership when unloadAtomsBuffer calls
///   putAtomInBox to place a received atom into a local link cell.
///
/// This constructor does the following:
///
/// - Sets the bufCapacity to hold the largest possible number of atoms
///   that can be sent across a face.
/// - Initialize function pointers to the atom-specific versions
/// - Sets the number of link cells to send across each face.
/// - Builds the list of link cells to send across each face.  As
///   explained in the comments for mkAtomCellList, this list must
///   include any link cell, local or halo, that could possibly contain
///   an atom that needs to be sent across the face.  Atoms that need to
///   be sent include "ghost atoms" that are located in local link
///   cells that correspond to halo link cells on receiving tasks as well as
///   formerly local atoms that have just moved into halo link cells and
///   need to be sent to the rank that owns the spatial domain the atom
///   has moved into.
/// - Sets a coordinate shift factor for each face to account for
///   periodic boundary conditions.  For most faces the factor is zero.
///   For faces on the +x, +y, or +z face of the simulation domain
///   the factor is -1.0 (to shift the coordinates by -1 times the
///   simulation domain size).  For -x, -y, and -z faces of the
///   simulation domain, the factor is +1.0.
///
/// \see redistributeAtoms
///
/// \param [in] domain Domain decomposition; supplies procCoord and
///                    procGrid and is passed to initHaloExchange.
/// \param [in] boxes  Link cell information (gridSize is used to size
///                    the planes and to build the cell lists).
/// \return The new exchange object with atom-specific callbacks,
///         buffer capacity and parms set.
HaloExchange* initAtomHaloExchange(Domain* domain, LinkCell* boxes)
{
   HaloExchange* hh = initHaloExchange(domain);

   // Number of link cells (halo cells included) in one plane normal to
   // the x, y and z axis respectively.
   int size0 = (boxes->gridSize[1]+2)*(boxes->gridSize[2]+2);
   int size1 = (boxes->gridSize[0]+2)*(boxes->gridSize[2]+2);
   int size2 = (boxes->gridSize[0]+2)*(boxes->gridSize[1]+2);

   // The buffer must hold the largest of all three planes.  Fold the
   // running maximum into the second MAX so that size0 is not dropped
   // (size0 can exceed both others when gridSize[0] is small).
   int maxSize = MAX(size0, size1);
   maxSize = MAX(maxSize, size2);
   // Two planes of cells are sent per face, hence the factor of 2.
   hh->bufCapacity = maxSize*2*MAXATOMS*sizeof(AtomMsg);

   hh->loadBuffer = loadAtomsBuffer;
   hh->unloadBuffer = unloadAtomsBuffer;
   hh->destroy = destroyAtomsExchange;

   AtomExchangeParms* parms = comdMalloc(sizeof(AtomExchangeParms));

   // Two planes per face; both faces of an axis send the same count.
   parms->nCells[HALO_X_MINUS] = 2*size0;
   parms->nCells[HALO_Y_MINUS] = 2*size1;
   parms->nCells[HALO_Z_MINUS] = 2*size2;
   parms->nCells[HALO_X_PLUS]  = parms->nCells[HALO_X_MINUS];
   parms->nCells[HALO_Y_PLUS]  = parms->nCells[HALO_Y_MINUS];
   parms->nCells[HALO_Z_PLUS]  = parms->nCells[HALO_Z_MINUS];

   for (int ii=0; ii<6; ++ii)
      parms->cellList[ii] = mkAtomCellList(boxes, ii, parms->nCells[ii]);

   // Periodic shift factors default to zero for every face and axis.
   for (int ii=0; ii<6; ++ii)
   {
      parms->pbcFactor[ii] = comdMalloc(3*sizeof(real_t));
      for (int jj=0; jj<3; ++jj)
         parms->pbcFactor[ii][jj] = 0.0;
   }

   // Ranks on the edge of the process grid get a non-zero factor for
   // the corresponding face: +1 on the minus side, -1 on the plus side.
   int* procCoord = domain->procCoord; //alias
   int* procGrid  = domain->procGrid; //alias
   if (procCoord[HALO_X_AXIS] == 0)                       parms->pbcFactor[HALO_X_MINUS][HALO_X_AXIS] = +1.0;
   if (procCoord[HALO_X_AXIS] == procGrid[HALO_X_AXIS]-1) parms->pbcFactor[HALO_X_PLUS][HALO_X_AXIS]  = -1.0;
   if (procCoord[HALO_Y_AXIS] == 0)                       parms->pbcFactor[HALO_Y_MINUS][HALO_Y_AXIS] = +1.0;
   if (procCoord[HALO_Y_AXIS] == procGrid[HALO_Y_AXIS]-1) parms->pbcFactor[HALO_Y_PLUS][HALO_Y_AXIS]  = -1.0;
   if (procCoord[HALO_Z_AXIS] == 0)                       parms->pbcFactor[HALO_Z_MINUS][HALO_Z_AXIS] = +1.0;
   if (procCoord[HALO_Z_AXIS] == procGrid[HALO_Z_AXIS]-1) parms->pbcFactor[HALO_Z_PLUS][HALO_Z_AXIS]  = -1.0;

   hh->parms = parms;
   return hh;
}
示例#9
0
/// \details
/// Allocates the per-atom arrays, sized for MAXATOMS atoms in each of
/// the boxes->nTotalBoxes link cells, and zeroes every entry.  The atom
/// counts start at zero.  Call functions such as createFccLattice and
/// setTemperature afterwards to set up initial atom positions and
/// momenta.
///
/// \param [in] boxes Link cell information; nTotalBoxes sets the sizes.
/// \return The newly allocated Atoms structure.
Atoms* initAtoms(LinkCell* boxes)
{
   const int capacity = MAXATOMS*boxes->nTotalBoxes;

   Atoms* atoms = comdMalloc(sizeof(Atoms));
   atoms->nGlobal = 0;
   atoms->nLocal = 0;

   // One slot per possible atom in each array.
   atoms->gid      = (int*)    comdMalloc(capacity*sizeof(int));
   atoms->iSpecies = (int*)    comdMalloc(capacity*sizeof(int));
   atoms->r        = (real3*)  comdMalloc(capacity*sizeof(real3));
   atoms->p        = (real3*)  comdMalloc(capacity*sizeof(real3));
   atoms->f        = (real3*)  comdMalloc(capacity*sizeof(real3));
   atoms->U        = (real_t*) comdMalloc(capacity*sizeof(real_t));

   // Clear every slot.
   for (int slot = 0; slot < capacity; slot++)
   {
      atoms->gid[slot] = 0;
      atoms->iSpecies[slot] = 0;
      zeroReal3(atoms->r[slot]);
      zeroReal3(atoms->p[slot]);
      zeroReal3(atoms->f[slot]);
      atoms->U[slot] = 0.;
   }

   return atoms;
}
示例#10
0
文件: CoMD.c 项目: shamouda/ocr-apps
/// Creates the SpeciesData record from the species fields of a
/// potential.
///
/// \param [in] pot Potential whose name, atomicNo and mass are copied.
/// \return The newly allocated SpeciesData.
SpeciesData* initSpecies(BasePotential* pot)
{
   SpeciesData* spec = comdMalloc(sizeof(SpeciesData));

   spec->mass = pot->mass;
   spec->atomicNo = pot->atomicNo;
   // NOTE(review): unbounded copy -- assumes spec->name is at least as
   // large as the string held in pot->name; confirm against the struct
   // definitions.
   strcpy(spec->name, pot->name);

   return spec;
}
示例#11
0
文件: CoMD.c 项目: shamouda/ocr-apps
/// Initializes the main CoMD data structure, SimFlat, based on command
/// line input from the user.  Also performs certain sanity checks on
/// the input to screen out nonsensical inputs.
///
/// Simple data members such as the time step dt are initialized
/// directly; substructures such as the potential, the link cells, the
/// atoms, etc., are initialized by calling additional initialization
/// functions (initPotential(), initLinkCells(), initAtoms(), etc.).
/// Initialization order is set by the natural dependencies of the
/// substructures: for example, the atoms need the link cells, so the
/// link cells must be initialized before the atoms.
///
/// \param [in] cmd Parsed command line options.
/// \return The newly allocated and initialized simulation.
SimFlat* initSimulation(Command cmd)
{
   SimFlat* sim = comdMalloc(sizeof(SimFlat));

   // Run parameters taken directly from the command line.
   sim->dt = cmd.dt;
   sim->nSteps = cmd.nSteps;
   sim->printRate = cmd.printRate;

   // Energies and substructures start out empty.
   sim->eKinetic = 0.0;
   sim->ePotential = 0.0;
   sim->atomExchange = NULL;
   sim->atoms = NULL;
   sim->boxes = NULL;
   sim->domain = NULL;

   sim->pot = initPotential(cmd.doeam, cmd.potDir, cmd.potName, cmd.potType);

   // A negative lattice constant on the command line selects the value
   // carried by the potential.
   real_t latticeConstant = (cmd.lat < 0.0) ? sim->pot->lat : cmd.lat;

   // ensure input parameters make sense.
   sanityChecks(cmd, sim->pot->cutoff, latticeConstant, sim->pot->latticeType);

   sim->species = initSpecies(sim->pot);

   // Global size: number of unit cells times the lattice constant.
   real3 globalExtent = {
      cmd.nx * latticeConstant,
      cmd.ny * latticeConstant,
      cmd.nz * latticeConstant
   };

   sim->domain = initDecomposition(cmd.xproc, cmd.yproc, cmd.zproc,
                                   globalExtent);
   sim->boxes = initLinkCells(sim->domain, sim->pot->cutoff);
   sim->atoms = initAtoms(sim->boxes);

   // create lattice with desired temperature and displacement.
   createFccLattice(cmd.nx, cmd.ny, cmd.nz, latticeConstant, sim);
   setTemperature(sim, cmd.temperature);
   randomDisplacements(sim, cmd.initialDelta);

   sim->atomExchange = initAtomHaloExchange(sim->domain, sim->boxes);

   // Forces must be computed before we call the time stepper.
   startTimer(redistributeTimer);
   redistributeAtoms(sim);
   stopTimer(redistributeTimer);

   startTimer(computeForceTimer);
   computeForce(sim);
   stopTimer(computeForceTimer);

   kineticEnergy(sim);

   return sim;
}
示例#12
0
/// In CoMD 1.1, atoms are stored in link cells.  Link cells are widely
/// used in classical MD to avoid an O(N^2) search for atoms that
/// interact.  Link cells are formed by subdividing the local spatial
/// domain with a Cartesian grid where the grid spacing in each
/// direction is at least as big as the potential's cutoff distance.
/// Because atoms don't interact beyond the potential cutoff, for an
/// atom iAtom in any given link cell, we can be certain that all atoms
/// that interact with iAtom are contained in the same link cell, or one
/// of the 26 neighboring link cells.
/// 
/// CoMD chooses the link cell size (boxSize) on each axis to be the
/// shortest possible distance, longer than cutoff, such that the local
/// domain size divided by boxSize is an integer.  I.e., the link cells
/// are commensurate with the local domain size.  While this does
/// not result in the smallest possible link cells, it does allow us to
/// keep a strict separation between the link cells that are entirely
/// inside the local domain and those that represent halo regions.
///
/// The number of local link cells in each direction is stored in
/// gridSize.  Local link cells have 3D grid coordinates (ix, iy, iz)
/// where ix, iy, and iz can range from 0 to gridSize[iAxis]-1,
/// where iAxis is 0 for x, 1 for y and 2 for the z direction.  The
/// number of local link cells is thus nLocalBoxes =
/// gridSize[0]*gridSize[1]*gridSize[2].
///
/// The local link cells are surrounded by one complete shell of halo
/// link cells.  The halo cells provide temporary storage for halo or
/// "ghost" atoms that belong to other tasks, but whose coordinates are
/// needed locally to complete the force calculation.  Halo link cells
/// have at least one coordinate with a value of either -1 or
/// gridSize[iAxis].
///
/// Because CoMD stores data in ordinary 1D C arrays, a mapping is
/// needed from the 3D grid coords to a 1D array index.  For the local
/// cells we use the conventional mapping ix + iy*nx + iz*nx*ny.  This
/// keeps all of the local cells in a contiguous region of memory
/// starting from the beginning of any relevant array and makes it easy
/// to iterate the local cells in a single loop.  Halo cells are mapped
/// differently.  After the local cells, the two planes of link cells
/// that are face neighbors with local cells across the -x or +x axis
/// are next.  These are followed by face neighbors across the -y and +y
/// axis (including cells that are y-face neighbors with an x-plane of
/// halo cells), followed by all remaining cells in the -z and +z planes
/// of halo cells.  The total number of link cells (on each rank) is
/// nTotalBoxes.
///
/// Data storage arrays that are used in association with link cells
/// should be allocated to store nTotalBoxes*MAXATOMS items.  Data for
/// the first atom in linkCell iBox is stored at index iBox*MAXATOMS.
/// Data for subsequent atoms in the same link cell are stored
/// sequentially, and the number of atoms in link cell iBox is
/// nAtoms[iBox].
///
/// \see getBoxFromTuple is the 3D->1D mapping for link cell indices.
/// \see getTuple is the 1D->3D mapping
///
/// \param [in] cutoff The cutoff distance of the potential.
/// \param [in] domain Domain decomposition; supplies localMin, localMax
///                    and localExtent for this rank.
/// \return The newly allocated link cell structure with all per-cell
///         atom counts set to zero.
LinkCell* initLinkCells(const Domain* domain, real_t cutoff)
{
   assert(domain);
   LinkCell* cells = comdMalloc(sizeof(LinkCell));

   for (int axis = 0; axis < 3; axis++)
   {
      cells->localMin[axis] = domain->localMin[axis];
      cells->localMax[axis] = domain->localMax[axis];
      // Whole number of boxes that fit along this axis (local count).
      cells->gridSize[axis] = domain->localExtent[axis] / cutoff;
      // Stretch the boxes so that they tile the local extent exactly.
      cells->boxSize[axis] = domain->localExtent[axis] / ((real_t) cells->gridSize[axis]);
      cells->invBoxSize[axis] = 1.0/cells->boxSize[axis];
   }

   const int nx = cells->gridSize[0];
   const int ny = cells->gridSize[1];
   const int nz = cells->gridSize[2];

   cells->nLocalBoxes = nx * ny * nz;
   cells->nInnerBoxes = (nx-2) * (ny-2) * (nz-2);

   // Halo shell: 2*ny*nz cells on the x faces, 2*(nx+2)*nz on the
   // y faces and 2*(nx+2)*(ny+2) on the z faces.
   cells->nHaloBoxes = 2 * ((nx + 2) * (ny + nz + 2) + (ny * nz));

   cells->nTotalBoxes = cells->nLocalBoxes + cells->nHaloBoxes;

   printf("Number of boxes: %d, %d, %d\n", cells->nInnerBoxes, cells->nLocalBoxes - cells->nInnerBoxes, cells->nHaloBoxes);

   // Every cell, local or halo, starts with no atoms.
   cells->nAtoms = comdMalloc(cells->nTotalBoxes*sizeof(int));
   for (int iBox=0; iBox<cells->nTotalBoxes; ++iBox)
   {
      cells->nAtoms[iBox] = 0;
   }

   assert ( (cells->gridSize[0] >= 2) && (cells->gridSize[1] >= 2) && (cells->gridSize[2] >= 2) );

   // debug test for box allocation: getTuple is run on every box index
   // and the resulting coordinates are discarded.
   for (int iBox = 0; iBox < cells->nTotalBoxes; iBox++)
   {
      int ix, iy, iz;
      getTuple(cells, iBox, &ix, &iy, &iz);
   }
   return cells;
}
示例#13
0
/// Builds the link cell structure for the local domain and precomputes
/// a neighbor list for every local link cell.
///
/// \param [in] domain Domain decomposition; supplies localMin, localMax
///                    and localExtent for this rank.
/// \param [in] cutoff The cutoff distance of the potential.
/// \return The newly allocated link cell structure.
LinkCell* initLinkCells(const Domain* domain, real_t cutoff)
{
    assert(domain);
    LinkCell* cells = (LinkCell*)comdMalloc(sizeof(LinkCell));

    for (int axis = 0; axis < 3; axis++) {
        cells->localMin[axis] = domain->localMin[axis];
        cells->localMax[axis] = domain->localMax[axis];
        // Whole number of boxes that fit along this axis (local count).
        cells->gridSize[axis] = domain->localExtent[axis] / cutoff;
        // Stretch the boxes so that they tile the local extent exactly.
        cells->boxSize[axis] = domain->localExtent[axis] / ((real_t) cells->gridSize[axis]);
        cells->invBoxSize[axis] = 1.0/cells->boxSize[axis];
    }

    const int nx = cells->gridSize[0];
    const int ny = cells->gridSize[1];
    const int nz = cells->gridSize[2];

    cells->nLocalBoxes = nx * ny * nz;

    // Halo shell: 2*ny*nz cells on the x faces, 2*(nx+2)*nz on the
    // y faces and 2*(nx+2)*(ny+2) on the z faces.
    cells->nHaloBoxes = 2 * ((nx + 2) * (ny + nz + 2) + (ny * nz));

    cells->nTotalBoxes = cells->nLocalBoxes + cells->nHaloBoxes;

    // NOTE(review): nAtoms is sized for the local boxes only, although
    // nTotalBoxes also counts halo boxes -- confirm that this variant
    // never indexes nAtoms with a halo box.
    cells->nAtoms = (int*)comdMalloc(cells->nLocalBoxes*sizeof(int));
    for (int iBox = 0; iBox < cells->nLocalBoxes; ++iBox) {
        cells->nAtoms[iBox] = 0;
    }

    assert ( (cells->gridSize[0] >= 2) && (cells->gridSize[1] >= 2) && (cells->gridSize[2] >= 2) );

    // One 27-entry neighbor array per local box; all arrays are
    // allocated first and filled in a second pass.
    cells->nbrBoxes = (int**)comdMalloc(cells->nLocalBoxes*sizeof(int*));
    for (int iBox = 0; iBox < cells->nLocalBoxes; ++iBox) {
        cells->nbrBoxes[iBox] = (int*)comdMalloc(27*sizeof(int));
    }

    for (int iBox = 0; iBox < cells->nLocalBoxes; ++iBox) {
        getLocalNeighborBoxes(cells, iBox, cells->nbrBoxes[iBox]);
    }

    return cells;
}
示例#14
0
/// Base class constructor for halo exchanges.
///
/// Looks up the rank of the neighbor task across each of the six faces
/// and leaves bufCapacity at zero for the sub-class constructor to set.
///
/// \param [in] domain Domain decomposition passed to processorNum.
/// \return The newly allocated HaloExchange.
HaloExchange* initHaloExchange(Domain* domain)
{
   HaloExchange* exchange = comdMalloc(sizeof(HaloExchange));

   // Rank of neighbor task for each face: one step along a single axis.
   exchange->nbrRank[HALO_X_MINUS] = processorNum(domain, -1,  0,  0);
   exchange->nbrRank[HALO_X_PLUS]  = processorNum(domain, +1,  0,  0);

   exchange->nbrRank[HALO_Y_MINUS] = processorNum(domain,  0, -1,  0);
   exchange->nbrRank[HALO_Y_PLUS]  = processorNum(domain,  0, +1,  0);

   exchange->nbrRank[HALO_Z_MINUS] = processorNum(domain,  0,  0, -1);
   exchange->nbrRank[HALO_Z_PLUS]  = processorNum(domain,  0,  0, +1);

   // will be set by sub-class.
   exchange->bufCapacity = 0;

   return exchange;
}
示例#15
0
文件: CoMD.c 项目: shamouda/ocr-apps
/// Captures the initial total energy per atom and the global atom count
/// in a new Validate record, and reports them on the printing rank.
///
/// sumAtoms is called first, before nGlobal is read.
///
/// \param [in] sim The simulation whose energies and atom count are read.
/// \return The newly allocated Validate record.
Validate* initValidate(SimFlat* sim)
{
   sumAtoms(sim);

   Validate* record = comdMalloc(sizeof(Validate));
   record->nAtoms0 = sim->atoms->nGlobal;
   record->eTot0 = (sim->ePotential + sim->eKinetic) / sim->atoms->nGlobal;

   // Only the printing rank reports the initial state.
   if (!printRank())
   {
      return record;
   }

   fprintf(screenOut, "\n");
   printSeparator(screenOut);
   fprintf(screenOut, "Initial energy : %14.12f, atom count : %d \n",
           record->eTot0, record->nAtoms0);
   fprintf(screenOut, "\n");

   return record;
}
示例#16
0
/// Creates a Lennard-Jones potential with parameters for copper.
///
/// \return The new potential, as a BasePotential pointer.
BasePotential* initLjPot(void)
{
   LjPotential* lj = (LjPotential*)comdMalloc(sizeof(LjPotential));

   // Function pointers for the LJ-specific operations.
   lj->destroy = ljDestroy;
   lj->print   = ljPrint;
   lj->force   = ljForce;

   // Species.
   strcpy(lj->name, "Cu");
   lj->atomicNo = 29;
   lj->mass     = 63.55 * amuToInternalMass;  // 63.55 amu scaled by amuToInternalMass

   // Lattice.
   strcpy(lj->latticeType, "FCC");            // lattice type, i.e. FCC, BCC, etc.
   lj->lat = 3.615;                           // Equilibrium lattice const in Angs

   // Interaction parameters.
   lj->epsilon = 0.167;                       // eV
   lj->sigma   = 2.315;                       // Angstrom
   lj->cutoff  = 2.5*lj->sigma;               // Potential cutoff in Angs

   return (BasePotential*) lj;
}
示例#17
0
/// Builds the list of link cells that receive data across the given
/// face.  The list must be compatible with the corresponding send list
/// so that the data goes to the correct atoms.
///
/// \see initLinkCells for information about the conventions for grid
/// coordinates of link cells.
///
/// \param [in] boxes  Link cell information.
/// \param [in] face   Index of the face the data is received across.
/// \param [in] nCells Expected number of cells; used to size the list
///                    and for a consistency check.
/// \return The list of cells.  It is allocated here with comdMalloc.
int* mkForceRecvCellList(LinkCell* boxes, int face, int nCells)
{
   const int nx = boxes->gridSize[0];
   const int ny = boxes->gridSize[1];
   const int nz = boxes->gridSize[2];

   // Half-open coordinate range [lo, hi) on the x, y and z axes.
   // NOTE(review): these stay unset on the default path when asserts
   // are compiled out -- callers are presumably expected to pass one of
   // the six face values; confirm.
   int lo[3];
   int hi[3];

   switch (face)
   {
      case HALO_X_MINUS:
         lo[0] = -1;  hi[0] = 0;
         lo[1] = 0;   hi[1] = ny;
         lo[2] = 0;   hi[2] = nz;
         break;
      case HALO_X_PLUS:
         lo[0] = nx;  hi[0] = nx+1;
         lo[1] = 0;   hi[1] = ny;
         lo[2] = 0;   hi[2] = nz;
         break;
      case HALO_Y_MINUS:
         lo[0] = -1;  hi[0] = nx+1;
         lo[1] = -1;  hi[1] = 0;
         lo[2] = 0;   hi[2] = nz;
         break;
      case HALO_Y_PLUS:
         lo[0] = -1;  hi[0] = nx+1;
         lo[1] = ny;  hi[1] = ny+1;
         lo[2] = 0;   hi[2] = nz;
         break;
      case HALO_Z_MINUS:
         lo[0] = -1;  hi[0] = nx+1;
         lo[1] = -1;  hi[1] = ny+1;
         lo[2] = -1;  hi[2] = 0;
         break;
      case HALO_Z_PLUS:
         lo[0] = -1;  hi[0] = nx+1;
         lo[1] = -1;  hi[1] = ny+1;
         lo[2] = nz;  hi[2] = nz+1;
         break;
      default:
         assert(1==0);
   }

   int* cells = comdMalloc(nCells*sizeof(int));
   int nFound = 0;
   for (int ix=lo[0]; ix<hi[0]; ++ix)
   {
      for (int iy=lo[1]; iy<hi[1]; ++iy)
      {
         for (int iz=lo[2]; iz<hi[2]; ++iz)
         {
            cells[nFound] = getBoxFromTuple(boxes, ix, iy, iz);
            ++nFound;
         }
      }
   }

   assert(nFound == nCells);
   return cells;
}
示例#18
0
文件: eam.c 项目: kempj/CoMD-tasking
/// Builds a structure to store interpolation data for a tabular
/// function.  Interpolation must be supported on the range
/// \f$[x_0, x_n]\f$, where \f$x_n = n*dx\f$.
///
/// The values are stored with one extra entry in front (a copy of the
/// first value) and two extra entries behind (copies of the last
/// value), so values[-1] through values[n+1] are all addressable.
///
/// \see interpolate
/// \see bcastInterpolationObject
/// \see destroyInterpolationObject
///
/// \param [in] n    number of values in the table.
/// \param [in] x0   x value stored as the start of the table.
/// \param [in] dx   spacing of the x values; its reciprocal is stored.
/// \param [in] data tabulated values.  An array of size n.
/// \return The newly allocated table.
InterpolationObject* initInterpolationObject(
   int n, real_t x0, real_t dx, real_t* data)
{
   InterpolationObject* table =
      (InterpolationObject *)comdMalloc(sizeof(InterpolationObject)) ;
   assert(table);

   // n entries plus three of padding, zero-filled.
   real_t* storage = (real_t*)comdCalloc(1, (n+3)*sizeof(real_t));
   table->values = storage;
   assert(table->values);

   // Point values at the second storage entry so index -1 is valid.
   table->values = storage + 1;
   table->x0 = x0;
   table->invDx = 1.0/dx;
   table->n = n;

   real_t* v = table->values;
   for (int k=0; k<n; ++k)
   {
      v[k] = data[k];
   }

   // Pad the front with the first value and the back with the last.
   v[-1] = v[0];
   const real_t last = v[n-1];
   v[n]   = last;
   v[n+1] = last;

   return table;
}
示例#19
0
/// Calculate potential energy, forces, and the stress tensor for the
/// EAM potential.
///
/// Three steps are required:
///
///   -# Loop over all atoms and their neighbors, compute the two-body
///   interaction and the electron density at each atom
///   -# Loop over all atoms, compute the embedding energy and its
///   derivative for each atom
///   -# Loop over all atoms and their neighbors, compute the embedding
///   energy contribution to the force and add to the two-body force
///
/// The nine entries of s->defInfo->stress are zeroed on entry,
/// accumulated during all three steps, and divided by
/// s->defInfo->globalVolume before returning.
///
/// \param [in, out] s simulation state.  Atom positions and momenta are
///                    read; s->atoms->f, s->atoms->U,
///                    s->defInfo->stress and s->ePotential are overwritten.
/// \return always 0.
int eamForce(SimFlat* s)
{
   EamPotential* pot = (EamPotential*) s->pot;
   assert(pot);

   // set up halo exchange and internal storage on first call to forces.
   if (pot->forceExchange == NULL)
   {
      int maxTotalAtoms = MAXATOMS*s->boxes->nTotalBoxes;
      pot->dfEmbed = comdMalloc(maxTotalAtoms*sizeof(real_t));
      pot->rhobar  = comdMalloc(maxTotalAtoms*sizeof(real_t));
      pot->forceExchange = initForceHaloExchange(s->domain, s->boxes);
      pot->forceExchangeData = comdMalloc(sizeof(ForceExchangeData));
      pot->forceExchangeData->dfEmbed = pot->dfEmbed;
      pot->forceExchangeData->boxes = s->boxes;
   }

   real_t rCut2 = pot->cutoff*pot->cutoff;

   // zero forces / energy / rho / rhoprime
   real_t etot = 0.0;
   memset(s->atoms->f,  0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real3));
   memset(s->atoms->U,  0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real_t));
   memset(pot->dfEmbed, 0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real_t));
   memset(pot->rhobar,  0, s->boxes->nTotalBoxes*MAXATOMS*sizeof(real_t));

   // zero the stress accumulator
   for (int m=0; m<9; m++)
   {
      s->defInfo->stress[m] = 0.0;
   }

   int nbrBoxes[27];

   // first pass: pair potential, pair force, pair stress, electron density
   // loop over local boxes
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      int nIBox = s->boxes->nAtoms[iBox];
      int nNbrBoxes = getNeighborBoxes(s->boxes, iBox, nbrBoxes);
      // loop over neighbor boxes of iBox (some may be halo boxes)
      for (int jTmp=0; jTmp<nNbrBoxes; jTmp++)
      {
         int jBox = nbrBoxes[jTmp];
         // each pair of boxes is handled once, from the lower-numbered box
         if (jBox < iBox) continue;

         int nJBox = s->boxes->nAtoms[jBox];
         // loop over atoms in iBox
         for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
         {
            // loop over atoms in jBox
            for (int jOff=MAXATOMS*jBox,ij=0; ij<nJBox; ij++,jOff++)
            {
               // within a single box visit each pair once and skip self
               if ((iBox==jBox) && (ij <= ii)) continue;

               double r2 = 0.0;
               real3 dr;
               for (int k=0; k<3; k++)
               {
                  dr[k]=s->atoms->r[iOff][k]-s->atoms->r[jOff][k];
                  r2+=dr[k]*dr[k];
               }
               if (r2>rCut2) continue;

               double r = sqrt(r2);

               real_t phiTmp, dPhi, rhoTmp, dRho;
               interpolate(pot->phi, r, &phiTmp, &dPhi);
               interpolate(pot->rho, r, &rhoTmp, &dRho);

               // equal and opposite pair force on the two atoms
               for (int k=0; k<3; k++)
               {
                  s->atoms->f[iOff][k] -= dPhi*dr[k]/r;
                  s->atoms->f[jOff][k] += dPhi*dr[k]/r;
               }

               // pair contribution to the stress, row-major 3x3
               for (int i=0; i<3; i++)
               {
                  for (int j=0; j<3; j++)
                  {
                     int m = 3*i + j;
                     s->defInfo->stress[m] += 1.0*dPhi*dr[i]*dr[j]/r;
                  }
               }

               // update energy terms
               // calculate energy contribution based on whether
               // the neighbor box is local or remote
               if (jBox < s->boxes->nLocalBoxes)
                  etot += phiTmp;
               else
                  etot += 0.5*phiTmp;

               s->atoms->U[iOff] += 0.5*phiTmp;
               s->atoms->U[jOff] += 0.5*phiTmp;

               // accumulate rhobar for each atom
               pot->rhobar[iOff] += rhoTmp;
               pot->rhobar[jOff] += rhoTmp;

            } // loop over atoms in jBox
         } // loop over atoms in iBox
      } // loop over neighbor boxes
   } // loop over local boxes

   // second pass: embedding energy and momentum term of the stress
   // loop over all local boxes
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      int nIBox = s->boxes->nAtoms[iBox];

      // loop over atoms in iBox
      for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
      {
         real_t fEmbed, dfEmbed;
         interpolate(pot->f, pot->rhobar[iOff], &fEmbed, &dfEmbed);
         pot->dfEmbed[iOff] = dfEmbed; // save derivative for halo exchange
         etot += fEmbed;
         s->atoms->U[iOff] += fEmbed;

         // subtract p_i*p_j/mass for this atom from the stress
         int iSpecies = s->atoms->iSpecies[iOff];
         real_t invMass = 1.0/s->species[iSpecies].mass;
         for (int i=0; i<3; i++)
         {
            for (int j=0; j<3; j++)
            {
               int m = 3*i + j;
               s->defInfo->stress[m] -= s->atoms->p[iOff][i]*s->atoms->p[iOff][j]*invMass;
            }
         }
      }
   }

   // exchange derivative of the embedding energy with respect to rhobar
   startTimer(eamHaloTimer);
   haloExchange(pot->forceExchange, pot->forceExchangeData);
   stopTimer(eamHaloTimer);

   // third pass: embedding contribution to force and stress
   // loop over local boxes
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      int nIBox = s->boxes->nAtoms[iBox];
      int nNbrBoxes = getNeighborBoxes(s->boxes, iBox, nbrBoxes);
      // loop over neighbor boxes of iBox (some may be halo boxes)
      for (int jTmp=0; jTmp<nNbrBoxes; jTmp++)
      {
         int jBox = nbrBoxes[jTmp];
         if (jBox < iBox) continue;

         int nJBox = s->boxes->nAtoms[jBox];
         // loop over atoms in iBox
         for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
         {
            // loop over atoms in jBox
            for (int jOff=MAXATOMS*jBox,ij=0; ij<nJBox; ij++,jOff++)
            {
               if ((iBox==jBox) && (ij <= ii)) continue;

               double r2 = 0.0;
               real3 dr;
               for (int k=0; k<3; k++)
               {
                  dr[k]=s->atoms->r[iOff][k]-s->atoms->r[jOff][k];
                  r2+=dr[k]*dr[k];
               }
               // Same cutoff test as the first pass, so every pair that
               // contributed to rhobar also contributes an embedding force.
               if (r2>rCut2) continue;

               real_t r = sqrt(r2);

               real_t rhoTmp, dRho;
               interpolate(pot->rho, r, &rhoTmp, &dRho);

               // sum of the two atoms' embedding derivatives, used for
               // both the force and the stress contribution of this pair
               real_t dfSum = pot->dfEmbed[iOff]+pot->dfEmbed[jOff];

               for (int k=0; k<3; k++)
               {
                  s->atoms->f[iOff][k] -= dfSum*dRho*dr[k]/r;
                  s->atoms->f[jOff][k] += dfSum*dRho*dr[k]/r;
               }

               for (int i=0; i<3; i++)
               {
                  for (int j=0; j<3; j++)
                  {
                     int m = 3*i + j;
                     s->defInfo->stress[m] += 1.0*dfSum*dRho*dr[i]*dr[j]/r;
                  }
               }

            } // loop over atoms in jBox
         } // loop over atoms in iBox
      } // loop over neighbor boxes
   } // loop over local boxes

   s->ePotential = (real_t) etot;

   // normalize the accumulated stress by the global volume
   for (int m=0; m<9; m++)
   {
      s->defInfo->stress[m] = s->defInfo->stress[m]/s->defInfo->globalVolume;
   }

   return 0;
}
示例#20
0
文件: eam.c 项目: kempj/CoMD-tasking
/// Reads potential data from a funcfl file and populates
/// corresponding members and InterpolationObjects in an EamPotential.
/// 
/// funcfl is a file format for tabulated potential functions used by
/// the original EAM code DYNAMO.  A funcfl file contains an EAM
/// potential for a single element.
/// 
/// The contents of a funcfl file are:
///
/// | Line Num | Description
/// | :------: | :----------
/// | 1        | comments
/// | 2        | elem amass latConstant latType
/// | 3        | nrho   drho   nr   dr    rcutoff
/// | 4        | embedding function values F(rhobar) starting at rhobar=0
/// |    ...   | (nrho values. Multiple values per line allowed.)
/// | x'       | electrostatic interaction Z(r) starting at r=0
/// |    ...   | (nr values. Multiple values per line allowed.)
/// | y'       | electron density values rho(r) starting at r=0
/// |    ...   | (nr values. Multiple values per line allowed.)
///
/// Where:
///    -  elem          :   atomic number for this element
///    -  amass         :   atomic mass for this element in AMU
///    -  latConstant   :   lattice constant for this element in Angstroms
///    -  latticeType   :   lattice type for this element (e.g. FCC) 
///    -  nrho          :   number of values for the embedding function, F(rhobar)
///    -  drho          :   table spacing for rhobar
///    -  nr            :   number of values for Z(r) and rho(r)
///    -  dr            :   table spacing for r in Angstroms
///    -  rcutoff       :   potential cut-off distance in Angstroms
///
/// funcfl format stores the "electrostatic interaction" Z(r).  This needs to
/// be converted to the pair potential phi(r) using the formula
/// \f[phi = Z(r) * Z(r) / r\f]
/// NB: phi is not defined for r = 0
///
/// Z(r) is in atomic units (i.e., sqrt[Hartree * bohr]) so it is
/// necessary to convert to eV.
///
/// F(rhobar) is in eV.
///
void eamReadFuncfl(EamPotential* pot, const char* dir, const char* potName)
{
   char tmp[4096];

   // Build "<dir>/<potName>".  snprintf never writes past the end of tmp;
   // an over-long path is truncated instead of overflowing the buffer.
   snprintf(tmp, sizeof(tmp), "%s/%s", dir, potName);
   FILE* potFile = fopen(tmp, "r");
   if (potFile == NULL)
      fileNotFound("eamReadFuncfl", tmp);

   // NOTE(review): the return values of fgets/sscanf/fscanf below are not
   // checked, so a truncated or malformed file is not detected here.

   // line 1: the first whitespace-delimited token becomes the potential
   // name.  The field width keeps it inside name[] (2 chars + terminator)
   // even though this line is free-form comment text.
   fgets(tmp, sizeof(tmp), potFile);
   char name[3] = "";
   sscanf(tmp, "%2s", name);
   strcpy(pot->name, name);

   // line 2: atomic number, mass, lattice constant, lattice type
   int nAtomic;
   double mass, lat;
   char latticeType[8] = "";
   fgets(tmp,sizeof(tmp),potFile);
   // %7s bounds the token to latticeType[] (7 chars + terminator).
   sscanf(tmp, "%d %le %le %7s", &nAtomic, &mass, &lat, latticeType);
   pot->atomicNo = nAtomic;
   pot->lat = lat;
   pot->mass = mass*amuToInternalMass; // file has mass in AMU.
   strcpy(pot->latticeType, latticeType);

   // line 3: table sizes, spacings and cutoff
   int nRho, nR;
   double dRho, dR, cutoff;
   fgets(tmp,sizeof(tmp),potFile);
   sscanf(tmp, "%d %le %d %le %le", &nRho, &dRho, &nR, &dR, &cutoff);
   pot->cutoff = cutoff;
   real_t x0 = 0.0; // tables start at zero.

   // allocate read buffer, large enough for either table length
   int bufSize = MAX(nRho, nR);
   real_t* buf = comdMalloc(bufSize * sizeof(real_t));

   // read embedding energy
   for (int ii=0; ii<nRho; ++ii)
      fscanf(potFile, FMT1, buf+ii);
   pot->f = initInterpolationObject(nRho, x0, dRho, buf);

   // read Z(r) and convert to phi(r)
   for (int ii=0; ii<nR; ++ii)
      fscanf(potFile, FMT1, buf+ii);
   // start at ii=1: phi = Z*Z/r is undefined at r = 0
   for (int ii=1; ii<nR; ++ii)
   {
      real_t r = x0 + ii*dR;
      buf[ii] *= buf[ii] / r;
      buf[ii] *= hartreeToEv * bohrToAngs; // convert to eV
   }
   buf[0] = buf[1] + (buf[1] - buf[2]); // linear extrapolation to get phi[0].
   pot->phi = initInterpolationObject(nR, x0, dR, buf);

   // read electron density rho
   for (int ii=0; ii<nR; ++ii)
      fscanf(potFile, FMT1, buf+ii);
   pot->rho = initInterpolationObject(nR, x0, dR, buf);

   // All data has been read; release the stream (it was previously leaked).
   fclose(potFile);
   comdFree(buf);
}
示例#21
0
文件: eam.c 项目: kempj/CoMD-tasking
/// Reads potential data from a setfl file and populates
/// corresponding members and InterpolationObjects in an EamPotential.
///
/// setfl is a file format for tabulated potential functions used by
/// the original EAM code DYNAMO.  A setfl file contains EAM
/// potentials for multiple elements.
///
/// The contents of a setfl file are:
///
/// | Line Num | Description
/// | :------: | :----------
/// | 1 - 3    | comments
/// | 4        | ntypes type1 type2 ... typen
/// | 5        | nrho     drho     nr   dr   rcutoff
/// | F, rho   | Following line 5 there is a block for each atom type with F, and rho.
/// | b1       | ielem(i)   amass(i)     latConst(i)    latType(i)
/// | b2       | embedding function values F(rhobar) starting at rhobar=0
/// |   ...    | (nrho values. Multiple values per line allowed.)
/// | bn       | electron density, starting at r=0
/// |   ...    | (nr values. Multiple values per line allowed.)
/// | repeat   | Return to b1 for each atom type.
/// | phi      | phi_ij for (1,1), (2,1), (2,2), (3,1), (3,2), (3,3), (4,1), ..., 
/// | p1       | pair potential between type i and type j, starting at r=0
/// |   ...    | (nr values. Multiple values per line allowed.)
/// | repeat   | Return to p1 for each phi_ij
///
/// Where:
///    -  ntypes        :      number of element types in the potential  
///    -  nrho          :      number of points the embedding energy F(rhobar)
///    -  drho          :      table spacing for rhobar 
///    -  nr            :      number of points for rho(r) and phi(r)
///    -  dr            :      table spacing for r in Angstroms
///    -  rcutoff       :      cut-off distance in Angstroms
///    -  ielem(i)      :      atomic number for element(i)
///    -  amass(i)      :      atomic mass for element(i) in AMU
///    -  latConst(i)   :      lattice constant for element(i) in Angstroms
///    -  latType(i)    :      lattice type for element(i)  
///
/// setfl format stores r*phi(r), so we need to convert it to the pair
/// potential phi(r).  In the file, phi(r)*r is in eV*Angstroms.
/// NB: phi is not defined for r = 0
///
/// F(rhobar) is in eV.
///
void eamReadSetfl(EamPotential* pot, const char* dir, const char* potName)
{
   char tmp[4096];

   // Build "<dir>/<potName>".  snprintf never writes past the end of tmp;
   // an over-long path is truncated instead of overflowing the buffer.
   snprintf(tmp, sizeof(tmp), "%s/%s", dir, potName);

   FILE* potFile = fopen(tmp, "r");
   if (potFile == NULL)
      fileNotFound("eamReadSetfl", tmp);

   // NOTE(review): the return values of fgets/sscanf/fscanf below are not
   // checked, so a truncated or malformed file is not detected here.

   // read the first 3 lines (comments)
   fgets(tmp, sizeof(tmp), potFile);
   fgets(tmp, sizeof(tmp), potFile);
   fgets(tmp, sizeof(tmp), potFile);

   // line 4: only the element count is parsed; anything but 1 is rejected
   fgets(tmp, sizeof(tmp), potFile);
   int nElems;
   sscanf(tmp, "%d", &nElems);
   if( nElems != 1 )
      notAlloyReady("eamReadSetfl");

   // line 5
   int nRho, nR;
   double dRho, dR, cutoff;
   //  The same cutoff is used by all alloys, NB: cutoff = nR * dR is redundant
   fgets(tmp, sizeof(tmp), potFile);
   sscanf(tmp, "%d %le %d %le %le", &nRho, &dRho, &nR, &dR, &cutoff);
   pot->cutoff = cutoff;

   // **** THIS CODE IS RESTRICTED TO ONE ELEMENT
   // Per-atom header
   fgets(tmp, sizeof(tmp), potFile);
   int nAtomic;
   double mass, lat;
   char latticeType[8] = "";
   // %7s bounds the token to latticeType[] (7 chars + terminator).
   sscanf(tmp, "%d %le %le %7s", &nAtomic, &mass, &lat, latticeType);
   pot->atomicNo = nAtomic;
   pot->lat = lat;
   pot->mass = mass * amuToInternalMass;  // file has mass in AMU.
   strcpy(pot->latticeType, latticeType);

   // allocate read buffer, large enough for either table length
   int bufSize = MAX(nRho, nR);
   real_t* buf = comdMalloc(bufSize * sizeof(real_t));
   real_t x0 = 0.0;

   // Read embedding energy F(rhobar)
   for (int ii=0; ii<nRho; ++ii)
      fscanf(potFile, FMT1, buf+ii);
   pot->f = initInterpolationObject(nRho, x0, dRho, buf);

   // Read electron density rho(r)
   for (int ii=0; ii<nR; ++ii)
      fscanf(potFile, FMT1, buf+ii);
   pot->rho = initInterpolationObject(nR, x0, dR, buf);

   // Read phi(r)*r and convert to phi(r)
   for (int ii=0; ii<nR; ++ii)
      fscanf(potFile, FMT1, buf+ii);
   // start at ii=1: dividing by r is undefined at r = 0
   for (int ii=1; ii<nR; ++ii)
   {
      real_t r = x0 + ii*dR;
      buf[ii] /= r;
   }
   buf[0] = buf[1] + (buf[1] - buf[2]); // Linear extrapolation to get phi[0].
   pot->phi = initInterpolationObject(nR, x0, dR, buf);

   // All data has been read; release the stream (it was previously leaked).
   fclose(potFile);
   comdFree(buf);
}
示例#22
0
/// \details
/// When called in proper sequence by redistributeAtoms, the atom halo
/// exchange helps serve three purposes:
/// - Send ghost atom data to neighbor tasks.
/// - Shift atom coordinates by the global simulation size when they cross
///   periodic boundaries.  This shift is performed in loadAtomsBuffer.
/// - Transfer ownership of atoms between tasks as the atoms move across
///   spatial domain boundaries.  This transfer of ownership occurs in
///   two places.  The former owner gives up ownership when
///   updateLinkCells moves a formerly local atom into a halo link cell.
///   The new owner accepts ownership when unloadAtomsBuffer calls
///   putAtomInBox to place a received atom into a local link cell.
///
/// This constructor does the following:
///
/// - Sets the bufCapacity to hold the largest possible number of atoms
///   that can be sent across a face.
/// - Initialize function pointers to the atom-specific versions
/// - Sets the number of link cells to send across each face.
/// - Builds the list of link cells to send across each face.  As
///   explained in the comments for mkAtomCellList, this list must
///   include any link cell, local or halo, that could possibly contain
///   an atom that needs to be sent across the face.  Atoms that need to
///   be sent include "ghost atoms" that are located in local link
///   cells that correspond to halo link cells on receiving tasks as well as
///   formerly local atoms that have just moved into halo link cells and
///   need to be sent to the rank that owns the spatial domain the atom
///   has moved into.
/// - Sets a coordinate shift factor for each face to account for
///   periodic boundary conditions.  For most faces the factor is zero.
///   For faces on the +x, +y, or +z face of the simulation domain
///   the factor is -1.0 (to shift the coordinates by -1 times the
///   simulation domain size).  For -x, -y, and -z faces of the
///   simulation domain, the factor is +1.0.
///
/// \see redistributeAtoms
HaloExchange* initAtomHaloExchange(Domain* domain, LinkCell* boxes)
{
   HaloExchange* hh = initHaloExchange(domain);

   // Number of link cells in one plane (including the halo layer)
   // perpendicular to the x, y and z axes respectively.
   int size0 = (boxes->gridSize[1]+2)*(boxes->gridSize[2]+2);
   int size1 = (boxes->gridSize[0]+2)*(boxes->gridSize[2]+2);
   int size2 = (boxes->gridSize[0]+2)*(boxes->gridSize[1]+2);
   // The buffers are shared by all faces, so they must fit the largest of
   // the three planes.  Fold size2 into the running maximum; comparing
   // only size1 and size2 here would drop size0 from consideration.
   int maxSize = MAX(size0, size1);
   maxSize = MAX(maxSize, size2);
   // Two planes of cells per face (see nCells below), MAXATOMS per cell.
   hh->bufCapacity = maxSize*2*MAXATOMS*sizeof(AtomMsg);

   hh->sendBufM = (char*)comdMalloc(hh->bufCapacity);
   hh->sendBufP = (char*)comdMalloc(hh->bufCapacity);
   hh->recvBufP = (char*)comdMalloc(hh->bufCapacity);
   hh->recvBufM = (char*)comdMalloc(hh->bufCapacity);

   // pin memory
   cudaHostRegister(hh->sendBufM, hh->bufCapacity, 0);
   cudaHostRegister(hh->sendBufP, hh->bufCapacity, 0);
   cudaHostRegister(hh->recvBufP, hh->bufCapacity, 0);
   cudaHostRegister(hh->recvBufM, hh->bufCapacity, 0);

   hh->loadBuffer = loadAtomsBuffer;
   hh->unloadBuffer = unloadAtomsBuffer;
   hh->destroy = destroyAtomsExchange;

   // sized from the number of halo boxes: (total - local) * MAXATOMS * 2
   hh->hashTable = initHashTable((boxes->nTotalBoxes - boxes->nLocalBoxes) * MAXATOMS * 2);

   AtomExchangeParms* parms = (AtomExchangeParms*)comdMalloc(sizeof(AtomExchangeParms));

   // two planes of cells are sent across each face
   parms->nCells[HALO_X_MINUS] = 2*(boxes->gridSize[1]+2)*(boxes->gridSize[2]+2);
   parms->nCells[HALO_Y_MINUS] = 2*(boxes->gridSize[0]+2)*(boxes->gridSize[2]+2);
   parms->nCells[HALO_Z_MINUS] = 2*(boxes->gridSize[0]+2)*(boxes->gridSize[1]+2);
   parms->nCells[HALO_X_PLUS]  = parms->nCells[HALO_X_MINUS];
   parms->nCells[HALO_Y_PLUS]  = parms->nCells[HALO_Y_MINUS];
   parms->nCells[HALO_Z_PLUS]  = parms->nCells[HALO_Z_MINUS];

   for (int ii=0; ii<6; ++ii) {
      parms->cellList[ii] = mkAtomCellList(boxes, (enum HaloFaceOrder)ii, parms->nCells[ii]);

      // copy cell list to gpu
      cudaMalloc((void**)&parms->cellListGpu[ii], parms->nCells[ii] * sizeof(int));
      cudaMemcpy(parms->cellListGpu[ii], parms->cellList[ii], parms->nCells[ii] * sizeof(int), cudaMemcpyHostToDevice);
   }

   // allocate scan buf: nLocalBoxes+1 entries, rounded up to a multiple of 256
   int size = boxes->nLocalBoxes+1;
   if (size % 256 != 0) size = ((size + 255)/256)*256;

   // one partial sum per 256 entries plus one, again rounded up to a
   // multiple of 256
   int partial_size = size/256 + 1;
   if (partial_size % 256 != 0) partial_size = ((partial_size + 255)/256)*256;

   cudaMalloc((void**)&parms->d_natoms_buf, size * sizeof(int));
   parms->h_natoms_buf = (int*) malloc( size * sizeof(int));
   cudaMalloc((void**)&parms->d_partial_sums, partial_size * sizeof(int));

   // periodic-boundary shift factors: zero everywhere by default
   for (int ii=0; ii<6; ++ii)
   {
      parms->pbcFactor[ii] = (real_t*)comdMalloc(3*sizeof(real_t));
      for (int jj=0; jj<3; ++jj)
         parms->pbcFactor[ii][jj] = 0.0;
   }
   // ranks at the low/high end of the processor grid along an axis get a
   // +1/-1 factor on the corresponding face
   int* procCoord = domain->procCoord; //alias
   int* procGrid  = domain->procGrid; //alias
   if (procCoord[HALO_X_AXIS] == 0)                       parms->pbcFactor[HALO_X_MINUS][HALO_X_AXIS] = +1.0;
   if (procCoord[HALO_X_AXIS] == procGrid[HALO_X_AXIS]-1) parms->pbcFactor[HALO_X_PLUS][HALO_X_AXIS]  = -1.0;
   if (procCoord[HALO_Y_AXIS] == 0)                       parms->pbcFactor[HALO_Y_MINUS][HALO_Y_AXIS] = +1.0;
   if (procCoord[HALO_Y_AXIS] == procGrid[HALO_Y_AXIS]-1) parms->pbcFactor[HALO_Y_PLUS][HALO_Y_AXIS]  = -1.0;
   if (procCoord[HALO_Z_AXIS] == 0)                       parms->pbcFactor[HALO_Z_MINUS][HALO_Z_AXIS] = +1.0;
   if (procCoord[HALO_Z_AXIS] == procGrid[HALO_Z_AXIS]-1) parms->pbcFactor[HALO_Z_PLUS][HALO_Z_AXIS]  = -1.0;

   hh->type = 0;
   hh->parms = parms;
   return hh;
}
示例#23
0
文件: eam.c 项目: kempj/CoMD-tasking
/// Evaluate the EAM potential energy and the force on every local atom.
///
/// The work is split into three passes over the local link cells:
///
///   -# For every local atom, visit all atoms in its 27 neighbor cells
///   and add the pair force, half of the pair energy, and the electron
///   density contribution.
///   -# For every local atom, look up the embedding energy and its
///   derivative at the accumulated density.
///   -# After the derivatives have been halo-exchanged, visit all
///   neighbors again and add the embedding contribution to the force.
///
/// No ordering filter is applied in the neighbor loops, so only the
/// i-atom is updated for each visited (i, j) pair and the pair energy
/// is weighted by 0.5.
///
/// \param [in, out] s simulation state.  Atom positions are read;
///                    s->atoms->f and s->ePotential are overwritten.
/// \return always 0.
int eamForce(SimFlat* s)
{
   EamPotential* pot = (EamPotential*) s->pot;
   assert(pot);

   // First call only: allocate the per-atom work arrays and create the
   // halo exchange used to share dfEmbed.
   if (pot->forceExchange == NULL)
   {
      int nSlots = MAXATOMS*s->boxes->nTotalBoxes;
      pot->dfEmbed = comdMalloc(nSlots*sizeof(real_t));
      pot->rhobar  = comdMalloc(nSlots*sizeof(real_t));
      pot->forceExchange = initForceHaloExchange(s->domain, s->boxes);
      pot->forceExchangeData = comdMalloc(sizeof(ForceExchangeData));
      pot->forceExchangeData->dfEmbed = pot->dfEmbed;
      pot->forceExchangeData->boxes = s->boxes;
   }

   const int nNbrBoxes = 27;
   real_t rCut2 = pot->cutoff*pot->cutoff;
   real_t etot = 0.;

   // Reset forces and the per-atom density / embedding-derivative arrays.
   int nSlotsTotal = s->boxes->nTotalBoxes*MAXATOMS;
   for (int slot=0; slot<nSlotsTotal; slot++)
   {
      zeroReal3(s->atoms->f[slot]);
      pot->dfEmbed[slot] = 0.;
      pot->rhobar[slot] = 0.;
   }

   // Pass 1: pair force, pair energy and electron density.
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      const int iFirst = MAXATOMS*iBox;
      const int iLast  = iFirst + s->boxes->nAtoms[iBox];

      // neighbor cells of iBox (some may be halo cells)
      for (int nbr=0; nbr<nNbrBoxes; nbr++)
      {
         const int jBox   = s->boxes->nbrBoxes[iBox][nbr];
         const int jFirst = MAXATOMS*jBox;
         const int jLast  = jFirst + s->boxes->nAtoms[jBox];

         for (int iOff=iFirst; iOff<iLast; iOff++)
         {
            for (int jOff=jFirst; jOff<jLast; jOff++)
            {
               real3 dr;
               real_t r2 = 0.0;
               for (int k=0; k<3; k++)
               {
                  dr[k] = s->atoms->r[iOff][k] - s->atoms->r[jOff][k];
                  r2 += dr[k]*dr[k];
               }

               // keep only pairs inside the cutoff; r2 == 0 is the atom itself
               if (!(r2 <= rCut2 && r2 > 0.0)) continue;

               real_t r = sqrt(r2);
               real_t phiTmp, dPhi, rhoTmp, dRho;
               interpolate(pot->phi, r, &phiTmp, &dPhi);
               interpolate(pot->rho, r, &rhoTmp, &dRho);

               for (int k=0; k<3; k++)
               {
                  s->atoms->f[iOff][k] -= dPhi*dr[k]/r;
               }

               // half of the pair energy is attributed to atom i
               etot += 0.5*phiTmp;
               // electron density seen by atom i
               pot->rhobar[iOff] += rhoTmp;
            }
         }
      }
   }

   // Pass 2: embedding energy and its derivative for each local atom.
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      const int iFirst = MAXATOMS*iBox;
      const int iLast  = iFirst + s->boxes->nAtoms[iBox];

      for (int iOff=iFirst; iOff<iLast; iOff++)
      {
         real_t fEmbed, dfEmbed;
         interpolate(pot->f, pot->rhobar[iOff], &fEmbed, &dfEmbed);
         pot->dfEmbed[iOff] = dfEmbed; // kept for the halo exchange below
         etot += fEmbed;
      }
   }

   // Share dF/d(rhobar) with neighboring ranks.
   startTimer(eamHaloTimer);
   haloExchange(pot->forceExchange, pot->forceExchangeData);
   stopTimer(eamHaloTimer);

   // Pass 3: embedding contribution to the force.
   for (int iBox=0; iBox<s->boxes->nLocalBoxes; iBox++)
   {
      const int iFirst = MAXATOMS*iBox;
      const int iLast  = iFirst + s->boxes->nAtoms[iBox];

      // neighbor cells of iBox (some may be halo cells)
      for (int nbr=0; nbr<nNbrBoxes; nbr++)
      {
         const int jBox   = s->boxes->nbrBoxes[iBox][nbr];
         const int jFirst = MAXATOMS*jBox;
         const int jLast  = jFirst + s->boxes->nAtoms[jBox];

         for (int iOff=iFirst; iOff<iLast; iOff++)
         {
            for (int jOff=jFirst; jOff<jLast; jOff++)
            {
               real_t r2 = 0.0;
               real3 dr;
               for (int k=0; k<3; k++)
               {
                  dr[k] = s->atoms->r[iOff][k] - s->atoms->r[jOff][k];
                  r2 += dr[k]*dr[k];
               }

               if (!(r2 <= rCut2 && r2 > 0.0)) continue;

               real_t r = sqrt(r2);
               real_t rhoTmp, dRho;
               interpolate(pot->rho, r, &rhoTmp, &dRho);

               for (int k=0; k<3; k++)
               {
                  s->atoms->f[iOff][k] -= (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[k]/r;
               }
            }
         }
      }
   }

   s->ePotential = (real_t) etot;

   return 0;
}
示例#24
0
/// The force exchange is considerably simpler than the atom exchange.
/// In the force case we only need to exchange data that is needed to
/// complete the force calculation.  Since the atoms have not moved we
/// only need to send data from local link cells and we are guaranteed
/// that the same atoms exist in the same order in corresponding halo
/// cells on remote tasks.  The only tricky part is the size of the
/// plane of local cells that needs to be sent grows in each direction.
/// This is because the y-axis send must send some of the data that was
/// received from the x-axis send, and the z-axis must send some data
/// from the y-axis send.  This accumulation of data to send is
/// responsible for data reaching neighbor cells that share only edges
/// or corners.
///
/// \see eam.c for an explanation of the requirement to exchange
/// force data.
///
/// \param [in] domain  domain decomposition passed to initHaloExchange.
/// \param [in] boxes   link cells; gridSize determines buffer and list sizes.
/// \param [in] useGPU  non-zero selects loadForceBuffer/unloadForceBuffer,
///                     zero selects the *Cpu variants.  The CUDA
///                     allocations below are made in both cases.
/// \return the newly created force halo exchange (type 1, no hash table).
HaloExchange* initForceHaloExchange(Domain* domain, LinkCell* boxes, int useGPU)
{
   HaloExchange* hh = initHaloExchange(domain);

   if(useGPU){
      hh->loadBuffer = loadForceBuffer;
      hh->unloadBuffer = unloadForceBuffer;
   }else{
      hh->loadBuffer = loadForceBufferCpu;
      hh->unloadBuffer = unloadForceBufferCpu;
   }
   hh->destroy = destroyForceExchange;

   // Cell counts of the planes sent along x, y and z (same expressions
   // as nCells below).
   int size0 = (boxes->gridSize[1])*(boxes->gridSize[2]);
   int size1 = (boxes->gridSize[0]+2)*(boxes->gridSize[2]);
   int size2 = (boxes->gridSize[0]+2)*(boxes->gridSize[1]+2);
   // The buffers are shared by all faces, so they must fit the largest of
   // the three planes.  Fold size2 into the running maximum; comparing
   // only size1 and size2 here would drop size0 from consideration.
   int maxSize = MAX(size0, size1);
   maxSize = MAX(maxSize, size2);
   hh->bufCapacity = (maxSize)*MAXATOMS*sizeof(ForceMsg);

   hh->sendBufM = (char*)comdMalloc(hh->bufCapacity);
   hh->sendBufP = (char*)comdMalloc(hh->bufCapacity);
   hh->recvBufP = (char*)comdMalloc(hh->bufCapacity);
   hh->recvBufM = (char*)comdMalloc(hh->bufCapacity);

   // pin memory
   cudaHostRegister(hh->sendBufM, hh->bufCapacity, 0);
   cudaHostRegister(hh->sendBufP, hh->bufCapacity, 0);
   cudaHostRegister(hh->recvBufP, hh->bufCapacity, 0);
   cudaHostRegister(hh->recvBufM, hh->bufCapacity, 0);

   ForceExchangeParms* parms = (ForceExchangeParms*)comdMalloc(sizeof(ForceExchangeParms));

   parms->nCells[HALO_X_MINUS] = (boxes->gridSize[1]  )*(boxes->gridSize[2]  );
   parms->nCells[HALO_Y_MINUS] = (boxes->gridSize[0]+2)*(boxes->gridSize[2]  );
   parms->nCells[HALO_Z_MINUS] = (boxes->gridSize[0]+2)*(boxes->gridSize[1]+2);
   parms->nCells[HALO_X_PLUS]  = parms->nCells[HALO_X_MINUS];
   parms->nCells[HALO_Y_PLUS]  = parms->nCells[HALO_Y_MINUS];
   parms->nCells[HALO_Z_PLUS]  = parms->nCells[HALO_Z_MINUS];

   for (int ii=0; ii<6; ++ii)
   {
      parms->sendCells[ii] = mkForceSendCellList(boxes, ii, parms->nCells[ii]);
      parms->recvCells[ii] = mkForceRecvCellList(boxes, ii, parms->nCells[ii]);

      // copy cell list to gpu
      cudaMalloc((void**)&parms->sendCellsGpu[ii], parms->nCells[ii] * sizeof(int));
      cudaMalloc((void**)&parms->recvCellsGpu[ii], parms->nCells[ii] * sizeof(int));
      cudaMemcpy(parms->sendCellsGpu[ii], parms->sendCells[ii], parms->nCells[ii] * sizeof(int), cudaMemcpyHostToDevice);
      cudaMemcpy(parms->recvCellsGpu[ii], parms->recvCells[ii], parms->nCells[ii] * sizeof(int), cudaMemcpyHostToDevice);

      // allocate temp buf: nCells+1 entries rounded up to a multiple of
      // 256, plus one partial sum per 256 entries and one extra
      int size = parms->nCells[ii]+1;
      if (size % 256 != 0) size = ((size + 255)/256)*256;
      cudaMalloc((void**)&parms->natoms_buf[ii], size * sizeof(int));
      cudaMalloc((void**)&parms->partial_sums[ii], (size/256 + 1) * sizeof(int));
   }

   hh->hashTable = NULL;
   hh->type = 1;
   hh->parms = parms;
   return hh;
}
示例#25
0
/// Builds a fully initialized simulation from the command-line settings.
///
/// The steps are, in order: read the potential, create the domain
/// decomposition, link cells, atoms and deformation data, build the FCC
/// lattice, apply random displacements and the forward deformation,
/// redistribute atoms and compute forces, and finally set the
/// temperature from the requested energy density.
///
/// \param [in] cmd parsed command-line options.
/// \return the newly allocated simulation object.
SimFlat* initSimulation(Command cmd)
{
   SimFlat* sim = comdMalloc(sizeof(*sim));

   // Scalars copied from the command line; pointers start out empty.
   sim->nSteps       = cmd.nSteps;
   sim->printRate    = cmd.printRate;
   sim->dt           = cmd.dt;
   sim->domain       = NULL;
   sim->boxes        = NULL;
   sim->atoms        = NULL;
   sim->ePotential   = 0.0;
   sim->eKinetic     = 0.0;
   sim->atomExchange = NULL;

   sim->pot = initPotential(cmd.doeam, cmd.potDir, cmd.potName, cmd.potType);

   // A negative command-line lattice constant means "use the potential's".
   real_t latticeConstant;
   if (cmd.lat < 0.0)
      latticeConstant = sim->pot->lat;
   else
      latticeConstant = cmd.lat;

   // ensure input parameters make sense.
   sanityChecks(cmd, sim->pot->cutoff, latticeConstant, sim->pot->latticeType);

   sim->species = initSpecies(sim->pot);

   // Global box: number of unit cells times the lattice constant per axis.
   real3 globalExtent;
   globalExtent[0] = cmd.nx * latticeConstant;
   globalExtent[1] = cmd.ny * latticeConstant;
   globalExtent[2] = cmd.nz * latticeConstant;

   sim->domain  = initDecomposition(cmd.xproc, cmd.yproc, cmd.zproc, globalExtent);
   sim->boxes   = initLinkCells(sim->domain, sim->pot->cutoff);
   sim->atoms   = initAtoms(sim->boxes);
   sim->defInfo = initDeformation(sim, cmd.defGrad);

   // Create the lattice at zero temperature, then perturb the positions.
   createFccLattice(cmd.nx, cmd.ny, cmd.nz, latticeConstant, sim);
   setTemperature(sim,0.0);
   randomDisplacements(sim, cmd.initialDelta);

   sim->atomExchange = initAtomHaloExchange(sim->domain, sim->boxes);

   forwardDeformation(sim);

   // Forces must be computed before we call the time stepper.
   startTimer(redistributeTimer);
   redistributeAtoms(sim);
   stopTimer(redistributeTimer);

   startTimer(computeForceTimer);
   computeForce(sim);
   stopTimer(computeForceTimer);

   // Temperature from the requested energy density: the energy left
   // after subtracting the potential energy, per atom, over 1.5*kB.
   double totalEnergy   = cmd.energy*sim->defInfo->globalVolume;
   double excessPerAtom = (totalEnergy-sim->ePotential)/sim->atoms->nGlobal;
   double temperature   = excessPerAtom/(kB_eV*1.5);

   // To set the temperature directly, pass cmd.temperature here instead.
   setTemperature(sim,temperature);
   kineticEnergy(sim);

   return sim;
}
示例#26
0
文件: eam.c 项目: NVIDIA/CoMD-CUDA
/// Neighbor-list variant of the EAM energy/force evaluation.
///
/// The work is done in three passes over the atoms of the local boxes:
///
///   -# Pair pass: for every neighbor-list entry within the cutoff,
///   accumulate the two-body force, the two-body energy and rhobar on
///   both atoms of the pair.
///   -# Embedding pass: interpolate the embedding function at each local
///   atom's rhobar and store its derivative in pot->dfEmbed.
///   -# Embedding-force pass: after dfEmbed has been halo-exchanged, add
///   the embedding contribution to the force on both atoms of each pair.
///
/// On the first call (pot->forceExchange == NULL) the per-atom work
/// arrays and the force halo exchange are created.
///
/// \param [in, out] s  Simulation state.  atoms->f and atoms->U are zeroed
///                     and recomputed, and s->ePotential is overwritten.
/// \return Always 0.
int eamForceCpuNL(SimFlat* s)
{
   EamPotential* pot = (EamPotential*) s->pot;
   assert(pot);

   // One-time lazy setup of the work arrays and of the dfEmbed halo exchange.
   if (pot->forceExchange == NULL)
   {
      int maxTotalAtoms = MAXATOMS*s->boxes->nTotalBoxes;
      const size_t workBytes = maxTotalAtoms*sizeof(real_t);

      pot->dfEmbed = comdMalloc(workBytes);
      pot->rhobar  = comdMalloc(workBytes);
      pot->forceExchange = initForceHaloExchange(s->domain, s->boxes, s->method<CPU_NL);

      pot->forceExchangeData = comdMalloc(sizeof(ForceExchangeData));
      pot->forceExchangeData->dfEmbed = pot->dfEmbed;
      pot->forceExchangeData->boxes = s->boxes;
   }

   const int nLocalBoxes = s->boxes->nLocalBoxes;
   const int nSlots = s->boxes->nTotalBoxes*MAXATOMS;
   const size_t slotBytes = nSlots*sizeof(real_t);

   real_t rCut2 = pot->cutoff*pot->cutoff;
   real_t etot = 0.0;

   // Reset forces, per-atom energies, dfEmbed and rhobar for every atom
   // slot (all boxes, not just the local ones).
   zeroVecAll(&(s->atoms->f), nSlots);
   memset(s->atoms->U,  0, slotBytes);
   memset(pot->dfEmbed, 0, slotBytes);
   memset(pot->rhobar,  0, slotBytes);

   NeighborList* neighborList = s->atoms->neighborList;

   // ---- Pass 1: two-body force, two-body energy and rhobar ----
   for (int iBox = 0; iBox < nLocalBoxes; iBox++)
   {
      const int firstSlot = MAXATOMS*iBox;
      const int lastSlot  = firstSlot + s->boxes->nAtoms[iBox];

      for (int iOff = firstSlot; iOff < lastSlot; iOff++)
      {
         const int iLid = s->atoms->lid[iOff];
         assert(iLid < neighborList->nMaxLocal);

         // Each local id owns a fixed-stride row of maxNeighbors entries.
         const int* nbrs = neighborList->list + neighborList->maxNeighbors*iLid;
         const int nNbrs = neighborList->nNeighbors[iLid];

         for (int k = 0; k < nNbrs; k++)
         {
            const int jOff = nbrs[k];

            real3_old dr;
            dr[0] = s->atoms->r.x[iOff] - s->atoms->r.x[jOff];
            dr[1] = s->atoms->r.y[iOff] - s->atoms->r.y[jOff];
            dr[2] = s->atoms->r.z[iOff] - s->atoms->r.z[jOff];

            const double r2 = dr[0]*dr[0] + dr[1]*dr[1] + dr[2]*dr[2];
            // NOTE(review): this pass skips r2 > rCut2 while pass 3 skips
            // r2 >= rCut2 -- confirm the difference at exactly the cutoff
            // is intended.
            if (r2 > rCut2)
            {
               continue;
            }

            const double r = sqrt(r2);

            real_t phiTmp, dPhi, rhoTmp, dRho;
            interpolate(pot->phi, r, &phiTmp, &dPhi);
            interpolate(pot->rho, r, &rhoTmp, &dRho);

            // Equal and opposite pair force on the two atoms.
            const double fx = dPhi*dr[0]/r;
            const double fy = dPhi*dr[1]/r;
            const double fz = dPhi*dr[2]/r;

            s->atoms->f.x[iOff] -= fx;
            s->atoms->f.y[iOff] -= fy;
            s->atoms->f.z[iOff] -= fz;
            s->atoms->f.x[jOff] += fx;
            s->atoms->f.y[jOff] += fy;
            s->atoms->f.z[jOff] += fz;

            // The pair counts fully toward the total when the neighbor's
            // box is local, and by half when it lies in a non-local box.
            if (jOff / MAXATOMS < nLocalBoxes)
            {
               etot += phiTmp;
            }
            else
            {
               etot += 0.5*phiTmp;
            }

            // Per-atom energy is split evenly between the two atoms.
            s->atoms->U[iOff] += 0.5*phiTmp;
            s->atoms->U[jOff] += 0.5*phiTmp;

            pot->rhobar[iOff] += rhoTmp;
            pot->rhobar[jOff] += rhoTmp;
         }
      }
   }

   // ---- Pass 2: embedding energy and its derivative ----
   for (int iBox = 0; iBox < nLocalBoxes; iBox++)
   {
      const int firstSlot = MAXATOMS*iBox;
      const int lastSlot  = firstSlot + s->boxes->nAtoms[iBox];

      for (int iOff = firstSlot; iOff < lastSlot; iOff++)
      {
         real_t embedEnergy, embedDeriv;
         interpolate(pot->f, pot->rhobar[iOff], &embedEnergy, &embedDeriv);

         pot->dfEmbed[iOff] = embedDeriv; // kept for the halo exchange below
         etot += embedEnergy;
         s->atoms->U[iOff] += embedEnergy;
      }
   }

   // Exchange the derivative of the embedding energy with respect to rhobar.
   startTimer(eamHaloTimer);
   haloExchange(pot->forceExchange, pot->forceExchangeData);
   stopTimer(eamHaloTimer);

   // ---- Pass 3: embedding contribution to the forces ----
   for (int iBox = 0; iBox < nLocalBoxes; iBox++)
   {
      const int firstSlot = MAXATOMS*iBox;
      const int lastSlot  = firstSlot + s->boxes->nAtoms[iBox];

      for (int iOff = firstSlot; iOff < lastSlot; iOff++)
      {
         const int iLid = s->atoms->lid[iOff];
         assert(iLid < neighborList->nMaxLocal);

         const int* nbrs = neighborList->list + neighborList->maxNeighbors*iLid;
         const int nNbrs = neighborList->nNeighbors[iLid];

         for (int k = 0; k < nNbrs; k++)
         {
            const int jOff = nbrs[k];

            real3_old dr;
            dr[0] = s->atoms->r.x[iOff] - s->atoms->r.x[jOff];
            dr[1] = s->atoms->r.y[iOff] - s->atoms->r.y[jOff];
            dr[2] = s->atoms->r.z[iOff] - s->atoms->r.z[jOff];

            const double r2 = dr[0]*dr[0] + dr[1]*dr[1] + dr[2]*dr[2];
            if (r2 >= rCut2)
            {
               continue;
            }

            real_t r = sqrt(r2);

            real_t rhoTmp, dRho;
            interpolate(pot->rho, r, &rhoTmp, &dRho);

            const real_t fx = (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[0]/r;
            const real_t fy = (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[1]/r;
            const real_t fz = (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho*dr[2]/r;

            s->atoms->f.x[iOff] -= fx;
            s->atoms->f.y[iOff] -= fy;
            s->atoms->f.z[iOff] -= fz;
            s->atoms->f.x[jOff] += fx;
            s->atoms->f.y[jOff] += fy;
            s->atoms->f.z[jOff] += fz;
         }
      }
   }

   s->ePotential = (real_t) etot;

   return 0;
}
示例#27
0
/// Calculate potential energy and forces for the EAM potential.
///
/// Three steps are required:
///
///   -# Loop over the atoms of the local boxes and the atoms of their
///   neighbor boxes, compute the two-body interaction and the electron
///   density (rhobar) at each atom
///   -# Loop over the atoms of the local boxes, compute the embedding
///   energy and its derivative for each atom
///   -# Loop over the same pairs again, compute the embedding energy
///   contribution to the force and add it to the two-body force
///
/// Between steps 2 and 3 the embedding derivative (pot->dfEmbed) is
/// halo-exchanged.  On the first call (pot->forceExchange == NULL) the
/// per-atom work arrays and the force halo exchange are created.
///
/// \param [in, out] s  Simulation state.  atoms->f and atoms->U are zeroed
///                     and recomputed, and s->ePotential is overwritten.
/// \return Always 0.
int eamForce(SimFlat* s)
{
   // Loop-invariant references, hoisted once.
   Atoms* atoms = s->atoms;
   LinkCell* boxes = s->boxes;
   int nLocalBoxes = boxes->nLocalBoxes;
   int nTotalBoxes = boxes->nTotalBoxes;
   int* nAtoms = boxes->nAtoms;
   real3* atoms_r = atoms->r;
   real3* atoms_f = atoms->f;
   real_t* atoms_U = atoms->U;

   EamPotential* pot = (EamPotential*) s->pot;
   assert(pot);

   // set up halo exchange and internal storage on first call to forces.
   if (pot->forceExchange == NULL)
   {
      int maxTotalAtoms = MAXATOMS*s->boxes->nTotalBoxes;
      pot->dfEmbed = comdMalloc(maxTotalAtoms*sizeof(real_t));
      pot->rhobar  = comdMalloc(maxTotalAtoms*sizeof(real_t));
      pot->forceExchange = initForceHaloExchange(s->domain, s->boxes);
      pot->forceExchangeData = comdMalloc(sizeof(ForceExchangeData));
      pot->forceExchangeData->dfEmbed = pot->dfEmbed;
      pot->forceExchangeData->boxes = s->boxes;
   }
   
   real_t rCut2 = pot->cutoff*pot->cutoff;

   // zero forces / energy / rho /rhoprime
   real_t etot = 0.0;
   memset(atoms_f,  0, nTotalBoxes*MAXATOMS*sizeof(real3));
   memset(atoms_U,  0, nTotalBoxes*MAXATOMS*sizeof(real_t));
   memset(pot->dfEmbed, 0, nTotalBoxes*MAXATOMS*sizeof(real_t));
   memset(pot->rhobar,  0, nTotalBoxes*MAXATOMS*sizeof(real_t));

   int nbrBoxes[27];
   // first pass: two-body force, two-body energy and rhobar
   // loop over local boxes
   for (int iBox=0; iBox<nLocalBoxes; iBox++)
   {
      int nIBox = nAtoms[iBox];
      int nNbrBoxes = getNeighborBoxes(boxes, iBox, nbrBoxes);
      // loop over neighbor boxes of iBox (some may be halo boxes)
      for (int jTmp=0; jTmp<nNbrBoxes; jTmp++)
      {
         int jBox = nbrBoxes[jTmp];
         // visit each pair of boxes only once
         if (jBox < iBox ) continue;

         int nJBox = nAtoms[jBox];
         // loop over atoms in iBox
         for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
         {
            // loop over atoms in jBox
            for (int jOff=MAXATOMS*jBox,ij=0; ij<nJBox; ij++,jOff++)
            {
               // within a single box, visit each pair of atoms only once
               if ( (iBox==jBox) &&(ij <= ii) ) continue;

               // separation vector and squared distance, with the loop
               // over the three components unrolled by hand
               double r2 = 0.0;
               double dr0 = atoms_r[iOff][0]-atoms_r[jOff][0];
               r2+=dr0*dr0;
               double dr1 = atoms_r[iOff][1]-atoms_r[jOff][1];
               r2+=dr1*dr1;
               double dr2 = atoms_r[iOff][2]-atoms_r[jOff][2];
               r2+=dr2*dr2;

               // NOTE(review): this pass skips r2 > rCut2 while the third
               // pass skips r2 >= rCut2 -- confirm the difference at
               // exactly the cutoff is intended.
               if(r2>rCut2) continue;

               double r = sqrt(r2);

               real_t phiTmp, dPhi, rhoTmp, dRho;
               interpolate(pot->phi, r, &phiTmp, &dPhi);
               interpolate(pot->rho, r, &rhoTmp, &dRho);

               // equal and opposite two-body force, one component at a time
               real_t cal = dPhi*dr0/r;
               atoms_f[iOff][0] -= cal;
               atoms_f[jOff][0] += cal;
               cal = dPhi*dr1/r;
               atoms_f[iOff][1] -= cal;
               atoms_f[jOff][1] += cal;
               cal = dPhi*dr2/r;
               atoms_f[iOff][2] -= cal;
               atoms_f[jOff][2] += cal;

               // update energy terms
               // calculate energy contribution based on whether
               // the neighbor box is local or remote
               if (jBox < nLocalBoxes)
                  etot += phiTmp;
               else
                  etot += 0.5*phiTmp;

               atoms_U[iOff] += 0.5*phiTmp;
               atoms_U[jOff] += 0.5*phiTmp;

               // accumulate rhobar for each atom
               pot->rhobar[iOff] += rhoTmp;
               pot->rhobar[jOff] += rhoTmp;

            } // loop over atoms in jBox
         } // loop over atoms in iBox
      } // loop over neighbor boxes
   } // loop over local boxes

   // Compute Embedding Energy
   // loop over all local boxes
   for (int iBox=0; iBox<nLocalBoxes; iBox++)
   {
      int nIBox = nAtoms[iBox];

      // loop over atoms in iBox
      for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
      {
         real_t fEmbed, dfEmbed;
         interpolate(pot->f, pot->rhobar[iOff], &fEmbed, &dfEmbed);
         pot->dfEmbed[iOff] = dfEmbed; // save derivative for halo exchange
         etot += fEmbed; 
         atoms_U[iOff] += fEmbed;
      }
   }

   // exchange derivative of the embedding energy with respect to rhobar
   startTimer(eamHaloTimer);
   haloExchange(pot->forceExchange, pot->forceExchangeData);
   stopTimer(eamHaloTimer);

   // third pass: embedding contribution to the forces
   // loop over local boxes
   for (int iBox=0; iBox<nLocalBoxes; iBox++)
   {
      int nIBox = nAtoms[iBox];
      int nNbrBoxes = getNeighborBoxes(boxes, iBox, nbrBoxes);
      // loop over neighbor boxes of iBox (some may be halo boxes)
      for (int jTmp=0; jTmp<nNbrBoxes; jTmp++)
      {
         int jBox = nbrBoxes[jTmp];
         // visit each pair of boxes only once
         if(jBox < iBox) continue;

         int nJBox = nAtoms[jBox];
         // loop over atoms in iBox
         for (int iOff=MAXATOMS*iBox,ii=0; ii<nIBox; ii++,iOff++)
         {
            // loop over atoms in jBox
            for (int jOff=MAXATOMS*jBox,ij=0; ij<nJBox; ij++,jOff++)
            { 
               // within a single box, visit each pair of atoms only once
               if ((iBox==jBox) && (ij <= ii))  continue;

               // separation vector and squared distance (unrolled)
               double r2 = 0.0;
               real_t dr0 = atoms_r[iOff][0]-atoms_r[jOff][0];
               r2 += dr0*dr0;
               real_t dr1 = atoms_r[iOff][1]-atoms_r[jOff][1];
               r2 += dr1*dr1;
               real_t dr2 = atoms_r[iOff][2]-atoms_r[jOff][2];
               r2 += dr2*dr2;

               if(r2>=rCut2) continue;

               real_t r = sqrt(r2);

               real_t rhoTmp, dRho;
               interpolate(pot->rho, r, &rhoTmp, &dRho);

               // Factor shared by all three components; the order of the
               // multiplications matches the per-component expressions
               // (sum * dRho * dr / r).
               real_t embedScale = (pot->dfEmbed[iOff]+pot->dfEmbed[jOff])*dRho;

               real_t cal = embedScale*dr0/r;
               atoms_f[iOff][0] -= cal;
               atoms_f[jOff][0] += cal;
               cal = embedScale*dr1/r;
               atoms_f[iOff][1] -= cal;
               atoms_f[jOff][1] += cal;
               cal = embedScale*dr2/r;
               atoms_f[iOff][2] -= cal;
               atoms_f[jOff][2] += cal;
            } // loop over atoms in jBox
         } // loop over atoms in iBox
      } // loop over neighbor boxes
   } // loop over local boxes

   s->ePotential = (real_t) etot;

   return 0;
}