Ejemplo n.º 1
0
// Send `msg` (`size` bytes) from Pe 0.
//
// Only PE == ALLBUTME is serviced: the message goes to the first Pe of
// node 2 and then of node 1, for whichever of those nodes exist.  PE == ALL
// and every other destination are reported through NAMD_bug().  Before
// sending, the call keeps delivering incoming messages until the CsmAcks
// counter has reached nchildren, and then resets that counter.
void Communicate::sendMessage(int PE, void *msg, int size)
{
  if ( CmiMyPe() != 0 ) {
    NAMD_bug("Communicate::sendMessage not from Pe 0");
  }

  // Wait for the ack counter to reach nchildren before sending again.
  for ( ; CkpvAccess(CsmAcks) < nchildren; ) {
    CmiDeliverMsgs(0);
  }
  CkpvAccess(CsmAcks) = 0;

  CmiSetHandler(msg, CsmHandlerIndex);

  if ( PE == ALL ) {
    NAMD_bug("Unexpected Communicate::sendMessage(ALL,...)");
  } else if ( PE == ALLBUTME ) {
    // Node 2 first, then node 1; a node is skipped when it does not exist.
    for ( int node = 2; node >= 1; --node ) {
      if ( CmiNumNodes() > node ) {
        CmiSyncSend(CmiNodeFirst(node),size,(char*)msg);
      }
    }
  } else {
    NAMD_bug("Unexpected Communicate::sendMessage(PEL,...)");
  }
}
Ejemplo n.º 2
0
// Push `info` onto the front of the list without checking whether it is
// already present.  With DEBUG_IRSET defined, a duplicate entry or a
// mismatch between the list length and nElements is reported via NAMD_bug().
void IRSet::unchecked_insert(InfoRecord *info) 
{
#ifdef DEBUG_IRSET
  if (find(info)) NAMD_bug("IRSet::unchecked_insert duplicate");
#endif
  ++nElements;

  // The new node becomes the head and points at the previous head.
  listNode *fresh = new listNode(info);
  fresh->next = head;
  head = fresh;

#ifdef DEBUG_IRSET
  // Re-count the whole list and compare against the bookkeeping counter.
  int counted = 0;
  for (listNode *cur = fresh; cur; cur = cur->next) {
    ++counted;
  }
  if ( counted != nElements ) NAMD_bug("IRSet::unchecked_insert count");
#endif
}
Ejemplo n.º 3
0
void *Communicate::getMessage(int PE, int tag)
{
  if ( CmiMyRank() ) NAMD_bug("Communicate::getMessage called on non-rank-zero Pe\n");

  int itag[2], rtag[2];
  void *msg;

  itag[0] = (PE==(-1)) ? (CmmWildCard) : PE;
  itag[1] = (tag==(-1)) ? (CmmWildCard) : tag;
  while((msg=CmmGet(CkpvAccess(CsmMessages),2,itag,rtag))==0) {
    CmiDeliverMsgs(0);
  }

  char *ackmsg = (char *) CmiAlloc(CmiMsgHeaderSizeBytes);
  CmiSetHandler(ackmsg, CsmAckHandlerIndex);
  CmiSyncSend(CmiNodeFirst((CmiMyNode()-1)/2), CmiMsgHeaderSizeBytes, ackmsg);

  while ( CkpvAccess(CsmAcks) < nchildren ) {
    CmiDeliverMsgs(0);
  }
  CkpvAccess(CsmAcks) = 0;

  int size = SIZEFIELD(msg);
  for ( int i = 2; i >= 1; --i ) {
    int node = CmiMyNode() * 2 + i;
    if ( node < CmiNumNodes() ) {
      CmiSyncSend(CmiNodeFirst(node),size,(char*)msg);
    }
  }

  return msg;
}
Ejemplo n.º 4
0
//----------------------------------------------------------------------
// Append a new compute record and return its ID (the index of the record
// in computeData).  maxPids is only validated against numPidsAllocated;
// the new record starts with numPids == 0.
ComputeID ComputeMap::storeCompute(int inode, int maxPids, 
				   ComputeType type, 
				   int partition,int numPartitions)
{
  if (numPidsAllocated < maxPids) {
    NAMD_bug("ComputeMap::storeCompute called with maxPids > numPidsAllocated");
  }

  // The next free slot doubles as the compute ID.
  const int cid = nComputes++;
  computeData.resize(nComputes);

  computeData[cid].numPids = 0;
  computeData[cid].node = inode;
  computeData[cid].type = type;
  computeData[cid].partition = partition;
  computeData[cid].numPartitions = numPartitions;

  #if defined(NAMD_MIC) && (MIC_SPLIT_WITH_HOST != 0)
    // By default, pass all non-bonded selfs and pairs to the device
    const bool isNonbonded =
      (type == computeNonbondedSelfType || type == computeNonbondedPairType);
    computeData[cid].directToDevice = isNonbonded ? 1 : 0;
  #endif

  return cid;
}
Ejemplo n.º 5
0
// Accumulate reduction data submitted by a child into the matching
// sequence slot of the addressed reduction set.  The message is deleted
// here; once every registered submission has been recorded for that
// sequence number, the result is passed to mergeAndDeliver().
void ReductionMgr::remoteSubmit(ReductionSubmitMsg *msg) {
  ReductionSet *set = reductionSets[msg->reductionSetID];

  // Shift the child's sequence number by the per-child offset kept in the set.
  const int child = childIndex(msg->sourceNode);
  const int seqNum = msg->sequenceNumber + set->addToRemoteSequenceNumber[child];

  const int size = msg->dataSize;
  if ( set->dataSize != size ) {
    NAMD_bug("ReductionMgr::remoteSubmit data sizes do not match.");
  }

  ReductionSetData *data = set->getData(seqNum);
  BigReal *curData = data->data;
  BigReal *newData = msg->data;
#ifdef ARCH_POWERPC
#pragma disjoint (*curData,  *newData)
#pragma unroll(4)
#endif
  for ( int i = 0; i < size; ++i ) {
    curData[i] += newData[i];
  }
  delete msg;

  // Deliver as soon as the last expected submission has arrived.
  if ( ++(data->submitsRecorded) == set->submitsRegistered ) {
    mergeAndDeliver(set,seqNum);
  }
}
Ejemplo n.º 6
0
// Build an empty reduction set.
//
// For REDUCTIONS_BASIC and REDUCTIONS_AMD the caller must pass size == -1
// and the size becomes REDUCTION_MAX_RESERVED; every other set must supply
// an explicit size.  Violations are reported via NAMD_bug().  The per-child
// sequence-offset array is allocated with numChildren entries but its
// contents are not initialized here.
ReductionSet::ReductionSet(int setID, int size, int numChildren) {
  const bool usesReservedSize =
    ( setID == REDUCTIONS_BASIC || setID == REDUCTIONS_AMD );
  if ( usesReservedSize ) {
    if ( size != -1 ) {
      NAMD_bug("ReductionSet size specified for REDUCTIONS_BASIC or REDUCTIONS_AMD.");
    }
    size = REDUCTION_MAX_RESERVED;
  }
  if ( size == -1 ) {
    NAMD_bug("ReductionSet size not specified.");
  }

  reductionSetID = setID;
  dataSize = size;

  // Nothing registered, queued or waiting yet.
  dataQueue = 0;
  nextSequenceNumber = 0;
  submitsRegistered = 0;
  requireRegistered = 0;
  threadIsWaiting = 0;

  addToRemoteSequenceNumber = new int[numChildren];
}
Ejemplo n.º 7
0
// Converse handler for incoming Communicate messages: files the message in
// the CsmMessages table.  The two ints that directly follow the Converse
// header serve as the lookup tags (sending node and tag, per the original
// comment).  Raises NAMD_bug() when run on a non-zero rank.
static void CsmHandler(void *msg)
{
  if ( CmiMyRank() != 0 ) {
    NAMD_bug("Communicate CsmHandler on non-rank-zero pe");
  }
  // The user part of the message starts right after the Converse header.
  char *userStart = (char *)msg + CmiMsgHeaderSizeBytes;
  int *tags = (int *) userStart;
  CmmPut(CkpvAccess(CsmMessages), 2, tags, msg);
}
Ejemplo n.º 8
0
// Enter the load balancer's local barrier.  The reported patch, compute
// and controller counts must each equal the expected count; any mismatch
// is reported via NAMD_bug().
void LdbCoordinator::barrier(void)
{
  const bool allEventsSeen =
       (nPatchesReported == nPatchesExpected)
    && (nComputesReported == nComputesExpected)
    && (controllerReported == controllerExpected);

  if ( ! allEventsSeen ) {
    NAMD_bug("Load balancer received wrong number of events.\n");
  }

  theLbdb->AtLocalBarrier(ldBarrierHandle);
}
Ejemplo n.º 9
0
// Evaluate `script` in the interpreter at global scope.
//
// On return *resultPtr points at the interpreter's string result and the
// Tcl completion code is returned.  In builds without NAMD_TCL this
// reports a bug via NAMD_bug().
int ScriptTcl::eval(const char *script, const char **resultPtr) {

#ifndef NAMD_TCL
  NAMD_bug("ScriptTcl::eval called without Tcl.");
  return -1;  // appease compiler
#else
  const int status = Tcl_EvalEx(interp,script,-1,TCL_EVAL_GLOBAL);
  *resultPtr = Tcl_GetStringResult(interp);
  return status;
#endif
}
Ejemplo n.º 10
0
// Replace the compute table with the `n` records found at `ptr`.  If the
// map already holds computes, `n` must equal the current count; a
// different count is reported via NAMD_bug().
void ComputeMap::unpack (int n, ComputeData *ptr)
{
  DebugM(4,"Unpacking ComputeMap\n");

  const bool alreadyPopulated = ( nComputes != 0 );
  if ( alreadyPopulated && nComputes != n ) {
    NAMD_bug("number of computes in new ComputeMap has changed!\n");
  }

  nComputes = n;
  computeData.resize(n);
  // Raw byte copy of all n records into the resized table.
  memcpy(computeData.begin(), ptr, n * sizeof(ComputeData));
}
Ejemplo n.º 11
0
void ComputeMap::extendPtrs() {
  if ( ! computePtrs ) NAMD_bug("ComputeMap::extendPtrs() 1");
  int oldN = nComputes;
  nComputes = computeData.size();
  if ( nComputes > oldN ) {
    Compute **oldPtrs = computePtrs;
    computePtrs = new Compute*[nComputes];
    memcpy(computePtrs, oldPtrs, oldN*sizeof(Compute*));
    memset(computePtrs+oldN, 0, (nComputes-oldN)*sizeof(Compute*));
    delete [] oldPtrs;
  }
}
Ejemplo n.º 12
0
// Apply every loaded gridforce grid to the atoms of the home patch.
//
// p: atoms of the patch; r: results structure whose "normal" force array
// receives the forces.  Energy, external force and virial accumulated by
// do_calc() are added to the reduction, which is then submitted.
// Having no grids loaded is reported via NAMD_bug().
void ComputeGridForce::doForce(FullAtom* p, Results* r)
{
    SimParameters *simParams = Node::Object()->simParameters;
    Molecule *mol = Node::Object()->molecule;

    if ( mol->numGridforceGrids < 1 ) NAMD_bug("No grids loaded in ComputeGridForce::doForce()");

    int numAtoms = homePatch->getNumAtoms();
    Force *forces = r->f[Results::normal];

    // Accumulators filled in by do_calc() across all grids.
    BigReal energy = 0;
    Force extForce = 0.;
    Tensor extVirial;

    for (int gridnum = 0; gridnum < mol->numGridforceGrids; gridnum++) {
        GridforceGrid *grid = mol->get_gridfrc_grid(gridnum);

        // Every GF_OVERLAPCHECK_FREQ steps, and only with pressure control
        // on, verify that the grid still fits the lattice (without pressure
        // control the check made at startup is enough).
        const bool isCheckStep =
            (homePatch->flags.step % GF_OVERLAPCHECK_FREQ == 0);
        const bool pressureControlOn =
            (simParams->langevinPistonOn || simParams->berendsenPressureOn);
        if (isCheckStep && pressureControlOn
            && !grid->fits_lattice(homePatch->lattice)
            && simParams->gridforcechecksize) {
            char errmsg[512];
            sprintf(errmsg, "Warning: Periodic cell basis too small for Gridforce grid %d.  Set gridforcechecksize off in configuration file to ignore.\n", gridnum);
            NAMD_die(errmsg);
        }

        Position center = grid->get_center();

        if (homePatch->flags.step % 100 == 1) {
            DebugM(3, "center = " << center << "\n" << endi);
            DebugM(3, "e = " << grid->get_e() << "\n" << endi);
        }

        // Dispatch on the concrete grid type; other types are ignored.
        if (grid->get_grid_type() == GridforceGrid::GridforceGridTypeFull) {
            GridforceFullMainGrid *fullGrid = (GridforceFullMainGrid *)grid;
            do_calc(fullGrid, gridnum, p, numAtoms, mol, forces, energy, extForce, extVirial);
        } else if (grid->get_grid_type() == GridforceGrid::GridforceGridTypeLite) {
            GridforceLiteGrid *liteGrid = (GridforceLiteGrid *)grid;
            do_calc(liteGrid, gridnum, p, numAtoms, mol, forces, energy, extForce, extVirial);
        }
    }

    reduction->item(REDUCTION_MISC_ENERGY) += energy;
    ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_NORMAL,extForce);
    ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,extVirial);
    reduction->submit();
}
Ejemplo n.º 13
0
// Evaluate the Tcl script file `scriptFile`.  A non-empty interpreter
// result is printed with a "TCL: " prefix; a completion code other than
// TCL_OK terminates the run with the interpreter's errorInfo text.
// In builds without NAMD_TCL this reports a bug via NAMD_bug().
void ScriptTcl::load(char *scriptFile) {

#ifndef NAMD_TCL
  NAMD_bug("ScriptTcl::load called without Tcl.");
#else
  const int status = Tcl_EvalFile(interp,scriptFile);

  const char *output = Tcl_GetStringResult(interp);
  if (output[0] != 0) {
    CkPrintf("TCL: %s\n",output);
  }

  if (status != TCL_OK) {
    NAMD_die(Tcl_GetVar(interp,"errorInfo",0));
  }
#endif

}
Ejemplo n.º 14
0
// Handle an incoming broadcast: when the broadcast id has registrants,
// store the payload as a tagged message and awaken every registered
// client.  The message is always deleted before returning.
void 
BroadcastMgr::recvBroadcast(BroadcastMsg *msg) {
  // Look up the registration record for this broadcast id, if any.
  BOID *entry = boid.find(BOID(msg->id));
  if ( entry ) {
    // One pending pickup per registrant, minus one when this Pe is the
    // sender.
    int pending = entry->broadcastSet->size();
    if (msg->node == CkMyPe()) {
      pending--;
    }

    if ( pending < 0 ) {
      NAMD_bug("BroadcastMgr::recvBroadcast counter < 0");
    } else if ( pending > 0 ) {
      // Keep the payload until all pending registrants have taken it.
      entry->taggedMsg->add(TaggedMsg(msg->tag,msg->size,pending,msg->msg));

      // Inform all registrants of the new message.
      UniqueSetIter<BroadcastClientElem> client(*(entry->broadcastSet));
      for (client = client.begin(); client != client.end(); client++) {
        client->broadcastClient->awaken(msg->id, msg->tag);
      }
    }
  }
  delete msg;
}
Ejemplo n.º 15
0
// Shared final step for local and remote submissions: called once all
// registered submissions for `seqNum` have been recorded.
//
// On the root, a thread waiting for exactly this sequence number is
// awakened; having no registered consumer is fatal.  On any other Pe the
// accumulated data is copied into a message, sent to the parent's
// remoteSubmit(), and the local data for `seqNum` is removed and deleted.
void ReductionMgr::mergeAndDeliver(ReductionSet *set, int seqNum) {

    set->nextSequenceNumber++; // should match all clients

    ReductionSetData *data = set->getData(seqNum);
    if ( set->submitsRegistered != data->submitsRecorded ) {
      NAMD_bug("ReductionMgr::mergeAndDeliver not ready to deliver.");
    }

    if ( ! isRoot() ) {
      // Package the accumulated values and pass them up to the parent.
      int size = set->dataSize;
      ReductionSubmitMsg *msg = new(&size,1) ReductionSubmitMsg;
      msg->reductionSetID = set->reductionSetID;
      msg->sourceNode = CkMyPe();
      msg->sequenceNumber = seqNum;
      msg->dataSize = set->dataSize;
      for ( int k = 0; k < msg->dataSize; ++k ) {
        msg->data[k] = data->data[k];
      }
      CProxy_ReductionMgr reductionProxy(thisgroup);
      reductionProxy[myParent].remoteSubmit(msg);
      delete set->removeData(seqNum);
    } else if ( ! set->requireRegistered ) {
      NAMD_die("ReductionSet::deliver will never deliver data");
    } else if ( set->threadIsWaiting && set->waitingForSequenceNumber == seqNum ) {
      // Awaken the thread so it can take the data.
      CthAwaken(set->waitingThread);
    }

}
Ejemplo n.º 16
0
// Remove the first list node whose info pointer equals `r`.
// Returns 1 when a node was removed and 0 when `r` is not in the list.
// With DEBUG_IRSET defined, the list length is first checked against
// nElements.
int IRSet::remove(InfoRecord * r) 
{
#ifdef DEBUG_IRSET
    int counted = 0;
    for (listNode *walker = head; walker; walker = walker->next) ++counted;
    if ( counted != nElements ) NAMD_bug("IRSet::remove count");
#endif

  // `link` always addresses the pointer that leads to the current node
  // (either `head` or some node's `next`), so unlinking is the same
  // operation for the first node and for interior nodes.
  for (listNode **link = &head; *link; link = &((*link)->next)) {
    listNode *current = *link;
    if (current->info == r) {
      *link = current->next;
      delete current;
      --nElements;
      return 1;
    }
  }
  return 0;
}
Ejemplo n.º 17
0
//----------------------------------------------------------------------
// Append a new compute record and return its ID (the index of the record
// in computeData).  maxPids is only validated against numPidsAllocated;
// the new record starts with numPids == 0.
ComputeID ComputeMap::storeCompute(int inode, int maxPids, 
				   ComputeType type, 
				   int partition,int numPartitions)
{
  if (numPidsAllocated < maxPids) {
    NAMD_bug("ComputeMap::storeCompute called with maxPids > numPidsAllocated");
  }

  // The next free slot doubles as the compute ID.
  const int cid = nComputes++;
  computeData.resize(nComputes);

  computeData[cid].numPids = 0;
  computeData[cid].node = inode;
  computeData[cid].type = type;
  computeData[cid].partition = partition;
  computeData[cid].numPartitions = numPartitions;

  return cid;
}
Ejemplo n.º 18
0
// Return the directToDevice flag stored for compute `cid`.
// A cid outside [0, nComputes) is reported via NAMD_bug().
int ComputeMap::directToDevice(const ComputeID cid) const {
  const bool cidInRange = (0 <= cid && cid < nComputes);
  if ( ! cidInRange ) {
    NAMD_bug("ComputeMap::directToDevice() called with an invalid cid value");
  }
  return computeData[cid].directToDevice;
}
Ejemplo n.º 19
0
// Store the directToDevice flag for compute `cid`; any non-zero `d` is
// normalized to 1.  A cid outside [0, nComputes) is reported via NAMD_bug().
void ComputeMap::setDirectToDevice(const ComputeID cid, const int d) {
  const bool cidInRange = (0 <= cid && cid < nComputes);
  if ( ! cidInRange ) {
    NAMD_bug("ComputeMap::setDirectToDevice() called with an invalid cid value");
  }
  computeData[cid].directToDevice = (d != 0) ? 1 : 0;
}
Ejemplo n.º 20
0
// Converse handler for acknowledgement messages: frees the message and
// bumps the CsmAcks counter by one.  Raises NAMD_bug() when run on a
// non-zero rank.
static void CsmAckHandler(void *msg)
{
  if ( CmiMyRank() != 0 ) {
    NAMD_bug("Communicate CsmAckHandler on non-rank-zero pe");
  }
  CmiFree(msg);
  ++CkpvAccess(CsmAcks);
}
Ejemplo n.º 21
0
// Create the compute object for compute ID `i` according to map->type(i).
//
// Most types construct a Compute subclass; that object is then registered
// with `map` and initialized in one shared step after the switch.  In CUDA
// builds, and in MIC builds unless MIC_SPLIT_WITH_HOST keeps the compute
// on the host, nonbonded self/pair computes are instead handed to the
// register_cuda_* / register_mic_* functions and no object is built here.
// An unrecognized type is reported via NAMD_bug().
void
ComputeMgr::createCompute(ComputeID i, ComputeMap *map)
{
    // Stays null on the paths that only register the compute with an
    // accelerator back end (it used to be left uninitialized there).
    Compute *c = 0;
    PatchID pid2[2];
    PatchIDList pids;
    int trans2[2];
    SimParameters *simParams = Node::Object()->simParameters;

    PatchID pid8[8];
    int trans8[8];

    switch ( map->type(i) )
    {
    case computeNonbondedSelfType:
#ifdef NAMD_CUDA
        register_cuda_compute_self(i,map->computeData[i].pids[0].pid);
#elif defined(NAMD_MIC)
        #if MIC_SPLIT_WITH_HOST != 0
          if (map->directToDevice(i) == 0) {
            c = new ComputeNonbondedSelf(i,map->computeData[i].pids[0].pid,
                                         computeNonbondedWorkArrays,
                                         map->partition(i),map->partition(i)+1,
                                         map->numPartitions(i)); // unknown delete
          } else {
        #endif
            register_mic_compute_self(i,map->computeData[i].pids[0].pid,map->partition(i),map->numPartitions(i));
        #if MIC_SPLIT_WITH_HOST != 0
          }
        #endif
#else
        c = new ComputeNonbondedSelf(i,map->computeData[i].pids[0].pid,
                                     computeNonbondedWorkArrays,
                                     map->partition(i),map->partition(i)+1,
                                     map->numPartitions(i)); // unknown delete
#endif
        break;
    case computeLCPOType:
        // Gather the eight patch IDs and transforms of this compute.
        for (int j = 0; j < 8; j++) {
          pid8[j] = map->computeData[i].pids[j].pid;
          trans8[j] = map->computeData[i].pids[j].trans;
        }
        c = new ComputeLCPO(i,pid8,trans8,
             computeNonbondedWorkArrays,
             map->partition(i),map->partition(i)+1,
             map->numPartitions(i), 8);
        break;
    case computeNonbondedPairType:
        pid2[0] = map->computeData[i].pids[0].pid;
        trans2[0] = map->computeData[i].pids[0].trans;
        pid2[1] = map->computeData[i].pids[1].pid;
        trans2[1] = map->computeData[i].pids[1].trans;
#ifdef NAMD_CUDA
        register_cuda_compute_pair(i,pid2,trans2);
#elif defined(NAMD_MIC)
        #if MIC_SPLIT_WITH_HOST != 0
          if (map->directToDevice(i) == 0) {
            c = new ComputeNonbondedPair(i,pid2,trans2,
                                         computeNonbondedWorkArrays,
                                         map->partition(i),map->partition(i)+1,
                                         map->numPartitions(i)); // unknown delete
          } else {
        #endif
            register_mic_compute_pair(i,pid2,trans2,map->partition(i),map->numPartitions(i));
        #if MIC_SPLIT_WITH_HOST != 0
          }
        #endif
#else
        c = new ComputeNonbondedPair(i,pid2,trans2,
                                     computeNonbondedWorkArrays,
                                     map->partition(i),map->partition(i)+1,
                                     map->numPartitions(i)); // unknown delete
#endif
        break;
#ifdef NAMD_CUDA
    case computeNonbondedCUDAType:
        c = computeNonbondedCUDAObject = new ComputeNonbondedCUDA(i,this); // unknown delete
        break;
#endif
#ifdef NAMD_MIC
    case computeNonbondedMICType:
        c = computeNonbondedMICObject = new ComputeNonbondedMIC(i,this); // unknown delete
        break;
#endif
    case computeExclsType:
        PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
        c = new ComputeExcls(i,pids); // unknown delete
        break;
    case computeBondsType:
        PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
        c = new ComputeBonds(i,pids); // unknown delete
        break;
    case computeAnglesType:
        PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
        c = new ComputeAngles(i,pids); // unknown delete
        break;
    case computeDihedralsType:
        PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
        c = new ComputeDihedrals(i,pids); // unknown delete
        break;
    case computeImpropersType:
        PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
        c = new ComputeImpropers(i,pids); // unknown delete
        break;
    case computeTholeType:
        PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
        c = new ComputeThole(i,pids); // unknown delete
        break;
    case computeAnisoType:
        PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
        c = new ComputeAniso(i,pids); // unknown delete
        break;
    case computeCrosstermsType:
        PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
        c = new ComputeCrossterms(i,pids); // unknown delete
        break;
    case computeSelfExclsType:
        c = new ComputeSelfExcls(i,map->computeData[i].pids[0].pid);
        break;
    case computeSelfBondsType:
        c = new ComputeSelfBonds(i,map->computeData[i].pids[0].pid);
        break;
    case computeSelfAnglesType:
        c = new ComputeSelfAngles(i,map->computeData[i].pids[0].pid);
        break;
    case computeSelfDihedralsType:
        c = new ComputeSelfDihedrals(i,map->computeData[i].pids[0].pid);
        break;
    case computeSelfImpropersType:
        c = new ComputeSelfImpropers(i,map->computeData[i].pids[0].pid);
        break;
    case computeSelfTholeType:
        c = new ComputeSelfThole(i,map->computeData[i].pids[0].pid);
        break;
    case computeSelfAnisoType:
        c = new ComputeSelfAniso(i,map->computeData[i].pids[0].pid);
        break;
    case computeSelfCrosstermsType:
        c = new ComputeSelfCrossterms(i,map->computeData[i].pids[0].pid);
        break;
#ifdef DPMTA
    case computeDPMTAType:
        c = new ComputeDPMTA(i); // unknown delete
        break;
#endif
#ifdef DPME
    case computeDPMEType:
        c = computeDPMEObject = new ComputeDPME(i,this); // unknown delete
        break;
#endif
    case optPmeType:
        c = new OptPmeCompute(i); // unknown delete
        break;
    case computePmeType:
        c = new ComputePme(i,map->computeData[i].pids[0].pid); // unknown delete
        break;
    case computeEwaldType:
        c = computeEwaldObject = new ComputeEwald(i,this); // unknown delete
        break;
    case computeFullDirectType:
        c = new ComputeFullDirect(i); // unknown delete
        break;
    case computeGlobalType:
        c = computeGlobalObject = new ComputeGlobal(i,this); // unknown delete
        break;
    case computeStirType:
        c = new ComputeStir(i,map->computeData[i].pids[0].pid); // unknown delete
        break;
    case computeExtType:
        c = new ComputeExt(i); // unknown delete
        break;
    case computeGBISserType: //gbis serial
        c = new ComputeGBISser(i);
        break;
    case computeFmmType: // FMM serial
        c = new ComputeFmmSerial(i);
        break;
    case computeMsmSerialType: // MSM serial
        c = new ComputeMsmSerial(i);
        break;
#ifdef CHARM_HAS_MSA
    case computeMsmMsaType: // MSM parallel long-range part using MSA
        c = new ComputeMsmMsa(i);
        break;
#endif
    case computeMsmType: // MSM parallel
        c = new ComputeMsm(i);
        break;
    case computeEFieldType:
        c = new ComputeEField(i,map->computeData[i].pids[0].pid); // unknown delete
        break;
        /* BEGIN gf */
    case computeGridForceType:
        c = new ComputeGridForce(i,map->computeData[i].pids[0].pid);
        break;
        /* END gf */
    case computeSphericalBCType:
        c = new ComputeSphericalBC(i,map->computeData[i].pids[0].pid); // unknown delete
        break;
    case computeCylindricalBCType:
        c = new ComputeCylindricalBC(i,map->computeData[i].pids[0].pid); // unknown delete
        break;
    case computeTclBCType:
        c = new ComputeTclBC(i); // unknown delete
        break;
    case computeRestraintsType:
        c = new ComputeRestraints(i,map->computeData[i].pids[0].pid); // unknown delete
        break;
    case computeConsForceType:
        c = new ComputeConsForce(i,map->computeData[i].pids[0].pid);
        break;
    case computeConsTorqueType:
        c = new ComputeConsTorque(i,map->computeData[i].pids[0].pid);
        break;
    default:
        NAMD_bug("Unknown compute type in ComputeMgr::createCompute().");
        break;
    }

    // Shared final step for every case that constructed an object: record
    // it in the map, then let it set itself up.
    if ( c ) {
        map->registerCompute(i,c);
        c->initialize();
    }
}
Ejemplo n.º 22
0
// Register one trace user event per compute object in the ComputeMap.
//
// Each event is named after the compute's type and ID (self/LCPO nonbonded
// computes also carry their first patch ID, pair computes the absolute
// patch-grid offsets between their two patches) and is registered under the
// event number TRACE_COMPOBJ_IDOFFSET + compute ID.  A type not listed here
// is reported via NAMD_bug().
//
// The whole body is compiled only when TRACE_COMPUTE_OBJECTS is defined;
// otherwise the function does nothing.
void registerUserEventsForAllComputeObjs()
{
#ifdef TRACE_COMPUTE_OBJECTS
    ComputeMap *map = ComputeMap::Object();
    PatchMap *pmap = PatchMap::Object();     
    // Sized for the longest description (the pair-type name plus four
    // full-width ints is about 73 characters); all writes are additionally
    // bounded with snprintf so the buffer cannot be overrun.
    char user_des[128];
    for (int i=0; i<map->numComputes(); i++)
    {
        // Set for every type whose description is simply "<typeName>_<id>";
        // the types with a richer description write user_des directly.
        const char *typeName = 0;
        memset(user_des, 0, sizeof(user_des));
        switch ( map->type(i) )
        {
        case computeNonbondedSelfType:
            snprintf(user_des, sizeof(user_des), "computeNonBondedSelfType_%d_pid_%d", i, map->pid(i,0));
            break;
        case computeLCPOType:
            snprintf(user_des, sizeof(user_des), "computeLCPOType_%d_pid_%d", i, map->pid(i,0));
            break;
        case computeNonbondedPairType:
          {
            // Grid position of each patch, shifted by its lattice
            // transform; the description carries the absolute offsets.
            const int adim = pmap->gridsize_a();
            const int bdim = pmap->gridsize_b();
            const int cdim = pmap->gridsize_c();
            const int p1 = map->pid(i, 0);
            const int t1 = map->trans(i, 0);
            const int x1 = pmap->index_a(p1) + adim * Lattice::offset_a(t1);
            const int y1 = pmap->index_b(p1) + bdim * Lattice::offset_b(t1);
            const int z1 = pmap->index_c(p1) + cdim * Lattice::offset_c(t1);
            const int p2 = map->pid(i, 1);
            const int t2 = map->trans(i, 1);
            const int x2 = pmap->index_a(p2) + adim * Lattice::offset_a(t2);
            const int y2 = pmap->index_b(p2) + bdim * Lattice::offset_b(t2);
            const int z2 = pmap->index_c(p2) + cdim * Lattice::offset_c(t2);
            const int dx = abs(x1-x2);
            const int dy = abs(y1-y2);
            const int dz = abs(z1-z2);
            snprintf(user_des, sizeof(user_des), "computeNonBondedPairType_%d(%d,%d,%d)", i, dx,dy,dz);
          }
            break;
        case computeExclsType:
            typeName = "computeExclsType";
            break;
        case computeBondsType:
            typeName = "computeBondsType";
            break;
        case computeAnglesType:
            typeName = "computeAnglesType";
            break;
        case computeDihedralsType:
            typeName = "computeDihedralsType";
            break;
        case computeImpropersType:
            typeName = "computeImpropersType";
            break;
        case computeTholeType:
            typeName = "computeTholeType";
            break;
        case computeAnisoType:
            typeName = "computeAnisoType";
            break;
        case computeCrosstermsType:
            typeName = "computeCrosstermsType";
            break;
        case computeSelfExclsType:
            typeName = "computeSelfExclsType";
            break;
        case computeSelfBondsType:
            typeName = "computeSelfBondsType";
            break;
        case computeSelfAnglesType:
            typeName = "computeSelfAnglesType";
            break;
        case computeSelfDihedralsType:
            typeName = "computeSelfDihedralsType";
            break;
        case computeSelfImpropersType:
            typeName = "computeSelfImpropersType";
            break;
        case computeSelfTholeType:
            typeName = "computeSelfTholeType";
            break;
        case computeSelfAnisoType:
            typeName = "computeSelfAnisoType";
            break;
        case computeSelfCrosstermsType:
            typeName = "computeSelfCrosstermsType";
            break;
#ifdef DPMTA
        case computeDPMTAType:
            typeName = "computeDPMTAType";
            break;
#endif
#ifdef DPME
        case computeDPMEType:
            typeName = "computeDPMEType";
            break;
#endif
        case computePmeType:
            typeName = "computePMEType";
            break;
        case computeEwaldType:
            typeName = "computeEwaldType";
            break;
        case computeFullDirectType:
            typeName = "computeFullDirectType";
            break;
        case computeGlobalType:
            typeName = "computeGlobalType";
            break;
        case computeStirType:
            typeName = "computeStirType";
            break;
        case computeExtType:
            typeName = "computeExtType";
            break;
        case computeEFieldType:
            typeName = "computeEFieldType";
            break;
            /* BEGIN gf */
        case computeGridForceType:
            typeName = "computeGridForceType";
            break;
            /* END gf */
        case computeSphericalBCType:
            typeName = "computeSphericalBCType";
            break;
        case computeCylindricalBCType:
            typeName = "computeCylindricalBCType";
            break;
        case computeTclBCType:
            typeName = "computeTclBCType";
            break;
        case computeRestraintsType:
            typeName = "computeRestraintsType";
            break;
        case computeConsForceType:
            typeName = "computeConsForceType";
            break;
        case computeConsTorqueType:
            typeName = "computeConsTorqueType";
            break;
        default:
            NAMD_bug("Unknown compute type in ComputeMgr::registerUserEventForAllComputeObjs().");
            break;
        }
        if ( typeName ) {
            snprintf(user_des, sizeof(user_des), "%s_%d", typeName, i);
        }

        // Since the argument in traceRegisterUserEvent is supposed
        // to be a const string which will not be copied inside the
        // function when a new user event is created, the description
        // has to be allocated on the heap (and is deliberately not freed).
        const size_t user_des_len = strlen(user_des);
        char *user_des_cst = new char[user_des_len+1];
        memcpy(user_des_cst, user_des, user_des_len+1);  // includes the terminator
        traceRegisterUserEvent(user_des_cst, TRACE_COMPOBJ_IDOFFSET+i);
    }
#endif
}
Ejemplo n.º 23
0
void LdbCoordinator::initialize(PatchMap *pMap, ComputeMap *cMap, int reinit)
{
  const SimParameters *simParams = Node::Object()->simParameters;

#if 0
  static int lbcreated = 0; // XXX static variables are unsafe for SMP
  // PE0 first time Create a load balancer
  if (CkMyPe() == 0 && !lbcreated) {
    if (simParams->ldbStrategy == LDBSTRAT_ALGNBOR) 
      CreateNamdNborLB();
    else {
      //   CreateCentralLB();
      CreateNamdCentLB();
    }
    lbcreated = 1;
  }
#endif

  //  DebugM(10,"stepsPerLdbCycle initialized\n");
  stepsPerLdbCycle = simParams->ldbPeriod;
  firstLdbStep = simParams->firstLdbStep;
  int lastLdbStep = simParams->lastLdbStep;
  int stepsPerCycle = simParams->stepsPerCycle;

  computeMap = cMap;
  patchMap = pMap;

  // Set the number of received messages correctly for node 0

  nStatsMessagesExpected = Node::Object()->numNodes();
  nStatsMessagesReceived = 0;

  if (patchNAtoms) 
    delete [] patchNAtoms;  // Depends on delete NULL to do nothing
  nPatches = patchMap->numPatches();
  patchNAtoms = new int[nPatches];

  typedef Sequencer *seqPtr;

  if ( ! reinit ) {
    delete [] sequencerThreads;  // Depends on delete NULL to do nothing
    sequencerThreads = new seqPtr[nPatches];
  }

  nLocalPatches=0;

  int i;
  for(i=0;i<nPatches;i++)
  {
    if (patchMap->node(i) == Node::Object()->myid())
    {
      nLocalPatches++;
      patchNAtoms[i]=0;
    } else {
      patchNAtoms[i]=-1;
    }
    if ( ! reinit ) sequencerThreads[i]=NULL;
  }
  if ( ! reinit ) controllerThread = NULL;
  if (nLocalPatches != patchMap->numHomePatches())
    NAMD_die("Disaggreement in patchMap data.\n");
 
  const int oldNumComputes = numComputes;
  nLocalComputes = 0;
  numComputes = computeMap->numComputes();

  for(i=0;i<numComputes;i++)  {
    if ( (computeMap->node(i) == Node::Object()->myid())
	 && ( 0
#ifndef NAMD_CUDA
	      || (computeMap->type(i) == computeNonbondedSelfType)
	      || (computeMap->type(i) == computeNonbondedPairType)
#endif
	      || (computeMap->type(i) == computeLCPOType)
	      || (computeMap->type(i) == computeSelfExclsType)
	      || (computeMap->type(i) == computeSelfBondsType)
	      || (computeMap->type(i) == computeSelfAnglesType)
	      || (computeMap->type(i) == computeSelfDihedralsType)
	      || (computeMap->type(i) == computeSelfImpropersType)
	      || (computeMap->type(i) == computeSelfTholeType)
	      || (computeMap->type(i) == computeSelfAnisoType)
	      || (computeMap->type(i) == computeSelfCrosstermsType)

                 || (computeMap->type(i) == computeBondsType)
                 || (computeMap->type(i) == computeExclsType)
                 || (computeMap->type(i) == computeAnglesType)
                 || (computeMap->type(i) == computeDihedralsType)
                 || (computeMap->type(i) == computeImpropersType)
                 || (computeMap->type(i) == computeTholeType)
                 || (computeMap->type(i) == computeAnisoType)
                 || (computeMap->type(i) == computeCrosstermsType)
	) ) {
      nLocalComputes++;
    }
  }
  
  // New LB frameworks registration

  // Allocate data structure to save incoming migrations.  Processor
  // zero will get all migrations

  // If this is the first time through, we need it register patches
  if (ldbCycleNum == reg_all_objs) {
    if ( Node::Object()->simParameters->ldBalancer == LDBAL_CENTRALIZED ) {
      reg_all_objs = 3;
    }
    // Tell the lbdb that I'm registering objects, until I'm done
    // registering them.
    theLbdb->RegisteringObjects(myHandle);
    
   if ( ldbCycleNum == 1 ) {
    patchHandles = new LDObjHandle[nLocalPatches];
    int patch_count=0;
    int i;
    for(i=0;i<nPatches;i++)
      if (patchMap->node(i) == Node::Object()->myid()) {
	LDObjid elemID;
	elemID.id[0] = i;
	elemID.id[1] = elemID.id[2] = elemID.id[3] = -2;

	if (patch_count >= nLocalPatches) {
	  iout << iFILE << iERROR << iPE 
	       << "LdbCoordinator found too many local patches!" << endi;
	  CkExit();
	}
        HomePatch *p = patchMap->homePatch(i);
        p->ldObjHandle = 
	patchHandles[patch_count] 
	  = theLbdb->RegisterObj(myHandle,elemID,0,0);
	patch_count++;

      }
   }
  
    if ( numComputes > oldNumComputes ) {
      // Register computes
      for(i=oldNumComputes; i<numComputes; i++)  {
	if ( computeMap->node(i) == Node::Object()->myid())
        {
	  if ( 0
#ifndef NAMD_CUDA
	          || (computeMap->type(i) == computeNonbondedSelfType)
	          || (computeMap->type(i) == computeNonbondedPairType)
#endif
	          || (computeMap->type(i) == computeLCPOType)
	          || (computeMap->type(i) == computeSelfExclsType)
	          || (computeMap->type(i) == computeSelfBondsType)
	          || (computeMap->type(i) == computeSelfAnglesType)
	          || (computeMap->type(i) == computeSelfDihedralsType)
	          || (computeMap->type(i) == computeSelfImpropersType)
	          || (computeMap->type(i) == computeSelfTholeType)
	          || (computeMap->type(i) == computeSelfAnisoType)
	          || (computeMap->type(i) == computeSelfCrosstermsType)
		)  {
	  // Register the object with the load balancer
	  // Store the depended patch IDs in the rest of the element ID
	  LDObjid elemID;
	  elemID.id[0] = i;
	
	  if (computeMap->numPids(i) > 2)
	    elemID.id[3] = computeMap->pid(i,2);
	  else elemID.id[3] = -1;

	  if (computeMap->numPids(i) > 1)
	    elemID.id[2] =  computeMap->pid(i,1);
	  else elemID.id[2] = -1;

	  if (computeMap->numPids(i) > 0)
	    elemID.id[1] =  computeMap->pid(i,0);
	  else elemID.id[1] = -1;

          Compute *c = computeMap->compute(i);
          if ( ! c ) NAMD_bug("LdbCoordinator::initialize() null compute pointer");

          c->ldObjHandle = theLbdb->RegisterObj(myHandle,elemID,0,1);
          }
          else if ( (computeMap->type(i) == computeBondsType)
                 || (computeMap->type(i) == computeExclsType)
                 || (computeMap->type(i) == computeAnglesType)
                 || (computeMap->type(i) == computeDihedralsType)
                 || (computeMap->type(i) == computeImpropersType)
                 || (computeMap->type(i) == computeTholeType)
                 || (computeMap->type(i) == computeAnisoType)
                 || (computeMap->type(i) == computeCrosstermsType)
               ) {
	  // Register the object with the load balancer
	  // Store the depended patch IDs in the rest of the element ID
	  LDObjid elemID;
	  elemID.id[0] = i;
	
	  elemID.id[1] = elemID.id[2] = elemID.id[3] = -3;

          Compute *c = computeMap->compute(i);
          if ( ! c ) NAMD_bug("LdbCoordinator::initialize() null compute pointer");

          c->ldObjHandle = theLbdb->RegisterObj(myHandle,elemID,0,0);
          }
	}
      }
    }
    theLbdb->DoneRegisteringObjects(myHandle);
  }

  // process saved migration messages, if any
  while ( migrateMsgs ) {
    LdbMigrateMsg *m = migrateMsgs;
    migrateMsgs = m->next;
    Compute *c = computeMap->compute(m->handle.id.id[0]);
    if ( ! c ) NAMD_bug("LdbCoordinator::initialize() null compute pointer 2");
    c->ldObjHandle = m->handle;
    delete m;
  }

  // Fixup to take care of the extra timestep at startup
  // This is pretty ugly here, but it makes the count correct
  
  // iout << "LDB Cycle Num: " << ldbCycleNum << "\n";

 if ( simParams->ldBalancer == LDBAL_CENTRALIZED ) {
  if (ldbCycleNum == 1 || ldbCycleNum == 3) {
    numStepsToRun = stepsPerCycle;
    totalStepsDone += numStepsToRun;
    takingLdbData = 0;
    theLbdb->CollectStatsOff();
  } else if (ldbCycleNum == 2 || ldbCycleNum == 4) {
    numStepsToRun = firstLdbStep - stepsPerCycle;
    while ( numStepsToRun <= 0 ) numStepsToRun += stepsPerCycle;
    totalStepsDone += numStepsToRun;
    takingLdbData = 1;
    theLbdb->CollectStatsOn();
  } else if ( (ldbCycleNum <= 6) || !takingLdbData )
  {
    totalStepsDone += firstLdbStep;
    if(lastLdbStep != -1 && totalStepsDone > lastLdbStep) {
      numStepsToRun = -1;
      takingLdbData = 0;
      theLbdb->CollectStatsOff();
    } else {
      numStepsToRun = firstLdbStep;
      takingLdbData = 1;
      theLbdb->CollectStatsOn();
    }
  }
  else 
  {
    totalStepsDone += stepsPerLdbCycle - firstLdbStep;
    if(lastLdbStep != -1 && totalStepsDone > lastLdbStep) {
      numStepsToRun = -1;
      takingLdbData = 0;
      theLbdb->CollectStatsOff();
    } else {
      numStepsToRun = stepsPerLdbCycle - firstLdbStep;
      takingLdbData = 0;
      theLbdb->CollectStatsOff();
    }
  }
 } else {
  if (ldbCycleNum==1)
  {
    totalStepsDone += firstLdbStep;
    numStepsToRun = firstLdbStep;
    takingLdbData = 0;
    theLbdb->CollectStatsOff();
  }
  else if ( (ldbCycleNum <= 4) || !takingLdbData )
  {
    totalStepsDone += firstLdbStep;
    if(lastLdbStep != -1 && totalStepsDone > lastLdbStep) {
      numStepsToRun = -1;
      takingLdbData = 0;
      theLbdb->CollectStatsOff();
    } else {
      numStepsToRun = firstLdbStep;
      takingLdbData = 1;
      theLbdb->CollectStatsOn();
    }
  }
  else 
  {
    totalStepsDone += stepsPerLdbCycle - firstLdbStep;
    if(lastLdbStep != -1 && totalStepsDone > lastLdbStep) {
      numStepsToRun = -1;
      takingLdbData = 0;
      theLbdb->CollectStatsOff();
    } else {
      numStepsToRun = stepsPerLdbCycle - firstLdbStep;
      takingLdbData = 0;
      theLbdb->CollectStatsOff();
    }
  }
 }

/*-----------------------------------------------------------------------------*
 * --------------------------------------------------------------------------- *
 * Comments inserted by Abhinav to clarify relation between ldbCycleNum,       *
 * load balancing step numbers (printed by the step() function) and            *
 * tracing of the steps                                                        *
 * --------------------------------------------------------------------------- *
 * If trace is turned off in the beginning, then tracing is turned on          *
 * at ldbCycleNum = 4 and turned off at ldbCycleNum = 8. ldbCycleNum can       *
 * be adjusted by specifying firstLdbStep and ldbPeriod which are set by       *
 * default to 5*stepspercycle and 200*stepspercycle if not specified.          *
 *                                                                             *
 * If we choose firstLdbStep = 20 and ldbPeriod = 100, we have the             *
 * following timeline (for these particular numbers):                          *
 *                                                                             *
 * Tracing         :  <------ off ------><------------- on -----------><-- off *
 * Ldb Step() No   :              1     2     3        4      5       6      7 *
 * Iteration Steps : 00====20====40====60====80======160====180=====260====280 *
 * ldbCycleNum     :  1     2     3     4     5        6      7       8      9 *
 * Instrumention   :          Inst  Inst  Inst           Inst            Inst  *
 * LDB Strategy    :              TLB  RLB   RLB            RLB            RLB *
 *                                                                             *
 * TLB = TorusLB                                                               *
 * RLB = RefineTorusLB                                                         *
 * Inst = Instrumentation Phase (no real load balancing)                       *
 * --------------------------------------------------------------------------- *
 *-----------------------------------------------------------------------------*
 */
#if 0 //replaced by traceBarrier at Controller and Sequencer
  if (traceAvailable()) {
    static int specialTracing = 0; // XXX static variables are unsafe for SMP
    if (ldbCycleNum == 1 && traceIsOn() == 0)  specialTracing = 1;
    if (specialTracing) {
      if (ldbCycleNum == 4) traceBegin();
      if (ldbCycleNum == 8) traceEnd();
    }
  }
#endif
  
  nPatchesReported = 0;
  nPatchesExpected = nLocalPatches;
  nComputesReported = 0;
  nComputesExpected = nLocalComputes * numStepsToRun;
  controllerReported = 0;
  controllerExpected = ! CkMyPe();

  if (CkMyPe() == 0)
  {
    if (computeArray == NULL)
      computeArray = new computeInfo[numComputes];
    if (patchArray == NULL)
      patchArray = new patchInfo[nPatches];
    if (processorArray == NULL)
      processorArray = new processorInfo[CkNumPes()];
  }
    
  theLbdb->ClearLoads();
}
Ejemplo n.º 24
0
void ComputeNonbondedUtil::select(void)
{
  if ( CkMyRank() ) return;

  // These defaults die cleanly if nothing appropriate is assigned.
  ComputeNonbondedUtil::calcPair = calc_error;
  ComputeNonbondedUtil::calcPairEnergy = calc_error;
  ComputeNonbondedUtil::calcSelf = calc_error;
  ComputeNonbondedUtil::calcSelfEnergy = calc_error;
  ComputeNonbondedUtil::calcFullPair = calc_error;
  ComputeNonbondedUtil::calcFullPairEnergy = calc_error;
  ComputeNonbondedUtil::calcFullSelf = calc_error;
  ComputeNonbondedUtil::calcFullSelfEnergy = calc_error;
  ComputeNonbondedUtil::calcMergePair = calc_error;
  ComputeNonbondedUtil::calcMergePairEnergy = calc_error;
  ComputeNonbondedUtil::calcMergeSelf = calc_error;
  ComputeNonbondedUtil::calcMergeSelfEnergy = calc_error;
  ComputeNonbondedUtil::calcSlowPair = calc_error;
  ComputeNonbondedUtil::calcSlowPairEnergy = calc_error;
  ComputeNonbondedUtil::calcSlowSelf = calc_error;
  ComputeNonbondedUtil::calcSlowSelfEnergy = calc_error;

  SimParameters * simParams = Node::Object()->simParameters;
  Parameters * params = Node::Object()->parameters;

  table_ener = params->table_ener;
  rowsize = params->rowsize;
  columnsize = params->columnsize;

  commOnly = simParams->commOnly;
  fixedAtomsOn = ( simParams->fixedAtomsOn && ! simParams->fixedAtomsForces );

  cutoff = simParams->cutoff;
  cutoff2 = cutoff*cutoff;

//fepb
  alchFepOn = simParams->alchFepOn;
  Fep_WCA_repuOn = simParams->alchFepWCARepuOn;
  Fep_WCA_dispOn = simParams->alchFepWCADispOn;
  alchThermIntOn = simParams->alchThermIntOn;
  alchLambda = alchLambda2 = 0;
  lesOn = simParams->lesOn;
  lesScaling = lesFactor = 0;
  Bool tabulatedEnergies = simParams->tabulatedEnergies;
  alchVdwShiftCoeff = simParams->alchVdwShiftCoeff;
  WCA_rcut1 = simParams->alchFepWCArcut1;
  WCA_rcut2 = simParams->alchFepWCArcut2;
  alchVdwLambdaEnd = simParams->alchVdwLambdaEnd;
  alchElecLambdaStart = simParams->alchElecLambdaStart;

  alchDecouple = simParams->alchDecouple;

  delete [] lambda_table;
  lambda_table = 0;

  pairInteractionOn = simParams->pairInteractionOn;
  pairInteractionSelf = simParams->pairInteractionSelf;
  pressureProfileOn = simParams->pressureProfileOn;

  // Ported by JLai -- Original JE - Go
  goForcesOn = simParams->goForcesOn;
  goMethod = simParams->goMethod; 
  // End of port

  accelMDOn = simParams->accelMDOn;

  drudeNbthole = simParams->drudeOn && (simParams->drudeNbtholeCut > 0.0);

  if ( drudeNbthole ) {
#ifdef NAMD_CUDA
    NAMD_die("drudeNbthole is not supported in CUDA version");
#endif
    if ( alchFepOn )
      NAMD_die("drudeNbthole is not supported with alchemical free-energy perturbation");
    if ( alchThermIntOn )
      NAMD_die("drudeNbthole is not supported with alchemical thermodynamic integration");
    if ( lesOn )
      NAMD_die("drudeNbthole is not supported with locally enhanced sampling");
    if ( pairInteractionOn )
      NAMD_die("drudeNbthole is not supported with pair interaction calculation");
    if ( pressureProfileOn )
      NAMD_die("drudeNbthole is not supported with pressure profile calculation");
  }

  if ( alchFepOn ) {
#ifdef NAMD_CUDA
    NAMD_die("Alchemical free-energy perturbation is not supported in CUDA version");
#endif
    alchLambda = simParams->alchLambda;
    alchLambda2 = simParams->alchLambda2;
    ComputeNonbondedUtil::calcPair = calc_pair_energy_fep;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_fep;
    ComputeNonbondedUtil::calcSelf = calc_self_energy_fep;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_fep;
    ComputeNonbondedUtil::calcFullPair = calc_pair_energy_fullelect_fep;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_fep;
    ComputeNonbondedUtil::calcFullSelf = calc_self_energy_fullelect_fep;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_fep;
    ComputeNonbondedUtil::calcMergePair = calc_pair_energy_merge_fullelect_fep;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_fep;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_energy_merge_fullelect_fep;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_fep;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_energy_slow_fullelect_fep;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_fep;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_energy_slow_fullelect_fep;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_fep;
  }  else if ( alchThermIntOn ) {
#ifdef NAMD_CUDA
    NAMD_die("Alchemical thermodynamic integration is not supported in CUDA version");
#endif
    alchLambda = simParams->alchLambda;
    ComputeNonbondedUtil::calcPair = calc_pair_ti;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_ti;
    ComputeNonbondedUtil::calcSelf = calc_self_ti;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_ti;
    ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect_ti;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_ti;
    ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect_ti;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_ti;
    ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect_ti;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_ti;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect_ti;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_ti;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect_ti;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_ti;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect_ti;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_ti;
  } else if ( lesOn ) {
#ifdef NAMD_CUDA
    NAMD_die("Locally enhanced sampling is not supported in CUDA version");
#endif
    lesFactor = simParams->lesFactor;
    lesScaling = 1.0 / (double)lesFactor;
    lambda_table = new BigReal[(lesFactor+1)*(lesFactor+1)];
    for ( int ip=0; ip<=lesFactor; ++ip ) {
      for ( int jp=0; jp<=lesFactor; ++jp ) {
        BigReal lambda_pair = 1.0;
        if (ip || jp ) {
          if (ip && jp && ip != jp) {
            lambda_pair = 0.0;
          } else {
            lambda_pair = lesScaling;
          }
        }
        lambda_table[(lesFactor+1)*ip+jp] = lambda_pair;
      }
    }
    ComputeNonbondedUtil::calcPair = calc_pair_les;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_les;
    ComputeNonbondedUtil::calcSelf = calc_self_les;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_les;
    ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect_les;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_les;
    ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect_les;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_les;
    ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect_les;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_les;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect_les;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_les;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect_les;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_les;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect_les;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_les;
  } else if ( pressureProfileOn) {
#ifdef NAMD_CUDA
    NAMD_die("Pressure profile calculation is not supported in CUDA version");
#endif
    pressureProfileSlabs = simParams->pressureProfileSlabs;
    pressureProfileAtomTypes = simParams->pressureProfileAtomTypes;

    ComputeNonbondedUtil::calcPair = calc_pair_pprof;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_pprof;
    ComputeNonbondedUtil::calcSelf = calc_self_pprof;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_pprof;
    ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect_pprof;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_pprof;
    ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect_pprof;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_pprof;
    ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect_pprof;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_pprof;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect_pprof;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_pprof;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect_pprof;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_pprof;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect_pprof;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_pprof;
  } else if ( pairInteractionOn ) {
#ifdef NAMD_CUDA
    NAMD_die("Pair interaction calculation is not supported in CUDA version");
#endif
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_int;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_int;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_int;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_int;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_int;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_int;
  } else if ( tabulatedEnergies ) {
#ifdef NAMD_CUDA
    NAMD_die("Tabulated energies is not supported in CUDA version");
#endif
    ComputeNonbondedUtil::calcPair = calc_pair_tabener;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_tabener;
    ComputeNonbondedUtil::calcSelf = calc_self_tabener;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_tabener;
    ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect_tabener;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_tabener;
    ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect_tabener;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_tabener;
    ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect_tabener;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_tabener;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect_tabener;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_tabener;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect_tabener;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_tabener;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect_tabener;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_tabener;
  } else if ( goForcesOn ) {
#ifdef NAMD_CUDA
    NAMD_die("Go forces is not supported in CUDA version");
#endif
    ComputeNonbondedUtil::calcPair = calc_pair_go;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_go;
    ComputeNonbondedUtil::calcSelf = calc_self_go;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_go;
    ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect_go;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_go;
    ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect_go;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_go;
    ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect_go;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_go;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect_go;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_go;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect_go;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_go;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect_go;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_go;
  } else {
    ComputeNonbondedUtil::calcPair = calc_pair;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy;
    ComputeNonbondedUtil::calcSelf = calc_self;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy;
    ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect;
    ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect;
    ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect;
  }

//fepe

  dielectric_1 = 1.0/simParams->dielectric;
  if ( ! ljTable ) ljTable = new LJTable;
  mol = Node::Object()->molecule;
  scaling = simParams->nonbondedScaling;
  if ( simParams->exclude == SCALED14 )
  {
    scale14 = simParams->scale14;
  }
  else
  {
    scale14 = 1.;
  }
  if ( simParams->switchingActive )
  {
    switchOn = simParams->switchingDist;
    switchOn_1 = 1.0/switchOn;
    // d0 = 1.0/(cutoff-switchOn);
    switchOn2 = switchOn*switchOn;
    c0 = 1.0/(cutoff2-switchOn2);

    if ( simParams->vdwForceSwitching ) {
      double switchOn3 = switchOn * switchOn2;
      double cutoff3 = cutoff * cutoff2;
      double switchOn6 = switchOn3 * switchOn3;
      double cutoff6 = cutoff3 * cutoff3;
      v_vdwa = -1. / ( switchOn6 * cutoff6 );
      v_vdwb = -1. / ( switchOn3 * cutoff3 );
      k_vdwa = cutoff6 / ( cutoff6 - switchOn6 );
      k_vdwb = cutoff3 / ( cutoff3 - switchOn3 );
      cutoff_3 = 1. / cutoff3;
      cutoff_6 = 1. / cutoff6;
    }
  }
  else
  {
    switchOn = cutoff;
    switchOn_1 = 1.0/switchOn;
    // d0 = 0.;  // avoid division by zero
    switchOn2 = switchOn*switchOn;
    c0 = 0.;  // avoid division by zero
  }
  c1 = c0*c0*c0;
  c3 = 3.0 * (cutoff2 - switchOn2);
  c5 = 0;
  c6 = 0;
  c7 = 0;
  c8 = 0;

  const int PMEOn = simParams->PMEOn;
  const int MSMOn = simParams->MSMOn;
  const int MSMSplit = simParams->MSMSplit;

  if ( PMEOn ) {
    ewaldcof = simParams->PMEEwaldCoefficient;
    BigReal TwoBySqrtPi = 1.12837916709551;
    pi_ewaldcof = TwoBySqrtPi * ewaldcof;
  }

  int splitType = SPLIT_NONE;
  if ( simParams->switchingActive ) splitType = SPLIT_SHIFT;
  if ( simParams->martiniSwitching ) splitType = SPLIT_MARTINI;
  if ( simParams->fullDirectOn || simParams->FMAOn || PMEOn || MSMOn ) {
    switch ( simParams->longSplitting ) {
      case C2:
      splitType = SPLIT_C2;
      break;

      case C1:
      splitType = SPLIT_C1;
      break;

      case XPLOR:
      NAMD_die("Sorry, XPLOR splitting not supported.");
      break;

      case SHARP:
      NAMD_die("Sorry, SHARP splitting not supported.");
      break;

      default:
      NAMD_die("Unknown splitting type found!");

    }
  }

  BigReal r2_tol = 0.1;
  
  r2_delta = 1.0;
  r2_delta_exp = 0;
  while ( r2_delta > r2_tol ) { r2_delta /= 2.0; r2_delta_exp += 1; }
  r2_delta_1 = 1.0 / r2_delta;

  if ( ! CkMyPe() ) {
    iout << iINFO << "NONBONDED TABLE R-SQUARED SPACING: " <<
				r2_delta << "\n" << endi;
  }

  BigReal r2_tmp = 1.0;
  int cutoff2_exp = 0;
  while ( (cutoff2 + r2_delta) > r2_tmp ) { r2_tmp *= 2.0; cutoff2_exp += 1; }

  int i;
  int n = (r2_delta_exp + cutoff2_exp) * 64 + 1;

  if ( ! CkMyPe() ) {
    iout << iINFO << "NONBONDED TABLE SIZE: " <<
				n << " POINTS\n" << endi;
  }

  if ( table_alloc ) delete [] table_alloc;
  table_alloc = new BigReal[61*n+16];
  BigReal *table_align = table_alloc;
  while ( ((long)table_align) % 128 ) ++table_align;
  table_noshort = table_align;
  table_short = table_align + 16*n;
  slow_table = table_align + 32*n;
  fast_table = table_align + 36*n;
  scor_table = table_align + 40*n;
  corr_table = table_align + 44*n;
  full_table = table_align + 48*n;
  vdwa_table = table_align + 52*n;
  vdwb_table = table_align + 56*n;
  r2_table = table_align + 60*n;
  BigReal *fast_i = fast_table + 4;
  BigReal *scor_i = scor_table + 4;
  BigReal *slow_i = slow_table + 4;
  BigReal *vdwa_i = vdwa_table + 4;
  BigReal *vdwb_i = vdwb_table + 4;
  BigReal *r2_i = r2_table;  *(r2_i++) = r2_delta;
  BigReal r2_limit = simParams->limitDist * simParams->limitDist;
  if ( r2_limit < r2_delta ) r2_limit = r2_delta;
  int r2_delta_i = 0;  // entry for r2 == r2_delta

  // fill in the table, fix up i==0 (r2==0) below
  for ( i=1; i<n; ++i ) {

    const BigReal r2_base = r2_delta * ( 1 << (i/64) );
    const BigReal r2_del = r2_base / 64.0;
    const BigReal r2 = r2_base - r2_delta + r2_del * (i%64);

    if ( r2 <= r2_limit ) r2_delta_i = i;

    const BigReal r = sqrt(r2);
    const BigReal r_1 = 1.0/r;
    const BigReal r_2 = 1.0/r2;

    // fast_ is defined as (full_ - slow_)
    // corr_ and fast_ are both zero at the cutoff, full_ is not
    // all three are approx 1/r at short distances

    // for actual interpolation, we use fast_ for fast forces and
    // scor_ = slow_ + corr_ - full_ and slow_ for slow forces
    // since these last two are of small magnitude

    BigReal fast_energy, fast_gradient;
    BigReal scor_energy, scor_gradient;
    BigReal slow_energy, slow_gradient;

    // corr_ is PME direct sum, or similar correction term
    // corr_energy is multiplied by r until later
    // corr_gradient is multiplied by -r^2 until later
    BigReal corr_energy, corr_gradient;

    
    if ( PMEOn ) {
      BigReal tmp_a = r * ewaldcof;
      BigReal tmp_b = erfc(tmp_a);
      corr_energy = tmp_b;
      corr_gradient = pi_ewaldcof*exp(-(tmp_a*tmp_a))*r + tmp_b;
    } else if ( MSMOn ) {
      BigReal a_1 = 1.0/cutoff;
      BigReal r_a = r * a_1;
      BigReal g, dg;
      SPOLY(&g, &dg, r_a, MSMSplit);
      corr_energy = 1 - r_a * g;
      corr_gradient = 1 + r_a*r_a * dg;
    } else {
      corr_energy = corr_gradient = 0;
    }

    switch(splitType) {
      case SPLIT_NONE:
        fast_energy = 1.0/r;
        fast_gradient = -1.0/r2;
        scor_energy = scor_gradient = 0;
        slow_energy = slow_gradient = 0;
	break;
      case SPLIT_SHIFT: {
	BigReal shiftVal = r2/cutoff2 - 1.0;
	shiftVal *= shiftVal;
	BigReal dShiftVal = 2.0 * (r2/cutoff2 - 1.0) * 2.0*r/cutoff2;
        fast_energy = shiftVal/r;
        fast_gradient = dShiftVal/r - shiftVal/r2;
        scor_energy = scor_gradient = 0;
        slow_energy = slow_gradient = 0;
        } 
	break;
      case SPLIT_MARTINI: { 
        // in Martini, the Coulomb switching distance is zero
        const BigReal COUL_SWITCH = 0.;
        // Gromacs shifting function
        const BigReal p1 = 1.;
        BigReal A1 = p1 * ((p1+1)*COUL_SWITCH-(p1+4)*cutoff)/(pow(cutoff,p1+2)*pow(cutoff-COUL_SWITCH,2));
        BigReal B1 = -p1 * ((p1+1)*COUL_SWITCH-(p1+3)*cutoff)/(pow(cutoff,p1+2)*pow(cutoff-COUL_SWITCH,3));
        BigReal X1 = 1.0/pow(cutoff,p1)-A1/3.0*pow(cutoff-COUL_SWITCH,3)-B1/4.0*pow(cutoff-COUL_SWITCH,4);
        BigReal r12 = (r-COUL_SWITCH)*(r-COUL_SWITCH);
        BigReal r13 = (r-COUL_SWITCH)*(r-COUL_SWITCH)*(r-COUL_SWITCH);
        BigReal shiftVal = -(A1/3.0)*r13 - (B1/4.0)*r12*r12 - X1;
        BigReal dShiftVal = -A1*r12 - B1*r13;
        fast_energy = (1/r) + shiftVal;
        fast_gradient = -1/(r2) + dShiftVal;
        scor_energy = scor_gradient = 0;
        slow_energy = slow_gradient = 0;
        } 
	break;
      case SPLIT_C1:
	// calculate actual energy and gradient
	slow_energy = 0.5/cutoff * (3.0 - (r2/cutoff2));
	slow_gradient = -1.0/cutoff2 * (r/cutoff);
	// calculate scor from slow and corr
	scor_energy = slow_energy + (corr_energy - 1.0)/r;
	scor_gradient = slow_gradient - (corr_gradient - 1.0)/r2;
	// calculate fast from slow
	fast_energy = 1.0/r - slow_energy;
	fast_gradient = -1.0/r2 - slow_gradient;
	break;
      case SPLIT_C2:
        //
        // Quintic splitting function contributed by
        // Bruce Berne, Ruhong Zhou, and Joe Morrone
        //
	// calculate actual energy and gradient
        slow_energy = r2/(cutoff*cutoff2) * (6.0 * (r2/cutoff2)
            - 15.0*(r/cutoff) + 10.0);
        slow_gradient = r/(cutoff*cutoff2) * (24.0 * (r2/cutoff2)
            - 45.0 *(r/cutoff) + 20.0);
	// calculate scor from slow and corr
        scor_energy = slow_energy + (corr_energy - 1.0)/r;
        scor_gradient = slow_gradient - (corr_gradient - 1.0)/r2;
	// calculate fast from slow
	fast_energy = 1.0/r - slow_energy;
	fast_gradient = -1.0/r2 - slow_gradient;
	break;
    }

    // foo_gradient is calculated as ( d foo_energy / d r )
    // and now divided by 2r to get ( d foo_energy / d r2 )

    fast_gradient *= 0.5 * r_1;
    scor_gradient *= 0.5 * r_1;
    slow_gradient *= 0.5 * r_1;

    // let modf be 1 if excluded, 1-scale14 if modified, 0 otherwise,
    // add scor_ - modf * slow_ to slow terms and
    // add fast_ - modf * fast_ to fast terms.

    BigReal vdwa_energy, vdwa_gradient;
    BigReal vdwb_energy, vdwb_gradient;

    const BigReal r_6 = r_2*r_2*r_2;
    const BigReal r_12 = r_6*r_6;

    // Lennard-Jones switching function
  if ( simParams->vdwForceSwitching ) {  // switch force
    // from Steinbach & Brooks, JCC 15, pgs 667-683, 1994, eqns 10-13
    if ( r2 > switchOn2 ) {
      BigReal tmpa = r_6 - cutoff_6;
      vdwa_energy = k_vdwa * tmpa * tmpa;
      BigReal tmpb = r_1 * r_2 - cutoff_3;
      vdwb_energy = k_vdwb * tmpb * tmpb;
      vdwa_gradient = -6.0 * k_vdwa * tmpa * r_2 * r_6;
      vdwb_gradient = -3.0 * k_vdwb * tmpb * r_2 * r_2 * r_1;
    } else {
      vdwa_energy = r_12 + v_vdwa;
      vdwb_energy = r_6 + v_vdwb;
      vdwa_gradient = -6.0 * r_2 * r_12;
      vdwb_gradient = -3.0 * r_2 * r_6;
    }
  } else if ( simParams->martiniSwitching ) { // switching fxn for Martini RBCG

    BigReal r12 = (r-switchOn)*(r-switchOn);        BigReal r13 = (r-switchOn)*(r-switchOn)*(r-switchOn);

    BigReal p6 = 6;
    BigReal A6 = p6 * ((p6+1)*switchOn-(p6+4)*cutoff)/(pow(cutoff,p6+2)*pow(cutoff-switchOn,2));
    BigReal B6 = -p6 * ((p6+1)*switchOn-(p6+3)*cutoff)/(pow(cutoff,p6+2)*pow(cutoff-switchOn,3));        
    BigReal C6 = 1.0/pow(cutoff,p6)-A6/3.0*pow(cutoff-switchOn,3)-B6/4.0*pow(cutoff-switchOn,4);

    BigReal p12 = 12;
    BigReal A12 = p12 * ((p12+1)*switchOn-(p12+4)*cutoff)/(pow(cutoff,p12+2)*pow(cutoff-switchOn,2));
    BigReal B12 = -p12 * ((p12+1)*switchOn-(p12+3)*cutoff)/(pow(cutoff,p12+2)*pow(cutoff-switchOn,3));
    BigReal C12 = 1.0/pow(cutoff,p12)-A12/3.0*pow(cutoff-switchOn,3)-B12/4.0*pow(cutoff-switchOn,4);

    BigReal LJshifttempA = -(A12/3)*r13 - (B12/4)*r12*r12 - C12;
    BigReal LJshifttempB = -(A6/3)*r13 - (B6/4)*r12*r12 - C6;
    const BigReal shiftValA =         // used for Lennard-Jones
                        ( r2 > switchOn2 ? LJshifttempA : -C12);
    const BigReal shiftValB =         // used for Lennard-Jones
                        ( r2 > switchOn2 ? LJshifttempB : -C6);

    BigReal LJdshifttempA = -A12*r12 - B12*r13;
    BigReal LJdshifttempB = -A6*r12 - B6*r13;
    const BigReal dshiftValA =         // used for Lennard-Jones
                        ( r2 > switchOn2 ? LJdshifttempA*0.5*r_1 : 0 );
    const BigReal dshiftValB =         // used for Lennard-Jones
                        ( r2 > switchOn2 ? LJdshifttempB*0.5*r_1 : 0 );




    //have not addressed r > cutoff

    //  dshiftValA*= 0.5*r_1;
    //  dshiftValB*= 0.5*r_1;

    vdwa_energy = r_12 + shiftValA;
    vdwb_energy = r_6 + shiftValB;
   
    vdwa_gradient = -6/pow(r,14) + dshiftValA ;
    vdwb_gradient = -3/pow(r,8) + dshiftValB;

  } else {  // switch energy
    const BigReal c2 = cutoff2-r2;
    const BigReal c4 = c2*(c3-2.0*c2);
    const BigReal switchVal =         // used for Lennard-Jones
                        ( r2 > switchOn2 ? c2*c4*c1 : 1.0 );
    const BigReal dSwitchVal =        // d switchVal / d r2
                        ( r2 > switchOn2 ? 2*c1*(c2*c2-c4) : 0.0 );

    vdwa_energy = switchVal * r_12;
    vdwb_energy = switchVal * r_6;

    vdwa_gradient = ( dSwitchVal - 6.0 * switchVal * r_2 ) * r_12;
    vdwb_gradient = ( dSwitchVal - 3.0 * switchVal * r_2 ) * r_6;
  }


    *(fast_i++) = fast_energy;
    *(fast_i++) = fast_gradient;
    *(fast_i++) = 0;
    *(fast_i++) = 0;
    *(scor_i++) = scor_energy;
    *(scor_i++) = scor_gradient;
    *(scor_i++) = 0;
    *(scor_i++) = 0;
    *(slow_i++) = slow_energy;
    *(slow_i++) = slow_gradient;
    *(slow_i++) = 0;
    *(slow_i++) = 0;
    *(vdwa_i++) = vdwa_energy;
    *(vdwa_i++) = vdwa_gradient;
    *(vdwa_i++) = 0;
    *(vdwa_i++) = 0;
    *(vdwb_i++) = vdwb_energy;
    *(vdwb_i++) = vdwb_gradient;
    *(vdwb_i++) = 0;
    *(vdwb_i++) = 0;
    *(r2_i++) = r2 + r2_delta;

  }

  if ( ! r2_delta_i ) {
    NAMD_bug("Failed to find table entry for r2 == r2_limit\n");
  }
  if ( r2_table[r2_delta_i] > r2_limit + r2_delta ) {
    NAMD_bug("Found bad table entry for r2 == r2_limit\n");
  }

  int j;
  const char *table_name = "XXXX";
  int smooth_short = 0;
  for ( j=0; j<5; ++j ) {
    BigReal *t0 = 0;
    switch (j) {
      case 0: 
        t0 = fast_table;
        table_name = "FAST";
        smooth_short = 1;
      break;
      case 1: 
        t0 = scor_table;
        table_name = "SCOR";
        smooth_short = 0;
      break;
      case 2: 
        t0 = slow_table;
        table_name = "SLOW";
        smooth_short = 0;
      break;
      case 3: 
        t0 = vdwa_table;
        table_name = "VDWA";
        smooth_short = 1;
      break;
      case 4: 
        t0 = vdwb_table;
        table_name = "VDWB";
        smooth_short = 1;
      break;
    }
    // patch up data for i=0
    t0[0] = t0[4] - t0[5] * ( r2_delta / 64.0 );  // energy
    t0[1] = t0[5];  // gradient
    t0[2] = 0;
    t0[3] = 0;
    if ( smooth_short ) {
      BigReal energy0 = t0[4*r2_delta_i];
      BigReal gradient0 = t0[4*r2_delta_i+1];
      BigReal r20 = r2_table[r2_delta_i];
      t0[0] = energy0 - gradient0 * (r20 - r2_table[0]);  // energy
      t0[1] = gradient0;  // gradient
    }
    BigReal *t;
    for ( i=0,t=t0; i<(n-1); ++i,t+=4 ) {
      BigReal x = ( r2_delta * ( 1 << (i/64) ) ) / 64.0;
      if ( r2_table[i+1] != r2_table[i] + x ) {
        NAMD_bug("Bad table delta calculation.\n");
      }
      if ( smooth_short && i+1 < r2_delta_i ) {
        BigReal energy0 = t0[4*r2_delta_i];
        BigReal gradient0 = t0[4*r2_delta_i+1];
        BigReal r20 = r2_table[r2_delta_i];
        t[4] = energy0 - gradient0 * (r20 - r2_table[i+1]);  // energy
        t[5] = gradient0;  // gradient
      }
      BigReal v1 = t[0];
      BigReal g1 = t[1];
      BigReal v2 = t[4];
      BigReal g2 = t[5];
      // explicit formulas for v1 + g1 x + c x^2 + d x^3
      BigReal c = ( 3.0 * (v2 - v1) - x * (2.0 * g1 + g2) ) / ( x * x );
      BigReal d = ( -2.0 * (v2 - v1) + x * (g1 + g2) ) / ( x * x * x );
      // since v2 - v1 is imprecise, we refine c and d numerically
      // important because we need accurate forces (more than energies!)
      for ( int k=0; k < 2; ++k ) {
        BigReal dv = (v1 - v2) + ( ( d * x + c ) * x + g1 ) * x;
        BigReal dg = (g1 - g2) + ( 3.0 * d * x + 2.0 * c ) * x;
        c -= ( 3.0 * dv - x * dg ) / ( x * x );
        d -= ( -2.0 * dv + x * dg ) / ( x * x * x );
      }
      // store in the array;
      t[2] = c;  t[3] = d;
    }

    if ( ! CkMyPe() ) {
    BigReal dvmax = 0;
    BigReal dgmax = 0;
    BigReal dvmax_r = 0;
    BigReal dgmax_r = 0;
    BigReal fdvmax = 0;
    BigReal fdgmax = 0;
    BigReal fdvmax_r = 0;
    BigReal fdgmax_r = 0;
    BigReal dgcdamax = 0;
    BigReal dgcdimax = 0;
    BigReal dgcaimax = 0;
    BigReal dgcdamax_r = 0;
    BigReal dgcdimax_r = 0;
    BigReal dgcaimax_r = 0;
    BigReal fdgcdamax = 0;
    BigReal fdgcdimax = 0;
    BigReal fdgcaimax = 0;
    BigReal fdgcdamax_r = 0;
    BigReal fdgcdimax_r = 0;
    BigReal fdgcaimax_r = 0;
    BigReal gcm = fabs(t0[1]);  // gradient magnitude running average
    for ( i=0,t=t0; i<(n-1); ++i,t+=4 ) {
      const BigReal r2_base = r2_delta * ( 1 << (i/64) );
      const BigReal r2_del = r2_base / 64.0;
      const BigReal r2 = r2_base - r2_delta + r2_del * (i%64);
      const BigReal r = sqrt(r2);
      if ( r > cutoff ) break;
      BigReal x = r2_del;
      BigReal dv = ( ( t[3] * x + t[2] ) * x + t[1] ) * x + t[0] - t[4];
      BigReal dg = ( 3.0 * t[3] * x + 2.0 * t[2] ) * x + t[1] - t[5];
      if ( t[4] != 0. && fabs(dv/t[4]) > fdvmax ) {
        fdvmax = fabs(dv/t[4]); fdvmax_r = r;
      }
      if ( fabs(dv) > dvmax ) {
        dvmax = fabs(dv); dvmax_r = r;
      }
      if ( t[5] != 0. && fabs(dg/t[5]) > fdgmax ) {
        fdgmax = fabs(dg/t[5]); fdgmax_r = r;
      }
      if ( fabs(dg) > dgmax ) {
        dgmax = fabs(dg); dgmax_r = r;
      }
      BigReal gcd = (t[4] - t[0]) / x;  // centered difference gradient
      BigReal gcd_prec = (fabs(t[0]) + fabs(t[4])) * 1.e-15 / x;  // roundoff
      gcm = 0.9 * gcm + 0.1 * fabs(t[5]);  // magnitude running average
      BigReal gca = 0.5  * (t[1] + t[5]);  // centered average gradient
      BigReal gci = ( 0.75 * t[3] * x + t[2] ) * x + t[1];  // interpolated
      BigReal rc = sqrt(r2 + 0.5 * x);
      BigReal dgcda = gcd - gca;
      if ( dgcda != 0. && fabs(dgcda) < gcd_prec ) {
        // CkPrintf("ERROR %g < PREC %g AT %g AVG VAL %g\n", dgcda, gcd_prec, rc, gca);
        dgcda = 0.;
      }
      BigReal dgcdi = gcd - gci;
      if ( dgcdi != 0. && fabs(dgcdi) < gcd_prec ) {
        // CkPrintf("ERROR %g < PREC %g AT %g INT VAL %g\n", dgcdi, gcd_prec, rc, gci);
        dgcdi = 0.;
      }
      BigReal dgcai = gca - gci;
      if ( t[1]*t[5] > 0. && gcm != 0. && fabs(dgcda/gcm) > fdgcdamax ) {
        fdgcdamax = fabs(dgcda/gcm); fdgcdamax_r = rc;
      }
      if ( fabs(dgcda) > fdgcdamax ) {
        dgcdamax = fabs(dgcda); dgcdamax_r = rc;
      }
      if ( t[1]*t[5] > 0. && gcm != 0. && fabs(dgcdi/gcm) > fdgcdimax ) {
        fdgcdimax = fabs(dgcdi/gcm); fdgcdimax_r = rc;
      }
      if ( fabs(dgcdi) > fdgcdimax ) {
        dgcdimax = fabs(dgcdi); dgcdimax_r = rc;
      }
      if ( t[1]*t[5] > 0. && gcm != 0. && fabs(dgcai/gcm) > fdgcaimax ) {
        fdgcaimax = fabs(dgcai/gcm); fdgcaimax_r = rc;
      }
      if ( fabs(dgcai) > fdgcaimax ) {
        dgcaimax = fabs(dgcai); dgcaimax_r = rc;
      }
#if 0
      CkPrintf("TABLE %s %g %g %g %g\n",table_name,rc,dgcda/gcm,dgcda,gci);
      if (dv != 0.) CkPrintf("TABLE %d ENERGY ERROR %g AT %g (%d)\n",j,dv,r,i);
      if (dg != 0.) CkPrintf("TABLE %d FORCE ERROR %g AT %g (%d)\n",j,dg,r,i);
#endif
    }
    if ( dvmax != 0.0 ) {
      iout << iINFO << "ABSOLUTE IMPRECISION IN " << table_name <<
        " TABLE ENERGY: " << dvmax << " AT " << dvmax_r << "\n" << endi;
    }
    if ( fdvmax != 0.0 ) {
      iout << iINFO << "RELATIVE IMPRECISION IN " << table_name <<
        " TABLE ENERGY: " << fdvmax << " AT " << fdvmax_r << "\n" << endi;
    }
    if ( dgmax != 0.0 ) {
      iout << iINFO << "ABSOLUTE IMPRECISION IN " << table_name <<
        " TABLE FORCE: " << dgmax << " AT " << dgmax_r << "\n" << endi;
    }
    if ( fdgmax != 0.0 ) {
      iout << iINFO << "RELATIVE IMPRECISION IN " << table_name <<
        " TABLE FORCE: " << fdgmax << " AT " << fdgmax_r << "\n" << endi;
    }
    if (fdgcdamax != 0.0 ) {
      iout << iINFO << "INCONSISTENCY IN " << table_name <<
        " TABLE ENERGY VS FORCE: " << fdgcdamax << " AT " << fdgcdamax_r << "\n" << endi;
      if ( fdgcdamax > 0.1 ) {
        iout << iERROR << "\n";
        iout << iERROR << "CALCULATED " << table_name <<
          " FORCE MAY NOT MATCH ENERGY! POSSIBLE BUG!\n";
        iout << iERROR << "\n";
      }
    }
    if (0 && fdgcdimax != 0.0 ) {
      iout << iINFO << "INCONSISTENCY IN " << table_name <<
        " TABLE ENERGY VS FORCE: " << fdgcdimax << " AT " << fdgcdimax_r << "\n" << endi;
    }
    if ( 0 && fdgcaimax != 0.0 ) {
      iout << iINFO << "INCONSISTENCY IN " << table_name <<
        " TABLE AVG VS INT FORCE: " << fdgcaimax << " AT " << fdgcaimax_r << "\n" << endi;
    }
    }

  }

  for ( i=0; i<4*n; ++i ) {
    corr_table[i] = fast_table[i] + scor_table[i];
    full_table[i] = fast_table[i] + slow_table[i];
  }

#if 0  
  for ( i=0; i<n; ++i ) {
   for ( int j=0; j<4; ++j ) {
    table_short[16*i+6-2*j] = table_noshort[16*i+6-2*j] = vdwa_table[4*i+j];
    table_short[16*i+7-2*j] = table_noshort[16*i+7-2*j] = vdwb_table[4*i+j];
    table_short[16*i+8+3-j] = fast_table[4*i+j];
    table_short[16*i+12+3-j] = scor_table[4*i+j];
    table_noshort[16*i+8+3-j] = corr_table[4*i+j];
    table_noshort[16*i+12+3-j] = full_table[4*i+j];
   }
  }
#endif 

  for ( i=0; i<n; ++i ) {
    table_short[16*i+ 0] = table_noshort[16*i+0] = -6.*vdwa_table[4*i+3];
    table_short[16*i+ 2] = table_noshort[16*i+2] = -6.*vdwb_table[4*i+3];
    table_short[16*i+ 4] = table_noshort[16*i+4] = -2.*vdwa_table[4*i+1];
    table_short[16*i+ 6] = table_noshort[16*i+6] = -2.*vdwb_table[4*i+1];
    
    table_short[16*i+1] = table_noshort[16*i+1] = -4.*vdwa_table[4*i+2];
    table_short[16*i+3] = table_noshort[16*i+3] = -4.*vdwb_table[4*i+2];
    table_short[16*i+5] = table_noshort[16*i+5] = -1.*vdwa_table[4*i+0];
    table_short[16*i+7] = table_noshort[16*i+7] = -1.*vdwb_table[4*i+0];
    
    table_short[16*i+8]  = -6.*fast_table[4*i+3];
    table_short[16*i+9]  = -4.*fast_table[4*i+2];
    table_short[16*i+10] = -2.*fast_table[4*i+1];
    table_short[16*i+11] = -1.*fast_table[4*i+0];

    table_noshort[16*i+8]  = -6.*corr_table[4*i+3];
    table_noshort[16*i+9]  = -4.*corr_table[4*i+2];
    table_noshort[16*i+10] = -2.*corr_table[4*i+1];
    table_noshort[16*i+11] = -1.*corr_table[4*i+0];

    table_short[16*i+12] = -6.*scor_table[4*i+3];
    table_short[16*i+13] = -4.*scor_table[4*i+2];
    table_short[16*i+14] = -2.*scor_table[4*i+1];
    table_short[16*i+15] = -1.*scor_table[4*i+0];

    table_noshort[16*i+12] = -6.*full_table[4*i+3];
    table_noshort[16*i+13] = -4.*full_table[4*i+2];
    table_noshort[16*i+14] = -2.*full_table[4*i+1];
    table_noshort[16*i+15] = -1.*full_table[4*i+0];
  }

#if 0
  char fname[100];
  sprintf(fname,"/tmp/namd.table.pe%d.dat",CkMyPe());
  FILE *f = fopen(fname,"w");
  for ( i=0; i<(n-1); ++i ) {
    const BigReal r2_base = r2_delta * ( 1 << (i/64) );
    const BigReal r2_del = r2_base / 64.0;
    const BigReal r2 = r2_base - r2_delta + r2_del * (i%64);
    BigReal *t;
    if ( r2 + r2_delta != r2_table[i] ) fprintf(f,"r2 error! ");
    fprintf(f,"%g",r2);
    t = fast_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    t = scor_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    t = slow_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    t = corr_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    t = full_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    t = vdwa_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    t = vdwb_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    fprintf(f,"\n");
  }
  fclose(f);
#endif

#ifdef NAMD_CUDA
  send_build_cuda_force_table();
#endif

}
Ejemplo n.º 25
0
// Error stub: whenever it is invoked it reports, through NAMD_bug, that a
// missing nonbonded compute routine was called.  The nonbonded argument is
// unnamed and never used.
void ComputeNonbondedUtil::calc_error(nonbonded *)
{
  NAMD_bug(
    "Tried to call missing nonbonded compute routine."
  );
}
Ejemplo n.º 26
0
// Handles a complete proxy data message (original note: "every doMigration").
//
// While boxes are still open the message is only parked in curProxyMsg with
// status PROXYALLMSGBUFFERED and nothing else is touched.  Otherwise the IDs
// registered from pExt are unregistered (if a previous message exists), the
// previous message is deleted (unless both CMK_PERSISTENT_COMM and
// USE_PERSISTENT_TREE are enabled), and the position, averaged-position,
// velocity, optional GBIS/LCPO and CompAtomExt data of this proxy are
// refreshed from msg before positionsReady(1) is called.
void ProxyPatch::receiveAll(ProxyDataMsg *msg)
{
  DebugM(3, "receiveAll(" << patchID << ")\n");

  if ( boxesOpen ) {
    // Cannot consume the message yet: remember it and flag it as buffered.
    proxyMsgBufferStatus = PROXYALLMSGBUFFERED;
    curProxyMsg = msg;
    return;
  }

  // This must run before prevProxyMsg is deleted below, because
  // positionPtrBegin points to space inside prevProxyMsg.
  if ( prevProxyMsg ) {
    atomMapper->unregisterIDsCompAtomExt(pExt.begin(), pExt.end());
  }

  // Release the ProxyDataMsg of the previous step, then adopt the new one
  // as both the current and the "previous" message.
#if ! CMK_PERSISTENT_COMM || ! USE_PERSISTENT_TREE
  delete prevProxyMsg;
#endif
  prevProxyMsg = curProxyMsg = msg;

  flags = msg->flags;

#ifdef REMOVE_PROXYDATAMSG_EXTRACOPY
  // Use the message's position list in place only when it is 32-byte
  // aligned; otherwise copy it into p and point at the copy.
  const bool listAligned = ( ((int64)msg->positionList) % 32 ) == 0;
  if ( listAligned ) {
    positionPtrBegin = msg->positionList;
  } else {
    p.resize(msg->plLen);
    positionPtrBegin = p.begin();
    memcpy(positionPtrBegin, msg->positionList, sizeof(CompAtom)*(msg->plLen));
  }
  positionPtrEnd = positionPtrBegin + msg->plLen;
  if ( ((int64)positionPtrBegin) % 32 ) {
    NAMD_bug("ProxyPatch::receiveAll positionPtrBegin not 32-byte aligned");
  }
#else
  // Always keep a private copy of the positions in p.
  p.resize(msg->plLen);
  memcpy(p.begin(), msg->positionList, sizeof(CompAtom)*(msg->plLen));
#endif

// DMK
#if defined(NAMD_CUDA) || defined(NAMD_MIC)
  cudaAtomPtr = msg->cudaAtomList;
#endif

  // The atom count comes from the message rather than from p.size().
  numAtoms = msg->plLen;

  // Averaged positions are viewed directly inside the message.
  avgPositionPtrBegin = msg->avgPositionList;
  avgPositionPtrEnd = avgPositionPtrBegin + msg->avgPlLen;

  // BEGIN LA
  velocityPtrBegin = msg->velocityList;
  velocityPtrEnd = velocityPtrBegin + msg->vlLen;
  // END LA

  if ( flags.doGBIS ) {
    // Two intRad entries are copied per atom.
    const int numRadii = numAtoms*2;
    intRad.resize(numRadii);
    for ( int r = 0; r < numRadii; ++r ) {
      intRad[r] = msg->intRadList[r];
    }
  }

  if ( flags.doLCPO ) {
    lcpoType.resize(numAtoms);
    for ( int a = 0; a < numAtoms; ++a ) {
      lcpoType[a] = msg->lcpoTypeList[a];
    }
  }

  // The CompAtomExt list cannot be reused from inside the message: it is
  // needed at every step, while the ProxyDataMsg is deleted every step.
  // Hence this extra copy, whose cost is amortized over the steps in which
  // atoms do not migrate.  --Chao Mei
  pExt.resize(msg->plExtLen);
  memcpy(pExt.begin(), msg->positionExtList, sizeof(CompAtomExt)*(msg->plExtLen));

  // DMK - Atom Separation (water vs. non-water)
#if NAMD_SeparateWaters != 0
  numWaterAtoms = msg->numWaterAtoms;
#endif

  positionsReady(1);
}
Ejemplo n.º 27
0
// Alg7 assignment pass.
//
// After adjustBackgroundLoadAndComputeAverage() and makeHeaps(), up to
// numComputes computes are taken off the compute heaps (background-pair,
// background-self, pair, self -- in that order) and every compute that has
// no processor yet (processor == -1) is assigned to one:
//   1. togrid() classifies the processors holding the compute's two patches
//      and the processors holding proxies of those patches into goodP/poorP;
//      if any goodP slot is filled, the most preferred one is used.
//   2. Otherwise every processor in pes is passed to togrid() as well, and
//      the most preferred goodP slot, or failing that the most preferred
//      poorP slot (overloaded processors), is used.
// Afterwards loads are printed and, if computeMax() does not exceed
// origMaxLoad, multirefine(1.05) is run and loads are printed again.
//
// NAMD_bug is raised when the heaps are exhausted before numComputes
// computes were taken, or when no processor at all can be found.
void Alg7::strategy()
{
  computeInfo *c;
  int numAssigned;
  // Candidate grids filled by togrid().  Per the original note the first two
  // indices are [# of real patches][# of proxies]; the meaning of the third
  // index is not visible here -- NOTE(review): confirm against togrid().
  processorInfo* goodP[3][3][2];
  processorInfo* poorP[3][3][2];  // fallback option

  // NOTE(review): startTime is not read anywhere else in this function.
  double startTime = CmiWallTimer();

  adjustBackgroundLoadAndComputeAverage();
  makeHeaps();

  numAssigned = 0;

  overLoad = 1.2;
  for (int ic=0; ic<numComputes; ic++) {

    // place computes w/ patches on heavily background loaded nodes first
    // place pair before self, because self is more flexible
    c = (computeInfo *) computeBgPairHeap->deleteMax();
    if ( ! c ) c = (computeInfo *) computeBgSelfHeap->deleteMax();
    if ( ! c ) c = (computeInfo *) computePairHeap->deleteMax();
    if ( ! c ) c = (computeInfo *) computeSelfHeap->deleteMax();

    // The emptiness check has to come before any use of c: previously
    // c->processor was read first, so exhausted heaps led to a null
    // dereference instead of this diagnostic.
    if ( ! c ) NAMD_bug("Alg7: computesHeap empty!");

    if (c->processor != -1) continue; // already placed, skip to the next compute

    // Start every compute with empty candidate grids.
    for (int i=0;i<3;i++) {
      for (int j=0;j<3;j++) {
        for (int k=0;k<2;k++) {
          goodP[i][j][k]=0;
          poorP[i][j][k]=0;
        }
      }
    }

    // first try for at least one proxy
    {
      Iterator nextProc;
      processorInfo *p;

      // Processors that hold the two patches themselves.
      p = &processors[patches[c->patch1].processor];
      togrid(goodP, poorP, p, c);

      p = &processors[patches[c->patch2].processor];
      togrid(goodP, poorP, p, c);

      // Processors that hold a proxy of patch1.
      p = (processorInfo *)patches[c->patch1].
                            proxiesOn.iterator((Iterator *)&nextProc);
      while (p) {
        togrid(goodP, poorP, p, c);
        p = (processorInfo *)patches[c->patch1].
                            proxiesOn.next((Iterator*)&nextProc);
      }

      // Processors that hold a proxy of patch2.
      p = (processorInfo *)patches[c->patch2].
                            proxiesOn.iterator((Iterator *)&nextProc);
      while (p) {
        togrid(goodP, poorP, p, c);
        p = (processorInfo *)patches[c->patch2].
                            proxiesOn.next((Iterator*)&nextProc);
      }
      p = 0;
      // prefer to place compute with existing proxies over home patches
      if ((p = goodP[0][2][0])    // No home, two proxies
       || (p = goodP[1][1][0])    // One home, one proxy
       || (p = goodP[2][0][0])    // Two home, no proxies
       || (p = goodP[0][1][0])    // No home, one proxy
       || (p = goodP[1][0][0])    // One home, no proxies
       || (p = goodP[0][0][0])    // No home, no proxies
       || (p = goodP[0][1][1])    // No home, one proxy
       || (p = goodP[1][0][1])    // One home, no proxies
       || (p = goodP[0][0][1])    // No home, no proxies
         ) {
        assign(c,p); numAssigned++;
        continue;
      }
    }

    // no luck, do it the long way: offer every processor in pes to togrid()

    heapIterator nextProcessor;
    processorInfo *p = (processorInfo *)
      pes->iterator((heapIterator *) &nextProcessor);
    while (p) {
      togrid(goodP, poorP, p, c);
      p = (processorInfo *) pes->next(&nextProcessor);
    }

    p = 0;
    // prefer to place compute with existing proxies over home patches
    if ((p = goodP[0][2][0])    // No home, two proxies
     || (p = goodP[1][1][0])    // One home, one proxy
     || (p = goodP[2][0][0])    // Two home, no proxies
     || (p = goodP[0][1][0])    // No home, one proxy
     || (p = goodP[1][0][0])    // One home, no proxies
     || (p = goodP[0][0][0])    // No home, no proxies
     || (p = goodP[0][1][1])    // No home, one proxy
     || (p = goodP[1][0][1])    // One home, no proxies
     || (p = goodP[0][0][1])    // No home, no proxies
       ) {
      assign(c,p); numAssigned++;
    } else if (   // overloaded processors
        (p = poorP[0][2][0])    // No home, two proxies
     || (p = poorP[1][1][0])    // One home, one proxy
     || (p = poorP[2][0][0])    // Two home, no proxies
     || (p = poorP[0][1][0])    // No home, one proxy
     || (p = poorP[1][0][0])    // One home, no proxies
     || (p = poorP[0][0][0])    // No home, no proxies
     || (p = poorP[0][1][1])    // No home, one proxy
     || (p = poorP[1][0][1])    // One home, no proxies
     || (p = poorP[0][0][1])    // No home, no proxies
       ) {
      //iout << iWARN << "overload assign to " << p->Id << "\n" << endi;
      assign(c,p); numAssigned++;
    } else {
      NAMD_bug("*** Alg 7 No receiver found 1 ***");
      break;
    }
  }

  printLoads();

  if ( computeMax() <= origMaxLoad ) {
    // binary-search refinement procedure
    multirefine(1.05);
    printLoads();
  }

}
Ejemplo n.º 28
0
// Handles the proxy data message (original note: "each timestep").
//
// The message kept from the previous step is deleted first, in every case.
// While boxes are still open the new message is only parked in curProxyMsg
// with status PROXYDATAMSGBUFFERED.  Otherwise the position, averaged
// position and velocity views are refreshed from msg and positionsReady()
// is called: with 1 for a proxy whose numAtoms is still -1 (after also
// taking the atom count and CompAtomExt list from msg), with 0 otherwise.
void ProxyPatch::receiveData(ProxyDataMsg *msg)
{
  DebugM(3, "receiveData(" << patchID << ")\n");

  // Release the ProxyDataMsg of the previous step.
  delete prevProxyMsg;
  prevProxyMsg = NULL;

  if ( boxesOpen ) {
    // Park the message; a single slot is enough, no real queue is needed.
    proxyMsgBufferStatus = PROXYDATAMSGBUFFERED;
    curProxyMsg = msg;
    return;
  }

  // The position arrays inside the ProxyDataMsg are reused, so the message
  // is kept as both the current and the "previous" one.  --Chao Mei
  prevProxyMsg = curProxyMsg = msg;
  flags = msg->flags;

#ifdef REMOVE_PROXYDATAMSG_EXTRACOPY
  // Use the message's position list in place only when it is 32-byte
  // aligned; otherwise copy it into p and point at the copy.
  const bool listAligned = ( ((int64)msg->positionList) % 32 ) == 0;
  if ( listAligned ) {
    positionPtrBegin = msg->positionList;
  } else {
    p.resize(msg->plLen);
    positionPtrBegin = p.begin();
    memcpy(positionPtrBegin, msg->positionList, sizeof(CompAtom)*(msg->plLen));
  }
  positionPtrEnd = positionPtrBegin + msg->plLen;
  if ( ((int64)positionPtrBegin) % 32 ) {
    NAMD_bug("ProxyPatch::receiveData positionPtrBegin not 32-byte aligned");
  }
#else
  // Always keep a private copy of the positions in p.
  p.resize(msg->plLen);
  memcpy(p.begin(), msg->positionList, sizeof(CompAtom)*(msg->plLen));
#endif

// DMK
#if defined(NAMD_CUDA) || defined(NAMD_MIC)
  cudaAtomPtr = msg->cudaAtomList;
#endif

  // Averaged positions are viewed directly inside the message.
  avgPositionPtrBegin = msg->avgPositionList;
  avgPositionPtrEnd = avgPositionPtrBegin + msg->avgPlLen;

  // BEGIN LA
  velocityPtrBegin = msg->velocityList;
  velocityPtrEnd = velocityPtrBegin + msg->vlLen;
  // END LA

  if ( numAtoms != -1 ) {
    // Atom count already known for this proxy.
    positionsReady(0);
    return;
  }

  // New proxy (receiveAtoms is not called for these): take the atom count
  // from the message rather than from p.size().
  numAtoms = msg->plLen;

  // Retrieve the CompAtomExt list.
  CmiAssert(msg->plExtLen!=0);
  pExt.resize(msg->plExtLen);
  memcpy(pExt.begin(), msg->positionExtList, sizeof(CompAtomExt)*(msg->plExtLen));

  // DMK - Atom Separation (water vs. non-water)
#if NAMD_SeparateWaters != 0
  numWaterAtoms = msg->numWaterAtoms;
#endif

  positionsReady(1);
}