void Communicate::sendMessage(int PE, void *msg, int size) { if ( CmiMyPe() ) NAMD_bug("Communicate::sendMessage not from Pe 0"); while ( CkpvAccess(CsmAcks) < nchildren ) { CmiDeliverMsgs(0); } CkpvAccess(CsmAcks) = 0; CmiSetHandler(msg, CsmHandlerIndex); switch(PE) { case ALL: NAMD_bug("Unexpected Communicate::sendMessage(ALL,...)"); //CmiSyncBroadcastAll(size, (char *)msg); break; case ALLBUTME: //CmiSyncBroadcast(size, (char *)msg); if ( CmiNumNodes() > 2 ) { CmiSyncSend(CmiNodeFirst(2),size,(char*)msg); } if ( CmiNumNodes() > 1 ) { CmiSyncSend(CmiNodeFirst(1),size,(char*)msg); } break; default: NAMD_bug("Unexpected Communicate::sendMessage(PEL,...)"); //CmiSyncSend(PE, size, (char *)msg); break; } }
/**
 * Prepend a record to the set without checking whether it is already
 * present (the duplicate check only exists in DEBUG_IRSET builds).
 *
 * @param info  record to store in the new head node
 */
void IRSet::unchecked_insert(InfoRecord *info)
{
#ifdef DEBUG_IRSET
  if (find(info)) NAMD_bug("IRSet::unchecked_insert duplicate");
#endif

  ++nElements;

  // The new node becomes the head of the singly linked list.
  listNode *fresh = new listNode(info);
  fresh->next = head;
  head = fresh;

#ifdef DEBUG_IRSET
  // Verify that the cached element count matches the real list length.
  int length = 0;
  for ( listNode *cur = fresh; cur; cur = cur->next ) {
    ++length;
  }
  if ( length != nElements ) NAMD_bug("IRSet::unchecked_insert count");
#endif
}
void *Communicate::getMessage(int PE, int tag) { if ( CmiMyRank() ) NAMD_bug("Communicate::getMessage called on non-rank-zero Pe\n"); int itag[2], rtag[2]; void *msg; itag[0] = (PE==(-1)) ? (CmmWildCard) : PE; itag[1] = (tag==(-1)) ? (CmmWildCard) : tag; while((msg=CmmGet(CkpvAccess(CsmMessages),2,itag,rtag))==0) { CmiDeliverMsgs(0); } char *ackmsg = (char *) CmiAlloc(CmiMsgHeaderSizeBytes); CmiSetHandler(ackmsg, CsmAckHandlerIndex); CmiSyncSend(CmiNodeFirst((CmiMyNode()-1)/2), CmiMsgHeaderSizeBytes, ackmsg); while ( CkpvAccess(CsmAcks) < nchildren ) { CmiDeliverMsgs(0); } CkpvAccess(CsmAcks) = 0; int size = SIZEFIELD(msg); for ( int i = 2; i >= 1; --i ) { int node = CmiMyNode() * 2 + i; if ( node < CmiNumNodes() ) { CmiSyncSend(CmiNodeFirst(node),size,(char*)msg); } } return msg; }
//---------------------------------------------------------------------- ComputeID ComputeMap::storeCompute(int inode, int maxPids, ComputeType type, int partition,int numPartitions) { if (maxPids > numPidsAllocated) { NAMD_bug("ComputeMap::storeCompute called with maxPids > numPidsAllocated"); } int cid; cid = nComputes; nComputes++; computeData.resize(nComputes); computeData[cid].node=inode; computeData[cid].type = type; computeData[cid].partition = partition; computeData[cid].numPartitions = numPartitions; computeData[cid].numPids = 0; #if defined(NAMD_MIC) && (MIC_SPLIT_WITH_HOST != 0) // By default, pass all non-bonded selfs and pairs to the device if (type == computeNonbondedSelfType || type == computeNonbondedPairType) { computeData[cid].directToDevice = 1; } else { computeData[cid].directToDevice = 0; } #endif return cid; }
// data submitted from child void ReductionMgr::remoteSubmit(ReductionSubmitMsg *msg) { int setID = msg->reductionSetID; ReductionSet *set = reductionSets[setID]; int seqNum = msg->sequenceNumber + set->addToRemoteSequenceNumber[childIndex(msg->sourceNode)]; //iout << "seq " << seqNum << " from " << msg->sourceNode << " received on " << CkMyPe() << "\n" << endi; int size = msg->dataSize; if ( size != set->dataSize ) { NAMD_bug("ReductionMgr::remoteSubmit data sizes do not match."); } BigReal *newData = msg->data; ReductionSetData *data = set->getData(seqNum); BigReal *curData = data->data; #ifdef ARCH_POWERPC #pragma disjoint (*curData, *newData) #pragma unroll(4) #endif for ( int i = 0; i < size; ++i ) { curData[i] += newData[i]; } // CkPrintf("[%d] reduction Submit received from node[%d] %d\n", // CkMyPe(),childIndex(msg->sourceNode),msg->sourceNode); delete msg; data->submitsRecorded++; if ( data->submitsRecorded == set->submitsRegistered ) { mergeAndDeliver(set,seqNum); } }
/**
 * Build an empty reduction set.
 *
 * For REDUCTIONS_BASIC and REDUCTIONS_AMD the caller must pass size == -1
 * and the size becomes REDUCTION_MAX_RESERVED; for every other set an
 * explicit size is required.  Violations are reported via NAMD_bug().
 *
 * @param setID        identifier stored in reductionSetID
 * @param size         number of data elements, or -1 for the reserved sets
 * @param numChildren  length of the addToRemoteSequenceNumber array
 *                     (allocated here, contents not initialized)
 */
ReductionSet::ReductionSet(int setID, int size, int numChildren)
{
  const bool reservedSet =
      ( setID == REDUCTIONS_BASIC ) || ( setID == REDUCTIONS_AMD );

  if ( reservedSet ) {
    if ( size != -1 ) {
      NAMD_bug("ReductionSet size specified for REDUCTIONS_BASIC or REDUCTIONS_AMD.");
    }
    size = REDUCTION_MAX_RESERVED;
  }
  if ( size == -1 ) NAMD_bug("ReductionSet size not specified.");

  reductionSetID = setID;
  dataSize = size;

  nextSequenceNumber = 0;
  submitsRegistered = 0;
  requireRegistered = 0;
  threadIsWaiting = 0;
  dataQueue = 0;

  addToRemoteSequenceNumber = new int[numChildren];
}
static void CsmHandler(void *msg) { if ( CmiMyRank() ) NAMD_bug("Communicate CsmHandler on non-rank-zero pe"); // get start of user message int *m = (int *) ((char *)msg+CmiMsgHeaderSizeBytes); // sending node & tag act as tags CmmPut(CkpvAccess(CsmMessages), 2, m, msg); }
/**
 * Enter the load-balancing database's local barrier.
 *
 * Before doing so, checks that the reported patch, compute and controller
 * counts each equal their expected values; any mismatch is a NAMD_bug().
 */
void LdbCoordinator::barrier(void)
{
  const bool countsMatch =
      ( nPatchesReported == nPatchesExpected ) &&
      ( nComputesReported == nComputesExpected ) &&
      ( controllerReported == controllerExpected );

  if ( ! countsMatch ) {
    NAMD_bug("Load balancer received wrong number of events.\n");
  }

  theLbdb->AtLocalBarrier(ldBarrierHandle);
}
/**
 * Evaluate a Tcl script string at global scope.
 *
 * @param script     script text passed to Tcl_EvalEx()
 * @param resultPtr  receives the interpreter's string result
 * @return           the Tcl completion code; in builds without NAMD_TCL the
 *                   call reports NAMD_bug() and the return value is -1
 */
int ScriptTcl::eval(const char *script, const char **resultPtr)
{
#ifndef NAMD_TCL
  NAMD_bug("ScriptTcl::eval called without Tcl.");
  return -1;  // appease compiler
#else
  const int status = Tcl_EvalEx(interp,script,-1,TCL_EVAL_GLOBAL);
  *resultPtr = Tcl_GetStringResult(interp);
  return status;
#endif
}
/**
 * Replace the compute table with n records copied from ptr.
 *
 * If the map already holds computes, the new count must equal the current
 * one; otherwise NAMD_bug() is raised.
 *
 * @param n    number of records in ptr
 * @param ptr  source array of n ComputeData records (copied with memcpy)
 */
void ComputeMap::unpack (int n, ComputeData *ptr)
{
  DebugM(4,"Unpacking ComputeMap\n");

  const bool alreadyPopulated = ( nComputes != 0 );
  if ( alreadyPopulated && nComputes != n ) {
    NAMD_bug("number of computes in new ComputeMap has changed!\n");
  }

  nComputes = n;
  computeData.resize(nComputes);

  const size_t bytes = nComputes * sizeof(ComputeData);
  memcpy(computeData.begin(), ptr, bytes);
}
void ComputeMap::extendPtrs() { if ( ! computePtrs ) NAMD_bug("ComputeMap::extendPtrs() 1"); int oldN = nComputes; nComputes = computeData.size(); if ( nComputes > oldN ) { Compute **oldPtrs = computePtrs; computePtrs = new Compute*[nComputes]; memcpy(computePtrs, oldPtrs, oldN*sizeof(Compute*)); memset(computePtrs+oldN, 0, (nComputes-oldN)*sizeof(Compute*)); delete [] oldPtrs; } }
void ComputeGridForce::doForce(FullAtom* p, Results* r) { SimParameters *simParams = Node::Object()->simParameters; Molecule *mol = Node::Object()->molecule; Force *forces = r->f[Results::normal]; BigReal energy = 0; Force extForce = 0.; Tensor extVirial; int numAtoms = homePatch->getNumAtoms(); if ( mol->numGridforceGrids < 1 ) NAMD_bug("No grids loaded in ComputeGridForce::doForce()"); for (int gridnum = 0; gridnum < mol->numGridforceGrids; gridnum++) { GridforceGrid *grid = mol->get_gridfrc_grid(gridnum); if (homePatch->flags.step % GF_OVERLAPCHECK_FREQ == 0) { // only check on node 0 and every GF_OVERLAPCHECK_FREQ steps if (simParams->langevinPistonOn || simParams->berendsenPressureOn) { // check for grid overlap if pressure control is on // not needed without pressure control, since the check is also performed on startup if (!grid->fits_lattice(homePatch->lattice)) { char errmsg[512]; if (simParams->gridforcechecksize) { sprintf(errmsg, "Warning: Periodic cell basis too small for Gridforce grid %d. Set gridforcechecksize off in configuration file to ignore.\n", gridnum); NAMD_die(errmsg); } } } } Position center = grid->get_center(); if (homePatch->flags.step % 100 == 1) { DebugM(3, "center = " << center << "\n" << endi); DebugM(3, "e = " << grid->get_e() << "\n" << endi); } if (grid->get_grid_type() == GridforceGrid::GridforceGridTypeFull) { GridforceFullMainGrid *g = (GridforceFullMainGrid *)grid; do_calc(g, gridnum, p, numAtoms, mol, forces, energy, extForce, extVirial); } else if (grid->get_grid_type() == GridforceGrid::GridforceGridTypeLite) { GridforceLiteGrid *g = (GridforceLiteGrid *)grid; do_calc(g, gridnum, p, numAtoms, mol, forces, energy, extForce, extVirial); } } reduction->item(REDUCTION_MISC_ENERGY) += energy; ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_NORMAL,extForce); ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,extVirial); reduction->submit(); }
/**
 * Evaluate a Tcl script file.
 *
 * Any non-empty interpreter result is printed with a "TCL: " prefix.  If the
 * evaluation does not return TCL_OK, the interpreter's errorInfo variable is
 * passed to NAMD_die().  Builds without NAMD_TCL report NAMD_bug().
 *
 * @param scriptFile  path handed to Tcl_EvalFile()
 */
void ScriptTcl::load(char *scriptFile)
{
#ifndef NAMD_TCL
  NAMD_bug("ScriptTcl::load called without Tcl.");
#else
  const int status = Tcl_EvalFile(interp,scriptFile);

  const char *output = Tcl_GetStringResult(interp);
  if ( output[0] != 0 ) {
    CkPrintf("TCL: %s\n",output);
  }

  if ( status == TCL_OK ) return;

  const char *trace = Tcl_GetVar(interp,"errorInfo",0);
  NAMD_die(trace);
#endif
}
/**
 * Handle an incoming broadcast message.
 *
 * If msg->id has registrants, the payload is stored as a tagged message
 * with a count equal to the number of registrants (minus one when the
 * message originated on this PE), and every registrant is awakened.  The
 * message is always deleted before returning.
 */
void BroadcastMgr::recvBroadcast(BroadcastMsg *msg)
{
  BOID *entry = boid.find(BOID(msg->id));

  // Only ids with registrants need any work.
  if ( entry ) {
    int remaining = entry->broadcastSet->size();
    if ( msg->node == CkMyPe() ) {
      --remaining;  // get rid of sender
    }

    if ( remaining < 0 ) {
      NAMD_bug("BroadcastMgr::recvBroadcast counter < 0");
    } else if ( remaining > 0 ) {
      // add message to taggedMsg container
      entry->taggedMsg->add(TaggedMsg(msg->tag,msg->size,remaining,msg->msg));

      // inform all registrants of new message
      UniqueSetIter<BroadcastClientElem> client(*(entry->broadcastSet));
      for (client = client.begin(); client != client.end(); client++) {
        client->broadcastClient->awaken(msg->id, msg->tag);
      }
    }
  }

  delete msg;
}
// common code for submission and delivery void ReductionMgr::mergeAndDeliver(ReductionSet *set, int seqNum) { //iout << "seq " << seqNum << " complete on " << CkMyPe() << "\n" << endi; set->nextSequenceNumber++; // should match all clients ReductionSetData *data = set->getData(seqNum); if ( data->submitsRecorded != set->submitsRegistered ) { NAMD_bug("ReductionMgr::mergeAndDeliver not ready to deliver."); } if ( isRoot() ) { if ( set->requireRegistered ) { if ( set->threadIsWaiting && set->waitingForSequenceNumber == seqNum) { // awaken the thread so it can take the data CthAwaken(set->waitingThread); } } else { NAMD_die("ReductionSet::deliver will never deliver data"); } } else { // send data to parent int size = set->dataSize; ReductionSubmitMsg *msg = new(&size,1) ReductionSubmitMsg; msg->reductionSetID = set->reductionSetID; msg->sourceNode = CkMyPe(); msg->sequenceNumber = seqNum; msg->dataSize = set->dataSize; for ( int i = 0; i < msg->dataSize; ++i ) { msg->data[i] = data->data[i]; } CProxy_ReductionMgr reductionProxy(thisgroup); reductionProxy[myParent].remoteSubmit(msg); delete set->removeData(seqNum); } }
/**
 * Remove the first node whose record pointer equals r.
 *
 * @param r  record to look for (compared by pointer)
 * @return   1 if a node was unlinked and deleted, 0 if r was not found
 */
int IRSet::remove(InfoRecord * r)
{
#ifdef DEBUG_IRSET
  // Verify that the cached element count matches the real list length.
  int length = 0;
  for ( listNode *cur = head; cur; cur = cur->next ) {
    ++length;
  }
  if ( length != nElements ) NAMD_bug("IRSet::remove count");
#endif

  // Walk the links themselves so head and interior nodes are unlinked the
  // same way.
  for ( listNode **link = &head; *link; link = &(*link)->next ) {
    listNode *candidate = *link;
    if ( candidate->info != r ) continue;

    *link = candidate->next;
    delete candidate;
    --nElements;
    return 1;
  }

  return 0;
}
//---------------------------------------------------------------------- ComputeID ComputeMap::storeCompute(int inode, int maxPids, ComputeType type, int partition,int numPartitions) { if (maxPids > numPidsAllocated) { NAMD_bug("ComputeMap::storeCompute called with maxPids > numPidsAllocated"); } int cid; cid = nComputes; nComputes++; computeData.resize(nComputes); computeData[cid].node=inode; computeData[cid].type = type; computeData[cid].partition = partition; computeData[cid].numPartitions = numPartitions; computeData[cid].numPids = 0; return cid; }
/**
 * Read the directToDevice flag of a compute.
 *
 * @param cid  compute id; must satisfy 0 <= cid < nComputes, otherwise
 *             NAMD_bug() is raised
 * @return     the stored directToDevice value
 */
int ComputeMap::directToDevice(const ComputeID cid) const
{
  const bool inRange = ( cid >= 0 ) && ( cid < nComputes );
  if ( ! inRange ) {
    NAMD_bug("ComputeMap::directToDevice() called with an invalid cid value");
  }
  return computeData[cid].directToDevice;
}
/**
 * Set the directToDevice flag of a compute.
 *
 * @param cid  compute id; must satisfy 0 <= cid < nComputes, otherwise
 *             NAMD_bug() is raised
 * @param d    any non-zero value is stored as 1, zero is stored as 0
 */
void ComputeMap::setDirectToDevice(const ComputeID cid, const int d)
{
  const bool inRange = ( cid >= 0 ) && ( cid < nComputes );
  if ( ! inRange ) {
    NAMD_bug("ComputeMap::setDirectToDevice() called with an invalid cid value");
  }

  // Normalize the flag to exactly 0 or 1.
  const int flag = ( d != 0 ) ? 1 : 0;
  computeData[cid].directToDevice = flag;
}
/**
 * Converse handler for acknowledgement messages.
 *
 * Frees the message and adds one to the CsmAcks counter.
 * Must run on a rank-zero PE.
 */
static void CsmAckHandler(void *msg)
{
  if ( CmiMyRank() != 0 ) NAMD_bug("Communicate CsmAckHandler on non-rank-zero pe");

  // The ack carries no payload; only its arrival is counted.
  CmiFree(msg);
  CkpvAccess(CsmAcks) = CkpvAccess(CsmAcks) + 1;
}
void ComputeMgr::createCompute(ComputeID i, ComputeMap *map) { Compute *c; PatchID pid2[2]; PatchIDList pids; int trans2[2]; SimParameters *simParams = Node::Object()->simParameters; PatchID pid8[8]; int trans8[8]; switch ( map->type(i) ) { case computeNonbondedSelfType: #ifdef NAMD_CUDA register_cuda_compute_self(i,map->computeData[i].pids[0].pid); #elif defined(NAMD_MIC) #if MIC_SPLIT_WITH_HOST != 0 if (map->directToDevice(i) == 0) { c = new ComputeNonbondedSelf(i,map->computeData[i].pids[0].pid, computeNonbondedWorkArrays, map->partition(i),map->partition(i)+1, map->numPartitions(i)); // unknown delete map->registerCompute(i,c); c->initialize(); } else { #endif register_mic_compute_self(i,map->computeData[i].pids[0].pid,map->partition(i),map->numPartitions(i)); #if MIC_SPLIT_WITH_HOST != 0 } #endif #else c = new ComputeNonbondedSelf(i,map->computeData[i].pids[0].pid, computeNonbondedWorkArrays, map->partition(i),map->partition(i)+1, map->numPartitions(i)); // unknown delete map->registerCompute(i,c); c->initialize(); #endif break; case computeLCPOType: for (int j = 0; j < 8; j++) { pid8[j] = map->computeData[i].pids[j].pid; trans8[j] = map->computeData[i].pids[j].trans; } c = new ComputeLCPO(i,pid8,trans8, computeNonbondedWorkArrays, map->partition(i),map->partition(i)+1, map->numPartitions(i), 8); map->registerCompute(i,c); c->initialize(); break; case computeNonbondedPairType: pid2[0] = map->computeData[i].pids[0].pid; trans2[0] = map->computeData[i].pids[0].trans; pid2[1] = map->computeData[i].pids[1].pid; trans2[1] = map->computeData[i].pids[1].trans; #ifdef NAMD_CUDA register_cuda_compute_pair(i,pid2,trans2); #elif defined(NAMD_MIC) #if MIC_SPLIT_WITH_HOST != 0 if (map->directToDevice(i) == 0) { c = new ComputeNonbondedPair(i,pid2,trans2, computeNonbondedWorkArrays, map->partition(i),map->partition(i)+1, map->numPartitions(i)); // unknown delete map->registerCompute(i,c); c->initialize(); } else { #endif 
register_mic_compute_pair(i,pid2,trans2,map->partition(i),map->numPartitions(i)); #if MIC_SPLIT_WITH_HOST != 0 } #endif #else c = new ComputeNonbondedPair(i,pid2,trans2, computeNonbondedWorkArrays, map->partition(i),map->partition(i)+1, map->numPartitions(i)); // unknown delete map->registerCompute(i,c); c->initialize(); #endif break; #ifdef NAMD_CUDA case computeNonbondedCUDAType: c = computeNonbondedCUDAObject = new ComputeNonbondedCUDA(i,this); // unknown delete map->registerCompute(i,c); c->initialize(); break; #endif #ifdef NAMD_MIC case computeNonbondedMICType: c = computeNonbondedMICObject = new ComputeNonbondedMIC(i,this); // unknown delete map->registerCompute(i,c); c->initialize(); break; #endif case computeExclsType: PatchMap::Object()->basePatchIDList(CkMyPe(),pids); c = new ComputeExcls(i,pids); // unknown delete map->registerCompute(i,c); c->initialize(); break; case computeBondsType: PatchMap::Object()->basePatchIDList(CkMyPe(),pids); c = new ComputeBonds(i,pids); // unknown delete map->registerCompute(i,c); c->initialize(); break; case computeAnglesType: PatchMap::Object()->basePatchIDList(CkMyPe(),pids); c = new ComputeAngles(i,pids); // unknown delete map->registerCompute(i,c); c->initialize(); break; case computeDihedralsType: PatchMap::Object()->basePatchIDList(CkMyPe(),pids); c = new ComputeDihedrals(i,pids); // unknown delete map->registerCompute(i,c); c->initialize(); break; case computeImpropersType: PatchMap::Object()->basePatchIDList(CkMyPe(),pids); c = new ComputeImpropers(i,pids); // unknown delete map->registerCompute(i,c); c->initialize(); break; case computeTholeType: PatchMap::Object()->basePatchIDList(CkMyPe(),pids); c = new ComputeThole(i,pids); // unknown delete map->registerCompute(i,c); c->initialize(); break; case computeAnisoType: PatchMap::Object()->basePatchIDList(CkMyPe(),pids); c = new ComputeAniso(i,pids); // unknown delete map->registerCompute(i,c); c->initialize(); break; case computeCrosstermsType: 
PatchMap::Object()->basePatchIDList(CkMyPe(),pids); c = new ComputeCrossterms(i,pids); // unknown delete map->registerCompute(i,c); c->initialize(); break; case computeSelfExclsType: c = new ComputeSelfExcls(i,map->computeData[i].pids[0].pid); map->registerCompute(i,c); c->initialize(); break; case computeSelfBondsType: c = new ComputeSelfBonds(i,map->computeData[i].pids[0].pid); map->registerCompute(i,c); c->initialize(); break; case computeSelfAnglesType: c = new ComputeSelfAngles(i,map->computeData[i].pids[0].pid); map->registerCompute(i,c); c->initialize(); break; case computeSelfDihedralsType: c = new ComputeSelfDihedrals(i,map->computeData[i].pids[0].pid); map->registerCompute(i,c); c->initialize(); break; case computeSelfImpropersType: c = new ComputeSelfImpropers(i,map->computeData[i].pids[0].pid); map->registerCompute(i,c); c->initialize(); break; case computeSelfTholeType: c = new ComputeSelfThole(i,map->computeData[i].pids[0].pid); map->registerCompute(i,c); c->initialize(); break; case computeSelfAnisoType: c = new ComputeSelfAniso(i,map->computeData[i].pids[0].pid); map->registerCompute(i,c); c->initialize(); break; case computeSelfCrosstermsType: c = new ComputeSelfCrossterms(i,map->computeData[i].pids[0].pid); map->registerCompute(i,c); c->initialize(); break; #ifdef DPMTA case computeDPMTAType: c = new ComputeDPMTA(i); // unknown delete map->registerCompute(i,c); c->initialize(); break; #endif #ifdef DPME case computeDPMEType: c = computeDPMEObject = new ComputeDPME(i,this); // unknown delete map->registerCompute(i,c); c->initialize(); break; #endif case optPmeType: c = new OptPmeCompute(i); // unknown delete map->registerCompute(i,c); c->initialize(); break; case computePmeType: c = new ComputePme(i,map->computeData[i].pids[0].pid); // unknown delete map->registerCompute(i,c); c->initialize(); break; case computeEwaldType: c = computeEwaldObject = new ComputeEwald(i,this); // unknown delete map->registerCompute(i,c); c->initialize(); break; case 
computeFullDirectType: c = new ComputeFullDirect(i); // unknown delete map->registerCompute(i,c); c->initialize(); break; case computeGlobalType: c = computeGlobalObject = new ComputeGlobal(i,this); // unknown delete map->registerCompute(i,c); c->initialize(); break; case computeStirType: c = new ComputeStir(i,map->computeData[i].pids[0].pid); // unknown delete map->registerCompute(i,c); c->initialize(); break; case computeExtType: c = new ComputeExt(i); // unknown delete map->registerCompute(i,c); c->initialize(); break; case computeGBISserType: //gbis serial c = new ComputeGBISser(i); map->registerCompute(i,c); c->initialize(); break; case computeFmmType: // FMM serial c = new ComputeFmmSerial(i); map->registerCompute(i,c); c->initialize(); break; case computeMsmSerialType: // MSM serial c = new ComputeMsmSerial(i); map->registerCompute(i,c); c->initialize(); break; #ifdef CHARM_HAS_MSA case computeMsmMsaType: // MSM parallel long-range part using MSA c = new ComputeMsmMsa(i); map->registerCompute(i,c); c->initialize(); break; #endif case computeMsmType: // MSM parallel c = new ComputeMsm(i); map->registerCompute(i,c); c->initialize(); break; case computeEFieldType: c = new ComputeEField(i,map->computeData[i].pids[0].pid); // unknown delete map->registerCompute(i,c); c->initialize(); break; /* BEGIN gf */ case computeGridForceType: c = new ComputeGridForce(i,map->computeData[i].pids[0].pid); map->registerCompute(i,c); c->initialize(); break; /* END gf */ case computeSphericalBCType: c = new ComputeSphericalBC(i,map->computeData[i].pids[0].pid); // unknown delete map->registerCompute(i,c); c->initialize(); break; case computeCylindricalBCType: c = new ComputeCylindricalBC(i,map->computeData[i].pids[0].pid); // unknown delete map->registerCompute(i,c); c->initialize(); break; case computeTclBCType: c = new ComputeTclBC(i); // unknown delete map->registerCompute(i,c); c->initialize(); break; case computeRestraintsType: c = new 
ComputeRestraints(i,map->computeData[i].pids[0].pid); // unknown delete map->registerCompute(i,c); c->initialize(); break; case computeConsForceType: c = new ComputeConsForce(i,map->computeData[i].pids[0].pid); map->registerCompute(i,c); c->initialize(); break; case computeConsTorqueType: c = new ComputeConsTorque(i,map->computeData[i].pids[0].pid); map->registerCompute(i,c); c->initialize(); break; default: NAMD_bug("Unknown compute type in ComputeMgr::createCompute()."); break; } }
/**
 * Register one trace user event per compute object.
 *
 * Only active when TRACE_COMPUTE_OBJECTS is defined; otherwise the function
 * returns immediately.  For every compute in the map a descriptive name is
 * built from the compute type and id (non-bonded pairs also carry the
 * absolute patch-grid offsets between their two patches) and registered
 * under the event id TRACE_COMPOBJ_IDOFFSET + i.
 *
 * The switch covers every compute type that ComputeMgr::createCompute()
 * can create, so a valid compute map never reaches the NAMD_bug() default.
 */
void registerUserEventsForAllComputeObjs()
{
#ifdef TRACE_COMPUTE_OBJECTS
    ComputeMap *map = ComputeMap::Object();
    PatchMap *pmap = PatchMap::Object();
    // Large enough for the longest format below with four full-width ints.
    char user_des[128];
    int p1, p2;
    int adim, bdim, cdim;
    int t1, t2;
    int x1, y1, z1, x2, y2, z2;
    int dx, dy, dz;
    for (int i=0; i<map->numComputes(); i++)
    {
        memset(user_des, 0, sizeof(user_des));
        switch ( map->type(i) )
        {
        case computeNonbondedSelfType:
            sprintf(user_des, "computeNonBondedSelfType_%d_pid_%d", i, map->pid(i,0));
            break;
        case computeLCPOType:
            sprintf(user_des, "computeLCPOType_%d_pid_%d", i, map->pid(i,0));
            break;
        case computeNonbondedPairType:
            adim = pmap->gridsize_a();
            bdim = pmap->gridsize_b();
            cdim = pmap->gridsize_c();
            p1 = map->pid(i, 0);
            t1 = map->trans(i, 0);
            x1 = pmap->index_a(p1) + adim * Lattice::offset_a(t1);
            y1 = pmap->index_b(p1) + bdim * Lattice::offset_b(t1);
            z1 = pmap->index_c(p1) + cdim * Lattice::offset_c(t1);
            p2 = map->pid(i, 1);
            t2 = map->trans(i, 1);
            x2 = pmap->index_a(p2) + adim * Lattice::offset_a(t2);
            y2 = pmap->index_b(p2) + bdim * Lattice::offset_b(t2);
            z2 = pmap->index_c(p2) + cdim * Lattice::offset_c(t2);
            dx = abs(x1-x2);
            dy = abs(y1-y2);
            dz = abs(z1-z2);
            sprintf(user_des, "computeNonBondedPairType_%d(%d,%d,%d)", i, dx,dy,dz);
            break;
#ifdef NAMD_CUDA
        case computeNonbondedCUDAType:
            sprintf(user_des, "computeNonbondedCUDAType_%d", i);
            break;
#endif
#ifdef NAMD_MIC
        case computeNonbondedMICType:
            sprintf(user_des, "computeNonbondedMICType_%d", i);
            break;
#endif
        case computeExclsType:
            sprintf(user_des, "computeExclsType_%d", i);
            break;
        case computeBondsType:
            sprintf(user_des, "computeBondsType_%d", i);
            break;
        case computeAnglesType:
            sprintf(user_des, "computeAnglesType_%d", i);
            break;
        case computeDihedralsType:
            sprintf(user_des, "computeDihedralsType_%d", i);
            break;
        case computeImpropersType:
            sprintf(user_des, "computeImpropersType_%d", i);
            break;
        case computeTholeType:
            sprintf(user_des, "computeTholeType_%d", i);
            break;
        case computeAnisoType:
            sprintf(user_des, "computeAnisoType_%d", i);
            break;
        case computeCrosstermsType:
            sprintf(user_des, "computeCrosstermsType_%d", i);
            break;
        case computeSelfExclsType:
            sprintf(user_des, "computeSelfExclsType_%d", i);
            break;
        case computeSelfBondsType:
            sprintf(user_des, "computeSelfBondsType_%d", i);
            break;
        case computeSelfAnglesType:
            sprintf(user_des, "computeSelfAnglesType_%d", i);
            break;
        case computeSelfDihedralsType:
            sprintf(user_des, "computeSelfDihedralsType_%d", i);
            break;
        case computeSelfImpropersType:
            sprintf(user_des, "computeSelfImpropersType_%d", i);
            break;
        case computeSelfTholeType:
            sprintf(user_des, "computeSelfTholeType_%d", i);
            break;
        case computeSelfAnisoType:
            sprintf(user_des, "computeSelfAnisoType_%d", i);
            break;
        case computeSelfCrosstermsType:
            sprintf(user_des, "computeSelfCrosstermsType_%d", i);
            break;
#ifdef DPMTA
        case computeDPMTAType:
            sprintf(user_des, "computeDPMTAType_%d", i);
            break;
#endif
#ifdef DPME
        case computeDPMEType:
            sprintf(user_des, "computeDPMEType_%d", i);
            break;
#endif
        case optPmeType:
            sprintf(user_des, "optPmeType_%d", i);
            break;
        case computePmeType:
            sprintf(user_des, "computePMEType_%d", i);
            break;
        case computeEwaldType:
            sprintf(user_des, "computeEwaldType_%d", i);
            break;
        case computeFullDirectType:
            sprintf(user_des, "computeFullDirectType_%d", i);
            break;
        case computeGlobalType:
            sprintf(user_des, "computeGlobalType_%d", i);
            break;
        case computeStirType:
            sprintf(user_des, "computeStirType_%d", i);
            break;
        case computeExtType:
            sprintf(user_des, "computeExtType_%d", i);
            break;
        case computeGBISserType:
            sprintf(user_des, "computeGBISserType_%d", i);
            break;
        case computeFmmType:
            sprintf(user_des, "computeFmmType_%d", i);
            break;
        case computeMsmSerialType:
            sprintf(user_des, "computeMsmSerialType_%d", i);
            break;
#ifdef CHARM_HAS_MSA
        case computeMsmMsaType:
            sprintf(user_des, "computeMsmMsaType_%d", i);
            break;
#endif
        case computeMsmType:
            sprintf(user_des, "computeMsmType_%d", i);
            break;
        case computeEFieldType:
            sprintf(user_des, "computeEFieldType_%d", i);
            break;
            /* BEGIN gf */
        case computeGridForceType:
            sprintf(user_des, "computeGridForceType_%d", i);
            break;
            /* END gf */
        case computeSphericalBCType:
            sprintf(user_des, "computeSphericalBCType_%d", i);
            break;
        case computeCylindricalBCType:
            sprintf(user_des, "computeCylindricalBCType_%d", i);
            break;
        case computeTclBCType:
            sprintf(user_des, "computeTclBCType_%d", i);
            break;
        case computeRestraintsType:
            sprintf(user_des, "computeRestraintsType_%d", i);
            break;
        case computeConsForceType:
            sprintf(user_des, "computeConsForceType_%d", i);
            break;
        case computeConsTorqueType:
            sprintf(user_des, "computeConsTorqueType_%d", i);
            break;
        default:
            NAMD_bug("Unknown compute type in ComputeMgr::registerUserEventForAllComputeObjs().");
            break;
        }
        int user_des_len = strlen(user_des);
        char *user_des_cst = new char[user_des_len+1];
        memcpy(user_des_cst, user_des, user_des_len);
        user_des_cst[user_des_len] = 0;
        //Since the argument in traceRegisterUserEvent is supposed
        //to be a const string which will not be copied inside the
        //function when a new user event is created, user_des_cst
        //has to be allocated in heap (and is intentionally never freed).
        traceRegisterUserEvent(user_des_cst, TRACE_COMPOBJ_IDOFFSET+i);
    }
#else
    return;
#endif
}
/**
 * (Re)initialize the coordinator for the next load-balancing cycle.
 *
 * In order, this function:
 *  - caches the load-balancing parameters and the two maps;
 *  - rebuilds patchNAtoms (0 for patches on this node, -1 otherwise) and,
 *    unless reinit is set, the sequencerThreads array and controllerThread;
 *  - counts the local computes of the types listed in the first loop;
 *  - when ldbCycleNum equals reg_all_objs, registers local patches (cycle 1
 *    only) and any computes added since the previous call with theLbdb;
 *  - applies the object handles from queued migration messages;
 *  - chooses numStepsToRun / takingLdbData for this cycle and switches
 *    statistics collection on or off accordingly;
 *  - resets the reported/expected counters that barrier() checks;
 *  - on PE 0, allocates the compute/patch/processor info arrays if they do
 *    not exist yet, and finally clears the database loads.
 *
 * @param pMap    patch map stored in patchMap
 * @param cMap    compute map stored in computeMap
 * @param reinit  when non-zero, sequencerThreads and controllerThread are
 *                left untouched
 */
void LdbCoordinator::initialize(PatchMap *pMap, ComputeMap *cMap, int reinit)
{
  const SimParameters *simParams = Node::Object()->simParameters;

#if 0
  static int lbcreated = 0; // XXX static variables are unsafe for SMP
  // PE0 first time Create a load balancer
  if (CkMyPe() == 0 && !lbcreated) {
    if (simParams->ldbStrategy == LDBSTRAT_ALGNBOR)
      CreateNamdNborLB();
    else {
      //   CreateCentralLB();
      CreateNamdCentLB();
    }
    lbcreated = 1;
  }
#endif

  //  DebugM(10,"stepsPerLdbCycle initialized\n");
  stepsPerLdbCycle = simParams->ldbPeriod;
  firstLdbStep = simParams->firstLdbStep;
  int lastLdbStep = simParams->lastLdbStep;
  int stepsPerCycle = simParams->stepsPerCycle;

  computeMap = cMap;
  patchMap = pMap;

  // Set the number of received messages correctly for node 0

  nStatsMessagesExpected = Node::Object()->numNodes();
  nStatsMessagesReceived = 0;

  if (patchNAtoms)
    delete [] patchNAtoms;  // Depends on delete NULL to do nothing
  nPatches = patchMap->numPatches();
  patchNAtoms = new int[nPatches];

  typedef Sequencer *seqPtr;

  if ( ! reinit ) {
    delete [] sequencerThreads;  // Depends on delete NULL to do nothing
    sequencerThreads = new seqPtr[nPatches];
  }

  nLocalPatches=0;

  // Mark patches on this node with an atom count of 0 and all others with
  // -1, counting the local ones along the way.
  int i;
  for(i=0;i<nPatches;i++)
  {
    if (patchMap->node(i) == Node::Object()->myid())
    {
      nLocalPatches++;
      patchNAtoms[i]=0;
    } else {
      patchNAtoms[i]=-1;
    }
    if ( ! reinit ) sequencerThreads[i]=NULL;
  }
  if ( ! reinit ) controllerThread = NULL;
  if (nLocalPatches != patchMap->numHomePatches())
    NAMD_die("Disaggreement in patchMap data.\n");

  const int oldNumComputes = numComputes;
  nLocalComputes = 0;
  numComputes = computeMap->numComputes();

  // Count the computes assigned to this node whose type appears in the list
  // below.  The leading "0" lets every real term start with "||" so the
  // non-bonded terms can be compiled out for CUDA builds.
  for(i=0;i<numComputes;i++)  {
    if ( (computeMap->node(i) == Node::Object()->myid())
         && ( 0
#ifndef NAMD_CUDA
              || (computeMap->type(i) == computeNonbondedSelfType)
              || (computeMap->type(i) == computeNonbondedPairType)
#endif
              || (computeMap->type(i) == computeLCPOType)
              || (computeMap->type(i) == computeSelfExclsType)
              || (computeMap->type(i) == computeSelfBondsType)
              || (computeMap->type(i) == computeSelfAnglesType)
              || (computeMap->type(i) == computeSelfDihedralsType)
              || (computeMap->type(i) == computeSelfImpropersType)
              || (computeMap->type(i) == computeSelfTholeType)
              || (computeMap->type(i) == computeSelfAnisoType)
              || (computeMap->type(i) == computeSelfCrosstermsType)

              || (computeMap->type(i) == computeBondsType)
              || (computeMap->type(i) == computeExclsType)
              || (computeMap->type(i) == computeAnglesType)
              || (computeMap->type(i) == computeDihedralsType)
              || (computeMap->type(i) == computeImpropersType)
              || (computeMap->type(i) == computeTholeType)
              || (computeMap->type(i) == computeAnisoType)
              || (computeMap->type(i) == computeCrosstermsType)
            ) ) {
      nLocalComputes++;
    }
  }

  // New LB frameworks registration

  // Allocate data structure to save incoming migrations.  Processor
  // zero will get all migrations

  // If this is the first time through, we need it register patches
  if (ldbCycleNum == reg_all_objs) {
    // With the centralized balancer the next registration pass is moved to
    // cycle 3.
    if ( Node::Object()->simParameters->ldBalancer == LDBAL_CENTRALIZED ) {
      reg_all_objs = 3;
    }
    // Tell the lbdb that I'm registering objects, until I'm done
    // registering them.
    theLbdb->RegisteringObjects(myHandle);

    // Patches are registered on cycle 1 only.
    if ( ldbCycleNum == 1 ) {
      patchHandles = new LDObjHandle[nLocalPatches];
      int patch_count=0;
      int i;  // NOTE(review): shadows the outer loop index declared above
      for(i=0;i<nPatches;i++)
        if (patchMap->node(i) == Node::Object()->myid()) {
          // Patch element ids carry the patch id followed by three -2 fields.
          LDObjid elemID;
          elemID.id[0] = i;
          elemID.id[1] = elemID.id[2] = elemID.id[3] = -2;

          if (patch_count >= nLocalPatches) {
            iout << iFILE << iERROR << iPE
                 << "LdbCoordinator found too many local patches!" << endi;
            CkExit();
          }
          HomePatch *p = patchMap->homePatch(i);
          p->ldObjHandle =
          patchHandles[patch_count]
            = theLbdb->RegisterObj(myHandle,elemID,0,0);
          patch_count++;
        }
    }

    // Only computes added since the previous call are registered.
    if ( numComputes > oldNumComputes ) {
      // Register computes
      for(i=oldNumComputes; i<numComputes; i++)  {
        if ( computeMap->node(i) == Node::Object()->myid())
        {
          if ( 0
#ifndef NAMD_CUDA
               || (computeMap->type(i) == computeNonbondedSelfType)
               || (computeMap->type(i) == computeNonbondedPairType)
#endif
               || (computeMap->type(i) == computeLCPOType)
               || (computeMap->type(i) == computeSelfExclsType)
               || (computeMap->type(i) == computeSelfBondsType)
               || (computeMap->type(i) == computeSelfAnglesType)
               || (computeMap->type(i) == computeSelfDihedralsType)
               || (computeMap->type(i) == computeSelfImpropersType)
               || (computeMap->type(i) == computeSelfTholeType)
               || (computeMap->type(i) == computeSelfAnisoType)
               || (computeMap->type(i) == computeSelfCrosstermsType)
             )  {
          // Register the object with the load balancer
          // Store the depended patch IDs in the rest of the element ID
          // (up to three patch ids; unused slots are set to -1).  The last
          // RegisterObj argument is 1 for this group and 0 for the group
          // in the else-branch below.
          LDObjid elemID;
          elemID.id[0] = i;

          if (computeMap->numPids(i) > 2)
            elemID.id[3] = computeMap->pid(i,2);
          else elemID.id[3] = -1;

          if (computeMap->numPids(i) > 1)
            elemID.id[2] =  computeMap->pid(i,1);
          else elemID.id[2] = -1;

          if (computeMap->numPids(i) > 0)
            elemID.id[1] =  computeMap->pid(i,0);
          else elemID.id[1] = -1;

          Compute *c = computeMap->compute(i);
          if ( ! c ) NAMD_bug("LdbCoordinator::initialize() null compute pointer");

          c->ldObjHandle = theLbdb->RegisterObj(myHandle,elemID,0,1);
          }
          else if ( (computeMap->type(i) == computeBondsType)
                 || (computeMap->type(i) == computeExclsType)
                 || (computeMap->type(i) == computeAnglesType)
                 || (computeMap->type(i) == computeDihedralsType)
                 || (computeMap->type(i) == computeImpropersType)
                 || (computeMap->type(i) == computeTholeType)
                 || (computeMap->type(i) == computeAnisoType)
                 || (computeMap->type(i) == computeCrosstermsType)
               ) {
          // Register the object with the load balancer
          // These element ids carry the compute id followed by three -3
          // fields instead of patch ids.
          LDObjid elemID;
          elemID.id[0] = i;

          elemID.id[1] = elemID.id[2] = elemID.id[3] = -3;

          Compute *c = computeMap->compute(i);
          if ( ! c ) NAMD_bug("LdbCoordinator::initialize() null compute pointer");

          c->ldObjHandle = theLbdb->RegisterObj(myHandle,elemID,0,0);
          }
        }
      }
    }
    theLbdb->DoneRegisteringObjects(myHandle);
  }

  // process saved migration messages, if any
  // (each queued message hands its object handle to the compute whose id is
  // stored in the first field of the handle's element id)
  while ( migrateMsgs ) {
    LdbMigrateMsg *m = migrateMsgs;
    migrateMsgs = m->next;
    Compute *c = computeMap->compute(m->handle.id.id[0]);
    if ( ! c ) NAMD_bug("LdbCoordinator::initialize() null compute pointer 2");
    c->ldObjHandle = m->handle;
    delete m;
  }

  // Fixup to take care of the extra timestep at startup
  // This is pretty ugly here, but it makes the count correct

  // iout << "LDB Cycle Num: " << ldbCycleNum << "\n";

  // Choose how many steps this cycle runs and whether load statistics are
  // collected during it.  numStepsToRun == -1 is set once totalStepsDone
  // has passed lastLdbStep (when lastLdbStep is not -1).
  if ( simParams->ldBalancer == LDBAL_CENTRALIZED ) {
    if (ldbCycleNum == 1 || ldbCycleNum == 3) {
      numStepsToRun = stepsPerCycle;
      totalStepsDone += numStepsToRun;
      takingLdbData = 0;
      theLbdb->CollectStatsOff();
    } else if (ldbCycleNum == 2 || ldbCycleNum == 4) {
      numStepsToRun = firstLdbStep - stepsPerCycle;
      // Keep the step count positive when firstLdbStep <= stepsPerCycle.
      while ( numStepsToRun <= 0 ) numStepsToRun += stepsPerCycle;
      totalStepsDone += numStepsToRun;
      takingLdbData = 1;
      theLbdb->CollectStatsOn();
    } else if ( (ldbCycleNum <= 6) || !takingLdbData ) {
      totalStepsDone += firstLdbStep;
      if(lastLdbStep != -1 && totalStepsDone > lastLdbStep) {
        numStepsToRun = -1;
        takingLdbData = 0;
        theLbdb->CollectStatsOff();
      } else {
        numStepsToRun = firstLdbStep;
        takingLdbData = 1;
        theLbdb->CollectStatsOn();
      }
    } else {
      totalStepsDone += stepsPerLdbCycle - firstLdbStep;
      if(lastLdbStep != -1 && totalStepsDone > lastLdbStep) {
        numStepsToRun = -1;
        takingLdbData = 0;
        theLbdb->CollectStatsOff();
      } else {
        numStepsToRun = stepsPerLdbCycle - firstLdbStep;
        takingLdbData = 0;
        theLbdb->CollectStatsOff();
      }
    }
  } else {
    if (ldbCycleNum==1) {
      totalStepsDone += firstLdbStep;
      numStepsToRun = firstLdbStep;
      takingLdbData = 0;
      theLbdb->CollectStatsOff();
    } else if ( (ldbCycleNum <= 4) || !takingLdbData ) {
      totalStepsDone += firstLdbStep;
      if(lastLdbStep != -1 && totalStepsDone > lastLdbStep) {
        numStepsToRun = -1;
        takingLdbData = 0;
        theLbdb->CollectStatsOff();
      } else {
        numStepsToRun = firstLdbStep;
        takingLdbData = 1;
        theLbdb->CollectStatsOn();
      }
    } else {
      totalStepsDone += stepsPerLdbCycle - firstLdbStep;
      if(lastLdbStep != -1 && totalStepsDone > lastLdbStep) {
        numStepsToRun = -1;
        takingLdbData = 0;
        theLbdb->CollectStatsOff();
      } else {
        numStepsToRun = stepsPerLdbCycle - firstLdbStep;
        takingLdbData = 0;
        theLbdb->CollectStatsOff();
      }
    }
  }

/*-----------------------------------------------------------------------------*
 * --------------------------------------------------------------------------- *
 * Comments inserted by Abhinav to clarify relation between ldbCycleNum,       *
 * load balancing step numbers (printed by the step() function) and            *
 * tracing of the steps                                                        *
 * --------------------------------------------------------------------------- *
 * If trace is turned off in the beginning, then tracing is turned on          *
 * at ldbCycleNum = 4 and turned off at ldbCycleNum = 8. ldbCycleNum can       *
 * be adjusted by specifying firstLdbStep and ldbPeriod which are set by       *
 * default to 5*stepspercycle and 200*stepspercycle if not specified.          *
 *                                                                             *
 * If we choose firstLdbStep = 20 and ldbPeriod = 100, we have the             *
 * following timeline (for these particular numbers):                          *
 *                                                                             *
 * Tracing         :  <------ off ------><------------- on -----------><-- off *
 * Ldb Step() No   :              1     2     3        4      5       6      7 *
 * Iteration Steps : 00====20====40====60====80======160====180=====260====280 *
 * ldbCycleNum     :  1     2     3     4     5        6      7       8      9 *
 * Instrumention   :          Inst  Inst  Inst           Inst            Inst  *
 * LDB Strategy    :              TLB  RLB   RLB            RLB            RLB *
 *                                                                             *
 * TLB = TorusLB                                                               *
 * RLB = RefineTorusLB                                                         *
 * Inst = Instrumentation Phase (no real load balancing)                       *
 * --------------------------------------------------------------------------- *
 *-----------------------------------------------------------------------------*
 */
#if 0 //replaced by traceBarrier at Controller and Sequencer
  if (traceAvailable()) {
    static int specialTracing = 0; // XXX static variables are unsafe for SMP
    if (ldbCycleNum == 1 && traceIsOn() == 0)  specialTracing = 1;
    if (specialTracing) {
      if (ldbCycleNum == 4) traceBegin();
      if (ldbCycleNum == 8) traceEnd();
    }
  }
#endif

  // Reset the counters that barrier() compares before entering the local
  // barrier.  The controller is only expected on PE 0.
  nPatchesReported = 0;
  nPatchesExpected = nLocalPatches;
  nComputesReported = 0;
  nComputesExpected = nLocalComputes * numStepsToRun;
  controllerReported = 0;
  controllerExpected = ! CkMyPe();

  // PE 0 allocates these arrays on first use only; they are not resized on
  // later calls.
  if (CkMyPe() == 0)
  {
    if (computeArray == NULL)
      computeArray = new computeInfo[numComputes];
    if (patchArray == NULL)
      patchArray = new patchInfo[nPatches];
    if (processorArray == NULL)
      processorArray = new processorInfo[CkNumPes()];
  }

  theLbdb->ClearLoads();
}
void ComputeNonbondedUtil::select(void) { if ( CkMyRank() ) return; // These defaults die cleanly if nothing appropriate is assigned. ComputeNonbondedUtil::calcPair = calc_error; ComputeNonbondedUtil::calcPairEnergy = calc_error; ComputeNonbondedUtil::calcSelf = calc_error; ComputeNonbondedUtil::calcSelfEnergy = calc_error; ComputeNonbondedUtil::calcFullPair = calc_error; ComputeNonbondedUtil::calcFullPairEnergy = calc_error; ComputeNonbondedUtil::calcFullSelf = calc_error; ComputeNonbondedUtil::calcFullSelfEnergy = calc_error; ComputeNonbondedUtil::calcMergePair = calc_error; ComputeNonbondedUtil::calcMergePairEnergy = calc_error; ComputeNonbondedUtil::calcMergeSelf = calc_error; ComputeNonbondedUtil::calcMergeSelfEnergy = calc_error; ComputeNonbondedUtil::calcSlowPair = calc_error; ComputeNonbondedUtil::calcSlowPairEnergy = calc_error; ComputeNonbondedUtil::calcSlowSelf = calc_error; ComputeNonbondedUtil::calcSlowSelfEnergy = calc_error; SimParameters * simParams = Node::Object()->simParameters; Parameters * params = Node::Object()->parameters; table_ener = params->table_ener; rowsize = params->rowsize; columnsize = params->columnsize; commOnly = simParams->commOnly; fixedAtomsOn = ( simParams->fixedAtomsOn && ! 
simParams->fixedAtomsForces ); cutoff = simParams->cutoff; cutoff2 = cutoff*cutoff; //fepb alchFepOn = simParams->alchFepOn; Fep_WCA_repuOn = simParams->alchFepWCARepuOn; Fep_WCA_dispOn = simParams->alchFepWCADispOn; alchThermIntOn = simParams->alchThermIntOn; alchLambda = alchLambda2 = 0; lesOn = simParams->lesOn; lesScaling = lesFactor = 0; Bool tabulatedEnergies = simParams->tabulatedEnergies; alchVdwShiftCoeff = simParams->alchVdwShiftCoeff; WCA_rcut1 = simParams->alchFepWCArcut1; WCA_rcut2 = simParams->alchFepWCArcut2; alchVdwLambdaEnd = simParams->alchVdwLambdaEnd; alchElecLambdaStart = simParams->alchElecLambdaStart; alchDecouple = simParams->alchDecouple; delete [] lambda_table; lambda_table = 0; pairInteractionOn = simParams->pairInteractionOn; pairInteractionSelf = simParams->pairInteractionSelf; pressureProfileOn = simParams->pressureProfileOn; // Ported by JLai -- Original JE - Go goForcesOn = simParams->goForcesOn; goMethod = simParams->goMethod; // End of port accelMDOn = simParams->accelMDOn; drudeNbthole = simParams->drudeOn && (simParams->drudeNbtholeCut > 0.0); if ( drudeNbthole ) { #ifdef NAMD_CUDA NAMD_die("drudeNbthole is not supported in CUDA version"); #endif if ( alchFepOn ) NAMD_die("drudeNbthole is not supported with alchemical free-energy perturbation"); if ( alchThermIntOn ) NAMD_die("drudeNbthole is not supported with alchemical thermodynamic integration"); if ( lesOn ) NAMD_die("drudeNbthole is not supported with locally enhanced sampling"); if ( pairInteractionOn ) NAMD_die("drudeNbthole is not supported with pair interaction calculation"); if ( pressureProfileOn ) NAMD_die("drudeNbthole is not supported with pressure profile calculation"); } if ( alchFepOn ) { #ifdef NAMD_CUDA NAMD_die("Alchemical free-energy perturbation is not supported in CUDA version"); #endif alchLambda = simParams->alchLambda; alchLambda2 = simParams->alchLambda2; ComputeNonbondedUtil::calcPair = calc_pair_energy_fep; ComputeNonbondedUtil::calcPairEnergy = 
calc_pair_energy_fep; ComputeNonbondedUtil::calcSelf = calc_self_energy_fep; ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_fep; ComputeNonbondedUtil::calcFullPair = calc_pair_energy_fullelect_fep; ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_fep; ComputeNonbondedUtil::calcFullSelf = calc_self_energy_fullelect_fep; ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_fep; ComputeNonbondedUtil::calcMergePair = calc_pair_energy_merge_fullelect_fep; ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_fep; ComputeNonbondedUtil::calcMergeSelf = calc_self_energy_merge_fullelect_fep; ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_fep; ComputeNonbondedUtil::calcSlowPair = calc_pair_energy_slow_fullelect_fep; ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_fep; ComputeNonbondedUtil::calcSlowSelf = calc_self_energy_slow_fullelect_fep; ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_fep; } else if ( alchThermIntOn ) { #ifdef NAMD_CUDA NAMD_die("Alchemical thermodynamic integration is not supported in CUDA version"); #endif alchLambda = simParams->alchLambda; ComputeNonbondedUtil::calcPair = calc_pair_ti; ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_ti; ComputeNonbondedUtil::calcSelf = calc_self_ti; ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_ti; ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect_ti; ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_ti; ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect_ti; ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_ti; ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect_ti; ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_ti; ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect_ti; ComputeNonbondedUtil::calcMergeSelfEnergy = 
calc_self_energy_merge_fullelect_ti; ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect_ti; ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_ti; ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect_ti; ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_ti; } else if ( lesOn ) { #ifdef NAMD_CUDA NAMD_die("Locally enhanced sampling is not supported in CUDA version"); #endif lesFactor = simParams->lesFactor; lesScaling = 1.0 / (double)lesFactor; lambda_table = new BigReal[(lesFactor+1)*(lesFactor+1)]; for ( int ip=0; ip<=lesFactor; ++ip ) { for ( int jp=0; jp<=lesFactor; ++jp ) { BigReal lambda_pair = 1.0; if (ip || jp ) { if (ip && jp && ip != jp) { lambda_pair = 0.0; } else { lambda_pair = lesScaling; } } lambda_table[(lesFactor+1)*ip+jp] = lambda_pair; } } ComputeNonbondedUtil::calcPair = calc_pair_les; ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_les; ComputeNonbondedUtil::calcSelf = calc_self_les; ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_les; ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect_les; ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_les; ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect_les; ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_les; ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect_les; ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_les; ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect_les; ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_les; ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect_les; ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_les; ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect_les; ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_les; } else if ( pressureProfileOn) { #ifdef NAMD_CUDA NAMD_die("Pressure 
profile calculation is not supported in CUDA version"); #endif pressureProfileSlabs = simParams->pressureProfileSlabs; pressureProfileAtomTypes = simParams->pressureProfileAtomTypes; ComputeNonbondedUtil::calcPair = calc_pair_pprof; ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_pprof; ComputeNonbondedUtil::calcSelf = calc_self_pprof; ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_pprof; ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect_pprof; ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_pprof; ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect_pprof; ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_pprof; ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect_pprof; ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_pprof; ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect_pprof; ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_pprof; ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect_pprof; ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_pprof; ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect_pprof; ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_pprof; } else if ( pairInteractionOn ) { #ifdef NAMD_CUDA NAMD_die("Pair interaction calculation is not supported in CUDA version"); #endif ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_int; ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_int; ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_int; ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_int; ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_int; ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_int; } else if ( tabulatedEnergies ) { #ifdef NAMD_CUDA NAMD_die("Tabulated energies is not supported in CUDA version"); #endif 
ComputeNonbondedUtil::calcPair = calc_pair_tabener; ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_tabener; ComputeNonbondedUtil::calcSelf = calc_self_tabener; ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_tabener; ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect_tabener; ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_tabener; ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect_tabener; ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_tabener; ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect_tabener; ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_tabener; ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect_tabener; ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_tabener; ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect_tabener; ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_tabener; ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect_tabener; ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_tabener; } else if ( goForcesOn ) { #ifdef NAMD_CUDA NAMD_die("Go forces is not supported in CUDA version"); #endif ComputeNonbondedUtil::calcPair = calc_pair_go; ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_go; ComputeNonbondedUtil::calcSelf = calc_self_go; ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_go; ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect_go; ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_go; ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect_go; ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_go; ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect_go; ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_go; ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect_go; 
ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_go; ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect_go; ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_go; ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect_go; ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_go; } else { ComputeNonbondedUtil::calcPair = calc_pair; ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy; ComputeNonbondedUtil::calcSelf = calc_self; ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy; ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect; ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect; ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect; ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect; ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect; ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect; ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect; ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect; ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect; ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect; ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect; ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect; } //fepe dielectric_1 = 1.0/simParams->dielectric; if ( ! 
ljTable ) ljTable = new LJTable; mol = Node::Object()->molecule; scaling = simParams->nonbondedScaling; if ( simParams->exclude == SCALED14 ) { scale14 = simParams->scale14; } else { scale14 = 1.; } if ( simParams->switchingActive ) { switchOn = simParams->switchingDist; switchOn_1 = 1.0/switchOn; // d0 = 1.0/(cutoff-switchOn); switchOn2 = switchOn*switchOn; c0 = 1.0/(cutoff2-switchOn2); if ( simParams->vdwForceSwitching ) { double switchOn3 = switchOn * switchOn2; double cutoff3 = cutoff * cutoff2; double switchOn6 = switchOn3 * switchOn3; double cutoff6 = cutoff3 * cutoff3; v_vdwa = -1. / ( switchOn6 * cutoff6 ); v_vdwb = -1. / ( switchOn3 * cutoff3 ); k_vdwa = cutoff6 / ( cutoff6 - switchOn6 ); k_vdwb = cutoff3 / ( cutoff3 - switchOn3 ); cutoff_3 = 1. / cutoff3; cutoff_6 = 1. / cutoff6; } } else { switchOn = cutoff; switchOn_1 = 1.0/switchOn; // d0 = 0.; // avoid division by zero switchOn2 = switchOn*switchOn; c0 = 0.; // avoid division by zero } c1 = c0*c0*c0; c3 = 3.0 * (cutoff2 - switchOn2); c5 = 0; c6 = 0; c7 = 0; c8 = 0; const int PMEOn = simParams->PMEOn; const int MSMOn = simParams->MSMOn; const int MSMSplit = simParams->MSMSplit; if ( PMEOn ) { ewaldcof = simParams->PMEEwaldCoefficient; BigReal TwoBySqrtPi = 1.12837916709551; pi_ewaldcof = TwoBySqrtPi * ewaldcof; } int splitType = SPLIT_NONE; if ( simParams->switchingActive ) splitType = SPLIT_SHIFT; if ( simParams->martiniSwitching ) splitType = SPLIT_MARTINI; if ( simParams->fullDirectOn || simParams->FMAOn || PMEOn || MSMOn ) { switch ( simParams->longSplitting ) { case C2: splitType = SPLIT_C2; break; case C1: splitType = SPLIT_C1; break; case XPLOR: NAMD_die("Sorry, XPLOR splitting not supported."); break; case SHARP: NAMD_die("Sorry, SHARP splitting not supported."); break; default: NAMD_die("Unknown splitting type found!"); } } BigReal r2_tol = 0.1; r2_delta = 1.0; r2_delta_exp = 0; while ( r2_delta > r2_tol ) { r2_delta /= 2.0; r2_delta_exp += 1; } r2_delta_1 = 1.0 / r2_delta; if ( ! 
CkMyPe() ) { iout << iINFO << "NONBONDED TABLE R-SQUARED SPACING: " << r2_delta << "\n" << endi; } BigReal r2_tmp = 1.0; int cutoff2_exp = 0; while ( (cutoff2 + r2_delta) > r2_tmp ) { r2_tmp *= 2.0; cutoff2_exp += 1; } int i; int n = (r2_delta_exp + cutoff2_exp) * 64 + 1; if ( ! CkMyPe() ) { iout << iINFO << "NONBONDED TABLE SIZE: " << n << " POINTS\n" << endi; } if ( table_alloc ) delete [] table_alloc; table_alloc = new BigReal[61*n+16]; BigReal *table_align = table_alloc; while ( ((long)table_align) % 128 ) ++table_align; table_noshort = table_align; table_short = table_align + 16*n; slow_table = table_align + 32*n; fast_table = table_align + 36*n; scor_table = table_align + 40*n; corr_table = table_align + 44*n; full_table = table_align + 48*n; vdwa_table = table_align + 52*n; vdwb_table = table_align + 56*n; r2_table = table_align + 60*n; BigReal *fast_i = fast_table + 4; BigReal *scor_i = scor_table + 4; BigReal *slow_i = slow_table + 4; BigReal *vdwa_i = vdwa_table + 4; BigReal *vdwb_i = vdwb_table + 4; BigReal *r2_i = r2_table; *(r2_i++) = r2_delta; BigReal r2_limit = simParams->limitDist * simParams->limitDist; if ( r2_limit < r2_delta ) r2_limit = r2_delta; int r2_delta_i = 0; // entry for r2 == r2_delta // fill in the table, fix up i==0 (r2==0) below for ( i=1; i<n; ++i ) { const BigReal r2_base = r2_delta * ( 1 << (i/64) ); const BigReal r2_del = r2_base / 64.0; const BigReal r2 = r2_base - r2_delta + r2_del * (i%64); if ( r2 <= r2_limit ) r2_delta_i = i; const BigReal r = sqrt(r2); const BigReal r_1 = 1.0/r; const BigReal r_2 = 1.0/r2; // fast_ is defined as (full_ - slow_) // corr_ and fast_ are both zero at the cutoff, full_ is not // all three are approx 1/r at short distances // for actual interpolation, we use fast_ for fast forces and // scor_ = slow_ + corr_ - full_ and slow_ for slow forces // since these last two are of small magnitude BigReal fast_energy, fast_gradient; BigReal scor_energy, scor_gradient; BigReal slow_energy, slow_gradient; 
// corr_ is PME direct sum, or similar correction term // corr_energy is multiplied by r until later // corr_gradient is multiplied by -r^2 until later BigReal corr_energy, corr_gradient; if ( PMEOn ) { BigReal tmp_a = r * ewaldcof; BigReal tmp_b = erfc(tmp_a); corr_energy = tmp_b; corr_gradient = pi_ewaldcof*exp(-(tmp_a*tmp_a))*r + tmp_b; } else if ( MSMOn ) { BigReal a_1 = 1.0/cutoff; BigReal r_a = r * a_1; BigReal g, dg; SPOLY(&g, &dg, r_a, MSMSplit); corr_energy = 1 - r_a * g; corr_gradient = 1 + r_a*r_a * dg; } else { corr_energy = corr_gradient = 0; } switch(splitType) { case SPLIT_NONE: fast_energy = 1.0/r; fast_gradient = -1.0/r2; scor_energy = scor_gradient = 0; slow_energy = slow_gradient = 0; break; case SPLIT_SHIFT: { BigReal shiftVal = r2/cutoff2 - 1.0; shiftVal *= shiftVal; BigReal dShiftVal = 2.0 * (r2/cutoff2 - 1.0) * 2.0*r/cutoff2; fast_energy = shiftVal/r; fast_gradient = dShiftVal/r - shiftVal/r2; scor_energy = scor_gradient = 0; slow_energy = slow_gradient = 0; } break; case SPLIT_MARTINI: { // in Martini, the Coulomb switching distance is zero const BigReal COUL_SWITCH = 0.; // Gromacs shifting function const BigReal p1 = 1.; BigReal A1 = p1 * ((p1+1)*COUL_SWITCH-(p1+4)*cutoff)/(pow(cutoff,p1+2)*pow(cutoff-COUL_SWITCH,2)); BigReal B1 = -p1 * ((p1+1)*COUL_SWITCH-(p1+3)*cutoff)/(pow(cutoff,p1+2)*pow(cutoff-COUL_SWITCH,3)); BigReal X1 = 1.0/pow(cutoff,p1)-A1/3.0*pow(cutoff-COUL_SWITCH,3)-B1/4.0*pow(cutoff-COUL_SWITCH,4); BigReal r12 = (r-COUL_SWITCH)*(r-COUL_SWITCH); BigReal r13 = (r-COUL_SWITCH)*(r-COUL_SWITCH)*(r-COUL_SWITCH); BigReal shiftVal = -(A1/3.0)*r13 - (B1/4.0)*r12*r12 - X1; BigReal dShiftVal = -A1*r12 - B1*r13; fast_energy = (1/r) + shiftVal; fast_gradient = -1/(r2) + dShiftVal; scor_energy = scor_gradient = 0; slow_energy = slow_gradient = 0; } break; case SPLIT_C1: // calculate actual energy and gradient slow_energy = 0.5/cutoff * (3.0 - (r2/cutoff2)); slow_gradient = -1.0/cutoff2 * (r/cutoff); // calculate scor from slow and corr 
scor_energy = slow_energy + (corr_energy - 1.0)/r; scor_gradient = slow_gradient - (corr_gradient - 1.0)/r2; // calculate fast from slow fast_energy = 1.0/r - slow_energy; fast_gradient = -1.0/r2 - slow_gradient; break; case SPLIT_C2: // // Quintic splitting function contributed by // Bruce Berne, Ruhong Zhou, and Joe Morrone // // calculate actual energy and gradient slow_energy = r2/(cutoff*cutoff2) * (6.0 * (r2/cutoff2) - 15.0*(r/cutoff) + 10.0); slow_gradient = r/(cutoff*cutoff2) * (24.0 * (r2/cutoff2) - 45.0 *(r/cutoff) + 20.0); // calculate scor from slow and corr scor_energy = slow_energy + (corr_energy - 1.0)/r; scor_gradient = slow_gradient - (corr_gradient - 1.0)/r2; // calculate fast from slow fast_energy = 1.0/r - slow_energy; fast_gradient = -1.0/r2 - slow_gradient; break; } // foo_gradient is calculated as ( d foo_energy / d r ) // and now divided by 2r to get ( d foo_energy / d r2 ) fast_gradient *= 0.5 * r_1; scor_gradient *= 0.5 * r_1; slow_gradient *= 0.5 * r_1; // let modf be 1 if excluded, 1-scale14 if modified, 0 otherwise, // add scor_ - modf * slow_ to slow terms and // add fast_ - modf * fast_ to fast terms. 
BigReal vdwa_energy, vdwa_gradient; BigReal vdwb_energy, vdwb_gradient; const BigReal r_6 = r_2*r_2*r_2; const BigReal r_12 = r_6*r_6; // Lennard-Jones switching function if ( simParams->vdwForceSwitching ) { // switch force // from Steinbach & Brooks, JCC 15, pgs 667-683, 1994, eqns 10-13 if ( r2 > switchOn2 ) { BigReal tmpa = r_6 - cutoff_6; vdwa_energy = k_vdwa * tmpa * tmpa; BigReal tmpb = r_1 * r_2 - cutoff_3; vdwb_energy = k_vdwb * tmpb * tmpb; vdwa_gradient = -6.0 * k_vdwa * tmpa * r_2 * r_6; vdwb_gradient = -3.0 * k_vdwb * tmpb * r_2 * r_2 * r_1; } else { vdwa_energy = r_12 + v_vdwa; vdwb_energy = r_6 + v_vdwb; vdwa_gradient = -6.0 * r_2 * r_12; vdwb_gradient = -3.0 * r_2 * r_6; } } else if ( simParams->martiniSwitching ) { // switching fxn for Martini RBCG BigReal r12 = (r-switchOn)*(r-switchOn); BigReal r13 = (r-switchOn)*(r-switchOn)*(r-switchOn); BigReal p6 = 6; BigReal A6 = p6 * ((p6+1)*switchOn-(p6+4)*cutoff)/(pow(cutoff,p6+2)*pow(cutoff-switchOn,2)); BigReal B6 = -p6 * ((p6+1)*switchOn-(p6+3)*cutoff)/(pow(cutoff,p6+2)*pow(cutoff-switchOn,3)); BigReal C6 = 1.0/pow(cutoff,p6)-A6/3.0*pow(cutoff-switchOn,3)-B6/4.0*pow(cutoff-switchOn,4); BigReal p12 = 12; BigReal A12 = p12 * ((p12+1)*switchOn-(p12+4)*cutoff)/(pow(cutoff,p12+2)*pow(cutoff-switchOn,2)); BigReal B12 = -p12 * ((p12+1)*switchOn-(p12+3)*cutoff)/(pow(cutoff,p12+2)*pow(cutoff-switchOn,3)); BigReal C12 = 1.0/pow(cutoff,p12)-A12/3.0*pow(cutoff-switchOn,3)-B12/4.0*pow(cutoff-switchOn,4); BigReal LJshifttempA = -(A12/3)*r13 - (B12/4)*r12*r12 - C12; BigReal LJshifttempB = -(A6/3)*r13 - (B6/4)*r12*r12 - C6; const BigReal shiftValA = // used for Lennard-Jones ( r2 > switchOn2 ? LJshifttempA : -C12); const BigReal shiftValB = // used for Lennard-Jones ( r2 > switchOn2 ? LJshifttempB : -C6); BigReal LJdshifttempA = -A12*r12 - B12*r13; BigReal LJdshifttempB = -A6*r12 - B6*r13; const BigReal dshiftValA = // used for Lennard-Jones ( r2 > switchOn2 ? 
LJdshifttempA*0.5*r_1 : 0 ); const BigReal dshiftValB = // used for Lennard-Jones ( r2 > switchOn2 ? LJdshifttempB*0.5*r_1 : 0 ); //have not addressed r > cutoff // dshiftValA*= 0.5*r_1; // dshiftValB*= 0.5*r_1; vdwa_energy = r_12 + shiftValA; vdwb_energy = r_6 + shiftValB; vdwa_gradient = -6/pow(r,14) + dshiftValA ; vdwb_gradient = -3/pow(r,8) + dshiftValB; } else { // switch energy const BigReal c2 = cutoff2-r2; const BigReal c4 = c2*(c3-2.0*c2); const BigReal switchVal = // used for Lennard-Jones ( r2 > switchOn2 ? c2*c4*c1 : 1.0 ); const BigReal dSwitchVal = // d switchVal / d r2 ( r2 > switchOn2 ? 2*c1*(c2*c2-c4) : 0.0 ); vdwa_energy = switchVal * r_12; vdwb_energy = switchVal * r_6; vdwa_gradient = ( dSwitchVal - 6.0 * switchVal * r_2 ) * r_12; vdwb_gradient = ( dSwitchVal - 3.0 * switchVal * r_2 ) * r_6; } *(fast_i++) = fast_energy; *(fast_i++) = fast_gradient; *(fast_i++) = 0; *(fast_i++) = 0; *(scor_i++) = scor_energy; *(scor_i++) = scor_gradient; *(scor_i++) = 0; *(scor_i++) = 0; *(slow_i++) = slow_energy; *(slow_i++) = slow_gradient; *(slow_i++) = 0; *(slow_i++) = 0; *(vdwa_i++) = vdwa_energy; *(vdwa_i++) = vdwa_gradient; *(vdwa_i++) = 0; *(vdwa_i++) = 0; *(vdwb_i++) = vdwb_energy; *(vdwb_i++) = vdwb_gradient; *(vdwb_i++) = 0; *(vdwb_i++) = 0; *(r2_i++) = r2 + r2_delta; } if ( ! 
r2_delta_i ) { NAMD_bug("Failed to find table entry for r2 == r2_limit\n"); } if ( r2_table[r2_delta_i] > r2_limit + r2_delta ) { NAMD_bug("Found bad table entry for r2 == r2_limit\n"); } int j; const char *table_name = "XXXX"; int smooth_short = 0; for ( j=0; j<5; ++j ) { BigReal *t0 = 0; switch (j) { case 0: t0 = fast_table; table_name = "FAST"; smooth_short = 1; break; case 1: t0 = scor_table; table_name = "SCOR"; smooth_short = 0; break; case 2: t0 = slow_table; table_name = "SLOW"; smooth_short = 0; break; case 3: t0 = vdwa_table; table_name = "VDWA"; smooth_short = 1; break; case 4: t0 = vdwb_table; table_name = "VDWB"; smooth_short = 1; break; } // patch up data for i=0 t0[0] = t0[4] - t0[5] * ( r2_delta / 64.0 ); // energy t0[1] = t0[5]; // gradient t0[2] = 0; t0[3] = 0; if ( smooth_short ) { BigReal energy0 = t0[4*r2_delta_i]; BigReal gradient0 = t0[4*r2_delta_i+1]; BigReal r20 = r2_table[r2_delta_i]; t0[0] = energy0 - gradient0 * (r20 - r2_table[0]); // energy t0[1] = gradient0; // gradient } BigReal *t; for ( i=0,t=t0; i<(n-1); ++i,t+=4 ) { BigReal x = ( r2_delta * ( 1 << (i/64) ) ) / 64.0; if ( r2_table[i+1] != r2_table[i] + x ) { NAMD_bug("Bad table delta calculation.\n"); } if ( smooth_short && i+1 < r2_delta_i ) { BigReal energy0 = t0[4*r2_delta_i]; BigReal gradient0 = t0[4*r2_delta_i+1]; BigReal r20 = r2_table[r2_delta_i]; t[4] = energy0 - gradient0 * (r20 - r2_table[i+1]); // energy t[5] = gradient0; // gradient } BigReal v1 = t[0]; BigReal g1 = t[1]; BigReal v2 = t[4]; BigReal g2 = t[5]; // explicit formulas for v1 + g1 x + c x^2 + d x^3 BigReal c = ( 3.0 * (v2 - v1) - x * (2.0 * g1 + g2) ) / ( x * x ); BigReal d = ( -2.0 * (v2 - v1) + x * (g1 + g2) ) / ( x * x * x ); // since v2 - v1 is imprecise, we refine c and d numerically // important because we need accurate forces (more than energies!) 
for ( int k=0; k < 2; ++k ) { BigReal dv = (v1 - v2) + ( ( d * x + c ) * x + g1 ) * x; BigReal dg = (g1 - g2) + ( 3.0 * d * x + 2.0 * c ) * x; c -= ( 3.0 * dv - x * dg ) / ( x * x ); d -= ( -2.0 * dv + x * dg ) / ( x * x * x ); } // store in the array; t[2] = c; t[3] = d; } if ( ! CkMyPe() ) { BigReal dvmax = 0; BigReal dgmax = 0; BigReal dvmax_r = 0; BigReal dgmax_r = 0; BigReal fdvmax = 0; BigReal fdgmax = 0; BigReal fdvmax_r = 0; BigReal fdgmax_r = 0; BigReal dgcdamax = 0; BigReal dgcdimax = 0; BigReal dgcaimax = 0; BigReal dgcdamax_r = 0; BigReal dgcdimax_r = 0; BigReal dgcaimax_r = 0; BigReal fdgcdamax = 0; BigReal fdgcdimax = 0; BigReal fdgcaimax = 0; BigReal fdgcdamax_r = 0; BigReal fdgcdimax_r = 0; BigReal fdgcaimax_r = 0; BigReal gcm = fabs(t0[1]); // gradient magnitude running average for ( i=0,t=t0; i<(n-1); ++i,t+=4 ) { const BigReal r2_base = r2_delta * ( 1 << (i/64) ); const BigReal r2_del = r2_base / 64.0; const BigReal r2 = r2_base - r2_delta + r2_del * (i%64); const BigReal r = sqrt(r2); if ( r > cutoff ) break; BigReal x = r2_del; BigReal dv = ( ( t[3] * x + t[2] ) * x + t[1] ) * x + t[0] - t[4]; BigReal dg = ( 3.0 * t[3] * x + 2.0 * t[2] ) * x + t[1] - t[5]; if ( t[4] != 0. && fabs(dv/t[4]) > fdvmax ) { fdvmax = fabs(dv/t[4]); fdvmax_r = r; } if ( fabs(dv) > dvmax ) { dvmax = fabs(dv); dvmax_r = r; } if ( t[5] != 0. && fabs(dg/t[5]) > fdgmax ) { fdgmax = fabs(dg/t[5]); fdgmax_r = r; } if ( fabs(dg) > dgmax ) { dgmax = fabs(dg); dgmax_r = r; } BigReal gcd = (t[4] - t[0]) / x; // centered difference gradient BigReal gcd_prec = (fabs(t[0]) + fabs(t[4])) * 1.e-15 / x; // roundoff gcm = 0.9 * gcm + 0.1 * fabs(t[5]); // magnitude running average BigReal gca = 0.5 * (t[1] + t[5]); // centered average gradient BigReal gci = ( 0.75 * t[3] * x + t[2] ) * x + t[1]; // interpolated BigReal rc = sqrt(r2 + 0.5 * x); BigReal dgcda = gcd - gca; if ( dgcda != 0. 
&& fabs(dgcda) < gcd_prec ) { // CkPrintf("ERROR %g < PREC %g AT %g AVG VAL %g\n", dgcda, gcd_prec, rc, gca); dgcda = 0.; } BigReal dgcdi = gcd - gci; if ( dgcdi != 0. && fabs(dgcdi) < gcd_prec ) { // CkPrintf("ERROR %g < PREC %g AT %g INT VAL %g\n", dgcdi, gcd_prec, rc, gci); dgcdi = 0.; } BigReal dgcai = gca - gci; if ( t[1]*t[5] > 0. && gcm != 0. && fabs(dgcda/gcm) > fdgcdamax ) { fdgcdamax = fabs(dgcda/gcm); fdgcdamax_r = rc; } if ( fabs(dgcda) > fdgcdamax ) { dgcdamax = fabs(dgcda); dgcdamax_r = rc; } if ( t[1]*t[5] > 0. && gcm != 0. && fabs(dgcdi/gcm) > fdgcdimax ) { fdgcdimax = fabs(dgcdi/gcm); fdgcdimax_r = rc; } if ( fabs(dgcdi) > fdgcdimax ) { dgcdimax = fabs(dgcdi); dgcdimax_r = rc; } if ( t[1]*t[5] > 0. && gcm != 0. && fabs(dgcai/gcm) > fdgcaimax ) { fdgcaimax = fabs(dgcai/gcm); fdgcaimax_r = rc; } if ( fabs(dgcai) > fdgcaimax ) { dgcaimax = fabs(dgcai); dgcaimax_r = rc; } #if 0 CkPrintf("TABLE %s %g %g %g %g\n",table_name,rc,dgcda/gcm,dgcda,gci); if (dv != 0.) CkPrintf("TABLE %d ENERGY ERROR %g AT %g (%d)\n",j,dv,r,i); if (dg != 0.) 
CkPrintf("TABLE %d FORCE ERROR %g AT %g (%d)\n",j,dg,r,i); #endif } if ( dvmax != 0.0 ) { iout << iINFO << "ABSOLUTE IMPRECISION IN " << table_name << " TABLE ENERGY: " << dvmax << " AT " << dvmax_r << "\n" << endi; } if ( fdvmax != 0.0 ) { iout << iINFO << "RELATIVE IMPRECISION IN " << table_name << " TABLE ENERGY: " << fdvmax << " AT " << fdvmax_r << "\n" << endi; } if ( dgmax != 0.0 ) { iout << iINFO << "ABSOLUTE IMPRECISION IN " << table_name << " TABLE FORCE: " << dgmax << " AT " << dgmax_r << "\n" << endi; } if ( fdgmax != 0.0 ) { iout << iINFO << "RELATIVE IMPRECISION IN " << table_name << " TABLE FORCE: " << fdgmax << " AT " << fdgmax_r << "\n" << endi; } if (fdgcdamax != 0.0 ) { iout << iINFO << "INCONSISTENCY IN " << table_name << " TABLE ENERGY VS FORCE: " << fdgcdamax << " AT " << fdgcdamax_r << "\n" << endi; if ( fdgcdamax > 0.1 ) { iout << iERROR << "\n"; iout << iERROR << "CALCULATED " << table_name << " FORCE MAY NOT MATCH ENERGY! POSSIBLE BUG!\n"; iout << iERROR << "\n"; } } if (0 && fdgcdimax != 0.0 ) { iout << iINFO << "INCONSISTENCY IN " << table_name << " TABLE ENERGY VS FORCE: " << fdgcdimax << " AT " << fdgcdimax_r << "\n" << endi; } if ( 0 && fdgcaimax != 0.0 ) { iout << iINFO << "INCONSISTENCY IN " << table_name << " TABLE AVG VS INT FORCE: " << fdgcaimax << " AT " << fdgcaimax_r << "\n" << endi; } } } for ( i=0; i<4*n; ++i ) { corr_table[i] = fast_table[i] + scor_table[i]; full_table[i] = fast_table[i] + slow_table[i]; } #if 0 for ( i=0; i<n; ++i ) { for ( int j=0; j<4; ++j ) { table_short[16*i+6-2*j] = table_noshort[16*i+6-2*j] = vdwa_table[4*i+j]; table_short[16*i+7-2*j] = table_noshort[16*i+7-2*j] = vdwb_table[4*i+j]; table_short[16*i+8+3-j] = fast_table[4*i+j]; table_short[16*i+12+3-j] = scor_table[4*i+j]; table_noshort[16*i+8+3-j] = corr_table[4*i+j]; table_noshort[16*i+12+3-j] = full_table[4*i+j]; } } #endif for ( i=0; i<n; ++i ) { table_short[16*i+ 0] = table_noshort[16*i+0] = -6.*vdwa_table[4*i+3]; table_short[16*i+ 2] = 
table_noshort[16*i+2] = -6.*vdwb_table[4*i+3]; table_short[16*i+ 4] = table_noshort[16*i+4] = -2.*vdwa_table[4*i+1]; table_short[16*i+ 6] = table_noshort[16*i+6] = -2.*vdwb_table[4*i+1]; table_short[16*i+1] = table_noshort[16*i+1] = -4.*vdwa_table[4*i+2]; table_short[16*i+3] = table_noshort[16*i+3] = -4.*vdwb_table[4*i+2]; table_short[16*i+5] = table_noshort[16*i+5] = -1.*vdwa_table[4*i+0]; table_short[16*i+7] = table_noshort[16*i+7] = -1.*vdwb_table[4*i+0]; table_short[16*i+8] = -6.*fast_table[4*i+3]; table_short[16*i+9] = -4.*fast_table[4*i+2]; table_short[16*i+10] = -2.*fast_table[4*i+1]; table_short[16*i+11] = -1.*fast_table[4*i+0]; table_noshort[16*i+8] = -6.*corr_table[4*i+3]; table_noshort[16*i+9] = -4.*corr_table[4*i+2]; table_noshort[16*i+10] = -2.*corr_table[4*i+1]; table_noshort[16*i+11] = -1.*corr_table[4*i+0]; table_short[16*i+12] = -6.*scor_table[4*i+3]; table_short[16*i+13] = -4.*scor_table[4*i+2]; table_short[16*i+14] = -2.*scor_table[4*i+1]; table_short[16*i+15] = -1.*scor_table[4*i+0]; table_noshort[16*i+12] = -6.*full_table[4*i+3]; table_noshort[16*i+13] = -4.*full_table[4*i+2]; table_noshort[16*i+14] = -2.*full_table[4*i+1]; table_noshort[16*i+15] = -1.*full_table[4*i+0]; } #if 0 char fname[100]; sprintf(fname,"/tmp/namd.table.pe%d.dat",CkMyPe()); FILE *f = fopen(fname,"w"); for ( i=0; i<(n-1); ++i ) { const BigReal r2_base = r2_delta * ( 1 << (i/64) ); const BigReal r2_del = r2_base / 64.0; const BigReal r2 = r2_base - r2_delta + r2_del * (i%64); BigReal *t; if ( r2 + r2_delta != r2_table[i] ) fprintf(f,"r2 error! 
"); fprintf(f,"%g",r2); t = fast_table + 4*i; fprintf(f," %g %g %g %g", t[0], t[1], t[2], t[3]); t = scor_table + 4*i; fprintf(f," %g %g %g %g", t[0], t[1], t[2], t[3]); t = slow_table + 4*i; fprintf(f," %g %g %g %g", t[0], t[1], t[2], t[3]); t = corr_table + 4*i; fprintf(f," %g %g %g %g", t[0], t[1], t[2], t[3]); t = full_table + 4*i; fprintf(f," %g %g %g %g", t[0], t[1], t[2], t[3]); t = vdwa_table + 4*i; fprintf(f," %g %g %g %g", t[0], t[1], t[2], t[3]); t = vdwb_table + 4*i; fprintf(f," %g %g %g %g", t[0], t[1], t[2], t[3]); fprintf(f,"\n"); } fclose(f); #endif #ifdef NAMD_CUDA send_build_cuda_force_table(); #endif }
// Placeholder kernel: select() installs it in every calc* slot before the
// real kernels are chosen, so calling a slot that was never assigned reports
// a bug through NAMD_bug.  The argument is ignored.
void ComputeNonbondedUtil::calc_error(nonbonded *)
{
  const char *const message =
    "Tried to call missing nonbonded compute routine.";
  NAMD_bug(message);
}
//every doMigration void ProxyPatch::receiveAll(ProxyDataMsg *msg) { DebugM(3, "receiveAll(" << patchID << ")\n"); if ( boxesOpen ) { proxyMsgBufferStatus = PROXYALLMSGBUFFERED; curProxyMsg = msg; return; } //The prevProxyMsg has to be deleted after this if-statement because // positionPtrBegin points to the space inside the prevProxyMsg if(prevProxyMsg!=NULL) { // #ifdef REMOVE_PROXYDATAMSG_EXTRACOPY // AtomMap::Object()->unregisterIDs(patchID,positionPtrBegin,positionPtrEnd); // #else atomMapper->unregisterIDsCompAtomExt(pExt.begin(), pExt.end()); // #endif } //Now delete the ProxyDataMsg of the previous step #if ! CMK_PERSISTENT_COMM || ! USE_PERSISTENT_TREE delete prevProxyMsg; #endif curProxyMsg = msg; prevProxyMsg = curProxyMsg; flags = msg->flags; #ifdef REMOVE_PROXYDATAMSG_EXTRACOPY if ( ((int64)msg->positionList) % 32 ) { // not aligned p.resize(msg->plLen); positionPtrBegin = p.begin(); memcpy(positionPtrBegin, msg->positionList, sizeof(CompAtom)*(msg->plLen)); } else { // aligned positionPtrBegin = msg->positionList; } positionPtrEnd = positionPtrBegin + msg->plLen; if ( ((int64)positionPtrBegin) % 32 ) NAMD_bug("ProxyPatch::receiveAll positionPtrBegin not 32-byte aligned"); #else p.resize(msg->plLen); memcpy(p.begin(), msg->positionList, sizeof(CompAtom)*(msg->plLen)); #endif // DMK #if defined(NAMD_CUDA) || defined(NAMD_MIC) cudaAtomPtr = msg->cudaAtomList; #endif numAtoms = msg->plLen; //numAtoms = p.size(); avgPositionPtrBegin = msg->avgPositionList; avgPositionPtrEnd = msg->avgPositionList + msg->avgPlLen; // BEGIN LA velocityPtrBegin = msg->velocityList; velocityPtrEnd = msg->velocityList + msg->vlLen; // END LA if (flags.doGBIS) { intRad.resize(numAtoms*2); for (int i = 0; i < numAtoms*2;i++) { intRad[i] = msg->intRadList[i]; } } if (flags.doLCPO) { lcpoType.resize(numAtoms); for (int i = 0; i < numAtoms; i++) { lcpoType[i] = msg->lcpoTypeList[i]; } } //We cannot reuse the CompAtomExt list inside the msg because //the information is needed at 
every step. In the current implementation //scheme, the ProxyDataMsg msg will be deleted for every step. //In order to keep this information, we have to do the extra copy. But //this overhead is amortized among the steps that atoms don't migrate // --Chao Mei pExt.resize(msg->plExtLen); memcpy(pExt.begin(), msg->positionExtList, sizeof(CompAtomExt)*(msg->plExtLen)); // DMK - Atom Separation (water vs. non-water) #if NAMD_SeparateWaters != 0 numWaterAtoms = msg->numWaterAtoms; #endif positionsReady(1); }
void Alg7::strategy() { // double bestSize0, bestSize1, bestSize2; computeInfo *c; int numAssigned; processorInfo* goodP[3][3][2]; // goodP[# of real patches][# of proxies] processorInfo* poorP[3][3][2]; // fallback option double startTime = CmiWallTimer(); // iout << iINFO << "calling makeHeaps. \n"; adjustBackgroundLoadAndComputeAverage(); makeHeaps(); // iout << iINFO << "Before assignment\n" << endi; // printLoads(); /* int numOverloaded = 0; for (int ip=0; ip<P; ip++) { if ( processors[ip].backgroundLoad > averageLoad ) ++numOverloaded; } if ( numOverloaded ) { iout << iWARN << numOverloaded << " processors are overloaded due to background load.\n" << endi; } */ numAssigned = 0; // for (int i=0; i<numPatches; i++) // { std::cout << "(" << patches[i].Id << "," << patches[i].processor ;} overLoad = 1.2; for (int ic=0; ic<numComputes; ic++) { // place computes w/ patches on heavily background loaded nodes first // place pair before self, because self is more flexible c = (computeInfo *) computeBgPairHeap->deleteMax(); if ( ! c ) c = (computeInfo *) computeBgSelfHeap->deleteMax(); if ( ! c ) c = (computeInfo *) computePairHeap->deleteMax(); if ( ! c ) c = (computeInfo *) computeSelfHeap->deleteMax(); if (c->processor != -1) continue; // skip to the next compute; if ( ! c ) NAMD_bug("Alg7: computesHeap empty!"); int i,j,k; for(i=0;i<3;i++) for(j=0;j<3;j++) { for(k=0;k<2;k++) { goodP[i][j][k]=0; poorP[i][j][k]=0; } } // first try for at least one proxy { Iterator nextProc; processorInfo *p; p = &processors[patches[c->patch1].processor]; togrid(goodP, poorP, p, c); p = &processors[patches[c->patch2].processor]; togrid(goodP, poorP, p, c); p = (processorInfo *)patches[c->patch1]. proxiesOn.iterator((Iterator *)&nextProc); while (p) { togrid(goodP, poorP, p, c); p = (processorInfo *)patches[c->patch1]. proxiesOn.next((Iterator*)&nextProc); } p = (processorInfo *)patches[c->patch2]. 
proxiesOn.iterator((Iterator *)&nextProc); while (p) { togrid(goodP, poorP, p, c); p = (processorInfo *)patches[c->patch2]. proxiesOn.next((Iterator*)&nextProc); } p = 0; // prefer to place compute with existing proxies over home patches if ((p = goodP[0][2][0]) // No home, two proxies || (p = goodP[1][1][0]) // One home, one proxy || (p = goodP[2][0][0]) // Two home, no proxies || (p = goodP[0][1][0]) // No home, one proxy || (p = goodP[1][0][0]) // One home, no proxies || (p = goodP[0][0][0]) // No home, no proxies || (p = goodP[0][1][1]) // No home, one proxy || (p = goodP[1][0][1]) // One home, no proxies || (p = goodP[0][0][1]) // No home, no proxies ) { assign(c,p); numAssigned++; continue; } } // no luck, do it the long way heapIterator nextProcessor; processorInfo *p = (processorInfo *) pes->iterator((heapIterator *) &nextProcessor); while (p) { togrid(goodP, poorP, p, c); p = (processorInfo *) pes->next(&nextProcessor); } // if (numAssigned >= 0) { Else is commented out below p = 0; // prefer to place compute with existing proxies over home patches if ((p = goodP[0][2][0]) // No home, two proxies || (p = goodP[1][1][0]) // One home, one proxy || (p = goodP[2][0][0]) // Two home, no proxies || (p = goodP[0][1][0]) // No home, one proxy || (p = goodP[1][0][0]) // One home, no proxies || (p = goodP[0][0][0]) // No home, no proxies || (p = goodP[0][1][1]) // No home, one proxy || (p = goodP[1][0][1]) // One home, no proxies || (p = goodP[0][0][1]) // No home, no proxies ) { assign(c,p); numAssigned++; } else if ( // overloaded processors (p = poorP[0][2][0]) // No home, two proxies || (p = poorP[1][1][0]) // One home, one proxy || (p = poorP[2][0][0]) // Two home, no proxies || (p = poorP[0][1][0]) // No home, one proxy || (p = poorP[1][0][0]) // One home, no proxies || (p = poorP[0][0][0]) // No home, no proxies || (p = poorP[0][1][1]) // No home, one proxy || (p = poorP[1][0][1]) // One home, no proxies || (p = poorP[0][0][1]) // No home, no proxies ) { 
//iout << iWARN << "overload assign to " << p->Id << "\n" << endi; assign(c,p); numAssigned++; } else { NAMD_bug("*** Alg 7 No receiver found 1 ***"); break; } } printLoads(); if ( computeMax() <= origMaxLoad ) { // binary-search refinement procedure multirefine(1.05); printLoads(); } }
//each timestep void ProxyPatch::receiveData(ProxyDataMsg *msg) { DebugM(3, "receiveData(" << patchID << ")\n"); //delete the ProxyDataMsg of the previous step delete prevProxyMsg; prevProxyMsg = NULL; if ( boxesOpen ) { proxyMsgBufferStatus = PROXYDATAMSGBUFFERED; // store message in queue (only need one element, though) curProxyMsg = msg; return; } //Reuse position arrays inside proxyDataMsg --Chao Mei curProxyMsg = msg; prevProxyMsg = curProxyMsg; flags = msg->flags; #ifdef REMOVE_PROXYDATAMSG_EXTRACOPY if ( ((int64)msg->positionList) % 32 ) { // not aligned p.resize(msg->plLen); positionPtrBegin = p.begin(); memcpy(positionPtrBegin, msg->positionList, sizeof(CompAtom)*(msg->plLen)); } else { // aligned positionPtrBegin = msg->positionList; } positionPtrEnd = positionPtrBegin + msg->plLen; if ( ((int64)positionPtrBegin) % 32 ) NAMD_bug("ProxyPatch::receiveData positionPtrBegin not 32-byte aligned"); #else p.resize(msg->plLen); memcpy(p.begin(), msg->positionList, sizeof(CompAtom)*(msg->plLen)); #endif // DMK #if defined(NAMD_CUDA) || defined(NAMD_MIC) cudaAtomPtr = msg->cudaAtomList; #endif avgPositionPtrBegin = msg->avgPositionList; avgPositionPtrEnd = msg->avgPositionList + msg->avgPlLen; // BEGIN LA velocityPtrBegin = msg->velocityList; velocityPtrEnd = msg->velocityList + msg->vlLen; // END LA if ( numAtoms == -1 ) { // for new proxies since receiveAtoms is not called //numAtoms = p.size(); numAtoms = msg->plLen; //Retrieve the CompAtomExt list CmiAssert(msg->plExtLen!=0); pExt.resize(msg->plExtLen); memcpy(pExt.begin(), msg->positionExtList, sizeof(CompAtomExt)*(msg->plExtLen)); // DMK - Atom Separation (water vs. non-water) #if NAMD_SeparateWaters != 0 numWaterAtoms = msg->numWaterAtoms; #endif positionsReady(1); } else { positionsReady(0); } }