コード例 #1
0
ファイル: init.C プロジェクト: gitter-badger/quinoa
/**
 * Handler that drives the exit protocol, dispatching on env->getMsgtype().
 *
 *  - StartExitMsg (asserted to arrive on PE 0): if _CkExitFnVec is not empty,
 *    dequeues one exit function, runs it and stops; otherwise falls through
 *    to the ExitMsg case.
 *  - ExitMsg (asserted to arrive on PE 0): the first time through, sets
 *    _exitStarted, re-registers _charmHandlerIdx and _bocHandlerIdx to
 *    _discardHandler, retags the envelope as ReqStatMsg and sends it out,
 *    either by broadcast or (with _ringexit) to every stride-th PE.
 *    A repeated ExitMsg only frees the envelope.
 *  - ReqStatMsg: re-registers the same two handlers to _discardHandler, sets
 *    _mainDone on live nodes, optionally forwards the envelope along the
 *    ring, and calls ConverseExit().
 *  - StatMsg (only when CMK_WITH_STATS): PE 0 stores the statistics payload
 *    and exits once CkNumValidPes() reports have been received.
 *
 * Any other message type aborts the program.
 *
 * @param env  envelope of the incoming exit-protocol message
 */
static void _exitHandler(envelope *env)
{
  DEBUGF(("exitHandler called on %d msgtype: %d\n", CkMyPe(), env->getMsgtype()));
  switch(env->getMsgtype()) {
    case StartExitMsg:
      CkAssert(CkMyPe()==0);
      // Run one queued exit function (if any) and stop here for this message.
      if (!_CkExitFnVec.isEmpty()) {
        CkExitFn fn = _CkExitFnVec.deq();
        fn();
        break;
      }
      // No exit function left: intentional fallthrough into ExitMsg.
    case ExitMsg:
      CkAssert(CkMyPe()==0);
      // Exit already in progress: drop the duplicate request.
      if(_exitStarted) {
        CmiFree(env);
        return;
      }
      _exitStarted = 1;
      // From now on both handler indices are served by _discardHandler.
      CkNumberHandler(_charmHandlerIdx,(CmiHandler)_discardHandler);
      CkNumberHandler(_bocHandlerIdx, (CmiHandler)_discardHandler);
      // Reuse the same envelope as the ReqStatMsg that is sent to the PEs.
      env->setMsgtype(ReqStatMsg);
      env->setSrcPe(CkMyPe());
      // Ring exit: instead of broadcasting, send a copy to PEs 0, stride,
      // 2*stride, ...; each receiver forwards it (see ReqStatMsg below).
      // NOTE(review): stride is CkNumPes()/_ringtoken; if _ringtoken were 0
      // or larger than CkNumPes() this would divide by zero or never advance
      // pe -- presumably _ringtoken is validated where it is parsed; confirm.
      if (_ringexit){
	DEBUGF(("[%d] Ring Exit \n",CkMyPe()));
        const int stride = CkNumPes()/_ringtoken;
        int pe = 0;
        while (pe<CkNumPes()) {
          CmiSyncSend(pe, env->getTotalsize(), (char *)env);
          pe += stride;
        }
        // CmiSyncSend was given the buffer without "AndFree", so free it here.
        CmiFree(env);
      }else{
	CmiSyncBroadcastAllAndFree(env->getTotalsize(), (char *)env);
      }	
      break;
    case ReqStatMsg:
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
      _messageLoggingExit();
#endif
      DEBUGF(("ReqStatMsg on %d\n", CkMyPe()));
      CkNumberHandler(_charmHandlerIdx,(CmiHandler)_discardHandler);
      CkNumberHandler(_bocHandlerIdx, (CmiHandler)_discardHandler);
      /* FAULT_EVAC: only nodes that are still alive report and finalize. */
      if(CmiNodeAlive(CkMyPe())){
#if CMK_WITH_STATS
         _sendStats();
#endif
      _mainDone = 1; // This is needed because the destructors for
                     // readonly variables will be called when the program
                     // exits. If the destructor is called while _mainDone
                     // is 0, it will assume that the readonly variable was
                     // declared locally. On all processors other than 0,
                     // _mainDone is never set to 1 before the program exits.
#if CMK_TRACE_ENABLED
      if (_ringexit) traceClose();
#endif
    }
      // Ring exit: pass the message on to the next PE unless that PE is the
      // start of another ring segment (a multiple of stride) or out of range.
      if (_ringexit) {
        int stride = CkNumPes()/_ringtoken;
        int pe = CkMyPe()+1;
        if (pe < CkNumPes() && pe % stride != 0)
          CmiSyncSendAndFree(pe, env->getTotalsize(), (char *)env);
        else
          CmiFree(env);
      }
      else
        CmiFree(env);
      // Everyone exits here - there may be issues with leftover messages in
      // the queue. With CMK_WITH_STATS the block below is skipped on PE 0,
      // which exits from the StatMsg case instead.
#if CMK_WITH_STATS
      if(CkMyPe())
#endif
      {
        DEBUGF(("[%d] Calling converse exit \n",CkMyPe()));
        ConverseExit();
        if(CharmLibInterOperate)
          CpvAccess(interopExitFlag) = 1;
      }
      break;
#if CMK_WITH_STATS
    case StatMsg:
      CkAssert(CkMyPe()==0);
      // Keep the payload of this envelope, indexed by the sending PE.
      _allStats[env->getSrcPe()] = (Stats*) EnvToUsr(env);
      _numStatsRecd++;
      DEBUGF(("StatMsg on %d with %d\n", CkMyPe(), _numStatsRecd));
			/* FAULT_EVAC: wait for the valid PEs only. */
      if(_numStatsRecd==CkNumValidPes()) {
        _printStats();
        DEBUGF(("[%d] Calling converse exit \n",CkMyPe()));
        ConverseExit();
        if(CharmLibInterOperate)
          CpvAccess(interopExitFlag) = 1;
      }
      break;
#endif
    default:
      CmiAbort("Internal Error(_exitHandler): Unknown-msg-type. Contact Developers.\n");
  }
}
コード例 #2
0
ファイル: hello.C プロジェクト: brog2610/quinoa
  /// Construct one element: keep a copy of the collide handle, zero the
  /// nTimes counter, announce the element and pass thisIndex to
  /// CollideRegister.
  Hello(const CollideHandle &collide_)
    : collide(collide_)
  {
    nTimes = 0;
    CkPrintf("Creating element %d on PE %d\n", thisIndex, CkMyPe());
    CollideRegister(collide, thisIndex);
  }
コード例 #3
0
ファイル: LdbCoordinator.C プロジェクト: sun51/ece598HK
/**
 * Sets up the coordinator: records this object in the
 * LdbCoordinator_instance slot (reporting an error and calling CkExit() if
 * the slot is already taken), resets the bookkeeping members, and registers
 * with the load balancing database as an object manager, a local barrier
 * receiver and a local barrier client.
 */
LdbCoordinator::LdbCoordinator()
{
  // Only one coordinator may occupy the instance slot.
  if (CkpvAccess(LdbCoordinator_instance) != NULL) {
    iout << iFILE << iERROR << iPE
         << "LdbCoordinator instanced twice on same node!" << endi;
    CkExit();
  } else {
    CkpvAccess(LdbCoordinator_instance) = this;
  }

  // Counters and flags.
  ldbCycleNum    = 1;
  takingLdbData  = 1;
  totalStepsDone = 0;
  nLocalComputes = 0;
  nLocalPatches  = 0;

  // Nothing has been allocated yet.
  patchNAtoms      = (int *) NULL;
  sequencerThreads = (Sequencer **) NULL;
  ldbStatsFP       = NULL;
  computeArray     = NULL;
  patchArray       = NULL;
  processorArray   = NULL;

  // Handle to the charm++ load balancing database.
  theLbdb = LBDatabase::Object();

  // Set the load balancing period (in seconds).  Without this the
  // load balancing framework will hang until 1 second has passed
  // since the last load balancing, causing hiccups in very fast runs.
  // Unfortunately, the clock is already set for the first load
  // balancing, but only +LBPeriod 1.0e-5 can fix that in older charm.
  // For newer versions this is handled in initproc above.
  theLbdb->SetLBPeriod(1.0e-5);

  // Register self as an object manager, handing over the static callbacks.
  myOMid.id.idx = 1;
  LDCallbacks cb = {
    (LDMigrateFn)staticMigrateFn,
    (LDStatsFn)staticStatsFn,
    (LDQueryEstLoadFn)staticQueryEstLoadFn
  };
  myHandle = theLbdb->RegisterOM(myOMid, (void*)this, cb);

  // Add myself as a local barrier receiver, so I know when I might
  // be registering objects.
  theLbdb->AddLocalBarrierReceiver((LDBarrierFn)staticReceiveAtSync,
                                   (void*)this);

  // Also add a local barrier client, to trigger load balancing.
  ldBarrierHandle =
    theLbdb->AddLocalBarrierClient((LDResumeFn)staticResumeFromSync,
                                   (void*)this);

  migrateMsgs  = 0;   // linked list, initially empty
  numComputes  = 0;
  reg_all_objs = 1;
}
コード例 #4
0
ファイル: OrbLB.C プロジェクト: davidheryanto/sc14
/**
 * Computes a new object-to-processor mapping by recursively dividing the
 * migratable objects along the three coordinates taken from their object ids.
 *
 * Steps: collect the migratable objects into computeLoad/vArray, sort each
 * direction, build the top-level partition (bounding box, object count and
 * total load, optionally including background load), call rec_divide() and
 * mapPartitionsToNodes(), verify the per-partition object counts, and write
 * the result into stats->to_proc.
 *
 * Objects that are not migratable keep stats->from_proc as their target.
 * The whole body is compiled only when CMK_LBDB_ON is set.
 *
 * @param stats  load database snapshot; to_proc is filled in as output
 */
void OrbLB::work(LDStats* stats)
{
#if CMK_LBDB_ON
  int i,j;

  statsData = stats;

  P = stats->nprocs();

  // total number of migratable objects
  nObjs = stats->n_migrateobjs;
#ifdef DEBUG
  CmiPrintf("ORB: num objects:%d\n", nObjs);
#endif

  // Nothing can be moved: leave every object where it is. Without this
  // guard the code below would read computeLoad[0] of an empty array.
  if (nObjs <= 0) {
    for (i=0; i<stats->n_objs; i++)
      stats->to_proc[i] = stats->from_proc[i];
    return;
  }

  // create computeLoad and calculate tentative computes coordinates
  computeLoad = new ComputeLoad[nObjs];
  for (i=XDIR; i<=ZDIR; i++) vArray[i] = new VecArray[nObjs];

  // v[0] = XDIR  v[1] = YDIR v[2] = ZDIR
  // vArray[XDIR] is an array holding the x vector for all computes
  int objIdx = 0;
  for (i=0; i<stats->n_objs; i++) {
    LDObjData &odata = stats->objData[i];
    if (odata.migratable == 0) continue;
    computeLoad[objIdx].id = objIdx;
    computeLoad[objIdx].v[XDIR] = odata.objID().id[0];
    computeLoad[objIdx].v[YDIR] = odata.objID().id[1];
    computeLoad[objIdx].v[ZDIR] = odata.objID().id[2];
#if CMK_LB_CPUTIMER
    computeLoad[objIdx].load = _lb_args.useCpuTime()?odata.cpuTime:odata.wallTime;
#else
    computeLoad[objIdx].load = odata.wallTime;
#endif
    computeLoad[objIdx].refno = 0;
    computeLoad[objIdx].partition = NULL;
    for (int k=XDIR; k<=ZDIR; k++) {
        vArray[k][objIdx].id = objIdx;
        vArray[k][objIdx].v = computeLoad[objIdx].v[k];
    }
#ifdef DEBUG
    CmiPrintf("Object %d: %d %d %d load:%f\n", objIdx, computeLoad[objIdx].v[XDIR], computeLoad[objIdx].v[YDIR], computeLoad[objIdx].v[ZDIR], computeLoad[objIdx].load);
#endif
    objIdx ++;
  }
  CmiAssert(nObjs == objIdx);

  double t = CkWallTimer();

  quicksort(XDIR);
  quicksort(YDIR);
  quicksort(ZDIR);
#ifdef DEBUG
  CmiPrintf("qsort time: %f\n", CkWallTimer() - t);
#endif

  // one partition per available processor
  npartition = 0;
  for (i=0; i<P; i++)
    if (stats->procs[i].available == CmiTrue) npartition++;
  partitions = new Partition[npartition];

  // Bounding box and total load of all migratable objects. Object 0 seeds
  // both the box and the load sum, so the loop can start at 1 without
  // dropping its load from the total.
  double totalLoad = computeLoad[0].load;
  int minx, miny, minz, maxx, maxy, maxz;
  minx = maxx= computeLoad[0].v[XDIR];
  miny = maxy= computeLoad[0].v[YDIR];
  minz = maxz= computeLoad[0].v[ZDIR];
  for (i=1; i<nObjs; i++) {
    totalLoad += computeLoad[i].load;
    if (computeLoad[i].v[XDIR] < minx) minx = computeLoad[i].v[XDIR];
    else if (computeLoad[i].v[XDIR] > maxx) maxx = computeLoad[i].v[XDIR];
    if (computeLoad[i].v[YDIR] < miny) miny = computeLoad[i].v[YDIR];
    else if (computeLoad[i].v[YDIR] > maxy) maxy = computeLoad[i].v[YDIR];
    if (computeLoad[i].v[ZDIR] < minz) minz = computeLoad[i].v[ZDIR];
    else if (computeLoad[i].v[ZDIR] > maxz) maxz = computeLoad[i].v[ZDIR];
  }

  top_partition.origin[XDIR] = minx;
  top_partition.origin[YDIR] = miny;
  top_partition.origin[ZDIR] = minz;
  top_partition.corner[XDIR] = maxx;
  top_partition.corner[YDIR] = maxy;
  top_partition.corner[ZDIR] = maxz;

  top_partition.refno = 0;
  top_partition.load = 0.0;
  top_partition.count = nObjs;

  // if we take background load into account
  if (!_lb_args.ignoreBgLoad()) {
    top_partition.bkpes.resize(0);
    // average load per partition including every available PE's background
    double total = totalLoad;
    for (i=0; i<P; i++) {
      if (!stats->procs[i].available) continue;
      double bkload = stats->procs[i].bg_walltime;
      total += bkload;
    }
    double averageLoad = total / npartition;
    // PEs whose background load alone reaches the average receive no objects
    for (i=0; i<P; i++) {
      if (!stats->procs[i].available) continue;
      double bkload = stats->procs[i].bg_walltime;
      if (bkload < averageLoad) top_partition.bkpes.push_back(i);
      else CkPrintf("OrbLB Info> PE %d with %f background load will have 0 object.\n", i, bkload);
    }
    npartition = top_partition.bkpes.size();
    // formally add these bg load to total load
    for (i=0; i<npartition; i++)
      totalLoad += stats->procs[top_partition.bkpes[i]].bg_walltime;
    if (_lb_args.debug()>=2) {
      CkPrintf("BG load: ");
      for (i=0; i<P; i++)  CkPrintf(" %f", stats->procs[i].bg_walltime);
      CkPrintf("\n");
      CkPrintf("Partition BG load: ");
      for (i=0; i<npartition; i++)  CkPrintf(" %f", stats->procs[top_partition.bkpes[i]].bg_walltime);
      CkPrintf("\n");
    }
  }

  top_partition.load = totalLoad;

  currentp = 0;
  refno = 0;

  // recursively divide
  rec_divide(npartition, top_partition);

  // mapping partitions to nodes
  mapPartitionsToNodes();

  // sanity check: attach every object to its partition and count them
  int *num = new int[P];
  for (i=0; i<P; i++) num[i] = 0;

  for (i=0; i<nObjs; i++)
  {
    for (j=0; j<npartition; j++) {
      if (computeLoad[i].refno == partitions[j].refno)   {
        computeLoad[i].partition = partitions+j;
        num[j] ++;
      }
    }
    CmiAssert(computeLoad[i].partition != NULL);
  }

  for (i=0; i<npartition; i++)
    if (num[i] != partitions[i].count)
      CmiAbort("OrbLB: Compute counts don't agree!\n");

  delete [] num;

  // Save output: objIdx walks the migratable objects in the same order in
  // which computeLoad was filled above.
  objIdx = 0;
  for(int obj=0;obj<stats->n_objs;obj++) {
      stats->to_proc[obj] = stats->from_proc[obj];
      LDObjData &odata = stats->objData[obj];
      if (odata.migratable == 0) { continue; }
      int frompe = stats->from_proc[obj];
      int tope = computeLoad[objIdx].partition->node;
      if (frompe != tope) {
        if (_lb_args.debug() >= 3) {
              CkPrintf("[%d] Obj %d migrating from %d to %d\n",
                     CkMyPe(),obj,frompe,tope);
        }
        stats->to_proc[obj] = tope;
      }
      objIdx ++;
  }

  // free memory
  delete [] computeLoad;
  for (i=0; i<3; i++) delete [] vArray[i];
  delete [] partitions;

  if (_lb_args.debug() >= 1)
    CkPrintf("OrbLB finished time: %fs\n", CkWallTimer() - t);
#endif
}
コード例 #5
0
ファイル: ComputeDPME.C プロジェクト: aar2163/NAMD-energy
void ComputeDPME::doWork()
{
  DebugM(4,"Entering ComputeDPME::doWork().\n");

  Pme2Particle *localData;

  ResizeArrayIter<PatchElem> ap(patchList);

  // Skip computations if nothing to do.
  if ( ! patchList[0].p->flags.doFullElectrostatics )
  {
    for (ap = ap.begin(); ap != ap.end(); ap++) {
      CompAtom *x = (*ap).positionBox->open();
      Results *r = (*ap).forceBox->open();
      (*ap).positionBox->close(&x);
      (*ap).forceBox->close(&r);
    }
    if ( master ) {
      master->reduction->submit();
    }
    return;
  }

  // allocate storage
  numLocalAtoms = 0;
  for (ap = ap.begin(); ap != ap.end(); ap++) {
    numLocalAtoms += (*ap).p->getNumAtoms();
  }

  Lattice lattice = patchList[0].p->flags.lattice;

  localData = new Pme2Particle[numLocalAtoms];  // given to message

  // get positions and charges
  Pme2Particle * data_ptr = localData;
  const BigReal coulomb_sqrt = sqrt( COULOMB * ComputeNonbondedUtil::scaling
				* ComputeNonbondedUtil::dielectric_1 );
  for (ap = ap.begin(); ap != ap.end(); ap++) {
    CompAtom *x = (*ap).positionBox->open();
    if ( patchList[0].p->flags.doMolly ) {
      (*ap).positionBox->close(&x);
      x = (*ap).avgPositionBox->open();
    }
    int numAtoms = (*ap).p->getNumAtoms();

    for(int i=0; i<numAtoms; ++i)
    {
      Vector tmp = lattice.delta(x[i].position);
      data_ptr->x = tmp.x;
      data_ptr->y = tmp.y;
      data_ptr->z = tmp.z;
      data_ptr->cg = coulomb_sqrt * x[i].charge;
      data_ptr->id = x[i].id;
      ++data_ptr;
    }

    if ( patchList[0].p->flags.doMolly ) { (*ap).avgPositionBox->close(&x); }
    else { (*ap).positionBox->close(&x); }
  }

  // send data to master
  ComputeDPMEDataMsg *msg = new ComputeDPMEDataMsg;
  msg->node = CkMyPe();
  msg->numParticles = numLocalAtoms;
  msg->particles = localData;
  comm->sendComputeDPMEData(msg);
}
コード例 #6
0
ファイル: TopoCentLB.C プロジェクト: quinoacomputing/quinoa
/**
 * Topology-aware centralized strategy.
 *
 * Groups the objects into n_pes partitions (computed here when make_mapping
 * is set, otherwise taken from stats->from_proc), builds a partition graph
 * whose edge weights are the bytes communicated between partitions, looks up
 * the processor topology named by _lbtopo, lets calculateMST() assign each
 * partition to a processor, and writes the result into stats->to_proc.
 *
 * Aborts if no processor is available or the topology name is unknown.
 *
 * @param stats  load database snapshot; to_proc is filled in as output
 */
void TopoCentLB :: work(LDStats *stats)
{
  int proc;
  int i,j;
  int n_pes = stats->nprocs();

  if (_lb_args.debug() >= 2) {
    CkPrintf("In TopoCentLB Strategy...\n");
  }

  // Make sure that there is at least one available processor.
  for (proc = 0; proc < n_pes; proc++) {
    if (stats->procs[proc].available) {
      break;
    }
  }

  if (proc == n_pes) {
    CmiAbort ("TopoCentLB: no available processors!");
  }


  removeNonMigratable(stats, n_pes);
  int *newmap = new int[stats->n_objs];


  if(make_mapping)
    computePartitions(stats, n_pes, newmap);
  else {
    //mapping taken from previous algo
    for(i=0;i<stats->n_objs;i++) {
      newmap[i]=stats->from_proc[i];
    }
  }

  //Debugging Code
  if(_lb_args.debug() >=2){
    CkPrintf("Map obtained from partitioning:\n");
    for(i=0;i<stats->n_objs;i++)
      CkPrintf(" %d,%d ",i,newmap[i]);
  }

  int max_objs = findMaxObjs(newmap,stats->n_objs, n_pes);

  partgraph = new PartGraph(n_pes, max_objs);

  //Fill up the partition graph - first fill the nodes and then, the edges

  for(i=0;i<stats->n_objs;i++)
    {
      PartGraph::Node* n = &partgraph->nodes[newmap[i]];
      n->obj_list[n->num_objs]=i;
      n->num_objs++;
    }

  // per-partition flag used for multicast messages below
  int *addedComm=new int[n_pes];

  stats->makeCommHash();

  // partition with the largest accumulated communication so far (-1: none)
  int max_comm_part=-1;

  double max_comm=0;

  //Try putting random amount of communication on the partition graph edges to see if things work fine
  //This also checks the running time of the algorithm since number of edges is high than in a practical scenario
#ifdef RAND_COMM
  for(i = 0; i < n_pes; i++) {
    for(j = i+1; j < n_pes; j++) {
      int val;
      if(rand()%5==0)
        val=0;
      else
        val= rand()%1000;

      partgraph->edges[i][j] = val;
      partgraph->edges[j][i] = val;

      partgraph->nodes[i].comm += val;
      partgraph->nodes[j].comm += val;

      if(partgraph->nodes[i].comm > max_comm){
        max_comm = partgraph->nodes[i].comm;
        max_comm_part = i;
      }
      if(partgraph->nodes[j].comm > max_comm){
        max_comm = partgraph->nodes[j].comm;
        max_comm_part = j;
      }
    }
  }
#else
  //Adding communication to the partition graph edges
  for(i=0;i<stats->n_comm;i++)
    {
      //DO I consider other comm too....i.e. to or from a processor
      LDCommData &cdata = stats->commData[i];
      if(!cdata.from_proc() && cdata.receiver.get_type() == LD_OBJ_MSG){
        // NOTE(review): unlike the object-list branch below, a hash result
        // of -1 is not handled here before indexing newmap -- confirm that
        // getHash cannot fail for these records.
        int senderID = stats->getHash(cdata.sender);
        int recverID = stats->getHash(cdata.receiver.get_destObj());
        CmiAssert(senderID < stats->n_objs);
        CmiAssert(recverID < stats->n_objs);

        // communication inside one partition adds no edge
        if(newmap[senderID]==newmap[recverID])
          continue;

        // first traffic on this edge: both endpoints gain a neighbour
        if(partgraph->edges[newmap[senderID]][newmap[recverID]] == 0){
          partgraph->nodes[newmap[senderID]].degree++;
          partgraph->nodes[newmap[recverID]].degree++;
        }

        partgraph->edges[newmap[senderID]][newmap[recverID]] += cdata.bytes;
        partgraph->edges[newmap[recverID]][newmap[senderID]] += cdata.bytes;

        partgraph->nodes[newmap[senderID]].comm += cdata.bytes;
        partgraph->nodes[newmap[recverID]].comm += cdata.bytes;

        //Keeping track of maximum communicating partition
        if(partgraph->nodes[newmap[senderID]].comm > max_comm){
          max_comm = partgraph->nodes[newmap[senderID]].comm;
          max_comm_part = newmap[senderID];
        }
        if(partgraph->nodes[newmap[recverID]].comm > max_comm){
          max_comm = partgraph->nodes[newmap[recverID]].comm;
          max_comm_part = newmap[recverID];
        }
      }
      else if(cdata.receiver.get_type() == LD_OBJLIST_MSG) {
        int nobjs;
        LDObjKey *objs = cdata.receiver.get_destObjs(nobjs);
        int senderID = stats->getHash(cdata.sender);
        for(j = 0; j < n_pes; j++)
          addedComm[j]=0;
        for (j=0; j<nobjs; j++) {
          int recverID = stats->getHash(objs[j]);
          // An endpoint missing from the hash is skipped when only
          // migratable objects are considered and is fatal otherwise.
          if((senderID == -1)||(recverID == -1)) {
            if (_lb_args.migObjOnly()) continue;
            else CkAbort("Error in search\n");
          }

          if(newmap[senderID]==newmap[recverID])
            continue;

          if(partgraph->edges[newmap[senderID]][newmap[recverID]] == 0){
            partgraph->nodes[newmap[senderID]].degree++;
            partgraph->nodes[newmap[recverID]].degree++;
          }

          //Communication added only once for a message sent to many objects on a single processor
          if(!addedComm[newmap[recverID]]){
            partgraph->edges[newmap[senderID]][newmap[recverID]] += cdata.bytes;
            partgraph->edges[newmap[recverID]][newmap[senderID]] += cdata.bytes;

            partgraph->nodes[newmap[senderID]].comm += cdata.bytes;
            partgraph->nodes[newmap[recverID]].comm += cdata.bytes;

            if(partgraph->nodes[newmap[senderID]].comm > max_comm){
              max_comm = partgraph->nodes[newmap[senderID]].comm;
              max_comm_part = newmap[senderID];
            }
            if(partgraph->nodes[newmap[recverID]].comm > max_comm){
              max_comm = partgraph->nodes[newmap[recverID]].comm;
              max_comm_part = newmap[recverID];
            }
            addedComm[newmap[recverID]]=1;
          }
        }
      }

    }
#endif

  int *proc_mapping = new int[n_pes];

  delete [] addedComm;

  LBtopoFn topofn;

  //Parsing the command line input for getting the processor topology:
  //only the part of _lbtopo before the first ':' names the topology.
  //strtok modifies its argument, hence the private copy.
  char *lbcopy = strdup(_lbtopo);
  char *ptr = strchr(lbcopy, ':');
  if (ptr!=NULL)
    ptr = strtok(lbcopy, ":");
  else
    ptr=lbcopy;

  topofn = LBTopoLookup(ptr);
  if (topofn == NULL) {
    char str[1024];
    CmiPrintf("TopoCentLB> Fatal error: Unknown topology: %s. Choose from:\n", ptr);
    printoutTopo();
    // bounded: the topology name comes from the command line
    snprintf(str, sizeof(str), "TopoCentLB> Fatal error: Unknown topology: %s", ptr);
    CmiAbort(str);
  }
  // ptr points into lbcopy and is not needed past this point
  free(lbcopy);

  topo = topofn(n_pes);

  //Call the core routine to produce the partition processor mapping
  calculateMST(partgraph,topo,proc_mapping,max_comm_part);
  //Returned partition graph is a Maximum Spanning Tree -- converted in above function itself

  //Debugging code: Result of mapping partition graph onto processor graph
  if (_lb_args.debug()>1) {
    CkPrintf("Resultant mapping..(partition,processor)\n");
    for(i = 0; i < n_pes; i++)
      CkPrintf("%d,%d\n",i,proc_mapping[i]);
  }

  //Store the result in the load balancing database
  int pe;
  PartGraph::Node* n;
  for(i = 0; i < n_pes; i++){
    pe = proc_mapping[i];
    n = &partgraph->nodes[i];
    for(j=0;j<n->num_objs;j++){
      stats->to_proc[n->obj_list[j]] = pe;
      if (_lb_args.debug()>1)
        CkPrintf("[%d] Obj %d migrating from %d to %d\n", CkMyPe(),n->obj_list[j],stats->from_proc[n->obj_list[j]],pe);
    }
  }

  delete[] newmap;
  delete[] proc_mapping;
  //Delete hopCount
  for(i = 0; i < n_pes; i++)
    delete[] hopCount[i];

  delete[] hopCount;
  delete[] heapMapping;

  delete partgraph;
}
コード例 #7
0
//! process command line arguments!
void TraceCounter::traceInit(char **argv)
{
  CpvInitialize(CountLogPool*, _logPool);
  CpvInitialize(char*, _logName);
  CpvInitialize(double, version);
  CpvInitialize(char**, _counterNames);
  CpvInitialize(char**, _counterDesc);
  CpvInitialize(int,    _numCounters);
  CpvInitialize(int, _reductionID);

  CpvAccess(_logName) = (char *) malloc(strlen(argv[0])+1);
  _MEMCHECK(CpvAccess(_logName));
  strcpy(CpvAccess(_logName), argv[0]);
  CpvAccess(version) = VER;

  int i;
  // parse command line args
  char* counters = NULL;
  commandLine_ = NULL;
  bool badArg = false;
  int numCounters = 0;
  if (CmiGetArgStringDesc(argv, "+counters", &counters, "Measure these performance counters")) {
    if (CmiMyPe()==0) { CmiPrintf("Counters: %s\n", counters); }
    int offset = 0;
    int limit = strlen(counters);
    char* ptr = counters;
    while (offset < limit && 
	   (ptr = strtok(&counters[offset], ",")) != NULL) 
    { 
      offset += strlen(ptr)+1;
      ptr = &ptr[strlen(ptr)+1];
      numCounters++; 
    }
    if (CmiMyPe()==0) { 
      CmiPrintf("There are %d counters\n", numCounters); 
    }
    commandLine_ = new CounterArg[numCounters];
    ptr = counters;
    for (i=0; i<numCounters; i++) {
      commandLine_[i].arg = ptr;
      if (!matchArg(&commandLine_[i])) { 
	if (CmiMyPe()==0) { CmiPrintf("Bad arg: [%s]\n", ptr); }
	badArg = true; 
      }
      ptr = &ptr[strlen(ptr)+1];
    }
  }
  commandLineSz_ = numCounters;

  // check to see if args are valid, output if not
  if (badArg || CmiGetArgFlagDesc(argv, "+count-help", "List available performance counters")) {
    if (CmiMyPe() == 0) { printHelp(); }
    ConverseExit();  return;
  }
  else if (counters == NULL) {
    if (CmiMyPe() == 0) { usage(); }
    ConverseExit();  return;
  }

  // get optional command line args
  overview_      = CmiGetArgFlag(argv, "+count-overview");  
  switchRandom_  = CmiGetArgFlag(argv, "+count-switchrandom");  
  switchByPhase_ = CmiGetArgFlag(argv, "+count-switchbyphase");
  noLog_         = CmiGetArgFlag(argv, "+count-nolog");
  writeByPhase_  = CmiGetArgFlag(argv, "+count-writebyphase");
  char* logName  = NULL;
  if (CmiGetArgString(argv, "+count-logname", &logName)) {
    CpvAccess(_logName) = logName;
    if (noLog_) {
      if (CkMyPe()==0) {
	CmiPrintf("+count-logname and +count-nolog are MUTUALLY EXCLUSIVE\n");
	usage();
	CmiAbort("");
      }
    }
  }
  if (switchByPhase_ && overview_) {
    if (CkMyPe()==0) {
      CmiPrintf(
	"+count-switchbyphase and +count-overview are MUTUALLY EXCLUSIVE\n"
	"+count-overview automatically switches by phase.\n");
      usage();
      CmiAbort("");
    }
  }
  if (writeByPhase_ && noLog_) {
    if (CkMyPe()==0) {
      CmiPrintf("+count-writebyphase and +count-nolog are MUTUALLY EXCLUSIVE\n");
      usage();
      CmiAbort("");
    }
  }

  // parse through commandLine_, figure out which belongs on which list (1 vs 2)
  CounterArg* last1 = NULL;
  CounterArg* last2 = NULL;
  CounterArg* tmp = NULL;
  counter1Sz_ = counter2Sz_ = 0;
  for (i=0; i<commandLineSz_; i++) {
    tmp = &commandLine_[i];
    if (tmp->code < NUM_COUNTER_ARGS/2) {
      if (counter1_ == NULL) { counter1_ = tmp;  last1 = counter1_; }
      else { last1->next = tmp;  last1 = tmp; }
      counter1Sz_++;
    }
    else {
      if (counter2_ == NULL) { counter2_ = tmp;  last2 = counter2_; }
      else { last2->next = tmp;  last2 = tmp; }
      counter2Sz_++;
    }
  }
  if (counter1_ == NULL) {
    printHelp();
    if (CmiMyPe()==0) {
      CmiPrintf("\nMust specify some counters with code < %d\n", 
		NUM_COUNTER_ARGS/2);
    }
    ConverseExit();
  }
  if (counter2_ == NULL) {
    printHelp();
    if (CmiMyPe()==0) {
      CmiPrintf("\nMust specify some counters with code >= %d\n", 
		NUM_COUNTER_ARGS/2);
    }
    ConverseExit();
  }
  last1->next = counter1_;
  last2->next = counter2_;

  // all args valid, now set up logging
  if (CmiMyPe() == 0) {
    CmiPrintf("Running with tracemode=counter and args:\n");
    // print out counter1 set
    tmp = counter1_;
    i = 0;
    do {
      CmiPrintf("  <counter1-%d>=%d %s %s\n", i, tmp->code, tmp->arg, tmp->desc);
      tmp = tmp->next;
      i++;
    } while (tmp != counter1_);
    // print out counter2 set
    tmp = counter2_;
    i = 0;
    do {
      CmiPrintf("  <counter2-%d>=%d %s %s\n", i, tmp->code, tmp->arg, tmp->desc);
      tmp = tmp->next;
      i++;
    } while (tmp != counter2_);

    CmiPrintf(
      "+count-overview %d\n+count-switchrandom %d\n"
      "+count-switchbyphase %d\n+count-nolog %d\n"
      "+count-logname %s\n+count-writebyphase %d\n",
      overview_, switchRandom_, switchByPhase_, noLog_, 
      logName, writeByPhase_);
  }

  // DEBUGF(("    DEBUG: Counter1=%d Counter2=%d\n", counter1_, counter2_));
  CpvAccess(_logPool) = new CountLogPool();

  // allocate names so can do reduction/analysis on the fly
  char** counterNames = new char*[counter1Sz_+counter2Sz_];
  char** counterDesc = new char*[counter1Sz_+counter2Sz_];
  tmp = counter1_;
  for (i=0; i<counter1Sz_; i++) {
    tmp->index = i;
    counterNames[i] = tmp->arg; 
    counterDesc[i] = tmp->desc;
    tmp = tmp->next;
  }
  tmp = counter2_;
  for (i=0; i<counter2Sz_; i++) {
    tmp->index = counter1Sz_+i;
    counterNames[counter1Sz_+i] = tmp->arg; 
    counterDesc[counter1Sz_+i] = tmp->desc;
    tmp = tmp->next;
  }
  CpvAccess(_counterNames) = counterNames;
  CpvAccess(_counterDesc) = counterDesc;
  CpvAccess(_numCounters) = numCounters;
  // don't erase counterNames or counterDesc, 
  // the reduction client will do it on the final reduction

  _MEMCHECK(CpvAccess(_logPool));
  CpvAccess(_logPool)->init(numCounters);
  DEBUGF(("%d/%d DEBUG: Created _logPool at %08x\n", 
          CmiMyPe(), CmiNumPes(), CpvAccess(_logPool)));
}
コード例 #8
0
ファイル: gvt.C プロジェクト: davidheryanto/sc14
/// ENTRY: Gathers PVT reports; calculates and broadcasts GVT to PVTs
/**
 * Folds one report into the running minima (optGVT, conGVT) and the
 * send/receive list SRs. Once reportsExpected+startOffset reports have
 * arrived, derives the new estimate estGVT, checks for termination (end time
 * reached or more than two inactive iterations), sends the estimate to all
 * PVT branches and either stops the simulation or starts the next estimation
 * on the next GVT branch. The per-iteration state is reset afterwards.
 *
 * @param m  report message; always freed here
 */
void GVT::computeGVT(UpdateMsg *m)
{
#ifndef CMK_OPTIMIZE
  if(pose_config.stats)
    localStats->TimerStart(GVT_TIMER);
#endif
  CProxy_PVT p(ThePVT);
  CProxy_GVT g(TheGVT);

  if (CkMyPe() != 0) startOffset = 1;
  if (m->runGVTflag == 1) done++;
  else {
    // see if message provides new min optGVT or conGVT
    if ((optGVT < 0) || ((m->optPVT > POSE_UnsetTS) && (m->optPVT < optGVT)))
      optGVT = m->optPVT;
    if ((conGVT < 0) || ((m->conPVT > POSE_UnsetTS) && (m->conPVT < conGVT)))
      conGVT = m->conPVT;
    // add send/recv info to SRs
    addSR(&SRs, m->SRs, optGVT, m->numEntries);
    done++;
  }
  CkFreeMsg(m);

  if (done == reportsExpected+startOffset) { // all PVT reports are in
#ifndef CMK_OPTIMIZE
    if(pose_config.stats)
      localStats->GvtInc();
#endif
    gvtIterationCount++;
    done = 0;
    startOffset = 1;

    POSE_TimeType earliestMsg = POSE_UnsetTS;
    POSE_TimeType lastGVT = estGVT; // store previous estimate
    if (lastGVT < 0) lastGVT = 0;

    // derive GVT estimate from min optimistic & conservative GVTs
    estGVT = optGVT;
    if ((conGVT > POSE_UnsetTS) && (estGVT > POSE_UnsetTS) && (conGVT < estGVT))  estGVT = conGVT;

    // Check if send/recv activity provides lower possible estimate:
    // find the first entry (not beyond the estimate) whose send and
    // receive counts differ.
    SRentry *tmp = SRs;
    POSE_TimeType lastSR = POSE_UnsetTS;
    while (tmp && ((tmp->timestamp <= estGVT) || (estGVT == POSE_UnsetTS))) {
      lastSR = tmp->timestamp;
      if (tmp->sends != tmp->recvs) {
        earliestMsg = tmp->timestamp;
        break;
      }
      tmp = tmp->next;
    }
    if (((earliestMsg < estGVT) && (earliestMsg != POSE_UnsetTS)) ||
        (estGVT == POSE_UnsetTS))
      estGVT = earliestMsg;
    if ((lastSR != POSE_UnsetTS) && (estGVT == POSE_UnsetTS) &&
        (lastSR > lastGVT))
      estGVT = lastSR;

    // check for inactivity
    if ((optGVT == POSE_UnsetTS) && (earliestMsg == POSE_UnsetTS)) {
      inactive++;
      estGVT = lastGVT;
      if (inactive == 1) inactiveTime = lastGVT;
    }
    else if (estGVT < 0) {
      estGVT = lastGVT;
      inactive = 0;
    }
    else inactive = 0;

    // check the estimate: GVT must never move backwards
    CmiAssert(estGVT >= lastGVT);

    // check for termination conditions
    int term = 0;
    if ((estGVT >= POSE_endtime) && (POSE_endtime > POSE_UnsetTS)) {
#if USE_LONG_TIMESTAMPS
      CkPrintf("At endtime: %lld\n", POSE_endtime);
#else
      CkPrintf("At endtime: %d\n", POSE_endtime);
#endif
      term = 1;
    }
    else if (inactive > 2) {
#if USE_LONG_TIMESTAMPS
      CkPrintf("Simulation inactive at time: %lld\n", inactiveTime);
#else
      CkPrintf("Simulation inactive at time: %d\n", inactiveTime);
#endif
      term = 1;
    }

    // Report the new GVT estimate to all PVT branches. The message is
    // created only here, where it is guaranteed to be sent; creating it on
    // every call would leak it whenever reports are still outstanding.
    GVTMsg *gmsg = new GVTMsg;
    gmsg->estGVT = estGVT;
    gmsg->done = term;
    if (term) {
#if USE_LONG_TIMESTAMPS
      CkPrintf("Final GVT = %lld\n", gmsg->estGVT);
#else
      CkPrintf("Final GVT = %d\n", gmsg->estGVT);
#endif
      p.setGVT(gmsg);
      POSE_stop();
    }
    else {
      p.setGVT(gmsg);

      if(pose_config.lb_on)
        {
          // perform load balancing
#ifndef CMK_OPTIMIZE
          if(pose_config.stats)
            localStats->SwitchTimer(LB_TIMER);
#endif

          if (CkNumPes() > 1) {
            nextLBstart++;
            if (pose_config.lb_skip == nextLBstart) {
              TheLBG.calculateLocalLoad();
              nextLBstart = 0;
            }
          }
#ifndef CMK_OPTIMIZE
          if(pose_config.stats)
            localStats->SwitchTimer(GVT_TIMER);
#endif
        }

      // transmit data to start next GVT estimation on next GVT branch
      UpdateMsg *umsg = new UpdateMsg;
      umsg->maxSR=0;
      umsg->optPVT = estGVT;
      umsg->inactive = inactive;
      umsg->inactiveTime = inactiveTime;
      umsg->nextLB = nextLBstart;
      umsg->runGVTflag = 0;
      g[(CkMyPe()+1) % CkNumPes()].runGVT(umsg);
    }

    // reset static data and release the send/recv list
    optGVT = conGVT = POSE_UnsetTS;
    SRentry *cur = SRs;
    SRs = NULL;
    while (cur) {
      tmp = cur->next;
      delete cur;
      cur = tmp;
    }
  }
#ifndef CMK_OPTIMIZE
  if(pose_config.stats)
    localStats->TimerStop();
#endif
}
コード例 #9
0
ファイル: CentralLB.C プロジェクト: luyukunphy/namd
/**
 * Accepts the statistics messages packed in msg and records them per
 * sending PE. Messages from PEs that are not alive are ignored, and a second
 * message from the same PE is reported and ignored. When the number of
 * recorded messages equals CkNumValidPes(), LoadBalance() is invoked on this
 * PE's branch.
 *
 * The whole body is compiled only when CMK_LBDB_ON is set.
 *
 * @param msg  marshalled container holding one or more CLBStatsMsg
 */
void CentralLB::ReceiveStats(CkMarshalledCLBStatsMessage &msg)
{
#if CMK_LBDB_ON
  // Create the per-PE message table and the aggregate record on first use.
  if (statsMsgsList == NULL) {
    statsMsgsList = new CLBStatsMsg*[CkNumPes()];
    CmiAssert(statsMsgsList != NULL);
    for (int slot = 0; slot < CkNumPes(); ++slot) {
      statsMsgsList[slot] = 0;
    }
  }
  if (statsData == NULL) {
    statsData = new LDStats;
  }

  // Walk through every CLBStatsMsg contained in the incoming message.
  const int nMsgs = msg.getCount();
  for (int idx = 0; idx < nMsgs; ++idx)
  {
    CLBStatsMsg *statMsg = msg.getMessage(idx);
    CmiAssert(statMsg!=NULL);
    const int pe = statMsg->from_pe;
    DEBUGF(("Stats msg received, %d %d %d %p step %d\n", pe,stats_msg_count,statMsg->n_objs,statMsg,step()));
    // TODO (message logging builds): if a processor is redoing an old load
    // balance step, tell it that the step is done and that it should not
    // perform any migrations.

    // Ignore reports from processors that are not alive.
    if (!CmiNodeAlive(pe)) {
      DEBUGF(("[%d] ReceiveStats called from invalidProcessor %d\n",CkMyPe(),pe));
      continue;
    }

    if (statMsg->avail_vector != NULL) {
      LBDatabaseObj()->set_avail_vector(statMsg->avail_vector, statMsg->next_lb);
    }

    // A PE may report only once per round.
    if (statsMsgsList[pe] != 0) {
      CkPrintf("*** Unexpected CLBStatsMsg in ReceiveStats from PE %d ***\n",
               pe);
      continue;
    }

    statsMsgsList[pe] = statMsg;
#if USE_REDUCTION
    depositData(statMsg);
#else
    // store per processor data right away
    struct ProcStats &peStats = statsData->procs[pe];
    peStats.pe = pe;
    peStats.total_walltime = statMsg->total_walltime;
    peStats.idletime = statMsg->idletime;
    peStats.bg_walltime = statMsg->bg_walltime;
#if CMK_LB_CPUTIMER
    peStats.total_cputime = statMsg->total_cputime;
    peStats.bg_cputime = statMsg->bg_cputime;
#endif
    peStats.pe_speed = statMsg->pe_speed;
    peStats.available = CmiTrue;
    peStats.n_objs = statMsg->n_objs;

    // running totals over all PEs
    statsData->n_objs += statMsg->n_objs;
    statsData->n_comm += statMsg->n_comm;
#endif
#if defined(TEMP_LDB)
    peStats.pe_temp=statMsg->pe_temp;
    peStats.pe_speed=statMsg->pe_speed;
#endif

    stats_msg_count++;
  }

  const int expected = CkNumValidPes();
  DEBUGF(("THIS POINT count = %d, clients = %d\n",stats_msg_count,expected));

  // Every valid PE has reported: start the load balancing step.
  if (stats_msg_count == expected) {
    DEBUGF(("[%d] All stats messages received \n",CmiMyPe()));
    statsData->nprocs() = stats_msg_count;
    thisProxy[CkMyPe()].LoadBalance();
  }
#endif
}
コード例 #10
0
ファイル: gvt.C プロジェクト: davidheryanto/sc14
/// Register a poser with this PVT branch.
/// The new entry is inserted into objs with POSE_UnsetTS (the safeTime
/// argument is not consulted here).  The returned PVT index combines the
/// local slot returned by objs.Insert with this PE's number as
/// slot*1000 + CkMyPe().
int PVT::objRegister(int arrIdx, POSE_TimeType safeTime, int sync, sim *myPtr)
{
  const int localSlot = objs.Insert(arrIdx, POSE_UnsetTS, sync, myPtr);
  const int pvtIdx = localSlot*1000 + CkMyPe();
  return pvtIdx;
}
コード例 #11
0
ファイル: gvt.C プロジェクト: davidheryanto/sc14
/// Unregister a poser from this PVT branch.
/// pvtIdx is decoded as (pvtIdx - CkMyPe()) / 1000 to obtain the local slot,
/// which is then deleted from objs.
void PVT::objRemove(int pvtIdx)
{
  const int localSlot = (pvtIdx - CkMyPe()) / 1000;
  objs.Delete(localSlot);
}
コード例 #12
0
ファイル: gvt.C プロジェクト: davidheryanto/sc14
/// ENTRY: receive a new GVT estimate and act on it.
/// Records the estimate, frees the message, commits objects and runs the
/// sync-strategy calculations.  After that exactly one of three things
/// happens: PE 0 schedules a checkpoint after quiescence, PE 0 schedules
/// load balancing after quiescence, or this PE resumes immediately through
/// resumeAfterCheckpoint.
void PVT::setGVT(GVTMsg *m)
{
#ifndef CMK_OPTIMIZE
  if (pose_config.stats) {
    localStats->TimerStart(GVT_TIMER);
  }
#endif
  CProxy_PVT pvtProxy(ThePVT);

  // The incoming estimate may not be lower than the one already held.
  CkAssert(m->estGVT >= estGVT);
  estGVT = m->estGVT;
  // The value is not used further in this function; the call is retained.
  int end = objs.getNumSpaces();
  simdone = m->done;
  CkFreeMsg(m);
  waitForFirst = 1;
  objs.Commit();
  objs.StratCalcs();  // sync strategy calculations
#ifdef MEM_TEMPORAL
  localTimePool->set_min_time(estGVT);
#endif

  // Parallel checkpointing.  A checkpoint is only started from PE 0, and
  // only after the commits above, which should keep the amount of data
  // written to disk small.  To get a stable state the checkpoint does not
  // start here: quiescence detection is started, and beginCheckpoint runs
  // as its callback (without this step inconsistent results were seen,
  // possibly from messages still in transit).  The simulation is resumed
  // afterwards in resumeAfterCheckpoint, which is also the first POSE
  // function called when restarting from a checkpoint.
  //
  // A checkpoint becomes due roughly every
  // pose_config.checkpoint_gvt_interval GVT ticks or every
  // pose_config.checkpoint_time_interval seconds (see pose_config.h).

  const bool onPe0 = (CkMyPe() == 0);

  bool checkpointDue = false;
  if (onPe0 && (parCheckpointInProgress == 0) && (estGVT > 0)) {
    const bool gvtIntervalReached =
      (pose_config.checkpoint_gvt_interval > 0) &&
      (estGVT >= (parLastCheckpointGVT + pose_config.checkpoint_gvt_interval));
    // The wall clock is read only when the GVT criterion did not fire.
    checkpointDue = gvtIntervalReached ||
      ((pose_config.checkpoint_time_interval > 0) &&
       ((CmiWallTimer() + parStartTime) >= (parLastCheckpointTime + (double)pose_config.checkpoint_time_interval)));
  }

  if (checkpointDue) {
    // make sure everything that can be committed has been committed
    objs.CheckpointCommit();
    // begin the checkpoint once quiescence is detected
    eventMsg *dummyMsg = new eventMsg();
    CkCallback cb(CkIndex_PVT::beginCheckpoint(dummyMsg), CkMyPe(), ThePVT);
    parCheckpointInProgress = 1;
    parLastCheckpointTime = CmiWallTimer() + parStartTime;
    CkStartQD(cb);
  } else if (onPe0 && (parLBInProgress == 0) &&
             (pose_config.lb_gvt_interval > 0) &&
             (estGVT >= (parLastLBGVT + pose_config.lb_gvt_interval))) {
    // begin load balancing once quiescence is detected
    eventMsg *dummyMsg = new eventMsg();
    CkCallback cb(CkIndex_PVT::beginLoadbalancing(dummyMsg), CkMyPe(), ThePVT);
    parLBInProgress = 1;
    CkStartQD(cb);
  } else {
    // neither checkpoint nor load balancing: resume right away
    eventMsg *dummyMsg = new eventMsg();
    pvtProxy[CkMyPe()].resumeAfterCheckpoint(dummyMsg);
  }
#ifndef CMK_OPTIMIZE
  if (pose_config.stats) {
    localStats->TimerStop();
  }
#endif
}
コード例 #13
0
ファイル: gvt.C プロジェクト: davidheryanto/sc14
/// Basic Constructor.
/// Initializes the per-PE variables, resets all GVT/PVT bookkeeping, builds
/// the send/receive table, works out this PE's place in the reporting
/// scheme from CkMyPe() and CkNumPes(), clears the checkpoint and
/// load-balancing state, and registers staticDoneLB as a migration-done
/// callback with the LB database.
PVT::PVT() 
{
#ifdef VERBOSE_DEBUG
  CkPrintf("[%d] constructing PVT\n",CkMyPe());
#endif
  CpvInitialize(int, stateRecovery);
  CpvAccess(stateRecovery) = 0;
  CpvInitialize(eventID, theEventID);
  CpvAccess(theEventID)=eventID();

  optGVT = conGVT = POSE_UnsetTS;
  rdone = 0;
  SRs = NULL;
#ifndef CMK_OPTIMIZE
  localStats = (localStat *)CkLocalBranch(theLocalStats);
  if (pose_config.stats)
    localStats->TimerStart(GVT_TIMER);
#endif
#ifdef MEM_TEMPORAL
  localTimePool = (TimePool *)CkLocalBranch(TempMemID);
  CkPrintf("NOTE: Temporal memory manager is ON!\n");
#endif
  estGVT = POSE_UnsetTS;
  conPVT = POSE_UnsetTS;
  optPVT = POSE_UnsetTS;
  simdone = 0;
  gvtTurn = 0;
  startPhaseActive = 0;
  SendsAndRecvs = new SRtable();
  SendsAndRecvs->Initialize();
  waitForFirst = 0;
  eventCount = 0;
  specEventCount = 0;
  iterMin = POSE_UnsetTS;

  // Reporting scheme.  PEs below numPes-2 are paired: an odd PE reports to
  // the even PE just below it, an even PE reports to itself and forwards
  // its reduced result to PE numPes-2 or numPes-1.  Those last two PEs are
  // the ones with reportEnd set.
  const int numPes = CkNumPes();
  const int myPe = CkMyPe();
  reportReduceTo = -1;
  if (myPe < numPes-2) {
    if (myPe%2 == 1) {            // odd
      reportTo = myPe-1;
      reportsExpected = reportEnd = 0;
    } else {                      // even
      reportTo = myPe;
      // the last even PE below numPes-2 may have no odd partner
      reportsExpected = (myPe == numPes-3) ? 1 : 2;
      reportEnd = 0;
      reportReduceTo = (myPe < (numPes-2)/2) ? numPes-2 : numPes-1;
    }
  } else if (myPe == numPes-2) {
    reportTo = myPe;
    reportEnd = 1;
    reportsExpected = 1 + (numPes-2)/4 + ((numPes-2)%4)/2;
  } else if (myPe == numPes-1) {
    reportTo = myPe;
    reportEnd = 1;
    if (numPes == 1) {
      reportsExpected = 1;
    } else {
      reportsExpected = 1 + (numPes-2)/4 + (numPes-2)%2;
    }
  }

  // Parallel checkpointing / load balancing state starts out idle.
  parCheckpointInProgress = 0;
  parLastCheckpointGVT = 0;
  parLastCheckpointTime = parStartTime = 0.0;
  parLBInProgress = 0;
  parLastLBGVT = 0;
#ifndef CMK_OPTIMIZE
  if (pose_config.stats)
    localStats->TimerStop();
#endif

  LBDatabase::Object()->AddMigrationDoneFn(staticDoneLB, this);
}
コード例 #14
0
ファイル: init.C プロジェクト: gitter-badger/quinoa
/**
  This is the main charm setup routine.  It's called
  on all processors after Converse initialization.
  This routine gets passed to Converse from "main.C".
  
  The main purpose of this routine is to set up the objects
  and Ckpv's used during a regular Charm run.  See the comment
  at the top of the file for overall flow.

  @param unused_argc  not read anywhere in this routine.
  @param argv         command line; stored in Ck_argv and handed to the
                      module initializers, the option parser and the
                      mainchare/restart messages created below.
*/
void _initCharm(int unused_argc, char **argv)
{ 
	// The communication thread has the rank one past the worker ranks.
	int inCommThread = (CmiMyRank() == CmiMyNodeSize());

	DEBUGF(("[%d,%.6lf ] _initCharm started\n",CmiMyPe(),CmiWallTimer()));

	CkpvInitialize(size_t *, _offsets);
	CkpvAccess(_offsets) = new size_t[32];
	CkpvInitialize(PtrQ*,_buffQ);
	CkpvInitialize(PtrVec*,_bocInitVec);
	CkpvInitialize(void*, _currentChare);
	CkpvInitialize(int,   _currentChareType);
	CkpvInitialize(CkGroupID, _currentGroup);
	CkpvInitialize(void *, _currentNodeGroupObj);
	CkpvInitialize(CkGroupID, _currentGroupRednMgr);
	CkpvInitialize(GroupTable*, _groupTable);
	CkpvInitialize(GroupIDTable*, _groupIDTable);
	CkpvInitialize(CmiImmediateLockType, _groupTableImmLock);
        CkpvInitialize(bool, _destroyingNodeGroup);
        CkpvAccess(_destroyingNodeGroup) = false;
	CkpvInitialize(UInt, _numGroups);
	CkpvInitialize(int, _numInitsRecd);
	CkpvInitialize(int, _initdone);
	CkpvInitialize(char**, Ck_argv); CkpvAccess(Ck_argv)=argv;
	CkpvInitialize(MsgPool*, _msgPool);
	CkpvInitialize(CkCoreState *, _coreState);
	/*
		Added for evacuation-sayantan
	*/
#ifndef __BIGSIM__
	CpvInitialize(char *,_validProcessors);
#endif
	CkpvInitialize(char ,startedEvac);
	CpvInitialize(int,serializer);

	_initChareTables();            // for checkpointable plain chares

	CksvInitialize(UInt, _numNodeGroups);
	CksvInitialize(GroupTable*, _nodeGroupTable);
	CksvInitialize(GroupIDTable, _nodeGroupIDTable);
	CksvInitialize(CmiImmediateLockType, _nodeGroupTableImmLock);
	CksvInitialize(CmiNodeLock, _nodeLock);
	CksvInitialize(PtrVec*,_nodeBocInitVec);
	CksvInitialize(UInt,_numInitNodeMsgs);
	CkpvInitialize(int,_charmEpoch);
	CkpvAccess(_charmEpoch)=0;
	CksvInitialize(int, _triggersSent);
	CksvAccess(_triggersSent) = 0;

	CkpvInitialize(_CkOutStream*, _ckout);
	CkpvInitialize(_CkErrStream*, _ckerr);
	CkpvInitialize(Stats*, _myStats);

	CkpvAccess(_groupIDTable) = new GroupIDTable(0);
	CkpvAccess(_groupTable) = new GroupTable;
	CkpvAccess(_groupTable)->init();
	CkpvAccess(_groupTableImmLock) = CmiCreateImmediateLock();
	CkpvAccess(_numGroups) = 1; // make 0 an invalid group number
	CkpvAccess(_buffQ) = new PtrQ();
	CkpvAccess(_bocInitVec) = new PtrVec();

	CkpvAccess(_currentNodeGroupObj) = NULL;

	// Node-level (Cksv) state is set up once per node, by rank 0.
	if(CkMyRank()==0)
	{
	  	CksvAccess(_numNodeGroups) = 1; //make 0 an invalid group number
          	CksvAccess(_numInitNodeMsgs) = 0;
		CksvAccess(_nodeLock) = CmiCreateLock();
		CksvAccess(_nodeGroupTable) = new GroupTable();
		CksvAccess(_nodeGroupTable)->init();
		CksvAccess(_nodeGroupTableImmLock) = CmiCreateImmediateLock();
		CksvAccess(_nodeBocInitVec) = new PtrVec();
	}

	CkCallbackInit();
	
	CmiNodeAllBarrier();

#if ! CMK_BIGSIM_CHARM
	initQd(argv);         // bigsim calls it in ConverseCommonInit
#endif

	CkpvAccess(_coreState)=new CkCoreState();

	CkpvAccess(_numInitsRecd) = 0;
	CkpvAccess(_initdone) = 0;

	CkpvAccess(_ckout) = new _CkOutStream();
	CkpvAccess(_ckerr) = new _CkErrStream();

	_charmHandlerIdx = CkRegisterHandler((CmiHandler)_bufferHandler);
	_initHandlerIdx = CkRegisterHandler((CmiHandler)_initHandler);
	CkNumberHandlerEx(_initHandlerIdx, (CmiHandlerEx)_initHandler, CkpvAccess(_coreState));
	_roRestartHandlerIdx = CkRegisterHandler((CmiHandler)_roRestartHandler);
	_exitHandlerIdx = CkRegisterHandler((CmiHandler)_exitHandler);
	//added for interoperability
	_libExitHandlerIdx = CkRegisterHandler((CmiHandler)_libExitHandler);
	_bocHandlerIdx = CkRegisterHandler((CmiHandler)_initHandler);
	CkNumberHandlerEx(_bocHandlerIdx, (CmiHandlerEx)_initHandler, CkpvAccess(_coreState));

#ifdef __BIGSIM__
	if(BgNodeRank()==0) 
#endif
	_infoIdx = CldRegisterInfoFn((CldInfoFn)_infoFn);

	_triggerHandlerIdx = CkRegisterHandler((CmiHandler)_triggerHandler);
	_ckModuleInit();

	CldRegisterEstimator((CldEstimator)_charmLoadEstimator);

	_futuresModuleInit(); // part of futures implementation is a converse module
	_loadbalancerInit();
        _metabalancerInit();
	
#if CMK_MEM_CHECKPOINT
        init_memcheckpt(argv);
#endif

	initCharmProjections();
#if CMK_TRACE_IN_CHARM
        // initialize trace module in ck
        traceCharmInit(argv);
#endif
 	
    CkpvInitialize(int, envelopeEventID);
    CkpvAccess(envelopeEventID) = 0;
	CkMessageWatcherInit(argv,CkpvAccess(_coreState));
	
	/**
	  The rank-0 processor of each node calls the 
	  translator-generated "_register" routines. 
	  
	  _register routines call the charm.h "CkRegister*" routines,
	  which record function pointers and class information for
	  all Charm entities, like Chares, Arrays, and readonlies.
	  
	  There's one _register routine generated for each
	  .ci file.  _register routines *must* be called in the 
	  same order on every node, and *must not* be called by 
	  multiple threads simultaneously.
	*/
#ifdef __BIGSIM__
	if(BgNodeRank()==0) 
#else
	if(CkMyRank()==0)
#endif
	{
		SDAG::registerPUPables();
		CmiArgGroup("Charm++",NULL);
		_parseCommandLineOpts(argv);
		_registerInit();
		CkRegisterMsg("System", 0, 0, CkFreeMsg, sizeof(int));
		CkRegisterChareInCharm(CkRegisterChare("null", 0, TypeChare));
		CkIndex_Chare::__idx=CkRegisterChare("Chare", sizeof(Chare), TypeChare);
		CkRegisterChareInCharm(CkIndex_Chare::__idx);
		CkIndex_Group::__idx=CkRegisterChare("Group", sizeof(Group), TypeGroup);
                CkRegisterChareInCharm(CkIndex_Group::__idx);
		CkRegisterEp("null", (CkCallFnPtr)_nullFn, 0, 0, 0+CK_EP_INTRINSIC);
		
		/**
		  These _register calls are for the built-in
		  Charm .ci files, like arrays and load balancing.
		  If you add a .ci file to charm, you'll have to 
		  add a call to the _register routine here, or make
		  your library into a "-module".
		*/
		_registerCkFutures();
		_registerCkArray();
		_registerLBDatabase();
    _registerMetaBalancer();
		_registerCkCallback();
		_registertempo();
		_registerwaitqd();
		_registerCkCheckpoint();
#if CMK_MEM_CHECKPOINT
		_registerCkMemCheckpoint();
#endif


		/*
		  Setup Control Point Automatic Tuning Framework.

		  By default it is enabled as a part of charm, 
		  however it won't enable its tracing module 
		  unless a +CPEnableMeasurements command line argument
		  is specified. See trace-common.C for more info

		  Thus there should be no noticeable overhead to 
		  always having the control point framework linked
		  in.
		  
		*/
#if CMK_WITH_CONTROLPOINT
		_registerPathHistory();
		_registerControlPoints();
		_registerTraceControlPoints();
#endif


		/**
		  CkRegisterMainModule is generated by the (unique)
		  "mainmodule" .ci file.  It will include calls to 
		  register all the .ci files.
		*/
		CkRegisterMainModule();

		/**
		  _registerExternalModules is actually generated by 
		  charmc at link time (as "moduleinit<pid>.C").  
		  
		  This generated routine calls the _register functions
		  for the .ci files of libraries linked using "-module".
		  This funny initialization is most useful for AMPI/FEM
		  programs, which don't have a .ci file and hence have
		  no other way to control the _register process.
		*/
		_registerExternalModules(argv);
		
		_registerDone();
	}
	/* The following will happen on every virtual processor in BigEmulator, not just on once per real processor */
	if (CkMyRank() == 0) {
	  CpdBreakPointInit();
	}
	CmiNodeAllBarrier();

	// Execute the initcalls registered in modules
	_initCallTable.enumerateInitCalls();

#if CMK_CHARMDEBUG
	CpdFinishInitialization();
#endif

	//CmiNodeAllBarrier();

	CkpvAccess(_myStats) = new Stats();
	CkpvAccess(_msgPool) = new MsgPool();

	CmiNodeAllBarrier();

#if !(__FAULT__)
	CmiBarrier();
	CmiBarrier();
	CmiBarrier();
#endif
#if CMK_SMP_TRACE_COMMTHREAD
	_TRACE_BEGIN_COMPUTATION();	
#else
 	if (!inCommThread) {
	  _TRACE_BEGIN_COMPUTATION();
	}
#endif

#ifdef ADAPT_SCHED_MEM
    // Collect the indices of all entry methods flagged as memory critical.
    if(CkMyRank()==0){
	memCriticalEntries = new int[numMemCriticalEntries];
	int memcnt=0;
	for(int i=0; i<_entryTable.size(); i++){
	    if(_entryTable[i]->isMemCritical){
		memCriticalEntries[memcnt++] = i;
	    }
	}
    }
#endif

#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
    _messageLoggingInit();
#endif

#ifndef __BIGSIM__
	/*
		FAULT_EVAC
	*/
	// Every processor starts out marked as valid.
	CpvAccess(_validProcessors) = new char[CkNumPes()];
	for(int vProc=0;vProc<CkNumPes();vProc++){
		CpvAccess(_validProcessors)[vProc]=1;
	}
	_ckEvacBcastIdx = CkRegisterHandler((CmiHandler)_ckEvacBcast);
	_ckAckEvacIdx = CkRegisterHandler((CmiHandler)_ckAckEvac);
#endif
	CkpvAccess(startedEvac) = 0;
	CpvAccess(serializer) = 0;

	evacuate = 0;
	CcdCallOnCondition(CcdSIGUSR1,(CcdVoidFn)CkDecideEvacPe,0);
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_)) 
    CcdCallOnCondition(CcdSIGUSR2,(CcdVoidFn)CkMlogRestart,0);
#endif

	if(_raiseEvac){
		processRaiseEvacFile(_raiseEvacFile);
	}	
    
    if (CkMyRank() == 0) {
      TopoManager_init();
    }
    CmiNodeAllBarrier();

    if (!_replaySystem) {
        CkFtFn  faultFunc_restart = CkRestartMain;
        if (faultFunc == NULL || faultFunc == faultFunc_restart) {         // this is not restart from memory
            // these two are blocking calls for non-bigsim
#if ! CMK_BIGSIM_CHARM
	  CmiInitCPUAffinity(argv);
          CmiInitMemAffinity(argv);
#endif
        }
        CmiInitCPUTopology(argv);
#if CMK_SHARED_VARS_POSIX_THREADS_SMP
        if (CmiCpuTopologyEnabled()) {
            int *pelist;
            int num;
            CmiGetPesOnPhysicalNode(0, &pelist, &num);
#if !CMK_MULTICORE && !CMK_SMP_NO_COMMTHD
            // Count communication threads, if present
            // XXX: Assuming uniformity of node size here
            num += num/CmiMyNodeSize();
#endif
            if (!_Cmi_forceSpinOnIdle && num > CmiNumCores())
            {
              if (CmiMyPe() == 0)
                CmiPrintf("\nCharm++> Warning: the number of SMP threads (%d) is greater than the number of physical cores (%d), so threads will sleep while idling. Use +CmiSpinOnIdle or +CmiSleepOnIdle to control this directly.\n\n", num, CmiNumCores());
              CmiLock(CksvAccess(_nodeLock));
              if (! _Cmi_sleepOnIdle) _Cmi_sleepOnIdle = 1;
              CmiUnlock(CksvAccess(_nodeLock));
            }
        }
#endif
    }

    // +printTopo <file>: PE 0 dumps the allocation to "<file>.<partition>".
    if(CmiMyPe() == 0) {
        char *topoFilename;
        if(CmiGetArgStringDesc(argv,"+printTopo",&topoFilename,"topo file name")) 
        {
            std::stringstream sstm;
            sstm << topoFilename << "." << CmiMyPartition();
            std::string result = sstm.str();
            FILE *fp;
            fp = fopen(result.c_str(), "w");
            if (fp == NULL) {
              // Report the name that was actually opened, then fall back.
              CkPrintf("Error opening %s file, writing to stdout\n", result.c_str());
              fp = stdout;
            }
	    TopoManager_printAllocation(fp);
            // Only close a file we opened ourselves; stdout must stay open
            // for the rest of the run.
            if (fp != stdout) {
              fclose(fp);
            }
        }
    }

#if CMK_USE_PXSHM && ( CMK_CRAYXE || CMK_CRAYXC ) && CMK_SMP
      // for SMP on Cray XE6 (hopper) it seems pxshm has to be initialized
      // again after cpuaffinity is done
    if (CkMyRank() == 0) {
      CmiInitPxshm(argv);
    }
    CmiNodeAllBarrier();
#endif

    //CldCallback();
#if CMK_BIGSIM_CHARM && CMK_CHARMDEBUG
      // Register the BG handler for CCS. Notice that this is put into a variable shared by
      // the whole real processor. This because converse needs to find it. We check that all
      // virtual processors register the same index for this handler.
    CpdBgInit();
#endif

	if (faultFunc) {
		// Restart path: hand control to the fault function instead of
		// creating mainchares.
#if CMK_WITH_STATS
		if (CkMyPe()==0) _allStats = new Stats*[CkNumPes()];
#endif
		if (!inCommThread) {
                  CkArgMsg *msg = (CkArgMsg *)CkAllocMsg(0, sizeof(CkArgMsg), 0);
                  msg->argc = CmiGetArgc(argv);
                  msg->argv = argv;
                  faultFunc(_restartDir, msg);
                  CkFreeMsg(msg);
                }
	}else if(CkMyPe()==0){
#if CMK_WITH_STATS
		_allStats = new Stats*[CkNumPes()];
#endif
		size_t i, nMains=_mainTable.size();
		for(i=0;i<nMains;i++)  /* Create all mainchares */
		{
			int size = _chareTable[_mainTable[i]->chareIdx]->size;
			void *obj = malloc(size);
			_MEMCHECK(obj);
			_mainTable[i]->setObj(obj);
			CkpvAccess(_currentChare) = obj;
			CkpvAccess(_currentChareType) = _mainTable[i]->chareIdx;
			CkArgMsg *msg = (CkArgMsg *)CkAllocMsg(0, sizeof(CkArgMsg), 0);
			msg->argc = CmiGetArgc(argv);
			msg->argv = argv;
			_entryTable[_mainTable[i]->entryIdx]->call(msg, obj);
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
            CpvAccess(_currentObj) = (Chare *)obj;
#endif
		}
                _mainDone = 1;

		_STATS_RECORD_CREATE_CHARE_N(nMains);
		_STATS_RECORD_PROCESS_CHARE_N(nMains);

		for(i=0;i<_readonlyMsgs.size();i++) /* Send out readonly messages */
		{
			void *roMsg = (void *) *((char **)(_readonlyMsgs[i]->pMsg));
			if(roMsg==0)
				continue;
			//Pack the message and send it to all other processors
			envelope *env = UsrToEnv(roMsg);
			env->setSrcPe(CkMyPe());
			env->setMsgtype(ROMsgMsg);
			env->setRoIdx(i);
			CmiSetHandler(env, _initHandlerIdx);
			CkPackMessage(&env);
			CmiSyncBroadcast(env->getTotalsize(), (char *)env);
			CpvAccess(_qd)->create(CkNumPes()-1);

			//For processor 0, unpack and re-set the global
			CkUnpackMessage(&env);
			_processROMsgMsg(env);
			_numInitMsgs++;
		}

		//Determine the size of the RODataMessage
		PUP::sizer ps;
		for(i=0;i<_readonlyTable.size();i++) _readonlyTable[i]->pupData(ps);

		//Allocate and fill out the RODataMessage
		envelope *env = _allocEnv(RODataMsg, ps.size());
		PUP::toMem pp((char *)EnvToUsr(env));
		for(i=0;i<_readonlyTable.size();i++) _readonlyTable[i]->pupData(pp);

		env->setCount(++_numInitMsgs);
		env->setSrcPe(CkMyPe());
		CmiSetHandler(env, _initHandlerIdx);
		DEBUGF(("[%d,%.6lf] RODataMsg being sent of size %d \n",CmiMyPe(),CmiWallTimer(),env->getTotalsize()));
		CmiSyncBroadcastAndFree(env->getTotalsize(), (char *)env);
		CpvAccess(_qd)->create(CkNumPes()-1);
		_initDone();
	}

	DEBUGF(("[%d,%d%.6lf] inCommThread %d\n",CmiMyPe(),CmiMyRank(),CmiWallTimer(),inCommThread));
	// when I am a communication thread, I don't participate initDone.
        if (inCommThread) {
                CkNumberHandlerEx(_bocHandlerIdx,(CmiHandlerEx)_processHandler,
                                        CkpvAccess(_coreState));
                CkNumberHandlerEx(_charmHandlerIdx,(CmiHandlerEx)_processHandler
,
                                        CkpvAccess(_coreState));
                _processBufferedMsgs();
        }

#if CMK_CHARMDEBUG
        // Should not use CpdFreeze inside a thread (since this processor is really a user-level thread)
       if (CpvAccess(cpdSuspendStartup))
       { 
          //CmiPrintf("In Parallel Debugging mode .....\n");
          CpdFreeze();
       }
#endif


#if __FAULT__
	if(killFlag){                                                  
                readKillFile();                                        
        }
#endif

}
コード例 #15
0
		/// Constructor: prints which processor this chare is running on
		/// (via ckout and CkMyPe()), then stores pi in y.
		simple(double pi)
		{
			ckout << "I am a simple chare running from processor:";
			ckout << CkMyPe();
			ckout << endl;
			y = pi;
		};
コード例 #16
0
ファイル: CentralLB.C プロジェクト: luyukunphy/namd
/**
 * Run one centralized load balancing step on the collected statistics.
 *
 * Sequence, as visible in this function:
 *   1. finish assembling statsData (buildStats(), or clear statsMsgsList
 *      when USE_REDUCTION is set);
 *   2. copy the LB database's availability vector into statsData->procs;
 *   3. call preprocess() and Strategy() to obtain an LBMigrateMsg;
 *   4. fill in the avail vector / next balancer on the message and, when
 *      summaries are requested, the predicted per-PE loads;
 *   5. broadcast the message through thisProxy.ReceiveMigration();
 *   6. clear statsData and reset stats_msg_count for the next cycle.
 *
 * The body is empty when CMK_LBDB_ON is off.
 */
void CentralLB::LoadBalance()
{
#if CMK_LBDB_ON
  int proc;
  const int clients = CkNumPes();

#if ! USE_REDUCTION
  // build data
  buildStats();
#else
  for (proc = 0; proc < clients; proc++) statsMsgsList[proc] = NULL;
#endif

  theLbdb->ResetAdaptive();
  if (!_lb_args.samePeSpeed()) statsData->normalize_speed();

  if (_lb_args.debug()) 
      CmiPrintf("\nCharmLB> %s: PE [%d] step %d starting at %f Memory: %f MB\n",
		  lbname, cur_ld_balancer, step(), start_lb_time,
		  CmiMemoryUsage()/(1024.0*1024.0));

  // if we are in simulation mode read data
  if (LBSimulation::doSimulation) simulationRead();

  // Mark each processor available or not according to the LB database.
  char *availVector = LBDatabaseObj()->availVector();
  for(proc = 0; proc < clients; proc++)
      statsData->procs[proc].available = (CmiBool)availVector[proc];

  preprocess(statsData);

  if (_lb_args.printSummary()) {
      LBInfo info(clients);
        // not take comm data
      info.getInfo(statsData, clients, 0);
      LBRealType mLoad, mCpuLoad, totalLoad;
      info.getSummary(mLoad, mCpuLoad, totalLoad);
      int nmsgs, nbytes;
      statsData->computeNonlocalComm(nmsgs, nbytes);
      CkPrintf("[%d] Load Summary (before LB): max (with bg load): %f max (obj only): %f average: %f at step %d nonlocal: %d msgs %.2fKB.\n", CkMyPe(), mLoad, mCpuLoad, totalLoad/clients, step(), nmsgs, 1.0*nbytes/1024);
  }

#if CMK_REPLAYSYSTEM
  // Under replay, remember the ldb handle seen for each source processor
  // so it can be written back into the moves after the strategy has run.
  // Initialized to NULL so the pointer is never indeterminate when
  // _replaySystem is off.
  LDHandle *loadBalancer_pointers = NULL;
  if (_replaySystem) {
    loadBalancer_pointers = (LDHandle*)malloc(CkNumPes()*sizeof(LDHandle));
    for (int i=0; i<statsData->n_objs; ++i) loadBalancer_pointers[statsData->from_proc[i]] = statsData->objData[i].handle.omhandle.ldb;
  }
#endif
  
  LBMigrateMsg* migrateMsg = Strategy(statsData);
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
	migrateMsg->step = step();
#endif

#if CMK_REPLAYSYSTEM
  CpdHandleLBMessage(&migrateMsg);
  if (_replaySystem) {
    for (int i=0; i<migrateMsg->n_moves; ++i) migrateMsg->moves[i].obj.omhandle.ldb = loadBalancer_pointers[migrateMsg->moves[i].from_pe];
    free(loadBalancer_pointers);
  }
#endif
  
  LBDatabaseObj()->get_avail_vector(migrateMsg->avail_vector);
  migrateMsg->next_lb = LBDatabaseObj()->new_lbbalancer();

  // if this is the step at which we need to dump the database
  simulationWrite();

//  calculate predicted load
//  very time consuming though, so only happen when debugging is on
  if (_lb_args.printSummary()) {
      LBInfo info(clients);
        // not take comm data
      getPredictedLoadWithMsg(statsData, clients, migrateMsg, info, 0);
      LBRealType mLoad, mCpuLoad, totalLoad;
      info.getSummary(mLoad, mCpuLoad, totalLoad);
      int nmsgs, nbytes;
      statsData->computeNonlocalComm(nmsgs, nbytes);
      CkPrintf("[%d] Load Summary (after LB): max (with bg load): %f max (obj only): %f average: %f at step %d nonlocal: %d msgs %.2fKB useMem: %.2fKB.\n", CkMyPe(), mLoad, mCpuLoad, totalLoad/clients, step(), nmsgs, 1.0*nbytes/1024, (1.0*useMem())/1024);
      for (int i=0; i<clients; i++)
        migrateMsg->expectedLoad[i] = info.peLoads[i];
  }

  DEBUGF(("[%d]calling recv migration\n",CkMyPe()));
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_)) 
    lbDecisionCount++;
    migrateMsg->lbDecisionCount = lbDecisionCount;
#endif

  // Broadcast the migration decision to every branch.  (The previous
  // per-processor split lived in an unreachable else branch of an
  // "if (1)" and has been removed together with the unused envelope
  // lookup.)
  thisProxy.ReceiveMigration(migrateMsg);

  // Zero out data structures for next cycle
  statsData->clear();
  stats_msg_count=0;
#endif
}
コード例 #17
0
ファイル: parallel_part.C プロジェクト: davidheryanto/sc14
int FEM_master_parallel_part(int fem_mesh,int masterRank,FEM_Comm_t comm_context){
  const char *caller="FEM_Create_connmsa"; 
  FEMAPI(caller);
  FEM_chunk *c=FEM_chunk::get(caller);
  FEM_Mesh *m=c->lookup(fem_mesh,caller);
  m->setAbsoluteGlobalno();
  int nelem = m->nElems();
  int numChunks;
  MPI_Comm_size((MPI_Comm)comm_context,&numChunks);
  printf("master -> number of elements %d \n",nelem);
  DEBUG(m->print(0));


  /*load the connectivity information into the eptr and
    eind datastructure. It will be read by the other slave 
    elements and used to call parmetis*/
  MSA1DINT eptrMSA(nelem,numChunks);
  MSA1DINT eindMSA(nelem*10,numChunks);
  /*
    after the msa array has been created and loaded with connectivity data
    tell the slaves about the msa array 
  */
  struct conndata data;
  data.nelem = nelem;
  data.nnode = m->node.size();
  data.arr1 = eptrMSA;
  data.arr2 = eindMSA;
  MPI_Bcast_pup(data,masterRank,(MPI_Comm)comm_context);

  eptrMSA.enroll(numChunks);
  eindMSA.enroll(numChunks);
  MSA1DINT::Write wPtr = eptrMSA.getInitialWrite();
  MSA1DINT::Write wInd = eindMSA.getInitialWrite();
  int indcount=0,ptrcount=0;
  for(int t=0;t<m->elem.size();t++){
    if(m->elem.has(t)){
      FEM_Elem &k=m->elem[t];
      for(int e=0;e<k.size();e++){
				wPtr.set(ptrcount)=indcount;
				ptrcount++;
				for(int n=0;n<k.getNodesPer();n++){
				  wInd.set(indcount)=k.getConn(e,n);
				  indcount++;
				}
      }
    }
  }
  wPtr.set(ptrcount) = indcount;
  printf("master -> ptrcount %d indcount %d sizeof(MSA1DINT) %d sizeof(MSA1DINTLIST) %d memory %d\n",ptrcount,indcount,sizeof(MSA1DINT),sizeof(MSA1DINTLIST),CmiMemoryUsage());
  /*
    break up the mesh such that each chunk gets the same number of elements
    and the nodes corresponding to those elements. However this is not the partition.
    This is just distributing the data, so that when partition is done using parmetis
    all the requests for data do not go to chunk 0. Instead after partition each chunk
    can send the element and node data to the chunks that will need it
  */
  FEM_Mesh *mesh_array=FEM_break_mesh(m,ptrcount,numChunks);
  /*
    Send the broken up meshes to the different chunks. 
  */
  sendBrokenMeshes(mesh_array,comm_context);
  delete [] mesh_array;
  FEM_Mesh mypiece;
  MPI_Recv_pup(mypiece,masterRank,MESH_CHUNK_TAG,(MPI_Comm)comm_context);
	
  /*
    call parmetis
  */
  double  parStartTime = CkWallTimer();
  MSA1DINT::Read rPtr = wPtr.syncToRead();
  MSA1DINT::Read rInd = wInd.syncToRead();
  printf("starting FEM_call_parmetis \n");
  struct partconndata *partdata = FEM_call_parmetis(data.nelem, rPtr, rInd, comm_context);

  printf("done with parmetis %d FEM_Mesh %d in %.6lf \n",CmiMemoryUsage(),sizeof(FEM_Mesh),CkWallTimer()-parStartTime);
	
	double dataArrangeStartTime = CkWallTimer();
  /*
    Set up a msa to store the partitions to which a node belongs.
    A node can belong to multiple partitions.
  */
  int totalNodes = m->node.size();
  MSA1DINTLIST nodepart(totalNodes,numChunks);
  MPI_Bcast_pup(nodepart,masterRank,(MPI_Comm)comm_context);
  nodepart.enroll(numChunks);
  MSA1DINTLIST::Accum nodepartAcc = nodepart.getInitialAccum();
	
  FEM_write_nodepart(nodepartAcc,partdata,(MPI_Comm)comm_context);
  printf("Creating mapping of node to partition took %.6lf\n",CkWallTimer()-dataArrangeStartTime);
  dataArrangeStartTime = CkWallTimer();
  MSA1DINTLIST::Read nodepartRead = nodepartAcc.syncToRead();
	
  /*
    Set up a msa to store the nodes that belong to a partition
  */
  MSA1DNODELIST part2node(numChunks,numChunks);
  MPI_Bcast_pup(part2node,masterRank,(MPI_Comm)comm_context);
  part2node.enroll(numChunks);
  MSA1DNODELIST::Accum part2nodeAcc = part2node.getInitialAccum();

  FEM_write_part2node(nodepartRead, part2nodeAcc, partdata, (MPI_Comm)comm_context);

	
  /*
    Get the list of elements and nodes that belong to this partition
  */
  MSA1DNODELIST::Read rPart2node = part2nodeAcc.syncToRead();
  NodeList lnodes = rPart2node.get(masterRank);
  lnodes.uniquify();
//  IntList lelems = part2elem.get(masterRank);
	

	printf("Creating mapping of  partition to node took %.6lf\n",CkWallTimer()-dataArrangeStartTime);
  printf("Time spent doing +=ElemList %.6lf \n",elemlistaccTime);
	dataArrangeStartTime = CkWallTimer();

  /*
    Build an MSA of FEM_Mesh, with each index containing the mesh for that  chunk
  */
  MSA1DFEMMESH part2mesh(numChunks,numChunks);
  MPI_Bcast_pup(part2mesh,masterRank,(MPI_Comm)comm_context);
  part2mesh.enroll(numChunks);
  MSA1DFEMMESH::Accum aPart2mesh = part2mesh.getInitialAccum();

  FEM_write_part2mesh(aPart2mesh,partdata, &data,nodepartRead,numChunks,masterRank,&mypiece);
  /*
    Get your mesh consisting of elements and nodes out of the mesh MSA
  */
  MSA1DFEMMESH::Read rPart2mesh = aPart2mesh.syncToRead();
  MeshElem me = rPart2mesh.get(masterRank);
  //printf("[%d] Number of elements in my partitioned mesh %d number of nodes %d \n",masterRank,me.m->nElems(),me.m->node.size());
	
  DEBUG(printf("[%d] Memory usage on vp 0 close to max %d \n",CkMyPe(),CmiMemoryUsage()));
	//Free up the eptr and eind MSA arrays stored in data
  delete &rPtr;
  delete &rInd;
  data.arr1.FreeMem();
  data.arr2.FreeMem();
  nodepart.FreeMem();
  DEBUG(printf("[%d] Memory usage on vp 0 after FreeMem %d \n",CkMyPe(),CmiMemoryUsage()));
	
  addIDXLists(me.m,lnodes,masterRank);
	
	part2node.FreeMem();
  DEBUG(printf("[%d] Memory usage on vp 0 after addIDXL %d \n",CkMyPe(),CmiMemoryUsage()));
	
  /*
    Broadcast  the user data to all the meshes
  */
  DEBUG(printf("[%d] Length of udata vector in master %d \n",masterRank,m->udata.size()));
  MPI_Bcast_pup(m->udata,masterRank,(MPI_Comm)comm_context);
  me.m->udata = m->udata;
	
	
  delete partdata;
  
	printf("[%d] Data Arrangement took %.6lf \n",masterRank,CkWallTimer()-dataArrangeStartTime);
	
	/*
    collect the ghost data and send it to all the chunks.
  */
  struct ghostdata *gdata = gatherGhosts();
  DEBUG(printf("[%d] number of ghost layers %d \n",masterRank,gdata->numLayers));
  MPI_Bcast_pup(*gdata,masterRank,(MPI_Comm)comm_context);

  /*
    make ghosts for this mesh
  */
  printf("[%d] Starting to generate number of ghost layers %d \n",masterRank,gdata->numLayers);
	double _startTime = CkWallTimer();
  makeGhosts(me.m,(MPI_Comm)comm_context,masterRank,gdata->numLayers,gdata->layers);
  delete gdata;
	
	printf("[%d] Ghost generation took %.6lf \n",masterRank,CkWallTimer()-_startTime);
	
  me.m->becomeGetting();
  FEM_chunk *chunk = FEM_chunk::get("FEM_Mesh_Parallel_broadcast");
  int tempMeshNo = chunk->meshes.put(me.m);
  int new_mesh = FEM_Mesh_copy(tempMeshNo);
	
  FEM_Mesh *nmesh = c->lookup(new_mesh,"master_parallel_broadcast");
  DEBUG(printf("[%d] Length of udata vector in master new_mesh %d \n",masterRank,nmesh->udata.size()));
	
	part2mesh.FreeMem();
  printf("[%d] Max Memory usage on vp 0 at end of parallel partition %d \n",CkMyPe(),CmiMaxMemoryUsage());
		
  return new_mesh;
}
コード例 #18
0
ファイル: CentralLB.C プロジェクト: luyukunphy/namd
/**
 * Applies the migration decisions carried by the stored LBMigrateMsg
 * (storedMigrateMsg) on this PE.
 *
 * The incoming reduction message is deleted immediately; all decisions are
 * read from the stored message. For each move in the message this PE either
 * starts an outgoing migration, counts an expected incoming migration, or
 * (when CMK_GLOBAL_LOCATION_UPDATE is set) updates the location of an object
 * that moves between two other PEs. MigrationDone(1) is called right away if
 * no incoming migrations are pending.
 *
 * The whole body is compiled only when CMK_LBDB_ON is set. The
 * _FAULT_MLOG_ / _FAULT_CAUSAL_ sections add message-logging bookkeeping.
 *
 * @param msg reduction message that triggered this call; deleted here.
 */
void CentralLB::ProcessReceiveMigration(CkReductionMsg  *msg)
{
#if CMK_LBDB_ON
	int i;
        LBMigrateMsg *m = storedMigrateMsg;
        CmiAssert(m!=NULL);
        delete msg;

#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
	int *dummyCounts;

	DEBUGF(("[%d] Starting ReceiveMigration WITH step %d m->step %d\n",CkMyPe(),step(),m->step));
	// CmiPrintf("[%d] Starting ReceiveMigration step %d m->step %d\n",CkMyPe(),step(),m->step);
	// Ignore a migration message whose step is older than the current one.
	// NOTE(review): str and env are never used, and m is not deleted on
	// this path (unlike the other early return below) -- confirm intent.
	if(step() > m->step){
		char str[100];
		envelope *env = UsrToEnv(m);
		return;
	}
	lbDecisionCount = m->lbDecisionCount;
#endif

  // Rate-limit the debug trace to PEs whose number is a multiple of 1024.
  if (_lb_args.debug() > 1) 
    if (CkMyPe()%1024==0) CmiPrintf("[%d] Starting ReceiveMigration step %d at %f\n",CkMyPe(),step(), CmiWallTimer());

  // Record the expected load of every PE from the migration message.
  for (i=0; i<CkNumPes(); i++) theLbdb->lastLBInfo.expectedLoad[i] = m->expectedLoad[i];
  // Any previously expected migrations must have completed by now.
  CmiAssert(migrates_expected <= 0 || migrates_completed == migrates_expected);
/*FAULT_EVAC*/
  // A PE that is not alive takes no part in the migration.
  if(!CmiNodeAlive(CkMyPe())){
	delete m;
	return;
  }
  migrates_expected = 0;
  future_migrates_expected = 0;
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
	// NOTE(review): sending and dummy are incremented below but never read
	// in this function.
	int sending=0;
    int dummy=0;
	LBDB *_myLBDB = theLbdb->getLBDB();
	// dummyCounts is only allocated (and only indexed) when _restartFlag is set.
	if(_restartFlag){
        dummyCounts = new int[CmiNumPes()];
        bzero(dummyCounts,sizeof(int)*CmiNumPes());
    }
#endif
  for(i=0; i < m->n_moves; i++) {
    MigrateInfo& move = m->moves[i];
    const int me = CkMyPe();
    if (move.from_pe == me && move.to_pe != me) {
      // Outgoing move: this PE is the source.
      DEBUGF(("[%d] migrating object to %d\n",move.from_pe,move.to_pe));
      // migrate object, in case it is already gone, inform toPe
#if (!defined(_FAULT_MLOG_) && !defined(_FAULT_CAUSAL_))
      if (theLbdb->Migrate(move.obj,move.to_pe) == 0) 
         thisProxy[move.to_pe].MissMigrate(!move.async_arrival);
#else
            if(_restartFlag == 0){
                DEBUG(CmiPrintf("[%d] need to move object from %d to %d \n",CkMyPe(),move.from_pe,move.to_pe));
                theLbdb->Migrate(move.obj,move.to_pe);
                sending++;
            }else{
                // After a restart, only migrate objects whose handle is still
                // valid; otherwise count a "dummy" move for the destination PE.
                if(_myLBDB->validObjHandle(move.obj)){
                    DEBUG(CmiPrintf("[%d] need to move object from %d to %d \n",CkMyPe(),move.from_pe,move.to_pe));
                    theLbdb->Migrate(move.obj,move.to_pe);
                    sending++;
                }else{
                    DEBUG(CmiPrintf("[%d] dummy move to pe %d detected after restart \n",CmiMyPe(),move.to_pe));
                    dummyCounts[move.to_pe]++;
                    dummy++;
                }
            }
#endif
    } else if (move.from_pe != me && move.to_pe == me) {
       // Incoming move: count it as expected now or in the future,
       // depending on the move's async_arrival flag.
       DEBUGF(("[%d] expecting object from %d\n",move.to_pe,move.from_pe));
      if (!move.async_arrival) migrates_expected++;
      else future_migrates_expected++;
    }
    else {
      // Move does not start or end on this PE (or stays on it).
#if CMK_GLOBAL_LOCATION_UPDATE      
      UpdateLocation(move); 
#endif
    }

  }
  DEBUGF(("[%d] in ReceiveMigration %d moves expected: %d future expected: %d\n",CkMyPe(),m->n_moves, migrates_expected, future_migrates_expected));
  // if (_lb_debug) CkPrintf("[%d] expecting %d objects migrating.\n", CkMyPe(), migrates_expected);

#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
	// Send the per-PE dummy move counts gathered above, then clear the
	// restart flag and release the counters.
	if(_restartFlag){
		sendDummyMigrationCounts(dummyCounts);
		_restartFlag  =0;
    	delete []dummyCounts;
	}
#endif


#if 0
  if (m->n_moves ==0) {
    theLbdb->SetLBPeriod(theLbdb->GetLBPeriod()*2);
  }
#endif
  // Adopt the next load balancer PE named in the message; if that is this
  // PE and it is not PE 0, install the message's availability vector.
  cur_ld_balancer = m->next_lb;
  if((CkMyPe() == cur_ld_balancer) && (cur_ld_balancer != 0)){
      LBDatabaseObj()->set_avail_vector(m->avail_vector, -2);
  }

  // Nothing left to wait for: finish the migration phase immediately.
  if (migrates_expected == 0 || migrates_completed == migrates_expected)
    MigrationDone(1);
  delete m;

//	CkEvacuatedElement();
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
//  migrates_expected = 0;
//  //  ResumeClients(1);
#endif
#endif
}
コード例 #19
0
ファイル: hello.C プロジェクト: brog2610/quinoa
 Hello(int _aNum, CkGroupID mcastMgrGID): aNum(_aNum), mcastMgr(NULL), isCookieSet(false)
 {
   CkPrintf("Array %d, Element %d created on PE %d\n", aNum, thisIndex, CkMyPe());
   mcastMgr = CProxy_CkMulticastMgr(mcastMgrGID).ckLocalBranch();
 }
コード例 #20
0
ファイル: GridCommLB.C プロジェクト: gitter-badger/quinoa
/**
 * Assigns every object whose migratable flag is clear to the PE stored in
 * its from_pe field. Each such object is reported when the load balancer
 * debug level is above 1.
 */
void GridCommLB::Map_NonMigratable_Objects_To_PEs ()
{
    for (int obj = 0; obj < Num_Objects; obj++) {
        // Migratable objects are not handled by this method.
        if (Object_Data[obj].migratable) {
            continue;
        }

        if (_lb_args.debug() > 1) {
            CkPrintf ("[%d] GridCommLB identifies object %d as non-migratable.\n", CkMyPe(), obj);
        }

        Assign_Object_To_PE (obj, Object_Data[obj].from_pe);
    }
}
コード例 #21
0
ファイル: OrbLB.C プロジェクト: davidheryanto/sc14
/**
 * Constructs the load balancer on top of CentralLB, sets its name to
 * "OrbLB", and prints a creation message on PE 0 only.
 */
OrbLB::OrbLB(const CkLBOptions &opt): CentralLB(opt)
{
  lbname = "OrbLB";

  const int myPe = CkMyPe();
  if (myPe == 0) {
    CkPrintf("[%d] OrbLB created\n", myPe);
  }
}
コード例 #22
0
ファイル: GridCommLB.C プロジェクト: gitter-badger/quinoa
/**************************************************************************
** Selects the preferred available PE within the specified cluster.  The
** selection rule depends on CK_LDB_GridCommLB_Mode.
**
** Mode 0: the PE with the fewest objects (num_objs) wins.  Ties are broken,
** in order, by fewer num_wan_objs, then fewer num_wan_msgs, then smaller
** scaled_load.  On a complete tie the PE found first is kept.
**
** Mode 1: the PE with the smallest scaled_load and the PE with the fewest
** num_wan_msgs are located.  If the lightest-loaded PE has no more WAN
** messages than that minimum, it is returned.  Otherwise, among the PEs
** whose scaled_load differs from the lightest load by at most
** (lightest load * CK_LDB_GridCommLB_Load_Tolerance), the one with the
** fewest num_wan_msgs is returned.
**
** Any other mode is reported when the debug level is above 0 and yields -1.
**
** The method returns -1 if no matching PE is found.
*/
int GridCommLB::Find_Minimum_PE (int cluster)
{
    if (CK_LDB_GridCommLB_Mode == 0) {
        int best_pe = -1;
        int fewest_objs = MAXINT;

        for (int pe = 0; pe < Num_PEs; pe++) {
            // Only available PEs inside the requested cluster are candidates.
            if (!PE_Data[pe].available || PE_Data[pe].cluster != cluster) {
                continue;
            }

            // A strictly smaller object count always wins.
            if (PE_Data[pe].num_objs < fewest_objs) {
                best_pe = pe;
                fewest_objs = PE_Data[pe].num_objs;
                continue;
            }

            // A larger object count can never win.
            if (PE_Data[pe].num_objs != fewest_objs) {
                continue;
            }

            // Equal object count: compare (num_wan_objs, num_wan_msgs,
            // scaled_load) in that order; the first differing field decides.
            bool wins_tie;
            if (PE_Data[pe].num_wan_objs != PE_Data[best_pe].num_wan_objs) {
                wins_tie = PE_Data[pe].num_wan_objs < PE_Data[best_pe].num_wan_objs;
            } else if (PE_Data[pe].num_wan_msgs != PE_Data[best_pe].num_wan_msgs) {
                wins_tie = PE_Data[pe].num_wan_msgs < PE_Data[best_pe].num_wan_msgs;
            } else {
                wins_tie = PE_Data[pe].scaled_load < PE_Data[best_pe].scaled_load;
            }

            if (wins_tie) {
                best_pe = pe;
            }
        }

        return (best_pe);
    }

    if (CK_LDB_GridCommLB_Mode == 1) {
        int lightest_pe = -1;
        double lightest_load = MAXDOUBLE;
        int quietest_pe = -1;
        int fewest_wan_msgs = MAXINT;

        // First pass: find the minimum-load PE and the minimum-WAN-message PE.
        for (int pe = 0; pe < Num_PEs; pe++) {
            if (!PE_Data[pe].available || PE_Data[pe].cluster != cluster) {
                continue;
            }

            if (PE_Data[pe].scaled_load < lightest_load) {
                lightest_pe = pe;
                lightest_load = PE_Data[pe].scaled_load;
            }

            if (PE_Data[pe].num_wan_msgs < fewest_wan_msgs) {
                quietest_pe = pe;
                fewest_wan_msgs = PE_Data[pe].num_wan_msgs;
            }
        }

        // If no PE at all was found, return a -1.
        if (lightest_pe < 0) {
            return (-1);
        }

        // The lightest-loaded PE also has the minimum number of WAN
        // messages, so it is the best choice on both criteria.
        if (PE_Data[lightest_pe].num_wan_msgs <= PE_Data[quietest_pe].num_wan_msgs) {
            return (lightest_pe);
        }

        // Second pass: among PEs whose load is within the tolerance of the
        // lightest load, pick the one with the fewest WAN messages.  The
        // lightest-loaded PE is the starting candidate.
        const double load_tolerance = PE_Data[lightest_pe].scaled_load * CK_LDB_GridCommLB_Load_Tolerance;
        int chosen_pe = lightest_pe;

        for (int pe = 0; pe < Num_PEs; pe++) {
            if (!PE_Data[pe].available || PE_Data[pe].cluster != cluster) {
                continue;
            }
            if (pe == lightest_pe) {
                continue;
            }
            if (fabs (PE_Data[pe].scaled_load - PE_Data[lightest_pe].scaled_load) > load_tolerance) {
                continue;
            }

            if (PE_Data[pe].num_wan_msgs < PE_Data[chosen_pe].num_wan_msgs) {
                chosen_pe = pe;
            }
        }

        return (chosen_pe);
    }

    // Unknown mode.
    if (_lb_args.debug() > 0) {
        CkPrintf ("[%d] GridCommLB was told to use bad mode (%d).\n", CkMyPe(), CK_LDB_GridCommLB_Mode);
    }
    return (-1);
}
コード例 #23
0
ファイル: pgm-remesh.C プロジェクト: davidheryanto/sc14
/**
 * FEM driver routine for the 2D remeshing test program.
 *
 * Initializes the REFINE2D support, registers the per-chunk globals with FEM
 * together with their pup routine, sizes the node and element arrays from the
 * default read mesh, and publishes the initial mesh to NetFEM. It then runs
 * tSteps iterations; each iteration coarsens and then refines the mesh,
 * passing the initial average element area as the per-element area argument,
 * and publishes the mesh to NetFEM after each of the two stages.
 *
 * Takes no arguments and returns nothing; progress is reported via CkPrintf.
 */
extern "C" void
driver(void)
{
  int i;
  int myChunk=FEM_My_partition();

  /*Add a refinement object to FEM array*/
  CkPrintf("[%d] begin init\n",myChunk);
  FEM_REFINE2D_Init();
  CkPrintf("[%d] end init\n",myChunk);

  myGlobals g;
  FEM_Register(&g,(FEM_PupFn)pup_myGlobals);
  init_myGlobal(&g);

  // Size the node arrays: capacity is twice the current node count.
  g.nnodes = FEM_Mesh_get_length(FEM_Mesh_default_read(),FEM_NODE);
  int maxNodes = g.nnodes;
  g.maxnodes=2*maxNodes;
  g.m_i_fid=FEM_Create_field(FEM_DOUBLE,1,0,sizeof(double));
  resize_nodes((void *)&g,&g.nnodes,&maxNodes);

  // Size the element arrays: capacity equals the current element count.
  g.nelems=FEM_Mesh_get_length(FEM_Mesh_default_read(),FEM_ELEM);
  g.maxelems=g.nelems;
  resize_elems((void *)&g,&g.nelems,&g.maxelems);

  FEM_REFINE2D_Newmesh(FEM_Mesh_default_read(),FEM_NODE,FEM_ELEM);

  //Initialize associated data
  for (i=0;i<g.maxnodes;i++) {
    g.R_net[i]=g.d[i]=g.v[i]=g.a[i]=vector2d(0.0);
  }

  //Apply a small initial perturbation to positions
  for (i=0;i<g.nnodes;i++) {
    const double max=1.0e-15/15.0; //Tiny perturbation
    g.d[i].x+=max*(i&15);
    g.d[i].y+=max*((i+5)&15);
  }

  // The field id is only referenced by the disabled structural-mechanics
  // step inside the time loop; the field is still created here.
  int fid=FEM_Create_field(FEM_DOUBLE,2,0,sizeof(vector2d));

  for (i=0;i<g.nelems;i++){
    checkTriangle(g,i);
  }
  // NOTE(review): the reason for this 5 second pause is not evident here.
  sleep(5);
  //Timeloop
  if (CkMyPe()==0){
    CkPrintf("Entering timeloop\n");
  }
  //  int tSteps=0x70FF00FF;
  int tSteps=4;
  calcMasses(g);
  double startTime=CkWallTimer();
  int t = 0;

  // THIS IS THE INITIAL MESH SENT TO NetFEM
  if (1) { //Publish data to the net
    publishMeshToNetFEM(g,myChunk,t);
  }
  /*
  //should not be necessary as it would have been set in the init
  for (i=0; i<g.nnodes; i++) {
    g.validNode[i] = 1;
  }
  for (i=0; i<g.nelems; i++) {
    g.validElem[i] = 1;
  }*/

  // Average element area of the initial mesh; used as the area value for
  // every element in both the coarsen and the refine calls below.
  double avgArea = 0.0;
  for (i=0;i<g.nelems;i++) {
    avgArea += calcArea(g, i);
  }
  avgArea /= g.nelems;

  for (t=1;t<=tSteps;t++) {
    /*    if (1) { //Structural mechanics
    //Compute forces on nodes exerted by elements
    CST_NL(g.coord,g.conn,g.R_net,g.d,matConst,g.nnodes,g.nelems,g.S11,g.S22,g.S12);
    //Communicate net force on shared nodes
    FEM_Update_field(fid,g.R_net);
    //Advance node positions
    advanceNodes(dt,g.nnodes,g.coord,g.R_net,g.a,g.v,g.d,g.m_i,(t%4)==0);
    }*/

    //Debugging/perf. output
    // NOTE(review): the per-step time is computed but currently not printed.
    double curTime=CkWallTimer();
    double total=curTime-startTime;
    startTime=curTime;
    vector2d *loc;
    double *areas;

    // prepare to coarsen
    // NOTE(review): loc is filled here but g.coord (not loc) is what gets
    // passed to FEM_REFINE2D_Coarsen below -- confirm whether that is intended.
    loc=new vector2d[2*g.nnodes];
    for (i=0;i<g.nnodes;i++) {
      loc[i]=g.coord[i];//+g.d[i];
    }
    areas=new double[g.nelems];
    for (i=0;i<g.nelems;i++) {
      areas[i] = avgArea;
    }
    //coarsen one element at a time
    //int coarseIdx = (23  + 4*t)%g.nnodes;
    //areas[coarseIdx] = calcArea(g,coarseIdx)*2.5;

    CkPrintf("[%d] Starting coarsening step: %d nodes, %d elements\n", myChunk,countValidEntities(g.validNode,g.nnodes),countValidEntities(g.validElem,g.nelems));
    FEM_REFINE2D_Coarsen(FEM_Mesh_default_read(),FEM_NODE,(double *)g.coord,FEM_ELEM,areas,FEM_SPARSE);
    repeat_after_split((void *)&g);
    // Re-read the mesh sizes after the coarsening call.
    g.nelems = FEM_Mesh_get_length(FEM_Mesh_default_read(),FEM_ELEM);
    g.nnodes = FEM_Mesh_get_length(FEM_Mesh_default_read(),FEM_NODE);
    CkPrintf("[%d] Done with coarsening step: %d nodes, %d elements\n",
             myChunk,countValidEntities(g.validNode,g.nnodes),countValidEntities(g.validElem,g.nelems));
    delete [] loc;
    delete[] areas;
    // THIS IS THE COARSENED MESH SENT TO NetFEM
    if (1) { //Publish data to the net
      publishMeshToNetFEM(g,myChunk,2*t-1);
    }

    //prepare to refine
    loc=new vector2d[2*g.nnodes];
    for (i=0;i<g.nnodes;i++) {
      loc[i]=g.coord[i];//+g.d[i];
    }

    areas=new double[g.nelems];
    for (i=0;i<g.nelems;i++) {
      areas[i] = avgArea;
    }
    //refine one element at a time
    //int refIdx = (13  + 3*t)%g.nnodes;
    //areas[refIdx] = calcArea(g,refIdx)/1.5;

    CkPrintf("[%d] Starting refinement step: %d nodes, %d elements\n", myChunk,countValidEntities(g.validNode,g.nnodes),countValidEntities(g.validElem,g.nelems));
    FEM_REFINE2D_Split(FEM_Mesh_default_read(),FEM_NODE,(double *)loc,FEM_ELEM,areas,FEM_SPARSE);
    repeat_after_split((void *)&g);

    // Re-read the mesh sizes after the refinement call.
    g.nelems = FEM_Mesh_get_length(FEM_Mesh_default_read(),FEM_ELEM);
    g.nnodes = FEM_Mesh_get_length(FEM_Mesh_default_read(),FEM_NODE);
    CkPrintf("[%d] Done with refinement step: %d nodes, %d elements\n",
             myChunk,countValidEntities(g.validNode,g.nnodes),countValidEntities(g.validElem,g.nelems));
    delete [] loc;
    delete[] areas;
    // THIS IS THE REFINED MESH SENT TO NetFEM
    if (1) { //Publish data to the net
      publishMeshToNetFEM(g,myChunk,2*t);
    }
  }
  if (CkMyPe()==0)
    CkPrintf("Driver finished\n");
}
コード例 #24
0
ファイル: GridCommLB.C プロジェクト: gitter-badger/quinoa
/**************************************************************************
** Entry point invoked by the Charm++ load balancing framework.  Builds the
** PE and object tables from the supplied statistics, maps non-migratable
** and then migratable objects to PEs, and writes the chosen PE of every
** object into stats->to_proc.
**
** Returns early, without changing stats->to_proc, when no PE is available
** or when the number of clusters computed is less than one.
*/
void GridCommLB::work (LDStats *stats)
{
    if (_lb_args.debug() > 0) {
        CkPrintf ("[%d] GridCommLB is working (mode=%d, background load=%d, load tolerance=%f).\n", CkMyPe(), CK_LDB_GridCommLB_Mode, CK_LDB_GridCommLB_Background_Load, CK_LDB_GridCommLB_Load_Tolerance);
    }

    // This balancer inspects communication data, so the CommHash is needed.
    stats->makeCommHash ();

    // Cache the problem size in the object's member variables.
    Num_PEs = stats->nprocs();
    Num_Objects = stats->n_objs;

    if (_lb_args.debug() > 0) {
        CkPrintf ("[%d] GridCommLB is examining %d PEs and %d objects.\n", CkMyPe(), Num_PEs, Num_Objects);
    }

    // Set up PE_Data[].
    Initialize_PE_Data (stats);

    // Without at least one available PE there is nothing to balance.
    if (Available_PE_Count() < 1) {
        if (_lb_args.debug() > 0) {
            CkPrintf ("[%d] GridCommLB finds no available PEs -- no balancing done.\n", CkMyPe());
        }
        delete [] PE_Data;
        return;
    }

    // Work out how many clusters exist; a count below one means some PE is
    // not mapped to a cluster, so balancing is abandoned.
    Num_Clusters = Compute_Number_Of_Clusters ();
    if (Num_Clusters < 1) {
        if (_lb_args.debug() > 0) {
            CkPrintf ("[%d] GridCommLB finds incomplete PE cluster map -- no balancing done.\n", CkMyPe());
        }
        delete [] PE_Data;
        return;
    }

    if (_lb_args.debug() > 0) {
        CkPrintf ("[%d] GridCommLB finds %d clusters.\n", CkMyPe(), Num_Clusters);
    }

    // Set up Object_Data[].
    Initialize_Object_Data (stats);

    // Classify object-to-object messages as intra- or inter-cluster.
    Examine_InterObject_Messages (stats);

    // Place the non-migratable objects first ...
    Map_NonMigratable_Objects_To_PEs ();

    // ... then the migratable ones, one cluster at a time.
    for (int cluster = 0; cluster < Num_Clusters; cluster++) {
        Map_Migratable_Objects_To_PEs (cluster);
    }

    // Hand the computed placement back to the framework.
    for (int obj = 0; obj < Num_Objects; obj++) {
        stats->to_proc[obj] = Object_Data[obj].to_pe;

        // Debug level > 2 reports every object; level 2 reports only
        // objects that change PE.
        if (_lb_args.debug() > 2 ||
            (_lb_args.debug() > 1 && stats->to_proc[obj] != stats->from_proc[obj])) {
            CkPrintf ("[%d] GridCommLB migrates object %d from PE %d to PE %d.\n", CkMyPe(), obj, stats->from_proc[obj], stats->to_proc[obj]);
        }
    }

    // Release the per-invocation tables.
    delete [] Object_Data;
    delete [] PE_Data;
}
コード例 #25
0
/**
 * Constructor of a Slave chare: prints which element is being constructed on
 * which processor, creates a Function from the read-only parameters, and
 * then calls done_refine() on the main proxy.
 */
Slave::Slave() {
	/* k and thresh are read-only variables set by the main chare, so the
	   former local assignments are disabled:
	k = kInput;
	thresh = threshInput;
	max_level = 30;
	*/
	CkPrintf("Constructor of the Slave chare # %d is called on processor %d.\n", thisIndex, CkMyPe());

	// First two Function arguments: truncated log2 of the process count,
	// and this element's index.
	int logProcesses = static_cast<int>(log2(numProcesses));
	int elementIndex = thisIndex;

	// NOTE(review): the Function object is neither stored nor deleted in
	// this constructor -- confirm whether it is meant to outlive it.
	Function *refined = new Function(logProcesses, elementIndex, k, thresh, test1);

	mainProxy.done_refine();
}
コード例 #26
0
ファイル: MetisLB.C プロジェクト: quinoacomputing/quinoa
/**
 * Constructs the load balancer on top of CBase_MetisLB, sets its name to
 * "MetisLB", and prints a creation message on PE 0 only.
 */
MetisLB::MetisLB(const CkLBOptions &opt): CBase_MetisLB(opt)
{
  lbname = "MetisLB";

  const int myPe = CkMyPe();
  if (myPe == 0) {
    CkPrintf("[%d] MetisLB created\n", myPe);
  }
}
コード例 #27
0
void ComputeNonbondedUtil::select(void)
{
  if ( CkMyRank() ) return;

  // These defaults die cleanly if nothing appropriate is assigned.
  ComputeNonbondedUtil::calcPair = calc_error;
  ComputeNonbondedUtil::calcPairEnergy = calc_error;
  ComputeNonbondedUtil::calcSelf = calc_error;
  ComputeNonbondedUtil::calcSelfEnergy = calc_error;
  ComputeNonbondedUtil::calcFullPair = calc_error;
  ComputeNonbondedUtil::calcFullPairEnergy = calc_error;
  ComputeNonbondedUtil::calcFullSelf = calc_error;
  ComputeNonbondedUtil::calcFullSelfEnergy = calc_error;
  ComputeNonbondedUtil::calcMergePair = calc_error;
  ComputeNonbondedUtil::calcMergePairEnergy = calc_error;
  ComputeNonbondedUtil::calcMergeSelf = calc_error;
  ComputeNonbondedUtil::calcMergeSelfEnergy = calc_error;
  ComputeNonbondedUtil::calcSlowPair = calc_error;
  ComputeNonbondedUtil::calcSlowPairEnergy = calc_error;
  ComputeNonbondedUtil::calcSlowSelf = calc_error;
  ComputeNonbondedUtil::calcSlowSelfEnergy = calc_error;

  SimParameters * simParams = Node::Object()->simParameters;
  Parameters * params = Node::Object()->parameters;

  table_ener = params->table_ener;
  rowsize = params->rowsize;
  columnsize = params->columnsize;

  commOnly = simParams->commOnly;
  fixedAtomsOn = ( simParams->fixedAtomsOn && ! simParams->fixedAtomsForces );

  cutoff = simParams->cutoff;
  cutoff2 = cutoff*cutoff;

//fepb
  alchFepOn = simParams->alchFepOn;
  Fep_WCA_repuOn = simParams->alchFepWCARepuOn;
  Fep_WCA_dispOn = simParams->alchFepWCADispOn;
  alchThermIntOn = simParams->alchThermIntOn;
  alchLambda = alchLambda2 = 0;
  lesOn = simParams->lesOn;
  lesScaling = lesFactor = 0;
  Bool tabulatedEnergies = simParams->tabulatedEnergies;
  alchVdwShiftCoeff = simParams->alchVdwShiftCoeff;
  WCA_rcut1 = simParams->alchFepWCArcut1;
  WCA_rcut2 = simParams->alchFepWCArcut2;
  alchVdwLambdaEnd = simParams->alchVdwLambdaEnd;
  alchElecLambdaStart = simParams->alchElecLambdaStart;

  alchDecouple = simParams->alchDecouple;

  delete [] lambda_table;
  lambda_table = 0;

  pairInteractionOn = simParams->pairInteractionOn;
  pairInteractionSelf = simParams->pairInteractionSelf;
  pressureProfileOn = simParams->pressureProfileOn;

  // Ported by JLai -- Original JE - Go
  goForcesOn = simParams->goForcesOn;
  goMethod = simParams->goMethod; 
  // End of port

  accelMDOn = simParams->accelMDOn;

  drudeNbthole = simParams->drudeOn && (simParams->drudeNbtholeCut > 0.0);

  if ( drudeNbthole ) {
#ifdef NAMD_CUDA
    NAMD_die("drudeNbthole is not supported in CUDA version");
#endif
    if ( alchFepOn )
      NAMD_die("drudeNbthole is not supported with alchemical free-energy perturbation");
    if ( alchThermIntOn )
      NAMD_die("drudeNbthole is not supported with alchemical thermodynamic integration");
    if ( lesOn )
      NAMD_die("drudeNbthole is not supported with locally enhanced sampling");
    if ( pairInteractionOn )
      NAMD_die("drudeNbthole is not supported with pair interaction calculation");
    if ( pressureProfileOn )
      NAMD_die("drudeNbthole is not supported with pressure profile calculation");
  }

  if ( alchFepOn ) {
#ifdef NAMD_CUDA
    NAMD_die("Alchemical free-energy perturbation is not supported in CUDA version");
#endif
    alchLambda = simParams->alchLambda;
    alchLambda2 = simParams->alchLambda2;
    ComputeNonbondedUtil::calcPair = calc_pair_energy_fep;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_fep;
    ComputeNonbondedUtil::calcSelf = calc_self_energy_fep;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_fep;
    ComputeNonbondedUtil::calcFullPair = calc_pair_energy_fullelect_fep;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_fep;
    ComputeNonbondedUtil::calcFullSelf = calc_self_energy_fullelect_fep;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_fep;
    ComputeNonbondedUtil::calcMergePair = calc_pair_energy_merge_fullelect_fep;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_fep;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_energy_merge_fullelect_fep;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_fep;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_energy_slow_fullelect_fep;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_fep;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_energy_slow_fullelect_fep;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_fep;
  }  else if ( alchThermIntOn ) {
#ifdef NAMD_CUDA
    NAMD_die("Alchemical thermodynamic integration is not supported in CUDA version");
#endif
    alchLambda = simParams->alchLambda;
    ComputeNonbondedUtil::calcPair = calc_pair_ti;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_ti;
    ComputeNonbondedUtil::calcSelf = calc_self_ti;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_ti;
    ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect_ti;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_ti;
    ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect_ti;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_ti;
    ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect_ti;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_ti;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect_ti;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_ti;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect_ti;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_ti;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect_ti;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_ti;
  } else if ( lesOn ) {
#ifdef NAMD_CUDA
    NAMD_die("Locally enhanced sampling is not supported in CUDA version");
#endif
    lesFactor = simParams->lesFactor;
    lesScaling = 1.0 / (double)lesFactor;
    lambda_table = new BigReal[(lesFactor+1)*(lesFactor+1)];
    for ( int ip=0; ip<=lesFactor; ++ip ) {
      for ( int jp=0; jp<=lesFactor; ++jp ) {
        BigReal lambda_pair = 1.0;
        if (ip || jp ) {
          if (ip && jp && ip != jp) {
            lambda_pair = 0.0;
          } else {
            lambda_pair = lesScaling;
          }
        }
        lambda_table[(lesFactor+1)*ip+jp] = lambda_pair;
      }
    }
    ComputeNonbondedUtil::calcPair = calc_pair_les;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_les;
    ComputeNonbondedUtil::calcSelf = calc_self_les;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_les;
    ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect_les;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_les;
    ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect_les;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_les;
    ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect_les;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_les;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect_les;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_les;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect_les;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_les;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect_les;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_les;
  } else if ( pressureProfileOn) {
#ifdef NAMD_CUDA
    NAMD_die("Pressure profile calculation is not supported in CUDA version");
#endif
    pressureProfileSlabs = simParams->pressureProfileSlabs;
    pressureProfileAtomTypes = simParams->pressureProfileAtomTypes;

    ComputeNonbondedUtil::calcPair = calc_pair_pprof;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_pprof;
    ComputeNonbondedUtil::calcSelf = calc_self_pprof;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_pprof;
    ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect_pprof;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_pprof;
    ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect_pprof;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_pprof;
    ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect_pprof;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_pprof;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect_pprof;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_pprof;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect_pprof;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_pprof;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect_pprof;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_pprof;
  } else if ( pairInteractionOn ) {
#ifdef NAMD_CUDA
    NAMD_die("Pair interaction calculation is not supported in CUDA version");
#endif
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_int;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_int;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_int;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_int;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_int;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_int;
  } else if ( tabulatedEnergies ) {
#ifdef NAMD_CUDA
    NAMD_die("Tabulated energies is not supported in CUDA version");
#endif
    ComputeNonbondedUtil::calcPair = calc_pair_tabener;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_tabener;
    ComputeNonbondedUtil::calcSelf = calc_self_tabener;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_tabener;
    ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect_tabener;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_tabener;
    ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect_tabener;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_tabener;
    ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect_tabener;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_tabener;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect_tabener;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_tabener;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect_tabener;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_tabener;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect_tabener;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_tabener;
  } else if ( goForcesOn ) {
#ifdef NAMD_CUDA
    NAMD_die("Go forces is not supported in CUDA version");
#endif
    ComputeNonbondedUtil::calcPair = calc_pair_go;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy_go;
    ComputeNonbondedUtil::calcSelf = calc_self_go;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy_go;
    ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect_go;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect_go;
    ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect_go;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect_go;
    ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect_go;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect_go;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect_go;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect_go;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect_go;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect_go;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect_go;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect_go;
  } else {
    ComputeNonbondedUtil::calcPair = calc_pair;
    ComputeNonbondedUtil::calcPairEnergy = calc_pair_energy;
    ComputeNonbondedUtil::calcSelf = calc_self;
    ComputeNonbondedUtil::calcSelfEnergy = calc_self_energy;
    ComputeNonbondedUtil::calcFullPair = calc_pair_fullelect;
    ComputeNonbondedUtil::calcFullPairEnergy = calc_pair_energy_fullelect;
    ComputeNonbondedUtil::calcFullSelf = calc_self_fullelect;
    ComputeNonbondedUtil::calcFullSelfEnergy = calc_self_energy_fullelect;
    ComputeNonbondedUtil::calcMergePair = calc_pair_merge_fullelect;
    ComputeNonbondedUtil::calcMergePairEnergy = calc_pair_energy_merge_fullelect;
    ComputeNonbondedUtil::calcMergeSelf = calc_self_merge_fullelect;
    ComputeNonbondedUtil::calcMergeSelfEnergy = calc_self_energy_merge_fullelect;
    ComputeNonbondedUtil::calcSlowPair = calc_pair_slow_fullelect;
    ComputeNonbondedUtil::calcSlowPairEnergy = calc_pair_energy_slow_fullelect;
    ComputeNonbondedUtil::calcSlowSelf = calc_self_slow_fullelect;
    ComputeNonbondedUtil::calcSlowSelfEnergy = calc_self_energy_slow_fullelect;
  }

//fepe

  dielectric_1 = 1.0/simParams->dielectric;
  if ( ! ljTable ) ljTable = new LJTable;
  mol = Node::Object()->molecule;
  scaling = simParams->nonbondedScaling;
  if ( simParams->exclude == SCALED14 )
  {
    scale14 = simParams->scale14;
  }
  else
  {
    scale14 = 1.;
  }
  if ( simParams->switchingActive )
  {
    switchOn = simParams->switchingDist;
    switchOn_1 = 1.0/switchOn;
    // d0 = 1.0/(cutoff-switchOn);
    switchOn2 = switchOn*switchOn;
    c0 = 1.0/(cutoff2-switchOn2);

    if ( simParams->vdwForceSwitching ) {
      double switchOn3 = switchOn * switchOn2;
      double cutoff3 = cutoff * cutoff2;
      double switchOn6 = switchOn3 * switchOn3;
      double cutoff6 = cutoff3 * cutoff3;
      v_vdwa = -1. / ( switchOn6 * cutoff6 );
      v_vdwb = -1. / ( switchOn3 * cutoff3 );
      k_vdwa = cutoff6 / ( cutoff6 - switchOn6 );
      k_vdwb = cutoff3 / ( cutoff3 - switchOn3 );
      cutoff_3 = 1. / cutoff3;
      cutoff_6 = 1. / cutoff6;
    }
  }
  else
  {
    switchOn = cutoff;
    switchOn_1 = 1.0/switchOn;
    // d0 = 0.;  // avoid division by zero
    switchOn2 = switchOn*switchOn;
    c0 = 0.;  // avoid division by zero
  }
  c1 = c0*c0*c0;
  c3 = 3.0 * (cutoff2 - switchOn2);
  c5 = 0;
  c6 = 0;
  c7 = 0;
  c8 = 0;

  const int PMEOn = simParams->PMEOn;
  const int MSMOn = simParams->MSMOn;
  const int MSMSplit = simParams->MSMSplit;

  if ( PMEOn ) {
    ewaldcof = simParams->PMEEwaldCoefficient;
    BigReal TwoBySqrtPi = 1.12837916709551;
    pi_ewaldcof = TwoBySqrtPi * ewaldcof;
  }

  int splitType = SPLIT_NONE;
  if ( simParams->switchingActive ) splitType = SPLIT_SHIFT;
  if ( simParams->martiniSwitching ) splitType = SPLIT_MARTINI;
  if ( simParams->fullDirectOn || simParams->FMAOn || PMEOn || MSMOn ) {
    switch ( simParams->longSplitting ) {
      case C2:
      splitType = SPLIT_C2;
      break;

      case C1:
      splitType = SPLIT_C1;
      break;

      case XPLOR:
      NAMD_die("Sorry, XPLOR splitting not supported.");
      break;

      case SHARP:
      NAMD_die("Sorry, SHARP splitting not supported.");
      break;

      default:
      NAMD_die("Unknown splitting type found!");

    }
  }

  BigReal r2_tol = 0.1;
  
  r2_delta = 1.0;
  r2_delta_exp = 0;
  while ( r2_delta > r2_tol ) { r2_delta /= 2.0; r2_delta_exp += 1; }
  r2_delta_1 = 1.0 / r2_delta;

  if ( ! CkMyPe() ) {
    iout << iINFO << "NONBONDED TABLE R-SQUARED SPACING: " <<
				r2_delta << "\n" << endi;
  }

  BigReal r2_tmp = 1.0;
  int cutoff2_exp = 0;
  while ( (cutoff2 + r2_delta) > r2_tmp ) { r2_tmp *= 2.0; cutoff2_exp += 1; }

  int i;
  int n = (r2_delta_exp + cutoff2_exp) * 64 + 1;

  if ( ! CkMyPe() ) {
    iout << iINFO << "NONBONDED TABLE SIZE: " <<
				n << " POINTS\n" << endi;
  }

  if ( table_alloc ) delete [] table_alloc;
  table_alloc = new BigReal[61*n+16];
  BigReal *table_align = table_alloc;
  while ( ((long)table_align) % 128 ) ++table_align;
  table_noshort = table_align;
  table_short = table_align + 16*n;
  slow_table = table_align + 32*n;
  fast_table = table_align + 36*n;
  scor_table = table_align + 40*n;
  corr_table = table_align + 44*n;
  full_table = table_align + 48*n;
  vdwa_table = table_align + 52*n;
  vdwb_table = table_align + 56*n;
  r2_table = table_align + 60*n;
  BigReal *fast_i = fast_table + 4;
  BigReal *scor_i = scor_table + 4;
  BigReal *slow_i = slow_table + 4;
  BigReal *vdwa_i = vdwa_table + 4;
  BigReal *vdwb_i = vdwb_table + 4;
  BigReal *r2_i = r2_table;  *(r2_i++) = r2_delta;
  BigReal r2_limit = simParams->limitDist * simParams->limitDist;
  if ( r2_limit < r2_delta ) r2_limit = r2_delta;
  int r2_delta_i = 0;  // entry for r2 == r2_delta

  // fill in the table, fix up i==0 (r2==0) below
  for ( i=1; i<n; ++i ) {

    const BigReal r2_base = r2_delta * ( 1 << (i/64) );
    const BigReal r2_del = r2_base / 64.0;
    const BigReal r2 = r2_base - r2_delta + r2_del * (i%64);

    if ( r2 <= r2_limit ) r2_delta_i = i;

    const BigReal r = sqrt(r2);
    const BigReal r_1 = 1.0/r;
    const BigReal r_2 = 1.0/r2;

    // fast_ is defined as (full_ - slow_)
    // corr_ and fast_ are both zero at the cutoff, full_ is not
    // all three are approx 1/r at short distances

    // for actual interpolation, we use fast_ for fast forces and
    // scor_ = slow_ + corr_ - full_ and slow_ for slow forces
    // since these last two are of small magnitude

    BigReal fast_energy, fast_gradient;
    BigReal scor_energy, scor_gradient;
    BigReal slow_energy, slow_gradient;

    // corr_ is PME direct sum, or similar correction term
    // corr_energy is multiplied by r until later
    // corr_gradient is multiplied by -r^2 until later
    BigReal corr_energy, corr_gradient;

    
    if ( PMEOn ) {
      BigReal tmp_a = r * ewaldcof;
      BigReal tmp_b = erfc(tmp_a);
      corr_energy = tmp_b;
      corr_gradient = pi_ewaldcof*exp(-(tmp_a*tmp_a))*r + tmp_b;
    } else if ( MSMOn ) {
      BigReal a_1 = 1.0/cutoff;
      BigReal r_a = r * a_1;
      BigReal g, dg;
      SPOLY(&g, &dg, r_a, MSMSplit);
      corr_energy = 1 - r_a * g;
      corr_gradient = 1 + r_a*r_a * dg;
    } else {
      corr_energy = corr_gradient = 0;
    }

    switch(splitType) {
      case SPLIT_NONE:
        fast_energy = 1.0/r;
        fast_gradient = -1.0/r2;
        scor_energy = scor_gradient = 0;
        slow_energy = slow_gradient = 0;
	break;
      case SPLIT_SHIFT: {
	BigReal shiftVal = r2/cutoff2 - 1.0;
	shiftVal *= shiftVal;
	BigReal dShiftVal = 2.0 * (r2/cutoff2 - 1.0) * 2.0*r/cutoff2;
        fast_energy = shiftVal/r;
        fast_gradient = dShiftVal/r - shiftVal/r2;
        scor_energy = scor_gradient = 0;
        slow_energy = slow_gradient = 0;
        } 
	break;
      case SPLIT_MARTINI: { 
        // in Martini, the Coulomb switching distance is zero
        const BigReal COUL_SWITCH = 0.;
        // Gromacs shifting function
        const BigReal p1 = 1.;
        BigReal A1 = p1 * ((p1+1)*COUL_SWITCH-(p1+4)*cutoff)/(pow(cutoff,p1+2)*pow(cutoff-COUL_SWITCH,2));
        BigReal B1 = -p1 * ((p1+1)*COUL_SWITCH-(p1+3)*cutoff)/(pow(cutoff,p1+2)*pow(cutoff-COUL_SWITCH,3));
        BigReal X1 = 1.0/pow(cutoff,p1)-A1/3.0*pow(cutoff-COUL_SWITCH,3)-B1/4.0*pow(cutoff-COUL_SWITCH,4);
        BigReal r12 = (r-COUL_SWITCH)*(r-COUL_SWITCH);
        BigReal r13 = (r-COUL_SWITCH)*(r-COUL_SWITCH)*(r-COUL_SWITCH);
        BigReal shiftVal = -(A1/3.0)*r13 - (B1/4.0)*r12*r12 - X1;
        BigReal dShiftVal = -A1*r12 - B1*r13;
        fast_energy = (1/r) + shiftVal;
        fast_gradient = -1/(r2) + dShiftVal;
        scor_energy = scor_gradient = 0;
        slow_energy = slow_gradient = 0;
        } 
	break;
      case SPLIT_C1:
	// calculate actual energy and gradient
	slow_energy = 0.5/cutoff * (3.0 - (r2/cutoff2));
	slow_gradient = -1.0/cutoff2 * (r/cutoff);
	// calculate scor from slow and corr
	scor_energy = slow_energy + (corr_energy - 1.0)/r;
	scor_gradient = slow_gradient - (corr_gradient - 1.0)/r2;
	// calculate fast from slow
	fast_energy = 1.0/r - slow_energy;
	fast_gradient = -1.0/r2 - slow_gradient;
	break;
      case SPLIT_C2:
        //
        // Quintic splitting function contributed by
        // Bruce Berne, Ruhong Zhou, and Joe Morrone
        //
	// calculate actual energy and gradient
        slow_energy = r2/(cutoff*cutoff2) * (6.0 * (r2/cutoff2)
            - 15.0*(r/cutoff) + 10.0);
        slow_gradient = r/(cutoff*cutoff2) * (24.0 * (r2/cutoff2)
            - 45.0 *(r/cutoff) + 20.0);
	// calculate scor from slow and corr
        scor_energy = slow_energy + (corr_energy - 1.0)/r;
        scor_gradient = slow_gradient - (corr_gradient - 1.0)/r2;
	// calculate fast from slow
	fast_energy = 1.0/r - slow_energy;
	fast_gradient = -1.0/r2 - slow_gradient;
	break;
    }

    // foo_gradient is calculated as ( d foo_energy / d r )
    // and now divided by 2r to get ( d foo_energy / d r2 )

    fast_gradient *= 0.5 * r_1;
    scor_gradient *= 0.5 * r_1;
    slow_gradient *= 0.5 * r_1;

    // let modf be 1 if excluded, 1-scale14 if modified, 0 otherwise,
    // add scor_ - modf * slow_ to slow terms and
    // add fast_ - modf * fast_ to fast terms.

    BigReal vdwa_energy, vdwa_gradient;
    BigReal vdwb_energy, vdwb_gradient;

    const BigReal r_6 = r_2*r_2*r_2;
    const BigReal r_12 = r_6*r_6;

    // Lennard-Jones switching function
  if ( simParams->vdwForceSwitching ) {  // switch force
    // from Steinbach & Brooks, JCC 15, pgs 667-683, 1994, eqns 10-13
    if ( r2 > switchOn2 ) {
      BigReal tmpa = r_6 - cutoff_6;
      vdwa_energy = k_vdwa * tmpa * tmpa;
      BigReal tmpb = r_1 * r_2 - cutoff_3;
      vdwb_energy = k_vdwb * tmpb * tmpb;
      vdwa_gradient = -6.0 * k_vdwa * tmpa * r_2 * r_6;
      vdwb_gradient = -3.0 * k_vdwb * tmpb * r_2 * r_2 * r_1;
    } else {
      vdwa_energy = r_12 + v_vdwa;
      vdwb_energy = r_6 + v_vdwb;
      vdwa_gradient = -6.0 * r_2 * r_12;
      vdwb_gradient = -3.0 * r_2 * r_6;
    }
  } else if ( simParams->martiniSwitching ) { // switching fxn for Martini RBCG

    BigReal r12 = (r-switchOn)*(r-switchOn);        BigReal r13 = (r-switchOn)*(r-switchOn)*(r-switchOn);

    BigReal p6 = 6;
    BigReal A6 = p6 * ((p6+1)*switchOn-(p6+4)*cutoff)/(pow(cutoff,p6+2)*pow(cutoff-switchOn,2));
    BigReal B6 = -p6 * ((p6+1)*switchOn-(p6+3)*cutoff)/(pow(cutoff,p6+2)*pow(cutoff-switchOn,3));        
    BigReal C6 = 1.0/pow(cutoff,p6)-A6/3.0*pow(cutoff-switchOn,3)-B6/4.0*pow(cutoff-switchOn,4);

    BigReal p12 = 12;
    BigReal A12 = p12 * ((p12+1)*switchOn-(p12+4)*cutoff)/(pow(cutoff,p12+2)*pow(cutoff-switchOn,2));
    BigReal B12 = -p12 * ((p12+1)*switchOn-(p12+3)*cutoff)/(pow(cutoff,p12+2)*pow(cutoff-switchOn,3));
    BigReal C12 = 1.0/pow(cutoff,p12)-A12/3.0*pow(cutoff-switchOn,3)-B12/4.0*pow(cutoff-switchOn,4);

    BigReal LJshifttempA = -(A12/3)*r13 - (B12/4)*r12*r12 - C12;
    BigReal LJshifttempB = -(A6/3)*r13 - (B6/4)*r12*r12 - C6;
    const BigReal shiftValA =         // used for Lennard-Jones
                        ( r2 > switchOn2 ? LJshifttempA : -C12);
    const BigReal shiftValB =         // used for Lennard-Jones
                        ( r2 > switchOn2 ? LJshifttempB : -C6);

    BigReal LJdshifttempA = -A12*r12 - B12*r13;
    BigReal LJdshifttempB = -A6*r12 - B6*r13;
    const BigReal dshiftValA =         // used for Lennard-Jones
                        ( r2 > switchOn2 ? LJdshifttempA*0.5*r_1 : 0 );
    const BigReal dshiftValB =         // used for Lennard-Jones
                        ( r2 > switchOn2 ? LJdshifttempB*0.5*r_1 : 0 );




    //have not addressed r > cutoff

    //  dshiftValA*= 0.5*r_1;
    //  dshiftValB*= 0.5*r_1;

    vdwa_energy = r_12 + shiftValA;
    vdwb_energy = r_6 + shiftValB;
   
    vdwa_gradient = -6/pow(r,14) + dshiftValA ;
    vdwb_gradient = -3/pow(r,8) + dshiftValB;

  } else {  // switch energy
    const BigReal c2 = cutoff2-r2;
    const BigReal c4 = c2*(c3-2.0*c2);
    const BigReal switchVal =         // used for Lennard-Jones
                        ( r2 > switchOn2 ? c2*c4*c1 : 1.0 );
    const BigReal dSwitchVal =        // d switchVal / d r2
                        ( r2 > switchOn2 ? 2*c1*(c2*c2-c4) : 0.0 );

    vdwa_energy = switchVal * r_12;
    vdwb_energy = switchVal * r_6;

    vdwa_gradient = ( dSwitchVal - 6.0 * switchVal * r_2 ) * r_12;
    vdwb_gradient = ( dSwitchVal - 3.0 * switchVal * r_2 ) * r_6;
  }


    *(fast_i++) = fast_energy;
    *(fast_i++) = fast_gradient;
    *(fast_i++) = 0;
    *(fast_i++) = 0;
    *(scor_i++) = scor_energy;
    *(scor_i++) = scor_gradient;
    *(scor_i++) = 0;
    *(scor_i++) = 0;
    *(slow_i++) = slow_energy;
    *(slow_i++) = slow_gradient;
    *(slow_i++) = 0;
    *(slow_i++) = 0;
    *(vdwa_i++) = vdwa_energy;
    *(vdwa_i++) = vdwa_gradient;
    *(vdwa_i++) = 0;
    *(vdwa_i++) = 0;
    *(vdwb_i++) = vdwb_energy;
    *(vdwb_i++) = vdwb_gradient;
    *(vdwb_i++) = 0;
    *(vdwb_i++) = 0;
    *(r2_i++) = r2 + r2_delta;

  }

  if ( ! r2_delta_i ) {
    NAMD_bug("Failed to find table entry for r2 == r2_limit\n");
  }
  if ( r2_table[r2_delta_i] > r2_limit + r2_delta ) {
    NAMD_bug("Found bad table entry for r2 == r2_limit\n");
  }

  int j;
  const char *table_name = "XXXX";
  int smooth_short = 0;
  for ( j=0; j<5; ++j ) {
    BigReal *t0 = 0;
    switch (j) {
      case 0: 
        t0 = fast_table;
        table_name = "FAST";
        smooth_short = 1;
      break;
      case 1: 
        t0 = scor_table;
        table_name = "SCOR";
        smooth_short = 0;
      break;
      case 2: 
        t0 = slow_table;
        table_name = "SLOW";
        smooth_short = 0;
      break;
      case 3: 
        t0 = vdwa_table;
        table_name = "VDWA";
        smooth_short = 1;
      break;
      case 4: 
        t0 = vdwb_table;
        table_name = "VDWB";
        smooth_short = 1;
      break;
    }
    // patch up data for i=0
    t0[0] = t0[4] - t0[5] * ( r2_delta / 64.0 );  // energy
    t0[1] = t0[5];  // gradient
    t0[2] = 0;
    t0[3] = 0;
    if ( smooth_short ) {
      BigReal energy0 = t0[4*r2_delta_i];
      BigReal gradient0 = t0[4*r2_delta_i+1];
      BigReal r20 = r2_table[r2_delta_i];
      t0[0] = energy0 - gradient0 * (r20 - r2_table[0]);  // energy
      t0[1] = gradient0;  // gradient
    }
    BigReal *t;
    for ( i=0,t=t0; i<(n-1); ++i,t+=4 ) {
      BigReal x = ( r2_delta * ( 1 << (i/64) ) ) / 64.0;
      if ( r2_table[i+1] != r2_table[i] + x ) {
        NAMD_bug("Bad table delta calculation.\n");
      }
      if ( smooth_short && i+1 < r2_delta_i ) {
        BigReal energy0 = t0[4*r2_delta_i];
        BigReal gradient0 = t0[4*r2_delta_i+1];
        BigReal r20 = r2_table[r2_delta_i];
        t[4] = energy0 - gradient0 * (r20 - r2_table[i+1]);  // energy
        t[5] = gradient0;  // gradient
      }
      BigReal v1 = t[0];
      BigReal g1 = t[1];
      BigReal v2 = t[4];
      BigReal g2 = t[5];
      // explicit formulas for v1 + g1 x + c x^2 + d x^3
      BigReal c = ( 3.0 * (v2 - v1) - x * (2.0 * g1 + g2) ) / ( x * x );
      BigReal d = ( -2.0 * (v2 - v1) + x * (g1 + g2) ) / ( x * x * x );
      // since v2 - v1 is imprecise, we refine c and d numerically
      // important because we need accurate forces (more than energies!)
      for ( int k=0; k < 2; ++k ) {
        BigReal dv = (v1 - v2) + ( ( d * x + c ) * x + g1 ) * x;
        BigReal dg = (g1 - g2) + ( 3.0 * d * x + 2.0 * c ) * x;
        c -= ( 3.0 * dv - x * dg ) / ( x * x );
        d -= ( -2.0 * dv + x * dg ) / ( x * x * x );
      }
      // store in the array;
      t[2] = c;  t[3] = d;
    }

    if ( ! CkMyPe() ) {
    BigReal dvmax = 0;
    BigReal dgmax = 0;
    BigReal dvmax_r = 0;
    BigReal dgmax_r = 0;
    BigReal fdvmax = 0;
    BigReal fdgmax = 0;
    BigReal fdvmax_r = 0;
    BigReal fdgmax_r = 0;
    BigReal dgcdamax = 0;
    BigReal dgcdimax = 0;
    BigReal dgcaimax = 0;
    BigReal dgcdamax_r = 0;
    BigReal dgcdimax_r = 0;
    BigReal dgcaimax_r = 0;
    BigReal fdgcdamax = 0;
    BigReal fdgcdimax = 0;
    BigReal fdgcaimax = 0;
    BigReal fdgcdamax_r = 0;
    BigReal fdgcdimax_r = 0;
    BigReal fdgcaimax_r = 0;
    BigReal gcm = fabs(t0[1]);  // gradient magnitude running average
    for ( i=0,t=t0; i<(n-1); ++i,t+=4 ) {
      const BigReal r2_base = r2_delta * ( 1 << (i/64) );
      const BigReal r2_del = r2_base / 64.0;
      const BigReal r2 = r2_base - r2_delta + r2_del * (i%64);
      const BigReal r = sqrt(r2);
      if ( r > cutoff ) break;
      BigReal x = r2_del;
      BigReal dv = ( ( t[3] * x + t[2] ) * x + t[1] ) * x + t[0] - t[4];
      BigReal dg = ( 3.0 * t[3] * x + 2.0 * t[2] ) * x + t[1] - t[5];
      if ( t[4] != 0. && fabs(dv/t[4]) > fdvmax ) {
        fdvmax = fabs(dv/t[4]); fdvmax_r = r;
      }
      if ( fabs(dv) > dvmax ) {
        dvmax = fabs(dv); dvmax_r = r;
      }
      if ( t[5] != 0. && fabs(dg/t[5]) > fdgmax ) {
        fdgmax = fabs(dg/t[5]); fdgmax_r = r;
      }
      if ( fabs(dg) > dgmax ) {
        dgmax = fabs(dg); dgmax_r = r;
      }
      BigReal gcd = (t[4] - t[0]) / x;  // centered difference gradient
      BigReal gcd_prec = (fabs(t[0]) + fabs(t[4])) * 1.e-15 / x;  // roundoff
      gcm = 0.9 * gcm + 0.1 * fabs(t[5]);  // magnitude running average
      BigReal gca = 0.5  * (t[1] + t[5]);  // centered average gradient
      BigReal gci = ( 0.75 * t[3] * x + t[2] ) * x + t[1];  // interpolated
      BigReal rc = sqrt(r2 + 0.5 * x);
      BigReal dgcda = gcd - gca;
      if ( dgcda != 0. && fabs(dgcda) < gcd_prec ) {
        // CkPrintf("ERROR %g < PREC %g AT %g AVG VAL %g\n", dgcda, gcd_prec, rc, gca);
        dgcda = 0.;
      }
      BigReal dgcdi = gcd - gci;
      if ( dgcdi != 0. && fabs(dgcdi) < gcd_prec ) {
        // CkPrintf("ERROR %g < PREC %g AT %g INT VAL %g\n", dgcdi, gcd_prec, rc, gci);
        dgcdi = 0.;
      }
      BigReal dgcai = gca - gci;
      if ( t[1]*t[5] > 0. && gcm != 0. && fabs(dgcda/gcm) > fdgcdamax ) {
        fdgcdamax = fabs(dgcda/gcm); fdgcdamax_r = rc;
      }
      if ( fabs(dgcda) > fdgcdamax ) {
        dgcdamax = fabs(dgcda); dgcdamax_r = rc;
      }
      if ( t[1]*t[5] > 0. && gcm != 0. && fabs(dgcdi/gcm) > fdgcdimax ) {
        fdgcdimax = fabs(dgcdi/gcm); fdgcdimax_r = rc;
      }
      if ( fabs(dgcdi) > fdgcdimax ) {
        dgcdimax = fabs(dgcdi); dgcdimax_r = rc;
      }
      if ( t[1]*t[5] > 0. && gcm != 0. && fabs(dgcai/gcm) > fdgcaimax ) {
        fdgcaimax = fabs(dgcai/gcm); fdgcaimax_r = rc;
      }
      if ( fabs(dgcai) > fdgcaimax ) {
        dgcaimax = fabs(dgcai); dgcaimax_r = rc;
      }
#if 0
      CkPrintf("TABLE %s %g %g %g %g\n",table_name,rc,dgcda/gcm,dgcda,gci);
      if (dv != 0.) CkPrintf("TABLE %d ENERGY ERROR %g AT %g (%d)\n",j,dv,r,i);
      if (dg != 0.) CkPrintf("TABLE %d FORCE ERROR %g AT %g (%d)\n",j,dg,r,i);
#endif
    }
    if ( dvmax != 0.0 ) {
      iout << iINFO << "ABSOLUTE IMPRECISION IN " << table_name <<
        " TABLE ENERGY: " << dvmax << " AT " << dvmax_r << "\n" << endi;
    }
    if ( fdvmax != 0.0 ) {
      iout << iINFO << "RELATIVE IMPRECISION IN " << table_name <<
        " TABLE ENERGY: " << fdvmax << " AT " << fdvmax_r << "\n" << endi;
    }
    if ( dgmax != 0.0 ) {
      iout << iINFO << "ABSOLUTE IMPRECISION IN " << table_name <<
        " TABLE FORCE: " << dgmax << " AT " << dgmax_r << "\n" << endi;
    }
    if ( fdgmax != 0.0 ) {
      iout << iINFO << "RELATIVE IMPRECISION IN " << table_name <<
        " TABLE FORCE: " << fdgmax << " AT " << fdgmax_r << "\n" << endi;
    }
    if (fdgcdamax != 0.0 ) {
      iout << iINFO << "INCONSISTENCY IN " << table_name <<
        " TABLE ENERGY VS FORCE: " << fdgcdamax << " AT " << fdgcdamax_r << "\n" << endi;
      if ( fdgcdamax > 0.1 ) {
        iout << iERROR << "\n";
        iout << iERROR << "CALCULATED " << table_name <<
          " FORCE MAY NOT MATCH ENERGY! POSSIBLE BUG!\n";
        iout << iERROR << "\n";
      }
    }
    if (0 && fdgcdimax != 0.0 ) {
      iout << iINFO << "INCONSISTENCY IN " << table_name <<
        " TABLE ENERGY VS FORCE: " << fdgcdimax << " AT " << fdgcdimax_r << "\n" << endi;
    }
    if ( 0 && fdgcaimax != 0.0 ) {
      iout << iINFO << "INCONSISTENCY IN " << table_name <<
        " TABLE AVG VS INT FORCE: " << fdgcaimax << " AT " << fdgcaimax_r << "\n" << endi;
    }
    }

  }

  for ( i=0; i<4*n; ++i ) {
    corr_table[i] = fast_table[i] + scor_table[i];
    full_table[i] = fast_table[i] + slow_table[i];
  }

#if 0  
  for ( i=0; i<n; ++i ) {
   for ( int j=0; j<4; ++j ) {
    table_short[16*i+6-2*j] = table_noshort[16*i+6-2*j] = vdwa_table[4*i+j];
    table_short[16*i+7-2*j] = table_noshort[16*i+7-2*j] = vdwb_table[4*i+j];
    table_short[16*i+8+3-j] = fast_table[4*i+j];
    table_short[16*i+12+3-j] = scor_table[4*i+j];
    table_noshort[16*i+8+3-j] = corr_table[4*i+j];
    table_noshort[16*i+12+3-j] = full_table[4*i+j];
   }
  }
#endif 

  for ( i=0; i<n; ++i ) {
    table_short[16*i+ 0] = table_noshort[16*i+0] = -6.*vdwa_table[4*i+3];
    table_short[16*i+ 2] = table_noshort[16*i+2] = -6.*vdwb_table[4*i+3];
    table_short[16*i+ 4] = table_noshort[16*i+4] = -2.*vdwa_table[4*i+1];
    table_short[16*i+ 6] = table_noshort[16*i+6] = -2.*vdwb_table[4*i+1];
    
    table_short[16*i+1] = table_noshort[16*i+1] = -4.*vdwa_table[4*i+2];
    table_short[16*i+3] = table_noshort[16*i+3] = -4.*vdwb_table[4*i+2];
    table_short[16*i+5] = table_noshort[16*i+5] = -1.*vdwa_table[4*i+0];
    table_short[16*i+7] = table_noshort[16*i+7] = -1.*vdwb_table[4*i+0];
    
    table_short[16*i+8]  = -6.*fast_table[4*i+3];
    table_short[16*i+9]  = -4.*fast_table[4*i+2];
    table_short[16*i+10] = -2.*fast_table[4*i+1];
    table_short[16*i+11] = -1.*fast_table[4*i+0];

    table_noshort[16*i+8]  = -6.*corr_table[4*i+3];
    table_noshort[16*i+9]  = -4.*corr_table[4*i+2];
    table_noshort[16*i+10] = -2.*corr_table[4*i+1];
    table_noshort[16*i+11] = -1.*corr_table[4*i+0];

    table_short[16*i+12] = -6.*scor_table[4*i+3];
    table_short[16*i+13] = -4.*scor_table[4*i+2];
    table_short[16*i+14] = -2.*scor_table[4*i+1];
    table_short[16*i+15] = -1.*scor_table[4*i+0];

    table_noshort[16*i+12] = -6.*full_table[4*i+3];
    table_noshort[16*i+13] = -4.*full_table[4*i+2];
    table_noshort[16*i+14] = -2.*full_table[4*i+1];
    table_noshort[16*i+15] = -1.*full_table[4*i+0];
  }

#if 0
  char fname[100];
  sprintf(fname,"/tmp/namd.table.pe%d.dat",CkMyPe());
  FILE *f = fopen(fname,"w");
  for ( i=0; i<(n-1); ++i ) {
    const BigReal r2_base = r2_delta * ( 1 << (i/64) );
    const BigReal r2_del = r2_base / 64.0;
    const BigReal r2 = r2_base - r2_delta + r2_del * (i%64);
    BigReal *t;
    if ( r2 + r2_delta != r2_table[i] ) fprintf(f,"r2 error! ");
    fprintf(f,"%g",r2);
    t = fast_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    t = scor_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    t = slow_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    t = corr_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    t = full_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    t = vdwa_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    t = vdwb_table + 4*i;
    fprintf(f,"   %g %g %g %g", t[0], t[1], t[2], t[3]);
    fprintf(f,"\n");
  }
  fclose(f);
#endif

#ifdef NAMD_CUDA
  send_build_cuda_force_table();
#endif

}
コード例 #28
0
ファイル: MetisLB.C プロジェクト: quinoacomputing/quinoa
/**
 * Load-balancing strategy that partitions the object communication graph
 * with METIS.
 *
 * Builds a ProcArray and an ObjGraph from `stats`, converts the graph to the
 * CSR adjacency arrays METIS expects (xadj/adjncy with vertex and edge
 * weights), asks METIS for one partition per processor in the ProcArray,
 * records a new PE on every vertex whose partition differs from its current
 * PE, and finally hands the decisions back through
 * ObjGraph::convertDecisions(stats).
 *
 * @param stats  load-balancing statistics; used to build the graph and passed
 *               to convertDecisions() at the end.
 *
 * Aborts (CkAbort) if METIS reports a failure, because in that case the
 * partition array has not been filled in.
 */
void MetisLB::work(LDStats* stats)
{
  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);
  ObjGraph *ogr = new ObjGraph(stats);

  /** ============================= STRATEGY ================================ */
  if (_lb_args.debug() >= 2) {
    CkPrintf("[%d] In MetisLB Strategy...\n", CkMyPe());
  }

  // convert ObjGraph to the adjacency structure
  int numVertices = ogr->vertices.size();	// number of vertices
  int numEdges = 0;				// number of edges

  double maxLoad = 0.0;
  int i, j, k, vert;

  /**
   * Remove duplicate edges from recvFrom: when a neighbor appears in both the
   * send and the receive list of a vertex, fold the received bytes into the
   * send edge and drop the receive edge so the pair is listed only once.
   */
  for(i = 0; i < numVertices; i++) {
    for(j = 0; j < ogr->vertices[i].sendToList.size(); j++) {
      vert = ogr->vertices[i].sendToList[j].getNeighborId();
      k = 0;
      while(k < ogr->vertices[i].recvFromList.size()) {
        if(ogr->vertices[i].recvFromList[k].getNeighborId() == vert) {
          ogr->vertices[i].sendToList[j].setNumBytes(
              ogr->vertices[i].sendToList[j].getNumBytes() +
              ogr->vertices[i].recvFromList[k].getNumBytes());
          ogr->vertices[i].recvFromList.erase(
              ogr->vertices[i].recvFromList.begin() + k);
          // erase() shifted the following element into slot k, so k must not
          // advance here or that element would never be examined
        } else {
          k++;
        }
      }
    }
  }

  /** find the maximum object load and count the remaining edges */
  for(i = 0; i < numVertices; i++) {
    if(ogr->vertices[i].getVertexLoad() > maxLoad)
      maxLoad = ogr->vertices[i].getVertexLoad();
    numEdges = numEdges + ogr->vertices[i].sendToList.size() + ogr->vertices[i].recvFromList.size();
  }

  /* adjacency list */
  idx_t *xadj = new idx_t[numVertices + 1];
  /* id of the neighbors */
  idx_t *adjncy = new idx_t[numEdges];
  /* weights of the vertices */
  idx_t *vwgt = new idx_t[numVertices];
  /* weights of the edges */
  idx_t *adjwgt = new idx_t[numEdges];

  int edgeNum = 0;
  // The object load is normalized to an integer between 0 and 256.  When no
  // object reports a positive load the scale factor would be 256/0, so in
  // that case every vertex gets the same unit weight instead.
  const bool haveLoad = (maxLoad > 0.0);
  const double ratio = haveLoad ? 256.0/maxLoad : 0.0;

  for(i = 0; i < numVertices; i++) {
    xadj[i] = edgeNum;
    vwgt[i] = haveLoad ? (int)ceil(ogr->vertices[i].getVertexLoad() * ratio) : 1;
    for(j = 0; j < ogr->vertices[i].sendToList.size(); j++) {
      adjncy[edgeNum] = ogr->vertices[i].sendToList[j].getNeighborId();
      adjwgt[edgeNum] = ogr->vertices[i].sendToList[j].getNumBytes();
      edgeNum++;
    }
    for(j = 0; j < ogr->vertices[i].recvFromList.size(); j++) {
      adjncy[edgeNum] = ogr->vertices[i].recvFromList[j].getNeighborId();
      adjwgt[edgeNum] = ogr->vertices[i].recvFromList[j].getNumBytes();
      edgeNum++;
    }
  }
  // closing sentinel: one past the last edge of the last vertex
  xadj[numVertices] = edgeNum;
  CkAssert(edgeNum == numEdges);

  idx_t edgecut;		// number of edges cut by the partitioning
  idx_t *pemap;

  idx_t options[METIS_NOPTIONS];
  METIS_SetDefaultOptions(options);
  //options[METIS_OPTION_PTYPE] = METIS_PTYPE_RB;
  // C style numbering
  options[METIS_OPTION_NUMBERING] = 0;

  // number of constraints
  idx_t ncon = 1;
  // number of partitions
  idx_t numPes = parr->procs.size();
  // one tolerance entry per balancing constraint (ncon == 1); fixed size
  // because variable-length arrays are not standard C++
  real_t ubvec[1];
  // allow 10% imbalance
  ubvec[0] = 1.1;

  // mapping of objs to partitions
  pemap = new idx_t[numVertices];

  // Specifies size of vertices for computing the total communication volume
  idx_t *vsize = NULL;
  // This array of size nparts specifies the desired weight for each partition
  // and setting it to NULL indicates graph should be equally divided among
  // partitions
  real_t *tpwgts = NULL;

  int option = 0;
  if (WEIGHTED == option) {
    // set up the different weights between 0 and 1
    tpwgts = new real_t[numPes];
    for (i = 0; i < numPes; i++) {
      tpwgts[i] = 1.0/(real_t)numPes;
    }
  } else if (MULTI_CONSTRAINT == option) {
    CkAbort("Multiple constraints not implemented.\n");
  }

  // numVertices: num vertices in the graph; ncon: num balancing constraints
  // xadj, adjncy: xadj has size n+1 and adjncy size 2m; the neighbors of
  // vertex i are adjncy[xadj[i]] through and including adjncy[xadj[i+1]-1]
  // vwgt: weight of the vertices; vsize: amt of data that needs to be sent
  // for ith vertex is vsize[i]
  // adjwgt: the weight of edges; numPes: total parts
  // tpwgts: target partition weight, can pass NULL to equally divide
  // ubvec: of size ncon to indicate allowed load imbalance tolerance (> 1.0)
  // options: array of options; edgecut: stores the edgecut; pemap: mapping
  int metisStatus = METIS_PartGraphRecursive(&numVertices, &ncon,  xadj, adjncy, vwgt, vsize, adjwgt,
      &numPes, tpwgts, ubvec, options, &edgecut, pemap);

  delete[] xadj;
  delete[] adjncy;
  delete[] vwgt;
  delete[] adjwgt;
  delete[] vsize;
  delete[] tpwgts;

  // pemap is only filled in on success; using it after a failure would
  // assign objects to garbage PEs
  if (metisStatus != METIS_OK) {
    CkAbort("MetisLB: METIS_PartGraphRecursive failed.\n");
  }

  if (_lb_args.debug() >= 1) {
   CkPrintf("[%d] MetisLB done! \n", CkMyPe());
  }

  // record a migration only for objects whose partition differs from the
  // PE they currently live on
  for(i = 0; i < numVertices; i++) {
    if(pemap[i] != ogr->vertices[i].getCurrentPe())
      ogr->vertices[i].setNewPe(pemap[i]);
  }

  delete[] pemap;

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);
  delete parr;
  delete ogr;
}
コード例 #29
0
ファイル: LdbCoordinator.C プロジェクト: sun51/ece598HK
// Set up (or re-set up) the coordinator's state for the next load
// balancing cycle.
//
// In order, this routine:
//   1. caches the ldb-related simulation parameters and the two maps,
//   2. rebuilds patchNAtoms and counts the patches/computes that the maps
//      place on this node,
//   3. when ldbCycleNum == reg_all_objs, registers patches and newly added
//      computes with theLbdb,
//   4. applies any migration messages queued on migrateMsgs,
//   5. picks numStepsToRun and switches stats collection on or off based on
//      ldbCycleNum, firstLdbStep, lastLdbStep and the balancer type,
//   6. resets the reported/expected counters, lazily allocates the PE 0
//      arrays, and clears the loads held by theLbdb.
//
// Parameters:
//   pMap   - patch map; stored in patchMap and queried for patch placement.
//   cMap   - compute map; stored in computeMap and queried for compute
//            placement, type and patch ids.
//   reinit - when non-zero, sequencerThreads and controllerThread are left
//            as they are; when zero they are reallocated / reset to NULL.
void LdbCoordinator::initialize(PatchMap *pMap, ComputeMap *cMap, int reinit)
{
  const SimParameters *simParams = Node::Object()->simParameters;

#if 0
  static int lbcreated = 0; // XXX static variables are unsafe for SMP
  // PE0 first time Create a load balancer
  if (CkMyPe() == 0 && !lbcreated) {
    if (simParams->ldbStrategy == LDBSTRAT_ALGNBOR) 
      CreateNamdNborLB();
    else {
      //   CreateCentralLB();
      CreateNamdCentLB();
    }
    lbcreated = 1;
  }
#endif

  //  DebugM(10,"stepsPerLdbCycle initialized\n");
  // Cache the load-balancing schedule parameters for this call.
  stepsPerLdbCycle = simParams->ldbPeriod;
  firstLdbStep = simParams->firstLdbStep;
  int lastLdbStep = simParams->lastLdbStep;
  int stepsPerCycle = simParams->stepsPerCycle;

  computeMap = cMap;
  patchMap = pMap;

  // Set the number of received messages correctly for node 0

  nStatsMessagesExpected = Node::Object()->numNodes();
  nStatsMessagesReceived = 0;

  // Rebuild the per-patch atom count array from scratch on every call.
  if (patchNAtoms) 
    delete [] patchNAtoms;  // Depends on delete NULL to do nothing
  nPatches = patchMap->numPatches();
  patchNAtoms = new int[nPatches];

  typedef Sequencer *seqPtr;

  // The sequencer table is only rebuilt on a fresh (non-reinit) call.
  if ( ! reinit ) {
    delete [] sequencerThreads;  // Depends on delete NULL to do nothing
    sequencerThreads = new seqPtr[nPatches];
  }

  nLocalPatches=0;

  // Mark patches mapped to this node with 0 and all others with -1,
  // counting the local ones along the way.
  int i;
  for(i=0;i<nPatches;i++)
  {
    if (patchMap->node(i) == Node::Object()->myid())
    {
      nLocalPatches++;
      patchNAtoms[i]=0;
    } else {
      patchNAtoms[i]=-1;
    }
    if ( ! reinit ) sequencerThreads[i]=NULL;
  }
  if ( ! reinit ) controllerThread = NULL;
  // Sanity check: our count must agree with the patch map's own count.
  if (nLocalPatches != patchMap->numHomePatches())
    NAMD_die("Disaggreement in patchMap data.\n");
 
  // Remember the previous compute count so that only computes added since
  // the last call get registered below.
  const int oldNumComputes = numComputes;
  nLocalComputes = 0;
  numComputes = computeMap->numComputes();

  // Count the computes on this node whose type is one of those listed.
  // The two nonbonded types are left out when NAMD_CUDA is defined.
  for(i=0;i<numComputes;i++)  {
    if ( (computeMap->node(i) == Node::Object()->myid())
	 && ( 0
#ifndef NAMD_CUDA
	      || (computeMap->type(i) == computeNonbondedSelfType)
	      || (computeMap->type(i) == computeNonbondedPairType)
#endif
	      || (computeMap->type(i) == computeLCPOType)
	      || (computeMap->type(i) == computeSelfExclsType)
	      || (computeMap->type(i) == computeSelfBondsType)
	      || (computeMap->type(i) == computeSelfAnglesType)
	      || (computeMap->type(i) == computeSelfDihedralsType)
	      || (computeMap->type(i) == computeSelfImpropersType)
	      || (computeMap->type(i) == computeSelfTholeType)
	      || (computeMap->type(i) == computeSelfAnisoType)
	      || (computeMap->type(i) == computeSelfCrosstermsType)

                 || (computeMap->type(i) == computeBondsType)
                 || (computeMap->type(i) == computeExclsType)
                 || (computeMap->type(i) == computeAnglesType)
                 || (computeMap->type(i) == computeDihedralsType)
                 || (computeMap->type(i) == computeImpropersType)
                 || (computeMap->type(i) == computeTholeType)
                 || (computeMap->type(i) == computeAnisoType)
                 || (computeMap->type(i) == computeCrosstermsType)
	) ) {
      nLocalComputes++;
    }
  }
  
  // New LB frameworks registration

  // Allocate data structure to save incoming migrations.  Processor
  // zero will get all migrations

  // If this is the first time through, we need to register patches
  if (ldbCycleNum == reg_all_objs) {
    // With the centralized balancer, later calls compare ldbCycleNum
    // against 3 instead of the previous reg_all_objs value.
    // NOTE(review): presumably this schedules a second registration pass
    // at cycle 3 - confirm against where reg_all_objs is initialized.
    if ( Node::Object()->simParameters->ldBalancer == LDBAL_CENTRALIZED ) {
      reg_all_objs = 3;
    }
    // Tell the lbdb that I'm registering objects, until I'm done
    // registering them.
    theLbdb->RegisteringObjects(myHandle);
    
   // Patches are only registered on cycle 1.
   if ( ldbCycleNum == 1 ) {
    patchHandles = new LDObjHandle[nLocalPatches];
    int patch_count=0;
    int i;  // shadows the function-level i for the duration of this block
    for(i=0;i<nPatches;i++)
      if (patchMap->node(i) == Node::Object()->myid()) {
	// Patch object ids carry the patch index in id[0] and -2 in the
	// remaining slots.
	LDObjid elemID;
	elemID.id[0] = i;
	elemID.id[1] = elemID.id[2] = elemID.id[3] = -2;

	// Guard against writing past the end of patchHandles.
	if (patch_count >= nLocalPatches) {
	  iout << iFILE << iERROR << iPE 
	       << "LdbCoordinator found too many local patches!" << endi;
	  CkExit();
	}
        // Store the same handle both on the home patch and in patchHandles.
        // NOTE(review): the trailing RegisterObj argument (0 here, 1 for
        // the first compute group below) is presumably the "migratable"
        // flag - confirm against the LBDatabase API.
        HomePatch *p = patchMap->homePatch(i);
        p->ldObjHandle = 
	patchHandles[patch_count] 
	  = theLbdb->RegisterObj(myHandle,elemID,0,0);
	patch_count++;

      }
   }
  
    // Only computes with an index at or beyond the previous compute count
    // are registered here.
    if ( numComputes > oldNumComputes ) {
      // Register computes
      for(i=oldNumComputes; i<numComputes; i++)  {
	if ( computeMap->node(i) == Node::Object()->myid())
        {
	  // First group: LCPO and the "self" types, plus the nonbonded
	  // types unless NAMD_CUDA is defined.
	  if ( 0
#ifndef NAMD_CUDA
	          || (computeMap->type(i) == computeNonbondedSelfType)
	          || (computeMap->type(i) == computeNonbondedPairType)
#endif
	          || (computeMap->type(i) == computeLCPOType)
	          || (computeMap->type(i) == computeSelfExclsType)
	          || (computeMap->type(i) == computeSelfBondsType)
	          || (computeMap->type(i) == computeSelfAnglesType)
	          || (computeMap->type(i) == computeSelfDihedralsType)
	          || (computeMap->type(i) == computeSelfImpropersType)
	          || (computeMap->type(i) == computeSelfTholeType)
	          || (computeMap->type(i) == computeSelfAnisoType)
	          || (computeMap->type(i) == computeSelfCrosstermsType)
		)  {
	  // Register the object with the load balancer
	  // Store the depended patch IDs in the rest of the element ID
	  // (up to three patch ids; unused slots are set to -1).
	  LDObjid elemID;
	  elemID.id[0] = i;
	
	  if (computeMap->numPids(i) > 2)
	    elemID.id[3] = computeMap->pid(i,2);
	  else elemID.id[3] = -1;

	  if (computeMap->numPids(i) > 1)
	    elemID.id[2] =  computeMap->pid(i,1);
	  else elemID.id[2] = -1;

	  if (computeMap->numPids(i) > 0)
	    elemID.id[1] =  computeMap->pid(i,0);
	  else elemID.id[1] = -1;

          Compute *c = computeMap->compute(i);
          if ( ! c ) NAMD_bug("LdbCoordinator::initialize() null compute pointer");

          c->ldObjHandle = theLbdb->RegisterObj(myHandle,elemID,0,1);
          }
          // Second group: the non-self bonded types. These carry -3 in
          // id[1..3] and are registered with a trailing 0 argument.
          else if ( (computeMap->type(i) == computeBondsType)
                 || (computeMap->type(i) == computeExclsType)
                 || (computeMap->type(i) == computeAnglesType)
                 || (computeMap->type(i) == computeDihedralsType)
                 || (computeMap->type(i) == computeImpropersType)
                 || (computeMap->type(i) == computeTholeType)
                 || (computeMap->type(i) == computeAnisoType)
                 || (computeMap->type(i) == computeCrosstermsType)
               ) {
	  // Register the object with the load balancer
	  // Store the depended patch IDs in the rest of the element ID
	  LDObjid elemID;
	  elemID.id[0] = i;
	
	  elemID.id[1] = elemID.id[2] = elemID.id[3] = -3;

          Compute *c = computeMap->compute(i);
          if ( ! c ) NAMD_bug("LdbCoordinator::initialize() null compute pointer");

          c->ldObjHandle = theLbdb->RegisterObj(myHandle,elemID,0,0);
          }
	}
      }
    }
    theLbdb->DoneRegisteringObjects(myHandle);
  }

  // process saved migration messages, if any
  // Each queued message supplies the handle for the compute whose index is
  // stored in handle.id.id[0]; the message is deleted once applied.
  while ( migrateMsgs ) {
    LdbMigrateMsg *m = migrateMsgs;
    migrateMsgs = m->next;
    Compute *c = computeMap->compute(m->handle.id.id[0]);
    if ( ! c ) NAMD_bug("LdbCoordinator::initialize() null compute pointer 2");
    c->ldObjHandle = m->handle;
    delete m;
  }

  // Fixup to take care of the extra timestep at startup
  // This is pretty ugly here, but it makes the count correct
  
  // iout << "LDB Cycle Num: " << ldbCycleNum << "\n";

  // Decide how many steps to run before the next call and whether load
  // statistics are collected during them. In every branch totalStepsDone
  // is advanced first; where lastLdbStep is checked, numStepsToRun becomes
  // -1 and collection is switched off once totalStepsDone exceeds
  // lastLdbStep (a lastLdbStep of -1 disables that check).
 if ( simParams->ldBalancer == LDBAL_CENTRALIZED ) {
  // Centralized balancer: cycles 1 and 3 run one stepsPerCycle block
  // without collecting stats.
  if (ldbCycleNum == 1 || ldbCycleNum == 3) {
    numStepsToRun = stepsPerCycle;
    totalStepsDone += numStepsToRun;
    takingLdbData = 0;
    theLbdb->CollectStatsOff();
  } else if (ldbCycleNum == 2 || ldbCycleNum == 4) {
    // Cycles 2 and 4 run the rest of firstLdbStep with stats on; the count
    // is bumped by stepsPerCycle until it is positive.
    numStepsToRun = firstLdbStep - stepsPerCycle;
    while ( numStepsToRun <= 0 ) numStepsToRun += stepsPerCycle;
    totalStepsDone += numStepsToRun;
    takingLdbData = 1;
    theLbdb->CollectStatsOn();
  } else if ( (ldbCycleNum <= 6) || !takingLdbData )
  {
    // Measurement phase: firstLdbStep steps with stats on.
    totalStepsDone += firstLdbStep;
    if(lastLdbStep != -1 && totalStepsDone > lastLdbStep) {
      numStepsToRun = -1;
      takingLdbData = 0;
      theLbdb->CollectStatsOff();
    } else {
      numStepsToRun = firstLdbStep;
      takingLdbData = 1;
      theLbdb->CollectStatsOn();
    }
  }
  else 
  {
    // Remainder of the ldb period (stepsPerLdbCycle - firstLdbStep steps)
    // with stats off.
    totalStepsDone += stepsPerLdbCycle - firstLdbStep;
    if(lastLdbStep != -1 && totalStepsDone > lastLdbStep) {
      numStepsToRun = -1;
      takingLdbData = 0;
      theLbdb->CollectStatsOff();
    } else {
      numStepsToRun = stepsPerLdbCycle - firstLdbStep;
      takingLdbData = 0;
      theLbdb->CollectStatsOff();
    }
  }
 } else {
  // Any other balancer: cycle 1 runs firstLdbStep steps with stats off.
  if (ldbCycleNum==1)
  {
    totalStepsDone += firstLdbStep;
    numStepsToRun = firstLdbStep;
    takingLdbData = 0;
    theLbdb->CollectStatsOff();
  }
  else if ( (ldbCycleNum <= 4) || !takingLdbData )
  {
    // Measurement phase: firstLdbStep steps with stats on.
    totalStepsDone += firstLdbStep;
    if(lastLdbStep != -1 && totalStepsDone > lastLdbStep) {
      numStepsToRun = -1;
      takingLdbData = 0;
      theLbdb->CollectStatsOff();
    } else {
      numStepsToRun = firstLdbStep;
      takingLdbData = 1;
      theLbdb->CollectStatsOn();
    }
  }
  else 
  {
    // Remainder of the ldb period with stats off.
    totalStepsDone += stepsPerLdbCycle - firstLdbStep;
    if(lastLdbStep != -1 && totalStepsDone > lastLdbStep) {
      numStepsToRun = -1;
      takingLdbData = 0;
      theLbdb->CollectStatsOff();
    } else {
      numStepsToRun = stepsPerLdbCycle - firstLdbStep;
      takingLdbData = 0;
      theLbdb->CollectStatsOff();
    }
  }
 }

/*-----------------------------------------------------------------------------*
 * --------------------------------------------------------------------------- *
 * Comments inserted by Abhinav to clarify relation between ldbCycleNum,       *
 * load balancing step numbers (printed by the step() function) and            *
 * tracing of the steps                                                        *
 * --------------------------------------------------------------------------- *
 * If trace is turned off in the beginning, then tracing is turned on          *
 * at ldbCycleNum = 4 and turned off at ldbCycleNum = 8. ldbCycleNum can       *
 * be adjusted by specifying firstLdbStep and ldbPeriod which are set by       *
 * default to 5*stepspercycle and 200*stepspercycle if not specified.          *
 *                                                                             *
 * If we choose firstLdbStep = 20 and ldbPeriod = 100, we have the             *
 * following timeline (for these particular numbers):                          *
 *                                                                             *
 * Tracing         :  <------ off ------><------------- on -----------><-- off *
 * Ldb Step() No   :              1     2     3        4      5       6      7 *
 * Iteration Steps : 00====20====40====60====80======160====180=====260====280 *
 * ldbCycleNum     :  1     2     3     4     5        6      7       8      9 *
 * Instrumentation :          Inst  Inst  Inst           Inst            Inst  *
 * LDB Strategy    :              TLB  RLB   RLB            RLB            RLB *
 *                                                                             *
 * TLB = TorusLB                                                               *
 * RLB = RefineTorusLB                                                         *
 * Inst = Instrumentation Phase (no real load balancing)                       *
 * --------------------------------------------------------------------------- *
 *-----------------------------------------------------------------------------*
 */
#if 0 //replaced by traceBarrier at Controller and Sequencer
  if (traceAvailable()) {
    static int specialTracing = 0; // XXX static variables are unsafe for SMP
    if (ldbCycleNum == 1 && traceIsOn() == 0)  specialTracing = 1;
    if (specialTracing) {
      if (ldbCycleNum == 4) traceBegin();
      if (ldbCycleNum == 8) traceEnd();
    }
  }
#endif
  
  // Reset the "reported" tallies and recompute what this cycle expects.
  nPatchesReported = 0;
  nPatchesExpected = nLocalPatches;
  nComputesReported = 0;
  // NOTE(review): numStepsToRun can be -1 at this point (past lastLdbStep),
  // which makes this product zero or negative - confirm consumers cope.
  nComputesExpected = nLocalComputes * numStepsToRun;
  controllerReported = 0;
  // Evaluates to 1 on PE 0 and 0 on every other PE.
  controllerExpected = ! CkMyPe();

  // PE 0 allocates these arrays the first time only (while still NULL).
  // NOTE(review): they are sized from the current numComputes / nPatches
  // and are not resized here if those counts grow on a later call -
  // confirm that cannot happen or is handled elsewhere.
  if (CkMyPe() == 0)
  {
    if (computeArray == NULL)
      computeArray = new computeInfo[numComputes];
    if (patchArray == NULL)
      patchArray = new patchInfo[nPatches];
    if (processorArray == NULL)
      processorArray = new processorInfo[CkNumPes()];
  }
    
  theLbdb->ClearLoads();
}
コード例 #30
0
ファイル: init.C プロジェクト: gitter-badger/quinoa
/**
 * Handler that defers an incoming message rather than acting on it.
 *
 * The message pointer is appended, untouched, to the `_buffQ` queue
 * obtained through CkpvAccess. Nothing is freed or inspected here.
 *
 * @param msg  opaque message pointer to park on the queue.
 */
static void _bufferHandler(void *msg)
{
  DEBUGF((
    "[%d] _bufferHandler called.\n",
    CkMyPe()
  ));

  // Park the message on the buffer queue.
  (*CkpvAccess(_buffQ)).enq(msg);
}