Ejemplo n.º 1
0
// Writes the proxy list of patch `id` to `fp`: first the number of proxy
// nodes, then each node number, every value formatted with "%4d ".
void LdbCoordinator::printRequiredProxies(PatchID id, FILE *fp)
{
  // The buffer is sized for one-away plus two-away neighbors. Per the
  // original author's note (RKB), two-away always returns zero, so in
  // practice only one-away neighbors end up in the list.
  int proxyNodes[PatchMap::MaxOneAway + PatchMap::MaxTwoAway];
  const int proxyCount = requiredProxies(id, proxyNodes);

  // Leading field: how many node entries follow.
  fprintf(fp, "%4d ", proxyCount);

  int k = 0;
  while (k < proxyCount) {
    fprintf(fp, "%4d ", proxyNodes[k]);
    ++k;
  }
}
Ejemplo n.º 2
0
/**
 * @brief Builds the data structures required for the load balancing strategies in NAMD.
 *
 * Fills processorArray, patchArray, computeArray and from_procs from the
 * statistics of this load-balancing group together with the patch and
 * compute maps, then clears @p stats.
 *
 * Processor slots are indexed relative to the group: slot i corresponds to
 * stats->procs[i], and absolute PE numbers are converted to slots by
 * subtracting stats->procs[0].pe (this assumes the PEs of a group are
 * numbered contiguously -- TODO confirm against the caller).
 *
 * @param stats Load statistics (processors and objects) of this group.
 *              stats->clear() is called before returning.
 * @return Number of migratable computes stored in computeArray.
 */
int NamdHybridLB::buildData(LDStats* stats) {
  int n_pes = stats->nprocs();

  PatchMap* patchMap = PatchMap::Object();
  ComputeMap* computeMap = ComputeMap::Object();
  const SimParameters* simParams = Node::Object()->simParameters;

  BigReal bgfactor = simParams->ldbBackgroundScaling;
  BigReal pmebgfactor = simParams->ldbPMEBackgroundScaling;
  BigReal homebgfactor = simParams->ldbHomeBackgroundScaling;
  int pmeOn = simParams->PMEOn;
  int unLoadPme = simParams->ldbUnloadPME;
  int pmeBarrier = simParams->PMEBarrier;
  int unLoadZero = simParams->ldbUnloadZero;
  int unLoadOne = simParams->ldbUnloadOne;
  int unLoadIO= simParams->ldbUnloadOutputPEs;

  // traversing the list of processors and getting their load information
  int i, pe_no;
  for (i=0; i<n_pes; ++i) {
    pe_no = stats->procs[i].pe;

    processorArray[i].Id = pe_no;               // absolute pe number
    processorArray[i].available = true;
    // Scale the measured background time by a factor that depends on the
    // role of the processor: PME, patch home, or neither.
    if ( pmeOn && isPmeProcessor(pe_no) ) {
      processorArray[i].backgroundLoad = pmebgfactor * stats->procs[i].bg_walltime;
    } else if (patchMap->numPatchesOnNode(pe_no) > 0) {
      processorArray[i].backgroundLoad = homebgfactor * stats->procs[i].bg_walltime;
    } else {
      processorArray[i].backgroundLoad = bgfactor * stats->procs[i].bg_walltime;
    }
    processorArray[i].idleTime = stats->procs[i].idletime;
    processorArray[i].load = processorArray[i].computeLoad = 0.0;
  }

  // Absolute PE number of the first processor of this group; used to
  // translate between absolute PE numbers and processorArray slots.
  const int firstPe = stats->procs[0].pe;

  // If I am group zero, then offload processor 0 and 1 in my group
  if (firstPe == 0) {
    if (unLoadZero) processorArray[0].available = false;
    // Slot 1 only exists when the group has at least two processors.
    if (unLoadOne && n_pes > 1) processorArray[1].available = false;
  }

  // if all pes are Pme, disable this flag
  if (pmeOn && unLoadPme) {
    for (i=0; i<n_pes; i++) {
      if (!isPmeProcessor(stats->procs[i].pe)) break;
    }
    if (i == n_pes) {
      iout << iINFO << "Turned off unLoadPme flag!\n"  << endi;
      unLoadPme = 0;
    }
  }

  if (pmeOn && unLoadPme) {
    for (i=0; i<n_pes; i++) {
      if ((pmeBarrier && i==0) || isPmeProcessor(stats->procs[i].pe))
        processorArray[i].available = false;
    }
  }

  // if all pes are output, disable this flag
#ifdef MEM_OPT_VERSION
  if (unLoadIO) {
    if (simParams->numoutputprocs == n_pes) {
      iout << iINFO << "Turned off unLoadIO flag!\n"  << endi;
      unLoadIO = 0;
    }
  }
  if (unLoadIO) {
    for (i=0; i<n_pes; i++) {
      if (isOutputProcessor(stats->procs[i].pe))
        processorArray[i].available = false;
    }
  }
#endif

  // need to go over all patches to get all required proxies
  const int lastPe = stats->procs[n_pes-1].pe;
  int numPatches = patchMap->numPatches();
  int totalLocalProxies = 0;
  int totalProxies = 0;
  for ( int pid=0; pid<numPatches; ++pid ) {
    int neighborNodes[PatchMap::MaxOneAway + PatchMap::MaxTwoAway];

    patchArray[pid].Id = pid;
    patchArray[pid].numAtoms = 0;
    patchArray[pid].processor = patchMap->node(pid);

    const int numProxies =
#if 0 // USE_TOPOMAP - this function needs to be there for the hybrid case
      requiredProxiesOnProcGrid(pid,neighborNodes);
#else
      requiredProxies(pid, neighborNodes);
#endif

    // Only proxies that live on a processor of this group are recorded.
    int numLocalProxies = 0;
    for (int k=0; k<numProxies; k++) {
      if ( (neighborNodes[k] >= firstPe) && (neighborNodes[k] <= lastPe) ) {
        ++numLocalProxies;
        const int slot = neighborNodes[k] - firstPe;
        processorArray[slot].proxies.unchecked_insert(&patchArray[pid]);
        patchArray[pid].proxiesOn.unchecked_insert(&processorArray[slot]);
      }
    }
#if 0
    if ( numLocalProxies ) {
      CkPrintf("LDB Pe %d patch %d has %d local of %d total proxies\n",
               CkMyPe(), pid, numLocalProxies, numProxies);
    }
#endif
    totalLocalProxies += numLocalProxies;
    totalProxies += numProxies;
  }
#if 0
  CkPrintf("LDB Pe %d has %d local of %d total proxies\n",
           CkMyPe(), totalLocalProxies, totalProxies);
#endif

  int nMoveableComputes=0;

  // this loop goes over only the objects in this group
  for (int j=0; j < stats->n_objs; j++) {
    const LDObjData &this_obj = stats->objData[j];
    const int frompe = stats->from_proc[j];

    // frompe is a slot relative to this group. The compute branch below
    // already treats frompe >= n_pes as "from outside", so it must be
    // range-checked before it is used to index processorArray.
    const bool frompeInGroup = (frompe >= 0 && frompe < n_pes);

    // filter out non-NAMD managed objects (like PME array)
    if (this_obj.omID().id.idx != 1) {
      if (frompeInGroup)
        processorArray[frompe].backgroundLoad += this_obj.wallTime;
      continue;
    }

    if (this_obj.id().id[1] == -2) { // Its a patch
      // handled above to get required proxies from all patches
      if (frompeInGroup)
        processorArray[frompe].backgroundLoad += this_obj.wallTime;
    } else if (this_obj.id().id[1] == -3) { // Its a bonded compute
      if (frompeInGroup)
        processorArray[frompe].backgroundLoad += this_obj.wallTime;
    } else if (this_obj.migratable && this_obj.wallTime != 0.) { // Its a compute

      const int cid = this_obj.id().id[0];
      const int p0 = computeMap->pid(cid,0);

      // For self-interactions, just return the same pid twice
      int p1;
      if (computeMap->numPids(cid) > 1) {
        p1 = computeMap->pid(cid,1);
      } else {
        p1 = p0;
      }

      computeArray[nMoveableComputes].Id = cid;
      if (!frompeInGroup) {  // from outside
        CkPrintf("assigning random old processor...this looks broken\n");
        computeArray[nMoveableComputes].oldProcessor = CrnRand()%n_pes + firstPe;     // random
      } else {
        computeArray[nMoveableComputes].oldProcessor = frompe + firstPe;
      }
      from_procs[nMoveableComputes] = frompe;

      // Charge the compute's wall time to the slot of its (possibly
      // randomly assigned) old processor.
      const int oldSlot = computeArray[nMoveableComputes].oldProcessor - firstPe;
      processorArray[oldSlot].computeLoad += this_obj.wallTime;
      computeArray[nMoveableComputes].processor = -1;
      computeArray[nMoveableComputes].patch1 = p0;
      computeArray[nMoveableComputes].patch2 = p1;
      computeArray[nMoveableComputes].handle = this_obj.handle;
      computeArray[nMoveableComputes].load = this_obj.wallTime;
      nMoveableComputes++;
    }
  }

  // Total load per processor is background plus migratable compute load.
  for (i=0; i<n_pes; i++) {
    processorArray[i].load = processorArray[i].backgroundLoad + processorArray[i].computeLoad;
  }
  stats->clear();
  return nMoveableComputes;
}