Esempio n. 1
0
/**
 * This function implements a strategy similar to the one used in the 
 * centralized case in NamdCentLB.
 */
CLBMigrateMsg* NamdHybridLB::GrpLevelStrategy(LDStats* stats) {
  int numProcessors = stats->nprocs();	// number of processors at group level
  int numPatches = PatchMap::Object()->numPatches();
  ComputeMap *computeMap = ComputeMap::Object();
  const int numComputes = computeMap->numComputes();
  const int numGroupComputes = stats->n_migrateobjs;
  const SimParameters* simParams = Node::Object()->simParameters;

  if ( ! processorArray ) processorArray = new processorInfo[numProcessors];
  // these data structures are global and need to be distributed
  if ( ! patchArray ) patchArray = new patchInfo[numPatches];
  if ( ! computeArray ) computeArray = new computeInfo[numGroupComputes];
  if ( ! from_procs ) from_procs = new int[numGroupComputes];

  int nMoveableComputes = buildData(stats);
  CmiAssert(nMoveableComputes <= numGroupComputes);


#if LDB_DEBUG
#define DUMP_LDBDATA 1
#define LOAD_LDBDATA 1
#endif

#if DUMP_LDBDATA 
  dumpDataASCII("ldbd_before", numProcessors, numPatches, nMoveableComputes);
#elif LOAD_LDBDATA
  loadDataASCII("ldbd_before.5", numProcessors, numPatches, nMoveableComputes);
  // CkExit();
#endif

  double averageLoad = 0.;
  double avgCompute;
  double maxCompute;
  int maxComputeId;
  int numPesAvailable;
  {
   int i;
   double total = 0.;
   maxCompute = 0.;
   int maxi = 0;
   for (i=0; i<nMoveableComputes; i++) {
      double load = computeArray[i].load;
      total += load;
      if ( load > maxCompute ) { maxCompute = load;  maxi = i; }
   }
   avgCompute = total / nMoveableComputes;
   maxComputeId = computeArray[maxi].handle.id.id[0];

    int P = stats->nprocs();
   numPesAvailable = 0;
   for (i=0; i<P; i++) {
      if (processorArray[i].available) {
        ++numPesAvailable;
        total += processorArray[i].backgroundLoad;
      }
   }
   if (numPesAvailable == 0)
     NAMD_die("No processors available for load balancing!\n");

   averageLoad = total/numPesAvailable;
  }

  int i_split = 0;
  double maxUnsplit = 0.;

  if ( step() == 1 ) {
    for (int i=0; i<nMoveableComputes; i++) {
      const int cid = computeArray[i].handle.id.id[0];
      if ( computeMap->numPartitions(cid) == 0 ) {
        const double load = computeArray[i].load;
        if ( load > maxUnsplit ) maxUnsplit = load;
        continue;
      }
      ++i_split;
    }
  }

  {
    SplitComputesMsg *msg = new(i_split,i_split) SplitComputesMsg;
    msg->maxUnsplit = maxUnsplit;
    msg->averageLoad = averageLoad;
    msg->avgCompute = avgCompute;
    msg->maxCompute = maxCompute;
    msg->maxComputeId = maxComputeId;
    msg->nMoveableComputes = nMoveableComputes;
    msg->numPesAvailable = numPesAvailable;
    msg->n = i_split;

    if ( step() == 1 ) {
      i_split = 0;
      for (int i=0; i<nMoveableComputes; i++) {
        computeArray[i].processor = computeArray[i].oldProcessor;
        const int cid = computeArray[i].handle.id.id[0];
        if ( computeMap->numPartitions(cid) == 0 ) {
          continue;
        }
        msg->cid[i_split] = cid;
        msg->load[i_split] = computeArray[i].load;
        ++i_split;
      }
    }

    thisProxy[0].splitComputes(msg);
  }

  if ( step() == 1 ) {
    // compute splitting only
  } else if (simParams->ldbStrategy == LDBSTRAT_DEFAULT) { // default
    if (step() < 4)
      TorusLB(computeArray, patchArray, processorArray,
                  nMoveableComputes, numPatches, numProcessors);
    else
      RefineTorusLB(computeArray, patchArray, processorArray,
                  nMoveableComputes, numPatches, numProcessors, 1);
  } else if (simParams->ldbStrategy == LDBSTRAT_COMPREHENSIVE) {
    TorusLB(computeArray, patchArray, processorArray,
                  nMoveableComputes, numPatches, numProcessors);
  } else if (simParams->ldbStrategy == LDBSTRAT_REFINEONLY) {
    RefineTorusLB(computeArray, patchArray, processorArray,
                  nMoveableComputes, numPatches, numProcessors, 1);
  } else if (simParams->ldbStrategy == LDBSTRAT_OLD) {
    NAMD_die("Old load balancer strategy is not compatible with hybrid balancer.");
    if (step() < 4)
      Alg7(computeArray, patchArray, processorArray,
                  nMoveableComputes, numPatches, numProcessors);
    else
      RefineOnly(computeArray, patchArray, processorArray,
                  nMoveableComputes, numPatches, numProcessors);
  }

#if LDB_DEBUG && USE_TOPOMAP
  TopoManager tmgr;
  int pe1, pe2, pe3, hops=0;
  /* This is double counting the hops
  for(int i=0; i<nMoveableComputes; i++)
  {
    pe1 = computeArray[i].processor;
    pe2 = patchArray[computeArray[i].patch1].processor;
    pe3 = patchArray[computeArray[i].patch2].processor;
    hops += tmgr.getHopsBetweenRanks(pe1, pe2);
    if(computeArray[i].patch1 != computeArray[i].patch2)
      hops += tmgr.getHopsBetweenRanks(pe1, pe3);  
  }*/
  for (int i=0; i<numPatches; i++)  {
    //int num = patchArray[i].proxiesOn.numElements();
    pe1 = patchArray[i].processor;
    Iterator nextProc;
    processorInfo *p = (processorInfo *)patchArray[i].proxiesOn.iterator((Iterator *)&nextProc);
    while (p) {
      pe2 = p->Id;
      hops += tmgr.getHopsBetweenRanks(pe1, pe2);
      p = (processorInfo *)patchArray[i].proxiesOn.next((Iterator*)&nextProc);
    }
  }
  CkPrintf("Load Balancing: Number of Hops: %d\n", hops);
#endif

#if DUMP_LDBDATA
  dumpDataASCII("ldbd_after", numProcessors, numPatches, nMoveableComputes);
#elif LOAD_LDBDATA
  dumpDataASCII("ldbd_after.5", numProcessors, numPatches, nMoveableComputes);
  // loadDataASCII("ldbd_after", numProcessors, numPatches, nMoveableComputes);
  // CkExit();
#endif

  // For error checking:
  // Count up computes, to see if somebody doesn't have any computes
  int i;
#if 0
  int* computeCount = new int[numProcessors];
  for(i=0; i<numProcessors; i++)
    computeCount[i]=0;
  for(i=0; i<nMoveableComputes; i++)
    computeCount[computeArray[i].processor]++;
  for(i=0; i<numProcessors; i++) {
    if (computeCount[i]==0)
      iout << iINFO <<"Warning: Processor " << i 
	   << " has NO moveable computes.\n" << endi;
  }
  delete [] computeCount;
#endif
  
  CkVec<MigrateInfo *> migrateInfo;
  for(i=0;i<nMoveableComputes;i++) {
    if (computeArray[i].processor != from_procs[i]+stats->procs[0].pe) {
      /* CkPrintf("[%d] Obj %d migrating from %d (%d) to %d\n",
                     CkMyPe(),computeArray[i].handle.id.id[0],
			 from_procs[i], computeArray[i].oldProcessor, computeArray[i].processor); */
      MigrateInfo *migrateMe = new MigrateInfo;
      migrateMe->obj = computeArray[i].handle;
      //migrateMe->from_pe = computeArray[i].oldProcessor;
      int frompe = from_procs[i];
      if (frompe == numProcessors)
        frompe = -1;
      else
        frompe = frompe + stats->procs[0].pe;
      migrateMe->from_pe = frompe;
      migrateMe->to_pe = computeArray[i].processor;
      if (frompe == -1) {
          // don't know yet which processor this compute belongs to, but
	  // inform receiver
        LDObjData obj;
        obj.handle = computeArray[i].handle;
        thisProxy[computeArray[i].processor].ObjMigrated(obj, NULL, 0, currentLevel-1);
      } 
      migrateInfo.insertAtEnd(migrateMe);

      // sneak in updates to ComputeMap
      //ERASE CkPrintf("%d setting %d to processor %d\n",CkMyPe(),computeArray[i].handle.id.id[0],computeArray[i].processor);
      computeMap->setNewNode(computeArray[i].handle.id.id[0],
				computeArray[i].processor);
    }
  }
  // CkPrintf("LOAD BALANCING READY %d\n",CkMyPe()); 

  LBMigrateMsg* msg;
  msg = createMigrateMsg(migrateInfo, numProcessors);

  peLoads = new double [numProcessors]; 
  startPE = processorArray[0].Id;
  endPE = processorArray[numProcessors-1].Id;
  // CkPrintf("[%d] numProcessors=%d, %d to %d\n",CkMyPe(),numProcessors,processorArray[0].Id,processorArray[numProcessors-1].Id);
  for (i=0; i<numProcessors; i++) {
	peLoads[i] = processorArray[i].load;
  }


  delete [] from_procs;
  delete [] processorArray;
  delete [] patchArray;
  delete [] computeArray;

  from_procs = NULL;
  processorArray = NULL;
  patchArray = NULL;
  computeArray = NULL;
  
  return msg;

}
Esempio n. 2
0
    Main(CkArgMsg* m) {
        if ( (m->argc != 3) && (m->argc != 7) ) {
            CkPrintf("%s [array_size] [block_size]\n", m->argv[0]);
            CkPrintf("OR %s [array_size_X] [array_size_Y] [array_size_Z] [block_size_X] [block_size_Y] [block_size_Z]\n", m->argv[0]);
            CkAbort("Abort");
        }

        // set iteration counter to zero
        iterations = 0;

        // store the main proxy
        mainProxy = thisProxy;

        if(m->argc == 3) {
            arrayDimX = arrayDimY = arrayDimZ = atoi(m->argv[1]);
            blockDimX = blockDimY = blockDimZ = atoi(m->argv[2]);
        }
        else if (m->argc == 7) {
            arrayDimX = atoi(m->argv[1]);
            arrayDimY = atoi(m->argv[2]);
            arrayDimZ = atoi(m->argv[3]);
            blockDimX = atoi(m->argv[4]);
            blockDimY = atoi(m->argv[5]);
            blockDimZ = atoi(m->argv[6]);
        }

        if (arrayDimX < blockDimX || arrayDimX % blockDimX != 0)
            CkAbort("array_size_X % block_size_X != 0!");
        if (arrayDimY < blockDimY || arrayDimY % blockDimY != 0)
            CkAbort("array_size_Y % block_size_Y != 0!");
        if (arrayDimZ < blockDimZ || arrayDimZ % blockDimZ != 0)
            CkAbort("array_size_Z % block_size_Z != 0!");

        num_chare_x = arrayDimX / blockDimX;
        num_chare_y = arrayDimY / blockDimY;
        num_chare_z = arrayDimZ / blockDimZ;

        // print info
        CkPrintf("\nSTENCIL COMPUTATION WITH NO BARRIERS\n");
        CkPrintf("Running Jacobi on %d processors with (%d, %d, %d) chares\n", CkNumPes(), num_chare_x, num_chare_y, num_chare_z);
        CkPrintf("Array Dimensions: %d %d %d\n", arrayDimX, arrayDimY, arrayDimZ);
        CkPrintf("Block Dimensions: %d %d %d\n", blockDimX, blockDimY, blockDimZ);

        // Create new array of worker chares
#if USE_TOPOMAP
        CProxy_JacobiMap map = CProxy_JacobiMap::ckNew(num_chare_x, num_chare_y, num_chare_z);
        CkPrintf("Topology Mapping is being done ... \n");
        CkArrayOptions opts(num_chare_x, num_chare_y, num_chare_z);
        opts.setMap(map);
        array = CProxy_Jacobi::ckNew(opts);
#else
        array = CProxy_Jacobi::ckNew(num_chare_x, num_chare_y, num_chare_z);
#endif

        TopoManager tmgr;
        CkArray *jarr = array.ckLocalBranch();
        int jmap[num_chare_x][num_chare_y][num_chare_z];

        int hops=0, p;
        for(int i=0; i<num_chare_x; i++)
            for(int j=0; j<num_chare_y; j++)
                for(int k=0; k<num_chare_z; k++) {
                    jmap[i][j][k] = jarr->procNum(CkArrayIndex3D(i, j, k));
                }

        for(int i=0; i<num_chare_x; i++)
            for(int j=0; j<num_chare_y; j++)
                for(int k=0; k<num_chare_z; k++) {
                    p = jmap[i][j][k];
                    hops += tmgr.getHopsBetweenRanks(p, jmap[wrap_x(i+1)][j][k]);
                    hops += tmgr.getHopsBetweenRanks(p, jmap[wrap_x(i-1)][j][k]);
                    hops += tmgr.getHopsBetweenRanks(p, jmap[i][wrap_y(j+1)][k]);
                    hops += tmgr.getHopsBetweenRanks(p, jmap[i][wrap_y(j-1)][k]);
                    hops += tmgr.getHopsBetweenRanks(p, jmap[i][j][wrap_z(k+1)]);
                    hops += tmgr.getHopsBetweenRanks(p, jmap[i][j][wrap_z(k-1)]);
                }
        CkPrintf("Total Hops: %d\n", hops);

#ifdef JACOBI_OPENMP
        CProxy_OmpInitializer ompInit =
            CProxy_OmpInitializer::ckNew(4);
#else
        //Start the computation
        start();
#endif
    }