示例#1
0
/*
* Map objects to PEs for load balance.
*
* Takes a min heap of objects that can be transferred and, while this PE's
* load (my_load) is above thr_avg, repeatedly removes the smallest object and
* looks for a receiver PE via PickRandReceiverPeIdx(). A receiver is accepted
* only if its load plus the object's load stays below avg_load.
*
* objs      - min heap of candidate objects; every record removed from it by
*             this function is deleted here, whether or not it was assigned.
* obj_no    - output: ids of the objects chosen for migration.
* obj_pe_no - output: entry k is the receiver PE for obj_no[k].
*
* Side effects: updates my_load, objs_count and loads[], and sends an
* InformMigration message to every chosen receiver.
*
* NOTE(review): kMaxTrials is decremented here and never reset inside this
* function, so the trial budget appears to be shared by all objects handled
* in one call (and possibly across calls) -- confirm that this is intended.
*/
void DistributedLB::MapObjsToPe(minHeap &objs, CkVec<int> &obj_no,
    CkVec<int> &obj_pe_no) {
  int p_id = -1;
  double p_load = 0.0;
  int rand_pe = -1;

  // While my load is more than the threshold, try to transfer objs
  while (my_load > (thr_avg)) {
    // If there is only one object, then nothing can be done to balance it.
    if (objs_count < 2) break;

    // Flag to indicate whether successful in finding a transfer
    bool success = false;

    // Get the smallest object
    InfoRecord* obj = objs.deleteMin();
    // No more objects to retrieve
    if (obj == 0) break;

    // If transferring this object makes this PE underloaded, then don't
    // transfer. The record has already left the heap, so it must be released
    // here or it would be leaked.
    if ((my_load - obj->load) < (thr_avg)) {
      delete obj;
      break;
    }

    // Pick random PE based on the probability and the find is successful only
    // if on transferring the object, that PE does not become overloaded
    do {
      rand_pe = PickRandReceiverPeIdx();
      if (rand_pe == -1) break;
      p_id = pe_no[rand_pe];
      p_load = loads[rand_pe];
      if ((p_load + obj->load) < avg_load) {
        success = true;
      }
      kMaxTrials--;
    } while (!success && (kMaxTrials > 0));

    // Not successful in finding a suitable PE to transfer the object. As
    // above, release the record that was removed from the heap.
    if (!success) {
      delete obj;
      break;
    }

    // Found an object and a suitable PE to transfer it to. Decrement the obj
    // count and update the loads.
    obj_no.insertAtEnd(obj->Id);
    obj_pe_no.insertAtEnd(p_id);
    objs_count--;
    loads[rand_pe] += obj->load;
    my_load -= obj->load;

    // Send information to the receiver PE about this obj. This is necessary for
    // ack as well as finding out how many objs are migrating in
    thisProxy[p_id].InformMigration(obj->Id, CkMyPe(),
        my_stats->objData[obj->Id].wallTime, false);

    // This object is assigned, so we delete it from the heap
    delete obj;
  }
}
示例#2
0
// Build the migration message from stats->from_proc and stats->to_proc.
// Every object whose source and destination PE differ contributes one
// MigrateInfo entry (handle, from_pe, to_pe, async_arrival) to the message.
// Returns a newly allocated LBMigrateMsg holding all collected moves.
LBMigrateMsg * CentralLB::createMigrateMsg(LDStats* stats)
{
  CkVec<MigrateInfo*> pending;

  // Pass 1: collect one record per object that actually changes PE.
  for (int objIdx = 0; objIdx < stats->n_objs; objIdx++) {
    LDObjData &data = stats->objData[objIdx];
    const int srcPe = stats->from_proc[objIdx];
    const int dstPe = stats->to_proc[objIdx];
    if (srcPe == dstPe)
      continue;   // object stays where it is

    MigrateInfo *record = new MigrateInfo;
    record->obj = data.handle;
    record->from_pe = srcPe;
    record->to_pe = dstPe;
    record->async_arrival = data.asyncArrival;
    pending.insertAtEnd(record);
  }

  // Pass 2: copy the records into the message and release the temporaries.
  const int nMoves = pending.length();
  LBMigrateMsg* msg = new(nMoves,CkNumPes(),CkNumPes(),0) LBMigrateMsg;
  msg->n_moves = nMoves;

  int slot = 0;
  while (slot < nMoves) {
    MigrateInfo* record = pending[slot];
    msg->moves[slot] = *record;
    delete record;
    pending[slot] = 0;
    slot++;
  }
  return msg;
}
示例#3
0
void ComlibSectionInfo::getNodeLocalIndices(int nindices,
                                        CkArrayIndex *idxlist,
					CkArrayID &destArrayID,
                                        CkVec<CkArrayIndex> &idx_vec){    
    int acount = 0;
    idx_vec.resize(0);
    
    CkArray *a = (CkArray *)_localBranch(destArrayID);
    for(acount = 0; acount < nindices; acount++){
        //int p = ComlibGetLastKnown(destArrayID, idxlist[acount]);
        int p = a->lastKnown(idxlist[acount]);
        if(p == CkMyPe()) 
            idx_vec.insertAtEnd(idxlist[acount]);
    }
}
示例#4
0
// Decide which of this PE's objects to hand to its neighbors.
//
//   stats - per-neighbor statistics, `count` entries. The total_walltime and
//           idletime of every non-vacating neighbor are rescaled IN PLACE by
//           (myStats.proc_speed * usage) / (neighbor proc_speed * usage).
//   count - number of entries in stats.
//
// The average load is taken over this PE and its non-vacating neighbors. If
// this PE is vacating, or its load is above that average, the biggest local
// objects are assigned one at a time to the currently lightest neighbor.
// Object loads are inflated by load_factor before being accounted.
//
// Returns a newly allocated LBMigrateMsg listing the chosen moves (possibly
// zero), or NULL when CMK_LBDB_ON is not set.
LBMigrateMsg* WSLB::Strategy(WSLB::LDStats* stats, int count)
{
#if CMK_LBDB_ON
  //  CkPrintf("[%d] Strategy starting\n",CkMyPe());
  // Compute the average load to see if we are overloaded relative
  // to our neighbors
  const double load_factor = 1.05;
  double objload = 0.0;

  double myload = myStats.total_walltime - myStats.idletime;
  double avgload = myload;
  int unvacated_neighbors = 0;
  int i;
  for(i=0; i < count; i++) {
    // If the neighbor is vacating, skip him
    if (stats[i].vacate_me)
      continue;

    // Scale times we need appropriately for relative proc speeds
    double hisload = stats[i].total_walltime - stats[i].idletime;
    const double hisusage = stats[i].usage;

    const double scale =  (myStats.proc_speed * usage)
      / (stats[i].proc_speed * hisusage);

    hisload *= scale;
    stats[i].total_walltime *= scale;
    stats[i].idletime *= scale;

    //    CkPrintf("PE %d %d hisload = %f hisusage = %f\n",
    //             CkMyPe(),i,hisload,hisusage);
    avgload += hisload;
    unvacated_neighbors++;
  }
  if (vacate && unvacated_neighbors == 0)
    CkPrintf("[%d] ALL NEIGHBORS WANT TO VACATE!!!\n",CkMyPe());

  avgload /= (unvacated_neighbors+1);

  CkVec<MigrateInfo*> migrateInfo;

  // If we want to vacate, we always dump our load, otherwise
  // only if we are overloaded

  if (vacate || myload > avgload) {
    //    CkPrintf("[%d] OVERLOAD My load is %f, average load is %f\n",
    //             CkMyPe(),myload,avgload);

    // First, build heaps of other processors and my objects
    // Then assign objects to other processors until either
    //   - The smallest remaining object would put me below average, or
    //   - I only have 1 object left, or
    //   - The smallest remaining object would put someone else
    //     above average

    // Build heaps
    minHeap procs(count);
    for(i=0; i < count; i++) {
      // If all my neighbors vacate, I won't have anyone to give work
      // to
      if (!stats[i].vacate_me) {
        InfoRecord* item = new InfoRecord;
        item->load = stats[i].total_walltime - stats[i].idletime;
        item->Id =  stats[i].from_pe;
        procs.insert(item);
      }
    }

    maxHeap objs(myStats.obj_data_sz);
    for(i=0; i < myStats.obj_data_sz; i++) {
      InfoRecord* item = new InfoRecord;
      item->load = myStats.objData[i].wallTime;
      item->Id = i;
      objs.insert(item);
    }

    int objs_here = myStats.obj_data_sz;
    do {
      //      if (objs_here <= 1) break;  // For now, always leave 1 object

      InfoRecord* p;
      InfoRecord* obj;

      // Get the lightest-loaded processor
      p = procs.deleteMin();
      if (p == 0) {
        //      CkPrintf("[%d] No destination PE found!\n",CkMyPe());
        break;
      }

      // Get the biggest object
      bool objfound = false;
      do {
        obj = objs.deleteMax();
        if (obj == 0) break;

        objload = load_factor * obj->load;

        double new_p_load = p->load + objload;
        double my_new_load = myload - objload;

        // If we're vacating, the biggest object is always good.
        // Otherwise, only take it if it doesn't produce overload
        if (vacate || new_p_load < my_new_load) {
          objfound = true;
        } else {
          // This object is too big, so throw it away
//        CkPrintf("[%d] Can't move object w/ load %f to proc %d load %f %f\n",
//                 CkMyPe(),obj->load,p->Id,p->load,avgload);
          delete obj;
        }
      } while (!objfound);

      if (!objfound) {
        //      CkPrintf("[%d] No suitable object found!\n",CkMyPe());
        // p was removed from the heap above and is not put back on this
        // path, so the heap clean-up below will never see it: free it here.
        delete p;
        break;
      }

      const int me = CkMyPe();
      // Apparently we can give this object to this processor
      if (_lb_args.debug())
        CkPrintf("[%d] Obj %d of %d migrating from %d to %d\n",
                 CkMyPe(),obj->Id,myStats.obj_data_sz,me,p->Id);

      MigrateInfo* migrateMe = new MigrateInfo;
      migrateMe->obj = myStats.objData[obj->Id].handle;
      migrateMe->from_pe = me;
      migrateMe->to_pe = p->Id;
      migrateInfo.insertAtEnd(migrateMe);

      objs_here--;

      // We may want to assign more to this processor, so lets
      // update it and put it back in the heap
      p->load += objload;
      myload -= objload;
      procs.insert(p);

      // This object is assigned, so we delete it from the heap
      delete obj;

    } while(vacate || myload > avgload);

    // Now empty out the heaps
    InfoRecord* p;
    while (NULL!=(p=procs.deleteMin()))
      delete p;
    InfoRecord* obj;
    while (NULL!=(obj=objs.deleteMax()))
      delete obj;
  }

  // Now build the message to actually perform the migrations
  int migrate_count=migrateInfo.length();
  //  if (migrate_count) {
  //    CkPrintf("PE %d: Sent away %d of %d objects\n",
  //             CkMyPe(),migrate_count,myStats.obj_data_sz);
  //  }
  LBMigrateMsg* msg = new(migrate_count,CkNumPes(),CkNumPes(),0) LBMigrateMsg;
  msg->n_moves = migrate_count;
  for(i=0; i < migrate_count; i++) {
    MigrateInfo* item = (MigrateInfo*) migrateInfo[i];
    msg->moves[i] = *item;
    delete item;
    migrateInfo[i] = 0;
  }

  return msg;
#else
  return NULL;
#endif
}
示例#5
0
/**
 * This function implements a strategy similar to the one used in the 
 * centralized case in NamdCentLB.
 *
 * Outline, as far as it is visible in this function:
 *  1. Allocate the processor / patch / compute / from_procs arrays if they
 *     are not already allocated, and fill them via buildData(stats).
 *  2. Compute aggregate figures (average and maximum compute load, number of
 *     available processors, average load per available processor).
 *  3. Send those figures to element 0 of thisProxy in a SplitComputesMsg. On
 *     step() == 1 the message also lists the computes whose numPartitions()
 *     is non-zero.
 *  4. Unless step() == 1, run the strategy selected by simParams->ldbStrategy.
 *  5. Turn every compute whose assigned processor differs from its original
 *     one into a MigrateInfo entry and update the ComputeMap.
 *  6. Record the per-processor loads in peLoads, release the work arrays and
 *     return the migration message built by createMigrateMsg().
 *
 * @param stats  load statistics for this group; stats->nprocs() is used as
 *               the number of processors at this level.
 * @return the message produced by createMigrateMsg(migrateInfo, numProcessors).
 */
CLBMigrateMsg* NamdHybridLB::GrpLevelStrategy(LDStats* stats) {
  int numProcessors = stats->nprocs();	// number of processors at group level
  int numPatches = PatchMap::Object()->numPatches();
  ComputeMap *computeMap = ComputeMap::Object();
  const int numComputes = computeMap->numComputes();
  const int numGroupComputes = stats->n_migrateobjs;
  const SimParameters* simParams = Node::Object()->simParameters;

  // Work arrays are allocated lazily here and released again at the end of
  // this function.
  if ( ! processorArray ) processorArray = new processorInfo[numProcessors];
  // these data structures are global and need to be distributed
  if ( ! patchArray ) patchArray = new patchInfo[numPatches];
  if ( ! computeArray ) computeArray = new computeInfo[numGroupComputes];
  if ( ! from_procs ) from_procs = new int[numGroupComputes];

  int nMoveableComputes = buildData(stats);
  CmiAssert(nMoveableComputes <= numGroupComputes);


#if LDB_DEBUG
#define DUMP_LDBDATA 1
#define LOAD_LDBDATA 1
#endif

#if DUMP_LDBDATA 
  dumpDataASCII("ldbd_before", numProcessors, numPatches, nMoveableComputes);
#elif LOAD_LDBDATA
  loadDataASCII("ldbd_before.5", numProcessors, numPatches, nMoveableComputes);
  // CkExit();
#endif

  // Aggregate statistics that are forwarded in the SplitComputesMsg below.
  double averageLoad = 0.;
  double avgCompute;
  double maxCompute;
  int maxComputeId;
  int numPesAvailable;
  {
   int i;
   double total = 0.;
   maxCompute = 0.;
   int maxi = 0;
   for (i=0; i<nMoveableComputes; i++) {
      double load = computeArray[i].load;
      total += load;
      if ( load > maxCompute ) { maxCompute = load;  maxi = i; }
   }
   // NOTE(review): when nMoveableComputes is 0 this divides by zero and the
   // next line reads computeArray[0] -- confirm callers never reach here with
   // no moveable computes.
   avgCompute = total / nMoveableComputes;
   maxComputeId = computeArray[maxi].handle.id.id[0];

    int P = stats->nprocs();
   numPesAvailable = 0;
   // total keeps accumulating: background load of available processors is
   // added on top of the compute load summed above.
   for (i=0; i<P; i++) {
      if (processorArray[i].available) {
        ++numPesAvailable;
        total += processorArray[i].backgroundLoad;
      }
   }
   if (numPesAvailable == 0)
     NAMD_die("No processors available for load balancing!\n");

   averageLoad = total/numPesAvailable;
  }

  int i_split = 0;
  double maxUnsplit = 0.;

  // First step only: count the computes with a non-zero partition count and
  // track the largest load among those with a zero partition count.
  if ( step() == 1 ) {
    for (int i=0; i<nMoveableComputes; i++) {
      const int cid = computeArray[i].handle.id.id[0];
      if ( computeMap->numPartitions(cid) == 0 ) {
        const double load = computeArray[i].load;
        if ( load > maxUnsplit ) maxUnsplit = load;
        continue;
      }
      ++i_split;
    }
  }

  {
    // The message is sized for i_split entries; on steps other than 1
    // i_split is still 0 here, so only the summary fields are sent.
    SplitComputesMsg *msg = new(i_split,i_split) SplitComputesMsg;
    msg->maxUnsplit = maxUnsplit;
    msg->averageLoad = averageLoad;
    msg->avgCompute = avgCompute;
    msg->maxCompute = maxCompute;
    msg->maxComputeId = maxComputeId;
    msg->nMoveableComputes = nMoveableComputes;
    msg->numPesAvailable = numPesAvailable;
    msg->n = i_split;

    if ( step() == 1 ) {
      // Second pass over the same computes: reset each compute to its old
      // processor and fill in the id/load arrays counted above.
      i_split = 0;
      for (int i=0; i<nMoveableComputes; i++) {
        computeArray[i].processor = computeArray[i].oldProcessor;
        const int cid = computeArray[i].handle.id.id[0];
        if ( computeMap->numPartitions(cid) == 0 ) {
          continue;
        }
        msg->cid[i_split] = cid;
        msg->load[i_split] = computeArray[i].load;
        ++i_split;
      }
    }

    thisProxy[0].splitComputes(msg);
  }

  // Strategy selection by configured ldbStrategy and current step().
  if ( step() == 1 ) {
    // compute splitting only
  } else if (simParams->ldbStrategy == LDBSTRAT_DEFAULT) { // default
    if (step() < 4)
      TorusLB(computeArray, patchArray, processorArray,
                  nMoveableComputes, numPatches, numProcessors);
    else
      RefineTorusLB(computeArray, patchArray, processorArray,
                  nMoveableComputes, numPatches, numProcessors, 1);
  } else if (simParams->ldbStrategy == LDBSTRAT_COMPREHENSIVE) {
    TorusLB(computeArray, patchArray, processorArray,
                  nMoveableComputes, numPatches, numProcessors);
  } else if (simParams->ldbStrategy == LDBSTRAT_REFINEONLY) {
    RefineTorusLB(computeArray, patchArray, processorArray,
                  nMoveableComputes, numPatches, numProcessors, 1);
  } else if (simParams->ldbStrategy == LDBSTRAT_OLD) {
    // NOTE(review): presumably NAMD_die() does not return, which would make
    // the Alg7/RefineOnly calls below unreachable -- confirm.
    NAMD_die("Old load balancer strategy is not compatible with hybrid balancer.");
    if (step() < 4)
      Alg7(computeArray, patchArray, processorArray,
                  nMoveableComputes, numPatches, numProcessors);
    else
      RefineOnly(computeArray, patchArray, processorArray,
                  nMoveableComputes, numPatches, numProcessors);
  }

#if LDB_DEBUG && USE_TOPOMAP
  // Debug only: sum the hop counts between each patch's processor and the
  // processors listed in its proxiesOn set.
  TopoManager tmgr;
  int pe1, pe2, pe3, hops=0;
  /* This is double counting the hops
  for(int i=0; i<nMoveableComputes; i++)
  {
    pe1 = computeArray[i].processor;
    pe2 = patchArray[computeArray[i].patch1].processor;
    pe3 = patchArray[computeArray[i].patch2].processor;
    hops += tmgr.getHopsBetweenRanks(pe1, pe2);
    if(computeArray[i].patch1 != computeArray[i].patch2)
      hops += tmgr.getHopsBetweenRanks(pe1, pe3);  
  }*/
  for (int i=0; i<numPatches; i++)  {
    //int num = patchArray[i].proxiesOn.numElements();
    pe1 = patchArray[i].processor;
    Iterator nextProc;
    processorInfo *p = (processorInfo *)patchArray[i].proxiesOn.iterator((Iterator *)&nextProc);
    while (p) {
      pe2 = p->Id;
      hops += tmgr.getHopsBetweenRanks(pe1, pe2);
      p = (processorInfo *)patchArray[i].proxiesOn.next((Iterator*)&nextProc);
    }
  }
  CkPrintf("Load Balancing: Number of Hops: %d\n", hops);
#endif

#if DUMP_LDBDATA
  dumpDataASCII("ldbd_after", numProcessors, numPatches, nMoveableComputes);
#elif LOAD_LDBDATA
  dumpDataASCII("ldbd_after.5", numProcessors, numPatches, nMoveableComputes);
  // loadDataASCII("ldbd_after", numProcessors, numPatches, nMoveableComputes);
  // CkExit();
#endif

  // For error checking:
  // Count up computes, to see if somebody doesn't have any computes
  int i;
#if 0
  int* computeCount = new int[numProcessors];
  for(i=0; i<numProcessors; i++)
    computeCount[i]=0;
  for(i=0; i<nMoveableComputes; i++)
    computeCount[computeArray[i].processor]++;
  for(i=0; i<numProcessors; i++) {
    if (computeCount[i]==0)
      iout << iINFO <<"Warning: Processor " << i 
	   << " has NO moveable computes.\n" << endi;
  }
  delete [] computeCount;
#endif
  
  // Build one MigrateInfo per compute whose assigned processor differs from
  // from_procs[i] offset by the first PE of this group.
  CkVec<MigrateInfo *> migrateInfo;
  for(i=0;i<nMoveableComputes;i++) {
    if (computeArray[i].processor != from_procs[i]+stats->procs[0].pe) {
      /* CkPrintf("[%d] Obj %d migrating from %d (%d) to %d\n",
                     CkMyPe(),computeArray[i].handle.id.id[0],
			 from_procs[i], computeArray[i].oldProcessor, computeArray[i].processor); */
      MigrateInfo *migrateMe = new MigrateInfo;
      migrateMe->obj = computeArray[i].handle;
      //migrateMe->from_pe = computeArray[i].oldProcessor;
      // from_procs[i] == numProcessors is mapped to -1; any other value is
      // offset by the first PE of the group.
      int frompe = from_procs[i];
      if (frompe == numProcessors)
        frompe = -1;
      else
        frompe = frompe + stats->procs[0].pe;
      migrateMe->from_pe = frompe;
      migrateMe->to_pe = computeArray[i].processor;
      if (frompe == -1) {
          // don't know yet which processor this compute belongs to, but
	  // inform receiver
        LDObjData obj;
        obj.handle = computeArray[i].handle;
        thisProxy[computeArray[i].processor].ObjMigrated(obj, NULL, 0, currentLevel-1);
      } 
      migrateInfo.insertAtEnd(migrateMe);

      // sneak in updates to ComputeMap
      //ERASE CkPrintf("%d setting %d to processor %d\n",CkMyPe(),computeArray[i].handle.id.id[0],computeArray[i].processor);
      computeMap->setNewNode(computeArray[i].handle.id.id[0],
				computeArray[i].processor);
    }
  }
  // CkPrintf("LOAD BALANCING READY %d\n",CkMyPe()); 

  LBMigrateMsg* msg;
  msg = createMigrateMsg(migrateInfo, numProcessors);

  // Snapshot the resulting per-processor loads and the PE id range of this
  // group before the work arrays are released.
  // NOTE(review): peLoads is assigned a fresh array without freeing any
  // previous one in this function -- confirm it is released elsewhere.
  peLoads = new double [numProcessors]; 
  startPE = processorArray[0].Id;
  endPE = processorArray[numProcessors-1].Id;
  // CkPrintf("[%d] numProcessors=%d, %d to %d\n",CkMyPe(),numProcessors,processorArray[0].Id,processorArray[numProcessors-1].Id);
  for (i=0; i<numProcessors; i++) {
	peLoads[i] = processorArray[i].load;
  }


  // Release the work arrays and null the pointers so that the next call
  // allocates them again.
  delete [] from_procs;
  delete [] processorArray;
  delete [] patchArray;
  delete [] computeArray;

  from_procs = NULL;
  processorArray = NULL;
  patchArray = NULL;
  computeArray = NULL;
  
  return msg;

}
示例#6
0
// Neighbor load-balancing strategy that takes communication into account.
//
//   stats  - statistics of the neighbors, `n_nbrs` entries. total_walltime
//            and idletime of each entry are rescaled IN PLACE by
//            myStats.pe_speed / stats[i].pe_speed.
//   n_nbrs - number of neighbors.
//
// If this PE's load is above the average of itself and its neighbors, every
// local object (largest first) is considered for a move. A neighbor is a
// candidate if taking the object leaves it with less load than this PE would
// have after giving the object away; among candidates the lightest one wins,
// subject to the direction test against the object's "communication center"
// (the comm-weighted mean coordinate difference to the PEs it sends to).
//
// Returns a newly allocated LBMigrateMsg listing the chosen moves (possibly
// zero), or NULL when CMK_LBDB_ON is not set.
//
// NOTE(review): the LBTopology object returned by topofn() is never released
// in this function -- confirm who owns it.
LBMigrateMsg* NeighborCommLB::Strategy(NborBaseLB::LDStats* stats, int n_nbrs)
{
// Compile-time debug switches; all disabled.
bool _lb_debug=0;
bool _lb_debug1=0;
bool _lb_debug2=0;
#if CMK_LBDB_ON
  //  CkPrintf("[%d] Strategy starting\n",CkMyPe());
  // Compute the average load to see if we are overloaded relative
  // to our neighbors
  double myload = myStats.total_walltime - myStats.idletime;
  double avgload = myload;
  int i;
  if (_lb_debug)
    CkPrintf("[%d] Neighbor Count = %d\n", CkMyPe(), n_nbrs);

  for(i=0; i < n_nbrs; i++) {
    // Scale times we need appropriately for relative proc speeds
    const double scale =  ((double)myStats.pe_speed)
      / stats[i].pe_speed;

    stats[i].total_walltime *= scale;
    stats[i].idletime *= scale;

    avgload += (stats[i].total_walltime - stats[i].idletime);
  }
  avgload /= (n_nbrs + 1);

  CkVec<MigrateInfo*> migrateInfo;

  if (_lb_debug)
    CkPrintf("[%d] My load is %lf\n", CkMyPe(),myload);
  if (myload > avgload) {
    if (_lb_debug1)
      CkPrintf("[%d] OVERLOAD My load is %lf average load is %lf\n", CkMyPe(), myload, avgload);

    // First of all, explore the topology and get dimension
    LBTopology* topo;
    {
      LBtopoFn topofn;
      topofn = LBTopoLookup(_lbtopo);
      if (topofn == NULL) {
        char str[1024];
        CmiPrintf("NeighborCommLB> Fatal error: Unknown topology: %s. Choose from:\n", _lbtopo);
        printoutTopo();
        // _lbtopo is supplied from outside this function; bound the copy so
        // an over-long name cannot overrun str.
        snprintf(str, sizeof(str), "NeighborCommLB> Fatal error: Unknown topology: %s", _lbtopo);
        CmiAbort(str);
      }
      topo = topofn(CkNumPes());
    }
    int dimension = topo->get_dimension();
    if (_lb_debug2)
      CkPrintf("[%d] Topology dimension = %d\n", CkMyPe(), dimension);
    if (dimension == -1) {
      char str[1024];
      CmiPrintf("NeighborCommLB> Fatal error: Unsupported topology: %s. Only some of the following are supported:\n", _lbtopo);
      printoutTopo();
      snprintf(str, sizeof(str), "NeighborCommLB> Fatal error: Unsupported topology: %s", _lbtopo);
      CmiAbort(str);
    }

    // Position of this processor
    int *myProc = new int[dimension];
    topo->get_processor_coordinates(myStats.from_pe, myProc);
    if (_lb_debug2) {
      char temp[1000];
      char* now=temp;
      sprintf(now, "[%d] Coordinates = [", CkMyPe());
      now += strlen(now);
      for(i=0;i<dimension;i++) {
        sprintf(now, "%d ", myProc[i]);
        now +=strlen(now);
      }
      sprintf(now, "]\n");
      now += strlen(now);
      // Print the buffer as data, never as a format string.
      CkPrintf("%s", temp);
    }

    // Then calculate the communication center of each object
    // The communication center is relative to myProc
    double **commcenter = new double*[myStats.n_objs];
    double *commamount = new double[myStats.n_objs];
    if(_lb_debug1) {
      CkPrintf("[%d] Number of Objs = %d \n", CkMyPe(), myStats.n_objs);
    }
    {
      memset(commamount, 0, sizeof(double)*myStats.n_objs);
      for(i=0; i<myStats.n_objs;i++) {
        commcenter[i] = new double[dimension];
        memset(commcenter[i], 0, sizeof(double)*dimension);
      }

      //coordinates of procs
      int *destProc = new int[dimension];
      int *diff = new int[dimension];

      //for each comm entry
      for(i=0; i<myStats.n_comm;i++) {
        int j;
        //for each object //TODO use hashtable to accelerate
        for(j=0; j<myStats.n_objs;j++) {
          if((myStats.objData[j].handle.omhandle.id == myStats.commData[i].sender.omId)
              && (myStats.objData[j].handle.id == myStats.commData[i].sender.objId)) {
            double comm=
              PER_MESSAGE_SEND_OVERHEAD * myStats.commData[i].messages
              + PER_BYTE_SEND_OVERHEAD * myStats.commData[i].bytes;
            commamount[j] += comm;
            int dest_pe = myStats.commData[i].receiver.lastKnown();

            // Unknown destination: the amount is still counted above, but it
            // contributes no direction to the center.
            if(dest_pe==-1) continue;

            topo->get_processor_coordinates(dest_pe, destProc);
            topo->coordinate_difference(myProc, destProc, diff);
            int k;
            for(k=0;k<dimension;k++) {
              commcenter[j][k] += diff[k] * comm;
            }
          }
        }
      }
      // Normalise the weighted sums into a mean coordinate difference.
      for(i=0; i<myStats.n_objs;i++) {
        int k;
        if (commamount[i]>0) {
          double ratio = 1.0 /commamount[i];
          for(k=0;k<dimension;k++)
            commcenter[i][k] *= ratio;
        } else { //if no communication, set commcenter to myself
          for(k=0;k<dimension;k++)
            commcenter[i][k] = 0;
        }
      }

      delete [] destProc;
      delete [] diff;
    }

    if(_lb_debug2) {
      for(i=0;i<myStats.n_objs;i++) {
        char temp[1000];
        char* now=temp;
        sprintf(now, "[%d] Objs [%d] Load = %lf Comm Amount = %lf  ",
          CkMyPe(), i, myStats.objData[i].wallTime, commamount[i] );
        now += strlen(now);
        sprintf(now, "Comm Center = [");
        now += strlen(now);
        int j;
        for(j=0;j<dimension;j++) {
          sprintf(now, "%lf ", commcenter[i][j]);
          now += strlen(now);
        }
        sprintf(now, "]\n");
        now += strlen(now);
        // Print the buffer as data, never as a format string.
        CkPrintf("%s", temp);
      }
    }

    // First, build heaps of my objects
    // Then assign objects to the least loaded other processors until either
    //   - The smallest remaining object would put me below average, or
    //   - I only have 1 object left, or
    //   - The smallest remaining object would put someone else
    //     above average
    // Note: Object can only move towards its communication center!

    // My neighbors:
    typedef struct _procInfo{
      int id;            // PE number of the neighbor
      double load;       // scaled load, updated as objects are assigned
      int* difference;   // coordinate difference from myProc, `dimension` ints
    } procInfo;

    if(_lb_debug2) {
      CkPrintf("[%d] Querying neighborhood topology...\n", CkMyPe() );
    }

    procInfo* neighbors = new procInfo[n_nbrs];
    {
      int *destProc = new int[dimension];
      for(i=0; i < n_nbrs; i++) {
        neighbors[i].id = stats[i].from_pe;
        neighbors[i].load = stats[i].total_walltime - stats[i].idletime;
        neighbors[i].difference = new int[dimension];
        topo->get_processor_coordinates(neighbors[i].id, destProc);
        topo->coordinate_difference(myProc, destProc, neighbors[i].difference);
      }
      delete[] destProc;
    }

    if(_lb_debug2) {
      CkPrintf("[%d] Building obj heap...\n", CkMyPe() );
    }
    // My objects: build heaps
    maxHeap objs(myStats.n_objs);
    double totalObjLoad=0.0;
    for(i=0; i < myStats.n_objs; i++) {
      InfoRecord* item = new InfoRecord;
      item->load = myStats.objData[i].wallTime;
      totalObjLoad += item->load;
      item->Id = i;
      objs.insert(item);
    }

    if(_lb_debug2) {
      CkPrintf("[%d] Beginning distributing objects...\n", CkMyPe() );
    }

    // for each object
    while(objs.numElements()>0) {
      InfoRecord* obj;
      obj = objs.deleteMax();
      int bestDest = -1;
      for(i = 0; i < n_nbrs; i++) {
        // Candidate only if the neighbor ends up lighter than this PE would
        // be after the move, and lighter than the best candidate so far.
        if(neighbors[i].load +obj->load < myload - obj->load && (bestDest==-1 || neighbors[i].load < neighbors[bestDest].load)) {
          double dotsum=0;
          int j;
          for(j=0; j<dimension; j++) dotsum += (commcenter[obj->Id][j] * neighbors[i].difference[j]);
          if(myload - avgload < totalObjLoad || dotsum>0.5 || (dotsum>0 && objs.numElements()==0) || commamount[obj->Id]==0) {
            bestDest = i;
          }
        }
      }
      // Best place for the object
      if(bestDest != -1) {
        if(_lb_debug1) {
          CkPrintf("[%d] Obj[%d] will move to Proc[%d]\n", CkMyPe(), obj->Id, neighbors[bestDest].id);
        }
        //Migrate it
        MigrateInfo* migrateMe = new MigrateInfo;
        migrateMe->obj = myStats.objData[obj->Id].handle;
        migrateMe->from_pe = myStats.from_pe;
        migrateMe->to_pe = neighbors[bestDest].id;
        migrateInfo.insertAtEnd(migrateMe);
        //Modify loads
        myload -= obj->load;
        neighbors[bestDest].load += obj->load;
      }
      // The object has been considered either way; it no longer counts
      // towards the load that is still available for moving.
      totalObjLoad -= obj->load;
      delete obj;
    }

    if(_lb_debug2) {
      CkPrintf("[%d] Clearing Up...\n", CkMyPe());
    }

    for(i=0; i<n_nbrs; i++) {
      delete[] neighbors[i].difference;
    }
    delete[] neighbors;

    delete[] myProc;

    for(i=0;i<myStats.n_objs;i++) {
      delete[] commcenter[i];
    }
    delete[] commcenter;
    delete[] commamount;
  }

  if(_lb_debug2) {
    CkPrintf("[%d] Generating result...\n", CkMyPe());
  }

  // Now build the message to actually perform the migrations
  int migrate_count=migrateInfo.length();
  //  if (migrate_count > 0) {
  //    CkPrintf("PE %d migrating %d elements\n",CkMyPe(),migrate_count);
  //  }
  LBMigrateMsg* msg = new(migrate_count,CkNumPes(),CkNumPes(),0) LBMigrateMsg;
  msg->n_moves = migrate_count;
  for(i=0; i < migrate_count; i++) {
    MigrateInfo* item = (MigrateInfo*) migrateInfo[i];
    msg->moves[i] = *item;
    delete item;
    migrateInfo[i] = 0;
  }

  return msg;
#else
  return NULL;
#endif
}
示例#7
0
// Build the migration message for the current level of the hierarchy.
//
//   stats - statistics for all objects that belong to this group; from_proc
//           and to_proc hold PE numbers relative to the group (indices into
//           the level's children array), with nChildren standing for "from
//           outside the group".
//
// The message contains one move per object whose from/to PE differ, followed
// by one move with to_pe == -1 for every entry in the level's outObjs list.
// After the optional summary is printed, the relative PE numbers of all moves
// with to_pe != -1 are translated to real PE numbers through
// lData->children.
//
// Returns a newly allocated LBMigrateMsg, or NULL when CMK_LBDB_ON is not set.
LBMigrateMsg * HybridBaseLB::createMigrateMsg(LDStats* stats)
{
#if CMK_LBDB_ON
  int i;

  LevelData *lData = levelData[currentLevel];

  CkVec<MigrateInfo*> migrateInfo;

  // stats contains all objects that belong to this group
  // outObjs contains objects that are migrated out
  for (i=0; i<stats->n_objs; i++) {
    LDObjData &objData = stats->objData[i];
    int frompe = stats->from_proc[i];
    int tope = stats->to_proc[i];
    CmiAssert(tope != -1);
    if (frompe != tope) {
      //      CkPrintf("[%d] Obj %d migrating from %d to %d\n",
      //         CkMyPe(),obj,pe,dest);
#if 0
      // delay until a summary is printed
      if (frompe == lData->nChildren)  {
        frompe = -1;
        CmiAssert(tope != -1 && tope != lData->nChildren);
      }
      else
        frompe = lData->children[frompe];
      if (tope != -1) {
        CmiAssert(tope < lData->nChildren);
        tope = lData->children[tope];
      }
#endif
      MigrateInfo *migrateMe = new MigrateInfo;
      migrateMe->obj = objData.handle;
      migrateMe->from_pe = frompe;
      migrateMe->to_pe = tope;
      migrateMe->async_arrival = objData.asyncArrival;
      migrateInfo.insertAtEnd(migrateMe);
    }
    else
      CmiAssert(frompe != lData->nChildren);
  }

  // merge outgoing objs
  CkVec<MigrationRecord> &outObjs = lData->outObjs;
  for (i=0; i<outObjs.size(); i++) {
    MigrateInfo *migrateMe = new MigrateInfo;
    migrateMe->obj = outObjs[i].handle;
    migrateMe->from_pe = outObjs[i].fromPe;
    migrateMe->to_pe = -1;
//    migrateMe->async_arrival = objData.asyncArrival;
    migrateInfo.insertAtEnd(migrateMe);
  }

  // construct migration message
  int migrate_count=migrateInfo.length();
  DEBUGF(("[%d] level: %d has %d migrations. \n", CkMyPe(), currentLevel, migrate_count));
  // ignore avail_vector, etc for now
  //LBMigrateMsg * msg = new(migrate_count,count,count,0) LBMigrateMsg;
  LBMigrateMsg * msg = new(migrate_count,0,0,0) LBMigrateMsg;
  msg->level = currentLevel;
  msg->n_moves = migrate_count;
  for(i=0; i < migrate_count; i++) {
    MigrateInfo* item = (MigrateInfo*) migrateInfo[i];
    msg->moves[i] = *item;
    // The debug print reads through item, so it has to run before the record
    // is freed.
    DEBUGF(("[%d] obj (%d %d %d %d) migrate from %d to %d\n", CkMyPe(), item->obj.objID().id[0], item->obj.objID().id[1], item->obj.objID().id[2], item->obj.objID().id[3], item->from_pe, item->to_pe));
    delete item;
    migrateInfo[i] = 0;
  }

  if (_lb_args.printSummary())  printSummary(stats, stats->nprocs());

  // translate relative pe number to its real number
  // (moves with to_pe == -1, i.e. the outgoing objects, are left untouched)
  for(i=0; i < migrate_count; i++) {
    MigrateInfo* move = &msg->moves[i];
    if (move->to_pe != -1) {
      if (move->from_pe == lData->nChildren)  {
          // an object from outside group
        move->from_pe = -1;
        CmiAssert(move->to_pe != -1 && move->to_pe != lData->nChildren);
      }
      else
        move->from_pe = lData->children[move->from_pe];
      CmiAssert(move->to_pe < lData->nChildren);
      move->to_pe = lData->children[move->to_pe];
    }
  }

  return msg;
#else
  return NULL;
#endif
}