Beispiel #1
0
/**
 * Recursive-bipartition load balancing strategy.
 *
 * Builds a processor array and an object graph from `stats`, resets the
 * strategy state shared with RecursiveBiPart() (level, peno, TOTALLOAD,
 * numparts, parray), creates one Vertex_helper per graph vertex, runs
 * RecursiveBiPart() over all vertices and finally writes the resulting
 * decisions back into `stats`.
 *
 * Everything allocated here is released before returning, so repeated
 * invocations neither leak nor start with helpers left over from an earlier
 * call.
 *
 * @param stats load balancing database; used to build the graph and updated
 *              by ObjGraph::convertDecisions().
 */
void RecBipartLB::work(LDStats *stats) {
  vector<Vertex *> ptrvector;
  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);	// Processor Array
  ObjGraph *ogr = new ObjGraph(stats);		// Object Graph


  /** ============================= STRATEGY ================================ */
  level=0;
  peno=0;
  TOTALLOAD=0;
  numparts=CkNumPes();
  parray=parr;

  // NOTE(review): the average load is not used by this function; the call is
  // kept only in case getAverageLoad() has side effects -- confirm and drop.
  (void)parr->getAverageLoad();

  parr->resetTotalLoad();
  for(size_t i=0;i<ogr->vertices.size();i++)
  {
    // one helper per vertex, stored in the same order as the vertices
    vhelpers.push_back(new Vertex_helper());
    ptrvector.push_back((Vertex *)&(ogr->vertices[i]));
  }

  RecursiveBiPart(ogr,ptrvector,1,numparts);

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);		// Send decisions back to LDStats

  // Free the per-invocation helpers and empty the container so the next call
  // starts from a clean slate.
  for(size_t i=0;i<vhelpers.size();i++)
    delete vhelpers[i];
  vhelpers.clear();

  // parray only aliases parr; reset it so it does not dangle after the delete.
  parray=NULL;
  delete parr;
  delete ogr;
}
Beispiel #2
0
/**
 * Breadth-first-traversal graph load balancer.
 *
 * Starting at vertex 0, walks the object graph breadth first along each
 * vertex's sendToList and assigns vertices to processors in visiting order:
 * the current processor keeps receiving vertices until adding the next one
 * would push it above the running average load, at which point the next
 * processor is used and the average is re-adjusted.
 *
 * Only vertices reachable from vertex 0 through sendToList edges are assigned
 * here; any other vertex keeps the value it had before (the traversal treats
 * getNewPe() == -1 as "not yet mapped").
 *
 * @param stats load balancing database; used to build the graph and updated
 *              by ObjGraph::convertDecisions().
 */
void GraphBFTLB::work(LDStats *stats) {
  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);	// Processor Array
  ObjGraph *ogr = new ObjGraph(stats);		// Object Graph

  /** ============================= STRATEGY ================================ */
  double avgLoad = parr->getAverageLoad();
  int numPes = parr->procs.size();

  parr->resetTotalLoad();

  // Guard against an empty graph / processor list: the traversal below
  // unconditionally touches vertices[0] and procs[0].
  if(!ogr->vertices.empty() && numPes > 0) {
    int start = 0, nextPe = 0;
    std::queue<int> vertexq;

    // start at vertex with id 0
    vertexq.push(start);
    // Move on to the next processor only if there is one; otherwise the last
    // processor absorbs the remaining load instead of indexing past the end
    // of procs (and dividing by numPes-nextPe == 0).
    if(nextPe+1 < numPes &&
       parr->procs[nextPe].getTotalLoad() + ogr->vertices[start].getVertexLoad() > avgLoad) {
      nextPe++;
      avgLoad += (avgLoad - parr->procs[nextPe].getTotalLoad())/(numPes-nextPe);
    }
    ogr->vertices[start].setNewPe(nextPe);
    parr->procs[nextPe].totalLoad() += ogr->vertices[start].getVertexLoad();

    // breadth first traversal
    while(!vertexq.empty()) {
      start = vertexq.front();
      vertexq.pop();

      for(size_t i = 0; i < ogr->vertices[start].sendToList.size(); i++) {
        // look at all neighbors of a node in the queue and map them while
        // inserting them in the queue (so we can look at their neighbors next)
        int nbr = ogr->vertices[start].sendToList[i].getNeighborId();
        if(ogr->vertices[nbr].getNewPe() != -1)
          continue;			// already mapped

        vertexq.push(nbr);

        if(nextPe+1 < numPes &&
           parr->procs[nextPe].getTotalLoad() + ogr->vertices[nbr].getVertexLoad() > avgLoad) {
          nextPe++;
          avgLoad += (avgLoad - parr->procs[nextPe].getTotalLoad())/(numPes-nextPe);
        }
        ogr->vertices[nbr].setNewPe(nextPe);
        parr->procs[nextPe].totalLoad() += ogr->vertices[nbr].getVertexLoad();
      } // end of for loop
    } // end of while loop
  }

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);		// Send decisions back to LDStats
  delete parr;
  delete ogr;
}
/**
 * Greedy load balancing strategy.
 *
 * Sorts the objects with ObjLoadGreater, keeps the processors in a heap
 * ordered by ProcLoadGreater and, object by object, assigns the next object
 * to the processor currently at the top of that heap, adding the object's
 * load to that processor before restoring the heap.
 *
 * @param stats load balancing database; used to build the graph and updated
 *              by ObjGraph::convertDecisions().
 */
void TempAwareGreedyLB::work(LDStats* stats)
{
  CkPrintf("----------------- in TempAwareGreedyLB -----------\n");
  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);       // Processor Array
  ObjGraph *ogr = new ObjGraph(stats);          // Object Graph

  /** ============================= STRATEGY ================================ */
  parr->resetTotalLoad();

  if (_lb_args.debug()>1) 
    CkPrintf("[%d] In TempAwareGreedyLB strategy\n",CkMyPe());

  // objects ordered by ObjLoadGreater (presumably heaviest first -- confirm
  // against the comparator's definition)
  std::sort(ogr->vertices.begin(), ogr->vertices.end(), ObjLoadGreater());
  // heap of processors; with ProcLoadGreater the top is presumably the least
  // loaded processor
  std::make_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());

  for(size_t vert = 0; vert < ogr->vertices.size(); vert++) {
    // pop_heap moves the top of the heap to the last slot; update it in place
    // there instead of copying it out and pushing the copy back.
    std::pop_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
    ProcInfo &p = parr->procs.back();

    // Increment the load of the chosen processor by the load of the next
    // unmapped object and record the assignment
    p.setTotalLoad(p.getTotalLoad() + ogr->vertices[vert].getVertexLoad());
    ogr->vertices[vert].setNewPe(p.getProcId());

    // Re-insert the updated processor into the heap
    std::push_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
  }

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);         // Send decisions back to LDStats
  delete parr;
  delete ogr;
}
Beispiel #4
0
/**
 * Refinement strategy with a swap fallback.
 *
 * Processors whose load is more than 1% above the average are collected as
 * overloaded, those more than 1% below it as underloaded. While both sets are
 * non-empty, the processor returned by getMax() is handed to refine(); if
 * that fails, refineSwap() is tried; if that fails as well the processor is
 * put back and the loop stops.
 *
 * @param stats load balancing database; used to build the graph and updated
 *              by ObjGraph::convertDecisions().
 */
void RefineSwapLB::work(LDStats* stats)
{
  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);       // Processor Array
  ObjGraph *ogr = new ObjGraph(stats);          // Object Graph

  /** ============================= STRATEGY ================================ */
  if (_lb_args.debug()>1) 
    CkPrintf("[%d] In RefineSwapLB strategy\n",CkMyPe());

  double avg_load = parr->getAverageLoad();
  double threshold = avg_load * 0.01;
  double lower_bound_load = avg_load - threshold;
  double upper_bound_load = avg_load + threshold;
  cout <<"Average load " << avg_load << endl;

  std::vector<int> underloaded;   // PEs below the lower bound
  std::vector<int> overloaded;    // PEs above the upper bound (heap)

  // objs_on_pe[p] lists the indices of the vertices currently placed on PE p
  std::vector<int>* objs_on_pe = new std::vector<int>[parr->procs.size()];
  for (int v = 0; v < ogr->vertices.size(); v++) {
    objs_on_pe[ogr->vertices[v].getCurrentPe()].push_back(v);
  }

  // Classify every processor as overloaded, underloaded or neither.
  for (int pe = 0; pe < parr->procs.size(); pe++) {
    if (parr->procs[pe].getTotalLoad() > upper_bound_load) {
      overloaded.push_back(pe);
      continue;
    }
    if (parr->procs[pe].getTotalLoad() < lower_bound_load) {
      underloaded.push_back(pe);
    }
  }

  std::make_heap(overloaded.begin(), overloaded.end(), ProcLoadGreaterIndex(parr));

  while (!overloaded.empty() && !underloaded.empty()) {
    int donor = getMax(parr, overloaded);

    // First try plain refinement ...
    if (refine(parr, ogr, overloaded, underloaded, objs_on_pe, donor, avg_load, threshold)) {
      continue;
    }
    // ... then fall back to swapping.
    if (refineSwap(parr, ogr, overloaded, underloaded, objs_on_pe, donor, avg_load,
          threshold)) {
      continue;
    }

    // Neither worked: put the processor back and give up.
    overloaded.push_back(donor);
    std::push_heap(overloaded.begin(), overloaded.end(),
        ProcLoadGreaterIndex(parr));
    break;
  }

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);         // Send decisions back to LDStats
  delete[] objs_on_pe;
  delete parr;
  delete ogr;
}
Beispiel #5
0
/**
 * Two-phase strategy: greedy load balancing followed by a topology-aware
 * renumbering of the processors.
 *
 * Phase 1 sorts the objects with ObjLoadGreater, keeps the processors in a
 * heap ordered by ProcLoadGreater and assigns each object in turn to the
 * processor at the top of the heap; the result is written to `stats`.
 *
 * Phase 2 builds a processor-to-processor communication matrix from the
 * object-to-object messages in stats->commData (using the phase-1 mapping),
 * asks the tree-matching routines for a processor permutation and applies
 * that permutation to every entry of stats->to_proc.
 *
 * @param stats load balancing database; read for object and communication
 *              data, stats->to_proc is overwritten with the final mapping.
 */
void TreeMatchLB::work(BaseLB::LDStats* stats)
{
  /** ========================= 1st Do Load Balancing =======================*/

  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);       // Processor Array
  ObjGraph *ogr = new ObjGraph(stats);          // Object Graph

  /** ============================= STRATEGY ================================ */
  parr->resetTotalLoad();

  if (_lb_args.debug()>1) 
    CkPrintf("[%d] In TreeMatchLB strategy\n",CkMyPe());

  // objects ordered by ObjLoadGreater (presumably heaviest first -- confirm
  // against the comparator's definition)
  std::sort(ogr->vertices.begin(), ogr->vertices.end(), ObjLoadGreater());
  // heap of processors; with ProcLoadGreater the top is presumably the least
  // loaded processor
  std::make_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());

  for(size_t vert = 0; vert < ogr->vertices.size(); vert++) {
    // pop_heap moves the top of the heap to the last slot; update it in place
    // there instead of copying it out and pushing the copy back.
    std::pop_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
    ProcInfo &p = parr->procs.back();

    // Increment the load of the chosen processor by the load of the next
    // unmapped object and record the assignment
    p.totalLoad() += ogr->vertices[vert].getVertexLoad();
    ogr->vertices[vert].setNewPe(p.getProcId());

    // Re-insert the updated processor into the heap
    std::push_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
  }

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);         // Send decisions back to LDStats
  // Phase 2 works on stats only, so the graph structures can go now.
  delete parr;
  delete ogr;


  /** ====================== 2nd do Topology aware mapping ====================*/

  int i;

  /* get number of processors and the greedy load balancing result */
  const int nb_procs = stats->nprocs();
  /* object_mapping[o] is the processor of object o; it aliases the storage of
     stats->to_proc, so writes below change stats->to_proc directly */
  int *object_mapping = stats->to_proc.getVec();

  stats->makeCommHash();
  // allocate communication matrix (nb_procs x nb_procs, zero initialised)
  double **comm_mat=(double**)malloc(sizeof(double*)*nb_procs);
  for(i=0;i<nb_procs;i++){
    comm_mat[i]=(double*)calloc(nb_procs,sizeof(double));
  }

  /* Build the communication matrix */
  for(i=0;i<stats->n_comm;i++){
    LDCommData &commData = stats->commData[i];
    if((!commData.from_proc())&&(commData.recv_type()==LD_OBJ_MSG)){
      int sender = stats->getHash(commData.sender);
      int receiver = stats->getHash(commData.receiver.get_destObj());
      // NOTE(review): getHash() presumably reports an unknown object with a
      // negative value -- confirm; such records cannot be attributed to a
      // processor and must not be used as an index.
      if(sender < 0 || receiver < 0)
        continue;

      int from = object_mapping[sender];
      int to = object_mapping[receiver];
      if(from!=to){
        // the matrix is kept symmetric
        comm_mat[from][to]+=commData.bytes;
        comm_mat[to][from]+=commData.bytes;
      }
    }
  }

  /* build the topology of the hardware (abe machine here) */
  tm_topology_t *topology=build_abe_topology(nb_procs);
  display_topology(topology);
  /* compute the affinity tree */
  // NOTE(review): comm_tree is never released here; check whether the
  // tree-matching API provides a matching free routine.
  tree_t *comm_tree=build_tree_from_topology(topology,comm_mat,nb_procs,NULL,NULL);

  /* Compute the processor permutation */
  int *permutation=(int*)malloc(sizeof(int)*nb_procs);
  map_topology_simple(topology,comm_tree,permutation,NULL);


  /* 
     Apply this permutation to all objects.
     object_mapping has one entry per object, so the loop runs over the number
     of objects, not the number of processors.
     Side effect: object_mapping points to the stats->to_proc.getVec() 
     So, these lines change also stats->to_proc.getVec()
  */
  for(i=0;i<stats->n_objs;i++)
    object_mapping[i]=permutation[object_mapping[i]];

  free(permutation);

  // free communication matrix;
  for(i=0;i<nb_procs;i++){
    free(comm_mat[i]);
  }
  free(comm_mat);
  free_topology(topology);
}