예제 #1
0
/**
 * Entry point of the recursive bipartitioning strategy.
 *
 * Builds a processor array and an object graph from `stats`, resets the
 * per-run strategy state (level, peno, TOTALLOAD, numparts, parray), creates
 * one Vertex_helper per graph vertex, runs RecursiveBiPart() over all
 * vertices with numparts partitions, and finally writes the decisions stored
 * in the object graph back into `stats`.
 *
 * @param stats load balancing database; read to build the graph and updated
 *              through ObjGraph::convertDecisions().
 */
void RecBipartLB::work(LDStats *stats) {
  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);	// Processor Array
  ObjGraph *ogr = new ObjGraph(stats);		// Object Graph

  /** ============================= STRATEGY ================================ */
  level=0;
  peno=0;
  TOTALLOAD=0;
  numparts=CkNumPes();
  parray=parr;

  parr->resetTotalLoad();

  // One helper per vertex, plus a flat list of vertex pointers that is handed
  // to the recursive partitioner.
  vector<Vertex *> ptrvector;
  for(size_t i=0;i<ogr->vertices.size();i++)
  {
    vhelpers.push_back(new Vertex_helper());
    ptrvector.push_back((Vertex *)&(ogr->vertices[i]));
  }

  RecursiveBiPart(ogr,ptrvector,1,numparts);

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);		// Send decisions back to LDStats

  // The processor array and object graph are owned by this call; release
  // them and make sure the member alias does not point at freed memory.
  parray = NULL;
  delete parr;
  delete ogr;

  // NOTE(review): the Vertex_helper objects appended to vhelpers above are
  // not released here because the declaration/ownership of vhelpers is not
  // visible from this function -- confirm whether they must be freed and the
  // container cleared between invocations.
}
예제 #2
0
/**
 * Maps objects to processors in breadth-first order over the object graph.
 *
 * Starting from vertex 0, vertices are visited breadth-first along their
 * sendToList edges. Each newly discovered vertex is placed on the current
 * processor; when adding it would push that processor above the running
 * average-load target, the strategy first advances to the next processor
 * and adjusts the target.
 *
 * Only vertices reachable from vertex 0 are assigned here; others keep
 * whatever new-PE value they had after graph construction.
 *
 * @param stats load balancing database; read to build the graph and updated
 *              through ObjGraph::convertDecisions().
 */
void GraphBFTLB::work(LDStats *stats) {
  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);	// Processor Array
  ObjGraph *ogr = new ObjGraph(stats);		// Object Graph

  /** ============================= STRATEGY ================================ */
  double avgLoad = parr->getAverageLoad();
  int numPes = parr->procs.size();

  parr->resetTotalLoad();

  // Nothing to traverse without at least one vertex and one processor;
  // indexing vertices[0] / procs[0] would be out of range otherwise.
  if(ogr->vertices.size() > 0 && numPes > 0) {
    int start = 0, nextPe = 0;
    std::queue<int> vertexq;

    // start at vertex with id 0
    vertexq.push(start);
    // Only move on when another processor actually exists: stepping past the
    // last one would index procs[] out of range and divide by zero below.
    if(nextPe + 1 < numPes &&
       parr->procs[nextPe].getTotalLoad() + ogr->vertices[start].getVertexLoad() > avgLoad) {
      nextPe++;
      avgLoad += (avgLoad - parr->procs[nextPe].getTotalLoad())/(numPes-nextPe);
    }
    ogr->vertices[start].setNewPe(nextPe);
    parr->procs[nextPe].totalLoad() += ogr->vertices[start].getVertexLoad();

    // breadth first traversal
    while(!vertexq.empty()) {
      start = vertexq.front();
      vertexq.pop();

      for(size_t i = 0; i < ogr->vertices[start].sendToList.size(); i++) {
        // look at all neighbors of a node in the queue and map them while
        // inserting them in the queue (so we can look at their neighbors next)
        int nbr = ogr->vertices[start].sendToList[i].getNeighborId();
        if(ogr->vertices[nbr].getNewPe() == -1) {
          vertexq.push(nbr);

          if(nextPe + 1 < numPes &&
             parr->procs[nextPe].getTotalLoad() + ogr->vertices[nbr].getVertexLoad() > avgLoad) {
            nextPe++;
            avgLoad += (avgLoad - parr->procs[nextPe].getTotalLoad())/(numPes-nextPe);
          }
          ogr->vertices[nbr].setNewPe(nextPe);
          parr->procs[nextPe].totalLoad() += ogr->vertices[nbr].getVertexLoad();
        }
      } // end of for loop
    } // end of while loop
  }

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);		// Send decisions back to LDStats
  delete parr;
  delete ogr;
}
예제 #3
0
/**
 * Greedy object-to-processor assignment.
 *
 * Objects are sorted with ObjLoadGreater and processors are kept in a heap
 * ordered by ProcLoadGreater. Each object in sorted order is assigned to the
 * processor at the front of the heap, whose total load is then increased by
 * the object's load before it is pushed back into the heap.
 *
 * @param stats load balancing database; read to build the graph and updated
 *              through ObjGraph::convertDecisions().
 */
void TempAwareGreedyLB::work(LDStats* stats)
{
CkPrintf("----------------- in TempAwareGreedyLB -----------\n");
  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);       // Processor Array
  ObjGraph *ogr = new ObjGraph(stats);          // Object Graph

  /** ============================= STRATEGY ================================ */
  parr->resetTotalLoad();

  if (_lb_args.debug()>1) 
    CkPrintf("[%d] In TempAwareGreedyLB strategy\n",CkMyPe());

  // max heap of objects
  std::sort(ogr->vertices.begin(), ogr->vertices.end(), ObjLoadGreater());
  // min heap of processors
  std::make_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());

  for(size_t vert = 0; vert < ogr->vertices.size(); vert++) {
    // Pop the least loaded processor
    ProcInfo p = parr->procs.front();
    std::pop_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
    parr->procs.pop_back();

    // Increment the load of the least loaded processor by the load of the
    // 'heaviest' unmapped object
    p.setTotalLoad(p.getTotalLoad() + ogr->vertices[vert].getVertexLoad());
    ogr->vertices[vert].setNewPe(p.getProcId());

    // Insert the least loaded processor with load updated back into the heap
    parr->procs.push_back(p);
    std::push_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
  }

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);         // Send decisions back to LDStats

  // Both helper structures were allocated by this call and are no longer
  // needed once the decisions have been written back.
  delete parr;
  delete ogr;
}
예제 #4
0
/**
 * Partitions (first step) or repartitions (later steps) the object graph
 * with Scotch and records the resulting object-to-processor mapping.
 *
 * Steps:
 *  1. Fold every recvFrom edge that duplicates a sendTo edge of the same
 *     vertex into that sendTo edge (byte counts are summed).
 *  2. Find the maximum vertex load and maximum edge byte count, and record
 *     each vertex's current PE.
 *  3. Build the Scotch compact adjacency arrays, scaling vertex loads by
 *     256/maxLoad and edge bytes by 1024/maxBytes.
 *  4. Call SCOTCH_graphPart when step() == 0, otherwise SCOTCH_graphRepart
 *     with the old mapping and a fixed migration cost.
 *  5. Store every changed assignment in the object graph and convert the
 *     decisions back into `stats`.
 *
 * @param stats load balancing database; read to build the graph and updated
 *              through ObjGraph::convertDecisions().
 */
void ScotchRefineLB::work(LDStats *stats) {
  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);
  ObjGraph *ogr = new ObjGraph(stats);

  /** ============================= STRATEGY ================================ */
  // convert ObjGraph to the Scotch graph
  SCOTCH_Num baseval = 0;			// starting index of vertices
  SCOTCH_Num vertnbr = ogr->vertices.size();	// number of vertices
  SCOTCH_Num edgenbr = 0;			// number of edges

  SCOTCH_Num *oldpemap = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * vertnbr);

  double maxLoad = 0.0;
  long maxBytes = 1;	// starts at 1 so the byte ratio below never divides by 0
  int i;
  size_t j, k;

  /** remove duplicate edges from recvFrom */
  for(i = baseval; i < vertnbr; i++) {
    for(j = 0; j < ogr->vertices[i].sendToList.size(); j++) {
      int vert = ogr->vertices[i].sendToList[j].getNeighborId();
      k = 0;
      while(k < ogr->vertices[i].recvFromList.size()) {
        if(ogr->vertices[i].recvFromList[k].getNeighborId() == vert) {
          ogr->vertices[i].sendToList[j].setNumBytes(ogr->vertices[i].sendToList[j].getNumBytes() + ogr->vertices[i].recvFromList[k].getNumBytes());
          // erase() shifts the following entries down by one, so slot k now
          // holds an entry that has not been examined yet: do not advance.
          ogr->vertices[i].recvFromList.erase(ogr->vertices[i].recvFromList.begin() + k);
        } else {
          k++;
        }
      }
    }
  }

  /** the object load is normalized to an integer between 0 and 256 */
  for(i = baseval; i < vertnbr; i++) {
    if(ogr->vertices[i].getVertexLoad() > maxLoad)
      maxLoad = ogr->vertices[i].getVertexLoad();

    edgenbr += ogr->vertices[i].sendToList.size() + ogr->vertices[i].recvFromList.size();
    oldpemap[i] = ogr->vertices[i].getCurrentPe();
  }

  for(i = baseval; i < vertnbr; i++) {
    for(j = 0; j < ogr->vertices[i].sendToList.size(); j++) {
      if (ogr->vertices[i].sendToList[j].getNumBytes() > maxBytes) {
        maxBytes = ogr->vertices[i].sendToList[j].getNumBytes();
      }
    }
    for(j = 0; j < ogr->vertices[i].recvFromList.size(); j++) {
      if (ogr->vertices[i].recvFromList[j].getNumBytes() > maxBytes) {
        maxBytes = ogr->vertices[i].recvFromList[j].getNumBytes();
      }
    }
  }

  /* adjacency list */
  SCOTCH_Num *verttab = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * (vertnbr+1));
  /* loads of vertices */
  SCOTCH_Num *velotab = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * vertnbr);
  /* id of the neighbors */
  SCOTCH_Num *edgetab = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * edgenbr);
  /* number of bytes exchanged */
  SCOTCH_Num *edlotab = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * edgenbr);

  int edgeNum = 0;
  // When no vertex carries load, 256.0/maxLoad would be a division by zero
  // and the scaled weights would be NaN; use a zero ratio so every vertex
  // gets weight 0, which is what zero-load vertices receive anyway.
  double ratio = (maxLoad > 0.0) ? 256.0/maxLoad : 0.0;
  double byteRatio = 1024.0/maxBytes;

  for(i = baseval; i < vertnbr; i++) {
    verttab[i] = edgeNum;
    velotab[i] = (int)ceil(ogr->vertices[i].getVertexLoad() * ratio);
    for(j = 0; j < ogr->vertices[i].sendToList.size(); j++) {
      edgetab[edgeNum] = ogr->vertices[i].sendToList[j].getNeighborId();
      edlotab[edgeNum] = (int) ceil(ogr->vertices[i].sendToList[j].getNumBytes()
          * byteRatio);
      edgeNum++;
    }
    for(j = 0; j < ogr->vertices[i].recvFromList.size(); j++) {
      edgetab[edgeNum] = ogr->vertices[i].recvFromList[j].getNeighborId();
      edlotab[edgeNum] = (int)
          ceil(ogr->vertices[i].recvFromList[j].getNumBytes() * byteRatio);
      edgeNum++;
    }
  }
  // i == vertnbr here: close the last vertex's adjacency range.
  verttab[i] = edgeNum;
  CkAssert(edgeNum == edgenbr);

  SCOTCH_Graph graph;		// Graph to partition
  SCOTCH_Strat strat;		// Strategy to achieve partitioning

  /* Initialize data structures */
  SCOTCH_graphInit (&graph);
  SCOTCH_stratInit (&strat);

  SCOTCH_graphBuild (&graph, baseval, vertnbr, verttab, NULL, velotab, NULL, edgenbr, edgetab, edlotab); 
  SCOTCH_graphCheck (&graph);

  double migration_cost = 1024.0;

  if (step() == 0) {
    SCOTCH_stratGraphMapBuild (&strat, SCOTCH_STRATBALANCE, parr->procs.size (), 0.01);
  } else {
    SCOTCH_stratGraphMapBuild (&strat, SCOTCH_STRATBALANCE | SCOTCH_STRATREMAP, parr->procs.size (), 0.01);
  }

  SCOTCH_Num *pemap = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * vertnbr);

  // First step: plain partitioning. Later steps: repartitioning, which also
  // takes the old mapping and the migration cost.
  if (step() == 0) {
    SCOTCH_graphPart(&graph, parr->procs.size(), &strat, pemap);
  } else {
    SCOTCH_graphRepart(&graph, parr->procs.size(), oldpemap, migration_cost, NULL, &strat, pemap);
  }

  SCOTCH_graphExit (&graph);
  SCOTCH_stratExit (&strat);

  free(verttab);
  free(velotab);
  free(edgetab);
  free(edlotab);

  // Only record a decision for objects whose processor actually changed.
  for(i = baseval; i < vertnbr; i++) {
    if(pemap[i] != ogr->vertices[i].getCurrentPe())
      ogr->vertices[i].setNewPe(pemap[i]);
  }

  free(pemap);
  free(oldpemap);
  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);
  delete parr;
  delete ogr;
}
예제 #5
0
/**
 * Refinement strategy that tries to move, and failing that swap, objects
 * away from overloaded processors.
 *
 * Processors whose total load lies more than 1% of the average above the
 * average are collected as overloaded, those more than 1% below it as
 * underloaded. While both sets are non-empty, the processor returned by
 * getMax() is handed to refine(); if that fails, refineSwap() is tried; if
 * that fails too, the processor is pushed back and the loop stops.
 *
 * @param stats load balancing database; read to build the graph and updated
 *              through ObjGraph::convertDecisions().
 */
void RefineSwapLB::work(LDStats* stats)
{
  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);       // Processor Array
  ObjGraph *ogr = new ObjGraph(stats);          // Object Graph

  /** ============================= STRATEGY ================================ */
  if (_lb_args.debug() > 1) {
    CkPrintf("[%d] In RefineSwapLB strategy\n",CkMyPe());
  }

  double avg_load = parr->getAverageLoad();
  double threshold = avg_load * 0.01;
  double load_floor = avg_load - threshold;
  double load_ceiling = avg_load + threshold;
  cout <<"Average load " << avg_load << endl;

  std::vector<int> underloaded;
  std::vector<int> overloaded;

  // Per-processor list of the objects currently living on it.
  std::vector<int>* objs_on_pe = new std::vector<int>[parr->procs.size()];
  for (int obj = 0; obj < ogr->vertices.size(); ++obj) {
    objs_on_pe[ogr->vertices[obj].getCurrentPe()].push_back(obj);
  }

  // Classify processors by how far their load is from the average.
  for (int pe = 0; pe < parr->procs.size(); ++pe) {
    if (parr->procs[pe].getTotalLoad() > load_ceiling) {
      overloaded.push_back(pe);
      continue;
    }
    if (parr->procs[pe].getTotalLoad() < load_floor) {
      underloaded.push_back(pe);
    }
  }

  std::make_heap(overloaded.begin(), overloaded.end(), ProcLoadGreaterIndex(parr));

  while (!overloaded.empty() && !underloaded.empty()) {
    int heaviest_pe = getMax(parr, overloaded);

    // First try to shed load by moving objects ...
    if (refine(parr, ogr, overloaded, underloaded, objs_on_pe, heaviest_pe,
               avg_load, threshold)) {
      continue;
    }

    // ... then by swapping objects with another processor.
    if (refineSwap(parr, ogr, overloaded, underloaded, objs_on_pe, heaviest_pe,
                   avg_load, threshold)) {
      continue;
    }

    // Neither worked: put the processor back and give up.
    overloaded.push_back(heaviest_pe);
    std::push_heap(overloaded.begin(), overloaded.end(),
                   ProcLoadGreaterIndex(parr));
    break;
  }

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);         // Send decisions back to LDStats
  delete[] objs_on_pe;
  delete parr;
  delete ogr;
}
예제 #6
0
/**
 * Partitions the object graph with METIS and records the resulting
 * object-to-processor mapping.
 *
 * Steps:
 *  1. Fold every recvFrom edge that duplicates a sendTo edge of the same
 *     vertex into that sendTo edge (byte counts are summed).
 *  2. Find the maximum vertex load and count the edges.
 *  3. Build the xadj/adjncy/vwgt/adjwgt arrays; vertex loads are scaled by
 *     256/maxLoad, edge weights are the raw byte counts.
 *  4. Call METIS_PartGraphRecursive with one constraint, one partition per
 *     processor and an imbalance tolerance of 1.1.
 *  5. Store every changed assignment in the object graph and convert the
 *     decisions back into `stats`.
 *
 * @param stats load balancing database; read to build the graph and updated
 *              through ObjGraph::convertDecisions().
 */
void MetisLB::work(LDStats* stats)
{
  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);
  ObjGraph *ogr = new ObjGraph(stats);

  /** ============================= STRATEGY ================================ */
  if (_lb_args.debug() >= 2) {
    CkPrintf("[%d] In MetisLB Strategy...\n", CkMyPe());
  }

  // convert ObjGraph to the adjacency structure
  // idx_t (not int) because its address is handed to METIS below.
  idx_t numVertices = ogr->vertices.size();	// number of vertices
  int numEdges = 0;				// number of edges

  double maxLoad = 0.0;
  int i;
  size_t j, k;

  /** remove duplicate edges from recvFrom */
  for(i = 0; i < numVertices; i++) {
    for(j = 0; j < ogr->vertices[i].sendToList.size(); j++) {
      int vert = ogr->vertices[i].sendToList[j].getNeighborId();
      k = 0;
      while(k < ogr->vertices[i].recvFromList.size()) {
        if(ogr->vertices[i].recvFromList[k].getNeighborId() == vert) {
          ogr->vertices[i].sendToList[j].setNumBytes(ogr->vertices[i].sendToList[j].getNumBytes() + ogr->vertices[i].recvFromList[k].getNumBytes());
          // erase() shifts the following entries down by one, so slot k now
          // holds an entry that has not been examined yet: do not advance.
          ogr->vertices[i].recvFromList.erase(ogr->vertices[i].recvFromList.begin() + k);
        } else {
          k++;
        }
      }
    }
  }

  /** the object load is normalized to an integer between 0 and 256 */
  for(i = 0; i < numVertices; i++) {
    if(ogr->vertices[i].getVertexLoad() > maxLoad)
      maxLoad = ogr->vertices[i].getVertexLoad();
    numEdges = numEdges + ogr->vertices[i].sendToList.size() + ogr->vertices[i].recvFromList.size();
  }

  /* adjacency list */
  idx_t *xadj = new idx_t[numVertices + 1];
  /* id of the neighbors */
  idx_t *adjncy = new idx_t[numEdges];
  /* weights of the vertices */
  idx_t *vwgt = new idx_t[numVertices];
  /* weights of the edges */
  idx_t *adjwgt = new idx_t[numEdges];

  int edgeNum = 0;
  // When no vertex carries load, 256.0/maxLoad would be a division by zero
  // and the scaled weights would be NaN; use a zero ratio so every vertex
  // gets weight 0, which is what zero-load vertices receive anyway.
  double ratio = (maxLoad > 0.0) ? 256.0/maxLoad : 0.0;

  for(i = 0; i < numVertices; i++) {
    xadj[i] = edgeNum;
    vwgt[i] = (int)ceil(ogr->vertices[i].getVertexLoad() * ratio);
    for(j = 0; j < ogr->vertices[i].sendToList.size(); j++) {
      adjncy[edgeNum] = ogr->vertices[i].sendToList[j].getNeighborId();
      adjwgt[edgeNum] = ogr->vertices[i].sendToList[j].getNumBytes();
      edgeNum++;
    }
    for(j = 0; j < ogr->vertices[i].recvFromList.size(); j++) {
      adjncy[edgeNum] = ogr->vertices[i].recvFromList[j].getNeighborId();
      adjwgt[edgeNum] = ogr->vertices[i].recvFromList[j].getNumBytes();
      edgeNum++;
    }
  }
  // i == numVertices here: close the last vertex's adjacency range.
  xadj[i] = edgeNum;
  CkAssert(edgeNum == numEdges);

  idx_t edgecut;		// number of edges cut by the partitioning
  idx_t *pemap;

  idx_t options[METIS_NOPTIONS];
  METIS_SetDefaultOptions(options);
  //options[METIS_OPTION_PTYPE] = METIS_PTYPE_RB;
  // C style numbering
  options[METIS_OPTION_NUMBERING] = 0;

  // number of constrains
  idx_t ncon = 1;
  // number of partitions
  idx_t numPes = parr->procs.size();
  // one tolerance entry per constraint (ncon == 1); a fixed-size array avoids
  // the non-standard variable-length array
  real_t ubvec[1];
  // allow 10% imbalance
  ubvec[0] = 1.1;

  // mapping of objs to partitions
  pemap = new idx_t[numVertices];

  // Specifies size of vertices for computing the total communication volume
  idx_t *vsize = NULL;
  // This array of size nparts specifies the desired weight for each partition
  // and setting it to NULL indicates graph should be equally divided among
  // partitions
  real_t *tpwgts = NULL;

  int option = 0;
  if (WEIGHTED == option) {
    // set up the different weights between 0 and 1
    tpwgts = new real_t[numPes];
    for (i = 0; i < numPes; i++) {
      tpwgts[i] = 1.0/(real_t)numPes;
    }
  } else if (MULTI_CONSTRAINT == option) {
    CkAbort("Multiple constraints not implemented.\n");
  }

  // numVertices: num vertices in the graph; ncon: num balancing constrains
  // xadj, adjncy: of size n+1 and adjncy of 2m, adjncy[xadj[i]] through and
  // including adjncy[xadj[i+1]-1];
  // vwgt: weight of the vertices; vsize: amt of data that needs to be sent
  // for ith vertex is vsize[i]
  // adjwght: the weight of edges; numPes: total parts
  // tpwghts: target partition weight, can pass NULL to equally divide
  // ubvec: of size ncon to indicate allowed load imbalance tolerance (> 1.0)
  // options: array of options; edgecut: stores the edgecut; pemap: mapping
  // NOTE(review): the status returned by METIS is not inspected; on failure
  // pemap would be read uninitialized below -- consider checking it.
  METIS_PartGraphRecursive(&numVertices, &ncon,  xadj, adjncy, vwgt, vsize, adjwgt,
      &numPes, tpwgts, ubvec, options, &edgecut, pemap);

  delete[] xadj;
  delete[] adjncy;
  delete[] vwgt;
  delete[] adjwgt;
  delete[] vsize;	// still NULL here; delete[] on NULL is a no-op
  delete[] tpwgts;

  if (_lb_args.debug() >= 1) {
   CkPrintf("[%d] MetisLB done! \n", CkMyPe());
  }

  // Only record a decision for objects whose processor actually changed.
  for(i = 0; i < numVertices; i++) {
    if(pemap[i] != ogr->vertices[i].getCurrentPe())
      ogr->vertices[i].setNewPe(pemap[i]);
  }

  delete[] pemap;

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);
  delete parr;
  delete ogr;
}
예제 #7
0
/**
 * Two-phase strategy: greedy load balancing followed by a topology-aware
 * permutation of the processors.
 *
 * Phase 1 sorts the objects with ObjLoadGreater, keeps the processors in a
 * heap ordered by ProcLoadGreater, assigns each object to the processor at
 * the front of the heap and writes the decisions back into `stats`.
 *
 * Phase 2 builds a symmetric processor-to-processor communication matrix
 * from the object-to-object messages in `stats` (using the phase-1 mapping),
 * builds a topology and an affinity tree from it, computes a processor
 * permutation with map_topology_simple() and applies that permutation to
 * the mapping stored in stats->to_proc.
 *
 * @param stats load balancing database; read for loads and communication and
 *              updated in place with the final mapping.
 */
void TreeMatchLB::work(BaseLB::LDStats* stats)
{
  /** ========================= 1st Do Load Balancing =======================*/

  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);       // Processor Array
  ObjGraph *ogr = new ObjGraph(stats);          // Object Graph

  /** ============================= STRATEGY ================================ */
  parr->resetTotalLoad();

  if (_lb_args.debug()>1) 
    CkPrintf("[%d] In GreedyLB strategy\n",CkMyPe());

  // max heap of objects
  std::sort(ogr->vertices.begin(), ogr->vertices.end(), ObjLoadGreater());
  // min heap of processors
  std::make_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());

  for(size_t vert = 0; vert < ogr->vertices.size(); vert++) {
    // Pop the least loaded processor
    ProcInfo p = parr->procs.front();
    std::pop_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
    parr->procs.pop_back();

    // Increment the load of the least loaded processor by the load of the
    // 'heaviest' unmapped object
    p.totalLoad() += ogr->vertices[vert].getVertexLoad();
    ogr->vertices[vert].setNewPe(p.getProcId());

    // Insert the least loaded processor with load updated back into the heap
    parr->procs.push_back(p);
    std::push_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
  }

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);         // Send decisions back to LDStats

  // Phase 2 works on `stats` only, so the phase-1 helpers can go now.
  delete parr;
  delete ogr;

  /** ====================== 2nd do Topology aware mapping ====================*/

  int nb_procs;
  double **comm_mat;
  int i;
  int *object_mapping, *permutation;

  /* get the number of processors and the greedy load balancing result */
  nb_procs = stats->nprocs();
  object_mapping=stats->to_proc.getVec();

  stats->makeCommHash();
  // allocate communication matrix (zero-initialized rows)
  comm_mat=(double**)malloc(sizeof(double*)*nb_procs);
  for(i=0;i<nb_procs;i++){
    comm_mat[i]=(double*)calloc(nb_procs,sizeof(double));
  }

  /* Build the communication matrix */
  for(i=0;i<stats->n_comm;i++){
    LDCommData &commData = stats->commData[i];
    if((!commData.from_proc())&&(commData.recv_type()==LD_OBJ_MSG)){
      /* object_mapping[i] is the processor of object i */
      int from = object_mapping[stats->getHash(commData.sender)];
      int to = object_mapping[stats->getHash(commData.receiver.get_destObj())];
      if(from!=to){
        // symmetric: traffic counts for both directions
        comm_mat[from][to]+=commData.bytes;
        comm_mat[to][from]+=commData.bytes;
      }
    }
  }

  /* build the topology of the hardware (abe machine here) */
  tm_topology_t *topology=build_abe_topology(nb_procs);
  display_topology(topology);
  /* compute the affinity tree */
  // NOTE(review): comm_tree is never released in this function; confirm
  // whether the TreeMatch API expects the caller to free it.
  tree_t *comm_tree=build_tree_from_topology(topology,comm_mat,nb_procs,NULL,NULL);

  /* Compute the processor permutation */
  permutation=(int*)malloc(sizeof(int)*nb_procs);
  map_topology_simple(topology,comm_tree,permutation,NULL);

  /*
     Apply this permutation to all objects
     Side effect: object_mapping points to the stats->to_proc.getVec()
     So, these lines change also stats->to_proc.getVec()
  */
  // NOTE(review): the loop is bounded by the number of processors although
  // object_mapping is indexed by object above; it looks like the bound should
  // be the number of objects -- confirm against the LDStats definition.
  for(i=0;i<nb_procs;i++)
    object_mapping[i]=permutation[object_mapping[i]];

  // the permutation buffer was allocated above and is only needed for the
  // remapping loop
  free(permutation);

  // free communication matrix;
  for(i=0;i<nb_procs;i++){
      free(comm_mat[i]);
  }
  free(comm_mat);
  free_topology(topology);
}