// Remove and return the root of the max heap of overloaded-processor
// indices. parr_above_avg holds indices into parr->procs ordered by
// ProcLoadGreater(parr); the root is the heaviest processor.
inline int popFromProcHeap(std::vector<int> & parr_above_avg, ProcArray *parr) {
  // Capture the heap root (heaviest processor index) before the heap
  // layout is disturbed.
  const int heaviest = parr_above_avg[0];

  // std::pop_heap moves the root to the back and restores the heap
  // property over the remaining elements; pop_back then discards it.
  std::pop_heap(parr_above_avg.begin(), parr_above_avg.end(),
                ProcLoadGreater(parr));
  parr_above_avg.pop_back();

  return heaviest;
}
/**
 * Greedy load balancing strategy: repeatedly assigns the heaviest unmapped
 * object to the currently least-loaded processor, using a min heap of
 * processors keyed on total load.
 *
 * @param stats load-database snapshot; mapping decisions are written back
 *              into it via ObjGraph::convertDecisions().
 */
void TempAwareGreedyLB::work(LDStats* stats)
{
  CkPrintf("----------------- in TempAwareGreedyLB -----------\n");
  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);       // Processor Array
  ObjGraph *ogr = new ObjGraph(stats);          // Object Graph

  /** ============================= STRATEGY ================================ */
  parr->resetTotalLoad();

  if (_lb_args.debug()>1)
    CkPrintf("[%d] In TempAwareGreedyLB strategy\n",CkMyPe());

  int vert;

  // Sort objects by decreasing load so the heaviest is placed first
  // (max-heap ordering realized as a full sort).
  std::sort(ogr->vertices.begin(), ogr->vertices.end(), ObjLoadGreater());

  // Min heap of processors keyed on current total load.
  std::make_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());

  for(vert = 0; vert < ogr->vertices.size(); vert++) {
    // Pop the least loaded processor
    ProcInfo p = parr->procs.front();
    std::pop_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
    parr->procs.pop_back();

    // Increment the load of the least loaded processor by the load of the
    // 'heaviest' unmapped object, and record the assignment.
    p.setTotalLoad(p.getTotalLoad() + ogr->vertices[vert].getVertexLoad());
    ogr->vertices[vert].setNewPe(p.getProcId());

    // Insert the least loaded processor with load updated back into the heap
    parr->procs.push_back(p);
    std::push_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
  }

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);         // Send decisions back to LDStats

  // Fix: release the working structures — they were previously leaked.
  // (TempAwareCommLB::work in this file already deletes its parr/ogr.)
  delete parr;
  delete ogr;
}
inline void updateLoadInfo(int p_index, int possible_pe, double upper_threshold_temp, double lower_threshold_temp, std::vector<int>& parr_above_avg, std::vector<int>& parr_below_avg, std::vector<bool> &proc_load_info, ProcArray *parr) { ProcInfo& p = parr->procs[p_index]; ProcInfo& possible_pe_procinfo = parr->procs[possible_pe]; // If the updated load is still greater than the average by the // threshold value, then push it back to the max heap if (p.getTotalLoad() > upper_threshold_temp) { parr_above_avg.push_back(p_index); std::push_heap(parr_above_avg.begin(), parr_above_avg.end(), ProcLoadGreater(parr)); //CkPrintf("\t Pushing pe : %d to max heap\n", p.getProcId()); } else if (p.getTotalLoad() < lower_threshold_temp) { parr_below_avg.push_back(p_index); proc_load_info[p_index] = true; //CkPrintf("\t Adding pe : %d to less loaded\n", p.getProcId()); } // If the newly assigned processor's load is greater than the average // by the threshold value, then push it into the max heap. if (possible_pe_procinfo.getTotalLoad() > upper_threshold_temp) { // TODO: It should be the index in procarray :( parr_above_avg.push_back(possible_pe); std::push_heap(parr_above_avg.begin(), parr_above_avg.end(), ProcLoadGreater(parr)); removeFromArray(possible_pe, parr_below_avg); proc_load_info[possible_pe] = false; //CkPrintf("\t Pusing pe : %d to max heap\n", possible_pe); } else if (possible_pe_procinfo.getTotalLoad() < lower_threshold_temp) { } else { removeFromArray(possible_pe, parr_below_avg); proc_load_info[possible_pe] = false; //CkPrintf("\t Removing from lower list pe : %d\n", possible_pe); } }
/**
 * Two-phase strategy: (1) greedy load balancing (heaviest object to least
 * loaded processor), then (2) topology-aware remapping with TreeMatch —
 * a PE-to-PE communication matrix is built from the greedy mapping and a
 * processor permutation minimizing communication cost is applied.
 *
 * @param stats load-database snapshot; decisions are written into
 *              stats->to_proc (first by convertDecisions(), then permuted
 *              in place through the object_mapping alias).
 */
void TreeMatchLB::work(BaseLB::LDStats* stats)
{
  /** ========================= 1st Do Load Balancing =======================*/
  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);       // Processor Array
  ObjGraph *ogr = new ObjGraph(stats);          // Object Graph

  /** ============================= STRATEGY ================================ */
  parr->resetTotalLoad();

  // Fix: message previously said "GreedyLB" (copy-paste from GreedyLB).
  if (_lb_args.debug()>1)
    CkPrintf("[%d] In TreeMatchLB strategy\n",CkMyPe());

  int vert;

  // max heap of objects (heaviest first)
  std::sort(ogr->vertices.begin(), ogr->vertices.end(), ObjLoadGreater());
  // min heap of processors keyed on total load
  std::make_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());

  for(vert = 0; vert < ogr->vertices.size(); vert++) {
    // Pop the least loaded processor
    ProcInfo p = parr->procs.front();
    std::pop_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
    parr->procs.pop_back();

    // Increment the load of the least loaded processor by the load of the
    // 'heaviest' unmapped object
    p.totalLoad() += ogr->vertices[vert].getVertexLoad();
    ogr->vertices[vert].setNewPe(p.getProcId());

    // Insert the least loaded processor with load updated back into the heap
    parr->procs.push_back(p);
    std::push_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
  }

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);         // Send decisions back to LDStats

  /** ====================== 2nd do Topology aware mapping ====================*/
  int nb_procs;
  double **comm_mat;
  int i;
  int *object_mapping, *permutation;

  /* get number of processors and the greedy load balancing result */
  nb_procs = stats->nprocs();
  object_mapping=stats->to_proc.getVec();   // alias into stats->to_proc
  stats->makeCommHash();

  // allocate the (dense, zero-initialized) communication matrix
  comm_mat=(double**)malloc(sizeof(double*)*nb_procs);
  for(i=0;i<nb_procs;i++){
    comm_mat[i]=(double*)calloc(nb_procs,sizeof(double));
  }

  /* Build the communication matrix: accumulate bytes exchanged between
     the (post-greedy) processors of each communicating object pair. */
  for(i=0;i<stats->n_comm;i++){
    LDCommData &commData = stats->commData[i];
    if((!commData.from_proc())&&(commData.recv_type()==LD_OBJ_MSG)){
      /* object_mapping[i] is the processor of object i */
      int from = object_mapping[stats->getHash(commData.sender)];
      int to = object_mapping[stats->getHash(commData.receiver.get_destObj())];
      if(from!=to){
        // symmetric: count traffic in both directions
        comm_mat[from][to]+=commData.bytes;
        comm_mat[to][from]+=commData.bytes;
      }
    }
  }

  /* build the topology of the hardware (abe machine here) */
  tm_topology_t *topology=build_abe_topology(nb_procs);
  display_topology(topology);
  /* compute the affinity tree */
  tree_t *comm_tree=build_tree_from_topology(topology,comm_mat,nb_procs,NULL,NULL);

  /* Compute the processor permutation */
  permutation=(int*)malloc(sizeof(int)*nb_procs);
  map_topology_simple(topology,comm_tree,permutation,NULL);

  /* Apply this permutation to all objects.
     Side effect: object_mapping points to stats->to_proc.getVec(),
     so these lines also change stats->to_proc. */
  for(i=0;i<nb_procs;i++)
    object_mapping[i]=permutation[object_mapping[i]];

  // free communication matrix
  for(i=0;i<nb_procs;i++){
    free(comm_mat[i]);
  }
  free(comm_mat);
  // Fix: permutation was previously leaked.
  free(permutation);
  free_topology(topology);

  // Fix: parr/ogr were previously leaked (TempAwareCommLB::work deletes
  // its equivalents).
  delete parr;
  delete ogr;
}
// Temperature- and communication-aware load balancing strategy.
// Repeatedly moves a randomly chosen object off an overloaded processor
// onto either (a) a processor it communicates heavily with, or (b) any
// underloaded processor, until no overloaded/underloaded processors
// remain or the swap budget is exhausted.
// Requires the TEMP_LDB build flag; aborts otherwise.
void TempAwareCommLB::work(LDStats* stats)
{
/** ========================== INITIALIZATION ============================= */
#ifdef TEMP_LDB
  //////////////////////////////////////////////////////
  // initialize structures for TempLBs
  initStructs(stats);
  tempControl();
  populateEffectiveFreq(stats->nprocs());
  //////////////////////////////////////////////////////
  CkPrintf(" ================== in TempAwareCommLB::work() ===========\n");
  ProcArrayTemp *parr = new ProcArrayTemp(stats,procFreq,procFreqNew);    // Processor Array
  // Convert loads to instruction counts so they are comparable across
  // processors running at different frequencies.
  parr->convertToInsts(stats);
  ObjGraphTemp *ogr = new ObjGraphTemp(stats,procFreq,procFreqNew);       // Object Graph
  ogr->convertToInsts(stats);

  double avgload = parr->getAverageLoad();      // Average load of processors

  // Sets to false if it is overloaded, else to true.
  // NOTE(review): entries are indexed both by vector position and by
  // getProcId() below — presumably these coincide; verify.
  vector<bool> proc_load_info(parr->procs.size(), false);

  // Create an array of vectors for each processor mapping to the objects in
  // that processor
  std::vector<int>* parr_objs = new std::vector<int>[parr->procs.size()];

  // A processor is "overloaded" above avg*(1+THRESHOLD) and "underloaded"
  // below the plain average (the tighter lower bound is commented out).
  upper_threshold_temp = avgload + (avgload * THRESHOLD);
  //lower_threshold = avgload - (avgload * THRESHOLD * THRESHOLD);
  lower_threshold_temp = avgload;

  int less_loaded_counter = 0;
  // Seed for the random object selection below; results are therefore
  // not reproducible across runs.
  srand(time(NULL));

/** ============================= STRATEGY ================================ */
  if (_lb_args.debug()>1)
    CkPrintf("[%d] In TempAwareCommLB strategy\n",CkMyPe());

  CkPrintf("Average load %E\n", avgload);

  int vert, i, j;
  int curr_pe;

  // Iterate over all the chares and construct the peid, vector<chareid> array
  for(vert = 0; vert < ogr->vertices.size(); vert++) {
    curr_pe = ogr->vertices[vert].getCurrentPe();
    parr_objs[curr_pe].push_back(vert);
    // Default decision: every object stays where it is unless moved below.
    ogr->vertices[vert].setNewPe(curr_pe);
  }

  std::vector<int> parr_above_avg;
  std::vector<int> parr_below_avg;

  double pe_load;

  // Insert into parr_above_avg if the processor fits under the criteria of
  // overloaded processor.
  // Insert the processor id into parr_below_avg if the processor is underloaded
  // NOTE(review): above_avg stores the vector index 'vert' while below_avg
  // stores getProcId() — mixed index/id usage, see the TODO in
  // updateLoadInfo; confirm these are interchangeable.
  for (vert = 0; vert < parr->procs.size(); vert++) {
    pe_load = parr->procs[vert].getTotalLoad();
    if (pe_load > upper_threshold_temp) {
      // Pushing ProcInfo into this list
      parr_above_avg.push_back(vert);
    } else if (pe_load < lower_threshold_temp) {
      parr_below_avg.push_back(parr->procs[vert].getProcId());
      proc_load_info[parr->procs[vert].getProcId()] = true;
      less_loaded_counter++;
    }
  }

  // Max heap of overloaded processors, heaviest at the root.
  std::make_heap(parr_above_avg.begin(), parr_above_avg.end(),
      ProcLoadGreater(parr));

  int random;
  int randomly_obj_id;
  bool obj_allocated;
  int num_tries;
  // Allow as many swaps as there are chares (times a multiplier)
  int total_swaps = ogr->vertices.size() * SWAP_MULTIPLIER;
  int possible_pe;
  double obj_load;

  // Keep on loadbalancing until the number of above avg processors is 0,
  // the swap budget runs out, or no underloaded processors remain.
  while (parr_above_avg.size() != 0 && total_swaps > 0 && parr_below_avg.size() != 0) {
    // CkPrintf("Above avg : %d Below avg : %d Total swaps: %d\n", parr_above_avg.size(),
    //     parr_below_avg.size(), total_swaps);
    obj_allocated = false;
    num_tries = 0;

    // Pop the heaviest processor
    int p_index = popFromProcHeap(parr_above_avg, parr);
    ProcInfo& p = parr->procs[p_index];

    // Try to shed one randomly chosen object from p.
    // NOTE(review): num_tries only advances inside the fallback loop below;
    // if parr_below_avg is emptied mid-iteration while no transfer succeeds,
    // this inner loop may not terminate — confirm removeFromArray/outer
    // conditions prevent that.
    while (!obj_allocated && num_tries < parr_objs[p.getProcId()].size()) {
      // It might so happen that due to overhead load, it might not have any
      // more objects in its list
      if (parr_objs[p.getProcId()].size() == 0) {
        // CkPrintf("No obj left to be allocated\n");
        obj_allocated = true;
        break;
      }
      // Pick a random object still resident on p.
      int randd = rand();
      random = randd % parr_objs[p.getProcId()].size();
      randomly_obj_id = parr_objs[p.getProcId()][random];
      // TODO(review, from original): load should account for frequencies.
      obj_load = ogr->vertices[randomly_obj_id].getVertexLoad();
      // CkPrintf("Heavy %d: Parr obj size : %d random : %d random obj id : %d\n", p_index,
      //     parr_objs[p.getProcId()].size(), randd, randomly_obj_id);

      // First preference: processors this object communicates with.
      std::vector<int> possible_pes;
      getPossiblePes(possible_pes, randomly_obj_id, ogr, parr);
      for (i = 0; i < possible_pes.size(); i++) {
        // If the heaviest communicating processor is there in the list, then
        // assign it to that — provided the move keeps it under the threshold.
        possible_pe = possible_pes[i];
        if ((parr->procs[possible_pe].getTotalLoad() + obj_load) < upper_threshold_temp) {
          // CkPrintf("** Transfered %d(Load %lf) from %d:%d(Load %lf) to %d:%d(Load %lf)\n",
          //     randomly_obj_id, obj_load, CkNodeOf(p.getProcId()), p.getProcId(), p.getTotalLoad(),
          //     CkNodeOf(possible_pe), possible_pe,
          //     parr->procs[possible_pe].getTotalLoad());
          handleTransfer(randomly_obj_id, p, possible_pe, parr_objs, ogr, parr);
          obj_allocated = true;
          total_swaps--;
          updateLoadInfo(p_index, possible_pe, upper_threshold_temp, lower_threshold_temp,
              parr_above_avg, parr_below_avg, proc_load_info, parr);
          break;
        }
      }

      // Since there is no processor in the least loaded list with which this
      // chare communicates, pick a random least loaded processor.
      if (!obj_allocated) {
        //CkPrintf(":( Could not transfer to the nearest communicating ones\n");
        for (int x = 0; x < parr_below_avg.size(); x++) {
          int random_pe = parr_below_avg[x];
          if ((parr->procs[random_pe].getTotalLoad() + obj_load) < upper_threshold_temp) {
            obj_allocated = true;
            total_swaps--;
            handleTransfer(randomly_obj_id, p, random_pe, parr_objs, ogr, parr);
            updateLoadInfo(p_index, random_pe, upper_threshold_temp, lower_threshold_temp,
                parr_above_avg, parr_below_avg, proc_load_info, parr);
            break;
          }
          num_tries++;
        }
      }
    }

    if (!obj_allocated) {
      // Could not place any object from this heavy processor; give up on it
      // (it is deliberately NOT re-pushed into the max heap — see the
      // commented-out code below).
      //CkPrintf("!!!! Could not handle the heavy proc %d so giving up\n", p_index);
      // parr_above_avg.push_back(p_index);
      // std::push_heap(parr_above_avg.begin(), parr_above_avg.end(),
      //     ProcLoadGreater(parr));
    }
  }

  //CkPrintf("CommAwareRefine> After lb max load: %lf avg load: %lf\n", max_load, avg_load/parr->procs.size());

/** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);         // Send decisions back to LDStats
  delete parr;
  delete ogr;
  delete[] parr_objs;
#else
  CmiAbort("TempLBs are not supported without the TEMP_LDB flag\n");
#endif
}