Ejemplo n.º 1
0
/*
 * Compute a placement of nb_obj objects on nb_proc processors and store it
 * in sol.
 *
 * Parameters:
 *   nb_obj      number of objects to place; sol must hold at least nb_obj ints
 *   nb_proc     number of processors; must be > 0
 *   comm_mat    communication matrix, forwarded to build_tree_from_topology
 *   obj_weight  per-object weights, forwarded to build_tree_from_topology
 *   comm_speed  communication speeds, forwarded to build_tree_from_topology
 *   d           size of comm_speed
 *   sol         output: sol[i] receives the placement of object i
 *
 * sol is first set to the identity mapping (sol[i] = i). If nb_proc is not
 * positive or the topology cannot be allocated, an error is printed on stderr
 * and sol keeps that identity mapping.
 *
 * A synthetic topology is built with nb_proc as the arity of level 0; the
 * remaining levels come from decompose() applied to ceil(nb_obj / nb_proc).
 * Timing information is printed on stdout.
 */
void TreeMatchMapping(int nb_obj, int nb_proc, double **comm_mat,  double *obj_weight, double * comm_speed, int d, int *sol){
  tree_t *comm_tree;
  tm_topology_t *topology;
  double duration;
  int comm_speed_extended;
  int i;

  /* Outer timer: covers the whole mapping, read by the final TOC. */
  TIC;

  /* Default answer: identity placement. */
  for(i=0;i<nb_obj;i++){
    sol[i]=i;
  }

  /* nb_proc is used as a divisor below; a zero value would produce inf/NaN
     whose conversion to int is undefined. */
  if(nb_proc<=0){
    fprintf(stderr,"TreeMatchMapping: invalid number of processors (%d), keeping identity mapping\n",nb_proc);
    /* Every TIC in this function is matched by a TOC; consume the outer one
       before leaving (assumes TIC/TOC must stay paired -- confirm). */
    (void)TOC;
    return;
  }

  topology=(tm_topology_t*)malloc(sizeof(tm_topology_t));
  if(topology==NULL){
    fprintf(stderr,"TreeMatchMapping: out of memory, keeping identity mapping\n");
    (void)TOC;
    return;
  }

  topology->arity=(int*)malloc(sizeof(int)*MAX_LEVELS);
  if(topology->arity==NULL){
    fprintf(stderr,"TreeMatchMapping: out of memory, keeping identity mapping\n");
    free(topology);
    (void)TOC;
    return;
  }

  /* Level 0 has one child per processor; decompose() supplies the remaining
     arities and returns the total number of levels. */
  topology->arity[0]=nb_proc;
  topology->nb_levels=decompose((int)ceil((1.0*nb_obj)/nb_proc),1,topology->arity);
  printf("Topology nb levels=%d\n",topology->nb_levels);
  build_synthetic_proc_id(topology);

  /* When the topology has more levels than comm_speed has entries, the local
     comm_speed pointer is handed to update_comm_speed() by address and freed
     below. Presumably update_comm_speed() replaces it with a newly allocated
     array of nb_levels entries -- confirm against its definition. */
  comm_speed_extended=(topology->nb_levels>d);
  if(comm_speed_extended)
    update_comm_speed(&comm_speed,d,topology->nb_levels);

  /* Inner timer 1: tree construction. */
  TIC;
  comm_tree=build_tree_from_topology(topology,comm_mat,nb_obj,obj_weight,comm_speed);
  printf("Tree buildinbg time=%f\n",TOC);

  /* Inner timer 2: mapping of the tree onto the topology; writes sol. */
  TIC;
  map_topology(topology,comm_tree,nb_proc,1,sol,NULL);
  printf("Topology mapping time=%f\n",TOC);

  /* Only release comm_speed when it was replaced above; otherwise it still
     belongs to the caller. */
  if(comm_speed_extended)
    free(comm_speed);

  free_topology(topology);
  free_tree(comm_tree);

  /* Matches the TIC at the top of the function. */
  duration=TOC;
  printf("-------------- Mapping done in %.4fs!\n",duration);
}
Ejemplo n.º 2
0
/**
 * Two-phase load-balancing strategy.
 *
 * Phase 1 assigns objects to processors greedily: objects are visited in the
 * order produced by sorting with ObjLoadGreater, and each one is given to the
 * processor currently at the top of a heap ordered by ProcLoadGreater. The
 * decisions are written back to stats through ObjGraph::convertDecisions.
 *
 * Phase 2 builds a processor-to-processor communication matrix from the
 * phase-1 assignment, computes a processor permutation with the TreeMatch
 * routines, and applies that permutation to the assignment of every object.
 *
 * @param stats  load-balancing database; its to_proc vector is modified in
 *               place and holds the final assignment on return.
 */
void TreeMatchLB::work(BaseLB::LDStats* stats)
{
  /** ========================= 1st Do Load Balancing =======================*/
  {
    /** ========================== INITIALIZATION =========================== */
    // Automatic objects: released when this scope ends, once the decisions
    // have been copied into stats.
    ProcArray parr(stats);       // Processor Array
    ObjGraph ogr(stats);         // Object Graph

    /** ============================= STRATEGY ============================== */
    parr.resetTotalLoad();

    if (_lb_args.debug()>1)
      CkPrintf("[%d] In GreedyLB strategy\n",CkMyPe());

    // Objects are fully sorted (not heapified) and consumed in that order
    std::sort(ogr.vertices.begin(), ogr.vertices.end(), ObjLoadGreater());
    // Heap of processors ordered by ProcLoadGreater
    std::make_heap(parr.procs.begin(), parr.procs.end(), ProcLoadGreater());

    for(size_t vert = 0; vert < ogr.vertices.size(); vert++) {
      // Take the processor at the top of the heap out of it
      ProcInfo p = parr.procs.front();
      std::pop_heap(parr.procs.begin(), parr.procs.end(), ProcLoadGreater());
      parr.procs.pop_back();

      // Give it the next unmapped object and account for that object's load
      p.totalLoad() += ogr.vertices[vert].getVertexLoad();
      ogr.vertices[vert].setNewPe(p.getProcId());

      // Put the processor, with its updated load, back into the heap
      parr.procs.push_back(p);
      std::push_heap(parr.procs.begin(), parr.procs.end(), ProcLoadGreater());
    }

    /** ============================== CLEANUP ============================== */
    ogr.convertDecisions(stats);         // Send decisions back to LDStats
  }

  /** ====================== 2nd do Topology aware mapping ====================*/

  /* Get the number of processors, the number of objects and the greedy
     assignment. object_mapping[i] is the processor of object i. */
  const int nb_procs = stats->nprocs();
  const int nb_objs = stats->n_objs;
  int *object_mapping = stats->to_proc.getVec();

  stats->makeCommHash();

  /* Allocate the zero-initialised nb_procs x nb_procs communication matrix */
  double **comm_mat=(double**)malloc(sizeof(double*)*nb_procs);
  for(int row=0;row<nb_procs;row++){
    comm_mat[row]=(double*)calloc(nb_procs,sizeof(double));
  }

  /* Build the communication matrix: accumulate, symmetrically, the bytes
     exchanged between objects that live on different processors. */
  for(int c=0;c<stats->n_comm;c++){
    LDCommData &commData = stats->commData[c];
    if((!commData.from_proc())&&(commData.recv_type()==LD_OBJ_MSG)){
      int from = object_mapping[stats->getHash(commData.sender)];
      int to = object_mapping[stats->getHash(commData.receiver.get_destObj())];
      if(from!=to){
        comm_mat[from][to]+=commData.bytes;
        comm_mat[to][from]+=commData.bytes;
      }
    }
  }

  /* Build the topology of the hardware (abe machine here) */
  tm_topology_t *topology=build_abe_topology(nb_procs);
  display_topology(topology);

  /* Compute the affinity tree.
     NOTE(review): comm_tree is never released in this function; it probably
     needs the tree library's matching release routine -- confirm. */
  tree_t *comm_tree=build_tree_from_topology(topology,comm_mat,nb_procs,NULL,NULL);

  /* Compute the processor permutation */
  int *permutation=(int*)malloc(sizeof(int)*nb_procs);
  map_topology_simple(topology,comm_tree,permutation,NULL);

  /*
     Apply this permutation to all objects.
     object_mapping is indexed by object, so the loop runs over the objects,
     not over the processors.
     Side effect: object_mapping points to stats->to_proc.getVec(),
     so these lines also change stats->to_proc.
  */
  for(int obj=0;obj<nb_objs;obj++)
    object_mapping[obj]=permutation[object_mapping[obj]];

  /* The permutation is no longer needed once it has been applied */
  free(permutation);

  /* Free the communication matrix */
  for(int row=0;row<nb_procs;row++){
    free(comm_mat[row]);
  }
  free(comm_mat);
  free_topology(topology);
}