/* d: size of comm_speed */ void TreeMatchMapping(int nb_obj, int nb_proc, double **comm_mat, double *obj_weight, double * comm_speed, int d, int *sol){ tree_t *comm_tree; tm_topology_t *topology; double duration; int i; TIC; for(i=0;i<nb_obj;i++){ sol[i]=i; // printf("%f ",obj_weight[i]); } //printf("\n"); // return; topology=(tm_topology_t*)malloc(sizeof(tm_topology_t)); topology->arity=(int*)malloc(sizeof(int)*MAX_LEVELS); topology->arity[0]=nb_proc; topology->nb_levels=decompose((int)ceil((1.0*nb_obj)/nb_proc),1,topology->arity); printf("Topology nb levels=%d\n",topology->nb_levels); build_synthetic_proc_id(topology); if(topology->nb_levels>d) update_comm_speed(&comm_speed,d,topology->nb_levels); //exit(-1); //topology_to_arch(topology); //display_tab(arch,hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PROC)); //display_tab(arch,96); //exit(-1); //int nb_core=topo_nb_proc(topology,1000); //display_tab(comm_mat,N); TIC; comm_tree=build_tree_from_topology(topology,comm_mat,nb_obj,obj_weight,comm_speed); printf("Tree buildinbg time=%f\n",TOC); TIC; map_topology(topology,comm_tree,nb_proc,1,sol,NULL); printf("Topology mapping time=%f\n",TOC); if(topology->nb_levels>d) free(comm_speed); free_topology(topology); free_tree(comm_tree); duration=TOC; printf("-------------- Mapping done in %.4fs!\n",duration); }
/**
 * Two-phase load-balancing strategy.
 *
 * Phase 1 (load balancing): objects are sorted with ObjLoadGreater and each
 * one is assigned in turn to the processor at the top of a heap ordered by
 * ProcLoadGreater. The decisions are written back into `stats` via
 * ObjGraph::convertDecisions().
 *
 * Phase 2 (topology-aware mapping): a processor-to-processor communication
 * matrix is accumulated from stats->commData using the phase-1 placement, a
 * tree is built from it on the topology returned by build_abe_topology(), and
 * the processor permutation computed by map_topology_simple() is applied to
 * the placement of every object.
 *
 * @param stats  load-balancing database; stats->to_proc is updated in place.
 */
void TreeMatchLB::work(BaseLB::LDStats* stats)
{
  /** ========================= 1st Do Load Balancing =======================*/

  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);       // Processor Array
  ObjGraph *ogr = new ObjGraph(stats);          // Object Graph

  /** ============================= STRATEGY ================================ */
  parr->resetTotalLoad();

  // NOTE(review): the message still names GreedyLB; the text is left
  // unchanged so existing debug output is not altered.
  if (_lb_args.debug()>1)
    CkPrintf("[%d] In GreedyLB strategy\n",CkMyPe());

  // max heap of objects
  std::sort(ogr->vertices.begin(), ogr->vertices.end(), ObjLoadGreater());
  // min heap of processors
  std::make_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());

  // size_t index: avoids the signed/unsigned comparison against size().
  for(size_t vert = 0; vert < ogr->vertices.size(); vert++) {
    // Pop the least loaded processor
    ProcInfo p = parr->procs.front();
    std::pop_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
    parr->procs.pop_back();

    // Increment the load of the least loaded processor by the load of the
    // 'heaviest' unmapped object
    p.totalLoad() += ogr->vertices[vert].getVertexLoad();
    ogr->vertices[vert].setNewPe(p.getProcId());

    // Insert the least loaded processor with load updated back into the heap
    parr->procs.push_back(p);
    std::push_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
  }

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);         // Send decisions back to LDStats

  // Neither helper is used past this point; release them (previously leaked).
  delete parr;
  delete ogr;

  /** ====================== 2nd do Topology aware mapping ====================*/
  int i;

  /* get the number of processors and the greedy load balancing result */
  const int nb_procs = stats->nprocs();
  int *object_mapping = stats->to_proc.getVec();

  stats->makeCommHash();

  // allocate the (nb_procs x nb_procs) communication matrix, zero-filled
  double **comm_mat = (double**)malloc(sizeof(double*)*nb_procs);
  for(i=0;i<nb_procs;i++){
    comm_mat[i]=(double*)calloc(nb_procs,sizeof(double));
  }

  /* Build the communication matrix */
  for(i=0;i<stats->n_comm;i++){
    LDCommData &commData = stats->commData[i];
    if((!commData.from_proc())&&(commData.recv_type()==LD_OBJ_MSG)){
      /* object_mapping[i] is the processor of object i */
      int from = object_mapping[stats->getHash(commData.sender)];
      int to = object_mapping[stats->getHash(commData.receiver.get_destObj())];
      if(from!=to){
        // symmetric: bytes are counted in both directions
        comm_mat[from][to]+=commData.bytes;
        comm_mat[to][from]+=commData.bytes;
      }
    }
  }

  /* build the topology of the hardware (abe machine here) */
  tm_topology_t *topology=build_abe_topology(nb_procs);
  display_topology(topology);

  /* compute the affinity tree */
  tree_t *comm_tree=build_tree_from_topology(topology,comm_mat,nb_procs,NULL,NULL);

  /* Compute the processor permutation */
  int *permutation=(int*)malloc(sizeof(int)*nb_procs);
  map_topology_simple(topology,comm_tree,permutation,NULL);

  /*
     Apply this permutation to all objects.
     Side effect: object_mapping points to stats->to_proc.getVec(),
     so these lines also change stats->to_proc.

     object_mapping is indexed by object, so the loop must run over the
     objects. The previous bound (nb_procs) left objects past index nb_procs
     on their un-permuted processor and overran the array when there were
     fewer objects than processors.
     NOTE(review): assumes stats->n_objs is the number of entries in
     stats->to_proc -- confirm against the LDStats declaration.
  */
  for(i=0;i<stats->n_objs;i++)
    object_mapping[i]=permutation[object_mapping[i]];

  // free the communication matrix
  for(i=0;i<nb_procs;i++){
    free(comm_mat[i]);
  }
  free(comm_mat);

  // permutation and comm_tree were previously leaked; free_tree() is the
  // release call paired with build_tree_from_topology() in TreeMatchMapping.
  free(permutation);
  free_topology(topology);
  free_tree(comm_tree);
}