/**
 * Entry point of the recursive-bipartition load-balancing strategy.
 *
 * Builds a processor array and an object graph from @p stats, resets the
 * state that lives outside this function (level, peno, TOTALLOAD, numparts,
 * parray), creates one Vertex_helper per graph vertex, runs
 * RecursiveBiPart() over all vertices and finally writes the resulting
 * placement back into @p stats through ObjGraph::convertDecisions().
 *
 * @param stats load-balancing database; read to build the graph and
 *              updated with the new object-to-processor decisions.
 */
void RecBipartLB::work(LDStats *stats) {
  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);       // Processor Array
  ObjGraph *ogr = new ObjGraph(stats);          // Object Graph

  /** ============================= STRATEGY ================================ */
  level = 0;
  peno = 0;
  TOTALLOAD = 0;
  numparts = CkNumPes();
  parray = parr;   // alias handed to code outside this function

  // The returned average is not needed here.
  // NOTE(review): the call is kept only in case ProcArray computes/caches the
  // average lazily and must see the loads before resetTotalLoad(); confirm
  // and drop it if getAverageLoad() is a plain getter.
  (void)parr->getAverageLoad();

  parr->resetTotalLoad();

  // One helper and one vertex pointer per vertex of the object graph.
  // NOTE(review): vhelpers is only ever appended to in this function; it is
  // neither cleared nor are its elements released here - confirm that this
  // happens elsewhere, otherwise repeated invocations accumulate helpers.
  vector<Vertex *> ptrvector;
  ptrvector.reserve(ogr->vertices.size());
  for (int i = 0; i < ogr->vertices.size(); i++) {
    vhelpers.push_back(new Vertex_helper());
    ptrvector.push_back((Vertex *)&(ogr->vertices[i]));
  }

  RecursiveBiPart(ogr, ptrvector, 1, numparts);

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);         // Send decisions back to LDStats

  // Release the per-invocation structures (previously leaked). The external
  // alias is cleared first so that it never refers to freed memory; it is
  // re-assigned at the top of the next invocation.
  parray = NULL;
  delete parr;
  delete ogr;
}
void GraphBFTLB::work(LDStats *stats) { /** ========================== INITIALIZATION ============================= */ ProcArray *parr = new ProcArray(stats); // Processor Array ObjGraph *ogr = new ObjGraph(stats); // Object Graph /** ============================= STRATEGY ================================ */ double avgLoad = parr->getAverageLoad(); int numPes = parr->procs.size(); // CkPrintf("Average Load %g\n\n", avgLoad); // for(int i=0; i<numPes; i++) // CkPrintf("PE [%d] %g %g\n", i, parr->procs[i].getTotalLoad(), parr->procs[i].getOverhead()); parr->resetTotalLoad(); int start = 0, nextPe = 0; std::queue<int> vertexq; // start at vertex with id 0 vertexq.push(start); if(parr->procs[nextPe].getTotalLoad() + ogr->vertices[start].getVertexLoad() > avgLoad) { nextPe++; avgLoad += (avgLoad - parr->procs[nextPe].getTotalLoad())/(numPes-nextPe); } ogr->vertices[start].setNewPe(nextPe); // CkPrintf("[%d] %d %d %g %g %g\n", start, ogr->vertices[start].getCurrentPe(), ogr->vertices[start].getNewPe(), parr->procs[nextPe].getTotalLoad(), ogr->vertices[start].getVertexLoad(), parr->procs[nextPe].getTotalLoad() + ogr->vertices[start].getVertexLoad()); parr->procs[nextPe].totalLoad() += ogr->vertices[start].getVertexLoad(); int i, nbr; // breadth first traversal while(!vertexq.empty()) { start = vertexq.front(); vertexq.pop(); for(i = 0; i < ogr->vertices[start].sendToList.size(); i++) { // look at all neighbors of a node in the queue and map them while // inserting them in the queue (so we can look at their neighbors next) nbr = ogr->vertices[start].sendToList[i].getNeighborId(); if(ogr->vertices[nbr].getNewPe() == -1) { vertexq.push(nbr); if(parr->procs[nextPe].getTotalLoad() + ogr->vertices[nbr].getVertexLoad() > avgLoad) { nextPe++; avgLoad += (avgLoad - parr->procs[nextPe].getTotalLoad())/(numPes-nextPe); } ogr->vertices[nbr].setNewPe(nextPe); // CkPrintf("[%d] %d %d %g %g %g\n", nbr, ogr->vertices[nbr].getCurrentPe(), ogr->vertices[nbr].getNewPe(), 
parr->procs[nextPe].getTotalLoad(), ogr->vertices[start].getVertexLoad(), parr->procs[nextPe].getTotalLoad() + ogr->vertices[start].getVertexLoad()); parr->procs[nextPe].totalLoad() += ogr->vertices[nbr].getVertexLoad(); } } // end of for loop } // end of while loop /** ============================== CLEANUP ================================ */ ogr->convertDecisions(stats); // Send decisions back to LDStats }
/**
 * Greedy load-balancing strategy.
 *
 * Orders the objects with ObjLoadGreater, keeps the processors in a heap
 * ordered by ProcLoadGreater, and hands each object in turn to the processor
 * at the front of that heap (per the comparator names: heaviest object
 * first, least-loaded processor first). The resulting placement is written
 * back into @p stats through ObjGraph::convertDecisions().
 *
 * @param stats load-balancing database; read to build the graph and
 *              updated with the new object-to-processor decisions.
 */
void TempAwareGreedyLB::work(LDStats* stats) {
  CkPrintf("----------------- in TempAwareGreedyLB -----------\n");

  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);       // Processor Array
  ObjGraph *ogr = new ObjGraph(stats);          // Object Graph

  /** ============================= STRATEGY ================================ */
  parr->resetTotalLoad();

  if (_lb_args.debug()>1)
    CkPrintf("[%d] In TempAwareGreedyLB strategy\n",CkMyPe());

  // objects ordered by ObjLoadGreater
  std::sort(ogr->vertices.begin(), ogr->vertices.end(), ObjLoadGreater());

  // heap of processors ordered by ProcLoadGreater
  std::make_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());

  for (size_t vert = 0; vert < ogr->vertices.size(); vert++) {
    // pop_heap moves the front processor to the back of the vector; it is
    // updated in place there instead of being copied out and pushed again.
    std::pop_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
    ProcInfo &target = parr->procs.back();

    // Charge the object's load to that processor and record the decision.
    target.setTotalLoad(target.getTotalLoad() + ogr->vertices[vert].getVertexLoad());
    ogr->vertices[vert].setNewPe(target.getProcId());

    // Sift the updated processor back into heap order.
    std::push_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
  }

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);         // Send decisions back to LDStats

  // Release the per-invocation structures (previously leaked).
  delete parr;
  delete ogr;
}
/**
 * Refinement strategy with a swap fallback.
 *
 * Processors whose load exceeds the average by more than the tolerance are
 * collected in a heap (max_pe_heap, ordered by ProcLoadGreaterIndex), those
 * below the average by more than the tolerance in min_pe_heap. While both
 * sets are non-empty, the processor returned by getMax() is handed to
 * refine(); if that fails, refineSwap() is tried, and if that fails too the
 * processor is put back on the heap and the loop stops. The resulting
 * placement is written back into @p stats through
 * ObjGraph::convertDecisions().
 *
 * @param stats load-balancing database; read to build the graph and
 *              updated with the new object-to-processor decisions.
 */
void RefineSwapLB::work(LDStats* stats) {
  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);       // Processor Array
  ObjGraph *ogr = new ObjGraph(stats);          // Object Graph

  /** ============================= STRATEGY ================================ */
  if (_lb_args.debug()>1)
    CkPrintf("[%d] In RefineSwapLB strategy\n",CkMyPe());

  // A processor counts as balanced while it stays within this fraction of
  // the average load.
  const double tolerance_fraction = 0.01;

  double avg_load = parr->getAverageLoad();
  double threshold = avg_load * tolerance_fraction;
  double lower_bound_load = avg_load - threshold;
  double upper_bound_load = avg_load + threshold;
  cout <<"Average load " << avg_load << endl;

  std::vector<int> min_pe_heap;
  std::vector<int> max_pe_heap;

  // pe_obj[p] lists the indices of the vertices currently placed on
  // processor p.
  std::vector<int>* pe_obj = new std::vector<int>[parr->procs.size()];
  for (int i = 0; i < ogr->vertices.size(); i++) {
    pe_obj[ogr->vertices[i].getCurrentPe()].push_back(i);
  }

  // Classify processors as overloaded or underloaded; those inside the
  // tolerance band are left alone.
  for (int i = 0; i < parr->procs.size(); i++) {
    const double load = parr->procs[i].getTotalLoad();
    if (load > upper_bound_load) {
      max_pe_heap.push_back(i);
    } else if (load < lower_bound_load) {
      min_pe_heap.push_back(i);
    }
  }

  std::make_heap(max_pe_heap.begin(), max_pe_heap.end(), ProcLoadGreaterIndex(parr));

  while (max_pe_heap.size() != 0 && min_pe_heap.size() != 0) {
    int p_index = getMax(parr, max_pe_heap);

    // Try refinement first, then a swap.
    if (refine(parr, ogr, max_pe_heap, min_pe_heap, pe_obj, p_index,
               avg_load, threshold)) {
      continue;
    }
    if (!refineSwap(parr, ogr, max_pe_heap, min_pe_heap, pe_obj, p_index,
                    avg_load, threshold)) {
      // Neither worked: restore the processor to the heap and give up.
      max_pe_heap.push_back(p_index);
      std::push_heap(max_pe_heap.begin(), max_pe_heap.end(),
                     ProcLoadGreaterIndex(parr));
      break;
    }
  }

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);         // Send decisions back to LDStats
  delete[] pe_obj;
  delete parr;
  delete ogr;
}
/**
 * Two-phase strategy: greedy load balancing followed by a topology-aware
 * permutation of the processors.
 *
 * Phase 1 orders the objects with ObjLoadGreater, keeps the processors in a
 * heap ordered by ProcLoadGreater and assigns each object to the processor
 * at the front of that heap; the decisions are written to @p stats through
 * ObjGraph::convertDecisions().
 *
 * Phase 2 builds a symmetric processor-to-processor communication matrix
 * from the object-to-object messages in stats->commData, builds the hardware
 * topology and the affinity tree, obtains a processor permutation from
 * map_topology_simple() and applies it to the mapping of every object.
 *
 * @param stats load-balancing database; read to build the graph and the
 *              communication matrix, and updated with the new
 *              object-to-processor decisions.
 */
void TreeMatchLB::work(BaseLB::LDStats* stats) {
  /** ========================= 1st Do Load Balancing =======================*/

  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);       // Processor Array
  ObjGraph *ogr = new ObjGraph(stats);          // Object Graph

  /** ============================= STRATEGY ================================ */
  parr->resetTotalLoad();

  if (_lb_args.debug()>1)
    CkPrintf("[%d] In GreedyLB strategy\n",CkMyPe());

  // objects ordered by ObjLoadGreater
  std::sort(ogr->vertices.begin(), ogr->vertices.end(), ObjLoadGreater());

  // heap of processors ordered by ProcLoadGreater
  std::make_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());

  for (size_t vert = 0; vert < ogr->vertices.size(); vert++) {
    // pop_heap moves the front processor to the back of the vector; it is
    // updated in place there instead of being copied out and pushed again.
    std::pop_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
    ProcInfo &target = parr->procs.back();

    // Charge the object's load to that processor and record the decision.
    target.totalLoad() += ogr->vertices[vert].getVertexLoad();
    ogr->vertices[vert].setNewPe(target.getProcId());

    // Sift the updated processor back into heap order.
    std::push_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
  }

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);         // Send decisions back to LDStats

  // Number of objects whose mapping has to be permuted in phase 2.
  // NOTE(review): assumes ObjGraph holds exactly one vertex per entry of
  // stats->to_proc - confirm against ObjGraph's constructor.
  const int nb_objs = (int)ogr->vertices.size();

  // The graph structures are not needed any more (previously leaked).
  delete parr;
  delete ogr;

  /** ====================== 2nd do Topology aware mapping ====================*/
  int i;

  /* get the number of processors and the greedy load-balancing result */
  const int nb_procs = stats->nprocs();
  int *object_mapping = stats->to_proc.getVec();

  stats->makeCommHash();

  // allocate the nb_procs x nb_procs communication matrix, zero-initialised
  double **comm_mat = (double**)malloc(sizeof(double*)*nb_procs);
  for (i = 0; i < nb_procs; i++) {
    comm_mat[i] = (double*)calloc(nb_procs, sizeof(double));
  }

  /* Build the communication matrix from object-to-object messages */
  for (i = 0; i < stats->n_comm; i++) {
    LDCommData &commData = stats->commData[i];
    if ((!commData.from_proc()) && (commData.recv_type() == LD_OBJ_MSG)) {
      /* object_mapping[k] is the processor of object k */
      int from = object_mapping[stats->getHash(commData.sender)];
      int to = object_mapping[stats->getHash(commData.receiver.get_destObj())];
      if (from != to) {
        comm_mat[from][to] += commData.bytes;
        comm_mat[to][from] += commData.bytes;
      }
    }
  }

  /* build the topology of the hardware (abe machine here) */
  tm_topology_t *topology = build_abe_topology(nb_procs);
  display_topology(topology);

  /* compute the affinity tree */
  // NOTE(review): comm_tree is never released in this function and no
  // matching release routine is visible here - confirm whether the
  // TreeMatch API provides one and call it.
  tree_t *comm_tree = build_tree_from_topology(topology, comm_mat, nb_procs, NULL, NULL);

  /* Compute the processor permutation */
  int *permutation = (int*)malloc(sizeof(int)*nb_procs);
  map_topology_simple(topology, comm_tree, permutation, NULL);

  /*
     Apply this permutation to all objects.
     Side effect: object_mapping points to stats->to_proc.getVec(), so these
     lines also change stats->to_proc.

     The loop runs over the objects, not over the processors: object_mapping
     is indexed by object, so bounding it by nb_procs (as before) skipped
     objects when there are more objects than processors and read past the
     end of the mapping when there are fewer.
  */
  for (i = 0; i < nb_objs; i++)
    object_mapping[i] = permutation[object_mapping[i]];

  // the permutation is no longer needed (previously leaked)
  free(permutation);

  // free communication matrix
  for (i = 0; i < nb_procs; i++) {
    free(comm_mat[i]);
  }
  free(comm_mat);

  free_topology(topology);
}