// Remove and return the root of the max heap of overloaded-processor
// indices. parr_above_avg holds indices into parr->procs ordered by
// ProcLoadGreater(parr); the root is the heaviest processor.
inline int popFromProcHeap(std::vector<int> & parr_above_avg, ProcArray *parr) {
  // Capture the heap root (heaviest processor index) before the heap
  // layout is disturbed.
  const int heaviest = parr_above_avg[0];

  // std::pop_heap moves the root to the back and restores the heap
  // property over the remaining elements; pop_back then discards it.
  std::pop_heap(parr_above_avg.begin(), parr_above_avg.end(),
                ProcLoadGreater(parr));
  parr_above_avg.pop_back();

  return heaviest;
}
/**
 * Greedy load balancing strategy: repeatedly assigns the heaviest unmapped
 * object to the currently least-loaded processor, using a min heap of
 * processors keyed on total load.
 *
 * @param stats load-database snapshot; mapping decisions are written back
 *              into it via ObjGraph::convertDecisions().
 */
void TempAwareGreedyLB::work(LDStats* stats)
{
  CkPrintf("----------------- in TempAwareGreedyLB -----------\n");
  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);       // Processor Array
  ObjGraph *ogr = new ObjGraph(stats);          // Object Graph

  /** ============================= STRATEGY ================================ */
  parr->resetTotalLoad();

  if (_lb_args.debug()>1)
    CkPrintf("[%d] In TempAwareGreedyLB strategy\n",CkMyPe());

  int vert;

  // Sort objects by decreasing load so the heaviest is placed first
  // (max-heap ordering realized as a full sort).
  std::sort(ogr->vertices.begin(), ogr->vertices.end(), ObjLoadGreater());

  // Min heap of processors keyed on current total load.
  std::make_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());

  for(vert = 0; vert < ogr->vertices.size(); vert++) {
    // Pop the least loaded processor
    ProcInfo p = parr->procs.front();
    std::pop_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
    parr->procs.pop_back();

    // Increment the load of the least loaded processor by the load of the
    // 'heaviest' unmapped object, and record the assignment.
    p.setTotalLoad(p.getTotalLoad() + ogr->vertices[vert].getVertexLoad());
    ogr->vertices[vert].setNewPe(p.getProcId());

    // Insert the least loaded processor with load updated back into the heap
    parr->procs.push_back(p);
    std::push_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
  }

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);         // Send decisions back to LDStats

  // Fix: release the working structures — they were previously leaked.
  // (TempAwareCommLB::work in this file already deletes its parr/ogr.)
  delete parr;
  delete ogr;
}
inline void updateLoadInfo(int p_index, int possible_pe, double upper_threshold_temp, double lower_threshold_temp, std::vector<int>& parr_above_avg, std::vector<int>& parr_below_avg, std::vector<bool> &proc_load_info, ProcArray *parr) { ProcInfo& p = parr->procs[p_index]; ProcInfo& possible_pe_procinfo = parr->procs[possible_pe]; // If the updated load is still greater than the average by the // threshold value, then push it back to the max heap if (p.getTotalLoad() > upper_threshold_temp) { parr_above_avg.push_back(p_index); std::push_heap(parr_above_avg.begin(), parr_above_avg.end(), ProcLoadGreater(parr)); //CkPrintf("\t Pushing pe : %d to max heap\n", p.getProcId()); } else if (p.getTotalLoad() < lower_threshold_temp) { parr_below_avg.push_back(p_index); proc_load_info[p_index] = true; //CkPrintf("\t Adding pe : %d to less loaded\n", p.getProcId()); } // If the newly assigned processor's load is greater than the average // by the threshold value, then push it into the max heap. if (possible_pe_procinfo.getTotalLoad() > upper_threshold_temp) { // TODO: It should be the index in procarray :( parr_above_avg.push_back(possible_pe); std::push_heap(parr_above_avg.begin(), parr_above_avg.end(), ProcLoadGreater(parr)); removeFromArray(possible_pe, parr_below_avg); proc_load_info[possible_pe] = false; //CkPrintf("\t Pusing pe : %d to max heap\n", possible_pe); } else if (possible_pe_procinfo.getTotalLoad() < lower_threshold_temp) { } else { removeFromArray(possible_pe, parr_below_avg); proc_load_info[possible_pe] = false; //CkPrintf("\t Removing from lower list pe : %d\n", possible_pe); } }
/**
 * Two-phase strategy: (1) greedy load balancing (heaviest object to least
 * loaded processor), then (2) topology-aware remapping with TreeMatch —
 * a PE-to-PE communication matrix is built from the greedy mapping and a
 * processor permutation minimizing communication cost is applied.
 *
 * @param stats load-database snapshot; decisions are written into
 *              stats->to_proc (first by convertDecisions(), then permuted
 *              in place through the object_mapping alias).
 */
void TreeMatchLB::work(BaseLB::LDStats* stats)
{
  /** ========================= 1st Do Load Balancing =======================*/
  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);       // Processor Array
  ObjGraph *ogr = new ObjGraph(stats);          // Object Graph

  /** ============================= STRATEGY ================================ */
  parr->resetTotalLoad();

  // Fix: message previously said "GreedyLB" (copy-paste from GreedyLB).
  if (_lb_args.debug()>1)
    CkPrintf("[%d] In TreeMatchLB strategy\n",CkMyPe());

  int vert;

  // max heap of objects (heaviest first)
  std::sort(ogr->vertices.begin(), ogr->vertices.end(), ObjLoadGreater());
  // min heap of processors keyed on total load
  std::make_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());

  for(vert = 0; vert < ogr->vertices.size(); vert++) {
    // Pop the least loaded processor
    ProcInfo p = parr->procs.front();
    std::pop_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
    parr->procs.pop_back();

    // Increment the load of the least loaded processor by the load of the
    // 'heaviest' unmapped object
    p.totalLoad() += ogr->vertices[vert].getVertexLoad();
    ogr->vertices[vert].setNewPe(p.getProcId());

    // Insert the least loaded processor with load updated back into the heap
    parr->procs.push_back(p);
    std::push_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
  }

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);         // Send decisions back to LDStats

  /** ====================== 2nd do Topology aware mapping ====================*/
  int nb_procs;
  double **comm_mat;
  int i;
  int *object_mapping, *permutation;

  /* get number of processors and the greedy load balancing result */
  nb_procs = stats->nprocs();
  object_mapping=stats->to_proc.getVec();   // alias into stats->to_proc
  stats->makeCommHash();

  // allocate the (dense, zero-initialized) communication matrix
  comm_mat=(double**)malloc(sizeof(double*)*nb_procs);
  for(i=0;i<nb_procs;i++){
    comm_mat[i]=(double*)calloc(nb_procs,sizeof(double));
  }

  /* Build the communication matrix: accumulate bytes exchanged between
     the (post-greedy) processors of each communicating object pair. */
  for(i=0;i<stats->n_comm;i++){
    LDCommData &commData = stats->commData[i];
    if((!commData.from_proc())&&(commData.recv_type()==LD_OBJ_MSG)){
      /* object_mapping[i] is the processor of object i */
      int from = object_mapping[stats->getHash(commData.sender)];
      int to = object_mapping[stats->getHash(commData.receiver.get_destObj())];
      if(from!=to){
        // symmetric: count traffic in both directions
        comm_mat[from][to]+=commData.bytes;
        comm_mat[to][from]+=commData.bytes;
      }
    }
  }

  /* build the topology of the hardware (abe machine here) */
  tm_topology_t *topology=build_abe_topology(nb_procs);
  display_topology(topology);
  /* compute the affinity tree */
  tree_t *comm_tree=build_tree_from_topology(topology,comm_mat,nb_procs,NULL,NULL);

  /* Compute the processor permutation */
  permutation=(int*)malloc(sizeof(int)*nb_procs);
  map_topology_simple(topology,comm_tree,permutation,NULL);

  /* Apply this permutation to all objects.
     Side effect: object_mapping points to stats->to_proc.getVec(),
     so these lines also change stats->to_proc. */
  for(i=0;i<nb_procs;i++)
    object_mapping[i]=permutation[object_mapping[i]];

  // free communication matrix
  for(i=0;i<nb_procs;i++){
    free(comm_mat[i]);
  }
  free(comm_mat);
  // Fix: permutation was previously leaked.
  free(permutation);
  free_topology(topology);

  // Fix: parr/ogr were previously leaked (TempAwareCommLB::work deletes
  // its equivalents).
  delete parr;
  delete ogr;
}
// Temperature- and communication-aware load balancing strategy.
// Repeatedly moves a randomly chosen object off an overloaded processor
// onto either (a) a processor it communicates heavily with, or (b) any
// underloaded processor, until no overloaded/underloaded processors
// remain or the swap budget is exhausted.
// Requires the TEMP_LDB build flag; aborts otherwise.
void TempAwareCommLB::work(LDStats* stats)
{
/** ========================== INITIALIZATION ============================= */
#ifdef TEMP_LDB
  //////////////////////////////////////////////////////
  // initialize structures for TempLBs
  initStructs(stats);
  tempControl();
  populateEffectiveFreq(stats->nprocs());
  //////////////////////////////////////////////////////
  CkPrintf(" ================== in TempAwareCommLB::work() ===========\n");
  ProcArrayTemp *parr = new ProcArrayTemp(stats,procFreq,procFreqNew);    // Processor Array
  // Convert loads to instruction counts so they are comparable across
  // processors running at different frequencies.
  parr->convertToInsts(stats);
  ObjGraphTemp *ogr = new ObjGraphTemp(stats,procFreq,procFreqNew);       // Object Graph
  ogr->convertToInsts(stats);

  double avgload = parr->getAverageLoad();      // Average load of processors

  // Sets to false if it is overloaded, else to true.
  // NOTE(review): entries are indexed both by vector position and by
  // getProcId() below — presumably these coincide; verify.
  vector<bool> proc_load_info(parr->procs.size(), false);

  // Create an array of vectors for each processor mapping to the objects in
  // that processor
  std::vector<int>* parr_objs = new std::vector<int>[parr->procs.size()];

  // A processor is "overloaded" above avg*(1+THRESHOLD) and "underloaded"
  // below the plain average (the tighter lower bound is commented out).
  upper_threshold_temp = avgload + (avgload * THRESHOLD);
  //lower_threshold = avgload - (avgload * THRESHOLD * THRESHOLD);
  lower_threshold_temp = avgload;

  int less_loaded_counter = 0;
  // Seed for the random object selection below; results are therefore
  // not reproducible across runs.
  srand(time(NULL));

/** ============================= STRATEGY ================================ */
  if (_lb_args.debug()>1)
    CkPrintf("[%d] In TempAwareCommLB strategy\n",CkMyPe());

  CkPrintf("Average load %E\n", avgload);

  int vert, i, j;
  int curr_pe;

  // Iterate over all the chares and construct the peid, vector<chareid> array
  for(vert = 0; vert < ogr->vertices.size(); vert++) {
    curr_pe = ogr->vertices[vert].getCurrentPe();
    parr_objs[curr_pe].push_back(vert);
    // Default decision: every object stays where it is unless moved below.
    ogr->vertices[vert].setNewPe(curr_pe);
  }

  std::vector<int> parr_above_avg;
  std::vector<int> parr_below_avg;

  double pe_load;

  // Insert into parr_above_avg if the processor fits under the criteria of
  // overloaded processor.
  // Insert the processor id into parr_below_avg if the processor is underloaded
  // NOTE(review): above_avg stores the vector index 'vert' while below_avg
  // stores getProcId() — mixed index/id usage, see the TODO in
  // updateLoadInfo; confirm these are interchangeable.
  for (vert = 0; vert < parr->procs.size(); vert++) {
    pe_load = parr->procs[vert].getTotalLoad();
    if (pe_load > upper_threshold_temp) {
      // Pushing ProcInfo into this list
      parr_above_avg.push_back(vert);
    } else if (pe_load < lower_threshold_temp) {
      parr_below_avg.push_back(parr->procs[vert].getProcId());
      proc_load_info[parr->procs[vert].getProcId()] = true;
      less_loaded_counter++;
    }
  }

  // Max heap of overloaded processors, heaviest at the root.
  std::make_heap(parr_above_avg.begin(), parr_above_avg.end(),
      ProcLoadGreater(parr));

  int random;
  int randomly_obj_id;
  bool obj_allocated;
  int num_tries;
  // Allow as many swaps as there are chares (times a multiplier)
  int total_swaps = ogr->vertices.size() * SWAP_MULTIPLIER;
  int possible_pe;
  double obj_load;

  // Keep on loadbalancing until the number of above avg processors is 0,
  // the swap budget runs out, or no underloaded processors remain.
  while (parr_above_avg.size() != 0 && total_swaps > 0 && parr_below_avg.size() != 0) {
    // CkPrintf("Above avg : %d Below avg : %d Total swaps: %d\n", parr_above_avg.size(),
    //     parr_below_avg.size(), total_swaps);
    obj_allocated = false;
    num_tries = 0;

    // Pop the heaviest processor
    int p_index = popFromProcHeap(parr_above_avg, parr);
    ProcInfo& p = parr->procs[p_index];

    // Try to shed one randomly chosen object from p.
    // NOTE(review): num_tries only advances inside the fallback loop below;
    // if parr_below_avg is emptied mid-iteration while no transfer succeeds,
    // this inner loop may not terminate — confirm removeFromArray/outer
    // conditions prevent that.
    while (!obj_allocated && num_tries < parr_objs[p.getProcId()].size()) {
      // It might so happen that due to overhead load, it might not have any
      // more objects in its list
      if (parr_objs[p.getProcId()].size() == 0) {
        // CkPrintf("No obj left to be allocated\n");
        obj_allocated = true;
        break;
      }
      // Pick a random object still resident on p.
      int randd = rand();
      random = randd % parr_objs[p.getProcId()].size();
      randomly_obj_id = parr_objs[p.getProcId()][random];
      // TODO(review, from original): load should account for frequencies.
      obj_load = ogr->vertices[randomly_obj_id].getVertexLoad();
      // CkPrintf("Heavy %d: Parr obj size : %d random : %d random obj id : %d\n", p_index,
      //     parr_objs[p.getProcId()].size(), randd, randomly_obj_id);

      // First preference: processors this object communicates with.
      std::vector<int> possible_pes;
      getPossiblePes(possible_pes, randomly_obj_id, ogr, parr);
      for (i = 0; i < possible_pes.size(); i++) {
        // If the heaviest communicating processor is there in the list, then
        // assign it to that — provided the move keeps it under the threshold.
        possible_pe = possible_pes[i];
        if ((parr->procs[possible_pe].getTotalLoad() + obj_load) < upper_threshold_temp) {
          // CkPrintf("** Transfered %d(Load %lf) from %d:%d(Load %lf) to %d:%d(Load %lf)\n",
          //     randomly_obj_id, obj_load, CkNodeOf(p.getProcId()), p.getProcId(), p.getTotalLoad(),
          //     CkNodeOf(possible_pe), possible_pe,
          //     parr->procs[possible_pe].getTotalLoad());
          handleTransfer(randomly_obj_id, p, possible_pe, parr_objs, ogr, parr);
          obj_allocated = true;
          total_swaps--;
          updateLoadInfo(p_index, possible_pe, upper_threshold_temp, lower_threshold_temp,
              parr_above_avg, parr_below_avg, proc_load_info, parr);
          break;
        }
      }

      // Since there is no processor in the least loaded list with which this
      // chare communicates, pick a random least loaded processor.
      if (!obj_allocated) {
        //CkPrintf(":( Could not transfer to the nearest communicating ones\n");
        for (int x = 0; x < parr_below_avg.size(); x++) {
          int random_pe = parr_below_avg[x];
          if ((parr->procs[random_pe].getTotalLoad() + obj_load) < upper_threshold_temp) {
            obj_allocated = true;
            total_swaps--;
            handleTransfer(randomly_obj_id, p, random_pe, parr_objs, ogr, parr);
            updateLoadInfo(p_index, random_pe, upper_threshold_temp, lower_threshold_temp,
                parr_above_avg, parr_below_avg, proc_load_info, parr);
            break;
          }
          num_tries++;
        }
      }
    }

    if (!obj_allocated) {
      // Could not place any object from this heavy processor; give up on it
      // (it is deliberately NOT re-pushed into the max heap — see the
      // commented-out code below).
      //CkPrintf("!!!! Could not handle the heavy proc %d so giving up\n", p_index);
      // parr_above_avg.push_back(p_index);
      // std::push_heap(parr_above_avg.begin(), parr_above_avg.end(),
      //     ProcLoadGreater(parr));
    }
  }

  //CkPrintf("CommAwareRefine> After lb max load: %lf avg load: %lf\n", max_load, avg_load/parr->procs.size());

/** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);         // Send decisions back to LDStats
  delete parr;
  delete ogr;
  delete[] parr_objs;
#else
  CmiAbort("TempLBs are not supported without the TEMP_LDB flag\n");
#endif
}