Пример #1
0
/**
 * Temperature-aware refinement load-balancing step.
 *
 * When built with TEMP_LDB this routine:
 *   1. snapshots each PE's speed (stats->procs[i].pe_speed) and temperature
 *      (stats->procs[i].pe_temp) into procFreq / procTemp;
 *   2. averages the temperature over each chip (logicalCoresPerChip PEs);
 *   3. for every chip, steps the per-PE index procFreqPtr[] into the freqs[]
 *      table by one: up when the chip average exceeds MAX_TEMP, down
 *      otherwise, and stores the selected value in procFreqNew[];
 *   4. unless NO_TEMP_LB is defined, runs RefinerTemp with the old and new
 *      frequencies and writes the resulting placements into stats->to_proc;
 *   5. calls changeFreq() on every PE whose frequency value changed.
 *
 * Without TEMP_LDB the function body is empty.
 *
 * procFreqPtr is intentionally not reallocated here: it carries the frequency
 * index from one invocation to the next.
 * NOTE(review): this relies on procFreqPtr, freqs, migFile and starting being
 * set up elsewhere in the class, and on the array members being null before
 * the first call -- confirm against the constructor.
 *
 * @param stats load-balancing database for this step; from_proc is read and
 *              to_proc is updated for objects that should migrate.
 */
void TempAwareRefineLB::work(LDStats* stats)
{
#ifdef TEMP_LDB
  numProcs=stats->nprocs();
  numChips=numProcs/logicalCoresPerChip;

  // Release the buffers of the previous invocation before allocating new
  // ones (delete[] on a null pointer is a no-op). The original allocated
  // avgChipTemp *before* this point, which leaked the previous buffer and
  // immediately freed the fresh one.
  delete [] procFreq;
  delete [] procFreqEffect;
  delete [] procTemp;
  delete [] procFreqNew;
  delete [] procFreqNewEffect;
  delete [] avgChipTemp;

  procFreq = new int[numProcs];
  procFreqEffect = new int[numProcs];
  procTemp = new float[numProcs];
  procFreqNew = new int[numProcs];
  procFreqNewEffect = new int[numProcs];
  avgChipTemp = new float[numChips];

  for(int chip=0;chip<numChips;chip++) avgChipTemp[chip]=0;

  for(int i=0;i<numProcs;i++)
  {
    procFreq[i] = stats->procs[i].pe_speed;
    procTemp[i] = stats->procs[i].pe_temp;
    // Default to "no change". Every PE that belongs to a complete chip is
    // overwritten below; PEs beyond the last complete chip (numProcs not a
    // multiple of logicalCoresPerChip) would otherwise stay uninitialised.
    procFreqNew[i] = procFreq[i];
    const int chip = i/logicalCoresPerChip;
    // Guard against writing past avgChipTemp for those trailing PEs.
    if(chip<numChips) avgChipTemp[chip] += procTemp[i];
  }

  for(int chip=0;chip<numChips;chip++) avgChipTemp[chip]/=logicalCoresPerChip;

  for(int chip=0;chip<numChips;chip++)
  {
    // PE range [first,last) that makes up this chip.
    const int first=chip*logicalCoresPerChip;
    const int last=first+logicalCoresPerChip;

    if(avgChipTemp[chip] > MAX_TEMP)
    {
      if(procFreqPtr[first]==numAvailFreqs-1)
      {
        // Already at the last table entry (reported below as the minimum
        // frequency): nothing left to step to, keep the current selection.
        for(int j=first;j<last;j++) procFreqNew[j] = freqs[procFreqPtr[j]];
        CkPrintf("CHIP#%d RUNNING HOT EVEN WITH MIN FREQUENCY!!\n",chip);
      }
      else
      {
        for(int j=first;j<last;j++)
        {
          if(procFreqPtr[j]<numAvailFreqs-1) procFreqPtr[j]++;
#ifdef MAX_MIN
          // TESTING ONLY: pin chip 0 to the last table entry and every other
          // chip to the first one, regardless of temperature.
          if(chip==0) procFreqPtr[j] = numAvailFreqs-1;
          else procFreqPtr[j]=0;
#endif
          procFreqNew[j] = freqs[procFreqPtr[j]];
        }
#ifndef ORG_VERSION
        CkPrintf("!!!!! Chip#%d running HOT shifting from %d to %d temp=%f\n",chip,procFreq[first],procFreqNew[first],avgChipTemp[chip]);
#endif
      }
    }
    else
    {
      if(procFreqPtr[first]>0)
      {
        for(int j=first;j<last;j++)
        {
          if(procFreqPtr[j]>0) procFreqPtr[j]--;
#ifdef MAX_MIN
          // TESTING ONLY: same override as in the hot branch.
          if(chip==0) procFreqPtr[j] = numAvailFreqs-1;
          else procFreqPtr[j]=0;
#endif
          procFreqNew[j] = freqs[procFreqPtr[j]];
        }
#ifndef ORG_VERSION
        CkPrintf("!!!!! Chip#%d running COLD shifting from %d to %d temp=%f\n",chip,procFreq[first],procFreqNew[first],avgChipTemp[chip]);
#endif
      }
      else
      {
        // Already at table index 0: keep the current selection.
        for(int j=first;j<last;j++) procFreqNew[j] = freqs[procFreqPtr[j]];
      }
    }
#ifdef ORG_VERSION
    // ORG_VERSION ignores the temperature decision and always selects freqs[0].
    for(int j=first;j<last;j++) procFreqNew[j] = freqs[0];
#endif
  }

#ifndef NO_TEMP_LB
  int obj;
  int n_pes = stats->nprocs();

  // Record the current object-to-PE mapping.
  int* from_procs = RefinerTemp::AllocProcs(n_pes, stats);
  for(obj=0;obj<stats->n_objs;obj++) from_procs[obj] = stats->from_proc[obj];

  populateEffectiveFreq(numProcs);

  // Buffer the refiner writes the new mapping into.
  int* to_procs = RefinerTemp::AllocProcs(n_pes, stats);
  RefinerTemp refiner(1.03,procFreq,procFreqNew,n_pes);
  refiner.Refine(n_pes, stats, from_procs, to_procs);

  // Save the output. numMigs tallies objects per destination PE; it is only
  // consumed by the debug print kept (commented out) below.
  int migs=0;
  int *numMigs = new int[numProcs];
  for(int pe=0;pe<numProcs;pe++) numMigs[pe] = 0;

  for(obj=0;obj<stats->n_objs;obj++)
  {
    const int pe = stats->from_proc[obj];
    numMigs[to_procs[obj]]++;
    // The original heap-allocated a computeInfo per object here purely for
    // commented-out debug output and never freed it; that leak is removed.
    if (to_procs[obj] != pe)
    {
      migs++;
      stats->to_proc[obj] = to_procs[obj];
    }
  }

  //for(int pe=0;pe<numProcs;pe++) CkPrintf("PROC#%d freq:%d objs:%d ----------\n",pe,procFreqNew[pe],numMigs[pe]);
  CkPrintf("TEMPLB INFO: Total Objs:%d migrations:%d time:%f \n",stats->n_objs,migs,CmiWallTimer()-starting);
  fprintf(migFile,"%f %d\n",CmiWallTimer()-starting,migs);

  // Free the per-call buffers (numMigs was previously leaked).
  delete [] numMigs;
  RefinerTemp::FreeProcs(from_procs);
  RefinerTemp::FreeProcs(to_procs);
#endif

  // Apply the new frequency on every PE whose value actually changed.
  for(int x=0;x<numProcs;x++)
  {
    if(procFreq[x]!=procFreqNew[x]) thisProxy[x].changeFreq(procFreqNew[x]);
  }
#endif // TEMP_LDB endif
}
Пример #2
0
/**
 * Temperature-aware, communication-aware refinement strategy.
 *
 * With TEMP_LDB defined the routine prepares the temperature-LB state
 * (initStructs, tempControl, populateEffectiveFreq), builds a processor array
 * and an object graph from `stats`, and then repeatedly takes the heaviest
 * processor whose load is above upper_threshold_temp and tries to move one
 * randomly chosen object off it:
 *   - first to one of the PEs returned by getPossiblePes() for that object,
 *   - otherwise to the first PE in the below-average list,
 * in both cases only if the receiver stays under upper_threshold_temp.
 * The loop stops when there are no overloaded PEs left, no below-average PEs
 * left, or the swap budget (number of vertices * SWAP_MULTIPLIER) is used up.
 * Decisions are written back through ObjGraphTemp::convertDecisions().
 *
 * Without TEMP_LDB the routine aborts via CmiAbort.
 *
 * @param stats load-balancing database for this step.
 */
void TempAwareCommLB::work(LDStats* stats) {
  /** ========================== INITIALIZATION ============================= */
#ifdef TEMP_LDB
  // Structures shared by the temperature-aware balancers.
  initStructs(stats);
  tempControl();
  populateEffectiveFreq(stats->nprocs());

  CkPrintf(" ================== in TempAwareCommLB::work() ===========\n");

  ProcArrayTemp *procArray = new ProcArrayTemp(stats,procFreq,procFreqNew);  // Processor Array
  procArray->convertToInsts(stats);
  ObjGraphTemp *objGraph = new ObjGraphTemp(stats,procFreq,procFreqNew);     // Object Graph
  objGraph->convertToInsts(stats);

  const double avgload = procArray->getAverageLoad();  // Average load of processors

  // isBelowAvg[pe] is true for PEs classified as below average, false otherwise.
  std::vector<bool> isBelowAvg(procArray->procs.size(), false);

  // objsOnPe[pe] lists the vertex ids currently placed on that PE.
  std::vector<int>* objsOnPe = new std::vector<int>[procArray->procs.size()];

  upper_threshold_temp = avgload + (avgload * THRESHOLD);
  lower_threshold_temp = avgload;

  srand(time(NULL));

  /** ============================= STRATEGY ================================ */
  if (_lb_args.debug()>1) {
    CkPrintf("[%d] In TempAwareCommLB strategy\n",CkMyPe());
  }

  CkPrintf("Average load %E\n", avgload);

  // Bucket every vertex under its current PE; the new PE starts out equal to
  // the current one.
  for (int v = 0; v < objGraph->vertices.size(); v++) {
    const int home = objGraph->vertices[v].getCurrentPe();
    objsOnPe[home].push_back(v);
    objGraph->vertices[v].setNewPe(home);
  }

  // Classify processors: indices of overloaded ones go to `overloaded`,
  // ids of below-average ones go to `belowAvgPes`.
  std::vector<int> overloaded;
  std::vector<int> belowAvgPes;
  for (int idx = 0; idx < procArray->procs.size(); idx++) {
    const double load = procArray->procs[idx].getTotalLoad();
    if (load > upper_threshold_temp) {
      overloaded.push_back(idx);
      continue;
    }
    if (load < lower_threshold_temp) {
      const int peId = procArray->procs[idx].getProcId();
      belowAvgPes.push_back(peId);
      isBelowAvg[peId] = true;
    }
  }

  std::make_heap(overloaded.begin(), overloaded.end(), ProcLoadGreater(procArray));

  // Allow as many swaps as there are chares (scaled by SWAP_MULTIPLIER).
  int swapsLeft = objGraph->vertices.size() * SWAP_MULTIPLIER;

  // Keep balancing while there is an overloaded PE, a below-average PE and
  // swap budget left.
  while (!overloaded.empty() && swapsLeft > 0 && !belowAvgPes.empty()) {
    bool placed = false;
    int failedTargets = 0;

    // Pop the heaviest processor. If nothing can be moved off it, it is
    // simply dropped (it is not pushed back onto the heap).
    int heavyIdx = popFromProcHeap(overloaded, procArray);
    ProcInfo& heavy = procArray->procs[heavyIdx];
    std::vector<int>& heavyObjs = objsOnPe[heavy.getProcId()];

    // The condition also excludes an empty object list (0 < 0 is false).
    while (!placed && failedTargets < heavyObjs.size()) {
      // Pick a random object on the heavy PE.
      const int randd = rand();
      const int slot = randd % heavyObjs.size();
      int objId = heavyObjs[slot];
      // need to update the load below .. account for freqs
      const double objLoad = objGraph->vertices[objId].getVertexLoad();

      // Prefer a PE suggested by getPossiblePes() that has room for the object.
      std::vector<int> candidates;
      getPossiblePes(candidates, objId, objGraph, procArray);
      for (int c = 0; c < candidates.size(); c++) {
        int target = candidates[c];
        if ((procArray->procs[target].getTotalLoad() + objLoad) < upper_threshold_temp) {
          handleTransfer(objId, heavy, target, objsOnPe, objGraph, procArray);
          placed = true;
          swapsLeft--;
          updateLoadInfo(heavyIdx, target, upper_threshold_temp, lower_threshold_temp,
              overloaded, belowAvgPes, isBelowAvg, procArray);
          break;
        }
      }

      if (placed) {
        continue;  // loop condition ends the inner loop
      }

      // No suitable candidate: fall back to the first below-average PE that
      // can take the object. Each rejected PE counts as one failed try.
      for (int k = 0; k < belowAvgPes.size(); k++) {
        int target = belowAvgPes[k];
        if ((procArray->procs[target].getTotalLoad() + objLoad) < upper_threshold_temp) {
          placed = true;
          swapsLeft--;
          handleTransfer(objId, heavy, target, objsOnPe, objGraph, procArray);
          updateLoadInfo(heavyIdx, target, upper_threshold_temp, lower_threshold_temp,
                         overloaded, belowAvgPes, isBelowAvg, procArray);
          break;
        }
        failedTargets++;
      }
    }
  }

  /** ============================== CLEANUP ================================ */
  objGraph->convertDecisions(stats);         // Send decisions back to LDStats
  delete procArray;
  delete objGraph;
  delete[] objsOnPe;
#else
	CmiAbort("TempLBs are not supported without the TEMP_LDB flag\n"); 
#endif
}