// Thread-start callback (per its name and Pin-style signature):
// - reads the stack-pointer register from the context passed in
// - hands that value to ProcInfo via addThreadStackAddress()
// - then calls ProcInfo::addThreadTebAddress()
// The thread id, flags and user-data arguments are unused.
static VOID OnThreadStart(THREADID, CONTEXT *ctxt, INT32, VOID *){
	ProcInfo *procInfo = ProcInfo::getInstance();
	// Stack pointer as found in the context given to this callback.
	const ADDRINT initialStackPtr = PIN_GetContextReg(ctxt, REG_STACK_PTR);
	procInfo->addThreadStackAddress(initialStackPtr);
	procInfo->addThreadTebAddress();
	//MYINFO("-----------------a NEW Thread started!--------------------\n");
}
inline void handleTransfer(int randomly_obj_id, ProcInfo& p, int possible_pe, std::vector<int>* parr_objs, ObjGraph *ogr, ProcArray* parr) {
  ogr->vertices[randomly_obj_id].setNewPe(possible_pe);
  parr_objs[possible_pe].push_back(randomly_obj_id);
  ProcInfo &possible_pe_procinfo = parr->procs[possible_pe];

  p.totalLoad() -= ogr->vertices[randomly_obj_id].getVertexLoad();
  possible_pe_procinfo.totalLoad() += ogr->vertices[randomly_obj_id].getVertexLoad();
  eraseObjFromParrObjs(parr_objs[p.getProcId()], randomly_obj_id);
  //CkPrintf("After transfered %d from %d : Load %E to %d : Load %E\n", randomly_obj_id, p.getProcId(), p.getTotalLoad(),
  //    possible_pe, possible_pe_procinfo.getTotalLoad());
}
// Hook for a VirtualFree-style call (per its name): looks for a tracked
// heap zone whose `begin` equals `address_to_free` and, if one is found,
// removes it from ProcInfo's heap map.
//
// address_to_free: base address being released.
//
// When several zones share the same `begin`, the one with the highest index
// is removed (the scan intentionally does not stop at the first match, as in
// the original code).
VOID VirtualFreeHook(UINT32 address_to_free){
	MYINFO("Have to free the address %08x\n" , address_to_free);
	ProcInfo *pInfo = ProcInfo::getInstance();
	// Only the element count is needed here, so bind by const reference
	// instead of copying the whole vector into a local. This saves a copy
	// when getHeapMap() returns a reference and is harmless (temporary
	// lifetime extension) when it returns by value.
	const std::vector<HeapZone> &heapMap = pInfo->getHeapMap();
	const unsigned zoneCount = static_cast<unsigned>(heapMap.size());
	int index_to_remove = -1;
	MYINFO("HeapZone before free");
	for(unsigned index=0; index < zoneCount; index++) {
		if(address_to_free == pInfo->getHeapZoneByIndex(index)->begin){
			index_to_remove = static_cast<int>(index);
		}
	}
	// -1 means no tracked zone starts at this address: nothing to delete.
	if(index_to_remove != -1){
		pInfo->deleteHeapZone(index_to_remove);
	}
	MYINFO("HeapZone after free");
}
/**
 * Greedy mapping of objects to processors.
 *
 * Objects are ordered with ObjLoadGreater (presumably heaviest first, per the
 * comparator's name) and each one in turn is assigned to the processor that
 * currently sits at the top of a heap ordered by ProcLoadGreater (the least
 * loaded one, per the original comments). The resulting decisions are written
 * back into `stats` through ObjGraph::convertDecisions().
 *
 * @param stats  load-balancing statistics; used to build the processor array
 *               and object graph, and updated with the new assignment.
 */
void TempAwareGreedyLB::work(LDStats* stats)
{
  CkPrintf("----------------- in TempAwareGreedyLB -----------\n");
  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);       // Processor Array
  ObjGraph *ogr = new ObjGraph(stats);          // Object Graph

  /** ============================= STRATEGY ================================ */
  parr->resetTotalLoad();

  if (_lb_args.debug()>1)
    CkPrintf("[%d] In TempAwareGreedyLB strategy\n",CkMyPe());

  // Objects ordered by ObjLoadGreater (a full sort, not a heap)
  std::sort(ogr->vertices.begin(), ogr->vertices.end(), ObjLoadGreater());
  // min heap of processors
  std::make_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());

  // Hoisted so the loop does not compare a signed index against size().
  const int numVerts = static_cast<int>(ogr->vertices.size());
  for(int vert = 0; vert < numVerts; vert++) {
    // pop_heap moves the least loaded processor to the back of the vector.
    // Updating it there and calling push_heap yields exactly the same heap as
    // copy + pop_back + push_back, without copying the ProcInfo.
    std::pop_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
    ProcInfo &leastLoaded = parr->procs.back();

    // Increment the load of the least loaded processor by the load of the
    // next unmapped object
    leastLoaded.setTotalLoad(leastLoaded.getTotalLoad() + ogr->vertices[vert].getVertexLoad());
    ogr->vertices[vert].setNewPe(leastLoaded.getProcId());

    // Re-establish the heap property with the updated load
    std::push_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
  }

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);         // Send decisions back to LDStats

  // Both helpers were allocated with new above; release them so that every
  // load-balancing step does not leak a processor array and an object graph.
  delete ogr;
  delete parr;
}
Beispiel #5
0
 // Comparator: true when p1's total load is strictly greater than p2's.
 // NOTE(review): both arguments are taken by value, so every comparison
 // copies two ProcInfo objects; const references would avoid that if
 // getTotalLoad() is a const member -- confirm against ProcInfo.
 bool operator()(ProcInfo p1, ProcInfo p2) {
   return (p2.getTotalLoad() < p1.getTotalLoad());
 }
Beispiel #6
0
/**
 * Two-phase strategy.
 *
 *  1. Greedy load balancing: objects, ordered by ObjLoadGreater, are assigned
 *     one by one to the processor at the top of a heap ordered by
 *     ProcLoadGreater; the decisions are written into `stats`.
 *  2. Topology-aware mapping: a processor-to-processor communication matrix
 *     is built from `stats->commData` using the phase-1 assignment, a
 *     processor permutation is computed from it and from the topology built
 *     by build_abe_topology(), and that permutation is applied to the
 *     assignment of every object.
 *
 * @param stats  load-balancing statistics; read for loads and communication,
 *               and `stats->to_proc` is overwritten with the final mapping.
 */
void TreeMatchLB::work(BaseLB::LDStats* stats)
{
  /** ========================= 1st Do Load Balancing =======================*/

  /** ========================== INITIALIZATION ============================= */
  ProcArray *parr = new ProcArray(stats);       // Processor Array
  ObjGraph *ogr = new ObjGraph(stats);          // Object Graph

  /** ============================= STRATEGY ================================ */
  parr->resetTotalLoad();

  if (_lb_args.debug()>1)
    CkPrintf("[%d] In TreeMatchLB strategy\n",CkMyPe());

  // Objects ordered by ObjLoadGreater (a full sort, not a heap)
  std::sort(ogr->vertices.begin(), ogr->vertices.end(), ObjLoadGreater());
  // min heap of processors
  std::make_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());

  // Hoisted so the loop does not compare a signed index against size().
  const int numVerts = static_cast<int>(ogr->vertices.size());
  for(int vert = 0; vert < numVerts; vert++) {
    // pop_heap moves the least loaded processor to the back of the vector.
    // Updating it there and calling push_heap yields exactly the same heap as
    // copy + pop_back + push_back, without copying the ProcInfo.
    std::pop_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
    ProcInfo &leastLoaded = parr->procs.back();

    // Increment the load of the least loaded processor by the load of the
    // next unmapped object
    leastLoaded.totalLoad() += ogr->vertices[vert].getVertexLoad();
    ogr->vertices[vert].setNewPe(leastLoaded.getProcId());

    // Re-establish the heap property with the updated load
    std::push_heap(parr->procs.begin(), parr->procs.end(), ProcLoadGreater());
  }

  /** ============================== CLEANUP ================================ */
  ogr->convertDecisions(stats);         // Send decisions back to LDStats

  // Neither helper is used by the second phase; release them here so they
  // are not leaked on every load-balancing step.
  delete ogr;
  delete parr;


  /** ====================== 2nd do Topology aware mapping ====================*/

  /* get the number of processors, the number of objects and the greedy
     load balancing result */
  const int nb_procs = stats->nprocs();
  const int nb_objs = stats->n_objs;
  /* object_mapping[i] is the processor of object i; it aliases
     stats->to_proc, so writing to it updates the decisions in place */
  int *object_mapping = stats->to_proc.getVec();
  int i;

  stats->makeCommHash();
  // allocate a zero-initialised nb_procs x nb_procs communication matrix
  double **comm_mat = static_cast<double**>(malloc(sizeof(double*)*nb_procs));
  for(i=0;i<nb_procs;i++){
    comm_mat[i]=static_cast<double*>(calloc(nb_procs,sizeof(double)));
  }

  /* Build the communication matrix: for every object-to-object message
     record, add its byte count symmetrically between the two processors
     that currently host the sender and the receiver */
  for(i=0;i<stats->n_comm;i++){
    LDCommData &commData = stats->commData[i];
    if((!commData.from_proc())&&(commData.recv_type()==LD_OBJ_MSG)){
      int from = object_mapping[stats->getHash(commData.sender)];
      int to = object_mapping[stats->getHash(commData.receiver.get_destObj())];
      // traffic inside a single processor does not influence the placement
      if(from!=to){
        comm_mat[from][to]+=commData.bytes;
        comm_mat[to][from]+=commData.bytes;
      }
    }
  }

  /* build the topology of the hardware (abe machine here)*/
  tm_topology_t *topology=build_abe_topology(nb_procs);
  display_topology(topology);
  /* compute the affinity tree */
  // NOTE(review): comm_tree is never released in this function; if the
  // tree-matching library provides a matching free routine it should be
  // called before returning -- confirm against the library's API.
  tree_t *comm_tree=build_tree_from_topology(topology,comm_mat,nb_procs,NULL,NULL);

  /* Compute the processor permutation*/
  int *permutation=static_cast<int*>(malloc(sizeof(int)*nb_procs));
  map_topology_simple(topology,comm_tree,permutation,NULL);


  /*
     Apply this permutation to all objects.
     object_mapping is indexed by object, so the loop must cover nb_objs
     entries. Iterating nb_procs times (as the code previously did) left
     objects unremapped when there were more objects than processors and
     accessed memory past the end of the array when there were fewer.
     Side effect: object_mapping points to stats->to_proc.getVec(),
     so these lines also change stats->to_proc.
  */
  for(i=0;i<nb_objs;i++)
    object_mapping[i]=permutation[object_mapping[i]];

  // free the permutation and the communication matrix
  free(permutation);
  for(i=0;i<nb_procs;i++){
      free(comm_mat[i]);
  }
  free(comm_mat);
  free_topology(topology);
}