Esempio n. 1
0
/**
 * Per-PE work performed after AtSync has decided that balancing should run.
 *
 * Builds the statistics message for this step and then either starts a sum
 * reduction of the object/communication record counts (USE_REDUCTION) or
 * calls SendStats() directly. The whole body is compiled out when
 * CMK_LBDB_ON is off.
 */
void CentralLB::ProcessAtSync()
{
#if CMK_LBDB_ON
  // A count reduction is already in progress; do not start a second one.
  if (reduction_started) {
    return;
  }

  CmiAssert(CmiNodeAlive(CkMyPe()));

  // The PE whose id equals cur_ld_balancer records when this step started.
  if (cur_ld_balancer == CkMyPe()) {
    start_lb_time = CkWallTimer();
  }

#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
  initMlogLBStep(thisgroup);
#endif

  // Assemble the statistics message before anything is sent.
  BuildStatsMsg();

#if USE_REDUCTION
  // Sum the number of object and communication records over all PEs so that
  // processor 0 can pre-allocate the load balancing database.
  int totals[2];
  totals[0] = theLbdb->GetObjDataSz();
  totals[1] = theLbdb->GetCommDataSz();

  CkCallback countsReady(CkIndex_CentralLB::ReceiveCounts((CkReductionMsg*)NULL),
                         thisProxy[0]);
  contribute(sizeof(totals), totals, CkReduction::sum_int, countsReady);
  reduction_started = 1;
#else
  SendStats();
#endif
#endif
}
Esempio n. 2
0
void CentralLB::endMigrationDone(int balancing){
    DEBUGF(("[%d] CentralLB::endMigrationDone step %d\n",CkMyPe(),step()));


  if (balancing && _lb_args.syncResume()) {
    CkCallback cb(CkIndex_CentralLB::ResumeClients((CkReductionMsg*)NULL),
                  thisProxy);
    contribute(0, NULL, CkReduction::sum_int, cb);
  }
  else{
    if(CmiNodeAlive(CkMyPe())){
    DEBUGF(("[%d] Sending ResumeClients balancing %d \n",CkMyPe(),balancing));
    thisProxy [CkMyPe()].ResumeClients(balancing);
    }
  }

}
Esempio n. 3
0
/**
 * Entry point reached when this PE arrives at the synchronisation point.
 *
 * If QueryBalanceNow() declines the current step, or there is only one
 * processor, the step is closed immediately with MigrationDone(0).
 * Otherwise ProcessAtSync() is invoked on the local PE, provided the PE is
 * alive. The whole body is compiled out when CMK_LBDB_ON is off.
 */
void CentralLB::AtSync()
{
#if CMK_LBDB_ON
  DEBUGF(("[%d] CentralLB AtSync step %d!!!!!\n",CkMyPe(),step()));
#if CMK_MEM_CHECKPOINT
  CkSetInLdb();
#endif
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
  CpvAccess(_currentObj)=this;
#endif

  // With a single processor there is nothing to balance.
  const bool balanceNow = QueryBalanceNow(step()) && CkNumPes() != 1;

  if (balanceNow) {
    if (CmiNodeAlive(CkMyPe())) {
      thisProxy[CkMyPe()].ProcessAtSync();
    }
    return;
  }

  MigrationDone(0);
#endif
}
Esempio n. 4
0
/**
 * Closes the current load balancing step on this PE.
 *
 * Resets the migration counters, clears the load statistics when a balancing
 * step actually ran, advances the step counter, runs the registered
 * callbacks and finally triggers ResumeClients. In message-logging builds
 * (_FAULT_MLOG_ / _FAULT_CAUSAL_) the resume is not issued here; instead
 * resumeCentralLbAfterChkpt is handed to startLoadBalancingMlog.
 *
 * @param balancing  Nonzero when a load balancing step was actually performed.
 */
void CentralLB::MigrationDone(int balancing)
{
#if CMK_LBDB_ON
  // Reset the migration bookkeeping.
  migrates_expected = -1;
  migrates_completed = 0;

  // Load statistics are only cleared after a real balancing step.
  if (balancing) {
    theLbdb->ClearLoads();
  }

  // Move on to the next step.
  theLbdb->incStep();
  DEBUGF(("[%d] Incrementing Step %d \n",CkMyPe(),step()));

#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
  savedBalancing = balancing;
  startLoadBalancingMlog(&resumeCentralLbAfterChkpt,(void *)this);
#endif

  // Registered callbacks first, then the strategy's own hook.
  LBDatabase::Object()->MigrationDone();
  LoadbalanceDone(balancing);

#if (!defined(_FAULT_MLOG_) && !defined(_FAULT_CAUSAL_))
  const bool barrierResume = balancing && _lb_args.syncResume();

  if (barrierResume) {
    // Synchronised resume: an empty reduction serves as the barrier.
    CkCallback resumeAll(CkIndex_CentralLB::ResumeClients((CkReductionMsg*)NULL),
                         thisProxy);
    contribute(0, NULL, CkReduction::sum_int, resumeAll);
  } else if (CmiNodeAlive(CkMyPe())) {
    thisProxy[CkMyPe()].ResumeClients(balancing);
  }
#if CMK_GRID_QUEUE_AVAILABLE
  CmiGridQueueDeregisterAll ();
  CpvAccess(CkGridObject) = NULL;
#endif
#endif
#endif
}
Esempio n. 5
0
/**
 * Applies the migration decision held in storedMigrateMsg on this PE.
 *
 * Copies the expected per-PE loads into the database, starts the migration
 * of every object that moves away from this PE, counts the objects this PE
 * has to wait for, records the next balancer PE, and calls MigrationDone(1)
 * right away when no arrival is pending. The stored message is deleted
 * before returning, except on the stale-step path of the message-logging
 * build (see the note there).
 *
 * @param msg  Message that triggered this call; its contents are not read
 *             and it is deleted immediately.
 */
void CentralLB::ProcessReceiveMigration(CkReductionMsg  *msg)
{
#if CMK_LBDB_ON
  int i;
  LBMigrateMsg *m = storedMigrateMsg;
  CmiAssert(m!=NULL);
  delete msg;

#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
  // Allocated only when _restartFlag is set; start from NULL so the pointer
  // never holds an indeterminate value.
  int *dummyCounts = NULL;

  DEBUGF(("[%d] Starting ReceiveMigration WITH step %d m->step %d\n",CkMyPe(),step(),m->step));
  // CmiPrintf("[%d] Starting ReceiveMigration step %d m->step %d\n",CkMyPe(),step(),m->step);
  if(step() > m->step){
    // The decision belongs to a step older than the current one: ignore it.
    // NOTE(review): m is not deleted on this path, unlike the dead-node
    // path below -- confirm who owns storedMigrateMsg here.
    return;
  }
  lbDecisionCount = m->lbDecisionCount;
#endif

  if (_lb_args.debug() > 1) {
    // Print from every 1024th PE only.
    if (CkMyPe()%1024==0) {
      CmiPrintf("[%d] Starting ReceiveMigration step %d at %f\n",CkMyPe(),step(), CmiWallTimer());
    }
  }

  for (i=0; i<CkNumPes(); i++) {
    theLbdb->lastLBInfo.expectedLoad[i] = m->expectedLoad[i];
  }
  CmiAssert(migrates_expected <= 0 || migrates_completed == migrates_expected);
  /*FAULT_EVAC*/
  // A PE that is not alive takes no part in the migration.
  if(!CmiNodeAlive(CkMyPe())){
    delete m;
    return;
  }
  migrates_expected = 0;
  future_migrates_expected = 0;
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
  int sending=0;
  int dummy=0;
  LBDB *_myLBDB = theLbdb->getLBDB();
  if(_restartFlag){
    // Value-initialisation zeroes the array; no separate bzero() needed.
    dummyCounts = new int[CmiNumPes()]();
  }
#endif
  for(i=0; i < m->n_moves; i++) {
    MigrateInfo& move = m->moves[i];
    const int me = CkMyPe();
    if (move.from_pe == me && move.to_pe != me) {
      DEBUGF(("[%d] migrating object to %d\n",move.from_pe,move.to_pe));
      // migrate object, in case it is already gone, inform toPe
#if (!defined(_FAULT_MLOG_) && !defined(_FAULT_CAUSAL_))
      if (theLbdb->Migrate(move.obj,move.to_pe) == 0) {
        thisProxy[move.to_pe].MissMigrate(!move.async_arrival);
      }
#else
      if(_restartFlag == 0){
        DEBUG(CmiPrintf("[%d] need to move object from %d to %d \n",CkMyPe(),move.from_pe,move.to_pe));
        theLbdb->Migrate(move.obj,move.to_pe);
        sending++;
      }else{
        if(_myLBDB->validObjHandle(move.obj)){
          DEBUG(CmiPrintf("[%d] need to move object from %d to %d \n",CkMyPe(),move.from_pe,move.to_pe));
          theLbdb->Migrate(move.obj,move.to_pe);
          sending++;
        }else{
          // After a restart the handle is no longer valid here: count a
          // dummy move for the destination instead of migrating.
          DEBUG(CmiPrintf("[%d] dummy move to pe %d detected after restart \n",CmiMyPe(),move.to_pe));
          dummyCounts[move.to_pe]++;
          dummy++;
        }
      }
#endif
    } else if (move.from_pe != me && move.to_pe == me) {
      DEBUGF(("[%d] expecting object from %d\n",move.to_pe,move.from_pe));
      if (!move.async_arrival) migrates_expected++;
      else future_migrates_expected++;
    }
    else {
#if CMK_GLOBAL_LOCATION_UPDATE
      UpdateLocation(move);
#endif
    }

  }
  DEBUGF(("[%d] in ReceiveMigration %d moves expected: %d future expected: %d\n",CkMyPe(),m->n_moves, migrates_expected, future_migrates_expected));
  // if (_lb_debug) CkPrintf("[%d] expecting %d objects migrating.\n", CkMyPe(), migrates_expected);

#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
  if(_restartFlag){
    sendDummyMigrationCounts(dummyCounts);
    _restartFlag  =0;
    delete []dummyCounts;
  }
#endif


#if 0
  if (m->n_moves ==0) {
    theLbdb->SetLBPeriod(theLbdb->GetLBPeriod()*2);
  }
#endif
  cur_ld_balancer = m->next_lb;
  if((CkMyPe() == cur_ld_balancer) && (cur_ld_balancer != 0)){
    LBDatabaseObj()->set_avail_vector(m->avail_vector, -2);
  }

  // Nothing left to wait for: finish the step immediately.
  if (migrates_expected == 0 || migrates_completed == migrates_expected)
    MigrationDone(1);
  delete m;

//	CkEvacuatedElement();
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
//  migrates_expected = 0;
//  //  ResumeClients(1);
#endif
#endif
}
Esempio n. 6
0
/**
 * Collects the per-PE statistics messages carried by msg.
 *
 * Each contained message is stored in statsMsgsList under its sender's PE
 * and its processor data is recorded (via depositData() under USE_REDUCTION,
 * otherwise directly into statsData). Messages from PEs that are not alive
 * are skipped, and a second message from the same PE is reported and
 * ignored. Once stats_msg_count equals CkNumValidPes(), LoadBalance() is
 * invoked on the local PE.
 *
 * @param msg  Marshalled container holding one or more CLBStatsMsg.
 */
void CentralLB::ReceiveStats(CkMarshalledCLBStatsMessage &msg)
{
#if CMK_LBDB_ON
  // Create the per-PE message table on first use, with every slot empty.
  if (statsMsgsList == NULL) {
    const int numPes = CkNumPes();
    statsMsgsList = new CLBStatsMsg*[numPes];
    CmiAssert(statsMsgsList != NULL);
    for (int slot = 0; slot < numPes; ++slot) {
      statsMsgsList[slot] = 0;
    }
  }
  if (statsData == NULL) {
    statsData = new LDStats;
  }

  // Walk through every CLBStatsMsg carried by the incoming message.
  const int count = msg.getCount();
  for (int idx = 0; idx < count; ++idx)
  {
    CLBStatsMsg *statsMsg = msg.getMessage(idx);
    CmiAssert(statsMsg!=NULL);
    const int pe = statsMsg->from_pe;
    DEBUGF(("Stats msg received, %d %d %d %p step %d\n", pe,stats_msg_count,statsMsg->n_objs,statsMsg,step()));
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
/*
 *  if(m->step < step()){
 *    //TODO: if a processor is redoing an old load balance step..
 *    //tell it that the step is done and that it should not perform any migrations
 *      thisProxy[pe].ReceiveDummyMigration();
 *  }*/
#endif

    // Statistics from a PE that is not alive are ignored.
    // NOTE(review): the skipped message is not released here -- confirm
    // whether getMessage() transfers ownership.
    if (!CmiNodeAlive(pe)) {
      DEBUGF(("[%d] ReceiveStats called from invalidProcessor %d\n",CkMyPe(),pe));
      continue;
    }

    if (statsMsg->avail_vector != NULL) {
      LBDatabaseObj()->set_avail_vector(statsMsg->avail_vector, statsMsg->next_lb);
    }

    // Only the first message from a given PE is accepted.
    if (statsMsgsList[pe] != 0) {
      CkPrintf("*** Unexpected CLBStatsMsg in ReceiveStats from PE %d ***\n",
	     pe);
      continue;
    }

    statsMsgsList[pe] = statsMsg;
#if USE_REDUCTION
    depositData(statsMsg);
#else
    // Store the per-processor data right away.
    struct ProcStats &peStats = statsData->procs[pe];
    peStats.pe = pe;
    peStats.total_walltime = statsMsg->total_walltime;
    peStats.idletime = statsMsg->idletime;
    peStats.bg_walltime = statsMsg->bg_walltime;
#if CMK_LB_CPUTIMER
    peStats.total_cputime = statsMsg->total_cputime;
    peStats.bg_cputime = statsMsg->bg_cputime;
#endif
    peStats.pe_speed = statsMsg->pe_speed;
    //peStats.utilization = 1.0;
    peStats.available = CmiTrue;
    peStats.n_objs = statsMsg->n_objs;

    statsData->n_objs += statsMsg->n_objs;
    statsData->n_comm += statsMsg->n_comm;
#endif
#if defined(TEMP_LDB)
    // NOTE(review): peStats is only declared when USE_REDUCTION is off --
    // confirm TEMP_LDB is never combined with USE_REDUCTION.
    peStats.pe_temp=statsMsg->pe_temp;
    peStats.pe_speed=statsMsg->pe_speed;
#endif

    stats_msg_count++;
  }    // end of for

  const int clients = CkNumValidPes();
  DEBUGF(("THIS POINT count = %d, clients = %d\n",stats_msg_count,clients));

  // Keep waiting until every valid PE has reported.
  if (stats_msg_count != clients) {
    return;
  }

  DEBUGF(("[%d] All stats messages received \n",CmiMyPe()));
  statsData->nprocs() = stats_msg_count;
  thisProxy[CkMyPe()].LoadBalance();
#endif
}
Esempio n. 7
0
static void _exitHandler(envelope *env)
{
  DEBUGF(("exitHandler called on %d msgtype: %d\n", CkMyPe(), env->getMsgtype()));
  switch(env->getMsgtype()) {
    case StartExitMsg:
      CkAssert(CkMyPe()==0);
      if (!_CkExitFnVec.isEmpty()) {
        CkExitFn fn = _CkExitFnVec.deq();
        fn();
        break;
      }
      // else goto next
    case ExitMsg:
      CkAssert(CkMyPe()==0);
      if(_exitStarted) {
        CmiFree(env);
        return;
      }
      _exitStarted = 1;
      CkNumberHandler(_charmHandlerIdx,(CmiHandler)_discardHandler);
      CkNumberHandler(_bocHandlerIdx, (CmiHandler)_discardHandler);
      env->setMsgtype(ReqStatMsg);
      env->setSrcPe(CkMyPe());
      // if exit in ring, instead of broadcasting, send in ring
      if (_ringexit){
	DEBUGF(("[%d] Ring Exit \n",CkMyPe()));
        const int stride = CkNumPes()/_ringtoken;
        int pe = 0;
        while (pe<CkNumPes()) {
          CmiSyncSend(pe, env->getTotalsize(), (char *)env);
          pe += stride;
        }
        CmiFree(env);
      }else{
	CmiSyncBroadcastAllAndFree(env->getTotalsize(), (char *)env);
      }	
      break;
    case ReqStatMsg:
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
      _messageLoggingExit();
#endif
      DEBUGF(("ReqStatMsg on %d\n", CkMyPe()));
      CkNumberHandler(_charmHandlerIdx,(CmiHandler)_discardHandler);
      CkNumberHandler(_bocHandlerIdx, (CmiHandler)_discardHandler);
      /*FAULT_EVAC*/
      if(CmiNodeAlive(CkMyPe())){
#if CMK_WITH_STATS
         _sendStats();
#endif
      _mainDone = 1; // This is needed because the destructors for
                     // readonly variables will be called when the program
		     // exits. If the destructor is called while _mainDone
		     // is 0, it will assume that the readonly variable was
		     // declared locally. On all processors other than 0, 
		     // _mainDone is never set to 1 before the program exits.
#if CMK_TRACE_ENABLED
      if (_ringexit) traceClose();
#endif
    }
      if (_ringexit) {
        int stride = CkNumPes()/_ringtoken;
        int pe = CkMyPe()+1;
        if (pe < CkNumPes() && pe % stride != 0)
          CmiSyncSendAndFree(pe, env->getTotalsize(), (char *)env);
        else
          CmiFree(env);
      }
      else
        CmiFree(env);
      //everyone exits here - there may be issues with leftover messages in the queue
#if CMK_WITH_STATS
      if(CkMyPe())
#endif
      {
        DEBUGF(("[%d] Calling converse exit \n",CkMyPe()));
        ConverseExit();
        if(CharmLibInterOperate)
          CpvAccess(interopExitFlag) = 1;
      }
      break;
#if CMK_WITH_STATS
    case StatMsg:
      CkAssert(CkMyPe()==0);
      _allStats[env->getSrcPe()] = (Stats*) EnvToUsr(env);
      _numStatsRecd++;
      DEBUGF(("StatMsg on %d with %d\n", CkMyPe(), _numStatsRecd));
			/*FAULT_EVAC*/
      if(_numStatsRecd==CkNumValidPes()) {
        _printStats();
        DEBUGF(("[%d] Calling converse exit \n",CkMyPe()));
        ConverseExit();
        if(CharmLibInterOperate)
          CpvAccess(interopExitFlag) = 1;
      }
      break;
#endif
    default:
      CmiAbort("Internal Error(_exitHandler): Unknown-msg-type. Contact Developers.\n");
  }
}