Beispiel #1
0
// Default load balancing strategy.
//
// Calls work(stats), builds the migration message from the resulting stats
// via createMigrateMsg(), and returns that message. When LB debugging is
// enabled it also prints timing/memory diagnostics and records the elapsed
// strategy time through theLbdb->SetStrategyCost().
//
// Returns NULL when the load-balancing database is compiled out
// (CMK_LBDB_ON not set).
LBMigrateMsg* CentralLB::Strategy(LDStats* stats)
{
#if CMK_LBDB_ON
  const double beginTime = CkWallTimer();
  if (_lb_args.debug()) {
    CkPrintf("CharmLB> %s: PE [%d] strategy starting at %f\n", lbname, cur_ld_balancer, beginTime);
  }

  work(stats);

  // At debug level > 2, dump the destination processor of every object.
  if (_lb_args.debug() > 2) {
    CkPrintf("CharmLB> Obj Map:\n");
    int objIdx = 0;
    while (objIdx < stats->n_objs) {
      CkPrintf("%d ", stats->to_proc[objIdx]);
      objIdx++;
    }
    CkPrintf("\n");
  }

  LBMigrateMsg *migration = createMigrateMsg(stats);

	/* Extra feature for MetaBalancer (currently disabled)
  if (_lb_args.metaLbOn()) {
    int clients = CkNumPes();
    LBInfo info(clients);
    getPredictedLoadWithMsg(stats, clients, msg, info, 0);
    LBRealType mLoad, mCpuLoad, totalLoad, totalLoadWComm;
    info.getSummary(mLoad, mCpuLoad, totalLoad);
    theLbdb->UpdateDataAfterLB(mLoad, mCpuLoad, totalLoad/clients);
  }
	*/

  // Everything below is diagnostics only.
  if (!_lb_args.debug()) {
    return migration;
  }

  const double finishTime = CkWallTimer();
  const double elapsed = finishTime - beginTime;
  envelope *msgEnvelope = UsrToEnv(migration);

  double dbMemKB = LBDatabase::Object()->useMem()/1000;
  CkPrintf("CharmLB> %s: PE [%d] Memory: LBManager: %d KB CentralLB: %d KB\n",
           lbname, cur_ld_balancer, (int)dbMemKB, (int)(useMem()/1000));
  CkPrintf("CharmLB> %s: PE [%d] #Objects migrating: %d, LBMigrateMsg size: %.2f MB\n",
           lbname, cur_ld_balancer, migration->n_moves,
           msgEnvelope->getTotalsize()/1024.0/1024.0);
  CkPrintf("CharmLB> %s: PE [%d] strategy finished at %f duration %f s\n",
           lbname, cur_ld_balancer, finishTime, elapsed);
  theLbdb->SetStrategyCost(elapsed);

  return migration;
#else
  return NULL;
#endif
}
Beispiel #2
0
// Report load-balancing quality and memory usage to element 0 of thisProxy,
// and print how long producing the summary took.
//
//   stats - load statistics to summarize; may be NULL, in which case the
//           quality report is skipped.
//   count - passed to LBInfo as its size and to getInfo() as the count.
void HybridBaseLB::printSummary(LDStats *stats, int count)
{
  const double tBegin = CkWallTimer();

  // Quality report: only produced at level 1 and when stats are available.
  if (stats != NULL && currentLevel == 1) {
    LBInfo loadInfo(count);
    loadInfo.getInfo(stats, count, 1);   // original author's note: "no comm cost"

    double maxLoad, maxCpuLoad, sumLoad;
    loadInfo.getSummary(maxLoad, maxCpuLoad, sumLoad);

    int nonlocalMsgs, nonlocalBytes;
    stats->computeNonlocalComm(nonlocalMsgs, nonlocalBytes);

    // Byte count is reported in KB (integer division).
    thisProxy[0].reportLBQulity(maxLoad, maxCpuLoad, sumLoad,
                                nonlocalMsgs, nonlocalBytes/1024);
  }

  // Memory report: only produced at level numLevels()-2.
  if (currentLevel == tree->numLevels()-2) {
    double memKB = (1.0*useMem())/1024;
    thisProxy[0].reportLBMem(memKB);
  }

  CkPrintf("[%d] Print Summary takes %f seconds. \n", CkMyPe(), CkWallTimer()-tBegin);
}
Beispiel #3
0
// Run one centralized load-balancing step.
//
// Sequence, as performed below:
//   1. Prepare statsData (buildStats() or reset of statsMsgsList, depending
//      on USE_REDUCTION), reset adaptive state, optionally normalize speeds.
//   2. Optionally replay a dumped database (simulation mode).
//   3. Copy the processor availability vector into statsData and preprocess.
//   4. Call Strategy() to obtain the migration message.
//   5. Fill in the remaining message fields (avail vector, next balancer,
//      and - when summaries are requested - the expected per-PE load).
//   6. Broadcast the message through thisProxy.ReceiveMigration() and clear
//      the statistics for the next cycle.
//
// The whole body is compiled out when CMK_LBDB_ON is not set.
void CentralLB::LoadBalance()
{
#if CMK_LBDB_ON
  int proc;
  const int clients = CkNumPes();

#if ! USE_REDUCTION
  // build data
  buildStats();
#else
  for (proc = 0; proc < clients; proc++) statsMsgsList[proc] = NULL;
#endif

  theLbdb->ResetAdaptive();
  if (!_lb_args.samePeSpeed()) statsData->normalize_speed();

  if (_lb_args.debug()) 
      CmiPrintf("\nCharmLB> %s: PE [%d] step %d starting at %f Memory: %f MB\n",
		  lbname, cur_ld_balancer, step(), start_lb_time,
		  CmiMemoryUsage()/(1024.0*1024.0));

  // if we are in simulation mode read data
  if (LBSimulation::doSimulation) simulationRead();

  char *availVector = LBDatabaseObj()->availVector();
  for(proc = 0; proc < clients; proc++)
      statsData->procs[proc].available = (CmiBool)availVector[proc];

  preprocess(statsData);

  // Summary of the load distribution before the strategy runs.
  if (_lb_args.printSummary()) {
      LBInfo info(clients);
        // not take comm data
      info.getInfo(statsData, clients, 0);
      LBRealType mLoad, mCpuLoad, totalLoad;
      info.getSummary(mLoad, mCpuLoad, totalLoad);
      int nmsgs, nbytes;
      statsData->computeNonlocalComm(nmsgs, nbytes);
      CkPrintf("[%d] Load Summary (before LB): max (with bg load): %f max (obj only): %f average: %f at step %d nonlocal: %d msgs %.2fKB.\n", CkMyPe(), mLoad, mCpuLoad, totalLoad/clients, step(), nmsgs, 1.0*nbytes/1024);
//      if (_lb_args.debug() > 1) {
//        for (int i=0; i<statsData->n_objs; i++)
//          CmiPrintf("[%d] %.10f %.10f\n", i, statsData->objData[i].minWall, statsData->objData[i].maxWall);
//      }
  }

#if CMK_REPLAYSYSTEM
  // Remember, per source PE, the ldb handle found in the object data so it
  // can be written back into the migration records after
  // CpdHandleLBMessage() below. Initialized to NULL so the pointer is never
  // indeterminate when _replaySystem is off.
  LDHandle *loadBalancer_pointers = NULL;
  if (_replaySystem) {
    loadBalancer_pointers = (LDHandle*)malloc(CkNumPes()*sizeof(LDHandle));
    for (int i=0; i<statsData->n_objs; ++i) loadBalancer_pointers[statsData->from_proc[i]] = statsData->objData[i].handle.omhandle.ldb;
  }
#endif
  
  LBMigrateMsg* migrateMsg = Strategy(statsData);
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
	migrateMsg->step = step();
#endif

#if CMK_REPLAYSYSTEM
  CpdHandleLBMessage(&migrateMsg);
  if (_replaySystem) {
    for (int i=0; i<migrateMsg->n_moves; ++i) migrateMsg->moves[i].obj.omhandle.ldb = loadBalancer_pointers[migrateMsg->moves[i].from_pe];
    free(loadBalancer_pointers);
  }
#endif
  
  LBDatabaseObj()->get_avail_vector(migrateMsg->avail_vector);
  migrateMsg->next_lb = LBDatabaseObj()->new_lbbalancer();

  // if this is the step at which we need to dump the database
  simulationWrite();

//  calculate predicted load
//  very time consuming though, so only happen when debugging is on
  if (_lb_args.printSummary()) {
      LBInfo info(clients);
        // not take comm data
      getPredictedLoadWithMsg(statsData, clients, migrateMsg, info, 0);
      LBRealType mLoad, mCpuLoad, totalLoad;
      info.getSummary(mLoad, mCpuLoad, totalLoad);
      int nmsgs, nbytes;
      statsData->computeNonlocalComm(nmsgs, nbytes);
      CkPrintf("[%d] Load Summary (after LB): max (with bg load): %f max (obj only): %f average: %f at step %d nonlocal: %d msgs %.2fKB useMem: %.2fKB.\n", CkMyPe(), mLoad, mCpuLoad, totalLoad/clients, step(), nmsgs, 1.0*nbytes/1024, (1.0*useMem())/1024);
      for (int i=0; i<clients; i++)
        migrateMsg->expectedLoad[i] = info.peLoads[i];
  }

  DEBUGF(("[%d]calling recv migration\n",CkMyPe()));
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_)) 
    lbDecisionCount++;
    migrateMsg->lbDecisionCount = lbDecisionCount;
#endif

  // Broadcast the migration decisions. An alternative that split the
  // message per processor (extractMigrateMsg) used to sit in an unreachable
  // `else` branch here and has been removed as dead code.
  thisProxy.ReceiveMigration(migrateMsg);

  // Zero out data structures for next cycle
  statsData->clear();
  stats_msg_count=0;
#endif
}
Beispiel #4
0
// Replay dumped load-balancing databases through the strategy.
//
// For each remaining simulation step (LBSimulation::simStepSize counts down,
// LBSimulation::simStep counts up) this:
//   - reads "<dumpFile>.<simStep>" via readStatsMsgs(),
//   - from the second step on (and if the processor count did not change),
//     prints the difference between the previous step's results and the
//     results computed for the newly read data without any migration,
//   - runs preprocess() + Strategy() on the data and prints either the
//     decisions or the simulation results.
// After the last step it frees its allocations and calls CkExit().
void CentralLB::simulationRead() {
  LBSimulation *simResults = NULL, *realResults = NULL;
  // Message with zero moves; used below to evaluate the data "as is".
  LBMigrateMsg *voidMessage = new (0,0,0,0) LBMigrateMsg();
  voidMessage->n_moves=0;
  for ( ;LBSimulation::simStepSize > 0; --LBSimulation::simStepSize, ++LBSimulation::simStep) {
    // here we are supposed to read the data from the dump database
    // Build "<dumpFile>.<simStep>". The initial guess leaves room for a
    // two-digit step; snprintf never writes past the buffer and reports the
    // length it needs, so a too-small buffer is simply reallocated to fit.
    int simFileSize = strlen(LBSimulation::dumpFile) + 4;
    char *simFileName = (char *)malloc(simFileSize);
    int needed;
    while ((needed = snprintf(simFileName, simFileSize, "%s.%d", LBSimulation::dumpFile, LBSimulation::simStep)) >= simFileSize) {
      free(simFileName);
      simFileSize = needed + 1;
      simFileName = (char *)malloc(simFileSize);
    }
    readStatsMsgs(simFileName);

    // allocate simResults (only the first step)
    if (simResults == NULL) {
      simResults = new LBSimulation(LBSimulation::simProcs);
      realResults = new LBSimulation(LBSimulation::simProcs);
    }
    else {
      // should be the same number of procs of the original simulation!
      if (!LBSimulation::procsChanged) {
	// it means we have a previous step, so in simResults there is data.
	// we can now print the real effects of the load balancer during the simulation
	// or print the difference between the predicted data and the real one.
	realResults->reset();
	// reset to_proc of statsData to be equal to from_proc
	for (int k=0; k < statsData->n_objs; ++k) statsData->to_proc[k] = statsData->from_proc[k];
	findSimResults(statsData, LBSimulation::simProcs, voidMessage, realResults);
	simResults->PrintDifferences(realResults,statsData);
      }
      simResults->reset();
    }

    // now pass it to the strategy routine
    double startT = CkWallTimer();
    preprocess(statsData);
    CmiPrintf("%s> Strategy starts ... \n", lbname);
    LBMigrateMsg* migrateMsg = Strategy(statsData);
    CmiPrintf("%s> Strategy took %fs memory usage: CentralLB: %d KB.\n",
               lbname, CkWallTimer()-startT, (int)(useMem()/1000));

    // now calculate the results of the load balancing simulation
    findSimResults(statsData, LBSimulation::simProcs, migrateMsg, simResults);

    // now we have the simulation data, so print it and loop
    CmiPrintf("Charm++> LBSim: Simulation of load balancing step %d done.\n",LBSimulation::simStep);
    // **CWL** Officially recording my disdain here for using ints for bool
    if (LBSimulation::showDecisionsOnly) {
      simResults->PrintDecisions(migrateMsg, simFileName, 
				 LBSimulation::simProcs);
    } else {
      simResults->PrintSimulationResults();
    }

    free(simFileName);
    delete migrateMsg;
    CmiPrintf("Charm++> LBSim: Passing to the next step\n");
  }
  // Release everything allocated above (deleting NULL is a no-op when the
  // loop never ran).
  delete simResults;
  delete realResults;
  delete voidMessage;
  CmiPrintf("Charm++> Exiting...\n");
  CkExit();
}
Beispiel #5
0
//  LDStats data sent to parent contains real PE
//  LDStats in parent should contain relative PE
void HybridBaseLB::Loadbalancing(int atlevel)
{
  int i;

  CmiAssert(atlevel >= 1);
  CmiAssert(tree->isroot(CkMyPe(), atlevel));

  LevelData *lData = levelData[atlevel];
  LDStats *statsData = lData->statsData;
  CmiAssert(statsData);

  // at this time, all objects processor location is relative, and 
  // all incoming objects from outside group belongs to the fake root proc.

  // clear background load if needed
  if (_lb_args.ignoreBgLoad()) statsData->clearBgLoad();

  currentLevel = atlevel;
  int nclients = lData->nChildren;

  DEBUGF(("[%d] Calling Strategy ... \n", CkMyPe()));
  double start_lb_time, strat_end_time;
  start_lb_time = CkWallTimer();

  if ((statsStrategy == SHRINK || statsStrategy == SHRINK_NULL) && atlevel == tree->numLevels()-1) {
    // no obj and comm data
    LBVectorMigrateMsg* migrateMsg = VectorStrategy(statsData);
    strat_end_time = CkWallTimer();

    // send to children 
    thisProxy.ReceiveVectorMigration(migrateMsg, nclients, lData->children);
  }
  else {
    LBMigrateMsg* migrateMsg = Strategy(statsData);
    strat_end_time = CkWallTimer();

    // send to children 
    //CmiPrintf("[%d] level: %d nclients:%d children: %d %d\n", CkMyPe(), atlevel, nclients, lData->children[0], lData->children[1]);
    if (!group1_created)
      thisProxy.ReceiveMigration(migrateMsg, nclients, lData->children);
    else {
        // send in multicast tree
      thisProxy.ReceiveMigration(migrateMsg, group1);
      //CkSendMsgBranchGroup(CkIndex_HybridBaseLB::ReceiveMigration(NULL),  migrateMsg, thisgroup, group1);
    }
    // CkPrintf("[%d] ReceiveMigration takes %f \n", CkMyPe(), CkWallTimer()-strat_end_time);
  }

  if (_lb_args.debug()>0){
    CkPrintf("[%d] Loadbalancing Level %d (%d children) started at %f, elapsed time %f\n", CkMyPe(), atlevel, lData->nChildren, start_lb_time, strat_end_time-start_lb_time);
    if (atlevel == tree->numLevels()-1) {
    	CkPrintf("[%d] %s memUsage: %.2fKB\n", CkMyPe(), lbName(), (1.0*useMem())/1024);
    }
  }

  // inform new objects that are from outside group
  if (atlevel < tree->numLevels()-1) {
    for (i=0; i<statsData->n_objs; i++) {
      CmiAssert(statsData->from_proc[i] != -1);   // ???
      if (statsData->from_proc[i] == nclients)  {    // from outside
        CmiAssert(statsData->to_proc[i] < nclients);
        int tope = lData->children[statsData->to_proc[i]];
        // comm data
        CkVec<LDCommData> comms;
//        collectCommData(i, comms, atlevel);
        thisProxy[tope].ObjMigrated(statsData->objData[i], comms.getVec(), comms.size(), atlevel-1);
      }
    }
  }
}
Beispiel #6
0
// Finish a load-balancing step: advance the step counter, clear the level-0
// data, optionally print timing and a load summary, then clear the loads in
// the LB database and resume the clients.
//
//   balancing - non-zero enables the timing message (on PE 0, with debug on)
//               and the load summary (with printSummary on).
//
// The whole body is compiled out when CMK_LBDB_ON is not set.
void HbmLB::ResumeClients(int balancing)
{
#if CMK_LBDB_ON
  DEBUGF(("[%d] ResumeClients. \n", CkMyPe()));

  theLbdb->incStep();
  // reset 
  levelData[0]->clear();

  if (CkMyPe() == 0 && balancing) {
    const double finishedAt = CkWallTimer();
    if (_lb_args.debug()) {
      // step() was already incremented above, hence step()-1.
      CkPrintf("[%s] Load balancing step %d finished at %f duration %f\n",
               lbName(), step()-1, finishedAt, finishedAt - start_lb_time);
    }
  }

  if (balancing && _lb_args.printSummary()) {
    int peCount = 1;
    LBInfo summary(peCount);
    LDStats *localStats = &myStats;
    summary.getInfo(localStats, peCount, 0);	// original author's note: "no comm cost"

    LBRealType maxLoad, maxCpuLoad, sumLoad;
    summary.getSummary(maxLoad, maxCpuLoad, sumLoad);

    int nonlocalMsgs, nonlocalBytes;
    localStats->computeNonlocalComm(nonlocalMsgs, nonlocalBytes);
    const double nonlocalKB = nonlocalBytes/1024.0;

    CkPrintf("[%d] Load with %d objs: max (with comm): %f max (obj only): %f total: %f on %d processors at step %d useMem: %fKB nonlocal: %d %.2fKB.\n", CkMyPe(), localStats->n_objs, maxLoad, maxCpuLoad, sumLoad, peCount, step()-1, (1.0*useMem())/1024, nonlocalMsgs, nonlocalKB);
    thisProxy[0].reportLBQulity(maxLoad, maxCpuLoad, sumLoad, nonlocalMsgs, nonlocalKB);
  }

  // zero out stats
  theLbdb->ClearLoads();

  theLbdb->ResumeClients();
#endif
}