// default load balancing strategy LBMigrateMsg* CentralLB::Strategy(LDStats* stats) { #if CMK_LBDB_ON double strat_start_time = CkWallTimer(); if (_lb_args.debug()) CkPrintf("CharmLB> %s: PE [%d] strategy starting at %f\n", lbname, cur_ld_balancer, strat_start_time); work(stats); if (_lb_args.debug()>2) { CkPrintf("CharmLB> Obj Map:\n"); for (int i=0; i<stats->n_objs; i++) CkPrintf("%d ", stats->to_proc[i]); CkPrintf("\n"); } LBMigrateMsg *msg = createMigrateMsg(stats); /* Extra feature for MetaBalancer if (_lb_args.metaLbOn()) { int clients = CkNumPes(); LBInfo info(clients); getPredictedLoadWithMsg(stats, clients, msg, info, 0); LBRealType mLoad, mCpuLoad, totalLoad, totalLoadWComm; info.getSummary(mLoad, mCpuLoad, totalLoad); theLbdb->UpdateDataAfterLB(mLoad, mCpuLoad, totalLoad/clients); } */ if (_lb_args.debug()) { double strat_end_time = CkWallTimer(); envelope *env = UsrToEnv(msg); double lbdbMemsize = LBDatabase::Object()->useMem()/1000; CkPrintf("CharmLB> %s: PE [%d] Memory: LBManager: %d KB CentralLB: %d KB\n", lbname, cur_ld_balancer, (int)lbdbMemsize, (int)(useMem()/1000)); CkPrintf("CharmLB> %s: PE [%d] #Objects migrating: %d, LBMigrateMsg size: %.2f MB\n", lbname, cur_ld_balancer, msg->n_moves, env->getTotalsize()/1024.0/1024.0); CkPrintf("CharmLB> %s: PE [%d] strategy finished at %f duration %f s\n", lbname, cur_ld_balancer, strat_end_time, strat_end_time-strat_start_time); theLbdb->SetStrategyCost(strat_end_time - strat_start_time); } return msg; #else return NULL; #endif }
/**
 * Sends load-quality and memory figures to thisProxy[0] and prints how long
 * producing them took.
 *
 * The quality report is only produced when currentLevel is 1 and stats is
 * non-NULL; the memory report only when currentLevel equals
 * tree->numLevels()-2.
 *
 * @param stats  statistics to summarise (may be NULL, in which case the
 *               quality report is skipped).
 * @param count  value handed to the LBInfo constructor and to getInfo().
 */
void HybridBaseLB::printSummary(LDStats *stats, int count)
{
  const double beganAt = CkWallTimer();

#if 1
  if (stats != NULL && currentLevel == 1) {
    LBInfo summary(count);
    // Third argument is 1 here; the original note reads "no comm cost".
    summary.getInfo(stats, count, 1);

    double mLoad, mCpuLoad, totalLoad;
    summary.getSummary(mLoad, mCpuLoad, totalLoad);

    int nonlocalMsgs, nonlocalBytes;
    stats->computeNonlocalComm(nonlocalMsgs, nonlocalBytes);

    //CkPrintf("[%d] Load Summary: max (with comm): %f max (obj only): %f total: %f on %d processors at step %d useMem: %fKB nonlocal: %d %dKB.\n", CkMyPe(), maxLoad, mCpuLoad, totalLoad, count, step(), (1.0*useMem())/1024, nmsgs, nbytes/1024);

    // Byte count is reported in whole KB (integer division).
    thisProxy[0].reportLBQulity(mLoad, mCpuLoad, totalLoad,
                                nonlocalMsgs, nonlocalBytes/1024);
  }
#endif

  if (currentLevel == tree->numLevels()-2) {
    const double memKB = (1.0*useMem())/1024;
    thisProxy[0].reportLBMem(memKB);
  }

  CkPrintf("[%d] Print Summary takes %f seconds. \n",
           CkMyPe(), CkWallTimer()-beganAt);
}
/**
 * Runs one centralized load balancing step.
 *
 * Sequence, as performed below:
 *   1. build the statistics (or, with USE_REDUCTION, clear statsMsgsList);
 *   2. reset adaptive state, optionally normalize processor speeds;
 *   3. if LBSimulation::doSimulation is set, hand over to simulationRead();
 *   4. copy the availability vector into statsData->procs and preprocess();
 *   5. optionally print the "before LB" load summary;
 *   6. call Strategy() to obtain the migration message;
 *   7. fill in avail_vector / next_lb, call simulationWrite();
 *   8. optionally print the "after LB" summary and store expectedLoad;
 *   9. broadcast the message through thisProxy.ReceiveMigration();
 *  10. clear statsData and stats_msg_count for the next cycle.
 *
 * The whole body is compiled only when CMK_LBDB_ON is set.
 */
void CentralLB::LoadBalance()
{
#if CMK_LBDB_ON
  const int clients = CkNumPes();

#if ! USE_REDUCTION
  // build data
  buildStats();
#else
  for (int proc = 0; proc < clients; proc++) statsMsgsList[proc] = NULL;
#endif

  theLbdb->ResetAdaptive();
  if (!_lb_args.samePeSpeed()) statsData->normalize_speed();

  if (_lb_args.debug())
    CmiPrintf("\nCharmLB> %s: PE [%d] step %d starting at %f Memory: %f MB\n",
              lbname, cur_ld_balancer, step(), start_lb_time,
              CmiMemoryUsage()/(1024.0*1024.0));

  // if we are in simulation mode read data
  if (LBSimulation::doSimulation) simulationRead();

  // Mirror the database's availability vector into the per-processor stats.
  char *availVector = LBDatabaseObj()->availVector();
  for (int proc = 0; proc < clients; proc++)
    statsData->procs[proc].available = (CmiBool)availVector[proc];

  preprocess(statsData);

  // CkPrintf("Before Calling Strategy\n");

  if (_lb_args.printSummary()) {
    LBInfo info(clients);
    // not take comm data
    info.getInfo(statsData, clients, 0);
    LBRealType mLoad, mCpuLoad, totalLoad;
    info.getSummary(mLoad, mCpuLoad, totalLoad);
    int nmsgs, nbytes;
    statsData->computeNonlocalComm(nmsgs, nbytes);
    CkPrintf("[%d] Load Summary (before LB): max (with bg load): %f max (obj only): %f average: %f at step %d nonlocal: %d msgs %.2fKB.\n",
             CkMyPe(), mLoad, mCpuLoad, totalLoad/clients, step(), nmsgs, 1.0*nbytes/1024);
    // if (_lb_args.debug() > 1) {
    //   for (int i=0; i<statsData->n_objs; i++)
    //     CmiPrintf("[%d] %.10f %.10f\n", i, statsData->objData[i].minWall, statsData->objData[i].maxWall);
    // }
  }

#if CMK_REPLAYSYSTEM
  // Per-PE table of ldb handles, indexed by from_proc; it is only allocated
  // and consulted when _replaySystem is set, so start from a known value.
  LDHandle *loadBalancer_pointers = NULL;
  if (_replaySystem) {
    loadBalancer_pointers = (LDHandle*)malloc(CkNumPes()*sizeof(LDHandle));
    for (int i=0; i<statsData->n_objs; ++i)
      loadBalancer_pointers[statsData->from_proc[i]] = statsData->objData[i].handle.omhandle.ldb;
  }
#endif

  LBMigrateMsg* migrateMsg = Strategy(statsData);
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
  migrateMsg->step = step();
#endif

#if CMK_REPLAYSYSTEM
  CpdHandleLBMessage(&migrateMsg);
  if (_replaySystem) {
    // Overwrite each move's ldb handle with the one recorded for its source PE.
    for (int i=0; i<migrateMsg->n_moves; ++i)
      migrateMsg->moves[i].obj.omhandle.ldb = loadBalancer_pointers[migrateMsg->moves[i].from_pe];
    free(loadBalancer_pointers);
  }
#endif

  LBDatabaseObj()->get_avail_vector(migrateMsg->avail_vector);
  migrateMsg->next_lb = LBDatabaseObj()->new_lbbalancer();

  // if this is the step at which we need to dump the database
  simulationWrite();

  // calculate predicted load
  // very time consuming though, so only happen when debugging is on
  if (_lb_args.printSummary()) {
    LBInfo info(clients);
    // not take comm data
    getPredictedLoadWithMsg(statsData, clients, migrateMsg, info, 0);
    LBRealType mLoad, mCpuLoad, totalLoad;
    info.getSummary(mLoad, mCpuLoad, totalLoad);
    int nmsgs, nbytes;
    statsData->computeNonlocalComm(nmsgs, nbytes);
    CkPrintf("[%d] Load Summary (after LB): max (with bg load): %f max (obj only): %f average: %f at step %d nonlocal: %d msgs %.2fKB useMem: %.2fKB.\n",
             CkMyPe(), mLoad, mCpuLoad, totalLoad/clients, step(), nmsgs, 1.0*nbytes/1024, (1.0*useMem())/1024);
    for (int i=0; i<clients; i++)
      migrateMsg->expectedLoad[i] = info.peLoads[i];
  }

  DEBUGF(("[%d]calling recv migration\n",CkMyPe()));
#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
  lbDecisionCount++;
  migrateMsg->lbDecisionCount = lbDecisionCount;
#endif

  // The per-processor split in the else branch is currently disabled;
  // the message is always broadcast.
  if (1) {
    // broadcast
    thisProxy.ReceiveMigration(migrateMsg);
  }
  else {
    // split the migration for each processor
    for (int p=0; p<CkNumPes(); p++) {
      LBMigrateMsg *m = extractMigrateMsg(migrateMsg, p);
      thisProxy[p].ReceiveMigration(m);
    }
    delete migrateMsg;
  }

  // Zero out data structures for next cycle
  // CkPrintf("zeroing out data\n");
  statsData->clear();
  stats_msg_count=0;
#endif
}
void CentralLB::simulationRead() { LBSimulation *simResults = NULL, *realResults; LBMigrateMsg *voidMessage = new (0,0,0,0) LBMigrateMsg(); voidMessage->n_moves=0; for ( ;LBSimulation::simStepSize > 0; --LBSimulation::simStepSize, ++LBSimulation::simStep) { // here we are supposed to read the data from the dump database int simFileSize = strlen(LBSimulation::dumpFile) + 4; char *simFileName = (char *)malloc(simFileSize); while (sprintf(simFileName, "%s.%d", LBSimulation::dumpFile, LBSimulation::simStep) >= simFileSize) { free(simFileName); simFileSize+=3; simFileName = (char *)malloc(simFileSize); } readStatsMsgs(simFileName); // allocate simResults (only the first step) if (simResults == NULL) { simResults = new LBSimulation(LBSimulation::simProcs); realResults = new LBSimulation(LBSimulation::simProcs); } else { // should be the same number of procs of the original simulation! if (!LBSimulation::procsChanged) { // it means we have a previous step, so in simResults there is data. // we can now print the real effects of the load balancer during the simulation // or print the difference between the predicted data and the real one. realResults->reset(); // reset to_proc of statsData to be equal to from_proc for (int k=0; k < statsData->n_objs; ++k) statsData->to_proc[k] = statsData->from_proc[k]; findSimResults(statsData, LBSimulation::simProcs, voidMessage, realResults); simResults->PrintDifferences(realResults,statsData); } simResults->reset(); } // now pass it to the strategy routine double startT = CkWallTimer(); preprocess(statsData); CmiPrintf("%s> Strategy starts ... 
\n", lbname); LBMigrateMsg* migrateMsg = Strategy(statsData); CmiPrintf("%s> Strategy took %fs memory usage: CentralLB: %d KB.\n", lbname, CkWallTimer()-startT, (int)(useMem()/1000)); // now calculate the results of the load balancing simulation findSimResults(statsData, LBSimulation::simProcs, migrateMsg, simResults); // now we have the simulation data, so print it and loop CmiPrintf("Charm++> LBSim: Simulation of load balancing step %d done.\n",LBSimulation::simStep); // **CWL** Officially recording my disdain here for using ints for bool if (LBSimulation::showDecisionsOnly) { simResults->PrintDecisions(migrateMsg, simFileName, LBSimulation::simProcs); } else { simResults->PrintSimulationResults(); } free(simFileName); delete migrateMsg; CmiPrintf("Charm++> LBSim: Passing to the next step\n"); } // deallocate simResults delete simResults; CmiPrintf("Charm++> Exiting...\n"); CkExit(); }
// LDStats data sent to parent contains real PE // LDStats in parent should contain relative PE void HybridBaseLB::Loadbalancing(int atlevel) { int i; CmiAssert(atlevel >= 1); CmiAssert(tree->isroot(CkMyPe(), atlevel)); LevelData *lData = levelData[atlevel]; LDStats *statsData = lData->statsData; CmiAssert(statsData); // at this time, all objects processor location is relative, and // all incoming objects from outside group belongs to the fake root proc. // clear background load if needed if (_lb_args.ignoreBgLoad()) statsData->clearBgLoad(); currentLevel = atlevel; int nclients = lData->nChildren; DEBUGF(("[%d] Calling Strategy ... \n", CkMyPe())); double start_lb_time, strat_end_time; start_lb_time = CkWallTimer(); if ((statsStrategy == SHRINK || statsStrategy == SHRINK_NULL) && atlevel == tree->numLevels()-1) { // no obj and comm data LBVectorMigrateMsg* migrateMsg = VectorStrategy(statsData); strat_end_time = CkWallTimer(); // send to children thisProxy.ReceiveVectorMigration(migrateMsg, nclients, lData->children); } else { LBMigrateMsg* migrateMsg = Strategy(statsData); strat_end_time = CkWallTimer(); // send to children //CmiPrintf("[%d] level: %d nclients:%d children: %d %d\n", CkMyPe(), atlevel, nclients, lData->children[0], lData->children[1]); if (!group1_created) thisProxy.ReceiveMigration(migrateMsg, nclients, lData->children); else { // send in multicast tree thisProxy.ReceiveMigration(migrateMsg, group1); //CkSendMsgBranchGroup(CkIndex_HybridBaseLB::ReceiveMigration(NULL), migrateMsg, thisgroup, group1); } // CkPrintf("[%d] ReceiveMigration takes %f \n", CkMyPe(), CkWallTimer()-strat_end_time); } if (_lb_args.debug()>0){ CkPrintf("[%d] Loadbalancing Level %d (%d children) started at %f, elapsed time %f\n", CkMyPe(), atlevel, lData->nChildren, start_lb_time, strat_end_time-start_lb_time); if (atlevel == tree->numLevels()-1) { CkPrintf("[%d] %s memUsage: %.2fKB\n", CkMyPe(), lbName(), (1.0*useMem())/1024); } } // inform new objects that are from outside 
group if (atlevel < tree->numLevels()-1) { for (i=0; i<statsData->n_objs; i++) { CmiAssert(statsData->from_proc[i] != -1); // ??? if (statsData->from_proc[i] == nclients) { // from outside CmiAssert(statsData->to_proc[i] < nclients); int tope = lData->children[statsData->to_proc[i]]; // comm data CkVec<LDCommData> comms; // collectCommData(i, comms, atlevel); thisProxy[tope].ObjMigrated(statsData->objData[i], comms.getVec(), comms.size(), atlevel-1); } } } }
/**
 * Finishes a load balancing step: advances the step counter, clears the
 * level-0 data, optionally prints timing and a load summary, then clears
 * the recorded loads and resumes the clients through theLbdb.
 *
 * @param balancing  non-zero enables the "finished" message on PE 0 (when
 *                   debug output is on) and the load summary (when
 *                   _lb_args.printSummary() is on).
 *
 * The body is compiled only when CMK_LBDB_ON is set.
 */
void HbmLB::ResumeClients(int balancing)
{
#if CMK_LBDB_ON
  DEBUGF(("[%d] ResumeClients. \n", CkMyPe()));

  theLbdb->incStep();

  // reset
  LevelData *leafLevel = levelData[0];
  leafLevel->clear();

  if (CkMyPe() == 0 && balancing) {
    const double finishedAt = CkWallTimer();
    if (_lb_args.debug()) {
      // step() was already incremented above, hence step()-1.
      CkPrintf("[%s] Load balancing step %d finished at %f duration %f\n",
               lbName(), step()-1, finishedAt, finishedAt - start_lb_time);
    }
  }

  if (balancing && _lb_args.printSummary()) {
    int count = 1;
    LDStats *stats = &myStats;

    LBInfo summary(count);
    summary.getInfo(stats, count, 0);   // no comm cost

    LBRealType mLoad, mCpuLoad, totalLoad;
    summary.getSummary(mLoad, mCpuLoad, totalLoad);

    int nonlocalMsgs, nonlocalBytes;
    stats->computeNonlocalComm(nonlocalMsgs, nonlocalBytes);
    const double nonlocalKB = nonlocalBytes/1024.0;

    CkPrintf("[%d] Load with %d objs: max (with comm): %f max (obj only): %f total: %f on %d processors at step %d useMem: %fKB nonlocal: %d %.2fKB.\n",
             CkMyPe(), stats->n_objs, mLoad, mCpuLoad, totalLoad, count,
             step()-1, (1.0*useMem())/1024, nonlocalMsgs, nonlocalKB);
    thisProxy[0].reportLBQulity(mLoad, mCpuLoad, totalLoad, nonlocalMsgs, nonlocalKB);
  }

  // zero out stats
  theLbdb->ClearLoads();

  theLbdb->ResumeClients();
#endif
}