LoadBalanceScheme
LoadBalancer::DetermineAppropriateScheme(avtContract_p input)
{
    //
    // Determine which load balancing scheme to use for this pipeline,
    // letting the file format or the mesh meta-data override the default.
    //
    int index = input->GetPipelineIndex();
    const LBInfo &lbinfo = pipelineInfo[index];
    std::string dbname = lbinfo.db;
    avtDatabase *db = dbMap[dbname];
    avtDataRequest_p data = input->GetDataRequest();
    avtDatabaseMetaData *md = db->GetMetaData(db->GetMostRecentTimestep());
    string meshName;
    TRY
    {
        meshName = md->MeshForVar(data->GetVariable());
    }
    CATCHALL
    {
        // Probably a CMFE.
        return scheme;
    }
    ENDTRY

    if (md->GetFormatCanDoDomainDecomposition())
        return LOAD_BALANCE_DBPLUGIN_DYNAMIC;

    const avtMeshMetaData *mmd = md->GetMesh(meshName);
    if (mmd && mmd->loadBalanceScheme != LOAD_BALANCE_UNKNOWN)
    {
        debug1 << "Default load balance scheme \""
               << LoadBalanceSchemeToString(scheme).c_str() << "\""
               << " being overridden in favor of \""
               << LoadBalanceSchemeToString(mmd->loadBalanceScheme).c_str()
               << "\" for mesh \"" << meshName.c_str() << "\"" << endl;
        return mmd->loadBalanceScheme;
    }

    return scheme;
}

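// A minimal sketch of how the overrides checked above get populated.  This
// would live in a database plugin's PopulateDatabaseMetaData(), not here;
// the plugin context is assumed for illustration, though the members and
// Add() call follow avtDatabaseMetaData usage elsewhere in VisIt:
//
//     avtMeshMetaData *mmd = new avtMeshMetaData;
//     mmd->name = "my_mesh";                             // hypothetical mesh
//     mmd->loadBalanceScheme = LOAD_BALANCE_RESTRICTED;  // the override
//     md->Add(mmd);
//
//     // Or let the format plugin decompose domains itself, which makes
//     // DetermineAppropriateScheme() return LOAD_BALANCE_DBPLUGIN_DYNAMIC:
//     md->SetFormatCanDoDomainDecomposition(true);
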
std::string
LoadBalancer::GetMeshName(avtContract_p input, int stateIndex)
{
    const LBInfo &lbinfo = pipelineInfo[input->GetPipelineIndex()];
    avtDatabase *db = dbMap[lbinfo.db];
    avtDataRequest_p data = input->GetDataRequest();
    avtDatabaseMetaData *md = db->GetMetaData(stateIndex);

    string meshName;
    TRY
    {
        meshName = md->MeshForVar(data->GetVariable());
    }
    CATCHALL
    {
        // Probably a CMFE; fall through and return the empty string.
    }
    ENDTRY

    return meshName;
}

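// How Reduce() below uses this (illustrative, mirroring the actual call):
//
//     std::string meshName = GetMeshName(input, dbState[lbInfo.db]);
//     // An empty string means the variable had no resolvable mesh
//     // (e.g. a CMFE expression), so callers must tolerate "".
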
avtDataRequest_p
LoadBalancer::Reduce(avtContract_p input)
{
    avtDataRequest_p data = input->GetDataRequest();

    //
    // It is difficult for the load balancer to communicate with the
    // originating source, because that communication happens through
    // callbacks.  So we do it by setting a Boolean in the contract.  Since
    // there is only one path that actually does data replication, and many
    // that don't, we unset the Boolean now and set it again in the one case
    // where we actually replicate data.
    //
#ifdef PARALLEL // only used in parallel
    bool dataReplicationRequested =
                               input->ReplicateSingleDomainOnAllProcessors();
#endif
    input->SetReplicateSingleDomainOnAllProcessors(false);

    //
    // Pipeline index 0 is reserved for meta-data.  It should already be
    // load balanced.
    //
    if (input->GetPipelineIndex() == 0)
    {
        return data;
    }

    //
    // Assess load balancing specially for serial engines.
    //
    if (nProcs <= 1)
    {
        bool doDynLB = CheckDynamicLoadBalancing(input);
        if (!doDynLB && scheme != LOAD_BALANCE_STREAM)
        {
            pipelineInfo[input->GetPipelineIndex()].complete = true;
            return data;
        }
        else
        {
            avtDataObjectSource::RegisterProgressCallback(NULL, NULL);
            avtSILRestriction_p orig_silr = data->GetRestriction();
            avtSILRestriction_p silr = new avtSILRestriction(orig_silr);
            avtDataRequest_p new_data = new avtDataRequest(data, silr);
            avtSILRestrictionTraverser trav(silr);

            vector<int> list;
            trav.GetDomainList(list);
            if (pipelineInfo[input->GetPipelineIndex()].current < 0)
                pipelineInfo[input->GetPipelineIndex()].current = 0;
            int domain = list[pipelineInfo[input->GetPipelineIndex()].current];
            int sggDomain = avtStreamingGhostGenerator::LBGetNextDomain();
            if (sggDomain >= 0)
                domain = sggDomain;
            vector<int> domainList(1, domain);
            new_data->GetRestriction()
                                ->RestrictDomainsForLoadBalance(domainList);
            UpdateProgress(pipelineInfo[input->GetPipelineIndex()].current,
                           (int)list.size());
            pipelineInfo[input->GetPipelineIndex()].current++;
            if (pipelineInfo[input->GetPipelineIndex()].current ==
                                                             (int)list.size())
                pipelineInfo[input->GetPipelineIndex()].complete = true;
            return new_data;
        }
    }

#ifdef PARALLEL
    avtSILRestriction_p orig_silr = data->GetRestriction();
    avtSILRestriction_p silr = new avtSILRestriction(orig_silr);
    avtDataRequest_p new_data = new avtDataRequest(data, silr);
    avtSILRestrictionTraverser trav(silr);

    // Set up MPI message tags.
    static int lastDomDoneMsg = GetUniqueMessageTag();
    static int newDomToDoMsg = GetUniqueMessageTag();

    // Make sure that we have the domain-to-file mapping available.
    LBInfo &lbInfo(pipelineInfo[input->GetPipelineIndex()]);
    std::string meshName = GetMeshName(input, dbState[lbInfo.db]);
    GetIOInformation(lbInfo.db, dbState[lbInfo.db], meshName);

    if (scheme == LOAD_BALANCE_STREAM)
    {
        if (pipelineInfo[input->GetPipelineIndex()].current < 0)
        {
            pipelineInfo[input->GetPipelineIndex()].current = 0;

            //
            // We probably want to do something more sophisticated in the
            // future (like walking through a SIL).  For now, just use the
            // "chunks" mechanism set up with convenience methods.
            //
            vector<int> list;
            trav.GetDomainList(list);
            int amountPer = list.size() / nProcs;
            int oneExtraUntil = list.size() % nProcs;
            int lastDomain = 0;
            for (int i = 0 ; i < nProcs ; i++)
            {
                if (i == rank)
                {
                    int amount = amountPer + (i < oneExtraUntil ? 1 : 0);
                    for (int j = 0 ; j < amount ; j++)
                    {
                        domainListForStreaming.push_back(list[j+lastDomain]);
                    }
                }
                lastDomain += amountPer + (i < oneExtraUntil ? 1 : 0);
            }
        }
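
        // Worked example of the chunking above (illustrative numbers, not
        // from any particular run): with 10 domains and nProcs == 4,
        // amountPer == 2 and oneExtraUntil == 2, so ranks 0-3 stream
        // domains {0,1,2}, {3,4,5}, {6,7}, and {8,9} -- contiguous chunks,
        // with one extra domain on each of the first 10 % 4 ranks.
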
        int domain = domainListForStreaming[
                             pipelineInfo[input->GetPipelineIndex()].current];
        int sggDomain = avtStreamingGhostGenerator::LBGetNextDomain();
        if (sggDomain >= 0)
            domain = sggDomain;
        vector<int> domainList(1, domain);
        new_data->GetRestriction()
                            ->RestrictDomainsForLoadBalance(domainList);
        UpdateProgress(pipelineInfo[input->GetPipelineIndex()].current,
                       domainListForStreaming.size());
        pipelineInfo[input->GetPipelineIndex()].current++;
        if (pipelineInfo[input->GetPipelineIndex()].current ==
                                         (int)domainListForStreaming.size())
        {
            pipelineInfo[input->GetPipelineIndex()].complete = true;
            domainListForStreaming.clear();
        }
    }
    // Can we do dynamic load balancing?
    else if (! CheckDynamicLoadBalancing(input))
    {
        //
        // We probably want to do something more sophisticated in the future
        // (like walking through a SIL).  For now, just use the "chunks"
        // mechanism set up with convenience methods.
        //
        vector<int> list;
        vector<int> mylist;
        trav.GetDomainList(list);

        if (dataReplicationRequested && list.size() == 1)
        {
            silr->RestrictDomainsForLoadBalance(list);
            pipelineInfo[input->GetPipelineIndex()].complete = true;

            // Communicate back to the pipeline that we are replicating.
            input->SetReplicateSingleDomainOnAllProcessors(true);
            return data;
        }

        //
        // For variables (including meshes) that require specific types of
        // load balancing, we override the scheme here.
        //
        LoadBalanceScheme theScheme = DetermineAppropriateScheme(input);
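
        // The static schemes below differ in subtle ways.  A worked example
        // (illustrative numbers only): with nProcs == 2 and a restricted
        // domain list of {3, 5, 8},
        //   - CONTIGUOUS_BLOCKS_TOGETHER gives rank 0 -> {3,5} and
        //     rank 1 -> {8}, keeping neighboring blocks on one processor;
        //   - STRIDE_ACROSS_BLOCKS strides by list position, giving
        //     rank 0 -> {3,8} and rank 1 -> {5};
        //   - ABSOLUTE assigns by domain id modulo nProcs, giving
        //     rank 0 -> {8} and rank 1 -> {3,5}, independent of list order.
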
        if (theScheme == LOAD_BALANCE_CONTIGUOUS_BLOCKS_TOGETHER)
        {
            int amountPer = list.size() / nProcs;
            int oneExtraUntil = list.size() % nProcs;
            int lastDomain = 0;
            for (int i = 0 ; i < nProcs ; i++)
            {
                if (i == rank)
                {
                    int amount = amountPer + (i < oneExtraUntil ? 1 : 0);
                    for (int j = 0 ; j < amount ; j++)
                    {
                        mylist.push_back(list[j+lastDomain]);
                    }
                }
                lastDomain += amountPer + (i < oneExtraUntil ? 1 : 0);
            }
        }
        else if (theScheme == LOAD_BALANCE_STRIDE_ACROSS_BLOCKS)
        {
            for (size_t j = 0 ; j < list.size() ; j++)
            {
                if (j % nProcs == (size_t)rank)
                    mylist.push_back(list[j]);
            }
        }
        else if (theScheme == LOAD_BALANCE_ABSOLUTE)
        {
            for (size_t j = 0 ; j < list.size() ; j++)
            {
                if (list[j] % nProcs == rank)
                    mylist.push_back(list[j]);
            }
        }
        else if (theScheme == LOAD_BALANCE_RESTRICTED)
        {
            LBInfo &lbInfo(pipelineInfo[input->GetPipelineIndex()]);
            IOInfo &ioInfo(ioMap[lbInfo.db]);
            const HintList &hints(ioInfo.ioInfo.GetHints());
            for (size_t j = 0 ; j < list.size() ; j++)
            {
                // Only index hints[rank] when the hint list actually has
                // more than "rank" entries.
                if (hints.size() > (size_t)rank)
                {
                    const vector<int> &doms = hints[rank];
                    int ndoms = doms.size();
                    for (int h = 0 ; h < ndoms ; h++)
                    {
                        if (doms[h] == list[j])
                        {
                            mylist.push_back(list[j]);
                            break;
                        }
                    }
                }
            }
        }
        else if (theScheme == LOAD_BALANCE_RANDOM_ASSIGNMENT)
        {
            // All procs randomly jumble the list of domain ids.
            // All procs compute the same jumbled list due to the same seed
            // [ which won't be true on a heterogeneous platform ].
            size_t j;
            vector<int> jumbledList = list;
            srand(0xDeadBeef);
            for (j = 0 ; j < list.size() * 5 ; j++)
            {
                int i1 = rand() % list.size();
                int i2 = rand() % list.size();
                int tmp = jumbledList[i1];
                jumbledList[i1] = jumbledList[i2];
                jumbledList[i2] = tmp;
            }
            // Now do round-robin assignment from the jumbled list.
            for (j = 0 ; j < list.size() ; j++)
            {
                if (j % nProcs == (size_t)rank)
                    mylist.push_back(jumbledList[j]);
            }
        }
        else if (theScheme == LOAD_BALANCE_DBPLUGIN_DYNAMIC)
        {
            // Every processor gets the complete list.
            mylist = list;
        }

        silr->RestrictDomainsForLoadBalance(mylist);
        pipelineInfo[input->GetPipelineIndex()].complete = true;
    }
    else
    {
        // Disable progress updates from the filters this time around.
        avtDataObjectSource::RegisterProgressCallback(NULL, NULL);

        LBInfo &lbInfo(pipelineInfo[input->GetPipelineIndex()]);
        IOInfo &ioInfo(ioMap[lbInfo.db]);

        if (rank == 0)
        {
            // -------------------------------------
            //   MASTER LOADBALANCER PROCESS
            // -------------------------------------

            // Allocate enough space to hold the completed domains.
            ioInfo.domains.resize(nProcs);
            ioInfo.files.resize(nProcs);
            bool validFileMap = (ioInfo.fileMap.size() != 0);

            // Get the list of domains to process.
            vector<int> domainList;
            trav.GetDomainList(domainList);

            // Make a work list and a completed list.
            size_t totaldomains = domainList.size();
            deque<int> incomplete(domainList.begin(), domainList.end());
            vector<int> complete;
            debug5 << "LoadBalancer Master -- starting with "
                   << incomplete.size() << " domains\n";

            // Pull from the incomplete list and push onto the complete list
            // until all domains are complete.
            bool abort = false;
            int domain;
            UpdateProgress(0, 0);
            while (complete.size() < totaldomains)
            {
                // Check for an abort.
                if (!abort && CheckAbort(false))
                {
                    abort = true;
                    totaldomains -= incomplete.size();
                    incomplete.clear();
                }

                // Update the progress.
                UpdateProgress(complete.size() +
                                   (domainList.size() - incomplete.size()),
                               domainList.size()*2);

                // Get the completed domain number.
                MPI_Status stat;
                MPI_Recv(&domain, 1, MPI_INT, MPI_ANY_SOURCE, lastDomDoneMsg,
                         VISIT_MPI_COMM, &stat);
                int processor = stat.MPI_SOURCE;

                // -1 means the first pass by the slave; nothing is
                // completed yet.
                if (domain != -1)
                {
                    // Add it to the complete list.
                    complete.push_back(domain);
                }

                // Figure out what to tell this processor to do.
                if (incomplete.empty())
                    continue;

                // Find a cached domain for the next processor.
                deque<int>::iterator i;
                for (i = incomplete.begin(); i != incomplete.end(); i++)
                {
                    if (ioInfo.domains[processor].find(*i) !=
                                              ioInfo.domains[processor].end())
                        break;
                }
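
                // The search above and the fallbacks below implement a
                // simple affinity policy: prefer a domain this processor
                // has already read, then one in a file it already has open,
                // then one in the file touched by the fewest processors,
                // and finally just the next domain in line.
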
                // If no match, try to find one that is in a file
                // already opened by this processor.
                if (i == incomplete.end())
                {
                    for (i = incomplete.begin(); i != incomplete.end(); i++)
                    {
                        int fileno = 0;
                        if (validFileMap)
                            fileno = ioInfo.fileMap[*i];
                        if (ioInfo.files[processor].count(fileno) > 0)
                            break;
                    }
                }

                // If still no match, find one that is in a file
                // opened by the fewest processors.
                if (i == incomplete.end())
                {
                    int mindomain = -1;
                    int minopen = 999999999;
                    for (i = incomplete.begin(); i != incomplete.end(); i++)
                    {
                        int fileno = 0;
                        if (validFileMap)
                            fileno = ioInfo.fileMap[*i];

                        // Count the number of processors which have this
                        // file opened.
                        int nopen = 0;
                        for (size_t j = 0; j < ioInfo.files.size(); j++)
                            if (ioInfo.files[j].count(fileno) > 0)
                                nopen++;

                        if (nopen < minopen)
                        {
                            mindomain = *i;
                            minopen = nopen;
                        }
                    }
                    for (i = incomplete.begin(); i != incomplete.end(); i++)
                    {
                        if (*i == mindomain)
                            break;
                    }
                }

                // If no match, just take the next one in line.
                if (i == incomplete.end())
                    i = incomplete.begin();

                domain = *i;
                incomplete.erase(i);
                ioInfo.domains[processor].insert(domain);
                if (validFileMap)
                    ioInfo.files[processor].insert(ioInfo.fileMap[domain]);
                else
                    ioInfo.files[processor].insert(0);

                // Send the new domain number to that processor.
                debug5 << "LoadBalancer Master: sending domain " << domain
                       << " to processor " << processor << "\n";
                MPI_Send(&domain, 1, MPI_INT, processor, newDomToDoMsg,
                         VISIT_MPI_COMM);
            }

            // We're all done -- -2 means to abort, -1 means to send results.
            int status = abort ? -2 : -1;
            for (int i = 1; i < nProcs; i++)
                MPI_Send(&status, 1, MPI_INT, i, newDomToDoMsg,
                         VISIT_MPI_COMM);

            if (abort)
                EXCEPTION0(AbortException);

            // All work is done.
            UpdateProgress(1, 0);
            lbInfo.complete = true;
            new_data->GetRestriction()->TurnOffAll();
            MPI_Barrier(VISIT_MPI_COMM);
        }
        else
        {
            // -------------------------------------
            //   SLAVE PROCESSES
            // -------------------------------------

            // Send our last completed domain to the master.
            int domain = lbInfo.current;
            MPI_Send(&domain, 1, MPI_INT, 0, lastDomDoneMsg, VISIT_MPI_COMM);

            // Get our new work unit.
            MPI_Status stat;
            MPI_Recv(&domain, 1, MPI_INT, 0, newDomToDoMsg, VISIT_MPI_COMM,
                     &stat);
            lbInfo.current = domain;

            if (domain == -2)
            {
                EXCEPTION0(AbortException);
            }
            else if (domain == -1)
            {
                // -1 is a tag for "no work" -- we are all done.
                lbInfo.complete = true;
                new_data->GetRestriction()->TurnOffAll();
                MPI_Barrier(VISIT_MPI_COMM);
            }
            else
            {
                vector<int> domainList(1, domain);
                new_data->GetRestriction()
                                  ->RestrictDomainsForLoadBalance(domainList);
            }
        }
    }

    // By intersecting with the original restriction, we ensure that we
    // catch restrictions beyond domains, like materials, etc.
    // See comments in the SIL restriction code regarding 'FastIntersect'.
    new_data->GetRestriction()->FastIntersect(orig_silr);

    return new_data;
#else
    EXCEPTION1(VisItException, "nProcs was > 1 in a non-parallel code");
#endif
}

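// A sample exchange for the master/slave path in Reduce() above, with
// illustrative ranks and domain numbers (not from any real trace):
//
//     slave 1 -> master : lastDomDoneMsg, -1   (first pass; nothing done)
//     master  -> slave 1: newDomToDoMsg,   7   (work on domain 7)
//     slave 1 -> master : lastDomDoneMsg,  7   (domain 7 complete)
//     master  -> slave 1: newDomToDoMsg,  -1   (no work left; wrap up)
//
// The master biases each assignment toward data the slave has already
// touched, so the common case hands out a domain from an already-open file.
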
bool
LoadBalancer::CheckDynamicLoadBalancing(avtContract_p input)
{
    //
    // See if we have already decided.  If so, just return our cached
    // decision.
    //
    int index = input->GetPipelineIndex();
    LBInfo &lbinfo = pipelineInfo[index];
    if (lbinfo.haveInitializedDLB)
        return lbinfo.doDLB;

    //
    // If the user has not explicitly asked for DLB, then don't do it.
    //
    if (!allowDynamic)
    {
        lbinfo.doDLB = (scheme == LOAD_BALANCE_STREAM); // Almost always false.
        lbinfo.haveInitializedDLB = true;
        return lbinfo.doDLB;
    }

    //
    // Some hard and fast rules:
    //
    // Pipeline index 0 is reserved for meta-data and inlined pipelines.  So
    // no DLB for those.
    //
    // We cannot dynamically load balance some pipelines because of the
    // filters they contain.
    //
    // We cannot do dynamic load balancing if the database does not believe
    // we can (for example, because we need ghost data communicated or
    // materials reconstructed).
    //
    avtDataRequest_p data = input->GetDataRequest();
    std::string dbname = lbinfo.db;
    avtDatabase *db = dbMap[dbname];
    if (input->GetPipelineIndex() == 0 ||
        input->ShouldUseStreaming() == false ||
        db->CanDoStreaming(data) == false)
    {
        lbinfo.doDLB = false;
        lbinfo.haveInitializedDLB = true;
        return false;
    }

    //
    // Don't do DLB if we have 2 or 3 procs.  It's not worth it.
    //
    if (nProcs == 2 || nProcs == 3)
    {
        lbinfo.doDLB = false;
        lbinfo.haveInitializedDLB = true;
        return false;
    }

    //
    // The user has asked for DLB, and nothing in the pipeline is preventing
    // it.  Do it!
    //
    lbinfo.doDLB = true;
    lbinfo.haveInitializedDLB = true;
    return true;
}

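// Summary of the decision above (a reading aid, not an exhaustive spec):
//
//     allowDynamic  pipeline/db can stream  nProcs   doDLB
//     ------------  ----------------------  -------  --------------------
//     no            (any)                   (any)    only if scheme is
//                                                    LOAD_BALANCE_STREAM
//     yes           no                      (any)    false
//     yes           yes                     2 or 3   false
//     yes           yes                     1 or 4+  true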