avtDataRequest_p
LoadBalancer::Reduce(avtContract_p input)
{
    avtDataRequest_p data = input->GetDataRequest();

    //
    // It is difficult for the load balancer to communicate with the
    // originating source because it is done through callbacks.  So we do it
    // by setting a Boolean in the contract.  Since there is only one path
    // that involves actually doing data replication, and many that don't,
    // we will unset the Boolean now and reset it in the case we actually do
    // data replication.
    //
#ifdef PARALLEL // only used in parallel
    bool dataReplicationRequested =
                               input->ReplicateSingleDomainOnAllProcessors();
#endif
    input->SetReplicateSingleDomainOnAllProcessors(false);

    //
    // Pipeline index 0 is reserved for meta-data.  It should already be
    // load balanced.
    //
    if (input->GetPipelineIndex() == 0)
    {
        return data;
    }

    //
    // Assess load balancing specially for serial engines.
    //
    if (nProcs <= 1)
    {
        bool doDynLB = CheckDynamicLoadBalancing(input);
        if (!doDynLB && scheme != LOAD_BALANCE_STREAM)
        {
            pipelineInfo[input->GetPipelineIndex()].complete = true;
            return data;
        }
        else
        {
            avtDataObjectSource::RegisterProgressCallback(NULL, NULL);
            avtSILRestriction_p orig_silr = data->GetRestriction();
            avtSILRestriction_p silr = new avtSILRestriction(orig_silr);
            avtDataRequest_p new_data = new avtDataRequest(data, silr);
            avtSILRestrictionTraverser trav(silr);

            vector<int> list;
            trav.GetDomainList(list);
            if (pipelineInfo[input->GetPipelineIndex()].current < 0)
                pipelineInfo[input->GetPipelineIndex()].current = 0;
            int domain = list[pipelineInfo[input->GetPipelineIndex()].current];
            int sggDomain = avtStreamingGhostGenerator::LBGetNextDomain();
            if (sggDomain >= 0)
                domain = sggDomain;
            vector<int> domainList(1, domain);
            new_data->GetRestriction()
                                 ->RestrictDomainsForLoadBalance(domainList);
            UpdateProgress(pipelineInfo[input->GetPipelineIndex()].current,
                           (int)list.size());
            pipelineInfo[input->GetPipelineIndex()].current++;
            if (pipelineInfo[input->GetPipelineIndex()].current ==
                                                            (int)list.size())
                pipelineInfo[input->GetPipelineIndex()].complete = true;
            return new_data;
        }
    }

#ifdef PARALLEL
    avtSILRestriction_p orig_silr = data->GetRestriction();
    avtSILRestriction_p silr = new avtSILRestriction(orig_silr);
    avtDataRequest_p new_data = new avtDataRequest(data, silr);
    avtSILRestrictionTraverser trav(silr);

    // Set up MPI message tags.
    static int lastDomDoneMsg = GetUniqueMessageTag();
    static int newDomToDoMsg  = GetUniqueMessageTag();

    // Make sure that we have the domain-to-file mapping available.
    LBInfo &lbInfo(pipelineInfo[input->GetPipelineIndex()]);
    std::string meshName = GetMeshName(input, dbState[lbInfo.db]);
    GetIOInformation(lbInfo.db, dbState[lbInfo.db], meshName);

    if (scheme == LOAD_BALANCE_STREAM)
    {
        if (pipelineInfo[input->GetPipelineIndex()].current < 0)
        {
            pipelineInfo[input->GetPipelineIndex()].current = 0;

            //
            // We probably want to do something more sophisticated in the
            // future (like walking through a SIL).  For now, just use the
            // "chunks" mechanism set up with convenience methods.
            //
            vector<int> list;
            trav.GetDomainList(list);
            int amountPer     = list.size() / nProcs;
            int oneExtraUntil = list.size() % nProcs;
            int lastDomain    = 0;
            for (int i = 0 ; i < nProcs ; i++)
            {
                if (i == rank)
                {
                    int amount = amountPer + (i < oneExtraUntil ? 1 : 0);
                    for (int j = 0 ; j < amount ; j++)
                    {
                        domainListForStreaming.push_back(list[j+lastDomain]);
                    }
                }
                lastDomain += amountPer + (i < oneExtraUntil ? 1 : 0);
            }
        }
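        // Example of the partitioning above: with list.size() == 10 and
        // nProcs == 4, amountPer == 2 and oneExtraUntil == 2, so ranks 0-3
        // stream domains {0,1,2}, {3,4,5}, {6,7}, and {8,9} respectively.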
        int domain = domainListForStreaming[
                               pipelineInfo[input->GetPipelineIndex()].current];
        int sggDomain = avtStreamingGhostGenerator::LBGetNextDomain();
        if (sggDomain >= 0)
            domain = sggDomain;
        vector<int> domainList(1, domain);
        new_data->GetRestriction()
                                 ->RestrictDomainsForLoadBalance(domainList);
        UpdateProgress(pipelineInfo[input->GetPipelineIndex()].current,
                       (int)domainListForStreaming.size());
        pipelineInfo[input->GetPipelineIndex()].current++;
        if (pipelineInfo[input->GetPipelineIndex()].current ==
                                          (int)domainListForStreaming.size())
        {
            pipelineInfo[input->GetPipelineIndex()].complete = true;
            domainListForStreaming.clear();
        }
    }
    // Can we do dynamic load balancing?
    else if (! CheckDynamicLoadBalancing(input))
    {
        //
        // We probably want to do something more sophisticated in the future
        // (like walking through a SIL).  For now, just use the "chunks"
        // mechanism set up with convenience methods.
        //
        vector<int> list;
        vector<int> mylist;
        trav.GetDomainList(list);

        if (dataReplicationRequested && list.size() == 1)
        {
            silr->RestrictDomainsForLoadBalance(list);
            pipelineInfo[input->GetPipelineIndex()].complete = true;
            // Communicate back to the pipeline that we are replicating.
            input->SetReplicateSingleDomainOnAllProcessors(true);
            return data;
        }

        //
        // For variables (including meshes) that require specific types of
        // load balancing, we override the scheme here.
        //
        LoadBalanceScheme theScheme = DetermineAppropriateScheme(input);

        if (theScheme == LOAD_BALANCE_CONTIGUOUS_BLOCKS_TOGETHER)
        {
            int amountPer     = list.size() / nProcs;
            int oneExtraUntil = list.size() % nProcs;
            int lastDomain    = 0;
            for (int i = 0 ; i < nProcs ; i++)
            {
                if (i == rank)
                {
                    int amount = amountPer + (i < oneExtraUntil ? 1 : 0);
                    for (int j = 0 ; j < amount ; j++)
                    {
                        mylist.push_back(list[j+lastDomain]);
                    }
                }
                lastDomain += amountPer + (i < oneExtraUntil ? 1 : 0);
            }
        }
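        // Example of the difference between the schemes: with
        // list.size() == 10 and nProcs == 4, the contiguous scheme above
        // gives rank 0 domains {0,1,2}, while the stride scheme below gives
        // rank 0 domains {0,4,8}, spreading neighboring blocks across
        // processors instead of keeping them together.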
        else if (theScheme == LOAD_BALANCE_STRIDE_ACROSS_BLOCKS)
        {
            for (size_t j = 0 ; j < list.size() ; j++)
            {
                if (j % nProcs == (size_t)rank)
                    mylist.push_back(list[j]);
            }
        }
        else if (theScheme == LOAD_BALANCE_ABSOLUTE)
        {
            for (size_t j = 0 ; j < list.size() ; j++)
            {
                if (list[j] % nProcs == rank)
                    mylist.push_back(list[j]);
            }
        }
        else if (theScheme == LOAD_BALANCE_RESTRICTED)
        {
            LBInfo &lbInfo(pipelineInfo[input->GetPipelineIndex()]);
            IOInfo &ioInfo(ioMap[lbInfo.db]);
            const HintList &hints(ioInfo.ioInfo.GetHints());
            for (size_t j = 0 ; j < list.size() ; j++)
            {
                // hints must have more than 'rank' entries before
                // hints[rank] can be read safely.
                if (hints.size() > (size_t)rank)
                {
                    const vector<int> &doms = hints[rank];
                    int ndoms = doms.size();
                    for (int h = 0 ; h < ndoms ; h++)
                    {
                        if (doms[h] == list[j])
                        {
                            mylist.push_back(list[j]);
                            break;
                        }
                    }
                }
            }
        }
        else if (theScheme == LOAD_BALANCE_RANDOM_ASSIGNMENT)
        {
            // All procs randomly jumble the list of domain ids.
            // All procs compute the same jumbled list due to the same seed
            // [which won't be true on a heterogeneous platform].
            size_t j;
            vector<int> jumbledList = list;
            srand(0xDeadBeef);
            for (j = 0 ; j < list.size() * 5 ; j++)
            {
                int i1 = rand() % list.size();
                int i2 = rand() % list.size();
                int tmp = jumbledList[i1];
                jumbledList[i1] = jumbledList[i2];
                jumbledList[i2] = tmp;
            }
            // Now do round-robin assignment from the jumbled list.
            for (j = 0 ; j < list.size() ; j++)
            {
                if (j % nProcs == (size_t)rank)
                    mylist.push_back(jumbledList[j]);
            }
        }
        else if (theScheme == LOAD_BALANCE_DBPLUGIN_DYNAMIC)
        {
            // Every processor gets the complete list.
            mylist = list;
        }

        silr->RestrictDomainsForLoadBalance(mylist);
        pipelineInfo[input->GetPipelineIndex()].complete = true;
    }
    else
    {
        // Disable progress updates from the filters this time around.
        avtDataObjectSource::RegisterProgressCallback(NULL, NULL);

        LBInfo &lbInfo(pipelineInfo[input->GetPipelineIndex()]);
        IOInfo &ioInfo(ioMap[lbInfo.db]);

        if (rank == 0)
        {
            // -------------------------------------
            // MASTER LOAD BALANCER PROCESS
            // -------------------------------------

            // Allocate enough space to hold the completed domains.
            ioInfo.domains.resize(nProcs);
            ioInfo.files.resize(nProcs);
            bool validFileMap = (ioInfo.fileMap.size() != 0);

            // Get the list of domains to process.
            vector<int> domainList;
            trav.GetDomainList(domainList);

            // Make a work list and a completed list.
            size_t totaldomains = domainList.size();
            deque<int> incomplete(domainList.begin(), domainList.end());
            vector<int> complete;
            debug5 << "LoadBalancer Master -- starting with "
                   << incomplete.size() << " domains\n";

            // Pull from the incomplete list and push onto the complete list
            // until all domains are complete.
            bool abort = false;
            int domain;
            UpdateProgress(0, 0);
            while (complete.size() < totaldomains)
            {
                // Check for an abort.
                if (!abort && CheckAbort(false))
                {
                    abort = true;
                    totaldomains -= incomplete.size();
                    incomplete.clear();
                }

                // Update the progress.
                UpdateProgress(complete.size() +
                               (domainList.size() - incomplete.size()),
                               domainList.size()*2);

                // Get the completed domain number.
                MPI_Status stat;
                MPI_Recv(&domain, 1, MPI_INT, MPI_ANY_SOURCE, lastDomDoneMsg,
                         VISIT_MPI_COMM, &stat);
                int processor = stat.MPI_SOURCE;

                // -1 means the first pass by the slave; nothing completed yet.
                if (domain != -1)
                {
                    // Add it to the complete list.
                    complete.push_back(domain);
                }

                // Figure out what to tell this processor to do.
                if (incomplete.empty())
                    continue;

                // Find a cached domain for the next processor.
                deque<int>::iterator i;
                for (i = incomplete.begin(); i != incomplete.end(); i++)
                {
                    if (ioInfo.domains[processor].find(*i) !=
                                             ioInfo.domains[processor].end())
                        break;
                }
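                // The search above and the fallbacks below implement the
                // master's assignment preference, in order: (1) a domain
                // this processor has already read, (2) a domain in a file
                // it already has open, (3) a domain in the file opened by
                // the fewest processors, and (4) the next domain in line.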
                // If no match, try to find one that is in a file
                // already opened by this processor.
                if (i == incomplete.end())
                {
                    for (i = incomplete.begin(); i != incomplete.end(); i++)
                    {
                        int fileno = 0;
                        if (validFileMap)
                            fileno = ioInfo.fileMap[*i];
                        if (ioInfo.files[processor].count(fileno) > 0)
                            break;
                    }
                }

                // If still no match, find one that is in a file
                // opened by the fewest number of processors.
                if (i == incomplete.end())
                {
                    int mindomain = -1;
                    int minopen = 999999999;
                    for (i = incomplete.begin(); i != incomplete.end(); i++)
                    {
                        int fileno = 0;
                        if (validFileMap)
                            fileno = ioInfo.fileMap[*i];

                        // Count the number of processors which have
                        // this file opened.
                        int nopen = 0;
                        for (size_t j = 0; j < ioInfo.files.size(); j++)
                            if (ioInfo.files[j].count(fileno) > 0)
                                nopen++;

                        if (nopen < minopen)
                        {
                            mindomain = *i;
                            minopen = nopen;
                        }
                    }
                    for (i = incomplete.begin(); i != incomplete.end(); i++)
                    {
                        if (*i == mindomain)
                            break;
                    }
                }

                // If no match, just take the next one in line.
                if (i == incomplete.end())
                    i = incomplete.begin();

                domain = *i;
                incomplete.erase(i);
                ioInfo.domains[processor].insert(domain);
                if (validFileMap)
                    ioInfo.files[processor].insert(ioInfo.fileMap[domain]);
                else
                    ioInfo.files[processor].insert(0);

                // Send the new domain number to that processor.
                debug5 << "LoadBalancer Master: sending domain " << domain
                       << " to processor " << processor << "\n";
                MPI_Send(&domain, 1, MPI_INT, processor, newDomToDoMsg,
                         VISIT_MPI_COMM);
            }

            // We're all done -- -2 means to abort, -1 means to send results.
            int status = abort ? -2 : -1;
            for (int i = 1; i < nProcs; i++)
                MPI_Send(&status, 1, MPI_INT, i, newDomToDoMsg,
                         VISIT_MPI_COMM);

            if (abort)
                EXCEPTION0(AbortException);

            // All work is done.
            UpdateProgress(1, 0);
            lbInfo.complete = true;
            new_data->GetRestriction()->TurnOffAll();
            MPI_Barrier(VISIT_MPI_COMM);
        }
        else
        {
            // -------------------------------------
            // SLAVE PROCESSES
            // -------------------------------------

            // Send our last completed domain to the master.
            int domain = lbInfo.current;
            MPI_Send(&domain, 1, MPI_INT, 0, lastDomDoneMsg, VISIT_MPI_COMM);

            // Get our new work unit.
            MPI_Status stat;
            MPI_Recv(&domain, 1, MPI_INT, 0, newDomToDoMsg,
                     VISIT_MPI_COMM, &stat);
            lbInfo.current = domain;

            if (domain == -2)
            {
                EXCEPTION0(AbortException);
            }
            else if (domain == -1)
            {
                // -1 is a tag for "no work" -- we are all done.
                lbInfo.complete = true;
                new_data->GetRestriction()->TurnOffAll();
                MPI_Barrier(VISIT_MPI_COMM);
            }
            else
            {
                vector<int> domainList(1, domain);
                new_data->GetRestriction()
                                 ->RestrictDomainsForLoadBalance(domainList);
            }
        }
    }

    //
    // By intersecting with the original restriction, we will ensure that
    // we are catching restrictions beyond domains, like materials, etc.
    // See comments in the SIL restriction code regarding 'FastIntersect'.
    //
    new_data->GetRestriction()->FastIntersect(orig_silr);

    return new_data;
#else
    EXCEPTION1(VisItException, "nProcs was > 1 in non-parallel code");
#endif
}
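//
// For context: Reduce() above is not called directly by the pipeline; it
// reaches avtOriginatingSource::BalanceLoad() below through the
// loadBalanceFunction callback pointer.  A minimal sketch of such a
// registration follows; the callback and registration helper names here are
// assumed for illustration, not taken verbatim from this file:
//
//     static avtDataRequest_p
//     ReduceCallback(void *arg, avtContract_p contract)
//     {
//         return ((LoadBalancer *) arg)->Reduce(contract);
//     }
//
//     // Hypothetical registration with the source:
//     // avtOriginatingSource::RegisterLoadBalancer(ReduceCallback, lb);
//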
avtDataRequest_p
avtOriginatingSource::BalanceLoad(avtContract_p contract)
{
    bool usesAllDomains =
                      contract->GetDataRequest()->GetSIL().UsesAllDomains();

    //
    // If the contract says not to use load balancing, the request has to do
    // with auxiliary data coming through our meta-data mechanism.  Calling
    // InitPipeline would change the data attributes and it also causes an
    // unnecessary callback to our progress mechanism.
    //
    if (contract->ShouldUseLoadBalancing())
    {
        InitPipeline(contract);
    }
    else if (contract->DoingOnDemandStreaming())
    {
        GetOutput()->GetInfo().GetValidity().SetWhetherStreaming(true);
    }

    //
    // Allow the load balancer to split the load across processors.
    //
    bool dataReplicationOccurred = false;
    avtDataRequest_p rv = NULL;
    if (!UseLoadBalancer())
    {
        debug5 << "This source should not load balance the data." << endl;
        rv = contract->GetDataRequest();
    }
    else if (! contract->ShouldUseLoadBalancing())
    {
        debug5 << "This pipeline has indicated that no load balancing should "
               << "be used." << endl;
        rv = contract->GetDataRequest();
    }
    else if (loadBalanceFunction != NULL)
    {
        debug5 << "Using load balancer to reduce data." << endl;
        rv = loadBalanceFunction(loadBalanceFunctionArgs, contract);
        dataReplicationOccurred =
                            contract->ReplicateSingleDomainOnAllProcessors();
    }
    else
    {
        debug1 << "No load balancer exists to reduce data." << endl;
        rv = contract->GetDataRequest();
    }

    //
    // Return the portion for this processor.
    //
    rv->SetUsesAllDomains(usesAllDomains);

    //
    // Tell the output if we are doing data replication.
    //
    if (dataReplicationOccurred)
        GetOutput()->GetInfo().GetAttributes().
                                   SetDataIsReplicatedOnAllProcessors(true);

    return rv;
}
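//
// Note the round trip of the replication flag: Reduce() sets
// ReplicateSingleDomainOnAllProcessors(true) on the contract only on the
// single-domain replication path, and BalanceLoad() reads it back to mark
// the output's data attributes as replicated on all processors.
//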