inline double Score::calculate_determinant(std::vector<std::vector<double> > M, int n)
{
    Decomposition D;
    return D.determinant(M, n);
}
void evalBestFitnessPlanDump::call( Decomposition & eo )
{
    if( eo.fitness() > _best ) {
        _best = eo.fitness();
        dump( eo );
    }
}
void evalBestMakespanPlanDump::call( Decomposition & eo )
{
    if( eo.plan().makespan() < _best ) {
        _best = eo.plan().makespan();
        dump( eo );
    }
}
TEST_F(DecompositionTest, IsNotRootByDefault)
{
    Decomposition d = {DecompositionNode{{}}, solverFactory};
    EXPECT_FALSE(d.isRoot());
    d.setRoot();
    EXPECT_TRUE(d.isRoot());
}
void Balancer::perfBalanceGPU(Cluster &cluster, Decomposition& decomp,
    const double kTimeEstimate) {
  const int kGPUOnly = 2;
  WorkQueue work_queue;
  WorkRequest work_request;
  const int kNumTotalGPUs = cluster.getNumTotalGPUs();
  if (decomp.getNumSubDomains() == 0 || kNumTotalGPUs == 0) return;

  for (int gpu_index = 0; gpu_index < kNumTotalGPUs; ++gpu_index) {
    Node& gpu = cluster.getGlobalGPU(gpu_index);
    // fastest gpu will have largest weight, and thus move to front of queue
    work_request.setTimeDiff(kTimeEstimate - gpu.getBalTimeEst(1, kGPUOnly));
    work_request.setIndex(gpu_index);
    work_queue.push(work_request);
  }

  const int kNumBlocks = decomp.getNumSubDomains();
  // place data blocks on GPUs one at a time
  for (int block_id = 0; block_id < kNumBlocks; ++block_id) {
    work_request = work_queue.top();
    work_queue.pop();
    Node& gpu = cluster.getGlobalGPU(work_request.getIndex());
    gpu.incrementBalCount();
    double time_diff = gpu.getBalTimeEst(1, kGPUOnly);
    work_request.setTimeDiff(time_diff);
    work_queue.push(work_request);
    //printWorkQueue(work_queue);
  }

  cluster.distributeBlocks(&decomp);
}
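// The loops above rely on work_queue.top() returning the request with the
// largest remaining time difference. A minimal sketch of definitions that would
// give WorkQueue that behavior is shown below; the actual WorkRequest/WorkQueue
// types in this codebase may differ, and the member names here are illustrative
// assumptions only.
#include <queue>
#include <vector>

class WorkRequest {
 public:
  void setTimeDiff(double t) { time_diff_ = t; }
  double getTimeDiff() const { return time_diff_; }
  void setIndex(int i) { index_ = i; }
  int getIndex() const { return index_; }
  // order by time difference so the neediest request ends up on top
  bool operator<(const WorkRequest& other) const {
    return time_diff_ < other.time_diff_;
  }
 private:
  double time_diff_ = 0.0;
  int index_ = 0;
};

// max-heap keyed on the time difference
typedef std::priority_queue<WorkRequest, std::vector<WorkRequest> > WorkQueue;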
template<typename RandomAccessIterator1,
         typename Decomposition,
         typename RandomAccessIterator2,
         typename BinaryFunction>
RandomAccessIterator2 reduce_intervals(RandomAccessIterator1 first,
                                       Decomposition decomp,
                                       RandomAccessIterator2 result,
                                       BinaryFunction binary_op)
{
  typedef typename thrust::iterator_value<RandomAccessIterator2>::type result_type;

  const size_t groupsize = 128;
  size_t heap_size = groupsize * sizeof(result_type);
  bulk::async(bulk::grid<groupsize,7>(decomp.size(), heap_size),
              reduce_intervals_kernel(),
              bulk::root.this_exec,
              first,
              decomp,
              result,
              binary_op);

  return result + decomp.size();
} // end reduce_intervals()
TEST_F(DecompositionTest, SetsParent)
{
    Decomposition d = {DecompositionNode{{}}, solverFactory};
    EXPECT_EQ(nullptr, d.getParent());

    DecompositionPtr c{new Decomposition{DecompositionNode{{}}, solverFactory}};
    c->setParent(&d);
    d.addChild(std::move(c));
    EXPECT_EQ(&d, (*d.getChildren().begin())->getParent());
}
template<typename DerivedPolicy,
         typename RandomAccessIterator1,
         typename Decomposition,
         typename RandomAccessIterator2,
         typename BinaryFunction>
__host__ __device__
RandomAccessIterator2 reduce_intervals_(execution_policy<DerivedPolicy> &exec,
                                        RandomAccessIterator1 first,
                                        Decomposition decomp,
                                        RandomAccessIterator2 result,
                                        BinaryFunction binary_op)
{
  typedef typename thrust::iterator_value<RandomAccessIterator2>::type result_type;

  const size_t groupsize = 128;
  size_t heap_size = groupsize * sizeof(result_type);
  bulk_::async(bulk_::grid<groupsize,7>(decomp.size(), heap_size, stream(thrust::detail::derived_cast(exec))),
               reduce_intervals_detail::reduce_intervals_kernel(),
               bulk_::root.this_exec,
               first,
               decomp,
               result,
               binary_op);

  return result + decomp.size();
} // end reduce_intervals()
template<typename DerivedPolicy,
         typename RandomAccessIterator1,
         typename Decomposition,
         typename RandomAccessIterator2,
         typename BinaryFunction>
RandomAccessIterator2 reduce_intervals_(execution_policy<DerivedPolicy> &,
                                        RandomAccessIterator1 first,
                                        Decomposition decomp,
                                        RandomAccessIterator2 result,
                                        BinaryFunction binary_op)
{
  namespace bulk_ = thrust::system::cuda::detail::bulk;

  typedef typename thrust::iterator_value<RandomAccessIterator2>::type result_type;

  const size_t groupsize = 128;
  size_t heap_size = groupsize * sizeof(result_type);
  bulk_::async(bulk_::grid<groupsize,7>(decomp.size(), heap_size),
               reduce_intervals_detail::reduce_intervals_kernel(),
               bulk_::root.this_exec,
               first,
               decomp,
               result,
               binary_op);

  return result + decomp.size();
} // end reduce_intervals()
void declareDecomposition(const Decomposition& decomposition, std::ostream& out)
{
    out << "% Decomposition facts" << std::endl;
    out << "currentNode(" << decomposition.getNode().getGlobalId() << ")." << std::endl;
    for(const auto& v : decomposition.getNode().getBag()) {
        out << "bag(" << decomposition.getNode().getGlobalId() << ',' << v << "). ";
        out << "current(" << v << ")." << std::endl;
    }

    out << "#const numChildNodes=" << decomposition.getChildren().size() << '.' << std::endl;
    if(decomposition.getChildren().empty())
        out << "initial." << std::endl;
    else {
        for(const auto& child : decomposition.getChildren()) {
            out << "childNode(" << child->getNode().getGlobalId() << ")." << std::endl;
            for(const auto& v : child->getNode().getBag()) {
                out << "bag(" << child->getNode().getGlobalId() << ',' << v << "). ";
                out << "-introduced(" << v << ")." << std::endl; // Redundant
            }
        }
    }

    if(decomposition.isRoot())
        out << "final." << std::endl;

    if(decomposition.isPostJoinNode())
        out << "postJoin." << std::endl;

    // Redundant predicates for convenience...
    out << "introduced(X) :- current(X), not -introduced(X)." << std::endl;
    out << "removed(X) :- childNode(N), bag(N,X), not current(X)." << std::endl;
}
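// For illustration only (not actual program output): for a hypothetical root
// decomposition node with global ID 1, bag {a,b}, and a single child node 2
// with bag {b,c}, the statements above would emit roughly these facts:
//
//   % Decomposition facts
//   currentNode(1).
//   bag(1,a). current(a).
//   bag(1,b). current(b).
//   #const numChildNodes=1.
//   childNode(2).
//   bag(2,b). -introduced(b).
//   bag(2,c). -introduced(c).
//   final.
//   introduced(X) :- current(X), not -introduced(X).
//   removed(X) :- childNode(N), bag(N,X), not current(X).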
Solver::Solver(const Decomposition& decomposition, const Application& app, const std::vector<std::string>& encodingFiles, bool tableMode, bool cardinalityCost, bool printStatistics)
    : ::Solver(decomposition, app)
    , encodingFiles(encodingFiles)
    , tableMode(tableMode)
    , cardinalityCost(cardinalityCost)
    , printStatistics(printStatistics)
{
    Gringo::message_printer()->disable(Gringo::W_ATOM_UNDEFINED);

#ifndef DISABLE_CHECKS
    // TODO: Implement tables::EncodingChecker
    if(!tableMode) {
        // Check the encoding, but only in the decomposition root.
        // Otherwise we'd probably do checks redundantly.
        if(decomposition.isRoot()) {
            std::ofstream dummyStream;
            std::unique_ptr<Gringo::Output::OutputBase> out(new Gringo::Output::OutputBase({}, dummyStream));
            Gringo::Input::Program program;
            asp_utils::DummyGringoModule module;
            Gringo::Scripts scripts(module);
            Gringo::Defines defs;
            std::unique_ptr<EncodingChecker> encodingChecker{new trees::EncodingChecker(scripts, program, *out, defs)};
            Gringo::Input::NonGroundParser parser(*encodingChecker);
            for(const auto& file : encodingFiles)
                parser.pushFile(std::string(file));
            parser.parse();
            encodingChecker->check();
        }
    }
#endif
}
void DebugMachineReadable::solverInvocationResult(const Decomposition& decompositionNode, const ItemTree* result)
{
    const auto id = decompositionNode.getNode().getGlobalId();
    if(result) {
        std::cout << "% Facts describing the resulting item tree at node " << id << std::endl;
        std::ostringstream rootItemSetName;
        rootItemSetName << 'n' << id;
        solver::asp::Solver::declareItemTree(std::cout, result, false, id, rootItemSetName.str());
        std::cout << std::endl;

        std::cout << "% Memory locations of the item tree nodes at decomposition node " << id << " (not passed to ASP)" << std::endl;
        declareItemTreeNodeMemoryAddresses(std::cout, result, rootItemSetName.str());
        std::cout << std::endl;

        std::cout << "% Extension pointers at decomposition node " << id << " (not passed to ASP)" << std::endl;
        declareExtensionPointers(std::cout, result, rootItemSetName.str());
        std::cout << std::endl;

        std::cout << "% (Derived) costs of non-leaf nodes of the item tree at decomposition node " << id << " (not passed to ASP)" << std::endl;
        declareDerivedCosts(std::cout, result, rootItemSetName.str());
        std::cout << std::endl;
    }
    else
        std::cout << "% Item tree of node " << id << " is empty." << std::endl;
}
std::unique_ptr<Solver> AspFactory::newSolver(const Decomposition& decomposition) const
{
    if(optDefaultJoin.isUsed() && decomposition.isJoinNode())
        return std::unique_ptr<Solver>(new DefaultJoin(decomposition, app));
    else
        return std::unique_ptr<Solver>(new Asp(decomposition, app, optEncodingFile.getValue(), optTables.isUsed()));
}
bool MutationDelGoal::operator()( Decomposition & decompo )
{
    if( decompo.size() <= 1 ) {
        return false;
    } else {
#ifndef NDEBUG
        eo::log << eo::debug << "D";
        eo::log.flush();
        eo::log << eo::xdebug << " DelGoal:" << std::endl << "\tBefore: ";
        simplePrint( eo::log << eo::xdebug, decompo );
#endif
        unsigned int i = rng.random( std::min(
                static_cast<unsigned int>(decompo.size()),
                static_cast<unsigned int>(decompo.last_reached() + 1) ) );
        decompo.erase( decompo.iter_at( i ) );
#ifndef NDEBUG
        eo::log << eo::xdebug << "\tdelete the " << i << "th goal" << std::endl;
        eo::log << eo::xdebug << "\tAfter: ";
        simplePrint( eo::log << eo::xdebug, decompo );
#endif
        return true;
    }
}
void Balancer::balance(Cluster &cluster, Decomposition& decomp, const int kConfig) {
  const int kCPUAndGPU = 0;
  const int kCPUOnly = 1;
  const int kGPUOnly = 2;
  int blocks_per_node = 0;
  // num cpu nodes
  unsigned int total_nodes = cluster.getNumNodes();
  size_t num_blocks = decomp.getNumSubDomains();

  // initialize block directory
  cluster.setNumBlocks(num_blocks);

  if (kConfig != kCPUOnly) {
    unsigned int num_gpus = 0;
    for (unsigned int node_index = 0; node_index < cluster.getNumNodes(); ++node_index) {
      num_gpus += cluster.getNode(node_index).getNumChildren();
    }
    if (kConfig == kGPUOnly)  // gpu only
      total_nodes = num_gpus;
    else if (kConfig == kCPUAndGPU)  // cpu and gpu
      total_nodes += num_gpus;
  }

  blocks_per_node = ceil(decomp.getNumSubDomains() / (float) total_nodes);
  for (unsigned int node_index = 0; node_index < cluster.getNumNodes(); ++node_index) {
    Node& node = cluster.getNode(node_index);
    if (kConfig == kCPUOnly || kConfig == kCPUAndGPU) {
      for (int subd = 0; subd < blocks_per_node && 0 < decomp.getNumSubDomains(); ++subd) {
        SubDomain *block = decomp.popSubDomain();
        node.addSubDomain(block);
      }
    }
    if (kConfig == kGPUOnly || kConfig == kCPUAndGPU) {
      for (unsigned int gpu_index = 0; gpu_index < node.getNumChildren(); ++gpu_index) {
        Node& gpu = node.getChild(gpu_index);
        for (int subd = 0; subd < blocks_per_node && 0 < decomp.getNumSubDomains(); ++subd) {
          SubDomain *block = decomp.popSubDomain();
          gpu.addSubDomain(block);
        }
      }
    }
  }

  /* the work is balanced, so we can fill the block directory */
  cluster.storeBlockLocs();
}
void Solver::startSolvingForCurrentRowCombination()
{
//    ++solverSetups;
    asyncResult.reset();

    if(reground) {
        // Set up ASP solver
        config.solve.numModels = 0;
        // TODO The last parameter of clasp.startAsp in the next line is "allowUpdate". Does setting it to false have benefits?
        // WORKAROUND for BUG in ClaspFacade::startAsp()
        // TODO remove on update to new version
        if(clasp.ctx.numVars() == 0 && clasp.ctx.frozen())
            clasp.ctx.reset();
        Clasp::Asp::LogicProgram& claspProgramBuilder = static_cast<Clasp::Asp::LogicProgram&>(clasp.startAsp(config));
        GringoOutputProcessor gringoOutput(claspProgramBuilder);
        std::unique_ptr<Gringo::Output::OutputBase> out(new Gringo::Output::OutputBase({}, gringoOutput));
        Gringo::Input::Program program;
        asp_utils::DummyGringoModule module;
        Gringo::Scripts scripts(module);
        Gringo::Defines defs;
        Gringo::Input::NongroundProgramBuilder gringoProgramBuilder(scripts, program, *out, defs);
        Gringo::Input::NonGroundParser parser(gringoProgramBuilder);

        // Input: Induced subinstance
        std::unique_ptr<std::stringstream> instanceInput(new std::stringstream);
        asp_utils::induceSubinstance(*instanceInput, app.getInstance(), decomposition.getNode().getBag());
        app.getPrinter().solverInvocationInput(decomposition, instanceInput->str());

        // Input: Decomposition
        std::unique_ptr<std::stringstream> decompositionInput(new std::stringstream);
        asp_utils::declareDecomposition(decomposition, *decompositionInput);
        app.getPrinter().solverInvocationInput(decomposition, decompositionInput->str());

        // Input: Child rows
        std::unique_ptr<std::stringstream> childRowsInput(new std::stringstream);
        *childRowsInput << "% Child row facts" << std::endl;
        for(const auto& row : getCurrentRowCombination()) {
            for(const auto& item : row->getItems())
                *childRowsInput << "childItem(" << item << ")." << std::endl;
            for(const auto& item : row->getAuxItems())
                *childRowsInput << "childAuxItem(" << item << ")." << std::endl;
            // TODO costs, etc.
        }
        app.getPrinter().solverInvocationInput(decomposition, childRowsInput->str());

        // Pass input to ASP solver
        for(const auto& file : encodingFiles)
            parser.pushFile(std::string(file));
        parser.pushStream("<instance>", std::move(instanceInput));
        parser.pushStream("<decomposition>", std::move(decompositionInput));
        parser.pushStream("<child_rows>", std::move(childRowsInput));
        parser.parse();

        // Ground
        program.rewrite(defs);
        program.check();
        if(Gringo::message_printer()->hasError())
            throw std::runtime_error("Grounding stopped because of errors");
        auto gPrg = program.toGround(out->domains);
        Gringo::Ground::Parameters params;
        params.add("base", {});
        gPrg.ground(params, scripts, *out);
        params.clear();

        claspProgramBuilder.endProgram();

        itemAtomInfos.clear();
        for(const auto& atom : gringoOutput.getItemAtomInfos())
            itemAtomInfos.emplace_back(ItemAtomInfo(atom, claspProgramBuilder));
        auxItemAtomInfos.clear();
        for(const auto& atom : gringoOutput.getAuxItemAtomInfos())
            auxItemAtomInfos.emplace_back(AuxItemAtomInfo(atom, claspProgramBuilder));
        // TODO costs etc.

        clasp.prepare();
    }
    else {
        // Set external variables to the values of the current child row combination
        clasp.update(false, false);
        clasp.prepare();

        // Mark atoms corresponding to items from the currently extended rows
        for(const auto& row : getCurrentRowCombination()) {
            for(const auto& item : row->getItems()) {
                assert(itemsToLitIndices.find(item) != itemsToLitIndices.end());
                assert(itemsToLitIndices.at(item) < literals.size());
#ifdef DISABLE_CHECKS
                literals[itemsToLitIndices.at(item)].watch();
#else
                try {
                    literals[itemsToLitIndices.at(item)].watch();
                }
                catch(const std::out_of_range&) {
                    std::ostringstream msg;
                    msg << "Unknown variable; atom childItem(" << *item << ") not shown or not declared as external?";
                    throw std::runtime_error(msg.str());
                }
#endif
            }
            for(const auto& item : row->getAuxItems()) {
                assert(auxItemsToLitIndices.find(item) != auxItemsToLitIndices.end());
                assert(auxItemsToLitIndices.at(item) < literals.size());
#ifdef DISABLE_CHECKS
                literals[auxItemsToLitIndices.at(item)].watch();
#else
                try {
                    literals[auxItemsToLitIndices.at(item)].watch();
                }
                catch(const std::out_of_range&) {
                    std::ostringstream msg;
                    msg << "Unknown variable; atom childAuxItem(" << *item << ") not shown or not declared as external?";
                    throw std::runtime_error(msg.str());
                }
#endif
            }
        }

        // Set marked literals to true and all others to false
        for(auto& lit : literals) {
            if(lit.watched()) {
                lit.clearWatch();
                clasp.assume(lit);
            }
            else
                clasp.assume(~lit);
        }
    }

    asyncResult.reset(new BasicSolveIter(clasp));
}
void Balancer::perfBalance(Cluster &cluster, Decomposition& decomp, const int kConfig) {
  WorkQueue work_queue;
  WorkRequest work_request;
  double total_weight(0.0);
  double min_edge_weight(0.0);
  double procTime(0.0);
  double commTime(0.0);
  double timeEst = procTime;
  bool changed(false);
  const int kGPUOnly(2);
  const int kStrongest(3);
  Node &root = cluster.getNode(0);
  size_t num_blocks = decomp.getNumSubDomains();

  // initialize block directory
  cluster.setNumBlocks(num_blocks);

  // get total iterations per second for cluster
  for (unsigned int node = 0; node < cluster.getNumNodes(); ++node) {
    total_weight += cluster.getNode(node).getTotalWeight(kConfig);
    min_edge_weight += cluster.getNode(node).getMinEdgeWeight(kConfig);
  }

  // quick estimation of runtime
  procTime = num_blocks / total_weight;
  commTime = num_blocks / min_edge_weight;
  timeEst = procTime;
  if (0.0 < min_edge_weight)
    timeEst += commTime;

  if (kGPUOnly == kConfig) {
    perfBalanceGPU(cluster, decomp, timeEst);
  } else if (kStrongest == kConfig) {
    perfBalanceStrongestDevice(cluster, decomp);
  } else {
    // perform initial task distribution
    for (size_t i = 0; i < num_blocks; ++i)
      root.incrementBalCount();

    /*fprintf(stderr,
        "perfBalance: \n\ttime est: %f sec\n\tprocTime: %f sec\n\tcommTime: %f sec\n\ttotal weight:%e \n\tmin edge weight:%e.\n",
        timeEst, procTime, commTime, total_weight, min_edge_weight); // */

    do {
      changed = false;

      // balance the work between nodes and root
      for (unsigned int cpu_index = 1; cpu_index < cluster.getNumNodes(); ++cpu_index) {
        Node& cpu_node = cluster.getNode(cpu_index);
        int work_deficit = cpu_node.getTotalWorkNeeded(timeEst, kConfig) - cpu_node.getBalCount();
        if (0 > work_deficit) {  // node has extra work
          int extra_blocks = abs(work_deficit);
          for (int block_index = 0; (block_index < extra_blocks) && (0 < cpu_node.getBalCount()); ++block_index) {
            // move block from child to parent
            cpu_node.decrementBalCount();
            root.incrementBalCount();
            changed = true;
          }
        } else if (0 < work_deficit) {  // child needs more work
          work_request.setTimeDiff(timeEst - cpu_node.getBalTimeEst(0, kConfig));
          work_request.setIndex(cpu_index);
          work_queue.push(work_request);
        }
      }

      for (unsigned int cpu_index = 0; cpu_index < root.getNumChildren(); ++cpu_index) {
        Node& cpu_node = root.getChild(cpu_index);
        int work_deficit = cpu_node.getTotalWorkNeeded(timeEst, kConfig) - cpu_node.getBalCount();
        if (0 > work_deficit) {  // child has extra work
          int extra_blocks = abs(work_deficit);
          for (int block_index = 0; (block_index < extra_blocks) && (0 < cpu_node.getBalCount()); ++block_index) {
            // move block from child to parent
            cpu_node.decrementBalCount();
            root.incrementBalCount();
            changed = true;
          }
        } else if (0 < work_deficit) {  // child needs more work
          work_request.setTimeDiff(timeEst - cpu_node.getBalTimeEst(0, kConfig));
          work_request.setIndex(-1 * cpu_index);  // hack so I know to give to one of root's children
          work_queue.push(work_request);
        }
      }

      /* at this point we have all extra blocks, and now we need
         to distribute blocks to children that need it */
      while (0 < root.getBalCount() &&  // there are blocks left to give
             !work_queue.empty()) {     // there are requests left to fill
        // get largest request
        WorkRequest tmp = work_queue.top();
        work_queue.pop();

        double newTimeDiff = 0.0;
        int id = tmp.getIndex();
        if (id <= 0) {  // local child
          id = -1 * id;
          root.decrementBalCount();
          root.getChild(id).incrementBalCount();
          newTimeDiff = timeEst - root.getChild(id).getBalTimeEst(0, kConfig);
          changed = true;
        } else {  // request was from another node in cluster
          root.decrementBalCount();
          cluster.getNode(id).incrementBalCount();
          newTimeDiff = timeEst - cluster.getNode(id).getBalTimeEst(0, kConfig);
          changed = true;
        }
        // if there is still work left to do put it back on
        // the queue so that it will reorder correctly
        if (0 < newTimeDiff) {
          tmp.setTimeDiff(newTimeDiff);
          work_queue.push(tmp);
        }
      }

      // balance the work within each node
      for (unsigned int node = 0; node < cluster.getNumNodes(); ++node) {
        changed |= balanceNode(cluster.getNode(node), timeEst, kConfig);
      }
    } while (changed);
  }

  /* now that we know where everything should go, distribute the blocks */
  cluster.distributeBlocks(&decomp);

  /* the work is balanced, so we can fill the block directory */
  cluster.storeBlockLocs();
}
void DebugMachineReadable::solverInvocationInput(const Decomposition& decompositionNode, const std::string& input)
{
    std::cout << "% Input for solver at decomposition node " << decompositionNode.getNode().getGlobalId() << std::endl
              << input << std::endl;
}
Solver::Solver(const Decomposition& decomposition, const Application& app, const std::vector<std::string>& encodingFiles, bool reground, BranchAndBoundLevel bbLevel)
    : ::LazySolver(decomposition, app, bbLevel)
    , reground(reground)
    , encodingFiles(encodingFiles)
{
    Gringo::message_printer()->disable(Gringo::W_ATOM_UNDEFINED);

    if(!reground) {
        // Set up ASP solver
        config.solve.numModels = 0;
        Clasp::Asp::LogicProgram& claspProgramBuilder = static_cast<Clasp::Asp::LogicProgram&>(clasp.startAsp(config, true)); // TODO In leaves updates might not be necessary.

        struct LazyGringoOutputProcessor : GringoOutputProcessor
        {
            LazyGringoOutputProcessor(Solver* s, Clasp::Asp::LogicProgram& prg)
                : GringoOutputProcessor(prg), self(s)
            {
            }

            void storeAtom(unsigned int atomUid, Gringo::Value v) override
            {
                const std::string& n = *v.name();
                if(n == "childItem") {
                    ASP_CHECK(v.args().size() == 1, "'childItem' predicate does not have arity 1");
                    std::ostringstream argument;
                    v.args().front().print(argument);
                    self->itemsToLitIndices.emplace(String(argument.str()), self->literals.size());
                    self->literals.push_back(Clasp::posLit(atomUid));
                }
                else if(n == "childAuxItem") {
                    ASP_CHECK(v.args().size() == 1, "'childAuxItem' predicate does not have arity 1");
                    std::ostringstream argument;
                    v.args().front().print(argument);
                    self->auxItemsToLitIndices.emplace(String(argument.str()), self->literals.size());
                    self->literals.push_back(Clasp::posLit(atomUid));
                }
                GringoOutputProcessor::storeAtom(atomUid, v);
            }

            Solver* self;
        } gringoOutput(this, claspProgramBuilder);

        std::unique_ptr<Gringo::Output::OutputBase> out(new Gringo::Output::OutputBase({}, gringoOutput));
        Gringo::Input::Program program;
        asp_utils::DummyGringoModule module;
        Gringo::Scripts scripts(module);
        Gringo::Defines defs;
        Gringo::Input::NongroundProgramBuilder gringoProgramBuilder(scripts, program, *out, defs);
        Gringo::Input::NonGroundParser parser(gringoProgramBuilder);

        // Input: Induced subinstance
        std::unique_ptr<std::stringstream> instanceInput(new std::stringstream);
        asp_utils::induceSubinstance(*instanceInput, app.getInstance(), decomposition.getNode().getBag());
        app.getPrinter().solverInvocationInput(decomposition, instanceInput->str());

        // Input: Decomposition
        std::unique_ptr<std::stringstream> decompositionInput(new std::stringstream);
        asp_utils::declareDecomposition(decomposition, *decompositionInput);
        app.getPrinter().solverInvocationInput(decomposition, decompositionInput->str());

        // Pass input to ASP solver
        for(const auto& file : encodingFiles)
            parser.pushFile(std::string(file));
        parser.pushStream("<instance>", std::move(instanceInput));
        parser.pushStream("<decomposition>", std::move(decompositionInput));
        parser.parse();

        // Ground
        program.rewrite(defs);
        program.check();
        if(Gringo::message_printer()->hasError())
            throw std::runtime_error("Grounding stopped because of errors");
        auto gPrg = program.toGround(out->domains);
        Gringo::Ground::Parameters params;
        params.add("base", {});
        gPrg.ground(params, scripts, *out);
        params.clear();

        // Set value of external atoms to free
        for(const auto& p : literals)
            claspProgramBuilder.freeze(p.var(), Clasp::value_free);

        // Finalize ground program and create solver literals
        claspProgramBuilder.endProgram();

        // Map externals to their solver literals
        for(auto& p : literals) {
            p = claspProgramBuilder.getLiteral(p.var());
            assert(!p.watched()); // Literal must not be watched
        }

        for(const auto& atom : gringoOutput.getItemAtomInfos())
            itemAtomInfos.emplace_back(ItemAtomInfo(atom, claspProgramBuilder));
        for(const auto& atom : gringoOutput.getAuxItemAtomInfos())
            auxItemAtomInfos.emplace_back(AuxItemAtomInfo(atom, claspProgramBuilder));
//        for(const auto& atom : gringoOutput->getCurrentCostAtomInfos())
//            currentCostAtomInfos.emplace_back(CurrentCostAtomInfo(atom, claspProgramBuilder));
//        for(const auto& atom : gringoOutput->getCostAtomInfos())
//            costAtomInfos.emplace_back(CostAtomInfo(atom, claspProgramBuilder));

        // Prepare for solving.
        clasp.prepare();
    }
}
LeafSolver::LeafSolver(const Decomposition& decomposition, const Application& app)
    : ::Solver(decomposition, app)
{
    if(!decomposition.getNode().getBag().empty())
        throw std::runtime_error("ASP solver requires empty leaves");
}
void Balancer::perfBalanceStrongestDevice(Cluster &cluster, Decomposition& decomp) {
  WorkQueue work_queue;
  WorkRequest work_request;
  double total_weight(0.0);
  double min_edge_weight(0.0);
  double procTime(0.0);
  double commTime(0.0);
  double timeEst = procTime;
  bool changed(false);
  const int kStrongest(3);
  const int kConfig = kStrongest;
  Node &root = cluster.getNode(0);
  const size_t kNumBlocks = decomp.getNumSubDomains();

  // initialize block directory
  cluster.setNumBlocks(kNumBlocks);

  // get total iterations per second for cluster
  for (unsigned int node = 0; node < cluster.getNumNodes(); ++node) {
    total_weight += cluster.getNode(node).getTotalWeight(kConfig);
    min_edge_weight += cluster.getNode(node).getMinEdgeWeight(kConfig);
  }

  // quick estimation of runtime
  procTime = (0.0 == total_weight) ? std::numeric_limits<double>::max() : kNumBlocks / total_weight;
  commTime = (0.0 == min_edge_weight) ? std::numeric_limits<double>::max() : kNumBlocks / min_edge_weight;
  timeEst = procTime;
  timeEst += (std::numeric_limits<double>::max() - procTime >= commTime) ? commTime : 0.0;
  //printf("timeEst:%f\n", timeEst);

  // place all of the blocks on the root node
  for (size_t i = 0; i < kNumBlocks; ++i)
    root.incrementBalCount();

  /*fprintf(stderr,
      "perfBalance: \n\ttime est: %f sec\n\tprocTime: %f sec\n\tcommTime: %f sec\n\ttotal weight:%e \n\tmin edge weight:%e.\n",
      timeEst, procTime, commTime, total_weight, min_edge_weight); // */

  do {
    changed = false;
    //printf("beginning, changed == %s\n", (changed == true) ? "true" : "false");

    // balance the work between nodes and root
    for (unsigned int cpu_index = 1; cpu_index < cluster.getNumNodes(); ++cpu_index) {
      Node& cpu_node = cluster.getNode(cpu_index);
      int work_deficit = cpu_node.getTotalWorkNeeded(timeEst, kConfig) - cpu_node.getBalCount();
      if (0 > work_deficit) {  // node has extra work
        //printf("cpu node %d has %d extra blocks.\n", cpu_index, work_deficit);
        int extra_blocks = abs(work_deficit);
        for (int block_index = 0; (block_index < extra_blocks) && (0 < cpu_node.getBalCount()); ++block_index) {
          // move block from child to parent
          cpu_node.decrementBalCount();
          root.incrementBalCount();
          changed = true;
        }
      } else if (0 < work_deficit) {  // child needs more work
        work_request.setTimeDiff(timeEst - cpu_node.getBalTimeEst(0, kConfig));
        work_request.setIndex(cpu_index);
        work_queue.push(work_request);
      }
    }

    // go through all of the root node's GPUs
    for (unsigned int index = 0; index < root.getNumChildren(); ++index) {
      Node& node = root.getChild(index);
      int work_deficit = node.getTotalWorkNeeded(timeEst, kConfig) - node.getBalCount();
      if (0 > work_deficit) {  // child has extra work
        //printf("root child %d has %d blocks and only needs %d blocks.\n", index,
        //    node.getBalCount(), node.getTotalWorkNeeded(timeEst, kConfig));
        int extra_blocks = abs(work_deficit);
        for (int block_index = 0; (block_index < extra_blocks) && (0 < node.getBalCount()); ++block_index) {
          // move block from child to parent
          node.decrementBalCount();
          root.incrementBalCount();
          //changed = true;
        }
      } else if (0 < work_deficit) {  // child needs more work
        work_request.setTimeDiff(timeEst - node.getBalTimeEst(0, kConfig));
        work_request.setIndex(-1 * index);  // hack so I know to give to one of root's children
        work_queue.push(work_request);
      }
    }

    /* at this point we have all extra blocks, and now we need
       to distribute blocks to children that need it */
    //printf("after collecting extra blocks from children, changed == %s\n",
    //    (changed == true) ? "true" : "false");

    // while root has extra blocks and there are requests left to fill
    while (0 < (root.getBalCount() - root.getTotalWorkNeeded(timeEst, kConfig)) &&
           !work_queue.empty()) {
      //printf("root needs %d blocks and has %d blocks.\n",
      //    root.getTotalWorkNeeded(timeEst, kConfig), root.getBalCount());

      // get largest request
      WorkRequest tmp = work_queue.top();
      work_queue.pop();

      double newTimeDiff = 0.0;
      int id = tmp.getIndex();
      if (id <= 0) {  // local child
        //printf("giving block to local child.\n");
        id = -1 * id;
        root.decrementBalCount();
        root.getChild(id).incrementBalCount();
        newTimeDiff = timeEst - root.getChild(id).getBalTimeEst(0, kConfig);
        changed = true;
      } else {  // request was from another node in cluster
        //printf("giving block to cpu node child.\n");
        root.decrementBalCount();
        cluster.getNode(id).incrementBalCount();
        newTimeDiff = timeEst - cluster.getNode(id).getBalTimeEst(0, kConfig);
        changed = true;
      }
      // if there is still work left to do put it back on
      // the queue so that it will reorder correctly
      if (0 < newTimeDiff) {
        tmp.setTimeDiff(newTimeDiff);
        work_queue.push(tmp);
      }
    }
    //printf("after distributing extra blocks to cpu nodes, changed == %s\n",
    //    (changed == true) ? "true" : "false");

    // balance the work within each node
    for (unsigned int node = 0; node < cluster.getNumNodes(); ++node) {
      balanceNode(cluster.getNode(node), timeEst, kConfig);
      //changed |= balanceNode(cluster.getNode(node), timeEst, kConfig);
    }
    //printf("after balancing within each node, changed == %s\n",
    //    (changed == true) ? "true" : "false");
    //printClusterBalCount(cluster);
    //printf("************* END OF BALANCE ITERATION ***********\n");
  } while (changed);
}