void VsStorageOverheadThreshold::process() { cerr << "This is an experiment with name: " << this->getClassName() << endl; SimulationConf simConf; SchemaStats stats; Cost cost(stats); util::AutoTimer timer; ExperimentalData queryIOExp("QueryIOVsStorageOverheadThreshold"); ExperimentalData runningTimeExp("RunningTimeVsStorageOverheadThreshold"); ExperimentalData storageExp("StorageOverheadVsStorageOverheadThreshold"); auto expData = { &queryIOExp, &runningTimeExp, &storageExp }; makeQueryIOExp(&queryIOExp); makeRunningTimeExp(&runningTimeExp); makeStorageExp(&storageExp); for (auto exp : expData) exp->open(); auto solvers = { SolverFactory::instance().makeSinglePartition(), SolverFactory::instance().makePartitionPerAttribute(), SolverFactory::instance().makeOptimalOverlapping(), SolverFactory::instance().makeOptimalNonOverlapping(), SolverFactory::instance().makeHeuristicOverlapping(), SolverFactory::instance().makeHeuristicNonOverlapping() }; vector<double> storageOverheadThresholds = { 0.0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0 }; double total = solvers.size() * storageOverheadThresholds.size() * numRuns; double completed = 0; vector<util::RunningStat> io; vector<util::RunningStat> storage; vector<util::RunningStat> times; vector<std::string> names; vector<bool> errorFlags; for (auto solver : solvers) { io.push_back(util::RunningStat()); storage.push_back(util::RunningStat()); times.push_back(util::RunningStat()); names.push_back(solver->getClassName()); vector<std::string> names; errorFlags.push_back(false); } int j; for (double sot : storageOverheadThresholds) { for (int i = 0; i < numRuns; i++) { vector<std::unique_ptr<Attribute>> allAttributes; auto workloadAndStats = simConf.getQueryWorkloadAndStats(allAttributes); QueryWorkload workload = workloadAndStats.first; stats = workloadAndStats.second; j = 0; for (auto solver : solvers) { timer.start(); Partitioning partitioning; try { partitioning = solver->solve(workload, sot, stats); } catch (const runtime_error& error) { cerr << "Unable to find a solution" << endl; errorFlags.at(j) = true; } timer.stop(); if (!errorFlags.at(j)) { io.at(j).push(cost.getIOCost(partitioning, workload)); storage.at(j).push( cost.getStorageOverhead(partitioning, workload)); times.at(j).push( timer.getRealTimeInSeconds()); } j++; cerr << "."; completed++; } } int j = 0; for (auto solver : solvers) { if (!errorFlags.at(j)) { runningTimeExp.addRecord(); runningTimeExp.setFieldValue("solver", solver->getClassName()); runningTimeExp.setFieldValue( "storageOverheadThreshold", boost::str(boost::format("%.2f") % sot)); runningTimeExp.setFieldValue("time", times.at(j).getMean()); runningTimeExp.setFieldValue( "deviation", times.at(j).getStandardDeviation()); times.at(j).clear(); queryIOExp.addRecord(); queryIOExp.setFieldValue("solver", solver->getClassName()); queryIOExp.setFieldValue( "storageOverheadThreshold", boost::str(boost::format("%.2f") % sot)); queryIOExp.setFieldValue("io", io.at(j).getMean()); queryIOExp.setFieldValue( "deviation", io.at(j).getStandardDeviation()); io.at(j).clear(); storageExp.addRecord(); storageExp.setFieldValue("solver", solver->getClassName()); storageExp.setFieldValue( "storageOverheadThreshold", boost::str(boost::format("%.2f") % sot)); storageExp.setFieldValue("storage", storage.at(j).getMean()); storageExp.setFieldValue( "deviation", storage.at(j).getStandardDeviation()); storage.at(j).clear(); } else { runningTimeExp.addRecord(); runningTimeExp.setFieldValue("solver", solver->getClassName()); runningTimeExp.setFieldValue( "storageOverheadThreshold", boost::str(boost::format("%.2f") % sot)); runningTimeExp.setFieldValue("time", "n/a"); runningTimeExp.setFieldValue("deviation", "n/a"); times.at(j).clear(); queryIOExp.addRecord(); queryIOExp.setFieldValue("solver", solver->getClassName()); queryIOExp.setFieldValue( "storageOverheadThreshold", boost::str(boost::format("%.2f") % sot)); queryIOExp.setFieldValue("io", "n/a"); queryIOExp.setFieldValue("deviation", "n/a"); io.at(j).clear(); storageExp.addRecord(); storageExp.setFieldValue("solver", solver->getClassName()); storageExp.setFieldValue( "storageOverheadThreshold", boost::str(boost::format("%.2f") % sot)); storageExp.setFieldValue("storage", "n/a"); storageExp.setFieldValue("deviation", "n/a"); storage.at(j).clear(); errorFlags.at(j) = false; } j++; } cerr << " (" << (completed / total) * 100 << "%)" << endl; } for (auto exp : expData) exp->close(); };
void VsBlockSize::process() { SimulationConf simConf; double storageOverheadThreshold = 1.0; assert(graphs_.size() >= 1); simConf.setAttributeCount( graphs_[0]->getConf().getEdgeSchema().getAttributes().size()); std::vector<std::vector<core::FocusedIntervalQuery>> queries; std::vector<SchemaStats> stats; std::vector<QueryWorkload> workloads; std::cout << "Generating workload..." << std::endl; for (int i=0; i < numRuns_; i++) { std::cout << " " << i << "/" << numRuns_ << std::endl; std::vector<std::vector<std::string> > templates = simConf.getQueryTemplates(graphs_[0].get()); std::vector<core::FocusedIntervalQuery> qs = ExpSetupHelper::genQueries(templates, queryZipfParam_, numQueries_, tsStart_, tsEnd_, vertices_); ExpSetupHelper::runWorkload(graphs_[0].get(),qs); SchemaStats ss = graphs_[0]->getSchemaStats(); std::map<BucketId,common::QueryWorkload> ws = graphs_[0]->getWorkloads(); // Make sure everything is in one bucket assert(ws.size() == 1); QueryWorkload w = ws.begin()->second; // (queries, stats, workload) queries.push_back(qs); stats.push_back(ss); workloads.push_back(w); graphs_[0]->resetWorkloads(); } std::cout << "done." << std::endl; ExperimentalData edgeIOCountExp("EdgeIOCountVsBlockSize"); ExperimentalData edgeWriteIOCountExp("EdgeWriteIOCountVsBlockSize"); ExperimentalData edgeReadIOCountExp("EdgeReadIOCountVsBlockSize"); auto expData = { &edgeIOCountExp, &edgeWriteIOCountExp, &edgeReadIOCountExp }; makeEdgeIOCountExp(&edgeIOCountExp); makeEdgeWriteIOCountExp(&edgeWriteIOCountExp); makeEdgeReadIOCountExp(&edgeReadIOCountExp); for (auto exp : expData) exp->open(); vector<util::RunningStat> edgeIO; vector<util::RunningStat> edgeWriteIO; vector<util::RunningStat> edgeReadIO; vector<std::string> names; vector< shared_ptr<Solver> > solvers = { SolverFactory::instance().makeSinglePartition(), SolverFactory::instance().makeOptimalNonOverlapping(), SolverFactory::instance().makeHeuristicNonOverlapping() }; for (auto solver : solvers) { edgeIO.push_back(util::RunningStat()); edgeWriteIO.push_back(util::RunningStat()); edgeReadIO.push_back(util::RunningStat()); names.push_back(solver->getClassName()); } int solverIndex; size_t prevEdgeIOCount; size_t prevEdgeReadIOCount; size_t prevEdgeWriteIOCount; int x = 0; int total = graphs_.size() * numRuns_ * solvers.size(); std::cout << "Running experiments..." << std::endl; int blockSizeIndex = -1; for (auto iter = graphs_.begin(); iter != graphs_.end(); ++iter) { blockSizeIndex++; for (int i = 0; i < numRuns_; i++) { solverIndex = -1; for (auto solver : solvers) { solverIndex++; auto & partIndex = (*iter)->getPartitionIndex(); auto origParting = partIndex.getTimeSlicedPartitioning(Timestamp(0.0)); intergdb::common::Partitioning solverSolution = solver->solve(workloads[i], storageOverheadThreshold, stats[i]); std::cout << "Workload: " << workloads[i].toString() << std::endl; std::cout << "Summary size: " << workloads[i].getQuerySummaries().size() << std::endl; /*for (auto summary : workloads[i].getQuerySummaries()) std::cout << "Summary: " << summary.toString() << std::endl; */ std::cout << "Solver: " << solver->getClassName() << std::endl; std::cout << solverSolution.toString() << std::endl; TimeSlicedPartitioning newParting{}; // -inf to inf newParting.getPartitioning() = solverSolution.toStringSet(); partIndex.replaceTimeSlicedPartitioning( origParting, {newParting}); // to flush the filesystem cache //system(“purge”); prevEdgeIOCount = (*iter)->getEdgeIOCount(); prevEdgeReadIOCount = (*iter)->getEdgeReadIOCount(); prevEdgeWriteIOCount = (*iter)->getEdgeWriteIOCount(); ExpSetupHelper::runWorkload((*iter).get(),queries[i]); std::cout << (*iter)->getEdgeIOCount() - prevEdgeIOCount << std::endl; std::cout << (*iter)->getEdgeReadIOCount() - prevEdgeReadIOCount << std::endl; std::cout << (*iter)->getEdgeWriteIOCount() - prevEdgeWriteIOCount << std::endl; edgeIO[solverIndex].push( (*iter)->getEdgeIOCount() - prevEdgeIOCount); edgeReadIO[solverIndex].push( (*iter)->getEdgeReadIOCount() - prevEdgeReadIOCount); edgeWriteIO[solverIndex].push( (*iter)->getEdgeWriteIOCount() - prevEdgeWriteIOCount); x++; std::cout << " " << x << "/" << total << std::endl; } } for (int solverIndex = 0; solverIndex < solvers.size(); solverIndex++) { edgeIOCountExp.addRecord(); edgeIOCountExp.setFieldValue( "solver", solvers[solverIndex]->getClassName()); edgeIOCountExp.setFieldValue( "blockSize", boost::lexical_cast<std::string>(blockSizes_[blockSizeIndex])); edgeIOCountExp.setFieldValue( "edgeIO", edgeIO[solverIndex].getMean()); edgeIOCountExp.setFieldValue( "deviation", edgeIO[solverIndex].getStandardDeviation()); edgeIO[solverIndex].clear(); edgeWriteIOCountExp.addRecord(); edgeWriteIOCountExp.setFieldValue( "solver", solvers[solverIndex]->getClassName()); edgeWriteIOCountExp.setFieldValue("blockSize", boost::lexical_cast<std::string>(blockSizes_[blockSizeIndex])); edgeWriteIOCountExp.setFieldValue( "edgeWriteIO", edgeWriteIO[solverIndex].getMean()); edgeWriteIOCountExp.setFieldValue( "deviation", edgeWriteIO[solverIndex].getStandardDeviation()); edgeWriteIO[solverIndex].clear(); edgeReadIOCountExp.addRecord(); edgeReadIOCountExp.setFieldValue( "solver", solvers[solverIndex]->getClassName()); edgeReadIOCountExp.setFieldValue( "blockSize", boost::lexical_cast<std::string>(blockSizes_[blockSizeIndex])); edgeReadIOCountExp.setFieldValue( "edgeReadIO", edgeReadIO[solverIndex].getMean()); edgeReadIOCountExp.setFieldValue( "deviation", edgeReadIO[solverIndex].getStandardDeviation()); edgeReadIO[solverIndex].clear(); } } std::cout << "done." << std::endl; for (auto exp : expData) exp->close(); };
void VsTimeDeltaBFS::process() { SimulationConf simConf; double storageOverheadThreshold = 1.0; assert(graph_ != NULL); simConf.setAttributeCount( graph_->getConf().getEdgeSchema().getAttributes().size()); simConf.setQueryTypeCount(numQueryTemplates_); ExperimentalData edgeIOCountExp("EdgeIOCountVsTimeDeltaBFS"); ExperimentalData edgeWriteIOCountExp("EdgeWriteIOCountVsTimeDeltaBFS"); ExperimentalData edgeReadIOCountExp("EdgeReadIOCountVsTimeDeltaBFS"); auto expData = { &edgeIOCountExp, &edgeWriteIOCountExp, &edgeReadIOCountExp }; makeEdgeIOCountExp(&edgeIOCountExp); makeEdgeWriteIOCountExp(&edgeWriteIOCountExp); makeEdgeReadIOCountExp(&edgeReadIOCountExp); for (auto exp : expData) exp->open(); vector<util::RunningStat> edgeIO; vector<util::RunningStat> edgeWriteIO; vector<util::RunningStat> edgeReadIO; vector<std::string> names; vector< shared_ptr<Solver> > solvers = { SolverFactory::instance().makeSinglePartition(), SolverFactory::instance().makeOptimalNonOverlapping(), SolverFactory::instance().makeHeuristicNonOverlapping() }; for (auto solver : solvers) { edgeIO.push_back(util::RunningStat()); edgeWriteIO.push_back(util::RunningStat()); edgeReadIO.push_back(util::RunningStat()); names.push_back(solver->getClassName()); } int solverIndex; size_t prevEdgeIOCount; size_t prevEdgeReadIOCount; size_t prevEdgeWriteIOCount; std::cout << "Running experiments..." << std::endl; int deltaIndex = -1; SchemaStats stats = graph_->getSchemaStats(); std::cout << stats.toString() << std::endl; for (auto delta : timeDeltas_) { deltaIndex++; for (int i = 0; i < numRuns_; i++) { // For each run with a different delta, generate a different set of queries: // generate a different workload with numQueryTemplates std::vector<std::vector<std::string> > templates = simConf.getQueryTemplates(graph_.get()); std::vector<core::FocusedIntervalQuery> queries = ExpSetupHelper::genSearchQueries(templates, queryZipfParam_, numQueries_, tsStart_, tsEnd_, delta, vertices_); graph_->resetWorkloads(); ExpSetupHelper::runBFS(graph_.get(), queries); std::map<BucketId,common::QueryWorkload> ws = graph_->getWorkloads(); // Make sure everything is in one bucket assert(ws.size() == 1); QueryWorkload workload = ws.begin()->second; solverIndex = -1; for (auto solver : solvers) { solverIndex++; auto & partIndex = graph_->getPartitionIndex(); auto origParting = partIndex.getTimeSlicedPartitioning(Timestamp(0.0)); intergdb::common::Partitioning solverSolution = solver->solve(workload, storageOverheadThreshold, stats); std::cout << "Solver: " << solver->getClassName() << std::endl; std::cout << "numRuns: " << i << std::endl; std::cout << "delta: " << delta << std::endl; std::cout << solverSolution.toString() << std::endl; TimeSlicedPartitioning newParting{}; // -inf to inf newParting.getPartitioning() = solverSolution.toStringSet(); partIndex.replaceTimeSlicedPartitioning( origParting, {newParting}); prevEdgeIOCount = graph_->getEdgeIOCount(); prevEdgeReadIOCount = graph_->getEdgeReadIOCount(); prevEdgeWriteIOCount = graph_->getEdgeWriteIOCount(); ExpSetupHelper::runBFS(graph_.get(),queries); std::cout << "getEdgeIOCount: " << graph_->getEdgeIOCount() - prevEdgeIOCount << std::endl; std::cout << "getEdgeReadIOCount: " << graph_->getEdgeReadIOCount() - prevEdgeReadIOCount << std::endl; std::cout << "getEdgeWriteIOCount: " << graph_->getEdgeWriteIOCount() - prevEdgeWriteIOCount << std::endl; edgeIO[solverIndex].push( graph_->getEdgeIOCount() - prevEdgeIOCount); edgeReadIO[solverIndex].push( graph_->getEdgeReadIOCount() - prevEdgeReadIOCount); edgeWriteIO[solverIndex].push( graph_->getEdgeWriteIOCount() - prevEdgeWriteIOCount); } } for (int solverIndex = 0; solverIndex < solvers.size(); solverIndex++) { edgeIOCountExp.addRecord(); edgeIOCountExp.setFieldValue( "solver", solvers[solverIndex]->getClassName()); edgeIOCountExp.setFieldValue( "delta", boost::lexical_cast<std::string>(timeDeltas_[deltaIndex])); edgeIOCountExp.setFieldValue( "edgeIO", edgeIO[solverIndex].getMean()); edgeIOCountExp.setFieldValue( "deviation", edgeIO[solverIndex].getStandardDeviation()); edgeIO[solverIndex].clear(); edgeWriteIOCountExp.addRecord(); edgeWriteIOCountExp.setFieldValue( "solver", solvers[solverIndex]->getClassName()); edgeWriteIOCountExp.setFieldValue("delta", boost::lexical_cast<std::string>(timeDeltas_[deltaIndex])); edgeWriteIOCountExp.setFieldValue( "edgeWriteIO", edgeWriteIO[solverIndex].getMean()); edgeWriteIOCountExp.setFieldValue( "deviation", edgeWriteIO[solverIndex].getStandardDeviation()); edgeWriteIO[solverIndex].clear(); edgeReadIOCountExp.addRecord(); edgeReadIOCountExp.setFieldValue( "solver", solvers[solverIndex]->getClassName()); edgeReadIOCountExp.setFieldValue( "delta", boost::lexical_cast<std::string>(timeDeltas_[deltaIndex])); edgeReadIOCountExp.setFieldValue( "edgeReadIO", edgeReadIO[solverIndex].getMean()); edgeReadIOCountExp.setFieldValue( "deviation", edgeReadIO[solverIndex].getStandardDeviation()); edgeReadIO[solverIndex].clear(); } } // std::cout << "done." << std::endl; for (auto exp : expData) exp->close(); };
void VsNumAttributes::process() { cerr << "This is an experiment with name: " << this->getClassName() << endl; SimulationConf simConf; SchemaStats stats; Cost cost(stats); util::AutoTimer timer; ExperimentalData queryIOExp("QueryIOVsNumAttributes"); ExperimentalData runningTimeExp("RunningTimeVsNumAttributes"); ExperimentalData storageExp("StorageOverheadVsNumAttributes"); auto expData = { &queryIOExp, &runningTimeExp, &storageExp }; makeQueryIOExp(&queryIOExp); makeRunningTimeExp(&runningTimeExp); makeStorageExp(&storageExp); for (auto exp : expData) exp->open(); auto solvers = { SolverFactory::instance().makeSinglePartition(), SolverFactory::instance().makePartitionPerAttribute(), SolverFactory::instance().makeOptimalOverlapping(), SolverFactory::instance().makeOptimalNonOverlapping(), SolverFactory::instance().makeHeuristicOverlapping(), SolverFactory::instance().makeHeuristicNonOverlapping() }; //auto attributeCounts = {2, 4, 6, 8, 10, 12, 14, 16 }; auto attributeCounts = {32, 48, 64, 80, 96, 112, 128}; double total = solvers.size() * attributeCounts.size() * numRuns; double completed = 0; vector<util::RunningStat> io; vector<util::RunningStat> storage; vector<util::RunningStat> times; vector<std::string> names; for (auto solver : solvers) { io.push_back(util::RunningStat()); storage.push_back(util::RunningStat()); times.push_back(util::RunningStat()); names.push_back(solver->getClassName()); vector<std::string> names; } int j; for (double attributeCount : attributeCounts) { for (int i = 0; i < numRuns; i++) { simConf.setAttributeCount(attributeCount); vector<unique_ptr<Attribute>> allAttributes; auto workloadAndStats = simConf.getQueryWorkloadAndStats(allAttributes); QueryWorkload const& workload = workloadAndStats.first; stats = workloadAndStats.second; j = 0; for (auto solver : solvers) { timer.start(); Partitioning partitioning = solver->solve( workload, storageOverheadThreshold, stats); timer.stop(); io.at(j).push(cost.getIOCost(partitioning, workload)); storage.at(j).push(cost.getStorageOverhead( partitioning, workload)); times.at(j).push(timer.getRealTimeInSeconds()); j++; cerr << "."; completed++; } } int j = 0; for (auto solver : solvers) { runningTimeExp.addRecord(); runningTimeExp.setFieldValue("solver", solver->getClassName()); runningTimeExp.setFieldValue("attributes", attributeCount); runningTimeExp.setFieldValue("time", times.at(j).getMean()); runningTimeExp.setFieldValue( "deviation", times.at(j).getStandardDeviation()); times.at(j).clear(); queryIOExp.addRecord(); queryIOExp.setFieldValue("solver", solver->getClassName()); queryIOExp.setFieldValue("attributes", attributeCount); queryIOExp.setFieldValue("io", io.at(j).getMean()); queryIOExp.setFieldValue( "deviation", io.at(j).getStandardDeviation()); io.at(j).clear(); storageExp.addRecord(); storageExp.setFieldValue("solver", solver->getClassName()); storageExp.setFieldValue("attributes",attributeCount); storageExp.setFieldValue("storage", storage.at(j).getMean()); storageExp.setFieldValue( "deviation", storage.at(j).getStandardDeviation()); storage.at(j).clear(); j++; } cerr << " (" << (completed / total) * 100 << "%)" << endl; } for (auto exp : expData) exp->close(); };