void VsStorageOverheadThreshold::process()
{
    cerr << "This is an experiment with name: "
      << this->getClassName() << endl;

    SimulationConf simConf;
    SchemaStats stats;
    Cost cost(stats);
    util::AutoTimer timer;

    ExperimentalData queryIOExp("QueryIOVsStorageOverheadThreshold");
    ExperimentalData runningTimeExp("RunningTimeVsStorageOverheadThreshold");
    ExperimentalData storageExp("StorageOverheadVsStorageOverheadThreshold");

    auto expData = { &queryIOExp, &runningTimeExp, &storageExp };

    makeQueryIOExp(&queryIOExp);
    makeRunningTimeExp(&runningTimeExp);
    makeStorageExp(&storageExp);

    for (auto exp : expData)
        exp->open();

    auto solvers = {
        SolverFactory::instance().makeSinglePartition(),
        SolverFactory::instance().makePartitionPerAttribute(),
        SolverFactory::instance().makeOptimalOverlapping(),
        SolverFactory::instance().makeOptimalNonOverlapping(),
        SolverFactory::instance().makeHeuristicOverlapping(),
        SolverFactory::instance().makeHeuristicNonOverlapping()
    };

    vector<double> storageOverheadThresholds =
        { 0.0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0 };

    double total = solvers.size()
        * storageOverheadThresholds.size()
        * numRuns;
    double completed = 0;

    vector<util::RunningStat> io;
    vector<util::RunningStat> storage;
    vector<util::RunningStat> times;
    vector<std::string> names;
    vector<bool> errorFlags;


    for (auto solver : solvers) {
        io.push_back(util::RunningStat());
        storage.push_back(util::RunningStat());
        times.push_back(util::RunningStat());
        names.push_back(solver->getClassName());
        vector<std::string> names;
        errorFlags.push_back(false);
    }

    int j;
    for (double sot : storageOverheadThresholds) {
        for (int i = 0; i < numRuns; i++) {
            vector<std::unique_ptr<Attribute>> allAttributes;
            auto workloadAndStats =
                simConf.getQueryWorkloadAndStats(allAttributes);
            QueryWorkload workload = workloadAndStats.first;
            stats = workloadAndStats.second;
            j = 0;
            for (auto solver : solvers) {
                timer.start();
                Partitioning partitioning;
                try {
                    partitioning = solver->solve(workload, sot, stats);
                } catch (const runtime_error& error) {
                    cerr << "Unable to find a solution" << endl;
                    errorFlags.at(j) = true;
                }
                timer.stop();
                if (!errorFlags.at(j)) {
                    io.at(j).push(cost.getIOCost(partitioning, workload));
                    storage.at(j).push(
                        cost.getStorageOverhead(partitioning, workload));
                    times.at(j).push(
                        timer.getRealTimeInSeconds());
                }
                j++;
                cerr << ".";
                completed++;
            }
        }

        int j = 0;
        for (auto solver : solvers) {
            if (!errorFlags.at(j)) {
                runningTimeExp.addRecord();
                runningTimeExp.setFieldValue("solver", solver->getClassName());
                runningTimeExp.setFieldValue(
                    "storageOverheadThreshold",
                    boost::str(boost::format("%.2f") % sot));
                runningTimeExp.setFieldValue("time", times.at(j).getMean());
                runningTimeExp.setFieldValue(
                    "deviation", times.at(j).getStandardDeviation());
                times.at(j).clear();

                queryIOExp.addRecord();
                queryIOExp.setFieldValue("solver", solver->getClassName());
                queryIOExp.setFieldValue(
                    "storageOverheadThreshold",
                    boost::str(boost::format("%.2f") % sot));
                queryIOExp.setFieldValue("io", io.at(j).getMean());
                queryIOExp.setFieldValue(
                    "deviation", io.at(j).getStandardDeviation());
                io.at(j).clear();

                storageExp.addRecord();
                storageExp.setFieldValue("solver", solver->getClassName());
                storageExp.setFieldValue(
                    "storageOverheadThreshold",
                    boost::str(boost::format("%.2f") % sot));
                storageExp.setFieldValue("storage", storage.at(j).getMean());
                storageExp.setFieldValue(
                    "deviation", storage.at(j).getStandardDeviation());
                storage.at(j).clear();
            } else {
                runningTimeExp.addRecord();
                runningTimeExp.setFieldValue("solver", solver->getClassName());
                runningTimeExp.setFieldValue(
                    "storageOverheadThreshold",
                    boost::str(boost::format("%.2f") % sot));
                runningTimeExp.setFieldValue("time", "n/a");
                runningTimeExp.setFieldValue("deviation", "n/a");
                times.at(j).clear();

                queryIOExp.addRecord();
                queryIOExp.setFieldValue("solver", solver->getClassName());
                queryIOExp.setFieldValue(
                    "storageOverheadThreshold",
                    boost::str(boost::format("%.2f") % sot));
                queryIOExp.setFieldValue("io", "n/a");
                queryIOExp.setFieldValue("deviation", "n/a");
                io.at(j).clear();

                storageExp.addRecord();
                storageExp.setFieldValue("solver", solver->getClassName());
                storageExp.setFieldValue(
                    "storageOverheadThreshold",
                    boost::str(boost::format("%.2f") % sot));
                storageExp.setFieldValue("storage", "n/a");
                storageExp.setFieldValue("deviation", "n/a");
                storage.at(j).clear();

                errorFlags.at(j) = false;
            }
            j++;
        }
        cerr << " (" << (completed / total) * 100 << "%)" << endl;
    }

    for (auto exp : expData)
        exp->close();
};
// Example #2
// 0
void VsBlockSize::process()
{
    SimulationConf simConf;
    double storageOverheadThreshold = 1.0;

    assert(graphs_.size() >= 1);
    simConf.setAttributeCount(
        graphs_[0]->getConf().getEdgeSchema().getAttributes().size());

    std::vector<std::vector<core::FocusedIntervalQuery>> queries;
    std::vector<SchemaStats> stats;
    std::vector<QueryWorkload> workloads;

    std::cout << "Generating workload..." << std::endl;
    for (int i=0; i < numRuns_; i++) {
        std::cout << "    " << i << "/" << numRuns_ << std::endl;
      
         std::vector<std::vector<std::string> > templates =
            simConf.getQueryTemplates(graphs_[0].get());
     
        std::vector<core::FocusedIntervalQuery> qs = 
            ExpSetupHelper::genQueries(templates,
                                       queryZipfParam_, 
                                       numQueries_,
                                       tsStart_,
                                       tsEnd_, 
                                       vertices_);

        ExpSetupHelper::runWorkload(graphs_[0].get(),qs);
        SchemaStats ss = graphs_[0]->getSchemaStats();
        std::map<BucketId,common::QueryWorkload> ws =
            graphs_[0]->getWorkloads();
        // Make sure everything is in one bucket
        assert(ws.size() == 1);
        QueryWorkload w = ws.begin()->second;

        // (queries, stats, workload)
        queries.push_back(qs);
        stats.push_back(ss);
        workloads.push_back(w);

        graphs_[0]->resetWorkloads();
    }
    std::cout << "done." << std::endl;

    ExperimentalData edgeIOCountExp("EdgeIOCountVsBlockSize");
    ExperimentalData edgeWriteIOCountExp("EdgeWriteIOCountVsBlockSize");
    ExperimentalData edgeReadIOCountExp("EdgeReadIOCountVsBlockSize");

    auto expData =
        { &edgeIOCountExp, &edgeWriteIOCountExp, &edgeReadIOCountExp };

    makeEdgeIOCountExp(&edgeIOCountExp);
    makeEdgeWriteIOCountExp(&edgeWriteIOCountExp);
    makeEdgeReadIOCountExp(&edgeReadIOCountExp);

    for (auto exp : expData)
        exp->open();

    vector<util::RunningStat> edgeIO;
    vector<util::RunningStat> edgeWriteIO;
    vector<util::RunningStat> edgeReadIO;
    vector<std::string> names;
    vector< shared_ptr<Solver> > solvers =
    {
        SolverFactory::instance().makeSinglePartition(),
        SolverFactory::instance().makeOptimalNonOverlapping(),
        SolverFactory::instance().makeHeuristicNonOverlapping()
    };

    for (auto solver : solvers) {
        edgeIO.push_back(util::RunningStat());
        edgeWriteIO.push_back(util::RunningStat());
        edgeReadIO.push_back(util::RunningStat());
        names.push_back(solver->getClassName());
    }

    int solverIndex;
    size_t prevEdgeIOCount;
    size_t prevEdgeReadIOCount;
    size_t prevEdgeWriteIOCount;
    int x = 0;
    int total = graphs_.size() * numRuns_ * solvers.size();

    std::cout << "Running experiments..." << std::endl;
    int blockSizeIndex = -1;
    for (auto iter = graphs_.begin(); iter != graphs_.end(); ++iter) {
        blockSizeIndex++;
        for (int i = 0; i < numRuns_; i++) {
            solverIndex = -1;
            for (auto solver : solvers) {
                solverIndex++;
                auto & partIndex = (*iter)->getPartitionIndex();
                auto origParting =
                    partIndex.getTimeSlicedPartitioning(Timestamp(0.0));
                intergdb::common::Partitioning solverSolution =
                     solver->solve(workloads[i], storageOverheadThreshold,
                                   stats[i]);
                std::cout << "Workload: "
                    << workloads[i].toString() << std::endl;
                std::cout << "Summary size: "
                    << workloads[i].getQuerySummaries().size() << std::endl;

                /*for (auto summary : workloads[i].getQuerySummaries())
                    std::cout << "Summary: "
                              << summary.toString() << std::endl;
                */

                std::cout << "Solver: " <<  solver->getClassName() << std::endl;

                std::cout << solverSolution.toString() << std::endl;
                TimeSlicedPartitioning newParting{}; // -inf to inf
                newParting.getPartitioning() = solverSolution.toStringSet();
                partIndex.replaceTimeSlicedPartitioning(
                    origParting, {newParting});
                // to flush the filesystem cache
                //system(“purge”);

                prevEdgeIOCount = (*iter)->getEdgeIOCount();
                prevEdgeReadIOCount = (*iter)->getEdgeReadIOCount();
                prevEdgeWriteIOCount = (*iter)->getEdgeWriteIOCount();

                ExpSetupHelper::runWorkload((*iter).get(),queries[i]);


                std::cout <<
                    (*iter)->getEdgeIOCount() - prevEdgeIOCount << std::endl;
                std::cout <<
                    (*iter)->getEdgeReadIOCount() - prevEdgeReadIOCount
                    << std::endl;
                std::cout <<
                    (*iter)->getEdgeWriteIOCount() - prevEdgeWriteIOCount
                    << std::endl;

                edgeIO[solverIndex].push(
                    (*iter)->getEdgeIOCount() - prevEdgeIOCount);
                edgeReadIO[solverIndex].push(
                    (*iter)->getEdgeReadIOCount() - prevEdgeReadIOCount);
                edgeWriteIO[solverIndex].push(
                    (*iter)->getEdgeWriteIOCount() - prevEdgeWriteIOCount);
                x++;
                std::cout << "    " << x << "/" << total << std::endl;
            }
        }

        for (int solverIndex = 0; solverIndex < solvers.size(); solverIndex++)
        {

            edgeIOCountExp.addRecord();
            edgeIOCountExp.setFieldValue(
                "solver", solvers[solverIndex]->getClassName());
            edgeIOCountExp.setFieldValue(
                "blockSize",
                boost::lexical_cast<std::string>(blockSizes_[blockSizeIndex]));
            edgeIOCountExp.setFieldValue(
                "edgeIO", edgeIO[solverIndex].getMean());
            edgeIOCountExp.setFieldValue(
                "deviation", edgeIO[solverIndex].getStandardDeviation());
            edgeIO[solverIndex].clear();

            edgeWriteIOCountExp.addRecord();
            edgeWriteIOCountExp.setFieldValue(
                "solver", solvers[solverIndex]->getClassName());
            edgeWriteIOCountExp.setFieldValue("blockSize",
                boost::lexical_cast<std::string>(blockSizes_[blockSizeIndex]));
            edgeWriteIOCountExp.setFieldValue(
                "edgeWriteIO", edgeWriteIO[solverIndex].getMean());
            edgeWriteIOCountExp.setFieldValue(
                "deviation", edgeWriteIO[solverIndex].getStandardDeviation());
            edgeWriteIO[solverIndex].clear();

            edgeReadIOCountExp.addRecord();
            edgeReadIOCountExp.setFieldValue(
                "solver", solvers[solverIndex]->getClassName());
            edgeReadIOCountExp.setFieldValue(
                "blockSize",
                boost::lexical_cast<std::string>(blockSizes_[blockSizeIndex]));
            edgeReadIOCountExp.setFieldValue(
                "edgeReadIO", edgeReadIO[solverIndex].getMean());
            edgeReadIOCountExp.setFieldValue(
                "deviation", edgeReadIO[solverIndex].getStandardDeviation());
            edgeReadIO[solverIndex].clear();
        }
    }

    std::cout << "done." << std::endl;

    for (auto exp : expData)
        exp->close();
};
// Example #3
// 0
void VsTimeDeltaBFS::process()
{
    SimulationConf simConf;
    double storageOverheadThreshold = 1.0;
   
    assert(graph_ != NULL);
    simConf.setAttributeCount(
        graph_->getConf().getEdgeSchema().getAttributes().size());

    simConf.setQueryTypeCount(numQueryTemplates_);

    ExperimentalData edgeIOCountExp("EdgeIOCountVsTimeDeltaBFS");
    ExperimentalData edgeWriteIOCountExp("EdgeWriteIOCountVsTimeDeltaBFS");
    ExperimentalData edgeReadIOCountExp("EdgeReadIOCountVsTimeDeltaBFS");

    auto expData =
        { &edgeIOCountExp, &edgeWriteIOCountExp, &edgeReadIOCountExp };

    makeEdgeIOCountExp(&edgeIOCountExp);
    makeEdgeWriteIOCountExp(&edgeWriteIOCountExp);
    makeEdgeReadIOCountExp(&edgeReadIOCountExp);
     
    for (auto exp : expData)
        exp->open();

    vector<util::RunningStat> edgeIO;
    vector<util::RunningStat> edgeWriteIO;
    vector<util::RunningStat> edgeReadIO;
    vector<std::string> names;
    vector< shared_ptr<Solver> > solvers =
        {
            SolverFactory::instance().makeSinglePartition(),
            SolverFactory::instance().makeOptimalNonOverlapping(),
            SolverFactory::instance().makeHeuristicNonOverlapping()
        };

    for (auto solver : solvers) {
        edgeIO.push_back(util::RunningStat());
        edgeWriteIO.push_back(util::RunningStat());
        edgeReadIO.push_back(util::RunningStat());
        names.push_back(solver->getClassName());
    }

    int solverIndex;
    size_t prevEdgeIOCount;
    size_t prevEdgeReadIOCount;
    size_t prevEdgeWriteIOCount;
    
    std::cout << "Running experiments..." << std::endl;
  
    int deltaIndex = -1;

    SchemaStats stats = graph_->getSchemaStats();

    std::cout << stats.toString() << std::endl;

    for (auto delta : timeDeltas_) {
        deltaIndex++;
        for (int i = 0; i < numRuns_; i++) {
            
            // For each run with a different delta, generate a different set of queries:
            
            // generate a different workload with numQueryTemplates
            std::vector<std::vector<std::string> > templates =
                simConf.getQueryTemplates(graph_.get());

            std::vector<core::FocusedIntervalQuery> queries = 
                ExpSetupHelper::genSearchQueries(templates,
                                                 queryZipfParam_, 
                                                 numQueries_,
                                                 tsStart_,
                                                 tsEnd_, 
                                                 delta, 
                                                 vertices_);

            graph_->resetWorkloads();

            ExpSetupHelper::runBFS(graph_.get(), queries);
            
            std::map<BucketId,common::QueryWorkload> ws =
                graph_->getWorkloads();
            // Make sure everything is in one bucket
            assert(ws.size() == 1);
            QueryWorkload workload = ws.begin()->second;
            
            solverIndex = -1;
            for (auto solver : solvers) {
                solverIndex++;

                auto & partIndex = graph_->getPartitionIndex();
                auto origParting =
                    partIndex.getTimeSlicedPartitioning(Timestamp(0.0));
                intergdb::common::Partitioning solverSolution =
                    solver->solve(workload, storageOverheadThreshold, stats);
                
                std::cout << "Solver: " <<  solver->getClassName() << std::endl;
                std::cout << "numRuns: " << i << std::endl;
                std::cout << "delta: " << delta << std::endl;
                
                std::cout << solverSolution.toString() << std::endl;
                TimeSlicedPartitioning newParting{}; // -inf to inf
                newParting.getPartitioning() = solverSolution.toStringSet();
                partIndex.replaceTimeSlicedPartitioning(
                    origParting, {newParting});
                
                prevEdgeIOCount = graph_->getEdgeIOCount();
                prevEdgeReadIOCount = graph_->getEdgeReadIOCount();
                prevEdgeWriteIOCount = graph_->getEdgeWriteIOCount();
                
                ExpSetupHelper::runBFS(graph_.get(),queries);


                std::cout << "getEdgeIOCount: " << 
                    graph_->getEdgeIOCount() - prevEdgeIOCount << std::endl;
                std::cout << "getEdgeReadIOCount: " << 
                    graph_->getEdgeReadIOCount() - prevEdgeReadIOCount
                          << std::endl;
                std::cout << "getEdgeWriteIOCount: " << 
                    graph_->getEdgeWriteIOCount() - prevEdgeWriteIOCount
                          << std::endl;

                edgeIO[solverIndex].push(
                    graph_->getEdgeIOCount() - prevEdgeIOCount);
                edgeReadIO[solverIndex].push(
                    graph_->getEdgeReadIOCount() - prevEdgeReadIOCount);
                edgeWriteIO[solverIndex].push(
                    graph_->getEdgeWriteIOCount() - prevEdgeWriteIOCount);
                
            }
        }
    

        for (int solverIndex = 0; solverIndex < solvers.size(); solverIndex++)
        {

            edgeIOCountExp.addRecord();
            edgeIOCountExp.setFieldValue(
                "solver", solvers[solverIndex]->getClassName());
            edgeIOCountExp.setFieldValue(
                "delta",
                boost::lexical_cast<std::string>(timeDeltas_[deltaIndex]));
            edgeIOCountExp.setFieldValue(
                "edgeIO", edgeIO[solverIndex].getMean());
            edgeIOCountExp.setFieldValue(
                "deviation", edgeIO[solverIndex].getStandardDeviation());
            edgeIO[solverIndex].clear();

            edgeWriteIOCountExp.addRecord();
            edgeWriteIOCountExp.setFieldValue(
                "solver", solvers[solverIndex]->getClassName());
            edgeWriteIOCountExp.setFieldValue("delta",
                                              boost::lexical_cast<std::string>(timeDeltas_[deltaIndex]));
            edgeWriteIOCountExp.setFieldValue(
                "edgeWriteIO", edgeWriteIO[solverIndex].getMean());
            edgeWriteIOCountExp.setFieldValue(
                "deviation", edgeWriteIO[solverIndex].getStandardDeviation());
            edgeWriteIO[solverIndex].clear();

            edgeReadIOCountExp.addRecord();
            edgeReadIOCountExp.setFieldValue(
                "solver", solvers[solverIndex]->getClassName());
            edgeReadIOCountExp.setFieldValue(
                "delta",
                boost::lexical_cast<std::string>(timeDeltas_[deltaIndex]));
            edgeReadIOCountExp.setFieldValue(
                "edgeReadIO", edgeReadIO[solverIndex].getMean());
            edgeReadIOCountExp.setFieldValue(
                "deviation", edgeReadIO[solverIndex].getStandardDeviation());
            edgeReadIO[solverIndex].clear();
        }
    }

    // std::cout << "done." << std::endl;

     
    for (auto exp : expData)
        exp->close();
};
void VsNumAttributes::process()
{
    cerr << "This is an experiment with name: "
         << this->getClassName() << endl;

    SimulationConf simConf;
    SchemaStats stats;
    Cost cost(stats);
    util::AutoTimer timer;

    ExperimentalData queryIOExp("QueryIOVsNumAttributes");
    ExperimentalData runningTimeExp("RunningTimeVsNumAttributes");
    ExperimentalData storageExp("StorageOverheadVsNumAttributes");

    auto expData = { &queryIOExp, &runningTimeExp, &storageExp };

    makeQueryIOExp(&queryIOExp);
    makeRunningTimeExp(&runningTimeExp);
    makeStorageExp(&storageExp);

    for (auto exp : expData)
        exp->open();

    auto solvers = {
        SolverFactory::instance().makeSinglePartition(),
        SolverFactory::instance().makePartitionPerAttribute(),
        SolverFactory::instance().makeOptimalOverlapping(),
        SolverFactory::instance().makeOptimalNonOverlapping(),
        SolverFactory::instance().makeHeuristicOverlapping(),
        SolverFactory::instance().makeHeuristicNonOverlapping()
    };
    //auto attributeCounts = {2, 4, 6, 8, 10, 12, 14, 16 };
    auto attributeCounts = {32, 48, 64, 80, 96, 112, 128};

    double total = solvers.size() * attributeCounts.size()  * numRuns;
    double completed = 0;

    vector<util::RunningStat> io;
    vector<util::RunningStat> storage;
    vector<util::RunningStat> times;
    vector<std::string> names;


    for (auto solver : solvers) {
        io.push_back(util::RunningStat());
        storage.push_back(util::RunningStat());
        times.push_back(util::RunningStat());
        names.push_back(solver->getClassName());
        vector<std::string> names;
    }

    int j;
    for (double attributeCount : attributeCounts) {
        for (int i = 0; i < numRuns; i++) {
            simConf.setAttributeCount(attributeCount);
            vector<unique_ptr<Attribute>> allAttributes;
            auto workloadAndStats =
                simConf.getQueryWorkloadAndStats(allAttributes);
            QueryWorkload const& workload = workloadAndStats.first;
            stats = workloadAndStats.second;
            j = 0;
            for (auto solver : solvers) {
                timer.start();
                Partitioning partitioning = solver->solve(
                    workload, storageOverheadThreshold, stats);
                timer.stop();
                io.at(j).push(cost.getIOCost(partitioning, workload));
                storage.at(j).push(cost.getStorageOverhead(
                    partitioning, workload));
                times.at(j).push(timer.getRealTimeInSeconds());
                j++;
                cerr << ".";
                completed++;
            }
        }

        int j = 0;
        for (auto solver : solvers) {

            runningTimeExp.addRecord();
            runningTimeExp.setFieldValue("solver", solver->getClassName());
            runningTimeExp.setFieldValue("attributes", attributeCount);
            runningTimeExp.setFieldValue("time", times.at(j).getMean());
            runningTimeExp.setFieldValue(
                "deviation", times.at(j).getStandardDeviation());
            times.at(j).clear();

            queryIOExp.addRecord();
            queryIOExp.setFieldValue("solver", solver->getClassName());
            queryIOExp.setFieldValue("attributes", attributeCount);
            queryIOExp.setFieldValue("io", io.at(j).getMean());
            queryIOExp.setFieldValue(
                "deviation", io.at(j).getStandardDeviation());
            io.at(j).clear();

            storageExp.addRecord();
            storageExp.setFieldValue("solver", solver->getClassName());
            storageExp.setFieldValue("attributes",attributeCount);
            storageExp.setFieldValue("storage", storage.at(j).getMean());
            storageExp.setFieldValue(
                "deviation", storage.at(j).getStandardDeviation());
            storage.at(j).clear();

            j++;
        }
       cerr << " (" << (completed / total) * 100 << "%)" << endl;
    }

    for (auto exp : expData)
        exp->close();
};