// Print the instructions of function FUNC_ID in "position" order, one per line.  Each line starts with an 11-column prefix
// that holds the execution count (when the address has an event record with a positive count) or blanks, followed by the
// address and the assembly text.  Any event record found for the address is then passed to show_events().
static void
list_assembly(const SqlDatabase::TransactionPtr &tx, int func_id)
{
    // Event records are looked up by instruction address further down.
    Events events;
    gather_events(tx, func_id);
    load_events(tx, func_id, events);

    SqlDatabase::StatementPtr query = tx->statement("select address, assembly"
                                                    " from semantic_instructions"
                                                    " where func_id = ?"
                                                    " order by position");
    query->bind(0, func_id);

    for (SqlDatabase::Statement::iterator row = query->begin(); row != query->end(); ++row) {
        const rose_addr_t va = row.get<rose_addr_t>(0);
        const std::string text = row.get<std::string>(1);
        const Events::const_iterator found = events.find(va);
        const bool haveEvents = found != events.end();

        // Prefix: 9-column right-justified count plus "x ", or 11 blanks so the '|' separators line up.
        if (!haveEvents || found->second.nexecuted <= 0) {
            std::cout <<std::string(11, ' ');
        } else {
            std::cout <<std::setw(9) <<std::right <<found->second.nexecuted <<"x ";
        }

        // The instruction itself
        std::cout <<"| " <<StringUtility::addrToString(va) <<":  " <<text <<"\n";

        // Events attached to this address, if any
        if (haveEvents)
            show_events(found->second);
    }
}
Beispiel #2
0
// Update the database by filling in test_results.first_error information for those tests that don't have a cached first error
// but which failed and have output.
//
// A single SQL "update" does all the work: for each selected test whose first_error is null, whose status is not 'end', and
// which has an attachment named 'Final output', first_error is set to the first substring of that attachment matching one of
// the error patterns listed below.  The base "where" clause comes from sqlWhereClause(), which also appends the values for
// its placeholders to `args`; sqlBindArgs() binds them before the statement is executed.
static void
updateDatabase(const SqlDatabase::TransactionPtr &tx, const Settings &settings) {
    std::vector<std::string> args;
    SqlDatabase::StatementPtr q = tx->statement("update test_results test"
                                                " set first_error = substring("
#if 0 // [Robb Matzke 2016-02-08]
                                                // Look at all output stored in the database (which is typically only the last
                                                // few hundred lines of the complete output).
                                                "att.content "
#else
                                                // This coalesce tries to find where a parallel make command failed and looks
                                                // only at the following serial make, which is assumed to follow the parallel
                                                // make.
                                                //
                                                // NOTE(review): the "\n" after "Error 1" below is a real newline character in
                                                // the SQL text, whereas the other newlines in these patterns are written as
                                                // the two-character regex escape "\\n" -- confirm this is intentional.
                                                "coalesce(substring(att.content from '(\\nmake: \\*\\*\\* \\[[-_a-zA-Z0-9]+\\] Error 1\n.+)'), att.content) "
#endif
                                                // NOTE(review): "(?n)" is presumably the regex engine's newline-sensitive
                                                // matching option (as in PostgreSQL) -- confirm against the database in use.
                                                "from '(?n)("
                                                //----- regular expressions begin -----
                                                "\\merror: .+"
                                                "|catastrophic error: *\\n.+"
                                                "|^.* \\[err\\]: terminated after .+"
                                                "|^.* \\[err\\]: command died with .+"
                                                "|^.* \\[err\\]: +what\\(\\): .*"
                                                //----- regular expressions end -----
                                                ")')"
                                                " from attachments att" +
                                                // sqlWhereClause() returns text starting with " where ", so the extra
                                                // conditions are appended with " and".
                                                sqlWhereClause(tx, settings, args) + " and"
                                                "    test.id = att.test_id and"
                                                "    test.first_error is null and"
                                                "    test.status <> 'end' and"
                                                "    att.name = 'Final output'");
    sqlBindArgs(q, args);
    q->execute();
}
void
computational_equivalent_classes(std::map<int,int>& norm_map)
{
    SqlDatabase::StatementPtr stmt = transaction->statement("select func_id, equivalent_func_id from equivalent_classes");
    for (SqlDatabase::Statement::iterator row=stmt->begin(); row!= stmt->end(); ++row)
        norm_map[row.get<int>(0)] = row.get<int>(1);
}
// Load every enabled (enabled <> 0) row of the dependencies table, appending each value to the list stored under its name.
static Dependencies
loadAllDependencies(const SqlDatabase::TransactionPtr &tx) {
    Dependencies result;
    SqlDatabase::StatementPtr query = tx->statement("select name, value from dependencies where enabled <> 0");
    SqlDatabase::Statement::iterator cur = query->begin();
    while (cur != query->end()) {
        const std::string name = cur.get<std::string>(0);
        const std::string value = cur.get<std::string>(1);
        result.insertMaybeDefault(name).push_back(value);
        ++cur;
    }
    return result;
}
Beispiel #5
0
// Reset test_results.first_error to null for every test selected by the settings-derived "where" clause.
static void
clearErrors(const SqlDatabase::TransactionPtr &tx, const Settings &settings) {
    // sqlWhereClause() fills in the values that belong to the placeholders of the clause it returns.
    std::vector<std::string> bindings;
    const std::string whereClause = sqlWhereClause(tx, settings, bindings);

    SqlDatabase::StatementPtr update = tx->statement("update test_results set first_error = null" + whereClause);
    sqlBindArgs(update, bindings);
    update->execute();
}
// Group the function pairs stored in fr_clone_pairs into connected components ("clusters") using Boost's incremental
// connected-components support (a disjoint-set structure over the graph's vertices).
//
// max_cluster_size_signed must be non-negative (asserted); it is converted to an unsigned size.  The function returns
// immediately when fr_clone_pairs has no rows.
void
find_clusters(int max_cluster_size_signed, SqlDatabase::TransactionPtr transaction)
{
    assert(max_cluster_size_signed >= 0);
    size_t max_cluster_size = max_cluster_size_signed;

    // Prepared statement for adding rows to fr_ignored_function_pairs.
    SqlDatabase::StatementPtr insert_stmt = transaction->statement("insert into fr_ignored_function_pairs"
                                            // 0        1         2
                                            "(func1_id, func2_id, from_cluster_of_size)"
                                            " values (?, ?, ?)");

    // Get all vertices and find the union
    std::string _query_condition = "select func1_id, func2_id from fr_clone_pairs";
    SqlDatabase::StatementPtr stmt = transaction->statement(_query_condition);

    // Nothing to cluster when there are no clone pairs.
    if (stmt->begin() == stmt->end())
        return;

    //Count how many vertices we have for boost graph
    int VERTEX_COUNT = transaction->statement("select count(*) from semantic_functions")->execute_int();

    typedef adjacency_list <vecS, vecS, undirectedS> Graph;
    typedef graph_traits<Graph>::vertex_descriptor Vertex;
    typedef graph_traits<Graph>::vertices_size_type VertexIndex;
    Graph graph(VERTEX_COUNT);

    // Storage for the disjoint-set structure: one rank and one parent slot per vertex.
    std::vector<VertexIndex> rank(num_vertices(graph));
    std::vector<Vertex> parent(num_vertices(graph));

    typedef VertexIndex* Rank;
    typedef Vertex* Parent;
    disjoint_sets<Rank, Parent> ds(&rank[0], &parent[0]);
    // The graph has no edges yet at this point; edges are added, and the sets merged, in the loop below.
    initialize_incremental_components(graph, ds);
    incremental_components(graph, ds);

    graph_traits<Graph>::edge_descriptor edge;
    bool flag;

    // Add one edge per clone pair and merge the two functions' sets.
    // NOTE(review): function ids are used directly as vertex indices and as indices into rank/parent, which were sized
    // from count(*) of semantic_functions -- this assumes every id is less than that count; confirm.
    for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) {
        int func1 = row.get<int>(0);
        int func2 = row.get<int>(1);
        boost::tie(edge, flag) = add_edge(func1, func2, graph);
        ds.union_set(func1,func2);
    }

    typedef component_index<VertexIndex> Components;
    Components components(parent.begin(), parent.end());
    std::map<int,int> size_distribution;

    // Iterate through the component indices
    BOOST_FOREACH(VertexIndex current_index, components) {
        // Vertex indices (used above as function ids) belonging to the current component
        std::vector<int> cluster_functions;

        // Iterate through the child vertex indices for [current_index]
        BOOST_FOREACH(VertexIndex child_index, components[current_index]) {
            cluster_functions.push_back(child_index);
        }
Beispiel #7
0
Datei: callLSH.C Projekt: 8l/rose
static void
postprocess(const SqlDatabase::TransactionPtr &tx)
{
    int windowSize = tx->statement("select window_size from run_parameters limit 1")->execute_int();
    int stride = tx->statement("select stride from run_parameters limit 1")->execute_int();
    assert(windowSize != 0);
    assert(stride != 0);

    cerr << "About to delete from postprocessed_clusters" << endl;
    tx->execute("delete from postprocessed_clusters");
    cerr << "... done" << endl;

    cerr << "About to postprocess" << endl;
    SqlDatabase::StatementPtr cmd = tx->statement("select cluster, function_id, index_within_function, vectors_row"
                                                  " from clusters order by cluster, function_id, index_within_function");
    SqlDatabase::StatementPtr insertCmd = tx->statement("insert into postprocessed_clusters"
                                                        " select * from clusters where row_number = ?");
    const size_t numStridesThatMustBeDifferent = windowSize / (stride * 2);
    string last_cluster = "";
    string last_func_id = "";
    size_t last_index_within_function = 0;
    vector<string> rows_in_this_cluster;
    bool first = true;
    for (SqlDatabase::Statement::iterator postproc_reader=cmd->begin(); postproc_reader!=cmd->end(); ++postproc_reader) {
        string cluster = postproc_reader.get<std::string>(0);
        string function_id = postproc_reader.get<std::string>(1);
        size_t index_within_function = postproc_reader.get<size_t>(2);
        string cluster_row_number = postproc_reader.get<std::string>(3);
        bool differentFunction = cluster != last_cluster || function_id != last_func_id;
        bool endingCluster = differentFunction;
        bool beginningNewCluster = first || differentFunction;
        first = false;
        if (endingCluster) {
            if (rows_in_this_cluster.size() > 1) { // Skip clusters that have only one element left
                for (size_t i = 0; i < rows_in_this_cluster.size(); ++i) {
                    insertCmd->bind(0, rows_in_this_cluster[i]);
                    insertCmd->execute();
                }
            }
        }
        if (beginningNewCluster) {
            last_cluster = cluster;
            last_func_id = function_id;
            last_index_within_function = index_within_function;
            rows_in_this_cluster.clear();
        }
        bool keep = beginningNewCluster || (index_within_function >= last_index_within_function + numStridesThatMustBeDifferent);
        if (keep) {
            last_index_within_function = index_within_function;
            rows_in_this_cluster.push_back(cluster_row_number);
        }
    }
    cerr << "... done" << endl;
}
// Copy every row of tmp_src into LISTING: the text of each line is stored as the source_code of the listing entry keyed by
// its (file_id, linenum) source position.
static void
load_source_code(const SqlDatabase::TransactionPtr &tx, Listing &listing/*in,out*/)
{
    SqlDatabase::StatementPtr query = tx->statement("select file_id, linenum, line from tmp_src");
    SqlDatabase::Statement::iterator cur = query->begin();
    while (cur != query->end()) {
        const int fileId = cur.get<int>(0);
        const int lineNumber = cur.get<int>(1);
        const SourcePosition where(fileId, lineNumber);
        listing[where].source_code = cur.get<std::string>(2);
        ++cur;
    }
}
Beispiel #9
0
// Print, on a line of its own, the number of selected tests that have a 'Final output' attachment and a status other than
// 'end' but no first_error value yet.
static void
countMissingErrors(const SqlDatabase::TransactionPtr &tx, const Settings &settings) {
    std::vector<std::string> bindings;

    std::string sql = "select count(*)"
                      " from test_results test"
                      " join attachments att on test.id = att.test_id";
    sql += sqlWhereClause(tx, settings, bindings);      // also fills in the placeholder values
    sql += " and"
           "    test.first_error is null and"
           "    test.status <> 'end' and"
           "    att.name = 'Final output'";

    SqlDatabase::StatementPtr query = tx->statement(sql);
    sqlBindArgs(query, bindings);
    const int nMissing = query->execute_int();
    std::cout <<nMissing <<"\n";
}
// Return a newly allocated vector holding the distinct callees of FUNC_ID in ascending order, read from semantic_rg when
// REACHABILITY_GRAPH is true and from semantic_cg otherwise.  The vector is allocated with new and returned as a raw pointer.
CallVec*
load_function_api_calls_for(int func_id, bool reachability_graph)
{
    std::string sql = "select distinct scg.callee from ";
    if (reachability_graph) {
        sql += "semantic_rg";
    } else {
        sql += "semantic_cg ";
    }
    // (A join against tmp_interesting_funcs on tif.func_id = scg.callee used to sit here but is disabled.)
    sql += " as scg "
           " where scg.caller=? ORDER BY scg.callee";

    SqlDatabase::StatementPtr query = transaction->statement(sql);
    query->bind(0, func_id);

    CallVec* callees = new CallVec;
    SqlDatabase::Statement::iterator cur = query->begin();
    while (cur != query->end()) {
        callees->push_back(cur.get<int>(0));
        ++cur;
    }
    return callees;
}
Beispiel #11
0
// Print one line per distinct (status, first_error) combination among the selected tests, most frequent first.  Each line
// shows the count, the status, and the error message run through oneLineEscaped().
static void
listErrors(const SqlDatabase::TransactionPtr &tx, const Settings &settings) {
    std::vector<std::string> bindings;

    std::string sql = "select count(*) as n, status, test.first_error"
                      " from test_results test";
    sql += sqlWhereClause(tx, settings, bindings);      // also fills in the placeholder values
    sql += " and"
           " test.first_error is not null"
           " group by status, test.first_error"
           " order by n desc";

    SqlDatabase::StatementPtr query = tx->statement(sql);
    sqlBindArgs(query, bindings);

    SqlDatabase::Statement::iterator cur = query->begin();
    while (cur != query->end()) {
        const int occurrences = cur.get<int>(0);
        const std::string status = cur.get<std::string>(1);
        const std::string firstError = cur.get<std::string>(2);
        printf("%6d %-16s %s\n", occurrences, status.c_str(), oneLineEscaped(firstError).c_str());
        ++cur;
    }
}
// Return a newly allocated vector of callee ids recorded in semantic_fio_calls for function FUNC_ID and input group
// IGROUP_ID, ordered by call position.  Only callees that also appear in tmp_interesting_funcs are returned.
//
//   - call_depth >= 0 additionally restricts the rows to those whose caller_id equals FUNC_ID.
//   - expand_ncalls makes each callee appear ncalls times instead of once.
//   - ignore_no_compares is not consulted by this function.
//
// The vector is allocated with new and returned as a raw pointer.
CallVec*
load_api_calls_for(int func_id, int igroup_id, bool ignore_no_compares, int call_depth, bool expand_ncalls)
{
    const bool directCallsOnly = call_depth >= 0;

    std::string sql = "select distinct fio.pos, fio.callee_id, fio.ncalls"
                      " from semantic_fio_calls as fio"
                      " join tmp_interesting_funcs as f1"
                      // restrict to callees listed in tmp_interesting_funcs
                      " on f1.func_id = fio.callee_id"
                      // restrict to the requested function and input group
                      " where fio.func_id = ? and fio.igroup_id = ?";
    if (directCallsOnly)
        sql += " and fio.caller_id = ?";
    sql += " order by fio.pos";

    SqlDatabase::StatementPtr query = transaction->statement(sql);
    query->bind(0, func_id);
    query->bind(1, igroup_id);
    if (directCallsOnly)
        query->bind(2, func_id);

    CallVec* calls = new CallVec;
    SqlDatabase::Statement::iterator cur = query->begin();
    while (cur != query->end()) {
        const int callee = cur.get<int>(1);
        const int ncalls = cur.get<int>(2);

        // One entry per row, or one entry per recorded call when expansion is requested.
        const int copies = expand_ncalls ? ncalls : 1;
        for (int k = 0; k < copies; ++k)
            calls->push_back(callee);
        ++cur;
    }
    return calls;
}
Beispiel #13
0
// Build the " where ..." text that restricts which tests are considered.  For every "?" placeholder that ends up in the
// returned text the corresponding value is appended to ARGS, in order.  The result always starts with " where " and always
// contains at least one condition.
static std::string
sqlWhereClause(const SqlDatabase::TransactionPtr &tx, const Settings &settings, std::vector<std::string> &args /*in,out*/) {
    std::vector<std::string> terms;

    if (settings.latestTests) {
        // Only the ROSE version with the most recent rose_date in the database is of interest.
        SqlDatabase::StatementPtr newestQuery = tx->statement("select distinct rose, rose_date"
                                                              " from test_results"
                                                              " order by rose_date desc"
                                                              " limit 1");
        SqlDatabase::Statement::iterator newest = newestQuery->begin();
        if (newest != newestQuery->end()) {
            const std::string roseVersion = newest.get<std::string>(0);
            terms.push_back("rose = ?");
            args.push_back(roseVersion);
        }
    }

    // With no constraints, emit a trivially true condition so further conditions can still be appended with " and".
    if (terms.empty())
        return " where true";
    return " where " + boost::join(terms, " and ");
}
Beispiel #14
0
// Build the mapping from key names to SQL column names.  Every distinct dependency name NAME found in the dependencies table
// maps to "rmc_NAME"; a fixed set of additional keys is then inserted, in the order listed below.
static DependencyNames
loadDependencyNames(const SqlDatabase::TransactionPtr &tx) {
    DependencyNames names;

    SqlDatabase::StatementPtr query = tx->statement("select distinct name from dependencies");
    SqlDatabase::Statement::iterator cur = query->begin();
    while (cur != query->end()) {
        const std::string depName = cur.get<std::string>(0);
        names.insert(depName, "rmc_"+depName);
        ++cur;
    }

    // Additional key/column relationships
    static const char *const fixedColumns[][2] = {
        {"id",             "test.id"},
        {"reporting_user", "auth_user.identity"},
        {"reporting_time", "test.reporting_time"},
        {"tester",         "test.tester"},
        {"os",             "test.os"},
        {"rose",           "test.rose"},
        {"rose_date",      "test.rose_date"},
        {"status",         "test.status"},
        {"duration",       "test.duration"},
        {"noutput",        "test.noutput"},
        {"nwarnings",      "test.nwarnings"},
    };
    const size_t nFixed = sizeof(fixedColumns) / sizeof(fixedColumns[0]);
    for (size_t i = 0; i < nFixed; ++i)
        names.insert(std::string(fixedColumns[i][0]), std::string(fixedColumns[i][1]));

    return names;
}
Beispiel #15
0
Datei: callLSH.C Projekt: 8l/rose
// Record one row in the timing table for PROPERTY_NAME.  The elapsed wall-clock, user and system times are the differences
// between the "after" and "before" samples; the total user and system times are taken from RU_AFTER; the total wall-clock
// column is written as zero.
void
insert_timing(const SqlDatabase::TransactionPtr &tx, std::string property_name, const timeval& before, const timeval& after,
              const rusage& ru_before, const rusage& ru_after)
{
    const double totalUser   = tvToDouble(ru_after.ru_utime);
    const double totalSystem = tvToDouble(ru_after.ru_stime);
    const double wallElapsed = tvToDouble(after) - tvToDouble(before);
    const double userElapsed = tvToDouble(ru_after.ru_utime) - tvToDouble(ru_before.ru_utime);
    const double sysElapsed  = tvToDouble(ru_after.ru_stime) - tvToDouble(ru_before.ru_stime);

    SqlDatabase::StatementPtr insert = tx->statement("insert into timing"
                                                     // 0              1                2               3              4
                                                     " (property_name, total_wallclock, total_usertime, total_systime, wallclock,"
                                                     // 5        6
                                                     " usertime, systime)"
                                                     " values (?,?,?,?,?,?,?)");
    insert->bind(0, property_name);
    insert->bind(1, 0);                                 // total_wallclock is not measured here
    insert->bind(2, totalUser);
    insert->bind(3, totalSystem);
    insert->bind(4, wallElapsed);
    insert->bind(5, userElapsed);
    insert->bind(6, sysElapsed);
    insert->execute();
}
/* Remove from the two call traces those functions that are present in only one of the traces and that take part in a clone
 * pair spanning the two compilation units (files) that contain func1_id and func2_id.
 *
 *   - func1_vec, func2_vec: the call traces of the two functions; they are read but not modified.
 *   - similarity: minimum semantic_funcsim similarity for a function pair to count as a clone.
 *   - igroup_id: not consulted by this function.
 *
 * Returns a pair of newly allocated traces (allocated with new, returned as raw pointers) holding the pruned copies of
 * func1_vec and func2_vec respectively.  When both input traces are empty, both results are empty.
 *
 * The set of "functions present in only one trace" is computed as the symmetric difference of the two traces.  The
 * original code used std::set_difference, which yields only the functions unique to the first trace and therefore could
 * never prune anything from the second trace. */
std::pair<CallVec*, CallVec*>
remove_compilation_unit_complement(int func1_id, int func2_id, int igroup_id, int similarity, CallVec* func1_vec,
                                   CallVec* func2_vec)
{
    CallVec* new_func1_vec = new CallVec;
    CallVec* new_func2_vec = new CallVec;

    if (func1_vec->size() > 0 || func2_vec->size() > 0) {
        // Find the functions called by exactly one of the two functions
        // - we are not interested in functions called by both
        std::set<int> func1_vec_set;
        std::set<int> func2_vec_set;

        for (CallVec::iterator it = func1_vec->begin(); it != func1_vec->end(); ++it)
            func1_vec_set.insert(*it);
        for (CallVec::iterator it = func2_vec->begin(); it != func2_vec->end(); ++it)
            func2_vec_set.insert(*it);

        std::set<int> func1_func2_complement;
        std::set_symmetric_difference(func1_vec_set.begin(), func1_vec_set.end(),
                                      func2_vec_set.begin(), func2_vec_set.end(),
                                      std::inserter(func1_func2_complement, func1_func2_complement.end()));

        // Find the compilation units in question. A compilation unit is in our case a file.
        SqlDatabase::StatementPtr func1_file_stmt = transaction->statement("select file_id from semantic_functions"
                                                                           " where id = ?");
        func1_file_stmt->bind(0, func1_id);
        int func1_file_id = func1_file_stmt->execute_int();

        SqlDatabase::StatementPtr func2_file_stmt = transaction->statement("select file_id from semantic_functions"
                                                                           " where id = ?");
        func2_file_stmt->bind(0, func2_id);
        int func2_file_id = func2_file_stmt->execute_int();

        // Find the candidate functions
        //  - all functions that take part in a sufficiently similar pair whose members live in the two different files
        SqlDatabase::StatementPtr stmt = transaction->statement("select sem.func1_id, sem.func2_id from semantic_funcsim as sem"
                                                                " join semantic_functions as sf1 on sem.func1_id = sf1.id"
                                                                " join semantic_functions as sf2 on sem.func2_id = sf2.id"
                                                                " where similarity >= ? and sf1.file_id in (?,?)"
                                                                "   and sf2.file_id in (?, ?) and sf1.file_id != sf2.file_id");
        stmt->bind(0, similarity);
        stmt->bind(1, func1_file_id);
        stmt->bind(2, func2_file_id);
        stmt->bind(3, func1_file_id);
        stmt->bind(4, func2_file_id);

        std::set<int> complement_functions;
        for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) {
            int clone_func1 = row.get<int>(0);
            int clone_func2 = row.get<int>(1);

            complement_functions.insert(clone_func1);
            complement_functions.insert(clone_func2);
        }

        // Find the functions we want to remove
        //  - functions with clones between the files that are not part of both traces
        std::set<int> remove_these;
        std::set_intersection(complement_functions.begin(), complement_functions.end(), func1_func2_complement.begin(),
                              func1_func2_complement.end(), std::inserter(remove_these, remove_these.end()));

        // Copy each trace into its new vector, leaving out the functions selected for removal
        for (CallVec::iterator it = func1_vec->begin(); it != func1_vec->end(); ++it) {
            if (remove_these.find(*it) == remove_these.end())
                new_func1_vec->push_back(*it);
        }

        for (CallVec::iterator it = func2_vec->begin(); it != func2_vec->end(); ++it) {
            if (remove_these.find(*it) == remove_these.end())
                new_func2_vec->push_back(*it);
        }
    }
    return std::pair<CallVec*, CallVec*>(new_func1_vec, new_func2_vec);
}
Beispiel #17
0
    // Worker body: runs every item of `work` in order against a freshly opened database transaction, checkpointing
    // periodically, and afterwards stores the per-function counters from `funcinfo` and performs a final checkpoint.
    // State that is expensive to set up (the input group and the per-interpretation memory map and whitelists) is reused
    // across consecutive work items and only rebuilt when the input group id or the interpretation changes.
    void operator()() {
        // Database connections don't survive over fork() according to SqLite and PostgreSQL documentation, so open it again
        SqlDatabase::TransactionPtr tx = SqlDatabase::Connection::create(databaseUrl)->transaction();

        // Use zero for the number of tests ran so that this child process doesn't try to update the semantic_history table.
        // If two or more processes try to change the same row (which they will if there's a non-zero number of tests) then
        // they will deadlock with each other.
        static const size_t NO_TESTS_RAN = 0;

        NameSet builtin_function_names;
        add_builtin_functions(builtin_function_names/*out*/);

        // State carried from one work item to the next
        InputGroup igroup;
        WorkItem prevWorkItem;
        SgAsmInterpretation *prev_interp = NULL;
        MemoryMap ro_map;
        Disassembler::AddressSet whitelist_exports;         // dynamic functions that should be called
        PointerDetectors pointers;
        InsnCoverage insn_coverage;
        DynamicCallGraph dynamic_cg;
        Tracer tracer;
        ConsumedInputs consumed_inputs;
        FuncAnalyses funcinfo;
        OutputGroups ogroups; // do not load from database (that might take a very long time)
        time_t last_checkpoint = time(NULL);
        for (size_t workIdx=0; workIdx<work.size(); ++workIdx) {
            WorkItem &workItem = work[workIdx];

            // Load the input group from the database if necessary.  A failed load terminates the process with status 1.
            if (workItem.igroup_id!=prevWorkItem.igroup_id) {
                if (!igroup.load(tx, workItem.igroup_id)) {
                    std::cerr <<argv0 <<": input group " <<workItem.igroup_id <<" is empty or does not exist\n";
                    exit(1);
                }
            }

            // Find the function to test
            IdFunctionMap::iterator func_found = functions.find(workItem.func_id);
            assert(func_found!=functions.end());
            SgAsmFunction *func = func_found->second;
            if (opt.verbosity>=LACONIC) {
                if (opt.verbosity>=EFFUSIVE)
                    std::cerr <<argv0 <<": " <<std::string(100, '=') <<"\n";
                std::cerr <<argv0 <<": processing function " <<function_to_str(func, function_ids) <<"\n";
            }
            SgAsmInterpretation *interp = SageInterface::getEnclosingNode<SgAsmInterpretation>(func);
            assert(interp!=NULL);

            // Do per-interpretation stuff
            if (interp!=prev_interp) {
                prev_interp = interp;
                assert(interp->get_map()!=NULL);
                // Work on a copy of the interpretation's memory map; presumably the require/prohibit/keep chain reduces
                // the copy to segments that are readable but not writable -- confirm against the MemoryMap API.
                ro_map = *interp->get_map();
                ro_map.require(MemoryMap::READABLE).prohibit(MemoryMap::WRITABLE).keep();
                Disassembler::AddressSet whitelist_imports = get_import_addresses(interp, builtin_function_names);
                whitelist_exports.clear(); // imports are addresses of import table slots; exports are functions
                overmap_dynlink_addresses(interp, *insns, opt.params.follow_calls, &ro_map, GOTPLT_VALUE,
                                          whitelist_imports, whitelist_exports/*out*/);
                if (opt.verbosity>=EFFUSIVE) {
                    std::cerr <<argv0 <<": memory map for SgAsmInterpretation:\n";
                    interp->get_map()->dump(std::cerr, argv0+":   ");
                }
            }

            // Run the test
            assert(insns!=NULL);
            assert(entry2id!=NULL);
            std::cerr <<"process " <<getpid() <<" about to run test " <<workIdx <<"/" <<work.size() <<" " <<workItem <<"\n";
            runOneTest(tx, workItem, pointers, func, function_ids, insn_coverage, dynamic_cg, tracer, consumed_inputs,
                       interp, whitelist_exports, cmd_id, igroup, funcinfo, *insns, &ro_map, *entry2id, ogroups);
            ++ntests_ran;

            // Checkpoint once more than opt.checkpoint seconds have passed since the previous one.  checkpoint() returns
            // the transaction to use from here on.  In dry-run mode no checkpoint is made but the timer is still reset.
            if (opt.checkpoint>0 && time(NULL)-last_checkpoint > opt.checkpoint) {
                if (!opt.dry_run)
                    tx = checkpoint(tx, ogroups, tracer, insn_coverage, dynamic_cg, consumed_inputs, NULL, NO_TESTS_RAN,
                                    cmd_id);
                last_checkpoint = time(NULL);
            }

            prevWorkItem = workItem;
        }
        std::cerr <<"process " <<getpid() <<" is done testing; now finishing up...\n";

        // Store the per-function counters collected in funcinfo, one row per function.
        // NOTE(review): unlike the checkpoints, this insert is not gated on opt.dry_run -- confirm that is intended.
        if (!tx->is_terminated()) {
            SqlDatabase::StatementPtr stmt = tx->statement("insert into semantic_funcpartials"
                                             " (func_id, ncalls, nretused, ntests, nvoids) values"
                                             " (?,       ?,      ?,        ?,      ?)");
            for (FuncAnalyses::iterator fi=funcinfo.begin(); fi!=funcinfo.end(); ++fi) {
                stmt->bind(0, fi->first);
                stmt->bind(1, fi->second.ncalls);
                stmt->bind(2, fi->second.nretused);
                stmt->bind(3, fi->second.ntests);
                stmt->bind(4, fi->second.nvoids);
                stmt->execute();
            }
        }

        // Cleanup: final checkpoint (skipped in dry-run mode or if the transaction is already terminated), then release
        // this function's reference to the transaction.
        if (!tx->is_terminated() && !opt.dry_run) {
            std::cerr <<"process " <<getpid() <<" is doing the final checkpoint\n";
            checkpoint(tx, ogroups, tracer, insn_coverage, dynamic_cg, consumed_inputs, NULL, NO_TESTS_RAN, cmd_id);
        }
        tx.reset();

        std::cerr <<"process " <<getpid() <<" finished\n";
    }
Beispiel #18
0
// For every non-syscall vertex of the graph *G, find the syscall vertices reachable from it and record them in the
// syscalls_made table.
//
// Vertices numbered below ids_reserved_for_syscalls stand for system calls; any other vertex V stands for the function
// all_functions[V - ids_reserved_for_syscalls].  A function is matched to database ids through its name (or, failing that,
// its name with "@plt" appended) in semantic_functions; one row is inserted per (matching database id, reachable syscall).
// Functions without a name, without a database match, or without reachable syscalls produce no rows.
void
add_calls_to_syscalls_to_db(SqlDatabase::TransactionPtr tx, DirectedGraph* G, std::vector<SgAsmFunction*> all_functions)
{
    typedef std::map<std::string, std::set<int> > NameToIds;
    extern std::map<int, std::string> linux32_syscalls; // defined in linux_syscalls.C

    // Index the database's functions by name; unnamed functions are left out.
    NameToIds symbolToId;
    SqlDatabase::StatementPtr nameQuery = tx->statement("select id, name  from semantic_functions");
    for (SqlDatabase::Statement::iterator row = nameQuery->begin(); row != nameQuery->end(); ++row) {
        const int db_id = row.get<int>(0);
        const std::string db_name = row.get<std::string>(1);
        if (!db_name.empty())
            symbolToId[db_name].insert(db_id);
    }

    DirectedGraph& graph = *G;
    SqlDatabase::StatementPtr insert = tx->statement("insert into syscalls_made(caller, syscall_id, syscall_name) values(?,?,?)");

    // Visit every vertex of the graph
    typedef graph_traits<DirectedGraph>::vertex_descriptor Vertex;
    graph_traits<DirectedGraph>::vertex_iterator vi, vi_end;
    for (tie(vi, vi_end) = vertices(graph); vi != vi_end; ++vi) {
        if (*vi < ids_reserved_for_syscalls)
            continue;                                   // syscall vertices are never callers

        // Collect everything discovered by a breadth-first search starting at this vertex...
        std::vector<Vertex> discovered;
        boost::breadth_first_search(graph, *vi,
                                    boost::visitor(boost::make_bfs_visitor(boost::write_property(boost::identity_property_map(),
                                                                                                 std::back_inserter(discovered),
                                                                                                 boost::on_discover_vertex()))));

        // ...and keep only the syscall vertices.
        std::set<int> syscalls;
        for (std::vector<Vertex>::iterator dv = discovered.begin(); dv != discovered.end(); ++dv) {
            if (*dv < ids_reserved_for_syscalls)
                syscalls.insert(*dv);
        }

        // Map the vertex back to its function
        int caller_id = *vi - ids_reserved_for_syscalls;
        ROSE_ASSERT(caller_id >= 0);
        SgAsmFunction* caller = all_functions[caller_id];
        ROSE_ASSERT(isSgAsmFunction(caller) != NULL);

        std::string caller_name = caller->get_name();
        if (caller_name.length() == 0)
            continue;

        // Look the function up by name, falling back to its "@plt" variant
        NameToIds::iterator matches = symbolToId.find(caller_name);
        if (matches == symbolToId.end())
            matches = symbolToId.find(caller_name+"@plt");

        if (syscalls.empty() || matches == symbolToId.end())
            continue;

        for (std::set<int>::iterator sc = syscalls.begin(); sc != syscalls.end(); ++sc) {
            int syscall_callee_id = *sc;
            const std::string &syscall_name = linux32_syscalls[syscall_callee_id];
            for (std::set<int>::iterator db_id = matches->second.begin(); db_id != matches->second.end(); ++db_id) {
                insert->bind(0, *db_id);
                insert->bind(1, syscall_callee_id);
                insert->bind(2, syscall_name);
                insert->execute();
            }
        }
    }
}
Beispiel #19
0
// Bind the strings in ARGS to the placeholders of STMT: the first string goes to placeholder 0, the second to 1, and so on.
static void
sqlBindArgs(const SqlDatabase::StatementPtr &stmt, const std::vector<std::string> &args) {
    size_t position = 0;
    for (std::vector<std::string>::const_iterator arg = args.begin(); arg != args.end(); ++arg, ++position)
        stmt->bind(position, *arg);
}
Beispiel #20
0
// Print a combined source/assembly listing to std::cout.
//
// Source lines are loaded into a Listing keyed by source position (file ID, line number).  When `show_assembly` is
// true, every row of tmp_insns is attached to the source position recorded for it and an explanatory header is
// printed ahead of the listing.  Each assembly line is prefixed with its function ID and position, a marker that is
// "|" when the instruction directly follows the previously printed one in the same function and "#" otherwise, and
// an execution count when one is known.  Events recorded for an instruction are printed after it by show_events().
//
//   tx             transaction used for all queries
//   func_id        function whose events and instructions are gathered
//   show_assembly  whether assembly lines (and the explanatory header) are emitted
//
// NOTE(review): tmp_insns is presumably populated by gather_instructions() and the source text by
// gather_source_code(); neither helper is visible here -- confirm.
static void
list_combined(const SqlDatabase::TransactionPtr &tx, int func_id, bool show_assembly)
{
    CloneDetection::FilesTable files(tx);

    Events events;
    gather_events(tx, func_id);
    load_events(tx, func_id, events/*out*/);
    gather_instructions(tx, func_id, events);

    Listing listing;
    gather_source_code(tx);
    load_source_code(tx, listing/*out*/);

    // Get lines of assembly code and insert them into the correct place in the Listing.
    if (show_assembly) {
        SqlDatabase::StatementPtr stmt = tx->statement("select"
                                                       // 0                1              2              3
                                                       " insn.src_file_id, insn.src_line, insn.position, insn.address,"
                                                       // 4             5        6
                                                       " insn.assembly, func.id, func.name"
                                                       " from tmp_insns as insn"
                                                       " join semantic_functions as func on insn.func_id = func.id"
                                                       " order by position");
        for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) {
            int src_file_id = row.get<int>(0);
            int src_line_num = row.get<int>(1);
            SourcePosition srcpos(src_file_id, src_line_num);
            int pos = row.get<int>(2);
            rose_addr_t addr = row.get<rose_addr_t>(3);
            std::string assembly = row.get<std::string>(4);
            // Function that owns this instruction; named distinctly so it does not hide the func_id parameter.
            int insn_func_id = row.get<int>(5);
            std::string func_name = row.get<std::string>(6);
            listing[srcpos].assembly_code.insert(std::make_pair(addr,
                                                                AssemblyCode(pos, addr, assembly, insn_func_id, func_name)));
        }

        // Listing header
        std::cout <<"WARNING: This listing should be read cautiously. It is ordered according to the\n"
                  <<"         source code with assembly lines following the source code line from which\n"
                  <<"         they came.  However, the compiler does not always generate machine\n"
                  <<"         instructions in the same order as source code.  When a discontinuity\n"
                  <<"         occurs in the assembly instruction listing, it will be marked by a \"#\"\n"
                  <<"         character.  The assembly instructions are also numbered according to\n"
                  <<"         their relative positions in the binary function.\n"
                  <<"\n"
                  <<"         The prefix area contains either source location information or test trace\n"
                  <<"         information.  Note that trace information might be incomplete because\n"
                  <<"         tracing was disabled or only partially enabled, or the trace includes\n"
                  <<"         instructions that are not present in this function listing (e.g., when\n"
                  <<"         execution follows a CALL instruction). The following notes are possible:\n"
                  <<"           * \"Nx\" where N is an integer indicates that this instruction\n"
                  <<"             was reached N times during testing.  These notes are typically\n"
                  <<"             only attached to the first instruction of a basic block and only\n"
                  <<"             if the trace contains EV_REACHED events.  Lack of an Nx notation\n"
                  <<"             doesn't necessarily mean that the basic block was not reached, it\n"
                  <<"             only means that there is no EV_REACHED event for that block.\n"
                  <<"           * \"N<\" where N is an integer indicates that the instruction\n"
                  <<"             on the previous line consumed N inputs. Information about the\n"
                  <<"             inputs is listed on the right side of this line.\n"
                  <<"           * \"N>\" where N is an integer indicates that the instruction\n"
                  <<"             on the previous line produced N memory outputs. Information about the\n"
                  <<"             outputs is listed on the right side of this line. Only the final\n"
                  <<"             write to a memory address is considered a true output, and such\n"
                  <<"             writes will be marked with the string \"final\".\n"
                  <<"           * \"BR\" indicates that the instruction on the previous line is a\n"
                  <<"             control flow branch point. The right side of the line shows more\n"
                  <<"             detailed information about how many times the branch was taken.\n"
                  <<"           * \"FAULT\" indicates that the test was terminated at the previous\n"
                  <<"             instruction. The right side of the line shows the distribution of\n"
                  <<"             faults that occurred here.\n"
                  <<"\n"
                  <<"                /------------- Prefix area\n"
                  <<" /-------------/-------------- Source file ID or assembly function ID\n"
                  <<" |     /------/--------------- Source line number or assembly instruction index\n"
                  <<" |     |   /-/---------------- Instruction out-of-order indicator\n"
                  <<" |     |   |/     /----------- Instruction virtual address\n"
                  <<" |     |   |      |\n"
                  <<"vvvv vvvvv/|      |\n"
                  <<"vvvvvvvvvv v vvvvvvvvvv\n";
    }

    // Show the listing
    int prev_func_id = -1, prev_position = -1;          // last instruction printed, for the "|" vs "#" marker
    std::set<int> seen_files;                           // file IDs whose banner line has already been printed
    for (Listing::iterator li=listing.begin(); li!=listing.end(); ++li) {
        int file_id = li->first.file_id;

        // Print a banner the first time each file ID is encountered.
        if (seen_files.insert(file_id).second) {
            if (file_id>=0) {
                std::cout <<"\n" <<std::setw(4) <<std::right <<file_id <<".file  |"
                          <<(opt.colorize?"\033[33;4m":"") <<files.name(file_id) <<(opt.colorize?"\033[m":"") <<"\n";
            } else {
                std::cout <<"\n" <<std::string(11, ' ') <<"|"
                          <<(opt.colorize?"\033[33;4m":"") <<"instructions not associated with a source file"
                          <<(opt.colorize?"\033[m":"") <<"\n";
            }
        }

        // Source line; entries with a negative file ID carry no source text to show.
        if (file_id>=0) {
            std::cout <<std::setw(4) <<std::right <<file_id <<"." <<std::setw(6) <<std::left <<li->first.line_num
                      <<"|"
                      <<(opt.colorize?"\033[34m":"")
                      <<StringUtility::untab(li->second.source_code)
                      <<(opt.colorize?"\033[m":"") <<"\n";
        }

        // Assembly lines attached to this source position.
        for (Instructions::iterator ii=li->second.assembly_code.begin(); ii!=li->second.assembly_code.end(); ++ii) {
            // Bind by reference: the container is not modified while we print, so no copy of the strings is needed.
            const AssemblyCode &assm = ii->second;
            if (assm.func_id!=prev_func_id) {
                std::cout <<std::string(11, ' ') <<"# "
                          <<(opt.colorize?"\033[33;4m":"") <<"function " <<StringUtility::numberToString(assm.func_id);
                if (!assm.func_name.empty())
                    std::cout <<" <" <<assm.func_name <<">";
                std::cout <<(opt.colorize?"\033[m":"") <<"\n";
            }

            Events::const_iterator ei=events.find(assm.addr);
            std::cout <<std::setw(4) <<std::right <<assm.func_id <<"." <<std::setw(6) <<std::left <<assm.pos
                      <<(prev_func_id==assm.func_id && prev_position+1==assm.pos ? "|" : "#");

            if (ei!=events.end() && ei->second.nexecuted>0) {
                std::cout <<std::setw(9) <<std::right <<ei->second.nexecuted <<"x ";
            } else {
                std::cout <<std::string(11, ' ');
            }

            std::cout <<StringUtility::addrToString(assm.addr) <<":  "
                      <<(opt.colorize?"\033[32m":"") <<assm.assembly <<(opt.colorize?"\033[m":"") <<"\n";

            if (ei!=events.end())
                show_events(ei->second);

            prev_func_id = assm.func_id;
            prev_position = assm.pos;
        }
    }
}
Beispiel #21
0
// Entry point: opens the database named by --database, (re)creates and empties the largest_clones table, then reads
// the clusters table and prints every distinct pair of different function IDs that appear together in a cluster.
//
// Returns 0 on success and 1 when the command line cannot be parsed.  --help and a missing/duplicated --database
// exit directly with status 0 and 1 respectively.
int main(int argc, char* argv[])
{
  std::string database;
  //Timing (captured at startup; not reported anywhere in this function)
  struct timeval before;
  struct rusage ru_before;
  gettimeofday(&before, NULL);
  getrusage(RUSAGE_SELF, &ru_before);

  try {
    options_description desc("Allowed options");
    desc.add_options()
      ("help", "produce a help message")
      ("database,q", value< string >()->composing(),
       "the sqlite database that we are to use")
      ;

    variables_map vm;
    store(command_line_parser(argc, argv).options(desc)
          .run(), vm);

    if (vm.count("help")) {
      cout << desc;
      exit(0);
    }

    if (vm.count("database")!=1  ) {
      std::cerr << "Missing options. Call as: findClones --database <database-name>"
                << std::endl;
      exit(1);
    }

    database = vm["database"].as<string >();
    cout << "database: " << database << std::endl;
  }
  catch(exception& e) {
    // A parse failure leaves `database` empty, so there is nothing sensible to open: stop here instead of
    // continuing with an empty database name.
    cout << e.what() << "\n";
    return 1;
  }

  SqlDatabase::TransactionPtr tx = SqlDatabase::Connection::create(database)->transaction();

  // Make sure the output table exists and is empty.  Failures are reported but are not fatal.
  try {
    tx->statement("create table IF NOT EXISTS largest_clones(row_number INTEGER PRIMARY KEY, function_id_A INTEGER, begin_index_within_function_A INTEGER, end_index_within_function_A INTEGER,"
                  "function_id_B INTEGER , begin_index_within_function_B INTEGER, end_index_within_function_B INTEGER )")->execute();
  }
  catch(exception &ex) {
    cerr << "Exception Occurred: " << ex.what() << endl;
  }

  try {
    tx->statement("delete from largest_clones")->execute();
  }
  catch(exception &ex) {
    cerr << "Exception Occurred: " << ex.what() << endl;
  }

  // Function-ID pairs (smaller ID first) found in a common cluster; duplicates are removed after the scan.
  std::list<std::pair<int,int> > listOfFunctionClonePairs;

  // The run parameters are only sanity-checked here: both must have been readable and non-zero.
  int windowSize = 0;
  int stride = 0;
  try {
    windowSize = tx->statement("select window_size from run_parameters limit 1")->execute_int();
  } catch (exception& ex) {cerr << "Exception Occurred: " << ex.what() << endl;}
  try {
    stride = tx->statement("select stride from run_parameters limit 1")->execute_int();
  } catch (exception& ex) {cerr << "Exception Occurred: " << ex.what() << endl;}
  assert (windowSize != 0);
  assert (stride != 0);

  //Create set of clone pairs
  try{
    std::string selectSeparateDatasets ="SELECT cluster, function_id, index_within_function, vectors_row from  clusters ORDER BY cluster, function_id, index_within_function";

    SqlDatabase::StatementPtr cmd = tx->statement(selectSeparateDatasets);

    // Rows arrive ordered by cluster, so the members of the current cluster are accumulated in thisCluster and
    // each new row is paired with every member seen so far.
    int64_t thisClusterName=-1;
    std::vector<Element> thisCluster;

    for (SqlDatabase::Statement::iterator r=cmd->begin(); r!=cmd->end(); ++r) {
      Element cur_elem;
      int64_t cluster       = r.get<int64_t>(0);
      cur_elem.function_id  = r.get<int64_t>(1);
      cur_elem.index_within_function = r.get<int64_t>(2);
      cur_elem.last_index_within_function = cur_elem.index_within_function;
      cur_elem.vectors_row      = r.get<int64_t>(3);

      if( cluster == thisClusterName )
      {
        for( std::vector<Element>::iterator iItr = thisCluster.begin();
             iItr != thisCluster.end(); ++iItr )
        {
          // Store each pair with the smaller function ID first so that sort()/unique() can fold duplicates.
          if( cur_elem.function_id < iItr->function_id)
            listOfFunctionClonePairs.push_back(std::pair<int,int>(cur_elem.function_id, iItr->function_id));
          else
            listOfFunctionClonePairs.push_back(std::pair<int,int>( iItr->function_id, cur_elem.function_id  ));
        }
      }else{
        // First row of a new cluster: start over.
        thisCluster.clear();
        thisClusterName = cluster;
      }

      thisCluster.push_back( cur_elem );
    }
  }catch(exception &ex) {
    cerr << "Exception Occured: " << ex.what() << endl;
  }

  listOfFunctionClonePairs.sort();
  listOfFunctionClonePairs.unique();

  // Print each pair as "<larger id> <smaller id>", skipping pairs of a function with itself.
  for(std::list<std::pair<int,int> >::iterator iItr = listOfFunctionClonePairs.begin();
      iItr != listOfFunctionClonePairs.end(); ++iItr )
  {
    if(iItr->first != iItr->second)
      std::cout << iItr->second << " " << iItr->first << std::endl;
  }

  return 0;
}
Beispiel #22
0
Datei: callLSH.C Projekt: 8l/rose
// Run the LSH clone detector over every group of vectors, largest group first.
//
// The vectors table is partitioned by sum_of_counts into the ranges returned by computeranges().  The detection
// parameters are recorded in the detection_parameters table, the size of every group is counted, and then
// executeLSHCode() (when norm == 3) or executeLSHCodeLLNL() (any other norm) is invoked for each group that holds
// more than one vector.  A parameter file name inside a freshly created temporary directory is handed to those
// helpers; the file and directory are removed before returning.
//
//   tx                    transaction used for all queries
//   databaseName          passed through to the helpers
//   similarity_threshold  in the formula below, 1.0 gives distance 0 and a false-negative rate of 0
//   Exec                  passed through to the helpers
//   norm                  selects which helper is run (3 -> executeLSHCode)
//   hash_function_size, hash_table_count   not used by this function
//
// Exits the process with status 1 if the temporary directory cannot be created.
static void
callLSH(const SqlDatabase::TransactionPtr &tx, const std::string databaseName, double similarity_threshold, const string& Exec,
        int norm, size_t hash_function_size, size_t hash_table_count)
{
    double distance = sqrt((1. - similarity_threshold) * 50.);
    double false_negative_rate = ( similarity_threshold != 1.0) ? 0.0100 : 0;
    vector<CloneRange> ranges = computeranges(distance, 50, 100000);
    int maxNumElementsInGroup = -1;
    int maxNumElementIdx = -1;

    // FIXME: We can't pass parameters to the exec'd process this way because the parent's SQL statements are
    // being executed in a transaction -- they won't be visible in the child. [Robb P. Matzke 2013-08-12]
    tx->execute("delete from detection_parameters");
    tx->statement("insert into detection_parameters (similarity_threshold, false_negative_rate) values (?, ?)")
        ->bind(0, similarity_threshold)
        ->bind(1, false_negative_rate)
        ->execute();

    // Count the vectors in each range and remember which range is the most populated.  A high bound of -1 means
    // the range is open-ended, so no upper limit is added to the query.
    map<size_t, int> groupSizes;
    std::cout << "Looking for the biggest group" << std::endl;
    for (size_t i = 0; i < ranges.size(); ++i) {
        std::string sql = std::string("select count(*) from vectors where sum_of_counts >= ?") +
                          (ranges[i].high != -1 ? " and sum_of_counts <= ?" : "");
        SqlDatabase::StatementPtr cmd = tx->statement(sql);
        cmd->bind(0, ranges[i].low);
        if (ranges[i].high != -1)
            cmd->bind(1, ranges[i].high);
        int numElementsInGroup = cmd->execute_int();
        groupSizes[i] = numElementsInGroup;
        std::cerr << "The current group from " << ranges[i].low << " to " << ranges[i].high
                  << " is of size " << numElementsInGroup << std::endl;
        if (numElementsInGroup > maxNumElementsInGroup) {
            maxNumElementsInGroup = numElementsInGroup;
            maxNumElementIdx = static_cast<int>(i);
        }
    }

    // Without any range there is no "biggest group"; indexing ranges[-1] below would be out of bounds.
    if (maxNumElementIdx < 0) {
        std::cout << "Number of groups :" << ranges.size() << std::endl;
        return;
    }

    std::cout << "Biggest group found " << ranges[maxNumElementIdx].low << " " << ranges[maxNumElementIdx].high << std::endl;

    // The parameter file lives in a private temporary directory so that concurrent runs cannot collide.
    char tempDirName[] = "/tmp/paramdirXXXXXX";
    char* mkdtempResult = mkdtemp(tempDirName);
    if (!mkdtempResult) {
        perror("mkdtemp: ");
        exit (1);
    }
    string paramFileName = string(tempDirName) + "/params";
    std::cout << "Number of groups :" << ranges.size() << std::endl;

    // Visit the biggest group first, then the remaining groups in their original order: iteration 0 maps to the
    // biggest group, iterations 1..maxNumElementIdx map to the groups before it, and later iterations map to
    // themselves.
    for (int i = 0; i < (int)ranges.size(); ++i) {
        size_t group = (i == 0) ? maxNumElementIdx : (i <= maxNumElementIdx) ? i - 1 : i;
        if (groupSizes[group] > 1) {
            std::cout << "Executing LSH code low " << ranges[group].low
                      << " high " << ranges[group].high << " group  " << group << " size " << groupSizes[group] << std::endl;
            if(norm == 3) {
                executeLSHCode(tx, databaseName, Exec, paramFileName, ranges[group]);
            } else {
                executeLSHCodeLLNL(tx, databaseName, Exec, paramFileName, ranges[group], norm, similarity_threshold,
                                   false_negative_rate, groupSizes[group]);
            }
        }
    }

    // Best-effort cleanup of the parameter file and its directory.
    unlink(paramFileName.c_str());
    rmdir(tempDirName);
}
Beispiel #23
0
// Load all events into memory.  Events are emitted for a particular function ID being analyzed, but if the 25-run-test
// --follow-calls was specified, then events for that function ID might be at instructions that are outside that function.
// We need to make note of those functions so that we can load all their instructions.
//
// Every row of tmp_events whose address belongs to an instruction of the same specimen as `func_id` is folded into
// `events`, keyed by instruction address: per-kind counters are incremented and, depending on the event kind, the
// branch target, input value, fault kind or memory write is recorded.  Rows are processed in (igroup_id, pos) order,
// so the entry left in final_output_events for an (input group, address) pair is the last write in that order.
// Event kinds not listed in the switch only set the func_id of the entry.
static void
load_events(const SqlDatabase::TransactionPtr &tx, int func_id, Events &events/*in,out*/)
{
    int specimen_id = tx->statement("select specimen_id from semantic_functions where id = ?")
                      ->bind(0, func_id)->execute_int();
    SqlDatabase::StatementPtr stmt = tx->statement("select"
                                                   // 0          1               2            3          4
                                                   " event.addr, event.event_id, event.minor, event.val, func.id,"
                                                   // 5               6
                                                   " event.igroup_id, event.pos"
                                                   " from tmp_events as event"
                                                   " join semantic_instructions as insn on event.addr = insn.address"
                                                   " join semantic_functions as func on insn.func_id = func.id"
                                                   " where func.specimen_id = ?"
                                                   " order by igroup_id, pos");
    stmt->bind(0, specimen_id);
    for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) {
        rose_addr_t addr = row.get<rose_addr_t>(0);
        int event_id = row.get<int>(1);
        int minor = row.get<int>(2);
        int64_t val = row.get<int64_t>(3);

        // Look the per-address record up once (creating it if necessary) and update it through the reference.
        Events::mapped_type &ev = events[addr];
        ev.func_id = row.get<int>(4); // the hard-to-get ID, not the one stored in the events func_id column.

        int igroup_id = row.get<int>(5);
        int pos = row.get<int>(6);
        switch (event_id) {
            case CloneDetection::EV_REACHED: {
                ++ev.nexecuted;
                break;
            }
            case CloneDetection::EV_BRANCHED: {
                ++ev.nbranches;
                ++ev.branches[val];
                break;
            }
            case CloneDetection::EV_RETURNED: {
                ++ev.nreturns;
                break;
            }
            case CloneDetection::EV_CONSUME_INPUT: {
                ++ev.ninputs;
                assert(minor>=0);
                // `minor` indexes the inputs vector; grow it on demand.
                if ((size_t)minor>=ev.inputs.size())
                    ev.inputs.resize(minor+1);
                ++ev.inputs[minor][val];
                break;
            }
            case CloneDetection::EV_FAULT: {
                CloneDetection::AnalysisFault::Fault fault = (CloneDetection::AnalysisFault::Fault)minor;
                ++ev.nfaults;
                ++ev.faults[fault];
                break;
            }
            case CloneDetection::EV_MEM_WRITE: {
                OutputEventKey output_key(igroup_id, val);
                OutputEventValue output_val(pos, minor);
                // Track final writes to each address
                final_output_events[output_key] = output_val;
                // Append event to the appropriate instruction
                ev.outputs.push_back(std::make_pair(output_key, output_val));
                break;
            }
            default:
                /*void*/
                break;
        }
    }
}
// Entry point: lists the (specimen, function, input group) combinations that still need to be tested.
//
// Command line: zero or more switches followed by exactly one positional argument, which is passed to
// SqlDatabase::Connection::create() to open the database.  The switches restrict which functions and input groups
// are considered.  Output is one line per pending test on std::cout: specimen ID, function ID and input group ID
// separated by tabs.  With --specimen=list or --file=list a table of IDs and names is printed instead.
//
// Returns 0 on normal completion (including the "nothing to test" cases); malformed switches exit with status 1.
int
main(int argc, char *argv[])
{
    std::ios::sync_with_stdio();

    // Program name for diagnostics: strip any directory part and a leading "lt-" (presumably the prefix that
    // libtool adds to wrapper executables -- confirm).
    argv0 = argv[0];
    {
        size_t slash = argv0.rfind('/');
        argv0 = slash==std::string::npos ? argv0 : argv0.substr(slash+1);
        if (0==argv0.substr(0, 3).compare("lt-"))
            argv0 = argv0.substr(3);
    }

    // Parse switches.  Parsing stops at the first argument that does not start with '-' or just after a "--".
    // The exact-match "=list" forms are tested before the corresponding "=" prefix forms so that "list" is not
    // mistaken for an ID.
    Switches opt;
    int argno = 1;
    for (/*void*/; argno<argc && '-'==argv[argno][0]; ++argno) {
        if (!strcmp(argv[argno], "--")) {
            ++argno;
            break;
        } else if (!strcmp(argv[argno], "--help") || !strcmp(argv[argno], "-h")) {
            ::usage(0);
        } else if (!strncmp(argv[argno], "--entry=", 8)) {
            // Base 0: the address may be written in decimal, octal (leading 0) or hexadecimal (leading 0x).
            opt.entry_vas.insert(strtoull(argv[argno]+8, NULL, 0));
        } else if (!strcmp(argv[argno], "--file=list") || !strcmp(argv[argno], "--files=list")) {
            opt.list_files = true;
        } else if (!strncmp(argv[argno], "--file=", 7) || !strncmp(argv[argno], "--files=", 8)) {
            // Comma-separated list of file IDs; each must be a complete, valid number.
            std::vector<std::string> ids = StringUtility::split(",", strchr(argv[argno], '=')+1, (size_t)-1, true);
            for (size_t i=0; i<ids.size(); ++i) {
                const char *s = ids[i].c_str();
                char *rest;
                errno = 0;
                int id = strtoul(s, &rest, 0);
                if (errno || rest==s || *rest) {
                    std::cerr <<argv0 <<": invalid file ID: " <<ids[i] <<"\n";
                    exit(1);
                }
                opt.files.insert(id);
            }
        } else if (!strncmp(argv[argno], "--function=", 11) || !strncmp(argv[argno], "--functions=", 12)) {
            // Either a comma-separated list of function IDs, or a single TABLE.COLUMN naming a database column
            // that holds the function IDs.  The latter is recognized by a leading letter and an embedded '.'.
            std::vector<std::string> ids = StringUtility::split(",", strchr(argv[argno], '=')+1, (size_t)-1, true);
            if (ids.size()==1 && isalpha(ids[0][0]) && ids[0].find_first_of('.')!=std::string::npos) {
                std::vector<std::string> words = StringUtility::split(".", ids[0]);
                // Both parts are checked with is_valid_table_name() because they are later spliced into SQL text.
                if (words.size()!=2 ||
                    !SqlDatabase::is_valid_table_name(words[0]) || !SqlDatabase::is_valid_table_name(words[1])) {
                    std::cerr <<argv0 <<": --function switch needs either IDs or a database TABLE.COLUMN\n";
                    exit(1);
                }
                opt.function_table = words[0];
                opt.function_column = words[1];
            } else {
                for (size_t i=0; i<ids.size(); ++i) {
                    const char *s = ids[i].c_str();
                    char *rest;
                    errno = 0;
                    int id = strtoul(s, &rest, 0);
                    if (errno || rest==s || *rest) {
                        std::cerr <<argv0 <<": invalid function ID: " <<ids[i] <<"\n";
                        exit(1);
                    }
                    opt.functions.insert(id);
                }
            }
        } else if (!strncmp(argv[argno], "--first-fuzz=", 13)) {
            opt.first_fuzz = strtoul(argv[argno]+13, NULL, 0);
        } else if (!strncmp(argv[argno], "--name=", 7)) {
            opt.names.insert(argv[argno]+7);
        } else if (!strncmp(argv[argno], "--nfuzz=", 8)) {
            // nfuzz_set records that the switch was given; the upper bound on input groups is applied only then.
            opt.nfuzz = strtoul(argv[argno]+8, NULL, 0);
            opt.nfuzz_set = true;
        } else if (!strncmp(argv[argno], "--size=", 7)) {
            opt.ninsns = strtoul(argv[argno]+7, NULL, 0);
        } else if (!strcmp(argv[argno], "--specimen=list") || !strcmp(argv[argno], "--specimens=list")) {
            opt.list_specimens = true;
        } else if (!strncmp(argv[argno], "--specimen=", 11) || !strncmp(argv[argno], "--specimens=", 12)) {
            // Comma-separated list of specimen IDs; each must be a complete, valid number.
            std::vector<std::string> ids = StringUtility::split(",", strchr(argv[argno], '=')+1, (size_t)-1, true);
            for (size_t i=0; i<ids.size(); ++i) {
                const char *s = ids[i].c_str();
                char *rest;
                errno = 0;
                int id = strtoul(s, &rest, 0);
                if (errno || rest==s || *rest) {
                    std::cerr <<argv0 <<": invalid specimen ID: " <<ids[i] <<"\n";
                    exit(1);
                }
                opt.specimens.insert(id);
            }
        } else {
            std::cerr <<argv0 <<": unrecognized switch: " <<argv[argno] <<"\n"
                      <<"see \"" <<argv0 <<" --help\" for usage info.\n";
            exit(1);
        }
    }

    // Exactly one positional argument must remain: the database to open.
    if (argno+1!=argc)
        ::usage(1);
    SqlDatabase::TransactionPtr tx = SqlDatabase::Connection::create(argv[argno++])->transaction();

    // List the ID numbers and names for all specimen files
    if (opt.list_specimens) {
        SqlDatabase::Table<int, std::string> specimens;
        specimens.insert(tx->statement("select file.id, file.name"
                                       " from (select distinct specimen_id as id from semantic_functions) as specimen"
                                       " join semantic_files as file on specimen.id = file.id"
                                       " order by file.name"));
        specimens.headers("File ID", "Specimen Name");
        specimens.print(std::cout);
        return 0;
    }

    // List the ID numbers and names for all files containing functions
    if (opt.list_files) {
        SqlDatabase::Table<int, std::string> files;
        files.insert(tx->statement("select file.id, file.name"
                                   " from (select distinct file_id as id from semantic_functions) as used"
                                   " join semantic_files as file on used.id = file.id"
                                   " order by file.name"));
        files.headers("File ID", "Binary File Name");
        files.print(std::cout);
        return 0;
    }

    // Sanity checks
    if (!opt.functions.empty() && !opt.function_table.empty()) {
        std::cerr <<argv0 <<": --function=ID and --function=TABLE are mutually exclusive\n";
        exit(1);
    }
    // An empty database is reported on stderr but is not treated as an error (exit status 0).
    if (0==tx->statement("select count(*) from semantic_functions")->execute_int()) {
        std::cerr <<argv0 <<": database has no functions; nothing to test\n";
        return 0;
    }
    if (0==tx->statement("select count(*) from semantic_inputvalues")->execute_int()) {
        std::cerr <<argv0 <<": database has no input groups; nothing to test\n";
        return 0;
    }

    // Create table tmp_functions containing IDs for selected functions and their specimen IDs
    // Each switch that was given contributes one constraint; the constraints are combined with "and".
    std::vector<std::string> constraints;
    if (!opt.entry_vas.empty())
        constraints.push_back("func.entry_va " + SqlDatabase::in(opt.entry_vas));
    if (!opt.names.empty())
        constraints.push_back("func.name " + SqlDatabase::in_strings(opt.names, tx->driver()));
    if (!opt.specimens.empty())
        constraints.push_back("func.specimen_id " + SqlDatabase::in(opt.specimens));
    if (!opt.files.empty())
        constraints.push_back("func.file_id " + SqlDatabase::in(opt.files));
    if (!opt.functions.empty())
        constraints.push_back("func.id " + SqlDatabase::in(opt.functions));
    if (opt.ninsns>0)
        constraints.push_back("func.ninsns >= " + StringUtility::numberToString(opt.ninsns));
    std::string sql1 = "select func.id, func.specimen_id from semantic_functions as func";
    // When --function=TABLE.COLUMN was given, restrict to function IDs present in that column.
    if (!opt.function_table.empty())
        sql1 += " join "+opt.function_table+" as flist on func.id = flist."+opt.function_column;
    if (!constraints.empty())
        sql1 += " where " + StringUtility::join(" and ", constraints);
    tx->execute("create temporary table tmp_functions as " + sql1);

    // Create table tmp_inputgroups containing IDs for selected input groups
    // The range is [first_fuzz, first_fuzz+nfuzz) when --nfuzz was given, otherwise everything from first_fuzz up.
    std::string sql2 = "select distinct igroup_id from semantic_inputvalues where igroup_id >= " +
                       StringUtility::numberToString(opt.first_fuzz);
    if (opt.nfuzz_set)
        sql2 += " and igroup_id < " + StringUtility::numberToString(opt.first_fuzz+opt.nfuzz);
    tx->execute("create temporary table tmp_inputgroups as " + sql2);

    // Create tmp_pending as the cross product of functions and inputgroups except for those already tested
    // (a combination counts as already tested when semantic_fio has a row for that function and input group).
    tx->execute("create temporary table tmp_pending as"
                "    select func.specimen_id as specimen_id, func.id as func_id, igroup.igroup_id as igroup_id"
                "      from tmp_functions as func"
                "      join tmp_inputgroups as igroup"
                "      on igroup.igroup_id is not null" // "on" clause and "is not null" (rather than "true") for portability
                "  except"
                "    select func.specimen_id, func.id, fio.igroup_id"
                "      from semantic_fio as fio"
                "      join semantic_functions as func on fio.func_id=func.id");

    // Emit the pending work list, one tab-separated line per combination.
    SqlDatabase::StatementPtr stmt = tx->statement("select distinct specimen_id, func_id, igroup_id"
                                                   " from tmp_pending"
                                                   " order by specimen_id, igroup_id, func_id");
    for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row)
        std::cout <<row.get<int>(0) <<"\t" <<row.get<int>(1) <<"\t" <<row.get<int>(2) <<"\n";

    // no need to commit, but if we change this in the future, be sure to add begin_command()/finish_command()
    return 0;
}
// Store one signature vector as a new row of the "vectors" table.
//
// The row records the owning function and the window's index within it, the addresses of the first and last
// instruction of the window, the size of the address extent covered by the window's instructions, the sum of all
// counts in the vector, the compressed counts (base64) and the MD5 digest of the normalized instruction text
// (base64).  The row ID is one more than the largest ID currently in the table (1 for an empty table).  The global
// numVectorsGenerated counter is incremented on every call.
//
// `firstInsn` must hold at least `windowSize` instructions.  `functionName`, `filename` and `stride` are not used.
void
addVectorToDatabase(const SqlDatabase::TransactionPtr &tx, const SignatureVector& vec, const std::string& functionName,
                    size_t functionId, size_t indexWithinFunction, const std::string& normalizedUnparsedInstructions,
                    SgAsmx86Instruction* firstInsn[], const std::string& filename, size_t windowSize, size_t stride)
{
    ++numVectorsGenerated;

    vector<uint8_t> packedCounts = compressVector(vec.getBase(), SignatureVector::Size);

    // Total of all counters in the vector.
    size_t countTotal = 0;
    size_t slot = 0;
    while (slot < SignatureVector::Size) {
        countTotal += vec[slot];
        ++slot;
    }

    // Address extent covered by the instructions of this window.
    ExtentMap covered;
    for (SgAsmx86Instruction **insn = firstInsn; insn != firstInsn + windowSize; ++insn)
        covered.insert(Extent((*insn)->get_address(), (*insn)->get_size()));

    // Digest of the normalized instruction sequence.
    unsigned char digest[16];
    MD5((const unsigned char*)normalizedUnparsedInstructions.data(), normalizedUnparsedInstructions.size(), digest);

    SqlDatabase::StatementPtr insertVector = tx->statement("insert into vectors"
                                                  // 0   1            2                      3     4             5
                                                  " (id, function_id, index_within_function, line, last_insn_va, size,"
                                                  // 6            7           8
                                                  "sum_of_counts, counts_b64, instr_seq_b64)"
                                                  " values (?,?,?,?,?,?,?,?,?)");
    int nextId = tx->statement("select coalesce(max(id),0)+1 from vectors")->execute_int(); // 1-origin

    insertVector->bind(0, nextId)
                ->bind(1, functionId)
                ->bind(2, indexWithinFunction)
                ->bind(3, firstInsn[0]->get_address())
                ->bind(4, firstInsn[windowSize-1]->get_address())
                ->bind(5, covered.size())
                ->bind(6, countTotal)
                ->bind(7, StringUtility::encode_base64(&packedCounts[0], packedCounts.size()))
                ->bind(8, StringUtility::encode_base64(digest, 16))
                ->execute();
}
Beispiel #26
0
// Append one row of timing data to the group_timing table.
//
// The row holds the group bounds, the number of elements, K and L, the user/system CPU times taken from `ru_after`
// as totals, and the wall-clock, user and system time that elapsed between the "before" and "after" samples.
// total_wallclock is always written as 0.  `property_name` is not used.
void
insert_timing(const SqlDatabase::TransactionPtr &tx, std::string property_name, const int groupLow, const int groupHigh,
              const int num_elements, const int k, const int l, const timeval& before, const timeval& after,
              const rusage& ru_before, const rusage& ru_after)
{
    // Totals as of the "after" sample.
    const double userTotal = tvToDouble(ru_after.ru_utime);
    const double sysTotal = tvToDouble(ru_after.ru_stime);

    // Time spent between the two samples.
    const double wallElapsed = tvToDouble(after) - tvToDouble(before);
    const double userElapsed = userTotal - tvToDouble(ru_before.ru_utime);
    const double sysElapsed = sysTotal - tvToDouble(ru_before.ru_stime);

    SqlDatabase::StatementPtr insertRow = tx->statement("insert into group_timing"
                                                  // 0         1          2             3  4  5
                                                  " (groupLow, groupHigh, num_elements, K, L, total_wallclock,"
                                                  // 6              7              8          9         10
                                                  " total_usertime, total_systime, wallclock, usertime, systime)"
                                                  " values (?,?,?,?,?,?,?,?,?,?,?)");
    insertRow->bind(0, groupLow)
             ->bind(1, groupHigh)
             ->bind(2, num_elements)
             ->bind(3, k)
             ->bind(4, l)
             ->bind(5, 0)
             ->bind(6, userTotal)
             ->bind(7, sysTotal)
             ->bind(8, wallElapsed)
             ->bind(9, userElapsed)
             ->bind(10, sysElapsed)
             ->execute();
}