void
compute_percent_similarity_statistics(double bucket_size, double increment,  SqlDatabase::TransactionPtr transaction)
{
    int num_pairs = transaction->statement("select count(*) from semantic_funcsim")->execute_int();
    transaction->execute("drop table IF EXISTS fr_percent_similar");
    transaction->execute("create table fr_percent_similar(similarity_low double precision, similarity_middle double precision,"
                         " similarity_high double precision, percent double precision, num_matches integer);");

    SqlDatabase::StatementPtr pecent_similar_stmt = transaction->statement("insert into fr_percent_similar"
            // 0              1                  2
            "(similarity_low, similarity_middle, similarity_high,"
            // 3       4
            " percent, num_matches) "
            " values (?, ?, ?, ?, ?)");
    for (double cur_bucket = 0.0; cur_bucket <= 1.0+bucket_size; cur_bucket+=increment) {
        int num_matches = transaction->statement("select count(*) from semantic_funcsim where "
                          " similarity >= " +
                          boost::lexical_cast<std::string>(cur_bucket - bucket_size) +
                          " and similarity < " +
                          boost::lexical_cast<std::string>(cur_bucket + bucket_size))->execute_int();
        pecent_similar_stmt->bind(0, cur_bucket - bucket_size < 0 ? 0 : cur_bucket - bucket_size);
        pecent_similar_stmt->bind(1, cur_bucket);
        pecent_similar_stmt->bind(2, cur_bucket + bucket_size >= 1.0 ? 1.0 : cur_bucket + bucket_size);
        pecent_similar_stmt->bind(3, num_pairs > 0 ? ((double) num_matches*100.0)/num_pairs : 0);
        pecent_similar_stmt->bind(4, num_matches);
        pecent_similar_stmt->execute();
    }
}
Exemple #2
0
int
main(int argc, char *argv[])
{
    std::ios::sync_with_stdio();
    argv0 = argv[0];
    {
        size_t slash = argv0.rfind('/');
        argv0 = slash==std::string::npos ? argv0 : argv0.substr(slash+1);
        if (0==argv0.substr(0, 3).compare("lt-"))
            argv0 = argv0.substr(3);
    }

    int argno = 1;
    bool link = false;
    std::vector<std::string> signature_components;

    for (/*void*/; argno<argc && '-'==argv[argno][0]; ++argno) {
        std::cout << argv[argno] << std::endl;
        if (!strcmp(argv[argno], "--")) {
            ++argno;
            break;
        } else if (!strcmp(argv[argno], "--help") || !strcmp(argv[argno], "-h")) {
            ::usage(0);
        } else if (!strcmp(argv[argno], "--link")) {
            link = true;
        } else if (!strcmp(argv[argno], "--no-link")) {
            link = false;
        } else {
            std::cerr <<argv0 <<": unrecognized switch: " <<argv[argno] <<"\n"
                      <<"see \"" <<argv0 <<" --help\" for usage info.\n";
            exit(1);
        }
    }
    if (argno+2!=argc)
        ::usage(1);

    std::string db_name(argv[argno++]);
    std::cout << "Connecting to db:" << db_name << std::endl;
    SqlDatabase::ConnectionPtr conn = SqlDatabase::Connection::create(db_name);
    transaction = conn->transaction();

    transaction->execute("drop table if exists syscalls_made;");
    transaction->execute("create table syscalls_made (caller integer references semantic_functions(id),"
                         " syscall_id integer, syscall_name text)");

    std::cout << "database name is : " << std::string(argv[argno]) << std::endl;
    std::string specimen_name = argv[argno++];

    // Parse the binary specimen
    SgAsmInterpretation *interp = open_specimen(specimen_name, argv0, link);
    assert(interp!=NULL);

    // Figure out what functions need to be added to the database.
    std::vector<SgAsmFunction*> all_functions = SageInterface::querySubTree<SgAsmFunction>(interp);
    DirectedGraph* G = create_reachability_graph(all_functions, interp);
    add_calls_to_syscalls_to_db(transaction, G, all_functions);
    analyze_data(transaction);
    transaction->commit();
    return 0;
}
Exemple #3
0
Fichier : lsh.C Projet : 8l/rose
void
get_run_parameters(const SqlDatabase::TransactionPtr &tx, int& windowSize, int& stride)
{
    windowSize = tx->statement("select window_size from run_parameters limit 1")->execute_int();
    stride = tx->statement("select stride from run_parameters limit 1")->execute_int();
    assert (windowSize != 0);
    assert (stride != 0);
}
    void operator()() {
        if (work.empty())
            return;
        int specimen_id = work.front().specimen_id;

        // Database connections don't survive over fork() according to SqLite and PostgreSQL documentation, so open it again
        SqlDatabase::TransactionPtr tx = SqlDatabase::Connection::create(databaseUrl)->transaction();

        OutputGroups ogroups; // do not load from database (that might take a very long time)

        if (opt.verbosity>=LACONIC) {
            if (opt.verbosity>=EFFUSIVE)
                std::cerr <<argv0 <<": " <<std::string(100, '#') <<"\n";
            std::cerr <<argv0 <<": processing binary specimen \"" <<files.name(specimen_id) <<"\"\n";
        }

        // Parse the specimen
        SgProject *project = files.load_ast(tx, specimen_id);
        if (!project)
            project = open_specimen(tx, files, specimen_id, argv0);
        if (!project) {
            std::cerr <<argv0 <<": problems loading specimen\n";
            exit(1);
        }

        // Get list of specimen functions and initialize the instruction cache
        std::vector<SgAsmFunction*> all_functions = SageInterface::querySubTree<SgAsmFunction>(project);
        IdFunctionMap functions = existing_functions(tx, files, all_functions);
        FunctionIdMap function_ids;
        AddressIdMap entry2id;                              // maps function entry address to function ID
        for (IdFunctionMap::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
            function_ids[fi->second] = fi->first;
            entry2id[fi->second->get_entry_va()] = fi->first;
        }
        InstructionProvidor insns = InstructionProvidor(all_functions);

        // Split the work list into chunks, each containing testsPerChunk except the last, which may contain fewer.
        static const size_t testsPerChunk = 25;
        size_t nChunks = (work.size() + testsPerChunk - 1) / testsPerChunk;
        std::vector<SomeTests> jobs;
        for (size_t i=0; i<nChunks; ++i) {
            size_t beginWorkIdx = i * testsPerChunk;
            size_t endWorkIdx = std::min((i+1)*testsPerChunk, work.size());
            Work partWork(work.begin()+beginWorkIdx, work.begin()+endWorkIdx);
            jobs.push_back(SomeTests(partWork, databaseUrl, functions, function_ids, &insns, cmd_id, &entry2id));
        }

        // Run the parts in parallel using the maximum parallelism specified on the command-line.  We must commit our
        // transaction before forking, otherwise the children won't see the rows we've added to various tables.
        tx->commit();
        tx.reset();
        size_t nfailed = runInParallel(jobs, opt.nprocs);
        if (nfailed!=0) {
            std::cerr <<"SpecimenProcessor: " <<StringUtility::plural(nfailed, "jobs") <<" failed\n";
            exit(1);
        }
    }
void
createVectorsRespectingFunctionBoundaries(SgNode* top, const std::string& filename, size_t windowSize, size_t stride,
                                          const SqlDatabase::TransactionPtr &tx)
{
    struct InstructionSelector: SgAsmFunction::NodeSelector {
        virtual bool operator()(SgNode *node) {
            return isSgAsmInstruction(node)!=NULL;
        }
    } iselector;

    struct DataSelector: SgAsmFunction::NodeSelector {
        virtual bool operator()(SgNode *node) {
            return isSgAsmStaticData(node)!=NULL;
        }
    } dselector;

    SqlDatabase::StatementPtr cmd1 = tx->statement("insert into functions"
                                                   // 0   1     2              3         4      5      6
                                                   " (id, file, function_name, entry_va, isize, dsize, size)"
                                                   " values(?,?,?,?,?,?,?)");
    
    SqlDatabase::StatementPtr cmd2 = tx->statement("insert into instructions"
                                                   // 0        1     2            3                      4
                                                   " (address, size, function_id, index_within_function, assembly)"
                                                   " values (?,?,?,?,?)");

    vector<SgAsmFunction*> funcs = SageInterface::querySubTree<SgAsmFunction>(top);
    int functionId = tx->statement("select coalesce(max(id),-1)+1 from functions")->execute_int(); // zero origin
    
    for (vector<SgAsmFunction*>::iterator fi=funcs.begin(); fi!=funcs.end(); ++fi, ++functionId) {
        ExtentMap e_insns, e_data, e_total;
        (*fi)->get_extent(&e_insns, NULL, NULL, &iselector);
        (*fi)->get_extent(&e_data,  NULL, NULL, &dselector);
        (*fi)->get_extent(&e_total);

	createVectorsForAllInstructions(*fi, filename, (*fi)->get_name(), functionId, windowSize, stride, tx);
        cmd1->bind(0, functionId);
        cmd1->bind(1, filename);
        cmd1->bind(2, (*fi)->get_name() );
        cmd1->bind(3, (*fi)->get_entry_va());
        cmd1->bind(4, e_insns.size());
        cmd1->bind(5, e_data.size());
        cmd1->bind(6, e_total.size());
        cmd1->execute();

        vector<SgAsmInstruction*> insns = SageInterface::querySubTree<SgAsmInstruction>(*fi);
        for (size_t i=0; i<insns.size(); ++i) {
            cmd2->bind(0, insns[i]->get_address());
            cmd2->bind(1, insns[i]->get_size());
            cmd2->bind(2, functionId);
            cmd2->bind(3, i);
            cmd2->bind(4, unparseInstructionWithAddress(insns[i]));
            cmd2->execute();
        }
    }
    cerr << "Total vectors generated: " << numVectorsGenerated << endl;
}
void
find_clusters(int max_cluster_size_signed, SqlDatabase::TransactionPtr transaction)
{
    assert(max_cluster_size_signed >= 0);
    size_t max_cluster_size = max_cluster_size_signed;

    SqlDatabase::StatementPtr insert_stmt = transaction->statement("insert into fr_ignored_function_pairs"
                                            // 0        1         2
                                            "(func1_id, func2_id, from_cluster_of_size)"
                                            " values (?, ?, ?)");

    //Get all vetexes and find the union
    std::string _query_condition = "select func1_id, func2_id from fr_clone_pairs";
    SqlDatabase::StatementPtr stmt = transaction->statement(_query_condition);

    if (stmt->begin() == stmt->end())
        return;

    //Count how many vertices we have for boost graph
    int VERTEX_COUNT = transaction->statement("select count(*) from semantic_functions")->execute_int();

    typedef adjacency_list <vecS, vecS, undirectedS> Graph;
    typedef graph_traits<Graph>::vertex_descriptor Vertex;
    typedef graph_traits<Graph>::vertices_size_type VertexIndex;
    Graph graph(VERTEX_COUNT);

    std::vector<VertexIndex> rank(num_vertices(graph));
    std::vector<Vertex> parent(num_vertices(graph));

    typedef VertexIndex* Rank;
    typedef Vertex* Parent;
    disjoint_sets<Rank, Parent> ds(&rank[0], &parent[0]);
    initialize_incremental_components(graph, ds);
    incremental_components(graph, ds);

    graph_traits<Graph>::edge_descriptor edge;
    bool flag;

    for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) {
        int func1 = row.get<int>(0);
        int func2 = row.get<int>(1);
        boost::tie(edge, flag) = add_edge(func1, func2, graph);
        ds.union_set(func1,func2);
    }

    typedef component_index<VertexIndex> Components;
    Components components(parent.begin(), parent.end());
    std::map<int,int> size_distribution;

    // Iterate through the component indices
    BOOST_FOREACH(VertexIndex current_index, components) {
        std::vector<int> cluster_functions;

        // Iterate through the child vertex indices for [current_index]
        BOOST_FOREACH(VertexIndex child_index, components[current_index]) {
            cluster_functions.push_back(child_index);
        }
// Show some general info about the function
static void
show_summary(const SqlDatabase::TransactionPtr &tx, int func_id)
{
    SqlDatabase::Statement::iterator geninfo = tx->statement("select"
                                                             //  0              1          2           3
                                                             "   func.entry_va, func.name, file1.name, file2.name,"
                                                             //  4            5           6           7
                                                             "   func.ninsns, func.isize, func.dsize, func.size,"
                                                             //  8            9            10              11
                                                             "   func.digest, cmd.hashkey, cmd.begin_time, func.specimen_id,"
                                                             //  12
                                                             "   func.file_id"
                                                             " from semantic_functions as func"
                                                             " join semantic_files as file1 on func.specimen_id = file1.id"
                                                             " join semantic_files as file2 on func.file_id = file2.id"
                                                             " join semantic_history as cmd on func.cmd = cmd.hashkey"
                                                             " where func.id = ?")->bind(0, func_id)->begin();
    double returns_value = CloneDetection::function_returns_value(tx, func_id);

    std::cout <<"Function ID:                      " <<func_id <<"\n"
              <<"Entry virtual address:            " <<StringUtility::addrToString(geninfo.get<rose_addr_t>(0)) <<"\n"
              <<"Function name:                    " <<geninfo.get<std::string>(1) <<"\n"
              <<"Binary specimen name:             " <<geninfo.get<std::string>(2) <<" (id=" <<geninfo.get<int>(11) <<")\n";
    if (0!=geninfo.get<std::string>(2).compare(geninfo.get<std::string>(3)))
        std::cout <<"Binary file name:                 " <<geninfo.get<std::string>(3) <<" (id=" <<geninfo.get<int>(12) <<")\n";
    std::cout <<"Number of instructions:           " <<geninfo.get<size_t>(4) <<"\n"
              <<"Number of bytes for instructions: " <<geninfo.get<size_t>(5) <<"\n"
              <<"Number of bytes for static data:  " <<geninfo.get<size_t>(6) <<"\n"
              <<"Total number of bytes:            " <<geninfo.get<size_t>(7) <<"\n" // not necessarily the sum isize + dsize
              <<"Function returns a value:         " <<round(100.0*returns_value) <<"% probability\n"
              <<"Function static digest:           " <<geninfo.get<std::string>(8) <<"\n"
              <<"Command that inserted function:   " <<geninfo.get<int64_t>(9) <<" (command hashkey)\n"
              <<"Time that function was inserted:  " <<SqlDatabase::humanTimeRenderer(geninfo.get<time_t>(10), 0) <<"\n";


    size_t ntests = tx->statement("select count(*) from semantic_fio where func_id=?")->bind(0, func_id)->execute_int();
    if (0==ntests) {
        std::cout <<"Number of tests for function:     " <<ntests <<"\n";
    } else {
        SqlDatabase::StatementPtr stmt = tx->statement("select fault.name, count(*), 100.0*count(*)/?"
                                                       " from semantic_fio as fio"
                                                       " join semantic_faults as fault on fio.status = fault.id"
                                                       " where func_id = ?"
                                                       " group by fault.id, fault.name"
                                                       " order by fault.id")->bind(0, ntests)->bind(1, func_id);
        SqlDatabase::Table<std::string, size_t, double> statuses(stmt);
        if (statuses.size()==1) {
            std::cout <<"Number of tests for function:     " <<ntests <<" (all had status " <<statuses[0].v0 <<")\n";
        } else {
            std::cout <<"Number of tests for function:     " <<ntests <<"\n";
            statuses.headers("Status", "NTests", "Percent");
            statuses.line_prefix("    ");
            statuses.print(std::cout);
        }
    }
}
Exemple #8
0
int
main(int argc, char* argv[])
{
    std::string database;
    int norm = 1;
    double similarity_threshold=1.;

    size_t k;
    size_t l;
    try {
        options_description desc("Allowed options");
        desc.add_options()
            ("help", "produce a help message")
            ("database,q", value< string >()->composing(), 
             "the sqlite database that we are to use")
            ("norm,p", value< int >(&norm), "Exponent in p-norm to use (1 or 2 or 3 (MIT implementation) )")
            ("hash-function-size,k", value< size_t >(&k), "The number of elements in a single hash function")
            ("hash-table-count,l", value< size_t >(&l), "The number of separate hash tables to create")
            ("similarity,t", value< double >(&similarity_threshold), "The similarity threshold that is allowed in a clone pair");

        variables_map vm;
        store(parse_command_line(argc, argv, desc), vm);
        notify(vm);

        if (vm.count("help")) {
            cout << desc;            
            exit(0);
        }
		
        if (vm.count("database")!=1) {
            std::cerr << "Missing options. Call as: findClones --database <database-name>" 
                      << std::endl;
            exit(1);

        }

        database = vm["database"].as<string >();
        similarity_threshold = vm["similarity"].as<double>();
        cout << "database: " << database << std::endl;
    } catch(exception& e) {
        cout << e.what() << "\n";
    }

    std::cout << "The similarity threshold is " << similarity_threshold << std::endl;
    SqlDatabase::TransactionPtr tx = SqlDatabase::Connection::create(database)->transaction();
    tx->statement("update run_parameters set similarity_threshold = ?")
        ->bind(0, similarity_threshold)
        ->execute();

    OperateOnClusters op(database, norm, similarity_threshold, k , l);
    op.analyzeClusters();
    //op.calculate_false_positives();

    tx->commit();
    return 0;
};
Exemple #9
0
static void
postprocess(const SqlDatabase::TransactionPtr &tx)
{
    int windowSize = tx->statement("select window_size from run_parameters limit 1")->execute_int();
    int stride = tx->statement("select stride from run_parameters limit 1")->execute_int();
    assert(windowSize != 0);
    assert(stride != 0);

    cerr << "About to delete from postprocessed_clusters" << endl;
    tx->execute("delete from postprocessed_clusters");
    cerr << "... done" << endl;

    cerr << "About to postprocess" << endl;
    SqlDatabase::StatementPtr cmd = tx->statement("select cluster, function_id, index_within_function, vectors_row"
                                                  " from clusters order by cluster, function_id, index_within_function");
    SqlDatabase::StatementPtr insertCmd = tx->statement("insert into postprocessed_clusters"
                                                        " select * from clusters where row_number = ?");
    const size_t numStridesThatMustBeDifferent = windowSize / (stride * 2);
    string last_cluster = "";
    string last_func_id = "";
    size_t last_index_within_function = 0;
    vector<string> rows_in_this_cluster;
    bool first = true;
    for (SqlDatabase::Statement::iterator postproc_reader=cmd->begin(); postproc_reader!=cmd->end(); ++postproc_reader) {
        string cluster = postproc_reader.get<std::string>(0);
        string function_id = postproc_reader.get<std::string>(1);
        size_t index_within_function = postproc_reader.get<size_t>(2);
        string cluster_row_number = postproc_reader.get<std::string>(3);
        bool differentFunction = cluster != last_cluster || function_id != last_func_id;
        bool endingCluster = differentFunction;
        bool beginningNewCluster = first || differentFunction;
        first = false;
        if (endingCluster) {
            if (rows_in_this_cluster.size() > 1) { // Skip clusters that have only one element left
                for (size_t i = 0; i < rows_in_this_cluster.size(); ++i) {
                    insertCmd->bind(0, rows_in_this_cluster[i]);
                    insertCmd->execute();
                }
            }
        }
        if (beginningNewCluster) {
            last_cluster = cluster;
            last_func_id = function_id;
            last_index_within_function = index_within_function;
            rows_in_this_cluster.clear();
        }
        bool keep = beginningNewCluster || (index_within_function >= last_index_within_function + numStridesThatMustBeDifferent);
        if (keep) {
            last_index_within_function = index_within_function;
            rows_in_this_cluster.push_back(cluster_row_number);
        }
    }
    cerr << "... done" << endl;
}
void
createVectorsNotRespectingFunctionBoundaries(SgNode* top, const std::string& filename, size_t windowSize, size_t stride,
                                             const SqlDatabase::TransactionPtr &tx)
{
    int functionId = tx->statement("select coalesce(max(id),-1)+1 from functions")->execute_int(); // zero origin
    std::string functionName = filename + "-all-instructions";
    createVectorsForAllInstructions(top, filename, functionName, functionId, windowSize, stride, tx);
    tx->statement("insert into functions(file, function_name) values (?,?)")
        ->bind(0, filename)
        ->bind(1, functionName)
        ->execute();
    cout << "Total vectors generated: " << numVectorsGenerated << endl;
}
// Create and populate the tmp_events table.
static void
gather_events(const SqlDatabase::TransactionPtr &tx, int func_id)
{
    tx->execute("create temporary table tmp_events as select * from semantic_fio_trace limit 0");
    if (opt.show_trace) {
        std::string sql = "insert into tmp_events select * from semantic_fio_trace where func_id = ?";
        std::vector<std::string> igroups;
        for (std::set<int>::const_iterator i=opt.traces.begin(); i!=opt.traces.end(); ++i)
            igroups.push_back(StringUtility::numberToString(*i));
        if (!igroups.empty())
            sql += " and igroup_id in (" + StringUtility::join(", ", igroups) + ")";
        tx->statement(sql)->bind(0, func_id)->execute();
    }
}
Exemple #12
0
// List tests that are missing error information.
static void
listMissingErrors(const SqlDatabase::TransactionPtr &tx, const Settings &settings) {
    std::vector<std::string> args;
    SqlDatabase::StatementPtr q = tx->statement("select test.id, test.rose_date, test.os, users.name, test.tester"
                                                " from test_results test"
                                                " join attachments att on test.id = att.test_id"
                                                " join users on test.reporting_user = users.uid" +
                                                sqlWhereClause(tx, settings, args) + " and"
                                                "    test.first_error is null and"
                                                "    test.status <> 'end' and"
                                                "    att.name = 'Final output'"
                                                " order by test.id");
    sqlBindArgs(q, args);
    SqlDatabase::Table<int,                             // 0: id
                       int,                             // 1: rose_date
                       std::string,                     // 2: os
                       std::string,                     // 3: reporting_user
                       std::string>                     // 4: tester
        table(q);
    if (!table.empty()) {
        table.headers("Id", "ROSE date", "OS", "Reporting user", "Tester");
        table.reprint_headers(50);
        table.renderers().r1 = &timeRenderer;
        table.print(std::cout);
    }
}
static void
list_assembly(const SqlDatabase::TransactionPtr &tx, int func_id)
{
    Events events;
    gather_events(tx, func_id);
    load_events(tx, func_id, events);

    SqlDatabase::StatementPtr stmt = tx->statement("select address, assembly from semantic_instructions where func_id = ?"
                                                   " order by position")->bind(0, func_id);
    for (SqlDatabase::Statement::iterator insn=stmt->begin(); insn!=stmt->end(); ++insn) {
        rose_addr_t addr = insn.get<rose_addr_t>(0);
        std::string assembly = insn.get<std::string>(1);
        Events::const_iterator ei=events.find(addr);

        // Assembly line prefix
        if (ei!=events.end() && ei->second.nexecuted>0) {
            std::cout <<std::setw(9) <<std::right <<ei->second.nexecuted <<"x ";
        } else {
            std::cout <<std::string(11, ' ');
        }

        // Assembly instruction
        std::cout <<"| " <<StringUtility::addrToString(addr) <<":  " <<assembly <<"\n";

        if (ei!=events.end())
            show_events(ei->second);
    }
}
void
computational_equivalent_classes(std::map<int,int>& norm_map)
{
    SqlDatabase::StatementPtr stmt = transaction->statement("select func_id, equivalent_func_id from equivalent_classes");
    for (SqlDatabase::Statement::iterator row=stmt->begin(); row!= stmt->end(); ++row)
        norm_map[row.get<int>(0)] = row.get<int>(1);
}
CallVec*
load_api_calls_for(int func_id, int igroup_id, bool ignore_no_compares, int call_depth, bool expand_ncalls)
{
    SqlDatabase::StatementPtr stmt = transaction->statement("select distinct fio.pos, fio.callee_id, fio.ncalls"
                                                            " from semantic_fio_calls as fio"
                                                            " join tmp_interesting_funcs as f1"
                                                            // filter out functions with no compares
                                                            " on f1.func_id = fio.callee_id"
                                                            // filter on current parameters
                                                            " where fio.func_id = ? and fio.igroup_id = ?"
                                                            // filter out function not called directly
                                                            + std::string(call_depth >= 0 ? " and fio.caller_id = ?" : "")
                                                            +" order by fio.pos");
    stmt->bind(0, func_id);
    stmt->bind(1, igroup_id);

    if (call_depth >= 0)
        stmt->bind(2, func_id);

    CallVec* call_vec = new CallVec;
    for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) {
        int callee_id = row.get<int>(1);
        int ncalls    = row.get<int>(2);

        if (expand_ncalls) {
            for (int i = 0; i < ncalls; i++)
                call_vec->push_back(callee_id);
        } else {
            call_vec->push_back(callee_id);
        }
    }
    return call_vec;
}
Exemple #16
0
// Update the database by filling in test_results.first_error information for those tests that don't have a cached first error
// but which failed and have output.
static void
updateDatabase(const SqlDatabase::TransactionPtr &tx, const Settings &settings) {
    std::vector<std::string> args;
    SqlDatabase::StatementPtr q = tx->statement("update test_results test"
                                                " set first_error = substring("
#if 0 // [Robb Matzke 2016-02-08]
                                                // Look at all output stored in the database (which is typically only the last
                                                // few hundred lines of the complete output).
                                                "att.content "
#else
                                                // This coalesce tries to find where a parallel make command failed and looks
                                                // only at the following serial make, which is assumed to follow the parallel
                                                // make.
                                                "coalesce(substring(att.content from '(\\nmake: \\*\\*\\* \\[[-_a-zA-Z0-9]+\\] Error 1\n.+)'), att.content) "
#endif
                                                "from '(?n)("
                                                //----- regular expressions begin -----
                                                "\\merror: .+"
                                                "|catastrophic error: *\\n.+"
                                                "|^.* \\[err\\]: terminated after .+"
                                                "|^.* \\[err\\]: command died with .+"
                                                "|^.* \\[err\\]: +what\\(\\): .*"
                                                //----- regular expressions end -----
                                                ")')"
                                                " from attachments att" +
                                                sqlWhereClause(tx, settings, args) + " and"
                                                "    test.id = att.test_id and"
                                                "    test.first_error is null and"
                                                "    test.status <> 'end' and"
                                                "    att.name = 'Final output'");
    sqlBindArgs(q, args);
    q->execute();
}
static int
find_function_or_exit(const SqlDatabase::TransactionPtr &tx, char *func_spec)
{
    char *rest;
    errno = 0;
    int func_id = -1;
    int func_spec_i = strtol(func_spec, &rest, 0);
    if (errno || rest==func_spec || *rest)
        func_spec_i = -1;
    if (-1==func_id && -1!=func_spec_i &&
        1==tx->statement("select count(*) from semantic_functions where id = ?")->bind(0, func_spec_i)->execute_int())
        func_id = func_spec_i;
    if (-1==func_id) {
        SqlDatabase::StatementPtr stmt1a = tx->statement("select func.id, func.entry_va, func.name, func.ninsns, file.name"
                                                         " from semantic_functions as func"
                                                         " join semantic_files as file on func.file_id = file.id"
                                                         " where entry_va = ?")->bind(0, func_spec_i);
        SqlDatabase::StatementPtr stmt1b = tx->statement("select func.id, func.entry_va, func.name, func.ninsns, file.name"
                                                         " from semantic_functions as func"
                                                         " join semantic_files as file on func.file_id = file.id"
                                                         " where func.name = ?")->bind(0, func_spec);
        SqlDatabase::StatementPtr stmt1c = tx->statement("select func.id, func.entry_va, func.name, func.ninsns, file.name"
                                                         " from semantic_functions as func"
                                                         " join semantic_files as file on func.file_id = file.id"
                                                         " where file.name like"
                                                         " '%/"+SqlDatabase::escape(func_spec, tx->driver(), false)+"'");
        SqlDatabase::Table<int, rose_addr_t, std::string, size_t, std::string> functions;
        if (func_spec_i!=-1)
            functions.insert(stmt1a);
        functions.insert(stmt1b);
        functions.insert(stmt1c);
        functions.headers("ID", "Entry VA", "Function Name", "NInsns", "Specimen Name");
        functions.renderers().r1 = &SqlDatabase::addr32Renderer;
        if (functions.empty()) {
            std::cout <<argv0 <<": no function found by ID, address, or name: " <<func_spec <<"\n";
            exit(0);
        } else if (1==functions.size()) {
            func_id = functions[0].v0;
        } else {
            std::cout <<argv0 <<": function specification is ambiguous: " <<func_spec <<"\n";
            functions.print(std::cout);
            exit(0);
        }
    }
    assert(func_id>=0);
    return func_id;
}
Exemple #18
0
// Clear all cached error information from the database.
static void
clearErrors(const SqlDatabase::TransactionPtr &tx, const Settings &settings) {
    std::vector<std::string> args;
    SqlDatabase::StatementPtr q = tx->statement("update test_results set first_error = null" +
                                                sqlWhereClause(tx, settings, args));
    sqlBindArgs(q, args);
    q->execute();
}
static Dependencies
loadAllDependencies(const SqlDatabase::TransactionPtr &tx) {
    Dependencies dependencies;
    SqlDatabase::StatementPtr q = tx->statement("select name, value from dependencies where enabled <> 0");
    for (SqlDatabase::Statement::iterator row = q->begin(); row != q->end(); ++row)
        dependencies.insertMaybeDefault(row.get<std::string>(0)).push_back(row.get<std::string>(1));
    return dependencies;
}
Exemple #20
0
Fichier : lsh.C Projet : 8l/rose
void
insert_into_clusters(const SqlDatabase::TransactionPtr &tx, int cluster, int function_id, int index_within_function,
                     int vectors_row, double dist)
{
    int id = tx->statement("select coalesce(max(id),0)+1 from clusters")->execute_int();

    tx->statement("insert into clusters"
                  // 0   1        2            3                      4            5
                  " (id, cluster, function_id, index_within_function, vectors_row, dist)"
                  " values (?, ?,?,?,?,?)")
        ->bind(0, id)
        ->bind(1, cluster)
        ->bind(2, function_id)
        ->bind(3, index_within_function)
        ->bind(4, vectors_row)
        ->bind(5, dist)
        ->execute();
}
void
addFunctionStatistics(const SqlDatabase::TransactionPtr &tx, const std::string& filename, const std::string& functionName,
                      size_t functionId, size_t numInstructions)
{
    tx->statement("insert into function_statistics (function_id, num_instructions) values (?,?)")
        ->bind(0, functionId)
        ->bind(1, numInstructions)
        ->execute();
}
static void
load_source_code(const SqlDatabase::TransactionPtr &tx, Listing &listing/*in,out*/)
{
    SqlDatabase::StatementPtr stmt = tx->statement("select file_id, linenum, line from tmp_src");
    for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) {
        int file_id = row.get<int>(0);
        int linenum = row.get<int>(1);
        SourcePosition srcpos(file_id, linenum);
        listing[srcpos].source_code = row.get<std::string>(2);
    }
}
static void
gather_source_code(const SqlDatabase::TransactionPtr &tx)
{
    tx->execute("create temporary table tmp_src as"
                "  select distinct src.*"
                "    from tmp_insns as insn"
                "    join semantic_sources as src"
                "      on insn.src_file_id=src.file_id"
                "      and src.linenum >= insn.src_line-10"
                "      and src.linenum <= insn.src_line+10");
}
Exemple #24
0
int
main(int argc, char *argv[])
{
    // Parse command-line
    opt.nprocs = nProcessors();
    int argno = parse_commandline(argc, argv);
    if (argno+1!=argc)
        usage(1);
    std::string databaseUrl = argv[argno++];

    SqlDatabase::TransactionPtr tx = SqlDatabase::Connection::create(databaseUrl)->transaction();
    int64_t cmd_id = start_command(tx, argc, argv, "running tests");

    // Load worklist
    MultiWork work;
    load_sorted_work(work/*out*/);
    if (work.empty())
        return 0;


    // Load information about files.  The transaction is not saved anywhere.
    FilesTable files(tx);

    // We must commit our transaction before we fork, otherwise the child processes won't be able to see the rows we've
    // inserted. Specifically, the row in the semantic_history table that says who we are.  Destroy the smart pointer so that
    // the connection is even closed.
    tx->commit();
    tx.reset();

    // Process work items for each specimen sequentially
    BOOST_FOREACH (const Work &workForSpecimen, work)
    if (forkAndWait(SpecimenProcessor(workForSpecimen, files, databaseUrl, cmd_id)))
        exit(1);

    // Indicate that this command is finished
    tx = SqlDatabase::Connection::create(databaseUrl)->transaction();
    finish_command(tx, cmd_id, "ran tests");
    tx->commit();

    return 0;
}
Exemple #25
0
static void 
callExact(const SqlDatabase::TransactionPtr &tx, const std::string databaseName, const string& Exec)
{
    // FIXME: We can't pass parameters to the exec'd process this way because the parent's SQL statements are
    // being executed in a transaction -- they won't be visible in the child. [Robb P. Matzke 2013-08-12]
    tx->execute("delete from detection_parameters");
    tx->statement("insert into detection_parameters (similarity_threshold, false_negative_rate) values (?, ?)")
        ->bind(0, 1.0)
        ->bind(1, 0)
        ->execute();

    std::cout << "Start running exact clone detection" << std::endl;
    pid_t p = fork();
    if (p == -1) { // Error
        perror("fork: ");
        exit (1);
    }
    if (p == 0) { // Child
        vector<char*> args;
        args.push_back(strdup(Exec.c_str()));
        args.push_back(strdup("--database"));
        args.push_back(strdup(databaseName.c_str()));
        args.push_back(0);

        ostringstream outStr; 
        for (vector<char*>::iterator iItr = args.begin(); iItr != args.end(); ++iItr)
            outStr << *iItr << " ";
        std::cout << "Calling " << outStr.str() << std::endl;
        execv(Exec.c_str(), &args[0]);
        perror("execv: ");
        exit (1);
    } else { // Parent
        int status;
        if (waitpid(p, &status, 0) == -1) {
            perror("waitpid");
            abort();
        }
        cerr << "Status: " << status << endl;
        cerr << "Done waiting for Exact Clone Detection" << endl;
    }
}
// Create the tmp_insns table to hold all the instructions for the function-to-be-listed and all the instructions of all
// the functions that are mentioned in events.
static void
gather_instructions(const SqlDatabase::TransactionPtr tx, int func_id, const Events &events)
{
    std::set<std::string> func_ids;
    func_ids.insert(StringUtility::numberToString(func_id));
    for (Events::const_iterator ei=events.begin(); ei!=events.end(); ++ei)
        func_ids.insert(StringUtility::numberToString(ei->second.func_id));
    std::string sql = "create temporary table tmp_insns as"
                      " select * from semantic_instructions"
                      " where func_id in ("+StringUtility::join_range(", ", func_ids.begin(), func_ids.end())+")";
    tx->execute(sql);
}
Exemple #27
0
// Count how many tests are missing first_error information when it should be available.
static void
countMissingErrors(const SqlDatabase::TransactionPtr &tx, const Settings &settings) {
    std::vector<std::string> args;
    SqlDatabase::StatementPtr q = tx->statement("select count(*)"
                                                " from test_results test"
                                                " join attachments att on test.id = att.test_id" +
                                                sqlWhereClause(tx, settings, args) + " and"
                                                "    test.first_error is null and"
                                                "    test.status <> 'end' and"
                                                "    att.name = 'Final output'");
    sqlBindArgs(q, args);
    int n = q->execute_int();
    std::cout <<n <<"\n";
}
void
addVectorToDatabase(const SqlDatabase::TransactionPtr &tx, const SignatureVector& vec, const std::string& functionName,
                    size_t functionId, size_t indexWithinFunction, const std::string& normalizedUnparsedInstructions,
                    SgAsmx86Instruction* firstInsn[], const std::string& filename, size_t windowSize, size_t stride)
{
    ++numVectorsGenerated;

    vector<uint8_t> compressedCounts = compressVector(vec.getBase(), SignatureVector::Size);
    size_t vectorSum = 0;
    for (size_t i=0; i<SignatureVector::Size; ++i)
        vectorSum += vec[i];

    ExtentMap extent;
    for (size_t i=0; i<windowSize; ++i)
        extent.insert(Extent(firstInsn[i]->get_address(), firstInsn[i]->get_size()));

    unsigned char md[16];
    MD5((const unsigned char*)normalizedUnparsedInstructions.data(), normalizedUnparsedInstructions.size(), md);

    SqlDatabase::StatementPtr cmd = tx->statement("insert into vectors"
                                                  // 0   1            2                      3     4             5
                                                  " (id, function_id, index_within_function, line, last_insn_va, size,"
                                                  // 6            7           8
                                                  "sum_of_counts, counts_b64, instr_seq_b64)"
                                                  " values (?,?,?,?,?,?,?,?,?)");
    int vector_id = tx->statement("select coalesce(max(id),0)+1 from vectors")->execute_int(); // 1-origin
    cmd->bind(0, vector_id);
    cmd->bind(1, functionId);
    cmd->bind(2, indexWithinFunction);
    cmd->bind(3, firstInsn[0]->get_address());
    cmd->bind(4, firstInsn[windowSize-1]->get_address());
    cmd->bind(5, extent.size());
    cmd->bind(6, vectorSum);
    cmd->bind(7, StringUtility::encode_base64(&compressedCounts[0], compressedCounts.size()));
    cmd->bind(8, StringUtility::encode_base64(md, 16));
    cmd->execute();
}
Exemple #29
0
Fichier : lsh.C Projet : 8l/rose
void
insert_into_postprocessed_clusters(const SqlDatabase::TransactionPtr &tx, int cluster, int function_id, int index_within_function,
                                   int vectors_row, double dist)
{
    tx->statement("insert into postprocessed_clusters"
                  // 0        1            2                      3            4
                  " (cluster, function_id, index_within_function, vectors_row, dist)"
                  " values(?,?,?,?,?)")
        ->bind(0, cluster)
        ->bind(1, function_id)
        ->bind(2, index_within_function)
        ->bind(3, vectors_row)
        ->bind(4, dist)
        ->execute();
}
Exemple #30
0
int
main(int argc, char *argv[]) {
    Sawyer::initializeLibrary();
    mlog = Sawyer::Message::Facility("tool");
    Sawyer::Message::mfacilities.insertAndAdjust(mlog);

    // Parse the command-line
    Settings settings;
    std::vector<std::string> args = parseCommandLine(argc, argv, settings);
    SqlDatabase::TransactionPtr tx = SqlDatabase::Connection::create(settings.databaseUri)->transaction();
    if (args.size() != 1) {
        mlog[FATAL] <<"incorrect usage; see --help\n";
        exit(1);
    }

    if (args[0] == "clear") {
        clearErrors(tx, settings);
    } else if (args[0] == "update") {
        updateDatabase(tx, settings);
    } else if (args[0] == "missing") {
        listMissingErrors(tx, settings);
    } else if (args[0] == "count-missing") {
        countMissingErrors(tx, settings);
    } else if (args[0] == "list") {
        listErrors(tx, settings);
    } else {
        mlog[FATAL] <<"unknown command \"" <<StringUtility::cEscape(args[0]) <<"\"; see --help\n";
        exit(1);
    }

    if (settings.dryRun) {
        mlog[WARN] <<"database was not modified (running with --dry-run)\n";
    } else {
        tx->commit();
    }
}