Esempio n. 1
0
void
compute_percent_similarity_statistics(double bucket_size, double increment,  SqlDatabase::TransactionPtr transaction)
{
    int num_pairs = transaction->statement("select count(*) from semantic_funcsim")->execute_int();
    transaction->execute("drop table IF EXISTS fr_percent_similar");
    transaction->execute("create table fr_percent_similar(similarity_low double precision, similarity_middle double precision,"
                         " similarity_high double precision, percent double precision, num_matches integer);");

    SqlDatabase::StatementPtr pecent_similar_stmt = transaction->statement("insert into fr_percent_similar"
            // 0              1                  2
            "(similarity_low, similarity_middle, similarity_high,"
            // 3       4
            " percent, num_matches) "
            " values (?, ?, ?, ?, ?)");
    for (double cur_bucket = 0.0; cur_bucket <= 1.0+bucket_size; cur_bucket+=increment) {
        int num_matches = transaction->statement("select count(*) from semantic_funcsim where "
                          " similarity >= " +
                          boost::lexical_cast<std::string>(cur_bucket - bucket_size) +
                          " and similarity < " +
                          boost::lexical_cast<std::string>(cur_bucket + bucket_size))->execute_int();
        pecent_similar_stmt->bind(0, cur_bucket - bucket_size < 0 ? 0 : cur_bucket - bucket_size);
        pecent_similar_stmt->bind(1, cur_bucket);
        pecent_similar_stmt->bind(2, cur_bucket + bucket_size >= 1.0 ? 1.0 : cur_bucket + bucket_size);
        pecent_similar_stmt->bind(3, num_pairs > 0 ? ((double) num_matches*100.0)/num_pairs : 0);
        pecent_similar_stmt->bind(4, num_matches);
        pecent_similar_stmt->execute();
    }
}
Esempio n. 2
0
File: lsh.C Progetto: 8l/rose
// Reads the window size and the stride from the run_parameters table.
//
// Each value is fetched with its own single-row query and stored in the matching output argument.  Both values are
// asserted to be non-zero after they have been read.
void
get_run_parameters(const SqlDatabase::TransactionPtr &tx, int& windowSize, int& stride)
{
    const char *windowSizeQuery = "select window_size from run_parameters limit 1";
    const char *strideQuery = "select stride from run_parameters limit 1";

    windowSize = tx->statement(windowSizeQuery)->execute_int();
    stride = tx->statement(strideQuery)->execute_int();

    assert (0 != windowSize);
    assert (0 != stride);
}
// Processes every SgAsmFunction found under `top', one function at a time.
//
// For each function this calls createVectorsForAllInstructions(), inserts one row into the `functions' table (id, file,
// name, entry address, and the sizes of its instruction, static-data and total extents), and inserts one row per
// instruction into the `instructions' table.  Function ids continue from the largest id already in `functions' (starting
// at zero for an empty table).  The running total of generated vectors is reported on standard error at the end.
void
createVectorsRespectingFunctionBoundaries(SgNode* top, const std::string& filename, size_t windowSize, size_t stride,
                                          const SqlDatabase::TransactionPtr &tx)
{
    // Restricts an extent computation to instruction nodes.
    struct InstructionSelector: SgAsmFunction::NodeSelector {
        virtual bool operator()(SgNode *node) {
            return NULL != isSgAsmInstruction(node);
        }
    } insnSelector;

    // Restricts an extent computation to static data nodes.
    struct DataSelector: SgAsmFunction::NodeSelector {
        virtual bool operator()(SgNode *node) {
            return NULL != isSgAsmStaticData(node);
        }
    } dataSelector;

    SqlDatabase::StatementPtr insertFunction = tx->statement("insert into functions"
                                                             // 0   1     2              3         4      5      6
                                                             " (id, file, function_name, entry_va, isize, dsize, size)"
                                                             " values(?,?,?,?,?,?,?)");

    SqlDatabase::StatementPtr insertInsn = tx->statement("insert into instructions"
                                                         // 0        1     2            3                      4
                                                         " (address, size, function_id, index_within_function, assembly)"
                                                         " values (?,?,?,?,?)");

    vector<SgAsmFunction*> functions = SageInterface::querySubTree<SgAsmFunction>(top);
    int functionId = tx->statement("select coalesce(max(id),-1)+1 from functions")->execute_int(); // zero origin

    for (size_t fn = 0; fn < functions.size(); ++fn, ++functionId) {
        SgAsmFunction *func = functions[fn];

        // Extents of the instructions only, the static data only, and everything in the function.
        ExtentMap insnExtent, dataExtent, totalExtent;
        func->get_extent(&insnExtent, NULL, NULL, &insnSelector);
        func->get_extent(&dataExtent, NULL, NULL, &dataSelector);
        func->get_extent(&totalExtent);

        createVectorsForAllInstructions(func, filename, func->get_name(), functionId, windowSize, stride, tx);

        insertFunction->bind(0, functionId);
        insertFunction->bind(1, filename);
        insertFunction->bind(2, func->get_name() );
        insertFunction->bind(3, func->get_entry_va());
        insertFunction->bind(4, insnExtent.size());
        insertFunction->bind(5, dataExtent.size());
        insertFunction->bind(6, totalExtent.size());
        insertFunction->execute();

        // One row per instruction; the index is the instruction's position in the query result for this function.
        vector<SgAsmInstruction*> instructions = SageInterface::querySubTree<SgAsmInstruction>(func);
        for (size_t idx = 0; idx < instructions.size(); ++idx) {
            SgAsmInstruction *insn = instructions[idx];
            insertInsn->bind(0, insn->get_address());
            insertInsn->bind(1, insn->get_size());
            insertInsn->bind(2, functionId);
            insertInsn->bind(3, idx);
            insertInsn->bind(4, unparseInstructionWithAddress(insn));
            insertInsn->execute();
        }
    }
    cerr << "Total vectors generated: " << numVectorsGenerated << endl;
}
Esempio n. 4
0
// Groups the function pairs listed in fr_clone_pairs into connected components ("clusters") using a Boost graph and a
// disjoint-sets structure.
//
// max_cluster_size_signed must be non-negative; it is converted to the unsigned max_cluster_size.  The function returns
// immediately when fr_clone_pairs has no rows.
//
// NOTE: this excerpt stops in the middle of the loop over components, so the part of the function that uses insert_stmt,
// max_cluster_size, size_distribution and cluster_functions is not visible here.
void
find_clusters(int max_cluster_size_signed, SqlDatabase::TransactionPtr transaction)
{
    assert(max_cluster_size_signed >= 0);
    size_t max_cluster_size = max_cluster_size_signed;

    // Prepared insert for fr_ignored_function_pairs; it is not executed in the visible part of the function.
    SqlDatabase::StatementPtr insert_stmt = transaction->statement("insert into fr_ignored_function_pairs"
                                            // 0        1         2
                                            "(func1_id, func2_id, from_cluster_of_size)"
                                            " values (?, ?, ?)");

    // Get all edges (clone pairs); their end points are the vertices that will be merged into sets below
    std::string _query_condition = "select func1_id, func2_id from fr_clone_pairs";
    SqlDatabase::StatementPtr stmt = transaction->statement(_query_condition);

    // Nothing to cluster when there are no clone pairs
    if (stmt->begin() == stmt->end())
        return;

    // Count how many vertices we need for the boost graph: one per row of semantic_functions
    int VERTEX_COUNT = transaction->statement("select count(*) from semantic_functions")->execute_int();

    typedef adjacency_list <vecS, vecS, undirectedS> Graph;
    typedef graph_traits<Graph>::vertex_descriptor Vertex;
    typedef graph_traits<Graph>::vertices_size_type VertexIndex;
    Graph graph(VERTEX_COUNT);

    // Storage for the disjoint-sets structure, one entry per graph vertex
    std::vector<VertexIndex> rank(num_vertices(graph));
    std::vector<Vertex> parent(num_vertices(graph));

    typedef VertexIndex* Rank;
    typedef Vertex* Parent;
    disjoint_sets<Rank, Parent> ds(&rank[0], &parent[0]);
    initialize_incremental_components(graph, ds);
    incremental_components(graph, ds);

    graph_traits<Graph>::edge_descriptor edge;
    bool flag;

    // Add one edge per clone pair and merge the two end points' sets.
    // NOTE(review): function ids are used directly as vertex indices, which presumably requires every id to be smaller
    // than VERTEX_COUNT -- confirm against how semantic_functions ids are assigned.
    for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) {
        int func1 = row.get<int>(0);
        int func2 = row.get<int>(1);
        boost::tie(edge, flag) = add_edge(func1, func2, graph);
        ds.union_set(func1,func2);
    }

    typedef component_index<VertexIndex> Components;
    Components components(parent.begin(), parent.end());
    std::map<int,int> size_distribution;

    // Iterate through the component indices
    BOOST_FOREACH(VertexIndex current_index, components) {
        std::vector<int> cluster_functions;

        // Iterate through the child vertex indices for [current_index] and collect them as this cluster's members
        BOOST_FOREACH(VertexIndex child_index, components[current_index]) {
            cluster_functions.push_back(child_index);
        }
Esempio n. 5
0
// Print a summary of one function to standard output: identifying information from semantic_functions and the tables
// joined to it, the result of CloneDetection::function_returns_value() as a percentage, and a breakdown of the function's
// tests (rows of semantic_fio) by fault status.
static void
show_summary(const SqlDatabase::TransactionPtr &tx, int func_id)
{
    const std::string sql = "select"
                            //  0              1          2           3
                            "   func.entry_va, func.name, file1.name, file2.name,"
                            //  4            5           6           7
                            "   func.ninsns, func.isize, func.dsize, func.size,"
                            //  8            9            10              11
                            "   func.digest, cmd.hashkey, cmd.begin_time, func.specimen_id,"
                            //  12
                            "   func.file_id"
                            " from semantic_functions as func"
                            " join semantic_files as file1 on func.specimen_id = file1.id"
                            " join semantic_files as file2 on func.file_id = file2.id"
                            " join semantic_history as cmd on func.cmd = cmd.hashkey"
                            " where func.id = ?";
    SqlDatabase::Statement::iterator info = tx->statement(sql)->bind(0, func_id)->begin();
    double returns_value = CloneDetection::function_returns_value(tx, func_id);

    // Identification
    std::cout <<"Function ID:                      " <<func_id <<"\n";
    std::cout <<"Entry virtual address:            " <<StringUtility::addrToString(info.get<rose_addr_t>(0)) <<"\n";
    std::cout <<"Function name:                    " <<info.get<std::string>(1) <<"\n";
    std::cout <<"Binary specimen name:             " <<info.get<std::string>(2) <<" (id=" <<info.get<int>(11) <<")\n";

    // The file name is shown only when it differs from the specimen name
    if (info.get<std::string>(2) != info.get<std::string>(3))
        std::cout <<"Binary file name:                 " <<info.get<std::string>(3) <<" (id=" <<info.get<int>(12) <<")\n";

    // Sizes and provenance
    std::cout <<"Number of instructions:           " <<info.get<size_t>(4) <<"\n";
    std::cout <<"Number of bytes for instructions: " <<info.get<size_t>(5) <<"\n";
    std::cout <<"Number of bytes for static data:  " <<info.get<size_t>(6) <<"\n";
    std::cout <<"Total number of bytes:            " <<info.get<size_t>(7) <<"\n"; // not necessarily the sum isize + dsize
    std::cout <<"Function returns a value:         " <<round(100.0*returns_value) <<"% probability\n";
    std::cout <<"Function static digest:           " <<info.get<std::string>(8) <<"\n";
    std::cout <<"Command that inserted function:   " <<info.get<int64_t>(9) <<" (command hashkey)\n";
    std::cout <<"Time that function was inserted:  " <<SqlDatabase::humanTimeRenderer(info.get<time_t>(10), 0) <<"\n";

    // Test counts
    size_t ntests = tx->statement("select count(*) from semantic_fio where func_id=?")->bind(0, func_id)->execute_int();
    if (0==ntests) {
        std::cout <<"Number of tests for function:     " <<ntests <<"\n";
        return;
    }

    // Per-status test counts and their share of all tests for this function
    SqlDatabase::StatementPtr by_status = tx->statement("select fault.name, count(*), 100.0*count(*)/?"
                                                        " from semantic_fio as fio"
                                                        " join semantic_faults as fault on fio.status = fault.id"
                                                        " where func_id = ?"
                                                        " group by fault.id, fault.name"
                                                        " order by fault.id")->bind(0, ntests)->bind(1, func_id);
    SqlDatabase::Table<std::string, size_t, double> statuses(by_status);
    if (1==statuses.size()) {
        // A single status is summarized on the same line instead of printing a one-row table
        std::cout <<"Number of tests for function:     " <<ntests <<" (all had status " <<statuses[0].v0 <<")\n";
        return;
    }

    std::cout <<"Number of tests for function:     " <<ntests <<"\n";
    statuses.headers("Status", "NTests", "Percent");
    statuses.line_prefix("    ");
    statuses.print(std::cout);
}
Esempio n. 6
0
File: callLSH.C Progetto: 8l/rose
static void
postprocess(const SqlDatabase::TransactionPtr &tx)
{
    int windowSize = tx->statement("select window_size from run_parameters limit 1")->execute_int();
    int stride = tx->statement("select stride from run_parameters limit 1")->execute_int();
    assert(windowSize != 0);
    assert(stride != 0);

    cerr << "About to delete from postprocessed_clusters" << endl;
    tx->execute("delete from postprocessed_clusters");
    cerr << "... done" << endl;

    cerr << "About to postprocess" << endl;
    SqlDatabase::StatementPtr cmd = tx->statement("select cluster, function_id, index_within_function, vectors_row"
                                                  " from clusters order by cluster, function_id, index_within_function");
    SqlDatabase::StatementPtr insertCmd = tx->statement("insert into postprocessed_clusters"
                                                        " select * from clusters where row_number = ?");
    const size_t numStridesThatMustBeDifferent = windowSize / (stride * 2);
    string last_cluster = "";
    string last_func_id = "";
    size_t last_index_within_function = 0;
    vector<string> rows_in_this_cluster;
    bool first = true;
    for (SqlDatabase::Statement::iterator postproc_reader=cmd->begin(); postproc_reader!=cmd->end(); ++postproc_reader) {
        string cluster = postproc_reader.get<std::string>(0);
        string function_id = postproc_reader.get<std::string>(1);
        size_t index_within_function = postproc_reader.get<size_t>(2);
        string cluster_row_number = postproc_reader.get<std::string>(3);
        bool differentFunction = cluster != last_cluster || function_id != last_func_id;
        bool endingCluster = differentFunction;
        bool beginningNewCluster = first || differentFunction;
        first = false;
        if (endingCluster) {
            if (rows_in_this_cluster.size() > 1) { // Skip clusters that have only one element left
                for (size_t i = 0; i < rows_in_this_cluster.size(); ++i) {
                    insertCmd->bind(0, rows_in_this_cluster[i]);
                    insertCmd->execute();
                }
            }
        }
        if (beginningNewCluster) {
            last_cluster = cluster;
            last_func_id = function_id;
            last_index_within_function = index_within_function;
            rows_in_this_cluster.clear();
        }
        bool keep = beginningNewCluster || (index_within_function >= last_index_within_function + numStridesThatMustBeDifferent);
        if (keep) {
            last_index_within_function = index_within_function;
            rows_in_this_cluster.push_back(cluster_row_number);
        }
    }
    cerr << "... done" << endl;
}
// Treats everything under `top' as a single pseudo-function named "<filename>-all-instructions".
//
// The next free function id (one more than the largest id in `functions', or zero for an empty table) is passed to
// createVectorsForAllInstructions(), and a row describing the pseudo-function is then inserted into `functions'.  The
// running total of generated vectors is reported on standard output.
void
createVectorsNotRespectingFunctionBoundaries(SgNode* top, const std::string& filename, size_t windowSize, size_t stride,
                                             const SqlDatabase::TransactionPtr &tx)
{
    int functionId = tx->statement("select coalesce(max(id),-1)+1 from functions")->execute_int(); // zero origin
    std::string functionName = filename + "-all-instructions";
    createVectorsForAllInstructions(top, filename, functionName, functionId, windowSize, stride, tx);

    // Store the row under the same id that was handed to createVectorsForAllInstructions().  Leaving the id column out
    // let the database choose its own value, which need not equal functionId.
    tx->statement("insert into functions(id, file, function_name) values (?,?,?)")
        ->bind(0, functionId)
        ->bind(1, filename)
        ->bind(2, functionName)
        ->execute();
    cout << "Total vectors generated: " << numVectorsGenerated << endl;
}
Esempio n. 8
0
// Update the database by filling in test_results.first_error information for those tests that don't have a cached first error
// but which failed and have output.
//
// A single UPDATE statement does all the work: for every test selected by sqlWhereClause() whose first_error is null, whose
// status is not 'end', and which has an attachment named 'Final output', first_error is set to the first part of that
// attachment's content matching one of the error-message regular expressions listed below.
static void
updateDatabase(const SqlDatabase::TransactionPtr &tx, const Settings &settings) {
    // Passed to sqlWhereClause() and afterwards to sqlBindArgs(); presumably the former records the values for the
    // placeholders it emits and the latter binds them -- confirm against those helpers.
    std::vector<std::string> args;
    SqlDatabase::StatementPtr q = tx->statement("update test_results test"
                                                " set first_error = substring("
#if 0 // [Robb Matzke 2016-02-08]
                                                // Look at all output stored in the database (which is typically only the last
                                                // few hundred lines of the complete output).
                                                "att.content "
#else
                                                // This coalesce tries to find where a parallel make command failed and looks
                                                // only at the following serial make, which is assumed to follow the parallel
                                                // make.
                                                "coalesce(substring(att.content from '(\\nmake: \\*\\*\\* \\[[-_a-zA-Z0-9]+\\] Error 1\n.+)'), att.content) "
#endif
                                                "from '(?n)("
                                                //----- regular expressions begin -----
                                                "\\merror: .+"
                                                "|catastrophic error: *\\n.+"
                                                "|^.* \\[err\\]: terminated after .+"
                                                "|^.* \\[err\\]: command died with .+"
                                                "|^.* \\[err\\]: +what\\(\\): .*"
                                                //----- regular expressions end -----
                                                ")')"
                                                " from attachments att" +
                                                sqlWhereClause(tx, settings, args) + " and"
                                                "    test.id = att.test_id and"
                                                "    test.first_error is null and"
                                                "    test.status <> 'end' and"
                                                "    att.name = 'Final output'");
    sqlBindArgs(q, args);
    q->execute();
}
void
computational_equivalent_classes(std::map<int,int>& norm_map)
{
    SqlDatabase::StatementPtr stmt = transaction->statement("select func_id, equivalent_func_id from equivalent_classes");
    for (SqlDatabase::Statement::iterator row=stmt->begin(); row!= stmt->end(); ++row)
        norm_map[row.get<int>(0)] = row.get<int>(1);
}
Esempio n. 10
0
// Print a table of the tests selected by sqlWhereClause() that have no first_error, whose status is not 'end', and that
// have an attachment named 'Final output'.  Nothing is printed when no test qualifies.
static void
listMissingErrors(const SqlDatabase::TransactionPtr &tx, const Settings &settings) {
    typedef SqlDatabase::Table<int,                     // 0: id
                               int,                     // 1: rose_date
                               std::string,             // 2: os
                               std::string,             // 3: reporting_user
                               std::string>             // 4: tester
        MissingTable;

    std::vector<std::string> args;
    std::string sql = "select test.id, test.rose_date, test.os, users.name, test.tester"
                      " from test_results test"
                      " join attachments att on test.id = att.test_id"
                      " join users on test.reporting_user = users.uid";
    sql += sqlWhereClause(tx, settings, args);
    sql += " and"
           "    test.first_error is null and"
           "    test.status <> 'end' and"
           "    att.name = 'Final output'"
           " order by test.id";

    SqlDatabase::StatementPtr q = tx->statement(sql);
    sqlBindArgs(q, args);

    MissingTable table(q);
    if (table.empty())
        return;

    table.headers("Id", "ROSE date", "OS", "Reporting user", "Tester");
    table.reprint_headers(50);
    table.renderers().r1 = &timeRenderer;               // column 1 (rose_date) goes through timeRenderer
    table.print(std::cout);
}
Esempio n. 11
0
// Returns a newly allocated vector of callee ids recorded in semantic_fio_calls for the given function and input group,
// ordered by call position.  The caller receives the raw pointer returned by `new'.
//
// Only callees that appear in tmp_interesting_funcs are included.  When call_depth is non-negative the result is further
// restricted to calls whose caller_id equals func_id.  When expand_ncalls is set, each callee is repeated ncalls times;
// otherwise it appears once per row.  The ignore_no_compares argument is not used by this function.
CallVec*
load_api_calls_for(int func_id, int igroup_id, bool ignore_no_compares, int call_depth, bool expand_ncalls)
{
    const bool direct_calls_only = call_depth >= 0;

    std::string sql = "select distinct fio.pos, fio.callee_id, fio.ncalls"
                      " from semantic_fio_calls as fio"
                      " join tmp_interesting_funcs as f1"
                      // filter out functions with no compares
                      " on f1.func_id = fio.callee_id"
                      // filter on current parameters
                      " where fio.func_id = ? and fio.igroup_id = ?";
    if (direct_calls_only)
        sql += " and fio.caller_id = ?";                // filter out function not called directly
    sql += " order by fio.pos";

    SqlDatabase::StatementPtr stmt = transaction->statement(sql);
    stmt->bind(0, func_id);
    stmt->bind(1, igroup_id);
    if (direct_calls_only)
        stmt->bind(2, func_id);

    CallVec *calls = new CallVec;
    SqlDatabase::Statement::iterator row = stmt->begin();
    while (row != stmt->end()) {
        const int callee_id = row.get<int>(1);
        const int ncalls = row.get<int>(2);

        // One entry per row unless the caller asked for every individual call to be listed
        const int copies = expand_ncalls ? ncalls : 1;
        for (int i = 0; i < copies; ++i)
            calls->push_back(callee_id);
        ++row;
    }
    return calls;
}
Esempio n. 12
0
// Print the assembly listing of a function, one instruction per line in `position' order, read from
// semantic_instructions.  Each line starts with an 11-character prefix showing how many times the instruction was
// executed (blank when there is no positive count in the loaded events), and any events recorded at the instruction's
// address are shown right after the line.
static void
list_assembly(const SqlDatabase::TransactionPtr &tx, int func_id)
{
    Events events;
    gather_events(tx, func_id);
    load_events(tx, func_id, events);

    const std::string blank_prefix(11, ' ');
    SqlDatabase::StatementPtr query = tx->statement("select address, assembly from semantic_instructions where func_id = ?"
                                                    " order by position")->bind(0, func_id);
    SqlDatabase::Statement::iterator insn = query->begin();
    while (insn != query->end()) {
        const rose_addr_t va = insn.get<rose_addr_t>(0);
        const std::string text = insn.get<std::string>(1);
        Events::const_iterator found = events.find(va);
        const bool has_events = found != events.end();

        // Prefix: execution count, or blanks of the same width
        if (has_events && found->second.nexecuted > 0) {
            std::cout <<std::setw(9) <<std::right <<found->second.nexecuted <<"x ";
        } else {
            std::cout <<blank_prefix;
        }

        // The instruction itself
        std::cout <<"| " <<StringUtility::addrToString(va) <<":  " <<text <<"\n";

        // Events attached to this address, if any
        if (has_events)
            show_events(found->second);

        ++insn;
    }
}
Esempio n. 13
0
// Resolve a user-supplied function specification to a function ID, or exit the program.
//
// The specification is tried as a function ID first (when it parses completely as an integer and exactly one function has
// that ID).  Otherwise candidates are collected by entry address (numeric specifications only), by function name, and by
// file names ending in "/<spec>".  Exactly one candidate is accepted; zero or several candidates print a message (and, for
// several, the candidate table) and terminate the program with exit status 0.
//
// NOTE(review): strtol() returns a long that is stored in an int, so large values are narrowed before they are used;
// the value -1 doubles as the "not a number" marker.
static int
find_function_or_exit(const SqlDatabase::TransactionPtr &tx, char *func_spec)
{
    // Parse the whole specification as an integer; base 0 lets strtol pick the base from the prefix
    char *tail = NULL;
    errno = 0;
    int numeric_spec = strtol(func_spec, &tail, 0);
    if (errno || tail==func_spec || *tail)
        numeric_spec = -1;

    // A number that identifies exactly one function by ID is taken as is
    int func_id = -1;
    if (-1!=numeric_spec &&
        1==tx->statement("select count(*) from semantic_functions where id = ?")->bind(0, numeric_spec)->execute_int())
        func_id = numeric_spec;

    if (-1==func_id) {
        // All three candidate queries return the same columns and differ only in their where clause
        const std::string select_from = "select func.id, func.entry_va, func.name, func.ninsns, file.name"
                                        " from semantic_functions as func"
                                        " join semantic_files as file on func.file_id = file.id";
        SqlDatabase::StatementPtr by_address = tx->statement(select_from + " where entry_va = ?")->bind(0, numeric_spec);
        SqlDatabase::StatementPtr by_name = tx->statement(select_from + " where func.name = ?")->bind(0, func_spec);
        SqlDatabase::StatementPtr by_file = tx->statement(select_from + " where file.name like"
                                                          " '%/"+SqlDatabase::escape(func_spec, tx->driver(), false)+"'");

        SqlDatabase::Table<int, rose_addr_t, std::string, size_t, std::string> candidates;
        if (-1!=numeric_spec)
            candidates.insert(by_address);
        candidates.insert(by_name);
        candidates.insert(by_file);
        candidates.headers("ID", "Entry VA", "Function Name", "NInsns", "Specimen Name");
        candidates.renderers().r1 = &SqlDatabase::addr32Renderer;

        if (candidates.empty()) {
            std::cout <<argv0 <<": no function found by ID, address, or name: " <<func_spec <<"\n";
            exit(0);
        }
        if (candidates.size() > 1) {
            std::cout <<argv0 <<": function specification is ambiguous: " <<func_spec <<"\n";
            candidates.print(std::cout);
            exit(0);
        }
        func_id = candidates[0].v0;
    }

    assert(func_id>=0);
    return func_id;
}
Esempio n. 14
0
// Set first_error back to null for the test_results rows selected by sqlWhereClause().
static void
clearErrors(const SqlDatabase::TransactionPtr &tx, const Settings &settings) {
    std::vector<std::string> args;
    std::string sql = "update test_results set first_error = null";
    sql += sqlWhereClause(tx, settings, args);

    SqlDatabase::StatementPtr update = tx->statement(sql);
    sqlBindArgs(update, args);
    update->execute();
}
Esempio n. 15
0
// Load every row of the dependencies table whose `enabled' column is non-zero.  Each value is appended to the list stored
// under its dependency name; insertMaybeDefault() supplies the entry for that name.
static Dependencies
loadAllDependencies(const SqlDatabase::TransactionPtr &tx) {
    Dependencies result;
    SqlDatabase::StatementPtr query = tx->statement("select name, value from dependencies where enabled <> 0");
    SqlDatabase::Statement::iterator row = query->begin();
    while (row != query->end()) {
        const std::string name = row.get<std::string>(0);
        const std::string value = row.get<std::string>(1);
        result.insertMaybeDefault(name).push_back(value);
        ++row;
    }
    return result;
}
Esempio n. 16
0
File: lsh.C Progetto: 8l/rose
// Appends one row to the clusters table.  The row's id is one more than the largest id currently in the table (1 when the
// table is empty); the remaining columns are taken from the arguments.
void
insert_into_clusters(const SqlDatabase::TransactionPtr &tx, int cluster, int function_id, int index_within_function,
                     int vectors_row, double dist)
{
    const int next_id = tx->statement("select coalesce(max(id),0)+1 from clusters")->execute_int();

    SqlDatabase::StatementPtr insert = tx->statement("insert into clusters"
                                                     // 0   1        2            3                      4            5
                                                     " (id, cluster, function_id, index_within_function, vectors_row, dist)"
                                                     " values (?, ?,?,?,?,?)");
    insert->bind(0, next_id);
    insert->bind(1, cluster);
    insert->bind(2, function_id);
    insert->bind(3, index_within_function);
    insert->bind(4, vectors_row);
    insert->bind(5, dist);
    insert->execute();
}
Esempio n. 17
0
// Records the instruction count of a function in the function_statistics table.  The filename and functionName arguments
// are accepted but not used here.
void
addFunctionStatistics(const SqlDatabase::TransactionPtr &tx, const std::string& filename, const std::string& functionName,
                      size_t functionId, size_t numInstructions)
{
    SqlDatabase::StatementPtr insert =
        tx->statement("insert into function_statistics (function_id, num_instructions) values (?,?)");
    insert->bind(0, functionId);
    insert->bind(1, numInstructions);
    insert->execute();
}
Esempio n. 18
0
File: findClones.C Progetto: 8l/rose
int
main(int argc, char* argv[])
{
    std::string database;
    int norm = 1;
    double similarity_threshold=1.;

    size_t k;
    size_t l;
    try {
        options_description desc("Allowed options");
        desc.add_options()
            ("help", "produce a help message")
            ("database,q", value< string >()->composing(), 
             "the sqlite database that we are to use")
            ("norm,p", value< int >(&norm), "Exponent in p-norm to use (1 or 2 or 3 (MIT implementation) )")
            ("hash-function-size,k", value< size_t >(&k), "The number of elements in a single hash function")
            ("hash-table-count,l", value< size_t >(&l), "The number of separate hash tables to create")
            ("similarity,t", value< double >(&similarity_threshold), "The similarity threshold that is allowed in a clone pair");

        variables_map vm;
        store(parse_command_line(argc, argv, desc), vm);
        notify(vm);

        if (vm.count("help")) {
            cout << desc;            
            exit(0);
        }
		
        if (vm.count("database")!=1) {
            std::cerr << "Missing options. Call as: findClones --database <database-name>" 
                      << std::endl;
            exit(1);

        }

        database = vm["database"].as<string >();
        similarity_threshold = vm["similarity"].as<double>();
        cout << "database: " << database << std::endl;
    } catch(exception& e) {
        cout << e.what() << "\n";
    }

    std::cout << "The similarity threshold is " << similarity_threshold << std::endl;
    SqlDatabase::TransactionPtr tx = SqlDatabase::Connection::create(database)->transaction();
    tx->statement("update run_parameters set similarity_threshold = ?")
        ->bind(0, similarity_threshold)
        ->execute();

    OperateOnClusters op(database, norm, similarity_threshold, k , l);
    op.analyzeClusters();
    //op.calculate_false_positives();

    tx->commit();
    return 0;
};
Esempio n. 19
0
// Copy every row of tmp_src into the listing: the text of each line is stored in the source_code member of the listing
// entry keyed by that line's (file_id, linenum) source position.
static void
load_source_code(const SqlDatabase::TransactionPtr &tx, Listing &listing/*in,out*/)
{
    SqlDatabase::StatementPtr query = tx->statement("select file_id, linenum, line from tmp_src");
    SqlDatabase::Statement::iterator row = query->begin();
    while (row != query->end()) {
        const int file_id = row.get<int>(0);
        const int linenum = row.get<int>(1);
        listing[SourcePosition(file_id, linenum)].source_code = row.get<std::string>(2);
        ++row;
    }
}
Esempio n. 20
0
// Print, on a line by itself, the number of rows produced by joining test_results with attachments for the tests selected
// by sqlWhereClause() that have no first_error, whose status is not 'end', and whose attachment is named 'Final output'.
static void
countMissingErrors(const SqlDatabase::TransactionPtr &tx, const Settings &settings) {
    std::vector<std::string> args;
    std::string sql = "select count(*)"
                      " from test_results test"
                      " join attachments att on test.id = att.test_id";
    sql += sqlWhereClause(tx, settings, args);
    sql += " and"
           "    test.first_error is null and"
           "    test.status <> 'end' and"
           "    att.name = 'Final output'";

    SqlDatabase::StatementPtr query = tx->statement(sql);
    sqlBindArgs(query, args);
    const int nmissing = query->execute_int();
    std::cout <<nmissing <<"\n";
}
Esempio n. 21
0
// Create the temporary table tmp_events with the same columns as semantic_fio_trace and, when opt.show_trace is set, fill
// it with the trace rows of the given function.  If opt.traces is non-empty, only rows whose igroup_id is listed there are
// copied.
static void
gather_events(const SqlDatabase::TransactionPtr &tx, int func_id)
{
    // "limit 0" copies the column layout without copying any rows
    tx->execute("create temporary table tmp_events as select * from semantic_fio_trace limit 0");
    if (!opt.show_trace)
        return;

    // Input group ids to restrict to, rendered as text for the SQL "in" list
    std::vector<std::string> igroup_ids;
    for (std::set<int>::const_iterator ti=opt.traces.begin(); ti!=opt.traces.end(); ++ti)
        igroup_ids.push_back(StringUtility::numberToString(*ti));

    std::string sql = "insert into tmp_events select * from semantic_fio_trace where func_id = ?";
    if (!igroup_ids.empty())
        sql += " and igroup_id in (" + StringUtility::join(", ", igroup_ids) + ")";

    tx->statement(sql)->bind(0, func_id)->execute();
}
Esempio n. 22
0
// Insert one signature vector into the "vectors" table and bump the numVectorsGenerated counter.
//
// The stored row consists of:
//   id                     one more than the largest existing id (1 when the table is empty)
//   function_id            functionId
//   index_within_function  indexWithinFunction
//   line                   address of firstInsn[0]
//   last_insn_va           address of firstInsn[windowSize-1]
//   size                   size of the extent map built from the first windowSize instructions
//   sum_of_counts          sum of all SignatureVector::Size elements of vec
//   counts_b64             base64 of the compressed vector counts
//   instr_seq_b64          base64 of the MD5 digest of normalizedUnparsedInstructions
//
// firstInsn must hold at least windowSize instructions and windowSize must be non-zero, since elements 0 and
// windowSize-1 are dereferenced.  functionName, filename and stride are not used here.
void
addVectorToDatabase(const SqlDatabase::TransactionPtr &tx, const SignatureVector& vec, const std::string& functionName,
                    size_t functionId, size_t indexWithinFunction, const std::string& normalizedUnparsedInstructions,
                    SgAsmx86Instruction* firstInsn[], const std::string& filename, size_t windowSize, size_t stride)
{
    ++numVectorsGenerated;

    // Compressed element counts and their total.
    std::vector<uint8_t> packedCounts = compressVector(vec.getBase(), SignatureVector::Size);
    size_t totalCount = 0;
    size_t elmt = 0;
    while (elmt < SignatureVector::Size) {
        totalCount += vec[elmt];
        ++elmt;
    }

    // Address space occupied by the instructions of this window.
    ExtentMap covered;
    for (size_t insnIdx = 0; insnIdx < windowSize; ++insnIdx) {
        SgAsmx86Instruction *insn = firstInsn[insnIdx];
        covered.insert(Extent(insn->get_address(), insn->get_size()));
    }

    // Digest of the normalized instruction text.
    unsigned char digest[16];
    MD5(reinterpret_cast<const unsigned char*>(normalizedUnparsedInstructions.data()),
        normalizedUnparsedInstructions.size(), digest);

    SqlDatabase::StatementPtr insertStmt = tx->statement("insert into vectors"
                                                         // 0   1            2                      3     4
                                                         " (id, function_id, index_within_function, line, last_insn_va, size,"
                                                         // (size is 5)  6  7           8
                                                         "sum_of_counts, counts_b64, instr_seq_b64)"
                                                         " values (?,?,?,?,?,?,?,?,?)");
    int nextId = tx->statement("select coalesce(max(id),0)+1 from vectors")->execute_int(); // 1-origin
    insertStmt->bind(0, nextId);
    insertStmt->bind(1, functionId);
    insertStmt->bind(2, indexWithinFunction);
    insertStmt->bind(3, firstInsn[0]->get_address());
    insertStmt->bind(4, firstInsn[windowSize-1]->get_address());
    insertStmt->bind(5, covered.size());
    insertStmt->bind(6, totalCount);
    insertStmt->bind(7, StringUtility::encode_base64(&packedCounts[0], packedCounts.size()));
    insertStmt->bind(8, StringUtility::encode_base64(digest, 16));
    insertStmt->execute();
}
Esempio n. 23
0
/****************************************************************************************
 *
 *
 * Compute how mean similar functions are to all other functions.
 *
 * The result is inserted into fr_mean_similarity on the test db, and fr_mean_similar on
 * the global db.
 *
 */
void
compute_mean_similarity_statistics(double bucket_size, double increment, SqlDatabase::TransactionPtr transaction)
{
    int num_pairs = transaction->statement("select count(*) from semantic_funcsim")->execute_int();


    transaction->execute("drop table IF EXISTS fr_mean_similarity;");
    transaction->execute("create table fr_mean_similarity as  select coalesce(sum(sf.similarity)/"+
                         boost::lexical_cast<std::string>(num_pairs)+
                         " ,0) as similarity,  ttf.id as func_id from semantic_funcsim as sf"+
                         " join semantic_functions as ttf on ttf.id = sf.func1_id  OR ttf.id = sf.func2_id GROUP BY ttf.id");

    transaction->execute("drop table IF EXISTS fr_mean_similar");
    transaction->execute("create table fr_mean_similar(similarity_low double precision, similarity_middle double precision,"
                         " similarity_high double precision, percent double precision);");

    SqlDatabase::StatementPtr mean_similar_stmt = transaction->statement("insert into fr_mean_similar"
            // 0              1                  2
            "(similarity_low, similarity_middle, similarity_high,"
            // 3
            " percent) "
            " values (?, ?, ?, ?)");

    for (double cur_bucket = 0.0; cur_bucket <= 1.0+bucket_size; cur_bucket+=increment) {
        int num_matches = transaction->statement("select count(*) from fr_mean_similarity where "
                          " similarity >= " +
                          boost::lexical_cast<std::string>(cur_bucket - bucket_size) +
                          " and similarity < " +
                          boost::lexical_cast<std::string>(cur_bucket + bucket_size))->execute_int();
        mean_similar_stmt->bind(0, cur_bucket - bucket_size < 0 ? 0 : cur_bucket - bucket_size);
        mean_similar_stmt->bind(1, cur_bucket);
        mean_similar_stmt->bind(2, cur_bucket + bucket_size >= 1.0 ? 1.0 : cur_bucket + bucket_size);
        mean_similar_stmt->bind(3, num_pairs > 0 ? ((double) num_matches*100.0)/num_pairs : 0);
        mean_similar_stmt->execute();
    }
}
Esempio n. 24
0
File: lsh.C Progetto: 8l/rose
// Append one row to the postprocessed_clusters table: the cluster number, the member's function id and index
// within that function, its row in the vectors table, and its distance value.
void
insert_into_postprocessed_clusters(const SqlDatabase::TransactionPtr &tx, int cluster, int function_id, int index_within_function,
                                   int vectors_row, double dist)
{
    SqlDatabase::StatementPtr insertRow = tx->statement("insert into postprocessed_clusters"
                                                        " (cluster, function_id, index_within_function, vectors_row, dist)"
                                                        " values(?,?,?,?,?)");
    insertRow->bind(0, cluster);                        // cluster
    insertRow->bind(1, function_id);                    // function_id
    insertRow->bind(2, index_within_function);          // index_within_function
    insertRow->bind(3, vectors_row);                    // vectors_row
    insertRow->bind(4, dist);                           // dist
    insertRow->execute();
}
Esempio n. 25
0
// Load the distinct callees of function func_id, in ascending callee order.
//
// The edges are read from the semantic_rg table when reachability_graph is true and from semantic_cg otherwise,
// using the file-level "transaction".
//
// Returns a heap-allocated CallVec that the caller owns and must delete.  If the query or the row iteration
// throws, the partially filled vector is freed before the exception propagates.
CallVec*
load_function_api_calls_for(int func_id, bool reachability_graph)
{
    SqlDatabase::StatementPtr stmt = transaction->statement("select distinct scg.callee from "
                                                            + std::string(reachability_graph ? "semantic_rg" : "semantic_cg ") +
                                                            " as scg "
                                                            //" join tmp_interesting_funcs as tif on tif.func_id = scg.callee "
                                                            " where scg.caller=? ORDER BY scg.callee");
    stmt->bind(0, func_id);

    CallVec* call_vec = new CallVec;
    try {
        for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) {
            int callee_id = row.get<int>(0);
            call_vec->push_back(callee_id);
        }
    } catch (...) {
        // The raw pointer has not been handed to anyone yet, so nobody else could free it.
        delete call_vec;
        throw;
    }
    return call_vec;
}
Esempio n. 26
0
// Print the source files that contributed instructions to function func_id.  Nothing is printed when there are
// none; a single file is shown on one line; several files are shown as a count followed by an indented table of
// file IDs and names ordered by name.
static void
show_source_names(const SqlDatabase::TransactionPtr &tx, int func_id)
{
    typedef SqlDatabase::Table<int, std::string> FileTable;

    SqlDatabase::StatementPtr query = tx->statement("select distinct file.id, file.name"
                                                    " from semantic_instructions as insn"
                                                    " join semantic_files as file on insn.src_file_id = file.id"
                                                    " where insn.func_id = ?"
                                                    " order by file.name");
    FileTable files(query->bind(0, func_id));

    if (1==files.size()) {
        std::cout <<"Source file name:                 " <<files[0].v1 <<" (id=" <<files[0].v0 <<")\n";
        return;
    }
    if (files.empty())
        return;

    std::cout <<"Number of source files:           " <<files.size() <<"\n";
    files.headers("FileID", "Name");
    files.line_prefix("    ");
    files.print(std::cout);
}
Esempio n. 27
0
// Print the distinct (status, first_error) combinations of the tests selected by the settings' where-clause, most
// frequent first.  Each output line holds the occurrence count, the status, and the error message passed through
// oneLineEscaped.  Tests without a first_error are ignored.
static void
listErrors(const SqlDatabase::TransactionPtr &tx, const Settings &settings) {
    std::vector<std::string> bindings;                  // handed to sqlWhereClause, then bound by sqlBindArgs
    const std::string sql = "select count(*) as n, status, test.first_error"
                            " from test_results test" +
                            sqlWhereClause(tx, settings, bindings) + " and"
                            " test.first_error is not null"
                            " group by status, test.first_error"
                            " order by n desc";

    SqlDatabase::StatementPtr errors = tx->statement(sql);
    sqlBindArgs(errors, bindings);

    SqlDatabase::Statement::iterator cur = errors->begin();
    while (cur != errors->end()) {
        int occurrences = cur.get<int>(0);
        std::string testStatus = cur.get<std::string>(1);
        std::string firstError = cur.get<std::string>(2);

        printf("%6d %-16s %s\n", occurrences, testStatus.c_str(), oneLineEscaped(firstError).c_str());
        ++cur;
    }
}
Esempio n. 28
0
File: callLSH.C Progetto: 8l/rose
// Record one row in the timing table for the named property.
//
// total_usertime/total_systime are the user and system CPU times of ru_after; wallclock, usertime and systime
// are the differences between the "after" and "before" samples.  total_wallclock is always stored as 0.
void
insert_timing(const SqlDatabase::TransactionPtr &tx, std::string property_name, const timeval& before, const timeval& after,
              const rusage& ru_before, const rusage& ru_after)
{
    const double userAfter = tvToDouble(ru_after.ru_utime);
    const double sysAfter = tvToDouble(ru_after.ru_stime);
    const double wallElapsed = tvToDouble(after) - tvToDouble(before);
    const double userElapsed = userAfter - tvToDouble(ru_before.ru_utime);
    const double sysElapsed = sysAfter - tvToDouble(ru_before.ru_stime);

    SqlDatabase::StatementPtr insertRow = tx->statement("insert into timing"
                                                        " (property_name, total_wallclock, total_usertime, total_systime, wallclock,"
                                                        " usertime, systime)"
                                                        " values (?,?,?,?,?,?,?)");
    insertRow->bind(0, property_name);                  // property_name
    insertRow->bind(1, 0);                              // total_wallclock
    insertRow->bind(2, userAfter);                      // total_usertime
    insertRow->bind(3, sysAfter);                       // total_systime
    insertRow->bind(4, wallElapsed);                    // wallclock
    insertRow->bind(5, userElapsed);                    // usertime
    insertRow->bind(6, sysElapsed);                     // systime
    insertRow->execute();
}
Esempio n. 29
0
// Print a table of the tests recorded in semantic_fio for function func_id, one row per input group in igroup_id
// order.  The status column shows the fault name looked up in semantic_faults.
static void
show_tests(const SqlDatabase::TransactionPtr &tx, int func_id)
{
    typedef SqlDatabase::Table<int, size_t, size_t, size_t, size_t, size_t, size_t, size_t, int64_t,
                               std::string, double, double, int64_t> TestTable;
    TestTable tests;

    SqlDatabase::StatementPtr query = tx->statement("select"
                                                    "   fio.igroup_id, fio.arguments_consumed, fio.locals_consumed, fio.globals_consumed,"
                                                    "   fio.functions_consumed, fio.integers_consumed, fio.pointers_consumed,"
                                                    "   fio.instructions_executed, fio.ogroup_id,"
                                                    "   fault.name, fio.elapsed_time, fio.cpu_time, fio.cmd"
                                                    " from semantic_fio as fio"
                                                    " join semantic_faults as fault on fio.status = fault.id"
                                                    " where func_id = ?"
                                                    " order by igroup_id");
    tests.insert(query->bind(0, func_id));

    std::cout <<"Tests run for this function:\n";
    tests.headers("IGroup", "Args", "Locals", "Globals", "Funcs", "Ints", "Ptrs", "Insns", "OGroup", "Status", "Elapsed Time",
                  "CPU Time", "Command");
    tests.line_prefix("    ");
    tests.print(std::cout);
}
Esempio n. 30
0
File: callLSH.C Progetto: 8l/rose
// Run the external exact-clone-detection program as "Exec --database databaseName" and wait for it to finish.
//
// Before launching, the detection_parameters table is reset through tx to a single row with similarity_threshold
// 1.0 and false_negative_rate 0.  The child's raw wait status (as filled in by waitpid, not decoded) is reported
// on stderr.  A fork failure terminates the process with exit status 1; a waitpid failure aborts.
static void
callExact(const SqlDatabase::TransactionPtr &tx, const std::string databaseName, const string& Exec)
{
    // FIXME: We can't pass parameters to the exec'd process this way because the parent's SQL statements are
    // being executed in a transaction -- they won't be visible in the child. [Robb P. Matzke 2013-08-12]
    tx->execute("delete from detection_parameters");
    tx->statement("insert into detection_parameters (similarity_threshold, false_negative_rate) values (?, ?)")
        ->bind(0, 1.0)
        ->bind(1, 0)
        ->execute();

    std::cout << "Start running exact clone detection" << std::endl;
    pid_t p = fork();
    if (p == -1) { // Error
        perror("fork: ");
        exit (1);
    }
    if (p == 0) { // Child
        std::vector<char*> args;
        args.push_back(strdup(Exec.c_str()));
        args.push_back(strdup("--database"));
        args.push_back(strdup(databaseName.c_str()));

        // Log the command line before the terminator is appended: streaming a null char* is undefined behavior.
        std::ostringstream outStr;
        for (std::vector<char*>::iterator iItr = args.begin(); iItr != args.end(); ++iItr)
            outStr << *iItr << " ";
        std::cout << "Calling " << outStr.str() << std::endl;

        args.push_back(0);                              // execv() needs a null-terminated argument vector
        execv(Exec.c_str(), &args[0]);

        // Only reached when execv() failed.  Leave with _exit() rather than exit() so this forked copy does not
        // run the parent's atexit handlers and static destructors or flush stdio buffers inherited from it.
        perror("execv: ");
        _exit(1);
    } else { // Parent
        int status;
        if (waitpid(p, &status, 0) == -1) {
            perror("waitpid");
            abort();
        }
        std::cerr << "Status: " << status << std::endl;
        std::cerr << "Done waiting for Exact Clone Detection" << std::endl;
    }
}