void
find_clusters(int max_cluster_size_signed, SqlDatabase::TransactionPtr transaction)
{
    assert(max_cluster_size_signed >= 0);
    size_t max_cluster_size = max_cluster_size_signed;

    SqlDatabase::StatementPtr insert_stmt = transaction->statement("insert into fr_ignored_function_pairs"
                                            // 0        1         2
                                            "(func1_id, func2_id, from_cluster_of_size)"
                                            " values (?, ?, ?)");

    //Get all vetexes and find the union
    std::string _query_condition = "select func1_id, func2_id from fr_clone_pairs";
    SqlDatabase::StatementPtr stmt = transaction->statement(_query_condition);

    if (stmt->begin() == stmt->end())
        return;

    //Count how many vertices we have for boost graph
    int VERTEX_COUNT = transaction->statement("select count(*) from semantic_functions")->execute_int();

    typedef adjacency_list <vecS, vecS, undirectedS> Graph;
    typedef graph_traits<Graph>::vertex_descriptor Vertex;
    typedef graph_traits<Graph>::vertices_size_type VertexIndex;
    Graph graph(VERTEX_COUNT);

    std::vector<VertexIndex> rank(num_vertices(graph));
    std::vector<Vertex> parent(num_vertices(graph));

    typedef VertexIndex* Rank;
    typedef Vertex* Parent;
    disjoint_sets<Rank, Parent> ds(&rank[0], &parent[0]);
    initialize_incremental_components(graph, ds);
    incremental_components(graph, ds);

    graph_traits<Graph>::edge_descriptor edge;
    bool flag;

    for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) {
        int func1 = row.get<int>(0);
        int func2 = row.get<int>(1);
        boost::tie(edge, flag) = add_edge(func1, func2, graph);
        ds.union_set(func1,func2);
    }

    typedef component_index<VertexIndex> Components;
    Components components(parent.begin(), parent.end());
    std::map<int,int> size_distribution;

    // Iterate through the component indices
    BOOST_FOREACH(VertexIndex current_index, components) {
        std::vector<int> cluster_functions;

        // Iterate through the child vertex indices for [current_index]
        BOOST_FOREACH(VertexIndex child_index, components[current_index]) {
            cluster_functions.push_back(child_index);
        }
Example #2
0
static void
list_assembly(const SqlDatabase::TransactionPtr &tx, int func_id)
{
    Events events;
    gather_events(tx, func_id);
    load_events(tx, func_id, events);

    SqlDatabase::StatementPtr stmt = tx->statement("select address, assembly from semantic_instructions where func_id = ?"
                                                   " order by position")->bind(0, func_id);
    for (SqlDatabase::Statement::iterator insn=stmt->begin(); insn!=stmt->end(); ++insn) {
        rose_addr_t addr = insn.get<rose_addr_t>(0);
        std::string assembly = insn.get<std::string>(1);
        Events::const_iterator ei=events.find(addr);

        // Assembly line prefix
        if (ei!=events.end() && ei->second.nexecuted>0) {
            std::cout <<std::setw(9) <<std::right <<ei->second.nexecuted <<"x ";
        } else {
            std::cout <<std::string(11, ' ');
        }

        // Assembly instruction
        std::cout <<"| " <<StringUtility::addrToString(addr) <<":  " <<assembly <<"\n";

        if (ei!=events.end())
            show_events(ei->second);
    }
}
void
computational_equivalent_classes(std::map<int,int>& norm_map)
{
    SqlDatabase::StatementPtr stmt = transaction->statement("select func_id, equivalent_func_id from equivalent_classes");
    for (SqlDatabase::Statement::iterator row=stmt->begin(); row!= stmt->end(); ++row)
        norm_map[row.get<int>(0)] = row.get<int>(1);
}
CallVec*
load_api_calls_for(int func_id, int igroup_id, bool ignore_no_compares, int call_depth, bool expand_ncalls)
{
    SqlDatabase::StatementPtr stmt = transaction->statement("select distinct fio.pos, fio.callee_id, fio.ncalls"
                                                            " from semantic_fio_calls as fio"
                                                            " join tmp_interesting_funcs as f1"
                                                            // filter out functions with no compares
                                                            " on f1.func_id = fio.callee_id"
                                                            // filter on current parameters
                                                            " where fio.func_id = ? and fio.igroup_id = ?"
                                                            // filter out function not called directly
                                                            + std::string(call_depth >= 0 ? " and fio.caller_id = ?" : "")
                                                            +" order by fio.pos");
    stmt->bind(0, func_id);
    stmt->bind(1, igroup_id);

    if (call_depth >= 0)
        stmt->bind(2, func_id);

    CallVec* call_vec = new CallVec;
    for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) {
        int callee_id = row.get<int>(1);
        int ncalls    = row.get<int>(2);

        if (expand_ncalls) {
            for (int i = 0; i < ncalls; i++)
                call_vec->push_back(callee_id);
        } else {
            call_vec->push_back(callee_id);
        }
    }
    return call_vec;
}
static Dependencies
loadAllDependencies(const SqlDatabase::TransactionPtr &tx) {
    Dependencies dependencies;
    SqlDatabase::StatementPtr q = tx->statement("select name, value from dependencies where enabled <> 0");
    for (SqlDatabase::Statement::iterator row = q->begin(); row != q->end(); ++row)
        dependencies.insertMaybeDefault(row.get<std::string>(0)).push_back(row.get<std::string>(1));
    return dependencies;
}
Example #6
0
static void
load_source_code(const SqlDatabase::TransactionPtr &tx, Listing &listing/*in,out*/)
{
    SqlDatabase::StatementPtr stmt = tx->statement("select file_id, linenum, line from tmp_src");
    for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) {
        int file_id = row.get<int>(0);
        int linenum = row.get<int>(1);
        SourcePosition srcpos(file_id, linenum);
        listing[srcpos].source_code = row.get<std::string>(2);
    }
}
CallVec*
load_function_api_calls_for(int func_id, bool reachability_graph)
{
    SqlDatabase::StatementPtr stmt = transaction->statement("select distinct scg.callee from "
                                                            + std::string(reachability_graph ? "semantic_rg" : "semantic_cg ") +
                                                            " as scg "
                                                            //" join tmp_interesting_funcs as tif on tif.func_id = scg.callee "
                                                            " where scg.caller=? ORDER BY scg.callee");
    stmt->bind(0, func_id);

    CallVec* call_vec = new CallVec;
    for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) {
        int callee_id = row.get<int>(0);
        call_vec->push_back(callee_id);
    }
    return call_vec;
}
Example #8
0
// List the errors ordered by how common they are.
static void
listErrors(const SqlDatabase::TransactionPtr &tx, const Settings &settings) {
    std::vector<std::string> args;
    SqlDatabase::StatementPtr q = tx->statement("select count(*) as n, status, test.first_error"
                                                " from test_results test" +
                                                sqlWhereClause(tx, settings, args) + " and"
                                                " test.first_error is not null"
                                                " group by status, test.first_error"
                                                " order by n desc");
    sqlBindArgs(q, args);
    for (SqlDatabase::Statement::iterator row = q->begin(); row != q->end(); ++row) {
        int count = row.get<int>(0);
        std::string status = row.get<std::string>(1);
        std::string mesg = row.get<std::string>(2);

        printf("%6d %-16s %s\n", count, status.c_str(), oneLineEscaped(mesg).c_str());
    }
}
Example #9
0
// Generate the 'where' expression that limits what tests are being considered.
// Also appends variable values to the 'args' vector.
static std::string
sqlWhereClause(const SqlDatabase::TransactionPtr &tx, const Settings &settings, std::vector<std::string> &args /*in,out*/) {
    std::vector<std::string> constraints;

    if (settings.latestTests) {
        // Constrain the tests to be only the latest version of ROSE present in the database.
        SqlDatabase::StatementPtr q = tx->statement("select distinct rose, rose_date"
                                                    " from test_results"
                                                    " order by rose_date desc"
                                                    " limit 1");
        SqlDatabase::Statement::iterator row = q->begin();
        if (row != q->end()) {
            constraints.push_back("rose = ?");
            args.push_back(row.get<std::string>(0));
        }
    }

    if (constraints.empty())
        constraints.push_back("true");

    return " where " + boost::join(constraints, " and ");
}
Example #10
0
static DependencyNames
loadDependencyNames(const SqlDatabase::TransactionPtr &tx) {
    DependencyNames retval;
    SqlDatabase::StatementPtr q = tx->statement("select distinct name from dependencies");
    for (SqlDatabase::Statement::iterator row=q->begin(); row!=q->end(); ++row) {
        std::string key = row.get<std::string>(0);
        retval.insert(key, "rmc_"+key);
    }

    // Additional key/column relationships
    retval.insert("id", "test.id");
    retval.insert("reporting_user", "auth_user.identity");
    retval.insert("reporting_time", "test.reporting_time");
    retval.insert("tester", "test.tester");
    retval.insert("os", "test.os");
    retval.insert("rose", "test.rose");
    retval.insert("rose_date", "test.rose_date");
    retval.insert("status", "test.status");
    retval.insert("duration", "test.duration");
    retval.insert("noutput", "test.noutput");
    retval.insert("nwarnings", "test.nwarnings");

    return retval;
}
Example #11
0
static void
list_combined(const SqlDatabase::TransactionPtr &tx, int func_id, bool show_assembly)
{
    CloneDetection::FilesTable files(tx);

    Events events;
    gather_events(tx, func_id);
    load_events(tx, func_id, events/*out*/);
    gather_instructions(tx, func_id, events);

    Listing listing;
    gather_source_code(tx);
    load_source_code(tx, listing/*out*/);

    // Get lines of assembly code and insert them into the correct place in the Listing.
    if (show_assembly) {
        SqlDatabase::StatementPtr stmt = tx->statement("select"
                                                       // 0                1              2              3
                                                       " insn.src_file_id, insn.src_line, insn.position, insn.address,"
                                                       // 4             5        6
                                                       " insn.assembly, func.id, func.name"
                                                       " from tmp_insns as insn"
                                                       " join semantic_functions as func on insn.func_id = func.id"
                                                       " order by position");
        for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) {
            int src_file_id = row.get<int>(0);
            int src_line_num = row.get<int>(1);
            SourcePosition srcpos(src_file_id, src_line_num);
            int pos = row.get<int>(2);
            rose_addr_t addr = row.get<rose_addr_t>(3);
            std::string assembly = row.get<std::string>(4);
            int func_id = row.get<int>(5);
            std::string func_name = row.get<std::string>(6);
            listing[srcpos].assembly_code.insert(std::make_pair(addr, AssemblyCode(pos, addr, assembly, func_id, func_name)));
        }

        // Listing header
        std::cout <<"WARNING: This listing should be read cautiously. It is ordered according to the\n"
                  <<"         source code with assembly lines following the source code line from which\n"
                  <<"         they came.  However, the compiler does not always generate machine\n"
                  <<"         instructions in the same order as source code.  When a discontinuity\n"
                  <<"         occurs in the assembly instruction listing, it will be marked by a \"#\"\n"
                  <<"         character.  The assembly instructions are also numbered according to\n"
                  <<"         their relative positions in the binary function.\n"
                  <<"\n"
                  <<"         The prefix area contains either source location information or test trace\n"
                  <<"         information.  Note that trace information might be incomplete because\n"
                  <<"         tracing was disabled or only partially enabled, or the trace includes\n"
                  <<"         instructions that are not present in this function listing (e.g., when\n"
                  <<"         execution follows a CALL instruction). The following notes are possible:\n"
                  <<"           * \"Nx\" where N is an integer indicates that this instruction\n"
                  <<"             was reached N times during testing.  These notes are typically\n"
                  <<"             only attached to the first instruction of a basic block and only\n"
                  <<"             if the trace contains EV_REACHED events.  Lack of an Nx notation\n"
                  <<"             doesn't necessarily mean that the basic block was not reached, it\n"
                  <<"             only means that there is no EV_REACHED event for that block.\n"
                  <<"           * \"N<\" where N is an integer indicates that the instruction\n"
                  <<"             on the previous line consumed N inputs. Information about the\n"
                  <<"             inputs is listed on the right side of this line.\n"
                  <<"           * \"N>\" where N is an integer indicates that the instruction\n"
                  <<"             on the previous line produced N memory outputs. Information about the\n"
                  <<"             outputs is listed on the right side of this line. Only the final\n"
                  <<"             write to a memory address is considered a true output, and such\n"
                  <<"             writes will be marked with the string \"final\".\n"
                  <<"           * \"BR\" indicates that the instruction on the previous line is a\n"
                  <<"             control flow branch point. The right side of the line shows more\n"
                  <<"             detailed information about how many times the branch was taken.\n"
                  <<"           * \"FAULT\" indicates that the test was terminated at the previous\n"
                  <<"             instruction. The right side of the line shows the distribution of\n"
                  <<"             faults that occurred here.\n"
                  <<"\n"
                  <<"                /------------- Prefix area\n"
                  <<" /-------------/-------------- Source file ID or assembly function ID\n"
                  <<" |     /------/--------------- Source line number or assembly instruction index\n"
                  <<" |     |   /-/---------------- Instruction out-of-order indicator\n"
                  <<" |     |   |/     /----------- Instruction virtual address\n"
                  <<" |     |   |      |\n"
                  <<"vvvv vvvvv/|      |\n"
                  <<"vvvvvvvvvv v vvvvvvvvvv\n";
    }

    // Show the listing
    int prev_func_id = -1, prev_position = -1;
    std::set<int> seen_files;
    for (Listing::iterator li=listing.begin(); li!=listing.end(); ++li) {
        int file_id = li->first.file_id;
        if (seen_files.insert(file_id).second) {
            if (file_id>=0) {
                std::cout <<"\n" <<std::setw(4) <<std::right <<file_id <<".file  |"
                          <<(opt.colorize?"\033[33;4m":"") <<files.name(file_id) <<(opt.colorize?"\033[m":"") <<"\n";
            } else {
                std::cout <<"\n" <<std::string(11, ' ') <<"|"
                          <<(opt.colorize?"\033[33;4m":"") <<"instructions not associated with a source file"
                          <<(opt.colorize?"\033[m":"") <<"\n";
            }
        }
        if (file_id>=0) {
            std::cout <<std::setw(4) <<std::right <<file_id <<"." <<std::setw(6) <<std::left <<li->first.line_num
                      <<"|"
                      <<(opt.colorize?"\033[34m":"")
                      <<StringUtility::untab(li->second.source_code)
                      <<(opt.colorize?"\033[m":"") <<"\n";
        }

        for (Instructions::iterator ii=li->second.assembly_code.begin(); ii!=li->second.assembly_code.end(); ++ii) {
            const AssemblyCode assm = ii->second;
            if (assm.func_id!=prev_func_id) {
                std::cout <<std::string(11, ' ') <<"# "
                          <<(opt.colorize?"\033[33;4m":"") <<"function " <<StringUtility::numberToString(assm.func_id);
                if (!assm.func_name.empty())
                    std::cout <<" <" <<assm.func_name <<">";
                std::cout <<(opt.colorize?"\033[m":"") <<"\n";
            }

            Events::const_iterator ei=events.find(assm.addr);
            std::cout <<std::setw(4) <<std::right <<assm.func_id <<"." <<std::setw(6) <<std::left <<assm.pos
                      <<(prev_func_id==assm.func_id && prev_position+1==assm.pos ? "|" : "#");

            if (ei!=events.end() && ei->second.nexecuted>0) {
                std::cout <<std::setw(9) <<std::right <<ei->second.nexecuted <<"x ";
            } else {
                std::cout <<std::string(11, ' ');
            }

            std::cout <<StringUtility::addrToString(assm.addr) <<":  "
                      <<(opt.colorize?"\033[32m":"") <<assm.assembly <<(opt.colorize?"\033[m":"") <<"\n";

            if (ei!=events.end())
                show_events(ei->second);

            prev_func_id = assm.func_id;
            prev_position = assm.pos;
        }
    }
}
Example #12
0
// Load all events into memory.  Events are emitted for a particular function ID being analyzed, but if the 25-run-test
// --follow-calls was specified, then events for that function ID might be at instructions that are outside that function.
// We need to make note of those functions so that we can load all their instructions.
static void
load_events(const SqlDatabase::TransactionPtr &tx, int func_id, Events &events/*in,out*/)
{
    int specimen_id = tx->statement("select specimen_id from semantic_functions where id = ?")
                      ->bind(0, func_id)->execute_int();
    SqlDatabase::StatementPtr stmt = tx->statement("select"
                                                   // 0          1               2            3          4
                                                   " event.addr, event.event_id, event.minor, event.val, func.id,"
                                                   // 5               6
                                                   " event.igroup_id, event.pos"
                                                   " from tmp_events as event"
                                                   " join semantic_instructions as insn on event.addr = insn.address"
                                                   " join semantic_functions as func on insn.func_id = func.id"
                                                   " where func.specimen_id = ?"
                                                   " order by igroup_id, pos");
    stmt->bind(0, specimen_id);
    for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) {
        rose_addr_t addr = row.get<rose_addr_t>(0);
        int event_id = row.get<int>(1);
        int minor = row.get<int>(2);
        int64_t val = row.get<int64_t>(3);
        events[addr].func_id = row.get<int>(4); // the hard-to-get ID, not the one stored in the events func_id column.
        int igroup_id = row.get<int>(5);
        int pos = row.get<int>(6);
        switch (event_id) {
            case CloneDetection::EV_REACHED: {
                ++events[addr].nexecuted;
                break;
            }
            case CloneDetection::EV_BRANCHED: {
                ++events[addr].nbranches;
                ++events[addr].branches[val];
                break;
            }
            case CloneDetection::EV_RETURNED: {
                ++events[addr].nreturns;
                break;
            }
            case CloneDetection::EV_CONSUME_INPUT: {
                ++events[addr].ninputs;
                assert(minor>=0);
                if ((size_t)minor>=events[addr].inputs.size())
                    events[addr].inputs.resize(minor+1);
                ++events[addr].inputs[minor][val];
                break;
            }
            case CloneDetection::EV_FAULT: {
                CloneDetection::AnalysisFault::Fault fault = (CloneDetection::AnalysisFault::Fault)minor;
                ++events[addr].nfaults;
                ++events[addr].faults[fault];
                break;
            }
            case CloneDetection::EV_MEM_WRITE: {
                OutputEventKey output_key(igroup_id, val);
                OutputEventValue output_val(pos, minor);
                // Track final writes to each address
                final_output_events[output_key] = output_val;
                // Append event to the appropriate instruction
                events[addr].outputs.push_back(std::make_pair(output_key, output_val));
            }
            default:
                /*void*/
                break;
        }
    }
}
/* Remove the functions from the compilation unit that is only available in one of the traces.
 *   - criteria complement of the functions from the files of the caller functions in the call trace is removed. */
std::pair<CallVec*, CallVec*>
remove_compilation_unit_complement(int func1_id, int func2_id, int igroup_id, int similarity, CallVec* func1_vec,
                                   CallVec* func2_vec)
{
    CallVec* new_func1_vec = new CallVec;
    CallVec* new_func2_vec = new CallVec;

    if (func1_vec->size() > 0 || func2_vec->size() > 0) {
        // Find the set complement of functions called by the two functions
        // - we are not interested in functions called by both
        std::set<int> func1_vec_set;
        std::set<int> func2_vec_set;

        for (CallVec::iterator it = func1_vec->begin(); it != func1_vec->end(); ++it)
            func1_vec_set.insert(*it);
        for (CallVec::iterator it = func2_vec->begin(); it != func2_vec->end(); ++it)
            func2_vec_set.insert(*it);

        std::set<int> func1_func2_complement;
        std::set_difference(func1_vec_set.begin(), func1_vec_set.end(), func2_vec_set.begin(), func2_vec_set.end(),
                            std::inserter(func1_func2_complement, func1_func2_complement.end()));

        // Find the compilation units in question. A compilation unit is in our case a file.
        SqlDatabase::StatementPtr func1_file_stmt = transaction->statement("select file_id from semantic_functions"
                                                                           " where id = ?");
        func1_file_stmt->bind(0, func1_id);
        int func1_file_id = func1_file_stmt->execute_int();

        SqlDatabase::StatementPtr func2_file_stmt = transaction->statement("select file_id from semantic_functions"
                                                                           " where id = ?");
        func2_file_stmt->bind(0, func2_id);
        int func2_file_id = func2_file_stmt->execute_int();

        // Find the functions that needs to be removed
        //  - all functions that has a clone in between the files
        SqlDatabase::StatementPtr stmt = transaction->statement("select sem.func1_id, sem.func2_id from semantic_funcsim as sem"
                                                                " join semantic_functions as sf1 on sem.func1_id = sf1.id"
                                                                " join semantic_functions as sf2 on sem.func2_id = sf2.id"
                                                                " where similarity >= ? and sf1.file_id in (?,?)"
                                                                "   and sf2.file_id in (?, ?) and sf1.file_id != sf2.file_id");
        stmt->bind(0, similarity);
        stmt->bind(1, func1_file_id);
        stmt->bind(2, func2_file_id);
        stmt->bind(3, func1_file_id);
        stmt->bind(4, func2_file_id);

        std::set<int> complement_functions;
        for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) {
            int clone_func1 = row.get<int>(0);
            int clone_func2 = row.get<int>(1);

            complement_functions.insert(clone_func1);
            complement_functions.insert(clone_func2);
        }

        // Find the functions we want to remove
        //  - functions present with clones in between the files that is not part of both traces
        std::set<int> remove_these;
        std::set_intersection(complement_functions.begin(), complement_functions.end(), func1_func2_complement.begin(),
                              func1_func2_complement.end(), std::inserter(remove_these, remove_these.end()));

        //prune functions to remove away from the call trace into new vectors
        for (CallVec::iterator it = func1_vec->begin(); it != func1_vec->end(); ++it) {
            if (remove_these.find(*it) == remove_these.end())
                new_func1_vec->push_back(*it);
        }

        for (CallVec::iterator it = func2_vec->begin(); it != func2_vec->end(); ++it) {
            if (remove_these.find(*it) == remove_these.end())
                new_func2_vec->push_back(*it);
        }
    }
    return std::pair<CallVec*, CallVec*>(new_func1_vec, new_func2_vec);
}
Example #14
0
File: callLSH.C Project: 8l/rose
static void
postprocess(const SqlDatabase::TransactionPtr &tx)
{
    int windowSize = tx->statement("select window_size from run_parameters limit 1")->execute_int();
    int stride = tx->statement("select stride from run_parameters limit 1")->execute_int();
    assert(windowSize != 0);
    assert(stride != 0);

    cerr << "About to delete from postprocessed_clusters" << endl;
    tx->execute("delete from postprocessed_clusters");
    cerr << "... done" << endl;

    cerr << "About to postprocess" << endl;
    SqlDatabase::StatementPtr cmd = tx->statement("select cluster, function_id, index_within_function, vectors_row"
                                                  " from clusters order by cluster, function_id, index_within_function");
    SqlDatabase::StatementPtr insertCmd = tx->statement("insert into postprocessed_clusters"
                                                        " select * from clusters where row_number = ?");
    const size_t numStridesThatMustBeDifferent = windowSize / (stride * 2);
    string last_cluster = "";
    string last_func_id = "";
    size_t last_index_within_function = 0;
    vector<string> rows_in_this_cluster;
    bool first = true;
    for (SqlDatabase::Statement::iterator postproc_reader=cmd->begin(); postproc_reader!=cmd->end(); ++postproc_reader) {
        string cluster = postproc_reader.get<std::string>(0);
        string function_id = postproc_reader.get<std::string>(1);
        size_t index_within_function = postproc_reader.get<size_t>(2);
        string cluster_row_number = postproc_reader.get<std::string>(3);
        bool differentFunction = cluster != last_cluster || function_id != last_func_id;
        bool endingCluster = differentFunction;
        bool beginningNewCluster = first || differentFunction;
        first = false;
        if (endingCluster) {
            if (rows_in_this_cluster.size() > 1) { // Skip clusters that have only one element left
                for (size_t i = 0; i < rows_in_this_cluster.size(); ++i) {
                    insertCmd->bind(0, rows_in_this_cluster[i]);
                    insertCmd->execute();
                }
            }
        }
        if (beginningNewCluster) {
            last_cluster = cluster;
            last_func_id = function_id;
            last_index_within_function = index_within_function;
            rows_in_this_cluster.clear();
        }
        bool keep = beginningNewCluster || (index_within_function >= last_index_within_function + numStridesThatMustBeDifferent);
        if (keep) {
            last_index_within_function = index_within_function;
            rows_in_this_cluster.push_back(cluster_row_number);
        }
    }
    cerr << "... done" << endl;
}
Example #15
0
File: printPairs.C Project: 8l/rose
int main(int argc, char* argv[])
{
  std::string database;
  //Timing
  struct timeval before;
  struct rusage ru_before;
  gettimeofday(&before, NULL);
  getrusage(RUSAGE_SELF, &ru_before);


  try {
	options_description desc("Allowed options");
	desc.add_options()
	  ("help", "produce a help message")
	  ("database,q", value< string >()->composing(), 
	   "the sqlite database that we are to use")
	  ;

	variables_map vm;
	store(command_line_parser(argc, argv).options(desc)
		.run(), vm);


	if (vm.count("help")) {
	  cout << desc;            
	  exit(0);
	}

	if (vm.count("database")!=1  ) {
	  std::cerr << "Missing options. Call as: findClones --database <database-name>" 
		<< std::endl;
	  exit(1);

	}

	database = vm["database"].as<string >();
	cout << "database: " << database << std::endl;


  }
  catch(exception& e) {
	cout << e.what() << "\n";
  }



  SqlDatabase::TransactionPtr tx = SqlDatabase::Connection::create(database)->transaction();


  try {
	  tx->statement("create table IF NOT EXISTS largest_clones(row_number INTEGER PRIMARY KEY, function_id_A INTEGER, begin_index_within_function_A INTEGER, end_index_within_function_A INTEGER,"
            "function_id_B INTEGER , begin_index_within_function_B INTEGER, end_index_within_function_B INTEGER )")->execute();
  }
  catch(exception &ex) {
	cerr << "Exception Occurred: " << ex.what() << endl;
  }

   try {
	  tx->statement("delete from largest_clones")->execute();
  }
  catch(exception &ex) {
	cerr << "Exception Occurred: " << ex.what() << endl;
  }
 
  string filen = database;

  std::vector<std::pair <Element, Element> > listOfClonePairs;
  std::list<std::pair<int,int> > listOfFunctionClonePairs;

  int windowSize = 0;
  int stride = 0;
  try {
	windowSize = tx->statement("select window_size from run_parameters limit 1")->execute_int();
  } catch (exception& ex) {cerr << "Exception Occurred: " << ex.what() << endl;}
  try {
	stride = tx->statement("select stride from run_parameters limit 1")->execute_int();
  } catch (exception& ex) {cerr << "Exception Occurred: " << ex.what() << endl;}
  assert (windowSize != 0);
  assert (stride != 0);

  //Create set of clone pairs
  try{
    std::string selectSeparateDatasets ="SELECT cluster, function_id, index_within_function, vectors_row from  clusters ORDER BY cluster, function_id, index_within_function";

    SqlDatabase::StatementPtr cmd = tx->statement(selectSeparateDatasets);

    int64_t thisClusterName=-1;
    
    std::vector<Element> thisCluster;
   
    for (SqlDatabase::Statement::iterator r=cmd->begin(); r!=cmd->end(); ++r) {

      Element cur_elem;
      int64_t cluster       = r.get<int64_t>(0);
      cur_elem.function_id  = r.get<int64_t>(1);
      cur_elem.index_within_function = r.get<int64_t>(2);
      cur_elem.last_index_within_function = cur_elem.index_within_function;

      cur_elem.vectors_row      = r.get<int64_t>(3);
//      cur_elem.line        = boost::lexical_cast<int> ( datasets.getstring(5) );
//      cur_elem.offset      = boost::lexical_cast<int> ( datasets.getstring(6) );


      if( cluster == thisClusterName )
      {
      
        for( std::vector<Element>::iterator iItr = thisCluster.begin();
            iItr != thisCluster.end(); ++iItr )
        {


          if( cur_elem.function_id < iItr->function_id)
          listOfFunctionClonePairs.push_back(std::pair<int,int>(cur_elem.function_id, iItr->function_id));
          else
           listOfFunctionClonePairs.push_back(std::pair<int,int>( iItr->function_id, cur_elem.function_id  ));

          if( cur_elem < *iItr )
          {

             listOfClonePairs.push_back(std::pair<Element,Element>( cur_elem,*iItr ) );
          }
          else            
          {
 
             listOfClonePairs.push_back(std::pair<Element,Element>(*iItr,cur_elem ) );
          }
        };
        

      }else{
        thisCluster.clear();
        thisClusterName = cluster;

      }
      
      thisCluster.push_back( cur_elem );

      
    }
  }catch(exception &ex) {
	cerr << "Exception Occured: " << ex.what() << endl;
  }

  listOfFunctionClonePairs.sort();

  listOfFunctionClonePairs.unique();



  for(std::list<std::pair<int,int> >::iterator iItr = listOfFunctionClonePairs.begin();
      iItr != listOfFunctionClonePairs.end(); iItr++ )
  {
    if(iItr->first != iItr->second)
        std::cout << iItr->second << " " << iItr->first << std::endl;
  }

  
  return 0;
};
int
main(int argc, char *argv[])
{
    std::ios::sync_with_stdio();
    argv0 = argv[0];
    {
        size_t slash = argv0.rfind('/');
        argv0 = slash==std::string::npos ? argv0 : argv0.substr(slash+1);
        if (0==argv0.substr(0, 3).compare("lt-"))
            argv0 = argv0.substr(3);
    }

    Switches opt;
    int argno = 1;
    for (/*void*/; argno<argc && '-'==argv[argno][0]; ++argno) {
        if (!strcmp(argv[argno], "--")) {
            ++argno;
            break;
        } else if (!strcmp(argv[argno], "--help") || !strcmp(argv[argno], "-h")) {
            ::usage(0);
        } else if (!strncmp(argv[argno], "--entry=", 8)) {
            opt.entry_vas.insert(strtoull(argv[argno]+8, NULL, 0));
        } else if (!strcmp(argv[argno], "--file=list") || !strcmp(argv[argno], "--files=list")) {
            opt.list_files = true;
        } else if (!strncmp(argv[argno], "--file=", 7) || !strncmp(argv[argno], "--files=", 8)) {
            std::vector<std::string> ids = StringUtility::split(",", strchr(argv[argno], '=')+1, (size_t)-1, true);
            for (size_t i=0; i<ids.size(); ++i) {
                const char *s = ids[i].c_str();
                char *rest;
                errno = 0;
                int id = strtoul(s, &rest, 0);
                if (errno || rest==s || *rest) {
                    std::cerr <<argv0 <<": invalid file ID: " <<ids[i] <<"\n";
                    exit(1);
                }
                opt.files.insert(id);
            }
        } else if (!strncmp(argv[argno], "--function=", 11) || !strncmp(argv[argno], "--functions=", 12)) {
            std::vector<std::string> ids = StringUtility::split(",", strchr(argv[argno], '=')+1, (size_t)-1, true);
            if (ids.size()==1 && isalpha(ids[0][0]) && ids[0].find_first_of('.')!=std::string::npos) {
                std::vector<std::string> words = StringUtility::split(".", ids[0]);
                if (words.size()!=2 ||
                    !SqlDatabase::is_valid_table_name(words[0]) || !SqlDatabase::is_valid_table_name(words[1])) {
                    std::cerr <<argv0 <<": --function switch needs either IDs or a database TABLE.COLUMN\n";
                    exit(1);
                }
                opt.function_table = words[0];
                opt.function_column = words[1];
            } else {
                for (size_t i=0; i<ids.size(); ++i) {
                    const char *s = ids[i].c_str();
                    char *rest;
                    errno = 0;
                    int id = strtoul(s, &rest, 0);
                    if (errno || rest==s || *rest) {
                        std::cerr <<argv0 <<": invalid function ID: " <<ids[i] <<"\n";
                        exit(1);
                    }
                    opt.functions.insert(id);
                }
            }
        } else if (!strncmp(argv[argno], "--first-fuzz=", 13)) {
            opt.first_fuzz = strtoul(argv[argno]+13, NULL, 0);
        } else if (!strncmp(argv[argno], "--name=", 7)) {
            opt.names.insert(argv[argno]+7);
        } else if (!strncmp(argv[argno], "--nfuzz=", 8)) {
            opt.nfuzz = strtoul(argv[argno]+8, NULL, 0);
            opt.nfuzz_set = true;
        } else if (!strncmp(argv[argno], "--size=", 7)) {
            opt.ninsns = strtoul(argv[argno]+7, NULL, 0);
        } else if (!strcmp(argv[argno], "--specimen=list") || !strcmp(argv[argno], "--specimens=list")) {
            opt.list_specimens = true;
        } else if (!strncmp(argv[argno], "--specimen=", 11) || !strncmp(argv[argno], "--specimens=", 12)) {
            std::vector<std::string> ids = StringUtility::split(",", strchr(argv[argno], '=')+1, (size_t)-1, true);
            for (size_t i=0; i<ids.size(); ++i) {
                const char *s = ids[i].c_str();
                char *rest;
                errno = 0;
                int id = strtoul(s, &rest, 0);
                if (errno || rest==s || *rest) {
                    std::cerr <<argv0 <<": invalid specimen ID: " <<ids[i] <<"\n";
                    exit(1);
                }
                opt.specimens.insert(id);
            }
        } else {
            std::cerr <<argv0 <<": unrecognized switch: " <<argv[argno] <<"\n"
                      <<"see \"" <<argv0 <<" --help\" for usage info.\n";
            exit(1);
        }
    }
    if (argno+1!=argc)
        ::usage(1);
    SqlDatabase::TransactionPtr tx = SqlDatabase::Connection::create(argv[argno++])->transaction();

    // List the ID numbers and names for all specimen files
    if (opt.list_specimens) {
        SqlDatabase::Table<int, std::string> specimens;
        specimens.insert(tx->statement("select file.id, file.name"
                                       " from (select distinct specimen_id as id from semantic_functions) as specimen"
                                       " join semantic_files as file on specimen.id = file.id"
                                       " order by file.name"));
        specimens.headers("File ID", "Specimen Name");
        specimens.print(std::cout);
        return 0;
    }

    // List the ID numbers and names for all files containing functions
    if (opt.list_files) {
        SqlDatabase::Table<int, std::string> files;
        files.insert(tx->statement("select file.id, file.name"
                                   " from (select distinct file_id as id from semantic_functions) as used"
                                   " join semantic_files as file on used.id = file.id"
                                   " order by file.name"));
        files.headers("File ID", "Binary File Name");
        files.print(std::cout);
        return 0;
    }

    // Sanity checks
    if (!opt.functions.empty() && !opt.function_table.empty()) {
        std::cerr <<argv0 <<": --function=ID and --function=TABLE are mutually exclusive\n";
        exit(1);
    }
    if (0==tx->statement("select count(*) from semantic_functions")->execute_int()) {
        std::cerr <<argv0 <<": database has no functions; nothing to test\n";
        return 0;
    }
    if (0==tx->statement("select count(*) from semantic_inputvalues")->execute_int()) {
        std::cerr <<argv0 <<": database has no input groups; nothing to test\n";
        return 0;
    }

    // Create table tmp_functions containing IDs for selected functions and their specimen IDs
    std::vector<std::string> constraints;
    if (!opt.entry_vas.empty())
        constraints.push_back("func.entry_va " + SqlDatabase::in(opt.entry_vas));
    if (!opt.names.empty())
        constraints.push_back("func.name " + SqlDatabase::in_strings(opt.names, tx->driver()));
    if (!opt.specimens.empty())
        constraints.push_back("func.specimen_id " + SqlDatabase::in(opt.specimens));
    if (!opt.files.empty())
        constraints.push_back("func.file_id " + SqlDatabase::in(opt.files));
    if (!opt.functions.empty())
        constraints.push_back("func.id " + SqlDatabase::in(opt.functions));
    if (opt.ninsns>0)
        constraints.push_back("func.ninsns >= " + StringUtility::numberToString(opt.ninsns));
    std::string sql1 = "select func.id, func.specimen_id from semantic_functions as func";
    if (!opt.function_table.empty())
        sql1 += " join "+opt.function_table+" as flist on func.id = flist."+opt.function_column;
    if (!constraints.empty())
        sql1 += " where " + StringUtility::join(" and ", constraints);
    tx->execute("create temporary table tmp_functions as " + sql1);

    // Create table tmp_inputgroups containing IDs for selected input groups
    std::string sql2 = "select distinct igroup_id from semantic_inputvalues where igroup_id >= " +
                       StringUtility::numberToString(opt.first_fuzz);
    if (opt.nfuzz_set)
        sql2 += " and igroup_id < " + StringUtility::numberToString(opt.first_fuzz+opt.nfuzz);
    tx->execute("create temporary table tmp_inputgroups as " + sql2);

    // Create tmp_pending as the cross product of functions and inputgroups except for those already tested
    tx->execute("create temporary table tmp_pending as"
                "    select func.specimen_id as specimen_id, func.id as func_id, igroup.igroup_id as igroup_id"
                "      from tmp_functions as func"
                "      join tmp_inputgroups as igroup"
                "      on igroup.igroup_id is not null" // "on" clause and "is not null" (rather than "true") for portability
                "  except"
                "    select func.specimen_id, func.id, fio.igroup_id"
                "      from semantic_fio as fio"
                "      join semantic_functions as func on fio.func_id=func.id");
    SqlDatabase::StatementPtr stmt = tx->statement("select distinct specimen_id, func_id, igroup_id"
                                                   " from tmp_pending"
                                                   " order by specimen_id, igroup_id, func_id");
    for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row)
        std::cout <<row.get<int>(0) <<"\t" <<row.get<int>(1) <<"\t" <<row.get<int>(2) <<"\n";

    // no need to commit, but if we change this in the future, be sure to add begin_command()/finish_command()
    return 0;
}