Esempio n. 1
0
// Program entry point.
//
// Command line, as enforced by the code below:  [SWITCHES] [--] DATABASE SPECIMEN
//   -h, --help    calls ::usage(0)
//   --link        passes link=true to open_specimen()
//   --no-link     passes link=false to open_specimen() (the default)
//   --            ends switch processing
// Exactly two positional arguments must follow the switches, otherwise ::usage(1) is called.
//
// The syscalls_made table is dropped and re-created on every run, then filled in by
// add_calls_to_syscalls_to_db() and post-processed by analyze_data() before the transaction is committed.
// `argv0` and `transaction` are not declared in this function (presumably file-scope variables).
int
main(int argc, char *argv[])
{
    std::ios::sync_with_stdio();

    // Reduce argv[0] to a bare program name for diagnostics: drop any directory part and a leading "lt-"
    // (presumably the prefix libtool gives its wrapper executables).
    argv0 = argv[0];
    {
        size_t slash = argv0.rfind('/');
        argv0 = slash==std::string::npos ? argv0 : argv0.substr(slash+1);
        if (0==argv0.substr(0, 3).compare("lt-"))
            argv0 = argv0.substr(3);
    }

    int argno = 1;
    bool link = false;

    // Process leading switches; stop at the first argument that does not start with '-' or after "--".
    for (/*void*/; argno<argc && '-'==argv[argno][0]; ++argno) {
        std::cout << argv[argno] << std::endl;          // echo each switch as it is examined
        if (!strcmp(argv[argno], "--")) {
            ++argno;
            break;
        } else if (!strcmp(argv[argno], "--help") || !strcmp(argv[argno], "-h")) {
            ::usage(0);
        } else if (!strcmp(argv[argno], "--link")) {
            link = true;
        } else if (!strcmp(argv[argno], "--no-link")) {
            link = false;
        } else {
            std::cerr <<argv0 <<": unrecognized switch: " <<argv[argno] <<"\n"
                      <<"see \"" <<argv0 <<" --help\" for usage info.\n";
            exit(1);
        }
    }
    if (argno+2!=argc)
        ::usage(1);

    // First positional argument: the database to connect to.
    std::string db_name(argv[argno++]);
    std::cout << "Connecting to db:" << db_name << std::endl;
    SqlDatabase::ConnectionPtr conn = SqlDatabase::Connection::create(db_name);
    transaction = conn->transaction();

    // Start from an empty syscalls_made table.
    transaction->execute("drop table if exists syscalls_made;");
    transaction->execute("create table syscalls_made (caller integer references semantic_functions(id),"
                         " syscall_id integer, syscall_name text)");

    // Second positional argument: the binary specimen.
    std::string specimen_name = argv[argno++];
    std::cout << "specimen name is : " << specimen_name << std::endl;

    // Parse the binary specimen
    SgAsmInterpretation *interp = open_specimen(specimen_name, argv0, link);
    assert(interp!=NULL);

    // Figure out what functions need to be added to the database.
    std::vector<SgAsmFunction*> all_functions = SageInterface::querySubTree<SgAsmFunction>(interp);
    // NOTE(review): G is never freed in this function; ownership of the graph returned by
    // create_reachability_graph() is not visible from here -- confirm whether it should be deleted.
    DirectedGraph* G = create_reachability_graph(all_functions, interp);
    add_calls_to_syscalls_to_db(transaction, G, all_functions);
    analyze_data(transaction);
    transaction->commit();
    return 0;
}
int
main(int argc, char *argv[])
{
    std::ios::sync_with_stdio();
    argv0 = argv[0];
    {
        size_t slash = argv0.rfind('/');
        argv0 = slash==std::string::npos ? argv0 : argv0.substr(slash+1);
        if (0==argv0.substr(0, 3).compare("lt-"))
            argv0 = argv0.substr(3);
    }

    bool ignore_inline_candidates = false;
    bool ignore_no_compares = false;
    int  call_depth = -1;
    bool ignore_faults = true;
    double semantic_similarity_threshold = 0.70;
    bool expand_ncalls = false;
    bool reachability_graph = true;
    bool show_progress = false;
    bool verbose = false;
    std::string input_file_name;

    int argno = 1;
    for (/*void*/; argno<argc && '-'==argv[argno][0]; ++argno) {
        if (!strcmp(argv[argno], "--")) {
            ++argno;
            break;
        } else if (!strcmp(argv[argno], "--help") || !strcmp(argv[argno], "-h")) {
            usage(0);
        } else if (!strcmp(argv[argno], "--ignore-inline-candidates")) {
            ignore_inline_candidates = true;
        } else if (!strcmp(argv[argno], "--ignore-no-compares")) {
            ignore_no_compares = false;
        } else if (!strcmp(argv[argno], "--progress")) {
            show_progress = true;
        } else if (!strcmp(argv[argno], "--no-expand-ncalls")) {
            expand_ncalls = false;
        } else if (!strncmp(argv[argno], "--file=", 7)) {
            input_file_name = argv[argno]+7;
        } else if (!strcmp(argv[argno], "--verbose")) {
            verbose = true;
        } else if (!strncmp(argv[argno], "--call-depth=",13)) {
            call_depth = strtol(argv[argno]+13, NULL, 0);
        } else {
            std::cerr <<argv0 <<": unknown switch: " <<argv[argno] <<"\n"
                      <<argv0 <<": see --help for more info\n";
            exit(1);
        }
    }
    if (argno+1!=argc)
        usage(1);

    SqlDatabase::ConnectionPtr conn = SqlDatabase::Connection::create(argv[argno++]);
    transaction = conn->transaction();
    int64_t cmd_id = CloneDetection::start_command(transaction, argc, argv, "calculating api similarity");

    // Read function pairs from standard input or the file
    FunctionPairs worklist;
    if (input_file_name.empty()) {
        std::cerr <<argv0 <<": reading function pairs worklist from stdin...\n";
        worklist = load_worklist("stdin", stdin);
    } else {
        FILE *in = fopen(input_file_name.c_str(), "r");
        if (NULL==in) {
            std::cerr <<argv0 <<": " <<strerror(errno) <<": " << input_file_name <<"\n";
            exit(1);
        }
        worklist = load_worklist(input_file_name, in);
        fclose(in);
    }
    size_t npairs = worklist.size();
    std::cerr <<argv0 <<": work list has " <<npairs <<" function pair" <<(1==npairs?"":"s") <<"\n";

    // Process each function pair
    CloneDetection::Progress progress(npairs);
    progress.force_output(show_progress);

    // Load the computational equivalence classes
    std::map<int,int> norm_map;
    computational_equivalent_classes(norm_map);

    // Create list of functions and igroups to analyze
    SqlDatabase::StatementPtr insert_stmt = transaction->statement("insert into api_call_similarity"
                                                                   "(func1_id, func2_id, max_similarity, min_similarity,"
                                                                   " ave_similarity, cg_similarity)"
                                                                   " values (?, ?, ?, ?, ?, ?)");
    while (!worklist.empty()) {
        ++progress;
        int func1_id, func2_id;
        boost::tie(func1_id, func2_id) = worklist.shift();
        if (verbose)
            std::cerr <<argv0 <<": func1_id=" <<func1_id <<" func2_id=" <<func2_id <<"\n";

        SqlDatabase::StatementPtr igroup_stmt = transaction->statement("select distinct sem1.igroup_id"
                                                                       " from semantic_fio as sem1 "
                                                                       " join semantic_fio as sem2"
                                                                       "   on sem2.igroup_id = sem1.igroup_id"
                                                                       "   and sem2.func_id = ?"
                                                                       " where sem1.func_id = ? " +
                                                                       std::string(ignore_faults ?
                                                                                   " and sem1.status = 0 and sem2.status = 0" :
                                                                                   "") +
                                                                       " order by sem1.igroup_id");
        igroup_stmt->bind(0, func2_id);
        igroup_stmt->bind(1, func1_id);

        int ncompares = 0;
        double max_api_similarity = 0;
        double min_api_similarity = INT_MAX;
        double ave_api_similarity = 0;
        for (SqlDatabase::Statement::iterator row=igroup_stmt->begin(); row!=igroup_stmt->end(); ++row) {
            int igroup_id = row.get<int>(0);
            double api_similarity = similarity(func1_id, func2_id, igroup_id, semantic_similarity_threshold,
                                               ignore_inline_candidates, ignore_no_compares, call_depth, expand_ncalls, norm_map);

            if (api_similarity < 0)
                continue;

            max_api_similarity = std::max(api_similarity, max_api_similarity);
            min_api_similarity = std::min(api_similarity, min_api_similarity);
            ave_api_similarity += api_similarity;
            ncompares++;
        }

        if (ncompares == 0) {
            ave_api_similarity = 1.0;
            max_api_similarity = 1.0;
            min_api_similarity = 1.0;
        } else {
            ave_api_similarity = ave_api_similarity/ncompares;
        }

        // Find call similarity between functions
        double cg_similarity = whole_function_similarity(func1_id, func2_id, norm_map, reachability_graph);

        insert_stmt->bind(0, func1_id);
        insert_stmt->bind(1, func2_id);
        insert_stmt->bind(2, max_api_similarity);
        insert_stmt->bind(3, min_api_similarity);
        insert_stmt->bind(4, ave_api_similarity);
        insert_stmt->bind(5, cg_similarity);

        insert_stmt->execute();
    }

    progress.message("committing changes");
    std::string mesg = "calculated api similarity for "+
                       StringUtility::numberToString(npairs)+" function pair"+(1==npairs?"":"s");
    CloneDetection::finish_command(transaction, cmd_id, mesg);
    transaction->commit();
    progress.clear();
    return 0;
}
int
main(int argc, char *argv[])
{
    std::ios::sync_with_stdio();
    argv0 = argv[0];
    {
        size_t slash = argv0.rfind('/');
        argv0 = slash==std::string::npos ? argv0 : argv0.substr(slash+1);
        if (0==argv0.substr(0, 3).compare("lt-"))
            argv0 = argv0.substr(3);
    }

    int argno = 1;
    for (/*void*/; argno<argc && '-'==argv[argno][0]; ++argno) {
        if (!strcmp(argv[argno], "--")) {
            ++argno;
            break;
        } else if (!strcmp(argv[argno], "--help") || !strcmp(argv[argno], "-h")) {
            ::usage(0);
        } else if (!strcmp(argv[argno], "--delete")) {
            opt.delete_old_data = true;
        } else if (!strncmp(argv[argno], "--exclude-functions=", 20)) {
            opt.exclude_functions_table = argv[argno]+20;
        } else if (!strcmp(argv[argno], "--no-delete")) {
            opt.delete_old_data = false;
        } else if (!strncmp(argv[argno], "--relation=", 11)) {
            opt.relation_id = strtol(argv[argno]+11, NULL, 0);
        } else {
            std::cerr <<argv0 <<": unknown switch: " <<argv[argno] <<"\n"
                      <<argv0 <<": see --help for more info\n";
            exit(1);
        }
    };
    if (argno+1!=argc)
        ::usage(1);
    time_t start_time = time(NULL);
    SqlDatabase::ConnectionPtr conn = SqlDatabase::Connection::create(argv[argno++]);
    SqlDatabase::TransactionPtr tx = conn->transaction();

    // Save ourself in the history if we're modifying the database.
    int64_t cmd_id=-1;
    if (opt.delete_old_data)
        cmd_id = CloneDetection::start_command(tx, argc, argv, "clearing funcsim data for relation #"+
                                               StringUtility::numberToString(opt.relation_id), start_time);

    // The 32-func-similarity tool needs this index, so we might as well create it here when we're running serially.  The
    // semantic_outputvalues table can be HUGE depending on how the analysis is configured (i.e., whether it saves output
    // values as a vector or set, whether it saves function calls and system calls, etc.).  Since creating the index could take
    // a few minutes, we'd rather not create it if it alread exists, but PostgreSQL v8 doesn't have a "CREATE INDEX IF NOT
    // EXISTS" ability.  Therefore, try to create the index right away before we make any other changes, and if creation fails
    // then start a new transaction (because the current one is hosed).
    std::cerr <<argv0 <<": creating output group index (could take a while)\n";
    try {
        SqlDatabase::TransactionPtr tx = conn->transaction();
        tx->execute("create index idx_ogroups_hashkey on semantic_outputvalues(hashkey)");
        tx->commit();
    } catch (const SqlDatabase::Exception&) {
        std::cerr <<argv0 <<": idx_ogroups_hashkey index already exists; NOT dropping and recreating\n";
    }

    // Delete old data.
    if (opt.delete_old_data)
        tx->statement("delete from semantic_funcsim where relation_id = ?")->bind(0, opt.relation_id)->execute();

    // Get the list of functions that should appear in the worklist.
    std::cerr <<argv0 <<": obtaining function list\n";
    std::string stmt1 = "create temporary table tmp_tested_funcs as"
                        " select distinct fio.func_id as func_id"
                        " from semantic_fio as fio";
    if (!opt.exclude_functions_table.empty()) {
        std::vector<std::string> parts = StringUtility::split('.', opt.exclude_functions_table, 2, true);
        if (parts.size()<2)
            parts.push_back("func_id");
        stmt1 += " left join " + parts.front() + " as exclude"
                 " on fio.func_id = exclude." + parts.back() +
                 " where exclude." + parts.back() + " is null";
    }
    tx->execute(stmt1);

    // Create pairs of function IDs for those functions which have been tested and for which no similarity measurement has been
    // computed.  (FIXME: We should probably recompute similarity that might have changed due to rerunning tests or running the
    // same function but with more input groups. [Robb P. Matzke 2013-06-19])
    std::cerr <<argv0 <<": creating work list\n";
    SqlDatabase::StatementPtr stmt2 = tx->statement("select distinct f1.func_id as func1_id, f2.func_id as func2_id"
                                                    " from tmp_tested_funcs as f1"
                                                    " join tmp_tested_funcs as f2 on f1.func_id < f2.func_id"
                                                    " except"
                                                    " select func1_id, func2_id from semantic_funcsim as sim"
                                                    " where sim.relation_id = ?");
    stmt2->bind(0, opt.relation_id);
    for (SqlDatabase::Statement::iterator row=stmt2->begin(); row!=stmt2->end(); ++row)
        std::cout <<row.get<int>(0) <<"\t" <<row.get<int>(1) <<"\n";

    if (cmd_id>=0)
        CloneDetection::finish_command(tx, cmd_id, "cleared funcsim table for relation #"+
                                       StringUtility::numberToString(opt.relation_id));

    tx->commit();
    return 0;
}