Exemplo n.º 1
0
/* Remove the functions from the compilation unit that is only available in one of the traces.
 *   - criteria complement of the functions from the files of the caller functions in the call trace is removed. */
std::pair<CallVec*, CallVec*>
remove_compilation_unit_complement(int func1_id, int func2_id, int igroup_id, int similarity, CallVec* func1_vec,
                                   CallVec* func2_vec)
{
    CallVec* new_func1_vec = new CallVec;
    CallVec* new_func2_vec = new CallVec;

    if (func1_vec->size() > 0 || func2_vec->size() > 0) {
        // Find the set complement of functions called by the two functions
        // - we are not interested in functions called by both
        std::set<int> func1_vec_set;
        std::set<int> func2_vec_set;

        for (CallVec::iterator it = func1_vec->begin(); it != func1_vec->end(); ++it)
            func1_vec_set.insert(*it);
        for (CallVec::iterator it = func2_vec->begin(); it != func2_vec->end(); ++it)
            func2_vec_set.insert(*it);

        std::set<int> func1_func2_complement;
        std::set_difference(func1_vec_set.begin(), func1_vec_set.end(), func2_vec_set.begin(), func2_vec_set.end(),
                            std::inserter(func1_func2_complement, func1_func2_complement.end()));

        // Find the compilation units in question. A compilation unit is in our case a file.
        SqlDatabase::StatementPtr func1_file_stmt = transaction->statement("select file_id from semantic_functions"
                                                                           " where id = ?");
        func1_file_stmt->bind(0, func1_id);
        int func1_file_id = func1_file_stmt->execute_int();

        SqlDatabase::StatementPtr func2_file_stmt = transaction->statement("select file_id from semantic_functions"
                                                                           " where id = ?");
        func2_file_stmt->bind(0, func2_id);
        int func2_file_id = func2_file_stmt->execute_int();

        // Find the functions that needs to be removed
        //  - all functions that has a clone in between the files
        SqlDatabase::StatementPtr stmt = transaction->statement("select sem.func1_id, sem.func2_id from semantic_funcsim as sem"
                                                                " join semantic_functions as sf1 on sem.func1_id = sf1.id"
                                                                " join semantic_functions as sf2 on sem.func2_id = sf2.id"
                                                                " where similarity >= ? and sf1.file_id in (?,?)"
                                                                "   and sf2.file_id in (?, ?) and sf1.file_id != sf2.file_id");
        stmt->bind(0, similarity);
        stmt->bind(1, func1_file_id);
        stmt->bind(2, func2_file_id);
        stmt->bind(3, func1_file_id);
        stmt->bind(4, func2_file_id);

        std::set<int> complement_functions;
        for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) {
            int clone_func1 = row.get<int>(0);
            int clone_func2 = row.get<int>(1);

            complement_functions.insert(clone_func1);
            complement_functions.insert(clone_func2);
        }

        // Find the functions we want to remove
        //  - functions present with clones in between the files that is not part of both traces
        std::set<int> remove_these;
        std::set_intersection(complement_functions.begin(), complement_functions.end(), func1_func2_complement.begin(),
                              func1_func2_complement.end(), std::inserter(remove_these, remove_these.end()));

        //prune functions to remove away from the call trace into new vectors
        for (CallVec::iterator it = func1_vec->begin(); it != func1_vec->end(); ++it) {
            if (remove_these.find(*it) == remove_these.end())
                new_func1_vec->push_back(*it);
        }

        for (CallVec::iterator it = func2_vec->begin(); it != func2_vec->end(); ++it) {
            if (remove_these.find(*it) == remove_these.end())
                new_func2_vec->push_back(*it);
        }
    }
    return std::pair<CallVec*, CallVec*>(new_func1_vec, new_func2_vec);
}
int
main(int argc, char *argv[])
{
    std::ios::sync_with_stdio();
    argv0 = argv[0];
    {
        size_t slash = argv0.rfind('/');
        argv0 = slash==std::string::npos ? argv0 : argv0.substr(slash+1);
        if (0==argv0.substr(0, 3).compare("lt-"))
            argv0 = argv0.substr(3);
    }

    Switches opt;
    int argno = 1;
    for (/*void*/; argno<argc && '-'==argv[argno][0]; ++argno) {
        if (!strcmp(argv[argno], "--")) {
            ++argno;
            break;
        } else if (!strcmp(argv[argno], "--help") || !strcmp(argv[argno], "-h")) {
            ::usage(0);
        } else if (!strncmp(argv[argno], "--entry=", 8)) {
            opt.entry_vas.insert(strtoull(argv[argno]+8, NULL, 0));
        } else if (!strcmp(argv[argno], "--file=list") || !strcmp(argv[argno], "--files=list")) {
            opt.list_files = true;
        } else if (!strncmp(argv[argno], "--file=", 7) || !strncmp(argv[argno], "--files=", 8)) {
            std::vector<std::string> ids = StringUtility::split(",", strchr(argv[argno], '=')+1, (size_t)-1, true);
            for (size_t i=0; i<ids.size(); ++i) {
                const char *s = ids[i].c_str();
                char *rest;
                errno = 0;
                int id = strtoul(s, &rest, 0);
                if (errno || rest==s || *rest) {
                    std::cerr <<argv0 <<": invalid file ID: " <<ids[i] <<"\n";
                    exit(1);
                }
                opt.files.insert(id);
            }
        } else if (!strncmp(argv[argno], "--function=", 11) || !strncmp(argv[argno], "--functions=", 12)) {
            std::vector<std::string> ids = StringUtility::split(",", strchr(argv[argno], '=')+1, (size_t)-1, true);
            if (ids.size()==1 && isalpha(ids[0][0]) && ids[0].find_first_of('.')!=std::string::npos) {
                std::vector<std::string> words = StringUtility::split(".", ids[0]);
                if (words.size()!=2 ||
                    !SqlDatabase::is_valid_table_name(words[0]) || !SqlDatabase::is_valid_table_name(words[1])) {
                    std::cerr <<argv0 <<": --function switch needs either IDs or a database TABLE.COLUMN\n";
                    exit(1);
                }
                opt.function_table = words[0];
                opt.function_column = words[1];
            } else {
                for (size_t i=0; i<ids.size(); ++i) {
                    const char *s = ids[i].c_str();
                    char *rest;
                    errno = 0;
                    int id = strtoul(s, &rest, 0);
                    if (errno || rest==s || *rest) {
                        std::cerr <<argv0 <<": invalid function ID: " <<ids[i] <<"\n";
                        exit(1);
                    }
                    opt.functions.insert(id);
                }
            }
        } else if (!strncmp(argv[argno], "--first-fuzz=", 13)) {
            opt.first_fuzz = strtoul(argv[argno]+13, NULL, 0);
        } else if (!strncmp(argv[argno], "--name=", 7)) {
            opt.names.insert(argv[argno]+7);
        } else if (!strncmp(argv[argno], "--nfuzz=", 8)) {
            opt.nfuzz = strtoul(argv[argno]+8, NULL, 0);
            opt.nfuzz_set = true;
        } else if (!strncmp(argv[argno], "--size=", 7)) {
            opt.ninsns = strtoul(argv[argno]+7, NULL, 0);
        } else if (!strcmp(argv[argno], "--specimen=list") || !strcmp(argv[argno], "--specimens=list")) {
            opt.list_specimens = true;
        } else if (!strncmp(argv[argno], "--specimen=", 11) || !strncmp(argv[argno], "--specimens=", 12)) {
            std::vector<std::string> ids = StringUtility::split(",", strchr(argv[argno], '=')+1, (size_t)-1, true);
            for (size_t i=0; i<ids.size(); ++i) {
                const char *s = ids[i].c_str();
                char *rest;
                errno = 0;
                int id = strtoul(s, &rest, 0);
                if (errno || rest==s || *rest) {
                    std::cerr <<argv0 <<": invalid specimen ID: " <<ids[i] <<"\n";
                    exit(1);
                }
                opt.specimens.insert(id);
            }
        } else {
            std::cerr <<argv0 <<": unrecognized switch: " <<argv[argno] <<"\n"
                      <<"see \"" <<argv0 <<" --help\" for usage info.\n";
            exit(1);
        }
    }
    if (argno+1!=argc)
        ::usage(1);
    SqlDatabase::TransactionPtr tx = SqlDatabase::Connection::create(argv[argno++])->transaction();

    // List the ID numbers and names for all specimen files
    if (opt.list_specimens) {
        SqlDatabase::Table<int, std::string> specimens;
        specimens.insert(tx->statement("select file.id, file.name"
                                       " from (select distinct specimen_id as id from semantic_functions) as specimen"
                                       " join semantic_files as file on specimen.id = file.id"
                                       " order by file.name"));
        specimens.headers("File ID", "Specimen Name");
        specimens.print(std::cout);
        return 0;
    }

    // List the ID numbers and names for all files containing functions
    if (opt.list_files) {
        SqlDatabase::Table<int, std::string> files;
        files.insert(tx->statement("select file.id, file.name"
                                   " from (select distinct file_id as id from semantic_functions) as used"
                                   " join semantic_files as file on used.id = file.id"
                                   " order by file.name"));
        files.headers("File ID", "Binary File Name");
        files.print(std::cout);
        return 0;
    }

    // Sanity checks
    if (!opt.functions.empty() && !opt.function_table.empty()) {
        std::cerr <<argv0 <<": --function=ID and --function=TABLE are mutually exclusive\n";
        exit(1);
    }
    if (0==tx->statement("select count(*) from semantic_functions")->execute_int()) {
        std::cerr <<argv0 <<": database has no functions; nothing to test\n";
        return 0;
    }
    if (0==tx->statement("select count(*) from semantic_inputvalues")->execute_int()) {
        std::cerr <<argv0 <<": database has no input groups; nothing to test\n";
        return 0;
    }

    // Create table tmp_functions containing IDs for selected functions and their specimen IDs
    std::vector<std::string> constraints;
    if (!opt.entry_vas.empty())
        constraints.push_back("func.entry_va " + SqlDatabase::in(opt.entry_vas));
    if (!opt.names.empty())
        constraints.push_back("func.name " + SqlDatabase::in_strings(opt.names, tx->driver()));
    if (!opt.specimens.empty())
        constraints.push_back("func.specimen_id " + SqlDatabase::in(opt.specimens));
    if (!opt.files.empty())
        constraints.push_back("func.file_id " + SqlDatabase::in(opt.files));
    if (!opt.functions.empty())
        constraints.push_back("func.id " + SqlDatabase::in(opt.functions));
    if (opt.ninsns>0)
        constraints.push_back("func.ninsns >= " + StringUtility::numberToString(opt.ninsns));
    std::string sql1 = "select func.id, func.specimen_id from semantic_functions as func";
    if (!opt.function_table.empty())
        sql1 += " join "+opt.function_table+" as flist on func.id = flist."+opt.function_column;
    if (!constraints.empty())
        sql1 += " where " + StringUtility::join(" and ", constraints);
    tx->execute("create temporary table tmp_functions as " + sql1);

    // Create table tmp_inputgroups containing IDs for selected input groups
    std::string sql2 = "select distinct igroup_id from semantic_inputvalues where igroup_id >= " +
                       StringUtility::numberToString(opt.first_fuzz);
    if (opt.nfuzz_set)
        sql2 += " and igroup_id < " + StringUtility::numberToString(opt.first_fuzz+opt.nfuzz);
    tx->execute("create temporary table tmp_inputgroups as " + sql2);

    // Create tmp_pending as the cross product of functions and inputgroups except for those already tested
    tx->execute("create temporary table tmp_pending as"
                "    select func.specimen_id as specimen_id, func.id as func_id, igroup.igroup_id as igroup_id"
                "      from tmp_functions as func"
                "      join tmp_inputgroups as igroup"
                "      on igroup.igroup_id is not null" // "on" clause and "is not null" (rather than "true") for portability
                "  except"
                "    select func.specimen_id, func.id, fio.igroup_id"
                "      from semantic_fio as fio"
                "      join semantic_functions as func on fio.func_id=func.id");
    SqlDatabase::StatementPtr stmt = tx->statement("select distinct specimen_id, func_id, igroup_id"
                                                   " from tmp_pending"
                                                   " order by specimen_id, igroup_id, func_id");
    for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row)
        std::cout <<row.get<int>(0) <<"\t" <<row.get<int>(1) <<"\t" <<row.get<int>(2) <<"\n";

    // no need to commit, but if we change this in the future, be sure to add begin_command()/finish_command()
    return 0;
}