int main(int argc, char *argv[]) { std::ios::sync_with_stdio(); argv0 = argv[0]; { size_t slash = argv0.rfind('/'); argv0 = slash==std::string::npos ? argv0 : argv0.substr(slash+1); if (0==argv0.substr(0, 3).compare("lt-")) argv0 = argv0.substr(3); } int argno = 1; bool link = false; std::vector<std::string> signature_components; for (/*void*/; argno<argc && '-'==argv[argno][0]; ++argno) { std::cout << argv[argno] << std::endl; if (!strcmp(argv[argno], "--")) { ++argno; break; } else if (!strcmp(argv[argno], "--help") || !strcmp(argv[argno], "-h")) { ::usage(0); } else if (!strcmp(argv[argno], "--link")) { link = true; } else if (!strcmp(argv[argno], "--no-link")) { link = false; } else { std::cerr <<argv0 <<": unrecognized switch: " <<argv[argno] <<"\n" <<"see \"" <<argv0 <<" --help\" for usage info.\n"; exit(1); } } if (argno+2!=argc) ::usage(1); std::string db_name(argv[argno++]); std::cout << "Connecting to db:" << db_name << std::endl; SqlDatabase::ConnectionPtr conn = SqlDatabase::Connection::create(db_name); transaction = conn->transaction(); transaction->execute("drop table if exists syscalls_made;"); transaction->execute("create table syscalls_made (caller integer references semantic_functions(id)," " syscall_id integer, syscall_name text)"); std::cout << "database name is : " << std::string(argv[argno]) << std::endl; std::string specimen_name = argv[argno++]; // Parse the binary specimen SgAsmInterpretation *interp = open_specimen(specimen_name, argv0, link); assert(interp!=NULL); // Figure out what functions need to be added to the database. std::vector<SgAsmFunction*> all_functions = SageInterface::querySubTree<SgAsmFunction>(interp); DirectedGraph* G = create_reachability_graph(all_functions, interp); add_calls_to_syscalls_to_db(transaction, G, all_functions); analyze_data(transaction); transaction->commit(); return 0; }
int main(int argc, char *argv[]) { std::ios::sync_with_stdio(); argv0 = argv[0]; { size_t slash = argv0.rfind('/'); argv0 = slash==std::string::npos ? argv0 : argv0.substr(slash+1); if (0==argv0.substr(0, 3).compare("lt-")) argv0 = argv0.substr(3); } bool ignore_inline_candidates = false; bool ignore_no_compares = false; int call_depth = -1; bool ignore_faults = true; double semantic_similarity_threshold = 0.70; bool expand_ncalls = false; bool reachability_graph = true; bool show_progress = false; bool verbose = false; std::string input_file_name; int argno = 1; for (/*void*/; argno<argc && '-'==argv[argno][0]; ++argno) { if (!strcmp(argv[argno], "--")) { ++argno; break; } else if (!strcmp(argv[argno], "--help") || !strcmp(argv[argno], "-h")) { usage(0); } else if (!strcmp(argv[argno], "--ignore-inline-candidates")) { ignore_inline_candidates = true; } else if (!strcmp(argv[argno], "--ignore-no-compares")) { ignore_no_compares = false; } else if (!strcmp(argv[argno], "--progress")) { show_progress = true; } else if (!strcmp(argv[argno], "--no-expand-ncalls")) { expand_ncalls = false; } else if (!strncmp(argv[argno], "--file=", 7)) { input_file_name = argv[argno]+7; } else if (!strcmp(argv[argno], "--verbose")) { verbose = true; } else if (!strncmp(argv[argno], "--call-depth=",13)) { call_depth = strtol(argv[argno]+13, NULL, 0); } else { std::cerr <<argv0 <<": unknown switch: " <<argv[argno] <<"\n" <<argv0 <<": see --help for more info\n"; exit(1); } } if (argno+1!=argc) usage(1); SqlDatabase::ConnectionPtr conn = SqlDatabase::Connection::create(argv[argno++]); transaction = conn->transaction(); int64_t cmd_id = CloneDetection::start_command(transaction, argc, argv, "calculating api similarity"); // Read function pairs from standard input or the file FunctionPairs worklist; if (input_file_name.empty()) { std::cerr <<argv0 <<": reading function pairs worklist from stdin...\n"; worklist = load_worklist("stdin", stdin); } else { FILE *in = fopen(input_file_name.c_str(), "r"); if (NULL==in) { std::cerr <<argv0 <<": " <<strerror(errno) <<": " << input_file_name <<"\n"; exit(1); } worklist = load_worklist(input_file_name, in); fclose(in); } size_t npairs = worklist.size(); std::cerr <<argv0 <<": work list has " <<npairs <<" function pair" <<(1==npairs?"":"s") <<"\n"; // Process each function pair CloneDetection::Progress progress(npairs); progress.force_output(show_progress); // Load the computational equivalence classes std::map<int,int> norm_map; computational_equivalent_classes(norm_map); // Create list of functions and igroups to analyze SqlDatabase::StatementPtr insert_stmt = transaction->statement("insert into api_call_similarity" "(func1_id, func2_id, max_similarity, min_similarity," " ave_similarity, cg_similarity)" " values (?, ?, ?, ?, ?, ?)"); while (!worklist.empty()) { ++progress; int func1_id, func2_id; boost::tie(func1_id, func2_id) = worklist.shift(); if (verbose) std::cerr <<argv0 <<": func1_id=" <<func1_id <<" func2_id=" <<func2_id <<"\n"; SqlDatabase::StatementPtr igroup_stmt = transaction->statement("select distinct sem1.igroup_id" " from semantic_fio as sem1 " " join semantic_fio as sem2" " on sem2.igroup_id = sem1.igroup_id" " and sem2.func_id = ?" " where sem1.func_id = ? " + std::string(ignore_faults ? " and sem1.status = 0 and sem2.status = 0" : "") + " order by sem1.igroup_id"); igroup_stmt->bind(0, func2_id); igroup_stmt->bind(1, func1_id); int ncompares = 0; double max_api_similarity = 0; double min_api_similarity = INT_MAX; double ave_api_similarity = 0; for (SqlDatabase::Statement::iterator row=igroup_stmt->begin(); row!=igroup_stmt->end(); ++row) { int igroup_id = row.get<int>(0); double api_similarity = similarity(func1_id, func2_id, igroup_id, semantic_similarity_threshold, ignore_inline_candidates, ignore_no_compares, call_depth, expand_ncalls, norm_map); if (api_similarity < 0) continue; max_api_similarity = std::max(api_similarity, max_api_similarity); min_api_similarity = std::min(api_similarity, min_api_similarity); ave_api_similarity += api_similarity; ncompares++; } if (ncompares == 0) { ave_api_similarity = 1.0; max_api_similarity = 1.0; min_api_similarity = 1.0; } else { ave_api_similarity = ave_api_similarity/ncompares; } // Find call similarity between functions double cg_similarity = whole_function_similarity(func1_id, func2_id, norm_map, reachability_graph); insert_stmt->bind(0, func1_id); insert_stmt->bind(1, func2_id); insert_stmt->bind(2, max_api_similarity); insert_stmt->bind(3, min_api_similarity); insert_stmt->bind(4, ave_api_similarity); insert_stmt->bind(5, cg_similarity); insert_stmt->execute(); } progress.message("committing changes"); std::string mesg = "calculated api similarity for "+ StringUtility::numberToString(npairs)+" function pair"+(1==npairs?"":"s"); CloneDetection::finish_command(transaction, cmd_id, mesg); transaction->commit(); progress.clear(); return 0; }
int main(int argc, char *argv[]) { std::ios::sync_with_stdio(); argv0 = argv[0]; { size_t slash = argv0.rfind('/'); argv0 = slash==std::string::npos ? argv0 : argv0.substr(slash+1); if (0==argv0.substr(0, 3).compare("lt-")) argv0 = argv0.substr(3); } int argno = 1; for (/*void*/; argno<argc && '-'==argv[argno][0]; ++argno) { if (!strcmp(argv[argno], "--")) { ++argno; break; } else if (!strcmp(argv[argno], "--help") || !strcmp(argv[argno], "-h")) { ::usage(0); } else if (!strcmp(argv[argno], "--delete")) { opt.delete_old_data = true; } else if (!strncmp(argv[argno], "--exclude-functions=", 20)) { opt.exclude_functions_table = argv[argno]+20; } else if (!strcmp(argv[argno], "--no-delete")) { opt.delete_old_data = false; } else if (!strncmp(argv[argno], "--relation=", 11)) { opt.relation_id = strtol(argv[argno]+11, NULL, 0); } else { std::cerr <<argv0 <<": unknown switch: " <<argv[argno] <<"\n" <<argv0 <<": see --help for more info\n"; exit(1); } }; if (argno+1!=argc) ::usage(1); time_t start_time = time(NULL); SqlDatabase::ConnectionPtr conn = SqlDatabase::Connection::create(argv[argno++]); SqlDatabase::TransactionPtr tx = conn->transaction(); // Save ourself in the history if we're modifying the database. int64_t cmd_id=-1; if (opt.delete_old_data) cmd_id = CloneDetection::start_command(tx, argc, argv, "clearing funcsim data for relation #"+ StringUtility::numberToString(opt.relation_id), start_time); // The 32-func-similarity tool needs this index, so we might as well create it here when we're running serially. The // semantic_outputvalues table can be HUGE depending on how the analysis is configured (i.e., whether it saves output // values as a vector or set, whether it saves function calls and system calls, etc.). Since creating the index could take // a few minutes, we'd rather not create it if it alread exists, but PostgreSQL v8 doesn't have a "CREATE INDEX IF NOT // EXISTS" ability. Therefore, try to create the index right away before we make any other changes, and if creation fails // then start a new transaction (because the current one is hosed). std::cerr <<argv0 <<": creating output group index (could take a while)\n"; try { SqlDatabase::TransactionPtr tx = conn->transaction(); tx->execute("create index idx_ogroups_hashkey on semantic_outputvalues(hashkey)"); tx->commit(); } catch (const SqlDatabase::Exception&) { std::cerr <<argv0 <<": idx_ogroups_hashkey index already exists; NOT dropping and recreating\n"; } // Delete old data. if (opt.delete_old_data) tx->statement("delete from semantic_funcsim where relation_id = ?")->bind(0, opt.relation_id)->execute(); // Get the list of functions that should appear in the worklist. std::cerr <<argv0 <<": obtaining function list\n"; std::string stmt1 = "create temporary table tmp_tested_funcs as" " select distinct fio.func_id as func_id" " from semantic_fio as fio"; if (!opt.exclude_functions_table.empty()) { std::vector<std::string> parts = StringUtility::split('.', opt.exclude_functions_table, 2, true); if (parts.size()<2) parts.push_back("func_id"); stmt1 += " left join " + parts.front() + " as exclude" " on fio.func_id = exclude." + parts.back() + " where exclude." + parts.back() + " is null"; } tx->execute(stmt1); // Create pairs of function IDs for those functions which have been tested and for which no similarity measurement has been // computed. (FIXME: We should probably recompute similarity that might have changed due to rerunning tests or running the // same function but with more input groups. [Robb P. Matzke 2013-06-19]) std::cerr <<argv0 <<": creating work list\n"; SqlDatabase::StatementPtr stmt2 = tx->statement("select distinct f1.func_id as func1_id, f2.func_id as func2_id" " from tmp_tested_funcs as f1" " join tmp_tested_funcs as f2 on f1.func_id < f2.func_id" " except" " select func1_id, func2_id from semantic_funcsim as sim" " where sim.relation_id = ?"); stmt2->bind(0, opt.relation_id); for (SqlDatabase::Statement::iterator row=stmt2->begin(); row!=stmt2->end(); ++row) std::cout <<row.get<int>(0) <<"\t" <<row.get<int>(1) <<"\n"; if (cmd_id>=0) CloneDetection::finish_command(tx, cmd_id, "cleared funcsim table for relation #"+ StringUtility::numberToString(opt.relation_id)); tx->commit(); return 0; }