void compute_percent_similarity_statistics(double bucket_size, double increment, SqlDatabase::TransactionPtr transaction) { int num_pairs = transaction->statement("select count(*) from semantic_funcsim")->execute_int(); transaction->execute("drop table IF EXISTS fr_percent_similar"); transaction->execute("create table fr_percent_similar(similarity_low double precision, similarity_middle double precision," " similarity_high double precision, percent double precision, num_matches integer);"); SqlDatabase::StatementPtr pecent_similar_stmt = transaction->statement("insert into fr_percent_similar" // 0 1 2 "(similarity_low, similarity_middle, similarity_high," // 3 4 " percent, num_matches) " " values (?, ?, ?, ?, ?)"); for (double cur_bucket = 0.0; cur_bucket <= 1.0+bucket_size; cur_bucket+=increment) { int num_matches = transaction->statement("select count(*) from semantic_funcsim where " " similarity >= " + boost::lexical_cast<std::string>(cur_bucket - bucket_size) + " and similarity < " + boost::lexical_cast<std::string>(cur_bucket + bucket_size))->execute_int(); pecent_similar_stmt->bind(0, cur_bucket - bucket_size < 0 ? 0 : cur_bucket - bucket_size); pecent_similar_stmt->bind(1, cur_bucket); pecent_similar_stmt->bind(2, cur_bucket + bucket_size >= 1.0 ? 1.0 : cur_bucket + bucket_size); pecent_similar_stmt->bind(3, num_pairs > 0 ? ((double) num_matches*100.0)/num_pairs : 0); pecent_similar_stmt->bind(4, num_matches); pecent_similar_stmt->execute(); } }
int main(int argc, char *argv[]) { std::ios::sync_with_stdio(); argv0 = argv[0]; { size_t slash = argv0.rfind('/'); argv0 = slash==std::string::npos ? argv0 : argv0.substr(slash+1); if (0==argv0.substr(0, 3).compare("lt-")) argv0 = argv0.substr(3); } int argno = 1; bool link = false; std::vector<std::string> signature_components; for (/*void*/; argno<argc && '-'==argv[argno][0]; ++argno) { std::cout << argv[argno] << std::endl; if (!strcmp(argv[argno], "--")) { ++argno; break; } else if (!strcmp(argv[argno], "--help") || !strcmp(argv[argno], "-h")) { ::usage(0); } else if (!strcmp(argv[argno], "--link")) { link = true; } else if (!strcmp(argv[argno], "--no-link")) { link = false; } else { std::cerr <<argv0 <<": unrecognized switch: " <<argv[argno] <<"\n" <<"see \"" <<argv0 <<" --help\" for usage info.\n"; exit(1); } } if (argno+2!=argc) ::usage(1); std::string db_name(argv[argno++]); std::cout << "Connecting to db:" << db_name << std::endl; SqlDatabase::ConnectionPtr conn = SqlDatabase::Connection::create(db_name); transaction = conn->transaction(); transaction->execute("drop table if exists syscalls_made;"); transaction->execute("create table syscalls_made (caller integer references semantic_functions(id)," " syscall_id integer, syscall_name text)"); std::cout << "database name is : " << std::string(argv[argno]) << std::endl; std::string specimen_name = argv[argno++]; // Parse the binary specimen SgAsmInterpretation *interp = open_specimen(specimen_name, argv0, link); assert(interp!=NULL); // Figure out what functions need to be added to the database. std::vector<SgAsmFunction*> all_functions = SageInterface::querySubTree<SgAsmFunction>(interp); DirectedGraph* G = create_reachability_graph(all_functions, interp); add_calls_to_syscalls_to_db(transaction, G, all_functions); analyze_data(transaction); transaction->commit(); return 0; }
// Read the window size and stride from the run_parameters table into the two output arguments.  Both values are asserted
// to be non-zero.
void
get_run_parameters(const SqlDatabase::TransactionPtr &tx, int& windowSize, int& stride)
{
    SqlDatabase::StatementPtr window_query = tx->statement("select window_size from run_parameters limit 1");
    windowSize = window_query->execute_int();

    SqlDatabase::StatementPtr stride_query = tx->statement("select stride from run_parameters limit 1");
    stride = stride_query->execute_int();

    assert(0 != windowSize);
    assert(0 != stride);
}
// Run all the work items of one specimen.  The specimen is loaded (from the database if possible, otherwise by parsing
// it), its functions are looked up, the work list is divided into fixed-size chunks, and the chunks are run in parallel.
// Exits with status 1 if the specimen cannot be loaded or if any chunk fails.
void operator()() {
    if (work.empty())
        return;
    const int specimen_id = work.front().specimen_id;

    // Database connections don't survive over fork() according to SqLite and PostgreSQL documentation, so open it again
    SqlDatabase::TransactionPtr tx = SqlDatabase::Connection::create(databaseUrl)->transaction();
    OutputGroups ogroups; // do not load from database (that might take a very long time)

    if (opt.verbosity>=LACONIC) {
        if (opt.verbosity>=EFFUSIVE)
            std::cerr <<argv0 <<": " <<std::string(100, '#') <<"\n";
        std::cerr <<argv0 <<": processing binary specimen \"" <<files.name(specimen_id) <<"\"\n";
    }

    // Obtain the AST, preferring the copy saved in the database over parsing the specimen again
    SgProject *project = files.load_ast(tx, specimen_id);
    if (NULL==project)
        project = open_specimen(tx, files, specimen_id, argv0);
    if (NULL==project) {
        std::cerr <<argv0 <<": problems loading specimen\n";
        exit(1);
    }

    // Build the ID<->function maps and the entry-address->ID map, then initialize the instruction cache
    std::vector<SgAsmFunction*> all_functions = SageInterface::querySubTree<SgAsmFunction>(project);
    IdFunctionMap functions = existing_functions(tx, files, all_functions);
    FunctionIdMap function_ids;
    AddressIdMap entry2id;                              // maps function entry address to function ID
    for (IdFunctionMap::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
        function_ids[fi->second] = fi->first;
        entry2id[fi->second->get_entry_va()] = fi->first;
    }
    InstructionProvidor insns(all_functions);

    // Cut the work list into consecutive chunks of testsPerChunk items; only the final chunk may be shorter.
    static const size_t testsPerChunk = 25;
    std::vector<SomeTests> jobs;
    for (size_t first=0; first<work.size(); first+=testsPerChunk) {
        size_t last = first + testsPerChunk;
        if (last > work.size())
            last = work.size();
        Work chunk(work.begin()+first, work.begin()+last);
        jobs.push_back(SomeTests(chunk, databaseUrl, functions, function_ids, &insns, cmd_id, &entry2id));
    }

    // Run the parts in parallel using the maximum parallelism specified on the command-line.  We must commit our
    // transaction before forking, otherwise the children won't see the rows we've added to various tables.
    tx->commit();
    tx.reset();
    const size_t nfailed = runInParallel(jobs, opt.nprocs);
    if (0!=nfailed) {
        std::cerr <<"SpecimenProcessor: " <<StringUtility::plural(nfailed, "jobs") <<" failed\n";
        exit(1);
    }
}
void createVectorsRespectingFunctionBoundaries(SgNode* top, const std::string& filename, size_t windowSize, size_t stride, const SqlDatabase::TransactionPtr &tx) { struct InstructionSelector: SgAsmFunction::NodeSelector { virtual bool operator()(SgNode *node) { return isSgAsmInstruction(node)!=NULL; } } iselector; struct DataSelector: SgAsmFunction::NodeSelector { virtual bool operator()(SgNode *node) { return isSgAsmStaticData(node)!=NULL; } } dselector; SqlDatabase::StatementPtr cmd1 = tx->statement("insert into functions" // 0 1 2 3 4 5 6 " (id, file, function_name, entry_va, isize, dsize, size)" " values(?,?,?,?,?,?,?)"); SqlDatabase::StatementPtr cmd2 = tx->statement("insert into instructions" // 0 1 2 3 4 " (address, size, function_id, index_within_function, assembly)" " values (?,?,?,?,?)"); vector<SgAsmFunction*> funcs = SageInterface::querySubTree<SgAsmFunction>(top); int functionId = tx->statement("select coalesce(max(id),-1)+1 from functions")->execute_int(); // zero origin for (vector<SgAsmFunction*>::iterator fi=funcs.begin(); fi!=funcs.end(); ++fi, ++functionId) { ExtentMap e_insns, e_data, e_total; (*fi)->get_extent(&e_insns, NULL, NULL, &iselector); (*fi)->get_extent(&e_data, NULL, NULL, &dselector); (*fi)->get_extent(&e_total); createVectorsForAllInstructions(*fi, filename, (*fi)->get_name(), functionId, windowSize, stride, tx); cmd1->bind(0, functionId); cmd1->bind(1, filename); cmd1->bind(2, (*fi)->get_name() ); cmd1->bind(3, (*fi)->get_entry_va()); cmd1->bind(4, e_insns.size()); cmd1->bind(5, e_data.size()); cmd1->bind(6, e_total.size()); cmd1->execute(); vector<SgAsmInstruction*> insns = SageInterface::querySubTree<SgAsmInstruction>(*fi); for (size_t i=0; i<insns.size(); ++i) { cmd2->bind(0, insns[i]->get_address()); cmd2->bind(1, insns[i]->get_size()); cmd2->bind(2, functionId); cmd2->bind(3, i); cmd2->bind(4, unparseInstructionWithAddress(insns[i])); cmd2->execute(); } } cerr << "Total vectors generated: " << numVectorsGenerated << endl; }
void find_clusters(int max_cluster_size_signed, SqlDatabase::TransactionPtr transaction) { assert(max_cluster_size_signed >= 0); size_t max_cluster_size = max_cluster_size_signed; SqlDatabase::StatementPtr insert_stmt = transaction->statement("insert into fr_ignored_function_pairs" // 0 1 2 "(func1_id, func2_id, from_cluster_of_size)" " values (?, ?, ?)"); //Get all vetexes and find the union std::string _query_condition = "select func1_id, func2_id from fr_clone_pairs"; SqlDatabase::StatementPtr stmt = transaction->statement(_query_condition); if (stmt->begin() == stmt->end()) return; //Count how many vertices we have for boost graph int VERTEX_COUNT = transaction->statement("select count(*) from semantic_functions")->execute_int(); typedef adjacency_list <vecS, vecS, undirectedS> Graph; typedef graph_traits<Graph>::vertex_descriptor Vertex; typedef graph_traits<Graph>::vertices_size_type VertexIndex; Graph graph(VERTEX_COUNT); std::vector<VertexIndex> rank(num_vertices(graph)); std::vector<Vertex> parent(num_vertices(graph)); typedef VertexIndex* Rank; typedef Vertex* Parent; disjoint_sets<Rank, Parent> ds(&rank[0], &parent[0]); initialize_incremental_components(graph, ds); incremental_components(graph, ds); graph_traits<Graph>::edge_descriptor edge; bool flag; for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) { int func1 = row.get<int>(0); int func2 = row.get<int>(1); boost::tie(edge, flag) = add_edge(func1, func2, graph); ds.union_set(func1,func2); } typedef component_index<VertexIndex> Components; Components components(parent.begin(), parent.end()); std::map<int,int> size_distribution; // Iterate through the component indices BOOST_FOREACH(VertexIndex current_index, components) { std::vector<int> cluster_functions; // Iterate through the child vertex indices for [current_index] BOOST_FOREACH(VertexIndex child_index, components[current_index]) { cluster_functions.push_back(child_index); }
// Show some general info about the function static void show_summary(const SqlDatabase::TransactionPtr &tx, int func_id) { SqlDatabase::Statement::iterator geninfo = tx->statement("select" // 0 1 2 3 " func.entry_va, func.name, file1.name, file2.name," // 4 5 6 7 " func.ninsns, func.isize, func.dsize, func.size," // 8 9 10 11 " func.digest, cmd.hashkey, cmd.begin_time, func.specimen_id," // 12 " func.file_id" " from semantic_functions as func" " join semantic_files as file1 on func.specimen_id = file1.id" " join semantic_files as file2 on func.file_id = file2.id" " join semantic_history as cmd on func.cmd = cmd.hashkey" " where func.id = ?")->bind(0, func_id)->begin(); double returns_value = CloneDetection::function_returns_value(tx, func_id); std::cout <<"Function ID: " <<func_id <<"\n" <<"Entry virtual address: " <<StringUtility::addrToString(geninfo.get<rose_addr_t>(0)) <<"\n" <<"Function name: " <<geninfo.get<std::string>(1) <<"\n" <<"Binary specimen name: " <<geninfo.get<std::string>(2) <<" (id=" <<geninfo.get<int>(11) <<")\n"; if (0!=geninfo.get<std::string>(2).compare(geninfo.get<std::string>(3))) std::cout <<"Binary file name: " <<geninfo.get<std::string>(3) <<" (id=" <<geninfo.get<int>(12) <<")\n"; std::cout <<"Number of instructions: " <<geninfo.get<size_t>(4) <<"\n" <<"Number of bytes for instructions: " <<geninfo.get<size_t>(5) <<"\n" <<"Number of bytes for static data: " <<geninfo.get<size_t>(6) <<"\n" <<"Total number of bytes: " <<geninfo.get<size_t>(7) <<"\n" // not necessarily the sum isize + dsize <<"Function returns a value: " <<round(100.0*returns_value) <<"% probability\n" <<"Function static digest: " <<geninfo.get<std::string>(8) <<"\n" <<"Command that inserted function: " <<geninfo.get<int64_t>(9) <<" (command hashkey)\n" <<"Time that function was inserted: " <<SqlDatabase::humanTimeRenderer(geninfo.get<time_t>(10), 0) <<"\n"; size_t ntests = tx->statement("select count(*) from semantic_fio where func_id=?")->bind(0, func_id)->execute_int(); 
if (0==ntests) { std::cout <<"Number of tests for function: " <<ntests <<"\n"; } else { SqlDatabase::StatementPtr stmt = tx->statement("select fault.name, count(*), 100.0*count(*)/?" " from semantic_fio as fio" " join semantic_faults as fault on fio.status = fault.id" " where func_id = ?" " group by fault.id, fault.name" " order by fault.id")->bind(0, ntests)->bind(1, func_id); SqlDatabase::Table<std::string, size_t, double> statuses(stmt); if (statuses.size()==1) { std::cout <<"Number of tests for function: " <<ntests <<" (all had status " <<statuses[0].v0 <<")\n"; } else { std::cout <<"Number of tests for function: " <<ntests <<"\n"; statuses.headers("Status", "NTests", "Percent"); statuses.line_prefix(" "); statuses.print(std::cout); } } }
int main(int argc, char* argv[]) { std::string database; int norm = 1; double similarity_threshold=1.; size_t k; size_t l; try { options_description desc("Allowed options"); desc.add_options() ("help", "produce a help message") ("database,q", value< string >()->composing(), "the sqlite database that we are to use") ("norm,p", value< int >(&norm), "Exponent in p-norm to use (1 or 2 or 3 (MIT implementation) )") ("hash-function-size,k", value< size_t >(&k), "The number of elements in a single hash function") ("hash-table-count,l", value< size_t >(&l), "The number of separate hash tables to create") ("similarity,t", value< double >(&similarity_threshold), "The similarity threshold that is allowed in a clone pair"); variables_map vm; store(parse_command_line(argc, argv, desc), vm); notify(vm); if (vm.count("help")) { cout << desc; exit(0); } if (vm.count("database")!=1) { std::cerr << "Missing options. Call as: findClones --database <database-name>" << std::endl; exit(1); } database = vm["database"].as<string >(); similarity_threshold = vm["similarity"].as<double>(); cout << "database: " << database << std::endl; } catch(exception& e) { cout << e.what() << "\n"; } std::cout << "The similarity threshold is " << similarity_threshold << std::endl; SqlDatabase::TransactionPtr tx = SqlDatabase::Connection::create(database)->transaction(); tx->statement("update run_parameters set similarity_threshold = ?") ->bind(0, similarity_threshold) ->execute(); OperateOnClusters op(database, norm, similarity_threshold, k , l); op.analyzeClusters(); //op.calculate_false_positives(); tx->commit(); return 0; };
static void postprocess(const SqlDatabase::TransactionPtr &tx) { int windowSize = tx->statement("select window_size from run_parameters limit 1")->execute_int(); int stride = tx->statement("select stride from run_parameters limit 1")->execute_int(); assert(windowSize != 0); assert(stride != 0); cerr << "About to delete from postprocessed_clusters" << endl; tx->execute("delete from postprocessed_clusters"); cerr << "... done" << endl; cerr << "About to postprocess" << endl; SqlDatabase::StatementPtr cmd = tx->statement("select cluster, function_id, index_within_function, vectors_row" " from clusters order by cluster, function_id, index_within_function"); SqlDatabase::StatementPtr insertCmd = tx->statement("insert into postprocessed_clusters" " select * from clusters where row_number = ?"); const size_t numStridesThatMustBeDifferent = windowSize / (stride * 2); string last_cluster = ""; string last_func_id = ""; size_t last_index_within_function = 0; vector<string> rows_in_this_cluster; bool first = true; for (SqlDatabase::Statement::iterator postproc_reader=cmd->begin(); postproc_reader!=cmd->end(); ++postproc_reader) { string cluster = postproc_reader.get<std::string>(0); string function_id = postproc_reader.get<std::string>(1); size_t index_within_function = postproc_reader.get<size_t>(2); string cluster_row_number = postproc_reader.get<std::string>(3); bool differentFunction = cluster != last_cluster || function_id != last_func_id; bool endingCluster = differentFunction; bool beginningNewCluster = first || differentFunction; first = false; if (endingCluster) { if (rows_in_this_cluster.size() > 1) { // Skip clusters that have only one element left for (size_t i = 0; i < rows_in_this_cluster.size(); ++i) { insertCmd->bind(0, rows_in_this_cluster[i]); insertCmd->execute(); } } } if (beginningNewCluster) { last_cluster = cluster; last_func_id = function_id; last_index_within_function = index_within_function; rows_in_this_cluster.clear(); } bool keep = 
beginningNewCluster || (index_within_function >= last_index_within_function + numStridesThatMustBeDifferent); if (keep) { last_index_within_function = index_within_function; rows_in_this_cluster.push_back(cluster_row_number); } } cerr << "... done" << endl; }
// Generate vectors for all instructions under `top` as though they belonged to one pseudo-function named
// "<filename>-all-instructions", and record that pseudo-function in the functions table.
void
createVectorsNotRespectingFunctionBoundaries(SgNode* top, const std::string& filename, size_t windowSize, size_t stride,
                                             const SqlDatabase::TransactionPtr &tx)
{
    int functionId = tx->statement("select coalesce(max(id),-1)+1 from functions")->execute_int(); // zero origin
    std::string functionName = filename + "-all-instructions";
    createVectorsForAllInstructions(top, filename, functionName, functionId, windowSize, stride, tx);

    // Store the same ID that was handed to createVectorsForAllInstructions so the generated vectors refer to this row.
    // Without an explicit ID the row's ID is whatever the database assigns (or none at all), which need not match.
    tx->statement("insert into functions(id, file, function_name) values (?,?,?)")
        ->bind(0, functionId)
        ->bind(1, filename)
        ->bind(2, functionName)
        ->execute();

    std::cout << "Total vectors generated: " << numVectorsGenerated << std::endl;
}
// Create and populate the tmp_events table. static void gather_events(const SqlDatabase::TransactionPtr &tx, int func_id) { tx->execute("create temporary table tmp_events as select * from semantic_fio_trace limit 0"); if (opt.show_trace) { std::string sql = "insert into tmp_events select * from semantic_fio_trace where func_id = ?"; std::vector<std::string> igroups; for (std::set<int>::const_iterator i=opt.traces.begin(); i!=opt.traces.end(); ++i) igroups.push_back(StringUtility::numberToString(*i)); if (!igroups.empty()) sql += " and igroup_id in (" + StringUtility::join(", ", igroups) + ")"; tx->statement(sql)->bind(0, func_id)->execute(); } }
// List tests that are missing error information. static void listMissingErrors(const SqlDatabase::TransactionPtr &tx, const Settings &settings) { std::vector<std::string> args; SqlDatabase::StatementPtr q = tx->statement("select test.id, test.rose_date, test.os, users.name, test.tester" " from test_results test" " join attachments att on test.id = att.test_id" " join users on test.reporting_user = users.uid" + sqlWhereClause(tx, settings, args) + " and" " test.first_error is null and" " test.status <> 'end' and" " att.name = 'Final output'" " order by test.id"); sqlBindArgs(q, args); SqlDatabase::Table<int, // 0: id int, // 1: rose_date std::string, // 2: os std::string, // 3: reporting_user std::string> // 4: tester table(q); if (!table.empty()) { table.headers("Id", "ROSE date", "OS", "Reporting user", "Tester"); table.reprint_headers(50); table.renderers().r1 = &timeRenderer; table.print(std::cout); } }
// Print the assembly listing of a function in instruction-position order.  Each line is prefixed with the number of
// times the instruction was executed (blank when there is no positive count) and followed by the events recorded at
// that address.
static void
list_assembly(const SqlDatabase::TransactionPtr &tx, int func_id)
{
    Events events;
    gather_events(tx, func_id);
    load_events(tx, func_id, events);

    SqlDatabase::StatementPtr stmt = tx->statement("select address, assembly from semantic_instructions where func_id = ?"
                                                   " order by position");
    stmt->bind(0, func_id);

    SqlDatabase::Statement::iterator insn = stmt->begin();
    while (insn != stmt->end()) {
        const rose_addr_t addr = insn.get<rose_addr_t>(0);
        const std::string assembly = insn.get<std::string>(1);
        Events::const_iterator found = events.find(addr);
        const bool has_events = found != events.end();

        // Assembly line prefix
        if (has_events && found->second.nexecuted>0) {
            std::cout <<std::setw(9) <<std::right <<found->second.nexecuted <<"x ";
        } else {
            std::cout <<std::string(11, ' ');
        }

        // Assembly instruction
        std::cout <<"| " <<StringUtility::addrToString(addr) <<": " <<assembly <<"\n";

        if (has_events)
            show_events(found->second);

        ++insn;
    }
}
void computational_equivalent_classes(std::map<int,int>& norm_map) { SqlDatabase::StatementPtr stmt = transaction->statement("select func_id, equivalent_func_id from equivalent_classes"); for (SqlDatabase::Statement::iterator row=stmt->begin(); row!= stmt->end(); ++row) norm_map[row.get<int>(0)] = row.get<int>(1); }
// Load the calls recorded for one function and input group, ordered by position.
//
// Only callees listed in tmp_interesting_funcs are returned.  When call_depth is non-negative, the result is further
// restricted to calls whose caller is func_id itself.  When expand_ncalls is set, a callee appears once per recorded call
// (ncalls times); otherwise it appears once per row.  The ignore_no_compares argument is not consulted.
//
// Returns a newly allocated vector that the caller owns.
CallVec*
load_api_calls_for(int func_id, int igroup_id, bool ignore_no_compares, int call_depth, bool expand_ncalls)
{
    const bool direct_calls_only = call_depth >= 0;

    SqlDatabase::StatementPtr stmt = transaction->statement("select distinct fio.pos, fio.callee_id, fio.ncalls"
                                                            " from semantic_fio_calls as fio"
                                                            " join tmp_interesting_funcs as f1"
                                                            // filter out functions with no compares
                                                            " on f1.func_id = fio.callee_id"
                                                            // filter on current parameters
                                                            " where fio.func_id = ? and fio.igroup_id = ?"
                                                            // filter out function not called directly
                                                            + std::string(direct_calls_only ? " and fio.caller_id = ?" : "")
                                                            +" order by fio.pos");
    stmt->bind(0, func_id);
    stmt->bind(1, igroup_id);
    if (direct_calls_only)
        stmt->bind(2, func_id);

    CallVec* call_vec = new CallVec;
    try {
        for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) {
            int callee_id = row.get<int>(1);
            int ncalls = row.get<int>(2);
            if (expand_ncalls) {
                for (int i = 0; i < ncalls; i++)
                    call_vec->push_back(callee_id);
            } else {
                call_vec->push_back(callee_id);
            }
        }
    } catch (...) {
        // Nobody else holds the vector yet, so release it before passing the failure on
        delete call_vec;
        throw;
    }
    return call_vec;
}
// Update the database by filling in test_results.first_error information for those tests that don't have a cached first error // but which failed and have output. static void updateDatabase(const SqlDatabase::TransactionPtr &tx, const Settings &settings) { std::vector<std::string> args; SqlDatabase::StatementPtr q = tx->statement("update test_results test" " set first_error = substring(" #if 0 // [Robb Matzke 2016-02-08] // Look at all output stored in the database (which is typically only the last // few hundred lines of the complete output). "att.content " #else // This coalesce tries to find where a parallel make command failed and looks // only at the following serial make, which is assumed to follow the parallel // make. "coalesce(substring(att.content from '(\\nmake: \\*\\*\\* \\[[-_a-zA-Z0-9]+\\] Error 1\n.+)'), att.content) " #endif "from '(?n)(" //----- regular expressions begin ----- "\\merror: .+" "|catastrophic error: *\\n.+" "|^.* \\[err\\]: terminated after .+" "|^.* \\[err\\]: command died with .+" "|^.* \\[err\\]: +what\\(\\): .*" //----- regular expressions end ----- ")')" " from attachments att" + sqlWhereClause(tx, settings, args) + " and" " test.id = att.test_id and" " test.first_error is null and" " test.status <> 'end' and" " att.name = 'Final output'"); sqlBindArgs(q, args); q->execute(); }
// Resolve a user-supplied function specification to a function ID.
//
// The specification is tried first as a numeric function ID.  Failing that, functions are searched by entry address (when
// the specification is numeric), by function name, and by the final component of the file name.  Exactly one match yields
// its ID; no match or several matches print a message (and, for several, the candidates) and exit with status zero.
static int
find_function_or_exit(const SqlDatabase::TransactionPtr &tx, char *func_spec)
{
    // Interpret the specification as an integer; -1 means "not a number"
    char *rest;
    errno = 0;
    int numeric_spec = strtol(func_spec, &rest, 0);
    if (errno || rest==func_spec || *rest)
        numeric_spec = -1;

    // A number that is an existing function ID wins outright
    if (-1!=numeric_spec &&
        1==tx->statement("select count(*) from semantic_functions where id = ?")->bind(0, numeric_spec)->execute_int()) {
        assert(numeric_spec>=0);
        return numeric_spec;
    }

    // Otherwise look for candidates by address, function name, and file name
    const std::string select_from = "select func.id, func.entry_va, func.name, func.ninsns, file.name"
                                    " from semantic_functions as func"
                                    " join semantic_files as file on func.file_id = file.id";
    SqlDatabase::StatementPtr by_address = tx->statement(select_from + " where entry_va = ?")->bind(0, numeric_spec);
    SqlDatabase::StatementPtr by_name = tx->statement(select_from + " where func.name = ?")->bind(0, func_spec);
    SqlDatabase::StatementPtr by_file = tx->statement(select_from +
                                                      " where file.name like"
                                                      " '%/"+SqlDatabase::escape(func_spec, tx->driver(), false)+"'");

    SqlDatabase::Table<int, rose_addr_t, std::string, size_t, std::string> candidates;
    if (-1!=numeric_spec)
        candidates.insert(by_address);
    candidates.insert(by_name);
    candidates.insert(by_file);
    candidates.headers("ID", "Entry VA", "Function Name", "NInsns", "Specimen Name");
    candidates.renderers().r1 = &SqlDatabase::addr32Renderer;

    if (candidates.empty()) {
        std::cout <<argv0 <<": no function found by ID, address, or name: " <<func_spec <<"\n";
        exit(0);
    }
    if (1!=candidates.size()) {
        std::cout <<argv0 <<": function specification is ambiguous: " <<func_spec <<"\n";
        candidates.print(std::cout);
        exit(0);
    }

    const int func_id = candidates[0].v0;
    assert(func_id>=0);
    return func_id;
}
// Clear all cached error information from the database. static void clearErrors(const SqlDatabase::TransactionPtr &tx, const Settings &settings) { std::vector<std::string> args; SqlDatabase::StatementPtr q = tx->statement("update test_results set first_error = null" + sqlWhereClause(tx, settings, args)); sqlBindArgs(q, args); q->execute(); }
// Load all enabled rows of the dependencies table, collecting the values of each dependency name in the order the
// database returns them.
static Dependencies
loadAllDependencies(const SqlDatabase::TransactionPtr &tx)
{
    Dependencies result;
    SqlDatabase::StatementPtr query = tx->statement("select name, value from dependencies where enabled <> 0");

    SqlDatabase::Statement::iterator row = query->begin();
    while (row != query->end()) {
        result.insertMaybeDefault(row.get<std::string>(0)).push_back(row.get<std::string>(1));
        ++row;
    }
    return result;
}
// Append one row to the clusters table.  The row's ID is one more than the largest ID currently in the table (1 for an
// empty table).
void
insert_into_clusters(const SqlDatabase::TransactionPtr &tx, int cluster, int function_id, int index_within_function,
                     int vectors_row, double dist)
{
    const int next_id = tx->statement("select coalesce(max(id),0)+1 from clusters")->execute_int();

    SqlDatabase::StatementPtr insert =
        tx->statement("insert into clusters"
                      // 0   1        2            3                      4            5
                      " (id, cluster, function_id, index_within_function, vectors_row, dist)"
                      " values (?, ?,?,?,?,?)");
    insert->bind(0, next_id);
    insert->bind(1, cluster);
    insert->bind(2, function_id);
    insert->bind(3, index_within_function);
    insert->bind(4, vectors_row);
    insert->bind(5, dist);
    insert->execute();
}
// Record the instruction count of a function in the function_statistics table.  The filename and functionName
// arguments are accepted but not stored.
void
addFunctionStatistics(const SqlDatabase::TransactionPtr &tx, const std::string& filename, const std::string& functionName,
                      size_t functionId, size_t numInstructions)
{
    SqlDatabase::StatementPtr insert =
        tx->statement("insert into function_statistics (function_id, num_instructions) values (?,?)");
    insert->bind(0, functionId);
    insert->bind(1, numInstructions);
    insert->execute();
}
// Copy every line of the tmp_src table into the listing, keyed by its (file ID, line number) source position.
static void
load_source_code(const SqlDatabase::TransactionPtr &tx, Listing &listing/*in,out*/)
{
    SqlDatabase::StatementPtr query = tx->statement("select file_id, linenum, line from tmp_src");

    SqlDatabase::Statement::iterator row = query->begin();
    while (row != query->end()) {
        const int file_id = row.get<int>(0);
        const int linenum = row.get<int>(1);
        listing[SourcePosition(file_id, linenum)].source_code = row.get<std::string>(2);
        ++row;
    }
}
// Create the temporary table tmp_src holding the distinct source lines that lie within ten lines of any source line
// referenced by an instruction in tmp_insns.
static void
gather_source_code(const SqlDatabase::TransactionPtr &tx)
{
    const std::string sql = "create temporary table tmp_src as"
                            " select distinct src.*"
                            " from tmp_insns as insn"
                            " join semantic_sources as src"
                            " on insn.src_file_id=src.file_id"
                            " and src.linenum >= insn.src_line-10"
                            " and src.linenum <= insn.src_line+10";
    tx->execute(sql);
}
int main(int argc, char *argv[]) { // Parse command-line opt.nprocs = nProcessors(); int argno = parse_commandline(argc, argv); if (argno+1!=argc) usage(1); std::string databaseUrl = argv[argno++]; SqlDatabase::TransactionPtr tx = SqlDatabase::Connection::create(databaseUrl)->transaction(); int64_t cmd_id = start_command(tx, argc, argv, "running tests"); // Load worklist MultiWork work; load_sorted_work(work/*out*/); if (work.empty()) return 0; // Load information about files. The transaction is not saved anywhere. FilesTable files(tx); // We must commit our transaction before we fork, otherwise the child processes won't be able to see the rows we've // inserted. Specifically, the row in the semantic_history table that says who we are. Destroy the smart pointer so that // the connection is even closed. tx->commit(); tx.reset(); // Process work items for each specimen sequentially BOOST_FOREACH (const Work &workForSpecimen, work) if (forkAndWait(SpecimenProcessor(workForSpecimen, files, databaseUrl, cmd_id))) exit(1); // Indicate that this command is finished tx = SqlDatabase::Connection::create(databaseUrl)->transaction(); finish_command(tx, cmd_id, "ran tests"); tx->commit(); return 0; }
// Reset detection_parameters for exact matching (similarity threshold 1.0, false negative rate 0), then run the program
// `Exec` with "--database <databaseName>" in a child process and wait for it to finish.  Exits with status 1 if the fork
// fails; aborts if waiting for the child fails.
static void
callExact(const SqlDatabase::TransactionPtr &tx, const std::string databaseName, const string& Exec)
{
    // FIXME: We can't pass parameters to the exec'd process this way because the parent's SQL statements are
    //        being executed in a transaction -- they won't be visible in the child. [Robb P. Matzke 2013-08-12]
    tx->execute("delete from detection_parameters");
    tx->statement("insert into detection_parameters (similarity_threshold, false_negative_rate) values (?, ?)")
        ->bind(0, 1.0)
        ->bind(1, 0)
        ->execute();

    std::cout << "Start running exact clone detection" << std::endl;

    pid_t p = fork();
    if (p == -1) { // Error
        perror("fork: ");
        exit (1);
    }

    if (p == 0) { // Child
        std::vector<char*> args;
        args.push_back(strdup(Exec.c_str()));
        args.push_back(strdup("--database"));
        args.push_back(strdup(databaseName.c_str()));

        // Render the command line before the argv terminator is appended: inserting a null char* into a stream is
        // undefined behavior, and the terminator used to be part of this loop.
        std::ostringstream outStr;
        for (size_t i = 0; i < args.size(); ++i)
            outStr << args[i] << " ";
        std::cout << "Calling " << outStr.str() << std::endl;

        args.push_back(0);                              // execv requires a null-terminated argument vector
        execv(Exec.c_str(), &args[0]);
        perror("execv: ");                              // reached only when execv fails
        exit (1);
    } else { // Parent
        int status;
        if (waitpid(p, &status, 0) == -1) {
            perror("waitpid");
            abort();
        }
        std::cerr << "Status: " << status << std::endl;
        std::cerr << "Done waiting for Exact Clone Detection" << std::endl;
    }
}
// Create the tmp_insns table to hold all the instructions for the function-to-be-listed and all the instructions of all // the functions that are mentioned in events. static void gather_instructions(const SqlDatabase::TransactionPtr tx, int func_id, const Events &events) { std::set<std::string> func_ids; func_ids.insert(StringUtility::numberToString(func_id)); for (Events::const_iterator ei=events.begin(); ei!=events.end(); ++ei) func_ids.insert(StringUtility::numberToString(ei->second.func_id)); std::string sql = "create temporary table tmp_insns as" " select * from semantic_instructions" " where func_id in ("+StringUtility::join_range(", ", func_ids.begin(), func_ids.end())+")"; tx->execute(sql); }
// Count how many tests are missing first_error information when it should be available. static void countMissingErrors(const SqlDatabase::TransactionPtr &tx, const Settings &settings) { std::vector<std::string> args; SqlDatabase::StatementPtr q = tx->statement("select count(*)" " from test_results test" " join attachments att on test.id = att.test_id" + sqlWhereClause(tx, settings, args) + " and" " test.first_error is null and" " test.status <> 'end' and" " att.name = 'Final output'"); sqlBindArgs(q, args); int n = q->execute_int(); std::cout <<n <<"\n"; }
// Store one signature vector in the vectors table and count it in numVectorsGenerated.
//
// The row records the owning function and the vector's index within it, the addresses of the first and last
// instructions of the window, the size of the extent covered by the window's instructions, the sum of the vector's
// counts, the compressed counts in base-64, and the base-64 MD5 digest of the normalized instruction text.  The vector ID
// is one more than the largest ID already in the table.  firstInsn must refer to at least windowSize instructions.
void
addVectorToDatabase(const SqlDatabase::TransactionPtr &tx, const SignatureVector& vec, const std::string& functionName,
                    size_t functionId, size_t indexWithinFunction, const std::string& normalizedUnparsedInstructions,
                    SgAsmx86Instruction* firstInsn[], const std::string& filename, size_t windowSize, size_t stride)
{
    ++numVectorsGenerated;

    std::vector<uint8_t> compressedCounts = compressVector(vec.getBase(), SignatureVector::Size);

    size_t vectorSum = 0;
    for (size_t element=0; element<SignatureVector::Size; ++element)
        vectorSum += vec[element];

    // Address extent covered by the instructions of this window
    ExtentMap extent;
    for (size_t w=0; w<windowSize; ++w) {
        SgAsmx86Instruction *insn = firstInsn[w];
        extent.insert(Extent(insn->get_address(), insn->get_size()));
    }

    // Digest of the normalized instruction sequence
    unsigned char digest[16];
    MD5((const unsigned char*)normalizedUnparsedInstructions.data(), normalizedUnparsedInstructions.size(), digest);

    SqlDatabase::StatementPtr insert =
        tx->statement("insert into vectors"
                      // 0   1            2                      3     4             5
                      " (id, function_id, index_within_function, line, last_insn_va, size,"
                      // 6             7           8
                      "sum_of_counts, counts_b64, instr_seq_b64)"
                      " values (?,?,?,?,?,?,?,?,?)");
    const int vector_id = tx->statement("select coalesce(max(id),0)+1 from vectors")->execute_int(); // 1-origin

    insert->bind(0, vector_id);
    insert->bind(1, functionId);
    insert->bind(2, indexWithinFunction);
    insert->bind(3, firstInsn[0]->get_address());
    insert->bind(4, firstInsn[windowSize-1]->get_address());
    insert->bind(5, extent.size());
    insert->bind(6, vectorSum);
    insert->bind(7, StringUtility::encode_base64(&compressedCounts[0], compressedCounts.size()));
    insert->bind(8, StringUtility::encode_base64(digest, 16));
    insert->execute();
}
// Append one row to the postprocessed_clusters table.
void
insert_into_postprocessed_clusters(const SqlDatabase::TransactionPtr &tx, int cluster, int function_id,
                                   int index_within_function, int vectors_row, double dist)
{
    SqlDatabase::StatementPtr insert =
        tx->statement("insert into postprocessed_clusters"
                      // 0        1            2                      3            4
                      " (cluster, function_id, index_within_function, vectors_row, dist)"
                      " values(?,?,?,?,?)");
    insert->bind(0, cluster);
    insert->bind(1, function_id);
    insert->bind(2, index_within_function);
    insert->bind(3, vectors_row);
    insert->bind(4, dist);
    insert->execute();
}
int main(int argc, char *argv[]) { Sawyer::initializeLibrary(); mlog = Sawyer::Message::Facility("tool"); Sawyer::Message::mfacilities.insertAndAdjust(mlog); // Parse the command-line Settings settings; std::vector<std::string> args = parseCommandLine(argc, argv, settings); SqlDatabase::TransactionPtr tx = SqlDatabase::Connection::create(settings.databaseUri)->transaction(); if (args.size() != 1) { mlog[FATAL] <<"incorrect usage; see --help\n"; exit(1); } if (args[0] == "clear") { clearErrors(tx, settings); } else if (args[0] == "update") { updateDatabase(tx, settings); } else if (args[0] == "missing") { listMissingErrors(tx, settings); } else if (args[0] == "count-missing") { countMissingErrors(tx, settings); } else if (args[0] == "list") { listErrors(tx, settings); } else { mlog[FATAL] <<"unknown command \"" <<StringUtility::cEscape(args[0]) <<"\"; see --help\n"; exit(1); } if (settings.dryRun) { mlog[WARN] <<"database was not modified (running with --dry-run)\n"; } else { tx->commit(); } }