void compute_percent_similarity_statistics(double bucket_size, double increment, SqlDatabase::TransactionPtr transaction) { int num_pairs = transaction->statement("select count(*) from semantic_funcsim")->execute_int(); transaction->execute("drop table IF EXISTS fr_percent_similar"); transaction->execute("create table fr_percent_similar(similarity_low double precision, similarity_middle double precision," " similarity_high double precision, percent double precision, num_matches integer);"); SqlDatabase::StatementPtr pecent_similar_stmt = transaction->statement("insert into fr_percent_similar" // 0 1 2 "(similarity_low, similarity_middle, similarity_high," // 3 4 " percent, num_matches) " " values (?, ?, ?, ?, ?)"); for (double cur_bucket = 0.0; cur_bucket <= 1.0+bucket_size; cur_bucket+=increment) { int num_matches = transaction->statement("select count(*) from semantic_funcsim where " " similarity >= " + boost::lexical_cast<std::string>(cur_bucket - bucket_size) + " and similarity < " + boost::lexical_cast<std::string>(cur_bucket + bucket_size))->execute_int(); pecent_similar_stmt->bind(0, cur_bucket - bucket_size < 0 ? 0 : cur_bucket - bucket_size); pecent_similar_stmt->bind(1, cur_bucket); pecent_similar_stmt->bind(2, cur_bucket + bucket_size >= 1.0 ? 1.0 : cur_bucket + bucket_size); pecent_similar_stmt->bind(3, num_pairs > 0 ? ((double) num_matches*100.0)/num_pairs : 0); pecent_similar_stmt->bind(4, num_matches); pecent_similar_stmt->execute(); } }
// Reads window_size and stride from the first row of run_parameters into the output arguments.  Both values are
// asserted to be non-zero.
void get_run_parameters(const SqlDatabase::TransactionPtr &tx, int& windowSize, int& stride)
{
    SqlDatabase::StatementPtr windowQuery = tx->statement("select window_size from run_parameters limit 1");
    windowSize = windowQuery->execute_int();

    SqlDatabase::StatementPtr strideQuery = tx->statement("select stride from run_parameters limit 1");
    stride = strideQuery->execute_int();

    assert(0 != windowSize);
    assert(0 != stride);
}
void createVectorsRespectingFunctionBoundaries(SgNode* top, const std::string& filename, size_t windowSize, size_t stride, const SqlDatabase::TransactionPtr &tx) { struct InstructionSelector: SgAsmFunction::NodeSelector { virtual bool operator()(SgNode *node) { return isSgAsmInstruction(node)!=NULL; } } iselector; struct DataSelector: SgAsmFunction::NodeSelector { virtual bool operator()(SgNode *node) { return isSgAsmStaticData(node)!=NULL; } } dselector; SqlDatabase::StatementPtr cmd1 = tx->statement("insert into functions" // 0 1 2 3 4 5 6 " (id, file, function_name, entry_va, isize, dsize, size)" " values(?,?,?,?,?,?,?)"); SqlDatabase::StatementPtr cmd2 = tx->statement("insert into instructions" // 0 1 2 3 4 " (address, size, function_id, index_within_function, assembly)" " values (?,?,?,?,?)"); vector<SgAsmFunction*> funcs = SageInterface::querySubTree<SgAsmFunction>(top); int functionId = tx->statement("select coalesce(max(id),-1)+1 from functions")->execute_int(); // zero origin for (vector<SgAsmFunction*>::iterator fi=funcs.begin(); fi!=funcs.end(); ++fi, ++functionId) { ExtentMap e_insns, e_data, e_total; (*fi)->get_extent(&e_insns, NULL, NULL, &iselector); (*fi)->get_extent(&e_data, NULL, NULL, &dselector); (*fi)->get_extent(&e_total); createVectorsForAllInstructions(*fi, filename, (*fi)->get_name(), functionId, windowSize, stride, tx); cmd1->bind(0, functionId); cmd1->bind(1, filename); cmd1->bind(2, (*fi)->get_name() ); cmd1->bind(3, (*fi)->get_entry_va()); cmd1->bind(4, e_insns.size()); cmd1->bind(5, e_data.size()); cmd1->bind(6, e_total.size()); cmd1->execute(); vector<SgAsmInstruction*> insns = SageInterface::querySubTree<SgAsmInstruction>(*fi); for (size_t i=0; i<insns.size(); ++i) { cmd2->bind(0, insns[i]->get_address()); cmd2->bind(1, insns[i]->get_size()); cmd2->bind(2, functionId); cmd2->bind(3, i); cmd2->bind(4, unparseInstructionWithAddress(insns[i])); cmd2->execute(); } } cerr << "Total vectors generated: " << numVectorsGenerated << endl; }
void find_clusters(int max_cluster_size_signed, SqlDatabase::TransactionPtr transaction) { assert(max_cluster_size_signed >= 0); size_t max_cluster_size = max_cluster_size_signed; SqlDatabase::StatementPtr insert_stmt = transaction->statement("insert into fr_ignored_function_pairs" // 0 1 2 "(func1_id, func2_id, from_cluster_of_size)" " values (?, ?, ?)"); //Get all vetexes and find the union std::string _query_condition = "select func1_id, func2_id from fr_clone_pairs"; SqlDatabase::StatementPtr stmt = transaction->statement(_query_condition); if (stmt->begin() == stmt->end()) return; //Count how many vertices we have for boost graph int VERTEX_COUNT = transaction->statement("select count(*) from semantic_functions")->execute_int(); typedef adjacency_list <vecS, vecS, undirectedS> Graph; typedef graph_traits<Graph>::vertex_descriptor Vertex; typedef graph_traits<Graph>::vertices_size_type VertexIndex; Graph graph(VERTEX_COUNT); std::vector<VertexIndex> rank(num_vertices(graph)); std::vector<Vertex> parent(num_vertices(graph)); typedef VertexIndex* Rank; typedef Vertex* Parent; disjoint_sets<Rank, Parent> ds(&rank[0], &parent[0]); initialize_incremental_components(graph, ds); incremental_components(graph, ds); graph_traits<Graph>::edge_descriptor edge; bool flag; for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) { int func1 = row.get<int>(0); int func2 = row.get<int>(1); boost::tie(edge, flag) = add_edge(func1, func2, graph); ds.union_set(func1,func2); } typedef component_index<VertexIndex> Components; Components components(parent.begin(), parent.end()); std::map<int,int> size_distribution; // Iterate through the component indices BOOST_FOREACH(VertexIndex current_index, components) { std::vector<int> cluster_functions; // Iterate through the child vertex indices for [current_index] BOOST_FOREACH(VertexIndex child_index, components[current_index]) { cluster_functions.push_back(child_index); }
// Show some general info about the function static void show_summary(const SqlDatabase::TransactionPtr &tx, int func_id) { SqlDatabase::Statement::iterator geninfo = tx->statement("select" // 0 1 2 3 " func.entry_va, func.name, file1.name, file2.name," // 4 5 6 7 " func.ninsns, func.isize, func.dsize, func.size," // 8 9 10 11 " func.digest, cmd.hashkey, cmd.begin_time, func.specimen_id," // 12 " func.file_id" " from semantic_functions as func" " join semantic_files as file1 on func.specimen_id = file1.id" " join semantic_files as file2 on func.file_id = file2.id" " join semantic_history as cmd on func.cmd = cmd.hashkey" " where func.id = ?")->bind(0, func_id)->begin(); double returns_value = CloneDetection::function_returns_value(tx, func_id); std::cout <<"Function ID: " <<func_id <<"\n" <<"Entry virtual address: " <<StringUtility::addrToString(geninfo.get<rose_addr_t>(0)) <<"\n" <<"Function name: " <<geninfo.get<std::string>(1) <<"\n" <<"Binary specimen name: " <<geninfo.get<std::string>(2) <<" (id=" <<geninfo.get<int>(11) <<")\n"; if (0!=geninfo.get<std::string>(2).compare(geninfo.get<std::string>(3))) std::cout <<"Binary file name: " <<geninfo.get<std::string>(3) <<" (id=" <<geninfo.get<int>(12) <<")\n"; std::cout <<"Number of instructions: " <<geninfo.get<size_t>(4) <<"\n" <<"Number of bytes for instructions: " <<geninfo.get<size_t>(5) <<"\n" <<"Number of bytes for static data: " <<geninfo.get<size_t>(6) <<"\n" <<"Total number of bytes: " <<geninfo.get<size_t>(7) <<"\n" // not necessarily the sum isize + dsize <<"Function returns a value: " <<round(100.0*returns_value) <<"% probability\n" <<"Function static digest: " <<geninfo.get<std::string>(8) <<"\n" <<"Command that inserted function: " <<geninfo.get<int64_t>(9) <<" (command hashkey)\n" <<"Time that function was inserted: " <<SqlDatabase::humanTimeRenderer(geninfo.get<time_t>(10), 0) <<"\n"; size_t ntests = tx->statement("select count(*) from semantic_fio where func_id=?")->bind(0, func_id)->execute_int(); 
if (0==ntests) { std::cout <<"Number of tests for function: " <<ntests <<"\n"; } else { SqlDatabase::StatementPtr stmt = tx->statement("select fault.name, count(*), 100.0*count(*)/?" " from semantic_fio as fio" " join semantic_faults as fault on fio.status = fault.id" " where func_id = ?" " group by fault.id, fault.name" " order by fault.id")->bind(0, ntests)->bind(1, func_id); SqlDatabase::Table<std::string, size_t, double> statuses(stmt); if (statuses.size()==1) { std::cout <<"Number of tests for function: " <<ntests <<" (all had status " <<statuses[0].v0 <<")\n"; } else { std::cout <<"Number of tests for function: " <<ntests <<"\n"; statuses.headers("Status", "NTests", "Percent"); statuses.line_prefix(" "); statuses.print(std::cout); } } }
static void postprocess(const SqlDatabase::TransactionPtr &tx) { int windowSize = tx->statement("select window_size from run_parameters limit 1")->execute_int(); int stride = tx->statement("select stride from run_parameters limit 1")->execute_int(); assert(windowSize != 0); assert(stride != 0); cerr << "About to delete from postprocessed_clusters" << endl; tx->execute("delete from postprocessed_clusters"); cerr << "... done" << endl; cerr << "About to postprocess" << endl; SqlDatabase::StatementPtr cmd = tx->statement("select cluster, function_id, index_within_function, vectors_row" " from clusters order by cluster, function_id, index_within_function"); SqlDatabase::StatementPtr insertCmd = tx->statement("insert into postprocessed_clusters" " select * from clusters where row_number = ?"); const size_t numStridesThatMustBeDifferent = windowSize / (stride * 2); string last_cluster = ""; string last_func_id = ""; size_t last_index_within_function = 0; vector<string> rows_in_this_cluster; bool first = true; for (SqlDatabase::Statement::iterator postproc_reader=cmd->begin(); postproc_reader!=cmd->end(); ++postproc_reader) { string cluster = postproc_reader.get<std::string>(0); string function_id = postproc_reader.get<std::string>(1); size_t index_within_function = postproc_reader.get<size_t>(2); string cluster_row_number = postproc_reader.get<std::string>(3); bool differentFunction = cluster != last_cluster || function_id != last_func_id; bool endingCluster = differentFunction; bool beginningNewCluster = first || differentFunction; first = false; if (endingCluster) { if (rows_in_this_cluster.size() > 1) { // Skip clusters that have only one element left for (size_t i = 0; i < rows_in_this_cluster.size(); ++i) { insertCmd->bind(0, rows_in_this_cluster[i]); insertCmd->execute(); } } } if (beginningNewCluster) { last_cluster = cluster; last_func_id = function_id; last_index_within_function = index_within_function; rows_in_this_cluster.clear(); } bool keep = 
beginningNewCluster || (index_within_function >= last_index_within_function + numStridesThatMustBeDifferent); if (keep) { last_index_within_function = index_within_function; rows_in_this_cluster.push_back(cluster_row_number); } } cerr << "... done" << endl; }
// Treats everything under `top` as one pseudo-function named "<filename>-all-instructions": generates its vectors
// and records it in the "functions" table.
//
// The id passed to createVectorsForAllInstructions is continued from the largest id in "functions" (zero for an
// empty table).  The same id is stored explicitly in the inserted row, as createVectorsRespectingFunctionBoundaries
// does; previously the row was inserted without an id, so the stored id was left to the database and was not
// guaranteed to equal the id handed to the vector generator.
void createVectorsNotRespectingFunctionBoundaries(SgNode* top, const std::string& filename, size_t windowSize, size_t stride,
                                                  const SqlDatabase::TransactionPtr &tx)
{
    int functionId = tx->statement("select coalesce(max(id),-1)+1 from functions")->execute_int(); // zero origin
    std::string functionName = filename + "-all-instructions";
    createVectorsForAllInstructions(top, filename, functionName, functionId, windowSize, stride, tx);
    tx->statement("insert into functions(id, file, function_name) values (?,?,?)")
        ->bind(0, functionId)
        ->bind(1, filename)
        ->bind(2, functionName)
        ->execute();
    cout << "Total vectors generated: " << numVectorsGenerated << endl;
}
// Update the database by filling in test_results.first_error information for those tests that don't have a cached first error // but which failed and have output. static void updateDatabase(const SqlDatabase::TransactionPtr &tx, const Settings &settings) { std::vector<std::string> args; SqlDatabase::StatementPtr q = tx->statement("update test_results test" " set first_error = substring(" #if 0 // [Robb Matzke 2016-02-08] // Look at all output stored in the database (which is typically only the last // few hundred lines of the complete output). "att.content " #else // This coalesce tries to find where a parallel make command failed and looks // only at the following serial make, which is assumed to follow the parallel // make. "coalesce(substring(att.content from '(\\nmake: \\*\\*\\* \\[[-_a-zA-Z0-9]+\\] Error 1\n.+)'), att.content) " #endif "from '(?n)(" //----- regular expressions begin ----- "\\merror: .+" "|catastrophic error: *\\n.+" "|^.* \\[err\\]: terminated after .+" "|^.* \\[err\\]: command died with .+" "|^.* \\[err\\]: +what\\(\\): .*" //----- regular expressions end ----- ")')" " from attachments att" + sqlWhereClause(tx, settings, args) + " and" " test.id = att.test_id and" " test.first_error is null and" " test.status <> 'end' and" " att.name = 'Final output'"); sqlBindArgs(q, args); q->execute(); }
void computational_equivalent_classes(std::map<int,int>& norm_map) { SqlDatabase::StatementPtr stmt = transaction->statement("select func_id, equivalent_func_id from equivalent_classes"); for (SqlDatabase::Statement::iterator row=stmt->begin(); row!= stmt->end(); ++row) norm_map[row.get<int>(0)] = row.get<int>(1); }
// List tests that are missing error information. static void listMissingErrors(const SqlDatabase::TransactionPtr &tx, const Settings &settings) { std::vector<std::string> args; SqlDatabase::StatementPtr q = tx->statement("select test.id, test.rose_date, test.os, users.name, test.tester" " from test_results test" " join attachments att on test.id = att.test_id" " join users on test.reporting_user = users.uid" + sqlWhereClause(tx, settings, args) + " and" " test.first_error is null and" " test.status <> 'end' and" " att.name = 'Final output'" " order by test.id"); sqlBindArgs(q, args); SqlDatabase::Table<int, // 0: id int, // 1: rose_date std::string, // 2: os std::string, // 3: reporting_user std::string> // 4: tester table(q); if (!table.empty()) { table.headers("Id", "ROSE date", "OS", "Reporting user", "Tester"); table.reprint_headers(50); table.renderers().r1 = &timeRenderer; table.print(std::cout); } }
// Returns a newly allocated vector of callee ids recorded in semantic_fio_calls for the given function and input
// group, ordered by call position and restricted to callees listed in tmp_interesting_funcs.  When call_depth is
// non-negative only calls whose caller_id equals func_id are included.  When expand_ncalls is set each callee is
// appended ncalls times, otherwise once.  The ignore_no_compares argument is not consulted here.
CallVec* load_api_calls_for(int func_id, int igroup_id, bool ignore_no_compares, int call_depth, bool expand_ncalls)
{
    const bool direct_only = call_depth >= 0;

    std::string sql = "select distinct fio.pos, fio.callee_id, fio.ncalls"
                      " from semantic_fio_calls as fio"
                      " join tmp_interesting_funcs as f1"
                      // filter out functions with no compares
                      " on f1.func_id = fio.callee_id"
                      // filter on current parameters
                      " where fio.func_id = ? and fio.igroup_id = ?";
    // filter out function not called directly
    if (direct_only)
        sql += " and fio.caller_id = ?";
    sql += " order by fio.pos";

    SqlDatabase::StatementPtr query = transaction->statement(sql);
    query->bind(0, func_id);
    query->bind(1, igroup_id);
    if (direct_only)
        query->bind(2, func_id);

    CallVec* calls = new CallVec;
    for (SqlDatabase::Statement::iterator row = query->begin(); row != query->end(); ++row) {
        const int callee_id = row.get<int>(1);
        const int ncalls = row.get<int>(2);
        const int repeat = expand_ncalls ? ncalls : 1;
        for (int n = 0; n < repeat; ++n)
            calls->push_back(callee_id);
    }
    return calls;
}
// Prints the function's instructions in position order.  Each line is prefixed with the instruction's execution
// count when events with a positive count exist for its address (blank otherwise), and is followed by the events
// recorded at that address, if any.
static void list_assembly(const SqlDatabase::TransactionPtr &tx, int func_id)
{
    Events events;
    gather_events(tx, func_id);
    load_events(tx, func_id, events);

    SqlDatabase::StatementPtr query = tx->statement("select address, assembly from semantic_instructions where func_id = ?"
                                                    " order by position")->bind(0, func_id);
    for (SqlDatabase::Statement::iterator row = query->begin(); row != query->end(); ++row) {
        const rose_addr_t addr = row.get<rose_addr_t>(0);
        const std::string assembly = row.get<std::string>(1);
        const Events::const_iterator found = events.find(addr);
        const bool has_events = found != events.end();

        // Assembly line prefix
        if (has_events && found->second.nexecuted > 0) {
            std::cout <<std::setw(9) <<std::right <<found->second.nexecuted <<"x ";
        } else {
            std::cout <<std::string(11, ' ');
        }

        // Assembly instruction
        std::cout <<"| " <<StringUtility::addrToString(addr) <<": " <<assembly <<"\n";

        if (has_events)
            show_events(found->second);
    }
}
// Resolves a user-supplied function specification to a function id.
//
// If the specification parses completely as an integer other than -1 and exactly one function has that id, that id
// is returned.  Otherwise functions are looked up by entry address (numeric specifications only), by function name,
// and by file name ending in "/<specification>".  Exactly one match yields its id; zero or several matches print a
// message (and the candidate table when ambiguous) and terminate the process with exit status 0.
static int find_function_or_exit(const SqlDatabase::TransactionPtr &tx, char *func_spec)
{
    // Interpret the specification as a number; -1 means "not a number".
    char *rest;
    errno = 0;
    int numeric_spec = strtol(func_spec, &rest, 0);
    const bool parsed_fully = !errno && rest != func_spec && !*rest;
    if (!parsed_fully)
        numeric_spec = -1;

    // First choice: the number is a function id.
    if (-1 != numeric_spec) {
        const int nfound = tx->statement("select count(*) from semantic_functions where id = ?")
                           ->bind(0, numeric_spec)->execute_int();
        if (1 == nfound) {
            assert(numeric_spec>=0);
            return numeric_spec;
        }
    }

    // Otherwise collect candidates by entry address, function name, and file name.
    SqlDatabase::StatementPtr by_address = tx->statement("select func.id, func.entry_va, func.name, func.ninsns, file.name"
                                                         " from semantic_functions as func"
                                                         " join semantic_files as file on func.file_id = file.id"
                                                         " where entry_va = ?")->bind(0, numeric_spec);
    SqlDatabase::StatementPtr by_name = tx->statement("select func.id, func.entry_va, func.name, func.ninsns, file.name"
                                                      " from semantic_functions as func"
                                                      " join semantic_files as file on func.file_id = file.id"
                                                      " where func.name = ?")->bind(0, func_spec);
    SqlDatabase::StatementPtr by_file = tx->statement("select func.id, func.entry_va, func.name, func.ninsns, file.name"
                                                      " from semantic_functions as func"
                                                      " join semantic_files as file on func.file_id = file.id"
                                                      " where file.name like"
                                                      " '%/"+SqlDatabase::escape(func_spec, tx->driver(), false)+"'");

    SqlDatabase::Table<int, rose_addr_t, std::string, size_t, std::string> candidates;
    if (numeric_spec!=-1)
        candidates.insert(by_address);
    candidates.insert(by_name);
    candidates.insert(by_file);
    candidates.headers("ID", "Entry VA", "Function Name", "NInsns", "Specimen Name");
    candidates.renderers().r1 = &SqlDatabase::addr32Renderer;

    if (candidates.empty()) {
        std::cout <<argv0 <<": no function found by ID, address, or name: " <<func_spec <<"\n";
        exit(0);
    }
    if (1!=candidates.size()) {
        std::cout <<argv0 <<": function specification is ambiguous: " <<func_spec <<"\n";
        candidates.print(std::cout);
        exit(0);
    }

    const int func_id = candidates[0].v0;
    assert(func_id>=0);
    return func_id;
}
// Clear all cached error information from the database. static void clearErrors(const SqlDatabase::TransactionPtr &tx, const Settings &settings) { std::vector<std::string> args; SqlDatabase::StatementPtr q = tx->statement("update test_results set first_error = null" + sqlWhereClause(tx, settings, args)); sqlBindArgs(q, args); q->execute(); }
// Loads every enabled row (enabled <> 0) of the dependencies table, appending each value to the list stored under
// its dependency name.
static Dependencies loadAllDependencies(const SqlDatabase::TransactionPtr &tx)
{
    Dependencies result;
    SqlDatabase::StatementPtr query = tx->statement("select name, value from dependencies where enabled <> 0");
    SqlDatabase::Statement::iterator row = query->begin();
    while (row != query->end()) {
        result.insertMaybeDefault(row.get<std::string>(0)).push_back(row.get<std::string>(1));
        ++row;
    }
    return result;
}
// Inserts one row into "clusters".  The row id is one more than the largest existing id (1 for an empty table).
void insert_into_clusters(const SqlDatabase::TransactionPtr &tx, int cluster, int function_id, int index_within_function,
                          int vectors_row, double dist)
{
    const int next_id = tx->statement("select coalesce(max(id),0)+1 from clusters")->execute_int();

    SqlDatabase::StatementPtr insert =
        tx->statement("insert into clusters"
                      // 0   1        2            3                      4            5
                      " (id, cluster, function_id, index_within_function, vectors_row, dist)"
                      " values (?, ?,?,?,?,?)");
    insert->bind(0, next_id);
    insert->bind(1, cluster);
    insert->bind(2, function_id);
    insert->bind(3, index_within_function);
    insert->bind(4, vectors_row);
    insert->bind(5, dist);
    insert->execute();
}
// Records the instruction count of a function in function_statistics.  The filename and functionName arguments are
// not used by this function.
void addFunctionStatistics(const SqlDatabase::TransactionPtr &tx, const std::string& filename, const std::string& functionName,
                           size_t functionId, size_t numInstructions)
{
    SqlDatabase::StatementPtr insert =
        tx->statement("insert into function_statistics (function_id, num_instructions) values (?,?)");
    insert->bind(0, functionId);
    insert->bind(1, numInstructions);
    insert->execute();
}
int main(int argc, char* argv[]) { std::string database; int norm = 1; double similarity_threshold=1.; size_t k; size_t l; try { options_description desc("Allowed options"); desc.add_options() ("help", "produce a help message") ("database,q", value< string >()->composing(), "the sqlite database that we are to use") ("norm,p", value< int >(&norm), "Exponent in p-norm to use (1 or 2 or 3 (MIT implementation) )") ("hash-function-size,k", value< size_t >(&k), "The number of elements in a single hash function") ("hash-table-count,l", value< size_t >(&l), "The number of separate hash tables to create") ("similarity,t", value< double >(&similarity_threshold), "The similarity threshold that is allowed in a clone pair"); variables_map vm; store(parse_command_line(argc, argv, desc), vm); notify(vm); if (vm.count("help")) { cout << desc; exit(0); } if (vm.count("database")!=1) { std::cerr << "Missing options. Call as: findClones --database <database-name>" << std::endl; exit(1); } database = vm["database"].as<string >(); similarity_threshold = vm["similarity"].as<double>(); cout << "database: " << database << std::endl; } catch(exception& e) { cout << e.what() << "\n"; } std::cout << "The similarity threshold is " << similarity_threshold << std::endl; SqlDatabase::TransactionPtr tx = SqlDatabase::Connection::create(database)->transaction(); tx->statement("update run_parameters set similarity_threshold = ?") ->bind(0, similarity_threshold) ->execute(); OperateOnClusters op(database, norm, similarity_threshold, k , l); op.analyzeClusters(); //op.calculate_false_positives(); tx->commit(); return 0; };
// Copies every row of tmp_src into the listing: the line text is stored as the source_code of the listing entry
// keyed by (file_id, linenum).
static void load_source_code(const SqlDatabase::TransactionPtr &tx, Listing &listing/*in,out*/)
{
    SqlDatabase::StatementPtr query = tx->statement("select file_id, linenum, line from tmp_src");
    SqlDatabase::Statement::iterator row = query->begin();
    while (row != query->end()) {
        const int file_id = row.get<int>(0);
        const int linenum = row.get<int>(1);
        const SourcePosition where(file_id, linenum);
        listing[where].source_code = row.get<std::string>(2);
        ++row;
    }
}
// Count how many tests are missing first_error information when it should be available. static void countMissingErrors(const SqlDatabase::TransactionPtr &tx, const Settings &settings) { std::vector<std::string> args; SqlDatabase::StatementPtr q = tx->statement("select count(*)" " from test_results test" " join attachments att on test.id = att.test_id" + sqlWhereClause(tx, settings, args) + " and" " test.first_error is null and" " test.status <> 'end' and" " att.name = 'Final output'"); sqlBindArgs(q, args); int n = q->execute_int(); std::cout <<n <<"\n"; }
// Create and populate the tmp_events table. static void gather_events(const SqlDatabase::TransactionPtr &tx, int func_id) { tx->execute("create temporary table tmp_events as select * from semantic_fio_trace limit 0"); if (opt.show_trace) { std::string sql = "insert into tmp_events select * from semantic_fio_trace where func_id = ?"; std::vector<std::string> igroups; for (std::set<int>::const_iterator i=opt.traces.begin(); i!=opt.traces.end(); ++i) igroups.push_back(StringUtility::numberToString(*i)); if (!igroups.empty()) sql += " and igroup_id in (" + StringUtility::join(", ", igroups) + ")"; tx->statement(sql)->bind(0, func_id)->execute(); } }
// Stores one signature vector in the "vectors" table and increments numVectorsGenerated.
//
// The row records the owning function and index, the addresses of the first and last instruction of the window
// (firstInsn[0] and firstInsn[windowSize-1]), the size of the extent covered by the window's instructions, the sum
// of the vector's counts, the compressed counts in base64, and the base64 MD5 of the normalized instruction text.
// The row id is one more than the largest existing id.  functionName, filename and stride are not used here.
void addVectorToDatabase(const SqlDatabase::TransactionPtr &tx, const SignatureVector& vec, const std::string& functionName,
                         size_t functionId, size_t indexWithinFunction, const std::string& normalizedUnparsedInstructions,
                         SgAsmx86Instruction* firstInsn[], const std::string& filename, size_t windowSize, size_t stride)
{
    ++numVectorsGenerated;

    vector<uint8_t> packedCounts = compressVector(vec.getBase(), SignatureVector::Size);

    size_t countTotal = 0;
    for (size_t c = 0; c < SignatureVector::Size; ++c)
        countTotal += vec[c];

    // Union of the address ranges of all instructions in the window.
    ExtentMap windowExtent;
    for (size_t w = 0; w < windowSize; ++w) {
        SgAsmx86Instruction *insn = firstInsn[w];
        windowExtent.insert(Extent(insn->get_address(), insn->get_size()));
    }

    unsigned char digest[16];
    MD5(reinterpret_cast<const unsigned char*>(normalizedUnparsedInstructions.data()),
        normalizedUnparsedInstructions.size(), digest);

    SqlDatabase::StatementPtr insert =
        tx->statement("insert into vectors"
                      // 0   1            2                      3     4             5
                      " (id, function_id, index_within_function, line, last_insn_va, size,"
                      // 6             7           8
                      "sum_of_counts, counts_b64, instr_seq_b64)"
                      " values (?,?,?,?,?,?,?,?,?)");
    int vector_id = tx->statement("select coalesce(max(id),0)+1 from vectors")->execute_int(); // 1-origin

    insert->bind(0, vector_id);
    insert->bind(1, functionId);
    insert->bind(2, indexWithinFunction);
    insert->bind(3, firstInsn[0]->get_address());
    insert->bind(4, firstInsn[windowSize-1]->get_address());
    insert->bind(5, windowExtent.size());
    insert->bind(6, countTotal);
    insert->bind(7, StringUtility::encode_base64(&packedCounts[0], packedCounts.size()));
    insert->bind(8, StringUtility::encode_base64(digest, 16));
    insert->execute();
}
/**************************************************************************************** * * * Compute how mean similar functions are to all other functions. * * The result is inserted into fr_mean_similarity on the test db, and fr_mean_similar on * the global db. * */ void compute_mean_similarity_statistics(double bucket_size, double increment, SqlDatabase::TransactionPtr transaction) { int num_pairs = transaction->statement("select count(*) from semantic_funcsim")->execute_int(); transaction->execute("drop table IF EXISTS fr_mean_similarity;"); transaction->execute("create table fr_mean_similarity as select coalesce(sum(sf.similarity)/"+ boost::lexical_cast<std::string>(num_pairs)+ " ,0) as similarity, ttf.id as func_id from semantic_funcsim as sf"+ " join semantic_functions as ttf on ttf.id = sf.func1_id OR ttf.id = sf.func2_id GROUP BY ttf.id"); transaction->execute("drop table IF EXISTS fr_mean_similar"); transaction->execute("create table fr_mean_similar(similarity_low double precision, similarity_middle double precision," " similarity_high double precision, percent double precision);"); SqlDatabase::StatementPtr mean_similar_stmt = transaction->statement("insert into fr_mean_similar" // 0 1 2 "(similarity_low, similarity_middle, similarity_high," // 3 " percent) " " values (?, ?, ?, ?)"); for (double cur_bucket = 0.0; cur_bucket <= 1.0+bucket_size; cur_bucket+=increment) { int num_matches = transaction->statement("select count(*) from fr_mean_similarity where " " similarity >= " + boost::lexical_cast<std::string>(cur_bucket - bucket_size) + " and similarity < " + boost::lexical_cast<std::string>(cur_bucket + bucket_size))->execute_int(); mean_similar_stmt->bind(0, cur_bucket - bucket_size < 0 ? 0 : cur_bucket - bucket_size); mean_similar_stmt->bind(1, cur_bucket); mean_similar_stmt->bind(2, cur_bucket + bucket_size >= 1.0 ? 1.0 : cur_bucket + bucket_size); mean_similar_stmt->bind(3, num_pairs > 0 ? 
((double) num_matches*100.0)/num_pairs : 0); mean_similar_stmt->execute(); } }
// Inserts one row into "postprocessed_clusters" with the given column values.
void insert_into_postprocessed_clusters(const SqlDatabase::TransactionPtr &tx, int cluster, int function_id,
                                        int index_within_function, int vectors_row, double dist)
{
    SqlDatabase::StatementPtr insert =
        tx->statement("insert into postprocessed_clusters"
                      // 0        1            2                      3            4
                      " (cluster, function_id, index_within_function, vectors_row, dist)"
                      " values(?,?,?,?,?)");
    insert->bind(0, cluster);
    insert->bind(1, function_id);
    insert->bind(2, index_within_function);
    insert->bind(3, vectors_row);
    insert->bind(4, dist);
    insert->execute();
}
// Returns a newly allocated vector of the distinct callees of func_id, in ascending callee order, read from
// semantic_rg when reachability_graph is set and from semantic_cg otherwise.
CallVec* load_function_api_calls_for(int func_id, bool reachability_graph)
{
    std::string sql = "select distinct scg.callee from ";
    sql += reachability_graph ? "semantic_rg" : "semantic_cg ";
    sql += " as scg "
           //" join tmp_interesting_funcs as tif on tif.func_id = scg.callee "
           " where scg.caller=? ORDER BY scg.callee";

    SqlDatabase::StatementPtr query = transaction->statement(sql);
    query->bind(0, func_id);

    CallVec* callees = new CallVec;
    SqlDatabase::Statement::iterator row = query->begin();
    while (row != query->end()) {
        callees->push_back(row.get<int>(0));
        ++row;
    }
    return callees;
}
// Show the names of the source code files for this function. static void show_source_names(const SqlDatabase::TransactionPtr &tx, int func_id) { SqlDatabase::Table<int, std::string> srcfiles(tx->statement("select distinct file.id, file.name" " from semantic_instructions as insn" " join semantic_files as file on insn.src_file_id = file.id" " where insn.func_id = ?" " order by file.name")->bind(0, func_id)); if (1==srcfiles.size()) { std::cout <<"Source file name: " <<srcfiles[0].v1 <<" (id=" <<srcfiles[0].v0 <<")\n"; } else if (!srcfiles.empty()) { std::cout <<"Number of source files: " <<srcfiles.size() <<"\n"; srcfiles.headers("FileID", "Name"); srcfiles.line_prefix(" "); srcfiles.print(std::cout); } }
// List the errors ordered by how common they are. static void listErrors(const SqlDatabase::TransactionPtr &tx, const Settings &settings) { std::vector<std::string> args; SqlDatabase::StatementPtr q = tx->statement("select count(*) as n, status, test.first_error" " from test_results test" + sqlWhereClause(tx, settings, args) + " and" " test.first_error is not null" " group by status, test.first_error" " order by n desc"); sqlBindArgs(q, args); for (SqlDatabase::Statement::iterator row = q->begin(); row != q->end(); ++row) { int count = row.get<int>(0); std::string status = row.get<std::string>(1); std::string mesg = row.get<std::string>(2); printf("%6d %-16s %s\n", count, status.c_str(), oneLineEscaped(mesg).c_str()); } }
// Inserts one row into "timing" for the named property.  total_wallclock is stored as 0; total_usertime and
// total_systime are the user and system times in ru_after; wallclock, usertime and systime are the differences
// between the "after" and "before" measurements.
void insert_timing(const SqlDatabase::TransactionPtr &tx, std::string property_name, const timeval& before,
                   const timeval& after, const rusage& ru_before, const rusage& ru_after)
{
    SqlDatabase::StatementPtr insert =
        tx->statement("insert into timing"
                      // 0              1                2               3              4
                      " (property_name, total_wallclock, total_usertime, total_systime, wallclock,"
                      // 5         6
                      " usertime, systime)"
                      " values (?,?,?,?,?,?,?)");

    const double wall_elapsed = tvToDouble(after) - tvToDouble(before);
    const double user_elapsed = tvToDouble(ru_after.ru_utime) - tvToDouble(ru_before.ru_utime);
    const double sys_elapsed = tvToDouble(ru_after.ru_stime) - tvToDouble(ru_before.ru_stime);

    insert->bind(0, property_name);
    insert->bind(1, 0);
    insert->bind(2, tvToDouble(ru_after.ru_utime));
    insert->bind(3, tvToDouble(ru_after.ru_stime));
    insert->bind(4, wall_elapsed);
    insert->bind(5, user_elapsed);
    insert->bind(6, sys_elapsed);
    insert->execute();
}
// List tests that were run for this function static void show_tests(const SqlDatabase::TransactionPtr &tx, int func_id) { SqlDatabase::Table<int, size_t, size_t, size_t, size_t, size_t, size_t, size_t, int64_t, std::string, double, double, int64_t> fio; fio.insert(tx->statement("select" " fio.igroup_id, fio.arguments_consumed, fio.locals_consumed, fio.globals_consumed," " fio.functions_consumed, fio.integers_consumed, fio.pointers_consumed," " fio.instructions_executed, fio.ogroup_id," " fault.name, fio.elapsed_time, fio.cpu_time, fio.cmd" " from semantic_fio as fio" " join semantic_faults as fault on fio.status = fault.id" " where func_id = ?" " order by igroup_id")->bind(0, func_id)); std::cout <<"Tests run for this function:\n"; fio.headers("IGroup", "Args", "Locals", "Globals", "Funcs", "Ints", "Ptrs", "Insns", "OGroup", "Status", "Elapsed Time", "CPU Time", "Command"); fio.line_prefix(" "); fio.print(std::cout); }
// Resets detection_parameters to (similarity_threshold = 1.0, false_negative_rate = 0), then runs the program `Exec`
// with the arguments "--database <databaseName>" in a child process and waits for it to finish.  Exits the process
// if fork fails and aborts if waitpid fails.
//
// Fixes relative to the previous version:
//  - The command line shown by the child is built before the NULL argv terminator is appended; previously the
//    terminator itself was streamed through operator<<(const char*), which is undefined for a null pointer.
//  - When execv fails the child ends with _exit() rather than exit(), so the forked copy does not run the parent's
//    atexit handlers and static destructors or flush stdio buffers it inherited.
static void callExact(const SqlDatabase::TransactionPtr &tx, const std::string databaseName, const string& Exec)
{
    // FIXME: We can't pass parameters to the exec'd process this way because the parent's SQL statements are
    // being executed in a transaction -- they won't be visible in the child. [Robb P. Matzke 2013-08-12]
    tx->execute("delete from detection_parameters");
    tx->statement("insert into detection_parameters (similarity_threshold, false_negative_rate) values (?, ?)")
        ->bind(0, 1.0)
        ->bind(1, 0)
        ->execute();

    std::cout << "Start running exact clone detection" << std::endl;

    pid_t p = fork();
    if (p == -1) { // Error
        perror("fork: ");
        exit (1);
    }

    if (p == 0) { // Child
        vector<char*> args;
        args.push_back(strdup(Exec.c_str()));
        args.push_back(strdup("--database"));
        args.push_back(strdup(databaseName.c_str()));

        // Render the command line from the real arguments only.
        ostringstream outStr;
        for (vector<char*>::iterator iItr = args.begin(); iItr != args.end(); ++iItr)
            outStr << *iItr << " ";
        std::cout << "Calling " << outStr.str() << std::endl;

        args.push_back(0);                              // argv terminator required by execv
        execv(Exec.c_str(), &args[0]);

        // Only reached when execv failed.
        perror("execv: ");
        _exit (1);
    } else { // Parent
        int status;
        if (waitpid(p, &status, 0) == -1) {
            perror("waitpid");
            abort();
        }
        cerr << "Status: " << status << endl;
        cerr << "Done waiting for Exact Clone Detection" << endl;
    }
}