void find_clusters(int max_cluster_size_signed, SqlDatabase::TransactionPtr transaction) { assert(max_cluster_size_signed >= 0); size_t max_cluster_size = max_cluster_size_signed; SqlDatabase::StatementPtr insert_stmt = transaction->statement("insert into fr_ignored_function_pairs" // 0 1 2 "(func1_id, func2_id, from_cluster_of_size)" " values (?, ?, ?)"); //Get all vetexes and find the union std::string _query_condition = "select func1_id, func2_id from fr_clone_pairs"; SqlDatabase::StatementPtr stmt = transaction->statement(_query_condition); if (stmt->begin() == stmt->end()) return; //Count how many vertices we have for boost graph int VERTEX_COUNT = transaction->statement("select count(*) from semantic_functions")->execute_int(); typedef adjacency_list <vecS, vecS, undirectedS> Graph; typedef graph_traits<Graph>::vertex_descriptor Vertex; typedef graph_traits<Graph>::vertices_size_type VertexIndex; Graph graph(VERTEX_COUNT); std::vector<VertexIndex> rank(num_vertices(graph)); std::vector<Vertex> parent(num_vertices(graph)); typedef VertexIndex* Rank; typedef Vertex* Parent; disjoint_sets<Rank, Parent> ds(&rank[0], &parent[0]); initialize_incremental_components(graph, ds); incremental_components(graph, ds); graph_traits<Graph>::edge_descriptor edge; bool flag; for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) { int func1 = row.get<int>(0); int func2 = row.get<int>(1); boost::tie(edge, flag) = add_edge(func1, func2, graph); ds.union_set(func1,func2); } typedef component_index<VertexIndex> Components; Components components(parent.begin(), parent.end()); std::map<int,int> size_distribution; // Iterate through the component indices BOOST_FOREACH(VertexIndex current_index, components) { std::vector<int> cluster_functions; // Iterate through the child vertex indices for [current_index] BOOST_FOREACH(VertexIndex child_index, components[current_index]) { cluster_functions.push_back(child_index); }
/** Print the instructions of one function in position order, one per line.
 *
 *  Each line starts with an execution-count prefix ("<count>x ") when the events loaded for that address report a
 *  positive @c nexecuted, or with blank padding otherwise.  After the instruction text, any events recorded for the
 *  address are passed to show_events().
 *
 *  @param tx       Transaction used for all database queries.
 *  @param func_id  ID of the function whose instructions are listed. */
static void
list_assembly(const SqlDatabase::TransactionPtr &tx, int func_id)
{
    // Collect the events for this function so each instruction can be annotated.
    Events events;
    gather_events(tx, func_id);
    load_events(tx, func_id, events);

    SqlDatabase::StatementPtr query = tx->statement("select address, assembly from semantic_instructions where func_id = ?"
                                                    " order by position")->bind(0, func_id);

    SqlDatabase::Statement::iterator row = query->begin();
    while (row != query->end()) {
        const rose_addr_t va = row.get<rose_addr_t>(0);
        const std::string text = row.get<std::string>(1);
        const Events::const_iterator found = events.find(va);
        const bool has_events = found != events.end();

        // Prefix: execution count if known, otherwise padding of the same width.
        if (has_events && found->second.nexecuted > 0) {
            std::cout <<std::setw(9) <<std::right <<found->second.nexecuted <<"x ";
        } else {
            std::cout <<std::string(11, ' ');
        }

        // The instruction itself, followed by its event details (if any).
        std::cout <<"| " <<StringUtility::addrToString(va) <<": " <<text <<"\n";
        if (has_events)
            show_events(found->second);

        ++row;
    }
}
void computational_equivalent_classes(std::map<int,int>& norm_map) { SqlDatabase::StatementPtr stmt = transaction->statement("select func_id, equivalent_func_id from equivalent_classes"); for (SqlDatabase::Statement::iterator row=stmt->begin(); row!= stmt->end(); ++row) norm_map[row.get<int>(0)] = row.get<int>(1); }
/** Load the sequence of callee IDs recorded for a function and input group.
 *
 *  Rows come from semantic_fio_calls joined with tmp_interesting_funcs and are returned in @c pos order.
 *
 *  @param func_id             Function whose calls are loaded.
 *  @param igroup_id           Input group whose calls are loaded.
 *  @param ignore_no_compares  Not consulted by this function.
 *  @param call_depth          When non-negative, only rows whose caller_id equals @p func_id are selected.
 *  @param expand_ncalls       When true each callee is appended @c ncalls times, otherwise once per row.
 *  @return A newly allocated CallVec; the caller owns it. */
CallVec*
load_api_calls_for(int func_id, int igroup_id, bool ignore_no_compares, int call_depth, bool expand_ncalls)
{
    const bool direct_calls_only = call_depth >= 0;

    std::string sql = "select distinct fio.pos, fio.callee_id, fio.ncalls"
                      " from semantic_fio_calls as fio"
                      " join tmp_interesting_funcs as f1"
                      // filter out functions with no compares
                      " on f1.func_id = fio.callee_id"
                      // filter on current parameters
                      " where fio.func_id = ? and fio.igroup_id = ?";
    if (direct_calls_only)
        sql += " and fio.caller_id = ?";                // filter out functions not called directly
    sql += " order by fio.pos";

    SqlDatabase::StatementPtr query = transaction->statement(sql);
    query->bind(0, func_id);
    query->bind(1, igroup_id);
    if (direct_calls_only)
        query->bind(2, func_id);

    CallVec* calls = new CallVec;
    SqlDatabase::Statement::iterator row = query->begin();
    while (row != query->end()) {
        const int callee_id = row.get<int>(1);
        const int ncalls = row.get<int>(2);
        const int copies = expand_ncalls ? ncalls : 1;
        for (int i = 0; i < copies; ++i)
            calls->push_back(callee_id);
        ++row;
    }
    return calls;
}
/** Read every enabled (name, value) pair from the dependencies table.
 *
 *  For each row the value is appended to the list obtained from Dependencies::insertMaybeDefault(name).
 *
 *  @param tx  Transaction used for the query.
 *  @return The populated Dependencies container. */
static Dependencies
loadAllDependencies(const SqlDatabase::TransactionPtr &tx)
{
    Dependencies result;
    SqlDatabase::StatementPtr query = tx->statement("select name, value from dependencies where enabled <> 0");

    SqlDatabase::Statement::iterator row = query->begin();
    while (row != query->end()) {
        const std::string name = row.get<std::string>(0);
        const std::string value = row.get<std::string>(1);
        result.insertMaybeDefault(name).push_back(value);
        ++row;
    }
    return result;
}
/** Copy the source lines stored in tmp_src into a listing.
 *
 *  Each row (file_id, linenum, line) is stored as the @c source_code of the listing entry keyed by
 *  SourcePosition(file_id, linenum); the entry is created if it does not exist yet.
 *
 *  @param tx       Transaction used for the query.
 *  @param listing  Listing to update (in,out). */
static void
load_source_code(const SqlDatabase::TransactionPtr &tx, Listing &listing/*in,out*/)
{
    SqlDatabase::StatementPtr query = tx->statement("select file_id, linenum, line from tmp_src");

    SqlDatabase::Statement::iterator row = query->begin();
    while (row != query->end()) {
        const int file_id = row.get<int>(0);
        const int linenum = row.get<int>(1);
        const SourcePosition where(file_id, linenum);
        listing[where].source_code = row.get<std::string>(2);
        ++row;
    }
}
/** Load the distinct callees of a function, ordered by callee ID.
 *
 *  @param func_id             Caller whose callees are loaded.
 *  @param reachability_graph  Selects the table to query: semantic_rg when true, semantic_cg otherwise.
 *  @return A newly allocated CallVec; the caller owns it. */
CallVec*
load_function_api_calls_for(int func_id, bool reachability_graph)
{
    const std::string table = reachability_graph ? "semantic_rg" : "semantic_cg ";
    const std::string sql = "select distinct scg.callee from " + table + " as scg "
                            //" join tmp_interesting_funcs as tif on tif.func_id = scg.callee "
                            " where scg.caller=? ORDER BY scg.callee";

    SqlDatabase::StatementPtr query = transaction->statement(sql);
    query->bind(0, func_id);

    CallVec* callees = new CallVec;
    SqlDatabase::Statement::iterator row = query->begin();
    while (row != query->end()) {
        callees->push_back(row.get<int>(0));
        ++row;
    }
    return callees;
}
// List the errors ordered by how common they are. static void listErrors(const SqlDatabase::TransactionPtr &tx, const Settings &settings) { std::vector<std::string> args; SqlDatabase::StatementPtr q = tx->statement("select count(*) as n, status, test.first_error" " from test_results test" + sqlWhereClause(tx, settings, args) + " and" " test.first_error is not null" " group by status, test.first_error" " order by n desc"); sqlBindArgs(q, args); for (SqlDatabase::Statement::iterator row = q->begin(); row != q->end(); ++row) { int count = row.get<int>(0); std::string status = row.get<std::string>(1); std::string mesg = row.get<std::string>(2); printf("%6d %-16s %s\n", count, status.c_str(), oneLineEscaped(mesg).c_str()); } }
// Generate the 'where' expression that limits what tests are being considered. // Also appends variable values to the 'args' vector. static std::string sqlWhereClause(const SqlDatabase::TransactionPtr &tx, const Settings &settings, std::vector<std::string> &args /*in,out*/) { std::vector<std::string> constraints; if (settings.latestTests) { // Constrain the tests to be only the latest version of ROSE present in the database. SqlDatabase::StatementPtr q = tx->statement("select distinct rose, rose_date" " from test_results" " order by rose_date desc" " limit 1"); SqlDatabase::Statement::iterator row = q->begin(); if (row != q->end()) { constraints.push_back("rose = ?"); args.push_back(row.get<std::string>(0)); } } if (constraints.empty()) constraints.push_back("true"); return " where " + boost::join(constraints, " and "); }
/** Build the mapping from user-visible keys to SQL column names.
 *
 *  Every distinct dependency name NAME found in the dependencies table maps to "rmc_NAME".  A fixed set of additional
 *  keys is inserted afterwards, in the order listed below.
 *
 *  @param tx  Transaction used for the query.
 *  @return The populated DependencyNames container. */
static DependencyNames
loadDependencyNames(const SqlDatabase::TransactionPtr &tx)
{
    DependencyNames names;

    SqlDatabase::StatementPtr query = tx->statement("select distinct name from dependencies");
    SqlDatabase::Statement::iterator row = query->begin();
    while (row != query->end()) {
        const std::string key = row.get<std::string>(0);
        names.insert(key, "rmc_"+key);
        ++row;
    }

    // Additional key/column relationships
    static const char *const fixed_columns[][2] = {
        {"id",             "test.id"},
        {"reporting_user", "auth_user.identity"},
        {"reporting_time", "test.reporting_time"},
        {"tester",         "test.tester"},
        {"os",             "test.os"},
        {"rose",           "test.rose"},
        {"rose_date",      "test.rose_date"},
        {"status",         "test.status"},
        {"duration",       "test.duration"},
        {"noutput",        "test.noutput"},
        {"nwarnings",      "test.nwarnings"},
    };
    const size_t nfixed = sizeof(fixed_columns) / sizeof(fixed_columns[0]);
    for (size_t i = 0; i < nfixed; ++i)
        names.insert(std::string(fixed_columns[i][0]), std::string(fixed_columns[i][1]));

    return names;
}
static void list_combined(const SqlDatabase::TransactionPtr &tx, int func_id, bool show_assembly) { CloneDetection::FilesTable files(tx); Events events; gather_events(tx, func_id); load_events(tx, func_id, events/*out*/); gather_instructions(tx, func_id, events); Listing listing; gather_source_code(tx); load_source_code(tx, listing/*out*/); // Get lines of assembly code and insert them into the correct place in the Listing. if (show_assembly) { SqlDatabase::StatementPtr stmt = tx->statement("select" // 0 1 2 3 " insn.src_file_id, insn.src_line, insn.position, insn.address," // 4 5 6 " insn.assembly, func.id, func.name" " from tmp_insns as insn" " join semantic_functions as func on insn.func_id = func.id" " order by position"); for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) { int src_file_id = row.get<int>(0); int src_line_num = row.get<int>(1); SourcePosition srcpos(src_file_id, src_line_num); int pos = row.get<int>(2); rose_addr_t addr = row.get<rose_addr_t>(3); std::string assembly = row.get<std::string>(4); int func_id = row.get<int>(5); std::string func_name = row.get<std::string>(6); listing[srcpos].assembly_code.insert(std::make_pair(addr, AssemblyCode(pos, addr, assembly, func_id, func_name))); } // Listing header std::cout <<"WARNING: This listing should be read cautiously. It is ordered according to the\n" <<" source code with assembly lines following the source code line from which\n" <<" they came. However, the compiler does not always generate machine\n" <<" instructions in the same order as source code. When a discontinuity\n" <<" occurs in the assembly instruction listing, it will be marked by a \"#\"\n" <<" character. The assembly instructions are also numbered according to\n" <<" their relative positions in the binary function.\n" <<"\n" <<" The prefix area contains either source location information or test trace\n" <<" information. 
Note that trace information might be incomplete because\n" <<" tracing was disabled or only partially enabled, or the trace includes\n" <<" instructions that are not present in this function listing (e.g., when\n" <<" execution follows a CALL instruction). The following notes are possible:\n" <<" * \"Nx\" where N is an integer indicates that this instruction\n" <<" was reached N times during testing. These notes are typically\n" <<" only attached to the first instruction of a basic block and only\n" <<" if the trace contains EV_REACHED events. Lack of an Nx notation\n" <<" doesn't necessarily mean that the basic block was not reached, it\n" <<" only means that there is no EV_REACHED event for that block.\n" <<" * \"N<\" where N is an integer indicates that the instruction\n" <<" on the previous line consumed N inputs. Information about the\n" <<" inputs is listed on the right side of this line.\n" <<" * \"N>\" where N is an integer indicates that the instruction\n" <<" on the previous line produced N memory outputs. Information about the\n" <<" outputs is listed on the right side of this line. Only the final\n" <<" write to a memory address is considered a true output, and such\n" <<" writes will be marked with the string \"final\".\n" <<" * \"BR\" indicates that the instruction on the previous line is a\n" <<" control flow branch point. The right side of the line shows more\n" <<" detailed information about how many times the branch was taken.\n" <<" * \"FAULT\" indicates that the test was terminated at the previous\n" <<" instruction. 
The right side of the line shows the distribution of\n" <<" faults that occurred here.\n" <<"\n" <<" /------------- Prefix area\n" <<" /-------------/-------------- Source file ID or assembly function ID\n" <<" | /------/--------------- Source line number or assembly instruction index\n" <<" | | /-/---------------- Instruction out-of-order indicator\n" <<" | | |/ /----------- Instruction virtual address\n" <<" | | | |\n" <<"vvvv vvvvv/| |\n" <<"vvvvvvvvvv v vvvvvvvvvv\n"; } // Show the listing int prev_func_id = -1, prev_position = -1; std::set<int> seen_files; for (Listing::iterator li=listing.begin(); li!=listing.end(); ++li) { int file_id = li->first.file_id; if (seen_files.insert(file_id).second) { if (file_id>=0) { std::cout <<"\n" <<std::setw(4) <<std::right <<file_id <<".file |" <<(opt.colorize?"\033[33;4m":"") <<files.name(file_id) <<(opt.colorize?"\033[m":"") <<"\n"; } else { std::cout <<"\n" <<std::string(11, ' ') <<"|" <<(opt.colorize?"\033[33;4m":"") <<"instructions not associated with a source file" <<(opt.colorize?"\033[m":"") <<"\n"; } } if (file_id>=0) { std::cout <<std::setw(4) <<std::right <<file_id <<"." <<std::setw(6) <<std::left <<li->first.line_num <<"|" <<(opt.colorize?"\033[34m":"") <<StringUtility::untab(li->second.source_code) <<(opt.colorize?"\033[m":"") <<"\n"; } for (Instructions::iterator ii=li->second.assembly_code.begin(); ii!=li->second.assembly_code.end(); ++ii) { const AssemblyCode assm = ii->second; if (assm.func_id!=prev_func_id) { std::cout <<std::string(11, ' ') <<"# " <<(opt.colorize?"\033[33;4m":"") <<"function " <<StringUtility::numberToString(assm.func_id); if (!assm.func_name.empty()) std::cout <<" <" <<assm.func_name <<">"; std::cout <<(opt.colorize?"\033[m":"") <<"\n"; } Events::const_iterator ei=events.find(assm.addr); std::cout <<std::setw(4) <<std::right <<assm.func_id <<"." <<std::setw(6) <<std::left <<assm.pos <<(prev_func_id==assm.func_id && prev_position+1==assm.pos ? 
"|" : "#"); if (ei!=events.end() && ei->second.nexecuted>0) { std::cout <<std::setw(9) <<std::right <<ei->second.nexecuted <<"x "; } else { std::cout <<std::string(11, ' '); } std::cout <<StringUtility::addrToString(assm.addr) <<": " <<(opt.colorize?"\033[32m":"") <<assm.assembly <<(opt.colorize?"\033[m":"") <<"\n"; if (ei!=events.end()) show_events(ei->second); prev_func_id = assm.func_id; prev_position = assm.pos; } } }
// Load all events into memory. Events are emitted for a particular function ID being analyzed, but if the 25-run-test // --follow-calls was specified, then events for that function ID might be at instructions that are outside that function. // We need to make note of those functions so that we can load all their instructions. static void load_events(const SqlDatabase::TransactionPtr &tx, int func_id, Events &events/*in,out*/) { int specimen_id = tx->statement("select specimen_id from semantic_functions where id = ?") ->bind(0, func_id)->execute_int(); SqlDatabase::StatementPtr stmt = tx->statement("select" // 0 1 2 3 4 " event.addr, event.event_id, event.minor, event.val, func.id," // 5 6 " event.igroup_id, event.pos" " from tmp_events as event" " join semantic_instructions as insn on event.addr = insn.address" " join semantic_functions as func on insn.func_id = func.id" " where func.specimen_id = ?" " order by igroup_id, pos"); stmt->bind(0, specimen_id); for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) { rose_addr_t addr = row.get<rose_addr_t>(0); int event_id = row.get<int>(1); int minor = row.get<int>(2); int64_t val = row.get<int64_t>(3); events[addr].func_id = row.get<int>(4); // the hard-to-get ID, not the one stored in the events func_id column. 
int igroup_id = row.get<int>(5); int pos = row.get<int>(6); switch (event_id) { case CloneDetection::EV_REACHED: { ++events[addr].nexecuted; break; } case CloneDetection::EV_BRANCHED: { ++events[addr].nbranches; ++events[addr].branches[val]; break; } case CloneDetection::EV_RETURNED: { ++events[addr].nreturns; break; } case CloneDetection::EV_CONSUME_INPUT: { ++events[addr].ninputs; assert(minor>=0); if ((size_t)minor>=events[addr].inputs.size()) events[addr].inputs.resize(minor+1); ++events[addr].inputs[minor][val]; break; } case CloneDetection::EV_FAULT: { CloneDetection::AnalysisFault::Fault fault = (CloneDetection::AnalysisFault::Fault)minor; ++events[addr].nfaults; ++events[addr].faults[fault]; break; } case CloneDetection::EV_MEM_WRITE: { OutputEventKey output_key(igroup_id, val); OutputEventValue output_val(pos, minor); // Track final writes to each address final_output_events[output_key] = output_val; // Append event to the appropriate instruction events[addr].outputs.push_back(std::make_pair(output_key, output_val)); } default: /*void*/ break; } } }
/* Remove the functions from the compilation unit that is only available in one of the traces. * - criteria complement of the functions from the files of the caller functions in the call trace is removed. */ std::pair<CallVec*, CallVec*> remove_compilation_unit_complement(int func1_id, int func2_id, int igroup_id, int similarity, CallVec* func1_vec, CallVec* func2_vec) { CallVec* new_func1_vec = new CallVec; CallVec* new_func2_vec = new CallVec; if (func1_vec->size() > 0 || func2_vec->size() > 0) { // Find the set complement of functions called by the two functions // - we are not interested in functions called by both std::set<int> func1_vec_set; std::set<int> func2_vec_set; for (CallVec::iterator it = func1_vec->begin(); it != func1_vec->end(); ++it) func1_vec_set.insert(*it); for (CallVec::iterator it = func2_vec->begin(); it != func2_vec->end(); ++it) func2_vec_set.insert(*it); std::set<int> func1_func2_complement; std::set_difference(func1_vec_set.begin(), func1_vec_set.end(), func2_vec_set.begin(), func2_vec_set.end(), std::inserter(func1_func2_complement, func1_func2_complement.end())); // Find the compilation units in question. A compilation unit is in our case a file. SqlDatabase::StatementPtr func1_file_stmt = transaction->statement("select file_id from semantic_functions" " where id = ?"); func1_file_stmt->bind(0, func1_id); int func1_file_id = func1_file_stmt->execute_int(); SqlDatabase::StatementPtr func2_file_stmt = transaction->statement("select file_id from semantic_functions" " where id = ?"); func2_file_stmt->bind(0, func2_id); int func2_file_id = func2_file_stmt->execute_int(); // Find the functions that needs to be removed // - all functions that has a clone in between the files SqlDatabase::StatementPtr stmt = transaction->statement("select sem.func1_id, sem.func2_id from semantic_funcsim as sem" " join semantic_functions as sf1 on sem.func1_id = sf1.id" " join semantic_functions as sf2 on sem.func2_id = sf2.id" " where similarity >= ? 
and sf1.file_id in (?,?)" " and sf2.file_id in (?, ?) and sf1.file_id != sf2.file_id"); stmt->bind(0, similarity); stmt->bind(1, func1_file_id); stmt->bind(2, func2_file_id); stmt->bind(3, func1_file_id); stmt->bind(4, func2_file_id); std::set<int> complement_functions; for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) { int clone_func1 = row.get<int>(0); int clone_func2 = row.get<int>(1); complement_functions.insert(clone_func1); complement_functions.insert(clone_func2); } // Find the functions we want to remove // - functions present with clones in between the files that is not part of both traces std::set<int> remove_these; std::set_intersection(complement_functions.begin(), complement_functions.end(), func1_func2_complement.begin(), func1_func2_complement.end(), std::inserter(remove_these, remove_these.end())); //prune functions to remove away from the call trace into new vectors for (CallVec::iterator it = func1_vec->begin(); it != func1_vec->end(); ++it) { if (remove_these.find(*it) == remove_these.end()) new_func1_vec->push_back(*it); } for (CallVec::iterator it = func2_vec->begin(); it != func2_vec->end(); ++it) { if (remove_these.find(*it) == remove_these.end()) new_func2_vec->push_back(*it); } } return std::pair<CallVec*, CallVec*>(new_func1_vec, new_func2_vec); }
static void postprocess(const SqlDatabase::TransactionPtr &tx) { int windowSize = tx->statement("select window_size from run_parameters limit 1")->execute_int(); int stride = tx->statement("select stride from run_parameters limit 1")->execute_int(); assert(windowSize != 0); assert(stride != 0); cerr << "About to delete from postprocessed_clusters" << endl; tx->execute("delete from postprocessed_clusters"); cerr << "... done" << endl; cerr << "About to postprocess" << endl; SqlDatabase::StatementPtr cmd = tx->statement("select cluster, function_id, index_within_function, vectors_row" " from clusters order by cluster, function_id, index_within_function"); SqlDatabase::StatementPtr insertCmd = tx->statement("insert into postprocessed_clusters" " select * from clusters where row_number = ?"); const size_t numStridesThatMustBeDifferent = windowSize / (stride * 2); string last_cluster = ""; string last_func_id = ""; size_t last_index_within_function = 0; vector<string> rows_in_this_cluster; bool first = true; for (SqlDatabase::Statement::iterator postproc_reader=cmd->begin(); postproc_reader!=cmd->end(); ++postproc_reader) { string cluster = postproc_reader.get<std::string>(0); string function_id = postproc_reader.get<std::string>(1); size_t index_within_function = postproc_reader.get<size_t>(2); string cluster_row_number = postproc_reader.get<std::string>(3); bool differentFunction = cluster != last_cluster || function_id != last_func_id; bool endingCluster = differentFunction; bool beginningNewCluster = first || differentFunction; first = false; if (endingCluster) { if (rows_in_this_cluster.size() > 1) { // Skip clusters that have only one element left for (size_t i = 0; i < rows_in_this_cluster.size(); ++i) { insertCmd->bind(0, rows_in_this_cluster[i]); insertCmd->execute(); } } } if (beginningNewCluster) { last_cluster = cluster; last_func_id = function_id; last_index_within_function = index_within_function; rows_in_this_cluster.clear(); } bool keep = 
beginningNewCluster || (index_within_function >= last_index_within_function + numStridesThatMustBeDifferent); if (keep) { last_index_within_function = index_within_function; rows_in_this_cluster.push_back(cluster_row_number); } } cerr << "... done" << endl; }
int main(int argc, char* argv[]) { std::string database; //Timing struct timeval before; struct rusage ru_before; gettimeofday(&before, NULL); getrusage(RUSAGE_SELF, &ru_before); try { options_description desc("Allowed options"); desc.add_options() ("help", "produce a help message") ("database,q", value< string >()->composing(), "the sqlite database that we are to use") ; variables_map vm; store(command_line_parser(argc, argv).options(desc) .run(), vm); if (vm.count("help")) { cout << desc; exit(0); } if (vm.count("database")!=1 ) { std::cerr << "Missing options. Call as: findClones --database <database-name>" << std::endl; exit(1); } database = vm["database"].as<string >(); cout << "database: " << database << std::endl; } catch(exception& e) { cout << e.what() << "\n"; } SqlDatabase::TransactionPtr tx = SqlDatabase::Connection::create(database)->transaction(); try { tx->statement("create table IF NOT EXISTS largest_clones(row_number INTEGER PRIMARY KEY, function_id_A INTEGER, begin_index_within_function_A INTEGER, end_index_within_function_A INTEGER," "function_id_B INTEGER , begin_index_within_function_B INTEGER, end_index_within_function_B INTEGER )")->execute(); } catch(exception &ex) { cerr << "Exception Occurred: " << ex.what() << endl; } try { tx->statement("delete from largest_clones")->execute(); } catch(exception &ex) { cerr << "Exception Occurred: " << ex.what() << endl; } string filen = database; std::vector<std::pair <Element, Element> > listOfClonePairs; std::list<std::pair<int,int> > listOfFunctionClonePairs; int windowSize = 0; int stride = 0; try { windowSize = tx->statement("select window_size from run_parameters limit 1")->execute_int(); } catch (exception& ex) {cerr << "Exception Occurred: " << ex.what() << endl;} try { stride = tx->statement("select stride from run_parameters limit 1")->execute_int(); } catch (exception& ex) {cerr << "Exception Occurred: " << ex.what() << endl;} assert (windowSize != 0); assert (stride != 0); //Create set of 
clone pairs try{ std::string selectSeparateDatasets ="SELECT cluster, function_id, index_within_function, vectors_row from clusters ORDER BY cluster, function_id, index_within_function"; SqlDatabase::StatementPtr cmd = tx->statement(selectSeparateDatasets); int64_t thisClusterName=-1; std::vector<Element> thisCluster; for (SqlDatabase::Statement::iterator r=cmd->begin(); r!=cmd->end(); ++r) { Element cur_elem; int64_t cluster = r.get<int64_t>(0); cur_elem.function_id = r.get<int64_t>(1); cur_elem.index_within_function = r.get<int64_t>(2); cur_elem.last_index_within_function = cur_elem.index_within_function; cur_elem.vectors_row = r.get<int64_t>(3); // cur_elem.line = boost::lexical_cast<int> ( datasets.getstring(5) ); // cur_elem.offset = boost::lexical_cast<int> ( datasets.getstring(6) ); if( cluster == thisClusterName ) { for( std::vector<Element>::iterator iItr = thisCluster.begin(); iItr != thisCluster.end(); ++iItr ) { if( cur_elem.function_id < iItr->function_id) listOfFunctionClonePairs.push_back(std::pair<int,int>(cur_elem.function_id, iItr->function_id)); else listOfFunctionClonePairs.push_back(std::pair<int,int>( iItr->function_id, cur_elem.function_id )); if( cur_elem < *iItr ) { listOfClonePairs.push_back(std::pair<Element,Element>( cur_elem,*iItr ) ); } else { listOfClonePairs.push_back(std::pair<Element,Element>(*iItr,cur_elem ) ); } }; }else{ thisCluster.clear(); thisClusterName = cluster; } thisCluster.push_back( cur_elem ); } }catch(exception &ex) { cerr << "Exception Occured: " << ex.what() << endl; } listOfFunctionClonePairs.sort(); listOfFunctionClonePairs.unique(); for(std::list<std::pair<int,int> >::iterator iItr = listOfFunctionClonePairs.begin(); iItr != listOfFunctionClonePairs.end(); iItr++ ) { if(iItr->first != iItr->second) std::cout << iItr->second << " " << iItr->first << std::endl; } return 0; };
int main(int argc, char *argv[]) { std::ios::sync_with_stdio(); argv0 = argv[0]; { size_t slash = argv0.rfind('/'); argv0 = slash==std::string::npos ? argv0 : argv0.substr(slash+1); if (0==argv0.substr(0, 3).compare("lt-")) argv0 = argv0.substr(3); } Switches opt; int argno = 1; for (/*void*/; argno<argc && '-'==argv[argno][0]; ++argno) { if (!strcmp(argv[argno], "--")) { ++argno; break; } else if (!strcmp(argv[argno], "--help") || !strcmp(argv[argno], "-h")) { ::usage(0); } else if (!strncmp(argv[argno], "--entry=", 8)) { opt.entry_vas.insert(strtoull(argv[argno]+8, NULL, 0)); } else if (!strcmp(argv[argno], "--file=list") || !strcmp(argv[argno], "--files=list")) { opt.list_files = true; } else if (!strncmp(argv[argno], "--file=", 7) || !strncmp(argv[argno], "--files=", 8)) { std::vector<std::string> ids = StringUtility::split(",", strchr(argv[argno], '=')+1, (size_t)-1, true); for (size_t i=0; i<ids.size(); ++i) { const char *s = ids[i].c_str(); char *rest; errno = 0; int id = strtoul(s, &rest, 0); if (errno || rest==s || *rest) { std::cerr <<argv0 <<": invalid file ID: " <<ids[i] <<"\n"; exit(1); } opt.files.insert(id); } } else if (!strncmp(argv[argno], "--function=", 11) || !strncmp(argv[argno], "--functions=", 12)) { std::vector<std::string> ids = StringUtility::split(",", strchr(argv[argno], '=')+1, (size_t)-1, true); if (ids.size()==1 && isalpha(ids[0][0]) && ids[0].find_first_of('.')!=std::string::npos) { std::vector<std::string> words = StringUtility::split(".", ids[0]); if (words.size()!=2 || !SqlDatabase::is_valid_table_name(words[0]) || !SqlDatabase::is_valid_table_name(words[1])) { std::cerr <<argv0 <<": --function switch needs either IDs or a database TABLE.COLUMN\n"; exit(1); } opt.function_table = words[0]; opt.function_column = words[1]; } else { for (size_t i=0; i<ids.size(); ++i) { const char *s = ids[i].c_str(); char *rest; errno = 0; int id = strtoul(s, &rest, 0); if (errno || rest==s || *rest) { std::cerr <<argv0 <<": invalid function ID: " 
<<ids[i] <<"\n"; exit(1); } opt.functions.insert(id); } } } else if (!strncmp(argv[argno], "--first-fuzz=", 13)) { opt.first_fuzz = strtoul(argv[argno]+13, NULL, 0); } else if (!strncmp(argv[argno], "--name=", 7)) { opt.names.insert(argv[argno]+7); } else if (!strncmp(argv[argno], "--nfuzz=", 8)) { opt.nfuzz = strtoul(argv[argno]+8, NULL, 0); opt.nfuzz_set = true; } else if (!strncmp(argv[argno], "--size=", 7)) { opt.ninsns = strtoul(argv[argno]+7, NULL, 0); } else if (!strcmp(argv[argno], "--specimen=list") || !strcmp(argv[argno], "--specimens=list")) { opt.list_specimens = true; } else if (!strncmp(argv[argno], "--specimen=", 11) || !strncmp(argv[argno], "--specimens=", 12)) { std::vector<std::string> ids = StringUtility::split(",", strchr(argv[argno], '=')+1, (size_t)-1, true); for (size_t i=0; i<ids.size(); ++i) { const char *s = ids[i].c_str(); char *rest; errno = 0; int id = strtoul(s, &rest, 0); if (errno || rest==s || *rest) { std::cerr <<argv0 <<": invalid specimen ID: " <<ids[i] <<"\n"; exit(1); } opt.specimens.insert(id); } } else { std::cerr <<argv0 <<": unrecognized switch: " <<argv[argno] <<"\n" <<"see \"" <<argv0 <<" --help\" for usage info.\n"; exit(1); } } if (argno+1!=argc) ::usage(1); SqlDatabase::TransactionPtr tx = SqlDatabase::Connection::create(argv[argno++])->transaction(); // List the ID numbers and names for all specimen files if (opt.list_specimens) { SqlDatabase::Table<int, std::string> specimens; specimens.insert(tx->statement("select file.id, file.name" " from (select distinct specimen_id as id from semantic_functions) as specimen" " join semantic_files as file on specimen.id = file.id" " order by file.name")); specimens.headers("File ID", "Specimen Name"); specimens.print(std::cout); return 0; } // List the ID numbers and names for all files containing functions if (opt.list_files) { SqlDatabase::Table<int, std::string> files; files.insert(tx->statement("select file.id, file.name" " from (select distinct file_id as id from 
semantic_functions) as used" " join semantic_files as file on used.id = file.id" " order by file.name")); files.headers("File ID", "Binary File Name"); files.print(std::cout); return 0; } // Sanity checks if (!opt.functions.empty() && !opt.function_table.empty()) { std::cerr <<argv0 <<": --function=ID and --function=TABLE are mutually exclusive\n"; exit(1); } if (0==tx->statement("select count(*) from semantic_functions")->execute_int()) { std::cerr <<argv0 <<": database has no functions; nothing to test\n"; return 0; } if (0==tx->statement("select count(*) from semantic_inputvalues")->execute_int()) { std::cerr <<argv0 <<": database has no input groups; nothing to test\n"; return 0; } // Create table tmp_functions containing IDs for selected functions and their specimen IDs std::vector<std::string> constraints; if (!opt.entry_vas.empty()) constraints.push_back("func.entry_va " + SqlDatabase::in(opt.entry_vas)); if (!opt.names.empty()) constraints.push_back("func.name " + SqlDatabase::in_strings(opt.names, tx->driver())); if (!opt.specimens.empty()) constraints.push_back("func.specimen_id " + SqlDatabase::in(opt.specimens)); if (!opt.files.empty()) constraints.push_back("func.file_id " + SqlDatabase::in(opt.files)); if (!opt.functions.empty()) constraints.push_back("func.id " + SqlDatabase::in(opt.functions)); if (opt.ninsns>0) constraints.push_back("func.ninsns >= " + StringUtility::numberToString(opt.ninsns)); std::string sql1 = "select func.id, func.specimen_id from semantic_functions as func"; if (!opt.function_table.empty()) sql1 += " join "+opt.function_table+" as flist on func.id = flist."+opt.function_column; if (!constraints.empty()) sql1 += " where " + StringUtility::join(" and ", constraints); tx->execute("create temporary table tmp_functions as " + sql1); // Create table tmp_inputgroups containing IDs for selected input groups std::string sql2 = "select distinct igroup_id from semantic_inputvalues where igroup_id >= " + 
StringUtility::numberToString(opt.first_fuzz); if (opt.nfuzz_set) sql2 += " and igroup_id < " + StringUtility::numberToString(opt.first_fuzz+opt.nfuzz); tx->execute("create temporary table tmp_inputgroups as " + sql2); // Create tmp_pending as the cross product of functions and inputgroups except for those already tested tx->execute("create temporary table tmp_pending as" " select func.specimen_id as specimen_id, func.id as func_id, igroup.igroup_id as igroup_id" " from tmp_functions as func" " join tmp_inputgroups as igroup" " on igroup.igroup_id is not null" // "on" clause and "is not null" (rather than "true") for portability " except" " select func.specimen_id, func.id, fio.igroup_id" " from semantic_fio as fio" " join semantic_functions as func on fio.func_id=func.id"); SqlDatabase::StatementPtr stmt = tx->statement("select distinct specimen_id, func_id, igroup_id" " from tmp_pending" " order by specimen_id, igroup_id, func_id"); for (SqlDatabase::Statement::iterator row=stmt->begin(); row!=stmt->end(); ++row) std::cout <<row.get<int>(0) <<"\t" <<row.get<int>(1) <<"\t" <<row.get<int>(2) <<"\n"; // no need to commit, but if we change this in the future, be sure to add begin_command()/finish_command() return 0; }