// Count how many tests are missing first_error information when it should be available. static void countMissingErrors(const SqlDatabase::TransactionPtr &tx, const Settings &settings) { std::vector<std::string> args; SqlDatabase::StatementPtr q = tx->statement("select count(*)" " from test_results test" " join attachments att on test.id = att.test_id" + sqlWhereClause(tx, settings, args) + " and" " test.first_error is null and" " test.status <> 'end' and" " att.name = 'Final output'"); sqlBindArgs(q, args); int n = q->execute_int(); std::cout <<n <<"\n"; }
// Run the LSH clone-detection code over every group of vectors, starting with the most populated group.
//
// The function performs these steps:
//   1. Replaces the contents of the detection_parameters table with the similarity threshold and the false
//      negative rate derived from it (0 when the threshold is exactly 1.0, otherwise 0.01).
//   2. For each range returned by computeranges(), counts the rows of the vectors table whose sum_of_counts lies
//      in that range (a "high" of -1 means the range has no upper bound) and remembers the largest group.
//   3. Creates a scratch directory with mkdtemp() to hold the parameter file handed to the LSH executors.  If the
//      directory cannot be created the error is reported with perror() and the process exits with status 1.
//   4. Invokes the LSH executor once for every group that has more than one element: the largest group first,
//      then the remaining groups in their original order.
//   5. Removes the parameter file and the scratch directory.
//
// Parameters:
//   tx                    transaction used for all SQL statements issued here
//   databaseName          forwarded unchanged to the LSH executors
//   similarity_threshold  stored in detection_parameters; also determines the distance passed to computeranges()
//   Exec                  forwarded unchanged to the LSH executors
//   norm                  3 selects executeLSHCode(); any other value selects executeLSHCodeLLNL()
//   hash_function_size    not used by this function
//   hash_table_count      not used by this function
static void
callLSH(const SqlDatabase::TransactionPtr &tx, const std::string databaseName, double similarity_threshold,
        const string& Exec, int norm, size_t hash_function_size, size_t hash_table_count)
{
    double distance = sqrt((1. - similarity_threshold) * 50.);
    double false_negative_rate = (similarity_threshold != 1.0) ? 0.0100 : 0;
    vector<CloneRange> ranges = computeranges(distance, 50, 100000);

    // FIXME: We can't pass parameters to the exec'd process this way because the parent's SQL statements are
    //        being executed in a transaction -- they won't be visible in the child. [Robb P. Matzke 2013-08-12]
    tx->execute("delete from detection_parameters");
    tx->statement("insert into detection_parameters (similarity_threshold, false_negative_rate) values (?, ?)")
        ->bind(0, similarity_threshold)
        ->bind(1, false_negative_rate)
        ->execute();

    // Count the vectors that fall into each range and track which range holds the most.
    map<size_t, int> groupSizes;
    int maxNumElementsInGroup = -1;
    int maxNumElementIdx = -1;
    std::cout << "Looking for the biggest group" << std::endl;
    for (size_t i = 0; i < ranges.size(); ++i) {
        std::string sql = std::string("select count(*) from vectors where sum_of_counts >= ?") +
                          (ranges[i].high != -1 ? " and sum_of_counts <= ?" : "");
        SqlDatabase::StatementPtr cmd = tx->statement(sql);
        cmd->bind(0, ranges[i].low);
        if (ranges[i].high != -1)
            cmd->bind(1, ranges[i].high);
        int numElementsInGroup = cmd->execute_int();
        groupSizes[i] = numElementsInGroup;
        std::cerr << "The current group from " << ranges[i].low << " to " << ranges[i].high
                  << " is of size " << numElementsInGroup << std::endl;
        if (numElementsInGroup > maxNumElementsInGroup) {
            maxNumElementsInGroup = numElementsInGroup;
            maxNumElementIdx = static_cast<int>(i);
        }
    }

    // Without any ranges there is no biggest group; indexing ranges[] with -1 below would be out of bounds.
    if (maxNumElementIdx < 0) {
        std::cerr << "No groups to process" << std::endl;
        return;
    }
    const size_t biggestGroup = static_cast<size_t>(maxNumElementIdx);

    std::cout << "Biggest group found " << ranges[biggestGroup].low << " "
              << ranges[biggestGroup].high << std::endl;

    char tempDirName[] = "/tmp/paramdirXXXXXX";
    char* mkdtempResult = mkdtemp(tempDirName);
    if (!mkdtempResult) {
        perror("mkdtemp: ");
        exit (1);
    }

    // The parameter file lives inside the scratch directory created above, so the unlink()/rmdir() pair at the
    // end of this function cleans up exactly what was created here.
    string paramFileName = string(tempDirName) + "/params";

    std::cout << "Number of groups :" << ranges.size() << std::endl;
    for (size_t i = 0; i < ranges.size(); ++i) {
        // Visit the biggest group first; the groups that precede it are shifted back by one position and the
        // groups that follow it keep their position, so every group is visited exactly once.
        const size_t group = (i == 0) ? biggestGroup : (i <= biggestGroup ? i - 1 : i);
        const int groupSize = groupSizes[group];
        if (groupSize > 1) {
            std::cout << "Executing LSH code low " << ranges[group].low
                      << " high " << ranges[group].high
                      << " group " << group << " size " << groupSize << std::endl;
            if (norm == 3) {
                executeLSHCode(tx, databaseName, Exec, paramFileName, ranges[group]);
            } else {
                executeLSHCodeLLNL(tx, databaseName, Exec, paramFileName, ranges[group], norm,
                                   similarity_threshold, false_negative_rate, groupSize);
            }
        }
    }

    unlink(paramFileName.c_str());
    rmdir(tempDirName);
}