unsigned int LevenshteinDistance::static_distance(const Sequence &sequence1, const Sequence &sequence2, const unsigned int cost_sub, const unsigned int cost_indel) { size_t n = sequence1.length(); size_t m = sequence2.length(); unsigned int distances[n+1][m+1]; // Distance from empty string to the position for (size_t i = 0; i <= n; i++) { distances[i][0] = i * cost_indel; } for (size_t j = 0; j <= m; j++) { distances[0][j] = j * cost_indel; } for (size_t i = 1; i <= n; i++) { for (size_t j = 1; j <= m; j++) { unsigned int cost = 0; if (sequence1.at(i-1) != sequence2.at(j-1)) { cost = cost_sub; } distances[i][j] = std::min(std::min(distances[i-1][j-1] + cost, distances[i][j-1] + cost_indel), distances[i-1][j] + cost_indel); } } unsigned int min_distance = distances[n][m]; return (min_distance); }
/// Returns the requested percentile of a sequence using linear interpolation
/// between neighbouring elements.
///
/// Element k (0-based) is treated as sitting at percentile 100/size * (k + 0.5).
/// Requests at or below the first element's position return the first element,
/// requests at or above the last element's position return the last element,
/// and everything in between is interpolated linearly between the two
/// surrounding elements.
///
/// NOTE(review): the function does not sort; presumably callers pass the values
/// in ascending order - confirm against the call sites.
/// NOTE(review): an empty sequence is not handled here (front() is called on
/// it), so callers must pass at least one element.
///
/// @param sequence   values to take the percentile of
/// @param percentile requested percentile
/// @return the interpolated value
double DeterminatorInterpolated::getPercentile(const Sequence &sequence, int percentile) const {
    const size_t size = sequence.size();
    const double coeff = 100.0 / size;

    // Fractional element index that corresponds to the requested percentile.
    const double calcIdx = percentile / coeff - .5;

    if (calcIdx <= 0) {
        return sequence.front();
    }
    if (calcIdx >= static_cast<double>(size - 1)) {
        return sequence.back();
    }

    // calcIdx is strictly positive here, so truncation equals floor().
    const size_t idx0 = static_cast<size_t>(calcIdx);
    const int v0 = sequence.at(idx0);
    const int v1 = sequence.at(idx0 + 1);

    // Interpolation weight in [0, 1): how far the request lies between the
    // positions of element idx0 and element idx0 + 1. Measuring from the left
    // edge 100/size * idx0 instead would shift the weight by 0.5, producing a
    // jump at both clamped ends and results beyond v1.
    const double weight = calcIdx - static_cast<double>(idx0);

    return v0 + (static_cast<double>(v1) - v0) * weight;
}
bool LevenshteinDistance::is_seq_insertable(const std::vector<Sequence> &seqs, const Sequence &sequence, const size_t n, const unsigned int min_dist) { size_t n_elements = seqs.size(); size_t m = sequence.length(); unsigned int distances[n+1][m+1]; for (size_t i = 0; i <= n; i++) { distances[i][0] = i * cost_indel_; } for (size_t j = 0; j <= m; j++) { distances[0][j] = j * cost_indel_; } bool seq_is_insertable = true; for (size_t seq1_index = 0; seq1_index < n_elements && seq_is_insertable; seq1_index++) { Sequence sequence1 = seqs.at(seq1_index); for (size_t i = 1; i <= n; i++) { for (size_t j = 1; j <= m; j++) { unsigned int cost = 0; if (sequence1.at(i-1) != sequence.at(j-1)) { cost = cost_sub_; } distances[i][j] = std::min(std::min(distances[i-1][j-1] + cost, distances[i][j-1] + cost_indel_), distances[i-1][j] + cost_indel_); } } unsigned int min_distance = distances[n][m]; if (min_distance < min_dist) seq_is_insertable = false; } return seq_is_insertable; }
unsigned int LevenshteinDistance::static_min_seq_distance(const std::vector< Sequence > &seqs, const Sequence &sequence, const size_t n, const unsigned int cost_sub, const unsigned int cost_indel) { size_t n_elements = seqs.size(); size_t m = sequence.length(); unsigned int global_min_dist = UINT_MAX; unsigned int distances[n+1][m+1]; for (size_t i = 0; i <= n; i++) { distances[i][0] = i * cost_indel; } for (size_t j = 0; j <= m; j++) { distances[0][j] = j * cost_indel; } for (size_t seq1_index = 0; seq1_index < n_elements; seq1_index++) { Sequence sequence1 = seqs.at(seq1_index); for (size_t i = 1; i <= n; i++) { for (size_t j = 1; j <= m; j++) { unsigned int cost = cost_sub; if (sequence1.at(i-1) != sequence.at(j-1)) { cost = cost_sub; } distances[i][j] = std::min(std::min(distances[i-1][j-1] + cost, distances[i][j-1] + cost_indel), distances[i-1][j] + cost_indel); } } unsigned int min_distance = distances[n][m]; if (min_distance < global_min_dist) global_min_dist = min_distance; } return(global_min_dist); }