/**
 * Computes the weighted edit (Levenshtein) distance between two sequences.
 *
 * Replacing one element by a different one costs `cost_sub`, inserting or
 * deleting an element costs `cost_indel`, and equal elements cost nothing.
 *
 * Only two rows of the dynamic-programming table are kept, in heap-backed
 * vectors. A variable-length array is avoided on purpose: it is a compiler
 * extension rather than standard C++, and an (n+1) x (m+1) table on the stack
 * can overflow it for long sequences.
 *
 * @param sequence1  first sequence; read through length() and at()
 * @param sequence2  second sequence; read through length() and at()
 * @param cost_sub   cost of substituting one element for another
 * @param cost_indel cost of inserting or deleting a single element
 * @return the minimum total cost of editing sequence1 into sequence2
 */
unsigned int LevenshteinDistance::static_distance(const Sequence &sequence1, const Sequence &sequence2, const unsigned int cost_sub, const unsigned int cost_indel) {
  const size_t n = sequence1.length();
  const size_t m = sequence2.length();

  // previous_row[j] is the distance between the first (i-1) elements of
  // sequence1 and the first j elements of sequence2. It starts out as row 0:
  // the cost of building each prefix of sequence2 from the empty sequence.
  std::vector<unsigned int> previous_row(m + 1);
  std::vector<unsigned int> current_row(m + 1);

  for (size_t j = 0; j <= m; j++) {
    previous_row[j] = static_cast<unsigned int>(j * cost_indel);
  }

  for (size_t i = 1; i <= n; i++) {
    // Column 0: the cost of deleting the first i elements of sequence1.
    current_row[0] = static_cast<unsigned int>(i * cost_indel);

    for (size_t j = 1; j <= m; j++) {
      const unsigned int cost = (sequence1.at(i-1) != sequence2.at(j-1)) ? cost_sub : 0u;

      const unsigned int substitution = previous_row[j-1] + cost;
      const unsigned int insertion = current_row[j-1] + cost_indel;
      const unsigned int deletion = previous_row[j] + cost_indel;

      current_row[j] = std::min(std::min(substitution, insertion), deletion);
    }

    // The row just filled becomes the "previous" row of the next iteration.
    previous_row.swap(current_row);
  }

  // After the final swap (or with n == 0) previous_row is the last table row.
  return previous_row[m];
}
// Example no. 2
// 0
/**
 * Returns the value at the given percentile of `sequence`, linearly
 * interpolating between the two neighbouring elements.
 *
 * Element i is treated as sitting at percentile (i + 0.5) * 100 / size.
 * Percentiles at or below the first element's position yield front(), and
 * those at or beyond the last element's position yield back().
 *
 * NOTE(review): presumably `sequence` is expected to be sorted ascending;
 * nothing here sorts or checks it - confirm against the callers.
 *
 * @param sequence   the values to query
 * @param percentile requested percentile; values outside 0..100 are clamped
 *                   to the first / last element by the boundary checks
 * @return the interpolated value, or 0.0 when `sequence` is empty
 */
double DeterminatorInterpolated::getPercentile(const Sequence &sequence, int percentile) const {
    const size_t size = sequence.size();

    // An empty sequence has no percentile. Return a neutral value rather than
    // dividing by zero and reading front() of an empty container.
    if (size == 0) return 0.0;

    // Width, in percentile points, of the slot occupied by one element.
    const double coeff = 100.0 / size;

    // Fractional element index corresponding to the requested percentile.
    const double calcIdx = percentile / coeff - .5;

    if (calcIdx <= 0) return sequence.front();
    if (calcIdx >= size - 1) return sequence.back();

    const size_t idx0 = static_cast<size_t>(floor(calcIdx));

    // NOTE(review): the neighbours are read as int, so fractional element
    // values would be truncated here - confirm Sequence's element type.
    const int v0 = sequence.at(idx0);
    const int v1 = sequence.at(idx0 + 1);

    // Interpolation weight in [0, 1): how far the requested position lies
    // between idx0 and idx0 + 1. It must be taken from calcIdx so that it uses
    // the same half-slot offset as the index itself; otherwise the result
    // jumps at the clamped ends and can overshoot v1.
    const double fraction = calcIdx - idx0;

    return v0 + (v1 - v0) * fraction;
}
/**
 * Checks whether `sequence` is far enough from every sequence in `seqs`.
 *
 * For each stored sequence the weighted edit distance between its first `n`
 * elements and `sequence` is computed using the member costs `cost_sub_` and
 * `cost_indel_`. The check stops at the first stored sequence whose distance
 * is below `min_dist`.
 *
 * Only two rows of the dynamic-programming table are kept, in heap-backed
 * vectors. A variable-length array is avoided on purpose: it is a compiler
 * extension rather than standard C++ and can overflow the stack.
 *
 * @param seqs     the sequences already accepted
 * @param sequence the candidate sequence
 * @param n        number of leading elements of each stored sequence that are
 *                 compared; each stored sequence is read with at() up to index
 *                 n-1 (presumably n is the common length of all sequences in
 *                 `seqs` - confirm against the callers)
 * @param min_dist minimum distance required to every stored sequence
 * @return true if no stored sequence is closer than `min_dist` (also when
 *         `seqs` is empty), false otherwise
 */
bool LevenshteinDistance::is_seq_insertable(const std::vector<Sequence> &seqs, const Sequence &sequence, const size_t n, const unsigned int min_dist) {
  const size_t n_elements = seqs.size();
  const size_t m = sequence.length();

  // Two rolling rows of the distance table, reused for every stored sequence.
  std::vector<unsigned int> previous_row(m + 1);
  std::vector<unsigned int> current_row(m + 1);

  for (size_t seq1_index = 0; seq1_index < n_elements; seq1_index++) {
    // Bind by reference: the stored sequence is only read, never modified.
    const Sequence &sequence1 = seqs.at(seq1_index);

    // Row 0: the cost of building each prefix of `sequence` from nothing.
    for (size_t j = 0; j <= m; j++) {
      previous_row[j] = static_cast<unsigned int>(j * cost_indel_);
    }

    for (size_t i = 1; i <= n; i++) {
      // Column 0: the cost of deleting the first i elements of sequence1.
      current_row[0] = static_cast<unsigned int>(i * cost_indel_);

      for (size_t j = 1; j <= m; j++) {
        const unsigned int cost = (sequence1.at(i-1) != sequence.at(j-1)) ? cost_sub_ : 0u;

        const unsigned int substitution = previous_row[j-1] + cost;
        const unsigned int insertion = current_row[j-1] + cost_indel_;
        const unsigned int deletion = previous_row[j] + cost_indel_;

        current_row[j] = std::min(std::min(substitution, insertion), deletion);
      }

      previous_row.swap(current_row);
    }

    // previous_row now holds the last table row; its final cell is the
    // distance to this stored sequence.
    if (previous_row[m] < min_dist) {
      return false;
    }
  }

  return true;
}
/**
 * Returns the smallest weighted edit distance between `sequence` and any of
 * the sequences in `seqs`.
 *
 * Replacing one element by a different one costs `cost_sub`, inserting or
 * deleting an element costs `cost_indel`, and equal elements cost nothing.
 * The per-cell cost therefore has to start at 0: starting it at `cost_sub`
 * would charge matching elements as if they were substitutions.
 *
 * Only two rows of the dynamic-programming table are kept, in heap-backed
 * vectors. A variable-length array is avoided on purpose: it is a compiler
 * extension rather than standard C++ and can overflow the stack.
 *
 * @param seqs       the sequences to compare against
 * @param sequence   the query sequence
 * @param n          number of leading elements of each stored sequence that
 *                   are compared; each stored sequence is read with at() up to
 *                   index n-1 (presumably n is the common length of all
 *                   sequences in `seqs` - confirm against the callers)
 * @param cost_sub   cost of substituting one element for another
 * @param cost_indel cost of inserting or deleting a single element
 * @return the minimum distance found, or UINT_MAX when `seqs` is empty
 */
unsigned int LevenshteinDistance::static_min_seq_distance(const std::vector< Sequence > &seqs, const Sequence &sequence, const size_t n, const unsigned int cost_sub, const unsigned int cost_indel) {
  const size_t n_elements = seqs.size();
  const size_t m = sequence.length();

  unsigned int global_min_dist = UINT_MAX;

  // Two rolling rows of the distance table, reused for every stored sequence.
  std::vector<unsigned int> previous_row(m + 1);
  std::vector<unsigned int> current_row(m + 1);

  for (size_t seq1_index = 0; seq1_index < n_elements; seq1_index++) {
    // Bind by reference: the stored sequence is only read, never modified.
    const Sequence &sequence1 = seqs.at(seq1_index);

    // Row 0: the cost of building each prefix of `sequence` from nothing.
    for (size_t j = 0; j <= m; j++) {
      previous_row[j] = static_cast<unsigned int>(j * cost_indel);
    }

    for (size_t i = 1; i <= n; i++) {
      // Column 0: the cost of deleting the first i elements of sequence1.
      current_row[0] = static_cast<unsigned int>(i * cost_indel);

      for (size_t j = 1; j <= m; j++) {
        // Equal elements are free; only a mismatch pays the substitution cost.
        const unsigned int cost = (sequence1.at(i-1) != sequence.at(j-1)) ? cost_sub : 0u;

        const unsigned int substitution = previous_row[j-1] + cost;
        const unsigned int insertion = current_row[j-1] + cost_indel;
        const unsigned int deletion = previous_row[j] + cost_indel;

        current_row[j] = std::min(std::min(substitution, insertion), deletion);
      }

      previous_row.swap(current_row);
    }

    // previous_row now holds the last table row; its final cell is the
    // distance to this stored sequence.
    global_min_dist = std::min(global_min_dist, previous_row[m]);
  }

  return global_min_dist;
}