Exemplo n.º 1
0
/*
  Return the unit edit cost of <alignment>, walking its multi edit operation
  list from the last entry down to the first.
  Every column of a Mismatch, Deletion or Insertion entry costs 1. Columns of
  Match and Replacement entries are re-checked against the sequences:
  - if <mapped> is true, a column costs 1 when either symbol satisfies
    ISSPECIAL or when the two symbols differ;
  - otherwise the symbols are compared directly, after tolower() if
    <downcase> is true.
  <mapped> and <downcase> must not both be true (asserted).
*/
GtUword gt_alignment_eval_generic(bool mapped,bool downcase,
                                  const GtAlignment *alignment)
{
  GtUword remaining, upos = 0, vpos = 0, cost = 0;

  gt_assert(alignment != NULL && (!mapped || !downcase));
#ifndef NDEBUG
  gt_assert(gt_alignment_is_valid(alignment));
#endif

  remaining = gt_multieoplist_get_num_entries(alignment->eops);
  while (remaining > 0)
  {
    GtMultieop op;
    GtUword col;

    /* entries are consumed back to front */
    remaining--;
    op = gt_multieoplist_get_entry(alignment->eops, remaining);
    switch (op.type)
    {
      case Deletion:
        /* only u advances */
        cost += op.steps;
        upos += op.steps;
        break;
      case Insertion:
        /* only v advances */
        cost += op.steps;
        vpos += op.steps;
        break;
      case Mismatch:
        cost += op.steps;
        upos += op.steps;
        vpos += op.steps;
        break;
      case Match:
      case Replacement:
        for (col = 0; col < op.steps; col++, upos++, vpos++)
        {
          GtUchar cu = alignment->u[upos],
                  cv = alignment->v[vpos];
          bool differs;

          if (mapped)
          {
            differs = (ISSPECIAL(cu) || ISSPECIAL(cv) || cu != cv);
          } else
          {
            if (downcase)
            {
              cu = tolower((int) cu);
              cv = tolower((int) cv);
            }
            differs = (cu != cv);
          }
          if (differs)
          {
            cost++;
          }
        }
        break;
    }
  }
  return cost;
}
Exemplo n.º 2
0
/*
  Return the score of <alignment>, walking its multi edit operation list from
  the last entry down to the first.
  Each column of a Mismatch, Match or Replacement entry adds <matchscore> if
  the two aligned symbols are equal and the symbol from u satisfies
  ISNOTSPECIAL; otherwise it adds <mismatchscore>.
  Each column of a Deletion or Insertion entry adds <gapscore>.
*/
GtWord gt_alignment_eval_with_score(const GtAlignment *alignment,
                                  GtWord matchscore,
                                  GtWord mismatchscore,
                                  GtWord gapscore)
{
  GtUword entry, left, upos = 0, vpos = 0, numentries;
  GtWord score = 0;
  GtMultieop *op;

  gt_assert(alignment != NULL);
  gt_assert(gt_alignment_is_valid(alignment));

  numentries = gt_multieoplist_get_length(alignment->eops);

  for (entry = numentries; entry > 0; entry--)
  {
    op = gt_multieoplist_get_entry(alignment->eops, entry - 1);
    switch (op->type)
    {
      case Deletion:
        /* gap in v: only u advances */
        score += gapscore * op->steps;
        upos += op->steps;
        break;
      case Insertion:
        /* gap in u: only v advances */
        score += gapscore * op->steps;
        vpos += op->steps;
        break;
      case Mismatch:
      case Match:
      case Replacement:
        /* the entry type is not trusted here; every column is re-compared */
        for (left = op->steps; left > 0; left--, upos++, vpos++)
        {
          if (alignment->u[upos] != alignment->v[vpos])
          {
            score += mismatchscore;
          } else if (ISNOTSPECIAL(alignment->u[upos]))
          {
            score += matchscore;
          } else
          {
            score += mismatchscore;
          }
        }
        break;
    }
  }
  return score;
}
Exemplo n.º 3
0
/*
  Return the unit edit cost of <alignment>, walking its multi edit operation
  list from the last entry down to the first.
  Every column of a Mismatch, Deletion or Insertion entry costs 1. A column of
  a Match or Replacement entry costs 1 only if the two aligned symbols still
  differ after both have been passed through tolower().
*/
GtUword gt_alignment_eval(const GtAlignment *alignment)
{
  GtUword entry, left, upos = 0, vpos = 0, cost = 0, numentries;
  GtMultieop *op;

  gt_assert(alignment != NULL);
  gt_assert(gt_alignment_is_valid(alignment));

  numentries = gt_multieoplist_get_length(alignment->eops);
  for (entry = numentries; entry > 0; entry--)
  {
    op = gt_multieoplist_get_entry(alignment->eops, entry - 1);
    switch (op->type)
    {
      case Deletion:
        /* only u advances */
        cost += op->steps;
        upos += op->steps;
        break;
      case Insertion:
        /* only v advances */
        cost += op->steps;
        vpos += op->steps;
        break;
      case Mismatch:
        /* every column counts, both sequences advance */
        cost += op->steps;
        upos += op->steps;
        vpos += op->steps;
        break;
      case Match:
      case Replacement:
        for (left = op->steps; left > 0; left--, upos++, vpos++)
        {
          if (tolower((int) alignment->u[upos]) !=
              tolower((int) alignment->v[vpos]))
          {
            cost++;
          }
        }
        break;
    }
  }
  return cost;
}
Exemplo n.º 4
0
/*
  Write <alignment> to <fp> as three newline-terminated rows: the symbols of
  u (with GAPSYMBOL in insertion columns), a row of MATCHSYMBOL and
  MISMATCHSYMBOL markers, and the symbols of v (with GAPSYMBOL in deletion
  columns).
  A sequence symbol satisfying ISSPECIAL is printed as <wildcardshow>; any
  other symbol s is printed as <characters>[s]. A column is marked as a match
  only if both symbols are equal and the u symbol satisfies ISNOTSPECIAL.
  The multi edit operation list is traversed from its last entry to its first
  for each of the three rows.
*/
void gt_alignment_show_with_mapped_chars(const GtAlignment *alignment,
                                         const GtUchar *characters,
                                         GtUchar wildcardshow,
                                         FILE *fp)
{
  GtUword entry, left, upos, vpos, numentries;
  GtMultieop *op;

  gt_assert(alignment);
  gt_assert(gt_alignment_is_valid(alignment));

  numentries = gt_multieoplist_get_length(alignment->eops);

  /* top row: sequence u */
  upos = 0;
  for (entry = numentries; entry > 0; entry--)
  {
    op = gt_multieoplist_get_entry(alignment->eops, entry - 1);
    switch (op->type)
    {
      case Insertion:
        for (left = op->steps; left > 0; left--)
        {
          gt_xfputc(GAPSYMBOL, fp);
        }
        break;
      case Deletion:
      case Mismatch:
      case Match:
      case Replacement:
        for (left = op->steps; left > 0; left--, upos++)
        {
          int shown;

          if (ISSPECIAL(alignment->u[upos]))
          {
            shown = (int) wildcardshow;
          } else
          {
            shown = (int) characters[alignment->u[upos]];
          }
          gt_xfputc(shown, fp);
        }
        break;
    }
  }
  gt_xfputc('\n', fp);

  /* middle row: match/mismatch markers */
  upos = 0;
  vpos = 0;
  for (entry = numentries; entry > 0; entry--)
  {
    op = gt_multieoplist_get_entry(alignment->eops, entry - 1);
    switch (op->type)
    {
      case Deletion:
        for (left = op->steps; left > 0; left--)
        {
          gt_xfputc(MISMATCHSYMBOL, fp);
        }
        upos += op->steps;
        break;
      case Insertion:
        for (left = op->steps; left > 0; left--)
        {
          gt_xfputc(MISMATCHSYMBOL, fp);
        }
        vpos += op->steps;
        break;
      case Mismatch:
      case Match:
      case Replacement:
        for (left = op->steps; left > 0; left--, upos++, vpos++)
        {
          if (alignment->u[upos] != alignment->v[vpos] ||
              !ISNOTSPECIAL(alignment->u[upos]))
          {
            gt_xfputc(MISMATCHSYMBOL, fp);
          } else
          {
            gt_xfputc(MATCHSYMBOL, fp);
          }
        }
        break;
    }
  }
  gt_xfputc('\n', fp);

  /* bottom row: sequence v */
  vpos = 0;
  for (entry = numentries; entry > 0; entry--)
  {
    op = gt_multieoplist_get_entry(alignment->eops, entry - 1);
    switch (op->type)
    {
      case Deletion:
        for (left = op->steps; left > 0; left--)
        {
          gt_xfputc(GAPSYMBOL, fp);
        }
        break;
      case Insertion:
      case Mismatch:
      case Match:
      case Replacement:
        for (left = op->steps; left > 0; left--, vpos++)
        {
          int shown;

          if (ISSPECIAL(alignment->v[vpos]))
          {
            shown = (int) wildcardshow;
          } else
          {
            shown = (int) characters[alignment->v[vpos]];
          }
          gt_xfputc(shown, fp);
        }
        break;
    }
  }
  gt_xfputc('\n', fp);
}
Exemplo n.º 5
0
/*
  Write <alignment> to <fp> as three newline-terminated rows: the symbols of
  u (with GAPSYMBOL in insertion columns), a row of MATCHSYMBOL and
  MISMATCHSYMBOL markers, and the symbols of v (with GAPSYMBOL in deletion
  columns). Sequence symbols are printed as stored; two aligned symbols count
  as a match if they are equal after tolower().
  The multi edit operation list is traversed from its last entry to its first
  for each of the three rows.
  XXX: add width parameter and format the GtAlignment accordingly
*/
void gt_alignment_show(const GtAlignment *alignment, FILE *fp)
{
  GtUword entry, left, upos, vpos, numentries;
  GtMultieop *op;

  gt_assert(alignment);
  gt_assert(gt_alignment_is_valid(alignment));

  numentries = gt_multieoplist_get_length(alignment->eops);

  /* top row: sequence u */
  upos = 0;
  for (entry = numentries; entry > 0; entry--)
  {
    op = gt_multieoplist_get_entry(alignment->eops, entry - 1);
    switch (op->type)
    {
      case Insertion:
        for (left = op->steps; left > 0; left--)
        {
          gt_xfputc(GAPSYMBOL, fp);
        }
        break;
      case Deletion:
      case Mismatch:
      case Match:
      case Replacement:
        for (left = op->steps; left > 0; left--)
        {
          gt_xfputc((int) alignment->u[upos], fp);
          upos++;
        }
        break;
    }
  }
  gt_xfputc('\n', fp);

  /* middle row: match/mismatch markers */
  upos = 0;
  vpos = 0;
  for (entry = numentries; entry > 0; entry--)
  {
    op = gt_multieoplist_get_entry(alignment->eops, entry - 1);
    switch (op->type)
    {
      case Deletion:
        for (left = op->steps; left > 0; left--)
        {
          gt_xfputc(MISMATCHSYMBOL, fp);
        }
        upos += op->steps;
        break;
      case Insertion:
        for (left = op->steps; left > 0; left--)
        {
          gt_xfputc(MISMATCHSYMBOL, fp);
        }
        vpos += op->steps;
        break;
      case Mismatch:
      case Match:
      case Replacement:
        for (left = op->steps; left > 0; left--)
        {
          int lower_u = tolower((int) alignment->u[upos]),
              lower_v = tolower((int) alignment->v[vpos]);

          upos++;
          vpos++;
          if (lower_u != lower_v)
          {
            gt_xfputc(MISMATCHSYMBOL, fp);
          } else
          {
            gt_xfputc(MATCHSYMBOL, fp);
          }
        }
        break;
    }
  }
  gt_xfputc('\n', fp);

  /* bottom row: sequence v */
  vpos = 0;
  for (entry = numentries; entry > 0; entry--)
  {
    op = gt_multieoplist_get_entry(alignment->eops, entry - 1);
    switch (op->type)
    {
      case Deletion:
        for (left = op->steps; left > 0; left--)
        {
          gt_xfputc(GAPSYMBOL, fp);
        }
        break;
      case Insertion:
      case Mismatch:
      case Match:
      case Replacement:
        for (left = op->steps; left > 0; left--)
        {
          gt_xfputc((int) alignment->v[vpos], fp);
          vpos++;
        }
        break;
    }
  }
  gt_xfputc('\n', fp);
}
Exemplo n.º 6
0
/*
  Write <alignment> to <fp> as rows of three lines each (u symbols, match
  markers, v symbols), filling the caller supplied <buffer> column by column.
  <buffer> is split into three consecutive regions of <width> + 1 elements
  each (top, middle, low); index <width> of each region is set to '\n', so
  <buffer> must provide at least 3 * (<width> + 1) elements.
  If <characters> is not NULL, a sequence symbol s is shown as
  <wildcardshow> when ISSPECIAL(s) holds and as <characters>[s] otherwise,
  and a column is a match only if both symbols are equal and not special.
  If <characters> is NULL, symbols are shown as stored and compared directly,
  after tolower() if <downcase> is true. <characters> != NULL and <downcase>
  must not be combined (asserted).
  Matching columns whose u position lies inside the seed
  [useedoffset, useedoffset + seedlen) are shown as '+' if seed_display is
  set. If alignment->pol_info is not NULL, a "# polishing" summary line is
  printed with printf (i.e. to stdout, not to <fp>).
*/
void gt_alignment_show_generic(GtUchar *buffer,
                               bool downcase,
                               const GtAlignment *alignment,
                               FILE *fp,
                               unsigned int width,
                               const GtUchar *characters,
                               GtUchar wildcardshow)
{
  GtMultieop meop;
  GtUword idx_eop, idx_u = 0, idx_v = 0, meoplen, alignmentlength = 0,
          suffix_bits_used = 0, prefix_positive = 0, pol_size = 0,
          firstseedcolumn = GT_UWORD_MAX,
          lastseedcolumn = GT_UWORD_MAX;
  const GtUword max_history = 64;
  unsigned int pos = 0;
  GtUchar *topbuf = buffer, *midbuf = NULL, *lowbuf = NULL;
  GtWord prefix_positive_sum = 0;
  uint64_t suffix_bits = 0, set_mask = 0;

  /* NOTE(review): alignment is dereferenced here before the
     gt_assert(alignment != NULL) a few lines below - confirm the intended
     order */
  if (alignment->pol_info != NULL)
  {
    pol_size = GT_MULT2(alignment->pol_info->cut_depth);
    /* mask selecting the highest of the max_history bits */
    set_mask = ((uint64_t) 1) << (max_history - 1);
  }
  gt_assert(alignment != NULL && (characters == NULL || !downcase));
  /* carve the three line buffers out of <buffer>; each gets a '\n' at
     index width */
  topbuf[width] = '\n';
  midbuf = topbuf + width + 1;
  midbuf[width] = '\n';
  lowbuf = midbuf + width + 1;
  lowbuf[width] = '\n';
  meoplen = gt_multieoplist_get_num_entries(alignment->eops);
  gt_assert(meoplen > 0);
  /* the list is traversed from its last entry down to its first */
  idx_eop = meoplen - 1;
  while (true)
  {
    meop = gt_multieoplist_get_entry(alignment->eops, idx_eop);
    switch (meop.type)
    {
      GtUword j;

      case Mismatch:
      case Match:
      case Replacement:
        /* stop early if either sequence is exhausted */
        for (j = 0; j < meop.steps && idx_u < alignment->ulen &&
                                      idx_v < alignment->vlen; j++)
        {
          GtUchar a = alignment->u[idx_u];
          GtUchar b = alignment->v[idx_v];
          bool is_match;

          if (characters != NULL)
          {
            topbuf[pos] = ISSPECIAL(a) ? wildcardshow : characters[a];
            is_match = (a == b && !ISSPECIAL(a)) ? true : false;
            lowbuf[pos] = ISSPECIAL(b) ? wildcardshow : characters[b];
          } else
          {
            topbuf[pos] = a;
            is_match = ((downcase && tolower((int) a) == tolower((int) b)) ||
                        (!downcase && a == b)) ? true : false;
            lowbuf[pos] = b;
          }
          if (is_match)
          {
            /* is the current u position inside the seed? */
            if (alignment->useedoffset <= idx_u &&
                idx_u < alignment->useedoffset + alignment->seedlen)
            {
              if (alignment->seed_display)
              {
                midbuf[pos] = (GtUchar) '+';
              } else
              {
                midbuf[pos] = (GtUchar) MATCHSYMBOL;
              }
              /* remember first and last alignment column of matching seed
                 positions */
              if (firstseedcolumn == GT_UWORD_MAX)
              {
                firstseedcolumn = alignmentlength;
              }
              lastseedcolumn = alignmentlength;
            } else
            {
              midbuf[pos] = (GtUchar) MATCHSYMBOL;
            }
          } else
          {
            midbuf[pos] = (GtUchar) MISMATCHSYMBOL;
          }
          /* helper not visible here; presumably flushes the three lines to
             fp once a row of width columns is complete and returns the next
             column position - TODO confirm */
          pos = gt_alignment_show_advance(pos,width,topbuf,fp);
          /* macro not visible here; presumably maintains prefix_positive,
             prefix_positive_sum, suffix_bits and suffix_bits_used - TODO
             confirm */
          GT_UPDATE_POSITIVE_INFO(is_match);
          alignmentlength++;
          idx_u++;
          idx_v++;
        }
        break;
      case Deletion:
        /* symbol of u aligned with a gap */
        for (j = 0; j < meop.steps && idx_u < alignment->ulen; j++)
        {
          GtUchar a = alignment->u[idx_u++];

          if (characters != NULL)
          {
            topbuf[pos] = ISSPECIAL(a) ? wildcardshow : characters[a];
          } else
          {
            topbuf[pos] = a;
          }
          midbuf[pos] = (GtUchar) MISMATCHSYMBOL;
          lowbuf[pos] = (GtUchar) GAPSYMBOL;
          pos = gt_alignment_show_advance(pos,width,topbuf,fp);
          GT_UPDATE_POSITIVE_INFO(false);
          alignmentlength++;
        }
        break;
      case Insertion:
        /* gap aligned with a symbol of v */
        for (j = 0; j < meop.steps && idx_v < alignment->vlen; j++)
        {
          GtUchar b = alignment->v[idx_v++];

          topbuf[pos] = (GtUchar) GAPSYMBOL;
          midbuf[pos] = (GtUchar) MISMATCHSYMBOL;
          if (characters != NULL)
          {
            lowbuf[pos] = ISSPECIAL(b) ? wildcardshow : characters[b];
          } else
          {
            lowbuf[pos] = b;
          }
          pos = gt_alignment_show_advance(pos,width,topbuf,fp);
          GT_UPDATE_POSITIVE_INFO(false);
          alignmentlength++;
        }
        break;
    }
    /* continue with the preceding entry while entries remain and at least
       one sequence has not been fully consumed */
    if (idx_eop > 0 && (idx_u < alignment->ulen || idx_v < alignment->vlen))
    {
      idx_eop--;
    } else
    {
      break;
    }
  }
  /* write out the columns still pending in the three line buffers */
  /* NOTE(review): the return values of fwrite are not checked */
  if (pos > 0)
  {
    topbuf[pos] = '\n';
    fwrite(topbuf,sizeof *topbuf,pos+1,fp);
    midbuf[pos] = '\n';
    fwrite(midbuf,sizeof *midbuf,pos+1,fp);
    lowbuf[pos] = '\n';
    fwrite(lowbuf,sizeof *lowbuf,pos+1,fp);
  }
  if (alignment->pol_info != NULL)
  {
    GtUword suffix_positive;
    GtWord suffix_positive_sum = 0;
    bool startpolished = false, endpolished = false;

    /* scan the recorded bits from the highest one downwards, adding
       match_score for a set bit and subtracting difference_score otherwise;
       stop as soon as the running sum becomes negative */
    for (suffix_positive = 0; suffix_positive < suffix_bits_used;
         suffix_positive++)
    {
      suffix_positive_sum += ((suffix_bits & set_mask)
                                ? alignment->pol_info->match_score
                                : -alignment->pol_info->difference_score);
      if (suffix_positive_sum < 0)
      {
        break;
      }
      set_mask >>= 1;
    }
    /* NOTE(review): both operands of this assertion are identical; the
       second one was possibly meant to check suffix_positive - confirm */
    gt_assert(prefix_positive <= alignmentlength &&
              prefix_positive <= alignmentlength);
    if (prefix_positive >= pol_size || prefix_positive == alignmentlength ||
        firstseedcolumn < pol_size)
    {
      startpolished = true;
    }
    if (suffix_positive >= pol_size || suffix_positive == alignmentlength ||
        (lastseedcolumn != GT_UWORD_MAX &&
        lastseedcolumn + pol_size > alignmentlength))
    {
      endpolished = true;
    }
    /* the summary is written with printf, not to fp */
    printf("# polishing(m=" GT_WD ",d=" GT_WD ",p=" GT_WU
           "): " GT_WU "/" GT_WU,
           alignment->pol_info->match_score,
           -alignment->pol_info->difference_score,
           pol_size,
           prefix_positive,
           suffix_positive);
    if (firstseedcolumn < pol_size)
    {
      printf(", seed_on_start");
    }
    /* NOTE(review): unlike the endpolished test above, this condition does
       not exclude lastseedcolumn == GT_UWORD_MAX (no seed column recorded);
       the addition would presumably wrap around in that case - confirm */
    if (lastseedcolumn + pol_size > alignmentlength)
    {
      printf(", seed_on_end");
    }
    if (alignment->withpolcheck)
    {
      printf("\n");
      gt_assert(startpolished && endpolished);
    } else
    {
      if (!startpolished)
      {
        printf(", start not polished");
      }
      if (!endpolished)
      {
        printf(", end not polished");
      }
      printf("\n");
    }
  }
Exemplo n.º 7
0
/*
  Return the affine gap score of <alignment>, walking its multi edit
  operation list from the last entry down to the first. An alignment of
  length 0 scores 0.
  Columns of Mismatch, Match and Replacement entries are scored as follows:
  - <mapped> true and <scorematrix> != NULL: the score is looked up with
    gt_score_matrix_get_score() for the two symbols;
  - <mapped> true and <scorematrix> == NULL: <mismatchscore> if either symbol
    satisfies ISSPECIAL or <characters> maps the two symbols to different
    values, <matchscore> otherwise;
  - <mapped> false: the symbols are compared directly, after tolower() if
    <downcase> is true.
  A Deletion or Insertion entry adds <gap_extension> per column, plus
  <gap_opening> once unless the entry processed immediately before it had
  the same type.
  <mapped> and <downcase> must not both be true (asserted).
*/
static GtWord gt_alignment_eval_generic_with_affine_score(
                                               bool mapped,
                                               bool downcase,
                                               const GtUchar *characters,
                                               const GtAlignment *alignment,
                                               const GtScoreMatrix *scorematrix,
                                               GtWord matchscore,
                                               GtWord mismatchscore,
                                               GtWord gap_opening,
                                               GtWord gap_extension)
{
  GtUword entry, col, upos = 0, vpos = 0, numentries;
  GtWord score = 0;
  GtMultieop op;
  /* type of the entry handled in the previous iteration; starts with a
     value that equals neither Deletion nor Insertion */
  AlignmentEoptype handled_before = Insertion + 1;

  gt_assert(alignment != NULL && (!mapped || !downcase));
  if (gt_alignment_get_length(alignment) == 0)
  {
    return 0;
  }
#ifndef NDEBUG
  gt_assert(gt_alignment_is_valid(alignment));
#endif

  numentries = gt_multieoplist_get_num_entries(alignment->eops);
  for (entry = numentries; entry > 0; entry--)
  {
    op = gt_multieoplist_get_entry(alignment->eops, entry - 1);
    switch (op.type)
    {
      case Deletion:
        /* the gap opening is charged only if this entry does not continue
           a deletion */
        score += (entry < numentries && handled_before == Deletion)
                   ? gap_extension * op.steps
                   : gap_extension * op.steps + gap_opening;
        upos += op.steps;
        break;
      case Insertion:
        /* the gap opening is charged only if this entry does not continue
           an insertion */
        score += (entry < numentries && handled_before == Insertion)
                   ? gap_extension * op.steps
                   : gap_extension * op.steps + gap_opening;
        vpos += op.steps;
        break;
      case Mismatch:
      case Match:
      case Replacement:
        for (col = 0; col < op.steps; col++, upos++, vpos++)
        {
          GtUchar cu = alignment->u[upos],
                  cv = alignment->v[vpos];

          if (!mapped)
          {
            if (downcase)
            {
              cu = tolower((int) cu);
              cv = tolower((int) cv);
            }
            if (cu != cv)
            {
              score += mismatchscore;
            } else
            {
              score += matchscore;
            }
          } else if (scorematrix != NULL)
          {
            score += gt_score_matrix_get_score(scorematrix, cu, cv);
          } else if (ISSPECIAL(cu) || ISSPECIAL(cv) ||
                     characters[cu] != characters[cv])
          {
            score += mismatchscore;
          } else
          {
            score += matchscore;
          }
        }
        break;
    }
    handled_before = op.type;
  }
  return score;
}