Пример #1
0
/*
  Write <alignment> to <fp> as three parallel text rows: a top row with the
  symbols of sequence u (or a gap symbol), a middle row with a match/mismatch
  marker per column, and a low row with the symbols of sequence v (or a gap
  symbol).

  buffer        scratch space holding the three rows one after the other; the
                code below writes at indices up to 3 * (width + 1) - 1, so it
                must provide at least 3 * (width + 1) elements.
  downcase      if true (only allowed when <characters> is NULL, see the
                assertion), symbols are compared case-insensitively via
                tolower().
  alignment     the alignment to show; its edit operations are processed from
                the last entry of alignment->eops down to the first.
  fp            output stream for the alignment rows.
  width         number of columns per row before the trailing newline.
  characters    if not NULL, used as a lookup table that maps a symbol of
                u/v to the character which is shown; symbols for which
                ISSPECIAL() holds are shown as <wildcardshow> instead.
  wildcardshow  character shown for special symbols.

  If alignment->pol_info is not NULL, a line starting with "# polishing" is
  printed after the rows. Note that this line is written with printf(), i.e.
  to stdout, not to <fp>.
*/
void gt_alignment_show_generic(GtUchar *buffer,
                               bool downcase,
                               const GtAlignment *alignment,
                               FILE *fp,
                               unsigned int width,
                               const GtUchar *characters,
                               GtUchar wildcardshow)
{
  GtMultieop meop;
  /* idx_u/idx_v: next unread position in u and v; alignmentlength: number of
     columns emitted so far; firstseedcolumn/lastseedcolumn: first and last
     matching column inside the seed, GT_UWORD_MAX meaning "none seen".
     NOTE(review): suffix_bits_used, prefix_positive, prefix_positive_sum and
     suffix_bits are never assigned in the visible code; presumably they are
     updated by the macro GT_UPDATE_POSITIVE_INFO, which is defined
     elsewhere -- confirm. */
  GtUword idx_eop, idx_u = 0, idx_v = 0, meoplen, alignmentlength = 0,
          suffix_bits_used = 0, prefix_positive = 0, pol_size = 0,
          firstseedcolumn = GT_UWORD_MAX,
          lastseedcolumn = GT_UWORD_MAX;
  const GtUword max_history = 64;
  /* pos is the current column inside the row buffers */
  unsigned int pos = 0;
  GtUchar *topbuf = buffer, *midbuf = NULL, *lowbuf = NULL;
  GtWord prefix_positive_sum = 0;
  uint64_t suffix_bits = 0, set_mask = 0;

  /* NOTE(review): alignment is dereferenced here, before the assertion
     below checks that it is not NULL. */
  if (alignment->pol_info != NULL)
  {
    /* presumably GT_MULT2 doubles its argument -- macro not visible here */
    pol_size = GT_MULT2(alignment->pol_info->cut_depth);
    /* mask with only the most significant of the 64 bits set */
    set_mask = ((uint64_t) 1) << (max_history - 1);
  }
  gt_assert(alignment != NULL && (characters == NULL || !downcase));
  /* carve the three rows out of buffer; each row has width columns followed
     by a newline at index width */
  topbuf[width] = '\n';
  midbuf = topbuf + width + 1;
  midbuf[width] = '\n';
  lowbuf = midbuf + width + 1;
  lowbuf[width] = '\n';
  meoplen = gt_multieoplist_get_num_entries(alignment->eops);
  gt_assert(meoplen > 0);
  /* iterate over the edit operations from the last entry to the first */
  idx_eop = meoplen - 1;
  while (true)
  {
    meop = gt_multieoplist_get_entry(alignment->eops, idx_eop);
    /* NOTE(review): the switch has no default case, so an entry with any
       other type is silently ignored. */
    switch (meop.type)
    {
      GtUword j;

      /* columns which consume one symbol of u and one symbol of v */
      case Mismatch:
      case Match:
      case Replacement:
        for (j = 0; j < meop.steps && idx_u < alignment->ulen &&
                                      idx_v < alignment->vlen; j++)
        {
          GtUchar a = alignment->u[idx_u];
          GtUchar b = alignment->v[idx_v];
          bool is_match;

          if (characters != NULL)
          {
            /* encoded symbols: translate for display; a special symbol
               never counts as a match */
            topbuf[pos] = ISSPECIAL(a) ? wildcardshow : characters[a];
            is_match = (a == b && !ISSPECIAL(a)) ? true : false;
            lowbuf[pos] = ISSPECIAL(b) ? wildcardshow : characters[b];
          } else
          {
            /* symbols are shown as they are; comparison optionally ignores
               case */
            topbuf[pos] = a;
            is_match = ((downcase && tolower((int) a) == tolower((int) b)) ||
                        (!downcase && a == b)) ? true : false;
            lowbuf[pos] = b;
          }
          if (is_match)
          {
            /* is the current position of u inside the seed, i.e. in
               [useedoffset, useedoffset + seedlen) ? */
            if (alignment->useedoffset <= idx_u &&
                idx_u < alignment->useedoffset + alignment->seedlen)
            {
              if (alignment->seed_display)
              {
                /* seed matches are highlighted by '+' on request */
                midbuf[pos] = (GtUchar) '+';
              } else
              {
                midbuf[pos] = (GtUchar) MATCHSYMBOL;
              }
              /* remember first and last matching column inside the seed */
              if (firstseedcolumn == GT_UWORD_MAX)
              {
                firstseedcolumn = alignmentlength;
              }
              lastseedcolumn = alignmentlength;
            } else
            {
              midbuf[pos] = (GtUchar) MATCHSYMBOL;
            }
          } else
          {
            midbuf[pos] = (GtUchar) MISMATCHSYMBOL;
          }
          /* NOTE(review): gt_alignment_show_advance is defined elsewhere;
             presumably it moves to the next column and outputs the rows to
             fp once width columns are filled -- confirm. */
          pos = gt_alignment_show_advance(pos,width,topbuf,fp);
          GT_UPDATE_POSITIVE_INFO(is_match);
          alignmentlength++;
          idx_u++;
          idx_v++;
        }
        break;
      /* columns which consume a symbol of u only: gap in the low row */
      case Deletion:
        for (j = 0; j < meop.steps && idx_u < alignment->ulen; j++)
        {
          GtUchar a = alignment->u[idx_u++];

          if (characters != NULL)
          {
            topbuf[pos] = ISSPECIAL(a) ? wildcardshow : characters[a];
          } else
          {
            topbuf[pos] = a;
          }
          midbuf[pos] = (GtUchar) MISMATCHSYMBOL;
          lowbuf[pos] = (GtUchar) GAPSYMBOL;
          pos = gt_alignment_show_advance(pos,width,topbuf,fp);
          GT_UPDATE_POSITIVE_INFO(false);
          alignmentlength++;
        }
        break;
      /* columns which consume a symbol of v only: gap in the top row */
      case Insertion:
        for (j = 0; j < meop.steps && idx_v < alignment->vlen; j++)
        {
          GtUchar b = alignment->v[idx_v++];

          topbuf[pos] = (GtUchar) GAPSYMBOL;
          midbuf[pos] = (GtUchar) MISMATCHSYMBOL;
          if (characters != NULL)
          {
            lowbuf[pos] = ISSPECIAL(b) ? wildcardshow : characters[b];
          } else
          {
            lowbuf[pos] = b;
          }
          pos = gt_alignment_show_advance(pos,width,topbuf,fp);
          GT_UPDATE_POSITIVE_INFO(false);
          alignmentlength++;
        }
        break;
    }
    /* continue with the previous entry as long as there is one and at least
       one of the two sequences is not completely consumed */
    if (idx_eop > 0 && (idx_u < alignment->ulen || idx_v < alignment->vlen))
    {
      idx_eop--;
    } else
    {
      break;
    }
  }
  /* output the remaining columns of the three rows, each terminated by a
     newline. NOTE(review): the return values of fwrite are not checked. */
  if (pos > 0)
  {
    topbuf[pos] = '\n';
    fwrite(topbuf,sizeof *topbuf,pos+1,fp);
    midbuf[pos] = '\n';
    fwrite(midbuf,sizeof *midbuf,pos+1,fp);
    lowbuf[pos] = '\n';
    fwrite(lowbuf,sizeof *lowbuf,pos+1,fp);
  }
  if (alignment->pol_info != NULL)
  {
    GtUword suffix_positive;
    GtWord suffix_positive_sum = 0;
    bool startpolished = false, endpolished = false;

    /* scan the bits of suffix_bits from the most significant one downwards:
       a set bit adds match_score, an unset bit subtracts difference_score;
       stop as soon as the running sum becomes negative. suffix_positive is
       the number of bits processed before that happened. */
    for (suffix_positive = 0; suffix_positive < suffix_bits_used;
         suffix_positive++)
    {
      suffix_positive_sum += ((suffix_bits & set_mask)
                                ? alignment->pol_info->match_score
                                : -alignment->pol_info->difference_score);
      if (suffix_positive_sum < 0)
      {
        break;
      }
      set_mask >>= 1;
    }
    /* NOTE(review): both operands of && are the same condition; possibly
       the second one was meant to check suffix_positive -- confirm. */
    gt_assert(prefix_positive <= alignmentlength &&
              prefix_positive <= alignmentlength);
    /* the start counts as polished if the positive prefix has at least
       pol_size columns, or covers the entire alignment, or a seed match
       occurs within the first pol_size columns */
    if (prefix_positive >= pol_size || prefix_positive == alignmentlength ||
        firstseedcolumn < pol_size)
    {
      startpolished = true;
    }
    /* symmetric criterion for the end of the alignment */
    if (suffix_positive >= pol_size || suffix_positive == alignmentlength ||
        (lastseedcolumn != GT_UWORD_MAX &&
        lastseedcolumn + pol_size > alignmentlength))
    {
      endpolished = true;
    }
    /* NOTE(review): the summary is written to stdout by printf, whereas the
       alignment rows are written to fp -- confirm this is intended. */
    printf("# polishing(m=" GT_WD ",d=" GT_WD ",p=" GT_WU
           "): " GT_WU "/" GT_WU,
           alignment->pol_info->match_score,
           -alignment->pol_info->difference_score,
           pol_size,
           prefix_positive,
           suffix_positive);
    if (firstseedcolumn < pol_size)
    {
      printf(", seed_on_start");
    }
    /* NOTE(review): unlike the endpolished test above, there is no check
       lastseedcolumn != GT_UWORD_MAX here; if GtUword is unsigned, the sum
       wraps around when no seed column was recorded -- confirm. */
    if (lastseedcolumn + pol_size > alignmentlength)
    {
      printf(", seed_on_end");
    }
    if (alignment->withpolcheck)
    {
      /* with the check enabled, an unpolished end is an assertion failure */
      printf("\n");
      gt_assert(startpolished && endpolished);
    } else
    {
      /* otherwise unpolished ends are only reported in the summary line */
      if (!startpolished)
      {
        printf(", start not polished");
      }
      if (!endpolished)
      {
        printf(", end not polished");
      }
      printf("\n");
    }
  }
Пример #2
0
void gt_eoplist_format_generic(FILE *fp,
                               const GtEoplist *eoplist,
                               GtEoplistReader *eoplist_reader,
                               bool distinguish_mismatch_match,
                               const GtUchar *characters,
                               GtUchar wildcardshow)
{
  GtCigarOp co;
  unsigned int pos = 0;
  GtUword idx_u = 0, idx_v = 0, alignmentlength = 0,
          firstseedcolumn = GT_UWORD_MAX;
  GtUchar *topbuf = eoplist_reader->outbuffer, *midbuf = NULL, *lowbuf = NULL;
#ifndef OUTSIDE_OF_GT
  uint64_t suffix_bits = 0, set_mask = 0;
  GtUword suffix_bits_used = 0, prefix_positive = 0, pol_size = 0,
          lastseedcolumn = GT_UWORD_MAX;
  const GtUword max_history = 64;
  GtWord prefix_positive_sum = 0;

  if (eoplist->pol_info != NULL)
  {
    pol_size = GT_MULT2(eoplist->pol_info->cut_depth);
    set_mask = ((uint64_t) 1) << (max_history - 1);
  }
#endif
  gt_assert(eoplist_reader != NULL);
  topbuf[eoplist_reader->width] = '\n';
  midbuf = topbuf + eoplist_reader->width + 1;
  midbuf[eoplist_reader->width] = '\n';
  lowbuf = midbuf + eoplist_reader->width + 1;
  lowbuf[eoplist_reader->width] = '\n';
  gt_eoplist_reader_reset(eoplist_reader,eoplist);
  if (distinguish_mismatch_match)
  {
    gt_eoplist_reader_distinguish_mismatch_match(eoplist_reader);
  }
  while (gt_eoplist_reader_next_cigar(&co,eoplist_reader))
  {
    switch (co.eoptype)
    {
      GtUword j;
      GtUchar cc_a, cc_b;

      case GtMatchOp:
      case GtMismatchOp:
        for (j = 0; j < co.iteration && idx_u < eoplist->ulen &&
                                        idx_v < eoplist->vlen; j++)
        {
          cc_a = eoplist->useq[idx_u];
          cc_b = eoplist->vseq[idx_v];
          bool is_match;

          if (characters != NULL)
          {
            topbuf[pos] = ISSPECIAL(cc_a) ? wildcardshow : characters[cc_a];
            lowbuf[pos] = ISSPECIAL(cc_b) ? wildcardshow : characters[cc_b];
            is_match = (cc_a == cc_b && !ISSPECIAL(cc_a)) ? true : false;
          } else
          {
            topbuf[pos] = cc_a;
            is_match = (cc_a == cc_b) ? true : false;
            lowbuf[pos] = cc_b;
          }
          if (is_match)
          {
            if (eoplist->useedoffset <= idx_u &&
                idx_u < eoplist->useedoffset + eoplist->seedlen)
            {
              if (eoplist->seed_display)
              {
                midbuf[pos] = (GtUchar) '+';
              } else
              {
                midbuf[pos] = (GtUchar) EOPLIST_MATCHSYMBOL;
              }
              if (firstseedcolumn == GT_UWORD_MAX)
              {
                firstseedcolumn = alignmentlength;
              }
#ifndef OUTSIDE_OF_GT
              lastseedcolumn = alignmentlength;
#endif
            } else
            {
              midbuf[pos] = (GtUchar) EOPLIST_MATCHSYMBOL;
            }
          } else
          {
            midbuf[pos] = (GtUchar) EOPLIST_MISMATCHSYMBOL;
          }
          pos = gt_eoplist_show_advance(pos,eoplist_reader->width,topbuf,fp);
          GT_UPDATE_POSITIVE_INFO(is_match);
          alignmentlength++;
          idx_u++;
          idx_v++;
        }
        break;
      case GtDeletionOp:
        for (j = 0; j < co.iteration && idx_u < eoplist->ulen; j++)
        {
          cc_a = eoplist->useq[idx_u++];
          if (characters != NULL)
          {
            topbuf[pos] = ISSPECIAL(cc_a) ? wildcardshow : characters[cc_a];
          } else
          {
            topbuf[pos] = cc_a;
          }
          midbuf[pos] = EOPLIST_MISMATCHSYMBOL;
          lowbuf[pos] = EOPLIST_GAPSYMBOL;
          pos = gt_eoplist_show_advance(pos,eoplist_reader->width,topbuf,fp);
          GT_UPDATE_POSITIVE_INFO(false);
          alignmentlength++;
        }
        break;
      case GtInsertionOp:
        for (j = 0; j < co.iteration && idx_v < eoplist->vlen; j++)
        {
          cc_b = eoplist->vseq[idx_v++];

          topbuf[pos] = EOPLIST_GAPSYMBOL;
          midbuf[pos] = EOPLIST_MISMATCHSYMBOL;
          if (characters != NULL)
          {
            lowbuf[pos] = ISSPECIAL(cc_b) ? wildcardshow : characters[cc_b];
          } else
          {
            lowbuf[pos] = cc_b;
          }
          pos = gt_eoplist_show_advance(pos,eoplist_reader->width,topbuf,fp);
          GT_UPDATE_POSITIVE_INFO(false);
          alignmentlength++;
        }
        break;
      default:
        fprintf(stderr,"file %s, line %d: illegal eoptype %d\n",
                       __FILE__,__LINE__,co.eoptype);
        exit(GT_EXIT_PROGRAMMING_ERROR);
    }
  }
  if (pos > 0)
  {
    topbuf[pos] = '\n';
    fwrite(topbuf,sizeof *topbuf,pos+1,fp);
    midbuf[pos] = '\n';
    fwrite(midbuf,sizeof *midbuf,pos+1,fp);
    lowbuf[pos] = '\n';
    fwrite(lowbuf,sizeof *lowbuf,pos+1,fp);
  }
#ifndef OUTSIDE_OF_GT
  if (eoplist->pol_info != NULL)
  {
    GtUword suffix_positive;
    GtWord suffix_positive_sum = 0;
    bool startpolished = false, endpolished = false;

    for (suffix_positive = 0; suffix_positive < suffix_bits_used;
         suffix_positive++)
    {
      suffix_positive_sum += ((suffix_bits & set_mask)
                                ? eoplist->pol_info->match_score
                                : -eoplist->pol_info->difference_score);
      if (suffix_positive_sum < 0)
      {
        break;
      }
      set_mask >>= 1;
    }
    gt_assert(prefix_positive <= alignmentlength);
    if (prefix_positive >= pol_size || prefix_positive == alignmentlength ||
        firstseedcolumn < pol_size)
    {
      startpolished = true;
    }
    if (suffix_positive >= pol_size || suffix_positive == alignmentlength ||
        (lastseedcolumn != GT_UWORD_MAX &&
         lastseedcolumn + pol_size > alignmentlength))
    {
      endpolished = true;
    }
    fprintf(fp, "# polishing(m=" GT_WD ",d=" GT_WD ",p=" GT_WU
            "): " GT_WU "/" GT_WU,
            eoplist->pol_info->match_score,
            -eoplist->pol_info->difference_score,
            pol_size,
            prefix_positive,
            suffix_positive);
    if (firstseedcolumn < pol_size)
    {
      fprintf(fp, ", seed_on_start");
    }
    if (lastseedcolumn + pol_size > alignmentlength)
    {
      fprintf(fp, ", seed_on_end");
    }
    if (eoplist->withpolcheck)
    {
      fprintf(fp, "\n");
      gt_assert(startpolished);
      gt_assert(endpolished);
    } else
    {
      if (!startpolished)
      {
        fprintf(fp, ", start not polished");
      }
      if (!endpolished)
      {
        fprintf(fp, ", end not polished");
      }
      fprintf(fp, "\n");
    }
  }