Example #1
0
/*
  Create a new edit operation list from the first <length> characters of
  <cigarstring>. A terminating '\0' is not required and not looked at.

  The string is scanned from left to right. Consecutive decimal digits are
  accumulated into a repeat count; the next non-digit character selects the
  operation the count applies to:
    DELETION_CHAR                   <count> calls of gt_eoplist_deletion_add
    INSERTION_CHAR                  <count> calls of gt_eoplist_insertion_add
    MISMATCH_CHAR                   <count> calls of gt_eoplist_mismatch_add
    MATCH_CHAR / REPLACEMENT_CHAR   one call gt_eoplist_match_add(.., <count>)
  After each operation character the count is reset to 0.

  Any other non-digit character is reported on stderr and the process is
  terminated with GT_EXIT_PROGRAMMING_ERROR.

  Returns the list obtained from gt_eoplist_new() with all operations added.

  NOTE(review): an operation character without a preceding count passes 0
  to gt_eoplist_match_add() for the match case, and digits at the very end
  of the string are silently dropped - confirm that both are acceptable.
*/
GtEoplist *gt_eoplist_new_from_cigar(const char *cigarstring,GtUword length)
{
  const char *cptr;
  GtUword iteration = 0;
  GtEoplist *eoplist = gt_eoplist_new();

  for (cptr = cigarstring; cptr < cigarstring + length; cptr++)
  {
    /* The <ctype.h> functions are only defined for EOF and for values
       representable as unsigned char. A plain char can be negative, so it
       has to be converted before it is classified. */
    if (isdigit((unsigned char) *cptr))
    {
      iteration = iteration * 10 + (GtUword) (*cptr - '0');
    } else
    {
      GtUword idx;

      switch (*cptr)
      {
        case DELETION_CHAR:
          for (idx = 0; idx < iteration; idx++)
          {
            gt_eoplist_deletion_add(eoplist);
          }
          break;
        case INSERTION_CHAR:
          for (idx = 0; idx < iteration; idx++)
          {
            gt_eoplist_insertion_add(eoplist);
          }
          break;
        case MATCH_CHAR:
        case REPLACEMENT_CHAR:
          /* matches are stored as a run, so a single call is sufficient */
          gt_eoplist_match_add(eoplist,iteration);
          break;
        case MISMATCH_CHAR:
          for (idx = 0; idx < iteration; idx++)
          {
            gt_eoplist_mismatch_add(eoplist);
          }
          break;
        default:
          fprintf(stderr,"file %s, line %d: illegal symbol '%c' "
                         "in cigar string\n",__FILE__,__LINE__,*cptr);
          exit(GT_EXIT_PROGRAMMING_ERROR);
      }
      /* the count belongs to exactly one operation character */
      iteration = 0;
    }
  }
  return eoplist;
}
/*
  Trace back through the fronts stored in <front_trace> and derive the edit
  operations of an alignment with <distance> differences.

  The last 2 * distance + 1 entries of front_trace->backref_table are used
  as the front for <distance>; going back, the front for d differences is
  located in the 2 * d + 1 entries directly before the front for d + 1.
  The trace starts on diagonal vlen - ulen in row ulen and ends at the
  first entry of the table, whose bits must be 0.

  In each step the lcs value of the current entry is handled as a run of
  matches, followed by one edit operation selected from the bits of the
  entry: the operation of the previous step is kept as long as its bit is
  set, otherwise the first available of mismatch, insertion, deletion (in
  this order) becomes the new operation. An insertion decrements the
  diagonal and leaves the row unchanged; a deletion increments the diagonal
  and consumes a row; a mismatch keeps the diagonal and consumes a row.

  If <eoplist> is not NULL, the operations are appended to it and finally
  gt_eoplist_reverse_end() is called with the list length recorded on entry
  (presumably to bring the part appended here, which was generated from the
  end of the alignment to its start, into forward order - confirm).
  If <eoplist> is NULL, nothing is stored; instead gt_check_diagonal_run()
  is called for each run (presumably to verify it against <useq> and <vseq>
  - confirm).

  NOTE(review): gt_eoplist_length() is called before any NULL check of
  <eoplist>; presumably it tolerates NULL - confirm.
*/
void gt_front_trace2eoplist_full_front_directed(GtEoplist *eoplist,
                                                const GtFrontTrace *front_trace,
                                                GtUword distance,
                                                const GtUchar *useq,
                                                GtUword ulen,
                                                const GtUchar *vseq,
                                                GtUword vlen)
{
  const GtBackreftable *frontstart, *entry;
  GtUword eoplist_start;
  GtWord diag = (GtWord) vlen - (GtWord) ulen;
  uint32_t urow;
  uint8_t run_eop = FT_EOP_MISMATCH;

  gt_assert(front_trace != NULL &&
            front_trace->backref_nextfree >= 2 * distance + 1);
  frontstart = front_trace->backref_table + front_trace->backref_nextfree
                                          - (2 * distance + 1);
  entry = frontstart + distance + diag;
  eoplist_start = gt_eoplist_length(eoplist);
  gt_assert(ulen <= (GtUword) UINT32_MAX);
  urow = ulen;
  while (distance > 0)
  {
    GtUword rowstep;

    /* 1. the run of matches ending at the current entry */
    if (eoplist == NULL)
    {
      gt_check_diagonal_run(useq, vseq, diag, urow - entry->lcs, urow);
    } else if (entry->lcs > 0)
    {
      gt_eoplist_match_add(eoplist,entry->lcs);
    }

    /* 2. keep the operation of the previous step if the entry allows it,
          otherwise switch in the order mismatch, insertion, deletion */
    if (!(entry->bits & run_eop))
    {
      if (entry->bits & FT_EOP_MISMATCH)
      {
        run_eop = FT_EOP_MISMATCH;
      } else if (entry->bits & FT_EOP_INSERTION)
      {
        run_eop = FT_EOP_INSERTION;
      } else
      {
        gt_assert(entry->bits & FT_EOP_DELETION);
        run_eop = FT_EOP_DELETION;
      }
    }

    /* 3. apply the operation: adjust the diagonal, determine the number of
          rows consumed and store the operation if requested */
    if (run_eop == FT_EOP_MISMATCH)
    {
      rowstep = 1;
      if (eoplist != NULL)
      {
        gt_eoplist_mismatch_add(eoplist);
      }
    } else if (run_eop == FT_EOP_INSERTION)
    {
      gt_assert(-(GtWord) ulen < diag);
      diag--;
      rowstep = 0;
      if (eoplist != NULL)
      {
        gt_eoplist_insertion_add(eoplist);
      }
    } else
    {
      gt_assert(run_eop == FT_EOP_DELETION);
      gt_assert(diag < (GtWord) vlen);
      diag++;
      rowstep = 1;
      if (eoplist != NULL)
      {
        gt_eoplist_deletion_add(eoplist);
      }
    }

    /* 4. move to the front with one difference less; it has
          2 * distance + 1 entries (distance already decremented) */
    distance--;
    frontstart -= (2 * distance + 1);
    gt_assert(frontstart >= front_trace->backref_table);
    gt_assert(urow >= entry->lcs + rowstep);
    urow -= entry->lcs + rowstep;
    entry = frontstart + distance + diag;
  }
  gt_assert(frontstart == front_trace->backref_table && entry->bits == 0);
  if (eoplist != NULL)
  {
    /* matches stored in the entry of the front for distance 0 */
    if (entry->lcs > 0)
    {
      gt_eoplist_match_add(eoplist,entry->lcs);
    }
    gt_eoplist_reverse_end(eoplist,eoplist_start);
  }
}
/*
  Append the operations described by a backtrace path to <eoplist>.

  eoplist:                  list to append to; must not be NULL.
  lastlcs:                  if > 0, a match run of this length is appended
                            before any element of the path is processed.
  backtracepath:            array of path elements.
  elementsinbacktracepath:  number of elements of <backtracepath> to process.
  ulen, vlen:               not used by this function.

  The elements are processed in array order. For each element one edit
  operation is appended according to its eopcode (backtracepath_deletion,
  backtracepath_insertion, any other value is stored as mismatch), followed
  by a match run of length lcs if lcs > 0.
*/
static void gt_front_trace_backtracepath2eoplist(GtEoplist *eoplist,
                                                 unsigned int lastlcs,
                                                 const GtBacktraceFrontpath
                                                   *backtracepath,
                                                GtUword elementsinbacktracepath,
                                                GT_UNUSED GtUword ulen,
                                                GT_UNUSED GtUword vlen)
{
  GtUword idx;

  /* check the list before it is used for the first time */
  gt_assert(eoplist != NULL);
  if (lastlcs > 0)
  {
    gt_eoplist_match_add(eoplist,lastlcs);
  }
  for (idx = 0; idx < elementsinbacktracepath; idx++)
  {
    if (backtracepath[idx].eopcode == backtracepath_deletion)
    {
      gt_eoplist_deletion_add(eoplist);
    } else
    {
      if (backtracepath[idx].eopcode == backtracepath_insertion)
      {
        gt_eoplist_insertion_add(eoplist);
      } else
      {
        gt_eoplist_mismatch_add(eoplist);
      }
    }
    if (backtracepath[idx].lcs > 0)
    {
      gt_eoplist_match_add(eoplist,backtracepath[idx].lcs);
    }
  }
}
/*
  Trace back through the trimmed fronts stored in <front_trace>, starting
  at the polished point <pp>, and derive the edit operations of the
  corresponding alignment.

  The start entry in front_trace->backref_table is determined by
    globaloffset = backref_nextfree - valid_total_fronts(gen_table,
                                                         pp->distance,
                                                         gen_nextfree)
    localoffset  = polished_point2offset(front_trace,pp)
  and the trace starts in row pp->row on diagonal
  pp->alignedlen - 2 * pp->row with pp->distance differences.

  In each step the lcs value of the current entry is handled as a run of
  matches, followed by one edit operation selected from the bits of the
  entry: the operation of the previous step is kept as long as its bit is
  set, otherwise the first available of mismatch, insertion, deletion (in
  this order) becomes the new operation. An insertion decrements the
  diagonal and leaves the row unchanged; a deletion increments the diagonal
  and consumes a row; a mismatch keeps the diagonal and consumes a row.
  Then the entry on the resulting diagonal in the front with one difference
  less is located using the trimleft_diff and valid values in
  front_trace->gen_table. The trace ends at entry 0 of the table, whose
  bits must be 0.

  If <eoplist> is not NULL, the operations are appended to it in the order
  in which they are found, i.e. from the polished point backwards; no
  reversal is done here. If <eoplist> is NULL, nothing is stored; instead
  gt_check_diagonal_run() is called for each run (presumably to verify it
  against <useq> and <vseq> - confirm).

  <ulen> and <vlen> are only read inside gt_assert, hence GT_UNUSED.
*/
static void front_trace2eoplist_directed(GtEoplist *eoplist,
                                         const GtFrontTrace *front_trace,
                                         const GtUchar *useq,
                                         GT_UNUSED GtUword ulen,
                                         const GtUchar *vseq,
                                         GT_UNUSED GtUword vlen,
                                         const GtFtPolished_point *pp)
{
  GtUword distance, localoffset, globaloffset, remainingvalidfronts, trimleft;
  GtWord diagonal;
  unsigned int row, lcs;
  uint8_t trace, preferred_eop = FT_EOP_MISMATCH;

  gt_assert(front_trace != NULL && front_trace->gen_nextfree > 0 && pp != NULL);
  localoffset = polished_point2offset(front_trace,pp);
  remainingvalidfronts = valid_total_fronts(front_trace->gen_table,
                                            pp->distance,
                                            front_trace->gen_nextfree);
  gt_assert(remainingvalidfronts <= front_trace->backref_nextfree);
  globaloffset = front_trace->backref_nextfree - remainingvalidfronts;
  distance = pp->distance;
  diagonal = (GtWord) pp->alignedlen - (GtWord) GT_MULT2(pp->row);
  trace = front_trace->backref_table[globaloffset + localoffset].bits;
  lcs = front_trace->backref_table[globaloffset + localoffset].lcs;
  row = pp->row;
  trimleft = pp->trimleft;
  gt_assert(distance < front_trace->gen_nextfree);
  while (distance > 0)
  {
    GtUword nextrowadd;
    GtWord base_diagonal;

    /* the run of matches ending at the current entry */
    if (eoplist == NULL)
    {
      gt_check_diagonal_run(useq, vseq, diagonal, row - lcs, row);
    } else if (lcs > 0)
    {
      gt_eoplist_match_add(eoplist,lcs);
    }

    /* keep the operation of the previous step if the entry allows it,
       otherwise switch in the order mismatch, insertion, deletion */
    if (!(trace & preferred_eop))
    {
      if (trace & FT_EOP_MISMATCH)
      {
        preferred_eop = FT_EOP_MISMATCH;
      } else if (trace & FT_EOP_INSERTION)
      {
        preferred_eop = FT_EOP_INSERTION;
      } else
      {
        gt_assert(trace & FT_EOP_DELETION);
        preferred_eop = FT_EOP_DELETION;
      }
    }

    /* apply the operation: adjust the diagonal, determine the number of
       rows consumed and store the operation if requested */
    if (preferred_eop == FT_EOP_MISMATCH)
    {
      nextrowadd = 1;
      if (eoplist != NULL)
      {
        gt_eoplist_mismatch_add(eoplist);
      }
    } else if (preferred_eop == FT_EOP_INSERTION)
    {
      gt_assert(-(GtWord) ulen < diagonal);
      diagonal--;
      nextrowadd = 0;
      if (eoplist != NULL)
      {
        gt_eoplist_insertion_add(eoplist);
      }
    } else
    {
      gt_assert(preferred_eop == FT_EOP_DELETION);
      gt_assert(diagonal < (GtWord) vlen);
      diagonal++;
      nextrowadd = 1;
      if (eoplist != NULL)
      {
        gt_eoplist_deletion_add(eoplist);
      }
    }

    /* undo the trimming difference of the current generation, then locate
       the entry for <diagonal> in the front with one difference less:
       that front begins at diagonal trimleft - distance and occupies
       gen_table[distance].valid entries directly before the current one */
    gt_assert(trimleft >=
              (GtUword) front_trace->gen_table[distance].trimleft_diff);
    trimleft -= (GtUword) front_trace->gen_table[distance].trimleft_diff;
    distance--;
    base_diagonal = (GtWord) trimleft - (GtWord) distance;
    gt_assert(base_diagonal <= diagonal);
    gt_assert(diagonal <
              base_diagonal + (GtWord) front_trace->gen_table[distance].valid);
    localoffset = (GtUword) (diagonal - base_diagonal);
    gt_assert((GtUword) front_trace->gen_table[distance].valid
              <= globaloffset);
    globaloffset -= (GtUword) front_trace->gen_table[distance].valid;
    gt_assert(row >= lcs + nextrowadd);
    row -= lcs + nextrowadd;
    trace = front_trace->backref_table[globaloffset + localoffset].bits;
    lcs = front_trace->backref_table[globaloffset + localoffset].lcs;
  }
  gt_assert(globaloffset + localoffset == 0 && trace == 0);
  /* matches stored in the entry of the front for distance 0 */
  if (eoplist != NULL && lcs > 0)
  {
    gt_eoplist_match_add(eoplist,lcs);
  }
}