예제 #1
0
파일: path_walker.c 프로젝트: 9beckert/TIR
/* Advance the path walker <pw> by one step. <pw> must still have a next
   element. When the length of the previously decoded edit operation is used
   up, the edit operation under the pointer is decoded (type and length) and
   the pointer is moved on before the step is taken. */
void gth_path_walker_next(GthPathWalker *pw)
{
  gt_assert(pw && gth_path_walker_has_next(pw));

  if (pw->last_eop_length == 0) {
    /* load the edit operation the pointer currently refers to */
    pw->last_eop_type   = gt_editoperation_type(*pw->eopptr, pw->proteineop);
    pw->last_eop_length = gt_editoperation_length(*pw->eopptr, pw->proteineop);

    /* a forward walk moves the pointer down, a backward walk moves it up */
    pw->eopptr += pw->forward ? -1 : 1;
  }

  step(pw);
}
예제 #2
0
/* Sum up the indel count over the <alignmentlength> edit operations stored in
   <alignment>. <proteineop> selects the protein interpretation of the edit
   operations. Contribution per edit operation of length len:
   - match, mismatch:                         0
   - all intron types:                        len
   - insertion and all deletion types:        len * 3 (protein), len (otherwise)
   - mismatch with 1 gap (protein only):      len
   - mismatch with 2 gaps (protein only):     len * 2 */
unsigned long gt_compute_indelcount(Editoperation *alignment,
                                 unsigned long alignmentlength, bool proteineop)
{
  /* factor applied to insertions and deletions */
  const unsigned long full_weight = proteineop ? 3 : 1;
  unsigned long idx, total = 0;

  for (idx = 0; idx < alignmentlength; idx++) {
    const Eoptype kind = gt_editoperation_type(alignment[idx], proteineop);
    const unsigned long len = gt_editoperation_length(alignment[idx],
                                                      proteineop);

    switch (kind) {
      case EOP_TYPE_MATCH:
      case EOP_TYPE_MISMATCH:
        /* these do not contribute */
        break;
      case EOP_TYPE_INTRON:
      case EOP_TYPE_INTRON_WITH_1_BASE_LEFT:
      case EOP_TYPE_INTRON_WITH_2_BASES_LEFT:
        total += len;
        break;
      case EOP_TYPE_DELETION:
      case EOP_TYPE_INSERTION:
      case EOP_TYPE_DELETION_WITH_1_GAP:
      case EOP_TYPE_DELETION_WITH_2_GAPS:
        total += len * full_weight;
        break;
      case EOP_TYPE_MISMATCH_WITH_1_GAP:
        gt_assert(proteineop);
        total += len;
        break;
      case EOP_TYPE_MISMATCH_WITH_2_GAPS:
        gt_assert(proteineop);
        total += len * 2;
        break;
      default: gt_assert(0);
    }
  }

  return total;
}
예제 #3
0
/* Remove edit operations from the front (index 0 upwards) of
   bp->editoperations, driven by the remaining amount <reflength>.
   <bp> must be non-NULL with a DNA or protein alphabet and <reflength> must be
   non-zero. The array is scanned from index 0 until an insertion, mismatch
   (any kind) or match is found whose length is at least the remaining
   <reflength>. If that edit operation is strictly longer, it is shortened in
   place and kept; otherwise it is removed together with all preceding ones. */
static void cutoff_end_refseq(GthBacktracePath *bp, unsigned long reflength)
{

  unsigned long eoplength, i = 0;
  bool breakloop = false;
  Editoperation *eop;
  Eoptype eoptype;
  gt_assert(bp && reflength);
  gt_assert(bp->alphatype == DNA_ALPHA || bp->alphatype == PROTEIN_ALPHA);

  /* there is no explicit bound check on i: the loop relies on reaching a
     terminating edit operation before the end of the array */
  for (;;) {
    eop = (Editoperation*) gt_array_get(bp->editoperations, i);
    eoptype   = gt_editoperation_type(*eop, bp->alphatype == PROTEIN_ALPHA);
    eoplength = gt_editoperation_length(*eop, bp->alphatype == PROTEIN_ALPHA);
    /* from here on i is the number of edit operations scheduled for removal */
    i++;

    switch (eoptype) {
      case EOP_TYPE_DELETION:
      case EOP_TYPE_DELETION_WITH_1_GAP:
      case EOP_TYPE_DELETION_WITH_2_GAPS:
      case EOP_TYPE_INTRON:
      case EOP_TYPE_INTRON_WITH_1_BASE_LEFT:
      case EOP_TYPE_INTRON_WITH_2_BASES_LEFT:
        /* nothing to do */
        break;
      case EOP_TYPE_INSERTION:
      case EOP_TYPE_MISMATCH:
      case EOP_TYPE_MISMATCH_WITH_1_GAP:
      case EOP_TYPE_MISMATCH_WITH_2_GAPS:
      case EOP_TYPE_MATCH:
        if (eoplength >= reflength) {
          breakloop = true;
          if (eoplength > reflength) {
            /* the edit operation is only partially cut: clear the bits
               selected by bp->max_identical_length, store the new length and
               keep this edit operation in the array (undo the i++ above) */
            /* NOTE(review): the stored length is eoplength - 1 and does not
               depend on reflength, and the assertion rejects eoplength == 2
               -- confirm that both are intended */
            gt_assert(eoplength > 2);
            *eop &= ~bp->max_identical_length;
            *eop |= eoplength - 1;
            i--;
          }
        }
        break;
      default: gt_assert(0);
    }
    if (breakloop)
      break;
    /* NOTE(review): this subtraction is also reached for the deletion and
       intron types above ("nothing to do"); reflength is unsigned, so a
       longer edit operation of those types would wrap it around -- confirm
       that this is intended */
    reflength -= eoplength;
  }

  /* drop the edit operations with indices 0 .. i-1 */
  if (i)
    gt_array_rem_span(bp->editoperations, 0, i-1);
}
예제 #4
0
/* Inspect the edit operations in <alignment> (<alignmentlength> elements),
   starting at the last element and walking down if <leading> is set, starting
   at the first element and walking up otherwise. Returns true if the first
   edit operation of a known type is an insertion or any kind of intron, and
   false if it is a match, mismatch (any kind) or deletion (any kind), or if
   there is no edit operation at all. */
static bool containsintronsorinsertions(bool leading,
                                        Editoperation *alignment,
                                        long alignmentlength,
                                        bool proteineop)
{
  const long direction = leading ? -1 : 1;
  long pos = leading ? alignmentlength - 1 : 0;

  while (pos >= 0 && pos < alignmentlength) {
    switch (gt_editoperation_type(alignment[pos], proteineop)) {
      case EOP_TYPE_INSERTION:
      case EOP_TYPE_INTRON:
      case EOP_TYPE_INTRON_WITH_1_BASE_LEFT:
      case EOP_TYPE_INTRON_WITH_2_BASES_LEFT:
        /* insertion or intron found */
        return true;
      case EOP_TYPE_MATCH:
      case EOP_TYPE_MISMATCH:
      case EOP_TYPE_MISMATCH_WITH_1_GAP:
      case EOP_TYPE_MISMATCH_WITH_2_GAPS:
      case EOP_TYPE_DELETION:
      case EOP_TYPE_DELETION_WITH_1_GAP:
      case EOP_TYPE_DELETION_WITH_2_GAPS:
        /* match, mismatch, or deletion ends the search */
        return false;
      default: gt_assert(0);
    }
    pos += direction;
  }

  /* nothing to inspect -> no introns or insertions */
  return false;
}
예제 #5
0
bool gth_backtrace_path_last_is_intron(const GthBacktracePath *bp)
{
  Eoptype eoptype;

  gt_assert(bp);

  /* check if a dummy has just been inserted */
  if (bp->dummy_index != GT_UNDEF_ULONG &&
      gt_array_size(bp->editoperations) - 1 == bp->dummy_index) {
    return false;
  }

  eoptype = gt_editoperation_type(*(Editoperation*)
                               gt_array_get_last(bp->editoperations),
                               bp->alphatype == PROTEIN_ALPHA);
  if (eoptype == EOP_TYPE_INTRON ||
      eoptype == EOP_TYPE_INTRON_WITH_1_BASE_LEFT ||
      eoptype == EOP_TYPE_INTRON_WITH_2_BASES_LEFT) {
    return true;
  }
  return false;

}
예제 #6
0
/* Rewrite <editoperations> (interpreted as protein edit operations) in place
   so that the first match which follows an intron with 1 or 2 bases left in
   array order begins with an edit operation of length 1: a match of length
   n > 1 is replaced by an edit operation of value 1 followed by one of value
   n - 1. A mismatch or a mismatch with 1 gap following such an intron cancels
   the pending split. All other edit operations are copied unchanged.
   An empty array is left untouched. */
static void ensure_eop_of_len_1_before_introns(GtArray *editoperations)
{
  Editoperation eop, *eopptr, *eopend;
  Eoptype eoptype;
  unsigned long eoplength;
  GtArray *backup;
  bool processing_necessary = false,
       split_match          = false;

  /* nothing to do for an empty array; returning here also prevents the
     ``size - 1'' bound below from pointing in front of the array space */
  if (!gt_array_size(editoperations))
    return;

  /* check if processing is necessary
     the check is rather simple, it might be possible that
     ``processing_necessary'' is set to ``true'' whereas in fact no processing
     is necessary.
     The last element is not inspected, because nothing follows it. */
  eopend = (Editoperation*) gt_array_get_space(editoperations) +
                            gt_array_size(editoperations) - 1;
  for (eopptr = gt_array_get_space(editoperations);
       eopptr < eopend;
       eopptr++) {
    eoptype = gt_editoperation_type(*eopptr, true);
    if (eoptype == EOP_TYPE_INTRON_WITH_1_BASE_LEFT ||
        eoptype == EOP_TYPE_INTRON_WITH_2_BASES_LEFT) {
      processing_necessary = true;
      break;
    }
  }

  if (!processing_necessary)
    return;

  /* init backup for the editoperations */
  backup = gt_array_new(sizeof (Editoperation));

  /* fill backup */
  gt_array_add_array(backup, editoperations);

  /* reset the original edit operations */
  gt_array_set_size(editoperations, 0);

  /* process the backup and fill the original editoperations */
  eopend = (Editoperation*) gt_array_get_space(backup) +
                            gt_array_size(backup);
  for (eopptr = gt_array_get_space(backup); eopptr < eopend; eopptr++) {
    /* the decision below is made on the TYPE of the edit operation (the
       length is only needed once a match has to be split) */
    eoptype = gt_editoperation_type(*eopptr, true);

    if (eoptype == EOP_TYPE_INTRON_WITH_1_BASE_LEFT ||
        eoptype == EOP_TYPE_INTRON_WITH_2_BASES_LEFT) {
      /* the next match has to start with an edit operation of length 1 */
      split_match = true;
    }
    else if (split_match) {
      if (eoptype == EOP_TYPE_MATCH) {
        split_match = false;
        if ((eoplength = gt_editoperation_length(*eopptr, true)) > 1) {
          /* replace the match by two edit operations: 1 and length - 1 */
          eop = 1;
          gt_array_add(editoperations, eop);
          eop = eoplength - 1;
          gt_array_add(editoperations, eop);
          continue;
        }
      }
      else if (eoptype == EOP_TYPE_MISMATCH ||
               eoptype == EOP_TYPE_MISMATCH_WITH_1_GAP) {
        split_match = false;
      }
    }
    /* copy the edit operation unchanged */
    gt_array_add(editoperations, *eopptr);
  }

  /* free backup */
  gt_array_delete(backup);
}
예제 #7
0
/* Append to the edit operation array <bp> the edit operations representing
   <length> (> 0) units of type <eoptype>. <proteineop> selects the maximal
   length a single edit operation can store (MAXIDENTICALLENGTH_PROTEIN or
   MAXIDENTICALLENGTH).
   - Matches and the three intron types are stored as one edit operation
     holding <length> as left by DETERMINE_TIMES_MAXLEN, followed by
     <times_maxlen> edit operations of length <maxlen>.
   - All other types are stored as <length> copies of the corresponding
     edit operation constant.
   NOTE(review): DETERMINE_TIMES_MAXLEN is a macro defined outside this
   function; presumably it reduces <length> to at most <maxlen> and counts the
   full-length chunks in <times_maxlen> -- confirm against its definition. */
static void add_eop_type_to_eop_array(GtArray *bp, Eoptype eoptype,
                                      unsigned long length, bool proteineop)
{
  Editoperation eop,
                maxlen = proteineop ? (Editoperation) MAXIDENTICALLENGTH_PROTEIN
                                    : (Editoperation) MAXIDENTICALLENGTH;
  Eoptype tmp_eoptype;
  unsigned long i, times_maxlen = 0;

  gt_assert(length > 0);

  switch (eoptype) {
    case EOP_TYPE_MATCH:
      /* here we reproduce the artifact resulting from the dummys used in the
         backtracing procedure to make sure that the parsed array of edit
         operations is exactly the same as the one we have in memory */
      if (proteineop && /* this needs only to be checked for protein bp */
          length > 1 &&       /* and when the length is larger 1 */
          gt_array_size(bp)) { /* we have already stored an eop */
        tmp_eoptype = gt_editoperation_type(*(Editoperation*)
                                         gt_array_get_last(bp), proteineop);
        if (tmp_eoptype == EOP_TYPE_INTRON_WITH_1_BASE_LEFT ||
            tmp_eoptype == EOP_TYPE_INTRON_WITH_2_BASES_LEFT) {
          /* split off an edit operation of value 1 directly after the intron
             and store the rest of the match below */
          eop = 1;
          gt_array_add(bp, eop);
          length--;
        }
      }

      /* we store the eop which has not maximal length first to make sure that
         after reversing the array of editoperations has the same form as the
         original one */
      DETERMINE_TIMES_MAXLEN;
      gt_assert(length > 0);
      /* a match is stored as its bare length */
      eop = (Editoperation) length;
      gt_array_add(bp, eop);
      for (i = 0; i < times_maxlen; i++)
        gt_array_add(bp, maxlen);
      break;
    case EOP_TYPE_INTRON:
      /* an intron is stored as DELETIONEOP plus its length */
      DETERMINE_TIMES_MAXLEN;
      eop  = DELETIONEOP;
      eop += length;
      gt_array_add(bp, eop);
      eop  = DELETIONEOP;
      eop += maxlen;
      for (i = 0; i < times_maxlen; i++)
        gt_array_add(bp, eop);
      break;
    case EOP_TYPE_INTRON_WITH_1_BASE_LEFT:
      /* stored as DELETION_WITH_1_GAP_EOP plus the length */
      DETERMINE_TIMES_MAXLEN;
      eop  = DELETION_WITH_1_GAP_EOP;
      eop += length;
      gt_array_add(bp, eop);
      eop  = DELETION_WITH_1_GAP_EOP;
      eop += maxlen;
      for (i = 0; i < times_maxlen; i++)
        gt_array_add(bp, eop);
      break;
    case EOP_TYPE_INTRON_WITH_2_BASES_LEFT:
      /* stored as DELETION_WITH_2_GAPS_EOP plus the length */
      DETERMINE_TIMES_MAXLEN;
      eop  = DELETION_WITH_2_GAPS_EOP;
      eop += length;
      gt_array_add(bp, eop);
      eop  = DELETION_WITH_2_GAPS_EOP;
      eop += maxlen;
      for (i = 0; i < times_maxlen; i++)
        gt_array_add(bp, eop);
      break;
    /* all remaining types are stored as <length> separate edit operations */
    case EOP_TYPE_MISMATCH:
      eop = MISMATCHEOP;
      for (i = 0; i < length; i++)
        gt_array_add(bp, eop);
      break;
    case EOP_TYPE_DELETION:
      eop = DELETIONEOP;
      for (i = 0; i < length; i++)
        gt_array_add(bp, eop);
      break;
    case EOP_TYPE_INSERTION:
      eop = INSERTIONEOP;
      for (i = 0; i < length; i++)
        gt_array_add(bp, eop);
      break;
    case EOP_TYPE_MISMATCH_WITH_1_GAP:
      eop = MISMATCH_WITH_1_GAP_EOP;
      for (i = 0; i < length; i++)
        gt_array_add(bp, eop);
      break;
    case EOP_TYPE_MISMATCH_WITH_2_GAPS:
      eop = MISMATCH_WITH_2_GAPS_EOP;
      for (i = 0; i < length; i++)
        gt_array_add(bp, eop);
      break;
    case EOP_TYPE_DELETION_WITH_1_GAP:
      eop = DELETION_WITH_1_GAP_EOP;
      for (i = 0; i < length; i++)
        gt_array_add(bp, eop);
      break;
    case EOP_TYPE_DELETION_WITH_2_GAPS:
      eop = DELETION_WITH_2_GAPS_EOP;
      for (i = 0; i < length; i++)
        gt_array_add(bp, eop);
      break;
    default: gt_assert(0);
  }
}