示例#1
0
/* Advance the path walker <pw> by one step.
   When the currently cached edit operation is used up (its remaining length
   is 0), type and length of the edit operation at <pw->eopptr> are cached and
   the pointer is moved on: one element down when walking forward, one element
   up otherwise. Afterwards a single step() is performed.
   <pw> must not be NULL and must have a next element. */
void gth_path_walker_next(GthPathWalker *pw)
{
  gt_assert(pw && gth_path_walker_has_next(pw));

  if (pw->last_eop_length == 0) {
    /* load the next edit operation ... */
    pw->last_eop_type   = gt_editoperation_type(*pw->eopptr, pw->proteineop);
    pw->last_eop_length = gt_editoperation_length(*pw->eopptr, pw->proteineop);
    /* ... and move the pointer in walking direction */
    pw->eopptr += pw->forward ? -1 : 1;
  }

  step(pw);
}
示例#2
0
/* Sum up the indel count of the first <alignmentlength> edit operations in
   <alignment>, which are decoded as protein edit operations if <proteineop> is
   true.
   Per edit operation of length l the following is added:
   - match, mismatch:                          nothing
   - intron (all three kinds):                 l
   - deletion, insertion, deletion with gaps:  3 * l if <proteineop>, else l
   - mismatch with 1 gap:                      l     (requires <proteineop>)
   - mismatch with 2 gaps:                     2 * l (requires <proteineop>)
   Any other type triggers an assertion. */
unsigned long gt_compute_indelcount(Editoperation *alignment,
                                 unsigned long alignmentlength, bool proteineop)
{
  /* weight of a plain insertion/deletion, fixed for the whole alignment */
  const unsigned long indel_weight = proteineop ? 3 : 1;
  unsigned long idx, len, total = 0;
  Eoptype type;

  for (idx = 0; idx < alignmentlength; idx++) {
    type = gt_editoperation_type(alignment[idx], proteineop);
    len  = gt_editoperation_length(alignment[idx], proteineop);

    switch (type) {
      case EOP_TYPE_MATCH:
      case EOP_TYPE_MISMATCH:
        /* contribute nothing */
        break;
      case EOP_TYPE_INTRON:
      case EOP_TYPE_INTRON_WITH_1_BASE_LEFT:
      case EOP_TYPE_INTRON_WITH_2_BASES_LEFT:
        total += len;
        break;
      case EOP_TYPE_DELETION:
      case EOP_TYPE_INSERTION:
      case EOP_TYPE_DELETION_WITH_1_GAP:
      case EOP_TYPE_DELETION_WITH_2_GAPS:
        total += len * indel_weight;
        break;
      case EOP_TYPE_MISMATCH_WITH_1_GAP:
        gt_assert(proteineop);
        total += len;
        break;
      case EOP_TYPE_MISMATCH_WITH_2_GAPS:
        gt_assert(proteineop);
        total += len * 2;
        break;
      default:
        gt_assert(0);
    }
  }

  return total;
}
示例#3
0
/* Cut off <reflength> reference units from the edit operations stored at the
   front of <bp->editoperations> (array index 0 upwards).
   Edit operations of the deletion and intron kinds are not counted against
   <reflength>; if they are encountered before the cutoff is complete they are
   removed together with the preceding ones. Insertions, matches and all kinds
   of mismatches are counted with their length. The edit operation in which
   the cutoff ends is removed completely if it is used up exactly, otherwise it
   is kept and shortened by the part that has been cut off.
   <bp> must not be NULL, <reflength> must be positive.
   NOTE(review): the loop relies on the path containing at least <reflength>
   countable units, it does not check the array bounds itself. */
static void cutoff_end_refseq(GthBacktracePath *bp, unsigned long reflength)
{

  unsigned long eoplength, i = 0;
  bool proteineop, breakloop = false;
  Editoperation *eop;
  Eoptype eoptype;
  gt_assert(bp && reflength);
  gt_assert(bp->alphatype == DNA_ALPHA || bp->alphatype == PROTEIN_ALPHA);
  proteineop = bp->alphatype == PROTEIN_ALPHA;

  for (;;) {
    eop = (Editoperation*) gt_array_get(bp->editoperations, i);
    eoptype   = gt_editoperation_type(*eop, proteineop);
    eoplength = gt_editoperation_length(*eop, proteineop);
    i++;

    switch (eoptype) {
      case EOP_TYPE_DELETION:
      case EOP_TYPE_DELETION_WITH_1_GAP:
      case EOP_TYPE_DELETION_WITH_2_GAPS:
      case EOP_TYPE_INTRON:
      case EOP_TYPE_INTRON_WITH_1_BASE_LEFT:
      case EOP_TYPE_INTRON_WITH_2_BASES_LEFT:
        /* nothing to do: these do not count against <reflength>, so it must
           not be decremented here (subtracting e.g. an intron length would
           make the unsigned counter wrap around) */
        break;
      case EOP_TYPE_INSERTION:
      case EOP_TYPE_MISMATCH:
      case EOP_TYPE_MISMATCH_WITH_1_GAP:
      case EOP_TYPE_MISMATCH_WITH_2_GAPS:
      case EOP_TYPE_MATCH:
        if (eoplength > reflength) {
          /* the cutoff ends inside this eop: keep it (i--), but store only
             the part which remains after <reflength> units have been removed */
          gt_assert(eoplength > 1); /* follows from eoplength > reflength > 0 */
          *eop &= ~bp->max_identical_length;
          *eop |= eoplength - reflength;
          i--;
          breakloop = true;
        }
        else if (eoplength == reflength) {
          /* the cutoff ends exactly behind this eop: remove it completely */
          breakloop = true;
        }
        else {
          /* eop is cut off completely, continue with the next one */
          reflength -= eoplength;
        }
        break;
      default: gt_assert(0);
    }
    if (breakloop)
      break;
  }

  /* remove all edit operations which have been cut off completely */
  if (i)
    gt_array_rem_span(bp->editoperations, 0, i-1);
}
示例#4
0
/* Remove the part of the backtrace path <bp> which has been walked by the
   (forward) path walker <pw>:
   - the number of edit operations reported by gth_path_walker_actual_eops()
     is dropped from the end of the edit operation array,
   - if the walker reports steps in the current edit operation, the length of
     the (new) last edit operation is reduced by that number of steps,
   - the genomic and reference DP start/length of <bp> are adjusted by the
     distances reported by the walker.
   If <showeops> is set, the walker and the path before/after the removal of
   complete edit operations are shown on <outfp>.
   The reverse walking direction is not implemented and triggers an
   assertion. */
void gth_backtrace_path_cutoff_walked_path(GthBacktracePath *bp,
                                           const GthPathWalker *pw,
                                           bool showeops, GtFile *outfp)
{
  unsigned int length;
  bool proteineop;
  Editoperation *last_eop;
  gt_assert(bp && pw);

  if (!gth_path_walker_is_forward(pw)) {
    gt_assert(0); /* XXX: implement reverse case */
    gt_assert(!backtrace_path_end_cutoffs_are_set(bp));
    return;
  }

  gt_assert(!backtrace_path_start_cutoffs_are_set(bp));

  if (showeops) {
    gt_file_xprintf(outfp, "%s(): show path walker\n", __func__);
    gth_path_walker_show(pw, outfp);
    gt_file_xprintf(outfp,
                    "%s(): show backtrace path (before eop removal)\n",
                    __func__);
    gth_backtrace_path_show(bp, false, 0, outfp);
  }

  /* drop the edit operations which have been walked completely */
  gt_array_set_size(bp->editoperations,
                    gt_array_size(bp->editoperations) -
                    gth_path_walker_actual_eops(pw));

  if (showeops) {
    gt_file_xprintf(outfp,
                    "%s(): show backtrace path (after eop removal)\n",
                    __func__);
    gth_backtrace_path_show(bp, false, 0, outfp);
  }

  /* shorten the last edit operation by the steps already taken in it */
  if (gth_path_walker_steps_in_current_eop(pw)) {
    proteineop = bp->alphatype == PROTEIN_ALPHA;
    last_eop = gt_array_get_last(bp->editoperations);
    length = gt_editoperation_length(*last_eop, proteineop);
    gt_assert(length > gth_path_walker_steps_in_current_eop(pw));
    gt_editoperation_set_length(
      last_eop, length - gth_path_walker_steps_in_current_eop(pw), proteineop);
  }

  /* move the DP ranges behind the walked part */
  bp->gen_dp_start  += gth_path_walker_gen_distance(pw);
  bp->gen_dp_length -= gth_path_walker_gen_distance(pw);
  bp->ref_dp_start  += gth_path_walker_ref_distance(pw);
  bp->ref_dp_length -= gth_path_walker_ref_distance(pw);
}
示例#5
0
/* Rewrite <editoperations> (decoded as protein edit operations) such that the
   first match which follows (in array order) an intron with 1 or 2 bases left
   has length 1: a longer match is split into a match of length 1 followed by
   a match with the remaining length. A mismatch or a mismatch with 1 gap
   following such an intron cancels the pending split; all other edit
   operations are copied unchanged.
   The array is only rebuilt if it contains such an intron at a position other
   than the last one. */
static void ensure_eop_of_len_1_before_introns(GtArray *editoperations)
{
  Editoperation eop, *eopptr;
  Eoptype eoptype;
  unsigned long eoplength;
  GtArray *backup;
  bool processing_necessary = false,
       split_match          = false;

  /* with less than two edit operations there is nothing which could be split;
     returning early also keeps the pointer arithmetic of the check below
     (``space + size - 1'') inside the array for an empty array */
  if (gt_array_size(editoperations) < 2)
    return;

  /* check if processing is necessary
     the check is rather simple, it might be possible that
     ``processing_necessary'' is set to ``true'' whereas in fact no processing
     is necessary */
  for (eopptr = gt_array_get_space(editoperations);
       eopptr < (Editoperation*) gt_array_get_space(editoperations) +
                                 gt_array_size(editoperations) - 1;
       eopptr++) {
    if ((eoptype = gt_editoperation_type(*eopptr, true)) ==
        EOP_TYPE_INTRON_WITH_1_BASE_LEFT ||
        eoptype == EOP_TYPE_INTRON_WITH_2_BASES_LEFT) {
      processing_necessary = true;
      break;
    }
  }

  if (processing_necessary) {
    /* init backup for the editoperations */
    backup = gt_array_new(sizeof (Editoperation));

    /* fill backup */
    gt_array_add_array(backup, editoperations);

    /* reset the original edit operations */
    gt_array_set_size(editoperations, 0);

    /* process the backup and fill the original editoperations */
    for (eopptr = gt_array_get_space(backup);
         eopptr < (Editoperation*)
                  gt_array_get_space(backup) + gt_array_size(backup);
         eopptr++) {

      /* the decision has to be made on the _type_ of the edit operation
         (comparing its length with the type constants would be wrong) */
      if ((eoptype = gt_editoperation_type(*eopptr, true)) ==
          EOP_TYPE_INTRON_WITH_1_BASE_LEFT ||
          eoptype == EOP_TYPE_INTRON_WITH_2_BASES_LEFT) {
        /* the next match has to be split */
        split_match = true;
      }
      else if (split_match) {
        if (eoptype == EOP_TYPE_MATCH) {
          split_match = false;
          if ((eoplength = gt_editoperation_length(*eopptr, true)) > 1) {
            /* replace the match by a match of length 1 and a match with the
               remaining length; the raw values are stored directly
               (presumably a match of length n is encoded as the plain value
               n -- TODO confirm against the Editoperation encoding) */
            eop = 1;
            gt_array_add(editoperations, eop);
            eop = eoplength - 1;
            gt_array_add(editoperations, eop);
            continue;
          }
        }
        else if (eoptype == EOP_TYPE_MISMATCH ||
                 eoptype == EOP_TYPE_MISMATCH_WITH_1_GAP) {
          /* no match directly behind the intron, nothing to split */
          split_match = false;
        }
      }
      /* copy the edit operation unchanged */
      gt_array_add(editoperations, *eopptr);
    }

    /* free backup */
    gt_array_delete(backup);
  }
}