/* Advance the path walker <pw> by one step.
   If no length is left over from the previously loaded edit operation
   (pw->last_eop_length is 0), the edit operation under pw->eopptr is loaded
   first: its type and length are cached in <pw> and pw->eopptr is moved on
   (one element down when walking forward, one element up otherwise).
   The actual step is then delegated to step().
   Requires that gth_path_walker_has_next(pw) holds. */
void gth_path_walker_next(GthPathWalker *pw)
{
  gt_assert(pw && gth_path_walker_has_next(pw));

  if (pw->last_eop_length == 0) {
    /* load the next edit operation before stepping into it */
    pw->last_eop_type   = gt_editoperation_type(*pw->eopptr, pw->proteineop);
    pw->last_eop_length = gt_editoperation_length(*pw->eopptr,
                                                  pw->proteineop);
    /* a forward walk moves towards lower addresses */
    pw->eopptr += pw->forward ? -1 : 1;
  }

  step(pw);
}
/* Sum up the indel contribution of the <alignmentlength> edit operations
   stored in <alignment> and return it. <proteineop> is passed through to the
   edit operation decoders.
   Contribution per edit operation of length l:
   - match, mismatch:                         0
   - intron (all three intron types):         l
   - insertion, deletion,
     deletion with 1 gap / with 2 gaps:       3 * l if <proteineop>, else l
   - mismatch with 1 gap (protein only):      l
   - mismatch with 2 gaps (protein only):     2 * l
   NOTE(review): intron lengths are added to the count as well -- confirm that
   callers expect introns to be included. */
unsigned long gt_compute_indelcount(Editoperation *alignment,
                                    unsigned long alignmentlength,
                                    bool proteineop)
{
  /* weight of an edit operation which is a gap over its full length */
  const unsigned long full_gap_weight = proteineop ? 3 : 1;
  unsigned long idx, len, total = 0;
  Eoptype kind;

  for (idx = 0; idx < alignmentlength; idx++) {
    kind = gt_editoperation_type(alignment[idx], proteineop);
    len  = gt_editoperation_length(alignment[idx], proteineop);

    switch (kind) {
      case EOP_TYPE_MATCH:
      case EOP_TYPE_MISMATCH:
        /* no gap involved */
        break;
      case EOP_TYPE_INTRON:
      case EOP_TYPE_INTRON_WITH_1_BASE_LEFT:
      case EOP_TYPE_INTRON_WITH_2_BASES_LEFT:
        total += len;
        break;
      case EOP_TYPE_DELETION:
      case EOP_TYPE_INSERTION:
      case EOP_TYPE_DELETION_WITH_1_GAP:
      case EOP_TYPE_DELETION_WITH_2_GAPS:
        total += len * full_gap_weight;
        break;
      case EOP_TYPE_MISMATCH_WITH_1_GAP:
        gt_assert(proteineop);
        total += len;
        break;
      case EOP_TYPE_MISMATCH_WITH_2_GAPS:
        gt_assert(proteineop);
        total += len * 2;
        break;
      default: gt_assert(0);
    }
  }

  return total;
}
/* Remove edit operations from the front (index 0 onwards) of
   bp->editoperations until <reflength> is used up.

   The edit operations are visited in array order. For insertion, match and
   all mismatch types the walk stops as soon as the length of the current edit
   operation is >= the remaining <reflength>; if it is strictly larger, the
   edit operation is kept in the array and its stored length is overwritten
   with (length - 1). For every edit operation which does not stop the walk
   (including deletions and introns) <reflength> is reduced by its length.
   Finally, all edit operations in front of the one which was kept (or
   including the last visited one, if nothing was kept) are removed.

   NOTE(review): <reflength> is also decremented for deletion and intron edit
   operations, and a shortened edit operation gets length (length - 1) rather
   than (length - reflength), guarded by an assertion that length > 2. This
   restyle keeps that behavior unchanged -- confirm it is intended. */
static void cutoff_end_refseq(GthBacktracePath *bp, unsigned long reflength)
{
  unsigned long len, num_to_remove = 0;
  bool is_protein, finished = false;
  Editoperation *current;
  Eoptype kind;

  gt_assert(bp && reflength);
  gt_assert(bp->alphatype == DNA_ALPHA || bp->alphatype == PROTEIN_ALPHA);

  is_protein = bp->alphatype == PROTEIN_ALPHA;

  do {
    current = (Editoperation*) gt_array_get(bp->editoperations, num_to_remove);
    kind = gt_editoperation_type(*current, is_protein);
    len = gt_editoperation_length(*current, is_protein);
    num_to_remove++; /* tentatively schedule this edit operation for removal */

    switch (kind) {
      case EOP_TYPE_INSERTION:
      case EOP_TYPE_MISMATCH:
      case EOP_TYPE_MISMATCH_WITH_1_GAP:
      case EOP_TYPE_MISMATCH_WITH_2_GAPS:
      case EOP_TYPE_MATCH:
        if (len == reflength) {
          /* the edit operation is used up completely */
          finished = true;
        }
        else if (len > reflength) {
          finished = true;
          gt_assert(len > 2);
          /* clear the stored length bits and store the reduced length */
          *current &= ~bp->max_identical_length;
          *current |= len - 1;
          /* the shortened edit operation stays in the array */
          num_to_remove--;
        }
        break;
      case EOP_TYPE_DELETION:
      case EOP_TYPE_DELETION_WITH_1_GAP:
      case EOP_TYPE_DELETION_WITH_2_GAPS:
      case EOP_TYPE_INTRON:
      case EOP_TYPE_INTRON_WITH_1_BASE_LEFT:
      case EOP_TYPE_INTRON_WITH_2_BASES_LEFT:
        /* never stops the walk */
        break;
      default: gt_assert(0);
    }

    if (!finished)
      reflength -= len;
  } while (!finished);

  if (num_to_remove > 0)
    gt_array_rem_span(bp->editoperations, 0, num_to_remove - 1);
}
/* Cut off from the backtrace path <bp> the part which the path walker <pw>
   has already walked. Only forward path walkers are supported; the reverse
   case is not implemented and triggers an assertion.

   For a forward walker:
   - the number of edit operations reported by gth_path_walker_actual_eops()
     is dropped from the end of bp->editoperations,
   - if the walker stands inside an edit operation, the length of the (new)
     last edit operation is reduced by the steps taken in it,
   - the genomic and reference DP ranges of <bp> are moved forward by the
     distances the walker has covered.
   If <showeops> is set, the path walker and the backtrace path before and
   after the removal of the complete edit operations are shown on <outfp>. */
void gth_backtrace_path_cutoff_walked_path(GthBacktracePath *bp,
                                           const GthPathWalker *pw,
                                           bool showeops, GtFile *outfp)
{
  unsigned int length;
  bool is_protein;
  Editoperation *last_eop;

  gt_assert(bp && pw);

  if (!gth_path_walker_is_forward(pw)) {
    gt_assert(0); /* XXX: implement reverse case */
    gt_assert(!backtrace_path_end_cutoffs_are_set(bp));
    return;
  }

  gt_assert(!backtrace_path_start_cutoffs_are_set(bp));
  is_protein = bp->alphatype == PROTEIN_ALPHA;

  if (showeops) {
    gt_file_xprintf(outfp, "%s(): show path walker\n", __func__);
    gth_path_walker_show(pw, outfp);
    gt_file_xprintf(outfp, "%s(): show backtrace path (before eop "
                    "removal)\n", __func__);
    gth_backtrace_path_show(bp, false, 0, outfp);
  }

  /* drop the edit operations which have been walked completely */
  gt_array_set_size(bp->editoperations,
                    gt_array_size(bp->editoperations) -
                    gth_path_walker_actual_eops(pw));

  if (showeops) {
    gt_file_xprintf(outfp, "%s(): show backtrace path (after eop "
                    "removal)\n", __func__);
    gth_backtrace_path_show(bp, false, 0, outfp);
  }

  /* shorten the edit operation the walker currently stands in */
  if (gth_path_walker_steps_in_current_eop(pw)) {
    last_eop = gt_array_get_last(bp->editoperations);
    length = gt_editoperation_length(*last_eop, is_protein);
    gt_assert(length > gth_path_walker_steps_in_current_eop(pw));
    gt_editoperation_set_length(last_eop,
                                length -
                                gth_path_walker_steps_in_current_eop(pw),
                                is_protein);
  }

  /* move the genomic and reference DP ranges behind the walked part */
  bp->gen_dp_start  += gth_path_walker_gen_distance(pw);
  bp->gen_dp_length -= gth_path_walker_gen_distance(pw);
  bp->ref_dp_start  += gth_path_walker_ref_distance(pw);
  bp->ref_dp_length -= gth_path_walker_ref_distance(pw);
}
/* Rewrite <editoperations> (decoded as protein edit operations) in place such
   that the first match which follows an EOP_TYPE_INTRON_WITH_1_BASE_LEFT or
   EOP_TYPE_INTRON_WITH_2_BASES_LEFT edit operation in array order has
   length 1: a longer match is split into a match of length 1 followed by a
   match with the remaining length. A mismatch or mismatch with 1 gap
   following such an intron ends the search for a match to split; all other
   edit operations are copied unchanged and the search continues.
   NOTE(review): presumably the array holds the alignment in reverse order, so
   that "following in the array" means "before the intron" in the alignment
   -- confirm against the callers. */
static void ensure_eop_of_len_1_before_introns(GtArray *editoperations)
{
  Editoperation eop, *eops, *eopptr, *eopend;
  Eoptype eoptype;
  unsigned long i, numofeops, eoplength;
  GtArray *backup;
  bool processing_necessary = false, split_match = false;

  /* check if processing is necessary
     the check is rather simple, it might be possible that
     ``processing_necessary'' is set to ``true'' whereas in fact no processing
     is necessary.
     The last element is not inspected, because nothing follows it in the
     array which could be split. The index based loop is also well-defined for
     an empty array (no pointer in front of the array is formed). */
  numofeops = gt_array_size(editoperations);
  eops = gt_array_get_space(editoperations);
  for (i = 0; i + 1 < numofeops; i++) {
    eoptype = gt_editoperation_type(eops[i], true);
    if (eoptype == EOP_TYPE_INTRON_WITH_1_BASE_LEFT ||
        eoptype == EOP_TYPE_INTRON_WITH_2_BASES_LEFT) {
      processing_necessary = true;
      break;
    }
  }

  if (!processing_necessary)
    return;

  /* init backup for the editoperations and fill it */
  backup = gt_array_new(sizeof (Editoperation));
  gt_array_add_array(backup, editoperations);

  /* reset the original edit operations */
  gt_array_set_size(editoperations, 0);

  /* process the backup and fill the original editoperations */
  eopptr = gt_array_get_space(backup);
  eopend = eopptr + gt_array_size(backup);
  for (; eopptr < eopend; eopptr++) {
    /* the decision has to be based on the TYPE of the edit operation (its
       length was used here before, which compared lengths with type
       constants) */
    eoptype = gt_editoperation_type(*eopptr, true);
    if (eoptype == EOP_TYPE_INTRON_WITH_1_BASE_LEFT ||
        eoptype == EOP_TYPE_INTRON_WITH_2_BASES_LEFT) {
      /* the next match has to be split */
      split_match = true;
    }
    else if (split_match) {
      if (eoptype == EOP_TYPE_MATCH) {
        split_match = false;
        if ((eoplength = gt_editoperation_length(*eopptr, true)) > 1) {
          /* replace the match by a match of length 1 followed by a match of
             length eoplength - 1; as before, the plain length value is stored
             as the match edit operation */
          eop = 1;
          gt_array_add(editoperations, eop);
          eop = eoplength - 1;
          gt_array_add(editoperations, eop);
          continue;
        }
      }
      else if (eoptype == EOP_TYPE_MISMATCH ||
               eoptype == EOP_TYPE_MISMATCH_WITH_1_GAP) {
        split_match = false;
      }
    }
    gt_array_add(editoperations, *eopptr);
  }

  /* free backup */
  gt_array_delete(backup);
}