GtUword gt_alignment_eval_generic(bool mapped,bool downcase, const GtAlignment *alignment) { GtUword i, j, idx_u = 0, idx_v = 0, sumcost = 0, meoplen; GtMultieop meop; gt_assert(alignment != NULL && (!mapped || !downcase)); #ifndef NDEBUG gt_assert(gt_alignment_is_valid(alignment)); #endif meoplen = gt_multieoplist_get_num_entries(alignment->eops); for (i = meoplen; i > 0; i--) { meop = gt_multieoplist_get_entry(alignment->eops, i - 1); switch (meop.type) { case Mismatch: sumcost += meop.steps; idx_u += meop.steps; idx_v += meop.steps; break; case Match: case Replacement: for (j = 0; j < meop.steps; j++) { GtUchar a = alignment->u[idx_u], b = alignment->v[idx_v]; if (mapped) { if (ISSPECIAL(a) || ISSPECIAL(b) || a != b) { sumcost++; } } else { if (downcase) { a = tolower((int) a); b = tolower((int) b); } if (a != b) { sumcost++; } } idx_u++; idx_v++; } break; case Deletion: sumcost += meop.steps; idx_u += meop.steps; break; case Insertion: sumcost += meop.steps; idx_v += meop.steps; break; } } return sumcost; }
/*
 * Returns the score of <alignment> under a linear gap model.
 *
 * The edit operations are visited from the last list entry down to the first.
 * Every column of a Mismatch, Match or Replacement entry is scored by
 * comparing the actual symbols: <matchscore> if they are equal and the symbol
 * satisfies ISNOTSPECIAL, <mismatchscore> otherwise.  Every step of a
 * Deletion or Insertion entry contributes <gapscore>.
 *
 * The multi-edit-operation list is accessed the same way as in
 * gt_alignment_eval_generic(): entries are obtained by value and the loop
 * bound is the number of entries.
 */
GtWord gt_alignment_eval_with_score(const GtAlignment *alignment,
                                    GtWord matchscore,
                                    GtWord mismatchscore,
                                    GtWord gapscore)
{
  GtUword i, j, idx_u = 0, idx_v = 0, meoplen;
  GtWord sumscore = 0;
  GtMultieop meop;

  gt_assert(alignment != NULL);
  gt_assert(gt_alignment_is_valid(alignment));

  meoplen = gt_multieoplist_get_num_entries(alignment->eops);
  for (i = meoplen; i > 0; i--) {
    meop = gt_multieoplist_get_entry(alignment->eops, i - 1);
    switch (meop.type) {
      case Mismatch:
      case Match:
      case Replacement:
        for (j = 0; j < meop.steps; j++) {
          if (alignment->u[idx_u] == alignment->v[idx_v] &&
              ISNOTSPECIAL(alignment->u[idx_u])) {
            sumscore += matchscore;
          }
          else {
            sumscore += mismatchscore;
          }
          idx_u++;
          idx_v++;
        }
        break;
      case Deletion:
        /* gap in v: only u advances */
        sumscore += gapscore * meop.steps;
        idx_u += meop.steps;
        break;
      case Insertion:
        /* gap in u: only v advances */
        sumscore += gapscore * meop.steps;
        idx_v += meop.steps;
        break;
    }
  }
  return sumscore;
}
/*
 * Returns the unit cost of <alignment> for unmapped (plain character)
 * sequences, comparing symbols case-insensitively.
 *
 * Each step of a Mismatch, Deletion or Insertion costs 1; each column of a
 * Match or Replacement costs 1 if the tolower()-ed symbols differ.  This is
 * exactly what gt_alignment_eval_generic() computes with mapped == false and
 * downcase == true, so the work is delegated there instead of keeping a
 * second copy of the traversal.
 */
GtUword gt_alignment_eval(const GtAlignment *alignment)
{
  gt_assert(alignment != NULL);
  return gt_alignment_eval_generic(false, true, alignment);
}
/*
 * Writes <alignment> to <fp> as three newline-terminated lines: the first
 * sequence (u), a match/mismatch line, and the second sequence (v).
 *
 * Each symbol of u and v is used as an index into <characters> to obtain the
 * character to print; symbols satisfying ISSPECIAL are printed as
 * <wildcardshow> instead.  Gap columns are printed as GAPSYMBOL.  The middle
 * line shows MATCHSYMBOL where both symbols are equal and ISNOTSPECIAL, and
 * MISMATCHSYMBOL everywhere else (including gap columns).
 *
 * The edit operations are visited from the last list entry down to the first,
 * once per output line.  The multi-edit-operation list is accessed the same
 * way as in gt_alignment_show_generic(): entries are obtained by value and
 * the loop bound is the number of entries.
 */
void gt_alignment_show_with_mapped_chars(const GtAlignment *alignment,
                                         const GtUchar *characters,
                                         GtUchar wildcardshow,
                                         FILE *fp)
{
  GtUword i, j, idx_u, idx_v, meoplen;
  GtMultieop meop;

  gt_assert(alignment);
  gt_assert(gt_alignment_is_valid(alignment));

  meoplen = gt_multieoplist_get_num_entries(alignment->eops);
  /* output first line */
  idx_u = 0;
  for (i = meoplen; i > 0; i--) {
    meop = gt_multieoplist_get_entry(alignment->eops, i - 1);
    switch (meop.type) {
      case Mismatch:
      case Match:
      case Replacement:
      case Deletion:
        for (j = 0; j < meop.steps; j++) {
          gt_xfputc(ISSPECIAL(alignment->u[idx_u])
                      ? (int) wildcardshow
                      : (int) characters[alignment->u[idx_u]], fp);
          idx_u++;
        }
        break;
      case Insertion:
        for (j = 0; j < meop.steps; j++) {
          gt_xfputc(GAPSYMBOL, fp);
        }
        break;
    }
  }
  gt_xfputc('\n', fp);
  /* output middle line */
  idx_u = idx_v = 0;
  for (i = meoplen; i > 0; i--) {
    meop = gt_multieoplist_get_entry(alignment->eops, i - 1);
    switch (meop.type) {
      case Mismatch:
      case Match:
      case Replacement:
        for (j = 0; j < meop.steps; j++) {
          if (alignment->u[idx_u] == alignment->v[idx_v] &&
              ISNOTSPECIAL(alignment->u[idx_u])) {
            gt_xfputc(MATCHSYMBOL, fp);
          }
          else {
            gt_xfputc(MISMATCHSYMBOL, fp);
          }
          idx_u++;
          idx_v++;
        }
        break;
      case Deletion:
        for (j = 0; j < meop.steps; j++) {
          gt_xfputc(MISMATCHSYMBOL, fp);
          idx_u++;
        }
        break;
      case Insertion:
        for (j = 0; j < meop.steps; j++) {
          gt_xfputc(MISMATCHSYMBOL, fp);
          idx_v++;
        }
        break;
    }
  }
  gt_xfputc('\n', fp);
  /* output last line */
  idx_v = 0;
  for (i = meoplen; i > 0; i--) {
    meop = gt_multieoplist_get_entry(alignment->eops, i - 1);
    switch (meop.type) {
      case Mismatch:
      case Match:
      case Replacement:
      case Insertion:
        for (j = 0; j < meop.steps; j++) {
          gt_xfputc(ISSPECIAL(alignment->v[idx_v])
                      ? (int) wildcardshow
                      : (int) characters[alignment->v[idx_v]], fp);
          idx_v++;
        }
        break;
      case Deletion:
        for (j = 0; j < meop.steps; j++) {
          gt_xfputc(GAPSYMBOL, fp);
        }
        break;
    }
  }
  gt_xfputc('\n', fp);
}
/* XXX: add width parameter and format the GtAlignment accordingly */ void gt_alignment_show(const GtAlignment *alignment, FILE *fp) { GtUword i, j, idx_u, idx_v, meoplen; GtMultieop *meop; gt_assert(alignment); gt_assert(gt_alignment_is_valid(alignment)); meoplen = gt_multieoplist_get_length(alignment->eops); /* output first line */ idx_u = 0; for (i = meoplen; i > 0; i--) { meop = gt_multieoplist_get_entry(alignment->eops, i - 1); switch (meop->type) { case Mismatch: case Match: case Replacement: case Deletion: for (j = 0; j < meop->steps; j++) gt_xfputc((int) alignment->u[idx_u++], fp); break; case Insertion: for (j = 0; j < meop->steps; j++) gt_xfputc(GAPSYMBOL, fp); break; } } gt_xfputc('\n', fp); /* output middle line */ idx_u = idx_v = 0; for (i = meoplen; i > 0; i--) { meop = gt_multieoplist_get_entry(alignment->eops, i - 1); switch (meop->type) { case Mismatch: case Match: case Replacement: for (j = 0; j < meop->steps; j++) { if (tolower((int) alignment->u[idx_u++]) == tolower((int) alignment->v[idx_v++])) gt_xfputc(MATCHSYMBOL, fp); else gt_xfputc(MISMATCHSYMBOL, fp); } break; case Deletion: for (j = 0; j < meop->steps; j++) { gt_xfputc(MISMATCHSYMBOL, fp); idx_u++; } break; case Insertion: for (j = 0; j < meop->steps; j++) { gt_xfputc(MISMATCHSYMBOL, fp); idx_v++; } break; } } gt_xfputc('\n', fp); /* ouput last line */ idx_v = 0; for (i = meoplen; i > 0; i--) { meop = gt_multieoplist_get_entry(alignment->eops, i - 1); switch (meop->type) { case Mismatch: case Match: case Replacement: case Insertion: for (j = 0; j < meop->steps; j++) gt_xfputc((int) alignment->v[idx_v++], fp); break; case Deletion: for (j = 0; j < meop->steps; j++) gt_xfputc(GAPSYMBOL, fp); break; } } gt_xfputc('\n', fp); }
/*
 * Writes <alignment> to <fp> in rows of three lines (u, match line, v).
 *
 * <buffer> is used as scratch space for three consecutive rows of
 * <width> + 1 bytes each (top, middle, low), so it must provide at least
 * 3 * (<width> + 1) bytes.  Columns are filled in one at a time;
 * gt_alignment_show_advance() is called after every column and returns the
 * next column position (presumably flushing the three rows to <fp> once
 * <width> columns are filled -- confirm against its definition).  A partly
 * filled final row is written here.
 *
 * If <characters> is not NULL, sequence symbols are used as indices into
 * <characters>, symbols satisfying ISSPECIAL are shown as <wildcardshow> and
 * never count as matches.  Otherwise symbols are printed as stored and
 * compared directly, or after tolower() when <downcase> is set.  <downcase>
 * must not be combined with <characters>.
 *
 * Matching columns whose u-position lies in
 * [useedoffset, useedoffset + seedlen) are shown as '+' when
 * alignment->seed_display is set; the first and last such column are
 * remembered for the polishing report.
 *
 * If alignment->pol_info is set, a "# polishing(...)" summary line is
 * emitted after the alignment.
 * NOTE(review): that summary is written with printf(), i.e. to stdout, not
 * to <fp>; kept as is because callers may rely on it -- confirm intent.
 */
void gt_alignment_show_generic(GtUchar *buffer,
                               bool downcase,
                               const GtAlignment *alignment,
                               FILE *fp,
                               unsigned int width,
                               const GtUchar *characters,
                               GtUchar wildcardshow)
{
  GtMultieop meop;
  /* Several of these locals are kept under their exact names because the
     GT_UPDATE_POSITIVE_INFO macro (defined elsewhere) is expanded inside
     this function and appears to operate on them directly. */
  GtUword j, idx_eop, idx_u = 0, idx_v = 0, meoplen, alignmentlength = 0,
          suffix_bits_used = 0, prefix_positive = 0, pol_size = 0,
          firstseedcolumn = GT_UWORD_MAX, lastseedcolumn = GT_UWORD_MAX;
  const GtUword max_history = 64;
  unsigned int pos = 0;
  GtUchar *topbuf = buffer, *midbuf = NULL, *lowbuf = NULL;
  GtWord prefix_positive_sum = 0;
  uint64_t suffix_bits = 0, set_mask = 0;

  /* validate before the first dereference of alignment */
  gt_assert(alignment != NULL && (characters == NULL || !downcase));
  if (alignment->pol_info != NULL)
  {
    pol_size = GT_MULT2(alignment->pol_info->cut_depth);
    set_mask = ((uint64_t) 1) << (max_history - 1);
  }
  /* carve the three rows out of buffer; each row ends in a newline */
  topbuf[width] = '\n';
  midbuf = topbuf + width + 1;
  midbuf[width] = '\n';
  lowbuf = midbuf + width + 1;
  lowbuf[width] = '\n';
  meoplen = gt_multieoplist_get_num_entries(alignment->eops);
  gt_assert(meoplen > 0);
  idx_eop = meoplen - 1;
  /* walk the edit operations from the last list entry towards the first */
  while (true)
  {
    meop = gt_multieoplist_get_entry(alignment->eops, idx_eop);
    switch (meop.type)
    {
      case Mismatch:
      case Match:
      case Replacement:
        for (j = 0; j < meop.steps && idx_u < alignment->ulen &&
                    idx_v < alignment->vlen; j++)
        {
          GtUchar a = alignment->u[idx_u];
          GtUchar b = alignment->v[idx_v];
          bool is_match;

          if (characters != NULL)
          {
            topbuf[pos] = ISSPECIAL(a) ? wildcardshow : characters[a];
            is_match = (a == b && !ISSPECIAL(a));
            lowbuf[pos] = ISSPECIAL(b) ? wildcardshow : characters[b];
          } else
          {
            topbuf[pos] = a;
            is_match = downcase ? (tolower((int) a) == tolower((int) b))
                                : (a == b);
            lowbuf[pos] = b;
          }
          if (is_match)
          {
            if (alignment->useedoffset <= idx_u &&
                idx_u < alignment->useedoffset + alignment->seedlen)
            {
              if (alignment->seed_display)
              {
                midbuf[pos] = (GtUchar) '+';
              } else
              {
                midbuf[pos] = (GtUchar) MATCHSYMBOL;
              }
              if (firstseedcolumn == GT_UWORD_MAX)
              {
                firstseedcolumn = alignmentlength;
              }
              lastseedcolumn = alignmentlength;
            } else
            {
              midbuf[pos] = (GtUchar) MATCHSYMBOL;
            }
          } else
          {
            midbuf[pos] = (GtUchar) MISMATCHSYMBOL;
          }
          pos = gt_alignment_show_advance(pos,width,topbuf,fp);
          GT_UPDATE_POSITIVE_INFO(is_match);
          alignmentlength++;
          idx_u++;
          idx_v++;
        }
        break;
      case Deletion:
        for (j = 0; j < meop.steps && idx_u < alignment->ulen; j++)
        {
          GtUchar a = alignment->u[idx_u++];

          if (characters != NULL)
          {
            topbuf[pos] = ISSPECIAL(a) ? wildcardshow : characters[a];
          } else
          {
            topbuf[pos] = a;
          }
          midbuf[pos] = (GtUchar) MISMATCHSYMBOL;
          lowbuf[pos] = (GtUchar) GAPSYMBOL;
          pos = gt_alignment_show_advance(pos,width,topbuf,fp);
          GT_UPDATE_POSITIVE_INFO(false);
          alignmentlength++;
        }
        break;
      case Insertion:
        for (j = 0; j < meop.steps && idx_v < alignment->vlen; j++)
        {
          GtUchar b = alignment->v[idx_v++];

          topbuf[pos] = (GtUchar) GAPSYMBOL;
          midbuf[pos] = (GtUchar) MISMATCHSYMBOL;
          if (characters != NULL)
          {
            lowbuf[pos] = ISSPECIAL(b) ? wildcardshow : characters[b];
          } else
          {
            lowbuf[pos] = b;
          }
          pos = gt_alignment_show_advance(pos,width,topbuf,fp);
          GT_UPDATE_POSITIVE_INFO(false);
          alignmentlength++;
        }
        break;
    }
    /* stop at the first list entry or once both sequences are consumed */
    if (idx_eop > 0 && (idx_u < alignment->ulen || idx_v < alignment->vlen))
    {
      idx_eop--;
    } else
    {
      break;
    }
  }
  if (pos > 0)
  {
    /* write the partly filled last row, each line cut after pos columns */
    topbuf[pos] = '\n';
    fwrite(topbuf,sizeof *topbuf,pos+1,fp);
    midbuf[pos] = '\n';
    fwrite(midbuf,sizeof *midbuf,pos+1,fp);
    lowbuf[pos] = '\n';
    fwrite(lowbuf,sizeof *lowbuf,pos+1,fp);
  }
  if (alignment->pol_info != NULL)
  {
    GtUword suffix_positive;
    GtWord suffix_positive_sum = 0;
    bool startpolished = false, endpolished = false;
    /* GT_UWORD_MAX means "no seed column seen"; test it before adding
       pol_size, otherwise the unsigned sum wraps around */
    const bool seed_on_end = lastseedcolumn != GT_UWORD_MAX &&
                             lastseedcolumn + pol_size > alignmentlength;

    /* scan the recorded match/difference history bits starting at the top
       bit, counting columns until the running score becomes negative */
    for (suffix_positive = 0; suffix_positive < suffix_bits_used;
         suffix_positive++)
    {
      suffix_positive_sum += ((suffix_bits & set_mask)
                                ? alignment->pol_info->match_score
                                : -alignment->pol_info->difference_score);
      if (suffix_positive_sum < 0)
      {
        break;
      }
      set_mask >>= 1;
    }
    gt_assert(prefix_positive <= alignmentlength &&
              suffix_positive <= alignmentlength);
    if (prefix_positive >= pol_size || prefix_positive == alignmentlength ||
        firstseedcolumn < pol_size)
    {
      startpolished = true;
    }
    if (suffix_positive >= pol_size || suffix_positive == alignmentlength ||
        seed_on_end)
    {
      endpolished = true;
    }
    printf("# polishing(m=" GT_WD ",d=" GT_WD ",p=" GT_WU
           "): " GT_WU "/" GT_WU,
           alignment->pol_info->match_score,
           -alignment->pol_info->difference_score,
           pol_size,
           prefix_positive,
           suffix_positive);
    if (firstseedcolumn < pol_size)
    {
      printf(", seed_on_start");
    }
    if (seed_on_end)
    {
      printf(", seed_on_end");
    }
    if (alignment->withpolcheck)
    {
      printf("\n");
      gt_assert(startpolished && endpolished);
    } else
    {
      if (!startpolished)
      {
        printf(", start not polished");
      }
      if (!endpolished)
      {
        printf(", end not polished");
      }
      printf("\n");
    }
  }
}
/*
 * Returns the score of <alignment> under an affine gap model; an alignment
 * of length 0 scores 0.
 *
 * The edit operations are visited from the last list entry down to the first.
 * Columns of Mismatch, Match and Replacement entries are scored symbol by
 * symbol:
 *   - <mapped> with <scorematrix>: gt_score_matrix_get_score() of the pair;
 *   - <mapped> without <scorematrix>: <mismatchscore> if either symbol
 *     satisfies ISSPECIAL or their <characters> entries differ, else
 *     <matchscore>;
 *   - not <mapped>: <mismatchscore> if the symbols differ (compared after
 *     tolower() when <downcase> is set), else <matchscore>.
 * A Deletion or Insertion entry contributes <gap_extension> per step, plus
 * <gap_opening> unless the entry visited immediately before it had the same
 * type.  <mapped> and <downcase> must not both be set.
 */
static GtWord gt_alignment_eval_generic_with_affine_score(
                                         bool mapped,
                                         bool downcase,
                                         const GtUchar *characters,
                                         const GtAlignment *alignment,
                                         const GtScoreMatrix *scorematrix,
                                         GtWord matchscore,
                                         GtWord mismatchscore,
                                         GtWord gap_opening,
                                         GtWord gap_extension)
{
  GtUword remaining, step, upos = 0, vpos = 0, num_entries;
  GtWord score = 0;
  /* sentinel one past Insertion: no entry has been visited yet */
  AlignmentEoptype visited_type = Insertion + 1;

  gt_assert(alignment != NULL && (!mapped || !downcase));
  if (gt_alignment_get_length(alignment) == 0)
  {
    return 0;
  }
#ifndef NDEBUG
  gt_assert(gt_alignment_is_valid(alignment));
#endif

  num_entries = gt_multieoplist_get_num_entries(alignment->eops);
  for (remaining = num_entries; remaining > 0; remaining--)
  {
    const GtMultieop eop = gt_multieoplist_get_entry(alignment->eops,
                                                     remaining - 1);
    /* false only for the very first entry visited */
    const bool has_visited = remaining < num_entries;

    switch (eop.type)
    {
      case Deletion:
        score += (has_visited && visited_type == Deletion)
                   ? gap_extension * eop.steps
                   : gap_extension * eop.steps + gap_opening;
        upos += eop.steps;
        break;
      case Insertion:
        score += (has_visited && visited_type == Insertion)
                   ? gap_extension * eop.steps
                   : gap_extension * eop.steps + gap_opening;
        vpos += eop.steps;
        break;
      case Mismatch:
      case Match:
      case Replacement:
        for (step = 0; step < eop.steps; step++, upos++, vpos++)
        {
          GtUchar cu = alignment->u[upos], cv = alignment->v[vpos];

          if (mapped && scorematrix != NULL)
          {
            score += gt_score_matrix_get_score(scorematrix, cu, cv);
            continue;
          }
          if (mapped)
          {
            if (ISSPECIAL(cu) || ISSPECIAL(cv) ||
                characters[cu] != characters[cv])
            {
              score += mismatchscore;
            } else
            {
              score += matchscore;
            }
            continue;
          }
          if (downcase)
          {
            cu = tolower((int) cu);
            cv = tolower((int) cv);
          }
          score += (cu == cv) ? matchscore : mismatchscore;
        }
        break;
    }
    visited_type = eop.type;
  }
  return score;
}