/*
  Score the pairing of the translated character <amino> with the reference
  character <origreferencechar>, using <score_matrix> over
  <score_matrix_alphabet>. Every non-neutral result is multiplied by
  SCALEFACTOR.

  1.) If either character is DASH: SCALEFACTOR * INDEL_PENALTY.
  2.) If both are GT_STOP_AMINO: SCALEFACTOR * -2 * INDEL_PENALTY;
      if exactly one is GT_STOP_AMINO: SCALEFACTOR * 2 * INDEL_PENALTY.
  3.) Otherwise the score matrix entry for the two encoded characters, where
      a code equal to WILDCARD is redirected to the last index of the
      alphabet.
  4.) If <amino> is WILDCARD, or one of the characters is not a valid input
      for the alphabet: the neutral score 0.0.
*/
static GthFlt get_score(GtScoreMatrix *score_matrix,
                        GtAlphabet *score_matrix_alphabet,
                        unsigned char amino,
                        unsigned char origreferencechar)
{
  const GthFlt scalefactor = SCALEFACTOR,
               indel_penalty = INDEL_PENALTY;
  GtUchar amino_code, reference_code;
  int amino_is_stop, reference_is_stop, last_code, row, column;

  /* 1.) indel, irrespective of its size */
  if (amino == DASH || origreferencechar == DASH)
    return scalefactor * indel_penalty;

  /* 4.) neutral score for wild-cards and characters outside the alphabet;
     the tests are evaluated in this order on purpose, so that the alphabet
     is only asked about characters which passed the CHAR_MAX test */
  if (amino == WILDCARD ||
      amino > CHAR_MAX ||
      !gt_alphabet_valid_input(score_matrix_alphabet, amino) ||
      origreferencechar > CHAR_MAX ||
      !gt_alphabet_valid_input(score_matrix_alphabet, origreferencechar)) {
    return 0.0;
  }

  /* 2.) stop codon involved on at least one side */
  amino_is_stop = (amino == GT_STOP_AMINO);
  reference_is_stop = (origreferencechar == GT_STOP_AMINO);
  if (amino_is_stop || reference_is_stop) {
    return (amino_is_stop && reference_is_stop)
           ? scalefactor * -2 * indel_penalty
           : scalefactor * 2 * indel_penalty;
  }

  /* 3.) amino acid substitution score */
  amino_code = gt_alphabet_encode(score_matrix_alphabet, amino);
  reference_code = gt_alphabet_encode(score_matrix_alphabet,
                                      origreferencechar);
  last_code = gt_alphabet_size(score_matrix_alphabet) - 1;
  row = (amino_code == WILDCARD) ? last_code : amino_code;
  column = (reference_code == WILDCARD) ? last_code : reference_code;
  return scalefactor * gt_score_matrix_get_score(score_matrix, row, column);
}
/*
  Write the score matrix <sm> to <fp> as text: first a header line listing
  the decoded characters of the matrix alphabet, then one line per alphabet
  character consisting of that character followed by its row of scores.
  Both <sm> and <fp> must be non-NULL.
*/
void gt_score_matrix_show(const GtScoreMatrix *sm, FILE *fp)
{
  unsigned row, column;

  gt_assert(sm && fp);

  /* header line: a blank in place of the row label, then all characters */
  gt_xfputc(' ', fp);
  column = 0;
  while (column < gt_alphabet_size(sm->alphabet)) {
    fprintf(fp, " %c", gt_alphabet_decode(sm->alphabet, column));
    column++;
  }
  gt_xfputc('\n', fp);

  /* one line per row: the row's character, then its scores */
  row = 0;
  while (row < gt_alphabet_size(sm->alphabet)) {
    gt_xfputc(gt_alphabet_decode(sm->alphabet, row), fp);
    column = 0;
    while (column < gt_alphabet_size(sm->alphabet)) {
      fprintf(fp, " %2d", gt_score_matrix_get_score(sm, row, column));
      column++;
    }
    gt_xfputc('\n', fp);
    row++;
  }
}
/*
  Return the score <scorehandler> assigns to aligning character <a> with
  character <b>.

  - With a score matrix: the matrix entry for (<a>, <b>); the handler is
    asserted to be in mapped-sequence mode.
  - Without a score matrix, mapped-sequence mode: the match score if <a> and
    <b> are equal and neither is special (ISSPECIAL), the mismatch score
    otherwise.
  - Without a score matrix, plain mode: the match score if <a> equals <b>
    (after tolower() on both, if the handler's downcase flag is set), the
    mismatch score otherwise.
*/
GtWord gt_scorehandler_get_replacement(const GtScoreHandler *scorehandler,
                                       GtUchar a, GtUchar b)
{
  gt_assert(scorehandler != NULL);

  if (scorehandler->scorematrix != NULL) {
    gt_assert(scorehandler->mappedsequence);
    return gt_score_matrix_get_score(scorehandler->scorematrix, a, b);
  }

  if (scorehandler->mappedsequence) {
    /* special symbols never count as a match, not even against themselves */
    if (ISSPECIAL(a) || ISSPECIAL(b) || a != b) {
      return scorehandler->mismatchscore;
    }
    return scorehandler->matchscore;
  }

  if (scorehandler->downcase) {
    a = tolower((int) a);
    b = tolower((int) b);
  }
  if (a == b) {
    return scorehandler->matchscore;
  }
  return scorehandler->mismatchscore;
}
GtScoreHandler *gt_scorehandler2costhandler(const GtScoreHandler *scorehandler) { GtScoreHandler *costhandler; gt_assert(scorehandler != NULL); if (scorehandler->scorematrix == NULL) { GtWord matchscore, mismatchscore, gap_extension, gap_opening, maxscore = MAX(MAX(GT_DIV2(scorehandler->matchscore+1), GT_DIV2(scorehandler->mismatchscore+1)), MAX(1 + scorehandler->gap_extension,0)); matchscore = 2 * maxscore - scorehandler->matchscore; mismatchscore = 2 * maxscore - scorehandler->mismatchscore; gap_extension = maxscore - scorehandler->gap_extension; gap_opening = -scorehandler->gap_opening; costhandler = gt_scorehandler_new(matchscore, mismatchscore, gap_opening, gap_extension); if (!scorehandler->mappedsequence) { gt_scorehandler_plain(costhandler); } } else { int maxscore; GtWord gap_extension, gap_opening; unsigned int i, j, dim = gt_score_matrix_get_dimension(scorehandler->scorematrix); GtScoreMatrix *costmatrix = gt_score_matrix_clone_empty(scorehandler->scorematrix); for (maxscore = 0, i = 0; i < dim; i++) { for (j = 0; j < dim; j++) { int val = gt_score_matrix_get_score(scorehandler->scorematrix, i, j); if (val > maxscore) { maxscore = val; } } } maxscore = MAX(GT_DIV2(maxscore+1), 1 + scorehandler->gap_extension); for (i = 0; i < dim; i++) { for (j = 0; j < dim; j++) { /* translate */ int score = gt_score_matrix_get_score(scorehandler->scorematrix,i,j); gt_score_matrix_set_score(costmatrix, i, j, 2 * maxscore - score); } } gap_extension = maxscore - scorehandler->gap_extension; gap_opening = -scorehandler->gap_opening; costhandler = gt_scorehandler_new( 0,0, gap_opening, gap_extension); gt_scorehandler_add_scorematrix(costhandler,costmatrix); } return costhandler; }
/*
  Compute the score of <alignment> under an affine gap model.

  The multi edit operations of the alignment are visited from the last list
  entry down to the first. For every aligned character pair:
  - <mapped> and <scorematrix> given: the matrix entry of the pair is added;
  - <mapped> without <scorematrix>: <matchscore> is added if neither
    character is special (ISSPECIAL) and both translate to the same value via
    <characters>, <mismatchscore> otherwise;
  - not <mapped>: the characters are compared directly (after tolower() if
    <downcase> is set) and <matchscore> or <mismatchscore> is added.

  Every deletion or insertion entry adds <gap_extension> per step.
  <gap_opening> is added as well, unless the entry visited immediately
  before was of the same gap type.

  <mapped> and <downcase> must not both be set. Returns 0 for an alignment of
  length 0.
*/
static GtWord gt_alignment_eval_generic_with_affine_score(
                                         bool mapped,
                                         bool downcase,
                                         const GtUchar *characters,
                                         const GtAlignment *alignment,
                                         const GtScoreMatrix *scorematrix,
                                         GtWord matchscore,
                                         GtWord mismatchscore,
                                         GtWord gap_opening,
                                         GtWord gap_extension)
{
  GtUword entry, step, upos = 0, vpos = 0, num_entries;
  GtWord total = 0;
  GtMultieop meop;
  /* type of the entry visited before the current one; initialised to a
     value different from Insertion */
  AlignmentEoptype visited_before = Insertion + 1;

  gt_assert(alignment != NULL && (!mapped || !downcase));
  if (gt_alignment_get_length(alignment) == 0) {
    return 0;
  }
#ifndef NDEBUG
  gt_assert(gt_alignment_is_valid(alignment));
#endif

  num_entries = gt_multieoplist_get_num_entries(alignment->eops);
  for (entry = num_entries; entry > 0; entry--) {
    meop = gt_multieoplist_get_entry(alignment->eops, entry - 1);
    switch (meop.type) {
      case Mismatch:
      case Match:
      case Replacement:
        for (step = 0; step < meop.steps; step++, upos++, vpos++) {
          GtUchar a = alignment->u[upos],
                  b = alignment->v[vpos];
          bool is_match;

          if (!mapped) {
            if (downcase) {
              a = tolower((int) a);
              b = tolower((int) b);
            }
            is_match = (a == b);
          } else if (scorematrix != NULL) {
            total += gt_score_matrix_get_score(scorematrix, a, b);
            continue;
          } else {
            /* special symbols are tested first, so that <characters> is
               only indexed with non-special values */
            is_match = !(ISSPECIAL(a) || ISSPECIAL(b) ||
                         characters[a] != characters[b]);
          }
          total += is_match ? matchscore : mismatchscore;
        }
        break;
      case Deletion:
        if (!(entry < num_entries && visited_before == Deletion)) {
          total += gap_extension * meop.steps + gap_opening;
        } else {
          total += gap_extension * meop.steps;
        }
        upos += meop.steps;
        break;
      case Insertion:
        if (!(entry < num_entries && visited_before == Insertion)) {
          total += gap_extension * meop.steps + gap_opening;
        } else {
          total += gap_extension * meop.steps;
        }
        vpos += meop.steps;
        break;
    }
    visited_before = meop.type;
  }
  return total;
}