/* Function:  p7_GOATrace()
 * Synopsis:  Optimal accuracy decoding: traceback.
 * Incept:    SRE, Fri Feb 29 12:59:11 2008 [Janelia]
 *
 * Purpose:   Second stage of optimal accuracy (OA) decoding
 *            \citep{Kall05}: recover the OA path by tracing back
 *            through an OA DP matrix that has already been filled.
 *
 *            <gx> is the OA DP matrix just computed by
 *            <p7_GOptimalAccuracy()>. <pp> is the posterior decoding
 *            matrix, obtained from Forward/Backward on a target
 *            sequence of length <L> using query model <gm>.
 *
 *            <tr> is an empty traceback structure supplied by the
 *            caller, allocated to carry optional posterior
 *            probability annotation on residues (generally with
 *            <p7_trace_CreateWithPP()>). It is reallocated internally
 *            as needed for larger traces.
 *
 *            The walk begins at i=<gx->L>, k=0 by appending T and C,
 *            then repeatedly picks the state preceding the current
 *            one until S is reached. States are therefore appended
 *            in reverse order, and the trace is reversed before
 *            returning.
 *
 * Args:      gm - query profile
 *            pp - posterior decoding matrix
 *            gx - OA DP matrix to trace back through
 *            tr - RESULT: OA traceback, allocated with posterior probs
 *
 * Returns:   <eslOK> on success, and <tr> contains the OA traceback.
 *
 * Throws:    <eslEMEM> on allocation error.
 *            <eslEINVAL> if the traceback meets an unrecognized state
 *            or a selection step returns -1; also, when compiled with
 *            <p7_DEBUGGING>, if <tr> is not empty on entry.
 */
int
p7_GOATrace(const P7_PROFILE *gm, const P7_GMX *pp, const P7_GMX *gx, P7_TRACE *tr)
{
  int   i = gx->L;   /* current position in seq (1..L)             */
  int   k = 0;       /* current position in model (1..M)           */
  int   cur;         /* state the traceback is currently sitting in */
  int   pred;        /* state selected to precede <cur>            */
  float prob;        /* posterior probability annotation for <pred> */
  int   status;

#ifdef p7_DEBUGGING
  if (tr->N != 0) ESL_EXCEPTION(eslEINVAL, "trace isn't empty: forgot to Reuse()?");
#endif

  /* The reversed trace opens with T, then C, both at the end of the sequence. */
  status = p7_trace_AppendWithPP(tr, p7T_T, k, i, 0.0);
  if (status != eslOK) return status;
  status = p7_trace_AppendWithPP(tr, p7T_C, k, i, 0.0);
  if (status != eslOK) return status;

  for (cur = p7T_C; cur != p7T_S; cur = pred)
    {
      /* Choose the predecessor first, using the current (i,k); only
       * then step the coordinates back for M/D/I.
       */
      switch (cur) {
      case p7T_M:
        pred = select_m(gm, gx, i, k);
        k--;
        i--;
        break;
      case p7T_D:
        pred = select_d(gm, gx, i, k);
        k--;
        break;
      case p7T_I:
        pred = select_i(gm, gx, i, k);
        i--;
        break;
      case p7T_N:
        pred = select_n(i);
        break;
      case p7T_C:
        pred = select_c(gm, pp, gx, i);
        break;
      case p7T_J:
        pred = select_j(gm, pp, gx, i);
        break;
      case p7T_E:
        pred = select_e(gm, gx, i, &k);   /* also sets k */
        break;
      case p7T_B:
        pred = select_b(gm, gx, i);
        break;
      default:
        ESL_EXCEPTION(eslEINVAL, "bogus state in traceback");
      }
      if (pred == -1) ESL_EXCEPTION(eslEINVAL, "OA traceback choice failed");

      prob   = get_postprob(pp, pred, cur, k, i);
      status = p7_trace_AppendWithPP(tr, pred, k, i, prob);
      if (status != eslOK) return status;

      /* For N, C, J self-transitions the i decrement was deferred
       * until after the state was appended.
       */
      if (pred == cur && (cur == p7T_N || cur == p7T_J || cur == p7T_C)) i--;
    }

  tr->M = gm->M;
  tr->L = gx->L;
  return p7_trace_Reverse(tr);
}
/* Function:  p7_OATrace()
 * Synopsis:  Optimal accuracy decoding: traceback.
 * Incept:    SRE, Mon Aug 18 13:53:33 2008 [Janelia]
 *
 * Purpose:   Second stage of optimal accuracy (OA) decoding
 *            \citep{Kall05}: recover the OA path by tracing back
 *            through an OA DP matrix that has already been filled.
 *
 *            <ox> is the OA DP matrix just computed by
 *            <p7_OptimalAccuracyDP()>. <pp> is the posterior decoding
 *            matrix, obtained from Forward/Backward on a target
 *            sequence using query model <om>. Because the
 *            calculation depends only on <pp>, the target sequence
 *            itself is not an argument.
 *
 *            The result is written to the caller-provided <tr>,
 *            which the caller has allocated for optional posterior
 *            probability annotation on residues (generally with
 *            <p7_trace_CreateWithPP()>). It is reallocated internally
 *            if necessary.
 *
 *            The walk begins at i=<ox->L>, k=0 by appending T and C,
 *            then repeatedly picks the state preceding the current
 *            one until S is reached; the trace is reversed before
 *            returning.
 *
 * Args:      om - profile
 *            pp - posterior probability matrix
 *            ox - OA matrix to trace, LxM
 *            tr - storage for the recovered traceback
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation error.
 *            <eslEINVAL> if the trace <tr> isn't empty (needs to be
 *            Reuse()'d), if the traceback meets an unrecognized
 *            state, or if a selection step returns -1.
 */
int
p7_OATrace(const P7_OPROFILE *om, const P7_OMX *pp, const P7_OMX *ox, P7_TRACE *tr)
{
  int   i = ox->L;   /* current position in sequence, 1..L          */
  int   k = 0;       /* current position in model, 1..M             */
  int   cur;         /* state the traceback is currently sitting in */
  int   pred;        /* state selected to precede <cur>             */
  float prob;        /* posterior probability annotation for <pred> */
  int   status;

  if (tr->N != 0) ESL_EXCEPTION(eslEINVAL, "trace not empty; needs to be Reuse()'d?");

  /* The reversed trace opens with T, then C, both at the end of the sequence. */
  status = p7_trace_AppendWithPP(tr, p7T_T, k, i, 0.0);
  if (status != eslOK) return status;
  status = p7_trace_AppendWithPP(tr, p7T_C, k, i, 0.0);
  if (status != eslOK) return status;

  /* Start from the last state just appended to the trace. */
  for (cur = tr->st[tr->N-1]; cur != p7T_S; cur = pred)
    {
      /* Choose the predecessor first, using the current (i,k); only
       * then step the coordinates back for M/D/I.
       */
      switch (cur) {
      case p7T_M:
        pred = select_m(om, ox, i, k);
        k--;
        i--;
        break;
      case p7T_D:
        pred = select_d(om, ox, i, k);
        k--;
        break;
      case p7T_I:
        pred = select_i(om, ox, i, k);
        i--;
        break;
      case p7T_N:
        pred = select_n(i);
        break;
      case p7T_C:
        pred = select_c(om, pp, ox, i);
        break;
      case p7T_J:
        pred = select_j(om, pp, ox, i);
        break;
      case p7T_E:
        pred = select_e(om, ox, i, &k);   /* also sets k */
        break;
      case p7T_B:
        pred = select_b(om, ox, i);
        break;
      default:
        ESL_EXCEPTION(eslEINVAL, "bogus state in traceback");
      }
      if (pred == -1) ESL_EXCEPTION(eslEINVAL, "OA traceback choice failed");

      prob   = get_postprob(pp, pred, cur, k, i);
      status = p7_trace_AppendWithPP(tr, pred, k, i, prob);
      if (status != eslOK) return status;

      /* N, C, J self-transitions: i is decremented only after the
       * state has been appended.
       */
      if (pred == cur && (cur == p7T_N || cur == p7T_J || cur == p7T_C)) i--;
    } /* end traceback, at S state */

  tr->M = om->M;
  tr->L = ox->L;
  return p7_trace_Reverse(tr);
}
/* Function: p7_alidisplay_Backconvert() * Synopsis: Convert an alidisplay to a faux trace and subsequence. * Incept: SRE, Wed Dec 10 09:49:28 2008 [Janelia] * * Purpose: Convert alignment display object <ad> to a faux subsequence * and faux subsequence trace, returning them in <ret_sq> and * <ret_tr>. * * The subsequence <*ret_sq> is digital; ascii residues in * <ad> are digitized using digital alphabet <abc>. * * The subsequence and trace are suitable for passing as * array elements to <p7_MultipleAlignment>. This is the * main purpose of backconversion. Results of a profile * search are stored in a hit list as a processed * <P7_ALIDISPLAY>, not as a <P7_TRACE> and <ESL_SQ>, to * reduce space and to reduce communication overhead in * parallelized search implementations. After reduction * to a final hit list, a master may want to construct a * multiple alignment of all the significant hits. * * Returns: <eslOK> on success. * * Throws: <eslEMEM> on allocation failures. <eslECORRUPT> on unexpected internal * data corruption. On any exception, <*ret_sq> and <*ret_tr> are * <NULL>. * * Xref: J4/29. */ int p7_alidisplay_Backconvert(const P7_ALIDISPLAY *ad, const ESL_ALPHABET *abc, ESL_SQ **ret_sq, P7_TRACE **ret_tr) { ESL_SQ *sq = NULL; /* RETURN: faux subsequence */ P7_TRACE *tr = NULL; /* RETURN: faux trace */ int subL = 0; /* subsequence length in the <ad> */ int a, i, k; /* coords for <ad>, <sq->dsq>, model */ char st; /* state type: MDI */ int status; /* Make a first pass over <ad> just to calculate subseq length */ for (a = 0; a < ad->N; a++) if (! esl_abc_CIsGap(abc, ad->aseq[a])) subL++; /* Allocations */ if ((sq = esl_sq_CreateDigital(abc)) == NULL) { status = eslEMEM; goto ERROR; } if ((status = esl_sq_GrowTo(sq, subL)) != eslOK) goto ERROR; if ((tr = (ad->ppline == NULL) ? 
p7_trace_Create() : p7_trace_CreateWithPP()) == NULL) { status = eslEMEM; goto ERROR; } if ((status = p7_trace_GrowTo(tr, subL+6)) != eslOK) goto ERROR; /* +6 is for SNB/ECT */ /* Construction of dsq, trace */ sq->dsq[0] = eslDSQ_SENTINEL; if ((status = ((ad->ppline == NULL) ? p7_trace_Append(tr, p7T_S, 0, 0) : p7_trace_AppendWithPP(tr, p7T_S, 0, 0, 0.0))) != eslOK) goto ERROR; if ((status = ((ad->ppline == NULL) ? p7_trace_Append(tr, p7T_N, 0, 0) : p7_trace_AppendWithPP(tr, p7T_N, 0, 0, 0.0))) != eslOK) goto ERROR; if ((status = ((ad->ppline == NULL) ? p7_trace_Append(tr, p7T_B, 0, 0) : p7_trace_AppendWithPP(tr, p7T_B, 0, 0, 0.0))) != eslOK) goto ERROR; k = ad->hmmfrom; i = 1; for (a = 0; a < ad->N; a++) { if (esl_abc_CIsResidue(abc, ad->model[a])) { st = (esl_abc_CIsResidue(abc, ad->aseq[a]) ? p7T_M : p7T_D); } else st = p7T_I; if ((status = ((ad->ppline == NULL) ? p7_trace_Append(tr, st, k, i) : p7_trace_AppendWithPP(tr, st, k, i, p7_alidisplay_DecodePostProb(ad->ppline[a])))) != eslOK) goto ERROR; switch (st) { case p7T_M: sq->dsq[i] = esl_abc_DigitizeSymbol(abc, ad->aseq[a]); k++; i++; break; case p7T_I: sq->dsq[i] = esl_abc_DigitizeSymbol(abc, ad->aseq[a]); i++; break; case p7T_D: k++; break; } } if ((status = ((ad->ppline == NULL) ? p7_trace_Append(tr, p7T_E, 0, 0) : p7_trace_AppendWithPP(tr, p7T_E, 0, 0, 0.0))) != eslOK) goto ERROR; if ((status = ((ad->ppline == NULL) ? p7_trace_Append(tr, p7T_C, 0, 0) : p7_trace_AppendWithPP(tr, p7T_C, 0, 0, 0.0))) != eslOK) goto ERROR; if ((status = ((ad->ppline == NULL) ? 
p7_trace_Append(tr, p7T_T, 0, 0) : p7_trace_AppendWithPP(tr, p7T_T, 0, 0, 0.0))) != eslOK) goto ERROR; sq->dsq[i] = eslDSQ_SENTINEL; /* some sanity checks */ if (tr->N != ad->N + 6) ESL_XEXCEPTION(eslECORRUPT, "backconverted trace ended up with unexpected size (%s/%s)", ad->sqname, ad->hmmname); if (k != ad->hmmto + 1) ESL_XEXCEPTION(eslECORRUPT, "backconverted trace didn't end at expected place on model (%s/%s)", ad->sqname, ad->hmmname); if (i != subL + 1) ESL_XEXCEPTION(eslECORRUPT, "backconverted subseq didn't end at expected length (%s/%s)", ad->sqname, ad->hmmname); /* Set up <sq> annotation as a subseq of a source sequence */ if ((status = esl_sq_FormatName(sq, "%s/%ld-%ld", ad->sqname, ad->sqfrom, ad->sqto)) != eslOK) goto ERROR; if ((status = esl_sq_FormatDesc(sq, "[subseq from] %s", ad->sqdesc[0] != '\0' ? ad->sqdesc : ad->sqname)) != eslOK) goto ERROR; if ((status = esl_sq_SetSource (sq, ad->sqname)) != eslOK) goto ERROR; if (ad->sqacc[0] != '\0') { if ((status = esl_sq_SetAccession (sq, ad->sqacc)) != eslOK) goto ERROR; } sq->n = subL; sq->start = ad->sqfrom; sq->end = ad->sqto; sq->C = 0; sq->W = subL; sq->L = ad->L; tr->M = ad->M; tr->L = ad->L; *ret_sq = sq; *ret_tr = tr; return eslOK; ERROR: if (sq != NULL) esl_sq_Destroy(sq); if (tr != NULL) p7_trace_Destroy(tr); *ret_sq = NULL; *ret_tr = NULL; return status; }