/* Program entry point.
 *
 * Invokes PRE, DCL, EXP, INT, FLP, ARR, STR, MEM, ENV, SIG, ERR, MSC and
 * POS exactly once each, in that order. Only ENV receives an argument
 * (the environment vector <envp>); <argc> and <argv> are not used.
 * Always returns 0.
 */
int main(int argc, char const *argv[], char const *envp[])
{
    PRE();
    DCL();
    EXP();
    INT();
    FLP();
    ARR();
    STR();
    MEM();
    ENV(envp);
    SIG();
    ERR();
    MSC();
    POS();

    return 0;
}
/* Function:  p7_GMSV()
 * Synopsis:  The MSV score algorithm (slow, correct version)
 * Incept:    SRE, Thu Dec 27 08:33:39 2007 [Janelia]
 *
 * Purpose:   Computes the best score over ungapped local segment pair
 *            alignments. This is the same as running a multihit local
 *            profile in which every match->match transition has
 *            probability 1.0, so only match cells and the special
 *            states are filled in.
 *
 *            The special-state transition scores are derived here
 *            from <L>, <gm->M> and <nu> rather than read from the
 *            profile:
 *              tloop = log(L / (L+3))        N/C/J self loops
 *              tmove = log(3 / (L+3))        N->B, J->B, C->T
 *              tbmk  = log(2 / (M (M+1)))    B->Mk entry
 *              tej   = log((nu-1) / nu)      E->J
 *              tec   = log(1 / nu)           E->C
 *
 * Args:      dsq    - sequence in digitized form, 1..L
 *            L      - length of dsq
 *            gm     - profile (can be in any mode)
 *            gx     - DP matrix with room for an MxL alignment
 *            nu     - configuration: expected number of hits (use 2.0 as a default)
 *            opt_sc - optRETURN: MSV lod score in nats; may be NULL
 *
 * Returns:   <eslOK> on success. <gx->M> and <gx->L> are set to the
 *            dimensions of the problem that was solved.
 *
 * Note:      Deliberately written as a modified p7_GViterbi routine.
 *            It could be faster (no D or I states are computed, so the
 *            interleaved matrix and some special states are not really
 *            needed), but speed is the job of the optimized
 *            implementations. The goal here is a stable,
 *            probabilistically correct reference calculation; hence
 *            the CC, NN, JJ transitions are real scores here, not
 *            fixed to 0 as in the optimized versions.
 */
int
p7_GMSV(const ESL_DSQ *dsq, int L, const P7_PROFILE *gm, P7_GMX *gx, float nu, float *opt_sc)
{
  float    **dp    = gx->dp;     /* needed by the MMX() macro */
  float     *xmx   = gx->xmx;    /* needed by the XMX() macro */
  const int  M     = gm->M;
  float      tloop = logf((float) L / (float) (L+3));
  float      tmove = logf( 3.0f / (float) (L+3));
  float      tbmk  = logf( 2.0f / ((float) M * (float) (M+1)));
  float      tej   = logf((nu - 1.0f) / nu);
  float      tec   = logf(1.0f / nu);
  int        i,k;

  /* Row 0: nothing has been emitted yet. */
  XMX(0,p7G_N) = 0;
  XMX(0,p7G_B) = tmove;            /* S->N->B, no N-tail   */
  XMX(0,p7G_J) = -eslINFINITY;     /* need seq to get here */
  XMX(0,p7G_C) = -eslINFINITY;
  XMX(0,p7G_E) = -eslINFINITY;
  for (k = 0; k <= M; k++)
    {
      MMX(0,k) = -eslINFINITY;     /* need seq to get here */
    }

  /* Rows 1..L */
  for (i = 1; i <= L; i++)
    {
      float const *rsc = gm->rsc[dsq[i]];   /* needed by the MSC() macro */

      MMX(i,0)     = -eslINFINITY;
      XMX(i,p7G_E) = -eslINFINITY;

      /* Each match cell extends the diagonal or opens a new segment from B;
       * E tracks the best match cell seen so far on this row.
       */
      for (k = 1; k <= M; k++)
        {
          MMX(i,k)     = MSC(k) + ESL_MAX(MMX(i-1,k-1), XMX(i-1,p7G_B) + tbmk);
          XMX(i,p7G_E) = ESL_MAX(XMX(i,p7G_E), MMX(i,k));
        }

      /* Specials: J and C need E of this row; B needs N and J of this row. */
      XMX(i,p7G_J) = ESL_MAX( XMX(i-1,p7G_J) + tloop, XMX(i, p7G_E) + tej);
      XMX(i,p7G_C) = ESL_MAX( XMX(i-1,p7G_C) + tloop, XMX(i, p7G_E) + tec);
      XMX(i,p7G_N) = XMX(i-1,p7G_N) + tloop;
      XMX(i,p7G_B) = ESL_MAX( XMX(i, p7G_N) + tmove, XMX(i, p7G_J) + tmove);
    }

  gx->M = gm->M;
  gx->L = L;

  /* C->T move finishes the path; T itself is not stored. */
  if (opt_sc != NULL)
    {
      *opt_sc = XMX(L,p7G_C) + tmove;
    }
  return eslOK;
}
/* Function:  p7_GViterbi()
 * Synopsis:  The Viterbi algorithm.
 * Incept:    SRE, Tue Jan 30 10:50:53 2007 [Einstein's, St. Louis]
 *
 * Purpose:   The standard Viterbi dynamic programming algorithm.
 *
 *            Given a digital sequence <dsq> of length <L>, a profile
 *            <gm>, and DP matrix <gx> allocated for at least <L>
 *            by <gm->M> cells; fill <gx> with the Viterbi matrix and
 *            optionally return the score of the maximum scoring path
 *            in <*opt_sc>.
 *
 *            The caller may then retrieve the Viterbi path by calling
 *            <p7_GTrace()>.
 *
 *            The Viterbi lod score is returned in nats. The caller
 *            needs to subtract a null model lod score, then convert
 *            to bits.
 *
 * Args:      dsq    - sequence in digitized form, 1..L
 *            L      - length of dsq
 *            gm     - profile.
 *            gx     - DP matrix with room for an MxL alignment
 *            opt_sc - optRETURN: Viterbi lod score in nats; may be NULL
 *
 * Return:    <eslOK> on success. <gx->M> and <gx->L> are set.
 */
int
p7_GViterbi(const ESL_DSQ *dsq, int L, const P7_PROFILE *gm, P7_GMX *gx, float *opt_sc)
{
  float const *tsc  = gm->tsc;    /* needed by the TSC() macro        */
  float      **dp   = gx->dp;     /* needed by the {MDI}MX() macros   */
  float       *xmx  = gx->xmx;    /* needed by the XMX() macro        */
  int          M    = gm->M;
  int          i,k;
  /* Mk->E for k<M costs nothing in local mode and is forbidden otherwise. */
  float        esc  = p7_profile_IsLocal(gm) ? 0 : -eslINFINITY;

  /* Row 0: no residues consumed yet. */
  XMX(0,p7G_N) = 0;                                /* S->N, p=1            */
  XMX(0,p7G_B) = gm->xsc[p7P_N][p7P_MOVE];         /* S->N->B, no N-tail   */
  XMX(0,p7G_J) = -eslINFINITY;                     /* need seq to get here */
  XMX(0,p7G_C) = -eslINFINITY;
  XMX(0,p7G_E) = -eslINFINITY;
  for (k = 0; k <= M; k++)
    {
      DMX(0,k) = -eslINFINITY;                     /* need seq to get here */
      IMX(0,k) = -eslINFINITY;
      MMX(0,k) = -eslINFINITY;
    }

  /* Rows 1..L */
  for (i = 1; i <= L; i++)
    {
      float const *rsc = gm->rsc[dsq[i]];   /* needed by MSC()/ISC() */
      float        best;

      DMX(i,0) = -eslINFINITY;
      IMX(i,0) = -eslINFINITY;
      MMX(i,0) = -eslINFINITY;
      XMX(i,p7G_E) = -eslINFINITY;

      /* Interior nodes 1..M-1 */
      for (k = 1; k < M; k++)
        {
          /* match: best of M, I, D at (i-1,k-1) and B at i-1, then emit */
          best = ESL_MAX( MMX(i-1,k-1) + TSC(p7P_MM,k-1), IMX(i-1,k-1) + TSC(p7P_IM,k-1));
          best = ESL_MAX(best, DMX(i-1,k-1) + TSC(p7P_DM,k-1));
          best = ESL_MAX(best, XMX(i-1,p7G_B) + TSC(p7P_BM,k-1));
          MMX(i,k) = best + MSC(k);

          /* E: only Mk->E is considered here. In Viterbi alignments Dk->E
           * can't win in local mode (and isn't possible in glocal mode).
           */
          XMX(i,p7G_E) = ESL_MAX(XMX(i,p7G_E), MMX(i,k) + esc);

          /* insert: from M or I at (i-1,k), then emit */
          best = ESL_MAX(MMX(i-1,k) + TSC(p7P_MI,k), IMX(i-1,k) + TSC(p7P_II,k));
          IMX(i,k) = best + ISC(k);

          /* delete: from M or D at (i,k-1), no emission */
          DMX(i,k) = ESL_MAX(MMX(i,k-1) + TSC(p7P_MD,k-1), DMX(i,k-1) + TSC(p7P_DD,k-1));
        }

      /* Last node, handled outside the loop: match state M_M */
      best = ESL_MAX( MMX(i-1,M-1) + TSC(p7P_MM,M-1), IMX(i-1,M-1) + TSC(p7P_IM,M-1));
      best = ESL_MAX(best, DMX(i-1,M-1 ) + TSC(p7P_DM,M-1));
      best = ESL_MAX(best, XMX(i-1,p7G_B) + TSC(p7P_BM,M-1));
      MMX(i,M) = best + MSC(M);

      /* Last node: delete state D_M. Unlike interior Dk->E transitions,
       * which never appear in Viterbi alignments, D_M->E is possible in
       * glocal mode.
       */
      DMX(i,M) = ESL_MAX(MMX(i,M-1) + TSC(p7P_MD,M-1), DMX(i,M-1) + TSC(p7P_DD,M-1));

      /* Finish E: M_M->E and D_M->E both score 0 by definition. */
      best = ESL_MAX(XMX(i,p7G_E), MMX(i,M));
      XMX(i,p7G_E) = ESL_MAX(best, DMX(i,M));

      /* Special states. E must already be final, and B must follow N and J.
       * N, C and J emissions are zero score by definition.
       */

      /* J: J->J loop, or E->J (E's "loop") */
      best = XMX(i-1,p7G_J) + gm->xsc[p7P_J][p7P_LOOP];
      XMX(i,p7G_J) = ESL_MAX(best, XMX(i, p7G_E) + gm->xsc[p7P_E][p7P_LOOP]);

      /* C: C->C loop, or E->C (E's "move") */
      best = XMX(i-1,p7G_C) + gm->xsc[p7P_C][p7P_LOOP];
      XMX(i,p7G_C) = ESL_MAX(best, XMX(i, p7G_E) + gm->xsc[p7P_E][p7P_MOVE]);

      /* N: N->N loop only */
      XMX(i,p7G_N) = XMX(i-1,p7G_N) + gm->xsc[p7P_N][p7P_LOOP];

      /* B: N->B (N's move) or J->B (J's move) */
      best = XMX(i,p7G_N) + gm->xsc[p7P_N][p7P_MOVE];
      XMX(i,p7G_B) = ESL_MAX(best, XMX(i,p7G_J) + gm->xsc[p7P_J][p7P_MOVE]);
    }

  /* T state (not stored): C->T move ends the path. */
  if (opt_sc != NULL)
    {
      *opt_sc = XMX(L,p7G_C) + gm->xsc[p7P_C][p7P_MOVE];
    }
  gx->M = gm->M;
  gx->L = L;
  return eslOK;
}
/* Function:  p7_GViterbi_longtarget()
 * Synopsis:  Viterbi scan that records above-threshold end points.
 * Incept:    SRE, Tue Jan 30 10:50:53 2007 [Einstein's, St. Louis]
 *
 * Purpose:   Given a digital sequence <dsq> of length <L>, a profile
 *            <gm>, and DP matrix <gx> allocated for at least <L>
 *            by <gm->M> cells; runs the Viterbi recursion row by row
 *            and, whenever the E-state score of a row reaches the
 *            score threshold implied by <P> and <filtersc>, appends
 *            one window to <windowlist> for every match cell (i,k) on
 *            that row whose score equals the E score. After recording
 *            a hit the row is reset so the search for the next window
 *            starts afresh.
 *
 *            No overall score is returned.
 *
 * Args:      dsq        - sequence in digitized form, 1..L
 *            L          - length of dsq
 *            gm         - profile.
 *            gx         - DP matrix with room for an MxL alignment
 *            filtersc   - null or bias correction, required for translating a
 *                         P-value threshold into a score threshold
 *            P          - p-value below which a region is captured as being
 *                         above threshold
 *            windowlist - RETURN: hit windows are appended here via
 *                         p7_hmmwindow_new()
 *
 * Return:    <eslOK> on success. <gx->M> and <gx->L> are set.
 *
 * Note:      Differences from the previous revision of this routine:
 *             - on a hit row every cell M/I/D(i,k), k=1..M, is reset to
 *               -infinity (the old loop re-assigned column 0 M times and
 *               reset nothing);
 *             - on a hit row the special states N/J/C/B are now defined
 *               (held at their row i-1 values) instead of being left
 *               unwritten although row i+1 reads them;
 *             - the threshold is kept as a float rather than squeezed
 *               through (int) into an int16_t; for thresholds that fit
 *               in int16_t the comparison is unchanged.
 */
int
p7_GViterbi_longtarget(const ESL_DSQ *dsq, int L, const P7_PROFILE *gm, P7_GMX *gx,
                       float filtersc, double P, P7_HMM_WINDOWLIST *windowlist)
{
  float const *tsc  = gm->tsc;    /* needed by the TSC() macro        */
  float      **dp   = gx->dp;     /* needed by the {MDI}MX() macros   */
  float       *xmx  = gx->xmx;    /* needed by the XMX() macro        */
  int          M    = gm->M;
  int          i,k;
  /* Mk->E for k<M costs nothing in local mode and is forbidden otherwise. */
  float        esc  = p7_profile_IsLocal(gm) ? 0 : -eslINFINITY;
  float        sc_thresh;         /* smallest E score that counts as a hit */
  float        invP;

  /* Initialization of the zero row. */
  XMX(0,p7G_N) = 0;                                            /* S->N, p=1            */
  XMX(0,p7G_B) = gm->xsc[p7P_N][p7P_MOVE];                     /* S->N->B, no N-tail   */
  XMX(0,p7G_E) = XMX(0,p7G_C) = XMX(0,p7G_J) = -eslINFINITY;   /* need seq to get here */
  for (k = 0; k <= M; k++)
    MMX(0,k) = IMX(0,k) = DMX(0,k) = -eslINFINITY;             /* need seq to get here */

  /* Score threshold.
   *
   * Going from a Viterbi score S (the score on reaching state E) to a
   * P-value goes like this:
   *    S   = XMX(i,p7G_E)
   *    vsc = S + gm->xsc[p7P_E][p7P_MOVE] + gm->xsc[p7P_C][p7P_MOVE]
   *    P   = esl_gumbel_surv((vsc - filtersc) / eslCONST_LOG2,
   *                          gm->evparam[p7_VMU], gm->evparam[p7_VLAMBDA])
   * We want the threshold on S for a given P, so invert it:
   *    (vsc - filtersc) / eslCONST_LOG2 = esl_gumbel_invsurv(P, mu, lambda)
   *    vsc = filtersc + eslCONST_LOG2 * esl_gumbel_invsurv(P, mu, lambda)
   *    S   = vsc - gm->xsc[p7P_E][p7P_MOVE] - gm->xsc[p7P_C][p7P_MOVE]
   * The result is rounded up to a whole number, as before.
   */
  invP      = esl_gumbel_invsurv(P, gm->evparam[p7_VMU], gm->evparam[p7_VLAMBDA]);
  sc_thresh = (float) ceil(filtersc + (eslCONST_LOG2 * invP)
                           - gm->xsc[p7P_E][p7P_MOVE] - gm->xsc[p7P_C][p7P_MOVE]);

  /* DP recursion */
  for (i = 1; i <= L; i++)
    {
      float const *rsc = gm->rsc[dsq[i]];   /* needed by MSC()/ISC() */
      float sc;

      MMX(i,0) = IMX(i,0) = DMX(i,0) = -eslINFINITY;
      XMX(i,p7G_E) = -eslINFINITY;

      for (k = 1; k < M; k++)
        {
          /* match state */
          sc = ESL_MAX( MMX(i-1,k-1) + TSC(p7P_MM,k-1), IMX(i-1,k-1) + TSC(p7P_IM,k-1));
          sc = ESL_MAX(sc, DMX(i-1,k-1) + TSC(p7P_DM,k-1));
          sc = ESL_MAX(sc, XMX(i-1,p7G_B) + TSC(p7P_BM,k-1));
          MMX(i,k) = sc + MSC(k);

          /* E state update. In Viterbi alignments, Dk->E can't win in
           * local mode (and isn't possible in glocal mode), so don't
           * bother looking.
           */
          XMX(i,p7G_E) = ESL_MAX(XMX(i,p7G_E), MMX(i,k) + esc);

          /* insert state */
          sc = ESL_MAX(MMX(i-1,k) + TSC(p7P_MI,k), IMX(i-1,k) + TSC(p7P_II,k));
          IMX(i,k) = sc + ISC(k);

          /* delete state */
          DMX(i,k) = ESL_MAX(MMX(i,k-1) + TSC(p7P_MD,k-1), DMX(i,k-1) + TSC(p7P_DD,k-1));
        }

      /* Unrolled match state M. */
      sc = ESL_MAX( MMX(i-1,M-1) + TSC(p7P_MM,M-1), IMX(i-1,M-1) + TSC(p7P_IM,M-1));
      sc = ESL_MAX(sc, DMX(i-1,M-1 ) + TSC(p7P_DM,M-1));
      sc = ESL_MAX(sc, XMX(i-1,p7G_B) + TSC(p7P_BM,M-1));
      MMX(i,M) = sc + MSC(M);

      /* Unrolled delete state D_M
       * (Unlike internal Dk->E transitions that can never appear in
       * Viterbi alignments, D_M->E is possible in glocal mode.)
       */
      DMX(i,M) = ESL_MAX(MMX(i,M-1) + TSC(p7P_MD,M-1), DMX(i,M-1) + TSC(p7P_DD,M-1));

      /* E state update; transition from M_M scores 0 by def'n */
      sc = ESL_MAX(XMX(i,p7G_E), MMX(i,M));
      XMX(i,p7G_E) = ESL_MAX(sc, DMX(i,M));

      if (XMX(i,p7G_E) >= sc_thresh)
        {
          /* Hit score threshold. Add a window for each match cell that
           * achieves the E score, then reset the row's scores. Each cell
           * is tested before it is cleared.
           */
          for (k = 1; k <= M; k++)
            {
              if (MMX(i,k) == XMX(i,p7G_E))
                {
                  p7_hmmwindow_new(windowlist, 0, i, 0, k, 1, 0.0, p7_NOCOMPLEMENT );
                }
              MMX(i,k) = IMX(i,k) = DMX(i,k) = -eslINFINITY;
            }

          /* Row i+1 reads N, J, C and B of this row, so they must hold
           * defined values. They are held at their row i-1 values, i.e.
           * the hit row does not advance the special states.
           * NOTE(review): confirm that "hold" (rather than advancing
           * N/J/C by their loop scores) is the intended restart rule.
           */
          XMX(i,p7G_J) = XMX(i-1,p7G_J);
          XMX(i,p7G_C) = XMX(i-1,p7G_C);
          XMX(i,p7G_N) = XMX(i-1,p7G_N);
          XMX(i,p7G_B) = XMX(i-1,p7G_B);
        }
      else
        {
          /* Now the special states. E must already be done, and B must
           * follow N,J. Remember, N, C and J emissions are zero score by
           * definition.
           */
          /* J state */
          sc = XMX(i-1,p7G_J) + gm->xsc[p7P_J][p7P_LOOP];                          /* J->J */
          XMX(i,p7G_J) = ESL_MAX(sc, XMX(i, p7G_E) + gm->xsc[p7P_E][p7P_LOOP]);   /* E->J is E's "loop" */

          /* C state */
          sc = XMX(i-1,p7G_C) + gm->xsc[p7P_C][p7P_LOOP];
          XMX(i,p7G_C) = ESL_MAX(sc, XMX(i, p7G_E) + gm->xsc[p7P_E][p7P_MOVE]);

          /* N state */
          XMX(i,p7G_N) = XMX(i-1,p7G_N) + gm->xsc[p7P_N][p7P_LOOP];

          /* B state */
          sc = XMX(i,p7G_N) + gm->xsc[p7P_N][p7P_MOVE];                            /* N->B is N's move */
          XMX(i,p7G_B) = ESL_MAX(sc, XMX(i,p7G_J) + gm->xsc[p7P_J][p7P_MOVE]);    /* J->B is J's move */
        }
    }

  /* T state (not stored) */
  gx->M = gm->M;
  gx->L = L;
  return eslOK;
}
/* Function:  p7_GTrace()
 * Incept:    SRE, Thu Feb  1 10:25:56 2007 [UA 8018 St. Louis to Dulles]
 *
 * Purpose:   Traceback of a Viterbi matrix: retrieval
 *            of optimum alignment.
 *
 *            This function is currently implemented as a
 *            reconstruction traceback, rather than using a shadow
 *            matrix. Because H3 uses floating point scores, and we
 *            can't compare floats for equality, we have to compare
 *            floats for near-equality and therefore, formally, we can
 *            only guarantee a near-optimal traceback. However, even in
 *            the unlikely event that a suboptimal is returned, the
 *            score difference from true optimal will be negligible.
 *
 *            The walk starts at T/C on row <L> and steps backwards
 *            one state at a time until the S state is reached; each
 *            state is appended to <tr> as it is found, and the trace
 *            is reversed at the end.
 *
 * Args:      dsq    - digital sequence aligned to, 1..L
 *            L      - length of <dsq>
 *            gm     - profile
 *            gx     - Viterbi matrix to trace, L x M
 *            tr     - storage for the recovered traceback.
 *
 * Return:    The result of <p7_trace_Reverse()> on success.
 *            Any non-<eslOK> status from <p7_trace_Append()> is
 *            passed straight back.
 *            <eslFAIL> if even the optimal path has zero probability
 *            or a cell cannot be explained by any predecessor.
 *            NOTE(review): earlier documentation said the trace is
 *            set blank (<tr->N = 0>) in the <eslFAIL> case; nothing
 *            in this function resets <tr>, so states appended before
 *            the failure presumably remain -- confirm.
 *
 * Note:      Care is taken to evaluate the prev+tsc+emission
 *            calculations in exactly the same order that Viterbi did
 *            them, lest you get numerical problems with
 *            a+b+c = d; d-c != a+b because d,c are nearly equal.
 *            (This bug appeared in dev: xref J1/121.)
 */
int
p7_GTrace(const ESL_DSQ *dsq, int L, const P7_PROFILE *gm, const P7_GMX *gx, P7_TRACE *tr)
{
  int i = L;                    /* position in seq (1..L); starts at the last row */
  int k = 0;                    /* position in model (1..M); 0 while in a special state */
  int M = gm->M;
  float **dp = gx->dp;          /* so {MDI}MX() macros work */
  float *xmx = gx->xmx;         /* so XMX() macro works */
  float tol = 1e-5;             /* floating point "equality" test: tolerance passed to esl_FCompare() */
  float const *tsc = gm->tsc;   /* so TSC() macro works */
  int sprv, scur;               /* previous, current state in trace */
  int status;

#ifdef p7_DEBUGGING
  if (tr->N != 0) ESL_EXCEPTION(eslEINVAL, "trace isn't empty: forgot to Reuse()?");
#endif

  /* The path ends ...C -> T on row L; seed the trace with those two states. */
  if ((status = p7_trace_Append(tr, p7T_T, k, i)) != eslOK) return status;
  if ((status = p7_trace_Append(tr, p7T_C, k, i)) != eslOK) return status;
  sprv = p7T_C;

  /* Each pass explains how state <sprv> at (i,k) was reached and stores
   * the predecessor in <scur>. Candidate predecessors are tested in a
   * fixed order and the first near-equal one wins.
   */
  while (sprv != p7T_S) {
    /* residue scores for row i, so MSC()/ISC() macros work; no row-0 residue exists */
    float const *rsc = (i>0 ? gm->rsc[dsq[i]] : NULL);

    switch (sprv) {
    case p7T_C: /* C(i) comes from C(i-1) or E(i) */
      if (XMX(i,p7G_C) == -eslINFINITY) ESL_EXCEPTION(eslFAIL, "impossible C reached at i=%d", i);
      if (esl_FCompare(XMX(i, p7G_C), XMX(i-1, p7G_C) + gm->xsc[p7P_C][p7P_LOOP], tol) == eslOK) scur = p7T_C;
      else if (esl_FCompare(XMX(i, p7G_C), XMX(i, p7G_E) + gm->xsc[p7P_E][p7P_MOVE], tol) == eslOK) scur = p7T_E;
      else ESL_EXCEPTION(eslFAIL, "C at i=%d couldn't be traced", i);
      break;

    case p7T_E: /* E connects from any M state. k set here */
      if (XMX(i, p7G_E) == -eslINFINITY) ESL_EXCEPTION(eslFAIL, "impossible E reached at i=%d", i);
      if (p7_profile_IsLocal(gm)) {
        scur = p7T_M; /* can't come from D, in a *local* Viterbi trace. */
        /* scan from M down to 1; the highest k whose M score matches E is taken */
        for (k = M; k >= 1; k--) if (esl_FCompare(XMX(i, p7G_E), MMX(i,k), tol) == eslOK) break;
        if (k == 0) ESL_EXCEPTION(eslFAIL, "E at i=%d couldn't be traced", i);
      } else /* glocal mode: we either come from D_M or M_M */ {
        if (esl_FCompare(XMX(i, p7G_E), MMX(i,M), tol) == eslOK) { scur = p7T_M; k = M; }
        else if (esl_FCompare(XMX(i, p7G_E), DMX(i,M), tol) == eslOK) { scur = p7T_D; k = M; }
        else ESL_EXCEPTION(eslFAIL, "E at i=%d couldn't be traced", i);
      }
      break;

    case p7T_M: /* M connects from i-1,k-1, or B */
      if (MMX(i,k) == -eslINFINITY) ESL_EXCEPTION(eslFAIL, "impossible M reached at k=%d,i=%d", k,i);
      /* tested in the order B, M, I, D */
      if (esl_FCompare(MMX(i,k), XMX(i-1,p7G_B) + TSC(p7P_BM, k-1) + MSC(k), tol) == eslOK) scur = p7T_B;
      else if (esl_FCompare(MMX(i,k), MMX(i-1,k-1) + TSC(p7P_MM, k-1) + MSC(k), tol) == eslOK) scur = p7T_M;
      else if (esl_FCompare(MMX(i,k), IMX(i-1,k-1) + TSC(p7P_IM, k-1) + MSC(k), tol) == eslOK) scur = p7T_I;
      else if (esl_FCompare(MMX(i,k), DMX(i-1,k-1) + TSC(p7P_DM, k-1) + MSC(k), tol) == eslOK) scur = p7T_D;
      else ESL_EXCEPTION(eslFAIL, "M at k=%d,i=%d couldn't be traced", k,i);
      k--; i--;   /* a match consumes one residue and one model node */
      break;

    case p7T_D: /* D connects from M,D at i,k-1 */
      if (DMX(i, k) == -eslINFINITY) ESL_EXCEPTION(eslFAIL, "impossible D reached at k=%d,i=%d", k,i);
      if (esl_FCompare(DMX(i,k), MMX(i, k-1) + TSC(p7P_MD, k-1), tol) == eslOK) scur = p7T_M;
      else if (esl_FCompare(DMX(i,k), DMX(i, k-1) + TSC(p7P_DD, k-1), tol) == eslOK) scur = p7T_D;
      else ESL_EXCEPTION(eslFAIL, "D at k=%d,i=%d couldn't be traced", k,i);
      k--;        /* a delete consumes a model node but no residue */
      break;

    case p7T_I: /* I connects from M,I at i-1,k*/
      if (IMX(i,k) == -eslINFINITY) ESL_EXCEPTION(eslFAIL, "impossible I reached at k=%d,i=%d", k,i);
      if (esl_FCompare(IMX(i,k), MMX(i-1,k) + TSC(p7P_MI, k) + ISC(k), tol) == eslOK) scur = p7T_M;
      else if (esl_FCompare(IMX(i,k), IMX(i-1,k) + TSC(p7P_II, k) + ISC(k), tol) == eslOK) scur = p7T_I;
      else ESL_EXCEPTION(eslFAIL, "I at k=%d,i=%d couldn't be traced", k,i);
      i--;        /* an insert consumes a residue but no model node */
      break;

    case p7T_N: /* N connects from S, N */
      if (XMX(i, p7G_N) == -eslINFINITY) ESL_EXCEPTION(eslFAIL, "impossible N reached at i=%d", i);
      /* once every residue is accounted for (i == 0), N was entered from S */
      scur = ( (i == 0) ? p7T_S : p7T_N);
      break;

    case p7T_B: /* B connects from N, J */
      if (XMX(i,p7G_B) == -eslINFINITY) ESL_EXCEPTION(eslFAIL, "impossible B reached at i=%d", i);
      if (esl_FCompare(XMX(i,p7G_B), XMX(i, p7G_N) + gm->xsc[p7P_N][p7P_MOVE], tol) == eslOK) scur = p7T_N;
      else if (esl_FCompare(XMX(i,p7G_B), XMX(i, p7G_J) + gm->xsc[p7P_J][p7P_MOVE], tol) == eslOK) scur = p7T_J;
      else ESL_EXCEPTION(eslFAIL, "B at i=%d couldn't be traced", i);
      break;

    case p7T_J: /* J connects from E(i) or J(i-1) */
      if (XMX(i,p7G_J) == -eslINFINITY) ESL_EXCEPTION(eslFAIL, "impossible J reached at i=%d", i);
      if (esl_FCompare(XMX(i,p7G_J), XMX(i-1,p7G_J) + gm->xsc[p7P_J][p7P_LOOP], tol) == eslOK) scur = p7T_J;
      else if (esl_FCompare(XMX(i,p7G_J), XMX(i, p7G_E) + gm->xsc[p7P_E][p7P_LOOP], tol) == eslOK) scur = p7T_E;
      else ESL_EXCEPTION(eslFAIL, "J at i=%d couldn't be traced", i);
      break;

    default: ESL_EXCEPTION(eslFAIL, "bogus state in traceback");
    } /* end switch over the previous state */

    /* Append this state and the current i,k to be explained to the growing trace */
    if ((status = p7_trace_Append(tr, scur, k, i)) != eslOK) return status;

    /* For NCJ, we had to defer i decrement: a residue is only consumed
     * when the state loops onto itself, and the state must be appended
     * with the undecremented i first.
     */
    if ( (scur == p7T_N || scur == p7T_J || scur == p7T_C) && scur == sprv) i--;
    sprv = scur;
  } /* end traceback, at S state */

  tr->M = gm->M;
  tr->L = L;
  /* states were collected end-to-start; flip them into path order */
  return p7_trace_Reverse(tr);
}
/* Function: p7_SparseViterbi() * Synopsis: Viterbi optimal path algorithm, in sparse DP. * * Purpose: Compare profile <gm> to digital sequence <dsq> of length <L>, * by the Viterbi algorithm, using sparse dynamic programming, * as constrained by the sparse mask <sm>. * Fill in the sparse Viterbi matrix <sx>; (optionally) trace * back the optimal path and return it in the trace structure <opt_tr> * if the caller provides one; and (optionally) return the * Viterbi raw score in nats in <*opt_sc>. * * <sx> can be reused from previous calculations, even * smaller ones; see <p7_sparsemx_Reuse()>. If necessary, * it will be reallocated here, to be large enough for the * <gm->M> by <L> calculation restricted to masked cells * <sm>. * * Args: dsq - digital target sequence, 1..L * L - length of <dsq> * gm - profile * sm - sparse mask * sx - Viterbi matrix to fill; (may be reallocated here) * opt_tr - optRESULT: trace structure with optimal traceback; or NULL if caller doesn't want it * opt_sc - optRETURN: raw Viterbi score in nats; or NULL if result unwanted * * Returns: <eslOK> on success; <opt_tr>, if non-NULL, contains the optimal traceback; * and <*opt_sc> optionally contains the raw Viterbi score. */ int p7_SparseViterbi(const ESL_DSQ *dsq, int L, const P7_PROFILE *gm, const P7_SPARSEMASK *sm, P7_SPARSEMX *sx, P7_TRACE *opt_tr, float *opt_sc) { float const *tsc = gm->tsc; /* sets up TSC() macro, access to profile's transitions */ float const *rsc; /* will be set up for MSC(), ISC() macros for residue scores */ float *xpc; /* ptr that steps through current special cells */ float *dpc; /* ptr to step thru current row i main DP cells */ float *dpp; /* ptr to step thru previous row i-1 main DP cells */ float *last_dpc; /* used to reinit dpp after each sparse row computation */ int ng; float xE, xN, xJ, xB, xL, xG, xC; /* tmp scores on special states. 
only stored when in row bands, and on ia-1 before a seg */ float mlc, mgc; /* temporary score calculations M(i,k) */ float dlc, dgc; /* precalculated D(i,k+1) value on current row */ int *kc = sm->k[0]; /* <kc> points to the list of sparse cell indices k for current row i */ int *kp; /* <kp> points to the previous row's sparse cell index list */ int i,k; /* i,k row,col (seq position, profile position) cell coords */ int y,z; /* indices in lists of k coords on prev, current row */ int status; /* Contract checks on arguments */ ESL_DASSERT1( (sm->L == L) ); ESL_DASSERT1( (sm->M == gm->M) ); /* Assure that <sx> is allocated large enough (we might be reusing it). * Set its type now, so we can Dump/Validate/etc. during debugging this routine, if needed. */ if ( (status = p7_sparsemx_Reinit(sx, sm)) != eslOK) return status; sx->type = p7S_VITERBI; xN = 0.0f; xJ = -eslINFINITY; xC = -eslINFINITY; ng = 0; xpc = sx->xmx; dpc = sx->dp; for (i = 1; i <= L; i++) { if (! sm->n[i]) { ng++; continue; } /* skip rows that have no included cells */ /* Reinitialize and store specials for row ia-1 just outside sparsified segment */ if (i == 1 || ng) { *xpc++ = xE = -eslINFINITY; *xpc++ = xN = xN + ( ng ? ng * gm->xsc[p7P_N][p7P_LOOP] : 0.0); /* test ng, because we must watch out for 0*-inf special case */ *xpc++ = xJ = xJ + ( ng ? ng * gm->xsc[p7P_J][p7P_LOOP] : 0.0); *xpc++ = xB = ESL_MAX( xN + gm->xsc[p7P_N][p7P_MOVE], xJ + gm->xsc[p7P_J][p7P_MOVE]); *xpc++ = xL = xB + gm->xsc[p7P_B][0]; /* B->L */ *xpc++ = xG = xB + gm->xsc[p7P_B][1]; /* B->G */ *xpc++ = xC = xC + ( ng ? ng * gm->xsc[p7P_C][p7P_LOOP] : 0.0); *xpc++ = -eslINFINITY; /* JJ: this space only used in a Decoding matrix. */ *xpc++ = -eslINFINITY; /* CC: this space only used in a Decoding matrix. 
*/ ng = 0; } rsc = gm->rsc[dsq[i]]; /* now MSC(k), ISC(k) residue score macros work */ last_dpc = dpc; /* remember where dpc started; dpp will be set here after we finish each row calculation */ kp = kc; /* last row we did becomes prev row now; ready to step through k indices of previous row's sparse cells */ kc = sm->k[i]; /* ditto for current row i */ dlc = dgc = xE = -eslINFINITY; for (z=0, y=0; z < sm->n[i]; z++) /* Iterate over the one or more sparse cells (i,k) that we calculate on this row. */ { k = kc[z]; /* next sparse cell to calculate: (i,k) */ /* Try to find cell i-1,k-1; then compute M(i,k) from it */ mlc = xL + TSC(p7P_LM, k-1); mgc = xG + TSC(p7P_GM, k-1); while (y < sm->n[i-1] && kp[y] < k-1) { y++; dpp+=p7S_NSCELLS; } if (y < sm->n[i-1] && kp[y] == k-1) { mlc = ESL_MAX( ESL_MAX( dpp[p7R_ML] + TSC(p7P_MM, k-1), dpp[p7R_IL] + TSC(p7P_IM, k-1)), ESL_MAX( dpp[p7R_DL] + TSC(p7P_DM, k-1), mlc)); mgc = ESL_MAX( ESL_MAX( dpp[p7R_MG] + TSC(p7P_MM, k-1), dpp[p7R_IG] + TSC(p7P_IM, k-1)), ESL_MAX( dpp[p7R_DG] + TSC(p7P_DM, k-1), mgc)); } *dpc++ = mlc = MSC(k) + mlc; *dpc++ = mgc = MSC(k) + mgc; /* Try to find cell i-1,k; then compute I(i,k) from it */ while (y < sm->n[i-1] && kp[y] < k) { y++; dpp+=p7S_NSCELLS; } if (y < sm->n[i-1] && kp[y] == k) { *dpc++ = ESL_MAX( dpp[p7R_ML] + TSC(p7P_MI,k), dpp[p7R_IL] + TSC(p7P_II, k)); // +ISC(k), if we weren't enforcing it to zero *dpc++ = ESL_MAX( dpp[p7R_MG] + TSC(p7P_MI,k), dpp[p7R_IG] + TSC(p7P_II, k)); // ditto } else { *dpc++ = -eslINFINITY; *dpc++ = -eslINFINITY; } /* local exit paths (a F/V difference here: in V, no Dk->E path can win */ xE = ESL_MAX(xE, mlc); /* delayed store of Dk; advance calculation of next D_k+1 */ *dpc++ = dlc; *dpc++ = dgc; if (z < sm->n[i]-1 && kc[z+1] == k+1) { /* is there a (i,k+1) cell to our right? 
*/ dlc = ESL_MAX( mlc + TSC(p7P_MD, k), dlc + TSC(p7P_DD, k)); dgc = ESL_MAX( mgc + TSC(p7P_MD, k), dgc + TSC(p7P_DD, k)); } else { /* if not, we MUST consider {MD}Gk->Dk+1..E glocal exit path, even from internal sparse cells - not just last cell! */ xE = ESL_MAX( xE, TSC(p7P_DGE, k) + ESL_MAX( mgc + TSC(p7P_MD, k), dgc + TSC(p7P_DD, k))); // yes, the D path can contribute; we only use wing-retraction on sparse cells k where k+1 is unmarked; if k=M, for example, we must check D->E dlc = dgc = -eslINFINITY; } } *xpc++ = xE; // we already max'ed over all Mk->E exits, both local and glocal *xpc++ = xN = xN + gm->xsc[p7P_N][p7P_LOOP]; *xpc++ = xJ = ESL_MAX( xJ + gm->xsc[p7P_J][p7P_LOOP], xE + gm->xsc[p7P_E][p7P_LOOP]); *xpc++ = xB = ESL_MAX( xJ + gm->xsc[p7P_J][p7P_MOVE], xN + gm->xsc[p7P_N][p7P_MOVE]); *xpc++ = xL = xB + gm->xsc[p7P_B][0]; /* B->L */ *xpc++ = xG = xB + gm->xsc[p7P_B][1]; /* B->G */ *xpc++ = xC = ESL_MAX( xE + gm->xsc[p7P_E][p7P_MOVE], xC + gm->xsc[p7P_C][p7P_LOOP]); *xpc++ = -eslINFINITY; /* JJ: this space only used in a Decoding matrix. */ *xpc++ = -eslINFINITY; /* CC: this space only used in a Decoding matrix. */ /* now dpc is on the start of the next sparsified row */ dpp = last_dpc; } xC += ( ng ? ng * gm->xsc[p7P_C][p7P_LOOP] : 0.0f) + gm->xsc[p7P_C][p7P_MOVE]; if (opt_sc) *opt_sc = xC; if (opt_tr && xC != -eslINFINITY) return p7_sparse_trace_Viterbi(gm, sx, opt_tr); else return eslOK; }
/* Function:  p7_GForward()
 * Synopsis:  The Forward algorithm.
 * Incept:    SRE, Mon Apr 16 13:57:35 2007 [Janelia]
 *
 * Purpose:   The Forward dynamic programming algorithm.
 *
 *            Given a digital sequence <dsq> of length <L>, a profile
 *            <gm>, and DP matrix <gx> allocated for at least <gm->M>
 *            by <L> cells; calculate the probability of the sequence
 *            given the model using the Forward algorithm; leave the
 *            Forward matrix in <gx>, and optionally return the
 *            Forward score in <*opt_sc>.
 *
 *            The Forward score is in lod score form. To convert to a
 *            bitscore, the caller needs to subtract a null model lod
 *            score, then convert to bits.
 *
 * Args:      dsq    - sequence in digitized form, 1..L
 *            L      - length of dsq
 *            gm     - profile.
 *            gx     - DP matrix with room for an MxL alignment
 *            opt_sc - optRETURN: Forward lod score in nats; may be NULL
 *
 * Return:    <eslOK> on success. <gx->M> and <gx->L> are set.
 */
int
p7_GForward(const ESL_DSQ *dsq, int L, const P7_PROFILE *gm, P7_GMX *gx, float *opt_sc)
{
  float const *tsc  = gm->tsc;    /* needed by the TSC() macro        */
  float      **dp   = gx->dp;     /* needed by the {MDI}MX() macros   */
  float       *xmx  = gx->xmx;    /* needed by the XMX() macro        */
  int          M    = gm->M;
  int          i, k;
  /* {MD}k->E for k<M costs nothing in local mode and is forbidden otherwise. */
  float        esc  = p7_profile_IsLocal(gm) ? 0 : -eslINFINITY;

  /* Row 0: no residues consumed yet. */
  XMX(0,p7G_N) = 0;                                /* S->N, p=1            */
  XMX(0,p7G_B) = gm->xsc[p7P_N][p7P_MOVE];         /* S->N->B, no N-tail   */
  XMX(0,p7G_J) = -eslINFINITY;                     /* need seq to get here */
  XMX(0,p7G_C) = -eslINFINITY;
  XMX(0,p7G_E) = -eslINFINITY;
  for (k = 0; k <= M; k++)
    {
      DMX(0,k) = -eslINFINITY;                     /* need seq to get here */
      IMX(0,k) = -eslINFINITY;
      MMX(0,k) = -eslINFINITY;
    }

  /* Set up the lookup table of the log-sum routine. */
  p7_FLogsumInit();

  /* Recursion. Done as a pull.
   * Note some slightly wasteful boundary conditions:
   *    tsc[0] = impossible for all eight transitions (no node 0)
   *    D_1 is wastefully calculated (doesn't exist)
   */
  for (i = 1; i <= L; i++)
    {
      float const *rsc = gm->rsc[dsq[i]];   /* needed by MSC()/ISC() */
      float        from_mi;                 /* log-sum of the M and I predecessors */
      float        from_bd;                 /* log-sum of the B and D predecessors */
      float        sc;

      DMX(i,0) = -eslINFINITY;
      IMX(i,0) = -eslINFINITY;
      MMX(i,0) = -eslINFINITY;
      XMX(i, p7G_E) = -eslINFINITY;

      /* Interior nodes 1..M-1 */
      for (k = 1; k < M; k++)
        {
          /* match state */
          from_mi = p7_FLogsum(MMX(i-1,k-1) + TSC(p7P_MM,k-1), IMX(i-1,k-1) + TSC(p7P_IM,k-1));
          from_bd = p7_FLogsum(XMX(i-1,p7G_B) + TSC(p7P_BM,k-1), DMX(i-1,k-1) + TSC(p7P_DM,k-1));
          sc      = p7_FLogsum(from_mi, from_bd);
          MMX(i,k) = sc + MSC(k);

          /* insert state */
          sc = p7_FLogsum(MMX(i-1,k) + TSC(p7P_MI,k), IMX(i-1,k) + TSC(p7P_II,k));
          IMX(i,k) = sc + ISC(k);

          /* delete state */
          DMX(i,k) = p7_FLogsum(MMX(i,k-1) + TSC(p7P_MD,k-1), DMX(i,k-1) + TSC(p7P_DD,k-1));

          /* E state update: both Mk->E and Dk->E contribute in Forward */
          sc = p7_FLogsum(MMX(i,k) + esc, DMX(i,k) + esc);
          XMX(i,p7G_E) = p7_FLogsum(sc, XMX(i,p7G_E));
        }

      /* Last node, handled outside the loop: match state M_M; there is no I_M */
      from_mi = p7_FLogsum(MMX(i-1,M-1) + TSC(p7P_MM,M-1), IMX(i-1,M-1) + TSC(p7P_IM,M-1));
      from_bd = p7_FLogsum(XMX(i-1,p7G_B) + TSC(p7P_BM,M-1), DMX(i-1,M-1) + TSC(p7P_DM,M-1));
      sc      = p7_FLogsum(from_mi, from_bd);
      MMX(i,M) = sc + MSC(M);
      IMX(i,M) = -eslINFINITY;

      /* Last node: delete state D_M */
      DMX(i,M) = p7_FLogsum(MMX(i,M-1) + TSC(p7P_MD,M-1), DMX(i,M-1) + TSC(p7P_DD,M-1));

      /* Last node: E state update; no <esc> term is applied to M_M->E and D_M->E */
      sc = p7_FLogsum(MMX(i,M), DMX(i,M));
      XMX(i,p7G_E) = p7_FLogsum(sc, XMX(i,p7G_E));

      /* Special states: J and C need E of this row; B needs N and J of this row. */

      /* J state */
      XMX(i,p7G_J) = p7_FLogsum(XMX(i-1,p7G_J) + gm->xsc[p7P_J][p7P_LOOP], XMX(i, p7G_E) + gm->xsc[p7P_E][p7P_LOOP]);

      /* C state */
      XMX(i,p7G_C) = p7_FLogsum(XMX(i-1,p7G_C) + gm->xsc[p7P_C][p7P_LOOP], XMX(i, p7G_E) + gm->xsc[p7P_E][p7P_MOVE]);

      /* N state */
      XMX(i,p7G_N) = XMX(i-1,p7G_N) + gm->xsc[p7P_N][p7P_LOOP];

      /* B state */
      XMX(i,p7G_B) = p7_FLogsum(XMX(i, p7G_N) + gm->xsc[p7P_N][p7P_MOVE], XMX(i, p7G_J) + gm->xsc[p7P_J][p7P_MOVE]);
    }

  /* C->T move ends the path; T itself is not stored. */
  if (opt_sc != NULL)
    {
      *opt_sc = XMX(L,p7G_C) + gm->xsc[p7P_C][p7P_MOVE];
    }
  gx->M = M;
  gx->L = L;
  return eslOK;
}
/* Function:  p7_GBackward()
 * Synopsis:  The Backward algorithm.
 * Incept:    SRE, Fri Dec 28 14:31:58 2007 [Janelia]
 *
 * Purpose:   The Backward dynamic programming algorithm.
 *
 *            Given a digital sequence <dsq> of length <L>, a profile
 *            <gm>, and DP matrix <gx> allocated for at least <gm->M>
 *            by <L> cells; calculate the probability of the sequence
 *            given the model using the Backward algorithm; leave the
 *            Backward matrix in <gx>, and optionally return the
 *            Backward score in <*opt_sc>.
 *
 *            The Backward score is in lod score form. To convert to a
 *            bitscore, the caller needs to subtract a null model lod
 *            score, then convert to bits.
 *
 * Args:      dsq    - sequence in digitized form, 1..L
 *            L      - length of dsq; row 1 and dsq[1] are read
 *                     unconditionally, so L >= 1 is required
 *            gm     - profile
 *            gx     - DP matrix with room for an MxL alignment
 *            opt_sc - optRETURN: Backward lod score in nats; may be NULL
 *
 * Return:    <eslOK> on success. <gx->M> and <gx->L> are set.
 *
 * Note:      Row 0 is now finalized with explicit indices: the J, C, E
 *            and N specials are written to row 0 (previously addressed
 *            through the leftover loop counter), and every M/I/D cell
 *            k=1..M of row 0 is set to -infinity (previously the loop
 *            wrote column M on each pass and left columns 1..M-1
 *            unset).
 */
int
p7_GBackward(const ESL_DSQ *dsq, int L, const P7_PROFILE *gm, P7_GMX *gx, float *opt_sc)
{
  float const *tsc  = gm->tsc;    /* needed by the TSC() macro        */
  float const *rsc  = NULL;       /* needed by MSC()/ISC(); set per row */
  float      **dp   = gx->dp;     /* needed by the {MDI}MX() macros   */
  float       *xmx  = gx->xmx;    /* needed by the XMX() macro        */
  int          M    = gm->M;
  int          i, k;
  /* {MD}k->E for k<M costs nothing in local mode and is forbidden otherwise. */
  float        esc  = p7_profile_IsLocal(gm) ? 0 : -eslINFINITY;

  /* Note: backward calculates the probability we can get *out* of
   * cell i,k; exclusive of emitting residue x_i.
   */
  p7_FLogsumInit();

  /* Initialize the L row. */
  XMX(L,p7G_J) = XMX(L,p7G_B) = XMX(L,p7G_N) = -eslINFINITY;
  XMX(L,p7G_C) = gm->xsc[p7P_C][p7P_MOVE];                   /* C<-T          */
  XMX(L,p7G_E) = XMX(L,p7G_C) + gm->xsc[p7P_E][p7P_MOVE];    /* E<-C, no tail */

  MMX(L,M) = DMX(L,M) = XMX(L,p7G_E);    /* {MD}_M <- E (prob 1.0) */
  IMX(L,M) = -eslINFINITY;               /* no I_M state           */

  for (k = M-1; k >= 1; k--)
    {
      MMX(L,k) = p7_FLogsum( XMX(L,p7G_E) + esc, DMX(L, k+1) + TSC(p7P_MD,k));
      DMX(L,k) = p7_FLogsum( XMX(L,p7G_E) + esc, DMX(L, k+1) + TSC(p7P_DD,k));
      IMX(L,k) = -eslINFINITY;
    }

  /* Main recursion */
  for (i = L-1; i >= 1; i--)
    {
      rsc = gm->rsc[dsq[i+1]];    /* scores for the residue emitted on leaving row i */

      /* B: sum over all entries B->Mk on row i+1.
       * t_BM index is 0 for k=1 because it's stored off-by-one.
       */
      XMX(i,p7G_B) = MMX(i+1,1) + TSC(p7P_BM,0) + MSC(1);
      for (k = 2; k <= M; k++)
        XMX(i,p7G_B) = p7_FLogsum(XMX(i, p7G_B), MMX(i+1,k) + TSC(p7P_BM,k-1) + MSC(k));

      /* Remaining specials; E depends on J and C of this row. */
      XMX(i,p7G_J) = p7_FLogsum( XMX(i+1,p7G_J) + gm->xsc[p7P_J][p7P_LOOP], XMX(i, p7G_B) + gm->xsc[p7P_J][p7P_MOVE]);
      XMX(i,p7G_C) = XMX(i+1,p7G_C) + gm->xsc[p7P_C][p7P_LOOP];
      XMX(i,p7G_E) = p7_FLogsum( XMX(i, p7G_J) + gm->xsc[p7P_E][p7P_LOOP], XMX(i, p7G_C) + gm->xsc[p7P_E][p7P_MOVE]);
      XMX(i,p7G_N) = p7_FLogsum( XMX(i+1,p7G_N) + gm->xsc[p7P_N][p7P_LOOP], XMX(i, p7G_B) + gm->xsc[p7P_N][p7P_MOVE]);

      /* Node M: both M_M and D_M can only go to E; there is no I_M. */
      MMX(i,M) = DMX(i,M) = XMX(i,p7G_E);
      IMX(i,M) = -eslINFINITY;

      /* Nodes M-1..1, right to left because D(i,k) needs D(i,k+1). */
      for (k = M-1; k >= 1; k--)
        {
          MMX(i,k) = p7_FLogsum( p7_FLogsum(MMX(i+1,k+1) + TSC(p7P_MM,k) + MSC(k+1), IMX(i+1,k) + TSC(p7P_MI,k) + ISC(k)),
                                 p7_FLogsum(XMX(i,p7G_E) + esc, DMX(i, k+1) + TSC(p7P_MD,k)));

          IMX(i,k) = p7_FLogsum( MMX(i+1,k+1) + TSC(p7P_IM,k) + MSC(k+1), IMX(i+1,k) + TSC(p7P_II,k) + ISC(k));

          DMX(i,k) = p7_FLogsum( MMX(i+1,k+1) + TSC(p7P_DM,k) + MSC(k+1),
                                 p7_FLogsum( DMX(i, k+1) + TSC(p7P_DD,k), XMX(i, p7G_E) + esc));
        }
    }

  /* At i=0, only N,B states are reachable. */
  rsc = gm->rsc[dsq[1]];
  XMX(0,p7G_B) = MMX(1,1) + TSC(p7P_BM,0) + MSC(1);    /* t_BM index is 0 because it's stored off-by-one. */
  for (k = 2; k <= M; k++)
    XMX(0,p7G_B) = p7_FLogsum(XMX(0, p7G_B), MMX(1,k) + TSC(p7P_BM,k-1) + MSC(k));

  XMX(0,p7G_J) = -eslINFINITY;
  XMX(0,p7G_C) = -eslINFINITY;
  XMX(0,p7G_E) = -eslINFINITY;
  XMX(0,p7G_N) = p7_FLogsum( XMX(1, p7G_N) + gm->xsc[p7P_N][p7P_LOOP], XMX(0, p7G_B) + gm->xsc[p7P_N][p7P_MOVE]);

  /* No M, I or D state can be occupied before any residue is emitted. */
  for (k = M; k >= 1; k--)
    MMX(0,k) = IMX(0,k) = DMX(0,k) = -eslINFINITY;

  /* Everything starts in N at row 0, so that cell is the total score. */
  if (opt_sc != NULL) *opt_sc = XMX(0,p7G_N);
  gx->M = M;
  gx->L = L;
  return eslOK;
}