/* select_m()   [AltiVec/VMX version]
 *
 * Stochastic traceback step out of match cell M(i,k): sample which
 * cell preceded it. The four candidates, in the order their weights
 * are stored in <path>, are B(i-1), M(i-1,k-1), I(i-1,k-1) and
 * D(i-1,k-1).
 *
 * Model position k maps to element <r> of vector <q> (q = (k-1) % Q,
 * r = (k-1) / Q). The k-1 cells are read from vector q-1; when q is 0
 * they are read from vector Q-1 shifted by one element with a zero
 * shifted in.
 *
 * Returns the sampled state type: p7T_B, p7T_M, p7T_I or p7T_D.
 */
static inline int
select_m(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  int           Q     = p7O_NQF(ox->M);
  int           q     = (k-1) % Q;
  int           r     = (k-1) / Q;
  int           qprv  = (q > 0) ? q-1 : Q-1;   /* vector that holds the k-1 cells            */
  vector float *tp    = om->tfv + 7*q;         /* first of the transition vectors into M(i,k) */
  vector float  zerov = (vector float) vec_splat_u32(0);
  vector float  srcv[4];                       /* predecessor values, same order as state[]   */
  union { vector float v; float p[4]; } lanes;
  float         path[4];
  int           state[4] = { p7T_B, p7T_M, p7T_I, p7T_D };
  int           c;

  srcv[0] = esl_vmx_set_float(ox->xmx[(i-1)*p7X_NXCELLS+p7X_B]);
  srcv[1] = ox->dpf[i-1][qprv*3 + p7X_M];
  srcv[2] = ox->dpf[i-1][qprv*3 + p7X_I];
  srcv[3] = ox->dpf[i-1][qprv*3 + p7X_D];

  /* Wrapped around to the last vector: shift M/I/D by one element, zero-filled. */
  if (q == 0)
    {
      srcv[1] = vec_sld(zerov, srcv[1], 12);
      srcv[2] = vec_sld(zerov, srcv[2], 12);
      srcv[3] = vec_sld(zerov, srcv[3], 12);
    }

  /* Weight of each path = predecessor value * transition; keep only element r. */
  for (c = 0; c < 4; c++)
    {
      lanes.v = vec_madd(srcv[c], tp[c], zerov);
      path[c] = lanes.p[r];
    }

  esl_vec_FNorm(path, 4);
  return state[esl_rnd_FChoose(rng, path, 4)];
}
/* select_d()   [AltiVec/VMX version]
 *
 * Stochastic traceback step out of delete cell D(i,k): sample whether
 * it was reached from M(i,k-1) or D(i,k-1).
 *
 * Model position k maps to element <r> of vector <q>. The k-1 cells
 * and the transitions out of node k-1 are read from vector q-1; when
 * q is 0 they are read from vector Q-1 shifted by one element with a
 * zero shifted in. M->D transitions are read at tfv[7*q' + p7O_MD],
 * D->D transitions at tfv[7*Q + q'].
 *
 * Returns the sampled state type: p7T_M or p7T_D.
 */
static inline int
select_d(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  int          Q     = p7O_NQF(ox->M);
  int          q     = (k-1) % Q;
  int          r     = (k-1) / Q;
  int          qprv  = (q > 0) ? q-1 : Q-1;   /* vector that holds the k-1 cells */
  vector float zerov = (vector float) vec_splat_u32(0);
  vector float mpv   = ox->dpf[i][qprv*3 + p7X_M];
  vector float dpv   = ox->dpf[i][qprv*3 + p7X_D];
  vector float tmdv  = om->tfv[7*qprv + p7O_MD];
  vector float tddv  = om->tfv[7*Q + qprv];
  union { vector float v; float p[4]; } lanes;
  float        path[2];
  int          state[2] = { p7T_M, p7T_D };

  /* Wrapped around to the last vector: shift everything by one element, zero-filled. */
  if (q == 0)
    {
      mpv  = vec_sld(zerov, mpv,  12);
      dpv  = vec_sld(zerov, dpv,  12);
      tmdv = vec_sld(zerov, tmdv, 12);
      tddv = vec_sld(zerov, tddv, 12);
    }

  lanes.v = vec_madd(mpv, tmdv, zerov);
  path[0] = lanes.p[r];

  lanes.v = vec_madd(dpv, tddv, zerov);
  path[1] = lanes.p[r];

  esl_vec_FNorm(path, 2);
  return state[esl_rnd_FChoose(rng, path, 2)];
}
/* sample_endpoints()
 * Incept:    SRE, Mon Jan 22 10:43:20 2007 [Janelia]
 *
 * Purpose:   Using random number source <r>, sample an entry node and
 *            an exit node for one pass through the implicit
 *            probabilistic model of profile <gm>; hand them back in
 *            <*ret_kstart> and <*ret_kend>.
 *
 *            Entry at <kstart> is always into a match state. The exit
 *            from <kend> may later turn out to be from either a match
 *            or a delete state.
 *
 *            Exits j are assumed to be uniformly distributed for a
 *            given entry i ($a_{ij} =$ constant $\forall j$), so the
 *            weight of entry k is exp(B->Mk score) times the number
 *            of exits available from k, which is M-k+1.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation error; both outputs are then 0.
 *
 * Xref:      STL11/138
 */
static int
sample_endpoints(ESL_RANDOMNESS *r, const P7_PROFILE *gm, int *ret_kstart, int *ret_kend)
{
  float *pstart = NULL;   /* pstart[k]: weight of entering at node k; pstart[0] unused (0) */
  int    node;
  int    first, last;
  int    status;

  /* The entry distribution has to be recovered from the B->Mk lod
   * scores; somewhat slow, but this is not called often.
   */
  ESL_ALLOC(pstart, sizeof(float) * (gm->M+1));

  pstart[0] = 0.0f;
  node      = 1;
  while (node <= gm->M)
    {
      pstart[node] = exp(p7P_TSC(gm, node-1, p7P_BM)) * (gm->M - node + 1);
      node++;
    }

  first = esl_rnd_FChoose(r, pstart, gm->M+1);          /* entry node, from that distribution */
  last  = first + esl_rnd_Roll(r, gm->M-first+1);       /* exit node, uniform over first..M   */

  free(pstart);
  *ret_kstart = first;
  *ret_kend   = last;
  return eslOK;

 ERROR:
  free(pstart);            /* free(NULL) is a no-op */
  *ret_kstart = 0;
  *ret_kend   = 0;
  return status;
}
/* select_d()   [SSE version]
 *
 * Stochastic traceback step out of delete cell D(i,k): sample whether
 * it was reached from M(i,k-1) or D(i,k-1).
 *
 * Model position k maps to element <r> of vector <q>. The k-1 cells
 * and the transitions out of node k-1 are read from vector q-1; when
 * q is 0 they are read from vector Q-1 and passed through
 * esl_sse_rightshift_ps() together with a zero vector. M->D
 * transitions are read at tfv[7*q' + p7O_MD], D->D transitions at
 * tfv[7*Q + q'].
 *
 * Returns the sampled state type: p7T_M or p7T_D.
 */
static inline int
select_d(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  int    Q     = p7O_NQF(ox->M);
  int    q     = (k-1) % Q;
  int    r     = (k-1) / Q;
  int    qprv  = (q > 0) ? q-1 : Q-1;   /* vector that holds the k-1 cells */
  __m128 zerov = _mm_setzero_ps();
  __m128 mpv   = ox->dpf[i][qprv*3 + p7X_M];
  __m128 dpv   = ox->dpf[i][qprv*3 + p7X_D];
  __m128 tmdv  = om->tfv[7*qprv + p7O_MD];
  __m128 tddv  = om->tfv[7*Q + qprv];
  union { __m128 v; float p[4]; } lanes;
  float  path[2];
  int    state[2] = { p7T_M, p7T_D };

  /* Wrapped around to the last vector: shift everything by one element. */
  if (q == 0)
    {
      mpv  = esl_sse_rightshift_ps(mpv,  zerov);
      dpv  = esl_sse_rightshift_ps(dpv,  zerov);
      tmdv = esl_sse_rightshift_ps(tmdv, zerov);
      tddv = esl_sse_rightshift_ps(tddv, zerov);
    }

  lanes.v = _mm_mul_ps(mpv, tmdv);
  path[0] = lanes.p[r];

  lanes.v = _mm_mul_ps(dpv, tddv);
  path[1] = lanes.p[r];

  esl_vec_FNorm(path, 2);
  return state[esl_rnd_FChoose(rng, path, 2)];
}
/* select_m()   [SSE version]
 *
 * Stochastic traceback step out of match cell M(i,k): sample which
 * cell preceded it. The four candidates, in the order their weights
 * are stored in <path>, are B(i-1), M(i-1,k-1), I(i-1,k-1) and
 * D(i-1,k-1).
 *
 * Model position k maps to element <r> of vector <q> (q = (k-1) % Q,
 * r = (k-1) / Q). The k-1 cells are read from vector q-1; when q is 0
 * they are read from vector Q-1 and passed through
 * esl_sse_rightshift_ps() together with a zero vector.
 *
 * Returns the sampled state type: p7T_B, p7T_M, p7T_I or p7T_D.
 */
static inline int
select_m(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  int     Q     = p7O_NQF(ox->M);
  int     q     = (k-1) % Q;
  int     r     = (k-1) / Q;
  int     qprv  = (q > 0) ? q-1 : Q-1;   /* vector that holds the k-1 cells            */
  __m128 *tp    = om->tfv + 7*q;         /* first of the transition vectors into M(i,k) */
  __m128  zerov = _mm_setzero_ps();
  __m128  srcv[4];                       /* predecessor values, same order as state[]   */
  union { __m128 v; float p[4]; } lanes;
  float   path[4];
  int     state[4] = { p7T_B, p7T_M, p7T_I, p7T_D };
  int     c;

  srcv[0] = _mm_set1_ps(ox->xmx[(i-1)*p7X_NXCELLS+p7X_B]);
  srcv[1] = ox->dpf[i-1][qprv*3 + p7X_M];
  srcv[2] = ox->dpf[i-1][qprv*3 + p7X_I];
  srcv[3] = ox->dpf[i-1][qprv*3 + p7X_D];

  /* Wrapped around to the last vector: shift M/I/D by one element. */
  if (q == 0)
    for (c = 1; c < 4; c++)
      srcv[c] = esl_sse_rightshift_ps(srcv[c], zerov);

  /* Weight of each path = predecessor value * transition; keep only element r. */
  for (c = 0; c < 4; c++)
    {
      lanes.v = _mm_mul_ps(srcv[c], tp[c]);
      path[c] = lanes.p[r];
    }

  esl_vec_FNorm(path, 4);
  return state[esl_rnd_FChoose(rng, path, 4)];
}
/* Function:  esl_hmm_Emit()
 * Synopsis:  Emit a sequence from an HMM.
 *
 * Purpose:   Sample one sequence from an <hmm>, using random
 *            number generator <r>. Optionally return the sequence,
 *            the state path, and/or the length via <opt_dsq>,
 *            <opt_path>, and <opt_L>.
 *
 *            The start state is drawn from <hmm->pi>; state index
 *            <hmm->M> is the implicit end state. The returned
 *            sequence is indexed 1..L with <eslDSQ_SENTINEL> at
 *            positions 0 and L+1; the returned path is indexed 1..L
 *            with -1 at position 0 and <hmm->M> at position L+1.
 *
 *            If <opt_dsq> or <opt_path> are requested, caller
 *            becomes responsible for free'ing their memory.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    <eslEMEM> on allocation failure. In this case any
 *            requested <*opt_dsq> and <*opt_path> are set to <NULL>
 *            and <*opt_L> to 0, and nothing is left for the caller
 *            to free.
 */
int
esl_hmm_Emit(ESL_RANDOMNESS *r, const ESL_HMM *hmm, ESL_DSQ **opt_dsq, int **opt_path, int *opt_L)
{
  int      k, L, allocL;
  int     *path = NULL;
  ESL_DSQ *dsq  = NULL;
  void    *tmp  = NULL;
  int      status;

  ESL_ALLOC(dsq,  sizeof(ESL_DSQ) * 256);
  ESL_ALLOC(path, sizeof(int)     * 256);
  allocL = 256;

  dsq[0]  = eslDSQ_SENTINEL;
  path[0] = -1;

  k = esl_rnd_FChoose(r, hmm->pi, hmm->M+1);
  L = 0;
  while (k != hmm->M)           /* M is the implicit end state */
    {
      L++;
      /* Grow before writing position L: slot L+1 must also exist for the trailing sentinel. */
      if (L >= allocL-1) {
        ESL_RALLOC(dsq,  tmp, sizeof(ESL_DSQ) * (allocL*2));
        ESL_RALLOC(path, tmp, sizeof(int)     * (allocL*2));
        allocL *= 2;
      }

      path[L] = k;
      dsq[L]  = esl_rnd_FChoose(r, hmm->e[k], hmm->abc->K);
      k       = esl_rnd_FChoose(r, hmm->t[k], hmm->M+1);
    }

  path[L+1] = hmm->M;           /* sentinel for "end state" */
  dsq[L+1]  = eslDSQ_SENTINEL;

  if (opt_dsq  != NULL) *opt_dsq  = dsq;   else free(dsq);
  if (opt_path != NULL) *opt_path = path;  else free(path);
  if (opt_L    != NULL) *opt_L    = L;
  return eslOK;

 ERROR:
  free(path);                   /* free(NULL) is a no-op */
  free(dsq);
  /* Leave the optional outputs in a defined state so a caller cannot
   * free or read an indeterminate pointer after a failure.
   */
  if (opt_dsq  != NULL) *opt_dsq  = NULL;
  if (opt_path != NULL) *opt_path = NULL;
  if (opt_L    != NULL) *opt_L    = 0;
  return status;
}
/* select_b()
 *
 * Stochastic traceback step out of B(i): sample whether it was
 * reached from N(i) or from J(i). Each weight is that state's value
 * in row i of the special-state array times its MOVE transition.
 *
 * Returns the sampled state type: p7T_N or p7T_J.
 */
static inline int
select_b(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i)
{
  int   state[2] = { p7T_N, p7T_J };
  float path[2];
  int   row = i * p7X_NXCELLS;       /* offset of row i in ox->xmx */
  int   choice;

  path[0] = ox->xmx[row + p7X_N] * om->xf[p7O_N][p7O_MOVE];
  path[1] = ox->xmx[row + p7X_J] * om->xf[p7O_J][p7O_MOVE];
  esl_vec_FNorm(path, 2);

  choice = esl_rnd_FChoose(rng, path, 2);
  return state[choice];
}
/* select_j()
 *
 * Stochastic traceback step out of J(i): sample whether it was
 * reached from J(i-1) or from E(i).
 *
 * The J(i-1) weight is the row i-1 value times the J LOOP transition.
 * The E(i) weight is the row i value times the E LOOP transition,
 * additionally multiplied by the row i SCALE cell.
 *
 * Returns the sampled state type: p7T_J or p7T_E.
 */
static inline int
select_j(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i)
{
  int   state[2] = { p7T_J, p7T_E };
  float path[2];
  int   prv = (i-1) * p7X_NXCELLS;   /* offset of row i-1 in ox->xmx */
  int   cur = i     * p7X_NXCELLS;   /* offset of row i   in ox->xmx */
  int   choice;

  path[0] = ox->xmx[prv + p7X_J] * om->xf[p7O_J][p7O_LOOP];
  path[1] = ox->xmx[cur + p7X_E] * om->xf[p7O_E][p7O_LOOP] * ox->xmx[cur + p7X_SCALE];
  esl_vec_FNorm(path, 2);

  choice = esl_rnd_FChoose(rng, path, 2);
  return state[choice];
}
/* utest_choose()
 *
 * Unit test for esl_rnd_DChoose() and esl_rnd_FChoose().
 *
 * Samples a random probability vector over <nbins> bins, then for
 * each of the two functions in turn (DChoose first) draws <n> samples,
 * tallies them per bin, and runs a chi-squared test of the tallies
 * against the expected counts n*p[i]. Expected counts come from the
 * double-precision vector in both passes. Any failure, including a
 * chi-squared P-value below 0.01, is fatal. If <be_verbose> is set,
 * the P-value of each pass is printed.
 */
static void
utest_choose(ESL_RANDOMNESS *r, int n, int nbins, int be_verbose)
{
  double *pd = malloc(sizeof(double) * nbins);   /* probability vector, double precision */
  float  *pf = NULL;                             /* same vector, single precision        */
  int    *ct = NULL;                             /* observed counts per bin              */
  double  X2, X2p;
  double  expected, delta;
  int     pass;                                  /* 0 = DChoose(), 1 = FChoose()         */
  int     i, bin;

  if (pd == NULL)                                        esl_fatal("malloc failed");
  if ((pf = malloc(sizeof(float) * nbins)) == NULL)      esl_fatal("malloc failed");
  if ((ct = malloc(sizeof(int)   * nbins)) == NULL)      esl_fatal("malloc failed");

  /* Sample a random multinomial probability vector. */
  if (esl_dirichlet_DSampleUniform(r, nbins, pd) != eslOK) esl_fatal("dirichlet sample failed");
  esl_vec_D2F(pd, nbins, pf);

  for (pass = 0; pass < 2; pass++)
    {
      /* Tally n samples from the function under test. */
      esl_vec_ISet(ct, nbins, 0);
      for (i = 0; i < n; i++)
        {
          if (pass == 0) bin = esl_rnd_DChoose(r, pd, nbins);
          else           bin = esl_rnd_FChoose(r, pf, nbins);
          ct[bin]++;
        }

      /* X^2 statistic of observed vs. expected counts. */
      X2 = 0.;
      for (i = 0; i < nbins; i++)
        {
          expected = (double) n * pd[i];
          delta    = (double) ct[i] - expected;
          X2      += delta*delta/expected;
        }

      if (esl_stats_ChiSquaredTest(nbins, X2, &X2p) != eslOK) esl_fatal("chi square eval failed");
      if (be_verbose)
        {
          if (pass == 0) printf("DChoose():  \t%g\n", X2p);
          else           printf("FChoose():  \t%g\n", X2p);
        }
      if (X2p < 0.01) esl_fatal("chi squared test failed");
    }

  free(pd);
  free(pf);
  free(ct);
  return;
}
/* select_i()   [SSE version]
 *
 * Stochastic traceback step out of insert cell I(i,k): sample whether
 * it was reached from M(i-1,k) or I(i-1,k).
 *
 * Model position k maps to element <r> of vector <q> (q = (k-1) % Q,
 * r = (k-1) / Q). Both predecessors are at the same k, so they are
 * read from the same vector q of row i-1 and no shift is needed. The
 * two transition vectors are read at tfv[7*q + p7O_MI] and the vector
 * immediately after it.
 *
 * Returns the sampled state type: p7T_M or p7T_I.
 */
static inline int
select_i(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  int     Q   = p7O_NQF(ox->M);
  int     q   = (k-1) % Q;
  int     r   = (k-1) / Q;
  __m128 *tp  = om->tfv + 7*q + p7O_MI;     /* tp[0], tp[1]: the two transitions into I(i,k) */
  __m128  srcv[2];                          /* predecessor values, same order as state[]     */
  union { __m128 v; float p[4]; } lanes;
  float   path[2];
  int     state[2] = { p7T_M, p7T_I };
  int     c;

  srcv[0] = ox->dpf[i-1][q*3 + p7X_M];
  srcv[1] = ox->dpf[i-1][q*3 + p7X_I];

  for (c = 0; c < 2; c++)
    {
      lanes.v = _mm_mul_ps(srcv[c], tp[c]);
      path[c] = lanes.p[r];
    }

  esl_vec_FNorm(path, 2);
  return state[esl_rnd_FChoose(rng, path, 2)];
}
/* select_i()   [AltiVec/VMX version]
 *
 * Stochastic traceback step out of insert cell I(i,k): sample whether
 * it was reached from M(i-1,k) or I(i-1,k).
 *
 * Model position k maps to element <r> of vector <q> (q = (k-1) % Q,
 * r = (k-1) / Q). Both predecessors are at the same k, so they are
 * read from the same vector q of row i-1 and no shift is needed. The
 * two transition vectors are read at tfv[7*q + p7O_MI] and the vector
 * immediately after it.
 *
 * Returns the sampled state type: p7T_M or p7T_I.
 */
static inline int
select_i(ESL_RANDOMNESS *rng, const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  int           Q     = p7O_NQF(ox->M);
  int           q     = (k-1) % Q;
  int           r     = (k-1) / Q;
  vector float *tp    = om->tfv + 7*q + p7O_MI;   /* tp[0], tp[1]: the two transitions into I(i,k) */
  vector float  zerov = (vector float) vec_splat_u32(0);
  vector float  srcv[2];                          /* predecessor values, same order as state[]     */
  union { vector float v; float p[4]; } lanes;
  float         path[2];
  int           state[2] = { p7T_M, p7T_I };
  int           c;

  srcv[0] = ox->dpf[i-1][q*3 + p7X_M];
  srcv[1] = ox->dpf[i-1][q*3 + p7X_I];

  /* vec_madd() with a zero addend is used as a plain multiply. */
  for (c = 0; c < 2; c++)
    {
      lanes.v = vec_madd(srcv[c], tp[c], zerov);
      path[c] = lanes.p[r];
    }

  esl_vec_FNorm(path, 2);
  return state[esl_rnd_FChoose(rng, path, 2)];
}
/* Function:  p7_GStochasticTrace()
 * Synopsis:  Stochastic traceback of a Forward matrix.
 * Incept:    SRE, Thu Jan  3 15:39:20 2008 [Janelia]
 *
 * Purpose:   Stochastic traceback of Forward matrix <gx> to
 *            sample an alignment of digital sequence <dsq>
 *            (of length <L>) to the profile <gm>.
 *
 *            The traceback starts by appending T and C at i=L and
 *            works backwards until it samples the S state. At each
 *            step the state on top of the trace decides which
 *            predecessor cells are candidates; their scores (matrix
 *            cell plus transition score) are log-normalized into a
 *            probability vector and one predecessor is sampled from
 *            it. The trace is built backwards and reversed at the end.
 *
 *            The sampled traceback is returned in <tr>, which the
 *            caller must have at least made an initial allocation of
 *            (the <tr> will be grown as needed here).
 *
 * Args:      r      - source of random numbers
 *            dsq    - digital sequence aligned to, 1..L
 *                     (not referenced in the body of this function)
 *            L      - length of dsq
 *            gm     - profile
 *            gx     - Forward matrix to trace, L x M
 *            tr     - storage for the recovered traceback.
 *
 * Returns:   <eslOK> on success. <tr->M> and <tr->L> are set.
 *
 * Throws:    <eslFAIL> if a cell with score -infinity is reached, or
 *            if the state on top of the trace is not one handled
 *            here. Failures from <ESL_ALLOC()>, <p7_trace_Append()>
 *            and <p7_trace_Reverse()> are passed through. In all
 *            these cases the score workspace is freed; <tr> is left
 *            as it was when the failure occurred.
 *
 * Note:      XMX(), MMX(), IMX(), DMX() and TSC() are macros defined
 *            outside this function. NOTE(review): they presumably
 *            expand to accesses through the locals <xmx>, <dp> and
 *            <tsc> declared below (which are not otherwise used) --
 *            confirm before renaming any of those locals.
 */
int
p7_GStochasticTrace(ESL_RANDOMNESS *r, const ESL_DSQ *dsq, int L, const P7_PROFILE *gm, const P7_GMX *gx, P7_TRACE *tr)
{
  int     status;
  int     i;			/* position in seq (1..L) */
  int     k;			/* position in model (1..M) */
  int     M   = gm->M;
  float **dp  = gx->dp;
  float  *xmx = gx->xmx;
  float const *tsc = gm->tsc;
  float  *sc;			/* scores of possible choices: up to 2M-1, in the case of exits to E  */
  int     scur, sprv;		/* state just sampled; state sampled on the previous iteration */

  /* we'll index M states as 1..M, and D states as 2..M = M+2..2M: M0, D1 are impossibles. */
  ESL_ALLOC(sc, sizeof(float) * (2*M+1));

  /* Seed the (backwards) trace with its last two states, T and C, at i=L. */
  k = 0;
  i = L;
  if ((status = p7_trace_Append(tr, p7T_T, k, i)) != eslOK) goto ERROR;
  if ((status = p7_trace_Append(tr, p7T_C, k, i)) != eslOK) goto ERROR;
  sprv = p7T_C;
  while (sprv != p7T_S)
    {
      /* Dispatch on the state most recently appended to the trace. */
      switch (tr->st[tr->N-1]) {
      /* C(i) comes from C(i-1) or E(i) */
      case p7T_C:
	if (XMX(i,p7G_C) == -eslINFINITY) ESL_XEXCEPTION(eslFAIL, "impossible C reached at i=%d", i);

	sc[0] = XMX(i-1, p7G_C) + gm->xsc[p7P_C][p7P_LOOP];
	sc[1] = XMX(i,   p7G_E) + gm->xsc[p7P_E][p7P_MOVE];
	esl_vec_FLogNorm(sc, 2);
	scur  = (esl_rnd_FChoose(r, sc, 2) == 0) ? p7T_C : p7T_E;
	break;

      /* E connects from any M or D state. k set here */
      case p7T_E:
	if (XMX(i, p7G_E) == -eslINFINITY) ESL_XEXCEPTION(eslFAIL, "impossible E reached at i=%d", i);

	if (p7_profile_IsLocal(gm)) { /* local models come from any M, D */
	  /* sc[1..M] hold M_1..M_M; sc[M+2..2M] hold D_2..D_M; slots 0 and M+1 are impossible. */
	  sc[0] = sc[M+1] = -eslINFINITY;
	  for (k = 1; k <= M; k++) sc[k]   = MMX(i,k);
	  for (k = 2; k <= M; k++) sc[k+M] = DMX(i,k);
	  esl_vec_FLogNorm(sc, 2*M+1); /* now sc is a prob vector */
	  k = esl_rnd_FChoose(r, sc, 2*M+1);
	  /* An index above M is a D state: map it back to its node number. */
	  if (k <= M)    scur = p7T_M;
	  else { k -= M; scur = p7T_D; }
	} else { 		/* glocal models come from M_M or D_M  */
	  k     = M;
	  sc[0] = MMX(i,M);
	  sc[1] = DMX(i,M);
	  esl_vec_FLogNorm(sc, 2); /* now sc is a prob vector */
	  scur  = (esl_rnd_FChoose(r, sc, 2) == 0) ? p7T_M : p7T_D;
	}
	break;

      /* M connects from {MDI} i-1,k-1, or B */
      case p7T_M:
	if (MMX(i,k) == -eslINFINITY) ESL_XEXCEPTION(eslFAIL, "impossible M reached at k=%d,i=%d", k,i);

	sc[0] = XMX(i-1,p7G_B) + TSC(p7P_BM, k-1);
	sc[1] = MMX(i-1,k-1)   + TSC(p7P_MM, k-1);
	sc[2] = IMX(i-1,k-1)   + TSC(p7P_IM, k-1);
	sc[3] = DMX(i-1,k-1)   + TSC(p7P_DM, k-1);
	esl_vec_FLogNorm(sc, 4);
	switch (esl_rnd_FChoose(r, sc, 4)) {
	case 0: scur = p7T_B;   break;
	case 1: scur = p7T_M;   break;
	case 2: scur = p7T_I;   break;
	case 3: scur = p7T_D;   break;
	}
	/* Every predecessor of M(i,k) is at i-1, and at k-1 within the model. */
	k--;
	i--;
	break;

      /* D connects from M,D at i,k-1 */
      case p7T_D:
	if (DMX(i, k) == -eslINFINITY) ESL_XEXCEPTION(eslFAIL, "impossible D reached at k=%d,i=%d", k,i);

	sc[0] = MMX(i, k-1) + TSC(p7P_MD, k-1);
	sc[1] = DMX(i, k-1) + TSC(p7P_DD, k-1);
	esl_vec_FLogNorm(sc, 2);
	scur = (esl_rnd_FChoose(r, sc, 2) == 0) ? p7T_M : p7T_D;
	k--;
	break;

      /* I connects from M,I at i-1,k */
      case p7T_I:
	if (IMX(i,k) == -eslINFINITY) ESL_XEXCEPTION(eslFAIL, "impossible I reached at k=%d,i=%d", k,i);

	sc[0] = MMX(i-1,k) + TSC(p7P_MI, k);
	sc[1] = IMX(i-1,k) + TSC(p7P_II, k);
	esl_vec_FLogNorm(sc, 2);
	scur = (esl_rnd_FChoose(r, sc, 2) == 0) ? p7T_M : p7T_I;
	i--;
	break;

      /* N connects from S, N; no sampling is needed, i alone decides */
      case p7T_N:
	if (XMX(i, p7G_N) == -eslINFINITY) ESL_XEXCEPTION(eslFAIL, "impossible N reached at i=%d", i);
	scur = (i == 0) ? p7T_S : p7T_N;
	break;

      /* B connects from N, J */
      case p7T_B:
	if (XMX(i,p7G_B) == -eslINFINITY) ESL_XEXCEPTION(eslFAIL, "impossible B reached at i=%d", i);

	sc[0] = XMX(i, p7G_N) + gm->xsc[p7P_N][p7P_MOVE];
	sc[1] = XMX(i, p7G_J) + gm->xsc[p7P_J][p7P_MOVE];
	esl_vec_FLogNorm(sc, 2);
	scur = (esl_rnd_FChoose(r, sc, 2) == 0) ? p7T_N : p7T_J;
	break;

      /* J connects from E(i) or J(i-1) */
      case p7T_J:
	if (XMX(i,p7G_J) == -eslINFINITY) ESL_XEXCEPTION(eslFAIL, "impossible J reached at i=%d", i);

	sc[0] = XMX(i-1,p7G_J) + gm->xsc[p7P_J][p7P_LOOP];
	sc[1] = XMX(i,  p7G_E) + gm->xsc[p7P_E][p7P_LOOP];
	esl_vec_FLogNorm(sc, 2);
	scur = (esl_rnd_FChoose(r, sc, 2) == 0) ? p7T_J : p7T_E;
	break;

      default: ESL_XEXCEPTION(eslFAIL, "bogus state in traceback");
      } /* end switch over the state on top of the trace */

      /* Append this state and the current i,k to be explained to the growing trace */
      if ((status = p7_trace_Append(tr, scur, k, i)) != eslOK) goto ERROR;

      /* For NCJ, we had to defer i decrement: it only applies when the
       * same N, C or J state was sampled twice in a row (a self-loop).
       */
      if ( (scur == p7T_N || scur == p7T_J || scur == p7T_C) && scur == sprv) i--;

      sprv = scur;
    } /* end traceback, at S state */

  /* The trace was built end-to-start; put it in forward order. */
  if ((status = p7_trace_Reverse(tr)) != eslOK) goto ERROR;
  tr->M = gm->M;
  tr->L = L;
  free(sc);
  return eslOK;

 ERROR:
  if (sc != NULL) free(sc);
  return status;
}
/* Function:  p7_CoreEmit()
 * Incept:    SRE, Tue Jan  9 10:20:51 2007 [Janelia]
 *
 * Purpose:   Generate (sample) a sequence from a core HMM <hmm>.
 *
 *            Optionally return the sequence and/or its trace in <sq>
 *            and <tr>, respectively, which the caller has
 *            allocated. Having the caller provide these reusable
 *            objects allows re-use of both <sq> and <tr> in repeated
 *            calls, saving malloc/free wastage. Either can be passed
 *            as <NULL> if it isn't needed.
 *
 *            This does not set any fields in the <sq> except for the
 *            sequence itself. Caller must set the name, and any other
 *            annotation it wants to add.
 *
 *            Trace is relative to the core model: it may include
 *            I_0 and I_M states, B->DD->M entry is explicit, and a
 *            0 length generated sequence is possible.
 *
 *            The sequence coordinate <i> counts emitted residues
 *            only. The final transition "into M_{M+1}" is recorded as
 *            the E state and emits nothing, so it does not advance
 *            <i>; <tr->L> is therefore the number of residues
 *            emitted.
 *
 * Args:      r     -  source of randomness
 *            hmm   -  core HMM to generate from
 *            sq    -  opt: digital sequence sampled (or NULL)
 *            tr    -  opt: trace sampled            (or NULL)
 *
 * Returns:   <eslOK> on success;
 *            optionally return the digital sequence through <sq>,
 *            and optionally return its trace in <tr>.
 *
 * Throws:    <eslECORRUPT> if emission gets us into an illegal state,
 *            probably indicating that a probability that should have
 *            been zero wasn't.
 *
 *            Throws <eslEMEM> on a reallocation error.
 *
 *            In these cases, the contents of <sq> and <tr> may be
 *            corrupted. Caller should not trust their data, but may
 *            safely reuse them.
 *
 * Xref:      STL11/124.
 */
int
p7_CoreEmit(ESL_RANDOMNESS *r, const P7_HMM *hmm, ESL_SQ *sq, P7_TRACE *tr)
{
  int       k   = 0;		/* position in model nodes 1..M */
  int       i   = 0;		/* position in sequence 1..L */
  char      st  = p7T_B;	/* state type */
  int       x;			/* sampled residue */
  int       status;

  if (sq != NULL) esl_sq_Reuse(sq);
  if (tr != NULL) {
    if ((status = p7_trace_Reuse(tr))            != eslOK) goto ERROR;
    if ((status = p7_trace_Append(tr, st, k, i)) != eslOK) goto ERROR;
  }

  while (st != p7T_E)
    {
      /* Sample next state type, given current state type (and current k).
       * hmm->t[k] holds the transitions out of node k: entries 0..2 are
       * from M (to M, I, D), 3..4 from I (to M, I), 5..6 from D (to M, D).
       */
      switch (st) {
      case p7T_B:
      case p7T_M:
	switch (esl_rnd_FChoose(r, hmm->t[k], 3)) {
	case 0:  st = p7T_M; break;
	case 1:  st = p7T_I; break;
	case 2:  st = p7T_D; break;
	default: ESL_XEXCEPTION(eslEINCONCEIVABLE, "impossible.");
	}
	break;

      case p7T_I:
	switch (esl_rnd_FChoose(r, hmm->t[k]+3, 2)) {
	case 0:  st = p7T_M; break;
	case 1:  st = p7T_I; break;
	default: ESL_XEXCEPTION(eslEINCONCEIVABLE, "impossible.");
	}
	break;

      case p7T_D:
	switch (esl_rnd_FChoose(r, hmm->t[k]+5, 2)) {
	case 0:  st = p7T_M; break;
	case 1:  st = p7T_D; break;
	default: ESL_XEXCEPTION(eslEINCONCEIVABLE, "impossible.");
	}
	break;

      default: ESL_XEXCEPTION(eslECORRUPT, "impossible state reached during emission");
      }

      /* Bump k if the new state is in the next node */
      if (st == p7T_M || st == p7T_D) k++;

      /* a transit to M_M+1 is a transit to the E state */
      if (k == hmm->M+1) {
	if (st == p7T_M) { st = p7T_E; k = 0; }
	else             ESL_XEXCEPTION(eslECORRUPT, "failed to reach E state properly");
      }

      /* Bump i only for a state that really emits. This must come after
       * the E conversion above: the virtual M_{M+1} emits nothing, and
       * counting it would make the E entry and tr->L one too large.
       */
      if (st == p7T_M || st == p7T_I) i++;

      /* Sample new residue x if in match or insert */
      if      (st == p7T_M) x = esl_rnd_FChoose(r, hmm->mat[k], hmm->abc->K);
      else if (st == p7T_I) x = esl_rnd_FChoose(r, hmm->ins[k], hmm->abc->K);
      else                  x = eslDSQ_SENTINEL;

      /* Add state to trace */
      if (tr != NULL) {
	if ((status = p7_trace_Append(tr, st, k, i)) != eslOK) goto ERROR;
      }
      /* Add x to sequence */
      if (sq != NULL && x != eslDSQ_SENTINEL) {
	if ((status = esl_sq_XAddResidue(sq, x)) != eslOK) goto ERROR;
      }
    }

  /* Terminate the trace and sequence (both are optional, remember) */
  if (tr != NULL) {
    tr->M = hmm->M;
    tr->L = i;
  }
  if (sq != NULL && (status = esl_sq_XAddResidue(sq, eslDSQ_SENTINEL)) != eslOK) goto ERROR;
  return eslOK;

 ERROR:
  return status;
}
/* Function: p7_ProfileEmit() * Synopsis: Sample a sequence from the search form of the model. * Incept: SRE, Mon Jan 22 10:23:28 2007 [Janelia] * * Purpose: Sample a sequence from the implicit * probabilistic model of a Plan7 profile <gm>. This * requires also having the core probabilities of * the accompanying <hmm>, and the background * frequencies of null1 model <bg>. * * Optionally return the sequence and/or its trace in <sq> * and <tr>, respectively. Caller has allocated space for * both of these, though they may get reallocated/grown * here. Either can be passed as <NULL> if unneeded. * * Only the sequence field is set in the <sq>. Caller must * set the name, plus any other fields it wants to set. If * the <sq> was created in digital mode, this is the <sq->dsq>; * if the <sq> was created in text mode, this is <sq->seq>. * * <p7_ProfileEmit()> deliberately uses an <ESL_SQ> object * instead of a plain <ESL_DSQ *> or <char *> string, to * take advantage of the object's support for dynamic * reallocation of seq length, and to allow both digital and * text mode generation. 
* * Args: r - source of randomness * hmm - core probabilities of the profile * gm - configured search profile * sq - optRETURN: sampled sequence * tr - optRETURN: sampled trace * * Throws: (no abnormal error conditions) */ int p7_ProfileEmit(ESL_RANDOMNESS *r, const P7_HMM *hmm, const P7_PROFILE *gm, const P7_BG *bg, ESL_SQ *sq, P7_TRACE *tr) { char prv, st; /* prev, current state type */ int k = 0; /* position in model nodes 1..M */ int i = 0; /* position in sequence 1..L */ int x; /* sampled residue */ int kend = hmm->M; /* predestined end node */ int status; float xt[p7P_NXSTATES][p7P_NXTRANS]; /* Backcalculate the probabilities in the special states (loop and length model) */ for (i = 0; i < p7P_NXSTATES; i++) for (x = 0; x < p7P_NXTRANS; x++) xt[i][x] = exp(gm->xsc[i][x]); if (sq != NULL) esl_sq_Reuse(sq); if (tr != NULL) { if ((status = p7_trace_Reuse(tr)) != eslOK) goto ERROR; if ((status = p7_trace_Append(tr, p7T_S, k, i)) != eslOK) goto ERROR; if ((status = p7_trace_Append(tr, p7T_N, k, i)) != eslOK) goto ERROR; } st = p7T_N; i = 0; while (st != p7T_T) { /* Sample a state transition. After this section, prv and st (prev->current state) are set; * k also gets set if we make a B->Mk entry transition. */ prv = st; switch (st) { case p7T_B: if (p7_profile_IsLocal(gm)) { /* local mode: enter the implicit profile: choose our entry and our predestined exit */ if ((status = sample_endpoints(r, gm, &k, &kend)) != eslOK) goto ERROR; st = p7T_M; /* must be, because left wing is retracted */ } else { /* glocal mode: treat B as M_0, use its transitions to MID. */ /* FIXME: this is wrong. It should sample from B->Mk distribution! 
*/ switch (esl_rnd_FChoose(r, P7H_TMAT(hmm, 0), p7H_NTMAT)) { case 0: st = p7T_M; k = 1; break; case 1: st = p7T_I; k = 0; break; case 2: st = p7T_D; k = 1; break; default: ESL_XEXCEPTION(eslEINCONCEIVABLE, "impossible."); } } break; case p7T_M: if (k == kend) st = p7T_E; /* check our preordained fate */ else { switch (esl_rnd_FChoose(r, P7H_TMAT(hmm, k), p7H_NTMAT)) { case 0: st = p7T_M; break; case 1: st = p7T_I; break; case 2: st = p7T_D; break; default: ESL_XEXCEPTION(eslEINCONCEIVABLE, "impossible."); } } break; case p7T_D: if (k == kend) st = p7T_E; else st = (esl_rnd_FChoose(r, P7H_TDEL(hmm, k), p7H_NTDEL) == 0) ? p7T_M : p7T_D; break; case p7T_I: st = (esl_rnd_FChoose(r, P7H_TINS(hmm, k), p7H_NTINS) == 0) ? p7T_M : p7T_I; break; case p7T_N: st = (esl_rnd_FChoose(r, xt[p7P_N], p7P_NXTRANS) == p7P_MOVE) ? p7T_B : p7T_N; break; case p7T_E: st = (esl_rnd_FChoose(r, xt[p7P_E], p7P_NXTRANS) == p7P_MOVE) ? p7T_C : p7T_J; break; case p7T_C: st = (esl_rnd_FChoose(r, xt[p7P_C], p7P_NXTRANS) == p7P_MOVE) ? p7T_T : p7T_C; break; case p7T_J: st = (esl_rnd_FChoose(r, xt[p7P_J], p7P_NXTRANS) == p7P_MOVE) ? p7T_B : p7T_J; break; default: ESL_XEXCEPTION(eslECORRUPT, "impossible state reached during emission"); } /* Based on the transition we just sampled, update k. */ if (st == p7T_E) k = 0; else if (st == p7T_M && prv != p7T_B) k++; /* be careful about B->Mk, where we already set k */ else if (st == p7T_D) k++; /* Based on the transition we just sampled, generate a residue. */ if (st == p7T_M) x = esl_rnd_FChoose(r, hmm->mat[k], hmm->abc->K); else if (st == p7T_I) x = esl_rnd_FChoose(r, hmm->ins[k], hmm->abc->K); else if ((st == p7T_N || st == p7T_C || st == p7T_J) && prv==st) x = esl_rnd_FChoose(r, bg->f, hmm->abc->K); else x = eslDSQ_SENTINEL; if (x != eslDSQ_SENTINEL) i++; /* Add residue (if any) to sequence */ if (sq != NULL && x != eslDSQ_SENTINEL && (status = esl_sq_XAddResidue(sq, x)) != eslOK) goto ERROR; /* Add state to trace. 
*/ if (tr != NULL) { if ((status = p7_trace_Append(tr, st, k, i)) != eslOK) goto ERROR; } } /* Terminate the trace and sequence (both are optional, remember) */ if (tr != NULL) { tr->M = hmm->M; tr->L = i; } if (sq != NULL && (status = esl_sq_XAddResidue(sq, eslDSQ_SENTINEL)) != eslOK) goto ERROR; return eslOK; ERROR: return status; }