コード例 #1
0
ファイル: generic_null2.c プロジェクト: TuftsBCB/SMURFBuild
/* Function:  p7_GNull2_ByTrace()
 * Synopsis:  Assign null2 scores to an envelope by the sampling method.
 * Incept:    SRE, Thu May  1 10:00:43 2008 [Janelia]
 *
 * Purpose:   Given a traceback <tr> for an alignment of model <gm> to
 *            some target sequence; calculate null2 odds ratios $\frac{f'{x}}{f{x}}$ 
 *            as the state-usage-weighted emission probabilities,
 *            with state usages calculated by counting emissions used
 *            at positions <zstart..zend> in the trace.
 *            
 *            Because we only need to collect state usages from the
 *            trace <tr>, the target sequence is irrelevant. Because
 *            we are only averaging emission odds ratios from model
 *            <gm>, the configuration of <gm> is irrelevant (uni
 *            vs. multihit, or length config).
 *
 * Args:      gm     - model, in any configuration; only emission odds are used
 *            tr     - traceback for any region (or all) of a target sequence
 *            zstart - first elem in <tr> to collect from; use 0 for complete
 *            zend   - last elem in <tr> to collect from; use tr->N-1 for complete
 *            wrk    - DP matrix w/ at least one row, for workspace
 *            null2  - RESULT: odds ratios f'(x)/f(x) for all Kp residues
 * 
 * Returns:   <eslOK> on success, and the <ddef->n2sc> scores are set
 *            for region <i..j>.
 *
 * Throws:    <eslEMEM> on allocation error.
 */
int
p7_GNull2_ByTrace(const P7_PROFILE *gm, const P7_TRACE *tr, int zstart, int zend, P7_GMX *wrk, float *null2)
{
  float  **dp   = wrk->dp;	/* so that {MDI}MX() macros work */
  float   *xmx  = wrk->xmx;	/* so that XMX() macro works     */
  int      Ld   = 0;
  int      M    = gm->M;
  int      k;			/* index over model position     */
  int      x;			/* index over residues           */
  int      z;			/* index over trace position     */
  float    xfactor;
 
  /* We'll use the i=0 row in wrk for working space: dp[0][] and xmx[0..4]. */
  esl_vec_FSet(wrk->dp[0], (M+1)*p7G_NSCELLS, 0.0);
  esl_vec_FSet(wrk->xmx,   p7G_NXCELLS,       0.0);

  /* Calculate emitting state usage in this particular trace segment: */
  for (z = zstart; z <= zend; z++) 
    {
      switch (tr->st[z]) {
      case p7T_M:  Ld++; MMX(0,tr->k[z]) += 1.0; break;
      case p7T_I:  Ld++; IMX(0,tr->k[z]) += 1.0; break;
      case p7T_N:  if (tr->st[z-1] == p7T_N) { Ld++; XMX(0,p7G_N) += 1.0; } break;
      case p7T_C:  if (tr->st[z-1] == p7T_C) { Ld++; XMX(0,p7G_C) += 1.0; } break;
      case p7T_J:  if (tr->st[z-1] == p7T_J) { Ld++; XMX(0,p7G_J) += 1.0; } break;
      }
    }
  esl_vec_FScale(wrk->dp[0], (M+1)*p7G_NSCELLS, (1.0 / (float) Ld));
  esl_vec_FScale(wrk->xmx,   p7G_NXCELLS,       (1.0 / (float) Ld));
  
  /* Calculate null2's odds ratio emission probabilities, by taking
   * posterior weighted sum over all emission vectors used in paths
   * explaining the domain.
   */
  esl_vec_FSet(null2, gm->abc->K, 0.0);
  xfactor = XMX(0,p7G_N) + XMX(0,p7G_C) + XMX(0,p7G_J);
  for (x = 0; x < gm->abc->K; x++)
    {
      for (k = 1; k < M; k++)
	{
	  null2[x] += MMX(0,k) * expf(p7P_MSC(gm, k, x));
	  null2[x] += IMX(0,k) * expf(p7P_ISC(gm, k, x));
	}
      null2[x] += MMX(0,M) * expf(p7P_MSC(gm, M, x));
      null2[x] += xfactor;
    }
  /* now null2[x] = \frac{f_d(x)}{f_0(x)} odds ratios for all x in alphabet,
   * 0..K-1, where f_d(x) are the ad hoc "null2" residue frequencies
   * for this envelope.
   */

  /* make valid scores for all degeneracies, by averaging the odds ratios. */
  esl_abc_FAvgScVec(gm->abc, null2);
  null2[gm->abc->K]    = 1.0;        /* gap character    */
  null2[gm->abc->Kp-2] = 1.0;	     /* nonresidue "*"   */
  null2[gm->abc->Kp-1] = 1.0;	     /* missing data "~" */

  return eslOK;
}
コード例 #2
0
/* Function:  p7_GDecoding()
 * Synopsis:  Posterior decoding of residue assignments.
 * Incept:    SRE, Fri Feb 29 10:16:21 2008 [Janelia]
 *
 * Purpose:   Calculates a posterior decoding of the residues in a
 *            target sequence, given profile <gm> and filled Forward
 *            and Backward matrices <fwd>, <bck> for the profile
 *            aligned to that target sequence. The resulting posterior
 *            decoding is stored in a DP matrix <pp>, provided by the
 *            caller.
 *            
 *            Each residue <i> must have been emitted by match state
 *            <1..M>, insert state <1..M-1>, or an NN, CC, or JJ loop
 *            transition.  For <dp = pp->dp>, <xmx = pp->xmx>,
 *            <MMX(i,k)> is the probability that match <k> emitted
 *            residue <i>; <IMX(i,k)> is the probability that insert
 *            <k> emitted residue <i>; <XMX(i,N)>,<XMX(i,C)>,
 *            <XMX(i,J)> are the probabilities that residue <i> was
 *            emitted on an NN, CC, or JJ transition. The sum over all
 *            these possibilities for a given residue <i> is 1.0.
 *
 *            Thus the only nonzero entries in a posterior decoding matrix
 *            <pp> are <M_{1..M}>, <I_{1..M-1}>, <N_{1..L-1}> (residue L
 *            can't be emitted by N), <C_{2..L}> (residue 1 can't be 
 *            emitted by C), and <J_{2..L-1}> (residues 1,L can't be
 *            emitted by J).
 *            
 *            In particular, row i=0 is unused (all zeros) in a pp
 *            matrix; the null2 calculation will take advantage of
 *            this by using the zero row for workspace.
 *            
 *            The caller may pass the Backward matrix <bck> as <pp>,
 *            in which case <bck> will be overwritten with
 *            <pp>. However, the caller may \emph{not} overwrite <fwd>
 *            this way; an <(i-1)> dependency in the calculation of
 *            NN, CC, JJ transitions prevents this.
 *
 * Args:      gm   - profile (must be the same that was used to fill <fwd>, <bck>).
 *            fwd  - filled Forward matrix 
 *            bck  - filled Backward matrix
 *            pp   - RESULT: posterior decoding matrix.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    (no abnormal error conditions)
 *
 * Note:      Burns time renormalizing each row. If you don't do this,
 *            probabilities will have an error of +/- 0.001 or so, creeping
 *            in from error in FLogsum()'s table approximation and even
 *            in log() and exp() themselves; including "probabilities"
 *            up to  ~1.001. Though this isn't going to break anything
 *            in normal use, it does drive the unit tests wild; the SSE
 *            implementation is more accurate, and unit tests that try
 *            to compare SSE and generic results will see differences,
 *            some sufficient to alter the choice of OA traceback.
 *    
 */
int
p7_GDecoding(const P7_PROFILE *gm, const P7_GMX *fwd, P7_GMX *bck, P7_GMX *pp)
{
  float      **dp   = pp->dp;
  float       *xmx  = pp->xmx;
  int          L    = fwd->L;
  int          M    = gm->M;
  int          i,k;
  float        overall_sc = fwd->xmx[p7G_NXCELLS*L + p7G_C] + gm->xsc[p7P_C][p7P_MOVE];
  float        denom;
  
  pp->M = M;
  pp->L = L;

  XMX(0, p7G_E) = 0.0;
  XMX(0, p7G_N) = 0.0;		
  XMX(0, p7G_J) = 0.0;		
  XMX(0, p7G_B) = 0.0;
  XMX(0, p7G_C) = 0.0;
  for (k = 0; k <= M; k++)
    MMX(0,k) = IMX(0,k) = DMX(0,k) = 0.0;
  
  for (i = 1; i <= L; i++)
    {
      denom = 0.0;
      MMX(i,0) = IMX(i,0) = DMX(i,0) = 0.0;
      for (k = 1; k < M; k++)
	{
	  MMX(i,k) = expf(fwd->dp[i][k*p7G_NSCELLS + p7G_M] + bck->dp[i][k*p7G_NSCELLS + p7G_M] - overall_sc); denom += MMX(i,k);
	  IMX(i,k) = expf(fwd->dp[i][k*p7G_NSCELLS + p7G_I] + bck->dp[i][k*p7G_NSCELLS + p7G_I] - overall_sc); denom += IMX(i,k);
	  DMX(i,k) = 0.;
	}
      MMX(i,M)     = expf(fwd->dp[i][M*p7G_NSCELLS + p7G_M] + bck->dp[i][M*p7G_NSCELLS + p7G_M] - overall_sc); denom += MMX(i,M);
      IMX(i,M)     = 0.;
      DMX(i,M)     = 0.;
      
      /* order doesn't matter.  note that this whole function is trivially simd parallel */
      XMX(i,p7G_E) = 0.;
      XMX(i,p7G_N) = expf(fwd->xmx[p7G_NXCELLS*(i-1) + p7G_N] + bck->xmx[p7G_NXCELLS*i + p7G_N] + gm->xsc[p7P_N][p7P_LOOP] - overall_sc);
      XMX(i,p7G_J) = expf(fwd->xmx[p7G_NXCELLS*(i-1) + p7G_J] + bck->xmx[p7G_NXCELLS*i + p7G_J] + gm->xsc[p7P_J][p7P_LOOP] - overall_sc);
      XMX(i,p7G_B) = 0.;
      XMX(i,p7G_C) = expf(fwd->xmx[p7G_NXCELLS*(i-1) + p7G_C] + bck->xmx[p7G_NXCELLS*i + p7G_C] + gm->xsc[p7P_C][p7P_LOOP] - overall_sc);
      denom += XMX(i,p7G_N) + XMX(i,p7G_J) + XMX(i,p7G_C);
      
      denom = 1.0 / denom;
      for (k = 1; k < M; k++) {  MMX(i,k) *= denom; IMX(i,k) *= denom; }
      MMX(i,M)     *= denom;
      XMX(i,p7G_N) *= denom;
      XMX(i,p7G_J) *= denom;
      XMX(i,p7G_C) *= denom;
    }
  return eslOK;
}
コード例 #3
0
ファイル: p7_omx.c プロジェクト: dboudour2002/musicHMMER
/* Function:  p7_omx_FDeconvert()
 * Synopsis:  Convert an optimized DP matrix to generic one.
 * Incept:    SRE, Tue Aug 19 17:58:13 2008 [Janelia]
 *
 * Purpose:   Convert the 32-bit float values in optimized DP matrix
 *            <ox> to a generic one <gx>. Caller provides <gx> with sufficient
 *            space to hold the <ox->M> by <ox->L> matrix.
 *            
 *            This function is used to gain access to the
 *            somewhat more powerful debugging and display
 *            tools available for generic DP matrices.
 */
int
p7_omx_FDeconvert(P7_OMX *ox, P7_GMX *gx)
{
  int Q = p7O_NQF(ox->M);
  int i, q, r, k;
  union { __m128 v; float p[4]; } u;
  float      **dp   = gx->dp;
  float       *xmx  = gx->xmx; 			    

  for (i = 0; i <= ox->L; i++)
    {
      MMX(i,0) = DMX(i,0) = IMX(i,0) = -eslINFINITY;
      for (q = 0; q < Q; q++)
	{
	  u.v = MMO(ox->dpf[i],q);  for (r = 0; r < 4; r++) { k = (Q*r)+q+1; if (k <= ox->M) MMX(i, (Q*r)+q+1) = u.p[r]; }
	  u.v = DMO(ox->dpf[i],q);  for (r = 0; r < 4; r++) { k = (Q*r)+q+1; if (k <= ox->M) DMX(i, (Q*r)+q+1) = u.p[r]; }
	  u.v = IMO(ox->dpf[i],q);  for (r = 0; r < 4; r++) { k = (Q*r)+q+1; if (k <= ox->M) IMX(i, (Q*r)+q+1) = u.p[r]; }
	}
      XMX(i,p7G_E) = ox->xmx[i*p7X_NXCELLS+p7X_E];
      XMX(i,p7G_N) = ox->xmx[i*p7X_NXCELLS+p7X_N];
      XMX(i,p7G_J) = ox->xmx[i*p7X_NXCELLS+p7X_J];
      XMX(i,p7G_B) = ox->xmx[i*p7X_NXCELLS+p7X_B];
      XMX(i,p7G_C) = ox->xmx[i*p7X_NXCELLS+p7X_C];
    }
  gx->L = ox->L;
  gx->M = ox->M;
  return eslOK;
}
コード例 #4
0
ファイル: generic_optacc.c プロジェクト: ElofssonLab/TOPCONS2
static inline int
select_i(const P7_PROFILE *gm, const P7_GMX *gx, int i, int k)
{
  float      **dp   = gx->dp;	/* so {MDI}MX() macros work       */
  float const *tsc  = gm->tsc;	/* so TSCDELTA() macro works */
  float        path[2];

  path[0] = TSCDELTA(p7P_MI, k) * MMX(i-1,k);
  path[1] = TSCDELTA(p7P_II, k) * IMX(i-1,k);
  return ((path[0] >= path[1]) ? p7T_M : p7T_I);
}
コード例 #5
0
ファイル: generic_optacc.c プロジェクト: ElofssonLab/TOPCONS2
static inline int
select_m(const P7_PROFILE *gm, const P7_GMX *gx, int i, int k)
{
  float      **dp   = gx->dp;	/* so {MDI}MX() macros work       */
  float       *xmx  = gx->xmx;	/* so XMX() macro works           */
  float const *tsc  = gm->tsc;	/* so TSCDELTA() macro works */
  float path[4];
  int   state[4] = { p7T_M, p7T_I, p7T_D, p7T_B };

  path[0] = TSCDELTA(p7P_MM, k-1) * MMX(i-1,k-1);
  path[1] = TSCDELTA(p7P_IM, k-1) * IMX(i-1,k-1);
  path[2] = TSCDELTA(p7P_DM, k-1) * DMX(i-1,k-1);
  path[3] = TSCDELTA(p7P_BM, k-1) * XMX(i-1,p7G_B);
  return state[esl_vec_FArgMax(path, 4)];
}
コード例 #6
0
ファイル: generic_optacc.c プロジェクト: ElofssonLab/TOPCONS2
static inline float
get_postprob(const P7_GMX *pp, int scur, int sprv, int k, int i)
{
  float **dp  = pp->dp;
  float  *xmx = pp->xmx;

  switch (scur) {
  case p7T_M: return MMX(i,k);
  case p7T_I: return IMX(i,k);
  case p7T_N: if (sprv == scur) return XMX(i,p7G_N);
  case p7T_C: if (sprv == scur) return XMX(i,p7G_C); 
  case p7T_J: if (sprv == scur) return XMX(i,p7G_J); 
  default:    return 0.0;
  }
}
コード例 #7
0
ファイル: generic_viterbi.c プロジェクト: TuftsBCB/SMURFBuild
/* Function:  p7_GViterbi()
 * Synopsis:  The Viterbi algorithm.
 * Incept:    SRE, Tue Jan 30 10:50:53 2007 [Einstein's, St. Louis]
 * 
 * Purpose:   The standard Viterbi dynamic programming algorithm. 
 *
 *            Given a digital sequence <dsq> of length <L>, a profile
 *            <gm>, and DP matrix <gx> allocated for at least <L>
 *            by <gm->M> cells; calculate the maximum scoring path by
 *            Viterbi; return the Viterbi score in <ret_sc>, and the
 *            Viterbi matrix is in <gx>.
 *            
 *            The caller may then retrieve the Viterbi path by calling
 *            <p7_GTrace()>.
 *           
 *            The Viterbi lod score is returned in nats. The caller
 *            needs to subtract a null model lod score, then convert
 *            to bits.
 *           
 * Args:      dsq    - sequence in digitized form, 1..L
 *            L      - length of dsq
 *            gm     - profile. 
 *            gx     - DP matrix with room for an MxL alignment
 *            opt_sc - optRETURN: Viterbi lod score in nats
 *           
 * Return:   <eslOK> on success.
 */
int
p7_GViterbi(const ESL_DSQ *dsq, int L, const P7_PROFILE *gm, P7_GMX *gx, float *opt_sc)
{
  float const *tsc  = gm->tsc;
  float      **dp   = gx->dp;
  float       *xmx  = gx->xmx;
  int          M    = gm->M;
  int          i,k;
  float        esc  = p7_profile_IsLocal(gm) ? 0 : -eslINFINITY;

  /* Initialization of the zero row.  */
  XMX(0,p7G_N) = 0;                                           /* S->N, p=1            */
  XMX(0,p7G_B) = gm->xsc[p7P_N][p7P_MOVE];                    /* S->N->B, no N-tail   */
  XMX(0,p7G_E) = XMX(0,p7G_C) = XMX(0,p7G_J) = -eslINFINITY;  /* need seq to get here */
  for (k = 0; k <= gm->M; k++)
    MMX(0,k) = IMX(0,k) = DMX(0,k) = -eslINFINITY;            /* need seq to get here */

  /* DP recursion */
  for (i = 1; i <= L; i++) 
    {
      float const *rsc = gm->rsc[dsq[i]];
      float sc;

      MMX(i,0) = IMX(i,0) = DMX(i,0) = -eslINFINITY;
      XMX(i,p7G_E) = -eslINFINITY;
    
      for (k = 1; k < gm->M; k++) 
	{
  	  /* match state */
	  sc       = ESL_MAX(    MMX(i-1,k-1)   + TSC(p7P_MM,k-1), 
				 IMX(i-1,k-1)   + TSC(p7P_IM,k-1));
	  sc       = ESL_MAX(sc, DMX(i-1,k-1)   + TSC(p7P_DM,k-1));
	  sc       = ESL_MAX(sc, XMX(i-1,p7G_B) + TSC(p7P_BM,k-1));
	  MMX(i,k) = sc + MSC(k);

	  /* E state update */
	  XMX(i,p7G_E) = ESL_MAX(XMX(i,p7G_E), MMX(i,k) + esc);
	  /* in Viterbi alignments, Dk->E can't win in local mode (and
	   * isn't possible in glocal mode), so don't bother
	   * looking. */

	  /* insert state */
	  sc = ESL_MAX(MMX(i-1,k) + TSC(p7P_MI,k),
		       IMX(i-1,k) + TSC(p7P_II,k));
	  IMX(i,k) = sc + ISC(k);
	  
	  /* delete state */
	  DMX(i,k) =  ESL_MAX(MMX(i,k-1) + TSC(p7P_MD,k-1),
			      DMX(i,k-1) + TSC(p7P_DD,k-1));
	}

      /* Unrolled match state M. */
      sc       = ESL_MAX(    MMX(i-1,M-1)   + TSC(p7P_MM,M-1),
			     IMX(i-1,M-1)   + TSC(p7P_IM,M-1));
      sc       = ESL_MAX(sc, DMX(i-1,M-1 )  + TSC(p7P_DM,M-1));
      sc       = ESL_MAX(sc, XMX(i-1,p7G_B) + TSC(p7P_BM,M-1));
      MMX(i,M) = sc + MSC(M);
      
      /* Unrolled delete state D_M 
       * (Unlike internal Dk->E transitions that can never appear in 
       * Viterbi alignments, D_M->E is possible in glocal mode.)
       */
      DMX(i,M) = ESL_MAX(MMX(i,M-1) + TSC(p7P_MD,M-1),
			 DMX(i,M-1) + TSC(p7P_DD,M-1));

      /* E state update; transition from M_M scores 0 by def'n */
      sc  =          ESL_MAX(XMX(i,p7G_E), MMX(i,M));
      XMX(i,p7G_E) = ESL_MAX(sc,           DMX(i,M));
   
      /* Now the special states. E must already be done, and B must follow N,J.
       * remember, N, C and J emissions are zero score by definition.
       */
      /* J state */
      sc           =             XMX(i-1,p7G_J) + gm->xsc[p7P_J][p7P_LOOP];   /* J->J */
      XMX(i,p7G_J) = ESL_MAX(sc, XMX(i,  p7G_E) + gm->xsc[p7P_E][p7P_LOOP]);  /* E->J is E's "loop" */
      
      /* C state */
      sc           =             XMX(i-1,p7G_C) + gm->xsc[p7P_C][p7P_LOOP];
      XMX(i,p7G_C) = ESL_MAX(sc, XMX(i,  p7G_E) + gm->xsc[p7P_E][p7P_MOVE]);
      
      /* N state */
      XMX(i,p7G_N) = XMX(i-1,p7G_N) + gm->xsc[p7P_N][p7P_LOOP];
      
      /* B state */
      sc           =             XMX(i,p7G_N) + gm->xsc[p7P_N][p7P_MOVE];   /* N->B is N's move */
      XMX(i,p7G_B) = ESL_MAX(sc, XMX(i,p7G_J) + gm->xsc[p7P_J][p7P_MOVE]);  /* J->B is J's move */
    }
  
  /* T state (not stored) */
  if (opt_sc != NULL) *opt_sc = XMX(L,p7G_C) + gm->xsc[p7P_C][p7P_MOVE];
  gx->M = gm->M;
  gx->L = L;
  return eslOK;
}
コード例 #8
0
ファイル: generic_optacc.c プロジェクト: ElofssonLab/TOPCONS2
/* Function:  p7_GOptimalAccuracy()
 * Synopsis:  Optimal accuracy decoding: fill. 
 * Incept:    SRE, Fri Feb 29 11:56:49 2008 [Janelia]
 *
 * Purpose:   Calculates the fill step of the optimal accuracy decoding
 *            algorithm \citep{Kall05}.
 *            
 *            Caller provides the posterior decoding matrix <pp>,
 *            which was calculated by Forward/Backward on a target sequence
 *            of length <L> using the query model <gm>.
 *            
 *            Caller also provides a DP matrix <gx>, allocated for the
 *            <gm->M> by <pp->L> comparison. The routine fills this in
 *            with OA scores.
 *            
 * Args:      gm    - query profile      
 *            pp    - posterior decoding matrix created by <p7_GPosteriorDecoding()>
 *            gx    - RESULT: caller provided DP matrix for <gm->M> by <L> 
 *            ret_e - RETURN: expected number of correctly decoded positions 
 *
 * Returns:   <eslOK> on success, and <*ret_e> contains the final OA
 *            score, which is the expected number of correctly decoded
 *            positions in the target sequence (up to <L>).
 *
 * Throws:    (no abnormal error conditions)
 */
int
p7_GOptimalAccuracy(const P7_PROFILE *gm, const P7_GMX *pp, P7_GMX *gx, float *ret_e)
{
  int          L    = pp->L;
  float      **dp   = gx->dp;
  float       *xmx  = gx->xmx;
  float const *tsc  = gm->tsc;
  int          i,k;
  int          M    = gm->M;
  float        esc  = p7_profile_IsLocal(gm) ? 1.0 : 0.0;
  float        t1, t2;

  /* Initialization of the zero row (i=0; no residues to account for.  */
  XMX(0,p7G_N) = 0.;                                          /* S->N, p=1            */
  XMX(0,p7G_B) = 0.;                                          /* S->N->B, no N-tail   */
  XMX(0,p7G_E) = XMX(0,p7G_C) = XMX(0,p7G_J) = -eslINFINITY;  /* need seq to get here */
  for (k = 0; k <= M; k++)
    MMX(0,k) = IMX(0,k) = DMX(0,k) = -eslINFINITY;            /* need seq to get here */

  for (i = 1; i <= L; i++)
    {
      MMX(i,0) = IMX(i,0) = DMX(i,0) = XMX(i,p7G_E) = -eslINFINITY;

      for (k = 1; k < M; k++)
	{
	  MMX(i,k)     = ESL_MAX(ESL_MAX(TSCDELTA(p7P_MM, k-1) * (MMX(i-1,k-1)  + pp->dp[i][k*p7G_NSCELLS + p7G_M]),
					 TSCDELTA(p7P_IM, k-1) * (IMX(i-1,k-1)  + pp->dp[i][k*p7G_NSCELLS + p7G_M])),
				 ESL_MAX(TSCDELTA(p7P_DM, k-1) * (DMX(i-1,k-1)  + pp->dp[i][k*p7G_NSCELLS + p7G_M]),
					 TSCDELTA(p7P_BM, k-1) * (XMX(i-1,p7G_B)+ pp->dp[i][k*p7G_NSCELLS + p7G_M])));

	  XMX(i,p7G_E) = ESL_MAX(XMX(i,p7G_E), 
				 esc * MMX(i,k));

	  IMX(i,k)     = ESL_MAX(TSCDELTA(p7P_MI, k) * (MMX(i-1,k) + pp->dp[i][k*p7G_NSCELLS + p7G_I]),
				 TSCDELTA(p7P_II, k) * (IMX(i-1,k) + pp->dp[i][k*p7G_NSCELLS + p7G_I]));

	  DMX(i,k)     = ESL_MAX(TSCDELTA(p7P_MD, k-1) * MMX(i,k-1),
				 TSCDELTA(p7P_DD, k-1) * DMX(i,k-1));
	} 

      /* last node (k=M) is unrolled; it has no I state, and it has a p=1.0 {MD}->E transition even in local mode */
      MMX(i,M)     = ESL_MAX(ESL_MAX(TSCDELTA(p7P_MM, M-1) * (MMX(i-1,M-1)  + pp->dp[i][M*p7G_NSCELLS + p7G_M]),
				     TSCDELTA(p7P_IM, M-1) * (IMX(i-1,M-1)  + pp->dp[i][M*p7G_NSCELLS + p7G_M])),
			     ESL_MAX(TSCDELTA(p7P_DM, M-1) * (DMX(i-1,M-1)  + pp->dp[i][M*p7G_NSCELLS + p7G_M]),
				     TSCDELTA(p7P_BM, M-1) * (XMX(i-1,p7G_B)+ pp->dp[i][M*p7G_NSCELLS + p7G_M])));

      DMX(i,M)     = ESL_MAX(TSCDELTA(p7P_MD, M-1) * MMX(i,M-1),
			     TSCDELTA(p7P_DD, M-1) * DMX(i,M-1));

      /* note: we calculated XMX before DMX in the loop, because we probably had MMX(i,k) in a register. 
       * but now we can't do that, because XMX depends on DMX
       */
      XMX(i,p7G_E) = ESL_MAX(XMX(i,p7G_E), ESL_MAX(MMX(i,M), DMX(i, M)));

      /* now the special states; it's important that E is already done, and B is done after N,J */
      t1 = ( (gm->xsc[p7P_J][p7P_LOOP] == -eslINFINITY) ? FLT_MIN : 1.0);
      t2 = ( (gm->xsc[p7P_E][p7P_LOOP] == -eslINFINITY) ? FLT_MIN : 1.0);
      XMX(i, p7G_J) = ESL_MAX( t1 * (XMX(i-1,p7G_J) + pp->xmx[i*p7G_NXCELLS + p7G_J]),
			       t2 * XMX(i,  p7G_E));

      t1 = ( (gm->xsc[p7P_C][p7P_LOOP] == -eslINFINITY) ? FLT_MIN : 1.0);
      t2 = ( (gm->xsc[p7P_E][p7P_MOVE] == -eslINFINITY) ? FLT_MIN : 1.0);
      XMX(i,p7G_C) = ESL_MAX( t1 * (XMX(i-1,p7G_C) + pp->xmx[i*p7G_NXCELLS + p7G_C]),
			      t2 * XMX(i,  p7G_E));
      
      t1 = ( (gm->xsc[p7P_N][p7P_LOOP] == -eslINFINITY) ? FLT_MIN : 1.0);
      XMX(i,p7G_N) = t1 *  (XMX(i-1,p7G_N) + pp->xmx[i*p7G_NXCELLS + p7G_N]);

      t1 = ( (gm->xsc[p7P_N][p7P_MOVE] == -eslINFINITY) ? FLT_MIN : 1.0);
      t2 = ( (gm->xsc[p7P_J][p7P_MOVE] == -eslINFINITY) ? FLT_MIN : 1.0);
      XMX(i,p7G_B) = ESL_MAX( t1 * XMX(i,  p7G_N), 
			      t2 * XMX(i,  p7G_J));
    }
  
  *ret_e = XMX(L,p7G_C);
  return eslOK;
}
コード例 #9
0
/* Function:  p7_GStochasticTrace()
 * Synopsis:  Stochastic traceback of a Forward matrix.
 * Incept:    SRE, Thu Jan  3 15:39:20 2008 [Janelia]
 *
 * Purpose:   Stochastic traceback of Forward matrix <gx> to
 *            sample an alignment of digital sequence <dsq>
 *            (of length <L>) to the profile <gm>. 
 *            
 *            The sampled traceback is returned in <tr>, which the
 *            caller must have at least made an initial allocation of
 *            (the <tr> will be grown as needed here).
 *
 * Args:      r      - source of random numbers
 *            dsq    - digital sequence aligned to, 1..L 
 *            L      - length of dsq
 *            gm     - profile
 *            mx     - Forward matrix to trace, L x M
 *            tr     - storage for the recovered traceback.
 *
 * Returns:   <eslOK> on success.
 */
int
p7_GStochasticTrace(ESL_RANDOMNESS *r, const ESL_DSQ *dsq, int L, const P7_PROFILE *gm, const P7_GMX *gx, P7_TRACE *tr)
{
  int     status;
  int     i;			/* position in seq (1..L) */
  int     k;			/* position in model (1..M) */
  int     M   = gm->M;
  float **dp  = gx->dp;
  float  *xmx = gx->xmx;
  float const *tsc  = gm->tsc;
  float  *sc;			/* scores of possible choices: up to 2M-1, in the case of exits to E  */
  int     scur, sprv;

  /* we'll index M states as 1..M, and D states as 2..M = M+2..2M: M0, D1 are impossibles. */
  ESL_ALLOC(sc, sizeof(float) * (2*M+1)); 

  k = 0;
  i = L;			
  if ((status = p7_trace_Append(tr, p7T_T, k, i)) != eslOK) goto ERROR;
  if ((status = p7_trace_Append(tr, p7T_C, k, i)) != eslOK) goto ERROR;
  sprv = p7T_C;
  while (sprv != p7T_S) 
    {
      switch (tr->st[tr->N-1]) {
      /* C(i) comes from C(i-1) or E(i) */
      case p7T_C:		
	if   (XMX(i,p7G_C) == -eslINFINITY) ESL_XEXCEPTION(eslFAIL, "impossible C reached at i=%d", i);

	sc[0] = XMX(i-1, p7G_C) + gm->xsc[p7P_C][p7P_LOOP];
	sc[1] = XMX(i,   p7G_E) + gm->xsc[p7P_E][p7P_MOVE];
	esl_vec_FLogNorm(sc, 2); 
	scur = (esl_rnd_FChoose(r, sc, 2) == 0) ? p7T_C : p7T_E;
	break;

      /* E connects from any M or D state. k set here */
      case p7T_E:	
	if (XMX(i, p7G_E) == -eslINFINITY) ESL_XEXCEPTION(eslFAIL, "impossible E reached at i=%d", i);
	
	if (p7_profile_IsLocal(gm)) { /* local models come from any M, D */
	  sc[0] = sc[M+1] = -eslINFINITY;
	  for (k = 1; k <= M; k++) sc[k]   = MMX(i,k);
	  for (k = 2; k <= M; k++) sc[k+M] = DMX(i,k);
	  esl_vec_FLogNorm(sc, 2*M+1); /* now sc is a prob vector */
	  k = esl_rnd_FChoose(r, sc, 2*M+1);
	  if (k <= M)    scur = p7T_M;
	  else { k -= M; scur = p7T_D; }
	} else { 		/* glocal models come from M_M or D_M  */
	  k     = M;
	  sc[0] = MMX(i,M);
	  sc[1] = DMX(i,M);
	  esl_vec_FLogNorm(sc, 2); /* now sc is a prob vector */
	  scur = (esl_rnd_FChoose(r, sc, 2) == 0) ? p7T_M : p7T_D;
	}
	break;

      /* M connects from {MDI} i-1,k-1, or B */
      case p7T_M:
	if (MMX(i,k) == -eslINFINITY) ESL_XEXCEPTION(eslFAIL, "impossible M reached at k=%d,i=%d", k,i);
	
	sc[0] = XMX(i-1,p7G_B) + TSC(p7P_BM, k-1);
	sc[1] = MMX(i-1,k-1)   + TSC(p7P_MM, k-1);
	sc[2] = IMX(i-1,k-1)   + TSC(p7P_IM, k-1);
	sc[3] = DMX(i-1,k-1)   + TSC(p7P_DM, k-1);
	esl_vec_FLogNorm(sc, 4); 
	switch (esl_rnd_FChoose(r, sc, 4)) {
	case 0: scur = p7T_B;   break;
	case 1: scur = p7T_M;   break;
	case 2: scur = p7T_I;   break;
	case 3: scur = p7T_D;   break;
	}
	k--; 
	i--;
	break;

      /* D connects from M,D at i,k-1 */
      case p7T_D:
	if (DMX(i, k) == -eslINFINITY) ESL_XEXCEPTION(eslFAIL, "impossible D reached at k=%d,i=%d", k,i);

	sc[0] = MMX(i, k-1) + TSC(p7P_MD, k-1);
	sc[1] = DMX(i, k-1) + TSC(p7P_DD, k-1);
	esl_vec_FLogNorm(sc, 2); 
	scur = (esl_rnd_FChoose(r, sc, 2) == 0) ? p7T_M : p7T_D;
	k--;
	break;

      /* I connects from M,I at i-1,k */
      case p7T_I:
	if (IMX(i,k) == -eslINFINITY) ESL_XEXCEPTION(eslFAIL, "impossible I reached at k=%d,i=%d", k,i);
	
	sc[0] = MMX(i-1,k) + TSC(p7P_MI, k);
	sc[1] = IMX(i-1,k) + TSC(p7P_II, k);
	esl_vec_FLogNorm(sc, 2); 
	scur = (esl_rnd_FChoose(r, sc, 2) == 0) ? p7T_M : p7T_I;
	i--;
	break;

      /* N connects from S, N */
      case p7T_N:
	if (XMX(i, p7G_N) == -eslINFINITY) ESL_XEXCEPTION(eslFAIL, "impossible N reached at i=%d", i);
	scur = (i == 0) ? p7T_S : p7T_N;
	break;

      /* B connects from N, J */
      case p7T_B:			
	if (XMX(i,p7G_B) == -eslINFINITY) ESL_XEXCEPTION(eslFAIL, "impossible B reached at i=%d", i);

	sc[0] = XMX(i, p7G_N) + gm->xsc[p7P_N][p7P_MOVE];
	sc[1] = XMX(i, p7G_J) + gm->xsc[p7P_J][p7P_MOVE];
	esl_vec_FLogNorm(sc, 2); 
	scur = (esl_rnd_FChoose(r, sc, 2) == 0) ? p7T_N : p7T_J;
	break;

      /* J connects from E(i) or J(i-1) */
      case p7T_J:	
	if (XMX(i,p7G_J) == -eslINFINITY) ESL_XEXCEPTION(eslFAIL, "impossible J reached at i=%d", i);
	
	sc[0] = XMX(i-1,p7G_J) + gm->xsc[p7P_J][p7P_LOOP];
	sc[1] = XMX(i,  p7G_E) + gm->xsc[p7P_E][p7P_LOOP];
	esl_vec_FLogNorm(sc, 2); 
	scur = (esl_rnd_FChoose(r, sc, 2) == 0) ? p7T_J : p7T_E;
	break;

      default: ESL_XEXCEPTION(eslFAIL, "bogus state in traceback");
      } /* end switch over statetype[tpos-1] */

      /* Append this state and the current i,k to be explained to the growing trace */
      if ((status = p7_trace_Append(tr, scur, k, i)) != eslOK) goto ERROR;

      /* For NCJ, we had to defer i decrement. */
      if ( (scur == p7T_N || scur == p7T_J || scur == p7T_C) && scur == sprv) i--;

      sprv = scur;
    } /* end traceback, at S state */

  if ((status = p7_trace_Reverse(tr)) != eslOK) goto ERROR;
  tr->M = gm->M;
  tr->L = L;
  free(sc);
  return eslOK;

 ERROR:
  if (sc != NULL) free(sc);
  return status;
}
コード例 #10
0
/* Function:  p7_GViterbi_longtarget()
 * Synopsis:  The Viterbi algorithm.
 * Incept:    SRE, Tue Jan 30 10:50:53 2007 [Einstein's, St. Louis]
 *
 * Purpose:   Given a digital sequence <dsq> of length <L>, a profile
 *            <gm>, and DP matrix <gx> allocated for at least <L>
 *            by <gm->M> cells; calculates the Viterbi score for
 *            regions of <dsq>, and captures the positions at which
 *            such regions exceed the score required to be
 *            significant in the eyes of the calling function
 *            (usually p=0.001).
 *
 * Args:      dsq    - sequence in digitized form, 1..L
 *            L      - length of dsq
 *            gm     - profile.
 *            gx     - DP matrix with room for an MxL alignment
 *            filtersc   - null or bias correction, required for translating a P-value threshold into a score threshold
 *            P       - p-value below which a region is captured as being above threshold
 *            windowlist - RETURN: array of hit windows (start and end of diagonal) for the above-threshold areas
 *
 * Return:   <eslOK> on success.
 */
int
p7_GViterbi_longtarget(const ESL_DSQ *dsq, int L, const P7_PROFILE *gm, P7_GMX *gx,
                       float filtersc, double P, P7_HMM_WINDOWLIST *windowlist)
{
  float const *tsc  = gm->tsc;
  float      **dp   = gx->dp;
  float       *xmx  = gx->xmx;
  int          M    = gm->M;
  int          i,k;
  float        esc  = p7_profile_IsLocal(gm) ? 0 : -eslINFINITY;

  int16_t sc_thresh;
  float invP;

  /* Initialization of the zero row.  */
  XMX(0,p7G_N) = 0;                                           /* S->N, p=1            */
  XMX(0,p7G_B) = gm->xsc[p7P_N][p7P_MOVE];                    /* S->N->B, no N-tail   */
  XMX(0,p7G_E) = XMX(0,p7G_C) = XMX(0,p7G_J) = -eslINFINITY;  /* need seq to get here */
  for (k = 0; k <= gm->M; k++)
    MMX(0,k) = IMX(0,k) = DMX(0,k) = -eslINFINITY;            /* need seq to get here */


  /*
   *  In p7_ViterbiFilter, converting from a scaled int Viterbi score
   *  S (aka xE the score getting to state E) to a probability
   *  goes like this:
   *    S =  XMX(i,p7G_E)
   *    vsc =  S + gm->xsc[p7P_E][p7P_MOVE] +  gm->xsc[p7P_C][p7P_MOVE];
   *    P  = esl_gumbel_surv((vfsc - filtersc) / eslCONST_LOG2  ,  gm->evparam[p7_VMU],  gm->evparam[p7_VLAMBDA]);
   *  and we're computing the threshold vsc, so invert it:
   *    (vsc - filtersc) /  eslCONST_LOG2 = esl_gumbel_invsurv( P, gm->evparam[p7_VMU],  gm->evparam[p7_VLAMBDA])
   *    vsc = filtersc + eslCONST_LOG2 * esl_gumbel_invsurv( P, gm->evparam[p7_VMU],  gm->evparam[p7_VLAMBDA])
   *    S = vsc - gm->xsc[p7P_E][p7P_MOVE] -  gm->xsc[p7P_C][p7P_MOVE]
   */
    invP = esl_gumbel_invsurv(P, gm->evparam[p7_VMU],  gm->evparam[p7_VLAMBDA]);
    sc_thresh =   (int) ceil (filtersc + (eslCONST_LOG2 * invP)
                  - gm->xsc[p7P_E][p7P_MOVE] -  gm->xsc[p7P_C][p7P_MOVE] );


  /* DP recursion */
  for (i = 1; i <= L; i++)
  {
      float const *rsc = gm->rsc[dsq[i]];
      float sc;

      MMX(i,0) = IMX(i,0) = DMX(i,0) = -eslINFINITY;
      XMX(i,p7G_E) = -eslINFINITY;

      for (k = 1; k < gm->M; k++)
      {
          /* match state */
        sc       = ESL_MAX(    MMX(i-1,k-1)   + TSC(p7P_MM,k-1),
             IMX(i-1,k-1)   + TSC(p7P_IM,k-1));
        sc       = ESL_MAX(sc, DMX(i-1,k-1)   + TSC(p7P_DM,k-1));
        sc       = ESL_MAX(sc, XMX(i-1,p7G_B) + TSC(p7P_BM,k-1));
        MMX(i,k) = sc + MSC(k);

        /* E state update */
        XMX(i,p7G_E) = ESL_MAX(XMX(i,p7G_E), MMX(i,k) + esc);
        /* in Viterbi alignments, Dk->E can't win in local mode (and
         * isn't possible in glocal mode), so don't bother
         * looking. */

        /* insert state */
        sc = ESL_MAX(MMX(i-1,k) + TSC(p7P_MI,k),
               IMX(i-1,k) + TSC(p7P_II,k));
        IMX(i,k) = sc + ISC(k);

        /* delete state */
        DMX(i,k) =  ESL_MAX(MMX(i,k-1) + TSC(p7P_MD,k-1),
                DMX(i,k-1) + TSC(p7P_DD,k-1));
      }

      /* Unrolled match state M. */
      sc       = ESL_MAX(    MMX(i-1,M-1)   + TSC(p7P_MM,M-1),
           IMX(i-1,M-1)   + TSC(p7P_IM,M-1));
      sc       = ESL_MAX(sc, DMX(i-1,M-1 )  + TSC(p7P_DM,M-1));
      sc       = ESL_MAX(sc, XMX(i-1,p7G_B) + TSC(p7P_BM,M-1));
      MMX(i,M) = sc + MSC(M);

      /* Unrolled delete state D_M
       * (Unlike internal Dk->E transitions that can never appear in
       * Viterbi alignments, D_M->E is possible in glocal mode.)
       */
      DMX(i,M) = ESL_MAX(MMX(i,M-1) + TSC(p7P_MD,M-1),
       DMX(i,M-1) + TSC(p7P_DD,M-1));

      /* E state update; transition from M_M scores 0 by def'n */
      sc  =          ESL_MAX(XMX(i,p7G_E), MMX(i,M));
      XMX(i,p7G_E) = ESL_MAX(sc,           DMX(i,M));


      if (XMX(i,p7G_E) >= sc_thresh) {
        //hit score threshold. Add a window to the list, then reset scores.

        for (k = 1; k <= gm->M; k++) {
          if (MMX(i,k) == XMX(i,p7G_E)) {
            p7_hmmwindow_new(windowlist, 0, i, 0, k, 1, 0.0, p7_NOCOMPLEMENT );
          }
          MMX(i,0) = IMX(i,0) = DMX(i,0) = -eslINFINITY;
        }
      } else {

        /* Now the special states. E must already be done, and B must follow N,J.
         * remember, N, C and J emissions are zero score by definition.
         */
        /* J state */
        sc           =             XMX(i-1,p7G_J) + gm->xsc[p7P_J][p7P_LOOP];   /* J->J */
        XMX(i,p7G_J) = ESL_MAX(sc, XMX(i,  p7G_E) + gm->xsc[p7P_E][p7P_LOOP]);  /* E->J is E's "loop" */

        /* C state */
        sc           =             XMX(i-1,p7G_C) + gm->xsc[p7P_C][p7P_LOOP];
        XMX(i,p7G_C) = ESL_MAX(sc, XMX(i,  p7G_E) + gm->xsc[p7P_E][p7P_MOVE]);

        /* N state */
        XMX(i,p7G_N) = XMX(i-1,p7G_N) + gm->xsc[p7P_N][p7P_LOOP];

        /* B state */
        sc           =             XMX(i,p7G_N) + gm->xsc[p7P_N][p7P_MOVE];   /* N->B is N's move */
        XMX(i,p7G_B) = ESL_MAX(sc, XMX(i,p7G_J) + gm->xsc[p7P_J][p7P_MOVE]);  /* J->B is J's move */

      }
   }

  /* T state (not stored) */
  gx->M = gm->M;
  gx->L = L;
  return eslOK;
}
コード例 #11
0
ファイル: generic_vtrace.c プロジェクト: TuftsBCB/SMURFBuild
/* Function: p7_GTrace()
 * Incept:   SRE, Thu Feb  1 10:25:56 2007 [UA 8018 St. Louis to Dulles]
 * 
 * Purpose:  Traceback of a Viterbi matrix: retrieval 
 *           of optimum alignment.
 *           
 *           This function is currently implemented as a
 *           reconstruction traceback, rather than using a shadow
 *           matrix. Because H3 uses floating point scores, and we
 *           can't compare floats for equality, we have to compare
 *           floats for near-equality and therefore, formally, we can
 *           only guarantee a near-optimal traceback. However, even in
 *           the unlikely event that a suboptimal is returned, the
 *           score difference from true optimal will be negligible.
 *           
 * Args:     dsq    - digital sequence aligned to, 1..L 
 *           L      - length of <dsq>
 *           gm     - profile
 *           mx     - Viterbi matrix to trace, L x M
 *           tr     - storage for the recovered traceback.
 *           
 * Return:   <eslOK> on success.
 *           <eslFAIL> if even the optimal path has zero probability;
 *           in this case, the trace is set blank (<tr->N = 0>).
 *
 * Note:     Care is taken to evaluate the prev+tsc+emission
 *           calculations in exactly the same order that Viterbi did
 *           them, lest you get numerical problems with
 *           a+b+c = d; d-c != a+b because d,c are nearly equal.
 *           (This bug appeared in dev: xref J1/121.)
 */
int
p7_GTrace(const ESL_DSQ *dsq, int L, const P7_PROFILE *gm, const P7_GMX *gx, P7_TRACE *tr)
{
  int          i   = L;		/* position in seq (1..L)         */
  int          k   = 0;		/* position in model (1..M)       */
  int          M   = gm->M;
  float      **dp  = gx->dp;	/* so {MDI}MX() macros work       */
  float       *xmx = gx->xmx;	/* so XMX() macro works           */
  float        tol = 1e-5;	/* floating point "equality" test */
  float const *tsc = gm->tsc;
  int     sprv, scur;		/* previous, current state in trace */
  int     status;

#ifdef p7_DEBUGGING
  if (tr->N != 0) ESL_EXCEPTION(eslEINVAL, "trace isn't empty: forgot to Reuse()?");
#endif

  if ((status = p7_trace_Append(tr, p7T_T, k, i)) != eslOK) return status;
  if ((status = p7_trace_Append(tr, p7T_C, k, i)) != eslOK) return status;
  sprv = p7T_C;
  while (sprv != p7T_S) {
    float const *rsc = (i>0 ? gm->rsc[dsq[i]] : NULL);

    switch (sprv) {
    case p7T_C:		/* C(i) comes from C(i-1) or E(i) */
      if   (XMX(i,p7G_C) == -eslINFINITY) ESL_EXCEPTION(eslFAIL, "impossible C reached at i=%d", i);

      if      (esl_FCompare(XMX(i, p7G_C), XMX(i-1, p7G_C) + gm->xsc[p7P_C][p7P_LOOP], tol) == eslOK)  scur = p7T_C; 
      else if (esl_FCompare(XMX(i, p7G_C), XMX(i,   p7G_E) + gm->xsc[p7P_E][p7P_MOVE], tol) == eslOK)  scur = p7T_E; 
      else ESL_EXCEPTION(eslFAIL, "C at i=%d couldn't be traced", i);
      break;

    case p7T_E:		/* E connects from any M state. k set here */
      if (XMX(i, p7G_E) == -eslINFINITY) ESL_EXCEPTION(eslFAIL, "impossible E reached at i=%d", i);

      if (p7_profile_IsLocal(gm))
	{
	  scur = p7T_M;		/* can't come from D, in a *local* Viterbi trace. */
	  for (k = M; k >= 1; k--) if (esl_FCompare(XMX(i, p7G_E), MMX(i,k), tol) == eslOK) break;
	  if (k == 0) ESL_EXCEPTION(eslFAIL, "E at i=%d couldn't be traced", i);
	}
      else 			/* glocal mode: we either come from D_M or M_M */
	{
	  if      (esl_FCompare(XMX(i, p7G_E), MMX(i,M), tol) == eslOK) { scur = p7T_M; k = M; }
	  else if (esl_FCompare(XMX(i, p7G_E), DMX(i,M), tol) == eslOK) { scur = p7T_D; k = M; }
	  else    ESL_EXCEPTION(eslFAIL, "E at i=%d couldn't be traced", i);
	}
      break;      

    case p7T_M:			/* M connects from i-1,k-1, or B */
      if (MMX(i,k) == -eslINFINITY) ESL_EXCEPTION(eslFAIL, "impossible M reached at k=%d,i=%d", k,i);

      if      (esl_FCompare(MMX(i,k), XMX(i-1,p7G_B) + TSC(p7P_BM, k-1) + MSC(k), tol) == eslOK) scur = p7T_B;
      else if (esl_FCompare(MMX(i,k), MMX(i-1,k-1)   + TSC(p7P_MM, k-1) + MSC(k), tol) == eslOK) scur = p7T_M;
      else if (esl_FCompare(MMX(i,k), IMX(i-1,k-1)   + TSC(p7P_IM, k-1) + MSC(k), tol) == eslOK) scur = p7T_I;
      else if (esl_FCompare(MMX(i,k), DMX(i-1,k-1)   + TSC(p7P_DM, k-1) + MSC(k), tol) == eslOK) scur = p7T_D;
      else ESL_EXCEPTION(eslFAIL, "M at k=%d,i=%d couldn't be traced", k,i);
      k--; i--;
      break;

    case p7T_D:			/* D connects from M,D at i,k-1 */
      if (DMX(i, k) == -eslINFINITY) ESL_EXCEPTION(eslFAIL, "impossible D reached at k=%d,i=%d", k,i);

      if      (esl_FCompare(DMX(i,k), MMX(i, k-1) + TSC(p7P_MD, k-1), tol) == eslOK) scur = p7T_M;
      else if (esl_FCompare(DMX(i,k), DMX(i, k-1) + TSC(p7P_DD, k-1), tol) == eslOK) scur = p7T_D;
      else ESL_EXCEPTION(eslFAIL, "D at k=%d,i=%d couldn't be traced", k,i);
      k--;
      break;

    case p7T_I:			/* I connects from M,I at i-1,k*/
      if (IMX(i,k) == -eslINFINITY) ESL_EXCEPTION(eslFAIL, "impossible I reached at k=%d,i=%d", k,i);

      if      (esl_FCompare(IMX(i,k), MMX(i-1,k) + TSC(p7P_MI, k) + ISC(k), tol) == eslOK) scur = p7T_M;
      else if (esl_FCompare(IMX(i,k), IMX(i-1,k) + TSC(p7P_II, k) + ISC(k), tol) == eslOK) scur = p7T_I;
      else ESL_EXCEPTION(eslFAIL, "I at k=%d,i=%d couldn't be traced", k,i);
      i--;
      break;

    case p7T_N:			/* N connects from S, N */
      if (XMX(i, p7G_N) == -eslINFINITY) ESL_EXCEPTION(eslFAIL, "impossible N reached at i=%d", i);
      scur = ( (i == 0) ? p7T_S : p7T_N);
      break;

    case p7T_B:			/* B connects from N, J */
      if (XMX(i,p7G_B) == -eslINFINITY) ESL_EXCEPTION(eslFAIL, "impossible B reached at i=%d", i);

      if      (esl_FCompare(XMX(i,p7G_B), XMX(i, p7G_N) + gm->xsc[p7P_N][p7P_MOVE], tol) == eslOK) scur = p7T_N;
      else if (esl_FCompare(XMX(i,p7G_B), XMX(i, p7G_J) + gm->xsc[p7P_J][p7P_MOVE], tol) == eslOK) scur = p7T_J;
      else  ESL_EXCEPTION(eslFAIL, "B at i=%d couldn't be traced", i);
      break;

    case p7T_J:			/* J connects from E(i) or J(i-1) */
      if (XMX(i,p7G_J) == -eslINFINITY) ESL_EXCEPTION(eslFAIL, "impossible J reached at i=%d", i);

      if      (esl_FCompare(XMX(i,p7G_J), XMX(i-1,p7G_J) + gm->xsc[p7P_J][p7P_LOOP], tol) == eslOK) scur = p7T_J; 
      else if (esl_FCompare(XMX(i,p7G_J), XMX(i,  p7G_E) + gm->xsc[p7P_E][p7P_LOOP], tol) == eslOK) scur = p7T_E; 
      else  ESL_EXCEPTION(eslFAIL, "J at i=%d couldn't be traced", i);
      break;

    default: ESL_EXCEPTION(eslFAIL, "bogus state in traceback");
    } /* end switch over statetype[tpos-1] */

    /* Append this state and the current i,k to be explained to the growing trace */
    if ((status = p7_trace_Append(tr, scur, k, i)) != eslOK) return status;

    /* For NCJ, we had to defer i decrement. */
    if ( (scur == p7T_N || scur == p7T_J || scur == p7T_C) && scur == sprv) i--;

    sprv = scur;
  } /* end traceback, at S state */

  tr->M = gm->M;
  tr->L = L;
  return p7_trace_Reverse(tr);
}
コード例 #12
0
ファイル: generic_null2.c プロジェクト: TuftsBCB/SMURFBuild
/* Function:  p7_GNull2_ByExpectation()
 * Synopsis:  Calculate null2 model from posterior probabilities.
 * Incept:    SRE, Thu Feb 28 09:52:28 2008 [Janelia]
 *
 * Purpose:   Calculate the "null2" model for the envelope encompassed
 *            by a posterior probability calculation <pp> for model
 *            <gm>.  Return the null2 odds emission probabilities
 *            $\frac{f'{x}}{f{x}}$ in <null2>, which caller
 *            provides as space for at least <alphabet->Kp> residues.
 *            
 *            The expectation method is applied to envelopes in
 *            simple, well resolved regions (regions containing just a
 *            single envelope, where no stochastic traceback
 *            clustering was required).
 *            
 *            Make sure that the posterior probability matrix <pp> has
 *            been calculated by the caller for only the envelope; thus
 *            its rows are numbered <1..Ld>, for envelope <ienv..jenv>
 *            of length <Ld=jenv-ienv+1>.
 *            
 * Args:      gm    - profile, in any mode, target length model set to <L>
 *            pp    - posterior prob matrix, for <gm> against domain envelope <dsq+i-1> (offset)
 *            null2 - RETURN: null2 odds ratios per residue; <0..Kp-1>; caller allocated space
 *
 * Returns:   <eslOK> on success; <null2> contains the null2 scores. The 0
 *            row of <pp> has been used as temp space, and happens to contain
 *            the expected frequency that each M,I,N,C,J state is used in this
 *            <pp> matrix to generate residues.
 *
 * Throws:    (no abnormal error conditions)
 */
int
p7_GNull2_ByExpectation(const P7_PROFILE *gm, P7_GMX *pp, float *null2)
{
  int      M      = gm->M;
  int      Ld     = pp->L;
  float  **dp     = pp->dp;
  float   *xmx    = pp->xmx;
  float    xfactor;
  int      x;			/* over symbols 0..K-1                       */
  int      i;			/* over offset envelope dsq positions 1..Ld  */
  int      k;			/* over model M states 1..M, I states 1..M-1 */

  /* Calculate expected # of times that each emitting state was used
   * in generating the Ld residues in this domain.
   * The 0 row in <wrk> is used to hold these numbers.
   */
  esl_vec_FCopy(pp->dp[1],            (M+1)*p7G_NSCELLS, pp->dp[0]); 
  esl_vec_FCopy(pp->xmx+p7G_NXCELLS,  p7G_NXCELLS,       pp->xmx);   
  for (i = 2; i <= Ld; i++)
    {
      esl_vec_FAdd(pp->dp[0], pp->dp[i],             (M+1)*p7G_NSCELLS);
      esl_vec_FAdd(pp->xmx,   pp->xmx+i*p7G_NXCELLS, p7G_NXCELLS); 
    }
  
  /* Convert those expected #'s to log frequencies; these we'll use as
   * the log posterior weights.
   */
  esl_vec_FLog(pp->dp[0], (M+1)*p7G_NSCELLS);
  esl_vec_FLog(pp->xmx,   p7G_NXCELLS);  

  esl_vec_FIncrement(pp->dp[0], (M+1)*p7G_NSCELLS, -log((float)Ld));
  esl_vec_FIncrement(pp->xmx,   p7G_NXCELLS,       -log((float)Ld)); 

  /* Calculate null2's log odds emission probabilities, by taking
   * posterior weighted sum over all emission vectors used in paths
   * explaining the domain.
   * This is dog-slow; a point for future optimization.
   */
  xfactor = XMX(0,p7G_N);
  xfactor = p7_FLogsum(xfactor, XMX(0,p7G_C));
  xfactor = p7_FLogsum(xfactor, XMX(0,p7G_J));
  esl_vec_FSet(null2, gm->abc->K, -eslINFINITY);
  for (x = 0; x < gm->abc->K; x++)
    { 
      for (k = 1; k < M; k++)
	{
	  null2[x] = p7_FLogsum(null2[x], MMX(0,k) + p7P_MSC(gm, k, x));
	  null2[x] = p7_FLogsum(null2[x], IMX(0,k) + p7P_ISC(gm, k, x));
	}
      null2[x] = p7_FLogsum(null2[x], MMX(0,M) + p7P_MSC(gm, k, x));
      null2[x] = p7_FLogsum(null2[x], xfactor);
    }

  esl_vec_FExp (null2, gm->abc->K);
  /* now null2[x] = \frac{f_d(x)}{f_0(x)} for all x in alphabet,
   * 0..K-1, where f_d(x) are the ad hoc "null2" residue frequencies
   * for this envelope.
   */

  /* make valid scores for all degeneracies, by averaging the odds ratios. */
  esl_abc_FAvgScVec(gm->abc, null2); /* does not set gap, nonres, missing  */
  null2[gm->abc->K]    = 1.0;        /* gap character    */
  null2[gm->abc->Kp-2] = 1.0;	     /* nonresidue "*"   */
  null2[gm->abc->Kp-1] = 1.0;	     /* missing data "~" */

  return eslOK;
}
コード例 #13
0
/* Function:  p7_GForward()
 * Synopsis:  The Forward algorithm.
 * Incept:    SRE, Mon Apr 16 13:57:35 2007 [Janelia]
 *
 * Purpose:   The Forward dynamic programming algorithm. 
 *
 *            Given a digital sequence <dsq> of length <L>, a profile
 *            <gm>, and DP matrix <gx> allocated for at least <gm->M>
 *            by <L> cells; calculate the probability of the sequence
 *            given the model using the Forward algorithm; return the
 *            Forward matrix in <gx>, and the Forward score in <ret_sc>.
 *           
 *            The Forward score is in lod score form.  To convert to a
 *            bitscore, the caller needs to subtract a null model lod
 *            score, then convert to bits.
 *           
 * Args:      dsq    - sequence in digitized form, 1..L
 *            L      - length of dsq
 *            gm     - profile. 
 *            gx     - DP matrix with room for an MxL alignment
 *            opt_sc - optRETURN: Forward lod score in nats
 *           
 * Return:    <eslOK> on success.
 */
int
p7_GForward(const ESL_DSQ *dsq, int L, const P7_PROFILE *gm, P7_GMX *gx, float *opt_sc)
{
  float const *tsc  = gm->tsc;
  float      **dp   = gx->dp;
  float       *xmx  = gx->xmx; 			    
  int          M    = gm->M;
  int          i, k;  
  float        esc  = p7_profile_IsLocal(gm) ? 0 : -eslINFINITY;

  /* Initialization of the zero row, and the lookup table of the log
   * sum routine.
   */
  XMX(0,p7G_N) = 0;                                           /* S->N, p=1            */
  XMX(0,p7G_B) = gm->xsc[p7P_N][p7P_MOVE];                    /* S->N->B, no N-tail   */
  XMX(0,p7G_E) = XMX(0,p7G_C) = XMX(0,p7G_J) = -eslINFINITY;  /* need seq to get here */
  for (k = 0; k <= M; k++)
    MMX(0,k) = IMX(0,k) = DMX(0,k) = -eslINFINITY;            /* need seq to get here */
  p7_FLogsumInit();

  /* Recursion. Done as a pull.
   * Note some slightly wasteful boundary conditions:  
   *    tsc[0] = impossible for all eight transitions (no node 0)
   *    D_1 is wastefully calculated (doesn't exist)
   */
  for (i = 1; i <= L; i++) 
    {
      float const *rsc = gm->rsc[dsq[i]];
      float sc;

      MMX(i,0) = IMX(i,0) = DMX(i,0) = -eslINFINITY;
      XMX(i, p7G_E) = -eslINFINITY;

      for (k = 1; k < M; k++)
	{
	  /* match state */
	  sc = p7_FLogsum(p7_FLogsum(MMX(i-1,k-1)   + TSC(p7P_MM,k-1), 
				     IMX(i-1,k-1)   + TSC(p7P_IM,k-1)),
			  p7_FLogsum(XMX(i-1,p7G_B) + TSC(p7P_BM,k-1),
				     DMX(i-1,k-1)   + TSC(p7P_DM,k-1)));
	  MMX(i,k) = sc + MSC(k);

	  /* insert state */
	  sc = p7_FLogsum(MMX(i-1,k) + TSC(p7P_MI,k),
			  IMX(i-1,k) + TSC(p7P_II,k));
	  IMX(i,k) = sc + ISC(k);

	  /* delete state */
	  DMX(i,k) = p7_FLogsum(MMX(i,k-1) + TSC(p7P_MD,k-1),
				DMX(i,k-1) + TSC(p7P_DD,k-1));

	  /* E state update */
	  XMX(i,p7G_E) = p7_FLogsum(p7_FLogsum(MMX(i,k) + esc,
					       DMX(i,k) + esc),
				               XMX(i,p7G_E));
	}
      /* unrolled match state M_M */
      sc = p7_FLogsum(p7_FLogsum(MMX(i-1,M-1)   + TSC(p7P_MM,M-1), 
				 IMX(i-1,M-1)   + TSC(p7P_IM,M-1)),
		      p7_FLogsum(XMX(i-1,p7G_B) + TSC(p7P_BM,M-1),
				 DMX(i-1,M-1)   + TSC(p7P_DM,M-1)));
      MMX(i,M) = sc + MSC(M);
      IMX(i,M) = -eslINFINITY;

      /* unrolled delete state D_M */
      DMX(i,M) = p7_FLogsum(MMX(i,M-1) + TSC(p7P_MD,M-1),
			    DMX(i,M-1) + TSC(p7P_DD,M-1));

      /* unrolled E state update */
      XMX(i,p7G_E) = p7_FLogsum(p7_FLogsum(MMX(i,M),
					   DMX(i,M)),
					   XMX(i,p7G_E));

      /* J state */
      XMX(i,p7G_J) = p7_FLogsum(XMX(i-1,p7G_J) + gm->xsc[p7P_J][p7P_LOOP],
				XMX(i,  p7G_E) + gm->xsc[p7P_E][p7P_LOOP]);
      /* C state */
      XMX(i,p7G_C) = p7_FLogsum(XMX(i-1,p7G_C) + gm->xsc[p7P_C][p7P_LOOP],
				XMX(i,  p7G_E) + gm->xsc[p7P_E][p7P_MOVE]);
      /* N state */
      XMX(i,p7G_N) = XMX(i-1,p7G_N) + gm->xsc[p7P_N][p7P_LOOP];

      /* B state */
      XMX(i,p7G_B) = p7_FLogsum(XMX(i,  p7G_N) + gm->xsc[p7P_N][p7P_MOVE],
				XMX(i,  p7G_J) + gm->xsc[p7P_J][p7P_MOVE]);
    }

  if (opt_sc != NULL) *opt_sc = XMX(L,p7G_C) + gm->xsc[p7P_C][p7P_MOVE];
  gx->M = M;
  gx->L = L;
  return eslOK;
}
コード例 #14
0
/* Function:  p7_GBackward()
 * Synopsis:  The Backward algorithm.
 * Incept:    SRE, Fri Dec 28 14:31:58 2007 [Janelia]
 *
 * Purpose:   The Backward dynamic programming algorithm.
 * 
 *            Given a digital sequence <dsq> of length <L>, a profile
 *            <gm>, and DP matrix <gx> allocated for at least <gm->M>
 *            by <L> cells; calculate the probability of the sequence
 *            given the model using the Backward algorithm; return the
 *            Backward matrix in <gx>, and the Backward score in <ret_sc>.
 *           
 *            The Backward score is in lod score form. To convert to a
 *            bitscore, the caller needs to subtract a null model lod
 *            score, then convert to bits.
 *
 * Args:      dsq    - sequence in digitized form, 1..L
 *            L      - length of dsq
 *            gm     - profile 
 *            gx     - DP matrix with room for an MxL alignment
 *            opt_sc - optRETURN: Backward lod score in nats
 *           
 * Return:    <eslOK> on success.
 */
int
p7_GBackward(const ESL_DSQ *dsq, int L, const P7_PROFILE *gm, P7_GMX *gx, float *opt_sc)
{
  float const *tsc  = gm->tsc;
  float const *rsc  = NULL;
  float      **dp   = gx->dp;
  float       *xmx  = gx->xmx; 			    
  int          M    = gm->M;
  int          i, k;  
  float        esc  = p7_profile_IsLocal(gm) ? 0 : -eslINFINITY;

  /* Note: backward calculates the probability we can get *out* of
   * cell i,k; exclusive of emitting residue x_i.
   */
  p7_FLogsumInit();

  /* Initialize the L row.  */
  XMX(L,p7G_J) = XMX(L,p7G_B) = XMX(L,p7G_N) = -eslINFINITY;
  XMX(L,p7G_C) = gm->xsc[p7P_C][p7P_MOVE];                 /* C<-T          */
  XMX(L,p7G_E) = XMX(L,p7G_C) + gm->xsc[p7P_E][p7P_MOVE];  /* E<-C, no tail */
  
  MMX(L,M) = DMX(L,M) = XMX(L,p7G_E); /* {MD}_M <- E (prob 1.0) */
  IMX(L,M) = -eslINFINITY;	      /* no I_M state        */
  for (k = M-1; k >= 1; k--) {
    MMX(L,k) = p7_FLogsum( XMX(L,p7G_E) + esc,
			   DMX(L, k+1)  + TSC(p7P_MD,k));
    DMX(L,k) = p7_FLogsum( XMX(L,p7G_E) + esc,
			   DMX(L, k+1)  + TSC(p7P_DD,k));
    IMX(L,k) = -eslINFINITY;
  }
  
  /* Main recursion */
  for (i = L-1; i >= 1; i--)
    {
      rsc = gm->rsc[dsq[i+1]];

      XMX(i,p7G_B) = MMX(i+1,1) + TSC(p7P_BM,0) + MSC(1); /* t_BM index is 0 because it's stored off-by-one. */
      for (k = 2; k <= M; k++)
	XMX(i,p7G_B) = p7_FLogsum(XMX(i, p7G_B), MMX(i+1,k) + TSC(p7P_BM,k-1) + MSC(k));

      XMX(i,p7G_J) = p7_FLogsum( XMX(i+1,p7G_J) + gm->xsc[p7P_J][p7P_LOOP],
				 XMX(i,  p7G_B) + gm->xsc[p7P_J][p7P_MOVE]);
      
      XMX(i,p7G_C) = XMX(i+1,p7G_C) + gm->xsc[p7P_C][p7P_LOOP];
      
      XMX(i,p7G_E) = p7_FLogsum( XMX(i, p7G_J)  + gm->xsc[p7P_E][p7P_LOOP],
				 XMX(i, p7G_C)  + gm->xsc[p7P_E][p7P_MOVE]);
      
      XMX(i,p7G_N) = p7_FLogsum( XMX(i+1,p7G_N) + gm->xsc[p7P_N][p7P_LOOP],
				 XMX(i,  p7G_B) + gm->xsc[p7P_N][p7P_MOVE]);
      
      
      MMX(i,M) = DMX(i,M) = XMX(i,p7G_E);
      IMX(i,M) = -eslINFINITY;
      for (k = M-1; k >= 1; k--)
	{
	  MMX(i,k) = p7_FLogsum( p7_FLogsum(MMX(i+1,k+1) + TSC(p7P_MM,k) + MSC(k+1),
					    IMX(i+1,k)   + TSC(p7P_MI,k) + ISC(k)),
				 p7_FLogsum(XMX(i,p7G_E) + esc,
					    DMX(i,  k+1) + TSC(p7P_MD,k)));
      
	  IMX(i,k) = p7_FLogsum( MMX(i+1,k+1) + TSC(p7P_IM,k) + MSC(k+1),
				 IMX(i+1,k)   + TSC(p7P_II,k) + ISC(k));
	  
	  DMX(i,k) = p7_FLogsum( MMX(i+1,k+1) + TSC(p7P_DM,k) + MSC(k+1),
				 p7_FLogsum( DMX(i,  k+1)  + TSC(p7P_DD,k),
					     XMX(i, p7G_E) + esc));
	}
    }

  /* At i=0, only N,B states are reachable. */
  rsc = gm->rsc[dsq[1]];
  XMX(0,p7G_B) = MMX(1,1) + TSC(p7P_BM,0) + MSC(1); /* t_BM index is 0 because it's stored off-by-one. */
  for (k = 2; k <= M; k++)
    XMX(0,p7G_B) = p7_FLogsum(XMX(0, p7G_B), MMX(1,k) + TSC(p7P_BM,k-1) + MSC(k));
  XMX(i,p7G_J) = -eslINFINITY;
  XMX(i,p7G_C) = -eslINFINITY;
  XMX(i,p7G_E) = -eslINFINITY;
  XMX(i,p7G_N) = p7_FLogsum( XMX(1, p7G_N) + gm->xsc[p7P_N][p7P_LOOP],
			     XMX(0, p7G_B) + gm->xsc[p7P_N][p7P_MOVE]);
  for (k = M; k >= 1; k--)
    MMX(i,M) = IMX(i,M) = DMX(i,M) = -eslINFINITY;


  if (opt_sc != NULL) *opt_sc = XMX(0,p7G_N);
  gx->M = M;
  gx->L = L;
  return eslOK;
}