Exemple #1
0
/* select_m()
 * Choose the state that precedes match cell M(i,k) in a traceback.
 *
 * M(i,k) can be reached from B(i-1), M(i-1,k-1), I(i-1,k-1) or
 * D(i-1,k-1). Each candidate is scored by its DP value on row i-1,
 * unless the transition from it into M(i,k) is exactly 0.0, in which
 * case it is scored -eslINFINITY.
 *
 * Returns one of p7T_M, p7T_I, p7T_D, p7T_B. Candidates are listed in
 * that order so that the most desirable choice comes first in case of
 * a tie.
 */
static inline int
select_m(const P7_OPROFILE *om, const P7_OMX *ox, int i, int k)
{
  const int     prev_state[4] = { p7T_M, p7T_I, p7T_D, p7T_B };
  const int     tslot[4]      = { 1, 2, 3, 0 };   /* transition vectors are laid out B, M, I, D */
  const int     nq    = p7O_NQF(ox->M);
  const int     q     = (k-1) % nq;                /* (q,r) is the position of the current DP cell M(i,k) */
  const int     r     = (k-1) / nq;
  vector float *tp    = om->tfv + 7*q;             /* start of the transitions into the current cell M(i,k) */
  vector float  zerov = (vector float) vec_splat_u32(0);
  union { vector float v; float p[4]; } cand[4], tv;
  float         score[4];
  int           c;

  /* Candidate 3: the B state on the previous row, copied into every lane. */
  cand[3].v = esl_vmx_set_float(ox->xmx[(i-1)*p7X_NXCELLS+p7X_B]);

  /* Candidates 0..2: M, I, D of the previous row. For q == 0 the values
   * are taken from the last vector (nq-1) and shifted with vec_sld()
   * against a zero vector.
   */
  if (q > 0)
    {
      cand[0].v = ox->dpf[i-1][(q-1)*3 + p7X_M];
      cand[1].v = ox->dpf[i-1][(q-1)*3 + p7X_I];
      cand[2].v = ox->dpf[i-1][(q-1)*3 + p7X_D];
    }
  else
    {
      cand[0].v = vec_sld(zerov, ox->dpf[i-1][(nq-1)*3 + p7X_M], 12);
      cand[1].v = vec_sld(zerov, ox->dpf[i-1][(nq-1)*3 + p7X_I], 12);
      cand[2].v = vec_sld(zerov, ox->dpf[i-1][(nq-1)*3 + p7X_D], 12);
    }

  /* Score every candidate; a zero transition makes the path impossible. */
  for (c = 0; c < 4; c++)
    {
      tv.v = tp[tslot[c]];
      if (tv.p[r] == 0.0) score[c] = -eslINFINITY;
      else                score[c] = cand[c].p[r];
    }
  return prev_state[esl_vec_FArgMax(score, 4)];
}
Exemple #2
0
/* Function:  p7_emit_FancyConsensus()
 * Synopsis:  Emit a fancier consensus with upper/lower case and N/X's.
 * Incept:    SRE, Fri May 14 09:33:10 2010 [Janelia]
 *
 * Purpose:   Build a consensus sequence for model <hmm> and append it,
 *            residue by residue, to the caller-provided text-mode <sq>.
 *
 *            For each match state k = 1..M:
 *              - if the model has a mask (<hmm->mm>) and position k is
 *                marked 'm', the lower-cased ``unknown'' symbol of the
 *                alphabet is emitted;
 *              - otherwise the maximum probability residue is looked
 *                up, and its probability p decides the output:
 *                  p <  <min_lower>             : lower-cased unknown symbol
 *                  p >= <min_upper>             : residue in upper case
 *                  <min_lower> <= p < <min_upper>: residue in lower case
 *
 *            A terminating NUL is added after the last residue.
 *
 * Returns:   <eslOK> on success. A non-<eslOK> status from growing
 *            <sq> or from adding a residue is passed back unchanged.
 *
 * Throws:    <eslEINVAL> if the <sq> isn't in text mode.
 *
 * Xref:      SRE:J6/59.
 */
int
p7_emit_FancyConsensus(const P7_HMM *hmm, float min_lower, float min_upper, ESL_SQ *sq)
{
  int   pos;       /* match state index, 1..M               */
  int   best;      /* index of the most probable residue     */
  float pbest;     /* probability of that residue            */
  char  sym;       /* character to append for this position  */
  int   status;

  if (! esl_sq_IsText(sq)) ESL_EXCEPTION(eslEINVAL, "p7_emit_FancyConsensus() expects a text-mode <sq>");

  status = esl_sq_GrowTo(sq, hmm->M);
  if (status != eslOK) return status;

  for (pos = 1; pos <= hmm->M; pos++)
    {
      if (hmm->mm != NULL && hmm->mm[pos] == 'm')
        {
          /* masked position: emit the degenerate code */
          sym = tolower(esl_abc_CGetUnknown(hmm->abc));
        }
      else
        {
          pbest = esl_vec_FMax(   hmm->mat[pos], hmm->abc->K);
          best  = esl_vec_FArgMax(hmm->mat[pos], hmm->abc->K);

          if (p_is_below(pbest, min_lower))  sym = tolower(esl_abc_CGetUnknown(hmm->abc));
          else if (pbest >= min_upper)       sym = toupper(hmm->abc->sym[best]);
          else                               sym = tolower(hmm->abc->sym[best]);
        }

      status = esl_sq_CAddResidue(sq, sym);
      if (status != eslOK) return status;
    }

  /* Terminate the text string. */
  return esl_sq_CAddResidue(sq, '\0');
}
/* select_m()
 * Choose the state that precedes M(i,k) in a traceback over the
 * generic DP matrix <gx>.
 *
 * Each of the four possible predecessors -- M(i-1,k-1), I(i-1,k-1),
 * D(i-1,k-1) and B(i-1) -- is scored as the product of TSCDELTA() for
 * the corresponding transition at k-1 and the predecessor's DP value.
 * Returns the state type (p7T_M, p7T_I, p7T_D or p7T_B) of the
 * candidate selected by esl_vec_FArgMax().
 */
static inline int
select_m(const P7_PROFILE *gm, const P7_GMX *gx, int i, int k)
{
  /* These three locals must keep exactly these names: the {MDI}MX(),
   * XMX() and TSCDELTA() macros refer to them.
   */
  float      **dp   = gx->dp;
  float       *xmx  = gx->xmx;
  float const *tsc  = gm->tsc;

  const int    prev_state[4] = { p7T_M, p7T_I, p7T_D, p7T_B };
  const int    ip = i - 1;      /* previous row    */
  const int    kp = k - 1;      /* previous column */
  float        score[4];

  score[0] = TSCDELTA(p7P_MM, kp) * MMX(ip, kp);
  score[1] = TSCDELTA(p7P_IM, kp) * IMX(ip, kp);
  score[2] = TSCDELTA(p7P_DM, kp) * DMX(ip, kp);
  score[3] = TSCDELTA(p7P_BM, kp) * XMX(ip, p7G_B);

  return prev_state[esl_vec_FArgMax(score, 4)];
}
Exemple #4
0
/* Function:  p7_emit_SimpleConsensus()
 * Synopsis:  Generate simple consensus: ML residue in each match state
 * Incept:    SRE, Mon Sep  1 09:10:47 2008 [Janelia]
 *
 * Purpose:   Append a consensus for model <hmm> to the caller-provided
 *            digital-mode <sq>. For each match state k = 1..M the
 *            index of the maximum probability residue is added. If
 *            the model carries a mask (<hmm->mm>) and position k is
 *            marked 'm', the code <Kp-3> of the alphabet is added
 *            instead (the degenerate code for a masked position).
 *            The sequence is closed with <eslDSQ_SENTINEL>.
 *
 * Returns:   <eslOK> on success. A non-<eslOK> status from growing
 *            <sq> or from adding a residue is passed back unchanged.
 *
 * Throws:    <eslEINVAL> if the <sq> isn't in digital mode.
 */
int
p7_emit_SimpleConsensus(const P7_HMM *hmm, ESL_SQ *sq)
{
  int pos;      /* match state index, 1..M       */
  int code;     /* digital residue code to append */
  int status;

  if (! esl_sq_IsDigital(sq)) ESL_EXCEPTION(eslEINVAL, "p7_emit_SimpleConsensus() expects a digital-mode <sq>");

  status = esl_sq_GrowTo(sq, hmm->M);
  if (status != eslOK) return status;

  for (pos = 1; pos <= hmm->M; pos++)
    {
      if (hmm->mm != NULL && hmm->mm[pos] == 'm')
        code = hmm->abc->Kp-3;      /* masked position: degenerate code */
      else
        code = esl_vec_FArgMax(hmm->mat[pos], hmm->abc->K);

      status = esl_sq_XAddResidue(sq, code);
      if (status != eslOK) return status;
    }

  /* Close the digital sequence. */
  return esl_sq_XAddResidue(sq, eslDSQ_SENTINEL);
}
Exemple #5
0
/* Function:  p7_ProfileConfig()
 * Synopsis:  Configure a search profile.
 * Incept:    SRE, Sun Sep 25 12:21:25 2005 [St. Louis]
 *
 * Purpose:   Given a model <hmm> with core probabilities, the null1
 *            model <bg>, a desired search <mode> (one of <p7_LOCAL>,
 *            <p7_GLOCAL>, <p7_UNILOCAL>, or <p7_UNIGLOCAL>), and an
 *            expected target sequence length <L>; configure the
 *            search model in <gm> with lod scores relative to the
 *            background frequencies in <bg>.
 *
 *            Any <name>, <acc> and <desc> strings already held by
 *            <gm> are freed and replaced by copies of the ones in
 *            <hmm>.
 *
 * Returns:   <eslOK> on success; the profile <gm> now contains
 *            scores and is ready for searching target sequences.
 *
 * Throws:    <eslEINVAL> if the alphabets of <hmm> and <gm> differ, or
 *            if <hmm> has more match states than <gm> was allocated
 *            for. <eslEMEM> on allocation error. A failure status
 *            from a called routine is passed back unchanged. On any
 *            failure <gm> may be left partially configured.
 */
int
p7_ProfileConfig(const P7_HMM *hmm, const P7_BG *bg, P7_PROFILE *gm, int L, int mode)
{
  int   k, x, z;        /* counters over states, residues, annotation */
  int   status;
  float *occ = NULL;    /* match state occupancies; only used for local entry scores */
  float *tp, *rp;
  float  sc[p7_MAXCODE];
  float  mthresh;
  float  Z;

  /* Contract checks */
  if (gm->abc->type != hmm->abc->type) ESL_XEXCEPTION(eslEINVAL, "HMM and profile alphabet don't match");
  if (hmm->M > gm->allocM)             ESL_XEXCEPTION(eslEINVAL, "profile too small to hold HMM");

  /* Copy some pointer references and other info across from HMM  */
  gm->M      = hmm->M;
  gm->mode   = mode;
  gm->roff   = -1;
  gm->eoff   = -1;
  gm->offs[p7_MOFFSET] = -1;
  gm->offs[p7_FOFFSET] = -1;
  gm->offs[p7_POFFSET] = -1;

  /* Release the old annotation strings. The pointers are cleared right
   * away so that none of them is left dangling in <gm> if one of the
   * copies below fails before it has been reassigned.
   */
  free(gm->name);  gm->name = NULL;
  free(gm->acc);   gm->acc  = NULL;
  free(gm->desc);  gm->desc = NULL;
  if ((status = esl_strdup(hmm->name,   -1, &(gm->name))) != eslOK) goto ERROR;
  if ((status = esl_strdup(hmm->acc,    -1, &(gm->acc)))  != eslOK) goto ERROR;
  if ((status = esl_strdup(hmm->desc,   -1, &(gm->desc))) != eslOK) goto ERROR;
  if (hmm->flags & p7H_RF) strcpy(gm->rf, hmm->rf);
  if (hmm->flags & p7H_CS) strcpy(gm->cs, hmm->cs);
  for (z = 0; z < p7_NEVPARAM; z++) gm->evparam[z] = hmm->evparam[z];
  for (z = 0; z < p7_NCUTOFFS; z++) gm->cutoff[z]  = hmm->cutoff[z];
  for (z = 0; z < p7_MAXABET;  z++) gm->compo[z]   = hmm->compo[z];

  /* Determine the "consensus" residue for each match position.
   * This is only used for alignment displays, not in any calculations.
   * A residue is shown in upper case when its probability exceeds
   * <mthresh>, in lower case otherwise.
   */
  if      (hmm->abc->type == eslAMINO) mthresh = 0.5;
  else if (hmm->abc->type == eslDNA)   mthresh = 0.9;
  else if (hmm->abc->type == eslRNA)   mthresh = 0.9;
  else                                 mthresh = 0.5;
  gm->consensus[0] = ' ';
  for (k = 1; k <= hmm->M; k++) {
    x = esl_vec_FArgMax(hmm->mat[k], hmm->abc->K);
    gm->consensus[k] = ((hmm->mat[k][x] > mthresh) ? toupper(hmm->abc->sym[x]) : tolower(hmm->abc->sym[x]));
  }
  gm->consensus[hmm->M+1] = '\0';

  /* Entry scores. */
  if (p7_profile_IsLocal(gm))
    {
      /* Local mode entry:  occ[k] /( \sum_i occ[i] * (M-i+1))
       * (Reduces to uniform 2/(M(M+1)) for occupancies of 1.0)  */
      Z = 0.;
      ESL_ALLOC(occ, sizeof(float) * (hmm->M+1));

      if ((status = p7_hmm_CalculateOccupancy(hmm, occ, NULL)) != eslOK) goto ERROR;
      for (k = 1; k <= hmm->M; k++)
        Z += occ[k] * (float) (hmm->M-k+1);
      for (k = 1; k <= hmm->M; k++)
        p7P_TSC(gm, k-1, p7P_BM) = log(occ[k] / Z); /* note off-by-one: entry at Mk stored as [k-1][BM] */

      /* Clear the pointer after freeing: the ERROR path below can still
       * be reached (from the length reconfiguration) and frees <occ>.
       */
      free(occ);
      occ = NULL;
    }
  else  /* glocal modes: left wing retraction; must be in log space for precision */
    {
      Z = log(hmm->t[0][p7H_MD]);
      p7P_TSC(gm, 0, p7P_BM) = log(1.0 - hmm->t[0][p7H_MD]);
      for (k = 1; k < hmm->M; k++)
        {
          p7P_TSC(gm, k, p7P_BM) = Z + log(hmm->t[k][p7H_DM]);
          Z += log(hmm->t[k][p7H_DD]);
        }
    }

  /* E state loop/move probabilities: nonzero for MOVE allows loops/multihits
   * N,C,J transitions are set later by length config
   */
  if (p7_profile_IsMultihit(gm)) {
    gm->xsc[p7P_E][p7P_MOVE] = -eslCONST_LOG2;
    gm->xsc[p7P_E][p7P_LOOP] = -eslCONST_LOG2;
    gm->nj                   = 1.0f;
  } else {
    gm->xsc[p7P_E][p7P_MOVE] = 0.0f;
    gm->xsc[p7P_E][p7P_LOOP] = -eslINFINITY;
    gm->nj                   = 0.0f;
  }

  /* Transition scores. */
  for (k = 1; k < gm->M; k++) {
    tp = gm->tsc + k * p7P_NTRANS;
    tp[p7P_MM] = log(hmm->t[k][p7H_MM]);
    tp[p7P_MI] = log(hmm->t[k][p7H_MI]);
    tp[p7P_MD] = log(hmm->t[k][p7H_MD]);
    tp[p7P_IM] = log(hmm->t[k][p7H_IM]);
    tp[p7P_II] = log(hmm->t[k][p7H_II]);
    tp[p7P_DM] = log(hmm->t[k][p7H_DM]);
    tp[p7P_DD] = log(hmm->t[k][p7H_DD]);
  }

  /* Match emission scores. */
  sc[hmm->abc->K]     = -eslINFINITY; /* gap character */
  sc[hmm->abc->Kp-2]  = -eslINFINITY; /* nonresidue character */
  sc[hmm->abc->Kp-1]  = -eslINFINITY; /* missing data character */
  for (k = 1; k <= hmm->M; k++) {
    for (x = 0; x < hmm->abc->K; x++)
      sc[x] = log(hmm->mat[k][x] / bg->f[x]);
    esl_abc_FExpectScVec(hmm->abc, sc, bg->f);
    for (x = 0; x < hmm->abc->Kp; x++) {
      rp = gm->rsc[x] + k * p7P_NR;
      rp[p7P_MSC] = sc[x];
    }
  }

  /* Insert emission scores */
  /* SRE, Fri Dec 5 08:41:08 2008: We currently hardwire insert scores
   * to 0, i.e. corresponding to the insertion emission probabilities
   * being equal to the background probabilities. Benchmarking shows
   * that setting inserts to informative emission distributions causes
   * more problems than it's worth: polar biased composition hits
   * driven by stretches of "insertion" occur, and are difficult to
   * correct for.
   */
  for (x = 0; x < gm->abc->Kp; x++)
    {
      for (k = 1; k < hmm->M; k++) p7P_ISC(gm, k, x) = 0.0f;
      p7P_ISC(gm, hmm->M, x) = -eslINFINITY;   /* init I_M to impossible.   */
    }
  for (k = 1; k <= hmm->M; k++) p7P_ISC(gm, k, gm->abc->K)    = -eslINFINITY; /* gap symbol */
  for (k = 1; k <= hmm->M; k++) p7P_ISC(gm, k, gm->abc->Kp-2) = -eslINFINITY; /* nonresidue symbol */
  for (k = 1; k <= hmm->M; k++) p7P_ISC(gm, k, gm->abc->Kp-1) = -eslINFINITY; /* missing data symbol */


#if 0
  /* original (informative) insert setting: relies on sc[K, Kp-1] initialization to -inf above */
  for (k = 1; k < hmm->M; k++) {
    for (x = 0; x < hmm->abc->K; x++)
      sc[x] = log(hmm->ins[k][x] / bg->f[x]);
    esl_abc_FExpectScVec(hmm->abc, sc, bg->f);
    for (x = 0; x < hmm->abc->Kp; x++) {
      rp = gm->rsc[x] + k*p7P_NR;
      rp[p7P_ISC] = sc[x];
    }
  }
  for (x = 0; x < hmm->abc->Kp; x++)
    p7P_ISC(gm, hmm->M, x) = -eslINFINITY;   /* init I_M to impossible.   */
#endif

  /* Remaining specials, [NCJ][MOVE | LOOP] are set by ReconfigLength()
   */
  gm->L = 0;                    /* force ReconfigLength to reconfig */
  if ((status = p7_ReconfigLength(gm, L)) != eslOK) goto ERROR;
  return eslOK;

 ERROR:
  free(occ);                    /* NULL unless the occupancy calculation failed */
  return status;
}
/* Function:  p7_ProfileConfig()
* Synopsis:  Configure a search profile.
* Incept:    SRE, Sun Sep 25 12:21:25 2005 [St. Louis]
*
* Purpose:   Given a model <hmm> with core probabilities, the null1
*            model <bg>, a desired search <mode> (one of <p7_LOCAL>,
*            <p7_GLOCAL>, <p7_UNILOCAL>, or <p7_UNIGLOCAL>), and an
*            expected target sequence length <L>; configure the
*            search model in <gm> with lod scores relative to the
*            background frequencies in <bg>.
*            
* Returns:   <eslOK> on success; the profile <gm> now contains 
*            scores and is ready for searching target sequences.
*            
* Throws:    <eslEMEM> on allocation error.
*/
int
p7_ProfileConfig(const P7_HMM *hmm, const P7_BG *bg, P7_PROFILE *gm, int L, int mode)
{
    int   k, x, z;	/* counters over states, residues, annotation */
    int   status;
    float *occ = NULL;
    float *tp, *rp;
    float  sc[p7_MAXCODE];
    float  mthresh;
    float  Z;

    /* Contract checks */
    if (gm->abc->type != hmm->abc->type) ESL_XEXCEPTION(eslEINVAL, "HMM and profile alphabet don't match");
    if (hmm->M > gm->allocM)             ESL_XEXCEPTION(eslEINVAL, "profile too small to hold HMM");

    /* Copy some pointer references and other info across from HMM  */
    gm->M      = hmm->M;
    gm->mode   = mode;
    gm->roff   = -1;
    gm->eoff   = -1;
    gm->offs[p7_MOFFSET] = -1;
    gm->offs[p7_FOFFSET] = -1;
    gm->offs[p7_POFFSET] = -1;
    if (gm->name != NULL) free(gm->name);
    if (gm->acc  != NULL) free(gm->acc);
    if (gm->desc != NULL) free(gm->desc);
    if ((status = esl_strdup(hmm->name,   -1, &(gm->name))) != eslOK) goto ERROR;
    if ((status = esl_strdup(hmm->acc,    -1, &(gm->acc)))  != eslOK) goto ERROR;
    if ((status = esl_strdup(hmm->desc,   -1, &(gm->desc))) != eslOK) goto ERROR;
    if (hmm->flags & p7H_RF) strcpy(gm->rf, hmm->rf);
    if (hmm->flags & p7H_CS) strcpy(gm->cs, hmm->cs);
    for (z = 0; z < p7_NEVPARAM; z++) gm->evparam[z] = hmm->evparam[z];
    for (z = 0; z < p7_NCUTOFFS; z++) gm->cutoff[z]  = hmm->cutoff[z];
    for (z = 0; z < p7_MAXABET;  z++) gm->compo[z]   = hmm->compo[z];

    /* Determine the "consensus" residue for each match position.
    * This is only used for alignment displays, not in any calculations.
    */
    if      (hmm->abc->type == eslAMINO) mthresh = 0.5;
    else if (hmm->abc->type == eslDNA)   mthresh = 0.9;
    else if (hmm->abc->type == eslRNA)   mthresh = 0.9;
    else                                 mthresh = 0.5;
    gm->consensus[0] = ' ';
    for (k = 1; k <= hmm->M; k++) {
        x = esl_vec_FArgMax(hmm->mat[k], hmm->abc->K);
        gm->consensus[k] = ((hmm->mat[k][x] > mthresh) ? toupper(hmm->abc->sym[x]) : tolower(hmm->abc->sym[x]));
    }
    gm->consensus[hmm->M+1] = '\0';

    /* Entry scores. */
    if (p7_profile_IsLocal(gm))
    {
        /* Local mode entry:  occ[k] /( \sum_i occ[i] * (M-i+1))
        * (Reduces to uniform 2/(M(M+1)) for occupancies of 1.0)  */
        Z = 0.;
        ESL_ALLOC_WITH_TYPE(occ, float*, sizeof(float) * (hmm->M+1));

        if ((status = p7_hmm_CalculateOccupancy(hmm, occ, NULL)) != eslOK) goto ERROR;
        for (k = 1; k <= hmm->M; k++) 
            Z += occ[k] * (float) (hmm->M-k+1);
        for (k = 1; k <= hmm->M; k++) 
            p7P_TSC(gm, k-1, p7P_BM) = log((double)(occ[k] / Z)); /* note off-by-one: entry at Mk stored as [k-1][BM] */

        free(occ);
    }
    else	/* glocal modes: left wing retraction; must be in log space for precision */
    {