Exemplo n.º 1
0
/* make_occasionally_dishonest_casino()
 *
 * Builds a two-state HMM over the dice alphabet:
 *   state 0 - fair die:   every face is equally likely;
 *   state 1 - loaded die: the last face has probability 0.5 and the
 *                         remaining faces share the other 0.5 equally.
 *
 * The new model is handed back in <*ret_hmm> and the new alphabet in
 * <*ret_abc>. Always returns <eslOK>.
 *
 * NOTE(review): the results of esl_alphabet_Create(), esl_hmm_Create()
 * and esl_hmm_Configure() are not checked here.
 */
static int
make_occasionally_dishonest_casino(ESL_HMM **ret_hmm, ESL_ALPHABET **ret_abc)
{
  ESL_ALPHABET *dice   = esl_alphabet_Create(eslDICE);
  ESL_HMM      *casino = esl_hmm_Create(dice, 2);
  int           face;

  /* Begin distribution: always start with the fair die. */
  casino->pi[0] = 1.0;
  casino->pi[1] = 0.0;
  casino->pi[2] = 0.0;              /* no L=0 seqs */

  /* Transitions out of state 0 (fair); index 2 is the end. */
  casino->t[0][0] = 0.96;
  casino->t[0][1] = 0.03;
  casino->t[0][2] = 0.01;           /* end from state 0; mean length 100 */

  /* Transitions out of state 1 (loaded). */
  casino->t[1][0] = 0.05;
  casino->t[1][1] = 0.95;
  casino->t[1][2] = 0.0;            /* no end from state 1 */

  /* Fair die: uniform over all faces. */
  face = 0;
  while (face < dice->K)
    {
      casino->e[0][face] = 1.0 / (float) dice->K;
      face++;
    }

  /* Loaded die: the first K-1 faces split 0.5 evenly; the last face takes 0.5. */
  face = 0;
  while (face < dice->K-1)
    {
      casino->e[1][face] = 0.5 / (((float) dice->K) - 1);
      face++;
    }
  casino->e[1][dice->K-1] = 0.5;

  esl_hmm_Configure(casino, NULL);

  *ret_abc = dice;
  *ret_hmm = casino;
  return eslOK;
}
Exemplo n.º 2
0
/* Function:  p7_bg_SetFilter()
 * Synopsis:  Configure filter HMM with new model composition.
 * Incept:    SRE, Fri Dec  5 09:08:15 2008 [Janelia]
 *
 * Purpose:   Parameterize the two-state "filter HMM" held in <bg>.
 *            The filter HMM is an experimental step of the
 *            acceleration pipeline whose job is to reject
 *            biased-composition sequences early, before the expensive
 *            domain postprocessing runs. A sequence that passes every
 *            pipeline step gets the same final score regardless of
 *            this filter.
 *
 *            It currently does not work as well as one would hope and
 *            is an area of future work. The real goal is a better
 *            null model of real protein sequences (and their biases),
 *            incorporated into the flanks (NCJ states) of the profile.
 *
 *            <compo> is the average residue composition of the model,
 *            taken from the HMM or from the copy kept in a profile or
 *            optimized profile. <M> is the model length in nodes.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    (no abnormal error conditions)
 *
 * Xref:      J4/25: generalized to use composition vector, not
 *                   specifically an HMM.
 *
 * Note:      Although this looks like an ordinary two-state HMM, it
 *            is set up as a conditional-on-L model. The fixed L=400
 *            expectation used for state 0 is therefore not a
 *            length-modeling error: the P(L) term is added separately
 *            in p7_bg_FilterScore().
 */
int
p7_bg_SetFilter(P7_BG *bg, int M, const float *compo)
{
  float bg_len     = 400.0;              /* mean length in state 0 of filter HMM (normal background) */
  float bias_len   = (float) M / 8.0;    /* mean length in state 1 of filter HMM (biased segment)    */
  float bg_stay    = bg_len / (bg_len+1.0f);
  float bg_leave   = 1.0f / (bg_len+1.0f);
  float bias_stay  = bias_len / (bias_len+1.0f);
  float bias_leave = 1.0f / (bias_len+1.0f);

  /* State 0: the normal iid model, emitting with the background frequencies. */
  bg->fhmm->t[0][0] = bg_stay;
  bg->fhmm->t[0][1] = bg_leave;
  bg->fhmm->t[0][2] = 1.0f;              /* 1.0 transition to E: length distribution is set externally */
  esl_vec_FCopy(bg->f, bg->abc->K, bg->fhmm->e[0]);

  /* State 1: the potentially biased model composition. */
  bg->fhmm->t[1][0] = bias_leave;
  bg->fhmm->t[1][1] = bias_stay;
  bg->fhmm->t[1][2] = 1.0f;              /* 1.0 transition to E: length distribution is set externally */
  esl_vec_FCopy(compo, bg->abc->K, bg->fhmm->e[1]);

  /* Begin distribution: start almost always in the background state. */
  bg->fhmm->pi[0] = 0.999;
  bg->fhmm->pi[1] = 0.001;

  esl_hmm_Configure(bg->fhmm, bg->f);
  return eslOK;
}
Exemplo n.º 3
0
/* create_null_hmm()
 *
 * Builds a one-state HMM whose single state (state 0) is the normal
 * iid model, emitting residues with the SW50 background frequencies
 * (the H3 default background). Both the self transition and the
 * transition to the end are set to 1.0, because the length
 * distribution is supplied externally.
 *
 * <abc> must be a 20-residue alphabet; the emission table below is
 * hard-wired to 20 entries in the order ACDEFGHIKLMNPQRSTVWY.
 *
 * Returns the new HMM, or NULL if <abc> is NULL, if <abc> does not
 * have exactly 20 residues, or if esl_hmm_Create() returns NULL.
 */
ESL_HMM *
create_null_hmm(ESL_ALPHABET *abc)
{
  /* SW50 iid frequencies: H3 default background */
  static const float sw50[] = {
    0.0787945,		/* A */
    0.0151600,		/* C */
    0.0535222,		/* D */
    0.0668298,		/* E */
    0.0397062,		/* F */
    0.0695071,		/* G */
    0.0229198,		/* H */
    0.0590092,		/* I */
    0.0594422,		/* K */
    0.0963728,		/* L */
    0.0237718,		/* M */
    0.0414386,		/* N */
    0.0482904,		/* P */
    0.0395639,		/* Q */
    0.0540978,		/* R */
    0.0683364,		/* S */
    0.0540687,		/* T */
    0.0673417,		/* V */
    0.0114135,		/* W */
    0.0304133,		/* Y */
  };
  const int nres = (int) (sizeof sw50 / sizeof sw50[0]);
  ESL_HMM  *hmm;
  int       x;

  /* The table has exactly <nres> entries; refuse any other alphabet
   * size rather than writing past (or under-filling) the emission row.
   */
  if (abc == NULL || abc->K != nres) return NULL;

  hmm = esl_hmm_Create(abc, 1);
  if (hmm == NULL) return NULL;

  /* state 0 = normal iid model.*/
  hmm->t[0][0] = 1.0f;
  hmm->t[0][1] = 1.0f;		            /* external length distribution */

  for (x = 0; x < nres; x++)
    hmm->e[0][x] = sw50[x];

  hmm->pi[0]    = 1.0;
  /* pi[M] is the begin->end entry (L=0 sequences). Set it explicitly
   * so it never holds an indeterminate value; this does not rely on
   * esl_hmm_Create() zeroing it.
   */
  hmm->pi[1]    = 0.0;

  esl_hmm_Configure(hmm, NULL);
  return hmm;
}
Exemplo n.º 4
0
/* create_test_hmm()
 *
 * Builds a two-state test HMM:
 *   state 0 = normal iid model, emitting with the SW50 background
 *             frequencies (the H3 default background);
 *   state 1 = biased state, emitting with the average of the MFS_1
 *             core emissions.
 * Each state's transition to the end is set to 1.0, because the
 * length distribution is supplied externally.
 *
 * <abc> must be a 20-residue alphabet; both emission tables below are
 * hard-wired to 20 entries in the order ACDEFGHIKLMNPQRSTVWY.
 *
 * Returns the new HMM, or NULL if <abc> is NULL, if <abc> does not
 * have exactly 20 residues, or if esl_hmm_Create() returns NULL.
 */
ESL_HMM *
create_test_hmm(ESL_ALPHABET *abc)
{
  /* SW50 iid frequencies: H3 default background */
  static const float sw50[] = {
    0.0787945,		/* A */
    0.0151600,		/* C */
    0.0535222,		/* D */
    0.0668298,		/* E */
    0.0397062,		/* F */
    0.0695071,		/* G */
    0.0229198,		/* H */
    0.0590092,		/* I */
    0.0594422,		/* K */
    0.0963728,		/* L */
    0.0237718,		/* M */
    0.0414386,		/* N */
    0.0482904,		/* P */
    0.0395639,		/* Q */
    0.0540978,		/* R */
    0.0683364,		/* S */
    0.0540687,		/* T */
    0.0673417,		/* V */
    0.0114135,		/* W */
    0.0304133,		/* Y */
  };
  /* average of MFS_1 core emissions */
  static const float mfs1[] = {
    0.1068,		/* A */
    0.0110,		/* C */
    0.0242,		/* D */
    0.0293,		/* E */
    0.0621,		/* F */
    0.0899,		/* G */
    0.0139,		/* H */
    0.0762,		/* I */
    0.0319,		/* K */
    0.1274,		/* L */
    0.0338,		/* M */
    0.0285,		/* N */
    0.0414,		/* P */
    0.0266,		/* Q */
    0.0375,		/* R */
    0.0747,		/* S */
    0.0568,		/* T */
    0.0815,		/* V */
    0.0161,		/* W */
    0.0303,		/* Y */
  };
  const int nres = (int) (sizeof sw50 / sizeof sw50[0]);
  ESL_HMM  *hmm;
  int       L   = 400;	/* length parameter for state 0's self/exit transitions */
  int       M   = 200;	/* length parameter for state 1's self/exit transitions */
  int       x;

  /* Both tables have exactly <nres> entries; refuse any other alphabet
   * size rather than writing past (or under-filling) the emission rows.
   */
  if (abc == NULL || abc->K != nres) return NULL;

  hmm = esl_hmm_Create(abc, 2);
  if (hmm == NULL) return NULL;

  /* state 0 = normal iid model. state 1 = biased state */

  hmm->t[0][0] = (float) L / (float) (L+1);
  hmm->t[0][1] = 1.0f / (float) (L+1);
  hmm->t[0][2] = 1.0;		            /* external length distribution */

  hmm->t[1][0] = 2.0f / (float) (M+2);
  hmm->t[1][1] = (float) M / (float) (M+2);
  hmm->t[1][2] = 1.0;		            /* external length distribution */

  for (x = 0; x < nres; x++)
    {
      hmm->e[0][x] = sw50[x];
      hmm->e[1][x] = mfs1[x];
    }

  hmm->pi[0]    = 0.99;
  hmm->pi[1]    = 0.01;
  /* pi[M] is the begin->end entry (L=0 sequences). Set it explicitly
   * so it never holds an indeterminate value; this does not rely on
   * esl_hmm_Create() zeroing it.
   */
  hmm->pi[2]    = 0.0;

  esl_hmm_Configure(hmm, NULL);
  return hmm;
}