/* make_occasionally_dishonest_casino()
 *
 * Build the two-state "occasionally dishonest casino" HMM over an
 * <eslDICE> alphabet created here:
 *   state 0 = fair die:   every symbol is emitted with probability 1/K;
 *   state 1 = loaded die: the last symbol has probability 0.5, and the
 *                         remaining 0.5 is split evenly over the other
 *                         K-1 symbols.
 * The model always starts in state 0 and can only end from state 0.
 *
 * The new HMM is handed back in <*ret_hmm> and its alphabet in
 * <*ret_abc>. Always returns <eslOK>.
 *
 * NOTE(review): the results of esl_alphabet_Create() and esl_hmm_Create()
 * are used without being checked, and the return value of
 * esl_hmm_Configure() is ignored -- confirm that is acceptable here.
 */
static int
make_occasionally_dishonest_casino(ESL_HMM **ret_hmm, ESL_ALPHABET **ret_abc)
{
  ESL_ALPHABET *dice   = esl_alphabet_Create(eslDICE);
  ESL_HMM      *casino = esl_hmm_Create(dice, 2);
  int           face;

  /* Start distribution: always the fair die; no L=0 sequences. */
  casino->pi[0] = 1.0;
  casino->pi[1] = 0.0;
  casino->pi[2] = 0.0;

  /* State 0 (fair die): may end from here; mean length 100. */
  casino->t[0][0] = 0.96;
  casino->t[0][1] = 0.03;
  casino->t[0][2] = 0.01;
  face = 0;
  while (face < dice->K)
    {
      casino->e[0][face] = 1.0 / (float) dice->K;
      face++;
    }

  /* State 1 (loaded die): no end transition from here. */
  casino->t[1][0] = 0.05;
  casino->t[1][1] = 0.95;
  casino->t[1][2] = 0.0;
  face = 0;
  while (face < dice->K-1)
    {
      casino->e[1][face] = 0.5 / ((float) dice->K-1);
      face++;
    }
  casino->e[1][dice->K-1] = 0.5;

  esl_hmm_Configure(casino, NULL);

  *ret_hmm = casino;
  *ret_abc = dice;
  return eslOK;
}
/* Function:  p7_bg_SetFilter()
 * Synopsis:  Configure filter HMM with new model composition.
 * Incept:    SRE, Fri Dec  5 09:08:15 2008 [Janelia]
 *
 * Purpose:   The "filter HMM" is an experimental filter in the
 *            acceleration pipeline, meant to screen out biased
 *            composition sequences. It does not affect the final
 *            score of a sequence that passes every pipeline step; it
 *            only removes biased sequences from consideration early,
 *            before domain postprocessing is applied.
 *
 *            At present it does not work as well as one would hope,
 *            and is an area of future work. The real goal is a better
 *            null model of real protein sequences (and their biases),
 *            incorporated into the flanks (NCJ states) of the profile.
 *
 *            <compo> is the average model residue composition, taken
 *            from the HMM or from the copy held in a profile or
 *            optimized profile. <M> is the model length in nodes.
 *
 *            State 0 of the filter HMM gets the background
 *            frequencies <bg->f> as emissions and a mean length of
 *            400; state 1 gets <compo> as emissions and a mean length
 *            of <M>/8. The start distribution is 0.999 / 0.001 in
 *            favour of state 0.
 *
 * Returns:   <eslOK> on success.
 *
 * Throws:    (no abnormal error conditions)
 *
 * Xref:      J4/25: generalized to use composition vector, not
 *            specifically an HMM.
 *
 * Note:      This looks like a two-state HMM, but do not worry about
 *            its length distribution (the fixed L=400 expectation in
 *            state 0, not being conditional on the target length,
 *            and so on). It is set up as a conditional-on-L model;
 *            the P(L) term is added in p7_bg_FilterScore().
 */
int
p7_bg_SetFilter(P7_BG *bg, int M, const float *compo)
{
  float bg_len   = 400.0;            /* mean length in state 0 (normal background) */
  float bias_len = (float) M / 8.0;  /* mean length in state 1 (biased segment)    */

  /* State 0: the normal iid model.
   * The 1.0 transition to E means the length distribution is set externally.
   */
  bg->fhmm->t[0][2] = 1.0f;
  bg->fhmm->t[0][1] = 1.0f   / (bg_len+1.0f);
  bg->fhmm->t[0][0] = bg_len / (bg_len+1.0f);
  esl_vec_FCopy(bg->f, bg->abc->K, bg->fhmm->e[0]);

  /* State 1: the potentially biased model composition.
   * Again, the 1.0 transition to E defers length modeling to the caller.
   */
  bg->fhmm->t[1][2] = 1.0f;
  bg->fhmm->t[1][1] = bias_len / (bias_len+1.0f);
  bg->fhmm->t[1][0] = 1.0f     / (bias_len+1.0f);
  esl_vec_FCopy(compo, bg->abc->K, bg->fhmm->e[1]);

  /* Start probabilities: almost always begin in the background state. */
  bg->fhmm->pi[1] = 0.001;
  bg->fhmm->pi[0] = 0.999;

  esl_hmm_Configure(bg->fhmm, bg->f);
  return eslOK;
}
/* create_null_hmm()
 *
 * Build a one-state HMM whose single state (state 0) is the normal iid
 * model: it emits with the SW50 iid frequencies (the H3 default
 * background), written to emission indices 0..19 in the order
 * A C D E F G H I K L M N P Q R S T V W Y.
 *
 * Both t[0][0] and t[0][1] are set to 1.0: the length distribution is
 * expected to be applied externally, so the transition row is not a
 * normalized distribution.
 *
 * Returns the new HMM, or NULL if esl_hmm_Create() returned NULL.
 */
ESL_HMM *
create_null_hmm(ESL_ALPHABET *abc)
{
  /* SW50 iid frequencies: H3 default background */
  static const double sw50_bg[20] = {
    0.0787945, /* A */  0.0151600, /* C */  0.0535222, /* D */  0.0668298, /* E */
    0.0397062, /* F */  0.0695071, /* G */  0.0229198, /* H */  0.0590092, /* I */
    0.0594422, /* K */  0.0963728, /* L */  0.0237718, /* M */  0.0414386, /* N */
    0.0482904, /* P */  0.0395639, /* Q */  0.0540978, /* R */  0.0683364, /* S */
    0.0540687, /* T */  0.0673417, /* V */  0.0114135, /* W */  0.0304133  /* Y */
  };
  ESL_HMM *hmm;
  int      x;

  hmm = esl_hmm_Create(abc, 1);
  if (hmm == NULL) return NULL;   /* do not dereference a failed allocation */

  /* state 0 = normal iid model. */
  hmm->t[0][0] = 1.0f;
  hmm->t[0][1] = 1.0f;            /* external length distribution */

  for (x = 0; x < (int) (sizeof(sw50_bg) / sizeof(sw50_bg[0])); x++)
    hmm->e[0][x] = sw50_bg[x];

  hmm->pi[0] = 1.0;

  esl_hmm_Configure(hmm, NULL);
  return hmm;
}
/* create_test_hmm()
 *
 * Build a two-state HMM:
 *   state 0 = normal iid model, emitting with the SW50 iid frequencies
 *             (H3 default background);
 *   state 1 = biased state, emitting with the average of the MFS_1
 *             core emissions.
 * Emissions are written to indices 0..19 in the order
 * A C D E F G H I K L M N P Q R S T V W Y.
 *
 * Transitions use L=400 for state 0 and M=200 for state 1. The
 * t[k][2] entries are set to 1.0 because the length distribution is
 * expected to be applied externally, so the transition rows are not
 * normalized distributions. The start distribution is 0.99 / 0.01 in
 * favour of state 0.
 *
 * Returns the new HMM, or NULL if esl_hmm_Create() returned NULL.
 *
 * NOTE(review): pi[2] is left exactly as esl_hmm_Create() returned it;
 * confirm nothing downstream reads it.
 */
ESL_HMM *
create_test_hmm(ESL_ALPHABET *abc)
{
  /* SW50 iid frequencies: H3 default background */
  static const double sw50_bg[20] = {
    0.0787945, /* A */  0.0151600, /* C */  0.0535222, /* D */  0.0668298, /* E */
    0.0397062, /* F */  0.0695071, /* G */  0.0229198, /* H */  0.0590092, /* I */
    0.0594422, /* K */  0.0963728, /* L */  0.0237718, /* M */  0.0414386, /* N */
    0.0482904, /* P */  0.0395639, /* Q */  0.0540978, /* R */  0.0683364, /* S */
    0.0540687, /* T */  0.0673417, /* V */  0.0114135, /* W */  0.0304133  /* Y */
  };
  /* average of MFS_1 core emissions */
  static const double mfs1_core[20] = {
    0.1068, /* A */  0.0110, /* C */  0.0242, /* D */  0.0293, /* E */
    0.0621, /* F */  0.0899, /* G */  0.0139, /* H */  0.0762, /* I */
    0.0319, /* K */  0.1274, /* L */  0.0338, /* M */  0.0285, /* N */
    0.0414, /* P */  0.0266, /* Q */  0.0375, /* R */  0.0747, /* S */
    0.0568, /* T */  0.0815, /* V */  0.0161, /* W */  0.0303  /* Y */
  };
  ESL_HMM *hmm;
  int      L = 400;
  int      M = 200;
  int      x;

  hmm = esl_hmm_Create(abc, 2);
  if (hmm == NULL) return NULL;   /* do not dereference a failed allocation */

  /* state 0 = normal iid model. state 1 = biased state */
  hmm->t[0][0] = (float) L / (float) (L+1);
  hmm->t[0][1] = 1.0f / (float) (L+1);
  hmm->t[0][2] = 1.0;             /* external length distribution */

  hmm->t[1][0] = (float) 2.0f / (float) (M+2);
  hmm->t[1][1] = (float) M / (float) (M+2);
  hmm->t[1][2] = 1.0;             /* external length distribution */

  for (x = 0; x < (int) (sizeof(sw50_bg) / sizeof(sw50_bg[0])); x++)
    {
      hmm->e[0][x] = sw50_bg[x];
      hmm->e[1][x] = mfs1_core[x];
    }

  hmm->pi[0] = 0.99;
  hmm->pi[1] = 0.01;

  esl_hmm_Configure(hmm, NULL);
  return hmm;
}