/* The "basic" utest is a minimal driver for making a small DNA profile and a small DNA sequence,
 * then running Viterbi and Forward. It's useful for dumping DP matrices and profiles for debugging.
 *
 * Args:    go - application options. When the boolean option "-v" is set, the
 *               Viterbi/Forward scores, the DP matrix after each algorithm,
 *               and the Viterbi trace are written to stdout.
 *
 * Returns: (void). Every failure is fatal: the function calls esl_fatal()
 *          with a message naming the step that failed, including a
 *          disagreement between the Viterbi score and the rescored trace.
 */
static void
utest_basic(ESL_GETOPTS *go)
{
  char         *query = "# STOCKHOLM 1.0\n\nseq1 GAATTC\nseq2 GAATTC\n//\n";
  int           fmt   = eslMSAFILE_STOCKHOLM;
  char         *targ  = "GAATTC";
  ESL_ALPHABET *abc   = NULL;
  ESL_MSA      *msa   = NULL;
  P7_HMM       *hmm   = NULL;
  P7_PROFILE   *gm    = NULL;
  P7_BG        *bg    = NULL;
  P7_PRIOR     *pri   = NULL;
  ESL_DSQ      *dsq   = NULL;
  P7_GMX       *gx    = NULL;
  P7_TRACE     *tr    = NULL;
  int           L     = (int) strlen(targ);
  int           be_verbose;
  float         vsc, vsc2, fsc;

  /* The option is consulted once; it does not change during the test. */
  be_verbose = esl_opt_GetBoolean(go, "-v");

  /* Build and parameterize a model from the two-sequence query alignment. */
  if ((abc = esl_alphabet_Create(eslDNA))                    == NULL)  esl_fatal("failed to create alphabet");
  if ((pri = p7_prior_CreateNucleic())                       == NULL)  esl_fatal("failed to create prior");
  if ((msa = esl_msa_CreateFromString(query, fmt))           == NULL)  esl_fatal("failed to create MSA");
  if (esl_msa_Digitize(abc, msa, NULL)                       != eslOK) esl_fatal("failed to digitize MSA");
  if (p7_Fastmodelmaker(msa, 0.5, NULL, &hmm, NULL)          != eslOK) esl_fatal("failed to create GAATTC model");
  if (p7_ParameterEstimation(hmm, pri)                       != eslOK) esl_fatal("failed to parameterize GAATTC model");
  if (p7_hmm_SetConsensus(hmm, NULL)                         != eslOK) esl_fatal("failed to make consensus");

  /* Configure a unilocal search profile for a target of length L. */
  if ((bg = p7_bg_Create(abc))                               == NULL)  esl_fatal("failed to create DNA null model");
  if ((gm = p7_profile_Create(hmm->M, abc))                  == NULL)  esl_fatal("failed to create GAATTC profile");
  if (p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL)          != eslOK) esl_fatal("failed to config profile");
  if (p7_profile_Validate(gm, NULL, 0.0001)                  != eslOK) esl_fatal("whoops, profile is bad!");

  /* Target sequence, DP matrix, and an empty trace to fill in. */
  if (esl_abc_CreateDsq(abc, targ, &dsq)                     != eslOK) esl_fatal("failed to create GAATTC digital sequence");
  if ((gx = p7_gmx_Create(gm->M, L))                         == NULL)  esl_fatal("failed to create DP matrix");
  if ((tr = p7_trace_Create())                               == NULL)  esl_fatal("trace creation failed");

  /* Viterbi. The status is checked so that vsc is never read uninitialized. */
  if (p7_GViterbi(dsq, L, gm, gx, &vsc)                      != eslOK) esl_fatal("Viterbi failed");
  if (be_verbose) printf("Viterbi score: %.4f\n", vsc);
  if (be_verbose) p7_gmx_Dump(stdout, gx, p7_DEFAULT);

  /* Trace back through the Viterbi matrix; rescoring the trace must reproduce vsc. */
  if (p7_GTrace(dsq, L, gm, gx, tr)                          != eslOK) esl_fatal("Viterbi traceback failed");
  if (p7_trace_Score(tr, dsq, gm, &vsc2)                     != eslOK) esl_fatal("failed to score trace");
  if (be_verbose) p7_trace_Dump(stdout, tr, gm, dsq);
  if (esl_FCompare(vsc, vsc2, 1e-5)                          != eslOK) esl_fatal("trace score and Viterbi score don't agree.");

  /* Forward, reusing (and overwriting) the same DP matrix. */
  if (p7_GForward(dsq, L, gm, gx, &fsc)                      != eslOK) esl_fatal("Forward failed");
  if (be_verbose) printf("Forward score: %.4f\n", fsc);
  if (be_verbose) p7_gmx_Dump(stdout, gx, p7_DEFAULT);

  p7_trace_Destroy(tr);
  p7_gmx_Destroy(gx);
  free(dsq);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_msa_Destroy(msa);
  p7_prior_Destroy(pri);
  esl_alphabet_Destroy(abc);
  return;
}
int main(int argc, char **argv) { ESL_ALPHABET *aa_abc = NULL, *nt_abc = NULL; ESL_MSA *msa1 = NULL, *msa2 = NULL, *msa3 = NULL, *msa4 = NULL, *msa5 = NULL; double uniform[5] = { 1.0, 1.0, 1.0, 1.0, 1.0 }; double wgt2[5] = { 0.833333, 0.833333, 0.833333, 0.833333, 1.66667 }; /* GSC, PB give same answer */ double gsc3[4] = { 1.125000, 0.875000, 0.875000, 1.125000 }; double pb3[4] = { 1.066667, 1.066667, 0.800000, 1.066667 }; double blosum3[4] = { 1.333333, 0.666667, 0.666667, 1.333333 }; double gsc4[4] = { 0.760870, 0.760870, 1.086957, 1.391304 }; double pb4[4] = { 0.800000, 0.800000, 1.000000, 1.400000 }; double blosum4[4] = { 0.666667, 0.666667, 1.333333, 1.333333 }; if ((aa_abc = esl_alphabet_Create(eslAMINO)) == NULL) esl_fatal("failed to create amino alphabet"); if ((nt_abc = esl_alphabet_Create(eslDNA)) == NULL) esl_fatal("failed to create DNA alphabet"); /* msa1: all sequences identical. Any weighting method should assign uniform weights. * msa2: "contrived" example of [Henikoff94b]. "Correct" solution is 1==2, 3==4, and 5==2x other weights. * msa3: the "nitrogenase segments" example of [Henikoff94b]. * msa4: alignment that makes the same distances as Figure 4 from [Gerstein94] * msa5: gap pathology. no information here, so weighting methods should resort to uniform weights. 
*/ if ((msa1 = esl_msa_CreateFromString("# STOCKHOLM 1.0\n\nseq1 AAAAA\nseq2 AAAAA\nseq3 AAAAA\nseq4 AAAAA\nseq5 AAAAA\n//\n", eslMSAFILE_STOCKHOLM)) == NULL) esl_fatal("msa 1 creation failed"); if ((msa2 = esl_msa_CreateFromString("# STOCKHOLM 1.0\n\nseq1 AAAAA\nseq2 AAAAA\nseq3 CCCCC\nseq4 CCCCC\nseq5 TTTTT\n//\n", eslMSAFILE_STOCKHOLM)) == NULL) esl_fatal("msa 2 creation failed"); if ((msa3 = esl_msa_CreateFromString("# STOCKHOLM 1.0\n\nNIFE_CLOPA GYVGS\nNIFD_AZOVI GFDGF\nNIFD_BRAJA GYDGF\nNIFK_ANASP GYQGG\n//\n", eslMSAFILE_STOCKHOLM)) == NULL) esl_fatal("msa 3 creation failed"); if ((msa4 = esl_msa_CreateFromString("# STOCKHOLM 1.0\n\nA AAAAAAAAAA\nB TTAAAAAAAA\nC ATAAAACCCC\nD GGGAAGGGGG\n//\n", eslMSAFILE_STOCKHOLM)) == NULL) esl_fatal("msa 4 creation failed"); if ((msa5 = esl_msa_CreateFromString("# STOCKHOLM 1.0\n\nA A----\nB -C---\nC --G--\nD ---T-\nE ----T\n//\n", eslMSAFILE_STOCKHOLM)) == NULL) esl_fatal("msa 5 creation failed"); utest_GSC(aa_abc, msa1, uniform); utest_GSC(nt_abc, msa1, uniform); utest_GSC(aa_abc, msa2, wgt2); utest_GSC(nt_abc, msa2, wgt2); utest_GSC(aa_abc, msa3, gsc3); /* no nt test on msa3: it's protein-only */ utest_GSC(aa_abc, msa4, gsc4); utest_GSC(nt_abc, msa4, gsc4); utest_GSC(aa_abc, msa5, uniform); utest_GSC(aa_abc, msa5, uniform); utest_PB(aa_abc, msa1, uniform); utest_PB(nt_abc, msa1, uniform); utest_PB(aa_abc, msa2, wgt2); utest_PB(nt_abc, msa2, wgt2); utest_PB(aa_abc, msa3, pb3); /* no nt test on msa3: it's protein-only */ utest_PB(aa_abc, msa4, pb4); utest_PB(nt_abc, msa4, pb4); utest_PB(aa_abc, msa5, uniform); utest_PB(nt_abc, msa5, uniform); utest_BLOSUM(aa_abc, msa1, 0.62, uniform); utest_BLOSUM(nt_abc, msa1, 0.62, uniform); utest_BLOSUM(aa_abc, msa2, 0.62, wgt2); utest_BLOSUM(nt_abc, msa2, 0.62, wgt2); utest_BLOSUM(aa_abc, msa3, 0.62, blosum3); /* no nt test on msa3: it's protein-only */ utest_BLOSUM(aa_abc, msa4, 0.62, blosum4); utest_BLOSUM(nt_abc, msa4, 0.62, blosum4); utest_BLOSUM(aa_abc, msa5, 0.62, uniform); 
utest_BLOSUM(nt_abc, msa5, 0.62, uniform); /* BLOSUM weights have the peculiar property of going flat at maxid=0.0 (everyone * clusters) or maxid=1.0 (nobody clusters). */ utest_BLOSUM(aa_abc, msa4, 0.0, uniform); utest_BLOSUM(aa_abc, msa4, 1.0, uniform); esl_msa_Destroy(msa1); esl_msa_Destroy(msa2); esl_msa_Destroy(msa3); esl_msa_Destroy(msa4); esl_msa_Destroy(msa5); esl_alphabet_Destroy(aa_abc); esl_alphabet_Destroy(nt_abc); exit(0); }