/* build_model()
 *
 * Choose the HMM architecture for <msa> according to
 * <bld->arch_strategy> and collect counts. Upon successful return,
 * <*ret_hmm> is newly allocated and contains relative-weighted
 * observed counts. Optionally, the caller can request the array of
 * inferred traces for <msa> through <opt_tr>.
 *
 * Returns <eslOK> on success. If the selected model maker returns
 * anything else, that status is handed back to the caller unchanged
 * and an explanatory message is reported via <bld->errbuf>.
 *
 * If <bld->arch_strategy> is neither <p7_ARCH_FAST> nor
 * <p7_ARCH_HAND>, no model maker is called and <eslOK> is returned.
 */
static int
build_model(P7_BUILDER *bld, ESL_MSA *msa, P7_HMM **ret_hmm, P7_TRACE ***opt_tr)
{
  int status;
  int hand_built;          /* 1 if the RF-annotated (hand) strategy was used; 0 for fast */

  /* Step 1: run whichever model maker the builder is configured for. */
  if (bld->arch_strategy == p7_ARCH_FAST)
    {
      hand_built = 0;
      status     = p7_Fastmodelmaker(msa, bld->symfrac, bld, ret_hmm, opt_tr);
    }
  else if (bld->arch_strategy == p7_ARCH_HAND)
    {
      hand_built = 1;
      status     = p7_Handmodelmaker(msa, bld, ret_hmm, opt_tr);
    }
  else
    {
      return eslOK;
    }

  if (status == eslOK) return eslOK;

  /* Step 2: translate the failure code into a message. The "no result"
   * wording depends on the strategy, and the "no reference annotation"
   * case is only distinguished for the hand strategy.
   */
  if (status == eslENORESULT)
    {
      if (hand_built)
        {
          ESL_XFAIL(status, bld->errbuf, "Alignment %s has no annotated consensus columns - can't build a model.\n", msa->name != NULL ? msa->name : "");
        }
      else
        {
          ESL_XFAIL(status, bld->errbuf, "Alignment %s has no consensus columns w/ > %d%% residues - can't build a model.\n", msa->name != NULL ? msa->name : "", (int) (100 * bld->symfrac));
        }
    }

  if (hand_built && status == eslEFORMAT)
    {
      ESL_XFAIL(status, bld->errbuf, "Alignment %s has no reference annotation line\n", msa->name != NULL ? msa->name : "");
    }

  if (status == eslEMEM)
    {
      ESL_XFAIL(status, bld->errbuf, "Memory allocation failure in model construction.\n");
    }

  /* Any other non-OK status. */
  ESL_XFAIL(status, bld->errbuf, "internal error in model construction.\n");

 ERROR:
  return status;
}
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *msafile = esl_opt_GetArg(go, 1); int fmt = eslMSAFILE_UNKNOWN; ESL_ALPHABET *abc = NULL; ESL_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; P7_HMM *hmm = NULL; P7_PRIOR *prior = NULL; P7_TRACE **trarr = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; ESL_MSA *postmsa = NULL; int i; int status; /* Standard idioms for opening and reading a digital MSA. (See esl_msafile.c example). */ if (esl_opt_GetBoolean(go, "--rna")) abc = esl_alphabet_Create(eslRNA); else if (esl_opt_GetBoolean(go, "--dna")) abc = esl_alphabet_Create(eslDNA); else if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO); if ((status = esl_msafile_Open(&abc, msafile, NULL, fmt, NULL, &afp)) != eslOK) esl_msafile_OpenFailure(afp, status); bg = p7_bg_Create(abc); switch (abc->type) { case eslAMINO: prior = p7_prior_CreateAmino(); break; case eslDNA: prior = p7_prior_CreateNucleic(); break; case eslRNA: prior = p7_prior_CreateNucleic(); break; default: prior = p7_prior_CreateLaplace(abc); break; } if (prior == NULL) esl_fatal("Failed to initialize prior"); while ((status = esl_msafile_Read(afp, &msa)) != eslEOF) { if (status != eslOK) esl_msafile_ReadFailure(afp, status); /* The modelmakers collect counts in an HMM structure */ status = p7_Handmodelmaker(msa, NULL, &hmm, &trarr); if (status == eslENORESULT) esl_fatal("no consensus columns in alignment %s\n", msa->name); else if (status != eslOK) esl_fatal("failed to build HMM from alignment %s\n", msa->name); printf("COUNTS:\n"); p7_hmm_Dump(stdout, hmm); /* These counts, in combination with a prior, are converted to probability parameters */ status = p7_ParameterEstimation(hmm, prior); if (status != eslOK) esl_fatal("failed to parameterize HMM for %s", msa->name); printf("PROBABILITIES:\n"); p7_hmm_Dump(stdout, hmm); /* Just so we can dump a more informatively annotated trace - build a profile */ gm = 
p7_profile_Create(hmm->M, abc); p7_profile_Config (gm, hmm, bg); p7_profile_SetLength(gm, 400); /* Dump the individual traces */ for (i = 0; i < msa->nseq; i++) { printf("Trace %d: %s\n", i+1, msa->sqname[i]); p7_trace_DumpAnnotated(stdout, trarr[i], gm, msa->ax[i]); } /* Create an MSA from the individual traces */ status = p7_tracealign_MSA(msa, trarr, hmm->M, p7_DEFAULT, &postmsa); if (status != eslOK) esl_fatal("failed to create new MSA from traces\n"); esl_msafile_Write(stdout, postmsa, eslMSAFILE_PFAM); p7_profile_Destroy(gm); p7_hmm_Destroy(hmm); p7_trace_DestroyArray(trarr, msa->nseq); esl_msa_Destroy(postmsa); esl_msa_Destroy(msa); } esl_msafile_Close(afp); p7_bg_Destroy(bg); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
/* utest_fragments() * This exercises the building code that deals with fragments, * creating traces with B->X->{MDI}k and {MDI}k->X->E * transitions, and making sure we can make MSAs correctly * from them using p7_tracealign_MSA(). This code was initially * buggy when first written; bugs first detected by Elena, * Nov 2009 */ static void utest_fragments(void) { char *failmsg = "failure in build.c::utest_fragments() unit test"; char msafile[16] = "p7tmpXXXXXX"; /* tmpfile name template */ FILE *ofp = NULL; ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); ESL_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; ESL_MSA *dmsa = NULL; ESL_MSA *postmsa = NULL; P7_HMM *hmm = NULL; P7_TRACE **trarr = NULL; int i; /* Write an MSA that tests fragment/missing data transitions. * When built with Handmodelmaker (using the RF line): * seq1 forces B->X->Mk and Mk->X->E missing data transitions; * seq2 forces B->X->Ik and Ik->X->E missing data transitions; * seq3 forces B->X->Dk and Dk->X->E missing data transitions. * * The first two cases can arise from fragment definition in * model construction, or in an input file. * * The X->Dk and Dk->X cases should never happen, but we don't * prohibit them. They can only arise in an input file, because * esl_msa_MarkFragments() converts everything before/after * first/last residue to ~, and won't leave a gap character in * between. * * There's nothing being tested by seq4 and seq5; they're just there. */ if (esl_tmpfile_named(msafile, &ofp) != eslOK) esl_fatal(failmsg); fprintf(ofp, "# STOCKHOLM 1.0\n"); fprintf(ofp, "#=GC RF xxxxx.xxxxxxxxxxxx.xxx\n"); fprintf(ofp, "seq1 ~~~~~~GHIKLMNPQRST~~~~\n"); fprintf(ofp, "seq2 ~~~~~aGHIKLMNPQRSTa~~~\n"); fprintf(ofp, "seq3 ~~~~~~~HIKLMNPQRS~~~~~\n"); fprintf(ofp, "seq4 ACDEF.GHIKLMNPQRST.VWY\n"); fprintf(ofp, "seq5 ACDEF.GHIKLMNPQRST.VWY\n"); fprintf(ofp, "//\n"); fclose(ofp); /* Read the original as text for comparison to postmsa. 
Make a digital copy for construction */ if (esl_msafile_Open(NULL, msafile, NULL, eslMSAFILE_UNKNOWN, NULL, &afp)!= eslOK) esl_fatal(failmsg); if (esl_msafile_Read(afp, &msa) != eslOK) esl_fatal(failmsg); if ((dmsa = esl_msa_Clone(msa)) == NULL) esl_fatal(failmsg); if (esl_msa_Digitize(abc, dmsa, NULL) != eslOK) esl_fatal(failmsg); if (p7_Handmodelmaker(dmsa, NULL, &hmm, &trarr) != eslOK) esl_fatal(failmsg); for (i = 0; i < dmsa->nseq; i++) if (p7_trace_Validate(trarr[i], abc, dmsa->ax[i], NULL) != eslOK) esl_fatal(failmsg); /* The example is contrived such that the traces should give exactly the * same (text) alignment as the input alignment; no tracedoctoring. * Not a trivial test; for example, sequence 2 has a B->X->I transition that * can be problematic to handle. */ if (p7_tracealign_MSA(dmsa, trarr, hmm->M, p7_DEFAULT, &postmsa) != eslOK) esl_fatal(failmsg); for (i = 0; i < msa->nseq; i++) if (strcmp(msa->aseq[i], postmsa->aseq[i]) != 0) esl_fatal(failmsg); p7_trace_DestroyArray(trarr, msa->nseq); p7_hmm_Destroy(hmm); esl_msa_Destroy(msa); esl_msa_Destroy(dmsa); esl_msa_Destroy(postmsa); esl_msafile_Close(afp); esl_alphabet_Destroy(abc); remove(msafile); return; }