/* make_post_msa()
 *
 * Optionally, we can return the alignment we actually built the model
 * from (including RF annotation on assigned consensus columns, and any
 * trace doctoring to enforce Plan7 consistency).
 *
 * Args:     bld         - builder configuration; not read by this function
 *           premsa      - alignment handed to p7_tracealign_MSA() together with <tr>
 *           hmm         - model; only hmm->M is read here
 *           tr          - trace array handed to p7_tracealign_MSA()
 *           opt_postmsa - optRETURN: the newly created alignment, or NULL
 *                         if the caller does not want it
 *
 * Returns:  <eslOK> on success, with the new alignment in <*opt_postmsa>.
 *           If <opt_postmsa> is NULL, nothing is done and <eslOK> is returned.
 *
 *           On failure, returns whatever non-OK status p7_tracealign_MSA()
 *           reported, and <*opt_postmsa> is set to NULL so the caller
 *           never sees an indeterminate pointer.
 */
static int
make_post_msa(P7_BUILDER *bld, const ESL_MSA *premsa, const P7_HMM *hmm, P7_TRACE **tr, ESL_MSA **opt_postmsa)
{
  ESL_MSA *postmsa  = NULL;
  int      optflags = p7_DEFAULT;
  int      status;

  /* The post-alignment is optional; skip the work if nobody asked for it. */
  if (opt_postmsa == NULL) return eslOK;

  /* someday we might want to transfer more info from HMM to postmsa */
  if ((status = p7_tracealign_MSA(premsa, tr, hmm->M, optflags, &postmsa)) != eslOK) goto ERROR;

  *opt_postmsa = postmsa;
  return eslOK;

 ERROR:
  if (postmsa != NULL) esl_msa_Destroy(postmsa);
  *opt_postmsa = NULL;   /* don't leave the caller's pointer indeterminate on failure */
  return status;
}
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *msafile = esl_opt_GetArg(go, 1); int fmt = eslMSAFILE_UNKNOWN; ESL_ALPHABET *abc = NULL; ESL_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; P7_HMM *hmm = NULL; P7_PRIOR *prior = NULL; P7_TRACE **trarr = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; ESL_MSA *postmsa = NULL; int i; int status; /* Standard idioms for opening and reading a digital MSA. (See esl_msafile.c example). */ if (esl_opt_GetBoolean(go, "--rna")) abc = esl_alphabet_Create(eslRNA); else if (esl_opt_GetBoolean(go, "--dna")) abc = esl_alphabet_Create(eslDNA); else if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO); if ((status = esl_msafile_Open(&abc, msafile, NULL, fmt, NULL, &afp)) != eslOK) esl_msafile_OpenFailure(afp, status); bg = p7_bg_Create(abc); switch (abc->type) { case eslAMINO: prior = p7_prior_CreateAmino(); break; case eslDNA: prior = p7_prior_CreateNucleic(); break; case eslRNA: prior = p7_prior_CreateNucleic(); break; default: prior = p7_prior_CreateLaplace(abc); break; } if (prior == NULL) esl_fatal("Failed to initialize prior"); while ((status = esl_msafile_Read(afp, &msa)) != eslEOF) { if (status != eslOK) esl_msafile_ReadFailure(afp, status); /* The modelmakers collect counts in an HMM structure */ status = p7_Handmodelmaker(msa, NULL, &hmm, &trarr); if (status == eslENORESULT) esl_fatal("no consensus columns in alignment %s\n", msa->name); else if (status != eslOK) esl_fatal("failed to build HMM from alignment %s\n", msa->name); printf("COUNTS:\n"); p7_hmm_Dump(stdout, hmm); /* These counts, in combination with a prior, are converted to probability parameters */ status = p7_ParameterEstimation(hmm, prior); if (status != eslOK) esl_fatal("failed to parameterize HMM for %s", msa->name); printf("PROBABILITIES:\n"); p7_hmm_Dump(stdout, hmm); /* Just so we can dump a more informatively annotated trace - build a profile */ gm = 
p7_profile_Create(hmm->M, abc); p7_profile_Config (gm, hmm, bg); p7_profile_SetLength(gm, 400); /* Dump the individual traces */ for (i = 0; i < msa->nseq; i++) { printf("Trace %d: %s\n", i+1, msa->sqname[i]); p7_trace_DumpAnnotated(stdout, trarr[i], gm, msa->ax[i]); } /* Create an MSA from the individual traces */ status = p7_tracealign_MSA(msa, trarr, hmm->M, p7_DEFAULT, &postmsa); if (status != eslOK) esl_fatal("failed to create new MSA from traces\n"); esl_msafile_Write(stdout, postmsa, eslMSAFILE_PFAM); p7_profile_Destroy(gm); p7_hmm_Destroy(hmm); p7_trace_DestroyArray(trarr, msa->nseq); esl_msa_Destroy(postmsa); esl_msa_Destroy(msa); } esl_msafile_Close(afp); p7_bg_Destroy(bg); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
/* utest_fragments() * This exercises the building code that deals with fragments, * creating traces with B->X->{MDI}k and {MDI}k->X->E * transitions, and making sure we can make MSAs correctly * from them using p7_tracealign_MSA(). This code was initially * buggy when first written; bugs first detected by Elena, * Nov 2009 */ static void utest_fragments(void) { char *failmsg = "failure in build.c::utest_fragments() unit test"; char msafile[16] = "p7tmpXXXXXX"; /* tmpfile name template */ FILE *ofp = NULL; ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); ESL_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; ESL_MSA *dmsa = NULL; ESL_MSA *postmsa = NULL; P7_HMM *hmm = NULL; P7_TRACE **trarr = NULL; int i; /* Write an MSA that tests fragment/missing data transitions. * When built with Handmodelmaker (using the RF line): * seq1 forces B->X->Mk and Mk->X->E missing data transitions; * seq2 forces B->X->Ik and Ik->X->E missing data transitions; * seq3 forces B->X->Dk and Dk->X->E missing data transitions. * * The first two cases can arise from fragment definition in * model construction, or in an input file. * * The X->Dk and Dk->X cases should never happen, but we don't * prohibit them. They can only arise in an input file, because * esl_msa_MarkFragments() converts everything before/after * first/last residue to ~, and won't leave a gap character in * between. * * There's nothing being tested by seq4 and seq5; they're just there. */ if (esl_tmpfile_named(msafile, &ofp) != eslOK) esl_fatal(failmsg); fprintf(ofp, "# STOCKHOLM 1.0\n"); fprintf(ofp, "#=GC RF xxxxx.xxxxxxxxxxxx.xxx\n"); fprintf(ofp, "seq1 ~~~~~~GHIKLMNPQRST~~~~\n"); fprintf(ofp, "seq2 ~~~~~aGHIKLMNPQRSTa~~~\n"); fprintf(ofp, "seq3 ~~~~~~~HIKLMNPQRS~~~~~\n"); fprintf(ofp, "seq4 ACDEF.GHIKLMNPQRST.VWY\n"); fprintf(ofp, "seq5 ACDEF.GHIKLMNPQRST.VWY\n"); fprintf(ofp, "//\n"); fclose(ofp); /* Read the original as text for comparison to postmsa. 
Make a digital copy for construction */ if (esl_msafile_Open(NULL, msafile, NULL, eslMSAFILE_UNKNOWN, NULL, &afp)!= eslOK) esl_fatal(failmsg); if (esl_msafile_Read(afp, &msa) != eslOK) esl_fatal(failmsg); if ((dmsa = esl_msa_Clone(msa)) == NULL) esl_fatal(failmsg); if (esl_msa_Digitize(abc, dmsa, NULL) != eslOK) esl_fatal(failmsg); if (p7_Handmodelmaker(dmsa, NULL, &hmm, &trarr) != eslOK) esl_fatal(failmsg); for (i = 0; i < dmsa->nseq; i++) if (p7_trace_Validate(trarr[i], abc, dmsa->ax[i], NULL) != eslOK) esl_fatal(failmsg); /* The example is contrived such that the traces should give exactly the * same (text) alignment as the input alignment; no tracedoctoring. * Not a trivial test; for example, sequence 2 has a B->X->I transition that * can be problematic to handle. */ if (p7_tracealign_MSA(dmsa, trarr, hmm->M, p7_DEFAULT, &postmsa) != eslOK) esl_fatal(failmsg); for (i = 0; i < msa->nseq; i++) if (strcmp(msa->aseq[i], postmsa->aseq[i]) != 0) esl_fatal(failmsg); p7_trace_DestroyArray(trarr, msa->nseq); p7_hmm_Destroy(hmm); esl_msa_Destroy(msa); esl_msa_Destroy(dmsa); esl_msa_Destroy(postmsa); esl_msafile_Close(afp); esl_alphabet_Destroy(abc); remove(msafile); return; }