/* msa_shuffling()
 * SRE, Tue Jan 22 08:39:51 2008 [Market Street Cafe, Leesburg]
 *
 * Shuffling multiple sequence alignments.
 *
 * Opens the alignment file named by command line argument 1 and, for
 * every alignment read from it, writes <N> (option -N) randomized
 * copies to <ofp> in format <outfmt>. With --boot each copy is made by
 * esl_msashuffle_Bootstrap(); otherwise by esl_msashuffle_Shuffle().
 *
 * Each copy is renamed before it is written:
 *    named input,   --boot :  "<name>-sample"   (or "<name>-sample-<i>"  if N > 1)
 *    named input,   default:  "<name>-shuffle"  (or "<name>-shuffle-<i>" if N > 1)
 *    unnamed input, --boot :  "sample"          (or "sample-<i>"         if N > 1)
 *    unnamed input, default:  "shuffle"         (or "shuffle-<i>"        if N > 1)
 * where <i> runs over 0..N-1.
 *
 * Args:     go     - application options; argument 1, -N and --boot are read
 *           r      - random number generator handed to the shuffling routines
 *           ofp    - open stream the alignments are written to
 *           outfmt - alignment format code passed to esl_msa_Write()
 *
 * Returns:  eslOK once end-of-file is reached on the input.
 *           Open failures, read/parse failures and a failed clone are
 *           reported through esl_fatal().
 */
static int
msa_shuffling(ESL_GETOPTS *go, ESL_RANDOMNESS *r, FILE *ofp, int outfmt)
{
  char        *msafile = esl_opt_GetArg(go, 1);
  int          infmt   = eslMSAFILE_UNKNOWN;
  ESL_MSAFILE *afp     = NULL;
  ESL_MSA     *msa     = NULL;
  ESL_MSA     *shuf    = NULL;
  int          N       = esl_opt_GetInteger(go, "-N");
  int          do_boot = esl_opt_GetBoolean(go, "--boot");  /* loop invariant: query once */
  int          i;
  int          status, mstatus;

  status = esl_msafile_Open(msafile, infmt, NULL, &afp);
  if      (status == eslENOTFOUND) esl_fatal("Alignment file %s isn't readable\n", msafile);
  else if (status == eslEFORMAT)   esl_fatal("Couldn't determine format of %s\n",  msafile);
  else if (status != eslOK)        esl_fatal("Alignment file open failed (error %d)\n", status);

  while ((mstatus = esl_msa_Read(afp, &msa)) != eslEOF)
    {
      /* Test the result of the read itself (mstatus). Testing <status>
       * here would only re-test the already successful open, so read
       * errors would go unnoticed and <msa> would be used after a
       * failed read.
       */
      if      (mstatus == eslEFORMAT) esl_fatal("Alignment file parse error:\n%s\n", afp->errbuf);
      else if (mstatus == eslEINVAL)  esl_fatal("Alignment file parse error:\n%s\n", afp->errbuf);
      else if (mstatus != eslOK)      esl_fatal("Alignment file read failed with error code %d\n", mstatus);

      /* <shuf> is the working copy that each sample is generated into. */
      if ((shuf = esl_msa_Clone(msa)) == NULL)
        esl_fatal("Failed to clone alignment for shuffling\n");

      for (i = 0; i < N; i++)
        {
          if (do_boot) esl_msashuffle_Bootstrap(r, msa, shuf);
          else         esl_msashuffle_Shuffle  (r, msa, shuf);

          /* Set the name of the shuffled alignment */
          if (msa->name != NULL)
            {
              if (do_boot)
                {
                  if (N > 1) esl_msa_FormatName(shuf, "%s-sample-%d", msa->name, i);
                  else       esl_msa_FormatName(shuf, "%s-sample",    msa->name);
                }
              else
                {
                  if (N > 1) esl_msa_FormatName(shuf, "%s-shuffle-%d", msa->name, i);
                  else       esl_msa_FormatName(shuf, "%s-shuffle",    msa->name);
                }
            }
          else
            {
              if (do_boot)
                {
                  if (N > 1) esl_msa_FormatName(shuf, "sample-%d", i);
                  else       esl_msa_FormatName(shuf, "sample");
                }
              else
                {
                  if (N > 1) esl_msa_FormatName(shuf, "shuffle-%d", i);
                  else       esl_msa_FormatName(shuf, "shuffle");
                }
            }

          esl_msa_Write(ofp, shuf, outfmt);
        }

      esl_msa_Destroy(shuf);
      esl_msa_Destroy(msa);
    }

  /* NOTE(review): <afp> is not closed before returning. The matching
   * close call for this MSA-file API should be added here - TODO confirm
   * its name against the Easel version this file is built with.
   */
  return eslOK;
}
/* msa_shuffling()
 *
 * Shuffling multiple sequence alignments.
 *
 * Reads every alignment from the file named by command line argument 1
 * and emits <N> (option -N) randomized versions of each one on <ofp>.
 * Option --boot selects esl_msashuffle_Bootstrap(); without it,
 * esl_msashuffle_Shuffle() is used.
 *
 * Output alignments are named "<name>-sample" or "<name>-shuffle"
 * ("sample" / "shuffle" when the input alignment has no name); when
 * N > 1 the sample index (0..N-1) is appended as "-<i>".
 *
 * Open and read failures are handed to esl_msafile_OpenFailure() and
 * esl_msafile_ReadFailure() respectively.
 *
 * NOTE(review): <outfmt> is not read by this function; alignments are
 * written using afp->format. Confirm with the caller that this is
 * intended.
 *
 * Returns:  eslOK after the input file has been read to EOF and closed.
 */
static int
msa_shuffling(ESL_GETOPTS *go, ESL_RANDOMNESS *r, FILE *ofp, int outfmt)
{
  char        *msafile  = esl_opt_GetArg(go, 1);
  int          infmt    = eslMSAFILE_UNKNOWN;
  ESL_MSAFILE *afp      = NULL;
  ESL_MSA     *msa      = NULL;
  ESL_MSA     *shuf     = NULL;
  int          N        = esl_opt_GetInteger(go, "-N");
  int          idx;        /* sample counter, 0..N-1                 */
  int          boot;       /* TRUE if --boot was given               */
  int          named;      /* TRUE if the input alignment has a name */
  int          status;

  status = esl_msafile_Open(NULL, msafile, NULL, infmt, NULL, &afp);
  if (status != eslOK)
    esl_msafile_OpenFailure(afp, status);

  for (;;)
    {
      status = esl_msafile_Read(afp, &msa);
      if (status == eslEOF) break;
      if (status != eslOK)  esl_msafile_ReadFailure(afp, status);

      /* Working copy; each sample overwrites it in turn. */
      shuf = esl_msa_Clone(msa);

      for (idx = 0; idx < N; idx++)
        {
          boot  = esl_opt_GetBoolean(go, "--boot");
          named = (msa->name != NULL);

          if (boot) esl_msashuffle_Bootstrap(r, msa, shuf);
          else      esl_msashuffle_Shuffle  (r, msa, shuf);

          /* Name the new alignment after its source and sample index. */
          if (boot)
            {
              if      (named && N > 1) esl_msa_FormatName(shuf, "%s-sample-%d", msa->name, idx);
              else if (named)          esl_msa_FormatName(shuf, "%s-sample", msa->name);
              else if (N > 1)          esl_msa_FormatName(shuf, "sample-%d", idx);
              else                     esl_msa_FormatName(shuf, "sample");
            }
          else
            {
              if      (named && N > 1) esl_msa_FormatName(shuf, "%s-shuffle-%d", msa->name, idx);
              else if (named)          esl_msa_FormatName(shuf, "%s-shuffle", msa->name);
              else if (N > 1)          esl_msa_FormatName(shuf, "shuffle-%d", idx);
              else                     esl_msa_FormatName(shuf, "shuffle");
            }

          esl_msafile_Write(ofp, shuf, afp->format);
        }

      esl_msa_Destroy(shuf);
      esl_msa_Destroy(msa);
    }

  esl_msafile_Close(afp);
  return eslOK;
}
/* utest_fragments() * This exercises the building code that deals with fragments, * creating traces with B->X->{MDI}k and {MDI}k->X->E * transitions, and making sure we can make MSAs correctly * from them using p7_tracealign_MSA(). This code was initially * buggy when first written; bugs first detected by Elena, * Nov 2009 */ static void utest_fragments(void) { char *failmsg = "failure in build.c::utest_fragments() unit test"; char msafile[16] = "p7tmpXXXXXX"; /* tmpfile name template */ FILE *ofp = NULL; ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); ESL_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; ESL_MSA *dmsa = NULL; ESL_MSA *postmsa = NULL; P7_HMM *hmm = NULL; P7_TRACE **trarr = NULL; int i; /* Write an MSA that tests fragment/missing data transitions. * When built with Handmodelmaker (using the RF line): * seq1 forces B->X->Mk and Mk->X->E missing data transitions; * seq2 forces B->X->Ik and Ik->X->E missing data transitions; * seq3 forces B->X->Dk and Dk->X->E missing data transitions. * * The first two cases can arise from fragment definition in * model construction, or in an input file. * * The X->Dk and Dk->X cases should never happen, but we don't * prohibit them. They can only arise in an input file, because * esl_msa_MarkFragments() converts everything before/after * first/last residue to ~, and won't leave a gap character in * between. * * There's nothing being tested by seq4 and seq5; they're just there. */ if (esl_tmpfile_named(msafile, &ofp) != eslOK) esl_fatal(failmsg); fprintf(ofp, "# STOCKHOLM 1.0\n"); fprintf(ofp, "#=GC RF xxxxx.xxxxxxxxxxxx.xxx\n"); fprintf(ofp, "seq1 ~~~~~~GHIKLMNPQRST~~~~\n"); fprintf(ofp, "seq2 ~~~~~aGHIKLMNPQRSTa~~~\n"); fprintf(ofp, "seq3 ~~~~~~~HIKLMNPQRS~~~~~\n"); fprintf(ofp, "seq4 ACDEF.GHIKLMNPQRST.VWY\n"); fprintf(ofp, "seq5 ACDEF.GHIKLMNPQRST.VWY\n"); fprintf(ofp, "//\n"); fclose(ofp); /* Read the original as text for comparison to postmsa. 
Make a digital copy for construction */ if (esl_msafile_Open(NULL, msafile, NULL, eslMSAFILE_UNKNOWN, NULL, &afp)!= eslOK) esl_fatal(failmsg); if (esl_msafile_Read(afp, &msa) != eslOK) esl_fatal(failmsg); if ((dmsa = esl_msa_Clone(msa)) == NULL) esl_fatal(failmsg); if (esl_msa_Digitize(abc, dmsa, NULL) != eslOK) esl_fatal(failmsg); if (p7_Handmodelmaker(dmsa, NULL, &hmm, &trarr) != eslOK) esl_fatal(failmsg); for (i = 0; i < dmsa->nseq; i++) if (p7_trace_Validate(trarr[i], abc, dmsa->ax[i], NULL) != eslOK) esl_fatal(failmsg); /* The example is contrived such that the traces should give exactly the * same (text) alignment as the input alignment; no tracedoctoring. * Not a trivial test; for example, sequence 2 has a B->X->I transition that * can be problematic to handle. */ if (p7_tracealign_MSA(dmsa, trarr, hmm->M, p7_DEFAULT, &postmsa) != eslOK) esl_fatal(failmsg); for (i = 0; i < msa->nseq; i++) if (strcmp(msa->aseq[i], postmsa->aseq[i]) != 0) esl_fatal(failmsg); p7_trace_DestroyArray(trarr, msa->nseq); p7_hmm_Destroy(hmm); esl_msa_Destroy(msa); esl_msa_Destroy(dmsa); esl_msa_Destroy(postmsa); esl_msafile_Close(afp); esl_alphabet_Destroy(abc); remove(msafile); return; }