Example #1
0
/* msa_shuffling()
 * SRE, Tue Jan 22 08:39:51 2008 [Market Street Cafe, Leesburg]
 * 
 * Shuffling multiple sequence alignments.
 *
 * Opens the alignment file named by command line argument 1 of <go>
 * (format code eslMSAFILE_UNKNOWN). For each alignment read from it,
 * writes <N> derived alignments (N is the value of the -N option) to
 * <ofp> in format <outfmt>. With --boot, each derived alignment is
 * made by esl_msashuffle_Bootstrap(); otherwise by
 * esl_msashuffle_Shuffle(). <r> is passed through to those routines.
 *
 * Output alignments are named "<name>-sample" (--boot) or
 * "<name>-shuffle", or just "sample"/"shuffle" when the input
 * alignment has no name; "-<i>" is appended when N > 1.
 *
 * Returns:  eslOK.
 *
 * Open, read, and copy failures are reported through esl_fatal().
 */
static int
msa_shuffling(ESL_GETOPTS *go, ESL_RANDOMNESS *r, FILE *ofp, int outfmt)
{
  char        *msafile = esl_opt_GetArg(go, 1);
  int          infmt   = eslMSAFILE_UNKNOWN;
  ESL_MSAFILE *afp     = NULL;
  ESL_MSA     *msa     = NULL;
  ESL_MSA     *shuf    = NULL;
  int          N       = esl_opt_GetInteger(go, "-N");
  int          i;
  int          status;          /* result of opening the file    */
  int          mstatus;         /* result of each alignment read */

  status = esl_msafile_Open(msafile, infmt, NULL, &afp);
  if (status == eslENOTFOUND)    esl_fatal("Alignment file %s isn't readable\n", msafile);
  else if (status == eslEFORMAT) esl_fatal("Couldn't determine format of %s\n",  msafile);
  else if (status != eslOK)      esl_fatal("Alignment file open failed (error %d)\n", status);
  
  while ((mstatus = esl_msa_Read(afp, &msa)) != eslEOF)
    {
      /* Test the read result <mstatus> here. <status> still holds the
       * (successful) open result, so testing it would let every parse
       * or read error slip through unnoticed.
       */
      if      (mstatus == eslEFORMAT) esl_fatal("Alignment file parse error:\n%s\n", afp->errbuf);
      else if (mstatus == eslEINVAL)  esl_fatal("Alignment file parse error:\n%s\n", afp->errbuf);
      else if (mstatus != eslOK)      esl_fatal("Alignment file read failed with error code %d\n", mstatus);

      /* <shuf> is a working copy that is overwritten on each of the N rounds. */
      if ((shuf = esl_msa_Clone(msa)) == NULL) esl_fatal("Failed to copy alignment\n");

      for (i = 0; i < N; i++)
        {
          if (esl_opt_GetBoolean(go, "--boot")) esl_msashuffle_Bootstrap(r, msa, shuf);
          else                                  esl_msashuffle_Shuffle  (r, msa, shuf);

          /* Set the name of the shuffled alignment */
          if (msa->name != NULL) {
            if (esl_opt_GetBoolean(go, "--boot")) {
              if (N > 1) esl_msa_FormatName(shuf, "%s-sample-%d", msa->name, i);
              else       esl_msa_FormatName(shuf, "%s-sample",    msa->name);
            } else {
              if (N > 1) esl_msa_FormatName(shuf, "%s-shuffle-%d", msa->name, i);
              else       esl_msa_FormatName(shuf, "%s-shuffle",    msa->name);
            }
          } else {
            if (esl_opt_GetBoolean(go, "--boot")) {
              if (N > 1) esl_msa_FormatName(shuf, "sample-%d", i);
              else       esl_msa_FormatName(shuf, "sample");
            } else {
              if (N > 1) esl_msa_FormatName(shuf, "shuffle-%d", i);
              else       esl_msa_FormatName(shuf, "shuffle");
            }
          }

          esl_msa_Write(ofp, shuf, outfmt);
        }

      esl_msa_Destroy(shuf);
      esl_msa_Destroy(msa);
    }

  /* NOTE(review): <afp> is not closed before returning. */
  return eslOK;
}
Example #2
0
/* msa_shuffling()
 * 
 * Shuffle, or bootstrap-resample, every alignment in an MSA file.
 *
 * The input file is command line argument 1 of <go>, opened with
 * format code eslMSAFILE_UNKNOWN. For each alignment read, <N>
 * derived alignments are produced (N is the value of the -N option):
 * by esl_msashuffle_Bootstrap() when --boot is set, by
 * esl_msashuffle_Shuffle() otherwise. <r> is handed to those routines.
 *
 * Each derived alignment is renamed "<name>-sample" / "<name>-shuffle"
 * (or plain "sample" / "shuffle" if the input alignment is unnamed),
 * with "-<i>" appended when N > 1, then written to <ofp>.
 *
 * Output is written in the input file's format (afp->format); the
 * <outfmt> argument is not referenced in this function.
 *
 * Returns:  eslOK.
 */
static int
msa_shuffling(ESL_GETOPTS *go, ESL_RANDOMNESS *r, FILE *ofp, int outfmt)
{
  char         *msafile  = esl_opt_GetArg(go, 1);
  int           nrounds  = esl_opt_GetInteger(go, "-N");
  ESL_MSAFILE  *afp      = NULL;
  ESL_MSA      *original = NULL;
  ESL_MSA      *derived  = NULL;
  int           do_boot;
  int           round;
  int           status;

  status = esl_msafile_Open(NULL, msafile, NULL, eslMSAFILE_UNKNOWN, NULL, &afp);
  if (status != eslOK) esl_msafile_OpenFailure(afp, status);

  for (;;)
    {
      status = esl_msafile_Read(afp, &original);
      if (status == eslEOF) break;
      if (status != eslOK)  esl_msafile_ReadFailure(afp, status);

      /* One working copy per input alignment, overwritten on every round. */
      derived = esl_msa_Clone(original);

      round = 0;
      while (round < nrounds)
        {
          do_boot = esl_opt_GetBoolean(go, "--boot");

          if (do_boot) esl_msashuffle_Bootstrap(r, original, derived);
          else         esl_msashuffle_Shuffle  (r, original, derived);

          /* Name the result after the input alignment, when it has a name. */
          if (original->name == NULL)
            {
              if      (do_boot && nrounds > 1) esl_msa_FormatName(derived, "sample-%d", round);
              else if (do_boot)                esl_msa_FormatName(derived, "sample");
              else if (nrounds > 1)            esl_msa_FormatName(derived, "shuffle-%d", round);
              else                             esl_msa_FormatName(derived, "shuffle");
            }
          else
            {
              if      (do_boot && nrounds > 1) esl_msa_FormatName(derived, "%s-sample-%d", original->name, round);
              else if (do_boot)                esl_msa_FormatName(derived, "%s-sample",    original->name);
              else if (nrounds > 1)            esl_msa_FormatName(derived, "%s-shuffle-%d", original->name, round);
              else                             esl_msa_FormatName(derived, "%s-shuffle",    original->name);
            }

          esl_msafile_Write(ofp, derived, afp->format);
          round++;
        }

      esl_msa_Destroy(derived);
      esl_msa_Destroy(original);
    }

  esl_msafile_Close(afp);
  return eslOK;
}
Example #3
0
/* utest_fragments()
 * Exercises the model-building code that deals with fragments:
 * it creates traces containing B->X->{MDI}k and {MDI}k->X->E
 * transitions, then checks that p7_tracealign_MSA() rebuilds the
 * MSA correctly from them. This code was initially buggy when
 * first written; bugs first detected by Elena, Nov 2009.
 *
 * Any failure is reported through esl_fatal().
 */
static void
utest_fragments(void)
{
  /* Test alignment, in Stockholm format, one output line per entry.
   * When built with Handmodelmaker (using the RF line):
   *   seq1 forces B->X->Mk and Mk->X->E missing data transitions;
   *   seq2 forces B->X->Ik and Ik->X->E missing data transitions;
   *   seq3 forces B->X->Dk and Dk->X->E missing data transitions.
   *
   * The first two cases can arise from fragment definition in model
   * construction, or in an input file.
   *
   * The X->Dk and Dk->X cases should never happen, but they are not
   * prohibited. They can only come from an input file, because
   * esl_msa_MarkFragments() converts everything before/after the
   * first/last residue to ~ and won't leave a gap character in
   * between.
   *
   * seq4 and seq5 test nothing in particular; they're just there.
   */
  static const char *const stockholm[] = {
    "# STOCKHOLM 1.0\n",
    "#=GC RF xxxxx.xxxxxxxxxxxx.xxx\n",
    "seq1    ~~~~~~GHIKLMNPQRST~~~~\n",
    "seq2    ~~~~~aGHIKLMNPQRSTa~~~\n",
    "seq3    ~~~~~~~HIKLMNPQRS~~~~~\n",
    "seq4    ACDEF.GHIKLMNPQRST.VWY\n",
    "seq5    ACDEF.GHIKLMNPQRST.VWY\n",
    "//\n",
  };
  char         *failmsg     = "failure in build.c::utest_fragments() unit test";
  char          tmpname[16] = "p7tmpXXXXXX"; /* tmpfile name template */
  ESL_ALPHABET *abc         = esl_alphabet_Create(eslAMINO);
  FILE         *tmpfp       = NULL;
  ESL_MSAFILE  *afp         = NULL;
  ESL_MSA      *textmsa     = NULL;   /* alignment as read, text mode */
  ESL_MSA      *digmsa      = NULL;   /* digitized copy of <textmsa>  */
  ESL_MSA      *rebuilt     = NULL;   /* alignment remade from traces */
  P7_HMM       *hmm         = NULL;
  P7_TRACE    **traces      = NULL;
  size_t        line;
  int           idx;
  int           status;

  /* Write the test alignment to a named tmpfile. */
  status = esl_tmpfile_named(tmpname, &tmpfp);
  if (status != eslOK) { esl_fatal(failmsg); }
  for (line = 0; line < sizeof(stockholm) / sizeof(stockholm[0]); line++)
    {
      fputs(stockholm[line], tmpfp);
    }
  fclose(tmpfp);

  /* Read it back as text, to compare against <rebuilt> later;
   * model construction works on a digital copy.
   */
  status = esl_msafile_Open(NULL, tmpname, NULL, eslMSAFILE_UNKNOWN, NULL, &afp);
  if (status != eslOK) { esl_fatal(failmsg); }

  status = esl_msafile_Read(afp, &textmsa);
  if (status != eslOK) { esl_fatal(failmsg); }

  digmsa = esl_msa_Clone(textmsa);
  if (digmsa == NULL)  { esl_fatal(failmsg); }

  status = esl_msa_Digitize(abc, digmsa, NULL);
  if (status != eslOK) { esl_fatal(failmsg); }

  /* Build the model and its traces; every trace must validate. */
  status = p7_Handmodelmaker(digmsa, NULL, &hmm, &traces);
  if (status != eslOK) { esl_fatal(failmsg); }

  for (idx = 0; idx < digmsa->nseq; idx++)
    {
      status = p7_trace_Validate(traces[idx], abc, digmsa->ax[idx], NULL);
      if (status != eslOK) { esl_fatal(failmsg); }
    }

  /* The example is contrived so that the traces give back exactly
   * the same (text) alignment as the input; no tracedoctoring.
   * This is not a trivial test: sequence 2, for example, has a
   * B->X->I transition that can be problematic to handle.
   */
  status = p7_tracealign_MSA(digmsa, traces, hmm->M, p7_DEFAULT, &rebuilt);
  if (status != eslOK) { esl_fatal(failmsg); }

  for (idx = 0; idx < textmsa->nseq; idx++)
    {
      if (strcmp(textmsa->aseq[idx], rebuilt->aseq[idx]) != 0) { esl_fatal(failmsg); }
    }

  /* Release everything and delete the tmpfile. */
  p7_trace_DestroyArray(traces, textmsa->nseq);
  p7_hmm_Destroy(hmm);
  esl_msa_Destroy(textmsa);
  esl_msa_Destroy(digmsa);
  esl_msa_Destroy(rebuilt);
  esl_msafile_Close(afp);
  esl_alphabet_Destroy(abc);
  remove(tmpname);
  return;
}