示例#1
0
/* build_model():
 * Given <msa>, choose HMM architecture, collect counts;
 * upon return, <*ret_hmm> is newly allocated and contains
 * relative-weighted observed counts.
 * Optionally, caller can request an array of inferred traces for
 * the <msa> too.
 */
static int
build_model(P7_BUILDER *bld, ESL_MSA *msa, P7_HMM **ret_hmm, P7_TRACE ***opt_tr)
{
  int status;

  if      (bld->arch_strategy == p7_ARCH_FAST)
    {
      status = p7_Fastmodelmaker( msa, bld->symfrac, bld, ret_hmm, opt_tr);
      if      (status == eslENORESULT) ESL_XFAIL(status, bld->errbuf, "Alignment %s has no consensus columns w/ > %d%% residues - can't build a model.\n", msa->name != NULL ? msa->name : "", (int) (100 * bld->symfrac));
      else if (status == eslEMEM)      ESL_XFAIL(status, bld->errbuf, "Memory allocation failure in model construction.\n");
      else if (status != eslOK)        ESL_XFAIL(status, bld->errbuf, "internal error in model construction.\n");      
    }
  else if (bld->arch_strategy == p7_ARCH_HAND)
    {
      status = p7_Handmodelmaker( msa, bld, ret_hmm, opt_tr);
      if      (status == eslENORESULT) ESL_XFAIL(status, bld->errbuf, "Alignment %s has no annotated consensus columns - can't build a model.\n", msa->name != NULL ? msa->name : "");
      else if (status == eslEFORMAT)   ESL_XFAIL(status, bld->errbuf, "Alignment %s has no reference annotation line\n", msa->name != NULL ? msa->name : "");            
      else if (status == eslEMEM)      ESL_XFAIL(status, bld->errbuf, "Memory allocation failure in model construction.\n");
      else if (status != eslOK)        ESL_XFAIL(status, bld->errbuf, "internal error in model construction.\n");
    }
  return eslOK;

 ERROR:
  return status;
}
示例#2
0
int
main(int argc, char **argv)
{
  ESL_GETOPTS  *go        = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  char         *msafile   = esl_opt_GetArg(go, 1);
  int           fmt       = eslMSAFILE_UNKNOWN;
  ESL_ALPHABET *abc       = NULL;
  ESL_MSAFILE  *afp       = NULL;
  ESL_MSA      *msa       = NULL;
  P7_HMM       *hmm       = NULL;
  P7_PRIOR     *prior     = NULL;
  P7_TRACE    **trarr     = NULL;
  P7_BG        *bg        = NULL;
  P7_PROFILE   *gm        = NULL;
  ESL_MSA      *postmsa   = NULL;
  int           i;
  int           status;
  
  /* Standard idioms for opening and reading a digital MSA. (See esl_msafile.c example). */
  if      (esl_opt_GetBoolean(go, "--rna"))   abc = esl_alphabet_Create(eslRNA);
  else if (esl_opt_GetBoolean(go, "--dna"))   abc = esl_alphabet_Create(eslDNA);
  else if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO);

  if ((status = esl_msafile_Open(&abc, msafile, NULL, fmt, NULL, &afp)) != eslOK)
    esl_msafile_OpenFailure(afp, status);

  bg  = p7_bg_Create(abc);

  switch (abc->type) {
  case eslAMINO: prior = p7_prior_CreateAmino();      break;
  case eslDNA:   prior = p7_prior_CreateNucleic();    break;
  case eslRNA:   prior = p7_prior_CreateNucleic();    break;
  default:       prior = p7_prior_CreateLaplace(abc); break;
  }
  if (prior == NULL) esl_fatal("Failed to initialize prior");

  while ((status = esl_msafile_Read(afp, &msa)) != eslEOF)
    {
      if (status != eslOK) esl_msafile_ReadFailure(afp, status);

      /* The modelmakers collect counts in an HMM structure */
      status = p7_Handmodelmaker(msa, NULL, &hmm, &trarr);
      if      (status == eslENORESULT) esl_fatal("no consensus columns in alignment %s\n",  msa->name);
      else if (status != eslOK)        esl_fatal("failed to build HMM from alignment %s\n", msa->name);

      printf("COUNTS:\n");
      p7_hmm_Dump(stdout, hmm);

      /* These counts, in combination with a prior, are converted to probability parameters */
      status = p7_ParameterEstimation(hmm, prior);
      if (status != eslOK)             esl_fatal("failed to parameterize HMM for %s", msa->name);

      printf("PROBABILITIES:\n");
      p7_hmm_Dump(stdout, hmm);

      /* Just so we can dump a more informatively annotated trace - build a profile */
      gm = p7_profile_Create(hmm->M, abc);
      p7_profile_Config   (gm, hmm, bg);
      p7_profile_SetLength(gm, 400);

      /* Dump the individual traces */
      for (i = 0; i < msa->nseq; i++)
	{
	  printf("Trace %d: %s\n", i+1, msa->sqname[i]);
	  p7_trace_DumpAnnotated(stdout, trarr[i], gm, msa->ax[i]);
	}
      
      /* Create an MSA from the individual traces */
      status = p7_tracealign_MSA(msa, trarr, hmm->M, p7_DEFAULT, &postmsa);
      if (status != eslOK) esl_fatal("failed to create new MSA from traces\n");

      esl_msafile_Write(stdout, postmsa, eslMSAFILE_PFAM);

      p7_profile_Destroy(gm);
      p7_hmm_Destroy(hmm);
      p7_trace_DestroyArray(trarr, msa->nseq);
      esl_msa_Destroy(postmsa);
      esl_msa_Destroy(msa);
    }

  esl_msafile_Close(afp);
  p7_bg_Destroy(bg);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
示例#3
0
/* utest_fragments()
 * This exercises the building code that deals with fragments,
 * creating traces with B->X->{MDI}k and {MDI}k->X->E 
 * transitions, and making sure we can make MSAs correctly
 * from them using p7_tracealign_MSA(). This code was initially
 * buggy when first written; bugs first detected by Elena, 
 * Nov 2009
 */
static void
utest_fragments(void)
{
  char         *failmsg      = "failure in build.c::utest_fragments() unit test";
  char          msafile[16]  = "p7tmpXXXXXX"; /* tmpfile name template */
  FILE         *ofp          = NULL;
  ESL_ALPHABET *abc          = esl_alphabet_Create(eslAMINO);
  ESL_MSAFILE  *afp          = NULL;
  ESL_MSA      *msa          = NULL;
  ESL_MSA      *dmsa         = NULL;
  ESL_MSA      *postmsa      = NULL;
  P7_HMM       *hmm          = NULL;
  P7_TRACE    **trarr        = NULL;
  int           i;

  /* Write an MSA that tests fragment/missing data transitions. 
   * When built with Handmodelmaker (using the RF line):
   *   seq1 forces B->X->Mk and Mk->X->E missing data transitions; 
   *   seq2 forces B->X->Ik and Ik->X->E missing data transitions;
   *   seq3 forces B->X->Dk and Dk->X->E missing data transitions.
   *
   * The first two cases can arise from fragment definition in
   * model construction, or in an input file. 
   *
   * The X->Dk and Dk->X cases should never happen, but we don't
   * prohibit them. They can only arise in an input file, because
   * esl_msa_MarkFragments() converts everything before/after
   * first/last residue to ~, and won't leave a gap character in
   * between.
   *
   * There's nothing being tested by seq4 and seq5; they're just there.
   */
  if (esl_tmpfile_named(msafile, &ofp) != eslOK) esl_fatal(failmsg);
  fprintf(ofp, "# STOCKHOLM 1.0\n");
  fprintf(ofp, "#=GC RF xxxxx.xxxxxxxxxxxx.xxx\n");
  fprintf(ofp, "seq1    ~~~~~~GHIKLMNPQRST~~~~\n");
  fprintf(ofp, "seq2    ~~~~~aGHIKLMNPQRSTa~~~\n");
  fprintf(ofp, "seq3    ~~~~~~~HIKLMNPQRS~~~~~\n");
  fprintf(ofp, "seq4    ACDEF.GHIKLMNPQRST.VWY\n");
  fprintf(ofp, "seq5    ACDEF.GHIKLMNPQRST.VWY\n");
  fprintf(ofp, "//\n");
  fclose(ofp);

  /* Read the original as text for comparison to postmsa. Make a digital copy for construction */
  if (esl_msafile_Open(NULL, msafile, NULL, eslMSAFILE_UNKNOWN, NULL, &afp)!= eslOK) esl_fatal(failmsg);
  if (esl_msafile_Read(afp, &msa)                                          != eslOK) esl_fatal(failmsg);
  if ((dmsa = esl_msa_Clone(msa))                                           == NULL)  esl_fatal(failmsg);
  if (esl_msa_Digitize(abc, dmsa, NULL)                                     != eslOK) esl_fatal(failmsg);

  if (p7_Handmodelmaker(dmsa, NULL, &hmm, &trarr)                           != eslOK) esl_fatal(failmsg);
  for (i = 0; i < dmsa->nseq; i++)
    if (p7_trace_Validate(trarr[i], abc, dmsa->ax[i], NULL)                 != eslOK) esl_fatal(failmsg);

  /* The example is contrived such that the traces should give exactly the
   * same (text) alignment as the input alignment; no tracedoctoring.
   * Not a trivial test; for example, sequence 2 has a B->X->I transition that 
   * can be problematic to handle.
   */
  if (p7_tracealign_MSA(dmsa, trarr, hmm->M, p7_DEFAULT, &postmsa)          != eslOK) esl_fatal(failmsg);
  for (i = 0; i < msa->nseq; i++)
    if (strcmp(msa->aseq[i], postmsa->aseq[i]) != 0) esl_fatal(failmsg);

  p7_trace_DestroyArray(trarr, msa->nseq);
  p7_hmm_Destroy(hmm);
  esl_msa_Destroy(msa);
  esl_msa_Destroy(dmsa);
  esl_msa_Destroy(postmsa);
  esl_msafile_Close(afp);
  esl_alphabet_Destroy(abc);
  remove(msafile);
  return;
}