예제 #1
0
파일: build.c 프로젝트: EddyRivasLab/hmmer
int
main(int argc, char **argv)
{
  ESL_GETOPTS  *go        = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  char         *msafile   = esl_opt_GetArg(go, 1);
  int           fmt       = eslMSAFILE_UNKNOWN;
  ESL_ALPHABET *abc       = NULL;
  ESL_MSAFILE  *afp       = NULL;
  ESL_MSA      *msa       = NULL;
  P7_HMM       *hmm       = NULL;
  P7_PRIOR     *prior     = NULL;
  P7_TRACE    **trarr     = NULL;
  P7_BG        *bg        = NULL;
  P7_PROFILE   *gm        = NULL;
  ESL_MSA      *postmsa   = NULL;
  int           i;
  int           status;
  
  /* Standard idioms for opening and reading a digital MSA. (See esl_msafile.c example). */
  if      (esl_opt_GetBoolean(go, "--rna"))   abc = esl_alphabet_Create(eslRNA);
  else if (esl_opt_GetBoolean(go, "--dna"))   abc = esl_alphabet_Create(eslDNA);
  else if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO);

  if ((status = esl_msafile_Open(&abc, msafile, NULL, fmt, NULL, &afp)) != eslOK)
    esl_msafile_OpenFailure(afp, status);

  bg  = p7_bg_Create(abc);

  switch (abc->type) {
  case eslAMINO: prior = p7_prior_CreateAmino();      break;
  case eslDNA:   prior = p7_prior_CreateNucleic();    break;
  case eslRNA:   prior = p7_prior_CreateNucleic();    break;
  default:       prior = p7_prior_CreateLaplace(abc); break;
  }
  if (prior == NULL) esl_fatal("Failed to initialize prior");

  while ((status = esl_msafile_Read(afp, &msa)) != eslEOF)
    {
      if (status != eslOK) esl_msafile_ReadFailure(afp, status);

      /* The modelmakers collect counts in an HMM structure */
      status = p7_Handmodelmaker(msa, NULL, &hmm, &trarr);
      if      (status == eslENORESULT) esl_fatal("no consensus columns in alignment %s\n",  msa->name);
      else if (status != eslOK)        esl_fatal("failed to build HMM from alignment %s\n", msa->name);

      printf("COUNTS:\n");
      p7_hmm_Dump(stdout, hmm);

      /* These counts, in combination with a prior, are converted to probability parameters */
      status = p7_ParameterEstimation(hmm, prior);
      if (status != eslOK)             esl_fatal("failed to parameterize HMM for %s", msa->name);

      printf("PROBABILITIES:\n");
      p7_hmm_Dump(stdout, hmm);

      /* Just so we can dump a more informatively annotated trace - build a profile */
      gm = p7_profile_Create(hmm->M, abc);
      p7_profile_Config   (gm, hmm, bg);
      p7_profile_SetLength(gm, 400);

      /* Dump the individual traces */
      for (i = 0; i < msa->nseq; i++)
	{
	  printf("Trace %d: %s\n", i+1, msa->sqname[i]);
	  p7_trace_DumpAnnotated(stdout, trarr[i], gm, msa->ax[i]);
	}
      
      /* Create an MSA from the individual traces */
      status = p7_tracealign_MSA(msa, trarr, hmm->M, p7_DEFAULT, &postmsa);
      if (status != eslOK) esl_fatal("failed to create new MSA from traces\n");

      esl_msafile_Write(stdout, postmsa, eslMSAFILE_PFAM);

      p7_profile_Destroy(gm);
      p7_hmm_Destroy(hmm);
      p7_trace_DestroyArray(trarr, msa->nseq);
      esl_msa_Destroy(postmsa);
      esl_msa_Destroy(msa);
    }

  esl_msafile_Close(afp);
  p7_bg_Destroy(bg);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
예제 #2
0
/* Function:  p7_builder_Create()
 * Synopsis:  Create a default HMM construction configuration.
 *
 * Purpose:   Create a construction configuration for building
 *            HMMs in alphabet <abc>, and return a pointer to it.
 *            
 *            An application configuration <go> may optionally be
 *            provided. If <go> is <NULL>, default parameters are
 *            used. If <go> is non-<NULL>, it must include appropriate
 *            settings for all of the following ``standard build options'':
 *            
 *            Model construction:   --fast --hand --symfrac --fragthresh
 *            Relative weighting:   --wgsc --wblosum --wpb --wgiven --wid
 *            Effective seq #:      --eent --eclust --enone --eset --ere --esigma --eid
 *            Prior scheme:         --pnone --plaplace
 *            E-val calibration:    --EmL --EmN --EvL --EvN --EfL --EfN --Eft
 *            run-to-run variation: --seed
 *            
 *            See <hmmbuild.c> or other big users of the build
 *            pipeline for an example of appropriate <ESL_GETOPTS>
 *            initializations of these 24 options.
 */
P7_BUILDER *
p7_builder_Create(const ESL_GETOPTS *go, const ESL_ALPHABET *abc)
{
  P7_BUILDER *bld = NULL;
  int         seed;
  int         status;


  ESL_ALLOC(bld, sizeof(P7_BUILDER));
  bld->prior        = NULL;
  bld->r            = NULL;
  bld->S            = NULL;
  bld->Q            = NULL;
  bld->eset         = -1.0;	/* -1.0 = unset; must be set if effn_strategy is p7_EFFN_SET */
  bld->re_target    = -1.0;

  if (go == NULL) 
    {
      bld->arch_strategy = p7_ARCH_FAST;
      bld->wgt_strategy  = p7_WGT_PB;
      bld->effn_strategy = p7_EFFN_ENTROPY;
      seed               = 42;
    }
  else 
    {
      if      (esl_opt_GetBoolean(go, "--fast"))    bld->arch_strategy = p7_ARCH_FAST;
      else if (esl_opt_GetBoolean(go, "--hand"))    bld->arch_strategy = p7_ARCH_HAND;

      if      (esl_opt_GetBoolean(go, "--wpb"))     bld->wgt_strategy = p7_WGT_PB;
      else if (esl_opt_GetBoolean(go, "--wgsc"))    bld->wgt_strategy = p7_WGT_GSC;
      else if (esl_opt_GetBoolean(go, "--wblosum")) bld->wgt_strategy = p7_WGT_BLOSUM;
      else if (esl_opt_GetBoolean(go, "--wnone"))   bld->wgt_strategy = p7_WGT_NONE;
      else if (esl_opt_GetBoolean(go, "--wgiven"))  bld->wgt_strategy = p7_WGT_GIVEN;

      if      (esl_opt_GetBoolean(go, "--eent"))    bld->effn_strategy = p7_EFFN_ENTROPY;
      else if (esl_opt_GetBoolean(go, "--eclust"))  bld->effn_strategy = p7_EFFN_CLUST;
      else if (esl_opt_GetBoolean(go, "--enone"))   bld->effn_strategy = p7_EFFN_NONE;
      else if (esl_opt_IsOn      (go, "--eset"))  { bld->effn_strategy = p7_EFFN_SET;      bld->eset = esl_opt_GetReal(go, "--eset"); }

      seed = esl_opt_GetInteger(go, "--seed");
    }

  bld->max_insert_len = 0;

  /* The default RE target is alphabet dependent. */
  if (go != NULL &&  esl_opt_IsOn (go, "--ere")) 
    bld->re_target = esl_opt_GetReal(go, "--ere");
  else {
    switch (abc->type) {
    case eslAMINO:  bld->re_target = p7_ETARGET_AMINO; break;
    case eslDNA:    bld->re_target = p7_ETARGET_DNA;   break;
    case eslRNA:    bld->re_target = p7_ETARGET_DNA;   break;
    default:        bld->re_target = p7_ETARGET_OTHER; break;
    }
  }

  bld->symfrac    = (go != NULL) ?  esl_opt_GetReal   (go, "--symfrac")    : 0.5; 
  bld->fragthresh = (go != NULL) ?  esl_opt_GetReal   (go, "--fragthresh") : 0.5; 
  bld->wid        = (go != NULL) ?  esl_opt_GetReal   (go, "--wid")        : 0.62;
  bld->esigma     = (go != NULL) ?  esl_opt_GetReal   (go, "--esigma")     : 45.0;
  bld->eid        = (go != NULL) ?  esl_opt_GetReal   (go, "--eid")        : 0.62;
  bld->EmL        = (go != NULL) ?  esl_opt_GetInteger(go, "--EmL")        : 200;
  bld->EmN        = (go != NULL) ?  esl_opt_GetInteger(go, "--EmN")        : 200;
  bld->EvL        = (go != NULL) ?  esl_opt_GetInteger(go, "--EvL")        : 200;
  bld->EvN        = (go != NULL) ?  esl_opt_GetInteger(go, "--EvN")        : 200;
  bld->EfL        = (go != NULL) ?  esl_opt_GetInteger(go, "--EfL")        : 100;
  bld->EfN        = (go != NULL) ?  esl_opt_GetInteger(go, "--EfN")        : 200;
  bld->Eft        = (go != NULL) ?  esl_opt_GetReal   (go, "--Eft")        : 0.04;

  /* Normally we reinitialize the RNG to original seed before calibrating each model.
   * This eliminates run-to-run variation.
   * As a special case, seed==0 means choose an arbitrary seed and shut off the
   * reinitialization; this allows run-to-run variation.
   */

  bld->r            = esl_randomness_CreateFast(seed);
  bld->do_reseeding = (seed == 0) ? FALSE : TRUE;

  if      (go && esl_opt_GetBoolean(go, "--pnone") )     bld->prior = NULL;
  else if (go && esl_opt_GetBoolean(go, "--plaplace") )  bld->prior = p7_prior_CreateLaplace(abc);
  else
    {
      switch (abc->type) {
      case eslAMINO: bld->prior = p7_prior_CreateAmino();      break;
      case eslDNA:   bld->prior = p7_prior_CreateNucleic();    break;
      case eslRNA:   bld->prior = p7_prior_CreateNucleic();    break;
      default:       bld->prior = p7_prior_CreateLaplace(abc); break;
      }
      if (bld->prior == NULL) goto ERROR;
    }

  bld->abc       = abc;
  bld->errbuf[0] = '\0';

  bld->popen   = -1;
  bld->pextend = -1;

  return bld;
  
 ERROR:
  p7_builder_Destroy(bld);
  return NULL;
}