예제 #1
0
/* seq_generation()
 *
 * Generating sequences.
 */
static int
seq_generation(ESL_GETOPTS *go, ESL_RANDOMNESS *r, FILE *ofp, int outfmt)
{
  ESL_ALPHABET *abc = NULL;
  ESL_SQ       *sq  = NULL;
  double       *fq  = NULL;
  int           alphatype = eslUNKNOWN;   // static checkers can't see that 1 of --rna, --dna, --amino must be true
  int           N         = esl_opt_GetInteger(go, "-N");
  int           L         = esl_opt_GetInteger(go, "-L");
  int           i;
  int           status;

  if (L <= 0) esl_fatal("To generate sequences, set -L option (length of generated seqs) > 0 ");
  if (esl_opt_GetBoolean(go, "--rna"))   alphatype = eslRNA;
  if (esl_opt_GetBoolean(go, "--dna"))   alphatype = eslDNA;
  if (esl_opt_GetBoolean(go, "--amino")) alphatype = eslAMINO;
  abc = esl_alphabet_Create(alphatype);
  sq  = esl_sq_CreateDigital(abc);
  esl_sq_GrowTo(sq, L);

  /* Pick the iid frequency distribution to use */
  ESL_ALLOC(fq, sizeof(double) * abc->K);
  switch (alphatype) {
  case eslRNA:
  case eslDNA:    esl_vec_DSet(fq, 4, 0.25); break;
  case eslAMINO:  esl_composition_SW34(fq);  break;
  default:        esl_vec_DSet(fq, abc->K, 1.0 / (double) abc->K); break;
  }
    
  /* generate */
  for (i = 0; i < N; i++)
    {
      esl_rsq_xIID(r, fq, abc->K, L, sq->dsq);
      if (N > 1) esl_sq_FormatName(sq, "random%d", i);
      else       esl_sq_SetName(sq, "random");
      sq->n = L;
      esl_sqio_Write(ofp, sq, outfmt, FALSE);
    }

  free(fq);
  esl_alphabet_Destroy(abc);
  esl_sq_Destroy(sq);
  return eslOK;

 ERROR:
  if (fq != NULL) free(fq);
  esl_alphabet_Destroy(abc);
  esl_sq_Destroy(sq);
  return status;
}
예제 #2
0
static void
utest_Compare(void)
{
  ESL_RANDOMNESS *r    = esl_randomness_CreateFast(42);
  ESL_ALPHABET   *abc  = esl_alphabet_Create(eslAMINO);
  P7_HMM         *hmm  = NULL;
  P7_BG          *bg   = NULL;
  P7_PROFILE     *gm   = NULL;
  P7_PROFILE     *gm2  = NULL;
  int             M    = 200;
  int             L    = 400;

  p7_modelsample(r, M, abc, &hmm); /* master and worker's sampled profiles are identical */
  bg  = p7_bg_Create(abc);

  gm  = p7_profile_Create(hmm->M, abc);
  gm2 = p7_profile_Create(hmm->M, abc);

  p7_profile_Config(gm,  hmm, bg);
  p7_profile_Config(gm2, hmm, bg);

  p7_profile_SetLength(gm,  L);
  p7_profile_SetLength(gm2, L);

  if (p7_profile_Compare(gm, gm2, 0.001) != eslOK) p7_Die("identical profile comparison failed");
  
  p7_profile_Destroy(gm);
  p7_profile_Destroy(gm2);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_alphabet_Destroy(abc);
  esl_randomness_Destroy(r);
  return;
}
예제 #3
0
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  char           *hmmfile = esl_opt_GetArg(go, 1);
  int             N       = esl_opt_GetInteger(go, "-N");
  ESL_STOPWATCH  *w       = esl_stopwatch_Create();
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;

  if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("Failed to read HMM");
  p7_hmmfile_Close(hfp);

  esl_stopwatch_Start(w);
  while (N--)
    { /*                cfg   rng   bg    gm    om  */
      p7_Calibrate(hmm, NULL, NULL, NULL, NULL, NULL);
    }
  esl_stopwatch_Stop(w);
  esl_stopwatch_Display(stdout, w, "# CPU time: ");

  p7_hmm_Destroy(hmm);
  esl_alphabet_Destroy(abc);
  esl_stopwatch_Destroy(w);
  esl_getopts_Destroy(go);
  return 0;
}
예제 #4
0
파일: p7_bg.c 프로젝트: TuftsBCB/SMURFBuild
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  ESL_STOPWATCH  *w       = esl_stopwatch_Create();
  char           *hmmfile = esl_opt_GetArg(go, 1);
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  int             L       = esl_opt_GetInteger(go, "-L");
  int             N       = esl_opt_GetInteger(go, "-N");
  int             i;
 
  /* Read one HMM from <hmmfile> */
  if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)     != eslOK) p7_Fail("Failed to read HMM");
  p7_hmmfile_Close(hfp);

  bg = p7_bg_Create(abc);

  esl_stopwatch_Start(w);
  for (i = 0; i < N; i++)
    p7_bg_SetFilterByHMM(bg, hmm);
  esl_stopwatch_Stop(w);
  esl_stopwatch_Display(stdout, w, "# CPU time: ");

  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_alphabet_Destroy(abc);
  esl_stopwatch_Destroy(w);
  esl_getopts_Destroy(go);
  return 0;
}
예제 #5
0
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go    = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  ESL_RANDOMNESS *r    = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  ESL_ALPHABET   *abc  = NULL;
  P7_HMM         *hmm  = NULL;
  P7_PROFILE     *gm   = NULL;
  P7_BG          *bg   = NULL;
  int             M    = 100;
  int             L    = 200;
  int             nseq = 20;
  char            errbuf[eslERRBUFSIZE];

  if ((abc = esl_alphabet_Create(eslAMINO))         == NULL)  esl_fatal("failed to create alphabet");
  if (p7_hmm_Sample(r, M, abc, &hmm)                != eslOK) esl_fatal("failed to sample an HMM");
  if ((bg = p7_bg_Create(abc))                      == NULL)  esl_fatal("failed to create null model");
  if ((gm = p7_profile_Create(hmm->M, abc))         == NULL)  esl_fatal("failed to create profile");
  if (p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL)    != eslOK) esl_fatal("failed to config profile");
  if (p7_hmm_Validate    (hmm, errbuf, 0.0001)      != eslOK) esl_fatal("whoops, HMM is bad!: %s", errbuf);
  if (p7_profile_Validate(gm,  errbuf, 0.0001)      != eslOK) esl_fatal("whoops, profile is bad!: %s", errbuf);

  utest_basic  (go);
  utest_viterbi(go, r, abc, bg, gm, nseq, L);

  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_alphabet_Destroy(abc);
  esl_randomness_Destroy(r);
  esl_getopts_Destroy(go);
  return 0;
}
예제 #6
0
int 
main(int argc, char **argv)
{
  char           *msg  = "p7_gmx unit test driver failed";
  ESL_GETOPTS    *go   = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  ESL_RANDOMNESS *r    = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  ESL_ALPHABET   *abc  = esl_alphabet_Create(eslAMINO);
  P7_BG          *bg   = p7_bg_Create(abc);
  P7_HMM         *hmm  = NULL;
  P7_PROFILE     *gm   = NULL;
  int             M    = esl_opt_GetInteger(go, "-M");
  int             L    = esl_opt_GetInteger(go, "-L");
  float           tol  = esl_opt_GetReal   (go, "-t");

  p7_FLogsumInit();

  if (p7_hmm_Sample(r, M, abc, &hmm)                != eslOK) esl_fatal(msg);
  if ((gm = p7_profile_Create(hmm->M, abc))         == NULL)  esl_fatal(msg);
  if (p7_bg_SetLength(bg, L)                        != eslOK) esl_fatal(msg);
  if (p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL) != eslOK) esl_fatal(msg);

  utest_GrowTo();
  utest_Compare(r, gm, bg, L, tol);

  esl_getopts_Destroy(go);
  esl_randomness_Destroy(r);
  esl_alphabet_Destroy(abc);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_profile_Destroy(gm);
  return eslOK;
}
예제 #7
0
int
main(int argc, char **argv)
{
    ESL_GETOPTS    *go      = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage);
    char           *hmmfile = esl_opt_GetArg(go, 1);
    ESL_STOPWATCH  *w       = esl_stopwatch_Create();
    ESL_RANDOMNESS *r       = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
    ESL_ALPHABET   *abc     = NULL;
    P7_HMMFILE     *hfp     = NULL;
    P7_HMM         *hmm     = NULL;
    P7_BG          *bg      = NULL;
    P7_PROFILE     *gm      = NULL;
    P7_GMX         *gx1     = NULL;
    P7_GMX         *gx2     = NULL;
    int             L       = esl_opt_GetInteger(go, "-L");
    int             N       = esl_opt_GetInteger(go, "-N");
    ESL_DSQ        *dsq     = malloc(sizeof(ESL_DSQ) * (L+2));
    float           null2[p7_MAXCODE];
    int             i;
    float           fsc, bsc;
    double          Mcs;

    if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
    if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("Failed to read HMM");

    bg = p7_bg_Create(abc);
    p7_bg_SetLength(bg, L);
    gm = p7_profile_Create(hmm->M, abc);
    p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL);
    gx1 = p7_gmx_Create(gm->M, L);
    gx2 = p7_gmx_Create(gm->M, L);

    esl_rsq_xfIID(r, bg->f, abc->K, L, dsq);
    p7_GForward (dsq, L, gm, gx1, &fsc);
    p7_GBackward(dsq, L, gm, gx2, &bsc);
    p7_GDecoding(gm, gx1, gx2, gx2);

    esl_stopwatch_Start(w);
    for (i = 0; i < N; i++)
        p7_GNull2_ByExpectation(gm, gx2, null2);
    esl_stopwatch_Stop(w);

    Mcs  = (double) N * (double) L * (double) gm->M * 1e-6 / w->user;
    esl_stopwatch_Display(stdout, w, "# CPU time: ");
    printf("# M    = %d\n", gm->M);
    printf("# %.1f Mc/s\n", Mcs);

    free(dsq);
    p7_gmx_Destroy(gx1);
    p7_gmx_Destroy(gx2);
    p7_profile_Destroy(gm);
    p7_bg_Destroy(bg);
    p7_hmm_Destroy(hmm);
    p7_hmmfile_Close(hfp);
    esl_alphabet_Destroy(abc);
    esl_stopwatch_Destroy(w);
    esl_randomness_Destroy(r);
    esl_getopts_Destroy(go);
    return 0;
}
예제 #8
0
파일: build.c 프로젝트: EddyRivasLab/hmmer
/* utest_basic()
 * An MSA to ex{e,o}rcise past demons.
 *   1. seq2 gives an I->end transition.
 *   2. seq1 contains degenerate Z,X, exercising symbol counting
 *      of degenerate residues.
 */
static void
utest_basic(void)
{
  char         *failmsg      = "failure in build.c::utest_basic() unit test";
  char          msafile[16]  = "p7tmpXXXXXX"; /* tmpfile name template */
  FILE         *ofp          = NULL;
  ESL_ALPHABET *abc          = esl_alphabet_Create(eslAMINO);
  ESL_MSAFILE  *afp          = NULL;
  ESL_MSA      *msa          = NULL;
  P7_HMM       *hmm          = NULL;
  float         symfrac      = 0.5;

  if (esl_tmpfile_named(msafile, &ofp) != eslOK) esl_fatal(failmsg);
  fprintf(ofp, "# STOCKHOLM 1.0\n");
  fprintf(ofp, "#=GC RF --xxxxxxxxxxxxxxxx-xxx-x--\n");
  fprintf(ofp, "seq1    --ACDEFGHIKLMNPZXS-TVW-Yyy\n");
  fprintf(ofp, "seq2    aaACDEFGHIKLMNPQRS-TVWw---\n");
  fprintf(ofp, "seq3    aaAC-EFGHIKLMNPQRS-TVW-Y--\n");
  fprintf(ofp, "seq4    aaAC-EFGHIKLMNPQRS-TVW-Y--\n");
  fprintf(ofp, "//\n");
  fclose(ofp);

  if (esl_msafile_Open(&abc, msafile, NULL, eslMSAFILE_UNKNOWN, NULL, &afp) != eslOK) esl_fatal(failmsg);
  if (esl_msafile_Read(afp, &msa)                                           != eslOK) esl_fatal(failmsg);
  if (p7_Fastmodelmaker(msa, symfrac, NULL, &hmm, NULL)                     != eslOK) esl_fatal(failmsg);
  
  p7_hmm_Destroy(hmm);
  esl_msa_Destroy(msa);
  esl_msafile_Close(afp);
  esl_alphabet_Destroy(abc);
  remove(msafile);
  return;
}
예제 #9
0
static void 
utest_normalization(ESL_GETOPTS *go)
{
  char         *msg     = "seqmodel normalization utest failed";
  ESL_ALPHABET *abc     = esl_alphabet_Create(eslAMINO);
  char         *seq     = "ACDEFGHIKLMNPQRSTVWYBJZOUX";
  int           L       = strlen(seq);
  ESL_DSQ      *dsq     = NULL;
  float         popen   = 0.1;
  float         pextend = 0.4;
  P7_BUILDER   *bld     = NULL;
  P7_BG        *bg      = p7_bg_Create(abc);
  P7_HMM       *hmm     = NULL;
  char          errbuf[eslERRBUFSIZE];

  if ( esl_abc_CreateDsq(abc, seq, &dsq)                                                 != eslOK) esl_fatal(msg);
  if ( (bld = p7_builder_Create(NULL, abc))                                              == NULL)  esl_fatal(msg);
  if ( p7_builder_LoadScoreSystem(bld, "BLOSUM62", popen, pextend, bg)                   != eslOK) esl_fatal(msg); 
  if ( p7_Seqmodel(abc, dsq, L, "aatest", bld->Q, bg->f, bld->popen, bld->pextend, &hmm) != eslOK) esl_fatal(msg);

  if (p7_hmm_Validate(hmm, errbuf, 0.0001) != eslOK) esl_fatal("normalization utest failed\n%s\n", errbuf);

  free(dsq);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_builder_Destroy(bld);
  esl_alphabet_Destroy(abc);
}
예제 #10
0
static void
utest_ReadWrite(ESL_RANDOMNESS *rng)
{
  char          msg[]       = "bg Read/Write unit test failed";
  char          tmpfile[32] = "esltmpXXXXXX";
  FILE         *fp          = NULL;
  ESL_ALPHABET *abc         = NULL;   /* random alphabet choice eslRNA..eslDICE */
  float        *fq          = NULL;
  P7_BG        *bg          = NULL; 

  if ((abc = esl_alphabet_Create(esl_rnd_Roll(rng, 5) + 1)) == NULL)  esl_fatal(msg);
  if (( bg = p7_bg_Create(abc))                             == NULL)  esl_fatal(msg);
  if (( fq = malloc(sizeof(float) * abc->K))                == NULL)  esl_fatal(msg);                 
  do {
    if (esl_dirichlet_FSampleUniform(rng, abc->K, fq)      != eslOK) esl_fatal(msg);
  } while (esl_vec_FMin(fq, abc->K) < 0.001); /* small p's will get rounded off and fail FCompare() */
  esl_vec_FCopy(fq, abc->K, bg->f);

  if (esl_tmpfile_named(tmpfile, &fp) != eslOK) esl_fatal(msg);
  if ( p7_bg_Write(fp, bg)            != eslOK) esl_fatal(msg);
  fclose(fp);

  esl_vec_FSet(bg->f, bg->abc->K, 0.0);
  if ( p7_bg_Read(tmpfile, bg, NULL)                 != eslOK) esl_fatal(msg);
  if ( esl_vec_FCompare(fq, bg->f, bg->abc->K, 0.01) != eslOK) esl_fatal(msg);

  p7_bg_Destroy(bg);
  esl_alphabet_Destroy(abc);
  free(fq);
  remove(tmpfile);
}
예제 #11
0
파일: io.c 프로젝트: TuftsBCB/SMURFBuild
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go   = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  ESL_RANDOMNESS *r    = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  ESL_ALPHABET   *abc  = NULL;
  P7_BG          *bg   = NULL;
  P7_HMM         *hmm  = NULL;
  P7_OPROFILE    *om   = NULL;
  int             M    = esl_opt_GetInteger(go, "-M");
  int             L    = esl_opt_GetInteger(go, "-L");
  
  /* Sample a random HMM and optimized profile, in amino acid alphabet.  */
  if ((abc = esl_alphabet_Create(eslAMINO))                    == NULL)  esl_fatal("failed to create alphabet");
  if ((bg = p7_bg_Create(abc))                                 == NULL)  esl_fatal("failed to create null model");
  if (( p7_oprofile_Sample(r, abc, bg, M, L, &hmm, NULL, &om)) != eslOK) esl_fatal("failed to sample HMM and profile");

  /* unit test(s) */
  utest_ReadWrite(hmm, om);

  p7_oprofile_Destroy(om);
  p7_hmm_Destroy(hmm);
  p7_bg_Destroy(bg);
  esl_alphabet_Destroy(abc);
  esl_randomness_Destroy(r);
  esl_getopts_Destroy(go);
  return eslOK;
}
예제 #12
0
/* Create an SSI index file for open HMM file <hfp>.
 * Both name and accession of HMMs are stored as keys.
 */
static void
create_ssi_index(ESL_GETOPTS *go, P7_HMMFILE *hfp)
{
  ESL_NEWSSI   *ns      = NULL;
  ESL_ALPHABET *abc     = NULL;
  P7_HMM       *hmm     = NULL;
  int           nhmm    = 0;
  char         *ssifile = NULL;
  uint16_t      fh;
  int           status;

  if (esl_sprintf(&ssifile, "%s.ssi", hfp->fname) != eslOK) p7_Die("esl_sprintf() failed");

  status = esl_newssi_Open(ssifile, FALSE, &ns);
  if      (status == eslENOTFOUND)   esl_fatal("failed to open SSI index %s", ssifile);
  else if (status == eslEOVERWRITE)  esl_fatal("SSI index %s already exists; delete or rename it", ssifile);
  else if (status != eslOK)          esl_fatal("failed to create a new SSI index");

  if (esl_newssi_AddFile(ns, hfp->fname, 0, &fh) != eslOK) /* 0 = format code (HMMs don't have any yet) */
    esl_fatal("Failed to add HMM file %s to new SSI index\n", hfp->fname);

  printf("Working...    "); 
  fflush(stdout);
  
  while ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) != eslEOF)
    {
      if      (status == eslEOD)       p7_Fail("read failed, HMM file %s may be truncated?", hfp->fname);
      else if (status == eslEFORMAT)   p7_Fail("bad file format in HMM file %s",             hfp->fname);
      else if (status == eslEINCOMPAT) p7_Fail("HMM file %s contains different alphabets",   hfp->fname);
      else if (status != eslOK)        p7_Fail("Unexpected error in reading HMMs from %s",   hfp->fname);
      nhmm++;

      if (hmm->name == NULL)           p7_Fail("Every HMM must have a name to be indexed. Failed to find name of HMM #%d\n", nhmm);

      if (esl_newssi_AddKey(ns, hmm->name, fh, hmm->offset, 0, 0) != eslOK)
	p7_Fail("Failed to add key %s to SSI index", hmm->name);

      if (hmm->acc) {
	if (esl_newssi_AddAlias(ns, hmm->acc, hmm->name) != eslOK)
	  p7_Fail("Failed to add secondary key %s to SSI index", hmm->acc);
      }
      p7_hmm_Destroy(hmm);
    }
  
  if (esl_newssi_Write(ns) != eslOK) 
    p7_Fail("Failed to write keys to ssi file %s\n", ssifile);

  printf("done.\n");
  if (ns->nsecondary > 0) 
    printf("Indexed %d HMMs (%ld names and %ld accessions).\n", nhmm, (long) ns->nprimary, (long) ns->nsecondary);
  else 
    printf("Indexed %d HMMs (%ld names).\n", nhmm, (long) ns->nprimary);
  printf("SSI index written to file %s\n", ssifile);

  free(ssifile);
  esl_alphabet_Destroy(abc);
  esl_newssi_Close(ns);
  return;
}  
예제 #13
0
static void
utest_goodfile(char *filename, int testnumber, int expected_alphatype, int expected_nseq, int expected_alen)
{
  ESL_ALPHABET        *abc          = NULL;
  ESLX_MSAFILE        *afp          = NULL;
  ESL_MSA             *msa1         = NULL;
  ESL_MSA             *msa2         = NULL;
  char                 tmpfile1[32] = "esltmpXXXXXX";
  char                 tmpfile2[32] = "esltmpXXXXXX";
  FILE                *ofp          = NULL;
  int                  status;

  /* guessing both the format and the alphabet should work: this is a digital open */
  /* PSIBLAST format is autodetected as SELEX, which is fine - selex parser is more general */
  if ( (status = eslx_msafile_Open(&abc, filename, NULL, eslMSAFILE_UNKNOWN, NULL, &afp)) != eslOK) esl_fatal("psiblast good file test %d failed: digital open",           testnumber);  
  if (afp->format != eslMSAFILE_SELEX)                                                              esl_fatal("psiblast good file test %d failed: format autodetection",   testnumber); 
  if (abc->type   != expected_alphatype)                                                            esl_fatal("psiblast good file test %d failed: alphabet autodetection", testnumber);
  afp->format = eslMSAFILE_PSIBLAST;

  /* This is a digital read, using <abc>. */
  if ( (status = esl_msafile_psiblast_Read(afp, &msa1))   != eslOK) esl_fatal("psiblast good file test %d failed: msa read, digital", testnumber);  
  if (msa1->nseq != expected_nseq || msa1->alen != expected_alen)   esl_fatal("psiblast good file test %d failed: nseq/alen",         testnumber);
  if (esl_msa_Validate(msa1, NULL) != eslOK)                        esl_fatal("psiblast good file test %d failed: msa1 invalid",      testnumber);
  eslx_msafile_Close(afp);  

  /* write it back out to a new tmpfile (digital write) */
  if ( (status = esl_tmpfile_named(tmpfile1, &ofp))      != eslOK) esl_fatal("psiblast good file test %d failed: tmpfile creation",   testnumber);
  if ( (status = esl_msafile_psiblast_Write(ofp, msa1))  != eslOK) esl_fatal("psiblast good file test %d failed: msa write, digital", testnumber);
  fclose(ofp);

  /* now open and read it as text mode, in known format. (We have to pass fmtd now, to deal with the possibility of a nonstandard name width) */
  if ( (status = eslx_msafile_Open(NULL, tmpfile1, NULL, eslMSAFILE_PSIBLAST, NULL, &afp)) != eslOK) esl_fatal("psiblast good file test %d failed: text mode open", testnumber);  
  if ( (status = esl_msafile_psiblast_Read(afp, &msa2))                                    != eslOK) esl_fatal("psiblast good file test %d failed: msa read, text", testnumber);  
  if (msa2->nseq != expected_nseq || msa2->alen != expected_alen)                                    esl_fatal("psiblast good file test %d failed: nseq/alen",      testnumber);
  if (esl_msa_Validate(msa2, NULL) != eslOK)                                                         esl_fatal("psiblast good file test %d failed: msa2 invalid",   testnumber);
  eslx_msafile_Close(afp);
  
  /* write it back out to a new tmpfile (text write) */
  if ( (status = esl_tmpfile_named(tmpfile2, &ofp))      != eslOK) esl_fatal("psiblast good file test %d failed: tmpfile creation", testnumber);
  if ( (status = esl_msafile_psiblast_Write(ofp, msa2))  != eslOK) esl_fatal("psiblast good file test %d failed: msa write, text",  testnumber);
  fclose(ofp);
  esl_msa_Destroy(msa2);

  /* open and read it in digital mode */
  if ( (status = eslx_msafile_Open(&abc, tmpfile1, NULL, eslMSAFILE_PSIBLAST, NULL, &afp)) != eslOK) esl_fatal("psiblast good file test %d failed: 2nd digital mode open", testnumber);  
  if ( (status = esl_msafile_psiblast_Read(afp, &msa2))                                    != eslOK) esl_fatal("psiblast good file test %d failed: 2nd digital msa read",  testnumber);  
  if (esl_msa_Validate(msa2, NULL) != eslOK)                                                         esl_fatal("psiblast good file test %d failed: msa2 invalid",          testnumber);
  eslx_msafile_Close(afp);

  /* this msa <msa2> should be identical to <msa1> */
  if (esl_msa_Compare(msa1, msa2) != eslOK) esl_fatal("psiblast good file test %d failed: msa compare", testnumber);  

  remove(tmpfile1);
  remove(tmpfile2);
  esl_msa_Destroy(msa1);
  esl_msa_Destroy(msa2);
  esl_alphabet_Destroy(abc);
}
예제 #14
0
파일: emit.c 프로젝트: ElofssonLab/TOPCONS2
int 
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  ESL_RANDOMNESS *rng     = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  char           *hmmfile = esl_opt_GetArg(go, 1);
  int             L       = esl_opt_GetInteger(go, "-L");
  int             N       = esl_opt_GetInteger(go, "-N");
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_TRACE       *tr      = p7_trace_Create();
  ESL_SQ         *sq      = NULL;
  char            errbuf[eslERRBUFSIZE];
  int             i;
  int             status;

  status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf);
  if      (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf);
  else if (status == eslEFORMAT)   p7_Fail("File format problem in trying to open HMM file %s.\n%s\n",                hmmfile, errbuf);
  else if (status != eslOK)        p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n",                       status, hmmfile, errbuf);  

  status = p7_hmmfile_Read(hfp, &abc, &hmm);
  if      (status == eslEFORMAT)   p7_Fail("Bad file format in HMM file %s:\n%s\n",          hfp->fname, hfp->errbuf);
  else if (status == eslEINCOMPAT) p7_Fail("HMM in %s is not in the expected %s alphabet\n", hfp->fname, esl_abc_DecodeType(abc->type));
  else if (status == eslEOF)       p7_Fail("Empty HMM file %s? No HMM data found.\n",        hfp->fname);
  else if (status != eslOK)        p7_Fail("Unexpected error in reading HMMs from %s\n",     hfp->fname);

  p7_hmmfile_Close(hfp);

  bg = p7_bg_Create(abc);                p7_bg_SetLength(bg, L);
  gm = p7_profile_Create(hmm->M, abc);   p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL);
  sq = esl_sq_CreateDigital(abc);

  for (i = 0; i < N; i++)
    {
      p7_ProfileEmit(rng, hmm, gm, bg, sq, tr);
      esl_sq_FormatName(sq, "%s-sample%d", hmm->name, i);
      esl_sqio_Write(stdout, sq, eslSQFILE_FASTA, FALSE);

      if (p7_trace_Validate(tr, abc, sq->dsq, errbuf) != eslOK) esl_fatal(errbuf);

      esl_sq_Reuse(sq);
      p7_trace_Reuse(tr);
    }      

  esl_sq_Destroy(sq);
  p7_trace_Destroy(tr);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_alphabet_Destroy(abc);
  esl_randomness_Destroy(rng);
  esl_getopts_Destroy(go);
  return 0;
}
예제 #15
0
static void
utest_SendRecv(ESL_RANDOMNESS *rng, int my_rank, int nproc)
{
  char            msg[] = "utest_SendRecv() failed";
  ESL_ALPHABET   *abc   = esl_alphabet_Create(eslAMINO);
  P7_HMM         *hmm   = NULL;
  P7_HMM         *xhmm  = NULL;
  int             M     = 200;
  char           *wbuf  = NULL;
  int             wn    = 0;
  int             i;
  uint32_t        rngseed;
  MPI_Status      mpistatus;
  char            errmsg[eslERRBUFSIZE];

  if (my_rank == 0) 
    {
      /* First we send our RNG seed to all workers */
      rngseed = esl_randomness_GetSeed(rng);
      for (i = 1; i < nproc; i++)
	if (MPI_Send( &rngseed, 1, MPI_UNSIGNED, i, 0, MPI_COMM_WORLD) != MPI_SUCCESS) esl_fatal(msg);

      /* We sample an HMM that's going to be identical to the workers' */
      if (p7_modelsample(rng, M, abc, &hmm) != eslOK) esl_fatal(msg);

      for (i = 1; i < nproc; i++)
	{
	  if (p7_hmm_mpi_Recv(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &wbuf, &wn, &abc, &xhmm) != eslOK) esl_fatal(msg);

	  if (p7_hmm_Validate(xhmm, errmsg, 0.001) != eslOK) esl_fatal("%s:\n   %s", msg, errmsg);
	  if (p7_hmm_Compare(hmm, xhmm, 0.001)     != eslOK) esl_fatal(msg);

	  p7_hmm_Destroy(xhmm);
	}
    }
  else 
    {
      /* Worker(s) must first receive the exact same RNG seed that the master is using. */
      if (MPI_Recv(&rngseed, 1, MPI_UNSIGNED, 0, 0, MPI_COMM_WORLD, &mpistatus) != MPI_SUCCESS) esl_fatal(msg);

      /* and then the worker(s) can create the exact same RNG (and random number sequence) that the master has */
      rng = esl_randomness_CreateFast(rngseed);

      /* so when the worker samples this HMM, the master has independently sampled an exact duplicate of it... */
      if (p7_modelsample(rng, M, abc, &hmm) != eslOK) esl_fatal(msg);

      /* each worker sends the HMM to the master (it's the same HMM for each worker. The test is intended for one master, one worker.) */
      if (p7_hmm_mpi_Send(hmm, 0, 0, MPI_COMM_WORLD, &wbuf, &wn) != eslOK) esl_fatal(msg);

      /* worker's RNG is a private copy; destroy it. Master keeps its RNG, which the caller is responsible for. */
      esl_randomness_Destroy(rng);
     }

  p7_hmm_Destroy(hmm);
  esl_alphabet_Destroy(abc);
  free(wbuf);
  return;
}
예제 #16
0
/* The "basic" utest is a minimal driver for making a small DNA profile and a small DNA sequence,
 * then running Viterbi and Forward. It's useful for dumping DP matrices and profiles for debugging.
 */
static void
utest_basic(ESL_GETOPTS *go)
{
  char           *query= "# STOCKHOLM 1.0\n\nseq1 GAATTC\nseq2 GAATTC\n//\n";
  int             fmt  = eslMSAFILE_STOCKHOLM;
  char           *targ = "GAATTC";
  ESL_ALPHABET   *abc  = NULL;
  ESL_MSA        *msa  = NULL;
  P7_HMM         *hmm  = NULL;
  P7_PROFILE     *gm   = NULL;
  P7_BG          *bg   = NULL;
  P7_PRIOR       *pri  = NULL;	
  ESL_DSQ        *dsq  = NULL;
  P7_GMX         *gx   = NULL;
  P7_TRACE        *tr  = NULL;
  int             L    = strlen(targ);
  float           vsc, vsc2, fsc;

  if ((abc = esl_alphabet_Create(eslDNA))          == NULL)  esl_fatal("failed to create alphabet");
  if ((pri = p7_prior_CreateNucleic())             == NULL)  esl_fatal("failed to create prior");
  if ((msa = esl_msa_CreateFromString(query, fmt)) == NULL)  esl_fatal("failed to create MSA");
  if (esl_msa_Digitize(abc, msa, NULL)             != eslOK) esl_fatal("failed to digitize MSA");
  if (p7_Fastmodelmaker(msa, 0.5, NULL, &hmm, NULL) != eslOK) esl_fatal("failed to create GAATTC model");
  if (p7_ParameterEstimation(hmm, pri)             != eslOK) esl_fatal("failed to parameterize GAATTC model");
  if (p7_hmm_SetConsensus(hmm, NULL)               != eslOK) esl_fatal("failed to make consensus");
  if ((bg = p7_bg_Create(abc))                     == NULL)  esl_fatal("failed to create DNA null model");
  if ((gm = p7_profile_Create(hmm->M, abc))        == NULL)  esl_fatal("failed to create GAATTC profile");
  if (p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL)!= eslOK) esl_fatal("failed to config profile");
  if (p7_profile_Validate(gm, NULL, 0.0001)        != eslOK) esl_fatal("whoops, profile is bad!");
  if (esl_abc_CreateDsq(abc, targ, &dsq)           != eslOK) esl_fatal("failed to create GAATTC digital sequence");
  if ((gx = p7_gmx_Create(gm->M, L))               == NULL)  esl_fatal("failed to create DP matrix");
  if ((tr = p7_trace_Create())                     == NULL)  esl_fatal("trace creation failed");

  p7_GViterbi   (dsq, L, gm, gx, &vsc);
  if (esl_opt_GetBoolean(go, "-v")) printf("Viterbi score: %.4f\n", vsc);
  if (esl_opt_GetBoolean(go, "-v")) p7_gmx_Dump(stdout, gx, p7_DEFAULT);

  p7_GTrace     (dsq, L, gm, gx, tr);
  p7_trace_Score(tr, dsq, gm, &vsc2);
  if (esl_opt_GetBoolean(go, "-v")) p7_trace_Dump(stdout, tr, gm, dsq);
  
  if (esl_FCompare(vsc, vsc2, 1e-5) != eslOK)  esl_fatal("trace score and Viterbi score don't agree.");

  p7_GForward   (dsq, L, gm, gx, &fsc);
  if (esl_opt_GetBoolean(go, "-v")) printf("Forward score: %.4f\n", fsc);
  if (esl_opt_GetBoolean(go, "-v")) p7_gmx_Dump(stdout, gx, p7_DEFAULT);

  p7_trace_Destroy(tr);
  p7_gmx_Destroy(gx);
  free(dsq);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_msa_Destroy(msa);
  p7_prior_Destroy(pri);
  esl_alphabet_Destroy(abc);
  return;
}
예제 #17
0
static void
utest_goodfile(char *filename, int testnumber, int expected_alphatype, int expected_nseq, int expected_alen)
{
  ESL_ALPHABET        *abc          = NULL;
  ESL_MSAFILE         *afp          = NULL;
  ESL_MSA             *msa1         = NULL;
  ESL_MSA             *msa2         = NULL;
  char                 tmpfile1[32] = "esltmpXXXXXX";   
  char                 tmpfile2[32] = "esltmpXXXXXX";
  FILE                *ofp          = NULL;
  int                  status;

  /* A2M must be specified (no format guessing, unless we use .a2m suffix) but guessing the alphabet should work: this is a digital open */
  if ( (status = esl_msafile_Open(&abc, filename, NULL, eslMSAFILE_A2M, NULL, &afp)) != eslOK) esl_fatal("a2m good file test %d failed: digital open",           testnumber);  
  if (afp->format != eslMSAFILE_A2M)                                                           esl_fatal("a2m good file test %d failed: bad format",             testnumber);
  if (abc->type   != expected_alphatype)                                                       esl_fatal("a2m good file test %d failed: alphabet autodetection", testnumber);

  /* This is a digital read, using <abc>. */
  if ( (status = esl_msafile_a2m_Read(afp, &msa1))   != eslOK)     esl_fatal("a2m good file test %d failed: msa read, digital", testnumber);  
  if (msa1->nseq != expected_nseq || msa1->alen != expected_alen)  esl_fatal("a2m good file test %d failed: nseq/alen",         testnumber);
  if (esl_msa_Validate(msa1, NULL) != eslOK)                       esl_fatal("a2m good file test %d failed: msa invalid",       testnumber);
  esl_msafile_Close(afp);  

  /* write it back out to a new tmpfile (digital write) */
  if ( (status = esl_tmpfile_named(tmpfile1, &ofp)) != eslOK) esl_fatal("a2m good file test %d failed: tmpfile creation",   testnumber);
  if ( (status = esl_msafile_a2m_Write(ofp, msa1))  != eslOK) esl_fatal("a2m good file test %d failed: msa write, digital", testnumber);
  fclose(ofp);

  /* now open and read it as text mode, in known format. */
  if ( (status = esl_msafile_Open(NULL, tmpfile1, NULL, eslMSAFILE_A2M, NULL, &afp)) != eslOK) esl_fatal("a2m good file test %d failed: text mode open", testnumber);  
  if ( (status = esl_msafile_a2m_Read(afp, &msa2))                                   != eslOK) esl_fatal("a2m good file test %d failed: msa read, text", testnumber);  
  if (msa2->nseq != expected_nseq || msa2->alen != expected_alen)                              esl_fatal("a2m good file test %d failed: nseq/alen",      testnumber);
  if (esl_msa_Validate(msa2, NULL) != eslOK)                                                   esl_fatal("a2m good file test %d failed: msa invalid",       testnumber);
  esl_msafile_Close(afp);
  
  /* write it back out to a new tmpfile (text write) */
  if ( (status = esl_tmpfile_named(tmpfile2, &ofp)) != eslOK) esl_fatal("a2m good file test %d failed: tmpfile creation", testnumber);
  if ( (status = esl_msafile_a2m_Write(ofp, msa2))  != eslOK) esl_fatal("a2m good file test %d failed: msa write, text",  testnumber);
  fclose(ofp);
  esl_msa_Destroy(msa2);

  /* open and read it in digital mode */
  if ( (status = esl_msafile_Open(&abc, tmpfile1, NULL, eslMSAFILE_A2M, NULL, &afp)) != eslOK) esl_fatal("a2m good file test %d failed: 2nd digital mode open", testnumber);  
  if ( (status = esl_msafile_a2m_Read(afp, &msa2))                                   != eslOK) esl_fatal("a2m good file test %d failed: 2nd digital msa read",  testnumber);  
  if (esl_msa_Validate(msa2, NULL) != eslOK)                                                   esl_fatal("a2m good file test %d failed: msa invalid",           testnumber);
  esl_msafile_Close(afp);

  /* this msa <msa2> should be identical to <msa1> */
  if (esl_msa_Compare(msa1, msa2) != eslOK) esl_fatal("a2m good file test %d failed: msa compare", testnumber);  

  remove(tmpfile1);
  remove(tmpfile2);
  esl_msa_Destroy(msa1);
  esl_msa_Destroy(msa2);
  esl_alphabet_Destroy(abc);
}
예제 #18
0
static void
utest_oprofileSendRecv(int my_rank, int nproc)
{
  ESL_RANDOMNESS *r    = esl_randomness_CreateFast(42);
  ESL_ALPHABET   *abc  = esl_alphabet_Create(eslAMINO);
  P7_HMM         *hmm  = NULL;
  P7_BG          *bg   = NULL;
  P7_PROFILE     *gm   = NULL;
  P7_OPROFILE    *om   = NULL;
  P7_OPROFILE    *om2  = NULL;
  int             M    = 200;
  int             L    = 400;
  char           *wbuf = NULL;
  int             wn   = 0;
  int             i;
  char            errbuf[eslERRBUFSIZE];

  p7_hmm_Sample(r, M, abc, &hmm); /* master and worker's sampled profiles are identical */
  bg = p7_bg_Create(abc);
  gm = p7_profile_Create(hmm->M, abc);
  om = p7_oprofile_Create(hmm->M, abc);
  p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL);
  p7_oprofile_Convert(gm, om);
  p7_bg_SetLength  (bg, L);

  if (my_rank == 0)
    {
      for (i = 1; i < nproc; i++)
	{
	  ESL_DPRINTF1(("Master: receiving test profile\n"));
	  p7_oprofile_MPIRecv(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &wbuf, &wn, &abc, &om2);
	  ESL_DPRINTF1(("Master: test profile received\n"));

	  if (p7_oprofile_Compare(om, om2, 0.001, errbuf) != eslOK) 
	    p7_Die("Received profile not identical to what was sent\n%s", errbuf);

	  p7_oprofile_Destroy(om2);
	}
    }
  else 
    {
      ESL_DPRINTF1(("Worker %d: sending test profile\n", my_rank));
      p7_oprofile_MPISend(om, 0, 0, MPI_COMM_WORLD, &wbuf, &wn);
      ESL_DPRINTF1(("Worker %d: test profile sent\n", my_rank));
    }

  free(wbuf);
  p7_profile_Destroy(gm);
  p7_oprofile_Destroy(om);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_alphabet_Destroy(abc);
  esl_randomness_Destroy(r);
  return;
}
예제 #19
0
파일: emit.c 프로젝트: ElofssonLab/TOPCONS2
int
main(int argc, char **argv)
{
  char            *hmmfile = argv[1];  /* name of HMM file to read one HMM from   */
  ESL_ALPHABET    *abc     = NULL;     /* sequence alphabet                       */
  ESL_RANDOMNESS  *r       = NULL;     /* source of randomness                    */
  P7_HMMFILE      *hfp     = NULL;     /* open hmmfile                            */
  P7_HMM          *hmm     = NULL;     /* HMM to emit from                        */
  P7_PROFILE      *gm      = NULL;     /* profile HMM (scores)                    */
  P7_BG           *bg      = NULL;     /* null model                              */
  P7_TRACE        *tr      = NULL;     /* sampled trace                           */
  ESL_SQ          *sq      = NULL;     /* sampled digital sequence                */
  int              n       = 1000;
  int              counts[p7T_NSTATETYPES];
  int              i;
  float            sc;
  float            nullsc;
  double           bitscore;

  r  = esl_randomness_CreateFast(0);
  tr = p7_trace_Create();
  if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("failed to open %s", hmmfile);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("failed to read HMM");
  sq = esl_sq_CreateDigital(abc);
  bg = p7_bg_Create(abc);
  gm = p7_profile_Create(hmm->M, abc);

  p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL);

  for (i = 0; i < n; i++) 
    {
      p7_ProfileEmit(r, hmm, gm, bg, sq, tr);
      p7_trace_GetStateUseCounts(tr, counts);

      p7_ReconfigLength(gm, sq->n);
      p7_bg_SetLength(bg, sq->n);
      p7_trace_Score(tr, sq->dsq, gm, &sc);
      p7_bg_NullOne (bg, sq->dsq, sq->n, &nullsc);
      bitscore = (sc - nullsc)/ eslCONST_LOG2;

      printf("%d  %8.4f\n",
	     counts[p7T_M] + (counts[p7T_I] + counts[p7T_D])/2,
	     bitscore);
    }

  p7_profile_Destroy(gm);
  esl_sq_Destroy(sq);
  p7_trace_Destroy(tr);
  esl_randomness_Destroy(r);
  esl_alphabet_Destroy(abc);
  p7_hmmfile_Close(hfp);
  p7_hmm_Destroy(hmm);
  return eslOK;
} 
예제 #20
0
static void
utest_alphabet_config(int alphatype)
{
  char         *msg = "HMMER alphabet config unit test failed";
  ESL_ALPHABET *abc = NULL;

  if ((abc = esl_alphabet_Create(alphatype)) == NULL) esl_fatal(msg);
  if (abc->K  > p7_MAXABET)                           esl_fatal(msg);
  if (abc->Kp > p7_MAXCODE)                           esl_fatal(msg);
  esl_alphabet_Destroy(abc);
}
예제 #21
0
int
main(int argc, char **argv)
{
  ESL_GETOPTS  *go      = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  char         *msafile = esl_opt_GetArg(go, 1);
  ESL_ALPHABET *abc     = NULL;
  int           infmt   = eslMSAFILE_UNKNOWN;
  ESLX_MSAFILE *afp     = NULL;
  ESL_MSA      *msa     = NULL;
  FILE         *ofp     = stdout;
  int           nali    = 0;
  int           namewidth;
  double        pid;
  int           nid, n;
  int           i,j;
  int           status;

  /* allow user to assert the input MSA alphabet */
  if      (esl_opt_GetBoolean(go, "--rna"))   abc = esl_alphabet_Create(eslRNA);
  else if (esl_opt_GetBoolean(go, "--dna"))   abc = esl_alphabet_Create(eslDNA);
  else if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO); 

  /* allow user to assert the input MSA format */
  if (esl_opt_IsOn(go, "--informat") &&
      (infmt = eslx_msafile_EncodeFormat(esl_opt_GetString(go, "--informat"))) == eslMSAFILE_UNKNOWN)
    esl_fatal("%s is not a valid MSA file format for --informat", esl_opt_GetString(go, "--informat"));

  /* digital open */
  if ( ( status = eslx_msafile_Open(&abc, msafile, NULL, infmt, NULL, &afp)) != eslOK)
    eslx_msafile_OpenFailure(afp, status);

  while ((status = eslx_msafile_Read(afp, &msa)) == eslOK)
    {	
      nali++;

      namewidth = esl_str_GetMaxWidth(msa->sqname, msa->nseq);

      for (i = 0; i < msa->nseq; i++)
	for (j = i+1; j < msa->nseq; j++)
	  {
	    esl_dst_XPairId(abc, msa->ax[i], msa->ax[j], &pid, &nid, &n);
	    fprintf(ofp, "%-*s %-*s %6.2f %6d %6d\n", namewidth, msa->sqname[i], namewidth, msa->sqname[j], pid*100.0, nid, n);
	  }

      esl_msa_Destroy(msa);
    }
  if (nali == 0 || status != eslEOF) eslx_msafile_ReadFailure(afp, status); 

  eslx_msafile_Close(afp);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
예제 #22
0
/* multifetch:
 * given a file containing lines with one name or key per line;
 * parse the file line-by-line;
 * if we have an SSI index available, retrieve the HMMs by key
 * as we see each line;
 * else, without an SSI index, store the keys in a hash, then
 * read the entire HMM file in a single pass, outputting HMMs
 * that are in our keylist. 
 * 
 * Note that with an SSI index, you get the HMMs in the order they
 * appear in the <keyfile>, but without an SSI index, you get HMMs in
 * the order they occur in the HMM file.
 */
static void
multifetch(ESL_GETOPTS *go, FILE *ofp, char *keyfile, P7_HMMFILE *hfp)
{
  ESL_KEYHASH    *keys   = esl_keyhash_Create();
  ESL_FILEPARSER *efp    = NULL;
  ESL_ALPHABET   *abc    = NULL;
  P7_HMM         *hmm    = NULL;
  int             nhmm   = 0;
  char           *key;
  int             keylen;
  int             keyidx;
  int             status;
  
  if (esl_fileparser_Open(keyfile, NULL, &efp) != eslOK)  p7_Fail("Failed to open key file %s\n", keyfile);
  esl_fileparser_SetCommentChar(efp, '#');

  while (esl_fileparser_NextLine(efp) == eslOK)
    {
      if (esl_fileparser_GetTokenOnLine(efp, &key, &keylen) != eslOK)
	p7_Fail("Failed to read HMM name on line %d of file %s\n", efp->linenumber, keyfile);
      
      status = esl_key_Store(keys, key, &keyidx);
      if (status == eslEDUP) p7_Fail("HMM key %s occurs more than once in file %s\n", key, keyfile);
	
      if (hfp->ssi != NULL) { onefetch(go, ofp, key, hfp);  nhmm++; }
    }

  if (hfp->ssi == NULL) 
    {
      while ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) != eslEOF)
	{
	  if      (status == eslEOD)       p7_Fail("read failed, HMM file %s may be truncated?", hfp->fname);
	  else if (status == eslEFORMAT)   p7_Fail("bad file format in HMM file %s",             hfp->fname);
	  else if (status == eslEINCOMPAT) p7_Fail("HMM file %s contains different alphabets",   hfp->fname);
	  else if (status != eslOK)        p7_Fail("Unexpected error in reading HMMs from %s",   hfp->fname);

	  if (esl_key_Lookup(keys, hmm->name, &keyidx) == eslOK || 
	      ((hmm->acc) && esl_key_Lookup(keys, hmm->acc, &keyidx) == eslOK))
	    {
	      p7_hmmfile_WriteASCII(ofp, -1, hmm);
	      nhmm++;
	    }

	  p7_hmm_Destroy(hmm);
	}
    }
  
  if (ofp != stdout) printf("\nRetrieved %d HMMs.\n", nhmm);
  if (abc != NULL) esl_alphabet_Destroy(abc);
  esl_keyhash_Destroy(keys);
  esl_fileparser_Close(efp);
  return;
}
예제 #23
0
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go        = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  ESL_RANDOMNESS *rng       = esl_randomness_Create(0);
  char           *msafile   = esl_opt_GetArg(go, 1);
  int             fmt       = eslMSAFILE_UNKNOWN;
  ESL_ALPHABET   *abc       = NULL;
  ESLX_MSAFILE   *afp       = NULL;
  ESL_MSA        *msa       = NULL;
  int             textmode  = esl_opt_GetBoolean(go, "--text");
  int             nali      = 0;
  int             status;

  /* If you know the alphabet you want, create it - you'll pass it to eslx_msafile_Open() */
  if      (esl_opt_GetBoolean(go, "--rna"))   abc = esl_alphabet_Create(eslRNA);
  else if (esl_opt_GetBoolean(go, "--dna"))   abc = esl_alphabet_Create(eslDNA);
  else if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO); 

  /* Open in text or digital mode.
   *   To let the Open() function autoguess the format, you pass <infmt=eslMSAFILE_UNKNOWN>. 
   *   To let it autoguess the alphabet, you set <abc=NULL> and pass <&abc>.
   *   To open in text mode instead of digital, you pass <NULL> for the alphabet argument.
   * eslx_msafile_OpenFailure() is a convenience, printing various diagnostics of any
   * open failure to <stderr>. You can of course handle your own diagnostics instead.
   */
  if (textmode) status = eslx_msafile_Open(NULL, msafile, NULL, fmt, NULL, &afp);
  else          status = eslx_msafile_Open(&abc, msafile, NULL, fmt, NULL, &afp);
  if (status != eslOK)   eslx_msafile_OpenFailure(afp, status);
  
  fmt = afp->format;

  while ((status = eslx_msafile_Read(afp, &msa)) == eslOK)
    {	
      /* if digital MSA: msa->ax[idx=0..nseq-1][acol=1..alen] is the alignment data; 
       * if text MSA:  msa->aseq[idx=0..nseq-1][acol=0..alen-1] */
      nali++;
      
      /* permute it */
      esl_msashuffle_PermuteSequenceOrder(rng, msa);

      eslx_msafile_Write(stdout, msa, fmt);
      esl_msa_Destroy(msa);
    }
  if (nali == 0 || status != eslEOF) eslx_msafile_ReadFailure(afp, status); /* a convenience, like eslx_msafile_OpenFailure() */

  esl_alphabet_Destroy(abc);
  eslx_msafile_Close(afp);
  esl_randomness_Destroy(rng);
  esl_getopts_Destroy(go);
  exit(0);
}
예제 #24
0
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go   = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  ESL_RANDOMNESS *r    = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  ESL_ALPHABET   *abc  = NULL;
  P7_BG          *bg   = NULL;
  int             M    = esl_opt_GetInteger(go, "-M");
  int             L    = esl_opt_GetInteger(go, "-L");
  int             N    = esl_opt_GetInteger(go, "-N");

  /* First round of tests for DNA alphabets.  */
  if ((abc = esl_alphabet_Create(eslDNA)) == NULL)  esl_fatal("failed to create alphabet");
  if ((bg = p7_bg_Create(abc))            == NULL)  esl_fatal("failed to create null model");

  if (esl_opt_GetBoolean(go, "-v")) printf("ViterbiFilter() tests, DNA\n");
  utest_viterbi_filter(r, abc, bg, M, L, N);   
  utest_viterbi_filter(r, abc, bg, 1, L, 10);  
  utest_viterbi_filter(r, abc, bg, M, 1, 10);  

  esl_alphabet_Destroy(abc);
  p7_bg_Destroy(bg);

  /* Second round of tests for amino alphabets.  */
  if ((abc = esl_alphabet_Create(eslAMINO)) == NULL)  esl_fatal("failed to create alphabet");
  if ((bg = p7_bg_Create(abc))              == NULL)  esl_fatal("failed to create null model");

  if (esl_opt_GetBoolean(go, "-v")) printf("ViterbiFilter() tests, protein\n");
  utest_viterbi_filter(r, abc, bg, M, L, N); 
  utest_viterbi_filter(r, abc, bg, 1, L, 10);
  utest_viterbi_filter(r, abc, bg, M, 1, 10);

  esl_alphabet_Destroy(abc);
  p7_bg_Destroy(bg);

  esl_getopts_Destroy(go);
  esl_randomness_Destroy(r);
  return eslOK;
}
예제 #25
0
void
free_QueueData(QUEUE_DATA *data)
{
  /* free the query data */
  esl_getopts_Destroy(data->opts);

  if (data->abc != NULL) esl_alphabet_Destroy(data->abc);
  if (data->hmm != NULL) p7_hmm_Destroy(data->hmm);
  if (data->seq != NULL) esl_sq_Destroy(data->seq);
  if (data->cmd != NULL) free(data->cmd);
  memset(data, 0, sizeof(*data));
  free(data);
}
예제 #26
0
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go   = p7_CreateDefaultApp(options, 0, argc, argv, banner, usage);
  ESL_RANDOMNESS *r    = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  ESL_ALPHABET   *abc  = NULL;
  P7_BG          *bg   = NULL;
  int             M    = esl_opt_GetInteger(go, "-M");
  int             L    = esl_opt_GetInteger(go, "-L");
  int             N    = esl_opt_GetInteger(go, "-N");

  /* first round of tests for DNA alphabets.  */
  if ((abc = esl_alphabet_Create(eslDNA)) == NULL)  esl_fatal("failed to create alphabet");
  if ((bg = p7_bg_Create(abc))            == NULL)  esl_fatal("failed to create null model");

  utest_optacc(go, r, abc, bg, M, L, N);   /* normal sized models */
  utest_optacc(go, r, abc, bg, 1, L, 10);  /* size 1 models       */
  utest_optacc(go, r, abc, bg, M, 1, 10);  /* size 1 sequences    */

  esl_alphabet_Destroy(abc);
  p7_bg_Destroy(bg);

  /* Second round of tests for amino alphabets.  */
  if ((abc = esl_alphabet_Create(eslAMINO)) == NULL)  esl_fatal("failed to create alphabet");
  if ((bg = p7_bg_Create(abc))              == NULL)  esl_fatal("failed to create null model");

  utest_optacc(go, r, abc, bg, M, L, N);   
  utest_optacc(go, r, abc, bg, 1, L, 10);  
  utest_optacc(go, r, abc, bg, M, 1, 10);  

  esl_alphabet_Destroy(abc);
  p7_bg_Destroy(bg);

  esl_getopts_Destroy(go);
  esl_randomness_Destroy(r);
  return eslOK;
}
예제 #27
0
static void
read_test_msas_digital(char *a2mfile, char *stkfile)
{
  char msg[]         = "A2M msa digital read unit test failed";
  ESL_ALPHABET *abc  = NULL;
  ESL_MSAFILE  *afp1 = NULL;
  ESL_MSAFILE  *afp2 = NULL;
  ESL_MSA      *msa1, *msa2, *msa3, *msa4;
  FILE         *a2mfp, *stkfp;
  char          a2mfile2[32] = "esltmpa2m2XXXXXX";
  char          stkfile2[32] = "esltmpstk2XXXXXX";

  if ( esl_msafile_Open(&abc, a2mfile, NULL, eslMSAFILE_A2M,       NULL, &afp1) != eslOK)  esl_fatal(msg);
  if ( !abc || abc->type != eslAMINO)                                                      esl_fatal(msg);
  if ( esl_msafile_Open(&abc, stkfile, NULL, eslMSAFILE_STOCKHOLM, NULL, &afp2) != eslOK)  esl_fatal(msg);
  if ( esl_msafile_a2m_Read      (afp1, &msa1)                                  != eslOK)  esl_fatal(msg);
  if ( esl_msafile_stockholm_Read(afp2, &msa2)                                  != eslOK)  esl_fatal(msg);
  if ( esl_msa_Compare(msa1, msa2)                                              != eslOK)  esl_fatal(msg);

  if ( esl_msafile_a2m_Read      (afp1, &msa3) != eslEOF) esl_fatal(msg);
  if ( esl_msafile_stockholm_Read(afp2, &msa3) != eslEOF) esl_fatal(msg);

  esl_msafile_Close(afp2);
  esl_msafile_Close(afp1);

  /* Now write stk to a2m file, and vice versa; then retest */
  if ( esl_tmpfile_named(a2mfile2, &a2mfp)                       != eslOK) esl_fatal(msg);
  if ( esl_tmpfile_named(stkfile2, &stkfp)                       != eslOK) esl_fatal(msg);
  if ( esl_msafile_a2m_Write      (a2mfp, msa2)                  != eslOK) esl_fatal(msg);
  if ( esl_msafile_stockholm_Write(stkfp, msa1, eslMSAFILE_PFAM) != eslOK) esl_fatal(msg);
  fclose(a2mfp);
  fclose(stkfp);
  if ( esl_msafile_Open(&abc, a2mfile2, NULL, eslMSAFILE_A2M,       NULL, &afp1) != eslOK) esl_fatal(msg);
  if ( esl_msafile_Open(&abc, stkfile2, NULL, eslMSAFILE_STOCKHOLM, NULL, &afp2) != eslOK) esl_fatal(msg);
  if ( esl_msafile_a2m_Read      (afp1, &msa3)                                   != eslOK) esl_fatal(msg);
  if ( esl_msafile_stockholm_Read(afp2, &msa4)                                   != eslOK) esl_fatal(msg);
  if ( esl_msa_Compare(msa3, msa4)                                               != eslOK) esl_fatal(msg);

  remove(a2mfile2);
  remove(stkfile2);
  esl_msafile_Close(afp2);
  esl_msafile_Close(afp1);

  esl_msa_Destroy(msa1);
  esl_msa_Destroy(msa2);
  esl_msa_Destroy(msa3);  
  esl_msa_Destroy(msa4);
  esl_alphabet_Destroy(abc);
}
예제 #28
0
파일: esl_hmm.c 프로젝트: nathanweeks/easel
int
main(int argc, char **argv)
{
  ESL_GETOPTS  *go        = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  ESL_ALPHABET *abc       = esl_alphabet_Create(eslAMINO);
  ESL_SQ       *sq        = esl_sq_CreateDigital(abc);
  ESL_SQFILE   *sqfp      = NULL;
  ESL_HMM      *hmm       = create_test_hmm(abc);
  ESL_HMM      *bg        = create_null_hmm(abc);
  ESL_HMX      *hmx       = esl_hmx_Create(400, hmm->M);
  int           format    = eslSQFILE_UNKNOWN;
  char         *seqfile   = esl_opt_GetArg(go, 1);
  float         fwdsc, nullsc;
  int           status;

  status = esl_sqfile_OpenDigital(abc, seqfile, format, NULL, &sqfp);
  if      (status == eslENOTFOUND) esl_fatal("No such file.");
  else if (status == eslEFORMAT)   esl_fatal("Format unrecognized.");
  else if (status != eslOK)        esl_fatal("Open failed, code %d.", status);

  while ((status = esl_sqio_Read(sqfp, sq)) == eslOK)
    {   
      esl_hmx_GrowTo(hmx, sq->n, hmm->M);

      esl_hmm_Forward(sq->dsq, sq->n, hmm,  hmx, &fwdsc);
      esl_hmm_Forward(sq->dsq, sq->n, bg, hmx, &nullsc);

      printf("%-16s %5d  %11.4f %8.4f    %11.4f %8.4f    %11.4f %8.4f\n", sq->name, (int) sq->n,
	     fwdsc  * eslCONST_LOG2R, (fwdsc  * eslCONST_LOG2R) / sq->n,
	     nullsc * eslCONST_LOG2R, (nullsc * eslCONST_LOG2R) / sq->n,
	     (fwdsc - nullsc) * eslCONST_LOG2R, (fwdsc-nullsc) * eslCONST_LOG2R / sq->n);

      esl_sq_Reuse(sq);
    }
  if      (status == eslEFORMAT) esl_fatal("Parse failed (sequence file %s)\n%s\n",
					   sqfp->filename, sqfp->get_error(sqfp));     
  else if (status != eslEOF)     esl_fatal("Unexpected error %d reading sequence file %s",
					   status, sqfp->filename);
 
  
  esl_sqfile_Close(sqfp);
  esl_sq_Destroy(sq);
  esl_hmm_Destroy(hmm);
  esl_hmm_Destroy(bg);
  esl_hmx_Destroy(hmx);
  esl_alphabet_Destroy(abc);
  esl_getopts_Destroy(go);
  return 0;
}
예제 #29
0
static int
thread_loop(ESL_THREADS *obj, ESL_WORK_QUEUE *queue, P7_HMMFILE *hfp)
{
  int  status   = eslOK;
  int  sstatus  = eslOK;
  int  eofCount = 0;
  P7_OM_BLOCK   *block;
  ESL_ALPHABET  *abc = NULL;
  void          *newBlock;

  esl_workqueue_Reset(queue);
  esl_threads_WaitForStart(obj);

  status = esl_workqueue_ReaderUpdate(queue, NULL, &newBlock);
  if (status != eslOK) esl_fatal("Work queue reader failed");
      
  /* Main loop: */
  while (sstatus == eslOK)
  {
      block = (P7_OM_BLOCK *) newBlock;
      sstatus = p7_oprofile_ReadBlockMSV(hfp, &abc, block);

      if (sstatus == eslEOF)
      {
        if (eofCount < esl_threads_GetWorkerCount(obj)) sstatus = eslOK;
        ++eofCount;
      }
	  
      if (sstatus == eslOK)
      {
        status = esl_workqueue_ReaderUpdate(queue, block, &newBlock);
        if (status != eslOK) esl_fatal("Work queue reader failed");
      }
  }

  status = esl_workqueue_ReaderUpdate(queue, block, NULL);
  if (status != eslOK) esl_fatal("Work queue reader failed");

  if (sstatus == eslEOF)
  {
      /* wait for all the threads to complete */
      esl_threads_WaitForFinish(obj);
      esl_workqueue_Complete(queue);  
  }
  
  esl_alphabet_Destroy(abc);
  return sstatus;
}
예제 #30
0
void destroy_hmmer_wrapper() {
  int index;
  if(models != NULL) {
    for(index = 0;index < num_models;index++) {
      p7_oprofile_Destroy(models[index]);
      p7_profile_Destroy(gmodels[index]);
    }
    free(models);
    free(gmodels);
  }

  if(wrapper_results != NULL) {
    for(index = 0;index < num_models;index++) {
      destroy_result(wrapper_results[index]);
    }
    free(wrapper_results);
  }
  
  if(bg != NULL) {
    p7_bg_Destroy(bg);
  }

  if(hmm_fp != NULL) {
    p7_hmmfile_Close(hmm_fp);
  }

  if(oxf) {
    p7_omx_Destroy(oxf);
  }
  if(oxb) {
    p7_omx_Destroy(oxb);
  }

  if(gxf) {
    p7_gmx_Destroy(gxf);
  }
  if(gxb) {
    p7_gmx_Destroy(gxb);
  }

  if(abc) {
    esl_alphabet_Destroy(abc);
  }
  if(tr) {
    p7_trace_Destroy(tr);
  }
}