示例#1
0
/* utest_normalization()
 *
 * Builds a single-sequence model with p7_Seqmodel() from the 26-letter
 * amino acid test string below, using the BLOSUM62 score system loaded
 * into a builder with gap-open 0.1 and gap-extend 0.4, and then requires
 * the resulting HMM to pass p7_hmm_Validate() at a tolerance of 0.0001.
 *
 * Every step is checked; any failure terminates via esl_fatal().
 * <go> is not consulted by this test.
 */
static void 
utest_normalization(ESL_GETOPTS *go)
{
  char         *msg     = "seqmodel normalization utest failed";
  ESL_ALPHABET *abc     = NULL;
  char         *seq     = "ACDEFGHIKLMNPQRSTVWYBJZOUX";
  int           L       = strlen(seq);
  ESL_DSQ      *dsq     = NULL;
  float         popen   = 0.1;
  float         pextend = 0.4;
  P7_BUILDER   *bld     = NULL;
  P7_BG        *bg      = NULL;
  P7_HMM       *hmm     = NULL;
  char          errbuf[eslERRBUFSIZE];

  /* Create the alphabet and null model inside the checked sequence, so an
   * allocation failure is reported here rather than passed on as NULL.
   */
  if ( (abc = esl_alphabet_Create(eslAMINO))                                             == NULL)  esl_fatal(msg);
  if ( (bg  = p7_bg_Create(abc))                                                         == NULL)  esl_fatal(msg);

  if ( esl_abc_CreateDsq(abc, seq, &dsq)                                                 != eslOK) esl_fatal(msg);
  if ( (bld = p7_builder_Create(NULL, abc))                                              == NULL)  esl_fatal(msg);
  if ( p7_builder_LoadScoreSystem(bld, "BLOSUM62", popen, pextend, bg)                   != eslOK) esl_fatal(msg); 
  if ( p7_Seqmodel(abc, dsq, L, "aatest", bld->Q, bg->f, bld->popen, bld->pextend, &hmm) != eslOK) esl_fatal(msg);

  /* On failure, errbuf carries the validator's explanation. */
  if (p7_hmm_Validate(hmm, errbuf, 0.0001) != eslOK) esl_fatal("normalization utest failed\n%s\n", errbuf);

  free(dsq);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  p7_builder_Destroy(bld);
  esl_alphabet_Destroy(abc);
}
示例#2
0
/* utest_basic()
 *
 * Minimal driver: builds a small DNA profile from a two-sequence GAATTC
 * Stockholm alignment, digitizes a GAATTC target, and runs Viterbi, a
 * traceback, and Forward against it. The score of the traceback must
 * agree with the Viterbi score to within 1e-5 as judged by esl_FCompare().
 *
 * With -v set, the scores, the DP matrix after Viterbi and after Forward,
 * and the trace are written to stdout, which is handy when debugging.
 * Any setup failure or a score disagreement ends in esl_fatal().
 */
static void
utest_basic(ESL_GETOPTS *go)
{
  char           *ali_text   = "# STOCKHOLM 1.0\n\nseq1 GAATTC\nseq2 GAATTC\n//\n";
  int             ali_format = eslMSAFILE_STOCKHOLM;
  char           *target     = "GAATTC";
  int             L          = strlen(target);
  ESL_ALPHABET   *abc        = NULL;
  P7_PRIOR       *prior      = NULL;
  ESL_MSA        *msa        = NULL;
  P7_HMM         *hmm        = NULL;
  P7_BG          *bg         = NULL;
  P7_PROFILE     *gm         = NULL;
  ESL_DSQ        *dsq        = NULL;
  P7_GMX         *dpmx       = NULL;
  P7_TRACE       *trace      = NULL;
  float           viterbi_sc;
  float           trace_sc;
  float           forward_sc;
  int             be_verbose;

  /* Set up: alphabet, prior, alignment -> HMM -> configured profile. */
  abc = esl_alphabet_Create(eslDNA);
  if (abc == NULL)   esl_fatal("failed to create alphabet");

  prior = p7_prior_CreateNucleic();
  if (prior == NULL) esl_fatal("failed to create prior");

  msa = esl_msa_CreateFromString(ali_text, ali_format);
  if (msa == NULL)   esl_fatal("failed to create MSA");

  if (esl_msa_Digitize(abc, msa, NULL)               != eslOK) esl_fatal("failed to digitize MSA");
  if (p7_Fastmodelmaker(msa, 0.5, NULL, &hmm, NULL)  != eslOK) esl_fatal("failed to create GAATTC model");
  if (p7_ParameterEstimation(hmm, prior)             != eslOK) esl_fatal("failed to parameterize GAATTC model");
  if (p7_hmm_SetConsensus(hmm, NULL)                 != eslOK) esl_fatal("failed to make consensus");

  bg = p7_bg_Create(abc);
  if (bg == NULL)    esl_fatal("failed to create DNA null model");

  gm = p7_profile_Create(hmm->M, abc);
  if (gm == NULL)    esl_fatal("failed to create GAATTC profile");

  if (p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL)  != eslOK) esl_fatal("failed to config profile");
  if (p7_profile_Validate(gm, NULL, 0.0001)          != eslOK) esl_fatal("whoops, profile is bad!");

  /* Target sequence and DP workspace. */
  if (esl_abc_CreateDsq(abc, target, &dsq)           != eslOK) esl_fatal("failed to create GAATTC digital sequence");

  dpmx = p7_gmx_Create(gm->M, L);
  if (dpmx == NULL)  esl_fatal("failed to create DP matrix");

  trace = p7_trace_Create();
  if (trace == NULL) esl_fatal("trace creation failed");

  /* Viterbi */
  p7_GViterbi(dsq, L, gm, dpmx, &viterbi_sc);
  be_verbose = esl_opt_GetBoolean(go, "-v");   /* looked up once, reused below */
  if (be_verbose)
    {
      printf("Viterbi score: %.4f\n", viterbi_sc);
      p7_gmx_Dump(stdout, dpmx, p7_DEFAULT);
    }

  /* Traceback through the Viterbi matrix; its score must match Viterbi's. */
  p7_GTrace(dsq, L, gm, dpmx, trace);
  p7_trace_Score(trace, dsq, gm, &trace_sc);
  if (be_verbose) p7_trace_Dump(stdout, trace, gm, dsq);

  if (esl_FCompare(viterbi_sc, trace_sc, 1e-5) != eslOK)
    esl_fatal("trace score and Viterbi score don't agree.");

  /* Forward reuses the same DP matrix. */
  p7_GForward(dsq, L, gm, dpmx, &forward_sc);
  if (be_verbose)
    {
      printf("Forward score: %.4f\n", forward_sc);
      p7_gmx_Dump(stdout, dpmx, p7_DEFAULT);
    }

  /* Release everything created above. */
  p7_trace_Destroy(trace);
  p7_gmx_Destroy(dpmx);
  free(dsq);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_msa_Destroy(msa);
  p7_prior_Destroy(prior);
  esl_alphabet_Destroy(abc);
}
/* Test driver entry point.
 *
 * Reads -L (aligned length), -N (number of sequences) and --seed from the
 * command line, generates a random iid DNA alignment, and runs the unit
 * tests on it. The alignment is arranged so that sequences 0 and 1 are
 * identical and sequences 0 and 2 differ in every column.
 *
 * Returns:  eslOK if every unit test passes;
 *           eslFAIL if a unit test, an allocation, or object creation fails;
 *           0 after printing help (-h);
 *           1 on a command line usage error, including -N < 3 or -L < 0.
 */
int 
main(int argc, char **argv)
{
  ESL_GETOPTS   *go = NULL;
  ESL_RANDOMNESS *r = NULL;
  char  **as = NULL;		/* aligned character seqs (random, iid) */
  int     N,L;			/* # of seqs, and their aligned lengths */
  int seed;
  int i,j;
  int status;			/* set by ESL_ALLOC() on failure         */
  double p[4];			/* ACGT probabilities */
#ifdef eslAUGMENT_ALPHABET
  ESL_DSQ      **ax = NULL;		/* digitized alignment                  */
  ESL_ALPHABET *abc = NULL;
#endif

  /* Process command line
   */
  go = esl_getopts_Create(options);
  esl_opt_ProcessCmdline(go, argc, argv);
  esl_opt_VerifyConfig(go);
  if (esl_opt_GetBoolean(go, "-h") == TRUE) {
    puts(usage); 
    puts("\n  where options are:");
    esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2=indentation; 80=width */
    return 0;
  }
  L    = esl_opt_GetInteger(go, "-L");
  N    = esl_opt_GetInteger(go, "-N");
  seed = esl_opt_GetInteger(go, "--seed");
  if (esl_opt_ArgNumber(go) != 0) {
    puts("Incorrect number of command line arguments.");
    puts(usage);
    return 1;
  }
  esl_getopts_Destroy(go);

  /* Sequences 0, 1 and 2 are written unconditionally below, and each
   * sequence buffer is L+1 chars; reject sizes that would make those
   * accesses or allocations invalid.
   */
  if (N < 3 || L < 0) {
    puts("-N must be at least 3 and -L must not be negative.");
    puts(usage);
    return 1;
  }

  /* Create a random DNA alignment;
   * force it to obey the conventions of the unit tests:
   *   0,1 are identical
   *   0,2 are completely dissimilar
   */
  r   = esl_randomness_Create(seed);
  if (r == NULL) goto ERROR;	/* don't sample from a missing generator */
  for (i = 0; i < 4; i++) p[i] = 0.25;
  ESL_ALLOC(as, sizeof(char *) * N);
  for (i = 0; i < N; i++) 
    ESL_ALLOC(as[i], sizeof(char) * (L+1));
  esl_rsq_IID(r, "ACGT", p, 4, L, as[0]);
  strcpy(as[1], as[0]);
  esl_rsq_IID(r, "ACGT", p, 4, L, as[2]);
  /* Resample any column of seq 2 that happens to match seq 0. */
  for (j = 0; j < L; j++)
    while (as[2][j] == as[0][j])
      as[2][j] = "ACGT"[esl_rnd_Roll(r, 4)];
  for (i = 3; i < N; i++)
    esl_rsq_IID(r, "ACGT", p, 4, L, as[i]);

#ifdef eslAUGMENT_ALPHABET
  abc = esl_alphabet_Create(eslDNA);
  if (abc == NULL) goto ERROR;
  ESL_ALLOC(ax, sizeof(ESL_DSQ *) * N);
  for (i = 0; i < N; i++) 
    if (esl_abc_CreateDsq(abc, as[i], &(ax[i])) != eslOK) goto ERROR; /* ax[i] would be unset otherwise */
#endif /*eslAUGMENT_ALPHABET*/


  /* Unit tests
   */
  if (utest_CPairId(as, N)               != eslOK) return eslFAIL;
  if (utest_CJukesCantor(4, as, N)       != eslOK) return eslFAIL;

#ifdef eslAUGMENT_ALPHABET
  if (utest_XPairId(abc, as, ax, N)      != eslOK) return eslFAIL;
  if (utest_XJukesCantor(abc, as, ax, N) != eslOK) return eslFAIL;
#endif /*eslAUGMENT_ALPHABET*/

#ifdef eslAUGMENT_DMATRIX
  if (utest_CPairIdMx(as, N)             != eslOK) return eslFAIL;
  if (utest_CDiffMx(as, N)               != eslOK) return eslFAIL;
  if (utest_CJukesCantorMx(4, as, N)     != eslOK) return eslFAIL;
#endif /* eslAUGMENT_DMATRIX*/

#if defined (eslAUGMENT_ALPHABET) && defined (eslAUGMENT_DMATRIX)
  if (utest_XPairIdMx(abc, as, ax, N)       != eslOK) return eslFAIL;
  if (utest_XDiffMx(abc, as, ax, N)         != eslOK) return eslFAIL;
  if (utest_XJukesCantorMx(abc, as, ax, N)  != eslOK) return eslFAIL;
#endif

  esl_randomness_Destroy(r);
  esl_Free2D((void **) as, N);
#ifdef eslAUGMENT_ALPHABET
  esl_alphabet_Destroy(abc);
  esl_Free2D((void **) ax, N);
#endif
  return eslOK;

  /* Reached from ESL_ALLOC() and the creation checks above. */
 ERROR:
  return eslFAIL;
}