Example #1
0
/* The "basic" utest is a minimal driver for making a small DNA profile and a small DNA sequence,
 * then running Viterbi and Forward. It's useful for dumping DP matrices and profiles for debugging.
 */
static void
utest_basic(ESL_GETOPTS *go)
{
  char           *query= "# STOCKHOLM 1.0\n\nseq1 GAATTC\nseq2 GAATTC\n//\n";
  int             fmt  = eslMSAFILE_STOCKHOLM;
  char           *targ = "GAATTC";
  ESL_ALPHABET   *abc  = NULL;
  ESL_MSA        *msa  = NULL;
  P7_HMM         *hmm  = NULL;
  P7_PROFILE     *gm   = NULL;
  P7_BG          *bg   = NULL;
  P7_PRIOR       *pri  = NULL;	
  ESL_DSQ        *dsq  = NULL;
  P7_GMX         *gx   = NULL;
  P7_TRACE        *tr  = NULL;
  int             L    = strlen(targ);
  float           vsc, vsc2, fsc;

  if ((abc = esl_alphabet_Create(eslDNA))          == NULL)  esl_fatal("failed to create alphabet");
  if ((pri = p7_prior_CreateNucleic())             == NULL)  esl_fatal("failed to create prior");
  if ((msa = esl_msa_CreateFromString(query, fmt)) == NULL)  esl_fatal("failed to create MSA");
  if (esl_msa_Digitize(abc, msa, NULL)             != eslOK) esl_fatal("failed to digitize MSA");
  if (p7_Fastmodelmaker(msa, 0.5, NULL, &hmm, NULL) != eslOK) esl_fatal("failed to create GAATTC model");
  if (p7_ParameterEstimation(hmm, pri)             != eslOK) esl_fatal("failed to parameterize GAATTC model");
  if (p7_hmm_SetConsensus(hmm, NULL)               != eslOK) esl_fatal("failed to make consensus");
  if ((bg = p7_bg_Create(abc))                     == NULL)  esl_fatal("failed to create DNA null model");
  if ((gm = p7_profile_Create(hmm->M, abc))        == NULL)  esl_fatal("failed to create GAATTC profile");
  if (p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL)!= eslOK) esl_fatal("failed to config profile");
  if (p7_profile_Validate(gm, NULL, 0.0001)        != eslOK) esl_fatal("whoops, profile is bad!");
  if (esl_abc_CreateDsq(abc, targ, &dsq)           != eslOK) esl_fatal("failed to create GAATTC digital sequence");
  if ((gx = p7_gmx_Create(gm->M, L))               == NULL)  esl_fatal("failed to create DP matrix");
  if ((tr = p7_trace_Create())                     == NULL)  esl_fatal("trace creation failed");

  p7_GViterbi   (dsq, L, gm, gx, &vsc);
  if (esl_opt_GetBoolean(go, "-v")) printf("Viterbi score: %.4f\n", vsc);
  if (esl_opt_GetBoolean(go, "-v")) p7_gmx_Dump(stdout, gx, p7_DEFAULT);

  p7_GTrace     (dsq, L, gm, gx, tr);
  p7_trace_Score(tr, dsq, gm, &vsc2);
  if (esl_opt_GetBoolean(go, "-v")) p7_trace_Dump(stdout, tr, gm, dsq);
  
  if (esl_FCompare(vsc, vsc2, 1e-5) != eslOK)  esl_fatal("trace score and Viterbi score don't agree.");

  p7_GForward   (dsq, L, gm, gx, &fsc);
  if (esl_opt_GetBoolean(go, "-v")) printf("Forward score: %.4f\n", fsc);
  if (esl_opt_GetBoolean(go, "-v")) p7_gmx_Dump(stdout, gx, p7_DEFAULT);

  p7_trace_Destroy(tr);
  p7_gmx_Destroy(gx);
  free(dsq);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_msa_Destroy(msa);
  p7_prior_Destroy(pri);
  esl_alphabet_Destroy(abc);
  return;
}
Example #2
0
int
main(int argc, char **argv)
{
  ESL_ALPHABET *aa_abc = NULL,
               *nt_abc = NULL;
  ESL_MSA      *msa1   = NULL,
               *msa2   = NULL, 
               *msa3   = NULL,
               *msa4   = NULL,
               *msa5   = NULL;
  double uniform[5] = { 1.0, 1.0, 1.0, 1.0, 1.0 };
  double wgt2[5]    = { 0.833333, 0.833333, 0.833333, 0.833333, 1.66667 }; /* GSC, PB give same answer */
  double gsc3[4]    = { 1.125000, 0.875000, 0.875000, 1.125000 };
  double pb3[4]     = { 1.066667, 1.066667, 0.800000, 1.066667 };
  double blosum3[4] = { 1.333333, 0.666667, 0.666667, 1.333333 };
  double gsc4[4]    = { 0.760870, 0.760870, 1.086957, 1.391304 };
  double pb4[4]     = { 0.800000, 0.800000, 1.000000, 1.400000 };
  double blosum4[4] = { 0.666667, 0.666667, 1.333333, 1.333333 };
  
  if ((aa_abc = esl_alphabet_Create(eslAMINO)) == NULL)  esl_fatal("failed to create amino alphabet");
  if ((nt_abc = esl_alphabet_Create(eslDNA))   == NULL)  esl_fatal("failed to create DNA alphabet");

  /* msa1: all sequences identical. Any weighting method should assign uniform weights.
   * msa2: "contrived" example of [Henikoff94b]. "Correct" solution is 1==2, 3==4, and 5==2x other weights.
   * msa3: the "nitrogenase segments" example of [Henikoff94b].
   * msa4: alignment that makes the same distances as Figure 4 from [Gerstein94]
   * msa5: gap pathology. no information here, so weighting methods should resort to uniform weights.
   */
  if ((msa1 = esl_msa_CreateFromString("# STOCKHOLM 1.0\n\nseq1 AAAAA\nseq2 AAAAA\nseq3 AAAAA\nseq4 AAAAA\nseq5 AAAAA\n//\n", 
				       eslMSAFILE_STOCKHOLM)) == NULL) esl_fatal("msa 1 creation failed");
  if ((msa2 = esl_msa_CreateFromString("# STOCKHOLM 1.0\n\nseq1 AAAAA\nseq2 AAAAA\nseq3 CCCCC\nseq4 CCCCC\nseq5 TTTTT\n//\n",
				       eslMSAFILE_STOCKHOLM)) == NULL) esl_fatal("msa 2 creation failed");
  if ((msa3 = esl_msa_CreateFromString("# STOCKHOLM 1.0\n\nNIFE_CLOPA GYVGS\nNIFD_AZOVI GFDGF\nNIFD_BRAJA GYDGF\nNIFK_ANASP GYQGG\n//\n",
				       eslMSAFILE_STOCKHOLM)) == NULL) esl_fatal("msa 3 creation failed");
  if ((msa4 = esl_msa_CreateFromString("# STOCKHOLM 1.0\n\nA  AAAAAAAAAA\nB  TTAAAAAAAA\nC  ATAAAACCCC\nD  GGGAAGGGGG\n//\n",
				       eslMSAFILE_STOCKHOLM)) == NULL) esl_fatal("msa 4 creation failed");
  if ((msa5 = esl_msa_CreateFromString("# STOCKHOLM 1.0\n\nA  A----\nB  -C---\nC  --G--\nD  ---T-\nE  ----T\n//\n",
				       eslMSAFILE_STOCKHOLM)) == NULL) esl_fatal("msa 5 creation failed");

  utest_GSC(aa_abc, msa1, uniform);
  utest_GSC(nt_abc, msa1, uniform);
  utest_GSC(aa_abc, msa2, wgt2);
  utest_GSC(nt_abc, msa2, wgt2);
  utest_GSC(aa_abc, msa3, gsc3);
  /* no nt test on msa3: it's protein-only */
  utest_GSC(aa_abc, msa4, gsc4);
  utest_GSC(nt_abc, msa4, gsc4);
  utest_GSC(aa_abc, msa5, uniform);
  utest_GSC(aa_abc, msa5, uniform);

  utest_PB(aa_abc, msa1, uniform);
  utest_PB(nt_abc, msa1, uniform);
  utest_PB(aa_abc, msa2, wgt2);
  utest_PB(nt_abc, msa2, wgt2);
  utest_PB(aa_abc, msa3, pb3);
  /* no nt test on msa3: it's protein-only */
  utest_PB(aa_abc, msa4, pb4);
  utest_PB(nt_abc, msa4, pb4);
  utest_PB(aa_abc, msa5, uniform);
  utest_PB(nt_abc, msa5, uniform);

  utest_BLOSUM(aa_abc, msa1, 0.62, uniform);
  utest_BLOSUM(nt_abc, msa1, 0.62, uniform);
  utest_BLOSUM(aa_abc, msa2, 0.62, wgt2);
  utest_BLOSUM(nt_abc, msa2, 0.62, wgt2);
  utest_BLOSUM(aa_abc, msa3, 0.62, blosum3);
  /* no nt test on msa3: it's protein-only */
  utest_BLOSUM(aa_abc, msa4, 0.62, blosum4);
  utest_BLOSUM(nt_abc, msa4, 0.62, blosum4);
  utest_BLOSUM(aa_abc, msa5, 0.62, uniform);
  utest_BLOSUM(nt_abc, msa5, 0.62, uniform);

  /* BLOSUM weights have the peculiar property of going flat at maxid=0.0 (everyone
   * clusters) or maxid=1.0 (nobody clusters).
   */
  utest_BLOSUM(aa_abc, msa4, 0.0,  uniform);
  utest_BLOSUM(aa_abc, msa4, 1.0,  uniform);

  esl_msa_Destroy(msa1);
  esl_msa_Destroy(msa2);
  esl_msa_Destroy(msa3);
  esl_msa_Destroy(msa4);
  esl_msa_Destroy(msa5);
  esl_alphabet_Destroy(aa_abc);
  esl_alphabet_Destroy(nt_abc);
  exit(0);
}