/* The "basic" utest is a minimal driver for making a small DNA profile and a small DNA sequence,
 * then running Viterbi and Forward. It's useful for dumping DP matrices and profiles for debugging.
 *
 * The query model is built from a two-sequence Stockholm alignment of GAATTC,
 * and the target sequence is GAATTC as well. The profile is configured in
 * p7_UNILOCAL mode for the target length.
 *
 * Args:    go - application options; when "-v" is set, the Viterbi and
 *               Forward scores, both DP matrices, and the Viterbi trace
 *               are written to stdout.
 *
 * Returns: (void). Any failing step terminates the test via esl_fatal(),
 *          including a disagreement between the Viterbi score and the
 *          score of the traceback.
 */
static void
utest_basic(ESL_GETOPTS *go)
{
  char           *query = "# STOCKHOLM 1.0\n\nseq1 GAATTC\nseq2 GAATTC\n//\n";
  int             fmt   = eslMSAFILE_STOCKHOLM;
  char           *targ  = "GAATTC";
  ESL_ALPHABET   *abc   = NULL;
  ESL_MSA        *msa   = NULL;
  P7_HMM         *hmm   = NULL;
  P7_PROFILE     *gm    = NULL;
  P7_BG          *bg    = NULL;
  P7_PRIOR       *pri   = NULL;
  ESL_DSQ        *dsq   = NULL;
  P7_GMX         *gx    = NULL;
  P7_TRACE       *tr    = NULL;
  int             L     = (int) strlen(targ);
  int             be_verbose;
  float           vsc, vsc2, fsc;

  /* Build and parameterize the query model from the tiny alignment. */
  if ((abc = esl_alphabet_Create(eslDNA))          == NULL)  esl_fatal("failed to create alphabet");
  if ((pri = p7_prior_CreateNucleic())             == NULL)  esl_fatal("failed to create prior");
  if ((msa = esl_msa_CreateFromString(query, fmt)) == NULL)  esl_fatal("failed to create MSA");
  if (esl_msa_Digitize(abc, msa, NULL)             != eslOK) esl_fatal("failed to digitize MSA");
  if (p7_Fastmodelmaker(msa, 0.5, NULL, &hmm, NULL) != eslOK) esl_fatal("failed to create GAATTC model");
  if (p7_ParameterEstimation(hmm, pri)             != eslOK) esl_fatal("failed to parameterize GAATTC model");
  if (p7_hmm_SetConsensus(hmm, NULL)               != eslOK) esl_fatal("failed to make consensus");

  /* Configure a search profile for a target of length L. */
  if ((bg = p7_bg_Create(abc))                     == NULL)  esl_fatal("failed to create DNA null model");
  if ((gm = p7_profile_Create(hmm->M, abc))        == NULL)  esl_fatal("failed to create GAATTC profile");
  if (p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL)!= eslOK) esl_fatal("failed to config profile");
  if (p7_profile_Validate(gm, NULL, 0.0001)        != eslOK) esl_fatal("whoops, profile is bad!");

  /* Digitize the target and allocate the DP matrix and trace. */
  if (esl_abc_CreateDsq(abc, targ, &dsq)           != eslOK) esl_fatal("failed to create GAATTC digital sequence");
  if ((gx = p7_gmx_Create(gm->M, L))               == NULL)  esl_fatal("failed to create DP matrix");
  if ((tr = p7_trace_Create())                     == NULL)  esl_fatal("trace creation failed");

  /* Look the verbosity flag up once; it gates every dump below. */
  be_verbose = esl_opt_GetBoolean(go, "-v");

  /* Viterbi: a failure here would leave <vsc> unset, so do not continue past it. */
  if (p7_GViterbi(dsq, L, gm, gx, &vsc)            != eslOK) esl_fatal("Viterbi failed");
  if (be_verbose) printf("Viterbi score: %.4f\n", vsc);
  if (be_verbose) p7_gmx_Dump(stdout, gx, p7_DEFAULT);

  /* The traceback must reproduce the Viterbi score. */
  if (p7_GTrace(dsq, L, gm, gx, tr)                != eslOK) esl_fatal("Viterbi traceback failed");
  if (p7_trace_Score(tr, dsq, gm, &vsc2)           != eslOK) esl_fatal("failed to score the Viterbi trace");
  if (be_verbose) p7_trace_Dump(stdout, tr, gm, dsq);
  if (esl_FCompare(vsc, vsc2, 1e-5)                != eslOK) esl_fatal("trace score and Viterbi score don't agree.");

  /* Forward reuses the same DP matrix. */
  if (p7_GForward(dsq, L, gm, gx, &fsc)            != eslOK) esl_fatal("Forward failed");
  if (be_verbose) printf("Forward score: %.4f\n", fsc);
  if (be_verbose) p7_gmx_Dump(stdout, gx, p7_DEFAULT);

  p7_trace_Destroy(tr);
  p7_gmx_Destroy(gx);
  free(dsq);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_msa_Destroy(msa);
  p7_prior_Destroy(pri);
  esl_alphabet_Destroy(abc);
  return;
}
/* Evaluate fx = rel entropy - etarget, which we want to be = 0, * for effective sequence number <x>. */ static int eweight_target_f(double Neff, void *params, double *ret_fx) { struct ew_param_s *p = (struct ew_param_s *) params; p7_hmm_CopyParameters(p->hmm, p->h2); p7_hmm_Scale(p->h2, Neff / (double) p->h2->nseq); p7_ParameterEstimation(p->h2, p->pri); *ret_fx = p7_MeanMatchRelativeEntropy(p->h2, p->bg) - p->etarget; return eslOK; }
/* parameterize()
 * Turn the counts collected in <hmm> into probability parameters,
 * using the prior held by the builder <bld>.
 *
 * Returns <eslOK> on success. On failure, returns the status reported by
 * p7_ParameterEstimation() and records "parameter estimation failed" in
 * <bld->errbuf>.
 */
static int
parameterize(P7_BUILDER *bld, P7_HMM *hmm)
{
  int status = p7_ParameterEstimation(hmm, bld->prior);

  if (status == eslOK) return eslOK;

  /* ESL_XFAIL jumps to the ERROR label below. */
  ESL_XFAIL(status, bld->errbuf, "parameter estimation failed");

 ERROR:
  return status;
}
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *msafile = esl_opt_GetArg(go, 1); int fmt = eslMSAFILE_UNKNOWN; ESL_ALPHABET *abc = NULL; ESL_MSAFILE *afp = NULL; ESL_MSA *msa = NULL; P7_HMM *hmm = NULL; P7_PRIOR *prior = NULL; P7_TRACE **trarr = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; ESL_MSA *postmsa = NULL; int i; int status; /* Standard idioms for opening and reading a digital MSA. (See esl_msafile.c example). */ if (esl_opt_GetBoolean(go, "--rna")) abc = esl_alphabet_Create(eslRNA); else if (esl_opt_GetBoolean(go, "--dna")) abc = esl_alphabet_Create(eslDNA); else if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO); if ((status = esl_msafile_Open(&abc, msafile, NULL, fmt, NULL, &afp)) != eslOK) esl_msafile_OpenFailure(afp, status); bg = p7_bg_Create(abc); switch (abc->type) { case eslAMINO: prior = p7_prior_CreateAmino(); break; case eslDNA: prior = p7_prior_CreateNucleic(); break; case eslRNA: prior = p7_prior_CreateNucleic(); break; default: prior = p7_prior_CreateLaplace(abc); break; } if (prior == NULL) esl_fatal("Failed to initialize prior"); while ((status = esl_msafile_Read(afp, &msa)) != eslEOF) { if (status != eslOK) esl_msafile_ReadFailure(afp, status); /* The modelmakers collect counts in an HMM structure */ status = p7_Handmodelmaker(msa, NULL, &hmm, &trarr); if (status == eslENORESULT) esl_fatal("no consensus columns in alignment %s\n", msa->name); else if (status != eslOK) esl_fatal("failed to build HMM from alignment %s\n", msa->name); printf("COUNTS:\n"); p7_hmm_Dump(stdout, hmm); /* These counts, in combination with a prior, are converted to probability parameters */ status = p7_ParameterEstimation(hmm, prior); if (status != eslOK) esl_fatal("failed to parameterize HMM for %s", msa->name); printf("PROBABILITIES:\n"); p7_hmm_Dump(stdout, hmm); /* Just so we can dump a more informatively annotated trace - build a profile */ gm = 
p7_profile_Create(hmm->M, abc); p7_profile_Config (gm, hmm, bg); p7_profile_SetLength(gm, 400); /* Dump the individual traces */ for (i = 0; i < msa->nseq; i++) { printf("Trace %d: %s\n", i+1, msa->sqname[i]); p7_trace_DumpAnnotated(stdout, trarr[i], gm, msa->ax[i]); } /* Create an MSA from the individual traces */ status = p7_tracealign_MSA(msa, trarr, hmm->M, p7_DEFAULT, &postmsa); if (status != eslOK) esl_fatal("failed to create new MSA from traces\n"); esl_msafile_Write(stdout, postmsa, eslMSAFILE_PFAM); p7_profile_Destroy(gm); p7_hmm_Destroy(hmm); p7_trace_DestroyArray(trarr, msa->nseq); esl_msa_Destroy(postmsa); esl_msa_Destroy(msa); } esl_msafile_Close(afp); p7_bg_Destroy(bg); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }