/* tests: * 1. each sampled trace must validate. * 2. each trace must be <= viterbi trace score * 3. in a large # of traces, one is "equal" to the viterbi trace score. * (this of course is stochastic; but it's true for the particular * choice of RNG seed used in tests here.) */ static void utest_stotrace(ESL_GETOPTS *go, ESL_RANDOMNESS *rng, ESL_ALPHABET *abc, P7_PROFILE *gm, P7_OPROFILE *om, ESL_DSQ *dsq, int L, int ntrace) { P7_GMX *gx = NULL; P7_OMX *ox = NULL; P7_TRACE *tr = NULL; char errbuf[eslERRBUFSIZE]; int idx; float maxsc = -eslINFINITY; float vsc, sc; if ((gx = p7_gmx_Create(gm->M, L)) == NULL) esl_fatal("generic DP matrix creation failed"); if ((ox = p7_omx_Create(gm->M, L, L)) == NULL) esl_fatal("optimized DP matrix create failed"); if ((tr = p7_trace_Create()) == NULL) esl_fatal("trace creation failed"); if (p7_GViterbi(dsq, L, gm, gx, &vsc) != eslOK) esl_fatal("viterbi failed"); if (p7_Forward (dsq, L, om, ox, NULL) != eslOK) esl_fatal("forward failed"); for (idx = 0; idx < ntrace; idx++) { if (p7_StochasticTrace(rng, dsq, L, om, ox, tr) != eslOK) esl_fatal("stochastic trace failed"); if (p7_trace_Validate(tr, abc, dsq, errbuf) != eslOK) esl_fatal("trace invalid:\n%s", errbuf); if (p7_trace_Score(tr, dsq, gm, &sc) != eslOK) esl_fatal("trace scoring failed"); maxsc = ESL_MAX(sc, maxsc); if (sc > vsc) esl_fatal("sampled trace has score > optimal Viterbi path; not possible"); p7_trace_Reuse(tr); } if (esl_FCompare(maxsc, vsc, 0.1) != eslOK) esl_fatal("stochastic trace failed to sample the Viterbi path"); p7_trace_Destroy(tr); p7_omx_Destroy(ox); p7_gmx_Destroy(gx); }
/* * compare to GForward() scores. */ static void utest_fwdback(ESL_RANDOMNESS *r, ESL_ALPHABET *abc, P7_BG *bg, int M, int L, int N) { char *msg = "forward/backward unit test failed"; P7_HMM *hmm = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); P7_OMX *fwd = p7_omx_Create(M, 0, L); P7_OMX *bck = p7_omx_Create(M, 0, L); P7_OMX *oxf = p7_omx_Create(M, L, L); P7_OMX *oxb = p7_omx_Create(M, L, L); P7_GMX *gx = p7_gmx_Create(M, L); float tolerance; float fsc1, fsc2; float bsc1, bsc2; float generic_sc; p7_FLogsumInit(); if (p7_FLogsumError(-0.4, -0.5) > 0.0001) tolerance = 1.0; /* weaker test against GForward() */ else tolerance = 0.0001; /* stronger test: FLogsum() is in slow exact mode. */ p7_oprofile_Sample(r, abc, bg, M, L, &hmm, &gm, &om); while (N--) { esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_Forward (dsq, L, om, oxf, &fsc1); p7_Backward (dsq, L, om, oxf, oxb, &bsc1); p7_ForwardParser (dsq, L, om, fwd, &fsc2); p7_BackwardParser(dsq, L, om, fwd, bck, &bsc2); p7_GForward (dsq, L, gm, gx, &generic_sc); /* Forward and Backward scores should agree with high tolerance */ if (fabs(fsc1-bsc1) > 0.0001) esl_fatal(msg); if (fabs(fsc2-bsc2) > 0.0001) esl_fatal(msg); if (fabs(fsc1-fsc2) > 0.0001) esl_fatal(msg); /* GForward scores should approximate Forward scores, * with tolerance that depends on how logsum.c was compiled */ if (fabs(fsc1-generic_sc) > tolerance) esl_fatal(msg); } free(dsq); p7_hmm_Destroy(hmm); p7_omx_Destroy(oxb); p7_omx_Destroy(oxf); p7_omx_Destroy(bck); p7_omx_Destroy(fwd); p7_gmx_Destroy(gx); p7_profile_Destroy(gm); p7_oprofile_Destroy(om); }
/* compare results to GDecoding(). */ static void utest_null2_expectation(ESL_RANDOMNESS *r, ESL_ALPHABET *abc, P7_BG *bg, int M, int L, int N, float tolerance) { char *msg = "decoding unit test failed"; P7_HMM *hmm = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); P7_OMX *fwd = p7_omx_Create(M, L, L); P7_OMX *bck = p7_omx_Create(M, L, L); P7_OMX *pp = p7_omx_Create(M, L, L); P7_GMX *gxf = p7_gmx_Create(M, L); P7_GMX *gxb = p7_gmx_Create(M, L); P7_GMX *gpp = p7_gmx_Create(M, L); float *on2 = malloc(sizeof(float) * abc->Kp); float *gn2 = malloc(sizeof(float) * abc->Kp); float fsc1, fsc2; float bsc1, bsc2; if (!gn2 || !on2) esl_fatal(msg); if (p7_oprofile_Sample(r, abc, bg, M, L, &hmm, &gm, &om) != eslOK) esl_fatal(msg); while (N--) { if (esl_rsq_xfIID(r, bg->f, abc->K, L, dsq) != eslOK) esl_fatal(msg); if (p7_Forward (dsq, L, om, fwd, &fsc1) != eslOK) esl_fatal(msg); if (p7_Backward (dsq, L, om, fwd, bck, &bsc1) != eslOK) esl_fatal(msg); if (p7_Decoding(om, fwd, bck, pp) != eslOK) esl_fatal(msg); if (p7_Null2_ByExpectation(om, pp, on2) != eslOK) esl_fatal(msg); if (p7_GForward (dsq, L, gm, gxf, &fsc2) != eslOK) esl_fatal(msg); if (p7_GBackward(dsq, L, gm, gxb, &bsc2) != eslOK) esl_fatal(msg); if (p7_GDecoding(gm, gxf, gxb, gpp) != eslOK) esl_fatal(msg); if (p7_GNull2_ByExpectation(gm, gpp, gn2) != eslOK) esl_fatal(msg); if (esl_vec_FCompare(gn2, on2, abc->Kp, tolerance) != eslOK) esl_fatal(msg); } p7_gmx_Destroy(gpp); p7_gmx_Destroy(gxf); p7_gmx_Destroy(gxb); p7_omx_Destroy(pp); p7_omx_Destroy(fwd); p7_omx_Destroy(bck); free(on2); free(gn2); free(dsq); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_hmm_Destroy(hmm); }
/* compare results to GDecoding(). */ static void utest_decoding(ESL_RANDOMNESS *r, ESL_ALPHABET *abc, P7_BG *bg, int M, int L, int N, float tolerance) { char *msg = "decoding unit test failed"; P7_HMM *hmm = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); P7_OMX *fwd = p7_omx_Create(M, L, L); P7_OMX *bck = p7_omx_Create(M, L, L); P7_OMX *pp = p7_omx_Create(M, L, L); P7_GMX *gxf = p7_gmx_Create(M, L); P7_GMX *gxb = p7_gmx_Create(M, L); P7_GMX *gxp1 = p7_gmx_Create(M, L); P7_GMX *gxp2 = p7_gmx_Create(M, L); float fsc1, fsc2; float bsc1, bsc2; if (p7_oprofile_Sample(r, abc, bg, M, L, &hmm, &gm, &om) != eslOK) esl_fatal(msg); while (N--) { if (esl_rsq_xfIID(r, bg->f, abc->K, L, dsq) != eslOK) esl_fatal(msg); if (p7_Forward (dsq, L, om, fwd, &fsc1) != eslOK) esl_fatal(msg); if (p7_Backward (dsq, L, om, fwd, bck, &bsc1) != eslOK) esl_fatal(msg); if (p7_Decoding(om, fwd, bck, pp) != eslOK) esl_fatal(msg); if (p7_omx_FDeconvert(pp, gxp1) != eslOK) esl_fatal(msg); if (p7_GForward (dsq, L, gm, gxf, &fsc2) != eslOK) esl_fatal(msg); if (p7_GBackward(dsq, L, gm, gxb, &bsc2) != eslOK) esl_fatal(msg); if (p7_GDecoding(gm, gxf, gxb, gxp2) != eslOK) esl_fatal(msg); // p7_gmx_Dump(stdout, gxp1, p7_DEFAULT); // p7_gmx_Dump(stdout, gxp2, p7_DEFAULT); if (p7_gmx_Compare(gxp1, gxp2, tolerance) != eslOK) esl_fatal(msg); } p7_gmx_Destroy(gxp1); p7_gmx_Destroy(gxp2); p7_gmx_Destroy(gxf); p7_gmx_Destroy(gxb); p7_omx_Destroy(fwd); p7_omx_Destroy(bck); p7_omx_Destroy(pp); free(dsq); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_hmm_Destroy(hmm); }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_OMX *fwd = NULL; P7_OMX *bck = NULL; P7_OMX *pp = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float fsc, bsc; double Mcs; if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_ReconfigLength(om, L); fwd = p7_omx_Create(gm->M, L, L); bck = p7_omx_Create(gm->M, L, L); pp = p7_omx_Create(gm->M, L, L); esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_Forward (dsq, L, om, fwd, &fsc); p7_Backward(dsq, L, om, fwd, bck, &bsc); esl_stopwatch_Start(w); for (i = 0; i < N; i++) p7_Decoding(om, fwd, bck, pp); esl_stopwatch_Stop(w); Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) w->user; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_omx_Destroy(fwd); p7_omx_Destroy(bck); p7_omx_Destroy(pp); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return eslOK; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_OMX *ox1 = NULL; P7_OMX *ox2 = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); float null2[p7_MAXCODE]; int i,j,d,pos; int nsamples = 200; float fsc, bsc; double Mcs; if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_ReconfigLength(om, L); ox1 = p7_omx_Create(gm->M, L, L); ox2 = p7_omx_Create(gm->M, L, L); esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_Forward (dsq, L, om, ox1, &fsc); if (esl_opt_GetBoolean(go, "-t")) { P7_TRACE *tr = p7_trace_Create(); float *n2sc = malloc(sizeof(float) * (L+1)); esl_stopwatch_Start(w); for (i = 0; i < N; i++) { /* This is approximately what p7_domaindef.c::region_trace_ensemble() is doing: */ for (j = 0; j < nsamples; j++) { p7_StochasticTrace(r, dsq, L, om, ox1, tr); p7_trace_Index(tr); pos = 1; for (d = 0; d < tr->ndom; d++) { p7_Null2_ByTrace(om, tr, tr->tfrom[d], tr->tto[d], ox2, null2); for (; pos <= tr->sqfrom[d]; pos++) n2sc[pos] += 1.0; for (; pos < tr->sqto[d]; pos++) n2sc[pos] += null2[dsq[pos]]; } for (; pos <= L; pos++) n2sc[pos] += 1.0; p7_trace_Reuse(tr); } for (pos = 1; pos <= L; pos++) n2sc[pos] = logf(n2sc[pos] / nsamples); } esl_stopwatch_Stop(w); free(n2sc); p7_trace_Destroy(tr); } else { p7_Backward(dsq, L, om, ox1, ox2, &bsc); p7_Decoding(om, ox1, ox2, ox2); esl_stopwatch_Start(w); for (i = 0; i < N; i++) p7_Null2_ByExpectation(om, ox2, null2); esl_stopwatch_Stop(w); } Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) w->user; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_omx_Destroy(ox1); p7_omx_Destroy(ox2); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); char *seqfile = esl_opt_GetArg(go, 2); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_OMX *ox1 = NULL; P7_OMX *ox2 = NULL; P7_GMX *gx = NULL; ESL_SQ *sq = NULL; ESL_SQFILE *sqfp = NULL; P7_TRACE *tr = NULL; int format = eslSQFILE_UNKNOWN; char errbuf[eslERRBUFSIZE]; float fsc, bsc; float accscore; int status; /* Read in one HMM */ if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); p7_hmmfile_Close(hfp); /* Read in one sequence */ sq = esl_sq_CreateDigital(abc); status = esl_sqfile_OpenDigital(abc, seqfile, format, NULL, &sqfp); if (status == eslENOTFOUND) p7_Fail("No such file."); else if (status == eslEFORMAT) p7_Fail("Format unrecognized."); else if (status == eslEINVAL) p7_Fail("Can't autodetect stdin or .gz."); else if (status != eslOK) p7_Fail("Open failed, code %d.", status); if (esl_sqio_Read(sqfp, sq) != eslOK) p7_Fail("Failed to read sequence"); esl_sqfile_Close(sqfp); /* Configure a profile from the HMM */ bg = p7_bg_Create(abc); p7_bg_SetLength(bg, sq->n); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL); /* multihit local: H3 default */ om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); /* Allocations */ ox1 = p7_omx_Create(gm->M, sq->n, sq->n); ox2 = p7_omx_Create(gm->M, sq->n, sq->n); gx = p7_gmx_Create(gm->M, sq->n); tr = p7_trace_CreateWithPP(); p7_FLogsumInit(); /* Run Forward, Backward; do OA fill and trace */ p7_Forward (sq->dsq, sq->n, om, ox1, &fsc); p7_Backward(sq->dsq, sq->n, om, ox1, ox2, &bsc); p7_Decoding(om, ox1, ox2, ox2); /* <gx2> is now the posterior decoding matrix */ p7_OptimalAccuracy(om, ox2, ox1, &accscore); /* <gx1> is now the OA matrix */ p7_OATrace(om, ox2, ox1, tr); if (esl_opt_GetBoolean(go, "-d")) { p7_omx_FDeconvert(ox2, gx); p7_gmx_Dump(stdout, gx, p7_DEFAULT); } if (esl_opt_GetBoolean(go, "-m")) { p7_omx_FDeconvert(ox1, gx); p7_gmx_Dump(stdout, gx, p7_DEFAULT); } p7_trace_Dump(stdout, tr, gm, sq->dsq); if (p7_trace_Validate(tr, abc, sq->dsq, errbuf) != eslOK) p7_Die("trace fails validation:\n%s\n", errbuf); printf("fwd = %.4f nats\n", fsc); printf("bck = %.4f nats\n", bsc); printf("acc = %.4f (%.2f%%)\n", accscore, accscore * 100. / (float) sq->n); /* Cleanup */ esl_sq_Destroy(sq); p7_trace_Destroy(tr); p7_omx_Destroy(ox1); p7_omx_Destroy(ox2); p7_gmx_Destroy(gx); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
/* * 1. Compare accscore to GOptimalAccuracy(). * 2. Compare trace to GOATrace(). * * Note: This test is subject to some expected noise and can fail * for entirely innocent reasons. Generic Forward/Backward calculations with * p7_GForward(), p7_GBackward() use coarse-grain table lookups to sum * log probabilities, and sufficient roundoff error can accumulate to * change the optimal accuracy traceback, causing this test to fail. * So, if optacc_utest fails, before you go looking for bugs, first * go to ../logsum.c, change the #ifdef to activate the slow/accurate * version, recompile and rerun optacc_utest. If the failure goes away, * you can ignore it. - SRE, Wed Dec 17 09:45:31 2008 */ static void utest_optacc(ESL_GETOPTS *go, ESL_RANDOMNESS *r, ESL_ALPHABET *abc, P7_BG *bg, int M, int L, int N) { char *msg = "optimal accuracy unit test failed"; P7_HMM *hmm = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; ESL_SQ *sq = esl_sq_CreateDigital(abc); P7_OMX *ox1 = p7_omx_Create(M, L, L); P7_OMX *ox2 = p7_omx_Create(M, L, L); P7_GMX *gx1 = p7_gmx_Create(M, L); P7_GMX *gx2 = p7_gmx_Create(M, L); P7_TRACE *tr = p7_trace_CreateWithPP(); P7_TRACE *trg = p7_trace_CreateWithPP(); P7_TRACE *tro = p7_trace_CreateWithPP(); float accscore_o; float fsc, bsc, accscore; float fsc_g, bsc_g, accscore_g, accscore_g2; float pptol = 0.01; float sctol = 0.001; float gtol; p7_FLogsumInit(); gtol = ( (p7_FLogsumError(-0.4, -0.5) > 0.0001) ? 0.1 : 0.001); if (p7_oprofile_Sample(r, abc, bg, M, L, &hmm, &gm, &om)!= eslOK) esl_fatal(msg); while (N--) { if (p7_ProfileEmit(r, hmm, gm, bg, sq, tro) != eslOK) esl_fatal(msg); if (p7_omx_GrowTo(ox1, M, sq->n, sq->n) != eslOK) esl_fatal(msg); if (p7_omx_GrowTo(ox2, M, sq->n, sq->n) != eslOK) esl_fatal(msg); if (p7_gmx_GrowTo(gx1, M, sq->n) != eslOK) esl_fatal(msg); if (p7_gmx_GrowTo(gx2, M, sq->n) != eslOK) esl_fatal(msg); if (p7_Forward (sq->dsq, sq->n, om, ox1, &fsc) != eslOK) esl_fatal(msg); if (p7_Backward(sq->dsq, sq->n, om, ox1, ox2, &bsc) != eslOK) esl_fatal(msg); if (p7_Decoding(om, ox1, ox2, ox2) != eslOK) esl_fatal(msg); if (p7_OptimalAccuracy(om, ox2, ox1, &accscore) != eslOK) esl_fatal(msg); #if 0 p7_omx_FDeconvert(ox1, gx1); p7_gmx_Dump(stdout, gx1, p7_DEFAULT); p7_omx_FDeconvert(ox2, gx1); p7_gmx_Dump(stdout, gx1, p7_DEFAULT); #endif if (p7_OATrace(om, ox2, ox1, tr) != eslOK) esl_fatal(msg); if (p7_GForward (sq->dsq, sq->n, gm, gx1, &fsc_g) != eslOK) esl_fatal(msg); if (p7_GBackward(sq->dsq, sq->n, gm, gx2, &bsc_g) != eslOK) esl_fatal(msg); #if 0 p7_gmx_Dump(stdout, gx1, p7_DEFAULT); /* fwd */ p7_gmx_Dump(stdout, gx2, p7_DEFAULT); /* bck */ #endif if (p7_GDecoding(gm, gx1, gx2, gx2) != eslOK) esl_fatal(msg); if (p7_GOptimalAccuracy(gm, gx2, gx1, &accscore_g) != eslOK) esl_fatal(msg); #if 0 p7_gmx_Dump(stdout, gx1, p7_DEFAULT); /* oa */ p7_gmx_Dump(stdout, gx2, p7_DEFAULT); /* pp */ #endif if (p7_GOATrace(gm, gx2, gx1, trg) != eslOK) esl_fatal(msg); if (p7_trace_SetPP(tro, gx2) != eslOK) esl_fatal(msg); if (esl_opt_GetBoolean(go, "--traces")) { p7_trace_Dump(stdout, tro, gm, sq->dsq); p7_trace_Dump(stdout, tr, gm, sq->dsq); p7_trace_Dump(stdout, trg, gm, sq->dsq); } if (p7_trace_Validate(tr, abc, sq->dsq, NULL) != eslOK) esl_fatal(msg); if (p7_trace_Validate(trg, abc, sq->dsq, NULL) != eslOK) esl_fatal(msg); if (p7_trace_Compare(tr, trg, pptol) != eslOK) esl_fatal(msg); accscore_o = p7_trace_GetExpectedAccuracy(tro); /* according to gx2; see p7_trace_SetPP() call above */ accscore_g2 = p7_trace_GetExpectedAccuracy(trg); #if 0 printf("%f %f %f %f\n", accscore, accscore_g, accscore_g2, accscore_o); #endif if (esl_FCompare(fsc, bsc, sctol) != eslOK) esl_fatal(msg); if (esl_FCompare(fsc_g, bsc_g, gtol) != eslOK) esl_fatal(msg); if (esl_FCompare(fsc, fsc_g, gtol) != eslOK) esl_fatal(msg); if (esl_FCompare(accscore, accscore_g, gtol) != eslOK) esl_fatal(msg); if (esl_FCompare(accscore_g, accscore_g2, gtol) != eslOK) esl_fatal(msg); if (accscore_g2 < accscore_o) esl_fatal(msg); /* the above deserves explanation: * - accscore_o is the accuracy of the originally emitted trace, according * to the generic posterior decoding matrix <gx2>. This is a lower bound * on the expected # of accurately aligned residues found by a DP * optimization. * - accscore is the accuracy found by the fast (vector) code DP implementation. * - accscore_g is the accuracy found by the generic DP implementation. * accscore and accscore_g should be nearly identical, * within tolerance of roundoff error accumulation and * the imprecision of Logsum() tables. * - accscore_g2 is the accuracy of the traceback identified by the generic * DP implementation. It should be identical (within order-of-evaluation * roundoff error) to accscore_g. * * the "accscore_g2 < accscore_o" test is carefully contrived. * accscore_o is a theoretical lower bound but because of fp error, * accscore and (much more rarely) even accscore_g can exceed accscore_o. * accscore_g2, however, is calculated with identical order of evaluation * as accscore_o if the optimal trace does turn out to be identical to * the originally emitted trace. It should be extremely unlikely (though * not impossible) for accscore_o to exceed accscore_g2. (The DP algorithm * would have to identify a trace that was different than the original trace, * which the DP algorithm, by order-of-evaluation, assigned higher accuracy, * but order-of-evaluation in traceback dependent code assigned lower accuracy. * [xref J5/29] */ esl_sq_Reuse(sq); p7_trace_Reuse(tr); p7_trace_Reuse(trg); p7_trace_Reuse(tro); } p7_trace_Destroy(tro); p7_trace_Destroy(trg); p7_trace_Destroy(tr); p7_gmx_Destroy(gx2); p7_gmx_Destroy(gx1); p7_omx_Destroy(ox2); p7_omx_Destroy(ox1); esl_sq_Destroy(sq); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_hmm_Destroy(hmm); }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_GMX *gx1 = NULL; P7_GMX *gx2 = NULL; P7_OMX *ox1 = NULL; P7_OMX *ox2 = NULL; P7_TRACE *tr = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float fsc, bsc, accscore; float fsc_g, bsc_g, accscore_g; double Mcs; p7_FLogsumInit(); if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_ReconfigLength(om, L); if (esl_opt_GetBoolean(go, "-x") && p7_FLogsumError(-0.4, -0.5) > 0.0001) p7_Fail("-x here requires p7_Logsum() recompiled in slow exact mode"); ox1 = p7_omx_Create(gm->M, L, L); ox2 = p7_omx_Create(gm->M, L, L); tr = p7_trace_CreateWithPP(); esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_Forward (dsq, L, om, ox1, &fsc); p7_Backward(dsq, L, om, ox1, ox2, &bsc); p7_Decoding(om, ox1, ox2, ox2); esl_stopwatch_Start(w); for (i = 0; i < N; i++) { p7_OptimalAccuracy(om, ox2, ox1, &accscore); if (! esl_opt_GetBoolean(go, "--notrace")) { p7_OATrace(om, ox2, ox1, tr); p7_trace_Reuse(tr); } } esl_stopwatch_Stop(w); Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) w->user; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); if (esl_opt_GetBoolean(go, "-c") || esl_opt_GetBoolean(go, "-x") ) { gx1 = p7_gmx_Create(gm->M, L); gx2 = p7_gmx_Create(gm->M, L); p7_GForward (dsq, L, gm, gx1, &fsc_g); p7_GBackward(dsq, L, gm, gx2, &bsc_g); p7_GDecoding(gm, gx1, gx2, gx2); p7_GOptimalAccuracy(gm, gx2, gx1, &accscore_g); printf("generic: fwd=%8.4f bck=%8.4f acc=%8.4f\n", fsc_g, bsc_g, accscore_g); printf("VMX: fwd=%8.4f bck=%8.4f acc=%8.4f\n", fsc, bsc, accscore); p7_gmx_Destroy(gx1); p7_gmx_Destroy(gx2); } free(dsq); p7_omx_Destroy(ox1); p7_omx_Destroy(ox2); p7_trace_Destroy(tr); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_GMX *gx = NULL; P7_OMX *fwd = NULL; P7_OMX *bck = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float fsc, bsc; float fsc2, bsc2; double base_time, bench_time, Mcs; p7_FLogsumInit(); if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_ReconfigLength(om, L); if (esl_opt_GetBoolean(go, "-x") && p7_FLogsumError(-0.4, -0.5) > 0.0001) p7_Fail("-x here requires p7_Logsum() recompiled in slow exact mode"); if (esl_opt_GetBoolean(go, "-P")) { fwd = p7_omx_Create(gm->M, 0, L); bck = p7_omx_Create(gm->M, 0, L); } else { fwd = p7_omx_Create(gm->M, L, L); bck = p7_omx_Create(gm->M, L, L); } gx = p7_gmx_Create(gm->M, L); /* Get a baseline time: how long it takes just to generate the sequences */ esl_stopwatch_Start(w); for (i = 0; i < N; i++) esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); esl_stopwatch_Stop(w); base_time = w->user; esl_stopwatch_Start(w); for (i = 0; i < N; i++) { esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); if (esl_opt_GetBoolean(go, "-P")) { if (! esl_opt_GetBoolean(go, "-B")) p7_ForwardParser (dsq, L, om, fwd, &fsc); if (! esl_opt_GetBoolean(go, "-F")) p7_BackwardParser(dsq, L, om, fwd, bck, &bsc); } else { if (! esl_opt_GetBoolean(go, "-B")) p7_Forward (dsq, L, om, fwd, &fsc); if (! esl_opt_GetBoolean(go, "-F")) p7_Backward(dsq, L, om, fwd, bck, &bsc); } if (esl_opt_GetBoolean(go, "-c") || esl_opt_GetBoolean(go, "-x")) { p7_GForward (dsq, L, gm, gx, &fsc2); p7_GBackward(dsq, L, gm, gx, &bsc2); printf("%.4f %.4f %.4f %.4f\n", fsc, bsc, fsc2, bsc2); } } esl_stopwatch_Stop(w); bench_time = w->user - base_time; Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) bench_time; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_omx_Destroy(bck); p7_omx_Destroy(fwd); p7_gmx_Destroy(gx); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); char *seqfile = esl_opt_GetArg(go, 2); ESL_ALPHABET *abc = NULL; ESL_RANDOMNESS *rng = esl_randomness_CreateFast(0); P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_GMX *gx = NULL; P7_OMX *fwd = NULL; P7_TRACE *tr = NULL; ESL_SQ *sq = NULL; ESL_SQFILE *sqfp = NULL; int format = eslSQFILE_UNKNOWN; int N = esl_opt_GetInteger(go, "-N"); int i; float vsc, fsc, tsc; char errbuf[eslERRBUFSIZE]; int status; /* Read in one HMM */ if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); /* Read in one sequence */ sq = esl_sq_CreateDigital(abc); status = esl_sqfile_Open(seqfile, format, NULL, &sqfp); if (status == eslENOTFOUND) p7_Fail("No such file."); else if (status == eslEFORMAT) p7_Fail("Format unrecognized."); else if (status == eslEINVAL) p7_Fail("Can't autodetect stdin or .gz."); else if (status != eslOK) p7_Fail("Open failed, code %d.", status); if (esl_sqio_Read(sqfp, sq) != eslOK) p7_Fail("Failed to read sequence"); /* create default null model, then create and optimize profile */ bg = p7_bg_Create(abc); p7_bg_SetLength(bg, sq->n); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); if (esl_opt_GetBoolean(go, "-p")) p7_oprofile_Dump(stdout, om); fwd = p7_omx_Create(gm->M, sq->n, sq->n); gx = p7_gmx_Create(gm->M, sq->n); tr = p7_trace_Create(); if (esl_opt_GetBoolean(go, "-m") == TRUE) p7_omx_SetDumpMode(stdout, fwd, TRUE); p7_GViterbi(sq->dsq, sq->n, gm, gx, &vsc); p7_Forward (sq->dsq, sq->n, om, fwd, &fsc); for (i = 0; i < N; i++) { p7_StochasticTrace(rng, sq->dsq, sq->n, om, fwd, tr); p7_trace_Score(tr, sq->dsq, gm, &tsc); if (esl_opt_GetBoolean(go, "-t") == TRUE) p7_trace_Dump(stdout, tr, gm, sq->dsq); if (p7_trace_Validate(tr, abc, sq->dsq, errbuf) != eslOK) p7_Die("trace %d fails validation:\n%s\n", i, errbuf); printf("Sampled trace: %.4f nats\n", tsc); p7_trace_Reuse(tr); } printf("Forward score: %.4f nats\n", fsc); printf("Viterbi score: %.4f nats\n", vsc); /* cleanup */ esl_sq_Destroy(sq); esl_sqfile_Close(sqfp); p7_trace_Destroy(tr); p7_omx_Destroy(fwd); p7_gmx_Destroy(gx); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_randomness_Destroy(rng); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_GMX *gx = NULL; P7_OMX *fwd = NULL; P7_TRACE *tr = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float sc, fsc, vsc; float bestsc = -eslINFINITY; if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); fwd = p7_omx_Create(gm->M, L, L); gx = p7_gmx_Create(gm->M, L); tr = p7_trace_Create(); esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_GViterbi(dsq, L, gm, gx, &vsc); p7_Forward (dsq, L, om, fwd, &fsc); esl_stopwatch_Start(w); for (i = 0; i < N; i++) { p7_StochasticTrace(r, dsq, L, om, fwd, tr); p7_trace_Score(tr, dsq, gm, &sc); bestsc = ESL_MAX(bestsc, sc); p7_trace_Reuse(tr); } esl_stopwatch_Stop(w); esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("forward sc = %.4f nats\n", fsc); printf("viterbi sc = %.4f nats\n", vsc); printf("max trace sc = %.4f nats\n", bestsc); free(dsq); p7_trace_Destroy(tr); p7_gmx_Destroy(gx); p7_omx_Destroy(fwd); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
void run_hmmer_pipeline(const char* seq) { int index, i, status; ESL_SQ* sq = esl_sq_CreateFrom(NULL, seq, NULL, NULL, NULL); P7_OPROFILE *om = NULL; P7_PROFILE *gm = NULL; float usc, vfsc, fwdsc; /* filter scores */ float filtersc; /* HMM null filter score */ float nullsc; /* null model score */ float seqbias; float seq_score; /* the corrected per-seq bit score */ double P; WRAPPER_RESULT* result; num_results = 0; if(sq->n == 0) { esl_sq_Destroy(sq); return; } esl_sq_Digitize(abc, sq); int n = 0; float oasc; for(index = 0;index < num_models;index++) { om = models[index]; p7_omx_Reuse(oxf); p7_omx_Reuse(oxb); p7_omx_GrowTo(oxf, om->M, sq->n, sq->n); p7_omx_GrowTo(oxb, om->M, sq->n, sq->n); p7_oprofile_ReconfigLength(om, sq->n); p7_bg_SetFilter(bg, om->M, om->compo); p7_bg_SetLength(bg, sq->n); //Calibrate null model p7_bg_NullOne(bg, sq->dsq, sq->n, &nullsc); //MSV Filter p7_MSVFilter(sq->dsq, sq->n, om, oxf, &usc); seq_score = (usc - nullsc) / eslCONST_LOG2; P = esl_gumbel_surv(seq_score, om->evparam[p7_MMU], om->evparam[p7_MLAMBDA]); if (P > f1) continue; //Bias filter (model compo) p7_bg_FilterScore(bg, sq->dsq, sq->n, &filtersc); seq_score = (usc - filtersc) / eslCONST_LOG2; P = esl_gumbel_surv(seq_score, om->evparam[p7_MMU], om->evparam[p7_MLAMBDA]); if (P > f1) continue; //Viterbi filter (Only do if P value from Bias is high) if(P > f2) { p7_ViterbiFilter(sq->dsq, sq->n, om, oxf, &vfsc); seq_score = (vfsc - filtersc) / eslCONST_LOG2; P = esl_gumbel_surv(seq_score, om->evparam[p7_VMU], om->evparam[p7_VLAMBDA]); if (P > f2) continue; } //Get the real probability (forward) p7_Forward(sq->dsq, sq->n, om, oxf, &fwdsc); seq_score = (fwdsc - filtersc) / eslCONST_LOG2; P = esl_exp_surv(seq_score, om->evparam[p7_FTAU], om->evparam[p7_FLAMBDA]); if(hmmer_error) { fprintf(stderr, "HMM: %s, seq: %s", om->name, seq); hmmer_error = 0; continue; } if (P > f3) continue; //Real hit, go in to posterior decoding and alignment p7_omx_Reuse(oxb); p7_trace_Reuse(tr); p7_Backward(sq->dsq, sq->n, om, oxf, oxb, NULL); status = p7_Decoding(om, oxf, oxb, oxb); if(status == eslOK) { //And then trace the result p7_OptimalAccuracy(om, oxb, oxf, &oasc); p7_OATrace(om, oxb, oxf, tr); } else if(status == eslERANGE) { fprintf(stderr, "Decoding overflow on model %s\n", om->name); gm = gmodels[index]; if(gxf == NULL) { gxf = p7_gmx_Create(gm->M, sq->n); gxb = p7_gmx_Create(gm->M, sq->n); } else { p7_gmx_GrowTo(gxf, gm->M, sq->n); p7_gmx_GrowTo(gxb, gm->M, sq->n); } p7_ReconfigLength(gm, sq->n); p7_GForward (sq->dsq, sq->n, gm, gxf, &fwdsc); p7_GBackward(sq->dsq, sq->n, gm, gxb, NULL); p7_GDecoding(gm, gxf, gxb, gxb); p7_GOptimalAccuracy(gm, gxb, gxf, &oasc); p7_GOATrace (gm, gxb, gxf, tr); p7_gmx_Reuse(gxf); p7_gmx_Reuse(gxb); } if(hmmer_error) { fprintf(stderr, "HMM: %s, seq: %s", om->name, seq); hmmer_error = 0; continue; } result = wrapper_results[num_results]; reuse_result(result, tr->N + om->M, om->name); //We're way overallocating here, but it's hard to know at this point how much space we'll need for the alignment (plus leading and trailing gaps) trace_into(tr, result, sq, abc, om->M); result->bits = seq_score; num_results++; } esl_sq_Destroy(sq); }