int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_GMX *gx1 = NULL; P7_GMX *gx2 = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); float null2[p7_MAXCODE]; int i; float fsc, bsc; double Mcs; if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); gx1 = p7_gmx_Create(gm->M, L); gx2 = p7_gmx_Create(gm->M, L); esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_GForward (dsq, L, gm, gx1, &fsc); p7_GBackward(dsq, L, gm, gx2, &bsc); p7_GDecoding(gm, gx1, gx2, gx2); esl_stopwatch_Start(w); for (i = 0; i < N; i++) p7_GNull2_ByExpectation(gm, gx2, null2); esl_stopwatch_Stop(w); Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / w->user; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_gmx_Destroy(gx1); p7_gmx_Destroy(gx2); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
/* utest_null2_expectation()
 *
 * Compares the null2 scores computed by p7_Null2_ByExpectation() on the
 * optimized (P7_OMX) matrices against those computed by
 * p7_GNull2_ByExpectation() on the generic (P7_GMX) matrices.
 *
 * A model of length <M> is obtained from p7_oprofile_Sample(); then, <N>
 * times, a random sequence of length <L> is generated from <bg->f>,
 * Forward/Backward/Decoding are run through both implementations, and the
 * two resulting null2 vectors (<abc->Kp> entries each) are compared with
 * esl_vec_FCompare() at <tolerance>.
 *
 * Any allocation failure, non-eslOK return, or mismatch is fatal
 * (esl_fatal()); the function returns nothing.
 */
static void
utest_null2_expectation(ESL_RANDOMNESS *r, ESL_ALPHABET *abc, P7_BG *bg, int M, int L, int N, float tolerance)
{
  char        *msg  = "decoding unit test failed";
  P7_HMM      *hmm  = NULL;
  P7_PROFILE  *gm   = NULL;
  P7_OPROFILE *om   = NULL;
  ESL_DSQ     *dsq  = malloc(sizeof(ESL_DSQ) * (L+2));
  P7_OMX      *fwd  = p7_omx_Create(M, L, L);
  P7_OMX      *bck  = p7_omx_Create(M, L, L);
  P7_OMX      *pp   = p7_omx_Create(M, L, L);
  P7_GMX      *gxf  = p7_gmx_Create(M, L);
  P7_GMX      *gxb  = p7_gmx_Create(M, L);
  P7_GMX      *gpp  = p7_gmx_Create(M, L);
  float       *on2  = malloc(sizeof(float) * abc->Kp);
  float       *gn2  = malloc(sizeof(float) * abc->Kp);
  float        fsc1, fsc2;
  float        bsc1, bsc2;

  /* Every buffer and matrix is used below, so all of them must exist. */
  if (!dsq || !gn2 || !on2) esl_fatal(msg);
  if (!fwd || !bck || !pp)  esl_fatal(msg);
  if (!gxf || !gxb || !gpp) esl_fatal(msg);

  if (p7_oprofile_Sample(r, abc, bg, M, L, &hmm, &gm, &om) != eslOK) esl_fatal(msg);

  while (N--)
    {
      if (esl_rsq_xfIID(r, bg->f, abc->K, L, dsq) != eslOK) esl_fatal(msg);

      /* optimized implementation */
      if (p7_Forward       (dsq, L, om, fwd,      &fsc1) != eslOK) esl_fatal(msg);
      if (p7_Backward      (dsq, L, om, fwd, bck, &bsc1) != eslOK) esl_fatal(msg);
      if (p7_Decoding(om, fwd, bck, pp)                  != eslOK) esl_fatal(msg);
      if (p7_Null2_ByExpectation(om, pp, on2)            != eslOK) esl_fatal(msg);

      /* generic implementation */
      if (p7_GForward (dsq, L, gm, gxf, &fsc2)           != eslOK) esl_fatal(msg);
      if (p7_GBackward(dsq, L, gm, gxb, &bsc2)           != eslOK) esl_fatal(msg);
      if (p7_GDecoding(gm, gxf, gxb, gpp)                != eslOK) esl_fatal(msg);
      if (p7_GNull2_ByExpectation(gm, gpp, gn2)          != eslOK) esl_fatal(msg);

      /* the two null2 vectors must agree */
      if (esl_vec_FCompare(gn2, on2, abc->Kp, tolerance) != eslOK) esl_fatal(msg);
    }

  p7_gmx_Destroy(gpp);
  p7_gmx_Destroy(gxf);
  p7_gmx_Destroy(gxb);
  p7_omx_Destroy(pp);
  p7_omx_Destroy(fwd);
  p7_omx_Destroy(bck);
  free(on2);
  free(gn2);
  free(dsq);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_hmm_Destroy(hmm);
}
/* glocal_rescore_isolated_domain()
 * EPN, Tue Oct  5 10:16:12 2010
 *
 * Derived from p7_domaindef.c's rescore_isolated_domain(), changed to
 * work on generic matrices (which can be used with glocally configured
 * models). It identifies a single glocal domain rather than a single
 * local one.
 *
 * Unlike the original, the Backward/Decoding pass and the optimal
 * accuracy (OA) alignment are optional. Three flags control this:
 *   <null2_is_done>: TRUE if <ddef->n2sc[i..j]> has already been filled
 *                    in for this region (see the notes below).
 *   <do_null2>:      TRUE if a null2 penalty will eventually be applied
 *                    to this domain.
 *   <do_aln>:        TRUE if the OA alignment is needed.
 * Backward and posterior decoding are run only if <do_null2> or
 * <do_aln> is TRUE.
 *
 * Notes carried over from p7_domaindef.c::rescore_isolated_domain()
 * (SRE, Fri Feb 8 09:18:33 2008 [Janelia]); those notes speak of <om>,
 * <ox1> and <ox2>, which correspond here to <gm>, <gx1> and <gx2>:
 *
 *   A single domain's envelope <i>..<j> in sequence <sq> has been
 *   isolated; it is now scored in isolation and an alignment is obtained
 *   for it. (Later, the individual domain scores from this sequence can
 *   be summed into a new per-seq score, to compare to the original
 *   per-seq score.)
 *
 *   The caller provides a model configured in "uni" mode rather than
 *   "multi" mode, which forces identification of a single domain in this
 *   envelope. The alignment is an optimal accuracy alignment (sensu
 *   IH Holmes), obtained in the same mode.
 *
 *   The caller provides two DP matrices with enough space for Forward
 *   and Backward calculations of this domain against the model (the
 *   caller will typically have matrices big enough for the complete
 *   sequence lying around, and can just use those), and a <P7_DOMAINDEF>
 *   object that manages temporary working space and heuristic thresholds.
 *
 * Returns <eslOK> if the domain was scored (and, when requested,
 * aligned); the per-domain information is then registered in
 * <ddef->dcl>, and <ddef->ndom> is incremented. Returns <eslFAIL> if
 * posterior decoding reported <eslERANGE>. If growing <ddef->dcl> fails,
 * the status set by ESL_RALLOC() is returned.
 *
 * State of the working memory afterwards:
 *
 *   <ddef>: when <do_aln> is TRUE, <ddef->tr> was used (and possibly
 *           reallocated) for the OA trace; <Reuse()> is called on it
 *           before returning.
 *
 *   <gx1> : happens to hold the OA score matrix when <do_aln> is TRUE,
 *           but that is not part of the spec; officially its contents
 *           are "undefined".
 *
 *   <gx2> : happens to hold the posterior probability matrix when
 *           decoding was run, but callers must not rely on this; the
 *           spec leaves its contents "undefined".
 */
static int
glocal_rescore_isolated_domain(P7_DOMAINDEF *ddef, const P7_PROFILE *gm, const ESL_SQ *sq,
                               P7_GMX *gx1, P7_GMX *gx2, int i, int j, int null2_is_done,
                               int do_null2, int do_aln)
{
  int        envlen   = j-i+1;          /* length of the envelope i..j                    */
  float      n2sum    = 0.0;            /* summed null2 correction, in nats               */
  float      oa_score = 0.;             /* stays 0. unless an OA alignment is computed    */
  float      fwd_score;                 /* Forward score of the envelope, in nats         */
  float      null2[p7_MAXCODE];
  P7_DOMAIN *rec      = NULL;           /* the domain record being filled in              */
  int        k;                         /* residue position / trace index                 */
  int        status;

  /* Forward is always needed: it supplies the envelope score. */
  p7_GForward(sq->dsq + i-1, envlen, gm, gx1, &fwd_score);

  if (do_null2 || do_aln)
    {
      /* Backward, then posterior decoding written over <gx2>. */
      p7_GBackward(sq->dsq + i-1, envlen, gm, gx2, NULL);
      status = p7_GDecoding(gm, gx1, gx2, gx2);
      if (status == eslERANGE) return eslFAIL;  /* rare: numeric overflow; domain is assumed to be repetitive garbage [J3/119-212] */

      if (do_null2)
        {
          /* n2sc[] is already set if this domain came from stochastic
           * traceback clustering in a multidomain region; for a simple
           * one-domain region it is not, so compute it here by the
           * expectation (posterior decoding) method.
           */
          if (! null2_is_done)
            {
              p7_GNull2_ByExpectation(gm, gx2, null2);
              for (k = i; k <= j; k++)
                ddef->n2sc[k] = logf(null2[sq->dsq[k]]);
            }

          /* total correction for the envelope, in nats */
          for (k = i; k <= j; k++)
            n2sum += ddef->n2sc[k];
        }

      if (do_aln)
        {
          /* Optimal accuracy alignment; OA scores are written over <gx1>. */
          p7_GOptimalAccuracy(gm, gx2, gx1, &oa_score);
          p7_GOATrace        (gm, gx2, gx1, ddef->tr);

          /* The trace's sequence coords are relative to the envelope
           * (offset by i-1); shift them back onto the original dsq.
           */
          for (k = 0; k < ddef->tr->N; k++)
            {
              if (ddef->tr->i[k] > 0) ddef->tr->i[k] += i-1;
            }
        }
    }

  /* Make room for one more domain record if the list is full. */
  if (ddef->ndom == ddef->nalloc)
    {
      void *tmp;
      ESL_RALLOC(ddef->dcl, tmp, sizeof(P7_DOMAIN) * (ddef->nalloc*2));
      ddef->nalloc *= 2;
    }

  /* Fill in the next empty domain record. */
  rec = &(ddef->dcl[ddef->ndom]);
  rec->ienv          = i;
  rec->jenv          = j;
  rec->iali          = i;
  rec->jali          = j;
  rec->envsc         = fwd_score;   /* in units of NATS                                                    */
  rec->domcorrection = n2sum;       /* in units of NATS; 0. if do_null2 == FALSE                           */
  rec->oasc          = oa_score;    /* expected # of correctly aligned residues; 0. if do_aln == FALSE     */
  rec->dombias       = 0.0;         /* set later                                                           */
  rec->bitscore      = 0.0;         /* set later by caller, using envsc, null score, and dombias           */
  rec->lnP           = 1.0;         /* set later by caller, using bitscore                                 */
  rec->is_reported   = FALSE;       /* set later by caller                                                 */
  rec->is_included   = FALSE;       /* set later by caller                                                 */
  rec->ad            = NULL;
  ddef->ndom++;

  if (do_aln) p7_trace_Reuse(ddef->tr);
  return eslOK;

 ERROR:
  if (do_aln) p7_trace_Reuse(ddef->tr);
  return status;
}