int main(int argc, char **argv) { char *hmmfile = argv[1]; /* name of HMM file to read one HMM from */ ESL_ALPHABET *abc = NULL; /* sequence alphabet */ ESL_RANDOMNESS *r = NULL; /* source of randomness */ P7_HMMFILE *hfp = NULL; /* open hmmfile */ P7_HMM *hmm = NULL; /* HMM to emit from */ P7_PROFILE *gm = NULL; /* profile HMM (scores) */ P7_BG *bg = NULL; /* null model */ P7_TRACE *tr = NULL; /* sampled trace */ ESL_SQ *sq = NULL; /* sampled digital sequence */ int n = 1000; int counts[p7T_NSTATETYPES]; int i; float sc; float nullsc; double bitscore; r = esl_randomness_CreateFast(0); tr = p7_trace_Create(); if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("failed to open %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("failed to read HMM"); sq = esl_sq_CreateDigital(abc); bg = p7_bg_Create(abc); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL); for (i = 0; i < n; i++) { p7_ProfileEmit(r, hmm, gm, bg, sq, tr); p7_trace_GetStateUseCounts(tr, counts); p7_ReconfigLength(gm, sq->n); p7_bg_SetLength(bg, sq->n); p7_trace_Score(tr, sq->dsq, gm, &sc); p7_bg_NullOne (bg, sq->dsq, sq->n, &nullsc); bitscore = (sc - nullsc)/ eslCONST_LOG2; printf("%d %8.4f\n", counts[p7T_M] + (counts[p7T_I] + counts[p7T_D])/2, bitscore); } p7_profile_Destroy(gm); esl_sq_Destroy(sq); p7_trace_Destroy(tr); esl_randomness_Destroy(r); esl_alphabet_Destroy(abc); p7_hmmfile_Close(hfp); p7_hmm_Destroy(hmm); return eslOK; }
/* process_workunit()
 *
 * This is the routine that actually does the work.
 *
 * A work unit consists of one HMM, <hmm>.
 * The result is the <scores> array, which contains an array of N scores;
 * caller provides this memory. When the "-a" option is set, <alilens>
 * (also caller-provided) is filled with one alignment length per sequence.
 * How those scores are generated is controlled by the application
 * configuration in <cfg> and the options in <go>.
 *
 * Returns <eslOK> on success. On allocation failure of the sequence
 * buffer, returns <eslEMEM> and writes a message to <errbuf>.
 */
static int
process_workunit(ESL_GETOPTS *go, struct cfg_s *cfg, char *errbuf, P7_HMM *hmm, double *scores, int *alilens)
{
  int           L   = esl_opt_GetInteger(go, "-L");
  P7_PROFILE   *gm  = NULL;
  P7_OPROFILE  *om  = NULL;
  P7_REFMX     *rmx = NULL;
  P7_CHECKPTMX *cx  = NULL;
  P7_FILTERMX  *fx  = NULL;
  P7_TRACE     *tr  = NULL;
  ESL_DSQ      *dsq = NULL;
  int           i;
  int           scounts[p7T_NSTATETYPES]; /* state usage counts from a trace */
  float         sc;
  float         nullsc;
  int           status;
  P7_HARDWARE  *hw;

  /* NOTE(review): <hw> is never released in this function; no destructor
   * for P7_HARDWARE is visible from here. Confirm whether one exists and
   * call it in the cleanup section below.
   */
  if ((hw = p7_hardware_Create ()) == NULL) p7_Fail("Couldn't get HW information data structure");

  /* Optionally set a custom background, determined by model composition;
   * an experimental hack.
   */
  if (esl_opt_GetBoolean(go, "--bgcomp"))
    {
      float *p = NULL;
      float  KL;

      p7_hmm_CompositionKLDist(hmm, cfg->bg, &KL, &p);
      esl_vec_FCopy(p, cfg->abc->K, cfg->bg->f);
      free(p);                  /* vector is handed back to the caller; it was leaked before */
    }

  /* Create and configure our generic profile, as requested */
  gm = p7_profile_Create(hmm->M, cfg->abc);
  if (esl_opt_GetBoolean(go, "--multi"))
    {
      if      (esl_opt_GetBoolean(go, "--dual"))   { p7_profile_Config      (gm, hmm, cfg->bg);    }
      else if (esl_opt_GetBoolean(go, "--local"))  { p7_profile_ConfigLocal (gm, hmm, cfg->bg, L); }
      else if (esl_opt_GetBoolean(go, "--glocal")) { p7_profile_ConfigGlocal(gm, hmm, cfg->bg, L); }
    }
  else if (esl_opt_GetBoolean(go, "--uni"))
    {
      if      (esl_opt_GetBoolean(go, "--dual"))   { p7_profile_ConfigCustom   (gm, hmm, cfg->bg, L, 0.0, 0.5); }
      else if (esl_opt_GetBoolean(go, "--local"))  { p7_profile_ConfigUnilocal (gm, hmm, cfg->bg, L);           }
      else if (esl_opt_GetBoolean(go, "--glocal")) { p7_profile_ConfigUniglocal(gm, hmm, cfg->bg, L);           }
    }

  p7_profile_SetLength(gm, L);
  p7_bg_SetLength(cfg->bg, L);

  if (esl_opt_GetBoolean(go, "--x-no-lengthmodel"))
    elide_length_model(gm, cfg->bg);

  /* Allocate DP matrix for <gm>. */
  rmx = p7_refmx_Create(gm->M, L);

  /* Create and configure the vectorized profile, if needed;
   * and allocate its DP matrix
   */
  if (esl_opt_GetBoolean(go, "--vector"))
    {
      /* <om> is still NULL at this point, so the SIMD flavor cannot be read
       * from om->simd (that was a NULL dereference). Take it from the
       * hardware description obtained above instead.
       */
      om = p7_oprofile_Create(gm->M, cfg->abc, hw->simd);
      p7_oprofile_Convert(gm, om);
      cx = p7_checkptmx_Create(gm->M, L, ESL_MBYTES(32), om->simd);
      fx = p7_filtermx_Create(gm->M, om->simd);
    }

  /* Remaining allocation */
  ESL_ALLOC(dsq, sizeof(ESL_DSQ) * (L+2));
  tr = p7_trace_Create();

  /* Collect scores from N random sequences of length L */
  for (i = 0; i < cfg->N; i++)
    {
      esl_rsq_xfIID(cfg->r, cfg->bg->f, cfg->abc->K, L, dsq);
      sc = eslINFINITY;

      /* Vectorized implementations of Viterbi, MSV may overflow.
       * In this case, they'll leave sc=eslINFINITY.
       * Then we fail over to the nonvector "generic" implementation.
       * That's why this next block isn't an if/else.
       */
      if (esl_opt_GetBoolean(go, "--vector"))
        {
          if      (esl_opt_GetBoolean(go, "--vit")) p7_ViterbiFilter(dsq, L, om, fx, &sc);
          else if (esl_opt_GetBoolean(go, "--fwd")) p7_ForwardFilter(dsq, L, om, cx, &sc);
          else if (esl_opt_GetBoolean(go, "--msv")) p7_MSVFilter    (dsq, L, om, fx, &sc);
        }

      /* If we tried a vector calculation above but it overflowed,
       * or if we're to do --generic DP calculations, sc==eslINFINITY now;
       * hence the if condition here:
       */
      if (sc == eslINFINITY)
        {
          if      (esl_opt_GetBoolean(go, "--fwd")) p7_ReferenceForward(dsq, L, gm, rmx,     &sc); /* any mode: dual,local,glocal; gm's config takes care of this */
          else if (esl_opt_GetBoolean(go, "--vit")) p7_ReferenceViterbi(dsq, L, gm, rmx, tr, &sc); /* local-only mode. cmdline opts processing has already assured that --local set */
          else if (esl_opt_GetBoolean(go, "--msv")) p7_Die("We used to be able to do a generic MSV algorithm - but no longer");
        }

      /* Optional: get Viterbi alignment length too. */
      if (esl_opt_GetBoolean(go, "-a"))  /* -a only works with Viterbi; getopts has checked this already; <tr> must be valid */
        {
          p7_trace_GetStateUseCounts(tr, scounts);

          /* There are various ways we could count "alignment length".
           * Here we sum the M, D and I state usage counts, for both the
           * local (L) and glocal (G) flavors of each state.
           */
          /* alilens[i] = scounts[p7T_D] + scounts[p7T_I]; SRE: temporarily testing this instead */
          alilens[i] = scounts[p7T_ML] + scounts[p7T_DL] + scounts[p7T_IL] +
                       scounts[p7T_MG] + scounts[p7T_DG] + scounts[p7T_IG];
          p7_trace_Reuse(tr);
        }

      /* Convert to a bit score relative to the null model. */
      p7_bg_NullOne(cfg->bg, dsq, L, &nullsc);
      scores[i] = (sc - nullsc) / eslCONST_LOG2;

      /* Recycle the DP matrices for the next sequence. */
      if (cx) p7_checkptmx_Reuse(cx);
      if (fx) p7_filtermx_Reuse(fx);
      p7_refmx_Reuse(rmx);
    }
  status = eslOK;
  /* deliberate flowthru */
 ERROR:
  free(dsq);                    /* free(NULL) is a no-op; no guard needed */
  p7_checkptmx_Destroy(cx);
  p7_filtermx_Destroy(fx);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_refmx_Destroy(rmx);
  p7_trace_Destroy(tr);
  if (status == eslEMEM) sprintf(errbuf, "allocation failure");
  return status;
}