/* Function: p7_oprofile_Sample() * Synopsis: Sample a random profile. * Incept: MSF Tue Nov 3, 2009 [Janelia] * * Purpose: Sample a random profile of <M> nodes for alphabet <abc>, * using <r> as the source of random numbers. Parameterize * it for generation of target sequences of mean length * <L>. Calculate its log-odds scores using background * model <bg>. * * Args: r - random number generator * abc - emission alphabet * bg - background frequency model * M - size of sampled profile, in nodes * L - configured target seq mean length * opt_hmm - optRETURN: sampled HMM * opt_gm - optRETURN: sampled normal profile * opt_om - RETURN: optimized profile * * Returns: <eslOK> on success. * * Throws: (no abnormal error conditions) */ int p7_oprofile_Sample(ESL_RANDOMNESS *r, const ESL_ALPHABET *abc, const P7_BG *bg, int M, int L, P7_HMM **opt_hmm, P7_PROFILE **opt_gm, P7_OPROFILE **ret_om) { P7_HMM *hmm = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; int status; if ((gm = p7_profile_Create (M, abc)) == NULL) { status = eslEMEM; goto ERROR; } if ((om = p7_oprofile_Create(M, abc)) == NULL) { status = eslEMEM; goto ERROR; } if ((status = p7_hmm_Sample(r, M, abc, &hmm)) != eslOK) goto ERROR; if ((status = p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL)) != eslOK) goto ERROR; if ((status = p7_oprofile_Convert(gm, om)) != eslOK) goto ERROR; if ((status = p7_oprofile_ReconfigLength(om, L)) != eslOK) goto ERROR; if (opt_hmm != NULL) *opt_hmm = hmm; else p7_hmm_Destroy(hmm); if (opt_gm != NULL) *opt_gm = gm; else p7_profile_Destroy(gm); *ret_om = om; return eslOK; ERROR: if (opt_hmm != NULL) *opt_hmm = NULL; if (opt_gm != NULL) *opt_gm = NULL; *ret_om = NULL; return status; }
/* Function: p7_ViterbiMu() * Synopsis: Determines the local Viterbi Gumbel mu parameter for a model. * Incept: SRE, Tue May 19 10:26:19 2009 [Janelia] * * Purpose: Identical to p7_MSVMu(), above, except that it fits * Viterbi scores instead of MSV scores. * * The difference between the two mus is small, but can be * up to ~1 bit or so for large, low-info models [J4/126] so * decided to calibrate the two mus separately [J5/8]. * * Args: r : source of random numbers * om : score profile (length config is changed upon return!) * bg : null model (length config is changed upon return!) * L : length of sequences to simulate * N : number of sequences to simulate * lambda : known Gumbel lambda parameter * ret_vmu : RETURN: ML estimate of location param mu * * Returns: <eslOK> on success, and <ret_mu> contains the ML estimate * of $\mu$. * * Throws: (no abnormal error conditions) */ int p7_ViterbiMu(ESL_RANDOMNESS *r, P7_OPROFILE *om, P7_BG *bg, int L, int N, double lambda, double *ret_vmu) { P7_OMX *ox = p7_omx_Create(om->M, 0, 0); /* DP matrix: 1 row version */ ESL_DSQ *dsq = NULL; double *xv = NULL; int i; float sc, nullsc; #ifndef p7_IMPL_DUMMY float maxsc = (32767.0 - om->base_w) / om->scale_w; /* if score overflows, use this [J4/139] */ #endif int status; if (ox == NULL) { status = eslEMEM; goto ERROR; } ESL_ALLOC(xv, sizeof(double) * N); ESL_ALLOC(dsq, sizeof(ESL_DSQ) * (L+2)); p7_oprofile_ReconfigLength(om, L); p7_bg_SetLength(bg, L); for (i = 0; i < N; i++) { if ((status = esl_rsq_xfIID(r, bg->f, om->abc->K, L, dsq)) != eslOK) goto ERROR; if ((status = p7_bg_NullOne(bg, dsq, L, &nullsc)) != eslOK) goto ERROR; status = p7_ViterbiFilter(dsq, L, om, ox, &sc); #ifndef p7_IMPL_DUMMY if (status == eslERANGE) { sc = maxsc; status = eslOK; } #endif if (status != eslOK) goto ERROR; xv[i] = (sc - nullsc) / eslCONST_LOG2; } if ((status = esl_gumbel_FitCompleteLoc(xv, N, lambda, ret_vmu)) != eslOK) goto ERROR; p7_omx_Destroy(ox); free(xv); free(dsq); return eslOK; ERROR: *ret_vmu = 0.0; if (ox != NULL) p7_omx_Destroy(ox); if (xv != NULL) free(xv); if (dsq != NULL) free(dsq); return status; }
/* Function: p7_Tau() * Synopsis: Determine Forward tau by brief simulation. * Incept: SRE, Thu Aug 9 15:08:39 2007 [Janelia] * * Purpose: Determine the <tau> parameter for an exponential tail fit * to the Forward score distribution for model <om>, on * random sequences with the composition of the background * model <bg>. This <tau> parameter is for an exponential * distribution anchored from $P=1.0$, so it's not really a * tail per se; but it's only an accurate fit in the tail * of the Forward score distribution, from about $P=0.001$ * or so. * * The determination of <tau> is done by a brief simulation * in which we fit a Gumbel distribution to a small number * of Forward scores of random sequences, and use that to * predict the location of the tail at probability <tailp>. * * The Gumbel is of course inaccurate, but we can use it * here solely as an empirical distribution to determine * the location of a reasonable <tau> more accurately on a * smaller number of samples than we could do with raw * order statistics. * * Typical choices are L=100, N=200, tailp=0.04, which * typically yield estimates $\hat{\mu}$ with a precision * (standard deviation) of $\pm$ 0.2 bits, corresponding to * a $\pm$ 15\% error in E-values. See [J1/135]. * * The use of Gumbel fitting to a small number of $N$ * samples and the extrapolation of $\hat{\mu}$ from the * estimated location of the 0.04 tail mass are both * empirical and carefully optimized against several * tradeoffs. Most importantly, around this choice of tail * probability, a systematic error introduced by the use of * the Gumbel fit is being cancelled by systematic error * introduced by the use of a higher tail probability than * the regime in which the exponential tail is a valid * approximation. See [J1/135] for discussion. * * This function changes the length configuration of both * <om> and <bg>. The caller must remember to reconfigure * both of their length models appropriately for any * subsequent alignments. * * Args: r : source of randomness * om : configured profile to sample sequences from * bg : null model (for background residue frequencies) * L : mean length model for seq emission from profile * N : number of sequences to generate * lambda : expected slope of the exponential tail (from p7_Lambda()) * tailp : tail mass from which we will extrapolate mu * ret_mu : RETURN: estimate for the Forward mu (base of exponential tail) * * Returns: <eslOK> on success, and <*ret_fv> is the score difference * in bits. * * Throws: <eslEMEM> on allocation error, and <*ret_fv> is 0. */ int p7_Tau(ESL_RANDOMNESS *r, P7_OPROFILE *om, P7_BG *bg, int L, int N, double lambda, double tailp, double *ret_tau) { P7_OMX *ox = p7_omx_Create(om->M, 0, L); /* DP matrix: for ForwardParser, L rows */ ESL_DSQ *dsq = NULL; double *xv = NULL; float fsc, nullsc; double gmu, glam; int status; int i; ESL_ALLOC(xv, sizeof(double) * N); ESL_ALLOC(dsq, sizeof(ESL_DSQ) * (L+2)); if (ox == NULL) { status = eslEMEM; goto ERROR; } p7_oprofile_ReconfigLength(om, L); p7_bg_SetLength(bg, L); for (i = 0; i < N; i++) { if ((status = esl_rsq_xfIID(r, bg->f, om->abc->K, L, dsq)) != eslOK) goto ERROR; if ((status = p7_ForwardParser(dsq, L, om, ox, &fsc)) != eslOK) goto ERROR; if ((status = p7_bg_NullOne(bg, dsq, L, &nullsc)) != eslOK) goto ERROR; xv[i] = (fsc - nullsc) / eslCONST_LOG2; } if ((status = esl_gumbel_FitComplete(xv, N, &gmu, &glam)) != eslOK) goto ERROR; /* Explanation of the eqn below: first find the x at which the Gumbel tail * mass is predicted to be equal to tailp. Then back up from that x * by log(tailp)/lambda to set the origin of the exponential tail to 1.0 * instead of tailp. */ *ret_tau = esl_gumbel_invcdf(1.0-tailp, gmu, glam) + (log(tailp) / lambda); free(xv); free(dsq); p7_omx_Destroy(ox); return eslOK; ERROR: *ret_tau = 0.; if (xv != NULL) free(xv); if (dsq != NULL) free(dsq); if (ox != NULL) p7_omx_Destroy(ox); return status; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_OMX *fwd = NULL; P7_OMX *bck = NULL; P7_OMX *pp = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float fsc, bsc; double Mcs; if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_ReconfigLength(om, L); fwd = p7_omx_Create(gm->M, L, L); bck = p7_omx_Create(gm->M, L, L); pp = p7_omx_Create(gm->M, L, L); esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_Forward (dsq, L, om, fwd, &fsc); p7_Backward(dsq, L, om, fwd, bck, &bsc); esl_stopwatch_Start(w); for (i = 0; i < N; i++) p7_Decoding(om, fwd, bck, pp); esl_stopwatch_Stop(w); Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) w->user; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_omx_Destroy(fwd); p7_omx_Destroy(bck); p7_omx_Destroy(pp); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return eslOK; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_OMX *ox = NULL; P7_GMX *gx = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float sc1, sc2; double base_time, bench_time, Mcs; if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_ReconfigLength(om, L); if (esl_opt_GetBoolean(go, "-x")) p7_profile_SameAsVF(om, gm); ox = p7_omx_Create(gm->M, 0, 0); gx = p7_gmx_Create(gm->M, L); /* Get a baseline time: how long it takes just to generate the sequences */ esl_stopwatch_Start(w); for (i = 0; i < N; i++) esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); esl_stopwatch_Stop(w); base_time = w->user; /* Run the benchmark */ esl_stopwatch_Start(w); for (i = 0; i < N; i++) { esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_ViterbiFilter(dsq, L, om, ox, &sc1); if (esl_opt_GetBoolean(go, "-c")) { p7_GViterbi(dsq, L, gm, gx, &sc2); printf("%.4f %.4f\n", sc1, sc2); } if (esl_opt_GetBoolean(go, "-x")) { p7_GViterbi(dsq, L, gm, gx, &sc2); sc2 /= om->scale_w; if (om->mode == p7_UNILOCAL) sc2 -= 2.0; /* that's ~ L \log \frac{L}{L+2}, for our NN,CC,JJ */ else if (om->mode == p7_LOCAL) sc2 -= 3.0; /* that's ~ L \log \frac{L}{L+3}, for our NN,CC,JJ */ printf("%.4f %.4f\n", sc1, sc2); } } esl_stopwatch_Stop(w); bench_time = w->user - base_time; Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) bench_time; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_omx_Destroy(ox); p7_gmx_Destroy(gx); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); char *seqfile = esl_opt_GetArg(go, 2); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_OMX *ox = NULL; P7_GMX *gx = NULL; ESL_SQ *sq = NULL; ESL_SQFILE *sqfp = NULL; int format = eslSQFILE_UNKNOWN; float vfraw, nullsc, vfscore; float graw, gscore; double P, gP; int status; /* Read in one HMM */ if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); /* Read in one sequence */ sq = esl_sq_CreateDigital(abc); status = esl_sqfile_Open(seqfile, format, NULL, &sqfp); if (status == eslENOTFOUND) p7_Fail("No such file."); else if (status == eslEFORMAT) p7_Fail("Format unrecognized."); else if (status == eslEINVAL) p7_Fail("Can't autodetect stdin or .gz."); else if (status != eslOK) p7_Fail("Open failed, code %d.", status); /* create default null model, then create and optimize profile */ bg = p7_bg_Create(abc); p7_bg_SetLength(bg, sq->n); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); /* allocate DP matrices, both a generic and an optimized one */ ox = p7_omx_Create(gm->M, 0, sq->n); gx = p7_gmx_Create(gm->M, sq->n); /* Useful to place and compile in for debugging: p7_oprofile_Dump(stdout, om); dumps the optimized profile p7_omx_SetDumpMode(ox, TRUE); makes the fast DP algorithms dump their matrices p7_gmx_Dump(stdout, gx); dumps a generic DP matrix */ while ((status = esl_sqio_Read(sqfp, sq)) == eslOK) { p7_oprofile_ReconfigLength(om, sq->n); p7_ReconfigLength(gm, sq->n); p7_bg_SetLength(bg, sq->n); p7_omx_GrowTo(ox, om->M, 0, sq->n); p7_gmx_GrowTo(gx, gm->M, sq->n); p7_ViterbiFilter (sq->dsq, sq->n, om, ox, &vfraw); p7_bg_NullOne (bg, sq->dsq, sq->n, &nullsc); vfscore = (vfraw - nullsc) / eslCONST_LOG2; P = esl_gumbel_surv(vfscore, om->evparam[p7_VMU], om->evparam[p7_VLAMBDA]); p7_GViterbi (sq->dsq, sq->n, gm, gx, &graw); gscore = (graw - nullsc) / eslCONST_LOG2; gP = esl_gumbel_surv(gscore, gm->evparam[p7_VMU], gm->evparam[p7_VLAMBDA]); if (esl_opt_GetBoolean(go, "-1")) { printf("%-30s\t%-20s\t%9.2g\t%7.2f\t%9.2g\t%7.2f\n", sq->name, hmm->name, P, vfscore, gP, gscore); } else if (esl_opt_GetBoolean(go, "-P")) { /* output suitable for direct use in profmark benchmark postprocessors: */ printf("%g\t%.2f\t%s\t%s\n", P, vfscore, sq->name, hmm->name); } else { printf("target sequence: %s\n", sq->name); printf("vit filter raw score: %.2f nats\n", vfraw); printf("null score: %.2f nats\n", nullsc); printf("per-seq score: %.2f bits\n", vfscore); printf("P-value: %g\n", P); printf("GViterbi raw score: %.2f nats\n", graw); printf("GViterbi seq score: %.2f bits\n", gscore); printf("GViterbi P-value: %g\n", gP); } esl_sq_Reuse(sq); } /* cleanup */ esl_sq_Destroy(sq); esl_sqfile_Close(sqfp); p7_omx_Destroy(ox); p7_gmx_Destroy(gx); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
static void scan_thread(void *arg) { int i; int count; int workeridx; WORKER_INFO *info; ESL_THREADS *obj; ESL_STOPWATCH *w; P7_BG *bg = NULL; /* null model */ P7_PIPELINE *pli = NULL; /* work pipeline */ P7_TOPHITS *th = NULL; /* top hit results */ obj = (ESL_THREADS *) arg; esl_threads_Started(obj, &workeridx); info = (WORKER_INFO *) esl_threads_GetData(obj, workeridx); w = esl_stopwatch_Create(); esl_stopwatch_Start(w); /* Convert to an optimized model */ bg = p7_bg_Create(info->abc); /* Create processing pipeline and hit list */ th = p7_tophits_Create(); pli = p7_pipeline_Create(info->opts, 100, 100, FALSE, p7_SCAN_MODELS); p7_pli_NewSeq(pli, info->seq); /* loop until all sequences have been processed */ count = 1; while (count > 0) { int inx; int blksz; P7_OPROFILE **om; /* grab the next block of sequences */ if (pthread_mutex_lock(info->inx_mutex) != 0) p7_Fail("mutex lock failed"); inx = *info->inx; blksz = *info->blk_size; if (inx > *info->limit) { blksz /= 5; if (blksz < 1000) { *info->limit = info->om_cnt * 2; } else { *info->limit = inx + (info->om_cnt - inx) * 2 / 3; } } *info->blk_size = blksz; *info->inx += blksz; if (pthread_mutex_unlock(info->inx_mutex) != 0) p7_Fail("mutex unlock failed"); om = info->om_list + inx; count = info->om_cnt - inx; if (count > blksz) count = blksz; /* Main loop: */ for (i = 0; i < count; ++i, ++om) { p7_pli_NewModel(pli, *om, bg); p7_bg_SetLength(bg, info->seq->n); p7_oprofile_ReconfigLength(*om, info->seq->n); p7_Pipeline(pli, *om, bg, info->seq, th); p7_pipeline_Reuse(pli); } } /* make available the pipeline objects to the main thread */ info->th = th; info->pli = pli; /* clean up */ p7_bg_Destroy(bg); esl_stopwatch_Stop(w); info->elapsed = w->elapsed; esl_stopwatch_Destroy(w); esl_threads_Finished(obj, workeridx); pthread_exit(NULL); return; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_OMX *ox1 = NULL; P7_OMX *ox2 = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); float null2[p7_MAXCODE]; int i,j,d,pos; int nsamples = 200; float fsc, bsc; double Mcs; if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_ReconfigLength(om, L); ox1 = p7_omx_Create(gm->M, L, L); ox2 = p7_omx_Create(gm->M, L, L); esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_Forward (dsq, L, om, ox1, &fsc); if (esl_opt_GetBoolean(go, "-t")) { P7_TRACE *tr = p7_trace_Create(); float *n2sc = malloc(sizeof(float) * (L+1)); esl_stopwatch_Start(w); for (i = 0; i < N; i++) { /* This is approximately what p7_domaindef.c::region_trace_ensemble() is doing: */ for (j = 0; j < nsamples; j++) { p7_StochasticTrace(r, dsq, L, om, ox1, tr); p7_trace_Index(tr); pos = 1; for (d = 0; d < tr->ndom; d++) { p7_Null2_ByTrace(om, tr, tr->tfrom[d], tr->tto[d], ox2, null2); for (; pos <= tr->sqfrom[d]; pos++) n2sc[pos] += 1.0; for (; pos < tr->sqto[d]; pos++) n2sc[pos] += null2[dsq[pos]]; } for (; pos <= L; pos++) n2sc[pos] += 1.0; p7_trace_Reuse(tr); } for (pos = 1; pos <= L; pos++) n2sc[pos] = logf(n2sc[pos] / nsamples); } esl_stopwatch_Stop(w); free(n2sc); p7_trace_Destroy(tr); } else { p7_Backward(dsq, L, om, ox1, ox2, &bsc); p7_Decoding(om, ox1, ox2, ox2); esl_stopwatch_Start(w); for (i = 0; i < N; i++) p7_Null2_ByExpectation(om, ox2, null2); esl_stopwatch_Stop(w); } Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) w->user; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_omx_Destroy(ox1); p7_omx_Destroy(ox2); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_GMX *gx1 = NULL; P7_GMX *gx2 = NULL; P7_OMX *ox1 = NULL; P7_OMX *ox2 = NULL; P7_TRACE *tr = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float fsc, bsc, accscore; float fsc_g, bsc_g, accscore_g; double Mcs; p7_FLogsumInit(); if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_ReconfigLength(om, L); if (esl_opt_GetBoolean(go, "-x") && p7_FLogsumError(-0.4, -0.5) > 0.0001) p7_Fail("-x here requires p7_Logsum() recompiled in slow exact mode"); ox1 = p7_omx_Create(gm->M, L, L); ox2 = p7_omx_Create(gm->M, L, L); tr = p7_trace_CreateWithPP(); esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_Forward (dsq, L, om, ox1, &fsc); p7_Backward(dsq, L, om, ox1, ox2, &bsc); p7_Decoding(om, ox1, ox2, ox2); esl_stopwatch_Start(w); for (i = 0; i < N; i++) { p7_OptimalAccuracy(om, ox2, ox1, &accscore); if (! esl_opt_GetBoolean(go, "--notrace")) { p7_OATrace(om, ox2, ox1, tr); p7_trace_Reuse(tr); } } esl_stopwatch_Stop(w); Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) w->user; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); if (esl_opt_GetBoolean(go, "-c") || esl_opt_GetBoolean(go, "-x") ) { gx1 = p7_gmx_Create(gm->M, L); gx2 = p7_gmx_Create(gm->M, L); p7_GForward (dsq, L, gm, gx1, &fsc_g); p7_GBackward(dsq, L, gm, gx2, &bsc_g); p7_GDecoding(gm, gx1, gx2, gx2); p7_GOptimalAccuracy(gm, gx2, gx1, &accscore_g); printf("generic: fwd=%8.4f bck=%8.4f acc=%8.4f\n", fsc_g, bsc_g, accscore_g); printf("VMX: fwd=%8.4f bck=%8.4f acc=%8.4f\n", fsc, bsc, accscore); p7_gmx_Destroy(gx1); p7_gmx_Destroy(gx2); } free(dsq); p7_omx_Destroy(ox1); p7_omx_Destroy(ox2); p7_trace_Destroy(tr); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
static void search_thread(void *arg) { int i; int count; int seed; int status; int workeridx; WORKER_INFO *info; ESL_THREADS *obj; ESL_SQ dbsq; ESL_STOPWATCH *w = NULL; /* timing stopwatch */ P7_BUILDER *bld = NULL; /* HMM construction configuration */ P7_BG *bg = NULL; /* null model */ P7_PIPELINE *pli = NULL; /* work pipeline */ P7_TOPHITS *th = NULL; /* top hit results */ P7_PROFILE *gm = NULL; /* generic model */ P7_OPROFILE *om = NULL; /* optimized query profile */ obj = (ESL_THREADS *) arg; esl_threads_Started(obj, &workeridx); info = (WORKER_INFO *) esl_threads_GetData(obj, workeridx); w = esl_stopwatch_Create(); bg = p7_bg_Create(info->abc); esl_stopwatch_Start(w); /* set up the dummy description and accession fields */ dbsq.desc = ""; dbsq.acc = ""; /* process a query sequence or hmm */ if (info->seq != NULL) { bld = p7_builder_Create(NULL, info->abc); if ((seed = esl_opt_GetInteger(info->opts, "--seed")) > 0) { esl_randomness_Init(bld->r, seed); bld->do_reseeding = TRUE; } bld->EmL = esl_opt_GetInteger(info->opts, "--EmL"); bld->EmN = esl_opt_GetInteger(info->opts, "--EmN"); bld->EvL = esl_opt_GetInteger(info->opts, "--EvL"); bld->EvN = esl_opt_GetInteger(info->opts, "--EvN"); bld->EfL = esl_opt_GetInteger(info->opts, "--EfL"); bld->EfN = esl_opt_GetInteger(info->opts, "--EfN"); bld->Eft = esl_opt_GetReal (info->opts, "--Eft"); if (esl_opt_IsOn(info->opts, "--mxfile")) status = p7_builder_SetScoreSystem (bld, esl_opt_GetString(info->opts, "--mxfile"), NULL, esl_opt_GetReal(info->opts, "--popen"), esl_opt_GetReal(info->opts, "--pextend"), bg); else status = p7_builder_LoadScoreSystem(bld, esl_opt_GetString(info->opts, "--mx"), esl_opt_GetReal(info->opts, "--popen"), esl_opt_GetReal(info->opts, "--pextend"), bg); if (status != eslOK) { //client_error(info->sock, status, "hmmgpmd: failed to set single query sequence score system: %s", bld->errbuf); fprintf(stderr, "hmmpgmd: failed to set single query sequence score system: %s", bld->errbuf); pthread_exit(NULL); return; } p7_SingleBuilder(bld, info->seq, bg, NULL, NULL, NULL, &om); /* bypass HMM - only need model */ p7_builder_Destroy(bld); } else { gm = p7_profile_Create (info->hmm->M, info->abc); om = p7_oprofile_Create(info->hmm->M, info->abc); p7_ProfileConfig(info->hmm, bg, gm, 100, p7_LOCAL); p7_oprofile_Convert(gm, om); } /* Create processing pipeline and hit list */ th = p7_tophits_Create(); pli = p7_pipeline_Create(info->opts, om->M, 100, FALSE, p7_SEARCH_SEQS); p7_pli_NewModel(pli, om, bg); if (pli->Z_setby == p7_ZSETBY_NTARGETS) pli->Z = info->db_Z; /* loop until all sequences have been processed */ count = 1; while (count > 0) { int inx; int blksz; HMMER_SEQ **sq; /* grab the next block of sequences */ if (pthread_mutex_lock(info->inx_mutex) != 0) p7_Fail("mutex lock failed"); inx = *info->inx; blksz = *info->blk_size; if (inx > *info->limit) { blksz /= 5; if (blksz < 1000) { *info->limit = info->sq_cnt * 2; } else { *info->limit = inx + (info->sq_cnt - inx) * 2 / 3; } } *info->blk_size = blksz; *info->inx += blksz; if (pthread_mutex_unlock(info->inx_mutex) != 0) p7_Fail("mutex unlock failed"); sq = info->sq_list + inx; count = info->sq_cnt - inx; if (count > blksz) count = blksz; /* Main loop: */ for (i = 0; i < count; ++i, ++sq) { if ( !(info->range_list) || hmmpgmd_IsWithinRanges ((*sq)->idx, info->range_list)) { dbsq.name = (*sq)->name; dbsq.dsq = (*sq)->dsq; dbsq.n = (*sq)->n; dbsq.idx = (*sq)->idx; if((*sq)->desc != NULL) dbsq.desc = (*sq)->desc; p7_bg_SetLength(bg, dbsq.n); p7_oprofile_ReconfigLength(om, dbsq.n); p7_Pipeline(pli, om, bg, &dbsq, th); p7_pipeline_Reuse(pli); } } } /* make available the pipeline objects to the main thread */ info->th = th; info->pli = pli; /* clean up */ p7_bg_Destroy(bg); p7_oprofile_Destroy(om); if (gm != NULL) p7_profile_Destroy(gm); esl_stopwatch_Stop(w); info->elapsed = w->elapsed; esl_stopwatch_Destroy(w); esl_threads_Finished(obj, workeridx); pthread_exit(NULL); return; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_GMX *gx = NULL; P7_OMX *fwd = NULL; P7_OMX *bck = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float fsc, bsc; float fsc2, bsc2; double base_time, bench_time, Mcs; p7_FLogsumInit(); if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_ReconfigLength(om, L); if (esl_opt_GetBoolean(go, "-x") && p7_FLogsumError(-0.4, -0.5) > 0.0001) p7_Fail("-x here requires p7_Logsum() recompiled in slow exact mode"); if (esl_opt_GetBoolean(go, "-P")) { fwd = p7_omx_Create(gm->M, 0, L); bck = p7_omx_Create(gm->M, 0, L); } else { fwd = p7_omx_Create(gm->M, L, L); bck = p7_omx_Create(gm->M, L, L); } gx = p7_gmx_Create(gm->M, L); /* Get a baseline time: how long it takes just to generate the sequences */ esl_stopwatch_Start(w); for (i = 0; i < N; i++) esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); esl_stopwatch_Stop(w); base_time = w->user; esl_stopwatch_Start(w); for (i = 0; i < N; i++) { esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); if (esl_opt_GetBoolean(go, "-P")) { if (! esl_opt_GetBoolean(go, "-B")) p7_ForwardParser (dsq, L, om, fwd, &fsc); if (! esl_opt_GetBoolean(go, "-F")) p7_BackwardParser(dsq, L, om, fwd, bck, &bsc); } else { if (! esl_opt_GetBoolean(go, "-B")) p7_Forward (dsq, L, om, fwd, &fsc); if (! esl_opt_GetBoolean(go, "-F")) p7_Backward(dsq, L, om, fwd, bck, &bsc); } if (esl_opt_GetBoolean(go, "-c") || esl_opt_GetBoolean(go, "-x")) { p7_GForward (dsq, L, gm, gx, &fsc2); p7_GBackward(dsq, L, gm, gx, &bsc2); printf("%.4f %.4f %.4f %.4f\n", fsc, bsc, fsc2, bsc2); } } esl_stopwatch_Stop(w); bench_time = w->user - base_time; Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) bench_time; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_omx_Destroy(bck); p7_omx_Destroy(fwd); p7_gmx_Destroy(gx); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); char *seqfile = esl_opt_GetArg(go, 2); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_GMX *gx = NULL; P7_OMX *fwd = NULL; P7_OMX *bck = NULL; ESL_SQ *sq = NULL; ESL_SQFILE *sqfp = NULL; int format = eslSQFILE_UNKNOWN; float fraw, braw, nullsc, fsc; float gfraw, gbraw, gfsc; double P, gP; int status; /* Read in one HMM */ if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); /* Open sequence file for reading */ sq = esl_sq_CreateDigital(abc); status = esl_sqfile_Open(seqfile, format, NULL, &sqfp); if (status == eslENOTFOUND) p7_Fail("No such file."); else if (status == eslEFORMAT) p7_Fail("Format unrecognized."); else if (status == eslEINVAL) p7_Fail("Can't autodetect stdin or .gz."); else if (status != eslOK) p7_Fail("Open failed, code %d.", status); /* create default null model, then create and optimize profile */ bg = p7_bg_Create(abc); p7_bg_SetLength(bg, sq->n); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, sq->n, p7_UNILOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); /* p7_oprofile_Dump(stdout, om); */ /* allocate DP matrices for O(M+L) parsers */ fwd = p7_omx_Create(gm->M, 0, sq->n); bck = p7_omx_Create(gm->M, 0, sq->n); gx = p7_gmx_Create(gm->M, sq->n); /* allocate DP matrices for O(ML) fills */ /* fwd = p7_omx_Create(gm->M, sq->n, sq->n); */ /* bck = p7_omx_Create(gm->M, sq->n, sq->n); */ /* p7_omx_SetDumpMode(stdout, fwd, TRUE); */ /* makes the fast DP algorithms dump their matrices */ /* p7_omx_SetDumpMode(stdout, bck, TRUE); */ while ((status = esl_sqio_Read(sqfp, sq)) == eslOK) { p7_oprofile_ReconfigLength(om, sq->n); p7_ReconfigLength(gm, sq->n); p7_bg_SetLength(bg, sq->n); p7_omx_GrowTo(fwd, om->M, 0, sq->n); p7_omx_GrowTo(bck, om->M, 0, sq->n); p7_gmx_GrowTo(gx, gm->M, sq->n); p7_bg_NullOne (bg, sq->dsq, sq->n, &nullsc); p7_ForwardParser (sq->dsq, sq->n, om, fwd, &fraw); p7_BackwardParser(sq->dsq, sq->n, om, fwd, bck, &braw); /* p7_Forward (sq->dsq, sq->n, om, fwd, &fsc); printf("forward: %.2f nats\n", fsc); */ /* p7_Backward(sq->dsq, sq->n, om, fwd, bck, &bsc); printf("backward: %.2f nats\n", bsc); */ /* Comparison to other F/B implementations */ p7_GForward (sq->dsq, sq->n, gm, gx, &gfraw); p7_GBackward (sq->dsq, sq->n, gm, gx, &gbraw); /* p7_gmx_Dump(stdout, gx); */ fsc = (fraw-nullsc) / eslCONST_LOG2; gfsc = (gfraw-nullsc) / eslCONST_LOG2; P = esl_exp_surv(fsc, om->evparam[p7_FTAU], om->evparam[p7_FLAMBDA]); gP = esl_exp_surv(gfsc, gm->evparam[p7_FTAU], gm->evparam[p7_FLAMBDA]); if (esl_opt_GetBoolean(go, "-1")) { printf("%-30s\t%-20s\t%9.2g\t%6.1f\t%9.2g\t%6.1f\n", sq->name, hmm->name, P, fsc, gP, gfsc); } else if (esl_opt_GetBoolean(go, "-P")) { /* output suitable for direct use in profmark benchmark postprocessors: */ printf("%g\t%.2f\t%s\t%s\n", P, fsc, sq->name, hmm->name); } else { printf("target sequence: %s\n", sq->name); printf("fwd filter raw score: %.2f nats\n", fraw); printf("bck filter raw score: %.2f nats\n", braw); printf("null score: %.2f nats\n", nullsc); printf("per-seq score: %.2f bits\n", fsc); printf("P-value: %g\n", P); printf("GForward raw score: %.2f nats\n", gfraw); printf("GBackward raw score: %.2f nats\n", gbraw); printf("GForward seq score: %.2f bits\n", gfsc); printf("GForward P-value: %g\n", gP); } esl_sq_Reuse(sq); } /* cleanup */ esl_sq_Destroy(sq); esl_sqfile_Close(sqfp); p7_omx_Destroy(bck); p7_omx_Destroy(fwd); p7_gmx_Destroy(gx); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
static void pipeline_thread(void *arg) { int i, j; int status; int workeridx; WORKER_INFO *info; ESL_THREADS *obj; P7_OM_BLOCK *block; void *newBlock; P7_OPROFILE *om = NULL; P7_SCOREDATA *scoredata = NULL; /* hmm-specific data used by nhmmer */ P7_DOMAIN *dcl; int seq_len = 0; int prev_hit_cnt = 0; #ifdef eslAUGMENT_ALPHABET ESL_SQ *sq_revcmp = NULL; #endif /*eslAUGMENT_ALPHABET*/ impl_ThreadInit(); obj = (ESL_THREADS *) arg; esl_threads_Started(obj, &workeridx); info = (WORKER_INFO *) esl_threads_GetData(obj, workeridx); status = esl_workqueue_WorkerUpdate(info->queue, NULL, &newBlock); if (status != eslOK) esl_fatal("Work queue worker failed"); #ifdef eslAUGMENT_ALPHABET //reverse complement if (info->pli->strand != p7_STRAND_TOPONLY && info->qsq->abc->complement != NULL ) { sq_revcmp = esl_sq_CreateDigital(info->qsq->abc); esl_sq_Copy(info->qsq,sq_revcmp); esl_sq_ReverseComplement(sq_revcmp); info->pli->nres += info->qsq->n; } #endif /*eslAUGMENT_ALPHABET*/ /* loop until all blocks have been processed */ block = (P7_OM_BLOCK *) newBlock; while (block->count > 0) { /* Main loop: */ for (i = 0; i < block->count; ++i) { om = block->list[i]; seq_len = 0; p7_pli_NewModel(info->pli, om, info->bg); p7_bg_SetLength(info->bg, info->qsq->n); p7_oprofile_ReconfigLength(om, info->qsq->n); scoredata = p7_hmm_ScoreDataCreate(om, FALSE); #ifdef eslAUGMENT_ALPHABET //reverse complement if (info->pli->strand != p7_STRAND_TOPONLY && info->qsq->abc->complement != NULL ) { p7_Pipeline_LongTarget(info->pli, om, scoredata, info->bg, sq_revcmp, info->th, 0); p7_pipeline_Reuse(info->pli); // prepare for next search seq_len = info->qsq->n; for (j = prev_hit_cnt; j < info->th->N ; j++) { dcl = info->th->unsrt[j].dcl; // modify hit positions to account for the position of the window in the full sequence dcl->ienv = seq_len - dcl->ienv + 1; dcl->jenv = seq_len - dcl->jenv + 1; dcl->iali = seq_len - dcl->iali + 1; dcl->jali = seq_len - dcl->jali + 1; dcl->ad->sqfrom = seq_len - dcl->ad->sqfrom + 1; dcl->ad->sqto = seq_len - dcl->ad->sqto + 1; } } #endif if (info->pli->strand != p7_STRAND_BOTTOMONLY) { p7_Pipeline_LongTarget(info->pli, om, scoredata, info->bg, info->qsq, info->th, 0); p7_pipeline_Reuse(info->pli); seq_len += info->qsq->n; } for (j = prev_hit_cnt; j < info->th->N ; j++) { info->th->unsrt[j].lnP += log((float)seq_len / (float)om->max_length); info->th->unsrt[j].dcl[0].lnP = info->th->unsrt[j].lnP; info->th->unsrt[j].sortkey = -1.0 * info->th->unsrt[j].lnP; info->th->unsrt[j].dcl[0].ad->L = om->M; } prev_hit_cnt = info->th->N; p7_hmm_ScoreDataDestroy(scoredata); p7_oprofile_Destroy(om); block->list[i] = NULL; } status = esl_workqueue_WorkerUpdate(info->queue, block, &newBlock); if (status != eslOK) esl_fatal("Work queue worker failed"); block = (P7_OM_BLOCK *) newBlock; } #ifdef eslAUGMENT_ALPHABET esl_sq_Destroy(sq_revcmp); #endif status = esl_workqueue_WorkerUpdate(info->queue, block, NULL); if (status != eslOK) esl_fatal("Work queue worker failed"); esl_threads_Finished(obj, workeridx); return; }
static int serial_loop(WORKER_INFO *info, P7_HMMFILE *hfp) { int status; int i; int seq_len = 0; int prev_hit_cnt = 0; P7_OPROFILE *om = NULL; P7_SCOREDATA *scoredata = NULL; /* hmm-specific data used by nhmmer */ ESL_ALPHABET *abc = NULL; P7_DOMAIN *dcl; #ifdef eslAUGMENT_ALPHABET ESL_SQ *sq_revcmp = NULL; if (info->pli->strand != p7_STRAND_TOPONLY && info->qsq->abc->complement != NULL ) { sq_revcmp = esl_sq_CreateDigital(info->qsq->abc); esl_sq_Copy(info->qsq,sq_revcmp); esl_sq_ReverseComplement(sq_revcmp); info->pli->nres += info->qsq->n; } #endif /*eslAUGMENT_ALPHABET*/ /* Main loop: */ while ((status = p7_oprofile_ReadMSV(hfp, &abc, &om)) == eslOK) { seq_len = 0; p7_pli_NewModel(info->pli, om, info->bg); p7_bg_SetLength(info->bg, info->qsq->n); p7_oprofile_ReconfigLength(om, info->qsq->n); scoredata = p7_hmm_ScoreDataCreate(om, FALSE); #ifdef eslAUGMENT_ALPHABET //reverse complement if (info->pli->strand != p7_STRAND_TOPONLY && info->qsq->abc->complement != NULL ) { p7_Pipeline_LongTarget(info->pli, om, scoredata, info->bg, sq_revcmp, info->th, 0); p7_pipeline_Reuse(info->pli); // prepare for next search seq_len = info->qsq->n; for (i = prev_hit_cnt; i < info->th->N ; i++) { dcl = info->th->unsrt[i].dcl; // modify hit positions to account for the position of the window in the full sequence dcl->ienv = seq_len - dcl->ienv + 1; dcl->jenv = seq_len - dcl->jenv + 1; dcl->iali = seq_len - dcl->iali + 1; dcl->jali = seq_len - dcl->jali + 1; dcl->ad->sqfrom = seq_len - dcl->ad->sqfrom + 1; dcl->ad->sqto = seq_len - dcl->ad->sqto + 1; } } #endif if (info->pli->strand != p7_STRAND_BOTTOMONLY) { p7_Pipeline_LongTarget(info->pli, om, scoredata, info->bg, info->qsq, info->th, 0); p7_pipeline_Reuse(info->pli); seq_len += info->qsq->n; } for (i = prev_hit_cnt; i < info->th->N ; i++) { info->th->unsrt[i].lnP += log((float)seq_len / (float)om->max_length); info->th->unsrt[i].dcl[0].lnP = info->th->unsrt[i].lnP; info->th->unsrt[i].sortkey = -1.0 * info->th->unsrt[i].lnP; info->th->unsrt[i].dcl[0].ad->L = om->M; } prev_hit_cnt = info->th->N; p7_oprofile_Destroy(om); p7_hmm_ScoreDataDestroy(scoredata); } esl_alphabet_Destroy(abc); #ifdef eslAUGMENT_ALPHABET esl_sq_Destroy(sq_revcmp); #endif return status; }
void run_hmmer_pipeline(const char* seq) { int index, i, status; ESL_SQ* sq = esl_sq_CreateFrom(NULL, seq, NULL, NULL, NULL); P7_OPROFILE *om = NULL; P7_PROFILE *gm = NULL; float usc, vfsc, fwdsc; /* filter scores */ float filtersc; /* HMM null filter score */ float nullsc; /* null model score */ float seqbias; float seq_score; /* the corrected per-seq bit score */ double P; WRAPPER_RESULT* result; num_results = 0; if(sq->n == 0) { esl_sq_Destroy(sq); return; } esl_sq_Digitize(abc, sq); int n = 0; float oasc; for(index = 0;index < num_models;index++) { om = models[index]; p7_omx_Reuse(oxf); p7_omx_Reuse(oxb); p7_omx_GrowTo(oxf, om->M, sq->n, sq->n); p7_omx_GrowTo(oxb, om->M, sq->n, sq->n); p7_oprofile_ReconfigLength(om, sq->n); p7_bg_SetFilter(bg, om->M, om->compo); p7_bg_SetLength(bg, sq->n); //Calibrate null model p7_bg_NullOne(bg, sq->dsq, sq->n, &nullsc); //MSV Filter p7_MSVFilter(sq->dsq, sq->n, om, oxf, &usc); seq_score = (usc - nullsc) / eslCONST_LOG2; P = esl_gumbel_surv(seq_score, om->evparam[p7_MMU], om->evparam[p7_MLAMBDA]); if (P > f1) continue; //Bias filter (model compo) p7_bg_FilterScore(bg, sq->dsq, sq->n, &filtersc); seq_score = (usc - filtersc) / eslCONST_LOG2; P = esl_gumbel_surv(seq_score, om->evparam[p7_MMU], om->evparam[p7_MLAMBDA]); if (P > f1) continue; //Viterbi filter (Only do if P value from Bias is high) if(P > f2) { p7_ViterbiFilter(sq->dsq, sq->n, om, oxf, &vfsc); seq_score = (vfsc - filtersc) / eslCONST_LOG2; P = esl_gumbel_surv(seq_score, om->evparam[p7_VMU], om->evparam[p7_VLAMBDA]); if (P > f2) continue; } //Get the real probability (forward) p7_Forward(sq->dsq, sq->n, om, oxf, &fwdsc); seq_score = (fwdsc - filtersc) / eslCONST_LOG2; P = esl_exp_surv(seq_score, om->evparam[p7_FTAU], om->evparam[p7_FLAMBDA]); if(hmmer_error) { fprintf(stderr, "HMM: %s, seq: %s", om->name, seq); hmmer_error = 0; continue; } if (P > f3) continue; //Real hit, go in to posterior decoding and alignment p7_omx_Reuse(oxb); p7_trace_Reuse(tr); p7_Backward(sq->dsq, sq->n, om, oxf, oxb, NULL); status = p7_Decoding(om, oxf, oxb, oxb); if(status == eslOK) { //And then trace the result p7_OptimalAccuracy(om, oxb, oxf, &oasc); p7_OATrace(om, oxb, oxf, tr); } else if(status == eslERANGE) { fprintf(stderr, "Decoding overflow on model %s\n", om->name); gm = gmodels[index]; if(gxf == NULL) { gxf = p7_gmx_Create(gm->M, sq->n); gxb = p7_gmx_Create(gm->M, sq->n); } else { p7_gmx_GrowTo(gxf, gm->M, sq->n); p7_gmx_GrowTo(gxb, gm->M, sq->n); } p7_ReconfigLength(gm, sq->n); p7_GForward (sq->dsq, sq->n, gm, gxf, &fwdsc); p7_GBackward(sq->dsq, sq->n, gm, gxb, NULL); p7_GDecoding(gm, gxf, gxb, gxb); p7_GOptimalAccuracy(gm, gxb, gxf, &oasc); p7_GOATrace (gm, gxb, gxf, tr); p7_gmx_Reuse(gxf); p7_gmx_Reuse(gxb); } if(hmmer_error) { fprintf(stderr, "HMM: %s, seq: %s", om->name, seq); hmmer_error = 0; continue; } result = wrapper_results[num_results]; reuse_result(result, tr->N + om->M, om->name); //We're way overallocating here, but it's hard to know at this point how much space we'll need for the alignment (plus leading and trailing gaps) trace_into(tr, result, sq, abc, om->M); result->bits = seq_score; num_results++; } esl_sq_Destroy(sq); }