int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_GMX *gx = NULL; P7_OMX *fwd = NULL; P7_TRACE *tr = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float sc, fsc, vsc; float bestsc = -eslINFINITY; if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); fwd = p7_omx_Create(gm->M, L, L); gx = p7_gmx_Create(gm->M, L); tr = p7_trace_Create(); esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_GViterbi(dsq, L, gm, gx, &vsc); p7_Forward (dsq, L, om, fwd, &fsc); esl_stopwatch_Start(w); for (i = 0; i < N; i++) { p7_StochasticTrace(r, dsq, L, om, fwd, tr); p7_trace_Score(tr, dsq, gm, &sc); bestsc = ESL_MAX(bestsc, sc); p7_trace_Reuse(tr); } esl_stopwatch_Stop(w); esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("forward sc = %.4f nats\n", fsc); printf("viterbi sc = %.4f nats\n", vsc); printf("max trace sc = %.4f nats\n", bestsc); free(dsq); p7_trace_Destroy(tr); p7_gmx_Destroy(gx); p7_omx_Destroy(fwd); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; double lambda = 0.0; double mmu = 0.0; double vmu = 0.0; double ftau = 0.0; int Z = esl_opt_GetInteger(go, "-Z"); int EmL = esl_opt_GetInteger(go, "--EmL"); int EmN = esl_opt_GetInteger(go, "--EmN"); int EvL = esl_opt_GetInteger(go, "--EvL"); int EvN = esl_opt_GetInteger(go, "--EvN"); int EfL = esl_opt_GetInteger(go, "--EfL"); int EfN = esl_opt_GetInteger(go, "--EfN"); int Eft = esl_opt_GetReal (go, "--Eft"); int iteration; int do_msv, do_vit, do_fwd; int status; if (esl_opt_GetBoolean(go, "--msvonly") == TRUE) { do_msv = TRUE; do_vit = FALSE; do_fwd = FALSE; } else if (esl_opt_GetBoolean(go, "--vitonly") == TRUE) { do_msv = FALSE; do_vit = TRUE; do_fwd = FALSE; } else if (esl_opt_GetBoolean(go, "--fwdonly") == TRUE) { do_msv = FALSE; do_vit = FALSE; do_fwd = TRUE; } else { do_msv = TRUE; do_vit = TRUE; do_fwd = TRUE; } if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); while ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) != eslEOF) { if (bg == NULL) bg = p7_bg_Create(abc); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, EvL, p7_LOCAL); /* the EvL doesn't matter */ om = p7_oprofile_Create(hmm->M, abc); p7_oprofile_Convert(gm, om); if (esl_opt_IsOn(go, "--lambda")) lambda = esl_opt_GetReal(go, "--lambda"); else p7_Lambda(hmm, bg, &lambda); for (iteration = 0; iteration < Z; iteration++) { if (do_msv) p7_MSVMu (r, om, bg, EmL, EmN, lambda, &mmu); if (do_vit) p7_ViterbiMu (r, om, bg, EvL, EvN, lambda, &vmu); if (do_fwd) p7_Tau (r, om, bg, EfL, EfN, lambda, Eft, &ftau); printf("%s %.4f %.4f %.4f %.4f\n", hmm->name, lambda, mmu, vmu, ftau); } p7_hmm_Destroy(hmm); p7_profile_Destroy(gm); p7_oprofile_Destroy(om); } p7_hmmfile_Close(hfp); p7_bg_Destroy(bg); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return eslOK; }
int main(int argc, char **argv) { ESL_GETOPTS *go = NULL; int i, j; int status = eslOK; P7_HMMFILE *hfp = NULL; /* open input HMM file */ P7_HMM *hmm = NULL; /* one HMM query */ ESL_ALPHABET *abc = NULL; /* digital alphabet */ P7_BG *bg = NULL; char errbuf[eslERRBUFSIZE]; char* hmmfile; float *rel_ents = NULL; float **heights = NULL; float **probs = NULL; float *ins_P = NULL; float *ins_expL = NULL; float *occupancy = NULL; int mode = HMMLOGO_RELENT_ALL; //default go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) esl_fatal(argv[0], "Failed to parse command line: %s\n", go->errbuf); if (esl_opt_VerifyConfig(go) != eslOK) esl_fatal(argv[0], "Error in configuration: %s\n", go->errbuf); if (esl_opt_GetBoolean(go, "-h") ) { p7_banner (stdout, argv[0], banner); esl_usage (stdout, argv[0], usage); puts("\nOptions:"); esl_opt_DisplayHelp(stdout, go, 1, 2, 100); exit(0); } if (esl_opt_ArgNumber(go) != 1) esl_fatal(argv[0], "Incorrect number of command line arguments.\n"); hmmfile = esl_opt_GetArg(go, 1); if (esl_opt_IsOn(go, "--height_relent_all")) mode = HMMLOGO_RELENT_ALL; else if (esl_opt_IsOn(go, "--height_relent_abovebg")) mode = HMMLOGO_RELENT_ABOVEBG; else if (esl_opt_IsOn(go, "--height_score")) mode = HMMLOGO_SCORE; else mode = HMMLOGO_RELENT_ALL; //default /* Open the query profile HMM file */ status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf); if (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status == eslEFORMAT) p7_Fail("File format problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status != eslOK) p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n", status, hmmfile, errbuf); status = p7_hmmfile_Read(hfp, &abc, &hmm); bg = p7_bg_Create(abc); ESL_ALLOC(rel_ents, (hmm->M+1) * sizeof(float)); ESL_ALLOC(heights, (hmm->M+1) * sizeof(float*)); ESL_ALLOC(probs, (hmm->M+1) * sizeof(float*)); for (i = 1; i <= hmm->M; i++) { ESL_ALLOC(heights[i], abc->K * sizeof(float)); ESL_ALLOC(probs[i], abc->K * sizeof(float)); } /* residue heights */ if (mode == HMMLOGO_RELENT_ALL) { printf ("max expected height = %.2f\n", hmmlogo_maxHeight(bg) ); hmmlogo_RelativeEntropy_all(hmm, bg, rel_ents, probs, heights); } else if (mode == HMMLOGO_RELENT_ABOVEBG) { printf ("max expected height = %.2f\n", hmmlogo_maxHeight(bg) ); hmmlogo_RelativeEntropy_above_bg(hmm, bg, rel_ents, probs, heights); } else if (mode == HMMLOGO_SCORE) { hmmlogo_ScoreHeights(hmm, bg, heights ); } printf ("Residue heights\n"); for (i = 1; i <= hmm->M; i++) { printf("%d: ", i); for (j=0; j<abc->K; j++) printf("%6.3f ", heights[i][j] ); if (mode != HMMLOGO_SCORE) printf(" (%6.3f)", rel_ents[i]); printf("\n"); } if (rel_ents != NULL) free(rel_ents); if (heights != NULL) { for (i = 1; i <= hmm->M; i++) if (heights[i] != NULL) free(heights[i]); free(heights); } /* indel values */ if (! esl_opt_IsOn(go, "--no_indel")) { ESL_ALLOC(ins_P, (hmm->M+1) * sizeof(float)); ESL_ALLOC(ins_expL, (hmm->M+1) * sizeof(float)); ESL_ALLOC(occupancy, (hmm->M+1) * sizeof(float)); hmmlogo_IndelValues(hmm, ins_P, ins_expL, occupancy); printf ("Indel values\n"); for (i = 1; i <= hmm->M; i++) printf("%d: %6.3f %6.3f %6.3f\n", i, ins_P[i], ins_expL[i], occupancy[i] ); free(ins_P); free(ins_expL); free(occupancy); } p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); p7_bg_Destroy(bg); exit(0); ERROR: if (rel_ents != NULL) free(rel_ents); if (heights != NULL) { for (i = 1; i <= hmm->M; i++) if (heights[i] != NULL) free(heights[i]); free(heights); } if (hfp != NULL) p7_hmmfile_Close(hfp); if (abc != NULL) esl_alphabet_Destroy(abc); if (ins_P != NULL) free(ins_P); if (ins_expL != NULL) free(ins_expL); if (occupancy != NULL) free(occupancy); }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_OMX *ox1 = NULL; P7_OMX *ox2 = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); float null2[p7_MAXCODE]; int i,j,d,pos; int nsamples = 200; float fsc, bsc; double Mcs; if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_ReconfigLength(om, L); ox1 = p7_omx_Create(gm->M, L, L); ox2 = p7_omx_Create(gm->M, L, L); esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_Forward (dsq, L, om, ox1, &fsc); if (esl_opt_GetBoolean(go, "-t")) { P7_TRACE *tr = p7_trace_Create(); float *n2sc = malloc(sizeof(float) * (L+1)); esl_stopwatch_Start(w); for (i = 0; i < N; i++) { /* This is approximately what p7_domaindef.c::region_trace_ensemble() is doing: */ for (j = 0; j < nsamples; j++) { p7_StochasticTrace(r, dsq, L, om, ox1, tr); p7_trace_Index(tr); pos = 1; for (d = 0; d < tr->ndom; d++) { p7_Null2_ByTrace(om, tr, tr->tfrom[d], tr->tto[d], ox2, null2); for (; pos <= tr->sqfrom[d]; pos++) n2sc[pos] += 1.0; for (; pos < tr->sqto[d]; pos++) n2sc[pos] += null2[dsq[pos]]; } for (; pos <= L; pos++) n2sc[pos] += 1.0; p7_trace_Reuse(tr); } for (pos = 1; pos <= L; pos++) n2sc[pos] = logf(n2sc[pos] / nsamples); } esl_stopwatch_Stop(w); free(n2sc); p7_trace_Destroy(tr); } else { p7_Backward(dsq, L, om, ox1, ox2, &bsc); p7_Decoding(om, ox1, ox2, ox2); esl_stopwatch_Start(w); for (i = 0; i < N; i++) p7_Null2_ByExpectation(om, ox2, null2); esl_stopwatch_Stop(w); } Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) w->user; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_omx_Destroy(ox1); p7_omx_Destroy(ox2); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_OMX *ox = NULL; P7_GMX *gx = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float sc1, sc2; double base_time, bench_time, Mcs; if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_ReconfigLength(om, L); if (esl_opt_GetBoolean(go, "-x")) p7_profile_SameAsVF(om, gm); ox = p7_omx_Create(gm->M, 0, 0); gx = p7_gmx_Create(gm->M, L); /* Get a baseline time: how long it takes just to generate the sequences */ esl_stopwatch_Start(w); for (i = 0; i < N; i++) esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); esl_stopwatch_Stop(w); base_time = w->user; /* Run the benchmark */ esl_stopwatch_Start(w); for (i = 0; i < N; i++) { esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_ViterbiFilter(dsq, L, om, ox, &sc1); if (esl_opt_GetBoolean(go, "-c")) { p7_GViterbi(dsq, L, gm, gx, &sc2); printf("%.4f %.4f\n", sc1, sc2); } if (esl_opt_GetBoolean(go, "-x")) { p7_GViterbi(dsq, L, gm, gx, &sc2); sc2 /= om->scale_w; if (om->mode == p7_UNILOCAL) sc2 -= 2.0; /* that's ~ L \log \frac{L}{L+2}, for our NN,CC,JJ */ else if (om->mode == p7_LOCAL) sc2 -= 3.0; /* that's ~ L \log \frac{L}{L+3}, for our NN,CC,JJ */ printf("%.4f %.4f\n", sc1, sc2); } } esl_stopwatch_Stop(w); bench_time = w->user - base_time; Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) bench_time; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_omx_Destroy(ox); p7_gmx_Destroy(gx); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_GMX *gx = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float sc; double base_time, bench_time, Mcs; if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL); gx = p7_gmx_Create(gm->M, L); /* Baseline time. */ esl_stopwatch_Start(w); for (i = 0; i < N; i++) esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); esl_stopwatch_Stop(w); base_time = w->user; /* Benchmark time. */ esl_stopwatch_Start(w); for (i = 0; i < N; i++) { esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_GViterbi (dsq, L, gm, gx, &sc); } esl_stopwatch_Stop(w); bench_time = w->user - base_time; Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) bench_time; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_gmx_Destroy(gx); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_GMX *gx1 = NULL; P7_GMX *gx2 = NULL; P7_TRACE *tr = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float fsc, bsc, accscore; double Mcs; p7_FLogsumInit(); if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL); gx1 = p7_gmx_Create(gm->M, L); gx2 = p7_gmx_Create(gm->M, L); tr = p7_trace_CreateWithPP(); esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_GForward (dsq, L, gm, gx1, &fsc); p7_GBackward(dsq, L, gm, gx2, &bsc); p7_GDecoding(gm, gx1, gx2, gx2); /* <gx2> is now the posterior decoding matrix */ esl_stopwatch_Start(w); for (i = 0; i < N; i++) { p7_GOptimalAccuracy(gm, gx2, gx1, &accscore); /* <gx1> is now the OA matrix */ if (! esl_opt_GetBoolean(go, "--notrace")) { p7_GOATrace(gm, gx2, gx1, tr); p7_trace_Reuse(tr); } } esl_stopwatch_Stop(w); Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / w->user; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_trace_Destroy(tr); p7_gmx_Destroy(gx1); p7_gmx_Destroy(gx2); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); P7_BG *bg = p7_bg_Create(abc); int my_rank; int nproc; char *buf = NULL; int nbuf = 0; int subtotalM = 0; int allM = 0; int stalling = esl_opt_GetBoolean(go, "--stall"); MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &nproc); while (stalling); /* Master MPI process: */ if (my_rank == 0) { ESL_STOPWATCH *w = esl_stopwatch_Create(); P7_HMMFILE *hfp = NULL; P7_OPROFILE *om = NULL; P7_HMM *hmm = NULL; /* Read HMMs from a file. */ if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); esl_stopwatch_Start(w); while (p7_oprofile_ReadMSV(hfp, &abc, &om) == eslOK && p7_oprofile_ReadRest(hfp, om) == eslOK) { if (!esl_opt_GetBoolean(go, "-b")) p7_oprofile_MPISend(om, 1, 0, MPI_COMM_WORLD, &buf, &nbuf); /* 1 = dest; 0 = tag */ p7_hmm_Destroy(hmm); p7_oprofile_Destroy(om); } p7_oprofile_MPISend(NULL, 1, 0, MPI_COMM_WORLD, &buf, &nbuf); /* send the "no more HMMs" sign */ MPI_Reduce(&subtotalM, &allM, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); printf("total: %d\n", allM); esl_stopwatch_Stop(w); esl_stopwatch_Display(stdout, w, "CPU Time: "); esl_stopwatch_Destroy(w); } /* Worker MPI process: */ else { P7_OPROFILE *om_recd = NULL; while (p7_oprofile_MPIRecv(0, 0, MPI_COMM_WORLD, &buf, &nbuf, &abc, &om_recd) == eslOK) { subtotalM += om_recd->M; p7_oprofile_Destroy(om_recd); } MPI_Reduce(&subtotalM, &allM, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); } free(buf); p7_bg_Destroy(bg); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); MPI_Finalize(); exit(0); }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); char *seqfile = esl_opt_GetArg(go, 2); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS*r = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET*abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm1, *gm2; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N") / SSE16_NVALS; int MaxPart = esl_opt_GetInteger(go, "-M"); int NROUNDS = esl_opt_GetInteger(go, "-R"); int check = esl_opt_GetBoolean(go, "-c"); __m128 resdata[10]; int i, j; float *sc1 = (float*) resdata; ESL_SQFILE *sqfp = NULL; DATA_COPS16 *dcops; struct timeb tbstart, tbend; int sumlengths = 0; float* results = NULL; srand(time(NULL)); if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm1 = p7_profile_Create(hmm->M, abc); gm2 = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm1, L, p7_UNILOCAL); p7_ProfileConfig(hmm, bg, gm2, L, p7_UNILOCAL); dcops = p7_ViterbiCOPSw_Create(gm1); p7_ViterbiCOPSW_Setup(dcops, L+100, MaxPart); // use max L dcops->L = L; int dbsize = SSE16_NVALS*N; SEQ **seqsdb= calloc(dbsize+1, sizeof(SEQ*)); int equallength = 1; if (esl_sqfile_OpenDigital(abc, seqfile, eslSQFILE_FASTA, NULL, &sqfp) == eslOK) { // Use Sequence file ESL_SQ* sq = esl_sq_CreateDigital(abc); int maxseqs, len=0; if (esl_opt_IsDefault(go, "-N")) // N not specified in cmdline maxseqs = INT_MAX; // no limit else maxseqs = SSE16_NVALS*N; // use cmdline limit for (j = 0; j < maxseqs && esl_sqio_Read(sqfp, sq) == eslOK; j++) { if (equallength && sq->n != len && j > 0) equallength = 0; len = sq->n; if (j > dbsize) { seqsdb = realloc(seqsdb, 2*(dbsize+1)*sizeof(SEQ*)); dbsize *= 2; } ESL_DSQ* dsq = sq->dsq; seqsdb[j] = malloc(sizeof(SEQ)); seqsdb[j]->length = len; seqsdb[j]->seq = malloc((len+4)*sizeof(ESL_DSQ)); memcpy(seqsdb[j]->seq, dsq, len+2); sumlengths += len; esl_sq_Reuse(sq); } N = j/SSE16_NVALS; } else // Not found database. Generate random sequences for (i = 0; i < N; i++) for (j = 0; j < SSE16_NVALS; j++) { int len = L; // - rand()%1000; seqsdb[i*SSE16_NVALS+j] = malloc(sizeof(SEQ)); seqsdb[i*SSE16_NVALS+j]->seq = malloc(len+4); seqsdb[i*SSE16_NVALS+j]->length = len; esl_rsq_xfIID(r, bg->f, abc->K, len, seqsdb[i*SSE16_NVALS+j]->seq); sumlengths += len; } printf("Viterbi COPS Word with %d threads, model %s. ModelLen: %d, #Segms: %d, SeqL.: %d, #seqs: %d, Partition: %d, #parts: %d\n", NTHREADS, hmmfile, gm1->M, (int) ceil(gm1->M/SSE16_NVALS), L, SSE16_NVALS*N*NROUNDS, dcops->partition, dcops->Npartitions); /* // No. of partitions computed without full parallelism ( == no. of threads active while some are idle) int Niters_part = dcops->Npartitions % NTHREADS; // No. of Model lines that could be computed but are wasted by idle threads waiting on the end int Nwasted_threads = dcops->partition * ((NTHREADS-Niters_part) % NTHREADS); // No. of lines in the last partition that go beyond M. It's wasted comp time by a single thread int Nwasted_leftover= (dcops->partition - gm1->M % dcops->partition) % dcops->partition; // Total number of wasted lines int wastedcomp = Nwasted_threads + Nwasted_leftover; // Total number of lines computed and waited for int totalcomp = wastedcomp + gm1->M; // same as: roundtop(gm1->M, dcops->partition * NTHREADS); printf("Total Comp Lines: %d | Wasted Comp Lines: %d\n", totalcomp, wastedcomp); */ if (check) results = (float*) alloc_m128_aligned64((N+1)*2); ftime(&tbstart); if (!equallength) { // Sort sequences by length qsort(seqsdb, N*SSE16_NVALS, sizeof(SEQ*), compare_seqs); } for (j = 0; j < NROUNDS; j++) for (i = 0; i < N; i++) { // if (i % 1000 == 0) printf("Seq %d\n", i); p7_ViterbiCOPSw_run(dcops, seqsdb+i*SSE16_NVALS, sc1); if (check) memcpy(results+i*SSE16_NVALS, sc1, 32); // 32 bytes indeed! SSE16_NVALS floats } ftime(&tbend); double secs = TIMEDIFF(tbstart,tbend); w->elapsed = w->user = secs; esl_stopwatch_Display(stdout, w, "# Opt CPU time: "); double compmillioncells = NROUNDS * (double) sumlengths * (double) hmm->M * 1e-6; printf("# %.0fM cells in %.1f Mc/s\n", compmillioncells, compmillioncells / secs); if (check) { P7_OPROFILE *om = p7_oprofile_Create(hmm->M, gm1->abc); p7_oprofile_Convert(gm1, om); P7_OMX *ox = p7_omx_Create(hmm->M, 0, 0); printf("Compare results against base version\n"); for (i = 0; i < N; i++) { int maxll = 0; float sc2; for (j = 0; j < SSE16_NVALS; j++) if (maxll < seqsdb[i*SSE16_NVALS+j]->length) maxll = seqsdb[i*SSE16_NVALS+j]->length; p7_oprofile_ReconfigRestLength(om, maxll); // p7_ReconfigLength(gm2, maxll); // emulate the lock-step inter-sequence reconfigs for (j = 0; j < SSE16_NVALS; j++) { // p7_ReconfigLength(gm2, seqsdb[i*SSE16_NVALS+j]->length); // p7_Viterbi_unilocal(seqsdb[i*SSE16_NVALS+j]->seq, seqsdb[i*SSE16_NVALS+j]->length, gm2, &sc3); // p7_Viterbi_unilocal_word(seqsdb[i*SSE16_NVALS+j]->seq, seqsdb[i*SSE16_NVALS+j]->length, gm2, &sc2); // p7_oprofile_ReconfigLength(om, seqsdb[i*SSE16_NVALS+j]->length); p7_ViterbiFilter(seqsdb[i*SSE16_NVALS+j]->seq, seqsdb[i*SSE16_NVALS+j]->length, om, ox, &sc2); sc2 += 1.0; // -2.0nat optimization, Local to Unilocal mode if (fabs(results[i*SSE16_NVALS+j] - sc2) > 0.0001) { printf("Seq %d Len %4d: %f - %f\tdiff: %f\n", i*SSE16_NVALS+j, seqsdb[i*SSE16_NVALS+j]->length, results[i*SSE16_NVALS+j], sc2, fabs(results[i*SSE16_NVALS+j] - sc2)); } } } } return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); int N = esl_opt_GetInteger(go, "-N"); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_TRACE *tr = NULL; ESL_SQ *sq = NULL; P7_ALIDISPLAY *ad = NULL; int i,z; if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); p7_hmmfile_Close(hfp); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, 0); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, 0, p7_UNIGLOCAL); /* that sets N,C,J to generate nothing */ om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); if (esl_opt_GetBoolean(go, "-p")) tr = p7_trace_CreateWithPP(); else tr = p7_trace_Create(); sq = esl_sq_CreateDigital(abc); esl_stopwatch_Start(w); for (i = 0; i < N; i++) { p7_ProfileEmit(r, hmm, gm, bg, sq, tr); esl_sq_SetName(sq, "random"); if (! esl_opt_GetBoolean(go, "-b")) { if (esl_opt_GetBoolean(go, "-p")) for (z = 0; z < tr->N; z++) if (tr->i[z] > 0) tr->pp[z] = esl_random(r); ad = p7_alidisplay_Create(tr, 0, om, sq); p7_alidisplay_Print(stdout, ad, 40, 80, FALSE); p7_alidisplay_Destroy(ad); } p7_trace_Reuse(tr); esl_sq_Reuse(sq); } esl_stopwatch_Stop(w); esl_stopwatch_Display(stdout, w, "# CPU time: "); esl_sq_Destroy(sq); p7_trace_Destroy(tr); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); esl_stopwatch_Destroy(w); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = NULL; /* command line processing */ ESL_ALPHABET *abc = NULL; char *hmmfile = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; int nhmm; double x; float KL; int status; char errbuf[eslERRBUFSIZE]; float nseq; int do_eval2score = 0; int do_score2eval = 0; int z_val; float e_val; float s_val; /* Process the command line options. */ go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK || esl_opt_VerifyConfig(go) != eslOK) { printf("Failed to parse command line: %s\n", go->errbuf); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } if (esl_opt_GetBoolean(go, "-h") == TRUE) { p7_banner(stdout, argv[0], banner); esl_usage(stdout, argv[0], usage); puts("\nOptions:"); esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=docgroup, 2 = indentation; 80=textwidth*/ exit(0); } if (esl_opt_ArgNumber(go) != 1) { puts("Incorrect number of command line arguments."); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } if ((hmmfile = esl_opt_GetArg(go, 1)) == NULL) { puts("Failed to read <hmmfile> argument from command line."); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } output_header(stdout, go); if ( esl_opt_IsOn(go, "--eval2score") ) { do_eval2score = TRUE; e_val = esl_opt_GetReal(go, "-E"); } else if ( esl_opt_IsOn(go, "--score2eval") ) { do_score2eval = TRUE; s_val = esl_opt_GetReal(go, "-S"); } else if ( esl_opt_IsUsed(go, "--baseZ") || esl_opt_IsUsed(go, "--baseZ1") || esl_opt_IsUsed(go, "-Z") ) { puts("The flags -Z, --baseZ, and --baseZ1 are for use with --eval2score and --score2eval."); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } if (esl_opt_IsUsed(go, "--baseZ") ) { z_val = 1000000 * 2 * (long)(esl_opt_GetInteger(go, "--baseZ")); } else if (esl_opt_IsUsed(go, "--baseZ1") ) { z_val = 1000000 * (long)(esl_opt_GetInteger(go, "--baseZ1")); } else { z_val = esl_opt_GetInteger(go, "-Z"); } /* Initializations: open the HMM file */ status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf); if (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status == eslEFORMAT) p7_Fail("File format problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status != eslOK) p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n", status, hmmfile, errbuf); /* Main body: read HMMs one at a time, print one line of stats */ printf("#\n"); printf("# %-4s %-20s %-12s %8s %8s %6s %6s %6s %6s %6s", "idx", "name", "accession", "nseq", "eff_nseq", "M", "relent", "info", "p relE", "compKL"); if (do_eval2score) printf (" %6s %6.2g", "sc for", e_val); if (do_score2eval) printf (" %6s %6.2f", "E-val for", s_val); printf("\n"); printf("# %-4s %-20s %-12s %8s %8s %6s %6s %6s %6s %6s", "----", "--------------------", "------------", "--------", "--------", "------", "------", "------", "------", "------"); if (do_eval2score) printf (" %13s", "-------------"); if (do_score2eval) printf (" %13s", "-------------"); printf("\n"); nhmm = 0; while ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) != eslEOF) { if (status == eslEOD) esl_fatal("read failed, HMM file %s may be truncated?", hmmfile); else if (status == eslEFORMAT) esl_fatal("bad file format in HMM file %s", hmmfile); else if (status == eslEINCOMPAT) esl_fatal("HMM file %s contains different alphabets", hmmfile); else if (status != eslOK) esl_fatal("Unexpected error in reading HMMs from %s", hmmfile); nhmm++; if ( esl_opt_IsOn(go, "--eval2score") || esl_opt_IsOn(go, "--score2eval") ) { if (esl_opt_IsUsed(go, "--baseZ") || esl_opt_IsUsed(go, "--baseZ1" ) ) { if ( hmm->abc->type != eslRNA && hmm->abc->type != eslDNA) { puts("The flags --baseZ and --baseZ1 can't be used with non-nucleotide models."); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } } else if ( hmm->abc->type != eslAMINO && hmm->abc->type != eslRNA && hmm->abc->type != eslDNA) { puts("The flags --eval2score and --score2eval can't be used with non-sequence models."); esl_usage(stdout, argv[0], usage); printf("\nTo see more help on available options, do %s -h\n\n", argv[0]); exit(1); } } if (esl_opt_IsUsed(go, "--baseZ") ) { nseq = (float)z_val / (float)(hmm->max_length); } else if (esl_opt_IsUsed(go, "--baseZ1") ) { nseq = (float)z_val / (float)(hmm->max_length); } else { nseq = z_val; } if (bg == NULL) bg = p7_bg_Create(abc); p7_MeanPositionRelativeEntropy(hmm, bg, &x); p7_hmm_CompositionKLDist(hmm, bg, &KL, NULL); printf("%-6d %-20s %-12s %8d %8.2f %6d %6.2f %6.2f %6.2f %6.2f", nhmm, hmm->name, hmm->acc == NULL ? "-" : hmm->acc, hmm->nseq, hmm->eff_nseq, hmm->M, p7_MeanMatchRelativeEntropy(hmm, bg), p7_MeanMatchInfo(hmm, bg), x, KL); if ( do_eval2score ) { float sc; sc = esl_exp_invsurv( e_val / nseq , hmm->evparam[p7_FTAU], hmm->evparam[p7_FLAMBDA]); printf("%13.2f", sc); } else if ( do_score2eval) { float e; e = nseq * esl_exp_surv( s_val , hmm->evparam[p7_FTAU], hmm->evparam[p7_FLAMBDA]); printf("%13.2g", e); } printf("\n"); /* p7_MeanForwardScore(hmm, bg)); */ p7_hmm_Destroy(hmm); } p7_bg_Destroy(bg); esl_alphabet_Destroy(abc); p7_hmmfile_Close(hfp); esl_getopts_Destroy(go); exit(0); }
int main(int argc, char **argv) { char *hmmfile = argv[1]; char *seqfile = argv[2]; ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_OMX *ox = NULL; P7_GMX *gx = NULL; ESL_SQ *sq = NULL; ESL_SQFILE *sqfp = NULL; int format = eslSQFILE_UNKNOWN; float sc; int status; /* Read in one HMM */ if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); /* Read in one sequence */ sq = esl_sq_CreateDigital(abc); status = esl_sqfile_Open(seqfile, format, NULL, &sqfp); if (status == eslENOTFOUND) p7_Fail("No such file."); else if (status == eslEFORMAT) p7_Fail("Format unrecognized."); else if (status == eslEINVAL) p7_Fail("Can't autodetect stdin or .gz."); else if (status != eslOK) p7_Fail("Open failed, code %d.", status); if (esl_sqio_Read(sqfp, sq) != eslOK) p7_Fail("Failed to read sequence"); /* create default null model, then create and optimize profile */ bg = p7_bg_Create(abc); p7_bg_SetLength(bg, sq->n); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_Logify(om); /* allocate DP matrices, both a generic and an optimized one */ ox = p7_omx_Create(gm->M, 0, sq->n); gx = p7_gmx_Create(gm->M, sq->n); /* Useful to place and compile in for debugging: p7_oprofile_Dump(stdout, om); dumps the optimized profile p7_omx_SetDumpMode(ox, TRUE); makes the fast DP algorithms dump their matrices p7_gmx_Dump(stdout, gx); dumps a generic DP matrix */ p7_ViterbiScore(sq->dsq, sq->n, om, ox, &sc); printf("viterbi (non-optimized): %.2f nats\n", sc); p7_GViterbi (sq->dsq, sq->n, gm, gx, &sc); printf("viterbi (generic): %.2f nats\n", sc); /* now in a real app, you'd need to convert raw nat scores to final bit * scores, by subtracting the null model score and rescaling. */ /* cleanup */ esl_sq_Destroy(sq); esl_sqfile_Close(sqfp); p7_omx_Destroy(ox); p7_gmx_Destroy(gx); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); return 0; }
/* serial_master() * The serial version of hmmsearch. * For each query HMM in <hmmdb> search the database for hits. * * A master can only return if it's successful. All errors are handled * immediately and fatally with p7_Fail(). */ static int serial_master(ESL_GETOPTS *go, struct cfg_s *cfg) { FILE *ofp = stdout; /* output file for results (default stdout) */ FILE *tblfp = NULL; /* output stream for tabular per-seq (--tblout) */ FILE *dfamtblfp = NULL; /* output stream for tabular Dfam format (--dfamtblout) */ FILE *aliscoresfp = NULL; /* output stream for alignment scores (--aliscoresout) */ // P7_HMM *hmm = NULL; /* one HMM query */ // P7_SCOREDATA *scoredata = NULL; int seqfmt = eslSQFILE_UNKNOWN; /* format of seqfile */ ESL_SQFILE *sqfp = NULL; /* open seqfile */ P7_HMMFILE *hfp = NULL; /* open HMM database file */ ESL_ALPHABET *abc = NULL; /* sequence alphabet */ P7_OPROFILE *om = NULL; /* target profile */ ESL_STOPWATCH *w = NULL; /* timing */ ESL_SQ *qsq = NULL; /* query sequence */ int nquery = 0; int textw; int status = eslOK; int hstatus = eslOK; int sstatus = eslOK; int i; int ncpus = 0; int infocnt = 0; WORKER_INFO *info = NULL; #ifdef HMMER_THREADS P7_OM_BLOCK *block = NULL; ESL_THREADS *threadObj= NULL; ESL_WORK_QUEUE *queue = NULL; #endif char errbuf[eslERRBUFSIZE]; double window_beta = -1.0 ; int window_length = -1; if (esl_opt_IsUsed(go, "--w_beta")) { if ( ( window_beta = esl_opt_GetReal(go, "--w_beta") ) < 0 || window_beta > 1 ) esl_fatal("Invalid window-length beta value\n"); } if (esl_opt_IsUsed(go, "--w_length")) { if (( window_length = esl_opt_GetInteger(go, "--w_length")) < 4 ) esl_fatal("Invalid window length value\n"); } w = esl_stopwatch_Create(); if (esl_opt_GetBoolean(go, "--notextw")) textw = 0; else textw = esl_opt_GetInteger(go, "--textw"); /* If caller declared an input format, decode it */ if (esl_opt_IsOn(go, "--qformat")) { seqfmt = esl_sqio_EncodeFormat(esl_opt_GetString(go, "--qformat")); if (seqfmt == eslSQFILE_UNKNOWN) p7_Fail("%s is not a recognized input sequence file format\n", esl_opt_GetString(go, "--qformat")); } /* validate options if running as a daemon */ // if (esl_opt_IsOn(go, "--daemon")) { /* running as a daemon, the input format must be type daemon */ // if (seqfmt != eslSQFILE_UNKNOWN && seqfmt != eslSQFILE_DAEMON) // esl_fatal("Input format %s not supported. Must be daemon\n", esl_opt_GetString(go, "--qformat")); // seqfmt = eslSQFILE_DAEMON; // if (strcmp(cfg->seqfile, "-") != 0) esl_fatal("Query sequence file must be '-'\n"); // } /* Open the target profile database to get the sequence alphabet */ status = p7_hmmfile_OpenE(cfg->hmmfile, p7_HMMDBENV, &hfp, errbuf); if (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", cfg->hmmfile, errbuf); else if (status == eslEFORMAT) p7_Fail("File format problem, trying to open HMM file %s.\n%s\n", cfg->hmmfile, errbuf); else if (status != eslOK) p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n", status, cfg->hmmfile, errbuf); if (! hfp->is_pressed) p7_Fail("Failed to open binary auxfiles for %s: use hmmpress first\n", hfp->fname); hstatus = p7_oprofile_ReadMSV(hfp, &abc, &om); if (hstatus == eslEFORMAT) p7_Fail("bad format, binary auxfiles, %s:\n%s", cfg->hmmfile, hfp->errbuf); else if (hstatus == eslEINCOMPAT) p7_Fail("HMM file %s contains different alphabets", cfg->hmmfile); else if (hstatus != eslOK) p7_Fail("Unexpected error in reading HMMs from %s", cfg->hmmfile); p7_oprofile_Destroy(om); p7_hmmfile_Close(hfp); /* Open the query sequence database */ status = esl_sqfile_OpenDigital(abc, cfg->seqfile, seqfmt, NULL, &sqfp); if (status == eslENOTFOUND) p7_Fail("Failed to open sequence file %s for reading\n", cfg->seqfile); else if (status == eslEFORMAT) p7_Fail("Sequence file %s is empty or misformatted\n", cfg->seqfile); else if (status == eslEINVAL) p7_Fail("Can't autodetect format of a stdin or .gz seqfile"); else if (status != eslOK) p7_Fail("Unexpected error %d opening sequence file %s\n", status, cfg->seqfile); if (sqfp->format > 100) // breaking the law! That range is reserved for msa, for aligned formats p7_Fail("%s contains a multiple sequence alignment; expect unaligned sequences, like FASTA\n", cfg->seqfile); qsq = esl_sq_CreateDigital(abc); /* Open the results output files */ if (esl_opt_IsOn(go, "-o")) { if ((ofp = fopen(esl_opt_GetString(go, "-o"), "w")) == NULL) esl_fatal("Failed to open output file %s for writing\n", esl_opt_GetString(go, "-o")); } if (esl_opt_IsOn(go, "--tblout")) { if ((tblfp = fopen(esl_opt_GetString(go, "--tblout"), "w")) == NULL) esl_fatal("Failed to open tabular per-seq output file %s for writing\n", esl_opt_GetString(go, "--tblfp")); } if (esl_opt_IsOn(go, "--dfamtblout")) { if ((dfamtblfp = fopen(esl_opt_GetString(go, "--dfamtblout"),"w")) == NULL) esl_fatal("Failed to open tabular dfam output file %s for writing\n", esl_opt_GetString(go, "--dfamtblout")); } if (esl_opt_IsOn(go, "--aliscoresout")) { if ((aliscoresfp = fopen(esl_opt_GetString(go, "--aliscoresout"),"w")) == NULL) esl_fatal("Failed to open alignment scores output file %s for writing\n", esl_opt_GetString(go, "--aliscoresout")); } output_header(ofp, go, cfg->hmmfile, cfg->seqfile); #ifdef HMMER_THREADS /* initialize thread data */ if (esl_opt_IsOn(go, "--cpu")) ncpus = esl_opt_GetInteger(go, "--cpu"); else esl_threads_CPUCount(&ncpus); if (ncpus > 0) { threadObj = esl_threads_Create(&pipeline_thread); queue = esl_workqueue_Create(ncpus * 2); } #endif infocnt = (ncpus == 0) ? 1 : ncpus; ESL_ALLOC(info, sizeof(*info) * infocnt); for (i = 0; i < infocnt; ++i) { info[i].bg = p7_bg_Create(abc); #ifdef HMMER_THREADS info[i].queue = queue; #endif } #ifdef HMMER_THREADS for (i = 0; i < ncpus * 2; ++i) { block = p7_oprofile_CreateBlock(BLOCK_SIZE); if (block == NULL) esl_fatal("Failed to allocate sequence block"); status = esl_workqueue_Init(queue, block); if (status != eslOK) esl_fatal("Failed to add block to work queue"); } #endif /* Outside loop: over each query sequence in <seqfile>. */ while ((sstatus = esl_sqio_Read(sqfp, qsq)) == eslOK) { if (sstatus == eslEMEM) p7_Fail("Memory allocation error reading sequence file\n", status); if (sstatus == eslEINCONCEIVABLE) p7_Fail("Unexpected error %d reading sequence file\n", status); // if (qsq->L > NHMMER_MAX_RESIDUE_COUNT) p7_Fail("Input sequence %s in file %s exceeds maximum length of %d bases.\n", qsq->name, cfg->seqfile, NHMMER_MAX_RESIDUE_COUNT); nquery++; esl_stopwatch_Start(w); /* Open the target profile database */ status = p7_hmmfile_OpenE(cfg->hmmfile, p7_HMMDBENV, &hfp, NULL); if (status != eslOK) p7_Fail("Unexpected error %d in opening hmm file %s.\n", status, cfg->hmmfile); #ifdef HMMER_THREADS /* if we are threaded, create a lock to prevent multiple readers */ if (ncpus > 0) { status = p7_hmmfile_CreateLock(hfp); if (status != eslOK) p7_Fail("Unexpected error %d creating lock\n", status); } #endif if (fprintf(ofp, "Query: %s [L=%ld]\n", qsq->name, (long) qsq->n) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed"); if (qsq->acc[0] != 0 && fprintf(ofp, "Accession: %s\n", qsq->acc) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed"); if (qsq->desc[0] != 0 && fprintf(ofp, "Description: %s\n", qsq->desc) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed"); for (i = 0; i < infocnt; ++i) { /* Create processing pipeline and hit list */ info[i].th = p7_tophits_Create(); info[i].pli = p7_pipeline_Create(go, 100, 100, TRUE, p7_SCAN_MODELS); /* M_hint = 100, L_hint = 100 are just dummies for now */ info[i].pli->hfp = hfp; /* for two-stage input, pipeline needs <hfp> */ p7_pli_NewSeq(info[i].pli, qsq); info[i].qsq = qsq; if ( esl_opt_IsUsed(go, "--toponly") ) info[i].pli->strand = p7_STRAND_TOPONLY; else if ( esl_opt_IsUsed(go, "--bottomonly") ) info[i].pli->strand = p7_STRAND_BOTTOMONLY; else info[i].pli->strand = p7_STRAND_BOTH; #ifdef HMMER_THREADS if (ncpus > 0) esl_threads_AddThread(threadObj, &info[i]); #endif } #ifdef HMMER_THREADS if (ncpus > 0) hstatus = thread_loop(threadObj, queue, hfp); else hstatus = serial_loop(info, hfp); #else hstatus = serial_loop(info, hfp); #endif switch(hstatus) { case eslEFORMAT: p7_Fail("bad file format in HMM file %s", cfg->hmmfile); break; case eslEINCOMPAT: p7_Fail("HMM file %s contains different alphabets", cfg->hmmfile); break; case eslEOF: case eslOK: /* do nothing */ break; default: p7_Fail("Unexpected error in reading HMMs from %s", cfg->hmmfile); } /* merge the results of the search results */ for (i = 1; i < infocnt; ++i) { p7_tophits_Merge(info[0].th, info[i].th); p7_pipeline_Merge(info[0].pli, info[i].pli); p7_pipeline_Destroy(info[i].pli); p7_tophits_Destroy(info[i].th); } /* modify e-value to account for number of models */ for (i = 0; i < info->th->N ; i++) { info->th->unsrt[i].lnP += log((float)info->pli->nmodels); info->th->unsrt[i].dcl[0].lnP = info->th->unsrt[i].lnP; info->th->unsrt[i].sortkey = -1.0 * info->th->unsrt[i].lnP; } /* it's possible to have duplicates based on how viterbi ranges can overlap */ p7_tophits_SortByModelnameAndAlipos(info->th); p7_tophits_RemoveDuplicates(info->th, info->pli->use_bit_cutoffs); /* Print results */ p7_tophits_SortBySortkey(info->th); p7_tophits_Threshold(info->th, info->pli); //tally up total number of hits and target coverage info->pli->n_output = info->pli->pos_output = 0; for (i = 0; i < info->th->N; i++) { if ( (info->th->hit[i]->flags & p7_IS_REPORTED) || info->th->hit[i]->flags & p7_IS_INCLUDED) { info->pli->n_output++; info->pli->pos_output += abs(info->th->hit[i]->dcl[0].jali - info->th->hit[i]->dcl[0].iali) + 1; } } p7_tophits_Targets(ofp, info->th, info->pli, textw); if (fprintf(ofp, "\n\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed"); p7_tophits_Domains(ofp, info->th, info->pli, textw); if (fprintf(ofp, "\n\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed"); if (tblfp) p7_tophits_TabularTargets(tblfp, qsq->name, qsq->acc, info->th, info->pli, (nquery == 1)); if (dfamtblfp) p7_tophits_TabularXfam(dfamtblfp, qsq->name, NULL, info->th, info->pli); if (aliscoresfp) p7_tophits_AliScores(aliscoresfp, qsq->name, info->th ); esl_stopwatch_Stop(w); info->pli->nseqs = 1; p7_pli_Statistics(ofp, info->pli, w); if (fprintf(ofp, "//\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed"); fflush(ofp); p7_hmmfile_Close(hfp); p7_pipeline_Destroy(info->pli); p7_tophits_Destroy(info->th); esl_sq_Reuse(qsq); } if (sstatus == eslEFORMAT) esl_fatal("Parse failed (sequence file %s):\n%s\n", sqfp->filename, esl_sqfile_GetErrorBuf(sqfp)); else if (sstatus != eslEOF) esl_fatal("Unexpected error %d reading sequence file %s", sstatus, sqfp->filename); /* Terminate outputs - any last words? */ if (tblfp) p7_tophits_TabularTail(tblfp, "hmmscan", p7_SCAN_MODELS, cfg->seqfile, cfg->hmmfile, go); if (ofp) { if (fprintf(ofp, "[ok]\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "write failed"); } /* Cleanup - prepare for successful exit */ for (i = 0; i < infocnt; ++i) p7_bg_Destroy(info[i].bg); #ifdef HMMER_THREADS if (ncpus > 0) { esl_workqueue_Reset(queue); while (esl_workqueue_Remove(queue, (void **) &block) == eslOK) p7_oprofile_DestroyBlock(block); esl_workqueue_Destroy(queue); esl_threads_Destroy(threadObj); } #endif free(info); esl_sq_Destroy(qsq); esl_stopwatch_Destroy(w); esl_alphabet_Destroy(abc); esl_sqfile_Close(sqfp); if (ofp != stdout) fclose(ofp); if (tblfp) fclose(tblfp); if (dfamtblfp) fclose(dfamtblfp); if (aliscoresfp) fclose(aliscoresfp); return eslOK; ERROR: if (ofp != stdout) fclose(ofp); if (tblfp) fclose(tblfp); if (dfamtblfp) fclose(dfamtblfp); if (aliscoresfp) fclose(aliscoresfp); return status; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); char *seqfile = esl_opt_GetArg(go, 2); ESL_ALPHABET *abc = NULL; ESL_RANDOMNESS *rng = esl_randomness_CreateFast(0); P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_GMX *gx = NULL; P7_OMX *fwd = NULL; P7_TRACE *tr = NULL; ESL_SQ *sq = NULL; ESL_SQFILE *sqfp = NULL; int format = eslSQFILE_UNKNOWN; int N = esl_opt_GetInteger(go, "-N"); int i; float vsc, fsc, tsc; char errbuf[eslERRBUFSIZE]; int status; /* Read in one HMM */ if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); /* Read in one sequence */ sq = esl_sq_CreateDigital(abc); status = esl_sqfile_Open(seqfile, format, NULL, &sqfp); if (status == eslENOTFOUND) p7_Fail("No such file."); else if (status == eslEFORMAT) p7_Fail("Format unrecognized."); else if (status == eslEINVAL) p7_Fail("Can't autodetect stdin or .gz."); else if (status != eslOK) p7_Fail("Open failed, code %d.", status); if (esl_sqio_Read(sqfp, sq) != eslOK) p7_Fail("Failed to read sequence"); /* create default null model, then create and optimize profile */ bg = p7_bg_Create(abc); p7_bg_SetLength(bg, sq->n); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); if (esl_opt_GetBoolean(go, "-p")) p7_oprofile_Dump(stdout, om); fwd = p7_omx_Create(gm->M, sq->n, sq->n); gx = p7_gmx_Create(gm->M, sq->n); tr = p7_trace_Create(); if (esl_opt_GetBoolean(go, "-m") == TRUE) p7_omx_SetDumpMode(stdout, fwd, TRUE); p7_GViterbi(sq->dsq, sq->n, gm, gx, &vsc); p7_Forward (sq->dsq, sq->n, om, fwd, &fsc); for (i = 0; i < N; i++) { p7_StochasticTrace(rng, sq->dsq, sq->n, om, fwd, tr); p7_trace_Score(tr, sq->dsq, gm, &tsc); if (esl_opt_GetBoolean(go, "-t") == TRUE) p7_trace_Dump(stdout, tr, gm, sq->dsq); if (p7_trace_Validate(tr, abc, sq->dsq, errbuf) != eslOK) p7_Die("trace %d fails validation:\n%s\n", i, errbuf); printf("Sampled trace: %.4f nats\n", tsc); p7_trace_Reuse(tr); } printf("Forward score: %.4f nats\n", fsc); printf("Viterbi score: %.4f nats\n", vsc); /* cleanup */ esl_sq_Destroy(sq); esl_sqfile_Close(sqfp); p7_trace_Destroy(tr); p7_omx_Destroy(fwd); p7_gmx_Destroy(gx); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_randomness_Destroy(rng); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
static void search_thread(void *arg) { int i; int count; int seed; int status; int workeridx; WORKER_INFO *info; ESL_THREADS *obj; ESL_SQ dbsq; ESL_STOPWATCH *w = NULL; /* timing stopwatch */ P7_BUILDER *bld = NULL; /* HMM construction configuration */ P7_BG *bg = NULL; /* null model */ P7_PIPELINE *pli = NULL; /* work pipeline */ P7_TOPHITS *th = NULL; /* top hit results */ P7_PROFILE *gm = NULL; /* generic model */ P7_OPROFILE *om = NULL; /* optimized query profile */ obj = (ESL_THREADS *) arg; esl_threads_Started(obj, &workeridx); info = (WORKER_INFO *) esl_threads_GetData(obj, workeridx); w = esl_stopwatch_Create(); bg = p7_bg_Create(info->abc); esl_stopwatch_Start(w); /* set up the dummy description and accession fields */ dbsq.desc = ""; dbsq.acc = ""; /* process a query sequence or hmm */ if (info->seq != NULL) { bld = p7_builder_Create(NULL, info->abc); if ((seed = esl_opt_GetInteger(info->opts, "--seed")) > 0) { esl_randomness_Init(bld->r, seed); bld->do_reseeding = TRUE; } bld->EmL = esl_opt_GetInteger(info->opts, "--EmL"); bld->EmN = esl_opt_GetInteger(info->opts, "--EmN"); bld->EvL = esl_opt_GetInteger(info->opts, "--EvL"); bld->EvN = esl_opt_GetInteger(info->opts, "--EvN"); bld->EfL = esl_opt_GetInteger(info->opts, "--EfL"); bld->EfN = esl_opt_GetInteger(info->opts, "--EfN"); bld->Eft = esl_opt_GetReal (info->opts, "--Eft"); if (esl_opt_IsOn(info->opts, "--mxfile")) status = p7_builder_SetScoreSystem (bld, esl_opt_GetString(info->opts, "--mxfile"), NULL, esl_opt_GetReal(info->opts, "--popen"), esl_opt_GetReal(info->opts, "--pextend"), bg); else status = p7_builder_LoadScoreSystem(bld, esl_opt_GetString(info->opts, "--mx"), esl_opt_GetReal(info->opts, "--popen"), esl_opt_GetReal(info->opts, "--pextend"), bg); if (status != eslOK) { //client_error(info->sock, status, "hmmgpmd: failed to set single query sequence score system: %s", bld->errbuf); fprintf(stderr, "hmmpgmd: failed to set single query sequence score system: %s", bld->errbuf); pthread_exit(NULL); return; } p7_SingleBuilder(bld, info->seq, bg, NULL, NULL, NULL, &om); /* bypass HMM - only need model */ p7_builder_Destroy(bld); } else { gm = p7_profile_Create (info->hmm->M, info->abc); om = p7_oprofile_Create(info->hmm->M, info->abc); p7_ProfileConfig(info->hmm, bg, gm, 100, p7_LOCAL); p7_oprofile_Convert(gm, om); } /* Create processing pipeline and hit list */ th = p7_tophits_Create(); pli = p7_pipeline_Create(info->opts, om->M, 100, FALSE, p7_SEARCH_SEQS); p7_pli_NewModel(pli, om, bg); if (pli->Z_setby == p7_ZSETBY_NTARGETS) pli->Z = info->db_Z; /* loop until all sequences have been processed */ count = 1; while (count > 0) { int inx; int blksz; HMMER_SEQ **sq; /* grab the next block of sequences */ if (pthread_mutex_lock(info->inx_mutex) != 0) p7_Fail("mutex lock failed"); inx = *info->inx; blksz = *info->blk_size; if (inx > *info->limit) { blksz /= 5; if (blksz < 1000) { *info->limit = info->sq_cnt * 2; } else { *info->limit = inx + (info->sq_cnt - inx) * 2 / 3; } } *info->blk_size = blksz; *info->inx += blksz; if (pthread_mutex_unlock(info->inx_mutex) != 0) p7_Fail("mutex unlock failed"); sq = info->sq_list + inx; count = info->sq_cnt - inx; if (count > blksz) count = blksz; /* Main loop: */ for (i = 0; i < count; ++i, ++sq) { if ( !(info->range_list) || hmmpgmd_IsWithinRanges ((*sq)->idx, info->range_list)) { dbsq.name = (*sq)->name; dbsq.dsq = (*sq)->dsq; dbsq.n = (*sq)->n; dbsq.idx = (*sq)->idx; if((*sq)->desc != NULL) dbsq.desc = (*sq)->desc; p7_bg_SetLength(bg, dbsq.n); p7_oprofile_ReconfigLength(om, dbsq.n); p7_Pipeline(pli, om, bg, &dbsq, th); p7_pipeline_Reuse(pli); } } } /* make available the pipeline objects to the main thread */ info->th = th; info->pli = pli; /* clean up */ p7_bg_Destroy(bg); p7_oprofile_Destroy(om); if (gm != NULL) p7_profile_Destroy(gm); esl_stopwatch_Stop(w); info->elapsed = w->elapsed; esl_stopwatch_Destroy(w); esl_threads_Finished(obj, workeridx); pthread_exit(NULL); return; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); char *seqfile = esl_opt_GetArg(go, 2); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_GMX *gx = NULL; P7_OMX *fwd = NULL; P7_OMX *bck = NULL; ESL_SQ *sq = NULL; ESL_SQFILE *sqfp = NULL; int format = eslSQFILE_UNKNOWN; float fraw, braw, nullsc, fsc; float gfraw, gbraw, gfsc; double P, gP; int status; /* Read in one HMM */ if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); /* Open sequence file for reading */ sq = esl_sq_CreateDigital(abc); status = esl_sqfile_Open(seqfile, format, NULL, &sqfp); if (status == eslENOTFOUND) p7_Fail("No such file."); else if (status == eslEFORMAT) p7_Fail("Format unrecognized."); else if (status == eslEINVAL) p7_Fail("Can't autodetect stdin or .gz."); else if (status != eslOK) p7_Fail("Open failed, code %d.", status); /* create default null model, then create and optimize profile */ bg = p7_bg_Create(abc); p7_bg_SetLength(bg, sq->n); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, sq->n, p7_UNILOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); /* p7_oprofile_Dump(stdout, om); */ /* allocate DP matrices for O(M+L) parsers */ fwd = p7_omx_Create(gm->M, 0, sq->n); bck = p7_omx_Create(gm->M, 0, sq->n); gx = p7_gmx_Create(gm->M, sq->n); /* allocate DP matrices for O(ML) fills */ /* fwd = p7_omx_Create(gm->M, sq->n, sq->n); */ /* bck = p7_omx_Create(gm->M, sq->n, sq->n); */ /* p7_omx_SetDumpMode(stdout, fwd, TRUE); */ /* makes the fast DP algorithms dump their matrices */ /* p7_omx_SetDumpMode(stdout, bck, TRUE); */ while ((status = esl_sqio_Read(sqfp, sq)) == eslOK) { p7_oprofile_ReconfigLength(om, sq->n); p7_ReconfigLength(gm, sq->n); p7_bg_SetLength(bg, sq->n); p7_omx_GrowTo(fwd, om->M, 0, sq->n); p7_omx_GrowTo(bck, om->M, 0, sq->n); p7_gmx_GrowTo(gx, gm->M, sq->n); p7_bg_NullOne (bg, sq->dsq, sq->n, &nullsc); p7_ForwardParser (sq->dsq, sq->n, om, fwd, &fraw); p7_BackwardParser(sq->dsq, sq->n, om, fwd, bck, &braw); /* p7_Forward (sq->dsq, sq->n, om, fwd, &fsc); printf("forward: %.2f nats\n", fsc); */ /* p7_Backward(sq->dsq, sq->n, om, fwd, bck, &bsc); printf("backward: %.2f nats\n", bsc); */ /* Comparison to other F/B implementations */ p7_GForward (sq->dsq, sq->n, gm, gx, &gfraw); p7_GBackward (sq->dsq, sq->n, gm, gx, &gbraw); /* p7_gmx_Dump(stdout, gx); */ fsc = (fraw-nullsc) / eslCONST_LOG2; gfsc = (gfraw-nullsc) / eslCONST_LOG2; P = esl_exp_surv(fsc, om->evparam[p7_FTAU], om->evparam[p7_FLAMBDA]); gP = esl_exp_surv(gfsc, gm->evparam[p7_FTAU], gm->evparam[p7_FLAMBDA]); if (esl_opt_GetBoolean(go, "-1")) { printf("%-30s\t%-20s\t%9.2g\t%6.1f\t%9.2g\t%6.1f\n", sq->name, hmm->name, P, fsc, gP, gfsc); } else if (esl_opt_GetBoolean(go, "-P")) { /* output suitable for direct use in profmark benchmark postprocessors: */ printf("%g\t%.2f\t%s\t%s\n", P, fsc, sq->name, hmm->name); } else { printf("target sequence: %s\n", sq->name); printf("fwd filter raw score: %.2f nats\n", fraw); printf("bck filter raw score: %.2f nats\n", braw); printf("null score: %.2f nats\n", nullsc); printf("per-seq score: %.2f bits\n", fsc); printf("P-value: %g\n", P); printf("GForward raw score: %.2f nats\n", gfraw); printf("GBackward raw score: %.2f nats\n", gbraw); printf("GForward seq score: %.2f bits\n", gfsc); printf("GForward P-value: %g\n", gP); } esl_sq_Reuse(sq); } /* cleanup */ esl_sq_Destroy(sq); esl_sqfile_Close(sqfp); p7_omx_Destroy(bck); p7_omx_Destroy(fwd); p7_gmx_Destroy(gx); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
static void scan_thread(void *arg) { int i; int count; int workeridx; WORKER_INFO *info; ESL_THREADS *obj; ESL_STOPWATCH *w; P7_BG *bg = NULL; /* null model */ P7_PIPELINE *pli = NULL; /* work pipeline */ P7_TOPHITS *th = NULL; /* top hit results */ obj = (ESL_THREADS *) arg; esl_threads_Started(obj, &workeridx); info = (WORKER_INFO *) esl_threads_GetData(obj, workeridx); w = esl_stopwatch_Create(); esl_stopwatch_Start(w); /* Convert to an optimized model */ bg = p7_bg_Create(info->abc); /* Create processing pipeline and hit list */ th = p7_tophits_Create(); pli = p7_pipeline_Create(info->opts, 100, 100, FALSE, p7_SCAN_MODELS); p7_pli_NewSeq(pli, info->seq); /* loop until all sequences have been processed */ count = 1; while (count > 0) { int inx; int blksz; P7_OPROFILE **om; /* grab the next block of sequences */ if (pthread_mutex_lock(info->inx_mutex) != 0) p7_Fail("mutex lock failed"); inx = *info->inx; blksz = *info->blk_size; if (inx > *info->limit) { blksz /= 5; if (blksz < 1000) { *info->limit = info->om_cnt * 2; } else { *info->limit = inx + (info->om_cnt - inx) * 2 / 3; } } *info->blk_size = blksz; *info->inx += blksz; if (pthread_mutex_unlock(info->inx_mutex) != 0) p7_Fail("mutex unlock failed"); om = info->om_list + inx; count = info->om_cnt - inx; if (count > blksz) count = blksz; /* Main loop: */ for (i = 0; i < count; ++i, ++om) { p7_pli_NewModel(pli, *om, bg); p7_bg_SetLength(bg, info->seq->n); p7_oprofile_ReconfigLength(*om, info->seq->n); p7_Pipeline(pli, *om, bg, info->seq, th); p7_pipeline_Reuse(pli); } } /* make available the pipeline objects to the main thread */ info->th = th; info->pli = pli; /* clean up */ p7_bg_Destroy(bg); esl_stopwatch_Stop(w); info->elapsed = w->elapsed; esl_stopwatch_Destroy(w); esl_threads_Finished(obj, workeridx); pthread_exit(NULL); return; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_GMX *gx = NULL; P7_OMX *fwd = NULL; P7_OMX *bck = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float fsc, bsc; float fsc2, bsc2; double base_time, bench_time, Mcs; p7_FLogsumInit(); if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_ReconfigLength(om, L); if (esl_opt_GetBoolean(go, "-x") && p7_FLogsumError(-0.4, -0.5) > 0.0001) p7_Fail("-x here requires p7_Logsum() recompiled in slow exact mode"); if (esl_opt_GetBoolean(go, "-P")) { fwd = p7_omx_Create(gm->M, 0, L); bck = p7_omx_Create(gm->M, 0, L); } else { fwd = p7_omx_Create(gm->M, L, L); bck = p7_omx_Create(gm->M, L, L); } gx = p7_gmx_Create(gm->M, L); /* Get a baseline time: how long it takes just to generate the sequences */ esl_stopwatch_Start(w); for (i = 0; i < N; i++) esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); esl_stopwatch_Stop(w); base_time = w->user; esl_stopwatch_Start(w); for (i = 0; i < N; i++) { esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); if (esl_opt_GetBoolean(go, "-P")) { if (! esl_opt_GetBoolean(go, "-B")) p7_ForwardParser (dsq, L, om, fwd, &fsc); if (! esl_opt_GetBoolean(go, "-F")) p7_BackwardParser(dsq, L, om, fwd, bck, &bsc); } else { if (! esl_opt_GetBoolean(go, "-B")) p7_Forward (dsq, L, om, fwd, &fsc); if (! esl_opt_GetBoolean(go, "-F")) p7_Backward(dsq, L, om, fwd, bck, &bsc); } if (esl_opt_GetBoolean(go, "-c") || esl_opt_GetBoolean(go, "-x")) { p7_GForward (dsq, L, gm, gx, &fsc2); p7_GBackward(dsq, L, gm, gx, &bsc2); printf("%.4f %.4f %.4f %.4f\n", fsc, bsc, fsc2, bsc2); } } esl_stopwatch_Stop(w); bench_time = w->user - base_time; Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) bench_time; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_omx_Destroy(bck); p7_omx_Destroy(fwd); p7_gmx_Destroy(gx); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); char *seqfile = esl_opt_GetArg(go, 2); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_GMX *fwd = NULL; ESL_SQ *sq = NULL; ESL_SQFILE *sqfp = NULL; P7_TRACE *tr = NULL; int format = eslSQFILE_UNKNOWN; char errbuf[eslERRBUFSIZE]; float sc; int d; int status; /* Read in one HMM */ if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); p7_hmmfile_Close(hfp); /* Read in one sequence */ sq = esl_sq_CreateDigital(abc); status = esl_sqfile_Open(seqfile, format, NULL, &sqfp); if (status == eslENOTFOUND) p7_Fail("No such file."); else if (status == eslEFORMAT) p7_Fail("Format unrecognized."); else if (status == eslEINVAL) p7_Fail("Can't autodetect stdin or .gz."); else if (status != eslOK) p7_Fail("Open failed, code %d.", status); if (esl_sqio_Read(sqfp, sq) != eslOK) p7_Fail("Failed to read sequence"); esl_sqfile_Close(sqfp); /* Configure a profile from the HMM */ bg = p7_bg_Create(abc); p7_bg_SetLength(bg, sq->n); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL); /* Allocate matrix and a trace */ fwd = p7_gmx_Create(gm->M, sq->n); tr = p7_trace_Create(); /* Run Viterbi; do traceback */ p7_GViterbi (sq->dsq, sq->n, gm, fwd, &sc); p7_GTrace (sq->dsq, sq->n, gm, fwd, tr); /* Dump and validate the trace. */ p7_trace_Dump(stdout, tr, gm, sq->dsq); if (p7_trace_Validate(tr, abc, sq->dsq, errbuf) != eslOK) p7_Die("trace fails validation:\n%s\n", errbuf); /* Domain info in the trace. */ p7_trace_Index(tr); printf("# Viterbi: %d domains : ", tr->ndom); for (d = 0; d < tr->ndom; d++) printf("%6d %6d %6d %6d ", tr->sqfrom[d], tr->sqto[d], tr->hmmfrom[d], tr->hmmto[d]); printf("\n"); /* Cleanup */ p7_trace_Destroy(tr); p7_gmx_Destroy(fwd); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_sq_Destroy(sq); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage); ESL_RANDOMNESS *rng = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; char *ghmmfile = esl_opt_GetArg(go, 1); /* HMMs parameterized for sequence generation */ char *ahmmfile = esl_opt_GetArg(go, 2); /* HMMs parameterized for alignment */ int N = esl_opt_GetInteger(go, "-N"); P7_HMMFILE *ghfp = NULL; P7_HMMFILE *ahfp = NULL; P7_HMM *ghmm = NULL; P7_HMM *ahmm = NULL; P7_PROFILE *ggm = NULL; P7_PROFILE *agm = NULL; P7_OPROFILE *aom = NULL; P7_BG *bg = NULL; ESL_SQ *sq = NULL; P7_TRACE *reftr = p7_trace_Create(); P7_TRACE *testtr = p7_trace_Create(); P7_TRACE_METRICS *tmetrics = p7_trace_metrics_Create(); P7_REFMX *rmx = p7_refmx_Create(100,100); // P7_FILTERMX *ox = NULL; P7_HARDWARE *hw; if ((hw = p7_hardware_Create ()) == NULL) p7_Fail("Couldn't get HW information data structure"); P7_SPARSEMASK *sm = p7_sparsemask_Create(100, 100, hw->simd); P7_SPARSEMX *sxv = p7_sparsemx_Create(NULL); int idx; char errbuf[eslERRBUFSIZE]; int status; p7_Init(); /* open HMM file containing models parameterized for generation (sampling) of seqs */ status = p7_hmmfile_OpenE(ghmmfile, NULL, &ghfp, errbuf); if (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", ghmmfile, errbuf); else if (status == eslEFORMAT) p7_Fail("File format problem in trying to open HMM file %s.\n%s\n", ghmmfile, errbuf); else if (status != eslOK) p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n", status, ghmmfile, errbuf); /* open HMM file containing models parameterized for alignment (may be the same as ghmmfile) */ status = p7_hmmfile_OpenE(ahmmfile, NULL, &ahfp, errbuf); if (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", ahmmfile, errbuf); else if (status == eslEFORMAT) p7_Fail("File format problem in trying to open HMM file %s.\n%s\n", ahmmfile, errbuf); else if (status != eslOK) p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n", status, ahmmfile, errbuf); while ( (status = p7_hmmfile_Read(ghfp, &abc, &ghmm)) == eslOK) /* <abc> gets set on first read */ { /* read the counterpart HMM from <ahfp> */ status = p7_hmmfile_Read(ahfp, &abc, &ahmm); if (status == eslEFORMAT) p7_Fail("Bad file format in HMM file %s:\n%s\n", ahfp->fname, ahfp->errbuf); else if (status == eslEINCOMPAT) p7_Fail("HMM in %s is not in the expected %s alphabet\n", ahfp->fname, esl_abc_DecodeType(abc->type)); else if (status == eslEOF) p7_Fail("Empty HMM file %s? No HMM data found.\n", ahfp->fname); else if (status != eslOK) p7_Fail("Unexpected error in reading HMMs from %s\n", ahfp->fname); /* try to validate that they're the "same" */ if (ahmm->M != ghmm->M || strcmp(ahmm->name, ghmm->name) != 0) p7_Fail("<gen-hmmfile>, <ali-hmmfile> contain different set or order of models"); /* deferred one-time creation of structures that need to know the alphabet */ if (!bg) bg = p7_bg_Create(abc); if (!sq) sq = esl_sq_CreateDigital(abc); ggm = p7_profile_Create(ghmm->M, abc); agm = p7_profile_Create(ahmm->M, abc); aom = p7_oprofile_Create(ahmm->M, abc, hw->simd); p7_profile_ConfigCustom(ggm, ghmm, bg, esl_opt_GetInteger(go, "--gL"), esl_opt_GetReal(go, "--gnj"), esl_opt_GetReal(go, "--gpglocal")); p7_profile_ConfigCustom(agm, ahmm, bg, 100, esl_opt_GetReal(go, "--anj"), esl_opt_GetReal(go, "--apglocal")); p7_oprofile_Convert(agm, aom); for (idx = 1; idx <= N; idx++) { p7_ProfileEmit(rng, ghmm, ggm, bg, sq, reftr); if (esl_opt_GetBoolean(go, "--dumpseqs")) { esl_sq_FormatName(sq, "seq%d", idx); esl_sqio_Write(stdout, sq, eslSQFILE_FASTA, FALSE); } p7_bg_SetLength(bg, sq->n); p7_profile_SetLength(agm, sq->n); p7_sparsemask_Reinit(sm, agm->M, sq->n); p7_sparsemask_AddAll(sm); if (esl_opt_GetBoolean(go, "--vit")) p7_ReferenceViterbi(sq->dsq, sq->n, agm, rmx, testtr, /*opt_vsc=*/NULL); else p7_SparseViterbi (sq->dsq, sq->n, agm, sm, sxv, testtr, /*opt_vsc=*/NULL); p7_trace_metrics(reftr, testtr, tmetrics); p7_sparsemask_Reuse(sm); p7_sparsemx_Reuse(sxv); //p7_filtermx_Reuse(ox); p7_refmx_Reuse(rmx); esl_sq_Reuse(sq); p7_trace_Reuse(reftr); p7_trace_Reuse(testtr); } p7_oprofile_Destroy(aom); p7_profile_Destroy(ggm); p7_profile_Destroy(agm); p7_hmm_Destroy(ghmm); p7_hmm_Destroy(ahmm); } /* we leave the loop with <status> set by a p7_hmmfile_Read() on ghfp; if all is well, status=eslEOF */ if (status == eslEFORMAT) p7_Fail("Bad file format in HMM file %s:\n%s\n", ghfp->fname, ghfp->errbuf); else if (status == eslEINCOMPAT) p7_Fail("HMM in %s is not in the expected %s alphabet\n", ghfp->fname, esl_abc_DecodeType(abc->type)); else if (status != eslEOF) p7_Fail("Unexpected error in reading HMMs from %s\n", ghfp->fname); p7_trace_metrics_Dump(stdout, tmetrics); p7_hmmfile_Close(ghfp); p7_hmmfile_Close(ahfp); // p7_filtermx_Destroy(ox); p7_sparsemask_Destroy(sm); p7_sparsemx_Destroy(sxv); p7_refmx_Destroy(rmx); p7_trace_metrics_Destroy(tmetrics); p7_trace_Destroy(testtr); p7_trace_Destroy(reftr); p7_bg_Destroy(bg); esl_alphabet_Destroy(abc); esl_randomness_Destroy(rng); esl_getopts_Destroy(go); }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); char *seqfile = esl_opt_GetArg(go, 2); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_GMX *gx1 = NULL; P7_GMX *gx2 = NULL; ESL_SQ *sq = NULL; ESL_SQFILE *sqfp = NULL; P7_TRACE *tr = NULL; int format = eslSQFILE_UNKNOWN; char errbuf[eslERRBUFSIZE]; float fsc, bsc, vsc; float accscore; int status; /* Read in one HMM */ if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); p7_hmmfile_Close(hfp); /* Read in one sequence */ sq = esl_sq_CreateDigital(abc); status = esl_sqfile_OpenDigital(abc, seqfile, format, NULL, &sqfp); if (status == eslENOTFOUND) p7_Fail("No such file."); else if (status == eslEFORMAT) p7_Fail("Format unrecognized."); else if (status == eslEINVAL) p7_Fail("Can't autodetect stdin or .gz."); else if (status != eslOK) p7_Fail("Open failed, code %d.", status); if (esl_sqio_Read(sqfp, sq) != eslOK) p7_Fail("Failed to read sequence"); esl_sqfile_Close(sqfp); /* Configure a profile from the HMM */ bg = p7_bg_Create(abc); p7_bg_SetLength(bg, sq->n); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL); /* multihit local: H3 default */ /* Allocations */ gx1 = p7_gmx_Create(gm->M, sq->n); gx2 = p7_gmx_Create(gm->M, sq->n); tr = p7_trace_CreateWithPP(); p7_FLogsumInit(); /* Run Forward, Backward; do OA fill and trace */ p7_GForward (sq->dsq, sq->n, gm, gx1, &fsc); p7_GBackward(sq->dsq, sq->n, gm, gx2, &bsc); p7_GDecoding(gm, gx1, gx2, gx2); /* <gx2> is now the posterior decoding matrix */ p7_GOptimalAccuracy(gm, gx2, gx1, &accscore); /* <gx1> is now the OA matrix */ p7_GOATrace(gm, gx2, gx1, tr); if (esl_opt_GetBoolean(go, "-d")) p7_gmx_Dump(stdout, gx2, p7_DEFAULT); if (esl_opt_GetBoolean(go, "-m")) p7_gmx_Dump(stdout, gx1, p7_DEFAULT); p7_trace_Dump(stdout, tr, gm, sq->dsq); if (p7_trace_Validate(tr, abc, sq->dsq, errbuf) != eslOK) p7_Die("trace fails validation:\n%s\n", errbuf); printf("fwd = %.4f nats\n", fsc); printf("bck = %.4f nats\n", bsc); printf("acc = %.4f (%.2f%%)\n", accscore, accscore * 100. / (float) sq->n); p7_trace_Reuse(tr); p7_GViterbi(sq->dsq, sq->n, gm, gx1, &vsc); p7_GTrace (sq->dsq, sq->n, gm, gx1, tr); p7_trace_SetPP(tr, gx2); p7_trace_Dump(stdout, tr, gm, sq->dsq); printf("vit = %.4f nats\n", vsc); printf("acc = %.4f\n", p7_trace_GetExpectedAccuracy(tr)); /* Cleanup */ esl_sq_Destroy(sq); p7_trace_Destroy(tr); p7_gmx_Destroy(gx1); p7_gmx_Destroy(gx2); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_GMX *gx1 = NULL; P7_GMX *gx2 = NULL; P7_OMX *ox1 = NULL; P7_OMX *ox2 = NULL; P7_TRACE *tr = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float fsc, bsc, accscore; float fsc_g, bsc_g, accscore_g; double Mcs; p7_FLogsumInit(); if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_ReconfigLength(om, L); if (esl_opt_GetBoolean(go, "-x") && p7_FLogsumError(-0.4, -0.5) > 0.0001) p7_Fail("-x here requires p7_Logsum() recompiled in slow exact mode"); ox1 = p7_omx_Create(gm->M, L, L); ox2 = p7_omx_Create(gm->M, L, L); tr = p7_trace_CreateWithPP(); esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_Forward (dsq, L, om, ox1, &fsc); p7_Backward(dsq, L, om, ox1, ox2, &bsc); p7_Decoding(om, ox1, ox2, ox2); esl_stopwatch_Start(w); for (i = 0; i < N; i++) { p7_OptimalAccuracy(om, ox2, ox1, &accscore); if (! esl_opt_GetBoolean(go, "--notrace")) { p7_OATrace(om, ox2, ox1, tr); p7_trace_Reuse(tr); } } esl_stopwatch_Stop(w); Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) w->user; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); if (esl_opt_GetBoolean(go, "-c") || esl_opt_GetBoolean(go, "-x") ) { gx1 = p7_gmx_Create(gm->M, L); gx2 = p7_gmx_Create(gm->M, L); p7_GForward (dsq, L, gm, gx1, &fsc_g); p7_GBackward(dsq, L, gm, gx2, &bsc_g); p7_GDecoding(gm, gx1, gx2, gx2); p7_GOptimalAccuracy(gm, gx2, gx1, &accscore_g); printf("generic: fwd=%8.4f bck=%8.4f acc=%8.4f\n", fsc_g, bsc_g, accscore_g); printf("VMX: fwd=%8.4f bck=%8.4f acc=%8.4f\n", fsc, bsc, accscore); p7_gmx_Destroy(gx1); p7_gmx_Destroy(gx2); } free(dsq); p7_omx_Destroy(ox1); p7_omx_Destroy(ox2); p7_trace_Destroy(tr); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return 0; }
int main(int argc, char **argv) { ESL_ALPHABET *abc = NULL; /* sequence alphabet */ ESL_GETOPTS *go = NULL; /* command line processing */ ESL_RANDOMNESS *r = NULL; /* source of randomness */ P7_HMM *hmm = NULL; /* sampled HMM to emit from */ P7_HMM *core = NULL; /* safe copy of the HMM, before config */ P7_BG *bg = NULL; /* null model */ ESL_SQ *sq = NULL; /* sampled sequence */ P7_TRACE *tr = NULL; /* sampled trace */ P7_PROFILE *gm = NULL; /* profile */ int i,j; int i1,i2; int k1,k2; int iseq; FILE *fp = NULL; double expected; int do_ilocal; char *hmmfile = NULL; int nseq; int do_swlike; int do_ungapped; int L; int M; int do_h2; char *ipsfile = NULL; char *kpsfile = NULL; ESL_DMATRIX *imx = NULL; ESL_DMATRIX *kmx = NULL; ESL_DMATRIX *iref = NULL; /* reference matrix: expected i distribution under ideality */ int Lbins; int status; char errbuf[eslERRBUFSIZE]; /***************************************************************** * Parse the command line *****************************************************************/ go = esl_getopts_Create(options); if (esl_opt_ProcessCmdline(go, argc, argv) != eslOK) esl_fatal("Failed to parse command line: %s\n", go->errbuf); if (esl_opt_VerifyConfig(go) != eslOK) esl_fatal("Failed to parse command line: %s\n", go->errbuf); if (esl_opt_GetBoolean(go, "-h") == TRUE) { puts(usage); puts("\n where options are:\n"); esl_opt_DisplayHelp(stdout, go, 0, 2, 80); /* 0=all docgroups; 2 = indentation; 80=textwidth*/ return eslOK; } do_ilocal = esl_opt_GetBoolean(go, "-i"); hmmfile = esl_opt_GetString (go, "-m"); nseq = esl_opt_GetInteger(go, "-n"); do_swlike = esl_opt_GetBoolean(go, "-s"); do_ungapped = esl_opt_GetBoolean(go, "-u"); L = esl_opt_GetInteger(go, "-L"); M = esl_opt_GetInteger(go, "-M"); do_h2 = esl_opt_GetBoolean(go, "-2"); ipsfile = esl_opt_GetString (go, "--ips"); kpsfile = esl_opt_GetString (go, "--kps"); if (esl_opt_ArgNumber(go) != 0) { puts("Incorrect number of command line arguments."); printf("Usage: %s [options]\n", argv[0]); return eslFAIL; } r = esl_randomness_CreateFast(0); if (hmmfile != NULL) { /* Read the HMM (and get alphabet from it) */ P7_HMMFILE *hfp = NULL; status = p7_hmmfile_OpenE(hmmfile, NULL, &hfp, errbuf); if (status == eslENOTFOUND) p7_Fail("File existence/permissions problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status == eslEFORMAT) p7_Fail("File format problem in trying to open HMM file %s.\n%s\n", hmmfile, errbuf); else if (status != eslOK) p7_Fail("Unexpected error %d in opening HMM file %s.\n%s\n", status, hmmfile, errbuf); if ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) != eslOK) { if (status == eslEOD) esl_fatal("read failed, HMM file %s may be truncated?", hmmfile); else if (status == eslEFORMAT) esl_fatal("bad file format in HMM file %s", hmmfile); else if (status == eslEINCOMPAT) esl_fatal("HMM file %s contains different alphabets", hmmfile); else esl_fatal("Unexpected error in reading HMMs"); } M = hmm->M; p7_hmmfile_Close(hfp); } else { /* Or sample the HMM (create alphabet first) */ abc = esl_alphabet_Create(eslAMINO); if (do_ungapped) p7_hmm_SampleUngapped(r, M, abc, &hmm); else if (do_swlike) p7_hmm_SampleUniform (r, M, abc, 0.05, 0.5, 0.05, 0.2, &hmm); /* tmi, tii, tmd, tdd */ else p7_hmm_Sample (r, M, abc, &hmm); } Lbins = M; imx = esl_dmatrix_Create(Lbins, Lbins); iref = esl_dmatrix_Create(Lbins, Lbins); kmx = esl_dmatrix_Create(M, M); esl_dmatrix_SetZero(imx); esl_dmatrix_SetZero(iref); esl_dmatrix_SetZero(kmx); tr = p7_trace_Create(); sq = esl_sq_CreateDigital(abc); bg = p7_bg_Create(abc); core = p7_hmm_Clone(hmm); if (do_h2) { gm = p7_profile_Create(hmm->M, abc); p7_H2_ProfileConfig(hmm, bg, gm, p7_UNILOCAL); } else { gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_UNILOCAL); if (p7_hmm_Validate (hmm, NULL, 0.0001) != eslOK) esl_fatal("whoops, HMM is bad!"); if (p7_profile_Validate(gm, NULL, 0.0001) != eslOK) esl_fatal("whoops, profile is bad!"); } /* Sample endpoints. * Also sample an ideal reference distribution for i endpoints. i * endpoints are prone to discretization artifacts, when emitted * sequences have varying lengths. Taking log odds w.r.t. an ideal * reference that is subject to the same discretization artifacts * cancels out the effect. */ for (iseq = 0; iseq < nseq; iseq++) { if (do_ilocal) ideal_local_endpoints (r, core, sq, tr, Lbins, &i1, &i2, &k1, &k2); else profile_local_endpoints(r, core, gm, sq, tr, Lbins, &i1, &i2, &k1, &k2); imx->mx[i1-1][i2-1] += 1.; kmx->mx[k1-1][k2-1] += 1.; /* reference distribution for i */ ideal_local_endpoints (r, core, sq, tr, Lbins, &i1, &i2, &k1, &k2); iref->mx[i1-1][i2-1] += 1.; } /* Adjust both mx's to log_2(obs/exp) ratio */ printf("Before normalization/log-odds:\n"); printf(" i matrix values range from %f to %f\n", dmx_upper_min(imx), dmx_upper_max(imx)); printf(" k matrix values range from %f to %f\n", dmx_upper_min(kmx), dmx_upper_max(kmx)); printf("iref matrix values range from %f to %f\n", dmx_upper_min(iref), dmx_upper_max(iref)); expected = (double) nseq * 2. / (double) (M*(M+1)); for (i = 0; i < kmx->m; i++) for (j = i; j < kmx->n; j++) kmx->mx[i][j] = log(kmx->mx[i][j] / expected) / log(2.0); for (i = 0; i < imx->m; i++) for (j = i; j < imx->m; j++) if (iref->mx[i][j] == 0. && imx->mx[i][j] == 0.) imx->mx[i][j] = 0.; else if (iref->mx[i][j] == 0.) imx->mx[i][j] = eslINFINITY; else if (imx->mx[i][j] == 0.) imx->mx[i][j] = -eslINFINITY; else imx->mx[i][j] = log(imx->mx[i][j] / iref->mx[i][j]) / log(2.0); /* Print ps files */ if (kpsfile != NULL) { if ((fp = fopen(kpsfile, "w")) == NULL) esl_fatal("Failed to open output postscript file %s", kpsfile); dmx_Visualize(fp, kmx, -4., 5.); fclose(fp); } if (ipsfile != NULL) { if ((fp = fopen(ipsfile, "w")) == NULL) esl_fatal("Failed to open output postscript file %s", ipsfile); dmx_Visualize(fp, imx, -4., 5.); /* dmx_Visualize(fp, imx, dmx_upper_min(imx), dmx_upper_max(imx)); */ fclose(fp); } printf("After normalization/log-odds:\n"); printf("i matrix values range from %f to %f\n", dmx_upper_min(imx), dmx_upper_max(imx)); printf("k matrix values range from %f to %f\n", dmx_upper_min(kmx), dmx_upper_max(kmx)); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(core); p7_hmm_Destroy(hmm); p7_trace_Destroy(tr); esl_sq_Destroy(sq); esl_dmatrix_Destroy(imx); esl_dmatrix_Destroy(kmx); esl_alphabet_Destroy(abc); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return eslOK; }
/* serial_master() * The serial version of hmmbuild. * For each MSA, build an HMM and save it. * * A master can only return if it's successful. All errors are handled immediately and fatally with p7_Fail(). */ static int serial_master(const ESL_GETOPTS *go, struct cfg_s *cfg) { int status; int i; int ncpus = 0; int infocnt = 0; WORKER_INFO *info = NULL; #ifdef HMMER_THREADS WORK_ITEM *item = NULL; ESL_THREADS *threadObj= NULL; ESL_WORK_QUEUE *queue = NULL; #endif char errmsg[eslERRBUFSIZE]; if ((status = init_master_cfg(go, cfg, errmsg)) != eslOK) p7_Fail(errmsg); #ifdef HMMER_THREADS /* initialize thread data */ if (esl_opt_IsOn(go, "--cpu")) ncpus = esl_opt_GetInteger(go, "--cpu"); else esl_threads_CPUCount(&ncpus); if (ncpus > 0) { threadObj = esl_threads_Create(&pipeline_thread); queue = esl_workqueue_Create(ncpus * 2); } #endif infocnt = (ncpus == 0) ? 1 : ncpus; ESL_ALLOC(info, sizeof(*info) * infocnt); for (i = 0; i < infocnt; ++i) { info[i].bg = p7_bg_Create(cfg->abc); info[i].bld = p7_builder_Create(go, cfg->abc); if (info[i].bld == NULL) p7_Fail("p7_builder_Create failed"); #ifdef HMMER_THREADS info[i].queue = queue; if (ncpus > 0) esl_threads_AddThread(threadObj, &info[i]); #endif } #ifdef HMMER_THREADS for (i = 0; i < ncpus * 2; ++i) { ESL_ALLOC(item, sizeof(*item)); item->nali = 0; item->processed = FALSE; item->postmsa = NULL; item->msa = NULL; item->hmm = NULL; item->entropy = 0.0; status = esl_workqueue_Init(queue, item); if (status != eslOK) esl_fatal("Failed to add block to work queue"); } #endif #ifdef HMMER_THREADS if (ncpus > 0) status = thread_loop(threadObj, queue, cfg); else status = serial_loop(info, cfg); #else status = serial_loop(info, cfg); #endif if (status == eslEFORMAT) esl_fatal("Alignment file parse error:\n%s\n", cfg->afp->errbuf); else if (status == eslEINVAL) esl_fatal("Alignment file parse error:\n%s\n", cfg->afp->errbuf); else if (status != eslEOF) esl_fatal("Alignment file read failed with error code %d\n", status); for (i = 0; i < infocnt; ++i) { p7_bg_Destroy(info[i].bg); p7_builder_Destroy(info[i].bld); } #ifdef HMMER_THREADS if (ncpus > 0) { esl_workqueue_Reset(queue); while (esl_workqueue_Remove(queue, (void **) &item) == eslOK) { free(item); } esl_workqueue_Destroy(queue); esl_threads_Destroy(threadObj); } #endif free(info); return eslOK; ERROR: return eslFAIL; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 2, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); char *seqfile = esl_opt_GetArg(go, 2); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_OMX *ox = NULL; P7_GMX *gx = NULL; ESL_SQ *sq = NULL; ESL_SQFILE *sqfp = NULL; int format = eslSQFILE_UNKNOWN; float vfraw, nullsc, vfscore; float graw, gscore; double P, gP; int status; /* Read in one HMM */ if (p7_hmmfile_Open(hmmfile, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); /* Read in one sequence */ sq = esl_sq_CreateDigital(abc); status = esl_sqfile_Open(seqfile, format, NULL, &sqfp); if (status == eslENOTFOUND) p7_Fail("No such file."); else if (status == eslEFORMAT) p7_Fail("Format unrecognized."); else if (status == eslEINVAL) p7_Fail("Can't autodetect stdin or .gz."); else if (status != eslOK) p7_Fail("Open failed, code %d.", status); /* create default null model, then create and optimize profile */ bg = p7_bg_Create(abc); p7_bg_SetLength(bg, sq->n); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, sq->n, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); /* allocate DP matrices, both a generic and an optimized one */ ox = p7_omx_Create(gm->M, 0, sq->n); gx = p7_gmx_Create(gm->M, sq->n); /* Useful to place and compile in for debugging: p7_oprofile_Dump(stdout, om); dumps the optimized profile p7_omx_SetDumpMode(ox, TRUE); makes the fast DP algorithms dump their matrices p7_gmx_Dump(stdout, gx); dumps a generic DP matrix */ while ((status = esl_sqio_Read(sqfp, sq)) == eslOK) { p7_oprofile_ReconfigLength(om, sq->n); p7_ReconfigLength(gm, sq->n); p7_bg_SetLength(bg, sq->n); p7_omx_GrowTo(ox, om->M, 0, sq->n); p7_gmx_GrowTo(gx, gm->M, sq->n); p7_ViterbiFilter (sq->dsq, sq->n, om, ox, &vfraw); p7_bg_NullOne (bg, sq->dsq, sq->n, &nullsc); vfscore = (vfraw - nullsc) / eslCONST_LOG2; P = esl_gumbel_surv(vfscore, om->evparam[p7_VMU], om->evparam[p7_VLAMBDA]); p7_GViterbi (sq->dsq, sq->n, gm, gx, &graw); gscore = (graw - nullsc) / eslCONST_LOG2; gP = esl_gumbel_surv(gscore, gm->evparam[p7_VMU], gm->evparam[p7_VLAMBDA]); if (esl_opt_GetBoolean(go, "-1")) { printf("%-30s\t%-20s\t%9.2g\t%7.2f\t%9.2g\t%7.2f\n", sq->name, hmm->name, P, vfscore, gP, gscore); } else if (esl_opt_GetBoolean(go, "-P")) { /* output suitable for direct use in profmark benchmark postprocessors: */ printf("%g\t%.2f\t%s\t%s\n", P, vfscore, sq->name, hmm->name); } else { printf("target sequence: %s\n", sq->name); printf("vit filter raw score: %.2f nats\n", vfraw); printf("null score: %.2f nats\n", nullsc); printf("per-seq score: %.2f bits\n", vfscore); printf("P-value: %g\n", P); printf("GViterbi raw score: %.2f nats\n", graw); printf("GViterbi seq score: %.2f bits\n", gscore); printf("GViterbi P-value: %g\n", gP); } esl_sq_Reuse(sq); } /* cleanup */ esl_sq_Destroy(sq); esl_sqfile_Close(sqfp); p7_omx_Destroy(ox); p7_gmx_Destroy(gx); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }
/* mpi_master() * The MPI version of hmmbuild. * Follows standard pattern for a master/worker load-balanced MPI program (J1/78-79). * * A master can only return if it's successful. * Errors in an MPI master come in two classes: recoverable and nonrecoverable. * * Recoverable errors include all worker-side errors, and any * master-side error that do not affect MPI communication. Error * messages from recoverable messages are delayed until we've cleanly * shut down the workers. * * Unrecoverable errors are master-side errors that may affect MPI * communication, meaning we cannot count on being able to reach the * workers and shut them down. Unrecoverable errors result in immediate * p7_Fail()'s, which will cause MPI to shut down the worker processes * uncleanly. */ static void mpi_master(const ESL_GETOPTS *go, struct cfg_s *cfg) { int xstatus = eslOK; /* changes from OK on recoverable error */ int status; int have_work = TRUE; /* TRUE while alignments remain */ int nproc_working = 0; /* number of worker processes working, up to nproc-1 */ int wi; /* rank of next worker to get an alignment to work on */ char *buf = NULL; /* input/output buffer, for packed MPI messages */ int bn = 0; ESL_MSA *msa = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; ESL_MSA **msalist = NULL; ESL_MSA *postmsa = NULL; int *msaidx = NULL; char errmsg[eslERRBUFSIZE]; MPI_Status mpistatus; int n; int pos; double entropy; /* Master initialization: including, figure out the alphabet type. * If any failure occurs, delay printing error message until we've shut down workers. */ if (xstatus == eslOK) { if ((status = init_master_cfg(go, cfg, errmsg)) != eslOK) xstatus = status; } if (xstatus == eslOK) { bn = 4096; if ((buf = malloc(sizeof(char) * bn)) == NULL) { sprintf(errmsg, "allocation failed"); xstatus = eslEMEM; } } if (xstatus == eslOK) { if ((msalist = malloc(sizeof(ESL_MSA *) * cfg->nproc)) == NULL) { sprintf(errmsg, "allocation failed"); xstatus = eslEMEM; } } if (xstatus == eslOK) { if ((msaidx = malloc(sizeof(int) * cfg->nproc)) == NULL) { sprintf(errmsg, "allocation failed"); xstatus = eslEMEM; } } MPI_Bcast(&xstatus, 1, MPI_INT, 0, MPI_COMM_WORLD); if (xstatus != eslOK) { MPI_Finalize(); p7_Fail(errmsg); } ESL_DPRINTF1(("MPI master is initialized\n")); bg = p7_bg_Create(cfg->abc); for (wi = 0; wi < cfg->nproc; wi++) { msalist[wi] = NULL; msaidx[wi] = 0; } /* Worker initialization: * Because we've already successfully initialized the master before we start * initializing the workers, we don't expect worker initialization to fail; * so we just receive a quick OK/error code reply from each worker to be sure, * and don't worry about an informative message. */ MPI_Bcast(&(cfg->abc->type), 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Reduce(&xstatus, &status, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); if (status != eslOK) { MPI_Finalize(); p7_Fail("One or more MPI worker processes failed to initialize."); } ESL_DPRINTF1(("%d workers are initialized\n", cfg->nproc-1)); /* Main loop: combining load workers, send/receive, clear workers loops; * also, catch error states and die later, after clean shutdown of workers. * * When a recoverable error occurs, have_work = FALSE, xstatus != * eslOK, and errmsg is set to an informative message. No more * errmsg's can be received after the first one. We wait for all the * workers to clear their work units, then send them shutdown signals, * then finally print our errmsg and exit. * * Unrecoverable errors just crash us out with p7_Fail(). */ wi = 1; while (have_work || nproc_working) { if (have_work) { if ((status = esl_msa_Read(cfg->afp, &msa)) == eslOK) { cfg->nali++; ESL_DPRINTF1(("MPI master read MSA %s\n", msa->name == NULL? "" : msa->name)); } else { have_work = FALSE; if (status == eslEFORMAT) { xstatus = eslEFORMAT; snprintf(errmsg, eslERRBUFSIZE, "Alignment file parse error:\n%s\n", cfg->afp->errbuf); } else if (status == eslEINVAL) { xstatus = eslEFORMAT; snprintf(errmsg, eslERRBUFSIZE, "Alignment file parse error:\n%s\n", cfg->afp->errbuf); } else if (status != eslEOF) { xstatus = status; snprintf(errmsg, eslERRBUFSIZE, "Alignment file read unexpectedly failed with code %d\n", status); } ESL_DPRINTF1(("MPI master has run out of MSAs (having read %d)\n", cfg->nali)); } } if ((have_work && nproc_working == cfg->nproc-1) || (!have_work && nproc_working > 0)) { if (MPI_Probe(MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &mpistatus) != 0) { MPI_Finalize(); p7_Fail("mpi probe failed"); } if (MPI_Get_count(&mpistatus, MPI_PACKED, &n) != 0) { MPI_Finalize(); p7_Fail("mpi get count failed"); } wi = mpistatus.MPI_SOURCE; ESL_DPRINTF1(("MPI master sees a result of %d bytes from worker %d\n", n, wi)); if (n > bn) { if ((buf = realloc(buf, sizeof(char) * n)) == NULL) p7_Fail("reallocation failed"); bn = n; } if (MPI_Recv(buf, bn, MPI_PACKED, wi, 0, MPI_COMM_WORLD, &mpistatus) != 0) { MPI_Finalize(); p7_Fail("mpi recv failed"); } ESL_DPRINTF1(("MPI master has received the buffer\n")); /* If we're in a recoverable error state, we're only clearing worker results; * just receive them, don't unpack them or print them. * But if our xstatus is OK, go ahead and process the result buffer. */ if (xstatus == eslOK) { pos = 0; if (MPI_Unpack(buf, bn, &pos, &xstatus, 1, MPI_INT, MPI_COMM_WORLD) != 0) { MPI_Finalize(); p7_Fail("mpi unpack failed");} if (xstatus == eslOK) /* worker reported success. Get the HMM. */ { ESL_DPRINTF1(("MPI master sees that the result buffer contains an HMM\n")); if (p7_hmm_MPIUnpack(buf, bn, &pos, MPI_COMM_WORLD, &(cfg->abc), &hmm) != eslOK) { MPI_Finalize(); p7_Fail("HMM unpack failed"); } ESL_DPRINTF1(("MPI master has unpacked the HMM\n")); if (cfg->postmsafile != NULL) { if (esl_msa_MPIUnpack(cfg->abc, buf, bn, &pos, MPI_COMM_WORLD, &postmsa) != eslOK) { MPI_Finalize(); p7_Fail("postmsa unpack failed");} } entropy = p7_MeanMatchRelativeEntropy(hmm, bg); if ((status = output_result(cfg, errmsg, msaidx[wi], msalist[wi], hmm, postmsa, entropy)) != eslOK) xstatus = status; esl_msa_Destroy(postmsa); postmsa = NULL; p7_hmm_Destroy(hmm); hmm = NULL; } else /* worker reported an error. Get the errmsg. */ { if (MPI_Unpack(buf, bn, &pos, errmsg, eslERRBUFSIZE, MPI_CHAR, MPI_COMM_WORLD) != 0) { MPI_Finalize(); p7_Fail("mpi unpack of errmsg failed"); } ESL_DPRINTF1(("MPI master sees that the result buffer contains an error message\n")); } } esl_msa_Destroy(msalist[wi]); msalist[wi] = NULL; msaidx[wi] = 0; nproc_working--; } if (have_work) { ESL_DPRINTF1(("MPI master is sending MSA %s to worker %d\n", msa->name == NULL ? "":msa->name, wi)); if (esl_msa_MPISend(msa, wi, 0, MPI_COMM_WORLD, &buf, &bn) != eslOK) p7_Fail("MPI msa send failed"); msalist[wi] = msa; msaidx[wi] = cfg->nali; /* 1..N for N alignments in the MSA database */ msa = NULL; wi++; nproc_working++; } } /* On success or recoverable errors: * Shut down workers cleanly. */ ESL_DPRINTF1(("MPI master is done. Shutting down all the workers cleanly\n")); for (wi = 1; wi < cfg->nproc; wi++) if (esl_msa_MPISend(NULL, wi, 0, MPI_COMM_WORLD, &buf, &bn) != eslOK) p7_Fail("MPI msa send failed"); free(buf); free(msaidx); free(msalist); p7_bg_Destroy(bg); if (xstatus != eslOK) { MPI_Finalize(); p7_Fail(errmsg); } else return; }
/* Function: p7_Calibrate() * Synopsis: Calibrate the E-value parameters of a model. * Incept: SRE, Thu Dec 25 09:29:31 2008 [Magallon] * * Purpose: Calibrate the E-value parameters of a model with * one calculation ($\lambda$) and two brief simulations * (Viterbi $\mu$, Forward $\tau$). * * Args: hmm - HMM to be calibrated * cfg_b - OPTCFG: ptr to optional build configuration; * if <NULL>, use default parameters. * byp_rng - BYPASS optimization: pass ptr to <ESL_RANDOMNESS> generator * if already known; * <*byp_rng> == NULL> if <rng> return is desired; * pass <NULL> to use and discard internal default. * byp_bg - BYPASS optimization: pass ptr to <P7_BG> if already known; * <*byp_bg == NULL> if <bg> return is desired; * pass <NULL> to use and discard internal default. * byp_gm - BYPASS optimization: pass ptr to <gm> profile if already known; * pass <*byp_gm == NULL> if <gm> return desired; * pass <NULL> to use and discard internal default. * byp_om - BYPASS optimization: pass ptr to <om> profile if already known; * pass <*byp_om == NULL> if <om> return desired; * pass <NULL> to use and discard internal default. * * Returns: <eslOK> on success. * * Throws: <eslEMEM> on allocation failure. * <eslEINVAL> if <hmm>, <gm>, <om> aren't compatible somehow. * * Xref: J4/41 */ int p7_Calibrate(P7_HMM *hmm, P7_BUILDER *cfg_b, ESL_RANDOMNESS **byp_rng, P7_BG **byp_bg, P7_PROFILE **byp_gm, P7_OPROFILE **byp_om) { P7_BG *bg = (esl_byp_IsProvided(byp_bg) ? *byp_bg : NULL); P7_PROFILE *gm = (esl_byp_IsProvided(byp_gm) ? *byp_gm : NULL); P7_OPROFILE *om = (esl_byp_IsProvided(byp_om) ? *byp_om : NULL); ESL_RANDOMNESS *r = (esl_byp_IsProvided(byp_rng) ? *byp_rng : NULL); char *errbuf = ((cfg_b != NULL) ? cfg_b->errbuf : NULL); int EmL = ((cfg_b != NULL) ? cfg_b->EmL : 200); int EmN = ((cfg_b != NULL) ? cfg_b->EmN : 200); int EvL = ((cfg_b != NULL) ? cfg_b->EvL : 200); int EvN = ((cfg_b != NULL) ? cfg_b->EvN : 200); int EfL = ((cfg_b != NULL) ? cfg_b->EfL : 100); int EfN = ((cfg_b != NULL) ? cfg_b->EfN : 200); double Eft = ((cfg_b != NULL) ? cfg_b->Eft : 0.04); double lambda, mmu, vmu, tau; int status; /* Configure any objects we need * that weren't already passed to us as a bypass optimization */ if (r == NULL) { if ((r = esl_randomness_CreateFast(42)) == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to create RNG"); } else if (cfg_b != NULL && cfg_b->do_reseeding) { esl_randomness_Init(r, esl_randomness_GetSeed(r)); } if (bg == NULL) { if ((bg = p7_bg_Create(hmm->abc)) == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to allocate background"); } /* there's an odd case where the <om> is provided and a <gm> isn't going to be returned * where we don't need a <gm> at all, and <gm> stays <NULL> after the next block. * Note that the <EvL> length in the ProfileConfig doesn't matter; the individual * calibration routines MSVMu(), etc. contain their own length reconfig calls. */ if ((esl_byp_IsInternal(byp_gm) && ! esl_byp_IsProvided(byp_om)) || esl_byp_IsReturned(byp_gm)) { if ( (gm = p7_profile_Create(hmm->M, hmm->abc)) == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to allocate profile"); if ( (status = p7_ProfileConfig(hmm, bg, gm, EvL, p7_LOCAL)) != eslOK) ESL_XFAIL(status, errbuf, "failed to configure profile"); } if (om == NULL) { if ((om = p7_oprofile_Create(hmm->M, hmm->abc)) == NULL) ESL_XFAIL(eslEMEM, errbuf, "failed to create optimized profile"); if ((status = p7_oprofile_Convert(gm, om)) != eslOK) ESL_XFAIL(status, errbuf, "failed to convert to optimized profile"); } /* The calibration steps themselves */ if ((status = p7_Lambda(hmm, bg, &lambda)) != eslOK) ESL_XFAIL(status, errbuf, "failed to determine lambda"); if ((status = p7_MSVMu (r, om, bg, EmL, EmN, lambda, &mmu)) != eslOK) ESL_XFAIL(status, errbuf, "failed to determine msv mu"); if ((status = p7_ViterbiMu(r, om, bg, EvL, EvN, lambda, &vmu)) != eslOK) ESL_XFAIL(status, errbuf, "failed to determine vit mu"); if ((status = p7_Tau (r, om, bg, EfL, EfN, lambda, Eft, &tau)) != eslOK) ESL_XFAIL(status, errbuf, "failed to determine fwd tau"); /* Store results */ hmm->evparam[p7_MLAMBDA] = om->evparam[p7_MLAMBDA] = lambda; hmm->evparam[p7_VLAMBDA] = om->evparam[p7_VLAMBDA] = lambda; hmm->evparam[p7_FLAMBDA] = om->evparam[p7_FLAMBDA] = lambda; hmm->evparam[p7_MMU] = om->evparam[p7_MMU] = mmu; hmm->evparam[p7_VMU] = om->evparam[p7_VMU] = vmu; hmm->evparam[p7_FTAU] = om->evparam[p7_FTAU] = tau; hmm->flags |= p7H_STATS; if (gm != NULL) { gm->evparam[p7_MLAMBDA] = lambda; gm->evparam[p7_VLAMBDA] = lambda; gm->evparam[p7_FLAMBDA] = lambda; gm->evparam[p7_MMU] = mmu; gm->evparam[p7_VMU] = vmu; gm->evparam[p7_FTAU] = tau; } if (byp_rng != NULL) *byp_rng = r; else esl_randomness_Destroy(r); /* bypass convention: no-op if rng was provided.*/ if (byp_bg != NULL) *byp_bg = bg; else p7_bg_Destroy(bg); /* bypass convention: no-op if bg was provided. */ if (byp_gm != NULL) *byp_gm = gm; else p7_profile_Destroy(gm); /* bypass convention: no-op if gm was provided. */ if (byp_om != NULL) *byp_om = om; else p7_oprofile_Destroy(om); /* bypass convention: no-op if om was provided. */ return eslOK; ERROR: if (! esl_byp_IsProvided(byp_rng)) esl_randomness_Destroy(r); if (! esl_byp_IsProvided(byp_bg)) p7_bg_Destroy(bg); if (! esl_byp_IsProvided(byp_gm)) p7_profile_Destroy(gm); if (! esl_byp_IsProvided(byp_om)) p7_oprofile_Destroy(om); return status; }
static void mpi_worker(const ESL_GETOPTS *go, struct cfg_s *cfg) { int xstatus = eslOK; int status; int type; P7_BUILDER *bld = NULL; ESL_MSA *msa = NULL; ESL_MSA *postmsa = NULL; ESL_MSA **postmsa_ptr = (cfg->postmsafile != NULL) ? &postmsa : NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; char *wbuf = NULL; /* packed send/recv buffer */ void *tmp; /* for reallocation of wbuf */ int wn = 0; /* allocation size for wbuf */ int sz, n; /* size of a packed message */ int pos; char errmsg[eslERRBUFSIZE]; /* After master initialization: master broadcasts its status. */ MPI_Bcast(&xstatus, 1, MPI_INT, 0, MPI_COMM_WORLD); if (xstatus != eslOK) return; /* master saw an error code; workers do an immediate normal shutdown. */ ESL_DPRINTF2(("worker %d: sees that master has initialized\n", cfg->my_rank)); /* Master now broadcasts worker initialization information (alphabet type) * Workers returns their status post-initialization. * Initial allocation of wbuf must be large enough to guarantee that * we can pack an error result into it, because after initialization, * errors will be returned as packed (code, errmsg) messages. */ MPI_Bcast(&type, 1, MPI_INT, 0, MPI_COMM_WORLD); if (xstatus == eslOK) { if ((cfg->abc = esl_alphabet_Create(type)) == NULL) xstatus = eslEMEM; } if (xstatus == eslOK) { wn = 4096; if ((wbuf = malloc(wn * sizeof(char))) == NULL) xstatus = eslEMEM; } if (xstatus == eslOK) { if ((bld = p7_builder_Create(go, cfg->abc)) == NULL) xstatus = eslEMEM; } MPI_Reduce(&xstatus, &status, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); /* everyone sends xstatus back to master */ if (xstatus != eslOK) { if (wbuf != NULL) free(wbuf); if (bld != NULL) p7_builder_Destroy(bld); return; /* shutdown; we passed the error back for the master to deal with. */ } bg = p7_bg_Create(cfg->abc); ESL_DPRINTF2(("worker %d: initialized\n", cfg->my_rank)); /* source = 0 (master); tag = 0 */ while (esl_msa_MPIRecv(0, 0, MPI_COMM_WORLD, cfg->abc, &wbuf, &wn, &msa) == eslOK) { /* Build the HMM */ ESL_DPRINTF2(("worker %d: has received MSA %s (%d columns, %d seqs)\n", cfg->my_rank, msa->name, msa->alen, msa->nseq)); if ((status = p7_Builder(bld, msa, bg, &hmm, NULL, NULL, NULL, postmsa_ptr)) != eslOK) { strcpy(errmsg, bld->errbuf); goto ERROR; } ESL_DPRINTF2(("worker %d: has produced an HMM %s\n", cfg->my_rank, hmm->name)); /* Calculate upper bound on size of sending status, HMM, and optional postmsa; make sure wbuf can hold it. */ n = 0; if (MPI_Pack_size(1, MPI_INT, MPI_COMM_WORLD, &sz) != 0) goto ERROR; n += sz; if (p7_hmm_MPIPackSize( hmm, MPI_COMM_WORLD, &sz) != eslOK) goto ERROR; n += sz; if (esl_msa_MPIPackSize(postmsa, MPI_COMM_WORLD, &sz) != eslOK) goto ERROR; n += sz; if (n > wn) { ESL_RALLOC(wbuf, tmp, sizeof(char) * n); wn = n; } ESL_DPRINTF2(("worker %d: has calculated that HMM will pack into %d bytes\n", cfg->my_rank, n)); /* Send status, HMM, and optional postmsa back to the master */ pos = 0; if (MPI_Pack (&status, 1, MPI_INT, wbuf, wn, &pos, MPI_COMM_WORLD) != 0) goto ERROR; if (p7_hmm_MPIPack (hmm, wbuf, wn, &pos, MPI_COMM_WORLD) != eslOK) goto ERROR; if (esl_msa_MPIPack(postmsa, wbuf, wn, &pos, MPI_COMM_WORLD) != eslOK) goto ERROR; MPI_Send(wbuf, pos, MPI_PACKED, 0, 0, MPI_COMM_WORLD); ESL_DPRINTF2(("worker %d: has sent HMM to master in message of %d bytes\n", cfg->my_rank, pos)); esl_msa_Destroy(msa); msa = NULL; esl_msa_Destroy(postmsa); postmsa = NULL; p7_hmm_Destroy(hmm); hmm = NULL; } if (wbuf != NULL) free(wbuf); p7_builder_Destroy(bld); return; ERROR: ESL_DPRINTF2(("worker %d: fails, is sending an error message, as follows:\n%s\n", cfg->my_rank, errmsg)); pos = 0; MPI_Pack(&status, 1, MPI_INT, wbuf, wn, &pos, MPI_COMM_WORLD); MPI_Pack(errmsg, eslERRBUFSIZE, MPI_CHAR, wbuf, wn, &pos, MPI_COMM_WORLD); MPI_Send(wbuf, pos, MPI_PACKED, 0, 0, MPI_COMM_WORLD); if (wbuf != NULL) free(wbuf); if (msa != NULL) esl_msa_Destroy(msa); if (hmm != NULL) p7_hmm_Destroy(hmm); if (bld != NULL) p7_builder_Destroy(bld); return; }
int main(int argc, char **argv) { ESL_GETOPTS *go = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *hmmfile = esl_opt_GetArg(go, 1); ESL_STOPWATCH *w = esl_stopwatch_Create(); ESL_RANDOMNESS *r = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s")); ESL_ALPHABET *abc = NULL; P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; P7_OMX *fwd = NULL; P7_OMX *bck = NULL; P7_OMX *pp = NULL; int L = esl_opt_GetInteger(go, "-L"); int N = esl_opt_GetInteger(go, "-N"); ESL_DSQ *dsq = malloc(sizeof(ESL_DSQ) * (L+2)); int i; float fsc, bsc; double Mcs; if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile); if (p7_hmmfile_Read(hfp, &abc, &hmm) != eslOK) p7_Fail("Failed to read HMM"); bg = p7_bg_Create(abc); p7_bg_SetLength(bg, L); gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, L, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_ReconfigLength(om, L); fwd = p7_omx_Create(gm->M, L, L); bck = p7_omx_Create(gm->M, L, L); pp = p7_omx_Create(gm->M, L, L); esl_rsq_xfIID(r, bg->f, abc->K, L, dsq); p7_Forward (dsq, L, om, fwd, &fsc); p7_Backward(dsq, L, om, fwd, bck, &bsc); esl_stopwatch_Start(w); for (i = 0; i < N; i++) p7_Decoding(om, fwd, bck, pp); esl_stopwatch_Stop(w); Mcs = (double) N * (double) L * (double) gm->M * 1e-6 / (double) w->user; esl_stopwatch_Display(stdout, w, "# CPU time: "); printf("# M = %d\n", gm->M); printf("# %.1f Mc/s\n", Mcs); free(dsq); p7_omx_Destroy(fwd); p7_omx_Destroy(bck); p7_omx_Destroy(pp); p7_oprofile_Destroy(om); p7_profile_Destroy(gm); p7_bg_Destroy(bg); p7_hmm_Destroy(hmm); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_stopwatch_Destroy(w); esl_randomness_Destroy(r); esl_getopts_Destroy(go); return eslOK; }
int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); ESL_ALPHABET *abc = NULL; char *hmmfile = esl_opt_GetArg(go, 1); P7_HMMFILE *hfp = NULL; P7_HMM *hmm = NULL; P7_BG *bg = NULL; P7_PROFILE *gm = NULL; P7_OPROFILE *om = NULL; char *fname = NULL; char *pname = NULL; FILE *ffp = NULL; FILE *pfp = NULL; int nmodel = 0; uint64_t totM = 0; int status; status = p7_hmmfile_Open(hmmfile, NULL, &hfp); if (status == eslENOTFOUND) esl_fatal("Failed to open HMM file %s for reading.\n", hmmfile); else if (status == eslEFORMAT) esl_fatal("File %s does not appear to be in a recognized HMM format.\n", hmmfile); else if (status != eslOK) esl_fatal("Unexpected error %d in opening HMM file %s.\n", status, hmmfile); esl_sprintf(&fname, "%s.h3f", hmmfile); esl_sprintf(&pname, "%s.h3f", hmmfile); if ((ffp = fopen(fname, "wb")) == NULL) esl_fatal("failed to open %s\n", fname); if ((pfp = fopen(pname, "wb")) == NULL) esl_fatal("failed to open %s\n", pname); free(fname); free(pname); while ((status = p7_hmmfile_Read(hfp, &abc, &hmm)) == eslOK) { if (nmodel == 0) { /* first time initialization, now that alphabet known */ bg = p7_bg_Create(abc); p7_bg_SetLength(bg, 400); } if (esl_opt_GetBoolean(go, "-v")) printf("%s\n", hmm->name); nmodel++; totM += hmm->M; gm = p7_profile_Create(hmm->M, abc); p7_ProfileConfig(hmm, bg, gm, 400, p7_LOCAL); om = p7_oprofile_Create(gm->M, abc); p7_oprofile_Convert(gm, om); p7_oprofile_Write(ffp, pfp, om); p7_profile_Destroy(gm); p7_oprofile_Destroy(om); p7_hmm_Destroy(hmm); } if (status == eslEFORMAT) esl_fatal("bad file format in HMM file %s", hmmfile); else if (status == eslEINCOMPAT) esl_fatal("HMM file %s contains different alphabets", hmmfile); else if (status != eslEOF) esl_fatal("Unexpected error in reading HMMs from %s", hmmfile); fclose(ffp); fclose(pfp); p7_bg_Destroy(bg); p7_hmmfile_Close(hfp); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; }